summaryrefslogtreecommitdiff
path: root/storage/xtradb
diff options
context:
space:
mode:
Diffstat (limited to 'storage/xtradb')
-rw-r--r--storage/xtradb/CMakeLists.txt515
-rw-r--r--storage/xtradb/COPYING.Google30
-rw-r--r--storage/xtradb/COPYING.Percona30
-rw-r--r--storage/xtradb/Doxyfile1419
-rw-r--r--storage/xtradb/api/api0api.cc3886
-rw-r--r--storage/xtradb/api/api0misc.cc203
-rw-r--r--storage/xtradb/btr/btr0btr.cc5330
-rw-r--r--storage/xtradb/btr/btr0cur.cc6148
-rw-r--r--storage/xtradb/btr/btr0defragment.cc833
-rw-r--r--storage/xtradb/btr/btr0pcur.cc620
-rw-r--r--storage/xtradb/btr/btr0scrub.cc931
-rw-r--r--storage/xtradb/btr/btr0sea.cc2085
-rw-r--r--storage/xtradb/buf/buf0buddy.cc741
-rw-r--r--storage/xtradb/buf/buf0buf.cc6504
-rw-r--r--storage/xtradb/buf/buf0checksum.cc156
-rw-r--r--storage/xtradb/buf/buf0dblwr.cc1288
-rw-r--r--storage/xtradb/buf/buf0dump.cc732
-rw-r--r--storage/xtradb/buf/buf0flu.cc3133
-rw-r--r--storage/xtradb/buf/buf0lru.cc3016
-rw-r--r--storage/xtradb/buf/buf0mtflu.cc756
-rw-r--r--storage/xtradb/buf/buf0rea.cc1125
-rw-r--r--storage/xtradb/data/data0data.cc751
-rw-r--r--storage/xtradb/data/data0type.cc298
-rw-r--r--storage/xtradb/dict/dict0boot.cc531
-rw-r--r--storage/xtradb/dict/dict0crea.cc1992
-rw-r--r--storage/xtradb/dict/dict0dict.cc7325
-rw-r--r--storage/xtradb/dict/dict0load.cc3275
-rw-r--r--storage/xtradb/dict/dict0mem.cc823
-rw-r--r--storage/xtradb/dict/dict0stats.cc4463
-rw-r--r--storage/xtradb/dict/dict0stats_bg.cc585
-rw-r--r--storage/xtradb/dyn/dyn0dyn.cc65
-rw-r--r--storage/xtradb/eval/eval0eval.cc950
-rw-r--r--storage/xtradb/eval/eval0proc.cc296
-rw-r--r--storage/xtradb/fil/fil0crypt.cc2662
-rw-r--r--storage/xtradb/fil/fil0fil.cc7725
-rw-r--r--storage/xtradb/fil/fil0pagecompress.cc745
-rw-r--r--storage/xtradb/fsp/fsp0fsp.cc4171
-rw-r--r--storage/xtradb/fts/Makefile.query32
-rw-r--r--storage/xtradb/fts/fts0ast.cc744
-rw-r--r--storage/xtradb/fts/fts0blex.cc1957
-rw-r--r--storage/xtradb/fts/fts0blex.l73
-rw-r--r--storage/xtradb/fts/fts0config.cc564
-rw-r--r--storage/xtradb/fts/fts0fts.cc7711
-rw-r--r--storage/xtradb/fts/fts0opt.cc3246
-rw-r--r--storage/xtradb/fts/fts0pars.cc2010
-rw-r--r--storage/xtradb/fts/fts0pars.y294
-rw-r--r--storage/xtradb/fts/fts0que.cc4491
-rw-r--r--storage/xtradb/fts/fts0sql.cc363
-rw-r--r--storage/xtradb/fts/fts0tlex.cc1952
-rw-r--r--storage/xtradb/fts/fts0tlex.l68
-rwxr-xr-xstorage/xtradb/fts/make_parser.sh49
-rw-r--r--storage/xtradb/fut/fut0fut.cc31
-rw-r--r--storage/xtradb/fut/fut0lst.cc432
-rw-r--r--storage/xtradb/ha/ha0ha.cc528
-rw-r--r--storage/xtradb/ha/ha0storage.cc184
-rw-r--r--storage/xtradb/ha/hash0hash.cc403
-rw-r--r--storage/xtradb/ha_innodb.def4
-rw-r--r--storage/xtradb/handler/ha_innodb.cc22318
-rw-r--r--storage/xtradb/handler/ha_innodb.h746
-rw-r--r--storage/xtradb/handler/handler0alter.cc6431
-rw-r--r--storage/xtradb/handler/i_s.cc9647
-rw-r--r--storage/xtradb/handler/i_s.h159
-rw-r--r--storage/xtradb/handler/xtradb_i_s.cc544
-rw-r--r--storage/xtradb/handler/xtradb_i_s.h27
-rw-r--r--storage/xtradb/ibuf/ibuf0ibuf.cc5276
-rw-r--r--storage/xtradb/include/api0api.h1312
-rw-r--r--storage/xtradb/include/api0misc.h78
-rw-r--r--storage/xtradb/include/btr0btr.h883
-rw-r--r--storage/xtradb/include/btr0btr.ic335
-rw-r--r--storage/xtradb/include/btr0cur.h946
-rw-r--r--storage/xtradb/include/btr0cur.ic223
-rw-r--r--storage/xtradb/include/btr0defragment.h102
-rw-r--r--storage/xtradb/include/btr0pcur.h548
-rw-r--r--storage/xtradb/include/btr0pcur.ic612
-rw-r--r--storage/xtradb/include/btr0scrub.h166
-rw-r--r--storage/xtradb/include/btr0sea.h356
-rw-r--r--storage/xtradb/include/btr0sea.ic210
-rw-r--r--storage/xtradb/include/btr0types.h204
-rw-r--r--storage/xtradb/include/buf0buddy.h77
-rw-r--r--storage/xtradb/include/buf0buddy.ic142
-rw-r--r--storage/xtradb/include/buf0buf.h2353
-rw-r--r--storage/xtradb/include/buf0buf.ic1561
-rw-r--r--storage/xtradb/include/buf0checksum.h83
-rw-r--r--storage/xtradb/include/buf0dblwr.h167
-rw-r--r--storage/xtradb/include/buf0dump.h72
-rw-r--r--storage/xtradb/include/buf0flu.h382
-rw-r--r--storage/xtradb/include/buf0flu.ic167
-rw-r--r--storage/xtradb/include/buf0lru.h314
-rw-r--r--storage/xtradb/include/buf0lru.ic25
-rw-r--r--storage/xtradb/include/buf0mtflu.h95
-rw-r--r--storage/xtradb/include/buf0rea.h190
-rw-r--r--storage/xtradb/include/buf0types.h157
-rw-r--r--storage/xtradb/include/data0data.h536
-rw-r--r--storage/xtradb/include/data0data.ic651
-rw-r--r--storage/xtradb/include/data0type.h545
-rw-r--r--storage/xtradb/include/data0type.ic714
-rw-r--r--storage/xtradb/include/data0types.h36
-rw-r--r--storage/xtradb/include/db0err.h170
-rw-r--r--storage/xtradb/include/dict0boot.h343
-rw-r--r--storage/xtradb/include/dict0boot.ic95
-rw-r--r--storage/xtradb/include/dict0crea.h266
-rw-r--r--storage/xtradb/include/dict0crea.ic98
-rw-r--r--storage/xtradb/include/dict0dict.h1907
-rw-r--r--storage/xtradb/include/dict0dict.ic1588
-rw-r--r--storage/xtradb/include/dict0load.h430
-rw-r--r--storage/xtradb/include/dict0load.ic26
-rw-r--r--storage/xtradb/include/dict0mem.h1522
-rw-r--r--storage/xtradb/include/dict0mem.ic74
-rw-r--r--storage/xtradb/include/dict0pagecompress.h83
-rw-r--r--storage/xtradb/include/dict0pagecompress.ic105
-rw-r--r--storage/xtradb/include/dict0priv.h64
-rw-r--r--storage/xtradb/include/dict0priv.ic126
-rw-r--r--storage/xtradb/include/dict0stats.h235
-rw-r--r--storage/xtradb/include/dict0stats.ic236
-rw-r--r--storage/xtradb/include/dict0stats_bg.h155
-rw-r--r--storage/xtradb/include/dict0stats_bg.ic45
-rw-r--r--storage/xtradb/include/dict0types.h100
-rw-r--r--storage/xtradb/include/dyn0dyn.h197
-rw-r--r--storage/xtradb/include/dyn0dyn.ic298
-rw-r--r--storage/xtradb/include/eval0eval.h114
-rw-r--r--storage/xtradb/include/eval0eval.ic255
-rw-r--r--storage/xtradb/include/eval0proc.h104
-rw-r--r--storage/xtradb/include/eval0proc.ic88
-rw-r--r--storage/xtradb/include/fil0crypt.h511
-rw-r--r--storage/xtradb/include/fil0crypt.ic36
-rw-r--r--storage/xtradb/include/fil0fil.h1540
-rw-r--r--storage/xtradb/include/fil0fil.ic148
-rw-r--r--storage/xtradb/include/fil0pagecompress.h132
-rw-r--r--storage/xtradb/include/fsp0fsp.h1068
-rw-r--r--storage/xtradb/include/fsp0fsp.ic202
-rw-r--r--storage/xtradb/include/fsp0pagecompress.h75
-rw-r--r--storage/xtradb/include/fsp0pagecompress.ic142
-rw-r--r--storage/xtradb/include/fsp0types.h130
-rw-r--r--storage/xtradb/include/fts0ast.h342
-rw-r--r--storage/xtradb/include/fts0blex.h349
-rw-r--r--storage/xtradb/include/fts0fts.h1064
-rw-r--r--storage/xtradb/include/fts0opt.h37
-rw-r--r--storage/xtradb/include/fts0pars.h72
-rw-r--r--storage/xtradb/include/fts0priv.h653
-rw-r--r--storage/xtradb/include/fts0priv.ic130
-rw-r--r--storage/xtradb/include/fts0tlex.h349
-rw-r--r--storage/xtradb/include/fts0types.h480
-rw-r--r--storage/xtradb/include/fts0types.ic388
-rw-r--r--storage/xtradb/include/fts0vlc.ic142
-rw-r--r--storage/xtradb/include/fut0fut.h55
-rw-r--r--storage/xtradb/include/fut0fut.ic60
-rw-r--r--storage/xtradb/include/fut0lst.h192
-rw-r--r--storage/xtradb/include/fut0lst.ic167
-rw-r--r--storage/xtradb/include/ha0ha.h265
-rw-r--r--storage/xtradb/include/ha0ha.ic246
-rw-r--r--storage/xtradb/include/ha0storage.h140
-rw-r--r--storage/xtradb/include/ha0storage.ic146
-rw-r--r--storage/xtradb/include/ha_prototypes.h692
-rw-r--r--storage/xtradb/include/handler0alter.h114
-rw-r--r--storage/xtradb/include/hash0hash.h603
-rw-r--r--storage/xtradb/include/hash0hash.ic225
-rw-r--r--storage/xtradb/include/ibuf0ibuf.h493
-rw-r--r--storage/xtradb/include/ibuf0ibuf.ic368
-rw-r--r--storage/xtradb/include/ibuf0types.h31
-rw-r--r--storage/xtradb/include/lock0iter.h69
-rw-r--r--storage/xtradb/include/lock0lock.h1036
-rw-r--r--storage/xtradb/include/lock0lock.ic92
-rw-r--r--storage/xtradb/include/lock0priv.h124
-rw-r--r--storage/xtradb/include/lock0priv.ic67
-rw-r--r--storage/xtradb/include/lock0types.h47
-rw-r--r--storage/xtradb/include/log0crypt.h128
-rw-r--r--storage/xtradb/include/log0log.h1077
-rw-r--r--storage/xtradb/include/log0log.ic567
-rw-r--r--storage/xtradb/include/log0online.h187
-rw-r--r--storage/xtradb/include/log0recv.h517
-rw-r--r--storage/xtradb/include/log0recv.ic37
-rw-r--r--storage/xtradb/include/mach0data.h418
-rw-r--r--storage/xtradb/include/mach0data.ic869
-rw-r--r--storage/xtradb/include/mem0dbg.h150
-rw-r--r--storage/xtradb/include/mem0dbg.ic109
-rw-r--r--storage/xtradb/include/mem0mem.h425
-rw-r--r--storage/xtradb/include/mem0mem.ic649
-rw-r--r--storage/xtradb/include/mem0pool.h121
-rw-r--r--storage/xtradb/include/mem0pool.ic24
-rw-r--r--storage/xtradb/include/mtr0log.h251
-rw-r--r--storage/xtradb/include/mtr0log.ic277
-rw-r--r--storage/xtradb/include/mtr0mtr.h453
-rw-r--r--storage/xtradb/include/mtr0mtr.ic298
-rw-r--r--storage/xtradb/include/mtr0types.h31
-rw-r--r--storage/xtradb/include/os0file.h1565
-rw-r--r--storage/xtradb/include/os0file.ic629
-rw-r--r--storage/xtradb/include/os0once.h125
-rw-r--r--storage/xtradb/include/os0proc.h77
-rw-r--r--storage/xtradb/include/os0proc.ic27
-rw-r--r--storage/xtradb/include/os0stacktrace.h44
-rw-r--r--storage/xtradb/include/os0sync.h999
-rw-r--r--storage/xtradb/include/os0sync.ic265
-rw-r--r--storage/xtradb/include/os0thread.h211
-rw-r--r--storage/xtradb/include/os0thread.ic25
-rw-r--r--storage/xtradb/include/page0cur.h387
-rw-r--r--storage/xtradb/include/page0cur.ic328
-rw-r--r--storage/xtradb/include/page0page.h1140
-rw-r--r--storage/xtradb/include/page0page.ic1184
-rw-r--r--storage/xtradb/include/page0types.h173
-rw-r--r--storage/xtradb/include/page0zip.h554
-rw-r--r--storage/xtradb/include/page0zip.ic458
-rw-r--r--storage/xtradb/include/pars0grm.h261
-rw-r--r--storage/xtradb/include/pars0opt.h75
-rw-r--r--storage/xtradb/include/pars0opt.ic24
-rw-r--r--storage/xtradb/include/pars0pars.h826
-rw-r--r--storage/xtradb/include/pars0pars.ic24
-rw-r--r--storage/xtradb/include/pars0sym.h258
-rw-r--r--storage/xtradb/include/pars0sym.ic24
-rw-r--r--storage/xtradb/include/pars0types.h50
-rw-r--r--storage/xtradb/include/que0que.h531
-rw-r--r--storage/xtradb/include/que0que.ic309
-rw-r--r--storage/xtradb/include/que0types.h57
-rw-r--r--storage/xtradb/include/read0i_s.h54
-rw-r--r--storage/xtradb/include/read0read.h232
-rw-r--r--storage/xtradb/include/read0read.ic131
-rw-r--r--storage/xtradb/include/read0types.h32
-rw-r--r--storage/xtradb/include/rem0cmp.h301
-rw-r--r--storage/xtradb/include/rem0cmp.ic186
-rw-r--r--storage/xtradb/include/rem0rec.h996
-rw-r--r--storage/xtradb/include/rem0rec.ic1719
-rw-r--r--storage/xtradb/include/rem0types.h77
-rw-r--r--storage/xtradb/include/row0ext.h102
-rw-r--r--storage/xtradb/include/row0ext.ic87
-rw-r--r--storage/xtradb/include/row0ftsort.h285
-rw-r--r--storage/xtradb/include/row0import.h91
-rw-r--r--storage/xtradb/include/row0import.ic25
-rw-r--r--storage/xtradb/include/row0ins.h240
-rw-r--r--storage/xtradb/include/row0ins.ic26
-rw-r--r--storage/xtradb/include/row0log.h244
-rw-r--r--storage/xtradb/include/row0log.ic84
-rw-r--r--storage/xtradb/include/row0merge.h469
-rw-r--r--storage/xtradb/include/row0mysql.h932
-rw-r--r--storage/xtradb/include/row0mysql.ic24
-rw-r--r--storage/xtradb/include/row0purge.h138
-rw-r--r--storage/xtradb/include/row0purge.ic25
-rw-r--r--storage/xtradb/include/row0quiesce.h74
-rw-r--r--storage/xtradb/include/row0quiesce.ic26
-rw-r--r--storage/xtradb/include/row0row.h343
-rw-r--r--storage/xtradb/include/row0row.ic174
-rw-r--r--storage/xtradb/include/row0sel.h409
-rw-r--r--storage/xtradb/include/row0sel.ic105
-rw-r--r--storage/xtradb/include/row0types.h55
-rw-r--r--storage/xtradb/include/row0uins.h54
-rw-r--r--storage/xtradb/include/row0uins.ic25
-rw-r--r--storage/xtradb/include/row0umod.h52
-rw-r--r--storage/xtradb/include/row0umod.ic24
-rw-r--r--storage/xtradb/include/row0undo.h135
-rw-r--r--storage/xtradb/include/row0undo.ic24
-rw-r--r--storage/xtradb/include/row0upd.h539
-rw-r--r--storage/xtradb/include/row0upd.ic188
-rw-r--r--storage/xtradb/include/row0vers.h146
-rw-r--r--storage/xtradb/include/row0vers.ic30
-rw-r--r--storage/xtradb/include/srv0conc.h111
-rw-r--r--storage/xtradb/include/srv0mon.h961
-rw-r--r--storage/xtradb/include/srv0mon.ic113
-rw-r--r--storage/xtradb/include/srv0srv.h1351
-rw-r--r--storage/xtradb/include/srv0srv.ic24
-rw-r--r--storage/xtradb/include/srv0start.h163
-rw-r--r--storage/xtradb/include/sync0arr.h171
-rw-r--r--storage/xtradb/include/sync0arr.ic64
-rw-r--r--storage/xtradb/include/sync0rw.h1094
-rw-r--r--storage/xtradb/include/sync0rw.ic1275
-rw-r--r--storage/xtradb/include/sync0sync.h1050
-rw-r--r--storage/xtradb/include/sync0sync.ic665
-rw-r--r--storage/xtradb/include/sync0types.h44
-rw-r--r--storage/xtradb/include/trx0i_s.h315
-rw-r--r--storage/xtradb/include/trx0purge.h226
-rw-r--r--storage/xtradb/include/trx0purge.ic62
-rw-r--r--storage/xtradb/include/trx0rec.h319
-rw-r--r--storage/xtradb/include/trx0rec.ic113
-rw-r--r--storage/xtradb/include/trx0roll.h298
-rw-r--r--storage/xtradb/include/trx0roll.ic40
-rw-r--r--storage/xtradb/include/trx0rseg.h231
-rw-r--r--storage/xtradb/include/trx0rseg.ic167
-rw-r--r--storage/xtradb/include/trx0sys.h756
-rw-r--r--storage/xtradb/include/trx0sys.ic568
-rw-r--r--storage/xtradb/include/trx0trx.h1232
-rw-r--r--storage/xtradb/include/trx0trx.ic184
-rw-r--r--storage/xtradb/include/trx0types.h147
-rw-r--r--storage/xtradb/include/trx0undo.h595
-rw-r--r--storage/xtradb/include/trx0undo.ic363
-rw-r--r--storage/xtradb/include/trx0xa.h61
-rw-r--r--storage/xtradb/include/univ.i706
-rw-r--r--storage/xtradb/include/usr0sess.h77
-rw-r--r--storage/xtradb/include/usr0sess.ic24
-rw-r--r--storage/xtradb/include/usr0types.h31
-rw-r--r--storage/xtradb/include/ut0bh.h152
-rw-r--r--storage/xtradb/include/ut0bh.ic125
-rw-r--r--storage/xtradb/include/ut0byte.h119
-rw-r--r--storage/xtradb/include/ut0byte.ic173
-rw-r--r--storage/xtradb/include/ut0counter.h159
-rw-r--r--storage/xtradb/include/ut0crc32.h51
-rw-r--r--storage/xtradb/include/ut0dbg.h132
-rw-r--r--storage/xtradb/include/ut0list.h189
-rw-r--r--storage/xtradb/include/ut0list.ic80
-rw-r--r--storage/xtradb/include/ut0lst.h408
-rw-r--r--storage/xtradb/include/ut0mem.h261
-rw-r--r--storage/xtradb/include/ut0mem.ic317
-rw-r--r--storage/xtradb/include/ut0rbt.h346
-rw-r--r--storage/xtradb/include/ut0rnd.h148
-rw-r--r--storage/xtradb/include/ut0rnd.ic262
-rw-r--r--storage/xtradb/include/ut0sort.h106
-rw-r--r--storage/xtradb/include/ut0timer.h104
-rw-r--r--storage/xtradb/include/ut0timer.ic113
-rw-r--r--storage/xtradb/include/ut0ut.h526
-rw-r--r--storage/xtradb/include/ut0ut.ic162
-rw-r--r--storage/xtradb/include/ut0vec.h337
-rw-r--r--storage/xtradb/include/ut0vec.ic425
-rw-r--r--storage/xtradb/include/ut0wqueue.h125
-rw-r--r--storage/xtradb/lock/lock0iter.cc111
-rw-r--r--storage/xtradb/lock/lock0lock.cc8338
-rw-r--r--storage/xtradb/lock/lock0wait.cc576
-rw-r--r--storage/xtradb/log/log0crypt.cc638
-rw-r--r--storage/xtradb/log/log0log.cc4141
-rw-r--r--storage/xtradb/log/log0online.cc1912
-rw-r--r--storage/xtradb/log/log0recv.cc3814
-rw-r--r--storage/xtradb/mach/mach0data.cc125
-rw-r--r--storage/xtradb/mem/mem0dbg.cc1050
-rw-r--r--storage/xtradb/mem/mem0mem.cc583
-rw-r--r--storage/xtradb/mem/mem0pool.cc728
-rw-r--r--storage/xtradb/mtr/mtr0log.cc620
-rw-r--r--storage/xtradb/mtr/mtr0mtr.cc474
-rw-r--r--storage/xtradb/mysql-test/storage_engine/alter_tablespace.opt2
-rw-r--r--storage/xtradb/mysql-test/storage_engine/autoinc_secondary.rdiff30
-rw-r--r--storage/xtradb/mysql-test/storage_engine/cache_index.rdiff71
-rw-r--r--storage/xtradb/mysql-test/storage_engine/checksum_table_live.rdiff13
-rw-r--r--storage/xtradb/mysql-test/storage_engine/col_opt_not_null.opt1
-rw-r--r--storage/xtradb/mysql-test/storage_engine/col_opt_null.opt1
-rw-r--r--storage/xtradb/mysql-test/storage_engine/define_engine.inc49
-rw-r--r--storage/xtradb/mysql-test/storage_engine/disabled.def8
-rw-r--r--storage/xtradb/mysql-test/storage_engine/fulltext_search.rdiff49
-rw-r--r--storage/xtradb/mysql-test/storage_engine/index_enable_disable.rdiff33
-rw-r--r--storage/xtradb/mysql-test/storage_engine/index_type_hash.rdiff60
-rw-r--r--storage/xtradb/mysql-test/storage_engine/insert_delayed.rdiff26
-rw-r--r--storage/xtradb/mysql-test/storage_engine/lock_concurrent.rdiff25
-rw-r--r--storage/xtradb/mysql-test/storage_engine/optimize_table.rdiff37
-rw-r--r--storage/xtradb/mysql-test/storage_engine/parts/checksum_table.rdiff13
-rw-r--r--storage/xtradb/mysql-test/storage_engine/parts/create_table.rdiff20
-rw-r--r--storage/xtradb/mysql-test/storage_engine/parts/disabled.def1
-rw-r--r--storage/xtradb/mysql-test/storage_engine/parts/optimize_table.rdiff58
-rw-r--r--storage/xtradb/mysql-test/storage_engine/parts/repair_table.rdiff158
-rw-r--r--storage/xtradb/mysql-test/storage_engine/parts/suite.opt2
-rw-r--r--storage/xtradb/mysql-test/storage_engine/repair_table.rdiff138
-rw-r--r--storage/xtradb/mysql-test/storage_engine/suite.opt1
-rw-r--r--storage/xtradb/mysql-test/storage_engine/suite.pm8
-rw-r--r--storage/xtradb/mysql-test/storage_engine/tbl_opt_index_dir.rdiff23
-rw-r--r--storage/xtradb/mysql-test/storage_engine/tbl_opt_insert_method.rdiff11
-rw-r--r--storage/xtradb/mysql-test/storage_engine/tbl_opt_key_block_size.opt3
-rw-r--r--storage/xtradb/mysql-test/storage_engine/tbl_opt_row_format.opt3
-rw-r--r--storage/xtradb/mysql-test/storage_engine/tbl_opt_row_format.rdiff10
-rw-r--r--storage/xtradb/mysql-test/storage_engine/tbl_opt_union.rdiff16
-rw-r--r--storage/xtradb/mysql-test/storage_engine/trx/cons_snapshot_serializable.rdiff18
-rw-r--r--storage/xtradb/mysql-test/storage_engine/trx/level_read_committed.rdiff11
-rw-r--r--storage/xtradb/mysql-test/storage_engine/trx/level_read_uncommitted.rdiff11
-rw-r--r--storage/xtradb/mysql-test/storage_engine/trx/suite.opt3
-rw-r--r--storage/xtradb/mysql-test/storage_engine/type_blob.opt1
-rw-r--r--storage/xtradb/mysql-test/storage_engine/type_char_indexes.rdiff11
-rw-r--r--storage/xtradb/mysql-test/storage_engine/type_float_indexes.rdiff11
-rw-r--r--storage/xtradb/mysql-test/storage_engine/type_spatial_indexes.rdiff712
-rw-r--r--storage/xtradb/mysql-test/storage_engine/type_text.opt1
-rw-r--r--storage/xtradb/os/os0file.cc6545
-rw-r--r--storage/xtradb/os/os0proc.cc232
-rw-r--r--storage/xtradb/os/os0stacktrace.cc131
-rw-r--r--storage/xtradb/os/os0sync.cc635
-rw-r--r--storage/xtradb/os/os0thread.cc355
-rw-r--r--storage/xtradb/page/page0cur.cc2180
-rw-r--r--storage/xtradb/page/page0page.cc2872
-rw-r--r--storage/xtradb/page/page0zip.cc5066
-rw-r--r--storage/xtradb/pars/lexyy.cc3132
-rwxr-xr-xstorage/xtradb/pars/make_bison.sh32
-rwxr-xr-xstorage/xtradb/pars/make_flex.sh48
-rw-r--r--storage/xtradb/pars/pars0grm.cc3034
-rw-r--r--storage/xtradb/pars/pars0grm.y732
-rw-r--r--storage/xtradb/pars/pars0lex.l706
-rw-r--r--storage/xtradb/pars/pars0opt.cc1261
-rw-r--r--storage/xtradb/pars/pars0pars.cc2670
-rw-r--r--storage/xtradb/pars/pars0sym.cc440
-rw-r--r--storage/xtradb/que/que0que.cc1308
-rw-r--r--storage/xtradb/read/read0read.cc691
-rw-r--r--storage/xtradb/rem/rem0cmp.cc1465
-rw-r--r--storage/xtradb/rem/rem0rec.cc2107
-rw-r--r--storage/xtradb/row/row0ext.cc143
-rw-r--r--storage/xtradb/row/row0ftsort.cc1662
-rw-r--r--storage/xtradb/row/row0import.cc3774
-rw-r--r--storage/xtradb/row/row0ins.cc3458
-rw-r--r--storage/xtradb/row/row0log.cc3710
-rw-r--r--storage/xtradb/row/row0merge.cc4411
-rw-r--r--storage/xtradb/row/row0mysql.cc5687
-rw-r--r--storage/xtradb/row/row0purge.cc1057
-rw-r--r--storage/xtradb/row/row0quiesce.cc700
-rw-r--r--storage/xtradb/row/row0row.cc1260
-rw-r--r--storage/xtradb/row/row0sel.cc5521
-rw-r--r--storage/xtradb/row/row0uins.cc475
-rw-r--r--storage/xtradb/row/row0umod.cc1168
-rw-r--r--storage/xtradb/row/row0undo.cc375
-rw-r--r--storage/xtradb/row/row0upd.cc3017
-rw-r--r--storage/xtradb/row/row0vers.cc770
-rw-r--r--storage/xtradb/srv/srv0conc.cc713
-rw-r--r--storage/xtradb/srv/srv0mon.cc2177
-rw-r--r--storage/xtradb/srv/srv0srv.cc3693
-rw-r--r--storage/xtradb/srv/srv0start.cc3430
-rw-r--r--storage/xtradb/sync/sync0arr.cc1564
-rw-r--r--storage/xtradb/sync/sync0rw.cc1297
-rw-r--r--storage/xtradb/sync/sync0sync.cc1705
-rw-r--r--storage/xtradb/trx/trx0i_s.cc1692
-rw-r--r--storage/xtradb/trx/trx0purge.cc1409
-rw-r--r--storage/xtradb/trx/trx0rec.cc1633
-rw-r--r--storage/xtradb/trx/trx0roll.cc1417
-rw-r--r--storage/xtradb/trx/trx0rseg.cc424
-rw-r--r--storage/xtradb/trx/trx0sys.cc1523
-rw-r--r--storage/xtradb/trx/trx0trx.cc2748
-rw-r--r--storage/xtradb/trx/trx0undo.cc2051
-rw-r--r--storage/xtradb/usr/usr0sess.cc67
-rw-r--r--storage/xtradb/ut/ut0bh.cc159
-rw-r--r--storage/xtradb/ut/ut0byte.cc30
-rw-r--r--storage/xtradb/ut/ut0crc32.cc342
-rw-r--r--storage/xtradb/ut/ut0dbg.cc139
-rw-r--r--storage/xtradb/ut/ut0list.cc203
-rw-r--r--storage/xtradb/ut/ut0mem.cc609
-rw-r--r--storage/xtradb/ut/ut0rbt.cc1353
-rw-r--r--storage/xtradb/ut/ut0rnd.cc97
-rw-r--r--storage/xtradb/ut/ut0timer.cc92
-rw-r--r--storage/xtradb/ut/ut0ut.cc870
-rw-r--r--storage/xtradb/ut/ut0vec.cc78
-rw-r--r--storage/xtradb/ut/ut0wqueue.cc224
425 files changed, 0 insertions, 356480 deletions
diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt
deleted file mode 100644
index 4f9d2bd2cbb..00000000000
--- a/storage/xtradb/CMakeLists.txt
+++ /dev/null
@@ -1,515 +0,0 @@
-# Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-# This is the CMakeLists for XtraDB
-RETURN() # until upgraded to 5.7 XtraDB doesn't compile at all
-
-INCLUDE(CheckFunctionExists)
-INCLUDE(CheckCSourceCompiles)
-INCLUDE(CheckCSourceRuns)
-INCLUDE(lz4)
-INCLUDE(lzo)
-INCLUDE(lzma)
-INCLUDE(bzip2)
-INCLUDE(snappy)
-INCLUDE(numa)
-
-MYSQL_CHECK_LZ4()
-MYSQL_CHECK_LZO()
-MYSQL_CHECK_LZMA()
-MYSQL_CHECK_BZIP2()
-MYSQL_CHECK_SNAPPY()
-MYSQL_CHECK_NUMA()
-
-IF(CMAKE_CROSSCOMPILING)
- # Use CHECK_C_SOURCE_COMPILES instead of CHECK_C_SOURCE_RUNS when
- # cross-compiling. Not as precise, but usually good enough.
- # This only make sense for atomic tests in this file, this trick doesn't
- # work in a general case.
- MACRO(CHECK_C_SOURCE SOURCE VAR)
- CHECK_C_SOURCE_COMPILES("${SOURCE}" "${VAR}")
- ENDMACRO()
-ELSE()
- MACRO(CHECK_C_SOURCE SOURCE VAR)
- CHECK_C_SOURCE_RUNS("${SOURCE}" "${VAR}")
- ENDMACRO()
-ENDIF()
-
-# OS tests
-IF(UNIX)
-
- IF(CMAKE_SYSTEM_NAME STREQUAL "Linux")
- CHECK_INCLUDE_FILES (libaio.h HAVE_LIBAIO_H)
- IF (XTRADB_PREFER_STATIC_LIBAIO)
- SET(CMAKE_FIND_LIBRARY_SUFFIXES .a ${CMAKE_FIND_LIBRARY_SUFFIXES})
- ENDIF()
- FIND_LIBRARY(AIO_LIBRARY aio)
- MARK_AS_ADVANCED(AIO_LIBRARY)
- IF(AIO_LIBRARY)
- CHECK_LIBRARY_EXISTS(${AIO_LIBRARY} io_queue_init "" HAVE_LIBAIO)
- IF(HAVE_LIBAIO AND HAVE_LIBAIO_H)
- ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1)
- ENDIF()
- LINK_LIBRARIES(${AIO_LIBRARY})
- ENDIF()
- ADD_DEFINITIONS("-DUNIV_LINUX -D_GNU_SOURCE=1")
- IF(HAVE_LIBNUMA)
- LINK_LIBRARIES(numa)
- ENDIF()
- ELSEIF(CMAKE_SYSTEM_NAME MATCHES "HP*")
- ADD_DEFINITIONS("-DUNIV_HPUX")
- ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "AIX")
- ADD_DEFINITIONS("-DUNIV_AIX")
- ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
- ADD_DEFINITIONS("-DUNIV_SOLARIS")
- ENDIF()
-ENDIF()
-
-IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
-# After: WL#5825 Using C++ Standard Library with MySQL code
-# we no longer use -fno-exceptions
-# SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-exceptions")
-ENDIF()
-
-# Enable InnoDB's UNIV_DEBUG and UNIV_SYNC_DEBUG in debug builds
-SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DUNIV_DEBUG -DUNIV_SYNC_DEBUG")
-
-# Add -Wconversion if compiling with GCC
-## As of Mar 15 2011 this flag causes 3573+ warnings. If you are reading this
-## please fix them and enable the following code:
-#IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
-#SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion")
-#ENDIF()
-
-CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU)
-IF(HAVE_SCHED_GETCPU)
- ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU)
-ENDIF()
-
-IF(NOT MSVC)
- # either define HAVE_IB_GCC_ATOMIC_BUILTINS or not
- # workaround for gcc 4.1.2 RHEL5/x86, gcc atomic ops only work under -march=i686
- IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "i686" AND CMAKE_COMPILER_IS_GNUCC AND
- CMAKE_C_COMPILER_VERSION VERSION_LESS "4.1.3")
- SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=i686")
- SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=i686")
- ENDIF()
- CHECK_C_SOURCE(
- "
- int main()
- {
- long x;
- long y;
- long res;
-
- x = 10;
- y = 123;
- res = __sync_bool_compare_and_swap(&x, x, y);
- if (!res || x != y) {
- return(1);
- }
-
- x = 10;
- y = 123;
- res = __sync_bool_compare_and_swap(&x, x + 1, y);
- if (res || x != 10) {
- return(1);
- }
- x = 10;
- y = 123;
- res = __sync_add_and_fetch(&x, y);
- if (res != 123 + 10 || x != 123 + 10) {
- return(1);
- }
- return(0);
- }"
- HAVE_IB_GCC_ATOMIC_BUILTINS
- )
- CHECK_C_SOURCE(
- "
- int main()
- {
- long res;
- char c;
-
- c = 10;
- res = __sync_lock_test_and_set(&c, 123);
- if (res != 10 || c != 123) {
- return(1);
- }
- return(0);
- }"
- HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE
- )
- CHECK_C_SOURCE(
- "#include<stdint.h>
- int main()
- {
- int64_t x,y,res;
-
- x = 10;
- y = 123;
- res = __sync_sub_and_fetch(&y, x);
- if (res != y || y != 113) {
- return(1);
- }
- res = __sync_add_and_fetch(&y, x);
- if (res != y || y != 123) {
- return(1);
- }
- return(0);
- }"
- HAVE_IB_GCC_ATOMIC_BUILTINS_64
- )
- CHECK_C_SOURCE(
- "#include<stdint.h>
- int main()
- {
- __sync_synchronize();
- return(0);
- }"
- HAVE_IB_GCC_SYNC_SYNCHRONISE
- )
- CHECK_C_SOURCE(
- "#include<stdint.h>
- int main()
- {
- __atomic_thread_fence(__ATOMIC_ACQUIRE);
- __atomic_thread_fence(__ATOMIC_RELEASE);
- return(0);
- }"
- HAVE_IB_GCC_ATOMIC_THREAD_FENCE
- )
- CHECK_C_SOURCE(
- "#include<stdint.h>
- int main()
- {
- unsigned char c;
-
- __atomic_test_and_set(&c, __ATOMIC_ACQUIRE);
- __atomic_clear(&c, __ATOMIC_RELEASE);
- return(0);
- }"
- HAVE_IB_GCC_ATOMIC_TEST_AND_SET
- )
-
-IF(HAVE_IB_GCC_ATOMIC_BUILTINS)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS=1)
- SET(XTRADB_OK 1)
-ENDIF()
-
-IF(HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS_BYTE=1)
-ENDIF()
-
-IF(HAVE_IB_GCC_ATOMIC_BUILTINS_64)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS_64=1)
-ENDIF()
-
-IF(HAVE_IB_GCC_SYNC_SYNCHRONISE)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_SYNC_SYNCHRONISE=1)
-ENDIF()
-
-IF(HAVE_IB_GCC_ATOMIC_THREAD_FENCE)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_THREAD_FENCE=1)
-ENDIF()
-
-IF(HAVE_IB_GCC_ATOMIC_TEST_AND_SET)
- ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_TEST_AND_SET=1)
-ENDIF()
-
-# either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not
-CHECK_C_SOURCE(
-"
-#include <pthread.h>
-#include <string.h>
-
-int main() {
- pthread_t x1;
- pthread_t x2;
- pthread_t x3;
-
- memset(&x1, 0x0, sizeof(x1));
- memset(&x2, 0x0, sizeof(x2));
- memset(&x3, 0x0, sizeof(x3));
-
- __sync_bool_compare_and_swap(&x1, x2, x3);
-
- return(0);
-}"
-HAVE_IB_ATOMIC_PTHREAD_T_GCC)
-
-IF(HAVE_IB_ATOMIC_PTHREAD_T_GCC)
- ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_GCC=1)
-ENDIF()
-
-CHECK_CXX_SOURCE_COMPILES("struct t1{ int a; char *b; }; struct t1 c= { .a=1, .b=0 }; main() { }" HAVE_C99_INITIALIZERS)
-IF(HAVE_C99_INITIALIZERS)
- ADD_DEFINITIONS(-DHAVE_C99_INITIALIZERS)
-ENDIF()
-
-ENDIF(NOT MSVC)
-
-CHECK_FUNCTION_EXISTS(vasprintf HAVE_VASPRINTF)
-
-# Solaris atomics
-IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS")
- CHECK_FUNCTION_EXISTS(atomic_cas_ulong HAVE_ATOMIC_CAS_ULONG)
- CHECK_FUNCTION_EXISTS(atomic_cas_32 HAVE_ATOMIC_CAS_32)
- CHECK_FUNCTION_EXISTS(atomic_cas_64 HAVE_ATOMIC_CAS_64)
- CHECK_FUNCTION_EXISTS(atomic_add_long_nv HAVE_ATOMIC_ADD_LONG_NV)
- CHECK_FUNCTION_EXISTS(atomic_swap_uchar HAVE_ATOMIC_SWAP_UCHAR)
- IF(HAVE_ATOMIC_CAS_ULONG AND
- HAVE_ATOMIC_CAS_32 AND
- HAVE_ATOMIC_CAS_64 AND
- HAVE_ATOMIC_ADD_LONG_NV AND
- HAVE_ATOMIC_SWAP_UCHAR)
- SET(HAVE_IB_SOLARIS_ATOMICS 1)
- ENDIF()
-
- IF(HAVE_IB_SOLARIS_ATOMICS)
- ADD_DEFINITIONS(-DHAVE_IB_SOLARIS_ATOMICS=1)
- SET(XTRADB_OK 1)
- ENDIF()
-
- # either define HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS or not
- CHECK_C_SOURCE_COMPILES(
- " #include <pthread.h>
- #include <string.h>
-
- int main(int argc, char** argv) {
- pthread_t x1;
- pthread_t x2;
- pthread_t x3;
-
- memset(&x1, 0x0, sizeof(x1));
- memset(&x2, 0x0, sizeof(x2));
- memset(&x3, 0x0, sizeof(x3));
-
- if (sizeof(pthread_t) == 4) {
-
- atomic_cas_32(&x1, x2, x3);
-
- } else if (sizeof(pthread_t) == 8) {
-
- atomic_cas_64(&x1, x2, x3);
-
- } else {
-
- return(1);
- }
-
- return(0);
- }
- " HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS)
- CHECK_C_SOURCE_COMPILES(
- "#include <mbarrier.h>
- int main() {
- __machine_r_barrier();
- __machine_w_barrier();
- return(0);
- }"
- HAVE_IB_MACHINE_BARRIER_SOLARIS)
-
- IF(HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS)
- ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_SOLARIS=1)
- ENDIF()
- IF(HAVE_IB_MACHINE_BARRIER_SOLARIS)
- ADD_DEFINITIONS(-DHAVE_IB_MACHINE_BARRIER_SOLARIS=1)
- ENDIF()
-ENDIF()
-
-
-IF(UNIX)
-# this is needed to know which one of atomic_cas_32() or atomic_cas_64()
-# to use in the source
-SET(CMAKE_EXTRA_INCLUDE_FILES pthread.h)
-CHECK_TYPE_SIZE(pthread_t SIZEOF_PTHREAD_T)
-SET(CMAKE_EXTRA_INCLUDE_FILES)
-ENDIF()
-
-IF(SIZEOF_PTHREAD_T)
- ADD_DEFINITIONS(-DSIZEOF_PTHREAD_T=${SIZEOF_PTHREAD_T})
-ENDIF()
-
-IF(MSVC)
- ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS)
- ADD_DEFINITIONS(-DHAVE_WINDOWS_MM_FENCE)
- SET(XTRADB_OK 1)
-ENDIF()
-
-
-# Include directories under xtradb
-INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/xtradb/include
- ${CMAKE_SOURCE_DIR}/storage/xtradb/handler)
-
-# Sun Studio bug with -xO2
-IF(CMAKE_CXX_COMPILER_ID MATCHES "SunPro"
- AND CMAKE_CXX_FLAGS_RELEASE MATCHES "O2"
- AND NOT CMAKE_BUILD_TYPE STREQUAL "Debug")
- # Sun Studio 12 crashes with -xO2 flag, but not with higher optimization
- # -xO3
- SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_SOURCE_DIR}/rem/rem0rec.cc
- PROPERTIES COMPILE_FLAGS -xO3)
-ENDIF()
-
-# Removing compiler optimizations for innodb/mem/* files on 64-bit Windows
-# due to 64-bit compiler error, See MySQL Bug #19424, #36366, #34297
-IF (MSVC AND CMAKE_SIZEOF_VOID_P EQUAL 8)
- SET_SOURCE_FILES_PROPERTIES(mem/mem0mem.cc mem/mem0pool.cc
- PROPERTIES COMPILE_FLAGS -Od)
-ENDIF()
-
-SET(INNOBASE_SOURCES
- api/api0api.cc
- api/api0misc.cc
- btr/btr0btr.cc
- btr/btr0cur.cc
- btr/btr0pcur.cc
- btr/btr0scrub.cc
- btr/btr0sea.cc
- btr/btr0defragment.cc
- buf/buf0buddy.cc
- buf/buf0buf.cc
- buf/buf0dblwr.cc
- buf/buf0checksum.cc
- buf/buf0dump.cc
- buf/buf0flu.cc
- buf/buf0lru.cc
- buf/buf0rea.cc
- buf/buf0mtflu.cc
- data/data0data.cc
- data/data0type.cc
- dict/dict0boot.cc
- dict/dict0crea.cc
- dict/dict0dict.cc
- dict/dict0load.cc
- dict/dict0mem.cc
- dict/dict0stats.cc
- dict/dict0stats_bg.cc
- dyn/dyn0dyn.cc
- eval/eval0eval.cc
- eval/eval0proc.cc
- fil/fil0fil.cc
- fil/fil0pagecompress.cc
- fil/fil0crypt.cc
- fsp/fsp0fsp.cc
- fut/fut0fut.cc
- fut/fut0lst.cc
- ha/ha0ha.cc
- ha/ha0storage.cc
- ha/hash0hash.cc
- fts/fts0fts.cc
- fts/fts0ast.cc
- fts/fts0blex.cc
- fts/fts0config.cc
- fts/fts0opt.cc
- fts/fts0pars.cc
- fts/fts0que.cc
- fts/fts0sql.cc
- fts/fts0tlex.cc
- handler/ha_innodb.cc
- handler/handler0alter.cc
- handler/i_s.cc
- handler/xtradb_i_s.cc
- ibuf/ibuf0ibuf.cc
- lock/lock0iter.cc
- lock/lock0lock.cc
- lock/lock0wait.cc
- log/log0log.cc
- log/log0online.cc
- log/log0recv.cc
- log/log0crypt.cc
- mach/mach0data.cc
- mem/mem0mem.cc
- mem/mem0pool.cc
- mtr/mtr0log.cc
- mtr/mtr0mtr.cc
- os/os0file.cc
- os/os0proc.cc
- os/os0sync.cc
- os/os0thread.cc
- os/os0stacktrace.cc
- page/page0cur.cc
- page/page0page.cc
- page/page0zip.cc
- pars/lexyy.cc
- pars/pars0grm.cc
- pars/pars0opt.cc
- pars/pars0pars.cc
- pars/pars0sym.cc
- que/que0que.cc
- read/read0read.cc
- rem/rem0cmp.cc
- rem/rem0rec.cc
- row/row0ext.cc
- row/row0ftsort.cc
- row/row0import.cc
- row/row0ins.cc
- row/row0merge.cc
- row/row0mysql.cc
- row/row0log.cc
- row/row0purge.cc
- row/row0row.cc
- row/row0sel.cc
- row/row0uins.cc
- row/row0umod.cc
- row/row0undo.cc
- row/row0upd.cc
- row/row0quiesce.cc
- row/row0vers.cc
- srv/srv0conc.cc
- srv/srv0mon.cc
- srv/srv0srv.cc
- srv/srv0start.cc
- sync/sync0arr.cc
- sync/sync0rw.cc
- sync/sync0sync.cc
- trx/trx0i_s.cc
- trx/trx0purge.cc
- trx/trx0rec.cc
- trx/trx0roll.cc
- trx/trx0rseg.cc
- trx/trx0sys.cc
- trx/trx0trx.cc
- trx/trx0undo.cc
- usr/usr0sess.cc
- ut/ut0bh.cc
- ut/ut0byte.cc
- ut/ut0crc32.cc
- ut/ut0dbg.cc
- ut/ut0list.cc
- ut/ut0mem.cc
- ut/ut0rbt.cc
- ut/ut0rnd.cc
- ut/ut0ut.cc
- ut/ut0vec.cc
- ut/ut0wqueue.cc
- ut/ut0timer.cc)
-
-MYSQL_ADD_PLUGIN(xtradb ${INNOBASE_SOURCES} STORAGE_ENGINE
- RECOMPILE_FOR_EMBEDDED
- LINK_LIBRARIES
- ${ZLIB_LIBRARY}
- ${CRC32_VPMSUM_LIBRARY}
- ${NUMA_LIBRARY}
- ${LINKER_SCRIPT})
-
-IF(TARGET xtradb AND NOT XTRADB_OK)
- MESSAGE(FATAL_ERROR "Percona XtraDB is not supported on this platform")
-ENDIF()
-
-ADD_SUBDIRECTORY(${CMAKE_SOURCE_DIR}/extra/mariabackup ${CMAKE_BINARY_DIR}/extra/mariabackup)
-
diff --git a/storage/xtradb/COPYING.Google b/storage/xtradb/COPYING.Google
deleted file mode 100644
index 5ade2b0e381..00000000000
--- a/storage/xtradb/COPYING.Google
+++ /dev/null
@@ -1,30 +0,0 @@
-Portions of this software contain modifications contributed by Google, Inc.
-These contributions are used with the following license:
-
-Copyright (c) 2008, Google Inc. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following
- disclaimer in the documentation and/or other materials
- provided with the distribution.
- * Neither the name of the Google Inc. nor the names of its
- contributors may be used to endorse or promote products
- derived from this software without specific prior written
- permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/storage/xtradb/COPYING.Percona b/storage/xtradb/COPYING.Percona
deleted file mode 100644
index 8c786811719..00000000000
--- a/storage/xtradb/COPYING.Percona
+++ /dev/null
@@ -1,30 +0,0 @@
-Portions of this software contain modifications contributed by Percona, Inc.
-These contributions are used with the following license:
-
-Copyright (c) 2008, 2009, Percona Inc. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
- * Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions and the following
- disclaimer in the documentation and/or other materials
- provided with the distribution.
- * Neither the name of the Percona Inc. nor the names of its
- contributors may be used to endorse or promote products
- derived from this software without specific prior written
- permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/storage/xtradb/Doxyfile b/storage/xtradb/Doxyfile
deleted file mode 100644
index 7cf5048fa52..00000000000
--- a/storage/xtradb/Doxyfile
+++ /dev/null
@@ -1,1419 +0,0 @@
-# Doxyfile 1.5.6
-
-# Usage: SVNVERSION=-r$(svnversion) doxygen
-
-# This file describes the settings to be used by the documentation system
-# doxygen (www.doxygen.org) for a project
-#
-# All text after a hash (#) is considered a comment and will be ignored
-# The format is:
-# TAG = value [value, ...]
-# For lists items can also be appended using:
-# TAG += value [value, ...]
-# Values that contain spaces should be placed between quotes (" ")
-
-#---------------------------------------------------------------------------
-# Project related configuration options
-#---------------------------------------------------------------------------
-
-# This tag specifies the encoding used for all characters in the config file
-# that follow. The default is UTF-8 which is also the encoding used for all
-# text before the first occurrence of this tag. Doxygen uses libiconv (or the
-# iconv built into libc) for the transcoding. See
-# http://www.gnu.org/software/libiconv for the list of possible encodings.
-
-DOXYFILE_ENCODING = UTF-8
-
-# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
-# by quotes) that should identify the project.
-
-PROJECT_NAME = "InnoDB Plugin"
-
-# The PROJECT_NUMBER tag can be used to enter a project or revision number.
-# This could be handy for archiving the generated documentation or
-# if some version control system is used.
-
-PROJECT_NUMBER = 1.0$(SVNVERSION)
-
-# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
-# base path where the generated documentation will be put.
-# If a relative path is entered, it will be relative to the location
-# where doxygen was started. If left blank the current directory will be used.
-
-OUTPUT_DIRECTORY = dox
-
-# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
-# 4096 sub-directories (in 2 levels) under the output directory of each output
-# format and will distribute the generated files over these directories.
-# Enabling this option can be useful when feeding doxygen a huge amount of
-# source files, where putting all generated files in the same directory would
-# otherwise cause performance problems for the file system.
-
-CREATE_SUBDIRS = NO
-
-# The OUTPUT_LANGUAGE tag is used to specify the language in which all
-# documentation generated by doxygen is written. Doxygen will use this
-# information to generate all constant output in the proper language.
-# The default language is English, other supported languages are:
-# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
-# Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek,
-# Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages),
-# Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish,
-# Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, Swedish,
-# and Ukrainian.
-
-OUTPUT_LANGUAGE = English
-
-# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
-# include brief member descriptions after the members that are listed in
-# the file and class documentation (similar to JavaDoc).
-# Set to NO to disable this.
-
-BRIEF_MEMBER_DESC = YES
-
-# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
-# the brief description of a member or function before the detailed description.
-# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
-# brief descriptions will be completely suppressed.
-
-REPEAT_BRIEF = YES
-
-# This tag implements a quasi-intelligent brief description abbreviator
-# that is used to form the text in various listings. Each string
-# in this list, if found as the leading text of the brief description, will be
-# stripped from the text and the result after processing the whole list, is
-# used as the annotated text. Otherwise, the brief description is used as-is.
-# If left blank, the following values are used ("$name" is automatically
-# replaced with the name of the entity): "The $name class" "The $name widget"
-# "The $name file" "is" "provides" "specifies" "contains"
-# "represents" "a" "an" "the"
-
-ABBREVIATE_BRIEF =
-
-# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
-# Doxygen will generate a detailed section even if there is only a brief
-# description.
-
-ALWAYS_DETAILED_SEC = NO
-
-# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
-# inherited members of a class in the documentation of that class as if those
-# members were ordinary class members. Constructors, destructors and assignment
-# operators of the base classes will not be shown.
-
-INLINE_INHERITED_MEMB = NO
-
-# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
-# path before files name in the file list and in the header files. If set
-# to NO the shortest path that makes the file name unique will be used.
-
-FULL_PATH_NAMES = YES
-
-# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
-# can be used to strip a user-defined part of the path. Stripping is
-# only done if one of the specified strings matches the left-hand part of
-# the path. The tag can be used to show relative paths in the file list.
-# If left blank the directory from which doxygen is run is used as the
-# path to strip.
-
-STRIP_FROM_PATH =
-
-# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
-# the path mentioned in the documentation of a class, which tells
-# the reader which header file to include in order to use a class.
-# If left blank only the name of the header file containing the class
-# definition is used. Otherwise one should specify the include paths that
-# are normally passed to the compiler using the -I flag.
-
-STRIP_FROM_INC_PATH =
-
-# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
-# (but less readable) file names. This can be useful if your file system
-# doesn't support long names like on DOS, Mac, or CD-ROM.
-
-SHORT_NAMES = NO
-
-# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
-# will interpret the first line (until the first dot) of a JavaDoc-style
-# comment as the brief description. If set to NO, the JavaDoc
-# comments will behave just like regular Qt-style comments
-# (thus requiring an explicit @brief command for a brief description.)
-
-JAVADOC_AUTOBRIEF = NO
-
-# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
-# interpret the first line (until the first dot) of a Qt-style
-# comment as the brief description. If set to NO, the comments
-# will behave just like regular Qt-style comments (thus requiring
-# an explicit \brief command for a brief description.)
-
-QT_AUTOBRIEF = NO
-
-# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
-# treat a multi-line C++ special comment block (i.e. a block of //! or ///
-# comments) as a brief description. This used to be the default behaviour.
-# The new default is to treat a multi-line C++ comment block as a detailed
-# description. Set this tag to YES if you prefer the old behaviour instead.
-
-MULTILINE_CPP_IS_BRIEF = NO
-
-# If the DETAILS_AT_TOP tag is set to YES then Doxygen
-# will output the detailed description near the top, like JavaDoc.
-# If set to NO, the detailed description appears after the member
-# documentation.
-
-DETAILS_AT_TOP = NO
-
-# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
-# member inherits the documentation from any documented member that it
-# re-implements.
-
-INHERIT_DOCS = YES
-
-# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
-# a new page for each member. If set to NO, the documentation of a member will
-# be part of the file/class/namespace that contains it.
-
-SEPARATE_MEMBER_PAGES = NO
-
-# The TAB_SIZE tag can be used to set the number of spaces in a tab.
-# Doxygen uses this value to replace tabs by spaces in code fragments.
-
-TAB_SIZE = 8
-
-# This tag can be used to specify a number of aliases that acts
-# as commands in the documentation. An alias has the form "name=value".
-# For example adding "sideeffect=\par Side Effects:\n" will allow you to
-# put the command \sideeffect (or @sideeffect) in the documentation, which
-# will result in a user-defined paragraph with heading "Side Effects:".
-# You can put \n's in the value part of an alias to insert newlines.
-
-ALIASES =
-
-# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
-# sources only. Doxygen will then generate output that is more tailored for C.
-# For instance, some of the names that are used will be different. The list
-# of all members will be omitted, etc.
-
-OPTIMIZE_OUTPUT_FOR_C = YES
-
-# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
-# sources only. Doxygen will then generate output that is more tailored for
-# Java. For instance, namespaces will be presented as packages, qualified
-# scopes will look different, etc.
-
-OPTIMIZE_OUTPUT_JAVA = NO
-
-# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
-# sources only. Doxygen will then generate output that is more tailored for
-# Fortran.
-
-OPTIMIZE_FOR_FORTRAN = NO
-
-# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
-# sources. Doxygen will then generate output that is tailored for
-# VHDL.
-
-OPTIMIZE_OUTPUT_VHDL = NO
-
-# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
-# to include (a tag file for) the STL sources as input, then you should
-# set this tag to YES in order to let doxygen match functions declarations and
-# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
-# func(std::string) {}). This also makes the inheritance and collaboration
-# diagrams that involve STL classes more complete and accurate.
-
-BUILTIN_STL_SUPPORT = NO
-
-# If you use Microsoft's C++/CLI language, you should set this option to YES to
-# enable parsing support.
-
-CPP_CLI_SUPPORT = NO
-
-# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
-# Doxygen will parse them like normal C++ but will assume all classes use public
-# instead of private inheritance when no explicit protection keyword is present.
-
-SIP_SUPPORT = NO
-
-# For Microsoft's IDL there are propget and propput attributes to indicate getter
-# and setter methods for a property. Setting this option to YES (the default)
-# will make doxygen replace the get and set methods by a property in the
-# documentation. This will only work if the methods are indeed getting or
-# setting a simple type. If this is not the case, or you want to show the
-# methods anyway, you should set this option to NO.
-
-IDL_PROPERTY_SUPPORT = YES
-
-# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
-# tag is set to YES, then doxygen will reuse the documentation of the first
-# member in the group (if any) for the other members of the group. By default
-# all members of a group must be documented explicitly.
-
-DISTRIBUTE_GROUP_DOC = NO
-
-# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
-# the same type (for instance a group of public functions) to be put as a
-# subgroup of that type (e.g. under the Public Functions section). Set it to
-# NO to prevent subgrouping. Alternatively, this can be done per class using
-# the \nosubgrouping command.
-
-SUBGROUPING = YES
-
-# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
-# is documented as struct, union, or enum with the name of the typedef. So
-# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
-# with name TypeT. When disabled the typedef will appear as a member of a file,
-# namespace, or class. And the struct will be named TypeS. This can typically
-# be useful for C code in case the coding convention dictates that all compound
-# types are typedef'ed and only the typedef is referenced, never the tag name.
-
-TYPEDEF_HIDES_STRUCT = NO
-
-#---------------------------------------------------------------------------
-# Build related configuration options
-#---------------------------------------------------------------------------
-
-# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
-# documentation are documented, even if no documentation was available.
-# Private class members and static file members will be hidden unless
-# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
-
-EXTRACT_ALL = NO
-
-# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
-# will be included in the documentation.
-
-EXTRACT_PRIVATE = YES
-
-# If the EXTRACT_STATIC tag is set to YES all static members of a file
-# will be included in the documentation.
-
-EXTRACT_STATIC = YES
-
-# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
-# defined locally in source files will be included in the documentation.
-# If set to NO only classes defined in header files are included.
-
-EXTRACT_LOCAL_CLASSES = YES
-
-# This flag is only useful for Objective-C code. When set to YES local
-# methods, which are defined in the implementation section but not in
-# the interface are included in the documentation.
-# If set to NO (the default) only methods in the interface are included.
-
-EXTRACT_LOCAL_METHODS = NO
-
-# If this flag is set to YES, the members of anonymous namespaces will be
-# extracted and appear in the documentation as a namespace called
-# 'anonymous_namespace{file}', where file will be replaced with the base
-# name of the file that contains the anonymous namespace. By default
-# anonymous namespaces are hidden.
-
-EXTRACT_ANON_NSPACES = NO
-
-# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
-# undocumented members of documented classes, files or namespaces.
-# If set to NO (the default) these members will be included in the
-# various overviews, but no documentation section is generated.
-# This option has no effect if EXTRACT_ALL is enabled.
-
-HIDE_UNDOC_MEMBERS = NO
-
-# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
-# undocumented classes that are normally visible in the class hierarchy.
-# If set to NO (the default) these classes will be included in the various
-# overviews. This option has no effect if EXTRACT_ALL is enabled.
-
-HIDE_UNDOC_CLASSES = NO
-
-# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
-# friend (class|struct|union) declarations.
-# If set to NO (the default) these declarations will be included in the
-# documentation.
-
-HIDE_FRIEND_COMPOUNDS = NO
-
-# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
-# documentation blocks found inside the body of a function.
-# If set to NO (the default) these blocks will be appended to the
-# function's detailed documentation block.
-
-HIDE_IN_BODY_DOCS = NO
-
-# The INTERNAL_DOCS tag determines if documentation
-# that is typed after a \internal command is included. If the tag is set
-# to NO (the default) then the documentation will be excluded.
-# Set it to YES to include the internal documentation.
-
-INTERNAL_DOCS = NO
-
-# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
-# file names in lower-case letters. If set to YES upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# and Mac users are advised to set this option to NO.
-
-CASE_SENSE_NAMES = YES
-
-# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
-# will show members with their full class and namespace scopes in the
-# documentation. If set to YES the scope will be hidden.
-
-HIDE_SCOPE_NAMES = NO
-
-# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
-# will put a list of the files that are included by a file in the documentation
-# of that file.
-
-SHOW_INCLUDE_FILES = YES
-
-# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
-# is inserted in the documentation for inline members.
-
-INLINE_INFO = YES
-
-# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
-# will sort the (detailed) documentation of file and class members
-# alphabetically by member name. If set to NO the members will appear in
-# declaration order.
-
-SORT_MEMBER_DOCS = YES
-
-# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
-# brief documentation of file, namespace and class members alphabetically
-# by member name. If set to NO (the default) the members will appear in
-# declaration order.
-
-SORT_BRIEF_DOCS = NO
-
-# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
-# hierarchy of group names into alphabetical order. If set to NO (the default)
-# the group names will appear in their defined order.
-
-SORT_GROUP_NAMES = NO
-
-# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
-# sorted by fully-qualified names, including namespaces. If set to
-# NO (the default), the class list will be sorted only by class name,
-# not including the namespace part.
-# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
-# Note: This option applies only to the class list, not to the
-# alphabetical list.
-
-SORT_BY_SCOPE_NAME = NO
-
-# The GENERATE_TODOLIST tag can be used to enable (YES) or
-# disable (NO) the todo list. This list is created by putting \todo
-# commands in the documentation.
-
-GENERATE_TODOLIST = YES
-
-# The GENERATE_TESTLIST tag can be used to enable (YES) or
-# disable (NO) the test list. This list is created by putting \test
-# commands in the documentation.
-
-GENERATE_TESTLIST = YES
-
-# The GENERATE_BUGLIST tag can be used to enable (YES) or
-# disable (NO) the bug list. This list is created by putting \bug
-# commands in the documentation.
-
-GENERATE_BUGLIST = YES
-
-# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
-# disable (NO) the deprecated list. This list is created by putting
-# \deprecated commands in the documentation.
-
-GENERATE_DEPRECATEDLIST= YES
-
-# The ENABLED_SECTIONS tag can be used to enable conditional
-# documentation sections, marked by \if sectionname ... \endif.
-
-ENABLED_SECTIONS =
-
-# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
-# the initial value of a variable or define consists of for it to appear in
-# the documentation. If the initializer consists of more lines than specified
-# here it will be hidden. Use a value of 0 to hide initializers completely.
-# The appearance of the initializer of individual variables and defines in the
-# documentation can be controlled using \showinitializer or \hideinitializer
-# command in the documentation regardless of this setting.
-
-MAX_INITIALIZER_LINES = 30
-
-# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
-# at the bottom of the documentation of classes and structs. If set to YES the
-# list will mention the files that were used to generate the documentation.
-
-SHOW_USED_FILES = YES
-
-# If the sources in your project are distributed over multiple directories
-# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
-# in the documentation. The default is NO.
-
-SHOW_DIRECTORIES = NO
-
-# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
-# This will remove the Files entry from the Quick Index and from the
-# Folder Tree View (if specified). The default is YES.
-
-SHOW_FILES = YES
-
-# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
-# Namespaces page. This will remove the Namespaces entry from the Quick Index
-# and from the Folder Tree View (if specified). The default is YES.
-
-SHOW_NAMESPACES = YES
-
-# The FILE_VERSION_FILTER tag can be used to specify a program or script that
-# doxygen should invoke to get the current version for each file (typically from
-# the version control system). Doxygen will invoke the program by executing (via
-# popen()) the command <command> <input-file>, where <command> is the value of
-# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
-# provided by doxygen. Whatever the program writes to standard output
-# is used as the file version. See the manual for examples.
-
-FILE_VERSION_FILTER =
-
-#---------------------------------------------------------------------------
-# configuration options related to warning and progress messages
-#---------------------------------------------------------------------------
-
-# The QUIET tag can be used to turn on/off the messages that are generated
-# by doxygen. Possible values are YES and NO. If left blank NO is used.
-
-QUIET = YES
-
-# The WARNINGS tag can be used to turn on/off the warning messages that are
-# generated by doxygen. Possible values are YES and NO. If left blank
-# NO is used.
-
-WARNINGS = YES
-
-# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
-# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
-# automatically be disabled.
-
-WARN_IF_UNDOCUMENTED = YES
-
-# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
-# potential errors in the documentation, such as not documenting some
-# parameters in a documented function, or documenting parameters that
-# don't exist or using markup commands wrongly.
-
-WARN_IF_DOC_ERROR = YES
-
-# This WARN_NO_PARAMDOC option can be enabled to get warnings for
-# functions that are documented, but have no documentation for their parameters
-# or return value. If set to NO (the default) doxygen will only warn about
-# wrong or incomplete parameter documentation, but not about the absence of
-# documentation.
-
-WARN_NO_PARAMDOC = NO
-
-# The WARN_FORMAT tag determines the format of the warning messages that
-# doxygen can produce. The string should contain the $file, $line, and $text
-# tags, which will be replaced by the file and line number from which the
-# warning originated and the warning text. Optionally the format may contain
-# $version, which will be replaced by the version of the file (if it could
-# be obtained via FILE_VERSION_FILTER)
-
-WARN_FORMAT = "$file:$line: $text"
-
-# The WARN_LOGFILE tag can be used to specify a file to which warning
-# and error messages should be written. If left blank the output is written
-# to stderr.
-
-WARN_LOGFILE =
-
-#---------------------------------------------------------------------------
-# configuration options related to the input files
-#---------------------------------------------------------------------------
-
-# The INPUT tag can be used to specify the files and/or directories that contain
-# documented source files. You may enter file names like "myfile.cpp" or
-# directories like "/usr/src/myproject". Separate the files or directories
-# with spaces.
-
-INPUT = . include/univ.i
-
-# This tag can be used to specify the character encoding of the source files
-# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
-# also the default input encoding. Doxygen uses libiconv (or the iconv built
-# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
-# the list of possible encodings.
-
-INPUT_ENCODING = UTF-8
-
-# If the value of the INPUT tag contains directories, you can use the
-# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
-# and *.h) to filter out the source-files in the directories. If left
-# blank the following patterns are tested:
-# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
-# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
-
-FILE_PATTERNS = *.c *.ic *.h
-
-# The RECURSIVE tag can be used to specify whether or not subdirectories
-# should be searched for input files as well. Possible values are YES and NO.
-# If left blank NO is used.
-
-RECURSIVE = YES
-
-# The EXCLUDE tag can be used to specify files and/or directories that should
-# be excluded from the INPUT source files. This way you can easily exclude a
-# subdirectory from a directory tree whose root is specified with the INPUT tag.
-
-EXCLUDE =
-
-# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
-# directories that are symbolic links (a Unix filesystem feature) are excluded
-# from the input.
-
-EXCLUDE_SYMLINKS = NO
-
-# If the value of the INPUT tag contains directories, you can use the
-# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
-# certain files from those directories. Note that the wildcards are matched
-# against the file with absolute path, so to exclude all test directories
-# for example use the pattern */test/*
-
-EXCLUDE_PATTERNS =
-
-# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
-# (namespaces, classes, functions, etc.) that should be excluded from the
-# output. The symbol name can be a fully qualified name, a word, or if the
-# wildcard * is used, a substring. Examples: ANamespace, AClass,
-# AClass::ANamespace, ANamespace::*Test
-
-EXCLUDE_SYMBOLS =
-
-# The EXAMPLE_PATH tag can be used to specify one or more files or
-# directories that contain example code fragments that are included (see
-# the \include command).
-
-EXAMPLE_PATH =
-
-# If the value of the EXAMPLE_PATH tag contains directories, you can use the
-# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
-# and *.h) to filter out the source-files in the directories. If left
-# blank all files are included.
-
-EXAMPLE_PATTERNS =
-
-# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
-# searched for input files to be used with the \include or \dontinclude
-# commands irrespective of the value of the RECURSIVE tag.
-# Possible values are YES and NO. If left blank NO is used.
-
-EXAMPLE_RECURSIVE = NO
-
-# The IMAGE_PATH tag can be used to specify one or more files or
-# directories that contain images that are included in the documentation (see
-# the \image command).
-
-IMAGE_PATH =
-
-# The INPUT_FILTER tag can be used to specify a program that doxygen should
-# invoke to filter for each input file. Doxygen will invoke the filter program
-# by executing (via popen()) the command <filter> <input-file>, where <filter>
-# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
-# input file. Doxygen will then use the output that the filter program writes
-# to standard output. If FILTER_PATTERNS is specified, this tag will be
-# ignored.
-
-INPUT_FILTER =
-
-# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
-# basis. Doxygen will compare the file name with each pattern and apply the
-# filter if there is a match. The filters are a list of the form:
-# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
-# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
-# is applied to all files.
-
-FILTER_PATTERNS =
-
-# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
-# INPUT_FILTER) will be used to filter the input files when producing source
-# files to browse (i.e. when SOURCE_BROWSER is set to YES).
-
-FILTER_SOURCE_FILES = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to source browsing
-#---------------------------------------------------------------------------
-
-# If the SOURCE_BROWSER tag is set to YES then a list of source files will
-# be generated. Documented entities will be cross-referenced with these sources.
-# Note: To get rid of all source code in the generated output, make sure also
-# VERBATIM_HEADERS is set to NO.
-
-SOURCE_BROWSER = NO
-
-# Setting the INLINE_SOURCES tag to YES will include the body
-# of functions and classes directly in the documentation.
-
-INLINE_SOURCES = NO
-
-# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
-# doxygen to hide any special comment blocks from generated source code
-# fragments. Normal C and C++ comments will always remain visible.
-
-STRIP_CODE_COMMENTS = YES
-
-# If the REFERENCED_BY_RELATION tag is set to YES
-# then for each documented function all documented
-# functions referencing it will be listed.
-
-REFERENCED_BY_RELATION = NO
-
-# If the REFERENCES_RELATION tag is set to YES
-# then for each documented function all documented entities
-# called/used by that function will be listed.
-
-REFERENCES_RELATION = NO
-
-# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
-# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
-# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
-# link to the source code. Otherwise they will link to the documentation.
-
-REFERENCES_LINK_SOURCE = YES
-
-# If the USE_HTAGS tag is set to YES then the references to source code
-# will point to the HTML generated by the htags(1) tool instead of doxygen
-# built-in source browser. The htags tool is part of GNU's global source
-# tagging system (see http://www.gnu.org/software/global/global.html). You
-# will need version 4.8.6 or higher.
-
-USE_HTAGS = NO
-
-# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
-# will generate a verbatim copy of the header file for each class for
-# which an include is specified. Set to NO to disable this.
-
-VERBATIM_HEADERS = YES
-
-#---------------------------------------------------------------------------
-# configuration options related to the alphabetical class index
-#---------------------------------------------------------------------------
-
-# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
-# of all compounds will be generated. Enable this if the project
-# contains a lot of classes, structs, unions or interfaces.
-
-ALPHABETICAL_INDEX = NO
-
-# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
-# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
-# in which this list will be split (can be a number in the range [1..20])
-
-COLS_IN_ALPHA_INDEX = 5
-
-# In case all classes in a project start with a common prefix, all
-# classes will be put under the same header in the alphabetical index.
-# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
-# should be ignored while generating the index headers.
-
-IGNORE_PREFIX =
-
-#---------------------------------------------------------------------------
-# configuration options related to the HTML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
-# generate HTML output.
-
-GENERATE_HTML = YES
-
-# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `html' will be used as the default path.
-
-HTML_OUTPUT = html
-
-# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
-# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
-# doxygen will generate files with .html extension.
-
-HTML_FILE_EXTENSION = .html
-
-# The HTML_HEADER tag can be used to specify a personal HTML header for
-# each generated HTML page. If it is left blank doxygen will generate a
-# standard header.
-
-HTML_HEADER =
-
-# The HTML_FOOTER tag can be used to specify a personal HTML footer for
-# each generated HTML page. If it is left blank doxygen will generate a
-# standard footer.
-
-HTML_FOOTER =
-
-# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
-# style sheet that is used by each HTML page. It can be used to
-# fine-tune the look of the HTML output. If the tag is left blank doxygen
-# will generate a default style sheet. Note that doxygen will try to copy
-# the style sheet file to the HTML output directory, so don't put your own
-# stylesheet in the HTML output directory as well, or it will be erased!
-
-HTML_STYLESHEET =
-
-# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
-# files or namespaces will be aligned in HTML using tables. If set to
-# NO a bullet list will be used.
-
-HTML_ALIGN_MEMBERS = YES
-
-# If the GENERATE_HTMLHELP tag is set to YES, additional index files
-# will be generated that can be used as input for tools like the
-# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
-# of the generated HTML documentation.
-
-GENERATE_HTMLHELP = NO
-
-# If the GENERATE_DOCSET tag is set to YES, additional index files
-# will be generated that can be used as input for Apple's Xcode 3
-# integrated development environment, introduced with OSX 10.5 (Leopard).
-# To create a documentation set, doxygen will generate a Makefile in the
-# HTML output directory. Running make will produce the docset in that
-# directory and running "make install" will install the docset in
-# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
-# it at startup.
-
-GENERATE_DOCSET = NO
-
-# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
-# feed. A documentation feed provides an umbrella under which multiple
-# documentation sets from a single provider (such as a company or product suite)
-# can be grouped.
-
-DOCSET_FEEDNAME = "Doxygen generated docs"
-
-# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
-# should uniquely identify the documentation set bundle. This should be a
-# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
-# will append .docset to the name.
-
-DOCSET_BUNDLE_ID = org.doxygen.Project
-
-# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
-# documentation will contain sections that can be hidden and shown after the
-# page has loaded. For this to work a browser that supports
-# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox,
-# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
-
-HTML_DYNAMIC_SECTIONS = NO
-
-# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
-# be used to specify the file name of the resulting .chm file. You
-# can add a path in front of the file if the result should not be
-# written to the html output directory.
-
-CHM_FILE =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
-# be used to specify the location (absolute path including file name) of
-# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
-# the HTML help compiler on the generated index.hhp.
-
-HHC_LOCATION =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
-# controls if a separate .chi index file is generated (YES) or that
-# it should be included in the master .chm file (NO).
-
-GENERATE_CHI = NO
-
-# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
-# is used to encode HtmlHelp index (hhk), content (hhc) and project file
-# content.
-
-CHM_INDEX_ENCODING =
-
-# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
-# controls whether a binary table of contents is generated (YES) or a
-# normal table of contents (NO) in the .chm file.
-
-BINARY_TOC = NO
-
-# The TOC_EXPAND flag can be set to YES to add extra items for group members
-# to the contents of the HTML help documentation and to the tree view.
-
-TOC_EXPAND = NO
-
-# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
-# top of each HTML page. The value NO (the default) enables the index and
-# the value YES disables it.
-
-DISABLE_INDEX = NO
-
-# This tag can be used to set the number of enum values (range [1..20])
-# that doxygen will group on one line in the generated HTML documentation.
-
-ENUM_VALUES_PER_LINE = 4
-
-# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
-# structure should be generated to display hierarchical information.
-# If the tag value is set to FRAME, a side panel will be generated
-# containing a tree-like index structure (just like the one that
-# is generated for HTML Help). For this to work a browser that supports
-# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+,
-# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are
-# probably better off using the HTML help feature. Other possible values
-# for this tag are: HIERARCHIES, which will generate the Groups, Directories,
-# and Class Hierarchy pages using a tree view instead of an ordered list;
-# ALL, which combines the behavior of FRAME and HIERARCHIES; and NONE, which
-# disables this behavior completely. For backwards compatibility with previous
-# releases of Doxygen, the values YES and NO are equivalent to FRAME and NONE
-# respectively.
-
-GENERATE_TREEVIEW = NONE
-
-# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
-# used to set the initial width (in pixels) of the frame in which the tree
-# is shown.
-
-TREEVIEW_WIDTH = 250
-
-# Use this tag to change the font size of Latex formulas included
-# as images in the HTML documentation. The default is 10. Note that
-# when you change the font size after a successful doxygen run you need
-# to manually remove any form_*.png images from the HTML output directory
-# to force them to be regenerated.
-
-FORMULA_FONTSIZE = 10
-
-#---------------------------------------------------------------------------
-# configuration options related to the LaTeX output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
-# generate Latex output.
-
-GENERATE_LATEX = NO
-
-# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `latex' will be used as the default path.
-
-LATEX_OUTPUT = latex
-
-# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
-# invoked. If left blank `latex' will be used as the default command name.
-
-LATEX_CMD_NAME = latex
-
-# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
-# generate index for LaTeX. If left blank `makeindex' will be used as the
-# default command name.
-
-MAKEINDEX_CMD_NAME = makeindex
-
-# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
-# LaTeX documents. This may be useful for small projects and may help to
-# save some trees in general.
-
-COMPACT_LATEX = NO
-
-# The PAPER_TYPE tag can be used to set the paper type that is used
-# by the printer. Possible values are: a4, a4wide, letter, legal and
-# executive. If left blank a4wide will be used.
-
-PAPER_TYPE = a4wide
-
-# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX
-# packages that should be included in the LaTeX output.
-
-EXTRA_PACKAGES =
-
-# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
-# the generated latex document. The header should contain everything until
-# the first chapter. If it is left blank doxygen will generate a
-# standard header. Notice: only use this tag if you know what you are doing!
-
-LATEX_HEADER =
-
-# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
-# is prepared for conversion to pdf (using ps2pdf). The pdf file will
-# contain links (just like the HTML output) instead of page references.
-# This makes the output suitable for online browsing using a pdf viewer.
-
-PDF_HYPERLINKS = YES
-
-# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
-# plain latex in the generated Makefile. Set this option to YES to get a
-# higher quality PDF documentation.
-
-USE_PDFLATEX = YES
-
-# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode
-# command to the generated LaTeX files. This will instruct LaTeX to keep
-# running if errors occur, instead of asking the user for help.
-# This option is also used when generating formulas in HTML.
-
-LATEX_BATCHMODE = NO
-
-# If LATEX_HIDE_INDICES is set to YES then doxygen will not
-# include the index chapters (such as File Index, Compound Index, etc.)
-# in the output.
-
-LATEX_HIDE_INDICES = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the RTF output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output.
-# The RTF output is optimized for Word 97 and may not look very pretty with
-# other RTF readers or editors.
-
-GENERATE_RTF = NO
-
-# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `rtf' will be used as the default path.
-
-RTF_OUTPUT = rtf
-
-# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
-# RTF documents. This may be useful for small projects and may help to
-# save some trees in general.
-
-COMPACT_RTF = NO
-
-# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
-# will contain hyperlink fields. The RTF file will
-# contain links (just like the HTML output) instead of page references.
-# This makes the output suitable for online browsing using WORD or other
-# programs which support those fields.
-# Note: wordpad (write) and others do not support links.
-
-RTF_HYPERLINKS = NO
-
-# Load stylesheet definitions from file. Syntax is similar to doxygen's
-# config file, i.e. a series of assignments. You only have to provide
-# replacements, missing definitions are set to their default value.
-
-RTF_STYLESHEET_FILE =
-
-# Set optional variables used in the generation of an rtf document.
-# Syntax is similar to doxygen's config file.
-
-RTF_EXTENSIONS_FILE =
-
-#---------------------------------------------------------------------------
-# configuration options related to the man page output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
-# generate man pages
-
-GENERATE_MAN = NO
-
-# The MAN_OUTPUT tag is used to specify where the man pages will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `man' will be used as the default path.
-
-MAN_OUTPUT = man
-
-# The MAN_EXTENSION tag determines the extension that is added to
-# the generated man pages (default is the subroutine's section .3)
-
-MAN_EXTENSION = .3
-
-# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
-# then it will generate one additional man file for each entity
-# documented in the real man page(s). These additional files
-# only source the real man page, but without them the man command
-# would be unable to find the correct page. The default is NO.
-
-MAN_LINKS = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the XML output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_XML tag is set to YES Doxygen will
-# generate an XML file that captures the structure of
-# the code including all documentation.
-
-GENERATE_XML = NO
-
-# The XML_OUTPUT tag is used to specify where the XML pages will be put.
-# If a relative path is entered the value of OUTPUT_DIRECTORY will be
-# put in front of it. If left blank `xml' will be used as the default path.
-
-XML_OUTPUT = xml
-
-# The XML_SCHEMA tag can be used to specify an XML schema,
-# which can be used by a validating XML parser to check the
-# syntax of the XML files.
-
-XML_SCHEMA =
-
-# The XML_DTD tag can be used to specify an XML DTD,
-# which can be used by a validating XML parser to check the
-# syntax of the XML files.
-
-XML_DTD =
-
-# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
-# dump the program listings (including syntax highlighting
-# and cross-referencing information) to the XML output. Note that
-# enabling this will significantly increase the size of the XML output.
-
-XML_PROGRAMLISTING = YES
-
-#---------------------------------------------------------------------------
-# configuration options for the AutoGen Definitions output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
-# generate an AutoGen Definitions (see autogen.sf.net) file
-# that captures the structure of the code including all
-# documentation. Note that this feature is still experimental
-# and incomplete at the moment.
-
-GENERATE_AUTOGEN_DEF = NO
-
-#---------------------------------------------------------------------------
-# configuration options related to the Perl module output
-#---------------------------------------------------------------------------
-
-# If the GENERATE_PERLMOD tag is set to YES Doxygen will
-# generate a Perl module file that captures the structure of
-# the code including all documentation. Note that this
-# feature is still experimental and incomplete at the
-# moment.
-
-GENERATE_PERLMOD = NO
-
-# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
-# the necessary Makefile rules, Perl scripts and LaTeX code to be able
-# to generate PDF and DVI output from the Perl module output.
-
-PERLMOD_LATEX = NO
-
-# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
-# nicely formatted so it can be parsed by a human reader. This is useful
-# if you want to understand what is going on. On the other hand, if this
-# tag is set to NO the size of the Perl module output will be much smaller
-# and Perl will parse it just the same.
-
-PERLMOD_PRETTY = YES
-
-# The names of the make variables in the generated doxyrules.make file
-# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
-# This is useful so different doxyrules.make files included by the same
-# Makefile don't overwrite each other's variables.
-
-PERLMOD_MAKEVAR_PREFIX =
-
-#---------------------------------------------------------------------------
-# Configuration options related to the preprocessor
-#---------------------------------------------------------------------------
-
-# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
-# evaluate all C-preprocessor directives found in the sources and include
-# files.
-
-ENABLE_PREPROCESSING = YES
-
-# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
-# names in the source code. If set to NO (the default) only conditional
-# compilation will be performed. Macro expansion can be done in a controlled
-# way by setting EXPAND_ONLY_PREDEF to YES.
-
-MACRO_EXPANSION = YES
-
-# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
-# then the macro expansion is limited to the macros specified with the
-# PREDEFINED and EXPAND_AS_DEFINED tags.
-
-EXPAND_ONLY_PREDEF = YES
-
-# If the SEARCH_INCLUDES tag is set to YES (the default) the include files
-# in the INCLUDE_PATH (see below) will be searched if a #include is found.
-
-SEARCH_INCLUDES = YES
-
-# The INCLUDE_PATH tag can be used to specify one or more directories that
-# contain include files that are not input files but should be processed by
-# the preprocessor.
-
-INCLUDE_PATH =
-
-# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
-# patterns (like *.h and *.hpp) to filter out the header-files in the
-# directories. If left blank, the patterns specified with FILE_PATTERNS will
-# be used.
-
-INCLUDE_FILE_PATTERNS =
-
-# The PREDEFINED tag can be used to specify one or more macro names that
-# are defined before the preprocessor is started (similar to the -D option of
-# gcc). The argument of the tag is a list of macros of the form: name
-# or name=definition (no spaces). If the definition and the = are
-# omitted =1 is assumed. To prevent a macro definition from being
-# undefined via #undef or recursively expanded use the := operator
-# instead of the = operator.
-
-PREDEFINED = DOXYGEN UNIV_DEBUG UNIV_SYNC_DEBUG __attribute__()=
-
-# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
-# this tag can be used to specify a list of macro names that should be expanded.
-# The macro definition that is found in the sources will be used.
-# Use the PREDEFINED tag if you want to use a different macro definition.
-
-EXPAND_AS_DEFINED = UT_LIST_BASE_NODE_T UT_LIST_NODE_T
-
-# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
-# doxygen's preprocessor will remove all function-like macros that are alone
-# on a line, have an all uppercase name, and do not end with a semicolon. Such
-# function macros are typically used for boiler-plate code, and will confuse
-# the parser if not removed.
-
-SKIP_FUNCTION_MACROS = YES
-
-#---------------------------------------------------------------------------
-# Configuration::additions related to external references
-#---------------------------------------------------------------------------
-
-# The TAGFILES option can be used to specify one or more tagfiles.
-# Optionally an initial location of the external documentation
-# can be added for each tagfile. The format of a tag file without
-# this location is as follows:
-# TAGFILES = file1 file2 ...
-# Adding location for the tag files is done as follows:
-# TAGFILES = file1=loc1 "file2 = loc2" ...
-# where "loc1" and "loc2" can be relative or absolute paths or
-# URLs. If a location is present for each tag, the installdox tool
-# does not have to be run to correct the links.
-# Note that each tag file must have a unique name
-# (where the name does NOT include the path)
-# If a tag file is not located in the directory in which doxygen
-# is run, you must also specify the path to the tagfile here.
-
-TAGFILES =
-
-# When a file name is specified after GENERATE_TAGFILE, doxygen will create
-# a tag file that is based on the input files it reads.
-
-GENERATE_TAGFILE =
-
-# If the ALLEXTERNALS tag is set to YES all external classes will be listed
-# in the class index. If set to NO only the inherited external classes
-# will be listed.
-
-ALLEXTERNALS = NO
-
-# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
-# in the modules index. If set to NO, only the current project's groups will
-# be listed.
-
-EXTERNAL_GROUPS = NO
-
-# The PERL_PATH should be the absolute path and name of the perl script
-# interpreter (i.e. the result of `which perl').
-
-PERL_PATH = /usr/bin/perl
-
-#---------------------------------------------------------------------------
-# Configuration options related to the dot tool
-#---------------------------------------------------------------------------
-
-# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
-# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base
-# or super classes. Setting the tag to NO turns the diagrams off. Note that
-# this option is superseded by the HAVE_DOT option below. This is only a
-# fallback. It is recommended to install and use dot, since it yields more
-# powerful graphs.
-
-CLASS_DIAGRAMS = YES
-
-# You can define message sequence charts within doxygen comments using the \msc
-# command. Doxygen will then run the mscgen tool (see
-# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
-# documentation. The MSCGEN_PATH tag allows you to specify the directory where
-# the mscgen tool resides. If left empty the tool is assumed to be found in the
-# default search path.
-
-MSCGEN_PATH =
-
-# If set to YES, the inheritance and collaboration graphs will hide
-# inheritance and usage relations if the target is undocumented
-# or is not a class.
-
-HIDE_UNDOC_RELATIONS = YES
-
-# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
-# available from the path. This tool is part of Graphviz, a graph visualization
-# toolkit from AT&T and Lucent Bell Labs. The other options in this section
-# have no effect if this option is set to NO (the default)
-
-HAVE_DOT = YES
-
-# By default doxygen will write a font called FreeSans.ttf to the output
-# directory and reference it in all dot files that doxygen generates. This
-# font does not include all possible unicode characters however, so when you need
-# these (or just want a differently looking font) you can specify the font name
-# using DOT_FONTNAME. You need to make sure dot is able to find the font,
-# which can be done by putting it in a standard location or by setting the
-# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
-# containing the font.
-
-DOT_FONTNAME = FreeSans
-
-# By default doxygen will tell dot to use the output directory to look for the
-# FreeSans.ttf font (which doxygen will put there itself). If you specify a
-# different font using DOT_FONTNAME you can set the path where dot
-# can find it using this tag.
-
-DOT_FONTPATH =
-
-# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for each documented class showing the direct and
-# indirect inheritance relations. Setting this tag to YES will force the
-# CLASS_DIAGRAMS tag to NO.
-
-CLASS_GRAPH = YES
-
-# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for each documented class showing the direct and
-# indirect implementation dependencies (inheritance, containment, and
-# class references variables) of the class with other documented classes.
-
-COLLABORATION_GRAPH = YES
-
-# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
-# will generate a graph for groups, showing the direct group dependencies.
-
-GROUP_GRAPHS = NO
-
-# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
-# collaboration diagrams in a style similar to the OMG's Unified Modeling
-# Language.
-
-UML_LOOK = NO
-
-# If set to YES, the inheritance and collaboration graphs will show the
-# relations between templates and their instances.
-
-TEMPLATE_RELATIONS = NO
-
-# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
-# tags are set to YES then doxygen will generate a graph for each documented
-# file showing the direct and indirect include dependencies of the file with
-# other documented files.
-
-INCLUDE_GRAPH = YES
-
-# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
-# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
-# documented header file showing the documented files that directly or
-# indirectly include this file.
-
-INCLUDED_BY_GRAPH = YES
-
-# If the CALL_GRAPH and HAVE_DOT options are set to YES then
-# doxygen will generate a call dependency graph for every global function
-# or class method. Note that enabling this option will significantly increase
-# the time of a run. So in most cases it will be better to enable call graphs
-# for selected functions only using the \callgraph command.
-
-CALL_GRAPH = NO
-
-# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
-# doxygen will generate a caller dependency graph for every global function
-# or class method. Note that enabling this option will significantly increase
-# the time of a run. So in most cases it will be better to enable caller
-# graphs for selected functions only using the \callergraph command.
-
-CALLER_GRAPH = NO
-
-# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
-# will generate a graphical hierarchy of all classes instead of a textual one.
-
-GRAPHICAL_HIERARCHY = YES
-
-# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
-# then doxygen will show the dependencies a directory has on other directories
-# in a graphical way. The dependency relations are determined by the #include
-# relations between the files in the directories.
-
-DIRECTORY_GRAPH = YES
-
-# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
-# generated by dot. Possible values are png, jpg, or gif
-# If left blank png will be used.
-
-DOT_IMAGE_FORMAT = png
-
-# The tag DOT_PATH can be used to specify the path where the dot tool can be
-# found. If left blank, it is assumed the dot tool can be found in the path.
-
-DOT_PATH =
-
-# The DOTFILE_DIRS tag can be used to specify one or more directories that
-# contain dot files that are included in the documentation (see the
-# \dotfile command).
-
-DOTFILE_DIRS =
-
-# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
-# nodes that will be shown in the graph. If the number of nodes in a graph
-# becomes larger than this value, doxygen will truncate the graph, which is
-# visualized by representing a node as a red box. Note that if the
-# number of direct children of the root node in a graph is already larger than
-# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
-# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
-
-DOT_GRAPH_MAX_NODES = 50
-
-# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
-# graphs generated by dot. A depth value of 3 means that only nodes reachable
-# from the root by following a path via at most 3 edges will be shown. Nodes
-# that lie further from the root node will be omitted. Note that setting this
-# option to 1 or 2 may greatly reduce the computation time needed for large
-# code bases. Also note that the size of a graph can be further restricted by
-# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
-
-MAX_DOT_GRAPH_DEPTH = 3
-
-# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
-# background. This is enabled by default, which results in a transparent
-# background. Warning: Depending on the platform used, enabling this option
-# may lead to badly anti-aliased labels on the edges of a graph (i.e. they
-# become hard to read).
-
-DOT_TRANSPARENT = YES
-
-# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output
-# files in one run (i.e. multiple -o and -T options on the command line). This
-# makes dot run faster, but since only newer versions of dot (>1.8.10)
-# support this, this feature is disabled by default.
-
-DOT_MULTI_TARGETS = NO
-
-# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
-# generate a legend page explaining the meaning of the various boxes and
-# arrows in the dot generated graphs.
-
-GENERATE_LEGEND = YES
-
-# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
-# remove the intermediate dot files that are used to generate
-# the various graphs.
-
-DOT_CLEANUP = YES
-
-#---------------------------------------------------------------------------
-# Configuration::additions related to the search engine
-#---------------------------------------------------------------------------
-
-# The SEARCHENGINE tag specifies whether or not a search engine should be
-# used. If set to NO the values of all tags below this one will be ignored.
-
-SEARCHENGINE = NO
diff --git a/storage/xtradb/api/api0api.cc b/storage/xtradb/api/api0api.cc
deleted file mode 100644
index 2a46dd4b4c1..00000000000
--- a/storage/xtradb/api/api0api.cc
+++ /dev/null
@@ -1,3886 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2008, 2015, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file api/api0api.cc
-InnoDB Native API
-
-2008-08-01 Created Sunny Bains
-3/20/2011 Jimmy Yang extracted from Embedded InnoDB
-*******************************************************/
-
-#include "univ.i"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
-#include "api0api.h"
-#include "api0misc.h"
-#include "srv0start.h"
-#include "dict0dict.h"
-#include "btr0pcur.h"
-#include "row0ins.h"
-#include "row0upd.h"
-#include "row0vers.h"
-#include "trx0roll.h"
-#include "dict0crea.h"
-#include "row0merge.h"
-#include "pars0pars.h"
-#include "lock0types.h"
-#include "row0sel.h"
-#include "lock0lock.h"
-#include "rem0cmp.h"
-#include "ut0dbg.h"
-#include "dict0priv.h"
-#include "ut0ut.h"
-#include "ha_prototypes.h"
-#include "trx0roll.h"
-
-/** Configuration variable: binlog option for the InnoDB APIs
-(initialized to FALSE) */
-my_bool ib_binlog_enabled = FALSE;
-
-/** Configuration variable: MDL option for the InnoDB APIs
-(initialized to FALSE) */
-my_bool ib_mdl_enabled = FALSE;
-
-/** Configuration variable: disable row locks for the InnoDB APIs
-(initialized to FALSE) */
-my_bool ib_disable_row_lock = FALSE;
-
-/** Configuration variable: transaction isolation level
-(initialized to IB_TRX_READ_UNCOMMITTED) */
-ulong ib_trx_level_setting = IB_TRX_READ_UNCOMMITTED;
-
-/** Configuration variable: background commit interval in seconds
-(initialized to 0) */
-ulong ib_bk_commit_interval = 0;
-
-/** InnoDB tuple types. Stored in ib_tuple_t::type to tell a row tuple
-from a key tuple. */
-enum ib_tuple_type_t{
- TPL_TYPE_ROW, /*!< Data row tuple */
- TPL_TYPE_KEY /*!< Index key tuple */
-};
-
-/** Query types supported by the API. */
-enum ib_qry_type_t{
- QRY_NON, /*!< None/Sentinel */
- QRY_INS, /*!< Insert operation */
- QRY_UPD, /*!< Update operation */
- QRY_SEL /*!< Select operation */
-};
-
-/** Query graphs, one per kind of operation. All three are released
-together by ib_qry_proc_free(). */
-struct ib_qry_grph_t {
- que_fork_t* ins; /*!< Innobase SQL query graph used
- in inserts */
- que_fork_t* upd; /*!< Innobase SQL query graph used
- in updates or deletes */
- que_fork_t* sel; /*!< dummy query graph used in
- selects */
-};
-
-/** Query nodes, one per kind of operation. */
-struct ib_qry_node_t {
- ins_node_t* ins; /*!< Innobase SQL insert node
- used to perform inserts to the table */
- upd_node_t* upd; /*!< Innobase SQL update node
- used to perform updates and deletes */
- sel_node_t* sel; /*!< Innobase SQL select node
- used to perform selects on the table */
-};
-
-/** Query processing fields: the query nodes together with their query
-graphs. Embedded in ib_cursor_t as q_proc. */
-struct ib_qry_proc_t {
-
- ib_qry_node_t node; /*!< Query nodes */
-
- ib_qry_grph_t grph; /*!< Query graphs */
-};
-
-/** Cursor instance for traversing tables/indexes. This will eventually
-become row_prebuilt_t. The instance itself is allocated from its own
-heap member (see ib_create_cursor()). */
-struct ib_cursor_t {
- mem_heap_t* heap; /*!< Instance heap */
-
- mem_heap_t* query_heap; /*!< Heap to use for query graphs */
-
- ib_qry_proc_t q_proc; /*!< Query processing info */
-
- ib_match_mode_t match_mode; /*!< ib_cursor_moveto match mode */
-
- row_prebuilt_t* prebuilt; /*!< For reading rows */
-
- bool valid_trx; /*!< Valid transaction attached;
- cleared by ib_cursor_commit_trx() */
-};
-
-/** InnoDB table column description used during table and index schema
-creation. */
-struct ib_col_t {
- const char* name; /*!< Name of column */
-
- ib_col_type_t ib_col_type; /*!< Main type of the column */
-
- ulint len; /*!< Length of the column */
-
- ib_col_attr_t ib_col_attr; /*!< Column attributes */
-
-};
-
-/** InnoDB index column description used during index and index schema
-creation. */
-struct ib_key_col_t {
- const char* name; /*!< Name of column */
-
- ulint prefix_len; /*!< Column index prefix length, or 0 */
-};
-
-struct ib_table_def_t;
-
-/** InnoDB index schema used during index creation. Refers back to the
-table schema (ib_table_def_t) that owns it. */
-struct ib_index_def_t {
- mem_heap_t* heap; /*!< Heap used to build this and all
- its columns in the list */
-
- const char* name; /*!< Index name */
-
- dict_table_t* table; /*!< Parent InnoDB table */
-
- ib_table_def_t* schema; /*!< Parent table schema that owns
- this instance */
-
- ibool clustered; /*!< True if clustered index */
-
- ibool unique; /*!< True if unique index */
-
- ib_vector_t* cols; /*!< Vector of columns */
-
- trx_t* usr_trx; /*!< User transaction covering the
- DDL operations */
-};
-
-/** InnoDB table schema used during table creation. */
-struct ib_table_def_t {
- mem_heap_t* heap; /*!< Heap used to build this and all
- its columns in the list */
- const char* name; /*!< Table name */
-
- ib_tbl_fmt_t ib_tbl_fmt; /*!< Row format */
-
- ulint page_size; /*!< Page size */
-
- ib_vector_t* cols; /*!< Vector of columns */
-
- ib_vector_t* indexes; /*!< Vector of indexes */
-
- dict_table_t* table; /*!< Table read from or NULL */
-};
-
-/** InnoDB tuple used for row and key operations; the type member tells
-which of the two an instance is. */
-struct ib_tuple_t {
- mem_heap_t* heap; /*!< Heap used to build
- this and for copying
- the column values. */
-
- ib_tuple_type_t type; /*!< Tuple discriminator. */
-
- const dict_index_t* index; /*!< Index for tuple can be either
- secondary or cluster index. */
-
- dtuple_t* ptr; /*!< The internal tuple
- instance */
-};
-
-/** Wake-up interval used to convey information to InnoDB about server
-activity: in case of normal DML ops it is not sensible to call
-srv_active_wake_master_thread() after each operation, so
-ib_wake_master_thread() only does it on every
-INNOBASE_WAKE_INTERVAL'th call. */
-
-#define INNOBASE_WAKE_INTERVAL 32
-
-/*****************************************************************//**
-Tell whether an InnoDB persistent cursor is positioned: its old position
-must have been stored, and its position state must be either
-BTR_PCUR_IS_POSITIONED or BTR_PCUR_WAS_POSITIONED.
-@return IB_TRUE if positioned */
-UNIV_INLINE
-ib_bool_t
-ib_btr_cursor_is_positioned(
-/*========================*/
-	btr_pcur_t*	pcur)	/*!< in: InnoDB persistent cursor */
-{
-	const bool	old_stored
-		= (pcur->old_stored == BTR_PCUR_OLD_STORED);
-
-	const bool	has_position
-		= (pcur->pos_state == BTR_PCUR_IS_POSITIONED
-		   || pcur->pos_state == BTR_PCUR_WAS_POSITIONED);
-
-	return(old_stored && has_position);
-}
-
-
-/********************************************************************//**
-Open a table using the table id, if found then increment table ref count.
-The dictionary mutex is acquired (and released again) here unless the
-caller states that it already owns it. A table whose file_unreadable flag
-is set is not handed out: the reference that was just taken is released
-and NULL is returned.
-@return table instance if found and readable, else NULL */
-static
-dict_table_t*
-ib_open_table_by_id(
-/*================*/
-	ib_id_u64_t	tid,	/*!< in: table id to lookup */
-	ib_bool_t	locked)	/*!< in: TRUE if own dict mutex */
-{
-	dict_table_t*	table;
-	table_id_t	table_id = tid;
-
-	if (!locked) {
-		dict_mutex_enter_for_mysql();
-	}
-
-	table = dict_table_open_on_id(table_id, TRUE, DICT_TABLE_OP_NORMAL);
-
-	if (table != NULL && table->file_unreadable) {
-		/* The caller only ever sees NULL for this table and so can
-		never release the reference taken by the open above; drop it
-		here. The dictionary mutex is held at this point whichever
-		way we were called, hence dict_locked == TRUE. */
-		dict_table_close(table, TRUE, FALSE);
-		table = NULL;
-	}
-
-	if (!locked) {
-		dict_mutex_exit_for_mysql();
-	}
-
-	return(table);
-}
-
-/********************************************************************//**
-Open a table using the table name, if found then increment table ref count.
-A table whose file_unreadable flag is set is not handed out: the reference
-that was just taken is released and NULL is returned.
-@return table instance if found and readable, else NULL */
-UNIV_INTERN
-void*
-ib_open_table_by_name(
-/*==================*/
-	const char*	name)	/*!< in: table name to lookup */
-{
-	dict_table_t*	table;
-
-	table = dict_table_open_on_name(name, FALSE, FALSE,
-					DICT_ERR_IGNORE_NONE);
-
-	if (table != NULL && table->file_unreadable) {
-		/* The caller only ever sees NULL for this table and so can
-		never release the reference taken by the open above; drop it
-		here. The table was opened with dict_locked == FALSE, so it
-		is closed the same way. */
-		dict_table_close(table, FALSE, FALSE);
-		table = NULL;
-	}
-
-	return(table);
-}
-
-/********************************************************************//**
-Find a table by name through dict_table_get_low(). A table whose
-file_unreadable flag is set is reported as not found.
-@return table instance if found, else NULL */
-static
-dict_table_t*
-ib_lookup_table_by_name(
-/*====================*/
-	const char*	name)	/*!< in: table name to lookup */
-{
-	dict_table_t*	found = dict_table_get_low(name);
-
-	if (found == NULL || found->file_unreadable) {
-		return(NULL);
-	}
-
-	return(found);
-}
-
-/********************************************************************//**
-Count the calls to this function and, on every INNOBASE_WAKE_INTERVAL'th
-call, invoke srv_active_wake_master_thread(). This function should be
-used when a single database operation may introduce a small need for
-server utility activity, like checkpointing. */
-UNIV_INLINE
-void
-ib_wake_master_thread(void)
-/*=======================*/
-{
-	/* Number of calls made so far; kept across calls. */
-	static ulint	ib_signal_counter = 0;
-
-	if ((++ib_signal_counter % INNOBASE_WAKE_INTERVAL) != 0) {
-		return;
-	}
-
-	srv_active_wake_master_thread();
-}
-
-/*****************************************************************//**
-Read the columns from a rec into a tuple. The record is first copied,
-either into the caller supplied buffer *rec_buf (which is reallocated
-with free()/malloc() when *len is too small, updating *len) or, when no
-buffer is supplied, into memory taken from the tuple heap. The fields of
-the tuple then point into that copy; externally stored columns are
-fetched into the tuple heap. */
-static
-void
-ib_read_tuple(
-/*==========*/
-	const rec_t*	rec,		/*!< in: Record to read */
-	ib_bool_t	page_format,	/*!< in: IB_TRUE if compressed format */
-	ib_tuple_t*	tuple,		/*!< in: tuple to read into */
-	void**		rec_buf,	/*!< in/out: row buffer */
-	ulint*		len)		/*!< in/out: buffer len */
-{
-	ulint		i;
-	void*		ptr;
-	rec_t*		copy;
-	ulint		rec_meta_data;
-	ulint		n_index_fields;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets = offsets_;
-	dtuple_t*	dtuple = tuple->ptr;
-	const dict_index_t* index = tuple->index;
-	ulint		offset_size;
-
-	rec_offs_init(offsets_);
-
-	offsets = rec_get_offsets(
-		rec, index, offsets, ULINT_UNDEFINED, &tuple->heap);
-
-	rec_meta_data = rec_get_info_bits(rec, page_format);
-	dtuple_set_info_bits(dtuple, rec_meta_data);
-
-	offset_size = rec_offs_size(offsets);
-
-	if (rec_buf && *rec_buf) {
-		if (*len < offset_size) {
-			free(*rec_buf);
-			*rec_buf = malloc(offset_size);
-
-			/* There is no way to report an error from here and
-			the buffer is written to right below, so fail loudly
-			instead of copying into a NULL pointer. */
-			ut_a(*rec_buf != NULL);
-
-			*len = offset_size;
-		}
-		ptr = *rec_buf;
-	} else {
-		/* Make a copy of the rec. */
-		ptr = mem_heap_alloc(tuple->heap, offset_size);
-	}
-
-	copy = rec_copy(ptr, rec, offsets);
-
-	n_index_fields = ut_min(
-		rec_offs_n_fields(offsets), dtuple_get_n_fields(dtuple));
-
-	for (i = 0; i < n_index_fields; ++i) {
-		/* Named differently from the "len" parameter so that the
-		buffer length is not shadowed inside the loop. */
-		ulint		field_len;
-		const byte*	data;
-		dfield_t*	dfield;
-
-		if (tuple->type == TPL_TYPE_ROW) {
-			const dict_col_t*	col;
-			ulint			col_no;
-			const dict_field_t*	index_field;
-
-			/* A row tuple is addressed by table column number,
-			not by position in the index. */
-			index_field = dict_index_get_nth_field(index, i);
-			col = dict_field_get_col(index_field);
-			col_no = dict_col_get_no(col);
-
-			dfield = dtuple_get_nth_field(dtuple, col_no);
-		} else {
-			dfield = dtuple_get_nth_field(dtuple, i);
-		}
-
-		data = rec_get_nth_field(copy, offsets, i, &field_len);
-
-		/* Fetch and copy any externally stored column. */
-		if (rec_offs_nth_extern(offsets, i)) {
-
-			ulint	zip_size;
-
-			zip_size = dict_table_zip_size(index->table);
-
-			data = btr_rec_copy_externally_stored_field(
-				copy, offsets, zip_size, i, &field_len,
-				tuple->heap, NULL);
-
-			ut_a(field_len != UNIV_SQL_NULL);
-		}
-
-		dfield_set_data(dfield, data, field_len);
-	}
-}
-
-/*****************************************************************//**
-Create an InnoDB key tuple in the given heap. All fields get the types of
-the index columns and are set to SQL NULL; the number of fields used in
-comparisons is set to the number of user defined ordering fields of the
-index. If the tuple cannot be allocated the heap is freed.
-@return tuple instance created, or NULL */
-static
-ib_tpl_t
-ib_key_tuple_new_low(
-/*=================*/
-	const dict_index_t*	index,	/*!< in: index for which tuple
-					required */
-	ulint			n_cols,	/*!< in: no. of user defined cols */
-	mem_heap_t*		heap)	/*!< in: memory heap */
-{
-	ib_tuple_t*	key_tuple = static_cast<ib_tuple_t*>(
-		mem_heap_alloc(heap, sizeof(*key_tuple)));
-
-	if (key_tuple == NULL) {
-		mem_heap_free(heap);
-		return(NULL);
-	}
-
-	key_tuple->heap = heap;
-	key_tuple->index = index;
-	key_tuple->type = TPL_TYPE_KEY;
-
-	/* Is it a generated clustered index ? Then use a single column. */
-	if (n_cols == 0) {
-		n_cols = 1;
-	}
-
-	key_tuple->ptr = dtuple_create(heap, n_cols);
-
-	/* Copy types and set to SQL_NULL. */
-	dict_index_copy_types(key_tuple->ptr, index, n_cols);
-
-	for (ulint field_no = 0; field_no < n_cols; ++field_no) {
-		dfield_set_null(
-			dtuple_get_nth_field(key_tuple->ptr, field_no));
-	}
-
-	dtuple_set_n_fields_cmp(
-		key_tuple->ptr,
-		dict_index_get_n_ordering_defined_by_user(index));
-
-	return((ib_tpl_t) key_tuple);
-}
-
-/*****************************************************************//**
-Create an InnoDB key tuple in a newly created heap.
-@return tuple instance created, or NULL */
-static
-ib_tpl_t
-ib_key_tuple_new(
-/*=============*/
-	const dict_index_t*	index,	/*!< in: index of tuple */
-	ulint			n_cols)	/*!< in: no. of user defined cols */
-{
-	mem_heap_t*	tuple_heap = mem_heap_create(64);
-
-	if (tuple_heap != NULL) {
-		/* The new tuple lives in, and refers to, this heap. */
-		return(ib_key_tuple_new_low(index, n_cols, tuple_heap));
-	}
-
-	return(NULL);
-}
-
-/*****************************************************************//**
-Create an InnoDB row tuple in the given heap. The fields get the types of
-the columns of the table that the index belongs to. If the tuple cannot
-be allocated the heap is freed.
-@return tuple instance, or NULL */
-static
-ib_tpl_t
-ib_row_tuple_new_low(
-/*=================*/
-	const dict_index_t*	index,	/*!< in: index of tuple */
-	ulint			n_cols,	/*!< in: no. of cols in tuple */
-	mem_heap_t*		heap)	/*!< in: memory heap */
-{
-	ib_tuple_t*	row_tuple = static_cast<ib_tuple_t*>(
-		mem_heap_alloc(heap, sizeof(*row_tuple)));
-
-	if (row_tuple == NULL) {
-		mem_heap_free(heap);
-		return(NULL);
-	}
-
-	row_tuple->heap = heap;
-	row_tuple->index = index;
-	row_tuple->type = TPL_TYPE_ROW;
-	row_tuple->ptr = dtuple_create(heap, n_cols);
-
-	/* Copy types and set to SQL_NULL. */
-	dict_table_copy_types(row_tuple->ptr, index->table);
-
-	return((ib_tpl_t) row_tuple);
-}
-
-/*****************************************************************//**
-Create an InnoDB row tuple in a newly created heap.
-@return tuple instance, or NULL */
-static
-ib_tpl_t
-ib_row_tuple_new(
-/*=============*/
-	const dict_index_t*	index,	/*!< in: index of tuple */
-	ulint			n_cols)	/*!< in: no. of cols in tuple */
-{
-	mem_heap_t*	tuple_heap = mem_heap_create(64);
-
-	if (tuple_heap != NULL) {
-		/* The new tuple lives in, and refers to, this heap. */
-		return(ib_row_tuple_new_low(index, n_cols, tuple_heap));
-	}
-
-	return(NULL);
-}
-
-/*****************************************************************//**
-Start (or restart) a transaction on an existing handle: mark it as an
-API transaction, record the auto-commit and read-write flags, start it
-if it is not started yet, then set its isolation level and THD.
-@return DB_SUCCESS */
-UNIV_INTERN
-ib_err_t
-ib_trx_start(
-/*=========*/
-	ib_trx_t	ib_trx,		/*!< in: transaction to restart */
-	ib_trx_level_t	ib_trx_level,	/*!< in: trx isolation level */
-	ib_bool_t	read_write,	/*!< in: true if read write
-					transaction */
-	ib_bool_t	auto_commit,	/*!< in: auto commit after each
-					single DML */
-	void*		thd)		/*!< in: THD */
-{
-	trx_t*	trx = (trx_t*) ib_trx;
-
-	ut_a(ib_trx_level <= IB_TRX_SERIALIZABLE);
-
-	/* These flags are set before the transaction is started. */
-	trx->api_trx = true;
-	trx->api_auto_commit = auto_commit;
-	trx->read_write = read_write;
-
-	trx_start_if_not_started(trx);
-
-	trx->isolation_level = ib_trx_level;
-
-	/* FIXME: This is a place holder, we should add an arg that comes
-	from the client. */
-	trx->mysql_thd = static_cast<THD*>(thd);
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Begin a transaction. This will allocate a new transaction handle and
-put the transaction in the active state by calling ib_trx_start() with a
-NULL THD. Starting the transaction is required to succeed.
-@return innobase txn handle */
-UNIV_INTERN
-ib_trx_t
-ib_trx_begin(
-/*=========*/
-	ib_trx_level_t	ib_trx_level,	/*!< in: trx isolation level */
-	ib_bool_t	read_write,	/*!< in: true if read write
-					transaction */
-	ib_bool_t	auto_commit)	/*!< in: auto commit after each
-					single DML */
-{
-	trx_t*		trx;
-	ib_err_t	err;
-
-	trx = trx_allocate_for_mysql();
-
-	/* ib_trx_start() returns an error code, not a boolean: compare it
-	against DB_SUCCESS instead of testing it for being non-zero, which
-	every error code would pass as well. */
-	err = ib_trx_start(static_cast<ib_trx_t>(trx), ib_trx_level,
-			   read_write, auto_commit, NULL);
-	ut_a(err == DB_SUCCESS);
-
-	return(static_cast<ib_trx_t>(trx));
-}
-
-
-/*****************************************************************//**
-Report the read_only member of a transaction.
-@return transaction read_only status */
-UNIV_INTERN
-ib_u32_t
-ib_trx_read_only(
-/*=============*/
-	ib_trx_t	ib_trx)	/*!< in: trx handle */
-{
-	const trx_t*	trx = (trx_t*) ib_trx;
-
-	return(trx->read_only);
-}
-
-/*****************************************************************//**
-Report the state member of a transaction, converted to the API state
-type.
-@return transaction state */
-UNIV_INTERN
-ib_trx_state_t
-ib_trx_state(
-/*=========*/
-	ib_trx_t	ib_trx)	/*!< in: trx handle */
-{
-	const trx_t*	trx = (trx_t*) ib_trx;
-
-	return((ib_trx_state_t) trx->state);
-}
-
-/*****************************************************************//**
-Report the start_time member of a transaction as a 64-bit value.
-@return trx start_time */
-UNIV_INTERN
-ib_u64_t
-ib_trx_get_start_time(
-/*==================*/
-	ib_trx_t	ib_trx)	/*!< in: transaction */
-{
-	const trx_t*	trx = (trx_t*) ib_trx;
-
-	return(static_cast<ib_u64_t>(trx->start_time));
-}
-/*****************************************************************//**
-Release the resources of the transaction by handing it to
-trx_free_for_mysql().
-@return DB_SUCCESS */
-UNIV_INTERN
-ib_err_t
-ib_trx_release(
-/*===========*/
-	ib_trx_t	ib_trx)	/*!< in: trx handle */
-{
-	trx_t*	doomed_trx = (trx_t*) ib_trx;
-
-	ut_ad(doomed_trx != NULL);
-
-	trx_free_for_mysql(doomed_trx);
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Commit a transaction. This function will also release the schema
-latches too. A transaction that has not been started is left untouched.
-@return DB_SUCCESS */
-
-ib_err_t
-ib_trx_commit(
-/*==========*/
-	ib_trx_t	ib_trx)	/*!< in: trx handle */
-{
-	trx_t*	trx = (trx_t*) ib_trx;
-
-	/* There is nothing to commit unless the transaction was started. */
-	if (trx->state != TRX_STATE_NOT_STARTED) {
-		trx_commit(trx);
-	}
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Rollback a transaction. This function will also release the schema
-latches too. The rollback is asserted to succeed.
-@return DB_SUCCESS */
-UNIV_INTERN
-ib_err_t
-ib_trx_rollback(
-/*============*/
-	ib_trx_t	ib_trx)	/*!< in: trx handle */
-{
-	const ib_err_t	rollback_err = static_cast<ib_err_t>(
-		trx_rollback_for_mysql((trx_t*) ib_trx));
-
-	/* It should always succeed */
-	ut_a(rollback_err == DB_SUCCESS);
-
-	return(rollback_err);
-}
-
-#ifdef __WIN__
-/*****************************************************************//**
-Convert a NUL-terminated string to lower case in place, one byte at a
-time, using tolower(). */
-static
-void
-ib_to_lower_case(
-/*=============*/
-	char*	ptr)	/*!< in/out: string to convert to lower case */
-{
-	for (; *ptr != '\0'; ++ptr) {
-		/* tolower() is only defined for arguments representable as
-		unsigned char (or EOF), while a plain char may be negative;
-		convert before the call. */
-		*ptr = static_cast<char>(
-			tolower(static_cast<unsigned char>(*ptr)));
-	}
-}
-#endif /* __WIN__ */
-
-/*****************************************************************//**
-Normalizes a table name string. A normalized name consists of the
-database name catenated to '/' and table name. An example:
-test/mytable. On Windows normalization puts both the database name and the
-table name always to lower case. This function can be called for system
-tables and they don't have a database component. For tables that don't have
-a database component, we don't normalize them to lower case on Windows.
-The assumption is that they are system tables that reside in the system
-table space. Both '\\' and '/' are accepted as separators in the input. */
-static
-void
-ib_normalize_table_name(
-/*====================*/
-	char*		norm_name,	/*!< out: normalized name as a
-					null-terminated string */
-	const char*	name)		/*!< in: table name string */
-{
-	const char*	sep = name;
-
-	/* Scan the name backwards from its last character until the
-	separator in front of the table name, or the first character, is
-	reached. */
-	sep += ut_strlen(name) - 1;
-
-	while (sep > name && *sep != '\\' && *sep != '/') {
-		--sep;
-	}
-
-	/* For system tables there is no '/' or dbname. */
-	ut_a(sep >= name);
-
-	if (sep == name) {
-		/* No database component: copy the name unchanged. */
-		ut_strcpy(norm_name, name);
-		return;
-	}
-
-	const char*	table_name = sep + 1;
-	const char*	db_name = sep - 1;
-
-	/* Continue backwards to the start of the database name. */
-	while (db_name >= name && *db_name != '\\' && *db_name != '/') {
-		--db_name;
-	}
-
-	++db_name;
-
-	/* Copy "db<separator>table" including the terminating NUL, then
-	force the separator to be '/'. */
-	memcpy(norm_name, db_name,
-	       ut_strlen(name) + 1 - (db_name - name));
-
-	norm_name[table_name - db_name - 1] = '/';
-#ifdef __WIN__
-	ib_to_lower_case(norm_name);
-#endif
-}
-
-/*****************************************************************//**
-Check whether the table name conforms to our requirements. The name must
-be at least two characters long, must neither start nor end with '/',
-must not start with "./" or "../", and must contain exactly one '/'. On
-Windows the characters reserved in DOS filenames are rejected as well.
-@return DB_SUCCESS or DB_DATA_MISMATCH */
-UNIV_INTERN
-ib_err_t
-ib_table_name_check(
-/*================*/
-	const char*	name)	/*!< in: table name to check */
-{
-	const ulint	len = ut_strlen(name);
-	ulint		n_slashes = 0;
-
-	if (len < 2) {
-		return(DB_DATA_MISMATCH);
-	}
-
-	if (name[0] == '/' || name[len - 1] == '/') {
-		return(DB_DATA_MISMATCH);
-	}
-
-	/* Reject "./..." and "../...". With len >= 2, name[2] is at worst
-	the terminating NUL. */
-	if (name[0] == '.'
-	    && (name[1] == '/' || (name[1] == '.' && name[2] == '/'))) {
-
-		return(DB_DATA_MISMATCH);
-	}
-
-	for (const char* p = name; *p != '\0'; ++p) {
-#ifdef __WIN__
-		/* Check for reserved characters in DOS filenames. */
-		switch (*p) {
-		case ':':
-		case '|':
-		case '"':
-		case '*':
-		case '<':
-		case '>':
-			return(DB_DATA_MISMATCH);
-		}
-#endif /* __WIN__ */
-		/* A second separator makes the name invalid. */
-		if (*p == '/' && ++n_slashes > 1) {
-			return(DB_DATA_MISMATCH);
-		}
-	}
-
-	return(n_slashes == 1 ? DB_SUCCESS : DB_DATA_MISMATCH);
-}
-
-
-
-/*****************************************************************//**
-Get a table id. The caller must have acquired the dictionary mutex.
-*table_id is set to 0 when the table is not found.
-@return DB_SUCCESS if found, else DB_TABLE_NOT_FOUND */
-static
-ib_err_t
-ib_table_get_id_low(
-/*================*/
-	const char*	table_name,	/*!< in: table to find */
-	ib_id_u64_t*	table_id)	/*!< out: table id if found */
-{
-	const dict_table_t*	table = ib_lookup_table_by_name(table_name);
-
-	if (table == NULL) {
-		*table_id = 0;
-		return(DB_TABLE_NOT_FOUND);
-	}
-
-	*table_id = (table->id);
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Create an internal cursor instance. The cursor is allocated from its own
-heap and gets a second heap for query graphs and a new prebuilt struct
-for the table and index. If a transaction is given, its
-n_mysql_tables_in_use count is incremented and a read view is assigned
-to it. *ib_crsr is written only on success.
-@return DB_SUCCESS or DB_OUT_OF_MEMORY */
-static
-ib_err_t
-ib_create_cursor(
-/*=============*/
-	ib_crsr_t*	ib_crsr,	/*!< out: InnoDB cursor */
-	dict_table_t*	table,		/*!< in: table instance */
-	dict_index_t*	index,		/*!< in: index to use */
-	trx_t*		trx)		/*!< in: transaction */
-{
-	ib_cursor_t*	cursor;
-	row_prebuilt_t*	prebuilt;
-	mem_heap_t*	heap = mem_heap_create(sizeof(*cursor) * 2);
-
-	if (heap == NULL) {
-		return(DB_OUT_OF_MEMORY);
-	}
-
-	cursor = static_cast<ib_cursor_t*>(
-		mem_heap_zalloc(heap, sizeof(*cursor)));
-
-	cursor->heap = heap;
-	cursor->query_heap = mem_heap_create(64);
-
-	if (cursor->query_heap == NULL) {
-		/* The cursor itself lives in this heap. */
-		mem_heap_free(heap);
-
-		return(DB_OUT_OF_MEMORY);
-	}
-
-	prebuilt = row_create_prebuilt(table, 0);
-	cursor->prebuilt = prebuilt;
-
-	prebuilt->trx = trx;
-
-	cursor->valid_trx = TRUE;
-
-	prebuilt->table = table;
-	prebuilt->select_lock_type = LOCK_NONE;
-	prebuilt->innodb_api = TRUE;
-	prebuilt->index = index;
-
-	ut_a(prebuilt->index != NULL);
-
-	if (trx != NULL) {
-		++trx->n_mysql_tables_in_use;
-
-		prebuilt->index_usable =
-			row_merge_is_index_usable(trx, prebuilt->index);
-
-		/* Assign a read view if the transaction does
-		not have it yet */
-
-		trx_assign_read_view(trx);
-	}
-
-	*ib_crsr = (ib_crsr_t) cursor;
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Create an internal cursor instance, and set prebuilt->index to index
-with supplied index_id. An index_id of 0 selects the first index of the
-table; any other id is looked up under the dictionary mutex.
-@return DB_SUCCESS or err code */
-static
-ib_err_t
-ib_create_cursor_with_index_id(
-/*===========================*/
-	ib_crsr_t*	ib_crsr,	/*!< out: InnoDB cursor */
-	dict_table_t*	table,		/*!< in: table instance */
-	ib_id_u64_t	index_id,	/*!< in: index id or 0 */
-	trx_t*		trx)		/*!< in: transaction */
-{
-	dict_index_t*	index;
-
-	if (index_id == 0) {
-		index = dict_table_get_first_index(table);
-	} else {
-		mutex_enter(&dict_sys->mutex);
-		index = dict_index_find_on_id_low(index_id);
-		mutex_exit(&dict_sys->mutex);
-	}
-
-	return(ib_create_cursor(ib_crsr, table, index, trx));
-}
-
-/*****************************************************************//**
-Open an InnoDB table by id and return a cursor handle on its first index.
-The table is opened as "dictionary mutex already owned" only when a
-transaction is given and it holds the schema lock in exclusive mode.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_open_table_using_id(
-/*==========================*/
-	ib_id_u64_t	table_id,	/*!< in: table id of table to open */
-	ib_trx_t	ib_trx,		/*!< in: Current transaction handle
-					can be NULL */
-	ib_crsr_t*	ib_crsr)	/*!< out,own: InnoDB cursor */
-{
-	const bool	dict_locked
-		= (ib_trx != NULL && ib_schema_lock_is_exclusive(ib_trx));
-
-	dict_table_t*	table = ib_open_table_by_id(
-		table_id, dict_locked ? TRUE : FALSE);
-
-	if (table == NULL) {
-
-		return(DB_TABLE_NOT_FOUND);
-	}
-
-	return(ib_create_cursor_with_index_id(ib_crsr, table, 0,
-					      (trx_t*) ib_trx));
-}
-
-/*****************************************************************//**
-Open an InnoDB index and return a cursor handle to it. The table to open
-is identified by the upper 32 bits of index_id. The table is opened as
-"dictionary mutex already owned" only when a transaction is given and it
-holds the schema lock in exclusive mode.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_open_index_using_id(
-/*==========================*/
-	ib_id_u64_t	index_id,	/*!< in: index id of index to open */
-	ib_trx_t	ib_trx,		/*!< in: Current transaction handle
-					can be NULL */
-	ib_crsr_t*	ib_crsr)	/*!< out: InnoDB cursor */
-{
-	ib_err_t	err;
-	dict_table_t*	table;
-	ulint		table_id = (ulint)( index_id >> 32);
-
-	if (ib_trx == NULL || !ib_schema_lock_is_exclusive(ib_trx)) {
-		table = ib_open_table_by_id(table_id, FALSE);
-	} else {
-		table = ib_open_table_by_id(table_id, TRUE);
-	}
-
-	if (table == NULL) {
-
-		return(DB_TABLE_NOT_FOUND);
-	}
-
-	/* The full index id is used for the index lookup. */
-	err = ib_create_cursor_with_index_id(
-		ib_crsr, table, index_id, (trx_t*) ib_trx);
-
-	/* *ib_crsr is written only when the cursor was created, so it may
-	be inspected only after a successful creation. */
-	if (err == DB_SUCCESS) {
-		const ib_cursor_t*	cursor;
-
-		cursor = *(ib_cursor_t**) ib_crsr;
-
-		if (cursor->prebuilt->index == NULL) {
-			ib_err_t	crsr_err;
-
-			crsr_err = ib_cursor_close(*ib_crsr);
-			ut_a(crsr_err == DB_SUCCESS);
-
-			*ib_crsr = NULL;
-		}
-	}
-
-	return(err);
-}
-
-/*****************************************************************//**
-Open a cursor on the index with the given name, on the table of an
-already open cursor. The indexes of the table are searched with a case
-insensitive name comparison, starting at the first (cluster) index. The
-new cursor uses the transaction of the open cursor.
-@return DB_SUCCESS or err code; DB_ERROR if no index matches */
-UNIV_INTERN
-ib_err_t
-ib_cursor_open_index_using_name(
-/*============================*/
-	ib_crsr_t	ib_open_crsr,	/*!< in: open/active cursor */
-	const char*	index_name,	/*!< in: secondary index name */
-	ib_crsr_t*	ib_crsr,	/*!< out,own: InnoDB index cursor */
-	int*		idx_type,	/*!< out: index is cluster index */
-	ib_id_u64_t*	idx_id)		/*!< out: index id */
-{
-	ib_cursor_t*	open_cursor = (ib_cursor_t*) ib_open_crsr;
-	index_id_t	index_id = 0;
-	ib_err_t	err = DB_TABLE_NOT_FOUND;
-	dict_table_t*	table;
-	dict_index_t*	index;
-
-	*idx_type = 0;
-	*idx_id = 0;
-	*ib_crsr = NULL;
-
-	/* We want to increment the ref count, so we do a redundant search. */
-	table = dict_table_open_on_id(open_cursor->prebuilt->table->id,
-				      FALSE, DICT_TABLE_OP_NORMAL);
-	ut_a(table != NULL);
-
-	/* Traverse the indexes; the first one is always the cluster
-	index. */
-	for (index = dict_table_get_first_index(table);
-	     index != NULL;
-	     index = UT_LIST_GET_NEXT(indexes, index)) {
-
-		if (innobase_strcasecmp(index->name, index_name) == 0) {
-			index_id = index->id;
-			*idx_type = index->type;
-			*idx_id = index_id;
-			break;
-		}
-	}
-
-	if (!index_id) {
-		/* No match: give back the reference taken above. */
-		dict_table_close(table, FALSE, FALSE);
-		return(DB_ERROR);
-	}
-
-	if (index_id > 0) {
-		ut_ad(index->id == index_id);
-		err = ib_create_cursor(
-			ib_crsr, table, index, open_cursor->prebuilt->trx);
-	}
-
-	if (*ib_crsr != NULL) {
-		const ib_cursor_t*	new_cursor;
-
-		new_cursor = *(ib_cursor_t**) ib_crsr;
-
-		if (new_cursor->prebuilt->index == NULL) {
-			err = ib_cursor_close(*ib_crsr);
-			ut_a(err == DB_SUCCESS);
-			*ib_crsr = NULL;
-		}
-	}
-
-	return(err);
-}
-
-/*****************************************************************//**
-Open an InnoDB table by name and return a cursor handle on its first
-index. The name is normalized first. When a transaction is given and it
-holds the schema lock in exclusive mode the table is only looked up;
-otherwise it is opened by name.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_open_table(
-/*=================*/
-	const char*	name,		/*!< in: table name */
-	ib_trx_t	ib_trx,		/*!< in: Current transaction handle
-					can be NULL */
-	ib_crsr_t*	ib_crsr)	/*!< out,own: InnoDB cursor */
-{
-	dict_table_t*	table;
-	char*		normalized_name = static_cast<char*>(
-		mem_alloc(ut_strlen(name) + 1));
-
-	ib_normalize_table_name(normalized_name, name);
-
-	if (ib_trx != NULL && ib_schema_lock_is_exclusive(ib_trx)) {
-		/* NOTE: We do not acquire MySQL metadata lock */
-		table = ib_lookup_table_by_name(normalized_name);
-	} else {
-		table = (dict_table_t*) ib_open_table_by_name(
-			normalized_name);
-	}
-
-	mem_free(normalized_name);
-	normalized_name = NULL;
-
-	/* It can happen that another thread has created the table but
-	not the cluster index or it's a broken table definition. Refuse to
-	open if that's the case. */
-	if (table == NULL || dict_table_get_first_index(table) == NULL) {
-		return(DB_TABLE_NOT_FOUND);
-	}
-
-	return(ib_create_cursor_with_index_id(ib_crsr, table, 0,
-					      (trx_t*) ib_trx));
-}
-
-/********************************************************************//**
-Free the insert, update and select query graphs of a query processing
-struct, then zero-fill the whole struct. */
-static
-void
-ib_qry_proc_free(
-/*=============*/
-	ib_qry_proc_t*	q_proc)	/*!< in, own: qproc struct */
-{
-	ib_qry_grph_t*	graphs = &q_proc->grph;
-
-	que_graph_free_recursive(graphs->ins);
-	que_graph_free_recursive(graphs->upd);
-	que_graph_free_recursive(graphs->sel);
-
-	/* Leave no pointers to the freed graphs behind. */
-	memset(q_proc, 0x0, sizeof(*q_proc));
-}
-
-/*****************************************************************//**
-Set the transaction of the prebuilt struct of a cursor to NULL. */
-UNIV_INTERN
-void
-ib_cursor_clear_trx(
-/*================*/
-	ib_crsr_t	ib_crsr)	/*!< in/out: InnoDB cursor */
-{
-	row_prebuilt_t*	prebuilt = ((ib_cursor_t*) ib_crsr)->prebuilt;
-
-	prebuilt->trx = NULL;
-}
-
-/*****************************************************************//**
-Reset the cursor: decrement n_mysql_tables_in_use of the attached
-transaction (if the cursor has a valid transaction and the count is
-positive), free the query graphs and empty the query heap.
-@return DB_SUCCESS */
-UNIV_INTERN
-ib_err_t
-ib_cursor_reset(
-/*============*/
-	ib_crsr_t	ib_crsr)	/*!< in/out: InnoDB cursor */
-{
-	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
-	trx_t*		trx = cursor->prebuilt->trx;
-
-	if (cursor->valid_trx && trx != NULL
-	    && trx->n_mysql_tables_in_use > 0) {
-
-		--trx->n_mysql_tables_in_use;
-	}
-
-	/* The fields in this data structure are allocated from
-	the query heap and so need to be reset too. */
-	ib_qry_proc_free(&cursor->q_proc);
-
-	mem_heap_empty(cursor->query_heap);
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Attach a new transaction to the cursor and reset the cursor: the
-prebuilt struct is updated with the transaction, the cursor is marked as
-having a valid transaction, a read view is assigned, the query graphs
-are freed and the query heap is emptied.
-@return DB_SUCCESS */
-ib_err_t
-ib_cursor_new_trx(
-/*==============*/
-	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor */
-	ib_trx_t	ib_trx)		/*!< in: transaction */
-{
-	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
-	row_prebuilt_t*	prebuilt = cursor->prebuilt;
-
-	row_update_prebuilt_trx(prebuilt, (trx_t*) ib_trx);
-
-	cursor->valid_trx = TRUE;
-
-	trx_assign_read_view(prebuilt->trx);
-
-	/* The query graphs are dropped together with their heap
-	contents. */
-	ib_qry_proc_free(&cursor->q_proc);
-
-	mem_heap_empty(cursor->query_heap);
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Commit the transaction in a cursor and mark the cursor as no longer
-having a valid transaction. In debug builds the transaction is asserted
-to be the one attached to the cursor.
-@return DB_SUCCESS */
-ib_err_t
-ib_cursor_commit_trx(
-/*=================*/
-	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor */
-	ib_trx_t	ib_trx)		/*!< in: transaction */
-{
-	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
-
-	ut_ad(cursor->prebuilt->trx == (trx_t*) ib_trx);
-
-	ib_trx_commit(ib_trx);
-
-	cursor->valid_trx = FALSE;
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Free a cursor: its query graphs, its prebuilt struct and both of its
-heaps. n_mysql_tables_in_use of the attached transaction is decremented
-if the cursor has a valid transaction and the count is positive. A NULL
-cursor is accepted and ignored.
-@return DB_SUCCESS */
-UNIV_INTERN
-ib_err_t
-ib_cursor_close(
-/*============*/
-	ib_crsr_t	ib_crsr)	/*!< in,own: InnoDB cursor */
-{
-	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
-
-	if (cursor == NULL) {
-		return(DB_SUCCESS);
-	}
-
-	row_prebuilt_t*	prebuilt = cursor->prebuilt;
-	trx_t*		trx = prebuilt->trx;
-
-	ib_qry_proc_free(&cursor->q_proc);
-
-	/* The transaction could have been detached from the cursor. */
-	if (cursor->valid_trx && trx != NULL
-	    && trx->n_mysql_tables_in_use > 0) {
-		--trx->n_mysql_tables_in_use;
-	}
-
-	row_prebuilt_free(prebuilt, FALSE);
-	cursor->prebuilt = NULL;
-
-	/* The cursor itself lives in cursor->heap, so that heap is
-	freed last. */
-	mem_heap_free(cursor->query_heap);
-	mem_heap_free(cursor->heap);
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Close the table that the cursor has open, if there is one. Nothing is
-done when the cursor has no prebuilt struct or no table.
-@return DB_SUCCESS */
-UNIV_INTERN
-ib_err_t
-ib_cursor_close_table(
-/*==================*/
-	ib_crsr_t	ib_crsr)	/*!< in,own: InnoDB cursor */
-{
-	row_prebuilt_t*	pb = ((ib_cursor_t*) ib_crsr)->prebuilt;
-
-	if (pb != NULL && pb->table != NULL) {
-		dict_table_close(pb->table, FALSE, FALSE);
-	}
-
-	return(DB_SUCCESS);
-}
-/**********************************************************************//**
-Execute the insert step, repeating it for as long as the error handler
-asks for a retry.
-@return DB_SUCCESS or error code */
-UNIV_INLINE
-ib_err_t
-ib_insert_row_with_lock_retry(
-/*==========================*/
-	que_thr_t*	thr,		/*!< in: insert query graph */
-	ins_node_t*	node,		/*!< in: insert node for the query */
-	trx_savept_t*	savept)		/*!< in: savepoint to rollback to
-					in case of an error */
-{
-	trx_t*		trx = thr_get_trx(thr);
-	ib_err_t	err;
-
-	for (;;) {
-		ib_bool_t	retry;
-
-		/* (Re)start the thread at the insert node. */
-		thr->run_node = node;
-		thr->prev_node = node;
-
-		row_ins_step(thr);
-
-		err = trx->error_state;
-
-		if (err == DB_SUCCESS) {
-			break;
-		}
-
-		que_thr_stop_for_mysql(thr);
-
-		/* Let the error handler decide whether the step should
-		be attempted again. */
-		thr->lock_state = QUE_THR_LOCK_ROW;
-		retry = static_cast<ib_bool_t>(
-			ib_handle_errors(&err, trx, thr, savept));
-		thr->lock_state = QUE_THR_LOCK_NOLOCK;
-
-		if (!retry) {
-			break;
-		}
-	}
-
-	return(err);
-}
-
-/*****************************************************************//**
-Write a row.
-Runs the insert query graph under a savepoint taken at entry. On
-success the table's row count and the insert statistics counters are
-incremented.
-@return DB_SUCCESS or err code */
-static
-ib_err_t
-ib_execute_insert_query_graph(
-/*==========================*/
- dict_table_t* table, /*!< in: table where to insert */
- que_fork_t* ins_graph, /*!< in: query graph */
- ins_node_t* node) /*!< in: insert node */
-{
- trx_t* trx;
- que_thr_t* thr;
- trx_savept_t savept;
- ib_err_t err = DB_SUCCESS;
-
- trx = ins_graph->trx;
-
- /* Savepoint handed to the error handler for use on failure. */
- savept = trx_savept_take(trx);
-
- thr = que_fork_get_first_thr(ins_graph);
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
- err = ib_insert_row_with_lock_retry(thr, node, &savept);
-
- if (err == DB_SUCCESS) {
- que_thr_stop_for_mysql_no_error(thr, trx);
-
- dict_table_n_rows_inc(table);
-
- /* System-database tables are counted separately. */
- if (table->is_system_db) {
- srv_stats.n_system_rows_inserted.inc();
- } else {
- srv_stats.n_rows_inserted.inc();
- }
- }
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*****************************************************************//**
-Create an insert query graph node.
-The node, its row tuple and the query graph are built only on the
-first call for a cursor (while node->ins is NULL) and are allocated
-from the cursor's query heap. The transaction must have been started. */
-static
-void
-ib_insert_query_graph_create(
-/*==========================*/
- ib_cursor_t* cursor) /*!< in: Cursor instance */
-{
- ib_qry_proc_t* q_proc = &cursor->q_proc;
- ib_qry_node_t* node = &q_proc->node;
- trx_t* trx = cursor->prebuilt->trx;
-
- ut_a(trx->state != TRX_STATE_NOT_STARTED);
-
- /* Build the node lazily; later calls reuse it. */
- if (node->ins == NULL) {
- dtuple_t* row;
- ib_qry_grph_t* grph = &q_proc->grph;
- mem_heap_t* heap = cursor->query_heap;
- dict_table_t* table = cursor->prebuilt->table;
-
- node->ins = ins_node_create(INS_DIRECT, table, heap);
-
- node->ins->select = NULL;
- node->ins->values_list = NULL;
-
- /* Row tuple with one field per table column, typed from
- the table definition. */
- row = dtuple_create(heap, dict_table_get_n_cols(table));
- dict_table_copy_types(row, table);
-
- ins_node_set_new_row(node->ins, row);
-
- grph->ins = static_cast<que_fork_t*>(
- que_node_get_parent(
- pars_complete_graph_for_exec(node->ins, trx,
- heap)));
-
- grph->ins->state = QUE_FORK_ACTIVE;
- }
-}
-
-/*****************************************************************//**
-Insert a row to a table.
-The source tuple's fields are shallow-copied into the insert node's
-row, so the source data must stay valid until this call returns.
-@return DB_SUCCESS, DB_DATA_MISMATCH if a NOT NULL column is NULL in
-the tuple, or another err code from the insert */
-UNIV_INTERN
-ib_err_t
-ib_cursor_insert_row(
-/*=================*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor instance */
- const ib_tpl_t ib_tpl) /*!< in: tuple to insert */
-{
- ib_ulint_t i;
- ib_qry_node_t* node;
- ib_qry_proc_t* q_proc;
- ulint n_fields;
- dtuple_t* dst_dtuple;
- ib_err_t err = DB_SUCCESS;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- const ib_tuple_t* src_tuple = (const ib_tuple_t*) ib_tpl;
-
- /* Makes sure node->ins and the insert graph exist. */
- ib_insert_query_graph_create(cursor);
-
- ut_ad(src_tuple->type == TPL_TYPE_ROW);
-
- q_proc = &cursor->q_proc;
- node = &q_proc->node;
-
- /* Reset the insert node's state before every insert. */
- node->ins->state = INS_NODE_ALLOC_ROW_ID;
- dst_dtuple = node->ins->row;
-
- n_fields = dtuple_get_n_fields(src_tuple->ptr);
- ut_ad(n_fields == dtuple_get_n_fields(dst_dtuple));
-
- /* Do a shallow copy of the data fields and check for NULL
- constraints on columns. */
- for (i = 0; i < n_fields; i++) {
- ulint mtype;
- dfield_t* src_field;
- dfield_t* dst_field;
-
- src_field = dtuple_get_nth_field(src_tuple->ptr, i);
-
- mtype = dtype_get_mtype(dfield_get_type(src_field));
-
- /* Don't touch the system columns. */
- if (mtype != DATA_SYS) {
- ulint prtype;
-
- prtype = dtype_get_prtype(dfield_get_type(src_field));
-
- if ((prtype & DATA_NOT_NULL)
- && dfield_is_null(src_field)) {
-
- err = DB_DATA_MISMATCH;
- break;
- }
-
- dst_field = dtuple_get_nth_field(dst_dtuple, i);
- ut_ad(mtype
- == dtype_get_mtype(dfield_get_type(dst_field)));
-
- /* Do a shallow copy. */
- dfield_set_data(
- dst_field, src_field->data, src_field->len);
-
- if (dst_field->len != IB_SQL_NULL) {
- UNIV_MEM_ASSERT_RW(dst_field->data,
- dst_field->len);
- }
- }
- }
-
- /* The insert runs only if every column passed the NOT NULL check. */
- if (err == DB_SUCCESS) {
- err = ib_execute_insert_query_graph(
- src_tuple->index->table, q_proc->grph.ins, node->ins);
- }
-
- /* The master thread is woken whether or not the insert succeeded. */
- ib_wake_master_thread();
-
- return(err);
-}
-
-/*********************************************************************//**
-Gets pointer to a prebuilt update vector used in updates.
-The update node is created once per cursor (while node->upd is NULL);
-the update query graph, however, is rebuilt from the query heap on
-every call. The transaction must have been started.
-@return update vector */
-UNIV_INLINE
-upd_t*
-ib_update_vector_create(
-/*====================*/
- ib_cursor_t* cursor) /*!< in: current cursor */
-{
- trx_t* trx = cursor->prebuilt->trx;
- mem_heap_t* heap = cursor->query_heap;
- dict_table_t* table = cursor->prebuilt->table;
- ib_qry_proc_t* q_proc = &cursor->q_proc;
- ib_qry_grph_t* grph = &q_proc->grph;
- ib_qry_node_t* node = &q_proc->node;
-
- ut_a(trx->state != TRX_STATE_NOT_STARTED);
-
- /* Create the update node lazily; later calls reuse it. */
- if (node->upd == NULL) {
- node->upd = static_cast<upd_node_t*>(
- row_create_update_node_for_mysql(table, heap));
- }
-
- /* Unlike the node, the graph is completed again on each call. */
- grph->upd = static_cast<que_fork_t*>(
- que_node_get_parent(
- pars_complete_graph_for_exec(node->upd, trx, heap)));
-
- grph->upd->state = QUE_FORK_ACTIVE;
-
- return(node->upd->update);
-}
-
-/**********************************************************************//**
-Record one changed column in an update field: store the new value (or
-SQL NULL) and the column's position in the clustered index. */
-static
-void
-ib_update_col(
-/*==========*/
-
-	ib_cursor_t*	cursor,		/*!< in: current cursor */
-	upd_field_t*	upd_field,	/*!< in/out: update field */
-	ulint		col_no,		/*!< in: column number */
-	dfield_t*	dfield)		/*!< in: updated dfield */
-{
-	dict_table_t*	table = cursor->prebuilt->table;
-	dict_index_t*	clust_index = dict_table_get_first_index(table);
-
-	if (dfield_get_len(dfield) != UNIV_SQL_NULL) {
-		dfield_copy_data(&upd_field->new_val, dfield);
-	} else {
-		dfield_set_null(&upd_field->new_val);
-	}
-
-	upd_field->exp = NULL;
-	upd_field->orig_len = 0;
-
-	/* The update vector addresses fields by their position in the
-	first index of the table. */
-	upd_field->field_no = dict_col_get_clust_pos(
-		&table->cols[col_no], clust_index);
-}
-
-/**********************************************************************//**
-Checks which fields have changed in a row and stores the new data
-to an update vector.
-Both tuples must be row tuples of the same table. System columns are
-skipped. upd->n_fields and upd->info_bits are written only on success.
-@return DB_SUCCESS, or DB_DATA_MISMATCH if the new tuple has NULL in a
-NOT NULL column */
-static
-ib_err_t
-ib_calc_diff(
-/*=========*/
- ib_cursor_t* cursor, /*!< in: current cursor */
- upd_t* upd, /*!< in/out: update vector */
- const ib_tuple_t*old_tuple, /*!< in: Old tuple in table */
- const ib_tuple_t*new_tuple) /*!< in: New tuple to update */
-{
- ulint i;
- ulint n_changed = 0;
- ib_err_t err = DB_SUCCESS;
- ulint n_fields = dtuple_get_n_fields(new_tuple->ptr);
-
- ut_a(old_tuple->type == TPL_TYPE_ROW);
- ut_a(new_tuple->type == TPL_TYPE_ROW);
- ut_a(old_tuple->index->table == new_tuple->index->table);
-
- for (i = 0; i < n_fields; ++i) {
- ulint mtype;
- ulint prtype;
- upd_field_t* upd_field;
- dfield_t* new_dfield;
- dfield_t* old_dfield;
-
- new_dfield = dtuple_get_nth_field(new_tuple->ptr, i);
- old_dfield = dtuple_get_nth_field(old_tuple->ptr, i);
-
- mtype = dtype_get_mtype(dfield_get_type(old_dfield));
- prtype = dtype_get_prtype(dfield_get_type(old_dfield));
-
- /* Skip the system columns */
- if (mtype == DATA_SYS) {
- continue;
-
- } else if ((prtype & DATA_NOT_NULL)
- && dfield_is_null(new_dfield)) {
-
- err = DB_DATA_MISMATCH;
- break;
- }
-
- /* A field changed if the lengths differ or, for a non-NULL
- old value of equal length, if the bytes differ. */
- if (dfield_get_len(new_dfield) != dfield_get_len(old_dfield)
- || (!dfield_is_null(old_dfield)
- && memcmp(dfield_get_data(new_dfield),
- dfield_get_data(old_dfield),
- dfield_get_len(old_dfield)) != 0)) {
-
- /* Changed fields are packed at the front of the
- update vector. */
- upd_field = &upd->fields[n_changed];
-
- ib_update_col(cursor, upd_field, i, new_dfield);
-
- ++n_changed;
- }
- }
-
- if (err == DB_SUCCESS) {
- upd->info_bits = 0;
- upd->n_fields = n_changed;
- }
-
- return(err);
-}
-
-/**********************************************************************//**
-Execute the update step, repeating it for as long as the error handler
-asks for a retry. DB_RECORD_NOT_FOUND is returned as is, without going
-through the error handler.
-@return DB_SUCCESS or error code */
-UNIV_INLINE
-ib_err_t
-ib_update_row_with_lock_retry(
-/*==========================*/
-	que_thr_t*	thr,		/*!< in: Update query graph */
-	upd_node_t*	node,		/*!< in: Update node for the query */
-	trx_savept_t*	savept)		/*!< in: savepoint to rollback to
-					in case of an error */
-
-{
-	trx_t*		trx = thr_get_trx(thr);
-	ib_err_t	err;
-
-	for (;;) {
-		ib_bool_t	retry;
-
-		/* (Re)start the thread at the update node. */
-		thr->run_node = node;
-		thr->prev_node = node;
-
-		row_upd_step(thr);
-
-		err = trx->error_state;
-
-		if (err == DB_SUCCESS) {
-			break;
-		}
-
-		que_thr_stop_for_mysql(thr);
-
-		if (err == DB_RECORD_NOT_FOUND) {
-			/* Not an error that can be retried. */
-			break;
-		}
-
-		thr->lock_state = QUE_THR_LOCK_ROW;
-
-		retry = static_cast<ib_bool_t>(
-			ib_handle_errors(&err, trx, thr, savept));
-
-		thr->lock_state = QUE_THR_LOCK_NOLOCK;
-
-		if (!retry) {
-			break;
-		}
-	}
-
-	return(err);
-}
-
-/*********************************************************************//**
-Does an update or delete of a row.
-The update node (cursor->q_proc.node.upd) and the update graph must
-already have been prepared; node->is_delete selects which statistics
-are updated. pcur must be a cursor on the clustered index.
-@return DB_SUCCESS or err code */
-UNIV_INLINE
-ib_err_t
-ib_execute_update_query_graph(
-/*==========================*/
- ib_cursor_t* cursor, /*!< in: Cursor instance */
- btr_pcur_t* pcur) /*!< in: Btree persistent cursor */
-{
- ib_err_t err;
- que_thr_t* thr;
- upd_node_t* node;
- trx_savept_t savept;
- trx_t* trx = cursor->prebuilt->trx;
- dict_table_t* table = cursor->prebuilt->table;
- ib_qry_proc_t* q_proc = &cursor->q_proc;
-
- /* The transaction must be running. */
- ut_a(trx->state != TRX_STATE_NOT_STARTED);
-
- node = q_proc->node.upd;
-
- /* Give the update node its own copy of the stored position. */
- ut_a(dict_index_is_clust(pcur->btr_cur.index));
- btr_pcur_copy_stored_position(node->pcur, pcur);
-
- ut_a(node->pcur->rel_pos == BTR_PCUR_ON);
-
- savept = trx_savept_take(trx);
-
- thr = que_fork_get_first_thr(q_proc->grph.upd);
-
- node->state = UPD_NODE_UPDATE_CLUSTERED;
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
- err = ib_update_row_with_lock_retry(thr, node, &savept);
-
- if (err == DB_SUCCESS) {
-
- que_thr_stop_for_mysql_no_error(thr, trx);
-
- if (node->is_delete) {
-
- dict_table_n_rows_dec(table);
-
- if (table->is_system_db) {
- srv_stats.n_system_rows_deleted.inc();
- } else {
- srv_stats.n_rows_deleted.inc();
- }
- } else {
- if (table->is_system_db) {
- srv_stats.n_system_rows_updated.inc();
- } else {
- srv_stats.n_rows_updated.inc();
- }
- }
-
- } else if (err == DB_RECORD_NOT_FOUND) {
- /* Clear the error on the transaction; the caller still
- receives DB_RECORD_NOT_FOUND as the return value. */
- trx->error_state = DB_SUCCESS;
- }
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*****************************************************************//**
-Update a row in a table.
-The columns that differ between the old and the new tuple are
-collected into an update vector, which is then applied at the
-cursor's clustered index position.
-@return DB_SUCCESS or err code; DB_ERROR if the cursor is on a
-secondary index and the prebuilt struct does not access the
-clustered index */
-UNIV_INTERN
-ib_err_t
-ib_cursor_update_row(
-/*=================*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- const ib_tpl_t ib_old_tpl, /*!< in: Old tuple in table */
- const ib_tpl_t ib_new_tpl) /*!< in: New tuple to update */
-{
- upd_t* upd;
- ib_err_t err;
- btr_pcur_t* pcur;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
- const ib_tuple_t*old_tuple = (const ib_tuple_t*) ib_old_tpl;
- const ib_tuple_t*new_tuple = (const ib_tuple_t*) ib_new_tpl;
-
- /* Pick the persistent cursor that sits on the clustered index. */
- if (dict_index_is_clust(prebuilt->index)) {
- pcur = &cursor->prebuilt->pcur;
- } else if (prebuilt->need_to_access_clustered) {
- pcur = &cursor->prebuilt->clust_pcur;
- } else {
- return(DB_ERROR);
- }
-
- ut_a(old_tuple->type == TPL_TYPE_ROW);
- ut_a(new_tuple->type == TPL_TYPE_ROW);
-
- upd = ib_update_vector_create(cursor);
-
- err = ib_calc_diff(cursor, upd, old_tuple, new_tuple);
-
- if (err == DB_SUCCESS) {
- /* Note that this is not a delete. */
- cursor->q_proc.node.upd->is_delete = FALSE;
-
- err = ib_execute_update_query_graph(cursor, pcur);
- }
-
- /* The master thread is woken whether or not the update succeeded. */
- ib_wake_master_thread();
-
- return(err);
-}
-
-/**********************************************************************//**
-Build the update query graph to delete a row from an index.
-A temporary key tuple on the table's first index is filled from rec,
-copied into the update vector, and freed before returning.
-@return DB_SUCCESS or err code; DB_OUT_OF_MEMORY if the key tuple
-cannot be created */
-static
-ib_err_t
-ib_delete_row(
-/*==========*/
- ib_cursor_t* cursor, /*!< in: current cursor */
- btr_pcur_t* pcur, /*!< in: Btree persistent cursor */
- const rec_t* rec) /*!< in: record to delete */
-{
- ulint i;
- upd_t* upd;
- ib_err_t err;
- ib_tuple_t* tuple;
- ib_tpl_t ib_tpl;
- ulint n_cols;
- upd_field_t* upd_field;
- ib_bool_t page_format;
- dict_table_t* table = cursor->prebuilt->table;
- dict_index_t* index = dict_table_get_first_index(table);
-
- n_cols = dict_index_get_n_ordering_defined_by_user(index);
- ib_tpl = ib_key_tuple_new(index, n_cols);
-
- if (!ib_tpl) {
- return(DB_OUT_OF_MEMORY);
- }
-
- tuple = (ib_tuple_t*) ib_tpl;
-
- upd = ib_update_vector_create(cursor);
-
- page_format = static_cast<ib_bool_t>(
- dict_table_is_comp(index->table));
- ib_read_tuple(rec, page_format, tuple, NULL, NULL);
-
- upd->n_fields = ib_tuple_get_n_cols(ib_tpl);
-
- for (i = 0; i < upd->n_fields; ++i) {
- dfield_t* dfield;
-
- upd_field = &upd->fields[i];
- dfield = dtuple_get_nth_field(tuple->ptr, i);
-
- dfield_copy_data(&upd_field->new_val, dfield);
-
- upd_field->exp = NULL;
-
- upd_field->orig_len = 0;
-
- /* Loop-invariant; it is simply re-assigned on each pass. */
- upd->info_bits = 0;
-
- /* NOTE(review): key field i is mapped through table column
- i, which presumably assumes the key columns are the leading
- table columns - confirm. */
- upd_field->field_no = dict_col_get_clust_pos(
- &table->cols[i], index);
- }
-
- /* Note that this is a delete. */
- cursor->q_proc.node.upd->is_delete = TRUE;
-
- err = ib_execute_update_query_graph(cursor, pcur);
-
- ib_tuple_delete(ib_tpl);
-
- return(err);
-}
-
-/*****************************************************************//**
-Delete a row in a table.
-The record under the cursor is copied into a local buffer inside a
-mini-transaction, and the delete is then run on that copy once the
-mini-transaction has been committed.
-@return DB_SUCCESS or err code; DB_RECORD_NOT_FOUND if the cursor is
-not positioned, its position cannot be restored, or the record is
-already delete-marked; DB_ERROR if the cursor is on a secondary index
-and the prebuilt struct does not access the clustered index */
-UNIV_INTERN
-ib_err_t
-ib_cursor_delete_row(
-/*=================*/
-	ib_crsr_t	ib_crsr)	/*!< in: InnoDB cursor instance */
-{
-	ib_err_t	err;
-	btr_pcur_t*	pcur;
-	dict_index_t*	index;
-	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
-	row_prebuilt_t*	prebuilt = cursor->prebuilt;
-
-	index = dict_table_get_first_index(prebuilt->index->table);
-
-	/* Check whether this is a secondary index cursor */
-	if (index != prebuilt->index) {
-		if (prebuilt->need_to_access_clustered) {
-			pcur = &prebuilt->clust_pcur;
-		} else {
-			return(DB_ERROR);
-		}
-	} else {
-		pcur = &prebuilt->pcur;
-	}
-
-	if (ib_btr_cursor_is_positioned(pcur)) {
-		ib_bool_t	page_format;
-		mtr_t		mtr;
-		rec_t*		copy = NULL;
-		byte		ptr[UNIV_PAGE_SIZE_MAX];
-
-		page_format = static_cast<ib_bool_t>(
-			dict_table_is_comp(index->table));
-
-		mtr_start(&mtr);
-
-		if (btr_pcur_restore_position(
-			    BTR_SEARCH_LEAF, pcur, &mtr)) {
-			const rec_t*	rec;
-			mem_heap_t*	heap = NULL;
-			ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-			ulint*		offsets = offsets_;
-
-			rec_offs_init(offsets_);
-
-			rec = btr_pcur_get_rec(pcur);
-
-			/* Since mtr will be committed, the rec
-			will not be protected. Make a copy of
-			the rec. */
-			offsets = rec_get_offsets(
-				rec, index, offsets, ULINT_UNDEFINED, &heap);
-			ut_ad(rec_offs_size(offsets) < UNIV_PAGE_SIZE_MAX);
-			copy = rec_copy(ptr, rec, offsets);
-
-			/* rec_get_offsets() is given &heap and may create
-			a heap when the on-stack offsets array is too
-			small. The offsets are no longer needed once the
-			record has been copied, so release the heap here
-			instead of leaking it. */
-			if (heap != NULL) {
-				mem_heap_free(heap);
-			}
-		}
-
-		mtr_commit(&mtr);
-
-		if (copy && !rec_get_deleted_flag(copy, page_format)) {
-			err = ib_delete_row(cursor, pcur, copy);
-		} else {
-			err = DB_RECORD_NOT_FOUND;
-		}
-	} else {
-		err = DB_RECORD_NOT_FOUND;
-	}
-
-	/* The master thread is woken whether or not a row was deleted. */
-	ib_wake_master_thread();
-
-	return(err);
-}
-
-/*****************************************************************//**
-Read current row.
-The persistent cursor position is restored inside a mini-transaction
-and the record found there (or prebuilt->innodb_api_rec, when that is
-set and differs) is read into the tuple.
-@return DB_SUCCESS, or DB_RECORD_NOT_FOUND if the cursor is not
-positioned, its position cannot be restored, or the record is
-delete-marked */
-UNIV_INTERN
-ib_err_t
-ib_cursor_read_row(
-/*===============*/
-	ib_crsr_t	ib_crsr,	/*!< in: InnoDB cursor instance */
-	ib_tpl_t	ib_tpl,		/*!< out: read cols into this tuple */
-	void**		row_buf,	/*!< in/out: row buffer */
-	ib_ulint_t*	row_len)	/*!< in/out: row buffer len */
-{
-	ib_err_t	err;
-	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
-	ib_cursor_t*	cursor = (ib_cursor_t*) ib_crsr;
-
-	ut_a(cursor->prebuilt->trx->state != TRX_STATE_NOT_STARTED);
-
-	/* When searching with IB_EXACT_MATCH set, row_search_for_mysql()
-	will not position the persistent cursor but will copy the record
-	found into the row cache. It should be the only entry. */
-	if (!ib_cursor_is_positioned(ib_crsr)) {
-		err = DB_RECORD_NOT_FOUND;
-	} else {
-		mtr_t		mtr;
-		btr_pcur_t*	pcur;
-		row_prebuilt_t*	prebuilt = cursor->prebuilt;
-
-		/* Row tuples are read through the clustered index cursor
-		when the prebuilt struct accesses the clustered index.
-		Either way pcur is the address of a member of prebuilt,
-		so it can never be NULL and needs no check. */
-		if (prebuilt->need_to_access_clustered
-		    && tuple->type == TPL_TYPE_ROW) {
-			pcur = &prebuilt->clust_pcur;
-		} else {
-			pcur = &prebuilt->pcur;
-		}
-
-		mtr_start(&mtr);
-
-		if (btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr)) {
-			const rec_t*	rec;
-			ib_bool_t	page_format;
-
-			page_format = static_cast<ib_bool_t>(
-				dict_table_is_comp(tuple->index->table));
-			rec = btr_pcur_get_rec(pcur);
-
-			/* A record saved in innodb_api_rec takes
-			precedence over the one under the cursor. */
-			if (prebuilt->innodb_api_rec &&
-			    prebuilt->innodb_api_rec != rec) {
-				rec = prebuilt->innodb_api_rec;
-			}
-
-			if (!rec_get_deleted_flag(rec, page_format)) {
-				ib_read_tuple(rec, page_format, tuple,
-					      row_buf, (ulint*) row_len);
-				err = DB_SUCCESS;
-			} else {
-				err = DB_RECORD_NOT_FOUND;
-			}
-
-		} else {
-			err = DB_RECORD_NOT_FOUND;
-		}
-
-		mtr_commit(&mtr);
-	}
-
-	return(err);
-}
-
-/*****************************************************************//**
-Position the cursor at one of the ends of the index; the search mode
-selects which end. A temporary row buffer of UNIV_PAGE_SIZE bytes is
-allocated for the search and freed before returning.
-@return DB_SUCCESS or err code */
-UNIV_INLINE
-ib_err_t
-ib_cursor_position(
-/*===============*/
- ib_cursor_t* cursor, /*!< in: InnoDB cursor instance */
- ib_srch_mode_t mode) /*!< in: Search mode */
-{
- ib_err_t err;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
- unsigned char* buf;
-
- /* NOTE(review): the result of mem_alloc() is not checked here;
- presumably the allocator does not return NULL - confirm. */
- buf = static_cast<unsigned char*>(mem_alloc(UNIV_PAGE_SIZE));
-
- /* We want to position at one of the ends, row_search_for_mysql()
- uses the search_tuple fields to work out what to do. */
- dtuple_set_n_fields(prebuilt->search_tuple, 0);
-
- err = static_cast<ib_err_t>(row_search_for_mysql(
- buf, mode, prebuilt, 0, 0));
-
- mem_free(buf);
-
- return(err);
-}
-
-/*****************************************************************//**
-Position the cursor on the first record, by searching from the start
-of the index with mode IB_CUR_G.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_first(
-/*============*/
-	ib_crsr_t	ib_crsr)	/*!< in: InnoDB cursor instance */
-{
-	return(ib_cursor_position((ib_cursor_t*) ib_crsr, IB_CUR_G));
-}
-
-/*****************************************************************//**
-Position the cursor on the last record, by searching from the end of
-the index with mode IB_CUR_L.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_last(
-/*===========*/
-	ib_crsr_t	ib_crsr)	/*!< in: InnoDB cursor instance */
-{
-	return(ib_cursor_position((ib_cursor_t*) ib_crsr, IB_CUR_L));
-}
-
-/*****************************************************************//**
-Advance the cursor to the next user record. The search is run with an
-empty search tuple and the ROW_SEL_NEXT direction.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_next(
-/*===========*/
-	ib_crsr_t	ib_crsr)	/*!< in: InnoDB cursor instance */
-{
-	row_prebuilt_t*	pb = ((ib_cursor_t*) ib_crsr)->prebuilt;
-	byte		row_buf[UNIV_PAGE_SIZE_MAX];
-
-	/* No key: just step on from the current position. */
-	dtuple_set_n_fields(pb->search_tuple, 0);
-
-	return(static_cast<ib_err_t>(row_search_for_mysql(
-		row_buf, PAGE_CUR_G, pb, 0, ROW_SEL_NEXT)));
-}
-
-/*****************************************************************//**
-Search for key.
-The key tuple's fields are shallow-copied into the prebuilt search
-tuple, so the key data must stay valid for the duration of the call.
-The search uses the cursor's current match mode.
-@return DB_SUCCESS or err code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_moveto(
-/*=============*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- ib_tpl_t ib_tpl, /*!< in: Key to search for */
- ib_srch_mode_t ib_srch_mode) /*!< in: search mode */
-{
- ulint i;
- ulint n_fields;
- ib_err_t err = DB_SUCCESS;
- ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
- ib_cursor_t* cursor = (ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
- dtuple_t* search_tuple = prebuilt->search_tuple;
- unsigned char* buf;
-
- ut_a(tuple->type == TPL_TYPE_KEY);
-
- n_fields = dict_index_get_n_ordering_defined_by_user(prebuilt->index);
-
- /* Use no more fields than the key tuple actually has. */
- if (n_fields > dtuple_get_n_fields(tuple->ptr)) {
- n_fields = dtuple_get_n_fields(tuple->ptr);
- }
-
- dtuple_set_n_fields(search_tuple, n_fields);
- dtuple_set_n_fields_cmp(search_tuple, n_fields);
-
- /* Do a shallow copy */
- for (i = 0; i < n_fields; ++i) {
- dfield_copy(dtuple_get_nth_field(search_tuple, i),
- dtuple_get_nth_field(tuple->ptr, i));
- }
-
- ut_a(prebuilt->select_lock_type <= LOCK_NUM);
-
- /* Forget any record saved by a previous search. */
- prebuilt->innodb_api_rec = NULL;
-
- /* Temporary row buffer for the search; freed below. */
- buf = static_cast<unsigned char*>(mem_alloc(UNIV_PAGE_SIZE));
-
- err = static_cast<ib_err_t>(row_search_for_mysql(
- buf, ib_srch_mode, prebuilt, cursor->match_mode, 0));
-
- mem_free(buf);
-
- return(err);
-}
-
-/*****************************************************************//**
-Store the match mode that later ib_cursor_moveto() calls on this
-cursor will pass to the row search. */
-UNIV_INTERN
-void
-ib_cursor_set_match_mode(
-/*=====================*/
-	ib_crsr_t	ib_crsr,	/*!< in: Cursor instance */
-	ib_match_mode_t	match_mode)	/*!< in: ib_cursor_moveto match mode */
-{
-	((ib_cursor_t*) ib_crsr)->match_mode = match_mode;
-}
-
-/*****************************************************************//**
-Look up the data field that backs a column of the tuple.
-@return dfield instance in tuple */
-UNIV_INLINE
-dfield_t*
-ib_col_get_dfield(
-/*==============*/
-	ib_tuple_t*	tuple,		/*!< in: tuple instance */
-	ulint		col_no)		/*!< in: col no. in tuple */
-{
-	return(dtuple_get_nth_field(tuple->ptr, col_no));
-}
-
-/*****************************************************************//**
-Predicate: is the column one of the character/binary main types
-(VARCHAR, CHAR, MYSQL, VARMYSQL, FIXBINARY, BINARY) with a non-zero
-declared length?
-@return the boolean result cast to ib_err_t: non-zero when the column
-is capped, zero otherwise (this is not an error code) */
-UNIV_INLINE
-ib_err_t
-ib_col_is_capped(
-/*==============*/
-	const dtype_t*	dtype)		/*!< in: column type */
-{
-	bool	capped;
-
-	switch (dtype_get_mtype(dtype)) {
-	case DATA_VARCHAR:
-	case DATA_CHAR:
-	case DATA_MYSQL:
-	case DATA_VARMYSQL:
-	case DATA_FIXBINARY:
-	case DATA_BINARY:
-		capped = dtype_get_len(dtype) > 0;
-		break;
-	default:
-		capped = false;
-	}
-
-	return(static_cast<ib_err_t>(capped));
-}
-
-/*****************************************************************//**
-Set a column of the tuple. Make a copy using the tuple's heap.
-Passing len == IB_SQL_NULL sets the column to SQL NULL. For the
-BLOB/BINARY/DECIMAL/VARCHAR/FIXBINARY types, need_cpy == FALSE makes
-the field point directly at src instead of copying it.
-@return DB_SUCCESS or error code; DB_DATA_MISMATCH for a system
-column or a length that does not fit an INT/FLOAT/DOUBLE column;
-DB_OUT_OF_MEMORY if no destination buffer is available */
-UNIV_INTERN
-ib_err_t
-ib_col_set_value(
-/*=============*/
- ib_tpl_t ib_tpl, /*!< in: tuple instance */
- ib_ulint_t col_no, /*!< in: column index in tuple */
- const void* src, /*!< in: data value */
- ib_ulint_t len, /*!< in: data value len */
- ib_bool_t need_cpy) /*!< in: if need memcpy */
-{
- const dtype_t* dtype;
- dfield_t* dfield;
- void* dst = NULL;
- ib_tuple_t* tuple = (ib_tuple_t*) ib_tpl;
- ulint col_len;
-
- dfield = ib_col_get_dfield(tuple, col_no);
-
- /* User wants to set the column to NULL. */
- if (len == IB_SQL_NULL) {
- dfield_set_null(dfield);
- return(DB_SUCCESS);
- }
-
- dtype = dfield_get_type(dfield);
- col_len = dtype_get_len(dtype);
-
- /* Not allowed to update system columns. */
- if (dtype_get_mtype(dtype) == DATA_SYS) {
- return(DB_DATA_MISMATCH);
- }
-
- /* Start from the field's current buffer; it is reused below
- when it is present and the new value is not longer than the
- field's current length. */
- dst = dfield_get_data(dfield);
-
- /* Since TEXT/CLOB also map to DATA_VARCHAR we need to make an
- exception. Perhaps we need to set the precise type and check
- for that. */
- if (ib_col_is_capped(dtype)) {
-
- /* Clamp the value to the declared column length. */
- len = ut_min(len, static_cast<ib_ulint_t>(col_len));
-
- if (dst == NULL || len > dfield_get_len(dfield)) {
- dst = mem_heap_alloc(tuple->heap, col_len);
- ut_a(dst != NULL);
- }
- } else if (dst == NULL || len > dfield_get_len(dfield)) {
- dst = mem_heap_alloc(tuple->heap, len);
- }
-
- if (dst == NULL) {
- return(DB_OUT_OF_MEMORY);
- }
-
- switch (dtype_get_mtype(dtype)) {
- case DATA_INT: {
-
- /* Integers must be supplied at exactly the column width. */
- if (col_len == len) {
- ibool usign;
-
- usign = dtype_get_prtype(dtype) & DATA_UNSIGNED;
- mach_write_int_type(static_cast<byte*>(dst),
- static_cast<const byte*>(src),
- len, usign);
-
- } else {
- return(DB_DATA_MISMATCH);
- }
- break;
- }
-
- case DATA_FLOAT:
- if (len == sizeof(float)) {
- mach_float_write(static_cast<byte*>(dst), *(float*)src);
- } else {
- return(DB_DATA_MISMATCH);
- }
- break;
-
- case DATA_DOUBLE:
- if (len == sizeof(double)) {
- mach_double_write(static_cast<byte*>(dst),
- *(double*)src);
- } else {
- return(DB_DATA_MISMATCH);
- }
- break;
-
- case DATA_SYS:
- /* Unreachable: system columns were rejected above. */
- ut_error;
- break;
-
- case DATA_CHAR: {
- ulint pad_char = ULINT_UNDEFINED;
-
- pad_char = dtype_get_pad_char(
- dtype_get_mtype(dtype), dtype_get_prtype(dtype));
-
- ut_a(pad_char != ULINT_UNDEFINED);
-
- /* Pad the tail out to the full column length, then copy
- the value into the front; the stored length becomes
- col_len. */
- memset((byte*) dst + len,
- static_cast<int>(pad_char),
- static_cast<size_t>(col_len - len));
-
- memcpy(dst, src, len);
-
- len = static_cast<ib_ulint_t>(col_len);
- break;
- }
- case DATA_BLOB:
- case DATA_BINARY:
- case DATA_DECIMAL:
- case DATA_VARCHAR:
- case DATA_FIXBINARY:
- if (need_cpy) {
- memcpy(dst, src, len);
- } else {
- /* No copy: the field refers to the caller's buffer. */
- dfield_set_data(dfield, src, len);
- dst = dfield_get_data(dfield);
- }
- break;
-
- case DATA_MYSQL:
- case DATA_VARMYSQL: {
- ulint cset;
- CHARSET_INFO* cs;
- int error = 0;
- ulint true_len = len;
-
- /* For multi byte character sets we need to
- calculate the true length of the data. */
- cset = dtype_get_charset_coll(
- dtype_get_prtype(dtype));
- cs = all_charsets[cset];
- if (cs) {
- uint pos = (uint)(col_len / cs->mbmaxlen);
-
- if (len > 0 && cs->mbmaxlen > 1) {
- true_len = (ulint)
- my_well_formed_length(
- cs,
- (const char*)src,
- (const char*)src + len,
- pos,
- &error);
-
- if (true_len < len) {
- len = static_cast<ib_ulint_t>(true_len);
- }
- }
- }
-
- /* All invalid bytes in data need be truncated.
- If len == 0, means all bytes of the data is invalid.
- In this case, the data will be truncated to empty.*/
- memcpy(dst, src, len);
-
- /* For DATA_MYSQL, need to pad the unused
- space with spaces. */
- if (dtype_get_mtype(dtype) == DATA_MYSQL) {
- ulint n_chars;
-
- if (len < col_len) {
- ulint pad_len = col_len - len;
-
- ut_a(cs != NULL);
- ut_a(!(pad_len % cs->mbminlen));
-
- cs->cset->fill(cs, (char*)dst + len,
- pad_len,
- 0x20 /* space */);
- }
-
- /* Why we should do below? See function
- row_mysql_store_col_in_innobase_format */
-
- ut_a(!(dtype_get_len(dtype)
- % dtype_get_mbmaxlen(dtype)));
-
- n_chars = dtype_get_len(dtype)
- / dtype_get_mbmaxlen(dtype);
-
- /* Strip space padding. */
- while (col_len > n_chars
- && ((char*)dst)[col_len - 1] == 0x20) {
- col_len--;
- }
-
- len = static_cast<ib_ulint_t>(col_len);
- }
- break;
- }
-
- default:
- ut_error;
- }
-
- /* Attach a newly allocated buffer to the field; if the existing
- buffer was reused, only the length needs updating. */
- if (dst != dfield_get_data(dfield)) {
- dfield_set_data(dfield, dst, len);
- } else {
- dfield_set_len(dfield, len);
- }
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Report how many bytes of data a column of the tuple holds. The
-internal UNIV_SQL_NULL marker is translated to IB_SQL_NULL.
-@return bytes avail or IB_SQL_NULL */
-UNIV_INTERN
-ib_ulint_t
-ib_col_get_len(
-/*===========*/
-	ib_tpl_t	ib_tpl,		/*!< in: tuple instance */
-	ib_ulint_t	i)		/*!< in: column index in tuple */
-{
-	const dfield_t*	dfield = ib_col_get_dfield((ib_tuple_t*) ib_tpl, i);
-	ulint		n_bytes = dfield_get_len(dfield);
-
-	if (n_bytes == UNIV_SQL_NULL) {
-		n_bytes = IB_SQL_NULL;
-	}
-
-	return(static_cast<ib_ulint_t>(n_bytes));
-}
-
-/*****************************************************************//**
-Copy a column value from the tuple.
-INT columns are converted with mach_read_int_type() and stored in the
-destination as a 1, 2, 4 or 8 byte integer; len must equal the stored
-length (asserted). FLOAT and DOUBLE columns are copied only when len
-equals the stored length. All other types are copied byte for byte,
-truncated to len.
-@return bytes copied, IB_SQL_NULL for a NULL column, or 0 when an
-INT/FLOAT/DOUBLE column cannot be copied at the requested length */
-UNIV_INLINE
-ib_ulint_t
-ib_col_copy_value_low(
-/*==================*/
-	ib_tpl_t	ib_tpl,		/*!< in: tuple instance */
-	ib_ulint_t	i,		/*!< in: column index in tuple */
-	void*		dst,		/*!< out: copied data value */
-	ib_ulint_t	len)		/*!< in: max data value len to copy */
-{
-	const void*	data;
-	const dfield_t*	dfield;
-	ulint		data_len;
-	ib_tuple_t*	tuple = (ib_tuple_t*) ib_tpl;
-
-	dfield = ib_col_get_dfield(tuple, i);
-
-	data = dfield_get_data(dfield);
-	data_len = dfield_get_len(dfield);
-
-	if (data_len != UNIV_SQL_NULL) {
-
-		const dtype_t*	dtype = dfield_get_type(dfield);
-
-		switch (dtype_get_mtype(dfield_get_type(dfield))) {
-		case DATA_INT: {
-			ibool	usign;
-			ullint	ret;
-
-			ut_a(data_len == len);
-
-			usign = dtype_get_prtype(dtype) & DATA_UNSIGNED;
-			ret = mach_read_int_type(static_cast<const byte*>(data),
-						 data_len, usign);
-
-			/* The low-order bytes of ret are the same for the
-			signed and the unsigned interpretation, so a single
-			truncating store per width covers both. Widths other
-			than 1, 2, 4 and 8 are refused: storing a 64-bit
-			value for them would write past the len bytes the
-			caller provided. */
-			switch (len) {
-			case 1:
-				*(ib_u8_t*) dst = (ib_u8_t) ret;
-				break;
-			case 2:
-				*(ib_u16_t*) dst = (ib_u16_t) ret;
-				break;
-			case 4:
-				*(ib_u32_t*) dst = (ib_u32_t) ret;
-				break;
-			case 8:
-				*(ib_u64_t*) dst = (ib_u64_t) ret;
-				break;
-			default:
-				data_len = 0;
-			}
-
-			break;
-		}
-		case DATA_FLOAT:
-			if (len == data_len) {
-				float	f;
-
-				ut_a(data_len == sizeof(f));
-				f = mach_float_read(static_cast<const byte*>(
-					data));
-				memcpy(dst, &f, sizeof(f));
-			} else {
-				data_len = 0;
-			}
-			break;
-		case DATA_DOUBLE:
-			if (len == data_len) {
-				double	d;
-
-				ut_a(data_len == sizeof(d));
-				d = mach_double_read(static_cast<const byte*>(
-					data));
-				memcpy(dst, &d, sizeof(d));
-			} else {
-				data_len = 0;
-			}
-			break;
-		default:
-			/* Raw copy, limited to the caller's buffer size. */
-			data_len = ut_min(data_len, len);
-			memcpy(dst, data, data_len);
-		}
-	} else {
-		data_len = IB_SQL_NULL;
-	}
-
-	return(static_cast<ib_ulint_t>(data_len));
-}
-
-/*****************************************************************//**
-Public entry point for copying a column value out of a tuple; it
-forwards to ib_col_copy_value_low() unchanged.
-@return bytes copied or IB_SQL_NULL */
-UNIV_INTERN
-ib_ulint_t
-ib_col_copy_value(
-/*==============*/
-	ib_tpl_t	ib_tpl,		/*!< in: tuple instance */
-	ib_ulint_t	i,		/*!< in: column index in tuple */
-	void*		dst,		/*!< out: copied data value */
-	ib_ulint_t	len)		/*!< in: max data value len to copy */
-{
-	ib_ulint_t	n_copied = ib_col_copy_value_low(ib_tpl, i, dst, len);
-
-	return(n_copied);
-}
-
-/*****************************************************************//**
-Translate the DATA_UNSIGNED and DATA_NOT_NULL bits of an internal
-precise type into the matching API column attribute flags.
-@return precise type in api format */
-UNIV_INLINE
-ib_col_attr_t
-ib_col_get_attr(
-/*============*/
-	ulint	prtype)			/*!< in: column definition */
-{
-	ulint	bits = IB_COL_NONE;
-
-	if (prtype & DATA_NOT_NULL) {
-		bits |= IB_COL_NOT_NULL;
-	}
-
-	if (prtype & DATA_UNSIGNED) {
-		bits |= IB_COL_UNSIGNED;
-	}
-
-	return(static_cast<ib_col_attr_t>(bits));
-}
-
-/*****************************************************************//**
-Look up the name of the i'th column of the table that the cursor has
-open.
-@return name of the column */
-UNIV_INTERN
-const char*
-ib_col_get_name(
-/*============*/
-	ib_crsr_t	ib_crsr,	/*!< in: InnoDB cursor instance */
-	ib_ulint_t	i)		/*!< in: column index in tuple */
-{
-	dict_table_t*	table = ((ib_cursor_t*) ib_crsr)->prebuilt->table;
-	ulint		col_no = dict_col_get_no(
-		dict_table_get_nth_col(table, i));
-
-	return(dict_table_get_col_name(table, col_no));
-}
-
-/*****************************************************************//**
-Look up the name of the i'th field of the index the cursor is using.
-@return name of the field, or NULL if the cursor has no index or the
-field lookup yields nothing */
-UNIV_INTERN
-const char*
-ib_get_idx_field_name(
-/*==================*/
-	ib_crsr_t	ib_crsr,	/*!< in: InnoDB cursor instance */
-	ib_ulint_t	i)		/*!< in: column index in tuple */
-{
-	dict_index_t*	index = ((ib_cursor_t*) ib_crsr)->prebuilt->index;
-	dict_field_t*	field;
-
-	if (index == NULL) {
-		return(NULL);
-	}
-
-	field = dict_index_get_nth_field(index, i);
-
-	if (field == NULL) {
-		return(NULL);
-	}
-
-	return(field->name);
-}
-
-/*****************************************************************//**
-Fill in the API meta data (main type, type length, attributes and client
-type) for column i of a tuple, reading it from the column's data field.
-@return length of the column data as stored in the data field */
-UNIV_INLINE
-ib_ulint_t
-ib_col_get_meta_low(
-/*================*/
-	ib_tpl_t	ib_tpl,		/*!< in: tuple instance */
-	ib_ulint_t	i,		/*!< in: column index in tuple */
-	ib_col_meta_t*	ib_col_meta)	/*!< out: column meta data */
-{
-	ib_tuple_t*	tpl = (ib_tuple_t*) ib_tpl;
-	const dfield_t*	fld = ib_col_get_dfield(tpl, i);
-	ulint		len = dfield_get_len(fld);
-
-	/* The API enum values are assumed to mirror the internal main
-	type codes one to one, hence the plain cast. */
-	ib_col_meta->type = static_cast<ib_col_type_t>(
-		dtype_get_mtype(dfield_get_type(fld)));
-
-	ib_col_meta->type_len = static_cast<ib_u32_t>(
-		dtype_get_len(dfield_get_type(fld)));
-
-	/* The precise type is narrowed to 16 bits before it is decoded. */
-	ib_u16_t	prtype = (ib_u16_t) dtype_get_prtype(
-		dfield_get_type(fld));
-
-	ib_col_meta->attr = ib_col_get_attr(prtype);
-	ib_col_meta->client_type = prtype & DATA_MYSQL_TYPE_MASK;
-
-	return(static_cast<ib_ulint_t>(len));
-}
-
-/*************************************************************//**
-Check that column i of a tuple can be read as an integer of the given
-size and signedness. The checks are applied in this order:
-  - the column type must be IB_INT, else DB_DATA_MISMATCH;
-  - a type length equal to IB_SQL_NULL yields DB_UNDERFLOW;
-  - the type length must equal size, else DB_DATA_MISMATCH;
-  - an unsigned column requested as signed yields DB_DATA_MISMATCH.
-A signed column requested as unsigned is not rejected here.
-@return DB_SUCCESS if the column passes all checks, else an error */
-UNIV_INLINE
-ib_err_t
-ib_tuple_check_int(
-/*===============*/
-	ib_tpl_t	ib_tpl,	/*!< in: InnoDB tuple */
-	ib_ulint_t	i,	/*!< in: column number */
-	ib_bool_t	usign,	/*!< in: true if caller reads unsigned */
-	ulint		size)	/*!< in: size of the caller's integer */
-{
-	ib_col_meta_t	meta;
-
-	ib_col_get_meta_low(ib_tpl, i, &meta);
-
-	if (meta.type != IB_INT) {
-		return(DB_DATA_MISMATCH);
-	}
-
-	if (meta.type_len == IB_SQL_NULL) {
-		return(DB_UNDERFLOW);
-	}
-
-	if (meta.type_len != size) {
-		return(DB_DATA_MISMATCH);
-	}
-
-	if ((meta.attr & IB_COL_UNSIGNED) && !usign) {
-		return(DB_DATA_MISMATCH);
-	}
-
-	return(DB_SUCCESS);
-}
-
-/*************************************************************//**
-Read a signed 8 bit integer column from an InnoDB tuple. The column is
-validated with ib_tuple_check_int() first; *ival is only written when
-that check succeeds.
-@return DB_SUCCESS or the error reported by the check */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_i8(
-/*=============*/
-	ib_tpl_t	ib_tpl,	/*!< in: InnoDB tuple */
-	ib_ulint_t	i,	/*!< in: column number */
-	ib_i8_t*	ival)	/*!< out: integer value */
-{
-	ib_err_t	err = ib_tuple_check_int(
-		ib_tpl, i, IB_FALSE, sizeof(*ival));
-
-	if (err != DB_SUCCESS) {
-		return(err);
-	}
-
-	ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
-
-	return(DB_SUCCESS);
-}
-
-/*************************************************************//**
-Read an unsigned 8 bit integer column from an InnoDB tuple. The column
-is validated with ib_tuple_check_int() first; *ival is only written when
-that check succeeds.
-@return DB_SUCCESS or the error reported by the check */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_u8(
-/*=============*/
-	ib_tpl_t	ib_tpl,	/*!< in: InnoDB tuple */
-	ib_ulint_t	i,	/*!< in: column number */
-	ib_u8_t*	ival)	/*!< out: integer value */
-{
-	ib_err_t	err = ib_tuple_check_int(
-		ib_tpl, i, IB_TRUE, sizeof(*ival));
-
-	if (err != DB_SUCCESS) {
-		return(err);
-	}
-
-	ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
-
-	return(DB_SUCCESS);
-}
-
-/*************************************************************//**
-Read a signed 16 bit integer column from an InnoDB tuple. The column is
-validated with ib_tuple_check_int() first; *ival is only written when
-that check succeeds.
-@return DB_SUCCESS or the error reported by the check */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_i16(
-/*==============*/
-	ib_tpl_t	ib_tpl,	/*!< in: InnoDB tuple */
-	ib_ulint_t	i,	/*!< in: column number */
-	ib_i16_t*	ival)	/*!< out: integer value */
-{
-	ib_err_t	err;
-
-	/* The signedness flag is an ib_bool_t, so pass the API constant
-	IB_FALSE like the other integer readers do. */
-	err = ib_tuple_check_int(ib_tpl, i, IB_FALSE, sizeof(*ival));
-
-	if (err == DB_SUCCESS) {
-		ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
-	}
-
-	return(err);
-}
-
-/*************************************************************//**
-Read an unsigned 16 bit integer column from an InnoDB tuple. The column
-is validated with ib_tuple_check_int() first; *ival is only written when
-that check succeeds.
-@return DB_SUCCESS or the error reported by the check */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_u16(
-/*==============*/
-	ib_tpl_t	ib_tpl,	/*!< in: InnoDB tuple */
-	ib_ulint_t	i,	/*!< in: column number */
-	ib_u16_t*	ival)	/*!< out: integer value */
-{
-	ib_err_t	err = ib_tuple_check_int(
-		ib_tpl, i, IB_TRUE, sizeof(*ival));
-
-	if (err != DB_SUCCESS) {
-		return(err);
-	}
-
-	ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
-
-	return(DB_SUCCESS);
-}
-
-/*************************************************************//**
-Read a signed 32 bit integer column from an InnoDB tuple. The column is
-validated with ib_tuple_check_int() first; *ival is only written when
-that check succeeds.
-@return DB_SUCCESS or the error reported by the check */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_i32(
-/*==============*/
-	ib_tpl_t	ib_tpl,	/*!< in: InnoDB tuple */
-	ib_ulint_t	i,	/*!< in: column number */
-	ib_i32_t*	ival)	/*!< out: integer value */
-{
-	ib_err_t	err;
-
-	/* The signedness flag is an ib_bool_t, so pass the API constant
-	IB_FALSE like the other integer readers do. */
-	err = ib_tuple_check_int(ib_tpl, i, IB_FALSE, sizeof(*ival));
-
-	if (err == DB_SUCCESS) {
-		ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
-	}
-
-	return(err);
-}
-
-/*************************************************************//**
-Read an unsigned 32 bit integer column from an InnoDB tuple. The column
-is validated with ib_tuple_check_int() first; *ival is only written when
-that check succeeds.
-@return DB_SUCCESS or the error reported by the check */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_u32(
-/*==============*/
-	ib_tpl_t	ib_tpl,	/*!< in: InnoDB tuple */
-	ib_ulint_t	i,	/*!< in: column number */
-	ib_u32_t*	ival)	/*!< out: integer value */
-{
-	ib_err_t	err = ib_tuple_check_int(
-		ib_tpl, i, IB_TRUE, sizeof(*ival));
-
-	if (err != DB_SUCCESS) {
-		return(err);
-	}
-
-	ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
-
-	return(DB_SUCCESS);
-}
-
-/*************************************************************//**
-Read a signed 64 bit integer column from an InnoDB tuple. The column is
-validated with ib_tuple_check_int() first; *ival is only written when
-that check succeeds.
-@return DB_SUCCESS or the error reported by the check */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_i64(
-/*==============*/
-	ib_tpl_t	ib_tpl,	/*!< in: InnoDB tuple */
-	ib_ulint_t	i,	/*!< in: column number */
-	ib_i64_t*	ival)	/*!< out: integer value */
-{
-	ib_err_t	err;
-
-	/* The signedness flag is an ib_bool_t, so pass the API constant
-	IB_FALSE like the other integer readers do. */
-	err = ib_tuple_check_int(ib_tpl, i, IB_FALSE, sizeof(*ival));
-
-	if (err == DB_SUCCESS) {
-		ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
-	}
-
-	return(err);
-}
-
-/*************************************************************//**
-Read an unsigned 64 bit integer column from an InnoDB tuple. The column
-is validated with ib_tuple_check_int() first; *ival is only written when
-that check succeeds.
-@return DB_SUCCESS or the error reported by the check */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_u64(
-/*==============*/
-	ib_tpl_t	ib_tpl,	/*!< in: InnoDB tuple */
-	ib_ulint_t	i,	/*!< in: column number */
-	ib_u64_t*	ival)	/*!< out: integer value */
-{
-	ib_err_t	err = ib_tuple_check_int(
-		ib_tpl, i, IB_TRUE, sizeof(*ival));
-
-	if (err != DB_SUCCESS) {
-		return(err);
-	}
-
-	ib_col_copy_value_low(ib_tpl, i, ival, sizeof(*ival));
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Get a pointer to the data of column i in the tuple. No copy is made;
-the pointer is the one stored in the column's data field.
-@return pointer to the column data, or NULL when the field length is
-UNIV_SQL_NULL */
-UNIV_INTERN
-const void*
-ib_col_get_value(
-/*=============*/
-	ib_tpl_t	ib_tpl,	/*!< in: tuple instance */
-	ib_ulint_t	i)	/*!< in: column index in tuple */
-{
-	ib_tuple_t*	tpl = (ib_tuple_t*) ib_tpl;
-	const dfield_t*	fld = ib_col_get_dfield(tpl, i);
-	const void*	ptr = dfield_get_data(fld);
-
-	if (dfield_get_len(fld) == UNIV_SQL_NULL) {
-		return(NULL);
-	}
-
-	return(ptr);
-}
-
-/*****************************************************************//**
-Public entry point for reading a column's type, length and attributes
-from a tuple. All of the work is delegated to ib_col_get_meta_low().
-@return length of the column data */
-UNIV_INTERN
-ib_ulint_t
-ib_col_get_meta(
-/*============*/
-	ib_tpl_t	ib_tpl,		/*!< in: tuple instance */
-	ib_ulint_t	i,		/*!< in: column index in tuple */
-	ib_col_meta_t*	ib_col_meta)	/*!< out: column meta data */
-{
-	ib_ulint_t	data_len;
-
-	data_len = ib_col_get_meta_low(ib_tpl, i, ib_col_meta);
-
-	return(data_len);
-}
-
-/*****************************************************************//**
-"Clear" or reset an InnoDB tuple. The tuple's heap is emptied and a new
-tuple of the same kind (row or key), for the same index and with the
-same number of fields, is created in that heap.
-@return new tuple, or NULL */
-UNIV_INTERN
-ib_tpl_t
-ib_tuple_clear(
-/*============*/
-	ib_tpl_t	ib_tpl)	/*!< in,own: tuple (will be freed) */
-{
-	ib_tuple_t*		old = (ib_tuple_t*) ib_tpl;
-
-	/* Everything needed to rebuild the tuple is read before the heap
-	is emptied; nothing is read through the old tuple afterwards. */
-	ib_tuple_type_t		kind = old->type;
-	mem_heap_t*		hp = old->heap;
-	const dict_index_t*	idx = old->index;
-	ulint			n_fields = dtuple_get_n_fields(old->ptr);
-
-	mem_heap_empty(hp);
-
-	if (kind != TPL_TYPE_ROW) {
-		return(ib_key_tuple_new_low(idx, n_fields, hp));
-	}
-
-	return(ib_row_tuple_new_low(idx, n_fields, hp));
-}
-
-/*****************************************************************//**
-Build a cluster index search tuple from a secondary index key tuple.
-A new cluster key tuple is created for the cursor's table and every
-unique field of the cluster index is deep-copied from the matching
-position in the source tuple (SQL NULL fields are set to NULL).
-The source must be a key tuple (else DB_ERROR), must belong to the
-cursor's table (else DB_DATA_MISMATCH) and must not itself be a cluster
-index tuple (else DB_ERROR).
-@return DB_SUCCESS or error code; DB_OUT_OF_MEMORY if the destination
-tuple could not be created */
-UNIV_INTERN
-ib_err_t
-ib_tuple_get_cluster_key(
-/*=====================*/
-	ib_crsr_t	ib_crsr,	/*!< in: secondary index cursor */
-	ib_tpl_t*	ib_dst_tpl,	/*!< out,own: destination tuple */
-	const ib_tpl_t	ib_src_tpl)	/*!< in: source tuple */
-{
-	ib_cursor_t*	crsr = (ib_cursor_t*) ib_crsr;
-	ib_tuple_t*	src = (ib_tuple_t*) ib_src_tpl;
-	ib_tuple_t*	dst;
-	dict_index_t*	clust_index;
-	ulint		n_uniq;
-	ulint		k;
-
-	clust_index = dict_table_get_first_index(crsr->prebuilt->table);
-
-	/* Validate the source tuple before anything is allocated. */
-	if (src->type != TPL_TYPE_KEY) {
-		return(DB_ERROR);
-	}
-
-	if (src->index->table != crsr->prebuilt->table) {
-		return(DB_DATA_MISMATCH);
-	}
-
-	if (src->index == clust_index) {
-		return(DB_ERROR);
-	}
-
-	*ib_dst_tpl = ib_clust_search_tuple_create(ib_crsr);
-
-	if (!*ib_dst_tpl) {
-		return(DB_OUT_OF_MEMORY);
-	}
-
-	dst = (ib_tuple_t*) *ib_dst_tpl;
-	ut_a(dst->index == clust_index);
-
-	n_uniq = dict_index_get_n_unique(dst->index);
-
-	for (k = 0; k < n_uniq; k++) {
-		dfield_t*	from;
-		dfield_t*	to;
-		ulint		src_pos;
-
-		/* Position in the source index of the k'th cluster field. */
-		src_pos = dict_index_get_nth_field_pos(
-			src->index, dst->index, k);
-
-		ut_a(src_pos != ULINT_UNDEFINED);
-
-		from = dtuple_get_nth_field(src->ptr, src_pos);
-		to = dtuple_get_nth_field(dst->ptr, k);
-
-		if (dfield_is_null(from)) {
-			dfield_set_null(to);
-			continue;
-		}
-
-		UNIV_MEM_ASSERT_RW(from->data, from->len);
-
-		/* The copy lives in the destination tuple's own heap. */
-		to->data = mem_heap_dup(dst->heap, from->data, from->len);
-		to->len = from->len;
-	}
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Deep-copy every field of the source tuple into the destination tuple.
-Both tuples must have the same type and refer to the same index,
-otherwise nothing is copied. Passing the same tuple as source and
-destination trips an assertion. Field data is duplicated into the
-destination tuple's heap; SQL NULL fields are set to NULL.
-@return DB_SUCCESS, or DB_DATA_MISMATCH if type or index differ */
-UNIV_INTERN
-ib_err_t
-ib_tuple_copy(
-/*==========*/
-	ib_tpl_t	ib_dst_tpl,	/*!< in: destination tuple */
-	const ib_tpl_t	ib_src_tpl)	/*!< in: source tuple */
-{
-	const ib_tuple_t*	src = (const ib_tuple_t*) ib_src_tpl;
-	ib_tuple_t*		dst = (ib_tuple_t*) ib_dst_tpl;
-	ulint			n;
-	ulint			k;
-
-	/* Copying a tuple onto itself is a caller bug. */
-	ut_a(src != dst);
-
-	if (src->type != dst->type || src->index != dst->index) {
-		return(DB_DATA_MISMATCH);
-	}
-
-	n = dtuple_get_n_fields(src->ptr);
-	ut_ad(n == dtuple_get_n_fields(dst->ptr));
-
-	for (k = 0; k < n; ++k) {
-		dfield_t*	from = dtuple_get_nth_field(src->ptr, k);
-		dfield_t*	to = dtuple_get_nth_field(dst->ptr, k);
-
-		if (dfield_is_null(from)) {
-			dfield_set_null(to);
-			continue;
-		}
-
-		UNIV_MEM_ASSERT_RW(from->data, from->len);
-
-		/* The copy lives in the destination tuple's own heap. */
-		to->data = mem_heap_dup(dst->heap, from->data, from->len);
-		to->len = from->len;
-	}
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Create a key tuple for searching the index the cursor is using. The
-tuple gets as many fields as dict_index_get_n_unique_in_tree() reports
-for that index.
-@return own: Tuple for current index */
-UNIV_INTERN
-ib_tpl_t
-ib_sec_search_tuple_create(
-/*=======================*/
-	ib_crsr_t	ib_crsr)	/*!< in: Cursor instance */
-{
-	ib_cursor_t*	crsr = (ib_cursor_t*) ib_crsr;
-	dict_index_t*	idx = crsr->prebuilt->index;
-
-	return(ib_key_tuple_new(idx, dict_index_get_n_unique_in_tree(idx)));
-}
-
-/*****************************************************************//**
-Create a row tuple for reading from the index the cursor is using. The
-tuple gets one field per field of that index.
-@return own: Tuple for current index */
-UNIV_INTERN
-ib_tpl_t
-ib_sec_read_tuple_create(
-/*=====================*/
-	ib_crsr_t	ib_crsr)	/*!< in: Cursor instance */
-{
-	ib_cursor_t*	crsr = (ib_cursor_t*) ib_crsr;
-	dict_index_t*	idx = crsr->prebuilt->index;
-
-	return(ib_row_tuple_new(idx, dict_index_get_n_fields(idx)));
-}
-
-/*****************************************************************//**
-Create a key tuple for searching the first index of the cursor's table.
-The tuple gets as many fields as
-dict_index_get_n_ordering_defined_by_user() reports for that index.
-@return own: Tuple for current table */
-UNIV_INTERN
-ib_tpl_t
-ib_clust_search_tuple_create(
-/*=========================*/
-	ib_crsr_t	ib_crsr)	/*!< in: Cursor instance */
-{
-	ib_cursor_t*	crsr = (ib_cursor_t*) ib_crsr;
-	dict_index_t*	first = dict_table_get_first_index(
-		crsr->prebuilt->table);
-
-	return(ib_key_tuple_new(
-		first, dict_index_get_n_ordering_defined_by_user(first)));
-}
-
-/*****************************************************************//**
-Create a row tuple for row operations on the cursor's table. The tuple
-is bound to the table's first index and gets one field per table column
-as counted by dict_table_get_n_cols().
-@return own: Tuple for current table */
-UNIV_INTERN
-ib_tpl_t
-ib_clust_read_tuple_create(
-/*=======================*/
-	ib_crsr_t	ib_crsr)	/*!< in: Cursor instance */
-{
-	ib_cursor_t*	crsr = (ib_cursor_t*) ib_crsr;
-	dict_index_t*	first = dict_table_get_first_index(
-		crsr->prebuilt->table);
-	ulint		n = dict_table_get_n_cols(crsr->prebuilt->table);
-
-	return(ib_row_tuple_new(first, n));
-}
-
-/*****************************************************************//**
-Return the number of user columns in the tuple definition. For a row
-tuple this is the user column count of the index's table; for any other
-tuple type it is the number of user-defined ordering fields of the index.
-@return number of user columns */
-UNIV_INTERN
-ib_ulint_t
-ib_tuple_get_n_user_cols(
-/*=====================*/
-	const ib_tpl_t	ib_tpl)	/*!< in: Tuple for current table */
-{
-	const ib_tuple_t*	tpl = (const ib_tuple_t*) ib_tpl;
-
-	if (tpl->type != TPL_TYPE_ROW) {
-		return(static_cast<ib_ulint_t>(
-			dict_index_get_n_ordering_defined_by_user(
-				tpl->index)));
-	}
-
-	return(static_cast<ib_ulint_t>(
-		dict_table_get_n_user_cols(tpl->index->table)));
-}
-
-/*****************************************************************//**
-Return the number of fields held by the tuple.
-@return number of columns */
-UNIV_INTERN
-ib_ulint_t
-ib_tuple_get_n_cols(
-/*================*/
-	const ib_tpl_t	ib_tpl)	/*!< in: Tuple for table/index */
-{
-	const ib_tuple_t*	tpl = (const ib_tuple_t*) ib_tpl;
-	ulint			n = dtuple_get_n_fields(tpl->ptr);
-
-	return(static_cast<ib_ulint_t>(n));
-}
-
-/*****************************************************************//**
-Destroy an InnoDB tuple by freeing its heap. A NULL tuple is accepted
-and ignored. */
-UNIV_INTERN
-void
-ib_tuple_delete(
-/*============*/
-	ib_tpl_t	ib_tpl)	/*!< in,own: Tuple instance to delete */
-{
-	if (ib_tpl) {
-		ib_tuple_t*	tpl = (ib_tuple_t*) ib_tpl;
-
-		mem_heap_free(tpl->heap);
-	}
-}
-
-/*****************************************************************//**
-Get a table id. The lookup itself is done by ib_table_get_id_low(); this
-wrapper holds the dictionary mutex around that call.
-@return DB_SUCCESS if found */
-UNIV_INTERN
-ib_err_t
-ib_table_get_id(
-/*============*/
-	const char*	table_name,	/*!< in: table to find */
-	ib_id_u64_t*	table_id)	/*!< out: table id if found */
-{
-	dict_mutex_enter_for_mysql();
-
-	ib_err_t	err = ib_table_get_id_low(table_name, table_id);
-
-	dict_mutex_exit_for_mysql();
-
-	return(err);
-}
-
-/*****************************************************************//**
-Get an index id. The table name is normalized into a temporary buffer
-for the lookup. On success the returned id carries the table id in its
-upper 32 bits, OR-ed with the index id. *index_id is set to 0 on entry
-and stays 0 when the table or the index is not found.
-@return DB_SUCCESS if found, DB_TABLE_NOT_FOUND otherwise */
-UNIV_INTERN
-ib_err_t
-ib_index_get_id(
-/*============*/
-	const char*	table_name,	/*!< in: find index for this table */
-	const char*	index_name,	/*!< in: index to find */
-	ib_id_u64_t*	index_id)	/*!< out: index id if found */
-{
-	dict_table_t*	tbl;
-	dict_index_t*	idx;
-	char*		norm_name;
-	ib_id_u64_t	packed;
-
-	*index_id = 0;
-
-	/* The normalized name is only needed for the table lookup. */
-	norm_name = static_cast<char*>(
-		mem_alloc(ut_strlen(table_name) + 1));
-	ib_normalize_table_name(norm_name, table_name);
-
-	tbl = ib_lookup_table_by_name(norm_name);
-
-	mem_free(norm_name);
-
-	if (tbl == NULL) {
-		return(DB_TABLE_NOT_FOUND);
-	}
-
-	idx = dict_table_get_index_on_name(tbl, index_name);
-
-	if (idx == NULL) {
-		return(DB_TABLE_NOT_FOUND);
-	}
-
-	/* We only support 32 bit table and index ids, because the table
-	id has to be packed into the index id. */
-	packed = (tbl->id);
-	packed <<= 32;
-	packed |= (idx->id);
-
-	*index_id = packed;
-
-	return(DB_SUCCESS);
-}
-
-#ifdef __WIN__ /* path separator character: backslash when __WIN__ is defined */
-#define SRV_PATH_SEPARATOR '\\'
-#else /* !__WIN__: forward slash */
-#define SRV_PATH_SEPARATOR '/'
-#endif /* __WIN__ */
-
-
-/*****************************************************************//**
-Check if cursor is positioned, as reported by
-ib_btr_cursor_is_positioned() for the cursor's persistent cursor.
-@return IB_TRUE if positioned */
-UNIV_INTERN
-ib_bool_t
-ib_cursor_is_positioned(
-/*====================*/
-	const ib_crsr_t	ib_crsr)	/*!< in: InnoDB cursor instance */
-{
-	const ib_cursor_t*	crsr = (const ib_cursor_t*) ib_crsr;
-
-	return(ib_btr_cursor_is_positioned(&crsr->prebuilt->pcur));
-}
-
-
-/*****************************************************************//**
-Checks if the transaction records the data dictionary as latched in
-exclusive mode (dict_operation_lock_mode is RW_X_LATCH).
-@return TRUE if exclusive latch */
-UNIV_INTERN
-ib_bool_t
-ib_schema_lock_is_exclusive(
-/*========================*/
-	const ib_trx_t	ib_trx)	/*!< in: transaction */
-{
-	return(((const trx_t*) ib_trx)->dict_operation_lock_mode
-	       == RW_X_LATCH);
-}
-
-/*****************************************************************//**
-Checks if the transaction records the data dictionary as latched in
-shared mode (dict_operation_lock_mode is RW_S_LATCH).
-@return TRUE if shared latch */
-UNIV_INTERN
-ib_bool_t
-ib_schema_lock_is_shared(
-/*=====================*/
-	const ib_trx_t	ib_trx)	/*!< in: transaction */
-{
-	return(((const trx_t*) ib_trx)->dict_operation_lock_mode
-	       == RW_S_LATCH);
-}
-
-/*****************************************************************//**
-Lock the table the cursor is opened on, in the cursor's transaction.
-The request is forwarded to ib_trx_lock_table_with_retry().
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_lock(
-/*===========*/
-	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor */
-	ib_lck_mode_t	ib_lck_mode)	/*!< in: InnoDB lock mode */
-{
-	row_prebuilt_t*	prebuilt = ((ib_cursor_t*) ib_crsr)->prebuilt;
-
-	return(ib_trx_lock_table_with_retry(
-		prebuilt->trx, prebuilt->table,
-		(enum lock_mode) ib_lck_mode));
-}
-
-/*****************************************************************//**
-Set an intention lock on an InnoDB table identified by its table id.
-The transaction must already be started (asserted). The table is looked
-up with ib_open_table_by_id(). A select node and query graph are built
-in a private heap to obtain the que_thr_t passed to lock_table(); that
-heap is freed before returning. The result of lock_table() is also
-stored in trx->error_state.
-Only IB_LOCK_IS and IB_LOCK_IX are accepted; any other mode trips an
-assertion.
-NOTE(review): the table handle returned by ib_open_table_by_id() is not
-released anywhere in this function - confirm who owns that reference.
-@return DB_SUCCESS or error code; DB_TABLE_NOT_FOUND if the id does not
-resolve to a table */
-UNIV_INTERN
-ib_err_t
-ib_table_lock(
-/*==========*/
- ib_trx_t ib_trx, /*!< in/out: transaction */
- ib_id_u64_t table_id, /*!< in: table id */
- ib_lck_mode_t ib_lck_mode) /*!< in: InnoDB lock mode; must be
- IB_LOCK_IS or IB_LOCK_IX */
-{
- ib_err_t err;
- que_thr_t* thr;
- mem_heap_t* heap;
- dict_table_t* table;
- ib_qry_proc_t q_proc;
- trx_t* trx = (trx_t*) ib_trx;
-
- ut_a(trx->state != TRX_STATE_NOT_STARTED);
-
- table = ib_open_table_by_id(table_id, FALSE);
-
- if (table == NULL) {
- return(DB_TABLE_NOT_FOUND);
- }
-
- ut_a(ib_lck_mode <= static_cast<ib_lck_mode_t>(LOCK_NUM));
-
- heap = mem_heap_create(128);
-
- q_proc.node.sel = sel_node_create(heap);
-
- thr = pars_complete_graph_for_exec(q_proc.node.sel, trx, heap);
-
- q_proc.grph.sel = static_cast<que_fork_t*>(que_node_get_parent(thr));
- q_proc.grph.sel->state = QUE_FORK_ACTIVE;
-
- trx->op_info = "setting table lock";
-
- ut_a(ib_lck_mode == IB_LOCK_IS || ib_lck_mode == IB_LOCK_IX);
- err = static_cast<ib_err_t>(
- lock_table(0, table, (enum lock_mode) ib_lck_mode, thr));
-
- trx->error_state = err;
-
- mem_heap_free(heap);
-
- return(err);
-}
-
-/*****************************************************************//**
-Unlock an InnoDB table. This decrements the locked-table counter
-(mysql_n_tables_locked) of the cursor's transaction; if the counter is
-not positive it is left untouched and an error is returned.
-@return DB_SUCCESS, or DB_ERROR if no table was counted as locked */
-UNIV_INTERN
-ib_err_t
-ib_cursor_unlock(
-/*=============*/
-	ib_crsr_t	ib_crsr)	/*!< in/out: InnoDB cursor */
-{
-	row_prebuilt_t*	prebuilt = ((ib_cursor_t*) ib_crsr)->prebuilt;
-
-	if (prebuilt->trx->mysql_n_tables_locked <= 0) {
-		return(DB_ERROR);
-	}
-
-	--prebuilt->trx->mysql_n_tables_locked;
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Set the lock mode of the cursor. For IB_LOCK_X an IB_LOCK_IX table lock
-is taken first, for IB_LOCK_S an IB_LOCK_IS table lock; other modes take
-no table lock. The cursor's select lock type is only updated when that
-step did not fail.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_set_lock_mode(
-/*====================*/
-	ib_crsr_t	ib_crsr,	/*!< in/out: InnoDB cursor */
-	ib_lck_mode_t	ib_lck_mode)	/*!< in: InnoDB lock mode */
-{
-	row_prebuilt_t*	prebuilt = ((ib_cursor_t*) ib_crsr)->prebuilt;
-	ib_err_t	err = DB_SUCCESS;
-
-	ut_a(ib_lck_mode <= static_cast<ib_lck_mode_t>(LOCK_NUM));
-
-	switch (ib_lck_mode) {
-	case IB_LOCK_X:
-		err = ib_cursor_lock(ib_crsr, IB_LOCK_IX);
-		break;
-	case IB_LOCK_S:
-		err = ib_cursor_lock(ib_crsr, IB_LOCK_IS);
-		break;
-	default:
-		/* No table level lock is requested for other modes. */
-		break;
-	}
-
-	if (err != DB_SUCCESS) {
-		return(err);
-	}
-
-	prebuilt->select_lock_type = (enum lock_mode) ib_lck_mode;
-	ut_a(prebuilt->trx->state != TRX_STATE_NOT_STARTED);
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Flag the cursor as needing access to the clustered index record
-(sets prebuilt->need_to_access_clustered). */
-UNIV_INTERN
-void
-ib_cursor_set_cluster_access(
-/*=========================*/
-	ib_crsr_t	ib_crsr)	/*!< in/out: InnoDB cursor */
-{
-	((ib_cursor_t*) ib_crsr)->prebuilt->need_to_access_clustered = TRUE;
-}
-
-/*****************************************************************//**
-Write a signed 8 bit integer value to a column via ib_col_set_value().
-Integers are stored in big-endian format and will need to be converted
-from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_i8(
-/*==============*/
-	ib_tpl_t	ib_tpl,	/*!< in/out: tuple to write to */
-	int		col_no,	/*!< in: column number */
-	ib_i8_t		val)	/*!< in: value to write */
-{
-	ib_err_t	err;
-
-	err = ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true);
-
-	return(err);
-}
-
-/*****************************************************************//**
-Write a signed 16 bit integer value to a column via ib_col_set_value().
-Integers are stored in big-endian format and will need to be converted
-from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_i16(
-/*===============*/
-	ib_tpl_t	ib_tpl,	/*!< in/out: tuple to write to */
-	int		col_no,	/*!< in: column number */
-	ib_i16_t	val)	/*!< in: value to write */
-{
-	ib_err_t	err;
-
-	err = ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true);
-
-	return(err);
-}
-
-/*****************************************************************//**
-Write a signed 32 bit integer value to a column via ib_col_set_value().
-Integers are stored in big-endian format and will need to be converted
-from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_i32(
-/*===============*/
-	ib_tpl_t	ib_tpl,	/*!< in/out: tuple to write to */
-	int		col_no,	/*!< in: column number */
-	ib_i32_t	val)	/*!< in: value to write */
-{
-	ib_err_t	err;
-
-	err = ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true);
-
-	return(err);
-}
-
-/*****************************************************************//**
-Write a signed 64 bit integer value to a column via ib_col_set_value().
-Integers are stored in big-endian format and will need to be converted
-from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_i64(
-/*===============*/
-	ib_tpl_t	ib_tpl,	/*!< in/out: tuple to write to */
-	int		col_no,	/*!< in: column number */
-	ib_i64_t	val)	/*!< in: value to write */
-{
-	ib_err_t	err;
-
-	err = ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true);
-
-	return(err);
-}
-
-/*****************************************************************//**
-Write an unsigned 8 bit integer value to a column via
-ib_col_set_value(). Integers are stored in big-endian format and will
-need to be converted from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_u8(
-/*==============*/
-	ib_tpl_t	ib_tpl,	/*!< in/out: tuple to write to */
-	int		col_no,	/*!< in: column number */
-	ib_u8_t		val)	/*!< in: value to write */
-{
-	ib_err_t	err;
-
-	err = ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true);
-
-	return(err);
-}
-
-/*****************************************************************//**
-Write an unsigned 16 bit integer value to a column via
-ib_col_set_value(). Integers are stored in big-endian format and will
-need to be converted from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_u16(
-/*===============*/
-	ib_tpl_t	ib_tpl,	/*!< in/out: tuple to write to */
-	int		col_no,	/*!< in: column number */
-	ib_u16_t	val)	/*!< in: value to write */
-{
-	ib_err_t	err;
-
-	err = ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true);
-
-	return(err);
-}
-
-/*****************************************************************//**
-Write an unsigned 32 bit integer value to a column via
-ib_col_set_value(). Integers are stored in big-endian format and will
-need to be converted from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_u32(
-/*===============*/
-	ib_tpl_t	ib_tpl,	/*!< in/out: tuple to write to */
-	int		col_no,	/*!< in: column number */
-	ib_u32_t	val)	/*!< in: value to write */
-{
-	ib_err_t	err;
-
-	err = ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true);
-
-	return(err);
-}
-
-/*****************************************************************//**
-Write an unsigned 64 bit integer value to a column via
-ib_col_set_value(). Integers are stored in big-endian format and will
-need to be converted from the host format.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_u64(
-/*===============*/
-	ib_tpl_t	ib_tpl,	/*!< in/out: tuple to write to */
-	int		col_no,	/*!< in: column number */
-	ib_u64_t	val)	/*!< in: value to write */
-{
-	ib_err_t	err;
-
-	err = ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true);
-
-	return(err);
-}
-
-/*****************************************************************//**
-Inform the cursor that it's the start of an SQL statement
-(sets prebuilt->sql_stat_start). */
-UNIV_INTERN
-void
-ib_cursor_stmt_begin(
-/*=================*/
-	ib_crsr_t	ib_crsr)	/*!< in: cursor */
-{
-	((ib_cursor_t*) ib_crsr)->prebuilt->sql_stat_start = TRUE;
-}
-
-/*****************************************************************//**
-Write a double value to a column. The column's main type must be
-DATA_DOUBLE; nothing is written otherwise.
-@return DB_SUCCESS or error; DB_DATA_MISMATCH if the column is not a
-double column */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_double(
-/*==================*/
-	ib_tpl_t	ib_tpl,	/*!< in/out: tuple to write to */
-	int		col_no,	/*!< in: column number */
-	double		val)	/*!< in: value to write */
-{
-	const dfield_t*	fld = ib_col_get_dfield((ib_tuple_t*) ib_tpl, col_no);
-
-	if (dtype_get_mtype(dfield_get_type(fld)) != DATA_DOUBLE) {
-		return(DB_DATA_MISMATCH);
-	}
-
-	return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*************************************************************//**
-Read a double column value from an InnoDB tuple. The column's main type
-must be DATA_DOUBLE; *dval is left untouched otherwise.
-@return DB_SUCCESS, or DB_DATA_MISMATCH if the column is not a double
-column */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_double(
-/*=================*/
-	ib_tpl_t	ib_tpl,	/*!< in: InnoDB tuple */
-	ib_ulint_t	col_no,	/*!< in: column number */
-	double*		dval)	/*!< out: double value */
-{
-	const dfield_t*	fld = ib_col_get_dfield((ib_tuple_t*) ib_tpl, col_no);
-
-	if (dtype_get_mtype(dfield_get_type(fld)) != DATA_DOUBLE) {
-		return(DB_DATA_MISMATCH);
-	}
-
-	ib_col_copy_value_low(ib_tpl, col_no, dval, sizeof(*dval));
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Write a float value to a column. The column's main type must be
-DATA_FLOAT; nothing is written otherwise.
-@return DB_SUCCESS or error; DB_DATA_MISMATCH if the column is not a
-float column */
-UNIV_INTERN
-ib_err_t
-ib_tuple_write_float(
-/*=================*/
-	ib_tpl_t	ib_tpl,	/*!< in/out: tuple to write to */
-	int		col_no,	/*!< in: column number */
-	float		val)	/*!< in: value to write */
-{
-	const dfield_t*	fld = ib_col_get_dfield((ib_tuple_t*) ib_tpl, col_no);
-
-	if (dtype_get_mtype(dfield_get_type(fld)) != DATA_FLOAT) {
-		return(DB_DATA_MISMATCH);
-	}
-
-	return(ib_col_set_value(ib_tpl, col_no, &val, sizeof(val), true));
-}
-
-/*************************************************************//**
-Read a float value from an InnoDB tuple. The column's main type must be
-DATA_FLOAT; *fval is left untouched otherwise.
-@return DB_SUCCESS, or DB_DATA_MISMATCH if the column is not a float
-column */
-UNIV_INTERN
-ib_err_t
-ib_tuple_read_float(
-/*================*/
-	ib_tpl_t	ib_tpl,	/*!< in: InnoDB tuple */
-	ib_ulint_t	col_no,	/*!< in: column number */
-	float*		fval)	/*!< out: float value */
-{
-	const dfield_t*	fld = ib_col_get_dfield((ib_tuple_t*) ib_tpl, col_no);
-
-	if (dtype_get_mtype(dfield_get_type(fld)) != DATA_FLOAT) {
-		return(DB_DATA_MISMATCH);
-	}
-
-	ib_col_copy_value_low(ib_tpl, col_no, fval, sizeof(*fval));
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Truncate the table the cursor is opened on.
-An IB_LOCK_X cursor lock is requested first; if that fails, nothing else
-is done and the cursor stays open. Once the lock is granted the cursor
-is closed and *ib_crsr is set to NULL before the truncate is attempted,
-so the handle is gone even if row_truncate_table_for_mysql() then fails.
-*table_id is zeroed on entry and set to table->id only when the truncate
-succeeds.
-NOTE(review): the table pointer taken from the prebuilt struct is still
-used after the cursor has been closed - confirm that the table cannot be
-evicted in between.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ib_err_t
-ib_cursor_truncate(
-/*===============*/
- ib_crsr_t* ib_crsr, /*!< in/out: cursor for table
- to truncate */
- ib_id_u64_t* table_id) /*!< out: new table id */
-{
- ib_err_t err;
- ib_cursor_t* cursor = *(ib_cursor_t**) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
-
- *table_id = 0;
-
- err = ib_cursor_lock(*ib_crsr, IB_LOCK_X);
-
- if (err == DB_SUCCESS) {
- trx_t* trx;
- dict_table_t* table = prebuilt->table;
-
- /* We are going to free the cursor and the prebuilt. Store
- the transaction handle locally. */
- trx = prebuilt->trx;
- err = ib_cursor_close(*ib_crsr);
- ut_a(err == DB_SUCCESS);
-
- *ib_crsr = NULL;
-
- /* A temporary workaround for an assertion in
- trx_start_for_ddl_low: the trx has already been started. */
- if (trx->state == TRX_STATE_ACTIVE) {
-#ifdef UNIV_DEBUG
- trx->start_file = 0;
-#endif /* UNIV_DEBUG */
- trx->dict_operation = TRX_DICT_OP_TABLE;
- }
-
- /* This function currently commits the transaction
- on success. */
- err = static_cast<ib_err_t>(
- row_truncate_table_for_mysql(table, trx));
-
- if (err == DB_SUCCESS) {
- *table_id = (table->id);
- }
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Truncate a table identified by name.
-Steps, in order: begin a serializable transaction; under the dictionary
-mutex open the table and create a cursor on it (index id 0), and on
-success save table->memcached_sync_count and set it to 0; truncate via
-ib_cursor_truncate(); close the cursor if it is still open; roll the
-transaction back if the truncate failed, otherwise assert that it is no
-longer started; release the transaction; finally restore a non-zero
-saved memcached_sync_count under the dictionary mutex.
-NOTE(review): the ut_a() right after ib_cursor_truncate() tests err,
-which is already known to be DB_SUCCESS in that branch, not trunc_err,
-so it can never fire.
-NOTE(review): when the table opens but has no first index, the handle
-from dict_table_open_on_name() is not visibly closed here - confirm.
-@return DB_SUCCESS or error code (the result of the truncate step, or
-DB_TABLE_NOT_FOUND / the cursor creation error) */
-UNIV_INTERN
-ib_err_t
-ib_table_truncate(
-/*==============*/
- const char* table_name, /*!< in: table name */
- ib_id_u64_t* table_id) /*!< out: new table id */
-{
- ib_err_t err;
- dict_table_t* table;
- ib_err_t trunc_err;
- ib_trx_t ib_trx = NULL;
- ib_crsr_t ib_crsr = NULL;
- ib_ulint_t memcached_sync = 0;
-
- ib_trx = ib_trx_begin(IB_TRX_SERIALIZABLE, true, false);
-
- dict_mutex_enter_for_mysql();
-
- table = dict_table_open_on_name(table_name, TRUE, FALSE,
- DICT_ERR_IGNORE_NONE);
-
- if (table != NULL && dict_table_get_first_index(table)) {
- err = ib_create_cursor_with_index_id(&ib_crsr, table, 0,
- (trx_t*) ib_trx);
- } else {
- err = DB_TABLE_NOT_FOUND;
- }
-
- /* Remember the memcached_sync_count and set it to 0, so the
- truncate can be executed. */
- if (table != NULL && err == DB_SUCCESS) {
- memcached_sync = static_cast<ib_ulint_t>(
- table->memcached_sync_count);
- table->memcached_sync_count = 0;
- }
-
- dict_mutex_exit_for_mysql();
-
- if (err == DB_SUCCESS) {
- trunc_err = ib_cursor_truncate(&ib_crsr, table_id);
- ut_a(err == DB_SUCCESS);
- } else {
- trunc_err = err;
- }
-
- if (ib_crsr != NULL) {
- err = ib_cursor_close(ib_crsr);
- ut_a(err == DB_SUCCESS);
- }
-
- if (trunc_err == DB_SUCCESS) {
- ut_a(ib_trx_state(ib_trx) == static_cast<ib_trx_state_t>(
- TRX_STATE_NOT_STARTED));
- } else {
- err = ib_trx_rollback(ib_trx);
- ut_a(err == DB_SUCCESS);
- }
-
- err = ib_trx_release(ib_trx);
- ut_a(err == DB_SUCCESS);
-
- /* Restore the saved memcached_sync_count (only if it was
- non-zero). */
- if (table != NULL && memcached_sync != 0) {
- dict_mutex_enter_for_mysql();
-
- table->memcached_sync_count = memcached_sync;
-
- dict_mutex_exit_for_mysql();
- }
-
- return(trunc_err);
-}
-
-/*****************************************************************//**
-Frees a possible InnoDB trx object associated with the given THD by
-calling innobase_close_thd().
-@return always DB_SUCCESS */
-UNIV_INTERN
-ib_err_t
-ib_close_thd(
-/*=========*/
-	void*	thd)	/*!< in: handle to the MySQL thread of the user
-			whose resources should be free'd */
-{
-	THD*	user_thd = static_cast<THD*>(thd);
-
-	innobase_close_thd(user_thd);
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Return isolation configuration set by "innodb_api_trx_level". The value
-is read from ib_trx_level_setting and cast to the declared return type.
-@return trx isolation level */
-UNIV_INTERN
-ib_trx_state_t
-ib_cfg_trx_level()
-/*==============*/
-{
-	ib_trx_state_t	level = static_cast<ib_trx_state_t>(
-		ib_trx_level_setting);
-
-	return(level);
-}
-
-/*****************************************************************//**
-Return the configured background commit interval, read from
-ib_bk_commit_interval.
-@return background commit interval (in seconds) */
-UNIV_INTERN
-ib_ulint_t
-ib_cfg_bk_commit_interval()
-/*=======================*/
-{
-	ib_ulint_t	interval = static_cast<ib_ulint_t>(
-		ib_bk_commit_interval);
-
-	return(interval);
-}
-
-/*****************************************************************//**
-Get generic configure status as a bit set: IB_CFG_BINLOG_ENABLED,
-IB_CFG_MDL_ENABLED and IB_CFG_DISABLE_ROWLOCK are set according to
-ib_binlog_enabled, ib_mdl_enabled and ib_disable_row_lock.
-@return configure status */
-UNIV_INTERN
-int
-ib_cfg_get_cfg()
-/*============*/
-{
-	int	flags = 0;
-
-	if (ib_binlog_enabled) {
-		flags |= IB_CFG_BINLOG_ENABLED;
-	}
-
-	if (ib_mdl_enabled) {
-		flags |= IB_CFG_MDL_ENABLED;
-	}
-
-	if (ib_disable_row_lock) {
-		flags |= IB_CFG_DISABLE_ROWLOCK;
-	}
-
-	return(flags);
-}
-
-/*****************************************************************//**
-Increase/decrease the memcached sync count of table to sync memcached
-DML with SQL DDLs.
-The counter lives on the cursor's table (table->memcached_sync_count).
-With HAVE_ATOMIC_BUILTINS it is changed with the os_atomic_* helpers,
-otherwise it is changed while holding the dictionary mutex. After a
-decrement the counter is asserted to be non-negative.
-NOTE(review): the DICT_TABLE_IN_DDL test and the later increment or
-decrement are separate steps - confirm callers tolerate a DDL starting
-in between.
-@return DB_SUCCESS; DB_ERROR if the counter equals DICT_TABLE_IN_DDL;
-DB_TABLE_NOT_FOUND if the cursor has no table */
-UNIV_INTERN
-ib_err_t
-ib_cursor_set_memcached_sync(
-/*=========================*/
- ib_crsr_t ib_crsr, /*!< in: cursor */
- ib_bool_t flag) /*!< in: true for increase, false for decrease */
-{
- const ib_cursor_t* cursor = (const ib_cursor_t*) ib_crsr;
- row_prebuilt_t* prebuilt = cursor->prebuilt;
- dict_table_t* table = prebuilt->table;
- ib_err_t err = DB_SUCCESS;
-
- if (table != NULL) {
- /* memcached_sync_count == DICT_TABLE_IN_DDL means the table
- is doing DDL, so we just return an error. */
- if (table->memcached_sync_count == DICT_TABLE_IN_DDL) {
- return(DB_ERROR);
- }
-
- if (flag) {
-#ifdef HAVE_ATOMIC_BUILTINS
- os_atomic_increment_lint(&table->memcached_sync_count, 1);
-#else
- dict_mutex_enter_for_mysql();
- ++table->memcached_sync_count;
- dict_mutex_exit_for_mysql();
-#endif
- } else {
-#ifdef HAVE_ATOMIC_BUILTINS
- os_atomic_decrement_lint(&table->memcached_sync_count, 1);
-#else
- dict_mutex_enter_for_mysql();
- --table->memcached_sync_count;
- dict_mutex_exit_for_mysql();
-#endif
- ut_a(table->memcached_sync_count >= 0);
- }
- } else {
- err = DB_TABLE_NOT_FOUND;
- }
-
- return(err);
-}
diff --git a/storage/xtradb/api/api0misc.cc b/storage/xtradb/api/api0misc.cc
deleted file mode 100644
index 5daee5de4c9..00000000000
--- a/storage/xtradb/api/api0misc.cc
+++ /dev/null
@@ -1,203 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file api/api0misc.cc
-InnoDB Native API
-
-2008-08-01 Created by Sunny Bains
-3/20/2011 Jimmy Yang extracted from Embedded InnoDB
-*******************************************************/
-
-#include <my_config.h>
-#include <errno.h>
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif /* HAVE_UNISTD_H */
-
-#include "api0misc.h"
-#include "trx0roll.h"
-#include "srv0srv.h"
-#include "dict0mem.h"
-#include "dict0dict.h"
-#include "pars0pars.h"
-#include "row0sel.h"
-#include "lock0lock.h"
-#include "ha_prototypes.h"
-#include <m_ctype.h>
-#include <mysys_err.h>
-#include <mysql/plugin.h>
-
-/*********************************************************************//**
-Sets a lock on a table.
-A dummy select query graph is built only to obtain the query thread
-that lock_table() takes as an argument. The request is issued again
-(via the run_again label) when ib_handle_errors() reports a lock wait
-or when lock_table() returns DB_QUE_THR_SUSPENDED. The graph is freed
-and trx->op_info is cleared before returning.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-ib_trx_lock_table_with_retry(
-/*=========================*/
- trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table, /*!< in: table to lock */
- enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */
-{
- que_thr_t* thr;
- dberr_t err;
- mem_heap_t* heap;
- sel_node_t* node;
-
- heap = mem_heap_create(512);
-
- trx->op_info = "setting table lock";
-
- node = sel_node_create(heap);
- thr = pars_complete_graph_for_exec(node, trx, heap);
- thr->graph->state = QUE_FORK_ACTIVE;
-
- /* We use the select query graph as the dummy graph needed
- in the lock module call */
-
- thr = que_fork_get_first_thr(static_cast<que_fork_t*>(
- que_node_get_parent(thr)));
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
-run_again:
- thr->run_node = thr;
- thr->prev_node = thr->common.parent;
-
- err = lock_table(0, table, mode, thr);
-
- trx->error_state = err; /* ib_handle_errors() reads the error from here */
-
- if (UNIV_LIKELY(err == DB_SUCCESS)) {
- que_thr_stop_for_mysql_no_error(thr, trx);
- } else {
- que_thr_stop_for_mysql(thr);
-
- if (err != DB_QUE_THR_SUSPENDED) {
- ibool was_lock_wait;
-
- was_lock_wait = ib_handle_errors(&err, trx, thr, NULL); /* no savepoint is passed */
-
- if (was_lock_wait) {
- goto run_again;
- }
- } else {
- que_thr_t* run_thr;
- que_node_t* parent;
-
- parent = que_node_get_parent(thr);
- run_thr = que_fork_start_command(
- static_cast<que_fork_t*>(parent));
-
- ut_a(run_thr == thr); /* the restarted fork must hand back the same thread */
-
- /* There was a lock wait but the thread was not
- in a ready to run or running state. */
- trx->error_state = DB_LOCK_WAIT;
-
- goto run_again;
- }
- }
-
- que_graph_free(thr->graph); /* dispose of the dummy graph */
- trx->op_info = "";
-
- return(err);
-}
-/****************************************************************//**
-Handles user errors and lock waits detected by the database engine.
-The error to handle is read from trx->error_state, which must not be
-DB_SUCCESS on entry and is reset to DB_SUCCESS before returning.
-Error codes that are not listed in the switch below end in ut_error.
-@return TRUE if it was a lock wait and we should continue running
-the query thread */
-UNIV_INTERN
-ibool
-ib_handle_errors(
-/*=============*/
- dberr_t* new_err,/*!< out: possible new error encountered in
- lock wait, or if no new error, the value
- of trx->error_state at the entry of this
- function */
- trx_t* trx, /*!< in: transaction */
- que_thr_t* thr, /*!< in: query thread */
- trx_savept_t* savept) /*!< in: savepoint or NULL */
-{
- dberr_t err;
-handle_new_error:
- err = trx->error_state;
-
- ut_a(err != DB_SUCCESS);
-
- trx->error_state = DB_SUCCESS;
-
- switch (err) {
- case DB_LOCK_WAIT_TIMEOUT:
- trx_rollback_for_mysql(trx);
- break;
- /* the timeout case ends at the break above; it does not fall
- through into the group of cases below */
- case DB_DUPLICATE_KEY:
- case DB_FOREIGN_DUPLICATE_KEY:
- case DB_TOO_BIG_RECORD:
- case DB_ROW_IS_REFERENCED:
- case DB_NO_REFERENCED_ROW:
- case DB_CANNOT_ADD_CONSTRAINT:
- case DB_TOO_MANY_CONCURRENT_TRXS:
- case DB_OUT_OF_FILE_SPACE:
- if (savept) {
- /* Roll back the latest, possibly incomplete
- insertion or update */
-
- trx_rollback_to_savepoint(trx, savept);
- }
- break;
- case DB_LOCK_WAIT:
- lock_wait_suspend_thread(thr);
-
- if (trx->error_state != DB_SUCCESS) { /* a new error was set during the wait */
- que_thr_stop_for_mysql(thr);
-
- goto handle_new_error;
- }
-
- *new_err = err;
-
- return(TRUE); /* Operation needs to be retried. */
-
- case DB_DEADLOCK:
- case DB_LOCK_TABLE_FULL:
- /* Roll back the whole transaction; this resolution was added
- to version 3.23.43 */
-
- trx_rollback_for_mysql(trx);
- break;
-
- case DB_CORRUPTION:
- case DB_FOREIGN_EXCEED_MAX_CASCADE:
- break; /* reported to the caller without any rollback here */
- default:
- ut_error;
- }
-
- if (trx->error_state != DB_SUCCESS) { /* an error set while handling takes precedence */
- *new_err = trx->error_state;
- } else {
- *new_err = err;
- }
-
- trx->error_state = DB_SUCCESS;
-
- return(FALSE);
-}
diff --git a/storage/xtradb/btr/btr0btr.cc b/storage/xtradb/btr/btr0btr.cc
deleted file mode 100644
index 85a083aaee0..00000000000
--- a/storage/xtradb/btr/btr0btr.cc
+++ /dev/null
@@ -1,5330 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2014, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file btr/btr0btr.cc
-The B-tree
-
-Created 6/2/1994 Heikki Tuuri
-*******************************************************/
-
-#include "btr0btr.h"
-
-#ifdef UNIV_NONINL
-#include "btr0btr.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "page0page.h"
-#include "page0zip.h"
-
-#ifndef UNIV_HOTBACKUP
-#include "btr0cur.h"
-#include "btr0sea.h"
-#include "btr0pcur.h"
-#include "btr0defragment.h"
-#include "rem0cmp.h"
-#include "lock0lock.h"
-#include "ibuf0ibuf.h"
-#include "trx0trx.h"
-#include "srv0mon.h"
-
-/**************************************************************//**
-Checks if the page in the cursor can be merged with given page.
-If necessary, re-organize the merge_page.
-@return TRUE if possible to merge. */
-UNIV_INTERN
-ibool
-btr_can_merge_with_page(
-/*====================*/
- btr_cur_t* cursor, /*!< in: cursor on the page to merge */
- ulint page_no, /*!< in: a sibling page */
- buf_block_t** merge_block, /*!< out: the merge block */
- mtr_t* mtr); /*!< in: mini-transaction */
-
-#endif /* UNIV_HOTBACKUP */
-
-/**************************************************************//**
-Report that an index page is corrupted: print a "flag mismatch" line
-to stderr, then dump the compressed page (when the block has one) and
-finally the uncompressed frame. */
-UNIV_INTERN
-void
-btr_corruption_report(
-/*==================*/
-	const buf_block_t*	block,	/*!< in: corrupted block */
-	const dict_index_t*	index)	/*!< in: index tree */
-{
-	const unsigned	space_id = (unsigned) buf_block_get_space(block);
-	const unsigned	page_no = (unsigned) buf_block_get_page_no(block);
-
-	fprintf(stderr, "InnoDB: flag mismatch in space %u page %u"
-		" index %s of table %s\n",
-		space_id, page_no, index->name, index->table_name);
-
-	if (block->page.zip.data) {
-		/* Dump the compressed copy without crashing. */
-		buf_page_print(block->page.zip.data,
-			       buf_block_get_zip_size(block),
-			       BUF_PAGE_PRINT_NO_CRASH);
-	}
-
-	buf_page_print(buf_nonnull_block_get_frame(block), 0, 0);
-}
-
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_BLOB_DEBUG
-# include "srv0srv.h"
-# include "ut0rbt.h"
-
-/** TRUE when messages about index->blobs modification are enabled. */
-static ibool btr_blob_dbg_msg;
-
-/** Issue a message about an operation on index->blobs.
-The line written to stderr has the form
-"<op> ref_page_no:ref_heap_no:ref_field_no->blob_page_no ctx(owner,always_owner,del)".
-@param op operation; must be a string literal, because it is
-concatenated with the format string at compile time
-@param b the entry being subjected to the operation
-@param ctx the context of the operation */
-#define btr_blob_dbg_msg_issue(op, b, ctx) \
- fprintf(stderr, op " %u:%u:%u->%u %s(%u,%u,%u)\n", \
- (b)->ref_page_no, (b)->ref_heap_no, \
- (b)->ref_field_no, (b)->blob_page_no, ctx, \
- (b)->owner, (b)->always_owner, (b)->del)
-
-/**************************************************************//**
-Insert to index->blobs a reference to an off-page column.
-The insertion is done while holding index->blobs_mutex; an "insert"
-message is issued first when btr_blob_dbg_msg is set. */
-UNIV_INTERN
-void
-btr_blob_dbg_rbt_insert(
-/*====================*/
-	dict_index_t*		index,	/*!< in/out: index tree */
-	const btr_blob_dbg_t*	b,	/*!< in: the reference */
-	const char*		ctx)	/*!< in: context (for logging) */
-{
-	if (btr_blob_dbg_msg) {
-		btr_blob_dbg_msg_issue("insert", b, ctx);
-	}
-
-	/* The entry serves both as the key and as the value. */
-	mutex_enter(&index->blobs_mutex);
-	rbt_insert(index->blobs, b, b);
-	mutex_exit(&index->blobs_mutex);
-}
-
-/**************************************************************//**
-Remove from index->blobs a reference to an off-page column.
-The removal is done while holding index->blobs_mutex and is asserted
-to succeed; a "delete" message is issued first when btr_blob_dbg_msg
-is set. */
-UNIV_INTERN
-void
-btr_blob_dbg_rbt_delete(
-/*====================*/
-	dict_index_t*		index,	/*!< in/out: index tree */
-	const btr_blob_dbg_t*	b,	/*!< in: the reference */
-	const char*		ctx)	/*!< in: context (for logging) */
-{
-	if (btr_blob_dbg_msg) {
-		btr_blob_dbg_msg_issue("delete", b, ctx);
-	}
-
-	mutex_enter(&index->blobs_mutex);
-	ut_a(rbt_delete(index->blobs, b));
-	mutex_exit(&index->blobs_mutex);
-}
-
-/**************************************************************//**
-Comparator for items (btr_blob_dbg_t) in index->blobs.
-Entries are ordered by ref_page_no, then ref_heap_no, then
-ref_field_no; the other members do not take part in the comparison.
-@return -1, 0 or 1 if *a<*b, *a=*b, *a>*b */
-static
-int
-btr_blob_dbg_cmp(
-/*=============*/
-	const void*	a,	/*!< in: first btr_blob_dbg_t to compare */
-	const void*	b)	/*!< in: second btr_blob_dbg_t to compare */
-{
-	const btr_blob_dbg_t*	lhs = static_cast<const btr_blob_dbg_t*>(a);
-	const btr_blob_dbg_t*	rhs = static_cast<const btr_blob_dbg_t*>(b);
-
-	ut_ad(lhs != NULL);
-	ut_ad(rhs != NULL);
-
-	if (lhs->ref_page_no < rhs->ref_page_no) {
-		return(-1);
-	} else if (lhs->ref_page_no > rhs->ref_page_no) {
-		return(1);
-	}
-
-	if (lhs->ref_heap_no < rhs->ref_heap_no) {
-		return(-1);
-	} else if (lhs->ref_heap_no > rhs->ref_heap_no) {
-		return(1);
-	}
-
-	if (lhs->ref_field_no < rhs->ref_field_no) {
-		return(-1);
-	} else if (lhs->ref_field_no > rhs->ref_field_no) {
-		return(1);
-	}
-
-	return(0);
-}
-
-/**************************************************************//**
-Add a reference to an off-page column to the index->blobs map.
-The new entry is marked as owned, always owned and not deleted.
-Asserts that index->blobs exists, that the field is not one of the
-first index->n_uniq fields, and that the record is not delete-marked. */
-UNIV_INTERN
-void
-btr_blob_dbg_add_blob(
-/*==================*/
-	const rec_t*	rec,		/*!< in: clustered index record */
-	ulint		field_no,	/*!< in: off-page column number */
-	ulint		page_no,	/*!< in: start page of the column */
-	dict_index_t*	index,		/*!< in/out: index tree */
-	const char*	ctx)		/*!< in: context (for logging) */
-{
-	const page_t*	rec_page = page_align(rec);
-	btr_blob_dbg_t	b;
-
-	ut_a(index->blobs);
-
-	/* The key part: where the reference lives. */
-	b.ref_page_no = page_get_page_no(rec_page);
-	b.ref_heap_no = page_rec_get_heap_no(rec);
-	b.ref_field_no = field_no;
-	/* The value part: where the column starts. */
-	b.blob_page_no = page_no;
-	ut_a(b.ref_field_no >= index->n_uniq);
-
-	b.owner = TRUE;
-	b.always_owner = TRUE;
-	b.del = FALSE;
-	ut_a(!rec_get_deleted_flag(rec, page_is_comp(rec_page)));
-
-	btr_blob_dbg_rbt_insert(index, &b, ctx);
-}
-
-/**************************************************************//**
-Add to index->blobs any references to off-page columns from a record.
-Externally stored fields whose reference is still all zero (the column
-has not been stored yet) are skipped and not counted.
-@return number of references added */
-UNIV_INTERN
-ulint
-btr_blob_dbg_add_rec(
-/*=================*/
-	const rec_t*	rec,	/*!< in: record */
-	dict_index_t*	index,	/*!< in/out: index */
-	const ulint*	offsets,/*!< in: offsets */
-	const char*	ctx)	/*!< in: context (for logging) */
-{
-	ulint		n_added = 0;
-	ulint		field;
-	btr_blob_dbg_t	b;
-	ibool		rec_deleted;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	if (!rec_offs_any_extern(offsets)) {
-		return(0);
-	}
-
-	/* These parts of the key are the same for every field. */
-	b.ref_page_no = page_get_page_no(page_align(rec));
-	b.ref_heap_no = page_rec_get_heap_no(rec);
-	rec_deleted = (rec_get_deleted_flag(rec, rec_offs_comp(offsets))
-		       != 0);
-
-	for (field = 0; field < rec_offs_n_fields(offsets); field++) {
-		ulint		len;
-		const byte*	field_ref;
-		ibool		owned;
-
-		if (!rec_offs_nth_extern(offsets, field)) {
-			continue;
-		}
-
-		field_ref = rec_get_nth_field(rec, offsets, field, &len);
-
-		ut_a(len != UNIV_SQL_NULL);
-		ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
-		/* The reference occupies the tail of the field. */
-		field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
-
-		if (memcmp(field_ref, field_ref_zero,
-			   BTR_EXTERN_FIELD_REF_SIZE) == 0) {
-			/* the column has not been stored yet */
-			continue;
-		}
-
-		b.ref_field_no = field;
-		b.blob_page_no = mach_read_from_4(
-			field_ref + BTR_EXTERN_PAGE_NO);
-		ut_a(b.ref_field_no >= index->n_uniq);
-
-		owned = !(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG);
-		b.owner = owned;
-		b.always_owner = owned;
-		b.del = rec_deleted;
-
-		btr_blob_dbg_rbt_insert(index, &b, ctx);
-		n_added++;
-	}
-
-	return(n_added);
-}
-
-/**************************************************************//**
-Display the references to off-page columns, one line per entry of
-index->blobs, on stderr. Does nothing if index->blobs is not set.
-This function is to be called from a debugger,
-for example when a breakpoint on ut_dbg_assertion_failed is hit. */
-UNIV_INTERN
-void
-btr_blob_dbg_print(
-/*===============*/
-	const dict_index_t*	index)	/*!< in: index tree */
-{
-	const ib_rbt_node_t*	cur;
-
-	if (!index->blobs) {
-		return;
-	}
-
-	/* We intentionally do not acquire index->blobs_mutex here.
-	This function is to be called from a debugger, and the caller
-	should make sure that the index->blobs_mutex is held. */
-
-	cur = rbt_first(index->blobs);
-
-	while (cur != NULL) {
-		const btr_blob_dbg_t*	entry
-			= rbt_value(btr_blob_dbg_t, cur);
-
-		fprintf(stderr, "%u:%u:%u->%u%s%s%s\n",
-			entry->ref_page_no, entry->ref_heap_no,
-			entry->ref_field_no, entry->blob_page_no,
-			entry->owner ? "" : "(disowned)",
-			entry->always_owner ? "" : "(has disowned)",
-			entry->del ? "(deleted)" : "");
-
-		cur = rbt_next(index->blobs, cur);
-	}
-}
-
-/**************************************************************//**
-Remove from index->blobs any references to off-page columns from a record.
-Externally stored fields whose BLOB page number is 0 (not stored yet)
-or FIL_NULL (already freed) are skipped and not counted.
-@return number of references removed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_remove_rec(
-/*====================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in/out: index */
- const ulint* offsets,/*!< in: offsets */
- const char* ctx) /*!< in: context (for logging) */
-{
- ulint i;
- ulint count = 0;
- btr_blob_dbg_t b;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (!rec_offs_any_extern(offsets)) {
- return(0);
- }
-
- b.ref_page_no = page_get_page_no(page_align(rec));
- b.ref_heap_no = page_rec_get_heap_no(rec);
-
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- if (rec_offs_nth_extern(offsets, i)) {
- ulint len;
- const byte* field_ref = rec_get_nth_field(
- rec, offsets, i, &len);
-
- ut_a(len != UNIV_SQL_NULL);
- ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
- field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; /* the reference is the tail of the field */
-
- b.ref_field_no = i;
- b.blob_page_no = mach_read_from_4(
- field_ref + BTR_EXTERN_PAGE_NO);
-
- switch (b.blob_page_no) {
- case 0:
- /* The column has not been stored yet.
- The BLOB pointer must be all zero.
- There cannot be a BLOB starting at
- page 0, because page 0 is reserved for
- the tablespace header. */
- ut_a(!memcmp(field_ref, field_ref_zero,
- BTR_EXTERN_FIELD_REF_SIZE));
- /* fall through */
- case FIL_NULL:
- /* the column has been freed already */
- continue; /* continues the enclosing for loop: nothing to delete */
- }
-
- btr_blob_dbg_rbt_delete(index, &b, ctx);
- count++;
- }
- }
-
- return(count);
-}
-
-/**************************************************************//**
-Check that there are no references to off-page columns from or to
-the given page. Invoked when freeing or clearing a page.
-Every entry that refers from or to the page is printed to stderr.
-Such an entry is tolerated only if it points to the page, is disowned
-and is marked deleted; any other one makes the check fail.
-@return TRUE when no orphan references exist */
-UNIV_INTERN
-ibool
-btr_blob_dbg_is_empty(
-/*==================*/
-	dict_index_t*	index,		/*!< in: index */
-	ulint		page_no)	/*!< in: page number */
-{
-	const ib_rbt_node_t*	cur;
-	ibool			ok = TRUE;
-
-	if (!index->blobs) {
-		return(ok);
-	}
-
-	mutex_enter(&index->blobs_mutex);
-
-	cur = rbt_first(index->blobs);
-
-	while (cur != NULL) {
-		const btr_blob_dbg_t*	entry
-			= rbt_value(btr_blob_dbg_t, cur);
-
-		if (entry->ref_page_no == page_no
-		    || entry->blob_page_no == page_no) {
-
-			fprintf(stderr,
-				"InnoDB: orphan BLOB ref%s%s%s %u:%u:%u->%u\n",
-				entry->owner ? "" : "(disowned)",
-				entry->always_owner ? "" : "(has disowned)",
-				entry->del ? "(deleted)" : "",
-				entry->ref_page_no, entry->ref_heap_no,
-				entry->ref_field_no,
-				entry->blob_page_no);
-
-			if (!(entry->blob_page_no == page_no
-			      && !entry->owner && entry->del)) {
-				ok = FALSE;
-			}
-		}
-
-		cur = rbt_next(index->blobs, cur);
-	}
-
-	mutex_exit(&index->blobs_mutex);
-	return(ok);
-}
-
-/**************************************************************//**
-Count and process all references to off-page columns on a page.
-Returns 0 without calling op unless index->blobs is set, the page is
-a leaf page and the index is clustered. Otherwise op is invoked for
-the starting record and for each following record, stopping when the
-next record is the page supremum; the return values of op are summed.
-@return number of references processed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_op(
-/*============*/
- const page_t* page, /*!< in: B-tree leaf page */
- const rec_t* rec, /*!< in: record to start from
- (NULL to process the whole page) */
- dict_index_t* index, /*!< in/out: index */
- const char* ctx, /*!< in: context (for logging) */
- const btr_blob_dbg_op_f op) /*!< in: operation on records */
-{
- ulint count = 0;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
- ut_a(!rec || page_align(rec) == page);
-
- if (!index->blobs || !page_is_leaf(page)
- || !dict_index_is_clust(index)) {
- return(0);
- }
-
- if (rec == NULL) {
- rec = page_get_infimum_rec(page); /* whole page: start from the infimum record */
- }
-
- do {
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- count += op(rec, index, offsets, ctx);
- rec = page_rec_get_next_const(rec);
- } while (!page_rec_is_supremum(rec));
-
- if (heap) { /* set only if rec_get_offsets() created a heap */
- mem_heap_free(heap);
- }
-
- return(count);
-}
-
-/**************************************************************//**
-Count and add to index->blobs any references to off-page columns
-from records on a page. The page must not have any references
-registered beforehand; this is checked first.
-@return number of references added */
-UNIV_INTERN
-ulint
-btr_blob_dbg_add(
-/*=============*/
-	const page_t*	page,	/*!< in: rewritten page */
-	dict_index_t*	index,	/*!< in/out: index */
-	const char*	ctx)	/*!< in: context (for logging) */
-{
-	ulint	n_added;
-
-	btr_blob_dbg_assert_empty(index, page_get_page_no(page));
-
-	n_added = btr_blob_dbg_op(page, NULL, index, ctx,
-				  btr_blob_dbg_add_rec);
-
-	return(n_added);
-}
-
-/**************************************************************//**
-Count and remove from index->blobs any references to off-page columns
-from records on a page, then check that none are left for the page.
-Used when reorganizing a page, before copying the records.
-@return number of references removed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_remove(
-/*================*/
-	const page_t*	page,	/*!< in: b-tree page */
-	dict_index_t*	index,	/*!< in/out: index */
-	const char*	ctx)	/*!< in: context (for logging) */
-{
-	const ulint	n_removed = btr_blob_dbg_op(
-		page, NULL, index, ctx, btr_blob_dbg_remove_rec);
-
-	/* Check that no references exist. */
-	btr_blob_dbg_assert_empty(index, page_get_page_no(page));
-
-	return(n_removed);
-}
-
-/**************************************************************//**
-Restore in index->blobs any references to off-page columns.
-The references of npage are removed and those of page are added;
-both pages must carry the same page number and space id, and the
-number of references added must equal the number removed.
-Used when page reorganize fails due to compressed page overflow. */
-UNIV_INTERN
-void
-btr_blob_dbg_restore(
-/*=================*/
-	const page_t*	npage,	/*!< in: page that failed to compress */
-	const page_t*	page,	/*!< in: copy of original page */
-	dict_index_t*	index,	/*!< in/out: index */
-	const char*	ctx)	/*!< in: context (for logging) */
-{
-	ut_a(page_get_page_no(npage) == page_get_page_no(page));
-	ut_a(page_get_space_id(npage) == page_get_space_id(page));
-
-	const ulint	n_removed = btr_blob_dbg_remove(npage, index, ctx);
-	const ulint	n_added = btr_blob_dbg_add(page, index, ctx);
-
-	ut_a(n_added == n_removed);
-}
-
-/**************************************************************//**
-Modify the 'deleted' flag of a record.
-For every externally stored field of the record, the matching entry in
-index->blobs is looked up (it must exist) and its del member is set.
-Does nothing if the record has no externally stored fields or if
-index->blobs is not set. A BLOB page number of 0 or FIL_NULL in a
-reference ends in ut_error. */
-UNIV_INTERN
-void
-btr_blob_dbg_set_deleted_flag(
-/*==========================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in/out: index */
- const ulint* offsets,/*!< in: rec_get_offs(rec, index) */
- ibool del) /*!< in: TRUE=deleted, FALSE=exists */
-{
- const ib_rbt_node_t* node;
- btr_blob_dbg_t b;
- btr_blob_dbg_t* c;
- ulint i;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_a(dict_index_is_clust(index));
- ut_a(del == !!del);/* must be FALSE==0 or TRUE==1 */
-
- if (!rec_offs_any_extern(offsets) || !index->blobs) {
-
- return;
- }
-
- b.ref_page_no = page_get_page_no(page_align(rec));
- b.ref_heap_no = page_rec_get_heap_no(rec);
-
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- if (rec_offs_nth_extern(offsets, i)) {
- ulint len;
- const byte* field_ref = rec_get_nth_field(
- rec, offsets, i, &len);
-
- ut_a(len != UNIV_SQL_NULL);
- ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
- field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; /* the reference is the tail of the field */
-
- b.ref_field_no = i;
- b.blob_page_no = mach_read_from_4(
- field_ref + BTR_EXTERN_PAGE_NO);
-
- switch (b.blob_page_no) {
- case 0:
- ut_a(memcmp(field_ref, field_ref_zero,
- BTR_EXTERN_FIELD_REF_SIZE));
- /* page number 0 is for the
- page allocation bitmap; control falls
- through to the ut_error below */
- case FIL_NULL:
- /* the column has been freed already */
- ut_error;
- }
-
- mutex_enter(&index->blobs_mutex);
- node = rbt_lookup(index->blobs, &b);
- ut_a(node);
-
- c = rbt_value(btr_blob_dbg_t, node);
- /* The flag should be modified. */
- c->del = del;
- if (btr_blob_dbg_msg) {
- b = *c; /* copy the entry so that it can be printed after the mutex is released */
- mutex_exit(&index->blobs_mutex);
- btr_blob_dbg_msg_issue("del_mk", &b, "");
- } else {
- mutex_exit(&index->blobs_mutex);
- }
- }
- }
-}
-
-/**************************************************************//**
-Change the ownership of an off-page column.
-The owner flag already stored in the field reference must agree with
-own. If an entry for the reference exists in index->blobs, its owner
-member is set to own; disowning also clears always_owner. */
-UNIV_INTERN
-void
-btr_blob_dbg_owner(
-/*===============*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in/out: index */
- const ulint* offsets,/*!< in: rec_get_offs(rec, index) */
- ulint i, /*!< in: ith field in rec */
- ibool own) /*!< in: TRUE=owned, FALSE=disowned */
-{
- const ib_rbt_node_t* node;
- btr_blob_dbg_t b;
- const byte* field_ref;
- ulint len;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_a(rec_offs_nth_extern(offsets, i));
-
- field_ref = rec_get_nth_field(rec, offsets, i, &len);
- ut_a(len != UNIV_SQL_NULL);
- ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
- field_ref += len - BTR_EXTERN_FIELD_REF_SIZE; /* the reference is the tail of the field */
-
- b.ref_page_no = page_get_page_no(page_align(rec));
- b.ref_heap_no = page_rec_get_heap_no(rec);
- b.ref_field_no = i;
- b.owner = !(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG);
- b.blob_page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
-
- ut_a(b.owner == own); /* the record must already carry the requested ownership */
-
- mutex_enter(&index->blobs_mutex);
- node = rbt_lookup(index->blobs, &b);
- /* row_ins_clust_index_entry_by_modify() invokes
- btr_cur_unmark_extern_fields() also for the newly inserted
- references, which are all zero bytes until the columns are stored.
- The node lookup must fail if and only if that is the case. */
- ut_a(!memcmp(field_ref, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE)
- == !node);
-
- if (node) {
- btr_blob_dbg_t* c = rbt_value(btr_blob_dbg_t, node);
- /* Some code sets ownership from TRUE to TRUE.
- We do not allow changing ownership from FALSE to FALSE. */
- ut_a(own || c->owner);
-
- c->owner = own;
- if (!own) {
- c->always_owner = FALSE; /* remember that the entry was disowned at some point */
- }
- }
-
- mutex_exit(&index->blobs_mutex);
-}
-#endif /* UNIV_BLOB_DEBUG */
-
-/*
-Latching strategy of the InnoDB B-tree
---------------------------------------
-A tree latch protects all non-leaf nodes of the tree. Each node of a tree
-also has a latch of its own.
-
-A B-tree operation normally first acquires an S-latch on the tree. It
-searches down the tree and releases the tree latch when it has the
-leaf node latch. To save CPU time we do not acquire any latch on
-non-leaf nodes of the tree during a search, those pages are only bufferfixed.
-
-If an operation needs to restructure the tree, it acquires an X-latch on
-the tree before searching to a leaf node. If it needs, for example, to
-split a leaf,
-(1) InnoDB decides the split point in the leaf,
-(2) allocates a new page,
-(3) inserts the appropriate node pointer to the first non-leaf level,
-(4) releases the tree X-latch,
-(5) and then moves records from the leaf to the new allocated page.
-
-Node pointers
--------------
-Leaf pages of a B-tree contain the index records stored in the
-tree. On levels n > 0 we store 'node pointers' to pages on level
-n - 1. For each page there is exactly one node pointer stored:
-thus our tree is an ordinary B-tree, not a B-link tree.
-
-A node pointer contains a prefix P of an index record. The prefix
-is long enough so that it determines an index record uniquely.
-The file page number of the child page is added as the last
-field. To the child page we can store node pointers or index records
-which are >= P in the alphabetical order, but < P1 if there is
-a next node pointer on the level, and P1 is its prefix.
-
-If a node pointer with a prefix P points to a non-leaf child,
-then the leftmost record in the child must have the same
-prefix P. If it points to a leaf node, the child is not required
-to contain any record with a prefix equal to P. The leaf case
-is decided this way to allow arbitrary deletions in a leaf node
-without touching upper levels of the tree.
-
-We have predefined a special minimum record which we
-define as the smallest record in any alphabetical order.
-A minimum record is denoted by setting a bit in the record
-header. A minimum record acts as the prefix of a node pointer
-which points to a leftmost node on any level of the tree.
-
-File page allocation
---------------------
-In the root node of a B-tree there are two file segment headers.
-The leaf pages of a tree are allocated from one file segment, to
-make them consecutive on disk if possible. From the other file segment
-we allocate pages for the non-leaf levels of the tree.
-*/
-
-#ifdef UNIV_BTR_DEBUG
-/**************************************************************//**
-Checks a file segment header within a B-tree root page: the space id
-stored in the header must equal space, and the stored offset must lie
-between FIL_PAGE_DATA and UNIV_PAGE_SIZE - FIL_PAGE_DATA_END.
-When srv_pass_corrupt_table is nonzero the outcome is returned;
-otherwise a violation fails an assertion.
-@return TRUE if valid */
-static
-ibool
-btr_root_fseg_validate(
-/*===================*/
-	const fseg_header_t*	seg_header,	/*!< in: segment header */
-	ulint			space)		/*!< in: tablespace identifier */
-{
-	const ulint	offset = mach_read_from_2(
-		seg_header + FSEG_HDR_OFFSET);
-	const ulint	hdr_space = mach_read_from_4(
-		seg_header + FSEG_HDR_SPACE);
-
-	if (UNIV_UNLIKELY(srv_pass_corrupt_table != 0)) {
-		/* Lenient mode: report the result instead of asserting. */
-		return(hdr_space == space
-		       && offset >= FIL_PAGE_DATA
-		       && offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
-	}
-
-	ut_a(hdr_space == space);
-	ut_a(offset >= FIL_PAGE_DATA);
-	ut_a(offset <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
-
-	return(TRUE);
-}
-#endif /* UNIV_BTR_DEBUG */
-
-/**************************************************************//**
-Gets the root node of a tree and x- or s-latches it.
-If btr_block_get() returns no block, the table is flagged as
-file_unreadable, a DB_DECRYPTION_FAILED warning is pushed and NULL is
-returned. With UNIV_BTR_DEBUG and a nonzero srv_pass_corrupt_table,
-NULL is also returned when a root segment header fails validation.
-NOTE(review): SRV_CORRUPT_TABLE_CHECK is defined elsewhere; it is given
-a return(0) action here, so it presumably can return NULL as well.
-@return root page, x- or s-latched, or NULL */
-buf_block_t*
-btr_root_block_get(
-/*===============*/
- const dict_index_t* index, /*!< in: index tree */
- ulint mode, /*!< in: either RW_S_LATCH
- or RW_X_LATCH */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint space;
- ulint zip_size;
- ulint root_page_no;
- buf_block_t* block;
-
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
- root_page_no = dict_index_get_page(index);
-
- block = btr_block_get(space, zip_size, root_page_no, mode, (dict_index_t*)index, mtr);
-
- if (!block) {
- if (index && index->table) { /* NOTE(review): index was already dereferenced above, so the index test cannot fail here */
- index->table->file_unreadable = true;
-
- ib_push_warning(
- static_cast<THD*>(NULL), DB_DECRYPTION_FAILED,
- "Table %s in tablespace %lu is encrypted but encryption service or"
- " used key_id is not available. "
- " Can't continue reading table.",
- index->table->name, space);
- }
-
- return NULL;
- }
-
- SRV_CORRUPT_TABLE_CHECK(block, return(0););
-
- btr_assert_not_corrupted(block, index);
-
-#ifdef UNIV_BTR_DEBUG
- if (!dict_index_is_ibuf(index)) { /* the segment headers are not validated for the ibuf index */
- const page_t* root = buf_block_get_frame(block);
-
- if (UNIV_UNLIKELY(srv_pass_corrupt_table != 0)) {
- if (!btr_root_fseg_validate(FIL_PAGE_DATA
- + PAGE_BTR_SEG_LEAF
- + root, space))
- return(NULL);
- if (!btr_root_fseg_validate(FIL_PAGE_DATA
- + PAGE_BTR_SEG_TOP
- + root, space))
- return(NULL);
- return(block);
- }
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
- + root, space));
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
- + root, space));
- }
-#endif /* UNIV_BTR_DEBUG */
-
- return(block);
-}
-
-/**************************************************************//**
-Gets the root node of a tree and x-latches it.
-@return root page, x-latched, or NULL if the root block could not be
-obtained or is marked as encrypted */
-UNIV_INTERN
-page_t*
-btr_root_get(
-/*=========*/
-	const dict_index_t*	index,	/*!< in: index tree */
-	mtr_t*			mtr)	/*!< in: mtr */
-{
-	buf_block_t*	root_block = btr_root_block_get(
-		index, RW_X_LATCH, mtr);
-
-	if (root_block == NULL) {
-		return(NULL);
-	}
-
-	if (root_block->page.encrypted == true) {
-		/* An encrypted root is treated as unavailable. */
-		return(NULL);
-	}
-
-	return(buf_block_get_frame(root_block));
-}
-
-/**************************************************************//**
-Gets the height of the B-tree (the level of the root, when the leaf
-level is assumed to be 0). The caller must hold an S or X latch on
-the index.
-The root page is S-latched only for the duration of the call. If the
-root block cannot be obtained, 0 is returned (after a debug assertion).
-@return tree height (level of the root) */
-UNIV_INTERN
-ulint
-btr_height_get(
-/*===========*/
- dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint height=0;
- buf_block_t* root_block;
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_S_LOCK)
- || mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
-
- /* S latches the page */
- root_block = btr_root_block_get(index, RW_S_LATCH, mtr);
- ut_ad(root_block); // The index must not be corrupted
-
- if (root_block) { /* non-debug builds reach this test with a possibly NULL block */
-
- height = btr_page_get_level(buf_nonnull_block_get_frame(root_block),
- mtr);
- /* Release the S latch on the root page. */
- mtr_memo_release(mtr, root_block, MTR_MEMO_PAGE_S_FIX);
-#ifdef UNIV_SYNC_DEBUG
- sync_thread_reset_level(&root_block->lock);
-#endif /* UNIV_SYNC_DEBUG */
- }
-
- return(height);
-}
-
-/**************************************************************//**
-Checks a file segment header within a B-tree root page and updates
-the segment header space id. Nothing is written when the offset stored
-in the header lies outside the range from FIL_PAGE_DATA to
-UNIV_PAGE_SIZE - FIL_PAGE_DATA_END.
-@return TRUE if valid */
-static
-bool
-btr_root_fseg_adjust_on_import(
-/*===========================*/
-	fseg_header_t*	seg_header,	/*!< in/out: segment header */
-	page_zip_des_t*	page_zip,	/*!< in/out: compressed page,
-					or NULL */
-	ulint		space,		/*!< in: tablespace identifier */
-	mtr_t*		mtr)		/*!< in/out: mini-transaction */
-{
-	const ulint	offset = mach_read_from_2(
-		seg_header + FSEG_HDR_OFFSET);
-	byte*		space_field = seg_header + FSEG_HDR_SPACE;
-
-	if (offset < FIL_PAGE_DATA) {
-		return(FALSE);
-	}
-
-	if (offset > UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) {
-		return(FALSE);
-	}
-
-	if (!page_zip) {
-		mlog_write_ulint(space_field, space, MLOG_4BYTES, mtr);
-
-		return(TRUE);
-	}
-
-	/* Compressed page: update the frame, then the compressed copy. */
-	mach_write_to_4(space_field, space);
-	page_zip_write_header(page_zip, space_field, 4, mtr);
-
-	return(TRUE);
-}
-
-/**************************************************************//**
-Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
-The root page must be a FIL_PAGE_INDEX page without siblings. For a
-clustered index the page format must agree with the table format and
-the tablespace flags (when nonzero) must match the table flags. If all
-checks pass, the space id in both root segment headers is rewritten.
-The mini-transaction used here runs in MTR_LOG_NO_REDO mode.
-@return DB_SUCCESS; DB_CORRUPTION if a check fails;
-DB_DECRYPTION_FAILED if the root block could not be obtained */
-UNIV_INTERN
-dberr_t
-btr_root_adjust_on_import(
-/*======================*/
-	const dict_index_t*	index)	/*!< in: index tree */
-{
-	dberr_t		err;
-	mtr_t		mtr;
-	page_t*		page;
-	buf_block_t*	block;
-	page_zip_des_t*	page_zip;
-	dict_table_t*	table		= index->table;
-	ulint		space_id	= dict_index_get_space(index);
-	ulint		zip_size	= dict_table_zip_size(table);
-	ulint		root_page_no	= dict_index_get_page(index);
-
-	/* Inject the failure before the mini-transaction is started, so
-	that the early return does not leave a started mtr uncommitted. */
-	DBUG_EXECUTE_IF("ib_import_trigger_corruption_3",
-			return(DB_CORRUPTION););
-
-	mtr_start(&mtr);
-
-	mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
-
-	block = btr_block_get(
-		space_id, zip_size, root_page_no, RW_X_LATCH,
-		(dict_index_t*)index, &mtr);
-
-	if (block == NULL) {
-		/* btr_block_get() can return NULL; btr_root_block_get()
-		handles that case for an undecryptable page. Do not
-		dereference the missing block. */
-		mtr_commit(&mtr);
-
-		return(DB_DECRYPTION_FAILED);
-	}
-
-	page = buf_block_get_frame(block);
-	page_zip = buf_block_get_page_zip(block);
-
-	/* Check that this is a B-tree page and both the PREV and NEXT
-	pointers are FIL_NULL, because the root page does not have any
-	siblings. */
-	if (fil_page_get_type(page) != FIL_PAGE_INDEX
-	    || fil_page_get_prev(page) != FIL_NULL
-	    || fil_page_get_next(page) != FIL_NULL) {
-
-		err = DB_CORRUPTION;
-
-	} else if (dict_index_is_clust(index)) {
-		bool	page_is_compact_format;
-
-		page_is_compact_format = page_is_comp(page) > 0;
-
-		/* Check if the page format and table format agree. */
-		if (page_is_compact_format != dict_table_is_comp(table)) {
-			err = DB_CORRUPTION;
-		} else {
-
-			/* Check that the table flags and the tablespace
-			flags match. */
-			ulint	flags = fil_space_get_flags(table->space);
-
-			if (flags
-			    && flags != dict_tf_to_fsp_flags(table->flags)) {
-
-				err = DB_CORRUPTION;
-			} else {
-				err = DB_SUCCESS;
-			}
-		}
-	} else {
-		err = DB_SUCCESS;
-	}
-
-	/* Check and adjust the file segment headers, if all OK so far. */
-	if (err == DB_SUCCESS
-	    && (!btr_root_fseg_adjust_on_import(
-			FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
-			+ page, page_zip, space_id, &mtr)
-		|| !btr_root_fseg_adjust_on_import(
-			FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
-			+ page, page_zip, space_id, &mtr))) {
-
-		err = DB_CORRUPTION;
-	}
-
-	mtr_commit(&mtr);
-
-	return(err);
-}
-
-/*************************************************************//**
-Gets pointer to the previous user record in the tree. It is assumed that
-the caller has appropriate latches on the page and its neighbor.
-@return previous user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_prev_user_rec(
-/*==================*/
- rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
- needed, also to the previous page */
-{
- page_t* page;
- page_t* prev_page;
- ulint prev_page_no;
-
- if (!page_rec_is_infimum(rec)) {
-
- rec_t* prev_rec = page_rec_get_prev(rec);
-
- if (!page_rec_is_infimum(prev_rec)) {
-
- return(prev_rec);
- }
- }
-
- page = page_align(rec);
- prev_page_no = btr_page_get_prev(page, mtr);
-
- if (prev_page_no != FIL_NULL) {
-
- ulint space;
- ulint zip_size;
- buf_block_t* prev_block;
-
- space = page_get_space_id(page);
- zip_size = fil_space_get_zip_size(space);
-
- prev_block = buf_page_get_with_no_latch(space, zip_size,
- prev_page_no, mtr);
- prev_page = buf_block_get_frame(prev_block);
- /* The caller must already have a latch to the brother */
- ut_ad(mtr_memo_contains(mtr, prev_block,
- MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(mtr, prev_block,
- MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(prev_page) == page_is_comp(page));
- ut_a(btr_page_get_next(prev_page, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- return(page_rec_get_prev(page_get_supremum_rec(prev_page)));
- }
-
- return(NULL);
-}
-
-/*************************************************************//**
-Gets pointer to the next user record in the tree. It is assumed that the
-caller has appropriate latches on the page and its neighbor.
-@return next user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_next_user_rec(
-/*==================*/
- rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
- needed, also to the next page */
-{
- page_t* page;
- page_t* next_page;
- ulint next_page_no;
-
- if (!page_rec_is_supremum(rec)) {
-
- rec_t* next_rec = page_rec_get_next(rec);
-
- if (!page_rec_is_supremum(next_rec)) {
-
- return(next_rec);
- }
- }
-
- page = page_align(rec);
- next_page_no = btr_page_get_next(page, mtr);
-
- if (next_page_no != FIL_NULL) {
- ulint space;
- ulint zip_size;
- buf_block_t* next_block;
-
- space = page_get_space_id(page);
- zip_size = fil_space_get_zip_size(space);
-
- next_block = buf_page_get_with_no_latch(space, zip_size,
- next_page_no, mtr);
- next_page = buf_block_get_frame(next_block);
- /* The caller must already have a latch to the brother */
- ut_ad(mtr_memo_contains(mtr, next_block, MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(mtr, next_block,
- MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(next_page) == page_is_comp(page));
- ut_a(btr_page_get_prev(next_page, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- return(page_rec_get_next(page_get_infimum_rec(next_page)));
- }
-
- return(NULL);
-}
-
-/**************************************************************//**
-Creates a new index page (not the root, and also not
-used in page reorganization). @see btr_page_empty(). */
-UNIV_INTERN
-void
-btr_page_create(
-/*============*/
- buf_block_t* block, /*!< in/out: page to be created */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: the B-tree level of the page */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* page = buf_block_get_frame(block);
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));
-
- if (page_zip) {
- page_create_zip(block, index, level, 0, mtr);
- } else {
- page_create(block, mtr, dict_table_is_comp(index->table));
- /* Set the level of the new index page */
- btr_page_set_level(page, NULL, level, mtr);
- }
-
- block->check_index_page_at_flush = TRUE;
-
- btr_page_set_index_id(page, page_zip, index->id, mtr);
-}
-
-/**************************************************************//**
-Allocates a new file page to be used in an ibuf tree. Takes the page from
-the free list of the tree, which must contain pages!
-@return new allocated block, x-latched */
-static
-buf_block_t*
-btr_page_alloc_for_ibuf(
-/*====================*/
- dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in: mtr */
-{
- fil_addr_t node_addr;
- page_t* root;
- page_t* new_page;
- buf_block_t* new_block;
-
- root = btr_root_get(index, mtr);
-
- node_addr = flst_get_first(root + PAGE_HEADER
- + PAGE_BTR_IBUF_FREE_LIST, mtr);
- ut_a(node_addr.page != FIL_NULL);
-
- new_block = buf_page_get(dict_index_get_space(index),
- dict_table_zip_size(index->table),
- node_addr.page, RW_X_LATCH, mtr);
- new_page = buf_block_get_frame(new_block);
- buf_block_dbg_add_level(new_block, SYNC_IBUF_TREE_NODE_NEW);
-
- flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
- mtr);
- ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- mtr));
-
- return(new_block);
-}
-
-/**************************************************************//**
-Allocates a new file page to be used in an index tree. NOTE: we assume
-that the caller has made the reservation for free extents!
-@retval NULL if no page could be allocated
-@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
-(init_mtr == mtr, or the page was not previously freed in mtr)
-@retval block (not allocated or initialized) otherwise */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-buf_block_t*
-btr_page_alloc_low(
-/*===============*/
- dict_index_t* index, /*!< in: index */
- ulint hint_page_no, /*!< in: hint of a good page */
- byte file_direction, /*!< in: direction where a possible
- page split is made */
- ulint level, /*!< in: level where the page is placed
- in the tree */
- mtr_t* mtr, /*!< in/out: mini-transaction
- for the allocation */
- mtr_t* init_mtr) /*!< in/out: mtr or another
- mini-transaction in which the
- page should be initialized.
- If init_mtr!=mtr, but the page
- is already X-latched in mtr, do
- not initialize the page. */
-{
- fseg_header_t* seg_header;
- page_t* root;
-
- root = btr_root_get(index, mtr);
-
- if (level == 0) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
- } else {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
- }
-
- /* Parameter TRUE below states that the caller has made the
- reservation for free extents, and thus we know that a page can
- be allocated: */
-
- buf_block_t* block = fseg_alloc_free_page_general(
- seg_header, hint_page_no, file_direction,
- TRUE, mtr, init_mtr);
-
-#ifdef UNIV_DEBUG_SCRUBBING
- if (block != NULL) {
- fprintf(stderr,
- "alloc %lu:%lu to index: %lu root: %lu\n",
- buf_block_get_page_no(block),
- buf_block_get_space(block),
- index->id,
- dict_index_get_page(index));
- } else {
- fprintf(stderr,
- "failed alloc index: %lu root: %lu\n",
- index->id,
- dict_index_get_page(index));
- }
-#endif /* UNIV_DEBUG_SCRUBBING */
-
- return block;
-}
-
-/**************************************************************//**
-Allocates a new file page to be used in an index tree. NOTE: we assume
-that the caller has made the reservation for free extents!
-@retval NULL if no page could be allocated
-@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
-(init_mtr == mtr, or the page was not previously freed in mtr)
-@retval block (not allocated or initialized) otherwise */
-UNIV_INTERN
-buf_block_t*
-btr_page_alloc(
-/*===========*/
- dict_index_t* index, /*!< in: index */
- ulint hint_page_no, /*!< in: hint of a good page */
- byte file_direction, /*!< in: direction where a possible
- page split is made */
- ulint level, /*!< in: level where the page is placed
- in the tree */
- mtr_t* mtr, /*!< in/out: mini-transaction
- for the allocation */
- mtr_t* init_mtr) /*!< in/out: mini-transaction
- for x-latching and initializing
- the page */
-{
- buf_block_t* new_block;
-
- if (dict_index_is_ibuf(index)) {
-
- return(btr_page_alloc_for_ibuf(index, mtr));
- }
-
- new_block = btr_page_alloc_low(
- index, hint_page_no, file_direction, level, mtr, init_mtr);
-
- if (new_block) {
- buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
- }
-
- return(new_block);
-}
-
-/**************************************************************//**
-Gets the number of pages in a B-tree.
-@return number of pages, or ULINT_UNDEFINED if the index is unavailable */
-UNIV_INTERN
-ulint
-btr_get_size(
-/*=========*/
- dict_index_t* index, /*!< in: index */
- ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
- mtr_t* mtr) /*!< in/out: mini-transaction where index
- is s-latched */
-{
- ulint used;
- if (flag == BTR_N_LEAF_PAGES) {
- btr_get_size_and_reserved(index, flag, &used, mtr);
- return used;
- } else if (flag == BTR_TOTAL_SIZE) {
- return btr_get_size_and_reserved(index, flag, &used, mtr);
- } else {
- ut_error;
- }
- return (ULINT_UNDEFINED);
-}
-
-/**************************************************************//**
-Gets the number of reserved and used pages in a B-tree.
-@return number of pages reserved, or ULINT_UNDEFINED if the index
-is unavailable */
-UNIV_INTERN
-ulint
-btr_get_size_and_reserved(
-/*======================*/
- dict_index_t* index, /*!< in: index */
- ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
- ulint* used, /*!< out: number of pages used (<= reserved) */
- mtr_t* mtr) /*!< in/out: mini-transaction where index
- is s-latched */
-{
- fseg_header_t* seg_header;
- page_t* root;
- ulint n=ULINT_UNDEFINED;
- ulint dummy;
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_S_LOCK));
-
- ut_a(flag == BTR_N_LEAF_PAGES || flag == BTR_TOTAL_SIZE);
-
- if (index->page == FIL_NULL || dict_index_is_online_ddl(index)
- || *index->name == TEMP_INDEX_PREFIX) {
- return(ULINT_UNDEFINED);
- }
-
- root = btr_root_get(index, mtr);
- *used = 0;
-
- if (root) {
-
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
-
- n = fseg_n_reserved_pages(seg_header, used, mtr);
-
- if (flag == BTR_TOTAL_SIZE) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-
- n += fseg_n_reserved_pages(seg_header, &dummy, mtr);
- *used += dummy;
-
- }
- }
-
- return(n);
-}
-
-/**************************************************************//**
-Frees a page used in an ibuf tree. Puts the page to the free list of the
-ibuf tree. */
-static
-void
-btr_page_free_for_ibuf(
-/*===================*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: block to be freed, x-latched */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* root;
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- root = btr_root_get(index, mtr);
-
- flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- buf_block_get_frame(block)
- + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr);
-
- ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- mtr));
-}
-
-/**************************************************************//**
-Frees a file page used in an index tree. Can be used also to (BLOB)
-external storage pages, because the page level 0 can be given as an
-argument. */
-UNIV_INTERN
-void
-btr_page_free_low(
-/*==============*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: block to be freed, x-latched */
- ulint level, /*!< in: page level */
- bool blob, /*!< in: blob page */
- mtr_t* mtr) /*!< in: mtr */
-{
- fseg_header_t* seg_header;
- page_t* root;
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- /* The page gets invalid for optimistic searches: increment the frame
- modify clock */
-
- buf_block_modify_clock_inc(block);
- btr_blob_dbg_assert_empty(index, buf_block_get_page_no(block));
-
- if (blob) {
- ut_a(level == 0);
- }
-
- bool scrub = srv_immediate_scrub_data_uncompressed;
- /* scrub page */
- if (scrub && blob) {
- /* blob page: scrub entire page */
- // TODO(jonaso): scrub only what is actually needed
- page_t* page = buf_block_get_frame(block);
- memset(page + PAGE_HEADER, 0,
- UNIV_PAGE_SIZE - PAGE_HEADER);
-#ifdef UNIV_DEBUG_SCRUBBING
- fprintf(stderr,
- "btr_page_free_low: scrub blob page %lu/%lu\n",
- buf_block_get_space(block),
- buf_block_get_page_no(block));
-#endif /* UNIV_DEBUG_SCRUBBING */
- } else if (scrub) {
- /* scrub records on page */
-
- /* TODO(jonaso): in theory we could clear full page
- * but, since page still remains in buffer pool, and
- * gets flushed etc. Lots of routines validates consistency
- * of it. And in order to remain structurally consistent
- * we clear each record by it own
- *
- * NOTE: The TODO below mentions removing page from buffer pool
- * and removing redo entries, once that is done, clearing full
- * pages should be possible
- */
- uint cnt = 0;
- uint bytes = 0;
- page_t* page = buf_block_get_frame(block);
- mem_heap_t* heap = NULL;
- ulint* offsets = NULL;
- rec_t* rec = page_rec_get_next(page_get_infimum_rec(page));
- while (!page_rec_is_supremum(rec)) {
- offsets = rec_get_offsets(rec, index,
- offsets, ULINT_UNDEFINED,
- &heap);
- uint size = rec_offs_data_size(offsets);
- memset(rec, 0, size);
- rec = page_rec_get_next(rec);
- cnt++;
- bytes += size;
- }
-#ifdef UNIV_DEBUG_SCRUBBING
- fprintf(stderr,
- "btr_page_free_low: scrub %lu/%lu - "
- "%u records %u bytes\n",
- buf_block_get_space(block),
- buf_block_get_page_no(block),
- cnt, bytes);
-#endif /* UNIV_DEBUG_SCRUBBING */
- if (heap) {
- mem_heap_free(heap);
- }
- }
-
-#ifdef UNIV_DEBUG_SCRUBBING
- if (scrub == false) {
- fprintf(stderr,
- "btr_page_free_low %lu/%lu blob: %u\n",
- buf_block_get_space(block),
- buf_block_get_page_no(block),
- blob);
- }
-#endif /* UNIV_DEBUG_SCRUBBING */
-
- if (dict_index_is_ibuf(index)) {
-
- btr_page_free_for_ibuf(index, block, mtr);
-
- return;
- }
-
- root = btr_root_get(index, mtr);
-
- if (level == 0) {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
- } else {
- seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
- }
-
- if (scrub) {
- /**
- * Reset page type so that scrub thread won't try to scrub it
- */
- mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE,
- FIL_PAGE_TYPE_ALLOCATED, MLOG_2BYTES, mtr);
- }
-
- fseg_free_page(seg_header,
- buf_block_get_space(block),
- buf_block_get_page_no(block), mtr);
-
- /* The page was marked free in the allocation bitmap, but it
- should remain buffer-fixed until mtr_commit(mtr) or until it
- is explicitly freed from the mini-transaction. */
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- /* TODO: Discard any operations on the page from the redo log
- and remove the block from the flush list and the buffer pool.
- This would free up buffer pool earlier and reduce writes to
- both the tablespace and the redo log. */
-}
-
-/**************************************************************//**
-Frees a file page used in an index tree. NOTE: cannot free field external
-storage pages because the page must contain info on its level. */
-UNIV_INTERN
-void
-btr_page_free(
-/*==========*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: block to be freed, x-latched */
- mtr_t* mtr) /*!< in: mtr */
-{
- const page_t* page = buf_block_get_frame(block);
- ulint level = btr_page_get_level(page, mtr);
-
- ut_ad(fil_page_get_type(block->frame) == FIL_PAGE_INDEX);
- btr_page_free_low(index, block, level, false, mtr);
-}
-
-/**************************************************************//**
-Sets the child node file address in a node pointer. */
-UNIV_INLINE
-void
-btr_node_ptr_set_child_page_no(
-/*===========================*/
- rec_t* rec, /*!< in: node pointer record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
- part will be updated, or NULL */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint page_no,/*!< in: child node address */
- mtr_t* mtr) /*!< in: mtr */
-{
- byte* field;
- ulint len;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(!page_is_leaf(page_align(rec)));
- ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
-
- /* The child address is in the last field */
- field = rec_get_nth_field(rec, offsets,
- rec_offs_n_fields(offsets) - 1, &len);
-
- ut_ad(len == REC_NODE_PTR_SIZE);
-
- if (page_zip) {
- page_zip_write_node_ptr(page_zip, rec,
- rec_offs_data_size(offsets),
- page_no, mtr);
- } else {
- mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
- }
-}
-
-/************************************************************//**
-Returns the child page of a node pointer and x-latches it.
-@return child page, x-latched */
-buf_block_t*
-btr_node_ptr_get_child(
-/*===================*/
- const rec_t* node_ptr,/*!< in: node pointer */
- dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint page_no;
- ulint space;
-
- ut_ad(rec_offs_validate(node_ptr, index, offsets));
- space = page_get_space_id(page_align(node_ptr));
- page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
-
- return(btr_block_get(space, dict_table_zip_size(index->table),
- page_no, RW_X_LATCH, index, mtr));
-}
-
-/************************************************************//**
-Returns the upper level node pointer to a page. It is assumed that mtr holds
-an x-latch on the tree.
-@return rec_get_offsets() of the node pointer record */
-static
-ulint*
-btr_page_get_father_node_ptr_func(
-/*==============================*/
- ulint* offsets,/*!< in: work area for the return value */
- mem_heap_t* heap, /*!< in: memory heap to use */
- btr_cur_t* cursor, /*!< in: cursor pointing to user record,
- out: cursor on node pointer record,
- its page x-latched */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr */
-{
- dtuple_t* tuple;
- rec_t* user_rec;
- rec_t* node_ptr;
- ulint level;
- ulint page_no;
- dict_index_t* index;
-
- page_no = buf_block_get_page_no(btr_cur_get_block(cursor));
- index = btr_cur_get_index(cursor);
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
-
- ut_ad(dict_index_get_page(index) != page_no);
-
- level = btr_page_get_level(btr_cur_get_page(cursor), mtr);
-
- user_rec = btr_cur_get_rec(cursor);
- ut_a(page_rec_is_user_rec(user_rec));
- tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);
-
- btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
- BTR_CONT_MODIFY_TREE, cursor, 0,
- file, line, mtr);
-
- node_ptr = btr_cur_get_rec(cursor);
- ut_ad(!page_rec_is_comp(node_ptr)
- || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR);
- offsets = rec_get_offsets(node_ptr, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != page_no) {
- rec_t* print_rec;
- fputs("InnoDB: Dump of the child page:\n", stderr);
- buf_page_print(page_align(user_rec), 0,
- BUF_PAGE_PRINT_NO_CRASH);
- fputs("InnoDB: Dump of the parent page:\n", stderr);
- buf_page_print(page_align(node_ptr), 0,
- BUF_PAGE_PRINT_NO_CRASH);
-
- fputs("InnoDB: Corruption of an index tree: table ", stderr);
- ut_print_name(stderr, NULL, TRUE, index->table_name);
- fputs(", index ", stderr);
- ut_print_name(stderr, NULL, FALSE, index->name);
- fprintf(stderr, ",\n"
- "InnoDB: father ptr page no %lu, child page no %lu\n",
- (ulong)
- btr_node_ptr_get_child_page_no(node_ptr, offsets),
- (ulong) page_no);
- print_rec = page_rec_get_next(
- page_get_infimum_rec(page_align(user_rec)));
- offsets = rec_get_offsets(print_rec, index,
- offsets, ULINT_UNDEFINED, &heap);
- page_rec_print(print_rec, offsets);
- offsets = rec_get_offsets(node_ptr, index, offsets,
- ULINT_UNDEFINED, &heap);
- page_rec_print(node_ptr, offsets);
-
- fputs("InnoDB: You should dump + drop + reimport the table"
- " to fix the\n"
- "InnoDB: corruption. If the crash happens at "
- "the database startup, see\n"
- "InnoDB: " REFMAN "forcing-innodb-recovery.html about\n"
- "InnoDB: forcing recovery. "
- "Then dump + drop + reimport.\n", stderr);
-
- ut_error;
- }
-
- return(offsets);
-}
-
-#define btr_page_get_father_node_ptr(of,heap,cur,mtr) \
- btr_page_get_father_node_ptr_func(of,heap,cur,__FILE__,__LINE__,mtr)
-
-/************************************************************//**
-Returns the upper level node pointer to a page. It is assumed that mtr holds
-an x-latch on the tree.
-@return rec_get_offsets() of the node pointer record */
-static
-ulint*
-btr_page_get_father_block(
-/*======================*/
- ulint* offsets,/*!< in: work area for the return value */
- mem_heap_t* heap, /*!< in: memory heap to use */
- dict_index_t* index, /*!< in: b-tree index */
- buf_block_t* block, /*!< in: child page in the index */
- mtr_t* mtr, /*!< in: mtr */
- btr_cur_t* cursor) /*!< out: cursor on node pointer record,
- its page x-latched */
-{
- rec_t* rec
- = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
- block)));
- btr_cur_position(index, rec, block, cursor);
- return(btr_page_get_father_node_ptr(offsets, heap, cursor, mtr));
-}
-
-/************************************************************//**
-Seeks to the upper level node pointer to a page.
-It is assumed that mtr holds an x-latch on the tree. */
-static
-void
-btr_page_get_father(
-/*================*/
- dict_index_t* index, /*!< in: b-tree index */
- buf_block_t* block, /*!< in: child page in the index */
- mtr_t* mtr, /*!< in: mtr */
- btr_cur_t* cursor) /*!< out: cursor on node pointer record,
- its page x-latched */
-{
- mem_heap_t* heap;
- rec_t* rec
- = page_rec_get_next(page_get_infimum_rec(buf_block_get_frame(
- block)));
- btr_cur_position(index, rec, block, cursor);
-
- heap = mem_heap_create(100);
- btr_page_get_father_node_ptr(NULL, heap, cursor, mtr);
- mem_heap_free(heap);
-}
-
-/************************************************************//**
-Creates the root node for a new index tree.
-@return page number of the created root, FIL_NULL if did not succeed */
-UNIV_INTERN
-ulint
-btr_create(
-/*=======*/
- ulint type, /*!< in: type of the index */
- ulint space, /*!< in: space where created */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- index_id_t index_id,/*!< in: index id */
- dict_index_t* index, /*!< in: index */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- buf_block_t* block;
- page_t* page;
- page_zip_des_t* page_zip;
-
- /* Create the two new segments (one, in the case of an ibuf tree) for
- the index tree; the segment headers are put on the allocated root page
- (for an ibuf tree, not in the root, but on a separate ibuf header
- page) */
-
- if (type & DICT_IBUF) {
- /* Allocate first the ibuf header page */
- buf_block_t* ibuf_hdr_block = fseg_create(
- space, 0,
- IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
-
- if (ibuf_hdr_block == NULL) {
- return(FIL_NULL);
- }
-
- buf_block_dbg_add_level(
- ibuf_hdr_block, SYNC_IBUF_TREE_NODE_NEW);
-
- ut_ad(buf_block_get_page_no(ibuf_hdr_block)
- == IBUF_HEADER_PAGE_NO);
- /* Allocate then the next page to the segment: it will be the
- tree root page */
-
- block = fseg_alloc_free_page(
- buf_block_get_frame(ibuf_hdr_block)
- + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
- IBUF_TREE_ROOT_PAGE_NO,
- FSP_UP, mtr);
-
- if (block == NULL) {
- return(FIL_NULL);
- }
-
- ut_ad(buf_block_get_page_no(block) == IBUF_TREE_ROOT_PAGE_NO);
-
- buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW);
-
- flst_init(block->frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- mtr);
- } else {
-#ifdef UNIV_BLOB_DEBUG
- if ((type & DICT_CLUSTERED) && !index->blobs) {
- mutex_create(PFS_NOT_INSTRUMENTED,
- &index->blobs_mutex, SYNC_ANY_LATCH);
- index->blobs = rbt_create(sizeof(btr_blob_dbg_t),
- btr_blob_dbg_cmp);
- }
-#endif /* UNIV_BLOB_DEBUG */
- block = fseg_create(space, 0,
- PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr);
-
- if (block == NULL) {
- return(FIL_NULL);
- }
-
- buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
-
- if (!fseg_create(space, buf_block_get_page_no(block),
- PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) {
- /* Not enough space for new segment, free root
- segment before return. */
- btr_free_root(space, zip_size,
- buf_block_get_page_no(block), mtr);
- return(FIL_NULL);
- }
-
- /* The fseg create acquires a second latch on the page,
- therefore we must declare it: */
- buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
- }
-
- /* Create a new index page on the allocated segment page */
- page_zip = buf_block_get_page_zip(block);
-
- if (page_zip) {
- page = page_create_zip(block, index, 0, 0, mtr);
- } else {
- page = page_create(block, mtr,
- dict_table_is_comp(index->table));
- /* Set the level of the new index page */
- btr_page_set_level(page, NULL, 0, mtr);
- }
-
- block->check_index_page_at_flush = TRUE;
-
- /* Set the index id of the page */
- btr_page_set_index_id(page, page_zip, index_id, mtr);
-
- /* Set the next node and previous node fields */
- btr_page_set_next(page, page_zip, FIL_NULL, mtr);
- btr_page_set_prev(page, page_zip, FIL_NULL, mtr);
-
- /* We reset the free bits for the page to allow creation of several
- trees in the same mtr, otherwise the latch on a bitmap page would
- prevent it because of the latching order */
-
- if (!(type & DICT_CLUSTERED)) {
- ibuf_reset_free_bits(block);
- }
-
- /* In the following assertion we test that two records of maximum
- allowed size fit on the root page: this fact is needed to ensure
- correctness of split algorithms */
-
- ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE);
-
- return(buf_block_get_page_no(block));
-}
-
-/************************************************************//**
-Frees a B-tree except the root page, which MUST be freed after this
-by calling btr_free_root. */
-UNIV_INTERN
-void
-btr_free_but_not_root(
-/*==================*/
- ulint space, /*!< in: space where created */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint root_page_no) /*!< in: root page number */
-{
- ibool finished;
- page_t* root;
- mtr_t mtr;
-
-leaf_loop:
- mtr_start(&mtr);
-
- root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH,
- NULL, &mtr);
-
- if (!root) {
- mtr_commit(&mtr);
- return;
- }
-
- SRV_CORRUPT_TABLE_CHECK(root,
- {
- mtr_commit(&mtr);
- return;
- });
-
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
- + root, space));
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
- + root, space));
-#endif /* UNIV_BTR_DEBUG */
-
- /* NOTE: page hash indexes are dropped when a page is freed inside
- fsp0fsp. */
-
- finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF,
- &mtr);
- mtr_commit(&mtr);
-
- if (!finished) {
-
- goto leaf_loop;
- }
-top_loop:
- mtr_start(&mtr);
-
- root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH,
- NULL, &mtr);
-
- SRV_CORRUPT_TABLE_CHECK(root,
- {
- mtr_commit(&mtr);
- return;
- });
-
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
- + root, space));
-#endif /* UNIV_BTR_DEBUG */
-
- finished = fseg_free_step_not_header(
- root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
- mtr_commit(&mtr);
-
- if (!finished) {
-
- goto top_loop;
- }
-}
-
-/************************************************************//**
-Frees the B-tree root page. Other tree MUST already have been freed. */
-UNIV_INTERN
-void
-btr_free_root(
-/*==========*/
- ulint space, /*!< in: space where created */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint root_page_no, /*!< in: root page number */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- buf_block_t* block;
- fseg_header_t* header;
-
- block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH,
- NULL, mtr);
-
- if (block) {
- SRV_CORRUPT_TABLE_CHECK(block, return;);
-
- btr_search_drop_page_hash_index(block);
-
- header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_root_fseg_validate(header, space));
-#endif /* UNIV_BTR_DEBUG */
-
- while (!fseg_free_step(header, mtr)) {
- /* Free the entire segment in small steps. */
- }
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************//**
-Reorganizes an index page.
-
-IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index. This has to
-be done either within the same mini-transaction, or by invoking
-ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
-IBUF_BITMAP_FREE is unaffected by reorganization.
-
-@retval true if the operation was successful
-@retval false if it is a compressed page, and recompression failed */
-UNIV_INTERN
-bool
-btr_page_reorganize_low(
-/*====================*/
- bool recovery,/*!< in: true if called in recovery:
- locks should not be updated, i.e.,
- there cannot exist locks on the
- page, and a hash index should not be
- dropped: it cannot exist */
- ulint z_level,/*!< in: compression level to be used
- if dealing with compressed page */
- page_cur_t* cursor, /*!< in/out: page cursor */
- dict_index_t* index, /*!< in: the index tree of the page */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{ /* Overview: the page is copied into a temporary block, recreated
- empty with page_create(), and refilled from the copy with
- page_copy_rec_list_end_no_locks(). A compressed page is then
- recompressed; if that fails, the saved bytes are copied back and
- the function returns false. */
- buf_block_t* block = page_cur_get_block(cursor);
-#ifndef UNIV_HOTBACKUP
- buf_pool_t* buf_pool = buf_pool_from_bpage(&block->page);
-#endif /* !UNIV_HOTBACKUP */
- page_t* page = buf_block_get_frame(block);
- page_zip_des_t* page_zip = buf_block_get_page_zip(block);
- buf_block_t* temp_block;
- page_t* temp_page;
- ulint log_mode;
- ulint data_size1;
- ulint data_size2;
- ulint max_ins_size1;
- ulint max_ins_size2;
- bool success = false; /* set to true only when the rebuilt
- page has the same data size and max insert size as before */
- ulint pos;
- bool log_compressed;
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- btr_assert_not_corrupted(block, index);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
- data_size1 = page_get_data_size(page);
- max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
-
- /* Turn logging off; the previous mode is kept in log_mode and
- restored further down */
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-
-#ifndef UNIV_HOTBACKUP
- temp_block = buf_block_alloc(buf_pool);
-#else /* !UNIV_HOTBACKUP */
- ut_ad(block == back_block1);
- temp_block = back_block2;
-#endif /* !UNIV_HOTBACKUP */
- temp_page = temp_block->frame;
-
- MONITOR_INC(MONITOR_INDEX_REORG_ATTEMPTS);
-
- /* Copy the old page to temporary space */
- buf_frame_copy(temp_page, page);
-
-#ifndef UNIV_HOTBACKUP
- if (!recovery) {
- btr_search_drop_page_hash_index(block);
- }
-
- block->check_index_page_at_flush = TRUE;
-#endif /* !UNIV_HOTBACKUP */
- btr_blob_dbg_remove(page, index, "btr_page_reorganize");
-
- /* Save the cursor position as the number of records before the
- cursor record, so that it can be looked up again with
- page_rec_get_nth() once the page has been rebuilt. */
- pos = page_rec_get_n_recs_before(page_cur_get_rec(cursor));
-
- /* Recreate the page: note that global data on page (possible
- segment headers, next page-field, etc.) is preserved intact */
-
- page_create(block, mtr, dict_table_is_comp(index->table));
-
- /* Copy the records from the temporary space to the recreated page;
- do not copy the lock bits yet */
-
- page_copy_rec_list_end_no_locks(block, temp_block,
- page_get_infimum_rec(temp_page),
- index, mtr);
-
- if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
- /* Copy max trx id to recreated page */
- trx_id_t max_trx_id = page_get_max_trx_id(temp_page);
- page_set_max_trx_id(block, NULL, max_trx_id, mtr);
- /* In crash recovery, dict_index_is_sec_or_ibuf() always
- holds, even for clustered indexes. max_trx_id is
- unused in clustered index pages. */
- ut_ad(max_trx_id != 0 || recovery);
- }
-
- /* If innodb_log_compressed_pages is ON, page reorganize should log the
- compressed page image.*/
- log_compressed = page_zip && page_zip_log_pages;
-
- if (log_compressed) {
- mtr_set_log_mode(mtr, log_mode);
- }
-
- if (page_zip
- && !page_zip_compress(page_zip, page, index, z_level, mtr)) {
-
- /* Restore the old page and exit. Only the part of the page
- header before PAGE_N_RECS and the area from PAGE_DATA up to
- the page trailer are copied back; the debug checks assert
- that the skipped bytes are already identical. */
- btr_blob_dbg_restore(page, temp_page, index,
- "btr_page_reorganize_compress_fail");
-
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
- /* Check that the bytes that we skip are identical. */
- ut_a(!memcmp(page, temp_page, PAGE_HEADER));
- ut_a(!memcmp(PAGE_HEADER + PAGE_N_RECS + page,
- PAGE_HEADER + PAGE_N_RECS + temp_page,
- PAGE_DATA - (PAGE_HEADER + PAGE_N_RECS)));
- ut_a(!memcmp(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page,
- UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + temp_page,
- FIL_PAGE_DATA_END));
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-
- memcpy(PAGE_HEADER + page, PAGE_HEADER + temp_page,
- PAGE_N_RECS - PAGE_N_DIR_SLOTS);
- memcpy(PAGE_DATA + page, PAGE_DATA + temp_page,
- UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
-
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
- ut_a(!memcmp(page, temp_page, UNIV_PAGE_SIZE));
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-
- goto func_exit;
- }
-
-#ifndef UNIV_HOTBACKUP
- if (!recovery) {
- /* Update the record lock bitmaps */
- lock_move_reorganize_page(block, temp_block);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- data_size2 = page_get_data_size(page);
- max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1);
-
- if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) {
- buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(temp_page, 0, BUF_PAGE_PRINT_NO_CRASH);
-
- fprintf(stderr,
- "InnoDB: Error: page old data size %lu"
- " new data size %lu\n"
- "InnoDB: Error: page old max ins size %lu"
- " new max ins size %lu\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n",
- (unsigned long) data_size1, (unsigned long) data_size2,
- (unsigned long) max_ins_size1,
- (unsigned long) max_ins_size2);
- ut_ad(0);
- } else {
- success = true;
- }
-
- /* Restore the cursor position. pos == 0 means the cursor was on
- the page infimum, where it is expected to still be. */
- if (pos > 0) {
- cursor->rec = page_rec_get_nth(page, pos);
- } else {
- ut_ad(cursor->rec == page_get_infimum_rec(page));
- }
-
-func_exit:
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-#ifndef UNIV_HOTBACKUP
- buf_block_free(temp_block);
-#endif /* !UNIV_HOTBACKUP */
-
- /* Restore logging mode */
- mtr_set_log_mode(mtr, log_mode);
-
-#ifndef UNIV_HOTBACKUP
- if (success) {
- byte type;
- byte* log_ptr;
-
- /* Write the log record. No reorganize record is opened
- here when log_compressed is set (log_ptr stays NULL). */
- if (page_zip) {
- ut_ad(page_is_comp(page));
- type = MLOG_ZIP_PAGE_REORGANIZE;
- } else if (page_is_comp(page)) {
- type = MLOG_COMP_PAGE_REORGANIZE;
- } else {
- type = MLOG_PAGE_REORGANIZE;
- }
-
- log_ptr = log_compressed
- ? NULL
- : mlog_open_and_write_index(
- mtr, page, index, type,
- page_zip ? 1 : 0);
-
- /* For compressed pages write the compression level. */
- if (log_ptr && page_zip) {
- mach_write_to_1(log_ptr, z_level);
- mlog_close(mtr, log_ptr + 1);
- }
-
- MONITOR_INC(MONITOR_INDEX_REORG_SUCCESSFUL);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- return(success);
-}
-
-/*************************************************************//**
-Reorganizes the index page held in a buffer block. A page cursor is
-positioned before the first record of the block and the work is then
-handed over to btr_page_reorganize_low().
-
-IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index. This has to
-be done either within the same mini-transaction, or by invoking
-ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
-IBUF_BITMAP_FREE is unaffected by reorganization.
-
-@retval true if the operation was successful
-@retval false if it is a compressed page, and recompression failed */
-UNIV_INTERN
-bool
-btr_page_reorganize_block(
-/*======================*/
-	bool		recovery,/*!< in: true if called in recovery:
-				locks should not be updated, i.e.,
-				there cannot exist locks on the
-				page, and a hash index should not be
-				dropped: it cannot exist */
-	ulint		z_level,/*!< in: compression level to be used
-				if dealing with compressed page */
-	buf_block_t*	block,	/*!< in/out: B-tree page */
-	dict_index_t*	index,	/*!< in: the index tree of the page */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	page_cur_t	page_cursor;
-	bool		reorganized;
-
-	/* The low-level routine works on a cursor; start it on the
-	page infimum of the given block. */
-	page_cur_set_before_first(block, &page_cursor);
-
-	reorganized = btr_page_reorganize_low(
-		recovery, z_level, &page_cursor, index, mtr);
-
-	return(reorganized);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Reorganizes the index page the cursor is on, outside of recovery and
-using the configured compression level (page_zip_level).
-
-IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index. This has to
-be done either within the same mini-transaction, or by invoking
-ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
-IBUF_BITMAP_FREE is unaffected by reorganization.
-
-@retval true if the operation was successful
-@retval false if it is a compressed page, and recompression failed */
-UNIV_INTERN
-bool
-btr_page_reorganize(
-/*================*/
-	page_cur_t*	cursor,	/*!< in/out: page cursor */
-	dict_index_t*	index,	/*!< in: the index tree of the page */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	/* recovery = false: this entry point is the non-recovery one */
-	const bool	reorganized = btr_page_reorganize_low(
-		false, page_zip_level, cursor, index, mtr);
-
-	return(reorganized);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses a redo log record of reorganizing a page and, when a block is
-given, applies the reorganization to it in recovery mode.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-btr_parse_page_reorganize(
-/*======================*/
-	byte*		ptr,	/*!< in: buffer */
-	byte*		end_ptr,/*!< in: buffer end */
-	dict_index_t*	index,	/*!< in: record descriptor */
-	bool		compressed,/*!< in: true if compressed page */
-	buf_block_t*	block,	/*!< in: page to be reorganized, or NULL */
-	mtr_t*		mtr)	/*!< in: mtr or NULL */
-{
-	ulint	level;
-
-	ut_ad(ptr != NULL);
-	ut_ad(end_ptr != NULL);
-
-	if (!compressed) {
-		/* The record of an uncompressed page carries no
-		payload; use the configured level. */
-		level = page_zip_level;
-	} else if (ptr == end_ptr) {
-		/* The one-byte compression level is not in the
-		buffer. */
-		return(NULL);
-	} else {
-		/* A compressed page record stores, in one byte, the
-		compression level used when the page was originally
-		compressed. */
-		level = mach_read_from_1(ptr);
-
-		ut_a(level <= 9);
-		ptr++;
-	}
-
-	if (block == NULL) {
-		return(ptr);
-	}
-
-	btr_page_reorganize_block(true, level, block, index, mtr);
-
-	return(ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Empties an index page by recreating it at the given B-tree level.
-@see btr_page_create(). */
-UNIV_INTERN
-void
-btr_page_empty(
-/*===========*/
-	buf_block_t*	block,	/*!< in: page to be emptied */
-	page_zip_des_t*	page_zip,/*!< out: compressed page, or NULL */
-	dict_index_t*	index,	/*!< in: index of the page */
-	ulint		level,	/*!< in: the B-tree level of the page */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	page_t*	page = buf_block_get_frame(block);
-
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-	ut_ad(page_zip == buf_block_get_page_zip(block));
-#ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
-	btr_search_drop_page_hash_index(block);
-	btr_blob_dbg_remove(page, index, "btr_page_empty");
-
-	/* Recreate the page: note that global data on page (possible
-	segment headers, next page-field, etc.) is preserved intact */
-
-	if (page_zip == NULL) {
-		/* Uncompressed page: the level is written separately
-		after the page has been created. */
-		page_create(block, mtr, dict_table_is_comp(index->table));
-		btr_page_set_level(page, NULL, level, mtr);
-	} else {
-		page_create_zip(block, index, level, 0, mtr);
-	}
-
-	block->check_index_page_at_flush = TRUE;
-}
-
-/*************************************************************//**
-Makes tree one level higher by splitting the root, and inserts
-the tuple. It is assumed that mtr contains an x-latch on the tree.
-NOTE that the operation of this function must always succeed,
-we cannot reverse it: therefore enough free disk space must be
-guaranteed to be available before this function is called.
-
-Steps, as implemented below: a new page is allocated on the root's
-level and the root's records are copied to it; the root is emptied
-with btr_page_empty() at level + 1 and receives a single node pointer
-to the new page; finally the new page is split and the tuple inserted
-by btr_page_split_and_insert().
-@return inserted record or NULL if run out of space */
-UNIV_INTERN
-rec_t*
-btr_root_raise_and_insert(
-/*======================*/
- ulint flags, /*!< in: undo logging and locking flags */
- btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
- on the root page; when the function returns,
- the cursor is positioned on the predecessor
- of the inserted record */
- ulint** offsets,/*!< out: offsets on inserted record */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
- const dtuple_t* tuple, /*!< in: tuple to insert */
- ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_index_t* index;
- page_t* root;
- page_t* new_page;
- ulint new_page_no;
- rec_t* rec;
- dtuple_t* node_ptr;
- ulint level;
- rec_t* node_ptr_rec;
- page_cur_t* page_cursor;
- page_zip_des_t* root_page_zip;
- page_zip_des_t* new_page_zip;
- buf_block_t* root_block;
- buf_block_t* new_block;
-
- root = btr_cur_get_page(cursor);
- root_block = btr_cur_get_block(cursor);
- root_page_zip = buf_block_get_page_zip(root_block);
- ut_ad(!page_is_empty(root));
- index = btr_cur_get_index(cursor);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!root_page_zip || page_zip_validate(root_page_zip, root, index));
-#endif /* UNIV_ZIP_DEBUG */
-#ifdef UNIV_BTR_DEBUG
- if (!dict_index_is_ibuf(index)) {
- ulint space = dict_index_get_space(index);
-
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
- + root, space));
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
- + root, space));
- }
-
- ut_a(dict_index_get_page(index) == page_get_page_no(root));
-#endif /* UNIV_BTR_DEBUG */
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX));
-
- /* Allocate a new page to the tree. Root splitting is done by first
- moving the root records to the new page, emptying the root, putting
- a node pointer to the new page, and then splitting the new page. */
-
- level = btr_page_get_level(root, mtr);
-
- new_block = btr_page_alloc(index, 0, FSP_NO_DIR, level, mtr, mtr);
-
- if (new_block == NULL && os_has_said_disk_full) { /* NOTE(review):
- if btr_page_alloc() can return NULL while os_has_said_disk_full
- is not set, new_block is passed on as NULL to
- buf_block_get_frame() below -- confirm that this cannot
- happen. */
- return(NULL);
- }
-
- new_page = buf_block_get_frame(new_block);
- new_page_zip = buf_block_get_page_zip(new_block);
- ut_a(!new_page_zip == !root_page_zip);
- ut_a(!new_page_zip
- || page_zip_get_size(new_page_zip)
- == page_zip_get_size(root_page_zip));
-
- btr_page_create(new_block, new_page_zip, index, level, mtr);
-
- /* Set the next node and previous node fields of new page */
- btr_page_set_next(new_page, new_page_zip, FIL_NULL, mtr);
- btr_page_set_prev(new_page, new_page_zip, FIL_NULL, mtr);
-
- /* Copy the records from root to the new page one by one. If
- page_copy_rec_list_end() fails (or UNIV_ZIP_COPY is defined and
- the page is compressed), the branch below copies the compressed
- page with page_zip_copy_recs() instead. */
-
- if (0
-#ifdef UNIV_ZIP_COPY
- || new_page_zip
-#endif /* UNIV_ZIP_COPY */
- || !page_copy_rec_list_end(new_block, root_block,
- page_get_infimum_rec(root),
- index, mtr)) {
- ut_a(new_page_zip);
-
- /* Copy the page byte for byte. */
- page_zip_copy_recs(new_page_zip, new_page,
- root_page_zip, root, index, mtr);
-
- /* Update the lock table and possible hash index. */
-
- lock_move_rec_list_end(new_block, root_block,
- page_get_infimum_rec(root));
-
- btr_search_move_or_delete_hash_entries(new_block, root_block,
- index);
- }
-
- /* If this is a pessimistic insert which is actually done to
- perform a pessimistic update then we have stored the lock
- information of the record to be inserted on the infimum of the
- root page: we cannot discard the lock structs on the root page */
-
- lock_update_root_raise(new_block, root_block);
-
- /* Create a memory heap where the node pointer is stored */
- if (!*heap) {
- *heap = mem_heap_create(1000);
- }
-
- rec = page_rec_get_next(page_get_infimum_rec(new_page));
- new_page_no = buf_block_get_page_no(new_block);
-
- /* Build the node pointer (= node key and page address) for the
- child */
-
- node_ptr = dict_index_build_node_ptr(
- index, rec, new_page_no, *heap, level);
- /* The node pointer must be marked as the predefined minimum record,
- as there is no lower alphabetical limit to records in the leftmost
- node of a level: */
- dtuple_set_info_bits(node_ptr,
- dtuple_get_info_bits(node_ptr)
- | REC_INFO_MIN_REC_FLAG);
-
- /* Rebuild the root page to get free space */
- btr_page_empty(root_block, root_page_zip, index, level + 1, mtr);
-
- /* Set the next node and previous node fields, although
- they should already have been set. The previous node field
- must be FIL_NULL if root_page_zip != NULL, because the
- REC_INFO_MIN_REC_FLAG (of the first user record) will be
- set if and only if btr_page_get_prev() == FIL_NULL. */
- btr_page_set_next(root, root_page_zip, FIL_NULL, mtr);
- btr_page_set_prev(root, root_page_zip, FIL_NULL, mtr);
-
- page_cursor = btr_cur_get_page_cur(cursor);
-
- /* Insert node pointer to the root */
-
- page_cur_set_before_first(root_block, page_cursor);
-
- node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr,
- index, offsets, heap, 0, mtr);
-
- /* The root page should only contain the node pointer
- to new_page at this point. Thus, the data should fit. */
- ut_a(node_ptr_rec);
-
- /* We play safe and reset the free bits for the new page */
-
-#if 0
- fprintf(stderr, "Root raise new page no %lu\n", new_page_no);
-#endif
-
- if (!dict_index_is_clust(index)) {
- ibuf_reset_free_bits(new_block);
- }
-
- if (tuple != NULL) {
- /* Reposition the cursor to the child node */
- page_cur_search(new_block, index, tuple,
- PAGE_CUR_LE, page_cursor);
- } else {
- /* Set cursor to first record on child node */
- page_cur_set_before_first(new_block, page_cursor);
- }
-
- /* Split the child and insert tuple */
- return(btr_page_split_and_insert(flags, cursor, offsets, heap,
- tuple, n_ext, mtr));
-}
-
-/*************************************************************//**
-Decides if the page should be split at the convergence point of inserts
-converging to the left. The pattern is detected when the record that
-follows the insert point is the one recorded in PAGE_LAST_INSERT.
-@return TRUE if split recommended */
-UNIV_INTERN
-ibool
-btr_page_get_split_rec_to_left(
-/*===========================*/
-	btr_cur_t*	cursor,	/*!< in: cursor at which to insert */
-	rec_t**		split_rec) /*!< out: if split recommended,
-				the first record on upper half page,
-				or NULL if tuple to be inserted should
-				be first */
-{
-	page_t*	page		= btr_cur_get_page(cursor);
-	rec_t*	insert_point	= btr_cur_get_rec(cursor);
-
-	if (page_header_get_ptr(page, PAGE_LAST_INSERT)
-	    != page_rec_get_next(insert_point)) {
-
-		/* No leftward insert pattern: no recommendation */
-		return(FALSE);
-	}
-
-	rec_t*	infimum = page_get_infimum_rec(page);
-
-	/* If the convergence is in the middle of a page, include also
-	the record immediately before the new insert to the upper
-	page. Otherwise, we could repeatedly move from page to page
-	lots of records smaller than the convergence point. */
-
-	if (infimum == insert_point
-	    || page_rec_get_next(infimum) == insert_point) {
-
-		/* The insert point is the infimum or the first user
-		record: split right after it. */
-		*split_rec = page_rec_get_next(insert_point);
-	} else {
-		*split_rec = insert_point;
-	}
-
-	return(TRUE);
-}
-
-/*************************************************************//**
-Decides if the page should be split at the convergence point of inserts
-converging to the right. The pattern is detected when the insert point
-is the record recorded in PAGE_LAST_INSERT.
-@return TRUE if split recommended */
-UNIV_INTERN
-ibool
-btr_page_get_split_rec_to_right(
-/*============================*/
-	btr_cur_t*	cursor,	/*!< in: cursor at which to insert */
-	rec_t**		split_rec) /*!< out: if split recommended,
-				the first record on upper half page,
-				or NULL if tuple to be inserted should
-				be first */
-{
-	page_t*	page		= btr_cur_get_page(cursor);
-	rec_t*	insert_point	= btr_cur_get_rec(cursor);
-
-	/* We use eager heuristics: if the new insert would be right after
-	the previous insert on the same page, we assume that there is a
-	pattern of sequential inserts here. */
-
-	if (page_header_get_ptr(page, PAGE_LAST_INSERT) != insert_point) {
-
-		return(FALSE);
-	}
-
-	rec_t*	next_rec = page_rec_get_next(insert_point);
-
-	if (page_rec_is_supremum(next_rec)) {
-		/* Split at the new record to insert */
-		*split_rec = NULL;
-	} else {
-		rec_t*	next_next_rec = page_rec_get_next(next_rec);
-
-		if (page_rec_is_supremum(next_next_rec)) {
-			/* Only one user record follows the insert
-			point: split at the new record to insert */
-			*split_rec = NULL;
-		} else {
-			/* If there are >= 2 user records up from the
-			insert point, split all but 1 off. We want to
-			keep one because then sequential inserts can
-			use the adaptive hash index, as they can do the
-			necessary checks of the right search position
-			just by looking at the records on this page. */
-			*split_rec = next_next_rec;
-		}
-	}
-
-	return(TRUE);
-}
-
-/*************************************************************//**
-Calculates a split record such that the tuple will certainly fit on
-its half-page when the split is performed. We assume in this function
-only that the cursor page has at least one user record.
-
-The page records are walked in order, with the tuple taken in at its
-insert position (right after the cursor record), and their sizes are
-accumulated until about half of the total space has been included.
-A heap allocated by rec_get_offsets() is freed before returning.
-@return split record, or NULL if tuple will be the first record on
-the lower or upper half-page (determined by btr_page_tuple_smaller()) */
-static
-rec_t*
-btr_page_get_split_rec(
-/*===================*/
- btr_cur_t* cursor, /*!< in: cursor at which insert should be made */
- const dtuple_t* tuple, /*!< in: tuple to insert */
- ulint n_ext) /*!< in: number of externally stored columns */
-{
- page_t* page;
- page_zip_des_t* page_zip;
- ulint insert_size;
- ulint free_space;
- ulint total_data;
- ulint total_n_recs;
- ulint total_space;
- ulint incl_data;
- rec_t* ins_rec;
- rec_t* rec;
- rec_t* next_rec;
- ulint n;
- mem_heap_t* heap;
- ulint* offsets;
-
- page = btr_cur_get_page(cursor);
-
- insert_size = rec_get_converted_size(cursor->index, tuple, n_ext);
- free_space = page_get_free_space_of_empty(page_is_comp(page));
-
- page_zip = btr_cur_get_page_zip(cursor);
- if (page_zip) {
- /* Estimate the free space of an empty compressed page. The
- smaller of the two estimates is used. */
- ulint free_space_zip = page_zip_empty_size(
- cursor->index->n_fields,
- page_zip_get_size(page_zip));
-
- if (free_space > (ulint) free_space_zip) {
- free_space = (ulint) free_space_zip;
- }
- }
-
- /* free_space is now the free space of a created new page */
-
- total_data = page_get_data_size(page) + insert_size;
- total_n_recs = page_get_n_recs(page) + 1;
- ut_ad(total_n_recs >= 2);
- total_space = total_data + page_dir_calc_reserved_space(total_n_recs);
-
- n = 0;
- incl_data = 0;
- ins_rec = btr_cur_get_rec(cursor);
- rec = page_get_infimum_rec(page);
-
- heap = NULL;
- offsets = NULL;
-
- /* We start to include records to the left half, and when the
- space reserved by them exceeds half of total_space, then if
- the included records fit on the left page, they will be put there
- if something was left over also for the right page,
- otherwise the last included record will be the first on the right
- half page */
-
- do {
- /* Decide the next record to include. rec == NULL stands for
- the tuple itself: it is visited right after ins_rec, and the
- walk then continues with the record following ins_rec. */
- if (rec == ins_rec) {
- rec = NULL; /* NULL denotes that tuple is
- now included */
- } else if (rec == NULL) {
- rec = page_rec_get_next(ins_rec);
- } else {
- rec = page_rec_get_next(rec);
- }
-
- if (rec == NULL) {
- /* Include tuple */
- incl_data += insert_size;
- } else {
- offsets = rec_get_offsets(rec, cursor->index,
- offsets, ULINT_UNDEFINED,
- &heap);
- incl_data += rec_offs_size(offsets);
- }
-
- n++;
- } while (incl_data + page_dir_calc_reserved_space(n)
- < total_space / 2);
-
- if (incl_data + page_dir_calc_reserved_space(n) <= free_space) {
- /* The next record will be the first on
- the right half page if it is not the
- supremum record of page */
-
- if (rec == ins_rec) {
- rec = NULL;
-
- goto func_exit;
- } else if (rec == NULL) {
- next_rec = page_rec_get_next(ins_rec);
- } else {
- next_rec = page_rec_get_next(rec);
- }
- ut_ad(next_rec);
- if (!page_rec_is_supremum(next_rec)) {
- rec = next_rec;
- }
- }
-
-func_exit:
- if (heap) {
- mem_heap_free(heap);
- }
- return(rec);
-}
-
-/*************************************************************//**
-Checks whether the tuple fits on the half-page it would end up on if
-the page were split at split_rec. Records destined for the other
-half-page are subtracted one by one until the remainder fits into an
-empty page, or until all of them have been taken off.
-@return true if fits */
-static MY_ATTRIBUTE((nonnull(1,3,4,6), warn_unused_result))
-bool
-btr_page_insert_fits(
-/*=================*/
-	btr_cur_t*	cursor,	/*!< in: cursor at which insert
-				should be made */
-	const rec_t*	split_rec,/*!< in: suggestion for first record
-				on upper half-page, or NULL if
-				tuple to be inserted should be first */
-	ulint**		offsets,/*!< in: rec_get_offsets(
-				split_rec, cursor->index); out: garbage */
-	const dtuple_t*	tuple,	/*!< in: tuple to insert */
-	ulint		n_ext,	/*!< in: number of externally stored columns */
-	mem_heap_t**	heap)	/*!< in: temporary memory heap */
-{
-	page_t*		page = btr_cur_get_page(cursor);
-	const rec_t*	rec;
-	const rec_t*	end_rec;
-
-	ut_ad(!split_rec
-	      || !page_is_comp(page) == !rec_offs_comp(*offsets));
-	ut_ad(!split_rec
-	      || rec_offs_validate(split_rec, cursor->index, *offsets));
-
-	const ulint	insert_size = rec_get_converted_size(
-		cursor->index, tuple, n_ext);
-
-	/* The free space of a created new page */
-	const ulint	free_space = page_get_free_space_of_empty(
-		page_is_comp(page));
-
-	ulint	total_data = page_get_data_size(page) + insert_size;
-	ulint	total_n_recs = page_get_n_recs(page) + 1;
-
-	/* We determine which records (from rec to end_rec, not including
-	end_rec) will end up on the other half page from tuple when it is
-	inserted. */
-
-	if (split_rec == NULL) {
-		rec = page_rec_get_next(page_get_infimum_rec(page));
-		end_rec = page_rec_get_next(btr_cur_get_rec(cursor));
-
-	} else if (cmp_dtuple_rec(tuple, split_rec, *offsets) >= 0) {
-
-		rec = page_rec_get_next(page_get_infimum_rec(page));
-		end_rec = split_rec;
-	} else {
-		rec = split_rec;
-		end_rec = page_get_supremum_rec(page);
-	}
-
-	if (total_data + page_dir_calc_reserved_space(total_n_recs)
-	    <= free_space) {
-
-		/* Everything fits even before any record is moved
-		away from the tuple's half page */
-
-		return(true);
-	}
-
-	for (; rec != end_rec; rec = page_rec_get_next_const(rec)) {
-		/* Calculate the amount of reserved space after rec
-		is removed from page. */
-
-		*offsets = rec_get_offsets(rec, cursor->index, *offsets,
-					   ULINT_UNDEFINED, heap);
-
-		total_data -= rec_offs_size(*offsets);
-		total_n_recs--;
-
-		if (total_data + page_dir_calc_reserved_space(total_n_recs)
-		    <= free_space) {
-
-			/* Ok, there will be enough available space on the
-			half page where the tuple is inserted */
-
-			return(true);
-		}
-	}
-
-	return(false);
-}
-
-/*******************************************************//**
-Inserts a data tuple to a tree on a non-leaf level. It is assumed
-that mtr holds an x-latch on the tree.
-
-The cursor is positioned on the given level with PAGE_CUR_LE. An
-optimistic insert is tried first; only if it returns DB_FAIL is a
-pessimistic insert performed, and that one must succeed. Both inserts
-run with locking and undo logging disabled and with system fields
-kept as they are. */
-UNIV_INTERN
-void
-btr_insert_on_non_leaf_level_func(
-/*==============================*/
-	ulint		flags,	/*!< in: undo logging and locking flags */
-	dict_index_t*	index,	/*!< in: index */
-	ulint		level,	/*!< in: level, must be > 0 */
-	dtuple_t*	tuple,	/*!< in: the record to be inserted */
-	const char*	file,	/*!< in: file name */
-	ulint		line,	/*!< in: line where called */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	big_rec_t*	dummy_big_rec;
-	btr_cur_t	cursor;
-	dberr_t		err;
-	rec_t*		rec;
-	ulint*		offsets	= NULL;
-	mem_heap_t*	heap	= NULL;
-
-	ut_ad(level > 0);
-
-	btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
-				    BTR_CONT_MODIFY_TREE,
-				    &cursor, 0, file, line, mtr);
-
-	ut_ad(cursor.flag == BTR_CUR_BINARY);
-
-	err = btr_cur_optimistic_insert(
-		flags
-		| BTR_NO_LOCKING_FLAG
-		| BTR_KEEP_SYS_FLAG
-		| BTR_NO_UNDO_LOG_FLAG,
-		&cursor, &offsets, &heap,
-		tuple, &rec, &dummy_big_rec, 0, NULL, mtr);
-
-	if (err == DB_FAIL) {
-		err = btr_cur_pessimistic_insert(flags
-						 | BTR_NO_LOCKING_FLAG
-						 | BTR_KEEP_SYS_FLAG
-						 | BTR_NO_UNDO_LOG_FLAG,
-						 &cursor, &offsets, &heap,
-						 tuple, &rec,
-						 &dummy_big_rec, 0, NULL, mtr);
-		ut_a(err == DB_SUCCESS);
-	}
-
-	/* The heap starts out as NULL and is only created by the insert
-	calls above when they need one. Do not hand a NULL heap to
-	mem_heap_free() if an insert returned without allocating it. */
-	if (heap != NULL) {
-		mem_heap_free(heap);
-	}
-}
-
-/**************************************************************//**
-Attaches the halves of an index page on the appropriate level in an
-index tree.
-
-For FSP_DOWN the new block becomes the lower half and the existing
-father node pointer of the split page is redirected to it; otherwise
-the split page itself stays the lower half. A node pointer for the
-upper half is then inserted on level + 1, and the prev/next links of
-the two halves and of their neighbours are updated. */
-static MY_ATTRIBUTE((nonnull))
-void
-btr_attach_half_pages(
-/*==================*/
- ulint flags, /*!< in: undo logging and
- locking flags */
- dict_index_t* index, /*!< in: the index tree */
- buf_block_t* block, /*!< in/out: page to be split */
- const rec_t* split_rec, /*!< in: first record on upper
- half page */
- buf_block_t* new_block, /*!< in/out: the new half page */
- ulint direction, /*!< in: FSP_UP or FSP_DOWN */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint space;
- ulint zip_size;
- ulint prev_page_no;
- ulint next_page_no;
- ulint level;
- page_t* page = buf_block_get_frame(block);
- page_t* lower_page;
- page_t* upper_page;
- ulint lower_page_no;
- ulint upper_page_no;
- page_zip_des_t* lower_page_zip;
- page_zip_des_t* upper_page_zip;
- dtuple_t* node_ptr_upper;
- mem_heap_t* heap;
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains(mtr, new_block, MTR_MEMO_PAGE_X_FIX));
-
- /* Create a memory heap where the data tuple is stored */
- heap = mem_heap_create(1024);
-
- /* Based on split direction, decide upper and lower pages */
- if (direction == FSP_DOWN) {
-
- btr_cur_t cursor;
- ulint* offsets;
-
- lower_page = buf_block_get_frame(new_block);
- lower_page_no = buf_block_get_page_no(new_block);
- lower_page_zip = buf_block_get_page_zip(new_block);
- upper_page = buf_block_get_frame(block);
- upper_page_no = buf_block_get_page_no(block);
- upper_page_zip = buf_block_get_page_zip(block);
-
- /* Look up the index for the node pointer to page */
- offsets = btr_page_get_father_block(NULL, heap, index,
- block, mtr, &cursor);
-
- /* Replace the address of the old child node (= page) with the
- address of the new lower half */
-
- btr_node_ptr_set_child_page_no(
- btr_cur_get_rec(&cursor),
- btr_cur_get_page_zip(&cursor),
- offsets, lower_page_no, mtr);
- mem_heap_empty(heap);
- } else {
- lower_page = buf_block_get_frame(block);
- lower_page_no = buf_block_get_page_no(block);
- lower_page_zip = buf_block_get_page_zip(block);
- upper_page = buf_block_get_frame(new_block);
- upper_page_no = buf_block_get_page_no(new_block);
- upper_page_zip = buf_block_get_page_zip(new_block);
- }
-
- /* Get the level of the split pages */
- level = btr_page_get_level(buf_nonnull_block_get_frame(block), mtr);
- ut_ad(level
- == btr_page_get_level(buf_block_get_frame(new_block), mtr));
-
- /* Build the node pointer (= node key and page address) for the upper
- half */
-
- node_ptr_upper = dict_index_build_node_ptr(index, split_rec,
- upper_page_no, heap, level);
-
- /* Insert it next to the pointer to the lower half. Note that this
- may generate recursion leading to a split on the higher level. */
-
- btr_insert_on_non_leaf_level(flags, index, level + 1,
- node_ptr_upper, mtr);
-
- /* Free the memory heap */
- mem_heap_free(heap);
-
- /* Get the previous and next pages of page */
-
- prev_page_no = btr_page_get_prev(page, mtr);
- next_page_no = btr_page_get_next(page, mtr);
- space = buf_block_get_space(block);
- zip_size = buf_block_get_zip_size(block);
-
- /* Update page links of the level: the old left neighbour now
- points forward to the lower half, the old right neighbour points
- back to the upper half. */
-
- if (prev_page_no != FIL_NULL) {
- buf_block_t* prev_block = btr_block_get(
- space, zip_size, prev_page_no, RW_X_LATCH, index, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(prev_block->frame) == page_is_comp(page));
- ut_a(btr_page_get_next(prev_block->frame, mtr)
- == buf_block_get_page_no(block));
-#endif /* UNIV_BTR_DEBUG */
-
- btr_page_set_next(buf_block_get_frame(prev_block),
- buf_block_get_page_zip(prev_block),
- lower_page_no, mtr);
- }
-
- if (next_page_no != FIL_NULL) {
- buf_block_t* next_block = btr_block_get(
- space, zip_size, next_page_no, RW_X_LATCH, index, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(next_block->frame) == page_is_comp(page));
- ut_a(btr_page_get_prev(next_block->frame, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- btr_page_set_prev(buf_block_get_frame(next_block),
- buf_block_get_page_zip(next_block),
- upper_page_no, mtr);
- }
-
- btr_page_set_prev(lower_page, lower_page_zip, prev_page_no, mtr);
- btr_page_set_next(lower_page, lower_page_zip, upper_page_no, mtr);
-
- btr_page_set_prev(upper_page, upper_page_zip, lower_page_no, mtr);
- btr_page_set_next(upper_page, upper_page_zip, next_page_no, mtr);
-}
-
-/*************************************************************//**
-Determine if a tuple is smaller than any record on the page, by
-comparing it with the first user record of the cursor's page.
-@return TRUE if smaller */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-btr_page_tuple_smaller(
-/*===================*/
-	btr_cur_t*	cursor,	/*!< in: b-tree cursor */
-	const dtuple_t*	tuple,	/*!< in: tuple to consider */
-	ulint**		offsets,/*!< in/out: temporary storage */
-	ulint		n_uniq,	/*!< in: number of unique fields
-				in the index page records */
-	mem_heap_t**	heap)	/*!< in/out: heap for offsets */
-{
-	page_cur_t	first_cur;
-
-	/* Step from the page infimum to the first user record. */
-	page_cur_set_before_first(btr_cur_get_block(cursor), &first_cur);
-	page_cur_move_to_next(&first_cur);
-
-	const rec_t*	first_user_rec = page_cur_get_rec(&first_cur);
-
-	/* Only the first n_uniq fields take part in the comparison. */
-	*offsets = rec_get_offsets(
-		first_user_rec, cursor->index, *offsets,
-		n_uniq, heap);
-
-	const int	cmp = cmp_dtuple_rec(tuple, first_user_rec, *offsets);
-
-	return(cmp < 0);
-}
-
-/** Insert the tuple into the right sibling page, if the cursor is at the end
-of a page.
-Nothing is attempted, and NULL is returned, unless the current page has a
-right sibling (its next-page number is not FIL_NULL) and the record that
-follows the cursor record is the page supremum.
-On success the inserted record is the first user record of the sibling
-page, so the node pointer of the sibling is deleted from its father page
-and a new node pointer built from the inserted record is inserted on the
-level above.
-@param[in] flags undo logging and locking flags
-@param[in,out] cursor cursor at which to insert; when the function succeeds,
- the cursor is positioned before the insert point.
-@param[out] offsets offsets on inserted record
-@param[in,out] heap memory heap for allocating offsets
-@param[in] tuple tuple to insert
-@param[in] n_ext number of externally stored columns
-@param[in,out] mtr mini-transaction
-@return inserted record (first record on the right sibling page);
- the cursor will be positioned on the page infimum
-@retval NULL if the operation was not performed */
-static
-rec_t*
-btr_insert_into_right_sibling(
- ulint flags,
- btr_cur_t* cursor,
- ulint** offsets,
- mem_heap_t* heap,
- const dtuple_t* tuple,
- ulint n_ext,
- mtr_t* mtr)
-{
- buf_block_t* block = btr_cur_get_block(cursor);
- page_t* page = buf_block_get_frame(block);
- ulint next_page_no = btr_page_get_next(page, mtr);
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(heap); /* heap is passed by value; &heap below is this local copy */
-
- if (next_page_no == FIL_NULL || !page_rec_is_supremum(
- page_rec_get_next(btr_cur_get_rec(cursor)))) {
-
- return(NULL); /* no right sibling, or cursor not on the last record */
- }
-
- page_cur_t next_page_cursor;
- buf_block_t* next_block;
- page_t* next_page;
- btr_cur_t next_father_cursor;
- rec_t* rec = NULL;
- ulint zip_size = buf_block_get_zip_size(block);
- ulint max_size;
-
- next_block = btr_block_get(
- buf_block_get_space(block), zip_size,
- next_page_no, RW_X_LATCH, cursor->index, mtr);
- next_page = buf_block_get_frame(next_block);
-
- bool is_leaf = page_is_leaf(next_page);
-
- btr_page_get_father(
- cursor->index, next_block, mtr, &next_father_cursor);
-
- page_cur_search(
- next_block, cursor->index, tuple, PAGE_CUR_LE,
- &next_page_cursor);
-
- max_size = page_get_max_insert_size_after_reorganize(next_page, 1); /* sampled before the insert; used for the ibuf free bits below */
-
- /* Extends gap lock for the next page */
- lock_update_split_left(next_block, block);
-
- rec = page_cur_tuple_insert(
- &next_page_cursor, tuple, cursor->index, offsets, &heap,
- n_ext, mtr);
-
- if (rec == NULL) {
- if (zip_size && is_leaf
- && !dict_index_is_clust(cursor->index)) {
- /* Reset the IBUF_BITMAP_FREE bits, because
- page_cur_tuple_insert() will have attempted page
- reorganize before failing. */
- ibuf_reset_free_bits(next_block);
- }
- return(NULL); /* note: lock_update_split_left() above has already run */
- }
-
- ibool compressed;
- dberr_t err;
- ulint level = btr_page_get_level(next_page, mtr);
-
- /* adjust cursor position */
- *btr_cur_get_page_cur(cursor) = next_page_cursor;
-
- ut_ad(btr_cur_get_rec(cursor) == page_get_infimum_rec(next_page));
- ut_ad(page_rec_get_next(page_get_infimum_rec(next_page)) == rec);
-
- /* We have to change the parent node pointer: the old pointer is
- deleted here and a new one, built from the inserted record, is
- inserted on level + 1 further below. */
-
- compressed = btr_cur_pessimistic_delete(
- &err, TRUE, &next_father_cursor,
- BTR_CREATE_FLAG, RB_NONE, mtr);
-
- ut_a(err == DB_SUCCESS);
-
- if (!compressed) {
- btr_cur_compress_if_useful(&next_father_cursor, FALSE, mtr);
- }
-
- dtuple_t* node_ptr = dict_index_build_node_ptr(
- cursor->index, rec, buf_block_get_page_no(next_block),
- heap, level);
-
- btr_insert_on_non_leaf_level(
- flags, cursor->index, level + 1, node_ptr, mtr);
-
- ut_ad(rec_offs_validate(rec, cursor->index, *offsets));
-
- if (is_leaf && !dict_index_is_clust(cursor->index)) {
- /* Update the free bits of the B-tree page in the
- insert buffer bitmap. */
-
- if (zip_size) {
- ibuf_update_free_bits_zip(next_block, mtr);
- } else {
- ibuf_update_free_bits_if_full(
- next_block, max_size,
- rec_offs_size(*offsets) + PAGE_DIR_SLOT_SIZE);
- }
- }
-
- return(rec);
-}
-
-/*************************************************************//**
-Splits an index page to halves and inserts the tuple. It is assumed
-that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
-released within this function (when the split is on the leaf level, the
-insert is known to fit and the index is not in online DDL)! NOTE that
-the operation of this function must always succeed, we cannot reverse
-it: therefore enough free disk space (2 pages) must be guaranteed to be
-available before this function is called.
-
-Before splitting, an insert into the right sibling page is attempted with
-btr_insert_into_right_sibling(); if that succeeds, no split takes place.
-If the tuple still does not fit after a split, the function jumps back to
-its start and splits again.
-
-NOTE: jonaso added support for calling function with tuple == NULL
-which cause it to only split a page.
-
-@return inserted record; NULL if tuple == NULL (split only) or if no
-new page could be allocated (run out of space) */
-UNIV_INTERN
-rec_t*
-btr_page_split_and_insert(
-/*======================*/
- ulint flags, /*!< in: undo logging and locking flags */
- btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
- function returns, the cursor is positioned
- on the predecessor of the inserted record */
- ulint** offsets,/*!< out: offsets on inserted record */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap; *heap may
- be NULL, in which case a heap is created
- here and left for the caller */
- const dtuple_t* tuple, /*!< in: tuple to insert, or NULL to only
- split the page */
- ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
- page_t* page;
- page_zip_des_t* page_zip;
- ulint page_no;
- byte direction;
- ulint hint_page_no;
- buf_block_t* new_block;
- page_t* new_page;
- page_zip_des_t* new_page_zip;
- rec_t* split_rec;
- buf_block_t* left_block;
- buf_block_t* right_block;
- buf_block_t* insert_block;
- page_cur_t* page_cursor;
- rec_t* first_rec;
- byte* buf = 0; /* remove warning */
- rec_t* move_limit;
- ibool insert_will_fit;
- ibool insert_left;
- ulint n_iterations = 0;
- rec_t* rec;
- ulint n_uniq;
-
- if (!*heap) {
- *heap = mem_heap_create(1024);
- }
- n_uniq = dict_index_get_n_unique_in_tree(cursor->index);
-func_start:
- /* Every round starts from scratch: offsets computed in a previous
- round lived in the heap that is emptied here. */
- mem_heap_empty(*heap);
- *offsets = NULL;
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index),
- MTR_MEMO_X_LOCK));
- ut_ad(!dict_index_is_online_ddl(cursor->index)
- || (flags & BTR_CREATE_FLAG)
- || dict_index_is_clust(cursor->index));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- block = btr_cur_get_block(cursor);
- page = buf_block_get_frame(block);
- page_zip = buf_block_get_page_zip(block);
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(!page_is_empty(page));
-
- /* try to insert to the next page if possible before split */
- rec = btr_insert_into_right_sibling(
- flags, cursor, offsets, *heap, tuple, n_ext, mtr);
-
- if (rec != NULL) {
- return(rec);
- }
-
- page_no = buf_block_get_page_no(block);
-
- /* 1. Decide the split record; split_rec == NULL means that the
- tuple to be inserted should be the first record on the upper
- half-page */
- insert_left = FALSE;
-
- if (tuple != NULL && n_iterations > 0) {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
- split_rec = btr_page_get_split_rec(cursor, tuple, n_ext);
-
- if (split_rec == NULL) {
- insert_left = btr_page_tuple_smaller(
- cursor, tuple, offsets, n_uniq, heap);
- }
- } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
-
- } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) {
- direction = FSP_DOWN;
- hint_page_no = page_no - 1;
- ut_ad(split_rec);
- } else {
- direction = FSP_UP;
- hint_page_no = page_no + 1;
-
- /* If there is only one record in the index page, we
- can't split the node in the middle by default. We need
- to determine whether the new record will be inserted
- to the left or right. */
-
- if (page_get_n_recs(page) > 1) {
- split_rec = page_get_middle_rec(page);
- } else if (btr_page_tuple_smaller(cursor, tuple,
- offsets, n_uniq, heap)) {
- split_rec = page_rec_get_next(
- page_get_infimum_rec(page));
- } else {
- split_rec = NULL;
- }
- }
-
- DBUG_EXECUTE_IF("disk_is_full",
- os_has_said_disk_full = true;
- return(NULL););
-
- /* 2. Allocate a new page to the index */
- new_block = btr_page_alloc(cursor->index, hint_page_no, direction,
- btr_page_get_level(page, mtr), mtr, mtr);
-
- if (new_block == NULL) {
- /* No page could be allocated. Bail out on every failed
- allocation, not only when os_has_said_disk_full is set:
- new_block is dereferenced right below, so continuing with
- a NULL block would crash. Nothing has been modified in
- the tree at this point. */
- return(NULL);
- }
-
- new_page = buf_block_get_frame(new_block);
- new_page_zip = buf_block_get_page_zip(new_block);
- btr_page_create(new_block, new_page_zip, cursor->index,
- btr_page_get_level(page, mtr), mtr);
- /* Only record the leaf level page splits. */
- if (page_is_leaf(page)) {
- cursor->index->stat_defrag_n_page_split ++;
- cursor->index->stat_defrag_modified_counter ++;
- btr_defragment_save_defrag_stats_if_needed(cursor->index);
- }
-
- /* 3. Calculate the first record on the upper half-page, and the
- first record (move_limit) on original page which ends up on the
- upper half */
-
- if (split_rec) {
- first_rec = move_limit = split_rec;
-
- *offsets = rec_get_offsets(split_rec, cursor->index, *offsets,
- n_uniq, heap);
-
- if (tuple != NULL) {
- insert_left = cmp_dtuple_rec(
- tuple, split_rec, *offsets) < 0;
- } else {
- insert_left = 1;
- }
-
- if (!insert_left && new_page_zip && n_iterations > 0) {
- /* If a compressed page has already been split,
- avoid further splits by inserting the record
- to an empty page. */
- split_rec = NULL;
- goto insert_empty;
- }
- } else if (insert_left) {
- ut_a(n_iterations > 0);
- first_rec = page_rec_get_next(page_get_infimum_rec(page));
- move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
- } else {
-insert_empty:
- ut_ad(!split_rec);
- ut_ad(!insert_left);
- /* The tuple itself becomes the first record of the upper
- half: convert it into a temporary buffer, which is freed
- again after btr_attach_half_pages() below. */
- buf = (byte*) mem_alloc(rec_get_converted_size(cursor->index,
- tuple, n_ext));
-
- first_rec = rec_convert_dtuple_to_rec(buf, cursor->index,
- tuple, n_ext);
- move_limit = page_rec_get_next(btr_cur_get_rec(cursor));
- }
-
- /* 4. Do first the modifications in the tree structure */
-
- btr_attach_half_pages(flags, cursor->index, block,
- first_rec, new_block, direction, mtr);
-
- /* If the split is made on the leaf level and the insert will fit
- on the appropriate half-page, we may release the tree x-latch.
- We can then move the records after releasing the tree latch,
- thus reducing the tree latch contention. */
- if (tuple == NULL) {
- insert_will_fit = 1;
- }
- else if (split_rec) {
- insert_will_fit = !new_page_zip
- && btr_page_insert_fits(cursor, split_rec,
- offsets, tuple, n_ext, heap);
- } else {
- if (!insert_left) {
- mem_free(buf);
- buf = NULL;
- }
-
- insert_will_fit = !new_page_zip
- && btr_page_insert_fits(cursor, NULL,
- offsets, tuple, n_ext, heap);
- }
-
- if (insert_will_fit && page_is_leaf(page)
- && !dict_index_is_online_ddl(cursor->index)) {
-
- mtr_memo_release(mtr, dict_index_get_lock(cursor->index),
- MTR_MEMO_X_LOCK);
- }
-
- /* 5. Move then the records to the new page */
- if (direction == FSP_DOWN) {
- /* fputs("Split left\n", stderr); */
-
- if (0
-#ifdef UNIV_ZIP_COPY
- || page_zip
-#endif /* UNIV_ZIP_COPY */
- || !page_move_rec_list_start(new_block, block, move_limit,
- cursor->index, mtr)) {
- /* For some reason, compressing new_page failed,
- even though it should contain fewer records than
- the original page. Copy the page byte for byte
- and then delete the records from both pages
- as appropriate. Deleting will always succeed. */
- ut_a(new_page_zip);
-
- page_zip_copy_recs(new_page_zip, new_page,
- page_zip, page, cursor->index, mtr);
- page_delete_rec_list_end(move_limit - page + new_page,
- new_block, cursor->index,
- ULINT_UNDEFINED,
- ULINT_UNDEFINED, mtr);
-
- /* Update the lock table and possible hash index. */
-
- lock_move_rec_list_start(
- new_block, block, move_limit,
- new_page + PAGE_NEW_INFIMUM);
-
- btr_search_move_or_delete_hash_entries(
- new_block, block, cursor->index);
-
- /* Delete the records from the source page. */
-
- page_delete_rec_list_start(move_limit, block,
- cursor->index, mtr);
- }
-
- left_block = new_block;
- right_block = block;
-
- lock_update_split_left(right_block, left_block);
- } else {
- /* fputs("Split right\n", stderr); */
-
- if (0
-#ifdef UNIV_ZIP_COPY
- || page_zip
-#endif /* UNIV_ZIP_COPY */
- || !page_move_rec_list_end(new_block, block, move_limit,
- cursor->index, mtr)) {
- /* For some reason, compressing new_page failed,
- even though it should contain fewer records than
- the original page. Copy the page byte for byte
- and then delete the records from both pages
- as appropriate. Deleting will always succeed. */
- ut_a(new_page_zip);
-
- page_zip_copy_recs(new_page_zip, new_page,
- page_zip, page, cursor->index, mtr);
- page_delete_rec_list_start(move_limit - page
- + new_page, new_block,
- cursor->index, mtr);
-
- /* Update the lock table and possible hash index. */
-
- lock_move_rec_list_end(new_block, block, move_limit);
-
- btr_search_move_or_delete_hash_entries(
- new_block, block, cursor->index);
-
- /* Delete the records from the source page. */
-
- page_delete_rec_list_end(move_limit, block,
- cursor->index,
- ULINT_UNDEFINED,
- ULINT_UNDEFINED, mtr);
- }
-
- left_block = block;
- right_block = new_block;
-
- lock_update_split_right(right_block, left_block);
- }
-
-#ifdef UNIV_ZIP_DEBUG
- if (page_zip) {
- ut_a(page_zip_validate(page_zip, page, cursor->index));
- ut_a(page_zip_validate(new_page_zip, new_page, cursor->index));
- }
-#endif /* UNIV_ZIP_DEBUG */
-
- /* At this point, split_rec, move_limit and first_rec may point
- to garbage on the old page. */
-
- /* 6. The split and the tree modification is now completed. Decide the
- page where the tuple should be inserted */
-
- if (tuple == NULL) {
- rec = NULL;
- goto func_exit;
- }
-
- if (insert_left) {
- insert_block = left_block;
- } else {
- insert_block = right_block;
- }
-
- /* 7. Reposition the cursor for insert and try insertion */
- page_cursor = btr_cur_get_page_cur(cursor);
-
- page_cur_search(insert_block, cursor->index, tuple,
- PAGE_CUR_LE, page_cursor);
-
- rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
- offsets, heap, n_ext, mtr);
-
-#ifdef UNIV_ZIP_DEBUG
- {
- page_t* insert_page
- = buf_block_get_frame(insert_block);
-
- page_zip_des_t* insert_page_zip
- = buf_block_get_page_zip(insert_block);
-
- ut_a(!insert_page_zip
- || page_zip_validate(insert_page_zip, insert_page,
- cursor->index));
- }
-#endif /* UNIV_ZIP_DEBUG */
-
- if (rec != NULL) {
-
- goto func_exit;
- }
-
- /* 8. If insert did not fit, try page reorganization.
- For compressed pages, page_cur_tuple_insert() will have
- attempted this already. */
-
- if (page_cur_get_page_zip(page_cursor)
- || !btr_page_reorganize(page_cursor, cursor->index, mtr)) {
-
- goto insert_failed;
- }
-
- rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index,
- offsets, heap, n_ext, mtr);
-
- if (rec == NULL) {
- /* The insert did not fit on the page: loop back to the
- start of the function for a new split */
-insert_failed:
- /* We play safe and reset the free bits */
- if (!dict_index_is_clust(cursor->index)) {
- ibuf_reset_free_bits(new_block);
- ibuf_reset_free_bits(block);
- }
-
- /* fprintf(stderr, "Split second round %lu\n",
- page_get_page_no(page)); */
- n_iterations++;
- ut_ad(n_iterations < 2
- || buf_block_get_page_zip(insert_block));
- ut_ad(!insert_will_fit);
-
- goto func_start;
- }
-
-func_exit:
- /* Insert fit on the page: update the free bits for the
- left and right pages in the same mtr */
-
- if (!dict_index_is_clust(cursor->index) && page_is_leaf(page)) {
- ibuf_update_free_bits_for_two_pages_low(
- buf_block_get_zip_size(left_block),
- left_block, right_block, mtr);
- }
-
-#if 0
- fprintf(stderr, "Split and insert done %lu %lu\n",
- buf_block_get_page_no(left_block),
- buf_block_get_page_no(right_block));
-#endif
- MONITOR_INC(MONITOR_INDEX_SPLIT);
-
- ut_ad(page_validate(buf_block_get_frame(left_block), cursor->index));
- ut_ad(page_validate(buf_block_get_frame(right_block), cursor->index));
-
- if (tuple == NULL) {
- ut_ad(rec == NULL);
- }
- ut_ad(!rec || rec_offs_validate(rec, cursor->index, *offsets));
- return(rec);
-}
-
-/*************************************************************//**
-Unlinks a page from the list of pages on its level: the next-link of the
-left neighbour and the prev-link of the right neighbour are rewritten so
-that they skip the page. The link fields of the page itself are not
-modified here. */
-UNIV_INTERN
-void
-btr_level_list_remove_func(
-/*=======================*/
-	ulint		space,	/*!< in: space where removed */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	page_t*		page,	/*!< in/out: page to remove */
-	dict_index_t*	index,	/*!< in: index tree */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
-	ut_ad(space == page_get_space_id(page));
-
-	/* Page numbers of the neighbours on this level */
-	const ulint	left_no = btr_page_get_prev(page, mtr);
-	const ulint	right_no = btr_page_get_next(page, mtr);
-
-	/* Left neighbour, if any: its successor becomes our successor */
-	if (left_no != FIL_NULL) {
-		buf_block_t*	left_block = btr_block_get(
-			space, zip_size, left_no, RW_X_LATCH, index, mtr);
-		page_t*		left_page = buf_block_get_frame(left_block);
-#ifdef UNIV_BTR_DEBUG
-		ut_a(page_is_comp(left_page) == page_is_comp(page));
-		ut_a(btr_page_get_next(left_page, mtr)
-		     == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
-		btr_page_set_next(
-			left_page, buf_block_get_page_zip(left_block),
-			right_no, mtr);
-	}
-
-	/* Right neighbour, if any: its predecessor becomes our
-	predecessor */
-	if (right_no != FIL_NULL) {
-		buf_block_t*	right_block = btr_block_get(
-			space, zip_size, right_no, RW_X_LATCH, index, mtr);
-		page_t*		right_page = buf_block_get_frame(right_block);
-#ifdef UNIV_BTR_DEBUG
-		ut_a(page_is_comp(right_page) == page_is_comp(page));
-		ut_a(btr_page_get_prev(right_page, mtr)
-		     == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
-		btr_page_set_prev(
-			right_page, buf_block_get_page_zip(right_block),
-			left_no, mtr);
-	}
-}
-
-/****************************************************************//**
-Emits the redo log record that describes marking an index record as the
-predefined minimum record: an initial log record of the given type,
-followed by the offset of the record within its page in 2 bytes. */
-UNIV_INLINE
-void
-btr_set_min_rec_mark_log(
-/*=====================*/
-	rec_t*	rec,	/*!< in: record */
-	byte	type,	/*!< in: MLOG_COMP_REC_MIN_MARK or MLOG_REC_MIN_MARK */
-	mtr_t*	mtr)	/*!< in: mtr */
-{
-	ulint	offset_in_page;
-
-	/* Open the log record for this record and log type */
-	mlog_write_initial_log_record(rec, type, mtr);
-
-	/* The body of the log record is the record offset, 2 bytes */
-	offset_in_page = page_offset(rec);
-
-	mlog_catenate_ulint(mtr, offset_in_page, MLOG_2BYTES);
-}
-#else /* !UNIV_HOTBACKUP */
-# define btr_set_min_rec_mark_log(rec,comp,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/****************************************************************//**
-Parses the redo log record for setting an index record as the predefined
-minimum record. The record body is a 2-byte offset of the record within
-the page; when a page is given, the mark is applied to that record.
-@return end of log record, or NULL if fewer than 2 bytes are available */
-UNIV_INTERN
-byte*
-btr_parse_set_min_rec_mark(
-/*=======================*/
-	byte*	ptr,	/*!< in: buffer */
-	byte*	end_ptr,/*!< in: buffer end */
-	ulint	comp,	/*!< in: nonzero=compact page format */
-	page_t*	page,	/*!< in: page or NULL */
-	mtr_t*	mtr)	/*!< in: mtr or NULL */
-{
-	/* The whole 2-byte body must be inside the buffer */
-	if (end_ptr < ptr + 2) {
-		return(NULL);
-	}
-
-	if (page != NULL) {
-		/* The page format must agree with the log record type */
-		ut_a(!page_is_comp(page) == !comp);
-
-		btr_set_min_rec_mark(page + mach_read_from_2(ptr), mtr);
-	}
-
-	return(ptr + 2);
-}
-
-/****************************************************************//**
-Marks a record as the predefined minimum record by setting
-REC_INFO_MIN_REC_FLAG in its info bits, using the accessor that matches
-the record format, and writes the corresponding redo log record. */
-UNIV_INTERN
-void
-btr_set_min_rec_mark(
-/*=================*/
-	rec_t*	rec,	/*!< in: record */
-	mtr_t*	mtr)	/*!< in: mtr */
-{
-	if (!page_rec_is_comp(rec)) {
-		/* Old-style record format */
-		const ulint	old_bits = rec_get_info_bits(rec, FALSE);
-
-		rec_set_info_bits_old(rec, old_bits | REC_INFO_MIN_REC_FLAG);
-
-		btr_set_min_rec_mark_log(rec, MLOG_REC_MIN_MARK, mtr);
-	} else {
-		/* Compact record format */
-		const ulint	new_bits = rec_get_info_bits(rec, TRUE);
-
-		rec_set_info_bits_new(rec, new_bits | REC_INFO_MIN_REC_FLAG);
-
-		btr_set_min_rec_mark_log(rec, MLOG_COMP_REC_MIN_MARK, mtr);
-	}
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Removes, from the father page on the level above, the node pointer that
-refers to the given page. The delete must succeed; if it did not already
-compress the father page, a compression of the father is attempted. */
-UNIV_INTERN
-void
-btr_node_ptr_delete(
-/*================*/
-	dict_index_t*	index,	/*!< in: index tree */
-	buf_block_t*	block,	/*!< in: page whose node pointer is deleted */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	btr_cur_t	father_cursor;
-	dberr_t		err;
-
-	ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-
-	/* Position a cursor on the node pointer in the father page */
-	btr_page_get_father(index, block, mtr, &father_cursor);
-
-	const ibool	father_compressed = btr_cur_pessimistic_delete(
-		&err, TRUE, &father_cursor, BTR_CREATE_FLAG, RB_NONE, mtr);
-
-	ut_a(err == DB_SUCCESS);
-
-	if (father_compressed) {
-		return;
-	}
-
-	btr_cur_compress_if_useful(&father_cursor, FALSE, mtr);
-}
-
-/*************************************************************//**
-If page is the only on its level, this function moves its records to the
-father page, thus reducing the tree height.
-If the page is a leaf (level 0) and its father is not the root, the father
-is lifted into the grandfather instead: the father page is then the one
-that is emptied from the tree and freed, and the original block is returned.
-The levels of the remaining ancestor pages up to the root are decremented
-by one.
-@return father block, or the original block when the father page was
-lifted instead */
-UNIV_INTERN
-buf_block_t*
-btr_lift_page_up(
-/*=============*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: page which is the only on its level;
- must not be empty: use
- btr_discard_only_page_on_level if the last
- record from the page should be removed */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* father_block;
- page_t* father_page;
- ulint page_level;
- page_zip_des_t* father_page_zip;
- page_t* page = buf_block_get_frame(block);
- ulint root_page_no;
- buf_block_t* blocks[BTR_MAX_LEVELS];
- ulint n_blocks; /*!< number of entries stored in blocks[] */
- ulint i;
- bool lift_father_up;
- buf_block_t* block_orig = block;
-
- ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-
- page_level = btr_page_get_level(page, mtr);
- root_page_no = dict_index_get_page(index);
-
- {
- btr_cur_t cursor;
- ulint* offsets = NULL;
- mem_heap_t* heap = mem_heap_create(
- sizeof(*offsets)
- * (REC_OFFS_HEADER_SIZE + 1 + 1 + index->n_fields));
- buf_block_t* b;
-
- offsets = btr_page_get_father_block(offsets, heap, index,
- block, mtr, &cursor);
- father_block = btr_cur_get_block(&cursor);
- father_page_zip = buf_block_get_page_zip(father_block);
- father_page = buf_block_get_frame(father_block);
-
- n_blocks = 0;
-
- /* Store all ancestor pages so we can reset their
- levels later on. We have to do all the searches on
- the tree now because later on, after we've replaced
- the first level, the tree is in an inconsistent state
- and can not be searched. */
- for (b = father_block;
- buf_block_get_page_no(b) != root_page_no; ) {
- ut_a(n_blocks < BTR_MAX_LEVELS);
-
- offsets = btr_page_get_father_block(offsets, heap,
- index, b,
- mtr, &cursor);
-
- blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
- }
-
- lift_father_up = (n_blocks && page_level == 0);
- if (lift_father_up) {
- /* The father page also should be the only on its level (not
- root). We should lift up the father page at first.
- Because the leaf page should be lifted up only for root page.
- The freeing page is based on page_level (==0 or !=0)
- to choose segment. If the page_level is changed ==0 from !=0,
- later freeing of the page doesn't find the page allocation
- to be freed.*/
-
- block = father_block;
- page = buf_block_get_frame(block);
- page_level = btr_page_get_level(page, mtr);
-
- ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-
- father_block = blocks[0]; /* the grandfather of the original page */
- father_page_zip = buf_block_get_page_zip(father_block);
- father_page = buf_block_get_frame(father_block);
- }
-
- mem_heap_free(heap);
- }
-
- btr_search_drop_page_hash_index(block);
-
- /* Make the father empty */
- btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
- page_level++; /* from here on: the level the next ancestor will be set to */
-
- /* Copy the records to the father page one by one. */
- if (0
-#ifdef UNIV_ZIP_COPY
- || father_page_zip
-#endif /* UNIV_ZIP_COPY */
- || !page_copy_rec_list_end(father_block, block,
- page_get_infimum_rec(page),
- index, mtr)) {
- const page_zip_des_t* page_zip
- = buf_block_get_page_zip(block);
- ut_a(father_page_zip);
- ut_a(page_zip);
-
- /* Copy the page byte for byte. */
- page_zip_copy_recs(father_page_zip, father_page,
- page_zip, page, index, mtr);
-
- /* Update the lock table and possible hash index. */
-
- lock_move_rec_list_end(father_block, block,
- page_get_infimum_rec(page));
-
- btr_search_move_or_delete_hash_entries(father_block, block,
- index);
- }
-
- btr_blob_dbg_remove(page, index, "btr_lift_page_up");
- lock_update_copy_and_discard(father_block, block);
-
- /* Go upward to root page, decrementing levels by one.
- When the father was lifted, blocks[0] is the page that just
- received the records, so the walk starts at blocks[1]. */
- for (i = lift_father_up ? 1 : 0; i < n_blocks; i++, page_level++) {
- page_t* page = buf_block_get_frame(blocks[i]); /* shadows the outer 'page' */
- page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]);
-
- ut_ad(btr_page_get_level(page, mtr) == page_level + 1);
-
- btr_page_set_level(page, page_zip, page_level, mtr);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
- }
-
- /* Free the file page */
- btr_page_free(index, block, mtr);
-
- /* We play it safe and reset the free bits for the father */
- if (!dict_index_is_clust(index)) {
- ibuf_reset_free_bits(father_block);
- }
- ut_ad(page_validate(father_page, index));
- ut_ad(btr_check_node_ptr(index, father_block, mtr));
-
- return(lift_father_up ? block_orig : father_block);
-}
-
-/*************************************************************//**
-Tries to merge the page first to the left immediate brother if such a
-brother exists, and the node pointers to the current page and to the brother
-reside on the same page. If the left brother does not satisfy these
-conditions, looks at the right brother. If the page is the only one on that
-level lifts the records of the page to the father page, thus reducing the
-tree height. It is assumed that mtr holds an x-latch on the tree and on the
-page. If cursor is on the leaf level, mtr must also hold x-latches to the
-brothers, if they exist.
-On success the page has been merged or lifted and, if adjust is TRUE, the
-cursor is repositioned on the corresponding record in the block that
-received the records. On failure the page is not freed.
-@return TRUE on success, FALSE if neither brother can take the records
-or if copying the records to the chosen brother fails */
-UNIV_INTERN
-ibool
-btr_compress(
-/*=========*/
- btr_cur_t* cursor, /*!< in/out: cursor on the page to merge
- or lift; the page must not be empty:
- when deleting records, use btr_discard_page()
- if the page would become empty */
- ibool adjust, /*!< in: TRUE if should adjust the
- cursor position even if compression occurs */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- dict_index_t* index;
- ulint space;
- ulint zip_size;
- ulint left_page_no;
- ulint right_page_no;
- buf_block_t* merge_block;
- page_t* merge_page = NULL;
- page_zip_des_t* merge_page_zip;
- ibool is_left;
- buf_block_t* block;
- page_t* page;
- btr_cur_t father_cursor;
- mem_heap_t* heap;
- ulint* offsets;
- ulint nth_rec = 0; /* remove bogus warning */
- DBUG_ENTER("btr_compress");
-
- block = btr_cur_get_block(cursor);
- page = btr_cur_get_page(cursor);
- index = btr_cur_get_index(cursor);
-
- btr_assert_not_corrupted(block, index);
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
-
- MONITOR_INC(MONITOR_INDEX_MERGE_ATTEMPTS);
-
- left_page_no = btr_page_get_prev(page, mtr);
- right_page_no = btr_page_get_next(page, mtr);
-
-#ifdef UNIV_DEBUG
- if (!page_is_leaf(page) && left_page_no == FIL_NULL) {
- ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
- page_rec_get_next(page_get_infimum_rec(page)),
- page_is_comp(page)));
- }
-#endif /* UNIV_DEBUG */
-
- heap = mem_heap_create(100);
- offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
- &father_cursor);
-
- if (adjust) {
- nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor)); /* remembered so the cursor can be restored at func_exit */
- ut_ad(nth_rec > 0);
- }
-
- if (left_page_no == FIL_NULL && right_page_no == FIL_NULL) {
- /* The page is the only one on the level, lift the records
- to the father */
-
- merge_block = btr_lift_page_up(index, block, mtr);
- goto func_exit;
- }
-
- /* Decide the page to which we try to merge and which will inherit
- the locks */
-
- is_left = btr_can_merge_with_page(cursor, left_page_no,
- &merge_block, mtr);
-
- DBUG_EXECUTE_IF("ib_always_merge_right", is_left = FALSE;);
-
- if(!is_left
- && !btr_can_merge_with_page(cursor, right_page_no, &merge_block,
- mtr)) {
- goto err_exit;
- }
-
- merge_page = buf_block_get_frame(merge_block);
-
-#ifdef UNIV_BTR_DEBUG
- if (is_left) {
- ut_a(btr_page_get_next(merge_page, mtr)
- == buf_block_get_page_no(block));
- } else {
- ut_a(btr_page_get_prev(merge_page, mtr)
- == buf_block_get_page_no(block));
- }
-#endif /* UNIV_BTR_DEBUG */
-
- ut_ad(page_validate(merge_page, index));
-
- merge_page_zip = buf_block_get_page_zip(merge_block);
-#ifdef UNIV_ZIP_DEBUG
- if (merge_page_zip) {
- const page_zip_des_t* page_zip
- = buf_block_get_page_zip(block);
- ut_a(page_zip);
- ut_a(page_zip_validate(merge_page_zip, merge_page, index));
- ut_a(page_zip_validate(page_zip, page, index));
- }
-#endif /* UNIV_ZIP_DEBUG */
-
- /* Move records to the merge page */
- if (is_left) {
- rec_t* orig_pred = page_copy_rec_list_start(
- merge_block, block, page_get_supremum_rec(page),
- index, mtr);
-
- if (!orig_pred) {
- goto err_exit;
- }
-
- btr_search_drop_page_hash_index(block);
-
- /* Remove the page from the level list */
- btr_level_list_remove(space, zip_size, page, index, mtr);
-
- btr_node_ptr_delete(index, block, mtr);
- lock_update_merge_left(merge_block, orig_pred, block);
-
- if (adjust) {
- nth_rec += page_rec_get_n_recs_before(orig_pred);
- }
- } else {
- rec_t* orig_succ;
- ibool compressed;
- dberr_t err;
- btr_cur_t cursor2;
- /* father cursor pointing to node ptr
- of the right sibling */
-#ifdef UNIV_BTR_DEBUG
- byte fil_page_prev[4];
-#endif /* UNIV_BTR_DEBUG */
-
- btr_page_get_father(index, merge_block, mtr, &cursor2);
-
- if (merge_page_zip && left_page_no == FIL_NULL) {
-
- /* The function page_zip_compress(), which will be
- invoked by page_copy_rec_list_end() below,
- requires that FIL_PAGE_PREV be FIL_NULL.
- Clear the field, but prepare to restore it. */
-#ifdef UNIV_BTR_DEBUG
- memcpy(fil_page_prev, merge_page + FIL_PAGE_PREV, 4);
-#endif /* UNIV_BTR_DEBUG */
-#if FIL_NULL != 0xffffffff
-# error "FIL_NULL != 0xffffffff"
-#endif
- memset(merge_page + FIL_PAGE_PREV, 0xff, 4);
- }
-
- orig_succ = page_copy_rec_list_end(merge_block, block,
- page_get_infimum_rec(page),
- cursor->index, mtr);
-
- if (!orig_succ) {
- ut_a(merge_page_zip);
-#ifdef UNIV_BTR_DEBUG
- if (left_page_no == FIL_NULL) {
- /* FIL_PAGE_PREV was restored from
- merge_page_zip. */
- ut_a(!memcmp(fil_page_prev,
- merge_page + FIL_PAGE_PREV, 4));
- }
-#endif /* UNIV_BTR_DEBUG */
- goto err_exit;
- }
-
- btr_search_drop_page_hash_index(block);
-
-#ifdef UNIV_BTR_DEBUG
- if (merge_page_zip && left_page_no == FIL_NULL) {
-
- /* Restore FIL_PAGE_PREV in order to avoid an assertion
- failure in btr_level_list_remove(), which will set
- the field again to FIL_NULL. Even though this makes
- merge_page and merge_page_zip inconsistent for a
- split second, it is harmless, because the pages
- are X-latched. */
- memcpy(merge_page + FIL_PAGE_PREV, fil_page_prev, 4);
- }
-#endif /* UNIV_BTR_DEBUG */
-
- /* Remove the page from the level list */
- btr_level_list_remove(space, zip_size, page, index, mtr);
-
- /* Replace the address of the old child node (= page) with the
- address of the merge page to the right */
- btr_node_ptr_set_child_page_no(
- btr_cur_get_rec(&father_cursor),
- btr_cur_get_page_zip(&father_cursor),
- offsets, right_page_no, mtr);
-
- compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor2,
- BTR_CREATE_FLAG,
- RB_NONE, mtr);
- ut_a(err == DB_SUCCESS);
-
- if (!compressed) {
- btr_cur_compress_if_useful(&cursor2, FALSE, mtr);
- }
-
- lock_update_merge_right(merge_block, orig_succ, block);
- }
-
- btr_blob_dbg_remove(page, index, "btr_compress");
-
- if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) {
- /* Update the free bits of the B-tree page in the
- insert buffer bitmap. This has to be done in a
- separate mini-transaction that is committed before the
- main mini-transaction. We cannot update the insert
- buffer bitmap in this mini-transaction, because
- btr_compress() can be invoked recursively without
- committing the mini-transaction in between. Since
- insert buffer bitmap pages have a lower rank than
- B-tree pages, we must not access other pages in the
- same mini-transaction after accessing an insert buffer
- bitmap page. */
-
- /* The free bits in the insert buffer bitmap must
- never exceed the free space on a page. It is safe to
- decrement or reset the bits in the bitmap in a
- mini-transaction that is committed before the
- mini-transaction that affects the free space. */
-
- /* It is unsafe to increment the bits in a separately
- committed mini-transaction, because in crash recovery,
- the free bits could momentarily be set too high. */
-
- if (zip_size) {
- /* Because the free bits may be incremented
- and we cannot update the insert buffer bitmap
- in the same mini-transaction, the only safe
- thing we can do here is the pessimistic
- approach: reset the free bits. */
- ibuf_reset_free_bits(merge_block);
- } else {
- /* On uncompressed pages, the free bits will
- never increase here. Thus, it is safe to
- write the bits accurately in a separate
- mini-transaction. */
- ibuf_update_free_bits_if_full(merge_block,
- UNIV_PAGE_SIZE,
- ULINT_UNDEFINED);
- }
- }
-
- ut_ad(page_validate(merge_page, index));
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page,
- index));
-#endif /* UNIV_ZIP_DEBUG */
-
- /* Free the file page */
- btr_page_free(index, block, mtr);
-
- ut_ad(btr_check_node_ptr(index, merge_block, mtr));
-func_exit:
- mem_heap_free(heap);
-
- if (adjust) {
- ut_ad(nth_rec > 0);
- btr_cur_position(
- index,
- page_rec_get_nth(merge_block->frame, nth_rec),
- merge_block, cursor);
- }
-
- MONITOR_INC(MONITOR_INDEX_MERGE_SUCCESSFUL);
-
- DBUG_RETURN(TRUE);
-
-err_exit:
- /* We play it safe and reset the free bits. merge_page is still
- NULL here when no brother was chosen, in which case there is
- nothing to reset. */
- if (zip_size
- && merge_page
- && page_is_leaf(merge_page)
- && !dict_index_is_clust(index)) {
- ibuf_reset_free_bits(merge_block);
- }
-
- mem_heap_free(heap);
- DBUG_RETURN(FALSE);
-}
-
-/*************************************************************//**
-Discards a page that is the only page on its level. This will empty
-the whole B-tree, leaving just an empty root page. This function
-should never be reached, because btr_compress(), which is invoked in
-delete operations, calls btr_lift_page_up() to flatten the B-tree. */
-static
-void
-btr_discard_only_page_on_level(
-/*===========================*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: page which is the only on its level */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint page_level = 0;
- trx_id_t max_trx_id;
-
- /* Save the PAGE_MAX_TRX_ID from the leaf page. */
- max_trx_id = page_get_max_trx_id(buf_block_get_frame(block));
-
- while (buf_block_get_page_no(block) != dict_index_get_page(index)) {
- btr_cur_t cursor;
- buf_block_t* father;
- const page_t* page = buf_block_get_frame(block);
-
- ut_a(page_get_n_recs(page) == 1);
- ut_a(page_level == btr_page_get_level(page, mtr));
- ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- btr_search_drop_page_hash_index(block);
-
- btr_page_get_father(index, block, mtr, &cursor);
- father = btr_cur_get_block(&cursor);
-
- lock_update_discard(father, PAGE_HEAP_NO_SUPREMUM, block);
-
- /* Free the file page */
- btr_page_free(index, block, mtr);
-
- block = father;
- page_level++;
- }
-
- /* block is the root page, which must be empty, except
- for the node pointer to the (now discarded) block(s). */
-
-#ifdef UNIV_BTR_DEBUG
- if (!dict_index_is_ibuf(index)) {
- const page_t* root = buf_block_get_frame(block);
- const ulint space = dict_index_get_space(index);
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
- + root, space));
- ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
- + root, space));
- }
-#endif /* UNIV_BTR_DEBUG */
-
- btr_page_empty(block, buf_block_get_page_zip(block), index, 0, mtr);
- ut_ad(page_is_leaf(buf_block_get_frame(block)));
-
- if (!dict_index_is_clust(index)) {
- /* We play it safe and reset the free bits for the root */
- ibuf_reset_free_bits(block);
-
- ut_a(max_trx_id);
- page_set_max_trx_id(block,
- buf_block_get_page_zip(block),
- max_trx_id, mtr);
- }
-}
-
-/*************************************************************//**
-Discards a page from a B-tree. This is used to remove the last record from
-a B-tree page: the whole page must be removed at the same time. This cannot
-be used for the root page, which is allowed to be empty. */
-UNIV_INTERN
-void
-btr_discard_page(
-/*=============*/
- btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
- the root page */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_index_t* index;
- ulint space;
- ulint zip_size;
- ulint left_page_no;
- ulint right_page_no;
- buf_block_t* merge_block;
- page_t* merge_page;
- buf_block_t* block;
- page_t* page;
- rec_t* node_ptr;
-
- block = btr_cur_get_block(cursor);
- index = btr_cur_get_index(cursor);
-
- ut_ad(dict_index_get_page(index) != buf_block_get_page_no(block));
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
-
- MONITOR_INC(MONITOR_INDEX_DISCARD);
-
- /* Decide the page which will inherit the locks */
-
- left_page_no = btr_page_get_prev(buf_nonnull_block_get_frame(block),
- mtr);
- right_page_no = btr_page_get_next(buf_nonnull_block_get_frame(block),
- mtr);
-
- if (left_page_no != FIL_NULL) {
- merge_block = btr_block_get(space, zip_size, left_page_no,
- RW_X_LATCH, index, mtr);
- merge_page = buf_block_get_frame(merge_block);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(merge_page, mtr)
- == buf_block_get_page_no(block));
-#endif /* UNIV_BTR_DEBUG */
- } else if (right_page_no != FIL_NULL) {
- merge_block = btr_block_get(space, zip_size, right_page_no,
- RW_X_LATCH, index, mtr);
- merge_page = buf_block_get_frame(merge_block);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(merge_page, mtr)
- == buf_block_get_page_no(block));
-#endif /* UNIV_BTR_DEBUG */
- } else {
- btr_discard_only_page_on_level(index, block, mtr);
-
- return;
- }
-
- page = buf_block_get_frame(block);
- ut_a(page_is_comp(merge_page) == page_is_comp(page));
- btr_search_drop_page_hash_index(block);
-
- if (left_page_no == FIL_NULL && !page_is_leaf(page)) {
-
- /* We have to mark the leftmost node pointer on the right
- side page as the predefined minimum record */
- node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page));
-
- ut_ad(page_rec_is_user_rec(node_ptr));
-
- /* This will make page_zip_validate() fail on merge_page
- until btr_level_list_remove() completes. This is harmless,
- because everything will take place within a single
- mini-transaction and because writing to the redo log
- is an atomic operation (performed by mtr_commit()). */
- btr_set_min_rec_mark(node_ptr, mtr);
- }
-
- btr_node_ptr_delete(index, block, mtr);
-
- /* Remove the page from the level list */
- btr_level_list_remove(space, zip_size, page, index, mtr);
-#ifdef UNIV_ZIP_DEBUG
- {
- page_zip_des_t* merge_page_zip
- = buf_block_get_page_zip(merge_block);
- ut_a(!merge_page_zip
- || page_zip_validate(merge_page_zip, merge_page, index));
- }
-#endif /* UNIV_ZIP_DEBUG */
-
- if (left_page_no != FIL_NULL) {
- lock_update_discard(merge_block, PAGE_HEAP_NO_SUPREMUM,
- block);
- } else {
- lock_update_discard(merge_block,
- lock_get_min_heap_no(merge_block),
- block);
- }
-
- btr_blob_dbg_remove(page, index, "btr_discard_page");
-
- /* Free the file page */
- btr_page_free(index, block, mtr);
-
- ut_ad(btr_check_node_ptr(index, merge_block, mtr));
-}
-
-#ifdef UNIV_BTR_PRINT
-/*************************************************************//**
-Prints size info of a B-tree. */
-UNIV_INTERN
-void
-btr_print_size(
-/*===========*/
- dict_index_t* index) /*!< in: index tree */
-{
- page_t* root;
- fseg_header_t* seg;
- mtr_t mtr;
-
- if (dict_index_is_ibuf(index)) {
- fputs("Sorry, cannot print info of an ibuf tree:"
- " use ibuf functions\n", stderr);
-
- return;
- }
-
- mtr_start(&mtr);
-
- root = btr_root_get(index, &mtr);
-
- seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-
- fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr);
- fseg_print(seg, &mtr);
-
- if (!dict_index_is_univ(index)) {
-
- seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
-
- fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr);
- fseg_print(seg, &mtr);
- }
-
- mtr_commit(&mtr);
-}
-
-/************************************************************//**
-Prints recursively index tree pages. */
-static
-void
-btr_print_recursive(
-/*================*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: index page */
- ulint width, /*!< in: print this many entries from start
- and end */
- mem_heap_t** heap, /*!< in/out: heap for rec_get_offsets() */
- ulint** offsets,/*!< in/out: buffer for rec_get_offsets() */
- mtr_t* mtr) /*!< in: mtr */
-{
- const page_t* page = buf_block_get_frame(block);
- page_cur_t cursor;
- ulint n_recs;
- ulint i = 0;
- mtr_t mtr2;
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n",
- (ulong) btr_page_get_level(page, mtr),
- (ulong) buf_block_get_page_no(block));
-
- page_print(block, index, width, width);
-
- n_recs = page_get_n_recs(page);
-
- page_cur_set_before_first(block, &cursor);
- page_cur_move_to_next(&cursor);
-
- while (!page_cur_is_after_last(&cursor)) {
-
- if (page_is_leaf(page)) {
-
- /* If this is the leaf level, do nothing */
-
- } else if ((i <= width) || (i >= n_recs - width)) {
-
- const rec_t* node_ptr;
-
- mtr_start(&mtr2);
-
- node_ptr = page_cur_get_rec(&cursor);
-
- *offsets = rec_get_offsets(node_ptr, index, *offsets,
- ULINT_UNDEFINED, heap);
- btr_print_recursive(index,
- btr_node_ptr_get_child(node_ptr,
- index,
- *offsets,
- &mtr2),
- width, heap, offsets, &mtr2);
- mtr_commit(&mtr2);
- }
-
- page_cur_move_to_next(&cursor);
- i++;
- }
-}
-
-/**************************************************************//**
-Prints directories and other info of all nodes in the tree. */
-UNIV_INTERN
-void
-btr_print_index(
-/*============*/
- dict_index_t* index, /*!< in: index */
- ulint width) /*!< in: print this many entries from start
- and end */
-{
- mtr_t mtr;
- buf_block_t* root;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- fputs("--------------------------\n"
- "INDEX TREE PRINT\n", stderr);
-
- mtr_start(&mtr);
-
- root = btr_root_block_get(index, RW_X_LATCH, &mtr);
-
- btr_print_recursive(index, root, width, &heap, &offsets, &mtr);
- if (heap) {
- mem_heap_free(heap);
- }
-
- mtr_commit(&mtr);
-
- btr_validate_index(index, 0);
-}
-#endif /* UNIV_BTR_PRINT */
-
-#ifdef UNIV_DEBUG
-/************************************************************//**
-Checks that the node pointer to a page is appropriate.
-@return TRUE */
-UNIV_INTERN
-ibool
-btr_check_node_ptr(
-/*===============*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: index page */
- mtr_t* mtr) /*!< in: mtr */
-{
- mem_heap_t* heap;
- dtuple_t* tuple;
- ulint* offsets;
- btr_cur_t cursor;
- page_t* page = buf_block_get_frame(block);
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- if (dict_index_get_page(index) == buf_block_get_page_no(block)) {
-
- return(TRUE);
- }
-
- heap = mem_heap_create(256);
- offsets = btr_page_get_father_block(NULL, heap, index, block, mtr,
- &cursor);
-
- if (page_is_leaf(page)) {
-
- goto func_exit;
- }
-
- tuple = dict_index_build_node_ptr(
- index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap,
- btr_page_get_level(page, mtr));
-
- ut_a(!cmp_dtuple_rec(tuple, btr_cur_get_rec(&cursor), offsets));
-func_exit:
- mem_heap_free(heap);
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/************************************************************//**
-Display identification information for a record. */
-static
-void
-btr_index_rec_validate_report(
-/*==========================*/
- const page_t* page, /*!< in: index page */
- const rec_t* rec, /*!< in: index record */
- const dict_index_t* index) /*!< in: index */
-{
- fputs("InnoDB: Record in ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fprintf(stderr, ", page %lu, at offset %lu\n",
- page_get_page_no(page), (ulint) page_offset(rec));
-}
-
-/************************************************************//**
-Checks the size and number of fields in a record based on the definition of
-the index.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-btr_index_rec_validate(
-/*===================*/
- const rec_t* rec, /*!< in: index record */
- const dict_index_t* index, /*!< in: index */
- ibool dump_on_error) /*!< in: TRUE if the function
- should print hex dump of record
- and page on error */
-{
- ulint len;
- ulint n;
- ulint i;
- const page_t* page;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- page = page_align(rec);
-
- if (dict_index_is_univ(index)) {
- /* The insert buffer index tree can contain records from any
- other index: we cannot check the number of fields or
- their length */
-
- return(TRUE);
- }
-
- if ((ibool)!!page_is_comp(page) != dict_table_is_comp(index->table)) {
- btr_index_rec_validate_report(page, rec, index);
- fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n",
- (ulong) !!page_is_comp(page),
- (ulong) dict_table_is_comp(index->table));
-
- return(FALSE);
- }
-
- n = dict_index_get_n_fields(index);
-
- if (!page_is_comp(page) && rec_get_n_fields_old(rec) != n) {
- btr_index_rec_validate_report(page, rec, index);
- fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n",
- (ulong) rec_get_n_fields_old(rec), (ulong) n);
-
- if (dump_on_error) {
- buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
-
- fputs("InnoDB: corrupt record ", stderr);
- rec_print_old(stderr, rec);
- putc('\n', stderr);
- }
- return(FALSE);
- }
-
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- for (i = 0; i < n; i++) {
- ulint fixed_size = dict_col_get_fixed_size(
- dict_index_get_nth_col(index, i), page_is_comp(page));
-
- rec_get_nth_field_offs(offsets, i, &len);
-
- /* Note that if fixed_size != 0, it equals the
- length of a fixed-size column in the clustered index.
- A prefix index of the column is of fixed, but different
- length. When fixed_size == 0, prefix_len is the maximum
- length of the prefix index column. */
-
- if ((dict_index_get_nth_field(index, i)->prefix_len == 0
- && len != UNIV_SQL_NULL && fixed_size
- && len != fixed_size)
- || (dict_index_get_nth_field(index, i)->prefix_len > 0
- && len != UNIV_SQL_NULL
- && len
- > dict_index_get_nth_field(index, i)->prefix_len)) {
-
- btr_index_rec_validate_report(page, rec, index);
- fprintf(stderr,
- "InnoDB: field %lu len is %lu,"
- " should be %lu\n",
- (ulong) i, (ulong) len, (ulong) fixed_size);
-
- if (dump_on_error) {
- buf_page_print(page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
-
- fputs("InnoDB: corrupt record ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
- }
- if (heap) {
- mem_heap_free(heap);
- }
- return(FALSE);
- }
- }
-
- if (heap) {
- mem_heap_free(heap);
- }
- return(TRUE);
-}
-
-/************************************************************//**
-Checks the size and number of fields in records based on the definition of
-the index.
-@return TRUE if ok */
-static
-ibool
-btr_index_page_validate(
-/*====================*/
- buf_block_t* block, /*!< in: index page */
- dict_index_t* index) /*!< in: index */
-{
- page_cur_t cur;
- ibool ret = TRUE;
-#ifndef DBUG_OFF
- ulint nth = 1;
-#endif /* !DBUG_OFF */
-
- page_cur_set_before_first(block, &cur);
-
- /* Directory slot 0 should only contain the infimum record. */
- DBUG_EXECUTE_IF("check_table_rec_next",
- ut_a(page_rec_get_nth_const(
- page_cur_get_page(&cur), 0)
- == cur.rec);
- ut_a(page_dir_slot_get_n_owned(
- page_dir_get_nth_slot(
- page_cur_get_page(&cur), 0))
- == 1););
-
- page_cur_move_to_next(&cur);
-
- for (;;) {
- if (page_cur_is_after_last(&cur)) {
-
- break;
- }
-
- if (!btr_index_rec_validate(cur.rec, index, TRUE)) {
-
- return(FALSE);
- }
-
- /* Verify that page_rec_get_nth_const() is correctly
- retrieving each record. */
- DBUG_EXECUTE_IF("check_table_rec_next",
- ut_a(cur.rec == page_rec_get_nth_const(
- page_cur_get_page(&cur),
- page_rec_get_n_recs_before(
- cur.rec)));
- ut_a(nth++ == page_rec_get_n_recs_before(
- cur.rec)););
-
- page_cur_move_to_next(&cur);
- }
-
- return(ret);
-}
-
-/************************************************************//**
-Report an error on one page of an index tree. */
-static
-void
-btr_validate_report1(
-/*=================*/
- dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: B-tree level */
- const buf_block_t* block) /*!< in: index page */
-{
- fprintf(stderr, "InnoDB: Error in page %lu of ",
- buf_block_get_page_no(block));
- dict_index_name_print(stderr, NULL, index);
- if (level) {
- fprintf(stderr, ", index tree level %lu", level);
- }
- putc('\n', stderr);
-}
-
-/************************************************************//**
-Report an error on two pages of an index tree. */
-static
-void
-btr_validate_report2(
-/*=================*/
- const dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: B-tree level */
- const buf_block_t* block1, /*!< in: first index page */
- const buf_block_t* block2) /*!< in: second index page */
-{
- fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ",
- buf_block_get_page_no(block1),
- buf_block_get_page_no(block2));
- dict_index_name_print(stderr, NULL, index);
- if (level) {
- fprintf(stderr, ", index tree level %lu", level);
- }
- putc('\n', stderr);
-}
-
-/************************************************************//**
-Validates index tree level.
-@return TRUE if ok */
-static
-bool
-btr_validate_level(
-/*===============*/
- dict_index_t* index, /*!< in: index tree */
- const trx_t* trx, /*!< in: transaction or NULL */
- ulint level) /*!< in: level number */
-{
- ulint space;
- ulint space_flags;
- ulint zip_size;
- buf_block_t* block;
- page_t* page;
- buf_block_t* right_block = 0; /* remove warning */
- page_t* right_page = 0; /* remove warning */
- page_t* father_page;
- btr_cur_t node_cur;
- btr_cur_t right_node_cur;
- rec_t* rec;
- ulint right_page_no;
- ulint left_page_no;
- page_cur_t cursor;
- dtuple_t* node_ptr_tuple;
- bool ret = true;
- mtr_t mtr;
- mem_heap_t* heap = mem_heap_create(256);
- fseg_header_t* seg;
- ulint* offsets = NULL;
- ulint* offsets2= NULL;
-#ifdef UNIV_ZIP_DEBUG
- page_zip_des_t* page_zip;
-#endif /* UNIV_ZIP_DEBUG */
-
- mtr_start(&mtr);
-
- mtr_x_lock(dict_index_get_lock(index), &mtr);
-
- block = btr_root_block_get(index, RW_X_LATCH, &mtr);
- page = buf_block_get_frame(block);
- seg = page + PAGE_HEADER + PAGE_BTR_SEG_TOP;
-
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
-
- fil_space_get_latch(space, &space_flags);
-
- if (zip_size != dict_tf_get_zip_size(space_flags)) {
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Flags mismatch: table=%lu, tablespace=%lu",
- (ulint) index->table->flags, (ulint) space_flags);
-
- mtr_commit(&mtr);
-
- return(false);
- }
-
- while (level != btr_page_get_level(page, &mtr)) {
- const rec_t* node_ptr;
-
- if (fseg_page_is_free(seg,
- block->page.space, block->page.offset)) {
-
- btr_validate_report1(index, level, block);
-
- ib_logf(IB_LOG_LEVEL_WARN, "page is free");
-
- ret = false;
- }
-
- ut_a(space == buf_block_get_space(block));
- ut_a(space == page_get_space_id(page));
-#ifdef UNIV_ZIP_DEBUG
- page_zip = buf_block_get_page_zip(block);
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
- ut_a(!page_is_leaf(page));
-
- page_cur_set_before_first(block, &cursor);
- page_cur_move_to_next(&cursor);
-
- node_ptr = page_cur_get_rec(&cursor);
- offsets = rec_get_offsets(node_ptr, index, offsets,
- ULINT_UNDEFINED, &heap);
- block = btr_node_ptr_get_child(node_ptr, index, offsets, &mtr);
- page = buf_block_get_frame(block);
- }
-
- /* Now we are on the desired level. Loop through the pages on that
- level. */
-
- if (level == 0) {
- /* Leaf pages are managed in their own file segment. */
- seg -= PAGE_BTR_SEG_TOP - PAGE_BTR_SEG_LEAF;
- }
-
-loop:
- mem_heap_empty(heap);
- offsets = offsets2 = NULL;
- mtr_x_lock(dict_index_get_lock(index), &mtr);
-
-#ifdef UNIV_ZIP_DEBUG
- page_zip = buf_block_get_page_zip(block);
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
- ut_a(block->page.space == space);
-
- if (fseg_page_is_free(seg, block->page.space, block->page.offset)) {
-
- btr_validate_report1(index, level, block);
-
- ib_logf(IB_LOG_LEVEL_WARN, "Page is marked as free");
- ret = false;
-
- } else if (btr_page_get_index_id(page) != index->id) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page index id " IB_ID_FMT " != data dictionary "
- "index id " IB_ID_FMT,
- btr_page_get_index_id(page), index->id);
-
- ret = false;
-
- } else if (!page_validate(page, index)) {
-
- btr_validate_report1(index, level, block);
- ret = false;
-
- } else if (level == 0 && !btr_index_page_validate(block, index)) {
-
- /* We are on level 0. Check that the records have the right
- number of fields, and field lengths are right. */
-
- ret = false;
- }
-
- ut_a(btr_page_get_level(page, &mtr) == level);
-
- right_page_no = btr_page_get_next(page, &mtr);
- left_page_no = btr_page_get_prev(page, &mtr);
-
- ut_a(!page_is_empty(page)
- || (level == 0
- && page_get_page_no(page) == dict_index_get_page(index)));
-
- if (right_page_no != FIL_NULL) {
- const rec_t* right_rec;
- right_block = btr_block_get(space, zip_size, right_page_no,
- RW_X_LATCH, index, &mtr);
- right_page = buf_block_get_frame(right_block);
- if (btr_page_get_prev(right_page, &mtr)
- != page_get_page_no(page)) {
-
- btr_validate_report2(index, level, block, right_block);
- fputs("InnoDB: broken FIL_PAGE_NEXT"
- " or FIL_PAGE_PREV links\n", stderr);
- buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
-
- ret = false;
- }
-
- if (page_is_comp(right_page) != page_is_comp(page)) {
- btr_validate_report2(index, level, block, right_block);
- fputs("InnoDB: 'compact' flag mismatch\n", stderr);
- buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
-
- ret = false;
-
- goto node_ptr_fails;
- }
-
- rec = page_rec_get_prev(page_get_supremum_rec(page));
- right_rec = page_rec_get_next(page_get_infimum_rec(
- right_page));
- offsets = rec_get_offsets(rec, index,
- offsets, ULINT_UNDEFINED, &heap);
- offsets2 = rec_get_offsets(right_rec, index,
- offsets2, ULINT_UNDEFINED, &heap);
- if (cmp_rec_rec(rec, right_rec, offsets, offsets2,
- index) >= 0) {
-
- btr_validate_report2(index, level, block, right_block);
-
- fputs("InnoDB: records in wrong order"
- " on adjacent pages\n", stderr);
-
- buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(right_page, 0, BUF_PAGE_PRINT_NO_CRASH);
-
- fputs("InnoDB: record ", stderr);
- rec = page_rec_get_prev(page_get_supremum_rec(page));
- rec_print(stderr, rec, index);
- putc('\n', stderr);
- fputs("InnoDB: record ", stderr);
- rec = page_rec_get_next(
- page_get_infimum_rec(right_page));
- rec_print(stderr, rec, index);
- putc('\n', stderr);
-
- ret = false;
- }
- }
-
- if (level > 0 && left_page_no == FIL_NULL) {
- ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
- page_rec_get_next(page_get_infimum_rec(page)),
- page_is_comp(page)));
- }
-
- if (buf_block_get_page_no(block) != dict_index_get_page(index)) {
-
- /* Check father node pointers */
-
- rec_t* node_ptr;
-
- offsets = btr_page_get_father_block(offsets, heap, index,
- block, &mtr, &node_cur);
- father_page = btr_cur_get_page(&node_cur);
- node_ptr = btr_cur_get_rec(&node_cur);
-
- btr_cur_position(
- index, page_rec_get_prev(page_get_supremum_rec(page)),
- block, &node_cur);
- offsets = btr_page_get_father_node_ptr(offsets, heap,
- &node_cur, &mtr);
-
- if (node_ptr != btr_cur_get_rec(&node_cur)
- || btr_node_ptr_get_child_page_no(node_ptr, offsets)
- != buf_block_get_page_no(block)) {
-
- btr_validate_report1(index, level, block);
-
- fputs("InnoDB: node pointer to the page is wrong\n",
- stderr);
-
- buf_page_print(father_page, 0, BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
-
- fputs("InnoDB: node ptr ", stderr);
- rec_print(stderr, node_ptr, index);
-
- rec = btr_cur_get_rec(&node_cur);
- fprintf(stderr, "\n"
- "InnoDB: node ptr child page n:o %lu\n",
- (ulong) btr_node_ptr_get_child_page_no(
- rec, offsets));
-
- fputs("InnoDB: record on page ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
- ret = false;
-
- goto node_ptr_fails;
- }
-
- if (!page_is_leaf(page)) {
- node_ptr_tuple = dict_index_build_node_ptr(
- index,
- page_rec_get_next(page_get_infimum_rec(page)),
- 0, heap, btr_page_get_level(page, &mtr));
-
- if (cmp_dtuple_rec(node_ptr_tuple, node_ptr,
- offsets)) {
- const rec_t* first_rec = page_rec_get_next(
- page_get_infimum_rec(page));
-
- btr_validate_report1(index, level, block);
-
- buf_page_print(father_page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
-
- fputs("InnoDB: Error: node ptrs differ"
- " on levels > 0\n"
- "InnoDB: node ptr ", stderr);
- rec_print_new(stderr, node_ptr, offsets);
- fputs("InnoDB: first rec ", stderr);
- rec_print(stderr, first_rec, index);
- putc('\n', stderr);
- ret = false;
-
- goto node_ptr_fails;
- }
- }
-
- if (left_page_no == FIL_NULL) {
- ut_a(node_ptr == page_rec_get_next(
- page_get_infimum_rec(father_page)));
- ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL);
- }
-
- if (right_page_no == FIL_NULL) {
- ut_a(node_ptr == page_rec_get_prev(
- page_get_supremum_rec(father_page)));
- ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL);
- } else {
- const rec_t* right_node_ptr
- = page_rec_get_next(node_ptr);
-
- offsets = btr_page_get_father_block(
- offsets, heap, index, right_block,
- &mtr, &right_node_cur);
- if (right_node_ptr
- != page_get_supremum_rec(father_page)) {
-
- if (btr_cur_get_rec(&right_node_cur)
- != right_node_ptr) {
- ret = false;
- fputs("InnoDB: node pointer to"
- " the right page is wrong\n",
- stderr);
-
- btr_validate_report1(index, level,
- block);
-
- buf_page_print(
- father_page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(
- page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(
- right_page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
- }
- } else {
- page_t* right_father_page
- = btr_cur_get_page(&right_node_cur);
-
- if (btr_cur_get_rec(&right_node_cur)
- != page_rec_get_next(
- page_get_infimum_rec(
- right_father_page))) {
- ret = false;
- fputs("InnoDB: node pointer 2 to"
- " the right page is wrong\n",
- stderr);
-
- btr_validate_report1(index, level,
- block);
-
- buf_page_print(
- father_page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(
- right_father_page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(
- page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(
- right_page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
- }
-
- if (page_get_page_no(right_father_page)
- != btr_page_get_next(father_page, &mtr)) {
-
- ret = false;
- fputs("InnoDB: node pointer 3 to"
- " the right page is wrong\n",
- stderr);
-
- btr_validate_report1(index, level,
- block);
-
- buf_page_print(
- father_page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(
- right_father_page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(
- page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(
- right_page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
- }
- }
- }
- }
-
-node_ptr_fails:
- /* Commit the mini-transaction to release the latch on 'page'.
- Re-acquire the latch on right_page, which will become 'page'
- on the next loop. The page has already been checked. */
- mtr_commit(&mtr);
-
- if (trx_is_interrupted(trx)) {
- /* On interrupt, return the current status. */
- } else if (right_page_no != FIL_NULL) {
-
- mtr_start(&mtr);
-
- block = btr_block_get(
- space, zip_size, right_page_no,
- RW_X_LATCH, index, &mtr);
-
- page = buf_block_get_frame(block);
-
- goto loop;
- }
-
- mem_heap_free(heap);
-
- return(ret);
-}
-
-/**************************************************************//**
-Checks the consistency of an index tree.
-@return DB_SUCCESS if ok, error code if not */
-UNIV_INTERN
-dberr_t
-btr_validate_index(
-/*===============*/
- dict_index_t* index, /*!< in: index */
- const trx_t* trx) /*!< in: transaction or NULL */
-{
- dberr_t err = DB_SUCCESS;
-
- /* Full Text index are implemented by auxiliary tables,
- not the B-tree */
- if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) {
- return(err);
- }
-
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- mtr_x_lock(dict_index_get_lock(index), &mtr);
-
- page_t* root = btr_root_get(index, &mtr);
-
- if (root == NULL && index->table->file_unreadable) {
- err = DB_DECRYPTION_FAILED;
- mtr_commit(&mtr);
- return err;
- }
-
- SRV_CORRUPT_TABLE_CHECK(root,
- {
- mtr_commit(&mtr);
- return(DB_CORRUPTION);
- });
-
- ulint n = btr_page_get_level(root, &mtr);
-
- for (ulint i = 0; i <= n; ++i) {
-
- if (!btr_validate_level(index, trx, n - i)) {
- err = DB_CORRUPTION;
- break;
- }
- }
-
- mtr_commit(&mtr);
-
- return(err);
-}
-
-/**************************************************************//**
-Checks if the page in the cursor can be merged with given page.
-If necessary, re-organize the merge_page.
-@return TRUE if possible to merge. */
-UNIV_INTERN
-ibool
-btr_can_merge_with_page(
-/*====================*/
- btr_cur_t* cursor, /*!< in: cursor on the page to merge */
- ulint page_no, /*!< in: a sibling page */
- buf_block_t** merge_block, /*!< out: the merge block */
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- dict_index_t* index;
- page_t* page;
- ulint space;
- ulint zip_size;
- ulint n_recs;
- ulint data_size;
- ulint max_ins_size_reorg;
- ulint max_ins_size;
- buf_block_t* mblock;
- page_t* mpage;
- DBUG_ENTER("btr_can_merge_with_page");
-
- if (page_no == FIL_NULL) {
- goto error;
- }
-
- index = btr_cur_get_index(cursor);
- page = btr_cur_get_page(cursor);
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
-
- mblock = btr_block_get(space, zip_size, page_no, RW_X_LATCH, index,
- mtr);
- mpage = buf_block_get_frame(mblock);
-
- n_recs = page_get_n_recs(page);
- data_size = page_get_data_size(page);
-
- max_ins_size_reorg = page_get_max_insert_size_after_reorganize(
- mpage, n_recs);
-
- if (data_size > max_ins_size_reorg) {
- goto error;
- }
-
- /* If compression padding tells us that merging will result in
- too packed up page i.e.: which is likely to cause compression
- failure then don't merge the pages. */
- if (zip_size && page_is_leaf(mpage)
- && (page_get_data_size(mpage) + data_size
- >= dict_index_zip_pad_optimal_page_size(index))) {
-
- goto error;
- }
-
-
- max_ins_size = page_get_max_insert_size(mpage, n_recs);
-
- if (data_size > max_ins_size) {
-
- /* We have to reorganize mpage */
-
- if (!btr_page_reorganize_block(
- false, page_zip_level, mblock, index, mtr)) {
-
- goto error;
- }
-
- max_ins_size = page_get_max_insert_size(mpage, n_recs);
-
- ut_ad(page_validate(mpage, index));
- ut_ad(max_ins_size == max_ins_size_reorg);
-
- if (data_size > max_ins_size) {
-
- /* Add fault tolerance, though this should
- never happen */
-
- goto error;
- }
- }
-
- *merge_block = mblock;
- DBUG_RETURN(TRUE);
-
-error:
- *merge_block = NULL;
- DBUG_RETURN(FALSE);
-}
-
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/btr/btr0cur.cc b/storage/xtradb/btr/btr0cur.cc
deleted file mode 100644
index ffd7ebc7504..00000000000
--- a/storage/xtradb/btr/btr0cur.cc
+++ /dev/null
@@ -1,6148 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2015, 2017, MariaDB Corporation.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file btr/btr0cur.cc
-The index tree cursor
-
-All changes that row operations make to a B-tree or the records
-there must go through this module! Undo log records are written here
-of every modify or insert of a clustered index record.
-
- NOTE!!!
-To make sure we do not run out of disk space during a pessimistic
-insert or update, we have to reserve 2 x the height of the index tree
-many pages in the tablespace before we start the operation, because
-if leaf splitting has been started, it is difficult to undo, except
-by crashing the database and doing a roll-forward.
-
-Created 10/16/1994 Heikki Tuuri
-*******************************************************/
-
-#include "btr0cur.h"
-
-#ifdef UNIV_NONINL
-#include "btr0cur.ic"
-#endif
-
-#include "row0upd.h"
-#ifndef UNIV_HOTBACKUP
-#include "mtr0log.h"
-#include "page0page.h"
-#include "page0zip.h"
-#include "rem0rec.h"
-#include "rem0cmp.h"
-#include "buf0lru.h"
-#include "btr0btr.h"
-#include "btr0sea.h"
-#include "row0log.h"
-#include "row0purge.h"
-#include "row0upd.h"
-#include "trx0rec.h"
-#include "trx0roll.h" /* trx_is_recv() */
-#include "que0que.h"
-#include "row0row.h"
-#include "srv0srv.h"
-#include "ibuf0ibuf.h"
-#include "lock0lock.h"
-#include "zlib.h"
-
-/** Buffered B-tree operation types, introduced as part of delete buffering.
-btr_cur_search_to_nth_level() derives one of these values from the
-BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK bits of its latch_mode argument
-and uses it to decide whether a leaf-level operation is offered to
-ibuf_insert() instead of reading the leaf page. */
-enum btr_op_t {
- BTR_NO_OP = 0, /*!< Not buffered */
- BTR_INSERT_OP, /*!< Insert, do not ignore UNIQUE */
- BTR_INSERT_IGNORE_UNIQUE_OP, /*!< Insert, ignoring UNIQUE */
- BTR_DELETE_OP, /*!< Purge a delete-marked record */
- BTR_DELMARK_OP /*!< Mark a record for deletion */
-};
-
-#ifdef UNIV_DEBUG
-/** If the following is set to TRUE, this module prints a lot of
-trace information of individual record operations.
-Only defined when UNIV_DEBUG is set. */
-UNIV_INTERN ibool btr_cur_print_record_ops = FALSE;
-#endif /* UNIV_DEBUG */
-
-/** Number of searches down the B-tree in btr_cur_search_to_nth_level().
-Bumped there with a plain ++ whenever the adaptive hash index shortcut is
-not taken and the tree is descended instead. */
-UNIV_INTERN ulint btr_cur_n_non_sea = 0;
-/** Number of successful adaptive hash index lookups in
-btr_cur_search_to_nth_level(). Bumped there with a plain ++ when
-btr_search_guess_on_hash() succeeds. */
-UNIV_INTERN ulint btr_cur_n_sea = 0;
-/** Old value of btr_cur_n_non_sea. Copied by
-srv_refresh_innodb_monitor_stats(). Referenced by
-srv_printf_innodb_monitor(). */
-UNIV_INTERN ulint btr_cur_n_non_sea_old = 0;
-/** Old value of btr_cur_n_sea. Copied by
-srv_refresh_innodb_monitor_stats(). Referenced by
-srv_printf_innodb_monitor(). */
-UNIV_INTERN ulint btr_cur_n_sea_old = 0;
-
-#ifdef UNIV_DEBUG
-/* Flag to limit optimistic insert records.
-Only defined when UNIV_DEBUG is set; initial value is 0. */
-UNIV_INTERN uint btr_cur_limit_optimistic_insert_debug = 0;
-#endif /* UNIV_DEBUG */
-
-/** In the optimistic insert, if the insert does not fit, but this much space
-can be released by page reorganize, then it is reorganized.
-The limit is 1/32 of UNIV_PAGE_SIZE. */
-#define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32)
-
-/** The structure of a BLOB part header */
-/* @{ */
-/* NOTE(review): the values 0 and 4 below look like byte offsets of two
-4-byte fields inside the 8-byte header - confirm against the code that
-reads and writes BLOB pages. */
-/*--------------------------------------*/
-#define BTR_BLOB_HDR_PART_LEN 0 /*!< BLOB part len on this
- page */
-#define BTR_BLOB_HDR_NEXT_PAGE_NO 4 /*!< next BLOB part page no,
- FIL_NULL if none */
-/*--------------------------------------*/
-#define BTR_BLOB_HDR_SIZE 8 /*!< Size of a BLOB
- part header, in bytes */
-
-/** Estimated table level stats from sampled value.
-Multiplies the sampled value by the number of leaf pages of the index
-(widened to ib_int64_t) and divides by the sample size plus the externally
-stored size; the added (sample) - 1 term biases the integer division
-upwards.
-NOTE: sample and ext_size are each expanded twice, so the arguments must
-be free of side effects, and (sample) + (ext_size) must not be zero.
-Every parameter, including index, is parenthesized in the expansion so
-that arbitrary expressions can be passed safely.
-@param value sampled stats
-@param index index being sampled (pointer; only stat_n_leaf_pages is read)
-@param sample number of sampled rows
-@param ext_size external stored data size
-@param not_empty table not empty
-@return estimated table wide stats from sampled value */
-#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty)\
-	(((value) * (ib_int64_t) (index)->stat_n_leaf_pages		\
-	  + (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size)))
-
-/* @} */
-#endif /* !UNIV_HOTBACKUP */
-
-/** An all-zero BLOB field reference, BTR_EXTERN_FIELD_REF_SIZE bytes long,
-for use in assertions and tests. BLOB field references start out zeroed
-in dtuple_convert_big_rec(). */
-const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE] = {0};
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Marks all extern fields in a record as owned by the record. This function
-should be called if the delete mark of a record is removed: a not delete
-marked record always owns all its extern fields. */
-static
-void
-btr_cur_unmark_extern_fields(
-/*=========================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
- part will be updated, or NULL */
- rec_t* rec, /*!< in/out: record in a clustered index */
- dict_index_t* index, /*!< in: index of the page */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- mtr_t* mtr); /*!< in: mtr, or NULL if not logged */
-/*******************************************************************//**
-Adds path information to the cursor for the current page, for which
-the binary search has been performed. */
-static
-void
-btr_cur_add_path_info(
-/*==================*/
- btr_cur_t* cursor, /*!< in: cursor positioned on a page */
- ulint height, /*!< in: height of the page in tree;
- 0 means leaf node */
- ulint root_height); /*!< in: root node height in tree */
-/***********************************************************//**
-Frees the externally stored fields for a record, if the field is mentioned
-in the update vector. */
-static
-void
-btr_rec_free_updated_extern_fields(
-/*===============================*/
- dict_index_t* index, /*!< in: index of rec; the index tree MUST be
- X-latched */
- rec_t* rec, /*!< in: record */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- const upd_t* update, /*!< in: update vector */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* mtr); /*!< in: mini-transaction handle which contains
- an X-latch to record page and to the tree */
-/***********************************************************//**
-Frees the externally stored fields for a record. */
-static
-void
-btr_rec_free_externally_stored_fields(
-/*==================================*/
- dict_index_t* index, /*!< in: index of the data, the index
- tree MUST be X-latched */
- rec_t* rec, /*!< in: record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* mtr); /*!< in: mini-transaction handle which contains
- an X-latch to record page and to the index
- tree */
-#endif /* !UNIV_HOTBACKUP */
-
-/******************************************************//**
-Sets or clears the delete-mark bit of a physical record. The setter is
-chosen by page_rec_is_comp(rec); the "old" setter takes no compressed
-page, so page_zip must be NULL on that path (debug assertion). */
-UNIV_INLINE
-void
-btr_rec_set_deleted_flag(
-/*=====================*/
-	rec_t*		rec,	/*!< in/out: physical record */
-	page_zip_des_t*	page_zip,/*!< in/out: compressed page (or NULL) */
-	ulint		flag)	/*!< in: nonzero if delete marked */
-{
-	if (!page_rec_is_comp(rec)) {
-		/* No compressed page may accompany this record format. */
-		ut_ad(!page_zip);
-		rec_set_deleted_flag_old(rec, flag);
-		return;
-	}
-
-	rec_set_deleted_flag_new(rec, page_zip, flag);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*==================== B-TREE SEARCH =========================*/
-
-/********************************************************************//**
-Latches the leaf page or pages requested.
-What is fetched depends on latch_mode:
-- BTR_SEARCH_LEAF / BTR_MODIFY_LEAF: only page_no, with an S- or X-latch.
-- BTR_SEARCH_TREE / BTR_MODIFY_TREE: left sibling, page_no, right sibling,
-  in that order. BTR_MODIFY_TREE X-latches all of them; BTR_SEARCH_TREE
-  S-latches page_no and fetches the siblings with RW_NO_LATCH, releasing
-  their buffer-fix immediately.
-- BTR_SEARCH_PREV / BTR_MODIFY_PREV: left sibling, then page_no, both with
-  an S- or X-latch; the left sibling is stored in cursor->left_block.
-Any other latch_mode hits ut_error.
-NOTE(review): SRV_CORRUPT_TABLE_CHECK() is defined elsewhere; each use here
-presumably executes "return;" when get_block is NULL - confirm. */
-static
-void
-btr_cur_latch_leaves(
-/*=================*/
- page_t* page, /*!< in: leaf page where the search
- converged */
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the leaf */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /*!< in: cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint mode;
- ulint sibling_mode;
- ulint left_page_no;
- ulint right_page_no;
- buf_block_t* get_block;
-
- ut_ad(page && mtr);
-
- switch (latch_mode) {
- case BTR_SEARCH_LEAF:
- case BTR_MODIFY_LEAF:
- mode = latch_mode == BTR_SEARCH_LEAF ? RW_S_LATCH : RW_X_LATCH;
- get_block = btr_block_get(
- space, zip_size, page_no, mode, cursor->index, mtr);
-
- SRV_CORRUPT_TABLE_CHECK(get_block, return;);
-
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
-#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
- return;
- case BTR_SEARCH_TREE:
- case BTR_MODIFY_TREE:
- if (UNIV_UNLIKELY(latch_mode == BTR_SEARCH_TREE)) {
- mode = RW_S_LATCH;
- sibling_mode = RW_NO_LATCH;
- } else {
- mode = sibling_mode = RW_X_LATCH;
- }
- /* Fetch and possibly latch also brothers from left to right */
- left_page_no = btr_page_get_prev(page, mtr);
-
- if (left_page_no != FIL_NULL) {
- get_block = btr_block_get(
- space, zip_size, left_page_no,
- sibling_mode, cursor->index, mtr);
-
- SRV_CORRUPT_TABLE_CHECK(get_block, return;);
-
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(get_block->frame)
- == page_is_comp(page));
-
- /* For fake_change mode we avoid a detailed validation,
- as it operates on a tweaked format in which the
- validation may fail. */
- ut_a(sibling_mode == RW_NO_LATCH
- || btr_page_get_next(get_block->frame, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- if (sibling_mode == RW_NO_LATCH) {
- /* btr_block_get() called with RW_NO_LATCH will
- fix the read block in the buffer. This serves
- no purpose for the fake changes prefetching,
- thus we unfix the sibling blocks immediately.*/
- mtr_memo_release(mtr, get_block,
- MTR_MEMO_BUF_FIX);
- } else {
- get_block->check_index_page_at_flush = TRUE;
- }
- }
-
- /* The page itself is fetched after its left sibling and
- before its right sibling. */
- get_block = btr_block_get(
- space, zip_size, page_no,
- mode, cursor->index, mtr);
-
- SRV_CORRUPT_TABLE_CHECK(get_block, return;);
-
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
-#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
-
- right_page_no = btr_page_get_next(page, mtr);
-
- if (right_page_no != FIL_NULL) {
- get_block = btr_block_get(
- space, zip_size, right_page_no,
- sibling_mode, cursor->index, mtr);
-
- SRV_CORRUPT_TABLE_CHECK(get_block, return;);
-
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(get_block->frame)
- == page_is_comp(page));
- ut_a(btr_page_get_prev(get_block->frame, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- if (sibling_mode == RW_NO_LATCH) {
- /* Same immediate unfix as for the left sibling. */
- mtr_memo_release(mtr, get_block,
- MTR_MEMO_BUF_FIX);
- } else {
- get_block->check_index_page_at_flush = TRUE;
- }
- }
-
- return;
-
- case BTR_SEARCH_PREV:
- case BTR_MODIFY_PREV:
- mode = latch_mode == BTR_SEARCH_PREV ? RW_S_LATCH : RW_X_LATCH;
- /* latch also left brother */
- left_page_no = btr_page_get_prev(page, mtr);
-
- if (left_page_no != FIL_NULL) {
- get_block = btr_block_get(
- space, zip_size,
- left_page_no, mode, cursor->index, mtr);
- /* Stored before the corruption check below, so the
- cursor receives whatever btr_block_get() returned. */
- cursor->left_block = get_block;
-
- SRV_CORRUPT_TABLE_CHECK(get_block, return;);
-
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(get_block->frame)
- == page_is_comp(page));
- ut_a(btr_page_get_next(get_block->frame, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
- }
-
- get_block = btr_block_get(
- space, zip_size, page_no, mode, cursor->index, mtr);
-
- SRV_CORRUPT_TABLE_CHECK(get_block, return;);
-
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(get_block->frame) == page_is_comp(page));
-#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
- return;
- }
-
- /* Reached only for a latch_mode not handled by the switch above. */
- ut_error;
-}
-
-/********************************************************************//**
-Searches an index tree and positions a tree cursor on a given level.
-NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
-to node pointer page number fields on the upper levels of the tree!
-Note that if mode is PAGE_CUR_LE, which is used in inserts, then
-cursor->up_match and cursor->low_match both will have sensible values.
-If mode is PAGE_CUR_GE, then up_match will have a sensible value.
-
-If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the
-search tuple should be performed in the B-tree. InnoDB does an insert
-immediately after the cursor. Thus, the cursor may end up on a user record,
-or on a page infimum record.
-
-Outline: (1) decode the buffering flags in latch_mode into a btr_op_t;
-(2) try the adaptive hash index when it is compiled in and applicable;
-(3) otherwise take the index lock and descend from the root, one page per
-pass through search_loop, until the requested level is reached.
-@return DB_SUCCESS, or the error that buf_page_get_gen() reported when a
-page could not be fetched */
-UNIV_INTERN
-dberr_t
-btr_cur_search_to_nth_level(
-/*========================*/
- dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: the tree level of search */
- const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
- tuple must be set so that it cannot get
- compared to the node ptr page number field! */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
- Inserts should always be made using
- PAGE_CUR_LE to search the position! */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
- at most one of BTR_INSERT, BTR_DELETE_MARK,
- BTR_DELETE, or BTR_ESTIMATE;
- cursor->left_block is used to store a pointer
- to the left neighbor page, in the cases
- BTR_SEARCH_PREV and BTR_MODIFY_PREV;
- NOTE that if has_search_latch
- is != 0, we maybe do not have a latch set
- on the cursor page, we assume
- the caller uses his search latch
- to protect the record! */
- btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
- s- or x-latched, but see also above! */
- ulint has_search_latch,/*!< in: info on the latch mode the
- caller currently has on btr_search_latch:
- RW_S_LATCH, or 0 */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* page;
- buf_block_t* block;
- ulint space;
- buf_block_t* guess;
- ulint height;
- ulint page_no;
- ulint up_match;
- ulint up_bytes;
- ulint low_match;
- ulint low_bytes;
- ulint savepoint;
- ulint rw_latch;
- ulint page_mode;
- ulint buf_mode;
- ulint estimate;
- ulint zip_size;
- page_cur_t* page_cursor;
- btr_op_t btr_op;
- ulint root_height = 0; /* remove warning */
- dberr_t err = DB_SUCCESS;
-
-#ifdef BTR_CUR_ADAPT
- btr_search_t* info;
-#endif
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
- /* Currently, PAGE_CUR_LE is the only search mode used for searches
- ending to upper levels */
-
- ut_ad(level == 0 || mode == PAGE_CUR_LE);
- ut_ad(dict_index_check_search_tuple(index, tuple));
- ut_ad(!dict_index_is_ibuf(index) || ibuf_inside(mtr));
- ut_ad(dtuple_check_typed(tuple));
- ut_ad(!(index->type & DICT_FTS));
- ut_ad(index->page != FIL_NULL);
-
- UNIV_MEM_INVALID(&cursor->up_match, sizeof cursor->up_match);
- UNIV_MEM_INVALID(&cursor->up_bytes, sizeof cursor->up_bytes);
- UNIV_MEM_INVALID(&cursor->low_match, sizeof cursor->low_match);
- UNIV_MEM_INVALID(&cursor->low_bytes, sizeof cursor->low_bytes);
-#ifdef UNIV_DEBUG
- cursor->up_match = ULINT_UNDEFINED;
- cursor->low_match = ULINT_UNDEFINED;
-#endif
-
- ibool s_latch_by_caller;
-
- /* Nonzero when the caller passed BTR_ALREADY_S_LATCHED. In that case
- the index lock is neither S-locked nor released by this function;
- the assertion below checks that mtr already holds it. */
- s_latch_by_caller = latch_mode & BTR_ALREADY_S_LATCHED;
-
- ut_ad(!s_latch_by_caller
- || mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_S_LOCK));
-
- /* These flags are mutually exclusive, they are lumped together
- with the latch mode for historical reasons. It's possible for
- none of the flags to be set. */
- switch (UNIV_EXPECT(latch_mode
- & (BTR_INSERT | BTR_DELETE | BTR_DELETE_MARK),
- 0)) {
- case 0:
- btr_op = BTR_NO_OP;
- break;
- case BTR_INSERT:
- btr_op = (latch_mode & BTR_IGNORE_SEC_UNIQUE)
- ? BTR_INSERT_IGNORE_UNIQUE_OP
- : BTR_INSERT_OP;
- break;
- case BTR_DELETE:
- btr_op = BTR_DELETE_OP;
- ut_a(cursor->purge_node);
- break;
- case BTR_DELETE_MARK:
- btr_op = BTR_DELMARK_OP;
- break;
- default:
- /* only one of BTR_INSERT, BTR_DELETE, BTR_DELETE_MARK
- should be specified at a time */
- ut_error;
- }
-
- /* Operations on the insert buffer tree cannot be buffered. */
- ut_ad(btr_op == BTR_NO_OP || !dict_index_is_ibuf(index));
- /* Operations on the clustered index cannot be buffered. */
- ut_ad(btr_op == BTR_NO_OP || !dict_index_is_clust(index));
-
- estimate = latch_mode & BTR_ESTIMATE;
-
- /* Turn the flags unrelated to the latch mode off. */
- latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
-
- ut_ad(!s_latch_by_caller
- || latch_mode == BTR_SEARCH_LEAF
- || latch_mode == BTR_MODIFY_LEAF);
-
- cursor->flag = BTR_CUR_BINARY;
- cursor->index = index;
-
-#ifndef BTR_CUR_ADAPT
- guess = NULL;
-#else
- info = btr_search_get_info(index);
-
- guess = info->root_guess;
-
-#ifdef BTR_CUR_HASH_ADAPT
-
-# ifdef UNIV_SEARCH_PERF_STAT
- info->n_searches++;
-# endif
- if (rw_lock_get_writer(btr_search_get_latch(cursor->index)) ==
- RW_LOCK_NOT_LOCKED
- && latch_mode <= BTR_MODIFY_LEAF
- && info->last_hash_succ
- && !estimate
-# ifdef PAGE_CUR_LE_OR_EXTENDS
- && mode != PAGE_CUR_LE_OR_EXTENDS
-# endif /* PAGE_CUR_LE_OR_EXTENDS */
- /* If !has_search_latch, we do a dirty read of
- btr_search_enabled below, and btr_search_guess_on_hash()
- will have to check it again. */
- && UNIV_LIKELY(btr_search_enabled)
- && btr_search_guess_on_hash(index, info, tuple, mode,
- latch_mode, cursor,
- has_search_latch, mtr)) {
-
- /* Search using the hash index succeeded */
-
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_GE);
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- ut_ad(cursor->low_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- btr_cur_n_sea++;
-
- /* This return precedes the search-latch release and every
- use of heap further below, so it bypasses func_exit. */
- return err;
- }
-# endif /* BTR_CUR_HASH_ADAPT */
-#endif /* BTR_CUR_ADAPT */
- btr_cur_n_non_sea++;
-
- /* If the hash search did not succeed, do binary search down the
- tree */
-
- if (has_search_latch) {
- /* Release possible search latch to obey latching order */
- rw_lock_s_unlock(btr_search_get_latch(cursor->index));
- }
-
- /* Store the position of the tree latch we push to mtr so that we
- know how to release it when we have latched leaf node(s) */
-
- savepoint = mtr_set_savepoint(mtr);
-
- switch (latch_mode) {
- case BTR_MODIFY_TREE:
- mtr_x_lock(dict_index_get_lock(index), mtr);
- break;
- case BTR_CONT_MODIFY_TREE:
- /* Do nothing */
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- break;
- default:
- if (!s_latch_by_caller) {
- mtr_s_lock(dict_index_get_lock(index), mtr);
- }
- }
-
- page_cursor = btr_cur_get_page_cur(cursor);
-
- space = dict_index_get_space(index);
- page_no = dict_index_get_page(index);
-
- up_match = 0;
- up_bytes = 0;
- low_match = 0;
- low_bytes = 0;
-
- /* ULINT_UNDEFINED marks "root page not read yet". */
- height = ULINT_UNDEFINED;
-
- /* We use these modified search modes on non-leaf levels of the
- B-tree. These let us end up in the right B-tree leaf. In that leaf
- we use the original search mode. */
-
- switch (mode) {
- case PAGE_CUR_GE:
- page_mode = PAGE_CUR_L;
- break;
- case PAGE_CUR_G:
- page_mode = PAGE_CUR_LE;
- break;
- default:
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
- || mode == PAGE_CUR_LE_OR_EXTENDS);
-#else /* PAGE_CUR_LE_OR_EXTENDS */
- ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE);
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
- page_mode = mode;
- break;
- }
-
- /* Loop and search until we arrive at the desired level */
-
-search_loop:
- buf_mode = BUF_GET;
- rw_latch = RW_NO_LATCH;
-
- /* height is ULINT_UNDEFINED on the first pass, so the first branch
- also covers the fetch of the root page. */
- if (height != 0) {
- /* We are about to fetch the root or a non-leaf page. */
- } else if (latch_mode <= BTR_MODIFY_LEAF) {
- /* NOTE(review): the BTR_* latch mode is used directly as the
- rw-latch type; presumably BTR_SEARCH_LEAF and BTR_MODIFY_LEAF
- are defined equal to RW_S_LATCH and RW_X_LATCH - confirm. */
- rw_latch = latch_mode;
-
- if (btr_op != BTR_NO_OP
- && ibuf_should_try(index, btr_op != BTR_INSERT_OP)) {
-
- /* Try to buffer the operation if the leaf
- page is not in the buffer pool. */
-
- buf_mode = btr_op == BTR_DELETE_OP
- ? BUF_GET_IF_IN_POOL_OR_WATCH
- : BUF_GET_IF_IN_POOL;
- }
- }
-
- zip_size = dict_table_zip_size(index->table);
-
-retry_page_get:
- block = buf_page_get_gen(
- space, zip_size, page_no, rw_latch, guess, buf_mode,
- file, line, mtr, &err);
-
- /* Note that block==NULL signifies either an error or change
- buffering. */
- if (err != DB_SUCCESS) {
- ut_ad(block == NULL);
- if (err == DB_DECRYPTION_FAILED) {
- ib_push_warning((void *)NULL,
- DB_DECRYPTION_FAILED,
- "Table %s is encrypted but encryption service or"
- " used key_id is not available. "
- " Can't continue reading table.",
- index->table->name);
- index->table->file_unreadable = true;
- }
-
- goto func_exit;
- }
-
- if (block == NULL) {
- /* err is DB_SUCCESS here. NOTE(review): the macro below is
- defined elsewhere; it presumably runs the braced block when
- buf_mode is not one of the two "if in pool" modes - confirm. */
- SRV_CORRUPT_TABLE_CHECK(buf_mode == BUF_GET_IF_IN_POOL ||
- buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH,
- {
- page_cursor->block = 0;
- page_cursor->rec = 0;
- if (estimate) {
-
- cursor->path_arr->nth_rec =
- ULINT_UNDEFINED;
- }
-
- goto func_exit;
- });
-
- /* This must be a search to perform an insert/delete
- mark/ delete; try using the insert/delete buffer */
-
- ut_ad(height == 0);
- ut_ad(cursor->thr);
-
- switch (btr_op) {
- case BTR_INSERT_OP:
- case BTR_INSERT_IGNORE_UNIQUE_OP:
- ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
-
- if (ibuf_insert(IBUF_OP_INSERT, tuple, index,
- space, zip_size, page_no,
- cursor->thr)) {
-
- cursor->flag = BTR_CUR_INSERT_TO_IBUF;
-
- goto func_exit;
- }
- break;
-
- case BTR_DELMARK_OP:
- ut_ad(buf_mode == BUF_GET_IF_IN_POOL);
-
- if (ibuf_insert(IBUF_OP_DELETE_MARK, tuple,
- index, space, zip_size,
- page_no, cursor->thr)) {
-
- cursor->flag = BTR_CUR_DEL_MARK_IBUF;
-
- goto func_exit;
- }
-
- break;
-
- case BTR_DELETE_OP:
- ut_ad(buf_mode == BUF_GET_IF_IN_POOL_OR_WATCH);
-
- /* The watch is unset on every path out of this case.
- "Cannot purge yet" and "purge buffered" both exit the
- function; only a failed ibuf_insert() breaks out to
- retry with a real page read. */
- if (!row_purge_poss_sec(cursor->purge_node,
- index, tuple)) {
-
- /* The record cannot be purged yet. */
- cursor->flag = BTR_CUR_DELETE_REF;
- } else if (ibuf_insert(IBUF_OP_DELETE, tuple,
- index, space, zip_size,
- page_no,
- cursor->thr)) {
-
- /* The purge was buffered. */
- cursor->flag = BTR_CUR_DELETE_IBUF;
- } else {
- /* The purge could not be buffered. */
- buf_pool_watch_unset(space, page_no);
- break;
- }
-
- buf_pool_watch_unset(space, page_no);
- goto func_exit;
-
- default:
- ut_error;
- }
-
- /* Insert to the insert/delete buffer did not succeed, we
- must read the page from disk. */
-
- buf_mode = BUF_GET;
-
- goto retry_page_get;
- }
-
- block->check_index_page_at_flush = TRUE;
- page = buf_block_get_frame(block);
-
- SRV_CORRUPT_TABLE_CHECK(page,
- {
- page_cursor->block = 0;
- page_cursor->rec = 0;
-
- if (estimate) {
-
- cursor->path_arr->nth_rec = ULINT_UNDEFINED;
- }
-
- goto func_exit;
- });
-
- if (rw_latch != RW_NO_LATCH) {
-#ifdef UNIV_ZIP_DEBUG
- const page_zip_des_t* page_zip
- = buf_block_get_page_zip(block);
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
- buf_block_dbg_add_level(
- block, dict_index_is_ibuf(index)
- ? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE);
- }
-
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
- ut_ad(index->id == btr_page_get_index_id(page));
-
- if (UNIV_UNLIKELY(height == ULINT_UNDEFINED)) {
- /* We are in the root node */
-
- height = btr_page_get_level(page, mtr);
- root_height = height;
- cursor->tree_height = root_height + 1;
-
-#ifdef BTR_CUR_ADAPT
- /* Remember the root block as the guess for later searches. */
- if (block != guess) {
- info->root_guess = block;
- }
-#endif
- }
-
- if (height == 0) {
- /* The leaf was fetched without a latch when latch_mode is
- above BTR_MODIFY_LEAF or when the ibuf-tree path below jumped
- to retry_page_get; latch it (and siblings) now. */
- if (rw_latch == RW_NO_LATCH) {
-
- btr_cur_latch_leaves(
- page, space, zip_size, page_no, latch_mode,
- cursor, mtr);
- }
-
- switch (latch_mode) {
- case BTR_MODIFY_TREE:
- case BTR_CONT_MODIFY_TREE:
- break;
- default:
- if (!s_latch_by_caller) {
- /* Release the tree s-latch */
- mtr_release_s_latch_at_savepoint(
- mtr, savepoint,
- dict_index_get_lock(index));
- }
- }
-
- /* On the leaf level the caller's own search mode is used. */
- page_mode = mode;
- }
-
- page_cur_search_with_match(
- block, index, tuple, page_mode, &up_match, &up_bytes,
- &low_match, &low_bytes, page_cursor);
-
- if (estimate) {
- btr_cur_add_path_info(cursor, height, root_height);
- }
-
- /* If this is the desired level, leave the loop */
-
- ut_ad(height == btr_page_get_level(page_cur_get_page(page_cursor),
- mtr));
-
- if (level != height) {
-
- const rec_t* node_ptr;
- ut_ad(height > 0);
-
- height--;
- guess = NULL;
-
- node_ptr = page_cur_get_rec(page_cursor);
-
- /* May allocate heap if offsets_ is too small; freed at
- func_exit. */
- offsets = rec_get_offsets(
- node_ptr, index, offsets, ULINT_UNDEFINED, &heap);
-
- /* Go to the child node */
- page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
-
- if (UNIV_UNLIKELY(height == 0 && dict_index_is_ibuf(index))) {
- /* We're doing a search on an ibuf tree and we're one
- level above the leaf page. */
-
- ut_ad(level == 0);
-
- /* Jumping to retry_page_get skips the search_loop
- prologue, so the leaf is fetched with BUF_GET and no
- latch; btr_cur_latch_leaves() latches it afterwards. */
- buf_mode = BUF_GET;
- rw_latch = RW_NO_LATCH;
- goto retry_page_get;
- }
-
- goto search_loop;
- }
-
- if (level != 0) {
- /* x-latch the page */
- /* page_no still names the page just searched: it is only
- advanced when descending to a child above. */
- buf_block_t* child_block = btr_block_get(
- space, zip_size, page_no, RW_X_LATCH, index, mtr);
-
- page = buf_block_get_frame(child_block);
- btr_assert_not_corrupted(child_block, index);
- } else {
- cursor->low_match = low_match;
- cursor->low_bytes = low_bytes;
- cursor->up_match = up_match;
- cursor->up_bytes = up_bytes;
-
-#ifdef BTR_CUR_ADAPT
- /* We do a dirty read of btr_search_enabled here. We
- will properly check btr_search_enabled again in
- btr_search_build_page_hash_index() before building a
- page hash index, while holding btr_search_latch. */
- if (btr_search_enabled) {
- btr_search_info_update(index, cursor);
- }
-#endif
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_GE);
- ut_ad(cursor->up_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- ut_ad(cursor->low_match != ULINT_UNDEFINED
- || mode != PAGE_CUR_LE);
- }
-
-func_exit:
- /* Common exit for the tree-descent path: free the offsets heap if
- one was allocated, and re-take the search latch that was released
- before the descent. */
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- if (has_search_latch) {
-
- rw_lock_s_lock(btr_search_get_latch(cursor->index));
- }
-
- return err;
-}
-
-/*****************************************************************//**
-Opens a cursor at either end of an index.
-Starting at the root, each pass of the loop fetches one page, positions the
-page cursor before its first record (from_left) or after its last record,
-and, above the requested level, steps one record inwards and follows that
-node pointer to the child. On the requested level the page is latched via
-btr_cur_latch_leaves() and the loop ends.
-@return DB_SUCCESS, or the error that buf_page_get_gen() reported when a
-page could not be fetched */
-UNIV_INTERN
-dberr_t
-btr_cur_open_at_index_side_func(
-/*============================*/
- bool from_left, /*!< in: true if open to the low end,
- false if to the high end */
- dict_index_t* index, /*!< in: index */
- ulint latch_mode, /*!< in: latch mode */
- btr_cur_t* cursor, /*!< in/out: cursor */
- ulint level, /*!< in: level to search for
- (0=leaf). */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- page_cur_t* page_cursor;
- ulint page_no;
- ulint space;
- ulint zip_size;
- ulint height;
- ulint root_height = 0; /* remove warning */
- rec_t* node_ptr;
- ulint estimate;
- ulint savepoint;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- dberr_t err = DB_SUCCESS;
-
- rec_offs_init(offsets_);
-
- /* Split the BTR_ESTIMATE flag off; BTR_ALREADY_S_LATCHED stays in
- latch_mode and is handled by the switch statements below. */
- estimate = latch_mode & BTR_ESTIMATE;
- latch_mode &= ~BTR_ESTIMATE;
-
- ut_ad(level != ULINT_UNDEFINED);
-
- /* Store the position of the tree latch we push to mtr so that we
- know how to release it when we have latched the leaf node */
-
- savepoint = mtr_set_savepoint(mtr);
-
- switch (latch_mode) {
- case BTR_CONT_MODIFY_TREE:
- break;
- case BTR_MODIFY_TREE:
- mtr_x_lock(dict_index_get_lock(index), mtr);
- break;
- case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED:
- case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED:
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_S_LOCK));
- break;
- default:
- mtr_s_lock(dict_index_get_lock(index), mtr);
- }
-
- page_cursor = btr_cur_get_page_cur(cursor);
- cursor->index = index;
-
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
- page_no = dict_index_get_page(index);
-
- /* ULINT_UNDEFINED marks "root page not read yet". */
- height = ULINT_UNDEFINED;
-
- for (;;) {
- buf_block_t* block=NULL;
- page_t* page=NULL;
-
- /* Every page on the way down is fetched without a latch;
- the target page is latched by btr_cur_latch_leaves() below. */
- block = buf_page_get_gen(space, zip_size, page_no,
- RW_NO_LATCH, NULL, BUF_GET,
- file, line, mtr, &err);
-
- ut_ad((block != NULL) == (err == DB_SUCCESS));
-
- if (err != DB_SUCCESS) {
- if (err == DB_DECRYPTION_FAILED) {
- ib_push_warning((void *)NULL,
- DB_DECRYPTION_FAILED,
- "Table %s is encrypted but encryption service or"
- " used key_id is not available. "
- " Can't continue reading table.",
- index->table->name);
- index->table->file_unreadable = true;
- }
-
- goto exit_loop;
- }
-
- page = buf_block_get_frame(block);
-
- /* NOTE(review): SRV_CORRUPT_TABLE_CHECK() is defined
- elsewhere; it presumably runs the braced block when page is
- NULL. err is not modified on that path - confirm that
- callers detect it through the cleared page cursor. */
- SRV_CORRUPT_TABLE_CHECK(page,
- {
- page_cursor->block = 0;
- page_cursor->rec = 0;
-
- if (estimate) {
-
- cursor->path_arr->nth_rec =
- ULINT_UNDEFINED;
- }
- /* Can't use break with the macro */
- goto exit_loop;
- });
-
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
-
- ut_ad(index->id == btr_page_get_index_id(page));
-
- block->check_index_page_at_flush = TRUE;
-
- if (height == ULINT_UNDEFINED) {
- /* We are in the root node */
-
- height = btr_page_get_level(page, mtr);
- root_height = height;
- ut_a(height >= level);
- } else {
- /* TODO: flag the index corrupted if this fails */
- ut_ad(height == btr_page_get_level(page, mtr));
- }
-
- if (height == level) {
- /* Called for the target page even when level > 0,
- i.e. when that page is not a leaf. */
- btr_cur_latch_leaves(
- page, space, zip_size, page_no,
- latch_mode & ~BTR_ALREADY_S_LATCHED,
- cursor, mtr);
-
- if (height == 0) {
- /* In versions <= 3.23.52 we had
- forgotten to release the tree latch
- here. If in an index scan we had to
- scan far to find a record visible to
- the current transaction, that could
- starve others waiting for the tree
- latch. */
-
- switch (latch_mode) {
- case BTR_MODIFY_TREE:
- case BTR_CONT_MODIFY_TREE:
- case BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED:
- case BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED:
- break;
- default:
- /* Release the tree s-latch */
-
- mtr_release_s_latch_at_savepoint(
- mtr, savepoint,
- dict_index_get_lock(index));
- }
- }
- }
-
- if (from_left) {
- page_cur_set_before_first(block, page_cursor);
- } else {
- page_cur_set_after_last(block, page_cursor);
- }
-
- if (height == level) {
- if (estimate) {
- btr_cur_add_path_info(cursor, height,
- root_height);
- }
-
- break;
- }
-
- ut_ad(height > 0);
-
- /* Step from the before-first/after-last position onto the
- outermost record, whose node pointer is followed below. */
- if (from_left) {
- page_cur_move_to_next(page_cursor);
- } else {
- page_cur_move_to_prev(page_cursor);
- }
-
- if (estimate) {
- btr_cur_add_path_info(cursor, height, root_height);
- }
-
- height--;
-
- node_ptr = page_cur_get_rec(page_cursor);
- /* May allocate heap if offsets_ is too small; freed at
- exit_loop. */
- offsets = rec_get_offsets(node_ptr, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
- /* Go to the child node */
- page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets);
- }
-
-exit_loop:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- return err;
-}
-
-/**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree.
-The index tree latch is acquired first (x-latch for BTR_MODIFY_TREE,
-s-latch for every other mode). The tree is then descended from the root
-page: on each level the page cursor is positioned with
-page_cur_open_on_rnd_user_rec() and the child page number is read from
-the node pointer under the cursor, until level 0 is reached. If a page
-cannot be fetched the descent stops; for DB_DECRYPTION_FAILED a warning is
-pushed and index->table->file_unreadable is set. */
-UNIV_INTERN
-void
-btr_cur_open_at_rnd_pos_func(
-/*=========================*/
-	dict_index_t*	index,		/*!< in: index */
-	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
-	btr_cur_t*	cursor,		/*!< in/out: B-tree cursor */
-	const char*	file,		/*!< in: file name */
-	ulint		line,		/*!< in: line where called */
-	mtr_t*		mtr)		/*!< in: mtr */
-{
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	rec_offs_init(offsets_);
-
-	/* Latch the whole index tree in the mode implied by latch_mode */
-	if (latch_mode == BTR_MODIFY_TREE) {
-		mtr_x_lock(dict_index_get_lock(index), mtr);
-	} else {
-		ut_ad(latch_mode != BTR_CONT_MODIFY_TREE);
-		mtr_s_lock(dict_index_get_lock(index), mtr);
-	}
-
-	page_cur_t*	page_cursor	= btr_cur_get_page_cur(cursor);
-	cursor->index = index;
-
-	const ulint	space		= dict_index_get_space(index);
-	const ulint	zip_size	= dict_table_zip_size(index->table);
-
-	/* The descent starts at the root; the level of the root is
-	only known once its page has been read. */
-	ulint		cur_page_no	= dict_index_get_page(index);
-	ulint		height		= ULINT_UNDEFINED;
-
-	for (;;) {
-		dberr_t		err	= DB_SUCCESS;
-		buf_block_t*	block	= buf_page_get_gen(
-			space, zip_size, cur_page_no,
-			RW_NO_LATCH, NULL, BUF_GET,
-			file, line, mtr, &err);
-
-		ut_ad((block != NULL) == (err == DB_SUCCESS));
-
-		if (err != DB_SUCCESS) {
-			if (err == DB_DECRYPTION_FAILED) {
-				ib_push_warning((void *)NULL,
-					DB_DECRYPTION_FAILED,
-					"Table %s is encrypted but encryption service or"
-					" used key_id is not available. "
-					" Can't continue reading table.",
-					index->table->name);
-				index->table->file_unreadable = true;
-			}
-
-			goto exit_loop;
-		}
-
-		page_t*	page = buf_block_get_frame(block);
-
-		SRV_CORRUPT_TABLE_CHECK(page,
-		{
-			page_cursor->block = 0;
-			page_cursor->rec = 0;
-
-			goto exit_loop;
-		});
-
-		ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
-
-		ut_ad(index->id == btr_page_get_index_id(page));
-
-		if (height == ULINT_UNDEFINED) {
-			/* First iteration: this is the root page */
-			height = btr_page_get_level(page, mtr);
-		}
-
-		const bool	on_leaf_level = (height == 0);
-
-		if (on_leaf_level) {
-			btr_cur_latch_leaves(page, space, zip_size,
-					     cur_page_no, latch_mode,
-					     cursor, mtr);
-		}
-
-		page_cur_open_on_rnd_user_rec(block, page_cursor);
-
-		if (on_leaf_level) {
-			/* The cursor is now positioned on level 0 */
-			break;
-		}
-
-		ut_ad(height > 0);
-
-		--height;
-
-		/* Follow the node pointer under the cursor one level down */
-		rec_t*	child_ptr = page_cur_get_rec(page_cursor);
-
-		offsets = rec_get_offsets(child_ptr, cursor->index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		cur_page_no = btr_node_ptr_get_child_page_no(child_ptr,
-							     offsets);
-	}
-
-exit_loop:
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-}
-
-/*==================== B-TREE INSERT =========================*/
-
-/*************************************************************//**
-Inserts a record if there is enough space, or if enough space can
-be freed by reorganizing. Unlike btr_cur_optimistic_insert, no heuristic
-decides whether reorganizing the page is worth the CPU time: if the first
-insert attempt fails on an uncompressed page, the page is reorganized and
-the insert is tried once more.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return pointer to inserted record if succeed, else NULL */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-rec_t*
-btr_cur_insert_if_possible(
-/*=======================*/
-	btr_cur_t*	cursor,	/*!< in: cursor on page after which to insert;
-				cursor stays valid */
-	const dtuple_t*	tuple,	/*!< in: tuple to insert; the size info need not
-				have been stored to tuple */
-	ulint**		offsets,/*!< out: offsets on *rec */
-	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap, or NULL */
-	ulint		n_ext,	/*!< in: number of externally stored columns */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	dict_index_t*	index = cursor->index;
-
-	ut_ad(dtuple_check_typed(tuple));
-
-	ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
-				MTR_MEMO_PAGE_X_FIX));
-
-	page_cur_t*	pcur = btr_cur_get_page_cur(cursor);
-
-	/* First attempt */
-	rec_t*	inserted = page_cur_tuple_insert(pcur, tuple, index,
-						 offsets, heap, n_ext, mtr);
-
-	/* A second attempt only makes sense on an uncompressed page:
-	for compressed pages, page_cur_tuple_insert() already tried
-	to reorganize. */
-	if (inserted == NULL && !page_cur_get_page_zip(pcur)) {
-		if (btr_page_reorganize(pcur, index, mtr)) {
-			inserted = page_cur_tuple_insert(
-				pcur, tuple, index,
-				offsets, heap, n_ext, mtr);
-		}
-	}
-
-	ut_ad(!inserted || rec_offs_validate(inserted, index, *offsets));
-	return(inserted);
-}
-
-/*************************************************************//**
-For an insert, checks the locks and does the undo logging if desired.
-The steps are:
-1. in fake_changes mode (thr given and its trx has fake_changes set),
-return DB_SUCCESS without locking or undo logging;
-2. call lock_rec_insert_check_and_lock() on the cursor record, which also
-sets *inherit;
-3. for a clustered index that is not the insert buffer, write the undo
-log record unless BTR_NO_UNDO_LOG_FLAG is set, and store the resulting
-roll pointer in entry unless BTR_KEEP_SYS_FLAG is set.
-@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
-UNIV_INLINE MY_ATTRIBUTE((warn_unused_result, nonnull(2,3,5,6)))
-dberr_t
-btr_cur_ins_lock_and_undo(
-/*======================*/
-	ulint		flags,	/*!< in: undo logging and locking flags: if
-				not zero, the parameters index and thr
-				should be specified */
-	btr_cur_t*	cursor,	/*!< in: cursor on page after which to insert */
-	dtuple_t*	entry,	/*!< in/out: entry to insert */
-	que_thr_t*	thr,	/*!< in: query thread or NULL */
-	mtr_t*		mtr,	/*!< in/out: mini-transaction */
-	ibool*		inherit)/*!< out: TRUE if the inserted new record maybe
-				should inherit LOCK_GAP type locks from the
-				successor record */
-{
-	dict_index_t*	index;
-	dberr_t		err;
-	rec_t*		rec;
-	roll_ptr_t	roll_ptr;
-
-	if (UNIV_UNLIKELY(thr && thr_get_trx(thr)->fake_changes)) {
-		/* skip LOCK, UNDO */
-		return(DB_SUCCESS);
-	}
-
-	/* Check if we have to wait for a lock: enqueue an explicit lock
-	request if yes */
-
-	rec = btr_cur_get_rec(cursor);
-	index = cursor->index;
-
-	ut_ad(!dict_index_is_online_ddl(index)
-	      || dict_index_is_clust(index)
-	      || (flags & BTR_CREATE_FLAG));
-
-	err = lock_rec_insert_check_and_lock(flags, rec,
-					     btr_cur_get_block(cursor),
-					     index, thr, mtr, inherit);
-
-	/* Nothing more to do if locking failed, if both
-	BTR_NO_UNDO_LOG_FLAG and BTR_KEEP_SYS_FLAG are set (no undo record
-	to write and no roll pointer to store), or if the index carries
-	no undo information at all. The flag test must use '&': written
-	with '|' the expression can never be zero, which made this
-	shortcut unreachable. The outcome is the same either way, because
-	for that flag combination the code below does nothing and
-	returns DB_SUCCESS. */
-	if (err != DB_SUCCESS
-	    || !(~flags & (BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG))
-	    || !dict_index_is_clust(index) || dict_index_is_ibuf(index)) {
-
-		return(err);
-	}
-
-	if (flags & BTR_NO_UNDO_LOG_FLAG) {
-		/* No undo record: the roll pointer is zero */
-		roll_ptr = 0;
-	} else {
-		err = trx_undo_report_row_operation(thr, index, entry,
-						    NULL, 0, NULL, NULL,
-						    &roll_ptr);
-		if (err != DB_SUCCESS) {
-			return(err);
-		}
-	}
-
-	/* Now we can fill in the roll ptr field in entry */
-
-	if (!(flags & BTR_KEEP_SYS_FLAG)) {
-
-		row_upd_index_entry_sys_field(entry, index,
-					      DATA_ROLL_PTR, roll_ptr);
-	}
-
-	return(DB_SUCCESS);
-}
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Report information about a transaction: prints the transaction id, the
-operation text and the index name, followed by a newline, to stderr. */
-static
-void
-btr_cur_trx_report(
-/*===============*/
-	trx_id_t		trx_id,	/*!< in: transaction id */
-	const dict_index_t*	index,	/*!< in: index */
-	const char*		op)	/*!< in: operation */
-{
-	FILE*	out = stderr;
-
-	fprintf(out, "Trx with id " TRX_ID_FMT " going to ", trx_id);
-	fputs(op, out);
-	dict_index_name_print(out, NULL, index);
-	putc('\n', out);
-}
-#endif /* UNIV_DEBUG */
-
-/*************************************************************//**
-Tries to perform an insert to a page in an index tree, next to cursor.
-It is assumed that mtr holds an x-latch on the page. The operation does
-not succeed if there is too little space on the page. If there is just
-one record on the page, the insert will always succeed; this is to
-prevent trying to split a page with just one record.
-
-Outline of the steps taken below:
-1. if the converted record would need external storage, move fields to
-a big_rec vector (DB_TOO_BIG_RECORD if that is not possible);
-2. on compressed pages, reject entries whose node pointer would take more
-than half of an empty page (DB_TOO_BIG_RECORD);
-3. return DB_FAIL when the page lacks space, or when the compression
-padding or page split heuristics advise against inserting here;
-4. check locks and write undo via btr_cur_ins_lock_and_undo();
-5. insert, reorganizing an uncompressed page once if needed, then update
-the adaptive hash index, the record locks and the insert buffer bitmap.
-Whenever the function fails after a big_rec vector was built, the entry
-is restored with dtuple_convert_back_big_rec(). In fake_changes mode the
-function returns after step 4 without touching the page.
-@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number (among them
-DB_TOO_BIG_RECORD, and DB_CORRUPTION from SRV_CORRUPT_TABLE_CHECK) */
-UNIV_INTERN
-dberr_t
-btr_cur_optimistic_insert(
-/*======================*/
- ulint flags, /*!< in: undo logging and locking flags: if not
- zero, the parameters index and thr should be
- specified */
- btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
- cursor stays valid */
- ulint** offsets,/*!< out: offsets on *rec */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap */
- dtuple_t* entry, /*!< in/out: entry to insert */
- rec_t** rec, /*!< out: pointer to inserted record if
- succeed */
- big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
- be stored externally by the caller */
- ulint n_ext, /*!< in: number of externally stored columns */
- que_thr_t* thr, /*!< in/out: query thread; can be NULL if
- !(~flags
- & (BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG)) */
- mtr_t* mtr) /*!< in/out: mini-transaction;
- if this function returns DB_SUCCESS on
- a leaf page of a secondary index in a
- compressed tablespace, the caller must
- mtr_commit(mtr) before latching
- any further pages */
-{
- big_rec_t* big_rec_vec = NULL;
- dict_index_t* index;
- page_cur_t* page_cursor;
- buf_block_t* block;
- page_t* page;
- rec_t* dummy;
- ibool leaf;
- ibool reorg;
- ibool inherit = TRUE;
- ulint zip_size;
- ulint rec_size;
- dberr_t err;
-
- ut_ad(thr || !(~flags & (BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG)));
- *big_rec = NULL;
-
- block = btr_cur_get_block(cursor);
-
- SRV_CORRUPT_TABLE_CHECK(block, return(DB_CORRUPTION););
-
- page = buf_block_get_frame(block);
- index = cursor->index;
-
- const bool fake_changes = (~flags & (BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG))
- && thr_get_trx(thr)->fake_changes; /* thr is only read when at least one of the two flags is clear */
- ut_ad(fake_changes
- || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(!dict_index_is_online_ddl(index)
- || dict_index_is_clust(index)
- || (flags & BTR_CREATE_FLAG));
- ut_ad(dtuple_check_typed(entry));
-
- zip_size = buf_block_get_zip_size(block);
-#ifdef UNIV_DEBUG_VALGRIND
- if (zip_size) {
- UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
- UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
- }
-#endif /* UNIV_DEBUG_VALGRIND */
-
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops && thr) {
- btr_cur_trx_report(thr_get_trx(thr)->id, index, "insert ");
- dtuple_print(stderr, entry);
- }
-#endif /* UNIV_DEBUG */
-
- leaf = page_is_leaf(page);
-
- /* Calculate the record size when entry is converted to a record */
- rec_size = rec_get_converted_size(index, entry, n_ext);
-
- if (page_zip_rec_needs_ext(rec_size, page_is_comp(page),
- dtuple_get_n_fields(entry), zip_size)) {
-
- /* The record is so big that we have to store some fields
- externally on separate database pages */
- big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
-
- if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
-
- return(DB_TOO_BIG_RECORD);
- }
-
- rec_size = rec_get_converted_size(index, entry, n_ext); /* size after the conversion; n_ext was updated above */
- }
-
- if (zip_size) {
- /* Estimate the free space of an empty compressed page. */
- ulint free_space_zip = page_zip_empty_size(
- cursor->index->n_fields, zip_size);
- ulint n_uniq = dict_index_get_n_unique_in_tree(index);
-
- ut_ad(dict_table_is_comp(index->table));
-
- if (free_space_zip == 0) {
-too_big: /* also reached by goto from the node pointer size check below */
- if (big_rec_vec) {
- dtuple_convert_back_big_rec(
- index, entry, big_rec_vec);
- }
-
- return(DB_TOO_BIG_RECORD);
- }
-
- /* Subtract one byte for the encoded heap_no in the
- modification log. */
- free_space_zip--;
-
- /* There should be enough room for two node pointer
- records on an empty non-leaf page. This prevents
- infinite page splits. */
-
- if (entry->n_fields >= n_uniq
- && (REC_NODE_PTR_SIZE
- + rec_get_converted_size_comp_prefix(
- index, entry->fields, n_uniq, NULL)
- /* On a compressed page, there is
- a two-byte entry in the dense
- page directory for every record.
- But there is no record header. */
- - (REC_N_NEW_EXTRA_BYTES - 2)
- > free_space_zip / 2)) {
- goto too_big;
- }
- }
-
- LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
- goto fail);
-
- if (leaf && zip_size
- && (page_get_data_size(page) + rec_size
- >= dict_index_zip_pad_optimal_page_size(index))) {
- /* If compression padding tells us that insertion will
- result in too packed up page i.e.: which is likely to
- cause compression failure then don't do an optimistic
- insertion. */
-fail: /* common exit for "does not fit here": report DB_FAIL */
- err = DB_FAIL;
-fail_err: /* exit with the error already stored in err */
-
- if (big_rec_vec) {
- dtuple_convert_back_big_rec(index, entry, big_rec_vec);
- }
-
- return(err);
- }
-
- ulint max_size = page_get_max_insert_size_after_reorganize(page, 1);
-
- if (page_has_garbage(page)) {
- if ((max_size < rec_size
- || max_size < BTR_CUR_PAGE_REORGANIZE_LIMIT)
- && page_get_n_recs(page) > 1
- && page_get_max_insert_size(page, 1) < rec_size) {
-
- goto fail;
- }
- } else if (max_size < rec_size) {
- goto fail;
- }
-
- /* If there have been many consecutive inserts to the
- clustered index leaf page of an uncompressed table, check if
- we have to split the page to reserve enough free space for
- future updates of records. */
-
- if (leaf && !zip_size && dict_index_is_clust(index)
- && page_get_n_recs(page) >= 2
- && dict_index_get_space_reserve() + rec_size > max_size
- && (btr_page_get_split_rec_to_right(cursor, &dummy)
- || btr_page_get_split_rec_to_left(cursor, &dummy))) {
- goto fail;
- }
-
- /* Check locks and write to the undo log, if specified */
- err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
- thr, mtr, &inherit);
-
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
-
- goto fail_err;
- }
-
- if (UNIV_UNLIKELY(fake_changes)) {
- /* skip CHANGE, LOG */
- *big_rec = big_rec_vec;
- return(err); /* == DB_SUCCESS */
- }
-
- page_cursor = btr_cur_get_page_cur(cursor);
-
- /* Now, try the insert. The cursor record is remembered so that
- a change of the cursor position during the insert can be detected
- afterwards; such a change is recorded in reorg. */
-
- {
- const rec_t* page_cursor_rec = page_cur_get_rec(page_cursor);
- *rec = page_cur_tuple_insert(page_cursor, entry, index,
- offsets, heap, n_ext, mtr);
- reorg = page_cursor_rec != page_cur_get_rec(page_cursor);
- }
-
- if (*rec) { /* inserted at the first attempt */
- } else if (zip_size) {
- /* Reset the IBUF_BITMAP_FREE bits, because
- page_cur_tuple_insert() will have attempted page
- reorganize before failing. */
- if (leaf && !dict_index_is_clust(index)) {
- ibuf_reset_free_bits(block);
- }
-
- goto fail;
- } else {
- ut_ad(!reorg);
-
- /* If the record did not fit, reorganize */
- if (!btr_page_reorganize(page_cursor, index, mtr)) {
- ut_ad(0); /* not expected to happen; debug builds stop here */
- goto fail;
- }
-
- ut_ad(page_get_max_insert_size(page, 1) == max_size);
-
- reorg = TRUE;
-
- *rec = page_cur_tuple_insert(page_cursor, entry, index,
- offsets, heap, n_ext, mtr);
-
- if (UNIV_UNLIKELY(!*rec)) {
- fputs("InnoDB: Error: cannot insert tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs(" into ", stderr);
- dict_index_name_print(stderr, thr_get_trx(thr), index); /* NOTE(review): thr may be NULL per the parameter comment - confirm */
- fprintf(stderr, "\nInnoDB: max insert size %lu\n",
- (ulong) max_size);
- ut_error;
- }
- }
-
-#ifdef BTR_CUR_HASH_ADAPT
- if (!reorg && leaf && (cursor->flag == BTR_CUR_HASH)) {
- btr_search_update_hash_node_on_insert(cursor);
- } else {
- btr_search_update_hash_on_insert(cursor);
- }
-#endif
-
- if (!(flags & BTR_NO_LOCKING_FLAG) && inherit) {
-
- lock_update_insert(block, *rec);
- }
-
- if (leaf && !dict_index_is_clust(index)) {
- /* Update the free bits of the B-tree page in the
- insert buffer bitmap. */
-
- /* The free bits in the insert buffer bitmap must
- never exceed the free space on a page. It is safe to
- decrement or reset the bits in the bitmap in a
- mini-transaction that is committed before the
- mini-transaction that affects the free space. */
-
- /* It is unsafe to increment the bits in a separately
- committed mini-transaction, because in crash recovery,
- the free bits could momentarily be set too high. */
-
- if (zip_size) {
- /* Update the bits in the same mini-transaction. */
- ibuf_update_free_bits_zip(block, mtr);
- } else {
- /* Decrement the bits in a separate
- mini-transaction. */
- ibuf_update_free_bits_if_full(
- block, max_size,
- rec_size + PAGE_DIR_SLOT_SIZE);
- }
- }
-
- *big_rec = big_rec_vec; /* hand the externally stored fields, if any, to the caller */
-
- return(DB_SUCCESS);
-}
-
-/*************************************************************//**
-Performs an insert on a page of an index tree. It is assumed that mtr
-holds an x-latch on the tree and on the cursor page. If the insert is
-made on the leaf level, to avoid deadlocks, mtr must also own x-latches
-to brothers of page, if those brothers exist.
-
-The function checks locks and writes undo through
-btr_cur_ins_lock_and_undo(), reserves free extents unless
-BTR_NO_UNDO_LOG_FLAG is set, converts the entry to a big_rec vector when
-the record needs external storage, and then inserts by raising the root
-or by splitting the cursor page. Extents reserved here are released again
-on every return path that follows the reservation. In fake_changes mode
-the function returns before the tree is modified.
-@return DB_SUCCESS or error number (DB_OUT_OF_FILE_SPACE,
-DB_TOO_BIG_RECORD, or an error from btr_cur_ins_lock_and_undo()) */
-UNIV_INTERN
-dberr_t
-btr_cur_pessimistic_insert(
-/*=======================*/
-	ulint		flags,	/*!< in: undo logging and locking flags: if not
-				zero, the parameter thr should be
-				specified; if no undo logging is specified,
-				then the caller must have reserved enough
-				free extents in the file space so that the
-				insertion will certainly succeed */
-	btr_cur_t*	cursor,	/*!< in: cursor after which to insert;
-				cursor stays valid */
-	ulint**		offsets,/*!< out: offsets on *rec */
-	mem_heap_t**	heap,	/*!< in/out: pointer to memory heap
-				that can be emptied */
-	dtuple_t*	entry,	/*!< in/out: entry to insert */
-	rec_t**		rec,	/*!< out: pointer to inserted record if
-				succeed */
-	big_rec_t**	big_rec,/*!< out: big rec vector whose fields have to
-				be stored externally by the caller */
-	ulint		n_ext,	/*!< in: number of externally stored columns */
-	que_thr_t*	thr,	/*!< in/out: query thread; can be NULL if
-				!(~flags
-				& (BTR_NO_LOCKING_FLAG
-				| BTR_NO_UNDO_LOG_FLAG)) */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	dict_index_t*	index		= cursor->index;
-	ulint		zip_size	= dict_table_zip_size(index->table);
-	big_rec_t*	big_rec_vec	= NULL;
-	dberr_t		err;
-	ibool		inherit		= FALSE;
-	ibool		success;
-	ulint		n_reserved	= 0;
-
-	ut_ad(dtuple_check_typed(entry));
-	ut_ad(thr || !(~flags & (BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG)));
-
-	*big_rec = NULL;
-
-	/* thr is only dereferenced when at least one of the two flags
-	is clear, i.e. when the caller had to supply it. */
-	const bool	fake_changes = (~flags & (BTR_NO_LOCKING_FLAG
-						  | BTR_NO_UNDO_LOG_FLAG))
-		&& thr_get_trx(thr)->fake_changes;
-	ut_ad(fake_changes || mtr_memo_contains(mtr,
-				dict_index_get_lock(btr_cur_get_index(cursor)),
-				MTR_MEMO_X_LOCK));
-	ut_ad(fake_changes || mtr_memo_contains(mtr, btr_cur_get_block(cursor),
-				MTR_MEMO_PAGE_X_FIX));
-	ut_ad(!dict_index_is_online_ddl(index)
-	      || dict_index_is_clust(index)
-	      || (flags & BTR_CREATE_FLAG));
-
-	cursor->flag = BTR_CUR_BINARY;
-
-	/* Check locks and write to undo log, if specified */
-
-	err = btr_cur_ins_lock_and_undo(flags, cursor, entry,
-					thr, mtr, &inherit);
-
-	if (err != DB_SUCCESS) {
-
-		return(err);
-	}
-
-	if (!(flags & BTR_NO_UNDO_LOG_FLAG)) {
-
-		ut_a(cursor->tree_height != ULINT_UNDEFINED);
-
-		/* First reserve enough free space for the file segments
-		of the index tree, so that the insert will not fail because
-		of lack of space */
-
-		ulint	n_extents = cursor->tree_height / 16 + 3;
-
-		success = fsp_reserve_free_extents(&n_reserved, index->space,
-						   n_extents, FSP_NORMAL, mtr);
-		if (!success) {
-			return(DB_OUT_OF_FILE_SPACE);
-		}
-	}
-
-	if (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, n_ext),
-				   dict_table_is_comp(index->table),
-				   dtuple_get_n_fields(entry),
-				   zip_size)) {
-		/* The record is so big that we have to store some fields
-		externally on separate database pages */
-
-		if (UNIV_LIKELY_NULL(big_rec_vec)) {
-			/* This should never happen, but we handle
-			the situation in a robust manner. */
-			ut_ad(0);
-			dtuple_convert_back_big_rec(index, entry, big_rec_vec);
-		}
-
-		big_rec_vec = dtuple_convert_big_rec(index, entry, &n_ext);
-
-		if (big_rec_vec == NULL) {
-
-			if (n_reserved > 0) {
-				fil_space_release_free_extents(index->space,
-							       n_reserved);
-			}
-			return(DB_TOO_BIG_RECORD);
-		}
-	}
-
-	if (UNIV_UNLIKELY(fake_changes)) {
-		/* skip CHANGE, LOG */
-		if (n_reserved > 0) {
-			fil_space_release_free_extents(index->space,
-						       n_reserved);
-		}
-		*big_rec = big_rec_vec;
-		return(DB_SUCCESS);
-	}
-
-	if (dict_index_get_page(index)
-	    == buf_block_get_page_no(btr_cur_get_block(cursor))) {
-
-		/* The page is the root page */
-		*rec = btr_root_raise_and_insert(
-			flags, cursor, offsets, heap, entry, n_ext, mtr);
-	} else {
-		*rec = btr_page_split_and_insert(
-			flags, cursor, offsets, heap, entry, n_ext, mtr);
-	}
-
-	if (*rec == NULL && os_has_said_disk_full) {
-		/* Nothing was inserted. *big_rec stays NULL, so the
-		caller never sees big_rec_vec: put the fields back into
-		entry here, as the failure paths of
-		btr_cur_optimistic_insert() do, and give back the extents
-		reserved above, as the other early returns in this
-		function do. */
-		if (big_rec_vec) {
-			dtuple_convert_back_big_rec(index, entry,
-						    big_rec_vec);
-		}
-
-		if (n_reserved > 0) {
-			fil_space_release_free_extents(index->space,
-						       n_reserved);
-		}
-
-		return(DB_OUT_OF_FILE_SPACE);
-	}
-
-	ut_ad(page_rec_get_next(btr_cur_get_rec(cursor)) == *rec);
-
-	if (!(flags & BTR_NO_LOCKING_FLAG)) {
-		/* The cursor might be moved to the other page,
-		and the max trx id field should be updated after
-		the cursor was fixed. */
-		if (!dict_index_is_clust(index)) {
-			page_update_max_trx_id(
-				btr_cur_get_block(cursor),
-				btr_cur_get_page_zip(cursor),
-				thr_get_trx(thr)->id, mtr);
-		}
-
-		if (!page_rec_is_infimum(btr_cur_get_rec(cursor))) {
-			/* split and inserted need to call
-			lock_update_insert() always. */
-			inherit = TRUE;
-		}
-
-		buf_block_t*	block = btr_cur_get_block(cursor);
-		buf_frame_t*	frame = NULL;
-
-		if (block) {
-			frame = buf_block_get_frame(block);
-		}
-		/* split and inserted need to call
-		lock_update_insert() always. */
-		if (frame && btr_page_get_prev(frame, mtr) == FIL_NULL) {
-			inherit = TRUE;
-		}
-	}
-
-#ifdef BTR_CUR_ADAPT
-	btr_search_update_hash_on_insert(cursor);
-#endif
-	if (inherit && !(flags & BTR_NO_LOCKING_FLAG)) {
-
-		lock_update_insert(btr_cur_get_block(cursor), *rec);
-	}
-
-	if (n_reserved > 0) {
-		fil_space_release_free_extents(index->space, n_reserved);
-	}
-
-	/* Hand the externally stored fields, if any, to the caller */
-	*big_rec = big_rec_vec;
-
-	return(DB_SUCCESS);
-}
-
-/*==================== B-TREE UPDATE =========================*/
-
-/*************************************************************//**
-For an update, checks the locks and does the undo logging.
-Secondary index records are only lock-checked; undo is logged for
-clustered index records alone, and only if BTR_NO_UNDO_LOG_FLAG is not
-set. In fake_changes mode nothing is locked or logged.
-@return DB_SUCCESS, DB_WAIT_LOCK, or error number */
-UNIV_INLINE MY_ATTRIBUTE((warn_unused_result))
-dberr_t
-btr_cur_upd_lock_and_undo(
-/*======================*/
-	ulint		flags,	/*!< in: undo logging and locking flags */
-	btr_cur_t*	cursor,	/*!< in: cursor on record to update */
-	const ulint*	offsets,/*!< in: rec_get_offsets() on cursor */
-	const upd_t*	update,	/*!< in: update vector */
-	ulint		cmpl_info,/*!< in: compiler info on secondary index
-				updates */
-	que_thr_t*	thr,	/*!< in: query thread
-				(can be NULL if BTR_NO_LOCKING_FLAG) */
-	mtr_t*		mtr,	/*!< in/out: mini-transaction */
-	roll_ptr_t*	roll_ptr)/*!< out: roll pointer */
-{
-	ut_ad((thr != NULL) || (flags & BTR_NO_LOCKING_FLAG));
-
-	const bool	want_locks = !(flags & BTR_NO_LOCKING_FLAG);
-
-	if (want_locks && thr_get_trx(thr)->fake_changes) {
-		/* skip LOCK, UNDO */
-		return(DB_SUCCESS);
-	}
-
-	const rec_t*	rec	= btr_cur_get_rec(cursor);
-	dict_index_t*	index	= cursor->index;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	if (!dict_index_is_clust(index)) {
-		ut_ad(dict_index_is_online_ddl(index)
-		      == !!(flags & BTR_CREATE_FLAG));
-
-		/* Undo logging is done only for clustered index
-		records, so a secondary index record just needs the
-		lock check. */
-		return(lock_sec_rec_modify_check_and_lock(
-			       flags, btr_cur_get_block(cursor), rec,
-			       index, thr, mtr));
-	}
-
-	/* Clustered index: see whether we must wait for a lock, and
-	enqueue an explicit lock request if so. */
-	if (want_locks) {
-		const dberr_t	lock_err
-			= lock_clust_rec_modify_check_and_lock(
-				flags, btr_cur_get_block(cursor), rec,
-				index, offsets, thr);
-
-		if (lock_err != DB_SUCCESS) {
-			return(lock_err);
-		}
-	}
-
-	if (flags & BTR_NO_UNDO_LOG_FLAG) {
-		return(DB_SUCCESS);
-	}
-
-	/* Record the update in the undo log */
-	return(trx_undo_report_row_operation(
-		       thr, index, NULL, update,
-		       cmpl_info, rec, offsets, roll_ptr));
-}
-
-/***********************************************************//**
-Writes a redo log record of updating a record in-place.
-The record body consists of one byte of flags, the system fields (dummy
-values for a secondary index), the two-byte page offset of rec and the
-update vector. */
-UNIV_INTERN
-void
-btr_cur_update_in_place_log(
-/*========================*/
-	ulint		flags,		/*!< in: flags */
-	const rec_t*	rec,		/*!< in: record */
-	dict_index_t*	index,		/*!< in: index of the record */
-	const upd_t*	update,		/*!< in: update vector */
-	trx_id_t	trx_id,		/*!< in: transaction id */
-	roll_ptr_t	roll_ptr,	/*!< in: roll ptr */
-	mtr_t*		mtr)		/*!< in: mtr */
-{
-	const page_t*	page = page_align(rec);
-
-	ut_ad(flags < 256);
-	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
-	/* Space requested from the log buffer for this record */
-	const ulint	log_size = 1 + DATA_ROLL_PTR_LEN + 14 + 2
-		+ MLOG_BUF_MARGIN;
-
-	byte*	log_ptr = mlog_open_and_write_index(
-		mtr, rec, index,
-		page_is_comp(page)
-		? MLOG_COMP_REC_UPDATE_IN_PLACE
-		: MLOG_REC_UPDATE_IN_PLACE,
-		log_size);
-
-	if (log_ptr == NULL) {
-		/* Logging in mtr is switched off during crash recovery */
-		return;
-	}
-
-	/* For secondary indexes, we could skip writing the dummy system
-	fields to the redo log, but then the parsing of
-	MLOG_REC_UPDATE_IN_PLACE/MLOG_COMP_REC_UPDATE_IN_PLACE would have
-	to change, or a new redo log record type would be needed. For now,
-	dummy sys fields are written when a secondary index record is
-	updated. */
-	mach_write_to_1(log_ptr, flags);
-	log_ptr += 1;
-
-	if (!dict_index_is_clust(index)) {
-		/* Dummy system fields for a secondary index */
-		/* TRX_ID Position */
-		log_ptr += mach_write_compressed(log_ptr, 0);
-		/* ROLL_PTR */
-		trx_write_roll_ptr(log_ptr, 0);
-		log_ptr += DATA_ROLL_PTR_LEN;
-		/* TRX_ID */
-		log_ptr += mach_ull_write_compressed(log_ptr, 0);
-	} else {
-		log_ptr = row_upd_write_sys_vals_to_log(
-			index, trx_id, roll_ptr, log_ptr, mtr);
-	}
-
-	/* Offset of the record within its page */
-	mach_write_to_2(log_ptr, page_offset(rec));
-	log_ptr += 2;
-
-	row_upd_index_write_log(update, log_ptr, mtr);
-}
-#endif /* UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses a redo log record of updating a record in-place.
-The record is read in the order: one byte of flags, the system fields,
-a two-byte record offset and the update vector. When page is not NULL and
-the record was parsed completely, the update is applied to the record at
-that offset on the page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-btr_cur_parse_update_in_place(
-/*==========================*/
-	byte*		ptr,	/*!< in: buffer */
-	byte*		end_ptr,/*!< in: buffer end */
-	page_t*		page,	/*!< in/out: page or NULL */
-	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
-	dict_index_t*	index)	/*!< in: index corresponding to page */
-{
-	ulint		pos;
-	trx_id_t	trx_id;
-	roll_ptr_t	roll_ptr;
-	upd_t*		update;
-
-	if (end_ptr < ptr + 1) {
-		/* The flags byte is not in the buffer yet */
-		return(NULL);
-	}
-
-	const ulint	flags = mach_read_from_1(ptr);
-	ptr += 1;
-
-	ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr);
-
-	if (ptr == NULL || end_ptr < ptr + 2) {
-		/* Incomplete system fields or record offset */
-		return(NULL);
-	}
-
-	const ulint	rec_offset = mach_read_from_2(ptr);
-	ptr += 2;
-
-	ut_a(rec_offset <= UNIV_PAGE_SIZE);
-
-	mem_heap_t*	heap = mem_heap_create(256);
-
-	ptr = row_upd_index_parse(ptr, end_ptr, heap, &update);
-
-	if (ptr != NULL && page != NULL) {
-		ut_a((ibool)!!page_is_comp(page)
-		     == dict_table_is_comp(index->table));
-
-		rec_t*	rec = page + rec_offset;
-
-		/* We do not need to reserve btr_search_latch, as the page
-		is only being recovered, and there cannot be a hash index
-		to it. */
-
-		ulint*	offsets = rec_get_offsets(rec, index, NULL,
-						  ULINT_UNDEFINED, &heap);
-
-		if (!(flags & BTR_KEEP_SYS_FLAG)) {
-			row_upd_rec_sys_fields_in_recovery(
-				rec, page_zip, offsets,
-				pos, trx_id, roll_ptr);
-		}
-
-		row_upd_rec_in_place(rec, index, offsets, update, page_zip);
-	}
-
-	mem_heap_free(heap);
-
-	return(ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-See if there is enough place in the page modification log to log
-an update-in-place. If there is not, the page is reorganized once
-(unless that cannot help, or the transaction only makes fake changes)
-and the available space is checked again.
-
-@retval false if out of space; IBUF_BITMAP_FREE will be reset
-outside mtr if the page was recompressed
-@retval true if enough place;
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
-a secondary index leaf page. This has to be done either within the
-same mini-transaction, or by invoking ibuf_reset_free_bits() before
-mtr_commit(mtr). */
-UNIV_INTERN
-bool
-btr_cur_update_alloc_zip_func(
-/*==========================*/
-	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
-	page_cur_t*	cursor,	/*!< in/out: B-tree page cursor */
-	dict_index_t*	index,	/*!< in: the index corresponding to cursor */
-#ifdef UNIV_DEBUG
-	ulint*		offsets,/*!< in/out: offsets of the cursor record */
-#endif /* UNIV_DEBUG */
-	ulint		length,	/*!< in: size needed */
-	bool		create,	/*!< in: true=delete-and-insert,
-				false=update-in-place */
-	mtr_t*		mtr,	/*!< in/out: mini-transaction */
-	trx_t*		trx)	/*!< in: NULL or transaction */
-{
-	const page_t*	page = page_cur_get_page(cursor);
-
-	ut_ad(page_zip == page_cur_get_page_zip(cursor));
-	ut_ad(!dict_index_is_ibuf(index));
-	ut_ad(rec_offs_validate(page_cur_get_rec(cursor), index, offsets));
-
-	if (page_zip_available(page_zip, dict_index_is_clust(index),
-			       length, create)) {
-		return(true);
-	}
-
-	if (!(page_zip->m_nonempty || page_has_garbage(page))) {
-		/* The page has been freshly compressed, so
-		reorganizing it will not help. */
-		return(false);
-	}
-
-	if (create && page_is_leaf(page)) {
-		if (length + page_get_data_size(page)
-		    >= dict_index_zip_pad_optimal_page_size(index)) {
-			return(false);
-		}
-	}
-
-	if (UNIV_UNLIKELY(trx && trx->fake_changes)) {
-		/* Don't call page_zip_compress_write_log_no_data as that has
-		assert which would fail. Assume there won't be a compression
-		failure. */
-
-		return(true);
-	}
-
-	if (btr_page_reorganize(cursor, index, mtr)) {
-
-		rec_offs_make_valid(page_cur_get_rec(cursor), index, offsets);
-
-		/* After recompressing a page, we must make sure that the
-		free bits in the insert buffer bitmap will not exceed the
-		free space on the page. Because this function will not
-		attempt recompression unless page_zip_available() fails
-		above, it is safe to reset the free bits if
-		page_zip_available() fails again, below. The free bits can
-		safely be reset in a separate mini-transaction. If
-		page_zip_available() succeeds below, we can be sure that
-		the btr_page_reorganize() above did not reduce the free
-		space available on the page. */
-
-		if (page_zip_available(page_zip, dict_index_is_clust(index),
-				       length, create)) {
-			return(true);
-		}
-	}
-
-	/* Out of space, either because the reorganize failed or because
-	it did not free enough room. */
-	ut_ad(rec_offs_validate(page_cur_get_rec(cursor), index, offsets));
-
-	/* Out of space: reset the free bits. */
-	if (!dict_index_is_clust(index) && page_is_leaf(page)) {
-		ibuf_reset_free_bits(page_cur_get_block(cursor));
-	}
-
-	return(false);
-}
-
-/*************************************************************//**
-Updates a record when the update causes no size changes in its fields.
-We assume here that the ordering fields of the record do not change.
-
-Outline of the steps taken below:
-1. on a compressed page, make sure that the update can be logged in the
-page modification log (btr_cur_update_alloc_zip), else DB_ZIP_OVERFLOW;
-2. check locks and write undo via btr_cur_upd_lock_and_undo();
-3. in fake_changes mode, return here without modifying anything;
-4. update the system fields unless BTR_KEEP_SYS_FLAG is set;
-5. if the block has an adaptive hash index, possibly drop the hash
-pointer to the record and hold the search latch in x-mode while the
-record is modified in place;
-6. write the redo log record and, if the delete mark was cleared by the
-update, call btr_cur_unmark_extern_fields();
-7. for a compressed secondary index leaf page, update the insert buffer
-free bits unless BTR_KEEP_IBUF_BITMAP is set. This step is also run when
-step 2 fails.
-@return locking or undo log related error code, or
-@retval DB_SUCCESS on success
-@retval DB_ZIP_OVERFLOW if there is not enough space left
-on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
-UNIV_INTERN
-dberr_t
-btr_cur_update_in_place(
-/*====================*/
- ulint flags, /*!< in: undo logging and locking flags */
- btr_cur_t* cursor, /*!< in: cursor on the record to update;
- cursor stays valid and positioned on the
- same record */
- ulint* offsets,/*!< in/out: offsets on cursor->page_cur.rec */
- const upd_t* update, /*!< in: update vector */
- ulint cmpl_info,/*!< in: compiler info on secondary index
- updates */
- que_thr_t* thr, /*!< in: query thread */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in/out: mini-transaction; if this
- is a secondary index, the caller must
- mtr_commit(mtr) before latching any
- further pages */
-{
- dict_index_t* index;
- buf_block_t* block;
- page_zip_des_t* page_zip;
- dberr_t err;
- rec_t* rec;
- roll_ptr_t roll_ptr = 0;
- ulint was_delete_marked;
- ibool is_hashed;
- trx_t* trx;
-
- rec = btr_cur_get_rec(cursor);
- index = cursor->index;
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
- /* The insert buffer tree should never be updated in place. */
- ut_ad(!dict_index_is_ibuf(index));
- ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
- || dict_index_is_clust(index));
- ut_ad(thr_get_trx(thr)->id == trx_id
- || (flags & ~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP))
- == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
- | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
- ut_ad(fil_page_get_type(btr_cur_get_page(cursor)) == FIL_PAGE_INDEX);
- ut_ad(btr_page_get_index_id(btr_cur_get_page(cursor)) == index->id);
-
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops) {
- btr_cur_trx_report(trx_id, index, "update ");
- rec_print_new(stderr, rec, offsets);
- }
-#endif /* UNIV_DEBUG */
-
- block = btr_cur_get_block(cursor);
- page_zip = buf_block_get_page_zip(block);
- trx = thr_get_trx(thr); /* NOTE(review): thr is used without a NULL check here - confirm callers always pass one */
-
- /* Check that enough space is available on the compressed page. */
- if (page_zip) {
- if (!btr_cur_update_alloc_zip(
- page_zip, btr_cur_get_page_cur(cursor),
- index, offsets, rec_offs_size(offsets),
- false, mtr, trx)) {
- return(DB_ZIP_OVERFLOW);
- }
-
- rec = btr_cur_get_rec(cursor); /* fetched again after the call above, which may reorganize the page */
- }
-
- /* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, offsets,
- update, cmpl_info,
- thr, mtr, &roll_ptr);
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- /* We may need to update the IBUF_BITMAP_FREE
- bits after a reorganize that was done in
- btr_cur_update_alloc_zip(). */
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(trx->fake_changes)) {
- /* skip CHANGE, LOG; note that this return bypasses
- the free bits update at func_exit */
- return(err); /* == DB_SUCCESS */
- }
-
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_rec_sys_fields(rec, NULL, index, offsets,
- thr_get_trx(thr), roll_ptr);
- }
-
- was_delete_marked = rec_get_deleted_flag(
- rec, page_is_comp(buf_block_get_frame(block))); /* state before the update; compared again after it */
-
- is_hashed = (block->index != NULL);
-
- if (is_hashed) {
- /* TO DO: Can we skip this if none of the fields
- index->search_info->curr_n_fields
- are being updated? */
-
- /* The function row_upd_changes_ord_field_binary works only
- if the update vector was built for a clustered index, we must
- NOT call it if index is secondary */
-
- if (!dict_index_is_clust(index)
- || row_upd_changes_ord_field_binary(index, update, thr,
- NULL, NULL)) {
-
- /* Remove possible hash index pointer to this record */
- btr_search_update_hash_on_delete(cursor);
- }
-
- rw_lock_x_lock(btr_search_get_latch(cursor->index)); /* held only around row_upd_rec_in_place() below */
- }
-
- row_upd_rec_in_place(rec, index, offsets, update, page_zip);
-
- if (is_hashed) {
- rw_lock_x_unlock(btr_search_get_latch(cursor->index));
- }
-
- btr_cur_update_in_place_log(flags, rec, index, update,
- trx_id, roll_ptr, mtr);
-
- if (was_delete_marked
- && !rec_get_deleted_flag(
- rec, page_is_comp(buf_block_get_frame(block)))) {
- /* The new updated record owns its possible externally
- stored fields */
-
- btr_cur_unmark_extern_fields(page_zip,
- rec, index, offsets, mtr);
- }
-
- ut_ad(err == DB_SUCCESS);
-
-func_exit: /* reached on success and when lock checking or undo logging failed */
- if (page_zip
- && !(flags & BTR_KEEP_IBUF_BITMAP)
- && !dict_index_is_clust(index)
- && block) {
- buf_frame_t* frame = buf_block_get_frame(block);
- if (frame && page_is_leaf(frame)) {
- /* Update the free bits in the insert buffer. */
- ibuf_update_free_bits_zip(block, mtr);
- }
- }
-
- return(err);
-}
-
-/*************************************************************//**
-Tries to update a record on a page in an index tree. It is assumed that mtr
-holds an x-latch on the page. The operation does not succeed if there is too
-little space on the page or if the update would result in too empty a page,
-so that tree compression is recommended. We assume here that the ordering
-fields of the record do not change.
-@return error code, including
-@retval DB_SUCCESS on success
-@retval DB_OVERFLOW if the updated record does not fit
-@retval DB_UNDERFLOW if the page would become too empty
-@retval DB_ZIP_OVERFLOW if there is not enough space left
-on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
-UNIV_INTERN
-dberr_t
-btr_cur_optimistic_update(
-/*======================*/
- ulint flags, /*!< in: undo logging and locking flags */
- btr_cur_t* cursor, /*!< in: cursor on the record to update;
- cursor stays valid and positioned on the
- same record */
- ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
- mem_heap_t** heap, /*!< in/out: pointer to NULL or memory heap */
- const upd_t* update, /*!< in: update vector; this must also
- contain trx id and roll ptr fields */
- ulint cmpl_info,/*!< in: compiler info on secondary index
- updates */
- que_thr_t* thr, /*!< in: query thread */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in/out: mini-transaction; if this
- is a secondary index, the caller must
- mtr_commit(mtr) before latching any
- further pages */
-{
- dict_index_t* index;
- page_cur_t* page_cursor;
- dberr_t err;
- buf_block_t* block;
- page_t* page;
- page_zip_des_t* page_zip;
- rec_t* rec;
- ulint max_size;
- ulint new_rec_size;
- ulint old_rec_size;
- ulint max_ins_size = 0;
- dtuple_t* new_entry;
- roll_ptr_t roll_ptr;
- ulint i;
- ulint n_ext;
-
- block = btr_cur_get_block(cursor);
- page = buf_block_get_frame(block);
- rec = btr_cur_get_rec(cursor);
- index = cursor->index;
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
- ut_ad(thr_get_trx(thr)->fake_changes
- || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- /* The insert buffer tree should never be updated in place. */
- ut_ad(!dict_index_is_ibuf(index));
- ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
- || dict_index_is_clust(index));
- ut_ad(thr_get_trx(thr)->id == trx_id
- || (flags & ~(BTR_KEEP_POS_FLAG | BTR_KEEP_IBUF_BITMAP))
- == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
- | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
- ut_ad(btr_page_get_index_id(page) == index->id);
-
- *offsets = rec_get_offsets(rec, index, *offsets,
- ULINT_UNDEFINED, heap);
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- ut_a(!rec_offs_any_null_extern(rec, *offsets)
- || trx_is_recv(thr_get_trx(thr)));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops) {
- btr_cur_trx_report(trx_id, index, "update ");
- rec_print_new(stderr, rec, *offsets);
- }
-#endif /* UNIV_DEBUG */
-
- if (!row_upd_changes_field_size_or_external(index, *offsets, update)) {
-
- /* The simplest and the most common case: the update does not
- change the size of any field and none of the updated fields is
- externally stored in rec or update, and there is enough space
- on the compressed page to log the update. */
-
- return(btr_cur_update_in_place(
- flags, cursor, *offsets, update,
- cmpl_info, thr, trx_id, mtr));
- }
-
- if (rec_offs_any_extern(*offsets)) {
-any_extern:
- /* Externally stored fields are treated in pessimistic
- update */
-
- return(DB_OVERFLOW);
- }
-
- for (i = 0; i < upd_get_n_fields(update); i++) {
- if (dfield_is_ext(&upd_get_nth_field(update, i)->new_val)) {
-
- goto any_extern;
- }
- }
-
- page_cursor = btr_cur_get_page_cur(cursor);
-
- if (!*heap) {
- *heap = mem_heap_create(
- rec_offs_size(*offsets)
- + DTUPLE_EST_ALLOC(rec_offs_n_fields(*offsets)));
- }
-
- new_entry = row_rec_to_index_entry(rec, index, *offsets,
- &n_ext, *heap);
- /* We checked above that there are no externally stored fields. */
- ut_a(!n_ext);
-
- /* The page containing the clustered index record
- corresponding to new_entry is latched in mtr.
- Thus the following call is safe. */
- row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, *heap);
- old_rec_size = rec_offs_size(*offsets);
- new_rec_size = rec_get_converted_size(index, new_entry, 0);
-
- page_zip = buf_block_get_page_zip(block);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
- if (page_zip) {
- if (page_zip_rec_needs_ext(new_rec_size, page_is_comp(page),
- dict_index_get_n_fields(index),
- page_zip_get_size(page_zip))) {
- goto any_extern;
- }
-
- if (!btr_cur_update_alloc_zip(
- page_zip, page_cursor, index, *offsets,
- new_rec_size, true, mtr, thr_get_trx(thr))) {
- return(DB_ZIP_OVERFLOW);
- }
-
- rec = page_cur_get_rec(page_cursor);
- }
-
- if (UNIV_UNLIKELY(new_rec_size
- >= (page_get_free_space_of_empty(page_is_comp(page))
- / 2))) {
- /* We may need to update the IBUF_BITMAP_FREE
- bits after a reorganize that was done in
- btr_cur_update_alloc_zip(). */
- err = DB_OVERFLOW;
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(page_get_data_size(page)
- - old_rec_size + new_rec_size
- < BTR_CUR_PAGE_COMPRESS_LIMIT)) {
- /* We may need to update the IBUF_BITMAP_FREE
- bits after a reorganize that was done in
- btr_cur_update_alloc_zip(). */
-
- /* The page would become too empty */
- err = DB_UNDERFLOW;
- goto func_exit;
- }
-
- /* We do not attempt to reorganize if the page is compressed.
- This is because the page may fail to compress after reorganization. */
- max_size = page_zip
- ? page_get_max_insert_size(page, 1)
- : (old_rec_size
- + page_get_max_insert_size_after_reorganize(page, 1));
-
- if (!page_zip) {
- max_ins_size = page_get_max_insert_size_after_reorganize(page, 1);
- }
-
- if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
- && (max_size >= new_rec_size))
- || (page_get_n_recs(page) <= 1))) {
-
- /* We may need to update the IBUF_BITMAP_FREE
- bits after a reorganize that was done in
- btr_cur_update_alloc_zip(). */
-
- /* There was not enough space, or it did not pay to
- reorganize: for simplicity, we decide what to do assuming a
- reorganization is needed, though it might not be necessary */
-
- err = DB_OVERFLOW;
- goto func_exit;
- }
-
- /* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets,
- update, cmpl_info,
- thr, mtr, &roll_ptr);
- if (err != DB_SUCCESS) {
- /* We may need to update the IBUF_BITMAP_FREE
- bits after a reorganize that was done in
- btr_cur_update_alloc_zip(). */
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) {
- /* skip CHANGE, LOG */
- ut_ad(err == DB_SUCCESS);
- return(DB_SUCCESS);
- }
-
- /* Ok, we may do the replacement. Store on the page infimum the
- explicit locks on rec, before deleting rec (see the comment in
- btr_cur_pessimistic_update). */
-
- lock_rec_store_on_page_infimum(block, rec);
-
- btr_search_update_hash_on_delete(cursor);
-
- page_cur_delete_rec(page_cursor, index, *offsets, mtr);
-
- page_cur_move_to_prev(page_cursor);
-
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
- roll_ptr);
- row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- trx_id);
- }
-
- /* There are no externally stored columns in new_entry */
- rec = btr_cur_insert_if_possible(
- cursor, new_entry, offsets, heap, 0/*n_ext*/, mtr);
- ut_a(rec); /* <- We calculated above the insert would fit */
-
- /* Restore the old explicit lock state on the record */
-
- lock_rec_restore_from_page_infimum(block, rec, block);
-
- page_cur_move_to_next(page_cursor);
- ut_ad(err == DB_SUCCESS);
-
-func_exit:
- if (!(flags & BTR_KEEP_IBUF_BITMAP)
- && !dict_index_is_clust(index)
- && page_is_leaf(page)) {
-
- if (page_zip) {
- ibuf_update_free_bits_zip(block, mtr);
- } else {
- ibuf_update_free_bits_low(block, max_ins_size, mtr);
- }
- }
-
- return(err);
-}
-
-/*************************************************************//**
-If, in a split, a new supremum record was created as the predecessor of the
-updated record, the supremum record must inherit exactly the locks on the
-updated record. In the split it may have inherited locks from the successor
-of the updated record, which is not correct. This function restores the
-right locks for the new supremum. */
-static
-void
-btr_cur_pess_upd_restore_supremum(
-/*==============================*/
- buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: updated record */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* page;
- buf_block_t* prev_block;
- ulint space;
- ulint zip_size;
- ulint prev_page_no;
-
- page = buf_block_get_frame(block);
-
- if (page_rec_get_next(page_get_infimum_rec(page)) != rec) {
- /* Updated record is not the first user record on its page */
-
- return;
- }
-
- space = buf_block_get_space(block);
- zip_size = buf_block_get_zip_size(block);
- prev_page_no = btr_page_get_prev(page, mtr);
-
- ut_ad(prev_page_no != FIL_NULL);
- prev_block = buf_page_get_with_no_latch(space, zip_size,
- prev_page_no, mtr);
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(prev_block->frame, mtr)
- == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- /* We must already have an x-latch on prev_block! */
- ut_ad(mtr_memo_contains(mtr, prev_block, MTR_MEMO_PAGE_X_FIX));
-
- lock_rec_reset_and_inherit_gap_locks(prev_block, block,
- PAGE_HEAP_NO_SUPREMUM,
- page_rec_get_heap_no(rec));
-}
-
-/*************************************************************//**
-Check if the total length of the modified blob for the row is within 10%
-of the total redo log size. This constraint on the blob length is to
-avoid overwriting the redo logs beyond the last checkpoint lsn.
-@return DB_SUCCESS or DB_TOO_BIG_FOR_REDO. */
-static
-dberr_t
-btr_check_blob_limit(const big_rec_t* big_rec_vec)
-{
- const ib_uint64_t redo_size = srv_n_log_files * srv_log_file_size
- * UNIV_PAGE_SIZE;
- const ib_uint64_t redo_10p = redo_size / 10;
- ib_uint64_t total_blob_len = 0;
- dberr_t err = DB_SUCCESS;
-
- /* Calculate the total number of bytes for blob data */
- for (ulint i = 0; i < big_rec_vec->n_fields; i++) {
- total_blob_len += big_rec_vec->fields[i].len;
- }
-
- if (total_blob_len > redo_10p) {
- ib_logf(IB_LOG_LEVEL_ERROR, "The total blob data"
- " length (" UINT64PF ") is greater than"
- " 10%% of the total redo log size (" UINT64PF
- "). Please increase total redo log size.",
- total_blob_len, redo_size);
- err = DB_TOO_BIG_FOR_REDO;
- }
-
- return(err);
-}
-
-/*************************************************************//**
-Performs an update of a record on a page of a tree. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. If the
-update is made on the leaf level, to avoid deadlocks, mtr must also
-own x-latches to brothers of page, if those brothers exist. We assume
-here that the ordering fields of the record do not change.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-btr_cur_pessimistic_update(
-/*=======================*/
- ulint flags, /*!< in: undo logging, locking, and rollback
- flags */
- btr_cur_t* cursor, /*!< in/out: cursor on the record to update;
- cursor may become invalid if *big_rec == NULL
- || !(flags & BTR_KEEP_POS_FLAG) */
- ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
- mem_heap_t** offsets_heap,
- /*!< in/out: pointer to memory heap
- that can be emptied */
- mem_heap_t* entry_heap,
- /*!< in/out: memory heap for allocating
- big_rec and the index tuple */
- big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
- be stored externally by the caller */
- const upd_t* update, /*!< in: update vector; this is allowed also
- contain trx id and roll ptr fields, but
- the values in update vector have no effect */
- ulint cmpl_info,/*!< in: compiler info on secondary index
- updates */
- que_thr_t* thr, /*!< in: query thread */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in/out: mini-transaction; must be
- committed before latching any further pages */
-{
- big_rec_t* big_rec_vec = NULL;
- big_rec_t* dummy_big_rec;
- dict_index_t* index;
- buf_block_t* block;
- page_t* page;
- page_zip_des_t* page_zip;
- rec_t* rec;
- page_cur_t* page_cursor;
- dberr_t err;
- dberr_t optim_err;
- roll_ptr_t roll_ptr;
- ibool was_first;
- ulint n_reserved = 0;
- ulint n_ext;
- trx_t* trx;
- ulint max_ins_size = 0;
-
- *offsets = NULL;
- *big_rec = NULL;
-
- block = btr_cur_get_block(cursor);
- page = buf_block_get_frame(block);
- page_zip = buf_block_get_page_zip(block);
- index = cursor->index;
-
- ut_ad(thr_get_trx(thr)->fake_changes
- || mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(thr_get_trx(thr)->fake_changes
- || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
- /* The insert buffer tree should never be updated in place. */
- ut_ad(!dict_index_is_ibuf(index));
- ut_ad(dict_index_is_online_ddl(index) == !!(flags & BTR_CREATE_FLAG)
- || dict_index_is_clust(index));
- ut_ad(thr_get_trx(thr)->id == trx_id
- || (flags & ~BTR_KEEP_POS_FLAG)
- == (BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
- | BTR_CREATE_FLAG | BTR_KEEP_SYS_FLAG));
-
- err = optim_err = btr_cur_optimistic_update(
- flags | BTR_KEEP_IBUF_BITMAP,
- cursor, offsets, offsets_heap, update,
- cmpl_info, thr, trx_id, mtr);
-
- switch (err) {
- case DB_ZIP_OVERFLOW:
- case DB_UNDERFLOW:
- case DB_OVERFLOW:
- break;
- default:
- err_exit:
- /* We suppressed this with BTR_KEEP_IBUF_BITMAP.
- For DB_ZIP_OVERFLOW, the IBUF_BITMAP_FREE bits were
- already reset by btr_cur_update_alloc_zip() if the
- page was recompressed. */
- if (page_zip
- && optim_err != DB_ZIP_OVERFLOW
- && !dict_index_is_clust(index)
- && page_is_leaf(page)) {
- ibuf_update_free_bits_zip(block, mtr);
- }
-
- return(err);
- }
-
- /* Do lock checking and undo logging */
- err = btr_cur_upd_lock_and_undo(flags, cursor, *offsets,
- update, cmpl_info,
- thr, mtr, &roll_ptr);
- if (err != DB_SUCCESS) {
- goto err_exit;
- }
-
- if (optim_err == DB_OVERFLOW) {
- ulint reserve_flag;
- ulint n_extents;
-
- /* First reserve enough free space for the file segments
- of the index tree, so that the update will not fail because
- of lack of space */
- if (UNIV_UNLIKELY(cursor->tree_height == ULINT_UNDEFINED)) {
- /* When the tree height is uninitialized due to fake
- changes, reserve some hardcoded number of extents. */
- ut_a(thr_get_trx(thr)->fake_changes);
- n_extents = 3;
- }
- else {
- n_extents = cursor->tree_height / 16 + 3;
- }
-
- if (flags & BTR_NO_UNDO_LOG_FLAG) {
- reserve_flag = FSP_CLEANING;
- } else {
- reserve_flag = FSP_NORMAL;
- }
-
- if (!fsp_reserve_free_extents(&n_reserved, index->space,
- n_extents, reserve_flag, mtr)) {
- err = DB_OUT_OF_FILE_SPACE;
- goto err_exit;
- }
- }
-
- rec = btr_cur_get_rec(cursor);
-
- *offsets = rec_get_offsets(
- rec, index, *offsets, ULINT_UNDEFINED, offsets_heap);
-
- dtuple_t* new_entry = row_rec_to_index_entry(
- rec, index, *offsets, &n_ext, entry_heap);
-
- /* The page containing the clustered index record
- corresponding to new_entry is latched in mtr. If the
- clustered index record is delete-marked, then its externally
- stored fields cannot have been purged yet, because then the
- purge would also have removed the clustered index record
- itself. Thus the following call is safe. */
- row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
- FALSE, entry_heap);
-
- trx = thr_get_trx(thr);
-
- if (!(flags & BTR_KEEP_SYS_FLAG) && UNIV_LIKELY(!trx->fake_changes)) {
- row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
- roll_ptr);
- row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
- trx_id);
- }
-
- if ((flags & BTR_NO_UNDO_LOG_FLAG) && rec_offs_any_extern(*offsets)) {
- /* We are in a transaction rollback undoing a row
- update: we must free possible externally stored fields
- which got new values in the update, if they are not
- inherited values. They can be inherited if we have
- updated the primary key to another value, and then
- update it back again. */
-
- ut_ad(big_rec_vec == NULL);
-
- /* fake_changes should not cause undo. so never reaches here */
- ut_ad(!(trx->fake_changes));
-
- btr_rec_free_updated_extern_fields(
- index, rec, page_zip, *offsets, update,
- trx_is_recv(thr_get_trx(thr))
- ? RB_RECOVERY : RB_NORMAL, mtr);
- }
-
- /* We have to set appropriate extern storage bits in the new
- record to be inserted: we have to remember which fields were such */
-
- ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec));
- ut_ad(rec_offs_validate(rec, index, *offsets));
- n_ext += btr_push_update_extern_fields(new_entry, update, entry_heap);
-
- if (page_zip) {
- ut_ad(page_is_comp(page));
- if (page_zip_rec_needs_ext(
- rec_get_converted_size(index, new_entry, n_ext),
- TRUE,
- dict_index_get_n_fields(index),
- page_zip_get_size(page_zip))) {
-
- goto make_external;
- }
- } else if (page_zip_rec_needs_ext(
- rec_get_converted_size(index, new_entry, n_ext),
- page_is_comp(page), 0, 0)) {
-make_external:
- big_rec_vec = dtuple_convert_big_rec(index, new_entry, &n_ext);
- if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
-
- /* We cannot goto return_after_reservations,
- because we may need to update the
- IBUF_BITMAP_FREE bits, which was suppressed by
- BTR_KEEP_IBUF_BITMAP. */
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip
- || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
- if (n_reserved > 0) {
- fil_space_release_free_extents(
- index->space, n_reserved);
- }
-
- err = DB_TOO_BIG_RECORD;
- goto err_exit;
- }
-
- ut_ad(page_is_leaf(page));
- ut_ad(dict_index_is_clust(index));
- ut_ad(flags & BTR_KEEP_POS_FLAG);
- }
-
- if (UNIV_UNLIKELY(trx->fake_changes)) {
- /* skip CHANGE, LOG */
- err = DB_SUCCESS;
- goto return_after_reservations;
- }
-
- if (big_rec_vec) {
-
- err = btr_check_blob_limit(big_rec_vec);
-
- if (err != DB_SUCCESS) {
- if (n_reserved > 0) {
- fil_space_release_free_extents(
- index->space, n_reserved);
- }
- goto err_exit;
- }
- }
-
- if (!page_zip) {
- max_ins_size = page_get_max_insert_size_after_reorganize(page, 1);
- }
-
- /* Store state of explicit locks on rec on the page infimum record,
- before deleting rec. The page infimum acts as a dummy carrier of the
- locks, taking care also of lock releases, before we can move the locks
- back on the actual record. There is a special case: if we are
- inserting on the root page and the insert causes a call of
- btr_root_raise_and_insert. Therefore we cannot in the lock system
- delete the lock structs set on the root page even if the root
- page carries just node pointers. */
-
- lock_rec_store_on_page_infimum(block, rec);
-
- btr_search_update_hash_on_delete(cursor);
-
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
- page_cursor = btr_cur_get_page_cur(cursor);
-
- page_cur_delete_rec(page_cursor, index, *offsets, mtr);
-
- page_cur_move_to_prev(page_cursor);
-
- rec = btr_cur_insert_if_possible(cursor, new_entry,
- offsets, offsets_heap, n_ext, mtr);
-
- if (rec) {
- page_cursor->rec = rec;
-
- lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
- rec, block);
-
- if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) {
- /* The new inserted record owns its possible externally
- stored fields */
- btr_cur_unmark_extern_fields(
- page_zip, rec, index, *offsets, mtr);
- }
-
- bool adjust = big_rec_vec && (flags & BTR_KEEP_POS_FLAG);
-
- if (btr_cur_compress_if_useful(cursor, adjust, mtr)) {
- if (adjust) {
- rec_offs_make_valid(
- page_cursor->rec, index, *offsets);
- }
- } else if (!dict_index_is_clust(index)
- && page_is_leaf(page)) {
-
- /* Update the free bits in the insert buffer.
- This is the same block which was skipped by
- BTR_KEEP_IBUF_BITMAP. */
- if (page_zip) {
- ibuf_update_free_bits_zip(block, mtr);
- } else {
- ibuf_update_free_bits_low(block, max_ins_size,
- mtr);
- }
- }
-
- err = DB_SUCCESS;
- goto return_after_reservations;
- } else {
- /* If the page is compressed and it initially
- compresses very well, and there is a subsequent insert
- of a badly-compressing record, it is possible for
- btr_cur_optimistic_update() to return DB_UNDERFLOW and
- btr_cur_insert_if_possible() to return FALSE. */
- ut_a(page_zip || optim_err != DB_UNDERFLOW);
-
- /* Out of space: reset the free bits.
- This is the same block which was skipped by
- BTR_KEEP_IBUF_BITMAP. */
- if (!dict_index_is_clust(index) && page_is_leaf(page)) {
- ibuf_reset_free_bits(block);
- }
- }
-
- if (big_rec_vec) {
- ut_ad(page_is_leaf(page));
- ut_ad(dict_index_is_clust(index));
- ut_ad(flags & BTR_KEEP_POS_FLAG);
-
- /* btr_page_split_and_insert() in
- btr_cur_pessimistic_insert() invokes
- mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK).
- We must keep the index->lock when we created a
- big_rec, so that row_upd_clust_rec() can store the
- big_rec in the same mini-transaction. */
-
- mtr_x_lock(dict_index_get_lock(index), mtr);
- }
-
- /* Was the record to be updated positioned as the first user
- record on its page? */
- was_first = page_cur_is_before_first(page_cursor);
-
- /* Lock checks and undo logging were already performed by
- btr_cur_upd_lock_and_undo(). We do not try
- btr_cur_optimistic_insert() because
- btr_cur_insert_if_possible() already failed above. */
-
- err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG
- | BTR_NO_LOCKING_FLAG
- | BTR_KEEP_SYS_FLAG,
- cursor, offsets, offsets_heap,
- new_entry, &rec,
- &dummy_big_rec, n_ext, NULL, mtr);
- ut_a(rec);
- ut_a(err == DB_SUCCESS);
- ut_a(dummy_big_rec == NULL);
- ut_ad(rec_offs_validate(rec, cursor->index, *offsets));
- page_cursor->rec = rec;
-
- if (dict_index_is_sec_or_ibuf(index)) {
- /* Update PAGE_MAX_TRX_ID in the index page header.
- It was not updated by btr_cur_pessimistic_insert()
- because of BTR_NO_LOCKING_FLAG. */
- buf_block_t* rec_block;
-
- rec_block = btr_cur_get_block(cursor);
-
- page_update_max_trx_id(rec_block,
- buf_block_get_page_zip(rec_block),
- trx_id, mtr);
- }
-
- if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) {
- /* The new inserted record owns its possible externally
- stored fields */
- buf_block_t* rec_block = btr_cur_get_block(cursor);
-
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
- page = buf_block_get_frame(rec_block);
-#endif /* UNIV_ZIP_DEBUG */
- page_zip = buf_block_get_page_zip(rec_block);
-
- btr_cur_unmark_extern_fields(page_zip,
- rec, index, *offsets, mtr);
- }
-
- lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor),
- rec, block);
-
- /* If necessary, restore also the correct lock state for a new,
- preceding supremum record created in a page split. While the old
- record was nonexistent, the supremum might have inherited its locks
- from a wrong record. */
-
- if (!was_first) {
- btr_cur_pess_upd_restore_supremum(btr_cur_get_block(cursor),
- rec, mtr);
- }
-
-return_after_reservations:
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
- if (n_reserved > 0) {
- fil_space_release_free_extents(index->space, n_reserved);
- }
-
- *big_rec = big_rec_vec;
-
- return(err);
-}
-
-/*==================== B-TREE DELETE MARK AND UNMARK ===============*/
-
-/****************************************************************//**
-Writes the redo log record for delete marking or unmarking of an index
-record. */
-UNIV_INLINE
-void
-btr_cur_del_mark_set_clust_rec_log(
-/*===============================*/
- rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: index of the record */
- trx_id_t trx_id, /*!< in: transaction id */
- roll_ptr_t roll_ptr,/*!< in: roll ptr to the undo log record */
- mtr_t* mtr) /*!< in: mtr */
-{
- byte* log_ptr;
-
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-
- log_ptr = mlog_open_and_write_index(mtr, rec, index,
- page_rec_is_comp(rec)
- ? MLOG_COMP_REC_CLUST_DELETE_MARK
- : MLOG_REC_CLUST_DELETE_MARK,
- 1 + 1 + DATA_ROLL_PTR_LEN
- + 14 + 2);
-
- if (!log_ptr) {
- /* Logging in mtr is switched off during crash recovery */
- return;
- }
-
- *log_ptr++ = 0;
- *log_ptr++ = 1;
-
- log_ptr = row_upd_write_sys_vals_to_log(
- index, trx_id, roll_ptr, log_ptr, mtr);
- mach_write_to_2(log_ptr, page_offset(rec));
- log_ptr += 2;
-
- mlog_close(mtr, log_ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/****************************************************************//**
-Parses the redo log record for delete marking or unmarking of a clustered
-index record.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-btr_cur_parse_del_mark_set_clust_rec(
-/*=================================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in/out: page or NULL */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- dict_index_t* index) /*!< in: index corresponding to page */
-{
- ulint flags;
- ulint val;
- ulint pos;
- trx_id_t trx_id;
- roll_ptr_t roll_ptr;
- ulint offset;
- rec_t* rec;
-
- ut_ad(!page
- || !!page_is_comp(page) == dict_table_is_comp(index->table));
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- flags = mach_read_from_1(ptr);
- ptr++;
- val = mach_read_from_1(ptr);
- ptr++;
-
- ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- offset = mach_read_from_2(ptr);
- ptr += 2;
-
- ut_a(offset <= UNIV_PAGE_SIZE);
-
- if (page) {
- rec = page + offset;
-
- /* We do not need to reserve btr_search_latch, as the page
- is only being recovered, and there cannot be a hash index to
- it. Besides, these fields are being updated in place
- and the adaptive hash index does not depend on them. */
-
- btr_rec_set_deleted_flag(rec, page_zip, val);
-
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
- row_upd_rec_sys_fields_in_recovery(
- rec, page_zip,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap),
- pos, trx_id, roll_ptr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
- }
-
- return(ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************//**
-Marks a clustered index record deleted. Writes an undo log record to
-undo log on this delete marking. Writes in the trx id field the id
-of the deleting transaction, and in the roll ptr field pointer to the
-undo log record created.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
-dberr_t
-btr_cur_del_mark_set_clust_rec(
-/*===========================*/
- buf_block_t* block, /*!< in/out: buffer block of the record */
- rec_t* rec, /*!< in/out: record */
- dict_index_t* index, /*!< in: clustered index of the record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- roll_ptr_t roll_ptr;
- dberr_t err;
- page_zip_des_t* page_zip;
- trx_t* trx;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
- ut_ad(buf_block_get_frame(block) == page_align(rec));
- ut_ad(page_is_leaf(page_align(rec)));
-
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops) {
- btr_cur_trx_report(thr_get_trx(thr)->id, index, "del mark ");
- rec_print_new(stderr, rec, offsets);
- }
-#endif /* UNIV_DEBUG */
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
-
- if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) {
- /* skip LOCK, UNDO, CHANGE, LOG */
- return(DB_SUCCESS);
- }
-
- err = lock_clust_rec_modify_check_and_lock(BTR_NO_LOCKING_FLAG, block,
- rec, index, offsets, thr);
-
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- err = trx_undo_report_row_operation(thr,
- index, NULL, NULL, 0, rec, offsets,
- &roll_ptr);
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- /* The btr_search_latch is not needed here, because
- the adaptive hash index does not depend on the delete-mark
- and the delete-mark is being updated in place. */
-
- page_zip = buf_block_get_page_zip(block);
-
- btr_blob_dbg_set_deleted_flag(rec, index, offsets, TRUE);
- btr_rec_set_deleted_flag(rec, page_zip, TRUE);
-
- trx = thr_get_trx(thr);
-
- if (dict_index_is_online_ddl(index)) {
- row_log_table_delete(rec, index, offsets, NULL);
- }
-
- row_upd_rec_sys_fields(rec, page_zip, index, offsets, trx, roll_ptr);
-
- btr_cur_del_mark_set_clust_rec_log(rec, index, trx->id,
- roll_ptr, mtr);
-
- return(err);
-}
-
-/****************************************************************//**
-Writes the redo log record for a delete mark setting of a secondary
-index record. */
-UNIV_INLINE
-void
-btr_cur_del_mark_set_sec_rec_log(
-/*=============================*/
- rec_t* rec, /*!< in: record */
- ibool val, /*!< in: value to set */
- mtr_t* mtr) /*!< in: mtr */
-{
- byte* log_ptr;
- ut_ad(val <= 1);
-
- log_ptr = mlog_open(mtr, 11 + 1 + 2);
-
- if (!log_ptr) {
- /* Logging in mtr is switched off during crash recovery:
- in that case mlog_open returns NULL */
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_fast(
- rec, MLOG_REC_SEC_DELETE_MARK, log_ptr, mtr);
- mach_write_to_1(log_ptr, val);
- log_ptr++;
-
- mach_write_to_2(log_ptr, page_offset(rec));
- log_ptr += 2;
-
- mlog_close(mtr, log_ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/****************************************************************//**
-Parses the redo log record for delete marking or unmarking of a secondary
-index record. The parsed body is a 1-byte flag value followed by a 2-byte
-page offset of the record. If page is not NULL, the flag is applied to
-the record at that offset; otherwise the record is only parsed.
-@return end of log record, or NULL if fewer than 3 bytes are available
-between ptr and end_ptr */
-UNIV_INTERN
-byte*
-btr_cur_parse_del_mark_set_sec_rec(
-/*===============================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in/out: page or NULL */
- page_zip_des_t* page_zip)/*!< in/out: compressed page, or NULL */
-{
- ulint val;
- ulint offset;
- rec_t* rec;
-
- if (end_ptr < ptr + 3) {
-
- return(NULL);
- }
-
- val = mach_read_from_1(ptr);
- ptr++;
-
- offset = mach_read_from_2(ptr);
- ptr += 2;
-
- ut_a(offset <= UNIV_PAGE_SIZE); /* the offset must lie within a page */
-
- if (page) {
- rec = page + offset;
-
- /* We do not need to reserve btr_search_latch, as the page
- is only being recovered, and there cannot be a hash index to
- it. Besides, the delete-mark flag is being updated in place
- and the adaptive hash index does not depend on it. */
-
- btr_rec_set_deleted_flag(rec, page_zip, val);
- }
-
- return(ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************//**
-Sets a secondary index record delete mark to TRUE or FALSE.
-If the transaction of thr has fake_changes set, DB_SUCCESS is returned
-without locking, changing or logging anything. Otherwise the record is
-first checked and locked with lock_sec_rec_modify_check_and_lock(); if
-that fails, its error code is returned and the record is not modified.
-On success the flag is updated in place and the change is redo-logged.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
-dberr_t
-btr_cur_del_mark_set_sec_rec(
-/*=========================*/
- ulint flags, /*!< in: locking flag */
- btr_cur_t* cursor, /*!< in: cursor */
- ibool val, /*!< in: value to set */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- buf_block_t* block;
- rec_t* rec;
- dberr_t err;
-
- if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) {
- /* skip LOCK, CHANGE, LOG */
- return(DB_SUCCESS);
- }
-
- block = btr_cur_get_block(cursor);
- rec = btr_cur_get_rec(cursor);
-
-#ifdef UNIV_DEBUG
- if (btr_cur_print_record_ops) {
- btr_cur_trx_report(thr_get_trx(thr)->id, cursor->index,
- "del mark ");
- rec_print(stderr, rec, cursor->index);
- }
-#endif /* UNIV_DEBUG */
-
- err = lock_sec_rec_modify_check_and_lock(flags,
- btr_cur_get_block(cursor),
- rec, cursor->index, thr, mtr);
- if (err != DB_SUCCESS) {
-
- return(err);
- }
-
- ut_ad(!!page_rec_is_comp(rec)
- == dict_table_is_comp(cursor->index->table));
-
- /* We do not need to reserve btr_search_latch, as the
- delete-mark flag is being updated in place and the adaptive
- hash index does not depend on it. */
- btr_rec_set_deleted_flag(rec, buf_block_get_page_zip(block), val);
-
- btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
-
- return(DB_SUCCESS);
-}
-
-/***********************************************************//**
-Sets a secondary index record's delete mark to the given value. This
-function is only used by the insert buffer merge mechanism. No lock
-check is performed here; the flag is updated in place and the change
-is redo-logged with btr_cur_del_mark_set_sec_rec_log(). */
-UNIV_INTERN
-void
-btr_cur_set_deleted_flag_for_ibuf(
-/*==============================*/
- rec_t* rec, /*!< in/out: record */
- page_zip_des_t* page_zip, /*!< in/out: compressed page
- corresponding to rec, or NULL
- when the tablespace is
- uncompressed */
- ibool val, /*!< in: value to set */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- /* We do not need to reserve btr_search_latch, as the page
- has just been read to the buffer pool and there cannot be
- a hash index to it. Besides, the delete-mark flag is being
- updated in place and the adaptive hash index does not depend
- on it. */
-
- btr_rec_set_deleted_flag(rec, page_zip, val);
-
- btr_cur_del_mark_set_sec_rec_log(rec, val, mtr);
-}
-
-/*==================== B-TREE RECORD REMOVE =========================*/
-
-/*************************************************************//**
-Tries to compress a page of the tree if it seems useful. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. To avoid
-deadlocks, mtr must also own x-latches to brothers of page, if those
-brothers exist. NOTE: it is assumed that the caller has reserved enough
-free extents so that the compression will always succeed if done!
-btr_compress() is attempted only if btr_cur_compress_recommendation()
-returns TRUE for the cursor page.
-@return TRUE if compression occurred */
-UNIV_INTERN
-ibool
-btr_cur_compress_if_useful(
-/*=======================*/
- btr_cur_t* cursor, /*!< in/out: cursor on the page to compress;
- cursor does not stay valid if !adjust and
- compression occurs */
- ibool adjust, /*!< in: TRUE if should adjust the
- cursor position even if compression occurs */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ut_ad(mtr_memo_contains(mtr,
- dict_index_get_lock(btr_cur_get_index(cursor)),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX));
-
- return(btr_cur_compress_recommendation(cursor, mtr)
- && btr_compress(cursor, adjust, mtr));
-}
-
-/*******************************************************//**
-Removes the record on which the tree cursor is positioned on a leaf page.
-It is assumed that the mtr has an x-latch on the page where the cursor is
-positioned, but no latch on the whole tree.
-The record is deleted only if it has no externally stored fields and
-btr_cur_can_delete_without_compress() allows it; otherwise the page is
-left unchanged and FALSE is returned.
-@return TRUE if success, i.e., the page did not become too empty */
-UNIV_INTERN
-ibool
-btr_cur_optimistic_delete_func(
-/*===========================*/
- btr_cur_t* cursor, /*!< in: cursor on leaf page, on the record to
- delete; cursor stays valid: if deletion
- succeeds, on function exit it points to the
- successor of the deleted record */
-#ifdef UNIV_DEBUG
- ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
-#endif /* UNIV_DEBUG */
- mtr_t* mtr) /*!< in: mtr; if this function returns
- TRUE on a leaf page of a secondary
- index, the mtr must be committed
- before latching any further pages */
-{
- buf_block_t* block;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- ibool no_compress_needed;
- rec_offs_init(offsets_);
-
- ut_ad(flags == 0 || flags == BTR_CREATE_FLAG);
- ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX));
- /* This is intended only for leaf page deletions */
-
- block = btr_cur_get_block(cursor);
-
- SRV_CORRUPT_TABLE_CHECK(block, return(DB_CORRUPTION);); /* NOTE(review):
- this function is declared to return ibool, yet a dberr_t value is
- returned here; if DB_CORRUPTION is nonzero, callers would read it
- as TRUE -- confirm that this is intended */
-
- ut_ad(page_is_leaf(buf_block_get_frame(block)));
- ut_ad(!dict_index_is_online_ddl(cursor->index)
- || dict_index_is_clust(cursor->index)
- || (flags & BTR_CREATE_FLAG));
-
- rec = btr_cur_get_rec(cursor);
- offsets = rec_get_offsets(rec, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
-
- no_compress_needed = !rec_offs_any_extern(offsets)
- && btr_cur_can_delete_without_compress(
- cursor, rec_offs_size(offsets), mtr);
-
- if (no_compress_needed) {
-
- page_t* page = buf_block_get_frame(block);
- page_zip_des_t* page_zip= buf_block_get_page_zip(block);
-
- lock_update_delete(block, rec);
-
- btr_search_update_hash_on_delete(cursor);
-
- if (page_zip) {
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page, cursor->index));
-#endif /* UNIV_ZIP_DEBUG */
- page_cur_delete_rec(btr_cur_get_page_cur(cursor),
- cursor->index, offsets, mtr);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page, cursor->index));
-#endif /* UNIV_ZIP_DEBUG */
-
- /* On compressed pages, the IBUF_BITMAP_FREE
- space is not affected by deleting (purging)
- records, because it is defined as the minimum
- of space available *without* reorganize, and
- space available in the modification log. */
- } else {
- const ulint max_ins
- = page_get_max_insert_size_after_reorganize(
- page, 1); /* sampled before the delete below */
-
- page_cur_delete_rec(btr_cur_get_page_cur(cursor),
- cursor->index, offsets, mtr);
-
- /* The change buffer does not handle inserts
- into non-leaf pages, into clustered indexes,
- or into the change buffer. */
- if (page_is_leaf(page)
- && !dict_index_is_clust(cursor->index)
- && !dict_index_is_ibuf(cursor->index)) {
- ibuf_update_free_bits_low(block, max_ins, mtr);
- }
- }
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- return(no_compress_needed);
-}
-
-/*************************************************************//**
-Removes the record on which the tree cursor is positioned. Tries
-to compress the page if its fillfactor drops below a threshold
-or if it is the only page on the level. It is assumed that mtr holds
-an x-latch on the tree and on the cursor page. To avoid deadlocks,
-mtr must also own x-latches to brothers of page, if those brothers
-exist.
-If the page holds fewer than 2 records and is not the root page, the
-whole page is discarded with btr_discard_page() instead, and TRUE is
-returned. If has_reserved_extents is FALSE and the extent reservation
-fails, nothing is deleted, *err is set to DB_OUT_OF_FILE_SPACE and
-FALSE is returned.
-@return TRUE if compression occurred */
-UNIV_INTERN
-ibool
-btr_cur_pessimistic_delete(
-/*=======================*/
- dberr_t* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
- the latter may occur because we may have
- to update node pointers on upper levels,
- and in the case of variable length keys
- these may actually grow in size */
- ibool has_reserved_extents, /*!< in: TRUE if the
- caller has already reserved enough free
- extents so that he knows that the operation
- will succeed */
- btr_cur_t* cursor, /*!< in: cursor on the record to delete;
- if compression does not occur, the cursor
- stays valid: it points to successor of
- deleted record on function exit */
- ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
- page_t* page;
- page_zip_des_t* page_zip;
- dict_index_t* index;
- rec_t* rec;
- ulint n_reserved = 0;
- ibool success;
- ibool ret = FALSE;
- ulint level;
- mem_heap_t* heap;
- ulint* offsets;
-
- block = btr_cur_get_block(cursor);
- page = buf_block_get_frame(block);
- index = btr_cur_get_index(cursor);
-
- ut_ad(flags == 0 || flags == BTR_CREATE_FLAG);
- ut_ad(!dict_index_is_online_ddl(index)
- || dict_index_is_clust(index)
- || (flags & BTR_CREATE_FLAG));
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- if (!has_reserved_extents) {
- /* First reserve enough free space for the file segments
- of the index tree, so that the node pointer updates will
- not fail because of lack of space */
-
- ut_a(cursor->tree_height != ULINT_UNDEFINED);
-
- ulint n_extents = cursor->tree_height / 32 + 1;
-
- success = fsp_reserve_free_extents(&n_reserved,
- index->space,
- n_extents,
- FSP_CLEANING, mtr);
- if (!success) {
- *err = DB_OUT_OF_FILE_SPACE;
-
- return(FALSE);
- }
- }
-
- heap = mem_heap_create(1024);
- rec = btr_cur_get_rec(cursor);
- page_zip = buf_block_get_page_zip(block);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
- offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
-
- if (rec_offs_any_extern(offsets)) {
- btr_rec_free_externally_stored_fields(index,
- rec, offsets, page_zip,
- rb_ctx, mtr);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
- }
-
- if (UNIV_UNLIKELY(page_get_n_recs(page) < 2)
- && UNIV_UNLIKELY(dict_index_get_page(index)
- != buf_block_get_page_no(block))) {
-
- /* If there is only one record, drop the whole page in
- btr_discard_page, if this is not the root page */
-
- btr_discard_page(cursor, mtr);
-
- ret = TRUE;
-
- goto return_after_reservations;
- }
-
- if (flags == 0) { /* i.e. not when BTR_CREATE_FLAG was passed */
- lock_update_delete(block, rec);
- }
-
- level = btr_page_get_level(page, mtr);
-
- if (level > 0
- && UNIV_UNLIKELY(rec == page_rec_get_next(
- page_get_infimum_rec(page)))) {
-
- rec_t* next_rec = page_rec_get_next(rec);
-
- if (btr_page_get_prev(page, mtr) == FIL_NULL) {
-
- /* If we delete the leftmost node pointer on a
- non-leaf level, we must mark the new leftmost node
- pointer as the predefined minimum record */
-
- /* This will make page_zip_validate() fail until
- page_cur_delete_rec() completes. This is harmless,
- because everything will take place within a single
- mini-transaction and because writing to the redo log
- is an atomic operation (performed by mtr_commit()). */
- btr_set_min_rec_mark(next_rec, mtr);
- } else {
- /* Otherwise, if we delete the leftmost node pointer
- on a page, we have to change the father node pointer
- so that it is equal to the new leftmost node pointer
- on the page */
-
- btr_node_ptr_delete(index, block, mtr);
-
- dtuple_t* node_ptr = dict_index_build_node_ptr(
- index, next_rec, buf_block_get_page_no(block),
- heap, level);
-
- btr_insert_on_non_leaf_level(
- flags, index, level + 1, node_ptr, mtr);
- }
- }
-
- btr_search_update_hash_on_delete(cursor);
-
- page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
- ut_ad(btr_check_node_ptr(index, block, mtr));
-
-return_after_reservations:
- *err = DB_SUCCESS;
-
- mem_heap_free(heap);
-
- if (ret == FALSE) { /* the page was not discarded above */
- ret = btr_cur_compress_if_useful(cursor, FALSE, mtr);
- }
-
- if (n_reserved > 0) {
- fil_space_release_free_extents(index->space, n_reserved);
- }
-
- return(ret);
-}
-
-/*******************************************************************//**
-Adds path information to the cursor for the current page, for which
-the binary search has been performed.
-The slot for the page is cursor->path_arr[root_height - height], so the
-root page uses slot 0. When the leaf level (height == 0) is reached, the
-slot at index root_height + 1 is marked as the end of the path by
-setting its nth_rec to ULINT_UNDEFINED. If root_height is
-BTR_PATH_ARRAY_N_SLOTS - 1 or more, only slot 0 is marked that way
-(an empty path) and nothing else is recorded. */
-static
-void
-btr_cur_add_path_info(
-/*==================*/
- btr_cur_t* cursor, /*!< in: cursor positioned on a page */
- ulint height, /*!< in: height of the page in tree;
- 0 means leaf node */
- ulint root_height) /*!< in: root node height in tree */
-{
- btr_path_t* slot;
- const rec_t* rec;
- const page_t* page;
-
- ut_a(cursor->path_arr);
-
- if (root_height >= BTR_PATH_ARRAY_N_SLOTS - 1) {
- /* Do nothing; return empty path */
-
- slot = cursor->path_arr;
- slot->nth_rec = ULINT_UNDEFINED;
-
- return;
- }
-
- if (height == 0) {
- /* Mark end of slots for path */
- slot = cursor->path_arr + root_height + 1;
- slot->nth_rec = ULINT_UNDEFINED;
- }
-
- rec = btr_cur_get_rec(cursor);
-
- slot = cursor->path_arr + (root_height - height);
-
- page = page_align(rec);
-
- slot->nth_rec = page_rec_get_n_recs_before(rec);
- slot->n_recs = page_get_n_recs(page);
- slot->page_no = page_get_page_no(page);
- slot->page_level = btr_page_get_level_low(page);
-}
-
-/*******************************************************************//**
-Estimate the number of rows between slot1 and slot2 for any level on a
-B-tree. This function starts from slot1->page and reads a few pages to
-the right, counting their records. If we reach slot2->page quickly then
-we know exactly how many records there are between slot1 and slot2 and
-we set is_n_rows_exact to TRUE. If we cannot reach slot2->page quickly
-then we calculate the average number of records in the pages scanned
-so far and assume that all pages that we did not scan up to slot2->page
-contain the same number of records, then we multiply that average by
-the number of pages between slot1->page and slot2->page (which is
-n_rows_on_prev_level). In this case we set is_n_rows_exact to FALSE.
-If not even the first page could be read and validated, the arbitrary
-value 10 is returned as the estimate.
-@return number of rows (exact or estimated) */
-static
-ib_int64_t
-btr_estimate_n_rows_in_range_on_level(
-/*==================================*/
- dict_index_t* index, /*!< in: index */
- btr_path_t* slot1, /*!< in: left border */
- btr_path_t* slot2, /*!< in: right border */
- ib_int64_t n_rows_on_prev_level, /*!< in: number of rows
- on the previous level for the
- same descend paths; used to
- determine the number of pages
- on this level */
- ibool* is_n_rows_exact) /*!< out: TRUE if the returned
- value is exact i.e. not an
- estimation */
-{
- ulint space;
- ib_int64_t n_rows;
- ulint n_pages_read;
- ulint page_no;
- ulint zip_size;
- ulint level;
-
- space = dict_index_get_space(index);
-
- n_rows = 0;
- n_pages_read = 0;
-
- /* Assume by default that we will scan all pages between
- slot1->page_no and slot2->page_no */
- *is_n_rows_exact = TRUE;
-
- /* add records from slot1->page_no which are to the right of
- the record which serves as a left border of the range, if any */
- if (slot1->nth_rec < slot1->n_recs) {
- n_rows += slot1->n_recs - slot1->nth_rec;
- }
-
- /* add records from slot2->page_no which are to the left of
- the record which serves as a right border of the range, if any */
- if (slot2->nth_rec > 1) {
- n_rows += slot2->nth_rec - 1;
- }
-
- /* count the records in the pages between slot1->page_no and
- slot2->page_no (non inclusive), if any */
-
- zip_size = fil_space_get_zip_size(space);
-
- /* Do not read more than this number of pages in order not to hurt
- performance with this code which is just an estimation. If we read
- this many pages before reaching slot2->page_no then we estimate the
- average from the pages scanned so far */
-# define N_PAGES_READ_LIMIT 10
-
- page_no = slot1->page_no;
- level = slot1->page_level;
-
- do {
- mtr_t mtr;
- page_t* page;
- buf_block_t* block;
- dberr_t err=DB_SUCCESS;
-
- mtr_start(&mtr);
-
- /* Fetch the page. Because we are not holding the
- index->lock, the tree may have changed and we may be
- attempting to read a page that is no longer part of
- the B-tree. We pass BUF_GET_POSSIBLY_FREED in order to
- silence a debug assertion about this. */
- block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH,
- NULL, BUF_GET_POSSIBLY_FREED,
- __FILE__, __LINE__, &mtr, &err);
-
- ut_ad((block != NULL) == (err == DB_SUCCESS));
-
- if (err != DB_SUCCESS) {
- if (err == DB_DECRYPTION_FAILED) {
- ib_push_warning((void *)NULL,
- DB_DECRYPTION_FAILED,
- "Table %s is encrypted but encryption service or"
- " used key_id is not available. "
- " Can't continue reading table.",
- index->table->name);
- index->table->file_unreadable = true;
- }
-
- mtr_commit(&mtr);
- goto inexact;
- }
-
- page = buf_block_get_frame(block);
-
- /* It is possible that the tree has been reorganized in the
- meantime and this is a different page. If this happens the
- calculated estimate will be bogus, which is not fatal as
- this is only an estimate. We are sure that a page with
- page_no exists because InnoDB never frees pages, only
- reuses them. */
- if (fil_page_get_type(page) != FIL_PAGE_INDEX
- || btr_page_get_index_id(page) != index->id
- || btr_page_get_level_low(page) != level) {
-
- /* The page got reused for something else */
- mtr_commit(&mtr);
- goto inexact;
- }
-
- /* It is possible but highly unlikely that the page was
- originally written by an old version of InnoDB that did
- not initialize FIL_PAGE_TYPE on other than B-tree pages.
- For example, this could be an almost-empty BLOB page
- that happens to contain the magic values in the fields
- that we checked above. */
-
- n_pages_read++;
-
- if (page_no != slot1->page_no) {
- /* Do not count the records on slot1->page_no,
- we already counted them before this loop. */
- n_rows += page_get_n_recs(page);
- }
-
- page_no = btr_page_get_next(page, &mtr);
-
- mtr_commit(&mtr);
-
- if (n_pages_read == N_PAGES_READ_LIMIT
- || page_no == FIL_NULL) {
- /* Either we read too many pages or
- we reached the end of the level without passing
- through slot2->page_no, the tree must have changed
- in the meantime */
- goto inexact;
- }
-
- } while (page_no != slot2->page_no);
-
- return(n_rows);
-
-inexact:
-
- *is_n_rows_exact = FALSE;
-
- /* We did interrupt before reaching slot2->page */
-
- if (n_pages_read > 0) {
- /* The number of pages on this level is
- n_rows_on_prev_level, multiply it by the
- average number of recs per page so far */
- n_rows = n_rows_on_prev_level
- * n_rows / n_pages_read;
- } else {
- /* The tree changed before we could even
- start with slot1->page_no */
- n_rows = 10;
- }
-
- return(n_rows);
-}
-
-/** If the tree gets changed too much between the two dives for the left
-and right boundary then btr_estimate_n_rows_in_range_low() will call
-itself again, until its nth_attempt reaches this value; it then gives up
-and returns the value stored in rows_in_range_arbitrary_ret_val. The first
-attempt is numbered 1 by btr_estimate_n_rows_in_range(). */
-static const unsigned rows_in_range_max_retries = 4;
-
-/** We pretend that a range has that many records if the tree keeps changing
-for rows_in_range_max_retries retries while we try to estimate the records
-in a given range. */
-static const ib_int64_t rows_in_range_arbitrary_ret_val = 10;
-
-/** Estimates the number of rows in a given index range.
-Two dives are made into the tree, one for each end of the range, each in
-its own mini-transaction; the recorded paths are then compared level by
-level starting from slot 0.
-@param[in] index index
-@param[in] tuple1 range start, may also be empty tuple
-@param[in] mode1 search mode for range start
-@param[in] tuple2 range end, may also be empty tuple
-@param[in] mode2 search mode for range end
-@param[in] trx trx
-@param[in] nth_attempt if the tree gets modified too much while
-we are trying to analyze it, then we will retry (this function will call
-itself, incrementing this parameter)
-@return estimated number of rows; if after rows_in_range_max_retries
-retries the tree keeps changing, then we will just return
-rows_in_range_arbitrary_ret_val as a result (if
-nth_attempt >= rows_in_range_max_retries and the tree is modified between
-the two dives). 0 is returned if index->table->file_unreadable is set
-after the first dive. */
-static
-ib_int64_t
-btr_estimate_n_rows_in_range_low(
- dict_index_t* index,
- const dtuple_t* tuple1,
- ulint mode1,
- const dtuple_t* tuple2,
- ulint mode2,
- trx_t* trx,
- unsigned nth_attempt)
-{
- btr_path_t path1[BTR_PATH_ARRAY_N_SLOTS];
- btr_path_t path2[BTR_PATH_ARRAY_N_SLOTS];
- btr_cur_t cursor;
- btr_path_t* slot1;
- btr_path_t* slot2;
- ibool diverged;
- ibool diverged_lot;
- ulint divergence_level;
- ib_int64_t n_rows;
- ibool is_n_rows_exact;
- ulint i;
- mtr_t mtr;
- ib_int64_t table_n_rows;
-
- table_n_rows = dict_table_get_n_rows(index->table);
-
- mtr_start_trx(&mtr, trx);
-
- cursor.path_arr = path1;
-
- if (dtuple_get_n_fields(tuple1) > 0) {
-
- btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
- BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, 0,
- __FILE__, __LINE__, &mtr);
- } else {
- btr_cur_open_at_index_side(true, index,
- BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, 0, &mtr);
- }
-
- mtr_commit(&mtr);
-
- if (index->table->file_unreadable) {
- return (0);
- }
-
- mtr_start_trx(&mtr, trx);
-
-#ifdef UNIV_DEBUG
- if (!strcmp(index->name, "iC")) {
- DEBUG_SYNC_C("btr_estimate_n_rows_in_range_between_dives");
- }
-#endif
-
- cursor.path_arr = path2;
-
- if (dtuple_get_n_fields(tuple2) > 0) {
-
- btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
- BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, 0,
- __FILE__, __LINE__, &mtr);
- } else {
- btr_cur_open_at_index_side(false, index,
- BTR_SEARCH_LEAF | BTR_ESTIMATE,
- &cursor, 0, &mtr);
- }
-
- mtr_commit(&mtr);
-
- /* We have the path information for the range in path1 and path2 */
-
- n_rows = 1;
- is_n_rows_exact = TRUE;
- diverged = FALSE; /* This becomes true when the path is not
- the same any more */
- diverged_lot = FALSE; /* This becomes true when the paths are
- not the same or adjacent any more */
- divergence_level = 1000000; /* This is the level where paths diverged
- a lot */
- for (i = 0; ; i++) {
- ut_ad(i < BTR_PATH_ARRAY_N_SLOTS);
-
- slot1 = path1 + i;
- slot2 = path2 + i;
-
- if (slot1->nth_rec == ULINT_UNDEFINED
- || slot2->nth_rec == ULINT_UNDEFINED) {
-
- /* The end of at least one of the paths has been
- reached: finalize the estimate and return it */
-
- if (i > divergence_level + 1 && !is_n_rows_exact) {
- /* In trees whose height is > 1 our algorithm
- tends to underestimate: multiply the estimate
- by 2: */
-
- n_rows = n_rows * 2;
- }
-
- DBUG_EXECUTE_IF("bug14007649", return(n_rows););
-
- /* Do not estimate the number of rows in the range
- to over 1 / 2 of the estimated rows in the whole
- table */
-
- if (n_rows > table_n_rows / 2 && !is_n_rows_exact) {
-
- n_rows = table_n_rows / 2;
-
- /* If there are just 0 or 1 rows in the table,
- then we estimate all rows are in the range */
-
- if (n_rows == 0) {
- n_rows = table_n_rows;
- }
- }
-
- return(n_rows);
- }
-
- if (!diverged && slot1->nth_rec != slot2->nth_rec) {
-
- /* If both slots do not point to the same page or if
- the paths have crossed and the same page on both
- apparently contains a different number of records,
- this means that the tree must have changed between
- the dive for slot1 and the dive for slot2 at the
- beginning of this function. */
- if (slot1->page_no != slot2->page_no
- || slot1->page_level != slot2->page_level
- || (slot1->nth_rec >= slot2->nth_rec
- && slot1->n_recs != slot2->n_recs)) {
-
- /* If the tree keeps changing even after a
- few attempts, then just return some arbitrary
- number. */
- if (nth_attempt >= rows_in_range_max_retries) {
- return(rows_in_range_arbitrary_ret_val);
- }
-
- const ib_int64_t ret =
- btr_estimate_n_rows_in_range_low(
- index, tuple1, mode1,
- tuple2, mode2, trx,
- nth_attempt + 1);
-
- return(ret);
- }
-
- diverged = TRUE;
-
- if (slot1->nth_rec < slot2->nth_rec) {
- n_rows = slot2->nth_rec - slot1->nth_rec;
-
- if (n_rows > 1) {
- diverged_lot = TRUE;
- divergence_level = i;
- }
- } else {
- /* It is possible that
- slot1->nth_rec >= slot2->nth_rec
- if, for example, we have a single page
- tree which contains (inf, 5, 6, supr)
- and we select where x > 20 and x < 30;
- in this case slot1->nth_rec will point
- to the supr record and slot2->nth_rec
- will point to 6 */
- return(0);
- }
-
- } else if (diverged && !diverged_lot) {
-
- if (slot1->nth_rec < slot1->n_recs
- || slot2->nth_rec > 1) {
-
- diverged_lot = TRUE;
- divergence_level = i;
-
- n_rows = 0;
-
- if (slot1->nth_rec < slot1->n_recs) {
- n_rows += slot1->n_recs
- - slot1->nth_rec;
- }
-
- if (slot2->nth_rec > 1) {
- n_rows += slot2->nth_rec - 1;
- }
- }
- } else if (diverged_lot) {
-
- n_rows = btr_estimate_n_rows_in_range_on_level(
- index, slot1, slot2, n_rows,
- &is_n_rows_exact);
- }
- }
-}
-
-/** Estimates the number of rows in a given index range.
-This is a thin wrapper that calls btr_estimate_n_rows_in_range_low()
-with nth_attempt = 1 and returns its result unchanged.
-@param[in] index index
-@param[in] tuple1 range start, may also be empty tuple
-@param[in] mode1 search mode for range start
-@param[in] tuple2 range end, may also be empty tuple
-@param[in] mode2 search mode for range end
-@param[in] trx trx
-@return estimated number of rows */
-ib_int64_t
-btr_estimate_n_rows_in_range(
- dict_index_t* index,
- const dtuple_t* tuple1,
- ulint mode1,
- const dtuple_t* tuple2,
- ulint mode2,
- trx_t* trx)
-{
- const ib_int64_t ret = btr_estimate_n_rows_in_range_low(
- index, tuple1, mode1, tuple2, mode2, trx,
- 1 /* first attempt */);
-
- return(ret);
-}
-
-/*******************************************************************//**
-Record the number of non_null key values in a given index for
-each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
-The estimates are eventually stored in the array:
-index->stat_n_non_null_key_vals[], which is indexed from 0 to n-1.
-Counting stops at the first column that is SQL NULL, so n_not_null[i]
-is incremented only if columns 0..i of the record are all non-NULL.
-Does nothing if n_not_null is NULL. */
-static
-void
-btr_record_not_null_field_in_rec(
-/*=============================*/
- ulint n_unique, /*!< in: dict_index_get_n_unique(index),
- number of columns that uniquely determine
- an index entry */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
- its size could be for all fields or
- that of "n_unique" */
- ib_uint64_t* n_not_null) /*!< in/out: array to record number of
- not null rows for n-column prefix;
- may be NULL */
-{
- ulint i;
-
- ut_ad(rec_offs_n_fields(offsets) >= n_unique);
-
- if (n_not_null == NULL) {
- return;
- }
-
- for (i = 0; i < n_unique; i++) {
- if (rec_offs_nth_sql_null(offsets, i)) {
- break;
- }
-
- n_not_null[i]++;
- }
-}
-
-/*******************************************************************//**
-Estimates the number of different key values in a given index, for
-each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
-0..n_uniq-1) and the number of pages that were sampled is saved in
-index->stat_n_sample_sizes[].
-If innodb_stats_method is nulls_ignored, we also record the number of
-non-null values for each prefix and stored the estimates in
-array index->stat_n_non_null_key_vals. */
-UNIV_INTERN
-void
-btr_estimate_number_of_different_key_vals(
-/*======================================*/
- dict_index_t* index) /*!< in: index */
-{
- btr_cur_t cursor;
- page_t* page;
- rec_t* rec;
- ulint n_cols;
- ulint matched_fields;
- ulint matched_bytes;
- ib_uint64_t* n_diff;
- ib_uint64_t* n_not_null;
- ibool stats_null_not_equal;
- ullint n_sample_pages=1; /* number of pages to sample */
- ulint not_empty_flag = 0;
- ulint total_external_size = 0;
- ulint i;
- ulint j;
- ullint add_on;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint* offsets_rec = NULL;
- ulint* offsets_next_rec = NULL;
-
- n_cols = dict_index_get_n_unique(index);
-
- heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null)
- * n_cols
- + dict_index_get_n_fields(index)
- * (sizeof *offsets_rec
- + sizeof *offsets_next_rec));
-
- n_diff = (ib_uint64_t*) mem_heap_zalloc(
- heap, n_cols * sizeof(ib_int64_t));
-
- n_not_null = NULL;
-
- /* Check srv_innodb_stats_method setting, and decide whether we
- need to record non-null value and also decide if NULL is
- considered equal (by setting stats_null_not_equal value) */
- switch (srv_innodb_stats_method) {
- case SRV_STATS_NULLS_IGNORED:
- n_not_null = (ib_uint64_t*) mem_heap_zalloc(
- heap, n_cols * sizeof *n_not_null);
- /* fall through */
-
- case SRV_STATS_NULLS_UNEQUAL:
- /* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL
- case, we will treat NULLs as unequal value */
- stats_null_not_equal = TRUE;
- break;
-
- case SRV_STATS_NULLS_EQUAL:
- stats_null_not_equal = FALSE;
- break;
-
- default:
- ut_error;
- }
-
- if (srv_stats_sample_traditional) {
- /* It makes no sense to test more pages than are contained
- in the index, thus we lower the number if it is too high */
- if (srv_stats_transient_sample_pages > index->stat_index_size) {
- if (index->stat_index_size > 0) {
- n_sample_pages = index->stat_index_size;
- }
- } else {
- n_sample_pages = srv_stats_transient_sample_pages;
- }
- } else {
- /* New logaritmic number of pages that are estimated.
- Number of pages estimated should be between 1 and
- index->stat_index_size.
-
- If we have only 0 or 1 index pages then we can only take 1
- sample. We have already initialized n_sample_pages to 1.
-
- So taking index size as I and sample as S and log(I)*S as L
-
- requirement 1) we want the out limit of the expression to not exceed I;
- requirement 2) we want the ideal pages to be at least S;
- so the current expression is min(I, max( min(S,I), L)
-
- looking for simplifications:
-
- case 1: assume S < I
- min(I, max( min(S,I), L) -> min(I , max( S, L))
-
- but since L=LOG2(I)*S and log2(I) >=1 L>S always so max(S,L) = L.
-
- so we have: min(I , L)
-
- case 2: assume I < S
- min(I, max( min(S,I), L) -> min(I, max( I, L))
-
- case 2a: L > I
- min(I, max( I, L)) -> min(I, L) -> I
-
- case 2b: when L < I
- min(I, max( I, L)) -> min(I, I ) -> I
-
- so taking all case2 paths is I, our expression is:
- n_pages = S < I? min(I,L) : I
- */
- if (index->stat_index_size > 1) {
- n_sample_pages = (srv_stats_transient_sample_pages < index->stat_index_size) ?
- (ulint) ut_min((double) index->stat_index_size,
- log2(index->stat_index_size)*srv_stats_transient_sample_pages)
- : index->stat_index_size;
-
- }
- }
-
- /* Sanity check */
- ut_ad(n_sample_pages > 0 && n_sample_pages <= (index->stat_index_size < 1 ? 1 : index->stat_index_size));
-
- /* We sample some pages in the index to get an estimate */
-
- for (i = 0; i < n_sample_pages; i++) {
- mtr_start(&mtr);
-
- btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
-
- /* Count the number of different key values for each prefix of
- the key on this index page. If the prefix does not determine
- the index record uniquely in the B-tree, then we subtract one
- because otherwise our algorithm would give a wrong estimate
- for an index where there is just one key value. */
-
- if (index->table->file_unreadable) {
- mtr_commit(&mtr);
- goto exit_loop;
- }
-
- page = btr_cur_get_page(&cursor);
-
- SRV_CORRUPT_TABLE_CHECK(page, goto exit_loop;);
- DBUG_EXECUTE_IF("ib_corrupt_page_while_stats_calc",
- page = NULL;);
-
- SRV_CORRUPT_TABLE_CHECK(page,
- {
- mtr_commit(&mtr);
- goto exit_loop;
- });
-
- rec = page_rec_get_next(page_get_infimum_rec(page));
-
- if (!page_rec_is_supremum(rec)) {
- not_empty_flag = 1;
- offsets_rec = rec_get_offsets(rec, index, offsets_rec,
- ULINT_UNDEFINED, &heap);
-
- if (n_not_null != NULL) {
- btr_record_not_null_field_in_rec(
- n_cols, offsets_rec, n_not_null);
- }
- }
-
- while (!page_rec_is_supremum(rec)) {
- rec_t* next_rec = page_rec_get_next(rec);
- if (page_rec_is_supremum(next_rec)) {
- total_external_size +=
- btr_rec_get_externally_stored_len(
- rec, offsets_rec);
- break;
- }
-
- matched_fields = 0;
- matched_bytes = 0;
- offsets_next_rec = rec_get_offsets(next_rec, index,
- offsets_next_rec,
- ULINT_UNDEFINED,
- &heap);
-
- cmp_rec_rec_with_match(rec, next_rec,
- offsets_rec, offsets_next_rec,
- index, stats_null_not_equal,
- &matched_fields,
- &matched_bytes);
-
- for (j = matched_fields; j < n_cols; j++) {
- /* We add one if this index record has
- a different prefix from the previous */
-
- n_diff[j]++;
- }
-
- if (n_not_null != NULL) {
- btr_record_not_null_field_in_rec(
- n_cols, offsets_next_rec, n_not_null);
- }
-
- total_external_size
- += btr_rec_get_externally_stored_len(
- rec, offsets_rec);
-
- rec = next_rec;
- /* Initialize offsets_rec for the next round
- and assign the old offsets_rec buffer to
- offsets_next_rec. */
- {
- ulint* offsets_tmp = offsets_rec;
- offsets_rec = offsets_next_rec;
- offsets_next_rec = offsets_tmp;
- }
- }
-
-
- if (n_cols == dict_index_get_n_unique_in_tree(index)) {
-
- /* If there is more than one leaf page in the tree,
- we add one because we know that the first record
- on the page certainly had a different prefix than the
- last record on the previous index page in the
- alphabetical order. Before this fix, if there was
- just one big record on each clustered index page, the
- algorithm grossly underestimated the number of rows
- in the table. */
-
- if (btr_page_get_prev(page, &mtr) != FIL_NULL
- || btr_page_get_next(page, &mtr) != FIL_NULL) {
-
- n_diff[n_cols - 1]++;
- }
- }
-
- mtr_commit(&mtr);
- }
-
-exit_loop:
- /* If we saw k borders between different key values on
- n_sample_pages leaf pages, we can estimate how many
- there will be in index->stat_n_leaf_pages */
-
- /* We must take into account that our sample actually represents
- also the pages used for external storage of fields (those pages are
- included in index->stat_n_leaf_pages) */
-
- for (j = 0; j < n_cols; j++) {
- index->stat_n_diff_key_vals[j]
- = BTR_TABLE_STATS_FROM_SAMPLE(
- n_diff[j], index, n_sample_pages,
- total_external_size, not_empty_flag);
-
- /* If the tree is small, smaller than
- 10 * n_sample_pages + total_external_size, then
- the above estimate is ok. For bigger trees it is common that we
- do not see any borders between key values in the few pages
- we pick. But still there may be n_sample_pages
- different key values, or even more. Let us try to approximate
- that: */
-
- add_on = index->stat_n_leaf_pages
- / (10 * (n_sample_pages
- + total_external_size));
-
- if (add_on > n_sample_pages) {
- add_on = n_sample_pages;
- }
-
- index->stat_n_diff_key_vals[j] += add_on;
-
- index->stat_n_sample_sizes[j] = n_sample_pages;
-
- /* Update the stat_n_non_null_key_vals[] with our
- sampled result. stat_n_non_null_key_vals[] is created
- and initialized to zero in dict_index_add_to_cache(),
- along with stat_n_diff_key_vals[] array */
- if (n_not_null != NULL) {
- index->stat_n_non_null_key_vals[j] =
- BTR_TABLE_STATS_FROM_SAMPLE(
- n_not_null[j], index, n_sample_pages,
- total_external_size, not_empty_flag);
- }
- }
-
- mem_heap_free(heap);
-}
-
-/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
-
-/** Computes the offset at which the field reference (the pointer to the
-externally stored part) of an external column begins. The reference takes
-up the last BTR_EXTERN_FIELD_REF_SIZE bytes of the locally stored part of
-the column, so the result is the field offset plus the local length minus
-BTR_EXTERN_FIELD_REF_SIZE.
-@param[in] offsets array returned by rec_get_offsets()
-@param[in] n index of the external field
-@return offset of the pointer to the externally stored part */
-static
-ulint
-btr_rec_get_field_ref_offs(
-    const ulint*    offsets,
-    ulint           n)
-{
-    ulint   stored_len;
-
-    /* Only externally stored columns carry a field reference. */
-    ut_a(rec_offs_nth_extern(offsets, n));
-
-    const ulint field_start
-        = rec_get_nth_field_offs(offsets, n, &stored_len);
-
-    /* The column must be non-NULL and long enough to hold the
-    reference. */
-    ut_a(stored_len != UNIV_SQL_NULL);
-    ut_a(stored_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-    const ulint ref_start
-        = field_start + stored_len - BTR_EXTERN_FIELD_REF_SIZE;
-
-    return(ref_start);
-}
-
-/** Gets a pointer to the externally stored part of a field.
-The expansion adds btr_rec_get_field_ref_offs(offsets, n) to (rec), so the
-assertions made by that function apply to every use of this macro, and the
-result has the same pointer type as rec.
-@param rec record
-@param offsets rec_get_offsets(rec)
-@param n index of the externally stored field
-@return pointer to the externally stored part */
-#define btr_rec_get_field_ref(rec, offsets, n) \
- ((rec) + btr_rec_get_field_ref_offs(offsets, n))
-
-/** Gets the externally stored size of a record, in units of a database page.
-For every externally stored column the 4 bytes at BTR_EXTERN_LEN + 4 of its
-field reference are read as the external length, rounded up to a multiple of
-UNIV_PAGE_SIZE and summed; the sum is returned divided by UNIV_PAGE_SIZE.
-@param[in] rec record
-@param[in] offsets array returned by rec_get_offsets()
-@return externally stored part, in units of a database page */
-ulint
-btr_rec_get_externally_stored_len(
-    const rec_t*    rec,
-    const ulint*    offsets)
-{
-    ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
-
-    ulint   aligned_bytes = 0;
-
-    if (rec_offs_any_extern(offsets)) {
-        const ulint field_count = rec_offs_n_fields(offsets);
-
-        for (ulint field_no = 0; field_no < field_count; field_no++) {
-            if (!rec_offs_nth_extern(offsets, field_no)) {
-                /* Locally stored column: nothing to add. */
-                continue;
-            }
-
-            const rec_t*    len_ptr
-                = btr_rec_get_field_ref(rec, offsets, field_no)
-                + BTR_EXTERN_LEN + 4;
-            ulint           ext_len = mach_read_from_4(len_ptr);
-
-            aligned_bytes += ut_calc_align(ext_len, UNIV_PAGE_SIZE);
-        }
-    }
-
-    /* Without any external column the sum is 0, and so is the result. */
-    return(aligned_bytes / UNIV_PAGE_SIZE);
-}
-
-/** Sets the ownership bit of an externally stored field in a record.
-BTR_EXTERN_OWNER_FLAG in the byte at BTR_EXTERN_LEN of the field reference
-is cleared when val is TRUE and set when val is FALSE. The byte is written
-directly and passed to page_zip_write_blob_ptr() if page_zip is given,
-written with mlog_write_ulint() if only mtr is given, and written directly
-otherwise.
-@param[in,out] page_zip compressed page whose uncompressed part will be
-updated, or NULL
-@param[in,out] rec clustered index record
-@param[in] index index of the page
-@param[in] offsets array returned by rec_get_offsets()
-@param[in] i field number
-@param[in] val value to set
-@param[in] mtr mtr, or NULL if not logged */
-static
-void
-btr_cur_set_ownership_of_extern_field(
-    page_zip_des_t* page_zip,
-    rec_t*          rec,
-    dict_index_t*   index,
-    const ulint*    offsets,
-    ulint           i,
-    ibool           val,
-    mtr_t*          mtr)
-{
-    ulint   field_len;
-    byte*   field = rec_get_nth_field(rec, offsets, i, &field_len);
-
-    ut_ad(rec_offs_nth_extern(offsets, i));
-    ut_a(field_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-    /* The field reference is the tail of the locally stored part;
-    the flag lives in the first byte of its length area. */
-    byte*   owner_byte = field
-        + (field_len - BTR_EXTERN_FIELD_REF_SIZE) + BTR_EXTERN_LEN;
-    ulint   flags = mach_read_from_1(owner_byte);
-
-    if (!val) {
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
-        ut_a(!(flags & BTR_EXTERN_OWNER_FLAG));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-        flags |= BTR_EXTERN_OWNER_FLAG;
-    } else {
-        flags &= ~BTR_EXTERN_OWNER_FLAG;
-    }
-
-    if (page_zip != NULL) {
-        mach_write_to_1(owner_byte, flags);
-        page_zip_write_blob_ptr(page_zip, rec, index, offsets, i, mtr);
-    } else if (mtr == NULL) {
-        /* Not logged: plain write to the page. */
-        mach_write_to_1(owner_byte, flags);
-    } else {
-        mlog_write_ulint(owner_byte, flags, MLOG_1BYTE, mtr);
-    }
-
-    btr_blob_dbg_owner(rec, index, offsets, i, val);
-}
-
-/** Marks non-updated off-page fields as disowned by this record.
-Each externally stored column of rec for which the update vector has no
-entry gets its ownership set to FALSE. The ownership must be transferred to
-the updated record which is inserted elsewhere in the index tree. In purge
-only the owner of an externally stored field is allowed to free the field.
-@param[in,out] page_zip compressed page whose uncompressed part will be
-updated, or NULL
-@param[in,out] rec record in a clustered index
-@param[in] index index of the page
-@param[in] offsets array returned by rec_get_offsets()
-@param[in] update update vector
-@param[in,out] mtr mini-transaction */
-UNIV_INTERN
-void
-btr_cur_disown_inherited_fields(
-    page_zip_des_t* page_zip,
-    rec_t*          rec,
-    dict_index_t*   index,
-    const ulint*    offsets,
-    const upd_t*    update,
-    mtr_t*          mtr)
-{
-    ut_ad(rec_offs_validate(rec, index, offsets));
-    ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
-    ut_ad(rec_offs_any_extern(offsets));
-
-    ulint   field_no = 0;
-
-    while (field_no < rec_offs_n_fields(offsets)) {
-        /* Inherited = stored off-page and not touched by the update. */
-        const bool  inherited
-            = rec_offs_nth_extern(offsets, field_no)
-            && !upd_get_field_by_field_no(update, field_no);
-
-        if (inherited) {
-            btr_cur_set_ownership_of_extern_field(
-                page_zip, rec, index, offsets, field_no,
-                FALSE, mtr);
-        }
-
-        field_no++;
-    }
-}
-
-/** Marks all extern fields in a record as owned by the record.
-This function should be called if the delete mark of a record is removed:
-a not delete marked record always owns all its extern fields. Records
-without any externally stored column are left untouched.
-@param[in,out] page_zip compressed page whose uncompressed part will be
-updated, or NULL
-@param[in,out] rec record in a clustered index
-@param[in] index index of the page
-@param[in] offsets array returned by rec_get_offsets()
-@param[in] mtr mtr, or NULL if not logged */
-static
-void
-btr_cur_unmark_extern_fields(
-    page_zip_des_t* page_zip,
-    rec_t*          rec,
-    dict_index_t*   index,
-    const ulint*    offsets,
-    mtr_t*          mtr)
-{
-    ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec));
-
-    const ulint field_count = rec_offs_n_fields(offsets);
-
-    if (rec_offs_any_extern(offsets)) {
-        for (ulint field_no = 0; field_no < field_count; field_no++) {
-            if (!rec_offs_nth_extern(offsets, field_no)) {
-                continue;
-            }
-
-            btr_cur_set_ownership_of_extern_field(
-                page_zip, rec, index, offsets, field_no,
-                TRUE, mtr);
-        }
-    }
-}
-
-/** Flags the data tuple fields that are marked as extern storage in the
-update vector. We use this function to remember which fields we must
-mark as extern storage in a record inserted for an update. Depending on
-the orig_len of an update field, the locally stored part of the tuple
-field is additionally cut down to the BLOB pointer or rebuilt in heap.
-@param[in,out] tuple data tuple
-@param[in] update update vector
-@param[in] heap memory heap
-@return number of flagged external columns */
-UNIV_INTERN
-ulint
-btr_push_update_extern_fields(
-    dtuple_t*       tuple,
-    const upd_t*    update,
-    mem_heap_t*     heap)
-{
-    const ulint n_upd_fields = upd_get_n_fields(update);
-    ulint       flagged = 0;
-
-    for (ulint k = 0; k < n_upd_fields; k++) {
-        const upd_field_t*  uf = &update->fields[k];
-
-        if (!dfield_is_ext(&uf->new_val)) {
-            continue;
-        }
-
-        dfield_t*   field = dtuple_get_nth_field(tuple, uf->field_no);
-
-        if (!dfield_is_ext(field)) {
-            dfield_set_ext(field);
-            flagged++;
-        }
-
-        if (uf->orig_len == 0) {
-            /* Nothing to restore. */
-            continue;
-        }
-
-        if (uf->orig_len == BTR_EXTERN_FIELD_REF_SIZE) {
-            /* Restore the original locally stored
-            part of the column. In the undo log,
-            InnoDB writes a longer prefix of externally
-            stored columns, so that column prefixes
-            in secondary indexes can be reconstructed. */
-            byte*   ref = (byte*) dfield_get_data(field)
-                + dfield_get_len(field)
-                - BTR_EXTERN_FIELD_REF_SIZE;
-
-            dfield_set_data(field, ref, BTR_EXTERN_FIELD_REF_SIZE);
-            dfield_set_ext(field);
-            continue;
-        }
-
-        /* Reconstruct the original locally stored part of the
-        column. The data will have to be copied. */
-        ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);
-
-        byte*   cur_data = (byte*) dfield_get_data(field);
-        ulint   cur_len = dfield_get_len(field);
-        byte*   rebuilt = (byte*) mem_heap_alloc(heap, uf->orig_len);
-
-        /* Copy the locally stored prefix. */
-        memcpy(rebuilt, cur_data,
-               uf->orig_len - BTR_EXTERN_FIELD_REF_SIZE);
-        /* Copy the BLOB pointer. */
-        memcpy(rebuilt + uf->orig_len - BTR_EXTERN_FIELD_REF_SIZE,
-               cur_data + cur_len - BTR_EXTERN_FIELD_REF_SIZE,
-               BTR_EXTERN_FIELD_REF_SIZE);
-
-        dfield_set_data(field, rebuilt, uf->orig_len);
-        dfield_set_ext(field);
-    }
-
-    return(flagged);
-}
-
-/** Returns the length of a BLOB part stored on the header page, read as
-a 4-byte value at offset BTR_BLOB_HDR_PART_LEN of the BLOB header.
-@param[in] blob_header blob header
-@return part length */
-static
-ulint
-btr_blob_get_part_len(
-    const byte*     blob_header)
-{
-    const byte* part_len_field = blob_header + BTR_BLOB_HDR_PART_LEN;
-
-    return(mach_read_from_4(part_len_field));
-}
-
-/** Returns the page number where the next BLOB part is stored, read as
-a 4-byte value at offset BTR_BLOB_HDR_NEXT_PAGE_NO of the BLOB header.
-@param[in] blob_header blob header
-@return page number or FIL_NULL if no more pages */
-static
-ulint
-btr_blob_get_next_page_no(
-    const byte*     blob_header)
-{
-    const byte* next_page_field
-        = blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO;
-
-    return(mach_read_from_4(next_page_field));
-}
-
-/*******************************************************************//**
-Deallocate a buffer block that was reserved for a BLOB part.
-The mini-transaction is committed first. Then, holding the LRU list mutex
-of the block's buffer pool and the block mutex, the block is handed to
-buf_LRU_free_page(), provided that it still is in state BUF_BLOCK_FILE_PAGE
-and still belongs to the same space and page number as on entry. Nothing
-is reported to the caller about whether the block was actually freed. */
-static
-void
-btr_blob_free(
-/*==========*/
- buf_block_t* block, /*!< in: buffer block */
- ibool all, /*!< in: TRUE=remove also the compressed page
- if there is one */
- mtr_t* mtr) /*!< in: mini-transaction to commit */
-{
- buf_pool_t* buf_pool = buf_pool_from_block(block);
- ulint space = buf_block_get_space(block);
- ulint page_no = buf_block_get_page_no(block);
- bool freed = false;
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-
- mtr_commit(mtr);
-
- mutex_enter(&buf_pool->LRU_list_mutex);
- mutex_enter(&block->mutex);
-
- /* Only free the block if it is still allocated to
- the same file page. The space id and page number were
- remembered before mtr was committed. */
-
- if (buf_block_get_state(block)
- == BUF_BLOCK_FILE_PAGE
- && buf_block_get_space(block) == space
- && buf_block_get_page_no(block) == page_no) {
-
- freed = buf_LRU_free_page(&block->page, all);
-
- if (!freed && all && block->page.zip.data
- /* Now, buf_LRU_free_page() may release mutexes
- temporarily, so the identity of the block is
- checked once more */
- && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE
- && buf_block_get_space(block) == space
- && buf_block_get_page_no(block) == page_no) {
-
- /* Attempt to deallocate the uncompressed page
- if the whole block cannot be deallocated. */
- freed = buf_LRU_free_page(&block->page, false);
- }
- } /* NOTE(review): the LRU list mutex is released below only when
- nothing was freed; presumably buf_LRU_free_page() releases it itself
- when it returns true - confirm against its definition. */
-
- if (!freed) {
- mutex_exit(&buf_pool->LRU_list_mutex);
- }
-
- mutex_exit(&block->mutex);
-}
-
-/*******************************************************************//**
-Stores the fields in big_rec_vec to the tablespace and puts pointers to
-them in rec. The extern flags in rec will have to be set beforehand.
-The fields are stored on pages allocated from leaf node
-file segment of the index tree.
-Each field is written as a chain of BLOB pages, one page per iteration of
-the inner loop and each iteration in its own local mini-transaction. If
-the record block has a compressed page (page_zip), the field is deflated
-with zlib into pages of type FIL_PAGE_TYPE_ZBLOB (first page) and
-FIL_PAGE_TYPE_ZBLOB2 (further pages), linked through FIL_PAGE_NEXT;
-otherwise it is copied into pages of type FIL_PAGE_TYPE_BLOB, linked
-through BTR_BLOB_HDR_NEXT_PAGE_NO. The field reference in rec receives
-the space id, the first page number, the offset and the stored length.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE or DB_TOO_BIG_FOR_REDO */
-UNIV_INTERN
-dberr_t
-btr_store_big_rec_extern_fields(
-/*============================*/
- dict_index_t* index, /*!< in: index of rec; the index tree
- MUST be X-latched */
- buf_block_t* rec_block, /*!< in/out: block containing rec */
- rec_t* rec, /*!< in/out: record */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index);
- the "external storage" flags in offsets
- will not correspond to rec when
- this function returns */
- const big_rec_t*big_rec_vec, /*!< in: vector containing fields
- to be stored externally */
- mtr_t* btr_mtr, /*!< in: mtr containing the
- latches to the clustered index */
- enum blob_op op) /*!< in: operation code */
-{
- ulint rec_page_no;
- byte* field_ref;
- ulint extern_len;
- ulint store_len;
- ulint page_no;
- ulint space_id;
- ulint zip_size;
- ulint prev_page_no;
- ulint hint_page_no;
- ulint i;
- mtr_t mtr;
- mtr_t* alloc_mtr;
- mem_heap_t* heap = NULL;
- page_zip_des_t* page_zip;
- z_stream c_stream;
- buf_block_t** freed_pages = NULL;
- ulint n_freed_pages = 0;
- dberr_t error = DB_SUCCESS;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(rec_offs_any_extern(offsets));
- ut_ad(mtr_memo_contains(btr_mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains(btr_mtr, rec_block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(buf_block_get_frame(rec_block) == page_align(rec));
- ut_a(dict_index_is_clust(index));
-
- page_zip = buf_block_get_page_zip(rec_block);
- ut_a(dict_table_zip_size(index->table)
- == buf_block_get_zip_size(rec_block));
-
- space_id = buf_block_get_space(rec_block);
- zip_size = buf_block_get_zip_size(rec_block);
- rec_page_no = buf_block_get_page_no(rec_block);
- ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
-
- error = btr_check_blob_limit(big_rec_vec);
-
- if (error != DB_SUCCESS) {
- ut_ad(op == BTR_STORE_INSERT);
- return(error);
- }
-
- if (page_zip) {
- int err;
-
- /* Zlib deflate needs 128 kilobytes for the default
- window size, plus 512 << memLevel, plus a few
- kilobytes for small objects. We use reduced memLevel
- to limit the memory consumption, and preallocate the
- heap, hoping to avoid memory fragmentation. */
- heap = mem_heap_create(250000);
- page_zip_set_alloc(&c_stream, heap);
-
- err = deflateInit2(&c_stream, page_zip_level,
- Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY);
- ut_a(err == Z_OK);
- }
-
- if (btr_blob_op_is_update(op)) {
- /* Avoid reusing pages that have been previously freed
- in btr_mtr. */
- if (btr_mtr->n_freed_pages) {
- if (heap == NULL) {
- heap = mem_heap_create(
- btr_mtr->n_freed_pages
- * sizeof *freed_pages);
- }
-
- freed_pages = static_cast<buf_block_t**>(
- mem_heap_alloc(
- heap,
- btr_mtr->n_freed_pages
- * sizeof *freed_pages));
- n_freed_pages = 0;
- }
-
- /* Because btr_mtr will be committed after mtr, it is
- possible that the tablespace has been extended when
- the B-tree record was updated or inserted, or it will
- be extended while allocating pages for big_rec.
-
- TODO: In mtr (not btr_mtr), write a redo log record
- about extending the tablespace to its current size,
- and remember the current size. Whenever the tablespace
- grows as pages are allocated, write further redo log
- records to mtr. (Currently tablespace extension is not
- covered by the redo log. If it were, the record would
- only be written to btr_mtr, which is committed after
- mtr.) */
- alloc_mtr = btr_mtr;
- } else {
- /* Use the local mtr for allocations. */
- alloc_mtr = &mtr;
- }
-
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- /* All pointers to externally stored columns in the record
- must either be zero or they must be pointers to inherited
- columns, owned by this record or an earlier record version. */
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- if (!rec_offs_nth_extern(offsets, i)) {
- continue;
- }
- field_ref = btr_rec_get_field_ref(rec, offsets, i);
-
- ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG));
- /* Either this must be an update in place,
- or the BLOB must be inherited, or the BLOB pointer
- must be zero (will be written in this function). */
- ut_a(op == BTR_STORE_UPDATE
- || (field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG)
- || !memcmp(field_ref, field_ref_zero,
- BTR_EXTERN_FIELD_REF_SIZE));
- }
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- /* We have to create a file segment to the tablespace
- for each field and put the pointer to the field in rec */
-
- for (i = 0; i < big_rec_vec->n_fields; i++) {
- field_ref = btr_rec_get_field_ref(
- rec, offsets, big_rec_vec->fields[i].field_no);
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- /* A zero BLOB pointer should have been initially inserted. */
- ut_a(!memcmp(field_ref, field_ref_zero,
- BTR_EXTERN_FIELD_REF_SIZE));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- extern_len = big_rec_vec->fields[i].len;
- UNIV_MEM_ASSERT_RW(big_rec_vec->fields[i].data,
- extern_len);
-
- ut_a(extern_len > 0);
-
- prev_page_no = FIL_NULL;
-
- if (page_zip) {
- int err = deflateReset(&c_stream);
- ut_a(err == Z_OK);
-
- c_stream.next_in = (Bytef*)
- big_rec_vec->fields[i].data;
- c_stream.avail_in = static_cast<uInt>(extern_len);
- }
-
- for (;;) {
- buf_block_t* block;
- page_t* page;
-
- mtr_start(&mtr);
-
- if (prev_page_no == FIL_NULL) {
- hint_page_no = 1 + rec_page_no;
- } else {
- hint_page_no = prev_page_no + 1;
- }
-
-alloc_another:
- block = btr_page_alloc(index, hint_page_no,
- FSP_NO_DIR, 0, alloc_mtr, &mtr);
- if (UNIV_UNLIKELY(block == NULL)) {
- mtr_commit(&mtr);
- error = DB_OUT_OF_FILE_SPACE;
- goto func_exit;
- }
-
- if (rw_lock_get_x_lock_count(&block->lock) > 1) {
- /* This page must have been freed in
- btr_mtr previously. Put it aside, and
- allocate another page for the BLOB data.
- The pages put aside are freed again at
- func_exit. */
- ut_ad(alloc_mtr == btr_mtr);
- ut_ad(btr_blob_op_is_update(op));
- ut_ad(n_freed_pages < btr_mtr->n_freed_pages);
- freed_pages[n_freed_pages++] = block;
- goto alloc_another;
- }
-
- page_no = buf_block_get_page_no(block);
- page = buf_block_get_frame(block);
-
- if (prev_page_no != FIL_NULL) {
- buf_block_t* prev_block;
- page_t* prev_page;
-
- prev_block = buf_page_get(space_id, zip_size,
- prev_page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(prev_block,
- SYNC_EXTERN_STORAGE);
- prev_page = buf_block_get_frame(prev_block);
-
- if (page_zip) {
- mlog_write_ulint(
- prev_page + FIL_PAGE_NEXT,
- page_no, MLOG_4BYTES, &mtr);
- memcpy(buf_block_get_page_zip(
- prev_block)
- ->data + FIL_PAGE_NEXT,
- prev_page + FIL_PAGE_NEXT, 4);
- } else {
- mlog_write_ulint(
- prev_page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_NEXT_PAGE_NO,
- page_no, MLOG_4BYTES, &mtr);
- }
-
- } else if (dict_index_is_online_ddl(index)) {
- row_log_table_blob_alloc(index, page_no);
- }
-
- if (page_zip) {
- int err;
- page_zip_des_t* blob_page_zip;
-
- /* Write FIL_PAGE_TYPE to the redo log
- separately, before logging any other
- changes to the page, so that the debug
- assertions in
- recv_parse_or_apply_log_rec_body() can
- be made simpler. Before InnoDB Plugin
- 1.0.4, the initialization of
- FIL_PAGE_TYPE was logged as part of
- the mlog_log_string() below. */
-
- mlog_write_ulint(page + FIL_PAGE_TYPE,
- prev_page_no == FIL_NULL
- ? FIL_PAGE_TYPE_ZBLOB
- : FIL_PAGE_TYPE_ZBLOB2,
- MLOG_2BYTES, &mtr);
-
- c_stream.next_out = page
- + FIL_PAGE_DATA;
- c_stream.avail_out
- = static_cast<uInt>(page_zip_get_size(page_zip))
- - FIL_PAGE_DATA;
-
- err = deflate(&c_stream, Z_FINISH);
- ut_a(err == Z_OK || err == Z_STREAM_END);
- ut_a(err == Z_STREAM_END
- || c_stream.avail_out == 0);
-
- /* Write the "next BLOB page" pointer */
- mlog_write_ulint(page + FIL_PAGE_NEXT,
- FIL_NULL, MLOG_4BYTES, &mtr);
- /* Initialize the unused "prev page" pointer */
- mlog_write_ulint(page + FIL_PAGE_PREV,
- FIL_NULL, MLOG_4BYTES, &mtr);
- /* Write a back pointer to the record
- into the otherwise unused area. This
- information could be useful in
- debugging. Later, we might want to
- implement the possibility to relocate
- BLOB pages. Then, we would need to be
- able to adjust the BLOB pointer in the
- record. We do not store the heap
- number of the record, because it can
- change in page_zip_reorganize() or
- btr_page_reorganize(). However, also
- the page number of the record may
- change when B-tree nodes are split or
- merged. */
- mlog_write_ulint(page
- + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
- space_id,
- MLOG_4BYTES, &mtr);
- mlog_write_ulint(page
- + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4,
- rec_page_no,
- MLOG_4BYTES, &mtr);
-
- /* Zero out the unused part of the page. */
- memset(page + page_zip_get_size(page_zip)
- - c_stream.avail_out,
- 0, c_stream.avail_out);
- mlog_log_string(page
- + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
- page_zip_get_size(page_zip)
- - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
- &mtr);
- /* Copy the page to compressed storage,
- because it will be flushed to disk
- from there. */
- blob_page_zip = buf_block_get_page_zip(block);
- ut_ad(blob_page_zip);
- ut_ad(page_zip_get_size(blob_page_zip)
- == page_zip_get_size(page_zip));
- memcpy(blob_page_zip->data, page,
- page_zip_get_size(page_zip));
-
- if (err == Z_OK && prev_page_no != FIL_NULL) { /* A
- continuation page that did not end the stream:
- the updates of the field reference in rec
- are skipped for this page. */
-
- goto next_zip_page;
- }
-
- if (alloc_mtr == &mtr) {
- rec_block = buf_page_get(
- space_id, zip_size,
- rec_page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(
- rec_block,
- SYNC_NO_ORDER_CHECK);
- }
-
- if (err == Z_STREAM_END) {
- mach_write_to_4(field_ref
- + BTR_EXTERN_LEN, 0);
- mach_write_to_4(field_ref
- + BTR_EXTERN_LEN + 4,
- c_stream.total_in);
- } else {
- memset(field_ref + BTR_EXTERN_LEN,
- 0, 8);
- }
-
- if (prev_page_no == FIL_NULL) {
- btr_blob_dbg_add_blob(
- rec, big_rec_vec->fields[i]
- .field_no, page_no, index,
- "store");
-
- mach_write_to_4(field_ref
- + BTR_EXTERN_SPACE_ID,
- space_id);
-
- mach_write_to_4(field_ref
- + BTR_EXTERN_PAGE_NO,
- page_no);
-
- mach_write_to_4(field_ref
- + BTR_EXTERN_OFFSET,
- FIL_PAGE_NEXT);
- }
-
- page_zip_write_blob_ptr(
- page_zip, rec, index, offsets,
- big_rec_vec->fields[i].field_no,
- alloc_mtr);
-
-next_zip_page:
- prev_page_no = page_no;
-
- /* Commit mtr and release the
- uncompressed page frame to save memory. */
- btr_blob_free(block, FALSE, &mtr);
-
- if (err == Z_STREAM_END) {
- break;
- }
- } else {
- mlog_write_ulint(page + FIL_PAGE_TYPE,
- FIL_PAGE_TYPE_BLOB,
- MLOG_2BYTES, &mtr);
-
- if (extern_len > (UNIV_PAGE_SIZE
- - FIL_PAGE_DATA
- - BTR_BLOB_HDR_SIZE
- - FIL_PAGE_DATA_END)) {
- store_len = UNIV_PAGE_SIZE
- - FIL_PAGE_DATA
- - BTR_BLOB_HDR_SIZE
- - FIL_PAGE_DATA_END;
- } else {
- store_len = extern_len;
- }
-
- mlog_write_string(page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_SIZE,
- (const byte*)
- big_rec_vec->fields[i].data
- + big_rec_vec->fields[i].len
- - extern_len,
- store_len, &mtr);
- mlog_write_ulint(page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_PART_LEN,
- store_len, MLOG_4BYTES, &mtr);
- mlog_write_ulint(page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_NEXT_PAGE_NO,
- FIL_NULL, MLOG_4BYTES, &mtr);
-
- extern_len -= store_len;
-
- if (alloc_mtr == &mtr) {
- rec_block = buf_page_get(
- space_id, zip_size,
- rec_page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(
- rec_block,
- SYNC_NO_ORDER_CHECK);
- }
-
- mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
- MLOG_4BYTES, alloc_mtr);
- mlog_write_ulint(field_ref
- + BTR_EXTERN_LEN + 4,
- big_rec_vec->fields[i].len
- - extern_len,
- MLOG_4BYTES, alloc_mtr);
-
- if (prev_page_no == FIL_NULL) {
- btr_blob_dbg_add_blob(
- rec, big_rec_vec->fields[i]
- .field_no, page_no, index,
- "store");
-
- mlog_write_ulint(field_ref
- + BTR_EXTERN_SPACE_ID,
- space_id, MLOG_4BYTES,
- alloc_mtr);
-
- mlog_write_ulint(field_ref
- + BTR_EXTERN_PAGE_NO,
- page_no, MLOG_4BYTES,
- alloc_mtr);
-
- mlog_write_ulint(field_ref
- + BTR_EXTERN_OFFSET,
- FIL_PAGE_DATA,
- MLOG_4BYTES,
- alloc_mtr);
- }
-
- prev_page_no = page_no;
-
- mtr_commit(&mtr);
-
- if (extern_len == 0) {
- break;
- }
- }
- }
-
- DBUG_EXECUTE_IF("btr_store_big_rec_extern",
- error = DB_OUT_OF_FILE_SPACE;
- goto func_exit;);
- }
-
-func_exit: /* Common cleanup for the success and the error paths. */
- if (page_zip) {
- deflateEnd(&c_stream);
- }
-
- if (n_freed_pages) {
- ulint i;
-
- ut_ad(alloc_mtr == btr_mtr);
- ut_ad(btr_blob_op_is_update(op));
-
- for (i = 0; i < n_freed_pages; i++) {
- btr_page_free_low(index, freed_pages[i], 0, true, alloc_mtr);
- }
- }
-
- if (heap != NULL) {
- mem_heap_free(heap);
- }
-
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- /* All pointers to externally stored columns in the record
- must be valid. */
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- if (!rec_offs_nth_extern(offsets, i)) {
- continue;
- }
-
- field_ref = btr_rec_get_field_ref(rec, offsets, i);
-
- /* The pointer must not be zero if the operation
- succeeded. */
- ut_a(0 != memcmp(field_ref, field_ref_zero,
- BTR_EXTERN_FIELD_REF_SIZE)
- || error != DB_SUCCESS);
- /* The column must not be disowned by this record. */
- ut_a(!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG));
- }
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- return(error);
-}
-
-/** Check the FIL_PAGE_TYPE on an uncompressed BLOB page.
-The space id and page number stored on the page must match the arguments.
-A page type other than FIL_PAGE_TYPE_BLOB is reported on stderr and
-followed by ut_error; in non-debug builds a tablespace whose flags
-indicate UNIV_FORMAT_A is exempt from that report.
-@param[in] space_id space id
-@param[in] page_no page number
-@param[in] page page
-@param[in] read TRUE=read, FALSE=purge */
-static
-void
-btr_check_blob_fil_page_type(
-    ulint           space_id,
-    ulint           page_no,
-    const page_t*   page,
-    ibool           read)
-{
-    const ulint page_type = fil_page_get_type(page);
-
-    ut_a(space_id == page_get_space_id(page));
-    ut_a(page_no == page_get_page_no(page));
-
-    if (!UNIV_UNLIKELY(page_type != FIL_PAGE_TYPE_BLOB)) {
-        /* The expected case: nothing to report. */
-        return;
-    }
-
-    const ulint space_flags = fil_space_get_flags(space_id);
-
-#ifndef UNIV_DEBUG /* Improve debug test coverage */
-    if (dict_tf_get_format(space_flags) == UNIV_FORMAT_A) {
-        /* Old versions of InnoDB did not initialize
-        FIL_PAGE_TYPE on BLOB pages. Do not print
-        anything about the type mismatch when reading
-        a BLOB page that is in Antelope format.*/
-        return;
-    }
-#endif /* !UNIV_DEBUG */
-
-    const char* operation = read ? "read" : "purge";
-
-    ut_print_timestamp(stderr);
-    fprintf(stderr,
-            " InnoDB: FIL_PAGE_TYPE=%lu"
-            " on BLOB %s space %lu page %lu flags %lx\n",
-            (ulong) page_type, operation,
-            (ulong) space_id, (ulong) page_no, (ulong) space_flags);
-    ut_error;
-}
-
-/*******************************************************************//**
-Frees the space in an externally stored field to the file space
-management if the field in data is owned by the externally stored field,
-in a rollback we may have the additional condition that the field must
-not be inherited. */
-UNIV_INTERN
-void
-btr_free_externally_stored_field(
-/*=============================*/
- dict_index_t* index, /*!< in: index of the data, the index
- tree MUST be X-latched; if the tree
- height is 1, then also the root page
- must be X-latched! (this is relevant
- in the case this function is called
- from purge where 'data' is located on
- an undo log page, not an index
- page) */
- byte* field_ref, /*!< in/out: field reference */
- const rec_t* rec, /*!< in: record containing field_ref, for
- page_zip_write_blob_ptr(), or NULL */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
- or NULL */
- page_zip_des_t* page_zip, /*!< in: compressed page corresponding
- to rec, or NULL if rec == NULL */
- ulint i, /*!< in: field number of field_ref;
- ignored if rec == NULL */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* local_mtr MY_ATTRIBUTE((unused))) /*!< in: mtr
- containing the latch to data an an
- X-latch to the index tree */
-{
- page_t* page;
- const ulint space_id = mach_read_from_4(
- field_ref + BTR_EXTERN_SPACE_ID);
- const ulint start_page = mach_read_from_4(
- field_ref + BTR_EXTERN_PAGE_NO);
- ulint rec_zip_size = dict_table_zip_size(index->table);
- ulint ext_zip_size;
- ulint page_no;
- ulint next_page_no;
- mtr_t mtr;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains_page(local_mtr, field_ref,
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(!rec || rec_offs_validate(rec, index, offsets));
- ut_ad(!rec || field_ref == btr_rec_get_field_ref(rec, offsets, i));
-
- if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero,
- BTR_EXTERN_FIELD_REF_SIZE))) {
- /* In the rollback, we may encounter a clustered index
- record with some unwritten off-page columns. There is
- nothing to free then. */
- if (rb_ctx == RB_NONE) {
- char buf[3 * 512];
- char *bufend;
- ulint ispace = dict_index_get_space(index);
- bufend = innobase_convert_name(buf, sizeof buf,
- index->name, strlen(index->name),
- NULL,
- FALSE);
- buf[bufend - buf]='\0';
- ib_logf(IB_LOG_LEVEL_ERROR, "Unwritten off-page columns in "
- "rollback context %d. Table %s index %s space_id %lu "
- "index space %lu.",
- rb_ctx, index->table->name, buf, space_id, ispace);
- }
-
- ut_a(rb_ctx != RB_NONE);
- return;
- }
-
- ut_ad(space_id == index->space);
-
- if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) {
- ext_zip_size = fil_space_get_zip_size(space_id);
- /* This must be an undo log record in the system tablespace,
- that is, in row_purge_upd_exist_or_extern().
- Currently, externally stored records are stored in the
- same tablespace as the referring records. */
- ut_ad(!page_get_space_id(page_align(field_ref)));
- ut_ad(!rec);
- ut_ad(!page_zip);
- } else {
- ext_zip_size = rec_zip_size;
- }
-
- if (!rec) {
- /* This is a call from row_purge_upd_exist_or_extern(). */
- ut_ad(!page_zip);
- rec_zip_size = 0;
- }
-
-#ifdef UNIV_BLOB_DEBUG
- if (!(field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_OWNER_FLAG)
- && !((field_ref[BTR_EXTERN_LEN] & BTR_EXTERN_INHERITED_FLAG)
- && (rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY))) {
- /* This off-page column will be freed.
- Check that no references remain. */
-
- btr_blob_dbg_t b;
-
- b.blob_page_no = start_page;
-
- if (rec) {
- /* Remove the reference from the record to the
- BLOB. If the BLOB were not freed, the
- reference would be removed when the record is
- removed. Freeing the BLOB will overwrite the
- BTR_EXTERN_PAGE_NO in the field_ref of the
- record with FIL_NULL, which would make the
- btr_blob_dbg information inconsistent with the
- record. */
- b.ref_page_no = page_get_page_no(page_align(rec));
- b.ref_heap_no = page_rec_get_heap_no(rec);
- b.ref_field_no = i;
- btr_blob_dbg_rbt_delete(index, &b, "free");
- }
-
- btr_blob_dbg_assert_empty(index, b.blob_page_no);
- }
-#endif /* UNIV_BLOB_DEBUG */
-
- for (;;) {
-#ifdef UNIV_SYNC_DEBUG
- buf_block_t* rec_block;
-#endif /* UNIV_SYNC_DEBUG */
- buf_block_t* ext_block;
-
- mtr_start(&mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- rec_block =
-#endif /* UNIV_SYNC_DEBUG */
- buf_page_get(page_get_space_id(page_align(field_ref)),
- rec_zip_size,
- page_get_page_no(page_align(field_ref)),
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK);
- page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
-
- if (/* There is no external storage data */
- page_no == FIL_NULL
- /* This field does not own the externally stored field */
- || (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
- & BTR_EXTERN_OWNER_FLAG)
- /* Rollback and inherited field */
- || ((rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY)
- && (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
- & BTR_EXTERN_INHERITED_FLAG))) {
-
- /* Do not free */
- mtr_commit(&mtr);
-
- return;
- }
-
- if (page_no == start_page && dict_index_is_online_ddl(index)) {
- row_log_table_blob_free(index, start_page);
- }
-
- ext_block = buf_page_get(space_id, ext_zip_size, page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE);
- page = buf_block_get_frame(ext_block);
-
- if (ext_zip_size) {
- /* Note that page_zip will be NULL
- in row_purge_upd_exist_or_extern(). */
- switch (fil_page_get_type(page)) {
- case FIL_PAGE_TYPE_ZBLOB:
- case FIL_PAGE_TYPE_ZBLOB2:
- break;
- default:
- ut_error;
- }
- next_page_no = mach_read_from_4(page + FIL_PAGE_NEXT);
-
- btr_page_free_low(index, ext_block, 0, true, &mtr);
-
- if (page_zip != NULL) {
- mach_write_to_4(field_ref + BTR_EXTERN_PAGE_NO,
- next_page_no);
- mach_write_to_4(field_ref + BTR_EXTERN_LEN + 4,
- 0);
- page_zip_write_blob_ptr(page_zip, rec, index,
- offsets, i, &mtr);
- } else {
- mlog_write_ulint(field_ref
- + BTR_EXTERN_PAGE_NO,
- next_page_no,
- MLOG_4BYTES, &mtr);
- mlog_write_ulint(field_ref
- + BTR_EXTERN_LEN + 4, 0,
- MLOG_4BYTES, &mtr);
- }
- } else {
- ut_a(!page_zip);
- btr_check_blob_fil_page_type(space_id, page_no, page,
- FALSE);
-
- next_page_no = mach_read_from_4(
- page + FIL_PAGE_DATA
- + BTR_BLOB_HDR_NEXT_PAGE_NO);
-
- /* We must supply the page level (= 0) as an argument
- because we did not store it on the page (we save the
- space overhead from an index page header. */
-
- btr_page_free_low(index, ext_block, 0, true, &mtr);
-
- mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
- next_page_no,
- MLOG_4BYTES, &mtr);
- /* Zero out the BLOB length. If the server
- crashes during the execution of this function,
- trx_rollback_or_clean_all_recovered() could
- dereference the half-deleted BLOB, fetching a
- wrong prefix for the BLOB. */
- mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4,
- 0,
- MLOG_4BYTES, &mtr);
- }
-
- /* Commit mtr and release the BLOB block to save memory. */
- btr_blob_free(ext_block, TRUE, &mtr);
- }
-}
-
-/***********************************************************//**
-Releases the off-page storage of every externally stored column of a
-record. Each field that offsets flags as external is passed, together
-with its field reference, to btr_free_externally_stored_field(). */
-static
-void
-btr_rec_free_externally_stored_fields(
-/*==================================*/
-	dict_index_t*	index,	/*!< in: index of the data, the index
-				tree MUST be X-latched */
-	rec_t*		rec,	/*!< in/out: record */
-	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
-	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
-				part will be updated, or NULL */
-	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
-	mtr_t*		mtr)	/*!< in: mini-transaction handle which contains
-				an X-latch to record page and to the index
-				tree */
-{
-	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
-	ut_ad(dict_table_is_comp(index->table) == !!rec_offs_comp(offsets));
-
-	const ulint	field_count = rec_offs_n_fields(offsets);
-
-	for (ulint field_no = 0; field_no < field_count; field_no++) {
-		if (!rec_offs_nth_extern(offsets, field_no)) {
-			/* Not an externally stored field: nothing to free. */
-			continue;
-		}
-
-		btr_free_externally_stored_field(
-			index,
-			btr_rec_get_field_ref(rec, offsets, field_no),
-			rec, offsets, page_zip, field_no, rb_ctx, mtr);
-	}
-}
-
-/***********************************************************//**
-Releases the off-page storage of those externally stored columns of a
-record that are named in an update vector. Fields of the update vector
-that are not stored externally in the record are skipped. */
-static
-void
-btr_rec_free_updated_extern_fields(
-/*===============================*/
-	dict_index_t*	index,	/*!< in: index of rec; the index tree MUST be
-				X-latched */
-	rec_t*		rec,	/*!< in/out: record */
-	page_zip_des_t*	page_zip,/*!< in: compressed page whose uncompressed
-				part will be updated, or NULL */
-	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
-	const upd_t*	update,	/*!< in: update vector */
-	enum trx_rb_ctx	rb_ctx,	/*!< in: rollback context */
-	mtr_t*		mtr)	/*!< in: mini-transaction handle which contains
-				an X-latch to record page and to the tree */
-{
-	ut_ad(rec_offs_validate(rec, index, offsets));
-	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
-
-	const ulint	n_updated = upd_get_n_fields(update);
-
-	for (ulint pos = 0; pos < n_updated; pos++) {
-		const ulint	field_no
-			= upd_get_nth_field(update, pos)->field_no;
-
-		if (!rec_offs_nth_extern(offsets, field_no)) {
-			continue;
-		}
-
-		ulint	field_len;
-		byte*	field = rec_get_nth_field(
-			rec, offsets, field_no, &field_len);
-
-		ut_a(field_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-		/* The field reference is the last
-		BTR_EXTERN_FIELD_REF_SIZE bytes of the local part. */
-		btr_free_externally_stored_field(
-			index,
-			field + field_len - BTR_EXTERN_FIELD_REF_SIZE,
-			rec, offsets, page_zip, field_no, rb_ctx, mtr);
-	}
-}
-
-/*******************************************************************//**
-Copies the leading bytes of an uncompressed BLOB into a buffer by walking
-the chain of BLOB pages, one mini-transaction per page. The clustered
-index record that points to this BLOB must be protected by a lock or a
-page latch.
-@return number of bytes written to buf */
-static
-ulint
-btr_copy_blob_prefix(
-/*=================*/
-	byte*		buf,	/*!< out: the externally stored part of
-				the field, or a prefix of it */
-	ulint		len,	/*!< in: length of buf, in bytes */
-	ulint		space_id,/*!< in: space id of the BLOB pages */
-	ulint		page_no,/*!< in: page number of the first BLOB page */
-	ulint		offset,	/*!< in: offset on the first BLOB page */
-	trx_t*		trx)	/*!< in: transaction handle */
-{
-	ulint	n_written = 0;
-
-	for (;;) {
-		mtr_t	mtr;
-
-		mtr_start_trx(&mtr, trx);
-
-		buf_block_t*	blob_block = buf_page_get(
-			space_id, 0, page_no, RW_S_LATCH, &mtr);
-		buf_block_dbg_add_level(blob_block, SYNC_EXTERN_STORAGE);
-
-		const page_t*	frame = buf_block_get_frame(blob_block);
-
-		btr_check_blob_fil_page_type(space_id, page_no, frame, TRUE);
-
-		const byte*	hdr = frame + offset;
-		const ulint	part_len = btr_blob_get_part_len(hdr);
-		/* Never copy more than what still fits in buf. */
-		const ulint	chunk = ut_min(part_len, len - n_written);
-
-		memcpy(buf + n_written, hdr + BTR_BLOB_HDR_SIZE, chunk);
-		n_written += chunk;
-
-		page_no = btr_blob_get_next_page_no(hdr);
-
-		mtr_commit(&mtr);
-
-		/* Stop at the last page of the chain, or as soon as a
-		page had to be truncated because buf is full. */
-		if (page_no == FIL_NULL || chunk != part_len) {
-			break;
-		}
-
-		/* On every BLOB page but the first, the BLOB header
-		sits at the start of the page data. */
-		offset = FIL_PAGE_DATA;
-
-		ut_ad(n_written <= len);
-	}
-
-	UNIV_MEM_ASSERT_RW(buf, n_written);
-	return(n_written);
-}
-
-/*******************************************************************//**
-Copies the prefix of a compressed BLOB. The clustered index record
-that points to this BLOB must be protected by a lock or a page latch.
-
-The BLOB pages are fetched one at a time with buf_page_get_zip() and
-their payload is fed to zlib inflate(), which writes directly into buf.
-The first page is expected to be of type FIL_PAGE_TYPE_ZBLOB and every
-following page of type FIL_PAGE_TYPE_ZBLOB2. Failures (page cannot be
-loaded, unexpected page type, inflate error, premature end of the
-chain) are reported on stderr; the function then returns whatever has
-been decompressed so far.
-@return number of bytes written to buf */
-static
-ulint
-btr_copy_zblob_prefix(
-/*==================*/
- byte* buf, /*!< out: the externally stored part of
- the field, or a prefix of it */
- ulint len, /*!< in: length of buf, in bytes */
- ulint zip_size,/*!< in: compressed BLOB page size */
- ulint space_id,/*!< in: space id of the BLOB pages */
- ulint page_no,/*!< in: page number of the first BLOB page */
- ulint offset) /*!< in: offset on the first BLOB page */
-{
- ulint page_type = FIL_PAGE_TYPE_ZBLOB;
- mem_heap_t* heap;
- int err;
- z_stream d_stream;
-
- d_stream.next_out = buf;
- d_stream.avail_out = static_cast<uInt>(len);
- d_stream.next_in = Z_NULL;
- d_stream.avail_in = 0;
-
- /* Zlib inflate needs 32 kilobytes for the default
- window size, plus a few kilobytes for small objects.
- The heap is handed to zlib via page_zip_set_alloc() and is
- released at func_exit. */
- heap = mem_heap_create(40000);
- page_zip_set_alloc(&d_stream, heap);
-
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(zip_size >= UNIV_ZIP_SIZE_MIN);
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
- ut_ad(space_id);
-
- err = inflateInit(&d_stream);
- ut_a(err == Z_OK);
-
- for (;;) {
- buf_page_t* bpage;
- ulint next_page_no;
-
- /* There is no latch on bpage directly. Instead,
- bpage is protected by the B-tree page latch that
- is being held on the clustered index record, or,
- in row_merge_copy_blobs(), by an exclusive table lock. */
- bpage = buf_page_get_zip(space_id, zip_size, page_no);
-
- if (UNIV_UNLIKELY(!bpage)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Cannot load"
- " compressed BLOB"
- " page %lu space %lu\n",
- (ulong) page_no, (ulong) space_id);
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY
- (fil_page_get_type(bpage->zip.data) != page_type)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Unexpected type %lu of"
- " compressed BLOB"
- " page %lu space %lu\n",
- (ulong) fil_page_get_type(bpage->zip.data),
- (ulong) page_no, (ulong) space_id);
- ut_ad(0);
- goto end_of_blob;
- }
-
- next_page_no = mach_read_from_4(bpage->zip.data + offset);
-
- if (UNIV_LIKELY(offset == FIL_PAGE_NEXT)) {
- /* When the BLOB begins at page header,
- the compressed data payload does not
- immediately follow the next page pointer.
- Otherwise the payload starts right after the
- 4-byte next page pointer that was just read. */
- offset = FIL_PAGE_DATA;
- } else {
- offset += 4;
- }
-
- d_stream.next_in = bpage->zip.data + offset;
- d_stream.avail_in = static_cast<uInt>(zip_size - offset);
-
- err = inflate(&d_stream, Z_NO_FLUSH);
- switch (err) {
- case Z_OK:
- if (!d_stream.avail_out) {
- goto end_of_blob;
- }
- break;
- case Z_STREAM_END:
- if (next_page_no == FIL_NULL) {
- goto end_of_blob;
- }
- /* fall through: the stream ended although the page
- chain continues, which is reported as an error */
- default:
-inflate_error:
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: inflate() of"
- " compressed BLOB"
- " page %lu space %lu returned %d (%s)\n",
- (ulong) page_no, (ulong) space_id,
- err, d_stream.msg);
- case Z_BUF_ERROR:
- goto end_of_blob;
- }
-
- if (next_page_no == FIL_NULL) {
- if (!d_stream.avail_in) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: unexpected end of"
- " compressed BLOB"
- " page %lu space %lu\n",
- (ulong) page_no,
- (ulong) space_id);
- } else {
- err = inflate(&d_stream, Z_FINISH);
- switch (err) {
- case Z_STREAM_END:
- case Z_BUF_ERROR:
- break;
- default:
- goto inflate_error;
- }
- }
-
-end_of_blob:
- buf_page_release_zip(bpage);
- goto func_exit;
- }
-
- buf_page_release_zip(bpage);
-
- /* On other BLOB pages except the first
- the BLOB header always is at the page header,
- and the expected page type changes accordingly: */
-
- page_no = next_page_no;
- offset = FIL_PAGE_NEXT;
- page_type = FIL_PAGE_TYPE_ZBLOB2;
- }
-
-func_exit:
- inflateEnd(&d_stream);
- mem_heap_free(heap);
- UNIV_MEM_ASSERT_RW(buf, d_stream.total_out);
- return(d_stream.total_out);
-}
-
-/*******************************************************************//**
-Copies the prefix of the externally stored part of a field, dispatching
-to the compressed or the uncompressed BLOB reader depending on zip_size.
-The clustered index record that points to this BLOB must be protected
-by a lock or a page latch.
-@return number of bytes written to buf; 0 when len is 0 */
-static
-ulint
-btr_copy_externally_stored_field_prefix_low(
-/*========================================*/
-	byte*		buf,	/*!< out: the externally stored part of
-				the field, or a prefix of it */
-	ulint		len,	/*!< in: length of buf, in bytes */
-	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
-				zero for uncompressed BLOBs */
-	ulint		space_id,/*!< in: space id of the first BLOB page */
-	ulint		page_no,/*!< in: page number of the first BLOB page */
-	ulint		offset,	/*!< in: offset on the first BLOB page */
-	trx_t*		trx)	/*!< in: transaction handle */
-{
-	if (UNIV_UNLIKELY(len == 0)) {
-		/* Nothing was requested. */
-		return(0);
-	}
-
-	if (zip_size == 0) {
-		return(btr_copy_blob_prefix(buf, len, space_id,
-					    page_no, offset, trx));
-	}
-
-	return(btr_copy_zblob_prefix(buf, len, zip_size,
-				     space_id, page_no, offset));
-}
-
-/*******************************************************************//**
-Copies the prefix of an externally stored field of a record: first the
-locally stored bytes, then as much of the off-page part as fits.
-The clustered index record must be protected by a lock or a page latch.
-@return the length of the copied field, or 0 if the column was being
-or has been deleted */
-UNIV_INTERN
-ulint
-btr_copy_externally_stored_field_prefix(
-/*====================================*/
-	byte*		buf,	/*!< out: the field, or a prefix of it */
-	ulint		len,	/*!< in: length of buf, in bytes */
-	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
-				zero for uncompressed BLOBs */
-	const byte*	data,	/*!< in: 'internally' stored part of the
-				field containing also the reference to
-				the external part; must be protected by
-				a lock or a page latch */
-	ulint		local_len,/*!< in: length of data, in bytes */
-	trx_t*		trx)	/*!< in: transaction handle */
-{
-	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-	/* Number of payload bytes stored in the record itself,
-	i.e. without the trailing field reference. */
-	const ulint	inline_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;
-
-	if (UNIV_UNLIKELY(inline_len >= len)) {
-		/* The local part alone fills the buffer. */
-		memcpy(buf, data, len);
-		return(len);
-	}
-
-	memcpy(buf, data, inline_len);
-
-	const byte*	field_ref = data + inline_len;
-
-	ut_a(memcmp(field_ref, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));
-
-	if (mach_read_from_4(field_ref + BTR_EXTERN_LEN + 4) == 0) {
-		/* The externally stored part of the column has been
-		(partially) deleted. Signal the half-deleted BLOB
-		to the caller. */
-		return(0);
-	}
-
-	const ulint	blob_space = mach_read_from_4(
-		field_ref + BTR_EXTERN_SPACE_ID);
-	const ulint	blob_page = mach_read_from_4(
-		field_ref + BTR_EXTERN_PAGE_NO);
-	const ulint	blob_offset = mach_read_from_4(
-		field_ref + BTR_EXTERN_OFFSET);
-
-	const ulint	extern_copied
-		= btr_copy_externally_stored_field_prefix_low(
-			buf + inline_len, len - inline_len, zip_size,
-			blob_space, blob_page, blob_offset, trx);
-
-	return(inline_len + extern_copied);
-}
-
-/*******************************************************************//**
-Copies an externally stored field of a record to mem heap: the locally
-stored bytes followed by the off-page part. The buffer is sized from
-the length recorded in the field reference. The clustered index record
-must be protected by a lock or a page latch.
-@return the whole field copied to heap */
-UNIV_INTERN
-byte*
-btr_copy_externally_stored_field(
-/*=============================*/
-	ulint*		len,	/*!< out: length of the whole field */
-	const byte*	data,	/*!< in: 'internally' stored part of the
-				field containing also the reference to
-				the external part; must be protected by
-				a lock or a page latch */
-	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
-				zero for uncompressed BLOBs */
-	ulint		local_len,/*!< in: length of data */
-	mem_heap_t*	heap,	/*!< in: mem heap */
-	trx_t*		trx)	/*!< in: transaction handle */
-{
-	ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-	const ulint	inline_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;
-	const byte*	field_ref = data + inline_len;
-
-	const ulint	blob_space = mach_read_from_4(
-		field_ref + BTR_EXTERN_SPACE_ID);
-	const ulint	blob_page = mach_read_from_4(
-		field_ref + BTR_EXTERN_PAGE_NO);
-	const ulint	blob_offset = mach_read_from_4(
-		field_ref + BTR_EXTERN_OFFSET);
-
-	/* Currently a BLOB cannot be bigger than 4 GB; the 4 upper
-	bytes of the length field are left unused, so only the lower
-	4 bytes are read. */
-	const ulint	blob_len = mach_read_from_4(
-		field_ref + BTR_EXTERN_LEN + 4);
-
-	byte*	field = static_cast<byte*>(
-		mem_heap_alloc(heap, inline_len + blob_len));
-
-	memcpy(field, data, inline_len);
-
-	const ulint	extern_copied
-		= btr_copy_externally_stored_field_prefix_low(
-			field + inline_len, blob_len, zip_size,
-			blob_space, blob_page, blob_offset, trx);
-
-	*len = inline_len + extern_copied;
-
-	return(field);
-}
-
-/*******************************************************************//**
-Copies one externally stored field of a record to mem heap.
-@return the field copied to heap, or NULL if the field is incomplete
-(its field reference is still all zero) */
-UNIV_INTERN
-byte*
-btr_rec_copy_externally_stored_field(
-/*=================================*/
-	const rec_t*	rec,	/*!< in: record in a clustered index;
-				must be protected by a lock or a page latch */
-	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
-	ulint		zip_size,/*!< in: nonzero=compressed BLOB page size,
-				zero for uncompressed BLOBs */
-	ulint		no,	/*!< in: field number */
-	ulint*		len,	/*!< out: length of the field */
-	mem_heap_t*	heap,	/*!< in: mem heap */
-	trx_t*		trx)	/*!< in: transaction handle */
-{
-	ut_a(rec_offs_nth_extern(offsets, no));
-
-	/* An externally stored field can contain some initial
-	data from the field, and in the last 20 bytes it has the
-	space id, page number, and offset where the rest of the
-	field data is stored, and the data length in addition to
-	the data stored locally. We may need to store some data
-	locally to get the local record length above the 128 byte
-	limit so that field offsets are stored in two bytes, and
-	the extern bit is available in those two bytes. */
-
-	ulint		field_len;
-	const byte*	field = rec_get_nth_field(
-		rec, offsets, no, &field_len);
-
-	ut_a(field_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-	const byte*	field_ref
-		= field + field_len - BTR_EXTERN_FIELD_REF_SIZE;
-
-	if (UNIV_UNLIKELY(memcmp(field_ref, field_ref_zero,
-				 BTR_EXTERN_FIELD_REF_SIZE) == 0)) {
-		/* The externally stored field was not written yet.
-		This record should only be seen by
-		recv_recovery_rollback_active() or any
-		TRX_ISO_READ_UNCOMMITTED transactions. */
-		return(NULL);
-	}
-
-	return(btr_copy_externally_stored_field(
-		       len, field, zip_size, field_len, heap, trx));
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/btr/btr0defragment.cc b/storage/xtradb/btr/btr0defragment.cc
deleted file mode 100644
index c2f58a8e1cf..00000000000
--- a/storage/xtradb/btr/btr0defragment.cc
+++ /dev/null
@@ -1,833 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2012, 2014 Facebook, Inc. All Rights Reserved.
-Copyright (C) 2014, 2015, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-/**************************************************//**
-@file btr/btr0defragment.cc
-Index defragmentation.
-
-Created 05/29/2014 Rongrong Zhong
-Modified 16/07/2014 Sunguck Lee
-Modified 30/07/2014 Jan Lindström jan.lindstrom@mariadb.com
-*******************************************************/
-
-#include "btr0defragment.h"
-#ifndef UNIV_HOTBACKUP
-#include "btr0cur.h"
-#include "btr0sea.h"
-#include "btr0pcur.h"
-#include "dict0stats.h"
-#include "dict0stats_bg.h"
-#include "ibuf0ibuf.h"
-#include "lock0lock.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-#include "ut0timer.h"
-
-#include <list>
-
-/**************************************************//**
-Custom nullptr implementation for under g++ 4.6
-*******************************************************/
-/*
-// #pragma once
-namespace std
-{
- // based on SC22/WG21/N2431 = J16/07-0301
- struct nullptr_t
- {
- template<typename any> operator any * () const
- {
- return 0;
- }
- template<class any, typename T> operator T any:: * () const
- {
- return 0;
- }
-
-#ifdef _MSC_VER
- struct pad {};
- pad __[sizeof(void*)/sizeof(pad)];
-#else
- char __[sizeof(void*)];
-#endif
-private:
- // nullptr_t();// {}
- // nullptr_t(const nullptr_t&);
- // void operator = (const nullptr_t&);
- void operator &() const;
- template<typename any> void operator +(any) const
- {
- // I Love MSVC 2005!
- }
- template<typename any> void operator -(any) const
- {
- // I Love MSVC 2005!
- }
- };
-static const nullptr_t __nullptr = {};
-}
-
-#ifndef nullptr
-#define nullptr std::__nullptr
-#endif
-*/
-/**************************************************//**
-End of Custom nullptr implementation for under g++ 4.6
-*******************************************************/
-
-/* When there's no work, either because defragmentation is disabled or because
-no query has been submitted, the thread checks its state every
-BTR_DEFRAGMENT_SLEEP_IN_USECS microseconds. */
-#define BTR_DEFRAGMENT_SLEEP_IN_USECS 1000000
-/* Reduce the target page size by this amount when a compression failure
-happens during defragmentation. 512 is chosen because it's a power of 2 and it
-is about 3% of the page size. When there are compression failures in
-defragmentation, our goal is to get a decent defrag ratio with as few
-compression failures as possible. From experimentation it seems that reducing
-the target size by 512 every time will make sure the page is compressible
-within a couple of iterations. */
-#define BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE 512
-
-/* Work queue for defragmentation. Holds pointers to heap-allocated items. */
-typedef std::list<btr_defragment_item_t*> btr_defragment_wq_t;
-static btr_defragment_wq_t btr_defragment_wq;
-
-/* Mutex protecting the defragmentation work queue. */
-ib_mutex_t btr_defragment_mutex;
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t btr_defragment_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/* Number of compression failures caused by defragmentation since server
-start. */
-ulint btr_defragment_compression_failures = 0;
-/* Number of btr_defragment_n_pages calls that altered a page but didn't
-manage to release any page. */
-ulint btr_defragment_failures = 0;
-/* Total number of btr_defragment_n_pages calls that altered a page.
-The difference between btr_defragment_count and btr_defragment_failures shows
-the amount of effort wasted. */
-ulint btr_defragment_count = 0;
-
-/******************************************************************//**
-Constructor for btr_defragment_item_t. Stores the cursor and the event
-as given; the item starts out not removed and never processed. */
-btr_defragment_item_t::btr_defragment_item_t(
-	btr_pcur_t* pcur,
-	os_event_t event)
-	: pcur(pcur),
-	  event(event),
-	  removed(false),
-	  last_processed(0)
-{
-}
-
-/******************************************************************//**
-Destructor for btr_defragment_item_t. Frees the persistent cursor, if
-any, and sets the event, if any. */
-btr_defragment_item_t::~btr_defragment_item_t()
-{
-	if (pcur != NULL) {
-		btr_pcur_free_for_mysql(pcur);
-	}
-
-	if (event != NULL) {
-		os_event_set(event);
-	}
-}
-
-/******************************************************************//**
-Initialize defragmentation: derive srv_defragment_interval from
-srv_defragment_frequency and create the work queue mutex. */
-void
-btr_defragment_init()
-{
-	/* 1000000 microseconds divided by the configured frequency. */
-	const ulonglong	interval_usecs
-		= (ulonglong) (1000000.0 / srv_defragment_frequency);
-
-	srv_defragment_interval = ut_microseconds_to_timer(interval_usecs);
-
-	mutex_create(btr_defragment_mutex_key, &btr_defragment_mutex,
-		     SYNC_ANY_LATCH);
-}
-
-/******************************************************************//**
-Shutdown defragmentation. Deletes every item still in the work queue
-and then frees the queue mutex. */
-void
-btr_defragment_shutdown()
-{
-	mutex_enter(&btr_defragment_mutex);
-
-	while (!btr_defragment_wq.empty()) {
-		btr_defragment_item_t*	doomed = btr_defragment_wq.front();
-
-		/* Unlink first, then destroy the item. */
-		btr_defragment_wq.pop_front();
-		delete doomed;
-	}
-
-	mutex_exit(&btr_defragment_mutex);
-	mutex_free(&btr_defragment_mutex);
-}
-
-
-/******************************************************************//**
-Functions used by the query threads: btr_defragment_xxx_index
-Query threads find/add/remove index. */
-/******************************************************************//**
-Check whether the given index is in btr_defragment_wq. Indexes are
-compared by index->id. The queue is scanned under
-btr_defragment_mutex.
-@return true if an item for the index is queued */
-bool
-btr_defragment_find_index(
-	dict_index_t*	index)	/*!< Index to find. */
-{
-	bool	found = false;
-
-	mutex_enter(&btr_defragment_mutex);
-
-	for (btr_defragment_wq_t::iterator it = btr_defragment_wq.begin();
-	     !found && it != btr_defragment_wq.end();
-	     ++it) {
-		dict_index_t*	queued = btr_cur_get_index(
-			btr_pcur_get_btr_cur((*it)->pcur));
-
-		found = (index->id == queued->id);
-	}
-
-	mutex_exit(&btr_defragment_mutex);
-
-	return found;
-}
-
-/******************************************************************//**
-Query thread uses this function to add an index to btr_defragment_wq.
-The root page of the index is loaded first. If it cannot be obtained,
-nothing is queued and *err is set. If the root is a leaf page there is
-nothing to defragment and nothing is queued. Otherwise a persistent
-cursor is opened on the index, advanced by one record, its position is
-stored, and a new work item owning that cursor is appended to the queue.
-@return event for the query thread to wait on if this is a synchronized
-defragmentation; NULL for async requests, when nothing was queued, or
-on error */
-os_event_t
-btr_defragment_add_index(
-	dict_index_t*	index,	/*!< index to be added */
-	bool		async,	/*!< whether this is an async
-				defragmentation */
-	dberr_t*	err)	/*!< out: error code */
-{
-	mtr_t	mtr;
-	ulint	space = dict_index_get_space(index);
-	ulint	zip_size = dict_table_zip_size(index->table);
-	ulint	page_no = dict_index_get_page(index);
-	*err = DB_SUCCESS;
-
-	mtr_start(&mtr);
-	// Load index root page.
-	buf_block_t*	block = btr_block_get(space, zip_size, page_no,
-					      RW_NO_LATCH, index, &mtr);
-	page_t*		page = NULL;
-
-	if (block) {
-		page = buf_block_get_frame(block);
-	}
-
-	if (page == NULL) {
-		// Without a root page there is nothing we can inspect.
-		// Never fall through to page_is_leaf(NULL): report the
-		// decryption failure when the table is flagged unreadable
-		// and a generic error otherwise.
-		mtr_commit(&mtr);
-		*err = index->table->file_unreadable
-			? DB_DECRYPTION_FAILED
-			: DB_ERROR;
-		return NULL;
-	}
-
-	if (page_is_leaf(page)) {
-		// Index root is a leaf page, no need to defragment.
-		mtr_commit(&mtr);
-		return NULL;
-	}
-
-	btr_pcur_t*	pcur = btr_pcur_create_for_mysql();
-	os_event_t	event = NULL;
-
-	if (!async) {
-		// Only a synchronous caller needs something to wait on.
-		event = os_event_create();
-	}
-
-	btr_pcur_open_at_index_side(true, index, BTR_SEARCH_LEAF, pcur,
-				    true, 0, &mtr);
-	btr_pcur_move_to_next(pcur, &mtr);
-	btr_pcur_store_position(pcur, &mtr);
-	mtr_commit(&mtr);
-
-	dict_stats_empty_defrag_summary(index);
-
-	// The item takes over pcur and event; its destructor releases them.
-	btr_defragment_item_t*	item = new btr_defragment_item_t(pcur, event);
-
-	mutex_enter(&btr_defragment_mutex);
-	btr_defragment_wq.push_back(item);
-	mutex_exit(&btr_defragment_mutex);
-
-	return event;
-}
-
-/******************************************************************//**
-When a table is dropped, this function is called to mark every work
-item of that table as removed in btr_defragment_wq. Unlike
-btr_defragment_remove_index(), it leaves the event of the item in
-place. */
-void
-btr_defragment_remove_table(
-	dict_table_t*	table)	/*!< Table whose items are to be marked
-				as removed. */
-{
-	mutex_enter(&btr_defragment_mutex);
-
-	btr_defragment_wq_t::iterator	it = btr_defragment_wq.begin();
-
-	while (it != btr_defragment_wq.end()) {
-		btr_defragment_item_t*	queued = *it;
-		dict_index_t*		queued_index = btr_cur_get_index(
-			btr_pcur_get_btr_cur(queued->pcur));
-
-		if (queued_index->table->id == table->id) {
-			queued->removed = true;
-		}
-
-		++it;
-	}
-
-	mutex_exit(&btr_defragment_mutex);
-}
-
-/******************************************************************//**
-Query thread uses this function to mark an index as removed in
-btr_defragment_wq. The first queued item whose index id matches is
-flagged as removed and its event pointer is cleared. */
-void
-btr_defragment_remove_index(
-	dict_index_t*	index)	/*!< Index to be removed. */
-{
-	mutex_enter(&btr_defragment_mutex);
-
-	btr_defragment_wq_t::iterator	it = btr_defragment_wq.begin();
-
-	for (; it != btr_defragment_wq.end(); ++it) {
-		btr_defragment_item_t*	queued = *it;
-		dict_index_t*		queued_index = btr_cur_get_index(
-			btr_pcur_get_btr_cur(queued->pcur));
-
-		if (queued_index->id != index->id) {
-			continue;
-		}
-
-		queued->removed = true;
-		queued->event = NULL;
-		break;
-	}
-
-	mutex_exit(&btr_defragment_mutex);
-}
-
-/******************************************************************//**
-Functions used by defragmentation thread: btr_defragment_xxx_item.
-Defragmentation thread operates on the work *item*. It gets/removes
-item from the work queue. */
-/******************************************************************//**
-Defragment thread uses this to remove an item from btr_defragment_wq.
-If the item is found in the queue it is unlinked and deleted, which
-frees the resources associated with it. An item that is not in the
-queue is left untouched. */
-void
-btr_defragment_remove_item(
-	btr_defragment_item_t*	item)	/*!< Item to be removed. */
-{
-	mutex_enter(&btr_defragment_mutex);
-
-	btr_defragment_wq_t::iterator	pos = btr_defragment_wq.begin();
-
-	/* Locate the first queue entry that is this very item. */
-	while (pos != btr_defragment_wq.end() && *pos != item) {
-		++pos;
-	}
-
-	if (pos != btr_defragment_wq.end()) {
-		btr_defragment_wq.erase(pos);
-		delete item;
-	}
-
-	mutex_exit(&btr_defragment_mutex);
-}
-
-/******************************************************************//**
-Defragment thread uses this to get an item from btr_defragment_wq to work on.
-The item is not removed from the work queue so query threads can still access
-this item. We keep it this way so query threads can find and kill a
-defragmentation even if that index is being worked on. Be aware that while you
-work on this item you have no lock protection on it whatsoever. This is OK as
-long as the query threads and defragment thread won't modify the same fields
-without lock protection.
-
-The queue itself is only inspected while holding btr_defragment_mutex:
-other threads may append to it concurrently, so both the emptiness test
-and the read of the first element must happen under the mutex.
-@return the first item of the work queue, or NULL if the queue is empty */
-btr_defragment_item_t*
-btr_defragment_get_item()
-{
-	btr_defragment_item_t*	item = NULL;
-
-	mutex_enter(&btr_defragment_mutex);
-
-	if (!btr_defragment_wq.empty()) {
-		item = btr_defragment_wq.front();
-	}
-
-	mutex_exit(&btr_defragment_mutex);
-
-	return item;
-}
-
-/*********************************************************************//**
-Check whether we should save defragmentation statistics to persistent storage.
-The index is added to the defrag stats pool, and its modification counter is
-reset, once the counter has reached srv_defragment_stats_accuracy. */
-UNIV_INTERN
-void
-btr_defragment_save_defrag_stats_if_needed(
-	dict_index_t*	index)	/*!< in: index */
-{
-	if (srv_defragment_stats_accuracy == 0) {
-		/* A value of 0 means stats tracking is disabled. */
-		return;
-	}
-
-	if (dict_index_get_space(index) == 0) {
-		/* Space 0: do not track system tables. */
-		return;
-	}
-
-	if (index->stat_defrag_modified_counter
-	    < srv_defragment_stats_accuracy) {
-		/* Not enough modifications accumulated yet. */
-		return;
-	}
-
-	dict_stats_defrag_pool_add(index);
-	index->stat_defrag_modified_counter = 0;
-}
-
-/*********************************************************************//**
-Main defragment functionalities used by defragment thread.*/
-/*************************************************************//**
-Calculate number of records from beginning of block that can
-fit into size_limit. The records are visited in page order and their
-sizes (rec_offs_size()) are accumulated until the next record would
-exceed size_limit or the supremum record is reached.
-@return number of records */
-UNIV_INTERN
-ulint
-btr_defragment_calc_n_recs_for_size(
-	buf_block_t*	block,	/*!< in: B-tree page */
-	dict_index_t*	index,	/*!< in: index of the page */
-	ulint		size_limit, /*!< in: size limit to fit records in */
-	ulint*		n_recs_size) /*!< out: actual size of the records that fit
-				in size_limit. */
-{
-	page_t*		page = buf_block_get_frame(block);
-	ulint		n_recs = 0;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets = offsets_;
-	rec_offs_init(offsets_);
-	mem_heap_t*	heap = NULL;
-	ulint		size = 0;
-	page_cur_t	cur;
-
-	page_cur_set_before_first(block, &cur);
-	page_cur_move_to_next(&cur);
-	while (page_cur_get_rec(&cur) != page_get_supremum_rec(page)) {
-		rec_t*	cur_rec = page_cur_get_rec(&cur);
-		offsets = rec_get_offsets(cur_rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-		ulint	rec_size = rec_offs_size(offsets);
-		size += rec_size;
-		if (size > size_limit) {
-			/* This record does not fit any more: take it
-			back out of the total and stop. */
-			size = size - rec_size;
-			break;
-		}
-		n_recs ++;
-		page_cur_move_to_next(&cur);
-	}
-	*n_recs_size = size;
-
-	/* rec_get_offsets() was given &heap and may have allocated it;
-	release it so that repeated calls do not leak memory. */
-	if (heap != NULL) {
-		mem_heap_free(heap);
-	}
-
-	return n_recs;
-}
-
-/*************************************************************//**
-Merge as many records from the from_block to the to_block. Delete
-the from_block if all records are successfully merged to to_block.
-@return the to_block to target for next merge operation. */
-UNIV_INTERN
-buf_block_t*
-btr_defragment_merge_pages(
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* from_block, /*!< in: origin of merge */
- buf_block_t* to_block, /*!< in: destination of merge */
- ulint zip_size, /*!< in: zip size of the block */
- ulint reserved_space, /*!< in: space reserved for future
- insert to avoid immediate page split */
- ulint* max_data_size, /*!< in/out: max data size to
- fit in a single compressed page. */
- mem_heap_t* heap, /*!< in/out: pointer to memory heap */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- page_t* from_page = buf_block_get_frame(from_block);
- page_t* to_page = buf_block_get_frame(to_block);
- ulint space = dict_index_get_space(index);
- ulint level = btr_page_get_level(from_page, mtr);
- ulint n_recs = page_get_n_recs(from_page);
- ulint new_data_size = page_get_data_size(to_page);
- ulint max_ins_size =
- page_get_max_insert_size(to_page, n_recs);
- ulint max_ins_size_reorg =
- page_get_max_insert_size_after_reorganize(
- to_page, n_recs);
- ulint max_ins_size_to_use = max_ins_size_reorg > reserved_space
- ? max_ins_size_reorg - reserved_space : 0;
- ulint move_size = 0;
- ulint n_recs_to_move = 0;
- rec_t* rec = NULL;
- ulint target_n_recs = 0;
- rec_t* orig_pred;
-
- // Estimate how many records can be moved from the from_page to
- // the to_page.
- if (zip_size) {
- ulint page_diff = UNIV_PAGE_SIZE - *max_data_size;
- max_ins_size_to_use = (max_ins_size_to_use > page_diff)
- ? max_ins_size_to_use - page_diff : 0;
- }
- n_recs_to_move = btr_defragment_calc_n_recs_for_size(
- from_block, index, max_ins_size_to_use, &move_size);
-
- // If max_ins_size >= move_size, we can move the records without
- // reorganizing the page, otherwise we need to reorganize the page
- // first to release more space.
- if (move_size > max_ins_size) {
- if (!btr_page_reorganize_block(false, page_zip_level,
- to_block, index,
- mtr)) {
- if (!dict_index_is_clust(index)
- && page_is_leaf(to_page)) {
- ibuf_reset_free_bits(to_block);
- }
- // If reorganization fails, that means page is
- // not compressable. There's no point to try
- // merging into this page. Continue to the
- // next page.
- return from_block;
- }
- ut_ad(page_validate(to_page, index));
- max_ins_size = page_get_max_insert_size(to_page, n_recs);
- ut_a(max_ins_size >= move_size);
- }
-
- // Move records to pack to_page more full.
- orig_pred = NULL;
- target_n_recs = n_recs_to_move;
- while (n_recs_to_move > 0) {
- rec = page_rec_get_nth(from_page,
- n_recs_to_move + 1);
- orig_pred = page_copy_rec_list_start(
- to_block, from_block, rec, index, mtr);
- if (orig_pred)
- break;
- // If we reach here, that means compression failed after packing
- // n_recs_to_move number of records to to_page. We try to reduce
- // the targeted data size on the to_page by
- // BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE and try again.
- os_atomic_increment_ulint(
- &btr_defragment_compression_failures, 1);
- max_ins_size_to_use =
- move_size > BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE
- ? move_size - BTR_DEFRAGMENT_PAGE_REDUCTION_STEP_SIZE
- : 0;
- if (max_ins_size_to_use == 0) {
- n_recs_to_move = 0;
- move_size = 0;
- break;
- }
- n_recs_to_move = btr_defragment_calc_n_recs_for_size(
- from_block, index, max_ins_size_to_use, &move_size);
- }
- // If less than target_n_recs are moved, it means there are
- // compression failures during page_copy_rec_list_start. Adjust
- // the max_data_size estimation to reduce compression failures
- // in the following runs.
- if (target_n_recs > n_recs_to_move
- && *max_data_size > new_data_size + move_size) {
- *max_data_size = new_data_size + move_size;
- }
- // Set ibuf free bits if necessary.
- if (!dict_index_is_clust(index)
- && page_is_leaf(to_page)) {
- if (zip_size) {
- ibuf_reset_free_bits(to_block);
- } else {
- ibuf_update_free_bits_if_full(
- to_block,
- UNIV_PAGE_SIZE,
- ULINT_UNDEFINED);
- }
- }
- if (n_recs_to_move == n_recs) {
- /* The whole page is merged with the previous page,
- free it. */
- lock_update_merge_left(to_block, orig_pred,
- from_block);
- btr_search_drop_page_hash_index(from_block);
- btr_level_list_remove(space, zip_size, from_page,
- index, mtr);
- btr_node_ptr_delete(index, from_block, mtr);
- btr_blob_dbg_remove(from_page, index,
- "btr_defragment_n_pages");
- btr_page_free(index, from_block, mtr);
- } else {
- // There are still records left on the page, so
- // increment n_defragmented. Node pointer will be changed
- // so remove the old node pointer.
- if (n_recs_to_move > 0) {
- // Part of the page is merged to left, remove
- // the merged records, update record locks and
- // node pointer.
- dtuple_t* node_ptr;
- page_delete_rec_list_start(rec, from_block,
- index, mtr);
- lock_update_split_and_merge(to_block,
- orig_pred,
- from_block);
- btr_node_ptr_delete(index, from_block, mtr);
- rec = page_rec_get_next(
- page_get_infimum_rec(from_page));
- node_ptr = dict_index_build_node_ptr(
- index, rec, page_get_page_no(from_page),
- heap, level + 1);
- btr_insert_on_non_leaf_level(0, index, level+1,
- node_ptr, mtr);
- }
- to_block = from_block;
- }
- return to_block;
-}
-
-/*************************************************************//**
-Tries to merge N consecutive pages, starting from the page pointed by the
-cursor. Skip space 0. Only consider leaf pages.
-This function first loads all N pages into memory, then for each of
-the pages other than the first page, it tries to move as many records
-as possible to the left sibling to keep the left sibling full. During
-the process, if any page becomes empty, that page will be removed from
-the level list. Record locks, hash, and node pointers are updated after
-page reorganization.
-@return pointer to the last block processed, or NULL if reaching end of index */
-UNIV_INTERN
-buf_block_t*
-btr_defragment_n_pages(
- buf_block_t* block, /*!< in: starting block for defragmentation */
- dict_index_t* index, /*!< in: index tree */
- uint n_pages,/*!< in: number of pages to defragment */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint space;
- ulint zip_size;
- /* We will need to load the n+1 block because if the last page is freed
- and we need to modify the prev_page_no of that block. */
- buf_block_t* blocks[BTR_DEFRAGMENT_MAX_N_PAGES + 1];
- page_t* first_page;
- buf_block_t* current_block;
- ulint total_data_size = 0;
- ulint total_n_recs = 0;
- ulint data_size_per_rec;
- ulint optimal_page_size;
- ulint reserved_space;
- ulint level;
- ulint max_data_size = 0;
- uint n_defragmented = 0;
- uint n_new_slots;
- mem_heap_t* heap;
- ibool end_of_index = FALSE;
-
- /* It doesn't make sense to call this function with n_pages = 1. */
- ut_ad(n_pages > 1);
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_X_LOCK));
- space = dict_index_get_space(index);
- if (space == 0) {
- /* Ignore space 0. */
- return NULL;
- }
-
- if (n_pages > BTR_DEFRAGMENT_MAX_N_PAGES) {
- n_pages = BTR_DEFRAGMENT_MAX_N_PAGES;
- }
-
- zip_size = dict_table_zip_size(index->table);
- first_page = buf_block_get_frame(block);
- level = btr_page_get_level(first_page, mtr);
-
- if (level != 0) {
- return NULL;
- }
-
- /* 1. Load the pages and calculate the total data size. */
- blocks[0] = block;
- for (uint i = 1; i <= n_pages; i++) {
- page_t* page = buf_block_get_frame(blocks[i-1]);
- ulint page_no = btr_page_get_next(page, mtr);
- total_data_size += page_get_data_size(page);
- total_n_recs += page_get_n_recs(page);
- if (page_no == FIL_NULL) {
- n_pages = i;
- end_of_index = TRUE;
- break;
- }
- blocks[i] = btr_block_get(space, zip_size, page_no,
- RW_X_LATCH, index, mtr);
- }
-
- if (n_pages == 1) {
- if (btr_page_get_prev(first_page, mtr) == FIL_NULL) {
- /* last page in the index */
- if (dict_index_get_page(index)
- == page_get_page_no(first_page))
- return NULL;
- /* given page is the last page.
- Lift the records to father. */
- btr_lift_page_up(index, block, mtr);
- }
- return NULL;
- }
-
- /* 2. Calculate how many pages data can fit in. If not compressable,
- return early. */
- ut_a(total_n_recs != 0);
- data_size_per_rec = total_data_size / total_n_recs;
- // For uncompressed pages, the optimal data size if the free space of a
- // empty page.
- optimal_page_size = page_get_free_space_of_empty(
- page_is_comp(first_page));
- // For compressed pages, we take compression failures into account.
- if (zip_size) {
- ulint size = 0;
- int i = 0;
- // We estimate the optimal data size of the index use samples of
- // data size. These samples are taken when pages failed to
- // compress due to insertion on the page. We use the average
- // of all samples we have as the estimation. Different pages of
- // the same index vary in compressibility. Average gives a good
- // enough estimation.
- for (;i < STAT_DEFRAG_DATA_SIZE_N_SAMPLE; i++) {
- if (index->stat_defrag_data_size_sample[i] == 0) {
- break;
- }
- size += index->stat_defrag_data_size_sample[i];
- }
- if (i != 0) {
- size = size / i;
- optimal_page_size = min(optimal_page_size, size);
- }
- max_data_size = optimal_page_size;
- }
-
- reserved_space = min((ulint)(optimal_page_size
- * (1 - srv_defragment_fill_factor)),
- (data_size_per_rec
- * srv_defragment_fill_factor_n_recs));
- optimal_page_size -= reserved_space;
- n_new_slots = (total_data_size + optimal_page_size - 1)
- / optimal_page_size;
- if (n_new_slots >= n_pages) {
- /* Can't defragment. */
- if (end_of_index)
- return NULL;
- return blocks[n_pages-1];
- }
-
- /* 3. Defragment pages. */
- heap = mem_heap_create(256);
- // First defragmented page will be the first page.
- current_block = blocks[0];
- // Start from the second page.
- for (uint i = 1; i < n_pages; i ++) {
- buf_block_t* new_block = btr_defragment_merge_pages(
- index, blocks[i], current_block, zip_size,
- reserved_space, &max_data_size, heap, mtr);
- if (new_block != current_block) {
- n_defragmented ++;
- current_block = new_block;
- }
- }
- mem_heap_free(heap);
- n_defragmented ++;
- os_atomic_increment_ulint(
- &btr_defragment_count, 1);
- if (n_pages == n_defragmented) {
- os_atomic_increment_ulint(
- &btr_defragment_failures, 1);
- } else {
- index->stat_defrag_n_pages_freed += (n_pages - n_defragmented);
- }
- if (end_of_index)
- return NULL;
- return current_block;
-}
-
-/** Whether btr_defragment_thread is active */
-bool btr_defragment_thread_active;
-
-/** Merge consecutive b-tree pages into fewer pages to defragment indexes */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(btr_defragment_thread)(void*)
-{
- btr_pcur_t* pcur;
- btr_cur_t* cursor;
- dict_index_t* index;
- mtr_t mtr;
- buf_block_t* first_block;
- buf_block_t* last_block;
-
- while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
- ut_ad(btr_defragment_thread_active);
-
- /* If defragmentation is disabled, sleep before
- checking whether it's enabled. */
- if (!srv_defragment) {
- os_thread_sleep(BTR_DEFRAGMENT_SLEEP_IN_USECS);
- continue;
- }
- /* The following call won't remove the item from work queue.
- We only get a pointer to it to work on. This will make sure
- when user issue a kill command, all indices are in the work
- queue to be searched. This also means that the user thread
- cannot directly remove the item from queue (since we might be
- using it). So user thread only marks index as removed. */
- btr_defragment_item_t* item = btr_defragment_get_item();
- /* If work queue is empty, sleep and check later. */
- if (!item) {
- os_thread_sleep(BTR_DEFRAGMENT_SLEEP_IN_USECS);
- continue;
- }
- /* If an index is marked as removed, we remove it from the work
- queue. No other thread could be using this item at this point so
- it's safe to remove now. */
- if (item->removed) {
- btr_defragment_remove_item(item);
- continue;
- }
-
- pcur = item->pcur;
- ulonglong now = ut_timer_now();
- ulonglong elapsed = now - item->last_processed;
-
- if (elapsed < srv_defragment_interval) {
- /* If we see an index again before the interval
- determined by the configured frequency is reached,
- we just sleep until the interval pass. Since
- defragmentation of all indices queue up on a single
- thread, it's likely other indices that follow this one
- don't need to sleep again. */
- os_thread_sleep(((ulint)ut_timer_to_microseconds(
- srv_defragment_interval - elapsed)));
- }
-
- now = ut_timer_now();
- mtr_start(&mtr);
- btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, &mtr);
- cursor = btr_pcur_get_btr_cur(pcur);
- index = btr_cur_get_index(cursor);
- first_block = btr_cur_get_block(cursor);
- last_block = btr_defragment_n_pages(first_block, index,
- srv_defragment_n_pages,
- &mtr);
- if (last_block) {
- /* If we haven't reached the end of the index,
- place the cursor on the last record of last page,
- store the cursor position, and put back in queue. */
- page_t* last_page = buf_block_get_frame(last_block);
- rec_t* rec = page_rec_get_prev(
- page_get_supremum_rec(last_page));
- ut_a(page_rec_is_user_rec(rec));
- page_cur_position(rec, last_block,
- btr_cur_get_page_cur(cursor));
- btr_pcur_store_position(pcur, &mtr);
- mtr_commit(&mtr);
- /* Update the last_processed time of this index. */
- item->last_processed = now;
- } else {
- mtr_commit(&mtr);
- /* Reaching the end of the index. */
- dict_stats_empty_defrag_stats(index);
- dict_stats_save_defrag_stats(index);
- dict_stats_save_defrag_summary(index);
- btr_defragment_remove_item(item);
- }
- }
-
- btr_defragment_thread_active = false;
- os_thread_exit(NULL);
- OS_THREAD_DUMMY_RETURN;
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/btr/btr0pcur.cc b/storage/xtradb/btr/btr0pcur.cc
deleted file mode 100644
index 0b970e1cf49..00000000000
--- a/storage/xtradb/btr/btr0pcur.cc
+++ /dev/null
@@ -1,620 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file btr/btr0pcur.cc
-The index tree persistent cursor
-
-Created 2/23/1996 Heikki Tuuri
-*******************************************************/
-
-#include "btr0pcur.h"
-
-#ifdef UNIV_NONINL
-#include "btr0pcur.ic"
-#endif
-
-#include "ut0byte.h"
-#include "rem0cmp.h"
-#include "trx0trx.h"
-#include "srv0srv.h"
-/**************************************************************//**
-Allocates memory for a persistent cursor object and initializes the cursor.
-@return own: persistent cursor */
-UNIV_INTERN
-btr_pcur_t*
-btr_pcur_create_for_mysql(void)
-/*============================*/
-{
- btr_pcur_t* pcur;
-
- pcur = (btr_pcur_t*) mem_alloc(sizeof(btr_pcur_t));
-
- pcur->btr_cur.index = NULL;
- btr_pcur_init(pcur);
- pcur->btr_cur.tree_height = ULINT_UNDEFINED;
-
- return(pcur);
-}
-
-/**************************************************************//**
-Resets a persistent cursor object, freeing ::old_rec_buf if it is
-allocated and resetting the other members to their initial values. */
-UNIV_INTERN
-void
-btr_pcur_reset(
-/*===========*/
- btr_pcur_t* cursor) /*!< in, out: persistent cursor */
-{
- if (cursor->old_rec_buf != NULL) {
-
- mem_free(cursor->old_rec_buf);
-
- cursor->old_rec_buf = NULL;
- }
-
- cursor->btr_cur.index = NULL;
- cursor->btr_cur.page_cur.rec = NULL;
- cursor->old_rec = NULL;
- cursor->old_n_fields = 0;
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- cursor->latch_mode = BTR_NO_LATCHES;
- cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
-}
-
-/**************************************************************//**
-Frees the memory for a persistent cursor object. */
-UNIV_INTERN
-void
-btr_pcur_free_for_mysql(
-/*====================*/
- btr_pcur_t* cursor) /*!< in, own: persistent cursor */
-{
- btr_pcur_reset(cursor);
- mem_free(cursor);
-}
-
-/**************************************************************//**
-The position of the cursor is stored by taking an initial segment of the
-record the cursor is positioned on, before, or after, and copying it to the
-cursor data structure, or just setting a flag if the cursor id before the
-first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
-page where the cursor is positioned must not be empty if the index tree is
-not totally empty! */
-UNIV_INTERN
-void
-btr_pcur_store_position(
-/*====================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_cur_t* page_cursor;
- buf_block_t* block;
- rec_t* rec;
- dict_index_t* index;
- page_t* page;
- ulint offs;
-
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- block = btr_pcur_get_block(cursor);
-
- SRV_CORRUPT_TABLE_CHECK(block, return;);
-
- index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
-
- page_cursor = btr_pcur_get_page_cur(cursor);
-
- rec = page_cur_get_rec(page_cursor);
- page = page_align(rec);
- offs = page_offset(rec);
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-
- if (page_is_empty(page)) {
- /* It must be an empty index tree; NOTE that in this case
- we do not store the modify_clock, but always do a search
- if we restore the cursor position */
-
- ut_a(btr_page_get_next(page, mtr) == FIL_NULL);
- ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
- ut_ad(page_is_leaf(page));
- ut_ad(page_get_page_no(page) == index->page);
-
- cursor->old_stored = BTR_PCUR_OLD_STORED;
-
- if (page_rec_is_supremum_low(offs)) {
-
- cursor->rel_pos = BTR_PCUR_AFTER_LAST_IN_TREE;
- } else {
- cursor->rel_pos = BTR_PCUR_BEFORE_FIRST_IN_TREE;
- }
-
- return;
- }
-
- if (page_rec_is_supremum_low(offs)) {
-
- rec = page_rec_get_prev(rec);
-
- cursor->rel_pos = BTR_PCUR_AFTER;
-
- } else if (page_rec_is_infimum_low(offs)) {
-
- rec = page_rec_get_next(rec);
-
- cursor->rel_pos = BTR_PCUR_BEFORE;
- } else {
- cursor->rel_pos = BTR_PCUR_ON;
- }
-
- cursor->old_stored = BTR_PCUR_OLD_STORED;
- cursor->old_rec = dict_index_copy_rec_order_prefix(
- index, rec, &cursor->old_n_fields,
- &cursor->old_rec_buf, &cursor->buf_size);
-
- cursor->block_when_stored = block;
- cursor->modify_clock = buf_block_get_modify_clock(block);
-}
-
-/**************************************************************//**
-Copies the stored position of a pcur to another pcur. */
-UNIV_INTERN
-void
-btr_pcur_copy_stored_position(
-/*==========================*/
- btr_pcur_t* pcur_receive, /*!< in: pcur which will receive the
- position info */
- btr_pcur_t* pcur_donate) /*!< in: pcur from which the info is
- copied */
-{
- if (pcur_receive->old_rec_buf) {
- mem_free(pcur_receive->old_rec_buf);
- }
-
- ut_memcpy(pcur_receive, pcur_donate, sizeof(btr_pcur_t));
-
- if (pcur_donate->old_rec_buf) {
-
- pcur_receive->old_rec_buf = (byte*)
- mem_alloc(pcur_donate->buf_size);
-
- ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf,
- pcur_donate->buf_size);
- pcur_receive->old_rec = pcur_receive->old_rec_buf
- + (pcur_donate->old_rec - pcur_donate->old_rec_buf);
- }
-
- pcur_receive->old_n_fields = pcur_donate->old_n_fields;
-}
-
-/**************************************************************//**
-Restores the stored position of a persistent cursor bufferfixing the page and
-obtaining the specified latches. If the cursor position was saved when the
-(1) cursor was positioned on a user record: this function restores the position
-to the last record LESS OR EQUAL to the stored record;
-(2) cursor was positioned on a page infimum record: restores the position to
-the last record LESS than the user record which was the successor of the page
-infimum;
-(3) cursor was positioned on the page supremum: restores to the first record
-GREATER than the user record which was the predecessor of the supremum.
-(4) cursor was positioned before the first or after the last in an empty tree:
-restores to before first or after the last in the tree.
-@return TRUE if the cursor position was stored when it was on a user
-record and it can be restored on a user record whose ordering fields
-are identical to the ones of the original user record */
-UNIV_INTERN
-ibool
-btr_pcur_restore_position_func(
-/*===========================*/
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in: detached persistent cursor */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_index_t* index;
- dtuple_t* tuple;
- ulint mode;
- ulint old_mode;
- mem_heap_t* heap;
-
- ut_ad(mtr);
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
- ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED
- || cursor->pos_state == BTR_PCUR_IS_POSITIONED);
-
- index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
-
- if (UNIV_UNLIKELY
- (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE
- || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) {
-
- /* In these cases we do not try an optimistic restoration,
- but always do a search */
-
- btr_cur_open_at_index_side(
- cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE,
- index, latch_mode,
- btr_pcur_get_btr_cur(cursor), 0, mtr);
-
- cursor->latch_mode = latch_mode;
- cursor->pos_state = BTR_PCUR_IS_POSITIONED;
- cursor->block_when_stored = btr_pcur_get_block(cursor);
-
- return(FALSE);
- }
-
- ut_a(cursor->old_rec);
- ut_a(cursor->old_n_fields);
-
- if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF)
- || UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) {
- /* Try optimistic restoration. */
-
- if (buf_page_optimistic_get(latch_mode,
- cursor->block_when_stored,
- cursor->modify_clock,
- file, line, mtr)) {
- cursor->pos_state = BTR_PCUR_IS_POSITIONED;
- cursor->latch_mode = latch_mode;
-
- buf_block_dbg_add_level(
- btr_pcur_get_block(cursor),
- dict_index_is_ibuf(index)
- ? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE);
-
- if (cursor->rel_pos == BTR_PCUR_ON) {
-#ifdef UNIV_DEBUG
- const rec_t* rec;
- const ulint* offsets1;
- const ulint* offsets2;
- rec = btr_pcur_get_rec(cursor);
-
- heap = mem_heap_create(256);
- offsets1 = rec_get_offsets(
- cursor->old_rec, index, NULL,
- cursor->old_n_fields, &heap);
- offsets2 = rec_get_offsets(
- rec, index, NULL,
- cursor->old_n_fields, &heap);
-
- ut_ad(!cmp_rec_rec(cursor->old_rec,
- rec, offsets1, offsets2,
- index));
- mem_heap_free(heap);
-#endif /* UNIV_DEBUG */
- return(TRUE);
- }
- /* This is the same record as stored,
- may need to be adjusted for BTR_PCUR_BEFORE/AFTER,
- depending on search mode and direction. */
- if (btr_pcur_is_on_user_rec(cursor)) {
- cursor->pos_state
- = BTR_PCUR_IS_POSITIONED_OPTIMISTIC;
- }
- return(FALSE);
- }
- }
-
- /* If optimistic restoration did not succeed, open the cursor anew */
-
- heap = mem_heap_create(256);
-
- tuple = dict_index_build_data_tuple(index, cursor->old_rec,
- cursor->old_n_fields, heap);
-
- /* Save the old search mode of the cursor */
- old_mode = cursor->search_mode;
-
- switch (cursor->rel_pos) {
- case BTR_PCUR_ON:
- mode = PAGE_CUR_LE;
- break;
- case BTR_PCUR_AFTER:
- mode = PAGE_CUR_G;
- break;
- case BTR_PCUR_BEFORE:
- mode = PAGE_CUR_L;
- break;
- default:
- ut_error;
- mode = 0;
- }
-
- btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode,
- cursor, 0, file, line, mtr);
-
- /* Restore the old search mode */
- cursor->search_mode = old_mode;
-
- switch (cursor->rel_pos) {
- case BTR_PCUR_ON:
- if (btr_pcur_is_on_user_rec(cursor)
- && !cmp_dtuple_rec(
- tuple, btr_pcur_get_rec(cursor),
- rec_get_offsets(btr_pcur_get_rec(cursor),
- index, NULL,
- ULINT_UNDEFINED, &heap))) {
-
- /* We have to store the NEW value for
- the modify clock, since the cursor can
- now be on a different page! But we can
- retain the value of old_rec */
-
- cursor->block_when_stored =
- btr_pcur_get_block(cursor);
- cursor->modify_clock =
- buf_block_get_modify_clock(
- cursor->block_when_stored);
- cursor->old_stored = BTR_PCUR_OLD_STORED;
-
- mem_heap_free(heap);
-
- return(TRUE);
- }
-#ifdef UNIV_DEBUG
- /* fall through */
- case BTR_PCUR_BEFORE:
- case BTR_PCUR_AFTER:
- break;
- default:
- ut_error;
-#endif /* UNIV_DEBUG */
- }
-
- mem_heap_free(heap);
-
- /* We have to store new position information, modify_clock etc.,
- to the cursor because it can now be on a different page, the record
- under it may have been removed, etc. */
-
- btr_pcur_store_position(cursor, mtr);
-
- return(FALSE);
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the first record on the next page. Releases the
-latch on the current page, and bufferunfixes it. Note that there must not be
-modifications on the current page, as then the x-latch can be released only in
-mtr_commit. */
-UNIV_INTERN
-void
-btr_pcur_move_to_next_page(
-/*=======================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; must be on the
- last record of the current page */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint next_page_no;
- ulint space;
- ulint zip_size;
- page_t* page;
- buf_block_t* next_block;
- page_t* next_page;
-
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- ut_ad(btr_pcur_is_after_last_on_page(cursor));
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- page = btr_pcur_get_page(cursor);
-
- if (UNIV_UNLIKELY(!page)) {
- return;
- }
-
- next_page_no = btr_page_get_next(page, mtr);
- space = buf_block_get_space(btr_pcur_get_block(cursor));
- zip_size = buf_block_get_zip_size(btr_pcur_get_block(cursor));
-
- ut_ad(next_page_no != FIL_NULL);
-
- next_block = btr_block_get(space, zip_size, next_page_no,
- cursor->latch_mode,
- btr_pcur_get_btr_cur(cursor)->index, mtr);
-
- if (UNIV_UNLIKELY(!next_block)) {
- return;
- }
-
- next_page = buf_block_get_frame(next_block);
-
- SRV_CORRUPT_TABLE_CHECK(next_page,
- {
- btr_leaf_page_release(btr_pcur_get_block(cursor),
- cursor->latch_mode, mtr);
- btr_pcur_get_page_cur(cursor)->block = 0;
- btr_pcur_get_page_cur(cursor)->rec = 0;
-
- return;
- });
-
-#ifdef UNIV_BTR_DEBUG
- ut_a(page_is_comp(next_page) == page_is_comp(page));
- ut_a(btr_page_get_prev(next_page, mtr)
- == buf_block_get_page_no(btr_pcur_get_block(cursor)));
-#endif /* UNIV_BTR_DEBUG */
- next_block->check_index_page_at_flush = TRUE;
-
- btr_leaf_page_release(btr_pcur_get_block(cursor),
- cursor->latch_mode, mtr);
-
- page_cur_set_before_first(next_block, btr_pcur_get_page_cur(cursor));
-
- page_check_dir(next_page);
-}
-
-/*********************************************************//**
-Moves the persistent cursor backward if it is on the first record of the page.
-Commits mtr. Note that to prevent a possible deadlock, the operation
-first stores the position of the cursor, commits mtr, acquires the necessary
-latches and restores the cursor position again before returning. The
-alphabetical position of the cursor is guaranteed to be sensible on
-return, but it may happen that the cursor is not positioned on the last
-record of any page, because the structure of the tree may have changed
-during the time when the cursor had no latches. */
-UNIV_INTERN
-void
-btr_pcur_move_backward_from_page(
-/*=============================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor, must be on the first
- record of the current page */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint prev_page_no;
- page_t* page;
- buf_block_t* prev_block;
- ulint latch_mode;
- ulint latch_mode2;
-
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- ut_ad(btr_pcur_is_before_first_on_page(cursor));
- ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr));
-
- latch_mode = cursor->latch_mode;
-
- if (latch_mode == BTR_SEARCH_LEAF) {
-
- latch_mode2 = BTR_SEARCH_PREV;
-
- } else if (latch_mode == BTR_MODIFY_LEAF) {
-
- latch_mode2 = BTR_MODIFY_PREV;
- } else {
- latch_mode2 = 0; /* To eliminate compiler warning */
- ut_error;
- }
-
- btr_pcur_store_position(cursor, mtr);
-
- mtr_commit(mtr);
-
- mtr_start_trx(mtr, mtr->trx);
-
- btr_pcur_restore_position(latch_mode2, cursor, mtr);
-
- page = btr_pcur_get_page(cursor);
-
- prev_page_no = btr_page_get_prev(page, mtr);
-
- if (prev_page_no == FIL_NULL) {
- } else if (btr_pcur_is_before_first_on_page(cursor)) {
-
- prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
-
- btr_leaf_page_release(btr_pcur_get_block(cursor),
- latch_mode, mtr);
-
- page_cur_set_after_last(prev_block,
- btr_pcur_get_page_cur(cursor));
- } else {
-
- /* The repositioned cursor did not end on an infimum record on
- a page. Cursor repositioning acquired a latch also on the
- previous page, but we do not need the latch: release it. */
-
- prev_block = btr_pcur_get_btr_cur(cursor)->left_block;
-
- btr_leaf_page_release(prev_block, latch_mode, mtr);
- }
-
- cursor->latch_mode = latch_mode;
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the previous record in the tree. If no records
-are left, the cursor stays 'before first in tree'.
-@return TRUE if the cursor was not before first in tree */
-UNIV_INTERN
-ibool
-btr_pcur_move_to_prev(
-/*==================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- if (btr_pcur_is_before_first_on_page(cursor)) {
-
- if (btr_pcur_is_before_first_in_tree(cursor, mtr)) {
-
- return(FALSE);
- }
-
- btr_pcur_move_backward_from_page(cursor, mtr);
-
- return(TRUE);
- }
-
- btr_pcur_move_to_prev_on_page(cursor);
-
- return(TRUE);
-}
-
-/**************************************************************//**
-If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
-user record satisfying the search condition, in the case PAGE_CUR_L or
-PAGE_CUR_LE, on the last user record. If no such user record exists, then
-in the first case sets the cursor after last in tree, and in the latter case
-before first in tree. The latching mode must be BTR_SEARCH_LEAF or
-BTR_MODIFY_LEAF. */
-UNIV_INTERN
-void
-btr_pcur_open_on_user_rec_func(
-/*===========================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ... */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
- BTR_MODIFY_LEAF */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent
- cursor */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr */
-{
- btr_pcur_open_low(index, 0, tuple, mode, latch_mode, cursor,
- file, line, mtr);
-
- if ((mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G)) {
-
- if (btr_pcur_is_after_last_on_page(cursor)) {
-
- btr_pcur_move_to_next_user_rec(cursor, mtr);
- }
- } else {
- ut_ad((mode == PAGE_CUR_LE) || (mode == PAGE_CUR_L));
-
- /* Not implemented yet */
-
- ut_error;
- }
-}
diff --git a/storage/xtradb/btr/btr0scrub.cc b/storage/xtradb/btr/btr0scrub.cc
deleted file mode 100644
index 24c84ed301b..00000000000
--- a/storage/xtradb/btr/btr0scrub.cc
+++ /dev/null
@@ -1,931 +0,0 @@
-// Copyright (c) 2014, Google Inc.
-
-/**************************************************//**
-@file btr/btr0scrub.cc
-Scrubbing of btree pages
-
-*******************************************************/
-
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "btr0scrub.h"
-#include "ibuf0ibuf.h"
-#include "fsp0fsp.h"
-#include "dict0dict.h"
-#include "mtr0mtr.h"
-
-/* used when trying to acquire dict-lock */
-UNIV_INTERN bool fil_crypt_is_closing(ulint space);
-
-/**
-* scrub data at delete time (e.g purge thread)
-*/
-my_bool srv_immediate_scrub_data_uncompressed = false;
-
-/**
-* background scrub uncompressed data
-*
-* if srv_immediate_scrub_data_uncompressed is enabled
-* this is only needed to handle "old" data
-*/
-my_bool srv_background_scrub_data_uncompressed = false;
-
-/**
-* backgrounds scrub compressed data
-*
-* reorganize compressed page for scrubbing
-* (only way to scrub compressed data)
-*/
-my_bool srv_background_scrub_data_compressed = false;
-
-/* check spaces once per hour */
-UNIV_INTERN uint srv_background_scrub_data_check_interval = (60 * 60);
-
-/* default to scrub spaces that hasn't been scrubbed in a week */
-UNIV_INTERN uint srv_background_scrub_data_interval = (7 * 24 * 60 * 60);
-
-/**
-* statistics for scrubbing by background threads
-*/
-static btr_scrub_stat_t scrub_stat;
-static ib_mutex_t scrub_stat_mutex;
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t scrub_stat_mutex_key;
-#endif
-
-#ifdef UNIV_DEBUG
-/**
-* srv_scrub_force_testing
-*
-* - force scrubbing using background threads even for uncompressed tables
-* - force pessimistic scrubbing (page split) even if not needed
-* (see test_pessimistic_scrub_pct)
-*/
-my_bool srv_scrub_force_testing = true;
-
-/**
-* Force pessimistic scrubbing in 50% of the cases (UNIV_DEBUG only)
-*/
-static int test_pessimistic_scrub_pct = 50;
-
-#endif
-static uint scrub_compression_level = page_zip_level;
-
-/**************************************************************//**
-Log a scrubbing failure */
-static
-void
-log_scrub_failure(
-/*===============*/
- btr_scrub_t* scrub_data, /*!< in: data to store statistics on */
- buf_block_t* block, /*!< in: block */
- dberr_t err) /*!< in: error */
-{
- const char* reason = "unknown";
- switch(err) {
- case DB_UNDERFLOW:
- reason = "too few records on page";
- scrub_data->scrub_stat.page_split_failures_underflow++;
- break;
- case DB_INDEX_CORRUPT:
- reason = "unable to find index!";
- scrub_data->scrub_stat.page_split_failures_missing_index++;
- break;
- case DB_OUT_OF_FILE_SPACE:
- reason = "out of filespace";
- scrub_data->scrub_stat.page_split_failures_out_of_filespace++;
- break;
- default:
- ut_ad(0);
- reason = "unknown";
- scrub_data->scrub_stat.page_split_failures_unknown++;
- }
- fprintf(stderr,
- "InnoDB: Warning: Failed to scrub page %lu in space %lu : %s\n",
- buf_block_get_page_no(block),
- buf_block_get_space(block),
- reason);
-}
-
-/****************************************************************
-Lock dict mutexes */
-static
-bool
-btr_scrub_lock_dict_func(ulint space_id, bool lock_to_close_table,
- const char * file, uint line)
-{
- time_t start = time(0);
- time_t last = start;
-
- /* FIXME: this is not the proper way of doing things. The
- dict_sys->mutex should not be held by any thread for longer
- than a few microseconds. It must not be held during I/O,
- for example. So, what is the purpose for this busy-waiting?
- This function should be rewritten as part of MDEV-8139:
- Fix scrubbing tests. */
-
- while (mutex_enter_nowait_func(&(dict_sys->mutex), file, line)) {
- /* if we lock to close a table, we wait forever
- * if we don't lock to close a table, we check if space
- * is closing, and then instead give up
- */
- if (lock_to_close_table) {
- } else if (fil_space_t* space = fil_space_acquire(space_id)) {
- bool stopping = space->is_stopping();
- fil_space_release(space);
- if (stopping) {
- return false;
- }
- } else {
- return false;
- }
-
- os_thread_sleep(250000);
-
- time_t now = time(0);
- if (now >= last + 30) {
- fprintf(stderr,
- "WARNING: %s:%u waited %ld seconds for"
- " dict_sys lock, space: %lu"
- " lock_to_close_table: %d\n",
- file, line, now - start, space_id,
- lock_to_close_table);
-
- last = now;
- }
- }
-
- ut_ad(mutex_own(&dict_sys->mutex));
- return true;
-}
-
-#define btr_scrub_lock_dict(space, lock_to_close_table) \
- btr_scrub_lock_dict_func(space, lock_to_close_table, __FILE__, __LINE__)
-
-/****************************************************************
-Unlock dict mutexes */
-static
-void
-btr_scrub_unlock_dict()
-{
- dict_mutex_exit_for_mysql();
-}
-
-/****************************************************************
-Release reference to table
-*/
-static
-void
-btr_scrub_table_close(
-/*==================*/
- dict_table_t* table) /*!< in: table */
-{
- bool dict_locked = true;
- bool try_drop = false;
- table->stats_bg_flag &= ~BG_SCRUB_IN_PROGRESS;
- dict_table_close(table, dict_locked, try_drop);
-}
-
-/****************************************************************
-Release reference to table
-*/
-static
-void
-btr_scrub_table_close_for_thread(
- btr_scrub_t *scrub_data)
-{
- if (scrub_data->current_table == NULL) {
- return;
- }
-
- if (fil_space_t* space = fil_space_acquire(scrub_data->space)) {
- /* If tablespace is not marked as stopping perform
- the actual close. */
- if (!space->is_stopping()) {
- mutex_enter(&dict_sys->mutex);
- /* perform the actual closing */
- btr_scrub_table_close(scrub_data->current_table);
- mutex_exit(&dict_sys->mutex);
- }
- fil_space_release(space);
- }
-
- scrub_data->current_table = NULL;
- scrub_data->current_index = NULL;
-}
-
-/**************************************************************//**
-Check if scrubbing is turned ON or OFF */
-static
-bool
-check_scrub_setting(
-/*=====================*/
- btr_scrub_t* scrub_data) /*!< in: scrub data */
-{
- if (scrub_data->compressed)
- return srv_background_scrub_data_compressed;
- else
- return srv_background_scrub_data_uncompressed;
-}
-
-#define IBUF_INDEX_ID (DICT_IBUF_ID_MIN + IBUF_SPACE_ID)
-
-/**************************************************************//**
-Check if a page needs scrubbing */
-UNIV_INTERN
-int
-btr_page_needs_scrubbing(
-/*=====================*/
- btr_scrub_t* scrub_data, /*!< in: scrub data */
- buf_block_t* block, /*!< in: block to check, latched */
- btr_scrub_page_allocation_status_t allocated) /*!< in: is block known
- to be allocated */
-{
- /**
- * Check if scrubbing has been turned OFF.
- *
- * at start of space, we check if scrubbing is ON or OFF
- * here we only check if scrubbing is turned OFF.
- *
- * Motivation is that it's only valueable to have a full table (space)
- * scrubbed.
- */
- if (!check_scrub_setting(scrub_data)) {
- bool before_value = scrub_data->scrubbing;
- scrub_data->scrubbing = false;
-
- if (before_value == true) {
- /* we toggle scrubbing from on to off */
- return BTR_SCRUB_TURNED_OFF;
- }
- }
-
- if (scrub_data->scrubbing == false) {
- return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
- }
-
- page_t* page = buf_block_get_frame(block);
- uint type = fil_page_get_type(page);
-
- if (allocated == BTR_SCRUB_PAGE_ALLOCATED) {
- if (type != FIL_PAGE_INDEX) {
- /* this function is called from fil-crypt-threads.
- * these threads iterate all pages of all tablespaces
- * and don't know about fil_page_type.
- * But scrubbing is only needed for index-pages. */
-
- /**
- * NOTE: scrubbing is also needed for UNDO pages,
- * but they are scrubbed at purge-time, since they are
- * uncompressed
- */
-
- /* if encountering page type not needing scrubbing
- release reference to table object */
- return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
- }
-
- if (page_has_garbage(page) == false) {
- /* no garbage (from deleted/shrunken records) */
- return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
- }
-
- } else if (allocated == BTR_SCRUB_PAGE_FREE ||
- allocated == BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN) {
-
- if (! (type == FIL_PAGE_INDEX ||
- type == FIL_PAGE_TYPE_BLOB ||
- type == FIL_PAGE_TYPE_ZBLOB ||
- type == FIL_PAGE_TYPE_ZBLOB2)) {
-
- /**
- * If this is a dropped page, we also need to scrub
- * BLOB pages
- */
-
- /* if encountering page type not needing scrubbing
- release reference to table object */
- return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
- }
- }
-
- if (btr_page_get_index_id(page) == IBUF_INDEX_ID) {
- /* skip ibuf */
- return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
- }
-
- return BTR_SCRUB_PAGE;
-}
-
-/****************************************************************
-Handle a skipped page
-*/
-UNIV_INTERN
-void
-btr_scrub_skip_page(
-/*==================*/
- btr_scrub_t* scrub_data, /*!< in: data with scrub state */
- int needs_scrubbing) /*!< in: return code from
- btr_page_needs_scrubbing */
-{
- switch(needs_scrubbing) {
- case BTR_SCRUB_SKIP_PAGE:
- /* nothing todo */
- return;
- case BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE:
- btr_scrub_table_close_for_thread(scrub_data);
- return;
- case BTR_SCRUB_TURNED_OFF:
- case BTR_SCRUB_SKIP_PAGE_AND_COMPLETE_SPACE:
- btr_scrub_complete_space(scrub_data);
- return;
- }
-
- /* unknown value. should not happen */
- ut_a(0);
-}
-
-/****************************************************************
-Try to scrub a page using btr_page_reorganize_low
-return DB_SUCCESS on success or DB_OVERFLOW on failure */
-static
-dberr_t
-btr_optimistic_scrub(
-/*==================*/
- btr_scrub_t* scrub_data, /*!< in: data with scrub state */
- buf_block_t* block, /*!< in: block to scrub */
- dict_index_t* index, /*!< in: index */
- mtr_t* mtr) /*!< in: mtr */
-{
-#ifdef UNIV_DEBUG
- if (srv_scrub_force_testing &&
- page_get_n_recs(buf_block_get_frame(block)) > 2 &&
- (rand() % 100) < test_pessimistic_scrub_pct) {
-
- fprintf(stderr,
- "scrub: simulate btr_page_reorganize failed %lu:%lu "
- " table: %llu:%s index: %llu:%s get_n_recs(): %lu\n",
- buf_block_get_space(block),
- buf_block_get_page_no(block),
- (ulonglong)scrub_data->current_table->id,
- scrub_data->current_table->name,
- (ulonglong)scrub_data->current_index->id,
- scrub_data->current_index->name,
- page_get_n_recs(buf_block_get_frame(block)));
- return DB_OVERFLOW;
- }
-#endif
-
- page_cur_t cur;
- page_cur_set_before_first(block, &cur);
- bool recovery = false;
- if (!btr_page_reorganize_low(recovery, scrub_compression_level,
- &cur, index, mtr)) {
- return DB_OVERFLOW;
- }
-
- /* We play safe and reset the free bits */
- if (!dict_index_is_clust(index) &&
- block != NULL) {
- buf_frame_t* frame = buf_block_get_frame(block);
- if (frame &&
- page_is_leaf(frame)) {
-
- ibuf_reset_free_bits(block);
- }
- }
-
- scrub_data->scrub_stat.page_reorganizations++;
-
- return DB_SUCCESS;
-}
-
-/****************************************************************
-Try to scrub a page by splitting it
-return DB_SUCCESS on success
-DB_UNDERFLOW if page has too few records
-DB_OUT_OF_FILE_SPACE if we can't find space for split */
-static
-dberr_t
-btr_pessimistic_scrub(
-/*==================*/
- btr_scrub_t* scrub_data, /*!< in: data with scrub state */
- buf_block_t* block, /*!< in: block to scrub */
- dict_index_t* index, /*!< in: index */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* page = buf_block_get_frame(block);
- if (page_get_n_recs(page) < 2) {
- /**
- * There is no way we can split a page with < 2 records
- */
- log_scrub_failure(scrub_data, block, DB_UNDERFLOW);
- return DB_UNDERFLOW;
- }
-
- /**
- * Splitting page needs new space, allocate it here
- * so that splitting won't fail due to this */
- ulint n_extents = 3;
- ulint n_reserved = 0;
- if (!fsp_reserve_free_extents(&n_reserved, index->space,
- n_extents, FSP_NORMAL, mtr)) {
- log_scrub_failure(scrub_data, block,
- DB_OUT_OF_FILE_SPACE);
- return DB_OUT_OF_FILE_SPACE;
- }
-
- /* read block variables */
- ulint space = buf_block_get_space(block);
- ulint page_no = buf_block_get_page_no(block);
- ulint zip_size = buf_block_get_zip_size(block);
- ulint left_page_no = btr_page_get_prev(page, mtr);
- ulint right_page_no = btr_page_get_next(page, mtr);
-
- /**
- * When splitting page, we need X-latches on left/right brothers
- * see e.g btr_cur_latch_leaves
- */
-
- if (left_page_no != FIL_NULL) {
- /**
- * pages needs to be locked left-to-right, release block
- * and re-lock. We still have x-lock on index
- * so this should be safe
- */
- mtr_release_buf_page_at_savepoint(mtr, scrub_data->savepoint,
- block);
-
- buf_block_t* get_block = btr_block_get(
- space, zip_size, left_page_no,
- RW_X_LATCH, index, mtr);
- get_block->check_index_page_at_flush = TRUE;
-
- /**
- * Refetch block and re-initialize page
- */
- block = btr_block_get(
- space, zip_size, page_no,
- RW_X_LATCH, index, mtr);
-
- page = buf_block_get_frame(block);
-
- /**
- * structure should be unchanged
- */
- ut_a(left_page_no == btr_page_get_prev(page, mtr));
- ut_a(right_page_no == btr_page_get_next(page, mtr));
- }
-
- if (right_page_no != FIL_NULL) {
- buf_block_t* get_block = btr_block_get(
- space, zip_size, right_page_no,
- RW_X_LATCH, index, mtr);
- get_block->check_index_page_at_flush = TRUE;
- }
-
- /* arguments to btr_page_split_and_insert */
- mem_heap_t* heap = NULL;
- dtuple_t* entry = NULL;
- ulint* offsets = NULL;
- ulint n_ext = 0;
- ulint flags = BTR_MODIFY_TREE;
-
- /**
- * position a cursor on first record on page
- */
- rec_t* rec = page_rec_get_next(page_get_infimum_rec(page));
- btr_cur_t cursor;
- btr_cur_position(index, rec, block, &cursor);
-
- /**
- * call split page with NULL as argument for entry to insert
- */
- if (dict_index_get_page(index) == buf_block_get_page_no(block)) {
- /* The page is the root page
- * NOTE: ibuf_reset_free_bits is called inside
- * btr_root_raise_and_insert */
- rec = btr_root_raise_and_insert(
- flags, &cursor, &offsets, &heap, entry, n_ext, mtr);
- } else {
- /* We play safe and reset the free bits
- * NOTE: need to call this prior to btr_page_split_and_insert */
- if (!dict_index_is_clust(index) &&
- block != NULL) {
- buf_frame_t* frame = buf_block_get_frame(block);
- if (frame &&
- page_is_leaf(frame)) {
-
- ibuf_reset_free_bits(block);
- }
- }
-
- rec = btr_page_split_and_insert(
- flags, &cursor, &offsets, &heap, entry, n_ext, mtr);
- }
-
- if (heap) {
- mem_heap_free(heap);
- }
-
- if (n_reserved > 0) {
- fil_space_release_free_extents(index->space, n_reserved);
- }
-
- scrub_data->scrub_stat.page_splits++;
- return DB_SUCCESS;
-}
-
-/****************************************************************
-Location index by id for a table
-return index or NULL */
-static
-dict_index_t*
-find_index(
-/*========*/
- dict_table_t* table, /*!< in: table */
- index_id_t index_id) /*!< in: index id */
-{
- if (table != NULL) {
- dict_index_t* index = dict_table_get_first_index(table);
- while (index != NULL) {
- if (index->id == index_id)
- return index;
- index = dict_table_get_next_index(index);
- }
- }
-
- return NULL;
-}
-
-/****************************************************************
-Check if table should be scrubbed
-*/
-static
-bool
-btr_scrub_table_needs_scrubbing(
-/*============================*/
- dict_table_t* table) /*!< in: table */
-{
- if (table == NULL)
- return false;
-
- if (table->stats_bg_flag & BG_STAT_SHOULD_QUIT) {
- return false;
- }
-
- if (table->to_be_dropped) {
- return false;
- }
-
- if (!table->is_readable()) {
- return false;
- }
-
- return true;
-}
-
-/****************************************************************
-Check if index should be scrubbed
-*/
-static
-bool
-btr_scrub_index_needs_scrubbing(
-/*============================*/
- dict_index_t* index) /*!< in: index */
-{
- if (index == NULL)
- return false;
-
- if (dict_index_is_ibuf(index)) {
- return false;
- }
-
- if (dict_index_is_online_ddl(index)) {
- return false;
- }
-
- return true;
-}
-
-/****************************************************************
-Get table and index and store it on scrub_data
-*/
-static
-void
-btr_scrub_get_table_and_index(
-/*=========================*/
- btr_scrub_t* scrub_data, /*!< in/out: scrub data */
- index_id_t index_id) /*!< in: index id */
-{
- /* first check if it's an index to current table */
- scrub_data->current_index = find_index(scrub_data->current_table,
- index_id);
-
- if (scrub_data->current_index != NULL) {
- /* yes it was */
- return;
- }
-
- if (!btr_scrub_lock_dict(scrub_data->space, false)) {
- btr_scrub_complete_space(scrub_data);
- return;
- }
-
- /* close current table (if any) */
- if (scrub_data->current_table != NULL) {
- btr_scrub_table_close(scrub_data->current_table);
- scrub_data->current_table = NULL;
- }
-
- /* argument to dict_table_open_on_index_id */
- bool dict_locked = true;
-
- /* open table based on index_id */
- dict_table_t* table = dict_table_open_on_index_id(
- index_id,
- dict_locked);
-
- if (table != NULL) {
- /* mark table as being scrubbed */
- table->stats_bg_flag |= BG_SCRUB_IN_PROGRESS;
-
- if (!btr_scrub_table_needs_scrubbing(table)) {
- btr_scrub_table_close(table);
- btr_scrub_unlock_dict();
- return;
- }
- }
-
- btr_scrub_unlock_dict();
- scrub_data->current_table = table;
- scrub_data->current_index = find_index(table, index_id);
-}
-
-/****************************************************************
-Handle free page */
-UNIV_INTERN
-int
-btr_scrub_free_page(
-/*====================*/
- btr_scrub_t* scrub_data, /*!< in/out: scrub data */
- buf_block_t* block, /*!< in: block to scrub */
- mtr_t* mtr) /*!< in: mtr */
-{
- // TODO(jonaso): scrub only what is actually needed
-
- {
- /* note: perform both the memset and setting of FIL_PAGE_TYPE
- * wo/ logging. so that if we crash before page is flushed
- * it will be found by scrubbing thread again
- */
- memset(buf_block_get_frame(block) + PAGE_HEADER, 0,
- UNIV_PAGE_SIZE - PAGE_HEADER);
-
- mach_write_to_2(buf_block_get_frame(block) + FIL_PAGE_TYPE,
- FIL_PAGE_TYPE_ALLOCATED);
- }
-
- ulint compact = 1;
- page_create(block, mtr, compact);
-
- mtr_commit(mtr);
-
- /* page doesn't need further processing => SKIP
- * and close table/index so that we don't keep references too long */
- return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
-}
-
-/****************************************************************
-Recheck if a page needs scrubbing, and if it does load appropriate
-table and index */
-UNIV_INTERN
-int
-btr_scrub_recheck_page(
-/*====================*/
- btr_scrub_t* scrub_data, /*!< inut: scrub data */
- buf_block_t* block, /*!< in: block */
- btr_scrub_page_allocation_status_t allocated, /*!< in: is block
- allocated or free */
- mtr_t* mtr) /*!< in: mtr */
-{
- /* recheck if page needs scrubbing (knowing allocation status) */
- int needs_scrubbing = btr_page_needs_scrubbing(
- scrub_data, block, allocated);
-
- if (needs_scrubbing != BTR_SCRUB_PAGE) {
- mtr_commit(mtr);
- return needs_scrubbing;
- }
-
- if (allocated == BTR_SCRUB_PAGE_FREE) {
- /** we don't need to load table/index for free pages
- * so scrub directly here */
- /* mtr is committed inside btr_scrub_page_free */
- return btr_scrub_free_page(scrub_data,
- block,
- mtr);
- }
-
- page_t* page = buf_block_get_frame(block);
- index_id_t index_id = btr_page_get_index_id(page);
-
- if (scrub_data->current_index == NULL ||
- scrub_data->current_index->id != index_id) {
-
- /**
- * commit mtr (i.e release locks on block)
- * and try to get table&index potentially loading it
- * from disk
- */
- mtr_commit(mtr);
- btr_scrub_get_table_and_index(scrub_data, index_id);
- } else {
- /* we already have correct index
- * commit mtr so that we can lock index before fetching page
- */
- mtr_commit(mtr);
- }
-
- /* check if table is about to be dropped */
- if (!btr_scrub_table_needs_scrubbing(scrub_data->current_table)) {
- return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
- }
-
- /* check if index is scrubbable */
- if (!btr_scrub_index_needs_scrubbing(scrub_data->current_index)) {
- return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
- }
-
- mtr_start(mtr);
- mtr_x_lock(dict_index_get_lock(scrub_data->current_index), mtr);
- /** set savepoint for X-latch of block */
- scrub_data->savepoint = mtr_set_savepoint(mtr);
- return BTR_SCRUB_PAGE;
-}
-
-/****************************************************************
-Perform actual scrubbing of page */
-UNIV_INTERN
-int
-btr_scrub_page(
-/*============*/
- btr_scrub_t* scrub_data, /*!< in/out: scrub data */
- buf_block_t* block, /*!< in: block */
- btr_scrub_page_allocation_status_t allocated, /*!< in: is block
- allocated or free */
- mtr_t* mtr) /*!< in: mtr */
-{
- /* recheck if page needs scrubbing (knowing allocation status) */
- int needs_scrubbing = BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
-
- if (block) {
- btr_page_needs_scrubbing(scrub_data, block, allocated);
- }
-
- if (!block || needs_scrubbing != BTR_SCRUB_PAGE) {
- mtr_commit(mtr);
- return needs_scrubbing;
- }
-
- if (allocated == BTR_SCRUB_PAGE_FREE) {
- /* mtr is committed inside btr_scrub_page_free */
- return btr_scrub_free_page(scrub_data,
- block,
- mtr);
- }
-
- /* check that table/index still match now that they are loaded */
-
- if (scrub_data->current_table->space != scrub_data->space) {
- /* this is truncate table */
- mtr_commit(mtr);
- return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
- }
-
- if (scrub_data->current_index->space != scrub_data->space) {
- /* this is truncate table */
- mtr_commit(mtr);
- return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
- }
-
- if (scrub_data->current_index->page == FIL_NULL) {
- /* this is truncate table */
- mtr_commit(mtr);
- return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
- }
-
- buf_frame_t* frame = buf_block_get_frame(block);
-
- if (!frame || btr_page_get_index_id(frame) !=
- scrub_data->current_index->id) {
- /* page has been reallocated to new index */
- mtr_commit(mtr);
- return BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE;
- }
-
- /* check if I can scrub (reorganize) page wo/ overflow */
- if (btr_optimistic_scrub(scrub_data,
- block,
- scrub_data->current_index,
- mtr) != DB_SUCCESS) {
-
- /**
- * Can't reorganize page...need to split it
- */
- btr_pessimistic_scrub(scrub_data,
- block,
- scrub_data->current_index,
- mtr);
- }
- mtr_commit(mtr);
-
- return BTR_SCRUB_SKIP_PAGE; // no further action needed
-}
-
-/**************************************************************//**
-Start iterating a space */
-UNIV_INTERN
-bool
-btr_scrub_start_space(
-/*===================*/
- ulint space, /*!< in: space */
- btr_scrub_t* scrub_data) /*!< in/out: scrub data */
-{
- scrub_data->space = space;
- scrub_data->current_table = NULL;
- scrub_data->current_index = NULL;
-
- scrub_data->compressed = fil_space_get_zip_size(space) > 0;
- scrub_data->scrubbing = check_scrub_setting(scrub_data);
- return scrub_data->scrubbing;
-}
-
-/***********************************************************************
-Update global statistics with thread statistics */
-static
-void
-btr_scrub_update_total_stat(btr_scrub_t *scrub_data)
-{
- mutex_enter(&scrub_stat_mutex);
- scrub_stat.page_reorganizations +=
- scrub_data->scrub_stat.page_reorganizations;
- scrub_stat.page_splits +=
- scrub_data->scrub_stat.page_splits;
- scrub_stat.page_split_failures_underflow +=
- scrub_data->scrub_stat.page_split_failures_underflow;
- scrub_stat.page_split_failures_out_of_filespace +=
- scrub_data->scrub_stat.page_split_failures_out_of_filespace;
- scrub_stat.page_split_failures_missing_index +=
- scrub_data->scrub_stat.page_split_failures_missing_index;
- scrub_stat.page_split_failures_unknown +=
- scrub_data->scrub_stat.page_split_failures_unknown;
- mutex_exit(&scrub_stat_mutex);
-
- // clear stat
- memset(&scrub_data->scrub_stat, 0, sizeof(scrub_data->scrub_stat));
-}
-
-/**************************************************************//**
-Complete iterating a space */
-UNIV_INTERN
-bool
-btr_scrub_complete_space(
-/*=====================*/
- btr_scrub_t* scrub_data) /*!< in/out: scrub data */
-{
- btr_scrub_table_close_for_thread(scrub_data);
- btr_scrub_update_total_stat(scrub_data);
- return scrub_data->scrubbing;
-}
-
-/*********************************************************************
-Return scrub statistics */
-void
-btr_scrub_total_stat(btr_scrub_stat_t *stat)
-{
- mutex_enter(&scrub_stat_mutex);
- *stat = scrub_stat;
- mutex_exit(&scrub_stat_mutex);
-}
-
-/*********************************************************************
-Init global variables */
-UNIV_INTERN
-void
-btr_scrub_init()
-{
- mutex_create(scrub_stat_mutex_key,
- &scrub_stat_mutex, SYNC_NO_ORDER_CHECK);
-
- memset(&scrub_stat, 0, sizeof(scrub_stat));
-}
-
-/*********************************************************************
-Cleanup globals */
-UNIV_INTERN
-void
-btr_scrub_cleanup()
-{
- mutex_free(&scrub_stat_mutex);
-}
diff --git a/storage/xtradb/btr/btr0sea.cc b/storage/xtradb/btr/btr0sea.cc
deleted file mode 100644
index 2f0428747d5..00000000000
--- a/storage/xtradb/btr/btr0sea.cc
+++ /dev/null
@@ -1,2085 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file btr/btr0sea.cc
-The index tree adaptive search
-
-Created 2/17/1996 Heikki Tuuri
-*************************************************************************/
-
-#include "btr0sea.h"
-#ifdef UNIV_NONINL
-#include "btr0sea.ic"
-#endif
-
-#include "buf0buf.h"
-#include "page0page.h"
-#include "page0cur.h"
-#include "btr0cur.h"
-#include "btr0pcur.h"
-#include "btr0btr.h"
-#include "ha0ha.h"
-#include "srv0srv.h"
-/** Flag: has the search system been enabled?
-Protected by the AHI partition latches (btr_search_latch_arr): it is
-written in btr_search_enable() and btr_search_disable() while all of
-them are x-locked, and re-checked in btr_search_guess_on_hash() after
-the s-latch of one partition has been acquired. */
-UNIV_INTERN char btr_search_enabled = TRUE;
-
-/** Number of adaptive hash index partitions; this is the length of
-btr_search_latch_arr and of btr_search_sys->hash_tables */
-UNIV_INTERN ulint btr_search_index_num;
-
-/** A dummy variable to fool the compiler: it is added to a pointer in
-btr_search_info_update_slow() so that the compiler cannot deduce the
-values passed on to btr_search_build_page_hash_index() */
-UNIV_INTERN ulint btr_search_this_is_zero = 0;
-
-/** padding to prevent other memory update
-hotspots from residing on the same memory
-cache line as the btr_search_latch_arr pointer */
-UNIV_INTERN byte btr_sea_pad1[CACHE_LINE_SIZE];
-
-/** Array of latches protecting individual AHI partitions. The latches
-protect: (1) positions of records on those pages where a hash index from the
-corresponding AHI partition has been built.
-NOTE: They do not protect values of non-ordering fields within a record from
-being updated in-place! We can use fact (1) to perform unique searches to
-indexes.
-The array has btr_search_index_num elements and is allocated in
-btr_search_sys_create(). */
-
-UNIV_INTERN prio_rw_lock_t* btr_search_latch_arr;
-
-/** padding to prevent other memory update hotspots from residing on
-the same memory cache line */
-UNIV_INTERN byte btr_sea_pad2[CACHE_LINE_SIZE];
-
-/** The adaptive hash index */
-UNIV_INTERN btr_search_sys_t* btr_search_sys;
-
-#ifdef UNIV_PFS_RWLOCK
-/* Key under which the AHI partition latches (btr_search_latch_arr) are
-registered with performance schema; see rw_lock_create() in
-btr_search_sys_create() */
-UNIV_INTERN mysql_pfs_key_t btr_search_latch_key;
-#endif /* UNIV_PFS_RWLOCK */
-
-/** If the number of records on the page divided by this parameter
-would have been successfully accessed using a hash index, the index
-is then built on the page, assuming the global limit has been reached */
-#define BTR_SEARCH_PAGE_BUILD_LIMIT 16
-
-/** The global limit for consecutive potentially successful hash searches,
-before hash index building is started */
-#define BTR_SEARCH_BUILD_LIMIT 100
-
-/********************************************************************//**
-Builds a hash index on a page with the given parameters. If the page already
-has a hash index with different parameters, the old hash index is removed.
-If index is non-NULL, this function checks if n_fields and n_bytes are
-sensible values, and does not build a hash index if not.
-This is only a forward declaration: the function is called from
-btr_search_info_update_slow() before its definition appears. */
-static
-void
-btr_search_build_page_hash_index(
-/*=============================*/
- dict_index_t* index, /*!< in: index for which to build, or NULL if
- not known */
- buf_block_t* block, /*!< in: index page, s- or x-latched */
- ulint n_fields,/*!< in: hash this many full fields */
- ulint n_bytes,/*!< in: hash this many bytes from the next
- field */
- ibool left_side);/*!< in: hash for searches from left side? */
-
-/*****************************************************************//**
-This function should be called before reserving any btr search mutex, if
-the intended operation might add nodes to the search system hash table.
-Because of the latching order, once we have reserved the btr search system
-latch, we cannot allocate a free frame from the buffer pool. Checks that
-there is a free buffer frame allocated for hash table heap in the btr search
-system. If not, allocates a free frame for the heap. This check makes it
-probable that, when we have reserved the btr search system latch and we need
-to allocate a new node to the hash table, it will succeed. However, the check
-will not guarantee success.
-The caller must not hold the AHI partition latch of index in any mode
-(asserted under UNIV_SYNC_DEBUG), because this function x-locks it. */
-static
-void
-btr_search_check_free_space_in_heap(
-/*================================*/
- dict_index_t* index) /*!< in: index; selects the AHI partition
- (latch and hash table) to check */
-{
- hash_table_t* table;
- mem_heap_t* heap;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- table = btr_search_get_hash_table(index);
-
- heap = table->heap;
-
- /* Note that we peek the value of heap->free_block without reserving
- the latch: this is ok, because we will not guarantee that there will
- be enough free space in the hash table. */
-
- if (heap->free_block == NULL) {
- buf_block_t* block = buf_block_alloc(NULL); /* allocated before the latch is taken */
-
- rw_lock_x_lock(btr_search_get_latch(index));
-
- if (heap->free_block == NULL) { /* re-check under the x-latch */
- heap->free_block = block;
- } else {
- buf_block_free(block); /* free_block was set meanwhile: give ours back */
- }
-
- rw_lock_x_unlock(btr_search_get_latch(index));
- }
-}
-
-/*****************************************************************//**
-Creates and initializes the adaptive search system at a database start.
-Allocates btr_search_latch_arr, btr_search_sys and
-btr_search_sys->hash_tables, each array with btr_search_index_num elements,
-and then creates one latch and one hash table per partition. The hash_size
-argument is divided by btr_search_index_num first, so
-btr_search_index_num must be non-zero when this is called. */
-UNIV_INTERN
-void
-btr_search_sys_create(
-/*==================*/
- ulint hash_size) /*!< in: hash index hash table size */
-{
- ulint i;
-
- /* PS bug lp:1018264 - Multiple hash index partitions causes overly
- large hash index: When multiple adaptive hash index partitions are
- specified, _each_ partition was being created with hash_size which
- should be 1/64 of the total size of all buffer pools which is
- incorrect and can cause overly high memory usage. hash_size
- should be representing the _total_ size of all partitions, not the
- individual size of each partition. */
- hash_size /= btr_search_index_num;
-
- /* We allocate the search latch from dynamic memory:
- see above at the global variable definition */
-
- /* btr_search_index_num is constrained to machine word size for
- historical reasons. This limitation can be easily removed later. */
-
- btr_search_latch_arr = (prio_rw_lock_t *)
- mem_alloc(sizeof(prio_rw_lock_t) * btr_search_index_num);
-
- btr_search_sys = (btr_search_sys_t*)
- mem_alloc(sizeof(btr_search_sys_t));
-
- btr_search_sys->hash_tables = (hash_table_t **)
- mem_alloc(sizeof(hash_table_t *) * btr_search_index_num);
-
- for (i = 0; i < btr_search_index_num; i++) {
-
- rw_lock_create(btr_search_latch_key,
- &btr_search_latch_arr[i], SYNC_SEARCH_SYS);
-
- btr_search_sys->hash_tables[i]
- = ib_create(hash_size, 0, MEM_HEAP_FOR_BTR_SEARCH, 0); /* per-partition size */
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- btr_search_sys->hash_tables[i]->adaptive = TRUE;
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- }
-}
-
-/*****************************************************************//**
-Frees the adaptive search system at a database shutdown.
-For every partition the latch, the hash table heap and the hash table
-are freed; then the latch array, the hash table pointer array and
-btr_search_sys itself are freed. btr_search_latch_arr and btr_search_sys
-are reset to NULL afterwards. */
-UNIV_INTERN
-void
-btr_search_sys_free(void)
-/*=====================*/
-{
- ulint i;
-
- for (i = 0; i < btr_search_index_num; i++) {
-
- rw_lock_free(&btr_search_latch_arr[i]);
-
- mem_heap_free(btr_search_sys->hash_tables[i]->heap); /* heap first: it is reached through the table */
-
- hash_table_free(btr_search_sys->hash_tables[i]);
-
- }
-
- mem_free(btr_search_latch_arr);
- btr_search_latch_arr = NULL;
-
- mem_free(btr_search_sys->hash_tables);
-
- mem_free(btr_search_sys);
- btr_search_sys = NULL;
-}
-
-/********************************************************************//**
-Set index->ref_count = 0 on all indexes of a table.
-The caller must hold dict_sys->mutex (asserted in debug builds) and,
-as asserted under UNIV_SYNC_DEBUG, the x-latch of the AHI partition of
-every index of the table. */
-static
-void
-btr_search_disable_ref_count(
-/*=========================*/
- dict_table_t* table) /*!< in/out: table */
-{
- dict_index_t* index;
-
- ut_ad(mutex_own(&dict_sys->mutex));
-
- for (index = dict_table_get_first_index(table); index;
- index = dict_table_get_next_index(index)) {
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(btr_search_get_latch(index),
- RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- index->search_info->ref_count = 0;
- }
-}
-
-/********************************************************************//**
-Disable the adaptive hash search system and empty the index.
-Steps, in order: take dict_sys->mutex and x-lock all AHI partition
-latches; clear btr_search_enabled; zero the ref_count of every index of
-every table on dict_sys->table_LRU and dict_sys->table_non_LRU; release
-dict_sys->mutex; clear block->index in the buffer pool; clear every
-partition hash table and empty its heap; release the AHI latches.
-NOTE(review): the second list walk iterates table_non_LRU through the
-table_LRU link name; presumably both lists share that node field -
-confirm against the dict_table_t declaration. */
-UNIV_INTERN
-void
-btr_search_disable(void)
-/*====================*/
-{
- dict_table_t* table;
- ulint i;
-
- mutex_enter(&dict_sys->mutex);
- btr_search_x_lock_all();
-
- btr_search_enabled = FALSE;
-
- /* Clear the index->search_info->ref_count of every index in
- the data dictionary cache. */
- for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); table;
- table = UT_LIST_GET_NEXT(table_LRU, table)) {
-
- btr_search_disable_ref_count(table);
- }
-
- for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU); table;
- table = UT_LIST_GET_NEXT(table_LRU, table)) {
-
- btr_search_disable_ref_count(table);
- }
-
- mutex_exit(&dict_sys->mutex);
-
- /* Set all block->index = NULL. */
- buf_pool_clear_hash_index();
-
- /* Clear the adaptive hash index. */
- for (i = 0; i < btr_search_index_num; i++) {
- hash_table_clear(btr_search_sys->hash_tables[i]);
- mem_heap_empty(btr_search_sys->hash_tables[i]->heap);
- }
-
- btr_search_x_unlock_all();
-}
-
-/********************************************************************//**
-Enable the adaptive hash search system.
-Only sets btr_search_enabled to TRUE, with all AHI partition latches
-x-locked around the write; no hash index is built here. */
-UNIV_INTERN
-void
-btr_search_enable(void)
-/*====================*/
-{
- btr_search_x_lock_all();
-
- btr_search_enabled = TRUE;
-
- btr_search_x_unlock_all();
-}
-
-/*****************************************************************//**
-Creates and initializes a search info struct.
-The struct is allocated from the given heap. Initial state: ref_count 0,
-no root guess, hash_analysis 0, n_hash_potential 0 (no recommendation
-yet), last_hash_succ FALSE, and the default hash prefix of one full
-field, zero extra bytes, left side.
-@return own: search info struct */
-UNIV_INTERN
-btr_search_t*
-btr_search_info_create(
-/*===================*/
- mem_heap_t* heap) /*!< in: heap where created */
-{
- btr_search_t* info;
-
- info = (btr_search_t*) mem_heap_alloc(heap, sizeof(btr_search_t));
-
-#ifdef UNIV_DEBUG
- info->magic_n = BTR_SEARCH_MAGIC_N;
-#endif /* UNIV_DEBUG */
-
- info->ref_count = 0;
- info->root_guess = NULL;
-
- info->hash_analysis = 0;
- info->n_hash_potential = 0;
-
- info->last_hash_succ = FALSE;
-
-#ifdef UNIV_SEARCH_PERF_STAT
- info->n_hash_succ = 0;
- info->n_hash_fail = 0;
- info->n_patt_succ = 0;
- info->n_searches = 0;
-#endif /* UNIV_SEARCH_PERF_STAT */
-
- /* Set some sensible values */
- info->n_fields = 1;
- info->n_bytes = 0;
-
- info->left_side = TRUE;
-
- return(info);
-}
-
-/*****************************************************************//**
-Returns the value of ref_count. The value is protected by
-the latch of the AHI partition corresponding to this index.
-The function s-locks that latch around the read, so the caller must not
-already hold it in any mode (asserted under UNIV_SYNC_DEBUG).
-@return ref_count value. */
-UNIV_INTERN
-ulint
-btr_search_info_get_ref_count(
-/*==========================*/
- btr_search_t* info, /*!< in: search info. */
- dict_index_t* index) /*!< in: index */
-{
- ulint ret;
-
- ut_ad(info);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_s_lock(btr_search_get_latch(index));
- ret = info->ref_count;
- rw_lock_s_unlock(btr_search_get_latch(index));
-
- return(ret);
-}
-
-/*********************************************************************//**
-Updates the search info of an index about hash successes. NOTE that info
-is NOT protected by any semaphore, to save CPU time! Do not assume its fields
-are consistent.
-Flow: nothing is done for the insert buffer tree. If there is no current
-recommendation (info->n_hash_potential == 0) a new one is computed.
-Otherwise the current recommendation (info->n_fields, info->n_bytes,
-info->left_side) is tested against the match counts stored in the cursor:
-if the search would have succeeded with it, info->n_hash_potential is
-incremented; if not, info->hash_analysis is reset to 0 and a new
-recommendation is derived from cursor->low_match/low_bytes and
-cursor->up_match/up_bytes.
-NOTE(review): ut_pair_cmp() is presumably a lexicographic comparison of
-(fields, bytes) pairs returning negative/zero/positive - confirm. */
-static
-void
-btr_search_info_update_hash(
-/*========================*/
- btr_search_t* info, /*!< in/out: search info */
- btr_cur_t* cursor) /*!< in: cursor which was just positioned */
-{
- dict_index_t* index = cursor->index;
- ulint n_unique;
- int cmp;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (dict_index_is_ibuf(index)) {
- /* So many deletes are performed on an insert buffer tree
- that we do not consider a hash index useful on it: */
-
- return;
- }
-
- n_unique = dict_index_get_n_unique_in_tree(index);
-
- if (info->n_hash_potential == 0) {
-
- goto set_new_recomm;
- }
-
- /* Test if the search would have succeeded using the recommended
- hash prefix */
-
- if (info->n_fields >= n_unique && cursor->up_match >= n_unique) {
-increment_potential:
- info->n_hash_potential++;
-
- return;
- }
-
- cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
- cursor->low_match, cursor->low_bytes); /* recommended prefix vs. lower match */
-
- if (info->left_side ? cmp <= 0 : cmp > 0) {
-
- goto set_new_recomm;
- }
-
- cmp = ut_pair_cmp(info->n_fields, info->n_bytes,
- cursor->up_match, cursor->up_bytes); /* recommended prefix vs. upper match */
-
- if (info->left_side ? cmp <= 0 : cmp > 0) {
-
- goto increment_potential;
- }
-
-set_new_recomm:
- /* We have to set a new recommendation; skip the hash analysis
- for a while to avoid unnecessary CPU time usage when there is no
- chance for success */
-
- info->hash_analysis = 0;
-
- cmp = ut_pair_cmp(cursor->up_match, cursor->up_bytes,
- cursor->low_match, cursor->low_bytes); /* upper match vs. lower match */
- if (cmp == 0) {
- info->n_hash_potential = 0; /* equal matches: no recommendation */
-
- /* For extra safety, we set some sensible values here */
-
- info->n_fields = 1;
- info->n_bytes = 0;
-
- info->left_side = TRUE;
-
- } else if (cmp > 0) {
- info->n_hash_potential = 1;
-
- if (cursor->up_match >= n_unique) {
-
- info->n_fields = n_unique; /* never hash more than n_unique fields */
- info->n_bytes = 0;
-
- } else if (cursor->low_match < cursor->up_match) {
-
- info->n_fields = cursor->low_match + 1;
- info->n_bytes = 0;
- } else {
- info->n_fields = cursor->low_match;
- info->n_bytes = cursor->low_bytes + 1;
- }
-
- info->left_side = TRUE;
- } else {
- info->n_hash_potential = 1;
-
- if (cursor->low_match >= n_unique) {
-
- info->n_fields = n_unique; /* never hash more than n_unique fields */
- info->n_bytes = 0;
-
- } else if (cursor->low_match > cursor->up_match) {
-
- info->n_fields = cursor->up_match + 1;
- info->n_bytes = 0;
- } else {
- info->n_fields = cursor->up_match;
- info->n_bytes = cursor->up_bytes + 1;
- }
-
- info->left_side = FALSE;
- }
-}
-
-/*********************************************************************//**
-Updates the block search info on hash successes. NOTE that info and
-block->n_hash_helps, n_fields, n_bytes, side are NOT protected by any
-semaphore, to save CPU time! Do not assume the fields are consistent.
-If the block's recommended parameters (block->n_fields, n_bytes,
-left_side) agree with those in info, block->n_hash_helps is incremented,
-and info->last_hash_succ is set to TRUE when the hash index currently
-built on the block (block->curr_*) uses the same parameters. Otherwise the
-block's recommendation is replaced by the one in info and
-block->n_hash_helps restarts from 1.
-A build is recommended when block->n_hash_helps exceeds the number of
-records on the page divided by BTR_SEARCH_PAGE_BUILD_LIMIT,
-info->n_hash_potential has reached BTR_SEARCH_BUILD_LIMIT, and the block
-either has no hash index, has one with different parameters, or
-n_hash_helps exceeds twice the number of records on the page.
-The cursor is only referenced by assertions and by the UNIV_DEBUG
-does_not_fit_in_memory check, hence its unused attribute.
-@return TRUE if building a (new) hash index on the block is recommended */
-static
-ibool
-btr_search_update_block_hash_info(
-/*==============================*/
- btr_search_t* info, /*!< in: search info */
- buf_block_t* block, /*!< in: buffer block */
- btr_cur_t* cursor MY_ATTRIBUTE((unused)))
- /*!< in: cursor */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index),
- RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index),
- RW_LOCK_EX));
- ut_ad(rw_lock_own(&block->lock, RW_LOCK_SHARED)
- || rw_lock_own(&block->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(cursor);
-
- info->last_hash_succ = FALSE;
-
- ut_a(buf_block_state_valid(block));
- ut_ad(info->magic_n == BTR_SEARCH_MAGIC_N);
-
- if ((block->n_hash_helps > 0)
- && (info->n_hash_potential > 0)
- && (block->n_fields == info->n_fields)
- && (block->n_bytes == info->n_bytes)
- && (block->left_side == info->left_side)) {
-
- if ((block->index)
- && (block->curr_n_fields == info->n_fields)
- && (block->curr_n_bytes == info->n_bytes)
- && (block->curr_left_side == info->left_side)) {
-
- /* The search would presumably have succeeded using
- the hash index */
-
- info->last_hash_succ = TRUE;
- }
-
- block->n_hash_helps++;
- } else {
- block->n_hash_helps = 1;
- block->n_fields = info->n_fields;
- block->n_bytes = info->n_bytes;
- block->left_side = info->left_side;
- }
-
-#ifdef UNIV_DEBUG
- if (cursor->index->table->does_not_fit_in_memory) {
- block->n_hash_helps = 0; /* debug builds only: keeps the build test below from passing */
- }
-#endif /* UNIV_DEBUG */
-
- if ((block->n_hash_helps > page_get_n_recs(block->frame)
- / BTR_SEARCH_PAGE_BUILD_LIMIT)
- && (info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT)) {
-
- if ((!block->index)
- || (block->n_hash_helps
- > 2 * page_get_n_recs(block->frame))
- || (block->n_fields != block->curr_n_fields)
- || (block->n_bytes != block->curr_n_bytes)
- || (block->left_side != block->curr_left_side)) {
-
- /* Build a new hash index on the page */
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Updates a hash node reference when it has been unsuccessfully used in a
-search which could have succeeded with the used hash parameters. This can
-happen because when building a hash index for a page, we do not check
-what happens at page boundaries, and therefore there can be misleading
-hash nodes. Also, collisions in the fold value can lead to misleading
-references. This function lazily fixes these imperfections in the hash
-index.
-The cursor must be in state BTR_CUR_HASH_FAIL and, as asserted under
-UNIV_SYNC_DEBUG, the caller holds the x-latch of the AHI partition of
-cursor->index plus an s- or x-latch on the block. Nothing is done if the
-block has no hash index (block->index is NULL), if the parameters of the
-index built on the block differ from those in info, or if the cursor is
-not positioned on a user record. Otherwise the fold of the cursor record
-is (re)inserted into the hash table of the partition. */
-static
-void
-btr_search_update_hash_ref(
-/*=======================*/
- btr_search_t* info, /*!< in: search info */
- buf_block_t* block, /*!< in: buffer block where cursor positioned */
- btr_cur_t* cursor) /*!< in: cursor */
-{
- dict_index_t* index;
- ulint fold;
- const rec_t* rec;
-
- ut_ad(cursor->flag == BTR_CUR_HASH_FAIL);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(btr_search_get_latch(cursor->index),
- RW_LOCK_EX));
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(page_align(btr_cur_get_rec(cursor))
- == buf_block_get_frame(block));
-
- index = block->index;
-
- if (!index) {
-
- return;
- }
-
- ut_a(index == cursor->index);
- ut_a(!dict_index_is_ibuf(index));
-
- if ((info->n_hash_potential > 0)
- && (block->curr_n_fields == info->n_fields)
- && (block->curr_n_bytes == info->n_bytes)
- && (block->curr_left_side == info->left_side)) {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
- rec = btr_cur_get_rec(cursor);
-
- if (!page_rec_is_user_rec(rec)) {
-
- return; /* heap is still NULL here, so nothing to free */
- }
-
- fold = rec_fold(rec,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap),
- block->curr_n_fields,
- block->curr_n_bytes, index->id);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(btr_search_get_latch(cursor->index),
- RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- ha_insert_for_fold(btr_search_get_hash_table(cursor->index),
- fold, block, rec);
-
- MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED);
- }
-}
-
-/*********************************************************************//**
-Updates the search info.
-Steps: refresh the index-level recommendation
-(btr_search_info_update_hash()) and the block-level counters
-(btr_search_update_block_hash_info()); if a build is recommended or the
-preceding hash search failed, make sure the hash table heap has a free
-block before any AHI latch is taken; after a failed hash search, fix the
-hash node reference under the partition x-latch; finally build the page
-hash index if that was recommended.
-The caller must not hold the AHI partition latch of cursor->index
-(asserted under UNIV_SYNC_DEBUG). */
-UNIV_INTERN
-void
-btr_search_info_update_slow(
-/*========================*/
- btr_search_t* info, /*!< in/out: search info */
- btr_cur_t* cursor) /*!< in: cursor which was just positioned */
-{
- buf_block_t* block;
- ibool build_index;
- ulint* params;
- ulint* params2;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index),
- RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(btr_search_get_latch(cursor->index),
- RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- block = btr_cur_get_block(cursor);
-
- SRV_CORRUPT_TABLE_CHECK(block, return;);
-
- /* NOTE that the following two function calls do NOT protect
- info or block->n_fields etc. with any semaphore, to save CPU time!
- We cannot assume the fields are consistent when we return from
- those functions! */
-
- btr_search_info_update_hash(info, cursor);
-
- build_index = btr_search_update_block_hash_info(info, block, cursor);
-
- if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) {
-
- btr_search_check_free_space_in_heap(cursor->index); /* both paths below may add hash nodes */
- }
-
- if (cursor->flag == BTR_CUR_HASH_FAIL) {
- /* Update the hash node reference, if appropriate */
-
- rw_lock_x_lock(btr_search_get_latch(cursor->index));
-
- btr_search_update_hash_ref(info, block, cursor);
-
- rw_lock_x_unlock(btr_search_get_latch(cursor->index));
- }
-
- if (build_index) {
- /* Note that since we did not protect block->n_fields etc.
- with any semaphore, the values can be inconsistent. We have
- to check inside the function call that they make sense. We
- also malloc an array and store the values there to make sure
- the compiler does not let the function call parameters change
- inside the called function. It might be that the compiler
- would optimize the call just to pass pointers to block. */
-
- params = (ulint*) mem_alloc(3 * sizeof(ulint));
- params[0] = block->n_fields;
- params[1] = block->n_bytes;
- params[2] = block->left_side;
-
- /* Make sure the compiler cannot deduce the values and do
- optimizations */
-
- params2 = params + btr_search_this_is_zero; /* same address: the global is initialised to 0 */
-
- btr_search_build_page_hash_index(cursor->index,
- block,
- params2[0],
- params2[1],
- params2[2]);
- mem_free(params);
- }
-}
-
-/******************************************************************//**
-Checks if a guessed position for a tree cursor is right. Note that if
-mode is PAGE_CUR_LE, which is used in inserts, and the function returns
-TRUE, then cursor->up_match and cursor->low_match both have sensible values.
-The tuple is first compared to the record under the cursor; unless that
-already decides the outcome, and unless can_only_compare_to_cursor_rec is
-set, it is then compared to the previous record (modes PAGE_CUR_G and
-PAGE_CUR_GE) or to the next record (modes PAGE_CUR_L and PAGE_CUR_LE).
-When that neighbour is the page infimum or supremum, the guess is accepted
-only if the page has no previous, respectively next, page (FIL_NULL).
-Any heap allocated for the record offsets is freed at exit_func.
-NOTE(review): the tests below assume page_cmp_dtuple_rec_with_match()
-returns 1 if tuple > rec, -1 if tuple < rec and 0 if they are equal -
-confirm against its declaration.
-@return TRUE if success */
-static
-ibool
-btr_search_check_guess(
-/*===================*/
- btr_cur_t* cursor, /*!< in: guessed cursor position */
- ibool can_only_compare_to_cursor_rec,
- /*!< in: if we do not have a latch on the page
- of cursor, but only a latch on
- btr_search_latch, then ONLY the columns
- of the record UNDER the cursor are
- protected, not the next or previous record
- in the chain: we cannot look at the next or
- previous record to check our guess! */
- const dtuple_t* tuple, /*!< in: data tuple */
- ulint mode, /*!< in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
- or PAGE_CUR_GE */
- mtr_t* mtr) /*!< in: mtr */
-{
- rec_t* rec;
- ulint n_unique;
- ulint match;
- ulint bytes;
- int cmp;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- ibool success = FALSE;
- rec_offs_init(offsets_);
-
- n_unique = dict_index_get_n_unique_in_tree(cursor->index);
-
- rec = btr_cur_get_rec(cursor);
-
- ut_ad(page_rec_is_user_rec(rec));
-
- match = 0;
- bytes = 0;
-
- offsets = rec_get_offsets(rec, cursor->index, offsets,
- n_unique, &heap);
- cmp = page_cmp_dtuple_rec_with_match(tuple, rec,
- offsets, &match, &bytes);
-
- if (mode == PAGE_CUR_GE) {
- if (cmp == 1) {
- goto exit_func;
- }
-
- cursor->up_match = match;
-
- if (match >= n_unique) {
- success = TRUE; /* all n_unique fields match: no neighbour check needed */
- goto exit_func;
- }
- } else if (mode == PAGE_CUR_LE) {
- if (cmp == -1) {
- goto exit_func;
- }
-
- cursor->low_match = match;
-
- } else if (mode == PAGE_CUR_G) {
- if (cmp != -1) {
- goto exit_func;
- }
- } else if (mode == PAGE_CUR_L) {
- if (cmp != 1) {
- goto exit_func;
- }
- }
-
- if (can_only_compare_to_cursor_rec) {
- /* Since we could not determine if our guess is right just by
- looking at the record under the cursor, return FALSE */
- goto exit_func;
- }
-
- match = 0;
- bytes = 0;
-
- if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) {
- rec_t* prev_rec;
-
- ut_ad(!page_rec_is_infimum(rec));
-
- prev_rec = page_rec_get_prev(rec);
-
- if (page_rec_is_infimum(prev_rec)) {
- success = btr_page_get_prev(page_align(prev_rec), mtr)
- == FIL_NULL;
-
- goto exit_func;
- }
-
- offsets = rec_get_offsets(prev_rec, cursor->index, offsets,
- n_unique, &heap);
- cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec,
- offsets, &match, &bytes);
- if (mode == PAGE_CUR_GE) {
- success = cmp == 1;
- } else {
- success = cmp != -1;
- }
-
- goto exit_func;
- } else {
- rec_t* next_rec;
-
- ut_ad(!page_rec_is_supremum(rec));
-
- next_rec = page_rec_get_next(rec);
-
- if (page_rec_is_supremum(next_rec)) {
- if (btr_page_get_next(page_align(next_rec), mtr)
- == FIL_NULL) {
-
- cursor->up_match = 0;
- success = TRUE;
- }
-
- goto exit_func;
- }
-
- offsets = rec_get_offsets(next_rec, cursor->index, offsets,
- n_unique, &heap);
- cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec,
- offsets, &match, &bytes);
- if (mode == PAGE_CUR_LE) {
- success = cmp == -1;
- cursor->up_match = match;
- } else {
- success = cmp != 1;
- }
- }
-exit_func:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(success);
-}
-
-/******************************************************************//**
-Tries to guess the right search position based on the hash search info
-of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
-and the function returns TRUE, then cursor->up_match and cursor->low_match
-both have sensible values.
-Outline: returns FALSE at once, without touching cursor->flag, if there
-is no recommendation (info->n_hash_potential == 0) or the tuple has fewer
-fields than the recommended prefix needs. Otherwise the tuple is folded
-and looked up in the hash table of the index's AHI partition; when
-has_search_latch is 0 the partition s-latch is taken for the lookup and
-swapped for a page latch obtained with buf_page_get_known_nowait(). The
-guess is then verified by the block state, by the index id stored on the
-page and by btr_search_check_guess(). Every failure after the fold has
-been computed ends at the failure labels, which set cursor->flag to
-BTR_CUR_HASH_FAIL and info->last_hash_succ to FALSE.
-@return TRUE if succeeded */
-UNIV_INTERN
-ibool
-btr_search_guess_on_hash(
-/*=====================*/
- dict_index_t* index, /*!< in: index */
- btr_search_t* info, /*!< in: index search info */
- const dtuple_t* tuple, /*!< in: logical record */
- ulint mode, /*!< in: PAGE_CUR_L, ... */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ...;
- NOTE that only if has_search_latch
- is 0, we will have a latch set on
- the cursor page, otherwise we assume
- the caller uses his search latch
- to protect the record! */
- btr_cur_t* cursor, /*!< out: tree cursor */
- ulint has_search_latch,/*!< in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, RW_X_LATCH, or 0 */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_pool_t* buf_pool;
- buf_block_t* block;
- const rec_t* rec;
- ulint fold;
- index_id_t index_id;
-#ifdef notdefined
- btr_cur_t cursor2;
- btr_pcur_t pcur;
-#endif
- ut_ad(index && info && tuple && cursor && mtr);
- ut_ad(!dict_index_is_ibuf(index));
- ut_ad((latch_mode == BTR_SEARCH_LEAF)
- || (latch_mode == BTR_MODIFY_LEAF));
-
- /* Note that, for efficiency, the struct info may not be protected by
- any latch here! */
-
- if (UNIV_UNLIKELY(info->n_hash_potential == 0)) {
-
- return(FALSE);
- }
-
- cursor->n_fields = info->n_fields;
- cursor->n_bytes = info->n_bytes;
-
- if (UNIV_UNLIKELY(dtuple_get_n_fields(tuple)
- < cursor->n_fields + (cursor->n_bytes > 0))) {
-
- return(FALSE);
- }
-
- index_id = index->id;
-
-#ifdef UNIV_SEARCH_PERF_STAT
- info->n_hash_succ++;
-#endif
- fold = dtuple_fold(tuple, cursor->n_fields, cursor->n_bytes, index_id);
-
- cursor->fold = fold;
- cursor->flag = BTR_CUR_HASH;
-
- if (UNIV_LIKELY(!has_search_latch)) {
- rw_lock_s_lock(btr_search_get_latch(index));
-
- if (UNIV_UNLIKELY(!btr_search_enabled)) { /* checked only after the latch is held */
- goto failure_unlock;
- }
- }
-
- ut_ad(rw_lock_get_writer(btr_search_get_latch(index)) != RW_LOCK_EX);
- ut_ad(rw_lock_get_reader_count(btr_search_get_latch(index)) > 0);
-
- rec = (rec_t*) ha_search_and_get_data(
- btr_search_get_hash_table(index), fold);
-
- if (UNIV_UNLIKELY(!rec)) {
- goto failure_unlock;
- }
-
- block = buf_block_align(rec);
-
- if (UNIV_LIKELY(!has_search_latch)) {
-
- if (UNIV_UNLIKELY(
- !buf_page_get_known_nowait(latch_mode, block,
- BUF_MAKE_YOUNG,
- __FILE__, __LINE__,
- mtr))) {
- goto failure_unlock;
- }
-
- rw_lock_s_unlock(btr_search_get_latch(index)); /* from here on the page latch protects the record */
-
- buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
- }
-
- if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH);
-
- if (UNIV_LIKELY(!has_search_latch)) {
-
- btr_leaf_page_release(block, latch_mode, mtr);
- }
-
- goto failure; /* the AHI s-latch, if we took it, was already released above */
- }
-
- ut_ad(page_rec_is_user_rec(rec));
-
- btr_cur_position(index, (rec_t*) rec, block, cursor);
-
- /* Check the validity of the guess within the page */
-
- /* If we only have the latch on btr_search_latch, not on the
- page, it only protects the columns of the record the cursor
- is positioned on. We cannot look at the next or the previous
- record to determine if our guess for the cursor position is
- right. */
- if (UNIV_UNLIKELY(index_id != btr_page_get_index_id(block->frame))
- || !btr_search_check_guess(cursor,
- has_search_latch,
- tuple, mode, mtr)) {
- if (UNIV_LIKELY(!has_search_latch)) {
- btr_leaf_page_release(block, latch_mode, mtr);
- }
-
- goto failure;
- }
-
- if (UNIV_LIKELY(info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5)) {
-
- info->n_hash_potential++; /* capped at BTR_SEARCH_BUILD_LIMIT + 5 */
- }
-
-#ifdef notdefined
- /* These lines of code can be used in a debug version to check
- the correctness of the searched cursor position: */
-
- info->last_hash_succ = FALSE;
-
- /* Currently, does not work if the following fails: */
- ut_ad(!has_search_latch);
-
- btr_leaf_page_release(block, latch_mode, mtr);
-
- btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
- &cursor2, 0, mtr);
- if (mode == PAGE_CUR_GE
- && page_rec_is_supremum(btr_cur_get_rec(&cursor2))) {
-
- /* If mode is PAGE_CUR_GE, then the binary search
- in the index tree may actually take us to the supremum
- of the previous page */
-
- info->last_hash_succ = FALSE;
-
- btr_pcur_open_on_user_rec(index, tuple, mode, latch_mode,
- &pcur, mtr);
- ut_ad(btr_pcur_get_rec(&pcur) == btr_cur_get_rec(cursor));
- } else {
- ut_ad(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor));
- }
-
- /* NOTE that it is theoretically possible that the above assertions
- fail if the page of the cursor gets removed from the buffer pool
- meanwhile! Thus it might not be a bug. */
-#endif
- info->last_hash_succ = TRUE;
-
-#ifdef UNIV_SEARCH_PERF_STAT
-#endif
- if (UNIV_LIKELY(!has_search_latch)
- && buf_page_peek_if_too_old(&block->page)) {
-
- buf_page_make_young(&block->page);
- }
-
- /* Increment the page get statistics though we did not really
- fix the page: for user info only */
- buf_pool = buf_pool_from_bpage(&block->page);
- buf_pool->stat.n_page_gets++;
-
- return(TRUE);
-
- /*-------------------------------------------*/
-failure_unlock:
- if (UNIV_LIKELY(!has_search_latch)) {
- rw_lock_s_unlock(btr_search_get_latch(index));
- }
-failure:
- cursor->flag = BTR_CUR_HASH_FAIL;
-
-#ifdef UNIV_SEARCH_PERF_STAT
- info->n_hash_fail++;
-
- if (info->n_hash_succ > 0) {
- info->n_hash_succ--; /* undo the optimistic increment made before the lookup */
- }
-#endif
- info->last_hash_succ = FALSE;
-
- return(FALSE);
-}
-
-/********************************************************************//**
-Drops a page hash index. */
-UNIV_INTERN
-void
-btr_search_drop_page_hash_index(
-/*============================*/
- buf_block_t* block) /*!< in: block containing index page,
- s- or x-latched, or an index page
- for which we know that
- block->buf_fix_count == 0 or it is an
- index page which has already been
- removed from the buf_pool->page_hash
- i.e.: it is in state
- BUF_BLOCK_REMOVE_HASH */
-{
- hash_table_t* table;
- ulint n_fields;
- ulint n_bytes;
- const page_t* page;
- const rec_t* rec;
- ulint fold;
- ulint prev_fold;
- index_id_t index_id;
- ulint n_cached;
- ulint n_recs;
- ulint* folds;
- ulint i;
- mem_heap_t* heap;
- const dict_index_t* index;
- ulint* offsets;
- btr_search_t* info;
-
-retry:
- /* Do a dirty check on block->index, return if the block is not in the
- adaptive hash index. This is to avoid acquiring an AHI latch for
- performance considerations. */
-
- index = block->index;
- if (!index) {
-
- return;
- }
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- rw_lock_s_lock(btr_search_get_latch(index));
-
- if (UNIV_UNLIKELY(index != block->index)) {
-
- rw_lock_s_unlock(btr_search_get_latch(index));
-
- goto retry;
- }
-
- ut_a(!dict_index_is_ibuf(index));
-#ifdef UNIV_DEBUG
- switch (dict_index_get_online_status(index)) {
- case ONLINE_INDEX_CREATION:
- /* The index is being created (bulk loaded). */
- case ONLINE_INDEX_COMPLETE:
- /* The index has been published. */
- case ONLINE_INDEX_ABORTED:
- /* Either the index creation was aborted due to an
- error observed by InnoDB (in which case there should
- not be any adaptive hash index entries), or it was
- completed and then flagged aborted in
- rollback_inplace_alter_table(). */
- break;
- case ONLINE_INDEX_ABORTED_DROPPED:
- /* The index should have been dropped from the tablespace
- already, and the adaptive hash index entries should have
- been dropped as well. */
- ut_error;
- }
-#endif /* UNIV_DEBUG */
-
- table = btr_search_get_hash_table(index);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EX)
- || block->page.buf_fix_count == 0
- || buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH);
-#endif /* UNIV_SYNC_DEBUG */
-
- n_fields = block->curr_n_fields;
- n_bytes = block->curr_n_bytes;
-
- /* NOTE: The fields of block must not be accessed after
- releasing btr_search_latch, as the index page might only
- be s-latched! */
-
- rw_lock_s_unlock(btr_search_get_latch(index));
-
- ut_a(n_fields + n_bytes > 0);
-
- page = block->frame;
- n_recs = page_get_n_recs(page);
-
- /* Calculate and cache fold values into an array for fast deletion
- from the hash index */
-
- folds = (ulint*) mem_alloc(n_recs * sizeof(ulint));
-
- n_cached = 0;
-
- rec = page_get_infimum_rec(page);
- rec = page_rec_get_next_low(rec, page_is_comp(page));
-
- index_id = btr_page_get_index_id(page);
-
- ut_a(index_id == index->id);
-
- prev_fold = 0;
-
- heap = NULL;
- offsets = NULL;
-
- while (!page_rec_is_supremum(rec)) {
- offsets = rec_get_offsets(rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
- ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0));
- fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id);
-
- if (fold == prev_fold && prev_fold != 0) {
-
- goto next_rec;
- }
-
- /* Remove all hash nodes pointing to this page from the
- hash chain */
-
- folds[n_cached] = fold;
- n_cached++;
-next_rec:
- rec = page_rec_get_next_low(rec, page_rec_is_comp(rec));
- prev_fold = fold;
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- rw_lock_x_lock(btr_search_get_latch(index));
-
- if (UNIV_UNLIKELY(!block->index)) {
- /* Someone else has meanwhile dropped the hash index */
-
- goto cleanup;
- }
-
- ut_a(block->index == index);
-
- if (UNIV_UNLIKELY(block->curr_n_fields != n_fields)
- || UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) {
-
- /* Someone else has meanwhile built a new hash index on the
- page, with different parameters */
-
- rw_lock_x_unlock(btr_search_get_latch(index));
-
- mem_free(folds);
- goto retry;
- }
-
- for (i = 0; i < n_cached; i++) {
-
- ha_remove_all_nodes_to_page(table, folds[i], page);
- }
-
- info = btr_search_get_info(block->index);
- ut_a(info->ref_count > 0);
- info->ref_count--;
-
- block->index = NULL;
-
- MONITOR_INC(MONITOR_ADAPTIVE_HASH_PAGE_REMOVED);
- MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_REMOVED, n_cached);
-
-cleanup:
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- if (UNIV_UNLIKELY(block->n_pointers)) {
- /* Corruption */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Corruption of adaptive hash index."
- " After dropping\n"
- "InnoDB: the hash index to a page of %s,"
- " still %lu hash nodes remain.\n",
- index->name, (ulong) block->n_pointers);
- rw_lock_x_unlock(btr_search_get_latch(index));
-
- ut_ad(btr_search_validate());
- } else {
- rw_lock_x_unlock(btr_search_get_latch(index));
- }
-#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- rw_lock_x_unlock(btr_search_get_latch(index));
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-
- mem_free(folds);
-}
-
-/********************************************************************//**
-Drops a possible page hash index when a page is evicted from the buffer pool
-or freed in a file segment. */
-UNIV_INTERN
-void
-btr_search_drop_page_hash_when_freed(
-/*=================================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no) /*!< in: page number */
-{
- buf_block_t* block;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- /* If the caller has a latch on the page, then the caller must
- have a x-latch on the page and it must have already dropped
- the hash index for the page. Because of the x-latch that we
- are possibly holding, we cannot s-latch the page, but must
- (recursively) x-latch it, even though we are only reading. */
-
- block = buf_page_get_gen(space, zip_size, page_no, RW_X_LATCH, NULL,
- BUF_PEEK_IF_IN_POOL, __FILE__, __LINE__,
- &mtr);
-
- if (block && block->index) {
-
- buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
-
- btr_search_drop_page_hash_index(block);
- }
-
- mtr_commit(&mtr);
-}
-
-/********************************************************************//**
-Builds a hash index on a page with the given parameters. If the page already
-has a hash index with different parameters, the old hash index is removed.
-If index is non-NULL, this function checks if n_fields and n_bytes are
-sensible values, and does not build a hash index if not. */
-static
-void
-btr_search_build_page_hash_index(
-/*=============================*/
- dict_index_t* index, /*!< in: index for which to build */
- buf_block_t* block, /*!< in: index page, s- or x-latched */
- ulint n_fields,/*!< in: hash this many full fields */
- ulint n_bytes,/*!< in: hash this many bytes from the next
- field */
- ibool left_side)/*!< in: hash for searches from left side? */
-{
- hash_table_t* table;
- page_t* page;
- rec_t* rec;
- rec_t* next_rec;
- ulint fold;
- ulint next_fold;
- ulint n_cached;
- ulint n_recs;
- ulint* folds;
- rec_t** recs;
- ulint i;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(index);
- ut_a(!dict_index_is_ibuf(index));
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_EX));
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_s_lock(btr_search_get_latch(index));
-
- if (!btr_search_enabled) {
- rw_lock_s_unlock(btr_search_get_latch(index));
- return;
- }
-
- table = btr_search_get_hash_table(index);
- page = buf_block_get_frame(block);
-
- if (block->index && ((block->curr_n_fields != n_fields)
- || (block->curr_n_bytes != n_bytes)
- || (block->curr_left_side != left_side))) {
-
- rw_lock_s_unlock(btr_search_get_latch(index));
-
- btr_search_drop_page_hash_index(block);
- } else {
- rw_lock_s_unlock(btr_search_get_latch(index));
- }
-
- n_recs = page_get_n_recs(page);
-
- if (n_recs == 0) {
-
- return;
- }
-
- /* Check that the values for hash index build are sensible */
-
- if (n_fields + n_bytes == 0) {
-
- return;
- }
-
- if (dict_index_get_n_unique_in_tree(index) < n_fields
- || (dict_index_get_n_unique_in_tree(index) == n_fields
- && n_bytes > 0)) {
- return;
- }
-
- /* Calculate and cache fold values and corresponding records into
- an array for fast insertion to the hash index */
-
- folds = (ulint*) mem_alloc(n_recs * sizeof(ulint));
- recs = (rec_t**) mem_alloc(n_recs * sizeof(rec_t*));
-
- n_cached = 0;
-
- ut_a(index->id == btr_page_get_index_id(page));
-
- rec = page_rec_get_next(page_get_infimum_rec(page));
-
- offsets = rec_get_offsets(rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
-
- if (!page_rec_is_supremum(rec)) {
- ut_a(n_fields <= rec_offs_n_fields(offsets));
-
- if (n_bytes > 0) {
- ut_a(n_fields < rec_offs_n_fields(offsets));
- }
- }
-
- fold = rec_fold(rec, offsets, n_fields, n_bytes, index->id);
-
- if (left_side) {
-
- folds[n_cached] = fold;
- recs[n_cached] = rec;
- n_cached++;
- }
-
- for (;;) {
- next_rec = page_rec_get_next(rec);
-
- if (page_rec_is_supremum(next_rec)) {
-
- if (!left_side) {
-
- folds[n_cached] = fold;
- recs[n_cached] = rec;
- n_cached++;
- }
-
- break;
- }
-
- offsets = rec_get_offsets(next_rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
- next_fold = rec_fold(next_rec, offsets, n_fields,
- n_bytes, index->id);
-
- if (fold != next_fold) {
- /* Insert an entry into the hash index */
-
- if (left_side) {
-
- folds[n_cached] = next_fold;
- recs[n_cached] = next_rec;
- n_cached++;
- } else {
- folds[n_cached] = fold;
- recs[n_cached] = rec;
- n_cached++;
- }
- }
-
- rec = next_rec;
- fold = next_fold;
- }
-
- btr_search_check_free_space_in_heap(index);
-
- rw_lock_x_lock(btr_search_get_latch(index));
-
- if (UNIV_UNLIKELY(!btr_search_enabled)) {
- goto exit_func;
- }
-
- if (block->index && ((block->curr_n_fields != n_fields)
- || (block->curr_n_bytes != n_bytes)
- || (block->curr_left_side != left_side))) {
- goto exit_func;
- }
-
- /* This counter is decremented every time we drop page
- hash index entries and is incremented here. Since we can
- rebuild hash index for a page that is already hashed, we
- have to take care not to increment the counter in that
- case. */
- if (!block->index) {
- index->search_info->ref_count++;
- }
-
- block->n_hash_helps = 0;
-
- block->curr_n_fields = n_fields;
- block->curr_n_bytes = n_bytes;
- block->curr_left_side = left_side;
- block->index = index;
-
- for (i = 0; i < n_cached; i++) {
-
- ha_insert_for_fold(table, folds[i], block, recs[i]);
- }
-
- MONITOR_INC(MONITOR_ADAPTIVE_HASH_PAGE_ADDED);
- MONITOR_INC_VALUE(MONITOR_ADAPTIVE_HASH_ROW_ADDED, n_cached);
-exit_func:
- rw_lock_x_unlock(btr_search_get_latch(index));
-
- mem_free(folds);
- mem_free(recs);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/********************************************************************//**
-Moves or deletes hash entries for moved records. If new_page is already hashed,
-then the hash index for page, if any, is dropped. If new_page is not hashed,
-and page is hashed, then a new hash index is built to new_page with the same
-parameters as page (this often happens when a page is split). */
-UNIV_INTERN
-void
-btr_search_move_or_delete_hash_entries(
-/*===================================*/
- buf_block_t* new_block, /*!< in: records are copied
- to this page */
- buf_block_t* block, /*!< in: index page from which
- records were copied, and the
- copied records will be deleted
- from this page */
- dict_index_t* index) /*!< in: record descriptor */
-{
- ulint n_fields;
- ulint n_bytes;
- ibool left_side;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
- ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_s_lock(btr_search_get_latch(index));
-
- ut_a(!new_block->index || new_block->index == index);
- ut_a(!block->index || block->index == index);
- ut_a(!(new_block->index || block->index)
- || !dict_index_is_ibuf(index));
-
- if (new_block->index) {
-
- rw_lock_s_unlock(btr_search_get_latch(index));
-
- btr_search_drop_page_hash_index(block);
-
- return;
- }
-
- if (block->index) {
-
- n_fields = block->curr_n_fields;
- n_bytes = block->curr_n_bytes;
- left_side = block->curr_left_side;
-
- new_block->n_fields = block->curr_n_fields;
- new_block->n_bytes = block->curr_n_bytes;
- new_block->left_side = left_side;
-
- rw_lock_s_unlock(btr_search_get_latch(index));
-
- ut_a(n_fields + n_bytes > 0);
-
- btr_search_build_page_hash_index(index, new_block, n_fields,
- n_bytes, left_side);
- ut_ad(n_fields == block->curr_n_fields);
- ut_ad(n_bytes == block->curr_n_bytes);
- ut_ad(left_side == block->curr_left_side);
- return;
- }
-
- rw_lock_s_unlock(btr_search_get_latch(index));
-}
-
-/********************************************************************//**
-Updates the page hash index when a single record is deleted from a page. */
-UNIV_INTERN
-void
-btr_search_update_hash_on_delete(
-/*=============================*/
- btr_cur_t* cursor) /*!< in: cursor which was positioned on the
- record to delete using btr_cur_search_...,
- the record is not yet deleted */
-{
- hash_table_t* table;
- buf_block_t* block;
- const rec_t* rec;
- ulint fold;
- dict_index_t* index;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- mem_heap_t* heap = NULL;
- rec_offs_init(offsets_);
-
- block = btr_cur_get_block(cursor);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- index = block->index;
-
- if (!index) {
-
- return;
- }
-
- ut_a(index == cursor->index);
- ut_a(block->curr_n_fields + block->curr_n_bytes > 0);
- ut_a(!dict_index_is_ibuf(index));
-
- table = btr_search_get_hash_table(cursor->index);
-
- rec = btr_cur_get_rec(cursor);
-
- fold = rec_fold(rec, rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap),
- block->curr_n_fields, block->curr_n_bytes, index->id);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- rw_lock_x_lock(btr_search_get_latch(cursor->index));
-
- if (block->index) {
- ut_a(block->index == index);
-
- if (ha_search_and_delete_if_found(table, fold, rec)) {
- MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_REMOVED);
- } else {
- MONITOR_INC(
- MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND);
- }
- }
-
- rw_lock_x_unlock(btr_search_get_latch(cursor->index));
-}
-
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
-void
-btr_search_update_hash_node_on_insert(
-/*==================================*/
- btr_cur_t* cursor) /*!< in: cursor which was positioned to the
- place to insert using btr_cur_search_...,
- and the new record has been inserted next
- to the cursor */
-{
- hash_table_t* table;
- buf_block_t* block;
- dict_index_t* index;
- rec_t* rec;
-
- rec = btr_cur_get_rec(cursor);
-
- block = btr_cur_get_block(cursor);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- index = block->index;
-
- if (!index) {
-
- return;
- }
-
- ut_a(cursor->index == index);
- ut_a(!dict_index_is_ibuf(index));
-
- rw_lock_x_lock(btr_search_get_latch(cursor->index));
-
- if (!block->index) {
-
- goto func_exit;
- }
-
- ut_a(block->index == index);
-
- if ((cursor->flag == BTR_CUR_HASH)
- && (cursor->n_fields == block->curr_n_fields)
- && (cursor->n_bytes == block->curr_n_bytes)
- && !block->curr_left_side) {
-
- table = btr_search_get_hash_table(cursor->index);
-
- if (ha_search_and_update_if_found(
- table, cursor->fold, rec, block,
- page_rec_get_next(rec))) {
- MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_UPDATED);
- }
-
-func_exit:
- rw_lock_x_unlock(btr_search_get_latch(cursor->index));
- } else {
- rw_lock_x_unlock(btr_search_get_latch(cursor->index));
-
- btr_search_update_hash_on_insert(cursor);
- }
-}
-
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
-void
-btr_search_update_hash_on_insert(
-/*=============================*/
- btr_cur_t* cursor) /*!< in: cursor which was positioned to the
- place to insert using btr_cur_search_...,
- and the new record has been inserted next
- to the cursor */
-{
- hash_table_t* table;
- buf_block_t* block;
- dict_index_t* index;
- const rec_t* rec;
- const rec_t* ins_rec;
- const rec_t* next_rec;
- ulint fold;
- ulint ins_fold;
- ulint next_fold = 0; /* remove warning (??? bug ???) */
- ulint n_fields;
- ulint n_bytes;
- ibool left_side;
- ibool locked = FALSE;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- block = btr_cur_get_block(cursor);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- index = block->index;
-
- if (!index) {
-
- return;
- }
-
- btr_search_check_free_space_in_heap(cursor->index);
-
- table = btr_search_get_hash_table(cursor->index);
-
- rec = btr_cur_get_rec(cursor);
-
- ut_a(index == cursor->index);
- ut_a(!dict_index_is_ibuf(index));
-
- n_fields = block->curr_n_fields;
- n_bytes = block->curr_n_bytes;
- left_side = block->curr_left_side;
-
- ins_rec = page_rec_get_next_const(rec);
- next_rec = page_rec_get_next_const(ins_rec);
-
- offsets = rec_get_offsets(ins_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, index->id);
-
- if (!page_rec_is_supremum(next_rec)) {
- offsets = rec_get_offsets(next_rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
- next_fold = rec_fold(next_rec, offsets, n_fields,
- n_bytes, index->id);
- }
-
- if (!page_rec_is_infimum(rec)) {
- offsets = rec_get_offsets(rec, index, offsets,
- n_fields + (n_bytes > 0), &heap);
- fold = rec_fold(rec, offsets, n_fields, n_bytes, index->id);
- } else {
- if (left_side) {
-
- rw_lock_x_lock(btr_search_get_latch(index));
-
- locked = TRUE;
-
- if (!btr_search_enabled) {
- goto function_exit;
- }
-
- ha_insert_for_fold(table, ins_fold, block, ins_rec);
- }
-
- goto check_next_rec;
- }
-
- if (fold != ins_fold) {
-
- if (!locked) {
-
- rw_lock_x_lock(btr_search_get_latch(index));
-
- locked = TRUE;
-
- if (!btr_search_enabled) {
- goto function_exit;
- }
- }
-
- if (!left_side) {
- ha_insert_for_fold(table, fold, block, rec);
- } else {
- ha_insert_for_fold(table, ins_fold, block, ins_rec);
- }
- }
-
-check_next_rec:
- if (page_rec_is_supremum(next_rec)) {
-
- if (!left_side) {
-
- if (!locked) {
- rw_lock_x_lock(btr_search_get_latch(index));
-
- locked = TRUE;
-
- if (!btr_search_enabled) {
- goto function_exit;
- }
- }
-
- ha_insert_for_fold(table, ins_fold, block, ins_rec);
- }
-
- goto function_exit;
- }
-
- if (ins_fold != next_fold) {
-
- if (!locked) {
-
- rw_lock_x_lock(btr_search_get_latch(index));
-
- locked = TRUE;
-
- if (!btr_search_enabled) {
- goto function_exit;
- }
- }
-
- if (!left_side) {
-
- ha_insert_for_fold(table, ins_fold, block, ins_rec);
- /*
- fputs("Hash insert for ", stderr);
- dict_index_name_print(stderr, index);
- fprintf(stderr, " fold %lu\n", ins_fold);
- */
- } else {
- ha_insert_for_fold(table, next_fold, block, next_rec);
- }
- }
-
-function_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- if (locked) {
- rw_lock_x_unlock(btr_search_get_latch(index));
- }
-}
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-/********************************************************************//**
-Validates one hash table in the search system.
-@return TRUE if ok */
-static
-ibool
-btr_search_validate_one_table(
-/*==========================*/
- ulint t)
-{
- ha_node_t* node;
- ulint n_page_dumps = 0;
- ibool ok = TRUE;
- ulint i;
- ulint cell_count;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
-
- /* How many cells to check before temporarily releasing
- btr_search_latch. */
- ulint chunk_size = 10000;
-
- rec_offs_init(offsets_);
-
- cell_count = hash_get_n_cells(btr_search_sys->hash_tables[t]);
-
- for (i = 0; i < cell_count; i++) {
- /* We release btr_search_latch every once in a while to
- give other queries a chance to run. */
- if ((i != 0) && ((i % chunk_size) == 0)) {
- btr_search_x_unlock_all();
- os_thread_yield();
- btr_search_x_lock_all();
- }
-
- node = (ha_node_t*)
- hash_get_nth_cell(btr_search_sys->hash_tables[t],
- i)->node;
-
- for (; node != NULL; node = node->next) {
- buf_block_t* block
- = buf_block_align((byte*) node->data);
- const buf_block_t* hash_block;
- buf_pool_t* buf_pool;
- index_id_t page_index_id;
-
- buf_pool = buf_pool_from_bpage((buf_page_t *) block);
- /* Prevent BUF_BLOCK_FILE_PAGE -> BUF_BLOCK_REMOVE_HASH
- transition until we lock the block mutex */
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- if (UNIV_LIKELY(buf_block_get_state(block)
- == BUF_BLOCK_FILE_PAGE)) {
-
- /* The space and offset are only valid
- for file blocks. It is possible that
- the block is being freed
- (BUF_BLOCK_REMOVE_HASH, see the
- assertion and the comment below) */
- hash_block = buf_block_hash_get(
- buf_pool,
- buf_block_get_space(block),
- buf_block_get_page_no(block));
- } else {
- hash_block = NULL;
- }
-
- if (hash_block) {
- ut_a(hash_block == block);
- } else {
- /* When a block is being freed,
- buf_LRU_search_and_free_block() first
- removes the block from
- buf_pool->page_hash by calling
- buf_LRU_block_remove_hashed_page().
- After that, it invokes
- btr_search_drop_page_hash_index() to
- remove the block from
- btr_search_sys->hash_index. */
-
- ut_a(buf_block_get_state(block)
- == BUF_BLOCK_REMOVE_HASH);
- }
-
- mutex_enter(&block->mutex);
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- ut_a(!dict_index_is_ibuf(block->index));
-
- page_index_id = btr_page_get_index_id(block->frame);
-
- offsets = rec_get_offsets(node->data,
- block->index, offsets,
- block->curr_n_fields
- + (block->curr_n_bytes > 0),
- &heap);
-
- if (!block->index || node->fold
- != rec_fold(node->data,
- offsets,
- block->curr_n_fields,
- block->curr_n_bytes,
- page_index_id)) {
- const page_t* page = block->frame;
-
- ok = FALSE;
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error in an adaptive hash"
- " index pointer to page %lu\n"
- "InnoDB: ptr mem address %p"
- " index id %llu,"
- " node fold %lu, rec fold %lu\n",
- (ulong) page_get_page_no(page),
- node->data,
- (ullint) page_index_id,
- (ulong) node->fold,
- (ulong) rec_fold(node->data,
- offsets,
- block->curr_n_fields,
- block->curr_n_bytes,
- page_index_id));
-
- fputs("InnoDB: Record ", stderr);
- rec_print_new(stderr, node->data, offsets);
- fprintf(stderr, "\nInnoDB: on that page."
- " Page mem address %p, is hashed %p,"
- " n fields %lu, n bytes %lu\n"
- "InnoDB: side %lu\n",
- (void*) page, (void*) block->index,
- (ulong) block->curr_n_fields,
- (ulong) block->curr_n_bytes,
- (ulong) block->curr_left_side);
-
- if (n_page_dumps < 20) {
- buf_page_print(
- page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
- n_page_dumps++;
- }
- }
-
- mutex_exit(&block->mutex);
- }
- }
-
- for (i = 0; i < cell_count; i += chunk_size) {
- ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1);
-
- /* We release btr_search_latch every once in a while to
- give other queries a chance to run. */
- if (i != 0) {
- btr_search_x_unlock_all();
- os_thread_yield();
- btr_search_x_lock_all();
- }
-
- if (!ha_validate(btr_search_sys->hash_tables[t], i,
- end_index)) {
- ok = FALSE;
- }
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- return(ok);
-}
-
-/********************************************************************//**
-Validates the search system.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-btr_search_validate(void)
-/*=====================*/
-{
- ulint i;
- ibool ok = TRUE;
-
- btr_search_x_lock_all();
-
- for (i = 0; i < btr_search_index_num; i++) {
-
- if (!btr_search_validate_one_table(i))
- ok = FALSE;
- }
-
- btr_search_x_unlock_all();
-
- return(ok);
-}
-
-
-#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
diff --git a/storage/xtradb/buf/buf0buddy.cc b/storage/xtradb/buf/buf0buddy.cc
deleted file mode 100644
index 2ee39c6c992..00000000000
--- a/storage/xtradb/buf/buf0buddy.cc
+++ /dev/null
@@ -1,741 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file buf/buf0buddy.cc
-Binary buddy allocator for compressed pages
-
-Created December 2006 by Marko Makela
-*******************************************************/
-
-#define THIS_MODULE
-#include "buf0buddy.h"
-#ifdef UNIV_NONINL
-# include "buf0buddy.ic"
-#endif
-#undef THIS_MODULE
-#include "buf0buf.h"
-#include "buf0lru.h"
-#include "buf0flu.h"
-#include "page0zip.h"
-#include "srv0start.h"
-
-/** When freeing a buf we attempt to coalesce by looking at its buddy
-and deciding whether it is free or not. To ascertain if the buddy is
-free we look for BUF_BUDDY_STAMP_FREE at BUF_BUDDY_STAMP_OFFSET
-within the buddy. The question is how we can be sure that it is
-safe to look at BUF_BUDDY_STAMP_OFFSET.
-The answer lies in following invariants:
-* All blocks allocated by buddy allocator are used for compressed
-page frame.
-* A compressed table always have space_id < SRV_LOG_SPACE_FIRST_ID
-* BUF_BUDDY_STAMP_OFFSET always points to the space_id field in
-a frame.
- -- The above is true because we look at these fields when the
- corresponding buddy block is free which implies that:
- * The block we are looking at must have an address aligned at
- the same size that its free buddy has. For example, if we have
- a free block of 8K then its buddy's address must be aligned at
- 8K as well.
- * It is possible that the block we are looking at may have been
- further divided into smaller sized blocks but its starting
- address must still remain the start of a page frame i.e.: it
- cannot be middle of a block. For example, if we have a free
- block of size 8K then its buddy may be divided into blocks
- of, say, 1K, 1K, 2K, 4K but the buddy's address will still be
- the starting address of first 1K compressed page.
- * What is important to note is that for any given block, the
- buddy's address cannot be in the middle of a larger block i.e.:
- in above example, our 8K block cannot have a buddy whose address
- is aligned on 8K but it is part of a larger 16K block.
-*/
-
-/** Offset within buf_buddy_free_t where free or non_free stamps
-are written.*/
-#define BUF_BUDDY_STAMP_OFFSET FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
-
-/** Value that we stamp on all buffers that are currently on the zip_free
-list. This value is stamped at BUF_BUDDY_STAMP_OFFSET offset */
-#define BUF_BUDDY_STAMP_FREE (SRV_LOG_SPACE_FIRST_ID)
-
-/** Stamp value for non-free buffers. Will be overwritten by a non-zero
-value by the consumer of the block */
-#define BUF_BUDDY_STAMP_NONFREE (0XFFFFFFFF)
-
-#if BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE
-# error "BUF_BUDDY_STAMP_FREE >= BUF_BUDDY_STAMP_NONFREE"
-#endif
-
-/** Return type of buf_buddy_is_free() */
-enum buf_buddy_state_t {
- BUF_BUDDY_STATE_FREE, /*!< If the buddy to completely free */
- BUF_BUDDY_STATE_USED, /*!< Buddy currently in used */
- BUF_BUDDY_STATE_PARTIALLY_USED/*!< Some sub-blocks in the buddy
- are in use */
-};
-
-#ifdef UNIV_DEBUG_VALGRIND
-/**********************************************************************//**
-Invalidate memory area that we won't access while page is free */
-UNIV_INLINE
-void
-buf_buddy_mem_invalid(
-/*==================*/
- buf_buddy_free_t* buf, /*!< in: block to check */
- ulint i) /*!< in: index of zip_free[] */
-{
- const size_t size = BUF_BUDDY_LOW << i;
- ut_ad(i <= BUF_BUDDY_SIZES);
-
- UNIV_MEM_ASSERT_W(buf, size);
- UNIV_MEM_INVALID(buf, size);
-}
-#else /* UNIV_DEBUG_VALGRIND */
-# define buf_buddy_mem_invalid(buf, i) ut_ad((i) <= BUF_BUDDY_SIZES)
-#endif /* UNIV_DEBUG_VALGRIND */
-
-/**********************************************************************//**
-Check if a buddy is stamped free.
-@return whether the buddy is free */
-UNIV_INLINE MY_ATTRIBUTE((warn_unused_result))
-bool
-buf_buddy_stamp_is_free(
-/*====================*/
- const buf_buddy_free_t* buf) /*!< in: block to check */
-{
- return(mach_read_from_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET)
- == BUF_BUDDY_STAMP_FREE);
-}
-
-/**********************************************************************//**
-Stamps a buddy free. */
-UNIV_INLINE
-void
-buf_buddy_stamp_free(
-/*=================*/
- buf_buddy_free_t* buf, /*!< in/out: block to stamp */
- ulint i) /*!< in: block size */
-{
- ut_d(memset(buf, static_cast<int>(i), BUF_BUDDY_LOW << i));
- buf_buddy_mem_invalid(buf, i);
- mach_write_to_4(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET,
- BUF_BUDDY_STAMP_FREE);
- buf->stamp.size = i;
-}
-
-/**********************************************************************//**
-Stamps a buddy nonfree.
-@param[in/out] buf block to stamp
-@param[in] i block size */
-#define buf_buddy_stamp_nonfree(buf, i) do { \
- buf_buddy_mem_invalid(buf, i); \
- memset(buf->stamp.bytes + BUF_BUDDY_STAMP_OFFSET, 0xff, 4); \
-} while (0)
-#if BUF_BUDDY_STAMP_NONFREE != 0xffffffff
-# error "BUF_BUDDY_STAMP_NONFREE != 0xffffffff"
-#endif
-
-/**********************************************************************//**
-Get the offset of the buddy of a compressed page frame.
-@return the buddy relative of page */
-UNIV_INLINE
-void*
-buf_buddy_get(
-/*==========*/
- byte* page, /*!< in: compressed page */
- ulint size) /*!< in: page size in bytes */
-{
- ut_ad(ut_is_2pow(size));
- ut_ad(size >= BUF_BUDDY_LOW);
- ut_ad(BUF_BUDDY_LOW <= UNIV_ZIP_SIZE_MIN);
- ut_ad(size < BUF_BUDDY_HIGH);
- ut_ad(BUF_BUDDY_HIGH == UNIV_PAGE_SIZE);
- ut_ad(!ut_align_offset(page, size));
-
- if (((ulint) page) & size) {
- return(page - size);
- } else {
- return(page + size);
- }
-}
-
-/** Validate a given zip_free list. */
-struct CheckZipFree {
- ulint i;
- CheckZipFree(ulint i) : i (i) {}
-
- void operator()(const buf_buddy_free_t* elem) const
- {
- ut_a(buf_buddy_stamp_is_free(elem));
- ut_a(elem->stamp.size <= i);
- }
-};
-
-#define BUF_BUDDY_LIST_VALIDATE(bp, i) \
- UT_LIST_VALIDATE(list, buf_buddy_free_t, \
- bp->zip_free[i], CheckZipFree(i))
-
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Debug function to validate that a buffer is indeed free i.e.: in the
-zip_free[].
-@return true if free */
-UNIV_INLINE
-bool
-buf_buddy_check_free(
-/*=================*/
- buf_pool_t* buf_pool,/*!< in: buffer pool instance */
- const buf_buddy_free_t* buf, /*!< in: block to check */
- ulint i) /*!< in: index of buf_pool->zip_free[] */
-{
- const ulint size = BUF_BUDDY_LOW << i;
-
- ut_ad(mutex_own(&buf_pool->zip_free_mutex));
- ut_ad(!ut_align_offset(buf, size));
- ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
-
- buf_buddy_free_t* itr;
-
- for (itr = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
- itr && itr != buf;
- itr = UT_LIST_GET_NEXT(list, itr)) {
- }
-
- return(itr == buf);
-}
-#endif /* UNIV_DEBUG */
-
-/**********************************************************************//**
-Checks if a buf is free i.e.: in the zip_free[].
-@retval BUF_BUDDY_STATE_FREE if fully free
-@retval BUF_BUDDY_STATE_USED if currently in use
-@retval BUF_BUDDY_STATE_PARTIALLY_USED if partially in use. */
-static MY_ATTRIBUTE((warn_unused_result))
-buf_buddy_state_t
-buf_buddy_is_free(
-/*==============*/
- buf_buddy_free_t* buf, /*!< in: block to check */
- ulint i) /*!< in: index of
- buf_pool->zip_free[] */
-{
-#ifdef UNIV_DEBUG
- const ulint size = BUF_BUDDY_LOW << i;
- ut_ad(!ut_align_offset(buf, size));
- ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
-#endif /* UNIV_DEBUG */
-
- /* We assume that all memory from buf_buddy_alloc()
- is used for compressed page frames. */
-
- /* We look inside the allocated objects returned by
- buf_buddy_alloc() and assume that each block is a compressed
- page that contains one of the following in space_id.
- * BUF_BUDDY_STAMP_FREE if the block is in a zip_free list or
- * BUF_BUDDY_STAMP_NONFREE if the block has been allocated but
- not initialized yet or
- * A valid space_id of a compressed tablespace
-
- The call below attempts to read from free memory. The memory
- is "owned" by the buddy allocator (and it has been allocated
- from the buffer pool), so there is nothing wrong about this. */
- if (!buf_buddy_stamp_is_free(buf)) {
- return(BUF_BUDDY_STATE_USED);
- }
-
- /* A block may be free but a fragment of it may still be in use.
- To guard against that we write the free block size in terms of
- zip_free index at start of stamped block. Note that we can
- safely rely on this value only if the buf is free. */
- ut_ad(buf->stamp.size <= i);
- return(buf->stamp.size == i
- ? BUF_BUDDY_STATE_FREE
- : BUF_BUDDY_STATE_PARTIALLY_USED);
-}
-
-/**********************************************************************//**
-Add a block to the head of the appropriate buddy free list. */
-UNIV_INLINE
-void
-buf_buddy_add_to_free(
-/*==================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_buddy_free_t* buf, /*!< in,own: block to be freed */
- ulint i) /*!< in: index of
- buf_pool->zip_free[] */
-{
- ut_ad(mutex_own(&buf_pool->zip_free_mutex));
- ut_ad(buf_pool->zip_free[i].start != buf);
-
- buf_buddy_stamp_free(buf, i);
- UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], buf);
- ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
-}
-
-/**********************************************************************//**
-Remove a block from the appropriate buddy free list. */
-UNIV_INLINE
-void
-buf_buddy_remove_from_free(
-/*=======================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_buddy_free_t* buf, /*!< in,own: block to be freed */
- ulint i) /*!< in: index of
- buf_pool->zip_free[] */
-{
- ut_ad(mutex_own(&buf_pool->zip_free_mutex));
- ut_ad(buf_buddy_check_free(buf_pool, buf, i));
-
- UT_LIST_REMOVE(list, buf_pool->zip_free[i], buf);
- buf_buddy_stamp_nonfree(buf, i);
-}
-
-/**********************************************************************//**
-Try to allocate a block from buf_pool->zip_free[].
-@return allocated block, or NULL if buf_pool->zip_free[] was empty */
-static
-buf_buddy_free_t*
-buf_buddy_alloc_zip(
-/*================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- ulint i) /*!< in: index of buf_pool->zip_free[] */
-{
- buf_buddy_free_t* buf;
-
- ut_ad(mutex_own(&buf_pool->zip_free_mutex));
- ut_a(i < BUF_BUDDY_SIZES);
- ut_a(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
-
- ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
-
- buf = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
-
- if (buf) {
- buf_buddy_remove_from_free(buf_pool, buf, i);
- } else if (i + 1 < BUF_BUDDY_SIZES) {
- /* Attempt to split. */
- buf = buf_buddy_alloc_zip(buf_pool, i + 1);
-
- if (buf) {
- buf_buddy_free_t* buddy =
- reinterpret_cast<buf_buddy_free_t*>(
- buf->stamp.bytes
- + (BUF_BUDDY_LOW << i));
-
- ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
- buf_buddy_add_to_free(buf_pool, buddy, i);
- }
- }
-
- if (buf) {
- /* Trash the page other than the BUF_BUDDY_STAMP_NONFREE. */
- UNIV_MEM_TRASH(buf, ~i, BUF_BUDDY_STAMP_OFFSET);
- UNIV_MEM_TRASH(BUF_BUDDY_STAMP_OFFSET + 4
- + buf->stamp.bytes, ~i,
- (BUF_BUDDY_LOW << i)
- - (BUF_BUDDY_STAMP_OFFSET + 4));
- ut_ad(mach_read_from_4(buf->stamp.bytes
- + BUF_BUDDY_STAMP_OFFSET)
- == BUF_BUDDY_STAMP_NONFREE);
- }
-
- return(buf);
-}
-
-/**********************************************************************//**
-Deallocate a buffer frame of UNIV_PAGE_SIZE. */
-static
-void
-buf_buddy_block_free(
-/*=================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- void* buf) /*!< in: buffer frame to deallocate */
-{
- const ulint fold = BUF_POOL_ZIP_FOLD_PTR(buf);
- buf_page_t* bpage;
- buf_block_t* block;
-
- ut_ad(!mutex_own(&buf_pool->zip_mutex));
- ut_a(!ut_align_offset(buf, UNIV_PAGE_SIZE));
-
- mutex_enter(&buf_pool->zip_hash_mutex);
-
- HASH_SEARCH(hash, buf_pool->zip_hash, fold, buf_page_t*, bpage,
- ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY
- && bpage->in_zip_hash && !bpage->in_page_hash),
- ((buf_block_t*) bpage)->frame == buf);
- ut_a(bpage);
- ut_a(buf_page_get_state(bpage) == BUF_BLOCK_MEMORY);
- ut_ad(!bpage->in_page_hash);
- ut_ad(bpage->in_zip_hash);
- ut_d(bpage->in_zip_hash = FALSE);
- HASH_DELETE(buf_page_t, hash, buf_pool->zip_hash, fold, bpage);
-
- mutex_exit(&buf_pool->zip_hash_mutex);
-
- ut_d(memset(buf, 0, UNIV_PAGE_SIZE));
- UNIV_MEM_INVALID(buf, UNIV_PAGE_SIZE);
-
- block = (buf_block_t*) bpage;
- mutex_enter(&block->mutex);
- buf_LRU_block_free_non_file_page(block);
- mutex_exit(&block->mutex);
-
- ut_ad(buf_pool->buddy_n_frames > 0);
- ut_d(buf_pool->buddy_n_frames--);
-}
-
-/**********************************************************************//**
-Allocate a buffer block to the buddy allocator. */
-static
-void
-buf_buddy_block_register(
-/*=====================*/
- buf_block_t* block) /*!< in: buffer frame to allocate */
-{
- buf_pool_t* buf_pool = buf_pool_from_block(block);
- const ulint fold = BUF_POOL_ZIP_FOLD(block);
- ut_ad(!mutex_own(&buf_pool->zip_mutex));
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_READY_FOR_USE);
-
- buf_block_set_state(block, BUF_BLOCK_MEMORY);
-
- ut_a(block->frame);
- ut_a(!ut_align_offset(block->frame, UNIV_PAGE_SIZE));
-
- ut_ad(!block->page.in_page_hash);
- ut_ad(!block->page.in_zip_hash);
- ut_d(block->page.in_zip_hash = TRUE);
-
- mutex_enter(&buf_pool->zip_hash_mutex);
- HASH_INSERT(buf_page_t, hash, buf_pool->zip_hash, fold, &block->page);
- mutex_exit(&buf_pool->zip_hash_mutex);
-
- ut_d(buf_pool->buddy_n_frames++);
-}
-
-/**********************************************************************//**
-Allocate a block from a bigger object.
-@return allocated block */
-static
-void*
-buf_buddy_alloc_from(
-/*=================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- void* buf, /*!< in: a block that is free to use */
- ulint i, /*!< in: index of
- buf_pool->zip_free[] */
- ulint j) /*!< in: size of buf as an index
- of buf_pool->zip_free[] */
-{
- ulint offs = BUF_BUDDY_LOW << j;
- ut_ad(mutex_own(&buf_pool->zip_free_mutex));
- ut_ad(j <= BUF_BUDDY_SIZES);
- ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
- ut_ad(j >= i);
- ut_ad(!ut_align_offset(buf, offs));
-
- /* Add the unused parts of the block to the free lists. */
- while (j > i) {
- buf_buddy_free_t* zip_buf;
-
- offs >>= 1;
- j--;
-
- zip_buf = reinterpret_cast<buf_buddy_free_t*>(
- reinterpret_cast<byte*>(buf) + offs);
- buf_buddy_add_to_free(buf_pool, zip_buf, j);
- }
-
- buf_buddy_stamp_nonfree(reinterpret_cast<buf_buddy_free_t*>(buf), i);
- return(buf);
-}
-
-/**********************************************************************//**
-Allocate a block. The thread calling this function must hold
-buf_pool->LRU_list_mutex and must not hold buf_pool->zip_mutex or any
-block->mutex. The buf_pool->LRU_list_mutex may be released and reacquired.
-@return allocated block, never NULL */
-UNIV_INTERN
-void*
-buf_buddy_alloc_low(
-/*================*/
- buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
- ulint i, /*!< in: index of buf_pool->zip_free[],
- or BUF_BUDDY_SIZES */
- ibool* lru) /*!< in: pointer to a variable that
- will be assigned TRUE if storage was
- allocated from the LRU list and
- buf_pool->LRU_list_mutex was
- temporarily released */
-{
- buf_block_t* block;
-
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
- ut_ad(!mutex_own(&buf_pool->zip_mutex));
- ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
-
- if (i < BUF_BUDDY_SIZES) {
- /* Try to allocate from the buddy system. */
- mutex_enter(&buf_pool->zip_free_mutex);
- block = (buf_block_t*) buf_buddy_alloc_zip(buf_pool, i);
-
- if (block) {
- goto func_exit;
- }
- mutex_exit(&buf_pool->zip_free_mutex);
- }
-
- /* Try allocating from the buf_pool->free list. */
- block = buf_LRU_get_free_only(buf_pool);
-
- if (block) {
-
- goto alloc_big;
- }
-
- /* Try replacing an uncompressed page in the buffer pool. */
- mutex_exit(&buf_pool->LRU_list_mutex);
- block = buf_LRU_get_free_block(buf_pool);
- *lru = TRUE;
- mutex_enter(&buf_pool->LRU_list_mutex);
-
-alloc_big:
- buf_buddy_block_register(block);
-
- mutex_enter(&buf_pool->zip_free_mutex);
- block = (buf_block_t*) buf_buddy_alloc_from(
- buf_pool, block->frame, i, BUF_BUDDY_SIZES);
-
-func_exit:
- buf_pool->buddy_stat[i].used++;
- mutex_exit(&buf_pool->zip_free_mutex);
-
- return(block);
-}
-
-/**********************************************************************//**
-Try to relocate a block. The caller must hold zip_free_mutex, and this
-function will release and lock it again.
-@return true if relocated */
-static
-bool
-buf_buddy_relocate(
-/*===============*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- void* src, /*!< in: block to relocate */
- void* dst, /*!< in: free block to relocate to */
- ulint i) /*!< in: index of
- buf_pool->zip_free[] */
-{
- buf_page_t* bpage;
- const ulint size = BUF_BUDDY_LOW << i;
- ulint space;
- ulint offset;
-
- ut_ad(mutex_own(&buf_pool->zip_free_mutex));
- ut_ad(!mutex_own(&buf_pool->zip_mutex));
- ut_ad(!ut_align_offset(src, size));
- ut_ad(!ut_align_offset(dst, size));
- ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
- UNIV_MEM_ASSERT_W(dst, size);
-
- space = mach_read_from_4((const byte*) src
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- offset = mach_read_from_4((const byte*) src
- + FIL_PAGE_OFFSET);
-
- /* Suppress Valgrind warnings about conditional jump
- on uninitialized value. */
- UNIV_MEM_VALID(&space, sizeof space);
- UNIV_MEM_VALID(&offset, sizeof offset);
-
- ut_ad(space != BUF_BUDDY_STAMP_FREE);
-
- mutex_exit(&buf_pool->zip_free_mutex);
-
- ulint fold = buf_page_address_fold(space, offset);
- prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-
- rw_lock_x_lock(hash_lock);
-
- bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
-
- if (!bpage || bpage->zip.data != src) {
- /* The block has probably been freshly
- allocated by buf_LRU_get_free_block() but not
- added to buf_pool->page_hash yet. Obviously,
- it cannot be relocated. */
-
- rw_lock_x_unlock(hash_lock);
-
- mutex_enter(&buf_pool->zip_free_mutex);
- return(false);
- }
-
- if (page_zip_get_size(&bpage->zip) != size) {
- /* The block is of different size. We would
- have to relocate all blocks covered by src.
- For the sake of simplicity, give up. */
- ut_ad(page_zip_get_size(&bpage->zip) < size);
-
- rw_lock_x_unlock(hash_lock);
-
- mutex_enter(&buf_pool->zip_free_mutex);
- return(false);
- }
-
- /* The block must have been allocated, but it may
- contain uninitialized data. */
- UNIV_MEM_ASSERT_W(src, size);
-
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- mutex_enter(block_mutex);
-
- mutex_enter(&buf_pool->zip_free_mutex);
-
- if (buf_page_can_relocate(bpage)) {
- /* Relocate the compressed page. */
- ullint usec = ut_time_us(NULL);
-
- ut_a(bpage->zip.data == src);
-
- /* Note: This is potentially expensive, we need a better
- solution here. We go with correctness for now. */
- ::memcpy(dst, src, size);
-
- bpage->zip.data = reinterpret_cast<page_zip_t*>(dst);
-
- rw_lock_x_unlock(hash_lock);
-
- mutex_exit(block_mutex);
-
- buf_buddy_mem_invalid(
- reinterpret_cast<buf_buddy_free_t*>(src), i);
-
- buf_buddy_stat_t* buddy_stat = &buf_pool->buddy_stat[i];
-
- ++buddy_stat->relocated;
-
- buddy_stat->relocated_usec += ut_time_us(NULL) - usec;
-
- return(true);
- }
-
- rw_lock_x_unlock(hash_lock);
-
- mutex_exit(block_mutex);
-
- return(false);
-}
-
-/**********************************************************************//**
-Deallocate a block. */
-UNIV_INTERN
-void
-buf_buddy_free_low(
-/*===============*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- void* buf, /*!< in: block to be freed, must not be
- pointed to by the buffer pool */
- ulint i) /*!< in: index of buf_pool->zip_free[],
- or BUF_BUDDY_SIZES */
-{
- buf_buddy_free_t* buddy;
-
- ut_ad(!mutex_own(&buf_pool->zip_mutex));
- ut_ad(i <= BUF_BUDDY_SIZES);
- ut_ad(i >= buf_buddy_get_slot(UNIV_ZIP_SIZE_MIN));
-
- mutex_enter(&buf_pool->zip_free_mutex);
-
- ut_ad(buf_pool->buddy_stat[i].used > 0);
- buf_pool->buddy_stat[i].used--;
-recombine:
- UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
-
- if (i == BUF_BUDDY_SIZES) {
- mutex_exit(&buf_pool->zip_free_mutex);
- buf_buddy_block_free(buf_pool, buf);
- return;
- }
-
- ut_ad(i < BUF_BUDDY_SIZES);
- ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
- ut_ad(!buf_pool_contains_zip(buf_pool, buf));
-
- /* Do not recombine blocks if there are few free blocks.
- We may waste up to 15360*max_len bytes to free blocks
- (1024 + 2048 + 4096 + 8192 = 15360) */
- if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16) {
- goto func_exit;
- }
-
- /* Try to combine adjacent blocks. */
- buddy = reinterpret_cast<buf_buddy_free_t*>(
- buf_buddy_get(reinterpret_cast<byte*>(buf),
- BUF_BUDDY_LOW << i));
-
- switch (buf_buddy_is_free(buddy, i)) {
- case BUF_BUDDY_STATE_FREE:
- /* The buddy is free: recombine */
- buf_buddy_remove_from_free(buf_pool, buddy, i);
-buddy_is_free:
- ut_ad(!buf_pool_contains_zip(buf_pool, buddy));
- i++;
- buf = ut_align_down(buf, BUF_BUDDY_LOW << i);
-
- goto recombine;
-
- case BUF_BUDDY_STATE_USED:
- ut_d(BUF_BUDDY_LIST_VALIDATE(buf_pool, i));
-
- /* The buddy is not free. Is there a free block of
- this size? */
- if (buf_buddy_free_t* zip_buf =
- UT_LIST_GET_FIRST(buf_pool->zip_free[i])) {
-
- /* Remove the block from the free list, because
- a successful buf_buddy_relocate() will overwrite
- zip_free->list. */
- buf_buddy_remove_from_free(buf_pool, zip_buf, i);
-
- /* Try to relocate the buddy of buf to the free
- block. */
- if (buf_buddy_relocate(buf_pool, buddy, zip_buf, i)) {
-
- goto buddy_is_free;
- }
-
- buf_buddy_add_to_free(buf_pool, zip_buf, i);
- }
-
- break;
- case BUF_BUDDY_STATE_PARTIALLY_USED:
- /* Some sub-blocks in the buddy are still in use.
- Relocation will fail. No need to try. */
- break;
- }
-
-func_exit:
- /* Free the block to the buddy list. */
- buf_buddy_add_to_free(buf_pool,
- reinterpret_cast<buf_buddy_free_t*>(buf),
- i);
- mutex_exit(&buf_pool->zip_free_mutex);
-}
diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc
deleted file mode 100644
index 01bec11d2ed..00000000000
--- a/storage/xtradb/buf/buf0buf.cc
+++ /dev/null
@@ -1,6504 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file buf/buf0buf.cc
-The database buffer buf_pool
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0buf.h"
-
-#ifdef UNIV_NONINL
-#include "buf0buf.ic"
-#endif
-
-#include "mem0mem.h"
-#include "btr0btr.h"
-#include "fil0fil.h"
-#include "fil0crypt.h"
-#ifndef UNIV_HOTBACKUP
-#include "buf0buddy.h"
-#include "lock0lock.h"
-#include "btr0sea.h"
-#include "ibuf0ibuf.h"
-#include "trx0undo.h"
-#include "log0log.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "srv0srv.h"
-#include "dict0dict.h"
-#include "log0recv.h"
-#include "page0zip.h"
-#include "srv0mon.h"
-#include "buf0checksum.h"
-#ifdef HAVE_LIBNUMA
-#include <numa.h>
-#include <numaif.h>
-#endif // HAVE_LIBNUMA
-#include "trx0trx.h"
-#include "srv0start.h"
-#include "ut0byte.h"
-#include "fil0pagecompress.h"
-#include "ha_prototypes.h"
-
-#ifdef UNIV_LINUX
-#include <stdlib.h>
-#endif
-
-#ifdef HAVE_LZO
-#include "lzo/lzo1x.h"
-#endif
-
-#ifdef HAVE_SNAPPY
-#include "snappy-c.h"
-#endif
-
-/** Decrypt a page.
-@param[in,out] bpage Page control block
-@param[in,out] space tablespace
-@return whether the operation was successful */
-static
-bool
-buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)
- MY_ATTRIBUTE((nonnull));
-
-/********************************************************************//**
-Mark a table with the specified space pointed by bpage->space corrupted.
-Also remove the bpage from LRU list.
-@param[in,out] bpage Block */
-static
-void
-buf_mark_space_corrupt(
- buf_page_t* bpage);
-
-/* prototypes for new functions added to ha_innodb.cc */
-trx_t* innobase_get_trx();
-
-inline void* aligned_malloc(size_t size, size_t align) {
- void *result;
-#ifdef _MSC_VER
- result = _aligned_malloc(size, align);
-#else
- if(posix_memalign(&result, align, size)) {
- result = 0;
- }
-#endif
- return result;
-}
-
-inline void aligned_free(void *ptr) {
-#ifdef _MSC_VER
- _aligned_free(ptr);
-#else
- free(ptr);
-#endif
-}
-
-static inline
-void
-_increment_page_get_statistics(buf_block_t* block, trx_t* trx)
-{
- ulint block_hash;
- ulint block_hash_byte;
- byte block_hash_offset;
-
- ut_ad(block);
- ut_ad(trx && trx->take_stats);
-
- if (!trx->distinct_page_access_hash) {
- trx->distinct_page_access_hash
- = static_cast<byte *>(mem_alloc(DPAH_SIZE));
- memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
- }
-
- block_hash = ut_hash_ulint((block->page.space << 20) + block->page.space +
- block->page.offset, DPAH_SIZE << 3);
- block_hash_byte = block_hash >> 3;
- block_hash_offset = (byte) block_hash & 0x07;
- if (block_hash_byte >= DPAH_SIZE)
- fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset);
- if (block_hash_offset > 7)
- fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset);
- if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0)
- trx->distinct_page_access++;
- trx->distinct_page_access_hash[block_hash_byte] |= (byte) 0x01 << block_hash_offset;
- return;
-}
-
-/*
- IMPLEMENTATION OF THE BUFFER POOL
- =================================
-
-Performance improvement:
-------------------------
-Thread scheduling in NT may be so slow that the OS wait mechanism should
-not be used even in waiting for disk reads to complete.
-Rather, we should put waiting query threads to the queue of
-waiting jobs, and let the OS thread do something useful while the i/o
-is processed. In this way we could remove most OS thread switches in
-an i/o-intensive benchmark like TPC-C.
-
-A possibility is to put a user space thread library between the database
-and NT. User space thread libraries might be very fast.
-
-SQL Server 7.0 can be configured to use 'fibers' which are lightweight
-threads in NT. These should be studied.
-
- Buffer frames and blocks
- ------------------------
-Following the terminology of Gray and Reuter, we call the memory
-blocks where file pages are loaded buffer frames. For each buffer
-frame there is a control block, or shortly, a block, in the buffer
-control array. The control info which does not need to be stored
-in the file along with the file page, resides in the control block.
-
- Buffer pool struct
- ------------------
-The buffer buf_pool contains several mutexes which protect all the
-control data structures of the buf_pool. The content of a buffer frame is
-protected by a separate read-write lock in its control block, though.
-
- Control blocks
- --------------
-
-The control block contains, for instance, the bufferfix count
-which is incremented when a thread wants a file page to be fixed
-in a buffer frame. The bufferfix operation does not lock the
-contents of the frame, however. For this purpose, the control
-block contains a read-write lock.
-
-The buffer frames have to be aligned so that the start memory
-address of a frame is divisible by the universal page size, which
-is a power of two.
-
-We intend to make the buffer buf_pool size on-line reconfigurable,
-that is, the buf_pool size can be changed without closing the database.
-Then the database administarator may adjust it to be bigger
-at night, for example. The control block array must
-contain enough control blocks for the maximum buffer buf_pool size
-which is used in the particular database.
-If the buf_pool size is cut, we exploit the virtual memory mechanism of
-the OS, and just refrain from using frames at high addresses. Then the OS
-can swap them to disk.
-
-The control blocks containing file pages are put to a hash table
-according to the file address of the page.
-We could speed up the access to an individual page by using
-"pointer swizzling": we could replace the page references on
-non-leaf index pages by direct pointers to the page, if it exists
-in the buf_pool. We could make a separate hash table where we could
-chain all the page references in non-leaf pages residing in the buf_pool,
-using the page reference as the hash key,
-and at the time of reading of a page update the pointers accordingly.
-Drawbacks of this solution are added complexity and,
-possibly, extra space required on non-leaf pages for memory pointers.
-A simpler solution is just to speed up the hash table mechanism
-in the database, using tables whose size is a power of 2.
-
- Lists of blocks
- ---------------
-
-There are several lists of control blocks.
-
-The free list (buf_pool->free) contains blocks which are currently not
-used.
-
-The common LRU list contains all the blocks holding a file page
-except those for which the bufferfix count is non-zero.
-The pages are in the LRU list roughly in the order of the last
-access to the page, so that the oldest pages are at the end of the
-list. We also keep a pointer to near the end of the LRU list,
-which we can use when we want to artificially age a page in the
-buf_pool. This is used if we know that some page is not needed
-again for some time: we insert the block right after the pointer,
-causing it to be replaced sooner than would normally be the case.
-Currently this aging mechanism is used for read-ahead mechanism
-of pages, and it can also be used when there is a scan of a full
-table which cannot fit in the memory. Putting the pages near the
-end of the LRU list, we make sure that most of the buf_pool stays
-in the main memory, undisturbed.
-
-The unzip_LRU list contains a subset of the common LRU list. The
-blocks on the unzip_LRU list hold a compressed file page and the
-corresponding uncompressed page frame. A block is in unzip_LRU if and
-only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
-holds. The blocks in unzip_LRU will be in same order as they are in
-the common LRU list. That is, each manipulation of the common LRU
-list will result in the same manipulation of the unzip_LRU list.
-
-The chain of modified blocks (buf_pool->flush_list) contains the blocks
-holding file pages that have been modified in the memory
-but not written to disk yet. The block with the oldest modification
-which has not yet been written to disk is at the end of the chain.
-The access to this list is protected by buf_pool->flush_list_mutex.
-
-The chain of unmodified compressed blocks (buf_pool->zip_clean)
-contains the control blocks (buf_page_t) of those compressed pages
-that are not in buf_pool->flush_list and for which no uncompressed
-page has been allocated in the buffer pool. The control blocks for
-uncompressed pages are accessible via buf_block_t objects that are
-reachable via buf_pool->chunks[].
-
-The chains of free memory blocks (buf_pool->zip_free[]) are used by
-the buddy allocator (buf0buddy.cc) to keep track of currently unused
-memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2. These
-blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
-BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
-pool. The buddy allocator is solely used for allocating control
-blocks for compressed pages (buf_page_t) and compressed page frames.
-
- Loading a file page
- -------------------
-
-First, a victim block for replacement has to be found in the
-buf_pool. It is taken from the free list or searched for from the
-end of the LRU-list. An exclusive lock is reserved for the frame,
-the io_fix field is set in the block fixing the block in buf_pool,
-and the io-operation for loading the page is queued. The io-handler thread
-releases the X-lock on the frame and resets the io_fix field
-when the io operation completes.
-
-A thread may request the above operation using the function
-buf_page_get(). It may then continue to request a lock on the frame.
-The lock is granted when the io-handler releases the x-lock.
-
- Read-ahead
- ----------
-
-The read-ahead mechanism is intended to be intelligent and
-isolated from the semantically higher levels of the database
-index management. From the higher level we only need the
-information if a file page has a natural successor or
-predecessor page. On the leaf level of a B-tree index,
-these are the next and previous pages in the natural
-order of the pages.
-
-Let us first explain the read-ahead mechanism when the leafs
-of a B-tree are scanned in an ascending or descending order.
-When a read page is the first time referenced in the buf_pool,
-the buffer manager checks if it is at the border of a so-called
-linear read-ahead area. The tablespace is divided into these
-areas of size 64 blocks, for example. So if the page is at the
-border of such an area, the read-ahead mechanism checks if
-all the other blocks in the area have been accessed in an
-ascending or descending order. If this is the case, the system
-looks at the natural successor or predecessor of the page,
-checks if that is at the border of another area, and in this case
-issues read-requests for all the pages in that area. Maybe
-we could relax the condition that all the pages in the area
-have to be accessed: if data is deleted from a table, there may
-appear holes of unused pages in the area.
-
-A different read-ahead mechanism is used when there appears
-to be a random access pattern to a file.
-If a new page is referenced in the buf_pool, and several pages
-of its random access area (for instance, 32 consecutive pages
-in a tablespace) have recently been referenced, we may predict
-that the whole area may be needed in the near future, and issue
-the read requests for the whole area.
-*/
-
-#ifndef UNIV_HOTBACKUP
-/** Value in microseconds */
-static const int WAIT_FOR_READ = 100;
-/** Number of attemtps made to read in a page in the buffer pool */
-static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
-
-/** The buffer pools of the database */
-UNIV_INTERN buf_pool_t* buf_pool_ptr;
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-static ulint buf_dbg_counter = 0; /*!< This is used to insert validation
- operations in execution in the
- debug version */
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#ifdef UNIV_DEBUG
-/** If this is set TRUE, the program prints info whenever
-read-ahead or flush occurs */
-UNIV_INTERN ibool buf_debug_prints = FALSE;
-#endif /* UNIV_DEBUG */
-
-#ifdef UNIV_PFS_RWLOCK
-/* Keys to register buffer block related rwlocks and mutexes with
-performance schema */
-UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
-# ifdef UNIV_SYNC_DEBUG
-UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_PFS_RWLOCK */
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
-UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
-UNIV_INTERN mysql_pfs_key_t buf_pool_flush_state_mutex_key;
-UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
-UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key;
-UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key;
-UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
-UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
-# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
-
-/* Buffer block mutexes and rwlocks can be registered
-in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
-is defined, register buffer block mutex and rwlock
-in one group after their initialization. */
-# define PFS_GROUP_BUFFER_SYNC
-
-/* This define caps the number of mutexes/rwlocks can
-be registered with performance schema. Developers can
-modify this define if necessary. Please note, this would
-be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
-# define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER ULINT_MAX
-
-# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
-#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
-
-/** Macro to determine whether the read of write counter is used depending
-on the io_type */
-#define MONITOR_RW_COUNTER(io_type, counter) \
- ((io_type == BUF_IO_READ) \
- ? (counter##_READ) \
- : (counter##_WRITTEN))
-
-/********************************************************************//**
-Gets the smallest oldest_modification lsn for any page in the pool. Returns
-zero if all modified pages have been flushed to disk.
-@return oldest modification in pool, zero if none */
-UNIV_INTERN
-lsn_t
-buf_pool_get_oldest_modification(void)
-/*==================================*/
-{
- ulint i;
- buf_page_t* bpage;
- lsn_t lsn = 0;
- lsn_t oldest_lsn = 0;
-
- /* When we traverse all the flush lists we don't want another
- thread to add a dirty page to any flush list. */
- if (srv_buf_pool_instances > 1)
- log_flush_order_mutex_enter();
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
-
- buf_pool = buf_pool_from_array(i);
-
- buf_flush_list_mutex_enter(buf_pool);
-
- bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
-
- if (bpage != NULL) {
- ut_ad(bpage->in_flush_list);
- lsn = bpage->oldest_modification;
- }
-
- buf_flush_list_mutex_exit(buf_pool);
-
- if (!oldest_lsn || oldest_lsn > lsn) {
- oldest_lsn = lsn;
- }
- }
-
- if (srv_buf_pool_instances > 1)
- log_flush_order_mutex_exit();
-
- /* The returned answer may be out of date: the flush_list can
- change after the mutex has been released. */
-
- return(oldest_lsn);
-}
-
-/********************************************************************//**
-Gets the smallest oldest_modification lsn for any page in the pool. Returns
-zero if all modified pages have been flushed to disk.
-@return oldest modification in pool, zero if none */
-UNIV_INTERN
-lsn_t
-buf_pool_get_oldest_modification_peek(void)
-/*=======================================*/
-{
- ulint i;
- buf_page_t* bpage;
- lsn_t lsn = 0;
- lsn_t oldest_lsn = 0;
-
- /* Dirsty read to buffer pool array */
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
-
- buf_pool = buf_pool_from_array(i);
-
- buf_flush_list_mutex_enter(buf_pool);
-
- bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
-
- if (bpage != NULL) {
- ut_ad(bpage->in_flush_list);
- lsn = bpage->oldest_modification;
- }
-
- buf_flush_list_mutex_exit(buf_pool);
-
- if (!oldest_lsn || oldest_lsn > lsn) {
- oldest_lsn = lsn;
- }
- }
-
- /* The returned answer may be out of date: the flush_list can
- change after the mutex has been released. */
-
- return(oldest_lsn);
-}
-
-/********************************************************************//**
-Sums the lengths of the LRU, free and flush lists over all buffer pool
-instances. No list mutex is acquired while reading the lengths. */
-UNIV_INTERN
-void
-buf_get_total_list_len(
-/*===================*/
-	ulint*	LRU_len,	/*!< out: length of all LRU lists */
-	ulint*	free_len,	/*!< out: length of all free lists */
-	ulint*	flush_list_len)	/*!< out: length of all flush lists */
-{
-	/* Start every total from zero before accumulating. */
-	*LRU_len = 0;
-	*free_len = 0;
-	*flush_list_len = 0;
-
-	for (ulint n = 0; n < srv_buf_pool_instances; n++) {
-		buf_pool_t*	pool = buf_pool_from_array(n);
-
-		*LRU_len += UT_LIST_GET_LEN(pool->LRU);
-		*free_len += UT_LIST_GET_LEN(pool->free);
-		*flush_list_len += UT_LIST_GET_LEN(pool->flush_list);
-	}
-}
-
-/********************************************************************//**
-Accumulates the byte sizes of the LRU, unzip_LRU and flush lists of every
-buffer pool instance into one summary structure. */
-UNIV_INTERN
-void
-buf_get_total_list_size_in_bytes(
-/*=============================*/
-	buf_pools_list_size_t*	buf_pools_list_size)	/*!< out: list sizes
-							in all buffer pools */
-{
-	ut_ad(buf_pools_list_size);
-	memset(buf_pools_list_size, 0, sizeof(*buf_pools_list_size));
-
-	ulint	n = 0;
-
-	while (n < srv_buf_pool_instances) {
-		buf_pool_t*	pool = buf_pool_from_array(n++);
-
-		/* Unprotected reads: the figures are only used for
-		statistics. */
-		buf_pools_list_size->LRU_bytes += pool->stat.LRU_bytes;
-
-		/* unzip_LRU has no byte counter of its own here; derive it
-		from the list length. */
-		buf_pools_list_size->unzip_LRU_bytes
-			+= UT_LIST_GET_LEN(pool->unzip_LRU) * UNIV_PAGE_SIZE;
-
-		buf_pools_list_size->flush_list_bytes
-			+= pool->stat.flush_list_bytes;
-	}
-}
-
-/********************************************************************//**
-Adds up the per-instance page counters of all buffer pool instances.
-Only the counters listed below are summed; every other member of the
-output structure is left zeroed. */
-UNIV_INTERN
-void
-buf_get_total_stat(
-/*===============*/
-	buf_pool_stat_t*	tot_stat)	/*!< out: buffer pool stats */
-{
-	memset(tot_stat, 0, sizeof(*tot_stat));
-
-	for (ulint n = 0; n < srv_buf_pool_instances; n++) {
-		const buf_pool_stat_t&	src = buf_pool_from_array(n)->stat;
-
-		tot_stat->n_page_gets += src.n_page_gets;
-		tot_stat->n_pages_read += src.n_pages_read;
-		tot_stat->n_pages_written += src.n_pages_written;
-		tot_stat->n_pages_created += src.n_pages_created;
-		tot_stat->n_ra_pages_read_rnd += src.n_ra_pages_read_rnd;
-		tot_stat->n_ra_pages_read += src.n_ra_pages_read;
-		tot_stat->n_ra_pages_evicted += src.n_ra_pages_evicted;
-		tot_stat->n_pages_made_young += src.n_pages_made_young;
-		tot_stat->n_pages_not_made_young
-			+= src.n_pages_not_made_young;
-	}
-}
-
-/********************************************************************//**
-Takes a free block from a buffer pool instance and marks it as
-BUF_BLOCK_MEMORY.
-@return own: the allocated block, in state BUF_BLOCK_MEMORY */
-UNIV_INTERN
-buf_block_t*
-buf_block_alloc(
-/*============*/
-	buf_pool_t*	buf_pool)	/*!< in/out: buffer pool instance,
-					or NULL for round-robin selection
-					of the buffer pool */
-{
-	/* Rotating cursor used when the caller does not name an instance.
-	It is a plain static counter; no synchronization is applied to it
-	in this function. */
-	static ulint	next_instance;
-
-	if (buf_pool == NULL) {
-		/* Spread anonymous allocations over all instances. */
-		const ulint	slot
-			= next_instance++ % srv_buf_pool_instances;
-
-		buf_pool = buf_pool_from_array(slot);
-	}
-
-	buf_block_t*	block = buf_LRU_get_free_block(buf_pool);
-
-	buf_block_set_state(block, BUF_BLOCK_MEMORY);
-
-	return(block);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/** Tell whether every byte of a page is zero.
-@param[in]	read_buf	database page
-@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0 to scan
-				UNIV_PAGE_SIZE bytes
-@return whether the page is all zeroes */
-UNIV_INTERN
-bool
-buf_page_is_zeroes(const byte* read_buf, ulint zip_size)
-{
-	const byte*		cur = read_buf;
-	const byte* const	end = read_buf
-		+ (zip_size ? zip_size : UNIV_PAGE_SIZE);
-
-	while (cur != end) {
-		if (*cur++ != 0) {
-			/* First non-zero byte settles the answer. */
-			return(false);
-		}
-	}
-
-	return(true);
-}
-
-/** Tell whether both stored checksum fields equal the crc32 of the page.
-@param[in]	read_buf	database page
-@param[in]	checksum_field1	new checksum field
-@param[in]	checksum_field2	old checksum field
-@return true if the page is in crc32 checksum format */
-UNIV_INTERN
-bool
-buf_page_is_checksum_valid_crc32(
-	const byte*	read_buf,
-	ulint		checksum_field1,
-	ulint		checksum_field2)
-{
-	const ib_uint32_t	crc32 = buf_calc_page_crc32(read_buf);
-
-	if (checksum_field1 == crc32 && checksum_field2 == crc32) {
-		return(true);
-	}
-
-	/* At least one field disagrees; trace the values in debug
-	builds. */
-	DBUG_PRINT("buf_checksum",
-		("Page checksum crc32 not valid field1 " ULINTPF
-		 " field2 " ULINTPF " crc32 %u.",
-		 checksum_field1, checksum_field2, crc32));
-
-	return(false);
-}
-
-/** Checks if the page is in innodb checksum format.
-The old field (page trailer) is verified first, then the new field (page
-header). Failures are traced through DBUG_PRINT only.
-@param[in]	read_buf	database page
-@param[in]	checksum_field1	new checksum field
-@param[in]	checksum_field2	old checksum field
-@return true if the page is in innodb checksum format */
-UNIV_INTERN
-bool
-buf_page_is_checksum_valid_innodb(
-	const byte*	read_buf,
-	ulint		checksum_field1,
-	ulint		checksum_field2)
-{
-	/* There are 2 valid formulas for
-	checksum_field2 (old checksum field) which algo=innodb could have
-	written to the page:
-
-	1. Very old versions of InnoDB only stored 8 byte lsn to the
-	start and the end of the page.
-
-	2. Newer InnoDB versions store the old formula checksum
-	(buf_calc_page_old_checksum()). */
-
-	if (checksum_field2 != mach_read_from_4(read_buf + FIL_PAGE_LSN)
-	    && checksum_field2 != buf_calc_page_old_checksum(read_buf)) {
-
-		/* The third value traced is the InnoDB old-formula
-		checksum, so it is labelled as such (it is not a crc32). */
-		DBUG_PRINT("buf_checksum",
-			("Page checksum innodb not valid field1 " ULINTPF
-			 " field2 " ULINTPF " old checksum " ULINTPF
-			 " lsn " ULINTPF ".",
-			 checksum_field1, checksum_field2,
-			 buf_calc_page_old_checksum(read_buf),
-			 mach_read_from_4(read_buf + FIL_PAGE_LSN)));
-
-		return(false);
-	}
-
-	/* old field is fine, check the new field */
-
-	/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
-	(always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
-
-	if (checksum_field1 != 0
-	    && checksum_field1 != buf_calc_page_new_checksum(read_buf)) {
-
-		/* Here the third value is the InnoDB new-formula
-		checksum. */
-		DBUG_PRINT("buf_checksum",
-			("Page checksum innodb not valid field1 " ULINTPF
-			 " field2 " ULINTPF " new checksum " ULINTPF
-			 " lsn " ULINTPF ".",
-			 checksum_field1, checksum_field2,
-			 buf_calc_page_new_checksum(read_buf),
-			 mach_read_from_4(read_buf + FIL_PAGE_LSN)));
-
-		return(false);
-	}
-
-	return(true);
-}
-
-/** Checks if the page is in none checksum format, i.e. both checksum
-fields hold BUF_NO_CHECKSUM_MAGIC.
-@param[in]	read_buf	database page (only used for the debug trace)
-@param[in]	checksum_field1	new checksum field
-@param[in]	checksum_field2	old checksum field
-@return true if the page is in none checksum format */
-UNIV_INTERN
-bool
-buf_page_is_checksum_valid_none(
-	const byte*	read_buf,
-	ulint		checksum_field1,
-	ulint		checksum_field2)
-{
-	/* Evaluate the predicate once so that the trace and the return
-	value cannot drift apart. */
-	const bool	valid = checksum_field1 == checksum_field2
-		&& checksum_field1 == BUF_NO_CHECKSUM_MAGIC;
-
-	if (!valid) {
-		/* The magic constant is converted to ulint so that the
-		variadic argument matches ULINTPF whatever the constant's
-		own integer type is. */
-		DBUG_PRINT("buf_checksum",
-			("Page checksum none not valid field1 " ULINTPF
-			 " field2 " ULINTPF " magic " ULINTPF
-			 " lsn " ULINTPF ".",
-			 checksum_field1, checksum_field2,
-			 static_cast<ulint>(BUF_NO_CHECKSUM_MAGIC),
-			 mach_read_from_4(read_buf + FIL_PAGE_LSN)));
-	}
-
-	return(valid);
-}
-
-/** Check if a page is corrupt.
-The checks run in this order: page-compressed pages are accepted as is;
-for uncompressed pages the low 4 bytes of the LSN in the header and in
-the trailer must match; optionally an LSN "in the future" is reported
-(report only, it does not affect the result); finally the stored
-checksums are validated according to srv_checksum_algorithm. The
-non-strict algorithms accept any of the three checksum formats; the
-strict ones also accept them here, after calling
-page_warn_strict_checksum().
-@param[in]	check_lsn	true if LSN should be checked
-@param[in]	read_buf	Page to be checked
-@param[in]	zip_size	compressed size or 0
-@param[in]	space		Pointer to tablespace, may be NULL
-@return true if corrupted, false if not */
-UNIV_INTERN
-bool
-buf_page_is_corrupted(
-	bool			check_lsn,
-	const byte*		read_buf,
-	ulint			zip_size,
-	const fil_space_t*	space)
-{
-	ulint	checksum_field1;
-	ulint	checksum_field2;
-	ulint	space_id = mach_read_from_4(
-		read_buf + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-	ulint	page_type = mach_read_from_2(
-		read_buf + FIL_PAGE_TYPE);
-
-	/* We can trust page type if page compression is set on tablespace
-	flags because page compression flag means file must have been
-	created with 10.1 (later than 5.5 code base). In 10.1 page
-	compressed tables do not contain post compression checksum and
-	FIL_PAGE_END_LSN_OLD_CHKSUM field stored. Note that space can
-	be null if we are in fil_check_first_page() and first page
-	is not compressed or encrypted. Page checksum is verified
-	after decompression (i.e. normally pages are already
-	decompressed at this stage). */
-	if ((page_type == FIL_PAGE_PAGE_COMPRESSED ||
-	     page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED)
-	    && space && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags)) {
-		return(false);
-	}
-
-	if (!zip_size
-	    && memcmp(read_buf + FIL_PAGE_LSN + 4,
-		      read_buf + UNIV_PAGE_SIZE
-		      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
-
-		/* Stored log sequence numbers at the start and the end
-		of page do not match */
-
-		/* mach_read_from_4() yields ulint, so ULINTPF is the
-		matching conversion. */
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"Log sequence number at the start " ULINTPF
-			" and the end " ULINTPF " do not match.",
-			mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
-			mach_read_from_4(read_buf + UNIV_PAGE_SIZE
-					 - FIL_PAGE_END_LSN_OLD_CHKSUM + 4));
-
-		return(true);
-	}
-
-#ifndef UNIV_HOTBACKUP
-	if (check_lsn && recv_lsn_checks_on) {
-		lsn_t	current_lsn;
-
-		/* Since we are going to reset the page LSN during the import
-		phase it makes no sense to spam the log with error messages. */
-
-		if (log_peek_lsn(&current_lsn)
-		    && current_lsn
-		    < mach_read_from_8(read_buf + FIL_PAGE_LSN)) {
-			ut_print_timestamp(stderr);
-
-			fprintf(stderr,
-				" InnoDB: Error: page " ULINTPF
-				" log sequence number"
-				" " LSN_PF "\n"
-				"InnoDB: is in the future! Current system "
-				"log sequence number " LSN_PF ".\n"
-				"InnoDB: Your database may be corrupt or "
-				"you may have copied the InnoDB\n"
-				"InnoDB: tablespace but not the InnoDB "
-				"log files. See\n"
-				"InnoDB: " REFMAN
-				"forcing-innodb-recovery.html\n"
-				"InnoDB: for more information.\n",
-				(ulint) mach_read_from_4(
-					read_buf + FIL_PAGE_OFFSET),
-				(lsn_t) mach_read_from_8(
-					read_buf + FIL_PAGE_LSN),
-				current_lsn);
-		}
-	}
-#endif
-
-	/* Check whether the checksum fields have correct values */
-
-	if (srv_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_NONE) {
-		return(false);
-	}
-
-	if (zip_size) {
-		return(!page_zip_verify_checksum(read_buf, zip_size));
-	}
-
-	checksum_field1 = mach_read_from_4(
-		read_buf + FIL_PAGE_SPACE_OR_CHKSUM);
-
-	checksum_field2 = mach_read_from_4(
-		read_buf + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM);
-
-#if FIL_PAGE_LSN % 8
-#error "FIL_PAGE_LSN must be 64 bit aligned"
-#endif
-
-	/* declare empty pages non-corrupted */
-	/* The LSN is tested with mach_read_from_8(), which assembles the
-	value from 4-byte reads, so the test does not depend on how
-	read_buf itself is aligned. */
-	if (checksum_field1 == 0 && checksum_field2 == 0
-	    && mach_read_from_8(read_buf + FIL_PAGE_LSN) == 0) {
-		/* make sure that the page is really empty; zip_size is 0
-		on this path, so all UNIV_PAGE_SIZE bytes are scanned */
-		if (!buf_page_is_zeroes(read_buf, zip_size)) {
-			ib_logf(IB_LOG_LEVEL_INFO,
-				"Checksum fields zero but page is not empty.");
-
-			return(true);
-		}
-
-		return(false);
-	}
-
-	DBUG_EXECUTE_IF("buf_page_is_corrupt_failure", return(true); );
-
-	ulint	page_no = mach_read_from_4(read_buf + FIL_PAGE_OFFSET);
-
-	const srv_checksum_algorithm_t	curr_algo =
-		static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm);
-
-	switch (curr_algo) {
-	case SRV_CHECKSUM_ALGORITHM_CRC32:
-	case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
-
-		if (buf_page_is_checksum_valid_crc32(read_buf,
-			checksum_field1, checksum_field2)) {
-			return(false);
-		}
-
-		if (buf_page_is_checksum_valid_none(read_buf,
-			checksum_field1, checksum_field2)) {
-			if (curr_algo
-			    == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) {
-				page_warn_strict_checksum(
-					curr_algo,
-					SRV_CHECKSUM_ALGORITHM_NONE,
-					space_id, page_no);
-			}
-
-			return(false);
-		}
-
-		if (buf_page_is_checksum_valid_innodb(read_buf,
-			checksum_field1, checksum_field2)) {
-			if (curr_algo
-			    == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) {
-				page_warn_strict_checksum(
-					curr_algo,
-					SRV_CHECKSUM_ALGORITHM_INNODB,
-					space_id, page_no);
-			}
-
-			return(false);
-		}
-
-		return(true);
-
-	case SRV_CHECKSUM_ALGORITHM_INNODB:
-	case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
-
-		if (buf_page_is_checksum_valid_innodb(read_buf,
-			checksum_field1, checksum_field2)) {
-			return(false);
-		}
-
-		if (buf_page_is_checksum_valid_none(read_buf,
-			checksum_field1, checksum_field2)) {
-			if (curr_algo
-			    == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) {
-				page_warn_strict_checksum(
-					curr_algo,
-					SRV_CHECKSUM_ALGORITHM_NONE,
-					space_id, page_no);
-			}
-
-			return(false);
-		}
-
-		if (buf_page_is_checksum_valid_crc32(read_buf,
-			checksum_field1, checksum_field2)) {
-			if (curr_algo
-			    == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) {
-				page_warn_strict_checksum(
-					curr_algo,
-					SRV_CHECKSUM_ALGORITHM_CRC32,
-					space_id, page_no);
-			}
-
-			return(false);
-		}
-
-		return(true);
-
-	case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
-
-		if (buf_page_is_checksum_valid_none(read_buf,
-			checksum_field1, checksum_field2)) {
-			return(false);
-		}
-
-		if (buf_page_is_checksum_valid_crc32(read_buf,
-			checksum_field1, checksum_field2)) {
-			page_warn_strict_checksum(
-				curr_algo,
-				SRV_CHECKSUM_ALGORITHM_CRC32,
-				space_id, page_no);
-			return(false);
-		}
-
-		if (buf_page_is_checksum_valid_innodb(read_buf,
-			checksum_field1, checksum_field2)) {
-			page_warn_strict_checksum(
-				curr_algo,
-				SRV_CHECKSUM_ALGORITHM_INNODB,
-				space_id, page_no);
-			return(false);
-		}
-
-		return(true);
-
-	case SRV_CHECKSUM_ALGORITHM_NONE:
-		/* should have returned false earlier */
-		break;
-	/* no default so the compiler will emit a warning if new enum
-	is added and not handled here */
-	}
-
-	ut_error;
-	return(false);
-}
-
-/********************************************************************//**
-Prints a page to stderr: optionally a full ascii/hex dump, then the stored
-and calculated checksums, the LSN, page number and space id read from the
-page, and finally a guess of what kind of page it is based on its type
-field. */
-UNIV_INTERN
-void
-buf_page_print(
-/*===========*/
-	const byte*	read_buf,	/*!< in: a database page */
-	ulint		zip_size,	/*!< in: compressed page size, or
-					0 for uncompressed pages */
-	ulint		flags)		/*!< in: 0 or
-					BUF_PAGE_PRINT_NO_CRASH or
-					BUF_PAGE_PRINT_NO_FULL */
-
-{
-#ifndef UNIV_HOTBACKUP
-	dict_index_t*	index;
-#endif /* !UNIV_HOTBACKUP */
-	ulint		size = zip_size;
-
-	if (!size) {
-		size = UNIV_PAGE_SIZE;
-	}
-
-	if (!(flags & BUF_PAGE_PRINT_NO_FULL)) {
-		ut_print_timestamp(stderr);
-		/* size is ulint: print it with ULINTPF, not %lu. */
-		fprintf(stderr,
-			" InnoDB: Page dump in ascii and hex (" ULINTPF
-			" bytes):\n",
-			size);
-		ut_print_buf(stderr, read_buf, size);
-		fputs("\nInnoDB: End of page dump\n", stderr);
-	}
-
-	if (zip_size) {
-		/* Print compressed page. */
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Compressed page type (" ULINTPF "); "
-			"stored checksum in field1 " ULINTPF "; "
-			"calculated checksums for field1: "
-			"%s " ULINTPF ", "
-			"%s " ULINTPF ", "
-			"%s " ULINTPF "; "
-			"page LSN " LSN_PF "; "
-			"page number (if stored to page already) " ULINTPF "; "
-			"space id (if stored to page already) " ULINTPF "\n",
-			fil_page_get_type(read_buf),
-			mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
-			buf_checksum_algorithm_name(
-				SRV_CHECKSUM_ALGORITHM_CRC32),
-			page_zip_calc_checksum(read_buf, zip_size,
-				SRV_CHECKSUM_ALGORITHM_CRC32),
-			buf_checksum_algorithm_name(
-				SRV_CHECKSUM_ALGORITHM_INNODB),
-			page_zip_calc_checksum(read_buf, zip_size,
-				SRV_CHECKSUM_ALGORITHM_INNODB),
-			buf_checksum_algorithm_name(
-				SRV_CHECKSUM_ALGORITHM_NONE),
-			page_zip_calc_checksum(read_buf, zip_size,
-				SRV_CHECKSUM_ALGORITHM_NONE),
-			mach_read_from_8(read_buf + FIL_PAGE_LSN),
-			mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
-			mach_read_from_4(read_buf
-					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
-	} else {
-		/* BUF_NO_CHECKSUM_MAGIC is converted to ulint below so that
-		the variadic argument matches its ULINTPF conversion. */
-		const ulint	no_checksum_magic
-			= static_cast<ulint>(BUF_NO_CHECKSUM_MAGIC);
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr, " InnoDB: uncompressed page, "
-			"stored checksum in field1 " ULINTPF ", "
-			"calculated checksums for field1: "
-			"%s " UINT32PF ", "
-			"%s " ULINTPF ", "
-			"%s " ULINTPF ", "
-
-			"stored checksum in field2 " ULINTPF ", "
-			"calculated checksums for field2: "
-			"%s " UINT32PF ", "
-			"%s " ULINTPF ", "
-			"%s " ULINTPF ", "
-
-			"page LSN " ULINTPF " " ULINTPF ", "
-			"low 4 bytes of LSN at page end " ULINTPF ", "
-			"page number (if stored to page already) " ULINTPF ", "
-			"space id (if created with >= MySQL-4.1.1 "
-			"and stored already) " ULINTPF "\n",
-			mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
-			buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_CRC32),
-			buf_calc_page_crc32(read_buf),
-			buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_INNODB),
-			buf_calc_page_new_checksum(read_buf),
-			buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_NONE),
-			no_checksum_magic,
-
-			mach_read_from_4(read_buf + UNIV_PAGE_SIZE
-					 - FIL_PAGE_END_LSN_OLD_CHKSUM),
-			buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_CRC32),
-			buf_calc_page_crc32(read_buf),
-			buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_INNODB),
-			buf_calc_page_old_checksum(read_buf),
-			buf_checksum_algorithm_name(SRV_CHECKSUM_ALGORITHM_NONE),
-			no_checksum_magic,
-
-			mach_read_from_4(read_buf + FIL_PAGE_LSN),
-			mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
-			mach_read_from_4(read_buf + UNIV_PAGE_SIZE
-					 - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
-			mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
-			mach_read_from_4(read_buf
-					 + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
-
-		ulint	page_type = fil_page_get_type(read_buf);
-
-		/* page_type is an unsigned ulint; the signed %ld conversion
-		did not match it. */
-		fprintf(stderr, "InnoDB: page type " ULINTPF " meaning %s\n",
-			page_type,
-			fil_get_page_type_name(page_type));
-	}
-
-#ifndef UNIV_HOTBACKUP
-	/* The undo page type field is probed regardless of the page type,
-	hence the "may be" wording of the messages. */
-	if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
-	    == TRX_UNDO_INSERT) {
-		fprintf(stderr,
-			"InnoDB: Page may be an insert undo log page\n");
-	} else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
-				    + TRX_UNDO_PAGE_TYPE)
-		   == TRX_UNDO_UPDATE) {
-		fprintf(stderr,
-			"InnoDB: Page may be an update undo log page\n");
-	}
-#endif /* !UNIV_HOTBACKUP */
-
-	switch (fil_page_get_type(read_buf)) {
-	case FIL_PAGE_INDEX: {
-		/* Scoped to this case instead of being declared ahead of
-		the first case label. */
-		const index_id_t	index_id
-			= btr_page_get_index_id(read_buf);
-
-		fprintf(stderr,
-			"InnoDB: Page may be an index page where"
-			" index id is %llu\n",
-			(ullint) index_id);
-#ifndef UNIV_HOTBACKUP
-		index = dict_index_find_on_id_low(index_id);
-		if (index) {
-			fputs("InnoDB: (", stderr);
-			dict_index_name_print(stderr, NULL, index);
-			fputs(")\n", stderr);
-		}
-#endif /* !UNIV_HOTBACKUP */
-		break;
-	}
-	case FIL_PAGE_INODE:
-		fputs("InnoDB: Page may be an 'inode' page\n", stderr);
-		break;
-	case FIL_PAGE_IBUF_FREE_LIST:
-		fputs("InnoDB: Page may be an insert buffer free list page\n",
-		      stderr);
-		break;
-	case FIL_PAGE_TYPE_ALLOCATED:
-		fputs("InnoDB: Page may be a freshly allocated page\n",
-		      stderr);
-		break;
-	case FIL_PAGE_IBUF_BITMAP:
-		fputs("InnoDB: Page may be an insert buffer bitmap page\n",
-		      stderr);
-		break;
-	case FIL_PAGE_TYPE_SYS:
-		fputs("InnoDB: Page may be a system page\n",
-		      stderr);
-		break;
-	case FIL_PAGE_TYPE_TRX_SYS:
-		fputs("InnoDB: Page may be a transaction system page\n",
-		      stderr);
-		break;
-	case FIL_PAGE_TYPE_FSP_HDR:
-		fputs("InnoDB: Page may be a file space header page\n",
-		      stderr);
-		break;
-	case FIL_PAGE_TYPE_XDES:
-		fputs("InnoDB: Page may be an extent descriptor page\n",
-		      stderr);
-		break;
-	case FIL_PAGE_TYPE_BLOB:
-		fputs("InnoDB: Page may be a BLOB page\n",
-		      stderr);
-		break;
-	case FIL_PAGE_TYPE_ZBLOB:
-	case FIL_PAGE_TYPE_ZBLOB2:
-		fputs("InnoDB: Page may be a compressed BLOB page\n",
-		      stderr);
-		break;
-	}
-
-	/* Debug builds assert that the caller passed
-	BUF_PAGE_PRINT_NO_CRASH. */
-	ut_ad(flags & BUF_PAGE_PRINT_NO_CRASH);
-}
-
-#ifndef UNIV_HOTBACKUP
-
-# ifdef PFS_GROUP_BUFFER_SYNC
-/********************************************************************//**
-Registers the mutexes and rwlocks of the buffer blocks of a chunk with
-performance schema. At most PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER blocks,
-counted from the start of the chunk, are registered; a chunk smaller than
-that limit is registered completely. */
-static
-void
-pfs_register_buffer_block(
-/*======================*/
-	buf_chunk_t*	chunk)		/*!< in/out: chunk of buffers */
-{
-	ulint		n_left;
-	buf_block_t*	cur = chunk->blocks;
-
-	n_left = ut_min(chunk->size,
-			PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);
-
-	for (; n_left != 0; n_left--, cur++) {
-		ib_mutex_t*	block_mutex;
-		rw_lock_t*	block_latch;
-
-#  ifdef UNIV_PFS_MUTEX
-		block_mutex = &cur->mutex;
-		/* The mutex must not have been registered already. */
-		ut_a(!block_mutex->pfs_psi);
-		if (PSI_server) {
-			block_mutex->pfs_psi = PSI_server->init_mutex(
-				buffer_block_mutex_key, block_mutex);
-		} else {
-			block_mutex->pfs_psi = NULL;
-		}
-#  endif /* UNIV_PFS_MUTEX */
-
-#  ifdef UNIV_PFS_RWLOCK
-		block_latch = &cur->lock;
-		ut_a(!block_latch->pfs_psi);
-		if (PSI_server) {
-			block_latch->pfs_psi = PSI_server->init_rwlock(
-				buf_block_lock_key, block_latch);
-		} else {
-			block_latch->pfs_psi = NULL;
-		}
-
-#   ifdef UNIV_SYNC_DEBUG
-		block_latch = &cur->debug_latch;
-		ut_a(!block_latch->pfs_psi);
-		if (PSI_server) {
-			block_latch->pfs_psi = PSI_server->init_rwlock(
-				buf_block_debug_latch_key, block_latch);
-		} else {
-			block_latch->pfs_psi = NULL;
-		}
-#   endif /* UNIV_SYNC_DEBUG */
-
-#  endif /* UNIV_PFS_RWLOCK */
-	}
-}
-# endif /* PFS_GROUP_BUFFER_SYNC */
-
-/********************************************************************//**
-Sets up one buffer control block at buffer pool creation: binds it to its
-frame, resets the page descriptor fields to their idle values and creates
-the block mutex and rw-lock. */
-static
-void
-buf_block_init(
-/*===========*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	buf_block_t*	block,		/*!< in: pointer to control block */
-	byte*		frame)		/*!< in: pointer to buffer frame */
-{
-	buf_page_t*	bpage = &block->page;
-
-	UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
-
-	/* Fields of the control block itself. */
-	block->frame = frame;
-	block->modify_clock = 0;
-	block->check_index_page_at_flush = FALSE;
-	block->index = NULL;
-
-	/* Fields of the embedded page descriptor. */
-	bpage->buf_pool_index = buf_pool_index(buf_pool);
-	bpage->flush_type = BUF_FLUSH_LRU;
-	bpage->state = BUF_BLOCK_NOT_USED;
-	bpage->buf_fix_count = 0;
-	bpage->io_fix = BUF_IO_NONE;
-	bpage->encrypted = false;
-	bpage->real_size = 0;
-	bpage->write_size = 0;
-	bpage->slot = NULL;
-
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-	bpage->file_page_was_freed = FALSE;
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
-
-#ifdef UNIV_DEBUG
-	/* Debug-only list/hash membership flags: the block starts out
-	in none of them. */
-	bpage->in_page_hash = FALSE;
-	bpage->in_zip_hash = FALSE;
-	bpage->in_flush_list = FALSE;
-	bpage->in_free_list = FALSE;
-	bpage->in_LRU_list = FALSE;
-	block->in_unzip_LRU_list = FALSE;
-#endif /* UNIV_DEBUG */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-	block->n_pointers = 0;
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-	page_zip_des_init(&bpage->zip);
-
-#if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
-	/* With PFS_SKIP_BUFFER_MUTEX_RWLOCK the block mutex/rwlock are
-	never registered with performance schema. With
-	PFS_GROUP_BUFFER_SYNC registration is deferred to
-	pfs_register_buffer_block(). Either way they are created
-	uninstrumented here. */
-
-	mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
-	rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
-
-# ifdef UNIV_SYNC_DEBUG
-	rw_lock_create(PFS_NOT_INSTRUMENTED,
-		       &block->debug_latch, SYNC_NO_ORDER_CHECK);
-# endif /* UNIV_SYNC_DEBUG */
-
-#else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
-	mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
-	rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
-
-# ifdef UNIV_SYNC_DEBUG
-	rw_lock_create(buf_block_debug_latch_key,
-		       &block->debug_latch, SYNC_NO_ORDER_CHECK);
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
-
-	ut_ad(rw_lock_validate(&(block->lock)));
-}
-
-/********************************************************************//**
-Allocates a chunk of buffer frames. One large memory area is allocated;
-the block descriptors are placed at its start and the page-aligned frames
-follow them. Every initialized block is appended to the free list of the
-buffer pool instance.
-@return chunk, or NULL on failure */
-static
-buf_chunk_t*
-buf_chunk_init(
-/*===========*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	buf_chunk_t*	chunk,		/*!< out: chunk of buffers */
-	ulint		mem_size)	/*!< in: requested size in bytes */
-{
-	buf_block_t*	block;
-	byte*		frame;
-	ulint		i;
-	ulint		size_target;
-
-	/* Round down to a multiple of page size,
-	although it already should be. */
-	mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
-	/* Upper bound for the number of blocks in the chunk, see the
-	clamp after the descriptor space has been subtracted. */
-	size_target = (mem_size / UNIV_PAGE_SIZE) - 1;
-	/* Reserve space for the block descriptors. */
-	mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
-				  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
-
-	chunk->mem_size = mem_size;
-	chunk->mem = os_mem_alloc_large(&chunk->mem_size);
-
-	if (UNIV_UNLIKELY(chunk->mem == NULL)) {
-
-		return(NULL);
-	}
-
-#ifdef HAVE_LIBNUMA
-	if (srv_numa_interleave) {
-		struct bitmask *numa_mems_allowed = numa_get_mems_allowed();
-		int	st = mbind(chunk->mem, chunk->mem_size,
-				   MPOL_INTERLEAVE,
-				   numa_mems_allowed->maskp,
-				   numa_mems_allowed->size,
-				   MPOL_MF_MOVE);
-		if (st != 0) {
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"Failed to set NUMA memory policy of buffer"
-				" pool page frames to MPOL_INTERLEAVE"
-				" (error: %s).", strerror(errno));
-		}
-		/* The mask returned by numa_get_mems_allowed() is
-		allocated by libnuma and owned by the caller; release it
-		after errno has been reported so that it is not leaked for
-		every chunk. */
-		numa_bitmask_free(numa_mems_allowed);
-	}
-#endif // HAVE_LIBNUMA
-
-	/* Allocate the block descriptors from
-	the start of the memory block. */
-	chunk->blocks = (buf_block_t*) chunk->mem;
-
-	/* Align a pointer to the first frame. Note that when
-	os_large_page_size is smaller than UNIV_PAGE_SIZE,
-	we may allocate one fewer block than requested. When
-	it is bigger, we may allocate more blocks than requested. */
-
-	frame = (byte*) ut_align(chunk->mem, UNIV_PAGE_SIZE);
-	chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
-		- (frame != chunk->mem);
-
-	/* Subtract the space needed for block descriptors. */
-	{
-		ulint	size = chunk->size;
-
-		/* Move the first frame forward page by page until it no
-		longer overlaps the descriptor array; each page given up
-		to descriptors is one block fewer. */
-		while (frame < (byte*) (chunk->blocks + size)) {
-			frame += UNIV_PAGE_SIZE;
-			size--;
-		}
-
-		chunk->size = size;
-	}
-
-	if (chunk->size > size_target) {
-		chunk->size = size_target;
-	}
-
-	/* Initialize the block descriptors one by one, pairing the n-th
-	descriptor with the n-th frame (the memory for both was allocated
-	above), and put each block on the free list. */
-
-	block = chunk->blocks;
-
-	for (i = chunk->size; i--; ) {
-
-		buf_block_init(buf_pool, block, frame);
-		UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
-
-		/* Add the block to the free list */
-		UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
-
-		ut_d(block->page.in_free_list = TRUE);
-		ut_ad(buf_pool_from_block(block) == buf_pool);
-
-		block++;
-		frame += UNIV_PAGE_SIZE;
-	}
-
-#ifdef PFS_GROUP_BUFFER_SYNC
-	pfs_register_buffer_block(chunk);
-#endif
-	return(chunk);
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Scans the blocks of one buffer chunk, in order, for a block whose
-compressed page frame is the given pointer.
-@return buffer block pointing to the compressed page, or NULL */
-static
-buf_block_t*
-buf_chunk_contains_zip(
-/*===================*/
-	buf_chunk_t*	chunk,	/*!< in: chunk being checked */
-	const void*	data)	/*!< in: pointer to compressed page */
-{
-	buf_block_t* const	first = chunk->blocks;
-
-	for (ulint n = 0; n < chunk->size; n++) {
-		buf_block_t*	candidate = &first[n];
-
-		if (candidate->page.zip.data == data) {
-
-			return(candidate);
-		}
-	}
-
-	return(NULL);
-}
-
-/*********************************************************************//**
-Finds a block in the buffer pool that points to a
-given compressed page. All chunks of the instance are searched in order.
-@return buffer block pointing to the compressed page, or NULL */
-UNIV_INTERN
-buf_block_t*
-buf_pool_contains_zip(
-/*==================*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	const void*	data)		/*!< in: pointer to compressed page */
-{
-	/* Validate the argument before it is dereferenced; the assertion
-	used to come after buf_pool->chunks had already been read. */
-	ut_ad(buf_pool);
-
-	buf_chunk_t*	chunk = buf_pool->chunks;
-
-	for (ulint n = buf_pool->n_chunks; n--; chunk++) {
-
-		buf_block_t*	block = buf_chunk_contains_zip(chunk, data);
-
-		if (block) {
-			return(block);
-		}
-	}
-
-	return(NULL);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Verifies that every file page of a buffer chunk could be replaced. Blocks
-not holding a file page are skipped, and so are file pages flagged as
-corrupt.
-@return address of a non-free block, or NULL if all freed */
-static
-const buf_block_t*
-buf_chunk_not_freed(
-/*================*/
-	buf_chunk_t*	chunk)	/*!< in: chunk being checked */
-{
-	for (ulint n = 0; n < chunk->size; n++) {
-		buf_block_t*	block = &chunk->blocks[n];
-
-		switch (buf_block_get_state(block)) {
-		case BUF_BLOCK_FILE_PAGE: {
-			/* The replaceability test is made while holding
-			the block mutex. */
-			mutex_enter(&block->mutex);
-			const ibool	replaceable
-				= buf_flush_ready_for_replace(&block->page);
-			mutex_exit(&block->mutex);
-
-			if (UNIV_UNLIKELY(block->page.is_corrupt)) {
-				/* A corrupt page is allowed to stay
-				behind; do not report it. */
-				break;
-			}
-
-			if (!replaceable) {
-
-				return(block);
-			}
-
-			break;
-		}
-		case BUF_BLOCK_NOT_USED:
-		case BUF_BLOCK_READY_FOR_USE:
-		case BUF_BLOCK_MEMORY:
-		case BUF_BLOCK_REMOVE_HASH:
-			/* Not a file page: nothing to check. */
-			break;
-		case BUF_BLOCK_POOL_WATCH:
-		case BUF_BLOCK_ZIP_PAGE:
-		case BUF_BLOCK_ZIP_DIRTY:
-			/* Compressed-only descriptors must never appear
-			among the uncompressed blocks of a chunk. */
-			ut_error;
-			break;
-		}
-	}
-
-	return(NULL);
-}
-
-/********************************************************************//**
-Refreshes the global size variables after a resize:
-srv_buf_pool_curr_size becomes the sum of curr_pool_size over all
-instances and srv_buf_pool_old_size is set to srv_buf_pool_size. */
-static
-void
-buf_pool_set_sizes(void)
-/*====================*/
-{
-	ulint	total_bytes = 0;
-
-	for (ulint n = 0; n < srv_buf_pool_instances; n++) {
-		total_bytes += buf_pool_from_array(n)->curr_pool_size;
-	}
-
-	srv_buf_pool_curr_size = total_bytes;
-	srv_buf_pool_old_size = srv_buf_pool_size;
-}
-
-/********************************************************************//**
-Initialize a buffer pool instance: create its mutexes, allocate one chunk
-of buf_pool_size bytes (when buf_pool_size > 0) together with the page and
-zip hash tables, then set up the flush events, the watch array and the
-temporary buffer slot array.
-NOTE: the declared return type is ulint although DB_SUCCESS / DB_ERROR
-codes are returned.
-@return DB_SUCCESS if all goes well. */
-UNIV_INTERN
-ulint
-buf_pool_init_instance(
-/*===================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- ulint buf_pool_size, /*!< in: size in bytes */
- ulint instance_no) /*!< in: id of the instance */
-{
- ulint i;
- buf_chunk_t* chunk;
-
- /* 1. Initialize general fields
- ------------------------------- */
- mutex_create(buf_pool_LRU_list_mutex_key,
- &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
- mutex_create(buf_pool_free_list_mutex_key,
- &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
- mutex_create(buf_pool_zip_free_mutex_key,
- &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
- mutex_create(buf_pool_zip_hash_mutex_key,
- &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
- mutex_create(buf_pool_zip_mutex_key,
- &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
- mutex_create(buf_pool_flush_state_mutex_key,
- &buf_pool->flush_state_mutex, SYNC_BUF_FLUSH_STATE);
-
- if (buf_pool_size > 0) {
- /* A single chunk holds the whole instance. */
- buf_pool->n_chunks = 1;
-
- buf_pool->chunks = chunk =
- (buf_chunk_t*) mem_zalloc(sizeof *chunk);
-
- if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) {
- /* NOTE(review): the mutexes created above are not freed on
- this path, and buf_pool is freed here although it was passed
- in by the caller. If the caller keeps ownership of it (or it
- is an element of an array) this looks like an invalid or
- double free - confirm against the caller. */
- mem_free(chunk);
- mem_free(buf_pool);
-
- return(DB_ERROR);
- }
-
- buf_pool->instance_no = instance_no;
- buf_pool->old_pool_size = buf_pool_size;
- /* curr_size is counted in blocks, curr_pool_size in bytes. */
- buf_pool->curr_size = chunk->size;
- buf_pool->read_ahead_area
- = ut_min(64, ut_2_power_up(buf_pool->curr_size / 32));
- buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
-
- /* Number of locks protecting page_hash must be a
- power of two */
- /* Note that this rounds the global setting itself up, for every
- instance that is initialized. */
- srv_n_page_hash_locks = static_cast<ulong>(
- ut_2_power_up(srv_n_page_hash_locks));
- ut_a(srv_n_page_hash_locks != 0);
- ut_a(srv_n_page_hash_locks <= MAX_PAGE_HASH_LOCKS);
-
- /* Both hash tables are created with twice as many cells as
- there are blocks. */
- buf_pool->page_hash = ib_create(2 * buf_pool->curr_size,
- srv_n_page_hash_locks,
- MEM_HEAP_FOR_PAGE_HASH,
- SYNC_BUF_PAGE_HASH);
-
- buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
-
- buf_pool->last_printout_time = ut_time();
- }
- /* 2. Initialize flushing fields
- -------------------------------- */
-
- mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
- SYNC_BUF_FLUSH_LIST);
-
- /* One event per flush type. */
- for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
- buf_pool->no_flush[i] = os_event_create();
- }
-
- buf_pool->watch = (buf_page_t*) mem_zalloc(
- sizeof(*buf_pool->watch) * BUF_POOL_WATCH_SIZE);
-
- /* All fields are initialized by mem_zalloc(). */
-
- /* Initialize the temporal memory array and slots */
- /* The slot count is derived from the number of read and write I/O
- threads; the slots start zero-filled, i.e. without crypt_buf or
- comp_buf buffers. */
- buf_pool->tmp_arr = (buf_tmp_array_t *)mem_zalloc(sizeof(buf_tmp_array_t));
- ulint n_slots = (srv_n_read_io_threads + srv_n_write_io_threads) * (8 * OS_AIO_N_PENDING_IOS_PER_THREAD);
- buf_pool->tmp_arr->n_slots = n_slots;
- buf_pool->tmp_arr->slots = (buf_tmp_buffer_t*)mem_zalloc(sizeof(buf_tmp_buffer_t) * n_slots);
-
- buf_pool->try_LRU_scan = TRUE;
-
- /* Debug injection point: report failure after everything above has
- been set up. */
- DBUG_EXECUTE_IF("buf_pool_init_instance_force_oom",
- return(DB_ERROR); );
-
- return(DB_SUCCESS);
-}
-
-/********************************************************************//**
-Frees one buffer pool instance: the descriptors of LRU pages that are not
-BUF_BLOCK_FILE_PAGE, the watch array, the flush events, the instance
-mutexes, the per-block mutexes and latches, the chunk memory, both hash
-tables and the temporary buffer slots. The buf_pool_t object itself is not
-freed here. */
-static
-void
-buf_pool_free_instance(
-/*===================*/
-	buf_pool_t*	buf_pool)	/* in,own: buffer pool instance
-					to free */
-{
-	buf_page_t*	bpage;
-	ulint		i;
-
-	bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-	while (bpage != NULL) {
-		/* Fetch the predecessor first: bpage may be freed below. */
-		buf_page_t*	prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
-		enum buf_page_state	state = buf_page_get_state(bpage);
-
-		ut_ad(buf_page_in_file(bpage));
-		ut_ad(bpage->in_LRU_list);
-
-		if (state != BUF_BLOCK_FILE_PAGE) {
-			/* We must not have any dirty block except
-			when doing a fast shutdown. */
-			ut_ad(state == BUF_BLOCK_ZIP_PAGE
-			      || srv_fast_shutdown == 2);
-			buf_page_free_descriptor(bpage);
-		}
-
-		bpage = prev_bpage;
-	}
-
-	mem_free(buf_pool->watch);
-	buf_pool->watch = NULL;
-
-	for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
-		os_event_free(buf_pool->no_flush[i]);
-	}
-	mutex_free(&buf_pool->LRU_list_mutex);
-	mutex_free(&buf_pool->free_list_mutex);
-	mutex_free(&buf_pool->zip_free_mutex);
-	mutex_free(&buf_pool->zip_hash_mutex);
-	mutex_free(&buf_pool->zip_mutex);
-	mutex_free(&buf_pool->flush_state_mutex);
-	mutex_free(&buf_pool->flush_list_mutex);
-
-	/* Walk the chunks from last to first. The cursor is decremented
-	inside the loop so that no pointer before the start of the array
-	is ever formed. */
-	buf_chunk_t* const	chunks = buf_pool->chunks;
-
-	for (buf_chunk_t* chunk = chunks + buf_pool->n_chunks;
-	     chunk != chunks; ) {
-		--chunk;
-
-		buf_block_t*	block = chunk->blocks;
-
-		for (i = 0; i < chunk->size; i++, block++) {
-			mutex_free(&block->mutex);
-			rw_lock_free(&block->lock);
-#ifdef UNIV_SYNC_DEBUG
-			rw_lock_free(&block->debug_latch);
-#endif
-		}
-		os_mem_free_large(chunk->mem, chunk->mem_size);
-	}
-
-	mem_free(buf_pool->chunks);
-	ha_clear(buf_pool->page_hash);
-	hash_table_free(buf_pool->page_hash);
-	hash_table_free(buf_pool->zip_hash);
-
-	/* Free all used temporary slots, then the slot array and its
-	header. Everything touching tmp_arr stays inside the NULL guard;
-	previously the two mem_free() calls dereferenced tmp_arr even when
-	the guard had just found it to be NULL. */
-	if (buf_pool->tmp_arr) {
-		buf_tmp_array_t*	tmp_arr = buf_pool->tmp_arr;
-
-		for (i = 0; i < tmp_arr->n_slots; i++) {
-			/* The address of an array element is never NULL,
-			so only the buffers need to be tested. */
-			buf_tmp_buffer_t*	slot = &tmp_arr->slots[i];
-
-			if (slot->crypt_buf) {
-				aligned_free(slot->crypt_buf);
-				slot->crypt_buf = NULL;
-			}
-
-			if (slot->comp_buf) {
-				aligned_free(slot->comp_buf);
-				slot->comp_buf = NULL;
-			}
-		}
-
-		mem_free(tmp_arr->slots);
-		mem_free(tmp_arr);
-		buf_pool->tmp_arr = NULL;
-	}
-}
-
-/********************************************************************//**
-Creates the buffer pool: allocates the array of instances, initializes
-each instance with an equal share of total_size, and then sets the pool
-sizes, the LRU old ratio and the adaptive hash index. When
-srv_numa_interleave is set, the NUMA memory policy is switched to
-MPOL_INTERLEAVE for the duration of the allocation and back to
-MPOL_DEFAULT afterwards.
-@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
-UNIV_INTERN
-dberr_t
-buf_pool_init(
-/*==========*/
-	ulint	total_size,	/*!< in: size of the total pool in bytes */
-	ulint	n_instances)	/*!< in: number of instances */
-{
-	ulint	i;
-
-	ut_ad(n_instances > 0);
-	ut_ad(n_instances <= MAX_BUFFER_POOLS);
-	ut_ad(n_instances == srv_buf_pool_instances);
-
-	/* Divide only after n_instances has been checked, so that a
-	debug build fails on the assertion and not on the division. */
-	const ulint	size = total_size / n_instances;
-
-#ifdef HAVE_LIBNUMA
-	if (srv_numa_interleave) {
-		struct bitmask*	numa_mems_allowed = numa_get_mems_allowed();
-
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"Setting NUMA memory policy to MPOL_INTERLEAVE");
-		if (set_mempolicy(MPOL_INTERLEAVE,
-				  numa_mems_allowed->maskp,
-				  numa_mems_allowed->size) != 0) {
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"Failed to set NUMA memory policy to"
-				" MPOL_INTERLEAVE (error: %s).",
-				strerror(errno));
-		}
-
-		/* The mask is only needed for the call above; release it
-		after errno has been reported. */
-		numa_bitmask_free(numa_mems_allowed);
-	}
-#endif // HAVE_LIBNUMA
-
-	buf_pool_ptr = (buf_pool_t*) mem_zalloc(
-		n_instances * sizeof *buf_pool_ptr);
-
-	for (i = 0; i < n_instances; i++) {
-		buf_pool_t*	ptr = &buf_pool_ptr[i];
-
-		if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) {
-
-			/* Free all the instances created so far. */
-			buf_pool_free(i);
-
-			return(DB_ERROR);
-		}
-	}
-
-	buf_pool_set_sizes();
-	buf_LRU_old_ratio_update(100 * 3/ 8, FALSE);
-
-	btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);
-
-#ifdef HAVE_LIBNUMA
-	if (srv_numa_interleave) {
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"Setting NUMA memory policy to MPOL_DEFAULT");
-		if (set_mempolicy(MPOL_DEFAULT, NULL, 0) != 0) {
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"Failed to set NUMA memory policy to"
-				" MPOL_DEFAULT (error: %s).", strerror(errno));
-		}
-	}
-#endif // HAVE_LIBNUMA
-
-	return(DB_SUCCESS);
-}
-
-/********************************************************************//**
-Frees the buffer pool at shutdown: releases the first n_instances
-instances and then the instance array itself. This must not be invoked
-before freeing all mutexes. */
-UNIV_INTERN
-void
-buf_pool_free(
-/*==========*/
-	ulint	n_instances)	/*!< in: number of instances to free */
-{
-	for (ulint n = 0; n < n_instances; n++) {
-		buf_pool_free_instance(buf_pool_from_array(n));
-	}
-
-	mem_free(buf_pool_ptr);
-	buf_pool_ptr = NULL;
-}
-
-/********************************************************************//**
-Clears the adaptive hash index reference (block->index) on every block
-of every chunk in all buffer pool instances. The adaptive hash index
-must already be disabled. */
-UNIV_INTERN
-void
-buf_pool_clear_hash_index(void)
-/*===========================*/
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(btr_search_own_all(RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(!btr_search_enabled);
-
-	for (ulint n = 0; n < srv_buf_pool_instances; n++) {
-		buf_pool_t*		pool = buf_pool_from_array(n);
-		buf_chunk_t* const	first = pool->chunks;
-		buf_chunk_t*		cur = first + pool->n_chunks;
-
-		/* Visit the chunks from last to first. */
-		while (cur > first) {
-			--cur;
-
-			buf_block_t*		block = cur->blocks;
-			buf_block_t* const	end = block + cur->size;
-
-			for (; block != end; block++) {
-				/* We can set block->index = NULL
-				when we have an x-latch on btr_search_latch;
-				see the comment in buf0buf.h */
-
-				if (block->index == NULL) {
-					/* Not hashed */
-					continue;
-				}
-
-				block->index = NULL;
-# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-				block->n_pointers = 0;
-# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-			}
-		}
-	}
-}
-
-/********************************************************************//**
-Relocate a buffer control block. Relocates the block on the LRU list
-and in buf_pool->page_hash. Does not relocate bpage->list.
-The caller must take care of relocating bpage->list.
-As checked by the assertions below, the caller must own
-buf_pool->LRU_list_mutex, the x-latch on the page hash lock of bpage and
-the mutex of bpage, and bpage must be neither I/O-fixed nor
-buffer-fixed. */
-UNIV_INTERN
-void
-buf_relocate(
-/*=========*/
- buf_page_t* bpage, /*!< in/out: control block being relocated;
- buf_page_get_state(bpage) must be
- BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
- buf_page_t* dpage) /*!< in/out: destination control block */
-{
- buf_page_t* b;
- ulint fold;
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
- fold = buf_page_address_fold(bpage->space, bpage->offset);
-
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
- ut_ad(buf_page_hash_lock_held_x(buf_pool, bpage));
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
- ut_a(bpage->buf_fix_count == 0);
- ut_ad(bpage->in_LRU_list);
- ut_ad(!bpage->in_zip_hash);
- ut_ad(bpage->in_page_hash);
- ut_ad(bpage == buf_page_hash_get_low(buf_pool,
- bpage->space,
- bpage->offset,
- fold));
-
- ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
-#ifdef UNIV_DEBUG
- /* Only the two compressed-page states may be relocated. */
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_FILE_PAGE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- case BUF_BLOCK_ZIP_DIRTY:
- case BUF_BLOCK_ZIP_PAGE:
- break;
- }
-#endif /* UNIV_DEBUG */
-
- /* Copy the whole descriptor. The LRU and page_hash links of dpage
- are re-established below. */
- memcpy(dpage, bpage, sizeof *dpage);
-
- ut_d(bpage->in_LRU_list = FALSE);
- ut_d(bpage->in_page_hash = FALSE);
-
- /* relocate buf_pool->LRU */
- /* Remember the predecessor so that dpage takes the exact list
- position that bpage had. */
- b = UT_LIST_GET_PREV(LRU, bpage);
- UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
-
- if (b) {
- UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
- } else {
- UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
- }
-
- if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
- buf_pool->LRU_old = dpage;
-#ifdef UNIV_LRU_DEBUG
- /* buf_pool->LRU_old must be the first item in the LRU list
- whose "old" flag is set. */
- ut_a(buf_pool->LRU_old->old);
- ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
- || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
- ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
- || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
- } else {
- /* This else branch exists only in UNIV_LRU_DEBUG builds. */
- /* Check that the "old" flag is consistent in
- the block and its neighbours. */
- buf_page_set_old(dpage, buf_page_is_old(dpage));
-#endif /* UNIV_LRU_DEBUG */
- }
-
- ut_d(UT_LIST_VALIDATE(
- LRU, buf_page_t, buf_pool->LRU, CheckInLRUList()));
-
- /* relocate buf_pool->page_hash */
- HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
- HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
-}
-
-/********************************************************************//**
-Determine if a block is a sentinel for a buffer pool watch, that is,
-whether it is an element of the buf_pool->watch[] array.
-@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
-UNIV_INTERN
-ibool
-buf_pool_watch_is_sentinel(
-/*=======================*/
-	buf_pool_t*		buf_pool,	/*!< buffer pool instance */
-	const buf_page_t*	bpage)		/*!< in: block */
-{
-	/* We must also own the appropriate hash lock. */
-	ut_ad(buf_page_hash_lock_held_s_or_x(buf_pool, bpage));
-	ut_ad(buf_page_in_file(bpage));
-
-	const bool	in_watch_array
-		= bpage >= &buf_pool->watch[0]
-		&& bpage < &buf_pool->watch[BUF_POOL_WATCH_SIZE];
-
-	if (in_watch_array) {
-		/* A sentinel in use looks like a buffer-fixed compressed
-		page that has no compressed data. */
-		ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
-		ut_ad(!bpage->in_zip_hash);
-		ut_ad(bpage->in_page_hash);
-		ut_ad(bpage->zip.data == NULL);
-		ut_ad(bpage->buf_fix_count > 0);
-
-		return(TRUE);
-	}
-
-	/* An ordinary compressed page always has compressed data. */
-	ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE
-	      || bpage->zip.data != NULL);
-
-	return(FALSE);
-}
-
-/****************************************************************//**
-Add watch for the given page to be read in. Caller must have
-appropriate hash_lock for the bpage and hold the LRU list mutex to avoid a race
-condition with buf_LRU_free_page inserting the same page into the page hash.
-This function may release the hash_lock and reacquire it.
-If a watch already exists for the page, its buf_fix_count is incremented
-instead of allocating another sentinel.
-@return NULL if watch set, block if the page is in the buffer pool */
-UNIV_INTERN
-buf_page_t*
-buf_pool_watch_set(
-/*===============*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page number */
- ulint fold) /*!< in: buf_page_address_fold(space, offset) */
-{
- buf_page_t* bpage;
- ulint i;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
- prio_rw_lock_t* hash_lock;
-
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
- hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
-
- if (bpage != NULL) {
- /* This label is also the target of the recheck further below,
- after all hash locks except hash_lock have been released. */
-page_found:
- if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
- /* The page was loaded meanwhile. */
- return(bpage);
- }
-
- /* Add to an existing watch. */
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_increment_uint32(&bpage->buf_fix_count, 1);
-#else
- ++bpage->buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
- return(NULL);
- }
-
- /* From this point this function becomes fairly heavy in terms
- of latching. We acquire all the hash_locks. They are needed
- because we don't want to read any stale information in
- buf_pool->watch[]. However, it is not in the critical code path
- as this function will be called only by the purge thread. */
-
-
- /* To obey latching order first release the hash_lock. */
- rw_lock_x_unlock(hash_lock);
-
- hash_lock_x_all(buf_pool->page_hash);
-
- /* We have to recheck that the page
- was not loaded or a watch set by some other
- purge thread. This is because of the small
- time window between when we release the
- hash_lock to acquire all the hash locks above. */
-
- bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
- if (UNIV_LIKELY_NULL(bpage)) {
- hash_unlock_x_all_but(buf_pool->page_hash, hash_lock);
- goto page_found;
- }
-
- /* The maximum number of purge threads should never exceed
- BUF_POOL_WATCH_SIZE. So there is no way for purge thread
- instance to hold a watch when setting another watch. */
- /* Scan the watch array for a slot that is not in use
- (state BUF_BLOCK_POOL_WATCH) and turn it into the sentinel. */
- for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
- bpage = &buf_pool->watch[i];
-
- ut_ad(bpage->access_time == 0);
- ut_ad(bpage->newest_modification == 0);
- ut_ad(bpage->oldest_modification == 0);
- ut_ad(bpage->zip.data == NULL);
- ut_ad(!bpage->in_zip_hash);
-
- switch (bpage->state) {
- case BUF_BLOCK_POOL_WATCH:
- ut_ad(!bpage->in_page_hash);
- ut_ad(bpage->buf_fix_count == 0);
-
- /* Unused slot: make it look like a buffer-fixed compressed
- page for (space, offset) and publish it in page_hash. */
- bpage->state = BUF_BLOCK_ZIP_PAGE;
- bpage->space = static_cast<ib_uint32_t>(space);
- bpage->offset = static_cast<ib_uint32_t>(offset);
- bpage->buf_fix_count = 1;
- bpage->buf_pool_index = buf_pool_index(buf_pool);
-
- ut_d(bpage->in_page_hash = TRUE);
- HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
- fold, bpage);
-
- /* Once the sentinel is in the page_hash we can
- safely release all locks except just the
- relevant hash_lock */
- hash_unlock_x_all_but(buf_pool->page_hash,
- hash_lock);
-
- return(NULL);
- case BUF_BLOCK_ZIP_PAGE:
- /* Slot already used as a sentinel for another page;
- try the next one. */
- ut_ad(bpage->in_page_hash);
- ut_ad(bpage->buf_fix_count > 0);
- break;
- default:
- ut_error;
- }
- }
-
- /* Allocation failed. Either the maximum number of purge
- threads should never exceed BUF_POOL_WATCH_SIZE, or this code
- should be modified to return a special non-NULL value and the
- caller should purge the record directly. */
- ut_error;
-
- /* Fix compiler warning */
- return(NULL);
-}
-
-/****************************************************************//**
-Remove the sentinel block for the watch before replacing it with a real block.
-buf_pool_watch_unset() or buf_pool_watch_occurred() will notice that
-the block has been replaced with the real block.
-The sentinel is deleted from buf_pool->page_hash, its buf_fix_count is
-reset to 0 and its state is set back to BUF_BLOCK_POOL_WATCH. Nothing is
-returned. */
-static
-void
-buf_pool_watch_remove(
-/*==================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- ulint fold, /*!< in: buf_page_address_fold(
- space, offset) */
- buf_page_t* watch) /*!< in/out: sentinel for watch */
-{
-#ifdef UNIV_SYNC_DEBUG
- /* We must also own the appropriate hash_bucket mutex. */
- prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
- ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_ad(buf_page_get_state(watch) == BUF_BLOCK_ZIP_PAGE);
-
- HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
- ut_d(watch->in_page_hash = FALSE);
- /* Mark the slot as unused again so that buf_pool_watch_set()
- can pick it up. */
- watch->buf_fix_count = 0;
- watch->state = BUF_BLOCK_POOL_WATCH;
-}
-
-/****************************************************************//**
-Stop watching if the page has been read in.
-buf_pool_watch_set(space,offset) must have returned NULL before.
-The page hash lock of the page is x-latched for the duration of the
-call. */
-UNIV_INTERN
-void
-buf_pool_watch_unset(
-/*=================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
-{
- buf_page_t* bpage;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
- ulint fold = buf_page_address_fold(space, offset);
- prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-
- rw_lock_x_lock(hash_lock);
-
- /* The page must exist because buf_pool_watch_set() increments
- buf_fix_count. */
-
- bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
-
- if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
- /* The hash entry is no longer a sentinel: release the
- buffer-fix on the block that is there now. */
- buf_block_unfix(reinterpret_cast<buf_block_t*>(bpage));
- } else {
-
- ut_ad(bpage->buf_fix_count > 0);
-
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_decrement_uint32(&bpage->buf_fix_count, 1);
-#else
- --bpage->buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
-
- /* The last watcher returns the sentinel slot to the unused
- state. */
- if (bpage->buf_fix_count == 0) {
- buf_pool_watch_remove(buf_pool, fold, bpage);
- }
- }
-
- rw_lock_x_unlock(hash_lock);
-}
-
-/****************************************************************//**
-Check if the page has been read in, i.e. whether the page hash entry
-for (space, offset) is no longer a watch sentinel.
-This may only be called after buf_pool_watch_set(space,offset)
-has returned NULL and before invoking buf_pool_watch_unset(space,offset).
-@return FALSE if the given page was not read in, TRUE if it was */
-UNIV_INTERN
-ibool
-buf_pool_watch_occurred(
-/*====================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: page number */
-{
-	buf_pool_t*	pool = buf_pool_get(space, offset);
-	const ulint	fold = buf_page_address_fold(space, offset);
-	prio_rw_lock_t*	latch = buf_page_hash_lock_get(pool, fold);
-
-	rw_lock_s_lock(latch);
-
-	/* The page must exist because buf_pool_watch_set()
-	increments buf_fix_count. */
-	buf_page_t*	found = buf_page_hash_get_low(pool, space,
-						      offset, fold);
-	const ibool	read_in = !buf_pool_watch_is_sentinel(pool, found);
-
-	rw_lock_s_unlock(latch);
-
-	return(read_in);
-}
-
-/********************************************************************//**
-Moves a page to the start of the buffer pool LRU list. This high-level
-function can be used to prevent an important page from slipping out of
-the buffer pool. The LRU list mutex is acquired here, so the caller
-must not already own it. */
-UNIV_INTERN
-void
-buf_page_make_young(
-/*================*/
-	buf_page_t*	bpage)	/*!< in: buffer block of a file page */
-{
-	buf_pool_t* const	pool = buf_pool_from_bpage(bpage);
-	ib_mutex_t* const	lru_mutex = &pool->LRU_list_mutex;
-
-	ut_ad(!mutex_own(lru_mutex));
-
-	mutex_enter(lru_mutex);
-	ut_a(buf_page_in_file(bpage));
-	buf_LRU_make_block_young(bpage);
-	mutex_exit(lru_mutex);
-}
-
-/********************************************************************//**
-Moves a page to the start of the buffer pool LRU list if
-buf_page_peek_if_too_old() reports it as too old.
-This high-level function can be used to prevent an important page from
-slipping out of the buffer pool. */
-static
-void
-buf_page_make_young_if_needed(
-/*==========================*/
-	buf_page_t*	bpage)	/*!< in/out: buffer block of a
-				file page */
-{
-	ut_a(buf_page_in_file(bpage));
-
-	if (!buf_page_peek_if_too_old(bpage)) {
-		/* Young enough; leave the LRU list alone. */
-		return;
-	}
-
-	buf_page_make_young(bpage);
-}
-
-/********************************************************************//**
-Resets the check_index_page_at_flush field of a page if found in the buffer
-pool. Nothing is done unless the page is found in state
-BUF_BLOCK_FILE_PAGE. */
-UNIV_INTERN
-void
-buf_reset_check_index_page_at_flush(
-/*================================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: page number */
-{
-	buf_pool_t*	pool = buf_pool_get(space, offset);
-	buf_block_t*	found = reinterpret_cast<buf_block_t*>(
-		buf_page_hash_get(pool, space, offset));
-
-	if (found == NULL
-	    || buf_block_get_state(found) != BUF_BLOCK_FILE_PAGE) {
-
-		return;
-	}
-
-	ut_ad(!buf_pool_watch_is_sentinel(pool, &found->page));
-	found->check_index_page_at_flush = FALSE;
-}
-
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-/********************************************************************//**
-Sets file_page_was_freed TRUE if the page is found in the buffer pool.
-This function should be called when we free a file page and want the
-debug version to check that it is not accessed any more unless
-reallocated.
-@return control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_set_file_page_was_freed(
-/*=============================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: page number */
-{
-	prio_rw_lock_t*	latch;
-	buf_pool_t*	pool = buf_pool_get(space, offset);
-	buf_page_t*	found = buf_page_hash_get_s_locked(pool, space,
-							   offset, &latch);
-
-	if (found == NULL) {
-		return(NULL);
-	}
-
-	ib_mutex_t*	page_mutex = buf_page_get_mutex(found);
-
-	ut_ad(!buf_pool_watch_is_sentinel(pool, found));
-
-	/* Take the page mutex before letting go of the hash latch. */
-	mutex_enter(page_mutex);
-	rw_lock_s_unlock(latch);
-
-	/* found->file_page_was_freed can already hold
-	when this code is invoked from dict_drop_index_tree() */
-	found->file_page_was_freed = TRUE;
-	mutex_exit(page_mutex);
-
-	return(found);
-}
-
-/********************************************************************//**
-Sets file_page_was_freed FALSE if the page is found in the buffer pool.
-This is the counterpart of buf_page_set_file_page_was_freed(): it clears
-the debug flag again.
-@return control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_reset_file_page_was_freed(
-/*===============================*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: page number */
-{
-	prio_rw_lock_t*	latch;
-	buf_pool_t*	pool = buf_pool_get(space, offset);
-	buf_page_t*	found = buf_page_hash_get_s_locked(pool, space,
-							   offset, &latch);
-
-	if (found == NULL) {
-		return(NULL);
-	}
-
-	ib_mutex_t*	page_mutex = buf_page_get_mutex(found);
-
-	ut_ad(!buf_pool_watch_is_sentinel(pool, found));
-
-	/* Take the page mutex before letting go of the hash latch. */
-	mutex_enter(page_mutex);
-	rw_lock_s_unlock(latch);
-
-	found->file_page_was_freed = FALSE;
-	mutex_exit(page_mutex);
-
-	return(found);
-}
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
-
-/********************************************************************//**
-Attempts to discard the uncompressed frame of a compressed page. The
-caller should not be holding any mutexes when this function is called.
-The function returns nothing; the caller has to look the page up again
-to see what state it is in. */
-static
-void
-buf_block_try_discard_uncompressed(
-/*===============================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
-{
- buf_page_t* bpage;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
-
- /* Since we need to acquire buf_pool->LRU_list_mutex to discard
- the uncompressed frame and because page_hash mutex resides below
- buf_pool->LRU_list_mutex in sync ordering therefore we must first
- release the page_hash mutex. This means that the block in question
- can move out of page_hash. Therefore we need to check again if the
- block is still in page_hash. */
-
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- bpage = buf_page_hash_get(buf_pool, space, offset);
-
- if (bpage) {
-
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- mutex_enter(block_mutex);
-
- if (buf_LRU_free_page(bpage, false)) {
-
- /* NOTE(review): this path returns without releasing
- buf_pool->LRU_list_mutex; presumably buf_LRU_free_page()
- releases it when it returns true - confirm against its
- definition. */
- mutex_exit(block_mutex);
- return;
- }
- mutex_exit(block_mutex);
- }
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-}
-
-/********************************************************************//**
-Get read access to a compressed page (usually of type
-FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
-The page must be released with buf_page_release_zip().
-NOTE: the page is not protected by any latch. Mutual exclusion has to
-be implemented at a higher level. In other words, all possible
-accesses to a given page through this function must be protected by
-the same set of mutexes or latches.
-If the page is not in the buffer pool it is read in first. If a read
-of the page is in progress, the function waits for it to complete.
-@return pointer to the block, or NULL if reading the page failed, the
-page has no compressed data, or it is flagged corrupt while
-srv_pass_corrupt_table <= 1 */
-UNIV_INTERN
-buf_page_t*
-buf_page_get_zip(
-/*=============*/
-	ulint	space,	/*!< in: space id */
-	ulint	zip_size,/*!< in: compressed page size */
-	ulint	offset)	/*!< in: page number */
-{
-	buf_page_t*	bpage;
-	ib_mutex_t*	block_mutex;
-	prio_rw_lock_t*	hash_lock;
-	ibool		discard_attempted = FALSE;
-	ibool		must_read;
-	trx_t*		trx = NULL;
-	ulint		sec;
-	ulint		ms;
-	ib_uint64_t	start_time;
-	ib_uint64_t	finish_time;
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
-
-	if (UNIV_UNLIKELY(innobase_get_slow_log())) {
-		trx = innobase_get_trx();
-	}
-	buf_pool->stat.n_page_gets++;
-
-	for (;;) {
-lookup:
-
-		/* The following call will also grab the page_hash
-		mutex if the page is found. */
-		bpage = buf_page_hash_get_s_locked(buf_pool, space,
-						   offset, &hash_lock);
-		if (bpage) {
-			ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
-			break;
-		}
-
-		/* Page not in buf_pool: needs to be read from file */
-
-		ut_ad(!hash_lock);
-		dberr_t	err = buf_read_page(space, zip_size, offset, trx);
-
-		if (err != DB_SUCCESS) {
-			ib_logf(IB_LOG_LEVEL_ERROR,
-				"Reading compressed page " ULINTPF
-				":" ULINTPF
-				" failed with error: %s.",
-				space, offset, ut_strerr(err));
-
-			/* No hash lock is held here (hash_lock is NULL,
-			see the assertion above), so return directly
-			instead of going through err_exit, which
-			unlocks hash_lock. */
-			return(NULL);
-		}
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-		ut_a(++buf_dbg_counter % 5771 || buf_validate());
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-	}
-
-	ut_ad(buf_page_hash_lock_held_s(buf_pool, bpage));
-
-	if (!bpage->zip.data) {
-		/* There is no compressed page. */
-err_exit:
-		rw_lock_s_unlock(hash_lock);
-		return(NULL);
-	}
-
-	if (UNIV_UNLIKELY(bpage->is_corrupt && srv_pass_corrupt_table <= 1)) {
-
-		rw_lock_s_unlock(hash_lock);
-
-		return(NULL);
-	}
-
-	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
-
-	/* Buffer-fix the page under the mutex that protects it:
-	buf_pool->zip_mutex for compressed-only pages, the block mutex
-	for pages that also have an uncompressed frame. */
-	switch (buf_page_get_state(bpage)) {
-	case BUF_BLOCK_POOL_WATCH:
-	case BUF_BLOCK_NOT_USED:
-	case BUF_BLOCK_READY_FOR_USE:
-	case BUF_BLOCK_MEMORY:
-	case BUF_BLOCK_REMOVE_HASH:
-		ut_error;
-
-	case BUF_BLOCK_ZIP_PAGE:
-	case BUF_BLOCK_ZIP_DIRTY:
-		block_mutex = &buf_pool->zip_mutex;
-		mutex_enter(block_mutex);
-#ifdef PAGE_ATOMIC_REF_COUNT
-		os_atomic_increment_uint32(&bpage->buf_fix_count, 1);
-#else
-		++bpage->buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
-		goto got_block;
-	case BUF_BLOCK_FILE_PAGE:
-		/* Discard the uncompressed page frame if possible. */
-		if (!discard_attempted) {
-			rw_lock_s_unlock(hash_lock);
-			buf_block_try_discard_uncompressed(space, offset);
-			discard_attempted = TRUE;
-			goto lookup;
-		}
-
-		block_mutex = &((buf_block_t*) bpage)->mutex;
-
-		mutex_enter(block_mutex);
-
-		buf_block_buf_fix_inc((buf_block_t*) bpage, __FILE__, __LINE__);
-		goto got_block;
-	}
-
-	ut_error;
-	goto err_exit;
-
-got_block:
-	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
-
-	rw_lock_s_unlock(hash_lock);
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-	ut_a(!bpage->file_page_was_freed);
-#endif /* defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG */
-
-	buf_page_set_accessed(bpage);
-
-	mutex_exit(block_mutex);
-
-	buf_page_make_young_if_needed(bpage);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-	ut_a(++buf_dbg_counter % 5771 || buf_validate());
-	ut_a(bpage->buf_fix_count > 0);
-	ut_a(buf_page_in_file(bpage));
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-	if (must_read) {
-		/* Let us wait until the read operation
-		completes */
-
-		/* The wait is timed only when the transaction collects
-		statistics; start_time == 0 means "not timed". */
-		if (UNIV_UNLIKELY(trx && trx->take_stats))
-		{
-			ut_usectime(&sec, &ms);
-			start_time = (ib_uint64_t)sec * 1000000 + ms;
-		} else {
-			start_time = 0;
-		}
-		for (;;) {
-			enum buf_io_fix	io_fix;
-
-			mutex_enter(block_mutex);
-			io_fix = buf_page_get_io_fix(bpage);
-			mutex_exit(block_mutex);
-
-			if (io_fix == BUF_IO_READ) {
-
-				os_thread_sleep(WAIT_FOR_READ);
-			} else {
-				break;
-			}
-		}
-		if (UNIV_UNLIKELY(start_time != 0))
-		{
-			ut_usectime(&sec, &ms);
-			finish_time = (ib_uint64_t)sec * 1000000 + ms;
-			trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
-		}
-	}
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
-	ut_a(ibuf_count_get(buf_page_get_space(bpage),
-			    buf_page_get_page_no(bpage)) == 0);
-#endif
-	return(bpage);
-}
-
-/********************************************************************//**
-Initialize some fields of a control block: the flush-time check flag,
-the index pointer and the n_hash_helps, n_fields, n_bytes and left_side
-fields. */
-UNIV_INLINE
-void
-buf_block_init_low(
-/*===============*/
-	buf_block_t*	block)	/*!< in: block to init */
-{
-	/* No index is associated with the block yet. */
-	block->index = NULL;
-	block->check_index_page_at_flush = FALSE;
-
-	/* Starting values of the remaining fields. */
-	block->left_side = TRUE;
-	block->n_bytes = 0;
-	block->n_fields = 1;
-	block->n_hash_helps = 0;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************************//**
-Decompress a block.
-Index pages are decompressed with page_zip_decompress(); the other
-recognized page types are copied verbatim into the uncompressed frame.
-On failure the tablespace is flagged as encrypted or as corrupted,
-depending on whether its crypt data indicates encryption.
-@return TRUE if successful */
-UNIV_INTERN
-ibool
-buf_zip_decompress(
-/*===============*/
- buf_block_t* block, /*!< in/out: block */
- ibool check) /*!< in: TRUE=verify the page checksum */
-{
- const byte* frame = block->page.zip.data;
- ulint size = page_zip_get_size(&block->page.zip);
- /* Space is not found if this function is called during IMPORT */
- fil_space_t* space = fil_space_acquire_for_io(block->page.space);
- const unsigned key_version = mach_read_from_4(frame +
- FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
- fil_space_crypt_t* crypt_data = space ? space->crypt_data : NULL;
- /* Used only on the error path, to decide how to flag the
- tablespace. */
- const bool encrypted = crypt_data
- && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED
- && (!crypt_data->is_default_encryption()
- || srv_encrypt_tables);
-
- ut_ad(buf_block_get_zip_size(block));
- ut_a(buf_block_get_space(block) != 0);
-
- if (UNIV_UNLIKELY(check && !page_zip_verify_checksum(frame, size))) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Compressed page checksum mismatch"
- " for %s [%u:%u]: stored: " ULINTPF ", crc32: " ULINTPF
- " innodb: " ULINTPF ", none: " ULINTPF ".",
- space ? space->chain.start->name : "N/A",
- block->page.space, block->page.offset,
- mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM),
- page_zip_calc_checksum(frame, size,
- SRV_CHECKSUM_ALGORITHM_CRC32),
- page_zip_calc_checksum(frame, size,
- SRV_CHECKSUM_ALGORITHM_INNODB),
- page_zip_calc_checksum(frame, size,
- SRV_CHECKSUM_ALGORITHM_NONE));
- goto err_exit;
- }
-
- /* Every return below first releases the space reference acquired
- above, if there is one. */
- switch (fil_page_get_type(frame)) {
- case FIL_PAGE_INDEX: {
-
- if (page_zip_decompress(&block->page.zip,
- block->frame, TRUE)) {
- if (space) {
- fil_space_release_for_io(space);
- }
- return(TRUE);
- }
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unable to decompress space %s [%u:%u]",
- space ? space->chain.start->name : "N/A",
- block->page.space,
- block->page.offset);
-
- goto err_exit;
- }
- case FIL_PAGE_TYPE_ALLOCATED:
- case FIL_PAGE_INODE:
- case FIL_PAGE_IBUF_BITMAP:
- case FIL_PAGE_TYPE_FSP_HDR:
- case FIL_PAGE_TYPE_XDES:
- case FIL_PAGE_TYPE_ZBLOB:
- case FIL_PAGE_TYPE_ZBLOB2:
- /* Copy to uncompressed storage. */
- memcpy(block->frame, frame,
- buf_block_get_zip_size(block));
-
- if (space) {
- fil_space_release_for_io(space);
- }
-
- return(TRUE);
- }
-
- /* Any other page type falls through to here and is reported as
- an error. */
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown compressed page in %s [%u:%u]"
- " type %s [" ULINTPF "].",
- space ? space->chain.start->name : "N/A",
- block->page.space, block->page.offset,
- fil_get_page_type_name(fil_page_get_type(frame)), fil_page_get_type(frame));
-
-err_exit:
- if (encrypted) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Row compressed page could be encrypted with key_version %u.",
- key_version);
- block->page.encrypted = true;
- dict_set_encrypted_by_space(block->page.space);
- } else {
- dict_set_corrupted_by_space(block->page.space);
- }
-
- if (space) {
- fil_space_release_for_io(space);
- }
-
- return(FALSE);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Gets the block to whose frame the pointer is pointing to if found
-in this buffer pool instance.
-@return pointer to block, or NULL if ptr does not point into a frame
-of any chunk of this instance */
-UNIV_INTERN
-buf_block_t*
-buf_block_align_instance(
-/*=====================*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer in which the block
-					resides */
-	const byte*	ptr)		/*!< in: pointer to a frame */
-{
-	buf_chunk_t*	chunk;
-	ulint		i;
-
-	/* TODO: protect buf_pool->chunks with a mutex (it will
-	currently remain constant after buf_pool_init()) */
-	for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
-		ulint	offs;
-
-		if (UNIV_UNLIKELY(ptr < chunk->blocks->frame)) {
-
-			continue;
-		}
-		/* else */
-
-		/* Index of the frame within the chunk: byte distance from
-		the first frame, divided by the page size. */
-		offs = ptr - chunk->blocks->frame;
-
-		offs >>= UNIV_PAGE_SIZE_SHIFT;
-
-		if (UNIV_LIKELY(offs < chunk->size)) {
-			buf_block_t*	block = &chunk->blocks[offs];
-
-			/* The function buf_chunk_init() invokes
-			buf_block_init() so that block[n].frame ==
-			block->frame + n * UNIV_PAGE_SIZE. Check it. */
-			ut_ad(block->frame == page_align(ptr));
-#ifdef UNIV_DEBUG
-			/* A thread that updates these fields must
-			hold one of the buf_pool mutexes, depending on the
-			page state, and block->mutex. Acquire
-			only the latter. */
-			mutex_enter(&block->mutex);
-
-			switch (buf_block_get_state(block)) {
-			case BUF_BLOCK_POOL_WATCH:
-			case BUF_BLOCK_ZIP_PAGE:
-			case BUF_BLOCK_ZIP_DIRTY:
-				/* These types should only be used in
-				the compressed buffer pool, whose
-				memory is allocated from
-				buf_pool->chunks, in UNIV_PAGE_SIZE
-				blocks flagged as BUF_BLOCK_MEMORY. */
-				ut_error;
-				break;
-			case BUF_BLOCK_NOT_USED:
-			case BUF_BLOCK_READY_FOR_USE:
-			case BUF_BLOCK_MEMORY:
-				/* Some data structures contain
-				"guess" pointers to file pages. The
-				file pages may have been freed and
-				reused. Do not complain. */
-				break;
-			case BUF_BLOCK_REMOVE_HASH:
-				/* buf_LRU_block_remove_hashed_page()
-				will overwrite the FIL_PAGE_OFFSET and
-				FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
-				0xff and set the state to
-				BUF_BLOCK_REMOVE_HASH. */
-				ut_ad(page_get_space_id(page_align(ptr))
-				      == 0xffffffff);
-				ut_ad(page_get_page_no(page_align(ptr))
-				      == 0xffffffff);
-				break;
-			case BUF_BLOCK_FILE_PAGE: {
-				ulint	space = page_get_space_id(page_align(ptr));
-				ulint	offset = page_get_page_no(page_align(ptr));
-
-				if (block->page.space != space ||
-				    block->page.offset != offset) {
-					/* ULINTPF is the format for ulint
-					used elsewhere in this file; "%lu"
-					only matches where ulint is
-					unsigned long. */
-					ib_logf(IB_LOG_LEVEL_ERROR,
-						"Corruption: Block space_id " ULINTPF
-						" != page space_id " ULINTPF " or "
-						"Block offset " ULINTPF
-						" != page offset " ULINTPF,
-						(ulint)block->page.space, space,
-						(ulint)block->page.offset, offset);
-				}
-
-				ut_ad(block->page.space
-				      == page_get_space_id(page_align(ptr)));
-				ut_ad(block->page.offset
-				      == page_get_page_no(page_align(ptr)));
-				break;
-			}
-			}
-
-			mutex_exit(&block->mutex);
-#endif /* UNIV_DEBUG */
-
-			return(block);
-		}
-	}
-
-	return(NULL);
-}
-
-/*******************************************************************//**
-Gets the block to whose frame the pointer is pointing to, by asking
-each buffer pool instance in turn. Not finding the block is treated as
-a fatal error.
-@return pointer to block, never NULL */
-UNIV_INTERN
-buf_block_t*
-buf_block_align(
-/*============*/
-	const byte*	ptr)	/*!< in: pointer to a frame */
-{
-	for (ulint n = 0; n < srv_buf_pool_instances; n++) {
-		buf_block_t*	found = buf_block_align_instance(
-			buf_pool_from_array(n), ptr);
-
-		if (found != NULL) {
-			return(found);
-		}
-	}
-
-	/* The block should always be found. */
-	ut_error;
-	return(NULL);
-}
-
-/********************************************************************//**
-Find out if a pointer belongs to a buf_block_t. It can be a pointer to
-the buf_block_t itself or a member of it. This functions checks one of
-the buffer pool instances.
-@return TRUE if ptr belongs to a buf_block_t struct */
-static
-ibool
-buf_pointer_is_block_field_instance(
-/*================================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- const void* ptr) /*!< in: pointer not dereferenced */
-{
- const buf_chunk_t* chunk = buf_pool->chunks;
- const buf_chunk_t* const echunk = chunk + buf_pool->n_chunks;
-
- /* TODO: protect buf_pool->chunks with a mutex (it will
- currently remain constant after buf_pool_init()) */
- while (chunk < echunk) {
- if (ptr >= (void*) chunk->blocks
- && ptr < (void*) (chunk->blocks + chunk->size)) {
-
- return(TRUE);
- }
-
- chunk++;
- }
-
- return(FALSE);
-}
-
-/********************************************************************//**
-Find out if a pointer belongs to a buf_block_t. It can be a pointer to
-the buf_block_t itself or a member of it
-@return TRUE if ptr belongs to a buf_block_t struct */
-UNIV_INTERN
-ibool
-buf_pointer_is_block_field(
-/*=======================*/
- const void* ptr) /*!< in: pointer not dereferenced */
-{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
- ibool found;
-
- found = buf_pointer_is_block_field_instance(
- buf_pool_from_array(i), ptr);
- if (found) {
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/********************************************************************//**
-Find out if a buffer block was created by buf_chunk_init().
-@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
-static
-ibool
-buf_block_is_uncompressed(
-/*======================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- const buf_block_t* block) /*!< in: pointer to block,
- not dereferenced */
-{
- if ((((ulint) block) % sizeof *block) != 0) {
- /* The pointer should be aligned. */
- return(FALSE);
- }
-
- return(buf_pointer_is_block_field_instance(buf_pool, (void*) block));
-}
-
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-/********************************************************************//**
-Return true if probe is enabled.
-@return true if probe enabled. */
-static
-bool
-buf_debug_execute_is_force_flush()
-/*==============================*/
-{
- DBUG_EXECUTE_IF("ib_buf_force_flush", return(true); );
-
- /* This is used during queisce testing, we want to ensure maximum
- buffering by the change buffer. */
-
- if (srv_ibuf_disable_background_merge) {
- return(true);
- }
-
- return(false);
-}
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
-/**
-Wait for the block to be read in.
-@param block The block to check
-@param trx Transaction to account the I/Os to */
-static
-void
-buf_wait_for_read(buf_block_t* block, trx_t* trx)
-{
- /* Note: For the PAGE_ATOMIC_REF_COUNT case:
-
- We are using the block->lock to check for IO state (and a dirty read).
- We set the IO_READ state under the protection of the hash_lock
- (and block->mutex). This is safe because another thread can only
- access the block (and check for IO state) after the block has been
- added to the page hashtable. */
-
- if (buf_block_get_io_fix_unlocked(block) == BUF_IO_READ) {
-
- ib_uint64_t start_time;
- ulint sec;
- ulint ms;
-
- /* Wait until the read operation completes */
-
- ib_mutex_t* mutex = buf_page_get_mutex(&block->page);
-
- if (UNIV_UNLIKELY(trx && trx->take_stats))
- {
- ut_usectime(&sec, &ms);
- start_time = (ib_uint64_t)sec * 1000000 + ms;
- } else {
- start_time = 0;
- }
-
- for (;;) {
- buf_io_fix io_fix;
-
- mutex_enter(mutex);
-
- io_fix = buf_block_get_io_fix(block);
-
- mutex_exit(mutex);
-
- if (io_fix == BUF_IO_READ) {
- /* Wait by temporaly s-latch */
- rw_lock_s_lock(&block->lock);
- rw_lock_s_unlock(&block->lock);
- } else {
- break;
- }
- }
-
- if (UNIV_UNLIKELY(start_time != 0))
- {
- ut_usectime(&sec, &ms);
- ib_uint64_t finish_time
- = (ib_uint64_t)sec * 1000000 + ms;
- trx->io_reads_wait_timer
- += (ulint)(finish_time - start_time);
- }
-
- }
-}
-
-/********************************************************************//**
-This is the general function used to get access to a database page.
-@return pointer to the block or NULL */
-UNIV_INTERN
-buf_block_t*
-buf_page_get_gen(
-/*=============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint offset, /*!< in: page number */
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
- buf_block_t* guess, /*!< in: guessed block or NULL */
- ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
- BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or
- BUF_GET_IF_IN_POOL_OR_WATCH */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr, /*!< in: mini-transaction */
- dberr_t* err) /*!< out: error code */
-{
- buf_block_t* block;
- ulint fold;
- unsigned access_time;
- ulint fix_type;
- prio_rw_lock_t* hash_lock;
- ulint retries = 0;
- trx_t* trx = NULL;
- buf_block_t* fix_block;
- ib_mutex_t* fix_mutex = NULL;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
-
- ut_ad(mtr);
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad((rw_latch == RW_S_LATCH)
- || (rw_latch == RW_X_LATCH)
- || (rw_latch == RW_NO_LATCH));
-
- if (err) {
- *err = DB_SUCCESS;
- }
-
-#ifdef UNIV_DEBUG
- switch (mode) {
- case BUF_GET_NO_LATCH:
- ut_ad(rw_latch == RW_NO_LATCH);
- break;
- case BUF_GET:
- case BUF_GET_IF_IN_POOL:
- case BUF_PEEK_IF_IN_POOL:
- case BUF_GET_IF_IN_POOL_OR_WATCH:
- case BUF_GET_POSSIBLY_FREED:
- break;
- default:
- ut_error;
- }
-#endif /* UNIV_DEBUG */
- ut_ad(zip_size == fil_space_get_zip_size(space));
- ut_ad(ut_is_2pow(zip_size));
-#ifndef UNIV_LOG_DEBUG
- ut_ad(!ibuf_inside(mtr)
- || ibuf_page_low(space, zip_size, offset,
- FALSE, file, line, NULL));
-#endif
- if (UNIV_UNLIKELY(innobase_get_slow_log())) {
- trx = innobase_get_trx();
- }
- buf_pool->stat.n_page_gets++;
- fold = buf_page_address_fold(space, offset);
- hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-loop:
- block = guess;
-
- rw_lock_s_lock(hash_lock);
-
- if (block != NULL) {
-
- /* If the guess is a compressed page descriptor that
- has been allocated by buf_page_alloc_descriptor(),
- it may have been freed by buf_relocate(). */
-
- if (!buf_block_is_uncompressed(buf_pool, block)
- || offset != block->page.offset
- || space != block->page.space
- || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
-
- /* Our guess was bogus or things have changed
- since. */
- block = guess = NULL;
- } else {
- ut_ad(!block->page.in_zip_hash);
- }
- }
-
- if (block == NULL) {
- block = (buf_block_t*) buf_page_hash_get_low(
- buf_pool, space, offset, fold);
- }
-
- if (!block || buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
- rw_lock_s_unlock(hash_lock);
- block = NULL;
- }
-
- if (block == NULL) {
-
- /* Page not in buf_pool: needs to be read from file */
-
- if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
- mutex_enter(&buf_pool->LRU_list_mutex);
- rw_lock_x_lock(hash_lock);
- block = (buf_block_t*) buf_pool_watch_set(
- space, offset, fold);
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- if (UNIV_LIKELY_NULL(block)) {
- /* We can release hash_lock after we
- increment the fix count to make
- sure that no state change takes place. */
- fix_block = block;
- buf_block_fix(fix_block);
-
- /* Now safe to release page_hash mutex */
- rw_lock_x_unlock(hash_lock);
- goto got_block;
- }
-
- rw_lock_x_unlock(hash_lock);
- }
-
- if (mode == BUF_GET_IF_IN_POOL
- || mode == BUF_PEEK_IF_IN_POOL
- || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
- return(NULL);
- }
-
- /* Call path is buf_read_page() -> buf_read_page_low()
- (_fil_io()) -> buf_page_io_complete() ->
- buf_decrypt_after_read() here fil_space_t* is used
- and we decrypt -> buf_page_check_corrupt() where
- page checksums are compared. Decryption/decompression
- is handled lower level, error handling is handled on lower
- level, here we need only to know is page really corrupted
- or encrypted page with correct checksum. */
-
- dberr_t local_err = buf_read_page(space, zip_size, offset, trx);
-
- if (local_err == DB_SUCCESS) {
- buf_read_ahead_random(space, zip_size, offset,
- ibuf_inside(mtr), trx);
-
- retries = 0;
- } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
- ++retries;
-
- DBUG_EXECUTE_IF(
- "innodb_page_corruption_retries",
- retries = BUF_PAGE_READ_MAX_RETRIES;
- );
- } else {
- if (err) {
- *err = local_err;
- }
-
- /* Pages whose encryption key is unavailable or used
- key, encryption algorithm or encryption method is
- incorrect are marked as encrypted in
- buf_page_check_corrupt(). Unencrypted page could be
- corrupted in a way where the key_id field is
- nonzero. There is no checksum on field
- FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION. */
- if (local_err == DB_DECRYPTION_FAILED) {
- return (NULL);
- }
-
- /* Try to set table as corrupted instead of
- asserting. */
- if (space > TRX_SYS_SPACE &&
- dict_set_corrupted_by_space(space)) {
- return (NULL);
- }
-
- ib_logf(IB_LOG_LEVEL_FATAL, "Unable"
- " to read tablespace " ULINTPF " page no "
- ULINTPF " into the buffer pool after "
- ULINTPF " attempts."
- " The most probable cause"
- " of this error may be that the"
- " table has been corrupted."
- " You can try to fix this"
- " problem by using"
- " innodb_force_recovery."
- " Please see " REFMAN " for more"
- " details. Aborting...",
- space, offset,
- BUF_PAGE_READ_MAX_RETRIES);
- }
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 5771 || buf_validate());
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- goto loop;
- } else {
- fix_block = block;
- }
-
- buf_block_fix(fix_block);
-
- /* Now safe to release page_hash mutex */
- rw_lock_s_unlock(hash_lock);
-
-got_block:
-
- fix_mutex = buf_page_get_mutex(&fix_block->page);
-
- ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
-
- if (mode == BUF_GET_IF_IN_POOL || mode == BUF_PEEK_IF_IN_POOL) {
-
- bool must_read;
-
- {
- buf_page_t* fix_page = &fix_block->page;
-
- mutex_enter(fix_mutex);
-
- buf_io_fix io_fix = buf_page_get_io_fix(fix_page);
-
- must_read = (io_fix == BUF_IO_READ);
-
- mutex_exit(fix_mutex);
- }
-
- if (must_read) {
- /* The page is being read to buffer pool,
- but we cannot wait around for the read to
- complete. */
- buf_block_unfix(fix_block);
-
- return(NULL);
- }
- }
-
- if (UNIV_UNLIKELY(fix_block->page.is_corrupt &&
- srv_pass_corrupt_table <= 1)) {
-
- buf_block_unfix(fix_block);
-
- return(NULL);
- }
-
- switch(buf_block_get_state(fix_block)) {
- buf_page_t* bpage;
-
- case BUF_BLOCK_FILE_PAGE:
- ut_ad(fix_mutex != &buf_pool->zip_mutex);
- break;
-
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- if (mode == BUF_PEEK_IF_IN_POOL) {
- /* This mode is only used for dropping an
- adaptive hash index. There cannot be an
- adaptive hash index for a compressed-only
- page, so do not bother decompressing the page. */
- buf_block_unfix(fix_block);
-
- return(NULL);
- }
-
- bpage = &block->page;
- ut_ad(fix_mutex == &buf_pool->zip_mutex);
-
- /* Note: We have already buffer fixed this block. */
- if (bpage->buf_fix_count > 1
- || buf_page_get_io_fix_unlocked(bpage) != BUF_IO_NONE) {
-
- /* This condition often occurs when the buffer
- is not buffer-fixed, but I/O-fixed by
- buf_page_init_for_read(). */
-
- buf_block_unfix(fix_block);
-
- /* The block is buffer-fixed or I/O-fixed.
- Try again later. */
- os_thread_sleep(WAIT_FOR_READ);
-
- goto loop;
- }
-
- /* Buffer-fix the block so that it cannot be evicted
- or relocated while we are attempting to allocate an
- uncompressed page. */
-
- /* Allocate an uncompressed page. */
-
- block = buf_LRU_get_free_block(buf_pool);
-
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- rw_lock_x_lock(hash_lock);
-
- /* Buffer-fixing prevents the page_hash from changing. */
- ut_ad(bpage == buf_page_hash_get_low(
- buf_pool, space, offset, fold));
-
- buf_block_mutex_enter(block);
-
- mutex_enter(&buf_pool->zip_mutex);
-
- ut_ad(fix_block->page.buf_fix_count > 0);
-
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_decrement_uint32(&fix_block->page.buf_fix_count, 1);
-#else
- --fix_block->page.buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
-
- fix_block = block;
-
- if (bpage->buf_fix_count > 0
- || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
-
- mutex_exit(&buf_pool->zip_mutex);
- /* The block was buffer-fixed or I/O-fixed while
- buf_pool->mutex was not held by this thread.
- Free the block that was allocated and retry.
- This should be extremely unlikely, for example,
- if buf_page_get_zip() was invoked. */
-
- buf_LRU_block_free_non_file_page(block);
- mutex_exit(&buf_pool->LRU_list_mutex);
- rw_lock_x_unlock(hash_lock);
- buf_block_mutex_exit(block);
-
- /* Try again */
- goto loop;
- }
-
- /* Move the compressed page from bpage to block,
- and uncompress it. */
-
- /* Note: this is the uncompressed block and it is not
- accessible by other threads yet because it is not in
- any list or hash table */
- buf_relocate(bpage, &block->page);
-
- buf_block_init_low(block);
-
- /* Set after relocate(). */
- block->page.buf_fix_count = 1;
-
- block->lock_hash_val = lock_rec_hash(space, offset);
-
- UNIV_MEM_DESC(&block->page.zip.data,
- page_zip_get_size(&block->page.zip));
-
- if (buf_page_get_state(&block->page) == BUF_BLOCK_ZIP_PAGE) {
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- UT_LIST_REMOVE(list, buf_pool->zip_clean,
- &block->page);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- ut_ad(!block->page.in_flush_list);
- } else {
- /* Relocate buf_pool->flush_list. */
- buf_flush_relocate_on_flush_list(bpage, &block->page);
- }
-
- /* Buffer-fix, I/O-fix, and X-latch the block
- for the duration of the decompression.
- Also add the block to the unzip_LRU list. */
- block->page.state = BUF_BLOCK_FILE_PAGE;
-
- /* Insert at the front of unzip_LRU list */
- buf_unzip_LRU_add_block(block, FALSE);
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- buf_block_set_io_fix(block, BUF_IO_READ);
- rw_lock_x_lock_inline(&block->lock, 0, file, line);
-
- UNIV_MEM_INVALID(bpage, sizeof *bpage);
-
- rw_lock_x_unlock(hash_lock);
-
- os_atomic_increment_ulint(&buf_pool->n_pend_unzip, 1);
-
- mutex_exit(&buf_pool->zip_mutex);
-
- access_time = buf_page_is_accessed(&block->page);
-
- buf_block_mutex_exit(block);
-
- buf_page_free_descriptor(bpage);
-
- /* Decompress the page while not holding
- any buf_pool or block->mutex. */
-
- {
- bool success = buf_zip_decompress(block, TRUE);
-
- if (!success) {
- buf_block_mutex_enter(fix_block);
- buf_block_set_io_fix(fix_block, BUF_IO_NONE);
- buf_block_mutex_exit(fix_block);
-
- os_atomic_decrement_ulint(&buf_pool->n_pend_unzip, 1);
- rw_lock_x_unlock(&fix_block->lock);
- mutex_enter(&buf_pool->LRU_list_mutex);
- buf_block_unfix(fix_block);
- mutex_exit(&buf_pool->LRU_list_mutex);
- return NULL;
- }
- }
-
- if (!recv_no_ibuf_operations) {
- if (access_time) {
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(space, offset) == 0);
-#endif /* UNIV_IBUF_COUNT_DEBUG */
- } else {
- ibuf_merge_or_delete_for_page(
- block, space, offset, zip_size, TRUE);
- }
- }
-
- /* Unfix and unlatch the block. */
- buf_block_mutex_enter(fix_block);
-
- buf_block_set_io_fix(fix_block, BUF_IO_NONE);
-
- buf_block_mutex_exit(fix_block);
-
- os_atomic_decrement_ulint(&buf_pool->n_pend_unzip, 1);
-
- rw_lock_x_unlock(&block->lock);
-
- break;
-
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- break;
- }
-
- ut_ad(block == fix_block);
- ut_ad(fix_block->page.buf_fix_count > 0);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_ad(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE);
-
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-
- if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)
- && (ibuf_debug || buf_debug_execute_is_force_flush())) {
-
- /* Try to evict the block from the buffer pool, to use the
- insert buffer (change buffer) as much as possible. */
-
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- buf_block_unfix(fix_block);
-
- /* Now we are only holding the buf_pool->LRU_list_mutex,
- not block->mutex or hash_lock. Blocks cannot be
- relocated or enter or exit the buf_pool while we
- are holding the buf_pool->LRU_list_mutex. */
-
- fix_mutex = buf_page_get_mutex(&fix_block->page);
- mutex_enter(fix_mutex);
-
- if (buf_LRU_free_page(&fix_block->page, true)) {
-
- mutex_exit(fix_mutex);
-
- if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
- mutex_enter(&buf_pool->LRU_list_mutex);
- rw_lock_x_lock(hash_lock);
-
- /* Set the watch, as it would have
- been set if the page were not in the
- buffer pool in the first place. */
- block = (buf_block_t*) buf_pool_watch_set(
- space, offset, fold);
- mutex_exit(&buf_pool->LRU_list_mutex);
- } else {
- rw_lock_x_lock(hash_lock);
- block = (buf_block_t*) buf_page_hash_get_low(
- buf_pool, space, offset, fold);
- }
-
- rw_lock_x_unlock(hash_lock);
-
- if (block != NULL) {
- /* Either the page has been read in or
- a watch was set on that in the window
- where we released the buf_pool::mutex
- and before we acquire the hash_lock
- above. Try again. */
- guess = block;
- goto loop;
- }
-
- return(NULL);
- }
-
- if (buf_flush_page_try(buf_pool, fix_block)) {
- guess = fix_block;
- goto loop;
- }
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- buf_block_mutex_exit(fix_block);
-
- buf_block_fix(fix_block);
-
- /* Failed to evict the page; change it directly */
- }
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
- ut_ad(fix_block->page.buf_fix_count > 0);
-
-#ifdef UNIV_SYNC_DEBUG
- /* We have already buffer fixed the page, and we are committed to
- returning this page to the caller. Register for debugging. */
- {
- ibool ret;
- ret = rw_lock_s_lock_nowait(&fix_block->debug_latch, file, line);
- ut_a(ret);
- }
-#endif /* UNIV_SYNC_DEBUG */
-
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- ut_a(mode == BUF_GET_POSSIBLY_FREED
- || !fix_block->page.file_page_was_freed);
-#endif
- /* Check if this is the first access to the page */
- access_time = buf_page_is_accessed(&fix_block->page);
-
- /* This is a heuristic and we don't care about ordering issues. */
- if (access_time == 0) {
- buf_block_mutex_enter(fix_block);
-
- buf_page_set_accessed(&fix_block->page);
-
- buf_block_mutex_exit(fix_block);
- }
-
- if (mode != BUF_PEEK_IF_IN_POOL) {
- buf_page_make_young_if_needed(&fix_block->page);
- }
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 5771 || buf_validate());
- ut_a(fix_block->page.buf_fix_count > 0);
- ut_a(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-#ifdef PAGE_ATOMIC_REF_COUNT
- /* We have to wait here because the IO_READ state was set
- under the protection of the hash_lock and the block->mutex
- but not the block->lock. */
- buf_wait_for_read(fix_block, trx);
-#endif /* PAGE_ATOMIC_REF_COUNT */
-
- switch (rw_latch) {
- case RW_NO_LATCH:
-
-#ifndef PAGE_ATOMIC_REF_COUNT
- buf_wait_for_read(fix_block, trx);
-#endif /* !PAGE_ATOMIC_REF_COUNT */
-
- fix_type = MTR_MEMO_BUF_FIX;
- break;
-
- case RW_S_LATCH:
- rw_lock_s_lock_inline(&fix_block->lock, 0, file, line);
-
- fix_type = MTR_MEMO_PAGE_S_FIX;
- break;
-
- default:
- ut_ad(rw_latch == RW_X_LATCH);
- rw_lock_x_lock_inline(&fix_block->lock, 0, file, line);
-
- fix_type = MTR_MEMO_PAGE_X_FIX;
- break;
- }
-
- mtr_memo_push(mtr, fix_block, fix_type);
-
- if (mode != BUF_PEEK_IF_IN_POOL && !access_time) {
- /* In the case of a first access, try to apply linear
- read-ahead */
-
- buf_read_ahead_linear(
- space, zip_size, offset, ibuf_inside(mtr), trx);
- }
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(buf_block_get_space(fix_block),
- buf_block_get_page_no(fix_block)) == 0);
-#endif
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (UNIV_UNLIKELY(trx && trx->take_stats)) {
- _increment_page_get_statistics(block, trx);
- }
-
- return(fix_block);
-}
-
-/********************************************************************//**
-This is the general function used to get optimistic access to a database
-page.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-buf_page_optimistic_get(
-/*====================*/
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
- buf_block_t* block, /*!< in: guessed buffer block */
- ib_uint64_t modify_clock,/*!< in: modify clock value */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- buf_pool_t* buf_pool;
- unsigned access_time;
- ibool success;
- ulint fix_type;
- trx_t* trx = NULL;
-
- ut_ad(block);
- ut_ad(mtr);
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
- mutex_enter(&block->mutex);
-
- if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
-
- mutex_exit(&block->mutex);
-
- return(FALSE);
- }
-
- buf_block_buf_fix_inc(block, file, line);
-
- access_time = buf_page_is_accessed(&block->page);
-
- buf_page_set_accessed(&block->page);
-
- mutex_exit(&block->mutex);
-
- buf_page_make_young_if_needed(&block->page);
-
- ut_ad(!ibuf_inside(mtr)
- || ibuf_page(buf_block_get_space(block),
- buf_block_get_zip_size(block),
- buf_block_get_page_no(block), NULL));
-
- if (rw_latch == RW_S_LATCH) {
- success = rw_lock_s_lock_nowait(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_S_FIX;
- } else {
- success = rw_lock_x_lock_func_nowait_inline(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_X_FIX;
- }
-
- if (UNIV_UNLIKELY(!success)) {
- buf_block_buf_fix_dec(block);
-
- return(FALSE);
- }
-
- if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
- if (rw_latch == RW_S_LATCH) {
- rw_lock_s_unlock(&(block->lock));
- } else {
- rw_lock_x_unlock(&(block->lock));
- }
-
- buf_block_buf_fix_dec(block);
-
- return(FALSE);
- }
-
- mtr_memo_push(mtr, block, fix_type);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 5771 || buf_validate());
- ut_a(block->page.buf_fix_count > 0);
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- mutex_enter(&block->mutex);
- ut_a(!block->page.file_page_was_freed);
- mutex_exit(&block->mutex);
-#endif
- if (UNIV_UNLIKELY(innobase_get_slow_log())) {
- trx = innobase_get_trx();
- }
-
- if (!access_time) {
- /* In the case of a first access, try to apply linear
- read-ahead */
-
- buf_read_ahead_linear(buf_block_get_space(block),
- buf_block_get_zip_size(block),
- buf_block_get_page_no(block),
- ibuf_inside(mtr), trx);
- }
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(buf_block_get_space(block),
- buf_block_get_page_no(block)) == 0);
-#endif
- buf_pool = buf_pool_from_block(block);
- buf_pool->stat.n_page_gets++;
-
- if (UNIV_UNLIKELY(trx && trx->take_stats)) {
- _increment_page_get_statistics(block, trx);
- }
- return(TRUE);
-}
-
-/********************************************************************//**
-This is used to get access to a known database page, when no waiting can be
-done. For example, if a search in an adaptive hash index leads us to this
-frame.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-buf_page_get_known_nowait(
-/*======================*/
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
- buf_block_t* block, /*!< in: the known page */
- ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- buf_pool_t* buf_pool;
- ibool success;
- ulint fix_type;
- trx_t* trx = NULL;
-
- ut_ad(mtr);
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
- mutex_enter(&block->mutex);
-
- if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
- /* Another thread is just freeing the block from the LRU list
- of the buffer pool: do not try to access this page; this
- attempt to access the page can only come through the hash
- index because when the buffer block state is ..._REMOVE_HASH,
- we have already removed it from the page address hash table
- of the buffer pool. */
-
- mutex_exit(&block->mutex);
-
- return(FALSE);
- }
-
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
- buf_block_buf_fix_inc(block, file, line);
-
- buf_page_set_accessed(&block->page);
-
- mutex_exit(&block->mutex);
-
- buf_pool = buf_pool_from_block(block);
-
- if (mode == BUF_MAKE_YOUNG) {
- buf_page_make_young_if_needed(&block->page);
- }
-
- ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD);
-
- if (rw_latch == RW_S_LATCH) {
- success = rw_lock_s_lock_nowait(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_S_FIX;
- } else {
- success = rw_lock_x_lock_func_nowait_inline(&(block->lock),
- file, line);
- fix_type = MTR_MEMO_PAGE_X_FIX;
- }
-
- if (!success) {
- buf_block_buf_fix_dec(block);
-
- return(FALSE);
- }
-
- mtr_memo_push(mtr, block, fix_type);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 5771 || buf_validate());
- ut_a(block->page.buf_fix_count > 0);
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- if (mode != BUF_KEEP_OLD) {
- /* If mode == BUF_KEEP_OLD, we are executing an I/O
- completion routine. Avoid a bogus assertion failure
- when ibuf_merge_or_delete_for_page() is processing a
- page that was just freed due to DROP INDEX, or
- deleting a record from SYS_INDEXES. This check will be
- skipped in recv_recover_page() as well. */
-
- mutex_enter(&block->mutex);
- ut_a(!block->page.file_page_was_freed);
- mutex_exit(&block->mutex);
- }
-#endif
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a((mode == BUF_KEEP_OLD)
- || (ibuf_count_get(buf_block_get_space(block),
- buf_block_get_page_no(block)) == 0));
-#endif
- buf_pool->stat.n_page_gets++;
-
- if (UNIV_UNLIKELY(innobase_get_slow_log())) {
-
- trx = innobase_get_trx();
- if (trx != NULL && trx->take_stats) {
-
- _increment_page_get_statistics(block, trx);
- }
- }
-
- return(TRUE);
-}
-
-/*******************************************************************//**
-Given a tablespace id and page number tries to get that page. If the
-page is not in the buffer pool it is not loaded and NULL is returned.
-Suitable for using when holding the lock_sys_t::mutex.
-@return pointer to a page or NULL */
-UNIV_INTERN
-buf_block_t*
-buf_page_try_get_func(
-/*==================*/
- ulint space_id,/*!< in: tablespace id */
- ulint page_no,/*!< in: page number */
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
- bool possibly_freed,
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- buf_block_t* block;
- ibool success;
- ulint fix_type;
- buf_pool_t* buf_pool = buf_pool_get(space_id, page_no);
- prio_rw_lock_t* hash_lock;
-
- ut_ad(mtr);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- block = buf_block_hash_get_s_locked(buf_pool, space_id,
- page_no, &hash_lock);
-
- if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
- if (block) {
- rw_lock_s_unlock(hash_lock);
- }
- return(NULL);
- }
-
- ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
-
- mutex_enter(&block->mutex);
- rw_lock_s_unlock(hash_lock);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_a(buf_block_get_space(block) == space_id);
- ut_a(buf_block_get_page_no(block) == page_no);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
- buf_block_buf_fix_inc(block, file, line);
- mutex_exit(&block->mutex);
-
- if (rw_latch == RW_S_LATCH) {
- fix_type = MTR_MEMO_PAGE_S_FIX;
- success = rw_lock_s_lock_nowait(&block->lock, file, line);
- } else {
- success = false;
- }
-
- if (!success) {
- /* Let us try to get an X-latch. If the current thread
- is holding an X-latch on the page, we cannot get an
- S-latch. */
-
- fix_type = MTR_MEMO_PAGE_X_FIX;
- success = rw_lock_x_lock_func_nowait_inline(&block->lock,
- file, line);
- }
-
- if (!success) {
- buf_block_buf_fix_dec(block);
-
- return(NULL);
- }
-
- mtr_memo_push(mtr, block, fix_type);
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 5771 || buf_validate());
- ut_a(block->page.buf_fix_count > 0);
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- if (!possibly_freed) {
- mutex_enter(&block->mutex);
- ut_a(!block->page.file_page_was_freed);
- mutex_exit(&block->mutex);
- }
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
- buf_pool->stat.n_page_gets++;
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(buf_block_get_space(block),
- buf_block_get_page_no(block)) == 0);
-#endif
-
- return(block);
-}
-
-/********************************************************************//**
-Initialize some fields of a control block. */
-UNIV_INLINE
-void
-buf_page_init_low(
-/*==============*/
- buf_page_t* bpage) /*!< in: block to init */
-{
- bpage->flush_type = BUF_FLUSH_LRU;
- bpage->io_fix = BUF_IO_NONE;
- bpage->buf_fix_count = 0;
- bpage->freed_page_clock = 0;
- bpage->access_time = 0;
- bpage->newest_modification = 0;
- bpage->oldest_modification = 0;
- bpage->write_size = 0;
- bpage->encrypted = false;
- bpage->real_size = 0;
-
- HASH_INVALIDATE(bpage, hash);
- bpage->is_corrupt = FALSE;
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- bpage->file_page_was_freed = FALSE;
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
-}
-
-/********************************************************************//**
-Inits a page to the buffer buf_pool: sets the block to file-page state for
-(space, offset), calls buf_block_init_low() and buf_page_init_low(), and
-inserts the block into buf_pool->page_hash under fold. If the hash already
-holds a watch sentinel for this page, the sentinel's buf_fix_count is added
-to the block's and the sentinel is removed; any other pre-existing entry is
-reported on stderr and ends in ut_error.
-The caller must own block->mutex, and the block must not already be in
-state BUF_BLOCK_FILE_PAGE (both asserted below). */
-static MY_ATTRIBUTE((nonnull))
-void
-buf_page_init(
-/*==========*/
- buf_pool_t* buf_pool,/*!< in/out: buffer pool */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space
- in units of a page */
- ulint fold, /*!< in: buf_page_address_fold(space,offset) */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- buf_block_t* block) /*!< in/out: block to init */
-{
- buf_page_t* hash_page;
-
- ut_ad(buf_pool == buf_pool_get(space, offset));
-
- ut_ad(mutex_own(&(block->mutex)));
- ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(buf_page_hash_lock_get(buf_pool, fold),
- RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- /* Set the state of the block */
- buf_block_set_file_page(block, space, offset);
-
- buf_block_init_low(block);
-
- block->lock_hash_val = lock_rec_hash(space, offset);
-
- buf_page_init_low(&block->page);
-
- /* Insert into the hash table of file pages. First look up what, if
- anything, is currently hashed for this page address. */
-
- hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
-
- if (hash_page == NULL) {
- /* Block not found in the hash table */
- } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
-
- mutex_enter(&buf_pool->zip_mutex); /* held while the fix count is transferred and the watch removed */
-
- ib_uint32_t buf_fix_count = hash_page->buf_fix_count; /* fix count carried by the sentinel */
-
- ut_a(buf_fix_count > 0);
-
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_increment_uint32(
- &block->page.buf_fix_count, buf_fix_count);
-#else
- block->page.buf_fix_count += ulint(buf_fix_count);
-#endif /* PAGE_ATOMIC_REF_COUNT */
-
- buf_pool_watch_remove(buf_pool, fold, hash_page);
-
- mutex_exit(&buf_pool->zip_mutex);
-
- } else {
- /* A real (non-sentinel) page is already hashed for this address:
- report it and stop with ut_error below. */
- fprintf(stderr,
- "InnoDB: Error: page %lu %lu already found"
- " in the hash table: %p, %p\n",
- space,
- offset,
- (const void*) hash_page, (const void*) block);
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- mutex_exit(&block->mutex); /* released before the print/validate calls below */
- buf_print();
- buf_LRU_print();
- buf_validate();
- buf_LRU_validate();
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- ut_error;
- }
-
- ut_ad(!block->page.in_zip_hash);
- ut_ad(!block->page.in_page_hash);
- ut_d(block->page.in_page_hash = TRUE);
-
- HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, &block->page);
-
- if (zip_size) { /* the zip size is recorded only for compressed pages */
- page_zip_set_size(&block->page.zip, zip_size);
- }
-}
-
-/********************************************************************//**
-Function which inits a page for read to the buffer buf_pool. If the page is
-(1) already in buf_pool, or
-(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
-(3) if the space is deleted or being deleted,
-then this function does nothing.
-Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
-on the buffer frame (the frame lock is taken only when an uncompressed block
-is allocated; the compressed-only path just sets io_fix). The io-handler must
-take care that the flag is cleared and the lock released later.
-*err is set to DB_TABLESPACE_DELETED only in case (3); otherwise it is
-DB_SUCCESS. buf_pool->n_pend_reads is incremented only when a page
-descriptor is returned; every path that returns NULL skips the increment.
-@return pointer to the block or NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_init_for_read(
-/*===================*/
- dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
- ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- ibool unzip, /*!< in: TRUE=request uncompressed page */
- ib_int64_t tablespace_version,
- /*!< in: prevents reading from a wrong
- version of the tablespace in case we have done
- DISCARD + IMPORT */
- ulint offset) /*!< in: page number */
-{
- buf_block_t* block;
- buf_page_t* bpage = NULL;
- buf_page_t* watch_page;
- prio_rw_lock_t* hash_lock;
- mtr_t mtr;
- ulint fold;
- ibool lru;
- void* data;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
-
- ut_ad(buf_pool);
-
- *err = DB_SUCCESS;
-
- if (mode == BUF_READ_IBUF_PAGES_ONLY) {
- /* It is a read-ahead within an ibuf routine. The mtr started
- here is committed either on the early return just below or at
- func_exit. */
-
- ut_ad(!ibuf_bitmap_page(zip_size, offset));
-
- ibuf_mtr_start(&mtr);
-
- if (!recv_no_ibuf_operations
- && !ibuf_page(space, zip_size, offset, &mtr)) {
-
- ibuf_mtr_commit(&mtr);
-
- return(NULL);
- }
- } else {
- ut_ad(mode == BUF_READ_ANY_PAGE);
- }
-
- if (zip_size && !unzip && !recv_recovery_is_on()) {
- block = NULL; /* compressed-only read: no uncompressed block is allocated */
- } else {
- block = buf_LRU_get_free_block(buf_pool);
- ut_ad(block);
- ut_ad(buf_pool_from_block(block) == buf_pool);
- }
-
- fold = buf_page_address_fold(space, offset);
- hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-
- ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
- mutex_enter(&buf_pool->LRU_list_mutex);
- rw_lock_x_lock(hash_lock);
-
- watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
- if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
- /* The page is already in the buffer pool. */
- watch_page = NULL;
-err_exit:
- /* Shared failure exit: drop both latches, hand back the
- pre-allocated block (if any) and return NULL via func_exit. */
- mutex_exit(&buf_pool->LRU_list_mutex);
- rw_lock_x_unlock(hash_lock);
- if (block) {
- mutex_enter(&block->mutex);
- buf_LRU_block_free_non_file_page(block);
- mutex_exit(&block->mutex);
- }
-
- bpage = NULL;
- goto func_exit;
- }
-
- if (fil_tablespace_deleted_or_being_deleted_in_mem(
- space, tablespace_version)) {
- /* The page belongs to a space which has been
- deleted or is being deleted. */
- *err = DB_TABLESPACE_DELETED;
-
- goto err_exit;
- }
-
- if (block) {
- bpage = &block->page;
-
- mutex_enter(&block->mutex);
-
- ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
-
- buf_page_init(buf_pool, space, offset, fold, zip_size, block);
-
-#ifdef PAGE_ATOMIC_REF_COUNT
- /* Note: We set the io state without the protection of
- the block->lock. This is because other threads cannot
- access this block unless it is in the hash table. */
-
- buf_page_set_io_fix(bpage, BUF_IO_READ);
-#endif /* PAGE_ATOMIC_REF_COUNT */
-
- /* The block must be put to the LRU list, to the old blocks */
- buf_LRU_add_block(bpage, TRUE/* to old blocks */);
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- /* We set a pass-type x-lock on the frame because then
- the same thread which called for the read operation
- (and is running now at this point of code) can wait
- for the read to complete by waiting for the x-lock on
- the frame; if the x-lock were recursive, the same
- thread would illegally get the x-lock before the page
- read is completed. The x-lock is cleared by the
- io-handler thread. */
-
- rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
-
-#ifndef PAGE_ATOMIC_REF_COUNT
- buf_page_set_io_fix(bpage, BUF_IO_READ);
-#endif /* !PAGE_ATOMIC_REF_COUNT */
-
- rw_lock_x_unlock(hash_lock);
-
- if (zip_size) {
- /* buf_pool->LRU_list_mutex may be released and
- reacquired by buf_buddy_alloc(). Thus, we
- must release block->mutex in order not to
- break the latching order in the reacquisition
- of buf_pool->LRU_list_mutex. We also must defer this
- operation until after the block descriptor has
- been added to buf_pool->LRU and
- buf_pool->page_hash. */
- mutex_exit(&block->mutex);
- mutex_enter(&buf_pool->LRU_list_mutex);
- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
- mutex_enter(&block->mutex);
- block->page.zip.data = (page_zip_t*) data;
-
- /* To maintain the invariant
- block->in_unzip_LRU_list
- == buf_page_belongs_to_unzip_LRU(&block->page)
- we have to add this block to unzip_LRU
- after block->page.zip.data is set. */
- ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
- buf_unzip_LRU_add_block(block, TRUE);
- mutex_exit(&buf_pool->LRU_list_mutex);
- }
-
- mutex_exit(&block->mutex);
- } else {
- /* Compressed-only path. The hash lock is dropped around the
- allocation while buf_pool->LRU_list_mutex stays held. */
- rw_lock_x_unlock(hash_lock);
-
- /* The compressed page must be allocated before the
- control block (bpage), in order to avoid the
- invocation of buf_buddy_relocate_block() on
- uninitialized data. */
- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
-
- rw_lock_x_lock(hash_lock);
-
- /* We must check the page_hash again, as it may have been
- modified. */
-
- watch_page = buf_page_hash_get_low(
- buf_pool, space, offset, fold);
-
- if (UNIV_UNLIKELY(watch_page
- && !buf_pool_watch_is_sentinel(buf_pool,
- watch_page))) {
-
- /* The block was added by some other thread. */
- mutex_exit(&buf_pool->LRU_list_mutex);
- rw_lock_x_unlock(hash_lock);
- watch_page = NULL;
- buf_buddy_free(buf_pool, data, zip_size); /* give back the compressed frame allocated above */
-
- bpage = NULL;
- goto func_exit;
- }
-
- bpage = buf_page_alloc_descriptor();
-
- /* Initialize the buf_pool pointer. */
- bpage->buf_pool_index = buf_pool_index(buf_pool);
-
- page_zip_des_init(&bpage->zip);
- page_zip_set_size(&bpage->zip, zip_size);
- bpage->zip.data = (page_zip_t*) data;
-
- bpage->slot = NULL;
-
- mutex_enter(&buf_pool->zip_mutex); /* held until io_fix has been set at the end of this branch */
- UNIV_MEM_DESC(bpage->zip.data,
- page_zip_get_size(&bpage->zip));
-
- buf_page_init_low(bpage);
-
- bpage->state = BUF_BLOCK_ZIP_PAGE;
- bpage->space = static_cast<ib_uint32_t>(space);
- bpage->offset = static_cast<ib_uint32_t>(offset);
-
-#ifdef UNIV_DEBUG
- bpage->in_page_hash = FALSE;
- bpage->in_zip_hash = FALSE;
- bpage->in_flush_list = FALSE;
- bpage->in_free_list = FALSE;
- bpage->in_LRU_list = FALSE;
-#endif /* UNIV_DEBUG */
-
- ut_d(bpage->in_page_hash = TRUE);
-
- if (watch_page != NULL) {
-
- /* Preserve the reference count: the fix count held by
- the watch sentinel is added to the new descriptor before
- the sentinel is removed. */
- ib_uint32_t buf_fix_count;
-
- buf_fix_count = watch_page->buf_fix_count;
-
- ut_a(buf_fix_count > 0);
-
- ut_ad(buf_own_zip_mutex_for_page(bpage));
-
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_increment_uint32(
- &bpage->buf_fix_count, buf_fix_count);
-#else
- bpage->buf_fix_count += buf_fix_count;
-#endif /* PAGE_ATOMIC_REF_COUNT */
-
- ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
- buf_pool_watch_remove(buf_pool, fold, watch_page);
- }
-
- HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
- bpage);
-
- rw_lock_x_unlock(hash_lock);
-
- /* The block must be put to the LRU list, to the old blocks.
- The zip_size is already set into the page zip */
- buf_LRU_add_block(bpage, TRUE/* to old blocks */);
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- buf_LRU_insert_zip_clean(bpage);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- buf_page_set_io_fix(bpage, BUF_IO_READ);
-
- mutex_exit(&buf_pool->zip_mutex);
- }
-
- os_atomic_increment_ulint(&buf_pool->n_pend_reads, 1); /* reached only on success; the goto func_exit paths skip it */
-func_exit:
-
- if (mode == BUF_READ_IBUF_PAGES_ONLY) {
-
- ibuf_mtr_commit(&mtr);
- }
-
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_ad(!bpage || buf_page_in_file(bpage));
- return(bpage);
-}
-
-/********************************************************************//**
-Initializes a page to the buffer buf_pool. The page is usually not read
-from a file even if it cannot be found in the buffer buf_pool. This is one
-of the functions which perform the block state transition NOT_USED =>
-FILE_PAGE (the other is buf_page_get_gen).
-If the page is already present in the page hash, the pre-allocated free
-block is released and the result of buf_page_get_with_no_latch() is
-returned instead. Otherwise the new block is buffer-fixed, registered in
-mtr with MTR_MEMO_BUF_FIX, and parts of its frame header are reset
-(FIL_PAGE_PREV/NEXT filled with 0xff, page type FIL_PAGE_TYPE_ALLOCATED,
-FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION zeroed).
-@return pointer to the block, page bufferfixed */
-UNIV_INTERN
-buf_block_t*
-buf_page_create(
-/*============*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space in units of
- a page */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- buf_frame_t* frame;
- buf_block_t* block;
- ulint fold;
- buf_block_t* free_block = NULL;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
- prio_rw_lock_t* hash_lock;
-
- ut_ad(mtr);
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad(space || !zip_size);
-
- free_block = buf_LRU_get_free_block(buf_pool); /* obtained before any latch below is taken */
-
- fold = buf_page_address_fold(space, offset);
- hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-
- ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
- mutex_enter(&buf_pool->LRU_list_mutex);
- rw_lock_x_lock(hash_lock);
-
- block = (buf_block_t*) buf_page_hash_get_low(
- buf_pool, space, offset, fold);
-
- if (block
- && buf_page_in_file(&block->page)
- && !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(space, offset) == 0);
-#endif
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- block->page.file_page_was_freed = FALSE;
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
-
- /* Page can be found in buf_pool */
- rw_lock_x_unlock(hash_lock);
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- buf_block_free(free_block); /* the pre-allocated block is not needed */
-
- return(buf_page_get_with_no_latch(space, zip_size, offset, mtr));
- }
-
- /* If we get here, the page was not in buf_pool: init it there */
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr, "Creating space %lu page %lu to buffer\n",
- space, offset);
- }
-#endif /* UNIV_DEBUG */
-
- block = free_block;
-
- mutex_enter(&block->mutex);
-
- buf_page_init(buf_pool, space, offset, fold, zip_size, block);
-
- rw_lock_x_unlock(hash_lock);
-
- /* The block must be put to the LRU list. buf_pool->LRU_list_mutex
- is still held here and is released only after the optional
- compressed-page setup below. */
- buf_LRU_add_block(&block->page, FALSE);
-
- buf_block_buf_fix_inc(block, __FILE__, __LINE__);
- buf_pool->stat.n_pages_created++;
-
- if (zip_size) {
- void* data;
- ibool lru;
-
- /* Prevent race conditions during buf_buddy_alloc(),
- which may release and reacquire buf_pool->LRU_list_mutex,
- by IO-fixing and X-latching the block. */
-
- buf_page_set_io_fix(&block->page, BUF_IO_READ);
- rw_lock_x_lock(&block->lock);
-
- mutex_exit(&block->mutex);
- /* buf_pool->LRU_list_mutex may be released and reacquired by
- buf_buddy_alloc(). Thus, we must release block->mutex
- in order not to break the latching order in
- the reacquisition of buf_pool->LRU_list_mutex. We also must
- defer this operation until after the block descriptor
- has been added to buf_pool->LRU and buf_pool->page_hash. */
- data = buf_buddy_alloc(buf_pool, zip_size, &lru);
- mutex_enter(&block->mutex);
- block->page.zip.data = (page_zip_t*) data;
-
- /* To maintain the invariant
- block->in_unzip_LRU_list
- == buf_page_belongs_to_unzip_LRU(&block->page)
- we have to add this block to unzip_LRU after
- block->page.zip.data is set. */
- ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
- buf_unzip_LRU_add_block(block, FALSE);
-
- buf_page_set_io_fix(&block->page, BUF_IO_NONE); /* undo the temporary IO-fix taken above */
- rw_lock_x_unlock(&block->lock);
- }
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
-
- buf_page_set_accessed(&block->page);
-
- mutex_exit(&block->mutex);
-
- /* Delete possible entries for the page from the insert buffer:
- such can exist if the page belonged to an index which was dropped */
-
- ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
-
- frame = block->frame;
-
- memset(frame + FIL_PAGE_PREV, 0xff, 4);
- memset(frame + FIL_PAGE_NEXT, 0xff, 4);
- mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
-
- /* FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION is only used on the
- following pages:
- (1) The first page of the InnoDB system tablespace (page 0:0)
- (2) FIL_RTREE_SPLIT_SEQ_NUM on R-tree pages
- (3) key_version on encrypted pages (not page 0:0) */
-
- memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(++buf_dbg_counter % 5771 || buf_validate()); /* buf_validate() runs only when the counter is a multiple of 5771 */
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(buf_block_get_space(block),
- buf_block_get_page_no(block)) == 0);
-#endif
- return(block);
-}
-
-/********************************************************************//**
-Monitor the buffer page read/write activity, and increment corresponding
-counter value if MONITOR_MODULE_BUF_PAGE (module_buf_page) module is
-enabled. The counter is selected from the page type stored in the page
-frame; for FIL_PAGE_INDEX pages it further depends on whether the page
-belongs to the insert buffer index and whether it is a leaf (level 0).
-The caller must own the mutex returned by buf_page_get_mutex(bpage)
-(asserted), and io_type must be BUF_IO_READ or BUF_IO_WRITE. */
-static
-void
-buf_page_monitor(
-/*=============*/
- const buf_page_t* bpage, /*!< in: pointer to the block */
- enum buf_io_fix io_type)/*!< in: io_fix types */
-{
- const byte* frame;
- monitor_id_t counter;
-
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-
- /* If the counter module is not turned on, just return */
- if (!MONITOR_IS_ON(MONITOR_MODULE_BUF_PAGE)) {
- return;
- }
-
- ut_a(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
-
- /* Use the compressed frame when one is attached, otherwise the
- uncompressed frame of the enclosing block. */
- frame = bpage->zip.data
- ? bpage->zip.data
- : ((buf_block_t*) bpage)->frame;
-
- switch (fil_page_get_type(frame)) {
- ulint level; /* B-tree level; assigned only in the FIL_PAGE_INDEX case */
-
- case FIL_PAGE_INDEX:
- level = btr_page_get_level_low(frame);
-
- /* Check if it is an index page for insert buffer */
- if (btr_page_get_index_id(frame)
- == (index_id_t)(DICT_IBUF_ID_MIN + IBUF_SPACE_ID)) {
- if (level == 0) {
- counter = MONITOR_RW_COUNTER(
- io_type, MONITOR_INDEX_IBUF_LEAF_PAGE);
- } else {
- counter = MONITOR_RW_COUNTER(
- io_type,
- MONITOR_INDEX_IBUF_NON_LEAF_PAGE);
- }
- } else {
- if (level == 0) {
- counter = MONITOR_RW_COUNTER(
- io_type, MONITOR_INDEX_LEAF_PAGE);
- } else {
- counter = MONITOR_RW_COUNTER(
- io_type, MONITOR_INDEX_NON_LEAF_PAGE);
- }
- }
- break;
-
- case FIL_PAGE_UNDO_LOG:
- counter = MONITOR_RW_COUNTER(io_type, MONITOR_UNDO_LOG_PAGE);
- break;
-
- case FIL_PAGE_INODE:
- counter = MONITOR_RW_COUNTER(io_type, MONITOR_INODE_PAGE);
- break;
-
- case FIL_PAGE_IBUF_FREE_LIST:
- counter = MONITOR_RW_COUNTER(io_type,
- MONITOR_IBUF_FREELIST_PAGE);
- break;
-
- case FIL_PAGE_IBUF_BITMAP:
- counter = MONITOR_RW_COUNTER(io_type,
- MONITOR_IBUF_BITMAP_PAGE);
- break;
-
- case FIL_PAGE_TYPE_SYS:
- counter = MONITOR_RW_COUNTER(io_type, MONITOR_SYSTEM_PAGE);
- break;
-
- case FIL_PAGE_TYPE_TRX_SYS:
- counter = MONITOR_RW_COUNTER(io_type, MONITOR_TRX_SYSTEM_PAGE);
- break;
-
- case FIL_PAGE_TYPE_FSP_HDR:
- counter = MONITOR_RW_COUNTER(io_type, MONITOR_FSP_HDR_PAGE);
- break;
-
- case FIL_PAGE_TYPE_XDES:
- counter = MONITOR_RW_COUNTER(io_type, MONITOR_XDES_PAGE);
- break;
-
- case FIL_PAGE_TYPE_BLOB:
- counter = MONITOR_RW_COUNTER(io_type, MONITOR_BLOB_PAGE);
- break;
-
- case FIL_PAGE_TYPE_ZBLOB:
- counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB_PAGE);
- break;
-
- case FIL_PAGE_TYPE_ZBLOB2:
- counter = MONITOR_RW_COUNTER(io_type, MONITOR_ZBLOB2_PAGE);
- break;
-
- default:
- counter = MONITOR_RW_COUNTER(io_type, MONITOR_OTHER_PAGE); /* every page type not listed above */
- }
-
- MONITOR_INC_NOCHECK(counter);
-}
-
-/********************************************************************//**
-Mark a table with the specified space pointed by bpage->space corrupted
-(or, when bpage->encrypted is set, flag it through
-dict_set_encrypted_by_space() instead).
-Also remove the bpage from LRU list, clear its BUF_IO_READ io-fix and
-decrement buf_pool->n_pend_reads. The page is expected to be io-fixed for
-read and not buffer-fixed (both asserted in debug builds). The caller must
-not hold buf_pool->LRU_list_mutex.
-@param[in,out] bpage Block; must not be referenced after this call */
-static
-void
-buf_mark_space_corrupt(
- buf_page_t* bpage)
-{
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- const ibool uncompressed = (buf_page_get_state(bpage)
- == BUF_BLOCK_FILE_PAGE);
- ulint space = bpage->space;
- const ulint fold = buf_page_address_fold(bpage->space,
- bpage->offset);
- prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- /* First unfix and release lock on the bpage */
- ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
-
- mutex_enter(&buf_pool->LRU_list_mutex);
- rw_lock_x_lock(hash_lock);
- mutex_enter(block_mutex);
- ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
- ut_ad(bpage->buf_fix_count == 0);
-
- /* Set BUF_IO_NONE before we remove the block from LRU list */
- buf_page_set_io_fix(bpage, BUF_IO_NONE);
-
- if (uncompressed) {
- rw_lock_x_unlock_gen(
- &((buf_block_t*) bpage)->lock,
- BUF_IO_READ);
- }
-
- /* If the block is not encrypted, find the table with the specified
- space id and mark it corrupted. For an encrypted block the space
- is flagged through dict_set_encrypted_by_space() instead. */
- if (!bpage->encrypted) {
- dict_set_corrupted_by_space(space);
- } else {
- dict_set_encrypted_by_space(space);
- }
-
- /* After this point bpage can't be referenced. This
- function will release the hash_lock acquired above.
- NOTE(review): block_mutex is not released anywhere in this
- function either; presumably buf_LRU_free_one_page() releases it
- as well - confirm. */
- buf_LRU_free_one_page(bpage);
-
- ut_ad(buf_pool->n_pend_reads > 0);
- os_atomic_decrement_ulint(&buf_pool->n_pend_reads, 1);
- mutex_exit(&buf_pool->LRU_list_mutex);
-}
-
-/** Check if page is maybe compressed, encrypted or both when we encounter
-corrupted page. Note that we can't be 100% sure if page is corrupted
-or decrypt/decompress just failed.
-The check runs on the compressed frame when the page has a zip size and on
-the uncompressed frame otherwise. bpage->encrypted is updated as a side
-effect: cleared when the ordinary checksum check passes, set when
-DB_DECRYPTION_FAILED is returned.
-@param[in,out] bpage page
-@param[in,out] space tablespace from fil_space_acquire_for_io()
-@return whether the operation succeeded
-@retval DB_SUCCESS if page has been read and is not corrupted
-@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted
-@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
-after decryption normal page checksum does not match.
-@retval DB_TABLESPACE_DELETED if accessed tablespace is not found
-(NOTE(review): no path in this function assigns DB_TABLESPACE_DELETED;
-the retval line looks stale - confirm) */
-static
-dberr_t
-buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space)
-{
- ut_ad(space->n_pending_ios > 0);
-
- ulint zip_size = buf_page_get_zip_size(bpage);
- byte* dst_frame = (zip_size) ? bpage->zip.data :
- ((buf_block_t*) bpage)->frame;
- bool still_encrypted = false;
- dberr_t err = DB_SUCCESS;
- bool corrupted = false;
- fil_space_crypt_t* crypt_data = space->crypt_data;
-
- /* In buf_decrypt_after_read we have either decrypted the page if
- page post encryption checksum matches and used key_id is found
- from the encryption plugin. If checksum did not match page was
- not decrypted and it could be either encrypted and corrupted
- or corrupted or good page. If we decrypted, the page could
- still be corrupted if used key does not match. */
- still_encrypted = (crypt_data &&
- crypt_data->type != CRYPT_SCHEME_UNENCRYPTED &&
- !bpage->encrypted &&
- fil_space_verify_crypt_checksum(dst_frame, zip_size,
- space, bpage->offset));
-
- if (!still_encrypted) {
- /* If traditional checksums match, we assume that page is
- not anymore encrypted. */
- corrupted = buf_page_is_corrupted(true, dst_frame, zip_size,
- space);
-
- if (!corrupted) {
- bpage->encrypted = false;
- } else {
- err = DB_PAGE_CORRUPTED;
- }
- }
-
- /* Pages that we think are unencrypted but do not match the checksum
- checks could be corrupted or encrypted or both. */
- if (corrupted && !bpage->encrypted) {
- /* An error will be reported by
- buf_page_io_complete(). */
- } else if (still_encrypted || (bpage->encrypted && corrupted)) {
- bpage->encrypted = true;
- err = DB_DECRYPTION_FAILED; /* overrides DB_PAGE_CORRUPTED set above, if any */
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "The page [page id: space=%u"
- ", page number=%u]"
- " in file %s cannot be decrypted.",
- bpage->space, bpage->offset,
- space->name);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "However key management plugin or used key_version " ULINTPF
- " is not found or"
- " used encryption algorithm or method does not match.",
- mach_read_from_4(dst_frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION));
-
- if (bpage->space > TRX_SYS_SPACE) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Marking tablespace as missing. You may drop this table or"
- " install correct key management plugin and key file.");
- }
- }
-
- return (err);
-}
-
-/** Complete a read or write request of a file page to or from the buffer pool.
-For a read this runs buf_page_decrypt_after_read(), decompresses a
-compressed page into an uncompressed block when there is one, compares the
-space id / page number stored in the frame with the descriptor, and calls
-buf_page_check_corrupt() unless the page is already flagged is_corrupt while
-srv_pass_corrupt_table is set. For a write it frees bpage->slot (if any) and
-calls buf_flush_write_complete(). In both cases the io-fix is cleared and
-the block lock taken for the I/O is released for uncompressed pages.
-@param[in,out] bpage Page to complete
-@return whether the operation succeeded
-@retval DB_SUCCESS always when writing, or if a read page was OK
-@retval DB_TABLESPACE_DELETED if fil_space_acquire_for_io() finds no
- tablespace for a page being read
-@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
-@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
- after decryption normal page checksum does
- not match */
-UNIV_INTERN
-dberr_t
-buf_page_io_complete(buf_page_t* bpage)
-{
- enum buf_io_fix io_type;
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- const ibool uncompressed = (buf_page_get_state(bpage)
- == BUF_BLOCK_FILE_PAGE);
- bool have_LRU_mutex = false;
- byte* frame = NULL;
- dberr_t err = DB_SUCCESS;
-
- ut_a(buf_page_in_file(bpage));
-
- /* We do not need to protect io_fix by a mutex in order to read
- it here, because this is the only function where we can change the
- value from BUF_IO_READ or BUF_IO_WRITE to some other value, and our
- code ensures that this is the only thread that handles the i/o for
- this block. */
-
- io_type = buf_page_get_io_fix_unlocked(bpage);
- ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
-
- if (io_type == BUF_IO_READ) {
- ulint read_page_no = 0;
- ulint read_space_id = 0;
- uint key_version = 0;
-
- ut_ad(bpage->zip.data || ((buf_block_t*)bpage)->frame);
- fil_space_t* space = fil_space_acquire_for_io(bpage->space);
- if (!space) {
- return(DB_TABLESPACE_DELETED); /* NOTE(review): returns with io_fix and n_pend_reads untouched - confirm callers clean up */
- }
-
- buf_page_decrypt_after_read(bpage, space);
-
- if (buf_page_get_zip_size(bpage)) { /* NOTE(review): frame is assigned again by the if/else right below */
- frame = bpage->zip.data;
- } else {
- frame = ((buf_block_t*) bpage)->frame;
- }
-
- if (buf_page_get_zip_size(bpage)) {
- frame = bpage->zip.data;
- os_atomic_increment_ulint(&buf_pool->n_pend_unzip, 1);
- if (uncompressed
- && !buf_zip_decompress((buf_block_t*) bpage,
- FALSE)) {
-
- os_atomic_decrement_ulint(
- &buf_pool->n_pend_unzip, 1);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Page %u in tablespace %u zip_decompress failure.",
- bpage->offset, bpage->space);
-
- err = DB_PAGE_CORRUPTED;
-
- goto database_corrupted;
- }
-
- os_atomic_decrement_ulint(&buf_pool->n_pend_unzip, 1);
- } else {
- ut_a(uncompressed);
- frame = ((buf_block_t*) bpage)->frame;
- }
-
- /* If this page is not uninitialized and not in the
- doublewrite buffer, then the page number and space id
- should be the same as in block. */
- read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
- read_space_id = mach_read_from_4(
- frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- key_version = mach_read_from_4(
- frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
-
- if (bpage->space == TRX_SYS_SPACE
- && buf_dblwr_page_inside(bpage->offset)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: reading page %u\n"
- "InnoDB: which is in the"
- " doublewrite buffer!\n",
- bpage->offset);
- } else if (!read_space_id && !read_page_no) {
- /* This is likely an uninitialized page. */
- } else if ((bpage->space
- && bpage->space != read_space_id)
- || bpage->offset != read_page_no) {
- /* We did not compare space_id to read_space_id
- if bpage->space == 0, because the field on the
- page may contain garbage in MySQL < 4.1.1,
- which only supported bpage->space == 0.
- The mismatch is only logged; err is left unchanged. */
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Space id and page n:o"
- " stored in the page"
- " read in are " ULINTPF ":" ULINTPF ","
- " should be %u:%u!",
- read_space_id,
- read_page_no,
- bpage->space,
- bpage->offset);
- }
-
- if (UNIV_LIKELY(!bpage->is_corrupt ||
- !srv_pass_corrupt_table)) {
- err = buf_page_check_corrupt(bpage, space);
- }
-
-database_corrupted:
-
- if (err != DB_SUCCESS) {
- /* Not a real corruption if it was triggered by
- error injection */
- DBUG_EXECUTE_IF("buf_page_is_corrupt_failure",
- if (bpage->space > TRX_SYS_SPACE) {
- buf_mark_space_corrupt(bpage);
- ib_logf(IB_LOG_LEVEL_INFO,
- "Simulated page corruption");
- fil_space_release_for_io(space);
- return(err);
- }
- err = DB_SUCCESS;
- goto page_not_corrupt;
- );
-
- if (err == DB_PAGE_CORRUPTED) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Database page corruption on disk"
- " or a failed file read of tablespace %s"
- " page [page id: space=%u"
- ", page number=%u]"
- ". You may have to recover from "
- "a backup.",
- space->name,
- bpage->space, bpage->offset);
-
- buf_page_print(frame, buf_page_get_zip_size(bpage),
- BUF_PAGE_PRINT_NO_CRASH);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "It is also possible that your"
- " operating system has corrupted"
- " its own file cache and rebooting"
- " your computer removes the error."
- " If the corrupt page is an index page."
- " You can also try to fix the"
- " corruption by dumping, dropping,"
- " and reimporting the corrupt table."
- " You can use CHECK TABLE to scan"
- " your table for corruption. "
- "Please refer to " REFMAN "forcing-innodb-recovery.html"
- " for information about forcing recovery.");
- }
-
- if (srv_pass_corrupt_table && bpage->space != 0
- && bpage->space < SRV_LOG_SPACE_FIRST_ID) {
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "InnoDB: space %u will be treated as corrupt.",
- bpage->space);
- fil_space_set_corrupt(bpage->space);
-
- dict_set_corrupted_by_space(bpage->space);
- bpage->is_corrupt = TRUE;
- }
-
- if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
- /* If page space id is larger than TRX_SYS_SPACE
- (0), we will attempt to mark the corresponding
- table as corrupted instead of crashing server */
- if (bpage->space > TRX_SYS_SPACE) {
- buf_mark_space_corrupt(bpage);
- fil_space_release_for_io(space);
- return(err);
- } else {
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Ending processing because of a corrupt database page.");
- }
- }
- }
-
- DBUG_EXECUTE_IF("buf_page_is_corrupt_failure",
- page_not_corrupt: bpage = bpage; );
-
- if (recv_recovery_is_on()) {
- /* Pages must be uncompressed for crash recovery. */
- ut_a(uncompressed);
- recv_recover_page(TRUE, (buf_block_t*) bpage);
- }
-
- if (uncompressed && !recv_no_ibuf_operations
- && fil_page_get_type(frame) == FIL_PAGE_INDEX
- && page_is_leaf(frame)) {
-
- if (bpage && bpage->encrypted) { /* bpage was already dereferenced above, so the NULL test is redundant */
- ib_logf(IB_LOG_LEVEL_WARN,
- "Table in tablespace " ULINTPF " encrypted."
- "However key management plugin or used "
- " key_version %u is not found or"
- " used encryption algorithm or method does not match."
- " Can't continue opening the table.",
- read_space_id, key_version); /* NOTE(review): the adjacent literals join as "encrypted.However" (no space) and "used  key_version" (two spaces) */
- } else {
-
- ibuf_merge_or_delete_for_page(
- (buf_block_t*)bpage, bpage->space,
- bpage->offset, buf_page_get_zip_size(bpage),
- TRUE);
- }
-
- }
-
- fil_space_release_for_io(space);
- } else {
- /* io_type == BUF_IO_WRITE */
- if (bpage->slot) {
- /* Mark slot free */
- bpage->slot->reserved = false;
- bpage->slot = NULL;
- }
- }
-
- if (io_type == BUF_IO_WRITE
- && (
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- /* to keep consistency at buf_LRU_insert_zip_clean() */
- buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY ||
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
-
- have_LRU_mutex = true; /* optimistic */
- }
-retry_mutex:
- if (have_LRU_mutex) {
- mutex_enter(&buf_pool->LRU_list_mutex);
- }
-
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
- mutex_enter(block_mutex);
-
- /* Re-evaluate the condition now that the block mutex is held; if the
- LRU list mutex turns out to be needed but was not taken, drop the
- block mutex and retry with have_LRU_mutex set. */
- if (io_type == BUF_IO_WRITE
- && (
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
- ||
-#endif
- buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
- && !have_LRU_mutex) {
-
- mutex_exit(block_mutex);
- have_LRU_mutex = true;
- goto retry_mutex;
- }
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- if (io_type == BUF_IO_WRITE || uncompressed) {
- /* For BUF_IO_READ of compressed-only blocks, the
- buffered operations will be merged by buf_page_get_gen()
- after the block has been uncompressed. */
- ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
- }
-#endif
- /* Because this thread which does the unlocking is not the same that
- did the locking, we use a pass value != 0 in unlock, which simply
- removes the newest lock debug record, without checking the thread
- id. */
-
- switch (io_type) {
- case BUF_IO_READ:
-
- buf_page_set_io_fix(bpage, BUF_IO_NONE);
-
- /* NOTE that the call to ibuf may have moved the ownership of
- the x-latch to this OS thread: do not let this confuse you in
- debugging! */
-
- ut_ad(buf_pool->n_pend_reads > 0);
- os_atomic_decrement_ulint(&buf_pool->n_pend_reads, 1);
- os_atomic_increment_ulint(&buf_pool->stat.n_pages_read, 1);
-
- ut_ad(!have_LRU_mutex);
-
- if (uncompressed) {
- rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
- BUF_IO_READ);
- }
-
- break;
-
- case BUF_IO_WRITE:
- /* Write means a flush operation: call the completion
- routine in the flush system */
-
- buf_flush_write_complete(bpage);
-
- os_atomic_increment_ulint(&buf_pool->stat.n_pages_written, 1);
-
- if (have_LRU_mutex) {
- mutex_exit(&buf_pool->LRU_list_mutex);
- }
-
- if (uncompressed) {
- rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
- BUF_IO_WRITE);
- }
-
- break;
-
- default:
- ut_error;
- }
-
- buf_page_monitor(bpage, io_type);
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr, "Has %s page space %lu page no %lu\n",
- io_type == BUF_IO_READ ? "read" : "written",
- buf_page_get_space(bpage),
- buf_page_get_page_no(bpage));
- }
-#endif /* UNIV_DEBUG */
-
- mutex_exit(block_mutex);
-
- return(err);
-}
-
-/*********************************************************************//**
-Asserts that all file pages in the buffer are in a replaceable state.
-Each chunk of the instance is checked with buf_chunk_not_freed() while
-buf_pool->LRU_list_mutex is held; if a block is returned, its space id,
-page number, oldest_modification, fix count and io_fix are logged, the last
-message at IB_LOG_LEVEL_FATAL. The caller must not hold
-buf_pool->LRU_list_mutex.
-@return TRUE */
-static
-ibool
-buf_all_freed_instance(
-/*===================*/
- buf_pool_t* buf_pool) /*!< in: buffer pool instance */
-{
- ulint i;
- buf_chunk_t* chunk;
-
- ut_ad(buf_pool);
-
- ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
- chunk = buf_pool->chunks;
-
- for (i = buf_pool->n_chunks; i--; chunk++) { /* i only counts down; chunk walks the array forward */
-
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- const buf_block_t* block = buf_chunk_not_freed(chunk);
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- if (UNIV_LIKELY_NULL(block)) {
- fil_space_t* space = fil_space_get(block->page.space);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page %u %u still fixed or dirty.",
- block->page.space,
- block->page.offset);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page oldest_modification " LSN_PF
- " fix_count %d io_fix %d.",
- block->page.oldest_modification,
- block->page.buf_fix_count,
- buf_page_get_io_fix(&block->page));
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Page space_id %u name %s.",
- block->page.space,
- (space && space->name) ? space->name : "NULL"); /* "NULL" is printed when the space or its name is missing */
- }
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Invalidates file pages in one buffer pool instance: waits for any flush
-batch that is still counted in buf_pool->n_flush[], frees blocks with
-buf_LRU_scan_and_free_block() until it returns FALSE, then resets
-freed_page_clock, the LRU_old pointer and length, and the statistics in
-buf_pool->stat. The caller must not hold buf_pool->LRU_list_mutex. */
-static
-void
-buf_pool_invalidate_instance(
-/*=========================*/
- buf_pool_t* buf_pool) /*!< in: buffer pool instance */
-{
- ulint i;
-
- ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
-
- mutex_enter(&buf_pool->flush_state_mutex);
-
- for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
-
- /* As this function is called during startup and
- during redo application phase during recovery, InnoDB
- is single threaded (apart from IO helper threads) at
- this stage. No new write batch can be in initialization
- stage at this point. */
- ut_ad(buf_pool->init_flush[i] == FALSE);
-
- /* However, it is possible that a write batch that has
- been posted earlier is still not complete. For buffer
- pool invalidation to proceed we must ensure there is NO
- write activity happening. */
- if (buf_pool->n_flush[i] > 0) {
- buf_flush_t type = static_cast<buf_flush_t>(i);
-
- mutex_exit(&buf_pool->flush_state_mutex); /* not held while waiting for the batch to end */
- buf_flush_wait_batch_end(buf_pool, type);
- mutex_enter(&buf_pool->flush_state_mutex);
- }
- }
- mutex_exit(&buf_pool->flush_state_mutex);
-
- ut_ad(buf_all_freed_instance(buf_pool));
-
- while (buf_LRU_scan_and_free_block(buf_pool, TRUE)) { /* repeat until the call returns FALSE */
- }
-
- ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
- ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
-
- buf_pool->freed_page_clock = 0;
- buf_pool->LRU_old = NULL;
- buf_pool->LRU_old_len = 0;
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
- buf_refresh_io_stats(buf_pool);
-}
-
-/*********************************************************************//**
-Invalidates the file pages of every buffer pool instance by calling
-buf_pool_invalidate_instance() on each one. Used when an archive recovery
-is completed; all buffered file pages must already be in a replaceable
-state (not latched and not modified) when this function is called. */
-UNIV_INTERN
-void
-buf_pool_invalidate(void)
-/*=====================*/
-{
-	ulint	n = 0;
-
-	while (n < srv_buf_pool_instances) {
-		buf_pool_t*	instance = buf_pool_from_array(n);
-
-		buf_pool_invalidate_instance(instance);
-		n++;
-	}
-}
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/*********************************************************************//**
-Validates data in one buffer pool instance. Walks every block of every
-chunk, the zip_clean list and the flush list while holding the LRU list
-mutex, all page_hash locks, zip_mutex, free_list_mutex and
-flush_state_mutex, and cross-checks the page hash, the list lengths and
-the pending flush counters against what was counted during the walk. Any
-inconsistency fails a ut_a() assertion or reaches ut_error.
-@return TRUE */
-static
-ibool
-buf_pool_validate_instance(
-/*=======================*/
- buf_pool_t* buf_pool) /*!< in: buffer pool instance */
-{
- buf_page_t* b;
- buf_chunk_t* chunk;
- ulint i;
- ulint n_lru_flush = 0;
- ulint n_page_flush = 0;
- ulint n_list_flush = 0;
- ulint n_lru = 0;
- ulint n_flush = 0;
- ulint n_free = 0;
- ulint n_zip = 0;
- ulint fold = 0;
- ulint space = 0;
- ulint offset = 0;
-
- ut_ad(buf_pool);
-
- ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
- mutex_enter(&buf_pool->LRU_list_mutex);
- hash_lock_x_all(buf_pool->page_hash);
- mutex_enter(&buf_pool->zip_mutex);
- mutex_enter(&buf_pool->free_list_mutex);
- mutex_enter(&buf_pool->flush_state_mutex);
-
- chunk = buf_pool->chunks;
-
- /* Check the uncompressed blocks. */
-
- for (i = buf_pool->n_chunks; i--; chunk++) {
-
- ulint j;
- buf_block_t* block = chunk->blocks;
-
- for (j = chunk->size; j--; block++) {
-
- switch (buf_block_get_state(block)) {
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- /* These should only occur on
- zip_clean, zip_free[], or flush_list. */
- ut_error;
- break;
-
- case BUF_BLOCK_FILE_PAGE:
-
- space = buf_block_get_space(block);
- offset = buf_block_get_page_no(block);
- fold = buf_page_address_fold(space, offset);
- ut_a(buf_page_hash_get_low(buf_pool,
- space,
- offset,
- fold)
- == &block->page);
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(buf_page_get_io_fix_unlocked(&block->page)
- == BUF_IO_READ
- || !ibuf_count_get(buf_block_get_space(
- block),
- buf_block_get_page_no(
- block)));
-#endif
- switch (buf_page_get_io_fix_unlocked(
- &block->page)) {
- case BUF_IO_NONE:
- break;
-
- case BUF_IO_WRITE:
- switch (buf_page_get_flush_type(
- &block->page)) {
- case BUF_FLUSH_LRU:
- case BUF_FLUSH_SINGLE_PAGE:
- case BUF_FLUSH_LIST:
- break;
- default:
- ut_error;
- }
-
- break;
-
- case BUF_IO_READ:
-
- ut_a(rw_lock_is_locked(&block->lock,
- RW_LOCK_EX));
- break;
-
- case BUF_IO_PIN:
- break;
- }
-
- n_lru++; /* every BUF_BLOCK_FILE_PAGE block is counted towards the LRU length */
- break;
-
- case BUF_BLOCK_NOT_USED:
- n_free++; /* compared with the free list length below */
- break;
-
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- /* do nothing */
- break;
- }
- }
- }
-
- /* Check clean compressed-only blocks. */
-
- for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
- b = UT_LIST_GET_NEXT(list, b)) {
- ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
- switch (buf_page_get_io_fix(b)) {
- case BUF_IO_NONE:
- case BUF_IO_PIN:
- /* All clean blocks should be I/O-unfixed. */
- break;
- case BUF_IO_READ:
- /* In buf_LRU_free_page(), we temporarily set
- b->io_fix = BUF_IO_READ for a newly allocated
- control block in order to prevent
- buf_page_get_gen() from decompressing the block. */
- break;
- default:
- ut_error;
- break;
- }
-
- /* It is OK to read oldest_modification here because
- we have acquired buf_pool->zip_mutex above which acts
- as the 'block->mutex' for these bpages. */
- ut_a(!b->oldest_modification);
- fold = buf_page_address_fold(b->space, b->offset);
- ut_a(buf_page_hash_get_low(buf_pool, b->space, b->offset,
- fold) == b);
- n_lru++;
- n_zip++;
- }
-
- /* Check dirty blocks. */
-
- buf_flush_list_mutex_enter(buf_pool);
- for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
- b = UT_LIST_GET_NEXT(list, b)) {
- ut_ad(b->in_flush_list);
- ut_a(b->oldest_modification);
- n_flush++;
-
- switch (buf_page_get_state(b)) {
- case BUF_BLOCK_ZIP_DIRTY:
- n_lru++;
- n_zip++;
- /* fallthrough */
- case BUF_BLOCK_FILE_PAGE:
- switch (buf_page_get_io_fix_unlocked(b)) {
- case BUF_IO_NONE:
- case BUF_IO_READ:
- case BUF_IO_PIN:
- break;
- case BUF_IO_WRITE:
- switch (buf_page_get_flush_type(b)) {
- case BUF_FLUSH_LRU:
- n_lru_flush++;
- break;
- case BUF_FLUSH_SINGLE_PAGE:
- n_page_flush++;
- break;
- case BUF_FLUSH_LIST:
- n_list_flush++;
- break;
- default:
- ut_error;
- }
- break;
- default:
- ut_error;
- }
- break;
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- break;
- }
- fold = buf_page_address_fold(b->space, b->offset);
- ut_a(buf_page_hash_get_low(buf_pool, b->space, b->offset,
- fold) == b);
- }
-
- ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
-
- hash_unlock_x_all(buf_pool->page_hash);
- buf_flush_list_mutex_exit(buf_pool);
-
- mutex_exit(&buf_pool->zip_mutex);
-
- if (n_lru + n_free > buf_pool->curr_size + n_zip) {
- fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
- n_lru, n_free,
- buf_pool->curr_size, n_zip);
- ut_error;
- }
-
- ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
- fprintf(stderr, "Free list len %lu, free blocks %lu\n",
- UT_LIST_GET_LEN(buf_pool->free),
- n_free);
- ut_error;
- }
-
- mutex_exit(&buf_pool->free_list_mutex);
-
- ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
- ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
- ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_page_flush);
-
- mutex_exit(&buf_pool->flush_state_mutex);
-
- ut_a(buf_LRU_validate());
- ut_a(buf_flush_validate(buf_pool));
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Validates the buf_pool data structures of all buffer pool instances, one
-instance at a time, via buf_pool_validate_instance(). The per-instance
-return value is not inspected here.
-@return TRUE */
-UNIV_INTERN
-ibool
-buf_validate(void)
-/*==============*/
-{
-	for (ulint n = 0; n < srv_buf_pool_instances; n++) {
-		buf_pool_validate_instance(buf_pool_from_array(n));
-	}
-
-	return(TRUE);
-}
-
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/*********************************************************************//**
-Prints info of the buffer buf_pool data structure for one instance.
-First prints a summary of list lengths and counters to stderr using
-unlocked ("dirty") reads, then, under the LRU list mutex, counts for
-every block whose frame is of type FIL_PAGE_INDEX how many blocks carry
-each index id, and prints one line per index id found. Two scratch
-arrays of buf_pool->curr_size entries are allocated for the counting and
-freed before returning. Ends by asserting
-buf_pool_validate_instance(). */
-static
-void
-buf_print_instance(
-/*===============*/
- buf_pool_t* buf_pool) /*!< in: buffer pool instance */
-{
- index_id_t* index_ids;
- ulint* counts;
- ulint size;
- ulint i;
- ulint j;
- index_id_t id;
- ulint n_found;
- buf_chunk_t* chunk;
- dict_index_t* index;
-
- ut_ad(buf_pool);
-
- size = buf_pool->curr_size;
-
- ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
- index_ids = static_cast<index_id_t*>(
- mem_alloc(size * sizeof *index_ids));
-
- counts = static_cast<ulint*>(mem_alloc(sizeof(ulint) * size));
-
- /* Dirty reads below */
-
- fprintf(stderr,
- "buf_pool size %lu\n"
- "database pages %lu\n"
- "free pages %lu\n"
- "modified database pages %lu\n"
- "n pending decompressions %lu\n"
- "n pending reads %lu\n"
- "n pending flush LRU %lu list %lu single page %lu\n"
- "pages made young %lu, not young %lu\n"
- "pages read %lu, created %lu, written %lu\n",
- (ulint) size,
- (ulint) UT_LIST_GET_LEN(buf_pool->LRU),
- (ulint) UT_LIST_GET_LEN(buf_pool->free),
- (ulint) UT_LIST_GET_LEN(buf_pool->flush_list),
- (ulint) buf_pool->n_pend_unzip,
- (ulint) buf_pool->n_pend_reads,
- (ulint) buf_pool->n_flush[BUF_FLUSH_LRU],
- (ulint) buf_pool->n_flush[BUF_FLUSH_LIST],
- (ulint) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
- (ulint) buf_pool->stat.n_pages_made_young,
- (ulint) buf_pool->stat.n_pages_not_made_young,
- (ulint) buf_pool->stat.n_pages_read,
- (ulint) buf_pool->stat.n_pages_created,
- (ulint) buf_pool->stat.n_pages_written);
-
- /* Count the number of blocks belonging to each index in the buffer */
-
- n_found = 0;
-
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- chunk = buf_pool->chunks;
-
- for (i = buf_pool->n_chunks; i--; chunk++) {
- buf_block_t* block = chunk->blocks;
- ulint n_blocks = chunk->size;
-
- for (; n_blocks--; block++) {
- const buf_frame_t* frame = block->frame;
-
- if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
-
- id = btr_page_get_index_id(frame);
-
- /* Look for the id in the index_ids array */
- j = 0;
-
- while (j < n_found) {
-
- if (index_ids[j] == id) {
- counts[j]++;
-
- break;
- }
- j++;
- }
-
- if (j == n_found) { /* linear search found nothing: append a new entry */
- n_found++;
- index_ids[j] = id;
- counts[j] = 1;
- }
- }
- }
- }
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- for (i = 0; i < n_found; i++) {
- index = dict_index_get_if_in_cache(index_ids[i]);
-
- fprintf(stderr,
- "Block count for index %llu in buffer is about %lu",
- (ullint) index_ids[i],
- (ulint) counts[i]);
-
- if (index) { /* the index name is printed only when the lookup above returned one */
- putc(' ', stderr);
- dict_index_name_print(stderr, NULL, index);
- }
-
- putc('\n', stderr);
- }
-
- mem_free(index_ids);
- mem_free(counts);
-
- ut_a(buf_pool_validate_instance(buf_pool));
-}
-
-/*********************************************************************//**
-Prints info of the buffer buf_pool data structure, calling
-buf_print_instance() once for every buffer pool instance. */
-UNIV_INTERN
-void
-buf_print(void)
-/*===========*/
-{
-	for (ulint n = 0; n < srv_buf_pool_instances; n++) {
-		buf_print_instance(buf_pool_from_array(n));
-	}
-}
-#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Returns the number of latched pages in the buffer pool.
-A page is counted when its buf_fix_count is non-zero or its io_fix is
-not BUF_IO_NONE. Three sets are examined: BUF_BLOCK_FILE_PAGE blocks in
-the chunks (under the LRU list mutex), the zip_clean list (under
-zip_mutex) and BUF_BLOCK_ZIP_DIRTY pages on the flush list (under
-zip_mutex and the flush list mutex).
-@return number of latched pages */
-UNIV_INTERN
-ulint
-buf_get_latched_pages_number_instance(
-/*==================================*/
- buf_pool_t* buf_pool) /*!< in: buffer pool instance */
-{
- buf_page_t* b;
- ulint i;
- buf_chunk_t* chunk;
- ulint fixed_pages_number = 0;
-
- /* The LRU list mutex is enough to protect the required fields below */
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- chunk = buf_pool->chunks;
-
- for (i = buf_pool->n_chunks; i--; chunk++) {
- buf_block_t* block;
- ulint j;
-
- block = chunk->blocks;
-
- for (j = chunk->size; j--; block++) {
- if (buf_block_get_state(block)
- != BUF_BLOCK_FILE_PAGE) {
-
- continue;
- }
-
- if (block->page.buf_fix_count != 0
- || buf_page_get_io_fix_unlocked(&block->page)
- != BUF_IO_NONE) {
- fixed_pages_number++;
- }
-
- }
- }
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- mutex_enter(&buf_pool->zip_mutex);
-
- /* Traverse the lists of clean and dirty compressed-only blocks. */
-
- for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
- b = UT_LIST_GET_NEXT(list, b)) {
- ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
- ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
-
- if (b->buf_fix_count != 0
- || buf_page_get_io_fix(b) != BUF_IO_NONE) {
- fixed_pages_number++;
- }
- }
-
- buf_flush_list_mutex_enter(buf_pool);
- for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
- b = UT_LIST_GET_NEXT(list, b)) {
- ut_ad(b->in_flush_list);
-
- switch (buf_page_get_state(b)) {
- case BUF_BLOCK_ZIP_DIRTY:
- if (b->buf_fix_count != 0
- || buf_page_get_io_fix(b) != BUF_IO_NONE) {
- fixed_pages_number++;
- }
- break;
- case BUF_BLOCK_FILE_PAGE:
- /* uncompressed page: not counted here, the chunk scan
- above already looked at BUF_BLOCK_FILE_PAGE blocks */
- case BUF_BLOCK_REMOVE_HASH:
- /* We hold flush list but not LRU list mutex here.
- Thus encountering BUF_BLOCK_REMOVE_HASH pages is
- possible. */
- break;
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- ut_error;
- break;
- }
- }
-
- buf_flush_list_mutex_exit(buf_pool);
- mutex_exit(&buf_pool->zip_mutex);
-
- return(fixed_pages_number);
-}
-
-/*********************************************************************//**
-Returns the number of latched pages in all the buffer pools, i.e. the sum
-of buf_get_latched_pages_number_instance() over every instance.
-@return number of latched pages */
-UNIV_INTERN
-ulint
-buf_get_latched_pages_number(void)
-/*==============================*/
-{
-	ulint	latched_sum = 0;
-
-	for (ulint n = 0; n < srv_buf_pool_instances; n++) {
-		buf_pool_t*	instance = buf_pool_from_array(n);
-
-		latched_sum
-			+= buf_get_latched_pages_number_instance(instance);
-	}
-
-	return(latched_sum);
-}
-
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Returns the number of pending buf pool read ios, obtained by adding up
-the n_pend_reads counter of every buffer pool instance. The counters are
-read without taking any mutex in this function.
-@return number of pending read I/O operations */
-UNIV_INTERN
-ulint
-buf_get_n_pending_read_ios(void)
-/*============================*/
-{
-	ulint	n_pending = 0;
-
-	for (ulint n = 0; n < srv_buf_pool_instances; n++) {
-		const buf_pool_t*	instance = buf_pool_from_array(n);
-
-		n_pending += instance->n_pend_reads;
-	}
-
-	return(n_pending);
-}
-
-/*********************************************************************//**
-Returns the ratio in percents of modified pages in the buffer pool /
-database pages in the buffer pool. The list lengths come from
-buf_get_total_list_len(); the result is
-100 * flush_list_len / (1 + lru_len + free_len).
-@return modified page percentage ratio */
-UNIV_INTERN
-double
-buf_get_modified_ratio_pct(void)
-/*============================*/
-{
-	ulint	n_lru = 0;
-	ulint	n_free = 0;
-	ulint	n_dirty = 0;
-
-	buf_get_total_list_len(&n_lru, &n_free, &n_dirty);
-
-	/* The leading 1.0 in the divisor keeps it non-zero even when
-	both lists are empty. */
-	return((100.0 * n_dirty) / (1.0 + n_lru + n_free));
-}
-
-/*******************************************************************//**
-Aggregates a pool stats information with the total buffer pool stats.
-Every counter, rate and delta field of pool_info is added to the field of
-the same name in total_info; pool_unique_id is not touched. Nothing is
-done when both arguments point to the same object. */
-static
-void
-buf_stats_aggregate_pool_info(
-/*==========================*/
-	buf_pool_info_t*	total_info,	/*!< in/out: the buffer pool
-						info to store aggregated
-						result */
-	const buf_pool_info_t*	pool_info)	/*!< in: individual buffer pool
-						stats info */
-{
-	ut_a(total_info && pool_info);
-
-	/* Nothing to copy if total_info is the same as pool_info */
-	if (total_info == pool_info) {
-		return;
-	}
-
-	total_info->pool_size += pool_info->pool_size;
-	total_info->pool_size_bytes += pool_info->pool_size_bytes;
-	total_info->lru_len += pool_info->lru_len;
-	total_info->old_lru_len += pool_info->old_lru_len;
-	total_info->free_list_len += pool_info->free_list_len;
-	total_info->flush_list_len += pool_info->flush_list_len;
-	total_info->n_pend_unzip += pool_info->n_pend_unzip;
-	total_info->n_pend_reads += pool_info->n_pend_reads;
-	total_info->n_pending_flush_lru += pool_info->n_pending_flush_lru;
-	total_info->n_pending_flush_list += pool_info->n_pending_flush_list;
-	/* The single-page flush counter is collected per pool and printed
-	for the total as well, so it has to be summed like the other two
-	pending-flush counters; otherwise the total always reports 0. */
-	total_info->n_pending_flush_single_page +=
-		pool_info->n_pending_flush_single_page;
-	total_info->n_pages_made_young += pool_info->n_pages_made_young;
-	total_info->n_pages_not_made_young += pool_info->n_pages_not_made_young;
-	total_info->n_pages_read += pool_info->n_pages_read;
-	total_info->n_pages_created += pool_info->n_pages_created;
-	total_info->n_pages_written += pool_info->n_pages_written;
-	total_info->n_page_gets += pool_info->n_page_gets;
-	total_info->n_ra_pages_read_rnd += pool_info->n_ra_pages_read_rnd;
-	total_info->n_ra_pages_read += pool_info->n_ra_pages_read;
-	total_info->n_ra_pages_evicted += pool_info->n_ra_pages_evicted;
-	total_info->page_made_young_rate += pool_info->page_made_young_rate;
-	total_info->page_not_made_young_rate +=
-		pool_info->page_not_made_young_rate;
-	total_info->pages_read_rate += pool_info->pages_read_rate;
-	total_info->pages_created_rate += pool_info->pages_created_rate;
-	total_info->pages_written_rate += pool_info->pages_written_rate;
-	total_info->n_page_get_delta += pool_info->n_page_get_delta;
-	total_info->page_read_delta += pool_info->page_read_delta;
-	total_info->young_making_delta += pool_info->young_making_delta;
-	total_info->not_young_making_delta += pool_info->not_young_making_delta;
-	total_info->pages_readahead_rnd_rate += pool_info->pages_readahead_rnd_rate;
-	total_info->pages_readahead_rate += pool_info->pages_readahead_rate;
-	total_info->pages_evicted_rate += pool_info->pages_evicted_rate;
-	total_info->unzip_lru_len += pool_info->unzip_lru_len;
-	total_info->io_sum += pool_info->io_sum;
-	total_info->io_cur += pool_info->io_cur;
-	total_info->unzip_sum += pool_info->unzip_sum;
-	total_info->unzip_cur += pool_info->unzip_cur;
-}
-/*******************************************************************//**
-Collect buffer pool stats information for one buffer pool instance and
-store it in all_pool_info[pool_id]. This function itself does not
-aggregate anything; buf_print_io() sums the per-instance entries with
-buf_stats_aggregate_pool_info() when there is more than one buffer pool.
-The rate fields are computed against buf_pool->old_stat and
-buf_pool->last_printout_time, and the function ends by calling
-buf_refresh_io_stats(), so each call resets the baseline used by the
-next one. page_read_delta, young_making_delta and not_young_making_delta
-are written only when n_page_get_delta is non-zero. */
-UNIV_INTERN
-void
-buf_stats_get_pool_info(
-/*====================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool */
- ulint pool_id, /*!< in: buffer pool ID */
- buf_pool_info_t* all_pool_info) /*!< in/out: buffer pool info
- to fill */
-{
- buf_pool_info_t* pool_info;
- time_t current_time;
- double time_elapsed;
-
- /* Find appropriate pool_info to store stats for this buffer pool */
- pool_info = &all_pool_info[pool_id];
- ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
-
- pool_info->pool_unique_id = pool_id;
-
- pool_info->pool_size = buf_pool->curr_size;
-
- pool_info->pool_size_bytes = buf_pool->curr_pool_size;
-
- pool_info->lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
-
- pool_info->old_lru_len = buf_pool->LRU_old_len;
-
- pool_info->free_list_len = UT_LIST_GET_LEN(buf_pool->free);
-
- pool_info->flush_list_len = UT_LIST_GET_LEN(buf_pool->flush_list);
-
- pool_info->n_pend_unzip = UT_LIST_GET_LEN(buf_pool->unzip_LRU); /* NOTE(review): filled from the unzip_LRU length, not from buf_pool->n_pend_unzip -- confirm this is intended */
-
- pool_info->n_pend_reads = buf_pool->n_pend_reads;
-
- mutex_enter(&buf_pool->flush_state_mutex);
-
- pool_info->n_pending_flush_lru =
- (buf_pool->n_flush[BUF_FLUSH_LRU]
- + buf_pool->init_flush[BUF_FLUSH_LRU]);
-
- pool_info->n_pending_flush_list =
- (buf_pool->n_flush[BUF_FLUSH_LIST]
- + buf_pool->init_flush[BUF_FLUSH_LIST]);
-
- pool_info->n_pending_flush_single_page =
- (buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]
- + buf_pool->init_flush[BUF_FLUSH_SINGLE_PAGE]);
-
- mutex_exit(&buf_pool->flush_state_mutex);
-
- current_time = time(NULL);
- time_elapsed = 0.001 + difftime(current_time,
- buf_pool->last_printout_time); /* the added 0.001 keeps the divisor of the rates below non-zero */
-
- pool_info->n_pages_made_young = buf_pool->stat.n_pages_made_young;
-
- pool_info->n_pages_not_made_young =
- buf_pool->stat.n_pages_not_made_young;
-
- pool_info->n_pages_read = buf_pool->stat.n_pages_read;
-
- pool_info->n_pages_created = buf_pool->stat.n_pages_created;
-
- pool_info->n_pages_written = buf_pool->stat.n_pages_written;
-
- pool_info->n_page_gets = buf_pool->stat.n_page_gets;
-
- pool_info->n_ra_pages_read_rnd = buf_pool->stat.n_ra_pages_read_rnd;
- pool_info->n_ra_pages_read = buf_pool->stat.n_ra_pages_read;
-
- pool_info->n_ra_pages_evicted = buf_pool->stat.n_ra_pages_evicted;
-
- pool_info->page_made_young_rate =
- (buf_pool->stat.n_pages_made_young
- - buf_pool->old_stat.n_pages_made_young) / time_elapsed;
-
- pool_info->page_not_made_young_rate =
- (buf_pool->stat.n_pages_not_made_young
- - buf_pool->old_stat.n_pages_not_made_young) / time_elapsed;
-
- pool_info->pages_read_rate =
- (buf_pool->stat.n_pages_read
- - buf_pool->old_stat.n_pages_read) / time_elapsed;
-
- pool_info->pages_created_rate =
- (buf_pool->stat.n_pages_created
- - buf_pool->old_stat.n_pages_created) / time_elapsed;
-
- pool_info->pages_written_rate =
- (buf_pool->stat.n_pages_written
- - buf_pool->old_stat.n_pages_written) / time_elapsed;
-
- pool_info->n_page_get_delta = buf_pool->stat.n_page_gets
- - buf_pool->old_stat.n_page_gets;
-
- if (pool_info->n_page_get_delta) { /* the three deltas below keep their previous contents otherwise */
- pool_info->page_read_delta = buf_pool->stat.n_pages_read
- - buf_pool->old_stat.n_pages_read;
-
- pool_info->young_making_delta =
- buf_pool->stat.n_pages_made_young
- - buf_pool->old_stat.n_pages_made_young;
-
- pool_info->not_young_making_delta =
- buf_pool->stat.n_pages_not_made_young
- - buf_pool->old_stat.n_pages_not_made_young;
- }
- pool_info->pages_readahead_rnd_rate =
- (buf_pool->stat.n_ra_pages_read_rnd
- - buf_pool->old_stat.n_ra_pages_read_rnd) / time_elapsed;
-
-
- pool_info->pages_readahead_rate =
- (buf_pool->stat.n_ra_pages_read
- - buf_pool->old_stat.n_ra_pages_read) / time_elapsed;
-
- pool_info->pages_evicted_rate =
- (buf_pool->stat.n_ra_pages_evicted
- - buf_pool->old_stat.n_ra_pages_evicted) / time_elapsed;
-
- pool_info->unzip_lru_len = UT_LIST_GET_LEN(buf_pool->unzip_LRU);
-
- pool_info->io_sum = buf_LRU_stat_sum.io;
-
- pool_info->io_cur = buf_LRU_stat_cur.io;
-
- pool_info->unzip_sum = buf_LRU_stat_sum.unzip;
-
- pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
-
- buf_refresh_io_stats(buf_pool); /* resets last_printout_time and old_stat for the next call */
-}
-
-/*********************************************************************//**
-Prints info of the buffer i/o for one already collected buf_pool_info_t:
-sizes and list lengths, page counters and their per-second rates, the
-buffer pool hit rate (only when there were page gets since the last
-printout), read-ahead rates and the LRU / unzip_LRU figures. */
-UNIV_INTERN
-void
-buf_print_io_instance(
-/*==================*/
-	buf_pool_info_t*pool_info,	/*!< in: buffer pool info */
-	FILE*		file)		/*!< in/out: buffer where to print */
-{
-	ut_ad(pool_info);
-
-	/* Share of modified pages among LRU and free pages; the 1.0
-	keeps the divisor non-zero. */
-	const double	dirty_pct
-		= (((double) pool_info->flush_list_len) /
-		   (pool_info->lru_len + pool_info->free_list_len + 1.0))
-		* 100.0;
-
-	fprintf(file,
-		"Buffer pool size %lu\n"
-		"Buffer pool size, bytes " ULINTPF "\n"
-		"Free buffers %lu\n"
-		"Database pages %lu\n"
-		"Old database pages %lu\n"
-		"Modified db pages %lu\n"
-		"Percent of dirty pages(LRU & free pages): %.3f\n"
-		"Max dirty pages percent: %.3f\n"
-		"Pending reads %lu\n"
-		"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
-		pool_info->pool_size,
-		pool_info->pool_size_bytes,
-		pool_info->free_list_len,
-		pool_info->lru_len,
-		pool_info->old_lru_len,
-		pool_info->flush_list_len,
-		dirty_pct,
-		srv_max_buf_pool_modified_pct,
-		pool_info->n_pend_reads,
-		pool_info->n_pending_flush_lru,
-		pool_info->n_pending_flush_list,
-		pool_info->n_pending_flush_single_page);
-
-	fprintf(file,
-		"Pages made young %lu, not young %lu\n"
-		"%.2f youngs/s, %.2f non-youngs/s\n"
-		"Pages read %lu, created %lu, written %lu\n"
-		"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
-		pool_info->n_pages_made_young,
-		pool_info->n_pages_not_made_young,
-		pool_info->page_made_young_rate,
-		pool_info->page_not_made_young_rate,
-		pool_info->n_pages_read,
-		pool_info->n_pages_created,
-		pool_info->n_pages_written,
-		pool_info->pages_read_rate,
-		pool_info->pages_created_rate,
-		pool_info->pages_written_rate);
-
-	if (!pool_info->n_page_get_delta) {
-		fputs("No buffer pool page gets since the last printout\n",
-		      file);
-	} else {
-		/* Page reads per page get, capped at 1; the printed
-		hit rate is its complement scaled to 1000. */
-		double	read_ratio = double(pool_info->page_read_delta)
-			/ pool_info->n_page_get_delta;
-
-		if (read_ratio > 1) {
-			read_ratio = 1;
-		}
-
-		const ulint	hits_per_1000
-			= ulint(1000 * (1 - read_ratio));
-		const ulint	young_per_1000
-			= ulint(1000 * double(pool_info->young_making_delta)
-				/ pool_info->n_page_get_delta);
-		const ulint	not_young_per_1000
-			= ulint(1000
-				* double(pool_info->not_young_making_delta)
-				/ pool_info->n_page_get_delta);
-
-		fprintf(file,
-			"Buffer pool hit rate " ULINTPF " / 1000,"
-			" young-making rate " ULINTPF " / 1000 not "
-			ULINTPF " / 1000\n",
-			hits_per_1000,
-			young_per_1000,
-			not_young_per_1000);
-	}
-
-	/* Statistics about read ahead algorithm */
-	fprintf(file, "Pages read ahead %.2f/s,"
-		" evicted without access %.2f/s,"
-		" Random read ahead %.2f/s\n",
-		pool_info->pages_readahead_rate,
-		pool_info->pages_evicted_rate,
-		pool_info->pages_readahead_rnd_rate);
-
-	/* Figures that help with visualizing what is happening with
-	LRU eviction. */
-	fprintf(file,
-		"LRU len: %lu, unzip_LRU len: %lu\n"
-		"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
-		pool_info->lru_len, pool_info->unzip_lru_len,
-		pool_info->io_sum, pool_info->io_cur,
-		pool_info->unzip_sum, pool_info->unzip_cur);
-}
-
-/*********************************************************************//**
-Prints info of the buffer i/o for the whole server: first the totals
-over all buffer pool instances and then, when there is more than one
-instance, the figures of each individual instance. Collecting the
-figures calls buf_stats_get_pool_info() for every instance. */
-UNIV_INTERN
-void
-buf_print_io(
-/*=========*/
-	FILE*	file)	/*!< in/out: buffer where to print */
-{
-	buf_pool_info_t*	pool_info;
-	buf_pool_info_t*	pool_info_total;
-
-	/* If srv_buf_pool_instances is greater than 1, allocate
-	one extra buf_pool_info_t, the last one stores
-	aggregated/total values from all pools */
-	if (srv_buf_pool_instances > 1) {
-		pool_info = static_cast<buf_pool_info_t*>(
-			mem_zalloc((srv_buf_pool_instances + 1)
-				   * sizeof *pool_info));
-
-		pool_info_total = &pool_info[srv_buf_pool_instances];
-	} else {
-		ut_a(srv_buf_pool_instances == 1);
-
-		/* With a single instance its own entry doubles as the
-		total. */
-		pool_info = static_cast<buf_pool_info_t*>(
-			mem_zalloc(sizeof *pool_info));
-
-		pool_info_total = pool_info;
-	}
-
-	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
-		buf_pool_t*	buf_pool = buf_pool_from_array(i);
-
-		/* Fetch the info of this individual buffer pool into
-		pool_info[i] */
-		buf_stats_get_pool_info(buf_pool, i, pool_info);
-
-		/* If we have more than one buffer pool, add this
-		pool's figures to the aggregated stats */
-		if (srv_buf_pool_instances > 1) {
-			buf_stats_aggregate_pool_info(pool_info_total,
-						      &pool_info[i]);
-		}
-	}
-
-	/* Print the aggregate buffer pool info */
-	buf_print_io_instance(pool_info_total, file);
-
-	/* If there are more than one buffer pool, print each individual pool
-	info */
-	if (srv_buf_pool_instances > 1) {
-		fputs("----------------------\n"
-		"INDIVIDUAL BUFFER POOL INFO\n"
-		"----------------------\n", file);
-
-		for (ulint i = 0; i < srv_buf_pool_instances; i++) {
-			/* i is a ulint, so it is printed with ULINTPF
-			like the other ulint values in this file rather
-			than with a hard-coded %lu. */
-			fprintf(file, "---BUFFER POOL " ULINTPF "\n", i);
-			buf_print_io_instance(&pool_info[i], file);
-		}
-	}
-
-	mem_free(pool_info);
-}
-
-/**********************************************************************//**
-Starts a new measuring interval for the per-second averages of one
-buffer pool instance: records the current time as the last printout time
-and snapshots the current counters into old_stat. */
-UNIV_INTERN
-void
-buf_refresh_io_stats(
-/*=================*/
-	buf_pool_t*	buf_pool)	/*!< in: buffer pool instance */
-{
-	/* Timestamp first, then the counter snapshot. */
-	buf_pool->last_printout_time = ut_time();
-
-	buf_pool->old_stat = buf_pool->stat;
-}
-
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages for every
-buffer pool instance, by calling buf_refresh_io_stats() on each. */
-UNIV_INTERN
-void
-buf_refresh_io_stats_all(void)
-/*==========================*/
-{
-	ulint	n = 0;
-
-	while (n < srv_buf_pool_instances) {
-		buf_refresh_io_stats(buf_pool_from_array(n));
-		n++;
-	}
-}
-
-/**********************************************************************//**
-Check if all pages in all buffer pools are in a replaceable state.
-Instances are checked in order with buf_all_freed_instance() and the
-check stops at the first instance that reports failure.
-@return FALSE if not */
-UNIV_INTERN
-ibool
-buf_all_freed(void)
-/*===============*/
-{
-	ulint	n = 0;
-
-	while (n < srv_buf_pool_instances) {
-		if (!buf_all_freed_instance(buf_pool_from_array(n))) {
-			return(FALSE);
-		}
-
-		n++;
-	}
-
-	return(TRUE);
-}
-
-/*********************************************************************//**
-Counts the pending i/o-operations of the buffer pool: for every instance
-the pending reads plus the pending flushes of the LRU, single page and
-flush list types. The flush counters are read under flush_state_mutex;
-n_pend_reads is read before that mutex is taken.
-@return number of pending i/o */
-UNIV_INTERN
-ulint
-buf_pool_check_no_pending_io(void)
-/*==============================*/
-{
-	ulint	n_pending = 0;
-
-	for (ulint n = 0; n < srv_buf_pool_instances; n++) {
-		buf_pool_t*	instance = buf_pool_from_array(n);
-
-		n_pending += instance->n_pend_reads;
-
-		mutex_enter(&instance->flush_state_mutex);
-
-		n_pending += instance->n_flush[BUF_FLUSH_LRU]
-			+ instance->n_flush[BUF_FLUSH_SINGLE_PAGE]
-			+ instance->n_flush[BUF_FLUSH_LIST];
-
-		mutex_exit(&instance->flush_state_mutex);
-	}
-
-	return(n_pending);
-}
-
-#if 0
-Code currently not used
-/*********************************************************************//**
-Gets the current length of the free list of buffer blocks.
-This function is compiled out by the surrounding #if 0. It takes no
-parameter and refers to a variable named buf_pool that is not declared
-inside it, and it reads the list length under free_list_mutex.
-@return length of the free list */
-UNIV_INTERN
-ulint
-buf_get_free_list_len(void)
-/*=======================*/
-{
- ulint len;
-
- mutex_enter(&buf_pool->free_list_mutex);
-
- len = UT_LIST_GET_LEN(buf_pool->free);
-
- mutex_exit(&buf_pool->free_list_mutex);
-
- return(len);
-}
-#endif
-
-#else /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Inits a page to the buffer buf_pool, for use in mysqlbackup --restore.
-Marks the block as a file page of (space, offset), initializes its
-compressed page descriptor and sets the compressed size. For a non-zero
-zip_size the compressed data pointer is placed UNIV_PAGE_SIZE bytes
-after the start of the block frame. */
-UNIV_INTERN
-void
-buf_page_init_for_backup_restore(
-/*=============================*/
-	ulint		space,	/*!< in: space id */
-	ulint		offset,	/*!< in: offset of the page within space
-				in units of a page */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	buf_block_t*	block)	/*!< in: block to init */
-{
-	/* Identity of the page. */
-	block->page.state = BUF_BLOCK_FILE_PAGE;
-	block->page.space = space;
-	block->page.offset = offset;
-
-	page_zip_des_init(&block->page.zip);
-
-	/* We assume that block->page.data has been allocated
-	with zip_size == UNIV_PAGE_SIZE. */
-	ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
-	ut_ad(ut_is_2pow(zip_size));
-
-	page_zip_set_size(&block->page.zip, zip_size);
-
-	if (zip_size != 0) {
-		block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
-	}
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Acquires the LRU list mutex of a buffer pool instance. In debug builds
-it is asserted that the calling thread does not already own it. */
-void
-buf_pool_mutex_enter(
-/*=================*/
-	buf_pool_t*	buf_pool)	/*!< in: buffer pool */
-{
-	ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
-
-	mutex_enter(&buf_pool->LRU_list_mutex);
-}
-/*********************************************************************//**
-Releases the LRU list mutex of a buffer pool instance. In debug builds
-it is asserted that the calling thread currently owns it. */
-void
-buf_pool_mutex_exit(
-/*================*/
-	buf_pool_t*	buf_pool)	/*!< in: buffer pool */
-{
-	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
-	mutex_exit(&buf_pool->LRU_list_mutex);
-}
-
-/********************************************************************//**
-Reserve unused slot from temporary memory array and allocate necessary
-temporary memory if not yet allocated. The first slot whose reserved
-flag is false is taken and marked reserved; the function asserts that
-such a slot exists. The encryption buffer is allocated on first use of
-a slot, the compression buffer only when compressed is true. Both
-allocations are asserted to have succeeded before they are zero-filled.
-@return reserved slot */
-UNIV_INTERN
-buf_tmp_buffer_t*
-buf_pool_reserve_tmp_slot(
-/*======================*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool where to
-					reserve */
-	bool		compressed)	/*!< in: is file space compressed */
-{
-	buf_tmp_buffer_t*	free_slot = NULL;
-
-	/* The slot array is searched and updated while holding the
-	mutex taken by buf_pool_mutex_enter(), i.e. the LRU list
-	mutex. */
-	buf_pool_mutex_enter(buf_pool);
-
-	for (ulint i = 0; i < buf_pool->tmp_arr->n_slots; i++) {
-		buf_tmp_buffer_t*	slot = &buf_pool->tmp_arr->slots[i];
-
-		if (!slot->reserved) {
-			free_slot = slot;
-			break;
-		}
-	}
-
-	/* We assume that free slot is found */
-	ut_a(free_slot != NULL);
-	free_slot->reserved = true;
-	/* Now that we have reserved this slot we can release
-	buf_pool mutex */
-	buf_pool_mutex_exit(buf_pool);
-
-	/* Allocate temporary memory for encryption/decryption */
-	if (free_slot->crypt_buf == NULL) {
-		free_slot->crypt_buf = static_cast<byte*>(
-			aligned_malloc(UNIV_PAGE_SIZE, UNIV_PAGE_SIZE));
-		/* Fail with an assertion rather than passing a NULL
-		pointer to memset() if the allocation did not succeed. */
-		ut_a(free_slot->crypt_buf != NULL);
-		memset(free_slot->crypt_buf, 0, UNIV_PAGE_SIZE);
-	}
-
-	/* For page compressed tables allocate temporary memory for
-	compression/decompression */
-	if (compressed && free_slot->comp_buf == NULL) {
-		ulint	size = UNIV_PAGE_SIZE;
-
-		/* Both snappy and lzo compression methods require that
-		output buffer used for compression is bigger than input
-		buffer. Increase the allocated buffer size accordingly. */
-#if HAVE_SNAPPY
-		size = snappy_max_compressed_length(size);
-#endif
-#if HAVE_LZO
-		size += LZO1X_1_15_MEM_COMPRESS;
-#endif
-		free_slot->comp_buf = static_cast<byte*>(
-			aligned_malloc(size, UNIV_PAGE_SIZE));
-		/* Same allocation check as for crypt_buf above. */
-		ut_a(free_slot->comp_buf != NULL);
-		memset(free_slot->comp_buf, 0, size);
-	}
-
-	return (free_slot);
-}
-
-/** Encryption and page_compression hook that is called just before
-a page is written to disk.
-@param[in,out] space tablespace
-@param[in,out] bpage buffer page
-@param[in] src_frame physical page frame that is being encrypted
-@return page frame to be written to file
-(may be src_frame or an encrypted/compressed copy of it) */
-UNIV_INTERN
-byte*
-buf_page_encrypt_before_write(
- fil_space_t* space,
- buf_page_t* bpage,
- byte* src_frame)
-{
- ut_ad(space->id == bpage->space);
- bpage->real_size = UNIV_PAGE_SIZE;
-
- fil_page_type_validate(src_frame);
-
- switch (bpage->offset) {
- case 0:
- /* Page 0 of a tablespace is not encrypted/compressed */
- return src_frame;
- case TRX_SYS_PAGE_NO:
- if (bpage->space == TRX_SYS_SPACE) {
- /* don't encrypt/compress page as it contains
- address to dblwr buffer */
- return src_frame;
- }
- }
-
- fil_space_crypt_t* crypt_data = space->crypt_data;
-
- const bool encrypted = crypt_data
- && !crypt_data->not_encrypted()
- && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED
- && (!crypt_data->is_default_encryption()
- || srv_encrypt_tables);
-
- bool page_compressed = FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags);
-
- if (!encrypted && !page_compressed) {
- /* No need to encrypt or page compress the page.
- Clear key-version & crypt-checksum. */
- memset(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
- return src_frame;
- }
-
- ulint zip_size = buf_page_get_zip_size(bpage);
- ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- /* Find free slot from temporary memory array */
- buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed);
- slot->out_buf = NULL;
- bpage->slot = slot;
-
- byte *dst_frame = slot->crypt_buf;
-
- if (!page_compressed) {
- /* Encrypt page content */
- byte* tmp = fil_space_encrypt(space,
- bpage->offset,
- bpage->newest_modification,
- src_frame,
- dst_frame);
-
- bpage->real_size = page_size;
- slot->out_buf = dst_frame = tmp;
-
- ut_d(fil_page_type_validate(tmp));
- } else {
- /* First we compress the page content */
- ulint out_len = 0;
-
- byte *tmp = fil_compress_page(
- space,
- (byte *)src_frame,
- slot->comp_buf,
- page_size,
- fsp_flags_get_page_compression_level(space->flags),
- fil_space_get_block_size(space, bpage->offset),
- encrypted,
- &out_len);
-
- bpage->real_size = out_len;
-
-#ifdef UNIV_DEBUG
- fil_page_type_validate(tmp);
-#endif
-
- if(encrypted) {
-
- /* And then we encrypt the page content */
- tmp = fil_space_encrypt(space,
- bpage->offset,
- bpage->newest_modification,
- tmp,
- dst_frame);
- }
-
- slot->out_buf = dst_frame = tmp;
- }
-
-#ifdef UNIV_DEBUG
- fil_page_type_validate(dst_frame);
-#endif
-
- // return dst_frame which will be written
- return dst_frame;
-}
-
-/** Decrypt a page.
-@param[in,out] bpage Page control block
-@param[in,out] space tablespace
-@return whether the operation was successful */
-static
-bool
-buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space)
-{
- ut_ad(space->n_pending_ios > 0);
- ut_ad(space->id == bpage->space);
-
- ulint zip_size = buf_page_get_zip_size(bpage);
- ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
-
- byte* dst_frame = (zip_size) ? bpage->zip.data :
- ((buf_block_t*) bpage)->frame;
- unsigned key_version =
- mach_read_from_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
- bool page_compressed = fil_page_is_compressed(dst_frame);
- bool page_compressed_encrypted = fil_page_is_compressed_encrypted(dst_frame);
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- bool success = true;
-
- if (bpage->offset == 0) {
- /* File header pages are not encrypted/compressed */
- return (true);
- }
-
- /* Page is encrypted if encryption information is found from
- tablespace and page contains used key_version. This is true
- also for pages first compressed and then encrypted. */
- if (!space->crypt_data) {
- key_version = 0;
- }
-
- if (page_compressed) {
- /* the page we read is unencrypted */
- /* Find free slot from temporary memory array */
- buf_tmp_buffer_t* slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed);
-
-#ifdef UNIV_DEBUG
- fil_page_type_validate(dst_frame);
-#endif
-
- /* decompress using comp_buf to dst_frame */
- fil_decompress_page(slot->comp_buf,
- dst_frame,
- ulong(size),
- &bpage->write_size);
-
- /* Mark this slot as free */
- slot->reserved = false;
- key_version = 0;
-
-#ifdef UNIV_DEBUG
- fil_page_type_validate(dst_frame);
-#endif
- } else {
- buf_tmp_buffer_t* slot = NULL;
-
- if (key_version) {
- /* Verify encryption checksum before we even try to
- decrypt. */
- if (!fil_space_verify_crypt_checksum(dst_frame,
- zip_size, NULL, bpage->offset)) {
-
- /* Mark page encrypted in case it should
- be. */
- if (space->crypt_data->type
- != CRYPT_SCHEME_UNENCRYPTED) {
- bpage->encrypted = true;
- }
-
- return (false);
- }
-
- /* Find free slot from temporary memory array */
- slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed);
-
-#ifdef UNIV_DEBUG
- fil_page_type_validate(dst_frame);
-#endif
-
- /* decrypt using crypt_buf to dst_frame */
- if (!fil_space_decrypt(space, slot->crypt_buf,
- dst_frame, &bpage->encrypted)) {
- success = false;
- }
-
-#ifdef UNIV_DEBUG
- fil_page_type_validate(dst_frame);
-#endif
- }
-
- if (page_compressed_encrypted && success) {
- if (!slot) {
- slot = buf_pool_reserve_tmp_slot(buf_pool, page_compressed);
- }
-
-#ifdef UNIV_DEBUG
- fil_page_type_validate(dst_frame);
-#endif
- /* decompress using comp_buf to dst_frame */
- fil_decompress_page(slot->comp_buf,
- dst_frame,
- ulong(size),
- &bpage->write_size);
- ut_d(fil_page_type_validate(dst_frame));
- }
-
- /* Mark this slot as free */
- if (slot) {
- slot->reserved = false;
- }
- }
-
- ut_ad(space->n_pending_ios > 0);
- return (success);
-}
diff --git a/storage/xtradb/buf/buf0checksum.cc b/storage/xtradb/buf/buf0checksum.cc
deleted file mode 100644
index 01b646a78e0..00000000000
--- a/storage/xtradb/buf/buf0checksum.cc
+++ /dev/null
@@ -1,156 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file buf/buf0checksum.cc
-Buffer pool checksum functions, also linked from /extra/innochecksum.cc
-
-Created Aug 11, 2011 Vasil Dimov
-*******************************************************/
-
-#include "univ.i"
-#include "fil0fil.h" /* FIL_* */
-#include "ut0crc32.h" /* ut_crc32() */
-#include "ut0rnd.h" /* ut_fold_binary() */
-#include "buf0types.h"
-
-#ifndef UNIV_INNOCHECKSUM
-
-#include "srv0srv.h" /* SRV_CHECKSUM_* */
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/** the macro MYSQL_SYSVAR_ENUM() requires "long unsigned int" and if we
-use srv_checksum_algorithm_t here then we get a compiler error:
-ha_innodb.cc:12251: error: cannot convert 'srv_checksum_algorithm_t*' to
- 'long unsigned int*' in initialization */
-UNIV_INTERN ulong srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_INNODB;
-
-/********************************************************************//**
-Calculates a page CRC32 which is stored to the page when it is written
-to a file. Note that we must be careful to calculate the same value on
-32-bit and 64-bit architectures.
-@return checksum */
-UNIV_INTERN
-ib_uint32_t
-buf_calc_page_crc32(
-/*================*/
- const byte* page) /*!< in: buffer page */
-{
- ib_uint32_t checksum;
-
- /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, are written outside the buffer pool
- to the first pages of data files, we have to skip them in the page
- checksum calculation.
- We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
- checksum is stored, and also the last 8 bytes of page because
- there we store the old formula checksum. */
-
- checksum = ut_crc32(page + FIL_PAGE_OFFSET,
- FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
- - FIL_PAGE_OFFSET)
- ^ ut_crc32(page + FIL_PAGE_DATA,
- UNIV_PAGE_SIZE - FIL_PAGE_DATA
- - FIL_PAGE_END_LSN_OLD_CHKSUM);
-
- return(checksum);
-}
-
-/********************************************************************//**
-Calculates a page checksum which is stored to the page when it is written
-to a file. Note that we must be careful to calculate the same value on
-32-bit and 64-bit architectures.
-@return checksum */
-UNIV_INTERN
-ulint
-buf_calc_page_new_checksum(
-/*=======================*/
- const byte* page) /*!< in: buffer page */
-{
- ulint checksum;
-
- /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, are written outside the buffer pool
- to the first pages of data files, we have to skip them in the page
- checksum calculation.
- We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
- checksum is stored, and also the last 8 bytes of page because
- there we store the old formula checksum. */
-
- checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
- FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
- - FIL_PAGE_OFFSET)
- + ut_fold_binary(page + FIL_PAGE_DATA,
- UNIV_PAGE_SIZE - FIL_PAGE_DATA
- - FIL_PAGE_END_LSN_OLD_CHKSUM);
- checksum = checksum & 0xFFFFFFFFUL;
-
- return(checksum);
-}
-
-/********************************************************************//**
-In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
-looked at the first few bytes of the page. This calculates that old
-checksum.
-NOTE: we must first store the new formula checksum to
-FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
-because this takes that field as an input!
-@return checksum */
-UNIV_INTERN
-ulint
-buf_calc_page_old_checksum(
-/*=======================*/
- const byte* page) /*!< in: buffer page */
-{
- ulint checksum;
-
- checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
-
- checksum = checksum & 0xFFFFFFFFUL;
-
- return(checksum);
-}
-
-/********************************************************************//**
-Return a printable string describing the checksum algorithm.
-@return algorithm name */
-UNIV_INTERN
-const char*
-buf_checksum_algorithm_name(
-/*========================*/
- srv_checksum_algorithm_t algo) /*!< in: algorithm */
-{
- switch (algo) {
- case SRV_CHECKSUM_ALGORITHM_CRC32:
- return("crc32");
- case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
- return("strict_crc32");
- case SRV_CHECKSUM_ALGORITHM_INNODB:
- return("innodb");
- case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
- return("strict_innodb");
- case SRV_CHECKSUM_ALGORITHM_NONE:
- return("none");
- case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
- return("strict_none");
- }
-
- ut_error;
- return(NULL);
-}
diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc
deleted file mode 100644
index 49371f9a6f1..00000000000
--- a/storage/xtradb/buf/buf0dblwr.cc
+++ /dev/null
@@ -1,1288 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file buf/buf0dblwr.cc
-Doublwrite buffer module
-
-Created 2011/12/19
-*******************************************************/
-
-#include "buf0dblwr.h"
-
-#ifdef UNIV_NONINL
-#include "buf0buf.ic"
-#endif
-
-#include "buf0buf.h"
-#include "buf0checksum.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-#include "page0zip.h"
-#include "trx0sys.h"
-#include "fil0crypt.h"
-#include "fil0pagecompress.h"
-
-#ifndef UNIV_HOTBACKUP
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register the mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t buf_dblwr_mutex_key;
-#endif /* UNIV_PFS_RWLOCK */
-
-/** The doublewrite buffer */
-UNIV_INTERN buf_dblwr_t* buf_dblwr = NULL;
-
-/** Set to TRUE when the doublewrite buffer is being created */
-UNIV_INTERN ibool buf_dblwr_being_created = FALSE;
-
-#define TRX_SYS_DOUBLEWRITE_BLOCKS 2
-
-/****************************************************************//**
-Determines if a page number is located inside the doublewrite buffer.
-@return TRUE if the location is inside the two blocks of the
-doublewrite buffer */
-UNIV_INTERN
-ibool
-buf_dblwr_page_inside(
-/*==================*/
- ulint page_no) /*!< in: page number */
-{
- if (buf_dblwr == NULL) {
-
- return(FALSE);
- }
-
- if (page_no >= buf_dblwr->block1
- && page_no < buf_dblwr->block1
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- return(TRUE);
- }
-
- if (page_no >= buf_dblwr->block2
- && page_no < buf_dblwr->block2
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/****************************************************************//**
-Calls buf_page_get() on the TRX_SYS_PAGE and returns a pointer to the
-doublewrite buffer within it.
-@return pointer to the doublewrite buffer within the filespace header
-page. */
-UNIV_INLINE
-byte*
-buf_dblwr_get(
-/*==========*/
- mtr_t* mtr) /*!< in/out: MTR to hold the page latch */
-{
- buf_block_t* block;
-
- block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
- RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
- return(buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE);
-}
-
-/********************************************************************//**
-Flush a batch of writes to the datafiles that have already been
-written to the dblwr buffer on disk. */
-UNIV_INLINE
-void
-buf_dblwr_sync_datafiles()
-/*======================*/
-{
- /* Wake possible simulated aio thread to actually post the
- writes to the operating system */
- os_aio_simulated_wake_handler_threads();
-
- /* Wait that all async writes to tablespaces have been posted to
- the OS */
- os_aio_wait_until_no_pending_writes();
-
- /* Now we flush the data to disk (for example, with fsync) */
- fil_flush_file_spaces(FIL_TABLESPACE);
-}
-
-/****************************************************************//**
-Creates or initialializes the doublewrite buffer at a database start. */
-static
-void
-buf_dblwr_init(
-/*===========*/
- byte* doublewrite) /*!< in: pointer to the doublewrite buf
- header on trx sys page */
-{
- ulint buf_size;
-
- buf_dblwr = static_cast<buf_dblwr_t*>(
- mem_zalloc(sizeof(buf_dblwr_t)));
-
- /* There are two blocks of same size in the doublewrite
- buffer. */
- buf_size = TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
-
- /* There must be atleast one buffer for single page writes
- and one buffer for batch writes. */
- ut_a(srv_doublewrite_batch_size > 0
- && srv_doublewrite_batch_size < buf_size);
-
- mutex_create(buf_dblwr_mutex_key,
- &buf_dblwr->mutex, SYNC_DOUBLEWRITE);
-
- buf_dblwr->b_event = os_event_create();
- buf_dblwr->s_event = os_event_create();
- buf_dblwr->first_free = 0;
- buf_dblwr->s_reserved = 0;
- buf_dblwr->b_reserved = 0;
-
- buf_dblwr->block1 = mach_read_from_4(
- doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
- buf_dblwr->block2 = mach_read_from_4(
- doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
-
- buf_dblwr->in_use = static_cast<bool*>(
- mem_zalloc(buf_size * sizeof(bool)));
-
- buf_dblwr->write_buf_unaligned = static_cast<byte*>(
- ut_malloc((1 + buf_size) * UNIV_PAGE_SIZE));
-
- buf_dblwr->write_buf = static_cast<byte*>(
- ut_align(buf_dblwr->write_buf_unaligned,
- UNIV_PAGE_SIZE));
-
- buf_dblwr->buf_block_arr = static_cast<buf_page_t**>(
- mem_zalloc(buf_size * sizeof(void*)));
-}
-
-/** Create the doublewrite buffer if the doublewrite buffer header
-is not present in the TRX_SYS page.
-@return whether the operation succeeded
-@retval true if the doublewrite buffer exists or was created
-@retval false if the creation failed (too small first data file) */
-UNIV_INTERN
-bool
-buf_dblwr_create()
-{
- buf_block_t* block2;
- buf_block_t* new_block;
- byte* doublewrite;
- byte* fseg_header;
- ulint page_no;
- ulint prev_page_no;
- ulint i;
- mtr_t mtr;
-
- if (buf_dblwr) {
- /* Already inited */
- return(true);
- }
-
-start_again:
- mtr_start(&mtr);
- buf_dblwr_being_created = TRUE;
-
- doublewrite = buf_dblwr_get(&mtr);
-
- if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
- == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
- /* The doublewrite buffer has already been created:
- just read in some numbers */
-
- buf_dblwr_init(doublewrite);
-
- mtr_commit(&mtr);
- buf_dblwr_being_created = FALSE;
- return(true);
- }
-
- if (buf_pool_get_curr_size()
- < ((TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
- + FSP_EXTENT_SIZE / 2 + 100)
- * UNIV_PAGE_SIZE)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot create doublewrite buffer: "
- "innodb_buffer_pool_size is too small.");
- mtr_commit(&mtr);
- return(false);
- } else {
- fil_space_t* space = fil_space_acquire(TRX_SYS_SPACE);
- const bool fail = UT_LIST_GET_FIRST(space->chain)->size
- < 3 * FSP_EXTENT_SIZE;
- fil_space_release(space);
-
- if (fail) {
- goto too_small;
- }
- }
-
- block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
- TRX_SYS_DOUBLEWRITE
- + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
-
- if (block2 == NULL) {
-too_small:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot create doublewrite buffer: "
- "the first file in innodb_data_file_path"
- " must be at least %luM.",
- 3 * (FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) >> 20);
- mtr_commit(&mtr);
- return(false);
- }
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Doublewrite buffer not found: creating new");
-
- /* FIXME: After this point, the doublewrite buffer creation
- is not atomic. The doublewrite buffer should not exist in
- the InnoDB system tablespace file in the first place.
- It could be located in separate optional file(s) in a
- user-specified location. */
-
- /* fseg_create acquires a second latch on the page,
- therefore we must declare it: */
-
- buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
-
- fseg_header = doublewrite + TRX_SYS_DOUBLEWRITE_FSEG;
- prev_page_no = 0;
-
- for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
- + FSP_EXTENT_SIZE / 2; i++) {
- new_block = fseg_alloc_free_page(
- fseg_header, prev_page_no + 1, FSP_UP, &mtr);
- if (new_block == NULL) {
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Cannot create doublewrite buffer: you must "
- "increase your tablespace size. "
- "Cannot continue operation.");
- }
-
- /* We read the allocated pages to the buffer pool;
- when they are written to disk in a flush, the space
- id and page number fields are also written to the
- pages. When we at database startup read pages
- from the doublewrite buffer, we know that if the
- space id and page number in them are the same as
- the page position in the tablespace, then the page
- has not been written to in doublewrite. */
-
- ut_ad(rw_lock_get_x_lock_count(&new_block->lock) == 1);
- page_no = buf_block_get_page_no(new_block);
-
- if (i == FSP_EXTENT_SIZE / 2) {
- ut_a(page_no == FSP_EXTENT_SIZE);
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_BLOCK1,
- page_no, MLOG_4BYTES, &mtr);
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_REPEAT
- + TRX_SYS_DOUBLEWRITE_BLOCK1,
- page_no, MLOG_4BYTES, &mtr);
-
- } else if (i == FSP_EXTENT_SIZE / 2
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- ut_a(page_no == 2 * FSP_EXTENT_SIZE);
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_BLOCK2,
- page_no, MLOG_4BYTES, &mtr);
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_REPEAT
- + TRX_SYS_DOUBLEWRITE_BLOCK2,
- page_no, MLOG_4BYTES, &mtr);
-
- } else if (i > FSP_EXTENT_SIZE / 2) {
- ut_a(page_no == prev_page_no + 1);
- }
-
- if (((i + 1) & 15) == 0) {
- /* rw_locks can only be recursively x-locked
- 2048 times. (on 32 bit platforms,
- (lint) 0 - (X_LOCK_DECR * 2049)
- is no longer a negative number, and thus
- lock_word becomes like a shared lock).
- For 4k page size this loop will
- lock the fseg header too many times. Since
- this code is not done while any other threads
- are active, restart the MTR occasionally. */
- mtr_commit(&mtr);
- mtr_start(&mtr);
- doublewrite = buf_dblwr_get(&mtr);
- fseg_header = doublewrite
- + TRX_SYS_DOUBLEWRITE_FSEG;
- }
-
- prev_page_no = page_no;
- }
-
- mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
- TRX_SYS_DOUBLEWRITE_MAGIC_N,
- MLOG_4BYTES, &mtr);
- mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
- + TRX_SYS_DOUBLEWRITE_REPEAT,
- TRX_SYS_DOUBLEWRITE_MAGIC_N,
- MLOG_4BYTES, &mtr);
-
- mlog_write_ulint(doublewrite
- + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
- TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
- MLOG_4BYTES, &mtr);
- mtr_commit(&mtr);
-
- /* Flush the modified pages to disk and make a checkpoint */
- log_make_checkpoint_at(LSN_MAX, TRUE);
-
- /* Remove doublewrite pages from LRU */
- buf_pool_invalidate();
-
- ib_logf(IB_LOG_LEVEL_INFO, "Doublewrite buffer created");
-
- goto start_again;
-}
-
-/****************************************************************//**
-At a database startup initializes the doublewrite buffer memory structure if
-we already have a doublewrite buffer created in the data files. If we are
-upgrading to an InnoDB version which supports multiple tablespaces, then this
-function performs the necessary update operations. If we are in a crash
-recovery, this function loads the pages from double write buffer into memory. */
-void
-buf_dblwr_init_or_load_pages(
-/*=========================*/
- pfs_os_file_t file,
- char* path,
- bool load_corrupt_pages)
-{
- byte* buf;
- byte* read_buf;
- byte* unaligned_read_buf;
- ulint block1;
- ulint block2;
- byte* page;
- ibool reset_space_ids = FALSE;
- byte* doublewrite;
- ulint space_id;
- ulint i;
- ulint block_bytes = 0;
- recv_dblwr_t& recv_dblwr = recv_sys->dblwr;
-
- /* We do the file i/o past the buffer pool */
-
- unaligned_read_buf = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE));
-
- read_buf = static_cast<byte*>(
- ut_align(unaligned_read_buf, UNIV_PAGE_SIZE));
-
- /* Read the trx sys header to check if we are using the doublewrite
- buffer */
- off_t trx_sys_page = TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE;
- os_file_read(file, read_buf, trx_sys_page, UNIV_PAGE_SIZE);
-
- doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
-
- /* TRX_SYS_PAGE_NO is not encrypted see fil_crypt_rotate_page() */
-
- if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
- == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
- /* The doublewrite buffer has been created */
-
- buf_dblwr_init(doublewrite);
-
- block1 = buf_dblwr->block1;
- block2 = buf_dblwr->block2;
-
- buf = buf_dblwr->write_buf;
- } else {
- goto leave_func;
- }
-
- if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
- != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
-
- /* We are upgrading from a version < 4.1.x to a version where
- multiple tablespaces are supported. We must reset the space id
- field in the pages in the doublewrite buffer because starting
- from this version the space id is stored to
- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
-
- reset_space_ids = TRUE;
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Resetting space id's in the doublewrite buffer");
- }
-
- /* Read the pages from the doublewrite buffer to memory */
-
- block_bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
-
- os_file_read(file, buf, block1 * UNIV_PAGE_SIZE, block_bytes);
- os_file_read(file, buf + block_bytes, block2 * UNIV_PAGE_SIZE,
- block_bytes);
-
- /* Check if any of these pages is half-written in data files, in the
- intended position */
-
- page = buf;
-
- for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * TRX_SYS_DOUBLEWRITE_BLOCKS; i++) {
-
- ulint source_page_no;
-
- if (reset_space_ids) {
-
- space_id = 0;
- mach_write_to_4(page
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
- /* We do not need to calculate new checksums for the
- pages because the field .._SPACE_ID does not affect
- them. Write the page back to where we read it from. */
-
- if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- source_page_no = block1 + i;
- } else {
- source_page_no = block2
- + i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
- }
-
- os_file_write(path, file, page,
- source_page_no * UNIV_PAGE_SIZE,
- UNIV_PAGE_SIZE);
- } else if (load_corrupt_pages
- && !buf_page_is_zeroes(page, FIL_PAGE_DATA)) {
- /* Each valid page header must contain some
- nonzero bytes, such as FIL_PAGE_OFFSET
- or FIL_PAGE_LSN. */
- recv_dblwr.add(page);
- }
-
- page += UNIV_PAGE_SIZE;
- }
-
- if (reset_space_ids) {
- os_file_flush(file);
- }
-
-leave_func:
- ut_free(unaligned_read_buf);
-}
-
-/****************************************************************//**
-Process the double write buffer pages. */
-void
-buf_dblwr_process()
-/*===============*/
-{
- ulint space_id;
- ulint page_no;
- ulint page_no_dblwr = 0;
- byte* page;
- byte* read_buf;
- byte* unaligned_read_buf;
- recv_dblwr_t& recv_dblwr = recv_sys->dblwr;
-
- if (!buf_dblwr) {
- return;
- }
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Restoring possible half-written data pages "
- "from the doublewrite buffer...");
-
- unaligned_read_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
-
- read_buf = static_cast<byte*>(
- ut_align(unaligned_read_buf, UNIV_PAGE_SIZE));
-
- for (std::list<byte*>::iterator i = recv_dblwr.pages.begin();
- i != recv_dblwr.pages.end(); ++i, ++page_no_dblwr ) {
- page = *i;
- page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
- space_id = mach_read_from_4(page + FIL_PAGE_SPACE_ID);
-
- FilSpace space(space_id, true);
-
- if (!space()) {
- /* Maybe we have dropped the single-table tablespace
- and this page once belonged to it: do nothing */
- continue;
- }
-
- if (!fil_check_adress_in_tablespace(space_id, page_no)) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "A copy of page " ULINTPF ":" ULINTPF
- " in the doublewrite buffer slot " ULINTPF
- " is not within space bounds",
- space_id, page_no, page_no_dblwr);
- continue;
- }
-
- ulint zip_size = fsp_flags_get_zip_size(space()->flags);
- ut_ad(!buf_page_is_zeroes(page, zip_size));
-
- /* Read in the actual page from the file */
- fil_io(OS_FILE_READ,
- true,
- space_id,
- zip_size,
- page_no,
- 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE,
- read_buf,
- NULL,
- 0);
-
- const bool is_all_zero = buf_page_is_zeroes(
- read_buf, zip_size);
-
- if (is_all_zero) {
- /* We will check if the copy in the
- doublewrite buffer is valid. If not, we will
- ignore this page (there should be redo log
- records to initialize it). */
- } else {
- if (fil_page_is_compressed_encrypted(read_buf) ||
- fil_page_is_compressed(read_buf)) {
- /* Decompress the page before
- validating the checksum. */
- fil_decompress_page(
- NULL, read_buf, srv_page_size,
- NULL, true);
- }
-
- if (fil_space_verify_crypt_checksum(
- read_buf, zip_size, NULL, page_no)
- || !buf_page_is_corrupted(
- true, read_buf, zip_size, space())) {
- /* The page is good; there is no need
- to consult the doublewrite buffer. */
- continue;
- }
-
- /* We intentionally skip this message for
- is_all_zero pages. */
- ib_logf(IB_LOG_LEVEL_INFO,
- "Trying to recover page " ULINTPF ":" ULINTPF
- " from the doublewrite buffer.",
- space_id, page_no);
- }
-
- /* Next, validate the doublewrite page. */
- if (fil_page_is_compressed_encrypted(page) ||
- fil_page_is_compressed(page)) {
- /* Decompress the page before
- validating the checksum. */
- fil_decompress_page(
- NULL, page, srv_page_size, NULL, true);
- }
-
- if (!fil_space_verify_crypt_checksum(page, zip_size, NULL, page_no)
- && buf_page_is_corrupted(true, page, zip_size, space)) {
- if (!is_all_zero) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "A doublewrite copy of page "
- ULINTPF ":" ULINTPF " is corrupted.",
- space_id, page_no);
- }
- /* Theoretically we could have another good
- copy for this page in the doublewrite
- buffer. If not, we will report a fatal error
- for a corrupted page somewhere else if that
- page was truly needed. */
- continue;
- }
-
- if (page_no == 0) {
- /* Check the FSP_SPACE_FLAGS. */
- ulint flags = fsp_header_get_flags(page);
- if (!fsp_flags_is_valid(flags)
- && fsp_flags_convert_from_101(flags)
- == ULINT_UNDEFINED) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Ignoring a doublewrite copy of page "
- ULINTPF ":0 due to invalid flags 0x%x",
- space_id, int(flags));
- continue;
- }
- /* The flags on the page should be converted later. */
- }
-
- /* Write the good page from the doublewrite buffer to
- the intended position. */
-
- fil_io(OS_FILE_WRITE, true, space_id, zip_size, page_no, 0,
- zip_size ? zip_size : UNIV_PAGE_SIZE,
- page, NULL, 0);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Recovered page " ULINTPF ":" ULINTPF " from"
- " the doublewrite buffer.",
- space_id, page_no);
- }
-
- ut_free(unaligned_read_buf);
- fil_flush_file_spaces(FIL_TABLESPACE);
-
- {
- size_t bytes = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
- byte *unaligned_buf = static_cast<byte*>(
- ut_malloc(bytes + UNIV_PAGE_SIZE - 1));
-
- byte *buf = static_cast<byte*>(
- ut_align(unaligned_buf, UNIV_PAGE_SIZE));
- memset(buf, 0, bytes);
-
- fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
- buf_dblwr->block1, 0, bytes, buf, NULL, NULL);
- fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0,
- buf_dblwr->block2, 0, bytes, buf, NULL, NULL);
-
- ut_free(unaligned_buf);
- }
-}
-
-/****************************************************************//**
-Frees doublewrite buffer. */
-UNIV_INTERN
-void
-buf_dblwr_free(void)
-/*================*/
-{
- /* Free the double write data structures. */
- ut_a(buf_dblwr != NULL);
- ut_ad(buf_dblwr->s_reserved == 0);
- ut_ad(buf_dblwr->b_reserved == 0);
-
- os_event_free(buf_dblwr->b_event);
- os_event_free(buf_dblwr->s_event);
- ut_free(buf_dblwr->write_buf_unaligned);
- buf_dblwr->write_buf_unaligned = NULL;
-
- mem_free(buf_dblwr->buf_block_arr);
- buf_dblwr->buf_block_arr = NULL;
-
- mem_free(buf_dblwr->in_use);
- buf_dblwr->in_use = NULL;
-
- mutex_free(&buf_dblwr->mutex);
- mem_free(buf_dblwr);
- buf_dblwr = NULL;
-}
-
-/********************************************************************//**
-Updates the doublewrite buffer when an IO request is completed. */
-UNIV_INTERN
-void
-buf_dblwr_update(
-/*=============*/
- const buf_page_t* bpage, /*!< in: buffer block descriptor */
- buf_flush_t flush_type)/*!< in: flush type */
-{
- if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
- return;
- }
-
- switch (flush_type) {
- case BUF_FLUSH_LIST:
- case BUF_FLUSH_LRU:
- mutex_enter(&buf_dblwr->mutex);
-
- ut_ad(buf_dblwr->batch_running);
- ut_ad(buf_dblwr->b_reserved > 0);
- ut_ad(buf_dblwr->b_reserved <= buf_dblwr->first_free);
-
- buf_dblwr->b_reserved--;
-
- if (buf_dblwr->b_reserved == 0) {
- mutex_exit(&buf_dblwr->mutex);
- /* This will finish the batch. Sync data files
- to the disk. */
- fil_flush_file_spaces(FIL_TABLESPACE);
- mutex_enter(&buf_dblwr->mutex);
-
- /* We can now reuse the doublewrite memory buffer: */
- buf_dblwr->first_free = 0;
- buf_dblwr->batch_running = false;
- os_event_set(buf_dblwr->b_event);
- }
-
- mutex_exit(&buf_dblwr->mutex);
- break;
- case BUF_FLUSH_SINGLE_PAGE:
- {
- const ulint size = TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
- ulint i;
- mutex_enter(&buf_dblwr->mutex);
- for (i = srv_doublewrite_batch_size; i < size; ++i) {
- if (buf_dblwr->buf_block_arr[i] == bpage) {
- buf_dblwr->s_reserved--;
- buf_dblwr->buf_block_arr[i] = NULL;
- buf_dblwr->in_use[i] = false;
- break;
- }
- }
-
- /* The block we are looking for must exist as a
- reserved block. */
- ut_a(i < size);
- }
- os_event_set(buf_dblwr->s_event);
- mutex_exit(&buf_dblwr->mutex);
- break;
- case BUF_FLUSH_N_TYPES:
- ut_error;
- }
-}
-
-/********************************************************************//**
-Check the LSN values on the page. */
-static
-void
-buf_dblwr_check_page_lsn(
-/*=====================*/
- const page_t* page) /*!< in: page to check */
-{
- ibool page_compressed = (mach_read_from_2(page+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED);
- uint key_version = mach_read_from_4(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
-
- /* Ignore page compressed or encrypted pages */
- if (page_compressed || key_version) {
- return;
- }
-
- if (memcmp(page + (FIL_PAGE_LSN + 4),
- page + (UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
- 4)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: The page to be written"
- " seems corrupt!\n"
- "InnoDB: The low 4 bytes of LSN fields do not match "
- "(" ULINTPF " != " ULINTPF ")!"
- " Noticed in the buffer pool.\n",
- mach_read_from_4(
- page + FIL_PAGE_LSN + 4),
- mach_read_from_4(
- page + UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM + 4));
- }
-}
-
-/********************************************************************//**
-Asserts when a corrupt block is find during writing out data to the
-disk. */
-static
-void
-buf_dblwr_assert_on_corrupt_block(
-/*==============================*/
- const buf_block_t* block) /*!< in: block to check */
-{
- buf_page_print(block->frame, 0, BUF_PAGE_PRINT_NO_CRASH);
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Apparent corruption of an"
- " index page n:o %lu in space %lu\n"
- "InnoDB: to be written to data file."
- " We intentionally crash server\n"
- "InnoDB: to prevent corrupt data"
- " from ending up in data\n"
- "InnoDB: files.\n",
- (ulong) buf_block_get_page_no(block),
- (ulong) buf_block_get_space(block));
-
- ut_error;
-}
-
-/********************************************************************//**
-Check the LSN values on the page with which this block is associated.
-Also validate the page if the option is set. */
-static
-void
-buf_dblwr_check_block(
-/*==================*/
- const buf_block_t* block) /*!< in: block to check */
-{
- if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
- || block->page.zip.data) {
- /* No simple validate for compressed pages exists. */
- return;
- }
-
- buf_dblwr_check_page_lsn(block->frame);
-
- if (!block->check_index_page_at_flush) {
- return;
- }
-
- if (page_is_comp(block->frame)) {
- if (!page_simple_validate_new(block->frame)) {
- buf_dblwr_assert_on_corrupt_block(block);
- }
- } else if (!page_simple_validate_old(block->frame)) {
-
- buf_dblwr_assert_on_corrupt_block(block);
- }
-}
-
-/********************************************************************//**
-Writes a page that has already been written to the doublewrite buffer
-to the datafile. It is the job of the caller to sync the datafile. */
-static
-void
-buf_dblwr_write_block_to_datafile(
-/*==============================*/
- const buf_page_t* bpage, /*!< in: page to write */
- bool sync) /*!< in: true if sync IO
- is requested */
-{
- ut_a(bpage);
- ut_a(buf_page_in_file(bpage));
-
- const ulint flags = sync
- ? OS_FILE_WRITE
- : OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER;
-
- void * frame = buf_page_get_frame(bpage);
-
- if (bpage->zip.data) {
- fil_io(flags,
- sync,
- buf_page_get_space(bpage),
- buf_page_get_zip_size(bpage),
- buf_page_get_page_no(bpage),
- 0,
- buf_page_get_zip_size(bpage),
- frame,
- (void*) bpage,
- 0);
-
- return;
- }
-
-
- const buf_block_t* block = (buf_block_t*) bpage;
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- buf_dblwr_check_page_lsn(block->frame);
-
- fil_io(flags,
- sync,
- buf_block_get_space(block),
- 0,
- buf_block_get_page_no(block),
- 0,
- bpage->real_size,
- frame,
- (void*) block,
- (ulint *)&bpage->write_size);
-}
-
-/********************************************************************//**
-Flushes possible buffered writes from the doublewrite memory buffer to disk,
-and also wakes up the aio thread if simulated aio is used. It is very
-important to call this function after a batch of writes has been posted,
-and also when we may have to wait for a page latch! Otherwise a deadlock
-of threads can occur. */
-UNIV_INTERN
-void
-buf_dblwr_flush_buffered_writes(void)
-/*=================================*/
-{
- byte* write_buf;
- ulint first_free;
- ulint len;
-
- if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
- /* Sync the writes to the disk. */
- buf_dblwr_sync_datafiles();
- return;
- }
-
-try_again:
- mutex_enter(&buf_dblwr->mutex);
-
- /* Write first to doublewrite buffer blocks. We use synchronous
- aio and thus know that file write has been completed when the
- control returns. */
-
- if (buf_dblwr->first_free == 0) {
-
- mutex_exit(&buf_dblwr->mutex);
-
- return;
- }
-
- if (buf_dblwr->batch_running) {
- /* Another thread is running the batch right now. Wait
- for it to finish. */
- ib_int64_t sig_count = os_event_reset(buf_dblwr->b_event);
- mutex_exit(&buf_dblwr->mutex);
-
- os_event_wait_low(buf_dblwr->b_event, sig_count);
- goto try_again;
- }
-
- ut_a(!buf_dblwr->batch_running);
- ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved);
-
- /* Disallow anyone else to post to doublewrite buffer or to
- start another batch of flushing. */
- buf_dblwr->batch_running = true;
- first_free = buf_dblwr->first_free;
-
- /* Now safe to release the mutex. Note that though no other
- thread is allowed to post to the doublewrite batch flushing
- but any threads working on single page flushes are allowed
- to proceed. */
- mutex_exit(&buf_dblwr->mutex);
-
- write_buf = buf_dblwr->write_buf;
-
- for (ulint len2 = 0, i = 0;
- i < buf_dblwr->first_free;
- len2 += UNIV_PAGE_SIZE, i++) {
-
- const buf_block_t* block;
-
- block = (buf_block_t*) buf_dblwr->buf_block_arr[i];
-
- if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
- || block->page.zip.data) {
- /* No simple validate for compressed
- pages exists. */
- continue;
- }
-
- /* Check that the actual page in the buffer pool is
- not corrupt and the LSN values are sane. */
- buf_dblwr_check_block(block);
-
- /* Check that the page as written to the doublewrite
- buffer has sane LSN values. */
- buf_dblwr_check_page_lsn(write_buf + len2);
- }
-
- /* Write out the first block of the doublewrite buffer */
- len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
- buf_dblwr->first_free) * UNIV_PAGE_SIZE;
-
- fil_io(OS_FILE_WRITE,
- true,
- TRX_SYS_SPACE,
- 0,
- buf_dblwr->block1,
- 0,
- len,
- (void*)
- write_buf,
- NULL,
- 0);
-
- if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- /* No unwritten pages in the second block. */
- goto flush;
- }
-
- /* Write out the second block of the doublewrite buffer. */
- len = (buf_dblwr->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
- * UNIV_PAGE_SIZE;
-
- write_buf = buf_dblwr->write_buf
- + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
-
- fil_io(OS_FILE_WRITE,
- true,
- TRX_SYS_SPACE,
- 0,
- buf_dblwr->block2,
- 0,
- len,
- (void*) write_buf,
- NULL,
- 0);
-
-flush:
- /* increment the doublewrite flushed pages counter */
- srv_stats.dblwr_pages_written.add(buf_dblwr->first_free);
- srv_stats.dblwr_writes.inc();
-
- /* Now flush the doublewrite buffer data to disk */
- fil_flush(ulint(TRX_SYS_SPACE));
-
- /* We know that the writes have been flushed to disk now
- and in recovery we will find them in the doublewrite buffer
- blocks. Next do the writes to the intended positions. */
-
- /* Up to this point first_free and buf_dblwr->first_free are
- same because we have set the buf_dblwr->batch_running flag
- disallowing any other thread to post any request but we
- can't safely access buf_dblwr->first_free in the loop below.
- This is so because it is possible that after we are done with
- the last iteration and before we terminate the loop, the batch
- gets finished in the IO helper thread and another thread posts
- a new batch setting buf_dblwr->first_free to a higher value.
- If this happens and we are using buf_dblwr->first_free in the
- loop termination condition then we'll end up dispatching
- the same block twice from two different threads. */
- ut_ad(first_free == buf_dblwr->first_free);
- for (ulint i = 0; i < first_free; i++) {
- buf_dblwr_write_block_to_datafile(
- buf_dblwr->buf_block_arr[i], false);
- }
-
- /* Wake possible simulated aio thread to actually post the
- writes to the operating system. We don't flush the files
- at this point. We leave it to the IO helper thread to flush
- datafiles when the whole batch has been processed. */
- os_aio_simulated_wake_handler_threads();
-}
-
-/********************************************************************//**
-Posts a buffer page for writing. If the doublewrite memory buffer is
-full, calls buf_dblwr_flush_buffered_writes and waits for for free
-space to appear. */
-UNIV_INTERN
-void
-buf_dblwr_add_to_batch(
-/*====================*/
- buf_page_t* bpage) /*!< in: buffer block to write */
-{
- ulint zip_size;
-
- ut_a(buf_page_in_file(bpage));
- ut_ad(!mutex_own(&buf_pool_from_bpage(bpage)->LRU_list_mutex));
-
-try_again:
- mutex_enter(&buf_dblwr->mutex);
-
- ut_a(buf_dblwr->first_free <= srv_doublewrite_batch_size);
-
- if (buf_dblwr->batch_running) {
-
- /* This not nearly as bad as it looks. There is only
- page_cleaner thread which does background flushing
- in batches therefore it is unlikely to be a contention
- point. The only exception is when a user thread is
- forced to do a flush batch because of a sync
- checkpoint. */
- ib_int64_t sig_count = os_event_reset(buf_dblwr->b_event);
- mutex_exit(&buf_dblwr->mutex);
-
- os_event_wait_low(buf_dblwr->b_event, sig_count);
- goto try_again;
- }
-
- if (buf_dblwr->first_free == srv_doublewrite_batch_size) {
- mutex_exit(&(buf_dblwr->mutex));
-
- buf_dblwr_flush_buffered_writes();
-
- goto try_again;
- }
-
- zip_size = buf_page_get_zip_size(bpage);
- void * frame = buf_page_get_frame(bpage);
-
- if (zip_size) {
- UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size);
- /* Copy the compressed page and clear the rest. */
- memcpy(buf_dblwr->write_buf
- + UNIV_PAGE_SIZE * buf_dblwr->first_free,
- frame, zip_size);
- memset(buf_dblwr->write_buf
- + UNIV_PAGE_SIZE * buf_dblwr->first_free
- + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
- } else {
- ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
- UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame,
- UNIV_PAGE_SIZE);
-
- memcpy(buf_dblwr->write_buf
- + UNIV_PAGE_SIZE * buf_dblwr->first_free,
- frame, UNIV_PAGE_SIZE);
- }
-
- buf_dblwr->buf_block_arr[buf_dblwr->first_free] = bpage;
-
- buf_dblwr->first_free++;
- buf_dblwr->b_reserved++;
-
- ut_ad(!buf_dblwr->batch_running);
- ut_ad(buf_dblwr->first_free == buf_dblwr->b_reserved);
- ut_ad(buf_dblwr->b_reserved <= srv_doublewrite_batch_size);
-
- if (buf_dblwr->first_free == srv_doublewrite_batch_size) {
- mutex_exit(&(buf_dblwr->mutex));
-
- buf_dblwr_flush_buffered_writes();
-
- return;
- }
-
- mutex_exit(&(buf_dblwr->mutex));
-}
-
-/********************************************************************//**
-Writes a page to the doublewrite buffer on disk, sync it, then write
-the page to the datafile and sync the datafile. This function is used
-for single page flushes. If all the buffers allocated for single page
-flushes in the doublewrite buffer are in use we wait here for one to
-become free. We are guaranteed that a slot will become free because any
-thread that is using a slot must also release the slot before leaving
-this function. */
-UNIV_INTERN
-void
-buf_dblwr_write_single_page(
-/*========================*/
- buf_page_t* bpage, /*!< in: buffer block to write */
- bool sync) /*!< in: true if sync IO requested */
-{
- ulint n_slots;
- ulint size;
- ulint zip_size;
- ulint offset;
- ulint i;
-
- ut_a(buf_page_in_file(bpage));
- ut_a(srv_use_doublewrite_buf);
- ut_a(buf_dblwr != NULL);
-
- /* total number of slots available for single page flushes
- starts from srv_doublewrite_batch_size to the end of the
- buffer. */
- size = TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
- ut_a(size > srv_doublewrite_batch_size);
- n_slots = size - srv_doublewrite_batch_size;
-
- if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
-
- /* Check that the actual page in the buffer pool is
- not corrupt and the LSN values are sane. */
- buf_dblwr_check_block((buf_block_t*) bpage);
-
- /* Check that the page as written to the doublewrite
- buffer has sane LSN values. */
- if (!bpage->zip.data) {
- buf_dblwr_check_page_lsn(
- ((buf_block_t*) bpage)->frame);
- }
- }
-
-retry:
- mutex_enter(&buf_dblwr->mutex);
- if (buf_dblwr->s_reserved == n_slots) {
-
- /* All slots are reserved. */
- ib_int64_t sig_count =
- os_event_reset(buf_dblwr->s_event);
- mutex_exit(&buf_dblwr->mutex);
- os_event_wait_low(buf_dblwr->s_event, sig_count);
-
- goto retry;
- }
-
- for (i = srv_doublewrite_batch_size; i < size; ++i) {
-
- if (!buf_dblwr->in_use[i]) {
- break;
- }
- }
-
- /* We are guaranteed to find a slot. */
- ut_a(i < size);
- buf_dblwr->in_use[i] = true;
- buf_dblwr->s_reserved++;
- buf_dblwr->buf_block_arr[i] = bpage;
-
- /* increment the doublewrite flushed pages counter */
- srv_stats.dblwr_pages_written.inc();
- srv_stats.dblwr_writes.inc();
-
- mutex_exit(&buf_dblwr->mutex);
-
- /* Lets see if we are going to write in the first or second
- block of the doublewrite buffer. */
- if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
- offset = buf_dblwr->block1 + i;
- } else {
- offset = buf_dblwr->block2 + i
- - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
- }
-
- /* We deal with compressed and uncompressed pages a little
- differently here. In case of uncompressed pages we can
- directly write the block to the allocated slot in the
- doublewrite buffer in the system tablespace and then after
- syncing the system table space we can proceed to write the page
- in the datafile.
- In case of compressed page we first do a memcpy of the block
- to the in-memory buffer of doublewrite before proceeding to
- write it. This is so because we want to pad the remaining
- bytes in the doublewrite page with zeros. */
-
- zip_size = buf_page_get_zip_size(bpage);
- void * frame = buf_page_get_frame(bpage);
-
- if (zip_size) {
- memcpy(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i,
- frame, zip_size);
- memset(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i
- + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
-
- fil_io(OS_FILE_WRITE,
- true,
- TRX_SYS_SPACE,
- 0,
- offset,
- 0,
- UNIV_PAGE_SIZE,
- (void*) (buf_dblwr->write_buf + UNIV_PAGE_SIZE * i),
- NULL,
- 0);
- } else {
- /* It is a regular page. Write it directly to the
- doublewrite buffer */
- fil_io(OS_FILE_WRITE,
- true,
- TRX_SYS_SPACE,
- 0,
- offset,
- 0,
- bpage->real_size,
- frame,
- NULL,
- 0);
- }
-
- /* Now flush the doublewrite buffer data to disk */
- fil_flush(ulint(TRX_SYS_SPACE));
-
- /* We know that the write has been flushed to disk now
- and during recovery we will find it in the doublewrite buffer
- blocks. Next do the write to the intended position. */
- buf_dblwr_write_block_to_datafile(bpage, sync);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/buf/buf0dump.cc b/storage/xtradb/buf/buf0dump.cc
deleted file mode 100644
index 71b97b770e1..00000000000
--- a/storage/xtradb/buf/buf0dump.cc
+++ /dev/null
@@ -1,732 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file buf/buf0dump.cc
-Implements a buffer pool dump/load.
-
-Created April 08, 2011 Vasil Dimov
-*******************************************************/
-
-#include "univ.i"
-
-#include <stdarg.h> /* va_* */
-#include <string.h> /* strerror() */
-
-#include "buf0buf.h" /* srv_buf_pool_instances */
-#include "buf0dump.h"
-#include "db0err.h"
-#include "dict0dict.h" /* dict_operation_lock */
-#include "os0file.h" /* OS_FILE_MAX_PATH */
-#include "os0sync.h" /* os_event* */
-#include "os0thread.h" /* os_thread_* */
-#include "srv0srv.h" /* srv_fast_shutdown, srv_buf_dump* */
-#include "srv0start.h" /* srv_shutdown_state */
-#include "sync0rw.h" /* rw_lock_s_lock() */
-#include "ut0byte.h" /* ut_ull_create() */
-#include "ut0sort.h" /* UT_SORT_FUNCTION_BODY */
-
-enum status_severity {
- STATUS_INFO,
- STATUS_NOTICE,
- STATUS_ERR
-};
-
-#define SHUTTING_DOWN() (UNIV_UNLIKELY(srv_shutdown_state \
- != SRV_SHUTDOWN_NONE))
-
-/* Flags that tell the buffer pool dump/load thread which action should it
-take after being waked up. */
-static volatile bool buf_dump_should_start;
-static volatile bool buf_load_should_start;
-
-static ibool buf_load_abort_flag = FALSE;
-
-/* Used to temporary store dump info in order to avoid IO while holding
-buffer pool LRU list mutex during dump and also to sort the contents of the
-dump before reading the pages from disk during load.
-We store the space id in the high 32 bits and page no in low 32 bits. */
-typedef ib_uint64_t buf_dump_t;
-
-/* Aux macros to create buf_dump_t and to extract space and page from it */
-#define BUF_DUMP_CREATE(space, page) ut_ull_create(space, page)
-#define BUF_DUMP_SPACE(a) ((ulint) ((a) >> 32))
-#define BUF_DUMP_PAGE(a) ((ulint) ((a) & 0xFFFFFFFFUL))
-
-/*****************************************************************//**
-Wakes up the buffer pool dump/load thread and instructs it to start
-a dump. This function is called by MySQL code via buffer_pool_dump_now()
-and it should return immediately because the whole MySQL is frozen during
-its execution. */
-UNIV_INTERN
-void
-buf_dump_start()
-/*============*/
-{
- buf_dump_should_start = true;
- os_event_set(srv_buf_dump_event);
-}
-
-/*****************************************************************//**
-Wakes up the buffer pool dump/load thread and instructs it to start
-a load. This function is called by MySQL code via buffer_pool_load_now()
-and it should return immediately because the whole MySQL is frozen during
-its execution. */
-UNIV_INTERN
-void
-buf_load_start()
-/*============*/
-{
- buf_load_should_start = true;
- os_event_set(srv_buf_dump_event);
-}
-
-/*****************************************************************//**
-Sets the global variable that feeds MySQL's innodb_buffer_pool_dump_status
-to the specified string. The format and the following parameters are the
-same as the ones used for printf(3). The value of this variable can be
-retrieved by:
-SELECT variable_value FROM information_schema.global_status WHERE
-variable_name = 'INNODB_BUFFER_POOL_DUMP_STATUS';
-or by:
-SHOW STATUS LIKE 'innodb_buffer_pool_dump_status'; */
-static MY_ATTRIBUTE((nonnull, format(printf, 2, 3)))
-void
-buf_dump_status(
-/*============*/
- enum status_severity severity,/*!< in: status severity */
- const char* fmt, /*!< in: format */
- ...) /*!< in: extra parameters according
- to fmt */
-{
- va_list ap;
-
- va_start(ap, fmt);
-
- ut_vsnprintf(
- export_vars.innodb_buffer_pool_dump_status,
- sizeof(export_vars.innodb_buffer_pool_dump_status),
- fmt, ap);
-
- ib_logf((ib_log_level_t) severity, "%s", export_vars.innodb_buffer_pool_dump_status);
-
- va_end(ap);
-}
-
-/*****************************************************************//**
-Sets the global variable that feeds MySQL's innodb_buffer_pool_load_status
-to the specified string. The format and the following parameters are the
-same as the ones used for printf(3). The value of this variable can be
-retrieved by:
-SELECT variable_value FROM information_schema.global_status WHERE
-variable_name = 'INNODB_BUFFER_POOL_LOAD_STATUS';
-or by:
-SHOW STATUS LIKE 'innodb_buffer_pool_load_status'; */
-static MY_ATTRIBUTE((nonnull, format(printf, 2, 3)))
-void
-buf_load_status(
-/*============*/
- enum status_severity severity,/*!< in: status severity */
- const char* fmt, /*!< in: format */
- ...) /*!< in: extra parameters according to fmt */
-{
- va_list ap;
-
- va_start(ap, fmt);
-
- ut_vsnprintf(
- export_vars.innodb_buffer_pool_load_status,
- sizeof(export_vars.innodb_buffer_pool_load_status),
- fmt, ap);
-
- if (severity == STATUS_NOTICE || severity == STATUS_ERR) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: %s\n",
- export_vars.innodb_buffer_pool_load_status);
- }
-
- va_end(ap);
-}
-
-/** Returns the directory path where the buffer pool dump file will be created.
-@return directory path */
-static
-const char*
-get_buf_dump_dir()
-{
- const char* dump_dir;
-
- /* The dump file should be created in the default data directory if
- innodb_data_home_dir is set as an empty string. */
- if (strcmp(srv_data_home, "") == 0) {
- dump_dir = fil_path_to_mysql_datadir;
- } else {
- dump_dir = srv_data_home;
- }
-
- return(dump_dir);
-}
-
-/*****************************************************************//**
-Perform a buffer pool dump into the file specified by
-innodb_buffer_pool_filename. If any errors occur then the value of
-innodb_buffer_pool_dump_status will be set accordingly, see buf_dump_status().
-The dump filename can be specified by (relative to srv_data_home):
-SET GLOBAL innodb_buffer_pool_filename='filename'; */
-static
-void
-buf_dump(
-/*=====*/
- ibool obey_shutdown) /*!< in: quit if we are in a shutting down
- state */
-{
-#define SHOULD_QUIT() (SHUTTING_DOWN() && obey_shutdown)
-
- char full_filename[OS_FILE_MAX_PATH];
- char tmp_filename[OS_FILE_MAX_PATH];
- char now[32];
- FILE* f;
- ulint i;
- int ret;
-
- ut_snprintf(full_filename, sizeof(full_filename),
- "%s%c%s", get_buf_dump_dir(), SRV_PATH_SEPARATOR,
- srv_buf_dump_filename);
-
- ut_snprintf(tmp_filename, sizeof(tmp_filename),
- "%s.incomplete", full_filename);
-
- buf_dump_status(STATUS_NOTICE, "Dumping buffer pool(s) to %s",
- full_filename);
-
- f = fopen(tmp_filename, "w");
- if (f == NULL) {
- buf_dump_status(STATUS_ERR,
- "Cannot open '%s' for writing: %s",
- tmp_filename, strerror(errno));
- return;
- }
- /* else */
-
- /* walk through each buffer pool */
- for (i = 0; i < srv_buf_pool_instances && !SHOULD_QUIT(); i++) {
- buf_pool_t* buf_pool;
- const buf_page_t* bpage;
- buf_dump_t* dump;
- ulint n_pages;
- ulint j;
- ulint limit;
- ulint counter;
-
- buf_pool = buf_pool_from_array(i);
-
- /* obtain buf_pool LRU list mutex before allocate, since
- UT_LIST_GET_LEN(buf_pool->LRU) could change */
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- n_pages = UT_LIST_GET_LEN(buf_pool->LRU);
-
- /* skip empty buffer pools */
- if (n_pages == 0) {
- mutex_exit(&buf_pool->LRU_list_mutex);
- continue;
- }
-
- if (srv_buf_pool_dump_pct != 100) {
- ut_ad(srv_buf_pool_dump_pct < 100);
-
- n_pages = n_pages * srv_buf_pool_dump_pct / 100;
-
- if (n_pages == 0) {
- n_pages = 1;
- }
- }
-
- dump = static_cast<buf_dump_t*>(
- ut_malloc(n_pages * sizeof(*dump))) ;
-
- if (dump == NULL) {
- mutex_exit(&buf_pool->LRU_list_mutex);
- fclose(f);
- buf_dump_status(STATUS_ERR,
- "Cannot allocate " ULINTPF " bytes: %s",
- (ulint) (n_pages * sizeof(*dump)),
- strerror(errno));
- /* leave tmp_filename to exist */
- return;
- }
-
- for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU), j = 0;
- bpage != NULL && j < n_pages;
- bpage = UT_LIST_GET_NEXT(LRU, bpage), j++) {
-
- ut_a(buf_page_in_file(bpage));
-
- dump[j] = BUF_DUMP_CREATE(buf_page_get_space(bpage),
- buf_page_get_page_no(bpage));
- }
-
- ut_a(j == n_pages);
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- limit = (ulint)((double)n_pages * ((double)srv_buf_dump_status_frequency / (double)100));
- counter = 0;
-
- for (j = 0; j < n_pages && !SHOULD_QUIT(); j++) {
- ret = fprintf(f, ULINTPF "," ULINTPF "\n",
- BUF_DUMP_SPACE(dump[j]),
- BUF_DUMP_PAGE(dump[j]));
- if (ret < 0) {
- ut_free(dump);
- fclose(f);
- buf_dump_status(STATUS_ERR,
- "Cannot write to '%s': %s",
- tmp_filename, strerror(errno));
- /* leave tmp_filename to exist */
- return;
- }
-
- counter++;
-
- /* Print buffer pool dump status only if
- srv_buf_dump_status_frequency is > 0 and
- we have processed that amount of pages. */
- if (srv_buf_dump_status_frequency &&
- counter == limit) {
- counter = 0;
- buf_dump_status(
- STATUS_INFO,
- "Dumping buffer pool "
- ULINTPF "/" ULINTPF ", "
- "page " ULINTPF "/" ULINTPF,
- i + 1, srv_buf_pool_instances,
- j + 1, n_pages);
- }
- }
-
- ut_free(dump);
- }
-
- ret = fclose(f);
- if (ret != 0) {
- buf_dump_status(STATUS_ERR,
- "Cannot close '%s': %s",
- tmp_filename, strerror(errno));
- return;
- }
- /* else */
-
- ret = unlink(full_filename);
- if (ret != 0 && errno != ENOENT) {
- buf_dump_status(STATUS_ERR,
- "Cannot delete '%s': %s",
- full_filename, strerror(errno));
- /* leave tmp_filename to exist */
- return;
- }
- /* else */
-
- ret = rename(tmp_filename, full_filename);
- if (ret != 0) {
- buf_dump_status(STATUS_ERR,
- "Cannot rename '%s' to '%s': %s",
- tmp_filename, full_filename,
- strerror(errno));
- /* leave tmp_filename to exist */
- return;
- }
- /* else */
-
- /* success */
-
- ut_sprintf_timestamp(now);
-
- buf_dump_status(STATUS_NOTICE,
- "Buffer pool(s) dump completed at %s", now);
-}
-
-/*****************************************************************//**
-Compare two buffer pool dump entries, used to sort the dump on
-space_no,page_no before loading in order to increase the chance for
-sequential IO.
-@return -1/0/1 if entry 1 is smaller/equal/bigger than entry 2 */
-static
-lint
-buf_dump_cmp(
-/*=========*/
- const buf_dump_t d1, /*!< in: buffer pool dump entry 1 */
- const buf_dump_t d2) /*!< in: buffer pool dump entry 2 */
-{
- if (d1 < d2) {
- return(-1);
- } else if (d1 == d2) {
- return(0);
- } else {
- return(1);
- }
-}
-
-/*****************************************************************//**
-Sort a buffer pool dump on space_no, page_no. */
-static
-void
-buf_dump_sort(
-/*==========*/
- buf_dump_t* dump, /*!< in/out: buffer pool dump to sort */
- buf_dump_t* tmp, /*!< in/out: temp storage */
- ulint low, /*!< in: lowest index (inclusive) */
- ulint high) /*!< in: highest index (non-inclusive) */
-{
- UT_SORT_FUNCTION_BODY(buf_dump_sort, dump, tmp, low, high,
- buf_dump_cmp);
-}
-
-/*****************************************************************//**
-Artificially delay the buffer pool loading if necessary. The idea of
-this function is to prevent hogging the server with IO and slowing down
-too much normal client queries. */
-UNIV_INLINE
-void
-buf_load_throttle_if_needed(
-/*========================*/
- ulint* last_check_time, /*!< in/out: miliseconds since epoch
- of the last time we did check if
- throttling is needed, we do the check
- every srv_io_capacity IO ops. */
- ulint* last_activity_count,
- ulint n_io) /*!< in: number of IO ops done since
- buffer pool load has started */
-{
- if (n_io % srv_io_capacity < srv_io_capacity - 1) {
- return;
- }
-
- if (*last_check_time == 0 || *last_activity_count == 0) {
- *last_check_time = ut_time_ms();
- *last_activity_count = srv_get_activity_count();
- return;
- }
-
- /* srv_io_capacity IO operations have been performed by buffer pool
- load since the last time we were here. */
-
- /* If no other activity, then keep going without any delay. */
- if (srv_get_activity_count() == *last_activity_count) {
- return;
- }
-
- /* There has been other activity, throttle. */
-
- ulint now = ut_time_ms();
- ulint elapsed_time = now - *last_check_time;
-
- /* Notice that elapsed_time is not the time for the last
- srv_io_capacity IO operations performed by BP load. It is the
- time elapsed since the last time we detected that there has been
- other activity. This has a small and acceptable deficiency, e.g.:
- 1. BP load runs and there is no other activity.
- 2. Other activity occurs, we run N IO operations after that and
- enter here (where 0 <= N < srv_io_capacity).
- 3. last_check_time is very old and we do not sleep at this time, but
- only update last_check_time and last_activity_count.
- 4. We run srv_io_capacity more IO operations and call this function
- again.
- 5. There has been more other activity and thus we enter here.
- 6. Now last_check_time is recent and we sleep if necessary to prevent
- more than srv_io_capacity IO operations per second.
- The deficiency is that we could have slept at 3., but for this we
- would have to update last_check_time before the
- "cur_activity_count == *last_activity_count" check and calling
- ut_time_ms() that often may turn out to be too expensive. */
-
- if (elapsed_time < 1000 /* 1 sec (1000 mili secs) */) {
- os_thread_sleep((1000 - elapsed_time) * 1000 /* micro secs */);
- }
-
- *last_check_time = ut_time_ms();
- *last_activity_count = srv_get_activity_count();
-}
-
-/*****************************************************************//**
-Perform a buffer pool load from the file specified by
-innodb_buffer_pool_filename. If any errors occur then the value of
-innodb_buffer_pool_load_status will be set accordingly, see buf_load_status().
-The dump filename can be specified by (relative to srv_data_home):
-SET GLOBAL innodb_buffer_pool_filename='filename'; */
-static
-void
-buf_load()
-/*======*/
-{
- char full_filename[OS_FILE_MAX_PATH];
- char now[32];
- FILE* f;
- buf_dump_t* dump;
- buf_dump_t* dump_tmp;
- ulint dump_n;
- ulint total_buffer_pools_pages;
- ulint i;
- ulint space_id;
- ulint page_no;
- int fscanf_ret;
-
- /* Ignore any leftovers from before */
- buf_load_abort_flag = FALSE;
-
- ut_snprintf(full_filename, sizeof(full_filename),
- "%s%c%s", get_buf_dump_dir(), SRV_PATH_SEPARATOR,
- srv_buf_dump_filename);
-
- buf_load_status(STATUS_NOTICE,
- "Loading buffer pool(s) from %s", full_filename);
-
- f = fopen(full_filename, "r");
- if (f == NULL) {
- buf_load_status(STATUS_ERR,
- "Cannot open '%s' for reading: %s",
- full_filename, strerror(errno));
- return;
- }
- /* else */
-
- /* First scan the file to estimate how many entries are in it.
- This file is tiny (approx 500KB per 1GB buffer pool), reading it
- two times is fine. */
- dump_n = 0;
- while (fscanf(f, ULINTPF "," ULINTPF, &space_id, &page_no) == 2
- && !SHUTTING_DOWN()) {
- dump_n++;
- }
-
- if (!SHUTTING_DOWN() && !feof(f)) {
- /* fscanf() returned != 2 */
- const char* what;
- if (ferror(f)) {
- what = "reading";
- } else {
- what = "parsing";
- }
- fclose(f);
- buf_load_status(STATUS_ERR, "Error %s '%s', "
- "unable to load buffer pool (stage 1)",
- what, full_filename);
- return;
- }
-
- /* If dump is larger than the buffer pool(s), then we ignore the
- extra trailing. This could happen if a dump is made, then buffer
- pool is shrunk and then load it attempted. */
- total_buffer_pools_pages = buf_pool_get_n_pages()
- * srv_buf_pool_instances;
- if (dump_n > total_buffer_pools_pages) {
- dump_n = total_buffer_pools_pages;
- }
-
- dump = static_cast<buf_dump_t*>(ut_malloc(dump_n * sizeof(*dump)));
-
- if (dump == NULL) {
- fclose(f);
- buf_load_status(STATUS_ERR,
- "Cannot allocate " ULINTPF " bytes: %s",
- (ulint) (dump_n * sizeof(*dump)),
- strerror(errno));
- return;
- }
-
- dump_tmp = static_cast<buf_dump_t*>(
- ut_malloc(dump_n * sizeof(*dump_tmp)));
-
- if (dump_tmp == NULL) {
- ut_free(dump);
- fclose(f);
- buf_load_status(STATUS_ERR,
- "Cannot allocate " ULINTPF " bytes: %s",
- (ulint) (dump_n * sizeof(*dump_tmp)),
- strerror(errno));
- return;
- }
-
- rewind(f);
-
- for (i = 0; i < dump_n && !SHUTTING_DOWN(); i++) {
- fscanf_ret = fscanf(f, ULINTPF "," ULINTPF,
- &space_id, &page_no);
-
- if (fscanf_ret != 2) {
- if (feof(f)) {
- break;
- }
- /* else */
-
- ut_free(dump);
- ut_free(dump_tmp);
- fclose(f);
- buf_load_status(STATUS_ERR,
- "Error parsing '%s', unable "
- "to load buffer pool (stage 2)",
- full_filename);
- return;
- }
-
- if (space_id > ULINT32_MASK || page_no > ULINT32_MASK) {
- ut_free(dump);
- ut_free(dump_tmp);
- fclose(f);
- buf_load_status(STATUS_ERR,
- "Error parsing '%s': bogus "
- "space,page " ULINTPF "," ULINTPF
- " at line " ULINTPF ", "
- "unable to load buffer pool",
- full_filename,
- space_id, page_no,
- i);
- return;
- }
-
- dump[i] = BUF_DUMP_CREATE(space_id, page_no);
- }
-
- /* Set dump_n to the actual number of initialized elements,
- i could be smaller than dump_n here if the file got truncated after
- we read it the first time. */
- dump_n = i;
-
- fclose(f);
-
- if (dump_n == 0) {
- ut_free(dump);
- ut_free(dump_tmp);
- ut_sprintf_timestamp(now);
- buf_load_status(STATUS_NOTICE,
- "Buffer pool(s) load completed at %s "
- "(%s was empty)", now, full_filename);
- return;
- }
-
- if (!SHUTTING_DOWN()) {
- buf_dump_sort(dump, dump_tmp, 0, dump_n);
- }
-
- ut_free(dump_tmp);
-
- ulint last_check_time = 0;
- ulint last_activity_cnt = 0;
-
- for (i = 0; i < dump_n && !SHUTTING_DOWN(); i++) {
-
- buf_read_page_async(BUF_DUMP_SPACE(dump[i]),
- BUF_DUMP_PAGE(dump[i]));
-
- if (i % 64 == 63) {
- os_aio_simulated_wake_handler_threads();
- }
-
- if (i % 128 == 0) {
- buf_load_status(STATUS_INFO,
- "Loaded " ULINTPF "/" ULINTPF " pages",
- i + 1, dump_n);
- }
-
- if (buf_load_abort_flag) {
- buf_load_abort_flag = FALSE;
- ut_free(dump);
- buf_load_status(
- STATUS_NOTICE,
- "Buffer pool(s) load aborted on request");
- return;
- }
-
- buf_load_throttle_if_needed(
- &last_check_time, &last_activity_cnt, i);
- }
-
- ut_free(dump);
-
- ut_sprintf_timestamp(now);
-
- buf_load_status(STATUS_NOTICE,
- "Buffer pool(s) load completed at %s", now);
-}
-
-/*****************************************************************//**
-Aborts a currently running buffer pool load. This function is called by
-MySQL code via buffer_pool_load_abort() and it should return immediately
-because the whole MySQL is frozen during its execution. */
-UNIV_INTERN
-void
-buf_load_abort()
-/*============*/
-{
- buf_load_abort_flag = TRUE;
-}
-
-/*****************************************************************//**
-This is the main thread for buffer pool dump/load. It waits for an
-event and when waked up either performs a dump or load and sleeps
-again.
-@return this function does not return, it calls os_thread_exit() */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(buf_dump_thread)(void*)
-{
- my_thread_init();
- ut_ad(!srv_read_only_mode);
-
- buf_dump_status(STATUS_INFO, "Dumping buffer pool(s) not yet started");
- buf_load_status(STATUS_INFO, "Loading buffer pool(s) not yet started");
-
- if (srv_buffer_pool_load_at_startup) {
- buf_load();
- }
-
- while (!SHUTTING_DOWN()) {
-
- os_event_wait(srv_buf_dump_event);
-
- if (buf_dump_should_start) {
- buf_dump_should_start = false;
- buf_dump(TRUE /* quit on shutdown */);
- }
-
- if (buf_load_should_start) {
- buf_load_should_start = false;
- buf_load();
- }
-
- if (buf_dump_should_start || buf_load_should_start) {
- continue;
- }
- os_event_reset(srv_buf_dump_event);
- }
-
- if (srv_buffer_pool_dump_at_shutdown && srv_fast_shutdown != 2) {
- buf_dump(FALSE /* ignore shutdown down flag,
- keep going even if we are in a shutdown state */);
- }
-
- srv_buf_dump_thread_active = false;
-
- my_thread_end();
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc
deleted file mode 100644
index 84eea3bc692..00000000000
--- a/storage/xtradb/buf/buf0flu.cc
+++ /dev/null
@@ -1,3133 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-Copyright (c) 2013, 2014, Fusion-io
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file buf/buf0flu.cc
-The database buffer buf_pool flush algorithm
-
-Created 11/11/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0flu.h"
-
-#ifdef UNIV_NONINL
-#include "buf0flu.ic"
-#endif
-
-#include "buf0buf.h"
-#include "buf0mtflu.h"
-#include "buf0checksum.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-#include "page0zip.h"
-#ifndef UNIV_HOTBACKUP
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "page0page.h"
-#include "fil0fil.h"
-#include "buf0lru.h"
-#include "buf0rea.h"
-#include "ibuf0ibuf.h"
-#include "log0log.h"
-#include "os0file.h"
-#include "os0sync.h"
-#include "trx0sys.h"
-#include "srv0mon.h"
-#include "mysql/plugin.h"
-#include "mysql/service_thd_wait.h"
-#include "fil0pagecompress.h"
-
-/** Number of pages flushed through non flush_list flushes. */
-// static ulint buf_lru_flush_page_count = 0;
-
-/** Flag indicating if the page_cleaner is in active state. This flag
-is set to TRUE by the page_cleaner thread when it is spawned and is set
-back to FALSE at shutdown by the page_cleaner as well. Therefore no
-need to protect it by a mutex. It is only ever read by the thread
-doing the shutdown */
-UNIV_INTERN bool buf_page_cleaner_is_active;
-
-/** Flag indicating if the lru_manager is in active state. */
-UNIV_INTERN bool buf_lru_manager_is_active;
-
-#ifdef UNIV_PFS_THREAD
-UNIV_INTERN mysql_pfs_key_t buf_page_cleaner_thread_key;
-UNIV_INTERN mysql_pfs_key_t buf_lru_manager_thread_key;
-#endif /* UNIV_PFS_THREAD */
-
-/* @} */
-
-/******************************************************************//**
-Increases flush_list size in bytes with zip_size for compressed page,
-UNIV_PAGE_SIZE for uncompressed page in inline function */
-static inline
-void
-incr_flush_list_size_in_bytes(
-/*==========================*/
- buf_block_t* block, /*!< in: control block */
- buf_pool_t* buf_pool) /*!< in: buffer pool instance */
-{
- ut_ad(buf_flush_list_mutex_own(buf_pool));
- ulint zip_size = page_zip_get_size(&block->page.zip);
- buf_pool->stat.flush_list_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
- ut_ad(buf_pool->stat.flush_list_bytes <= buf_pool->curr_pool_size);
-}
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/******************************************************************//**
-Validates the flush list.
-@return TRUE if ok */
-static
-ibool
-buf_flush_validate_low(
-/*===================*/
- buf_pool_t* buf_pool); /*!< in: Buffer pool instance */
-
-/******************************************************************//**
-Validates the flush list some of the time.
-@return TRUE if ok or the check was skipped */
-static
-ibool
-buf_flush_validate_skip(
-/*====================*/
- buf_pool_t* buf_pool) /*!< in: Buffer pool instance */
-{
-/** Try buf_flush_validate_low() every this many times */
-# define BUF_FLUSH_VALIDATE_SKIP 23
-
- /** The buf_flush_validate_low() call skip counter.
- Use a signed type because of the race condition below. */
- static int buf_flush_validate_count = BUF_FLUSH_VALIDATE_SKIP;
-
- /* There is a race condition below, but it does not matter,
- because this call is only for heuristic purposes. We want to
- reduce the call frequency of the costly buf_flush_validate_low()
- check in debug builds. */
- if (--buf_flush_validate_count > 0) {
- return(TRUE);
- }
-
- buf_flush_validate_count = BUF_FLUSH_VALIDATE_SKIP;
- return(buf_flush_validate_low(buf_pool));
-}
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-/*******************************************************************//**
-Sets hazard pointer during flush_list iteration. */
-UNIV_INLINE
-void
-buf_flush_set_hp(
-/*=============*/
- buf_pool_t* buf_pool,/*!< in/out: buffer pool instance */
- const buf_page_t* bpage) /*!< in: buffer control block */
-{
- ut_ad(buf_flush_list_mutex_own(buf_pool));
- ut_ad(buf_pool->flush_list_hp == NULL || bpage == NULL);
- ut_ad(!bpage || buf_page_in_file(bpage)
- || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
- ut_ad(!bpage || bpage->in_flush_list);
- ut_ad(!bpage || buf_pool_from_bpage(bpage) == buf_pool);
-
- buf_pool->flush_list_hp = bpage;
-}
-
-/*******************************************************************//**
-Checks if the given block is a hazard pointer
-@return true if bpage is hazard pointer */
-UNIV_INLINE
-bool
-buf_flush_is_hp(
-/*============*/
- buf_pool_t* buf_pool,/*!< in: buffer pool instance */
- const buf_page_t* bpage) /*!< in: buffer control block */
-{
- ut_ad(buf_flush_list_mutex_own(buf_pool));
-
- return(buf_pool->flush_list_hp == bpage);
-}
-
-/*******************************************************************//**
-Whenever we move a block in flush_list (either to remove it or to
-relocate it) we check the hazard pointer set by some other thread
-doing the flush list scan. If the hazard pointer is the same as the
-one we are about going to move then we set it to NULL to force a rescan
-in the thread doing the batch. */
-UNIV_INLINE
-void
-buf_flush_update_hp(
-/*================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_page_t* bpage) /*!< in: buffer control block */
-{
- ut_ad(buf_flush_list_mutex_own(buf_pool));
-
- if (buf_flush_is_hp(buf_pool, bpage)) {
- buf_flush_set_hp(buf_pool, NULL);
- MONITOR_INC(MONITOR_FLUSH_HP_RESCAN);
- }
-}
-
-/******************************************************************//**
-Insert a block in the flush_rbt and returns a pointer to its
-predecessor or NULL if no predecessor. The ordering is maintained
-on the basis of the <oldest_modification, space, offset> key.
-@return pointer to the predecessor or NULL if no predecessor. */
-static
-buf_page_t*
-buf_flush_insert_in_flush_rbt(
-/*==========================*/
- buf_page_t* bpage) /*!< in: bpage to be inserted. */
-{
- const ib_rbt_node_t* c_node;
- const ib_rbt_node_t* p_node;
- buf_page_t* prev = NULL;
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
- ut_ad(buf_flush_list_mutex_own(buf_pool));
-
- /* Insert this buffer into the rbt. */
- c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
- ut_a(c_node != NULL);
-
- /* Get the predecessor. */
- p_node = rbt_prev(buf_pool->flush_rbt, c_node);
-
- if (p_node != NULL) {
- buf_page_t** value;
- value = rbt_value(buf_page_t*, p_node);
- prev = *value;
- ut_a(prev != NULL);
- }
-
- return(prev);
-}
-
-/*********************************************************//**
-Delete a bpage from the flush_rbt. */
-static
-void
-buf_flush_delete_from_flush_rbt(
-/*============================*/
- buf_page_t* bpage) /*!< in: bpage to be removed. */
-{
-#ifdef UNIV_DEBUG
- ibool ret = FALSE;
-#endif /* UNIV_DEBUG */
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
- ut_ad(buf_flush_list_mutex_own(buf_pool));
-
-#ifdef UNIV_DEBUG
- ret =
-#endif /* UNIV_DEBUG */
- rbt_delete(buf_pool->flush_rbt, &bpage);
-
- ut_ad(ret);
-}
-
-/*****************************************************************//**
-Compare two modified blocks in the buffer pool. The key for comparison
-is:
-key = <oldest_modification, space, offset>
-This comparison is used to maintian ordering of blocks in the
-buf_pool->flush_rbt.
-Note that for the purpose of flush_rbt, we only need to order blocks
-on the oldest_modification. The other two fields are used to uniquely
-identify the blocks.
-@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
-static
-int
-buf_flush_block_cmp(
-/*================*/
- const void* p1, /*!< in: block1 */
- const void* p2) /*!< in: block2 */
-{
- int ret;
- const buf_page_t* b1 = *(const buf_page_t**) p1;
- const buf_page_t* b2 = *(const buf_page_t**) p2;
-#ifdef UNIV_DEBUG
- buf_pool_t* buf_pool = buf_pool_from_bpage(b1);
-#endif /* UNIV_DEBUG */
-
- ut_ad(b1 != NULL);
- ut_ad(b2 != NULL);
-
- ut_ad(buf_flush_list_mutex_own(buf_pool));
-
- ut_ad(b1->in_flush_list);
- ut_ad(b2->in_flush_list);
-
- if (b2->oldest_modification > b1->oldest_modification) {
- return(1);
- } else if (b2->oldest_modification < b1->oldest_modification) {
- return(-1);
- }
-
- /* If oldest_modification is same then decide on the space. */
- ret = (int)(b2->space - b1->space);
-
- /* Or else decide ordering on the offset field. */
- return(ret ? ret : (int)(b2->offset - b1->offset));
-}
-
-/********************************************************************//**
-Initialize the red-black tree to speed up insertions into the flush_list
-during recovery process. Should be called at the start of recovery
-process before any page has been read/written. */
-UNIV_INTERN
-void
-buf_flush_init_flush_rbt(void)
-/*==========================*/
-{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
-
- buf_pool = buf_pool_from_array(i);
-
- buf_flush_list_mutex_enter(buf_pool);
-
- ut_ad(buf_pool->flush_rbt == NULL);
-
- /* Create red black tree for speedy insertions in flush list. */
- buf_pool->flush_rbt = rbt_create(
- sizeof(buf_page_t*), buf_flush_block_cmp);
-
- buf_flush_list_mutex_exit(buf_pool);
- }
-}
-
-/********************************************************************//**
-Frees up the red-black tree. */
-UNIV_INTERN
-void
-buf_flush_free_flush_rbt(void)
-/*==========================*/
-{
- ulint i;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
-
- buf_pool = buf_pool_from_array(i);
-
- buf_flush_list_mutex_enter(buf_pool);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_flush_validate_low(buf_pool));
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
- rbt_free(buf_pool->flush_rbt);
- buf_pool->flush_rbt = NULL;
-
- buf_flush_list_mutex_exit(buf_pool);
- }
-}
-
-/********************************************************************//**
-Inserts a modified block into the flush list. */
-UNIV_INTERN
-void
-buf_flush_insert_into_flush_list(
-/*=============================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- buf_block_t* block, /*!< in/out: block which is modified */
- lsn_t lsn) /*!< in: oldest modification */
-{
- ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE);
- ut_ad(log_flush_order_mutex_own());
- ut_ad(mutex_own(&block->mutex));
-
- buf_flush_list_mutex_enter(buf_pool);
-
- ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL)
- || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
- <= lsn));
-
- /* If we are in the recovery then we need to update the flush
- red-black tree as well. */
- if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
- buf_flush_list_mutex_exit(buf_pool);
- buf_flush_insert_sorted_into_flush_list(buf_pool, block, lsn);
- return;
- }
-
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(!block->page.in_flush_list);
-
- ut_d(block->page.in_flush_list = TRUE);
- block->page.oldest_modification = lsn;
- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
- incr_flush_list_size_in_bytes(block, buf_pool);
-
-#ifdef UNIV_DEBUG_VALGRIND
- {
- ulint zip_size = buf_block_get_zip_size(block);
-
- if (zip_size) {
- UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
- } else {
- UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
- }
- }
-#endif /* UNIV_DEBUG_VALGRIND */
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_flush_validate_skip(buf_pool));
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
- buf_flush_list_mutex_exit(buf_pool);
-}
-
-/********************************************************************//**
-Inserts a modified block into the flush list in the right sorted position.
-This function is used by recovery, because there the modifications do not
-necessarily come in the order of lsn's. */
-UNIV_INTERN
-void
-buf_flush_insert_sorted_into_flush_list(
-/*====================================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_block_t* block, /*!< in/out: block which is modified */
- lsn_t lsn) /*!< in: oldest modification */
-{
- buf_page_t* prev_b;
- buf_page_t* b;
-
- ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE);
- ut_ad(log_flush_order_mutex_own());
- ut_ad(mutex_own(&block->mutex));
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
- buf_flush_list_mutex_enter(buf_pool);
-
- /* The field in_LRU_list is protected by buf_pool->LRU_list_mutex,
- which we are not holding. However, while a block is in the flush
- list, it is dirty and cannot be discarded, not from the
- page_hash or from the LRU list. At most, the uncompressed
- page frame of a compressed block may be discarded or created
- (copying the block->page to or from a buf_page_t that is
- dynamically allocated from buf_buddy_alloc()). Because those
- transitions hold block->mutex and the flush list mutex (via
- buf_flush_relocate_on_flush_list()), there is no possibility
- of a race condition in the assertions below. */
- ut_ad(block->page.in_LRU_list);
- ut_ad(block->page.in_page_hash);
- /* buf_buddy_block_register() will take a block in the
- BUF_BLOCK_MEMORY state, not a file page. */
- ut_ad(!block->page.in_zip_hash);
-
- ut_ad(!block->page.in_flush_list);
- ut_d(block->page.in_flush_list = TRUE);
- block->page.oldest_modification = lsn;
-
-#ifdef UNIV_DEBUG_VALGRIND
- {
- ulint zip_size = buf_block_get_zip_size(block);
-
- if (zip_size) {
- UNIV_MEM_ASSERT_RW(block->page.zip.data, zip_size);
- } else {
- UNIV_MEM_ASSERT_RW(block->frame, UNIV_PAGE_SIZE);
- }
- }
-#endif /* UNIV_DEBUG_VALGRIND */
-
- prev_b = NULL;
-
- /* For the most part when this function is called the flush_rbt
- should not be NULL. In a very rare boundary case it is possible
- that the flush_rbt has already been freed by the recovery thread
- before the last page was hooked up in the flush_list by the
- io-handler thread. In that case we'll just do a simple
- linear search in the else block. */
- if (buf_pool->flush_rbt) {
-
- prev_b = buf_flush_insert_in_flush_rbt(&block->page);
-
- } else {
-
- b = UT_LIST_GET_FIRST(buf_pool->flush_list);
-
- while (b && b->oldest_modification
- > block->page.oldest_modification) {
- ut_ad(b->in_flush_list);
- prev_b = b;
- b = UT_LIST_GET_NEXT(list, b);
- }
- }
-
- if (prev_b == NULL) {
- UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
- } else {
- UT_LIST_INSERT_AFTER(list, buf_pool->flush_list,
- prev_b, &block->page);
- }
-
- incr_flush_list_size_in_bytes(block, buf_pool);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_flush_validate_low(buf_pool));
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
- buf_flush_list_mutex_exit(buf_pool);
-}
-
-/********************************************************************//**
-Returns TRUE if the file page block is immediately suitable for replacement,
-i.e., the transition FILE_PAGE => NOT_USED allowed.
-@return TRUE if can replace immediately */
-UNIV_INTERN
-ibool
-buf_flush_ready_for_replace(
-/*========================*/
- buf_page_t* bpage) /*!< in: buffer control block, must be
- buf_page_in_file(bpage) and in the LRU list */
-{
-#ifdef UNIV_DEBUG
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-#endif /* UNIV_DEBUG */
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- ut_ad(bpage->in_LRU_list);
-
- if (buf_page_in_file(bpage)) {
-
- return(bpage->oldest_modification == 0
- && bpage->buf_fix_count == 0
- && buf_page_get_io_fix(bpage) == BUF_IO_NONE);
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: buffer block state %lu"
- " in the LRU list!\n",
- (ulong) buf_page_get_state(bpage));
- ut_print_buf(stderr, bpage, sizeof(buf_page_t));
- putc('\n', stderr);
-
- return(FALSE);
-}
-
-/********************************************************************//**
-Returns true if the block is modified and ready for flushing.
-@return true if can flush immediately */
-UNIV_INTERN
-bool
-buf_flush_ready_for_flush(
-/*======================*/
- buf_page_t* bpage, /*!< in: buffer control block, must be
- buf_page_in_file(bpage) */
- buf_flush_t flush_type)/*!< in: type of flush */
-{
- ut_ad(flush_type < BUF_FLUSH_N_TYPES);
- ut_ad(mutex_own(buf_page_get_mutex(bpage))
- || flush_type == BUF_FLUSH_LIST);
- ut_a(buf_page_in_file(bpage) || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
-
- if (bpage->oldest_modification == 0
- || buf_page_get_io_fix_unlocked(bpage) != BUF_IO_NONE) {
- return(false);
- }
-
- ut_ad(bpage->in_flush_list);
-
- switch (flush_type) {
- case BUF_FLUSH_LIST:
- return(buf_page_get_state(bpage) != BUF_BLOCK_REMOVE_HASH);
- case BUF_FLUSH_LRU:
- case BUF_FLUSH_SINGLE_PAGE:
- return(true);
-
- case BUF_FLUSH_N_TYPES:
- break;
- }
-
- ut_error;
- return(false);
-}
-
-/********************************************************************//**
-Remove a block from the flush list of modified blocks. */
-UNIV_INTERN
-void
-buf_flush_remove(
-/*=============*/
- buf_page_t* bpage) /*!< in: pointer to the block in question */
-{
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ulint zip_size;
-
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_DIRTY
- || mutex_own(&buf_pool->LRU_list_mutex));
-#endif
- ut_ad(bpage->in_flush_list);
-
- buf_flush_list_mutex_enter(buf_pool);
-
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_ZIP_PAGE:
- /* Clean compressed pages should not be on the flush list */
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- return;
- case BUF_BLOCK_ZIP_DIRTY:
- buf_page_set_state(bpage, BUF_BLOCK_ZIP_PAGE);
- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- buf_LRU_insert_zip_clean(bpage);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- break;
- case BUF_BLOCK_FILE_PAGE:
- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
- break;
- }
-
- /* If the flush_rbt is active then delete from there as well. */
- if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
- buf_flush_delete_from_flush_rbt(bpage);
- }
-
- /* Must be done after we have removed it from the flush_rbt
- because we assert on in_flush_list in comparison function. */
- ut_d(bpage->in_flush_list = FALSE);
-
- zip_size = page_zip_get_size(&bpage->zip);
- buf_pool->stat.flush_list_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
-
- bpage->oldest_modification = 0;
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_flush_validate_skip(buf_pool));
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
- buf_flush_update_hp(buf_pool, bpage);
- buf_flush_list_mutex_exit(buf_pool);
-}
-
-/*******************************************************************//**
-Relocates a buffer control block on the flush_list.
-Note that it is assumed that the contents of bpage have already been
-copied to dpage.
-IMPORTANT: When this function is called bpage and dpage are not
-exact copies of each other. For example, they both will have different
-::state. Also the ::list pointers in dpage may be stale. We need to
-use the current list node (bpage) to do the list manipulation because
-the list pointers could have changed between the time that we copied
-the contents of bpage to the dpage and the flush list manipulation
-below. */
-UNIV_INTERN
-void
-buf_flush_relocate_on_flush_list(
-/*=============================*/
- buf_page_t* bpage, /*!< in/out: control block being moved */
- buf_page_t* dpage) /*!< in/out: destination block */
-{
- buf_page_t* prev;
- buf_page_t* prev_b = NULL;
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
- /* Must reside in the same buffer pool. */
- ut_ad(buf_pool == buf_pool_from_bpage(dpage));
-
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-
- buf_flush_list_mutex_enter(buf_pool);
-
- ut_ad(bpage->in_flush_list);
- ut_ad(dpage->in_flush_list);
-
- /* If recovery is active we must swap the control blocks in
- the flush_rbt as well. */
- if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
- buf_flush_delete_from_flush_rbt(bpage);
- prev_b = buf_flush_insert_in_flush_rbt(dpage);
- }
-
- /* Must be done after we have removed it from the flush_rbt
- because we assert on in_flush_list in comparison function. */
- ut_d(bpage->in_flush_list = FALSE);
-
- prev = UT_LIST_GET_PREV(list, bpage);
- UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
-
- if (prev) {
- ut_ad(prev->in_flush_list);
- UT_LIST_INSERT_AFTER(
- list,
- buf_pool->flush_list,
- prev, dpage);
- } else {
- UT_LIST_ADD_FIRST(
- list,
- buf_pool->flush_list,
- dpage);
- }
-
- /* Just an extra check. Previous in flush_list
- should be the same control block as in flush_rbt. */
- ut_a(!buf_pool->flush_rbt || prev_b == prev);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- ut_a(buf_flush_validate_low(buf_pool));
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
- buf_flush_update_hp(buf_pool, bpage);
- buf_flush_list_mutex_exit(buf_pool);
-}
-
-/********************************************************************//**
-Updates the flush system data structures when a write is completed. */
-UNIV_INTERN
-void
-buf_flush_write_complete(
-/*=====================*/
- buf_page_t* bpage) /*!< in: pointer to the block in question */
-{
- buf_flush_t flush_type = buf_page_get_flush_type(bpage);
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
- mutex_enter(&buf_pool->flush_state_mutex);
-
- buf_flush_remove(bpage);
-
- buf_page_set_io_fix(bpage, BUF_IO_NONE);
-
- buf_pool->n_flush[flush_type]--;
- ut_ad(buf_pool->n_flush[flush_type] != ULINT_MAX);
-
-#ifdef UNIV_MTFLUSH_DEBUG
- fprintf(stderr, "n pending flush %lu\n",
- buf_pool->n_flush[flush_type]);
-#endif
-
- if (buf_pool->n_flush[flush_type] == 0
- && buf_pool->init_flush[flush_type] == FALSE) {
-
- /* The running flush batch has ended */
-
- os_event_set(buf_pool->no_flush[flush_type]);
- }
-
- buf_dblwr_update(bpage, flush_type);
-
- mutex_exit(&buf_pool->flush_state_mutex);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************************//**
-Calculate the checksum of a page from compressed table and update the page. */
-UNIV_INTERN
-void
-buf_flush_update_zip_checksum(
-/*==========================*/
- buf_frame_t* page, /*!< in/out: Page to update */
- ulint zip_size, /*!< in: Compressed page size */
- lsn_t lsn) /*!< in: Lsn to stamp on the page */
-{
- ut_a(zip_size > 0);
-
- ib_uint32_t checksum = static_cast<ib_uint32_t>(
- page_zip_calc_checksum(
- page, zip_size,
- static_cast<srv_checksum_algorithm_t>(
- srv_checksum_algorithm)));
-
- mach_write_to_8(page + FIL_PAGE_LSN, lsn);
- memset(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
- mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
-}
-
-/********************************************************************//**
-Initializes a page for writing to the tablespace. */
-UNIV_INTERN
-void
-buf_flush_init_for_writing(
-/*=======================*/
- byte* page, /*!< in/out: page */
- void* page_zip_, /*!< in/out: compressed page, or NULL */
- lsn_t newest_lsn) /*!< in: newest modification lsn
- to the page */
-{
- ib_uint32_t checksum = 0 /* silence bogus gcc warning */;
-
- ut_ad(page);
-
- if (page_zip_) {
- page_zip_des_t* page_zip;
- ulint zip_size;
-
- page_zip = static_cast<page_zip_des_t*>(page_zip_);
- zip_size = page_zip_get_size(page_zip);
-
- ut_ad(zip_size);
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
-
- switch (UNIV_EXPECT(fil_page_get_type(page), FIL_PAGE_INDEX)) {
- case FIL_PAGE_TYPE_ALLOCATED:
- case FIL_PAGE_INODE:
- case FIL_PAGE_IBUF_BITMAP:
- case FIL_PAGE_TYPE_FSP_HDR:
- case FIL_PAGE_TYPE_XDES:
- /* These are essentially uncompressed pages. */
- memcpy(page_zip->data, page, zip_size);
- /* fall through */
- case FIL_PAGE_TYPE_ZBLOB:
- case FIL_PAGE_TYPE_ZBLOB2:
- case FIL_PAGE_INDEX:
-
- buf_flush_update_zip_checksum(
- page_zip->data, zip_size, newest_lsn);
-
- return;
- }
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: ERROR: The compressed page to be written"
- " seems corrupt:", stderr);
- ut_print_buf(stderr, page, zip_size);
- fputs("\nInnoDB: Possibly older version of the page:", stderr);
- ut_print_buf(stderr, page_zip->data, zip_size);
- putc('\n', stderr);
- ut_error;
- }
-
- /* Write the newest modification lsn to the page header and trailer */
- mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
-
- mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
- newest_lsn);
-
- /* Store the new formula checksum */
-
- switch ((srv_checksum_algorithm_t) srv_checksum_algorithm) {
- case SRV_CHECKSUM_ALGORITHM_CRC32:
- case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
- checksum = buf_calc_page_crc32(page);
- mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
- break;
- case SRV_CHECKSUM_ALGORITHM_INNODB:
- case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
- checksum = (ib_uint32_t) buf_calc_page_new_checksum(page);
- mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
- checksum = (ib_uint32_t) buf_calc_page_old_checksum(page);
- break;
- case SRV_CHECKSUM_ALGORITHM_NONE:
- case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
- checksum = BUF_NO_CHECKSUM_MAGIC;
- mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, checksum);
- break;
- /* no default so the compiler will emit a warning if new enum
- is added and not handled here */
- }
-
- /* With the InnoDB checksum, we overwrite the first 4 bytes of
- the end lsn field to store the old formula checksum. Since it
- depends also on the field FIL_PAGE_SPACE_OR_CHKSUM, it has to
- be calculated after storing the new formula checksum.
-
- In other cases we write the same value to both fields.
- If CRC32 is used then it is faster to use that checksum
- (calculated above) instead of calculating another one.
- We can afford to store something other than
- buf_calc_page_old_checksum() or BUF_NO_CHECKSUM_MAGIC in
- this field because the file will not be readable by old
- versions of MySQL/InnoDB anyway (older than MySQL 5.6.3) */
-
- mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM,
- checksum);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Writes one dirty, io-fixed buffer page to its tablespace file.
-
-Steps, in order: acquire the tablespace for I/O (returning without
-posting any write if that fails), force the redo log to disk up to the
-page's newest_modification (unless UNIV_LOG_DEBUG is defined), stamp the
-LSN/checksum into the frame that will be written, pass the frame through
-buf_page_encrypt_before_write(), and finally post the write either
-directly with fil_io() or through the doublewrite buffer. For a sync
-request the tablespace is then flushed and buf_page_io_complete() is run
-before returning. The tablespace reference is released and the LRU I/O
-statistic is incremented at the end.
-
-NOTE: in simulated aio and also when the doublewrite buffer is used, we
-must call buf_dblwr_flush_buffered_writes after we have posted a batch
-of writes! */
-static
-void
-buf_flush_write_block_low(
-/*======================*/
- buf_page_t* bpage, /*!< in: buffer block to write; asserted to be
- io-fixed for write with nonzero oldest_modification
- and newest_modification */
- buf_flush_t flush_type, /*!< in: type of flush; selects between the
- single-page and the batched doublewrite path */
- bool sync) /*!< in: true if sync IO request; asserted below to
- imply BUF_FLUSH_SINGLE_PAGE */
-{
- fil_space_t* space = fil_space_acquire_for_io(bpage->space);
- if (!space) {
- return; /* tablespace could not be acquired: no write is posted */
- }
- ulint zip_size = buf_page_get_zip_size(bpage);
- page_t* frame = NULL;
-#ifdef UNIV_DEBUG
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
-#endif
-
-#ifdef UNIV_LOG_DEBUG
- static ibool univ_log_debug_warned;
-#endif /* UNIV_LOG_DEBUG */
-
- ut_ad(buf_page_in_file(bpage));
-
- /* We are not holding block_mutex here.
- Nevertheless, it is safe to access bpage, because it is
- io_fixed and oldest_modification != 0. Thus, it cannot be
- relocated in the buffer pool or removed from flush_list or
- LRU_list. */
- ut_ad(!buf_flush_list_mutex_own(buf_pool));
- ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
- ut_ad(buf_page_get_io_fix_unlocked(bpage) == BUF_IO_WRITE);
- ut_ad(bpage->oldest_modification != 0);
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
-#endif
- ut_ad(bpage->newest_modification != 0);
-
-#ifdef UNIV_LOG_DEBUG
- if (!univ_log_debug_warned) {
- univ_log_debug_warned = TRUE;
- fputs("Warning: cannot force log to disk if"
- " UNIV_LOG_DEBUG is defined!\n"
- "Crash recovery will not work!\n",
- stderr);
- }
-#else
- /* Force the log to the disk before writing the modified block */
- log_write_up_to(bpage->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE);
-#endif
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_ZIP_PAGE: /* The page should be dirty. */
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- break;
- case BUF_BLOCK_ZIP_DIRTY: /* compressed-only page: write zip.data */
- frame = bpage->zip.data;
- mach_write_to_8(frame + FIL_PAGE_LSN,
- bpage->newest_modification);
-
- ut_a(page_zip_verify_checksum(frame, zip_size));
-
- memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8);
- break;
- case BUF_BLOCK_FILE_PAGE: /* prefer the compressed copy if one exists */
- frame = bpage->zip.data;
- if (!frame) {
- frame = ((buf_block_t*) bpage)->frame;
- }
-
- buf_flush_init_for_writing(((buf_block_t*) bpage)->frame,
- bpage->zip.data
- ? &bpage->zip : NULL,
- bpage->newest_modification);
- break;
- }
-
- frame = buf_page_encrypt_before_write(space, bpage, frame);
-
- if (!srv_use_doublewrite_buf || !buf_dblwr) {
- fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- sync,
- buf_page_get_space(bpage),
- zip_size,
- buf_page_get_page_no(bpage),
- 0,
- zip_size ? zip_size : bpage->real_size,
- frame,
- bpage,
- &bpage->write_size);
- } else {
- /* InnoDB uses doublewrite buffer and doublewrite buffer
- is initialized. User can define do we use atomic writes
- on a file space (table) or not. If atomic writes are
- not used we should use doublewrite buffer and if
- atomic writes should be used, no doublewrite buffer
- is used.
-
- NOTE(review): the atomic-writes branch below passes FALSE
- rather than 'sync' to fil_io(), while the 'if (sync)' block
- further down still calls buf_page_io_complete(). Confirm
- that a sync single-page flush can never reach that branch
- or that this combination is intended. */
-
- if (fsp_flags_get_atomic_writes(space->flags)
- == ATOMIC_WRITES_ON) {
- fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
- FALSE,
- buf_page_get_space(bpage),
- zip_size,
- buf_page_get_page_no(bpage),
- 0,
- zip_size ? zip_size : bpage->real_size,
- frame,
- bpage,
- &bpage->write_size);
- } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) {
- buf_dblwr_write_single_page(bpage, sync);
- } else {
- buf_dblwr_add_to_batch(bpage);
- }
- }
-
- /* When doing single page flushing the IO is done synchronously
- and we flush the changes to disk only for the tablespace we
- are working on. */
- if (sync) {
- ut_ad(flush_type == BUF_FLUSH_SINGLE_PAGE);
- fil_flush(space);
-
- /* The tablespace could already have been dropped,
- because fil_io(request, sync) would already have
- decremented the node->n_pending. However,
- buf_page_io_complete() only needs to look up the
- tablespace during read requests, not during writes. */
- ut_ad(buf_page_get_io_fix_unlocked(bpage) == BUF_IO_WRITE);
-
-#ifdef UNIV_DEBUG
- dberr_t err =
-#endif
- buf_page_io_complete(bpage);
-
- ut_ad(err == DB_SUCCESS);
- }
-
- fil_space_release_for_io(space);
-
- /* Increment the counter of I/O operations used
- for selecting LRU policy. */
- buf_LRU_stat_inc_io();
-}
-
-/********************************************************************//**
-Writes a flushable page asynchronously from the buffer pool to a file.
-
-The function first decides whether the page can be flushed right now:
-a compressed-only page always can; an uncompressed page that is
-buffer-fixed is skipped unless this is a flush-list flush; otherwise a
-non-blocking S-latch attempt on the block decides (for flush-list
-flushes the S-latch is taken later, after the mutexes are released).
-When the decision is to flush, the page is io-fixed for write, the
-per-type n_flush counter is incremented (resetting the no_flush event
-when it goes from zero), the mutexes are released and
-buf_flush_write_block_low() is called.
-
-NOTE: in simulated aio we must call
-os_aio_simulated_wake_handler_threads after we have posted a batch of
-writes! NOTE: buf_page_get_mutex(bpage) must be held upon entering this
-function, and it will be released by this function if it returns true.
-LRU_list_mutex must be held iff performing a single page flush and will be
-released by the function if it returns true. If the function returns
-false, no mutex is released here.
-@return TRUE if the page was flushed */
-UNIV_INTERN
-bool
-buf_flush_page(
-/*===========*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_page_t* bpage, /*!< in: buffer control block */
- buf_flush_t flush_type, /*!< in: type of flush */
- bool sync) /*!< in: true if sync IO request */
-{
- ut_ad(flush_type < BUF_FLUSH_N_TYPES);
- /* Hold the LRU list mutex iff called for a single page LRU
- flush. A single page LRU flush is already non-performant, and holding
- the LRU list mutex allows us to avoid having to store the previous LRU
- list page or to restart the LRU scan in
- buf_flush_single_page_from_LRU(). */
- ut_ad(flush_type == BUF_FLUSH_SINGLE_PAGE ||
- !mutex_own(&buf_pool->LRU_list_mutex));
- ut_ad(flush_type != BUF_FLUSH_SINGLE_PAGE ||
- mutex_own(&buf_pool->LRU_list_mutex));
- ut_ad(buf_page_in_file(bpage));
- ut_ad(!sync || flush_type == BUF_FLUSH_SINGLE_PAGE);
-
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- ut_ad(mutex_own(block_mutex));
-
- ut_ad(buf_flush_ready_for_flush(bpage, flush_type));
-
- bool is_uncompressed;
-
- is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
- ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex));
-
- ibool flush;
- rw_lock_t* rw_lock;
- bool no_fix_count = bpage->buf_fix_count == 0;
-
- if (!is_uncompressed) {
- flush = TRUE;
- rw_lock = NULL;
-
- } else if (!(no_fix_count || flush_type == BUF_FLUSH_LIST)) {
- /* This is a heuristic, to avoid expensive S attempts.
- rw_lock is left unset on this path; that is harmless because
- it is only read below when flush is TRUE. */
- flush = FALSE;
- } else {
-
- rw_lock = &reinterpret_cast<buf_block_t*>(bpage)->lock;
-
- if (flush_type != BUF_FLUSH_LIST) {
- flush = rw_lock_s_lock_gen_nowait(
- rw_lock, BUF_IO_WRITE);
- } else {
- /* Will S lock later */
- flush = TRUE;
- }
- }
-
- if (flush) {
-
- /* We are committed to flushing by the time we get here */
-
- mutex_enter(&buf_pool->flush_state_mutex);
-
- buf_page_set_io_fix(bpage, BUF_IO_WRITE);
-
- buf_page_set_flush_type(bpage, flush_type);
-
- if (buf_pool->n_flush[flush_type] == 0) {
-
- os_event_reset(buf_pool->no_flush[flush_type]);
- }
-
- ++buf_pool->n_flush[flush_type];
- ut_ad(buf_pool->n_flush[flush_type] != 0);
-
- mutex_exit(&buf_pool->flush_state_mutex);
-
- mutex_exit(block_mutex);
-
- if (flush_type == BUF_FLUSH_SINGLE_PAGE)
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- if (flush_type == BUF_FLUSH_LIST
- && is_uncompressed
- && !rw_lock_s_lock_gen_nowait(rw_lock, BUF_IO_WRITE)) {
- /* avoiding deadlock possibility involves doublewrite
- buffer, should flush it, because it might hold the
- another block->lock. Only after that do we wait for
- the S-latch with the blocking call below. */
- buf_dblwr_flush_buffered_writes();
-
- rw_lock_s_lock_gen(rw_lock, BUF_IO_WRITE);
- }
-
- /* Even though bpage is not protected by any mutex at this
- point, it is safe to access bpage, because it is io_fixed and
- oldest_modification != 0. Thus, it cannot be relocated in the
- buffer pool or removed from flush_list or LRU_list. */
-
- buf_flush_write_block_low(bpage, flush_type, sync);
- }
-
- return(flush);
-}
-
-# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-/********************************************************************//**
-Attempts a synchronous single-page flush of an uncompressed block.
-The caller must hold both the block mutex and the LRU list mutex. The
-block is first checked with buf_flush_ready_for_flush(); only if it is
-ready is buf_flush_page() invoked with BUF_FLUSH_SINGLE_PAGE and
-sync == true. This is loosely based on buf_flush_batch() and
-buf_flush_page().
-@return the result of buf_flush_page() if the block was ready (on
-success that call releases both mutexes), FALSE if the block was not
-ready for flushing */
-UNIV_INTERN
-ibool
-buf_flush_page_try(
-/*===============*/
-	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
-	buf_block_t*	block)		/*!< in/out: buffer control block */
-{
-	buf_page_t*	bpage = &block->page;
-
-	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-	ut_ad(mutex_own(&block->mutex));
-	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
-	if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) {
-		/* On success this releases the LRU list mutex and the
-		block mutex. */
-		return(buf_flush_page(
-			buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, true));
-	}
-
-	return(FALSE);
-}
-# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-/***********************************************************//**
-Tells whether the page (space, offset) is currently in the buffer pool
-and could be flushed with the given flush type. Nothing is flushed: the
-page is looked up under an S-latch on its page hash cell, inspected
-under its block mutex, and both are released before returning.
-@return true if the page can be flushed. */
-static
-bool
-buf_flush_check_neighbor(
-/*=====================*/
-	ulint		space,		/*!< in: space id */
-	ulint		offset,		/*!< in: page offset */
-	buf_flush_t	flush_type)	/*!< in: BUF_FLUSH_LRU or
-					BUF_FLUSH_LIST */
-{
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
-	prio_rw_lock_t*	hash_lock;
-
-	ut_ad(flush_type == BUF_FLUSH_LRU
-	      || flush_type == BUF_FLUSH_LIST);
-
-	/* Look the page up in the buffer pool instance it maps to. */
-	buf_page_t*	bpage = buf_page_hash_get_s_locked(
-		buf_pool, space, offset, &hash_lock);
-
-	if (bpage == NULL) {
-		return(false);
-	}
-
-	/* Take the block mutex before letting go of the hash latch. */
-	ib_mutex_t*	block_mutex = buf_page_get_mutex(bpage);
-
-	mutex_enter(block_mutex);
-	rw_lock_s_unlock(hash_lock);
-
-	ut_a(buf_page_in_file(bpage));
-
-	/* In an LRU flush only 'old' blocks qualify, because the
-	flushed blocks are soon freed. */
-	const bool	flushable =
-		(flush_type != BUF_FLUSH_LRU || buf_page_is_old(bpage))
-		&& buf_flush_ready_for_flush(bpage, flush_type);
-
-	mutex_exit(block_mutex);
-
-	return(flushable);
-}
-
-/***********************************************************//**
-Flushes to disk all flushable pages within the flush area.
-
-The flush area [low, high) is just the victim page when the LRU list is
-shorter than BUF_LRU_OLD_MIN_LEN or srv_flush_neighbors == 0. Otherwise
-it is the aligned area of size min(BUF_READ_AHEAD_AREA, curr_size / 16)
-containing the page; with srv_flush_neighbors == 1 it is further
-narrowed to the contiguous run of flushable pages around the victim.
-'high' is capped at the tablespace size. Once the n_to_flush budget is
-used up, the scan skips ahead to the victim page itself (if not yet
-reached) and stops after it.
-@return number of pages flushed, i.e. the number of successful
-buf_flush_page() calls made here */
-static
-ulint
-buf_flush_try_neighbors(
-/*====================*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page offset */
- buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or
- BUF_FLUSH_LIST */
- ulint n_flushed, /*!< in: number of pages
- flushed so far in this batch */
- ulint n_to_flush) /*!< in: maximum number of pages
- we are allowed to flush */
-{
- ulint i;
- ulint low;
- ulint high;
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
-
- ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
- ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
- ut_ad(!buf_flush_list_mutex_own(buf_pool));
-
- if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN
- || srv_flush_neighbors == 0) {
- /* If there is little space or neighbor flushing is
- not enabled then just flush the victim. */
- low = offset;
- high = offset + 1;
- } else {
- /* When flushed, dirty blocks are searched in
- neighborhoods of this size, and flushed along with the
- original page. */
-
- ulint buf_flush_area;
-
- buf_flush_area = ut_min(
- BUF_READ_AHEAD_AREA(buf_pool),
- buf_pool->curr_size / 16);
-
- low = (offset / buf_flush_area) * buf_flush_area;
- high = (offset / buf_flush_area + 1) * buf_flush_area;
-
- if (srv_flush_neighbors == 1) {
- /* adjust 'low' and 'high' to limit
- for contiguous dirty area.
-
- NOTE(review): i is a ulint, presumably unsigned. When
- low == 0 the condition 'i >= low' can never be false, so
- the downward scan ends only when
- buf_flush_check_neighbor() returns false; if page 0 is
- itself flushable, i-- wraps and the check is made with
- the wrapped offset. Confirm that this lookup always
- fails so that 'low = i + 1' wraps back to 0. */
- if (offset > low) {
- for (i = offset - 1;
- i >= low
- && buf_flush_check_neighbor(
- space, i, flush_type);
- i--) {
- /* do nothing */
- }
- low = i + 1;
- }
-
- for (i = offset + 1;
- i < high
- && buf_flush_check_neighbor(
- space, i, flush_type);
- i++) {
- /* do nothing */
- }
- high = i;
- }
- }
-
- /* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */
-
- if (high > fil_space_get_size(space)) {
- high = fil_space_get_size(space);
- }
-
- ulint count = 0;
-
- for (i = low; i < high; i++) {
-
- prio_rw_lock_t* hash_lock;
- ib_mutex_t* block_mutex;
-
- if ((count + n_flushed) >= n_to_flush) {
-
- /* We have already flushed enough pages and
- should call it a day. There is, however, one
- exception. If the page whose neighbors we
- are flushing has not been flushed yet then
- we'll try to flush the victim that we
- selected originally. */
- if (i <= offset) {
- i = offset;
- } else {
- break;
- }
- }
-
- buf_pool = buf_pool_get(space, i);
-
- /* We only want to flush pages from this buffer pool.
- Note that buf_pool was just re-resolved for page i above. */
- buf_page_t* bpage = buf_page_hash_get_s_locked(buf_pool,
- space, i, &hash_lock);
-
- if (bpage == NULL) {
-
- continue;
- }
-
- block_mutex = buf_page_get_mutex(bpage);
-
- mutex_enter(block_mutex);
-
- rw_lock_s_unlock(hash_lock);
-
- ut_a(buf_page_in_file(bpage));
-
- /* We avoid flushing 'non-old' blocks in an LRU flush,
- because the flushed blocks are soon freed. The victim page
- itself (i == offset) is exempt from that rule and from the
- buf_fix_count == 0 requirement below. */
-
- if (flush_type != BUF_FLUSH_LRU
- || i == offset
- || buf_page_is_old(bpage)) {
-
- if (buf_flush_ready_for_flush(bpage, flush_type)
- && (i == offset || bpage->buf_fix_count == 0)
- && buf_flush_page(
- buf_pool, bpage, flush_type, false)) {
-
- ++count;
-
- continue; /* buf_flush_page() released block_mutex */
- }
- }
-
- mutex_exit(block_mutex);
- }
-
- if (count > 0) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
- MONITOR_FLUSH_NEIGHBOR_COUNT,
- MONITOR_FLUSH_NEIGHBOR_PAGES,
- (count - 1));
- }
-
- return(count);
-}
-
-/********************************************************************//**
-Check if the block is modified and ready for flushing. If the block
-is ready to flush then flush the page and try to flush its neighbors.
-
-On entry the caller holds the LRU list mutex (BUF_FLUSH_LRU) or the
-flush list mutex (BUF_FLUSH_LIST); the same mutex is held again on
-return. For BUF_FLUSH_LRU the block mutex is taken here for the
-readiness check and released before returning. When the page is ready,
-the list mutex is dropped around the call to buf_flush_try_neighbors()
-and re-acquired afterwards.
-
-@return TRUE if, depending on the flush type, either LRU or flush list
-mutex was released during this function. This does not guarantee that some
-pages were written as well.
-Number of pages written are incremented to the count. */
-static
-ibool
-buf_flush_page_and_try_neighbors(
-/*=============================*/
- buf_page_t* bpage, /*!< in: buffer control block,
- must be
- buf_page_in_file(bpage) */
- buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU
- or BUF_FLUSH_LIST */
- ulint n_to_flush, /*!< in: number of pages to
- flush */
- ulint* count) /*!< in/out: number of pages
- flushed */
-{
- ibool flushed;
- ib_mutex_t* block_mutex = NULL;
-#ifdef UNIV_DEBUG
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-#endif /* UNIV_DEBUG */
-
- ut_ad((flush_type == BUF_FLUSH_LRU
- && mutex_own(&buf_pool->LRU_list_mutex))
- || (flush_type == BUF_FLUSH_LIST
- && buf_flush_list_mutex_own(buf_pool)));
-
- if (flush_type == BUF_FLUSH_LRU) {
- block_mutex = buf_page_get_mutex(bpage);
- mutex_enter(block_mutex);
- }
-
- ut_a(buf_page_in_file(bpage)
- || (buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH
- ));
-
- if (buf_flush_ready_for_flush(bpage, flush_type)) {
- buf_pool_t* buf_pool; /* in debug builds this shadows the outer buf_pool */
-
- buf_pool = buf_pool_from_bpage(bpage);
-
- if (flush_type == BUF_FLUSH_LRU) {
- mutex_exit(&buf_pool->LRU_list_mutex);
- }
-
- /* These fields are protected by the buf_page_get_mutex()
- mutex. */
- /* Read the fields directly in order to avoid asserting on
- BUF_BLOCK_REMOVE_HASH pages. */
- ulint space = bpage->space;
- ulint offset = bpage->offset;
-
- if (flush_type == BUF_FLUSH_LRU) {
- mutex_exit(block_mutex);
- } else {
- buf_flush_list_mutex_exit(buf_pool);
- }
-
- /* Try to flush also all the neighbors. From here on the
- page is identified only by the (space, offset) copied
- above; bpage is not dereferenced again in this branch. */
- *count += buf_flush_try_neighbors(
- space, offset, flush_type, *count, n_to_flush);
-
- if (flush_type == BUF_FLUSH_LRU) {
- mutex_enter(&buf_pool->LRU_list_mutex);
- } else {
- buf_flush_list_mutex_enter(buf_pool);
- }
- flushed = TRUE;
-
- } else if (flush_type == BUF_FLUSH_LRU) {
- mutex_exit(block_mutex);
- flushed = FALSE;
- } else {
- flushed = FALSE;
- }
-
- ut_ad((flush_type == BUF_FLUSH_LRU
- && mutex_own(&buf_pool->LRU_list_mutex))
- || (flush_type == BUF_FLUSH_LIST
- && buf_flush_list_mutex_own(buf_pool)));
-
- return(flushed);
-}
-
-/*******************************************************************//**
-Walks the unzip_LRU list from its tail and tries to free blocks with
-buf_LRU_free_page(block, false), so that their uncompressed frames end
-up in the free list. No data is written to disk by this function.
-The walk stops when 'max' blocks were freed, the free list has reached
-srv_LRU_scan_depth, or unzip_LRU has shrunk to a tenth of the LRU list
-or less. This is a best effort attempt: there is no guarantee that
-'max' blocks will be in the free list afterwards.
-The LRU list mutex must be held on entry and is held on return.
-@return number of blocks moved to the free list. */
-static
-ulint
-buf_free_from_unzip_LRU_list_batch(
-/*===============================*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	ulint		max)		/*!< in: desired number of
-					blocks in the free_list */
-{
-	ulint		n_scanned = 0;
-	ulint		n_freed = 0;
-	ulint		free_len = UT_LIST_GET_LEN(buf_pool->free);
-	ulint		unzip_len = UT_LIST_GET_LEN(buf_pool->unzip_LRU);
-
-	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
-	buf_block_t*	block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
-
-	while (block != NULL && n_freed < max
-	       && free_len < srv_LRU_scan_depth
-	       && unzip_len > UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
-
-		ib_mutex_t*	block_mutex
-			= buf_page_get_mutex(&block->page);
-
-		++n_scanned;
-
-		mutex_enter(block_mutex);
-
-		const bool	freed
-			= buf_LRU_free_page(&block->page, false);
-
-		mutex_exit(block_mutex);
-
-		if (freed) {
-			/* Block was freed. LRU list mutex potentially
-			released and reacquired, so take it again here
-			and restart from the tail. */
-			++n_freed;
-			mutex_enter(&buf_pool->LRU_list_mutex);
-			block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
-		} else {
-			block = UT_LIST_GET_PREV(unzip_LRU, block);
-		}
-
-		free_len = UT_LIST_GET_LEN(buf_pool->free);
-		unzip_len = UT_LIST_GET_LEN(buf_pool->unzip_LRU);
-	}
-
-	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
-	if (n_scanned) {
-		MONITOR_INC_VALUE_CUMULATIVE(
-			MONITOR_LRU_BATCH_SCANNED,
-			MONITOR_LRU_BATCH_SCANNED_NUM_CALL,
-			MONITOR_LRU_BATCH_SCANNED_PER_CALL,
-			n_scanned);
-	}
-
-	return(n_freed);
-}
-
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the LRU list.
-The calling thread is not allowed to own any latches on pages!
-It attempts to make 'max' blocks available in the free list. Note that
-it is a best effort attempt and it is not guaranteed that after a call
-to this function there will be 'max' blocks in the free list.
-
-The function returns nothing; results are reported through 'n'. All
-three counters in *n (flushed, evicted, unzip_LRU_evicted) are reset to
-zero on entry, so any value the caller stored there beforehand is lost.
-n->flushed is also added to buf_pool->stat.buf_lru_flush_page_count.
-The LRU list mutex must be held on entry and is held on return. */
-MY_ATTRIBUTE((nonnull))
-static
-void
-buf_flush_LRU_list_batch(
-/*=====================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- ulint max, /*!< in: desired number of
- blocks in the free_list */
- bool limited_scan, /*!< in: if true, allow to scan only up
- to srv_LRU_scan_depth pages in total */
- flush_counters_t* n) /*!< out: flushed/evicted page
- counts */
-{
- buf_page_t* bpage;
- ulint scanned = 0;
- ulint lru_position = 0;
- ulint max_lru_position;
- ulint max_scanned_pages;
- ulint free_len = UT_LIST_GET_LEN(buf_pool->free);
- ulint lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
-
- n->flushed = 0;
- n->evicted = 0;
- n->unzip_LRU_evicted = 0;
-
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
- max_scanned_pages = limited_scan ? srv_LRU_scan_depth : lru_len * max;
- max_lru_position = ut_min(srv_LRU_scan_depth, lru_len);
-
- bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- while (bpage != NULL
- && (srv_cleaner_eviction_factor ? n->evicted : n->flushed) < max
- && free_len < srv_LRU_scan_depth
- && lru_len > BUF_LRU_MIN_LEN
- && lru_position < max_lru_position
- && scanned < max_scanned_pages) {
-
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
- ibool evict;
- ulint failed_acquire;
-
- ++scanned;
- ++lru_position;
-
- failed_acquire = mutex_enter_nowait(block_mutex);
-
- evict = UNIV_LIKELY(!failed_acquire)
- && buf_flush_ready_for_replace(bpage);
-
- if (UNIV_LIKELY(!failed_acquire) && !evict) {
-
- mutex_exit(block_mutex);
- }
-
- /* If the block is ready to be replaced we try to
- free it i.e.: put it on the free list.
- Otherwise we try to flush the block and its
- neighbors. In this case we'll put it on the
- free list in the next pass. We do this extra work
- of putting blocks to the free list instead of
- just flushing them because after every flush
- we have to restart the scan from the tail of
- the LRU list and if we don't clear the tail
- of the flushed pages then the scan becomes
- O(n*n).
-
- NOTE(review): when mutex_enter_nowait() fails, neither
- branch below runs and bpage is not advanced, so the same
- block is retried on the next iteration while 'scanned' and
- 'lru_position' keep growing. Confirm this is intended. */
- if (evict) {
-
- if (buf_LRU_free_page(bpage, true)) {
-
- mutex_exit(block_mutex);
- n->evicted++;
- lru_position = 0;
- mutex_enter(&buf_pool->LRU_list_mutex);
- bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- } else {
-
- bpage = UT_LIST_GET_PREV(LRU, bpage);
- mutex_exit(block_mutex);
- }
- } else if (UNIV_LIKELY(!failed_acquire)) {
-
- ulint space;
- ulint offset;
- buf_page_t* prev_bpage;
-
- prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
-
- /* Save the previous bpage */
-
- if (prev_bpage != NULL) {
- space = prev_bpage->space;
- offset = prev_bpage->offset;
- } else {
- space = ULINT_UNDEFINED;
- offset = ULINT_UNDEFINED;
- }
-
- if (buf_flush_page_and_try_neighbors(
- bpage,
- BUF_FLUSH_LRU, max, &n->flushed)) {
-
- /* LRU list mutex was released.
- reposition the iterator. Note: the
- prev block could have been repositioned
- too but that should be rare. */
-
- if (prev_bpage != NULL) {
-
- ut_ad(space != ULINT_UNDEFINED);
- ut_ad(offset != ULINT_UNDEFINED);
-
- prev_bpage = buf_page_hash_get(
- buf_pool, space, offset);
- }
- }
-
- bpage = prev_bpage;
- }
-
- free_len = UT_LIST_GET_LEN(buf_pool->free);
- lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
- }
-
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
- /* We keep track of all flushes happening as part of LRU
- flush. When estimating the desired rate at which flush_list
- should be flushed, we factor in this value. */
- buf_pool->stat.buf_lru_flush_page_count += n->flushed;
-
- if (scanned) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_LRU_BATCH_SCANNED,
- MONITOR_LRU_BATCH_SCANNED_NUM_CALL,
- MONITOR_LRU_BATCH_SCANNED_PER_CALL,
- scanned);
- }
-}
-
-/*******************************************************************//**
-Flush and move pages from LRU or unzip_LRU list to the free list.
-Whether unzip_LRU is tried first depends on the state of the system, as
-reported by buf_LRU_evict_from_unzip_LRU(). If the unzip_LRU pass did
-not already free 'max' blocks, the remainder is requested from
-buf_flush_LRU_list_batch().
-
-The function returns nothing; results are reported through 'n':
-n->unzip_LRU_evicted is the number of blocks freed from unzip_LRU,
-n->flushed the number of pages for which a write was queued, and
-n->evicted the number of pages evicted, including the unzip_LRU ones.
-The LRU list mutex must be held by the caller. */
-MY_ATTRIBUTE((nonnull))
-static
-void
-buf_do_LRU_batch(
-/*=============*/
-	buf_pool_t*		buf_pool,	/*!< in: buffer pool instance */
-	ulint			max,		/*!< in: desired number of
-						blocks in the free_list */
-	bool			limited_scan,	/*!< in: if true, allow to scan only up
-						to srv_LRU_scan_depth pages in total */
-	flush_counters_t*	n)		/*!< out: flushed/evicted page
-						counts */
-{
-	/* Kept in a local because buf_flush_LRU_list_batch() resets every
-	counter in *n, including unzip_LRU_evicted, on entry. Storing the
-	count in *n before that call would lose it. */
-	ulint	unzip_evicted = 0;
-
-	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
-	if (buf_LRU_evict_from_unzip_LRU(buf_pool)) {
-		unzip_evicted
-			= buf_free_from_unzip_LRU_list_batch(buf_pool, max);
-	}
-
-	if (max > unzip_evicted) {
-		buf_flush_LRU_list_batch(buf_pool, max - unzip_evicted,
-					 limited_scan, n);
-	} else {
-		n->evicted = 0;
-		n->flushed = 0;
-	}
-
-	/* Publish the unzip_LRU result only after the LRU pass so that it
-	survives the reset and is included in the eviction total. */
-	n->unzip_LRU_evicted = unzip_evicted;
-	n->evicted += unzip_evicted;
-}
-
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the flush_list.
-the calling thread is not allowed to own any latches on pages!
-The flush list mutex is acquired here and released before returning;
-it is dropped and re-taken inside buf_flush_page_and_try_neighbors()
-whenever a page is actually flushed. The number of loop iterations is
-reported to the MONITOR_FLUSH_BATCH_SCANNED counters.
-@return number of blocks for which the write request was queued.
-NOTE(review): earlier documentation mentioned ULINT_UNDEFINED for an
-already running flush, but this function only ever returns the count. */
-static
-ulint
-buf_do_flush_list_batch(
-/*====================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- ulint min_n, /*!< in: wished minimum number
- of blocks flushed (it is not
- guaranteed that the actual
- number is that big, though) */
- lsn_t lsn_limit) /*!< all blocks whose
- oldest_modification is smaller
- than this should be flushed (if
- their number does not exceed
- min_n) */
-{
- ulint count = 0;
- ulint scanned = 0;
-
- /* Start from the end of the list looking for a suitable
- block to be flushed. */
- buf_flush_list_mutex_enter(buf_pool);
- ulint len = UT_LIST_GET_LEN(buf_pool->flush_list);
-
- /* In order not to degenerate this scan to O(n*n) we attempt
- to preserve pointer of previous block in the flush list. To do
- so we declare it a hazard pointer. Any thread working on the
- flush list must check the hazard pointer and if it is removing
- the same block then it must reset it. */
- for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
- count < min_n && bpage != NULL && len > 0
- && bpage->oldest_modification < lsn_limit;
- ++scanned) {
-
- buf_page_t* prev;
-
- ut_a(bpage->oldest_modification > 0);
- ut_ad(bpage->in_flush_list);
-
- prev = UT_LIST_GET_PREV(list, bpage);
- buf_flush_set_hp(buf_pool, prev);
-
-#ifdef UNIV_DEBUG
- bool flushed =
-#endif /* UNIV_DEBUG */
- buf_flush_page_and_try_neighbors(
- bpage, BUF_FLUSH_LIST, min_n, &count);
-
- ut_ad(flushed || buf_flush_is_hp(buf_pool, prev));
-
- if (!buf_flush_is_hp(buf_pool, prev)) {
- /* The hazard pointer was reset by some other
- thread. Restart the scan from the tail and reload
- the remaining-length budget. */
- ut_ad(buf_flush_is_hp(buf_pool, NULL));
- bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
- len = UT_LIST_GET_LEN(buf_pool->flush_list);
- } else {
- bpage = prev;
- --len;
- buf_flush_set_hp(buf_pool, NULL);
- }
-
- ut_ad(!bpage || bpage->in_flush_list);
- }
-
- buf_flush_list_mutex_exit(buf_pool);
-
- MONITOR_INC_VALUE_CUMULATIVE(MONITOR_FLUSH_BATCH_SCANNED,
- MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
- MONITOR_FLUSH_BATCH_SCANNED_PER_CALL,
- scanned);
-
- return(count);
-}
-
-/*******************************************************************//**
-Runs one flush batch of the requested type on a buffer pool instance.
-For BUF_FLUSH_LRU the LRU list mutex is taken around buf_do_LRU_batch();
-for BUF_FLUSH_LIST the work is delegated to buf_do_flush_list_batch()
-and n->evicted is set to 0. Any other flush type is a fatal error.
-NOTE 1: in the case of an LRU flush the calling thread may own latches to
-pages: to avoid deadlocks, this function must be written so that it cannot
-end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages!
-The function returns nothing; the page counts are reported through 'n'. */
-MY_ATTRIBUTE((nonnull))
-void
-buf_flush_batch(
-/*============*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	buf_flush_t	flush_type,	/*!< in: BUF_FLUSH_LRU or
-					BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
-					then the caller must not own any
-					latches on pages */
-	ulint		min_n,		/*!< in: wished minimum number of blocks
-					flushed (it is not guaranteed that the
-					actual number is that big, though) */
-	lsn_t		lsn_limit,	/*!< in: in the case of BUF_FLUSH_LIST
-					all blocks whose oldest_modification is
-					smaller than this should be flushed
-					(if their number does not exceed
-					min_n), otherwise ignored */
-	bool		limited_lru_scan,/*!< in: for LRU flushes, if true,
-					allow to scan only up to
-					srv_LRU_scan_depth pages in total */
-	flush_counters_t*	n)	/*!< out: flushed/evicted page
-					counts */
-{
-	ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad((flush_type != BUF_FLUSH_LIST)
-	      || sync_thread_levels_empty_except_dict());
-#endif /* UNIV_SYNC_DEBUG */
-
-	/* Note: The buffer pool mutexes are released and reacquired within
-	the flush functions. */
-	if (flush_type == BUF_FLUSH_LRU) {
-
-		mutex_enter(&buf_pool->LRU_list_mutex);
-		buf_do_LRU_batch(buf_pool, min_n, limited_lru_scan, n);
-		mutex_exit(&buf_pool->LRU_list_mutex);
-
-	} else if (flush_type == BUF_FLUSH_LIST) {
-
-		ut_ad(!limited_lru_scan);
-		n->flushed = buf_do_flush_list_batch(buf_pool, min_n,
-						     lsn_limit);
-		n->evicted = 0;
-
-	} else {
-		ut_error;
-	}
-
-#ifdef UNIV_DEBUG
-	if (buf_debug_prints && n->flushed > 0) {
-		if (flush_type == BUF_FLUSH_LRU) {
-			fprintf(stderr, "Flushed %lu pages in LRU flush\n",
-				(ulong) n->flushed);
-		} else {
-			fprintf(stderr,
-				"Flushed %lu pages in flush list flush\n",
-				(ulong) n->flushed);
-		}
-	}
-#endif /* UNIV_DEBUG */
-}
-
-/******************************************************************//**
-Common tail of an LRU or flush list batch: if any page was flushed,
-the buffered doublewrite writes are flushed with
-buf_dblwr_flush_buffered_writes(); then the page count is added to the
-aggregated srv_stats.buf_pool_flushed statistic. */
-void
-buf_flush_common(
-/*=============*/
-	buf_flush_t	flush_type,	/*!< in: type of flush; must be
-					BUF_FLUSH_LRU or BUF_FLUSH_LIST */
-	ulint		page_count)	/*!< in: number of pages flushed */
-{
-	if (page_count != 0) {
-		buf_dblwr_flush_buffered_writes();
-	}
-
-	ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
-
-#ifdef UNIV_DEBUG
-	if (buf_debug_prints && page_count > 0) {
-		if (flush_type == BUF_FLUSH_LRU) {
-			fprintf(stderr, "Flushed %lu pages in LRU flush\n",
-				(ulong) page_count);
-		} else {
-			fprintf(stderr,
-				"Flushed %lu pages in flush list flush\n",
-				(ulong) page_count);
-		}
-	}
-#endif /* UNIV_DEBUG */
-
-	srv_stats.buf_pool_flushed.add(page_count);
-}
-
-/******************************************************************//**
-Marks the start of a flush batch of the given type. Under
-flush_state_mutex, the batch is refused if pages of that type are still
-being flushed (n_flush > 0) or another batch of that type has already
-been initiated (init_flush set); otherwise init_flush is set.
-@return TRUE if the batch may proceed, FALSE if a batch of the same
-type is already running */
-ibool
-buf_flush_start(
-/*============*/
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	buf_flush_t	flush_type)	/*!< in: BUF_FLUSH_LRU
-					or BUF_FLUSH_LIST */
-{
-	mutex_enter(&buf_pool->flush_state_mutex);
-
-	const bool	already_running
-		= buf_pool->n_flush[flush_type] > 0
-		|| buf_pool->init_flush[flush_type] == TRUE;
-
-	if (already_running) {
-
-		/* There is already a flush batch of the same type running */
-
-#ifdef UNIV_PAGECOMPRESS_DEBUG
-		fprintf(stderr, "Error: flush_type %d n_flush %lu init_flush %lu\n",
-			flush_type, buf_pool->n_flush[flush_type], buf_pool->init_flush[flush_type]);
-#endif
-
-	} else {
-		buf_pool->init_flush[flush_type] = TRUE;
-	}
-
-	mutex_exit(&buf_pool->flush_state_mutex);
-
-	if (already_running) {
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-
-/******************************************************************//**
-Marks the end of a flush batch of the given type. Under
-flush_state_mutex, init_flush for the type is cleared and try_LRU_scan
-is set; if no page writes of that type are outstanding (n_flush == 0)
-the no_flush event is signalled. */
-void
-buf_flush_end(
-/*==========*/
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	buf_flush_t	flush_type)	/*!< in: BUF_FLUSH_LRU
-					or BUF_FLUSH_LIST */
-{
-	mutex_enter(&buf_pool->flush_state_mutex);
-
-	buf_pool->init_flush[flush_type] = FALSE;
-	buf_pool->try_LRU_scan = TRUE;
-
-	if (!buf_pool->n_flush[flush_type]) {
-		/* Nothing of this type is in flight any more: the
-		running flush batch has ended. */
-		os_event_set(buf_pool->no_flush[flush_type]);
-	}
-
-	mutex_exit(&buf_pool->flush_state_mutex);
-}
-
-/******************************************************************//**
-Waits until a flush batch of the given type ends, by waiting on the
-no_flush event of that type. With buf_pool == NULL every buffer pool
-instance is waited for in turn. Each wait is bracketed by
-thd_wait_begin(NULL, THD_WAIT_DISKIO) / thd_wait_end(NULL). */
-UNIV_INTERN
-void
-buf_flush_wait_batch_end(
-/*=====================*/
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance, or NULL
-					to wait on all instances */
-	buf_flush_t	type)		/*!< in: BUF_FLUSH_LRU
-					or BUF_FLUSH_LIST */
-{
-	ut_ad(type == BUF_FLUSH_LRU || type == BUF_FLUSH_LIST);
-
-	if (buf_pool != NULL) {
-		/* A single, explicitly named instance. */
-		thd_wait_begin(NULL, THD_WAIT_DISKIO);
-		os_event_wait(buf_pool->no_flush[type]);
-		thd_wait_end(NULL);
-
-		return;
-	}
-
-	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
-		buf_pool_t*	instance = buf_pool_from_array(i);
-
-		thd_wait_begin(NULL, THD_WAIT_DISKIO);
-		os_event_wait(instance->no_flush[type]);
-		thd_wait_end(NULL);
-	}
-}
-
-/* JAN: TODO: */
-
-/******************************************************************//**
-Acquires the LRU list mutex of a buffer pool instance. The calling
-thread must not already own it (asserted in debug builds). */
-void
-buf_pool_enter_LRU_mutex(
-/*=====================*/
-	buf_pool_t*	buf_pool)	/*!< in/out: buffer pool instance */
-{
-	ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
-
-	mutex_enter(&buf_pool->LRU_list_mutex);
-}
-
-/******************************************************************//**
-Releases the LRU list mutex of a buffer pool instance. The calling
-thread must own it (asserted in debug builds). */
-void
-buf_pool_exit_LRU_mutex(
-/*====================*/
-	buf_pool_t*	buf_pool)	/*!< in/out: buffer pool instance */
-{
-	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
-	mutex_exit(&buf_pool->LRU_list_mutex);
-}
-
-/* JAN: TODO: END: */
-
-/*******************************************************************//**
-Runs one complete LRU flush batch on a buffer pool instance:
-buf_flush_start(), buf_flush_batch(), buf_flush_end() and
-buf_flush_common(), in that order. If buf_flush_start() refuses the
-batch, all counters in *n are zeroed and nothing else is done.
-NOTE: The calling thread is not allowed to own any latches on pages!
-@return true if a batch was queued successfully. false if another batch
-of same type was already running. */
-MY_ATTRIBUTE((nonnull))
-static
-bool
-buf_flush_LRU(
-/*==========*/
-	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
-	ulint		min_n,		/*!< in: wished minimum number of blocks
-					flushed (it is not guaranteed that the
-					actual number is that big, though) */
-	bool		limited_scan,	/*!< in: if true, allow to scan
-					only up to srv_LRU_scan_depth
-					pages in total */
-	flush_counters_t	*n)	/*!< out: flushed/evicted page
-					counts */
-{
-	if (buf_flush_start(buf_pool, BUF_FLUSH_LRU)) {
-
-		buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0,
-				limited_scan, n);
-
-		buf_flush_end(buf_pool, BUF_FLUSH_LRU);
-
-		buf_flush_common(BUF_FLUSH_LRU, n->flushed);
-
-		return(true);
-	}
-
-	/* Another LRU batch is already running: report zero work. */
-	n->flushed = 0;
-	n->evicted = 0;
-	n->unzip_LRU_evicted = 0;
-
-	return(false);
-}
-
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the flush list of
-all buffer pool instances.
-NOTE: The calling thread is not allowed to own any latches on pages!
-@return true if a batch was queued successfully for each buffer pool
-instance. false if another batch of same type was already running in
-at least one of the buffer pool instance */
-UNIV_INTERN
-bool
-buf_flush_list(
-/*===========*/
- ulint min_n, /*!< in: wished minimum mumber of blocks
- flushed (it is not guaranteed that the
- actual number is that big, though) */
- lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all
- blocks whose oldest_modification is
- smaller than this should be flushed
- (if their number does not exceed
- min_n), otherwise ignored */
- ulint* n_processed) /*!< out: the number of pages
- which were processed is passed
- back to caller. Ignored if NULL */
-
-{
- ulint i;
-
- ulint requested_pages[MAX_BUFFER_POOLS];
- bool active_instance[MAX_BUFFER_POOLS];
- ulint remaining_instances = srv_buf_pool_instances;
- bool timeout = false;
- ulint flush_start_time = 0;
-
- if (buf_mtflu_init_done()) {
- return(buf_mtflu_flush_list(min_n, lsn_limit, n_processed));
- }
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
- requested_pages[i] = 0;
- active_instance[i] = true;
- }
-
- if (n_processed) {
- *n_processed = 0;
- }
-
- if (min_n != ULINT_MAX) {
- /* Ensure that flushing is spread evenly amongst the
- buffer pool instances. When min_n is ULINT_MAX
- we need to flush everything up to the lsn limit
- so no limit here. */
- min_n = (min_n + srv_buf_pool_instances - 1)
- / srv_buf_pool_instances;
- if (lsn_limit != LSN_MAX) {
- flush_start_time = ut_time_ms();
- }
- }
-
- /* Flush to lsn_limit in all buffer pool instances */
- while (remaining_instances && !timeout) {
-
- ulint flush_common_batch = 0;
-
- for (i = 0; i < srv_buf_pool_instances; i++) {
-
- if (flush_start_time
- && (ut_time_ms() - flush_start_time
- >= srv_cleaner_max_flush_time)) {
-
- timeout = true;
- break;
- }
-
- if (active_instance[i]) {
-
- buf_pool_t* buf_pool;
- ulint chunk_size;
- flush_counters_t n;
-
- chunk_size = ut_min(
- srv_cleaner_flush_chunk_size,
- min_n - requested_pages[i]);
-
- buf_pool = buf_pool_from_array(i);
-
- if (!buf_flush_start(buf_pool,
- BUF_FLUSH_LIST)) {
-
- continue;
- }
-
- buf_flush_batch(buf_pool, BUF_FLUSH_LIST,
- chunk_size, lsn_limit, false,
- &n);
-
- buf_flush_end(buf_pool, BUF_FLUSH_LIST);
-
- flush_common_batch += n.flushed;
-
- if (n_processed) {
- *n_processed += n.flushed;
- }
-
- requested_pages[i] += chunk_size;
-
- if (requested_pages[i] >= min_n
- || !n.flushed) {
-
- active_instance[i] = false;
- remaining_instances--;
- }
-
- if (n.flushed) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_BATCH_TOTAL_PAGE,
- MONITOR_FLUSH_BATCH_COUNT,
- MONITOR_FLUSH_BATCH_PAGES,
- n.flushed);
- }
- }
- }
-
- buf_flush_common(BUF_FLUSH_LIST, flush_common_batch);
- }
-
- /* If we haven't flushed all the instances due to timeout or a repeat
- failure to start a flush, return failure */
- for (i = 0; i < srv_buf_pool_instances; i++) {
- if (active_instance[i]) {
- return(false);
- }
- }
-
- return(true);
-}
-
-/******************************************************************//**
-This function picks up a single dirty page from the tail of the LRU
-list, flushes it, removes it from page_hash and LRU list and puts
-it on the free list. It is called from user threads when they are
-unable to find a replaceable page at the tail of the LRU list i.e.:
-when the background LRU flushing in the page_cleaner thread is not
-fast enough to keep pace with the workload.
-@return TRUE if success. */
-UNIV_INTERN
-ibool
-buf_flush_single_page_from_LRU(
-/*===========================*/
- buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */
-{
- ulint scanned;
- buf_page_t* bpage;
- ibool flushed = FALSE;
-
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- for (bpage = UT_LIST_GET_LAST(buf_pool->LRU), scanned = 1;
- bpage != NULL;
- bpage = UT_LIST_GET_PREV(LRU, bpage), ++scanned) {
-
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- mutex_enter(block_mutex);
-
- if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) {
-
- /* The following call will release the LRU list
- and block mutex. */
-
- flushed = buf_flush_page(buf_pool, bpage,
- BUF_FLUSH_SINGLE_PAGE, true);
-
- if (flushed) {
- /* buf_flush_page() will release the
- block mutex */
- break;
- }
- }
-
- mutex_exit(block_mutex);
- }
-
- if (!flushed)
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_LRU_SINGLE_FLUSH_SCANNED,
- MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL,
- MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL,
- scanned);
-
- if (bpage == NULL) {
- /* Can't find a single flushable page. */
- return(FALSE);
- }
-
-
- ibool freed = FALSE;
-
- /* At this point the page has been written to the disk.
- As we are not holding LRU list or buf_page_get_mutex() mutex therefore
- we cannot use the bpage safely. It may have been plucked out
- of the LRU list by some other thread or it may even have
- relocated in case of a compressed page. We need to start
- the scan of LRU list again to remove the block from the LRU
- list and put it on the free list. */
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- bpage != NULL;
- bpage = UT_LIST_GET_PREV(LRU, bpage)) {
-
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- mutex_enter(block_mutex);
-
- ibool ready = buf_flush_ready_for_replace(bpage);
-
- if (ready) {
- bool evict_zip;
-
- evict_zip = !buf_LRU_evict_from_unzip_LRU(buf_pool);
-
- freed = buf_LRU_free_page(bpage, evict_zip);
-
- mutex_exit(block_mutex);
-
- break;
- }
-
- mutex_exit(block_mutex);
-
- }
-
- if (!freed)
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- return(freed);
-}
-
-/*********************************************************************//**
-Clears up tail of the LRU lists:
-* Put replaceable pages at the tail of LRU to the free list
-* Flush dirty pages at the tail of LRU to the disk
-The depth to which we scan each buffer pool is controlled by dynamic
-config parameter innodb_LRU_scan_depth.
-@return number of flushed and evicted pages */
-UNIV_INTERN
-ulint
-buf_flush_LRU_tail(void)
-/*====================*/
-{
- ulint total_flushed = 0;
- ulint total_evicted = 0;
- ulint start_time = ut_time_ms();
- ulint scan_depth[MAX_BUFFER_POOLS];
- ulint requested_pages[MAX_BUFFER_POOLS];
- bool active_instance[MAX_BUFFER_POOLS];
- bool limited_scan[MAX_BUFFER_POOLS];
- ulint previous_evicted[MAX_BUFFER_POOLS];
- ulint remaining_instances = srv_buf_pool_instances;
- ulint lru_chunk_size = srv_cleaner_lru_chunk_size;
- ulint free_list_lwm = srv_LRU_scan_depth / 100
- * srv_cleaner_free_list_lwm;
-
- if(buf_mtflu_init_done())
- {
- return(buf_mtflu_flush_LRU_tail());
- }
-
- for (ulint i = 0; i < srv_buf_pool_instances; i++) {
-
- const buf_pool_t* buf_pool = buf_pool_from_array(i);
-
- scan_depth[i] = ut_min(srv_LRU_scan_depth,
- UT_LIST_GET_LEN(buf_pool->LRU));
- requested_pages[i] = 0;
- active_instance[i] = true;
- limited_scan[i] = true;
- previous_evicted[i] = 0;
- }
-
- while (remaining_instances) {
-
- if (ut_time_ms() - start_time >= srv_cleaner_max_lru_time) {
-
- break;
- }
-
- for (ulint i = 0; i < srv_buf_pool_instances; i++) {
-
- if (!active_instance[i]) {
- continue;
- }
-
- ulint free_len = free_list_lwm;
- buf_pool_t* buf_pool = buf_pool_from_array(i);
-
- do {
- flush_counters_t n;
-
- ut_ad(requested_pages[i] <= scan_depth[i]);
-
- /* Currently page_cleaner is the only thread
- that can trigger an LRU flush. It is possible
- that a batch triggered during last iteration is
- still running, */
- if (buf_flush_LRU(buf_pool, lru_chunk_size,
- limited_scan[i], &n)) {
-
- /* Allowed only one batch per
- buffer pool instance. */
- buf_flush_wait_batch_end(
- buf_pool, BUF_FLUSH_LRU);
- }
-
- total_flushed += n.flushed;
-
- /* When we evict less pages than we did on a
- previous try we relax the LRU scan limit in
- order to attempt to evict more */
- limited_scan[i]
- = (previous_evicted[i] > n.evicted);
- previous_evicted[i] = n.evicted;
- total_evicted += n.evicted;
-
- requested_pages[i] += lru_chunk_size;
-
- /* If we failed to flush or evict this
- instance, do not bother anymore. But take into
- account that we might have zero flushed pages
- because the flushing request was fully
- satisfied by unzip_LRU evictions. */
- if (requested_pages[i] >= scan_depth[i]
- || !(srv_cleaner_eviction_factor
- ? n.evicted
- : (n.flushed + n.unzip_LRU_evicted))) {
-
- active_instance[i] = false;
- remaining_instances--;
- } else {
-
- free_len = UT_LIST_GET_LEN(
- buf_pool->free);
- }
- if (n.flushed) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE,
- MONITOR_LRU_BATCH_FLUSH_COUNT,
- MONITOR_LRU_BATCH_FLUSH_PAGES,
- n.flushed);
- }
-
- if (n.evicted) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE,
- MONITOR_LRU_BATCH_EVICT_COUNT,
- MONITOR_LRU_BATCH_EVICT_PAGES,
- n.evicted);
- }
- } while (active_instance[i]
- && free_len <= free_list_lwm);
- }
- }
-
- return(total_flushed + total_evicted);
-}
-
-/*********************************************************************//**
-Wait for any possible LRU flushes that are in progress to end. */
-UNIV_INTERN
-void
-buf_flush_wait_LRU_batch_end(void)
-/*==============================*/
-{
- for (ulint i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
-
- buf_pool = buf_pool_from_array(i);
-
- mutex_enter(&buf_pool->flush_state_mutex);
-
- if (buf_pool->n_flush[BUF_FLUSH_LRU] > 0
- || buf_pool->init_flush[BUF_FLUSH_LRU]) {
-
- mutex_exit(&buf_pool->flush_state_mutex);
- buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
- } else {
- mutex_exit(&buf_pool->flush_state_mutex);
- }
- }
-}
-
-/*********************************************************************//**
-Flush a batch of dirty pages from the flush list
-@return number of pages flushed, 0 if no page is flushed or if another
-flush_list type batch is running */
-static
-ulint
-page_cleaner_do_flush_batch(
-/*========================*/
- ulint n_to_flush, /*!< in: number of pages that
- we should attempt to flush. */
- lsn_t lsn_limit) /*!< in: LSN up to which flushing
- must happen */
-{
- ulint n_flushed;
-
- buf_flush_list(n_to_flush, lsn_limit, &n_flushed);
-
- return(n_flushed);
-}
-
-/*********************************************************************//**
-Calculates if flushing is required based on number of dirty pages in
-the buffer pool.
-@return percent of io_capacity to flush to manage dirty page ratio */
-static
-ulint
-af_get_pct_for_dirty()
-/*==================*/
-{
- ulint dirty_pct = (ulint) buf_get_modified_ratio_pct();
-
- if (dirty_pct > 0 && srv_max_buf_pool_modified_pct == 0) {
- return(100);
- }
-
- ut_a(srv_max_dirty_pages_pct_lwm
- <= srv_max_buf_pool_modified_pct);
-
- if (srv_max_dirty_pages_pct_lwm == 0) {
- /* The user has not set the option to preflush dirty
- pages as we approach the high water mark. */
- if (dirty_pct > srv_max_buf_pool_modified_pct) {
- /* We have crossed the high water mark of dirty
- pages In this case we start flushing at 100% of
- innodb_io_capacity. */
- return(100);
- }
- } else if (dirty_pct > srv_max_dirty_pages_pct_lwm) {
- /* We should start flushing pages gradually. */
- return (ulint) ((dirty_pct * 100)
- / (srv_max_buf_pool_modified_pct + 1));
- }
-
- return(0);
-}
-
-/*********************************************************************//**
-Calculates if flushing is required based on redo generation rate.
-@return percent of io_capacity to flush to manage redo space */
-static
-ulint
-af_get_pct_for_lsn(
-/*===============*/
- lsn_t age) /*!< in: current age of LSN. */
-{
- lsn_t max_async_age;
- lsn_t lsn_age_factor;
- lsn_t af_lwm = (lsn_t) ((srv_adaptive_flushing_lwm
- * log_get_capacity()) / 100);
-
- if (age < af_lwm) {
- /* No adaptive flushing. */
- return(0);
- }
-
- max_async_age = log_get_max_modified_age_async();
-
- if (age < max_async_age && !srv_adaptive_flushing) {
- /* We have still not reached the max_async point and
- the user has disabled adaptive flushing. */
- return(0);
- }
-
- /* If we are here then we know that either:
- 1) User has enabled adaptive flushing
- 2) User may have disabled adaptive flushing but we have reached
- max_async_age. */
- lsn_age_factor = (age * 100) / max_async_age;
-
- ut_ad(srv_max_io_capacity >= srv_io_capacity);
- switch ((srv_cleaner_lsn_age_factor_t)srv_cleaner_lsn_age_factor) {
- case SRV_CLEANER_LSN_AGE_FACTOR_LEGACY:
- return(static_cast<ulint>(
- ((srv_max_io_capacity / srv_io_capacity)
- * (lsn_age_factor
- * sqrt((double)lsn_age_factor)))
- / 7.5));
- case SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT:
- return(static_cast<ulint>(
- ((srv_max_io_capacity / srv_io_capacity)
- * (lsn_age_factor * lsn_age_factor
- * sqrt((double)lsn_age_factor)))
- / 700.5));
- default:
- ut_error;
- }
-}
-
-/*********************************************************************//**
-This function is called approximately once every second by the
-page_cleaner thread. Based on various factors it decides if there is a
-need to do flushing. If flushing is needed it is performed and the
-number of pages flushed is returned.
-@return number of pages flushed */
-static
-ulint
-page_cleaner_flush_pages_if_needed(void)
-/*====================================*/
-{
- static lsn_t lsn_avg_rate = 0;
- static lsn_t prev_lsn = 0;
- static lsn_t last_lsn = 0;
- static ulint sum_pages = 0;
- static ulint last_pages = 0;
- static ulint prev_pages = 0;
- static ulint avg_page_rate = 0;
- static ulint n_iterations = 0;
- lsn_t oldest_lsn;
- lsn_t cur_lsn;
- lsn_t age;
- lsn_t lsn_rate;
- ulint n_pages = 0;
- ulint pct_for_dirty = 0;
- ulint pct_for_lsn = 0;
- ulint pct_total = 0;
- int age_factor = 0;
-
- cur_lsn = log_get_lsn_nowait();
-
- /* log_get_lsn_nowait tries to get log_sys->mutex with
- mutex_enter_nowait, if this does not succeed function
- returns 0, do not use that value to update stats. */
- if (cur_lsn == 0) {
- return(0);
- }
-
- if (prev_lsn == 0) {
- /* First time around. */
- prev_lsn = cur_lsn;
- return(0);
- }
-
- if (prev_lsn == cur_lsn) {
- return(0);
- }
-
- /* We update our variables every srv_flushing_avg_loops
- iterations to smooth out transition in workload. */
- if (++n_iterations >= srv_flushing_avg_loops) {
-
- avg_page_rate = ((sum_pages / srv_flushing_avg_loops)
- + avg_page_rate) / 2;
-
- /* How much LSN we have generated since last call. */
- lsn_rate = (cur_lsn - prev_lsn) / srv_flushing_avg_loops;
-
- lsn_avg_rate = (lsn_avg_rate + lsn_rate) / 2;
-
- prev_lsn = cur_lsn;
-
- n_iterations = 0;
-
- sum_pages = 0;
- }
-
- oldest_lsn = buf_pool_get_oldest_modification();
-
- ut_ad(oldest_lsn <= log_get_lsn());
-
- age = cur_lsn > oldest_lsn ? cur_lsn - oldest_lsn : 0;
-
- pct_for_dirty = af_get_pct_for_dirty();
- pct_for_lsn = af_get_pct_for_lsn(age);
-
- pct_total = ut_max(pct_for_dirty, pct_for_lsn);
-
- /* Cap the maximum IO capacity that we are going to use by
- max_io_capacity. */
- n_pages = PCT_IO(pct_total);
- if (age < log_get_max_modified_age_async())
- n_pages = (n_pages + avg_page_rate) / 2;
-
- if (n_pages > srv_max_io_capacity) {
- n_pages = srv_max_io_capacity;
- }
-
- if (last_pages && cur_lsn - last_lsn > lsn_avg_rate / 2) {
- age_factor = static_cast<int>(prev_pages / last_pages);
- }
-
- MONITOR_SET(MONITOR_FLUSH_N_TO_FLUSH_REQUESTED, n_pages);
-
- prev_pages = n_pages;
- n_pages = page_cleaner_do_flush_batch(
- n_pages, oldest_lsn + lsn_avg_rate * (age_factor + 1));
-
- last_lsn= cur_lsn;
- last_pages= n_pages + 1;
-
- MONITOR_SET(MONITOR_FLUSH_AVG_PAGE_RATE, avg_page_rate);
- MONITOR_SET(MONITOR_FLUSH_LSN_AVG_RATE, lsn_avg_rate);
- MONITOR_SET(MONITOR_FLUSH_PCT_FOR_DIRTY, pct_for_dirty);
- MONITOR_SET(MONITOR_FLUSH_PCT_FOR_LSN, pct_for_lsn);
-
- if (n_pages) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
- MONITOR_FLUSH_ADAPTIVE_COUNT,
- MONITOR_FLUSH_ADAPTIVE_PAGES,
- n_pages);
-
- sum_pages += n_pages;
- }
-
- return(n_pages);
-}
-
-/*********************************************************************//**
-Puts the page_cleaner thread to sleep if it has finished work in less
-than a second */
-static
-void
-page_cleaner_sleep_if_needed(
-/*=========================*/
- ulint next_loop_time) /*!< in: time when next loop iteration
- should start */
-{
- /* No sleep if we are cleaning the buffer pool during the shutdown
- with everything else finished */
- if (srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE)
- return;
-
- ulint cur_time = ut_time_ms();
-
- if (next_loop_time > cur_time) {
- /* Get sleep interval in micro seconds. We use
- ut_min() to avoid long sleep in case of
- wrap around. */
- os_thread_sleep(ut_min(1000000,
- (next_loop_time - cur_time)
- * 1000));
- }
-}
-
-/*********************************************************************//**
-Returns the aggregate free list length over all buffer pool instances.
-@return total free list length. */
-MY_ATTRIBUTE((warn_unused_result))
-static
-ulint
-buf_get_total_free_list_length(void)
-/*================================*/
-{
- ulint result = 0;
-
- for (ulint i = 0; i < srv_buf_pool_instances; i++) {
-
- result += UT_LIST_GET_LEN(buf_pool_from_array(i)->free);
- }
-
- return result;
-}
-
-/** Returns the aggregate LRU list length over all buffer pool instances.
-@return total LRU list length. */
-MY_ATTRIBUTE((warn_unused_result))
-static
-ulint
-buf_get_total_LRU_list_length(void)
-{
- ulint result = 0;
-
- for (ulint i = 0; i < srv_buf_pool_instances; i++) {
-
- result += UT_LIST_GET_LEN(buf_pool_from_array(i)->LRU);
- }
-
- return result;
-}
-
-/*********************************************************************//**
-Adjust the desired page cleaner thread sleep time for LRU flushes. */
-MY_ATTRIBUTE((nonnull))
-static
-void
-page_cleaner_adapt_lru_sleep_time(
-/*==============================*/
- ulint* lru_sleep_time, /*!< in/out: desired page cleaner thread sleep
- time for LRU flushes */
- ulint lru_n_flushed) /*!< in: number of flushed in previous batch */
-
-{
- ulint free_len = buf_get_total_free_list_length();
- ulint max_free_len = ut_min(buf_get_total_LRU_list_length(),
- srv_LRU_scan_depth * srv_buf_pool_instances);
-
- if (free_len < max_free_len / 100 && lru_n_flushed) {
-
- /* Free lists filled less than 1%
- and iteration was able to flush, no sleep */
- *lru_sleep_time = 0;
- } else if (free_len > max_free_len / 5
- || (free_len < max_free_len / 100 && lru_n_flushed == 0)) {
-
- /* Free lists filled more than 20%
- or no pages flushed in previous batch, sleep a bit more */
- *lru_sleep_time += 1;
- if (*lru_sleep_time > srv_cleaner_max_lru_time)
- *lru_sleep_time = srv_cleaner_max_lru_time;
- } else if (free_len < max_free_len / 20 && *lru_sleep_time >= 50) {
-
- /* Free lists filled less than 5%, sleep a bit less */
- *lru_sleep_time -= 50;
- } else {
-
- /* Free lists filled between 5% and 20%, no change */
- }
-}
-
-/*********************************************************************//**
-Get the desired page cleaner thread sleep time for flush list flushes.
-@return desired sleep time */
-MY_ATTRIBUTE((warn_unused_result))
-static
-ulint
-page_cleaner_adapt_flush_sleep_time(void)
-/*=====================================*/
-{
- lsn_t age = log_get_lsn() - log_sys->last_checkpoint_lsn;
-
- if (age > log_sys->max_modified_age_sync) {
-
- /* No sleep if in sync preflush zone */
- return(0);
- }
-
- /* In all other cases flush list factors do not influence the page
- cleaner sleep time */
- return(srv_cleaner_max_flush_time);
-}
-
-/******************************************************************//**
-page_cleaner thread tasked with flushing dirty pages from the buffer
-pool flush lists. As of now we'll have only one instance of this thread.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(buf_flush_page_cleaner_thread)(
-/*==========================================*/
- void* arg MY_ATTRIBUTE((unused)))
- /*!< in: a dummy parameter required by
- os_thread_create */
-{
- my_thread_init();
- ulint next_loop_time = ut_time_ms() + 1000;
- ulint n_flushed = 0;
- ulint last_activity = srv_get_activity_count();
- ulint last_activity_time = ut_time_ms();
-
- ut_ad(!srv_read_only_mode);
-
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(buf_page_cleaner_thread_key);
-#endif /* UNIV_PFS_THREAD */
-
- srv_cleaner_tid = os_thread_get_tid();
-
- os_thread_set_priority(srv_cleaner_tid, srv_sched_priority_cleaner);
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "InnoDB: page_cleaner thread running, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif /* UNIV_DEBUG_THREAD_CREATION */
-
- while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
-
- ulint page_cleaner_sleep_time;
- ibool server_active;
-
- srv_current_thread_priority = srv_cleaner_thread_priority;
-
- page_cleaner_sleep_if_needed(next_loop_time);
-
- page_cleaner_sleep_time
- = page_cleaner_adapt_flush_sleep_time();
-
- next_loop_time = ut_time_ms() + page_cleaner_sleep_time;
-
- server_active = srv_check_activity(last_activity);
-
- if (server_active
- || ut_time_ms() - last_activity_time < 1000) {
-
- if (server_active) {
-
- last_activity = srv_get_activity_count();
- last_activity_time = ut_time_ms();
- }
-
- /* Flush pages from flush_list if required */
- page_cleaner_flush_pages_if_needed();
- } else if (srv_idle_flush_pct) {
- n_flushed = page_cleaner_do_flush_batch(
- PCT_IO(100),
- LSN_MAX);
-
- if (n_flushed) {
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
- MONITOR_FLUSH_BACKGROUND_COUNT,
- MONITOR_FLUSH_BACKGROUND_PAGES,
- n_flushed);
- }
- }
-
- /* Flush pages from end of LRU if required */
- buf_flush_LRU_tail();
- }
-
- ut_ad(srv_shutdown_state > 0);
- if (srv_fast_shutdown == 2) {
- /* In very fast shutdown we simulate a crash of
- buffer pool. We are not required to do any flushing */
- goto thread_exit;
- }
-
- /* In case of normal and slow shutdown the page_cleaner thread
- must wait for all other activity in the server to die down.
- Note that we can start flushing the buffer pool as soon as the
- server enters shutdown phase but we must stay alive long enough
- to ensure that any work done by the master or purge threads is
- also flushed.
- During shutdown we pass through two stages. In the first stage,
- when SRV_SHUTDOWN_CLEANUP is set other threads like the master
- and the purge threads may be working as well. We start flushing
- the buffer pool but can't be sure that no new pages are being
- dirtied until we enter SRV_SHUTDOWN_FLUSH_PHASE phase. */
-
- do {
- n_flushed = page_cleaner_do_flush_batch(PCT_IO(100), LSN_MAX);
-
- /* We sleep only if there are no pages to flush */
- if (n_flushed == 0) {
- os_thread_sleep(100000);
- }
- } while (srv_shutdown_state == SRV_SHUTDOWN_CLEANUP);
-
- /* At this point all threads including the master and the purge
- thread must have been suspended. */
- ut_a(srv_get_active_thread_type() == SRV_NONE);
- ut_a(srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE);
-
- /* We can now make a final sweep on flushing the buffer pool
- and exit after we have cleaned the whole buffer pool.
- It is important that we wait for any running batch that has
- been triggered by us to finish. Otherwise we can end up
- considering end of that batch as a finish of our final
- sweep and we'll come out of the loop leaving behind dirty pages
- in the flush_list */
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
- buf_flush_wait_LRU_batch_end();
-
- bool success;
-
- do {
-
- success = buf_flush_list(PCT_IO(100), LSN_MAX, &n_flushed);
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
-
- } while (!success || n_flushed > 0 || (IS_XTRABACKUP() && buf_get_n_pending_read_ios() > 0));
-
- /* Some sanity checks */
- ut_a(srv_get_active_thread_type() == SRV_NONE);
- ut_a(srv_shutdown_state == SRV_SHUTDOWN_FLUSH_PHASE);
- for (ulint i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool = buf_pool_from_array(i);
- ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == 0);
- }
-
- /* We have lived our life. Time to die. */
-
-thread_exit:
- buf_page_cleaner_is_active = false;
-
- my_thread_end();
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/******************************************************************//**
-lru_manager thread tasked with performing LRU flushes and evictions to refill
-the buffer pool free lists. As of now we'll have only one instance of this
-thread.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(buf_flush_lru_manager_thread)(
-/*==========================================*/
- void* arg MY_ATTRIBUTE((unused)))
- /*!< in: a dummy parameter required by
- os_thread_create */
-{
- ulint next_loop_time = ut_time_ms() + 1000;
- ulint lru_sleep_time = srv_cleaner_max_lru_time;
- ulint lru_n_flushed = 1;
-
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(buf_lru_manager_thread_key);
-#endif /* UNIV_PFS_THREAD */
-
- srv_lru_manager_tid = os_thread_get_tid();
-
- os_thread_set_priority(srv_lru_manager_tid,
- srv_sched_priority_cleaner);
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "InnoDB: lru_manager thread running, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif /* UNIV_DEBUG_THREAD_CREATION */
-
- /* On server shutdown, the LRU manager thread runs through cleanup
- phase to provide free pages for the master and purge threads. */
- while (srv_shutdown_state == SRV_SHUTDOWN_NONE
- || srv_shutdown_state == SRV_SHUTDOWN_CLEANUP) {
-
- srv_current_thread_priority = srv_cleaner_thread_priority;
-
- page_cleaner_sleep_if_needed(next_loop_time);
-
- page_cleaner_adapt_lru_sleep_time(&lru_sleep_time, lru_n_flushed);
-
- next_loop_time = ut_time_ms() + lru_sleep_time;
-
- lru_n_flushed = buf_flush_LRU_tail();
- }
-
- buf_lru_manager_is_active = false;
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-
-/** Functor to validate the flush list. */
-struct Check {
- void operator()(const buf_page_t* elem)
- {
- ut_a(elem->in_flush_list);
- }
-};
-
-/******************************************************************//**
-Validates the flush list.
-@return TRUE if ok */
-static
-ibool
-buf_flush_validate_low(
-/*===================*/
- buf_pool_t* buf_pool) /*!< in: Buffer pool instance */
-{
- buf_page_t* bpage;
- const ib_rbt_node_t* rnode = NULL;
-
- ut_ad(buf_flush_list_mutex_own(buf_pool));
-
- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list, Check());
-
- bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
-
- /* If we are in recovery mode i.e.: flush_rbt != NULL
- then each block in the flush_list must also be present
- in the flush_rbt. */
- if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
- rnode = rbt_first(buf_pool->flush_rbt);
- }
-
- while (bpage != NULL) {
- const lsn_t om = bpage->oldest_modification;
-
- ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
-
- ut_ad(bpage->in_flush_list);
-
- /* A page in buf_pool->flush_list can be in
- BUF_BLOCK_REMOVE_HASH state. This happens when a page
- is in the middle of being relocated. In that case the
- original descriptor can have this state and still be
- in the flush list waiting to acquire the
- buf_pool->flush_list_mutex to complete the relocation. */
- ut_a(buf_page_in_file(bpage)
- || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
- ut_a(om > 0);
-
- if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
- buf_page_t** prpage;
-
- ut_a(rnode);
- prpage = rbt_value(buf_page_t*, rnode);
-
- ut_a(*prpage);
- ut_a(*prpage == bpage);
- rnode = rbt_next(buf_pool->flush_rbt, rnode);
- }
-
- bpage = UT_LIST_GET_NEXT(list, bpage);
-
- ut_a(!bpage || om >= bpage->oldest_modification);
- }
-
- /* By this time we must have exhausted the traversal of
- flush_rbt (if active) as well. */
- ut_a(rnode == NULL);
-
- return(TRUE);
-}
-
-/******************************************************************//**
-Validates the flush list.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-buf_flush_validate(
-/*===============*/
- buf_pool_t* buf_pool) /*!< buffer pool instance */
-{
- ibool ret;
-
- buf_flush_list_mutex_enter(buf_pool);
-
- ret = buf_flush_validate_low(buf_pool);
-
- buf_flush_list_mutex_exit(buf_pool);
-
- return(ret);
-}
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-/******************************************************************//**
-Check if there are any dirty pages that belong to a space id in the flush
-list in a particular buffer pool.
-@return number of dirty pages present in a single buffer pool */
-UNIV_INTERN
-ulint
-buf_pool_get_dirty_pages_count(
-/*===========================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool */
- ulint id) /*!< in: space id to check */
-
-{
- ulint count = 0;
-
- buf_flush_list_mutex_enter(buf_pool);
-
- buf_page_t* bpage;
-
- for (bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
- bpage != 0;
- bpage = UT_LIST_GET_NEXT(list, bpage)) {
-
- ut_ad(buf_page_in_file(bpage)
- || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH);
- ut_ad(bpage->in_flush_list);
- ut_ad(bpage->oldest_modification > 0);
-
- if (bpage->space == id) {
- ++count;
- }
- }
-
- buf_flush_list_mutex_exit(buf_pool);
-
- return(count);
-}
-
-/******************************************************************//**
-Check if there are any dirty pages that belong to a space id in the flush list.
-@return number of dirty pages present in all the buffer pools */
-UNIV_INTERN
-ulint
-buf_flush_get_dirty_pages_count(
-/*============================*/
- ulint id) /*!< in: space id to check */
-
-{
- ulint count = 0;
-
- for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
- buf_pool_t* buf_pool;
-
- buf_pool = buf_pool_from_array(i);
-
- count += buf_pool_get_dirty_pages_count(buf_pool, id);
- }
-
- return(count);
-}
-#endif /* UNIV_DEBUG */
diff --git a/storage/xtradb/buf/buf0lru.cc b/storage/xtradb/buf/buf0lru.cc
deleted file mode 100644
index d979eb44a96..00000000000
--- a/storage/xtradb/buf/buf0lru.cc
+++ /dev/null
@@ -1,3016 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file buf/buf0lru.cc
-The database buffer replacement algorithm
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0lru.h"
-
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_NONINL
-#include "buf0lru.ic"
-#endif
-
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "ut0rnd.h"
-#include "sync0sync.h"
-#include "sync0rw.h"
-#include "hash0hash.h"
-#include "os0sync.h"
-#include "fil0fil.h"
-#include "btr0btr.h"
-#include "buf0buddy.h"
-#include "buf0buf.h"
-#include "buf0dblwr.h"
-#include "buf0flu.h"
-#include "buf0rea.h"
-#include "btr0sea.h"
-#include "ibuf0ibuf.h"
-#include "os0file.h"
-#include "page0zip.h"
-#include "log0recv.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "srv0mon.h"
-#include "lock0lock.h"
-
-#include "ha_prototypes.h"
-
-/** The number of blocks from the LRU_old pointer onward, including
-the block pointed to, must be buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
-of the whole LRU list length, except that the tolerance defined below
-is allowed. Note that the tolerance must be small enough such that for
-even the BUF_LRU_OLD_MIN_LEN long LRU list, the LRU_old pointer is not
-allowed to point to either end of the LRU list. */
-
-#define BUF_LRU_OLD_TOLERANCE 20
-
-/** The minimum amount of non-old blocks when the LRU_old list exists
-(that is, when there are more than BUF_LRU_OLD_MIN_LEN blocks).
-@see buf_LRU_old_adjust_len */
-#define BUF_LRU_NON_OLD_MIN_LEN 5
-#if BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN
-# error "BUF_LRU_NON_OLD_MIN_LEN >= BUF_LRU_OLD_MIN_LEN"
-#endif
-
-/** When dropping the search hash index entries before deleting an ibd
-file, we build a local array of pages belonging to that tablespace
-in the buffer pool. Following is the size of that array.
-We also release buf_pool->LRU_list_mutex after scanning this many pages of the
-flush_list when dropping a table. This is to ensure that other threads
-are not blocked for extended period of time when using very large
-buffer pools. */
-#define BUF_LRU_DROP_SEARCH_SIZE 1024
-
-/** If we switch on the InnoDB monitor because there are too few available
-frames in the buffer pool, we set this to TRUE */
-static ibool buf_lru_switched_on_innodb_mon = FALSE;
-
-/******************************************************************//**
-These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
-and page_zip_decompress() operations. Based on the statistics,
-buf_LRU_evict_from_unzip_LRU() decides if we want to evict from
-unzip_LRU or the regular LRU. From unzip_LRU, we will only evict the
-uncompressed frame (meaning we can evict dirty blocks as well). From
-the regular LRU, we will evict the entire block (i.e.: both the
-uncompressed and compressed data), which must be clean. */
-
-/* @{ */
-
-/** Number of intervals for which we keep the history of these stats.
-Each interval is 1 second, defined by the rate at which
-srv_error_monitor_thread() calls buf_LRU_stat_update(). */
-#define BUF_LRU_STAT_N_INTERVAL 50
-
-/** Co-efficient with which we multiply I/O operations to equate them
-with page_zip_decompress() operations. */
-#define BUF_LRU_IO_TO_UNZIP_FACTOR 50
-
-/** Sampled values buf_LRU_stat_cur.
-Not protected by any mutex. Updated by buf_LRU_stat_update(). */
-static buf_LRU_stat_t buf_LRU_stat_arr[BUF_LRU_STAT_N_INTERVAL];
-
-/** Cursor to buf_LRU_stat_arr[] that is updated in a round-robin fashion. */
-static ulint buf_LRU_stat_arr_ind;
-
-/** Current operation counters. Not protected by any mutex. Cleared
-by buf_LRU_stat_update(). */
-UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_cur;
-
-/** Running sum of past values of buf_LRU_stat_cur.
-Updated by buf_LRU_stat_update(). Not Protected by any mutex. */
-UNIV_INTERN buf_LRU_stat_t buf_LRU_stat_sum;
-
-/* @} */
-
-/** @name Heuristics for detecting index scan @{ */
-/** Move blocks to "new" LRU list only if the first access was at
-least this many milliseconds ago. Not protected by any mutex or latch. */
-UNIV_INTERN uint buf_LRU_old_threshold_ms;
-/* @} */
-
-/******************************************************************//**
-Takes a block out of the LRU list and page hash table.
-If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
-the object will be freed.
-
-The caller must hold buf_pool->LRU_list_mutex, the buf_page_get_mutex() mutex
-and the appropriate hash_lock. This function will release the
-buf_page_get_mutex() and the hash_lock.
-
-If a compressed page is freed other compressed pages may be relocated.
-@retval true if BUF_BLOCK_FILE_PAGE was removed from page_hash. The
-caller needs to free the page to the free list
-@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
-this case the block is already returned to the buddy allocator. */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-buf_LRU_block_remove_hashed(
-/*========================*/
- buf_page_t* bpage, /*!< in: block, must contain a file page and
- be in a state where it can be freed; there
- may or may not be a hash index to the page */
- bool zip); /*!< in: true if should remove also the
- compressed page of an uncompressed page */
-/******************************************************************//**
-Puts a file page whose has no hash index to the free list. */
-static
-void
-buf_LRU_block_free_hashed_page(
-/*===========================*/
- buf_block_t* block); /*!< in: block, must contain a file page and
- be in a state where it can be freed */
-
-/******************************************************************//**
-Increases LRU size in bytes with zip_size for compressed page,
-UNIV_PAGE_SIZE for uncompressed page in inline function */
-static inline
-void
-incr_LRU_size_in_bytes(
-/*===================*/
- buf_page_t* bpage, /*!< in: control block */
- buf_pool_t* buf_pool) /*!< in: buffer pool instance */
-{
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
- ulint zip_size = page_zip_get_size(&bpage->zip);
- buf_pool->stat.LRU_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
- ut_ad(buf_pool->stat.LRU_bytes <= buf_pool->curr_pool_size);
-}
-
-/******************************************************************//**
-Determines if the unzip_LRU list should be used for evicting a victim
-instead of the general LRU list.
-@return TRUE if should use unzip_LRU */
-UNIV_INTERN
-ibool
-buf_LRU_evict_from_unzip_LRU(
-/*=========================*/
- buf_pool_t* buf_pool)
-{
- ulint io_avg;
- ulint unzip_avg;
-
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
- /* If the unzip_LRU list is empty, we can only use the LRU. */
- if (UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0) {
- return(FALSE);
- }
-
- /* If unzip_LRU is at most 10% of the size of the LRU list,
- then use the LRU. This slack allows us to keep hot
- decompressed pages in the buffer pool. */
- if (UT_LIST_GET_LEN(buf_pool->unzip_LRU)
- <= UT_LIST_GET_LEN(buf_pool->LRU) / 10) {
- return(FALSE);
- }
-
- /* If eviction hasn't started yet, we assume by default
- that a workload is disk bound. */
- if (buf_pool->freed_page_clock == 0) {
- return(TRUE);
- }
-
- /* Calculate the average over past intervals, and add the values
- of the current interval. */
- io_avg = buf_LRU_stat_sum.io / BUF_LRU_STAT_N_INTERVAL
- + buf_LRU_stat_cur.io;
- unzip_avg = buf_LRU_stat_sum.unzip / BUF_LRU_STAT_N_INTERVAL
- + buf_LRU_stat_cur.unzip;
-
- /* Decide based on our formula. If the load is I/O bound
- (unzip_avg is smaller than the weighted io_avg), evict an
- uncompressed frame from unzip_LRU. Otherwise we assume that
- the load is CPU bound and evict from the regular LRU. */
- return(unzip_avg <= io_avg * BUF_LRU_IO_TO_UNZIP_FACTOR);
-}
-
-/******************************************************************//**
-Attempts to drop page hash index on a batch of pages belonging to a
-particular space id. */
-static
-void
-buf_LRU_drop_page_hash_batch(
-/*=========================*/
- ulint space_id, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- const ulint* arr, /*!< in: array of page_no */
- ulint count) /*!< in: number of entries in array */
-{
- ulint i;
-
- ut_ad(arr != NULL);
- ut_ad(count <= BUF_LRU_DROP_SEARCH_SIZE);
-
- for (i = 0; i < count; ++i) {
- btr_search_drop_page_hash_when_freed(space_id, zip_size,
- arr[i]);
- }
-}
-
-/******************************************************************//**
-When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page
-hash index entries belonging to that table. This function tries to
-do that in batch. Note that this is a 'best effort' attempt and does
-not guarantee that ALL hash entries will be removed. */
-static
-void
-buf_LRU_drop_page_hash_for_tablespace(
-/*==================================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- ulint id) /*!< in: space id */
-{
- buf_page_t* bpage;
- ulint* page_arr;
- ulint num_entries;
- ulint zip_size;
-
- zip_size = fil_space_get_zip_size(id);
-
- if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
- /* Somehow, the tablespace does not exist. Nothing to drop. */
- ut_ad(0);
- return;
- }
-
- page_arr = static_cast<ulint*>(ut_malloc(
- sizeof(ulint) * BUF_LRU_DROP_SEARCH_SIZE));
-
- mutex_enter(&buf_pool->LRU_list_mutex);
- num_entries = 0;
-
-scan_again:
- bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-
- while (bpage != NULL) {
- buf_page_t* prev_bpage;
- ibool is_fixed;
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
-
- ut_a(buf_page_in_file(bpage));
-
- if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
- || bpage->space != id
- || bpage->io_fix != BUF_IO_NONE) {
- /* Compressed pages are never hashed.
- Skip blocks of other tablespaces.
- Skip I/O-fixed blocks (to be dealt with later). */
-next_page:
- bpage = prev_bpage;
- continue;
- }
-
- mutex_enter(block_mutex);
- is_fixed = bpage->buf_fix_count > 0
- || !((buf_block_t*) bpage)->index;
- mutex_exit(block_mutex);
-
- if (is_fixed) {
- goto next_page;
- }
-
- /* Store the page number so that we can drop the hash
- index in a batch later. */
- page_arr[num_entries] = bpage->offset;
- ut_a(num_entries < BUF_LRU_DROP_SEARCH_SIZE);
- ++num_entries;
-
- if (num_entries < BUF_LRU_DROP_SEARCH_SIZE) {
- goto next_page;
- }
-
- /* Array full. We release the buf_pool->LRU_list_mutex to obey
- the latching order. */
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- buf_LRU_drop_page_hash_batch(
- id, zip_size, page_arr, num_entries);
-
- num_entries = 0;
-
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- /* Note that we released the buf_pool->LRU_list_mutex above
- after reading the prev_bpage during processing of a
- page_hash_batch (i.e.: when the array was full).
- Because prev_bpage could belong to a compressed-only
- block, it may have been relocated, and thus the
- pointer cannot be trusted. Because bpage is of type
- buf_block_t, it is safe to dereference.
-
- bpage can change in the LRU list. This is OK because
- this function is a 'best effort' to drop as many
- search hash entries as possible and it does not
- guarantee that ALL such entries will be dropped. */
-
- /* If, however, bpage has been removed from LRU list
- to the free list then we should restart the scan. */
-
- if (bpage
- && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
- goto scan_again;
- }
- }
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- /* Drop any remaining batch of search hashed pages. */
- buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
- ut_free(page_arr);
-}
-
-/******************************************************************//**
-While flushing (or removing dirty) pages from a tablespace we don't
-want to hog the CPU and resources. Release the buffer pool and block
-mutex and try to force a context switch. Then reacquire the same mutexes.
-The current page is "fixed" before the release of the mutexes and then
-"unfixed" again once we have reacquired the mutexes. */
-static MY_ATTRIBUTE((nonnull))
-void
-buf_flush_yield(
-/*============*/
- buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
- buf_page_t* bpage) /*!< in/out: current page */
-{
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
- ut_ad(mutex_own(block_mutex));
- ut_ad(buf_page_in_file(bpage));
-
- /* "Fix" the block so that the position cannot be
- changed after we release the buffer pool and
- block mutexes. */
- buf_page_set_sticky(bpage);
-
- /* Now it is safe to release the LRU list mutex */
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- mutex_exit(block_mutex);
- /* Try and force a context switch. */
- os_thread_yield();
-
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- mutex_enter(block_mutex);
- /* "Unfix" the block now that we have both the
- buffer pool and block mutex again. */
- buf_page_unset_sticky(bpage);
- mutex_exit(block_mutex);
-}
-
-/******************************************************************//**
-If we have hogged the resources for too long then release the buffer
-pool and flush list mutex and do a thread yield. Set the current page
-to "sticky" so that it is not relocated during the yield.
-@return true if yielded */
-static MY_ATTRIBUTE((nonnull(1), warn_unused_result))
-bool
-buf_flush_try_yield(
-/*================*/
- buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
- buf_page_t* bpage, /*!< in/out: bpage to remove */
- ulint processed, /*!< in: number of pages processed */
- bool* must_restart) /*!< in/out: if true, we have to
- restart the flush list scan */
-{
- /* Every BUF_LRU_DROP_SEARCH_SIZE iterations in the
- loop we release buf_pool->LRU_list_mutex to let other threads
- do their job but only if the block is not IO fixed. This
- ensures that the block stays in its position in the
- flush_list. */
-
- if (bpage != NULL
- && processed >= BUF_LRU_DROP_SEARCH_SIZE
- && buf_page_get_io_fix_unlocked(bpage) == BUF_IO_NONE) {
-
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- buf_flush_list_mutex_exit(buf_pool);
-
- /* We don't have to worry about bpage becoming a dangling
- pointer by a compressed page flush list relocation because
- buf_page_get_gen() won't be called for pages from this
- tablespace. */
-
- mutex_enter(block_mutex);
- /* Recheck the I/O fix and the flush list presence now that we
- hold the right mutex */
- if (UNIV_UNLIKELY(buf_page_get_io_fix(bpage) != BUF_IO_NONE
- || bpage->oldest_modification == 0)) {
-
- mutex_exit(block_mutex);
-
- *must_restart = true;
-
- buf_flush_list_mutex_enter(buf_pool);
-
- return false;
- }
-
- *must_restart = false;
-
- /* Release the LRU list and buf_page_get_mutex() mutex
- to give the other threads a go. */
-
- buf_flush_yield(buf_pool, bpage);
-
- buf_flush_list_mutex_enter(buf_pool);
-
- /* Should not have been removed from the flush
- list during the yield. However, this check is
- not sufficient to catch a remove -> add. */
-
- ut_ad(bpage->in_flush_list);
-
- return(true);
- }
-
- return(false);
-}
-
-/******************************************************************//**
-Removes a single page from a given tablespace inside a specific
-buffer pool instance.
-@return true if page was removed. */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-buf_flush_or_remove_page(
-/*=====================*/
- buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
- buf_page_t* bpage, /*!< in/out: bpage to remove */
- bool flush, /*!< in: flush to disk if true but
- don't remove else remove without
- flushing to disk */
- bool* must_restart) /*!< in/out: if true, must restart the
- flush list scan */
-{
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
- ut_ad(buf_flush_list_mutex_own(buf_pool));
-
- /* It is safe to check bpage->space and bpage->io_fix while holding
- buf_pool->LRU_list_mutex only. */
-
- if (UNIV_UNLIKELY(buf_page_get_io_fix_unlocked(bpage)
- != BUF_IO_NONE)) {
-
- /* We cannot remove this page during this scan
- yet; maybe the system is currently reading it
- in, or flushing the modifications to the file */
- return(false);
- }
-
- buf_flush_list_mutex_exit(buf_pool);
-
- /* We don't have to worry about bpage becoming a dangling
- pointer by a compressed page flush list relocation because
- buf_page_get_gen() won't be called for pages from this
- tablespace. */
- bool processed;
-
- mutex_enter(block_mutex);
-
- /* Recheck the page I/O fix and the flush list presence now
- that we hold the right mutex. */
- if (UNIV_UNLIKELY(buf_page_get_io_fix(bpage) != BUF_IO_NONE
- || bpage->oldest_modification == 0)) {
-
- /* The page became I/O-fixed or is not on the flush
- list anymore, this invalidates any flush-list-page
- pointers we have. */
-
- mutex_exit(block_mutex);
-
- *must_restart = true;
- processed = false;
-
- } else if (!flush) {
-
- buf_flush_remove(bpage);
-
- mutex_exit(block_mutex);
-
- processed = true;
-
- } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) {
-
- if (buf_flush_page(
- buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false)) {
-
- /* Wake possible simulated aio thread to actually
- post the writes to the operating system */
- os_aio_simulated_wake_handler_threads();
-
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- processed = true;
-
- } else {
- mutex_exit(block_mutex);
-
- processed = false;
- }
-
- } else {
- mutex_exit(block_mutex);
-
- processed = false;
- }
-
- buf_flush_list_mutex_enter(buf_pool);
-
- ut_ad(!mutex_own(block_mutex));
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
- return(processed);
-}
-
-/******************************************************************//**
-Remove all dirty pages belonging to a given tablespace inside a specific
-buffer pool instance when we are deleting the data file(s) of that
-tablespace. The pages still remain a part of LRU and are evicted from
-the list as they age towards the tail of the LRU.
-@retval DB_SUCCESS if all freed
-@retval DB_FAIL if not all freed
-@retval DB_INTERRUPTED if the transaction was interrupted */
-static MY_ATTRIBUTE((nonnull(1), warn_unused_result))
-dberr_t
-buf_flush_or_remove_pages(
-/*======================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- ulint id, /*!< in: target space id for which
- to remove or flush pages */
- bool flush, /*!< in: flush to disk if true but
- don't remove else remove without
- flushing to disk */
- const trx_t* trx) /*!< to check if the operation must
- be interrupted, can be 0 */
-{
- buf_page_t* prev;
- buf_page_t* bpage;
- ulint processed = 0;
-
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
- buf_flush_list_mutex_enter(buf_pool);
-
-rescan:
- bool must_restart = false;
- bool all_freed = true;
-
- for (bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
- bpage != NULL;
- bpage = prev) {
-
- ut_ad(!must_restart);
- ut_a(buf_page_in_file(bpage));
-
- /* Save the previous link because once we free the
- page we can't rely on the links. */
-
- prev = UT_LIST_GET_PREV(list, bpage);
-
- if (buf_page_get_space(bpage) != id) {
-
- /* Skip this block, as it does not belong to
- the target space. */
-
- } else if (!buf_flush_or_remove_page(buf_pool, bpage, flush,
- &must_restart)) {
-
- /* Remove was unsuccessful, we have to try again
- by scanning the entire list from the end.
- buf_flush_or_remove_page() released the
- flush list mutex but not the LRU list mutex.
- Therefore it is possible that a new page was
- added to the flush list. For example, in case
- where we are at the head of the flush list and
- prev == NULL. That is OK because we have the
- tablespace quiesced and no new pages for this
- space-id should enter flush_list. This is
- because the only callers of this function are
- DROP TABLE and FLUSH TABLE FOR EXPORT.
- We know that we'll have to do at least one more
- scan but we don't break out of loop here and
- try to do as much work as we can in this
- iteration. */
-
- all_freed = false;
- if (UNIV_UNLIKELY(must_restart)) {
-
- /* Cannot trust the prev pointer */
- break;
- }
- } else if (flush) {
-
- /* The processing was successful. And during the
- processing we have released all the buf_pool mutexes
- when calling buf_page_flush(). We cannot trust
- prev pointer. */
- goto rescan;
- }
-
- ut_ad(!must_restart);
- ++processed;
-
- /* Yield if we have hogged the CPU and mutexes for too long. */
- if (buf_flush_try_yield(buf_pool, prev, processed,
- &must_restart)) {
-
- ut_ad(!must_restart);
- /* Reset the batch size counter if we had to yield. */
-
- processed = 0;
- } else if (UNIV_UNLIKELY(must_restart)) {
-
- /* Cannot trust the prev pointer */
- all_freed = false;
- break;
- }
-
-#ifdef DBUG_OFF
- if (flush) {
- DBUG_EXECUTE_IF("ib_export_flush_crash",
- static ulint n_pages;
- if (++n_pages == 4) {DBUG_SUICIDE();});
- }
-#endif /* DBUG_OFF */
-
- /* The check for trx is interrupted is expensive, we want
- to check every N iterations. */
- if (!processed && trx && trx_is_interrupted(trx)) {
- buf_flush_list_mutex_exit(buf_pool);
- return(DB_INTERRUPTED);
- }
- }
-
- buf_flush_list_mutex_exit(buf_pool);
-
- return(all_freed ? DB_SUCCESS : DB_FAIL);
-}
-
-/******************************************************************//**
-Remove or flush all the dirty pages that belong to a given tablespace
-inside a specific buffer pool instance. The pages will remain in the LRU
-list and will be evicted from the LRU list as they age and move towards
-the tail of the LRU list. */
-static MY_ATTRIBUTE((nonnull(1)))
-void
-buf_flush_dirty_pages(
-/*==================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- ulint id, /*!< in: space id */
- bool flush, /*!< in: flush to disk if true otherwise
- remove the pages without flushing */
- const trx_t* trx) /*!< to check if the operation must
- be interrupted */
-{
- dberr_t err;
-
- do {
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- err = buf_flush_or_remove_pages(buf_pool, id, flush, trx);
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- ut_ad(buf_flush_validate(buf_pool));
-
- if (err == DB_FAIL) {
- os_thread_sleep(2000);
- }
-
- /* DB_FAIL is a soft error, it means that the task wasn't
- completed, needs to be retried. */
-
- ut_ad(buf_flush_validate(buf_pool));
-
- } while (err == DB_FAIL);
-
- ut_ad(err == DB_INTERRUPTED
- || buf_pool_get_dirty_pages_count(buf_pool, id) == 0);
-}
-
-/******************************************************************//**
-Remove all pages that belong to a given tablespace inside a specific
-buffer pool instance when we are DISCARDing the tablespace. */
-static MY_ATTRIBUTE((nonnull))
-void
-buf_LRU_remove_all_pages(
-/*=====================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- ulint id) /*!< in: space id */
-{
- buf_page_t* bpage;
- ibool all_freed;
-
-scan_again:
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- all_freed = TRUE;
-
- for (bpage = UT_LIST_GET_LAST(buf_pool->LRU);
- bpage != NULL;
- /* No op */) {
-
- prio_rw_lock_t* hash_lock;
- buf_page_t* prev_bpage;
- ib_mutex_t* block_mutex = NULL;
-
- ut_a(buf_page_in_file(bpage));
- ut_ad(bpage->in_LRU_list);
-
- prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
-
- /* It is safe to check bpage->space and bpage->io_fix while
- holding buf_pool->LRU_list_mutex only and later recheck
- while holding the buf_page_get_mutex() mutex. */
-
- if (buf_page_get_space(bpage) != id) {
- /* Skip this block, as it does not belong to
- the space that is being invalidated. */
- goto next_page;
- } else if (UNIV_UNLIKELY(buf_page_get_io_fix_unlocked(bpage)
- != BUF_IO_NONE)) {
- /* We cannot remove this page during this scan
- yet; maybe the system is currently reading it
- in, or flushing the modifications to the file */
-
- all_freed = FALSE;
- goto next_page;
- } else {
- ulint fold = buf_page_address_fold(
- bpage->space, bpage->offset);
-
- hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-
- rw_lock_x_lock(hash_lock);
-
- block_mutex = buf_page_get_mutex(bpage);
- mutex_enter(block_mutex);
-
- if (UNIV_UNLIKELY(
- buf_page_get_space(bpage) != id
- || bpage->buf_fix_count > 0
- || (buf_page_get_io_fix(bpage)
- != BUF_IO_NONE))) {
-
- mutex_exit(block_mutex);
-
- rw_lock_x_unlock(hash_lock);
-
- /* We cannot remove this page during
- this scan yet; maybe the system is
- currently reading it in, or flushing
- the modifications to the file */
-
- all_freed = FALSE;
-
- goto next_page;
- }
- }
-
- ut_ad(mutex_own(block_mutex));
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Dropping space %lu page %lu\n",
- (ulong) buf_page_get_space(bpage),
- (ulong) buf_page_get_page_no(bpage));
- }
-#endif
- if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
- /* Do nothing, because the adaptive hash index
- covers uncompressed pages only. */
- } else if (((buf_block_t*) bpage)->index) {
- ulint page_no;
- ulint zip_size;
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- zip_size = buf_page_get_zip_size(bpage);
- page_no = buf_page_get_page_no(bpage);
-
- mutex_exit(block_mutex);
-
- rw_lock_x_unlock(hash_lock);
-
- /* Note that the following call will acquire
- and release block->lock X-latch. */
-
- btr_search_drop_page_hash_when_freed(
- id, zip_size, page_no);
-
- goto scan_again;
- }
-
- if (bpage->oldest_modification != 0) {
-
- buf_flush_remove(bpage);
- }
-
- ut_ad(!bpage->in_flush_list);
-
- /* Remove from the LRU list. */
-
- if (buf_LRU_block_remove_hashed(bpage, true)) {
-
- mutex_enter(block_mutex);
- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
- mutex_exit(block_mutex);
- } else {
- ut_ad(block_mutex == &buf_pool->zip_mutex);
- }
-
- ut_ad(!mutex_own(block_mutex));
-
-#ifdef UNIV_SYNC_DEBUG
- /* buf_LRU_block_remove_hashed() releases the hash_lock */
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX));
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
-next_page:
- bpage = prev_bpage;
- }
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- if (!all_freed) {
- os_thread_sleep(20000);
-
- goto scan_again;
- }
-}
-
-/******************************************************************//**
-Remove pages belonging to a given tablespace inside a specific
-buffer pool instance when we are deleting the data file(s) of that
-tablespace. The pages still remain a part of LRU and are evicted from
-the list as they age towards the tail of the LRU only if buf_remove
-is BUF_REMOVE_FLUSH_NO_WRITE. */
-static MY_ATTRIBUTE((nonnull(1)))
-void
-buf_LRU_remove_pages(
-/*=================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- ulint id, /*!< in: space id */
- buf_remove_t buf_remove, /*!< in: remove or flush strategy */
- const trx_t* trx) /*!< to check if the operation must
- be interrupted */
-{
- switch (buf_remove) {
- case BUF_REMOVE_ALL_NO_WRITE:
- buf_LRU_remove_all_pages(buf_pool, id);
- break;
-
- case BUF_REMOVE_FLUSH_NO_WRITE:
- ut_a(trx == 0);
- buf_flush_dirty_pages(buf_pool, id, false, NULL);
- break;
-
- case BUF_REMOVE_FLUSH_WRITE:
- ut_a(trx != 0);
- buf_flush_dirty_pages(buf_pool, id, true, trx);
- /* Ensure that all asynchronous IO is completed. */
- os_aio_wait_until_no_pending_writes();
- fil_flush(id);
- break;
- }
-}
-
-/******************************************************************//**
-Flushes all dirty pages or removes all pages belonging
-to a given tablespace. A PROBLEM: if readahead is being started, what
-guarantees that it will not try to read in pages after this operation
-has completed? */
-UNIV_INTERN
-void
-buf_LRU_flush_or_remove_pages(
-/*==========================*/
- ulint id, /*!< in: space id */
- buf_remove_t buf_remove, /*!< in: remove or flush strategy */
- const trx_t* trx) /*!< to check if the operation must
- be interrupted */
-{
- ulint i;
-
- /* Before we attempt to drop pages one by one we first
- attempt to drop page hash index entries in batches to make
- it more efficient. The batching attempt is a best effort
- attempt and does not guarantee that all pages hash entries
- will be dropped. We get rid of remaining page hash entries
- one by one below. */
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
-
- buf_pool = buf_pool_from_array(i);
-
- switch (buf_remove) {
- case BUF_REMOVE_ALL_NO_WRITE:
- buf_LRU_drop_page_hash_for_tablespace(buf_pool, id);
- break;
-
- case BUF_REMOVE_FLUSH_NO_WRITE:
- /* It is a DROP TABLE for a single table
- tablespace. No AHI entries exist because
- we already dealt with them when freeing up
- extents. */
- case BUF_REMOVE_FLUSH_WRITE:
- /* We allow read-only queries against the
- table, there is no need to drop the AHI entries. */
- break;
- }
-
- buf_LRU_remove_pages(buf_pool, id, buf_remove, trx);
- }
-}
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/********************************************************************//**
-Insert a compressed block into buf_pool->zip_clean in the LRU order. */
-UNIV_INTERN
-void
-buf_LRU_insert_zip_clean(
-/*=====================*/
- buf_page_t* bpage) /*!< in: pointer to the block in question */
-{
- buf_page_t* b;
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
- ut_ad(mutex_own(&buf_pool->zip_mutex));
- ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
-
- /* Find the first successor of bpage in the LRU list
- that is in the zip_clean list. */
- b = bpage;
- do {
- b = UT_LIST_GET_NEXT(LRU, b);
- } while (b && buf_page_get_state(b) != BUF_BLOCK_ZIP_PAGE);
-
- /* Insert bpage before b, i.e., after the predecessor of b. */
- if (b) {
- b = UT_LIST_GET_PREV(list, b);
- }
-
- if (b) {
- UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, bpage);
- } else {
- UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, bpage);
- }
-}
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-/******************************************************************//**
-Walks the unzip_LRU list from its tail and tries to release the
-uncompressed frame of one compressed block. buf_LRU_free_page() is
-invoked with zip == false, so the compressed page is preserved, and it
-need not be clean. Returns immediately when
-buf_LRU_evict_from_unzip_LRU() says not to evict from this list.
-The caller must hold buf_pool->LRU_list_mutex.
-@return TRUE if freed */
-UNIV_INLINE
-ibool
-buf_LRU_free_from_unzip_LRU_list(
-/*=============================*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	ibool		scan_all)	/*!< in: if TRUE, walk the whole
-					list; otherwise stop as soon as the
-					scan counter, which starts at 1,
-					reaches srv_LRU_scan_depth */
-{
-	ibool		freed = FALSE;
-	ulint		scanned = 1;
-	buf_block_t*	cur;
-
-	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
-	if (!buf_LRU_evict_from_unzip_LRU(buf_pool)) {
-		return(FALSE);
-	}
-
-	cur = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
-
-	while (cur != NULL
-	       && !freed
-	       && (scan_all || scanned < srv_LRU_scan_depth)) {
-
-		/* Fetch the predecessor before the free attempt; the walk
-		continues from it if this block could not be freed. */
-		buf_block_t*	pred = UT_LIST_GET_PREV(unzip_LRU, cur);
-
-		mutex_enter(&cur->mutex);
-
-		ut_ad(buf_block_get_state(cur) == BUF_BLOCK_FILE_PAGE);
-		ut_ad(cur->in_unzip_LRU_list);
-		ut_ad(cur->page.in_LRU_list);
-
-		freed = buf_LRU_free_page(&cur->page, false);
-
-		mutex_exit(&cur->mutex);
-
-		cur = pred;
-		++scanned;
-	}
-
-	MONITOR_INC_VALUE_CUMULATIVE(
-		MONITOR_LRU_UNZIP_SEARCH_SCANNED,
-		MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL,
-		MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL,
-		scanned);
-
-	return(freed);
-}
-
-/******************************************************************//**
-Walks the common LRU list from its tail and tries to free one page.
-buf_LRU_free_page() is invoked with zip == true, i.e. a compressed
-copy of the page is to be removed as well.
-The caller must hold buf_pool->LRU_list_mutex.
-@return TRUE if freed */
-UNIV_INLINE
-ibool
-buf_LRU_free_from_common_LRU_list(
-/*==============================*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	ibool		scan_all)	/*!< in: if TRUE, walk the whole
-					list; otherwise stop as soon as the
-					scan counter, which starts at 1,
-					reaches srv_LRU_scan_depth */
-{
-	ibool		freed = FALSE;
-	ulint		scanned = 1;
-	buf_page_t*	cur;
-
-	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
-	cur = UT_LIST_GET_LAST(buf_pool->LRU);
-
-	while (cur != NULL
-	       && !freed
-	       && (scan_all || scanned < srv_LRU_scan_depth)) {
-
-		buf_page_t*	pred = UT_LIST_GET_PREV(LRU, cur);
-		ib_mutex_t*	cur_mutex = buf_page_get_mutex(cur);
-		unsigned	was_accessed;
-
-		ut_ad(buf_page_in_file(cur));
-		ut_ad(cur->in_LRU_list);
-
-		/* Sampled before the free attempt, while the descriptor
-		is still known to be in the LRU list. */
-		was_accessed = buf_page_is_accessed(cur);
-
-		mutex_enter(cur_mutex);
-
-		freed = buf_LRU_free_page(cur, true);
-
-		mutex_exit(cur_mutex);
-
-		if (freed && !was_accessed) {
-			/* Count pages that are evicted without ever
-			having been accessed: a measure of how effective
-			readahead is. */
-			++buf_pool->stat.n_ra_pages_evicted;
-		}
-
-		cur = pred;
-		++scanned;
-	}
-
-	MONITOR_INC_VALUE_CUMULATIVE(
-		MONITOR_LRU_SEARCH_SCANNED,
-		MONITOR_LRU_SEARCH_SCANNED_NUM_CALL,
-		MONITOR_LRU_SEARCH_SCANNED_PER_CALL,
-		scanned);
-
-	return(freed);
-}
-
-/******************************************************************//**
-Try to free a replaceable block: first from the unzip_LRU list (only
-if that list was non-empty when sampled before taking the mutex), then
-from the common LRU list.
-This function acquires buf_pool->LRU_list_mutex. It releases the mutex
-itself only when nothing was freed; on success it returns without
-releasing it here.
-@return TRUE if found and freed */
-UNIV_INTERN
-ibool
-buf_LRU_scan_and_free_block(
-/*========================*/
-	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	ibool		scan_all)	/*!< in: passed through to the two
-					list scans: TRUE to scan whole
-					lists, FALSE for a limited scan */
-{
-	/* Sampled without holding the LRU list mutex. */
-	const bool	try_unzip_list
-		= UT_LIST_GET_LEN(buf_pool->unzip_LRU) > 0;
-	ibool		freed = FALSE;
-
-	mutex_enter(&buf_pool->LRU_list_mutex);
-
-	if (try_unzip_list) {
-		freed = buf_LRU_free_from_unzip_LRU_list(buf_pool, scan_all);
-	}
-
-	if (!freed) {
-		freed = buf_LRU_free_from_common_LRU_list(buf_pool, scan_all);
-	}
-
-	if (freed) {
-		return(freed);
-	}
-
-	mutex_exit(&buf_pool->LRU_list_mutex);
-
-	return(FALSE);
-}
-
-/******************************************************************//**
-Tells whether any buffer pool instance has fewer than 25 % of its
-blocks in the free and LRU lists combined. This can be used in
-heuristics to prevent huge transactions eating up the whole buffer
-pool for their locks. Always FALSE while recv_recovery_on is set.
-@return TRUE if less than 25 % of buffer pool left */
-UNIV_INTERN
-ibool
-buf_LRU_buf_pool_running_out(void)
-/*==============================*/
-{
-	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
-
-		buf_pool_t*	buf_pool = buf_pool_from_array(i);
-
-		if (!recv_recovery_on
-		    && UT_LIST_GET_LEN(buf_pool->free)
-		       + UT_LIST_GET_LEN(buf_pool->LRU)
-		       < buf_pool->curr_size / 4) {
-
-			/* One starved instance is enough. */
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-
-/******************************************************************//**
-Takes the last block off buf_pool->free, if there is one, and puts it
-into state BUF_BLOCK_READY_FOR_USE. The free list is protected by
-buf_pool->free_list_mutex, which is acquired and released here.
-@return a free control block, or NULL if the buf_pool->free list is
-empty */
-UNIV_INTERN
-buf_block_t*
-buf_LRU_get_free_only(
-/*==================*/
-	buf_pool_t*	buf_pool)	/*!< in/out: buffer pool instance */
-{
-	buf_block_t*	free_block;
-
-	mutex_enter_last(&buf_pool->free_list_mutex);
-
-	free_block = (buf_block_t*) UT_LIST_GET_LAST(buf_pool->free);
-
-	if (free_block == NULL) {
-
-		mutex_exit(&buf_pool->free_list_mutex);
-
-		return(NULL);
-	}
-
-	ut_ad(free_block->page.in_free_list);
-	ut_d(free_block->page.in_free_list = FALSE);
-	ut_ad(!free_block->page.in_flush_list);
-	ut_ad(!free_block->page.in_LRU_list);
-	ut_a(!buf_page_in_file(&free_block->page));
-	UT_LIST_REMOVE(list, buf_pool->free, (&free_block->page));
-	buf_block_set_state(free_block, BUF_BLOCK_READY_FOR_USE);
-
-	mutex_exit(&buf_pool->free_list_mutex);
-
-	/* The remaining steps run under the block mutex only. */
-	mutex_enter(&free_block->mutex);
-
-	UNIV_MEM_ALLOC(free_block->frame, UNIV_PAGE_SIZE);
-
-	ut_ad(buf_pool_from_block(free_block) == buf_pool);
-
-	mutex_exit(&free_block->mutex);
-
-	return(free_block);
-}
-
-/******************************************************************//**
-Checks how much of buf_pool is occupied by non-data objects like
-AHI, lock heaps etc., judged by how small the free and LRU list
-lengths together are relative to buf_pool->curr_size. Both size checks
-are skipped while recv_recovery_on is set.
-* below curr_size / 20: prints an error message and hits ut_error;
-* below curr_size / 3: on the first occurrence prints a warning, sets
-  srv_print_innodb_monitor and signals srv_monitor_event;
-* otherwise: if this function had switched the monitor on, it switches
-  it off again. */
-static
-void
-buf_LRU_check_size_of_non_data_objects(
-/*===================================*/
- const buf_pool_t* buf_pool) /*!< in: buffer pool instance */
-{
- /* NOTE(review): no mutex is asserted here, so the list lengths
- appear to be read without protection - confirm against callers. */
- if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free)
- + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->curr_size / 20) {
- ut_print_timestamp(stderr);
-
- /* The size is printed as curr_size divided by the number of
- UNIV_PAGE_SIZE pages per megabyte; presumably curr_size is
- counted in pages - TODO confirm. */
- fprintf(stderr,
- " InnoDB: ERROR: over 95 percent of the buffer pool"
- " is occupied by\n"
- "InnoDB: lock heaps or the adaptive hash index!"
- " Check that your\n"
- "InnoDB: transactions do not set too many row locks.\n"
- "InnoDB: Your buffer pool size is %lu MB."
- " Maybe you should make\n"
- "InnoDB: the buffer pool bigger?\n"
- "InnoDB: We intentionally generate a seg fault"
- " to print a stack trace\n"
- "InnoDB: on Linux!\n",
- (ulong) (buf_pool->curr_size
- / (1024 * 1024 / UNIV_PAGE_SIZE)));
-
- ut_error;
-
- } else if (!recv_recovery_on
- && (UT_LIST_GET_LEN(buf_pool->free)
- + UT_LIST_GET_LEN(buf_pool->LRU))
- < buf_pool->curr_size / 3) {
-
- /* buf_lru_switched_on_innodb_mon makes the warning a
- one-shot until the situation has cleared again. */
- if (!buf_lru_switched_on_innodb_mon) {
-
- /* Over 67 % of the buffer pool is occupied by lock
- heaps or the adaptive hash index. This may be a memory
- leak! */
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: WARNING: over 67 percent of"
- " the buffer pool is occupied by\n"
- "InnoDB: lock heaps or the adaptive"
- " hash index! Check that your\n"
- "InnoDB: transactions do not set too many"
- " row locks.\n"
- "InnoDB: Your buffer pool size is %lu MB."
- " Maybe you should make\n"
- "InnoDB: the buffer pool bigger?\n"
- "InnoDB: Starting the InnoDB Monitor to print"
- " diagnostics, including\n"
- "InnoDB: lock heap and hash index sizes.\n",
- (ulong) (buf_pool->curr_size
- / (1024 * 1024 / UNIV_PAGE_SIZE)));
-
- buf_lru_switched_on_innodb_mon = TRUE;
- srv_print_innodb_monitor = TRUE;
- os_event_set(srv_monitor_event);
- }
- } else if (buf_lru_switched_on_innodb_mon) {
-
- /* Switch off the InnoDB Monitor; this is a simple way
- to stop the monitor if the situation becomes less urgent,
- but may also surprise users if the user also switched on the
- monitor! */
-
- buf_lru_switched_on_innodb_mon = FALSE;
- srv_print_innodb_monitor = FALSE;
- }
-}
-
-/** Diagnose failure to get a free page. Once more than two seconds have
-passed both since the attempt started and since the previous printout, a
-warning is written to stderr and InnoDB monitor output is requested.
-Nothing is written to the output parameters when no warning is printed.
-@param[in]	n_iterations	how many buf_LRU_get_free_page iterations
-				already completed
-@param[in]	started_ms	timestamp in ms of when the attempt to get the
-				free page started
-@param[in]	flush_failures	how many times single-page flush, if allowed,
-				has failed
-@param[out]	mon_value_was	previous srv_print_innodb_monitor value
-@param[out]	started_monitor	whether InnoDB monitor print has been requested
-*/
-static
-void
-buf_LRU_handle_lack_of_free_blocks(
-	ulint	n_iterations,
-	ulint	started_ms,
-	ulint	flush_failures,
-	ibool*	mon_value_was,
-	ibool*	started_monitor)
-{
-	/* Time of the most recent warning, kept across calls. */
-	static ulint	last_printout_ms = 0;
-
-	/* Legacy algorithm started warning after at least 2 seconds, we
-	emulate this. */
-	const ulint	now_ms = ut_time_ms();
-
-	if (now_ms <= started_ms + 2000
-	    || now_ms <= last_printout_ms + 2000) {
-
-		/* Too early, or a warning was printed recently. */
-		return;
-	}
-
-	ut_print_timestamp(stderr);
-	fprintf(stderr,
-		" InnoDB: Warning: difficult to find free blocks in\n"
-		"InnoDB: the buffer pool (%lu search iterations)!\n"
-		"InnoDB: %lu failed attempts to flush a page!"
-		" Consider\n"
-		"InnoDB: increasing the buffer pool size.\n"
-		"InnoDB: It is also possible that"
-		" in your Unix version\n"
-		"InnoDB: fsync is very slow, or"
-		" completely frozen inside\n"
-		"InnoDB: the OS kernel. Then upgrading to"
-		" a newer version\n"
-		"InnoDB: of your operating system may help."
-		" Look at the\n"
-		"InnoDB: number of fsyncs in diagnostic info below.\n"
-		"InnoDB: Pending flushes (fsync) log: %lu;"
-		" buffer pool: %lu\n"
-		"InnoDB: %lu OS file reads, %lu OS file writes,"
-		" %lu OS fsyncs\n"
-		"InnoDB: Starting InnoDB Monitor to print further\n"
-		"InnoDB: diagnostics to the standard output.\n",
-		(ulong) n_iterations,
-		(ulong) flush_failures,
-		(ulong) fil_n_pending_log_flushes,
-		(ulong) fil_n_pending_tablespace_flushes,
-		(ulong) os_n_file_reads,
-		(ulong) os_n_file_writes,
-		(ulong) os_n_fsyncs);
-
-	last_printout_ms = now_ms;
-
-	/* Save the current monitor setting before forcing it on, so that
-	the caller is able to restore it. */
-	*mon_value_was = srv_print_innodb_monitor;
-	*started_monitor = TRUE;
-	srv_print_innodb_monitor = TRUE;
-	os_event_set(lock_sys->timeout_event);
-}
-
-/** The maximum allowed backoff sleep time duration, microseconds */
-#define MAX_FREE_LIST_BACKOFF_SLEEP		10000
-
-/** Upper bound for the shift count used to compute a backoff sleep.
-2^14 = 16384 is the smallest power of two that is not below
-MAX_FREE_LIST_BACKOFF_SLEEP, so every larger count would be capped to
-MAX_FREE_LIST_BACKOFF_SLEEP anyway. Clamping the count keeps the shift
-well defined no matter how many iterations have been made. */
-#define FREE_LIST_BACKOFF_MAX_SHIFT		14
-
-/** The sleep reduction factor for high-priority waiter backoff sleeps */
-#define FREE_LIST_BACKOFF_HIGH_PRIO_DIVIDER	100
-
-/** The sleep reduction factor for low-priority waiter backoff sleeps */
-#define FREE_LIST_BACKOFF_LOW_PRIO_DIVIDER	1
-
-/******************************************************************//**
-Returns a free block from the buf_pool. The block is taken off the
-free list. If free list is empty, blocks are moved from the end of the
-LRU list to the free list.
-This function is called from a user thread when it needs a clean
-block to read in a page. Note that we only ever get a block from
-the free list. Even when we flush a page or find a page in LRU scan
-we put it to free list to be used.
-
-Backoff mode (srv_empty_free_list_algorithm is
-SRV_EMPTY_FREE_LIST_BACKOFF, the LRU manager is active and
-srv_shutdown_state is SRV_SHUTDOWN_NONE or SRV_SHUTDOWN_CLEANUP):
-* the free list is retried in a loop; no LRU scan and no single page
-  flush is ever attempted
-* iterations 0..2: yield the thread (twice if the thread priority is 0)
-* later iterations: sleep for a power-of-two number of microseconds
-  that grows with the iteration count, capped at
-  MAX_FREE_LIST_BACKOFF_SLEEP and divided by the priority-dependent
-  divider
-
-Otherwise:
-* iteration 0:
-  * get a block from free list, success:done
-  * if there is an LRU flush batch in progress:
-    * wait for batch to end: retry free list
-  * if buf_pool->try_LRU_scan is set
-    * scan LRU up to srv_LRU_scan_depth to find a clean block
-    * the above will put the block on free list
-    * success:retry the free list
-  * flush one dirty page from tail of LRU to disk
-    * the above will put the block on free list
-    * success: retry the free list
-* iteration 1:
-  * same as iteration 0 except:
-    * scan whole LRU list
-    * scan LRU list even if buf_pool->try_LRU_scan is not set
-* iteration > 1:
-  * same as iteration 1 but sleep 100ms
-@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
-UNIV_INTERN
-buf_block_t*
-buf_LRU_get_free_block(
-/*===================*/
-	buf_pool_t*	buf_pool)	/*!< in/out: buffer pool instance */
-{
-	buf_block_t*	block		= NULL;
-	ibool		freed		= FALSE;
-	ulint		n_iterations	= 0;
-	ulint		flush_failures	= 0;
-	ibool		mon_value_was	= FALSE;
-	ibool		started_monitor	= FALSE;
-	ulint		started_ms	= 0;
-
-	ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
-
-	MONITOR_INC(MONITOR_LRU_GET_FREE_SEARCH);
-loop:
-	buf_LRU_check_size_of_non_data_objects(buf_pool);
-
-	/* If there is a block in the free list, take it */
-	if (DBUG_EVALUATE_IF("simulate_lack_of_pages", true, false)) {
-
-		block = NULL;
-
-		if (srv_debug_monitor_printed)
-			DBUG_SET("-d,simulate_lack_of_pages");
-
-	} else if (DBUG_EVALUATE_IF("simulate_recovery_lack_of_pages",
-				    recv_recovery_on, false)) {
-
-		block = NULL;
-
-		if (srv_debug_monitor_printed)
-			DBUG_SUICIDE();
-	} else {
-
-		block = buf_LRU_get_free_only(buf_pool);
-	}
-
-	if (block) {
-
-		ut_ad(buf_pool_from_block(block) == buf_pool);
-		memset(&block->page.zip, 0, sizeof block->page.zip);
-
-		/* Undo the monitor request made while we were waiting. */
-		if (started_monitor) {
-			srv_print_innodb_monitor =
-				static_cast<my_bool>(mon_value_was);
-		}
-
-		return(block);
-	}
-
-	/* First failure: remember when the wait began. */
-	if (!started_ms)
-		started_ms = ut_time_ms();
-
-	if (srv_empty_free_list_algorithm == SRV_EMPTY_FREE_LIST_BACKOFF
-	    && buf_lru_manager_is_active
-	    && (srv_shutdown_state == SRV_SHUTDOWN_NONE
-		|| srv_shutdown_state == SRV_SHUTDOWN_CLEANUP)) {
-
-		/* Backoff to minimize the free list mutex contention while the
-		free list is empty */
-		ulint	priority = srv_current_thread_priority;
-
-		if (n_iterations < 3) {
-
-			os_thread_yield();
-			if (!priority) {
-				os_thread_yield();
-			}
-		} else {
-
-			ulint	i, b;
-
-			if (n_iterations < 6) {
-				i = n_iterations - 3;
-			} else if (n_iterations < 8) {
-				i = 4;
-			} else if (n_iterations < 11) {
-				i = 5;
-			} else {
-				i = n_iterations - 5;
-			}
-
-			/* i grows without bound with n_iterations. Shifting
-			by the width of the type or more is undefined, and
-			where the hardware masks the count the sleep would
-			collapse to a tiny value. Clamp first: every count
-			of FREE_LIST_BACKOFF_MAX_SHIFT or more yields the
-			maximum sleep below anyway. */
-			if (i > FREE_LIST_BACKOFF_MAX_SHIFT) {
-				i = FREE_LIST_BACKOFF_MAX_SHIFT;
-			}
-
-			b = static_cast<ulint>(1) << i;
-			if (b > MAX_FREE_LIST_BACKOFF_SLEEP) {
-				b = MAX_FREE_LIST_BACKOFF_SLEEP;
-			}
-			os_thread_sleep(b / (priority
-				? FREE_LIST_BACKOFF_HIGH_PRIO_DIVIDER
-				: FREE_LIST_BACKOFF_LOW_PRIO_DIVIDER));
-		}
-
-		buf_LRU_handle_lack_of_free_blocks(n_iterations, started_ms,
-						   flush_failures,
-						   &mon_value_was,
-						   &started_monitor);
-
-		n_iterations++;
-
-		srv_stats.buf_pool_wait_free.inc();
-
-		/* In case of backoff, do not ever attempt single page flushes
-		and wait for the cleaner to free some pages instead. */
-		goto loop;
-	} else {
-
-		/* The LRU manager is not running or Oracle MySQL 5.6 algorithm
-		was requested, will perform a single page flush */
-		ut_ad((srv_empty_free_list_algorithm
-		       == SRV_EMPTY_FREE_LIST_LEGACY)
-		      || !buf_lru_manager_is_active
-		      || (srv_shutdown_state != SRV_SHUTDOWN_NONE
-			  && srv_shutdown_state != SRV_SHUTDOWN_CLEANUP));
-	}
-
-	mutex_enter(&buf_pool->flush_state_mutex);
-
-	if (buf_pool->init_flush[BUF_FLUSH_LRU]
-	    && srv_use_doublewrite_buf
-	    && buf_dblwr != NULL) {
-
-		mutex_exit(&buf_pool->flush_state_mutex);
-
-		/* If there is an LRU flush happening in the background
-		then we wait for it to end instead of trying a single
-		page flush. If, however, we are not using doublewrite
-		buffer then it is better to do our own single page
-		flush instead of waiting for LRU flush to end. */
-		buf_flush_wait_batch_end(buf_pool, BUF_FLUSH_LRU);
-		goto loop;
-	}
-
-	mutex_exit(&buf_pool->flush_state_mutex);
-
-	if (DBUG_EVALUATE_IF("simulate_recovery_lack_of_pages", true, false)
-	    || DBUG_EVALUATE_IF("simulate_lack_of_pages", true, false)) {
-
-		buf_pool->try_LRU_scan = false;
-	}
-
-	freed = FALSE;
-	if (buf_pool->try_LRU_scan || n_iterations > 0) {
-
-		/* If no block was in the free list, search from the
-		end of the LRU list and try to free a block there.
-		If we are doing for the first time we'll scan only
-		tail of the LRU list otherwise we scan the whole LRU
-		list. */
-		freed = buf_LRU_scan_and_free_block(buf_pool,
-						    n_iterations > 0);
-
-		if (!freed && n_iterations == 0) {
-			/* Tell other threads that there is no point
-			in scanning the LRU list. This flag is set to
-			TRUE again when we flush a batch from this
-			buffer pool. */
-			buf_pool->try_LRU_scan = FALSE;
-		}
-	}
-
-	if (freed) {
-		goto loop;
-
-	}
-
-	buf_LRU_handle_lack_of_free_blocks(n_iterations, started_ms,
-					   flush_failures, &mon_value_was,
-					   &started_monitor);
-
-	/* If we have scanned the whole LRU and still are unable to
-	find a free block then we should sleep here to let the
-	page_cleaner do an LRU batch for us.
-	TODO: It'd be better if we can signal the page_cleaner. Perhaps
-	we should use timed wait for page_cleaner. */
-	if (n_iterations > 1) {
-
-		os_thread_sleep(100000);
-	}
-
-	/* No free block was found: try to flush the LRU list.
-	This call will flush one page from the LRU and put it on the
-	free list. That means that the free block is up for grabs for
-	all user threads.
-	TODO: A more elegant way would have been to return the freed
-	up block to the caller here but the code that deals with
-	removing the block from page_hash and LRU_list is fairly
-	involved (particularly in case of compressed pages). We
-	can do that in a separate patch sometime in future. */
-	if (!buf_flush_single_page_from_LRU(buf_pool)) {
-		MONITOR_INC(MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT);
-		++flush_failures;
-	}
-
-	srv_stats.buf_pool_wait_free.inc();
-
-	n_iterations++;
-
-	goto loop;
-}
-
-/*******************************************************************//**
-Moves the LRU_old pointer so that the length of the old blocks list
-is inside the allowed limits. The target length is the smaller of
-(LRU length * LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV) and
-(LRU length - (BUF_LRU_OLD_TOLERANCE + BUF_LRU_NON_OLD_MIN_LEN)); the
-pointer is stepped one block at a time until LRU_old_len is within
-BUF_LRU_OLD_TOLERANCE of that target.
-The caller must hold buf_pool->LRU_list_mutex, and buf_pool->LRU_old
-must be non-NULL. */
-UNIV_INLINE
-void
-buf_LRU_old_adjust_len(
-/*===================*/
- buf_pool_t* buf_pool) /*!< in: buffer pool instance */
-{
- ulint old_len;
- ulint new_len;
-
- ut_a(buf_pool->LRU_old);
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
- ut_ad(buf_pool->LRU_old_ratio >= BUF_LRU_OLD_RATIO_MIN);
- ut_ad(buf_pool->LRU_old_ratio <= BUF_LRU_OLD_RATIO_MAX);
- /* Compile-time sanity check on the relation between the tuning
- constants. */
-#if BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)
-# error "BUF_LRU_OLD_RATIO_MIN * BUF_LRU_OLD_MIN_LEN <= BUF_LRU_OLD_RATIO_DIV * (BUF_LRU_OLD_TOLERANCE + 5)"
-#endif
-#ifdef UNIV_LRU_DEBUG
- /* buf_pool->LRU_old must be the first item in the LRU list
- whose "old" flag is set. */
- ut_a(buf_pool->LRU_old->old);
- ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
- || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
- ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
- || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
-#endif /* UNIV_LRU_DEBUG */
-
- /* old_len: current length of the old sublist; new_len: target. */
- old_len = buf_pool->LRU_old_len;
- new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
- * buf_pool->LRU_old_ratio / BUF_LRU_OLD_RATIO_DIV,
- UT_LIST_GET_LEN(buf_pool->LRU)
- - (BUF_LRU_OLD_TOLERANCE
- + BUF_LRU_NON_OLD_MIN_LEN));
-
- for (;;) {
- buf_page_t* LRU_old = buf_pool->LRU_old;
-
- ut_a(LRU_old);
- ut_ad(LRU_old->in_LRU_list);
-#ifdef UNIV_LRU_DEBUG
- ut_a(LRU_old->old);
-#endif /* UNIV_LRU_DEBUG */
-
- /* Update the LRU_old pointer if necessary */
-
- if (old_len + BUF_LRU_OLD_TOLERANCE < new_len) {
-
- /* Old sublist too short: step LRU_old to its
- predecessor and mark that block old. */
- buf_pool->LRU_old = LRU_old = UT_LIST_GET_PREV(
- LRU, LRU_old);
-#ifdef UNIV_LRU_DEBUG
- ut_a(!LRU_old->old);
-#endif /* UNIV_LRU_DEBUG */
- old_len = ++buf_pool->LRU_old_len;
- buf_page_set_old(LRU_old, TRUE);
-
- } else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) {
-
- /* Old sublist too long: step LRU_old to its
- successor and clear the flag on the block it
- pointed to before. */
- buf_pool->LRU_old = UT_LIST_GET_NEXT(LRU, LRU_old);
- old_len = --buf_pool->LRU_old_len;
- buf_page_set_old(LRU_old, FALSE);
- } else {
- /* Within tolerance: done. */
- return;
- }
- }
-}
-
-/*******************************************************************//**
-Sets up the old blocks pointer of the LRU list. Must be called exactly
-when the LRU list has grown to BUF_LRU_OLD_MIN_LEN blocks (this is
-asserted), with buf_pool->LRU_list_mutex held. */
-static
-void
-buf_LRU_old_init(
-/*=============*/
-	buf_pool_t*	buf_pool)	/*!< in/out: buffer pool instance */
-{
-	buf_page_t*	cur;
-
-	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN);
-
-	/* Step 1: flag every block in the list as old, walking from the
-	tail to the head. */
-
-	cur = UT_LIST_GET_LAST(buf_pool->LRU);
-
-	while (cur != NULL) {
-		ut_ad(cur->in_LRU_list);
-		ut_ad(buf_page_in_file(cur));
-		/* The flag is written directly, because this loop
-		temporarily violates the assertions of
-		buf_page_set_old(). */
-		cur->old = TRUE;
-
-		cur = UT_LIST_GET_PREV(LRU, cur);
-	}
-
-	/* Step 2: declare the whole list to be the old sublist, and let
-	the adjust function move LRU_old to the right position. */
-
-	buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU);
-	buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU);
-
-	buf_LRU_old_adjust_len(buf_pool);
-}
-
-/******************************************************************//**
-Takes a block off the unzip_LRU list, provided that
-buf_page_belongs_to_unzip_LRU() holds for it; does nothing otherwise.
-The caller must hold the LRU list mutex of the block's buffer pool. */
-static
-void
-buf_unzip_LRU_remove_block_if_needed(
-/*=================================*/
-	buf_page_t*	bpage)	/*!< in/out: control block */
-{
-	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
-
-	ut_ad(buf_pool);
-	ut_ad(bpage);
-	ut_ad(buf_page_in_file(bpage));
-	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
-	if (!buf_page_belongs_to_unzip_LRU(bpage)) {
-
-		return;
-	}
-
-	/* The descriptor is treated as the enclosing buf_block_t from
-	here on. */
-	buf_block_t*	unzip_block = (buf_block_t*) bpage;
-
-	ut_ad(unzip_block->in_unzip_LRU_list);
-	ut_d(unzip_block->in_unzip_LRU_list = FALSE);
-
-	UT_LIST_REMOVE(unzip_LRU, buf_pool->unzip_LRU, unzip_block);
-}
-
-/******************************************************************//**
-Removes a block from the LRU list (and from the unzip_LRU list, if it
-is there), keeping buf_pool->LRU_old, buf_pool->LRU_old_len and
-buf_pool->stat.LRU_bytes up to date. If the list becomes shorter than
-BUF_LRU_OLD_MIN_LEN, the old sublist is dissolved: all "old" flags
-are cleared and LRU_old is reset to NULL.
-The caller must hold the LRU list mutex of the block's buffer pool. */
-UNIV_INLINE
-void
-buf_LRU_remove_block(
-/*=================*/
- buf_page_t* bpage) /*!< in: control block */
-{
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ulint zip_size;
-
- ut_ad(buf_pool);
- ut_ad(bpage);
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
- ut_a(buf_page_in_file(bpage));
-
- ut_ad(bpage->in_LRU_list);
-
- /* If the LRU_old pointer is defined and points to just this block,
- move it backward one step */
-
- if (UNIV_UNLIKELY(bpage == buf_pool->LRU_old)) {
-
- /* Below: the previous block is guaranteed to exist,
- because the LRU_old pointer is only allowed to differ
- by BUF_LRU_OLD_TOLERANCE from strict
- buf_pool->LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV of the LRU
- list length. */
- buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
-
- ut_a(prev_bpage);
-#ifdef UNIV_LRU_DEBUG
- ut_a(!prev_bpage->old);
-#endif /* UNIV_LRU_DEBUG */
- buf_pool->LRU_old = prev_bpage;
- buf_page_set_old(prev_bpage, TRUE);
-
- /* Accounts for prev_bpage joining the old sublist; the
- removal of bpage itself is accounted for further below. */
- buf_pool->LRU_old_len++;
- }
-
- /* Remove the block from the LRU list */
- UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
- ut_d(bpage->in_LRU_list = FALSE);
-
- /* A block counts with its compressed size if it has one, else
- with UNIV_PAGE_SIZE. */
- zip_size = page_zip_get_size(&bpage->zip);
- buf_pool->stat.LRU_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
-
- buf_unzip_LRU_remove_block_if_needed(bpage);
-
- /* If the LRU list is so short that LRU_old is not defined,
- clear the "old" flags and return */
- if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) {
-
- /* Note: the parameter bpage is reused as the loop cursor
- here; this is harmless only because the branch returns. */
- for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU); bpage != NULL;
- bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
- /* This loop temporarily violates the
- assertions of buf_page_set_old(). */
- bpage->old = FALSE;
- }
-
- buf_pool->LRU_old = NULL;
- buf_pool->LRU_old_len = 0;
-
- return;
- }
-
- ut_ad(buf_pool->LRU_old);
-
- /* Update the LRU_old_len field if necessary */
- if (buf_page_is_old(bpage)) {
-
- buf_pool->LRU_old_len--;
- }
-
- /* Adjust the length of the old block list if necessary */
- buf_LRU_old_adjust_len(buf_pool);
-}
-
-/******************************************************************//**
-Puts a block on the LRU list of decompressed zip pages (unzip_LRU).
-The block must satisfy buf_page_belongs_to_unzip_LRU(), and the caller
-must hold the LRU list mutex of the block's buffer pool. */
-UNIV_INTERN
-void
-buf_unzip_LRU_add_block(
-/*====================*/
-	buf_block_t*	block,	/*!< in: control block */
-	ibool		old)	/*!< in: TRUE if should be put to the end
-				of the list, else put to the start */
-{
-	buf_pool_t*	buf_pool = buf_pool_from_block(block);
-
-	ut_ad(buf_pool);
-	ut_ad(block);
-	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
-	ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
-
-	ut_ad(!block->in_unzip_LRU_list);
-	ut_d(block->in_unzip_LRU_list = TRUE);
-
-	if (!old) {
-		/* Young block: goes to the head of the list. */
-		UT_LIST_ADD_FIRST(unzip_LRU, buf_pool->unzip_LRU, block);
-	} else {
-		/* Old block: goes to the tail of the list. */
-		UT_LIST_ADD_LAST(unzip_LRU, buf_pool->unzip_LRU, block);
-	}
-}
-
-/******************************************************************//**
-Appends a block to the tail of the LRU list and maintains the old
-sublist bookkeeping. Please make sure that the zip_size is already set
-into the page zip when invoking the function, so that we can get
-correct zip_size from the buffer page when adding a block into LRU.
-The caller must hold the LRU list mutex of the block's buffer pool. */
-UNIV_INLINE
-void
-buf_LRU_add_block_to_end_low(
-/*=========================*/
-	buf_page_t*	bpage)	/*!< in: control block */
-{
-	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
-	ulint		lru_len;
-
-	ut_ad(buf_pool);
-	ut_ad(bpage);
-	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
-	ut_a(buf_page_in_file(bpage));
-
-	ut_ad(!bpage->in_LRU_list);
-	UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
-	ut_d(bpage->in_LRU_list = TRUE);
-
-	incr_LRU_size_in_bytes(bpage, buf_pool);
-
-	/* List length with the new block already included. */
-	lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
-
-	if (lru_len < BUF_LRU_OLD_MIN_LEN) {
-
-		buf_page_set_old(bpage, buf_pool->LRU_old != NULL);
-
-	} else if (lru_len == BUF_LRU_OLD_MIN_LEN) {
-
-		/* The LRU list is now long enough for LRU_old to become
-		defined: init it */
-
-		buf_LRU_old_init(buf_pool);
-	} else {
-
-		ut_ad(buf_pool->LRU_old);
-
-		/* The block at the tail joins the old sublist; adjust
-		the length of the old block list if necessary */
-
-		buf_page_set_old(bpage, TRUE);
-		buf_pool->LRU_old_len++;
-		buf_LRU_old_adjust_len(buf_pool);
-	}
-
-	/* If this is a zipped block with decompressed frame as well
-	then put it on the unzip_LRU list */
-	if (buf_page_belongs_to_unzip_LRU(bpage)) {
-		buf_unzip_LRU_add_block((buf_block_t*) bpage, TRUE);
-	}
-}
-
-/******************************************************************//**
-Adds a block to the LRU list: at the head of the list, or right after
-buf_pool->LRU_old when the block is to join the old sublist. Please
-make sure that the zip_size is already set into the page zip when
-invoking the function, so that we can get correct zip_size from the
-buffer page when adding a block into LRU.
-The caller must hold the LRU list mutex of the block's buffer pool. */
-UNIV_INLINE
-void
-buf_LRU_add_block_low(
-/*==================*/
- buf_page_t* bpage, /*!< in: control block */
- ibool old) /*!< in: TRUE if should be put to the old blocks
- in the LRU list, else put to the start; if the
- LRU list is very short, the block is added to
- the start, regardless of this parameter */
-{
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
- ut_a(buf_page_in_file(bpage));
- ut_ad(!bpage->in_LRU_list);
-
- /* "Very short" means fewer than BUF_LRU_OLD_MIN_LEN blocks before
- the insertion. */
- if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
-
- UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage);
-
- /* Blocks added at the head are stamped with the current
- buf_pool->freed_page_clock. */
- bpage->freed_page_clock = buf_pool->freed_page_clock;
- } else {
-#ifdef UNIV_LRU_DEBUG
- /* buf_pool->LRU_old must be the first item in the LRU list
- whose "old" flag is set. */
- ut_a(buf_pool->LRU_old->old);
- ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
- || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
- ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
- || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
-#endif /* UNIV_LRU_DEBUG */
- /* The block becomes the second block of the old sublist. */
- UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
- bpage);
- buf_pool->LRU_old_len++;
- }
-
- ut_d(bpage->in_LRU_list = TRUE);
-
- incr_LRU_size_in_bytes(bpage, buf_pool);
-
- /* From here on the list length includes the new block. */
- if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
-
- ut_ad(buf_pool->LRU_old);
-
- /* Adjust the length of the old block list if necessary */
-
- buf_page_set_old(bpage, old);
- buf_LRU_old_adjust_len(buf_pool);
-
- } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) {
-
- /* The LRU list is now long enough for LRU_old to become
- defined: init it */
-
- buf_LRU_old_init(buf_pool);
- } else {
- buf_page_set_old(bpage, buf_pool->LRU_old != NULL);
- }
-
- /* If this is a zipped block with decompressed frame as well
- then put it on the unzip_LRU list */
- if (buf_page_belongs_to_unzip_LRU(bpage)) {
- buf_unzip_LRU_add_block((buf_block_t*) bpage, old);
- }
-}
-
-/******************************************************************//**
-Adds a block to the LRU list. This is the externally visible
-(UNIV_INTERN) entry point; it only forwards to the inline
-buf_LRU_add_block_low(), which asserts that the LRU list mutex of the
-block's buffer pool is held. Please make sure that the zip_size is
-already set into the page zip when invoking the function, so that we
-can get correct zip_size from the buffer page when adding a block
-into LRU */
-UNIV_INTERN
-void
-buf_LRU_add_block(
-/*==============*/
- buf_page_t* bpage, /*!< in: control block */
- ibool old) /*!< in: TRUE if should be put to the old
- blocks in the LRU list, else put to the start;
- if the LRU list is very short, the block is
- added to the start, regardless of this
- parameter */
-{
- buf_LRU_add_block_low(bpage, old);
-}
-
-/******************************************************************//**
-Moves a block to the start of the LRU list by removing it and adding
-it again as a young block. If the block was flagged old, the
-n_pages_made_young statistic is incremented.
-The caller must hold the LRU list mutex of the block's buffer pool. */
-UNIV_INTERN
-void
-buf_LRU_make_block_young(
-/*=====================*/
-	buf_page_t*	bpage)	/*!< in: control block */
-{
-	buf_pool_t*	buf_pool = buf_pool_from_bpage(bpage);
-
-	ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-
-	/* The flag has to be inspected before the block is relinked. */
-	if (bpage->old) {
-		++buf_pool->stat.n_pages_made_young;
-	}
-
-	buf_LRU_remove_block(bpage);
-	buf_LRU_add_block_low(bpage, FALSE);
-}
-
-/******************************************************************//**
-Moves a block to the end of the LRU list by removing it from the list
-and appending it again at the tail. Nothing is asserted here directly;
-both callees assert that the LRU list mutex of the block's buffer pool
-is held. */
-UNIV_INTERN
-void
-buf_LRU_make_block_old(
-/*===================*/
- buf_page_t* bpage) /*!< in: control block */
-{
- buf_LRU_remove_block(bpage);
- buf_LRU_add_block_to_end_low(bpage);
-}
-
-/******************************************************************//**
-Try to free a block. If bpage is a descriptor of a compressed-only
-page, the descriptor object will be freed as well.
-
-NOTE: If this function returns true, it will release the LRU list mutex,
-and temporarily release and relock the buf_page_get_mutex() mutex.
-Furthermore, the page frame will no longer be accessible via bpage. If this
-function returns false, the buf_page_get_mutex() might be temporarily released
-and relocked too.
-
-The caller must hold the LRU list and buf_page_get_mutex() mutexes.
-
-@return true if freed, false otherwise. */
-UNIV_INTERN
-bool
-buf_LRU_free_page(
-/*===============*/
- buf_page_t* bpage, /*!< in: block to be freed */
- bool zip) /*!< in: true if should remove also the
- compressed page of an uncompressed page */
-{
- buf_page_t* b = NULL;
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- const ulint fold = buf_page_address_fold(bpage->space,
- bpage->offset);
- prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
- ut_ad(mutex_own(block_mutex));
- ut_ad(buf_page_in_file(bpage));
- ut_ad(bpage->in_LRU_list);
-
- if (!buf_page_can_relocate(bpage)) {
-
- /* Do not free buffer fixed or I/O-fixed blocks. */
- return(false);
- }
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
-#endif /* UNIV_IBUF_COUNT_DEBUG */
-
- if (zip || !bpage->zip.data) {
- /* This would completely free the block. */
- /* Do not completely free dirty blocks. */
-
- if (bpage->oldest_modification) {
- return(false);
- }
- } else if (bpage->oldest_modification > 0
- && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
-
- ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
-
- return(false);
-
- } else if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
- b = buf_page_alloc_descriptor();
- ut_a(b);
- }
-
- ut_ad(buf_page_in_file(bpage));
- ut_ad(bpage->in_LRU_list);
- ut_ad(!bpage->in_flush_list == !bpage->oldest_modification);
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr, "Putting space %lu page %lu to free list\n",
- (ulong) buf_page_get_space(bpage),
- (ulong) buf_page_get_page_no(bpage));
- }
-#endif /* UNIV_DEBUG */
-
- mutex_exit(block_mutex);
-
- rw_lock_x_lock(hash_lock);
- mutex_enter(block_mutex);
-
- if (UNIV_UNLIKELY(!buf_page_can_relocate(bpage)
- || ((zip || !bpage->zip.data)
- && bpage->oldest_modification))) {
-
-not_freed:
- rw_lock_x_unlock(hash_lock);
- if (b) {
- buf_page_free_descriptor(b);
- }
-
- return(false);
- } else if (UNIV_UNLIKELY(bpage->oldest_modification
- && (buf_page_get_state(bpage)
- != BUF_BLOCK_FILE_PAGE))) {
-
- ut_ad(buf_page_get_state(bpage)
- == BUF_BLOCK_ZIP_DIRTY);
- goto not_freed;
- }
-
- if (b) {
- memcpy(b, bpage, sizeof *b);
- }
-
- if (!buf_LRU_block_remove_hashed(bpage, zip)) {
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- if (b) {
- buf_page_free_descriptor(b);
- }
-
- mutex_enter(block_mutex);
-
- return(true);
- }
-
-#ifdef UNIV_SYNC_DEBUG
- /* buf_LRU_block_remove_hashed() releases the hash_lock */
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
- && !rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- /* We have just freed a BUF_BLOCK_FILE_PAGE. If b != NULL
- then it was a compressed page with an uncompressed frame and
- we are interested in freeing only the uncompressed frame.
- Therefore we have to reinsert the compressed page descriptor
- into the LRU and page_hash (and possibly flush_list).
- if b == NULL then it was a regular page that has been freed */
-
- if (b) {
- buf_page_t* prev_b = UT_LIST_GET_PREV(LRU, b);
-
- rw_lock_x_lock(hash_lock);
- mutex_enter(block_mutex);
-
- ut_a(!buf_page_hash_get_low(
- buf_pool, b->space, b->offset, fold));
-
- b->state = b->oldest_modification
- ? BUF_BLOCK_ZIP_DIRTY
- : BUF_BLOCK_ZIP_PAGE;
- UNIV_MEM_DESC(b->zip.data,
- page_zip_get_size(&b->zip));
-
- /* The fields in_page_hash and in_LRU_list of
- the to-be-freed block descriptor should have
- been cleared in
- buf_LRU_block_remove_hashed(), which
- invokes buf_LRU_remove_block(). */
- ut_ad(!bpage->in_page_hash);
- ut_ad(!bpage->in_LRU_list);
- /* bpage->state was BUF_BLOCK_FILE_PAGE because
- b != NULL. The type cast below is thus valid. */
- ut_ad(!((buf_block_t*) bpage)->in_unzip_LRU_list);
-
- /* The fields of bpage were copied to b before
- buf_LRU_block_remove_hashed() was invoked. */
- ut_ad(!b->in_zip_hash);
- ut_ad(b->in_page_hash);
- ut_ad(b->in_LRU_list);
-
- HASH_INSERT(buf_page_t, hash,
- buf_pool->page_hash, fold, b);
-
- /* Insert b where bpage was in the LRU list. */
- if (UNIV_LIKELY(prev_b != NULL)) {
- ulint lru_len;
-
- ut_ad(prev_b->in_LRU_list);
- ut_ad(buf_page_in_file(prev_b));
- UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU,
- prev_b, b);
-
- incr_LRU_size_in_bytes(b, buf_pool);
-
- if (buf_page_is_old(b)) {
- buf_pool->LRU_old_len++;
- if (UNIV_UNLIKELY
- (buf_pool->LRU_old
- == UT_LIST_GET_NEXT(LRU, b))) {
-
- buf_pool->LRU_old = b;
- }
- }
-
- lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
-
- if (lru_len > BUF_LRU_OLD_MIN_LEN) {
- ut_ad(buf_pool->LRU_old);
- /* Adjust the length of the
- old block list if necessary */
- buf_LRU_old_adjust_len(buf_pool);
- } else if (lru_len == BUF_LRU_OLD_MIN_LEN) {
- /* The LRU list is now long
- enough for LRU_old to become
- defined: init it */
- buf_LRU_old_init(buf_pool);
- }
-#ifdef UNIV_LRU_DEBUG
- /* Check that the "old" flag is consistent
- in the block and its neighbours. */
- buf_page_set_old(b, buf_page_is_old(b));
-#endif /* UNIV_LRU_DEBUG */
- } else {
- ut_d(b->in_LRU_list = FALSE);
- buf_LRU_add_block_low(b, buf_page_is_old(b));
- }
-
- mutex_enter(&buf_pool->zip_mutex);
- rw_lock_x_unlock(hash_lock);
- if (b->state == BUF_BLOCK_ZIP_PAGE) {
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- buf_LRU_insert_zip_clean(b);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- } else {
- /* Relocate on buf_pool->flush_list. */
- buf_flush_relocate_on_flush_list(bpage, b);
- }
-
- bpage->zip.data = NULL;
- page_zip_set_size(&bpage->zip, 0);
-
- /* Prevent buf_page_get_gen() from
- decompressing the block while we release block_mutex. */
- buf_page_set_sticky(b);
- mutex_exit(&buf_pool->zip_mutex);
- mutex_exit(block_mutex);
-
- }
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- /* Remove possible adaptive hash index on the page.
- The page was declared uninitialized by
- buf_LRU_block_remove_hashed(). We need to flag
- the contents of the page valid (which it still is) in
- order to avoid bogus Valgrind warnings.*/
-
- UNIV_MEM_VALID(((buf_block_t*) bpage)->frame,
- UNIV_PAGE_SIZE);
- btr_search_drop_page_hash_index((buf_block_t*) bpage);
- UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
- UNIV_PAGE_SIZE);
-
- if (b) {
- ib_uint32_t checksum;
- /* Compute and stamp the compressed page
- checksum while not holding any mutex. The
- block is already half-freed
- (BUF_BLOCK_REMOVE_HASH) and removed from
- buf_pool->page_hash, thus inaccessible by any
- other thread. */
-
- checksum = static_cast<ib_uint32_t>(
- page_zip_calc_checksum(
- b->zip.data,
- page_zip_get_size(&b->zip),
- static_cast<srv_checksum_algorithm_t>(
- srv_checksum_algorithm)));
-
- mach_write_to_4(b->zip.data + FIL_PAGE_SPACE_OR_CHKSUM,
- checksum);
- }
-
- mutex_enter(block_mutex);
-
- if (b) {
- mutex_enter(&buf_pool->zip_mutex);
- buf_page_unset_sticky(b);
- mutex_exit(&buf_pool->zip_mutex);
- }
-
- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
- ut_ad(mutex_own(block_mutex));
- ut_ad(!mutex_own(&buf_pool->LRU_list_mutex));
- return(true);
-}
-
-/******************************************************************//**
-Puts a block back to the free list.
-
-The caller must hold block->mutex. Only blocks in state BUF_BLOCK_MEMORY
-or BUF_BLOCK_READY_FOR_USE are accepted; for any other state an error
-message is printed to stderr and the function returns without freeing
-the block.
-
-If the block still carries a compressed page (block->page.zip.data), that
-page is detached and passed to buf_buddy_free() while block->mutex is
-temporarily released. Finally, under buf_pool->free_list_mutex, the block
-is switched to BUF_BLOCK_NOT_USED and added to the head of
-buf_pool->free. */
-UNIV_INTERN
-void
-buf_LRU_block_free_non_file_page(
-/*=============================*/
- buf_block_t* block) /*!< in: block, must not contain a file page */
-{
- void* data;
- buf_pool_t* buf_pool = buf_pool_from_block(block);
-
- ut_ad(block);
- ut_ad(mutex_own(&block->mutex));
-
- switch (buf_block_get_state(block)) {
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_READY_FOR_USE:
- break;
- default:
- fprintf(stderr, "InnoDB: Error: Block %p incorrect state %s in buf_LRU_block_free_non_file_page()\n",
- block, buf_get_state_name(block));
- return; /* Continue: the block is left as it is and is not freed */
- }
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- ut_a(block->n_pointers == 0);
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- ut_ad(!block->page.in_free_list);
- ut_ad(!block->page.in_flush_list);
- ut_ad(!block->page.in_LRU_list);
-
- UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
-#ifdef UNIV_DEBUG
- /* Wipe contents of page to reveal possible stale pointers to it */
- memset(block->frame, '\0', UNIV_PAGE_SIZE);
-#else
- /* Wipe page_no and space_id (each 4 bytes, filled with 0xfe) */
- memset(block->frame + FIL_PAGE_OFFSET, 0xfe, 4);
- memset(block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xfe, 4);
-#endif
- data = block->page.zip.data;
-
- if (data) {
- block->page.zip.data = NULL; /* detach before the mutex is released */
- mutex_exit(&block->mutex);
-
- buf_buddy_free(
- buf_pool, data, page_zip_get_size(&block->page.zip));
-
- mutex_enter(&block->mutex);
- page_zip_set_size(&block->page.zip, 0); /* reset only after the size was used above */
- }
-
- mutex_enter_first(&buf_pool->free_list_mutex);
- buf_block_set_state(block, BUF_BLOCK_NOT_USED);
- UT_LIST_ADD_FIRST(list, buf_pool->free, (&block->page));
- ut_d(block->page.in_free_list = TRUE);
- mutex_exit(&buf_pool->free_list_mutex);
-
- UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE);
-}
-
-/******************************************************************//**
-Takes a block out of the LRU list and page hash table.
-If the block is compressed-only (BUF_BLOCK_ZIP_PAGE),
-the object will be freed.
-
-The caller must hold buf_pool->LRU_list_mutex, the buf_page_get_mutex() mutex
-and the appropriate hash_lock. This function will release the
-buf_page_get_mutex() and the hash_lock.
-
-The page must be neither I/O-fixed nor buffer-fixed; both conditions are
-checked with ut_a(). buf_pool->freed_page_clock is incremented on every
-call. A page state other than BUF_BLOCK_FILE_PAGE or BUF_BLOCK_ZIP_PAGE
-ends in ut_error, as does a page that is not the one found in page_hash.
-
-If a compressed page is freed other compressed pages may be relocated.
-@retval true if BUF_BLOCK_FILE_PAGE was removed from page_hash. The
-caller needs to free the page to the free list
-@retval false if BUF_BLOCK_ZIP_PAGE was removed from page_hash. In
-this case the block is already returned to the buddy allocator. */
-static
-bool
-buf_LRU_block_remove_hashed(
-/*========================*/
- buf_page_t* bpage, /*!< in: block, must contain a file page and
- be in a state where it can be freed; there
- may or may not be a hash index to the page */
- bool zip) /*!< in: true if should remove also the
- compressed page of an uncompressed page */
-{
- ulint fold;
- const buf_page_t* hashed_bpage;
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- prio_rw_lock_t* hash_lock;
-
- ut_ad(bpage);
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-
- fold = buf_page_address_fold(bpage->space, bpage->offset);
- hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
- ut_a(bpage->buf_fix_count == 0);
-
- buf_LRU_remove_block(bpage);
-
- buf_pool->freed_page_clock += 1;
-
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_FILE_PAGE:
- UNIV_MEM_ASSERT_W(bpage, sizeof(buf_block_t));
- UNIV_MEM_ASSERT_W(((buf_block_t*) bpage)->frame,
- UNIV_PAGE_SIZE);
- buf_block_modify_clock_inc((buf_block_t*) bpage);
- if (bpage->zip.data) {
- const page_t* page = ((buf_block_t*) bpage)->frame;
- const ulint zip_size
- = page_zip_get_size(&bpage->zip);
-
- ut_a(!zip || bpage->oldest_modification == 0);
-
- switch (UNIV_EXPECT(fil_page_get_type(page),
- FIL_PAGE_INDEX)) {
- case FIL_PAGE_TYPE_ALLOCATED:
- case FIL_PAGE_INODE:
- case FIL_PAGE_IBUF_BITMAP:
- case FIL_PAGE_TYPE_FSP_HDR:
- case FIL_PAGE_TYPE_XDES:
- /* These are essentially uncompressed pages. */
- if (!zip) {
- /* InnoDB writes the data to the
- uncompressed page frame. Copy it
- to the compressed page, which will
- be preserved. */
- memcpy(bpage->zip.data, page,
- zip_size);
- }
- break;
- case FIL_PAGE_TYPE_ZBLOB:
- case FIL_PAGE_TYPE_ZBLOB2:
- break;
- case FIL_PAGE_INDEX:
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(
- &bpage->zip, page,
- ((buf_block_t*) bpage)->index));
-#endif /* UNIV_ZIP_DEBUG */
- break;
- default:
- ut_print_timestamp(stderr);
- fputs(" InnoDB: ERROR: The compressed page"
- " to be evicted seems corrupt:", stderr);
- ut_print_buf(stderr, page, zip_size);
- fputs("\nInnoDB: Possibly older version"
- " of the page:", stderr);
- ut_print_buf(stderr, bpage->zip.data,
- zip_size);
- putc('\n', stderr);
- ut_error;
- }
-
- break;
- }
- /* fall through: a file page without zip.data shares the
- checks of the compressed-only case below */
- case BUF_BLOCK_ZIP_PAGE:
- ut_a(bpage->oldest_modification == 0);
- UNIV_MEM_ASSERT_W(bpage->zip.data,
- page_zip_get_size(&bpage->zip));
- break;
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_ZIP_DIRTY:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- break;
- }
-
- hashed_bpage = buf_page_hash_get_low(buf_pool, bpage->space,
- bpage->offset, fold);
-
- if (UNIV_UNLIKELY(bpage != hashed_bpage)) {
- fprintf(stderr,
- "InnoDB: Error: page %lu %lu not found"
- " in the hash table\n",
- (ulong) bpage->space,
- (ulong) bpage->offset);
-
-#ifdef UNIV_DEBUG
- fprintf(stderr,
- "InnoDB: in_page_hash %lu in_zip_hash %lu\n"
- " in_free_list %lu in_flush_list %lu in_LRU_list %lu\n"
- " zip.data %p zip_size %lu page_state %d\n",
- bpage->in_page_hash, bpage->in_zip_hash,
- bpage->in_free_list, bpage->in_flush_list,
- bpage->in_LRU_list, bpage->zip.data,
- buf_page_get_zip_size(bpage),
- buf_page_get_state(bpage));
-#else
- fprintf(stderr,
- "InnoDB: zip.data %p zip_size %lu page_state %d\n",
- bpage->zip.data,
- buf_page_get_zip_size(bpage),
- buf_page_get_state(bpage));
-#endif
-
- if (hashed_bpage) {
- fprintf(stderr,
- "InnoDB: In hash table we find block"
- " %p of %lu %lu which is not %p\n",
- (const void*) hashed_bpage,
- (ulong) hashed_bpage->space,
- (ulong) hashed_bpage->offset,
- (const void*) bpage);
- }
-
- ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
- ut_a(bpage->buf_fix_count == 0);
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- mutex_exit(buf_page_get_mutex(bpage));
- rw_lock_x_unlock(hash_lock);
- mutex_exit(&buf_pool->LRU_list_mutex);
- buf_print();
- buf_LRU_print();
- buf_validate();
- buf_LRU_validate();
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- ut_error;
- }
-
- ut_ad(!bpage->in_zip_hash);
- ut_ad(bpage->in_page_hash);
- ut_d(bpage->in_page_hash = FALSE);
- HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_PAGE:
- ut_ad(!bpage->in_free_list);
- ut_ad(!bpage->in_flush_list);
- ut_ad(!bpage->in_LRU_list);
- ut_a(bpage->zip.data);
- ut_a(buf_page_get_zip_size(bpage));
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- UT_LIST_REMOVE(list, buf_pool->zip_clean, bpage);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
- mutex_exit(&buf_pool->zip_mutex);
- rw_lock_x_unlock(hash_lock);
-
- buf_buddy_free(
- buf_pool, bpage->zip.data,
- page_zip_get_size(&bpage->zip));
-
- buf_page_free_descriptor(bpage);
- return(false);
-
- case BUF_BLOCK_FILE_PAGE:
- memset(((buf_block_t*) bpage)->frame
- + FIL_PAGE_OFFSET, 0xff, 4);
- memset(((buf_block_t*) bpage)->frame
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0xff, 4);
- UNIV_MEM_INVALID(((buf_block_t*) bpage)->frame,
- UNIV_PAGE_SIZE);
- buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH);
-
- if (buf_pool->flush_rbt == NULL) {
- bpage->space = ULINT32_UNDEFINED;
- bpage->offset = ULINT32_UNDEFINED;
- }
-
- /* Question: If we release bpage and hash mutex here
- then what protects us against:
- 1) Some other thread buffer fixing this page
- 2) Some other thread trying to read this page and
- not finding it in buffer pool attempting to read it
- from the disk.
- Answer:
- 1) Cannot happen because the page is no longer in the
- page_hash. Only possibility is when while invalidating
- a tablespace we buffer fix the prev_page in LRU to
- avoid relocation during the scan. But that is not
- possible because we are holding LRU list mutex.
-
- 2) Not possible because in buf_page_init_for_read()
- we do a look up of page_hash while holding LRU list
- mutex and since we are holding LRU list mutex here
- and by the time we'll release it in the caller we'd
- have inserted the compressed only descriptor in the
- page_hash. */
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
- rw_lock_x_unlock(hash_lock);
- mutex_exit(&((buf_block_t*) bpage)->mutex);
-
- if (zip && bpage->zip.data) {
- /* Free the compressed page. */
- void* data = bpage->zip.data;
- bpage->zip.data = NULL;
-
- ut_ad(!bpage->in_free_list);
- ut_ad(!bpage->in_flush_list);
- ut_ad(!bpage->in_LRU_list);
-
- buf_buddy_free(
- buf_pool, data,
- page_zip_get_size(&bpage->zip));
-
- page_zip_set_size(&bpage->zip, 0);
- }
-
- return(true);
-
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_ZIP_DIRTY:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- break; /* not removable: reaches the ut_error below */
- }
-
- ut_error;
- return(false);
-}
-
-/******************************************************************//**
-Puts a file page that has no hash index to the free list.
-The block is first switched to BUF_BLOCK_MEMORY and is then handed to
-buf_LRU_block_free_non_file_page(). The caller must hold block->mutex. */
-static
-void
-buf_LRU_block_free_hashed_page(
-/*===========================*/
-	buf_block_t*	block)	/*!< in: block, must contain a file page and
-				be in a state where it can be freed */
-{
-	ut_ad(mutex_own(&block->mutex));
-	/* The free routine accepts only blocks that do not hold a file
-	page, so change the state before passing the block on. */
-	buf_block_set_state(block, BUF_BLOCK_MEMORY);
-	buf_LRU_block_free_non_file_page(block);
-}
-
-/******************************************************************//**
-Remove one page from LRU list and put it to free list.
-
-On entry the caller must hold buf_pool->LRU_list_mutex, the mutex returned
-by buf_page_get_mutex(bpage) and (checked only with UNIV_SYNC_DEBUG) the
-page hash lock of the page in exclusive mode. The page is removed with
-buf_LRU_block_remove_hashed(bpage, true); when that call returns true the
-block mutex is re-acquired just for the call to
-buf_LRU_block_free_hashed_page(). On return neither the hash lock nor the
-block mutex is held. */
-UNIV_INTERN
-void
-buf_LRU_free_one_page(
-/*==================*/
- buf_page_t* bpage) /*!< in/out: block, must contain a file page and
- be in a state where it can be freed; there
- may or may not be a hash index to the page */
-{
-#if defined(UNIV_DEBUG) || defined(UNIV_SYNC_DEBUG)
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); /* only used by the debug assertions */
-#endif
-#ifdef UNIV_SYNC_DEBUG
- const ulint fold = buf_page_address_fold(bpage->space,
- bpage->offset);
- prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-#endif
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); /* fetched before the page is removed */
-
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
- ut_ad(mutex_own(block_mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX));
-#endif
-
- if (buf_LRU_block_remove_hashed(bpage, true)) {
- mutex_enter(block_mutex);
- buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
- mutex_exit(block_mutex);
- }
-
- /* buf_LRU_block_remove_hashed() releases hash_lock and block_mutex */
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
- && !rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(!mutex_own(block_mutex));
-}
-
-/**********************************************************************//**
-Updates buf_pool->LRU_old_ratio for one buffer pool instance.
-The requested percentage is converted to the internal ratio scale and
-clamped to [BUF_LRU_OLD_RATIO_MIN, BUF_LRU_OLD_RATIO_MAX] before it is
-stored.
-@return updated old_pct */
-static
-uint
-buf_LRU_old_ratio_update_instance(
-/*==============================*/
-	buf_pool_t*	buf_pool,/*!< in: buffer pool instance */
-	uint		old_pct,/*!< in: Reserve this percentage of
-				the buffer pool for "old" blocks. */
-	ibool		adjust)	/*!< in: TRUE=adjust the LRU list;
-				FALSE=just assign buf_pool->LRU_old_ratio
-				during the initialization of InnoDB */
-{
-	uint	clamped = old_pct * BUF_LRU_OLD_RATIO_DIV / 100;
-
-	if (clamped < BUF_LRU_OLD_RATIO_MIN) {
-		clamped = BUF_LRU_OLD_RATIO_MIN;
-	} else if (clamped > BUF_LRU_OLD_RATIO_MAX) {
-		clamped = BUF_LRU_OLD_RATIO_MAX;
-	}
-
-	if (!adjust) {
-		/* Initialization: plain assignment, no mutex taken. */
-		buf_pool->LRU_old_ratio = clamped;
-	} else {
-		mutex_enter(&buf_pool->LRU_list_mutex);
-
-		if (clamped != buf_pool->LRU_old_ratio) {
-			buf_pool->LRU_old_ratio = clamped;
-
-			/* The old sublist is only rebalanced once the
-			LRU list is long enough. */
-			if (UT_LIST_GET_LEN(buf_pool->LRU)
-			    >= BUF_LRU_OLD_MIN_LEN) {
-				buf_LRU_old_adjust_len(buf_pool);
-			}
-		}
-
-		mutex_exit(&buf_pool->LRU_list_mutex);
-	}
-
-	/* Convert the stored ratio back to a rounded percentage, i.e.
-	the reverse of ratio = old_pct * BUF_LRU_OLD_RATIO_DIV / 100 */
-	return((uint) (clamped * 100 / (double) BUF_LRU_OLD_RATIO_DIV + 0.5));
-}
-
-/**********************************************************************//**
-Updates buf_pool->LRU_old_ratio in every buffer pool instance.
-@return updated old_pct as reported for the last instance, or 0 if
-there are no instances */
-UNIV_INTERN
-ulint
-buf_LRU_old_ratio_update(
-/*=====================*/
-	uint	old_pct,/*!< in: Reserve this percentage of
-			the buffer pool for "old" blocks. */
-	ibool	adjust)	/*!< in: TRUE=adjust the LRU list;
-			FALSE=just assign buf_pool->LRU_old_ratio
-			during the initialization of InnoDB */
-{
-	ulint	result = 0;
-
-	for (ulint n = 0; n < srv_buf_pool_instances; n++) {
-		result = buf_LRU_old_ratio_update_instance(
-			buf_pool_from_array(n), old_pct, adjust);
-	}
-
-	return(result);
-}
-
-/********************************************************************//**
-Update the historical stats that we are collecting for LRU eviction
-policy at the end of each interval. The running sums and the history
-array are only touched once some buffer pool instance has started
-evicting; the current-interval counters are cleared on every call. */
-UNIV_INTERN
-void
-buf_LRU_stat_update(void)
-/*=====================*/
-{
-	ibool	evict_started = FALSE;
-
-	/* Eviction has started as soon as any instance has a non-zero
-	freed_page_clock. */
-	for (ulint n = 0; n < srv_buf_pool_instances; n++) {
-		if (buf_pool_from_array(n)->freed_page_clock != 0) {
-			evict_started = TRUE;
-			break;
-		}
-	}
-
-	if (evict_started) {
-		/* Pick the slot to recycle, then advance the index. */
-		buf_LRU_stat_t*	slot
-			= &buf_LRU_stat_arr[buf_LRU_stat_arr_ind];
-
-		buf_LRU_stat_arr_ind++;
-		buf_LRU_stat_arr_ind %= BUF_LRU_STAT_N_INTERVAL;
-
-		/* buf_LRU_stat_cur is not protected by any mutex and may
-		change at any time. Work on a local snapshot so that the
-		sum and the stored entry see the same value. */
-		buf_LRU_stat_t	snapshot = buf_LRU_stat_cur;
-
-		/* Add the current value and subtract the obsolete entry. */
-		buf_LRU_stat_sum.io += snapshot.io - slot->io;
-		buf_LRU_stat_sum.unzip += snapshot.unzip - slot->unzip;
-
-		/* Put current entry in the array. */
-		memcpy(slot, &snapshot, sizeof *slot);
-	}
-
-	/* Clear the current entry. */
-	memset(&buf_LRU_stat_cur, 0, sizeof buf_LRU_stat_cur);
-}
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/**********************************************************************//**
-Validates the LRU list for one buffer pool instance.
-
-Under buf_pool->LRU_list_mutex this checks that the length of the old
-sublist is within BUF_LRU_OLD_TOLERANCE of its target (only when the LRU
-list has at least BUF_LRU_OLD_MIN_LEN entries), that no block in the LRU
-list is in a state that does not belong there, and that the old blocks
-start at buf_pool->LRU_old and are contiguous up to the end of the list.
-Then, under buf_pool->free_list_mutex, every block in the free list must
-be BUF_BLOCK_NOT_USED. Finally the unzip_LRU list is checked, again under
-buf_pool->LRU_list_mutex. Violations fail ut_a()/ut_ad()/ut_error. */
-static
-void
-buf_LRU_validate_instance(
-/*======================*/
- buf_pool_t* buf_pool) /*!< in: buffer pool instance */
-{
- buf_page_t* bpage;
- buf_block_t* block;
- ulint old_len;
- ulint new_len;
-
- ut_ad(buf_pool);
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
-
- ut_a(buf_pool->LRU_old);
- old_len = buf_pool->LRU_old_len;
- new_len = ut_min(UT_LIST_GET_LEN(buf_pool->LRU)
- * buf_pool->LRU_old_ratio
- / BUF_LRU_OLD_RATIO_DIV,
- UT_LIST_GET_LEN(buf_pool->LRU)
- - (BUF_LRU_OLD_TOLERANCE
- + BUF_LRU_NON_OLD_MIN_LEN));
- ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE);
- ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE);
- }
-
- UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU, CheckInLRUList());
-
- old_len = 0; /* from here on: number of old blocks seen so far */
-
- for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
- bpage != NULL;
- bpage = UT_LIST_GET_NEXT(LRU, bpage)) {
-
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- ut_error;
- break;
- case BUF_BLOCK_FILE_PAGE:
- ut_ad(((buf_block_t*) bpage)->in_unzip_LRU_list
- == buf_page_belongs_to_unzip_LRU(bpage)); /* fall through */
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- break;
- }
-
- if (buf_page_is_old(bpage)) {
- const buf_page_t* prev
- = UT_LIST_GET_PREV(LRU, bpage);
- const buf_page_t* next
- = UT_LIST_GET_NEXT(LRU, bpage);
-
- if (!old_len++) {
- ut_a(buf_pool->LRU_old == bpage); /* first old block seen */
- } else {
- ut_a(!prev || buf_page_is_old(prev));
- }
-
- ut_a(!next || buf_page_is_old(next));
- }
- }
-
- ut_a(buf_pool->LRU_old_len == old_len);
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- mutex_enter(&buf_pool->free_list_mutex);
-
- UT_LIST_VALIDATE(list, buf_page_t, buf_pool->free, CheckInFreeList());
-
- for (bpage = UT_LIST_GET_FIRST(buf_pool->free);
- bpage != NULL;
- bpage = UT_LIST_GET_NEXT(list, bpage)) {
-
- ut_a(buf_page_get_state(bpage) == BUF_BLOCK_NOT_USED);
- }
-
- mutex_exit(&buf_pool->free_list_mutex);
-
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- UT_LIST_VALIDATE(
- unzip_LRU, buf_block_t, buf_pool->unzip_LRU,
- CheckUnzipLRUAndLRUList());
-
- for (block = UT_LIST_GET_FIRST(buf_pool->unzip_LRU);
- block;
- block = UT_LIST_GET_NEXT(unzip_LRU, block)) {
-
- ut_ad(block->in_unzip_LRU_list);
- ut_ad(block->page.in_LRU_list);
- ut_a(buf_page_belongs_to_unzip_LRU(&block->page));
- }
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-}
-
-/**********************************************************************//**
-Validates the LRU list of every buffer pool instance by calling
-buf_LRU_validate_instance() on each of them.
-@return TRUE */
-UNIV_INTERN
-ibool
-buf_LRU_validate(void)
-/*==================*/
-{
-	for (ulint n = 0; n < srv_buf_pool_instances; n++) {
-		buf_LRU_validate_instance(buf_pool_from_array(n));
-	}
-
-	return(TRUE);
-}
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/**********************************************************************//**
-Prints the LRU list for one buffer pool instance.
-
-The list is walked under buf_pool->LRU_list_mutex and each block is
-printed to stderr while its buf_page_get_mutex() mutex is held: space id
-and page number, then "old", the buffer-fix count, the io_fix value and
-"modif." where they apply, then the page type and index id read from the
-page frame. Only BUF_BLOCK_FILE_PAGE and BUF_BLOCK_ZIP_PAGE get the
-type/index line; any other state is printed as "!state N!". */
-UNIV_INTERN
-void
-buf_LRU_print_instance(
-/*===================*/
- buf_pool_t* buf_pool) /*!< in: buffer pool instance */
-{
- const buf_page_t* bpage;
-
- ut_ad(buf_pool);
- mutex_enter(&buf_pool->LRU_list_mutex);
-
- bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
-
- while (bpage != NULL) {
-
- mutex_enter(buf_page_get_mutex(bpage));
- fprintf(stderr, "BLOCK space %lu page %lu ",
- (ulong) buf_page_get_space(bpage),
- (ulong) buf_page_get_page_no(bpage));
-
- if (buf_page_is_old(bpage)) {
- fputs("old ", stderr);
- }
-
- if (bpage->buf_fix_count) {
- fprintf(stderr, "buffix count %lu ",
- (ulong) bpage->buf_fix_count);
- }
-
- if (buf_page_get_io_fix(bpage)) {
- fprintf(stderr, "io_fix %lu ",
- (ulong) buf_page_get_io_fix(bpage));
- }
-
- if (bpage->oldest_modification) {
- fputs("modif. ", stderr);
- }
-
- switch (buf_page_get_state(bpage)) {
- const byte* frame; /* shared by the two cases below */
- case BUF_BLOCK_FILE_PAGE:
- frame = buf_block_get_frame((buf_block_t*) bpage);
- fprintf(stderr, "\ntype %lu"
- " index id %llu\n",
- (ulong) fil_page_get_type(frame),
- (ullint) btr_page_get_index_id(frame));
- break;
- case BUF_BLOCK_ZIP_PAGE:
- frame = bpage->zip.data;
- fprintf(stderr, "\ntype %lu size %lu"
- " index id %llu\n",
- (ulong) fil_page_get_type(frame),
- (ulong) buf_page_get_zip_size(bpage),
- (ullint) btr_page_get_index_id(frame));
- break;
-
- default:
- fprintf(stderr, "\n!state %lu!\n",
- (ulong) buf_page_get_state(bpage));
- break;
- }
-
- mutex_exit(buf_page_get_mutex(bpage));
- bpage = UT_LIST_GET_NEXT(LRU, bpage);
- }
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-}
-
-/**********************************************************************//**
-Prints the LRU list of every buffer pool instance by calling
-buf_LRU_print_instance() on each of them. */
-UNIV_INTERN
-void
-buf_LRU_print(void)
-/*===============*/
-{
-	for (ulint n = 0; n < srv_buf_pool_instances; n++) {
-		buf_LRU_print_instance(buf_pool_from_array(n));
-	}
-}
-#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc
deleted file mode 100644
index f90b1e46c1e..00000000000
--- a/storage/xtradb/buf/buf0mtflu.cc
+++ /dev/null
@@ -1,756 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2013, 2014, Fusion-io. All Rights Reserved.
-Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file buf/buf0mtflu.cc
-Multi-threaded flush method implementation
-
-Created 06/11/2013 Dhananjoy Das DDas@fusionio.com
-Modified 12/12/2013 Jan Lindström jan.lindstrom@skysql.com
-Modified 03/02/2014 Dhananjoy Das DDas@fusionio.com
-Modified 06/02/2014 Jan Lindström jan.lindstrom@skysql.com
-***********************************************************************/
-
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "buf0mtflu.h"
-#include "buf0checksum.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-#include "page0zip.h"
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "page0page.h"
-#include "fil0fil.h"
-#include "buf0lru.h"
-#include "buf0rea.h"
-#include "ibuf0ibuf.h"
-#include "log0log.h"
-#include "os0file.h"
-#include "os0sync.h"
-#include "trx0sys.h"
-#include "srv0mon.h"
-#include "mysql/plugin.h"
-#include "mysql/service_thd_wait.h"
-#include "fil0pagecompress.h"
-
-#define MT_COMP_WATER_MARK 50
-/** Time to wait for a message. */
-#define MT_WAIT_IN_USECS 5000000
-
-/* Work item status. Note that WRK_ITEM_DONE and WRK_ITEM_SUCCESS share
-the value 2, so the two cannot be told apart at run time. */
-typedef enum wrk_status {
- WRK_ITEM_UNSET=0, /*!< Work item is not set */
- WRK_ITEM_START=1, /*!< Processing of work item has started */
- WRK_ITEM_DONE=2, /*!< Processing is done; usually set to
- SUCCESS/FAILED instead */
- WRK_ITEM_SUCCESS=2, /*!< Work item successfully processed */
- WRK_ITEM_FAILED=3, /*!< Work item process failed */
- WRK_ITEM_EXIT=4, /*!< Exiting */
- WRK_ITEM_SET=5, /*!< Work item is set */
- WRK_ITEM_STATUS_UNDEFINED /*!< No explicit value: one more than WRK_ITEM_SET */
-} wrk_status_t;
-
-/* Work item task type; selects how a worker handles a work item */
-typedef enum mt_wrk_tsk {
- MT_WRK_NONE=0, /*!< Exit queue-wait */
- MT_WRK_WRITE=1, /*!< Flush operation */
- MT_WRK_READ=2, /*!< Read operation */
- MT_WRK_UNDEFINED /*!< No explicit value: one more than MT_WRK_READ */
-} mt_wrk_tsk_t;
-
-/* Work thread status */
-typedef enum wthr_status {
- WTHR_NOT_INIT=0, /*!< Work thread not initialized */
- WTHR_INITIALIZED=1, /*!< Work thread initialized */
- WTHR_SIG_WAITING=2, /*!< Work thread waiting for a signal */
- WTHR_RUNNING=3, /*!< Work thread running */
- WTHR_NO_WORK=4, /*!< Work thread has no work */
- WTHR_KILL_IT=5, /*!< Work thread should exit */
- WTHR_STATUS_UNDEFINED /*!< No explicit value: one more than WTHR_KILL_IT */
-} wthr_status_t;
-
-/* Write work task: the parameters of one buffer pool flush request */
-typedef struct wr_tsk {
- buf_pool_t *buf_pool; /*!< buffer-pool instance */
- buf_flush_t flush_type; /*!< flush-type for buffer-pool
- flush operation */
- ulint min; /*!< minimum number of pages
- requested to be flushed */
- lsn_t lsn_limit; /*!< lsn limit for the buffer-pool
- flush operation */
-} wr_tsk_t;
-
-/* Read work task */
-typedef struct rd_tsk {
- buf_pool_t *page_pool; /*!< list of pages to decompress */
-} rd_tsk_t;
-
-/* Work item: one unit of work handed to a worker thread through the
-work queue and handed back through a completion queue */
-typedef struct wrk_itm
-{
- mt_wrk_tsk_t tsk; /*!< Task type. Based on task-type
- one of the entries wr_tsk/rd_tsk
- will be used */
- wr_tsk_t wr; /*!< Flush page list */
- rd_tsk_t rd; /*!< Decompress page list */
- ulint n_flushed; /*!< Number of flushed pages */
- ulint n_evicted; /*!< Number of evicted pages */
- os_thread_id_t id_usr; /*!< Thread-id currently working */
- wrk_status_t wi_status; /*!< Work item status */
- mem_heap_t *wheap; /*!< Heap where to allocate memory
- for queue nodes */
- mem_heap_t *rheap; /*!< Heap passed to ib_wqueue_add() when the item is posted to a completion queue */
-} wrk_t;
-
-typedef struct thread_data
-{
- os_thread_id_t wthread_id; /*!< Identifier; compared with os_thread_get_curr_id() to find a thread's slot */
- os_thread_t wthread; /*!< Thread id */
- wthr_status_t wt_status; /*!< Worker thread status */
-} thread_data_t;
-
-/* Thread synchronization data */
-typedef struct thread_sync
-{
- /* Global variables used by all threads */
- os_fast_mutex_t thread_global_mtx; /*!< Mutex used protecting below
- variables */
- ulint n_threads; /*!< Number of threads */
- ib_wqueue_t *wq; /*!< Work Queue */
- ib_wqueue_t *wr_cq; /*!< Write Completion Queue */
- ib_wqueue_t *rd_cq; /*!< Read Completion Queue */
- mem_heap_t* wheap; /*!< Work heap where memory
- is allocated */
- mem_heap_t* rheap; /*!< Work heap where memory
- is allocated */
- wthr_status_t gwt_status; /*!< Global thread status */
-
- /* Variables used by only one thread at a time */
- thread_data_t* thread_data; /*!< Thread specific data */
-
-} thread_sync_t;
-
-static int mtflush_work_initialized = -1; /* set to 1 by buf_mtflu_work_init() */
-static thread_sync_t* mtflush_ctx=NULL;
-static os_fast_mutex_t mtflush_mtx;
-
-/******************************************************************//**
-Set multi-threaded flush work initialized.
-Stores 1 in mtflush_work_initialized, which is the value that
-buf_mtflu_init_done() tests for. */
-static inline
-void
-buf_mtflu_work_init(void)
-/*=====================*/
-{
- mtflush_work_initialized = 1;
-}
-
-/******************************************************************//**
-Tells whether multi-threaded flush has been initialized, i.e. whether
-mtflush_work_initialized has been set to 1.
-@return true if initialized */
-bool
-buf_mtflu_init_done(void)
-/*=====================*/
-{
-	const bool	initialized = (mtflush_work_initialized == 1);
-
-	return(initialized);
-}
-
-/******************************************************************//**
-Flush buffer pool instance.
-Runs one flush batch of the type stored in the work item on the buffer
-pool stored in the work item and records the number of flushed pages in
-work_item->n_flushed.
-@return number of flushed pages, or 0 if error happened
-*/
-static
-ulint
-buf_mtflu_flush_pool_instance(
-/*==========================*/
-	wrk_t	*work_item)	/*!< inout: work item to be flushed */
-{
-	flush_counters_t	counters;
-
-	ut_a(work_item != NULL);
-	ut_a(work_item->wr.buf_pool != NULL);
-
-	wr_tsk_t*	task = &work_item->wr;
-
-	if (!buf_flush_start(task->buf_pool, task->flush_type)) {
-		/* A flush of this type is already active on this buffer
-		pool. If lsn_limit was specified, skipping an instance
-		means that we cannot guarantee that all pages up to
-		lsn_limit have been flushed. We still let the other
-		buffer pools be flushed, on the assumption that this
-		helps the retry that follows the failure. */
-#ifdef UNIV_MTFLUSH_DEBUG
-		fprintf(stderr, "InnoDB: Note: buf flush start failed there is already active flush for this buffer pool.\n");
-#endif
-		return 0;
-	}
-
-	memset(&counters, 0, sizeof(flush_counters_t));
-
-	if (task->flush_type == BUF_FLUSH_LRU) {
-		/* srv_LRU_scan_depth can be an arbitrarily large value,
-		so cap the request with the current LRU length. */
-		buf_pool_mutex_enter(task->buf_pool);
-		task->min = UT_LIST_GET_LEN(task->buf_pool->LRU);
-		buf_pool_mutex_exit(task->buf_pool);
-		task->min = ut_min(srv_LRU_scan_depth,task->min);
-	}
-
-	buf_flush_batch(task->buf_pool, task->flush_type, task->min,
-			task->lsn_limit, false, &counters);
-
-	work_item->n_flushed = counters.flushed;
-	buf_flush_end(task->buf_pool, task->flush_type);
-	buf_flush_common(task->flush_type, work_item->n_flushed);
-
-	return work_item->n_flushed;
-}
-
-/******************************************************************//**
-Worker function that waits for one work item, processes it and sends
-the reply back.
-
-The item is taken from mtflush_io->wq: first without waiting and, if
-the queue was empty, with ib_wqueue_wait(). If no item is obtained the
-thread status becomes WTHR_NO_WORK and the function returns. Otherwise
-the item is processed according to its task type and posted to the
-write completion queue mtflush_io->wr_cq:
-- MT_WRK_NONE: the item must carry WRK_ITEM_EXIT; the thread status
-  becomes WTHR_KILL_IT.
-- MT_WRK_WRITE: the buffer pool instance is flushed and the item status
-  becomes WRK_ITEM_FAILED if buf_mtflu_flush_pool_instance() returned
-  0, else WRK_ITEM_SUCCESS.
-- MT_WRK_READ and any other task type are not supported and fail an
-  assertion.
-*/
-static
-void
-mtflush_service_io(
-/*===============*/
-	thread_sync_t*	mtflush_io,	/*!< inout: multi-threaded flush
-					synchronization data */
-	thread_data_t*	thread_data)	/*!< inout: status data of the
-					calling worker thread */
-{
-	wrk_t	*work_item = NULL;
-
-	ut_a(mtflush_io != NULL);
-	ut_a(thread_data != NULL);
-
-	thread_data->wt_status = WTHR_SIG_WAITING;
-
-	work_item = (wrk_t *)ib_wqueue_nowait(mtflush_io->wq);
-
-	if (work_item == NULL) {
-		work_item = (wrk_t *)ib_wqueue_wait(mtflush_io->wq);
-	}
-
-	if (work_item) {
-		thread_data->wt_status = WTHR_RUNNING;
-	} else {
-		/* Thread did not get any work */
-		thread_data->wt_status = WTHR_NO_WORK;
-		return;
-	}
-
-	if (work_item->wi_status != WRK_ITEM_EXIT) {
-		work_item->wi_status = WRK_ITEM_SET;
-	}
-
-#ifdef UNIV_MTFLUSH_DEBUG
-	ut_a(work_item->id_usr == 0);
-#endif
-	work_item->id_usr = os_thread_get_curr_id();
-
-	/* This works as a producer/consumer model, where in tasks are
-	 * inserted into the work-queue (wq) and completions are based
-	 * on the type of operations performed and as a result the WRITE/
-	 * compression/flush operation completions get posted to wr_cq.
-	 * And READ/decompress operations completions get posted to rd_cq.
-	 * in future we may have others.
-	 */
-
-	switch(work_item->tsk) {
-	case MT_WRK_NONE:
-		/* The status is asserted to be WRK_ITEM_EXIT already, so
-		it does not have to be assigned again. */
-		ut_a(work_item->wi_status == WRK_ITEM_EXIT);
-		ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap);
-		thread_data->wt_status = WTHR_KILL_IT;
-		break;
-
-	case MT_WRK_WRITE:
-		ut_a(work_item->wi_status == WRK_ITEM_SET);
-		work_item->wi_status = WRK_ITEM_START;
-		/* Process work item. The final status must be set in
-		exactly one of the two branches: assigning
-		WRK_ITEM_SUCCESS unconditionally after the check would
-		overwrite WRK_ITEM_FAILED and hide the failure. */
-		if (0 == buf_mtflu_flush_pool_instance(work_item)) {
-			work_item->wi_status = WRK_ITEM_FAILED;
-		} else {
-			work_item->wi_status = WRK_ITEM_SUCCESS;
-		}
-		ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap);
-		break;
-
-	case MT_WRK_READ:
-		ut_a(0);
-		break;
-
-	default:
-		/* None other than Write/Read handling planned */
-		ut_a(0);
-		break;
-	}
-}
-
-/******************************************************************//**
-Thread used to flush dirty pages when multi-threaded flush is
-used. The thread finds its own slot in mtflush_io->thread_data by thread
-id and then services work items until its status becomes WTHR_KILL_IT.
-@return a dummy parameter*/
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(mtflush_io_thread)(
-/*==============================*/
-	void * arg)	/*!< in: thread_sync_t* shared by all flush
-			threads */
-{
-	thread_sync_t *mtflush_io = ((thread_sync_t *)arg);
-	thread_data_t *this_thread_data = NULL;
-	ulint i;
-
-	/* Find correct slot for this thread */
-	os_fast_mutex_lock(&(mtflush_io->thread_global_mtx));
-	for(i=0; i < mtflush_io->n_threads; i ++) {
-		if (mtflush_io->thread_data[i].wthread_id == os_thread_get_curr_id()) {
-			break;
-		}
-	}
-
-	/* i == n_threads means that no slot matched; using it as an index
-	would address one element past thread_data[], so the bound must be
-	strict.
-	NOTE(review): this relies on the creating thread having stored
-	wthread_id before the lookup runs - confirm against the thread
-	creation code. */
-	ut_a(i < mtflush_io->n_threads);
-	this_thread_data = &mtflush_io->thread_data[i];
-	os_fast_mutex_unlock(&(mtflush_io->thread_global_mtx));
-
-	while (TRUE) {
-
-#ifdef UNIV_MTFLUSH_DEBUG
-		fprintf(stderr, "InnoDB: Note. Thread %lu work queue len %lu return queue len %lu\n",
-			os_thread_get_curr_id(),
-			ib_wqueue_len(mtflush_io->wq),
-			ib_wqueue_len(mtflush_io->wr_cq));
-#endif /* UNIV_MTFLUSH_DEBUG */
-
-		mtflush_service_io(mtflush_io, this_thread_data);
-
-		/* mtflush_service_io() sets WTHR_KILL_IT after it has
-		processed an exit work item. */
-		if (this_thread_data->wt_status == WTHR_KILL_IT) {
-			break;
-		}
-	}
-
-	os_thread_exit(NULL);
-	OS_THREAD_DUMMY_RETURN;
-}
-
-/******************************************************************//**
-Add exit work items to the work queue to signal the multi-threaded flush
-threads that they should exit, wait for their acknowledgements and
-release the queues, heaps and mutexes of the flush context.
-Does nothing if a shutdown is already in progress. */
-void
-buf_mtflu_io_thread_exit(void)
-/*==========================*/
-{
-	ulint		i;
-	thread_sync_t*	mtflush_io = mtflush_ctx;
-	wrk_t*		work_item = NULL;
-	mem_heap_t*	wheap;
-	mem_heap_t*	rheap;
-
-	ut_a(mtflush_io != NULL);
-
-	/* Confirm if the io-thread KILL is in progress, bailout.
-	This is checked before anything is allocated from the heap. */
-	if (mtflush_io->gwt_status == WTHR_KILL_IT) {
-		return;
-	}
-
-	mtflush_io->gwt_status = WTHR_KILL_IT;
-
-	/* Remember the heaps: the context structure itself is allocated
-	from wheap (see buf_mtflu_handler_init()), so it must not be
-	dereferenced any more once wheap has been freed. */
-	wheap = mtflush_io->wheap;
-	rheap = mtflush_io->rheap;
-
-	/* Allocate work items for shutdown message */
-	work_item = (wrk_t*)mem_heap_alloc(wheap, sizeof(wrk_t)*srv_mtflush_threads);
-
-	/* This lock is to safequard against timing bug: flush request take
-	this mutex before sending work items to be processed by flush
-	threads. Inside flush thread we assume that work queue contains only
-	a constant number of items. Thus, we may not install new work items
-	below before all previous ones are processed. This mutex is released
-	by flush request after all work items sent to flush threads have
-	been processed. Thus, we can get this mutex if and only if work
-	queue is empty. */
-
-	os_fast_mutex_lock(&mtflush_mtx);
-
-	/* Make sure the work queue is empty */
-	ut_a(ib_wqueue_is_empty(mtflush_io->wq));
-
-	/* Send one exit work item/thread */
-	for (i=0; i < (ulint)srv_mtflush_threads; i++) {
-		work_item[i].tsk = MT_WRK_NONE;
-		work_item[i].wi_status = WRK_ITEM_EXIT;
-		work_item[i].wheap = wheap;
-		work_item[i].rheap = rheap;
-		work_item[i].id_usr = 0;
-
-		ib_wqueue_add(mtflush_io->wq,
-			      (void *)&(work_item[i]),
-			      wheap);
-	}
-
-	/* Requests sent */
-	os_fast_mutex_unlock(&mtflush_mtx);
-
-	/* Wait until all work items on a work queue are processed */
-	while(!ib_wqueue_is_empty(mtflush_io->wq)) {
-		/* Wait */
-		os_thread_sleep(MT_WAIT_IN_USECS);
-	}
-
-	ut_a(ib_wqueue_is_empty(mtflush_io->wq));
-
-	/* Collect all work done items */
-	for (i=0; i < (ulint)srv_mtflush_threads;) {
-		wrk_t*	reply;
-
-		reply = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, MT_WAIT_IN_USECS);
-
-		/* If we receive a reply to a work item and its status is
-		exit, the thread has processed this message and exited */
-		if (reply && reply->wi_status == WRK_ITEM_EXIT) {
-			i++;
-		}
-	}
-
-	/* Sleep for MT_WAIT_IN_USECS to allow the threads to really exit */
-	os_thread_sleep(MT_WAIT_IN_USECS);
-
-	/* Make sure that work queue is empty */
-	while(!ib_wqueue_is_empty(mtflush_io->wq))
-	{
-		ib_wqueue_nowait(mtflush_io->wq);
-	}
-
-	os_fast_mutex_lock(&mtflush_mtx);
-
-	ut_a(ib_wqueue_is_empty(mtflush_io->wq));
-	ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq));
-	ut_a(ib_wqueue_is_empty(mtflush_io->rd_cq));
-
-	/* Free all queues */
-	ib_wqueue_free(mtflush_io->wq);
-	ib_wqueue_free(mtflush_io->wr_cq);
-	ib_wqueue_free(mtflush_io->rd_cq);
-
-	mtflush_io->wq = NULL;
-	mtflush_io->wr_cq = NULL;
-	mtflush_io->rd_cq = NULL;
-	mtflush_work_initialized = 0;
-
-	/* Destroy the mutex embedded in the context while the context
-	memory is still valid. */
-	os_fast_mutex_free(&mtflush_io->thread_global_mtx);
-
-	/* Free heaps; wheap goes last because it holds *mtflush_io.
-	NOTE(review): mtflush_ctx is left pointing at the freed context,
-	as before - confirm that no caller uses it after shutdown. */
-	mem_heap_free(rheap);
-	mem_heap_free(wheap);
-
-	os_fast_mutex_unlock(&mtflush_mtx);
-	os_fast_mutex_free(&mtflush_mtx);
-}
-
-/******************************************************************//**
-Initialize multi-threaded flush thread synchronization data: creates the
-heaps, the work queue, the write and read completion queues and the
-mutexes, and starts n_threads flush threads.
-@return Initialized multi-threaded flush thread synchronization data. */
-void*
-buf_mtflu_handler_init(
-/*===================*/
-	ulint	n_threads,	/*!< in: Number of threads to create */
-	ulint	wrk_cnt)	/*!< in: Number of work items (not used
-				by this function) */
-{
-	ulint		i;
-	mem_heap_t*	mtflush_heap;
-	mem_heap_t*	mtflush_heap2;
-
-	/* Create heap, work queue, write completion queue, read
-	completion queue for multi-threaded flush, and init
-	handler. */
-	mtflush_heap = mem_heap_create(0);
-	ut_a(mtflush_heap != NULL);
-	mtflush_heap2 = mem_heap_create(0);
-	ut_a(mtflush_heap2 != NULL);
-
-	/* The context and the per-thread slots live in mtflush_heap,
-	which is also published as the context's wheap below. */
-	mtflush_ctx = (thread_sync_t *)mem_heap_zalloc(mtflush_heap,
-				sizeof(thread_sync_t));
-
-	ut_a(mtflush_ctx != NULL);
-	mtflush_ctx->thread_data = (thread_data_t*)mem_heap_zalloc(
-		mtflush_heap, sizeof(thread_data_t) * n_threads);
-	ut_a(mtflush_ctx->thread_data);
-
-	mtflush_ctx->n_threads = n_threads;
-	mtflush_ctx->wq = ib_wqueue_create();
-	ut_a(mtflush_ctx->wq);
-	mtflush_ctx->wr_cq = ib_wqueue_create();
-	ut_a(mtflush_ctx->wr_cq);
-	mtflush_ctx->rd_cq = ib_wqueue_create();
-	ut_a(mtflush_ctx->rd_cq);
-	mtflush_ctx->wheap = mtflush_heap;
-	mtflush_ctx->rheap = mtflush_heap2;
-
-	os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_ctx->thread_global_mtx);
-	os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx);
-
-	/* Each new thread takes thread_global_mtx and searches
-	thread_data[] for its own id. Hold the mutex until every id has
-	been stored, so that no thread can run the lookup before its
-	slot is filled in. */
-	os_fast_mutex_lock(&mtflush_ctx->thread_global_mtx);
-
-	/* Create threads for page-compression-flush */
-	for(i=0; i < n_threads; i++) {
-		os_thread_id_t new_thread_id;
-
-		mtflush_ctx->thread_data[i].wt_status = WTHR_INITIALIZED;
-
-		mtflush_ctx->thread_data[i].wthread = os_thread_create(
-			mtflush_io_thread,
-			((void *) mtflush_ctx),
-			&new_thread_id);
-
-		mtflush_ctx->thread_data[i].wthread_id = new_thread_id;
-	}
-
-	os_fast_mutex_unlock(&mtflush_ctx->thread_global_mtx);
-
-	buf_mtflu_work_init();
-
-	return((void *)mtflush_ctx);
-}
-
-/******************************************************************//**
-Flush buffer pool instances: queues one write work item per buffer pool
-instance and waits for the same number of completions.
-The per_pool_cnt entries are filled in completion order, i.e. entry i is
-not necessarily the result of buffer pool instance i.
-@return number of pages flushed plus evicted, 0 if shutdown is in progress */
-ulint
-buf_mtflu_flush_work_items(
-/*=======================*/
-	ulint buf_pool_inst,		/*!< in: Number of buffer pool instances */
-	flush_counters_t *per_pool_cnt,	/*!< out: Number of pages
-					flushed or evicted /instance */
-	buf_flush_t flush_type,		/*!< in: Type of flush */
-	ulint min_n,			/*!< in: Wished minimum number of
-					blocks to be flushed */
-	lsn_t lsn_limit)		/*!< in: All blocks whose
-					oldest_modification is smaller than
-					this should be flushed (if their
-					number does not exceed min_n) */
-{
-	ulint		n_flushed=0, i;
-	mem_heap_t*	work_heap;
-	mem_heap_t*	reply_heap;
-	wrk_t		work_item[MTFLUSH_MAX_WORKER];
-
-	/* work_item[] is a fixed-size stack array indexed by the buffer
-	pool instance number. */
-	ut_a(buf_pool_inst <= MTFLUSH_MAX_WORKER);
-
-	/* Give the out-array defined contents on every path, including
-	the early return below; callers read it unconditionally. */
-	for (i = 0; i < buf_pool_inst; i++) {
-		per_pool_cnt[i].flushed = 0;
-		per_pool_cnt[i].evicted = 0;
-	}
-
-	if (mtflush_ctx->gwt_status == WTHR_KILL_IT) {
-		return 0;
-	}
-
-	/* Allocate heap where all work items used and queue
-	node items are allocated */
-	work_heap = mem_heap_create(0);
-	reply_heap = mem_heap_create(0);
-
-
-	for(i=0;i<buf_pool_inst; i++) {
-		work_item[i].tsk = MT_WRK_WRITE;
-		work_item[i].wr.buf_pool = buf_pool_from_array(i);
-		work_item[i].wr.flush_type = flush_type;
-		work_item[i].wr.min = min_n;
-		work_item[i].wr.lsn_limit = lsn_limit;
-		work_item[i].wi_status = WRK_ITEM_UNSET;
-		work_item[i].wheap = work_heap;
-		work_item[i].rheap = reply_heap;
-		work_item[i].n_flushed = 0;
-		work_item[i].n_evicted = 0;
-		work_item[i].id_usr = 0;
-
-		ib_wqueue_add(mtflush_ctx->wq,
-			      (void *)(work_item + i),
-			      work_heap);
-	}
-
-	/* wait on the completion to arrive; i counts the replies
-	received so far */
-	for(i=0; i< buf_pool_inst;) {
-		wrk_t *done_wi = NULL;
-		done_wi = (wrk_t *)ib_wqueue_wait(mtflush_ctx->wr_cq);
-
-		if (done_wi != NULL) {
-			per_pool_cnt[i].flushed = done_wi->n_flushed;
-			per_pool_cnt[i].evicted = done_wi->n_evicted;
-
-#ifdef UNIV_MTFLUSH_DEBUG
-			if((int)done_wi->id_usr == 0 &&
-			   (done_wi->wi_status == WRK_ITEM_SET ||
-			    done_wi->wi_status == WRK_ITEM_UNSET)) {
-				fprintf(stderr,
-					"**Set/Unused work_item[%lu] flush_type=%d\n",
-					i,
-					done_wi->wr.flush_type);
-				ut_a(0);
-			}
-#endif
-
-			n_flushed+= done_wi->n_flushed+done_wi->n_evicted;
-			i++;
-		}
-	}
-
-	/* Release used work_items and queue nodes */
-	mem_heap_free(work_heap);
-	mem_heap_free(reply_heap);
-
-	return(n_flushed);
-}
-
-/*******************************************************************//**
-Multi-threaded version of buf_flush_list: distributes a BUF_FLUSH_LIST
-batch over all buffer pool instances and updates the flush monitors.
-@return true (the visible code never reports failure) */
-bool
-buf_mtflu_flush_list(
-/*=================*/
-	ulint		min_n,		/*!< in: wished minimum number of blocks
-					flushed (it is not guaranteed that the
-					actual number is that big, though) */
-	lsn_t		lsn_limit,	/*!< in the case BUF_FLUSH_LIST all
-					blocks whose oldest_modification is
-					smaller than this should be flushed
-					(if their number does not exceed
-					min_n), otherwise ignored */
-	ulint*		n_processed)	/*!< out: the number of pages
-					which were processed is passed
-					back to caller. Ignored if NULL */
-
-{
-	ulint			i;
-	bool			success = true;
-	flush_counters_t	cnt[MTFLUSH_MAX_WORKER];
-
-	if (n_processed) {
-		*n_processed = 0;
-	}
-
-	/* The worker call below returns without touching cnt[] when a
-	shutdown is in progress, so start from defined values. */
-	for (i = 0; i < srv_buf_pool_instances; i++) {
-		cnt[i].flushed = 0;
-		cnt[i].evicted = 0;
-	}
-
-	if (min_n != ULINT_MAX) {
-		/* Ensure that flushing is spread evenly amongst the
-		buffer pool instances. When min_n is ULINT_MAX
-		we need to flush everything up to the lsn limit
-		so no limit here. */
-		min_n = (min_n + srv_buf_pool_instances - 1)
-			/ srv_buf_pool_instances;
-	}
-
-	/* This lock is to safeguard against re-entry if any. */
-	os_fast_mutex_lock(&mtflush_mtx);
-	buf_mtflu_flush_work_items(srv_buf_pool_instances,
-		cnt, BUF_FLUSH_LIST,
-		min_n, lsn_limit);
-	os_fast_mutex_unlock(&mtflush_mtx);
-
-	for (i = 0; i < srv_buf_pool_instances; i++) {
-		if (n_processed) {
-			*n_processed += cnt[i].flushed+cnt[i].evicted;
-		}
-
-		if (cnt[i].flushed) {
-			MONITOR_INC_VALUE_CUMULATIVE(
-				MONITOR_FLUSH_BATCH_TOTAL_PAGE,
-				MONITOR_FLUSH_BATCH_COUNT,
-				MONITOR_FLUSH_BATCH_PAGES,
-				cnt[i].flushed);
-		}
-
-		if(cnt[i].evicted) {
-			MONITOR_INC_VALUE_CUMULATIVE(
-				MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE,
-				MONITOR_LRU_BATCH_EVICT_COUNT,
-				MONITOR_LRU_BATCH_EVICT_PAGES,
-				cnt[i].evicted);
-		}
-	}
-#ifdef UNIV_MTFLUSH_DEBUG
-	/* n_processed may be NULL, see the parameter description */
-	fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu ]\n",
-		__FUNCTION__, (min_n * srv_buf_pool_instances),
-		n_processed ? *n_processed : 0);
-#endif
-	return(success);
-}
-
-/*********************************************************************//**
-Clears up tail of the LRU lists:
-* Put replaceable pages at the tail of LRU to the free list
-* Flush dirty pages at the tail of LRU to the disk
-The depth to which we scan each buffer pool is controlled by dynamic
-config parameter innodb_LRU_scan_depth.
-@return total pages flushed plus evicted, 0 if shutdown is in progress */
-UNIV_INTERN
-ulint
-buf_mtflu_flush_LRU_tail(void)
-/*==========================*/
-{
-	ulint			total_flushed=0, i;
-	flush_counters_t	cnt[MTFLUSH_MAX_WORKER];
-
-	ut_a(buf_mtflu_init_done());
-
-	/* At shutdown do not send requests anymore */
-	if (!mtflush_ctx || mtflush_ctx->gwt_status == WTHR_KILL_IT) {
-		return (total_flushed);
-	}
-
-	/* The worker call below returns without touching cnt[] if the
-	shutdown starts after the check above, so start from defined
-	values. */
-	for (i = 0; i < srv_buf_pool_instances; i++) {
-		cnt[i].flushed = 0;
-		cnt[i].evicted = 0;
-	}
-
-	/* This lock is to safeguard against re-entry if any */
-	os_fast_mutex_lock(&mtflush_mtx);
-	buf_mtflu_flush_work_items(srv_buf_pool_instances,
-		cnt, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0);
-	os_fast_mutex_unlock(&mtflush_mtx);
-
-	for (i = 0; i < srv_buf_pool_instances; i++) {
-		total_flushed += cnt[i].flushed+cnt[i].evicted;
-
-		if (cnt[i].flushed) {
-			MONITOR_INC_VALUE_CUMULATIVE(
-				MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE,
-				MONITOR_LRU_BATCH_FLUSH_COUNT,
-				MONITOR_LRU_BATCH_FLUSH_PAGES,
-				cnt[i].flushed);
-		}
-		if(cnt[i].evicted) {
-			MONITOR_INC_VALUE_CUMULATIVE(
-				MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE,
-				MONITOR_LRU_BATCH_EVICT_COUNT,
-				MONITOR_LRU_BATCH_EVICT_PAGES,
-				cnt[i].evicted);
-		}
-	}
-
-	/* Tested with #ifdef like every other UNIV_MTFLUSH_DEBUG block in
-	this file; "#if" does not compile when the macro is defined without
-	a value. */
-#ifdef UNIV_MTFLUSH_DEBUG
-	fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu ]\n", (
-		srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed);
-#endif
-
-	return(total_flushed);
-}
-
-/*********************************************************************//**
-Copy the thread identifiers recorded in the multi-threaded flush context
-into the caller's thread id array. */
-void
-buf_mtflu_set_thread_ids(
-/*=====================*/
-	ulint		n_threads,	/*!< in: number of entries to copy */
-	void*		ctx,		/*!< in: thread context
-					(thread_sync_t*) */
-	os_thread_id_t*	thread_ids)	/*!< out: receives the first
-					n_threads thread ids */
-{
-	thread_sync_t*	sync_data = (thread_sync_t*) ctx;
-
-	ut_a(sync_data != NULL);
-	ut_a(thread_ids != NULL);
-
-	for (ulint slot = 0; slot != n_threads; ++slot) {
-		const thread_data_t&	worker = sync_data->thread_data[slot];
-
-		thread_ids[slot] = worker.wthread_id;
-	}
-}
diff --git a/storage/xtradb/buf/buf0rea.cc b/storage/xtradb/buf/buf0rea.cc
deleted file mode 100644
index b2b737b8d40..00000000000
--- a/storage/xtradb/buf/buf0rea.cc
+++ /dev/null
@@ -1,1125 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file buf/buf0rea.cc
-The database buffer read
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "buf0rea.h"
-
-#include "fil0fil.h"
-#include "mtr0mtr.h"
-
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "buf0lru.h"
-#include "buf0dblwr.h"
-#include "ibuf0ibuf.h"
-#include "log0recv.h"
-#include "trx0sys.h"
-#include "os0file.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-#include "mysql/plugin.h"
-#include "mysql/service_thd_wait.h"
-
-/** There must be at least this many pages in buf_pool in the area to start
-a random read-ahead */
-#define BUF_READ_AHEAD_RANDOM_THRESHOLD(b) \
- (5 + BUF_READ_AHEAD_AREA(b) / 8)
-
-/** If the number of pending reads in a buffer pool instance exceeds
-buf_pool->curr_size divided by the number below, read-ahead is not done:
-this is to prevent flooding the buffer pool with i/o-fixed buffer blocks */
-#define BUF_READ_AHEAD_PEND_LIMIT 2
-
-/********************************************************************//**
-Cleans up a page whose read request failed: clears the io-fix, releases
-the x-latch taken for the read on an uncompressed block, hands the page to
-buf_LRU_free_one_page() and decrements the pending read counter of the
-buffer pool instance. */
-static
-void
-buf_read_page_handle_error(
-/*=======================*/
- buf_page_t* bpage) /*!< in: pointer to the block */
-{
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- /* Only a BUF_BLOCK_FILE_PAGE is embedded in a buf_block_t and
- therefore has a block latch to release below. */
- const bool uncompressed = (buf_page_get_state(bpage)
- == BUF_BLOCK_FILE_PAGE);
- const ulint fold = buf_page_address_fold(bpage->space,
- bpage->offset);
- prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold);
-
- /* Acquisition order: LRU list mutex, page hash lock, block mutex */
- mutex_enter(&buf_pool->LRU_list_mutex);
- rw_lock_x_lock(hash_lock);
- mutex_enter(buf_page_get_mutex(bpage));
-
- /* First unfix and release lock on the bpage */
- ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
- ut_ad(bpage->buf_fix_count == 0);
-
- /* Set BUF_IO_NONE before we remove the block from LRU list */
- buf_page_set_io_fix(bpage, BUF_IO_NONE);
-
- if (uncompressed) {
- rw_lock_x_unlock_gen(
- &((buf_block_t*) bpage)->lock,
- BUF_IO_READ);
- }
-
- /* remove the block from LRU list */
- /* NOTE(review): hash_lock and the block mutex are not released
- anywhere in this function; presumably buf_LRU_free_one_page()
- releases them - confirm against its definition. */
- buf_LRU_free_one_page(bpage);
-
- mutex_exit(&buf_pool->LRU_list_mutex);
-
- /* The failed request no longer counts as a pending read */
- ut_ad(buf_pool->n_pend_reads > 0);
- os_atomic_decrement_ulint(&buf_pool->n_pend_reads, 1);
-}
-
-/********************************************************************//**
-Low-level function which reads a page asynchronously from a file to the
-buffer buf_pool if it is not already there, in which case does nothing.
-Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
-flag is cleared and the x-lock released by an i/o-handler thread.
-@return 1 if a read request was queued, 0 if the page already resided
-in buf_pool, or if the page is in the doublewrite buffer blocks in
-which case it is never read into the pool, or if the tablespace does
-not exist or is being dropped
-
-@param[out] err DB_SUCCESS, DB_TABLESPACE_DELETED if we are
- trying to read from a non-existent tablespace, or a
- tablespace which is just now being dropped,
- DB_PAGE_CORRUPTED if page based on checksum
- check is corrupted, or DB_DECRYPTION_FAILED
- if page post encryption checksum matches but
- after decryption normal page checksum does not match.
-@param[in] sync true if synchronous aio is desired
-@param[in] mode BUF_READ_IBUF_PAGES_ONLY, ...,
- ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
- at read-ahead functions)
-@param[in] space space id
-@param[in] zip_size compressed page size, or 0
-@param[in] unzip TRUE=request uncompressed page
-@param[in] tablespace_version if the space memory object has
- this timestamp different from what we are giving here,
- treat the tablespace as dropped; this is a timestamp we
- use to stop dangling page reads from a tablespace
- which we have DISCARDed + IMPORTed back
-@param[in] offset page number
-@param[in] trx transaction
-@return 1 if read request is issued. 0 if it is not */
-static
-ulint
-buf_read_page_low(
-	dberr_t*	err,
-	bool		sync,
-	ulint		mode,
-	ulint		space,
-	ulint		zip_size,
-	ibool		unzip,
-	ib_int64_t	tablespace_version,
-	ulint		offset,
-	trx_t*		trx = NULL)
-{
-	buf_page_t*	bpage;
-	ulint		wake_later;
-	ibool		ignore_nonexistent_pages;
-
-	/* Split the two flag bits off 'mode'. They are ORed into the
-	request type passed to _fil_io() below; the remaining mode value
-	goes to buf_page_init_for_read(). */
-	wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
-	mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
-
-	ignore_nonexistent_pages = mode & BUF_READ_IGNORE_NONEXISTENT_PAGES;
-	mode &= ~BUF_READ_IGNORE_NONEXISTENT_PAGES;
-
-	if (space == TRX_SYS_SPACE && buf_dblwr_page_inside(offset)) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Warning: trying to read"
-			" doublewrite buffer page %lu\n",
-			(ulong) offset);
-
-		return(0);
-	}
-
-	if (ibuf_bitmap_page(zip_size, offset)
-	    || trx_sys_hdr_page(space, offset)) {
-
-		/* Trx sys header is so low in the latching order that we play
-		safe and do not leave the i/o-completion to an asynchronous
-		i/o-thread. Ibuf bitmap pages must always be read with
-		synchronous i/o, to make sure they do not get involved in
-		thread deadlocks. */
-
-		sync = true;
-	}
-
-	/* The following call will also check if the tablespace does not exist
-	or is being dropped; if we succeed in initing the page in the buffer
-	pool for read, then DISCARD cannot proceed until the read has
-	completed */
-	bpage = buf_page_init_for_read(err, mode, space, zip_size, unzip,
-				       tablespace_version, offset);
-	if (bpage == NULL) {
-		/* bugfix: http://bugs.mysql.com/bug.php?id=43948 */
-		if (recv_recovery_is_on() && *err == DB_TABLESPACE_DELETED) {
-			/* hashed log recs must be treated here */
-			recv_addr_t*	recv_addr;
-
-			mutex_enter(&(recv_sys->mutex));
-
-			if (recv_sys->apply_log_recs == FALSE) {
-				mutex_exit(&(recv_sys->mutex));
-				goto not_to_recover;
-			}
-
-			/* recv_get_fil_addr_struct() */
-			recv_addr = (recv_addr_t*)HASH_GET_FIRST(recv_sys->addr_hash,
-					hash_calc_hash(ut_fold_ulint_pair(space, offset),
-						recv_sys->addr_hash));
-			while (recv_addr) {
-				if ((recv_addr->space == space)
-				    && (recv_addr->page_no == offset)) {
-					break;
-				}
-				recv_addr = (recv_addr_t*)HASH_GET_NEXT(addr_hash, recv_addr);
-			}
-
-			if ((recv_addr == NULL)
-			    || (recv_addr->state == RECV_BEING_PROCESSED)
-			    || (recv_addr->state == RECV_PROCESSED)) {
-				mutex_exit(&(recv_sys->mutex));
-				goto not_to_recover;
-			}
-
-			/* The log records hashed for this page cannot be
-			applied: mark the address as processed and drop it
-			from the count of pending addresses. 'space' is cast
-			to match %lu like the other messages in this file. */
-			fprintf(stderr, " (cannot find space: %lu)",
-				(ulong) space);
-			recv_addr->state = RECV_PROCESSED;
-
-			ut_a(recv_sys->n_addrs);
-			recv_sys->n_addrs--;
-
-			mutex_exit(&(recv_sys->mutex));
-		}
-not_to_recover:
-
-		return(0);
-	}
-
-#ifdef UNIV_DEBUG
-	if (buf_debug_prints) {
-		fprintf(stderr,
-			"Posting read request for page %lu, sync %s\n",
-			(ulong) offset, sync ? "true" : "false");
-	}
-#endif
-
-	ut_ad(buf_page_in_file(bpage));
-	ut_ad(!mutex_own(&buf_pool_from_bpage(bpage)->LRU_list_mutex));
-
-	/* Compressed pages are read into the compressed frame, others
-	into the frame of the uncompressed block. */
-	byte*	frame = zip_size ? bpage->zip.data : ((buf_block_t*) bpage)->frame;
-
-	if (sync) {
-		thd_wait_begin(NULL, THD_WAIT_DISKIO);
-	}
-
-	if (zip_size) {
-		*err = _fil_io(OS_FILE_READ | wake_later
-			       | ignore_nonexistent_pages,
-			       sync, space, zip_size, offset, 0, zip_size,
-			       frame, bpage, 0, trx);
-	} else {
-		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
-
-		*err = _fil_io(OS_FILE_READ | wake_later
-			       | ignore_nonexistent_pages,
-			       sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
-			       frame, bpage, &bpage->write_size, trx);
-	}
-
-	if (sync) {
-		thd_wait_end(NULL);
-	}
-
-	if (*err != DB_SUCCESS) {
-		/* A missing page that the caller asked to ignore, or a
-		dropped tablespace: undo the page initialization and
-		report that no read was issued. */
-		if (ignore_nonexistent_pages || *err == DB_TABLESPACE_DELETED) {
-			buf_read_page_handle_error(bpage);
-			return(0);
-		}
-		SRV_CORRUPT_TABLE_CHECK(*err == DB_SUCCESS,
-					bpage->is_corrupt = TRUE;);
-	}
-
-	if (sync) {
-		/* The i/o is already completed when we arrive from
-		fil_read */
-		*err = buf_page_io_complete(bpage);
-
-		if (*err != DB_SUCCESS) {
-			return(0);
-		}
-	}
-
-	return(1);
-}
-
-/********************************************************************//**
-Applies a random read-ahead in buf_pool if there are at least a threshold
-value of accessed pages from the random read-ahead area. Does not read any
-page, not even the one at the position (space, offset), if the read-ahead
-mechanism is not activated. NOTE 1: the calling thread may own latches on
-pages: to avoid deadlocks this function must be written such that it cannot
-end up waiting for these latches! NOTE 2: the calling thread must want
-access to the page given: this rule is set to prevent unintended read-aheads
-performed by ibuf routines, a situation which could result in a deadlock if
-the OS does not support asynchronous i/o.
-@return number of page read requests issued; NOTE that if we read ibuf
-pages, it may happen that the page at the given page number does not
-get read even if we return a positive value! */
-UNIV_INTERN
-ulint
-buf_read_ahead_random(
-/*==================*/
-	ulint	space,		/*!< in: space id */
-	ulint	zip_size,	/*!< in: compressed page size in bytes,
-				or 0 */
-	ulint	offset,		/*!< in: page number of a page which
-				the current thread wants to access */
-	ibool	inside_ibuf,	/*!< in: TRUE if we are inside ibuf
-				routine */
-	trx_t*	trx)		/*!< in: transaction, passed on to
-				buf_read_page_low() */
-{
-	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
-	ib_int64_t	tablespace_version;
-	ulint		recent_blocks	= 0;
-	ulint		ibuf_mode;
-	ulint		count;
-	ulint		low, high;
-	dberr_t		err = DB_SUCCESS;
-	ulint		i;
-	const ulint	buf_read_ahead_random_area
-				= BUF_READ_AHEAD_AREA(buf_pool);
-
-	if (!srv_random_read_ahead) {
-		/* Disabled by user */
-		return(0);
-	}
-
-	if (srv_startup_is_before_trx_rollback_phase) {
-		/* No read-ahead to avoid thread deadlocks */
-		return(0);
-	}
-
-	if (ibuf_bitmap_page(zip_size, offset)
-	    || trx_sys_hdr_page(space, offset)) {
-
-		/* If it is an ibuf bitmap page or trx sys hdr, we do
-		no read-ahead, as that could break the ibuf page access
-		order */
-
-		return(0);
-	}
-
-	/* Remember the tablespace version before we ask the tablespace size
-	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
-	do not try to read outside the bounds of the tablespace! */
-
-	tablespace_version = fil_space_get_version(space);
-
-	/* [low, high) is the read-ahead area containing 'offset', clipped
-	to the size of the tablespace. */
-	low  = (offset / buf_read_ahead_random_area)
-		* buf_read_ahead_random_area;
-	high = (offset / buf_read_ahead_random_area + 1)
-		* buf_read_ahead_random_area;
-	if (high > fil_space_get_size(space)) {
-
-		high = fil_space_get_size(space);
-	}
-
-	if (buf_pool->n_pend_reads
-	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
-
-		return(0);
-	}
-
-	/* Count how many blocks in the area have been recently accessed,
-	that is, reside near the start of the LRU list. */
-
-	for (i = low; i < high; i++) {
-
-		prio_rw_lock_t*	hash_lock;
-
-		const buf_page_t* bpage =
-			buf_page_hash_get_s_locked(buf_pool, space, i,
-						   &hash_lock);
-
-		if (bpage
-		    && buf_page_is_accessed(bpage)
-		    && buf_page_peek_if_young(bpage)) {
-
-			recent_blocks++;
-
-			if (recent_blocks
-			    >= BUF_READ_AHEAD_RANDOM_THRESHOLD(buf_pool)) {
-
-				rw_lock_s_unlock(hash_lock);
-				goto read_ahead;
-			}
-		}
-
-		/* The hash lock is released only when a page was found */
-		if (bpage) {
-			rw_lock_s_unlock(hash_lock);
-		}
-	}
-
-	/* Do nothing */
-	return(0);
-
-read_ahead:
-	/* Read all the suitable blocks within the area */
-
-	if (inside_ibuf) {
-		ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
-	} else {
-		ibuf_mode = BUF_READ_ANY_PAGE;
-	}
-
-	count = 0;
-
-	for (i = low; i < high; i++) {
-		/* It is only sensible to do read-ahead in the non-sync aio
-		mode: hence false as the 'sync' argument */
-
-		if (!ibuf_bitmap_page(zip_size, i)) {
-
-			count += buf_read_page_low(
-				&err, false,
-				ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
-				space, zip_size, FALSE,
-				tablespace_version, i, trx);
-
-			switch(err) {
-			case DB_SUCCESS:
-			case DB_ERROR:
-				break;
-			case DB_TABLESPACE_DELETED:
-				ib_logf(IB_LOG_LEVEL_WARN,
-					"In random"
-					" readahead trying to access"
-					" tablespace " ULINTPF ":" ULINTPF
-					" but the tablespace does not"
-					" exist or is just being dropped.",
-					space, i);
-				break;
-			case DB_DECRYPTION_FAILED:
-				/* Printed as space:page, in the same order
-				as the message above. */
-				ib_logf(IB_LOG_LEVEL_ERROR,
-					"Random readahead failed to decrypt page "
-					ULINTPF ":" ULINTPF ".",
-					space, i);
-				break;
-			default:
-				ut_error;
-			}
-		}
-	}
-
-	/* In simulated aio we wake the aio handler threads only after
-	queuing all aio requests, in native aio the following call does
-	nothing: */
-
-	os_aio_simulated_wake_handler_threads();
-
-#ifdef UNIV_DEBUG
-	if (buf_debug_prints && (count > 0)) {
-		fprintf(stderr,
-			"Random read-ahead space %lu offset %lu pages %lu\n",
-			(ulong) space, (ulong) offset,
-			(ulong) count);
-	}
-#endif /* UNIV_DEBUG */
-
-	/* Read ahead is considered one I/O operation for the purpose of
-	LRU policy decision. */
-	buf_LRU_stat_inc_io();
-
-	buf_pool->stat.n_ra_pages_read_rnd += count;
-	srv_stats.buf_pool_reads.add(count);
-	return(count);
-}
-
-/********************************************************************//**
-High-level function which reads a page from a file to the buffer buf_pool
-if it is not already there. The request is passed to buf_read_page_low()
-with sync = true, i.e. in the synchronous aio mode.
-
-@param[in]	space_id	space_id
-@param[in]	zip_size	compressed page size in bytes, or 0
-@param[in]	offset		page number
-@param[in]	trx		transaction
-@return DB_SUCCESS if page has been read and is not corrupted,
-@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
-@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
-after decryption normal page checksum does not match.
-@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
-UNIV_INTERN
-dberr_t
-buf_read_page(
-	ulint	space_id,
-	ulint	zip_size,
-	ulint	offset,
-	trx_t*	trx)
-{
-	dberr_t			err = DB_SUCCESS;
-
-	/* The version is read before the FilSpace object is constructed */
-	const ib_int64_t	version = fil_space_get_version(space_id);
-
-	FilSpace		space(space_id, true);
-
-	if (space()) {
-		/* sync = true: doing the i/o in the synchronous aio mode
-		saves thread switches */
-		const ulint	n_reads = buf_read_page_low(
-			&err, true, BUF_READ_ANY_PAGE, space_id,
-			zip_size, FALSE, version, offset, trx);
-
-		srv_stats.buf_pool_reads.add(n_reads);
-	}
-
-	/* Only the missing tablespace is reported here; page corruption
-	and decryption failures are already reported in above function. */
-	if (!space() || err == DB_TABLESPACE_DELETED) {
-		err = DB_TABLESPACE_DELETED;
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Trying to access"
-			" tablespace [space=" ULINTPF ": page=" ULINTPF
-			"] but the tablespace does not exist"
-			" or is just being dropped.",
-			space_id, offset);
-	}
-
-	/* Count this as one I/O operation for the LRU policy. */
-	buf_LRU_stat_inc_io();
-
-	return(err);
-}
-
-/********************************************************************//**
-High-level function which reads a page from a file to the buffer buf_pool
-if it is not already there, ignoring pages that do not exist.
-NOTE(review): despite the name, the request is issued with sync = true,
-so buf_read_page_low() also completes the i/o before it returns.
-@param[in]	space		Tablespace id
-@param[in]	offset		Page no */
-UNIV_INTERN
-void
-buf_read_page_async(
-	ulint	space,
-	ulint	offset)
-{
-	ulint		zip_size;
-	ib_int64_t	tablespace_version;
-	ulint		count;
-	dberr_t		err = DB_SUCCESS;
-
-	zip_size = fil_space_get_zip_size(space);
-
-	/* ULINT_UNDEFINED: nothing is read for this space */
-	if (zip_size == ULINT_UNDEFINED) {
-		return;
-	}
-
-	tablespace_version = fil_space_get_version(space);
-
-	count = buf_read_page_low(&err, true, BUF_READ_ANY_PAGE
-				  | OS_AIO_SIMULATED_WAKE_LATER
-				  | BUF_READ_IGNORE_NONEXISTENT_PAGES,
-				  space, zip_size, FALSE,
-				  tablespace_version, offset);
-
-	switch(err) {
-	case DB_SUCCESS:
-	case DB_ERROR:
-		break;
-	case DB_PAGE_CORRUPTED:
-		/* Because the read is synchronous, err can carry the
-		result of the page checksum check. A corrupted page must
-		not abort the server here.
-		NOTE(review): presumably the corruption has already been
-		reported by the read path - confirm. */
-		break;
-	case DB_TABLESPACE_DELETED:
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"In async page read "
-			"trying to access "
-			"page " ULINTPF ":" ULINTPF
-			" in nonexisting or being-dropped tablespace",
-			space, offset);
-		break;
-
-	case DB_DECRYPTION_FAILED:
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Async page read failed to decrypt page "
-			ULINTPF ":" ULINTPF ".",
-			space, offset);
-		break;
-	default:
-		ut_error;
-	}
-
-	srv_stats.buf_pool_reads.add(count);
-
-	/* We do not increment number of I/O operations used for LRU policy
-	here (buf_LRU_stat_inc_io()). We use this in heuristics to decide
-	about evicting uncompressed version of compressed pages from the
-	buffer pool. Since this function is called from buffer pool load
-	these IOs are deliberate and are not part of normal workload we can
-	ignore these in our heuristics. */
-}
-
-/********************************************************************//**
-Applies linear read-ahead if in the buf_pool the page is a border page of
-a linear read-ahead area and all the pages in the area have been accessed.
-Does not read any page if the read-ahead mechanism is not activated. Note
-that the algorithm looks at the 'natural' adjacent successor and
-predecessor of the page, which on the leaf level of a B-tree are the next
-and previous page in the chain of leaves. To know these, the page specified
-in (space, offset) must already be present in the buf_pool. Thus, the
-natural way to use this function is to call it when a page in the buf_pool
-is accessed the first time, calling this function just after it has been
-bufferfixed.
-NOTE 1: as this function looks at the natural predecessor and successor
-fields on the page, what happens, if these are not initialized to any
-sensible value? No problem, before applying read-ahead we check that the
-area to read is within the span of the space, if not, read-ahead is not
-applied. An uninitialized value may result in a useless read operation, but
-only very improbably.
-NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
-function must be written such that it cannot end up waiting for these
-latches!
-NOTE 3: the calling thread must want access to the page given: this rule is
-set to prevent unintended read-aheads performed by ibuf routines, a situation
-which could result in a deadlock if the OS does not support asynchronous io.
-The function returns 0 without reading anything when read-ahead is disabled
-(srv_read_ahead_threshold == 0), during startup before the trx rollback
-phase, when the page is not a border page of its area, when it is an ibuf
-bitmap or trx sys header page, when the area extends past the tablespace
-size, or when too many reads are already pending in the buffer pool.
-@return number of page read requests issued */
-UNIV_INTERN
-ulint
-buf_read_ahead_linear(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes, or 0 */
- ulint offset, /*!< in: page number; see NOTE 3 above */
- ibool inside_ibuf, /*!< in: TRUE if we are inside ibuf routine */
- trx_t* trx) /*!< in: passed through to buf_read_page_low() */
-{
- buf_pool_t* buf_pool = buf_pool_get(space, offset);
- ib_int64_t tablespace_version;
- buf_page_t* bpage;
- buf_frame_t* frame;
- buf_page_t* pred_bpage = NULL;
- unsigned pred_bpage_is_accessed = 0;
- ulint pred_offset;
- ulint succ_offset;
- ulint count;
- int asc_or_desc;
- ulint new_offset;
- ulint fail_count;
- ulint ibuf_mode;
- ulint low, high;
- dberr_t err = DB_SUCCESS;
- ulint i;
- const ulint buf_read_ahead_linear_area
- = BUF_READ_AHEAD_AREA(buf_pool);
- ulint threshold;
-
- /* check if readahead is disabled */
- if (!srv_read_ahead_threshold) {
- return(0);
- }
-
- if (UNIV_UNLIKELY(srv_startup_is_before_trx_rollback_phase)) {
- /* No read-ahead to avoid thread deadlocks */
- return(0);
- }
-
- /* [low, high) is the read-ahead area that contains offset */
- low = (offset / buf_read_ahead_linear_area)
- * buf_read_ahead_linear_area;
- high = (offset / buf_read_ahead_linear_area + 1)
- * buf_read_ahead_linear_area;
-
- if ((offset != low) && (offset != high - 1)) {
- /* This is not a border page of the area: return */
-
- return(0);
- }
-
- if (ibuf_bitmap_page(zip_size, offset)
- || trx_sys_hdr_page(space, offset)) {
-
- /* If it is an ibuf bitmap page or trx sys hdr, we do
- no read-ahead, as that could break the ibuf page access
- order */
-
- return(0);
- }
-
- /* Remember the tablespace version before we ask the tablespace size
- below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
- do not try to read outside the bounds of the tablespace! */
-
- tablespace_version = fil_space_get_version(space);
-
- if (high > fil_space_get_size(space)) {
- /* The area is not whole, return */
-
- return(0);
- }
-
- if (buf_pool->n_pend_reads
- > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
-
- return(0);
- }
-
- /* Check that almost all pages in the area have been accessed; if
- offset == low, the accesses must be in a descending order, otherwise,
- in an ascending order. */
-
- asc_or_desc = 1;
-
- if (offset == low) {
- asc_or_desc = -1;
- }
-
- /* How many out of order accessed pages can we ignore
- when working out the access pattern for linear readahead */
- threshold = ut_min((64 - srv_read_ahead_threshold),
- BUF_READ_AHEAD_AREA(buf_pool));
-
- fail_count = 0;
-
- prio_rw_lock_t* hash_lock;
-
- for (i = low; i < high; i++) {
-
- bpage = buf_page_hash_get_s_locked(buf_pool, space, i,
- &hash_lock);
-
- if (bpage == NULL || !buf_page_is_accessed(bpage)) {
- /* Not accessed */
- fail_count++;
-
- } else if (pred_bpage) {
- /* Note that buf_page_is_accessed() returns
- the time of the first access. If some blocks
- of the extent existed in the buffer pool at
- the time of a linear access pattern, the first
- access times may be nonmonotonic, even though
- the latest access times were linear. The
- threshold (srv_read_ahead_threshold) should help
- a little against this. */
- int res = ut_ulint_cmp(
- buf_page_is_accessed(bpage),
- pred_bpage_is_accessed);
- /* Accesses not in the right order */
- if (res != 0 && res != asc_or_desc) {
- fail_count++;
- }
- }
-
- if (fail_count > threshold) {
- /* Too many failures: return */
- if (bpage) {
- rw_lock_s_unlock(hash_lock);
- }
- return(0);
- }
-
- if (bpage) {
- if (buf_page_is_accessed(bpage)) {
- pred_bpage = bpage;
- pred_bpage_is_accessed
- = buf_page_is_accessed(bpage);
- }
-
- rw_lock_s_unlock(hash_lock);
- }
- }
-
- /* If we got this far, we know that enough pages in the area have
- been accessed in the right order: linear read-ahead can be sensible */
-
- bpage = buf_page_hash_get_s_locked(buf_pool, space, offset, &hash_lock);
-
- if (bpage == NULL) {
-
- return(0);
- }
-
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_ZIP_PAGE:
- frame = bpage->zip.data;
- break;
- case BUF_BLOCK_FILE_PAGE:
- frame = ((buf_block_t*) bpage)->frame;
- break;
- default:
- ut_error;
- break;
- }
-
- /* Read the natural predecessor and successor page addresses from
- the page; NOTE that because the calling thread may have an x-latch
- on the page, we do not acquire an s-latch on the page, this is to
- prevent deadlocks. Even if we read values which are nonsense, the
- algorithm will work. */
-
- pred_offset = fil_page_get_prev(frame);
- succ_offset = fil_page_get_next(frame);
-
- rw_lock_s_unlock(hash_lock);
-
- if ((offset == low) && (succ_offset == offset + 1)) {
-
- /* This is ok, we can continue */
- new_offset = pred_offset;
-
- } else if ((offset == high - 1) && (pred_offset == offset - 1)) {
-
- /* This is ok, we can continue */
- new_offset = succ_offset;
- } else {
- /* Successor or predecessor not in the right order */
-
- return(0);
- }
-
- /* From here on [low, high) is the area that contains new_offset,
- i.e. the area that is the candidate for read-ahead */
- low = (new_offset / buf_read_ahead_linear_area)
- * buf_read_ahead_linear_area;
- high = (new_offset / buf_read_ahead_linear_area + 1)
- * buf_read_ahead_linear_area;
-
- if ((new_offset != low) && (new_offset != high - 1)) {
- /* This is not a border page of the area: return */
-
- return(0);
- }
-
- if (high > fil_space_get_size(space)) {
- /* The area is not whole, return */
-
- return(0);
- }
-
- /* If we got this far, read-ahead can be sensible: do it */
-
- ibuf_mode = inside_ibuf
- ? BUF_READ_IBUF_PAGES_ONLY | OS_AIO_SIMULATED_WAKE_LATER
- : BUF_READ_ANY_PAGE | OS_AIO_SIMULATED_WAKE_LATER;
-
- count = 0;
-
- /* Since Windows XP seems to schedule the i/o handler thread
- very eagerly, and consequently it does not wait for the
- full read batch to be posted, we use special heuristics here */
-
- os_aio_simulated_put_read_threads_to_sleep();
-
- for (i = low; i < high; i++) {
- /* It is only sensible to do read-ahead in the non-sync
- aio mode: hence false as the sync argument */
-
- if (!ibuf_bitmap_page(zip_size, i)) {
-
- count += buf_read_page_low(
- &err, false,
- ibuf_mode,
- space, zip_size, FALSE, tablespace_version,
- i, trx);
-
- switch(err) {
- case DB_SUCCESS:
- case DB_ERROR:
- break;
- case DB_TABLESPACE_DELETED:
- ib_logf(IB_LOG_LEVEL_WARN,
- "In linear"
- " readahead trying to access"
- " tablespace " ULINTPF ":" ULINTPF
- " but the tablespace does not"
- " exist or is just being dropped.",
- space, i);
- break;
-
- case DB_DECRYPTION_FAILED:
- /* Report as space:page, like every other
- message in this file */
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Linear readahead failed to decrypt page "
- ULINTPF ":" ULINTPF ".",
- space, i);
- break;
- default:
- ut_error;
- }
- }
- }
-
- /* In simulated aio we wake the aio handler threads only after
- queuing all aio requests, in native aio the following call does
- nothing: */
-
- os_aio_simulated_wake_handler_threads();
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints && (count > 0)) {
- fprintf(stderr,
- "LINEAR read-ahead space %lu offset %lu pages %lu\n",
- (ulong) space, (ulong) offset, (ulong) count);
- }
-#endif /* UNIV_DEBUG */
-
- /* Read ahead is considered one I/O operation for the purpose of
- LRU policy decision. */
- buf_LRU_stat_inc_io();
-
- buf_pool->stat.n_ra_pages_read += count;
- return(count);
-}
-
-/********************************************************************//**
-Issues read requests for pages which the ibuf module wants to read in, in
-order to contract the insert buffer tree. Technically, this function is like
-a read-ahead function.
-For each of the n_stored entries the function waits, by repeatedly calling
-os_thread_sleep(500000), until the number of pending reads of the buffer
-pool instance owning the page is no longer above
-curr_size / BUF_READ_AHEAD_PEND_LIMIT, and then calls buf_read_page_low().
-If the tablespace turns out to be missing (zip size ULINT_UNDEFINED or
-DB_TABLESPACE_DELETED), the insert buffer entries of that space are removed
-with ibuf_delete_for_discarded_space(). Any error code other than
-DB_SUCCESS, DB_ERROR, DB_TABLESPACE_DELETED or DB_DECRYPTION_FAILED hits
-ut_error. */
-UNIV_INTERN
-void
-buf_read_ibuf_merge_pages(
-/*======================*/
- bool sync, /*!< in: true if the caller
- wants this function to wait
- for the highest address page
- to get read in, before this
- function returns */
- const ulint* space_ids, /*!< in: array of space ids */
- const ib_int64_t* space_versions,/*!< in: the spaces must have
- this version number
- (timestamp), otherwise we
- discard the read; we use this
- to cancel reads if DISCARD +
- IMPORT may have changed the
- tablespace size */
- const ulint* page_nos, /*!< in: array of page numbers
- to read, with the highest page
- number the last in the
- array */
- ulint n_stored) /*!< in: number of elements
- in the arrays */
-{
- ulint i;
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a(n_stored < UNIV_PAGE_SIZE);
-#endif
-
- for (i = 0; i < n_stored; i++) {
- buf_pool_t* buf_pool;
- ulint zip_size = fil_space_get_zip_size(space_ids[i]);
- dberr_t err = DB_SUCCESS;
-
- buf_pool = buf_pool_get(space_ids[i], page_nos[i]);
-
- while (buf_pool->n_pend_reads
- > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
- os_thread_sleep(500000); /* back off until pending reads drain */
- }
-
- if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
- goto tablespace_deleted; /* jumps into the switch below */
- }
-
- buf_read_page_low(&err, sync && (i + 1 == n_stored), /* sync only for the last page */
- BUF_READ_ANY_PAGE, space_ids[i],
- zip_size, TRUE, space_versions[i],
- page_nos[i], NULL);
-
- switch(err) {
- case DB_SUCCESS:
- case DB_ERROR:
- break;
- case DB_TABLESPACE_DELETED:
-
-tablespace_deleted:
- /* We have deleted or are deleting the single-table
- tablespace: remove the entries for tablespace. */
- ibuf_delete_for_discarded_space(space_ids[i]);
- break;
- case DB_DECRYPTION_FAILED:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Failed to decrypt insert buffer page "
- ULINTPF ":" ULINTPF ".",
- space_ids[i], page_nos[i]);
- break;
- default:
- ut_error;
- }
- }
-
- os_aio_simulated_wake_handler_threads();
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- /* NOTE(review): reads space_ids[0] even when n_stored == 0;
- confirm callers never pass an empty array in debug builds */
- fprintf(stderr,
- "Ibuf merge read-ahead space %lu pages %lu\n",
- (ulong) space_ids[0], (ulong) n_stored);
- }
-#endif /* UNIV_DEBUG */
-}
-
-/********************************************************************//**
-Issues read requests for pages which recovery wants to read in.
-The zip_size argument is overwritten with fil_space_get_zip_size(space)
-before use. If that lookup yields ULINT_UNDEFINED (tablespace missing), no
-page is read; instead, when recovery is on and log records are being
-applied, the hashed log record entries of the listed pages are marked
-RECV_PROCESSED and recv_sys->n_addrs is decremented for each of them.
-Otherwise, before every read the function polls every 10 ms
-(os_thread_sleep(10000)) while the number of pending reads of the owning
-buffer pool instance is at least recv_n_pool_free_frames / 2, and reports
-an error once per 1000 polls. */
-UNIV_INTERN
-void
-buf_read_recv_pages(
-/*================*/
- ibool sync, /*!< in: TRUE if the caller
- wants this function to wait
- for the highest address page
- to get read in, before this
- function returns */
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in
- bytes, or 0 */
- const ulint* page_nos, /*!< in: array of page numbers
- to read, with the highest page
- number the last in the
- array */
- ulint n_stored) /*!< in: number of page numbers
- in the array */
-{
- ib_int64_t tablespace_version;
- ulint count;
- dberr_t err = DB_SUCCESS;
- ulint i;
-
- zip_size = fil_space_get_zip_size(space);
-
- if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
- /* It is a single table tablespace and the .ibd file is
- missing: do nothing */
-
- /* the log records should be treated here same reason
- for http://bugs.mysql.com/bug.php?id=43948 */
-
- if (recv_recovery_is_on()) {
- recv_addr_t* recv_addr;
-
- mutex_enter(&(recv_sys->mutex));
-
- if (recv_sys->apply_log_recs == FALSE) {
- mutex_exit(&(recv_sys->mutex));
- goto not_to_recover;
- }
-
- for (i = 0; i < n_stored; i++) {
- /* recv_get_fil_addr_struct() */
- recv_addr = (recv_addr_t*)HASH_GET_FIRST(recv_sys->addr_hash,
- hash_calc_hash(ut_fold_ulint_pair(space, page_nos[i]),
- recv_sys->addr_hash));
- while (recv_addr) {
- if ((recv_addr->space == space)
- && (recv_addr->page_no == page_nos[i])) {
- break;
- }
- recv_addr = (recv_addr_t*)HASH_GET_NEXT(addr_hash, recv_addr);
- }
-
- if ((recv_addr == NULL)
- || (recv_addr->state == RECV_BEING_PROCESSED)
- || (recv_addr->state == RECV_PROCESSED)) {
- continue;
- }
-
- /* Discard the log records of the page: its
- tablespace no longer exists */
- recv_addr->state = RECV_PROCESSED;
-
- ut_a(recv_sys->n_addrs);
- recv_sys->n_addrs--;
- }
-
- mutex_exit(&(recv_sys->mutex));
-
- /* Cast explicitly: ulint need not have the width
- that %lu expects */
- fprintf(stderr, " (cannot find space: %lu)", (ulong) space);
- }
-not_to_recover:
-
- return;
- }
-
- tablespace_version = fil_space_get_version(space);
-
- for (i = 0; i < n_stored; i++) {
- buf_pool_t* buf_pool;
-
- count = 0;
-
- os_aio_print_debug = FALSE;
- buf_pool = buf_pool_get(space, page_nos[i]);
- while (buf_pool->n_pend_reads
- >= recv_n_pool_free_frames / 2) {
-
- os_aio_simulated_wake_handler_threads();
- os_thread_sleep(10000);
-
- count++;
-
- /* Each poll sleeps 10 ms, so 1000 polls are 10
- seconds. Report once per 10 seconds instead of on
- every poll after the first 10 seconds, which would
- flood the error log. */
- if (!(count % 1000)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "waited for " ULINTPF " seconds for " ULINTPF
- " pending reads to the buffer pool to"
- " be finished",
- count / 100, buf_pool->n_pend_reads);
-
- os_aio_print_debug = TRUE;
- }
- }
-
- os_aio_print_debug = FALSE;
-
- /* Only the last page is read synchronously, and only if the
- caller asked for it; all other reads are queued and the
- handler threads are woken up after the loop */
- if ((i + 1 == n_stored) && sync) {
- buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space,
- zip_size, TRUE, tablespace_version,
- page_nos[i], NULL);
- } else {
- buf_read_page_low(&err, false, BUF_READ_ANY_PAGE
- | OS_AIO_SIMULATED_WAKE_LATER,
- space, zip_size, TRUE,
- tablespace_version, page_nos[i],
- NULL);
- }
-
- if (err == DB_DECRYPTION_FAILED) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Recovery failed to decrypt read page "
- ULINTPF ":" ULINTPF ".",
- space, page_nos[i]);
- }
- }
-
- os_aio_simulated_wake_handler_threads();
-
-#ifdef UNIV_DEBUG
- if (buf_debug_prints) {
- fprintf(stderr,
- "Recovery applies read-ahead pages %lu\n",
- (ulong) n_stored);
- }
-#endif /* UNIV_DEBUG */
-}
diff --git a/storage/xtradb/data/data0data.cc b/storage/xtradb/data/data0data.cc
deleted file mode 100644
index 593af089b00..00000000000
--- a/storage/xtradb/data/data0data.cc
+++ /dev/null
@@ -1,751 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file data/data0data.cc
-SQL data field and tuple
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "data0data.h"
-
-#ifdef UNIV_NONINL
-#include "data0data.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-#include "rem0rec.h"
-#include "rem0cmp.h"
-#include "page0page.h"
-#include "page0zip.h"
-#include "dict0dict.h"
-#include "btr0cur.h"
-
-#include <ctype.h>
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-/** Dummy variable to catch access to uninitialized fields. In the
-debug version, dtuple_create() will make all fields of dtuple_t point
-to data_error. */
-UNIV_INTERN byte data_error;
-
-# ifndef UNIV_DEBUG_VALGRIND
-/** this is used to fool the compiler in dtuple_validate */
-UNIV_INTERN ulint data_dummy;
-# endif /* !UNIV_DEBUG_VALGRIND */
-#endif /* UNIV_DEBUG */
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Orders two data tuples. Tuples with a different number of fields are ordered
-by the field count alone; otherwise the fields are compared pairwise, in
-order, with cmp_dfield_dfield() and the first nonzero result decides.
-@return 0 if the tuples compare equal; -1 or 1 if tuple1 has fewer or more
-fields than tuple2; otherwise the first nonzero cmp_dfield_dfield() result */
-UNIV_INTERN
-int
-dtuple_coll_cmp(
-/*============*/
-	const dtuple_t*	tuple1,	/*!< in: tuple 1 */
-	const dtuple_t*	tuple2)	/*!< in: tuple 2 */
-{
-	ut_ad(tuple1 != NULL);
-	ut_ad(tuple2 != NULL);
-	ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
-	ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N);
-	ut_ad(dtuple_check_typed(tuple1));
-	ut_ad(dtuple_check_typed(tuple2));
-
-	const ulint	n_first = dtuple_get_n_fields(tuple1);
-	const ulint	n_second = dtuple_get_n_fields(tuple2);
-
-	/* A shorter tuple sorts before a longer one */
-	if (n_first != n_second) {
-		return(n_first < n_second ? -1 : 1);
-	}
-
-	for (ulint pos = 0; pos < n_first; pos++) {
-		const int	result = cmp_dfield_dfield(
-			dtuple_get_nth_field(tuple1, pos),
-			dtuple_get_nth_field(tuple2, pos));
-
-		if (result != 0) {
-			return(result);
-		}
-	}
-
-	return(0);
-}
-
-/*********************************************************************//**
-Overrides the field count of a tuple. dtuple_create normally sets the count;
-use this afterwards to shrink it. Both the total count (n_fields) and the
-count used in comparisons (n_fields_cmp) are set to the same value. */
-UNIV_INTERN
-void
-dtuple_set_n_fields(
-/*================*/
-	dtuple_t*	tuple,		/*!< in: tuple */
-	ulint		n_fields)	/*!< in: number of fields */
-{
-	ut_ad(tuple);
-
-	tuple->n_fields_cmp = n_fields;
-	tuple->n_fields = n_fields;
-}
-
-/**********************************************************//**
-Checks that the main type of a data field lies in the range
-[DATA_VARCHAR, DATA_MYSQL]. An out-of-range type is reported on stderr
-together with the field length; no assertion is raised.
-@return TRUE if ok */
-static
-ibool
-dfield_check_typed_no_assert(
-/*=========================*/
-	const dfield_t*	field)	/*!< in: data field */
-{
-	const dtype_t*	type = dfield_get_type(field);
-
-	if (type->mtype >= DATA_VARCHAR && type->mtype <= DATA_MYSQL) {
-
-		return(TRUE);
-	}
-
-	fprintf(stderr,
-		"InnoDB: Error: data field type %lu, len %lu\n",
-		(ulong) type->mtype,
-		(ulong) dfield_get_len(field));
-
-	return(FALSE);
-}
-
-/**********************************************************//**
-Checks that a data tuple is typed: it must have at most REC_MAX_N_FIELDS
-fields and every field must pass dfield_check_typed_no_assert(). On the
-first failure the tuple contents are dumped to stderr; no assertion is
-raised.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtuple_check_typed_no_assert(
-/*=========================*/
-	const dtuple_t*	tuple)	/*!< in: tuple */
-{
-	ibool	ok = TRUE;
-
-	if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) {
-		fprintf(stderr,
-			"InnoDB: Error: index entry has %lu fields\n",
-			(ulong) dtuple_get_n_fields(tuple));
-		ok = FALSE;
-	} else {
-		/* Stop at the first field that fails the check */
-		for (ulint pos = 0;
-		     ok && pos < dtuple_get_n_fields(tuple);
-		     pos++) {
-
-			if (!dfield_check_typed_no_assert(
-				    dtuple_get_nth_field(tuple, pos))) {
-				ok = FALSE;
-			}
-		}
-	}
-
-	if (!ok) {
-		fputs("InnoDB: Tuple contents: ", stderr);
-		dtuple_print(stderr, tuple);
-		putc('\n', stderr);
-
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-/**********************************************************//**
-Checks that the main type of a data field lies in the range
-[DATA_VARCHAR, DATA_MYSQL]. An out-of-range type is reported on stderr
-and then ut_error is raised.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dfield_check_typed(
-/*===============*/
-	const dfield_t*	field)	/*!< in: data field */
-{
-	const dtype_t*	type = dfield_get_type(field);
-	const bool	in_range = type->mtype >= DATA_VARCHAR
-		&& type->mtype <= DATA_MYSQL;
-
-	if (!in_range) {
-
-		fprintf(stderr,
-			"InnoDB: Error: data field type %lu, len %lu\n",
-			(ulong) type->mtype,
-			(ulong) dfield_get_len(field));
-
-		ut_error;
-	}
-
-	return(TRUE);
-}
-
-/**********************************************************//**
-Checks that every field of a data tuple passes dfield_check_typed(), which
-asserts an error on the first field that is not typed.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtuple_check_typed(
-/*===============*/
-	const dtuple_t*	tuple)	/*!< in: tuple */
-{
-	ulint	pos = 0;
-
-	while (pos < dtuple_get_n_fields(tuple)) {
-
-		ut_a(dfield_check_typed(dtuple_get_nth_field(tuple, pos)));
-
-		pos++;
-	}
-
-	return(TRUE);
-}
-
-/**********************************************************//**
-Validates the consistency of a tuple which must be complete, i.e,
-all fields must have been set.
-The magic number is checked with a debug assertion, every byte of every
-non-NULL field is read (unless UNIV_DEBUG_VALGRIND is defined, in which case
-only UNIV_MEM_ASSERT_RW() is applied), and finally dtuple_check_typed() must
-succeed.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtuple_validate(
-/*============*/
- const dtuple_t* tuple) /*!< in: tuple */
-{
- const dfield_t* field;
- ulint n_fields;
- ulint len;
- ulint i;
-
- ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
-
- n_fields = dtuple_get_n_fields(tuple);
-
- /* We dereference all the data of each field to test
- for memory traps */
-
- for (i = 0; i < n_fields; i++) {
-
- field = dtuple_get_nth_field(tuple, i);
- len = dfield_get_len(field);
-
- if (!dfield_is_null(field)) {
-
- const byte* data;
-
- data = static_cast<const byte*>(dfield_get_data(field));
-#ifndef UNIV_DEBUG_VALGRIND
- ulint j;
-
- for (j = 0; j < len; j++) {
-
- data_dummy += *data; /* fool the compiler not
- to optimize out this
- code */
- data++;
- }
-#endif /* !UNIV_DEBUG_VALGRIND */
-
- /* NOTE(review): without UNIV_DEBUG_VALGRIND the loop
- above has advanced data by len bytes at this point;
- presumably UNIV_MEM_ASSERT_RW() expands to nothing in
- that configuration - confirm */
- UNIV_MEM_ASSERT_RW(data, len);
- }
- }
-
- ut_a(dtuple_check_typed(tuple));
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Pretty prints a dfield value to stderr according to its data type. SQL NULL
-is printed as "NULL". DATA_CHAR and DATA_VARCHAR values are printed byte by
-byte with non-printable bytes replaced by a space; DATA_INT values must be
-4 bytes long. Any other main type raises ut_error. */
-UNIV_INTERN
-void
-dfield_print(
-/*=========*/
-	const dfield_t*	dfield)	/*!< in: dfield */
-{
-	const ulint	len = dfield_get_len(dfield);
-	const byte*	ptr = static_cast<const byte*>(
-		dfield_get_data(dfield));
-
-	if (dfield_is_null(dfield)) {
-		fputs("NULL", stderr);
-
-		return;
-	}
-
-	const ulint	mtype = dtype_get_mtype(dfield_get_type(dfield));
-
-	if (mtype == DATA_CHAR || mtype == DATA_VARCHAR) {
-		for (const byte* const end = ptr + len; ptr != end; ptr++) {
-			const int	ch = *ptr;
-
-			putc(isprint(ch) ? ch : ' ', stderr);
-		}
-
-		if (dfield_is_ext(dfield)) {
-			fputs("(external)", stderr);
-		}
-	} else if (mtype == DATA_INT) {
-		ut_a(len == 4); /* only works for 32-bit integers */
-		fprintf(stderr, "%d", (int) mach_read_from_4(ptr));
-	} else {
-		ut_error;
-	}
-}
-
-/*************************************************************//**
-Pretty prints a dfield value according to its data type. Also the hex string
-is printed if a string contains non-printable characters.
-All output goes to stderr. SQL NULL is printed as "NULL". DATA_INT values
-of 1 to 4 and 6 to 8 bytes are printed in decimal, DATA_SYS values with a
-label that depends on the precise type, and DATA_CHAR / DATA_VARCHAR values
-byte by byte with non-printable bytes escaped as \xNN. DATA_BINARY, any
-other main type, integers of any other length and strings that contained a
-non-printable byte are printed as " Hex: " followed by the bytes in hex. */
-UNIV_INTERN
-void
-dfield_print_also_hex(
-/*==================*/
- const dfield_t* dfield) /*!< in: dfield */
-{
- const byte* data;
- ulint len;
- ulint prtype;
- ulint i;
- ibool print_also_hex;
-
- len = dfield_get_len(dfield);
- data = static_cast<const byte*>(dfield_get_data(dfield));
-
- if (dfield_is_null(dfield)) {
- fputs("NULL", stderr);
-
- return;
- }
-
- prtype = dtype_get_prtype(dfield_get_type(dfield));
-
- switch (dtype_get_mtype(dfield_get_type(dfield))) {
- ib_id_t id;
- case DATA_INT:
- switch (len) {
- ulint val;
- case 1:
- val = mach_read_from_1(data);
-
- if (!(prtype & DATA_UNSIGNED)) {
- val &= ~0x80; /* clears the top bit of the 1-byte value */
- fprintf(stderr, "%ld", (long) val);
- } else {
- fprintf(stderr, "%lu", (ulong) val);
- }
- break;
-
- case 2:
- val = mach_read_from_2(data);
-
- if (!(prtype & DATA_UNSIGNED)) {
- val &= ~0x8000; /* clears the top bit of the 2-byte value */
- fprintf(stderr, "%ld", (long) val);
- } else {
- fprintf(stderr, "%lu", (ulong) val);
- }
- break;
-
- case 3:
- val = mach_read_from_3(data);
-
- if (!(prtype & DATA_UNSIGNED)) {
- val &= ~0x800000; /* clears the top bit of the 3-byte value */
- fprintf(stderr, "%ld", (long) val);
- } else {
- fprintf(stderr, "%lu", (ulong) val);
- }
- break;
-
- case 4:
- val = mach_read_from_4(data);
-
- if (!(prtype & DATA_UNSIGNED)) {
- val &= ~0x80000000; /* clears the top bit of the 4-byte value */
- fprintf(stderr, "%ld", (long) val);
- } else {
- fprintf(stderr, "%lu", (ulong) val);
- }
- break;
-
- case 6:
- id = mach_read_from_6(data);
- fprintf(stderr, "%llu", (ullint) id);
- break;
-
- case 7:
- id = mach_read_from_7(data);
- fprintf(stderr, "%llu", (ullint) id);
- break;
- case 8:
- id = mach_read_from_8(data);
- fprintf(stderr, "%llu", (ullint) id);
- break;
- default:
- goto print_hex; /* e.g. 5-byte or longer than 8-byte integers */
- }
- break;
-
- case DATA_SYS:
- switch (prtype & DATA_SYS_PRTYPE_MASK) {
- case DATA_TRX_ID:
- id = mach_read_from_6(data);
-
- fprintf(stderr, "trx_id " TRX_ID_FMT, id);
- break;
-
- case DATA_ROLL_PTR:
- id = mach_read_from_7(data);
-
- fprintf(stderr, "roll_ptr " TRX_ID_FMT, id);
- break;
-
- case DATA_ROW_ID:
- id = mach_read_from_6(data);
-
- fprintf(stderr, "row_id " TRX_ID_FMT, id);
- break;
-
- default:
- id = mach_ull_read_compressed(data);
-
- fprintf(stderr, "mix_id " TRX_ID_FMT, id);
- }
- break;
-
- case DATA_CHAR:
- case DATA_VARCHAR:
- print_also_hex = FALSE;
-
- for (i = 0; i < len; i++) {
- int c = *data++;
-
- if (!isprint(c)) {
- print_also_hex = TRUE;
-
- fprintf(stderr, "\\x%02x", (unsigned char) c);
- } else {
- putc(c, stderr);
- }
- }
-
- if (dfield_is_ext(dfield)) {
- fputs("(external)", stderr);
- }
-
- if (!print_also_hex) {
- break;
- }
-
- data = static_cast<byte*>(dfield_get_data(dfield)); /* rewind to the first byte for the hex dump */
- /* fall through */
-
- case DATA_BINARY:
- default:
-print_hex:
- fputs(" Hex: ",stderr);
-
- for (i = 0; i < len; i++) {
- fprintf(stderr, "%02lx", (ulint) *data++);
- }
-
- if (dfield_is_ext(dfield)) {
- fputs("(external)", stderr);
- }
- }
-}
-
-/*************************************************************//**
-Prints a dfield value with ut_print_buf(). At most the first 1000 bytes are
-printed; if the value is longer, its total length (and whether it is stored
-externally) is appended. SQL NULL is printed as " SQL NULL". */
-static
-void
-dfield_print_raw(
-/*=============*/
-	FILE*		f,	/*!< in: output stream */
-	const dfield_t*	dfield)	/*!< in: dfield */
-{
-	const ulint	len = dfield_get_len(dfield);
-
-	if (dfield_is_null(dfield)) {
-		fputs(" SQL NULL", f);
-
-		return;
-	}
-
-	const ulint	shown = ut_min(len, 1000);
-
-	ut_print_buf(f, dfield_get_data(dfield), shown);
-
-	if (shown != len) {
-		/* The value was truncated: tell the reader how long it is */
-		fprintf(f, "(total %lu bytes%s)",
-			(ulong) len,
-			dfield_is_ext(dfield) ? ", external" : "");
-	}
-}
-
-/**********************************************************//**
-Prints the contents of a tuple: a header line with the number of fields,
-then one line per field holding the field index and the value as formatted
-by dfield_print_raw(). In debug builds the tuple is validated afterwards. */
-UNIV_INTERN
-void
-dtuple_print(
-/*=========*/
-	FILE*		f,	/*!< in: output stream */
-	const dtuple_t*	tuple)	/*!< in: tuple */
-{
-	const ulint	field_count = dtuple_get_n_fields(tuple);
-
-	fprintf(f, "DATA TUPLE: %lu fields;\n", (ulong) field_count);
-
-	for (ulint pos = 0; pos < field_count; pos++) {
-		fprintf(f, " %lu:", (ulong) pos);
-
-		dfield_print_raw(f, dtuple_get_nth_field(tuple, pos));
-
-		fputs(";\n", f);
-	}
-
-	ut_ad(dtuple_validate(tuple));
-}
-
-/**************************************************************//**
-Moves parts of long fields in entry to the big record vector so that
-the size of tuple drops below the maximum record size allowed in the
-database. Moves data only from those fields which are not necessary
-to determine uniquely the insertion place of the tuple in the index.
-The vector and the replacement local parts of the shortened fields are
-allocated from a memory heap created here and stored in vector->heap; the
-moved data itself is not copied, the vector points into the original field
-data. On each round the eligible field with the largest savings is
-shortened, and *n_ext is incremented for every field moved.
-@return own: created big record vector, NULL if we are not able to
-shorten the entry enough, i.e., if there are too many fixed-length or
-short fields in entry or the index is clustered */
-UNIV_INTERN
-big_rec_t*
-dtuple_convert_big_rec(
-/*===================*/
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in/out: index entry */
- ulint* n_ext) /*!< in/out: number of
- externally stored columns */
-{
- mem_heap_t* heap;
- big_rec_t* vector;
- dfield_t* dfield;
- dict_field_t* ifield;
- ulint size;
- ulint n_fields;
- ulint local_len;
- ulint local_prefix_len;
-
- /* NOTE(review): the code returns NULL when the index is NOT
- clustered, whereas the function comment says "or the index is
- clustered" - the comment looks inverted; confirm */
- if (!dict_index_is_clust(index)) {
- return(NULL);
- }
-
- if (dict_table_get_format(index->table) < UNIV_FORMAT_B) {
- /* up to MySQL 5.1: store a 768-byte prefix locally */
- local_len = BTR_EXTERN_FIELD_REF_SIZE
- + DICT_ANTELOPE_MAX_INDEX_COL_LEN;
- } else {
- /* new-format table: do not store any BLOB prefix locally */
- local_len = BTR_EXTERN_FIELD_REF_SIZE;
- }
-
- ut_a(dtuple_check_typed_no_assert(entry));
-
- size = rec_get_converted_size(index, entry, *n_ext);
-
- if (UNIV_UNLIKELY(size > 1000000000)) {
- fprintf(stderr,
- "InnoDB: Warning: tuple size very big: %lu\n",
- (ulong) size);
- fputs("InnoDB: Tuple contents: ", stderr);
- dtuple_print(stderr, entry);
- putc('\n', stderr);
- }
-
- heap = mem_heap_create(size + dtuple_get_n_fields(entry)
- * sizeof(big_rec_field_t) + 1000);
-
- vector = static_cast<big_rec_t*>(
- mem_heap_alloc(heap, sizeof(big_rec_t)));
-
- vector->heap = heap;
-
- vector->fields = static_cast<big_rec_field_t*>(
- mem_heap_alloc(
- heap,
- dtuple_get_n_fields(entry) * sizeof(big_rec_field_t)));
-
- /* Decide which fields to shorten: the algorithm is to look for
- a variable-length field that yields the biggest savings when
- stored externally */
-
- n_fields = 0;
-
- while (page_zip_rec_needs_ext(rec_get_converted_size(index, entry,
- *n_ext),
- dict_table_is_comp(index->table),
- dict_index_get_n_fields(index),
- dict_table_zip_size(index->table))) {
- ulint i;
- ulint longest = 0;
- ulint longest_i = ULINT_MAX;
- byte* data;
- big_rec_field_t* b;
-
- for (i = dict_index_get_n_unique_in_tree(index);
- i < dtuple_get_n_fields(entry); i++) {
- ulint savings;
-
- dfield = dtuple_get_nth_field(entry, i);
- ifield = dict_index_get_nth_field(index, i);
-
- /* Skip fixed-length, NULL, externally stored,
- or short columns */
-
- if (ifield->fixed_len
- || dfield_is_null(dfield)
- || dfield_is_ext(dfield)
- || dfield_get_len(dfield) <= local_len
- || dfield_get_len(dfield)
- <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
- goto skip_field;
- }
-
- savings = dfield_get_len(dfield) - local_len;
-
- /* Check that there would be savings */
- if (longest >= savings) {
- goto skip_field;
- }
-
- /* In DYNAMIC and COMPRESSED format, store
- locally any non-BLOB columns whose maximum
- length is below 256 bytes. This is
- because there is no room for the "external
- storage" flag when the maximum length is 255
- bytes or less. This restriction trivially
- holds in REDUNDANT and COMPACT format, because
- there we always store locally columns whose
- length is up to local_len == 788 bytes.
- @see rec_init_offsets_comp_ordinary */
- if (ifield->col->mtype != DATA_BLOB
- && ifield->col->len < 256) {
- goto skip_field;
- }
-
- longest_i = i;
- longest = savings;
-
-skip_field:
- continue;
- }
-
- if (!longest) {
- /* Cannot shorten more */
-
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- /* Move data from field longest_i to big rec vector.
-
- We store the first bytes locally to the record. Then
- we can calculate all ordering fields in all indexes
- from locally stored data. */
-
- dfield = dtuple_get_nth_field(entry, longest_i);
- ifield = dict_index_get_nth_field(index, longest_i);
- local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;
-
- b = &vector->fields[n_fields];
- b->field_no = longest_i;
- b->len = dfield_get_len(dfield) - local_prefix_len;
- b->data = (char*) dfield_get_data(dfield) + local_prefix_len; /* points into the original data, no copy */
-
- /* Allocate the locally stored part of the column. */
- data = static_cast<byte*>(mem_heap_alloc(heap, local_len));
-
- /* Copy the local prefix. */
- memcpy(data, dfield_get_data(dfield), local_prefix_len);
- /* Clear the extern field reference (BLOB pointer). */
- memset(data + local_prefix_len, 0, BTR_EXTERN_FIELD_REF_SIZE);
-#if 0
- /* The following would fail the Valgrind checks in
- page_cur_insert_rec_low() and page_cur_insert_rec_zip().
- The BLOB pointers in the record will be initialized after
- the record and the BLOBs have been written. */
- UNIV_MEM_ALLOC(data + local_prefix_len,
- BTR_EXTERN_FIELD_REF_SIZE);
-#endif
-
- dfield_set_data(dfield, data, local_len);
- dfield_set_ext(dfield);
-
- n_fields++;
- (*n_ext)++;
- ut_ad(n_fields < dtuple_get_n_fields(entry));
- }
-
- vector->n_fields = n_fields;
- return(vector);
-}
-
-/**************************************************************//**
-Restores in entry the field data that dtuple_convert_big_rec moved to
-vector, then frees the heap of the vector. Each shortened field is pointed
-back at the start of its original data, which lies local-prefix-length bytes
-before the data recorded in the vector, so vector must have been created
-from this very entry with dtuple_convert_big_rec. */
-UNIV_INTERN
-void
-dtuple_convert_back_big_rec(
-/*========================*/
-	dict_index_t*	index MY_ATTRIBUTE((unused)),	/*!< in: index */
-	dtuple_t*	entry,	/*!< in: entry whose data was put to vector */
-	big_rec_t*	vector)	/*!< in, own: big rec vector; it is
-				freed in this function */
-{
-	for (ulint pos = 0; pos < vector->n_fields; pos++) {
-		big_rec_field_t*	moved = &vector->fields[pos];
-		dfield_t*		dfield = dtuple_get_nth_field(
-			entry, moved->field_no);
-		const ulint		stored_len = dfield_get_len(dfield);
-
-		ut_ad(dfield_is_ext(dfield));
-		ut_ad(stored_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-		/* Length of the prefix that was kept in the record */
-		const ulint	prefix_len
-			= stored_len - BTR_EXTERN_FIELD_REF_SIZE;
-
-		/* Only in REDUNDANT and COMPACT format, we store
-		up to DICT_ANTELOPE_MAX_INDEX_COL_LEN (768) bytes
-		locally */
-		ut_ad(prefix_len <= DICT_ANTELOPE_MAX_INDEX_COL_LEN);
-
-		dfield_set_data(dfield,
-				(char*) moved->data - prefix_len,
-				moved->len + prefix_len);
-	}
-
-	mem_heap_free(vector->heap);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/data/data0type.cc b/storage/xtradb/data/data0type.cc
deleted file mode 100644
index 0b9e08544a5..00000000000
--- a/storage/xtradb/data/data0type.cc
+++ /dev/null
@@ -1,298 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file data/data0type.cc
-Data types
-
-Created 1/16/1996 Heikki Tuuri
-*******************************************************/
-
-#include "data0type.h"
-
-#ifdef UNIV_NONINL
-#include "data0type.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-# include "ha_prototypes.h"
-
-/* At the database startup we store the default-charset collation number of
-this MySQL installation to this global variable. If we have < 4.1.2 format
-column definitions, or records in the insert buffer, we use this
-charset-collation code for them. */
-
-UNIV_INTERN ulint data_mysql_default_charset_coll;
-
-/*********************************************************************//**
-Determine how many bytes the first n characters of the given string occupy.
-If the string is shorter than n characters, returns the number of bytes
-the characters in the string occupy.
-@return length of the prefix, in bytes */
-UNIV_INTERN
-ulint
-dtype_get_at_most_n_mbchars(
-/*========================*/
- ulint prtype, /*!< in: precise type */
- ulint mbminmaxlen, /*!< in: minimum and maximum length of
- a multi-byte character */
- ulint prefix_len, /*!< in: length of the requested
- prefix, in characters, multiplied by
- dtype_get_mbmaxlen(dtype) */
- ulint data_len, /*!< in: length of str (in bytes) */
- const char* str) /*!< in: the string whose prefix
- length is being determined */
-{
- ulint mbminlen = DATA_MBMINLEN(mbminmaxlen);
- ulint mbmaxlen = DATA_MBMAXLEN(mbminmaxlen);
-
- ut_a(data_len != UNIV_SQL_NULL);
- ut_ad(!mbmaxlen || !(prefix_len % mbmaxlen));
-
- if (mbminlen != mbmaxlen) {
- ut_a(!(prefix_len % mbmaxlen));
- return(innobase_get_at_most_n_mbchars(
- dtype_get_charset_coll(prtype),
- prefix_len, data_len, str));
- }
-
- if (prefix_len < data_len) {
-
- return(prefix_len);
-
- }
-
- return(data_len);
-}
-#endif /* UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Checks if a data main type is a string type. Also a BLOB is considered a
-string type.
-@return TRUE if string type */
-UNIV_INTERN
-ibool
-dtype_is_string_type(
-/*=================*/
- ulint mtype) /*!< in: InnoDB main data type code: DATA_CHAR, ... */
-{
- if (mtype <= DATA_BLOB
- || mtype == DATA_MYSQL
- || mtype == DATA_VARMYSQL) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Checks if a type is a binary string type. Note that for tables created with
-< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
-those DATA_BLOB columns this function currently returns FALSE.
-@return TRUE if binary string type */
-UNIV_INTERN
-ibool
-dtype_is_binary_string_type(
-/*========================*/
- ulint mtype, /*!< in: main data type */
- ulint prtype) /*!< in: precise type */
-{
- if ((mtype == DATA_FIXBINARY)
- || (mtype == DATA_BINARY)
- || (mtype == DATA_BLOB && (prtype & DATA_BINARY_TYPE))) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Checks if a type is a non-binary string type. That is, dtype_is_string_type is
-TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
-with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
-For those DATA_BLOB columns this function currently returns TRUE.
-@return TRUE if non-binary string type */
-UNIV_INTERN
-ibool
-dtype_is_non_binary_string_type(
-/*============================*/
- ulint mtype, /*!< in: main data type */
- ulint prtype) /*!< in: precise type */
-{
- if (dtype_is_string_type(mtype) == TRUE
- && dtype_is_binary_string_type(mtype, prtype) == FALSE) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Forms a precise type from the < 4.1.2 format precise type plus the
-charset-collation code.
-@return precise type, including the charset-collation code */
-UNIV_INTERN
-ulint
-dtype_form_prtype(
-/*==============*/
- ulint old_prtype, /*!< in: the MySQL type code and the flags
- DATA_BINARY_TYPE etc. */
- ulint charset_coll) /*!< in: MySQL charset-collation code */
-{
- ut_a(old_prtype < 256 * 256);
- ut_a(charset_coll <= MAX_CHAR_COLL_NUM);
-
- return(old_prtype + (charset_coll << 16));
-}
-
-/*********************************************************************//**
-Validates a data type structure.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtype_validate(
-/*===========*/
- const dtype_t* type) /*!< in: type struct to validate */
-{
- ut_a(type);
- ut_a(type->mtype >= DATA_VARCHAR);
- ut_a(type->mtype <= DATA_MYSQL);
-
- if (type->mtype == DATA_SYS) {
- ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS);
- }
-
-#ifndef UNIV_HOTBACKUP
- ut_a(dtype_get_mbminlen(type) <= dtype_get_mbmaxlen(type));
-#endif /* !UNIV_HOTBACKUP */
-
- return(TRUE);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Prints a data type structure. */
-UNIV_INTERN
-void
-dtype_print(
-/*========*/
- const dtype_t* type) /*!< in: type */
-{
- ulint mtype;
- ulint prtype;
- ulint len;
-
- ut_a(type);
-
- mtype = type->mtype;
- prtype = type->prtype;
-
- switch (mtype) {
- case DATA_VARCHAR:
- fputs("DATA_VARCHAR", stderr);
- break;
-
- case DATA_CHAR:
- fputs("DATA_CHAR", stderr);
- break;
-
- case DATA_BINARY:
- fputs("DATA_BINARY", stderr);
- break;
-
- case DATA_FIXBINARY:
- fputs("DATA_FIXBINARY", stderr);
- break;
-
- case DATA_BLOB:
- fputs("DATA_BLOB", stderr);
- break;
-
- case DATA_INT:
- fputs("DATA_INT", stderr);
- break;
-
- case DATA_MYSQL:
- fputs("DATA_MYSQL", stderr);
- break;
-
- case DATA_SYS:
- fputs("DATA_SYS", stderr);
- break;
-
- case DATA_FLOAT:
- fputs("DATA_FLOAT", stderr);
- break;
-
- case DATA_DOUBLE:
- fputs("DATA_DOUBLE", stderr);
- break;
-
- case DATA_DECIMAL:
- fputs("DATA_DECIMAL", stderr);
- break;
-
- case DATA_VARMYSQL:
- fputs("DATA_VARMYSQL", stderr);
- break;
-
- default:
- fprintf(stderr, "type %lu", (ulong) mtype);
- break;
- }
-
- len = type->len;
-
- if ((type->mtype == DATA_SYS)
- || (type->mtype == DATA_VARCHAR)
- || (type->mtype == DATA_CHAR)) {
- putc(' ', stderr);
- if (prtype == DATA_ROW_ID) {
- fputs("DATA_ROW_ID", stderr);
- len = DATA_ROW_ID_LEN;
- } else if (prtype == DATA_ROLL_PTR) {
- fputs("DATA_ROLL_PTR", stderr);
- len = DATA_ROLL_PTR_LEN;
- } else if (prtype == DATA_TRX_ID) {
- fputs("DATA_TRX_ID", stderr);
- len = DATA_TRX_ID_LEN;
- } else if (prtype == DATA_ENGLISH) {
- fputs("DATA_ENGLISH", stderr);
- } else {
- fprintf(stderr, "prtype %lu", (ulong) prtype);
- }
- } else {
- if (prtype & DATA_UNSIGNED) {
- fputs(" DATA_UNSIGNED", stderr);
- }
-
- if (prtype & DATA_BINARY_TYPE) {
- fputs(" DATA_BINARY_TYPE", stderr);
- }
-
- if (prtype & DATA_NOT_NULL) {
- fputs(" DATA_NOT_NULL", stderr);
- }
- }
-
- fprintf(stderr, " len %lu", (ulong) len);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/dict/dict0boot.cc b/storage/xtradb/dict/dict0boot.cc
deleted file mode 100644
index 9731211ff5f..00000000000
--- a/storage/xtradb/dict/dict0boot.cc
+++ /dev/null
@@ -1,531 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file dict/dict0boot.cc
-Data dictionary creation and booting
-
-Created 4/18/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dict0boot.h"
-
-#ifdef UNIV_NONINL
-#include "dict0boot.ic"
-#endif
-
-#include "dict0crea.h"
-#include "btr0btr.h"
-#include "btr0sea.h"
-#include "dict0load.h"
-#include "trx0trx.h"
-#include "srv0srv.h"
-#include "ibuf0ibuf.h"
-#include "buf0flu.h"
-#include "log0recv.h"
-#include "os0file.h"
-
-/**********************************************************************//**
-Gets a pointer to the dictionary header and x-latches its page.
-@return pointer to the dictionary header, page x-latched */
-UNIV_INTERN
-dict_hdr_t*
-dict_hdr_get(
-/*=========*/
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
- dict_hdr_t* header;
-
- block = buf_page_get(DICT_HDR_SPACE, 0, DICT_HDR_PAGE_NO,
- RW_X_LATCH, mtr);
- header = DICT_HDR + buf_block_get_frame(block);
-
- buf_block_dbg_add_level(block, SYNC_DICT_HEADER);
-
- return(header);
-}
-
-/**********************************************************************//**
-Returns a new table, index, or space id. */
-UNIV_INTERN
-void
-dict_hdr_get_new_id(
-/*================*/
- table_id_t* table_id, /*!< out: table id
- (not assigned if NULL) */
- index_id_t* index_id, /*!< out: index id
- (not assigned if NULL) */
- ulint* space_id) /*!< out: space id
- (not assigned if NULL) */
-{
- dict_hdr_t* dict_hdr;
- ib_id_t id;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- dict_hdr = dict_hdr_get(&mtr);
-
- if (table_id) {
- id = mach_read_from_8(dict_hdr + DICT_HDR_TABLE_ID);
- id++;
- mlog_write_ull(dict_hdr + DICT_HDR_TABLE_ID, id, &mtr);
- *table_id = id;
- }
-
- if (index_id) {
- id = mach_read_from_8(dict_hdr + DICT_HDR_INDEX_ID);
- id++;
- mlog_write_ull(dict_hdr + DICT_HDR_INDEX_ID, id, &mtr);
- *index_id = id;
- }
-
- if (space_id) {
- *space_id = mtr_read_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID,
- MLOG_4BYTES, &mtr);
- if (fil_assign_new_space_id(space_id)) {
- mlog_write_ulint(dict_hdr + DICT_HDR_MAX_SPACE_ID,
- *space_id, MLOG_4BYTES, &mtr);
- }
- }
-
- mtr_commit(&mtr);
-}
-
-/**********************************************************************//**
-Writes the current value of the row id counter to the dictionary header file
-page. */
-UNIV_INTERN
-void
-dict_hdr_flush_row_id(void)
-/*=======================*/
-{
- dict_hdr_t* dict_hdr;
- row_id_t id;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- id = dict_sys->row_id;
-
- mtr_start(&mtr);
-
- dict_hdr = dict_hdr_get(&mtr);
-
- mlog_write_ull(dict_hdr + DICT_HDR_ROW_ID, id, &mtr);
-
- mtr_commit(&mtr);
-}
-
-/*****************************************************************//**
-Creates the file page for the dictionary header. This function is
-called only at the database creation.
-@return TRUE if succeed */
-static
-ibool
-dict_hdr_create(
-/*============*/
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
- dict_hdr_t* dict_header;
- ulint root_page_no;
-
- ut_ad(mtr);
-
- /* Create the dictionary header file block in a new, allocated file
- segment in the system tablespace */
- block = fseg_create(DICT_HDR_SPACE, 0,
- DICT_HDR + DICT_HDR_FSEG_HEADER, mtr);
-
- ut_a(DICT_HDR_PAGE_NO == buf_block_get_page_no(block));
-
- dict_header = dict_hdr_get(mtr);
-
- /* Start counting row, table, index, and tree ids from
- DICT_HDR_FIRST_ID */
- mlog_write_ull(dict_header + DICT_HDR_ROW_ID,
- DICT_HDR_FIRST_ID, mtr);
-
- mlog_write_ull(dict_header + DICT_HDR_TABLE_ID,
- DICT_HDR_FIRST_ID, mtr);
-
- mlog_write_ull(dict_header + DICT_HDR_INDEX_ID,
- DICT_HDR_FIRST_ID, mtr);
-
- mlog_write_ulint(dict_header + DICT_HDR_MAX_SPACE_ID,
- 0, MLOG_4BYTES, mtr);
-
- /* Obsolete, but we must initialize it anyway. */
- mlog_write_ulint(dict_header + DICT_HDR_MIX_ID_LOW,
- DICT_HDR_FIRST_ID, MLOG_4BYTES, mtr);
-
- /* Create the B-tree roots for the clustered indexes of the basic
- system tables */
-
- /*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, 0, DICT_TABLES_ID,
- dict_ind_redundant, mtr);
- if (root_page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no,
- MLOG_4BYTES, mtr);
- /*--------------------------*/
- root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE, 0,
- DICT_TABLE_IDS_ID,
- dict_ind_redundant, mtr);
- if (root_page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no,
- MLOG_4BYTES, mtr);
- /*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, 0, DICT_COLUMNS_ID,
- dict_ind_redundant, mtr);
- if (root_page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no,
- MLOG_4BYTES, mtr);
- /*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, 0, DICT_INDEXES_ID,
- dict_ind_redundant, mtr);
- if (root_page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no,
- MLOG_4BYTES, mtr);
- /*--------------------------*/
- root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE,
- DICT_HDR_SPACE, 0, DICT_FIELDS_ID,
- dict_ind_redundant, mtr);
- if (root_page_no == FIL_NULL) {
-
- return(FALSE);
- }
-
- mlog_write_ulint(dict_header + DICT_HDR_FIELDS, root_page_no,
- MLOG_4BYTES, mtr);
- /*--------------------------*/
-
- return(TRUE);
-}
-
-/*****************************************************************//**
-Initializes the data dictionary memory structures when the database is
-started. This function is also called when the data dictionary is created.
-@return DB_SUCCESS or error code. */
-UNIV_INTERN
-dberr_t
-dict_boot(void)
-/*===========*/
-{
- dict_table_t* table;
- dict_index_t* index;
- dict_hdr_t* dict_hdr;
- mem_heap_t* heap;
- mtr_t mtr;
- dberr_t error;
-
- /* Be sure these constants do not ever change. To avoid bloat,
- only check the *NUM_FIELDS* in each table */
-
- ut_ad(DICT_NUM_COLS__SYS_TABLES == 8);
- ut_ad(DICT_NUM_FIELDS__SYS_TABLES == 10);
- ut_ad(DICT_NUM_FIELDS__SYS_TABLE_IDS == 2);
- ut_ad(DICT_NUM_COLS__SYS_COLUMNS == 7);
- ut_ad(DICT_NUM_FIELDS__SYS_COLUMNS == 9);
- ut_ad(DICT_NUM_COLS__SYS_INDEXES == 7);
- ut_ad(DICT_NUM_FIELDS__SYS_INDEXES == 9);
- ut_ad(DICT_NUM_COLS__SYS_FIELDS == 3);
- ut_ad(DICT_NUM_FIELDS__SYS_FIELDS == 5);
- ut_ad(DICT_NUM_COLS__SYS_FOREIGN == 4);
- ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN == 6);
- ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME == 2);
- ut_ad(DICT_NUM_COLS__SYS_FOREIGN_COLS == 4);
- ut_ad(DICT_NUM_FIELDS__SYS_FOREIGN_COLS == 6);
-
- mtr_start(&mtr);
-
- /* Create the hash tables etc. */
- dict_init();
-
- heap = mem_heap_create(450);
-
- mutex_enter(&(dict_sys->mutex));
-
- /* Get the dictionary header */
- dict_hdr = dict_hdr_get(&mtr);
-
- /* Because we only write new row ids to disk-based data structure
- (dictionary header) when it is divisible by
- DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover
- the latest value of the row id counter. Therefore we advance
- the counter at the database startup to avoid overlapping values.
- Note that when a user after database startup first time asks for
- a new row id, then because the counter is now divisible by
- ..._MARGIN, it will immediately be updated to the disk-based
- header. */
-
- dict_sys->row_id = DICT_HDR_ROW_ID_WRITE_MARGIN
- + ut_uint64_align_up(mach_read_from_8(dict_hdr + DICT_HDR_ROW_ID),
- DICT_HDR_ROW_ID_WRITE_MARGIN);
-
- /* Insert into the dictionary cache the descriptions of the basic
- system tables */
- /*-------------------------*/
- table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0, 0);
-
- dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
- /* ROW_FORMAT = (N_COLS >> 31) ? COMPACT : REDUNDANT */
- dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4);
- /* The low order bit of TYPE is always set to 1. If the format
- is UNIV_FORMAT_B or higher, this field matches table->flags. */
- dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
- /* MIX_LEN may contain additional table flags when
- ROW_FORMAT!=REDUNDANT. Currently, these flags include
- DICT_TF2_TEMPORARY. */
- dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
-
- table->id = DICT_TABLES_ID;
-
- dict_table_add_to_cache(table, FALSE, heap);
- dict_sys->sys_tables = table;
- mem_heap_empty(heap);
-
- index = dict_mem_index_create("SYS_TABLES", "CLUST_IND",
- DICT_HDR_SPACE,
- DICT_UNIQUE | DICT_CLUSTERED, 1);
-
- dict_mem_index_add_field(index, "NAME", 0);
-
- index->id = DICT_TABLES_ID;
- btr_search_index_init(index);
-
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_TABLES,
- MLOG_4BYTES, &mtr),
- FALSE);
- ut_a(error == DB_SUCCESS);
-
- /*-------------------------*/
- index = dict_mem_index_create("SYS_TABLES", "ID_IND",
- DICT_HDR_SPACE, DICT_UNIQUE, 1);
- dict_mem_index_add_field(index, "ID", 0);
-
- index->id = DICT_TABLE_IDS_ID;
- btr_search_index_init(index);
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_TABLE_IDS,
- MLOG_4BYTES, &mtr),
- FALSE);
- ut_a(error == DB_SUCCESS);
-
- /*-------------------------*/
- table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0, 0);
-
- dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "MTYPE", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "PRTYPE", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "LEN", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "PREC", DATA_INT, 0, 4);
-
- table->id = DICT_COLUMNS_ID;
-
- dict_table_add_to_cache(table, FALSE, heap);
- dict_sys->sys_columns = table;
- mem_heap_empty(heap);
-
- index = dict_mem_index_create("SYS_COLUMNS", "CLUST_IND",
- DICT_HDR_SPACE,
- DICT_UNIQUE | DICT_CLUSTERED, 2);
-
- dict_mem_index_add_field(index, "TABLE_ID", 0);
- dict_mem_index_add_field(index, "POS", 0);
-
- index->id = DICT_COLUMNS_ID;
- btr_search_index_init(index);
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_COLUMNS,
- MLOG_4BYTES, &mtr),
- FALSE);
- ut_a(error == DB_SUCCESS);
-
- /*-------------------------*/
- table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0, 0);
-
- dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "N_FIELDS", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4);
-
- table->id = DICT_INDEXES_ID;
-
- dict_table_add_to_cache(table, FALSE, heap);
- dict_sys->sys_indexes = table;
- mem_heap_empty(heap);
-
- index = dict_mem_index_create("SYS_INDEXES", "CLUST_IND",
- DICT_HDR_SPACE,
- DICT_UNIQUE | DICT_CLUSTERED, 2);
-
- dict_mem_index_add_field(index, "TABLE_ID", 0);
- dict_mem_index_add_field(index, "ID", 0);
-
- index->id = DICT_INDEXES_ID;
- btr_search_index_init(index);
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_INDEXES,
- MLOG_4BYTES, &mtr),
- FALSE);
- ut_a(error == DB_SUCCESS);
-
- /*-------------------------*/
- table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0, 0);
-
- dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0);
- dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4);
- dict_mem_table_add_col(table, heap, "COL_NAME", DATA_BINARY, 0, 0);
-
- table->id = DICT_FIELDS_ID;
-
- dict_table_add_to_cache(table, FALSE, heap);
- dict_sys->sys_fields = table;
- mem_heap_free(heap);
-
- index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND",
- DICT_HDR_SPACE,
- DICT_UNIQUE | DICT_CLUSTERED, 2);
-
- dict_mem_index_add_field(index, "INDEX_ID", 0);
- dict_mem_index_add_field(index, "POS", 0);
-
- index->id = DICT_FIELDS_ID;
- btr_search_index_init(index);
- error = dict_index_add_to_cache(table, index,
- mtr_read_ulint(dict_hdr
- + DICT_HDR_FIELDS,
- MLOG_4BYTES, &mtr),
- FALSE);
- ut_a(error == DB_SUCCESS);
-
- mtr_commit(&mtr);
-
- /*-------------------------*/
-
- /* Initialize the insert buffer table and index for each tablespace */
-
- dberr_t err = DB_SUCCESS;
-
- err = ibuf_init_at_db_start();
-
- if (err == DB_SUCCESS) {
- if (srv_read_only_mode && !ibuf_is_empty()) {
-
- if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Change buffer must be empty when --innodb-read-only "
- "is set!"
- "You can try to recover the database with innodb_force_recovery=5");
-
- err = DB_ERROR;
- } else {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Change buffer not empty when --innodb-read-only "
- "is set! but srv_force_recovery = %lu, ignoring.",
- srv_force_recovery);
- }
- }
-
- if (err == DB_SUCCESS) {
- /* Load definitions of other indexes on system tables */
-
- dict_load_sys_table(dict_sys->sys_tables);
- dict_load_sys_table(dict_sys->sys_columns);
- dict_load_sys_table(dict_sys->sys_indexes);
- dict_load_sys_table(dict_sys->sys_fields);
- }
-
- mutex_exit(&(dict_sys->mutex));
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Inserts the basic system table data into themselves in the database
-creation. */
-static
-void
-dict_insert_initial_data(void)
-/*==========================*/
-{
- /* Does nothing yet */
-}
-
-/*****************************************************************//**
-Creates and initializes the data dictionary at the server bootstrap.
-@return DB_SUCCESS or error code. */
-UNIV_INTERN
-dberr_t
-dict_create(void)
-/*=============*/
-{
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- dict_hdr_create(&mtr);
-
- mtr_commit(&mtr);
-
- dberr_t err = dict_boot();
-
- if (err == DB_SUCCESS) {
- dict_insert_initial_data();
- }
-
- return(err);
-}
diff --git a/storage/xtradb/dict/dict0crea.cc b/storage/xtradb/dict/dict0crea.cc
deleted file mode 100644
index 6d5b12474eb..00000000000
--- a/storage/xtradb/dict/dict0crea.cc
+++ /dev/null
@@ -1,1992 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file dict/dict0crea.cc
-Database object creation
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dict0crea.h"
-
-#ifdef UNIV_NONINL
-#include "dict0crea.ic"
-#endif
-
-#include "btr0pcur.h"
-#include "btr0btr.h"
-#include "page0page.h"
-#include "mach0data.h"
-#include "dict0boot.h"
-#include "dict0dict.h"
-#include "que0que.h"
-#include "row0ins.h"
-#include "row0mysql.h"
-#include "row0sel.h"
-#include "pars0pars.h"
-#include "trx0roll.h"
-#include "usr0sess.h"
-#include "ut0vec.h"
-#include "dict0priv.h"
-#include "fts0priv.h"
-#include "ha_prototypes.h"
-
-/*****************************************************************//**
-Based on a table object, this function builds the entry to be inserted
-in the SYS_TABLES system table.
-@return the tuple which should be inserted */
-static
-dtuple_t*
-dict_create_sys_tables_tuple(
-/*=========================*/
- const dict_table_t* table, /*!< in: table */
- mem_heap_t* heap) /*!< in: memory heap from
- which the memory for the built
- tuple is allocated */
-{
- dict_table_t* sys_tables;
- dtuple_t* entry;
- dfield_t* dfield;
- byte* ptr;
- ulint type;
-
- ut_ad(table);
- ut_ad(heap);
-
- sys_tables = dict_sys->sys_tables;
-
- entry = dtuple_create(heap, 8 + DATA_N_SYS_COLS);
-
- dict_table_copy_types(entry, sys_tables);
-
- /* 0: NAME -----------------------------*/
- dfield = dtuple_get_nth_field(
- entry, DICT_COL__SYS_TABLES__NAME);
-
- dfield_set_data(dfield, table->name, ut_strlen(table->name));
-
- /* 1: DB_TRX_ID added later */
- /* 2: DB_ROLL_PTR added later */
- /* 3: ID -------------------------------*/
- dfield = dtuple_get_nth_field(
- entry, DICT_COL__SYS_TABLES__ID);
-
- ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
- mach_write_to_8(ptr, table->id);
-
- dfield_set_data(dfield, ptr, 8);
-
- /* 4: N_COLS ---------------------------*/
- dfield = dtuple_get_nth_field(
- entry, DICT_COL__SYS_TABLES__N_COLS);
-
- ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
- mach_write_to_4(ptr, table->n_def
- | ((table->flags & DICT_TF_COMPACT) << 31));
- dfield_set_data(dfield, ptr, 4);
-
- /* 5: TYPE (table flags) -----------------------------*/
- dfield = dtuple_get_nth_field(
- entry, DICT_COL__SYS_TABLES__TYPE);
-
- ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
-
- /* Validate the table flags and convert them to what is saved in
- SYS_TABLES.TYPE. Table flag values 0 and 1 are both written to
- SYS_TABLES.TYPE as 1. */
- type = dict_tf_to_sys_tables_type(table->flags);
- mach_write_to_4(ptr, type);
-
- dfield_set_data(dfield, ptr, 4);
-
- /* 6: MIX_ID (obsolete) ---------------------------*/
- dfield = dtuple_get_nth_field(
- entry, DICT_COL__SYS_TABLES__MIX_ID);
-
- ptr = static_cast<byte*>(mem_heap_zalloc(heap, 8));
-
- dfield_set_data(dfield, ptr, 8);
-
- /* 7: MIX_LEN (additional flags) --------------------------*/
- dfield = dtuple_get_nth_field(
- entry, DICT_COL__SYS_TABLES__MIX_LEN);
-
- ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
- /* Be sure all non-used bits are zero. */
- ut_a(!(table->flags2 & ~DICT_TF2_BIT_MASK));
- mach_write_to_4(ptr, table->flags2);
-
- dfield_set_data(dfield, ptr, 4);
-
- /* 8: CLUSTER_NAME ---------------------*/
- dfield = dtuple_get_nth_field(
- entry, DICT_COL__SYS_TABLES__CLUSTER_ID);
- dfield_set_null(dfield); /* not supported */
-
- /* 9: SPACE ----------------------------*/
- dfield = dtuple_get_nth_field(
- entry, DICT_COL__SYS_TABLES__SPACE);
-
- ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
- mach_write_to_4(ptr, table->space);
-
- dfield_set_data(dfield, ptr, 4);
- /*----------------------------------*/
-
- return(entry);
-}
-
-/*****************************************************************//**
-Based on a table object, this function builds the entry to be inserted
-in the SYS_COLUMNS system table.
-@return the tuple which should be inserted */
-static
-dtuple_t*
-dict_create_sys_columns_tuple(
-/*==========================*/
- const dict_table_t* table, /*!< in: table */
- ulint i, /*!< in: column number */
- mem_heap_t* heap) /*!< in: memory heap from
- which the memory for the built
- tuple is allocated */
-{
- dict_table_t* sys_columns;
- dtuple_t* entry;
- const dict_col_t* column;
- dfield_t* dfield;
- byte* ptr;
- const char* col_name;
-
- ut_ad(table);
- ut_ad(heap);
-
- column = dict_table_get_nth_col(table, i);
-
- sys_columns = dict_sys->sys_columns;
-
- entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS);
-
- dict_table_copy_types(entry, sys_columns);
-
- /* 0: TABLE_ID -----------------------*/
- dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__TABLE_ID);
-
- ptr = static_cast<byte*>(mem_heap_alloc(heap, 8));
- mach_write_to_8(ptr, table->id);
-
- dfield_set_data(dfield, ptr, 8);
-
- /* 1: POS ----------------------------*/
- dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__POS);
-
- ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
- mach_write_to_4(ptr, i);
-
- dfield_set_data(dfield, ptr, 4);
-
- /* 2: DB_TRX_ID added later */
- /* 3: DB_ROLL_PTR added later */
- /* 4: NAME ---------------------------*/
- dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__NAME);
-
- col_name = dict_table_get_col_name(table, i);
- dfield_set_data(dfield, col_name, ut_strlen(col_name));
-
- /* 5: MTYPE --------------------------*/
- dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__MTYPE);
-
- ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
- mach_write_to_4(ptr, column->mtype);
-
- dfield_set_data(dfield, ptr, 4);
-
- /* 6: PRTYPE -------------------------*/
- dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__PRTYPE);
-
- ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
- mach_write_to_4(ptr, column->prtype);
-
- dfield_set_data(dfield, ptr, 4);
-
- /* 7: LEN ----------------------------*/
- dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__LEN);
-
- ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
- mach_write_to_4(ptr, column->len);
-
- dfield_set_data(dfield, ptr, 4);
-
- /* 8: PREC ---------------------------*/
- dfield = dtuple_get_nth_field(entry, DICT_COL__SYS_COLUMNS__PREC);
-
- ptr = static_cast<byte*>(mem_heap_alloc(heap, 4));
- mach_write_to_4(ptr, 0/* unused */);
-
- dfield_set_data(dfield, ptr, 4);
- /*---------------------------------*/
-
- return(entry);
-}
-
-/***************************************************************//**
-Prepares the SYS_TABLES row for the table of a table create node.
-A new table id is assigned and recorded in the transaction. When the table
-is flagged DICT_TF2_USE_TABLESPACE, a new tablespace id is also allocated,
-the single-table tablespace is created and its header is initialized;
-otherwise the table flags are reduced to the DICT_TF_COMPACT bit.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-dict_build_table_def_step(
-/*======================*/
-	que_thr_t*	thr,	/*!< in: query thread */
-	tab_node_t*	node)	/*!< in: table create node */
-{
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	dict_table_t*	table = node->table;
-	const bool	own_tablespace = DICT_TF2_FLAG_IS_SET(
-		table, DICT_TF2_USE_TABLESPACE);
-
-	dict_hdr_get_new_id(&table->id, NULL, NULL);
-	thr_get_trx(thr)->table_id = table->id;
-
-	/* Every newly created table carries the hex-name flag */
-	DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
-	DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
-			DICT_TF2_FLAG_UNSET(table,
-					    DICT_TF2_FTS_AUX_HEX_NAME););
-
-	if (!own_tablespace) {
-		/* The table goes into the system tablespace: Barracuda
-		features are disallowed there, so only the bit that tells
-		redundant from compact row format is kept */
-		table->flags &= DICT_TF_COMPACT;
-	} else {
-		/* The table gets a tablespace of its own, which needs a
-		fresh space id */
-		ulint	space_id = 0;
-
-		dict_hdr_get_new_id(NULL, NULL, &space_id);
-
-		DBUG_EXECUTE_IF(
-			"ib_create_table_fail_out_of_space_ids",
-			space_id = ULINT_UNDEFINED;
-		);
-
-		if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED)) {
-			return(DB_ERROR);
-		}
-
-		/* The new single-table tablespace starts out with 4 pages:
-		- page 0: fsp header and an extent descriptor page,
-		- page 1: ibuf bitmap page,
-		- page 2: first inode page,
-		- page 3: reserved for the root of the clustered index of
-		the table being created. */
-
-		const char* const	path = table->data_dir_path
-			? table->data_dir_path
-			: table->dir_path_of_temp_table;
-
-		ut_ad(dict_table_get_format(table) <= UNIV_FORMAT_MAX);
-		ut_ad(!dict_table_zip_size(table)
-		      || dict_table_get_format(table) >= UNIV_FORMAT_B);
-
-		const dberr_t	create_err
-			= fil_create_new_single_table_tablespace(
-				space_id, table->name, path,
-				dict_tf_to_fsp_flags(table->flags),
-				table->flags2,
-				FIL_IBD_FILE_INITIAL_SIZE,
-				node->mode, node->key_id);
-
-		/* The space id is stored in the table object even when the
-		tablespace could not be created */
-		table->space = (unsigned int) space_id;
-
-		if (create_err != DB_SUCCESS) {
-			return(create_err);
-		}
-
-		mtr_t	mtr;
-
-		mtr_start(&mtr);
-		fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr);
-		mtr_commit(&mtr);
-	}
-
-	ins_node_set_new_row(
-		node->tab_def,
-		dict_create_sys_tables_tuple(table, node->heap));
-
-	return(DB_SUCCESS);
-}
-
-/***************************************************************//**
-Builds the SYS_COLUMNS row for column number node->col_no of the table
-being created and sets it as the new row of the column insert node. */
-static
-void
-dict_build_col_def_step(
-/*====================*/
-	tab_node_t*	node)	/*!< in: table create node */
-{
-	ins_node_set_new_row(
-		node->col_def,
-		dict_create_sys_columns_tuple(
-			node->table, node->col_no, node->heap));
-}
-
-/*****************************************************************//**
-Builds, from an index object, the row that is to be inserted into the
-SYS_INDEXES system table. The table id is taken from the table found by
-index->table_name and PAGE_NO is initialized to FIL_NULL.
-@return the tuple which should be inserted */
-static
-dtuple_t*
-dict_create_sys_indexes_tuple(
-/*==========================*/
-	const dict_index_t*	index,	/*!< in: index */
-	mem_heap_t*		heap)	/*!< in: memory heap from
-					which the memory for the built
-					tuple is allocated */
-{
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-	ut_ad(index);
-	ut_ad(heap);
-
-	dict_table_t*	owner = dict_table_get_low(index->table_name);
-	dtuple_t*	tuple = dtuple_create(heap, 7 + DATA_N_SYS_COLS);
-	byte*		buf;
-
-	dict_table_copy_types(tuple, dict_sys->sys_indexes);
-
-	/* TABLE_ID (8 bytes) */
-	buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
-	mach_write_to_8(buf, owner->id);
-	dfield_set_data(
-		dtuple_get_nth_field(tuple, DICT_COL__SYS_INDEXES__TABLE_ID),
-		buf, 8);
-
-	/* ID (8 bytes) */
-	buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
-	mach_write_to_8(buf, index->id);
-	dfield_set_data(
-		dtuple_get_nth_field(tuple, DICT_COL__SYS_INDEXES__ID),
-		buf, 8);
-
-	/* DB_TRX_ID and DB_ROLL_PTR are added later */
-
-	/* NAME: points at the name stored in the index object */
-	dfield_set_data(
-		dtuple_get_nth_field(tuple, DICT_COL__SYS_INDEXES__NAME),
-		index->name, ut_strlen(index->name));
-
-	/* N_FIELDS (4 bytes) */
-	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-	mach_write_to_4(buf, index->n_fields);
-	dfield_set_data(
-		dtuple_get_nth_field(tuple, DICT_COL__SYS_INDEXES__N_FIELDS),
-		buf, 4);
-
-	/* TYPE (4 bytes) */
-	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-	mach_write_to_4(buf, index->type);
-	dfield_set_data(
-		dtuple_get_nth_field(tuple, DICT_COL__SYS_INDEXES__TYPE),
-		buf, 4);
-
-	/* SPACE (4 bytes) */
-	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-	mach_write_to_4(buf, index->space);
-	dfield_set_data(
-		dtuple_get_nth_field(tuple, DICT_COL__SYS_INDEXES__SPACE),
-		buf, 4);
-
-	/* PAGE_NO (4 bytes): no root page is known at this point */
-	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-	mach_write_to_4(buf, FIL_NULL);
-	dfield_set_data(
-		dtuple_get_nth_field(tuple, DICT_COL__SYS_INDEXES__PAGE_NO),
-		buf, 4);
-
-	return(tuple);
-}
-
-/*****************************************************************//**
-Builds, from an index object and a field number, the row that is to be
-inserted into the SYS_FIELDS system table for that field.
-@return the tuple which should be inserted */
-static
-dtuple_t*
-dict_create_sys_fields_tuple(
-/*=========================*/
-	const dict_index_t*	index,	/*!< in: index */
-	ulint			fld_no,	/*!< in: field number */
-	mem_heap_t*		heap)	/*!< in: memory heap from
-					which the memory for the built
-					tuple is allocated */
-{
-	ut_ad(index);
-	ut_ad(heap);
-
-	/* Find out whether any field of the index is a column prefix;
-	this selects the encoding of POS below */
-	bool	has_prefix = false;
-
-	for (ulint i = 0; i < index->n_fields && !has_prefix; i++) {
-		has_prefix = dict_index_get_nth_field(index, i)->prefix_len
-			> 0;
-	}
-
-	const dict_field_t*	fld = dict_index_get_nth_field(index, fld_no);
-	dtuple_t*		tuple = dtuple_create(
-		heap, 3 + DATA_N_SYS_COLS);
-	byte*			buf;
-
-	dict_table_copy_types(tuple, dict_sys->sys_fields);
-
-	/* INDEX_ID (8 bytes) */
-	buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
-	mach_write_to_8(buf, index->id);
-	dfield_set_data(
-		dtuple_get_nth_field(tuple, DICT_COL__SYS_FIELDS__INDEX_ID),
-		buf, 8);
-
-	/* POS (4 bytes). With column prefix fields in the index, the
-	field number goes to the 2 high bytes and the prefix length to the
-	2 low bytes. Without them only the field number is stored, in the
-	low bytes, which keeps the format compatible with InnoDB versions
-	< 4.0.14. */
-	ulint	pos = fld_no;
-
-	if (has_prefix) {
-		pos = (fld_no << 16) + fld->prefix_len;
-	}
-
-	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-	mach_write_to_4(buf, pos);
-	dfield_set_data(
-		dtuple_get_nth_field(tuple, DICT_COL__SYS_FIELDS__POS),
-		buf, 4);
-
-	/* DB_TRX_ID and DB_ROLL_PTR are added later */
-
-	/* COL_NAME: points at the name stored in the field object */
-	dfield_set_data(
-		dtuple_get_nth_field(tuple, DICT_COL__SYS_FIELDS__COL_NAME),
-		fld->name, ut_strlen(fld->name));
-
-	return(tuple);
-}
-
-/*****************************************************************//**
-Creates the tuple used to look up an index entry in SYS_INDEXES when the
-root page number of a newly created index tree is written to it. The
-search tuple consists of copies of the first two fields of the given tuple.
-@return the tuple for search */
-static
-dtuple_t*
-dict_create_search_tuple(
-/*=====================*/
-	const dtuple_t*	tuple,	/*!< in: the tuple inserted in the SYS_INDEXES
-				table */
-	mem_heap_t*	heap)	/*!< in: memory heap from which the memory for
-				the built tuple is allocated */
-{
-	ut_ad(tuple && heap);
-
-	dtuple_t*	key = dtuple_create(heap, 2);
-
-	for (ulint i = 0; i < 2; i++) {
-		dfield_copy(dtuple_get_nth_field(key, i),
-			    dtuple_get_nth_field(tuple, i));
-	}
-
-	ut_ad(dtuple_validate(key));
-
-	return(key);
-}
-
-/***************************************************************//**
-Prepares the SYS_INDEXES row for the index of an index create node: looks
-up the table by index->table_name, assigns a new index id, copies the
-tablespace id from the table and stamps index and table with the id of the
-creating transaction.
-@return DB_SUCCESS, or DB_TABLE_NOT_FOUND if the table lookup fails */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-dict_build_index_def_step(
-/*======================*/
-	que_thr_t*	thr,	/*!< in: query thread */
-	ind_node_t*	node)	/*!< in: index create node */
-{
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	trx_t*		trx = thr_get_trx(thr);
-	dict_index_t*	index = node->index;
-	dict_table_t*	table = dict_table_get_low(index->table_name);
-
-	if (table == NULL) {
-		return(DB_TABLE_NOT_FOUND);
-	}
-
-	/* Only the first table id seen by the transaction is recorded */
-	if (trx->table_id == 0) {
-		trx->table_id = table->id;
-	}
-
-	node->table = table;
-
-	ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
-	      || dict_index_is_clust(index));
-
-	dict_hdr_get_new_id(NULL, &index->id, NULL);
-
-	/* All indexes of a table live in the tablespace of the table */
-	index->space = table->space;
-	node->page_no = FIL_NULL;
-
-	node->ind_row = dict_create_sys_indexes_tuple(index, node->heap);
-	ins_node_set_new_row(node->ind_def, node->ind_row);
-
-	/* Remember which transaction created the index */
-	index->trx_id = trx->id;
-	ut_ad(table->def_trx_id <= trx->id);
-	table->def_trx_id = trx->id;
-
-	return(DB_SUCCESS);
-}
-
-/***************************************************************//**
-Builds the SYS_FIELDS row for field number node->field_no of the index
-being created and sets it as the new row of the field insert node. */
-static
-void
-dict_build_field_def_step(
-/*======================*/
-	ind_node_t*	node)	/*!< in: index create node */
-{
-	ins_node_set_new_row(
-		node->field_def,
-		dict_create_sys_fields_tuple(
-			node->index, node->field_no, node->heap));
-}
-
-/***************************************************************//**
-Creates the index tree for the index of an index create node and writes
-the root page number to the PAGE_NO field of the index's SYS_INDEXES
-record, all in one mini-transaction. Nothing is done for an index whose
-type is exactly DICT_FTS. If the table is marked file_unreadable or is
-discarded, no tree is created and FIL_NULL is written as the page number.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-dict_create_index_tree_step(
-/*========================*/
- ind_node_t* node) /*!< in: index create node */
-{
- dict_index_t* index;
- dict_table_t* sys_indexes;
- dtuple_t* search_tuple;
- btr_pcur_t pcur;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- index = node->index;
-
- sys_indexes = dict_sys->sys_indexes;
-
- if (index->type == DICT_FTS) {
- /* FTS index does not need an index tree */
- return(DB_SUCCESS);
- }
-
- /* Run a mini-transaction in which the index tree is allocated for
- the index and its root address is written to the index entry in
- sys_indexes */
-
- mtr_start(&mtr);
-
- search_tuple = dict_create_search_tuple(node->ind_row, node->heap);
- /* Open a cursor on the first index of SYS_INDEXES with a PAGE_CUR_L
- search for the key built from node->ind_row, then step to the next
- user record; that record is the one updated below. */
- btr_pcur_open(UT_LIST_GET_FIRST(sys_indexes->indexes),
- search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF,
- &pcur, &mtr);
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
-
- dberr_t err = DB_SUCCESS;
- ulint zip_size = dict_table_zip_size(index->table);
- /* No tree is created for an unreadable or discarded table. */
- if (node->index->table->file_unreadable
- || dict_table_is_discarded(node->index->table)) {
-
- node->page_no = FIL_NULL;
- } else {
- node->page_no = btr_create(
- index->type, index->space, zip_size,
- index->id, index, &mtr);
-
- if (node->page_no == FIL_NULL) {
- err = DB_OUT_OF_FILE_SPACE;
- }
- /* DBUG hook that forces the failure outcome for testing. */
- DBUG_EXECUTE_IF("ib_import_create_index_failure_1",
- node->page_no = FIL_NULL;
- err = DB_OUT_OF_FILE_SPACE; );
- }
- /* node->page_no is written even when it is FIL_NULL. */
- page_rec_write_field(
- btr_pcur_get_rec(&pcur), DICT_FLD__SYS_INDEXES__PAGE_NO,
- node->page_no, &mtr);
-
- btr_pcur_close(&pcur);
-
- mtr_commit(&mtr);
-
- return(err);
-}
-
-/*******************************************************************//**
-Drops the index tree associated with a row in SYS_INDEXES table and sets
-the PAGE_NO field of the row to FIL_NULL. Returns without doing anything
-if PAGE_NO already is FIL_NULL or if fil_space_get_zip_size() returns
-ULINT_UNDEFINED for the tablespace of the row. */
-UNIV_INTERN
-void
-dict_drop_index_tree(
-/*=================*/
-	rec_t*	rec,	/*!< in/out: record in the clustered index
-			of SYS_INDEXES table */
-	mtr_t*	mtr)	/*!< in: mtr having the latch on the record page */
-{
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-	ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
-
-	ulint		field_len;
-	const byte*	field = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &field_len);
-
-	ut_ad(field_len == 4);
-
-	const ulint	root = mtr_read_ulint(field, MLOG_4BYTES, mtr);
-
-	if (root == FIL_NULL) {
-		/* The tree has already been freed */
-		return;
-	}
-
-	field = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_INDEXES__SPACE, &field_len);
-
-	ut_ad(field_len == 4);
-
-	const ulint	space_id = mtr_read_ulint(field, MLOG_4BYTES, mtr);
-	const ulint	zip = fil_space_get_zip_size(space_id);
-
-	if (UNIV_UNLIKELY(zip == ULINT_UNDEFINED)) {
-		/* It is a single table tablespace and the .ibd file is
-		missing: do nothing */
-		return;
-	}
-
-	/* First free every page except the root; this part may span
-	several mini-transactions */
-	btr_free_but_not_root(space_id, zip, root);
-
-	/* The root page is freed in the mini-transaction that also writes
-	FIL_NULL to the SYS_INDEXES record: that mini-transaction marks
-	the B-tree totally freed */
-	btr_free_root(space_id, zip, root, mtr);
-
-	page_rec_write_field(rec, DICT_FLD__SYS_INDEXES__PAGE_NO,
-			     FIL_NULL, mtr);
-}
-
-/*******************************************************************//**
-Truncates the index tree associated with a row in SYS_INDEXES table.
-With space == 0 the existing tree, located in the tablespace recorded in
-the row, is freed before a new one is created; with a nonzero space nothing
-is freed and the new tree is created in the given tablespace. PAGE_NO of
-the row is set to FIL_NULL and the mini-transaction is committed and
-restarted before the new tree is created. On success index->page of the
-matching in-memory index is updated; this function does not write the new
-root page number to the SYS_INDEXES row.
-@return new root page number, or FIL_NULL on failure (tablespace missing,
-index is an FTS index, or index not found among table->indexes) */
-UNIV_INTERN
-ulint
-dict_truncate_index_tree(
-/*=====================*/
-	dict_table_t*	table,	/*!< in: the table the index belongs to */
-	ulint		space,	/*!< in: 0=truncate,
-				nonzero=create the index tree in the
-				given tablespace */
-	btr_pcur_t*	pcur,	/*!< in/out: persistent cursor pointing to
-				record in the clustered index of
-				SYS_INDEXES table. The cursor may be
-				repositioned in this call. */
-	mtr_t*		mtr)	/*!< in: mtr having the latch
-				on the record page. The mtr may be
-				committed and restarted in this call. */
-{
-	ulint		root_page_no;
-	ibool		drop = !space;
-	ulint		zip_size;
-	ulint		type;
-	index_id_t	index_id;
-	rec_t*		rec;
-	const byte*	ptr;
-	ulint		len;
-	dict_index_t*	index;
-	bool		has_been_dropped = false;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-	ut_a(!dict_table_is_comp(dict_sys->sys_indexes));
-	rec = btr_pcur_get_rec(pcur);
-	ptr = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
-
-	ut_ad(len == 4);
-
-	root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
-
-	if (drop && root_page_no == FIL_NULL) {
-		/* There is no tree to free; only create a new one.
-		NOTE(review): `space` stays 0 on this path, so the lookup
-		and btr_create() below use tablespace 0 rather than the
-		SPACE value of the row - confirm this is intended. */
-		has_been_dropped = true;
-		drop = FALSE;
-	}
-
-	ptr = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_INDEXES__SPACE, &len);
-
-	ut_ad(len == 4);
-
-	if (drop) {
-		space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr);
-	}
-
-	zip_size = fil_space_get_zip_size(space);
-
-	if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
-		/* It is a single table tablespace and the .ibd file is
-		missing: do nothing */
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr, " InnoDB: Trying to TRUNCATE"
-			" a missing .ibd file of table %s!\n", table->name);
-		return(FIL_NULL);
-	}
-
-	ptr = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_INDEXES__TYPE, &len);
-	ut_ad(len == 4);
-	type = mach_read_from_4(ptr);
-
-	ptr = rec_get_nth_field_old(rec, DICT_FLD__SYS_INDEXES__ID, &len);
-	ut_ad(len == 8);
-	index_id = mach_read_from_8(ptr);
-
-	if (!drop) {
-
-		goto create;
-	}
-
-	/* We free all the pages but the root page first; this operation
-	may span several mini-transactions */
-
-	btr_free_but_not_root(space, zip_size, root_page_no);
-
-	/* Then we free the root page in the same mini-transaction where
-	we create the b-tree and write its new root page number to the
-	appropriate field in the SYS_INDEXES record: this mini-transaction
-	marks the B-tree totally truncated */
-
-	btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, NULL, mtr);
-
-	btr_free_root(space, zip_size, root_page_no, mtr);
-create:
-	/* We will temporarily write FIL_NULL to the PAGE_NO field
-	in SYS_INDEXES, so that the database will not get into an
-	inconsistent state in case it crashes between the mtr_commit()
-	below and the following mtr_commit() call. */
-	page_rec_write_field(rec, DICT_FLD__SYS_INDEXES__PAGE_NO,
-			     FIL_NULL, mtr);
-
-	/* We will need to commit the mini-transaction in order to avoid
-	deadlocks in the btr_create() call, because otherwise we would
-	be freeing and allocating pages in the same mini-transaction. */
-	btr_pcur_store_position(pcur, mtr);
-	mtr_commit(mtr);
-
-	mtr_start(mtr);
-	btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr);
-
-	/* Find the index corresponding to this SYS_INDEXES record. */
-	for (index = UT_LIST_GET_FIRST(table->indexes);
-	     index;
-	     index = UT_LIST_GET_NEXT(indexes, index)) {
-		if (index->id == index_id) {
-			if (index->type & DICT_FTS) {
-				return(FIL_NULL);
-			} else {
-				if (has_been_dropped) {
-					/* The message starts with
-					" InnoDB:" like the other two
-					in this function, so it gets the
-					same timestamp prefix. */
-					ut_print_timestamp(stderr);
-					fprintf(stderr, " InnoDB: Trying to"
-						" TRUNCATE a missing index of"
-						" table %s!\n",
-						index->table->name);
-				}
-
-				root_page_no = btr_create(type, space, zip_size,
-							  index_id, index, mtr);
-				index->page = (unsigned int) root_page_no;
-				return(root_page_no);
-			}
-		}
-	}
-
-	ut_print_timestamp(stderr);
-	fprintf(stderr,
-		" InnoDB: Index %llu of table %s is missing\n"
-		"InnoDB: from the data dictionary during TRUNCATE!\n",
-		(ullint) index_id,
-		table->name);
-
-	return(FIL_NULL);
-}
-
-/*********************************************************************//**
-Creates a table create graph node. The node and its insert sub-nodes for
-SYS_TABLES and SYS_COLUMNS are allocated from the given heap; the node also
-gets a heap of its own, created with mem_heap_create(256).
-@return own: table create node */
-UNIV_INTERN
-tab_node_t*
-tab_create_graph_create(
-/*====================*/
-	dict_table_t*	table,	/*!< in: table to create, built as a memory data
-				structure */
-	mem_heap_t*	heap,	/*!< in: heap where created */
-	bool		commit,	/*!< in: true if the commit node should be
-				added to the query graph */
-	fil_encryption_t mode,	/*!< in: encryption mode */
-	ulint		key_id)	/*!< in: encryption key_id */
-{
-	tab_node_t*	graph = static_cast<tab_node_t*>(
-		mem_heap_alloc(heap, sizeof(*graph)));
-
-	graph->common.type = QUE_NODE_CREATE_TABLE;
-	graph->table = table;
-	graph->state = TABLE_BUILD_TABLE_DEF;
-	graph->heap = mem_heap_create(256);
-	graph->mode = mode;
-	graph->key_id = key_id;
-
-	/* Insert node for the SYS_TABLES row */
-	graph->tab_def = ins_node_create(
-		INS_DIRECT, dict_sys->sys_tables, heap);
-	graph->tab_def->common.parent = graph;
-
-	/* Insert node for the SYS_COLUMNS rows */
-	graph->col_def = ins_node_create(
-		INS_DIRECT, dict_sys->sys_columns, heap);
-	graph->col_def->common.parent = graph;
-
-	/* The commit node is optional */
-	graph->commit_node = 0;
-
-	if (commit) {
-		graph->commit_node = trx_commit_node_create(heap);
-		graph->commit_node->common.parent = graph;
-	}
-
-	return(graph);
-}
-
-/*********************************************************************//**
-Creates an index create graph node. The node and its insert sub-nodes for
-SYS_INDEXES and SYS_FIELDS are allocated from the given heap; the node also
-gets a heap of its own, created with mem_heap_create(256).
-@return own: index create node */
-UNIV_INTERN
-ind_node_t*
-ind_create_graph_create(
-/*====================*/
-	dict_index_t*	index,	/*!< in: index to create, built as a memory data
-				structure */
-	mem_heap_t*	heap,	/*!< in: heap where created */
-	bool		commit)	/*!< in: true if the commit node should be
-				added to the query graph */
-{
-	ind_node_t*	graph = static_cast<ind_node_t*>(
-		mem_heap_alloc(heap, sizeof(*graph)));
-
-	graph->common.type = QUE_NODE_CREATE_INDEX;
-	graph->index = index;
-	graph->state = INDEX_BUILD_INDEX_DEF;
-	graph->page_no = FIL_NULL;
-	graph->heap = mem_heap_create(256);
-
-	/* Insert node for the SYS_INDEXES row */
-	graph->ind_def = ins_node_create(
-		INS_DIRECT, dict_sys->sys_indexes, heap);
-	graph->ind_def->common.parent = graph;
-
-	/* Insert node for the SYS_FIELDS rows */
-	graph->field_def = ins_node_create(
-		INS_DIRECT, dict_sys->sys_fields, heap);
-	graph->field_def->common.parent = graph;
-
-	/* The commit node is optional */
-	graph->commit_node = 0;
-
-	if (commit) {
-		graph->commit_node = trx_commit_node_create(heap);
-		graph->commit_node->common.parent = graph;
-	}
-
-	return(graph);
-}
-
-/***********************************************************//**
-Creates a table. This is a high-level function used in SQL execution graphs.
-Each call advances the state of the table create node: first the
-SYS_TABLES row is built, then one SYS_COLUMNS row per call, and finally the
-table is added to the dictionary cache. The outcome is stored in
-trx->error_state.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-dict_create_table_step(
-/*===================*/
-	que_thr_t*	thr)	/*!< in: query thread */
-{
-	ut_ad(thr);
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	dberr_t		err = DB_ERROR;
-	trx_t*		trx = thr_get_trx(thr);
-	tab_node_t*	node = static_cast<tab_node_t*>(thr->run_node);
-
-	ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_TABLE);
-
-	/* Entering from the parent node restarts the state machine */
-	if (thr->prev_node == que_node_get_parent(node)) {
-		node->state = TABLE_BUILD_TABLE_DEF;
-	}
-
-	switch (node->state) {
-	case TABLE_BUILD_TABLE_DEF:
-		/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
-		err = dict_build_table_def_step(thr, node);
-
-		if (err != DB_SUCCESS) {
-			break;
-		}
-
-		node->state = TABLE_BUILD_COL_DEF;
-		node->col_no = 0;
-
-		thr->run_node = node->tab_def;
-
-		return(thr);
-
-	case TABLE_BUILD_COL_DEF:
-		if (node->col_no < (node->table)->n_def) {
-
-			dict_build_col_def_step(node);
-
-			node->col_no++;
-
-			thr->run_node = node->col_def;
-
-			return(thr);
-		}
-
-		node->state = TABLE_COMMIT_WORK;
-		/* fall through */
-
-	case TABLE_COMMIT_WORK:
-		/* Table was correctly defined: do NOT commit the transaction
-		(CREATE TABLE does NOT do an implicit commit of the current
-		transaction) */
-		node->state = TABLE_ADD_TO_CACHE;
-		/* fall through */
-
-	case TABLE_ADD_TO_CACHE:
-		dict_table_add_to_cache(node->table, TRUE, node->heap);
-
-		err = DB_SUCCESS;
-		break;
-
-	default:
-		/* Any other state leaves err at DB_ERROR */
-		break;
-	}
-
-	trx->error_state = err;
-
-	if (err != DB_SUCCESS) {
-		/* DB_LOCK_WAIT and SQL errors are treated alike here */
-		return(NULL);
-	}
-
-	thr->run_node = que_node_get_parent(node);
-
-	return(thr);
-}
-
-/***********************************************************//**
-Creates an index. This is a high-level function used in SQL execution
-graphs. Each call advances the state of the index create node: first the
-SYS_INDEXES row is built, then one SYS_FIELDS row per call, then the index
-is added to the dictionary cache and its tree is created. If creating the
-tree fails, the index is removed from the dictionary cache again (and from
-the FTS cache when it is an FTS index) and node->index is set to NULL.
-The outcome is stored in trx->error_state.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-dict_create_index_step(
-/*===================*/
-	que_thr_t*	thr)	/*!< in: query thread */
-{
-	ind_node_t*	node;
-	dberr_t		err = DB_ERROR;
-	trx_t*		trx;
-
-	ut_ad(thr);
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	trx = thr_get_trx(thr);
-
-	node = static_cast<ind_node_t*>(thr->run_node);
-
-	ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_INDEX);
-
-	/* Entering from the parent node restarts the state machine */
-	if (thr->prev_node == que_node_get_parent(node)) {
-		node->state = INDEX_BUILD_INDEX_DEF;
-	}
-
-	if (node->state == INDEX_BUILD_INDEX_DEF) {
-		/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
-		err = dict_build_index_def_step(thr, node);
-
-		if (err != DB_SUCCESS) {
-
-			goto function_exit;
-		}
-
-		node->state = INDEX_BUILD_FIELD_DEF;
-		node->field_no = 0;
-
-		thr->run_node = node->ind_def;
-
-		return(thr);
-	}
-
-	if (node->state == INDEX_BUILD_FIELD_DEF) {
-
-		if (node->field_no < (node->index)->n_fields) {
-
-			dict_build_field_def_step(node);
-
-			node->field_no++;
-
-			thr->run_node = node->field_def;
-
-			return(thr);
-		} else {
-			node->state = INDEX_ADD_TO_CACHE;
-		}
-	}
-
-	if (node->state == INDEX_ADD_TO_CACHE) {
-
-		index_id_t	index_id = node->index->id;
-
-		err = dict_index_add_to_cache(
-			node->table, node->index, FIL_NULL,
-			trx_is_strict(trx)
-			|| dict_table_get_format(node->table)
-			>= UNIV_FORMAT_B);
-
-		/* node->index is re-read from the cache by id; it is
-		expected to be found exactly when the add succeeded */
-		node->index = dict_index_get_if_in_cache_low(index_id);
-		ut_a((node->index == 0) == (err != DB_SUCCESS));
-
-		if (err != DB_SUCCESS) {
-
-			goto function_exit;
-		}
-
-		node->state = INDEX_CREATE_INDEX_TREE;
-	}
-
-	if (node->state == INDEX_CREATE_INDEX_TREE) {
-
-		err = dict_create_index_tree_step(node);
-
-		DBUG_EXECUTE_IF("ib_dict_create_index_tree_fail",
-				err = DB_OUT_OF_MEMORY;);
-
-		if (err != DB_SUCCESS) {
-			/* If this is a FTS index, we will need to remove
-			it from fts->cache->indexes list as well */
-			if ((node->index->type & DICT_FTS)
-			    && node->table->fts) {
-				fts_index_cache_t*	index_cache;
-
-				rw_lock_x_lock(
-					&node->table->fts->cache->init_lock);
-
-				index_cache = (fts_index_cache_t*)
-					fts_find_index_cache(
-						node->table->fts->cache,
-						node->index);
-
-				/* Tolerate an index that has no entry in
-				the FTS cache instead of dereferencing a
-				NULL lookup result */
-				if (index_cache != NULL) {
-					if (index_cache->words) {
-						rbt_free(index_cache->words);
-						index_cache->words = 0;
-					}
-
-					ib_vector_remove(
-						node->table->fts->cache
-						->indexes,
-						*reinterpret_cast<void**>(
-							index_cache));
-				}
-
-				rw_lock_x_unlock(
-					&node->table->fts->cache->init_lock);
-			}
-
-			dict_index_remove_from_cache(node->table, node->index);
-			node->index = NULL;
-
-			goto function_exit;
-		}
-
-		node->index->page = node->page_no;
-		/* These should have been set in
-		dict_build_index_def_step() and
-		dict_index_add_to_cache(). */
-		ut_ad(node->index->trx_id == trx->id);
-		ut_ad(node->index->table->def_trx_id == trx->id);
-		node->state = INDEX_COMMIT_WORK;
-	}
-
-	if (node->state == INDEX_COMMIT_WORK) {
-
-		/* Index was correctly defined: do NOT commit the transaction
-		(CREATE INDEX does NOT currently do an implicit commit of
-		the current transaction) */
-
-		/* NOTE(review): the state is set back to
-		INDEX_CREATE_INDEX_TREE here; kept as is - confirm that no
-		caller re-enters the node in this state. */
-		node->state = INDEX_CREATE_INDEX_TREE;
-	}
-
-function_exit:
-	trx->error_state = err;
-
-	if (err != DB_SUCCESS) {
-		/* DB_LOCK_WAIT and SQL errors are treated alike here */
-		return(NULL);
-	}
-
-	thr->run_node = que_node_get_parent(node);
-
-	return(thr);
-}
-
-/****************************************************************//**
-Check whether a system table exists. Additionally, if it exists,
-move it to the non-LRU end of the table LRU list. This is only used
-for system tables that can be upgraded or added to an older database,
-which include SYS_FOREIGN, SYS_FOREIGN_COLS, SYS_TABLESPACES and
-SYS_DATAFILES. The table counts as current when both its number of
-columns and its number of indexes match the expected values.
-@return DB_SUCCESS if the sys table exists, DB_CORRUPTION if it exists
-but is not current, DB_TABLE_NOT_FOUND if it does not exist*/
-static
-dberr_t
-dict_check_if_system_table_exists(
-/*==============================*/
-	const char*	tablename,	/*!< in: name of table */
-	ulint		num_fields,	/*!< in: number of fields */
-	ulint		num_indexes)	/*!< in: number of indexes */
-{
-	ut_a(srv_get_active_thread_type() == SRV_NONE);
-
-	mutex_enter(&dict_sys->mutex);
-
-	dberr_t		status;
-	dict_table_t*	found = dict_table_get_low(tablename);
-
-	if (found == NULL) {
-		status = DB_TABLE_NOT_FOUND;
-
-	} else if (UT_LIST_GET_LEN(found->indexes) == num_indexes
-		   && found->n_cols == num_fields) {
-		/* The table is there and has the expected shape: make
-		sure it can't be evicted from the table LRU cache. */
-		dict_table_move_from_lru_to_non_lru(found);
-		status = DB_SUCCESS;
-
-	} else {
-		status = DB_CORRUPTION;
-	}
-
-	mutex_exit(&dict_sys->mutex);
-
-	return(status);
-}
-
-/****************************************************************//**
-Creates the foreign key constraints system tables inside InnoDB
-at server bootstrap or server start if they are not found or are
-not of the right form.
-SYS_FOREIGN and SYS_FOREIGN_COLS are checked first; if both pass, the
-function returns without starting a transaction. Otherwise each table that
-exists with an unexpected shape is dropped, and the creation procedure for
-both tables is run with srv_file_per_table temporarily set to 0.
-@return DB_SUCCESS or error code; DB_OUT_OF_FILE_SPACE from the creation
-is returned as DB_MUST_GET_MORE_FILE_SPACE */
-UNIV_INTERN
-dberr_t
-dict_create_or_check_foreign_constraint_tables(void)
-/*================================================*/
-{
- trx_t* trx;
- my_bool srv_file_per_table_backup;
- dberr_t err;
- dberr_t sys_foreign_err;
- dberr_t sys_foreign_cols_err;
-
- ut_a(srv_get_active_thread_type() == SRV_NONE);
-
- /* Note: The master thread has not been started at this point. */
-
- /* The expected shape is passed as column count and index count. */
- sys_foreign_err = dict_check_if_system_table_exists(
- "SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3);
- sys_foreign_cols_err = dict_check_if_system_table_exists(
- "SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1);
-
- if (sys_foreign_err == DB_SUCCESS
- && sys_foreign_cols_err == DB_SUCCESS) {
- return(DB_SUCCESS);
- }
- /* At least one of the two tables is missing or has the wrong shape. */
- trx = trx_allocate_for_mysql();
-
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
-
- trx->op_info = "creating foreign key sys tables";
-
- row_mysql_lock_data_dictionary(trx);
-
- /* Check which incomplete table definition to drop. */
-
- if (sys_foreign_err == DB_CORRUPTION) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Dropping incompletely created "
- "SYS_FOREIGN table.");
- row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE, TRUE);
- }
-
- if (sys_foreign_cols_err == DB_CORRUPTION) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Dropping incompletely created "
- "SYS_FOREIGN_COLS table.");
-
- row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE, TRUE);
- }
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Creating foreign key constraint system tables.");
-
- /* NOTE: in dict_load_foreigns we use the fact that
- there are 2 secondary indexes on SYS_FOREIGN, and they
- are defined just like below */
-
- /* NOTE: when designing InnoDB's foreign key support in 2001, we made
- an error and made the table names and the foreign key id of type
- 'CHAR' (internally, really a VARCHAR). We should have made the type
- VARBINARY, like in other InnoDB system tables, to get a clean
- design. */
-
- srv_file_per_table_backup = srv_file_per_table;
-
- /* We always want SYSTEM tables to be created inside the system
- tablespace. */
-
- srv_file_per_table = 0;
-
- err = que_eval_sql(
- NULL,
- "PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n"
- "BEGIN\n"
- "CREATE TABLE\n"
- "SYS_FOREIGN(ID CHAR, FOR_NAME CHAR,"
- " REF_NAME CHAR, N_COLS INT);\n"
- "CREATE UNIQUE CLUSTERED INDEX ID_IND"
- " ON SYS_FOREIGN (ID);\n"
- "CREATE INDEX FOR_IND"
- " ON SYS_FOREIGN (FOR_NAME);\n"
- "CREATE INDEX REF_IND"
- " ON SYS_FOREIGN (REF_NAME);\n"
- "CREATE TABLE\n"
- "SYS_FOREIGN_COLS(ID CHAR, POS INT,"
- " FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n"
- "CREATE UNIQUE CLUSTERED INDEX ID_IND"
- " ON SYS_FOREIGN_COLS (ID, POS);\n"
- "END;\n",
- FALSE, trx);
- /* On failure both tables are dropped again. */
- if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Creation of SYS_FOREIGN and SYS_FOREIGN_COLS "
- "has failed with error %lu. Tablespace is full. "
- "Dropping incompletely created tables.",
- (ulong) err);
- /* Debug builds accept only these two failure causes. */
- ut_ad(err == DB_OUT_OF_FILE_SPACE
- || err == DB_TOO_MANY_CONCURRENT_TRXS);
-
- row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE, TRUE);
- row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE, TRUE);
-
- if (err == DB_OUT_OF_FILE_SPACE) {
- err = DB_MUST_GET_MORE_FILE_SPACE;
- }
- }
-
- trx_commit_for_mysql(trx);
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx_free_for_mysql(trx);
- /* Restore the setting that was overridden for the creation. */
- srv_file_per_table = srv_file_per_table_backup;
-
- if (err == DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Foreign key constraint system tables created");
- }
- /* NOTE(review): these checks also run after a failed creation, when
- both tables were just dropped, so the ut_a() calls below look like
- they would fail on that path - confirm against callers. */
- /* Note: The master thread has not been started at this point. */
- /* Confirm and move to the non-LRU part of the table LRU list. */
- sys_foreign_err = dict_check_if_system_table_exists(
- "SYS_FOREIGN", DICT_NUM_FIELDS__SYS_FOREIGN + 1, 3);
- ut_a(sys_foreign_err == DB_SUCCESS);
-
- sys_foreign_cols_err = dict_check_if_system_table_exists(
- "SYS_FOREIGN_COLS", DICT_NUM_FIELDS__SYS_FOREIGN_COLS + 1, 1);
- ut_a(sys_foreign_cols_err == DB_SUCCESS);
-
- return(err);
-}
-
-/****************************************************************//**
-Evaluate the given foreign key SQL statement. On failure a report is
-written to dict_foreign_err_file under dict_foreign_err_mutex; for errors
-other than DB_DUPLICATE_KEY the error number is also printed to stderr.
-Both failure reports rewind the file first.
-@return error code or DB_SUCCESS */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-dict_foreign_eval_sql(
-/*==================*/
-	pars_info_t*	info,	/*!< in: info struct */
-	const char*	sql,	/*!< in: SQL string to evaluate */
-	const char*	name,	/*!< in: table name (for diagnostics) */
-	const char*	id,	/*!< in: foreign key id */
-	trx_t*		trx)	/*!< in/out: transaction */
-{
-	dberr_t	error;
-	FILE*	ef = dict_foreign_err_file;
-
-	error = que_eval_sql(info, sql, FALSE, trx);
-
-	if (error == DB_SUCCESS) {
-		return(DB_SUCCESS);
-	}
-
-	if (error == DB_DUPLICATE_KEY) {
-		mutex_enter(&dict_foreign_err_mutex);
-		rewind(ef);
-		ut_print_timestamp(ef);
-		fputs(" Error in foreign key constraint creation for table ",
-		      ef);
-		ut_print_name(ef, trx, TRUE, name);
-		fputs(".\nA foreign key constraint of name ", ef);
-		ut_print_name(ef, trx, TRUE, id);
-		fputs("\nalready exists."
-		      " (Note that internally InnoDB adds 'databasename'\n"
-		      "in front of the user-defined constraint name.)\n"
-		      "Note that InnoDB's FOREIGN KEY system tables store\n"
-		      "constraint names as case-insensitive, with the\n"
-		      "MySQL standard latin1_swedish_ci collation. If you\n"
-		      "create tables or databases whose names differ only in\n"
-		      "the character case, then collisions in constraint\n"
-		      "names can occur. Workaround: name your constraints\n"
-		      "explicitly with unique names.\n",
-		      ef);
-
-		mutex_exit(&dict_foreign_err_mutex);
-
-		return(error);
-	}
-
-	/* Any other failure is an internal error */
-	fprintf(stderr,
-		"InnoDB: Foreign key constraint creation failed:\n"
-		"InnoDB: internal error number %lu\n", (ulong) error);
-
-	mutex_enter(&dict_foreign_err_mutex);
-	/* Rewind as the duplicate-key report does, so that this report
-	starts at the beginning of the file instead of being appended at
-	the current position */
-	rewind(ef);
-	ut_print_timestamp(ef);
-	fputs(" Internal error in foreign key constraint creation"
-	      " for table ", ef);
-	ut_print_name(ef, trx, TRUE, name);
-	fputs(".\n"
-	      "See the MySQL .err log in the datadir"
-	      " for more information.\n", ef);
-	mutex_exit(&dict_foreign_err_mutex);
-
-	return(error);
-}
-
-/********************************************************************//**
-Add a single foreign key field definition to the data dictionary tables in
-the database.
-@return error code or DB_SUCCESS */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-dict_create_add_foreign_field_to_dictionary(
-/*========================================*/
- ulint field_nr, /*!< in: field number */
- const char* table_name, /*!< in: table name */
- const dict_foreign_t* foreign, /*!< in: foreign */
- trx_t* trx) /*!< in/out: transaction */
-{
- pars_info_t* info = pars_info_create();
-
- pars_info_add_str_literal(info, "id", foreign->id);
-
- pars_info_add_int4_literal(info, "pos", field_nr);
-
- pars_info_add_str_literal(info, "for_col_name",
- foreign->foreign_col_names[field_nr]);
-
- pars_info_add_str_literal(info, "ref_col_name",
- foreign->referenced_col_names[field_nr]);
-
- return(dict_foreign_eval_sql(
- info,
- "PROCEDURE P () IS\n"
- "BEGIN\n"
- "INSERT INTO SYS_FOREIGN_COLS VALUES"
- "(:id, :pos, :for_col_name, :ref_col_name);\n"
- "END;\n",
- table_name, foreign->id, trx));
-}
-
-/********************************************************************//**
-Construct foreign key constraint defintion from data dictionary information.
-*/
-UNIV_INTERN
-char*
-dict_foreign_def_get(
-/*=================*/
- dict_foreign_t* foreign,/*!< in: foreign */
- trx_t* trx) /*!< in: trx */
-{
- char* fk_def = (char *)mem_heap_alloc(foreign->heap, 4*1024);
- const char* tbname;
- char tablebuf[MAX_TABLE_NAME_LEN + 1] = "";
- int i;
- char* bufend;
-
- tbname = dict_remove_db_name(foreign->id);
- bufend = innobase_convert_name(tablebuf, MAX_TABLE_NAME_LEN,
- tbname, strlen(tbname), trx->mysql_thd, FALSE);
- tablebuf[bufend - tablebuf] = '\0';
-
- sprintf(fk_def,
- (char *)"CONSTRAINT %s FOREIGN KEY (", (char *)tablebuf);
-
- for(i = 0; i < foreign->n_fields; i++) {
- char buf[MAX_TABLE_NAME_LEN + 1] = "";
- innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
- foreign->foreign_col_names[i],
- strlen(foreign->foreign_col_names[i]),
- trx->mysql_thd, FALSE);
- strcat(fk_def, buf);
- if (i < foreign->n_fields-1) {
- strcat(fk_def, (char *)",");
- }
- }
-
- strcat(fk_def,(char *)") REFERENCES ");
-
- bufend = innobase_convert_name(tablebuf, MAX_TABLE_NAME_LEN,
- foreign->referenced_table_name,
- strlen(foreign->referenced_table_name),
- trx->mysql_thd, TRUE);
- tablebuf[bufend - tablebuf] = '\0';
-
- strcat(fk_def, tablebuf);
- strcat(fk_def, " (");
-
- for(i = 0; i < foreign->n_fields; i++) {
- char buf[MAX_TABLE_NAME_LEN + 1] = "";
- bufend = innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
- foreign->referenced_col_names[i],
- strlen(foreign->referenced_col_names[i]),
- trx->mysql_thd, FALSE);
- buf[bufend - buf] = '\0';
- strcat(fk_def, buf);
- if (i < foreign->n_fields-1) {
- strcat(fk_def, (char *)",");
- }
- }
- strcat(fk_def, (char *)")");
-
- return fk_def;
-}
-
-/********************************************************************//**
-Convert foreign key column names from data dictionary to SQL-layer.
-*/
-static
-void
-dict_foreign_def_get_fields(
-/*========================*/
- dict_foreign_t* foreign,/*!< in: foreign */
- trx_t* trx, /*!< in: trx */
- char** field, /*!< out: foreign column */
- char** field2, /*!< out: referenced column */
- int col_no) /*!< in: column number */
-{
- char* bufend;
- char* fieldbuf = (char *)mem_heap_alloc(foreign->heap, MAX_TABLE_NAME_LEN+1);
- char* fieldbuf2 = (char *)mem_heap_alloc(foreign->heap, MAX_TABLE_NAME_LEN+1);
-
- bufend = innobase_convert_name(fieldbuf, MAX_TABLE_NAME_LEN,
- foreign->foreign_col_names[col_no],
- strlen(foreign->foreign_col_names[col_no]),
- trx->mysql_thd, FALSE);
-
- fieldbuf[bufend - fieldbuf] = '\0';
-
- bufend = innobase_convert_name(fieldbuf2, MAX_TABLE_NAME_LEN,
- foreign->referenced_col_names[col_no],
- strlen(foreign->referenced_col_names[col_no]),
- trx->mysql_thd, FALSE);
-
- fieldbuf2[bufend - fieldbuf2] = '\0';
- *field = fieldbuf;
- *field2 = fieldbuf2;
-}
-
-/********************************************************************//**
-Add a foreign key definition to the data dictionary tables.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-dict_create_add_foreign_to_dictionary(
-/*==================================*/
- dict_table_t* table,
- const char* name, /*!< in: table name */
- const dict_foreign_t* foreign,/*!< in: foreign key */
- trx_t* trx) /*!< in/out: dictionary transaction */
-{
- dberr_t error;
- pars_info_t* info = pars_info_create();
-
- pars_info_add_str_literal(info, "id", foreign->id);
-
- pars_info_add_str_literal(info, "for_name", name);
-
- pars_info_add_str_literal(info, "ref_name",
- foreign->referenced_table_name);
-
- pars_info_add_int4_literal(info, "n_cols",
- foreign->n_fields + (foreign->type << 24));
-
- error = dict_foreign_eval_sql(info,
- "PROCEDURE P () IS\n"
- "BEGIN\n"
- "INSERT INTO SYS_FOREIGN VALUES"
- "(:id, :for_name, :ref_name, :n_cols);\n"
- "END;\n"
- , name, foreign->id, trx);
-
- if (error != DB_SUCCESS) {
- if (error == DB_DUPLICATE_KEY) {
- char buf[MAX_TABLE_NAME_LEN + 1] = "";
- char tablename[MAX_TABLE_NAME_LEN + 1] = "";
- char* fk_def;
-
- innobase_convert_name(tablename, MAX_TABLE_NAME_LEN,
- table->name, strlen(table->name),
- trx->mysql_thd, TRUE);
-
- innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
- foreign->id, strlen(foreign->id), trx->mysql_thd, FALSE);
-
- fk_def = dict_foreign_def_get((dict_foreign_t*)foreign, trx);
-
- ib_push_warning(trx, error,
- "Create or Alter table %s with foreign key constraint"
- " failed. Foreign key constraint %s"
- " already exists on data dictionary."
- " Foreign key constraint names need to be unique in database."
- " Error in foreign key definition: %s.",
- tablename, buf, fk_def);
- }
-
- return(error);
- }
-
- for (ulint i = 0; i < foreign->n_fields; i++) {
- error = dict_create_add_foreign_field_to_dictionary(
- i, name, foreign, trx);
-
- if (error != DB_SUCCESS) {
- char buf[MAX_TABLE_NAME_LEN + 1] = "";
- char tablename[MAX_TABLE_NAME_LEN + 1] = "";
- char* field=NULL;
- char* field2=NULL;
- char* fk_def;
-
- innobase_convert_name(tablename, MAX_TABLE_NAME_LEN,
- table->name, strlen(table->name),
- trx->mysql_thd, TRUE);
- innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
- foreign->id, strlen(foreign->id), trx->mysql_thd, FALSE);
- fk_def = dict_foreign_def_get((dict_foreign_t*)foreign, trx);
- dict_foreign_def_get_fields((dict_foreign_t*)foreign, trx, &field, &field2, i);
-
- ib_push_warning(trx, error,
- "Create or Alter table %s with foreign key constraint"
- " failed. Error adding foreign key constraint name %s"
- " fields %s or %s to the dictionary."
- " Error in foreign key definition: %s.",
- tablename, buf, i+1, fk_def);
-
- return(error);
- }
- }
-
- return(error);
-}
-
-/** Adds the given set of foreign key objects to the dictionary tables
-in the database. This function does not modify the dictionary cache. The
-caller must ensure that all foreign key objects contain a valid constraint
-name in foreign->id.
-@param[in] local_fk_set set of foreign key objects, to be added to
-the dictionary tables
-@param[in] table table to which the foreign key objects in
-local_fk_set belong to
-@param[in,out] trx transaction
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-dict_create_add_foreigns_to_dictionary(
-/*===================================*/
- const dict_foreign_set& local_fk_set,
- const dict_table_t* table,
- trx_t* trx)
-{
- dict_foreign_t* foreign;
- dberr_t error;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- if (NULL == dict_table_get_low("SYS_FOREIGN")) {
- fprintf(stderr,
- "InnoDB: table SYS_FOREIGN not found"
- " in internal data dictionary\n");
-
- return(DB_ERROR);
- }
-
- for (dict_foreign_set::const_iterator it = local_fk_set.begin();
- it != local_fk_set.end();
- ++it) {
-
- foreign = *it;
- ut_ad(foreign->id != NULL);
-
- error = dict_create_add_foreign_to_dictionary((dict_table_t*)table, table->name,
- foreign, trx);
-
- if (error != DB_SUCCESS) {
-
- return(error);
- }
- }
-
- trx->op_info = "committing foreign key definitions";
-
- trx_commit(trx);
-
- trx->op_info = "";
-
- return(DB_SUCCESS);
-}
-
-/****************************************************************//**
-Creates the tablespaces and datafiles system tables inside InnoDB
-at server bootstrap or server start if they are not found or are
-not of the right form.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_create_or_check_sys_tablespace(void)
-/*=====================================*/
-{
- trx_t* trx;
- my_bool srv_file_per_table_backup;
- dberr_t err;
- dberr_t sys_tablespaces_err;
- dberr_t sys_datafiles_err;
-
- ut_a(srv_get_active_thread_type() == SRV_NONE);
-
- /* Note: The master thread has not been started at this point. */
-
- sys_tablespaces_err = dict_check_if_system_table_exists(
- "SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1);
- sys_datafiles_err = dict_check_if_system_table_exists(
- "SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1);
-
- if (sys_tablespaces_err == DB_SUCCESS
- && sys_datafiles_err == DB_SUCCESS) {
- return(DB_SUCCESS);
- }
-
- trx = trx_allocate_for_mysql();
-
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
-
- trx->op_info = "creating tablepace and datafile sys tables";
-
- row_mysql_lock_data_dictionary(trx);
-
- /* Check which incomplete table definition to drop. */
-
- if (sys_tablespaces_err == DB_CORRUPTION) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Dropping incompletely created "
- "SYS_TABLESPACES table.");
- row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE, TRUE);
- }
-
- if (sys_datafiles_err == DB_CORRUPTION) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Dropping incompletely created "
- "SYS_DATAFILES table.");
-
- row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE, TRUE);
- }
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Creating tablespace and datafile system tables.");
-
- /* We always want SYSTEM tables to be created inside the system
- tablespace. */
- srv_file_per_table_backup = srv_file_per_table;
- srv_file_per_table = 0;
-
- err = que_eval_sql(
- NULL,
- "PROCEDURE CREATE_SYS_TABLESPACE_PROC () IS\n"
- "BEGIN\n"
- "CREATE TABLE SYS_TABLESPACES(\n"
- " SPACE INT, NAME CHAR, FLAGS INT);\n"
- "CREATE UNIQUE CLUSTERED INDEX SYS_TABLESPACES_SPACE"
- " ON SYS_TABLESPACES (SPACE);\n"
- "CREATE TABLE SYS_DATAFILES(\n"
- " SPACE INT, PATH CHAR);\n"
- "CREATE UNIQUE CLUSTERED INDEX SYS_DATAFILES_SPACE"
- " ON SYS_DATAFILES (SPACE);\n"
- "END;\n",
- FALSE, trx);
-
- if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Creation of SYS_TABLESPACES and SYS_DATAFILES "
- "has failed with error %lu. Tablespace is full. "
- "Dropping incompletely created tables.",
- (ulong) err);
-
- ut_a(err == DB_OUT_OF_FILE_SPACE
- || err == DB_TOO_MANY_CONCURRENT_TRXS);
-
- row_drop_table_for_mysql("SYS_TABLESPACES", trx, TRUE, TRUE);
- row_drop_table_for_mysql("SYS_DATAFILES", trx, TRUE, TRUE);
-
- if (err == DB_OUT_OF_FILE_SPACE) {
- err = DB_MUST_GET_MORE_FILE_SPACE;
- }
- }
-
- trx_commit_for_mysql(trx);
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx_free_for_mysql(trx);
-
- srv_file_per_table = srv_file_per_table_backup;
-
- if (err == DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Tablespace and datafile system tables created.");
- }
-
- /* Note: The master thread has not been started at this point. */
- /* Confirm and move to the non-LRU part of the table LRU list. */
-
- sys_tablespaces_err = dict_check_if_system_table_exists(
- "SYS_TABLESPACES", DICT_NUM_FIELDS__SYS_TABLESPACES + 1, 1);
- ut_a(sys_tablespaces_err == DB_SUCCESS);
-
- sys_datafiles_err = dict_check_if_system_table_exists(
- "SYS_DATAFILES", DICT_NUM_FIELDS__SYS_DATAFILES + 1, 1);
- ut_a(sys_datafiles_err == DB_SUCCESS);
-
- return(err);
-}
-
-/********************************************************************//**
-Add a single tablespace definition to the data dictionary tables in the
-database.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-dict_create_add_tablespace_to_dictionary(
-/*=====================================*/
- ulint space, /*!< in: tablespace id */
- const char* name, /*!< in: tablespace name */
- ulint flags, /*!< in: tablespace flags */
- const char* path, /*!< in: tablespace path */
- trx_t* trx, /*!< in/out: transaction */
- bool commit) /*!< in: if true then commit the
- transaction */
-{
- dberr_t error;
-
- pars_info_t* info = pars_info_create();
-
- ut_a(space > TRX_SYS_SPACE);
-
- pars_info_add_int4_literal(info, "space", space);
-
- pars_info_add_str_literal(info, "name", name);
-
- pars_info_add_int4_literal(info, "flags", flags);
-
- pars_info_add_str_literal(info, "path", path);
-
- error = que_eval_sql(info,
- "PROCEDURE P () IS\n"
- "BEGIN\n"
- "INSERT INTO SYS_TABLESPACES VALUES"
- "(:space, :name, :flags);\n"
- "INSERT INTO SYS_DATAFILES VALUES"
- "(:space, :path);\n"
- "END;\n",
- FALSE, trx);
-
- if (error != DB_SUCCESS) {
- return(error);
- }
-
- if (commit) {
- trx->op_info = "committing tablespace and datafile definition";
- trx_commit(trx);
- }
-
- trx->op_info = "";
-
- return(error);
-}
diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc
deleted file mode 100644
index a1cfeb3860f..00000000000
--- a/storage/xtradb/dict/dict0dict.cc
+++ /dev/null
@@ -1,7325 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file dict/dict0dict.cc
-Data dictionary system
-
-Created 1/8/1996 Heikki Tuuri
-***********************************************************************/
-
-#include "dict0dict.h"
-#include "fts0fts.h"
-#include "fil0fil.h"
-#include <algorithm>
-#include <string>
-
-#ifdef UNIV_NONINL
-#include "dict0dict.ic"
-#include "dict0priv.ic"
-#endif
-
-/** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */
-UNIV_INTERN dict_index_t* dict_ind_redundant;
-/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */
-UNIV_INTERN dict_index_t* dict_ind_compact;
-
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-/** Flag to control insert buffer debugging. */
-extern UNIV_INTERN uint ibuf_debug;
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
-/**********************************************************************
-Issue a warning that the row is too big. */
-void
-ib_warn_row_too_big(const dict_table_t* table);
-
-#ifndef UNIV_HOTBACKUP
-#include "buf0buf.h"
-#include "data0type.h"
-#include "mach0data.h"
-#include "dict0boot.h"
-#include "dict0mem.h"
-#include "dict0crea.h"
-#include "dict0stats.h"
-#include "trx0undo.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "btr0sea.h"
-#include "os0once.h"
-#include "page0zip.h"
-#include "page0page.h"
-#include "pars0pars.h"
-#include "pars0sym.h"
-#include "que0que.h"
-#include "rem0cmp.h"
-#include "fts0fts.h"
-#include "fts0types.h"
-#include "m_ctype.h" /* my_isspace() */
-#include "ha_prototypes.h" /* innobase_strcasecmp(), innobase_casedn_str() */
-#include "srv0mon.h"
-#include "srv0start.h"
-#include "lock0lock.h"
-#include "dict0priv.h"
-#include "row0upd.h"
-#include "row0mysql.h"
-#include "row0merge.h"
-#include "row0log.h"
-#include "ut0ut.h" /* ut_format_name() */
-#include "m_string.h"
-#include "my_sys.h"
-#include "mysqld.h" /* system_charset_info */
-#include "strfunc.h" /* strconvert() */
-
-#include <ctype.h>
-
-/** the dictionary system */
-UNIV_INTERN dict_sys_t* dict_sys = NULL;
-
-/** @brief the data dictionary rw-latch protecting dict_sys
-
-table create, drop, etc. reserve this in X-mode; implicit or
-backround operations purge, rollback, foreign key checks reserve this
-in S-mode; we cannot trust that MySQL protects implicit or background
-operations a table drop since MySQL does not know of them; therefore
-we need this; NOTE: a transaction which reserves this must keep book
-on the mode in trx_t::dict_operation_lock_mode */
-UNIV_INTERN rw_lock_t dict_operation_lock;
-
-/** Percentage of compression failures that are allowed in a single
-round */
-UNIV_INTERN ulong zip_failure_threshold_pct = 5;
-
-/** Maximum percentage of a page that can be allowed as a pad to avoid
-compression failures */
-UNIV_INTERN ulong zip_pad_max = 50;
-
-/* Keys to register rwlocks and mutexes with performance schema */
-#ifdef UNIV_PFS_RWLOCK
-UNIV_INTERN mysql_pfs_key_t dict_operation_lock_key;
-UNIV_INTERN mysql_pfs_key_t index_tree_rw_lock_key;
-UNIV_INTERN mysql_pfs_key_t index_online_log_key;
-UNIV_INTERN mysql_pfs_key_t dict_table_stats_key;
-#endif /* UNIV_PFS_RWLOCK */
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t zip_pad_mutex_key;
-UNIV_INTERN mysql_pfs_key_t dict_sys_mutex_key;
-UNIV_INTERN mysql_pfs_key_t dict_foreign_err_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when
- creating a table or index object */
-#define DICT_POOL_PER_TABLE_HASH 512 /*!< buffer pool max size per table
- hash table fixed size in bytes */
-#define DICT_POOL_PER_VARYING 4 /*!< buffer pool max size per data
- dictionary varying size in bytes */
-
-/** Identifies generated InnoDB foreign key names */
-static char dict_ibfk[] = "_ibfk_";
-
-bool innodb_table_stats_not_found = false;
-bool innodb_index_stats_not_found = false;
-static bool innodb_table_stats_not_found_reported = false;
-static bool innodb_index_stats_not_found_reported = false;
-
-/*******************************************************************//**
-Tries to find column names for the index and sets the col field of the
-index.
-@return TRUE if the column names were found */
-static
-ibool
-dict_index_find_cols(
-/*=================*/
- dict_table_t* table, /*!< in: table */
- dict_index_t* index); /*!< in: index */
-/*******************************************************************//**
-Builds the internal dictionary cache representation for a clustered
-index, containing also system fields not defined by the user.
-@return own: the internal representation of the clustered index */
-static
-dict_index_t*
-dict_index_build_internal_clust(
-/*============================*/
- const dict_table_t* table, /*!< in: table */
- dict_index_t* index); /*!< in: user representation of
- a clustered index */
-/*******************************************************************//**
-Builds the internal dictionary cache representation for a non-clustered
-index, containing also system fields not defined by the user.
-@return own: the internal representation of the non-clustered index */
-static
-dict_index_t*
-dict_index_build_internal_non_clust(
-/*================================*/
- const dict_table_t* table, /*!< in: table */
- dict_index_t* index); /*!< in: user representation of
- a non-clustered index */
-/**********************************************************************//**
-Builds the internal dictionary cache representation for an FTS index.
-@return own: the internal representation of the FTS index */
-static
-dict_index_t*
-dict_index_build_internal_fts(
-/*==========================*/
- dict_table_t* table, /*!< in: table */
- dict_index_t* index); /*!< in: user representation of an FTS index */
-/**********************************************************************//**
-Prints a column data. */
-static
-void
-dict_col_print_low(
-/*===============*/
- const dict_table_t* table, /*!< in: table */
- const dict_col_t* col); /*!< in: column */
-/**********************************************************************//**
-Prints an index data. */
-static
-void
-dict_index_print_low(
-/*=================*/
- dict_index_t* index); /*!< in: index */
-/**********************************************************************//**
-Prints a field data. */
-static
-void
-dict_field_print_low(
-/*=================*/
- const dict_field_t* field); /*!< in: field */
-
-/**********************************************************************//**
-Removes an index from the dictionary cache. */
-static
-void
-dict_index_remove_from_cache_low(
-/*=============================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index, /*!< in, own: index */
- ibool lru_evict); /*!< in: TRUE if page being evicted
- to make room in the table LRU list */
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Validate the dictionary table LRU list.
-@return TRUE if validate OK */
-static
-ibool
-dict_lru_validate(void);
-/*===================*/
-/**********************************************************************//**
-Check if table is in the dictionary table LRU list.
-@return TRUE if table found */
-static
-ibool
-dict_lru_find_table(
-/*================*/
- const dict_table_t* find_table); /*!< in: table to find */
-/**********************************************************************//**
-Check if a table exists in the dict table non-LRU list.
-@return TRUE if table found */
-static
-ibool
-dict_non_lru_find_table(
-/*====================*/
- const dict_table_t* find_table); /*!< in: table to find */
-#endif /* UNIV_DEBUG */
-
-/* Stream for storing detailed information about the latest foreign key
-and unique key errors. Only created if !srv_read_only_mode */
-UNIV_INTERN FILE* dict_foreign_err_file = NULL;
-/* mutex protecting the foreign and unique error buffers */
-UNIV_INTERN ib_mutex_t dict_foreign_err_mutex;
-
-/******************************************************************//**
-Makes all characters in a NUL-terminated UTF-8 string lower case. */
-UNIV_INTERN
-void
-dict_casedn_str(
-/*============*/
- char* a) /*!< in/out: string to put in lower case */
-{
- innobase_casedn_str(a);
-}
-
-/********************************************************************//**
-Checks if the database name in two table names is the same.
-@return TRUE if same db name */
-UNIV_INTERN
-ibool
-dict_tables_have_same_db(
-/*=====================*/
- const char* name1, /*!< in: table name in the form
- dbname '/' tablename */
- const char* name2) /*!< in: table name in the form
- dbname '/' tablename */
-{
- for (; *name1 == *name2; name1++, name2++) {
- if (*name1 == '/') {
- return(TRUE);
- }
- ut_a(*name1); /* the names must contain '/' */
- }
- return(FALSE);
-}
-
-/********************************************************************//**
-Return the end of table name where we have removed dbname and '/'.
-@return table name */
-UNIV_INTERN
-const char*
-dict_remove_db_name(
-/*================*/
- const char* name) /*!< in: table name in the form
- dbname '/' tablename */
-{
- const char* s = strchr(name, '/');
- ut_a(s);
-
- return(s + 1);
-}
-
-/********************************************************************//**
-Get the database name length in a table name.
-@return database name length */
-UNIV_INTERN
-ulint
-dict_get_db_name_len(
-/*=================*/
- const char* name) /*!< in: table name in the form
- dbname '/' tablename */
-{
- const char* s;
- s = strchr(name, '/');
- ut_a(s);
- return(s - name);
-}
-
-/********************************************************************//**
-Reserves the dictionary system mutex for MySQL. */
-UNIV_INTERN
-void
-dict_mutex_enter_for_mysql_func(const char * file, ulint line)
-/*============================*/
-{
- mutex_enter_func(&(dict_sys->mutex), file, line);
-}
-
-/********************************************************************//**
-Releases the dictionary system mutex for MySQL. */
-UNIV_INTERN
-void
-dict_mutex_exit_for_mysql(void)
-/*===========================*/
-{
- mutex_exit(&(dict_sys->mutex));
-}
-
-/** Allocate and init a dict_table_t's stats latch.
-This function must not be called concurrently on the same table object.
-@param[in,out] table_void table whose stats latch to create */
-static
-void
-dict_table_stats_latch_alloc(
- void* table_void)
-{
- dict_table_t* table = static_cast<dict_table_t*>(table_void);
-
- table->stats_latch = new(std::nothrow) rw_lock_t;
-
- ut_a(table->stats_latch != NULL);
-
- rw_lock_create(dict_table_stats_key, table->stats_latch,
- SYNC_INDEX_TREE);
-}
-
-/** Deinit and free a dict_table_t's stats latch.
-This function must not be called concurrently on the same table object.
-@param[in,out] table table whose stats latch to free */
-static
-void
-dict_table_stats_latch_free(
- dict_table_t* table)
-{
- rw_lock_free(table->stats_latch);
- delete table->stats_latch;
-}
-
-/** Create a dict_table_t's stats latch or delay for lazy creation.
-This function is only called from either single threaded environment
-or from a thread that has not shared the table object with other threads.
-@param[in,out] table table whose stats latch to create
-@param[in] enabled if false then the latch is disabled
-and dict_table_stats_lock()/unlock() become noop on this table. */
-
-void
-dict_table_stats_latch_create(
- dict_table_t* table,
- bool enabled)
-{
- if (!enabled) {
- table->stats_latch = NULL;
- table->stats_latch_created = os_once::DONE;
- return;
- }
-
-#ifdef HAVE_ATOMIC_BUILTINS
- /* We create this lazily the first time it is used. */
- table->stats_latch = NULL;
- table->stats_latch_created = os_once::NEVER_DONE;
-#else /* HAVE_ATOMIC_BUILTINS */
-
- dict_table_stats_latch_alloc(table);
-
- table->stats_latch_created = os_once::DONE;
-#endif /* HAVE_ATOMIC_BUILTINS */
-}
-
-/** Destroy a dict_table_t's stats latch.
-This function is only called from either single threaded environment
-or from a thread that has not shared the table object with other threads.
-@param[in,out] table table whose stats latch to destroy */
-
-void
-dict_table_stats_latch_destroy(
- dict_table_t* table)
-{
- if (table->stats_latch_created == os_once::DONE
- && table->stats_latch != NULL) {
-
- dict_table_stats_latch_free(table);
- }
-}
-
-/**********************************************************************//**
-Lock the appropriate latch to protect a given table's statistics. */
-UNIV_INTERN
-void
-dict_table_stats_lock(
-/*==================*/
- dict_table_t* table, /*!< in: table */
- ulint latch_mode) /*!< in: RW_S_LATCH or RW_X_LATCH */
-{
- ut_ad(table != NULL);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
-#ifdef HAVE_ATOMIC_BUILTINS
- os_once::do_or_wait_for_done(
- &table->stats_latch_created,
- dict_table_stats_latch_alloc, table);
-#else /* HAVE_ATOMIC_BUILTINS */
- ut_ad(table->stats_latch_created == os_once::DONE);
-#endif /* HAVE_ATOMIC_BUILTINS */
-
- if (table->stats_latch == NULL) {
- /* This is a dummy table object that is private in the current
- thread and is not shared between multiple threads, thus we
- skip any locking. */
- return;
- }
-
- switch (latch_mode) {
- case RW_S_LATCH:
- rw_lock_s_lock(table->stats_latch);
- break;
- case RW_X_LATCH:
- rw_lock_x_lock(table->stats_latch);
- break;
- case RW_NO_LATCH:
- /* fall through */
- default:
- ut_error;
- }
-}
-
-/**********************************************************************//**
-Unlock the latch that has been locked by dict_table_stats_lock() */
-UNIV_INTERN
-void
-dict_table_stats_unlock(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- ulint latch_mode) /*!< in: RW_S_LATCH or
- RW_X_LATCH */
-{
- ut_ad(table != NULL);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- if (table->stats_latch == NULL) {
- /* This is a dummy table object that is private in the current
- thread and is not shared between multiple threads, thus we
- skip any locking. */
- return;
- }
-
- switch (latch_mode) {
- case RW_S_LATCH:
- rw_lock_s_unlock(table->stats_latch);
- break;
- case RW_X_LATCH:
- rw_lock_x_unlock(table->stats_latch);
- break;
- case RW_NO_LATCH:
- /* fall through */
- default:
- ut_error;
- }
-}
-
-/**********************************************************************//**
-Try to drop any indexes after an aborted index creation.
-This can also be after a server kill during DROP INDEX. */
-static
-void
-dict_table_try_drop_aborted(
-/*========================*/
- dict_table_t* table, /*!< in: table, or NULL if it
- needs to be looked up again */
- table_id_t table_id, /*!< in: table identifier */
- ulint ref_count) /*!< in: expected table->n_ref_count */
-{
- trx_t* trx;
-
- trx = trx_allocate_for_background();
- trx->op_info = "try to drop any indexes after an aborted index creation";
- row_mysql_lock_data_dictionary(trx);
- trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
-
- if (table == NULL) {
- table = dict_table_open_on_id_low(
- table_id, DICT_ERR_IGNORE_NONE, FALSE);
- } else {
- ut_ad(table->id == table_id);
- }
-
- if (table && table->n_ref_count == ref_count && table->drop_aborted) {
- /* Silence a debug assertion in row_merge_drop_indexes(). */
- ut_d(table->n_ref_count++);
- row_merge_drop_indexes(trx, table, TRUE);
- ut_d(table->n_ref_count--);
- ut_ad(table->n_ref_count == ref_count);
- trx_commit_for_mysql(trx);
- }
-
- row_mysql_unlock_data_dictionary(trx);
- trx_free_for_background(trx);
-}
-
-/**********************************************************************//**
-When opening a table,
-try to drop any indexes after an aborted index creation.
-Release the dict_sys->mutex. */
-static
-void
-dict_table_try_drop_aborted_and_mutex_exit(
-/*=======================================*/
- dict_table_t* table, /*!< in: table (may be NULL) */
- ibool try_drop) /*!< in: FALSE if should try to
- drop indexes whose online creation
- was aborted */
-{
- if (try_drop
- && table != NULL
- && table->drop_aborted
- && table->n_ref_count == 1
- && dict_table_get_first_index(table)) {
-
- /* Attempt to drop the indexes whose online creation
- was aborted. */
- table_id_t table_id = table->id;
-
- mutex_exit(&dict_sys->mutex);
-
- dict_table_try_drop_aborted(table, table_id, 1);
- } else {
- mutex_exit(&dict_sys->mutex);
- }
-}
-
-/********************************************************************//**
-Decrements the count of open handles to a table. */
-UNIV_INTERN
-void
-dict_table_close(
-/*=============*/
- dict_table_t* table, /*!< in/out: table */
- ibool dict_locked, /*!< in: TRUE=data dictionary locked */
- ibool try_drop) /*!< in: TRUE=try to drop any orphan
- indexes after an aborted online
- index creation */
-{
- if (!dict_locked) {
- mutex_enter(&dict_sys->mutex);
- }
-
- ut_ad(mutex_own(&dict_sys->mutex));
- ut_a(table->n_ref_count > 0);
-
- --table->n_ref_count;
-
- /* Force persistent stats re-read upon next open of the table
- so that FLUSH TABLE can be used to forcibly fetch stats from disk
- if they have been manually modified. We reset table->stat_initialized
- only if table reference count is 0 because we do not want too frequent
- stats re-reads (e.g. in other cases than FLUSH TABLE). */
- if (strchr(table->name, '/') != NULL
- && table->n_ref_count == 0
- && dict_stats_is_persistent_enabled(table)) {
-
- dict_stats_deinit(table);
- }
-
- MONITOR_DEC(MONITOR_TABLE_REFERENCE);
-
- ut_ad(dict_lru_validate());
-
-#ifdef UNIV_DEBUG
- if (table->can_be_evicted) {
- ut_ad(dict_lru_find_table(table));
- } else {
- ut_ad(dict_non_lru_find_table(table));
- }
-#endif /* UNIV_DEBUG */
-
- if (!dict_locked) {
- table_id_t table_id = table->id;
- ibool drop_aborted;
-
- drop_aborted = try_drop
- && table->drop_aborted
- && table->n_ref_count == 1
- && dict_table_get_first_index(table);
-
- mutex_exit(&dict_sys->mutex);
-
- if (drop_aborted) {
- dict_table_try_drop_aborted(NULL, table_id, 0);
- }
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Returns a column's name.
-@return column name. NOTE: not guaranteed to stay valid if table is
-modified in any way (columns added, etc.). */
-UNIV_INTERN
-const char*
-dict_table_get_col_name(
-/*====================*/
- const dict_table_t* table, /*!< in: table */
- ulint col_nr) /*!< in: column number */
-{
- ulint i;
- const char* s;
-
- ut_ad(table);
- ut_ad(col_nr < table->n_def);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- s = table->col_names;
- if (s) {
- for (i = 0; i < col_nr; i++) {
- s += strlen(s) + 1;
- }
- }
-
- return(s);
-}
-
-/**********************************************************************//**
-Returns a column's name.
-@return column name. NOTE: not guaranteed to stay valid if table is
-modified in any way (columns added, etc.). */
-UNIV_INTERN
-const char*
-dict_table_get_col_name_for_mysql(
-/*==============================*/
- const dict_table_t* table, /*!< in: table */
- const char* col_name)/*! in: MySQL table column name */
-{
- ulint i;
- const char* s;
-
- ut_ad(table);
- ut_ad(col_name);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- s = table->col_names;
- if (s) {
- /* If we have many virtual columns MySQL key_part->fieldnr
- could be larger than number of columns in InnoDB table
- when creating new indexes. */
- for (i = 0; i < table->n_def; i++) {
-
- if (!innobase_strcasecmp(s, col_name)) {
- break; /* Found */
- }
- s += strlen(s) + 1;
- }
- }
-
- return(s);
-}
-#ifndef UNIV_HOTBACKUP
-/** Allocate and init the autoinc latch of a given table.
-This function must not be called concurrently on the same table object.
-@param[in,out] table_void table whose autoinc latch to create */
-void
-dict_table_autoinc_alloc(
- void* table_void)
-{
- dict_table_t* table = static_cast<dict_table_t*>(table_void);
- table->autoinc_mutex = new (std::nothrow) ib_mutex_t();
- ut_a(table->autoinc_mutex != NULL);
- mutex_create(autoinc_mutex_key,
- table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX);
-}
-
-/** Allocate and init the zip_pad_mutex of a given index.
-This function must not be called concurrently on the same index object.
-@param[in,out] index_void index whose zip_pad_mutex to create */
-void
-dict_index_zip_pad_alloc(
- void* index_void)
-{
- dict_index_t* index = static_cast<dict_index_t*>(index_void);
- index->zip_pad.mutex = new (std::nothrow) os_fast_mutex_t;
- ut_a(index->zip_pad.mutex != NULL);
- os_fast_mutex_init(zip_pad_mutex_key, index->zip_pad.mutex);
-}
-
-/********************************************************************//**
-Acquire the autoinc lock. */
-UNIV_INTERN
-void
-dict_table_autoinc_lock(
-/*====================*/
- dict_table_t* table) /*!< in/out: table */
-{
-#ifdef HAVE_ATOMIC_BUILTINS
- os_once::do_or_wait_for_done(
- &table->autoinc_mutex_created,
- dict_table_autoinc_alloc, table);
-#else /* HAVE_ATOMIC_BUILTINS */
- ut_ad(table->autoinc_mutex_created == os_once::DONE);
-#endif /* HAVE_ATOMIC_BUILTINS */
-
- mutex_enter(table->autoinc_mutex);
-}
-
-/** Acquire the zip_pad_mutex latch.
-@param[in,out] index the index whose zip_pad_mutex to acquire.*/
-void
-dict_index_zip_pad_lock(
- dict_index_t* index)
-{
-#ifdef HAVE_ATOMIC_BUILTINS
- os_once::do_or_wait_for_done(
- &index->zip_pad.mutex_created,
- dict_index_zip_pad_alloc, index);
-#else /* HAVE_ATOMIC_BUILTINS */
- ut_ad(index->zip_pad.mutex_created == os_once::DONE);
-#endif /* HAVE_ATOMIC_BUILTINS */
-
- os_fast_mutex_lock(index->zip_pad.mutex);
-}
-
-/********************************************************************//**
-Unconditionally set the autoinc counter. */
-UNIV_INTERN
-void
-dict_table_autoinc_initialize(
-/*==========================*/
- dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value) /*!< in: next value to assign to a row */
-{
- ut_ad(dict_table_autoinc_own(table));
-
- table->autoinc = value;
-}
-
-/************************************************************************
-Get all the FTS indexes on a table.
-@return number of FTS indexes */
-UNIV_INTERN
-ulint
-dict_table_get_all_fts_indexes(
-/*===========================*/
- dict_table_t* table, /*!< in: table */
- ib_vector_t* indexes) /*!< out: all FTS indexes on this
- table */
-{
- dict_index_t* index;
-
- ut_a(ib_vector_size(indexes) == 0);
-
- for (index = dict_table_get_first_index(table);
- index;
- index = dict_table_get_next_index(index)) {
-
- if (index->type == DICT_FTS) {
- ib_vector_push(indexes, &index);
- }
- }
-
- return(ib_vector_size(indexes));
-}
-
-/** Store autoinc value when the table is evicted.
-@param[in] table table evicted */
-UNIV_INTERN
-void
-dict_table_autoinc_store(
- const dict_table_t* table)
-{
- ut_ad(mutex_own(&dict_sys->mutex));
-
- if (table->autoinc != 0) {
- ut_ad(dict_sys->autoinc_map->find(table->id)
- == dict_sys->autoinc_map->end());
-
- dict_sys->autoinc_map->insert(
- std::pair<table_id_t, ib_uint64_t>(
- table->id, table->autoinc));
- }
-}
-
-/** Restore autoinc value when the table is loaded.
-@param[in] table table loaded */
-UNIV_INTERN
-void
-dict_table_autoinc_restore(
- dict_table_t* table)
-{
- ut_ad(mutex_own(&dict_sys->mutex));
-
- autoinc_map_t::iterator it;
- it = dict_sys->autoinc_map->find(table->id);
-
- if (it != dict_sys->autoinc_map->end()) {
- table->autoinc = it->second;
- ut_ad(table->autoinc != 0);
-
- dict_sys->autoinc_map->erase(it);
- }
-}
-
-/********************************************************************//**
-Reads the next autoinc value (== autoinc counter value), 0 if not yet
-initialized.
-@return value for a new row, or 0 */
-UNIV_INTERN
-ib_uint64_t
-dict_table_autoinc_read(
-/*====================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(dict_table_autoinc_own(table));
-
- return(table->autoinc);
-}
-
-/********************************************************************//**
-Updates the autoinc counter if the value supplied is greater than the
-current value. */
-UNIV_INTERN
-void
-dict_table_autoinc_update_if_greater(
-/*=================================*/
-
- dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value) /*!< in: value which was assigned to a row */
-{
- ut_ad(dict_table_autoinc_own(table));
-
- if (value > table->autoinc) {
-
- table->autoinc = value;
- }
-}
-
-/********************************************************************//**
-Release the autoinc lock. */
-UNIV_INTERN
-void
-dict_table_autoinc_unlock(
-/*======================*/
- dict_table_t* table) /*!< in/out: table */
-{
- mutex_exit(table->autoinc_mutex);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************************//**
-Looks for column n in an index.
-@return position in internal representation of the index;
-ULINT_UNDEFINED if not contained */
-UNIV_INTERN
-ulint
-dict_index_get_nth_col_or_prefix_pos(
-/*=================================*/
- const dict_index_t* index, /*!< in: index */
- ulint n, /*!< in: column number */
- ibool inc_prefix, /*!< in: TRUE=consider
- column prefixes too */
- ulint* prefix_col_pos) /*!< out: col num if prefix */
-{
- const dict_field_t* field;
- const dict_col_t* col;
- ulint pos;
- ulint n_fields;
- ulint prefixed_pos_dummy;
-
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad((inc_prefix && !prefix_col_pos) || (!inc_prefix));
-
- if (!prefix_col_pos) {
- prefix_col_pos = &prefixed_pos_dummy;
- }
- *prefix_col_pos = ULINT_UNDEFINED;
-
- if (!prefix_col_pos) {
- prefix_col_pos = &prefixed_pos_dummy;
- }
- *prefix_col_pos = ULINT_UNDEFINED;
-
- col = dict_table_get_nth_col(index->table, n);
-
- if (dict_index_is_clust(index)) {
-
- return(dict_col_get_clust_pos(col, index));
- }
-
- n_fields = dict_index_get_n_fields(index);
-
- for (pos = 0; pos < n_fields; pos++) {
- field = dict_index_get_nth_field(index, pos);
-
- if (col == field->col) {
- *prefix_col_pos = pos;
- if (inc_prefix || field->prefix_len == 0) {
- return(pos);
- }
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Returns TRUE if the index contains a column or a prefix of that column.
-@return TRUE if contains the column or its prefix */
-UNIV_INTERN
-ibool
-dict_index_contains_col_or_prefix(
-/*==============================*/
- const dict_index_t* index, /*!< in: index */
- ulint n) /*!< in: column number */
-{
- const dict_field_t* field;
- const dict_col_t* col;
- ulint pos;
- ulint n_fields;
-
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- if (dict_index_is_clust(index)) {
-
- return(TRUE);
- }
-
- col = dict_table_get_nth_col(index->table, n);
-
- n_fields = dict_index_get_n_fields(index);
-
- for (pos = 0; pos < n_fields; pos++) {
- field = dict_index_get_nth_field(index, pos);
-
- if (col == field->col) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/********************************************************************//**
-Looks for a matching field in an index. The column has to be the same. The
-column in index must be complete, or must contain a prefix longer than the
-column in index2. That is, we must be able to construct the prefix in index2
-from the prefix in index.
-@return position in internal representation of the index;
-ULINT_UNDEFINED if not contained */
-UNIV_INTERN
-ulint
-dict_index_get_nth_field_pos(
-/*=========================*/
- const dict_index_t* index, /*!< in: index from which to search */
- const dict_index_t* index2, /*!< in: index */
- ulint n) /*!< in: field number in index2 */
-{
- const dict_field_t* field;
- const dict_field_t* field2;
- ulint n_fields;
- ulint pos;
-
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- field2 = dict_index_get_nth_field(index2, n);
-
- n_fields = dict_index_get_n_fields(index);
-
- for (pos = 0; pos < n_fields; pos++) {
- field = dict_index_get_nth_field(index, pos);
-
- if (field->col == field2->col
- && (field->prefix_len == 0
- || (field->prefix_len >= field2->prefix_len
- && field2->prefix_len != 0))) {
-
- return(pos);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Returns a table object based on table id.
-@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_open_on_id(
-/*==================*/
- table_id_t table_id, /*!< in: table id */
- ibool dict_locked, /*!< in: TRUE=data dictionary locked */
- dict_table_op_t table_op) /*!< in: operation to perform */
-{
- dict_table_t* table;
-
- if (!dict_locked) {
- mutex_enter(&dict_sys->mutex);
- }
-
- ut_ad(mutex_own(&dict_sys->mutex));
-
- table = dict_table_open_on_id_low(
- table_id,
- table_op == DICT_TABLE_OP_LOAD_TABLESPACE
- ? DICT_ERR_IGNORE_RECOVER_LOCK
- : DICT_ERR_IGNORE_NONE,
- table_op == DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);
-
- if (table != NULL) {
-
- if (table->can_be_evicted) {
- dict_move_to_mru(table);
- }
-
- ++table->n_ref_count;
-
- MONITOR_INC(MONITOR_TABLE_REFERENCE);
- }
-
- if (!dict_locked) {
- dict_table_try_drop_aborted_and_mutex_exit(
- table, table_op == DICT_TABLE_OP_DROP_ORPHAN);
- }
-
- return(table);
-}
-
-/********************************************************************//**
-Looks for column n position in the clustered index.
-@return position in internal representation of the clustered index */
-UNIV_INTERN
-ulint
-dict_table_get_nth_col_pos(
-/*=======================*/
- const dict_table_t* table, /*!< in: table */
- ulint n) /*!< in: column number */
-{
- return(dict_index_get_nth_col_pos(dict_table_get_first_index(table),
- n, NULL));
-}
-
-/********************************************************************//**
-Checks if a column is in the ordering columns of the clustered index of a
-table. Column prefixes are treated like whole columns.
-@return TRUE if the column, or its prefix, is in the clustered key */
-UNIV_INTERN
-ibool
-dict_table_col_in_clustered_key(
-/*============================*/
- const dict_table_t* table, /*!< in: table */
- ulint n) /*!< in: column number */
-{
- const dict_index_t* index;
- const dict_field_t* field;
- const dict_col_t* col;
- ulint pos;
- ulint n_fields;
-
- ut_ad(table);
-
- col = dict_table_get_nth_col(table, n);
-
- index = dict_table_get_first_index(table);
-
- n_fields = dict_index_get_n_unique(index);
-
- for (pos = 0; pos < n_fields; pos++) {
- field = dict_index_get_nth_field(index, pos);
-
- if (col == field->col) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-Inits the data dictionary module. */
-UNIV_INTERN
-void
-dict_init(void)
-/*===========*/
-{
- dict_sys = static_cast<dict_sys_t*>(mem_zalloc(sizeof(*dict_sys)));
-
- mutex_create(dict_sys_mutex_key, &dict_sys->mutex, SYNC_DICT);
-
- dict_sys->table_hash = hash_create(buf_pool_get_curr_size()
- / (DICT_POOL_PER_TABLE_HASH
- * UNIV_WORD_SIZE));
- dict_sys->table_id_hash = hash_create(buf_pool_get_curr_size()
- / (DICT_POOL_PER_TABLE_HASH
- * UNIV_WORD_SIZE));
- rw_lock_create(dict_operation_lock_key,
- &dict_operation_lock, SYNC_DICT_OPERATION);
-
- if (!srv_read_only_mode) {
- dict_foreign_err_file = os_file_create_tmpfile(NULL);
- ut_a(dict_foreign_err_file);
-
- mutex_create(dict_foreign_err_mutex_key,
- &dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK);
- }
-
- dict_sys->autoinc_map = new autoinc_map_t();
-}
-
-/**********************************************************************//**
-Move to the most recently used segment of the LRU list. */
-UNIV_INTERN
-void
-dict_move_to_mru(
-/*=============*/
- dict_table_t* table) /*!< in: table to move to MRU */
-{
- ut_ad(mutex_own(&dict_sys->mutex));
- ut_ad(dict_lru_validate());
- ut_ad(dict_lru_find_table(table));
-
- ut_a(table->can_be_evicted);
-
- UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
-
- UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
-
- ut_ad(dict_lru_validate());
-}
-
-/**********************************************************************//**
-Returns a table object and increment its open handle count.
-NOTE! This is a high-level function to be used mainly from outside the
-'dict' module. Inside this directory dict_table_get_low
-is usually the appropriate function.
-@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_open_on_name(
-/*====================*/
- const char* table_name, /*!< in: table name */
- ibool dict_locked, /*!< in: TRUE=data dictionary locked */
- ibool try_drop, /*!< in: TRUE=try to drop any orphan
- indexes after an aborted online
- index creation */
- dict_err_ignore_t
- ignore_err) /*!< in: error to be ignored when
- loading a table definition */
-{
- dict_table_t* table;
-
- if (!dict_locked) {
- mutex_enter(&(dict_sys->mutex));
- }
-
- ut_ad(table_name);
- ut_ad(mutex_own(&dict_sys->mutex));
-
- table = dict_table_check_if_in_cache_low(table_name);
-
- if (table == NULL) {
- table = dict_load_table(table_name, TRUE, ignore_err);
- }
-
- ut_ad(!table || table->cached);
-
- if (table != NULL) {
-
- /* If table is encrypted or corrupted */
- if (ignore_err == DICT_ERR_IGNORE_NONE
- && !table->is_readable()) {
- /* Make life easy for drop table. */
- if (table->can_be_evicted) {
- dict_table_move_from_lru_to_non_lru(table);
- }
-
- if (table->corrupted) {
-
- if (!dict_locked) {
- mutex_exit(&dict_sys->mutex);
- }
-
- char buf[MAX_FULL_NAME_LEN];
- ut_format_name(table->name, TRUE, buf, sizeof(buf));
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Table %s is corrupted. Please "
- "drop the table and recreate.",
- buf);
-
- return(NULL);
- }
-
- if (table->can_be_evicted) {
- dict_move_to_mru(table);
- }
-
- ++table->n_ref_count;
-
- if (!dict_locked) {
- mutex_exit(&dict_sys->mutex);
- }
-
- return (table);
- }
-
- if (table->can_be_evicted) {
- dict_move_to_mru(table);
- }
-
- ++table->n_ref_count;
-
- MONITOR_INC(MONITOR_TABLE_REFERENCE);
- }
-
- ut_ad(dict_lru_validate());
-
- if (!dict_locked) {
- dict_table_try_drop_aborted_and_mutex_exit(table, try_drop);
- }
-
- return(table);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Adds system columns to a table object. */
-UNIV_INTERN
-void
-dict_table_add_system_columns(
-/*==========================*/
- dict_table_t* table, /*!< in/out: table */
- mem_heap_t* heap) /*!< in: temporary heap */
-{
- ut_ad(table);
- ut_ad(table->n_def == table->n_cols - DATA_N_SYS_COLS);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- ut_ad(!table->cached);
-
- /* NOTE: the system columns MUST be added in the following order
- (so that they can be indexed by the numerical value of DATA_ROW_ID,
- etc.) and as the last columns of the table memory object.
- The clustered index will not always physically contain all
- system columns. */
-
- dict_mem_table_add_col(table, heap, "DB_ROW_ID", DATA_SYS,
- DATA_ROW_ID | DATA_NOT_NULL,
- DATA_ROW_ID_LEN);
-#if DATA_ROW_ID != 0
-#error "DATA_ROW_ID != 0"
-#endif
- dict_mem_table_add_col(table, heap, "DB_TRX_ID", DATA_SYS,
- DATA_TRX_ID | DATA_NOT_NULL,
- DATA_TRX_ID_LEN);
-#if DATA_TRX_ID != 1
-#error "DATA_TRX_ID != 1"
-#endif
- dict_mem_table_add_col(table, heap, "DB_ROLL_PTR", DATA_SYS,
- DATA_ROLL_PTR | DATA_NOT_NULL,
- DATA_ROLL_PTR_LEN);
-#if DATA_ROLL_PTR != 2
-#error "DATA_ROLL_PTR != 2"
-#endif
-
- /* This check reminds that if a new system column is added to
- the program, it should be dealt with here */
-#if DATA_N_SYS_COLS != 3
-#error "DATA_N_SYS_COLS != 3"
-#endif
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Adds a table object to the dictionary cache. */
-UNIV_INTERN
-void
-dict_table_add_to_cache(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- ibool can_be_evicted, /*!< in: TRUE if can be evicted */
- mem_heap_t* heap) /*!< in: temporary heap */
-{
- ulint fold;
- ulint id_fold;
- ulint i;
- ulint row_len;
-
- ut_ad(dict_lru_validate());
-
- /* The lower limit for what we consider a "big" row */
-#define BIG_ROW_SIZE 1024
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- dict_table_add_system_columns(table, heap);
-
- table->cached = TRUE;
-
- fold = ut_fold_string(table->name);
- id_fold = ut_fold_ull(table->id);
-
- row_len = 0;
- for (i = 0; i < table->n_def; i++) {
- ulint col_len = dict_col_get_max_size(
- dict_table_get_nth_col(table, i));
-
- row_len += col_len;
-
- /* If we have a single unbounded field, or several gigantic
- fields, mark the maximum row size as BIG_ROW_SIZE. */
- if (row_len >= BIG_ROW_SIZE || col_len >= BIG_ROW_SIZE) {
- row_len = BIG_ROW_SIZE;
-
- break;
- }
- }
-
- table->big_rows = row_len >= BIG_ROW_SIZE;
-
- /* Look for a table with the same name: error if such exists */
- {
- dict_table_t* table2;
- HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
- dict_table_t*, table2, ut_ad(table2->cached),
- ut_strcmp(table2->name, table->name) == 0);
- ut_a(table2 == NULL);
-
-#ifdef UNIV_DEBUG
- /* Look for the same table pointer with a different name */
- HASH_SEARCH_ALL(name_hash, dict_sys->table_hash,
- dict_table_t*, table2, ut_ad(table2->cached),
- table2 == table);
- ut_ad(table2 == NULL);
-#endif /* UNIV_DEBUG */
- }
-
- /* Look for a table with the same id: error if such exists */
- {
- dict_table_t* table2;
- HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold,
- dict_table_t*, table2, ut_ad(table2->cached),
- table2->id == table->id);
- ut_a(table2 == NULL);
-
-#ifdef UNIV_DEBUG
- /* Look for the same table pointer with a different id */
- HASH_SEARCH_ALL(id_hash, dict_sys->table_id_hash,
- dict_table_t*, table2, ut_ad(table2->cached),
- table2 == table);
- ut_ad(table2 == NULL);
-#endif /* UNIV_DEBUG */
- }
-
- /* Add table to hash table of tables */
- HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold,
- table);
-
- /* Add table to hash table of tables based on table id */
- HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, id_fold,
- table);
-
- table->can_be_evicted = can_be_evicted;
-
- if (table->can_be_evicted) {
- UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table);
- } else {
- UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_non_LRU, table);
- }
-
- dict_table_autoinc_restore(table);
-
- ut_ad(dict_lru_validate());
-
- dict_sys->size += mem_heap_get_size(table->heap)
- + strlen(table->name) + 1;
-}
-
-/**********************************************************************//**
-Test whether a table can be evicted from the LRU cache.
-@return TRUE if table can be evicted. */
-static
-ibool
-dict_table_can_be_evicted(
-/*======================*/
- const dict_table_t* table) /*!< in: table to test */
-{
- ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_a(table->can_be_evicted);
- ut_a(table->foreign_set.empty());
- ut_a(table->referenced_set.empty());
-
- if (table->n_ref_count == 0) {
- dict_index_t* index;
-
- /* The transaction commit and rollback are called from
- outside the handler interface. This means that there is
- a window where the table->n_ref_count can be zero but
- the table instance is in "use". */
-
- if (lock_table_has_locks(table)) {
- return(FALSE);
- }
-
- for (index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
-
- btr_search_t* info = btr_search_get_info(index);
-
- /* We are not allowed to free the in-memory index
- struct dict_index_t until all entries in the adaptive
- hash index that point to any of the page belonging to
- his b-tree index are dropped. This is so because
- dropping of these entries require access to
- dict_index_t struct. To avoid such scenario we keep
- a count of number of such pages in the search_info and
- only free the dict_index_t struct when this count
- drops to zero.
-
- See also: dict_index_remove_from_cache_low() */
-
- if (btr_search_info_get_ref_count(info, index) > 0) {
- return(FALSE);
- }
- }
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-Make room in the table cache by evicting an unused table. The unused table
-should not be part of FK relationship and currently not used in any user
-transaction. There is no guarantee that it will remove a table.
-@return number of tables evicted. If the number of tables in the dict_LRU
-is less than max_tables it will not do anything. */
-UNIV_INTERN
-ulint
-dict_make_room_in_cache(
-/*====================*/
- ulint max_tables, /*!< in: max tables allowed in cache */
- ulint pct_check) /*!< in: max percent to check */
-{
- ulint i;
- ulint len;
- dict_table_t* table;
- ulint check_up_to;
- ulint n_evicted = 0;
-
- ut_a(pct_check > 0);
- ut_a(pct_check <= 100);
- ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(dict_lru_validate());
-
- i = len = UT_LIST_GET_LEN(dict_sys->table_LRU);
-
- if (len < max_tables) {
- return(0);
- }
-
- check_up_to = len - ((len * pct_check) / 100);
-
- /* Check for overflow */
- ut_a(i == 0 || check_up_to <= i);
-
- /* Find a suitable candidate to evict from the cache. Don't scan the
- entire LRU list. Only scan pct_check list entries. */
-
- for (table = UT_LIST_GET_LAST(dict_sys->table_LRU);
- table != NULL
- && i > check_up_to
- && (len - n_evicted) > max_tables;
- --i) {
-
- dict_table_t* prev_table;
-
- prev_table = UT_LIST_GET_PREV(table_LRU, table);
-
- if (dict_table_can_be_evicted(table)) {
-
- dict_table_remove_from_cache_low(table, TRUE);
-
- ++n_evicted;
- }
-
- table = prev_table;
- }
-
- return(n_evicted);
-}
-
-/**********************************************************************//**
-Move a table to the non-LRU list from the LRU list. */
-UNIV_INTERN
-void
-dict_table_move_from_lru_to_non_lru(
-/*================================*/
- dict_table_t* table) /*!< in: table to move from LRU to non-LRU */
-{
- ut_ad(mutex_own(&dict_sys->mutex));
- ut_ad(dict_lru_find_table(table));
-
- ut_a(table->can_be_evicted);
-
- UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
-
- UT_LIST_ADD_LAST(table_LRU, dict_sys->table_non_LRU, table);
-
- table->can_be_evicted = FALSE;
-}
-
-/**********************************************************************//**
-Move a table to the LRU list from the non-LRU list. */
-UNIV_INTERN
-void
-dict_table_move_from_non_lru_to_lru(
-/*================================*/
- dict_table_t* table) /*!< in: table to move from non-LRU to LRU */
-{
- ut_ad(mutex_own(&dict_sys->mutex));
- ut_ad(dict_non_lru_find_table(table));
-
- ut_a(!table->can_be_evicted);
-
- UT_LIST_REMOVE(table_LRU, dict_sys->table_non_LRU, table);
-
- UT_LIST_ADD_LAST(table_LRU, dict_sys->table_LRU, table);
-
- table->can_be_evicted = TRUE;
-}
-
-/**********************************************************************//**
-Looks for an index with the given id given a table instance.
-@return index or NULL */
-UNIV_INTERN
-dict_index_t*
-dict_table_find_index_on_id(
-/*========================*/
- const dict_table_t* table, /*!< in: table instance */
- index_id_t id) /*!< in: index id */
-{
- dict_index_t* index;
-
- for (index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
-
- if (id == index->id) {
- /* Found */
-
- return(index);
- }
- }
-
- return(NULL);
-}
-
-/**********************************************************************//**
-Looks for an index with the given id. NOTE that we do not reserve
-the dictionary mutex: this function is for emergency purposes like
-printing info of a corrupt database page!
-@return index or NULL if not found in cache */
-UNIV_INTERN
-dict_index_t*
-dict_index_find_on_id_low(
-/*======================*/
- index_id_t id) /*!< in: index id */
-{
- dict_table_t* table;
-
- /* This can happen if the system tablespace is the wrong page size */
- if (dict_sys == NULL) {
- return(NULL);
- }
-
- for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
- table != NULL;
- table = UT_LIST_GET_NEXT(table_LRU, table)) {
-
- dict_index_t* index = dict_table_find_index_on_id(table, id);
-
- if (index != NULL) {
- return(index);
- }
- }
-
- for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU);
- table != NULL;
- table = UT_LIST_GET_NEXT(table_LRU, table)) {
-
- dict_index_t* index = dict_table_find_index_on_id(table, id);
-
- if (index != NULL) {
- return(index);
- }
- }
-
- return(NULL);
-}
-
-/** Function object to remove a foreign key constraint from the
-referenced_set of the referenced table. The foreign key object is
-also removed from the dictionary cache. The foreign key constraint
-is not removed from the foreign_set of the table containing the
-constraint. */
-struct dict_foreign_remove_partial
-{
- void operator()(dict_foreign_t* foreign) {
- dict_table_t* table = foreign->referenced_table;
- if (table != NULL) {
- table->referenced_set.erase(foreign);
- }
- dict_foreign_free(foreign);
- }
-};
-
-/**********************************************************************//**
-Renames a table object.
-@return TRUE if success */
-UNIV_INTERN
-dberr_t
-dict_table_rename_in_cache(
-/*=======================*/
- dict_table_t* table, /*!< in/out: table */
- const char* new_name, /*!< in: new name */
- ibool rename_also_foreigns)/*!< in: in ALTER TABLE we want
- to preserve the original table name
- in constraints which reference it */
-{
- dberr_t err;
- dict_foreign_t* foreign;
- dict_index_t* index;
- ulint fold;
- char old_name[MAX_FULL_NAME_LEN + 1];
- os_file_type_t ftype;
- ibool exists;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- /* store the old/current name to an automatic variable */
- if (strlen(table->name) + 1 <= sizeof(old_name)) {
- memcpy(old_name, table->name, strlen(table->name) + 1);
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr, "InnoDB: too long table name: '%s', "
- "max length is %d\n", table->name,
- MAX_FULL_NAME_LEN);
- ut_error;
- }
-
- fold = ut_fold_string(new_name);
-
- /* Look for a table with the same name: error if such exists */
- dict_table_t* table2;
- HASH_SEARCH(name_hash, dict_sys->table_hash, fold,
- dict_table_t*, table2, ut_ad(table2->cached),
- (ut_strcmp(table2->name, new_name) == 0));
- DBUG_EXECUTE_IF("dict_table_rename_in_cache_failure",
- if (table2 == NULL) {
- table2 = (dict_table_t*) -1;
- } );
- if (table2) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot rename table '%s' to '%s' since the "
- "dictionary cache already contains '%s'.",
- old_name, new_name, new_name);
- return(DB_ERROR);
- }
-
- /* If the table is stored in a single-table tablespace, rename the
- .ibd file and rebuild the .isl file if needed. */
-
- if (dict_table_is_discarded(table)) {
- char* filepath;
-
- ut_ad(table->space != TRX_SYS_SPACE);
-
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
-
- dict_get_and_save_data_dir_path(table, true);
- ut_a(table->data_dir_path);
-
- filepath = os_file_make_remote_pathname(
- table->data_dir_path, table->name, "ibd");
- } else {
- filepath = fil_make_ibd_name(table->name, false);
- }
-
- fil_delete_tablespace(table->space, BUF_REMOVE_ALL_NO_WRITE);
-
- /* Delete any temp file hanging around. */
- if (os_file_status(filepath, &exists, &ftype)
- && exists
- && !os_file_delete_if_exists(innodb_file_temp_key,
- filepath)) {
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Delete of %s failed.", filepath);
- }
-
- mem_free(filepath);
-
- } else if (table->space != TRX_SYS_SPACE) {
- if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: trying to rename a"
- " TEMPORARY TABLE ", stderr);
- ut_print_name(stderr, NULL, TRUE, old_name);
- if (table->dir_path_of_temp_table != NULL) {
- fputs(" (", stderr);
- ut_print_filename(
- stderr, table->dir_path_of_temp_table);
- fputs(" )\n", stderr);
- }
-
- return(DB_ERROR);
- }
-
- char* new_path = NULL;
- char* old_path = fil_space_get_first_path(table->space);
-
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- new_path = os_file_make_new_pathname(
- old_path, new_name);
-
- err = fil_create_link_file(new_name, new_path);
- if (err != DB_SUCCESS) {
- mem_free(new_path);
- mem_free(old_path);
- return(DB_TABLESPACE_EXISTS);
- }
- } else {
- new_path = fil_make_ibd_name(new_name, false);
- }
-
- /* New filepath must not exist. */
- err = fil_rename_tablespace_check(
- table->space, old_path, new_path, false);
- if (err != DB_SUCCESS) {
- mem_free(old_path);
- mem_free(new_path);
- return(err);
- }
-
- ibool success = fil_rename_tablespace(
- old_name, table->space, new_name, new_path);
-
- mem_free(old_path);
- mem_free(new_path);
-
- /* If the tablespace is remote, a new .isl file was created
- If success, delete the old one. If not, delete the new one. */
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- fil_delete_link_file(success ? old_name : new_name);
- }
-
- if (!success) {
- return(DB_ERROR);
- }
- }
-
- /* Remove table from the hash tables of tables */
- HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash,
- ut_fold_string(old_name), table);
-
- if (strlen(new_name) > strlen(table->name)) {
- /* We allocate MAX_FULL_NAME_LEN + 1 bytes here to avoid
- memory fragmentation, we assume a repeated calls of
- ut_realloc() with the same size do not cause fragmentation */
- ut_a(strlen(new_name) <= MAX_FULL_NAME_LEN);
-
- table->name = static_cast<char*>(
- ut_realloc(table->name, MAX_FULL_NAME_LEN + 1));
- }
- memcpy(table->name, new_name, strlen(new_name) + 1);
-
- /* Add table to hash table of tables */
- HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold,
- table);
-
- dict_sys->size += strlen(new_name) - strlen(old_name);
- ut_a(dict_sys->size > 0);
-
- /* Update the table_name field in indexes */
- for (index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
-
- index->table_name = table->name;
- }
-
- if (!rename_also_foreigns) {
- /* In ALTER TABLE we think of the rename table operation
- in the direction table -> temporary table (#sql...)
- as dropping the table with the old name and creating
- a new with the new name. Thus we kind of drop the
- constraints from the dictionary cache here. The foreign key
- constraints will be inherited to the new table from the
- system tables through a call of dict_load_foreigns. */
-
- /* Remove the foreign constraints from the cache */
- std::for_each(table->foreign_set.begin(),
- table->foreign_set.end(),
- dict_foreign_remove_partial());
- table->foreign_set.clear();
-
- /* Reset table field in referencing constraints */
- for (dict_foreign_set::iterator it
- = table->referenced_set.begin();
- it != table->referenced_set.end();
- ++it) {
-
- foreign = *it;
- foreign->referenced_table = NULL;
- foreign->referenced_index = NULL;
-
- }
-
- /* Make the set of referencing constraints empty */
- table->referenced_set.clear();
-
- return(DB_SUCCESS);
- }
-
- /* Update the table name fields in foreign constraints, and update also
- the constraint id of new format >= 4.0.18 constraints. Note that at
- this point we have already changed table->name to the new name. */
-
- dict_foreign_set fk_set;
-
- for (;;) {
-
- dict_foreign_set::iterator it
- = table->foreign_set.begin();
-
- if (it == table->foreign_set.end()) {
- break;
- }
-
- foreign = *it;
-
- if (foreign->referenced_table) {
- foreign->referenced_table->referenced_set.erase(foreign);
- }
-
- if (ut_strlen(foreign->foreign_table_name)
- < ut_strlen(table->name)) {
- /* Allocate a longer name buffer;
- TODO: store buf len to save memory */
-
- foreign->foreign_table_name = mem_heap_strdup(
- foreign->heap, table->name);
- dict_mem_foreign_table_name_lookup_set(foreign, TRUE);
- } else {
- strcpy(foreign->foreign_table_name, table->name);
- dict_mem_foreign_table_name_lookup_set(foreign, FALSE);
- }
- if (strchr(foreign->id, '/')) {
- /* This is a >= 4.0.18 format id */
-
- ulint db_len;
- char* old_id;
- char old_name_cs_filename[MAX_TABLE_NAME_LEN+20];
- uint errors = 0;
-
- /* All table names are internally stored in charset
- my_charset_filename (except the temp tables and the
- partition identifier suffix in partition tables). The
- foreign key constraint names are internally stored
- in UTF-8 charset. The variable fkid here is used
- to store foreign key constraint name in charset
- my_charset_filename for comparison further below. */
- char fkid[MAX_TABLE_NAME_LEN+20];
- ibool on_tmp = FALSE;
-
- /* The old table name in my_charset_filename is stored
- in old_name_cs_filename */
-
- strncpy(old_name_cs_filename, old_name,
- MAX_TABLE_NAME_LEN);
- if (strstr(old_name, TEMP_TABLE_PATH_PREFIX) == NULL) {
-
- innobase_convert_to_system_charset(
- strchr(old_name_cs_filename, '/') + 1,
- strchr(old_name, '/') + 1,
- MAX_TABLE_NAME_LEN, &errors);
-
- if (errors) {
- /* There has been an error to convert
- old table into UTF-8. This probably
- means that the old table name is
- actually in UTF-8. */
- innobase_convert_to_filename_charset(
- strchr(old_name_cs_filename,
- '/') + 1,
- strchr(old_name, '/') + 1,
- MAX_TABLE_NAME_LEN);
- } else {
- /* Old name already in
- my_charset_filename */
- strncpy(old_name_cs_filename, old_name,
- MAX_TABLE_NAME_LEN);
- }
- }
-
- strncpy(fkid, foreign->id, MAX_TABLE_NAME_LEN);
-
- if (strstr(fkid, TEMP_TABLE_PATH_PREFIX) == NULL) {
- innobase_convert_to_filename_charset(
- strchr(fkid, '/') + 1,
- strchr(foreign->id, '/') + 1,
- MAX_TABLE_NAME_LEN+20);
- } else {
- on_tmp = TRUE;
- }
-
- old_id = mem_strdup(foreign->id);
-
- if (ut_strlen(fkid) > ut_strlen(old_name_cs_filename)
- + ((sizeof dict_ibfk) - 1)
- && !memcmp(fkid, old_name_cs_filename,
- ut_strlen(old_name_cs_filename))
- && !memcmp(fkid + ut_strlen(old_name_cs_filename),
- dict_ibfk, (sizeof dict_ibfk) - 1)) {
-
- /* This is a generated >= 4.0.18 format id */
-
- char table_name[MAX_TABLE_NAME_LEN] = "";
- uint errors = 0;
-
- if (strlen(table->name) > strlen(old_name)) {
- foreign->id = static_cast<char*>(
- mem_heap_alloc(
- foreign->heap,
- strlen(table->name)
- + strlen(old_id) + 1));
- }
-
- /* Convert the table name to UTF-8 */
- strncpy(table_name, table->name,
- MAX_TABLE_NAME_LEN);
- innobase_convert_to_system_charset(
- strchr(table_name, '/') + 1,
- strchr(table->name, '/') + 1,
- MAX_TABLE_NAME_LEN, &errors);
-
- if (errors) {
- /* Table name could not be converted
- from charset my_charset_filename to
- UTF-8. This means that the table name
- is already in UTF-8 (#mysql#50). */
- strncpy(table_name, table->name,
- MAX_TABLE_NAME_LEN);
- }
-
- /* Replace the prefix 'databasename/tablename'
- with the new names */
- strcpy(foreign->id, table_name);
- if (on_tmp) {
- strcat(foreign->id,
- old_id + ut_strlen(old_name));
- } else {
- sprintf(strchr(foreign->id, '/') + 1,
- "%s%s",
- strchr(table_name, '/') +1,
- strstr(old_id, "_ibfk_") );
- }
-
- } else {
- /* This is a >= 4.0.18 format id where the user
- gave the id name */
- db_len = dict_get_db_name_len(table->name) + 1;
-
- if (dict_get_db_name_len(table->name)
- > dict_get_db_name_len(foreign->id)) {
-
- foreign->id = static_cast<char*>(
- mem_heap_alloc(
- foreign->heap,
- db_len + strlen(old_id) + 1));
- }
-
- /* Replace the database prefix in id with the
- one from table->name */
-
- ut_memcpy(foreign->id, table->name, db_len);
-
- strcpy(foreign->id + db_len,
- dict_remove_db_name(old_id));
- }
-
- mem_free(old_id);
- }
-
- table->foreign_set.erase(it);
- fk_set.insert(foreign);
-
- if (foreign->referenced_table) {
- foreign->referenced_table->referenced_set.insert(foreign);
- }
- }
-
- ut_a(table->foreign_set.empty());
- table->foreign_set.swap(fk_set);
-
- for (dict_foreign_set::iterator it = table->referenced_set.begin();
- it != table->referenced_set.end();
- ++it) {
-
- foreign = *it;
-
- if (ut_strlen(foreign->referenced_table_name)
- < ut_strlen(table->name)) {
- /* Allocate a longer name buffer;
- TODO: store buf len to save memory */
-
- foreign->referenced_table_name = mem_heap_strdup(
- foreign->heap, table->name);
-
- dict_mem_referenced_table_name_lookup_set(
- foreign, TRUE);
- } else {
- /* Use the same buffer */
- strcpy(foreign->referenced_table_name, table->name);
-
- dict_mem_referenced_table_name_lookup_set(
- foreign, FALSE);
- }
- }
-
- return(DB_SUCCESS);
-}
-
-/**********************************************************************//**
-Re-keys a cached table object under a new table id. The table is taken
-out of the id hash using the fold of its current id, the id is replaced,
-and the table is put back under the fold of the new id. This is used in
-DISCARD TABLESPACE. */
-UNIV_INTERN
-void
-dict_table_change_id_in_cache(
-/*==========================*/
-	dict_table_t*	table,	/*!< in/out: table object already in cache */
-	table_id_t	new_id)	/*!< in: new id to set */
-{
-	ut_ad(table);
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
-	/* Hash keys for the id before and after the change */
-	const ulint	fold_before = ut_fold_ull(table->id);
-	const ulint	fold_after = ut_fold_ull(new_id);
-
-	/* Unlink the table while it is still filed under the old id */
-	HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash,
-		    fold_before, table);
-
-	table->id = new_id;
-
-	/* File the table again, now under the new id */
-	HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash,
-		    fold_after, table);
-}
-
-/**********************************************************************//**
-Removes a table object from the dictionary cache.
-
-The work is done in this order: the table's own foreign constraints are
-handed to dict_foreign_remove_partial() and the set is cleared; every
-constraint that references this table has its referenced_table and
-referenced_index reset to NULL; all indexes are removed from the cache;
-the table is deleted from the name hash and the id hash and unlinked from
-the LRU or non-LRU list; dict_sys->size is reduced; and finally the table
-object is freed with dict_mem_table_free().
-
-The caller must hold dict_sys->mutex, and the table must have no
-references (n_ref_count == 0) and no record locks (n_rec_locks == 0). */
-void
-dict_table_remove_from_cache_low(
-/*=============================*/
- dict_table_t* table, /*!< in, own: table; freed before returning */
- ibool lru_evict) /*!< in: TRUE if table being evicted
- to make room in the table LRU list */
-{
- dict_foreign_t* foreign;
- dict_index_t* index;
- ulint size;
-
- ut_ad(table);
- ut_ad(dict_lru_validate());
- ut_a(table->n_ref_count == 0);
- ut_a(table->n_rec_locks == 0);
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- /* Remove the foreign constraints from the cache */
- std::for_each(table->foreign_set.begin(), table->foreign_set.end(),
- dict_foreign_remove_partial());
- table->foreign_set.clear();
-
- /* Reset table field in referencing constraints */
- for (dict_foreign_set::iterator it = table->referenced_set.begin();
- it != table->referenced_set.end();
- ++it) {
-
- foreign = *it;
- foreign->referenced_table = NULL;
- foreign->referenced_index = NULL;
- }
-
- /* Remove the indexes from the cache. Each iteration re-reads the
- current last index of the list, so the loop ends when the list is
- empty. */
-
- for (index = UT_LIST_GET_LAST(table->indexes);
- index != NULL;
- index = UT_LIST_GET_LAST(table->indexes)) {
-
- dict_index_remove_from_cache_low(table, index, lru_evict);
- }
-
- /* Remove table from the hash tables of tables */
-
- HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash,
- ut_fold_string(table->name), table);
-
- HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash,
- ut_fold_ull(table->id), table);
-
- /* Remove table from LRU or non-LRU list. */
- if (table->can_be_evicted) {
- ut_ad(dict_lru_find_table(table));
- UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table);
- } else {
- ut_ad(dict_non_lru_find_table(table));
- UT_LIST_REMOVE(table_LRU, dict_sys->table_non_LRU, table);
- }
-
- ut_ad(dict_lru_validate());
-
- if (lru_evict) {
- dict_table_autoinc_store(table);
- }
-
- if (lru_evict && table->drop_aborted) {
- /* Do as dict_table_try_drop_aborted() does: run
- row_merge_drop_indexes() in a background transaction that is
- allocated, committed and freed within this block. */
-
- trx_t* trx = trx_allocate_for_background();
-
- ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- /* Mimic row_mysql_lock_data_dictionary(). */
- trx->dict_operation_lock_mode = RW_X_LATCH;
-
- trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
-
- /* Silence a debug assertion in row_merge_drop_indexes().
- The reference count is bumped and restored in debug builds only. */
- ut_d(table->n_ref_count++);
- row_merge_drop_indexes(trx, table, TRUE);
- ut_d(table->n_ref_count--);
- ut_ad(table->n_ref_count == 0);
- trx_commit_for_mysql(trx);
- trx->dict_operation_lock_mode = 0;
- trx_free_for_background(trx);
- }
-
- size = mem_heap_get_size(table->heap) + strlen(table->name) + 1; /* + 1: presumably the name's terminating NUL */
-
- ut_ad(dict_sys->size >= size);
-
- dict_sys->size -= size;
-
- dict_mem_table_free(table);
-}
-
-/**********************************************************************//**
-Removes a table object from the dictionary cache.
-This is the entry point for removals that are not LRU evictions: it
-forwards to dict_table_remove_from_cache_low() with lru_evict = FALSE. */
-UNIV_INTERN
-void
-dict_table_remove_from_cache(
-/*=========================*/
- dict_table_t* table) /*!< in, own: table */
-{
- dict_table_remove_from_cache_low(table, FALSE);
-}
-
-/****************************************************************//**
-Tells whether a column name collides with one of the InnoDB system
-columns. The comparison is done with innobase_strcasecmp().
-@return TRUE if name is reserved, FALSE otherwise */
-UNIV_INTERN
-ibool
-dict_col_name_is_reserved(
-/*======================*/
-	const char*	name)	/*!< in: column name */
-{
-	/* Compile-time reminder: when a system column is added to the
-	program, the list below has to be extended as well. */
-#if DATA_N_SYS_COLS != 3
-#error "DATA_N_SYS_COLS != 3"
-#endif
-
-	static const char*	reserved_names[] = {
-		"DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR"
-	};
-
-	const char* const*	last
-		= reserved_names + UT_ARR_SIZE(reserved_names);
-
-	for (const char* const* cur = reserved_names; cur != last; ++cur) {
-		if (!innobase_strcasecmp(name, *cur)) {
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-
-#if 1 /* This function is not very accurate at determining
- whether an UNDO record will be too big. See innodb_4k.test,
- Bug 13336585, for a testcase that shows an index that can
- be created but cannot be updated. */
-
-/****************************************************************//**
-If an undo log record for this table might not fit on a single page,
-return TRUE.
-
-The estimate starts from a fixed per-record overhead, adds the maximum
-size of the ordering (n_uniq) columns of the clustered index, adds two
-bytes per table column (plus one) for the update vector bookkeeping, and
-then adds a worst-case size for every defined field of the clustered
-index. The result is compared against UNIV_PAGE_SIZE.
-
-FTS indexes are never reported as too big. If the table has no clustered
-index yet, new_index is asserted to be clustered and is used in its place.
-@return TRUE if the undo log record could become too big */
-static
-ibool
-dict_index_too_big_for_undo(
-/*========================*/
- const dict_table_t* table, /*!< in: table */
- const dict_index_t* new_index) /*!< in: index that is about to be added */
-{
- /* Make sure that all column prefixes will fit in the undo log record
- in trx_undo_page_report_modify() right after trx_undo_page_init(). */
-
- ulint i;
- const dict_index_t* clust_index
- = dict_table_get_first_index(table);
- ulint undo_page_len
- = TRX_UNDO_PAGE_HDR - TRX_UNDO_PAGE_HDR_SIZE
- + 2 /* next record pointer */
- + 1 /* type_cmpl */
- + 11 /* trx->undo_no */ + 11 /* table->id */
- + 1 /* rec_get_info_bits() */
- + 11 /* DB_TRX_ID */
- + 11 /* DB_ROLL_PTR */
- + 10 + FIL_PAGE_DATA_END /* trx_undo_left() */
- + 2/* pointer to previous undo log record */;
-
- /* FTS index consists of auxiliary tables, they shall be excluded from
- index row size check */
- if (new_index->type & DICT_FTS) {
- return(false);
- }
-
- if (!clust_index) {
- ut_a(dict_index_is_clust(new_index));
- clust_index = new_index;
- }
-
- /* Add the size of the ordering columns in the
- clustered index. */
- for (i = 0; i < clust_index->n_uniq; i++) {
- const dict_col_t* col
- = dict_index_get_nth_col(clust_index, i);
-
- /* Use the maximum output size of
- mach_write_compressed(), although the encoded
- length should always fit in 2 bytes. */
- undo_page_len += 5 + dict_col_get_max_size(col);
- }
-
- /* Add the old values of the columns to be updated.
- First, the amount and the numbers of the columns.
- These are written by mach_write_compressed() whose
- maximum output length is 5 bytes. However, given that
- the quantities are below REC_MAX_N_FIELDS (10 bits),
- the maximum length is 2 bytes per item. */
- undo_page_len += 2 * (dict_table_get_n_cols(table) + 1);
-
- for (i = 0; i < clust_index->n_def; i++) {
- const dict_col_t* col
- = dict_index_get_nth_col(clust_index, i);
- ulint max_size
- = dict_col_get_max_size(col);
- ulint fixed_size
- = dict_col_get_fixed_size(col,
- dict_table_is_comp(table));
- ulint max_prefix
- = col->max_prefix;
-
- if (fixed_size) {
- /* Fixed-size columns are stored locally. */
- max_size = fixed_size;
- } else if (max_size <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
- /* Short columns are stored locally. */
- } else if (!col->ord_part
- || (col->max_prefix
- < (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table))) {
- /* See if col->ord_part would be set
- because of new_index. Also check if the new
- index could have longer prefix on columns
- that already had ord_part set */
- ulint j;
-
- for (j = 0; j < new_index->n_uniq; j++) {
- if (dict_index_get_nth_col(
- new_index, j) == col) {
- const dict_field_t* field
- = dict_index_get_nth_field(
- new_index, j);
-
- if (field->prefix_len
- > col->max_prefix) {
- max_prefix =
- field->prefix_len;
- }
-
- goto is_ord_part;
- }
- }
-
- if (col->ord_part) {
- goto is_ord_part;
- }
-
- /* This is not an ordering column in any index.
- Thus, it can be stored completely externally. */
- max_size = BTR_EXTERN_FIELD_REF_SIZE;
- } else {
- ulint max_field_len;
-is_ord_part:
- max_field_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table);
-
- /* This is an ordering column in some index.
- A long enough prefix must be written to the
- undo log. See trx_undo_page_fetch_ext(). */
- max_size = ut_min(max_size, max_field_len);
-
- /* We only store the needed prefix length in undo log */
- if (max_prefix) {
- ut_ad(dict_table_get_format(table)
- >= UNIV_FORMAT_B);
-
- max_size = ut_min(max_prefix, max_size);
- }
-
- max_size += BTR_EXTERN_FIELD_REF_SIZE;
- }
-
- undo_page_len += 5 + max_size;
- }
-
- return(undo_page_len >= UNIV_PAGE_SIZE);
-}
-#endif
-
-/****************************************************************//**
-If a record of this index might not fit on a single B-tree page,
-return TRUE.
-
-The maximum record size is accumulated field by field. The function
-returns TRUE as soon as the running total reaches the leaf-page limit,
-or, right after the last unique-in-tree field has been added, when the
-total plus REC_NODE_PTR_SIZE reaches the node-pointer limit.
-
-FTS indexes always yield false, and the debug injection point
-"ib_force_create_table" forces FALSE.
-@return TRUE if the index record could become too big */
-static
-ibool
-dict_index_too_big_for_tree(
-/*========================*/
- const dict_table_t* table, /*!< in: table */
- const dict_index_t* new_index) /*!< in: index */
-{
- ulint zip_size;
- ulint comp;
- ulint i;
- /* maximum possible storage size of a record */
- ulint rec_max_size;
- /* maximum allowed size of a record on a leaf page */
- ulint page_rec_max;
- /* maximum allowed size of a node pointer record */
- ulint page_ptr_max;
-
- /* FTS index consists of auxiliary tables, they shall be excluded from
- index row size check */
- if (new_index->type & DICT_FTS) {
- return(false);
- }
-
- DBUG_EXECUTE_IF(
- "ib_force_create_table",
- return(FALSE););
-
- comp = dict_table_is_comp(table);
- zip_size = dict_table_zip_size(table);
-
- if (zip_size && zip_size < UNIV_PAGE_SIZE) {
- /* On a compressed page, two records must fit in the
- uncompressed page modification log. On compressed
- pages with zip_size == UNIV_PAGE_SIZE, this limit will
- never be reached. */
- ut_ad(comp);
- /* The maximum allowed record size is the size of
- an empty page, minus a byte for recording the heap
- number in the page modification log. The maximum
- allowed node pointer size is half that. */
- page_rec_max = page_zip_empty_size(new_index->n_fields,
- zip_size);
- if (page_rec_max) {
- page_rec_max--;
- }
- page_ptr_max = page_rec_max / 2;
- /* On a compressed page, there is a two-byte entry in
- the dense page directory for every record. But there
- is no record header. */
- rec_max_size = 2;
- } else {
- /* The maximum allowed record size is half a B-tree
- page. No additional sparse page directory entry will
- be generated for the first few user records. */
- page_rec_max = page_get_free_space_of_empty(comp) / 2;
- page_ptr_max = page_rec_max;
- /* Each record has a header. */
- rec_max_size = comp
- ? REC_N_NEW_EXTRA_BYTES
- : REC_N_OLD_EXTRA_BYTES;
- }
-
- if (comp) {
- /* Include the "null" flags in the
- maximum possible record size. */
- rec_max_size += UT_BITS_IN_BYTES(new_index->n_nullable);
- } else {
- /* For each column, include a 2-byte offset and a
- "null" flag. The 1-byte format is only used in short
- records that do not contain externally stored columns.
- Such records could never exceed the page limit, even
- when using the 2-byte format. */
- rec_max_size += 2 * new_index->n_fields;
- }
-
- /* Compute the maximum possible record size. */
- for (i = 0; i < new_index->n_fields; i++) {
- const dict_field_t* field
- = dict_index_get_nth_field(new_index, i);
- const dict_col_t* col
- = dict_field_get_col(field);
- ulint field_max_size;
- ulint field_ext_max_size;
-
- /* In dtuple_convert_big_rec(), variable-length columns
- that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2
- may be chosen for external storage.
-
- Fixed-length columns, and all columns of secondary
- index records are always stored inline. */
-
- /* Determine the maximum length of the index field.
- The field_ext_max_size should be computed as the worst
- case in rec_get_converted_size_comp() for
- REC_STATUS_ORDINARY records. */
-
- field_max_size = dict_col_get_fixed_size(col, comp);
- if (field_max_size) {
- /* dict_index_add_col() should guarantee this */
- ut_ad(!field->prefix_len
- || field->fixed_len == field->prefix_len);
- /* Fixed lengths are not encoded
- in ROW_FORMAT=COMPACT. The jump skips the step that
- adds field_ext_max_size to the total. */
- field_ext_max_size = 0;
- goto add_field_size;
- }
-
- field_max_size = dict_col_get_max_size(col);
- field_ext_max_size = field_max_size < 256 ? 1 : 2;
-
- if (field->prefix_len) {
- if (field->prefix_len < field_max_size) {
- field_max_size = field->prefix_len;
- }
- } else if (field_max_size > BTR_EXTERN_FIELD_REF_SIZE * 2
- && dict_index_is_clust(new_index)) {
-
- /* In the worst case, we have a locally stored
- column of BTR_EXTERN_FIELD_REF_SIZE * 2 bytes.
- The length can be stored in one byte. If the
- column were stored externally, the lengths in
- the clustered index page would be
- BTR_EXTERN_FIELD_REF_SIZE and 2. */
- field_max_size = BTR_EXTERN_FIELD_REF_SIZE * 2;
- field_ext_max_size = 1;
- }
-
- if (comp) {
- /* Add the extra size for ROW_FORMAT=COMPACT.
- For ROW_FORMAT=REDUNDANT, these bytes were
- added to rec_max_size before this loop. */
- rec_max_size += field_ext_max_size;
- }
-add_field_size:
- rec_max_size += field_max_size;
-
- /* Check the size limit on leaf pages. */
- if (UNIV_UNLIKELY(rec_max_size >= page_rec_max)) {
-
- return(TRUE);
- }
-
- /* Check the size limit on non-leaf pages. Records
- stored in non-leaf B-tree pages consist of the unique
- columns of the record (the key columns of the B-tree)
- and a node pointer field. When we have processed the
- unique columns, rec_max_size equals the size of the
- node pointer record minus the node pointer column. */
- if (i + 1 == dict_index_get_n_unique_in_tree(new_index)
- && rec_max_size + REC_NODE_PTR_SIZE >= page_ptr_max) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-Adds an index to the dictionary cache.
-
-The index object passed in is only a template: a cache-internal copy
-(new_index) is built from it and linked into table->indexes, and the
-template itself is freed on every return path, including the error ones.
-
-DB_CORRUPTION is returned when a field name of the index matches no
-column of the table. DB_TOO_BIG_RECORD is returned when strict is set and
-a B-tree record could be too big, or, regardless of strict, when the
-undo log record check fails.
-@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
-UNIV_INTERN
-dberr_t
-dict_index_add_to_cache(
-/*====================*/
- dict_table_t* table, /*!< in: table on which the index is */
- dict_index_t* index, /*!< in, own: index; NOTE! The index memory
- object is freed in this function! */
- ulint page_no,/*!< in: root page number of the index */
- ibool strict) /*!< in: TRUE=refuse to create the index
- if records could be too big to fit in
- a B-tree page */
-{
- dict_index_t* new_index;
- ulint n_ord;
- ulint i;
-
- ut_ad(index);
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(index->n_def == index->n_fields);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(!dict_index_is_online_ddl(index));
-
- ut_ad(mem_heap_validate(index->heap));
- ut_a(!dict_index_is_clust(index)
- || UT_LIST_GET_LEN(table->indexes) == 0);
-
- if (!dict_index_find_cols(table, index)) {
-
- dict_mem_index_free(index);
- return(DB_CORRUPTION);
- }
-
- /* Build the cache internal representation of the index,
- containing also the added system fields */
-
- if (index->type == DICT_FTS) {
- new_index = dict_index_build_internal_fts(table, index);
- } else if (dict_index_is_clust(index)) {
- new_index = dict_index_build_internal_clust(table, index);
- } else {
- new_index = dict_index_build_internal_non_clust(table, index);
- }
-
- /* Set the n_fields value in new_index to the actual defined
- number of fields in the cache internal representation */
-
- new_index->n_fields = new_index->n_def;
- new_index->trx_id = index->trx_id;
-
- if (dict_index_too_big_for_tree(table, new_index)) {
-
- if (strict) {
-too_big:
- dict_mem_index_free(new_index);
- dict_mem_index_free(index);
- return(DB_TOO_BIG_RECORD);
- } else if (current_thd != NULL) {
- /* Avoid the warning to be printed
- during recovery. */
- ib_warn_row_too_big(table);
- }
- }
-
- if (dict_index_is_univ(index)) {
- n_ord = new_index->n_fields;
- } else {
- n_ord = new_index->n_uniq;
- }
-
-#if 1 /* The following code predetermines whether to call
- dict_index_too_big_for_undo(). This function is not
- accurate. See innodb_4k.test, Bug 13336585, for a
- testcase that shows an index that can be created but
- cannot be updated. */
-
- switch (dict_table_get_format(table)) {
- case UNIV_FORMAT_A:
- /* ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT store
- prefixes of externally stored columns locally within
- the record. There are no special considerations for
- the undo log record size. */
- goto undo_size_ok;
-
- case UNIV_FORMAT_B:
- /* In ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED,
- column prefix indexes require that prefixes of
- externally stored columns are written to the undo log.
- This may make the undo log record bigger than the
- record on the B-tree page. The maximum size of an
- undo log record is the page size. That must be
- checked for below. */
- break;
-
-#if UNIV_FORMAT_B != UNIV_FORMAT_MAX
-# error "UNIV_FORMAT_B != UNIV_FORMAT_MAX"
-#endif
- }
-
- for (i = 0; i < n_ord; i++) {
- const dict_field_t* field
- = dict_index_get_nth_field(new_index, i);
- const dict_col_t* col
- = dict_field_get_col(field);
-
- /* In dtuple_convert_big_rec(), variable-length columns
- that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2
- may be chosen for external storage. If the column appears
- in an ordering column of an index, a longer prefix determined
- by dict_max_field_len_store_undo() will be copied to the undo
- log by trx_undo_page_report_modify() and
- trx_undo_page_fetch_ext(). It suffices to check the
- capacity of the undo log whenever new_index includes
- a column prefix on a column that may be stored externally. */
-
- if (field->prefix_len /* prefix index */
- && (!col->ord_part /* not yet ordering column */
- || field->prefix_len > col->max_prefix)
- && !dict_col_get_fixed_size(col, TRUE) /* variable-length */
- && dict_col_get_max_size(col)
- > BTR_EXTERN_FIELD_REF_SIZE * 2 /* long enough */) {
-
- if (dict_index_too_big_for_undo(table, new_index)) {
- /* An undo log record might not fit in
- a single page. Refuse to create this index. */
-
- goto too_big;
- }
-
- break;
- }
- }
-
-undo_size_ok:
-#endif
- /* Flag the ordering columns and also set column max_prefix */
-
- for (i = 0; i < n_ord; i++) {
- const dict_field_t* field
- = dict_index_get_nth_field(new_index, i);
-
- field->col->ord_part = 1;
-
- if (field->prefix_len > field->col->max_prefix) {
- field->col->max_prefix = field->prefix_len;
- }
- }
-
- if (!dict_index_is_univ(new_index)) {
-
- new_index->stat_n_diff_key_vals =
- static_cast<ib_uint64_t*>(mem_heap_zalloc(
- new_index->heap,
- dict_index_get_n_unique(new_index)
- * sizeof(*new_index->stat_n_diff_key_vals)));
-
- new_index->stat_n_sample_sizes =
- static_cast<ib_uint64_t*>(mem_heap_zalloc(
- new_index->heap,
- dict_index_get_n_unique(new_index)
- * sizeof(*new_index->stat_n_sample_sizes)));
-
- new_index->stat_n_non_null_key_vals =
- static_cast<ib_uint64_t*>(mem_heap_zalloc(
- new_index->heap,
- dict_index_get_n_unique(new_index)
- * sizeof(*new_index->stat_n_non_null_key_vals)));
- }
-
- new_index->stat_index_size = 1;
- new_index->stat_n_leaf_pages = 1;
-
- new_index->stat_defrag_n_pages_freed = 0;
- new_index->stat_defrag_n_page_split = 0;
-
- new_index->stat_defrag_sample_next_slot = 0;
- memset(&new_index->stat_defrag_data_size_sample,
- 0x0, sizeof(ulint) * STAT_DEFRAG_DATA_SIZE_N_SAMPLE);
-
- /* Add the new index as the last index for the table */
-
- UT_LIST_ADD_LAST(indexes, table->indexes, new_index);
- new_index->table = table;
- new_index->table_name = table->name;
- new_index->search_info = btr_search_info_create(new_index->heap);
-
- new_index->page = page_no;
- rw_lock_create(index_tree_rw_lock_key, &new_index->lock,
- dict_index_is_ibuf(index)
- ? SYNC_IBUF_INDEX_TREE : SYNC_INDEX_TREE);
-
- dict_sys->size += mem_heap_get_size(new_index->heap);
-
- dict_mem_index_free(index);
-
- return(DB_SUCCESS);
-}
-
-/**********************************************************************//**
-Removes an index from the dictionary cache.
-
-If index->online_log is set it is freed with row_log_free(). The function
-then waits until the adaptive hash index reference count reported by
-btr_search_info_get_ref_count() is zero, frees the index latch, unlinks
-the index from table->indexes, subtracts the size of the index heap from
-dict_sys->size and frees the index object.
-
-The wait polls every 10 ms. A diagnostic is written to stderr every 500
-polls (5 seconds), and ut_error is raised after 60000 polls (600 seconds).
-When lru_evict is TRUE the wait is also abandoned once srv_shutdown_state
-is no longer SRV_SHUTDOWN_NONE. */
-static
-void
-dict_index_remove_from_cache_low(
-/*=============================*/
-	dict_table_t*	table,		/*!< in/out: table */
-	dict_index_t*	index,		/*!< in, own: index */
-	ibool		lru_evict)	/*!< in: TRUE if index being evicted
-					to make room in the table LRU list */
-{
-	ulint		size;
-	ulint		retries = 0;
-	btr_search_t*	info;
-
-	ut_ad(table && index);
-	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-	ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	/* No need to acquire the dict_index_t::lock here because
-	there can't be any active operations on this index (or table). */
-
-	if (index->online_log) {
-		ut_ad(index->online_status == ONLINE_INDEX_CREATION);
-		row_log_free(index->online_log);
-	}
-
-	/* We always create search info whether or not adaptive
-	hash index is enabled or not. */
-	info = btr_search_get_info(index);
-	ut_ad(info);
-
-	/* We are not allowed to free the in-memory index struct
-	dict_index_t until all entries in the adaptive hash index
-	that point to any of the pages belonging to this b-tree index
-	are dropped. This is so because dropping of these entries
-	requires access to the dict_index_t struct. To avoid such a
-	scenario we keep a count of the number of such pages in the
-	search_info and only free the dict_index_t struct when this count
-	drops to zero. See also: dict_table_can_be_evicted() */
-
-	do {
-		ulint ref_count = btr_search_info_get_ref_count(info,
-								index);
-
-		if (ref_count == 0) {
-			break;
-		}
-
-		/* Sleep for 10ms before trying again. */
-		os_thread_sleep(10000);
-		++retries;
-
-		if (retries % 500 == 0) {
-			/* No luck after 5 seconds of wait.
-			ulint is not unsigned long on every platform
-			(e.g. LLP64), so the values are converted to
-			match the %lu conversions exactly. */
-			fprintf(stderr, "InnoDB: Error: Waited for"
-				" %lu secs for hash index"
-				" ref_count (%lu) to drop"
-				" to 0.\n"
-				"index: \"%s\""
-				" table: \"%s\"\n",
-				static_cast<unsigned long>(retries / 100),
-				static_cast<unsigned long>(ref_count),
-				index->name,
-				table->name);
-		}
-
-		/* To avoid a hang here we deliberately crash if the
-		ref_count doesn't drop to zero in 600 seconds. */
-		if (retries >= 60000) {
-			ut_error;
-		}
-	} while (srv_shutdown_state == SRV_SHUTDOWN_NONE || !lru_evict);
-
-	rw_lock_free(&index->lock);
-
-	/* Remove the index from the list of indexes of the table */
-	UT_LIST_REMOVE(indexes, table->indexes, index);
-
-	size = mem_heap_get_size(index->heap);
-
-	ut_ad(dict_sys->size >= size);
-
-	dict_sys->size -= size;
-
-	dict_mem_index_free(index);
-}
-
-/**********************************************************************//**
-Removes an index from the dictionary cache.
-This is the entry point for removals that are not LRU evictions: it
-forwards to dict_index_remove_from_cache_low() with lru_evict = FALSE. */
-UNIV_INTERN
-void
-dict_index_remove_from_cache(
-/*=========================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index) /*!< in, own: index */
-{
- dict_index_remove_from_cache_low(table, index, FALSE);
-}
-
-/*******************************************************************//**
-Resolves every field of the index to a column of the table by comparing
-the field name with the column names using innobase_strcasecmp(), and
-stores the matching column in the col member of the field. The search
-stops at the first field for which no column matches.
-@return TRUE if the column names were found */
-static
-ibool
-dict_index_find_cols(
-/*=================*/
-	dict_table_t*	table,	/*!< in: table */
-	dict_index_t*	index)	/*!< in: index */
-{
-	ut_ad(table && index);
-	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	for (ulint field_no = 0; field_no < index->n_fields; field_no++) {
-		dict_field_t*	field = dict_index_get_nth_field(index,
-								 field_no);
-		ulint		col_no = 0;
-
-		/* Advance to the first column whose name matches */
-		while (col_no < table->n_cols
-		       && innobase_strcasecmp(
-			       dict_table_get_col_name(table, col_no),
-			       field->name) != 0) {
-			col_no++;
-		}
-
-		if (col_no < table->n_cols) {
-			field->col = dict_table_get_nth_col(table, col_no);
-			continue;
-		}
-
-#ifdef UNIV_DEBUG
-		/* It is an error not to find a matching column. */
-		fputs("InnoDB: Error: no matching column for ", stderr);
-		ut_print_name(stderr, NULL, FALSE, field->name);
-		fputs(" in ", stderr);
-		dict_index_name_print(stderr, NULL, index);
-		fputs("!\n", stderr);
-#endif /* UNIV_DEBUG */
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Appends a column to an index definition. A field named after the column
-is added to the index, its col and fixed_len members are filled in, and
-the nullable-field counter of the index is incremented unless the column
-is declared DATA_NOT_NULL. */
-UNIV_INTERN
-void
-dict_index_add_col(
-/*===============*/
-	dict_index_t*		index,		/*!< in/out: index */
-	const dict_table_t*	table,		/*!< in: table */
-	dict_col_t*		col,		/*!< in: column */
-	ulint			prefix_len)	/*!< in: column prefix length */
-{
-	const char*	col_name = dict_table_get_col_name(
-		table, dict_col_get_no(col));
-
-	dict_mem_index_add_field(index, col_name, prefix_len);
-
-	/* The field that was just appended is the last defined one */
-	dict_field_t*	field = dict_index_get_nth_field(
-		index, index->n_def - 1);
-
-	field->col = col;
-	field->fixed_len = (unsigned int) dict_col_get_fixed_size(
-		col, dict_table_is_comp(table));
-
-	/* A column prefix caps the fixed length */
-	if (prefix_len != 0) {
-		if (field->fixed_len > prefix_len) {
-			field->fixed_len = (unsigned int) prefix_len;
-		}
-	}
-
-	/* Long fixed-length fields that need external storage are treated as
-	variable-length fields, so that the extern flag can be embedded in
-	the length word. */
-
-	if (field->fixed_len > DICT_MAX_FIXED_COL_LEN) {
-		field->fixed_len = 0;
-	}
-#if DICT_MAX_FIXED_COL_LEN != 768
-	/* The comparison limit above must be constant. If it were
-	changed, the disk format of some fixed-length columns would
-	change, which would be a disaster. */
-# error "DICT_MAX_FIXED_COL_LEN != 768"
-#endif
-
-	if ((col->prtype & DATA_NOT_NULL) == 0) {
-		index->n_nullable++;
-	}
-}
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Copies fields contained in index2 to index1. */
-static
-void
-dict_index_copy(
-/*============*/
- dict_index_t* index1, /*!< in: index to copy to */
- dict_index_t* index2, /*!< in: index to copy from */
- const dict_table_t* table, /*!< in: table */
- ulint start, /*!< in: first position to copy */
- ulint end) /*!< in: last position to copy */
-{
- dict_field_t* field;
- ulint i;
-
- /* Copy fields contained in index2 */
-
- for (i = start; i < end; i++) {
-
- field = dict_index_get_nth_field(index2, i);
- dict_index_add_col(index1, table, field->col,
- field->prefix_len);
- }
-}
-
-/*******************************************************************//**
-Copies types of fields contained in index to tuple. */
-UNIV_INTERN
-void
-dict_index_copy_types(
-/*==================*/
- dtuple_t* tuple, /*!< in/out: data tuple */
- const dict_index_t* index, /*!< in: index */
- ulint n_fields) /*!< in: number of
- field types to copy */
-{
- ulint i;
-
- if (dict_index_is_univ(index)) {
- dtuple_set_types_binary(tuple, n_fields);
-
- return;
- }
-
- for (i = 0; i < n_fields; i++) {
- const dict_field_t* ifield;
- dtype_t* dfield_type;
-
- ifield = dict_index_get_nth_field(index, i);
- dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
- dict_col_copy_type(dict_field_get_col(ifield), dfield_type);
- }
-}
-
-/*******************************************************************//**
-Copies types of columns contained in table to tuple and sets all
-fields of the tuple to the SQL NULL value. This function should
-be called right after dtuple_create(). */
-UNIV_INTERN
-void
-dict_table_copy_types(
-/*==================*/
- dtuple_t* tuple, /*!< in/out: data tuple */
- const dict_table_t* table) /*!< in: table */
-{
- ulint i;
-
- for (i = 0; i < dtuple_get_n_fields(tuple); i++) {
-
- dfield_t* dfield = dtuple_get_nth_field(tuple, i);
- dtype_t* dtype = dfield_get_type(dfield);
-
- dfield_set_null(dfield);
- dict_col_copy_type(dict_table_get_nth_col(table, i), dtype);
- }
-}
-
-/********************************************************************
-Wait until all the background threads of the given table have exited, i.e.,
-bg_threads == 0. Note: bg_threads_mutex must be reserved when
-calling this. */
-UNIV_INTERN
-void
-dict_table_wait_for_bg_threads_to_exit(
-/*===================================*/
- dict_table_t* table, /*< in: table */
- ulint delay) /*< in: time in microseconds to wait between
- checks of bg_threads. */
-{
- fts_t* fts = table->fts;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(mutex_own(&fts->bg_threads_mutex));
-#endif /* UNIV_SYNC_DEBUG */
-
- while (fts->bg_threads > 0) {
- mutex_exit(&fts->bg_threads_mutex);
-
- os_thread_sleep(delay);
-
- mutex_enter(&fts->bg_threads_mutex);
- }
-}
-
-/*******************************************************************//**
-Builds the internal dictionary cache representation for a clustered
-index, containing also system fields not defined by the user.
-@return own: the internal representation of the clustered index */
-static
-dict_index_t*
-dict_index_build_internal_clust(
-/*============================*/
- const dict_table_t* table, /*!< in: table */
- dict_index_t* index) /*!< in: user representation of
- a clustered index */
-{
- dict_index_t* new_index;
- dict_field_t* field;
- ulint trx_id_pos;
- ulint i;
- ibool* indexed;
-
- ut_ad(table && index);
- ut_ad(dict_index_is_clust(index));
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- /* Create a new index object with certainly enough fields */
- new_index = dict_mem_index_create(table->name,
- index->name, table->space,
- index->type,
- index->n_fields + table->n_cols);
-
- /* Copy other relevant data from the old index struct to the new
- struct: it inherits the values */
-
- new_index->n_user_defined_cols = index->n_fields;
-
- new_index->id = index->id;
- btr_search_index_init(new_index);
-
- /* Copy the fields of index */
- dict_index_copy(new_index, index, table, 0, index->n_fields);
-
- if (dict_index_is_univ(index)) {
- /* No fixed number of fields determines an entry uniquely */
-
- new_index->n_uniq = REC_MAX_N_FIELDS;
-
- } else if (dict_index_is_unique(index)) {
- /* Only the fields defined so far are needed to identify
- the index entry uniquely */
-
- new_index->n_uniq = new_index->n_def;
- } else {
- /* Also the row id is needed to identify the entry */
- new_index->n_uniq = 1 + new_index->n_def;
- }
-
- new_index->trx_id_offset = 0;
-
- if (!dict_index_is_ibuf(index)) {
- /* Add system columns, trx id first */
-
- trx_id_pos = new_index->n_def;
-
-#if DATA_ROW_ID != 0
-# error "DATA_ROW_ID != 0"
-#endif
-#if DATA_TRX_ID != 1
-# error "DATA_TRX_ID != 1"
-#endif
-#if DATA_ROLL_PTR != 2
-# error "DATA_ROLL_PTR != 2"
-#endif
-
- if (!dict_index_is_unique(index)) {
- dict_index_add_col(new_index, table,
- dict_table_get_sys_col(
- table, DATA_ROW_ID),
- 0);
- trx_id_pos++;
- }
-
- dict_index_add_col(new_index, table,
- dict_table_get_sys_col(table, DATA_TRX_ID),
- 0);
-
- dict_index_add_col(new_index, table,
- dict_table_get_sys_col(table,
- DATA_ROLL_PTR),
- 0);
-
- for (i = 0; i < trx_id_pos; i++) {
-
- ulint fixed_size = dict_col_get_fixed_size(
- dict_index_get_nth_col(new_index, i),
- dict_table_is_comp(table));
-
- if (fixed_size == 0) {
- new_index->trx_id_offset = 0;
-
- break;
- }
-
- if (dict_index_get_nth_field(new_index, i)->prefix_len
- > 0) {
- new_index->trx_id_offset = 0;
-
- break;
- }
-
- /* Add fixed_size to new_index->trx_id_offset.
- Because the latter is a bit-field, an overflow
- can theoretically occur. Check for it. */
- fixed_size += new_index->trx_id_offset;
-
- new_index->trx_id_offset = fixed_size;
-
- if (new_index->trx_id_offset != fixed_size) {
- /* Overflow. Pretend that this is a
- variable-length PRIMARY KEY. */
- ut_ad(0);
- new_index->trx_id_offset = 0;
- break;
- }
- }
-
- }
-
- /* Remember the table columns already contained in new_index */
- indexed = static_cast<ibool*>(
- mem_zalloc(table->n_cols * sizeof *indexed));
-
- /* Mark the table columns already contained in new_index */
- for (i = 0; i < new_index->n_def; i++) {
-
- field = dict_index_get_nth_field(new_index, i);
-
- /* If there is only a prefix of the column in the index
- field, do not mark the column as contained in the index */
-
- if (field->prefix_len == 0) {
-
- indexed[field->col->ind] = TRUE;
- }
- }
-
- /* Add to new_index non-system columns of table not yet included
- there */
- for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) {
-
- dict_col_t* col = dict_table_get_nth_col(table, i);
- ut_ad(col->mtype != DATA_SYS);
-
- if (!indexed[col->ind]) {
- dict_index_add_col(new_index, table, col, 0);
- }
- }
-
- mem_free(indexed);
-
- ut_ad(dict_index_is_ibuf(index)
- || (UT_LIST_GET_LEN(table->indexes) == 0));
-
- new_index->cached = TRUE;
-
- return(new_index);
-}
-
-/*******************************************************************//**
-Builds the internal dictionary cache representation for a non-clustered
-index, containing also system fields not defined by the user.
-@return own: the internal representation of the non-clustered index */
-static
-dict_index_t*
-dict_index_build_internal_non_clust(
-/*================================*/
- const dict_table_t* table, /*!< in: table */
- dict_index_t* index) /*!< in: user representation of
- a non-clustered index */
-{
- dict_field_t* field;
- dict_index_t* new_index;
- dict_index_t* clust_index;
- ulint i;
- ibool* indexed;
-
- ut_ad(table && index);
- ut_ad(!dict_index_is_clust(index));
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- /* The clustered index should be the first in the list of indexes */
- clust_index = UT_LIST_GET_FIRST(table->indexes);
-
- ut_ad(clust_index);
- ut_ad(dict_index_is_clust(clust_index));
- ut_ad(!dict_index_is_univ(clust_index));
-
- /* Create a new index */
- new_index = dict_mem_index_create(
- table->name, index->name, index->space, index->type,
- index->n_fields + 1 + clust_index->n_uniq);
-
- /* Copy other relevant data from the old index
- struct to the new struct: it inherits the values */
-
- new_index->n_user_defined_cols = index->n_fields;
-
- new_index->id = index->id;
- btr_search_index_init(new_index);
-
- /* Copy fields from index to new_index */
- dict_index_copy(new_index, index, table, 0, index->n_fields);
-
- /* Remember the table columns already contained in new_index */
- indexed = static_cast<ibool*>(
- mem_zalloc(table->n_cols * sizeof *indexed));
-
- /* Mark the table columns already contained in new_index */
- for (i = 0; i < new_index->n_def; i++) {
-
- field = dict_index_get_nth_field(new_index, i);
-
- /* If there is only a prefix of the column in the index
- field, do not mark the column as contained in the index */
-
- if (field->prefix_len == 0) {
-
- indexed[field->col->ind] = TRUE;
- }
- }
-
- /* Add to new_index the columns necessary to determine the clustered
- index entry uniquely */
-
- for (i = 0; i < clust_index->n_uniq; i++) {
-
- field = dict_index_get_nth_field(clust_index, i);
-
- if (!indexed[field->col->ind]) {
- dict_index_add_col(new_index, table, field->col,
- field->prefix_len);
- }
- }
-
- mem_free(indexed);
-
- if (dict_index_is_unique(index)) {
- new_index->n_uniq = index->n_fields;
- } else {
- new_index->n_uniq = new_index->n_def;
- }
-
- /* Set the n_fields value in new_index to the actual defined
- number of fields */
-
- new_index->n_fields = new_index->n_def;
-
- new_index->cached = TRUE;
-
- return(new_index);
-}
-
-/***********************************************************************
-Builds the internal dictionary cache representation for an FTS index.
-@return own: the internal representation of the FTS index */
-static
-dict_index_t*
-dict_index_build_internal_fts(
-/*==========================*/
- dict_table_t* table, /*!< in: table */
- dict_index_t* index) /*!< in: user representation of an FTS index */
-{
- dict_index_t* new_index;
-
- ut_ad(table && index);
- ut_ad(index->type == DICT_FTS);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(mutex_own(&(dict_sys->mutex)));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- /* Create a new index */
- new_index = dict_mem_index_create(
- table->name, index->name, index->space, index->type,
- index->n_fields);
-
- /* Copy other relevant data from the old index struct to the new
- struct: it inherits the values */
-
- new_index->n_user_defined_cols = index->n_fields;
-
- new_index->id = index->id;
- btr_search_index_init(new_index);
-
- /* Copy fields from index to new_index */
- dict_index_copy(new_index, index, table, 0, index->n_fields);
-
- new_index->n_uniq = 0;
- new_index->cached = TRUE;
-
- if (table->fts->cache == NULL) {
- table->fts->cache = fts_cache_create(table);
- }
-
- rw_lock_x_lock(&table->fts->cache->init_lock);
- /* Notify the FTS cache about this index. */
- fts_cache_index_cache_create(table, new_index);
- rw_lock_x_unlock(&table->fts->cache->init_lock);
-
- return(new_index);
-}
-/*====================== FOREIGN KEY PROCESSING ========================*/
-
-#define DB_FOREIGN_KEY_IS_PREFIX_INDEX 200
-#define DB_FOREIGN_KEY_COL_NOT_NULL 201
-#define DB_FOREIGN_KEY_COLS_NOT_EQUAL 202
-#define DB_FOREIGN_KEY_INDEX_NOT_FOUND 203
-
-/*********************************************************************//**
-Checks if a table is referenced by foreign keys.
-@return TRUE if table is referenced by a foreign key */
-UNIV_INTERN
-ibool
-dict_table_is_referenced_by_foreign_key(
-/*====================================*/
- const dict_table_t* table) /*!< in: InnoDB table */
-{
- return(!table->referenced_set.empty());
-}
-
-/**********************************************************************//**
-Removes a foreign constraint struct from the dictionary cache. */
-UNIV_INTERN
-void
-dict_foreign_remove_from_cache(
-/*===========================*/
- dict_foreign_t* foreign) /*!< in, own: foreign constraint */
-{
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_a(foreign);
-
- if (foreign->referenced_table != NULL) {
- foreign->referenced_table->referenced_set.erase(foreign);
- }
-
- if (foreign->foreign_table != NULL) {
- foreign->foreign_table->foreign_set.erase(foreign);
- }
-
- dict_foreign_free(foreign);
-}
-
-/**********************************************************************//**
-Looks for the foreign constraint from the foreign and referenced lists
-of a table.
-@return foreign constraint */
-static
-dict_foreign_t*
-dict_foreign_find(
-/*==============*/
- dict_table_t* table, /*!< in: table object */
- dict_foreign_t* foreign) /*!< in: foreign constraint */
-{
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- ut_ad(dict_foreign_set_validate(table->foreign_set));
- ut_ad(dict_foreign_set_validate(table->referenced_set));
-
- dict_foreign_set::iterator it = table->foreign_set.find(foreign);
-
- if (it != table->foreign_set.end()) {
- return(*it);
- }
-
- it = table->referenced_set.find(foreign);
-
- if (it != table->referenced_set.end()) {
- return(*it);
- }
-
- return(NULL);
-}
-
-
-/*********************************************************************//**
-Tries to find an index whose first fields are the columns in the array,
-in the same order and is not marked for deletion and is not the same
-as types_idx.
-@return matching index, NULL if not found */
-UNIV_INTERN
-dict_index_t*
-dict_foreign_find_index(
-/*====================*/
- const dict_table_t* table, /*!< in: table */
- const char** col_names,
- /*!< in: column names, or NULL
- to use table->col_names */
- const char** columns,/*!< in: array of column names */
- ulint n_cols, /*!< in: number of columns */
- const dict_index_t* types_idx,
- /*!< in: NULL or an index
- whose types the column types
- must match */
- bool check_charsets,
- /*!< in: whether to check
- charsets. only has an effect
- if types_idx != NULL */
- ulint check_null,
- /*!< in: nonzero if none of
- the columns must be declared
- NOT NULL */
- ulint* error, /*!< out: error code */
- ulint* err_col_no,
- /*!< out: column number where
- error happened */
- dict_index_t** err_index)
- /*!< out: index where error
- happened */
-{
- dict_index_t* index;
-
- ut_ad(mutex_own(&dict_sys->mutex));
-
- if (error) {
- *error = DB_FOREIGN_KEY_INDEX_NOT_FOUND;
- }
-
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- if (types_idx != index
- && !(index->type & DICT_FTS)
- && !index->to_be_dropped
- && dict_foreign_qualify_index(
- table, col_names, columns, n_cols,
- index, types_idx,
- check_charsets, check_null,
- error, err_col_no,err_index)) {
- if (error) {
- *error = DB_SUCCESS;
- }
-
- return(index);
- }
-
- index = dict_table_get_next_index(index);
- }
-
- return(NULL);
-}
-#ifdef WITH_WSREP
-dict_index_t*
-wsrep_dict_foreign_find_index(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- const char** col_names, /*!< in: column names, or NULL
- to use table->col_names */
- const char** columns,/*!< in: array of column names */
- ulint n_cols, /*!< in: number of columns */
- dict_index_t* types_idx, /*!< in: NULL or an index to whose types the
- column types must match */
- ibool check_charsets,
- /*!< in: whether to check charsets.
- only has an effect if types_idx != NULL */
- ulint check_null)
- /*!< in: nonzero if none of the columns must
- be declared NOT NULL */
-{
- return dict_foreign_find_index(
- table, col_names, columns, n_cols, types_idx, check_charsets,
- check_null, NULL, NULL, NULL);
-}
-#endif /* WITH_WSREP */
-/**********************************************************************//**
-Report an error in a foreign key definition. */
-static
-void
-dict_foreign_error_report_low(
-/*==========================*/
- FILE* file, /*!< in: output stream */
- const char* name) /*!< in: table name */
-{
- rewind(file);
- ut_print_timestamp(file);
- fprintf(file, " Error in foreign key constraint of table %s:\n",
- name);
-}
-
-/**********************************************************************//**
-Report an error in a foreign key definition. */
-static
-void
-dict_foreign_error_report(
-/*======================*/
- FILE* file, /*!< in: output stream */
- dict_foreign_t* fk, /*!< in: foreign key constraint */
- const char* msg) /*!< in: the error message */
-{
- std::string fk_str;
- mutex_enter(&dict_foreign_err_mutex);
- dict_foreign_error_report_low(file, fk->foreign_table_name);
- fputs(msg, file);
- fputs(" Constraint:\n", file);
- fk_str = dict_print_info_on_foreign_key_in_create_format(NULL, fk, TRUE);
- fputs(fk_str.c_str(), file);
- putc('\n', file);
- if (fk->foreign_index) {
- fputs("The index in the foreign key in table is ", file);
- ut_print_name(file, NULL, FALSE, fk->foreign_index->name);
- fputs("\n"
- "See " REFMAN "innodb-foreign-key-constraints.html\n"
- "for correct foreign key definition.\n",
- file);
- }
- mutex_exit(&dict_foreign_err_mutex);
-}
-
-/**********************************************************************//**
-Adds a foreign key constraint object to the dictionary cache. May free
-the object if there already is an object with the same identifier in.
-At least one of the foreign table and the referenced table must already
-be in the dictionary cache!
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_foreign_add_to_cache(
-/*======================*/
- dict_foreign_t* foreign,
- /*!< in, own: foreign key constraint */
- const char** col_names,
- /*!< in: column names, or NULL to use
- foreign->foreign_table->col_names */
- bool check_charsets,
- /*!< in: whether to check charset
- compatibility */
- dict_err_ignore_t ignore_err)
- /*!< in: error to be ignored */
-{
- dict_table_t* for_table;
- dict_table_t* ref_table;
- dict_foreign_t* for_in_cache = NULL;
- dict_index_t* index;
- ibool added_to_referenced_list= FALSE;
- FILE* ef = dict_foreign_err_file;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- for_table = dict_table_check_if_in_cache_low(
- foreign->foreign_table_name_lookup);
-
- ref_table = dict_table_check_if_in_cache_low(
- foreign->referenced_table_name_lookup);
- ut_a(for_table || ref_table);
-
- if (for_table) {
- for_in_cache = dict_foreign_find(for_table, foreign);
- }
-
- if (!for_in_cache && ref_table) {
- for_in_cache = dict_foreign_find(ref_table, foreign);
- }
-
- if (for_in_cache) {
- dict_foreign_free(foreign);
- } else {
- for_in_cache = foreign;
- }
-
- if (ref_table && !for_in_cache->referenced_table) {
- index = dict_foreign_find_index(
- ref_table, NULL,
- for_in_cache->referenced_col_names,
- for_in_cache->n_fields, for_in_cache->foreign_index,
- check_charsets, false, NULL, NULL, NULL);
-
- if (index == NULL
- && !(ignore_err & DICT_ERR_IGNORE_FK_NOKEY)) {
- dict_foreign_error_report(
- ef, for_in_cache,
- "there is no index in referenced table"
- " which would contain\n"
- "the columns as the first columns,"
- " or the data types in the\n"
- "referenced table do not match"
- " the ones in table.");
-
- if (for_in_cache == foreign) {
- dict_foreign_free(foreign);
- }
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- for_in_cache->referenced_table = ref_table;
- for_in_cache->referenced_index = index;
-
- std::pair<dict_foreign_set::iterator, bool> ret
- = ref_table->referenced_set.insert(for_in_cache);
-
- ut_a(ret.second); /* second is true if the insertion
- took place */
- added_to_referenced_list = TRUE;
- }
-
- if (for_table && !for_in_cache->foreign_table) {
- ulint index_error;
- ulint err_col;
- dict_index_t *err_index=NULL;
-
- index = dict_foreign_find_index(
- for_table, col_names,
- for_in_cache->foreign_col_names,
- for_in_cache->n_fields,
- for_in_cache->referenced_index, check_charsets,
- for_in_cache->type
- & (DICT_FOREIGN_ON_DELETE_SET_NULL
- | DICT_FOREIGN_ON_UPDATE_SET_NULL),
- &index_error, &err_col, &err_index);
-
- if (index == NULL
- && !(ignore_err & DICT_ERR_IGNORE_FK_NOKEY)) {
- dict_foreign_error_report(
- ef, for_in_cache,
- "there is no index in the table"
- " which would contain\n"
- "the columns as the first columns,"
- " or the data types in the\n"
- "table do not match"
- " the ones in the referenced table\n"
- "or one of the ON ... SET NULL columns"
- " is declared NOT NULL.");
-
- if (for_in_cache == foreign) {
- if (added_to_referenced_list) {
- const dict_foreign_set::size_type n
- = ref_table->referenced_set
- .erase(for_in_cache);
-
- ut_a(n == 1); /* the number of
- elements removed must
- be one */
- }
-
- dict_foreign_free(foreign);
- }
-
- return(DB_CANNOT_ADD_CONSTRAINT);
- }
-
- for_in_cache->foreign_table = for_table;
- for_in_cache->foreign_index = index;
- std::pair<dict_foreign_set::iterator, bool> ret
- = for_table->foreign_set.insert(for_in_cache);
-
- ut_a(ret.second); /* second is true if the insertion
- took place */
- }
-
- /* We need to move the table to the non-LRU end of the table LRU
- list. Otherwise it will be evicted from the cache. */
-
- if (ref_table != NULL && ref_table->can_be_evicted) {
- dict_table_move_from_lru_to_non_lru(ref_table);
- }
-
- if (for_table != NULL && for_table->can_be_evicted) {
- dict_table_move_from_lru_to_non_lru(for_table);
- }
-
- ut_ad(dict_lru_validate());
-
- return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Scans from pointer onwards. Stops if is at the start of a copy of
-'string' where characters are compared without case sensitivity, and
-only outside `` or "" quotes. Stops also at NUL.
-@return scanned up to this */
-static
-const char*
-dict_scan_to(
-/*=========*/
- const char* ptr, /*!< in: scan from */
- const char* string) /*!< in: look for this */
-{
- char quote = '\0';
- bool escape = false;
-
- for (; *ptr; ptr++) {
- if (*ptr == quote) {
- /* Closing quote character: do not look for
- starting quote or the keyword. */
-
- /* If the quote character is escaped by a
- backslash, ignore it. */
- if (escape) {
- escape = false;
- } else {
- quote = '\0';
- }
- } else if (quote) {
- /* Within quotes: do nothing. */
- if (escape) {
- escape = false;
- } else if (*ptr == '\\') {
- escape = true;
- }
- } else if (*ptr == '`' || *ptr == '"' || *ptr == '\'') {
- /* Starting quote: remember the quote character. */
- quote = *ptr;
- } else {
- /* Outside quotes: look for the keyword. */
- ulint i;
- for (i = 0; string[i]; i++) {
- if (toupper((int)(unsigned char)(ptr[i]))
- != toupper((int)(unsigned char)
- (string[i]))) {
- goto nomatch;
- }
- }
- break;
-nomatch:
- ;
- }
- }
-
- return(ptr);
-}
-
-/*********************************************************************//**
-Accepts a specified string. Comparisons are case-insensitive.
-@return if string was accepted, the pointer is moved after that, else
-ptr is returned */
-static
-const char*
-dict_accept(
-/*========*/
- struct charset_info_st* cs,/*!< in: the character set of ptr */
- const char* ptr, /*!< in: scan from this */
- const char* string, /*!< in: accept only this string as the next
- non-whitespace string */
- ibool* success)/*!< out: TRUE if accepted */
-{
- const char* old_ptr = ptr;
- const char* old_ptr2;
-
- *success = FALSE;
-
- while (my_isspace(cs, *ptr)) {
- ptr++;
- }
-
- old_ptr2 = ptr;
-
- ptr = dict_scan_to(ptr, string);
-
- if (*ptr == '\0' || old_ptr2 != ptr) {
- return(old_ptr);
- }
-
- *success = TRUE;
-
- return(ptr + ut_strlen(string));
-}
-
-/*********************************************************************//**
-Scans an id. For the lexical definition of an 'id', see the code below.
-Strips backquotes or double quotes from around the id.
-@return scanned to */
-static
-const char*
-dict_scan_id(
-/*=========*/
- struct charset_info_st* cs,/*!< in: the character set of ptr */
- const char* ptr, /*!< in: scanned to */
- mem_heap_t* heap, /*!< in: heap where to allocate the id
- (NULL=id will not be allocated, but it
- will point to string near ptr) */
- const char** id, /*!< out,own: the id; NULL if no id was
- scannable */
- ibool table_id,/*!< in: TRUE=convert the allocated id
- as a table name; FALSE=convert to UTF-8 */
- ibool accept_also_dot)
- /*!< in: TRUE if also a dot can appear in a
- non-quoted id; in a quoted id it can appear
- always */
-{
- char quote = '\0';
- ulint len = 0;
- const char* s;
- char* str;
- char* dst;
-
- *id = NULL;
-
- while (my_isspace(cs, *ptr)) {
- ptr++;
- }
-
- if (*ptr == '\0') {
-
- return(ptr);
- }
-
- if (*ptr == '`' || *ptr == '"') {
- quote = *ptr++;
- }
-
- s = ptr;
-
- if (quote) {
- for (;;) {
- if (!*ptr) {
- /* Syntax error */
- return(ptr);
- }
- if (*ptr == quote) {
- ptr++;
- if (*ptr != quote) {
- break;
- }
- }
- ptr++;
- len++;
- }
- } else {
- while (!my_isspace(cs, *ptr) && *ptr != '(' && *ptr != ')'
- && (accept_also_dot || *ptr != '.')
- && *ptr != ',' && *ptr != '\0') {
-
- ptr++;
- }
-
- len = ptr - s;
- }
-
- if (UNIV_UNLIKELY(!heap)) {
- /* no heap given: id will point to source string */
- *id = s;
- return(ptr);
- }
-
- if (quote) {
- char* d;
-
- str = d = static_cast<char*>(
- mem_heap_alloc(heap, len + 1));
-
- while (len--) {
- if ((*d++ = *s++) == quote) {
- s++;
- }
- }
- *d++ = 0;
- len = d - str;
- ut_ad(*s == quote);
- ut_ad(s + 1 == ptr);
- } else {
- str = mem_heap_strdupl(heap, s, len);
- }
-
- if (!table_id) {
-convert_id:
- /* Convert the identifier from connection character set
- to UTF-8. */
- len = 3 * len + 1;
- *id = dst = static_cast<char*>(mem_heap_alloc(heap, len));
-
- innobase_convert_from_id(cs, dst, str, len);
- } else if (!strncmp(str, srv_mysql50_table_name_prefix,
- sizeof(srv_mysql50_table_name_prefix) - 1)) {
- /* This is a pre-5.1 table name
- containing chars other than [A-Za-z0-9].
- Discard the prefix and use raw UTF-8 encoding. */
- str += sizeof(srv_mysql50_table_name_prefix) - 1;
- len -= sizeof(srv_mysql50_table_name_prefix) - 1;
- goto convert_id;
- } else {
- /* Encode using filename-safe characters. */
- len = 5 * len + 1;
- *id = dst = static_cast<char*>(mem_heap_alloc(heap, len));
-
- innobase_convert_from_table_id(cs, dst, str, len);
- }
-
- return(ptr);
-}
-
-/*********************************************************************//**
-Tries to scan a column name.
-@return scanned to */
-static
-const char*
-dict_scan_col(
-/*==========*/
- struct charset_info_st* cs, /*!< in: the character set of ptr */
- const char* ptr, /*!< in: scanned to */
- ibool* success,/*!< out: TRUE if success */
- dict_table_t* table, /*!< in: table in which the column is */
- const dict_col_t** column, /*!< out: pointer to column if success */
- mem_heap_t* heap, /*!< in: heap where to allocate */
- const char** name) /*!< out,own: the column name;
- NULL if no name was scannable */
-{
- ulint i;
-
- *success = FALSE;
-
- ptr = dict_scan_id(cs, ptr, heap, name, FALSE, TRUE);
-
- if (*name == NULL) {
-
- return(ptr); /* Syntax error */
- }
-
- if (table == NULL) {
- *success = TRUE;
- *column = NULL;
- } else {
- for (i = 0; i < dict_table_get_n_cols(table); i++) {
-
- const char* col_name = dict_table_get_col_name(
- table, i);
-
- if (0 == innobase_strcasecmp(col_name, *name)) {
- /* Found */
-
- *success = TRUE;
- *column = dict_table_get_nth_col(table, i);
- strcpy((char*) *name, col_name);
-
- break;
- }
- }
- }
-
- return(ptr);
-}
-
-
-/*********************************************************************//**
-Open a table from its database and table name, this is currently used by
-foreign constraint parser to get the referenced table.
-@return complete table name with database and table name, allocated from
-heap memory passed in */
-UNIV_INTERN
-char*
-dict_get_referenced_table(
-/*======================*/
- const char* name, /*!< in: foreign key table name */
- const char* database_name, /*!< in: table db name */
- ulint database_name_len, /*!< in: db name length */
- const char* table_name, /*!< in: table name */
- ulint table_name_len, /*!< in: table name length */
- dict_table_t** table, /*!< out: table object or NULL */
- mem_heap_t* heap) /*!< in/out: heap memory */
-{
- char* ref;
- const char* db_name;
-
- if (!database_name) {
- /* Use the database name of the foreign key table */
-
- db_name = name;
- database_name_len = dict_get_db_name_len(name);
- } else {
- db_name = database_name;
- }
-
- /* Copy database_name, '/', table_name, '\0' */
- ref = static_cast<char*>(
- mem_heap_alloc(heap, database_name_len + table_name_len + 2));
-
- memcpy(ref, db_name, database_name_len);
- ref[database_name_len] = '/';
- memcpy(ref + database_name_len + 1, table_name, table_name_len + 1);
-
- /* Values; 0 = Store and compare as given; case sensitive
- 1 = Store and compare in lower; case insensitive
- 2 = Store as given, compare in lower; case semi-sensitive */
- if (innobase_get_lower_case_table_names() == 2) {
- innobase_casedn_str(ref);
- *table = dict_table_get_low(ref);
- memcpy(ref, db_name, database_name_len);
- ref[database_name_len] = '/';
- memcpy(ref + database_name_len + 1, table_name, table_name_len + 1);
-
- } else {
-#ifndef __WIN__
- if (innobase_get_lower_case_table_names() == 1) {
- innobase_casedn_str(ref);
- }
-#else
- innobase_casedn_str(ref);
-#endif /* !__WIN__ */
- *table = dict_table_get_low(ref);
- }
-
- return(ref);
-}
-/*********************************************************************//**
-Scans a table name from an SQL string.
-@return scanned to */
-static
-const char*
-dict_scan_table_name(
-/*=================*/
- struct charset_info_st* cs,/*!< in: the character set of ptr */
- const char* ptr, /*!< in: scanned to */
- dict_table_t** table, /*!< out: table object or NULL */
- const char* name, /*!< in: foreign key table name */
- ibool* success,/*!< out: TRUE if ok name found */
- mem_heap_t* heap, /*!< in: heap where to allocate the id */
- const char** ref_name)/*!< out,own: the table name;
- NULL if no name was scannable */
-{
- const char* database_name = NULL;
- ulint database_name_len = 0;
- const char* table_name = NULL;
- const char* scan_name;
-
- *success = FALSE;
- *table = NULL;
-
- ptr = dict_scan_id(cs, ptr, heap, &scan_name, TRUE, FALSE);
-
- if (scan_name == NULL) {
-
- return(ptr); /* Syntax error */
- }
-
- if (*ptr == '.') {
- /* We scanned the database name; scan also the table name */
-
- ptr++;
-
- database_name = scan_name;
- database_name_len = strlen(database_name);
-
- ptr = dict_scan_id(cs, ptr, heap, &table_name, TRUE, FALSE);
-
- if (table_name == NULL) {
-
- return(ptr); /* Syntax error */
- }
- } else {
- /* To be able to read table dumps made with InnoDB-4.0.17 or
- earlier, we must allow the dot separator between the database
- name and the table name also to appear within a quoted
- identifier! InnoDB used to print a constraint as:
- ... REFERENCES `databasename.tablename` ...
- starting from 4.0.18 it is
- ... REFERENCES `databasename`.`tablename` ... */
- const char* s;
-
- for (s = scan_name; *s; s++) {
- if (*s == '.') {
- database_name = scan_name;
- database_name_len = s - scan_name;
- scan_name = ++s;
- break;/* to do: multiple dots? */
- }
- }
-
- table_name = scan_name;
- }
-
- *ref_name = dict_get_referenced_table(
- name, database_name, database_name_len,
- table_name, strlen(table_name), table, heap);
-
- *success = TRUE;
- return(ptr);
-}
-
-/*********************************************************************//**
-Scans past a single identifier; the scan is asked to accept '.' as part
-of the identifier.
-@return pointer to the position where scanning stopped */
-static
-const char*
-dict_skip_word(
-/*===========*/
-	struct charset_info_st*	cs,	/*!< in: the character set of ptr */
-	const char*		ptr,	/*!< in: scanned to */
-	ibool*			success)/*!< out: TRUE if an id was scanned,
-					FALSE if just spaces were left in the
-					string or on a syntax error */
-{
-	const char*	word = NULL;
-	const char*	scanned_to;
-
-	/* No heap is passed: the identifier itself is not needed, only
-	the fact that one was found. */
-	scanned_to = dict_scan_id(cs, ptr, NULL, &word, FALSE, TRUE);
-
-	*success = (word != NULL) ? TRUE : FALSE;
-
-	return(scanned_to);
-}
-
-/*********************************************************************//**
-Builds a copy of an SQL string with the MySQL comments left out. Outside of
-quoted text three comment forms are recognised:
-(a) '#' up to the end of the line,
-(b) '--[space]' up to the end of the line, and
-(c) '[slash][asterisk]' up to the next '[asterisk][slash]' (the familiar
-C comment syntax).
-@return own: copy of the SQL string without comments; the caller must free
-this with mem_free()! */
-static
-char*
-dict_strip_comments(
-/*================*/
-	const char*	sql_string,	/*!< in: SQL string */
-	size_t		sql_length)	/*!< in: length of sql_string */
-{
-	const char* const	src_end = sql_string + sql_length;
-	const char*		src = sql_string;
-	char*			out_start;
-	char*			out;
-	/* quote character that is still open (0 when outside quotes) */
-	char			open_quote = 0;
-	/* set when an unescaped backslash was just seen inside quotes */
-	bool			escaped = false;
-
-	DBUG_ENTER("dict_strip_comments");
-
-	DBUG_PRINT("dict_strip_comments", ("%s", sql_string));
-
-	/* The output can never be longer than the input plus the NUL. */
-	out_start = static_cast<char*>(mem_alloc(sql_length + 1));
-	out = out_start;
-
-	while (src < src_end && *src != '\0') {
-		const char	c = *src;
-
-		if (c == open_quote) {
-			/* Candidate closing quote: it ends the quoted text
-			unless it was escaped by a backslash. */
-			if (escaped) {
-				escaped = false;
-			} else {
-				open_quote = 0;
-			}
-		} else if (open_quote) {
-			/* Inside quotes: only track backslash escapes, never
-			look for comments or new quotes. */
-			if (escaped) {
-				escaped = false;
-			} else if (c == '\\') {
-				escaped = true;
-			}
-		} else if (c == '"' || c == '`' || c == '\'') {
-			/* Opening quote: remember which character closes it. */
-			open_quote = c;
-		} else if (c == '#'
-			   || (c == '-' && src[1] == '-' && src[2] == ' ')) {
-			/* Line comment: drop everything up to, but not
-			including, the line terminator (0x0A on Unix,
-			0x0D 0x0A on Windows) or the end of the string. */
-			do {
-				++src;
-			} while (src < src_end
-				 && *src != (char) 0x0A
-				 && *src != (char) 0x0D
-				 && *src != '\0');
-
-			continue;
-		} else if (c == '/' && src[1] == '*') {
-			/* C style comment: drop everything through the
-			closing asterisk-slash, or to the end of the string
-			if the comment is never closed. */
-			src += 2;
-
-			while (src < src_end && *src != '\0') {
-				if (*src == '*' && src[1] == '/') {
-					src += 2;
-					break;
-				}
-
-				src++;
-			}
-
-			continue;
-		}
-
-		*out++ = *src++;
-	}
-
-	*out = '\0';
-
-	ut_a(out <= out_start + sql_length);
-
-	DBUG_PRINT("dict_strip_comments", ("%s", out_start));
-	DBUG_RETURN(out_start);
-}
-
-/*********************************************************************//**
-Finds the highest [number] for foreign key constraints of the table. Looks
-only at the >= 4.0.18-format id's, which are of the form
-databasename/tablename_ibfk_[number].
-@return highest number, 0 if table has no new format foreign key constraints */
-UNIV_INTERN
-ulint
-dict_table_get_highest_foreign_id(
-/*==============================*/
-	dict_table_t*	table)	/*!< in: table in the dictionary memory cache */
-{
-	dict_foreign_t*	foreign;
-	char*		endp;
-	ulint		biggest_id = 0;
-	ulint		id;
-	ulint		len;
-
-	ut_a(table);
-
-	len = ut_strlen(table->name);
-
-	for (dict_foreign_set::iterator it = table->foreign_set.begin();
-	     it != table->foreign_set.end();
-	     ++it) {
-		/* Room for a database prefix plus a converted name of up to
-		MAX_TABLE_NAME_LEN bytes after the '/'.
-		NOTE(review): assumes the length passed to
-		innobase_convert_to_filename_charset() bounds the bytes
-		written to the destination - confirm. */
-		char		fkid[MAX_TABLE_NAME_LEN * 2 + 20];
-		char*		fkid_slash;
-		const char*	id_slash;
-
-		foreign = *it;
-
-		/* Bounded copy: the constraint id is not guaranteed to fit
-		the local buffer, so never copy more than it can hold. */
-		strncpy(fkid, foreign->id, (sizeof fkid) - 1);
-		fkid[(sizeof fkid) - 1] = '\0';
-
-		fkid_slash = strchr(fkid, '/');
-		id_slash = strchr(foreign->id, '/');
-
-		if (fkid_slash == NULL || id_slash == NULL) {
-			/* No 'databasename/' prefix: such an id cannot be of
-			the databasename/tablename_ibfk_[number] form, and
-			there is nothing after a '/' to convert. */
-			continue;
-		}
-
-		/* Convert foreign key identifier on dictionary memory
-		cache to filename charset. */
-		innobase_convert_to_filename_charset(
-			fkid_slash + 1,
-			id_slash + 1,
-			MAX_TABLE_NAME_LEN);
-
-		if (ut_strlen(fkid) > ((sizeof dict_ibfk) - 1) + len
-		    && 0 == ut_memcmp(fkid, table->name, len)
-		    && 0 == ut_memcmp(fkid + len,
-				      dict_ibfk, (sizeof dict_ibfk) - 1)
-		    && fkid[len + ((sizeof dict_ibfk) - 1)] != '0') {
-			/* It is of the >= 4.0.18 format */
-
-			id = strtoul(fkid + len
-				     + ((sizeof dict_ibfk) - 1),
-				     &endp, 10);
-			if (*endp == '\0') {
-				/* Two constraints of one table must not
-				carry the same generated number. */
-				ut_a(id != biggest_id);
-
-				if (id > biggest_id) {
-					biggest_id = id;
-				}
-			}
-		}
-	}
-
-	return(biggest_id);
-}
-
-/*********************************************************************//**
-Writes a simple foreign key create clause syntax error to the foreign key
-error file: first the common report header for the table, then the
-formatted message. The output is serialised with dict_foreign_err_mutex. */
-static
-void
-dict_foreign_report_syntax_err(
-/*===========================*/
-	const char*	fmt,	/*!< in: syntax err msg; a printf-style
-				format consuming oper, name,
-				start_of_latest_foreign and ptr, in
-				that order */
-	const char*	oper,	/*!< in: operation */
-	const char*	name,	/*!< in: table name */
-	const char*	start_of_latest_foreign,
-				/*!< in: start of the foreign key clause
-				in the SQL string */
-	const char*	ptr)	/*!< in: place of the syntax error */
-{
-	FILE*	err_file;
-
-	ut_ad(!srv_read_only_mode);
-
-	err_file = dict_foreign_err_file;
-
-	mutex_enter(&dict_foreign_err_mutex);
-
-	dict_foreign_error_report_low(err_file, name);
-	fprintf(err_file, fmt, oper, name, start_of_latest_foreign, ptr);
-
-	mutex_exit(&dict_foreign_err_mutex);
-}
-
-/*********************************************************************//**
-Reports a foreign key constraint index match error twice: once to the given
-output stream and once as a warning pushed to the SQL layer. The wording
-depends on the index error code; an unknown code is a fatal error. */
-static
-void
-dict_foreign_push_index_error(
-/*==========================*/
-	trx_t*		trx,		/*!< in: trx */
-	const char*	operation,	/*!< in: operation create or alter
-					*/
-	const char*	create_name,	/*!< in: table name in create or
-					alter table */
-	const char*	latest_foreign,	/*!< in: start of latest foreign key
-					constraint name */
-	const char**	columns,	/*!< in: foreign key columns */
-	ulint		index_error,	/*!< in: error code */
-	ulint		err_col,	/*!< in: column where error happened
-					*/
-	dict_index_t*	err_index,	/*!< in: index where error happened
-					*/
-	dict_table_t*	table,		/*!< in: table */
-	FILE*		ef)		/*!< in: output stream */
-{
-	if (index_error == DB_FOREIGN_KEY_INDEX_NOT_FOUND) {
-		fprintf(ef,
-			"%s table '%s' with foreign key constraint"
-			" failed. There is no index in the referenced"
-			" table where the referenced columns appear"
-			" as the first columns near '%s'.\n",
-			operation, create_name, latest_foreign);
-		ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-			"%s table '%s' with foreign key constraint"
-			" failed. There is no index in the referenced"
-			" table where the referenced columns appear"
-			" as the first columns near '%s'.",
-			operation, create_name, latest_foreign);
-		return;
-	}
-
-	if (index_error == DB_FOREIGN_KEY_IS_PREFIX_INDEX) {
-		fprintf(ef,
-			"%s table '%s' with foreign key constraint"
-			" failed. There is only prefix index in the referenced"
-			" table where the referenced columns appear"
-			" as the first columns near '%s'.\n",
-			operation, create_name, latest_foreign);
-		ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-			"%s table '%s' with foreign key constraint"
-			" failed. There is only prefix index in the referenced"
-			" table where the referenced columns appear"
-			" as the first columns near '%s'.",
-			operation, create_name, latest_foreign);
-		return;
-	}
-
-	if (index_error == DB_FOREIGN_KEY_COL_NOT_NULL) {
-		const char*	fk_col = columns[err_col];
-
-		fprintf(ef,
-			"%s table %s with foreign key constraint"
-			" failed. You have defined a SET NULL condition but "
-			"column '%s' on index is defined as NOT NULL near '%s'.\n",
-			operation, create_name, fk_col, latest_foreign);
-		ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-			"%s table %s with foreign key constraint"
-			" failed. You have defined a SET NULL condition but "
-			"column '%s' on index is defined as NOT NULL near '%s'.",
-			operation, create_name, fk_col, latest_foreign);
-		return;
-	}
-
-	if (index_error == DB_FOREIGN_KEY_COLS_NOT_EQUAL) {
-		/* Name the column of err_index at position err_col, looked
-		up in the given table, next to the foreign key column. */
-		dict_field_t*	mismatch_field
-			= dict_index_get_nth_field(err_index, err_col);
-		const char*	index_col_name = dict_table_get_col_name(
-			table, dict_col_get_no(mismatch_field->col));
-		const char*	fk_col = columns[err_col];
-
-		fprintf(ef,
-			"%s table %s with foreign key constraint"
-			" failed. Field type or character set for column '%s' "
-			"does not mach referenced column '%s' near '%s'.\n",
-			operation, create_name, fk_col, index_col_name,
-			latest_foreign);
-		ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-			"%s table %s with foreign key constraint"
-			" failed. Field type or character set for column '%s' "
-			"does not mach referenced column '%s' near '%s'.",
-			operation, create_name, fk_col, index_col_name,
-			latest_foreign);
-		return;
-	}
-
-	/* Any other error code is not expected here. */
-	ut_error;
-}
-
-/*********************************************************************//**
-Scans a table create SQL string and adds to the data dictionary the foreign
-key constraints declared in the string. This function should be called after
-the indexes for a table have been created. Each foreign key constraint must
-be accompanied with indexes in both participating tables. The indexes are
-allowed to contain more fields than mentioned in the constraint.
-
-Every constraint that is parsed is first collected in a local set; the set
-is written to the dictionary and attached to the table only when the end of
-the SQL string is reached. Errors are reported both to the foreign key error
-file and as a warning pushed to the SQL layer.
-NOTE(review): constraints left in the local set on an error return are
-presumably released by the dict_foreign_set_free guard - confirm against its
-definition.
-@return error code or DB_SUCCESS */
-static
-dberr_t
-dict_create_foreign_constraints_low(
-/*================================*/
-	trx_t*		trx,	/*!< in: transaction */
-	mem_heap_t*	heap,	/*!< in: memory heap */
-	struct charset_info_st*	cs,/*!< in: the character set of sql_string */
-	const char*	sql_string,
-				/*!< in: CREATE TABLE or ALTER TABLE statement
-				where foreign keys are declared like:
-				FOREIGN KEY (a, b) REFERENCES table2(c, d),
-				table2 can be written also with the database
-				name before it: test.table2; the default
-				database is the database of parameter name */
-	const char*	name,	/*!< in: table full name in the normalized form
-				database_name/table_name */
-	ibool		reject_fks)
-				/*!< in: if TRUE, fail with error code
-				DB_CANNOT_ADD_CONSTRAINT if any foreign
-				keys are found. */
-{
-	dict_table_t*	table			= NULL;
-	dict_table_t*	referenced_table	= NULL;
-	dict_table_t*	table_to_alter		= NULL;
-	dict_table_t*	table_to_create		= NULL;
-	ulint		highest_id_so_far	= 0;
-	ulint		number			= 1;
-	dict_index_t*	index			= NULL;
-	dict_foreign_t*	foreign			= NULL;
-	const char*	ptr			= sql_string;
-	const char*	start_of_latest_foreign	= sql_string;
-	const char*	start_of_latest_set	= NULL;
-	FILE*		ef			= dict_foreign_err_file;
-	ulint		index_error		= DB_SUCCESS;
-	dict_index_t*	err_index		= NULL;
-	ulint		err_col			= 0;
-	const char*	constraint_name;
-	ibool		success;
-	dberr_t		error;
-	const char*	ptr1;
-	const char*	ptr2;
-	ulint		i;
-	ulint		j;
-	ibool		is_on_delete;
-	ulint		n_on_deletes;
-	ulint		n_on_updates;
-	const dict_col_t*columns[500];
-	const char*	column_names[500];
-	const char*	ref_column_names[500];
-	/* Stays NULL until a table name has been scanned successfully:
-	the name scanner can return without assigning it. */
-	const char*	referenced_table_name	= NULL;
-	dict_foreign_set	local_fk_set;
-	dict_foreign_set_free	local_fk_set_free(local_fk_set);
-	const char*	create_table_name	= NULL;
-	const char*	orig;
-	char		create_name[MAX_TABLE_NAME_LEN + 1];
-	char		operation[8];
-
-	ut_ad(!srv_read_only_mode);
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	table = dict_table_get_low(name);
-	/* First check if we are actually doing an ALTER TABLE, and in that
-	case look for the table being altered */
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, "ALTER", &success);
-
-	strcpy((char *)operation, success ? "Alter " : "Create ");
-
-	if (!success) {
-		/* Not an ALTER: pick the table name of the CREATE TABLE for
-		use in messages, then rescan the statement from the start. */
-		orig = ptr;
-		ptr = dict_scan_to(ptr, "CREATE");
-		ptr = dict_scan_to(ptr, "TABLE");
-		ptr = dict_accept(cs, ptr, "TABLE", &success);
-
-		if (success) {
-			ptr = dict_scan_table_name(cs, ptr, &table_to_create, name,
-					&success, heap, &create_table_name);
-		}
-
-		if (success) {
-			char *bufend;
-			bufend = innobase_convert_name((char *)create_name, MAX_TABLE_NAME_LEN,
-					create_table_name, strlen(create_table_name),
-					trx->mysql_thd, TRUE);
-			create_name[bufend-create_name]='\0';
-			ptr = orig;
-		} else {
-			char *bufend;
-			ptr = orig;
-			bufend = innobase_convert_name((char *)create_name, MAX_TABLE_NAME_LEN,
-					name, strlen(name), trx->mysql_thd, TRUE);
-			create_name[bufend-create_name]='\0';
-		}
-
-		goto loop;
-	}
-
-	if (table == NULL) {
-		/* On the ALTER path create_name has not been filled in yet:
-		derive it from the normalized name so that the report below
-		does not read an uninitialized buffer. */
-		char *bufend;
-		bufend = innobase_convert_name((char *)create_name, MAX_TABLE_NAME_LEN,
-				name, strlen(name), trx->mysql_thd, TRUE);
-		create_name[bufend-create_name]='\0';
-
-		mutex_enter(&dict_foreign_err_mutex);
-		dict_foreign_error_report_low(ef, create_name);
-		fprintf(ef, "%s table %s with foreign key constraint"
-			" failed. Table %s not found from data dictionary."
-			" Error close to %s.\n",
-			operation, create_name, create_name, start_of_latest_foreign);
-		mutex_exit(&dict_foreign_err_mutex);
-		ib_push_warning(trx, DB_ERROR,
-			"%s table %s with foreign key constraint"
-			" failed. Table %s not found from data dictionary."
-			" Error close to %s.",
-			operation, create_name, create_name, start_of_latest_foreign);
-
-		return(DB_ERROR);
-	}
-
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, "TABLE", &success);
-
-	if (!success) {
-
-		goto loop;
-	}
-
-	/* We are doing an ALTER TABLE: scan the table name we are altering */
-
-	orig = ptr;
-	ptr = dict_scan_table_name(cs, ptr, &table_to_alter, name,
-				   &success, heap, &referenced_table_name);
-
-	if (table_to_alter) {
-		char *bufend;
-		bufend = innobase_convert_name((char *)create_name, MAX_TABLE_NAME_LEN,
-				table_to_alter->name, strlen(table_to_alter->name),
-				trx->mysql_thd, TRUE);
-		create_name[bufend-create_name]='\0';
-	} else {
-		/* When the scan failed no name was produced; fall back to
-		the normalized name instead of reading a NULL pointer. */
-		const char*	alter_name = referenced_table_name
-			? referenced_table_name : name;
-		char *bufend;
-		bufend = innobase_convert_name((char *)create_name, MAX_TABLE_NAME_LEN,
-				alter_name, strlen(alter_name),
-				trx->mysql_thd, TRUE);
-		create_name[bufend-create_name]='\0';
-
-	}
-
-	if (!success) {
-		mutex_enter(&dict_foreign_err_mutex);
-		dict_foreign_error_report_low(ef, create_name);
-		fprintf(ef,
-			"%s table %s with foreign key constraint"
-			" failed. Table %s not found from data dictionary."
-			" Error close to %s.\n",
-			operation, create_name, create_name, orig);
-		mutex_exit(&dict_foreign_err_mutex);
-
-		ib_push_warning(trx, DB_ERROR,
-			"%s table %s with foreign key constraint"
-			" failed. Table %s not found from data dictionary."
-			" Error close to %s.",
-			operation, create_name, create_name, orig);
-
-		return(DB_ERROR);
-	}
-
-	/* Starting from 4.0.18 and 4.1.2, we generate foreign key id's in the
-	format databasename/tablename_ibfk_[number], where [number] is local
-	to the table; look for the highest [number] for table_to_alter, so
-	that we can assign to new constraints higher numbers. */
-
-	/* If we are altering a temporary table, the table name after ALTER
-	TABLE does not correspond to the internal table name, and
-	table_to_alter is NULL. TODO: should we fix this somehow? */
-
-	if (table_to_alter == NULL) {
-		highest_id_so_far = 0;
-	} else {
-		highest_id_so_far = dict_table_get_highest_foreign_id(
-			table_to_alter);
-	}
-
-	number = highest_id_so_far + 1;
-	/* Scan for foreign key declarations in a loop */
-loop:
-	/* Scan either to "CONSTRAINT" or "FOREIGN", whichever is closer */
-
-	ptr1 = dict_scan_to(ptr, "CONSTRAINT");
-	ptr2 = dict_scan_to(ptr, "FOREIGN");
-
-	constraint_name = NULL;
-
-	if (ptr1 < ptr2) {
-		/* The user may have specified a constraint name. Pick it so
-		that we can store 'databasename/constraintname' as the id of
-		of the constraint to system tables. */
-		ptr = ptr1;
-
-		orig = ptr;
-		ptr = dict_accept(cs, ptr, "CONSTRAINT", &success);
-
-		ut_a(success);
-
-		if (!my_isspace(cs, *ptr) && *ptr != '"' && *ptr != '`') {
-			goto loop;
-		}
-
-		while (my_isspace(cs, *ptr)) {
-			ptr++;
-		}
-
-		/* read constraint name unless got "CONSTRAINT FOREIGN" */
-		if (ptr != ptr2) {
-			ptr = dict_scan_id(cs, ptr, heap,
-					   &constraint_name, FALSE, FALSE);
-		}
-	} else {
-		ptr = ptr2;
-	}
-
-	if (*ptr == '\0') {
-		/* The proper way to reject foreign keys for temporary
-		tables would be to split the lexing and syntactical
-		analysis of foreign key clauses from the actual adding
-		of them, so that ha_innodb.cc could first parse the SQL
-		command, determine if there are any foreign keys, and
-		if so, immediately reject the command if the table is a
-		temporary one. For now, this kludge will work. */
-		if (reject_fks && !local_fk_set.empty()) {
-			mutex_enter(&dict_foreign_err_mutex);
-			dict_foreign_error_report_low(ef, create_name);
-			fprintf(ef, "%s table %s with foreign key constraint"
-				" failed. Temporary tables can't have foreign key constraints."
-				" Error close to %s.\n",
-				operation, create_name, start_of_latest_foreign);
-			mutex_exit(&dict_foreign_err_mutex);
-
-			ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-				"%s table %s with foreign key constraint"
-				" failed. Temporary tables can't have foreign key constraints."
-				" Error close to %s.",
-				operation, create_name, start_of_latest_foreign);
-
-			return(DB_CANNOT_ADD_CONSTRAINT);
-		}
-
-		/**********************************************************/
-		/* The following call adds the foreign key constraints
-		to the data dictionary system tables on disk */
-
-		error = dict_create_add_foreigns_to_dictionary(
-			local_fk_set, table, trx);
-
-		if (error == DB_SUCCESS) {
-
-			/* The constraints now belong to the table; empty
-			the local set so that it no longer refers to them. */
-			table->foreign_set.insert(local_fk_set.begin(),
-						  local_fk_set.end());
-			std::for_each(local_fk_set.begin(),
-				      local_fk_set.end(),
-				      dict_foreign_add_to_referenced_table());
-			local_fk_set.clear();
-		}
-		return(error);
-	}
-
-	start_of_latest_foreign = ptr;
-
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, "FOREIGN", &success);
-
-	if (!success) {
-		goto loop;
-	}
-
-	if (!my_isspace(cs, *ptr)) {
-		goto loop;
-	}
-
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, "KEY", &success);
-
-	if (!success) {
-		goto loop;
-	}
-
-	if (my_isspace(cs, *ptr)) {
-		/* Accept an optional IF NOT EXISTS after FOREIGN KEY */
-		ptr1 = dict_accept(cs, ptr, "IF", &success);
-
-		if (success) {
-			if (!my_isspace(cs, *ptr1)) {
-				goto loop;
-			}
-			ptr1 = dict_accept(cs, ptr1, "NOT", &success);
-			if (!success) {
-				goto loop;
-			}
-			ptr1 = dict_accept(cs, ptr1, "EXISTS", &success);
-			if (!success) {
-				goto loop;
-			}
-			ptr = ptr1;
-		}
-	}
-
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, "(", &success);
-
-	if (!success) {
-		if (constraint_name) {
-			/* MySQL allows also an index id before the '('; we
-			skip it */
-			ptr = dict_skip_word(cs, ptr, &success);
-			if (!success) {
-				dict_foreign_report_syntax_err(
-					"%s table %s with foreign key constraint"
-					" failed. Parse error in '%s'"
-					" near '%s'.\n",
-					operation, create_name, start_of_latest_foreign, orig);
-
-				ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-					"%s table %s with foreign key constraint"
-					" failed. Parse error in '%s'"
-					" near '%s'.",
-					operation, create_name, start_of_latest_foreign, orig);
-				return(DB_CANNOT_ADD_CONSTRAINT);
-			}
-		}
-		else {
-			/* No CONSTRAINT name was given: the identifier before
-			the '(' is taken as the constraint name. */
-			while (my_isspace(cs, *ptr)) {
-				ptr++;
-			}
-
-			ptr = dict_scan_id(cs, ptr, heap,
-					   &constraint_name, FALSE, FALSE);
-		}
-
-		ptr = dict_accept(cs, ptr, "(", &success);
-
-		if (!success) {
-			/* We do not flag a syntax error here because in an
-			ALTER TABLE we may also have DROP FOREIGN KEY abc */
-
-			goto loop;
-		}
-	}
-
-	i = 0;
-
-	/* Scan the columns in the first list */
-col_loop1:
-	ut_a(i < (sizeof column_names) / sizeof *column_names);
-	orig = ptr;
-	ptr = dict_scan_col(cs, ptr, &success, table, columns + i,
-			    heap, column_names + i);
-	if (!success) {
-		mutex_enter(&dict_foreign_err_mutex);
-		dict_foreign_error_report_low(ef, create_name);
-		fprintf(ef,
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s'"
-			" near '%s'.\n",
-			operation, create_name, start_of_latest_foreign, orig);
-
-		mutex_exit(&dict_foreign_err_mutex);
-
-		ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s'"
-			" near '%s'.",
-			operation, create_name, start_of_latest_foreign, orig);
-
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	i++;
-
-	ptr = dict_accept(cs, ptr, ",", &success);
-
-	if (success) {
-		goto col_loop1;
-	}
-
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, ")", &success);
-
-	if (!success) {
-		dict_foreign_report_syntax_err(
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s'"
-			" near '%s'.\n",
-			operation, create_name, start_of_latest_foreign, orig);
-
-		ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s'"
-			" near '%s'.",
-			operation, create_name, start_of_latest_foreign, orig);
-
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	/* Try to find an index which contains the columns
-	as the first fields and in the right order. There is
-	no need to check column type match (on types_idx), since
-	the referenced table can be NULL if foreign_key_checks is
-	set to 0 */
-
-	index = dict_foreign_find_index(
-		table, NULL, column_names, i,
-		NULL, TRUE, FALSE, &index_error, &err_col, &err_index);
-
-	if (!index) {
-		mutex_enter(&dict_foreign_err_mutex);
-		dict_foreign_error_report_low(ef, create_name);
-		fputs("There is no index in table ", ef);
-		ut_print_name(ef, NULL, TRUE, create_name);
-		fprintf(ef, " where the columns appear\n"
-			"as the first columns. Constraint:\n%s\n"
-			"See " REFMAN "innodb-foreign-key-constraints.html\n"
-			"for correct foreign key definition.\n",
-			start_of_latest_foreign);
-		dict_foreign_push_index_error(trx, operation, create_name, start_of_latest_foreign,
-			column_names, index_error, err_col, err_index, table, ef);
-
-		mutex_exit(&dict_foreign_err_mutex);
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, "REFERENCES", &success);
-
-	if (!success || !my_isspace(cs, *ptr)) {
-		dict_foreign_report_syntax_err(
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s'"
-			" near '%s'.\n",
-			operation, create_name, start_of_latest_foreign, orig);
-
-		ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s'"
-			" near '%s'.",
-			operation, create_name, start_of_latest_foreign, orig);
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	/* Let us create a constraint struct */
-
-	foreign = dict_mem_foreign_create();
-
-	if (constraint_name) {
-		ulint	db_len;
-
-		/* Catenate 'databasename/' to the constraint name specified
-		by the user: we conceive the constraint as belonging to the
-		same MySQL 'database' as the table itself. We store the name
-		to foreign->id. */
-
-		db_len = dict_get_db_name_len(table->name);
-
-		foreign->id = static_cast<char*>(mem_heap_alloc(
-			foreign->heap, db_len + strlen(constraint_name) + 2));
-
-		ut_memcpy(foreign->id, table->name, db_len);
-		foreign->id[db_len] = '/';
-		strcpy(foreign->id + db_len + 1, constraint_name);
-	}
-
-	if (foreign->id == NULL) {
-		/* No user supplied name: generate an id from the running
-		number. */
-		error = dict_create_add_foreign_id(&number,
-						table->name, foreign);
-		if (error != DB_SUCCESS) {
-			/* Not yet in local_fk_set: free it here. */
-			dict_foreign_free(foreign);
-			return(error);
-		}
-	}
-
-	std::pair<dict_foreign_set::iterator, bool>	ret
-		= local_fk_set.insert(foreign);
-
-	if (!ret.second) {
-		/* A duplicate foreign key name has been found; the object
-		was not inserted, so free it here. */
-		dict_foreign_free(foreign);
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	/* From here on the constraint is held by local_fk_set and must not
-	be freed directly. */
-
-	foreign->foreign_table = table;
-	foreign->foreign_table_name = mem_heap_strdup(
-		foreign->heap, table->name);
-	dict_mem_foreign_table_name_lookup_set(foreign, TRUE);
-
-	foreign->foreign_index = index;
-	foreign->n_fields = (unsigned int) i;
-
-	foreign->foreign_col_names = static_cast<const char**>(
-		mem_heap_alloc(foreign->heap, i * sizeof(void*)));
-
-	for (i = 0; i < foreign->n_fields; i++) {
-		foreign->foreign_col_names[i] = mem_heap_strdup(
-			foreign->heap,
-			dict_table_get_col_name(table,
-				dict_col_get_no(columns[i])));
-	}
-
-	/* Forget any name left over from the ALTER TABLE scan so that a
-	failed scan below cannot report a stale name. */
-	referenced_table_name = NULL;
-
-	ptr = dict_scan_table_name(cs, ptr, &referenced_table, name,
-				   &success, heap, &referenced_table_name);
-
-	/* Note that referenced_table can be NULL if the user has suppressed
-	checking of foreign key constraints! */
-
-	if (!success || (!referenced_table && trx->check_foreigns)) {
-		char	buf[MAX_TABLE_NAME_LEN + 1] = "";
-		char*	bufend;
-		/* No name is available when the scan itself failed. */
-		const char*	ref_name = referenced_table_name
-			? referenced_table_name : "";
-
-		bufend = innobase_convert_name(buf, MAX_TABLE_NAME_LEN,
-				ref_name, strlen(ref_name),
-				trx->mysql_thd, TRUE);
-		buf[bufend - buf] = '\0';
-
-		ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-			"%s table %s with foreign key constraint failed. Referenced table %s not found in the data dictionary "
-			"near '%s'.",
-			operation, create_name, buf, start_of_latest_foreign);
-		mutex_enter(&dict_foreign_err_mutex);
-		dict_foreign_error_report_low(ef, create_name);
-		fprintf(ef,
-			"%s table %s with foreign key constraint failed. Referenced table %s not found in the data dictionary "
-			"near '%s'.\n",
-			operation, create_name, buf, start_of_latest_foreign);
-
-		mutex_exit(&dict_foreign_err_mutex);
-
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, "(", &success);
-
-	if (!success) {
-		dict_foreign_report_syntax_err(
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s'"
-			" near '%s'.\n",
-			operation, create_name, start_of_latest_foreign, orig);
-
-		ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s'"
-			" near '%s'.",
-			operation, create_name, start_of_latest_foreign, orig);
-
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	/* Scan the columns in the second list */
-	i = 0;
-
-col_loop2:
-	/* Same bound as for the first list: never write past the end of
-	the fixed size columns[] and ref_column_names[] arrays. */
-	ut_a(i < (sizeof ref_column_names) / sizeof *ref_column_names);
-	orig = ptr;
-	ptr = dict_scan_col(cs, ptr, &success, referenced_table, columns + i,
-			    heap, ref_column_names + i);
-	i++;
-
-	if (!success) {
-
-		mutex_enter(&dict_foreign_err_mutex);
-		dict_foreign_error_report_low(ef, create_name);
-		fprintf(ef,
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s'"
-			" near '%s'.\n",
-			operation, create_name, start_of_latest_foreign, orig);
-		mutex_exit(&dict_foreign_err_mutex);
-		ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s'"
-			" near '%s'.",
-			operation, create_name, start_of_latest_foreign, orig);
-
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, ",", &success);
-
-	if (success) {
-		goto col_loop2;
-	}
-
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, ")", &success);
-
-	if (!success || foreign->n_fields != i) {
-
-		dict_foreign_report_syntax_err(
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s' near '%s'. Referencing column count does not match referenced column count.\n",
-			operation, create_name, start_of_latest_foreign, orig);
-
-		/* n_fields is the number of referencing columns and i the
-		number of referenced columns; both are passed as unsigned
-		long to match the conversion specifiers. */
-		ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s' near '%s'. Referencing column count %lu does not match referenced column count %lu.\n",
-			operation, create_name, start_of_latest_foreign, orig,
-			(unsigned long) foreign->n_fields, (unsigned long) i);
-
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	n_on_deletes = 0;
-	n_on_updates = 0;
-
-scan_on_conditions:
-	/* Loop here as long as we can find ON ... conditions */
-
-	start_of_latest_set = ptr;
-	ptr = dict_accept(cs, ptr, "ON", &success);
-
-	if (!success) {
-
-		goto try_find_index;
-	}
-
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, "DELETE", &success);
-
-	if (!success) {
-		orig = ptr;
-		ptr = dict_accept(cs, ptr, "UPDATE", &success);
-
-		if (!success) {
-
-			dict_foreign_report_syntax_err(
-				"%s table %s with foreign key constraint"
-				" failed. Parse error in '%s'"
-				" near '%s'.\n",
-				operation, create_name, start_of_latest_foreign, start_of_latest_set);
-
-			ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-				"%s table %s with foreign key constraint"
-				" failed. Parse error in '%s'"
-				" near '%s'.",
-				operation, create_name, start_of_latest_foreign, start_of_latest_set);
-
-			return(DB_CANNOT_ADD_CONSTRAINT);
-		}
-
-		is_on_delete = FALSE;
-		n_on_updates++;
-	} else {
-		is_on_delete = TRUE;
-		n_on_deletes++;
-	}
-
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, "RESTRICT", &success);
-
-	if (success) {
-		/* RESTRICT sets no type flag */
-		goto scan_on_conditions;
-	}
-
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, "CASCADE", &success);
-
-	if (success) {
-		if (is_on_delete) {
-			foreign->type |= DICT_FOREIGN_ON_DELETE_CASCADE;
-		} else {
-			foreign->type |= DICT_FOREIGN_ON_UPDATE_CASCADE;
-		}
-
-		goto scan_on_conditions;
-	}
-
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, "NO", &success);
-
-	if (success) {
-		orig = ptr;
-		ptr = dict_accept(cs, ptr, "ACTION", &success);
-
-		if (!success) {
-			dict_foreign_report_syntax_err(
-				"%s table %s with foreign key constraint"
-				" failed. Parse error in '%s'"
-				" near '%s'.\n",
-				operation, create_name, start_of_latest_foreign, start_of_latest_set);
-
-			ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-				"%s table %s with foreign key constraint"
-				" failed. Parse error in '%s'"
-				" near '%s'.",
-				operation, create_name, start_of_latest_foreign, start_of_latest_set);
-
-			return(DB_CANNOT_ADD_CONSTRAINT);
-		}
-
-		if (is_on_delete) {
-			foreign->type |= DICT_FOREIGN_ON_DELETE_NO_ACTION;
-		} else {
-			foreign->type |= DICT_FOREIGN_ON_UPDATE_NO_ACTION;
-		}
-
-		goto scan_on_conditions;
-	}
-
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, "SET", &success);
-
-	if (!success) {
-		dict_foreign_report_syntax_err(
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s'"
-			" near '%s'.\n",
-			operation, create_name, start_of_latest_foreign, start_of_latest_set);
-
-		ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s'"
-			" near '%s'.",
-			operation, create_name, start_of_latest_foreign, start_of_latest_set);
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	orig = ptr;
-	ptr = dict_accept(cs, ptr, "NULL", &success);
-
-	if (!success) {
-		dict_foreign_report_syntax_err(
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s'"
-			" near '%s'.\n",
-			operation, create_name, start_of_latest_foreign, start_of_latest_set);
-
-		ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-			"%s table %s with foreign key constraint"
-			" failed. Parse error in '%s'"
-			" near '%s'.",
-			operation, create_name, start_of_latest_foreign, start_of_latest_set);
-
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	for (j = 0; j < foreign->n_fields; j++) {
-		if ((dict_index_get_nth_col(foreign->foreign_index, j)->prtype)
-		    & DATA_NOT_NULL) {
-			const dict_col_t*	col
-				= dict_index_get_nth_col(foreign->foreign_index, j);
-			const char* col_name = dict_table_get_col_name(foreign->foreign_index->table,
-				dict_col_get_no(col));
-
-			/* It is not sensible to define SET NULL
-			if the column is not allowed to be NULL! */
-
-			mutex_enter(&dict_foreign_err_mutex);
-			dict_foreign_error_report_low(ef, create_name);
-			fprintf(ef,
-				"%s table %s with foreign key constraint"
-				" failed. You have defined a SET NULL condition but column '%s' is defined as NOT NULL"
-				" in '%s' near '%s'.\n",
-				operation, create_name, col_name, start_of_latest_foreign, start_of_latest_set);
-			mutex_exit(&dict_foreign_err_mutex);
-
-			ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-				"%s table %s with foreign key constraint"
-				" failed. You have defined a SET NULL condition but column '%s' is defined as NOT NULL"
-				" in '%s' near '%s'.",
-				operation, create_name, col_name, start_of_latest_foreign, start_of_latest_set);
-
-			return(DB_CANNOT_ADD_CONSTRAINT);
-		}
-	}
-
-	if (is_on_delete) {
-		foreign->type |= DICT_FOREIGN_ON_DELETE_SET_NULL;
-	} else {
-		foreign->type |= DICT_FOREIGN_ON_UPDATE_SET_NULL;
-	}
-
-	goto scan_on_conditions;
-
-try_find_index:
-	if (n_on_deletes > 1 || n_on_updates > 1) {
-		/* It is an error to define more than 1 action */
-
-		mutex_enter(&dict_foreign_err_mutex);
-		dict_foreign_error_report_low(ef, create_name);
-		fprintf(ef,
-			"%s table %s with foreign key constraint"
-			" failed. You have more than one on delete or on update clause"
-			" in '%s' near '%s'.\n",
-			operation, create_name, start_of_latest_foreign, start_of_latest_set);
-
-		ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT,
-			"%s table %s with foreign key constraint"
-			" failed. You have more than one on delete or on update clause"
-			" in '%s' near '%s'.",
-			operation, create_name, start_of_latest_foreign, start_of_latest_set);
-
-		/* Do not free the constraint here: it is already held by
-		local_fk_set, like on every other error return after the
-		insertion.
-		NOTE(review): relies on the dict_foreign_set_free guard
-		releasing the members of the set - confirm. */
-		mutex_exit(&dict_foreign_err_mutex);
-
-		return(DB_CANNOT_ADD_CONSTRAINT);
-	}
-
-	/* Try to find an index which contains the columns as the first fields
-	and in the right order, and the types are the same as in
-	foreign->foreign_index */
-
-	if (referenced_table) {
-		index = dict_foreign_find_index(referenced_table, NULL,
-						ref_column_names, i,
-						foreign->foreign_index,
-			TRUE, FALSE, &index_error, &err_col, &err_index);
-		if (!index) {
-			mutex_enter(&dict_foreign_err_mutex);
-			dict_foreign_error_report_low(ef, create_name);
-			fprintf(ef, "%s:\n"
-				"Cannot find an index in the"
-				" referenced table where the\n"
-				"referenced columns appear as the"
-				" first columns, or column types\n"
-				"in the table and the referenced table"
-				" do not match for constraint.\n"
-				"Note that the internal storage type of"
-				" ENUM and SET changed in\n"
-				"tables created with >= InnoDB-4.1.12,"
-				" and such columns in old tables\n"
-				"cannot be referenced by such columns"
-				" in new tables.\n"
-				"See " REFMAN
-				"innodb-foreign-key-constraints.html\n"
-				"for correct foreign key definition.\n",
-				start_of_latest_foreign);
-
-			dict_foreign_push_index_error(trx, operation, create_name, start_of_latest_foreign,
-				column_names, index_error, err_col, err_index, referenced_table, ef);
-
-			mutex_exit(&dict_foreign_err_mutex);
-
-			return(DB_CANNOT_ADD_CONSTRAINT);
-		}
-	} else {
-		ut_a(trx->check_foreigns == FALSE);
-		index = NULL;
-	}
-
-	foreign->referenced_index = index;
-	foreign->referenced_table = referenced_table;
-
-	foreign->referenced_table_name = mem_heap_strdup(
-		foreign->heap, referenced_table_name);
-	dict_mem_referenced_table_name_lookup_set(foreign, TRUE);
-
-	foreign->referenced_col_names = static_cast<const char**>(
-		mem_heap_alloc(foreign->heap, i * sizeof(void*)));
-
-	for (i = 0; i < foreign->n_fields; i++) {
-		foreign->referenced_col_names[i]
-			= mem_heap_strdup(foreign->heap, ref_column_names[i]);
-	}
-
-	/* Continue with the next foreign key declaration, if any */
-	goto loop;
-}
-
-/**************************************************************************
-Tests whether a string begins with the given keyword, using the character
-set of the supplied MySQL thread for the comparison.
-@return TRUE if str starts with keyword */
-UNIV_INTERN
-ibool
-dict_str_starts_with_keyword(
-/*=========================*/
-	THD*		thd,		/*!< in: MySQL thread handle */
-	const char*	str,		/*!< in: string to scan for keyword */
-	const char*	keyword)	/*!< in: keyword to look for */
-{
-	ibool	matched;
-
-	/* Only the success flag matters here; the scan position that
-	dict_accept() returns is deliberately ignored. */
-	dict_accept(innobase_get_charset(thd), str, keyword, &matched);
-
-	return(matched);
-}
-
-/*********************************************************************//**
-Scans a CREATE TABLE / ALTER TABLE SQL string and registers in the data
-dictionary every foreign key constraint declared in it. Call this only
-after the indexes of the table have been created: each constraint needs
-an index in both participating tables (the indexes may contain more
-fields than the constraint mentions).
-The SQL text is first stripped of comments; the stripped copy and the
-temporary heap used for parsing are both released before returning.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-dict_create_foreign_constraints(
-/*============================*/
-	trx_t*		trx,		/*!< in: transaction */
-	const char*	sql_string,	/*!< in: table create statement where
-					foreign keys are declared like:
-					FOREIGN KEY (a, b) REFERENCES
-					table2(c, d), table2 can be written
-					also with the database
-					name before it: test.table2; the
-					default database is the database of
-					parameter name */
-	size_t		sql_length,	/*!< in: length of sql_string */
-	const char*	name,		/*!< in: table full name in the
-					normalized form
-					database_name/table_name */
-	ibool		reject_fks)	/*!< in: if TRUE, fail with error
-					code DB_CANNOT_ADD_CONSTRAINT if
-					any foreign keys are found. */
-{
-	ut_a(trx);
-	ut_a(trx->mysql_thd);
-
-	char*		stripped = dict_strip_comments(sql_string, sql_length);
-	mem_heap_t*	parse_heap = mem_heap_create(10000);
-
-	const dberr_t	result = dict_create_foreign_constraints_low(
-		trx, parse_heap, innobase_get_charset(trx->mysql_thd),
-		stripped, name, reject_fks);
-
-	mem_heap_free(parse_heap);
-	mem_free(stripped);
-
-	return(result);
-}
-
-/**********************************************************************//**
-Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
-The statement text is fetched from the transaction's MySQL thread with
-innobase_get_stmt(), copied through dict_strip_comments(), and scanned
-for every "DROP FOREIGN KEY [IF EXISTS] id" clause. Each id found is
-stored in the constraints_to_drop array and then looked up in
-table->foreign_set; the first id that is not found there ends the scan
-with an error. The array has room for 1000 ids, which is enforced with
-ut_a(). Unless srv_read_only_mode is set, error details are written to
-dict_foreign_err_file. The stripped copy of the statement is freed on
-every return path. The caller is expected to hold dict_sys->mutex
-(debug assertion).
-@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
-constraint id does not match */
-UNIV_INTERN
-dberr_t
-dict_foreign_parse_drop_constraints(
-/*================================*/
- mem_heap_t* heap, /*!< in: heap from which we can
- allocate memory */
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table, /*!< in: table */
- ulint* n, /*!< out: number of constraints
- to drop */
- const char*** constraints_to_drop) /*!< out: id's of the
- constraints to drop */
-{
- ibool success;
- char* str;
- size_t len;
- const char* ptr;
- const char* ptr1;
- const char* id;
- struct charset_info_st* cs;
-
- ut_a(trx);
- ut_a(trx->mysql_thd);
-
- cs = innobase_get_charset(trx->mysql_thd);
-
- *n = 0;
-
- *constraints_to_drop = static_cast<const char**>(
- mem_heap_alloc(heap, 1000 * sizeof(char*))); /* fixed capacity: 1000 ids, see ut_a() below */
-
- ptr = innobase_get_stmt(trx->mysql_thd, &len);
-
- str = dict_strip_comments(ptr, len); /* released with mem_free() on every return path */
-
- ptr = str;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-loop:
- ptr = dict_scan_to(ptr, "DROP"); /* the check below treats a NUL here as the end of the statement */
-
- if (*ptr == '\0') {
- mem_free(str);
-
- return(DB_SUCCESS);
- }
-
- ptr = dict_accept(cs, ptr, "DROP", &success);
-
- if (!my_isspace(cs, *ptr)) {
-
- goto loop; /* DROP not followed by whitespace: keep scanning */
- }
-
- ptr = dict_accept(cs, ptr, "FOREIGN", &success);
-
- if (!success || !my_isspace(cs, *ptr)) {
-
- goto loop; /* some other kind of DROP clause: keep scanning */
- }
-
- ptr = dict_accept(cs, ptr, "KEY", &success);
-
- if (!success) {
-
- goto syntax_error;
- }
-
- ptr1 = dict_accept(cs, ptr, "IF", &success); /* optional IF EXISTS; ptr only advances if both words match */
-
- if (success && my_isspace(cs, *ptr1)) {
- ptr1 = dict_accept(cs, ptr1, "EXISTS", &success);
- if (success) {
-
- ptr = ptr1;
- }
- }
-
- ptr = dict_scan_id(cs, ptr, heap, &id, FALSE, TRUE);
-
- if (id == NULL) {
-
- goto syntax_error;
- }
-
- ut_a(*n < 1000);
- (*constraints_to_drop)[*n] = id;
- (*n)++;
-
- if (std::find_if(table->foreign_set.begin(),
- table->foreign_set.end(),
- dict_foreign_matches_id(id))
- == table->foreign_set.end()) { /* no constraint of this table matches the id */
-
- if (!srv_read_only_mode) {
- FILE* ef = dict_foreign_err_file;
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
- fputs(" Error in dropping of a foreign key "
- "constraint of table ", ef);
- ut_print_name(ef, NULL, TRUE, table->name);
- fputs(",\nin SQL command\n", ef);
- fputs(str, ef);
- fputs("\nCannot find a constraint with the "
- "given id ", ef);
- ut_print_name(ef, NULL, FALSE, id);
- fputs(".\n", ef);
- mutex_exit(&dict_foreign_err_mutex);
- }
-
- mem_free(str);
-
- return(DB_CANNOT_DROP_CONSTRAINT);
- }
-
- goto loop;
-
-syntax_error:
- if (!srv_read_only_mode) {
- FILE* ef = dict_foreign_err_file;
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
- fputs(" Syntax error in dropping of a"
- " foreign key constraint of table ", ef);
- ut_print_name(ef, NULL, TRUE, table->name);
- fprintf(ef, ",\n"
- "close to:\n%s\n in SQL command\n%s\n", ptr, str);
- mutex_exit(&dict_foreign_err_mutex);
- }
-
- mem_free(str);
-
- return(DB_CANNOT_DROP_CONSTRAINT);
-}
-
-/*==================== END OF FOREIGN KEY PROCESSING ====================*/
-
-/**********************************************************************//**
-Looks up an index in the dictionary cache by its id. The caller must
-already hold dict_sys->mutex (checked with a debug assertion).
-@return index, NULL if not found */
-UNIV_INTERN
-dict_index_t*
-dict_index_get_if_in_cache_low(
-/*===========================*/
-	index_id_t	index_id)	/*!< in: index id */
-{
-	dict_index_t*	cached_index;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	cached_index = dict_index_find_on_id_low(index_id);
-
-	return(cached_index);
-}
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/**********************************************************************//**
-Looks up an index in the dictionary cache by its id, acquiring and
-releasing dict_sys->mutex around the lookup. If the dictionary system
-does not exist yet (dict_sys == NULL) nothing is looked up.
-@return index, NULL if not found */
-UNIV_INTERN
-dict_index_t*
-dict_index_get_if_in_cache(
-/*=======================*/
-	index_id_t	index_id)	/*!< in: index id */
-{
-	dict_index_t*	cached_index = NULL;
-
-	if (dict_sys != NULL) {
-		mutex_enter(&(dict_sys->mutex));
-
-		cached_index = dict_index_get_if_in_cache_low(index_id);
-
-		mutex_exit(&(dict_sys->mutex));
-	}
-
-	return(cached_index);
-}
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Asserts that the n_fields_cmp value of a search tuple does not exceed the
-number of fields that identify a record uniquely in the index tree, so
-that the page number field of a node pointer is never compared.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dict_index_check_search_tuple(
-/*==========================*/
-	const dict_index_t*	index,	/*!< in: index tree */
-	const dtuple_t*		tuple)	/*!< in: tuple used in a search */
-{
-	ut_a(index);
-
-	const ulint	n_compared = dtuple_get_n_fields_cmp(tuple);
-
-	ut_a(n_compared <= dict_index_get_n_unique_in_tree(index));
-
-	return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/**********************************************************************//**
-Builds a node pointer tuple from a physical record and a child page number.
-The tuple consists of a prefix of the record followed by one extra 4-byte
-field of type DATA_SYS_CHILD holding the page number.
-@return own: node pointer */
-UNIV_INTERN
-dtuple_t*
-dict_index_build_node_ptr(
-/*======================*/
-	const dict_index_t*	index,	/*!< in: index */
-	const rec_t*		rec,	/*!< in: record for which to build node
-					pointer */
-	ulint			page_no,/*!< in: page number to put in node
-					pointer */
-	mem_heap_t*		heap,	/*!< in: memory heap where pointer
-					created */
-	ulint			level)	/*!< in: level of rec in tree:
-					0 means leaf level */
-{
-	ulint	n_prefix;
-
-	if (!dict_index_is_univ(index)) {
-		n_prefix = dict_index_get_n_unique_in_tree(index);
-	} else {
-		/* Universal index tree: on the leaf level the whole record
-		is the node pointer; on upper levels the last field (the
-		child page number) is left out. */
-
-		ut_a(!dict_table_is_comp(index->table));
-		n_prefix = rec_get_n_fields_old(rec);
-
-		if (level > 0) {
-			ut_a(n_prefix > 1);
-			n_prefix--;
-		}
-	}
-
-	dtuple_t*	node_ptr = dtuple_create(heap, n_prefix + 1);
-
-	/* The trailing page number field must not take part in searches:
-	upper levels may hold identical node pointers that differ only in
-	the page number, so compare one field less. */
-	dtuple_set_n_fields_cmp(node_ptr, n_prefix);
-
-	dict_index_copy_types(node_ptr, index, n_prefix);
-
-	byte*	page_no_buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-
-	mach_write_to_4(page_no_buf, page_no);
-
-	dfield_t*	child_field = dtuple_get_nth_field(node_ptr, n_prefix);
-
-	dfield_set_data(child_field, page_no_buf, 4);
-	dtype_set(dfield_get_type(child_field),
-		  DATA_SYS_CHILD, DATA_NOT_NULL, 4);
-
-	rec_copy_prefix_to_dtuple(node_ptr, rec, index, n_prefix, heap);
-
-	dtuple_set_info_bits(node_ptr,
-			     dtuple_get_info_bits(node_ptr)
-			     | REC_STATUS_NODE_PTR);
-
-	ut_ad(dtuple_check_typed(node_ptr));
-
-	return(node_ptr);
-}
-
-/**********************************************************************//**
-Copies the leading fields of a physical record into a buffer; the number
-of fields copied is enough to identify an index entry uniquely. For a
-universal index all fields of the (old-style) record are copied.
-@return pointer to the prefix record */
-UNIV_INTERN
-rec_t*
-dict_index_copy_rec_order_prefix(
-/*=============================*/
-	const dict_index_t*	index,	/*!< in: index */
-	const rec_t*		rec,	/*!< in: record for which to
-					copy prefix */
-	ulint*			n_fields,/*!< out: number of fields copied */
-	byte**			buf,	/*!< in/out: memory buffer for the
-					copied prefix, or NULL */
-	ulint*			buf_size)/*!< in/out: buffer size */
-{
-	UNIV_PREFETCH_R(rec);
-
-	ulint	n_copied;
-
-	if (!dict_index_is_univ(index)) {
-		n_copied = dict_index_get_n_unique_in_tree(index);
-	} else {
-		ut_a(!dict_table_is_comp(index->table));
-		n_copied = rec_get_n_fields_old(rec);
-	}
-
-	*n_fields = n_copied;
-
-	return(rec_copy_prefix_to_buf(rec, index, n_copied, buf, buf_size));
-}
-
-/**********************************************************************//**
-Creates a typed data tuple in the given heap and fills it with the first
-n_fields fields of a physical record.
-@return own: data tuple */
-UNIV_INTERN
-dtuple_t*
-dict_index_build_data_tuple(
-/*========================*/
-	dict_index_t*	index,	/*!< in: index tree */
-	rec_t*		rec,	/*!< in: record for which to build data tuple */
-	ulint		n_fields,/*!< in: number of data fields */
-	mem_heap_t*	heap)	/*!< in: memory heap where tuple created */
-{
-	ut_ad(dict_table_is_comp(index->table)
-	      || n_fields <= rec_get_n_fields_old(rec));
-
-	dtuple_t* const	row = dtuple_create(heap, n_fields);
-
-	dict_index_copy_types(row, index, n_fields);
-	rec_copy_prefix_to_dtuple(row, rec, index, n_fields, heap);
-
-	ut_ad(dtuple_check_typed(row));
-
-	return(row);
-}
-
-/*********************************************************************//**
-Calculates the minimum record length in an index.
-For a compact-format table the result is the new-style extra bytes, the
-fixed sizes of the fixed-length columns, one or two length bytes for each
-variable-length column (one if col->len < 128), and the NULL flag bytes.
-Otherwise it is the sum of the fixed sizes, one or two bytes per field
-(two if that sum exceeds 127), and the old-style extra bytes.
-@return minimum record length */
-UNIV_INTERN
-ulint
-dict_index_calc_min_rec_len(
-/*========================*/
-	const dict_index_t*	index)	/*!< in: index */
-{
-	const ulint	comp = dict_table_is_comp(index->table);
-
-	if (!comp) {
-		ulint	len = 0;
-
-		for (ulint k = 0; k < dict_index_get_n_fields(index); k++) {
-			len += dict_col_get_fixed_size(
-				dict_index_get_nth_col(index, k), comp);
-		}
-
-		/* per-field bytes: two each when the data exceeds 127 */
-		len += (len > 127 ? 2 : 1) * dict_index_get_n_fields(index);
-
-		return(len + REC_N_OLD_EXTRA_BYTES);
-	}
-
-	ulint	len = REC_N_NEW_EXTRA_BYTES;
-	ulint	n_nullable = 0;
-
-	for (ulint k = 0; k < dict_index_get_n_fields(index); k++) {
-		const dict_col_t*	col = dict_index_get_nth_col(index, k);
-		const ulint		fixed
-			= dict_col_get_fixed_size(col, comp);
-
-		if (fixed) {
-			len += fixed;
-		} else {
-			/* variable-length column: count its length bytes */
-			len += col->len < 128 ? 1 : 2;
-		}
-
-		if (!(col->prtype & DATA_NOT_NULL)) {
-			n_nullable++;
-		}
-	}
-
-	/* round the NULL flags up to full bytes */
-	return(len + UT_BITS_IN_BYTES(n_nullable));
-}
-
-/**********************************************************************//**
-Writes a description of a foreign key constraint to stderr: its id, the
-foreign table and columns, and the referenced table and columns. The
-caller must hold dict_sys->mutex (debug assertion). */
-static
-void
-dict_foreign_print_low(
-/*===================*/
-	dict_foreign_t*	foreign)	/*!< in: foreign key constraint */
-{
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	fprintf(stderr, " FOREIGN KEY CONSTRAINT %s: %s (",
-		foreign->id, foreign->foreign_table_name);
-
-	for (ulint col = 0; col < foreign->n_fields; ++col) {
-		fprintf(stderr, " %s", foreign->foreign_col_names[col]);
-	}
-
-	fprintf(stderr, " )\n"
-		" REFERENCES %s (",
-		foreign->referenced_table_name);
-
-	for (ulint col = 0; col < foreign->n_fields; ++col) {
-		fprintf(stderr, " %s", foreign->referenced_col_names[col]);
-	}
-
-	fputs(" )\n", stderr);
-}
-
-/**********************************************************************//**
-Writes a description of a table to stderr: a summary line, every column,
-every index, and the foreign key constraints in both foreign_set and
-referenced_set. Transient statistics are calculated first if the table
-statistics are not initialized. The caller must hold dict_sys->mutex
-(debug assertion). */
-UNIV_INTERN
-void
-dict_table_print(
-/*=============*/
-	dict_table_t*	table)	/*!< in: table */
-{
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	dict_table_stats_lock(table, RW_X_LATCH);
-
-	if (!table->stat_initialized) {
-		dict_stats_update_transient(table);
-	}
-
-	fprintf(stderr,
-		"--------------------------------------\n"
-		"TABLE: name %s, id %llu, flags %lx, columns %lu,"
-		" indexes %lu, appr.rows " UINT64PF "\n"
-		" COLUMNS: ",
-		table->name,
-		(ullint) table->id,
-		(ulong) table->flags,
-		(ulong) table->n_cols,
-		(ulong) UT_LIST_GET_LEN(table->indexes),
-		table->stat_n_rows);
-
-	for (ulint col_no = 0; col_no < (ulint) table->n_cols; col_no++) {
-		dict_col_print_low(table,
-				   dict_table_get_nth_col(table, col_no));
-		fputs("; ", stderr);
-	}
-
-	putc('\n', stderr);
-
-	for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
-	     index != NULL;
-	     index = UT_LIST_GET_NEXT(indexes, index)) {
-
-		dict_index_print_low(index);
-	}
-
-	dict_table_stats_unlock(table, RW_X_LATCH);
-
-	/* constraints where this table is the child ... */
-	std::for_each(table->foreign_set.begin(),
-		      table->foreign_set.end(),
-		      dict_foreign_print_low);
-
-	/* ... and those where it is the referenced table */
-	std::for_each(table->referenced_set.begin(),
-		      table->referenced_set.end(),
-		      dict_foreign_print_low);
-}
-
-/**********************************************************************//**
-Writes the name and the data type of a column to stderr. The caller must
-hold dict_sys->mutex (debug assertion). */
-static
-void
-dict_col_print_low(
-/*===============*/
-	const dict_table_t*	table,	/*!< in: table */
-	const dict_col_t*	col)	/*!< in: column */
-{
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	dtype_t	col_type;
-
-	dict_col_copy_type(col, &col_type);
-
-	const char*	col_name
-		= dict_table_get_col_name(table, dict_col_get_no(col));
-
-	fprintf(stderr, "%s: ", col_name);
-
-	dtype_print(&col_type);
-}
-
-/**********************************************************************//**
-Writes a description of an index to stderr: a summary of its counters and
-statistics followed by the list of its fields. The table statistics must
-be initialized, and the caller must hold dict_sys->mutex (debug
-assertion). */
-static
-void
-dict_index_print_low(
-/*=================*/
-	dict_index_t*	index)	/*!< in: index */
-{
-	ut_a(index->table->stat_initialized);
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	/* Report the distinct-value estimate for the full user-defined key;
-	fall back to slot 0 when there are no user-defined columns. */
-	const ulint	stat_slot = index->n_user_defined_cols > 0
-		? index->n_user_defined_cols - 1
-		: 0;
-	const ib_int64_t	n_vals = index->stat_n_diff_key_vals[stat_slot];
-
-	fprintf(stderr,
-		" INDEX: name %s, id %llu, fields %lu/%lu,"
-		" uniq %lu, type %lu\n"
-		" root page %lu, appr.key vals %lu,"
-		" leaf pages %lu, size pages %lu\n"
-		" FIELDS: ",
-		index->name,
-		(ullint) index->id,
-		(ulong) index->n_user_defined_cols,
-		(ulong) index->n_fields,
-		(ulong) index->n_uniq,
-		(ulong) index->type,
-		(ulong) index->page,
-		(ulong) n_vals,
-		(ulong) index->stat_n_leaf_pages,
-		(ulong) index->stat_index_size);
-
-	for (ulint pos = 0; pos < index->n_fields; pos++) {
-		dict_field_print_low(dict_index_get_nth_field(index, pos));
-	}
-
-	putc('\n', stderr);
-
-#ifdef UNIV_BTR_PRINT
-	btr_print_size(index);
-
-	btr_print_index(index, 7);
-#endif /* UNIV_BTR_PRINT */
-}
-
-/**********************************************************************//**
-Writes the name of an index field to stderr, followed by its prefix
-length in parentheses when the prefix length is non-zero. The caller must
-hold dict_sys->mutex (debug assertion). */
-static
-void
-dict_field_print_low(
-/*=================*/
-	const dict_field_t*	field)	/*!< in: field */
-{
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	fprintf(stderr, " %s", field->name);
-
-	if (field->prefix_len == 0) {
-		return;
-	}
-
-	fprintf(stderr, "(%lu)", (ulong) field->prefix_len);
-}
-
-/**********************************************************************//**
-Builds the textual definition of one foreign key constraint in the form
-used inside a CREATE TABLE statement:
-", CONSTRAINT id FOREIGN KEY (cols) REFERENCES table (cols) [actions]".
-A database name prefix is removed from the constraint id, and the
-database name of the referenced table is omitted when both tables are in
-the same database.
-@return the constraint definition */
-UNIV_INTERN
-std::string
-dict_print_info_on_foreign_key_in_create_format(
-/*============================================*/
-	trx_t*		trx,		/*!< in: transaction */
-	dict_foreign_t*	foreign,	/*!< in: foreign key constraint */
-	ibool		add_newline)	/*!< in: whether to add a newline */
-{
-	/* Referential actions in the order in which they are printed. */
-	static const struct {
-		ulint		mask;
-		const char*	clause;
-	} actions[] = {
-		{DICT_FOREIGN_ON_DELETE_CASCADE, " ON DELETE CASCADE"},
-		{DICT_FOREIGN_ON_DELETE_SET_NULL, " ON DELETE SET NULL"},
-		{DICT_FOREIGN_ON_DELETE_NO_ACTION, " ON DELETE NO ACTION"},
-		{DICT_FOREIGN_ON_UPDATE_CASCADE, " ON UPDATE CASCADE"},
-		{DICT_FOREIGN_ON_UPDATE_SET_NULL, " ON UPDATE SET NULL"},
-		{DICT_FOREIGN_ON_UPDATE_NO_ACTION, " ON UPDATE NO ACTION"}
-	};
-
-	/* Strip the preceding database name from the constraint id */
-	const char*	short_id = foreign->id;
-
-	if (strchr(foreign->id, '/') != NULL) {
-		short_id = foreign->id + 1
-			+ dict_get_db_name_len(foreign->id);
-	}
-
-	std::string	out(",");
-
-	if (add_newline) {
-		/* SHOW CREATE TABLE wants each constraint on its own
-		line; error messages want no newlines inserted. */
-		out += "\n ";
-	}
-
-	out += " CONSTRAINT ";
-	out += ut_get_name(trx, FALSE, short_id);
-	out += " FOREIGN KEY (";
-
-	/* The first column is always printed; the rest are comma-separated */
-	out += ut_get_name(trx, FALSE, foreign->foreign_col_names[0]);
-
-	for (ulint col = 1; col < foreign->n_fields; col++) {
-		out += ", ";
-		out += ut_get_name(trx, FALSE,
-				   foreign->foreign_col_names[col]);
-	}
-
-	out += ") REFERENCES ";
-
-	const char*	ref_name = foreign->referenced_table_name;
-
-	if (dict_tables_have_same_db(foreign->foreign_table_name_lookup,
-				     foreign->referenced_table_name_lookup)) {
-		/* Do not print the database name of the referenced table */
-		ref_name = dict_remove_db_name(foreign->referenced_table_name);
-	}
-
-	out += ut_get_name(trx, TRUE, ref_name);
-	out += " (";
-
-	out += ut_get_name(trx, FALSE, foreign->referenced_col_names[0]);
-
-	for (ulint col = 1; col < foreign->n_fields; col++) {
-		out += ", ";
-		out += ut_get_name(trx, FALSE,
-				   foreign->referenced_col_names[col]);
-	}
-
-	out += ")";
-
-	for (ulint a = 0; a < sizeof actions / sizeof actions[0]; a++) {
-		if (foreign->type & actions[a].mask) {
-			out += actions[a].clause;
-		}
-	}
-
-	return out;
-}
-
-/**********************************************************************//**
-Outputs info on foreign keys of a table.
-Iterates over table->foreign_set while holding dict_sys->mutex, which is
-acquired and released inside this function. In the CREATE TABLE format
-each constraint is rendered by
-dict_print_info_on_foreign_key_in_create_format(); otherwise each
-constraint is rendered as
-"; (cols) REFER table(cols)" followed by its referential actions.
-@return the description of all foreign keys of the table */
-UNIV_INTERN
-std::string
-dict_print_info_on_foreign_keys(
-/*============================*/
-	ibool		create_table_format, /*!< in: if TRUE then print in
-				a format suitable to be inserted into
-				a CREATE TABLE, otherwise in the format
-				of SHOW TABLE STATUS */
-	trx_t*		trx,	/*!< in: transaction */
-	dict_table_t*	table)	/*!< in: table */
-{
-	dict_foreign_t*	foreign;
-	std::string	str;
-
-	mutex_enter(&(dict_sys->mutex));
-
-	for (dict_foreign_set::iterator it = table->foreign_set.begin();
-	     it != table->foreign_set.end();
-	     ++it) {
-
-		foreign = *it;
-
-		if (create_table_format) {
-			str.append(
-				dict_print_info_on_foreign_key_in_create_format(
-					trx, foreign, TRUE));
-		} else {
-			ulint	i;
-			str.append("; (");
-
-			for (i = 0; i < foreign->n_fields; i++) {
-				if (i) {
-					str.append(" ");
-				}
-
-				str.append(ut_get_name(trx, FALSE,
-					   foreign->foreign_col_names[i]));
-			}
-
-			str.append(") REFER ");
-			str.append(ut_get_name(trx, TRUE,
-					foreign->referenced_table_name));
-			/* Open the referenced column list; it is closed
-			after the loop below. */
-			str.append("(");
-
-			for (i = 0; i < foreign->n_fields; i++) {
-				if (i) {
-					str.append(" ");
-				}
-				str.append(ut_get_name(
-						trx, FALSE,
-						foreign->referenced_col_names[i]));
-			}
-
-			str.append(")");
-
-			/* foreign->type is a bit mask: an ON DELETE flag can
-			be combined with an ON UPDATE flag, so every flag is
-			tested with '&' rather than compared for equality. */
-			if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) {
-				str.append(" ON DELETE CASCADE");
-			}
-
-			if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) {
-				str.append(" ON DELETE SET NULL");
-			}
-
-			if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) {
-				str.append(" ON DELETE NO ACTION");
-			}
-
-			if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) {
-				str.append(" ON UPDATE CASCADE");
-			}
-
-			if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) {
-				str.append(" ON UPDATE SET NULL");
-			}
-
-			if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) {
-				str.append(" ON UPDATE NO ACTION");
-			}
-		}
-	}
-
-	mutex_exit(&(dict_sys->mutex));
-
-	return str;
-}
-
-/********************************************************************//**
-Writes "index <index name> of table <table name>" to the given stream. */
-UNIV_INTERN
-void
-dict_index_name_print(
-/*==================*/
-	FILE*			file,	/*!< in: output stream */
-	const trx_t*		trx,	/*!< in: transaction */
-	const dict_index_t*	index)	/*!< in: index to print */
-{
-	/* the index part */
-	fputs("index ", file);
-	ut_print_name(file, trx, FALSE, index->name);
-
-	/* the table part */
-	fputs(" of table ", file);
-	ut_print_name(file, trx, TRUE, index->table_name);
-}
-
-/**********************************************************************//**
-Searches the dict_sys->table_LRU list for a table that uses the given
-tablespace id. No mutex is taken; the walk is bounded by the list length
-read at the start.
-@return table if found, NULL if not */
-static
-dict_table_t*
-dict_find_table_by_space(
-/*=====================*/
-	ulint	space_id)	/*!< in: space ID */
-{
-	ut_ad(space_id > 0);
-
-	if (dict_sys == NULL) {
-		/* This could happen when it's in redo processing. */
-		return(NULL);
-	}
-
-	dict_table_t*	candidate = UT_LIST_GET_FIRST(dict_sys->table_LRU);
-	const ulint	max_steps = UT_LIST_GET_LEN(dict_sys->table_LRU);
-
-	/* The mutex is intentionally not acquired: this function is called
-	from error handling code deep in the call stack as a last means to
-	avoid killing the server, so it is worth risking some consequences
-	for that. */
-	for (ulint step = 0;
-	     candidate != NULL && step < max_steps;
-	     step++) {
-
-		if (candidate->space == space_id) {
-			return(candidate);
-		}
-
-		candidate = UT_LIST_GET_NEXT(table_LRU, candidate);
-	}
-
-	return(NULL);
-}
-
-/**********************************************************************//**
-Marks the table that uses the given tablespace id as corrupted and
-unreadable in the data dictionary cache.
-@return TRUE if successful, FALSE if no such table was found */
-UNIV_INTERN
-ibool
-dict_set_corrupted_by_space(
-/*========================*/
-	ulint	space_id)	/*!< in: space ID */
-{
-	dict_table_t* const	table = dict_find_table_by_space(space_id);
-
-	if (table != NULL) {
-		/* Only the in-memory flags are set, since the caller
-		could be too deep in the stack for a SYS_INDEXES update */
-		table->corrupted = true;
-		table->file_unreadable = true;
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-
-/** Marks the table that uses the given tablespace id as unreadable in
-the data dictionary cache. Nothing happens if no such table is found.
-@param[in]	space_id	Tablespace id */
-UNIV_INTERN
-void
-dict_set_encrypted_by_space(ulint	space_id)
-{
-	dict_table_t* const	table = dict_find_table_by_space(space_id);
-
-	if (table == NULL) {
-		return;
-	}
-
-	table->file_unreadable = true;
-}
-
-/**********************************************************************//**
-Flags an index corrupted both in the data dictionary cache
-and in the SYS_INDEXES
-If trx->dict_operation_lock_mode is not RW_X_LATCH on entry, the data
-dictionary is locked with row_mysql_lock_data_dictionary() for the
-duration of the call and unlocked again before returning.
-The cached table is marked corrupted only when the index is the
-clustered index. If the index already carries DICT_CORRUPT, nothing
-else is done. Otherwise the flag is set in the cache, the matching
-SYS_INDEXES row is searched for by (table id, index id) and, if found
-with a 4-byte TYPE field, that field is overwritten with index->type
-inside a mini-transaction. The outcome ("Flagged" or "Unable to flag")
-is reported with ib_logf() at error level. */
-UNIV_INTERN
-void
-dict_set_corrupted(
-/*===============*/
- dict_index_t* index, /*!< in/out: index */
- trx_t* trx, /*!< in/out: transaction */
- const char* ctx) /*!< in: context */
-{
- mem_heap_t* heap;
- mtr_t mtr;
- dict_index_t* sys_index;
- dtuple_t* tuple;
- dfield_t* dfield;
- byte* buf;
- char* table_name;
- const char* status;
- btr_cur_t cursor;
- bool locked = RW_X_LATCH == trx->dict_operation_lock_mode;
-
- if (!locked) {
- row_mysql_lock_data_dictionary(trx);
- }
-
- ut_ad(mutex_own(&dict_sys->mutex));
- ut_ad(!dict_table_is_comp(dict_sys->sys_tables));
- ut_ad(!dict_table_is_comp(dict_sys->sys_indexes));
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_except_dict());
-#endif
-
- /* Mark the table as corrupted only if the clustered index
- is corrupted */
- if (dict_index_is_clust(index)) {
- index->table->corrupted = TRUE;
- }
-
- if (index->type & DICT_CORRUPT) {
- /* The index was already flagged corrupted. */
- ut_ad(!dict_index_is_clust(index) || index->table->corrupted);
- goto func_exit;
- }
-
- heap = mem_heap_create(sizeof(dtuple_t) + 2 * (sizeof(dfield_t)
- + sizeof(que_fork_t) + sizeof(upd_node_t)
- + sizeof(upd_t) + 12));
- mtr_start(&mtr);
- index->type |= DICT_CORRUPT;
-
- sys_index = UT_LIST_GET_FIRST(dict_sys->sys_indexes->indexes);
-
- /* Find the index row in SYS_INDEXES
- The search tuple has two 8-byte fields: the table id and the
- index id. */
- tuple = dtuple_create(heap, 2);
-
- dfield = dtuple_get_nth_field(tuple, 0);
- buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
- mach_write_to_8(buf, index->table->id);
- dfield_set_data(dfield, buf, 8);
-
- dfield = dtuple_get_nth_field(tuple, 1);
- buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
- mach_write_to_8(buf, index->id);
- dfield_set_data(dfield, buf, 8);
-
- dict_index_copy_types(tuple, sys_index, 2);
-
- btr_cur_search_to_nth_level(sys_index, 0, tuple, PAGE_CUR_LE,
- BTR_MODIFY_LEAF,
- &cursor, 0, __FILE__, __LINE__, &mtr);
-
- if (cursor.low_match == dtuple_get_n_fields(tuple)) {
- /* UPDATE SYS_INDEXES SET TYPE=index->type
- WHERE TABLE_ID=index->table->id AND INDEX_ID=index->id */
- ulint len;
- byte* field = rec_get_nth_field_old(
- btr_cur_get_rec(&cursor),
- DICT_FLD__SYS_INDEXES__TYPE, &len);
- if (len != 4) {
- goto fail; /* unexpected TYPE field length: report failure instead of writing */
- }
- mlog_write_ulint(field, index->type, MLOG_4BYTES, &mtr);
- status = "Flagged";
- } else {
-fail:
- status = "Unable to flag";
- }
-
- mtr_commit(&mtr);
- mem_heap_empty(heap); /* the heap is reused below for the table name buffer */
- table_name = static_cast<char*>(mem_heap_alloc(heap, FN_REFLEN + 1));
- *innobase_convert_name(
- table_name, FN_REFLEN,
- index->table_name, strlen(index->table_name),
- NULL, TRUE) = 0;
-
- ib_logf(IB_LOG_LEVEL_ERROR, "%s corruption of %s in table %s in %s",
- status, index->name, table_name, ctx);
-
- mem_heap_free(heap);
-
-func_exit:
- if (!locked) {
- row_mysql_unlock_data_dictionary(trx);
- }
-}
-
-/**********************************************************************//**
-Flags an index corrupted in the data dictionary cache only. This
-is used mostly to mark a corrupted index when index's own dictionary
-is corrupted, and we force to load such index for repair purpose.
-The index always gets the DICT_CORRUPT flag. If it is the clustered
-index, the owning table is marked corrupted as well: the table passed
-in is used when it is not NULL, otherwise index->table; if both are
-NULL no table is marked. The caller must hold dict_sys->mutex (debug
-assertion). */
-UNIV_INTERN
-void
-dict_set_corrupted_index_cache_only(
-/*================================*/
-	dict_index_t*	index,	/*!< in/out: index */
-	dict_table_t*	table)	/*!< in/out: table, or NULL to use
-				index->table */
-{
-	ut_ad(index != NULL);
-	ut_ad(mutex_own(&dict_sys->mutex));
-	ut_ad(!dict_table_is_comp(dict_sys->sys_tables));
-	ut_ad(!dict_table_is_comp(dict_sys->sys_indexes));
-
-	/* Mark the table as corrupted only if the clustered index
-	is corrupted */
-	if (dict_index_is_clust(index)) {
-		/* The assertion below accepts a NULL table as long as
-		index->table is set, so the passed pointer must not be
-		dereferenced unconditionally. */
-		dict_table_t*	corrupt_table
-			= (table != NULL) ? table : index->table;
-
-		ut_ad((index->table != NULL) || (table != NULL)
-		      || index->table == table);
-
-		if (corrupt_table != NULL) {
-			corrupt_table->corrupted = TRUE;
-		}
-	}
-
-	index->type |= DICT_CORRUPT;
-}
-
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Creates the dummy indexes dict_ind_redundant and dict_ind_compact, each
-on its own single-column dummy table, and marks both as cached. */
-UNIV_INTERN
-void
-dict_ind_init(void)
-/*===============*/
-{
-	/* dummy table and index for REDUNDANT infimum and supremum */
-	dict_table_t*	redundant_table = dict_mem_table_create(
-		"SYS_DUMMY1", DICT_HDR_SPACE, 1, 0, 0);
-
-	dict_mem_table_add_col(redundant_table, NULL, NULL, DATA_CHAR,
-			       DATA_ENGLISH | DATA_NOT_NULL, 8);
-
-	dict_ind_redundant = dict_mem_index_create(
-		"SYS_DUMMY1", "SYS_DUMMY1", DICT_HDR_SPACE, 0, 1);
-
-	dict_index_add_col(dict_ind_redundant, redundant_table,
-			   dict_table_get_nth_col(redundant_table, 0), 0);
-
-	dict_ind_redundant->table = redundant_table;
-
-	/* dummy table and index for COMPACT infimum and supremum */
-	dict_table_t*	compact_table = dict_mem_table_create(
-		"SYS_DUMMY2", DICT_HDR_SPACE, 1, DICT_TF_COMPACT, 0);
-
-	dict_mem_table_add_col(compact_table, NULL, NULL, DATA_CHAR,
-			       DATA_ENGLISH | DATA_NOT_NULL, 8);
-
-	dict_ind_compact = dict_mem_index_create(
-		"SYS_DUMMY2", "SYS_DUMMY2", DICT_HDR_SPACE, 0, 1);
-
-	dict_index_add_col(dict_ind_compact, compact_table,
-			   dict_table_get_nth_col(compact_table, 0), 0);
-
-	dict_ind_compact->table = compact_table;
-
-	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
-	dict_ind_compact->cached = TRUE;
-	dict_ind_redundant->cached = TRUE;
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Frees the dummy indexes dict_ind_compact and dict_ind_redundant together
-with their dummy tables, and resets both global pointers to NULL. */
-static
-void
-dict_ind_free(void)
-/*===============*/
-{
-	/* The table pointer is read before the index that owns it is
-	freed. */
-	dict_table_t*	compact_table = dict_ind_compact->table;
-
-	dict_mem_index_free(dict_ind_compact);
-	dict_ind_compact = NULL;
-	dict_mem_table_free(compact_table);
-
-	dict_table_t*	redundant_table = dict_ind_redundant->table;
-
-	dict_mem_index_free(dict_ind_redundant);
-	dict_ind_redundant = NULL;
-	dict_mem_table_free(redundant_table);
-}
-
-/**********************************************************************//**
-Finds the first index of a table whose name equals the given name; names
-are compared with innobase_strcasecmp().
-@return index, NULL if does not exist */
-UNIV_INTERN
-dict_index_t*
-dict_table_get_index_on_name(
-/*=========================*/
-	dict_table_t*	table,	/*!< in: table */
-	const char*	name)	/*!< in: name of the index to find */
-{
-	for (dict_index_t* candidate = dict_table_get_first_index(table);
-	     candidate != NULL;
-	     candidate = dict_table_get_next_index(candidate)) {
-
-		if (innobase_strcasecmp(candidate->name, name) == 0) {
-
-			return(candidate);
-		}
-	}
-
-	return(NULL);
-}
-
-/**********************************************************************//**
-Replaces an index that is about to be dropped with an equivalent index in
-every foreign key constraint of the table that uses it, both in
-foreign_set (as foreign_index) and in referenced_set (as
-referenced_index). A constraint for which no replacement is found gets a
-NULL index pointer.
-@return whether all replacements were found */
-UNIV_INTERN
-bool
-dict_foreign_replace_index(
-/*=======================*/
-	dict_table_t*		table,  /*!< in/out: table */
-	const char**		col_names,
-					/*!< in: column names, or NULL
-					to use table->col_names */
-	const dict_index_t*	index)	/*!< in: index to be replaced */
-{
-	bool	all_replaced = true;
-
-	ut_ad(index->to_be_dropped);
-	ut_ad(index->table == table);
-
-	/* Constraints in which this table is the child table */
-	for (dict_foreign_set::iterator it = table->foreign_set.begin();
-	     it != table->foreign_set.end();
-	     ++it) {
-
-		dict_foreign_t*	fk = *it;
-
-		if (fk->foreign_index != index) {
-			continue;
-		}
-
-		ut_ad(fk->foreign_table == index->table);
-
-		dict_index_t*	replacement = dict_foreign_find_index(
-			fk->foreign_table, col_names,
-			fk->foreign_col_names,
-			fk->n_fields, index,
-			/*check_charsets=*/TRUE, /*check_null=*/FALSE,
-			NULL, NULL, NULL);
-
-		if (replacement == NULL) {
-			all_replaced = false;
-		} else {
-			ut_ad(replacement->table == index->table);
-			ut_ad(!replacement->to_be_dropped);
-		}
-
-		fk->foreign_index = replacement;
-	}
-
-	/* Constraints in which this table is the referenced table */
-	for (dict_foreign_set::iterator it = table->referenced_set.begin();
-	     it != table->referenced_set.end();
-	     ++it) {
-
-		dict_foreign_t*	fk = *it;
-
-		if (fk->referenced_index != index) {
-			continue;
-		}
-
-		ut_ad(fk->referenced_table == index->table);
-
-		dict_index_t*	replacement = dict_foreign_find_index(
-			fk->referenced_table, NULL,
-			fk->referenced_col_names,
-			fk->n_fields, index,
-			/*check_charsets=*/TRUE, /*check_null=*/FALSE,
-			NULL, NULL, NULL);
-
-		/* There must exist an alternative index,
-		since this must have been checked earlier. */
-		if (replacement == NULL) {
-			all_replaced = false;
-		} else {
-			ut_ad(replacement->table == index->table);
-			ut_ad(!replacement->to_be_dropped);
-		}
-
-		fk->referenced_index = replacement;
-	}
-
-	return(all_replaced);
-}
-
-/**********************************************************************//**
-In case there is more than one index with the same name return the index
-with the min(id).
-@return index, NULL if does not exist */
-UNIV_INTERN
-dict_index_t*
-dict_table_get_index_on_name_and_min_id(
-/*=====================================*/
- dict_table_t* table, /*!< in: table */
- const char* name) /*!< in: name of the index to find */
-{
- dict_index_t* index;
- dict_index_t* min_index; /* Index with matching name and min(id) */
-
- min_index = NULL;
- index = dict_table_get_first_index(table);
-
- while (index != NULL) {
- if (ut_strcmp(index->name, name) == 0) {
- if (!min_index || index->id < min_index->id) {
-
- min_index = index;
- }
- }
-
- index = dict_table_get_next_index(index);
- }
-
- return(min_index);
-
-}
-
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Check for duplicate index entries in a table [using the index name] */
-UNIV_INTERN
-void
-dict_table_check_for_dup_indexes(
-/*=============================*/
- const dict_table_t* table, /*!< in: Check for dup indexes
- in this table */
- enum check_name check) /*!< in: whether and when to allow
- temporary index names */
-{
- /* Check for duplicates, ignoring indexes that are marked
- as to be dropped */
-
- const dict_index_t* index1;
- const dict_index_t* index2;
-
- ut_ad(mutex_own(&dict_sys->mutex));
-
- /* The primary index _must_ exist */
- ut_a(UT_LIST_GET_LEN(table->indexes) > 0);
-
- index1 = UT_LIST_GET_FIRST(table->indexes);
-
- do {
- if (*index1->name == TEMP_INDEX_PREFIX) {
- ut_a(!dict_index_is_clust(index1));
-
- switch (check) {
- case CHECK_ALL_COMPLETE:
- ut_error;
- case CHECK_ABORTED_OK:
- switch (dict_index_get_online_status(index1)) {
- case ONLINE_INDEX_COMPLETE:
- case ONLINE_INDEX_CREATION:
- ut_error;
- break;
- case ONLINE_INDEX_ABORTED:
- case ONLINE_INDEX_ABORTED_DROPPED:
- break;
- }
- /* fall through */
- case CHECK_PARTIAL_OK:
- break;
- }
- }
-
- for (index2 = UT_LIST_GET_NEXT(indexes, index1);
- index2 != NULL;
- index2 = UT_LIST_GET_NEXT(indexes, index2)) {
- ut_ad(ut_strcmp(index1->name, index2->name));
- }
-
- index1 = UT_LIST_GET_NEXT(indexes, index1);
- } while (index1);
-}
-#endif /* UNIV_DEBUG */
-
-/** Auxiliary macro used inside dict_table_schema_check(). */
-#define CREATE_TYPES_NAMES() \
- dtype_sql_name((unsigned) req_schema->columns[i].mtype, \
- (unsigned) req_schema->columns[i].prtype_mask, \
- (unsigned) req_schema->columns[i].len, \
- req_type, sizeof(req_type)); \
- dtype_sql_name(table->cols[j].mtype, \
- table->cols[j].prtype, \
- table->cols[j].len, \
- actual_type, sizeof(actual_type))
-
-/*********************************************************************//**
-Checks whether a table exists and whether it has the given structure.
-The table must have the same number of columns with the same names and
-types. The order of the columns does not matter.
-The caller must own the dictionary mutex.
-dict_table_schema_check() @{
-@return DB_SUCCESS if the table exists and contains the necessary columns */
-UNIV_INTERN
-dberr_t
-dict_table_schema_check(
-/*====================*/
- dict_table_schema_t* req_schema, /*!< in/out: required table
- schema */
- char* errstr, /*!< out: human readable error
- message if != DB_SUCCESS is
- returned */
- size_t errstr_sz) /*!< in: errstr size */
-{
- char buf[MAX_FULL_NAME_LEN];
- char req_type[64];
- char actual_type[64];
- dict_table_t* table;
- ulint i;
-
- ut_ad(mutex_own(&dict_sys->mutex));
-
- table = dict_table_get_low(req_schema->table_name);
-
- if (table == NULL) {
- bool should_print=true;
- /* no such table */
-
- if (innobase_strcasecmp(req_schema->table_name, "mysql/innodb_table_stats") == 0) {
- if (innodb_table_stats_not_found_reported == false) {
- innodb_table_stats_not_found = true;
- innodb_table_stats_not_found_reported = true;
- } else {
- should_print = false;
- }
- } else if (innobase_strcasecmp(req_schema->table_name, "mysql/innodb_index_stats") == 0 ) {
- if (innodb_index_stats_not_found_reported == false) {
- innodb_index_stats_not_found = true;
- innodb_index_stats_not_found_reported = true;
- } else {
- should_print = false;
- }
- }
-
- if (should_print) {
- ut_snprintf(errstr, errstr_sz,
- "Table %s not found.",
- ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)));
- return(DB_TABLE_NOT_FOUND);
- } else {
- return(DB_STATS_DO_NOT_EXIST);
- }
- }
-
- if (!table->is_readable() &&
- fil_space_get(table->space) == NULL) {
- /* missing tablespace */
-
- ut_snprintf(errstr, errstr_sz,
- "Tablespace for table %s is missing.",
- ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)));
-
- return(DB_TABLE_NOT_FOUND);
- }
-
- if ((ulint) table->n_def - DATA_N_SYS_COLS != req_schema->n_cols) {
- /* the table has a different number of columns than
- required */
-
- ut_snprintf(errstr, errstr_sz,
- "%s has %d columns but should have %lu.",
- ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)),
- table->n_def - DATA_N_SYS_COLS,
- req_schema->n_cols);
-
- return(DB_ERROR);
- }
-
- /* For each column from req_schema->columns[] search
- whether it is present in table->cols[].
- The following algorithm is O(n_cols^2), but is optimized to
- be O(n_cols) if the columns are in the same order in both arrays. */
-
- for (i = 0; i < req_schema->n_cols; i++) {
- ulint j;
-
- /* check if i'th column is the same in both arrays */
- if (innobase_strcasecmp(req_schema->columns[i].name,
- dict_table_get_col_name(table, i)) == 0) {
-
- /* we found the column in table->cols[] quickly */
- j = i;
- } else {
-
- /* columns in both arrays are not in the same order,
- do a full scan of the second array */
- for (j = 0; j < table->n_def; j++) {
- const char* name;
-
- name = dict_table_get_col_name(table, j);
-
- if (innobase_strcasecmp(name,
- req_schema->columns[i].name) == 0) {
-
- /* found the column on j'th
- position */
- break;
- }
- }
-
- if (j == table->n_def) {
-
- ut_snprintf(errstr, errstr_sz,
- "required column %s "
- "not found in table %s.",
- req_schema->columns[i].name,
- ut_format_name(
- req_schema->table_name,
- TRUE, buf, sizeof(buf)));
-
- return(DB_ERROR);
- }
- }
-
- /* we found a column with the same name on j'th position,
- compare column types and flags */
-
- /* check length for exact match */
- if (req_schema->columns[i].len != table->cols[j].len) {
-
- CREATE_TYPES_NAMES();
-
- ut_snprintf(errstr, errstr_sz,
- "Column %s in table %s is %s "
- "but should be %s (length mismatch).",
- req_schema->columns[i].name,
- ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)),
- actual_type, req_type);
-
- return(DB_ERROR);
- }
-
- /* check mtype for exact match */
- if (req_schema->columns[i].mtype != table->cols[j].mtype) {
-
- CREATE_TYPES_NAMES();
-
- ut_snprintf(errstr, errstr_sz,
- "Column %s in table %s is %s "
- "but should be %s (type mismatch).",
- req_schema->columns[i].name,
- ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)),
- actual_type, req_type);
-
- return(DB_ERROR);
- }
-
- /* check whether required prtype mask is set */
- if (req_schema->columns[i].prtype_mask != 0
- && (table->cols[j].prtype
- & req_schema->columns[i].prtype_mask)
- != req_schema->columns[i].prtype_mask) {
-
- CREATE_TYPES_NAMES();
-
- ut_snprintf(errstr, errstr_sz,
- "Column %s in table %s is %s "
- "but should be %s (flags mismatch).",
- req_schema->columns[i].name,
- ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)),
- actual_type, req_type);
-
- return(DB_ERROR);
- }
- }
-
- if (req_schema->n_foreign != table->foreign_set.size()) {
- ut_snprintf(
- errstr, errstr_sz,
- "Table %s has " ULINTPF " foreign key(s) pointing"
- " to other tables, but it must have %lu.",
- ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)),
- static_cast<ulint>(table->foreign_set.size()),
- req_schema->n_foreign);
- return(DB_ERROR);
- }
-
- if (req_schema->n_referenced != table->referenced_set.size()) {
- ut_snprintf(
- errstr, errstr_sz,
- "There are " ULINTPF " foreign key(s) pointing to %s, "
- "but there must be %lu.",
- static_cast<ulint>(table->referenced_set.size()),
- ut_format_name(req_schema->table_name,
- TRUE, buf, sizeof(buf)),
- req_schema->n_referenced);
- return(DB_ERROR);
- }
-
- return(DB_SUCCESS);
-}
-/* @} */
-
-/*********************************************************************//**
-Converts a database and table name from filesystem encoding
-(e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two
-strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be
-at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. */
-UNIV_INTERN
-void
-dict_fs2utf8(
-/*=========*/
- const char* db_and_table, /*!< in: database and table names,
- e.g. d@i1b/a@q1b@1Kc */
- char* db_utf8, /*!< out: database name, e.g. dцb */
- size_t db_utf8_size, /*!< in: dbname_utf8 size */
- char* table_utf8, /*!< out: table name, e.g. aюbØc */
- size_t table_utf8_size)/*!< in: table_utf8 size */
-{
- char db[MAX_DATABASE_NAME_LEN + 1];
- ulint db_len;
- uint errors;
-
- db_len = dict_get_db_name_len(db_and_table);
-
- ut_a(db_len <= sizeof(db));
-
- memcpy(db, db_and_table, db_len);
- db[db_len] = '\0';
-
- strconvert(
- &my_charset_filename, db, db_len, system_charset_info,
- db_utf8, static_cast<uint>(db_utf8_size), &errors);
-
- /* convert each # to @0023 in table name and store the result in buf */
- const char* table = dict_remove_db_name(db_and_table);
- const char* table_p;
- char buf[MAX_TABLE_NAME_LEN * 5 + 1];
- char* buf_p;
- for (table_p = table, buf_p = buf; table_p[0] != '\0'; table_p++) {
- if (table_p[0] != '#') {
- buf_p[0] = table_p[0];
- buf_p++;
- } else {
- buf_p[0] = '@';
- buf_p[1] = '0';
- buf_p[2] = '0';
- buf_p[3] = '2';
- buf_p[4] = '3';
- buf_p += 5;
- }
- ut_a((size_t) (buf_p - buf) < sizeof(buf));
- }
- buf_p[0] = '\0';
-
- errors = 0;
- strconvert(
- &my_charset_filename, buf, buf_p - buf, system_charset_info,
- table_utf8, static_cast<uint>(table_utf8_size),
- &errors);
-
- if (errors != 0) {
- ut_snprintf(table_utf8, table_utf8_size, "%s%s",
- srv_mysql50_table_name_prefix, table);
- }
-}
-
-/**********************************************************************//**
-Closes the data dictionary module. */
-UNIV_INTERN
-void
-dict_close(void)
-/*============*/
-{
- ulint i;
-
- /* Free the hash elements. We don't remove them from the table
- because we are going to destroy the table anyway. */
- for (i = 0; i < hash_get_n_cells(dict_sys->table_hash); i++) {
- dict_table_t* table;
-
- table = static_cast<dict_table_t*>(
- HASH_GET_FIRST(dict_sys->table_hash, i));
-
- while (table) {
- dict_table_t* prev_table = table;
-
- table = static_cast<dict_table_t*>(
- HASH_GET_NEXT(name_hash, prev_table));
-#ifdef UNIV_DEBUG
- ut_a(prev_table->magic_n == DICT_TABLE_MAGIC_N);
-#endif
- /* Acquire only because it's a pre-condition. */
- mutex_enter(&dict_sys->mutex);
-
- dict_table_remove_from_cache(prev_table);
-
- mutex_exit(&dict_sys->mutex);
- }
- }
-
- hash_table_free(dict_sys->table_hash);
-
- /* The elements are the same instance as in dict_sys->table_hash,
- therefore we don't delete the individual elements. */
- hash_table_free(dict_sys->table_id_hash);
-
- dict_ind_free();
-
- mutex_free(&dict_sys->mutex);
-
- rw_lock_free(&dict_operation_lock);
- memset(&dict_operation_lock, 0x0, sizeof(dict_operation_lock));
-
- if (!srv_read_only_mode) {
- mutex_free(&dict_foreign_err_mutex);
- }
-
- delete dict_sys->autoinc_map;
-
- mem_free(dict_sys);
- dict_sys = NULL;
-}
-
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Validate the dictionary table LRU list.
-@return TRUE if valid */
-static
-ibool
-dict_lru_validate(void)
-/*===================*/
-{
- dict_table_t* table;
-
- ut_ad(mutex_own(&dict_sys->mutex));
-
- for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
- table != NULL;
- table = UT_LIST_GET_NEXT(table_LRU, table)) {
-
- ut_a(table->can_be_evicted);
- }
-
- for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU);
- table != NULL;
- table = UT_LIST_GET_NEXT(table_LRU, table)) {
-
- ut_a(!table->can_be_evicted);
- }
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Check if a table exists in the dict table LRU list.
-@return TRUE if table found in LRU list */
-static
-ibool
-dict_lru_find_table(
-/*================*/
- const dict_table_t* find_table) /*!< in: table to find */
-{
- dict_table_t* table;
-
- ut_ad(find_table != NULL);
- ut_ad(mutex_own(&dict_sys->mutex));
-
- for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
- table != NULL;
- table = UT_LIST_GET_NEXT(table_LRU, table)) {
-
- ut_a(table->can_be_evicted);
-
- if (table == find_table) {
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-Check if a table exists in the dict table non-LRU list.
-@return TRUE if table found in non-LRU list */
-static
-ibool
-dict_non_lru_find_table(
-/*====================*/
- const dict_table_t* find_table) /*!< in: table to find */
-{
- dict_table_t* table;
-
- ut_ad(find_table != NULL);
- ut_ad(mutex_own(&dict_sys->mutex));
-
- for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU);
- table != NULL;
- table = UT_LIST_GET_NEXT(table_LRU, table)) {
-
- ut_a(!table->can_be_evicted);
-
- if (table == find_table) {
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-#endif /* UNIV_DEBUG */
-/*********************************************************************//**
-Check an index to see whether its first fields are the columns in the array,
-in the same order and is not marked for deletion and is not the same
-as types_idx.
-@return true if the index qualifies, otherwise false */
-UNIV_INTERN
-bool
-dict_foreign_qualify_index(
-/*=======================*/
- const dict_table_t* table, /*!< in: table */
- const char** col_names,
- /*!< in: column names, or NULL
- to use table->col_names */
- const char** columns,/*!< in: array of column names */
- ulint n_cols, /*!< in: number of columns */
- const dict_index_t* index, /*!< in: index to check */
- const dict_index_t* types_idx,
- /*!< in: NULL or an index
- whose types the column types
- must match */
- bool check_charsets,
- /*!< in: whether to check
- charsets. only has an effect
- if types_idx != NULL */
- ulint check_null,
- /*!< in: nonzero if none of
- the columns must be declared
- NOT NULL */
- ulint* error, /*!< out: error code */
- ulint* err_col_no,
- /*!< out: column number where error happened */
- dict_index_t** err_index)
- /*!< out: index where error happened */
-{
- if (dict_index_get_n_fields(index) < n_cols) {
- return(false);
- }
-
- for (ulint i = 0; i < n_cols; i++) {
- dict_field_t* field;
- const char* col_name;
- ulint col_no;
-
- field = dict_index_get_nth_field(index, i);
- col_no = dict_col_get_no(field->col);
-
- if (field->prefix_len != 0) {
- /* We do not accept column prefix
- indexes here */
- if (error && err_col_no && err_index) {
- *error = DB_FOREIGN_KEY_IS_PREFIX_INDEX;
- *err_col_no = i;
- *err_index = (dict_index_t*)index;
- }
- return(false);
- }
-
- if (check_null
- && (field->col->prtype & DATA_NOT_NULL)) {
- if (error && err_col_no && err_index) {
- *error = DB_FOREIGN_KEY_COL_NOT_NULL;
- *err_col_no = i;
- *err_index = (dict_index_t*)index;
- }
- return(false);
- }
-
- col_name = col_names
- ? col_names[col_no]
- : dict_table_get_col_name(table, col_no);
-
- if (0 != innobase_strcasecmp(columns[i], col_name)) {
- return(false);
- }
-
- if (types_idx && !cmp_cols_are_equal(
- dict_index_get_nth_col(index, i),
- dict_index_get_nth_col(types_idx, i),
- check_charsets)) {
- if (error && err_col_no && err_index) {
- *error = DB_FOREIGN_KEY_COLS_NOT_EQUAL;
- *err_col_no = i;
- *err_index = (dict_index_t*)index;
- }
-
- return(false);
- }
- }
-
- return(true);
-}
-
-/*********************************************************************//**
-Update the state of compression failure padding heuristics. This is
-called whenever a compression operation succeeds or fails.
-The caller must be holding info->mutex */
-static
-void
-dict_index_zip_pad_update(
-/*======================*/
- zip_pad_info_t* info, /*<! in/out: info to be updated */
- ulint zip_threshold) /*<! in: zip threshold value */
-{
- ulint total;
- ulint fail_pct;
-
- ut_ad(info);
-
- total = info->success + info->failure;
-
- ut_ad(total > 0);
-
- if(zip_threshold == 0) {
- /* User has just disabled the padding. */
- return;
- }
-
- if (total < ZIP_PAD_ROUND_LEN) {
- /* We are in middle of a round. Do nothing. */
- return;
- }
-
- /* We are at a 'round' boundary. Reset the values but first
- calculate fail rate for our heuristic. */
- fail_pct = (info->failure * 100) / total;
- info->failure = 0;
- info->success = 0;
-
- if (fail_pct > zip_threshold) {
- /* Compression failures are more then user defined
- threshold. Increase the pad size to reduce chances of
- compression failures. */
- ut_ad(info->pad % ZIP_PAD_INCR == 0);
-
- /* Only do increment if it won't increase padding
- beyond max pad size. */
- if (info->pad + ZIP_PAD_INCR
- < (UNIV_PAGE_SIZE * zip_pad_max) / 100) {
-#ifdef HAVE_ATOMIC_BUILTINS
- /* Use atomics even though we have the mutex.
- This is to ensure that we are able to read
- info->pad atomically where atomics are
- supported. */
- os_atomic_increment_ulint(&info->pad, ZIP_PAD_INCR);
-#else /* HAVE_ATOMIC_BUILTINS */
- info->pad += ZIP_PAD_INCR;
-#endif /* HAVE_ATOMIC_BUILTINS */
-
- MONITOR_INC(MONITOR_PAD_INCREMENTS);
- }
-
- info->n_rounds = 0;
-
- } else {
- /* Failure rate was OK. Another successful round
- completed. */
- ++info->n_rounds;
-
- /* If enough successful rounds are completed with
- compression failure rate in control, decrease the
- padding. */
- if (info->n_rounds >= ZIP_PAD_SUCCESSFUL_ROUND_LIMIT
- && info->pad > 0) {
-
- ut_ad(info->pad % ZIP_PAD_INCR == 0);
-#ifdef HAVE_ATOMIC_BUILTINS
- /* Use atomics even though we have the mutex.
- This is to ensure that we are able to read
- info->pad atomically where atomics are
- supported. */
- os_atomic_decrement_ulint(&info->pad, ZIP_PAD_INCR);
-#else /* HAVE_ATOMIC_BUILTINS */
- info->pad -= ZIP_PAD_INCR;
-#endif /* HAVE_ATOMIC_BUILTINS */
-
- info->n_rounds = 0;
-
- MONITOR_INC(MONITOR_PAD_DECREMENTS);
- }
- }
-}
-
-/*********************************************************************//**
-This function should be called whenever a page is successfully
-compressed. Updates the compression padding information. */
-UNIV_INTERN
-void
-dict_index_zip_success(
-/*===================*/
- dict_index_t* index) /*!< in/out: index to be updated. */
-{
- ut_ad(index);
-
- ulint zip_threshold = zip_failure_threshold_pct;
- if (!zip_threshold) {
- /* Disabled by user. */
- return;
- }
-
- dict_index_zip_pad_lock(index);
- ++index->zip_pad.success;
- dict_index_zip_pad_update(&index->zip_pad, zip_threshold);
- dict_index_zip_pad_unlock(index);
-}
-
-/*********************************************************************//**
-This function should be called whenever a page compression attempt
-fails. Updates the compression padding information. */
-UNIV_INTERN
-void
-dict_index_zip_failure(
-/*===================*/
- dict_index_t* index) /*!< in/out: index to be updated. */
-{
- ut_ad(index);
-
- ulint zip_threshold = zip_failure_threshold_pct;
- if (!zip_threshold) {
- /* Disabled by user. */
- return;
- }
-
- dict_index_zip_pad_lock(index);
- ++index->zip_pad.failure;
- dict_index_zip_pad_update(&index->zip_pad, zip_threshold);
- dict_index_zip_pad_unlock(index);
-}
-
-
-/*********************************************************************//**
-Return the optimal page size, for which page will likely compress.
-@return page size beyond which page might not compress */
-UNIV_INTERN
-ulint
-dict_index_zip_pad_optimal_page_size(
-/*=================================*/
- dict_index_t* index) /*!< in: index for which page size
- is requested */
-{
- ulint pad;
- ulint min_sz;
- ulint sz;
-
- ut_ad(index);
-
- if (!zip_failure_threshold_pct) {
- /* Disabled by user. */
- return(UNIV_PAGE_SIZE);
- }
-
- /* We use atomics to read index->zip_pad.pad. Here we use zero
- as increment as are not changing the value of the 'pad'. On
- platforms where atomics are not available we grab the mutex. */
-
-#ifdef HAVE_ATOMIC_BUILTINS
- pad = os_atomic_increment_ulint(&index->zip_pad.pad, 0);
-#else /* HAVE_ATOMIC_BUILTINS */
- dict_index_zip_pad_lock(index);
- pad = index->zip_pad.pad;
- dict_index_zip_pad_unlock(index);
-#endif /* HAVE_ATOMIC_BUILTINS */
-
- ut_ad(pad < UNIV_PAGE_SIZE);
- sz = UNIV_PAGE_SIZE - pad;
-
- /* Min size allowed by user. */
- ut_ad(zip_pad_max < 100);
- min_sz = (UNIV_PAGE_SIZE * (100 - zip_pad_max)) / 100;
-
- return(ut_max(sz, min_sz));
-}
-
-/*************************************************************//**
-Convert table flag to row format string.
-@return row format name. */
-UNIV_INTERN
-const char*
-dict_tf_to_row_format_string(
-/*=========================*/
- ulint table_flag) /*!< in: row format setting */
-{
- switch (dict_tf_get_rec_format(table_flag)) {
- case REC_FORMAT_REDUNDANT:
- return("ROW_TYPE_REDUNDANT");
- case REC_FORMAT_COMPACT:
- return("ROW_TYPE_COMPACT");
- case REC_FORMAT_COMPRESSED:
- return("ROW_TYPE_COMPRESSED");
- case REC_FORMAT_DYNAMIC:
- return("ROW_TYPE_DYNAMIC");
- }
-
- ut_error;
- return(0);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/dict/dict0load.cc b/storage/xtradb/dict/dict0load.cc
deleted file mode 100644
index 4c3dd47761f..00000000000
--- a/storage/xtradb/dict/dict0load.cc
+++ /dev/null
@@ -1,3275 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file dict/dict0load.cc
-Loads to the memory cache database object definitions
-from dictionary tables
-
-Created 4/24/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dict0load.h"
-#include "mysql_version.h"
-
-#ifdef UNIV_NONINL
-#include "dict0load.ic"
-#endif
-
-#include "btr0pcur.h"
-#include "btr0btr.h"
-#include "page0page.h"
-#include "mach0data.h"
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "dict0stats.h"
-#include "rem0cmp.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-#include "dict0crea.h"
-#include "dict0priv.h"
-#include "ha_prototypes.h" /* innobase_casedn_str() */
-#include "fts0priv.h"
-
-/** Following are the InnoDB system tables. The positions in
-this array are referenced by enum dict_system_table_id. */
-static const char* SYSTEM_TABLE_NAME[] = {
- "SYS_TABLES",
- "SYS_INDEXES",
- "SYS_COLUMNS",
- "SYS_FIELDS",
- "SYS_FOREIGN",
- "SYS_FOREIGN_COLS",
- "SYS_TABLESPACES",
- "SYS_DATAFILES"
-};
-
-/* If this flag is TRUE, then we will load the cluster index's (and tables')
-metadata even if it is marked as "corrupted". */
-UNIV_INTERN my_bool srv_load_corrupted = FALSE;
-
-#ifdef UNIV_DEBUG
-/****************************************************************//**
-Compare the name of an index column.
-@return TRUE if the i'th column of index is 'name'. */
-static
-ibool
-name_of_col_is(
-/*===========*/
- const dict_table_t* table, /*!< in: table */
- const dict_index_t* index, /*!< in: index */
- ulint i, /*!< in: index field offset */
- const char* name) /*!< in: name to compare to */
-{
- ulint tmp = dict_col_get_no(dict_field_get_col(
- dict_index_get_nth_field(
- index, i)));
-
- return(strcmp(name, dict_table_get_col_name(table, tmp)) == 0);
-}
-#endif /* UNIV_DEBUG */
-
-/********************************************************************//**
-Finds the first table name in the given database.
-@return own: table name, NULL if does not exist; the caller must free
-the memory in the string! */
-UNIV_INTERN
-char*
-dict_get_first_table_name_in_db(
-/*============================*/
- const char* name) /*!< in: database name which ends in '/' */
-{
- dict_table_t* sys_tables;
- btr_pcur_t pcur;
- dict_index_t* sys_index;
- dtuple_t* tuple;
- mem_heap_t* heap;
- dfield_t* dfield;
- const rec_t* rec;
- const byte* field;
- ulint len;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- heap = mem_heap_create(1000);
-
- mtr_start(&mtr);
-
- sys_tables = dict_table_get_low("SYS_TABLES");
- sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
- ut_ad(!dict_table_is_comp(sys_tables));
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, name, ut_strlen(name));
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
-loop:
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- /* Not found */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__NAME, &len);
-
- if (len < strlen(name)
- || ut_memcmp(name, field, strlen(name)) != 0) {
- /* Not found */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- if (!rec_get_deleted_flag(rec, 0)) {
-
- /* We found one */
-
- char* table_name = mem_strdupl((char*) field, len);
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(table_name);
- }
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- goto loop;
-}
-
-/********************************************************************//**
-Prints to the standard output information on all tables found in the data
-dictionary system table. */
-UNIV_INTERN
-void
-dict_print(void)
-/*============*/
-{
- dict_table_t* table;
- btr_pcur_t pcur;
- const rec_t* rec;
- mem_heap_t* heap;
- mtr_t mtr;
-
- /* Enlarge the fatal semaphore wait timeout during the InnoDB table
- monitor printout */
-
- os_increment_counter_by_amount(
- server_mutex,
- srv_fatal_semaphore_wait_threshold,
- SRV_SEMAPHORE_WAIT_EXTENSION);
-
- heap = mem_heap_create(1000);
- mutex_enter(&(dict_sys->mutex));
- mtr_start(&mtr);
-
- rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES);
-
- while (rec) {
- const char* err_msg;
-
- err_msg = static_cast<const char*>(
- dict_process_sys_tables_rec_and_mtr_commit(
- heap, rec, &table, DICT_TABLE_LOAD_FROM_CACHE,
- &mtr));
-
- if (!err_msg) {
- dict_table_print(table);
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: %s\n", err_msg);
- }
-
- mem_heap_empty(heap);
-
- mtr_start(&mtr);
- rec = dict_getnext_system(&pcur, &mtr);
- }
-
- mtr_commit(&mtr);
- mutex_exit(&(dict_sys->mutex));
- mem_heap_free(heap);
-
- /* Restore the fatal semaphore wait timeout */
- os_decrement_counter_by_amount(
- server_mutex,
- srv_fatal_semaphore_wait_threshold,
- SRV_SEMAPHORE_WAIT_EXTENSION);
-}
-
-/********************************************************************//**
-This function gets the next system table record as it scans the table.
-@return the next record if found, NULL if end of scan */
-static
-const rec_t*
-dict_getnext_system_low(
-/*====================*/
- btr_pcur_t* pcur, /*!< in/out: persistent cursor to the
- record*/
- mtr_t* mtr) /*!< in: the mini-transaction */
-{
- rec_t* rec = NULL;
-
- while (!rec || rec_get_deleted_flag(rec, 0)) {
- btr_pcur_move_to_next_user_rec(pcur, mtr);
-
- rec = btr_pcur_get_rec(pcur);
-
- if (!btr_pcur_is_on_user_rec(pcur)) {
- /* end of index */
- btr_pcur_close(pcur);
-
- return(NULL);
- }
- }
-
- /* Get a record, let's save the position */
- btr_pcur_store_position(pcur, mtr);
-
- return(rec);
-}
-
-/********************************************************************//**
-This function opens a system table, and returns the first record.
-@return first record of the system table */
-UNIV_INTERN
-const rec_t*
-dict_startscan_system(
-/*==================*/
- btr_pcur_t* pcur, /*!< out: persistent cursor to
- the record */
- mtr_t* mtr, /*!< in: the mini-transaction */
- dict_system_id_t system_id) /*!< in: which system table to open */
-{
- dict_table_t* system_table;
- dict_index_t* clust_index;
- const rec_t* rec;
-
- ut_a(system_id < SYS_NUM_SYSTEM_TABLES);
-
- system_table = dict_table_get_low(SYSTEM_TABLE_NAME[system_id]);
-
- clust_index = UT_LIST_GET_FIRST(system_table->indexes);
-
- btr_pcur_open_at_index_side(true, clust_index, BTR_SEARCH_LEAF, pcur,
- true, 0, mtr);
-
- rec = dict_getnext_system_low(pcur, mtr);
-
- return(rec);
-}
-
-/********************************************************************//**
-This function gets the next system table record as it scans the table.
-@return the next record if found, NULL if end of scan */
-UNIV_INTERN
-const rec_t*
-dict_getnext_system(
-/*================*/
- btr_pcur_t* pcur, /*!< in/out: persistent cursor
- to the record */
- mtr_t* mtr) /*!< in: the mini-transaction */
-{
- const rec_t* rec;
-
- /* Restore the position */
- btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
-
- /* Get the next record */
- rec = dict_getnext_system_low(pcur, mtr);
-
- return(rec);
-}
-
-/********************************************************************//**
-This function processes one SYS_TABLES record and populate the dict_table_t
-struct for the table. Extracted out of dict_print() to be used by
-both monitor table output and information schema innodb_sys_tables output.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_tables_rec_and_mtr_commit(
-/*=======================================*/
- mem_heap_t* heap, /*!< in/out: temporary memory heap */
- const rec_t* rec, /*!< in: SYS_TABLES record */
- dict_table_t** table, /*!< out: dict_table_t to fill */
- dict_table_info_t status, /*!< in: status bit controls
- options such as whether we shall
- look for dict_table_t from cache
- first */
- mtr_t* mtr) /*!< in/out: mini-transaction,
- will be committed */
-{
- ulint len;
- const char* field;
- const char* err_msg = NULL;
- char* table_name;
-
- field = (const char*) rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__NAME, &len);
-
- ut_a(!rec_get_deleted_flag(rec, 0));
-
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
-
- /* Get the table name */
- table_name = mem_heap_strdupl(heap, field, len);
-
- /* If DICT_TABLE_LOAD_FROM_CACHE is set, first check
- whether there is cached dict_table_t struct */
- if (status & DICT_TABLE_LOAD_FROM_CACHE) {
-
- /* Commit before load the table again */
- mtr_commit(mtr);
-
- *table = dict_table_get_low(table_name);
-
- if (!(*table)) {
- err_msg = "Table not found in cache";
- }
- } else {
- err_msg = dict_load_table_low(table_name, rec, table);
- mtr_commit(mtr);
- }
-
- if (err_msg) {
- return(err_msg);
- }
-
- return(NULL);
-}
-
-/********************************************************************//**
-This function parses a SYS_INDEXES record and populates a dict_index_t
-structure with the information from the record. For detailed information
-about SYS_INDEXES fields, please refer to dict_boot() function.
-The actual parsing is delegated to dict_load_index_low(), which receives
-an 8-byte scratch buffer for the table id.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_indexes_rec(
-/*=========================*/
-	mem_heap_t*	heap,		/*!< in/out: heap memory */
-	const rec_t*	rec,		/*!< in: current SYS_INDEXES rec */
-	dict_index_t*	index,		/*!< out: index to be filled */
-	table_id_t*	table_id)	/*!< out: index table id; 0 if the
-					record could not be parsed far enough
-					to obtain it */
-{
-	const char*	err_msg;
-	byte*		buf;
-
-	buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
-
-	/* *table_id is decoded from buf unconditionally below, also when
-	an error is reported. Give the buffer a defined value so that an
-	error path which returns before storing the id does not make us
-	read uninitialised heap bytes.
-	NOTE(review): dict_load_index_low() is not visible here; this
-	assumes it may return before writing buf, as its sibling
-	dict_load_field_low() does - confirm. */
-	mach_write_to_8(buf, 0);
-
-	/* Parse the record, and get "dict_index_t" struct filled */
-	err_msg = dict_load_index_low(buf, NULL,
-				      heap, rec, FALSE, &index);
-
-	*table_id = mach_read_from_8(buf);
-
-	return(err_msg);
-}
-
-/********************************************************************//**
-Parses one SYS_COLUMNS record into a caller-provided dict_col_t.
-This is a thin wrapper around dict_load_column_low(), called with a
-NULL table so that only the stand-alone column struct is filled.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_columns_rec(
-/*=========================*/
-	mem_heap_t*	heap,		/*!< in/out: heap memory */
-	const rec_t*	rec,		/*!< in: current SYS_COLUMNS rec */
-	dict_col_t*	column,		/*!< out: dict_col_t to be filled */
-	table_id_t*	table_id,	/*!< out: table id */
-	const char**	col_name)	/*!< out: column name */
-{
-	/* No dict_table_t is involved: pass NULL for the table and let
-	the low-level loader fill column, table_id and col_name. */
-	return(dict_load_column_low(NULL, heap, column,
-				    table_id, col_name, rec));
-}
-
-/********************************************************************//**
-This function parses a SYS_FIELDS record and populates a dict_field_t
-structure with the information from the record. The parsing itself is
-done by dict_load_field_low(), which is handed two 8-byte buffers: one
-that receives the index id of this record and one holding last_id.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_fields_rec(
-/*========================*/
-	mem_heap_t*	heap,		/*!< in/out: heap memory */
-	const rec_t*	rec,		/*!< in: current SYS_FIELDS rec */
-	dict_field_t*	sys_field,	/*!< out: dict_field_t to be
-					filled */
-	ulint*		pos,		/*!< out: Field position */
-	index_id_t*	index_id,	/*!< out: current index id; 0 if the
-					record was rejected before its
-					INDEX_ID column was read */
-	index_id_t	last_id)	/*!< in: previous index id */
-{
-	byte*		buf;
-	byte*		last_index_id;
-	const char*	err_msg;
-
-	buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
-
-	/* dict_load_field_low() returns early, without storing anything
-	in buf, for a delete-marked record, a wrong column count or a
-	wrong INDEX_ID length. *index_id is decoded from buf
-	unconditionally below, so give the buffer a defined value instead
-	of reading uninitialised heap bytes on those paths. */
-	mach_write_to_8(buf, 0);
-
-	last_index_id = static_cast<byte*>(mem_heap_alloc(heap, 8));
-	mach_write_to_8(last_index_id, last_id);
-
-	err_msg = dict_load_field_low(buf, NULL, sys_field,
-				      pos, last_index_id, heap, rec);
-
-	*index_id = mach_read_from_8(buf);
-
-	return(err_msg);
-
-}
-
-/********************************************************************//**
-Fills a caller-provided dict_foreign_t from one SYS_FOREIGN record.
-Only id, foreign_table_name, referenced_table_name, type and n_fields
-are assigned; the three strings are copies allocated from heap.
-For detail information about SYS_FOREIGN fields, please refer to
-dict_load_foreign() function.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_foreign_rec(
-/*=========================*/
-	mem_heap_t*	heap,		/*!< in/out: heap memory */
-	const rec_t*	rec,		/*!< in: current SYS_FOREIGN rec */
-	dict_foreign_t*	foreign)	/*!< out: dict_foreign_t struct
-					to be filled */
-{
-	const byte*	col;
-	ulint		col_len;
-	ulint		packed;
-
-	if (rec_get_deleted_flag(rec, 0)) {
-		return("delete-marked record in SYS_FOREIGN");
-	}
-
-	if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_FOREIGN) {
-		return("wrong number of columns in SYS_FOREIGN record");
-	}
-
-	/* ID: must be a non-empty, non-NULL string. */
-	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_FOREIGN__ID, &col_len);
-	if (col_len == 0 || col_len == UNIV_SQL_NULL) {
-		goto err_len;
-	}
-
-	/* This receives a dict_foreign_t* that points to a stack variable.
-	So dict_foreign_free(foreign) is not used as elsewhere.
-	Since the heap used here is freed elsewhere, foreign->heap
-	is not assigned. */
-	foreign->id = mem_heap_strdupl(heap, (const char*) col, col_len);
-
-	/* The two system columns are only length-checked. */
-	rec_get_nth_field_offs_old(
-		rec, DICT_FLD__SYS_FOREIGN__DB_TRX_ID, &col_len);
-	if (!(col_len == DATA_TRX_ID_LEN || col_len == UNIV_SQL_NULL)) {
-		goto err_len;
-	}
-
-	rec_get_nth_field_offs_old(
-		rec, DICT_FLD__SYS_FOREIGN__DB_ROLL_PTR, &col_len);
-	if (!(col_len == DATA_ROLL_PTR_LEN || col_len == UNIV_SQL_NULL)) {
-		goto err_len;
-	}
-
-	/* The _lookup versions of the referenced and foreign table names
-	are not assigned since they are not used in this dict_foreign_t */
-
-	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_FOREIGN__FOR_NAME, &col_len);
-	if (col_len == 0 || col_len == UNIV_SQL_NULL) {
-		goto err_len;
-	}
-	foreign->foreign_table_name = mem_heap_strdupl(
-		heap, (const char*) col, col_len);
-
-	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_FOREIGN__REF_NAME, &col_len);
-	if (col_len == 0 || col_len == UNIV_SQL_NULL) {
-		goto err_len;
-	}
-	foreign->referenced_table_name = mem_heap_strdupl(
-		heap, (const char*) col, col_len);
-
-	/* N_COLS is a 4-byte value: the bits above bit 23 are stored in
-	foreign->type, the low 10 bits in foreign->n_fields. */
-	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_FOREIGN__N_COLS, &col_len);
-	if (col_len != 4) {
-		goto err_len;
-	}
-	packed = mach_read_from_4(col);
-
-	foreign->type = (unsigned int) (packed >> 24);
-	foreign->n_fields = (unsigned int) (packed & 0x3FFUL);
-
-	return(NULL);
-
-err_len:
-	return("incorrect column length in SYS_FOREIGN");
-}
-
-/********************************************************************//**
-Extracts the constraint name, column position and the two column names
-from one SYS_FOREIGN_COLS record. The returned strings are copies
-allocated from heap.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_foreign_col_rec(
-/*=============================*/
-	mem_heap_t*	heap,		/*!< in/out: heap memory */
-	const rec_t*	rec,		/*!< in: current SYS_FOREIGN_COLS rec */
-	const char**	name,		/*!< out: foreign key constraint name */
-	const char**	for_col_name,	/*!< out: referencing column name */
-	const char**	ref_col_name,	/*!< out: referenced column name
-					in referenced table */
-	ulint*		pos)		/*!< out: column position */
-{
-	const byte*	col;
-	ulint		col_len;
-
-	if (rec_get_deleted_flag(rec, 0)) {
-		return("delete-marked record in SYS_FOREIGN_COLS");
-	}
-
-	if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_FOREIGN_COLS) {
-		return("wrong number of columns in SYS_FOREIGN_COLS record");
-	}
-
-	/* ID: constraint name, must be non-empty and non-NULL. */
-	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_FOREIGN_COLS__ID, &col_len);
-	if (col_len == 0 || col_len == UNIV_SQL_NULL) {
-		goto err_len;
-	}
-	*name = mem_heap_strdupl(heap, (char*) col, col_len);
-
-	/* POS: 4-byte column position. */
-	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_FOREIGN_COLS__POS, &col_len);
-	if (col_len != 4) {
-		goto err_len;
-	}
-	*pos = mach_read_from_4(col);
-
-	/* The two system columns are only length-checked. */
-	rec_get_nth_field_offs_old(
-		rec, DICT_FLD__SYS_FOREIGN_COLS__DB_TRX_ID, &col_len);
-	if (!(col_len == DATA_TRX_ID_LEN || col_len == UNIV_SQL_NULL)) {
-		goto err_len;
-	}
-
-	rec_get_nth_field_offs_old(
-		rec, DICT_FLD__SYS_FOREIGN_COLS__DB_ROLL_PTR, &col_len);
-	if (!(col_len == DATA_ROLL_PTR_LEN || col_len == UNIV_SQL_NULL)) {
-		goto err_len;
-	}
-
-	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME, &col_len);
-	if (col_len == 0 || col_len == UNIV_SQL_NULL) {
-		goto err_len;
-	}
-	*for_col_name = mem_heap_strdupl(heap, (char*) col, col_len);
-
-	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME, &col_len);
-	if (col_len == 0 || col_len == UNIV_SQL_NULL) {
-		goto err_len;
-	}
-	*ref_col_name = mem_heap_strdupl(heap, (char*) col, col_len);
-
-	return(NULL);
-
-err_len:
-	return("incorrect column length in SYS_FOREIGN_COLS");
-}
-
-/********************************************************************//**
-Extracts the space id, name and flags from one SYS_TABLESPACES record.
-All three outputs are reset first (ULINT_UNDEFINED / NULL), so they hold
-those values for any field that was not reached when an error is
-returned. The name is a copy allocated from heap.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_tablespaces(
-/*=========================*/
-	mem_heap_t*	heap,		/*!< in/out: heap memory */
-	const rec_t*	rec,		/*!< in: current SYS_TABLESPACES rec */
-	ulint*		space,		/*!< out: space id */
-	const char**	name,		/*!< out: tablespace name */
-	ulint*		flags)		/*!< out: tablespace flags */
-{
-	const byte*	col;
-	ulint		col_len;
-
-	/* Reset the outputs before any early return. */
-	*flags = ULINT_UNDEFINED;
-	*name = NULL;
-	*space = ULINT_UNDEFINED;
-
-	if (rec_get_deleted_flag(rec, 0)) {
-		return("delete-marked record in SYS_TABLESPACES");
-	}
-
-	if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLESPACES) {
-		return("wrong number of columns in SYS_TABLESPACES record");
-	}
-
-	/* SPACE */
-	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_TABLESPACES__SPACE, &col_len);
-	if (col_len != DICT_FLD_LEN_SPACE) {
-		goto err_len;
-	}
-	*space = mach_read_from_4(col);
-
-	/* The two system columns are only length-checked. */
-	rec_get_nth_field_offs_old(
-		rec, DICT_FLD__SYS_TABLESPACES__DB_TRX_ID, &col_len);
-	if (!(col_len == DATA_TRX_ID_LEN || col_len == UNIV_SQL_NULL)) {
-		goto err_len;
-	}
-
-	rec_get_nth_field_offs_old(
-		rec, DICT_FLD__SYS_TABLESPACES__DB_ROLL_PTR, &col_len);
-	if (!(col_len == DATA_ROLL_PTR_LEN || col_len == UNIV_SQL_NULL)) {
-		goto err_len;
-	}
-
-	/* NAME: must be non-empty and non-NULL. */
-	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_TABLESPACES__NAME, &col_len);
-	if (col_len == 0 || col_len == UNIV_SQL_NULL) {
-		goto err_len;
-	}
-	*name = mem_heap_strdupl(heap, (char*) col, col_len);
-
-	/* FLAGS */
-	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_TABLESPACES__FLAGS, &col_len);
-	if (col_len != DICT_FLD_LEN_FLAGS) {
-		goto err_len;
-	}
-	*flags = mach_read_from_4(col);
-
-	return(NULL);
-
-err_len:
-	return("incorrect column length in SYS_TABLESPACES");
-}
-
-/********************************************************************//**
-This function parses a SYS_DATAFILES record, extracts necessary
-information from the record and returns it to the caller.
-Both outputs are reset first (ULINT_UNDEFINED / NULL), in the same way
-as dict_process_sys_tablespaces() does, so that a caller never sees
-stale values for a field that was not reached when an error is returned.
-The path is a copy allocated from heap.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_datafiles(
-/*=======================*/
-	mem_heap_t*	heap,		/*!< in/out: heap memory */
-	const rec_t*	rec,		/*!< in: current SYS_DATAFILES rec */
-	ulint*		space,		/*!< out: space id */
-	const char**	path)		/*!< out: datafile paths */
-{
-	ulint		len;
-	const byte*	field;
-
-	/* Initialize the output values */
-	*space = ULINT_UNDEFINED;
-	*path = NULL;
-
-	if (rec_get_deleted_flag(rec, 0)) {
-		return("delete-marked record in SYS_DATAFILES");
-	}
-
-	if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_DATAFILES) {
-		return("wrong number of columns in SYS_DATAFILES record");
-	}
-
-	field = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_DATAFILES__SPACE, &len);
-	if (len != DICT_FLD_LEN_SPACE) {
-err_len:
-		return("incorrect column length in SYS_DATAFILES");
-	}
-	*space = mach_read_from_4(field);
-
-	/* The two system columns are only length-checked. */
-	rec_get_nth_field_offs_old(
-		rec, DICT_FLD__SYS_DATAFILES__DB_TRX_ID, &len);
-	if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
-		goto err_len;
-	}
-
-	rec_get_nth_field_offs_old(
-		rec, DICT_FLD__SYS_DATAFILES__DB_ROLL_PTR, &len);
-	if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
-		goto err_len;
-	}
-
-	/* PATH: must be non-empty and non-NULL. */
-	field = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_DATAFILES__PATH, &len);
-	if (len == 0 || len == UNIV_SQL_NULL) {
-		goto err_len;
-	}
-	*path = mem_heap_strdupl(heap, (char*) field, len);
-
-	return(NULL);
-}
-
-/********************************************************************//**
-Derives dict_table_t::flags for a table from the TYPE and N_COLS columns
-of its SYS_TABLES record. Both columns are asserted to be 4 bytes long.
-@return ULINT_UNDEFINED if dict_sys_tables_type_validate() rejects the
-combination, else the result of dict_sys_tables_type_to_tf(). */
-static
-ulint
-dict_sys_tables_get_flags(
-/*======================*/
-	const rec_t*	rec)	/*!< in: a record of SYS_TABLES */
-{
-	ulint		col_len;
-	const byte*	col;
-	ulint		sys_type;
-	ulint		sys_n_cols;
-
-	/* SYS_TABLES.TYPE holds the 4 byte flags. */
-	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_TABLES__TYPE, &col_len);
-	ut_a(col_len == 4);
-	sys_type = mach_read_from_4(col);
-
-	/* The low order bit of SYS_TABLES.TYPE is always set to 1. But in
-	dict_table_t::flags the low order bit is used to determine if the
-	row format is Redundant or Compact when the format is Antelope.
-	The high order bit of the 4 byte N_COLS field carries that
-	information instead: it should be set for COMPACT and later and
-	clear for REDUNDANT. */
-	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_TABLES__N_COLS, &col_len);
-	ut_a(col_len == 4);
-	sys_n_cols = mach_read_from_4(col);
-
-	/* Validate first; the conversion then combines the
-	DICT_N_COLS_COMPACT flag in n_cols into the type field to
-	effectively make it a dict_table_t::flags. */
-	if (dict_sys_tables_type_validate(sys_type, sys_n_cols)
-	    != ULINT_UNDEFINED) {
-		return(dict_sys_tables_type_to_tf(sys_type, sys_n_cols));
-	}
-
-	return(ULINT_UNDEFINED);
-}
-
-/********************************************************************//**
-Gets the filepath for a spaceid from SYS_DATAFILES and checks it against
-the contents of a link file. This function is called when there is no
-fil_node_t entry for this space ID so both durable locations on disk
-must be checked and compared.
-We use a temporary heap here for the table lookup, but not for the path
-returned which the caller must free.
-This function can return NULL if the space ID is not found in SYS_DATAFILES,
-then the caller will assume that the ibd file is in the normal datadir.
-The caller must hold dict_sys->mutex.
-@return own: A copy of the first datafile found in SYS_DATAFILES.PATH for
-the given space ID. NULL if no record with exactly this space ID exists. */
-UNIV_INTERN
-char*
-dict_get_first_path(
-/*================*/
-	ulint		space,	/*!< in: space id */
-	const char*	name)	/*!< in: tablespace name; not used by
-				this function */
-{
-	mtr_t		mtr;
-	dict_table_t*	sys_datafiles;
-	dict_index_t*	sys_index;
-	dtuple_t*	tuple;
-	dfield_t*	dfield;
-	byte*		buf;
-	btr_pcur_t	pcur;
-	const rec_t*	rec;
-	const byte*	field;
-	ulint		len;
-	char*		dict_filepath = NULL;
-	mem_heap_t*	heap = mem_heap_create(1024);
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	mtr_start(&mtr);
-
-	sys_datafiles = dict_table_get_low("SYS_DATAFILES");
-	sys_index = UT_LIST_GET_FIRST(sys_datafiles->indexes);
-	ut_ad(!dict_table_is_comp(sys_datafiles));
-	ut_ad(name_of_col_is(sys_datafiles, sys_index,
-			     DICT_FLD__SYS_DATAFILES__SPACE, "SPACE"));
-	ut_ad(name_of_col_is(sys_datafiles, sys_index,
-			     DICT_FLD__SYS_DATAFILES__PATH, "PATH"));
-
-	/* Build a one-column search tuple holding the 4-byte space id. */
-	tuple = dtuple_create(heap, 1);
-	dfield = dtuple_get_nth_field(tuple, DICT_FLD__SYS_DATAFILES__SPACE);
-
-	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-	mach_write_to_4(buf, space);
-
-	dfield_set_data(dfield, buf, 4);
-	dict_index_copy_types(tuple, sys_index, 1);
-
-	btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
-				  BTR_SEARCH_LEAF, &pcur, &mtr);
-
-	rec = btr_pcur_get_rec(&pcur);
-
-	/* If the file-per-table tablespace was created with
-	an earlier version of InnoDB, then this record is not
-	in SYS_DATAFILES. But a link file still might exist. */
-
-	if (btr_pcur_is_on_user_rec(&pcur)) {
-		/* The cursor was opened with PAGE_CUR_GE, so when the
-		requested space id is absent it rests on the record of
-		the next larger space id. Compare the SPACE column
-		before trusting PATH; otherwise the path of an unrelated
-		tablespace would be returned. */
-		field = rec_get_nth_field_old(
-			rec, DICT_FLD__SYS_DATAFILES__SPACE, &len);
-
-		if (len == DICT_FLD_LEN_SPACE
-		    && mach_read_from_4(field) == space) {
-			/* A record for this space ID was found. */
-			field = rec_get_nth_field_old(
-				rec, DICT_FLD__SYS_DATAFILES__PATH, &len);
-			ut_a(len > 0 || len == UNIV_SQL_NULL);
-			ut_a(len < OS_FILE_MAX_PATH);
-			/* Allocated outside heap: ownership passes to
-			the caller. */
-			dict_filepath = mem_strdupl((char*) field, len);
-			ut_a(dict_filepath);
-		}
-	}
-
-	btr_pcur_close(&pcur);
-	mtr_commit(&mtr);
-	mem_heap_free(heap);
-
-	return(dict_filepath);
-}
-
-/********************************************************************//**
-Sets SYS_DATAFILES.PATH to filepath for the row whose SPACE equals
-space_id. The statement runs in its own background transaction, which is
-committed and freed before returning. The outcome is written to the log
-at INFO level on success and WARN level otherwise.
-The caller must hold dict_sys->mutex.
-@return DB_SUCCESS if OK, dberr_t if the update failed */
-UNIV_INTERN
-dberr_t
-dict_update_filepath(
-/*=================*/
-	ulint		space_id,	/*!< in: space id */
-	const char*	filepath)	/*!< in: filepath */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	/* Dedicated DDL transaction for the dictionary update. */
-	trx_t*	update_trx = trx_allocate_for_background();
-
-	update_trx->op_info = "update filepath";
-	update_trx->dict_operation_lock_mode = RW_X_LATCH;
-	trx_start_for_ddl(update_trx, TRX_DICT_OP_INDEX);
-
-	/* Bind :space and :path for the SQL procedure below. */
-	pars_info_t*	bindings = pars_info_create();
-
-	pars_info_add_int4_literal(bindings, "space", space_id);
-	pars_info_add_str_literal(bindings, "path", filepath);
-
-	const dberr_t	result = que_eval_sql(
-		bindings,
-		"PROCEDURE UPDATE_FILEPATH () IS\n"
-		"BEGIN\n"
-		"UPDATE SYS_DATAFILES"
-		" SET PATH = :path\n"
-		" WHERE SPACE = :space;\n"
-		"END;\n", FALSE, update_trx);
-
-	trx_commit_for_mysql(update_trx);
-	update_trx->dict_operation_lock_mode = 0;
-	trx_free_for_background(update_trx);
-
-	if (result != DB_SUCCESS) {
-		ib_logf(IB_LOG_LEVEL_WARN,
-			"Problem updating InnoDB data dictionary table "
-			"SYS_DATAFILES for tablespace ID %lu to file %s.",
-			(ulong) space_id, filepath);
-
-		return(result);
-	}
-
-	/* We just updated SYS_DATAFILES due to the contents in
-	a link file. Make a note that we did this. */
-	ib_logf(IB_LOG_LEVEL_INFO,
-		"The InnoDB data dictionary table SYS_DATAFILES "
-		"for tablespace ID %lu was updated to use file %s.",
-		(ulong) space_id, filepath);
-
-	return(result);
-}
-
-/********************************************************************//**
-Adds a tablespace to the data dictionary by calling
-dict_create_add_tablespace_to_dictionary() inside a dedicated background
-transaction, which is committed and freed before returning.
-The caller must hold dict_sys->mutex.
-@return DB_SUCCESS if OK, dberr_t if the insert failed */
-UNIV_INTERN
-dberr_t
-dict_insert_tablespace_and_filepath(
-/*================================*/
-	ulint		space,		/*!< in: space id */
-	const char*	name,		/*!< in: tablespace name */
-	const char*	filepath,	/*!< in: filepath; must not be NULL */
-	ulint		fsp_flags)	/*!< in: tablespace flags */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-	ut_ad(filepath);
-
-	trx_t*	insert_trx = trx_allocate_for_background();
-
-	insert_trx->op_info = "insert tablespace and filepath";
-	insert_trx->dict_operation_lock_mode = RW_X_LATCH;
-	trx_start_for_ddl(insert_trx, TRX_DICT_OP_INDEX);
-
-	/* A record for this space ID was not found in
-	SYS_DATAFILES. Assume the record is also missing in
-	SYS_TABLESPACES. Insert records into them both. */
-	const dberr_t	result = dict_create_add_tablespace_to_dictionary(
-		space, name, fsp_flags, filepath, insert_trx, false);
-
-	trx_commit_for_mysql(insert_trx);
-	insert_trx->dict_operation_lock_mode = 0;
-	trx_free_for_background(insert_trx);
-
-	return(result);
-}
-
-/* Set by Xtrabackup. Callback taking a table name; initially 0 (unset).
-dict_check_tablespaces_and_store_max_id() asserts that it is set whenever
-IS_XTRABACKUP() holds, and a non-zero return for a table suppresses the
-"tablespace does not exist" error printout for that table. */
-my_bool (*dict_check_if_skip_table)(const char* name) = 0;
-
-
-/********************************************************************//**
-This function looks at each table defined in SYS_TABLES. It checks the
-tablespace for any table with a space_id > 0. It looks up the tablespace
-in SYS_DATAFILES to ensure the correct path.
-
-In a crash recovery we already have all the tablespace objects created.
-This function compares the space id information in the InnoDB data dictionary
-to what we already read with fil_load_single_table_tablespaces().
-
-In a normal startup, we create the tablespace objects for every table in
-InnoDB's data dictionary, if the corresponding .ibd file exists.
-We also scan the biggest space id, and store it to fil_system.
-
-The whole scan runs with dict_operation_lock X-locked and dict_sys->mutex
-held; both are released when the end of the SYS_TABLES index is reached.
-The mini-transaction is committed around the per-table check and the
-cursor position is stored before and restored after it. */
-UNIV_INTERN
-void
-dict_check_tablespaces_and_store_max_id(
-/*====================================*/
- dict_check_t dict_check) /*!< in: how to check */
-{
- dict_table_t* sys_tables;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- const rec_t* rec;
- ulint max_space_id;
- mtr_t mtr;
-
- rw_lock_x_lock(&dict_operation_lock);
- mutex_enter(&(dict_sys->mutex));
-
- mtr_start(&mtr);
-
- sys_tables = dict_table_get_low("SYS_TABLES");
- sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
- ut_ad(!dict_table_is_comp(sys_tables));
-
- /* Seed the maximum with the value recorded in the dictionary
- header; it is raised below for every larger space id found. */
- max_space_id = mtr_read_ulint(dict_hdr_get(&mtr)
- + DICT_HDR_MAX_SPACE_ID,
- MLOG_4BYTES, &mtr);
- fil_set_max_space_id_if_bigger(max_space_id);
-
- btr_pcur_open_at_index_side(true, sys_index, BTR_SEARCH_LEAF, &pcur,
- true, 0, &mtr);
-loop:
- /* Every path through the body below ends in "goto loop", which
- advances the cursor to the next SYS_TABLES record. */
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- /* end of index */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- /* We must make the tablespace cache aware of the biggest
- known space id */
-
- /* printf("Biggest space id in data dictionary %lu\n",
- max_space_id); */
- fil_set_max_space_id_if_bigger(max_space_id);
-
- mutex_exit(&(dict_sys->mutex));
- rw_lock_x_unlock(&dict_operation_lock);
-
- return;
- }
-
- /* Delete-marked records are skipped entirely. */
- if (!rec_get_deleted_flag(rec, 0)) {
-
- /* We found one */
- const byte* field;
- ulint len;
- ulint space_id;
- ulint flags;
- char* name;
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__NAME, &len);
-
- /* Private copy of the table name; it is released with
- mem_free() on every path that leaves this block. */
- name = mem_strdupl((char*) field, len);
-
- /* Formatted form of the name, used only in log messages. */
- char table_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- table_name, sizeof(table_name), name, FALSE);
-
- flags = dict_sys_tables_get_flags(rec);
- if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
- /* Read again the 4 bytes from rec. */
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__TYPE, &len);
- ut_ad(len == 4); /* this was checked earlier */
- flags = mach_read_from_4(field);
-
- /* Report the raw TYPE value and skip this table. */
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Table '%s' in InnoDB data dictionary"
- " has unknown type %lx", table_name, flags);
- mem_free(name);
- goto loop;
- }
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__SPACE, &len);
- ut_a(len == 4);
-
- space_id = mach_read_from_4(field);
-
- /* Remember where we are: the mini-transaction is committed
- before the tablespace check and the position is restored
- afterwards. */
- btr_pcur_store_position(&pcur, &mtr);
-
- /* For tables created with old versions of InnoDB,
- SYS_TABLES.MIX_LEN may contain garbage. Such tables
- would always be in ROW_FORMAT=REDUNDANT. Pretend that
- all such tables are non-temporary. That is, do not
- suppress error printouts about temporary or discarded
- tablespaces not being found. */
-
- /* NOTE(review): unlike TYPE, N_COLS and SPACE, the length
- of MIX_LEN is not checked before 4 bytes are read from it. */
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len);
-
- bool is_temp = false;
- bool discarded = false;
- bool print_error_if_does_not_exist;
- bool remove_from_data_dict_if_does_not_exist;
-
- ib_uint32_t flags2 = static_cast<ib_uint32_t>(
- mach_read_from_4(field));
-
- /* Check that the tablespace (the .ibd file) really
- exists; print a warning to the .err log if not.
- Do not print warnings for temporary tables or for
- tablespaces that have been discarded. */
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
-
- /* MIX_LEN valid only for ROW_FORMAT > REDUNDANT. */
- if (mach_read_from_4(field) & DICT_N_COLS_COMPACT) {
-
- is_temp = !!(flags2 & DICT_TF2_TEMPORARY);
- discarded = !!(flags2 & DICT_TF2_DISCARDED);
- }
-
- if (space_id == 0) {
- /* The system tablespace always exists. */
- ut_ad(!discarded);
- mem_free(name);
- /* The mini-transaction was not committed on this
- path, so the cursor is still valid as it is. */
- goto loop;
- }
-
-
- /* Under xtrabackup the skip callback must have been set. */
- ut_a(!IS_XTRABACKUP() || dict_check_if_skip_table);
-
- /* Stay silent about a missing file for temporary or
- discarded tables, and for tables the xtrabackup callback
- asks to skip. */
- if (is_temp || discarded ||
- (IS_XTRABACKUP() && dict_check_if_skip_table(name))) {
- print_error_if_does_not_exist = false;
- }
- else {
- print_error_if_does_not_exist = true;
- }
-
- /* Only set under xtrabackup, and never for temporary or
- discarded tables. */
- remove_from_data_dict_if_does_not_exist = IS_XTRABACKUP() && !(is_temp || discarded);
-
- /* rec must not be used after this commit; everything needed
- from it has been copied into locals above. */
- mtr_commit(&mtr);
-
- switch (dict_check) {
- case DICT_CHECK_ALL_LOADED:
- /* All tablespaces should have been found in
- fil_load_single_table_tablespaces(). */
- if (fil_space_for_table_exists_in_mem(
- space_id, name, print_error_if_does_not_exist,
- remove_from_data_dict_if_does_not_exist , false, NULL, 0, flags)
- && !(is_temp || discarded)) {
- /* If user changes the path of .ibd files in
- *.isl files before doing crash recovery ,
- then this leads to inconsistency in
- SYS_DATAFILES system table because the
- tables are loaded from the updated path
- but the SYS_DATAFILES still points to the
- old path.Therefore after crash recovery
- update SYS_DATAFILES with the updated path.*/
- ut_ad(space_id);
- ut_ad(recv_needed_recovery);
- /* Both strings are owned here and freed below.
- SYS_DATAFILES is rewritten only when both paths
- are known and they differ. */
- char *dict_path = dict_get_first_path(space_id,
- name);
- char *remote_path = fil_read_link_file(name);
- if(dict_path && remote_path) {
- if(strcmp(dict_path,remote_path)) {
- dict_update_filepath(space_id,
- remote_path);
- }
- }
- if(dict_path)
- mem_free(dict_path);
- if(remote_path)
- mem_free(remote_path);
- }
- break;
-
- case DICT_CHECK_SOME_LOADED:
- /* Some tablespaces may have been opened in
- trx_resurrect_table_locks(). */
- if (fil_space_for_table_exists_in_mem(
- space_id, name, false,
- false, false, NULL, 0, flags)) {
- break;
- }
- /* fall through */
- case DICT_CHECK_NONE_LOADED:
- if (discarded) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "DISCARD flag set for table '%s',"
- " ignored.",
- table_name);
- break;
- }
-
- /* It is a normal database startup: create the
- space object and check that the .ibd file exists.
- If the table uses a remote tablespace, look for the
- space_id in SYS_DATAFILES to find the filepath */
-
- /* Use the remote filepath if known. */
- char* filepath = NULL;
- if (DICT_TF_HAS_DATA_DIR(flags)) {
- filepath = dict_get_first_path(
- space_id, name);
- }
-
- /* We could read page 0 to get (optional) IV
- if encryption is turned on, if it's off
- we will read the page 0 later and find out
- if we should decrypt a potentially
- already encrypted table
- bool read_page_0 = srv_encrypt_tables; */
-
- bool read_page_0 = false;
-
- /* We set the 2nd param (fix_dict = true)
- here because we already have an x-lock on
- dict_operation_lock and dict_sys->mutex. Besides,
- this is at startup and we are now single threaded.
- If the filepath is not known, it will need to
- be discovered. */
- /* NOTE(review): the second argument actually passed
- below is false when srv_read_only_mode is set and true
- otherwise, not unconditionally true as the comment
- above suggests. */
- dberr_t err = fil_open_single_table_tablespace(
- read_page_0, srv_read_only_mode ? false : true,
- space_id, dict_tf_to_fsp_flags(flags),
- name, filepath);
-
- /* A failed open is logged and otherwise ignored. */
- if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Tablespace open failed for '%s', "
- "ignored.", table_name);
- }
-
- if (filepath) {
- mem_free(filepath);
- }
-
- break;
- }
-
- /* Track the largest space id seen in SYS_TABLES. */
- if (space_id > max_space_id) {
- max_space_id = space_id;
- }
-
- mem_free(name);
- /* Start a new mini-transaction and go back to the stored
- cursor position before continuing the scan. */
- mtr_start(&mtr);
-
- btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
- }
-
- goto loop;
-}
-
-/********************************************************************//**
-Parses one SYS_COLUMNS record. With a non-NULL column the parsed values
-are stored in that stand-alone dict_col_t; otherwise the column is
-appended to table with dict_mem_table_add_col(). The column name is
-copied into heap.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_load_column_low(
-/*=================*/
-	dict_table_t*	table,		/*!< in/out: table, could be NULL
-					if we just populate a dict_column_t
-					struct with information from
-					a SYS_COLUMNS record */
-	mem_heap_t*	heap,		/*!< in/out: memory heap
-					for temporary storage */
-	dict_col_t*	column,		/*!< out: dict_column_t to fill,
-					or NULL if table != NULL */
-	table_id_t*	table_id,	/*!< out: table id; if NULL, the
-					TABLE_ID of the record is compared
-					with table->id instead */
-	const char**	col_name,	/*!< out: column name; may be NULL */
-	const rec_t*	rec)		/*!< in: SYS_COLUMNS record */
-{
-	const byte*	fld;
-	ulint		fld_len;
-	char*		name_copy;
-	ulint		main_type;
-	ulint		precise_type;
-	ulint		max_len;
-	ulint		ordinal;
-
-	ut_ad(table || column);
-
-	if (rec_get_deleted_flag(rec, 0)) {
-		return("delete-marked record in SYS_COLUMNS");
-	}
-
-	if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_COLUMNS) {
-		return("wrong number of columns in SYS_COLUMNS record");
-	}
-
-	/* TABLE_ID: either reported to the caller or cross-checked
-	against the table being loaded. */
-	fld = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_COLUMNS__TABLE_ID, &fld_len);
-	if (fld_len != 8) {
-		goto err_len;
-	}
-
-	if (!table_id) {
-		if (table->id != mach_read_from_8(fld)) {
-			return("SYS_COLUMNS.TABLE_ID mismatch");
-		}
-	} else {
-		*table_id = mach_read_from_8(fld);
-	}
-
-	/* POS: when loading into a table it must equal the number of
-	columns defined so far. */
-	fld = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_COLUMNS__POS, &fld_len);
-	if (fld_len != 4) {
-		goto err_len;
-	}
-
-	ordinal = mach_read_from_4(fld);
-
-	if (table && table->n_def != ordinal) {
-		return("SYS_COLUMNS.POS mismatch");
-	}
-
-	/* The two system columns are only length-checked. */
-	rec_get_nth_field_offs_old(
-		rec, DICT_FLD__SYS_COLUMNS__DB_TRX_ID, &fld_len);
-	if (!(fld_len == DATA_TRX_ID_LEN || fld_len == UNIV_SQL_NULL)) {
-		goto err_len;
-	}
-
-	rec_get_nth_field_offs_old(
-		rec, DICT_FLD__SYS_COLUMNS__DB_ROLL_PTR, &fld_len);
-	if (!(fld_len == DATA_ROLL_PTR_LEN || fld_len == UNIV_SQL_NULL)) {
-		goto err_len;
-	}
-
-	/* NAME: must be non-empty and non-NULL. */
-	fld = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_COLUMNS__NAME, &fld_len);
-	if (fld_len == 0 || fld_len == UNIV_SQL_NULL) {
-		goto err_len;
-	}
-
-	name_copy = mem_heap_strdupl(heap, (const char*) fld, fld_len);
-
-	if (col_name) {
-		*col_name = name_copy;
-	}
-
-	/* MTYPE and PRTYPE */
-	fld = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_COLUMNS__MTYPE, &fld_len);
-	if (fld_len != 4) {
-		goto err_len;
-	}
-
-	main_type = mach_read_from_4(fld);
-
-	fld = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_COLUMNS__PRTYPE, &fld_len);
-	if (fld_len != 4) {
-		goto err_len;
-	}
-
-	precise_type = mach_read_from_4(fld);
-
-	if (dtype_get_charset_coll(precise_type) == 0
-	    && dtype_is_string_type(main_type)) {
-		/* The table was created with < 4.1.2: no collation is
-		stored. Binary string columns get the binary collation,
-		all other string columns the default one. */
-
-		if (!dtype_is_binary_string_type(main_type, precise_type)) {
-			precise_type = dtype_form_prtype(
-				precise_type,
-				data_mysql_default_charset_coll);
-		} else {
-			precise_type = dtype_form_prtype(
-				precise_type,
-				DATA_MYSQL_BINARY_CHARSET_COLL);
-		}
-	}
-
-	/* LEN */
-	fld = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_COLUMNS__LEN, &fld_len);
-	if (fld_len != 4) {
-		goto err_len;
-	}
-
-	max_len = mach_read_from_4(fld);
-
-	/* PREC is only length-checked; its value is not used. */
-	fld = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_COLUMNS__PREC, &fld_len);
-	if (fld_len != 4) {
-		goto err_len;
-	}
-
-	if (column) {
-		dict_mem_fill_column_struct(column, ordinal, main_type,
-					    precise_type, max_len);
-	} else {
-		dict_mem_table_add_col(table, heap, name_copy, main_type,
-				       precise_type, max_len);
-	}
-
-	return(NULL);
-
-err_len:
-	return("incorrect column length in SYS_COLUMNS");
-}
-
-/********************************************************************//**
-Loads the user column definitions of a table from SYS_COLUMNS, starting
-at the first record for table->id and reading
-table->n_cols - DATA_N_SYS_COLS consecutive records. Any parse error is
-printed to stderr and is fatal. A column named FTS_DOC_ID_COL_NAME also
-sets up table->fts. The caller must hold dict_sys->mutex. */
-static
-void
-dict_load_columns(
-/*==============*/
-	dict_table_t*	table,	/*!< in/out: table */
-	mem_heap_t*	heap)	/*!< in/out: memory heap
-				for temporary storage */
-{
-	dict_table_t*	sys_columns;
-	dict_index_t*	clust_index;
-	btr_pcur_t	cursor;
-	dtuple_t*	search_tuple;
-	dfield_t*	id_field;
-	byte*		id_buf;
-	mtr_t		mtr;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	mtr_start(&mtr);
-
-	sys_columns = dict_table_get_low("SYS_COLUMNS");
-	clust_index = UT_LIST_GET_FIRST(sys_columns->indexes);
-	ut_ad(!dict_table_is_comp(sys_columns));
-
-	ut_ad(name_of_col_is(sys_columns, clust_index,
-			     DICT_FLD__SYS_COLUMNS__NAME, "NAME"));
-	ut_ad(name_of_col_is(sys_columns, clust_index,
-			     DICT_FLD__SYS_COLUMNS__PREC, "PREC"));
-
-	/* One-column search tuple holding the 8-byte table id. */
-	search_tuple = dtuple_create(heap, 1);
-	id_field = dtuple_get_nth_field(search_tuple, 0);
-
-	id_buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
-	mach_write_to_8(id_buf, table->id);
-
-	dfield_set_data(id_field, id_buf, 8);
-	dict_index_copy_types(search_tuple, clust_index, 1);
-
-	btr_pcur_open_on_user_rec(clust_index, search_tuple, PAGE_CUR_GE,
-				  BTR_SEARCH_LEAF, &cursor, &mtr);
-
-	ulint	col_no = 0;
-
-	while (col_no + DATA_N_SYS_COLS < (ulint) table->n_cols) {
-		const char*	col_name = NULL;
-		const rec_t*	col_rec = btr_pcur_get_rec(&cursor);
-
-		ut_a(btr_pcur_is_on_user_rec(&cursor));
-
-		const char*	load_err = dict_load_column_low(
-			table, heap, NULL, NULL, &col_name, col_rec);
-
-		if (load_err != NULL) {
-			fprintf(stderr, "InnoDB: %s\n", load_err);
-			ut_error;
-		}
-
-		/* Note: Currently we have one DOC_ID column that is
-		shared by all FTS indexes on a table. */
-		if (0 == innobase_strcasecmp(col_name,
-					     FTS_DOC_ID_COL_NAME)) {
-			/* As part of normal loading of tables the
-			table->flag is not set for tables with FTS
-			till after the FTS indexes are loaded. So we
-			create the fts_t instance here if there isn't
-			one already created.
-
-			This case does not arise for table create as
-			the flag is set before the table is created. */
-			if (!table->fts) {
-				table->fts = fts_create(table);
-				fts_optimize_add_table(table);
-			}
-
-			ut_a(table->fts->doc_col == ULINT_UNDEFINED);
-
-			dict_col_t*	doc_id_col
-				= dict_table_get_nth_col(table, col_no);
-
-			ut_ad(doc_id_col->len == sizeof(doc_id_t));
-
-			if (doc_id_col->prtype & DATA_FTS_DOC_ID) {
-				DICT_TF2_FLAG_SET(
-					table, DICT_TF2_FTS_HAS_DOC_ID);
-				DICT_TF2_FLAG_UNSET(
-					table, DICT_TF2_FTS_ADD_DOC_ID);
-			}
-
-			/* Remember which column carries the doc id. */
-			table->fts->doc_col = col_no;
-		}
-
-		btr_pcur_move_to_next_user_rec(&cursor, &mtr);
-		col_no++;
-	}
-
-	btr_pcur_close(&cursor);
-	mtr_commit(&mtr);
-}
-
-/** Error message returned by dict_load_field_low() for a delete-marked
-SYS_FIELDS record. NOTE(review): presumably kept as a named object so
-that callers can recognise this case by comparing the returned pointer -
-confirm against the callers. */
-static const char* dict_load_field_del = "delete-marked record in SYS_FIELDS";
-
-/********************************************************************//**
-Decodes one SYS_FIELDS record.
-When an index is passed, the record must carry the given index_id and
-describe the next field of that index (position index->n_def); the field
-is then appended to the index. When no index is passed, the INDEX_ID of
-the record is copied out to index_id and the column name, prefix length
-and position are stored in sys_field and *pos instead.
-@return error message, or NULL on success; a delete-marked record is
-reported by returning the pointer dict_load_field_del */
-UNIV_INTERN
-const char*
-dict_load_field_low(
-/*================*/
-	byte*		index_id,	/*!< in/out: index id (8 bytes)
-					an "in" value if index != NULL
-					and "out" if index == NULL */
-	dict_index_t*	index,		/*!< in/out: index, could be NULL
-					if we just populate a dict_field_t
-					struct with information from
-					a SYS_FIELDS record */
-	dict_field_t*	sys_field,	/*!< out: dict_field_t to be
-					filled */
-	ulint*		pos,		/*!< out: Field position */
-	byte*		last_index_id,	/*!< in: last index id */
-	mem_heap_t*	heap,		/*!< in/out: memory heap
-					for temporary storage */
-	const rec_t*	rec)		/*!< in: SYS_FIELDS record */
-{
-	/* Shared message for every column whose stored length is wrong. */
-	const char* const	bad_len
-		= "incorrect column length in SYS_FIELDS";
-	ulint			col_len;
-
-	/* Either index or sys_field is supplied, not both */
-	ut_a((!index) || (!sys_field));
-
-	if (rec_get_deleted_flag(rec, 0)) {
-		return(dict_load_field_del);
-	}
-
-	if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_FIELDS) {
-		return("wrong number of columns in SYS_FIELDS record");
-	}
-
-	/* INDEX_ID: verified against, or copied out to, index_id. */
-	const byte*	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_FIELDS__INDEX_ID, &col_len);
-
-	if (col_len != 8) {
-		return(bad_len);
-	}
-
-	ibool	is_first;
-
-	if (index) {
-		if (memcmp(col, index_id, 8)) {
-			return("SYS_FIELDS.INDEX_ID mismatch");
-		}
-
-		is_first = (index->n_def == 0);
-	} else {
-		ut_a(last_index_id);
-		memcpy(index_id, (const char*) col, 8);
-		/* A change of index id means this record starts a new index. */
-		is_first = memcmp(index_id, last_index_id, 8);
-	}
-
-	/* POS packs the field position and an optional column prefix
-	length into 4 bytes. If the index has at least one prefix field,
-	the high 2 bytes hold the field number and the low 2 bytes the
-	prefix length; otherwise the field number is in the low 2 bytes. */
-	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_FIELDS__POS, &col_len);
-
-	if (col_len != 4) {
-		return(bad_len);
-	}
-
-	const ulint	packed = mach_read_from_4(col);
-	const ulint	low_half = packed & 0xFFFFUL;
-	const ulint	high_half = (packed >> 16) & 0xFFFF;
-
-	if (index && UNIV_UNLIKELY
-	    (low_half != index->n_def && high_half != index->n_def)) {
-		return("SYS_FIELDS.POS mismatch");
-	}
-
-	ulint	prefix_len;
-	ulint	position;
-
-	if (is_first || packed > 0xFFFFUL) {
-		prefix_len = low_half;
-		position = high_half;
-	} else {
-		prefix_len = 0;
-		position = low_half;
-	}
-
-	/* Only the lengths of the two system columns are checked. */
-	rec_get_nth_field_offs_old(
-		rec, DICT_FLD__SYS_FIELDS__DB_TRX_ID, &col_len);
-
-	if (col_len != DATA_TRX_ID_LEN && col_len != UNIV_SQL_NULL) {
-		return(bad_len);
-	}
-
-	rec_get_nth_field_offs_old(
-		rec, DICT_FLD__SYS_FIELDS__DB_ROLL_PTR, &col_len);
-
-	if (col_len != DATA_ROLL_PTR_LEN && col_len != UNIV_SQL_NULL) {
-		return(bad_len);
-	}
-
-	/* COL_NAME must be present and non-empty. */
-	col = rec_get_nth_field_old(
-		rec, DICT_FLD__SYS_FIELDS__COL_NAME, &col_len);
-
-	if (col_len == 0 || col_len == UNIV_SQL_NULL) {
-		return(bad_len);
-	}
-
-	if (!index) {
-		ut_a(sys_field);
-		ut_a(pos);
-
-		sys_field->name = mem_heap_strdupl(
-			heap, (const char*) col, col_len);
-		sys_field->prefix_len = prefix_len;
-		*pos = position;
-	} else {
-		dict_mem_index_add_field(
-			index,
-			mem_heap_strdupl(heap, (const char*) col, col_len),
-			prefix_len);
-	}
-
-	return(NULL);
-}
-
-/********************************************************************//**
-Loads definitions for index fields.
-Opens a cursor on the first index of SYS_FIELDS, positioned with
-PAGE_CUR_GE on the 8-byte id of the index, and passes index->n_fields
-consecutive user records to dict_load_field_low(). A delete-marked
-record is skipped, but it still uses up one of those iterations.
-@return DB_SUCCESS if ok, DB_CORRUPTION if corruption */
-static
-ulint
-dict_load_fields(
-/*=============*/
-	dict_index_t*	index,	/*!< in/out: index whose fields to load */
-	mem_heap_t*	heap)	/*!< in: memory heap for temporary storage */
-{
-	btr_pcur_t	pcur;
-	mtr_t		mtr;
-	dberr_t		status = DB_SUCCESS;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	mtr_start(&mtr);
-
-	dict_table_t*	sys_fields = dict_table_get_low("SYS_FIELDS");
-	dict_index_t*	sys_index = UT_LIST_GET_FIRST(sys_fields->indexes);
-	ut_ad(!dict_table_is_comp(sys_fields));
-	ut_ad(name_of_col_is(sys_fields, sys_index,
-			     DICT_FLD__SYS_FIELDS__COL_NAME, "COL_NAME"));
-
-	/* Build a one-column search key holding the index id. */
-	dtuple_t*	key = dtuple_create(heap, 1);
-	dfield_t*	key_field = dtuple_get_nth_field(key, 0);
-	byte*		id_buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
-
-	mach_write_to_8(id_buf, index->id);
-	dfield_set_data(key_field, id_buf, 8);
-	dict_index_copy_types(key, sys_index, 1);
-
-	btr_pcur_open_on_user_rec(sys_index, key, PAGE_CUR_GE,
-				  BTR_SEARCH_LEAF, &pcur, &mtr);
-
-	for (ulint n = 0; n < index->n_fields; n++) {
-		const rec_t*	cur_rec = btr_pcur_get_rec(&pcur);
-
-		ut_a(btr_pcur_is_on_user_rec(&pcur));
-
-		const char*	msg = dict_load_field_low(
-			id_buf, index, NULL, NULL, NULL, heap, cur_rec);
-
-		/* There could be delete marked records in SYS_FIELDS
-		because SYS_FIELDS.INDEX_ID can be updated by ALTER TABLE
-		ADD INDEX; those are stepped over. Any other message is
-		treated as corruption. */
-		if (msg && msg != dict_load_field_del) {
-			fprintf(stderr, "InnoDB: %s\n", msg);
-			status = DB_CORRUPTION;
-			break;
-		}
-
-		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-	}
-
-	btr_pcur_close(&pcur);
-	mtr_commit(&mtr);
-	return(status);
-}
-
-/** Error message for a delete-marked record in dict_load_index_low().
-dict_load_indexes() recognises this case by comparing the returned
-pointer with this variable and then skips the record. */
-static const char* dict_load_index_del = "delete-marked record in SYS_INDEXES";
-/** Error message for table->id mismatch in dict_load_index_low().
-dict_load_indexes() compares the returned pointer with this variable and
-treats a match as the end of the index records of the table. */
-static const char* dict_load_index_id_err = "SYS_INDEXES.TABLE_ID mismatch";
-
-/********************************************************************//**
-Loads an index definition from a SYS_INDEXES record to dict_index_t.
-If allocate=TRUE, we will create a dict_index_t structure and fill it
-accordingly. If allocate=FALSE, the dict_index_t will be supplied by
-the caller and filled with information read from the record.
-The record is validated column by column (delete mark, number of
-columns, column lengths, TYPE bits) before anything is stored in *index.
-With allocate=TRUE, *index is reset to NULL on entry and therefore is
-still NULL on every error return.
-@return error message, or NULL on success. A delete-marked record and a
-TABLE_ID mismatch are reported by returning the pointers
-dict_load_index_del and dict_load_index_id_err respectively. */
-UNIV_INTERN
-const char*
-dict_load_index_low(
-/*================*/
- byte* table_id, /*!< in/out: table id (8 bytes),
- an "in" value if allocate=TRUE
- and "out" when allocate=FALSE */
- const char* table_name, /*!< in: table name */
- mem_heap_t* heap, /*!< in/out: temporary memory heap */
- const rec_t* rec, /*!< in: SYS_INDEXES record */
- ibool allocate, /*!< in: TRUE=allocate *index,
- FALSE=fill in a pre-allocated
- *index */
- dict_index_t** index) /*!< out,own: index, or NULL */
-{
- const byte* field;
- ulint len;
- ulint name_len;
- char* name_buf;
- index_id_t id;
- ulint n_fields;
- ulint type;
- ulint space;
-
- if (allocate) {
- /* If allocate=TRUE, no dict_index_t will
- be supplied. Initialize "*index" to NULL */
- *index = NULL;
- }
-
- if (rec_get_deleted_flag(rec, 0)) {
- return(dict_load_index_del);
- }
-
- if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_INDEXES) {
- return("wrong number of columns in SYS_INDEXES record");
- }
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__TABLE_ID, &len);
- if (len != 8) {
-err_len: /* shared exit for every column length check in this function */
- return("incorrect column length in SYS_INDEXES");
- }
-
- if (!allocate) {
- /* We are reading a SYS_INDEXES record. Copy the table_id */
- memcpy(table_id, (const char*) field, 8);
- } else if (memcmp(field, table_id, 8)) {
- /* Caller supplied table_id, verify it is the same
- id as on the index record */
- return(dict_load_index_id_err);
- }
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__ID, &len);
- if (len != 8) {
- goto err_len;
- }
-
- id = mach_read_from_8(field);
-
- rec_get_nth_field_offs_old(
- rec, DICT_FLD__SYS_INDEXES__DB_TRX_ID, &len); /* only the length is checked */
- if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
- goto err_len;
- }
- rec_get_nth_field_offs_old(
- rec, DICT_FLD__SYS_INDEXES__DB_ROLL_PTR, &len); /* only the length is checked */
- if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
- goto err_len;
- }
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__NAME, &name_len);
- if (name_len == UNIV_SQL_NULL) {
- goto err_len;
- }
-
- name_buf = mem_heap_strdupl(heap, (const char*) field,
- name_len); /* the name is copied into the caller's heap */
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__N_FIELDS, &len);
- if (len != 4) {
- goto err_len;
- }
- n_fields = mach_read_from_4(field);
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__TYPE, &len);
- if (len != 4) {
- goto err_len;
- }
- type = mach_read_from_4(field);
- if (type & (~0U << DICT_IT_BITS)) { /* a bit at or above position DICT_IT_BITS is set */
- return("unknown SYS_INDEXES.TYPE bits");
- }
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__SPACE, &len);
- if (len != 4) {
- goto err_len;
- }
- space = mach_read_from_4(field);
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__PAGE_NO, &len);
- if (len != 4) {
- goto err_len;
- }
-
- if (allocate) {
- *index = dict_mem_index_create(table_name, name_buf,
- space, type, n_fields);
- } else {
- ut_a(*index);
-
- dict_mem_fill_index_struct(*index, NULL, NULL, name_buf,
- space, type, n_fields);
- }
-
- (*index)->id = id;
- (*index)->page = mach_read_from_4(field); /* field still points at PAGE_NO, the last column fetched above */
- btr_search_index_init(*index);
- ut_ad((*index)->page);
-
- return(NULL);
-}
-
-/********************************************************************//**
-Loads definitions for table indexes. Adds them to the data dictionary
-cache.
-A cursor is opened on the first index of SYS_INDEXES, positioned with
-PAGE_CUR_GE on the 8-byte table->id, and records are processed until the
-cursor leaves the user records or dict_load_index_low() reports a
-TABLE_ID mismatch (dict_load_index_id_err). Delete-marked records are
-skipped; with DICT_ERR_IGNORE_RECOVER_LOCK, records whose index name
-starts with TEMP_INDEX_PREFIX are skipped as well.
-@return DB_SUCCESS if ok, DB_CORRUPTION if corruption of dictionary
-table, DB_INDEX_CORRUPT if a corrupted clustered index is refused, or
-DB_UNSUPPORTED if table has unknown index type; an error from
-dict_index_add_to_cache() is returned unchanged */
-static MY_ATTRIBUTE((nonnull))
-dberr_t
-dict_load_indexes(
-/*==============*/
- dict_table_t* table, /*!< in/out: table */
- mem_heap_t* heap, /*!< in: memory heap for temporary storage */
- dict_err_ignore_t ignore_err)
- /*!< in: error to be ignored when
- loading the index definition */
-{
- dict_table_t* sys_indexes;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- dtuple_t* tuple;
- dfield_t* dfield;
- const rec_t* rec;
- byte* buf;
- mtr_t mtr;
- dberr_t error = DB_SUCCESS;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- mtr_start(&mtr);
-
- sys_indexes = dict_table_get_low("SYS_INDEXES");
- sys_index = UT_LIST_GET_FIRST(sys_indexes->indexes);
- ut_ad(!dict_table_is_comp(sys_indexes));
- ut_ad(name_of_col_is(sys_indexes, sys_index,
- DICT_FLD__SYS_INDEXES__NAME, "NAME"));
- ut_ad(name_of_col_is(sys_indexes, sys_index,
- DICT_FLD__SYS_INDEXES__PAGE_NO, "PAGE_NO"));
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
- mach_write_to_8(buf, table->id); /* buf doubles as the expected TABLE_ID passed to dict_load_index_low() */
-
- dfield_set_data(dfield, buf, 8);
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- for (;;) {
- dict_index_t* index = NULL;
- const char* err_msg;
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
-
- /* We should allow the table to open even
- without index when DICT_ERR_IGNORE_CORRUPT is set.
- DICT_ERR_IGNORE_CORRUPT is currently only set
- for drop table */
- if (dict_table_get_first_index(table) == NULL
- && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Cannot load table %s "
- "because it has no indexes in "
- "InnoDB internal data dictionary.",
- table->name);
- error = DB_CORRUPTION;
- goto func_exit;
- }
-
- break;
- }
-
- rec = btr_pcur_get_rec(&pcur);
-
- if ((ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)
- && rec_get_n_fields_old(rec)
- == DICT_NUM_FIELDS__SYS_INDEXES) {
- const byte* field;
- ulint len;
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__NAME, &len);
-
- if (len != UNIV_SQL_NULL
- && char(*field) == char(TEMP_INDEX_PREFIX)) {
- /* Skip indexes whose name starts with
- TEMP_INDEX_PREFIX, because they will
- be dropped during crash recovery. */
- goto next_rec;
- }
- }
-
- err_msg = dict_load_index_low(buf, table->name, heap, rec,
- TRUE, &index);
- ut_ad((index == NULL && err_msg != NULL)
- || (index != NULL && err_msg == NULL));
-
- if (err_msg == dict_load_index_id_err) {
- /* TABLE_ID mismatch means that we have
- run out of index definitions for the table. */
-
- if (dict_table_get_first_index(table) == NULL
- && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Failed to load the "
- "clustered index for table %s "
- "because of the following error: %s. "
- "Refusing to load the rest of the "
- "indexes (if any) and the whole table "
- "altogether.", table->name, err_msg);
- error = DB_CORRUPTION;
- goto func_exit;
- }
-
- break;
- } else if (err_msg == dict_load_index_del) {
- /* Skip delete-marked records. */
- goto next_rec;
- } else if (err_msg) {
- fprintf(stderr, "InnoDB: %s\n", err_msg);
- if (ignore_err & DICT_ERR_IGNORE_CORRUPT) {
- goto next_rec;
- }
- error = DB_CORRUPTION;
- goto func_exit;
- }
-
- ut_ad(index);
-
- /* Check whether the index is corrupted */
- if (dict_index_is_corrupted(index)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fputs(" is corrupted\n", stderr);
-
- if (!srv_load_corrupted
- && !(ignore_err & DICT_ERR_IGNORE_CORRUPT)
- && dict_index_is_clust(index)) {
- dict_mem_index_free(index);
-
- error = DB_INDEX_CORRUPT;
- goto func_exit;
- } else {
- /* We will load the index if
- 1) srv_load_corrupted is TRUE
- 2) ignore_err is set with
- DICT_ERR_IGNORE_CORRUPT
- 3) if the index corrupted is a secondary
- index */
- ut_print_timestamp(stderr);
- fputs(" InnoDB: load corrupted index ", stderr);
- dict_index_name_print(stderr, NULL, index);
- putc('\n', stderr);
- }
- }
-
- if (index->type & DICT_FTS
- && !DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS)) {
- /* This should have been created by now. */
- ut_a(table->fts != NULL);
- DICT_TF2_FLAG_SET(table, DICT_TF2_FTS);
- }
-
- /* We check for unsupported types first, so that the
- subsequent checks are relevant for the supported types.
- Every branch of the chain below either adds the index to
- the cache, frees it, or leaves through func_exit. */
- if (index->type & ~(DICT_CLUSTERED | DICT_UNIQUE
- | DICT_CORRUPT | DICT_FTS)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown type %lu of index %s of table %s",
- (ulong) index->type, index->name, table->name);
-
- error = DB_UNSUPPORTED;
- dict_mem_index_free(index);
- goto func_exit;
- } else if (index->page == FIL_NULL
- && !table->file_unreadable
- && (!(index->type & DICT_FTS))) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to load index %s"
- " for table %s\n"
- "InnoDB: but the index tree has been freed!\n",
- index->name, table->name);
-
- if (ignore_err & DICT_ERR_IGNORE_INDEX_ROOT) {
- /* If caller can tolerate this error,
- we will continue to load the index and
- let caller deal with this error. However
- mark the index and table corrupted. We
- only need to mark such in the index
- dictionary cache for such metadata corruption,
- since we would always be able to set it
- when loading the dictionary cache */
- dict_set_corrupted_index_cache_only(
- index, table);
-
- fprintf(stderr,
- "InnoDB: Index is corrupt but forcing"
- " load into data dictionary\n");
- } else {
-corrupted: /* also the target of the "first index is not clustered" branch below */
- dict_mem_index_free(index);
- error = DB_CORRUPTION;
- goto func_exit;
- }
- } else if (!dict_index_is_clust(index)
- && NULL == dict_table_get_first_index(table)) {
-
- fputs("InnoDB: Error: trying to load index ",
- stderr);
- ut_print_name(stderr, NULL, FALSE, index->name);
- fputs(" for table ", stderr);
- ut_print_name(stderr, NULL, TRUE, table->name);
- fputs("\nInnoDB: but the first index"
- " is not clustered!\n", stderr);
-
- goto corrupted;
- } else if (dict_is_sys_table(table->id)
- && (dict_index_is_clust(index)
- || ((table == dict_sys->sys_tables)
- && !strcmp("ID_IND", index->name)))) {
-
- /* The index was created in memory already at booting
- of the database server */
- dict_mem_index_free(index);
- } else {
- dict_load_fields(index, heap); /* NOTE(review): the status returned by
- dict_load_fields() is discarded here, so a DB_CORRUPTION reported
- for SYS_FIELDS does not stop the load - confirm this is intended */
-
- error = dict_index_add_to_cache(
- table, index, index->page, FALSE);
-
- /* The data dictionary tables should never contain
- invalid index definitions. If we ignored this error
- and simply did not load this index definition, the
- .frm file would disagree with the index definitions
- inside InnoDB. */
- if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
-
- goto func_exit;
- }
- }
-next_rec:
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
- /* If the table contains FTS indexes, populate table->fts->indexes.
- This step is reached only when the loop ended with break; every
- goto func_exit above bypasses it. */
- if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS)) {
- /* table->fts->indexes should have been created. */
- ut_a(table->fts->indexes != NULL);
- dict_table_get_all_fts_indexes(table, table->fts->indexes);
- }
-
-func_exit:
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- return(error);
-}
-
-/********************************************************************//**
-Loads a table definition from a SYS_TABLES record to dict_table_t.
-Does not load any columns or indexes.
-The delete mark, the number of columns in the record and the length of
-every column are validated first; *table is assigned only after all
-checks have passed, so it is left untouched on every error return.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_load_table_low(
-/*================*/
- const char* name, /*!< in: table name */
- const rec_t* rec, /*!< in: SYS_TABLES record */
- dict_table_t** table) /*!< out,own: table, or NULL */
-{
- const byte* field;
- ulint len;
- ulint space;
- ulint n_cols;
- ulint flags = 0;
- ulint flags2;
-
- if (rec_get_deleted_flag(rec, 0)) {
- return("delete-marked record in SYS_TABLES");
- }
-
- if (rec_get_n_fields_old(rec) != DICT_NUM_FIELDS__SYS_TABLES) {
- return("wrong number of columns in SYS_TABLES record");
- }
-
- rec_get_nth_field_offs_old(
- rec, DICT_FLD__SYS_TABLES__NAME, &len);
- if (len == 0 || len == UNIV_SQL_NULL) {
-err_len: /* shared exit for every column length check in this function */
- return("incorrect column length in SYS_TABLES");
- }
- rec_get_nth_field_offs_old(
- rec, DICT_FLD__SYS_TABLES__DB_TRX_ID, &len);
- if (len != DATA_TRX_ID_LEN && len != UNIV_SQL_NULL) {
- goto err_len;
- }
- rec_get_nth_field_offs_old(
- rec, DICT_FLD__SYS_TABLES__DB_ROLL_PTR, &len);
- if (len != DATA_ROLL_PTR_LEN && len != UNIV_SQL_NULL) {
- goto err_len;
- }
-
- rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_TABLES__ID, &len); /* the value itself is read near the end */
- if (len != 8) {
- goto err_len;
- }
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
- if (len != 4) {
- goto err_len;
- }
-
- n_cols = mach_read_from_4(field);
-
- rec_get_nth_field_offs_old(rec, DICT_FLD__SYS_TABLES__TYPE, &len);
- if (len != 4) {
- goto err_len;
- }
-
- rec_get_nth_field_offs_old(
- rec, DICT_FLD__SYS_TABLES__MIX_ID, &len);
- if (len != 8) {
- goto err_len;
- }
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len);
- if (len != 4) {
- goto err_len;
- }
-
- /* MIX_LEN may hold additional flags in post-antelope file formats. */
- flags2 = mach_read_from_4(field);
-
- /* DICT_TF2_FTS will be set when indexes is being loaded */
- flags2 &= ~DICT_TF2_FTS;
-
- rec_get_nth_field_offs_old(
- rec, DICT_FLD__SYS_TABLES__CLUSTER_ID, &len);
- if (len != UNIV_SQL_NULL) { /* CLUSTER_ID is required to be SQL NULL */
- goto err_len;
- }
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__SPACE, &len);
- if (len != 4) {
- goto err_len;
- }
-
- space = mach_read_from_4(field);
-
- /* Derive the table flags from the record. A result of
- ULINT_UNDEFINED is handled below as an unknown SYS_TABLES.TYPE:
- the raw TYPE value is printed and the load is rejected. */
- flags = dict_sys_tables_get_flags(rec);
-
- if (UNIV_UNLIKELY(flags == ULINT_UNDEFINED)) {
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__TYPE, &len);
- ut_ad(len == 4); /* this was checked earlier */
- flags = mach_read_from_4(field);
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary"
- " has unknown type %lx.\n",
- (ulong) flags);
- return("incorrect flags in SYS_TABLES");
- }
-
- /* The high-order bit of N_COLS is the "compact format" flag.
- For tables in that format, MIX_LEN may hold additional flags. */
- if (n_cols & DICT_N_COLS_COMPACT) {
- ut_ad(flags & DICT_TF_COMPACT);
-
- if (flags2 & ~DICT_TF2_BIT_MASK) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: table ", stderr);
- ut_print_filename(stderr, name);
- fprintf(stderr, "\n"
- "InnoDB: in InnoDB data dictionary"
- " has unknown flags %lx.\n",
- (ulong) flags2);
-
- /* Clean it up and keep going */
- flags2 &= DICT_TF2_BIT_MASK;
- }
- } else {
- /* Do not trust the MIX_LEN field when the
- row format is Redundant. */
- flags2 = 0;
- }
-
- /* Create the in-memory table object. The compact-format bit is
- masked out of N_COLS so that only the column count is passed on. */
- *table = dict_mem_table_create(
- name, space, n_cols & ~DICT_N_COLS_COMPACT, flags, flags2);
-
- field = rec_get_nth_field_old(rec, DICT_FLD__SYS_TABLES__ID, &len);
- ut_ad(len == 8); /* this was checked earlier */
-
- (*table)->id = mach_read_from_8(field);
-
- (*table)->file_unreadable = false;
-
- return(NULL);
-}
-
-/********************************************************************//**
-Using the table->heap, copy the null-terminated filepath into
-table->data_dir_path and replace the 'databasename/tablename.ibd'
-portion with 'tablename'.
-This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path.
-Make this data directory path only if it has not yet been saved.
-If filepath equals the default .ibd path built from table->name, nothing
-is saved and the DATA DIRECTORY bits are cleared from table->flags
-instead. The caller must hold dict_sys->mutex, the table must have the
-DATA DIRECTORY flag set and table->data_dir_path must still be NULL. */
-UNIV_INTERN
-void
-dict_save_data_dir_path(
-/*====================*/
-	dict_table_t*	table,		/*!< in/out: table */
-	char*		filepath)	/*!< in: filepath of tablespace */
-{
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-	ut_a(DICT_TF_HAS_DATA_DIR(table->flags));
-
-	ut_a(!table->data_dir_path);
-	ut_a(filepath);
-
-	/* Be sure this filepath is not the default filepath. */
-	char*	default_filepath = fil_make_ibd_name(table->name, false);
-	if (strcmp(filepath, default_filepath)) {
-		ulint	pathlen = strlen(filepath);
-		ut_a(pathlen < OS_FILE_MAX_PATH);
-		/* The suffix check below steps 4 bytes back from the
-		terminating NUL. Assert that the string is long enough
-		first, so that a short path fails this assertion instead
-		of forming a pointer in front of the buffer. */
-		ut_a(pathlen >= 4);
-		ut_a(0 == strcmp(filepath + pathlen - 4, ".ibd"));
-
-		table->data_dir_path = mem_heap_strdup(table->heap, filepath);
-		os_file_make_data_dir_path(table->data_dir_path);
-	} else {
-		/* This does not change SYS_DATAFILES or SYS_TABLES
-		or FSP_FLAGS on the header page of the tablespace,
-		but it makes dict_table_t consistent */
-		table->flags &= ~DICT_TF_MASK_DATA_DIR;
-	}
-	mem_free(default_filepath);
-}
-
-/*****************************************************************//**
-Fills in table->data_dir_path when it has not been set yet.
-Nothing is done for temporary tables, for tables that already have a
-data_dir_path, or for tables with space id 0. Otherwise the path is taken
-from fil_space_get_first_path(), falling back to dict_get_first_path();
-if either yields a path, the DATA DIRECTORY flag is set on the table and
-the path is handed to dict_save_data_dir_path(). The dictionary mutex is
-acquired around the fallback lookup and the save unless the caller states
-that it already owns it. */
-UNIV_INTERN
-void
-dict_get_and_save_data_dir_path(
-/*============================*/
-	dict_table_t*	table,		/*!< in/out: table */
-	bool		dict_mutex_own)	/*!< in: true if dict_sys->mutex
-					is owned already */
-{
-	if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) {
-		return;
-	}
-
-	if (table->data_dir_path || !table->space) {
-		return;
-	}
-
-	/* The first lookup happens before the mutex is taken. */
-	char*		first_path = fil_space_get_first_path(table->space);
-	const bool	need_mutex = !dict_mutex_own;
-
-	if (need_mutex) {
-		dict_mutex_enter_for_mysql();
-	}
-
-	if (!first_path) {
-		first_path = dict_get_first_path(table->space, table->name);
-	}
-
-	if (first_path) {
-		/* dict_save_data_dir_path() asserts that the flag is set. */
-		table->flags |= (1 << DICT_TF_POS_DATA_DIR);
-		dict_save_data_dir_path(table, first_path);
-		mem_free(first_path);
-	}
-
-	if (need_mutex) {
-		dict_mutex_exit_for_mysql();
-	}
-}
-
-/********************************************************************//**
-Loads a table definition and also all its index definitions, and also
-the cluster definition if the table is a member in a cluster. Also loads
-all foreign key constraints where the foreign key is in the table or where
-a foreign key references columns in this table. Adds all these to the data
-dictionary cache.
-The steps are: look up the SYS_TABLES record by name, build the table
-object with dict_load_table_low(), locate or open the tablespace, load the
-columns, optionally add the table to the cache, load the indexes and,
-for a cached and readable table whose indexes loaded successfully, load
-the foreign key constraints.
-@return table, NULL if does not exist; if the table is stored in an
-.ibd file, but the tablespace is discarded or cannot be found or opened,
-then we set the file_unreadable flag to true in the table object we
-return */
-UNIV_INTERN
-dict_table_t*
-dict_load_table(
-/*============*/
- const char* name, /*!< in: table name in the
- databasename/tablename format */
- ibool cached, /*!< in: TRUE=add to cache, FALSE=do not */
- dict_err_ignore_t ignore_err)
- /*!< in: error to be ignored when loading
- table and its indexes' definition */
-{
- dberr_t err;
- dict_table_t* table;
- dict_table_t* sys_tables;
- btr_pcur_t pcur;
- dict_index_t* sys_index;
- dtuple_t* tuple;
- mem_heap_t* heap;
- dfield_t* dfield;
- const rec_t* rec;
- const byte* field;
- ulint len;
- char* filepath = NULL;
- const char* err_msg;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- heap = mem_heap_create(32000);
-
- mtr_start(&mtr);
-
- sys_tables = dict_table_get_low("SYS_TABLES");
- sys_index = UT_LIST_GET_FIRST(sys_tables->indexes);
- ut_ad(!dict_table_is_comp(sys_tables));
- ut_ad(name_of_col_is(sys_tables, sys_index,
- DICT_FLD__SYS_TABLES__ID, "ID"));
- ut_ad(name_of_col_is(sys_tables, sys_index,
- DICT_FLD__SYS_TABLES__N_COLS, "N_COLS"));
- ut_ad(name_of_col_is(sys_tables, sys_index,
- DICT_FLD__SYS_TABLES__TYPE, "TYPE"));
- ut_ad(name_of_col_is(sys_tables, sys_index,
- DICT_FLD__SYS_TABLES__MIX_LEN, "MIX_LEN"));
- ut_ad(name_of_col_is(sys_tables, sys_index,
- DICT_FLD__SYS_TABLES__SPACE, "SPACE"));
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, name, ut_strlen(name));
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur)
- || rec_get_deleted_flag(rec, 0)) {
- /* Not found */
-err_exit: /* common exit while the cursor and mini-transaction are still open */
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(NULL);
- }
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__NAME, &len);
-
- /* Check if the table name in record is the searched one */
- if (len != ut_strlen(name) || ut_memcmp(name, field, len) != 0) {
-
- goto err_exit;
- }
-
- err_msg = dict_load_table_low(name, rec, &table);
-
- if (err_msg) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: %s\n", err_msg);
- goto err_exit;
- }
-
- char table_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(table_name, sizeof(table_name), name, FALSE); /* table_name is only used in the log messages below */
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- if (table->space == 0) {
- /* The system tablespace is always available. */
- } else if (table->flags2 & DICT_TF2_DISCARDED) {
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Table '%s' tablespace is set as discarded.",
- table_name);
-
- table->file_unreadable = true;
-
- } else if (!fil_space_for_table_exists_in_mem(
- table->space, name, false, IS_XTRABACKUP(), true, heap,
- table->id, table->flags)) {
-
- if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) {
- /* Do not bother to retry opening temporary tables. */
- table->file_unreadable = true;
-
- } else {
- if (!(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Failed to find tablespace for "
- "table '%s' in the cache. "
- "Attempting to load the tablespace "
- "with space id %lu.",
- table_name, (ulong) table->space);
- }
-
- /* Use the remote filepath if needed. */
- /* This needs to be added to the table
- from SYS_DATAFILES */
- dict_get_and_save_data_dir_path(table, true);
-
- if (table->data_dir_path) {
- filepath = os_file_make_remote_pathname(
- table->data_dir_path,
- table->name, "ibd");
- }
-
- /* Try to open the tablespace. We set the
- 2nd param (fix_dict = false) here because we
- do not have an x-lock on dict_operation_lock */
- err = fil_open_single_table_tablespace(
- true, false, table->space,
- dict_tf_to_fsp_flags(table->flags),
- name, filepath);
-
- if (err != DB_SUCCESS) {
- /* We failed to find a sensible
- tablespace file */
-
- table->file_unreadable = true;
- }
-
- if (filepath) {
- mem_free(filepath);
- }
- }
- }
-
- dict_load_columns(table, heap);
-
- if (cached) {
- dict_table_add_to_cache(table, TRUE, heap);
- } else {
- dict_table_add_system_columns(table, heap);
- }
-
- mem_heap_empty(heap);
-
- /* If there is no tablespace for the table then we only need to
- load the index definitions. So that we can IMPORT the tablespace
- later. When recovering table locks for resurrected incomplete
- transactions, the tablespace should exist, because DDL operations
- were not allowed while the table is being locked by a transaction.
- Hence all errors are ignored for an unreadable table unless
- DICT_ERR_IGNORE_RECOVER_LOCK was requested. */
- dict_err_ignore_t index_load_err =
- !(ignore_err & DICT_ERR_IGNORE_RECOVER_LOCK)
- && table->file_unreadable
- ? DICT_ERR_IGNORE_ALL
- : ignore_err;
-
- err = dict_load_indexes(table, heap, index_load_err);
-
- if (err == DB_INDEX_CORRUPT) {
- /* Refuse to load the table if the table has a corrupted
- cluster index */
- if (!srv_load_corrupted) {
- fprintf(stderr, "InnoDB: Error: Load table ");
- ut_print_name(stderr, NULL, TRUE, table->name);
- fprintf(stderr, " failed, the table has corrupted"
- " clustered indexes. Turn on"
- " 'innodb_force_load_corrupted'"
- " to drop it\n");
-
- dict_table_remove_from_cache(table);
- table = NULL;
- goto func_exit;
- } else {
- dict_index_t* clust_index;
- clust_index = dict_table_get_first_index(table);
-
- if (dict_index_is_corrupted(clust_index)) {
- table->corrupted = TRUE;
- }
- }
- }
-
- /* Initialize table foreign_child value. Its value could be
- changed when dict_load_foreigns() is called below */
- table->fk_max_recusive_level = 0;
-
- /* If the force recovery flag is set, we open the table irrespective
- of the error condition, since the user may want to dump data from the
- clustered index. However we load the foreign key information only if
- all indexes were loaded. */
- if (!cached || table->file_unreadable) {
- /* Don't attempt to load the foreign key constraints: this
- branch is the one that skips dict_load_foreigns(). The indexes
- were already loaded above. */
- } else if (err == DB_SUCCESS) {
- err = dict_load_foreigns(table->name, NULL, true, true,
- ignore_err);
-
- if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Load table '%s' failed, the table has missing "
- "foreign key indexes. Turn off "
- "'foreign_key_checks' and try again.",
- table->name);
-
- dict_table_remove_from_cache(table);
- table = NULL;
- } else {
- table->fk_max_recusive_level = 0;
- }
- } else {
- dict_index_t* index;
-
- /* Make sure that at least the clustered index was loaded.
- Otherwise refuse to load the table */
- index = dict_table_get_first_index(table);
-
- if (!srv_force_recovery
- || !index
- || !dict_index_is_clust(index)) {
-
- dict_table_remove_from_cache(table);
- table = NULL;
-
- } else if (dict_index_is_corrupted(index)
- && !table->file_unreadable) {
-
- /* It is possible we force to load a corrupted
- clustered index if srv_load_corrupted is set.
- Mark the table as corrupted in this case */
- table->corrupted = true;
- }
- }
-
-func_exit:
- mem_heap_free(heap);
-
- ut_ad(!table
- || ignore_err != DICT_ERR_IGNORE_NONE
- || table->file_unreadable
- || !table->corrupted);
-
- if (table && table->fts) {
- if (!(dict_table_has_fts_index(table)
- || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
- || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID))) {
- /* the table->fts could be created in dict_load_column
- when a user defined FTS_DOC_ID is present, but no
- FTS */
- fts_optimize_remove_table(table);
- fts_free(table);
- } else {
- fts_optimize_add_table(table);
- }
- }
-
- ut_ad(err != DB_SUCCESS || dict_foreign_set_validate(*table)); /* every path above that sets table = NULL also leaves err != DB_SUCCESS, so *table is not evaluated for a NULL table */
-
- return(table);
-}
-
-/***********************************************************************//**
-Loads a table object based on the table id.
-The id is looked up in the second index of SYS_TABLES (the index that
-follows the first one in dict_sys->sys_tables), which yields the table
-name; the table is then loaded by name with dict_load_table() and added
-to the cache. Delete-marked entries carrying the searched id are stepped
-over until a live entry, a different id, or the end of the records is
-reached.
-@return table; NULL if table does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_load_table_on_id(
-/*==================*/
- table_id_t table_id, /*!< in: table id */
- dict_err_ignore_t ignore_err) /*!< in: errors to ignore
- when loading the table */
-{
- byte id_buf[8];
- btr_pcur_t pcur;
- mem_heap_t* heap;
- dtuple_t* tuple;
- dfield_t* dfield;
- dict_index_t* sys_table_ids;
- dict_table_t* sys_tables;
- const rec_t* rec;
- const byte* field;
- ulint len;
- dict_table_t* table;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- table = NULL;
-
- /* NOTE that the operation of this function is protected by
- the dictionary mutex, and therefore no deadlocks can occur
- with other dictionary operations. */
-
- mtr_start(&mtr);
- /*---------------------------------------------------*/
- /* Get the secondary index based on ID for table SYS_TABLES */
- sys_tables = dict_sys->sys_tables;
- sys_table_ids = dict_table_get_next_index(
- dict_table_get_first_index(sys_tables));
- ut_ad(!dict_table_is_comp(sys_tables));
- ut_ad(!dict_index_is_clust(sys_table_ids));
- heap = mem_heap_create(256);
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- /* Write the table id in byte format to id_buf */
- mach_write_to_8(id_buf, table_id);
-
- dfield_set_data(dfield, id_buf, 8);
- dict_index_copy_types(tuple, sys_table_ids, 1);
-
- btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
-
- rec = btr_pcur_get_rec(&pcur);
-
- if (page_rec_is_user_rec(rec)) {
- /*---------------------------------------------------*/
- /* Now we have the record in the secondary index
- containing the table ID and NAME */
-check_rec: /* re-entered from the loop below for each following user record */
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLE_IDS__ID, &len);
- ut_ad(len == 8);
-
- /* Check if the table id in record is the one searched for */
- if (table_id == mach_read_from_8(field)) {
- if (rec_get_deleted_flag(rec, 0)) {
- /* Until purge has completed, there
- may be delete-marked duplicate records
- for the same SYS_TABLES.ID, but different
- SYS_TABLES.NAME. */
- while (btr_pcur_move_to_next(&pcur, &mtr)) {
- rec = btr_pcur_get_rec(&pcur);
-
- if (page_rec_is_user_rec(rec)) {
- goto check_rec;
- }
- }
- } else {
- /* Now we get the table name from the record */
- field = rec_get_nth_field_old(rec,
- DICT_FLD__SYS_TABLE_IDS__NAME, &len);
- /* Load the table definition to memory */
- table = dict_load_table(
- mem_heap_strdupl(
- heap, (char*) field, len),
- TRUE, ignore_err);
- }
- }
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
-
- return(table);
-}
-
-/***********************************************************************//**
-Finds the id of the table that owns the index with the given id.
-The ids of the hard-coded dictionary indexes are answered directly; every
-other id is searched for by scanning SYS_INDEXES one record at a time.
-@return true if found */
-static
-bool
-dict_load_table_id_on_index_id(
-/*==================*/
-	index_id_t	index_id,	/*!< in: index id */
-	table_id_t*	table_id)	/*!< out: table id; written only
-					when true is returned */
-{
-	/* The hard-coded indexes need no dictionary lookup. */
-	if (index_id == DICT_TABLES_ID
-	    || index_id == DICT_COLUMNS_ID
-	    || index_id == DICT_INDEXES_ID
-	    || index_id == DICT_FIELDS_ID) {
-		*table_id = index_id;
-		return(true);
-	}
-
-	if (index_id == DICT_TABLE_IDS_ID) {
-		/* This one is a secondary index on SYS_TABLES. */
-		*table_id = DICT_TABLES_ID;
-		return(true);
-	}
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	/* The dictionary mutex protects this whole operation, so it
-	cannot deadlock with other dictionary operations. */
-
-	mtr_t		mtr;
-	btr_pcur_t	pcur;
-	bool		matched = false;
-
-	mtr_start(&mtr);
-
-	for (const rec_t* rec = dict_startscan_system(
-		     &pcur, &mtr, SYS_INDEXES);
-	     rec != NULL;
-	     rec = dict_getnext_system(&pcur, &mtr)) {
-
-		ulint		len;
-		const byte*	id_field = rec_get_nth_field_old(
-			rec, DICT_FLD__SYS_INDEXES__ID, &len);
-		ut_ad(len == 8);
-
-		if (mach_read_from_8(id_field) == index_id) {
-			/* This is the index: report its table id. */
-			const byte*	owner_field = rec_get_nth_field_old(
-				rec, DICT_FLD__SYS_INDEXES__TABLE_ID, &len);
-
-			*table_id = mach_read_from_8(owner_field);
-			matched = true;
-			break;
-		}
-
-		/* Commit and restart the mini-transaction before
-		stepping to the next record. */
-		mtr_commit(&mtr);
-		mtr_start(&mtr);
-	}
-
-	btr_pcur_close(&pcur);
-	mtr_commit(&mtr);
-
-	return(matched);
-}
-
-/***********************************************************************//**
-Opens the table that owns the index with the given id.
-dict_sys->mutex is acquired for the duration of the call unless the caller
-states that it already holds it.
-@return the result of dict_table_open_on_id(); NULL if no table id could be
-found for the index id */
-UNIV_INTERN
-dict_table_t*
-dict_table_open_on_index_id(
-/*==================*/
-	index_id_t	index_id,	/*!< in: index id */
-	bool		dict_locked)	/*!< in: dict locked */
-{
-	const bool	lock_here = !dict_locked;
-
-	if (lock_here) {
-		mutex_enter(&dict_sys->mutex);
-	}
-
-	ut_ad(mutex_own(&dict_sys->mutex));
-
-	table_id_t	table_id;
-	dict_table_t*	table = NULL;
-
-	if (dict_load_table_id_on_index_id(index_id, &table_id)) {
-		/* The mutex is held at this point in either case, so the
-		table is always opened as "dictionary locked". */
-		table = dict_table_open_on_id(
-			table_id, true, DICT_TABLE_OP_LOAD_TABLESPACE);
-	}
-
-	if (lock_here) {
-		mutex_exit(&dict_sys->mutex);
-	}
-
-	return table;
-}
-
-/********************************************************************//**
-Called when the database is booted: loads the index definitions of a system
-table. The clustered index is not handled here, because it has already been
-added to the dictionary cache at boot before this function is called. */
-UNIV_INTERN
-void
-dict_load_sys_table(
-/*================*/
-	dict_table_t*	table)	/*!< in: system table */
-{
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	/* Scratch heap that lives only for the duration of the load. */
-	mem_heap_t*	scratch = mem_heap_create(1000);
-
-	dict_load_indexes(table, scratch, DICT_ERR_IGNORE_NONE);
-
-	mem_heap_free(scratch);
-}
-
-/********************************************************************//**
-Loads foreign key constraint col names (also for the referenced table).
-Members that must be set (and valid) in foreign:
-foreign->heap
-foreign->n_fields
-foreign->id ('\0'-terminated)
-Members that will be created and set by this function:
-foreign->foreign_col_names[i]
-foreign->referenced_col_names[i]
-(for i=0..foreign->n_fields-1)
-Both arrays, the copied names and the search tuple are allocated from
-foreign->heap. The function has no error return: every consistency check on
-the SYS_FOREIGN_COLS records is made with ut_a() or ut_error. The caller
-must hold dict_sys->mutex (ut_ad check). */
-static
-void
-dict_load_foreign_cols(
-/*===================*/
- dict_foreign_t* foreign)/*!< in/out: foreign constraint object */
-{
- dict_table_t* sys_foreign_cols;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- dtuple_t* tuple;
- dfield_t* dfield;
- const rec_t* rec;
- const byte* field;
- ulint len;
- ulint i;
- mtr_t mtr;
- size_t id_len;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- id_len = strlen(foreign->id);
-
- foreign->foreign_col_names = static_cast<const char**>(
- mem_heap_alloc(foreign->heap,
- foreign->n_fields * sizeof(void*)));
-
- foreign->referenced_col_names = static_cast<const char**>(
- mem_heap_alloc(foreign->heap,
- foreign->n_fields * sizeof(void*)));
-
- mtr_start(&mtr);
-
- sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS");
-
- sys_index = UT_LIST_GET_FIRST(sys_foreign_cols->indexes);
- ut_ad(!dict_table_is_comp(sys_foreign_cols));
-
- tuple = dtuple_create(foreign->heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, foreign->id, id_len);
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- for (i = 0; i < foreign->n_fields; i++) { /* one record is consumed per iteration */
-
- rec = btr_pcur_get_rec(&pcur);
-
- ut_a(btr_pcur_is_on_user_rec(&pcur));
- ut_a(!rec_get_deleted_flag(rec, 0));
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_FOREIGN_COLS__ID, &len);
-
- if (len != id_len || ut_memcmp(foreign->id, field, len) != 0) { /* the record carries a different constraint id */
- const rec_t* pos;
- ulint pos_len;
- const rec_t* for_col_name;
- ulint for_col_name_len;
- const rec_t* ref_col_name;
- ulint ref_col_name_len;
-
- pos = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_FOREIGN_COLS__POS,
- &pos_len);
-
- for_col_name = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME,
- &for_col_name_len);
-
- ref_col_name = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME,
- &ref_col_name_len);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unable to load columns names for foreign "
- "key '%s' because it was not found in "
- "InnoDB internal table SYS_FOREIGN_COLS. The "
- "closest entry we found is: "
- "(ID='%.*s', POS=%lu, FOR_COL_NAME='%.*s', "
- "REF_COL_NAME='%.*s')",
- foreign->id,
- (int) len, field,
- mach_read_from_4(pos),
- (int) for_col_name_len, for_col_name,
- (int) ref_col_name_len, ref_col_name);
-
- ut_error; /* the mismatch is logged above and then treated as fatal */
- }
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_FOREIGN_COLS__POS, &len);
- ut_a(len == 4);
- ut_a(i == mach_read_from_4(field)); /* POS must equal the loop index */
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME, &len);
- foreign->foreign_col_names[i] = mem_heap_strdupl(
- foreign->heap, (char*) field, len);
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME, &len);
- foreign->referenced_col_names[i] = mem_heap_strdupl(
- foreign->heap, (char*) field, len);
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-}
-
-/***********************************************************************//**
-Loads a foreign key constraint to the dictionary cache.
-The constraint is looked up by id in the first index of SYS_FOREIGN. When no
-live record with exactly this id is found, a message is printed to stderr and
-DB_ERROR is returned. Otherwise a dict_foreign_t is created, filled from the
-record and from dict_load_foreign_cols(), and handed to
-dict_foreign_add_to_cache(), whose result is returned. The caller must hold
-dict_sys->mutex (ut_ad check).
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull(1), warn_unused_result))
-dberr_t
-dict_load_foreign(
-/*==============*/
- const char* id,
- /*!< in: foreign constraint id, must be
- '\0'-terminated */
- const char** col_names,
- /*!< in: column names, or NULL
- to use foreign->foreign_table->col_names */
- bool check_recursive,
- /*!< in: whether to record the foreign table
- parent count to avoid unlimited recursive
- load of chained foreign tables */
- bool check_charsets,
- /*!< in: whether to check charset
- compatibility */
- dict_err_ignore_t ignore_err)
- /*!< in: error to be ignored */
-{
- dict_foreign_t* foreign;
- dict_table_t* sys_foreign;
- btr_pcur_t pcur;
- dict_index_t* sys_index;
- dtuple_t* tuple;
- mem_heap_t* heap2; /* temporary heap for the search tuple only */
- dfield_t* dfield;
- const rec_t* rec;
- const byte* field;
- ulint len;
- ulint n_fields_and_type;
- mtr_t mtr;
- dict_table_t* for_table;
- dict_table_t* ref_table;
- size_t id_len;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- id_len = strlen(id);
-
- heap2 = mem_heap_create(1000);
-
- mtr_start(&mtr);
-
- sys_foreign = dict_table_get_low("SYS_FOREIGN");
-
- sys_index = UT_LIST_GET_FIRST(sys_foreign->indexes);
- ut_ad(!dict_table_is_comp(sys_foreign));
-
- tuple = dtuple_create(heap2, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, id, id_len);
- dict_index_copy_types(tuple, sys_index, 1);
-
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur)
- || rec_get_deleted_flag(rec, 0)) {
- /* Not found */
-
- fprintf(stderr,
- "InnoDB: Error: cannot load foreign constraint "
- "%s: could not find the relevant record in "
- "SYS_FOREIGN\n", id);
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap2);
-
- return(DB_ERROR);
- }
-
- field = rec_get_nth_field_old(rec, DICT_FLD__SYS_FOREIGN__ID, &len);
-
- /* Check if the id in record is the searched one */
- if (len != id_len || ut_memcmp(id, field, len) != 0) {
-
- fprintf(stderr,
- "InnoDB: Error: cannot load foreign constraint "
- "%s: found %.*s instead in SYS_FOREIGN\n",
- id, (int) len, field);
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap2);
-
- return(DB_ERROR);
- }
-
- /* Read the table names and the number of columns associated
- with the constraint */
-
- mem_heap_free(heap2);
-
- foreign = dict_mem_foreign_create();
-
- n_fields_and_type = mach_read_from_4(
- rec_get_nth_field_old(
- rec, DICT_FLD__SYS_FOREIGN__N_COLS, &len));
-
- ut_a(len == 4);
-
- /* We store the type in the bits 24..29 of n_fields_and_type.
- The field count is taken from the low 10 bits (mask 0x3FF). */
-
- foreign->type = (unsigned int) (n_fields_and_type >> 24);
- foreign->n_fields = (unsigned int) (n_fields_and_type & 0x3FFUL);
-
- foreign->id = mem_heap_strdupl(foreign->heap, id, id_len);
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_FOREIGN__FOR_NAME, &len);
-
- foreign->foreign_table_name = mem_heap_strdupl(
- foreign->heap, (char*) field, len);
- dict_mem_foreign_table_name_lookup_set(foreign, TRUE);
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_FOREIGN__REF_NAME, &len);
- foreign->referenced_table_name = mem_heap_strdupl(
- foreign->heap, (char*) field, len);
- dict_mem_referenced_table_name_lookup_set(foreign, TRUE);
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- dict_load_foreign_cols(foreign);
-
- ref_table = dict_table_check_if_in_cache_low(
- foreign->referenced_table_name_lookup);
-
- /* We could wind up in deeply recursive calls if
- we call dict_table_get_low() again here when there
- is a chain of tables concatenated together with
- foreign constraints. In such a case, each table is
- both a parent and a child of the other tables, and
- acts as a "link" in such table chains.
- To avoid such a scenario, we need to check the
- number of ancestors the current table has. If that
- reaches DICT_FK_MAX_RECURSIVE_LOAD, we will stop loading
- the child table.
- Foreign constraints are loaded in a Breadth First fashion,
- that is, the index on FOR_NAME is scanned first, and then
- the index on REF_NAME. So foreign constraints in which the
- current table is a child (foreign table) are loaded first,
- and then those constraints where the current table is a
- parent (referenced) table.
- Thus we can check the parent (ref_table) table's
- reference count (fk_max_recusive_level) to know how deep the
- recursive call is. If the parent table (ref_table) is already
- loaded, and its fk_max_recusive_level is not smaller than
- DICT_FK_MAX_RECURSIVE_LOAD, we will stop the recursive loading
- by skipping loading the child table. It will not affect foreign
- constraint check for DMLs since child table will be loaded
- at that time for the constraint check. */
- if (!ref_table
- || ref_table->fk_max_recusive_level < DICT_FK_MAX_RECURSIVE_LOAD) {
-
- /* If the foreign table is not yet in the dictionary cache, we
- have to load it so that we are able to make type comparisons
- in the next function call. */
-
- for_table = dict_table_get_low(foreign->foreign_table_name_lookup);
-
- if (for_table && ref_table && check_recursive) {
- /* This is to record the longest chain of ancestors
- this table has: if the parent has at least as many
- ancestors as this table, record the parent's count
- plus 1 (for this parent). */
- if (ref_table->fk_max_recusive_level
- >= for_table->fk_max_recusive_level) {
- for_table->fk_max_recusive_level =
- ref_table->fk_max_recusive_level + 1;
- }
- }
- }
-
- /* Note that there may already be a foreign constraint object in
- the dictionary cache for this constraint: then the following
- call only sets the pointers in it to point to the appropriate table
- and index objects and frees the newly created object foreign.
- Adding to the cache should always succeed since we are not creating
- a new foreign key constraint but loading one from the data
- dictionary. */
-
- return(dict_foreign_add_to_cache(foreign, col_names, check_charsets,
- ignore_err));
-}
-
-/***********************************************************************//**
-Loads foreign key constraints where the table is either the foreign key
-holder or where the table is referenced by a foreign key. Adds these
-constraints to the data dictionary. Note that we know that the dictionary
-cache already contains all constraints where the other relevant table is
-already in the dictionary cache.
-The secondary indexes of SYS_FOREIGN are scanned one after another, starting
-with the one on FOR_NAME. For every live record whose name matches
-table_name, the constraint id is copied out of the page and passed to
-dict_load_foreign(). The first error returned by dict_load_foreign() ends
-the scan and is returned. DB_ERROR is returned when SYS_FOREIGN itself cannot
-be obtained. The caller must hold dict_sys->mutex (ut_ad check).
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_load_foreigns(
-/*===============*/
- const char* table_name, /*!< in: table name */
- const char** col_names, /*!< in: column names, or NULL
- to use table->col_names */
- bool check_recursive,/*!< in: Whether to check
- recursive load of tables
- chained by FK */
- bool check_charsets, /*!< in: whether to check
- charset compatibility */
- dict_err_ignore_t ignore_err) /*!< in: error to be ignored */
-{
- ulint tuple_buf[(DTUPLE_EST_ALLOC(1) + sizeof(ulint) - 1)
- / sizeof(ulint)];
- btr_pcur_t pcur;
- dtuple_t* tuple;
- dfield_t* dfield;
- dict_index_t* sec_index;
- dict_table_t* sys_foreign;
- const rec_t* rec;
- const byte* field;
- ulint len;
- dberr_t err;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- sys_foreign = dict_table_get_low("SYS_FOREIGN");
-
- if (sys_foreign == NULL) {
- /* No foreign keys defined yet in this database */
-
- fprintf(stderr,
- "InnoDB: Error: no foreign key system tables"
- " in the database\n");
-
- return(DB_ERROR);
- }
-
- ut_ad(!dict_table_is_comp(sys_foreign));
- mtr_start(&mtr);
-
- /* Get the secondary index based on FOR_NAME from table
- SYS_FOREIGN */
-
- sec_index = dict_table_get_next_index(
- dict_table_get_first_index(sys_foreign));
- ut_ad(!dict_index_is_clust(sec_index));
-start_load:
-
- tuple = dtuple_create_from_mem(tuple_buf, sizeof(tuple_buf), 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(dfield, table_name, ut_strlen(table_name));
- dict_index_copy_types(tuple, sec_index, 1);
-
- btr_pcur_open_on_user_rec(sec_index, tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
-loop:
- rec = btr_pcur_get_rec(&pcur);
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- /* End of index */
-
- goto load_next_index;
- }
-
- /* Now we have the record in the secondary index containing a table
- name and a foreign constraint ID */
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_FOREIGN_FOR_NAME__NAME, &len);
-
- /* Check if the table name in the record is the one searched for; the
- following call does the comparison in the latin1_swedish_ci
- charset-collation, in a case-insensitive way. */
-
- if (0 != cmp_data_data(dfield_get_type(dfield)->mtype,
- dfield_get_type(dfield)->prtype,
- static_cast<const byte*>(
- dfield_get_data(dfield)),
- dfield_get_len(dfield),
- field, len)) {
-
- goto load_next_index;
- }
-
- /* Since table names in SYS_FOREIGN are stored in a case-insensitive
- order, we have to check that the table name matches also in a binary
- string comparison. On Unix, MySQL allows table names that only differ
- in character case. If lower_case_table_names=2 then what is stored
- may not be the same case, but the previous comparison showed that they
- match with no-case.
- Delete-marked records are skipped before that binary comparison. */
-
- if (rec_get_deleted_flag(rec, 0)) {
- goto next_rec;
- }
-
- if ((innobase_get_lower_case_table_names() != 2)
- && (0 != ut_memcmp(field, table_name, len))) {
- goto next_rec;
- }
-
- /* Now we get a foreign key constraint id */
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_FOREIGN_FOR_NAME__ID, &len);
-
- /* Copy the string because the page may be modified or evicted
- after mtr_commit() below. */
- char fk_id[MAX_TABLE_NAME_LEN + 1];
-
- ut_a(len <= MAX_TABLE_NAME_LEN);
- memcpy(fk_id, field, len);
- fk_id[len] = '\0';
-
- btr_pcur_store_position(&pcur, &mtr);
-
- mtr_commit(&mtr);
-
- /* Load the foreign constraint definition to the dictionary cache */
-
- err = dict_load_foreign(fk_id, col_names,
- check_recursive, check_charsets, ignore_err);
-
- if (err != DB_SUCCESS) { /* mtr was committed above; only the cursor is still open */
- btr_pcur_close(&pcur);
-
- return(err);
- }
-
- mtr_start(&mtr);
-
- btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr);
-next_rec:
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- goto loop;
-
-load_next_index:
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- sec_index = dict_table_get_next_index(sec_index);
-
- if (sec_index != NULL) {
-
- mtr_start(&mtr);
-
- /* Switch to scan index on REF_NAME, fk_max_recusive_level
- already been updated when scanning FOR_NAME index, no need to
- update again */
- check_recursive = FALSE;
-
- goto start_load;
- }
-
- return(DB_SUCCESS);
-}
diff --git a/storage/xtradb/dict/dict0mem.cc b/storage/xtradb/dict/dict0mem.cc
deleted file mode 100644
index 125d7d78a1f..00000000000
--- a/storage/xtradb/dict/dict0mem.cc
+++ /dev/null
@@ -1,823 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file dict/dict0mem.cc
-Data dictionary memory object creation
-
-Created 1/8/1996 Heikki Tuuri
-***********************************************************************/
-
-#include "dict0mem.h"
-
-#ifdef UNIV_NONINL
-#include "dict0mem.ic"
-#endif
-
-#include "rem0rec.h"
-#include "data0type.h"
-#include "mach0data.h"
-#include "dict0dict.h"
-#include "fts0priv.h"
-#include "ut0crc32.h"
-#ifndef UNIV_HOTBACKUP
-# include "ha_prototypes.h" /* innobase_casedn_str(),
- innobase_get_lower_case_table_names */
-# include "mysql_com.h" /* NAME_LEN */
-# include "lock0lock.h"
-#endif /* !UNIV_HOTBACKUP */
-#ifdef UNIV_BLOB_DEBUG
-# include "ut0rbt.h"
-#endif /* UNIV_BLOB_DEBUG */
-#include <iostream>
-
-#define DICT_HEAP_SIZE 100 /*!< initial memory heap size when
- creating a table or index object */
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register autoinc_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t autoinc_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/** System databases */
-static const char* innobase_system_databases[] = {
- "mysql/",
- "information_schema/",
- "performance_schema/",
- NullS
-};
-
-/** An integer randomly initialized at startup used to make a temporary
-table name as unique as possible. */
-static ib_uint32_t dict_temp_file_num;
-
-/**********************************************************************//**
-Creates a table memory object.
-The object is zero-allocated from a heap created for it; the name is copied
-into a separate ut_malloc() buffer. Room is reserved for n_cols user columns
-plus DATA_N_SYS_COLS system columns.
-@return own: table object */
-UNIV_INTERN
-dict_table_t*
-dict_mem_table_create(
-/*==================*/
-	const char*	name,	/*!< in: table name */
-	ulint		space,	/*!< in: space where the clustered index of
-				the table is placed */
-	ulint		n_cols,	/*!< in: number of columns */
-	ulint		flags,	/*!< in: table flags */
-	ulint		flags2)	/*!< in: table flags2 */
-{
-	ut_ad(name);
-	ut_a(dict_tf_is_valid(flags));
-	ut_a(!(flags2 & ~DICT_TF2_BIT_MASK));
-
-	mem_heap_t*	heap = mem_heap_create(DICT_HEAP_SIZE);
-	dict_table_t*	table = static_cast<dict_table_t*>(
-		mem_heap_zalloc(heap, sizeof(dict_table_t)));
-
-	/* Size of the name including the terminating NUL, and the
-	column count including the system columns. */
-	const size_t	name_size = strlen(name) + 1;
-	const ulint	n_all_cols = n_cols + DATA_N_SYS_COLS;
-
-	table->heap = heap;
-
-	table->flags = (unsigned int) flags;
-	table->flags2 = (unsigned int) flags2;
-
-	table->name = static_cast<char*>(ut_malloc(name_size));
-	memcpy(table->name, name, name_size);
-	table->is_system_db = dict_mem_table_is_system(table->name);
-
-	table->space = (unsigned int) space;
-	table->n_cols = (unsigned int) n_all_cols;
-	table->cols = static_cast<dict_col_t*>(
-		mem_heap_alloc(heap, n_all_cols * sizeof(dict_col_t)));
-
-	ut_d(table->magic_n = DICT_TABLE_MAGIC_N);
-
-	/* Passing true enables the stats latch, i.e.
-	dict_table_stats_lock() will not be a no-op. */
-	dict_table_stats_latch_create(table, true);
-
-#ifndef UNIV_HOTBACKUP
-	table->autoinc_lock = static_cast<ib_lock_t*>(
-		mem_heap_alloc(heap, lock_get_size()));
-
-	dict_table_autoinc_create_lazy(table);
-
-	table->autoinc = 0;
-
-	/* Number of transactions that are waiting on the AUTOINC lock
-	or have been granted it. */
-	table->n_waiting_or_granted_auto_inc_locks = 0;
-
-	/* table->fts is created when the table has an FTS index or
-	one is in the process of being built. */
-	const bool	needs_fts = dict_table_has_fts_index(table)
-		|| DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
-		|| DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID);
-
-	if (!needs_fts) {
-		table->fts = NULL;
-	} else {
-		table->fts = fts_create(table);
-		table->fts->cache = fts_cache_create(table);
-	}
-
-#endif /* !UNIV_HOTBACKUP */
-
-	/* The zero-filled storage does not construct the two sets, so
-	they are constructed in place. */
-	new(&table->foreign_set) dict_foreign_set();
-	new(&table->referenced_set) dict_foreign_set();
-
-	return(table);
-}
-
-/****************************************************************//**
-Determines if a table belongs to a system database.
-Names have the form "database/table". A name without any '/' is treated as a
-system table (some system tables are of the form SYS_*). Otherwise the name
-must start with one of the prefixes in innobase_system_databases and contain
-at least one more character after that prefix.
-@return true if the name denotes a system table, false otherwise */
-UNIV_INTERN
-bool
-dict_mem_table_is_system(
-/*================*/
-	char	*name)	/*!< in: table name */
-{
-	ut_ad(name);
-
-	if (strchr(name, '/') == NULL) {
-		/* No database part at all. */
-		return(true);
-	}
-
-	/* Lengths stay in size_t, the type strlen() returns. */
-	const size_t	name_len = strlen(name);
-
-	/* The prefix list is terminated by NullS. */
-	for (const char* const* db = innobase_system_databases;
-	     *db != NullS;
-	     db++) {
-		const size_t	prefix_len = strlen(*db);
-
-		if (name_len > prefix_len
-		    && strncmp(name, *db, prefix_len) == 0) {
-			return(true);
-		}
-	}
-
-	return(false);
-}
-
-/****************************************************************//**
-Free a table memory object.
-Releases, in this order: the FTS state (if the table has any), the autoinc
-object (unless UNIV_HOTBACKUP), the stats latch, the two foreign key sets,
-the name buffer, and finally table->heap. */
-UNIV_INTERN
-void
-dict_mem_table_free(
-/*================*/
-	dict_table_t*	table)	/*!< in: table */
-{
-	ut_ad(table);
-	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
-	if (dict_table_has_fts_index(table)
-	    || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
-	    || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
-		if (table->fts) {
-			if (table->cached) {
-				fts_optimize_remove_table(table);
-			}
-
-			fts_free(table);
-		}
-	}
-
-	/* Clear the flag (debug builds only) after table->cached has been
-	consulted above. Clearing it first made the check always false in
-	debug builds, so they skipped fts_optimize_remove_table() while
-	non-debug builds did call it. */
-	ut_d(table->cached = FALSE);
-
-#ifndef UNIV_HOTBACKUP
-	dict_table_autoinc_destroy(table);
-#endif /* !UNIV_HOTBACKUP */
-
-	dict_table_stats_latch_destroy(table);
-
-	/* Explicit destructor calls for the two sets. */
-	table->foreign_set.~dict_foreign_set();
-	table->referenced_set.~dict_foreign_set();
-
-	ut_free(table->name);
-	mem_heap_free(table->heap);
-}
-
-/****************************************************************//**
-Append 'name' to 'col_names'. @see dict_table_t::col_names
-col_names is a sequence of 'cols' NUL-terminated strings stored back to
-back. The result is a new array allocated from 'heap' that holds the same
-strings followed by a copy of 'name'.
-@return new column names array */
-static
-const char*
-dict_add_col_name(
-/*==============*/
-	const char*	col_names,	/*!< in: existing column names, or
-					NULL */
-	ulint		cols,		/*!< in: number of existing columns */
-	const char*	name,		/*!< in: new column name */
-	mem_heap_t*	heap)		/*!< in: heap */
-{
-	ut_ad(!cols == !col_names);
-
-	/* Walk over the existing strings to find how many bytes they
-	occupy, terminators included. */
-	ulint	prefix_bytes = 0;
-
-	if (col_names != NULL) {
-		const char*	end = col_names;
-
-		for (ulint left = cols; left != 0; left--) {
-			end += strlen(end) + 1;
-		}
-
-		prefix_bytes = end - col_names;
-	}
-
-	const ulint	name_bytes = strlen(name) + 1;
-
-	char*	merged = static_cast<char*>(
-		mem_heap_alloc(heap, prefix_bytes + name_bytes));
-
-	if (prefix_bytes != 0) {
-		memcpy(merged, col_names, prefix_bytes);
-	}
-
-	memcpy(merged + prefix_bytes, name, name_bytes);
-
-	return(merged);
-}
-
-/**********************************************************************//**
-Adds a column definition to a table.
-The column takes the next free position (table->n_def, which is then
-incremented). When a name is given it is appended to table->col_names. */
-UNIV_INTERN
-void
-dict_mem_table_add_col(
-/*===================*/
-	dict_table_t*	table,	/*!< in: table */
-	mem_heap_t*	heap,	/*!< in: temporary memory heap, or NULL */
-	const char*	name,	/*!< in: column name, or NULL */
-	ulint		mtype,	/*!< in: main datatype */
-	ulint		prtype,	/*!< in: precise type */
-	ulint		len)	/*!< in: precision */
-{
-	ut_ad(table);
-	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-	ut_ad(!heap == !name);
-
-	const ulint	pos = table->n_def++;
-
-	if (name != NULL) {
-		if (UNIV_UNLIKELY(table->n_def == table->n_cols)) {
-			/* This is the last column: build the name array
-			in the heap of the table itself. */
-			heap = table->heap;
-		}
-
-		const char*	names = table->col_names;
-
-		if (UNIV_LIKELY(pos != 0) && UNIV_UNLIKELY(names == NULL)) {
-			/* All preceding column names are empty: represent
-			them by a zero-filled array. */
-			names = static_cast<char*>(
-				mem_heap_zalloc(heap, table->n_def));
-		}
-
-		table->col_names = dict_add_col_name(names, pos, name, heap);
-	}
-
-	dict_mem_fill_column_struct(dict_table_get_nth_col(table, pos),
-				    pos, mtype, prtype, len);
-}
-
-/**********************************************************************//**
-Renames a column of a table in the data dictionary cache.
-The name is rewritten inside table->col_names: in place when the new name is
-not longer than the old one, otherwise in a new array allocated from
-table->heap. When the length changes, the name pointers of all index fields
-are re-based onto the resulting array. Afterwards the column names recorded
-in the constraints of table->foreign_set and table->referenced_set are
-brought up to date. */
-static MY_ATTRIBUTE((nonnull))
-void
-dict_mem_table_col_rename_low(
-/*==========================*/
- dict_table_t* table, /*!< in/out: table */
- unsigned i, /*!< in: column offset corresponding to s */
- const char* to, /*!< in: new column name */
- const char* s) /*!< in: pointer to table->col_names */
-{
- size_t from_len = strlen(s), to_len = strlen(to);
-
- ut_ad(i < table->n_def);
- ut_ad(from_len <= NAME_LEN);
- ut_ad(to_len <= NAME_LEN);
-
- char from[NAME_LEN + 1]; /* copy of the old name; the bytes at s are overwritten below */
- strncpy(from, s, NAME_LEN + 1); /* NOTE(review): leaves from unterminated if from_len > NAME_LEN, which only the ut_ad above rules out */
-
- if (from_len == to_len) {
- /* The easy case: simply replace the column name in
- table->col_names. */
- strcpy(const_cast<char*>(s), to);
- } else {
- /* We need to adjust all affected index->field
- pointers, as in dict_index_add_col(). First, copy
- table->col_names. */
- ulint prefix_len = s - table->col_names;
-
- for (; i < table->n_def; i++) {
- s += strlen(s) + 1;
- }
-
- ulint full_len = s - table->col_names; /* total bytes of all names, terminators included */
- char* col_names;
-
- if (to_len > from_len) {
- col_names = static_cast<char*>(
- mem_heap_alloc(
- table->heap,
- full_len + to_len - from_len));
-
- memcpy(col_names, table->col_names, prefix_len);
- } else {
- col_names = const_cast<char*>(table->col_names); /* shorter name: reuse the existing array */
- }
-
- memcpy(col_names + prefix_len, to, to_len);
- memmove(col_names + prefix_len + to_len,
- table->col_names + (prefix_len + from_len),
- full_len - (prefix_len + from_len));
-
- /* Replace the field names in every index. */
- for (dict_index_t* index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
- ulint n_fields = dict_index_get_n_fields(index);
-
- for (ulint i = 0; i < n_fields; i++) {
- dict_field_t* field
- = dict_index_get_nth_field(
- index, i);
- ulint name_ofs
- = field->name - table->col_names;
- if (name_ofs <= prefix_len) { /* name at or before the renamed column: same offset */
- field->name = col_names + name_ofs;
- } else { /* name after the renamed column: shifted by the length difference */
- ut_a(name_ofs < full_len);
- field->name = col_names
- + name_ofs + to_len - from_len;
- }
- }
- }
-
- table->col_names = col_names;
- }
-
- dict_foreign_t* foreign;
-
- /* Replace the field names in every foreign key constraint. */
- for (dict_foreign_set::iterator it = table->foreign_set.begin();
- it != table->foreign_set.end();
- ++it) {
-
- foreign = *it;
-
- if (foreign->foreign_index == NULL) {
- /* We may go here when we set foreign_key_checks to 0,
- and then try to rename a column and modify the
- corresponding foreign key constraint. The index
- would have been dropped, we have to find an equivalent
- one */
- for (unsigned f = 0; f < foreign->n_fields; f++) {
- if (strcmp(foreign->foreign_col_names[f], from)
- == 0) {
-
- char** rc = const_cast<char**>(
- foreign->foreign_col_names
- + f);
-
- if (to_len <= strlen(*rc)) { /* the new name fits into the existing string */
- memcpy(*rc, to, to_len + 1);
- } else {
- *rc = static_cast<char*>(
- mem_heap_dup(
- foreign->heap,
- to,
- to_len + 1));
- }
- }
- }
-
- dict_index_t* new_index = dict_foreign_find_index(
- foreign->foreign_table, NULL,
- foreign->foreign_col_names,
- foreign->n_fields, NULL, true, false,
- NULL, NULL, NULL);
- /* There must be an equivalent index in this case. */
- ut_ad(new_index != NULL);
-
- foreign->foreign_index = new_index;
-
- } else {
-
- for (unsigned f = 0; f < foreign->n_fields; f++) {
- /* These can point straight to
- table->col_names, because the foreign key
- constraints will be freed at the same time
- when the table object is freed. */
- foreign->foreign_col_names[f]
- = dict_index_get_nth_field(
- foreign->foreign_index,
- f)->name;
- }
- }
- }
-
- for (dict_foreign_set::iterator it = table->referenced_set.begin();
- it != table->referenced_set.end();
- ++it) {
-
- foreign = *it;
-
- ut_ad(foreign->referenced_index != NULL);
-
- for (unsigned f = 0; f < foreign->n_fields; f++) {
- /* foreign->referenced_col_names[] need to be
- copies, because the constraint may become
- orphan when foreign_key_checks=0 and the
- parent table is dropped. */
-
- const char* col_name = dict_index_get_nth_field(
- foreign->referenced_index, f)->name;
-
- if (strcmp(foreign->referenced_col_names[f],
- col_name)) {
- char** rc = const_cast<char**>(
- foreign->referenced_col_names + f);
- size_t col_name_len_1 = strlen(col_name) + 1; /* length including the terminating NUL */
-
- if (col_name_len_1 <= strlen(*rc) + 1) {
- memcpy(*rc, col_name, col_name_len_1);
- } else {
- *rc = static_cast<char*>(
- mem_heap_dup(
- foreign->heap,
- col_name,
- col_name_len_1));
- }
- }
- }
- }
-}
-
-/**********************************************************************//**
-Renames a column of a table in the data dictionary cache.
-Locates the name of column nth_col inside table->col_names and delegates
-the actual work to dict_mem_table_col_rename_low(). */
-UNIV_INTERN
-void
-dict_mem_table_col_rename(
-/*======================*/
-	dict_table_t*	table,	/*!< in/out: table */
-	unsigned	nth_col,/*!< in: column index */
-	const char*	from,	/*!< in: old column name */
-	const char*	to)	/*!< in: new column name */
-{
-	ut_ad(nth_col < table->n_def);
-
-	/* Skip over the names of the columns that precede nth_col. */
-	const char*	cursor = table->col_names;
-
-	for (unsigned left = nth_col; left != 0; left--) {
-		const size_t	name_len = strlen(cursor);
-
-		ut_ad(name_len > 0);
-		cursor += name_len + 1;
-	}
-
-	/* A mismatch here could happen if the data dictionaries are out
-	of sync. The rename is carried out anyway. */
-	ut_ad(!strcmp(from, cursor));
-
-	dict_mem_table_col_rename_low(table, nth_col, to, cursor);
-}
-
-/**********************************************************************//**
-Fills a dict_col_t memory structure with the supplied information.
-ord_part and max_prefix are reset to 0. Unless UNIV_HOTBACKUP is defined,
-the minimum and maximum multi-byte character lengths are obtained from
-dtype_get_mblen() and stored as well. */
-UNIV_INTERN
-void
-dict_mem_fill_column_struct(
-/*========================*/
-	dict_col_t*	column,		/*!< out: column struct to be
-					filled */
-	ulint		col_pos,	/*!< in: column position */
-	ulint		mtype,		/*!< in: main data type */
-	ulint		prtype,		/*!< in: precise type */
-	ulint		col_len)	/*!< in: column length */
-{
-	column->ind = (unsigned int) col_pos;
-	column->ord_part = 0;
-	column->max_prefix = 0;
-	column->mtype = (unsigned int) mtype;
-	column->prtype = (unsigned int) prtype;
-	column->len = (unsigned int) col_len;
-
-#ifndef UNIV_HOTBACKUP
-	ulint	mb_min;
-	ulint	mb_max;
-
-	dtype_get_mblen(mtype, prtype, &mb_min, &mb_max);
-	dict_col_set_mbminmaxlen(column, mb_min, mb_max);
-#endif /* !UNIV_HOTBACKUP */
-}
-
-/**********************************************************************//**
-Creates an index memory object.
-@return own: index object */
-UNIV_INTERN
-dict_index_t*
-dict_mem_index_create(
-/*==================*/
- const char* table_name, /*!< in: table name */
- const char* index_name, /*!< in: index name */
- ulint space, /*!< in: space where the index tree is
- placed, ignored if the index is of
- the clustered type */
- ulint type, /*!< in: DICT_UNIQUE,
- DICT_CLUSTERED, ... ORed */
- ulint n_fields) /*!< in: number of fields */
-{
- dict_index_t* index;
- mem_heap_t* heap;
-
- ut_ad(table_name && index_name);
-
- heap = mem_heap_create(DICT_HEAP_SIZE);
-
- index = static_cast<dict_index_t*>(
- mem_heap_zalloc(heap, sizeof(*index)));
-
- dict_mem_fill_index_struct(index, heap, table_name, index_name,
- space, type, n_fields);
-
- dict_index_zip_pad_mutex_create_lazy(index);
- return(index);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Creates and initializes a foreign constraint memory object.
-@return own: foreign constraint struct */
-UNIV_INTERN
-dict_foreign_t*
-dict_mem_foreign_create(void)
-/*=========================*/
-{
- dict_foreign_t* foreign;
- mem_heap_t* heap;
-
- heap = mem_heap_create(100);
-
- foreign = static_cast<dict_foreign_t*>(
- mem_heap_zalloc(heap, sizeof(dict_foreign_t)));
-
- foreign->heap = heap;
-
- return(foreign);
-}
-
-/**********************************************************************//**
-Sets the foreign_table_name_lookup pointer based on the value of
-lower_case_table_names. If that is 0 or 1, foreign_table_name_lookup
-will point to foreign_table_name. If 2, then another string is
-allocated from foreign->heap and set to lower case. */
-UNIV_INTERN
-void
-dict_mem_foreign_table_name_lookup_set(
-/*===================================*/
- dict_foreign_t* foreign, /*!< in/out: foreign struct */
- ibool do_alloc) /*!< in: is an alloc needed */
-{
- if (innobase_get_lower_case_table_names() == 2) {
- if (do_alloc) {
- ulint len;
-
- len = strlen(foreign->foreign_table_name) + 1;
-
- foreign->foreign_table_name_lookup =
- static_cast<char*>(
- mem_heap_alloc(foreign->heap, len));
- }
- strcpy(foreign->foreign_table_name_lookup,
- foreign->foreign_table_name);
- innobase_casedn_str(foreign->foreign_table_name_lookup);
- } else {
- foreign->foreign_table_name_lookup
- = foreign->foreign_table_name;
- }
-}
-
-/**********************************************************************//**
-Sets the referenced_table_name_lookup pointer based on the value of
-lower_case_table_names. If that is 0 or 1, referenced_table_name_lookup
-will point to referenced_table_name. If 2, then another string is
-allocated from foreign->heap and set to lower case. */
-UNIV_INTERN
-void
-dict_mem_referenced_table_name_lookup_set(
-/*======================================*/
- dict_foreign_t* foreign, /*!< in/out: foreign struct */
- ibool do_alloc) /*!< in: is an alloc needed */
-{
- if (innobase_get_lower_case_table_names() == 2) {
- if (do_alloc) {
- ulint len;
-
- len = strlen(foreign->referenced_table_name) + 1;
-
- foreign->referenced_table_name_lookup =
- static_cast<char*>(
- mem_heap_alloc(foreign->heap, len));
- }
- strcpy(foreign->referenced_table_name_lookup,
- foreign->referenced_table_name);
- innobase_casedn_str(foreign->referenced_table_name_lookup);
- } else {
- foreign->referenced_table_name_lookup
- = foreign->referenced_table_name;
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Adds a field definition to an index. NOTE: does not take a copy
-of the column name if the field is a column. The memory occupied
-by the column name may be released only after publishing the index. */
-UNIV_INTERN
-void
-dict_mem_index_add_field(
-/*=====================*/
- dict_index_t* index, /*!< in: index */
- const char* name, /*!< in: column name */
- ulint prefix_len) /*!< in: 0 or the column prefix length
- in a MySQL index like
- INDEX (textcol(25)) */
-{
- dict_field_t* field;
-
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- index->n_def++;
-
- field = dict_index_get_nth_field(index, index->n_def - 1);
-
- field->name = name;
- field->prefix_len = (unsigned int) prefix_len;
-}
-
-/**********************************************************************//**
-Frees an index memory object. */
-UNIV_INTERN
-void
-dict_mem_index_free(
-/*================*/
- dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-#ifdef UNIV_BLOB_DEBUG
- if (index->blobs) {
- mutex_free(&index->blobs_mutex);
- rbt_free(index->blobs);
- }
-#endif /* UNIV_BLOB_DEBUG */
-
- dict_index_zip_pad_mutex_destroy(index);
-
- mem_heap_free(index->heap);
-}
-
-/** Create a temporary tablename like "#sql-ibtid-inc where
- tid = the Table ID
- inc = a randomly initialized number that is incremented for each file
-The table ID is a 64 bit integer, can use up to 20 digits, and is
-initialized at bootstrap. The second number is 32 bits, can use up to 10
-digits, and is initialized at startup to a randomly distributed number.
-It is hoped that the combination of these two numbers will provide a
-reasonably unique temporary file name.
-@param[in] heap A memory heap
-@param[in] dbtab Table name in the form database/table name
-@param[in] id Table id
-@return A unique temporary tablename suitable for InnoDB use */
-UNIV_INTERN
-char*
-dict_mem_create_temporary_tablename(
- mem_heap_t* heap,
- const char* dbtab,
- table_id_t id)
-{
- size_t size;
- char* name;
- const char* dbend = strchr(dbtab, '/');
- ut_ad(dbend);
- size_t dblen = dbend - dbtab + 1;
-
-#ifdef HAVE_ATOMIC_BUILTINS
- /* Increment a randomly initialized number for each temp file. */
- os_atomic_increment_uint32(&dict_temp_file_num, 1);
-#else /* HAVE_ATOMIC_BUILTINS */
- dict_temp_file_num++;
-#endif /* HAVE_ATOMIC_BUILTINS */
-
- size = tmp_file_prefix_length + 3 + 20 + 1 + 10 + dblen;
- name = static_cast<char*>(mem_heap_alloc(heap, size));
- memcpy(name, dbtab, dblen);
- ut_snprintf(name + dblen, size - dblen,
- TEMP_FILE_PREFIX_INNODB UINT64PF "-" UINT32PF,
- id, dict_temp_file_num);
-
- return(name);
-}
-
-/** Initialize dict memory variables */
-
-void
-dict_mem_init(void)
-{
- /* Initialize a randomly distributed temporary file number */
- ib_uint32_t now = static_cast<ib_uint32_t>(ut_time());
-
- const byte* buf = reinterpret_cast<const byte*>(&now);
- ut_ad(ut_crc32 != NULL);
-
- dict_temp_file_num = ut_crc32(buf, sizeof(now));
-
- DBUG_PRINT("dict_mem_init",
- ("Starting Temporary file number is " UINT32PF,
- dict_temp_file_num));
-}
-
-/** Validate the search order in the foreign key set.
-@param[in] fk_set the foreign key set to be validated
-@return true if search order is fine in the set, false otherwise. */
-bool
-dict_foreign_set_validate(
- const dict_foreign_set& fk_set)
-{
- dict_foreign_not_exists not_exists(fk_set);
-
- dict_foreign_set::const_iterator it = std::find_if(
- fk_set.begin(), fk_set.end(), not_exists);
-
- if (it == fk_set.end()) {
- return(true);
- }
-
- dict_foreign_t* foreign = *it;
- std::cerr << "Foreign key lookup failed: " << *foreign;
- std::cerr << fk_set;
- ut_ad(0);
- return(false);
-}
-
-/** Validate the search order in the foreign key sets of the table
-(foreign_set and referenced_set).
-@param[in] table table whose foreign key sets are to be validated
-@return true if foreign key sets are fine, false otherwise. */
-bool
-dict_foreign_set_validate(
- const dict_table_t& table)
-{
- return(dict_foreign_set_validate(table.foreign_set)
- && dict_foreign_set_validate(table.referenced_set));
-}
-
-std::ostream&
-operator<< (std::ostream& out, const dict_foreign_t& foreign)
-{
- out << "[dict_foreign_t: id='" << foreign.id << "'";
-
- if (foreign.foreign_table_name != NULL) {
- out << ",for: '" << foreign.foreign_table_name << "'";
- }
-
- out << "]";
- return(out);
-}
-
-std::ostream&
-operator<< (std::ostream& out, const dict_foreign_set& fk_set)
-{
- out << "[dict_foreign_set:";
- std::for_each(fk_set.begin(), fk_set.end(), dict_foreign_print(out));
- out << "]" << std::endl;
- return(out);
-}
-
diff --git a/storage/xtradb/dict/dict0stats.cc b/storage/xtradb/dict/dict0stats.cc
deleted file mode 100644
index c1463e98ce0..00000000000
--- a/storage/xtradb/dict/dict0stats.cc
+++ /dev/null
@@ -1,4463 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2009, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file dict/dict0stats.cc
-Code used for calculating and manipulating table statistics.
-
-Created Jan 06, 2010 Vasil Dimov
-*******************************************************/
-
-#ifndef UNIV_HOTBACKUP
-
-#include "univ.i"
-
-#include "btr0btr.h" /* btr_get_size() */
-#include "btr0cur.h" /* btr_estimate_number_of_different_key_vals() */
-#include "dict0dict.h" /* dict_table_get_first_index(), dict_fs2utf8() */
-#include "dict0mem.h" /* DICT_TABLE_MAGIC_N */
-#include "dict0stats.h"
-#include "data0type.h" /* dtype_t */
-#include "db0err.h" /* dberr_t */
-#include "page0page.h" /* page_align() */
-#include "pars0pars.h" /* pars_info_create() */
-#include "pars0types.h" /* pars_info_t */
-#include "que0que.h" /* que_eval_sql() */
-#include "rem0cmp.h" /* REC_MAX_N_FIELDS,cmp_rec_rec_with_match() */
-#include "row0sel.h" /* sel_node_t */
-#include "row0types.h" /* sel_node_t */
-#include "trx0trx.h" /* trx_create() */
-#include "trx0roll.h" /* trx_rollback_to_savepoint() */
-#include "ut0rnd.h" /* ut_rnd_interval() */
-#include "ut0ut.h" /* ut_format_name(), ut_time() */
-
-#include <algorithm>
-#include <map>
-#include <vector>
-
-/* Sampling algorithm description @{
-
-The algorithm is controlled by one number - N_SAMPLE_PAGES(index),
-let it be A, which is the number of leaf pages to analyze for a given index
-for each n-prefix (if the index is on 3 columns, then 3*A leaf pages will be
-analyzed).
-
-Let the total number of leaf pages in the table be T.
-Level 0 - leaf pages, level H - root.
-
-Definition: N-prefix-boring record is a record on a non-leaf page that equals
-the next (to the right, cross page boundaries, skipping the supremum and
-infimum) record on the same level when looking at the fist n-prefix columns.
-The last (user) record on a level is not boring (it does not match the
-non-existent user record to the right). We call the records boring because all
-the records on the page below a boring record are equal to that boring record.
-
-We avoid diving below boring records when searching for a leaf page to
-estimate the number of distinct records because we know that such a leaf
-page will have number of distinct records == 1.
-
-For each n-prefix: start from the root level and full scan subsequent lower
-levels until a level that contains at least A*10 distinct records is found.
-Lets call this level LA.
-As an optimization the search is canceled if it has reached level 1 (never
-descend to the level 0 (leaf)) and also if the next level to be scanned
-would contain more than A pages. The latter is because the user has asked
-to analyze A leaf pages and it does not make sense to scan much more than
-A non-leaf pages with the sole purpose of finding a good sample of A leaf
-pages.
-
-After finding the appropriate level LA with >A*10 distinct records (or less in
-the exceptions described above), divide it into groups of equal records and
-pick A such groups. Then pick the last record from each group. For example,
-let the level be:
-
-index: 0,1,2,3,4,5,6,7,8,9,10
-record: 1,1,1,2,2,7,7,7,7,7,9
-
-There are 4 groups of distinct records and if A=2 random ones are selected,
-e.g. 1,1,1 and 7,7,7,7,7, then records with indexes 2 and 9 will be selected.
-
-After selecting A records as described above, dive below them to find A leaf
-pages and analyze them, finding the total number of distinct records. The
-dive to the leaf level is performed by selecting a non-boring record from
-each page and diving below it.
-
-This way, a total of A leaf pages are analyzed for the given n-prefix.
-
-Let the number of different key values found in each leaf page i be Pi (i=1..A).
-Let N_DIFF_AVG_LEAF be (P1 + P2 + ... + PA) / A.
-Let the number of different key values on level LA be N_DIFF_LA.
-Let the total number of records on level LA be TOTAL_LA.
-Let R be N_DIFF_LA / TOTAL_LA, we assume this ratio is the same on the
-leaf level.
-Let the number of leaf pages be N.
-Then the total number of different key values on the leaf level is:
-N * R * N_DIFF_AVG_LEAF.
-See REF01 for the implementation.
-
-The above describes how to calculate the cardinality of an index.
-This algorithm is executed for each n-prefix of a multi-column index
-where n=1..n_uniq.
-@} */
-
-/* names of the tables from the persistent statistics storage */
-#define TABLE_STATS_NAME "mysql/innodb_table_stats"
-#define TABLE_STATS_NAME_PRINT "mysql.innodb_table_stats"
-#define INDEX_STATS_NAME "mysql/innodb_index_stats"
-#define INDEX_STATS_NAME_PRINT "mysql.innodb_index_stats"
-
-#ifdef UNIV_STATS_DEBUG
-#define DEBUG_PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__)
-#else /* UNIV_STATS_DEBUG */
-#define DEBUG_PRINTF(fmt, ...) /* noop */
-#endif /* UNIV_STATS_DEBUG */
-
-/* Gets the number of leaf pages to sample in persistent stats estimation */
-#define N_SAMPLE_PAGES(index) \
- static_cast<ib_uint64_t>( \
- (index)->table->stats_sample_pages != 0 \
- ? (index)->table->stats_sample_pages \
- : srv_stats_persistent_sample_pages)
-
-/* number of distinct records on a given level that are required to stop
-descending to lower levels and fetch N_SAMPLE_PAGES(index) records
-from that level */
-#define N_DIFF_REQUIRED(index) (N_SAMPLE_PAGES(index) * 10)
-
-/* A dynamic array where we store the boundaries of each distinct group
-of keys. For example if a btree level is:
-index: 0,1,2,3,4,5,6,7,8,9,10,11,12
-data: b,b,b,b,b,b,g,g,j,j,j, x, y
-then we would store 5,7,10,11,12 in the array. */
-typedef std::vector<ib_uint64_t> boundaries_t;
-
-/* This is used to arrange the index based on the index name.
-@return true if index_name1 is smaller than index_name2. */
-struct index_cmp
-{
- bool operator()(const char* index_name1, const char* index_name2) const {
- return(strcmp(index_name1, index_name2) < 0);
- }
-};
-
-typedef std::map<const char*, dict_index_t*, index_cmp> index_map_t;
-
-/*********************************************************************//**
-Checks whether an index should be ignored in stats manipulations:
-* stats fetch
-* stats recalc
-* stats save
-@return true if exists and all tables are ok */
-UNIV_INLINE
-bool
-dict_stats_should_ignore_index(
-/*===========================*/
- const dict_index_t* index) /*!< in: index */
-{
- return((index->type & DICT_FTS)
- || dict_index_is_corrupted(index)
- || index->to_be_dropped
- || *index->name == TEMP_INDEX_PREFIX);
-}
-
-/*********************************************************************//**
-Checks whether the persistent statistics storage exists and that all
-tables have the proper structure.
-@return true if exists and all tables are ok */
-static
-bool
-dict_stats_persistent_storage_check(
-/*================================*/
- bool caller_has_dict_sys_mutex) /*!< in: true if the caller
- owns dict_sys->mutex */
-{
- /* definition for the table TABLE_STATS_NAME */
- dict_col_meta_t table_stats_columns[] = {
- {"database_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192},
-
- {"table_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192},
-
- {"last_update", DATA_FIXBINARY,
- DATA_NOT_NULL, 4},
-
- {"n_rows", DATA_INT,
- DATA_NOT_NULL | DATA_UNSIGNED, 8},
-
- {"clustered_index_size", DATA_INT,
- DATA_NOT_NULL | DATA_UNSIGNED, 8},
-
- {"sum_of_other_index_sizes", DATA_INT,
- DATA_NOT_NULL | DATA_UNSIGNED, 8}
- };
- dict_table_schema_t table_stats_schema = {
- TABLE_STATS_NAME,
- UT_ARR_SIZE(table_stats_columns),
- table_stats_columns,
- 0 /* n_foreign */,
- 0 /* n_referenced */
- };
-
- /* definition for the table INDEX_STATS_NAME */
- dict_col_meta_t index_stats_columns[] = {
- {"database_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192},
-
- {"table_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192},
-
- {"index_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 192},
-
- {"last_update", DATA_FIXBINARY,
- DATA_NOT_NULL, 4},
-
- {"stat_name", DATA_VARMYSQL,
- DATA_NOT_NULL, 64*3},
-
- {"stat_value", DATA_INT,
- DATA_NOT_NULL | DATA_UNSIGNED, 8},
-
- {"sample_size", DATA_INT,
- DATA_UNSIGNED, 8},
-
- {"stat_description", DATA_VARMYSQL,
- DATA_NOT_NULL, 1024*3}
- };
- dict_table_schema_t index_stats_schema = {
- INDEX_STATS_NAME,
- UT_ARR_SIZE(index_stats_columns),
- index_stats_columns,
- 0 /* n_foreign */,
- 0 /* n_referenced */
- };
-
- char errstr[512];
- dberr_t ret;
-
- if (!caller_has_dict_sys_mutex) {
- mutex_enter(&(dict_sys->mutex));
- }
-
- ut_ad(mutex_own(&dict_sys->mutex));
-
- /* first check table_stats */
- ret = dict_table_schema_check(&table_stats_schema, errstr,
- sizeof(errstr));
- if (ret == DB_SUCCESS) {
- /* if it is ok, then check index_stats */
- ret = dict_table_schema_check(&index_stats_schema, errstr,
- sizeof(errstr));
- }
-
- if (!caller_has_dict_sys_mutex) {
- mutex_exit(&(dict_sys->mutex));
- }
-
- if (ret != DB_SUCCESS && ret != DB_STATS_DO_NOT_EXIST) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: %s\n", errstr);
- return(false);
- } else if (ret == DB_STATS_DO_NOT_EXIST) {
- return false;
- }
- /* else */
-
- return(true);
-}
-
-/** Executes a given SQL statement using the InnoDB internal SQL parser.
-This function will free the pinfo object.
-@param[in,out] pinfo pinfo to pass to que_eval_sql() must already
-have any literals bound to it
-@param[in] sql SQL string to execute
-@param[in,out] trx in case of NULL the function will allocate and
-free the trx object. If it is not NULL then it will be rolled back
-only in the case of error, but not freed.
-@return DB_SUCCESS or error code */
-static
-dberr_t
-dict_stats_exec_sql(
- pars_info_t* pinfo,
- const char* sql,
- trx_t* trx)
-{
- dberr_t err;
- bool trx_started = false;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mutex_own(&dict_sys->mutex));
-
- if (!dict_stats_persistent_storage_check(true)) {
- pars_info_free(pinfo);
- return(DB_STATS_DO_NOT_EXIST);
- }
-
- if (trx == NULL) {
- trx = trx_allocate_for_background();
- trx_start_if_not_started(trx);
- trx_started = true;
- }
-
- err = que_eval_sql(pinfo, sql, FALSE, trx); /* pinfo is freed here */
-
- DBUG_EXECUTE_IF("stats_index_error",
- if (!trx_started) {
- err = DB_STATS_DO_NOT_EXIST;
- trx->error_state = DB_STATS_DO_NOT_EXIST;
- });
-
- if (!trx_started && err == DB_SUCCESS) {
- return(DB_SUCCESS);
- }
-
- if (err == DB_SUCCESS) {
- trx_commit_for_mysql(trx);
- } else {
- trx->op_info = "rollback of internal trx on stats tables";
- trx->dict_operation_lock_mode = RW_X_LATCH;
- trx_rollback_to_savepoint(trx, NULL);
- trx->dict_operation_lock_mode = 0;
- trx->op_info = "";
- ut_a(trx->error_state == DB_SUCCESS);
- }
-
- if (trx_started) {
- trx_free_for_background(trx);
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Duplicate a table object and its indexes.
-This function creates a dummy dict_table_t object and initializes the
-following table and index members:
-dict_table_t::id (copied)
-dict_table_t::heap (newly created)
-dict_table_t::name (copied)
-dict_table_t::corrupted (copied)
-dict_table_t::indexes<> (newly created)
-dict_table_t::magic_n
-for each entry in dict_table_t::indexes, the following are initialized:
-(indexes that have DICT_FTS set in index->type are skipped)
-dict_index_t::id (copied)
-dict_index_t::name (copied)
-dict_index_t::table_name (points to the copied table name)
-dict_index_t::table (points to the above semi-initialized object)
-dict_index_t::type (copied)
-dict_index_t::to_be_dropped (copied)
-dict_index_t::online_status (copied)
-dict_index_t::n_uniq (copied)
-dict_index_t::fields[] (newly created, only first n_uniq, only fields[i].name)
-dict_index_t::indexes<> (newly created)
-dict_index_t::stat_n_diff_key_vals[] (only allocated, left uninitialized)
-dict_index_t::stat_n_sample_sizes[] (only allocated, left uninitialized)
-dict_index_t::stat_n_non_null_key_vals[] (only allocated, left uninitialized)
-dict_index_t::magic_n
-The returned object should be freed with dict_stats_table_clone_free()
-when no longer needed.
-@return incomplete table object */
-static
-dict_table_t*
-dict_stats_table_clone_create(
-/*==========================*/
- const dict_table_t* table) /*!< in: table whose stats to copy */
-{
- size_t heap_size;
- dict_index_t* index;
-
- /* Estimate the size needed for the table and all of its indexes */
-
- heap_size = 0;
- heap_size += sizeof(dict_table_t);
- heap_size += strlen(table->name) + 1;
-
- for (index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
-
- if (dict_stats_should_ignore_index(index)) {
- continue;
- }
-
- ut_ad(!dict_index_is_univ(index));
-
- ulint n_uniq = dict_index_get_n_unique(index);
-
- heap_size += sizeof(dict_index_t);
- heap_size += strlen(index->name) + 1;
- heap_size += n_uniq * sizeof(index->fields[0]);
- for (ulint i = 0; i < n_uniq; i++) {
- heap_size += strlen(index->fields[i].name) + 1;
- }
- heap_size += n_uniq * sizeof(index->stat_n_diff_key_vals[0]);
- heap_size += n_uniq * sizeof(index->stat_n_sample_sizes[0]);
- heap_size += n_uniq * sizeof(index->stat_n_non_null_key_vals[0]);
- }
-
- /* Allocate the memory and copy the members */
-
- mem_heap_t* heap;
-
- heap = mem_heap_create(heap_size);
-
- dict_table_t* t;
-
- t = (dict_table_t*) mem_heap_alloc(heap, sizeof(*t));
-
- UNIV_MEM_ASSERT_RW_ABORT(&table->id, sizeof(table->id));
- t->id = table->id;
-
- t->heap = heap;
-
- UNIV_MEM_ASSERT_RW_ABORT(table->name, strlen(table->name) + 1);
- t->name = (char*) mem_heap_strdup(heap, table->name);
-
- t->corrupted = table->corrupted;
-
- /* This private object "t" is not shared with other threads, so
- we do not need the stats_latch (thus we pass false below). The
- dict_table_stats_lock()/unlock() routines will do nothing. */
- dict_table_stats_latch_create(t, false);
-
- UT_LIST_INIT(t->indexes);
-
- for (index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
-
- if (dict_stats_should_ignore_index(index)) {
- continue;
- }
-
- ut_ad(!dict_index_is_univ(index));
-
- dict_index_t* idx;
-
- idx = (dict_index_t*) mem_heap_alloc(heap, sizeof(*idx));
-
- UNIV_MEM_ASSERT_RW_ABORT(&index->id, sizeof(index->id));
- idx->id = index->id;
-
- UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name) + 1);
- idx->name = (char*) mem_heap_strdup(heap, index->name);
-
- idx->table_name = t->name;
-
- idx->table = t;
-
- idx->type = index->type;
-
- idx->to_be_dropped = 0;
-
- idx->online_status = ONLINE_INDEX_COMPLETE;
-
- idx->n_uniq = index->n_uniq;
-
- idx->fields = (dict_field_t*) mem_heap_alloc(
- heap, idx->n_uniq * sizeof(idx->fields[0]));
-
- for (ulint i = 0; i < idx->n_uniq; i++) {
- UNIV_MEM_ASSERT_RW_ABORT(index->fields[i].name, strlen(index->fields[i].name) + 1);
- idx->fields[i].name = (char*) mem_heap_strdup(
- heap, index->fields[i].name);
- }
-
- /* hook idx into t->indexes */
- UT_LIST_ADD_LAST(indexes, t->indexes, idx);
-
- idx->stat_n_diff_key_vals = (ib_uint64_t*) mem_heap_alloc(
- heap,
- idx->n_uniq * sizeof(idx->stat_n_diff_key_vals[0]));
-
- idx->stat_n_sample_sizes = (ib_uint64_t*) mem_heap_alloc(
- heap,
- idx->n_uniq * sizeof(idx->stat_n_sample_sizes[0]));
-
- idx->stat_n_non_null_key_vals = (ib_uint64_t*) mem_heap_alloc(
- heap,
- idx->n_uniq * sizeof(idx->stat_n_non_null_key_vals[0]));
- ut_d(idx->magic_n = DICT_INDEX_MAGIC_N);
-
- idx->stat_defrag_n_page_split = 0;
- idx->stat_defrag_n_pages_freed = 0;
- }
-
- ut_d(t->magic_n = DICT_TABLE_MAGIC_N);
-
- return(t);
-}
-
-/*********************************************************************//**
-Free the resources occupied by an object returned by
-dict_stats_table_clone_create(). */
-static
-void
-dict_stats_table_clone_free(
-/*========================*/
- dict_table_t* t) /*!< in: dummy table object to free */
-{
- dict_table_stats_latch_destroy(t);
- mem_heap_free(t->heap);
-}
-
-/*********************************************************************//**
-Write all zeros (or 1 where it makes sense) into an index
-statistics members. The resulting stats correspond to an empty index.
-The caller must own index's table stats latch in X mode
-(dict_table_stats_lock(table, RW_X_LATCH)) */
-static
-void
-dict_stats_empty_index(
-/*===================*/
- dict_index_t* index, /*!< in/out: index */
- bool empty_defrag_stats)
- /*!< in: whether to empty defrag stats */
-{
- ut_ad(!(index->type & DICT_FTS));
- ut_ad(!dict_index_is_univ(index));
-
- ulint n_uniq = index->n_uniq;
-
- for (ulint i = 0; i < n_uniq; i++) {
- index->stat_n_diff_key_vals[i] = 0;
- index->stat_n_sample_sizes[i] = 1;
- index->stat_n_non_null_key_vals[i] = 0;
- }
-
- index->stat_index_size = 1;
- index->stat_n_leaf_pages = 1;
-
- if (empty_defrag_stats) {
- dict_stats_empty_defrag_stats(index);
- dict_stats_empty_defrag_summary(index);
- }
-}
-
-/**********************************************************************//**
-Clear defragmentation summary. */
-UNIV_INTERN
-void
-dict_stats_empty_defrag_summary(
-/*==================*/
- dict_index_t* index) /*!< in: index to clear defragmentation stats */
-{
- index->stat_defrag_n_pages_freed = 0;
-}
-
-/**********************************************************************//**
-Clear defragmentation related index stats. */
-UNIV_INTERN
-void
-dict_stats_empty_defrag_stats(
-/*==================*/
- dict_index_t* index) /*!< in: index to clear defragmentation stats */
-{
- index->stat_defrag_modified_counter = 0;
- index->stat_defrag_n_page_split = 0;
-}
-
-/*********************************************************************//**
-Write all zeros (or 1 where it makes sense) into a table and its indexes'
-statistics members. The resulting stats correspond to an empty table. */
-static
-void
-dict_stats_empty_table(
-/*===================*/
- dict_table_t* table, /*!< in/out: table */
- bool empty_defrag_stats)
- /*!< in: whether to empty defrag stats */
-{
- /* Zero the stats members */
-
- dict_table_stats_lock(table, RW_X_LATCH);
-
- table->stat_n_rows = 0;
- table->stat_clustered_index_size = 1;
- /* 1 page for each index, not counting the clustered */
- table->stat_sum_of_other_index_sizes
- = UT_LIST_GET_LEN(table->indexes) - 1;
- table->stat_modified_counter = 0;
-
- dict_index_t* index;
-
- for (index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
-
- if (index->type & DICT_FTS) {
- continue;
- }
-
- ut_ad(!dict_index_is_univ(index));
-
- dict_stats_empty_index(index, empty_defrag_stats);
- }
-
- table->stat_initialized = TRUE;
-
- dict_table_stats_unlock(table, RW_X_LATCH);
-}
-
-/*********************************************************************//**
-Check whether index's stats are initialized (assert if they are not). */
-static
-void
-dict_stats_assert_initialized_index(
-/*================================*/
- const dict_index_t* index) /*!< in: index */
-{
- UNIV_MEM_ASSERT_RW_ABORT(
- index->stat_n_diff_key_vals,
- index->n_uniq * sizeof(index->stat_n_diff_key_vals[0]));
-
- UNIV_MEM_ASSERT_RW_ABORT(
- index->stat_n_sample_sizes,
- index->n_uniq * sizeof(index->stat_n_sample_sizes[0]));
-
- UNIV_MEM_ASSERT_RW_ABORT(
- index->stat_n_non_null_key_vals,
- index->n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));
-
- UNIV_MEM_ASSERT_RW_ABORT(
- &index->stat_index_size,
- sizeof(index->stat_index_size));
-
- UNIV_MEM_ASSERT_RW_ABORT(
- &index->stat_n_leaf_pages,
- sizeof(index->stat_n_leaf_pages));
-}
-
-/*********************************************************************//**
-Check whether table's stats are initialized (assert if they are not). */
-static
-void
-dict_stats_assert_initialized(
-/*==========================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_a(table->stat_initialized);
-
- UNIV_MEM_ASSERT_RW_ABORT(&table->stats_last_recalc,
- sizeof(table->stats_last_recalc));
-
- UNIV_MEM_ASSERT_RW_ABORT(&table->stat_persistent,
- sizeof(table->stat_persistent));
-
- UNIV_MEM_ASSERT_RW_ABORT(&table->stats_auto_recalc,
- sizeof(table->stats_auto_recalc));
-
- UNIV_MEM_ASSERT_RW_ABORT(&table->stats_sample_pages,
- sizeof(table->stats_sample_pages));
-
- UNIV_MEM_ASSERT_RW_ABORT(&table->stat_n_rows,
- sizeof(table->stat_n_rows));
-
- UNIV_MEM_ASSERT_RW_ABORT(&table->stat_clustered_index_size,
- sizeof(table->stat_clustered_index_size));
-
- UNIV_MEM_ASSERT_RW_ABORT(&table->stat_sum_of_other_index_sizes,
- sizeof(table->stat_sum_of_other_index_sizes));
-
- UNIV_MEM_ASSERT_RW_ABORT(&table->stat_modified_counter,
- sizeof(table->stat_modified_counter));
-
- UNIV_MEM_ASSERT_RW_ABORT(&table->stats_bg_flag,
- sizeof(table->stats_bg_flag));
-
- for (dict_index_t* index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
-
- if (!dict_stats_should_ignore_index(index)) {
- dict_stats_assert_initialized_index(index);
- }
- }
-}
-
-#define INDEX_EQ(i1, i2) \
- ((i1) != NULL \
- && (i2) != NULL \
- && (i1)->id == (i2)->id \
- && strcmp((i1)->name, (i2)->name) == 0)
-
-/*********************************************************************//**
-Transfer the table-level and the per-index statistics of src into dst.
-Indexes are paired by id and name (see INDEX_EQ). An index that exists
-only in src is skipped; an index that exists only in dst receives the
-statistics of an empty index. */
-static
-void
-dict_stats_copy(
-/*============*/
-	dict_table_t*		dst,	/*!< in/out: destination table */
-	const dict_table_t*	src,	/*!< in: source table */
-	bool reset_ignored_indexes)	/*!< in: if true, set ignored indexes
-					to have the same statistics as if
-					the table was empty */
-{
-	/* table-wide members first */
-	dst->stats_last_recalc = src->stats_last_recalc;
-	dst->stat_n_rows = src->stat_n_rows;
-	dst->stat_clustered_index_size = src->stat_clustered_index_size;
-	dst->stat_sum_of_other_index_sizes = src->stat_sum_of_other_index_sizes;
-	dst->stat_modified_counter = src->stat_modified_counter;
-
-	dict_index_t*	to = dict_table_get_first_index(dst);
-	dict_index_t*	from = dict_table_get_first_index(src);
-
-	/* Walk both index lists in step; "from" stops advancing once it
-	has run off the end of the src list. */
-	for (;
-	     to != NULL;
-	     to = dict_table_get_next_index(to),
-	     (from != NULL
-	      && (from = dict_table_get_next_index(from)))) {
-
-		if (dict_stats_should_ignore_index(to)) {
-			/* An ignored index is left alone unless the caller
-			asked for a reset; FT indexes have no statistics
-			and are never reset. */
-			if (!reset_ignored_indexes
-			    || (to->type & DICT_FTS)) {
-				continue;
-			}
-
-			dict_stats_empty_index(to, true);
-		}
-
-		ut_ad(!dict_index_is_univ(to));
-
-		if (!INDEX_EQ(from, to)) {
-			/* The positional pairing did not match, look
-			through all indexes of src for the counterpart. */
-			from = dict_table_get_first_index(src);
-
-			while (from != NULL && !INDEX_EQ(from, to)) {
-				from = dict_table_get_next_index(from);
-			}
-
-			if (from == NULL) {
-				/* no counterpart in src */
-				dict_stats_empty_index(to, true);
-				continue;
-			}
-		}
-
-		ulint	n_copy_el = to->n_uniq;
-
-		if (n_copy_el > from->n_uniq) {
-			/* src has fewer unique columns, so the copies below
-			will not reach every element of dst; initialize all
-			of them beforehand. */
-			n_copy_el = from->n_uniq;
-			dict_stats_empty_index(to, true);
-		}
-
-		memmove(to->stat_n_diff_key_vals,
-			from->stat_n_diff_key_vals,
-			n_copy_el * sizeof(to->stat_n_diff_key_vals[0]));
-
-		memmove(to->stat_n_sample_sizes,
-			from->stat_n_sample_sizes,
-			n_copy_el * sizeof(to->stat_n_sample_sizes[0]));
-
-		memmove(to->stat_n_non_null_key_vals,
-			from->stat_n_non_null_key_vals,
-			n_copy_el * sizeof(to->stat_n_non_null_key_vals[0]));
-
-		to->stat_index_size = from->stat_index_size;
-		to->stat_n_leaf_pages = from->stat_n_leaf_pages;
-
-		to->stat_defrag_modified_counter =
-			from->stat_defrag_modified_counter;
-		to->stat_defrag_n_pages_freed =
-			from->stat_defrag_n_pages_freed;
-		to->stat_defrag_n_page_split =
-			from->stat_defrag_n_page_split;
-	}
-
-	dst->stat_initialized = TRUE;
-}
-
-/*********************************************************************//**
-Make a private copy of the statistics of a table and of its indexes.
-A dummy dict_table_t object is obtained from
-dict_stats_table_clone_create(), the statistics are copied into it with
-dict_stats_copy() and, in addition, these members are taken over from the
-input table:
-dict_table_t::stat_persistent
-dict_table_t::stats_auto_recalc
-dict_table_t::stats_sample_pages
-dict_table_t::stats_bg_flag
-The copy is made while holding dict_sys->mutex and an S-latch on the
-table's statistics.
-The returned object should be freed with dict_stats_snapshot_free()
-when no longer needed.
-@return incomplete table object */
-static
-dict_table_t*
-dict_stats_snapshot_create(
-/*=======================*/
-	dict_table_t*	table)	/*!< in: table whose stats to copy */
-{
-	mutex_enter(&dict_sys->mutex);
-	dict_table_stats_lock(table, RW_S_LATCH);
-
-	dict_stats_assert_initialized(table);
-
-	dict_table_t*	snapshot = dict_stats_table_clone_create(table);
-
-	dict_stats_copy(snapshot, table, false);
-
-	/* members that dict_stats_copy() does not transfer */
-	snapshot->stat_persistent = table->stat_persistent;
-	snapshot->stats_auto_recalc = table->stats_auto_recalc;
-	snapshot->stats_sample_pages = table->stats_sample_pages;
-	snapshot->stats_bg_flag = table->stats_bg_flag;
-
-	dict_table_stats_unlock(table, RW_S_LATCH);
-	mutex_exit(&dict_sys->mutex);
-
-	return(snapshot);
-}
-
-/*********************************************************************//**
-Release a statistics snapshot that was obtained from
-dict_stats_snapshot_create(). The work is delegated to
-dict_stats_table_clone_free(). */
-static
-void
-dict_stats_snapshot_free(
-/*=====================*/
-	dict_table_t*	t)	/*!< in: dummy table object to free */
-{
-	dict_stats_table_clone_free(t);
-}
-
-/*********************************************************************//**
-Recompute the transient (not saved on disk) statistics of one index.
-The total size and the number of leaf pages are read with btr_get_size()
-and then the number of different key values is estimated. When
-btr_get_size() reports ULINT_UNDEFINED, or when the innodb_force_recovery
-level forbids looking at the index, the index is given the statistics of
-an empty index instead.
-The defragmentation related statistics are not updated here; only
-persistent statistics support them. */
-static
-void
-dict_stats_update_transient_for_index(
-/*==================================*/
-	dict_index_t*	index)	/*!< in/out: index */
-{
-	if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
-	    && (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO
-		|| !dict_index_is_clust(index))) {
-		/* With a high innodb_force_recovery level the statistics
-		are not calculated, because a badly corrupted index could
-		crash the calculation. Bogus cardinality statistics are
-		set up instead, so that the data stays queryable, also
-		via secondary indexes. */
-		dict_stats_empty_index(index, false);
-		return;
-	}
-
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-	if (ibuf_debug && !dict_index_is_clust(index)) {
-		dict_stats_empty_index(index, false);
-		return;
-	}
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
-	mtr_t	mtr;
-
-	mtr_start(&mtr);
-	mtr_s_lock(dict_index_get_lock(index), &mtr);
-
-	ulint	size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
-
-	if (size != ULINT_UNDEFINED) {
-		index->stat_index_size = size;
-
-		/* from here on "size" holds the leaf page count */
-		size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr);
-	}
-
-	mtr_commit(&mtr);
-
-	if (size == ULINT_UNDEFINED) {
-		dict_stats_empty_index(index, false);
-		return;
-	}
-
-	if (size == 0) {
-		/* The root node of the tree is a leaf */
-		size = 1;
-	}
-
-	index->stat_n_leaf_pages = size;
-
-	/* Skip the estimate if table decryption has failed or the
-	table is already marked as corrupted. */
-	if (index->is_readable()) {
-		btr_estimate_number_of_different_key_vals(index);
-	}
-}
-
-/*********************************************************************//**
-Recompute the transient (not saved on disk) statistics of a table and of
-its indexes. A discarded table, or a table without any index, is given
-the statistics of an empty table. Otherwise every index that is neither
-an FT index nor ignored is recalculated with
-dict_stats_update_transient_for_index() and the table-level numbers are
-derived from the first index of the table. */
-UNIV_INTERN
-void
-dict_stats_update_transient(
-/*========================*/
-	dict_table_t*	table)	/*!< in/out: table */
-{
-	ulint		total_size = 0;
-	dict_index_t*	index = dict_table_get_first_index(table);
-
-	if (dict_table_is_discarded(table)) {
-		/* Nothing to do. */
-		dict_stats_empty_table(table, false);
-		return;
-	}
-
-	if (index == NULL) {
-		/* Table definition is corrupt */
-		char	buf[MAX_FULL_NAME_LEN];
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr, " InnoDB: table %s has no indexes. "
-			"Cannot calculate statistics.\n",
-			ut_format_name(table->name, TRUE, buf, sizeof(buf)));
-		dict_stats_empty_table(table, false);
-		return;
-	}
-
-	/* Collect the size of each index and the approximate number of
-	different key values it has. */
-	for (; index != NULL; index = dict_table_get_next_index(index)) {
-
-		ut_ad(!dict_index_is_univ(index));
-
-		if (!(index->type & DICT_FTS)) {
-
-			dict_stats_empty_index(index, false);
-
-			if (!dict_stats_should_ignore_index(index)) {
-
-				/* Stop if table decryption has failed or
-				the table is already marked as corrupted. */
-				if (!index->is_readable()) {
-					break;
-				}
-
-				dict_stats_update_transient_for_index(index);
-
-				total_size += index->stat_index_size;
-			}
-		}
-	}
-
-	dict_index_t*	first = dict_table_get_first_index(table);
-
-	table->stat_n_rows = first->stat_n_diff_key_vals[
-		dict_index_get_n_unique(first) - 1];
-
-	table->stat_clustered_index_size = first->stat_index_size;
-
-	table->stat_sum_of_other_index_sizes = total_size
-		- first->stat_index_size;
-
-	table->stats_last_recalc = ut_time();
-
-	table->stat_modified_counter = 0;
-
-	table->stat_initialized = TRUE;
-}
-
-/* @{ Pseudo code about the relation between the following functions
-
-let N = N_SAMPLE_PAGES(index)
-
-dict_stats_analyze_index()
- for each n_prefix
- search for good enough level:
- dict_stats_analyze_index_level() // only called if level has <= N pages
- // full scan of the level in one mtr
- collect statistics about the given level
- if we are not satisfied with the level, search next lower level
- we have found a good enough level here
- dict_stats_analyze_index_for_n_prefix(that level, stats collected above)
- // full scan of the level in one mtr
- dive below some records and analyze the leaf page there:
- dict_stats_analyze_index_below_cur()
-@} */
-
-/*********************************************************************//**
-Find the total number of records and the number of distinct keys on a given
-level in an index. Each of the 1..n_uniq prefixes is looked up and the
-results are saved in the array n_diff[0] .. n_diff[n_uniq - 1]. The total
-number of records on the level is saved in total_recs and the number of
-pages on the level in total_pages.
-Also, if n_diff_boundaries is not NULL, the index of the last record in each
-group of equal records is saved in n_diff_boundaries[0..n_uniq - 1]; record
-indexing starts from the leftmost record on the level and continues across
-page boundaries, counting from 0.
-On the leaf level (level 0) delete-marked records are skipped and not
-counted, unless srv_stats_include_delete_marked is set.
-The caller must hold an S-latch on the index lock within mtr; this is
-checked by a debug assertion. */
-static
-void
-dict_stats_analyze_index_level(
-/*===========================*/
- dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: level */
- ib_uint64_t* n_diff, /*!< out: array for number of
- distinct keys for all prefixes */
- ib_uint64_t* total_recs, /*!< out: total number of records */
- ib_uint64_t* total_pages, /*!< out: total number of pages */
- boundaries_t* n_diff_boundaries,/*!< out: boundaries of the groups
- of distinct keys; may be NULL, in which
- case no boundaries are recorded */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint n_uniq;
- mem_heap_t* heap;
- btr_pcur_t pcur;
- const page_t* page;
- const rec_t* rec;
- const rec_t* prev_rec;
- bool prev_rec_is_copied;
- byte* prev_rec_buf = NULL;
- ulint prev_rec_buf_size = 0;
- ulint* rec_offsets;
- ulint* prev_rec_offsets;
- ulint i;
-
- DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu)\n", __func__,
- index->table->name, index->name, level);
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_S_LOCK));
-
- n_uniq = dict_index_get_n_unique(index);
-
- /* elements in the n_diff array are 0..n_uniq-1 (inclusive) */
- memset(n_diff, 0x0, n_uniq * sizeof(n_diff[0]));
-
- /* Allocate space for the offsets header (the allocation size at
- offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
- so that this will never be less than the size calculated in
- rec_get_offsets_func(). */
- i = (REC_OFFS_HEADER_SIZE + 1 + 1) + index->n_fields;
-
- heap = mem_heap_create((2 * sizeof *rec_offsets) * i);
- rec_offsets = static_cast<ulint*>(
- mem_heap_alloc(heap, i * sizeof *rec_offsets));
- prev_rec_offsets = static_cast<ulint*>(
- mem_heap_alloc(heap, i * sizeof *prev_rec_offsets));
- rec_offs_set_n_alloc(rec_offsets, i);
- rec_offs_set_n_alloc(prev_rec_offsets, i);
-
- /* reset the dynamic arrays n_diff_boundaries[0..n_uniq-1] */
- if (n_diff_boundaries != NULL) {
- for (i = 0; i < n_uniq; i++) {
- n_diff_boundaries[i].erase(
- n_diff_boundaries[i].begin(),
- n_diff_boundaries[i].end());
- }
- }
-
- /* Position pcur on the leftmost record on the leftmost page
- on the desired level. */
-
- btr_pcur_open_at_index_side(
- true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
- &pcur, true, level, mtr);
- btr_pcur_move_to_next_on_page(&pcur);
-
- page = btr_pcur_get_page(&pcur);
-
- /* The page must not be empty, except when
- it is the root page (and the whole index is empty). */
- ut_ad(btr_pcur_is_on_user_rec(&pcur) || page_is_leaf(page));
- ut_ad(btr_pcur_get_rec(&pcur)
- == page_rec_get_next_const(page_get_infimum_rec(page)));
-
- /* check that we are indeed on the desired level */
- ut_a(btr_page_get_level(page, mtr) == level);
-
- /* there should not be any pages on the left */
- ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
-
- /* check whether the first record on the leftmost page is marked
- as such, if we are on a non-leaf level */
- ut_a((level == 0)
- == !(REC_INFO_MIN_REC_FLAG & rec_get_info_bits(
- btr_pcur_get_rec(&pcur), page_is_comp(page))));
-
- prev_rec = NULL;
- prev_rec_is_copied = false;
-
- /* no records by default */
- *total_recs = 0;
-
- *total_pages = 0;
-
- /* iterate over all user records on this level
- and compare each two adjacent ones, even the last on page
- X and the first on page X+1 */
- for (;
- btr_pcur_is_on_user_rec(&pcur);
- btr_pcur_move_to_next_user_rec(&pcur, mtr)) {
-
- ulint matched_fields = 0;
- ulint matched_bytes = 0;
- bool rec_is_last_on_page;
-
- rec = btr_pcur_get_rec(&pcur);
-
- /* If rec and prev_rec are on different pages, then prev_rec
- must have been copied, because we hold latch only on the page
- where rec resides. */
- if (prev_rec != NULL
- && page_align(rec) != page_align(prev_rec)) {
-
- ut_a(prev_rec_is_copied);
- }
-
- rec_is_last_on_page =
- page_rec_is_supremum(page_rec_get_next_const(rec));
-
- /* increment the pages counter at the end of each page */
- if (rec_is_last_on_page) {
-
- (*total_pages)++;
- }
-
- /* Skip delete-marked records on the leaf level. If we
- do not skip them, then ANALYZE quickly after DELETE
- could count them or not (purge may have already wiped
- them away) which brings non-determinism. We skip only
- leaf-level delete marks because delete marks on
- non-leaf level do not make sense. */
-
- if (level == 0
- && !srv_stats_include_delete_marked
- && rec_get_deleted_flag(
- rec,
- page_is_comp(btr_pcur_get_page(&pcur)))) {
-
- if (rec_is_last_on_page
- && !prev_rec_is_copied
- && prev_rec != NULL) {
- /* copy prev_rec: the cursor is about to leave
- this page, and prev_rec still points into it */
-
- prev_rec_offsets = rec_get_offsets(
- prev_rec, index, prev_rec_offsets,
- n_uniq, &heap);
-
- prev_rec = rec_copy_prefix_to_buf(
- prev_rec, index,
- rec_offs_n_fields(prev_rec_offsets),
- &prev_rec_buf, &prev_rec_buf_size);
-
- prev_rec_is_copied = true;
- }
-
- continue;
- }
-
- rec_offsets = rec_get_offsets(
- rec, index, rec_offsets, n_uniq, &heap);
-
- (*total_recs)++;
-
- if (prev_rec != NULL) {
- prev_rec_offsets = rec_get_offsets(
- prev_rec, index, prev_rec_offsets,
- n_uniq, &heap);
-
- cmp_rec_rec_with_match(rec,
- prev_rec,
- rec_offsets,
- prev_rec_offsets,
- index,
- FALSE,
- &matched_fields,
- &matched_bytes);
-
- for (i = matched_fields; i < n_uniq; i++) {
-
- if (n_diff_boundaries != NULL) {
- /* push the index of the previous
- record, that is - the last one from
- a group of equal keys */
-
- ib_uint64_t idx;
-
- /* the index of the current record
- is total_recs - 1, the index of the
- previous record is total_recs - 2;
- we know that idx is not going to
- become negative here because if we
- are in this branch then there is a
- previous record and thus
- total_recs >= 2 */
- idx = *total_recs - 2;
-
- n_diff_boundaries[i].push_back(idx);
- }
-
- /* increment the number of different keys
- for n_prefix=i+1 (e.g. if i=0 then we increment
- for n_prefix=1 which is stored in n_diff[0]) */
- n_diff[i]++;
- }
- } else {
- /* this is the first non-delete marked record */
- for (i = 0; i < n_uniq; i++) {
- n_diff[i] = 1;
- }
- }
-
- if (rec_is_last_on_page) {
- /* end of a page has been reached */
-
- /* we need to copy the record instead of assigning
- like prev_rec = rec; because when we traverse the
- records on this level at some point we will jump from
- one page to the next and then rec and prev_rec will
- be on different pages and
- btr_pcur_move_to_next_user_rec() will release the
- latch on the page that prev_rec is on */
- prev_rec = rec_copy_prefix_to_buf(
- rec, index, rec_offs_n_fields(rec_offsets),
- &prev_rec_buf, &prev_rec_buf_size);
- prev_rec_is_copied = true;
-
- } else {
- /* still on the same page, the next call to
- btr_pcur_move_to_next_user_rec() will not jump
- on the next page, we can simply assign pointers
- instead of copying the records like above */
-
- prev_rec = rec;
- prev_rec_is_copied = false;
- }
- }
-
- /* if *total_pages is left untouched then the above loop was not
- entered at all and there is one page in the whole tree which is
- empty or the loop was entered but this is level 0, contains one page
- and all records are delete-marked */
- if (*total_pages == 0) {
-
- ut_ad(level == 0);
- ut_ad(*total_recs == 0);
-
- *total_pages = 1;
- }
-
- /* if there are records on this level and boundaries
- should be saved */
- if (*total_recs > 0 && n_diff_boundaries != NULL) {
-
- /* remember the index of the last record on the level as the
- last one from the last group of equal keys; this holds for
- all possible prefixes */
- for (i = 0; i < n_uniq; i++) {
- ib_uint64_t idx;
-
- idx = *total_recs - 1;
-
- n_diff_boundaries[i].push_back(idx);
- }
- }
-
- /* now in n_diff_boundaries[i] there are exactly n_diff[i] integers,
- for i=0..n_uniq-1 */
-
-#ifdef UNIV_STATS_DEBUG
- for (i = 0; i < n_uniq; i++) {
-
- DEBUG_PRINTF(" %s(): total recs: " UINT64PF
- ", total pages: " UINT64PF
- ", n_diff[%lu]: " UINT64PF "\n",
- __func__, *total_recs,
- *total_pages,
- i, n_diff[i]);
-
-#if 0
- if (n_diff_boundaries != NULL) {
- ib_uint64_t j;
-
- DEBUG_PRINTF(" %s(): boundaries[%lu]: ",
- __func__, i);
-
- for (j = 0; j < n_diff[i]; j++) {
- ib_uint64_t idx;
-
- idx = n_diff_boundaries[i][j];
-
- DEBUG_PRINTF(UINT64PF "=" UINT64PF ", ",
- j, idx);
- }
- DEBUG_PRINTF("\n");
- }
-#endif
- }
-#endif /* UNIV_STATS_DEBUG */
-
- /* Release the latch on the last page, because that is not done by
- btr_pcur_close(). This function works also for non-leaf pages. */
- btr_leaf_page_release(btr_pcur_get_block(&pcur), BTR_SEARCH_LEAF, mtr);
-
- btr_pcur_close(&pcur);
-
- if (prev_rec_buf != NULL) {
-
- mem_free(prev_rec_buf);
- }
-
- mem_heap_free(heap);
-}
-
-/* Auxiliary enum that selects how dict_stats_scan_page() walks a page and
-when it stops. A "non-boring" record is one that differs from its right
-neighbor. @{ */
-enum page_scan_method_t {
- COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED,/* scan all records on
- the given page and count the number of
- distinct ones, ignoring delete-marked
- records (dict_stats_scan_page() then
- iterates with
- page_rec_get_next_non_del_marked()) */
- QUIT_ON_FIRST_NON_BORING,/* quit when the first record that differs
- from its right neighbor is found */
- COUNT_ALL_NON_BORING_INCLUDE_DEL_MARKED/* scan all records on
- the given page and count the number of
- distinct ones, including delete-marked
- records */
-};
-/* @} */
-
-/** Scan a page, reading records from left to right and counting the number
-of distinct records (looking only at the first n_prefix
-columns) and the number of external pages pointed by records from this page.
-If scan_method is QUIT_ON_FIRST_NON_BORING then the function
-will return as soon as it finds a record that does not match its neighbor
-to the right, which means that in the case of QUIT_ON_FIRST_NON_BORING the
-returned n_diff can either be 0 (empty page), 1 (the whole page has all keys
-equal) or 2 (the function found a non-boring record and returned).
-If scan_method is COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED then delete-marked
-records are stepped over; with the other two methods every record on the
-page is visited.
-@param[out] out_rec the record at which the scan stopped: the
-non-boring record if the scan quit early, otherwise the last record visited;
-NULL if no record was visited
-@param[out] offsets1 rec_get_offsets() working space (must
-be big enough)
-@param[out] offsets2 rec_get_offsets() working space (must
-be big enough)
-@param[in] index index of the page
-@param[in] page the page to scan
-@param[in] n_prefix look at the first n_prefix columns
-@param[in] scan_method scan to the end of the page or not
-@param[out] n_diff number of distinct records encountered
-@param[out] n_external_pages if this is non-NULL then it will be set
-to the number of externally stored pages which were encountered
-@return offsets1 or offsets2 (the offsets of *out_rec),
-or NULL if the page is empty and does not contain user records. */
-UNIV_INLINE
-ulint*
-dict_stats_scan_page(
- const rec_t** out_rec,
- ulint* offsets1,
- ulint* offsets2,
- dict_index_t* index,
- const page_t* page,
- ulint n_prefix,
- page_scan_method_t scan_method,
- ib_uint64_t* n_diff,
- ib_uint64_t* n_external_pages)
-{
- ulint* offsets_rec = offsets1;
- ulint* offsets_next_rec = offsets2;
- const rec_t* rec;
- const rec_t* next_rec;
- /* A dummy heap, to be passed to rec_get_offsets().
- Because offsets1,offsets2 should be big enough,
- this memory heap should never be used. */
- mem_heap_t* heap = NULL;
- const rec_t* (*get_next)(const rec_t*);
-
- if (scan_method == COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED) {
- get_next = page_rec_get_next_non_del_marked;
- } else {
- get_next = page_rec_get_next_const;
- }
-
- const bool should_count_external_pages = n_external_pages != NULL;
-
- if (should_count_external_pages) {
- *n_external_pages = 0;
- }
-
- rec = get_next(page_get_infimum_rec(page));
-
- if (page_rec_is_supremum(rec)) {
- /* the page is empty or contains only delete-marked records */
- *n_diff = 0;
- *out_rec = NULL;
- return(NULL);
- }
-
- offsets_rec = rec_get_offsets(rec, index, offsets_rec,
- ULINT_UNDEFINED, &heap);
-
- if (should_count_external_pages) {
- *n_external_pages += btr_rec_get_externally_stored_len(
- rec, offsets_rec);
- }
-
- next_rec = get_next(rec);
-
- *n_diff = 1;
-
- while (!page_rec_is_supremum(next_rec)) {
-
- ulint matched_fields = 0;
- ulint matched_bytes = 0;
-
- offsets_next_rec = rec_get_offsets(next_rec, index,
- offsets_next_rec,
- ULINT_UNDEFINED,
- &heap);
-
- /* check whether rec != next_rec when looking at
- the first n_prefix fields */
- cmp_rec_rec_with_match(rec, next_rec,
- offsets_rec, offsets_next_rec,
- index, FALSE, &matched_fields,
- &matched_bytes);
-
- if (matched_fields < n_prefix) {
- /* rec != next_rec, => rec is non-boring */
-
- (*n_diff)++;
-
- if (scan_method == QUIT_ON_FIRST_NON_BORING) {
- goto func_exit;
- }
- }
-
- rec = next_rec;
- {
- /* Assign offsets_rec = offsets_next_rec
- so that offsets_rec matches with rec which
- was just assigned rec = next_rec above.
- Also need to point offsets_next_rec to the
- place where offsets_rec was pointing before
- because we have just 2 placeholders where
- data is actually stored:
- offsets1 and offsets2 and we
- are using them in circular fashion
- (offsets[_next]_rec are just pointers to
- those placeholders). */
- ulint* offsets_tmp;
- offsets_tmp = offsets_rec;
- offsets_rec = offsets_next_rec;
- offsets_next_rec = offsets_tmp;
- }
-
- if (should_count_external_pages) {
- *n_external_pages += btr_rec_get_externally_stored_len(
- rec, offsets_rec);
- }
-
- next_rec = get_next(next_rec);
- }
-
-func_exit:
- /* offsets1,offsets2 should have been big enough */
- ut_a(heap == NULL);
- *out_rec = rec;
- return(offsets_rec);
-}
-
-/** Dive below the current position of a cursor and calculate the number of
-distinct records on the leaf page, when looking at the first n_prefix
-columns. Also calculate the number of external pages pointed by records
-on the leaf page.
-The descent follows the first non-boring record of every non-leaf page. If a
-non-leaf page turns out to have all keys equal, the descent stops there and
-the function returns with *n_diff == 1 and *n_external_pages == 0.
-The function uses its own mini-transaction for the pages it reads and
-returns nothing; results are delivered through n_diff and n_external_pages.
-@param[in] cur cursor
-@param[in] n_prefix look at the first n_prefix columns
-when comparing records
-@param[out] n_diff number of distinct records
-@param[out] n_external_pages number of external pages */
-static
-void
-dict_stats_analyze_index_below_cur(
- const btr_cur_t* cur,
- ulint n_prefix,
- ib_uint64_t* n_diff,
- ib_uint64_t* n_external_pages)
-{
- dict_index_t* index;
- ulint space;
- ulint zip_size;
- buf_block_t* block;
- ulint page_no;
- const page_t* page;
- mem_heap_t* heap;
- const rec_t* rec;
- ulint* offsets1;
- ulint* offsets2;
- ulint* offsets_rec;
- ulint size;
- mtr_t mtr;
-
- index = btr_cur_get_index(cur);
-
- /* Allocate offsets for the record and the node pointer, for
- node pointer records. In a secondary index, the node pointer
- record will consist of all index fields followed by a child
- page number.
- Allocate space for the offsets header (the allocation size at
- offsets[0] and the REC_OFFS_HEADER_SIZE bytes), and n_fields + 1,
- so that this will never be less than the size calculated in
- rec_get_offsets_func(). */
- size = (1 + REC_OFFS_HEADER_SIZE) + 1 + dict_index_get_n_fields(index);
-
- heap = mem_heap_create(size * (sizeof *offsets1 + sizeof *offsets2));
-
- offsets1 = static_cast<ulint*>(mem_heap_alloc(
- heap, size * sizeof *offsets1));
-
- offsets2 = static_cast<ulint*>(mem_heap_alloc(
- heap, size * sizeof *offsets2));
-
- rec_offs_set_n_alloc(offsets1, size);
- rec_offs_set_n_alloc(offsets2, size);
-
- space = dict_index_get_space(index);
- zip_size = dict_table_zip_size(index->table);
-
- rec = btr_cur_get_rec(cur);
-
- offsets_rec = rec_get_offsets(rec, index, offsets1,
- ULINT_UNDEFINED, &heap);
-
- page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec);
-
- /* assume no external pages by default - in case we quit from this
- function without analyzing any leaf pages */
- *n_external_pages = 0;
-
- mtr_start(&mtr);
-
- /* descend to the leaf level on the B-tree */
- for (;;) {
-
- block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH,
- NULL /* no guessed block */,
- BUF_GET, __FILE__, __LINE__, &mtr);
-
- page = buf_block_get_frame(block);
-
- if (page_is_leaf(page)) {
- /* leaf level */
- break;
- }
- /* else */
-
- /* search for the first non-boring record on the page */
- offsets_rec = dict_stats_scan_page(
- &rec, offsets1, offsets2, index, page, n_prefix,
- QUIT_ON_FIRST_NON_BORING, n_diff, NULL);
-
- /* pages on level > 0 are not allowed to be empty */
- ut_a(offsets_rec != NULL);
- /* if page is not empty (offsets_rec != NULL) then n_diff must
- be > 0, otherwise there is a bug in dict_stats_scan_page() */
- ut_a(*n_diff > 0);
-
- if (*n_diff == 1) {
- mtr_commit(&mtr);
-
- /* page has all keys equal and the end of the page
- was reached by dict_stats_scan_page(), no need to
- descend to the leaf level */
- mem_heap_free(heap);
- /* can't get an estimate for n_external_pages here
- because we do not dive to the leaf level, assume no
- external pages (*n_external_pages was assigned to 0
- above). */
- return;
- }
- /* else */
-
- /* when we instruct dict_stats_scan_page() to quit on the
- first non-boring record it finds, then the returned n_diff
- can either be 0 (empty page), 1 (page has all keys equal) or
- 2 (non-boring record was found) */
- ut_a(*n_diff == 2);
-
- /* we have a non-boring record in rec, descend below it */
-
- page_no = btr_node_ptr_get_child_page_no(rec, offsets_rec);
- }
-
- /* make sure we got a leaf page as a result from the above loop */
- ut_ad(page_is_leaf(page));
-
- /* scan the leaf page and find the number of distinct keys,
- when looking only at the first n_prefix columns; also estimate
- the number of externally stored pages pointed by records on this
- page */
-
- offsets_rec = dict_stats_scan_page(
- &rec, offsets1, offsets2, index, page, n_prefix,
- srv_stats_include_delete_marked ?
- COUNT_ALL_NON_BORING_INCLUDE_DEL_MARKED:
- COUNT_ALL_NON_BORING_AND_SKIP_DEL_MARKED, n_diff,
- n_external_pages);
-
-#if 0
- DEBUG_PRINTF(" %s(): n_diff below page_no=%lu: " UINT64PF "\n",
- __func__, page_no, n_diff);
-#endif
-
- mtr_commit(&mtr);
- mem_heap_free(heap);
-}
-
-/** Input data that is used to calculate dict_index_t::stat_n_diff_key_vals[]
-for each n-columns prefix (n from 1 to n_uniq).
-dict_stats_analyze_index_for_n_prefix() expects level, n_diff_on_level and
-n_leaf_pages_to_analyze to be set by its caller, and it sets
-n_diff_all_analyzed_pages and n_external_pages_sum itself. */
-struct n_diff_data_t {
- /** Index of the level on which the descent through the btree
- stopped. level 0 is the leaf level. This is >= 1 because we
- avoid scanning the leaf level because it may contain too many
- pages and doing so is useless when combined with the random dives -
- if we are to scan the leaf level, this means a full scan and we can
- simply do that instead of fiddling with picking random records higher
- in the tree and to dive below them. At the start of the analyzing
- we may decide to do full scan of the leaf level, but then this
- structure is not used in that code path. */
- ulint level;
-
- /** Number of records on the level where the descent through the btree
- stopped. When we scan the btree from the root, we stop at some mid
- level, choose some records from it and dive below them towards a leaf
- page to analyze. */
- ib_uint64_t n_recs_on_level;
-
- /** Number of different key values that were found on the mid level. */
- ib_uint64_t n_diff_on_level;
-
- /** Number of leaf pages that are analyzed. This is also the same as
- the number of records that we pick from the mid level and dive below
- them. */
- ib_uint64_t n_leaf_pages_to_analyze;
-
- /** Cumulative sum of the number of different key values that were
- found on all analyzed pages. */
- ib_uint64_t n_diff_all_analyzed_pages;
-
- /** Cumulative sum of the number of external pages (stored outside of
- the btree but in the same file segment). */
- ib_uint64_t n_external_pages_sum;
-};
-
-/** Estimate the number of different key values in an index when looking at
-the first n_prefix columns. For a given level in an index select
-n_diff_data->n_leaf_pages_to_analyze records from that level and dive below
-them to the corresponding leaf pages, then scan those leaf pages and save the
-sampling results in n_diff_data->n_diff_all_analyzed_pages.
-@param[in] index index
-@param[in] n_prefix look at first 'n_prefix' columns when
-comparing records
-@param[in] boundaries a vector that contains
-n_diff_data->n_diff_on_level integers each of which represents the index (on
-level 'level', counting from left/smallest to right/biggest from 0) of the
-last record from each group of distinct keys
-@param[in,out] n_diff_data n_diff_all_analyzed_pages and
-n_external_pages_sum in this structure will be set by this function. The
-members level, n_diff_on_level and n_leaf_pages_to_analyze must be set by the
-caller in advance - they are used by some calculations inside this function
-@param[in,out] mtr mini-transaction */
-static
-void
-dict_stats_analyze_index_for_n_prefix(
- dict_index_t* index,
- ulint n_prefix,
- const boundaries_t* boundaries,
- n_diff_data_t* n_diff_data,
- mtr_t* mtr)
-{
- btr_pcur_t pcur;
- const page_t* page;
- ib_uint64_t rec_idx;
- ib_uint64_t i;
-
-#if 0
- DEBUG_PRINTF(" %s(table=%s, index=%s, level=%lu, n_prefix=%lu, "
- "n_diff_on_level=" UINT64PF ")\n",
- __func__, index->table->name, index->name, level,
- n_prefix, n_diff_data->n_diff_on_level);
-#endif
-
- ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
- MTR_MEMO_S_LOCK));
-
- /* Position pcur on the leftmost record on the leftmost page
- on the desired level. */
-
- btr_pcur_open_at_index_side(
- true, index, BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED,
- &pcur, true, n_diff_data->level, mtr);
- btr_pcur_move_to_next_on_page(&pcur);
-
- page = btr_pcur_get_page(&pcur);
-
- const rec_t* first_rec = btr_pcur_get_rec(&pcur);
-
- /* We shouldn't be scanning the leaf level. The caller of this function
- should have stopped the descend on level 1 or higher. */
- ut_ad(n_diff_data->level > 0);
- ut_ad(!page_is_leaf(page));
-
- /* The page must not be empty, except when
- it is the root page (and the whole index is empty). */
- ut_ad(btr_pcur_is_on_user_rec(&pcur));
- ut_ad(first_rec == page_rec_get_next_const(page_get_infimum_rec(page)));
-
- /* check that we are indeed on the desired level */
- ut_a(btr_page_get_level(page, mtr) == n_diff_data->level);
-
- /* there should not be any pages on the left */
- ut_a(btr_page_get_prev(page, mtr) == FIL_NULL);
-
- /* check whether the first record on the leftmost page is marked
- as such; we are on a non-leaf level */
- ut_a(rec_get_info_bits(first_rec, page_is_comp(page))
- & REC_INFO_MIN_REC_FLAG);
-
- const ib_uint64_t last_idx_on_level = boundaries->at(
- static_cast<unsigned>(n_diff_data->n_diff_on_level - 1));
-
- rec_idx = 0;
-
- n_diff_data->n_diff_all_analyzed_pages = 0;
- n_diff_data->n_external_pages_sum = 0;
-
- for (i = 0; i < n_diff_data->n_leaf_pages_to_analyze; i++) {
- /* there are n_diff_on_level elements
- in 'boundaries' and we divide those elements
- into n_leaf_pages_to_analyze segments, for example:
-
- let n_diff_on_level=100, n_leaf_pages_to_analyze=4, then:
- segment i=0: [0, 24]
- segment i=1: [25, 49]
- segment i=2: [50, 74]
- segment i=3: [75, 99] or
-
- let n_diff_on_level=1, n_leaf_pages_to_analyze=1, then:
- segment i=0: [0, 0] or
-
- let n_diff_on_level=2, n_leaf_pages_to_analyze=2, then:
- segment i=0: [0, 0]
- segment i=1: [1, 1] or
-
- let n_diff_on_level=13, n_leaf_pages_to_analyze=7, then:
- segment i=0: [0, 0]
- segment i=1: [1, 2]
- segment i=2: [3, 4]
- segment i=3: [5, 6]
- segment i=4: [7, 8]
- segment i=5: [9, 10]
- segment i=6: [11, 12]
-
- then we select a random record from each segment and dive
- below it */
- const ib_uint64_t n_diff = n_diff_data->n_diff_on_level;
- const ib_uint64_t n_pick
- = n_diff_data->n_leaf_pages_to_analyze;
-
- const ib_uint64_t left = n_diff * i / n_pick;
- const ib_uint64_t right = n_diff * (i + 1) / n_pick - 1;
-
- ut_a(left <= right);
- ut_a(right <= last_idx_on_level);
-
- /* we do not pass (left, right) because we do not want to ask
- ut_rnd_interval() to work with too big numbers since
- ib_uint64_t could be bigger than ulint */
- const ulint rnd = ut_rnd_interval(
- 0, static_cast<ulint>(right - left));
-
- const ib_uint64_t dive_below_idx
- = boundaries->at(static_cast<unsigned>(left + rnd));
-
-#if 0
- DEBUG_PRINTF(" %s(): dive below record with index="
- UINT64PF "\n", __func__, dive_below_idx);
-#endif
-
- /* seek to the record with index dive_below_idx */
- while (rec_idx < dive_below_idx
- && btr_pcur_is_on_user_rec(&pcur)) {
-
- btr_pcur_move_to_next_user_rec(&pcur, mtr);
- rec_idx++;
- }
-
- /* if the level has finished before the record we are
- searching for, this means that the B-tree has changed in
- the meantime, quit our sampling and use whatever stats
- we have collected so far */
- if (rec_idx < dive_below_idx) {
-
- ut_ad(!btr_pcur_is_on_user_rec(&pcur));
- break;
- }
-
- /* it could be that the tree has changed in such a way that
- the record under dive_below_idx is the supremum record, in
- this case rec_idx == dive_below_idx and pcur is positioned
- on the supremum, we do not want to dive below it */
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- break;
- }
-
- ut_a(rec_idx == dive_below_idx);
-
- ib_uint64_t n_diff_on_leaf_page;
- ib_uint64_t n_external_pages;
-
- dict_stats_analyze_index_below_cur(btr_pcur_get_btr_cur(&pcur),
- n_prefix,
- &n_diff_on_leaf_page,
- &n_external_pages);
-
- /* We adjust n_diff_on_leaf_page here to avoid counting
- one record twice - once as the last on some page and once
- as the first on another page. Consider the following example:
- Leaf level:
- page: (2,2,2,2,3,3)
- ... many pages like (3,3,3,3,3,3) ...
- page: (3,3,3,3,5,5)
- ... many pages like (5,5,5,5,5,5) ...
- page: (5,5,5,5,8,8)
- page: (8,8,8,8,9,9)
- our algo would (correctly) get an estimate that there are
- 2 distinct records per page (average). Having 4 pages below
- non-boring records, it would (wrongly) estimate the number
- of distinct records to 8. */
- if (n_diff_on_leaf_page > 0) {
- n_diff_on_leaf_page--;
- }
-
- n_diff_data->n_diff_all_analyzed_pages += n_diff_on_leaf_page;
-
- n_diff_data->n_external_pages_sum += n_external_pages;
- }
-
- btr_pcur_close(&pcur);
-}
-
-/** Set dict_index_t::stat_n_diff_key_vals[] and stat_n_sample_sizes[].
-For every n-column prefix (n_prefix = n_unique .. 1) the estimate stored in
-stat_n_diff_key_vals[n_prefix - 1] is
-  n_ordinary_leaf_pages
-  * n_diff_on_level / n_recs_on_level
-  * n_diff_all_analyzed_pages / n_leaf_pages_to_analyze
-evaluated left to right with integer arithmetic, and
-stat_n_sample_sizes[n_prefix - 1] is set to n_leaf_pages_to_analyze.
-@param[in]	n_diff_data	input data to use to derive the results;
-element [n_prefix - 1] describes the n_prefix-column prefix
-@param[in,out]	index		index whose stat_n_diff_key_vals[] to set */
-UNIV_INLINE
-void
-dict_stats_index_set_n_diff(
-	const n_diff_data_t*	n_diff_data,
-	dict_index_t*		index)
-{
-	for (ulint n_prefix = dict_index_get_n_unique(index);
-	     n_prefix >= 1;
-	     n_prefix--) {
-		/* n_diff_all_analyzed_pages can be 0 here if
-		all the leaf pages sampled contained only
-		delete-marked records. In this case we should assign
-		0 to index->stat_n_diff_key_vals[n_prefix - 1], which
-		the formula below does. */
-
-		const n_diff_data_t*	data = &n_diff_data[n_prefix - 1];
-
-		/* Both values are used as divisors below; they are only
-		checked in debug builds. */
-		ut_ad(data->n_leaf_pages_to_analyze > 0);
-		ut_ad(data->n_recs_on_level > 0);
-
-		ulint	n_ordinary_leaf_pages;
-
-		if (data->level == 1) {
-			/* If we know the number of records on level 1, then
-			this number is the same as the number of pages on
-			level 0 (leaf). */
-			n_ordinary_leaf_pages = data->n_recs_on_level;
-		} else {
-			/* If we analyzed D ordinary leaf pages and found E
-			external pages in total linked from those D ordinary
-			leaf pages, then this means that the ratio
-			ordinary/external is D/E. Then the ratio ordinary/total
-			is D / (D + E). Knowing that the total number of pages
-			is T (including ordinary and external) then we estimate
-			that the total number of ordinary leaf pages is
-			T * D / (D + E). */
-			n_ordinary_leaf_pages
-				= index->stat_n_leaf_pages
-				* data->n_leaf_pages_to_analyze
-				/ (data->n_leaf_pages_to_analyze
-				   + data->n_external_pages_sum);
-		}
-
-		/* See REF01 for an explanation of the algorithm */
-		index->stat_n_diff_key_vals[n_prefix - 1]
-			= n_ordinary_leaf_pages
-
-			* data->n_diff_on_level
-			/ data->n_recs_on_level
-
-			* data->n_diff_all_analyzed_pages
-			/ data->n_leaf_pages_to_analyze;
-
-		index->stat_n_sample_sizes[n_prefix - 1]
-			= data->n_leaf_pages_to_analyze;
-
-		/* Print the factors that were actually used above: the
-		first one is n_ordinary_leaf_pages, not the raw
-		index->stat_n_leaf_pages. */
-		DEBUG_PRINTF("    %s(): n_diff=" UINT64PF " for n_prefix=%lu"
-			     " (%lu"
-			     " * " UINT64PF " / " UINT64PF
-			     " * " UINT64PF " / " UINT64PF ")\n",
-			     __func__,
-			     index->stat_n_diff_key_vals[n_prefix - 1],
-			     n_prefix,
-			     n_ordinary_leaf_pages,
-			     data->n_diff_on_level,
-			     data->n_recs_on_level,
-			     data->n_diff_all_analyzed_pages,
-			     data->n_leaf_pages_to_analyze);
-	}
-}
-
-/*********************************************************************//**
-Calculates new statistics for a given index and saves them to the index
-members stat_n_diff_key_vals[], stat_n_sample_sizes[], stat_index_size and
-stat_n_leaf_pages. This function could be slow.
-Outline of what the code below does:
-1. Calls dict_stats_empty_index() and then reads the total size and the
-number of leaf pages with btr_get_size(); if that reports ULINT_UNDEFINED
-the function returns without analyzing anything.
-2. If the tree has only one level, or if N_SAMPLE_PAGES(index) * n_uniq
-exceeds the number of leaf pages, the whole leaf level is scanned and the
-results are written directly into the index.
-3. Otherwise, for each n-column prefix (from n_uniq down to 1) a non-leaf
-level is chosen, dict_stats_analyze_index_for_n_prefix() samples below it,
-and dict_stats_index_set_n_diff() derives the final numbers, but only if
-the loop over the prefixes ran to completion. */
-static
-void
-dict_stats_analyze_index(
-/*=====================*/
- dict_index_t* index) /*!< in/out: index to analyze */
-{
- ulint root_level;
- ulint level;
- bool level_is_analyzed;
- ulint n_uniq;
- ulint n_prefix;
- ib_uint64_t total_recs;
- ib_uint64_t total_pages;
- mtr_t mtr;
- ulint size;
- DBUG_ENTER("dict_stats_analyze_index");
-
- DBUG_PRINT("info", ("index: %s, online status: %d", index->name,
- dict_index_get_online_status(index)));
-
- DEBUG_PRINTF(" %s(index=%s)\n", __func__, index->name);
-
- dict_stats_empty_index(index, false);
-
- mtr_start(&mtr);
-
- mtr_s_lock(dict_index_get_lock(index), &mtr);
-
- size = btr_get_size(index, BTR_TOTAL_SIZE, &mtr);
-
- if (size != ULINT_UNDEFINED) {
- index->stat_index_size = size;
- size = btr_get_size(index, BTR_N_LEAF_PAGES, &mtr); /* from here on 'size' is the leaf page count */
- }
-
- /* Release the X locks on the root page taken by btr_get_size() */
- mtr_commit(&mtr);
-
- switch (size) {
- case ULINT_UNDEFINED:
- dict_stats_assert_initialized_index(index);
- DBUG_VOID_RETURN;
- case 0:
- /* The root node of the tree is a leaf */
- size = 1;
- }
-
- index->stat_n_leaf_pages = size;
-
- mtr_start(&mtr);
-
- mtr_s_lock(dict_index_get_lock(index), &mtr);
-
- root_level = btr_height_get(index, &mtr);
-
- n_uniq = dict_index_get_n_unique(index);
-
- /* If the tree has just one level (and one page) or if the user
- has requested to sample too many pages then do full scan.
-
- For each n-column prefix (for n=1..n_uniq) N_SAMPLE_PAGES(index)
- will be sampled, so in total N_SAMPLE_PAGES(index) * n_uniq leaf
- pages will be sampled. If that number is bigger than the total
- number of leaf pages then do full scan of the leaf level instead
- since it will be faster and will give better results. */
-
- if (root_level == 0
- || N_SAMPLE_PAGES(index) * n_uniq > index->stat_n_leaf_pages) {
-
- if (root_level == 0) {
- DEBUG_PRINTF(" %s(): just one page, "
- "doing full scan\n", __func__);
- } else {
- DEBUG_PRINTF(" %s(): too many pages requested for "
- "sampling, doing full scan\n", __func__);
- }
-
- /* do full scan of level 0; save results directly
- into the index */
-
- dict_stats_analyze_index_level(index,
- 0 /* leaf level */,
- index->stat_n_diff_key_vals,
- &total_recs,
- &total_pages,
- NULL /* boundaries not needed */,
- &mtr);
-
- for (ulint i = 0; i < n_uniq; i++) {
- index->stat_n_sample_sizes[i] = total_pages; /* every prefix was computed from the same full scan */
- }
-
- mtr_commit(&mtr);
-
- dict_stats_assert_initialized_index(index);
- DBUG_VOID_RETURN;
- }
-
- /* For each level that is being scanned in the btree, this contains the
- number of different key values for all possible n-column prefixes.
- Released with delete[] after the prefix loop. */
- ib_uint64_t* n_diff_on_level = new ib_uint64_t[n_uniq];
-
- /* For each level that is being scanned in the btree, this contains the
- index of the last record from each group of equal records (when
- comparing only the first n columns, n=1..n_uniq).
- Released with delete[] after the prefix loop. */
- boundaries_t* n_diff_boundaries = new boundaries_t[n_uniq];
-
- /* For each n-column prefix this array contains the input data that is
- used to calculate dict_index_t::stat_n_diff_key_vals[].
- Released with delete[] at the end of the function. */
- n_diff_data_t* n_diff_data = new n_diff_data_t[n_uniq];
-
- /* total_recs is also used to estimate the number of pages on one
- level below, so at the start we have 1 page (the root) */
- total_recs = 1;
-
- /* Here we use the following optimization:
- If we find that level L is the first one (searching from the
- root) that contains at least D distinct keys when looking at
- the first n_prefix columns, then:
- if we look at the first n_prefix-1 columns then the first
- level that contains D distinct keys will be either L or a
- lower one.
- So if we find that the first level containing D distinct
- keys (on n_prefix columns) is L, we continue from L when
- searching for D distinct keys on n_prefix-1 columns. */
- level = root_level;
- level_is_analyzed = false;
-
- for (n_prefix = n_uniq; n_prefix >= 1; n_prefix--) {
-
- DEBUG_PRINTF(" %s(): searching level with >=%llu "
- "distinct records, n_prefix=%lu\n",
- __func__, N_DIFF_REQUIRED(index), n_prefix);
-
- /* Commit the mtr to release the tree S lock to allow
- other threads to do some work too. */
- mtr_commit(&mtr);
- mtr_start(&mtr);
- mtr_s_lock(dict_index_get_lock(index), &mtr);
- if (root_level != btr_height_get(index, &mtr)) {
- /* Just quit if the tree has changed beyond
- recognition here. The old stats from previous
- runs will remain in the values that we have
- not calculated yet. Initially when the index
- object is created the stats members are given
- some sensible values so leaving them untouched
- here even the first time will not cause us to
- read uninitialized memory later.
- NOTE(review): dict_stats_empty_index() was already
- called at the top of this function, so the values
- that remain may be the emptied ones rather than
- stats from a previous run - confirm. */
- break; /* n_prefix stays > 0, so dict_stats_index_set_n_diff() is skipped below */
- }
-
- /* check whether we should pick the current level;
- we pick level 1 even if it does not have enough
- distinct records because we do not want to scan the
- leaf level because it may contain too many records */
- if (level_is_analyzed
- && (n_diff_on_level[n_prefix - 1] >= N_DIFF_REQUIRED(index)
- || level == 1)) {
-
- goto found_level;
- }
-
- /* search for a level that contains enough distinct records */
-
- if (level_is_analyzed && level > 1) {
-
- /* if this does not hold we should be on
- "found_level" instead of here */
- ut_ad(n_diff_on_level[n_prefix - 1]
- < N_DIFF_REQUIRED(index));
-
- level--;
- level_is_analyzed = false;
- }
-
- /* descend into the tree, searching for "good enough" level */
- for (;;) {
-
- /* make sure we do not scan the leaf level
- accidentally, it may contain too many pages */
- ut_ad(level > 0);
-
- /* scanning the same level twice is an optimization
- bug */
- ut_ad(!level_is_analyzed);
-
- /* Do not scan if this would read too many pages.
- Here we use the following fact:
- the number of pages on level L equals the number
- of records on level L+1, thus we deduce that the
- following call would scan total_recs pages, because
- total_recs is left from the previous iteration when
- we scanned one level upper or we have not scanned any
- levels yet in which case total_recs is 1. */
- if (total_recs > N_SAMPLE_PAGES(index)) {
-
- /* if the above cond is true then we are
- not at the root level since on the root
- level total_recs == 1 (set before we
- enter the n-prefix loop) and cannot
- be > N_SAMPLE_PAGES(index) */
- ut_a(level != root_level);
-
- /* step one level back and be satisfied with
- whatever it contains; n_diff_on_level[],
- total_recs and n_diff_boundaries[] still hold
- the results of the scan of that upper level */
- level++;
- level_is_analyzed = true;
-
- break;
- }
-
- dict_stats_analyze_index_level(index,
- level,
- n_diff_on_level,
- &total_recs,
- &total_pages,
- n_diff_boundaries,
- &mtr);
-
- level_is_analyzed = true;
-
- if (level == 1
- || n_diff_on_level[n_prefix - 1]
- >= N_DIFF_REQUIRED(index)) {
- /* we have reached the last level we could scan
- or we found a good level with many distinct
- records */
- break;
- }
-
- level--;
- level_is_analyzed = false;
- }
-found_level:
-
- DEBUG_PRINTF(" %s(): found level %lu that has " UINT64PF
- " distinct records for n_prefix=%lu\n",
- __func__, level, n_diff_on_level[n_prefix - 1],
- n_prefix);
- /* here we are either on level 1 or the level that we are on
- contains >= N_DIFF_REQUIRED distinct keys or we did not scan
- deeper levels because they would contain too many pages */
-
- ut_ad(level > 0);
-
- ut_ad(level_is_analyzed);
-
- /* if any of these is 0 then there is exactly one page in the
- B-tree and it is empty and we should have done full scan and
- should not be here */
- ut_ad(total_recs > 0);
- ut_ad(n_diff_on_level[n_prefix - 1] > 0);
-
- ut_ad(N_SAMPLE_PAGES(index) > 0);
-
- n_diff_data_t* data = &n_diff_data[n_prefix - 1];
-
- data->level = level;
-
- data->n_recs_on_level = total_recs;
-
- data->n_diff_on_level = n_diff_on_level[n_prefix - 1];
-
- data->n_leaf_pages_to_analyze = std::min(
- N_SAMPLE_PAGES(index),
- n_diff_on_level[n_prefix - 1]); /* never dive below more records than there are distinct ones on the level */
-
- /* pick some records from this level and dive below them for
- the given n_prefix */
-
- dict_stats_analyze_index_for_n_prefix(
- index, n_prefix, &n_diff_boundaries[n_prefix - 1],
- data, &mtr);
- }
-
- mtr_commit(&mtr);
-
- delete[] n_diff_boundaries;
-
- delete[] n_diff_on_level;
-
- /* n_prefix == 0 means that the above loop did not end up prematurely
- due to tree being changed and so n_diff_data[] is set up. */
- if (n_prefix == 0) {
- dict_stats_index_set_n_diff(n_diff_data, index);
- }
-
- delete[] n_diff_data;
-
- dict_stats_assert_initialized_index(index);
- DBUG_VOID_RETURN;
-}
-
-/*********************************************************************//**
-Recomputes the statistics of a table and of its indexes while holding the
-table stats latch in X mode. The clustered index is analyzed first and
-provides stat_n_rows and stat_clustered_index_size; the sizes of the
-remaining indexes are summed into stat_sum_of_other_index_sizes. This
-function is relatively slow and is used to calculate persistent statistics
-that will be saved on disk.
-@return DB_SUCCESS, or DB_CORRUPTION if the first index is missing,
-corrupted or is not a clustered index */
-static
-dberr_t
-dict_stats_update_persistent(
-/*=========================*/
-	dict_table_t*	table)	/*!< in/out: table */
-{
-	DEBUG_PRINTF("%s(table=%s)\n", __func__, table->name);
-
-	dict_table_stats_lock(table, RW_X_LATCH);
-
-	/* The clustered index comes first. */
-	dict_index_t*	clust_index = dict_table_get_first_index(table);
-
-	const bool	clust_usable
-		= clust_index != NULL
-		&& !dict_index_is_corrupted(clust_index)
-		&& (clust_index->type | DICT_UNIQUE)
-		   == (DICT_CLUSTERED | DICT_UNIQUE);
-
-	if (!clust_usable) {
-		/* Table definition is corrupt */
-		dict_table_stats_unlock(table, RW_X_LATCH);
-		dict_stats_empty_table(table, true);
-
-		return(DB_CORRUPTION);
-	}
-
-	ut_ad(!dict_index_is_univ(clust_index));
-
-	dict_stats_analyze_index(clust_index);
-
-	const ulint	n_unique = dict_index_get_n_unique(clust_index);
-
-	/* The number of distinct values of the full unique prefix of the
-	clustered index is used as the row count. */
-	table->stat_n_rows = clust_index->stat_n_diff_key_vals[n_unique - 1];
-	table->stat_clustered_index_size = clust_index->stat_index_size;
-
-	/* Now the remaining indexes of the table, if any. */
-	table->stat_sum_of_other_index_sizes = 0;
-
-	dict_index_t*	other = dict_table_get_next_index(clust_index);
-
-	while (other != NULL) {
-
-		ut_ad(!dict_index_is_univ(other));
-
-		if (!(other->type & DICT_FTS)) {
-
-			dict_stats_empty_index(other, false);
-
-			if (!dict_stats_should_ignore_index(other)) {
-
-				/* When asked to quit, skip the analysis but
-				still account for the (emptied) size. */
-				if (!(table->stats_bg_flag
-				      & BG_STAT_SHOULD_QUIT)) {
-					dict_stats_analyze_index(other);
-				}
-
-				table->stat_sum_of_other_index_sizes
-					+= other->stat_index_size;
-			}
-		}
-
-		other = dict_table_get_next_index(other);
-	}
-
-	table->stats_last_recalc = ut_time();
-	table->stat_modified_counter = 0;
-	table->stat_initialized = TRUE;
-
-	dict_stats_assert_initialized(table);
-
-	dict_table_stats_unlock(table, RW_X_LATCH);
-
-	return(DB_SUCCESS);
-}
-
-#include "mysql_com.h"
-/** Write one statistic of one index into the persistent statistics
-storage: the row identified by (database_name, table_name, index_name,
-stat_name) is deleted and then inserted again with the new values.
-On failure a message is printed to stderr, but only once per index
-(index->stats_error_printed) and only while innodb_index_stats_not_found
-is false.
-@param[in]	index			index to be updated
-@param[in]	last_update		timestamp of the stat
-@param[in]	stat_name		name of the stat
-@param[in]	stat_value		value of the stat
-@param[in]	sample_size		n pages sampled or NULL, in which
-case an SQL NULL is stored
-@param[in]	stat_description	description of the stat
-@param[in,out]	trx			in case of NULL the function will
-allocate and free the trx object. If it is not NULL then it will be
-rolled back only in the case of error, but not freed.
-@return DB_SUCCESS or error code */
-static
-dberr_t
-dict_stats_save_index_stat(
-	dict_index_t*	index,
-	lint		last_update,
-	const char*	stat_name,
-	ib_uint64_t	stat_value,
-	ib_uint64_t*	sample_size,
-	const char*	stat_description,
-	trx_t*		trx)
-{
-	char	db_name_utf8[MAX_DB_UTF8_LEN];
-	char	tbl_name_utf8[MAX_TABLE_UTF8_LEN];
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(mutex_own(&dict_sys->mutex));
-
-	dict_fs2utf8(index->table->name,
-		     db_name_utf8, sizeof(db_name_utf8),
-		     tbl_name_utf8, sizeof(tbl_name_utf8));
-
-	/* Bind the values referenced by the SQL text below. */
-	pars_info_t*	info = pars_info_create();
-
-	pars_info_add_str_literal(info, "database_name", db_name_utf8);
-	pars_info_add_str_literal(info, "table_name", tbl_name_utf8);
-
-	UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name));
-	pars_info_add_str_literal(info, "index_name", index->name);
-
-	UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4);
-	pars_info_add_int4_literal(info, "last_update", last_update);
-
-	UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name));
-	pars_info_add_str_literal(info, "stat_name", stat_name);
-
-	UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8);
-	pars_info_add_ull_literal(info, "stat_value", stat_value);
-
-	if (sample_size == NULL) {
-		pars_info_add_literal(info, "sample_size", NULL,
-				      UNIV_SQL_NULL, DATA_FIXBINARY, 0);
-	} else {
-		UNIV_MEM_ASSERT_RW_ABORT(sample_size, 8);
-		pars_info_add_ull_literal(info, "sample_size", *sample_size);
-	}
-
-	UNIV_MEM_ASSERT_RW_ABORT(stat_description, strlen(stat_description));
-	pars_info_add_str_literal(info, "stat_description",
-				  stat_description);
-
-	const dberr_t	err = dict_stats_exec_sql(
-		info,
-		"PROCEDURE INDEX_STATS_SAVE () IS\n"
-		"BEGIN\n"
-
-		"DELETE FROM \"" INDEX_STATS_NAME "\"\n"
-		"WHERE\n"
-		"database_name = :database_name AND\n"
-		"table_name = :table_name AND\n"
-		"index_name = :index_name AND\n"
-		"stat_name = :stat_name;\n"
-
-		"INSERT INTO \"" INDEX_STATS_NAME "\"\n"
-		"VALUES\n"
-		"(\n"
-		":database_name,\n"
-		":table_name,\n"
-		":index_name,\n"
-		":last_update,\n"
-		":stat_name,\n"
-		":stat_value,\n"
-		":sample_size,\n"
-		":stat_description\n"
-		");\n"
-		"END;", trx);
-
-	if (err == DB_SUCCESS) {
-		return(err);
-	}
-
-	/* Report the failure at most once per index. */
-	if (!innodb_index_stats_not_found && !index->stats_error_printed) {
-		char	table_buf[MAX_FULL_NAME_LEN];
-		char	index_buf[MAX_FULL_NAME_LEN];
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Cannot save index statistics for table "
-			"%s, index %s, stat name \"%s\": %s\n",
-			ut_format_name(index->table->name, TRUE,
-				       table_buf, sizeof(table_buf)),
-			ut_format_name(index->name, FALSE,
-				       index_buf, sizeof(index_buf)),
-			stat_name, ut_strerr(err));
-
-		index->stats_error_printed = true;
-	}
-
-	return(err);
-}
-
-/** Report error if statistic update for a table failed because
-.ibd file is missing, table decryption failed or table is corrupted.
-One informational message is logged, the table statistics are reset with
-dict_stats_empty_table() and the matching error code is returned.
-@param[in,out]	table		Table
-@param[in]	defragment	true if statistics is for defragment
-@return DB_DECRYPTION_FAILED, DB_TABLESPACE_DELETED or DB_CORRUPTION
-@retval DB_DECRYPTION_FAILED	if decryption of the table failed
-@retval DB_TABLESPACE_DELETED	if .ibd file is missing
-@retval DB_CORRUPTION		if table is marked as corrupted */
-static
-dberr_t
-dict_stats_report_error(
-	dict_table_t*	table,
-	bool		defragment = false)
-{
-	char	buf[3 * NAME_LEN];
-	dberr_t	err;
-
-	innobase_format_name(buf, sizeof buf,
-			     table->name,
-			     true);
-
-	/* Inserted right after "Cannot save". It is empty in the default
-	case so that the message does not contain a doubled space. */
-	const char*	kind = defragment ? " defragment" : "";
-
-	FilSpace	space(table->space);
-
-	if (space()) {
-		/* The tablespace exists: the table is either flagged as
-		corrupted or it is reported as not decryptable. */
-		if (table->corrupted) {
-			ib_logf(IB_LOG_LEVEL_INFO,
-				"Cannot save%s statistics because "
-				"table %s in file %s is corrupted.",
-				kind,
-				buf, space()->chain.start->name);
-			err = DB_CORRUPTION;
-		} else {
-			ib_logf(IB_LOG_LEVEL_INFO,
-				"Cannot save%s statistics because "
-				"table %s in file %s can't be decrypted.",
-				kind,
-				buf, space()->chain.start->name);
-			err = DB_DECRYPTION_FAILED;
-		}
-	} else {
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"Cannot save%s statistics for "
-			"table %s because .ibd file is missing."
-			" For help, please "
-			"refer to " REFMAN "innodb-troubleshooting.html.",
-			kind,
-			buf);
-		err = DB_TABLESPACE_DELETED;
-	}
-
-	dict_stats_empty_table(table, defragment);
-
-	return (err);
-}
-
-/** Save the table's statistics into the persistent statistics storage.
-If the table is not readable, dict_stats_report_error() is called and its
-result is returned. Otherwise the function works on a snapshot created by
-dict_stats_snapshot_create(), which is freed on every return path after it
-was created. While holding dict_operation_lock in X mode and
-dict_sys->mutex it first rewrites the table's row in TABLE_STATS_NAME
-(executed with a NULL trx) and then writes the index rows in one
-background transaction, which is committed only if every row was saved
-successfully.
-@param[in] table_orig table whose stats to save
-@param[in] only_for_index if this is non-NULL, then stats for indexes
-that are not equal to it will not be saved, if NULL, then all
-indexes' stats are saved
-@return DB_SUCCESS or error code */
-static
-dberr_t
-dict_stats_save(
-/*============*/
- dict_table_t* table_orig,
- const index_id_t* only_for_index)
-{
- pars_info_t* pinfo;
- lint now;
- dberr_t ret;
- dict_table_t* table;
- char db_utf8[MAX_DB_UTF8_LEN];
- char table_utf8[MAX_TABLE_UTF8_LEN];
-
- if (table_orig->is_readable()) {
- } else {
- return (dict_stats_report_error(table_orig)); /* nothing has been allocated or locked yet */
- }
-
- table = dict_stats_snapshot_create(table_orig);
-
- dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
- table_utf8, sizeof(table_utf8));
-
- rw_lock_x_lock(&dict_operation_lock);
- mutex_enter(&dict_sys->mutex);
-
- /* MySQL's timestamp is 4 byte, so we use
- pars_info_add_int4_literal() which takes a lint arg, so "now" is
- lint */
- now = (lint) ut_time();
-
- pinfo = pars_info_create();
-
- pars_info_add_str_literal(pinfo, "database_name", db_utf8);
- pars_info_add_str_literal(pinfo, "table_name", table_utf8);
- pars_info_add_int4_literal(pinfo, "last_update", now);
- pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows);
- pars_info_add_ull_literal(pinfo, "clustered_index_size",
- table->stat_clustered_index_size);
- pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes",
- table->stat_sum_of_other_index_sizes);
-
- ret = dict_stats_exec_sql(
- pinfo,
- "PROCEDURE TABLE_STATS_SAVE () IS\n"
- "BEGIN\n"
-
- "DELETE FROM \"" TABLE_STATS_NAME "\"\n"
- "WHERE\n"
- "database_name = :database_name AND\n"
- "table_name = :table_name;\n"
-
- "INSERT INTO \"" TABLE_STATS_NAME "\"\n"
- "VALUES\n"
- "(\n"
- ":database_name,\n"
- ":table_name,\n"
- ":last_update,\n"
- ":n_rows,\n"
- ":clustered_index_size,\n"
- ":sum_of_other_index_sizes\n"
- ");\n"
- "END;", NULL);
-
- if (ret != DB_SUCCESS) {
- char buf[MAX_FULL_NAME_LEN];
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Cannot save table statistics for table "
- "%s: %s\n",
- ut_format_name(table->name, TRUE, buf, sizeof(buf)),
- ut_strerr(ret));
-
- mutex_exit(&dict_sys->mutex);
- rw_lock_x_unlock(&dict_operation_lock);
-
- dict_stats_snapshot_free(table);
-
- return(ret);
- }
-
- trx_t* trx = trx_allocate_for_background();
- trx_start_if_not_started(trx);
-
- dict_index_t* index;
- index_map_t indexes;
-
- /* Below we do all the modifications in innodb_index_stats in a single
- transaction for performance reasons. Modifying more than one row in a
- single transaction may deadlock with other transactions if they
- lock the rows in different order. Other transaction could be for
- example when we DROP a table and do
- DELETE FROM innodb_index_stats WHERE database_name = '...'
- AND table_name = '...'; which will affect more than one row. To
- prevent deadlocks we always lock the rows in the same order - the
- order of the PK, which is (database_name, table_name, index_name,
- stat_name). This is why below we sort the indexes by name and then
- for each index, do the mods ordered by stat_name.
- The sorting is done by inserting the indexes into 'indexes', keyed by
- index name, and then iterating over that container. */
-
- for (index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
-
- indexes[index->name] = index;
- }
-
- index_map_t::const_iterator it;
-
- for (it = indexes.begin(); it != indexes.end(); ++it) {
-
- index = it->second;
-
- if (only_for_index != NULL && index->id != *only_for_index) {
- continue;
- }
-
- if (dict_stats_should_ignore_index(index)) {
- continue;
- }
-
- ut_ad(!dict_index_is_univ(index));
-
- for (ulint i = 0; i < index->n_uniq; i++) {
-
- char stat_name[16];
- char stat_description[1024];
- ulint j;
-
- ut_snprintf(stat_name, sizeof(stat_name),
- "n_diff_pfx%02lu", i + 1);
-
- /* craft a string that contains the columns names:
- the names of the first i + 1 fields of the index,
- separated by commas */
- ut_snprintf(stat_description,
- sizeof(stat_description),
- "%s", index->fields[0].name);
- for (j = 1; j <= i; j++) {
- size_t len;
-
- len = strlen(stat_description);
-
- ut_snprintf(stat_description + len,
- sizeof(stat_description) - len,
- ",%s", index->fields[j].name);
- }
-
- ret = dict_stats_save_index_stat(
- index, now, stat_name,
- index->stat_n_diff_key_vals[i],
- &index->stat_n_sample_sizes[i],
- stat_description, trx);
-
- if (ret != DB_SUCCESS) {
- goto end; /* skips the commit below */
- }
- }
-
- ret = dict_stats_save_index_stat(index, now, "n_leaf_pages",
- index->stat_n_leaf_pages,
- NULL,
- "Number of leaf pages "
- "in the index", trx);
- if (ret != DB_SUCCESS) {
- goto end;
- }
-
- ret = dict_stats_save_index_stat(index, now, "size",
- index->stat_index_size,
- NULL,
- "Number of pages "
- "in the index", trx);
- if (ret != DB_SUCCESS) {
- goto end;
- }
- }
-
- trx_commit_for_mysql(trx); /* reached only when every index stat was saved */
-
-end:
- trx_free_for_background(trx);
-
- mutex_exit(&dict_sys->mutex);
- rw_lock_x_unlock(&dict_operation_lock);
-
- dict_stats_snapshot_free(table);
-
- return(ret);
-}
-
-/*********************************************************************//**
-Callback invoked for the row that is selected by
-SELECT ... FROM mysql.innodb_table_stats WHERE table='...'
-It walks the select list, which must consist of exactly three 8-byte
-DATA_INT columns (n_rows, clustered_index_size, sum_of_other_index_sizes),
-and copies their values into the table passed as the second argument.
-@return non-NULL dummy */
-static
-ibool
-dict_stats_fetch_table_stats_step(
-/*==============================*/
-	void*	node_void,	/*!< in: select node */
-	void*	table_void)	/*!< out: table */
-{
-	sel_node_t*	sel_node = static_cast<sel_node_t*>(node_void);
-	dict_table_t*	table = static_cast<dict_table_t*>(table_void);
-	int		n_cols = 0;
-
-	for (que_common_t* col
-		= static_cast<que_common_t*>(sel_node->select_list);
-	     col != NULL;
-	     col = static_cast<que_common_t*>(que_node_get_next(col))) {
-
-		if (n_cols > 2) {
-			/* someone changed SELECT
-			n_rows,clustered_index_size,sum_of_other_index_sizes
-			to select more columns from innodb_table_stats without
-			adjusting here */
-			ut_error;
-		}
-
-		dfield_t*	field = que_node_get_val(col);
-		dtype_t*	field_type = dfield_get_type(field);
-		const ulint	field_len = dfield_get_len(field);
-		const byte*	raw
-			= static_cast<const byte*>(dfield_get_data(field));
-
-		/* All three columns share the same representation. */
-		ut_a(dtype_get_mtype(field_type) == DATA_INT);
-		ut_a(field_len == 8);
-
-		const ib_uint64_t	value = mach_read_from_8(raw);
-
-		if (n_cols == 0) {
-			/* mysql.innodb_table_stats.n_rows */
-			table->stat_n_rows = value;
-		} else if (n_cols == 1) {
-			/* mysql.innodb_table_stats.clustered_index_size */
-			table->stat_clustered_index_size = (ulint) value;
-		} else {
-			/* mysql.innodb_table_stats.sum_of_other_index_sizes */
-			table->stat_sum_of_other_index_sizes = (ulint) value;
-		}
-
-		n_cols++;
-	}
-
-	/* if fewer than 3 columns were seen then someone changed the
-	SELECT n_rows,clustered_index_size,sum_of_other_index_sizes
-	to select less columns from innodb_table_stats without adjusting here;
-	with more than 3 we would have ut_error'ed earlier */
-	ut_a(n_cols == 3 /*n_rows,clustered_index_size,sum_of_other_index_sizes*/);
-
-	/* XXX this is not used but returning non-NULL is necessary */
-	return(TRUE);
-}
-
-/** Aux struct used to pass a table and a boolean to
-dict_stats_fetch_index_stats_step(), which receives it through its
-void* argument. */
-struct index_fetch_t {
- dict_table_t* table; /*!< table whose indexes are to be modified
- with the fetched statistics */
- bool stats_were_modified; /*!< will be set to true if at
- least one index stats were modified; the
- fetch step itself never resets it */
-};
-
-/*********************************************************************//**
-Called for the rows that are selected by
-SELECT ... FROM mysql.innodb_index_stats WHERE table='...'
-The second argument is a pointer to the table and the fetched stats are
-written to its indexes.
-Let a table has N indexes and each index has Ui unique columns for i=1..N,
-then mysql.innodb_index_stats will have SUM(Ui) i=1..N rows for that table.
-So this function will be called SUM(Ui) times where SUM(Ui) is of magnitude
-N*AVG(Ui). In each call it searches for the currently fetched index into
-table->indexes linearly, assuming this list is not sorted. Thus, overall,
-fetching all indexes' stats from mysql.innodb_index_stats is O(N^2) where N
-is the number of indexes.
-This can be improved if we sort table->indexes in a temporary area just once
-and then search in that sorted list. Then the complexity will be O(N*log(N)).
-We assume a table will not have more than 100 indexes, so we go with the
-simpler N^2 algorithm.
-@return non-NULL dummy */
-static
-ibool
-dict_stats_fetch_index_stats_step(
-/*==============================*/
- void* node_void, /*!< in: select node */
- void* arg_void) /*!< out: table + a flag that tells if we
- modified anything */
-{
- sel_node_t* node = (sel_node_t*) node_void;
- index_fetch_t* arg = (index_fetch_t*) arg_void;
- dict_table_t* table = arg->table;
- dict_index_t* index = NULL;
- que_common_t* cnode;
- const char* stat_name = NULL;
- ulint stat_name_len = ULINT_UNDEFINED;
- ib_uint64_t stat_value = UINT64_UNDEFINED;
- ib_uint64_t sample_size = UINT64_UNDEFINED;
- int i;
-
- /* this should loop exactly 4 times - for the columns that
- were selected: index_name,stat_name,stat_value,sample_size */
- for (cnode = static_cast<que_common_t*>(node->select_list), i = 0;
- cnode != NULL;
- cnode = static_cast<que_common_t*>(que_node_get_next(cnode)),
- i++) {
-
- const byte* data;
- dfield_t* dfield = que_node_get_val(cnode);
- dtype_t* type = dfield_get_type(dfield);
- ulint len = dfield_get_len(dfield);
-
- data = static_cast<const byte*>(dfield_get_data(dfield));
-
- switch (i) {
- case 0: /* mysql.innodb_index_stats.index_name */
-
- ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
-
- /* search for index in table's indexes whose name
- matches data; the fetched index name is in data,
- has no terminating '\0' and has length len */
- for (index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
-
- if (strlen(index->name) == len
- && memcmp(index->name, data, len) == 0) {
- /* the corresponding index was found */
- break;
- }
- }
-
- /* if index is NULL here this means that
- mysql.innodb_index_stats contains more rows than the
- number of indexes in the table; this is ok, we just
- return ignoring those extra rows; in other words
- dict_stats_fetch_index_stats_step() has been called
- for a row from index_stats with unknown index_name
- column */
- if (index == NULL) {
-
- return(TRUE);
- }
-
- break;
-
- case 1: /* mysql.innodb_index_stats.stat_name */
-
- ut_a(dtype_get_mtype(type) == DATA_VARMYSQL);
-
- ut_a(index != NULL);
-
- stat_name = (const char*) data;
- stat_name_len = len;
-
- break;
-
- case 2: /* mysql.innodb_index_stats.stat_value */
-
- ut_a(dtype_get_mtype(type) == DATA_INT);
- ut_a(len == 8);
-
- ut_a(index != NULL);
- ut_a(stat_name != NULL);
- ut_a(stat_name_len != ULINT_UNDEFINED);
-
- stat_value = mach_read_from_8(data);
-
- break;
-
- case 3: /* mysql.innodb_index_stats.sample_size */
-
- ut_a(dtype_get_mtype(type) == DATA_INT);
- ut_a(len == 8 || len == UNIV_SQL_NULL);
-
- ut_a(index != NULL);
- ut_a(stat_name != NULL);
- ut_a(stat_name_len != ULINT_UNDEFINED);
- ut_a(stat_value != UINT64_UNDEFINED);
-
- if (len == UNIV_SQL_NULL) {
- break;
- }
- /* else */
-
- sample_size = mach_read_from_8(data);
-
- break;
-
- default:
-
- /* someone changed
- SELECT index_name,stat_name,stat_value,sample_size
- to select more columns from innodb_index_stats without
- adjusting here */
- ut_error;
- }
- }
-
- /* if i < 4 this means someone changed the
- SELECT index_name,stat_name,stat_value,sample_size
- to select less columns from innodb_index_stats without adjusting here;
- if i > 4 we would have ut_error'ed earlier */
- ut_a(i == 4 /* index_name,stat_name,stat_value,sample_size */);
-
- ut_a(index != NULL);
- ut_a(stat_name != NULL);
- ut_a(stat_name_len != ULINT_UNDEFINED);
- ut_a(stat_value != UINT64_UNDEFINED);
- /* sample_size could be UINT64_UNDEFINED here, if it is NULL */
-
-#define PFX "n_diff_pfx"
-#define PFX_LEN 10
-
- if (stat_name_len == 4 /* strlen("size") */
- && strncasecmp("size", stat_name, stat_name_len) == 0) {
- index->stat_index_size = (ulint) stat_value;
- arg->stats_were_modified = true;
- } else if (stat_name_len == 12 /* strlen("n_leaf_pages") */
- && strncasecmp("n_leaf_pages", stat_name, stat_name_len)
- == 0) {
- index->stat_n_leaf_pages = (ulint) stat_value;
- arg->stats_were_modified = true;
- } else if (stat_name_len == 12 /* strlen("n_page_split") */
- && strncasecmp("n_page_split", stat_name, stat_name_len)
- == 0) {
- index->stat_defrag_n_page_split = (ulint) stat_value;
- arg->stats_were_modified = true;
- } else if (stat_name_len == 13 /* strlen("n_pages_freed") */
- && strncasecmp("n_pages_freed", stat_name, stat_name_len)
- == 0) {
- index->stat_defrag_n_pages_freed = (ulint) stat_value;
- arg->stats_were_modified = true;
- } else if (stat_name_len > PFX_LEN /* e.g. stat_name=="n_diff_pfx01" */
- && strncasecmp(PFX, stat_name, PFX_LEN) == 0) {
-
- const char* num_ptr;
- unsigned long n_pfx;
-
- /* point num_ptr into "1" from "n_diff_pfx12..." */
- num_ptr = stat_name + PFX_LEN;
-
- /* stat_name should have exactly 2 chars appended to PFX
- and they should be digits */
- if (stat_name_len != PFX_LEN + 2
- || num_ptr[0] < '0' || num_ptr[0] > '9'
- || num_ptr[1] < '0' || num_ptr[1] > '9') {
-
- char db_utf8[MAX_DB_UTF8_LEN];
- char table_utf8[MAX_TABLE_UTF8_LEN];
-
- dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
- table_utf8, sizeof(table_utf8));
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Ignoring strange row from "
- "%s WHERE "
- "database_name = '%s' AND "
- "table_name = '%s' AND "
- "index_name = '%s' AND "
- "stat_name = '%.*s'; because stat_name "
- "is malformed\n",
- INDEX_STATS_NAME_PRINT,
- db_utf8,
- table_utf8,
- index->name,
- (int) stat_name_len,
- stat_name);
- return(TRUE);
- }
- /* else */
-
- /* extract 12 from "n_diff_pfx12..." into n_pfx
- note that stat_name does not have a terminating '\0' */
- n_pfx = (num_ptr[0] - '0') * 10 + (num_ptr[1] - '0');
-
- ulint n_uniq = index->n_uniq;
-
- if (n_pfx == 0 || n_pfx > n_uniq) {
-
- char db_utf8[MAX_DB_UTF8_LEN];
- char table_utf8[MAX_TABLE_UTF8_LEN];
-
- dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
- table_utf8, sizeof(table_utf8));
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Ignoring strange row from "
- "%s WHERE "
- "database_name = '%s' AND "
- "table_name = '%s' AND "
- "index_name = '%s' AND "
- "stat_name = '%.*s'; because stat_name is "
- "out of range, the index has %lu unique "
- "columns\n",
- INDEX_STATS_NAME_PRINT,
- db_utf8,
- table_utf8,
- index->name,
- (int) stat_name_len,
- stat_name,
- n_uniq);
- return(TRUE);
- }
- /* else */
-
- index->stat_n_diff_key_vals[n_pfx - 1] = stat_value;
-
- if (sample_size != UINT64_UNDEFINED) {
- index->stat_n_sample_sizes[n_pfx - 1] = sample_size;
- } else {
- /* hmm, strange... the user must have UPDATEd the
- table manually and SET sample_size = NULL */
- index->stat_n_sample_sizes[n_pfx - 1] = 0;
- }
-
- index->stat_n_non_null_key_vals[n_pfx - 1] = 0;
-
- arg->stats_were_modified = true;
- } else {
- /* silently ignore rows with unknown stat_name, the
- user may have developed her own stats */
- }
-
- /* XXX this is not used but returning non-NULL is necessary */
- return(TRUE);
-}
-
-/*********************************************************************//**
-Read table's statistics from the persistent statistics storage.
-The table's stats are first reset with dict_stats_empty_table(). A background
-transaction with isolation level TRX_ISO_READ_UNCOMMITTED then evaluates an
-internal SQL procedure: one row is fetched from TABLE_STATS_NAME into
-dict_stats_fetch_table_stats_step() (the procedure returns early if no such
-row is found), after which the rows of INDEX_STATS_NAME are fetched in a
-loop into dict_stats_fetch_index_stats_step().
-NOTE(review): when the index step callback did not apply any statistic,
-DB_STATS_DO_NOT_EXIST is returned even if que_eval_sql() reported an error;
-confirm that masking the error in this case is intended.
-@return DB_STATS_DO_NOT_EXIST if no index statistic was applied, otherwise
-the result of que_eval_sql() (DB_SUCCESS or error code) */
-static
-dberr_t
-dict_stats_fetch_from_ps(
-/*=====================*/
- dict_table_t* table) /*!< in/out: table */
-{
- index_fetch_t index_fetch_arg;
- trx_t* trx;
- pars_info_t* pinfo;
- dberr_t ret;
- char db_utf8[MAX_DB_UTF8_LEN];
- char table_utf8[MAX_TABLE_UTF8_LEN];
-
- ut_ad(!mutex_own(&dict_sys->mutex));
-
- /* Initialize all stats to dummy values before fetching because if
- the persistent storage contains incomplete stats (e.g. missing stats
- for some index) then we would end up with (partially) uninitialized
- stats. */
- dict_stats_empty_table(table, true);
-
- trx = trx_allocate_for_background();
-
- /* Use 'read-uncommitted' so that the SELECTs we execute
- do not get blocked in case some user has locked the rows we
- are SELECTing */
-
- trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
-
- trx_start_if_not_started(trx);
-
- dict_fs2utf8(table->name, db_utf8, sizeof(db_utf8),
- table_utf8, sizeof(table_utf8));
-
- pinfo = pars_info_create();
-
- pars_info_add_str_literal(pinfo, "database_name", db_utf8);
-
- pars_info_add_str_literal(pinfo, "table_name", table_utf8);
-
- pars_info_bind_function(pinfo,
- "fetch_table_stats_step",
- dict_stats_fetch_table_stats_step,
- table);
-
- index_fetch_arg.table = table;
- index_fetch_arg.stats_were_modified = false;
- pars_info_bind_function(pinfo,
- "fetch_index_stats_step",
- dict_stats_fetch_index_stats_step,
- &index_fetch_arg);
-
- ret = que_eval_sql(pinfo,
- "PROCEDURE FETCH_STATS () IS\n"
- "found INT;\n"
- "DECLARE FUNCTION fetch_table_stats_step;\n"
- "DECLARE FUNCTION fetch_index_stats_step;\n"
- "DECLARE CURSOR table_stats_cur IS\n"
- " SELECT\n"
- /* if you change the selected fields, be
- sure to adjust
- dict_stats_fetch_table_stats_step() */
- " n_rows,\n"
- " clustered_index_size,\n"
- " sum_of_other_index_sizes\n"
- " FROM \"" TABLE_STATS_NAME "\"\n"
- " WHERE\n"
- " database_name = :database_name AND\n"
- " table_name = :table_name;\n"
- "DECLARE CURSOR index_stats_cur IS\n"
- " SELECT\n"
- /* if you change the selected fields, be
- sure to adjust
- dict_stats_fetch_index_stats_step() */
- " index_name,\n"
- " stat_name,\n"
- " stat_value,\n"
- " sample_size\n"
- " FROM \"" INDEX_STATS_NAME "\"\n"
- " WHERE\n"
- " database_name = :database_name AND\n"
- " table_name = :table_name;\n"
-
- "BEGIN\n"
-
- "OPEN table_stats_cur;\n"
- "FETCH table_stats_cur INTO\n"
- " fetch_table_stats_step();\n"
- "IF (SQL % NOTFOUND) THEN\n"
- " CLOSE table_stats_cur;\n"
- " RETURN;\n"
- "END IF;\n"
- "CLOSE table_stats_cur;\n"
-
- "OPEN index_stats_cur;\n"
- "found := 1;\n"
- "WHILE found = 1 LOOP\n"
- " FETCH index_stats_cur INTO\n"
- " fetch_index_stats_step();\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE index_stats_cur;\n"
-
- "END;",
- TRUE, trx);
- /* pinfo is freed by que_eval_sql() */
-
- trx_commit_for_mysql(trx);
-
- trx_free_for_background(trx);
-
- if (!index_fetch_arg.stats_were_modified) { /* no index stat was applied by the callback */
- return(DB_STATS_DO_NOT_EXIST);
- }
-
- return(ret);
-}
-
-/*********************************************************************//**
-Resets stat_defrag_modified_counter to zero on every index of a table. */
-static
-void
-dict_stats_empty_defrag_modified_counter(
-	dict_table_t*	table)	/*!< in: table whose indexes are reset */
-{
-	ut_a(table);
-
-	dict_index_t*	cur = dict_table_get_first_index(table);
-
-	/* walk the table's index list until it is exhausted */
-	while (cur != NULL) {
-		cur->stat_defrag_modified_counter = 0;
-		cur = dict_table_get_next_index(cur);
-	}
-}
-
-/*********************************************************************//**
-Fetches or calculates new estimates for index statistics.
-If persistent statistics are enabled for the index's table and the
-persistent storage passes dict_stats_persistent_storage_check(), the index
-is analyzed under the table stats X-latch and the result is saved with
-dict_stats_save(). Otherwise transient statistics are calculated for the
-index. When persistent stats were requested but the storage is unusable, a
-warning is printed once per index (guarded by index->stats_error_printed
-and suppressed while innodb_index_stats_not_found is set). */
-UNIV_INTERN
-void
-dict_stats_update_for_index(
-/*========================*/
-	dict_index_t*	index)	/*!< in/out: index */
-{
-	DBUG_ENTER("dict_stats_update_for_index");
-
-	ut_ad(!mutex_own(&dict_sys->mutex));
-
-	if (dict_stats_is_persistent_enabled(index->table)) {
-
-		if (dict_stats_persistent_storage_check(false)) {
-			dict_table_stats_lock(index->table, RW_X_LATCH);
-			dict_stats_analyze_index(index);
-			dict_table_stats_unlock(index->table, RW_X_LATCH);
-			dict_stats_save(index->table, &index->id);
-			DBUG_VOID_RETURN;
-		}
-		/* else */
-
-		if (innodb_index_stats_not_found == false &&
-		    index->stats_error_printed == false) {
-			/* Fall back to transient stats since the persistent
-			storage is not present or is corrupted */
-			char	buf_table[MAX_FULL_NAME_LEN];
-			char	buf_index[MAX_FULL_NAME_LEN];
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				" InnoDB: Recalculation of persistent statistics "
-				"requested for table %s index %s but the required "
-				"persistent statistics storage is not present or is "
-				"corrupted. Using transient stats instead.\n",
-				ut_format_name(index->table->name, TRUE,
-					       buf_table, sizeof(buf_table)),
-				ut_format_name(index->name, FALSE,
-					       buf_index, sizeof(buf_index)));
-			/* Remember that the warning was printed so that the
-			guard above suppresses it on later calls; storing
-			false here would leave the guard without effect. */
-			index->stats_error_printed = true;
-		}
-	}
-
-	dict_table_stats_lock(index->table, RW_X_LATCH);
-	dict_stats_update_transient_for_index(index);
-	dict_table_stats_unlock(index->table, RW_X_LATCH);
-
-	DBUG_VOID_RETURN;
-}
-
-/*********************************************************************//**
-Calculates new estimates for table and index statistics. The statistics
-are used in query optimization.
-Unreadable tables are reported through dict_stats_report_error(). With
-srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE the stats are only emptied.
-The recalc and fetch options fall back to dict_stats_update_transient()
-(executed under the table stats X-latch) whenever the persistent storage
-cannot be used; the corresponding diagnostic is printed at most once per
-table, guarded by table->stats_error_printed.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_stats_update(
-/*==============*/
-	dict_table_t*		table,	/*!< in/out: table */
-	dict_stats_upd_option_t	stats_upd_option)
-					/*!< in: whether to (re) calc
-					the stats or to fetch them from
-					the persistent statistics
-					storage */
-{
-	char	buf[MAX_FULL_NAME_LEN];
-
-	ut_ad(!mutex_own(&dict_sys->mutex));
-
-	if (!table->is_readable()) {
-		return (dict_stats_report_error(table));
-	} else if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
-		/* If we have set a high innodb_force_recovery level, do
-		not calculate statistics, as a badly corrupted index can
-		cause a crash in it. */
-		dict_stats_empty_table(table, false);
-		return(DB_SUCCESS);
-	}
-
-	switch (stats_upd_option) {
-	case DICT_STATS_RECALC_PERSISTENT:
-
-		if (srv_read_only_mode) {
-			goto transient;
-		}
-
-		/* Persistent recalculation requested, called from
-		1) ANALYZE TABLE, or
-		2) the auto recalculation background thread, or
-		3) open table if stats do not exist on disk and auto recalc
-		   is enabled */
-
-		/* InnoDB internal tables (e.g. SYS_TABLES) cannot have
-		persistent stats enabled */
-		ut_a(strchr(table->name, '/') != NULL);
-
-		/* check if the persistent statistics storage exists
-		before calling the potentially slow function
-		dict_stats_update_persistent(); that is a
-		prerequisite for dict_stats_save() succeeding */
-		if (dict_stats_persistent_storage_check(false)) {
-
-			dberr_t	err;
-
-			err = dict_stats_update_persistent(table);
-
-			if (err != DB_SUCCESS) {
-				return(err);
-			}
-
-			err = dict_stats_save(table, NULL);
-
-			return(err);
-		}
-
-		/* Fall back to transient stats since the persistent
-		storage is not present or is corrupted */
-
-		if (innodb_table_stats_not_found == false &&
-		    table->stats_error_printed == false) {
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				" InnoDB: Recalculation of persistent statistics "
-				"requested for table %s but the required persistent "
-				"statistics storage is not present or is corrupted. "
-				"Using transient stats instead.\n",
-				ut_format_name(table->name, TRUE, buf, sizeof(buf)));
-			table->stats_error_printed = true;
-		}
-
-		goto transient;
-
-	case DICT_STATS_RECALC_TRANSIENT:
-
-		goto transient;
-
-	case DICT_STATS_EMPTY_TABLE:
-
-		dict_stats_empty_table(table, true);
-
-		/* If table is using persistent stats,
-		then save the stats on disk */
-
-		if (dict_stats_is_persistent_enabled(table)) {
-
-			if (dict_stats_persistent_storage_check(false)) {
-
-				return(dict_stats_save(table, NULL));
-			}
-
-			return(DB_STATS_DO_NOT_EXIST);
-		}
-
-		return(DB_SUCCESS);
-
-	case DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY:
-
-		/* fetch requested, either fetch from persistent statistics
-		storage or use the old method */
-
-		if (table->stat_initialized) {
-			return(DB_SUCCESS);
-		}
-
-		/* InnoDB internal tables (e.g. SYS_TABLES) cannot have
-		persistent stats enabled */
-		ut_a(strchr(table->name, '/') != NULL);
-
-		if (!dict_stats_persistent_storage_check(false)) {
-			/* persistent statistics storage does not exist
-			or is corrupted, calculate the transient stats */
-
-			if (innodb_table_stats_not_found == false &&
-			    table->stats_error_printed == false) {
-				ut_print_timestamp(stderr);
-				fprintf(stderr,
-					" InnoDB: Error: Fetch of persistent "
-					"statistics requested for table %s but the "
-					"required system tables %s and %s are not "
-					"present or have unexpected structure. "
-					"Using transient stats instead.\n",
-					ut_format_name(table->name, TRUE,
-						       buf, sizeof(buf)),
-					TABLE_STATS_NAME_PRINT,
-					INDEX_STATS_NAME_PRINT);
-				table->stats_error_printed = true;
-			}
-
-			goto transient;
-		}
-
-		dict_table_t*	t;
-
-		/* Create a dummy table object with the same name and
-		indexes, suitable for fetching the stats into it. */
-		t = dict_stats_table_clone_create(table);
-
-		dberr_t	err = dict_stats_fetch_from_ps(t);
-
-		t->stats_last_recalc = table->stats_last_recalc;
-		t->stat_modified_counter = 0;
-		dict_stats_empty_defrag_modified_counter(t);
-
-		switch (err) {
-		case DB_SUCCESS:
-
-			dict_table_stats_lock(table, RW_X_LATCH);
-
-			/* Pass reset_ignored_indexes=true as parameter
-			to dict_stats_copy. This will cause statistics
-			for corrupted indexes to be set to empty values */
-			dict_stats_copy(table, t, true);
-
-			dict_stats_assert_initialized(table);
-
-			dict_table_stats_unlock(table, RW_X_LATCH);
-
-			dict_stats_table_clone_free(t);
-
-			return(DB_SUCCESS);
-		case DB_STATS_DO_NOT_EXIST:
-
-			dict_stats_table_clone_free(t);
-
-			if (srv_read_only_mode) {
-				goto transient;
-			}
-
-			if (dict_stats_auto_recalc_is_enabled(table)) {
-				return(dict_stats_update(
-						table,
-						DICT_STATS_RECALC_PERSISTENT));
-			}
-
-			ut_format_name(table->name, TRUE, buf, sizeof(buf));
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				" InnoDB: Trying to use table %s which has "
-				"persistent statistics enabled, but auto "
-				"recalculation turned off and the statistics "
-				"do not exist in %s and %s. Please either run "
-				"\"ANALYZE TABLE %s;\" manually or enable the "
-				"auto recalculation with "
-				"\"ALTER TABLE %s STATS_AUTO_RECALC=1;\". "
-				"InnoDB will now use transient statistics for "
-				"%s.\n",
-				buf, TABLE_STATS_NAME, INDEX_STATS_NAME, buf,
-				buf, buf);
-
-			goto transient;
-		default:
-
-			dict_stats_table_clone_free(t);
-
-			if (innodb_table_stats_not_found == false &&
-			    table->stats_error_printed == false) {
-				ut_print_timestamp(stderr);
-				fprintf(stderr,
-					" InnoDB: Error fetching persistent statistics "
-					"for table %s from %s and %s: %s. "
-					"Using transient stats method instead.\n",
-					ut_format_name(table->name, TRUE, buf,
-						       sizeof(buf)),
-					TABLE_STATS_NAME,
-					INDEX_STATS_NAME,
-					ut_strerr(err));
-				/* Record that the error was reported, as the
-				other guarded messages in this function do;
-				otherwise the guard never takes effect and
-				the message is repeated on every fetch. */
-				table->stats_error_printed = true;
-			}
-
-			goto transient;
-		}
-	/* no "default:" in order to produce a compilation warning
-	about unhandled enumeration value */
-	}
-
-transient:
-
-	dict_table_stats_lock(table, RW_X_LATCH);
-
-	dict_stats_update_transient(table);
-
-	dict_table_stats_unlock(table, RW_X_LATCH);
-
-	return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Deletes the rows that belong to one index from the persistent index
-statistics table, if such rows are stored. A missing statistics storage
-(DB_STATS_DO_NOT_EXIST) is not treated as an error.
-This function creates its own trx and commits it.
-Marko's explanation of why user and sys_* tables cannot be edited in one
-trx: ibuf merges should be disabled while dict transactions are being
-rolled back. If they are not disabled, the *.ibd files need to be scanned,
-but *.ibd files should not be opened before the dict transactions have been
-rolled back and the SYS_* records for the *.ibd files have been opened.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_stats_drop_index(
-/*==================*/
-	const char*	db_and_table,/*!< in: db and table, e.g. 'db/table' */
-	const char*	iname,	/*!< in: index name */
-	char*		errstr, /*!< out: error message if != DB_SUCCESS
-				is returned */
-	ulint		errstr_sz)/*!< in: size of the errstr buffer */
-{
-	ut_ad(!mutex_own(&dict_sys->mutex));
-
-	/* a name without a database part (e.g. SYS_TABLES) has no
-	persistent stats rows, nothing to delete */
-	if (strchr(db_and_table, '/') == NULL) {
-
-		return(DB_SUCCESS);
-	}
-
-	char	db_utf8[MAX_DB_UTF8_LEN];
-	char	table_utf8[MAX_TABLE_UTF8_LEN];
-
-	dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
-		     table_utf8, sizeof(table_utf8));
-
-	pars_info_t*	bound = pars_info_create();
-
-	pars_info_add_str_literal(bound, "database_name", db_utf8);
-	pars_info_add_str_literal(bound, "table_name", table_utf8);
-	pars_info_add_str_literal(bound, "index_name", iname);
-
-	rw_lock_x_lock(&dict_operation_lock);
-	mutex_enter(&dict_sys->mutex);
-
-	dberr_t	err = dict_stats_exec_sql(
-		bound,
-		"PROCEDURE DROP_INDEX_STATS () IS\n"
-		"BEGIN\n"
-		"DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
-		"database_name = :database_name AND\n"
-		"table_name = :table_name AND\n"
-		"index_name = :index_name;\n"
-		"END;\n", NULL);
-
-	mutex_exit(&dict_sys->mutex);
-	rw_lock_x_unlock(&dict_operation_lock);
-
-	/* absent stats storage counts as success */
-	if (err == DB_SUCCESS || err == DB_STATS_DO_NOT_EXIST) {
-
-		return(DB_SUCCESS);
-	}
-
-	const char*	lock_hint = "";
-
-	if (err == DB_LOCK_WAIT_TIMEOUT) {
-		lock_hint = " because the rows are locked";
-	}
-
-	ut_snprintf(errstr, errstr_sz,
-		    "Unable to delete statistics for index %s "
-		    "from %s%s: %s. They can be deleted later using "
-		    "DELETE FROM %s WHERE "
-		    "database_name = '%s' AND "
-		    "table_name = '%s' AND "
-		    "index_name = '%s';",
-		    iname,
-		    INDEX_STATS_NAME_PRINT,
-		    lock_hint,
-		    ut_strerr(err),
-		    INDEX_STATS_NAME_PRINT,
-		    db_utf8,
-		    table_utf8,
-		    iname);
-
-	ut_print_timestamp(stderr);
-	fprintf(stderr, " InnoDB: %s\n", errstr);
-
-	return(err);
-}
-
-/*********************************************************************//**
-Runs the internal SQL statement
-DELETE FROM mysql.innodb_table_stats
-WHERE database_name = '...' AND table_name = '...';
-through dict_stats_exec_sql() with a NULL transaction argument.
-Creates its own transaction and commits it.
-The caller must hold dict_sys->mutex (asserted in debug builds).
-@return DB_SUCCESS or error code */
-UNIV_INLINE
-dberr_t
-dict_stats_delete_from_table_stats(
-/*===============================*/
-	const char*	database_name,	/*!< in: database name, e.g. 'db' */
-	const char*	table_name)	/*!< in: table name, e.g. 'table' */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(mutex_own(&dict_sys->mutex));
-
-	pars_info_t*	bound = pars_info_create();
-
-	pars_info_add_str_literal(bound, "database_name", database_name);
-	pars_info_add_str_literal(bound, "table_name", table_name);
-
-	return(dict_stats_exec_sql(
-		bound,
-		"PROCEDURE DELETE_FROM_TABLE_STATS () IS\n"
-		"BEGIN\n"
-		"DELETE FROM \"" TABLE_STATS_NAME "\" WHERE\n"
-		"database_name = :database_name AND\n"
-		"table_name = :table_name;\n"
-		"END;\n", NULL));
-}
-
-/*********************************************************************//**
-Runs the internal SQL statement
-DELETE FROM mysql.innodb_index_stats
-WHERE database_name = '...' AND table_name = '...';
-through dict_stats_exec_sql() with a NULL transaction argument.
-Creates its own transaction and commits it.
-The caller must hold dict_sys->mutex (asserted in debug builds).
-@return DB_SUCCESS or error code */
-UNIV_INLINE
-dberr_t
-dict_stats_delete_from_index_stats(
-/*===============================*/
-	const char*	database_name,	/*!< in: database name, e.g. 'db' */
-	const char*	table_name)	/*!< in: table name, e.g. 'table' */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(mutex_own(&dict_sys->mutex));
-
-	pars_info_t*	bound = pars_info_create();
-
-	pars_info_add_str_literal(bound, "database_name", database_name);
-	pars_info_add_str_literal(bound, "table_name", table_name);
-
-	return(dict_stats_exec_sql(
-		bound,
-		"PROCEDURE DELETE_FROM_INDEX_STATS () IS\n"
-		"BEGIN\n"
-		"DELETE FROM \"" INDEX_STATS_NAME "\" WHERE\n"
-		"database_name = :database_name AND\n"
-		"table_name = :table_name;\n"
-		"END;\n", NULL));
-}
-
-/*********************************************************************//**
-Deletes the persistent statistics rows of a table and of all its indexes,
-if the statistics storage exists and holds data for the table. The table
-stats rows are deleted first; the index stats rows only if that succeeded.
-Names without a database part and the two statistics tables themselves are
-skipped. DB_STATS_DO_NOT_EXIST is treated as success.
-This function creates its own transaction and commits it.
-The caller must hold dict_sys->mutex (asserted in debug builds).
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_stats_drop_table(
-/*==================*/
-	const char*	db_and_table,	/*!< in: db and table, e.g. 'db/table' */
-	char*		errstr,		/*!< out: error message
-					if != DB_SUCCESS is returned */
-	ulint		errstr_sz)	/*!< in: size of errstr buffer */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(mutex_own(&dict_sys->mutex));
-
-	/* tables whose name lacks a database part (e.g. SYS_TABLES)
-	are skipped */
-	if (strchr(db_and_table, '/') == NULL) {
-
-		return(DB_SUCCESS);
-	}
-
-	/* the statistics tables themselves are skipped as well */
-	if (strcmp(db_and_table, TABLE_STATS_NAME) == 0
-	    || strcmp(db_and_table, INDEX_STATS_NAME) == 0) {
-
-		return(DB_SUCCESS);
-	}
-
-	char	db_utf8[MAX_DB_UTF8_LEN];
-	char	table_utf8[MAX_TABLE_UTF8_LEN];
-
-	dict_fs2utf8(db_and_table, db_utf8, sizeof(db_utf8),
-		     table_utf8, sizeof(table_utf8));
-
-	dberr_t	err = dict_stats_delete_from_table_stats(db_utf8, table_utf8);
-
-	if (err == DB_SUCCESS) {
-		err = dict_stats_delete_from_index_stats(db_utf8, table_utf8);
-	}
-
-	if (err == DB_SUCCESS || err == DB_STATS_DO_NOT_EXIST) {
-
-		return(DB_SUCCESS);
-	}
-
-	ut_snprintf(errstr, errstr_sz,
-		    "Unable to delete statistics for table %s.%s: %s. "
-		    "They can be deleted later using "
-
-		    "DELETE FROM %s WHERE "
-		    "database_name = '%s' AND "
-		    "table_name = '%s'; "
-
-		    "DELETE FROM %s WHERE "
-		    "database_name = '%s' AND "
-		    "table_name = '%s';",
-
-		    db_utf8, table_utf8,
-		    ut_strerr(err),
-
-		    INDEX_STATS_NAME_PRINT,
-		    db_utf8, table_utf8,
-
-		    TABLE_STATS_NAME_PRINT,
-		    db_utf8, table_utf8);
-
-	return(err);
-}
-
-/*********************************************************************//**
-Runs the internal SQL statement
-UPDATE mysql.innodb_table_stats SET
-database_name = '...', table_name = '...'
-WHERE database_name = '...' AND table_name = '...';
-through dict_stats_exec_sql() with a NULL transaction argument.
-Creates its own transaction and commits it.
-The caller must hold dict_sys->mutex (asserted in debug builds).
-@return DB_SUCCESS or error code */
-UNIV_INLINE
-dberr_t
-dict_stats_rename_in_table_stats(
-/*=============================*/
-	const char*	old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
-	const char*	old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
-	const char*	new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
-	const char*	new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(mutex_own(&dict_sys->mutex));
-
-	pars_info_t*	bound = pars_info_create();
-
-	pars_info_add_str_literal(bound, "old_dbname_utf8", old_dbname_utf8);
-	pars_info_add_str_literal(bound, "old_tablename_utf8", old_tablename_utf8);
-	pars_info_add_str_literal(bound, "new_dbname_utf8", new_dbname_utf8);
-	pars_info_add_str_literal(bound, "new_tablename_utf8", new_tablename_utf8);
-
-	return(dict_stats_exec_sql(
-		bound,
-		"PROCEDURE RENAME_IN_TABLE_STATS () IS\n"
-		"BEGIN\n"
-		"UPDATE \"" TABLE_STATS_NAME "\" SET\n"
-		"database_name = :new_dbname_utf8,\n"
-		"table_name = :new_tablename_utf8\n"
-		"WHERE\n"
-		"database_name = :old_dbname_utf8 AND\n"
-		"table_name = :old_tablename_utf8;\n"
-		"END;\n", NULL));
-}
-
-/*********************************************************************//**
-Runs the internal SQL statement
-UPDATE mysql.innodb_index_stats SET
-database_name = '...', table_name = '...'
-WHERE database_name = '...' AND table_name = '...';
-through dict_stats_exec_sql() with a NULL transaction argument.
-Creates its own transaction and commits it.
-The caller must hold dict_sys->mutex (asserted in debug builds).
-@return DB_SUCCESS or error code */
-UNIV_INLINE
-dberr_t
-dict_stats_rename_in_index_stats(
-/*=============================*/
-	const char*	old_dbname_utf8,/*!< in: database name, e.g. 'olddb' */
-	const char*	old_tablename_utf8,/*!< in: table name, e.g. 'oldtable' */
-	const char*	new_dbname_utf8,/*!< in: database name, e.g. 'newdb' */
-	const char*	new_tablename_utf8)/*!< in: table name, e.g. 'newtable' */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(mutex_own(&dict_sys->mutex));
-
-	pars_info_t*	bound = pars_info_create();
-
-	pars_info_add_str_literal(bound, "old_dbname_utf8", old_dbname_utf8);
-	pars_info_add_str_literal(bound, "old_tablename_utf8", old_tablename_utf8);
-	pars_info_add_str_literal(bound, "new_dbname_utf8", new_dbname_utf8);
-	pars_info_add_str_literal(bound, "new_tablename_utf8", new_tablename_utf8);
-
-	return(dict_stats_exec_sql(
-		bound,
-		"PROCEDURE RENAME_IN_INDEX_STATS () IS\n"
-		"BEGIN\n"
-		"UPDATE \"" INDEX_STATS_NAME "\" SET\n"
-		"database_name = :new_dbname_utf8,\n"
-		"table_name = :new_tablename_utf8\n"
-		"WHERE\n"
-		"database_name = :old_dbname_utf8 AND\n"
-		"table_name = :old_tablename_utf8;\n"
-		"END;\n", NULL));
-}
-
-/*********************************************************************//**
-Renames a table in InnoDB persistent stats storage.
-This function creates its own transaction and commits it.
-The rows in TABLE_STATS_NAME are renamed first and, only if that succeeded,
-the rows in INDEX_STATS_NAME. Each rename is attempted up to 5 times while
-it fails with DB_DEADLOCK, DB_DUPLICATE_KEY or DB_LOCK_WAIT_TIMEOUT; on
-DB_DUPLICATE_KEY the rows already stored under the new name are deleted
-before the next attempt. After any failed attempt dict_operation_lock and
-dict_sys->mutex are released for a 0.2 second sleep and then re-acquired.
-DB_STATS_DO_NOT_EXIST is treated as success. Renames that involve the
-statistics tables themselves are skipped.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_stats_rename_table(
-/*====================*/
- const char* old_name, /*!< in: old name, e.g. 'db/table' */
- const char* new_name, /*!< in: new name, e.g. 'db/table' */
- char* errstr, /*!< out: error string if != DB_SUCCESS
- is returned */
- size_t errstr_sz) /*!< in: errstr size */
-{
- char old_db_utf8[MAX_DB_UTF8_LEN];
- char new_db_utf8[MAX_DB_UTF8_LEN];
- char old_table_utf8[MAX_TABLE_UTF8_LEN];
- char new_table_utf8[MAX_TABLE_UTF8_LEN];
- dberr_t ret;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(!mutex_own(&dict_sys->mutex));
-
- /* skip innodb_table_stats and innodb_index_stats themselves */
- if (strcmp(old_name, TABLE_STATS_NAME) == 0
- || strcmp(old_name, INDEX_STATS_NAME) == 0
- || strcmp(new_name, TABLE_STATS_NAME) == 0
- || strcmp(new_name, INDEX_STATS_NAME) == 0) {
-
- return(DB_SUCCESS);
- }
-
- dict_fs2utf8(old_name, old_db_utf8, sizeof(old_db_utf8),
- old_table_utf8, sizeof(old_table_utf8));
-
- dict_fs2utf8(new_name, new_db_utf8, sizeof(new_db_utf8),
- new_table_utf8, sizeof(new_table_utf8));
-
- rw_lock_x_lock(&dict_operation_lock);
- mutex_enter(&dict_sys->mutex);
-
- ulint n_attempts = 0;
- do { /* step 1: rename the rows in the table stats table */
- n_attempts++;
-
- ret = dict_stats_rename_in_table_stats(
- old_db_utf8, old_table_utf8,
- new_db_utf8, new_table_utf8);
-
- if (ret == DB_DUPLICATE_KEY) { /* make room for the retry; the delete result is ignored */
- dict_stats_delete_from_table_stats(
- new_db_utf8, new_table_utf8);
- }
-
- if (ret == DB_STATS_DO_NOT_EXIST) {
- ret = DB_SUCCESS;
- }
-
- if (ret != DB_SUCCESS) { /* drop both locks while sleeping, then take them again */
- mutex_exit(&dict_sys->mutex);
- rw_lock_x_unlock(&dict_operation_lock);
- os_thread_sleep(200000 /* 0.2 sec */);
- rw_lock_x_lock(&dict_operation_lock);
- mutex_enter(&dict_sys->mutex);
- }
- } while ((ret == DB_DEADLOCK
- || ret == DB_DUPLICATE_KEY
- || ret == DB_LOCK_WAIT_TIMEOUT)
- && n_attempts < 5);
-
- if (ret != DB_SUCCESS) { /* table stats rename failed: report and do not touch index stats */
- ut_snprintf(errstr, errstr_sz,
- "Unable to rename statistics from "
- "%s.%s to %s.%s in %s: %s. "
- "They can be renamed later using "
-
- "UPDATE %s SET "
- "database_name = '%s', "
- "table_name = '%s' "
- "WHERE "
- "database_name = '%s' AND "
- "table_name = '%s';",
-
- old_db_utf8, old_table_utf8,
- new_db_utf8, new_table_utf8,
- TABLE_STATS_NAME_PRINT,
- ut_strerr(ret),
-
- TABLE_STATS_NAME_PRINT,
- new_db_utf8, new_table_utf8,
- old_db_utf8, old_table_utf8);
- mutex_exit(&dict_sys->mutex);
- rw_lock_x_unlock(&dict_operation_lock);
- return(ret);
- }
- /* else */
-
- n_attempts = 0;
- do { /* step 2: rename the rows in the index stats table, same retry scheme */
- n_attempts++;
-
- ret = dict_stats_rename_in_index_stats(
- old_db_utf8, old_table_utf8,
- new_db_utf8, new_table_utf8);
-
- if (ret == DB_DUPLICATE_KEY) {
- dict_stats_delete_from_index_stats(
- new_db_utf8, new_table_utf8);
- }
-
- if (ret == DB_STATS_DO_NOT_EXIST) {
- ret = DB_SUCCESS;
- }
-
- if (ret != DB_SUCCESS) {
- mutex_exit(&dict_sys->mutex);
- rw_lock_x_unlock(&dict_operation_lock);
- os_thread_sleep(200000 /* 0.2 sec */);
- rw_lock_x_lock(&dict_operation_lock);
- mutex_enter(&dict_sys->mutex);
- }
- } while ((ret == DB_DEADLOCK
- || ret == DB_DUPLICATE_KEY
- || ret == DB_LOCK_WAIT_TIMEOUT)
- && n_attempts < 5);
-
- mutex_exit(&dict_sys->mutex);
- rw_lock_x_unlock(&dict_operation_lock);
-
- if (ret != DB_SUCCESS) {
- ut_snprintf(errstr, errstr_sz,
- "Unable to rename statistics from "
- "%s.%s to %s.%s in %s: %s. "
- "They can be renamed later using "
-
- "UPDATE %s SET "
- "database_name = '%s', "
- "table_name = '%s' "
- "WHERE "
- "database_name = '%s' AND "
- "table_name = '%s';",
-
- old_db_utf8, old_table_utf8,
- new_db_utf8, new_table_utf8,
- INDEX_STATS_NAME_PRINT,
- ut_strerr(ret),
-
- INDEX_STATS_NAME_PRINT,
- new_db_utf8, new_table_utf8,
- old_db_utf8, old_table_utf8);
- }
-
- return(ret);
-}
-
-/*********************************************************************//**
-Stores the defragmentation result of an index, i.e. its
-stat_defrag_n_pages_freed value, as the "n_pages_freed" index statistic.
-Nothing is saved for an index for which dict_index_is_univ() holds.
-dict_operation_lock (X) and dict_sys->mutex are held while saving.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_stats_save_defrag_summary(
-	dict_index_t*	index)	/*!< in: index */
-{
-	const lint	saved_at = (lint) ut_time();
-
-	if (dict_index_is_univ(index)) {
-		return DB_SUCCESS;
-	}
-
-	rw_lock_x_lock(&dict_operation_lock);
-	mutex_enter(&dict_sys->mutex);
-
-	const dberr_t	err = dict_stats_save_index_stat(
-		index, saved_at, "n_pages_freed",
-		index->stat_defrag_n_pages_freed,
-		NULL,
-		"Number of pages freed during"
-		" last defragmentation run.",
-		NULL);
-
-	mutex_exit(&dict_sys->mutex);
-	rw_lock_x_unlock(&dict_operation_lock);
-
-	return (err);
-}
-
-/*********************************************************************//**
-Stores the defragmentation statistics of an index: "n_page_split",
-"n_leaf_pages_defrag" and "n_leaf_pages_reserved". The three values are
-saved in that order and saving stops at the first failure.
-An unreadable index is reported through dict_stats_report_error(); nothing
-is saved for an index for which dict_index_is_univ() holds or whose size
-cannot be determined.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_stats_save_defrag_stats(
-	dict_index_t*	index)	/*!< in: index */
-{
-	if (!index->is_readable()) {
-		return (dict_stats_report_error(index->table, true));
-	}
-
-	if (dict_index_is_univ(index)) {
-		return DB_SUCCESS;
-	}
-
-	const lint	saved_at = (lint) ut_time();
-	mtr_t		mtr;
-	ulint		leaf_pages;
-	ulint		leaf_reserved;
-
-	/* read the leaf page counts under an S-lock on the index */
-	mtr_start(&mtr);
-	mtr_s_lock(dict_index_get_lock(index), &mtr);
-	leaf_reserved = btr_get_size_and_reserved(index, BTR_N_LEAF_PAGES,
-						  &leaf_pages, &mtr);
-	mtr_commit(&mtr);
-
-	if (leaf_reserved == ULINT_UNDEFINED) {
-		// The index name is different during fast index creation,
-		// so the stats won't be associated with the right index
-		// for later use. We just return without saving.
-		return DB_SUCCESS;
-	}
-
-	rw_lock_x_lock(&dict_operation_lock);
-	mutex_enter(&dict_sys->mutex);
-
-	dberr_t	err = dict_stats_save_index_stat(
-		index, saved_at, "n_page_split",
-		index->stat_defrag_n_page_split,
-		NULL,
-		"Number of new page splits on leaves"
-		" since last defragmentation.",
-		NULL);
-
-	if (err == DB_SUCCESS) {
-		err = dict_stats_save_index_stat(
-			index, saved_at, "n_leaf_pages_defrag",
-			leaf_pages,
-			NULL,
-			"Number of leaf pages when this stat is saved to disk",
-			NULL);
-	}
-
-	if (err == DB_SUCCESS) {
-		err = dict_stats_save_index_stat(
-			index, saved_at, "n_leaf_pages_reserved",
-			leaf_reserved,
-			NULL,
-			"Number of pages reserved for this index leaves when this stat "
-			"is saved to disk",
-			NULL);
-	}
-
-	mutex_exit(&dict_sys->mutex);
-	rw_lock_x_unlock(&dict_operation_lock);
-
-	return (err);
-}
-
-/* tests @{ */
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-/* The following unit tests test some of the functions in this file
-individually, such testing cannot be performed by the mysql-test framework
-via SQL. */
-
-/* test_dict_table_schema_check() @{ */
-/** Unit test for dict_table_schema_check().
-Expects the table 'test/tcheck' (see the CREATE TABLE statement below) to
-exist. Runs a series of checks while holding dict_sys->mutex, prints one
-"OK: ..." line per passed check and stops at the first failed check after
-printing an "ERROR: ..." line. */
-void
-test_dict_table_schema_check()
-{
-	/*
-	CREATE TABLE tcheck (
-		c01 VARCHAR(123),
-		c02 INT,
-		c03 INT NOT NULL,
-		c04 INT UNSIGNED,
-		c05 BIGINT,
-		c06 BIGINT UNSIGNED NOT NULL,
-		c07 TIMESTAMP
-	) ENGINE=INNODB;
-	*/
-	/* definition for the table 'test/tcheck'; the last entry
-	("c_extra") does not exist in the table and is only used by the
-	"missing columns" check below */
-	dict_col_meta_t	columns[] = {
-		{"c01", DATA_VARCHAR, 0, 123},
-		{"c02", DATA_INT, 0, 4},
-		{"c03", DATA_INT, DATA_NOT_NULL, 4},
-		{"c04", DATA_INT, DATA_UNSIGNED, 4},
-		{"c05", DATA_INT, 0, 8},
-		{"c06", DATA_INT, DATA_NOT_NULL | DATA_UNSIGNED, 8},
-		{"c07", DATA_INT, 0, 4},
-		{"c_extra", DATA_INT, 0, 4}
-	};
-	dict_table_schema_t	schema = {
-		"test/tcheck",
-		0 /* will be set individually for each test below */,
-		columns
-	};
-	char	errstr[512];
-
-	ut_snprintf(errstr, sizeof(errstr), "Table not found");
-
-	/* prevent any data dictionary modifications while we are checking
-	the tables' structure */
-
-	mutex_enter(&(dict_sys->mutex));
-
-	/* check that a valid table is reported as valid */
-	schema.n_cols = 7;
-	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
-	    == DB_SUCCESS) {
-		printf("OK: test.tcheck ok\n");
-	} else {
-		printf("ERROR: %s\n", errstr);
-		printf("ERROR: test.tcheck not present or corrupted\n");
-		goto test_dict_table_schema_check_end;
-	}
-
-	/* check columns with wrong length */
-	schema.columns[1].len = 8;
-	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
-	    != DB_SUCCESS) {
-		printf("OK: test.tcheck.c02 has different length and is "
-		       "reported as corrupted\n");
-	} else {
-		/* This is the failure branch: a length mismatch went
-		undetected, so it must be reported as an error like every
-		other failed check in this function. */
-		printf("ERROR: test.tcheck.c02 has different length but is "
-		       "reported as ok\n");
-		goto test_dict_table_schema_check_end;
-	}
-	schema.columns[1].len = 4;
-
-	/* request that c02 is NOT NULL while actually it does not have
-	this flag set */
-	schema.columns[1].prtype_mask |= DATA_NOT_NULL;
-	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
-	    != DB_SUCCESS) {
-		printf("OK: test.tcheck.c02 does not have NOT NULL while "
-		       "it should and is reported as corrupted\n");
-	} else {
-		printf("ERROR: test.tcheck.c02 does not have NOT NULL while "
-		       "it should and is not reported as corrupted\n");
-		goto test_dict_table_schema_check_end;
-	}
-	schema.columns[1].prtype_mask &= ~DATA_NOT_NULL;
-
-	/* check a table that contains some extra columns */
-	schema.n_cols = 6;
-	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
-	    == DB_SUCCESS) {
-		printf("ERROR: test.tcheck has more columns but is not "
-		       "reported as corrupted\n");
-		goto test_dict_table_schema_check_end;
-	} else {
-		printf("OK: test.tcheck has more columns and is "
-		       "reported as corrupted\n");
-	}
-
-	/* check a table that has some columns missing */
-	schema.n_cols = 8;
-	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
-	    != DB_SUCCESS) {
-		printf("OK: test.tcheck has missing columns and is "
-		       "reported as corrupted\n");
-	} else {
-		printf("ERROR: test.tcheck has missing columns but is "
-		       "reported as ok\n");
-		goto test_dict_table_schema_check_end;
-	}
-
-	/* check non-existent table */
-	schema.table_name = "test/tcheck_nonexistent";
-	if (dict_table_schema_check(&schema, errstr, sizeof(errstr))
-	    != DB_SUCCESS) {
-		printf("OK: test.tcheck_nonexistent is not present\n");
-	} else {
-		printf("ERROR: test.tcheck_nonexistent is present!?\n");
-		goto test_dict_table_schema_check_end;
-	}
-
-test_dict_table_schema_check_end:
-
-	mutex_exit(&(dict_sys->mutex));
-}
-/* @} */
-
-/* save/fetch aux macros @{ */
-/* Fixture values shared by test_dict_stats_save(), which stores them, and
-test_dict_stats_fetch_from_ps(), which asserts that the same values are
-read back. */
-
-/* Name of the dummy table; it is combined as "<database>/<table>". */
-#define TEST_DATABASE_NAME "foobardb"
-#define TEST_TABLE_NAME "test_dict_stats"
-
-/* Table-level stats of the dummy table. */
-#define TEST_N_ROWS 111
-#define TEST_CLUSTERED_INDEX_SIZE 222
-#define TEST_SUM_OF_OTHER_INDEX_SIZES 333
-
-/* First dummy index: one column, hence one n_diff value and one
-sample size. */
-#define TEST_IDX1_NAME "tidx1"
-#define TEST_IDX1_COL1_NAME "tidx1_col1"
-#define TEST_IDX1_INDEX_SIZE 123
-#define TEST_IDX1_N_LEAF_PAGES 234
-#define TEST_IDX1_N_DIFF1 50
-#define TEST_IDX1_N_DIFF1_SAMPLE_SIZE 500
-
-/* Second dummy index: four columns, hence four n_diff values and four
-sample sizes (one per column prefix). */
-#define TEST_IDX2_NAME "tidx2"
-#define TEST_IDX2_COL1_NAME "tidx2_col1"
-#define TEST_IDX2_COL2_NAME "tidx2_col2"
-#define TEST_IDX2_COL3_NAME "tidx2_col3"
-#define TEST_IDX2_COL4_NAME "tidx2_col4"
-#define TEST_IDX2_INDEX_SIZE 321
-#define TEST_IDX2_N_LEAF_PAGES 432
-#define TEST_IDX2_N_DIFF1 60
-#define TEST_IDX2_N_DIFF1_SAMPLE_SIZE 600
-#define TEST_IDX2_N_DIFF2 61
-#define TEST_IDX2_N_DIFF2_SAMPLE_SIZE 610
-#define TEST_IDX2_N_DIFF3 62
-#define TEST_IDX2_N_DIFF3_SAMPLE_SIZE 620
-#define TEST_IDX2_N_DIFF4 63
-#define TEST_IDX2_N_DIFF4_SAMPLE_SIZE 630
-/* @} */
-
-/* test_dict_stats_save() @{ */
-/** Unit test for dict_stats_save().
-Builds a dummy table with two dummy indexes on the stack, fills them with
-the TEST_* fixture values, saves them with dict_stats_save() and asserts
-that the call succeeded. It then prints three SELECT statements; each
-evaluates to 1 when the expected rows are present in the stats tables, so
-the result has to be verified by running the printed statements. */
-void
-test_dict_stats_save()
-{
- dict_table_t table;
- dict_index_t index1;
- dict_field_t index1_fields[1];
- ib_uint64_t index1_stat_n_diff_key_vals[1];
- ib_uint64_t index1_stat_n_sample_sizes[1];
- dict_index_t index2;
- dict_field_t index2_fields[4];
- ib_uint64_t index2_stat_n_diff_key_vals[4];
- ib_uint64_t index2_stat_n_sample_sizes[4];
- dberr_t ret;
-
- /* craft a dummy dict_table_t */
- table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
- table.stat_n_rows = TEST_N_ROWS;
- table.stat_clustered_index_size = TEST_CLUSTERED_INDEX_SIZE;
- table.stat_sum_of_other_index_sizes = TEST_SUM_OF_OTHER_INDEX_SIZES;
- UT_LIST_INIT(table.indexes);
- UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
- UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
- /* the magic numbers are set only in debug builds (ut_d) */
- ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
- ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
-
- /* first dummy index: a single column */
- index1.name = TEST_IDX1_NAME;
- index1.table = &table;
- index1.cached = 1;
- index1.n_uniq = 1;
- index1.fields = index1_fields;
- index1.stat_n_diff_key_vals = index1_stat_n_diff_key_vals;
- index1.stat_n_sample_sizes = index1_stat_n_sample_sizes;
- index1.stat_index_size = TEST_IDX1_INDEX_SIZE;
- index1.stat_n_leaf_pages = TEST_IDX1_N_LEAF_PAGES;
- index1_fields[0].name = TEST_IDX1_COL1_NAME;
- index1_stat_n_diff_key_vals[0] = TEST_IDX1_N_DIFF1;
- index1_stat_n_sample_sizes[0] = TEST_IDX1_N_DIFF1_SAMPLE_SIZE;
-
- /* second dummy index: four columns */
- ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
- index2.name = TEST_IDX2_NAME;
- index2.table = &table;
- index2.cached = 1;
- index2.n_uniq = 4;
- index2.fields = index2_fields;
- index2.stat_n_diff_key_vals = index2_stat_n_diff_key_vals;
- index2.stat_n_sample_sizes = index2_stat_n_sample_sizes;
- index2.stat_index_size = TEST_IDX2_INDEX_SIZE;
- index2.stat_n_leaf_pages = TEST_IDX2_N_LEAF_PAGES;
- index2_fields[0].name = TEST_IDX2_COL1_NAME;
- index2_fields[1].name = TEST_IDX2_COL2_NAME;
- index2_fields[2].name = TEST_IDX2_COL3_NAME;
- index2_fields[3].name = TEST_IDX2_COL4_NAME;
- index2_stat_n_diff_key_vals[0] = TEST_IDX2_N_DIFF1;
- index2_stat_n_diff_key_vals[1] = TEST_IDX2_N_DIFF2;
- index2_stat_n_diff_key_vals[2] = TEST_IDX2_N_DIFF3;
- index2_stat_n_diff_key_vals[3] = TEST_IDX2_N_DIFF4;
- index2_stat_n_sample_sizes[0] = TEST_IDX2_N_DIFF1_SAMPLE_SIZE;
- index2_stat_n_sample_sizes[1] = TEST_IDX2_N_DIFF2_SAMPLE_SIZE;
- index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE;
- index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE;
-
- ret = dict_stats_save(&table, NULL);
-
- ut_a(ret == DB_SUCCESS);
-
- printf("\nOK: stats saved successfully, now go ahead and read "
- "what's inside %s and %s:\n\n",
- TABLE_STATS_NAME_PRINT,
- INDEX_STATS_NAME_PRINT);
-
- /* verification query for the table-level row */
- printf("SELECT COUNT(*) = 1 AS table_stats_saved_successfully\n"
- "FROM %s\n"
- "WHERE\n"
- "database_name = '%s' AND\n"
- "table_name = '%s' AND\n"
- "n_rows = %d AND\n"
- "clustered_index_size = %d AND\n"
- "sum_of_other_index_sizes = %d;\n"
- "\n",
- TABLE_STATS_NAME_PRINT,
- TEST_DATABASE_NAME,
- TEST_TABLE_NAME,
- TEST_N_ROWS,
- TEST_CLUSTERED_INDEX_SIZE,
- TEST_SUM_OF_OTHER_INDEX_SIZES);
-
- /* verification query for the first index: 3 rows expected
- (size, n_leaf_pages and one n_diff_pfx row) */
- printf("SELECT COUNT(*) = 3 AS tidx1_stats_saved_successfully\n"
- "FROM %s\n"
- "WHERE\n"
- "database_name = '%s' AND\n"
- "table_name = '%s' AND\n"
- "index_name = '%s' AND\n"
- "(\n"
- " (stat_name = 'size' AND stat_value = %d AND"
- " sample_size IS NULL) OR\n"
- " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
- " sample_size IS NULL) OR\n"
- " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
- " sample_size = '%d' AND stat_description = '%s')\n"
- ");\n"
- "\n",
- INDEX_STATS_NAME_PRINT,
- TEST_DATABASE_NAME,
- TEST_TABLE_NAME,
- TEST_IDX1_NAME,
- TEST_IDX1_INDEX_SIZE,
- TEST_IDX1_N_LEAF_PAGES,
- TEST_IDX1_N_DIFF1,
- TEST_IDX1_N_DIFF1_SAMPLE_SIZE,
- TEST_IDX1_COL1_NAME);
-
- /* verification query for the second index: 6 rows expected
- (size, n_leaf_pages and four n_diff_pfx rows); the arguments below
- are grouped per n_diff_pfx row: value, sample size, column names */
- printf("SELECT COUNT(*) = 6 AS tidx2_stats_saved_successfully\n"
- "FROM %s\n"
- "WHERE\n"
- "database_name = '%s' AND\n"
- "table_name = '%s' AND\n"
- "index_name = '%s' AND\n"
- "(\n"
- " (stat_name = 'size' AND stat_value = %d AND"
- " sample_size IS NULL) OR\n"
- " (stat_name = 'n_leaf_pages' AND stat_value = %d AND"
- " sample_size IS NULL) OR\n"
- " (stat_name = 'n_diff_pfx01' AND stat_value = %d AND"
- " sample_size = '%d' AND stat_description = '%s') OR\n"
- " (stat_name = 'n_diff_pfx02' AND stat_value = %d AND"
- " sample_size = '%d' AND stat_description = '%s,%s') OR\n"
- " (stat_name = 'n_diff_pfx03' AND stat_value = %d AND"
- " sample_size = '%d' AND stat_description = '%s,%s,%s') OR\n"
- " (stat_name = 'n_diff_pfx04' AND stat_value = %d AND"
- " sample_size = '%d' AND stat_description = '%s,%s,%s,%s')\n"
- ");\n"
- "\n",
- INDEX_STATS_NAME_PRINT,
- TEST_DATABASE_NAME,
- TEST_TABLE_NAME,
- TEST_IDX2_NAME,
- TEST_IDX2_INDEX_SIZE,
- TEST_IDX2_N_LEAF_PAGES,
- TEST_IDX2_N_DIFF1,
- TEST_IDX2_N_DIFF1_SAMPLE_SIZE, TEST_IDX2_COL1_NAME,
- TEST_IDX2_N_DIFF2,
- TEST_IDX2_N_DIFF2_SAMPLE_SIZE,
- TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME,
- TEST_IDX2_N_DIFF3,
- TEST_IDX2_N_DIFF3_SAMPLE_SIZE,
- TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
- TEST_IDX2_N_DIFF4,
- TEST_IDX2_N_DIFF4_SAMPLE_SIZE,
- TEST_IDX2_COL1_NAME, TEST_IDX2_COL2_NAME, TEST_IDX2_COL3_NAME,
- TEST_IDX2_COL4_NAME);
-}
-/* @} */
-
-/* test_dict_stats_fetch_from_ps() @{ */
-/** Unit test for dict_stats_fetch_from_ps().
-Builds a dummy table with two dummy indexes, fetches their stats with
-dict_stats_fetch_from_ps() and asserts that every fetched value equals
-the corresponding TEST_* fixture value. */
-void
-test_dict_stats_fetch_from_ps()
-{
-	dict_table_t	table;
-	dict_index_t	index1;
-	dict_index_t	index2;
-	ib_uint64_t	idx1_n_diff[1];
-	ib_uint64_t	idx1_n_sample[1];
-	ib_uint64_t	idx2_n_diff[4];
-	ib_uint64_t	idx2_n_sample[4];
-
-	/* dummy table owning the two dummy indexes */
-	table.name = (char*) (TEST_DATABASE_NAME "/" TEST_TABLE_NAME);
-	UT_LIST_INIT(table.indexes);
-	UT_LIST_ADD_LAST(indexes, table.indexes, &index1);
-	UT_LIST_ADD_LAST(indexes, table.indexes, &index2);
-	ut_d(table.magic_n = DICT_TABLE_MAGIC_N);
-
-	/* single-column index; the arrays receive the fetched values */
-	index1.name = TEST_IDX1_NAME;
-	ut_d(index1.magic_n = DICT_INDEX_MAGIC_N);
-	index1.cached = 1;
-	index1.n_uniq = 1;
-	index1.stat_n_diff_key_vals = idx1_n_diff;
-	index1.stat_n_sample_sizes = idx1_n_sample;
-
-	/* four-column index */
-	index2.name = TEST_IDX2_NAME;
-	ut_d(index2.magic_n = DICT_INDEX_MAGIC_N);
-	index2.cached = 1;
-	index2.n_uniq = 4;
-	index2.stat_n_diff_key_vals = idx2_n_diff;
-	index2.stat_n_sample_sizes = idx2_n_sample;
-
-	const dberr_t	err = dict_stats_fetch_from_ps(&table);
-
-	ut_a(err == DB_SUCCESS);
-
-	/* table-level stats */
-	ut_a(table.stat_n_rows == TEST_N_ROWS);
-	ut_a(table.stat_clustered_index_size == TEST_CLUSTERED_INDEX_SIZE);
-	ut_a(table.stat_sum_of_other_index_sizes
-	     == TEST_SUM_OF_OTHER_INDEX_SIZES);
-
-	/* first index */
-	ut_a(index1.stat_index_size == TEST_IDX1_INDEX_SIZE);
-	ut_a(index1.stat_n_leaf_pages == TEST_IDX1_N_LEAF_PAGES);
-	ut_a(idx1_n_diff[0] == TEST_IDX1_N_DIFF1);
-	ut_a(idx1_n_sample[0] == TEST_IDX1_N_DIFF1_SAMPLE_SIZE);
-
-	/* second index: compare each prefix against its fixture value */
-	ut_a(index2.stat_index_size == TEST_IDX2_INDEX_SIZE);
-	ut_a(index2.stat_n_leaf_pages == TEST_IDX2_N_LEAF_PAGES);
-
-	const ib_uint64_t	want_n_diff[4] = {
-		TEST_IDX2_N_DIFF1, TEST_IDX2_N_DIFF2,
-		TEST_IDX2_N_DIFF3, TEST_IDX2_N_DIFF4
-	};
-	const ib_uint64_t	want_n_sample[4] = {
-		TEST_IDX2_N_DIFF1_SAMPLE_SIZE, TEST_IDX2_N_DIFF2_SAMPLE_SIZE,
-		TEST_IDX2_N_DIFF3_SAMPLE_SIZE, TEST_IDX2_N_DIFF4_SAMPLE_SIZE
-	};
-
-	for (ulint pfx = 0; pfx < 4; pfx++) {
-		ut_a(idx2_n_diff[pfx] == want_n_diff[pfx]);
-		ut_a(idx2_n_sample[pfx] == want_n_sample[pfx]);
-	}
-
-	printf("OK: fetch successful\n");
-}
-/* @} */
-
-/* test_dict_stats_all() @{ */
-/** Runs all unit tests of this file in sequence: the schema check test,
-then the save test, then the fetch test. */
-void
-test_dict_stats_all()
-{
-	test_dict_table_schema_check();
-	test_dict_stats_save();
-	test_dict_stats_fetch_from_ps();
-}
-/* @} */
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
-/* @} */
-
-#endif /* UNIV_HOTBACKUP */
diff --git a/storage/xtradb/dict/dict0stats_bg.cc b/storage/xtradb/dict/dict0stats_bg.cc
deleted file mode 100644
index ba6fd115551..00000000000
--- a/storage/xtradb/dict/dict0stats_bg.cc
+++ /dev/null
@@ -1,585 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2012, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file dict/dict0stats_bg.cc
-Code used for background table and index stats gathering.
-
-Created Apr 25, 2012 Vasil Dimov
-*******************************************************/
-
-#include "row0mysql.h"
-#include "srv0start.h"
-#include "dict0dict.h"
-#include "dict0stats.h"
-#include "dict0stats_bg.h"
-
-#ifdef UNIV_NONINL
-# include "dict0stats_bg.ic"
-#endif
-
-#include <vector>
-
-/** Minimum time interval between stats recalc for a given table.
-Also used (converted to microseconds) as the wait timeout of
-dict_stats_thread. */
-#define MIN_RECALC_INTERVAL 10 /* seconds */
-
-/** Event to wake up dict_stats_thread on dict_stats_recalc_pool_add(),
-dict_stats_defrag_pool_add() or shutdown. Not protected by any mutex. */
-UNIV_INTERN os_event_t dict_stats_event;
-
-/** Variable to initiate shutdown the dict stats thread. Note we don't
-use 'srv_shutdown_state' because we want to shutdown dict stats thread
-before purge thread. */
-static bool dict_stats_start_shutdown;
-
-/** Event to wait for shutdown of the dict stats thread */
-static os_event_t dict_stats_shutdown_event;
-
-/** recalc_pool_mutex protects the "recalc_pool" variable;
-defrag_pool_mutex protects the "defrag_pool" variable. */
-static ib_mutex_t recalc_pool_mutex;
-static ib_mutex_t defrag_pool_mutex;
-#ifdef HAVE_PSI_INTERFACE
-/* Keys passed to mutex_create() for the two mutexes above. */
-static mysql_pfs_key_t recalc_pool_mutex_key;
-static mysql_pfs_key_t defrag_pool_mutex_key;
-#endif /* HAVE_PSI_INTERFACE */
-
-/** The number of slots reserved up front in "recalc_pool" (and in
-"defrag_pool") before the vector has to be enlarged */
-static const ulint RECALC_POOL_INITIAL_SLOTS = 128;
-
-/** The multitude of tables whose stats are to be automatically
-recalculated - an STL vector */
-typedef std::vector<table_id_t> recalc_pool_t;
-static recalc_pool_t recalc_pool;
-
-typedef recalc_pool_t::iterator recalc_pool_iterator_t;
-
-/** Indices whose defrag stats need to be saved to persistent storage.
-Each entry identifies an index by its table id and index id. */
-struct defrag_pool_item_t {
- table_id_t table_id;
- index_id_t index_id;
-};
-typedef std::vector<defrag_pool_item_t> defrag_pool_t;
-static defrag_pool_t defrag_pool;
-typedef defrag_pool_t::iterator defrag_pool_iterator_t;
-
-/*****************************************************************//**
-Initialize the recalc pool and the defrag pool by reserving
-RECALC_POOL_INITIAL_SLOTS entries in each. Called once during thread
-initialization. */
-static
-void
-dict_stats_pool_init()
-/*=========================*/
-{
-	ut_ad(!srv_read_only_mode);
-
-	/* Both pools start with the same capacity. */
-	const ulint	n_slots = RECALC_POOL_INITIAL_SLOTS;
-
-	recalc_pool.reserve(n_slots);
-	defrag_pool.reserve(n_slots);
-}
-
-/*****************************************************************//**
-Free the resources occupied by the recalc pool and the defrag pool,
-called once during thread de-initialization. */
-static
-void
-dict_stats_pool_deinit()
-/*===========================*/
-{
-	ut_ad(!srv_read_only_mode);
-
-	recalc_pool.clear();
-	defrag_pool.clear();
-
-	/*
-	The pools may still have their buffers allocated after clear(). They
-	would free them when their destructors are called.
-	The problem is, memory leak detector is run before the destructors
-	of the pools are invoked, and will report the buffers as leaked
-	memory. To avoid that, we force each pool to surrender its buffer
-	to a local empty vector, which will free it when leaving this
-	function.
-
-	The local vectors are default-constructed and therefore already
-	empty; they must not be overwritten with memset(), because
-	std::vector is not a trivially copyable type and zero-filling it
-	is undefined behaviour.
-	*/
-	recalc_pool_t	recalc_empty_pool;
-	defrag_pool_t	defrag_empty_pool;
-
-	recalc_pool.swap(recalc_empty_pool);
-	defrag_pool.swap(defrag_empty_pool);
-}
-
-/*****************************************************************//**
-Add a table to the recalc pool, which is processed by the
-background stats gathering thread. Only the table id is added to the
-list, so the table can be closed after being enqueued and it will be
-opened when needed. If the table does not exist later (has been DROPped),
-then it will be removed from the pool and skipped.
-If the table id is already in the pool, nothing is added and the
-background thread is not signaled. */
-UNIV_INTERN
-void
-dict_stats_recalc_pool_add(
-/*=======================*/
-	const dict_table_t*	table)	/*!< in: table to add */
-{
-	ut_ad(!srv_read_only_mode);
-
-	mutex_enter(&recalc_pool_mutex);
-
-	/* look for an existing entry with the same table id */
-	bool	already_queued = false;
-
-	for (recalc_pool_iterator_t it = recalc_pool.begin();
-	     !already_queued && it != recalc_pool.end();
-	     ++it) {
-
-		already_queued = (*it == table->id);
-	}
-
-	if (!already_queued) {
-		recalc_pool.push_back(table->id);
-	}
-
-	mutex_exit(&recalc_pool_mutex);
-
-	if (!already_queued) {
-		/* wake up the background thread for the new entry */
-		os_event_set(dict_stats_event);
-	}
-}
-
-/*****************************************************************//**
-Get a table from the auto recalc pool. The first (oldest) table id is
-returned and removed from the pool.
-@return true if the pool was non-empty and "id" was set, false otherwise */
-static
-bool
-dict_stats_recalc_pool_get(
-/*=======================*/
-	table_id_t*	id)	/*!< out: table id, or unmodified if list is
-				empty */
-{
-	ut_ad(!srv_read_only_mode);
-
-	mutex_enter(&recalc_pool_mutex);
-
-	const bool	have_entry = !recalc_pool.empty();
-
-	if (have_entry) {
-		/* take the entry at the front of the vector */
-		*id = recalc_pool.front();
-		recalc_pool.erase(recalc_pool.begin());
-	}
-
-	mutex_exit(&recalc_pool_mutex);
-
-	return(have_entry);
-}
-
-/*****************************************************************//**
-Delete a given table from the auto recalc pool. Does nothing if the
-table id is not in the pool. The caller must own dict_sys->mutex. */
-UNIV_INTERN
-void
-dict_stats_recalc_pool_del(
-/*=======================*/
-	const dict_table_t*	table)	/*!< in: table to remove */
-{
-	ut_ad(!srv_read_only_mode);
-	ut_ad(mutex_own(&dict_sys->mutex));
-
-	mutex_enter(&recalc_pool_mutex);
-
-	ut_ad(table->id > 0);
-
-	recalc_pool_iterator_t	pos = recalc_pool.begin();
-
-	/* advance to the first entry holding this table id, if any */
-	while (pos != recalc_pool.end() && *pos != table->id) {
-		++pos;
-	}
-
-	if (pos != recalc_pool.end()) {
-		/* erase() invalidates the iterator, which is not used
-		afterwards */
-		recalc_pool.erase(pos);
-	}
-
-	mutex_exit(&recalc_pool_mutex);
-}
-
-/*****************************************************************//**
-Add an index in a table to the defrag pool, which is processed by the
-background stats gathering thread. Only the table id and index id are
-added to the list, so the table can be closed after being enqueued and
-it will be opened when needed. If the table or index does not exist later
-(has been DROPped), then it will be removed from the pool and skipped.
-If the same (table id, index id) pair is already in the pool, nothing is
-added and the background thread is not signaled. */
-UNIV_INTERN
-void
-dict_stats_defrag_pool_add(
-/*=======================*/
-	const dict_index_t*	index)	/*!< in: index to add */
-{
-	ut_ad(!srv_read_only_mode);
-
-	mutex_enter(&defrag_pool_mutex);
-
-	/* look for an existing entry for the same index */
-	bool	already_queued = false;
-
-	for (defrag_pool_iterator_t it = defrag_pool.begin();
-	     !already_queued && it != defrag_pool.end();
-	     ++it) {
-
-		already_queued = it->table_id == index->table->id
-			&& it->index_id == index->id;
-	}
-
-	if (!already_queued) {
-		defrag_pool_item_t	entry;
-
-		entry.table_id = index->table->id;
-		entry.index_id = index->id;
-		defrag_pool.push_back(entry);
-	}
-
-	mutex_exit(&defrag_pool_mutex);
-
-	if (!already_queued) {
-		/* wake up the background thread for the new entry */
-		os_event_set(dict_stats_event);
-	}
-}
-
-/*****************************************************************//**
-Get an index from the auto defrag pool. The entry at the back of the
-pool (the most recently added one) is returned and removed from the pool.
-@return true if the pool was non-empty and both ids were set, false
-otherwise */
-static
-bool
-dict_stats_defrag_pool_get(
-/*=======================*/
-	table_id_t*	table_id,	/*!< out: table id, or unmodified if
-					list is empty */
-	index_id_t*	index_id)	/*!< out: index id, or unmodified if
-					list is empty */
-{
-	ut_ad(!srv_read_only_mode);
-
-	mutex_enter(&defrag_pool_mutex);
-
-	const bool	have_entry = !defrag_pool.empty();
-
-	if (have_entry) {
-		/* copy the ids out before the entry is destroyed */
-		const defrag_pool_item_t&	last = defrag_pool.back();
-
-		*table_id = last.table_id;
-		*index_id = last.index_id;
-
-		defrag_pool.pop_back();
-	}
-
-	mutex_exit(&defrag_pool_mutex);
-
-	return(have_entry);
-}
-
-/*****************************************************************//**
-Delete entries from the auto defrag pool. Exactly one of "table" and
-"index" must be given: with "table", every entry of that table is
-removed; with "index", only the first entry matching that index is
-removed. The caller must own dict_sys->mutex. */
-UNIV_INTERN
-void
-dict_stats_defrag_pool_del(
-/*=======================*/
-	const dict_table_t*	table,	/*!<in: if given, remove
-					all entries for the table */
-	const dict_index_t*	index)	/*!< in: if given, remove this index */
-{
-	ut_a((table && !index) || (!table && index));
-	ut_ad(!srv_read_only_mode);
-	ut_ad(mutex_own(&dict_sys->mutex));
-
-	/* the assertion above guarantees that index != NULL otherwise */
-	const bool	whole_table = (table != NULL);
-
-	mutex_enter(&defrag_pool_mutex);
-
-	for (defrag_pool_iterator_t it = defrag_pool.begin();
-	     it != defrag_pool.end(); ) {
-
-		const bool	matches = whole_table
-			? it->table_id == table->id
-			: (it->table_id == index->table->id
-			   && it->index_id == index->id);
-
-		if (!matches) {
-			++it;
-			continue;
-		}
-
-		/* erase() invalidates the iterator; continue from the
-		iterator it returns */
-		it = defrag_pool.erase(it);
-
-		if (!whole_table) {
-			/* a single index was requested and it is gone */
-			break;
-		}
-	}
-
-	mutex_exit(&defrag_pool_mutex);
-}
-
-/*****************************************************************//**
-Wait until background stats thread has stopped using the specified table.
-The caller must have locked the data dictionary using
-row_mysql_lock_data_dictionary() and this function may unlock it temporarily
-and restore the lock before it exits.
-The background stats thread is guaranteed not to start using the specified
-table after this function returns and before the caller unlocks the data
-dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag
-under dict_sys->mutex. */
-UNIV_INTERN
-void
-dict_stats_wait_bg_to_stop_using_table(
-/*===================================*/
-	dict_table_t*	table,	/*!< in/out: table */
-	trx_t*		trx)	/*!< in/out: transaction to use for
-				unlocking/locking the data dict */
-{
-	for (;;) {
-		if (dict_stats_stop_bg(table)) {
-			/* the background thread is not using the table */
-			return;
-		}
-
-		DICT_STATS_BG_YIELD(trx);
-	}
-}
-
-/*****************************************************************//**
-Initialize global variables needed for the operation of dict_stats_thread():
-the wake-up and shutdown events, the two pool mutexes and the pools
-themselves. Must be called before dict_stats_thread() is started. */
-UNIV_INTERN
-void
-dict_stats_thread_init()
-{
-	ut_a(!srv_read_only_mode);
-
-	dict_stats_event = os_event_create();
-	dict_stats_shutdown_event = os_event_create();
-
-	/* Choice of the latching level for recalc_pool_mutex. The mutex
-	is acquired:
-	1) by the background stats gathering thread, before any other
-	latch, and released without latching anything else in between
-	(any level would do for this case);
-	2) from row_update_statistics_if_needed(), and released without
-	latching anything else in between. dict_sys->mutex (SYNC_DICT) is
-	not held when that function is called, but may be acquired inside
-	it (a level <=SYNC_DICT would do for this case);
-	3) from row_drop_table_for_mysql(), after dict_sys->mutex
-	(SYNC_DICT) and dict_operation_lock (SYNC_DICT_OPERATION) have
-	been locked (a level <SYNC_DICT && <SYNC_DICT_OPERATION would do
-	for this case).
-	Hence SYNC_STATS_AUTO_RECALC is chosen to be about below
-	SYNC_DICT. */
-	mutex_create(recalc_pool_mutex_key, &recalc_pool_mutex,
-		     SYNC_STATS_AUTO_RECALC);
-
-	/* SYNC_STATS_DEFRAG is chosen to be below SYNC_FSP_PAGE. */
-	mutex_create(defrag_pool_mutex_key, &defrag_pool_mutex,
-		     SYNC_STATS_DEFRAG);
-
-	dict_stats_pool_init();
-}
-
-/*****************************************************************//**
-Free resources allocated by dict_stats_thread_init(), must be called
-after dict_stats_thread() has exited. The globals are reset afterwards
-(mutexes zero-filled, events set to NULL, shutdown flag cleared). */
-UNIV_INTERN
-void
-dict_stats_thread_deinit()
-/*======================*/
-{
-	ut_a(!srv_read_only_mode);
-	ut_ad(!srv_dict_stats_thread_active);
-
-	/* release the pool buffers first */
-	dict_stats_pool_deinit();
-
-	/* destroy each mutex and wipe its storage */
-	mutex_free(&recalc_pool_mutex);
-	memset(&recalc_pool_mutex, 0x0, sizeof(recalc_pool_mutex));
-
-	mutex_free(&defrag_pool_mutex);
-	memset(&defrag_pool_mutex, 0x0, sizeof(defrag_pool_mutex));
-
-	/* destroy the events and forget the stale handles */
-	os_event_free(dict_stats_event);
-	dict_stats_event = NULL;
-
-	os_event_free(dict_stats_shutdown_event);
-	dict_stats_shutdown_event = NULL;
-
-	dict_stats_start_shutdown = false;
-}
-
-/*****************************************************************//**
-Get the first table that has been added for auto recalc and eventually
-update its stats. If the stats of the table were recalculated less than
-MIN_RECALC_INTERVAL seconds ago, the table is put back into the pool
-instead. Tables that no longer exist or are flagged as corrupted are
-skipped. */
-static
-void
-dict_stats_process_entry_from_recalc_pool()
-/*=======================================*/
-{
-	table_id_t	table_id;
-
-	ut_ad(!srv_read_only_mode);
-
-	/* pop the first table from the auto recalc pool */
-	if (!dict_stats_recalc_pool_get(&table_id)) {
-		/* no tables for auto recalc */
-		return;
-	}
-
-	mutex_enter(&dict_sys->mutex);
-
-	dict_table_t*	table = dict_table_open_on_id(
-		table_id, TRUE, DICT_TABLE_OP_NORMAL);
-
-	if (table == NULL) {
-		/* table does not exist, must have been DROPped
-		after its id was enqueued */
-		mutex_exit(&dict_sys->mutex);
-		return;
-	}
-
-	if (table->corrupted) {
-		/* nothing to recalculate for a corrupted table */
-		dict_table_close(table, TRUE, FALSE);
-		mutex_exit(&dict_sys->mutex);
-		return;
-	}
-
-	/* Flag the table as being used by the background thread while
-	dict_sys->mutex is still held. */
-	table->stats_bg_flag |= BG_STAT_IN_PROGRESS;
-
-	mutex_exit(&dict_sys->mutex);
-
-	/* ut_time() could be expensive, the current function
-	is called once every time a table has been changed more than 10% and
-	on a system with lots of small tables, this could become hot. If we
-	find out that this is a problem, then the check below could eventually
-	be replaced with something else, though a time interval is the natural
-	approach. */
-
-	const bool	recalculated_recently
-		= ut_difftime(ut_time(), table->stats_last_recalc)
-		< MIN_RECALC_INTERVAL;
-
-	if (!recalculated_recently) {
-
-		dict_stats_update(table, DICT_STATS_RECALC_PERSISTENT);
-
-	} else {
-
-		/* Stats were (re)calculated not long ago. To avoid
-		too frequent stats updates we put back the table on
-		the auto recalc list and do nothing. */
-
-		dict_stats_recalc_pool_add(table);
-	}
-
-	mutex_enter(&dict_sys->mutex);
-
-	table->stats_bg_flag &= ~BG_STAT_IN_PROGRESS;
-
-	dict_table_close(table, TRUE, FALSE);
-
-	mutex_exit(&dict_sys->mutex);
-}
-
-/*****************************************************************//**
-Take one index from the defrag pool and save its persistent defrag
-stats. Entries whose table is no longer cached or is flagged as
-corrupted, and entries whose index cannot be found or is corrupted, are
-dropped without saving anything. */
-static
-void
-dict_stats_process_entry_from_defrag_pool()
-/*=======================================*/
-{
-	table_id_t	table_id;
-	index_id_t	index_id;
-
-	ut_ad(!srv_read_only_mode);
-
-	/* take an entry from the auto defrag pool */
-	if (!dict_stats_defrag_pool_get(&table_id, &index_id)) {
-		/* no index in defrag pool */
-		return;
-	}
-
-	dict_table_t*	table;
-
-	mutex_enter(&dict_sys->mutex);
-
-	/* If the table is no longer cached, we've already lost the in
-	memory stats so there's nothing really to write to disk. */
-	table = dict_table_open_on_id(table_id, TRUE,
-				      DICT_TABLE_OP_OPEN_ONLY_IF_CACHED);
-
-	if (table == NULL) {
-		mutex_exit(&dict_sys->mutex);
-		return;
-	}
-
-	/* Check whether table is corrupted */
-	if (table->corrupted) {
-		dict_table_close(table, TRUE, FALSE);
-		mutex_exit(&dict_sys->mutex);
-		return;
-	}
-	mutex_exit(&dict_sys->mutex);
-
-	dict_index_t*	index = dict_table_find_index_on_id(table, index_id);
-
-	/* Save the stats only for an index that exists and is not
-	corrupted. */
-	if (index != NULL && !dict_index_is_corrupted(index)) {
-		dict_stats_save_defrag_stats(index);
-	}
-
-	/* The table opened above has to be closed on every path from
-	here on, including the one where the index was not found;
-	returning early in that case would leave the table open. */
-	dict_table_close(table, FALSE, FALSE);
-}
-
-/*****************************************************************//**
-This is the thread for background stats gathering. On every wake-up it
-processes one table from the auto recalc pool, eventually recalculating
-its statistics, and then drains the defrag pool. The loop ends when
-dict_stats_start_shutdown is set.
-@return this function does not return, it calls os_thread_exit() */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(dict_stats_thread)(void*)
-{
-	my_thread_init();
-	ut_a(!srv_read_only_mode);
-
-	while (!dict_stats_start_shutdown) {
-
-		/* Wake up periodically even if not signaled. This is
-		because we may lose an event - if the below call to
-		dict_stats_process_entry_from_recalc_pool() puts the entry back
-		in the list, the os_event_set() will be lost by the subsequent
-		os_event_reset(). */
-		os_event_wait_time(
-			dict_stats_event, MIN_RECALC_INTERVAL * 1000000);
-
-		if (dict_stats_start_shutdown) {
-			/* woken up for shutdown: skip the pending work */
-			break;
-		}
-
-		dict_stats_process_entry_from_recalc_pool();
-
-		/* each call removes one entry from the defrag pool */
-		while (!defrag_pool.empty()) {
-			dict_stats_process_entry_from_defrag_pool();
-		}
-
-		os_event_reset(dict_stats_event);
-	}
-
-	srv_dict_stats_thread_active = false;
-
-	/* let dict_stats_shutdown() know that the loop has ended */
-	os_event_set(dict_stats_shutdown_event);
-	my_thread_end();
-
-	/* We count the number of threads in os_thread_exit(). A created
-	thread should always use that to exit instead of return(). */
-	os_thread_exit(NULL);
-
-	OS_THREAD_DUMMY_RETURN;
-}
-
-/** Shut down the dict_stats_thread: raise the shutdown flag, wake the
-thread up and block until it has signaled dict_stats_shutdown_event. */
-void
-dict_stats_shutdown()
-{
-	/* the flag must be set before the thread is woken up */
-	dict_stats_start_shutdown = true;
-	os_event_set(dict_stats_event);
-
-	os_event_wait(dict_stats_shutdown_event);
-}
diff --git a/storage/xtradb/dyn/dyn0dyn.cc b/storage/xtradb/dyn/dyn0dyn.cc
deleted file mode 100644
index dd1f6863c14..00000000000
--- a/storage/xtradb/dyn/dyn0dyn.cc
+++ /dev/null
@@ -1,65 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file dyn/dyn0dyn.cc
-The dynamically allocated array
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dyn0dyn.h"
-#ifdef UNIV_NONINL
-#include "dyn0dyn.ic"
-#endif
-
-/************************************************************//**
-Adds a new block to a dyn array. The current last block is flagged as
-full and a new, empty block is allocated from the heap of the array and
-appended to its block list. If the array has no heap yet, the block list
-is first initialized with the array itself as its first element and the
-heap is created.
-@return created block */
-UNIV_INTERN
-dyn_block_t*
-dyn_array_add_block(
-/*================*/
-	dyn_array_t*	arr)	/*!< in/out: dyn array */
-{
-	ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
-	if (arr->heap == NULL) {
-		UT_LIST_INIT(arr->base);
-		UT_LIST_ADD_FIRST(list, arr->base, arr);
-
-		arr->heap = mem_heap_create(sizeof(dyn_block_t));
-	}
-
-	/* flag the block that has been the last one so far as full */
-	dyn_block_t*	prev_last = dyn_array_get_last_block(arr);
-
-	prev_last->used = prev_last->used | DYN_BLOCK_FULL_FLAG;
-
-	/* allocate the new block and append it to the list */
-	dyn_block_t*	new_block = static_cast<dyn_block_t*>(
-		mem_heap_alloc(arr->heap, sizeof(dyn_block_t)));
-
-	new_block->used = 0;
-
-	UT_LIST_ADD_LAST(list, arr->base, new_block);
-
-	return(new_block);
-}
diff --git a/storage/xtradb/eval/eval0eval.cc b/storage/xtradb/eval/eval0eval.cc
deleted file mode 100644
index ccc54781102..00000000000
--- a/storage/xtradb/eval/eval0eval.cc
+++ /dev/null
@@ -1,950 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file eval/eval0eval.cc
-SQL evaluator: evaluates simple data structures, like expressions, in
-a query graph
-
-Created 12/29/1997 Heikki Tuuri
-*******************************************************/
-
-#include "eval0eval.h"
-
-#ifdef UNIV_NONINL
-#include "eval0eval.ic"
-#endif
-
-#include "data0data.h"
-#include "row0sel.h"
-#include "rem0cmp.h"
-
-/** The RND function seed */
-static ulint eval_rnd = 128367121;
-
-/** Dummy adress used when we should allocate a buffer of size 0 in
-eval_node_alloc_val_buf */
-
-static byte eval_dummy;
-
-/*************************************************************************
-Gets the like node from the node */
-UNIV_INLINE
-que_node_t*
-que_node_get_like_node(
-/*===================*/
- /* out: next node in a list of nodes */
- que_node_t* node) /* in: node in a list */
-{
- return(((sym_node_t*) node)->like_node);
-}
-
-/*****************************************************************//**
-Allocate a buffer from global dynamic memory for a value of a que_node.
-NOTE that this memory must be explicitly freed when the query graph is
-freed. If the node already has an allocated buffer, that buffer is freed
-here. NOTE that this is the only function where dynamic memory should be
-allocated for a query node val field.
-@return pointer to allocated buffer */
-UNIV_INTERN
-byte*
-eval_node_alloc_val_buf(
-/*====================*/
- que_node_t* node, /*!< in: query graph node; sets the val field
- data field to point to the new buffer, and
- len field equal to size */
- ulint size) /*!< in: buffer size */
-{
- dfield_t* dfield;
- byte* data;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
- || que_node_get_type(node) == QUE_NODE_FUNC);
-
- dfield = que_node_get_val(node);
-
- data = static_cast<byte*>(dfield_get_data(dfield));
-
- if (data && data != &eval_dummy) {
- mem_free(data);
- }
-
- if (size == 0) {
- data = &eval_dummy;
- } else {
- data = static_cast<byte*>(mem_alloc(size));
- }
-
- que_node_set_val_buf_size(node, size);
-
- dfield_set_data(dfield, data, size);
-
- return(data);
-}
-
-/*****************************************************************//**
-Free the buffer from global dynamic memory for a value of a que_node,
-if it has been allocated in the above function. The freeing for pushed
-column values is done in sel_col_prefetch_buf_free. */
-UNIV_INTERN
-void
-eval_node_free_val_buf(
-/*===================*/
- que_node_t* node) /*!< in: query graph node */
-{
- dfield_t* dfield;
- byte* data;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL
- || que_node_get_type(node) == QUE_NODE_FUNC);
-
- dfield = que_node_get_val(node);
-
- data = static_cast<byte*>(dfield_get_data(dfield));
-
- if (que_node_get_val_buf_size(node) > 0) {
- ut_a(data);
-
- mem_free(data);
- }
-}
-
-/*********************************************************************
-Evaluates a LIKE comparison node.
-@return the result of the comparison */
-UNIV_INLINE
-ibool
-eval_cmp_like(
-/*==========*/
- que_node_t* arg1, /* !< in: left operand */
- que_node_t* arg2) /* !< in: right operand */
-{
- ib_like_t op;
- int res;
- que_node_t* arg3;
- que_node_t* arg4;
- dfield_t* dfield;
- dtype_t* dtype;
- ibool val = TRUE;
-
- arg3 = que_node_get_like_node(arg2);
-
- /* Get the comparison type operator */
- ut_a(arg3);
-
- dfield = que_node_get_val(arg3);
- dtype = dfield_get_type(dfield);
-
- ut_a(dtype_get_mtype(dtype) == DATA_INT);
- op = static_cast<ib_like_t>(mach_read_from_4(static_cast<const unsigned char*>(dfield_get_data(dfield))));
-
- switch (op) {
- case IB_LIKE_PREFIX:
-
- arg4 = que_node_get_next(arg3);
- res = cmp_dfield_dfield_like_prefix(
- que_node_get_val(arg1),
- que_node_get_val(arg4));
- break;
-
- case IB_LIKE_SUFFIX:
-
- arg4 = que_node_get_next(arg3);
- res = cmp_dfield_dfield_like_suffix(
- que_node_get_val(arg1),
- que_node_get_val(arg4));
- break;
-
- case IB_LIKE_SUBSTR:
-
- arg4 = que_node_get_next(arg3);
- res = cmp_dfield_dfield_like_substr(
- que_node_get_val(arg1),
- que_node_get_val(arg4));
- break;
-
- case IB_LIKE_EXACT:
- res = cmp_dfield_dfield(
- que_node_get_val(arg1),
- que_node_get_val(arg2));
- break;
-
- default:
- ut_error;
- }
-
- if (res != 0) {
- val = FALSE;
- }
-
- return(val);
-}
-
-/*********************************************************************
-Evaluates a comparison node.
-@return the result of the comparison */
-ibool
-eval_cmp(
-/*=====*/
- func_node_t* cmp_node) /*!< in: comparison node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- int res;
- int func;
- ibool val = TRUE;
-
- ut_ad(que_node_get_type(cmp_node) == QUE_NODE_FUNC);
-
- arg1 = cmp_node->args;
- arg2 = que_node_get_next(arg1);
-
- func = cmp_node->func;
-
- if (func == PARS_LIKE_TOKEN_EXACT
- || func == PARS_LIKE_TOKEN_PREFIX
- || func == PARS_LIKE_TOKEN_SUFFIX
- || func == PARS_LIKE_TOKEN_SUBSTR) {
-
- val = eval_cmp_like(arg1, arg2);
- } else {
- res = cmp_dfield_dfield(
- que_node_get_val(arg1), que_node_get_val(arg2));
-
- if (func == '=') {
- if (res != 0) {
- val = FALSE;
- }
- } else if (func == '<') {
- if (res != -1) {
- val = FALSE;
- }
- } else if (func == PARS_LE_TOKEN) {
- if (res == 1) {
- val = FALSE;
- }
- } else if (func == PARS_NE_TOKEN) {
- if (res == 0) {
- val = FALSE;
- }
- } else if (func == PARS_GE_TOKEN) {
- if (res == -1) {
- val = FALSE;
- }
- } else {
- ut_ad(func == '>');
-
- if (res != 1) {
- val = FALSE;
- }
- }
- }
-
- eval_node_set_ibool_val(cmp_node, val);
-
- return(val);
-}
-
-/*****************************************************************//**
-Evaluates a logical operation node. */
-UNIV_INLINE
-void
-eval_logical(
-/*=========*/
- func_node_t* logical_node) /*!< in: logical operation node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- ibool val1;
- ibool val2 = 0; /* remove warning */
- ibool val = 0; /* remove warning */
- int func;
-
- ut_ad(que_node_get_type(logical_node) == QUE_NODE_FUNC);
-
- arg1 = logical_node->args;
- arg2 = que_node_get_next(arg1); /* arg2 is NULL if func is 'NOT' */
-
- val1 = eval_node_get_ibool_val(arg1);
-
- if (arg2) {
- val2 = eval_node_get_ibool_val(arg2);
- }
-
- func = logical_node->func;
-
- if (func == PARS_AND_TOKEN) {
- val = val1 & val2;
- } else if (func == PARS_OR_TOKEN) {
- val = val1 | val2;
- } else if (func == PARS_NOT_TOKEN) {
- val = TRUE - val1;
- } else {
- ut_error;
- }
-
- eval_node_set_ibool_val(logical_node, val);
-}
-
-/*****************************************************************//**
-Evaluates an arithmetic operation node. */
-UNIV_INLINE
-void
-eval_arith(
-/*=======*/
- func_node_t* arith_node) /*!< in: arithmetic operation node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- lint val1;
- lint val2 = 0; /* remove warning */
- lint val;
- int func;
-
- ut_ad(que_node_get_type(arith_node) == QUE_NODE_FUNC);
-
- arg1 = arith_node->args;
- arg2 = que_node_get_next(arg1); /* arg2 is NULL if func is unary '-' */
-
- val1 = eval_node_get_int_val(arg1);
-
- if (arg2) {
- val2 = eval_node_get_int_val(arg2);
- }
-
- func = arith_node->func;
-
- if (func == '+') {
- val = val1 + val2;
- } else if ((func == '-') && arg2) {
- val = val1 - val2;
- } else if (func == '-') {
- val = -val1;
- } else if (func == '*') {
- val = val1 * val2;
- } else {
- ut_ad(func == '/');
- val = val1 / val2;
- }
-
- eval_node_set_int_val(arith_node, val);
-}
-
-/*****************************************************************//**
-Evaluates an aggregate operation node. */
-UNIV_INLINE
-void
-eval_aggregate(
-/*===========*/
- func_node_t* node) /*!< in: aggregate operation node */
-{
- que_node_t* arg;
- lint val;
- lint arg_val;
- int func;
-
- ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
-
- val = eval_node_get_int_val(node);
-
- func = node->func;
-
- if (func == PARS_COUNT_TOKEN) {
-
- val = val + 1;
- } else {
- ut_ad(func == PARS_SUM_TOKEN);
-
- arg = node->args;
- arg_val = eval_node_get_int_val(arg);
-
- val = val + arg_val;
- }
-
- eval_node_set_int_val(node, val);
-}
-
-/*****************************************************************//**
-Evaluates a predefined function node where the function is not relevant
-in benchmarks. */
-static
-void
-eval_predefined_2(
-/*==============*/
- func_node_t* func_node) /*!< in: predefined function node */
-{
- que_node_t* arg;
- que_node_t* arg1;
- que_node_t* arg2 = 0; /* remove warning (??? bug ???) */
- lint int_val;
- byte* data;
- ulint len1;
- ulint len2;
- int func;
- ulint i;
-
- ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
-
- arg1 = func_node->args;
-
- if (arg1) {
- arg2 = que_node_get_next(arg1);
- }
-
- func = func_node->func;
-
- if (func == PARS_PRINTF_TOKEN) {
-
- arg = arg1;
-
- while (arg) {
- dfield_print(que_node_get_val(arg));
-
- arg = que_node_get_next(arg);
- }
-
- putc('\n', stderr);
-
- } else if (func == PARS_ASSERT_TOKEN) {
-
- if (!eval_node_get_ibool_val(arg1)) {
- fputs("SQL assertion fails in a stored procedure!\n",
- stderr);
- }
-
- ut_a(eval_node_get_ibool_val(arg1));
-
- /* This function, or more precisely, a debug procedure,
- returns no value */
-
- } else if (func == PARS_RND_TOKEN) {
-
- len1 = (ulint) eval_node_get_int_val(arg1);
- len2 = (ulint) eval_node_get_int_val(arg2);
-
- ut_ad(len2 >= len1);
-
- if (len2 > len1) {
- int_val = (lint) (len1
- + (eval_rnd % (len2 - len1 + 1)));
- } else {
- int_val = (lint) len1;
- }
-
- eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
-
- eval_node_set_int_val(func_node, int_val);
-
- } else if (func == PARS_RND_STR_TOKEN) {
-
- len1 = (ulint) eval_node_get_int_val(arg1);
-
- data = eval_node_ensure_val_buf(func_node, len1);
-
- for (i = 0; i < len1; i++) {
- data[i] = (byte)(97 + (eval_rnd % 3));
-
- eval_rnd = ut_rnd_gen_next_ulint(eval_rnd);
- }
- } else {
- ut_error;
- }
-}
-
-/*****************************************************************//**
-Evaluates a notfound-function node. */
-UNIV_INLINE
-void
-eval_notfound(
-/*==========*/
- func_node_t* func_node) /*!< in: function node */
-{
- sym_node_t* cursor;
- sel_node_t* sel_node;
- ibool ibool_val;
-
- ut_ad(func_node->func == PARS_NOTFOUND_TOKEN);
-
- cursor = static_cast<sym_node_t*>(func_node->args);
-
- ut_ad(que_node_get_type(cursor) == QUE_NODE_SYMBOL);
-
- if (cursor->token_type == SYM_LIT) {
-
- ut_ad(ut_memcmp(dfield_get_data(que_node_get_val(cursor)),
- "SQL", 3) == 0);
-
- sel_node = cursor->sym_table->query_graph->last_sel_node;
- } else {
- sel_node = cursor->alias->cursor_def;
- }
-
- if (sel_node->state == SEL_NODE_NO_MORE_ROWS) {
- ibool_val = TRUE;
- } else {
- ibool_val = FALSE;
- }
-
- eval_node_set_ibool_val(func_node, ibool_val);
-}
-
-/*****************************************************************//**
-Evaluates a substr-function node. */
-UNIV_INLINE
-void
-eval_substr(
-/*========*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- que_node_t* arg3;
- dfield_t* dfield;
- byte* str1;
- ulint len1;
- ulint len2;
-
- arg1 = func_node->args;
- arg2 = que_node_get_next(arg1);
-
- ut_ad(func_node->func == PARS_SUBSTR_TOKEN);
-
- arg3 = que_node_get_next(arg2);
-
- str1 = static_cast<byte*>(dfield_get_data(que_node_get_val(arg1)));
-
- len1 = (ulint) eval_node_get_int_val(arg2);
- len2 = (ulint) eval_node_get_int_val(arg3);
-
- dfield = que_node_get_val(func_node);
-
- dfield_set_data(dfield, str1 + len1, len2);
-}
-
-/*****************************************************************//**
-Evaluates a replstr-procedure node. */
-static
-void
-eval_replstr(
-/*=========*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- que_node_t* arg3;
- que_node_t* arg4;
- byte* str1;
- byte* str2;
- ulint len1;
- ulint len2;
-
- arg1 = func_node->args;
- arg2 = que_node_get_next(arg1);
-
- ut_ad(que_node_get_type(arg1) == QUE_NODE_SYMBOL);
-
- arg3 = que_node_get_next(arg2);
- arg4 = que_node_get_next(arg3);
-
- str1 = static_cast<byte*>(dfield_get_data(que_node_get_val(arg1)));
- str2 = static_cast<byte*>(dfield_get_data(que_node_get_val(arg2)));
-
- len1 = (ulint) eval_node_get_int_val(arg3);
- len2 = (ulint) eval_node_get_int_val(arg4);
-
- if ((dfield_get_len(que_node_get_val(arg1)) < len1 + len2)
- || (dfield_get_len(que_node_get_val(arg2)) < len2)) {
-
- ut_error;
- }
-
- ut_memcpy(str1 + len1, str2, len2);
-}
-
-/*****************************************************************//**
-Evaluates an instr-function node. */
-static
-void
-eval_instr(
-/*=======*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- dfield_t* dfield1;
- dfield_t* dfield2;
- lint int_val;
- byte* str1;
- byte* str2;
- byte match_char;
- ulint len1;
- ulint len2;
- ulint i;
- ulint j;
-
- arg1 = func_node->args;
- arg2 = que_node_get_next(arg1);
-
- dfield1 = que_node_get_val(arg1);
- dfield2 = que_node_get_val(arg2);
-
- str1 = static_cast<byte*>(dfield_get_data(dfield1));
- str2 = static_cast<byte*>(dfield_get_data(dfield2));
-
- len1 = dfield_get_len(dfield1);
- len2 = dfield_get_len(dfield2);
-
- if (len2 == 0) {
- ut_error;
- }
-
- match_char = str2[0];
-
- for (i = 0; i < len1; i++) {
- /* In this outer loop, the number of matched characters is 0 */
-
- if (str1[i] == match_char) {
-
- if (i + len2 > len1) {
-
- break;
- }
-
- for (j = 1;; j++) {
- /* We have already matched j characters */
-
- if (j == len2) {
- int_val = i + 1;
-
- goto match_found;
- }
-
- if (str1[i + j] != str2[j]) {
-
- break;
- }
- }
- }
- }
-
- int_val = 0;
-
-match_found:
- eval_node_set_int_val(func_node, int_val);
-}
-
-/*****************************************************************//**
-Evaluates a predefined function node. */
-UNIV_INLINE
-void
-eval_binary_to_number(
-/*==================*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg1;
- dfield_t* dfield;
- byte* str1;
- byte* str2;
- ulint len1;
- ulint int_val;
-
- arg1 = func_node->args;
-
- dfield = que_node_get_val(arg1);
-
- str1 = static_cast<byte*>(dfield_get_data(dfield));
- len1 = dfield_get_len(dfield);
-
- if (len1 > 4) {
- ut_error;
- }
-
- if (len1 == 4) {
- str2 = str1;
- } else {
- int_val = 0;
- str2 = (byte*) &int_val;
-
- ut_memcpy(str2 + (4 - len1), str1, len1);
- }
-
- eval_node_copy_and_alloc_val(func_node, str2, 4);
-}
-
-/*****************************************************************//**
-Evaluates a predefined function node. */
-static
-void
-eval_concat(
-/*========*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg;
- dfield_t* dfield;
- byte* data;
- ulint len;
- ulint len1;
-
- arg = func_node->args;
- len = 0;
-
- while (arg) {
- len1 = dfield_get_len(que_node_get_val(arg));
-
- len += len1;
-
- arg = que_node_get_next(arg);
- }
-
- data = eval_node_ensure_val_buf(func_node, len);
-
- arg = func_node->args;
- len = 0;
-
- while (arg) {
- dfield = que_node_get_val(arg);
- len1 = dfield_get_len(dfield);
-
- ut_memcpy(data + len, dfield_get_data(dfield), len1);
-
- len += len1;
-
- arg = que_node_get_next(arg);
- }
-}
-
-/*****************************************************************//**
-Evaluates a predefined function node. If the first argument is an integer,
-this function looks at the second argument which is the integer length in
-bytes, and converts the integer to a VARCHAR.
-If the first argument is of some other type, this function converts it to
-BINARY. */
-UNIV_INLINE
-void
-eval_to_binary(
-/*===========*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- dfield_t* dfield;
- byte* str1;
- ulint len;
- ulint len1;
-
- arg1 = func_node->args;
-
- str1 = static_cast<byte*>(dfield_get_data(que_node_get_val(arg1)));
-
- if (dtype_get_mtype(que_node_get_data_type(arg1)) != DATA_INT) {
-
- len = dfield_get_len(que_node_get_val(arg1));
-
- dfield = que_node_get_val(func_node);
-
- dfield_set_data(dfield, str1, len);
-
- return;
- }
-
- arg2 = que_node_get_next(arg1);
-
- len1 = (ulint) eval_node_get_int_val(arg2);
-
- if (len1 > 4) {
-
- ut_error;
- }
-
- dfield = que_node_get_val(func_node);
-
- dfield_set_data(dfield, str1 + (4 - len1), len1);
-}
-
-/*****************************************************************//**
-Evaluates a predefined function node. */
-UNIV_INLINE
-void
-eval_predefined(
-/*============*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg1;
- lint int_val;
- byte* data;
- int func;
-
- func = func_node->func;
-
- arg1 = func_node->args;
-
- if (func == PARS_LENGTH_TOKEN) {
-
- int_val = (lint) dfield_get_len(que_node_get_val(arg1));
-
- } else if (func == PARS_TO_CHAR_TOKEN) {
-
- /* Convert number to character string as a
- signed decimal integer. */
-
- ulint uint_val;
- int int_len;
-
- int_val = eval_node_get_int_val(arg1);
-
- /* Determine the length of the string. */
-
- if (int_val == 0) {
- int_len = 1; /* the number 0 occupies 1 byte */
- } else {
- int_len = 0;
- if (int_val < 0) {
- uint_val = ((ulint) -int_val - 1) + 1;
- int_len++; /* reserve space for minus sign */
- } else {
- uint_val = (ulint) int_val;
- }
- for (; uint_val > 0; int_len++) {
- uint_val /= 10;
- }
- }
-
- /* allocate the string */
- data = eval_node_ensure_val_buf(func_node, int_len + 1);
-
- /* add terminating NUL character */
- data[int_len] = 0;
-
- /* convert the number */
-
- if (int_val == 0) {
- data[0] = '0';
- } else {
- int tmp;
- if (int_val < 0) {
- data[0] = '-'; /* preceding minus sign */
- uint_val = ((ulint) -int_val - 1) + 1;
- } else {
- uint_val = (ulint) int_val;
- }
- for (tmp = int_len; uint_val > 0; uint_val /= 10) {
- data[--tmp] = (byte)
- ('0' + (byte)(uint_val % 10));
- }
- }
-
- dfield_set_len(que_node_get_val(func_node), int_len);
-
- return;
-
- } else if (func == PARS_TO_NUMBER_TOKEN) {
-
- int_val = atoi((char*)
- dfield_get_data(que_node_get_val(arg1)));
-
- } else if (func == PARS_SYSDATE_TOKEN) {
- int_val = (lint) ut_time();
- } else {
- eval_predefined_2(func_node);
-
- return;
- }
-
- eval_node_set_int_val(func_node, int_val);
-}
-
-/*****************************************************************//**
-Evaluates a function node. */
-UNIV_INTERN
-void
-eval_func(
-/*======*/
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg;
- ulint fclass;
- ulint func;
-
- ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);
-
- fclass = func_node->fclass;
- func = func_node->func;
-
- arg = func_node->args;
-
- /* Evaluate first the argument list */
- while (arg) {
- eval_exp(arg);
-
- /* The functions are not defined for SQL null argument
- values, except for eval_cmp and notfound */
-
- if (dfield_is_null(que_node_get_val(arg))
- && (fclass != PARS_FUNC_CMP)
- && (func != PARS_NOTFOUND_TOKEN)
- && (func != PARS_PRINTF_TOKEN)) {
- ut_error;
- }
-
- arg = que_node_get_next(arg);
- }
-
- switch (fclass) {
- case PARS_FUNC_CMP:
- eval_cmp(func_node);
- return;
- case PARS_FUNC_ARITH:
- eval_arith(func_node);
- return;
- case PARS_FUNC_AGGREGATE:
- eval_aggregate(func_node);
- return;
- case PARS_FUNC_PREDEFINED:
- switch (func) {
- case PARS_NOTFOUND_TOKEN:
- eval_notfound(func_node);
- return;
- case PARS_SUBSTR_TOKEN:
- eval_substr(func_node);
- return;
- case PARS_REPLSTR_TOKEN:
- eval_replstr(func_node);
- return;
- case PARS_INSTR_TOKEN:
- eval_instr(func_node);
- return;
- case PARS_BINARY_TO_NUMBER_TOKEN:
- eval_binary_to_number(func_node);
- return;
- case PARS_CONCAT_TOKEN:
- eval_concat(func_node);
- return;
- case PARS_TO_BINARY_TOKEN:
- eval_to_binary(func_node);
- return;
- default:
- eval_predefined(func_node);
- return;
- }
- case PARS_FUNC_LOGICAL:
- eval_logical(func_node);
- return;
- }
-
- ut_error;
-}
diff --git a/storage/xtradb/eval/eval0proc.cc b/storage/xtradb/eval/eval0proc.cc
deleted file mode 100644
index e6f3a32cd48..00000000000
--- a/storage/xtradb/eval/eval0proc.cc
+++ /dev/null
@@ -1,296 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1998, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file eval/eval0proc.cc
-Executes SQL stored procedures and their control structures
-
-Created 1/20/1998 Heikki Tuuri
-*******************************************************/
-
-#include "eval0proc.h"
-
-#ifdef UNIV_NONINL
-#include "eval0proc.ic"
-#endif
-
-/**********************************************************************//**
-Performs an execution step of an if-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-if_step(
-/*====*/
- que_thr_t* thr) /*!< in: query thread */
-{
- if_node_t* node;
- elsif_node_t* elsif_node;
-
- ut_ad(thr);
-
- node = static_cast<if_node_t*>(thr->run_node);
- ut_ad(que_node_get_type(node) == QUE_NODE_IF);
-
- if (thr->prev_node == que_node_get_parent(node)) {
-
- /* Evaluate the condition */
-
- eval_exp(node->cond);
-
- if (eval_node_get_ibool_val(node->cond)) {
-
- /* The condition evaluated to TRUE: start execution
- from the first statement in the statement list */
-
- thr->run_node = node->stat_list;
-
- } else if (node->else_part) {
- thr->run_node = node->else_part;
-
- } else if (node->elsif_list) {
- elsif_node = node->elsif_list;
-
- for (;;) {
- eval_exp(elsif_node->cond);
-
- if (eval_node_get_ibool_val(
- elsif_node->cond)) {
-
- /* The condition evaluated to TRUE:
- start execution from the first
- statement in the statement list */
-
- thr->run_node = elsif_node->stat_list;
-
- break;
- }
-
- elsif_node = static_cast<elsif_node_t*>(
- que_node_get_next(elsif_node));
-
- if (elsif_node == NULL) {
- thr->run_node = NULL;
-
- break;
- }
- }
- } else {
- thr->run_node = NULL;
- }
- } else {
- /* Move to the next statement */
- ut_ad(que_node_get_next(thr->prev_node) == NULL);
-
- thr->run_node = NULL;
- }
-
- if (thr->run_node == NULL) {
- thr->run_node = que_node_get_parent(node);
- }
-
- return(thr);
-}
-
-/**********************************************************************//**
-Performs an execution step of a while-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-while_step(
-/*=======*/
- que_thr_t* thr) /*!< in: query thread */
-{
- while_node_t* node;
-
- ut_ad(thr);
-
- node = static_cast<while_node_t*>(thr->run_node);
- ut_ad(que_node_get_type(node) == QUE_NODE_WHILE);
-
- ut_ad((thr->prev_node == que_node_get_parent(node))
- || (que_node_get_next(thr->prev_node) == NULL));
-
- /* Evaluate the condition */
-
- eval_exp(node->cond);
-
- if (eval_node_get_ibool_val(node->cond)) {
-
- /* The condition evaluated to TRUE: start execution
- from the first statement in the statement list */
-
- thr->run_node = node->stat_list;
- } else {
- thr->run_node = que_node_get_parent(node);
- }
-
- return(thr);
-}
-
-/**********************************************************************//**
-Performs an execution step of an assignment statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-assign_step(
-/*========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- assign_node_t* node;
-
- ut_ad(thr);
-
- node = static_cast<assign_node_t*>(thr->run_node);
- ut_ad(que_node_get_type(node) == QUE_NODE_ASSIGNMENT);
-
- /* Evaluate the value to assign */
-
- eval_exp(node->val);
-
- eval_node_copy_val(node->var->alias, node->val);
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
-
-/**********************************************************************//**
-Performs an execution step of a for-loop node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-for_step(
-/*=====*/
- que_thr_t* thr) /*!< in: query thread */
-{
- for_node_t* node;
- que_node_t* parent;
- lint loop_var_value;
-
- ut_ad(thr);
-
- node = static_cast<for_node_t*>(thr->run_node);
-
- ut_ad(que_node_get_type(node) == QUE_NODE_FOR);
-
- parent = que_node_get_parent(node);
-
- if (thr->prev_node != parent) {
-
- /* Move to the next statement */
- thr->run_node = que_node_get_next(thr->prev_node);
-
- if (thr->run_node != NULL) {
-
- return(thr);
- }
-
- /* Increment the value of loop_var */
-
- loop_var_value = 1 + eval_node_get_int_val(node->loop_var);
- } else {
- /* Initialize the loop */
-
- eval_exp(node->loop_start_limit);
- eval_exp(node->loop_end_limit);
-
- loop_var_value = eval_node_get_int_val(node->loop_start_limit);
-
- node->loop_end_value
- = (int) eval_node_get_int_val(node->loop_end_limit);
- }
-
- /* Check if we should do another loop */
-
- if (loop_var_value > node->loop_end_value) {
-
- /* Enough loops done */
-
- thr->run_node = parent;
- } else {
- eval_node_set_int_val(node->loop_var, loop_var_value);
-
- thr->run_node = node->stat_list;
- }
-
- return(thr);
-}
-
-/**********************************************************************//**
-Performs an execution step of an exit statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-exit_step(
-/*======*/
- que_thr_t* thr) /*!< in: query thread */
-{
- exit_node_t* node;
- que_node_t* loop_node;
-
- ut_ad(thr);
-
- node = static_cast<exit_node_t*>(thr->run_node);
-
- ut_ad(que_node_get_type(node) == QUE_NODE_EXIT);
-
- /* Loops exit by setting thr->run_node as the loop node's parent, so
- find our containing loop node and get its parent. */
-
- loop_node = que_node_get_containing_loop_node(node);
-
- /* If someone uses an EXIT statement outside of a loop, this will
- trigger. */
- ut_a(loop_node);
-
- thr->run_node = que_node_get_parent(loop_node);
-
- return(thr);
-}
-
-/**********************************************************************//**
-Performs an execution step of a return-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-return_step(
-/*========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- return_node_t* node;
- que_node_t* parent;
-
- ut_ad(thr);
-
- node = static_cast<return_node_t*>(thr->run_node);
-
- ut_ad(que_node_get_type(node) == QUE_NODE_RETURN);
-
- parent = node;
-
- while (que_node_get_type(parent) != QUE_NODE_PROC) {
-
- parent = que_node_get_parent(parent);
- }
-
- ut_a(parent);
-
- thr->run_node = que_node_get_parent(parent);
-
- return(thr);
-}
diff --git a/storage/xtradb/fil/fil0crypt.cc b/storage/xtradb/fil/fil0crypt.cc
deleted file mode 100644
index e73d600d2ca..00000000000
--- a/storage/xtradb/fil/fil0crypt.cc
+++ /dev/null
@@ -1,2662 +0,0 @@
-/*****************************************************************************
-Copyright (C) 2013, 2015, Google Inc. All Rights Reserved.
-Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-/**************************************************//**
-@file fil0crypt.cc
-Innodb file space encrypt/decrypt
-
-Created Jonas Oreland Google
-Modified Jan Lindström jan.lindstrom@mariadb.com
-*******************************************************/
-
-#include "fil0fil.h"
-#include "fil0crypt.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "mach0data.h"
-#include "log0recv.h"
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "page0zip.h"
-#include "ut0ut.h"
-#include "btr0scrub.h"
-#include "fsp0fsp.h"
-#include "fil0pagecompress.h"
-#include "ha_prototypes.h" // IB_LOG_
-#include <my_crypt.h>
-
-/** Mutex for keys */
-static ib_mutex_t fil_crypt_key_mutex;
-
-static bool fil_crypt_threads_inited = false;
-
-#ifdef UNIV_PFS_MUTEX
-static mysql_pfs_key_t fil_crypt_key_mutex_key;
-#endif
-
-/** Is encryption enabled/disabled */
-UNIV_INTERN ulong srv_encrypt_tables = 0;
-
-/** No of key rotation threads requested */
-UNIV_INTERN uint srv_n_fil_crypt_threads = 0;
-
-/** No of key rotation threads started */
-UNIV_INTERN uint srv_n_fil_crypt_threads_started = 0;
-
-/** At this age or older a space/page will be rotated */
-UNIV_INTERN uint srv_fil_crypt_rotate_key_age;
-
-/** Event to signal FROM the key rotation threads. */
-static os_event_t fil_crypt_event;
-
-/** Event to signal TO the key rotation threads. */
-UNIV_INTERN os_event_t fil_crypt_threads_event;
-
-/** Event for waking up threads throttle. */
-static os_event_t fil_crypt_throttle_sleep_event;
-
-/** Mutex for key rotation threads. */
-UNIV_INTERN ib_mutex_t fil_crypt_threads_mutex;
-
-#ifdef UNIV_PFS_MUTEX
-static mysql_pfs_key_t fil_crypt_threads_mutex_key;
-#endif
-
-/** Variable ensuring only 1 thread at time does initial conversion */
-static bool fil_crypt_start_converting = false;
-
-/** Variables for throttling */
-UNIV_INTERN uint srv_n_fil_crypt_iops = 100; // 10ms per iop
-static uint srv_alloc_time = 3; // allocate iops for 3s at a time
-static uint n_fil_crypt_iops_allocated = 0;
-
-/** Variables for scrubbing */
-extern uint srv_background_scrub_data_interval;
-extern uint srv_background_scrub_data_check_interval;
-
-#define DEBUG_KEYROTATION_THROTTLING 0
-
-/** Statistics variables */
-static fil_crypt_stat_t crypt_stat;
-static ib_mutex_t crypt_stat_mutex;
-
-#ifdef UNIV_PFS_MUTEX
-static mysql_pfs_key_t fil_crypt_stat_mutex_key;
-
-/**
- * key for crypt data mutex
-*/
-UNIV_INTERN mysql_pfs_key_t fil_crypt_data_mutex_key;
-#endif
-
-/** Is background scrubbing enabled, defined on btr0scrub.cc */
-extern my_bool srv_background_scrub_data_uncompressed;
-extern my_bool srv_background_scrub_data_compressed;
-
-static bool
-fil_crypt_needs_rotation(
- fil_encryption_t encrypt_mode, /*!< in: Encryption
- mode */
- uint key_version, /*!< in: Key version */
- uint latest_key_version, /*!< in: Latest key version */
- uint rotate_key_age); /*!< in: When to rotate */
-
-/*********************************************************************
-Init space crypt */
-UNIV_INTERN
-void
-fil_space_crypt_init()
-{
- mutex_create(fil_crypt_key_mutex_key,
- &fil_crypt_key_mutex, SYNC_NO_ORDER_CHECK);
-
- fil_crypt_throttle_sleep_event = os_event_create();
-
- mutex_create(fil_crypt_stat_mutex_key,
- &crypt_stat_mutex, SYNC_NO_ORDER_CHECK);
-
- memset(&crypt_stat, 0, sizeof(crypt_stat));
-}
-
-/*********************************************************************
-Cleanup space crypt */
-UNIV_INTERN
-void
-fil_space_crypt_cleanup()
-{
- os_event_free(fil_crypt_throttle_sleep_event);
- fil_crypt_throttle_sleep_event = NULL;
- mutex_free(&fil_crypt_key_mutex);
- mutex_free(&crypt_stat_mutex);
-}
-
-/**
-Get latest key version from encryption plugin.
-@return key version or ENCRYPTION_KEY_VERSION_INVALID */
-uint
-fil_space_crypt_t::key_get_latest_version(void)
-{
- uint key_version = key_found;
-
- if (is_key_found()) {
- key_version = encryption_key_get_latest_version(key_id);
- srv_stats.n_key_requests.inc();
- key_found = key_version;
- }
-
- return key_version;
-}
-
-/******************************************************************
-Get the latest(key-version), waking the encrypt thread, if needed
-@param[in,out] crypt_data Crypt data */
-static inline
-uint
-fil_crypt_get_latest_key_version(
- fil_space_crypt_t* crypt_data)
-{
- ut_ad(crypt_data != NULL);
-
- uint key_version = crypt_data->key_get_latest_version();
-
- if (crypt_data->is_key_found()) {
-
- if (fil_crypt_needs_rotation(crypt_data->encryption,
- crypt_data->min_key_version,
- key_version,
- srv_fil_crypt_rotate_key_age)) {
- os_event_set(fil_crypt_threads_event);
- }
- }
-
- return key_version;
-}
-
-/******************************************************************
-Mutex helper for crypt_data->scheme */
-void
-crypt_data_scheme_locker(
-/*=====================*/
- st_encryption_scheme* scheme,
- int exit)
-{
- fil_space_crypt_t* crypt_data =
- static_cast<fil_space_crypt_t*>(scheme);
-
- if (exit) {
- mutex_exit(&crypt_data->mutex);
- } else {
- mutex_enter(&crypt_data->mutex);
- }
-}
-
-/******************************************************************
-Create a fil_space_crypt_t object
-@param[in] type CRYPT_SCHEME_UNENCRYPTE or
- CRYPT_SCHEME_1
-@param[in] encrypt_mode FIL_ENCRYPTION_DEFAULT or
- FIL_ENCRYPTION_ON or
- FIL_ENCRYPTION_OFF
-@param[in] min_key_version key_version or 0
-@param[in] key_id Used key id
-@return crypt object */
-static
-fil_space_crypt_t*
-fil_space_create_crypt_data(
- uint type,
- fil_encryption_t encrypt_mode,
- uint min_key_version,
- uint key_id)
-{
- void* buf = mem_zalloc(sizeof(fil_space_crypt_t));
- fil_space_crypt_t* crypt_data = NULL;
-
- if (buf) {
- crypt_data = new(buf)
- fil_space_crypt_t(
- type,
- min_key_version,
- key_id,
- encrypt_mode);
- }
-
- return crypt_data;
-}
-
-/******************************************************************
-Create a fil_space_crypt_t object
-@param[in] encrypt_mode FIL_ENCRYPTION_DEFAULT or
- FIL_ENCRYPTION_ON or
- FIL_ENCRYPTION_OFF
-
-@param[in] key_id Encryption key id
-@return crypt object */
-UNIV_INTERN
-fil_space_crypt_t*
-fil_space_create_crypt_data(
- fil_encryption_t encrypt_mode,
- uint key_id)
-{
- return (fil_space_create_crypt_data(0, encrypt_mode, 0, key_id));
-}
-
-/******************************************************************
-Merge fil_space_crypt_t object
-@param[in,out] dst Destination cryp data
-@param[in] src Source crypt data */
-UNIV_INTERN
-void
-fil_space_merge_crypt_data(
- fil_space_crypt_t* dst,
- const fil_space_crypt_t* src)
-{
- mutex_enter(&dst->mutex);
-
- /* validate that they are mergeable */
- ut_a(src->type == CRYPT_SCHEME_UNENCRYPTED ||
- src->type == CRYPT_SCHEME_1);
-
- ut_a(dst->type == CRYPT_SCHEME_UNENCRYPTED ||
- dst->type == CRYPT_SCHEME_1);
-
- dst->encryption = src->encryption;
- dst->type = src->type;
- dst->min_key_version = src->min_key_version;
- dst->keyserver_requests += src->keyserver_requests;
-
- mutex_exit(&dst->mutex);
-}
-
-/******************************************************************
-Read crypt data from a page (0)
-@param[in] space space_id
-@param[in] page Page 0
-@param[in] offset Offset to crypt data
-@return crypt data from page 0 or NULL. */
-UNIV_INTERN
-fil_space_crypt_t*
-fil_space_read_crypt_data(
- ulint space,
- const byte* page,
- ulint offset)
-{
- if (memcmp(page + offset, CRYPT_MAGIC, MAGIC_SZ) != 0) {
- /* Crypt data is not stored. */
- return NULL;
- }
-
- ulint type = mach_read_from_1(page + offset + MAGIC_SZ + 0);
-
- if (! (type == CRYPT_SCHEME_UNENCRYPTED ||
- type == CRYPT_SCHEME_1)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Found non sensible crypt scheme: " ULINTPF " for space " ULINTPF
- " offset: " ULINTPF " bytes: "
- "[ %.2x %.2x %.2x %.2x %.2x %.2x ].",
- type, space, offset,
- page[offset + 0 + MAGIC_SZ],
- page[offset + 1 + MAGIC_SZ],
- page[offset + 2 + MAGIC_SZ],
- page[offset + 3 + MAGIC_SZ],
- page[offset + 4 + MAGIC_SZ],
- page[offset + 5 + MAGIC_SZ]);
- ut_error;
- }
-
- fil_space_crypt_t* crypt_data;
- ulint iv_length = mach_read_from_1(page + offset + MAGIC_SZ + 1);
-
- if (! (iv_length == sizeof(crypt_data->iv))) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Found non sensible iv length: %lu for space %lu "
- " offset: %lu type: %lu bytes: "
- "[ %.2x %.2x %.2x %.2x %.2x %.2x ].",
- iv_length, space, offset, type,
- page[offset + 0 + MAGIC_SZ],
- page[offset + 1 + MAGIC_SZ],
- page[offset + 2 + MAGIC_SZ],
- page[offset + 3 + MAGIC_SZ],
- page[offset + 4 + MAGIC_SZ],
- page[offset + 5 + MAGIC_SZ]);
- ut_error;
- }
-
- uint min_key_version = mach_read_from_4
- (page + offset + MAGIC_SZ + 2 + iv_length);
-
- uint key_id = mach_read_from_4
- (page + offset + MAGIC_SZ + 2 + iv_length + 4);
-
- fil_encryption_t encryption = (fil_encryption_t)mach_read_from_1(
- page + offset + MAGIC_SZ + 2 + iv_length + 8);
-
- crypt_data = fil_space_create_crypt_data(encryption, key_id);
- /* We need to overwrite these as above function will initialize
- members */
- crypt_data->type = type;
- crypt_data->min_key_version = min_key_version;
- crypt_data->page0_offset = offset;
- memcpy(crypt_data->iv, page + offset + MAGIC_SZ + 2, iv_length);
-
- return crypt_data;
-}
-
-/******************************************************************
-Free a crypt data object
-@param[in,out] crypt_data crypt data to be freed */
-UNIV_INTERN
-void
-fil_space_destroy_crypt_data(
- fil_space_crypt_t **crypt_data)
-{
- if (crypt_data != NULL && (*crypt_data) != NULL) {
- fil_space_crypt_t* c = *crypt_data;
- c->~fil_space_crypt_t();
- mem_free(c);
- *crypt_data = NULL;
- }
-}
-
-/******************************************************************
-Write crypt data to a page (0)
-@param[in,out] page0 Page 0 where to write
-@param[in,out] mtr Minitransaction */
-UNIV_INTERN
-void
-fil_space_crypt_t::write_page0(
- byte* page,
- mtr_t* mtr)
-{
- ulint space_id = mach_read_from_4(
- page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- const uint len = sizeof(iv);
- ulint zip_size = fsp_header_get_zip_size(page);
- const ulint offset = fsp_header_get_crypt_offset(zip_size);
- page0_offset = offset;
-
- /*
- redo log this as bytewise updates to page 0
- followed by an MLOG_FILE_WRITE_CRYPT_DATA
- (that will during recovery update fil_space_t)
- */
- mlog_write_string(page + offset, CRYPT_MAGIC, MAGIC_SZ, mtr);
- mlog_write_ulint(page + offset + MAGIC_SZ + 0, type, MLOG_1BYTE, mtr);
- mlog_write_ulint(page + offset + MAGIC_SZ + 1, len, MLOG_1BYTE, mtr);
- mlog_write_string(page + offset + MAGIC_SZ + 2, iv, len,
- mtr);
- mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len, min_key_version,
- MLOG_4BYTES, mtr);
- mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len + 4, key_id,
- MLOG_4BYTES, mtr);
- mlog_write_ulint(page + offset + MAGIC_SZ + 2 + len + 8, encryption,
- MLOG_1BYTE, mtr);
-
- byte* log_ptr = mlog_open(mtr, 11 + 17 + len);
-
- if (log_ptr != NULL) {
- log_ptr = mlog_write_initial_log_record_fast(
- page,
- MLOG_FILE_WRITE_CRYPT_DATA,
- log_ptr, mtr);
- mach_write_to_4(log_ptr, space_id);
- log_ptr += 4;
- mach_write_to_2(log_ptr, offset);
- log_ptr += 2;
- mach_write_to_1(log_ptr, type);
- log_ptr += 1;
- mach_write_to_1(log_ptr, len);
- log_ptr += 1;
- mach_write_to_4(log_ptr, min_key_version);
- log_ptr += 4;
- mach_write_to_4(log_ptr, key_id);
- log_ptr += 4;
- mach_write_to_1(log_ptr, encryption);
- log_ptr += 1;
- mlog_close(mtr, log_ptr);
-
- mlog_catenate_string(mtr, iv, len);
- }
-}
-
-/******************************************************************
-Set crypt data for a tablespace
-@param[in,out] space Tablespace
-@param[in,out] crypt_data Crypt data to be set
-@return crypt_data in tablespace */
-static
-fil_space_crypt_t*
-fil_space_set_crypt_data(
- fil_space_t* space,
- fil_space_crypt_t* crypt_data)
-{
- fil_space_crypt_t* free_crypt_data = NULL;
- fil_space_crypt_t* ret_crypt_data = NULL;
-
- /* Provided space is protected using fil_space_acquire()
- from concurrent operations. */
- if (space->crypt_data != NULL) {
- /* There is already crypt data present,
- merge new crypt_data */
- fil_space_merge_crypt_data(space->crypt_data,
- crypt_data);
- ret_crypt_data = space->crypt_data;
- free_crypt_data = crypt_data;
- } else {
- space->crypt_data = crypt_data;
- ret_crypt_data = space->crypt_data;
- }
-
- if (free_crypt_data != NULL) {
- /* there was already crypt data present and the new crypt
- * data provided as argument to this function has been merged
- * into that => free new crypt data
- */
- fil_space_destroy_crypt_data(&free_crypt_data);
- }
-
- return ret_crypt_data;
-}
-
-/******************************************************************
-Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry
-@param[in] ptr Log entry start
-@param[in] end_ptr Log entry end
-@param[in] block buffer block
-@return position on log buffer */
-UNIV_INTERN
-byte*
-fil_parse_write_crypt_data(
- byte* ptr,
- const byte* end_ptr,
- const buf_block_t* block,
- dberr_t* err)
-{
- /* check that redo log entry is complete */
- size_t entry_size =
- 4 + // size of space_id
- 2 + // size of offset
- 1 + // size of type
- 1 + // size of iv-len
- 4 + // size of min_key_version
- 4 + // size of key_id
- 1; // fil_encryption_t
-
- *err = DB_SUCCESS;
-
- if (ptr + entry_size > end_ptr) {
- return NULL;
- }
-
- ulint space_id = mach_read_from_4(ptr);
- ptr += 4;
- uint offset = mach_read_from_2(ptr);
- ptr += 2;
- uint type = mach_read_from_1(ptr);
- ptr += 1;
- size_t len = mach_read_from_1(ptr);
- ptr += 1;
-
- ut_a(type == CRYPT_SCHEME_UNENCRYPTED ||
- type == CRYPT_SCHEME_1); // only supported
-
- ut_a(len == CRYPT_SCHEME_1_IV_LEN); // only supported
- uint min_key_version = mach_read_from_4(ptr);
- ptr += 4;
-
- uint key_id = mach_read_from_4(ptr);
- ptr += 4;
-
- fil_encryption_t encryption = (fil_encryption_t)mach_read_from_1(ptr);
- ptr +=1;
-
- if (ptr + len > end_ptr) {
- return NULL;
- }
-
- fil_space_crypt_t* crypt_data = fil_space_create_crypt_data(encryption, key_id);
- /* Need to overwrite these as above will initialize fields. */
- crypt_data->page0_offset = offset;
- crypt_data->min_key_version = min_key_version;
- crypt_data->encryption = encryption;
- memcpy(crypt_data->iv, ptr, len);
- ptr += len;
-
- /* update fil_space memory cache with crypt_data */
- if (fil_space_t* space = fil_space_acquire_silent(space_id)) {
- crypt_data = fil_space_set_crypt_data(space, crypt_data);
- fil_space_release(space);
- /* Check is used key found from encryption plugin */
- if (crypt_data->should_encrypt()
- && !crypt_data->is_key_found()) {
- *err = DB_DECRYPTION_FAILED;
- }
- } else {
- fil_space_destroy_crypt_data(&crypt_data);
- }
-
- return ptr;
-}
-
-/******************************************************************
-Encrypt a buffer
-@param[in,out] crypt_data Crypt data
-@param[in] space space_id
-@param[in] offset Page offset
-@param[in] lsn Log sequence number
-@param[in] src_frame Page to encrypt
-@param[in] zip_size Compressed size or 0
-@param[in,out] dst_frame Output buffer
-@return encrypted buffer or NULL */
-UNIV_INTERN
-byte*
-fil_encrypt_buf(
- fil_space_crypt_t* crypt_data,
- ulint space,
- ulint offset,
- lsn_t lsn,
- const byte* src_frame,
- ulint zip_size,
- byte* dst_frame)
-{
- ulint page_size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
- uint key_version = fil_crypt_get_latest_key_version(crypt_data);
-
- if (key_version == ENCRYPTION_KEY_VERSION_INVALID) {
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Unknown key id %u. Can't continue!\n",
- crypt_data->key_id);
- ut_error;
- }
-
- ulint orig_page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE);
- ibool page_compressed = (orig_page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
- ulint header_len = FIL_PAGE_DATA;
-
- if (page_compressed) {
- header_len += (FIL_PAGE_COMPRESSED_SIZE + FIL_PAGE_COMPRESSION_METHOD_SIZE);
- }
-
- /* FIL page header is not encrypted */
- memcpy(dst_frame, src_frame, header_len);
-
- /* Store key version */
- mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, key_version);
-
- /* Calculate the start offset in a page */
- ulint unencrypted_bytes = header_len + FIL_PAGE_DATA_END;
- ulint srclen = page_size - unencrypted_bytes;
- const byte* src = src_frame + header_len;
- byte* dst = dst_frame + header_len;
- uint32 dstlen = 0;
-
- if (page_compressed) {
- srclen = mach_read_from_2(src_frame + FIL_PAGE_DATA);
- }
-
- int rc = encryption_scheme_encrypt(src, srclen, dst, &dstlen,
- crypt_data, key_version,
- space, offset, lsn);
-
- if (! ((rc == MY_AES_OK) && ((ulint) dstlen == srclen))) {
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Unable to encrypt data-block "
- " src: %p srclen: %ld buf: %p buflen: %d."
- " return-code: %d. Can't continue!\n",
- src, (long)srclen,
- dst, dstlen, rc);
- ut_error;
- }
-
- /* For compressed tables we do not store the FIL header because
- the whole page is not stored to the disk. In compressed tables only
- the FIL header + compressed (and now encrypted) payload alligned
- to sector boundary is written. */
- if (!page_compressed) {
- /* FIL page trailer is also not encrypted */
- memcpy(dst_frame + page_size - FIL_PAGE_DATA_END,
- src_frame + page_size - FIL_PAGE_DATA_END,
- FIL_PAGE_DATA_END);
- } else {
- /* Clean up rest of buffer */
- memset(dst_frame+header_len+srclen, 0, page_size - (header_len+srclen));
- }
-
- /* handle post encryption checksum */
- ib_uint32_t checksum = 0;
-
- checksum = fil_crypt_calculate_checksum(zip_size, dst_frame);
-
- // store the post-encryption checksum after the key-version
- mach_write_to_4(dst_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4, checksum);
-
- ut_ad(fil_space_verify_crypt_checksum(dst_frame, zip_size, NULL, offset));
-
- srv_stats.pages_encrypted.inc();
-
- return dst_frame;
-}
-
-/******************************************************************
-Encrypt a page
-
-@param[in] space Tablespace
-@param[in] offset Page offset
-@param[in] lsn Log sequence number
-@param[in] src_frame Page to encrypt
-@param[in,out] dst_frame Output buffer
-@return encrypted buffer or NULL */
-UNIV_INTERN
-byte*
-fil_space_encrypt(
- const fil_space_t* space,
- ulint offset,
- lsn_t lsn,
- byte* src_frame,
- byte* dst_frame)
-{
- ulint orig_page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE);
-
- if (orig_page_type==FIL_PAGE_TYPE_FSP_HDR
- || orig_page_type==FIL_PAGE_TYPE_XDES) {
- /* File space header or extent descriptor do not need to be
- encrypted. */
- return (src_frame);
- }
-
- if (!space->crypt_data || !space->crypt_data->is_encrypted()) {
- return (src_frame);
- }
-
- fil_space_crypt_t* crypt_data = space->crypt_data;
- ut_ad(space->n_pending_ios > 0);
- ulint zip_size = fsp_flags_get_zip_size(space->flags);
- byte* tmp = fil_encrypt_buf(crypt_data, space->id, offset, lsn, src_frame, zip_size, dst_frame);
-
-#ifdef UNIV_DEBUG
- if (tmp) {
- /* Verify that encrypted buffer is not corrupted */
- byte* tmp_mem = (byte *)malloc(UNIV_PAGE_SIZE);
- dberr_t err = DB_SUCCESS;
- byte* src = src_frame;
- bool page_compressed_encrypted = (mach_read_from_2(tmp+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
- byte* comp_mem = NULL;
- byte* uncomp_mem = NULL;
- ulint size = (zip_size) ? zip_size : UNIV_PAGE_SIZE;
-
- if (page_compressed_encrypted) {
- comp_mem = (byte *)malloc(UNIV_PAGE_SIZE);
- uncomp_mem = (byte *)malloc(UNIV_PAGE_SIZE);
- memcpy(comp_mem, src_frame, UNIV_PAGE_SIZE);
- fil_decompress_page(uncomp_mem, comp_mem,
- srv_page_size, NULL);
- src = uncomp_mem;
- }
-
- bool corrupted1 = buf_page_is_corrupted(true, src, zip_size, space);
- bool ok = fil_space_decrypt(crypt_data, tmp_mem, size, tmp, &err);
-
- /* Need to decompress the page if it was also compressed */
- if (page_compressed_encrypted) {
- memcpy(comp_mem, tmp_mem, UNIV_PAGE_SIZE);
- fil_decompress_page(tmp_mem, comp_mem,
- srv_page_size, NULL);
- }
-
- bool corrupted = buf_page_is_corrupted(true, tmp_mem, zip_size, space);
- memcpy(tmp_mem+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, src+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 8);
- bool different = memcmp(src, tmp_mem, size);
-
- if (!ok || corrupted || corrupted1 || err != DB_SUCCESS || different) {
- fprintf(stderr, "ok %d corrupted %d corrupted1 %d err %d different %d\n",
- ok , corrupted, corrupted1, err, different);
- fprintf(stderr, "src_frame\n");
- buf_page_print(src_frame, zip_size, BUF_PAGE_PRINT_NO_CRASH);
- fprintf(stderr, "encrypted_frame\n");
- buf_page_print(tmp, zip_size, BUF_PAGE_PRINT_NO_CRASH);
- fprintf(stderr, "decrypted_frame\n");
- buf_page_print(tmp_mem, zip_size, 0);
- }
-
- free(tmp_mem);
-
- if (comp_mem) {
- free(comp_mem);
- }
-
- if (uncomp_mem) {
- free(uncomp_mem);
- }
- }
-
-#endif /* UNIV_DEBUG */
-
- return tmp;
-}
-
-/******************************************************************
-Decrypt a page
-@param[in] crypt_data crypt_data
-@param[in] tmp_frame Temporary buffer
-@param[in] page_size Page size
-@param[in,out] src_frame Page to decrypt
-@param[out] err DB_SUCCESS or DB_DECRYPTION_FAILED
-@return true if page decrypted, false if not.*/
-UNIV_INTERN
-bool
-fil_space_decrypt(
- fil_space_crypt_t* crypt_data,
- byte* tmp_frame,
- ulint page_size,
- byte* src_frame,
- dberr_t* err)
-{
- ulint page_type = mach_read_from_2(src_frame+FIL_PAGE_TYPE);
- uint key_version = mach_read_from_4(src_frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
- bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
- ulint offset = mach_read_from_4(src_frame + FIL_PAGE_OFFSET);
- ulint space = mach_read_from_4(src_frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- ib_uint64_t lsn = mach_read_from_8(src_frame + FIL_PAGE_LSN);
-
- *err = DB_SUCCESS;
-
- if (key_version == ENCRYPTION_KEY_NOT_ENCRYPTED) {
- return false;
- }
-
- ut_a(crypt_data != NULL && crypt_data->is_encrypted());
-
- /* read space & lsn */
- ulint header_len = FIL_PAGE_DATA;
-
- if (page_compressed) {
- header_len += (FIL_PAGE_COMPRESSED_SIZE + FIL_PAGE_COMPRESSION_METHOD_SIZE);
- }
-
- /* Copy FIL page header, it is not encrypted */
- memcpy(tmp_frame, src_frame, header_len);
-
- /* Calculate the offset where decryption starts */
- const byte* src = src_frame + header_len;
- byte* dst = tmp_frame + header_len;
- uint32 dstlen = 0;
- ulint srclen = page_size - (header_len + FIL_PAGE_DATA_END);
-
- if (page_compressed) {
- srclen = mach_read_from_2(src_frame + FIL_PAGE_DATA);
- }
-
- int rc = encryption_scheme_decrypt(src, srclen, dst, &dstlen,
- crypt_data, key_version,
- space, offset, lsn);
-
- if (! ((rc == MY_AES_OK) && ((ulint) dstlen == srclen))) {
-
- if (rc == -1) {
- *err = DB_DECRYPTION_FAILED;
- return false;
- }
-
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Unable to decrypt data-block "
- " src: %p srclen: %ld buf: %p buflen: %d."
- " return-code: %d. Can't continue!\n",
- src, (long)srclen,
- dst, dstlen, rc);
- ut_error;
- }
-
- /* For compressed tables we do not store the FIL header because
- the whole page is not stored to the disk. In compressed tables only
- the FIL header + compressed (and now encrypted) payload alligned
- to sector boundary is written. */
- if (!page_compressed) {
- /* Copy FIL trailer */
- memcpy(tmp_frame + page_size - FIL_PAGE_DATA_END,
- src_frame + page_size - FIL_PAGE_DATA_END,
- FIL_PAGE_DATA_END);
- }
-
- srv_stats.pages_decrypted.inc();
-
- return true; /* page was decrypted */
-}
-
-/******************************************************************
-Decrypt a page
-@param[in] space Tablespace
-@param[in] tmp_frame Temporary buffer used for decrypting
-@param[in] page_size Page size
-@param[in,out] src_frame Page to decrypt
-@param[out] decrypted true if page was decrypted
-@return decrypted page, or original not encrypted page if decryption is
-not needed.*/
-UNIV_INTERN
-byte*
-fil_space_decrypt(
- const fil_space_t* space,
- byte* tmp_frame,
- byte* src_frame,
- bool* decrypted)
-{
- dberr_t err = DB_SUCCESS;
- byte* res = NULL;
- ulint zip_size = fsp_flags_get_zip_size(space->flags);
- ulint size = zip_size ? zip_size : UNIV_PAGE_SIZE;
- *decrypted = false;
-
- ut_ad(space->crypt_data != NULL && space->crypt_data->is_encrypted());
- ut_ad(space->n_pending_ios > 0);
-
- bool encrypted = fil_space_decrypt(
- space->crypt_data,
- tmp_frame,
- size,
- src_frame,
- &err);
-
- if (err == DB_SUCCESS) {
- if (encrypted) {
- *decrypted = true;
- /* Copy the decrypted page back to page buffer, not
- really any other options. */
- memcpy(src_frame, tmp_frame, size);
- }
-
- res = src_frame;
- }
-
- return res;
-}
-
-/******************************************************************
-Calculate post encryption checksum
-@param[in] zip_size zip_size or 0
-@param[in] dst_frame Block where checksum is calculated
-@return page checksum
-not needed. */
-UNIV_INTERN
-ulint
-fil_crypt_calculate_checksum(
- ulint zip_size,
- const byte* dst_frame)
-{
- ib_uint32_t checksum = 0;
-
- /* For encrypted tables we use only crc32 and strict_crc32 */
- if (zip_size == 0) {
- checksum = buf_calc_page_crc32(dst_frame);
- } else {
- checksum = page_zip_calc_checksum(dst_frame, zip_size,
- SRV_CHECKSUM_ALGORITHM_CRC32);
- }
-
- return checksum;
-}
-
-/*********************************************************************
-Verify that post encryption checksum match calculated checksum.
-This function should be called only if tablespace contains crypt_data
-metadata (this is strong indication that tablespace is encrypted).
-Function also verifies that traditional checksum does not match
-calculated checksum as if it does page could be valid unencrypted,
-encrypted, or corrupted.
-
-@param[in] page Page to verify
-@param[in] zip_size zip size
-@param[in] space Tablespace
-@param[in] pageno Page no
-@return true if page is encrypted AND OK, false otherwise */
-UNIV_INTERN
-bool
-fil_space_verify_crypt_checksum(
- byte* page,
- ulint zip_size,
- const fil_space_t* space,
- ulint pageno)
-{
- uint key_version = mach_read_from_4(page+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
-
- /* If page is not encrypted, return false */
- if (key_version == 0) {
- return(false);
- }
-
- /* Read stored post encryption checksum. */
- ib_uint32_t checksum = mach_read_from_4(
- page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4);
-
- /* Declare empty pages non-corrupted */
- if (checksum == 0
- && *reinterpret_cast<const ib_uint64_t*>(page + FIL_PAGE_LSN) == 0
- && buf_page_is_zeroes(page, zip_size)) {
- return(true);
- }
-
- /* Compressed and encrypted pages do not have checksum. Assume not
- corrupted. Page verification happens after decompression in
- buf_page_io_complete() using buf_page_is_corrupted(). */
- if (mach_read_from_2(page+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
- return (true);
- }
-
- ib_uint32_t cchecksum1 = 0;
- ib_uint32_t cchecksum2 = 0;
-
- /* Calculate checksums */
- if (zip_size) {
- cchecksum1 = page_zip_calc_checksum(
- page, zip_size, SRV_CHECKSUM_ALGORITHM_CRC32);
-
- if(cchecksum1 != checksum) {
- cchecksum2 = page_zip_calc_checksum(
- page, zip_size,
- SRV_CHECKSUM_ALGORITHM_INNODB);
- }
- } else {
- cchecksum1 = buf_calc_page_crc32(page);
-
- if (cchecksum1 != checksum) {
- cchecksum2 = (ib_uint32_t) buf_calc_page_new_checksum(
- page);
- }
- }
-
- /* If stored checksum matches one of the calculated checksums
- page is not corrupted. */
-
- bool encrypted = (checksum == cchecksum1 || checksum == cchecksum2
- || checksum == BUF_NO_CHECKSUM_MAGIC);
-
- /* MySQL 5.6 and MariaDB 10.0 and 10.1 will write an LSN to the
- first page of each system tablespace file at
- FIL_PAGE_FILE_FLUSH_LSN offset. On other pages and in other files,
- the field might have been uninitialized until MySQL 5.5. In MySQL 5.7
- (and MariaDB Server 10.2.2) WL#7990 stopped writing the field for other
- than page 0 of the system tablespace.
-
- Starting from MariaDB 10.1 the field has been repurposed for
- encryption key_version.
-
- Starting with MySQL 5.7 (and MariaDB Server 10.2), the
- field has been repurposed for SPATIAL INDEX pages for
- FIL_RTREE_SPLIT_SEQ_NUM.
-
- Note that FIL_PAGE_FILE_FLUSH_LSN is not included in the InnoDB page
- checksum.
-
- Thus, FIL_PAGE_FILE_FLUSH_LSN could contain any value. While the
- field would usually be 0 for pages that are not encrypted, we cannot
- assume that a nonzero value means that the page is encrypted.
- Therefore we must validate the page both as encrypted and unencrypted
- when FIL_PAGE_FILE_FLUSH_LSN does not contain 0.
- */
-
- ulint checksum1 = mach_read_from_4(
- page + FIL_PAGE_SPACE_OR_CHKSUM);
-
- ulint checksum2 = checksum1;
-
- bool valid;
-
- if (zip_size) {
- valid = (checksum1 == cchecksum1);
- } else {
- checksum1 = mach_read_from_4(
- page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM);
- valid = (buf_page_is_checksum_valid_crc32(page,checksum1,checksum2)
- || buf_page_is_checksum_valid_innodb(page,checksum1, checksum2));
- }
-
- if (encrypted && valid) {
- /* If page is encrypted and traditional checksums match,
- page could be still encrypted, or not encrypted and valid or
- corrupted. */
- ib_logf(IB_LOG_LEVEL_ERROR,
- " Page %lu in space %s (%lu) maybe corrupted."
- " Post encryption checksum %u stored [%lu:%lu] key_version %u",
- pageno,
- space ? space->name : "N/A",
- mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID),
- checksum, checksum1, checksum2, key_version);
- encrypted = false;
- }
-
- return(encrypted);
-}
-
-/***********************************************************************/
-
-/** A copy of global key state */
-struct key_state_t {
- key_state_t() : key_id(0), key_version(0),
- rotate_key_age(srv_fil_crypt_rotate_key_age) {}
- bool operator==(const key_state_t& other) const {
- return key_version == other.key_version &&
- rotate_key_age == other.rotate_key_age;
- }
- uint key_id;
- uint key_version;
- uint rotate_key_age;
-};
-
-/***********************************************************************
-Copy global key state
-@param[in,out] new_state key state
-@param[in] crypt_data crypt data */
-static void
-fil_crypt_get_key_state(
- key_state_t* new_state,
- fil_space_crypt_t* crypt_data)
-{
- if (srv_encrypt_tables) {
- new_state->key_version = crypt_data->key_get_latest_version();
- new_state->rotate_key_age = srv_fil_crypt_rotate_key_age;
-
- ut_a(new_state->key_version != ENCRYPTION_KEY_NOT_ENCRYPTED);
- } else {
- new_state->key_version = 0;
- new_state->rotate_key_age = 0;
- }
-}
-
-/***********************************************************************
-Check if a key needs rotation given a key_state
-
-The decision is taken in this order:
-1. an invalid key_version never needs rotation;
-2. key_version 0 (unencrypted) with a non-zero latest version always
-   needs rotation, rotate_key_age is ignored;
-3. a non-zero key_version with latest version 0 needs rotation
-   (encrypted => unencrypted) only for FIL_ENCRYPTION_DEFAULT;
-4. otherwise (encrypted => encrypted) rotation is needed only when the
-   key is more than rotate_key_age versions behind the latest one.
-
-@param[in] encrypt_mode Encryption mode
-@param[in] key_version Current key version
-@param[in] latest_key_version Latest key version
-@param[in] rotate_key_age when to rotate
-@return true if key needs rotation, false if not */
-static bool
-fil_crypt_needs_rotation(
-	fil_encryption_t	encrypt_mode,
-	uint			key_version,
-	uint			latest_key_version,
-	uint			rotate_key_age)
-{
-	if (key_version == ENCRYPTION_KEY_VERSION_INVALID) {
-		return false;
-	}
-
-	if (key_version == 0 && latest_key_version != 0) {
-		/* this is rotation unencrypted => encrypted
-		 * ignore rotate_key_age */
-		return true;
-	}
-
-	if (latest_key_version == 0 && key_version != 0) {
-		/* this is rotation encrypted => unencrypted,
-		 * only done when the space follows the global default */
-		return encrypt_mode == FIL_ENCRYPTION_DEFAULT;
-	}
-
-	/* this is rotation encrypted => encrypted,
-	 * only reencrypt if key is sufficiently old.
-	 * The age is computed by subtraction because the sum
-	 * key_version + rotate_key_age may wrap around for a large
-	 * rotate_key_age and would then wrongly request a rotation. */
-	return latest_key_version > key_version
-		&& latest_key_version - key_version > rotate_key_age;
-}
-
-/** Read page 0 and possible crypt data from there.
-Does nothing when space->crypt_data is already set or space->size is
-non-zero. Otherwise page 0 is fetched with an S-latch inside a local
-mini-transaction and, if space->crypt_data is still NULL once
-fil_system->mutex is held, it is set from the result of
-fil_space_read_crypt_data().
-@param[in,out] space Tablespace */
-static inline
-void
-fil_crypt_read_crypt_data(fil_space_t* space)
-{
- if (space->crypt_data || space->size) {
- /* The encryption metadata has already been read, or
- the tablespace is not encrypted and the file has been
- opened already. */
- return;
- }
-
- mtr_t mtr;
- mtr_start(&mtr);
- ulint zip_size = fsp_flags_get_zip_size(space->flags);
- ulint offset = fsp_header_get_crypt_offset(zip_size); /* where crypt data lives in page 0 */
- if (buf_block_t* block = buf_page_get(space->id, zip_size, 0,
- RW_S_LATCH, &mtr)) {
- mutex_enter(&fil_system->mutex);
- if (!space->crypt_data) { /* re-check under the mutex */
- space->crypt_data = fil_space_read_crypt_data(
- space->id, block->frame, offset);
- }
- mutex_exit(&fil_system->mutex);
- }
-
- mtr_commit(&mtr); /* committed whether or not page 0 was obtained */
-}
-
-/***********************************************************************
-Start encrypting a space
-
-For a tablespace without crypt data this creates the crypt data,
-attaches it to the space, writes it to page 0, flushes dirty pages with
-the LSN of that write as the limit and only then clears the "starting"
-flag. fil_crypt_start_converting stays true during the whole
-conversion, so at most one thread converts a space at any time.
-
-@param[in,out] space Tablespace
-@return true if a recheck is needed, i.e. the space has no usable crypt
-data yet because another thread is currently converting a space */
-static
-bool
-fil_crypt_start_encrypting_space(
-	fil_space_t*	space)
-{
-	mutex_enter(&fil_crypt_threads_mutex);
-
-	fil_space_crypt_t *crypt_data = space->crypt_data;
-
-	/* If space is not encrypted and encryption is not enabled, then
-	do not continue encrypting the space. */
-	if (!crypt_data && !srv_encrypt_tables) {
-		mutex_exit(&fil_crypt_threads_mutex);
-		return false;
-	}
-
-	if (crypt_data != NULL || fil_crypt_start_converting) {
-		/* someone beat us to it; a recheck is only needed while
-		another thread is still converting */
-		const bool recheck = fil_crypt_start_converting;
-
-		mutex_exit(&fil_crypt_threads_mutex);
-		return recheck;
-	}
-
-	/* NOTE: we need to write and flush page 0 before publishing
-	 * the crypt data. This so that after restart there is no
-	 * risk of finding encrypted pages without having
-	 * crypt data in page 0 */
-
-	/* 1 - create crypt data */
-	crypt_data = fil_space_create_crypt_data(FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
-
-	if (crypt_data == NULL) {
-		mutex_exit(&fil_crypt_threads_mutex);
-		return false;
-	}
-
-	crypt_data->type = CRYPT_SCHEME_UNENCRYPTED;
-	crypt_data->min_key_version = 0; // all pages are unencrypted
-	crypt_data->rotate_state.start_time = time(0);
-	crypt_data->rotate_state.starting = true;
-	crypt_data->rotate_state.active_threads = 1;
-
-	/* NOTE(review): the mutex is entered on the object passed in and
-	 * released on the object returned by fil_space_set_crypt_data();
-	 * confirm that these are always the same object. */
-	mutex_enter(&crypt_data->mutex);
-	crypt_data = fil_space_set_crypt_data(space, crypt_data);
-	mutex_exit(&crypt_data->mutex);
-
-	fil_crypt_start_converting = true;
-	mutex_exit(&fil_crypt_threads_mutex);
-
-	mtr_t mtr;
-	mtr_start(&mtr);
-
-	/* 2 - get page 0 */
-	ulint zip_size = fsp_flags_get_zip_size(space->flags);
-	buf_block_t* block = buf_page_get_gen(space->id, zip_size, 0,
-					      RW_X_LATCH,
-					      NULL,
-					      BUF_GET,
-					      __FILE__, __LINE__,
-					      &mtr);
-
-	/* 3 - write crypt data to page 0 */
-	byte* frame = buf_block_get_frame(block);
-	crypt_data->type = CRYPT_SCHEME_1;
-	crypt_data->write_page0(frame, &mtr);
-	mtr_commit(&mtr);
-
-	/* record lsn of update */
-	lsn_t end_lsn = mtr.end_lsn;
-
-	/* 4 - sync tablespace before publishing crypt data */
-	bool success = false;
-
-	do {
-		ulint n_pages = 0;
-		success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages);
-		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
-	} while (!success);
-
-	/* 5 - publish crypt data */
-	mutex_enter(&fil_crypt_threads_mutex);
-	mutex_enter(&crypt_data->mutex);
-	crypt_data->type = CRYPT_SCHEME_1;
-	ut_a(crypt_data->rotate_state.active_threads == 1);
-	crypt_data->rotate_state.active_threads = 0;
-	crypt_data->rotate_state.starting = false;
-
-	fil_crypt_start_converting = false;
-	mutex_exit(&crypt_data->mutex);
-	mutex_exit(&fil_crypt_threads_mutex);
-
-	/* the conversion itself never asks for a recheck */
-	return false;
-}
-
-/** State of a rotation thread.
-The constructor zero-fills the whole object with memset() and then sets
-thread_no, first = true and an initial estimated_max_iops of 20. */
-struct rotate_thread_t {
- explicit rotate_thread_t(uint no) {
- memset(this, 0, sizeof(* this));
- thread_no = no;
- first = true;
- estimated_max_iops = 20;
- }
-
- uint thread_no; /*!< number given to the constructor; compared with srv_n_fil_crypt_threads in should_shutdown() */
- bool first; /*!< is position before first space */
- fil_space_t* space; /*!< current space or NULL */
- ulint offset; /*!< current offset */
- ulint batch; /*!< #pages to rotate */
- uint min_key_version_found;/*!< min key version found but not rotated */
- lsn_t end_lsn; /*!< max lsn when rotating this space */
-
- uint estimated_max_iops; /*!< estimation of max iops */
- uint allocated_iops; /*!< allocated iops */
- uint cnt_waited; /*!< #times waited during this slot */
- uint sum_waited_us; /*!< wait time during this slot */
-
- fil_crypt_stat_t crypt_stat; // statistics, folded into the global crypt_stat by fil_crypt_update_total_stat()
-
- btr_scrub_t scrub_data; /* thread local data used by btr_scrub-functions
- * when iterating pages of tablespace */
-
- /** Decide from srv_shutdown_state whether to stop.
- With SRV_SHUTDOWN_NONE the thread stops only when its thread_no is
- not below srv_n_fil_crypt_threads; with SRV_SHUTDOWN_CLEANUP it
- always stops. Any other shutdown state trips a debug assertion and
- also reports true.
- @return whether this thread should terminate */
- bool should_shutdown() const {
- switch (srv_shutdown_state) {
- case SRV_SHUTDOWN_NONE:
- return thread_no >= srv_n_fil_crypt_threads;
- case SRV_SHUTDOWN_CLEANUP:
- return true;
- case SRV_SHUTDOWN_FLUSH_PHASE:
- case SRV_SHUTDOWN_LAST_PHASE:
- case SRV_SHUTDOWN_EXIT_THREADS:
- break;
- }
- ut_ad(0);
- return true;
- }
-};
-
-/***********************************************************************
-Check if space needs rotation given a key_state
-
-Only tablespaces with purpose FIL_TABLESPACE are considered. A space
-without crypt data is first handed to
-fil_crypt_start_encrypting_space(). The remaining checks run with
-crypt_data->mutex held; the mutex is released on every exit path.
-key_state is refreshed only when its key_id differs from the key_id of
-the space.
-
-@param[in,out] state Key rotation state
-@param[in,out] key_state Key state
-@param[in,out] recheck needs recheck ?
-@return true if space needs key rotation or background scrubbing */
-static
-bool
-fil_crypt_space_needs_rotation(
- rotate_thread_t* state,
- key_state_t* key_state,
- bool* recheck)
-{
- fil_space_t* space = state->space;
-
- /* Make sure that tablespace is normal tablespace */
- if (space->purpose != FIL_TABLESPACE) {
- return false;
- }
-
- ut_ad(space->n_pending_ops > 0);
-
- fil_space_crypt_t *crypt_data = space->crypt_data;
-
- if (crypt_data == NULL) {
- /**
- * space has no crypt data
- * start encrypting it...
- */
- *recheck = fil_crypt_start_encrypting_space(space);
- crypt_data = space->crypt_data;
-
- if (crypt_data == NULL) {
- return false;
- }
-
- crypt_data->key_get_latest_version(); /* result unused; NOTE(review): presumably called for a side effect - confirm */
- }
-
- /* If used key_id is not found from encryption plugin we can't
- continue to rotate the tablespace */
- if (!crypt_data->is_key_found()) {
- return false;
- }
-
- mutex_enter(&crypt_data->mutex);
-
- do {
- /* prevent threads from starting to rotate space */
- if (crypt_data->rotate_state.starting) {
- /* recheck this space later */
- *recheck = true;
- break;
- }
-
- /* do not start on a space that is being stopped */
- if (space->is_stopping()) {
- break;
- }
-
- if (crypt_data->rotate_state.flushing) {
- break;
- }
-
- /* No need to rotate space if encryption is disabled */
- if (crypt_data->not_encrypted()) {
- break;
- }
-
- if (crypt_data->key_id != key_state->key_id) {
- key_state->key_id= crypt_data->key_id;
- fil_crypt_get_key_state(key_state, crypt_data);
- }
-
- bool need_key_rotation = fil_crypt_needs_rotation(
- crypt_data->encryption,
- crypt_data->min_key_version,
- key_state->key_version, key_state->rotate_key_age);
-
- crypt_data->rotate_state.scrubbing.is_active =
- btr_scrub_start_space(space->id, &state->scrub_data);
-
- time_t diff = time(0) - crypt_data->rotate_state.scrubbing.
- last_scrub_completed;
-
- bool need_scrubbing =
- (srv_background_scrub_data_uncompressed ||
- srv_background_scrub_data_compressed) &&
- crypt_data->rotate_state.scrubbing.is_active
- && diff >= 0
- && ulint(diff) >= srv_background_scrub_data_interval;
-
- if (need_key_rotation == false && need_scrubbing == false) {
- break;
- }
-
- mutex_exit(&crypt_data->mutex);
-
- return true;
- } while (0);
-
- mutex_exit(&crypt_data->mutex);
-
-
- return false;
-}
-
-/***********************************************************************
-Update global statistics with thread statistics
-
-The page counters of the thread are added to the global crypt_stat and
-the iops estimate previously published by this thread is replaced by
-its current estimated_max_iops. Afterwards the thread counters are
-cleared and the published estimate is remembered for the next call.
-@param[in,out] state key rotation statistics */
-static void
-fil_crypt_update_total_stat(
-	rotate_thread_t *state)
-{
-	fil_crypt_stat_t&	mine = state->crypt_stat;
-
-	mutex_enter(&crypt_stat_mutex);
-
-	crypt_stat.pages_read_from_cache += mine.pages_read_from_cache;
-	crypt_stat.pages_read_from_disk += mine.pages_read_from_disk;
-	crypt_stat.pages_modified += mine.pages_modified;
-	crypt_stat.pages_flushed += mine.pages_flushed;
-
-	/* take back the estimate published earlier ... */
-	crypt_stat.estimated_iops -= mine.estimated_iops;
-	/* ... and publish the current one instead */
-	crypt_stat.estimated_iops += state->estimated_max_iops;
-
-	mutex_exit(&crypt_stat_mutex);
-
-	/* start a fresh slot, keeping only what we just published */
-	memset(&mine, 0, sizeof(mine));
-	mine.estimated_iops = state->estimated_max_iops;
-}
-
-/***********************************************************************
-Allocate iops to thread from global setting,
-used before starting to rotate a space.
-
-The thread takes whatever is left of srv_n_fil_crypt_iops, capped at
-its own estimated_max_iops. state->allocated_iops is written only when
-something was left to take.
-@param[in,out] state Rotation state
-@return true if allocation succeeded, false if failed */
-static
-bool
-fil_crypt_alloc_iops(
-	rotate_thread_t *state)
-{
-	ut_ad(state->allocated_iops == 0);
-
-	/* We have not yet selected the space to rotate, thus
-	state might not contain space and we can't check
-	its status yet. */
-
-	const uint	wanted = state->estimated_max_iops;
-
-	mutex_enter(&fil_crypt_threads_mutex);
-
-	const bool	exhausted =
-		n_fil_crypt_iops_allocated >= srv_n_fil_crypt_iops;
-
-	if (exhausted) {
-		/* this can happen when user decreases srv_fil_crypt_iops */
-		mutex_exit(&fil_crypt_threads_mutex);
-		return false;
-	}
-
-	uint	granted = srv_n_fil_crypt_iops - n_fil_crypt_iops_allocated;
-
-	granted = (granted > wanted) ? wanted : granted;
-
-	n_fil_crypt_iops_allocated += granted;
-	mutex_exit(&fil_crypt_threads_mutex);
-
-	state->allocated_iops = granted;
-
-	return granted > 0;
-}
-
-/***********************************************************************
-Reallocate iops to thread,
-used when inside a space
-
-If the thread had to wait for more than 10% of the pages of the last
-batch, estimated_max_iops is recomputed from the average wait time and
-the wait counters are reset. Then the allocation is adjusted: iops
-above the estimate are handed back (but the thread always keeps at
-least one), or free iops are taken up to the estimate. Finally the
-thread statistics are folded into the global ones.
-@param[in,out] state Rotation state */
-static
-void
-fil_crypt_realloc_iops(
-	rotate_thread_t *state)
-{
-	ut_a(state->allocated_iops > 0);
-
-	if (10 * state->cnt_waited > state->batch) {
-		/* if we waited more than 10% re-estimate max_iops;
-		 * cnt_waited is non-zero here, so the division is safe */
-		ulint avg_wait_time_us =
-			state->sum_waited_us / state->cnt_waited;
-
-		if (avg_wait_time_us == 0) {
-			avg_wait_time_us = 1; // prevent division by zero
-		}
-
-		DBUG_PRINT("ib_crypt",
-			   ("thr_no: %u - update estimated_max_iops from %u to "
-			    ULINTPF ".",
-			    state->thread_no,
-			    state->estimated_max_iops,
-			    1000000 / avg_wait_time_us));
-
-		state->estimated_max_iops = uint(1000000 / avg_wait_time_us);
-		state->cnt_waited = 0;
-		state->sum_waited_us = 0;
-	} else if (state->batch > 0) {
-		/* an empty batch would make the percentage below divide
-		 * by zero, so nothing is traced for it */
-		DBUG_PRINT("ib_crypt",
-			   ("thr_no: %u only waited " ULINTPF "%% skip re-estimate.",
-			    state->thread_no,
-			    (100 * ulint(state->cnt_waited)) / state->batch));
-	}
-
-	if (state->estimated_max_iops <= state->allocated_iops) {
-		/* return extra iops */
-		uint extra = state->allocated_iops - state->estimated_max_iops;
-
-		if (extra > 0) {
-			mutex_enter(&fil_crypt_threads_mutex);
-			if (n_fil_crypt_iops_allocated < extra) {
-				/* unknown bug!
-				 * crash in debug
-				 * keep n_fil_crypt_iops_allocated unchanged
-				 * in release */
-				ut_ad(0);
-				extra = 0;
-			}
-			n_fil_crypt_iops_allocated -= extra;
-			state->allocated_iops -= extra;
-
-			if (state->allocated_iops == 0) {
-				/* no matter how slow io system seems to be
-				 * never decrease allocated_iops to 0... */
-				state->allocated_iops ++;
-				n_fil_crypt_iops_allocated ++;
-			}
-
-			/* wake up threads waiting for iops */
-			os_event_set(fil_crypt_threads_event);
-			mutex_exit(&fil_crypt_threads_mutex);
-		}
-	} else {
-		/* see if there are more to get */
-		mutex_enter(&fil_crypt_threads_mutex);
-		if (n_fil_crypt_iops_allocated < srv_n_fil_crypt_iops) {
-			/* there are extra iops free */
-			uint extra = srv_n_fil_crypt_iops -
-				n_fil_crypt_iops_allocated;
-			if (state->allocated_iops + extra >
-			    state->estimated_max_iops) {
-				/* but don't alloc more than our max */
-				extra = state->estimated_max_iops -
-					state->allocated_iops;
-			}
-			n_fil_crypt_iops_allocated += extra;
-			state->allocated_iops += extra;
-
-			DBUG_PRINT("ib_crypt",
-				   ("thr_no: %u increased iops from %u to %u.",
-				    state->thread_no,
-				    state->allocated_iops - extra,
-				    state->allocated_iops));
-
-		}
-		mutex_exit(&fil_crypt_threads_mutex);
-	}
-
-	fil_crypt_update_total_stat(state);
-}
-
-/***********************************************************************
-Return allocated iops to global
-
-Gives everything the thread holds back to the shared pool, signals
-fil_crypt_threads_event and then folds the thread statistics into the
-global ones. The statistics are updated even when nothing was held.
-@param[in,out] state Rotation state */
-static
-void
-fil_crypt_return_iops(
-	rotate_thread_t *state)
-{
-	const uint	held = state->allocated_iops;
-
-	if (held > 0) {
-		mutex_enter(&fil_crypt_threads_mutex);
-
-		if (n_fil_crypt_iops_allocated >= held) {
-			n_fil_crypt_iops_allocated -= held;
-		} else {
-			/* unknown bug!
-			 * crash in debug
-			 * keep n_fil_crypt_iops_allocated unchanged
-			 * in release */
-			ut_ad(0);
-		}
-
-		state->allocated_iops = 0;
-		os_event_set(fil_crypt_threads_event);
-		mutex_exit(&fil_crypt_threads_mutex);
-	}
-
-	fil_crypt_update_total_stat(state);
-}
-
-/***********************************************************************
-Search for a space needing rotation
-
-Waits until iops could be allocated (or shutdown is requested), then
-walks the tablespaces starting after state->space. When state->first
-is set the walk restarts from the beginning. On success state->space
-is the space found and state->min_key_version_found is initialised
-from key_state->key_version. When no space is found the allocated iops
-are returned.
-@param[in,out] key_state Key state
-@param[in,out] state Rotation state
-@param[in,out] recheck recheck ?
-@return true if a space needing rotation was found, false if not or if
-the thread should shut down */
-static
-bool
-fil_crypt_find_space_to_rotate(
- key_state_t* key_state,
- rotate_thread_t* state,
- bool* recheck)
-{
- /* we need iops to start rotating */
- while (!state->should_shutdown() && !fil_crypt_alloc_iops(state)) {
- os_event_reset(fil_crypt_threads_event);
- os_event_wait_time(fil_crypt_threads_event, 1000000);
- }
-
- if (state->should_shutdown()) {
- if (state->space) {
- fil_space_release(state->space);
- state->space = NULL;
- }
- return false;
- }
-
- if (state->first) {
- state->first = false;
- if (state->space) {
- fil_space_release(state->space);
- }
- state->space = NULL;
- }
-
- /* If key rotation is enabled (default) we iterate all tablespaces.
- If key rotation is not enabled we iterate only the tablespaces
- added to keyrotation list. */
- if (srv_fil_crypt_rotate_key_age) {
- state->space = fil_space_next(state->space);
- } else {
- state->space = fil_space_keyrotate_next(state->space);
- }
-
- while (!state->should_shutdown() && state->space) {
- fil_crypt_read_crypt_data(state->space);
-
- if (fil_crypt_space_needs_rotation(state, key_state, recheck)) {
- ut_ad(key_state->key_id);
- /* init state->min_key_version_found before
- * starting on a space */
- state->min_key_version_found = key_state->key_version;
- return true;
- }
-
- if (srv_fil_crypt_rotate_key_age) {
- state->space = fil_space_next(state->space);
- } else {
- state->space = fil_space_keyrotate_next(state->space);
- }
- }
-
- /* if we didn't find any space return iops */
- fil_crypt_return_iops(state);
-
- return false;
-
-}
-
-/***********************************************************************
-Start rotating a space
-
-Runs with crypt_data->mutex held. The first thread entering a space
-(active_threads == 0) initialises the shared rotate_state; every
-thread then increments active_threads and copies end_lsn and
-min_key_version_found from the shared state into its own state.
-@param[in] key_state Key state
-@param[in,out] state Rotation state */
-static
-void
-fil_crypt_start_rotate_space(
- const key_state_t* key_state,
- rotate_thread_t* state)
-{
- fil_space_crypt_t *crypt_data = state->space->crypt_data;
-
- ut_ad(crypt_data);
- mutex_enter(&crypt_data->mutex);
- ut_ad(key_state->key_id == crypt_data->key_id);
-
- if (crypt_data->rotate_state.active_threads == 0) {
- /* only first thread needs to init */
- crypt_data->rotate_state.next_offset = 1; // skip page 0
- /* no need to rotate beyond current max
- * if space extends, it will be encrypted with newer version */
- /* FIXME: max_offset could be removed and instead
- space->size consulted.*/
- crypt_data->rotate_state.max_offset = state->space->size;
- crypt_data->rotate_state.end_lsn = 0;
- crypt_data->rotate_state.min_key_version_found =
- key_state->key_version;
-
- crypt_data->rotate_state.start_time = time(0);
-
- if (crypt_data->type == CRYPT_SCHEME_UNENCRYPTED &&
- crypt_data->is_encrypted() &&
- key_state->key_version != 0) {
- /* this is rotation unencrypted => encrypted */
- crypt_data->type = CRYPT_SCHEME_1;
- }
- }
-
- /* count active threads in space */
- crypt_data->rotate_state.active_threads++;
-
- /* Initialize thread local state */
- state->end_lsn = crypt_data->rotate_state.end_lsn;
- state->min_key_version_found =
- crypt_data->rotate_state.min_key_version_found;
-
- mutex_exit(&crypt_data->mutex);
-}
-
-/***********************************************************************
-Search for batch of pages needing rotation
-
-The wanted batch size is srv_alloc_time * state->allocated_iops. With
-crypt_data->mutex held the thread claims the pages starting at
-rotate_state.next_offset: state->offset is the first page and
-state->batch the number of pages, limited to what remains before
-max_offset. state->batch is 0 when next_offset equals max_offset.
-next_offset is always advanced by the full wanted batch size, also
-when nothing was found.
-@param[in] key_state Key state
-@param[in,out] state Rotation state
-@return true if page needing key rotation found, false if not found */
-static
-bool
-fil_crypt_find_page_to_rotate(
- const key_state_t* key_state,
- rotate_thread_t* state)
-{
- ulint batch = srv_alloc_time * state->allocated_iops;
- fil_space_t* space = state->space;
-
- ut_ad(!space || space->n_pending_ops > 0);
-
- /* If space is marked to be dropped stop rotation. */
- if (!space || space->is_stopping()) {
- return false;
- }
-
- fil_space_crypt_t *crypt_data = space->crypt_data;
-
- mutex_enter(&crypt_data->mutex);
- ut_ad(key_state->key_id == crypt_data->key_id);
-
- bool found = crypt_data->rotate_state.max_offset >=
- crypt_data->rotate_state.next_offset;
-
- if (found) {
- state->offset = crypt_data->rotate_state.next_offset;
- ulint remaining = crypt_data->rotate_state.max_offset -
- crypt_data->rotate_state.next_offset;
-
- if (batch <= remaining) {
- state->batch = batch;
- } else {
- state->batch = remaining;
- }
- }
-
- crypt_data->rotate_state.next_offset += batch;
- mutex_exit(&crypt_data->mutex);
- return found;
-}
-
-/***********************************************************************
-Check if a page is uninitialized (doesn't need to be rotated)
-
-This is a thin wrapper that forwards to buf_page_is_zeroes().
-@param[in] frame Page to check
-@param[in] zip_size zip_size or 0
-@return true if page is uninitialized, false if not. */
-static inline
-bool
-fil_crypt_is_page_uninitialized(
-	const byte	*frame,
-	uint		zip_size)
-{
-	const bool	zero_filled = buf_page_is_zeroes(frame, zip_size);
-
-	return zero_filled;
-}
-
-#define fil_crypt_get_page_throttle(state,offset,mtr,sleeptime_ms) \
- fil_crypt_get_page_throttle_func(state, offset, mtr, \
- sleeptime_ms, __FILE__, __LINE__)
-
-/***********************************************************************
-Get a page and compute sleep time
-
-The page is first looked up with buf_page_try_get_func(); if that
-succeeds it is returned right away and no sleep time is added.
-Otherwise, unless the space is stopping, the page is fetched with
-buf_page_get_gen() and the time this took is added to the wait
-statistics of the thread. If the average wait per page is shorter than
-the time slice implied by state->allocated_iops, the difference is
-added to *sleeptime_ms. The compressed page size is derived from
-state->space->flags.
-@param[in,out] state Rotation state
-@param[in] offset Page offset
-@param[in,out] mtr Minitransaction
-@param[in,out] sleeptime_ms Sleep time, accumulated onto the value passed in
-@param[in] file File where called
-@param[in] line Line where called
-@return page or NULL*/
-static
-buf_block_t*
-fil_crypt_get_page_throttle_func(
- rotate_thread_t* state,
- ulint offset,
- mtr_t* mtr,
- ulint* sleeptime_ms,
- const char* file,
- ulint line)
-{
- fil_space_t* space = state->space;
- ulint zip_size = fsp_flags_get_zip_size(space->flags);
- ut_ad(space->n_pending_ops > 0);
-
- buf_block_t* block = buf_page_try_get_func(space->id, offset, RW_X_LATCH,
- true,
- file, line, mtr);
- if (block != NULL) {
- /* page was in buffer pool */
- state->crypt_stat.pages_read_from_cache++;
- return block;
- }
-
- /* Before reading from tablespace we need to make sure that
- tablespace exists and is not is just being dropped. */
- if (space->is_stopping()) {
- return NULL;
- }
-
- state->crypt_stat.pages_read_from_disk++;
-
- ullint start = ut_time_us(NULL);
- block = buf_page_get_gen(space->id, zip_size, offset,
- RW_X_LATCH,
- NULL, BUF_GET_POSSIBLY_FREED,
- file, line, mtr);
- ullint end = ut_time_us(NULL);
-
- if (end < start) {
- end = start; // safety...
- }
-
- state->cnt_waited++;
- state->sum_waited_us += (end - start);
-
- /* average page load */
- ulint add_sleeptime_ms = 0;
- ulint avg_wait_time_us = state->sum_waited_us / state->cnt_waited;
- ulint alloc_wait_us = 1000000 / state->allocated_iops; /* time slice per page at the allocated rate */
-
- if (avg_wait_time_us < alloc_wait_us) {
- /* we reading faster than we allocated */
- add_sleeptime_ms = (alloc_wait_us - avg_wait_time_us) / 1000;
- } else {
- /* if page load time is longer than we want, skip sleeping */
- }
-
- *sleeptime_ms += add_sleeptime_ms;
-
- return block;
-}
-
-
-/***********************************************************************
-Get block and allocation status
-
-note: innodb locks fil_space_latch and then block when allocating page
-but locks block and then fil_space_latch when freeing page.
-
-The allocation status is read with fsp_page_is_free() inside a local
-mini-transaction. For a free page the block is fetched before that
-local mini-transaction is committed; for an allocated page the local
-mini-transaction is committed first and the block is fetched
-afterwards. The block itself is always fetched in the caller's mtr.
-
-@param[in,out] state Rotation state
-@param[in] zip_size Compressed size or 0
-@param[in] offset Page offset
-@param[in,out] mtr Minitransaction
-@param[out] allocation_status Allocation status
-@param[in,out] sleeptime_ms Sleep time, accumulated onto the value passed in
-@return block or NULL
-*/
-static
-buf_block_t*
-btr_scrub_get_block_and_allocation_status(
- rotate_thread_t* state,
- uint zip_size,
- ulint offset,
- mtr_t* mtr,
- btr_scrub_page_allocation_status_t *allocation_status,
- ulint* sleeptime_ms)
-{
- mtr_t local_mtr;
- buf_block_t *block = NULL;
- fil_space_t* space = state->space;
-
- ut_ad(space->n_pending_ops > 0);
- ut_ad(zip_size == fsp_flags_get_zip_size(space->flags));
-
- mtr_start(&local_mtr);
-
- *allocation_status = fsp_page_is_free(space->id, offset, &local_mtr) ?
- BTR_SCRUB_PAGE_FREE :
- BTR_SCRUB_PAGE_ALLOCATED;
-
- if (*allocation_status == BTR_SCRUB_PAGE_FREE) {
- /* this is easy case, we lock fil_space_latch first and
- then block */
- block = fil_crypt_get_page_throttle(state,
- offset, mtr,
- sleeptime_ms);
- mtr_commit(&local_mtr);
- } else {
- /* page is allocated according to xdes */
-
- /* release fil_space_latch *before* fetching block */
- mtr_commit(&local_mtr);
-
- /* NOTE: when we have locked dict_index_get_lock(),
- * it's safe to release fil_space_latch and then fetch block
- * as dict_index_get_lock() is needed to make tree modifications
- * such as free-ing a page
- */
-
- block = fil_crypt_get_page_throttle(state,
- offset, mtr,
- sleeptime_ms);
- }
-
- return block;
-}
-
-
-/***********************************************************************
-Rotate one page
-
-Handles the page at state->offset of state->space. Nothing is done for
-a stopping space or for the TRX_SYS page of the system tablespace.
-Otherwise the page is fetched; if its key version needs rotation the
-page is dirtied with a dummy write of the space id so that it gets
-re-written, else the smallest key version seen is tracked for
-encrypted spaces. Afterwards the page is scrubbed if the btr_scrub
-functions ask for it, state->end_lsn is advanced, and the thread
-sleeps for the throttle time accumulated while fetching pages.
-@param[in] key_state Key state
-@param[in,out] state Rotation state */
-static
-void
-fil_crypt_rotate_page(
- const key_state_t* key_state,
- rotate_thread_t* state)
-{
- fil_space_t*space = state->space;
- ulint space_id = space->id;
- ulint offset = state->offset;
- const uint zip_size = fsp_flags_get_zip_size(space->flags);
- ulint sleeptime_ms = 0;
- fil_space_crypt_t *crypt_data = space->crypt_data;
-
- ut_ad(space->n_pending_ops > 0);
-
- /* In fil_crypt_thread where key rotation is done we have
- acquired space and checked that this space is not yet
- marked to be dropped. Similarly, in fil_crypt_find_page_to_rotate().
- Check here also to give DROP TABLE or similar a chance. */
- if (space->is_stopping()) {
- return;
- }
-
- if (space_id == TRX_SYS_SPACE && offset == TRX_SYS_PAGE_NO) {
- /* don't encrypt this as it contains address to dblwr buffer */
- return;
- }
-
- mtr_t mtr;
- mtr_start(&mtr);
- buf_block_t* block = fil_crypt_get_page_throttle(state,
- offset, &mtr,
- &sleeptime_ms);
-
- if (block) {
-
- bool modified = false;
- int needs_scrubbing = BTR_SCRUB_SKIP_PAGE;
- lsn_t block_lsn = block->page.newest_modification;
- byte* frame = buf_block_get_frame(block);
- uint kv = mach_read_from_4(frame+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); /* key version stored in the page header */
-
- /* check if tablespace is closing after reading page */
- if (!space->is_stopping()) {
-
- if (kv == 0 &&
- fil_crypt_is_page_uninitialized(frame, zip_size)) {
- ; /* all-zero page: nothing to rotate */
- } else if (fil_crypt_needs_rotation(
- crypt_data->encryption,
- kv, key_state->key_version,
- key_state->rotate_key_age)) {
-
- modified = true;
-
- /* force rotation by dummy updating page */
- mlog_write_ulint(frame +
- FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
- space_id, MLOG_4BYTES, &mtr);
-
- /* statistics */
- state->crypt_stat.pages_modified++;
- } else {
- if (crypt_data->is_encrypted()) {
- if (kv < state->min_key_version_found) {
- state->min_key_version_found = kv;
- }
- }
- }
-
- needs_scrubbing = btr_page_needs_scrubbing(
- &state->scrub_data, block,
- BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN);
- }
-
- mtr_commit(&mtr);
- lsn_t end_lsn = mtr.end_lsn;
-
- if (needs_scrubbing == BTR_SCRUB_PAGE) {
- mtr_start(&mtr);
- /*
- * refetch page and allocation status
- */
- btr_scrub_page_allocation_status_t allocated;
- block = btr_scrub_get_block_and_allocation_status(
- state, zip_size, offset, &mtr,
- &allocated,
- &sleeptime_ms);
-
- if (block) {
-
- /* get required table/index and index-locks */
- needs_scrubbing = btr_scrub_recheck_page(
- &state->scrub_data, block, allocated, &mtr);
-
- if (needs_scrubbing == BTR_SCRUB_PAGE) {
- /* we need to refetch it once more now that we have
- * index locked.
- * NOTE(review): the refetched block is passed on without
- * a NULL check - confirm btr_scrub_page() copes with it */
- block = btr_scrub_get_block_and_allocation_status(
- state, zip_size, offset, &mtr,
- &allocated,
- &sleeptime_ms);
-
- needs_scrubbing = btr_scrub_page(&state->scrub_data,
- block, allocated,
- &mtr);
- }
-
- /* NOTE: mtr is committed inside btr_scrub_recheck_page()
- * and/or btr_scrub_page. This is to make sure that
- * locks & pages are latched in corrected order,
- * the mtr is in some circumstances restarted.
- * (mtr_commit() + mtr_start())
- */
- }
- }
-
- if (needs_scrubbing != BTR_SCRUB_PAGE) {
- /* if page didn't need scrubbing it might be that cleanups
- are needed. do those outside of any mtr to prevent deadlocks.
-
- the information what kinds of cleanups that are needed are
- encoded inside the needs_scrubbing, but this is opaque to
- this function (except the value BTR_SCRUB_PAGE) */
- btr_scrub_skip_page(&state->scrub_data, needs_scrubbing);
- }
-
- if (needs_scrubbing == BTR_SCRUB_TURNED_OFF) {
- /* if we just detected that scrubbing was turned off
- * update global state to reflect this */
- ut_ad(crypt_data);
- mutex_enter(&crypt_data->mutex);
- crypt_data->rotate_state.scrubbing.is_active = false;
- mutex_exit(&crypt_data->mutex);
- }
-
- if (modified) {
- /* if we modified page, we take lsn from mtr */
- ut_a(end_lsn > state->end_lsn);
- ut_a(end_lsn > block_lsn);
- state->end_lsn = end_lsn;
- } else {
- /* if we did not modify page, check for max lsn */
- if (block_lsn > state->end_lsn) {
- state->end_lsn = block_lsn;
- }
- }
- } else {
- /* If block read failed mtr memo and log should be empty. */
- ut_ad(dyn_array_get_data_size(&mtr.memo) == 0);
- ut_ad(dyn_array_get_data_size(&mtr.log) == 0);
- mtr_commit(&mtr);
- }
-
- if (sleeptime_ms) {
- os_event_reset(fil_crypt_throttle_sleep_event);
- os_event_wait_time(fil_crypt_throttle_sleep_event,
- 1000 * sleeptime_ms);
- }
-}
-
-/***********************************************************************
-Rotate a batch of pages
-
-Walks state->offset over the state->batch pages claimed by this thread
-and rotates each of them, leaving state->offset one past the batch.
-@param[in] key_state Key state
-@param[in,out] state Rotation state */
-static
-void
-fil_crypt_rotate_pages(
-	const key_state_t*	key_state,
-	rotate_thread_t*	state)
-{
-	const ulint	space_id = state->space->id;
-	const ulint	batch_end = state->offset + state->batch;
-
-	ut_ad(state->space->n_pending_ops > 0);
-
-	while (state->offset < batch_end) {
-
-		/* we can't rotate pages in dblwr buffer as
-		 * it's not possible to read those due to lots of asserts
-		 * in buffer pool.
-		 *
-		 * However since these are only (short-lived) copies of
-		 * real pages, they will be updated anyway when the
-		 * real page is updated
-		 */
-		const bool	in_dblwr = space_id == TRX_SYS_SPACE
-			&& buf_dblwr_page_inside(state->offset);
-
-		if (!in_dblwr) {
-			fil_crypt_rotate_page(key_state, state);
-		}
-
-		state->offset++;
-	}
-}
-
-/***********************************************************************
-Flush rotated pages and then update page 0
-
-When rotate_state.end_lsn is non-zero and the space is not stopping,
-buf_flush_list() is repeated with that LSN as limit until it reports
-success or the space starts stopping; the flushed pages and elapsed
-time are added to the thread statistics. If min_key_version is 0 the
-crypt data type is set to CRYPT_SCHEME_UNENCRYPTED. Finally the crypt
-data is written to page 0 in its own mini-transaction.
-
-@param[in,out] state rotation state */
-static
-void
-fil_crypt_flush_space(
- rotate_thread_t* state)
-{
- fil_space_t* space = state->space;
- fil_space_crypt_t *crypt_data = space->crypt_data;
-
- ut_ad(space->n_pending_ops > 0);
-
- /* flush tablespace pages so that there are no pages left with old key */
- lsn_t end_lsn = crypt_data->rotate_state.end_lsn;
-
- if (end_lsn > 0 && !space->is_stopping()) {
- bool success = false;
- ulint n_pages = 0;
- ulint sum_pages = 0;
- ullint start = ut_time_us(NULL);
-
- do {
- success = buf_flush_list(ULINT_MAX, end_lsn, &n_pages);
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
- sum_pages += n_pages;
- } while (!success && !space->is_stopping());
-
- ullint end = ut_time_us(NULL);
-
- if (sum_pages && end > start) {
- state->cnt_waited += sum_pages;
- state->sum_waited_us += (end - start);
-
- /* statistics */
- state->crypt_stat.pages_flushed += sum_pages;
- }
- }
-
- if (crypt_data->min_key_version == 0) {
- crypt_data->type = CRYPT_SCHEME_UNENCRYPTED; /* min_key_version 0: record the space as unencrypted */
- }
-
- /* update page 0 */
- mtr_t mtr;
- mtr_start(&mtr);
-
- const uint zip_size = fsp_flags_get_zip_size(state->space->flags);
-
- buf_block_t* block = buf_page_get_gen(space->id, zip_size, 0,
- RW_X_LATCH, NULL, BUF_GET,
- __FILE__, __LINE__, &mtr);
- byte* frame = buf_block_get_frame(block);
-
- crypt_data->write_page0(frame, &mtr);
-
- mtr_commit(&mtr);
-}
-
-/***********************************************************************
-Complete rotating a space
-
-For a space that is not stopping, the thread merges its
-min_key_version_found and end_lsn into the shared rotate_state and
-decrements active_threads. The last thread out, provided the whole
-space has been iterated, marks the space as flushing, publishes
-min_key_version, flushes the space via fil_crypt_flush_space() and
-clears the flushing flag again. For a stopping space only
-active_threads is decremented.
-@param[in] key_state Key state (not used by this function)
-@param[in,out] state Rotation state */
-static
-void
-fil_crypt_complete_rotate_space(
- const key_state_t* key_state,
- rotate_thread_t* state)
-{
- fil_space_crypt_t *crypt_data = state->space->crypt_data;
-
- ut_ad(crypt_data);
- ut_ad(state->space->n_pending_ops > 0);
-
- /* Space might already be dropped */
- if (!state->space->is_stopping()) {
- mutex_enter(&crypt_data->mutex);
-
- /**
- * Update crypt data state with state from thread
- */
- if (state->min_key_version_found <
- crypt_data->rotate_state.min_key_version_found) {
- crypt_data->rotate_state.min_key_version_found =
- state->min_key_version_found;
- }
-
- if (state->end_lsn > crypt_data->rotate_state.end_lsn) {
- crypt_data->rotate_state.end_lsn = state->end_lsn;
- }
-
- ut_a(crypt_data->rotate_state.active_threads > 0);
- crypt_data->rotate_state.active_threads--;
- bool last = crypt_data->rotate_state.active_threads == 0;
-
- /**
- * check if space is fully done
- * this as when threads shutdown, it could be that we "complete"
- * iterating before we have scanned the full space.
- */
- bool done = crypt_data->rotate_state.next_offset >=
- crypt_data->rotate_state.max_offset;
-
- /**
- * we should flush space if we're last thread AND
- * the iteration is done
- */
- bool should_flush = last && done;
-
- if (should_flush) {
- /* we're the last active thread */
- crypt_data->rotate_state.flushing = true;
- crypt_data->min_key_version =
- crypt_data->rotate_state.min_key_version_found;
- }
-
- /* inform scrubbing */
- crypt_data->rotate_state.scrubbing.is_active = false;
- mutex_exit(&crypt_data->mutex);
-
- /* all threads must call btr_scrub_complete_space wo/ mutex held */
- if (btr_scrub_complete_space(&state->scrub_data) == true) {
- if (should_flush) {
- /* only last thread updates last_scrub_completed */
- ut_ad(crypt_data);
- mutex_enter(&crypt_data->mutex);
- crypt_data->rotate_state.scrubbing.
- last_scrub_completed = time(0);
- mutex_exit(&crypt_data->mutex);
- }
- }
-
- if (should_flush) {
- fil_crypt_flush_space(state);
-
- mutex_enter(&crypt_data->mutex);
- crypt_data->rotate_state.flushing = false;
- mutex_exit(&crypt_data->mutex);
- }
- } else {
- mutex_enter(&crypt_data->mutex);
- ut_a(crypt_data->rotate_state.active_threads > 0);
- crypt_data->rotate_state.active_threads--;
- mutex_exit(&crypt_data->mutex);
- }
-}
-
-/*********************************************************************//**
-A thread which monitors global key state and rotates tablespaces accordingly.
-
-On start it takes its thread number from srv_n_fil_crypt_threads_started
-(under fil_crypt_threads_mutex) and signals fil_crypt_event. It then loops
-until thr.should_shutdown() is true: it waits on fil_crypt_threads_event,
-then walks the spaces returned by fil_crypt_find_space_to_rotate() and, for
-each of them, the pages returned by fil_crypt_find_page_to_rotate(). On exit
-it calls fil_crypt_return_iops(), releases any space it still references,
-decrements srv_n_fil_crypt_threads_started and signals fil_crypt_event again.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(fil_crypt_thread)(
-/*=============================*/
- void* arg __attribute__((unused))) /*!< in: a dummy parameter required
- * by os_thread_create */
-{
- UT_NOT_USED(arg);
-
- mutex_enter(&fil_crypt_threads_mutex);
- uint thread_no = srv_n_fil_crypt_threads_started;
- srv_n_fil_crypt_threads_started++;
- os_event_set(fil_crypt_event); /* signal that we started */
- mutex_exit(&fil_crypt_threads_mutex);
-
- /* state of this thread */
- rotate_thread_t thr(thread_no);
-
- /* if we find a space that is starting, skip over it and recheck it later */
- bool recheck = false;
-
- while (!thr.should_shutdown()) {
-
- key_state_t new_state;
-
- time_t wait_start = time(0);
-
- while (!thr.should_shutdown()) {
-
- /* wait for key state changes,
- * i.e. either a new key version or a
- * new rotate_key_age */
- os_event_reset(fil_crypt_threads_event);
-
- if (os_event_wait_time(fil_crypt_threads_event, 1000000) == 0) {
- break; /* NOTE(review): presumably 0 means the event was signalled rather than timed out -- confirm */
- }
-
- if (recheck) {
- /* check recheck here, after sleep, so
- * that we don't busy loop while one thread is starting
- * a space */
- break;
- }
-
- time_t waited = time(0) - wait_start;
-
- /* Break if we have waited the background scrub
- interval and background scrubbing is enabled */
- if (waited >= 0
- && ulint(waited) >= srv_background_scrub_data_check_interval
- && (srv_background_scrub_data_uncompressed
- || srv_background_scrub_data_compressed)) {
- break;
- }
- }
-
- recheck = false;
- thr.first = true; // restart from first tablespace
-
- /* iterate all spaces searching for those needing rotation */
- while (!thr.should_shutdown() &&
- fil_crypt_find_space_to_rotate(&new_state, &thr, &recheck)) {
-
- /* we found a space to rotate */
- fil_crypt_start_rotate_space(&new_state, &thr);
-
- /* iterate all pages (cooperatively with other threads) */
- while (!thr.should_shutdown() &&
- fil_crypt_find_page_to_rotate(&new_state, &thr)) {
-
- if (!thr.space->is_stopping()) {
- /* rotate a (set) of pages */
- fil_crypt_rotate_pages(&new_state, &thr);
- }
-
- /* If space is marked as stopping, release
- space and stop rotation. */
- if (thr.space->is_stopping()) {
- fil_crypt_complete_rotate_space(
- &new_state, &thr);
- fil_space_release(thr.space);
- thr.space = NULL;
- break;
- }
-
- /* realloc iops */
- fil_crypt_realloc_iops(&thr);
- }
-
- /* complete rotation (skipped when the space was already
- completed and released in the loop above) */
- if (thr.space) {
- fil_crypt_complete_rotate_space(&new_state, &thr);
- }
-
- /* force key state refresh */
- new_state.key_id = 0;
-
- /* return iops */
- fil_crypt_return_iops(&thr);
- }
- }
-
- /* return iops if shutting down */
- fil_crypt_return_iops(&thr);
-
- /* release current space if shutting down */
- if (thr.space) {
- fil_space_release(thr.space);
- thr.space = NULL;
- }
-
- mutex_enter(&fil_crypt_threads_mutex);
- srv_n_fil_crypt_threads_started--;
- os_event_set(fil_crypt_event); /* signal that we stopped */
- mutex_exit(&fil_crypt_threads_mutex);
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/*********************************************************************
-Adjust thread count for key rotation.
-
-Initializes the key rotation thread bookkeeping on first use. When the
-count grows, the missing threads are created; when it shrinks,
-fil_crypt_threads_event is set to wake the threads waiting on it. The call
-then blocks until srv_n_fil_crypt_threads_started equals
-srv_n_fil_crypt_threads, re-checking whenever fil_crypt_event is signalled
-or the timed wait expires.
-@param[in]	new_cnt		Number of threads to be used */
-UNIV_INTERN
-void
-fil_crypt_set_thread_cnt(
-	const uint	new_cnt)
-{
-	if (!fil_crypt_threads_inited) {
-		fil_crypt_threads_init();
-	}
-
-	mutex_enter(&fil_crypt_threads_mutex);
-
-	if (new_cnt > srv_n_fil_crypt_threads) {
-		uint	add = new_cnt - srv_n_fil_crypt_threads;
-		srv_n_fil_crypt_threads = new_cnt;
-		for (uint i = 0; i < add; i++) {
-			os_thread_id_t rotation_thread_id;
-			os_thread_create(fil_crypt_thread, NULL, &rotation_thread_id);
-
-			/* the sequence number is unsigned, hence %u */
-			ib_logf(IB_LOG_LEVEL_INFO,
-				"Creating #%u thread id %lu total threads %u.",
-				i+1, os_thread_pf(rotation_thread_id), new_cnt);
-		}
-	} else if (new_cnt < srv_n_fil_crypt_threads) {
-		srv_n_fil_crypt_threads = new_cnt;
-		os_event_set(fil_crypt_threads_event);
-	}
-
-	mutex_exit(&fil_crypt_threads_mutex);
-
-	/* Threads signal fil_crypt_event when they start and stop; the
-	timed wait bounds the delay if a signal arrives between the check
-	and the reset. */
-	while(srv_n_fil_crypt_threads_started != srv_n_fil_crypt_threads) {
-		os_event_reset(fil_crypt_event);
-		os_event_wait_time(fil_crypt_event, 1000000);
-	}
-}
-
-/*********************************************************************
-Adjust max key age.
-Publishes the new value in srv_fil_crypt_rotate_key_age and then sets
-fil_crypt_threads_event.
-@param[in]	val		New max key age */
-UNIV_INTERN
-void
-fil_crypt_set_rotate_key_age(
-	uint	val)
-{
-	/* store first, wake afterwards */
-	srv_fil_crypt_rotate_key_age = val;
-
-	os_event_set(fil_crypt_threads_event);
-}
-
-/*********************************************************************
-Adjust rotation iops.
-Publishes the new value in srv_n_fil_crypt_iops and then sets
-fil_crypt_threads_event.
-@param[in]	val		New max rotation iops */
-UNIV_INTERN
-void
-fil_crypt_set_rotation_iops(
-	uint	val)
-{
-	/* store first, wake afterwards */
-	srv_n_fil_crypt_iops = val;
-
-	os_event_set(fil_crypt_threads_event);
-}
-
-/*********************************************************************
-Adjust encrypt tables.
-Publishes the new value in srv_encrypt_tables and then sets
-fil_crypt_threads_event.
-@param[in]	val		New setting for innodb-encrypt-tables */
-UNIV_INTERN
-void
-fil_crypt_set_encrypt_tables(
-	uint	val)
-{
-	/* store first, wake afterwards */
-	srv_encrypt_tables = val;
-
-	os_event_set(fil_crypt_threads_event);
-}
-
-/*********************************************************************
-Init threads for key rotation.
-Does nothing when fil_crypt_threads_inited is already set. Otherwise
-creates the two events and the threads mutex, then starts the configured
-number of threads through fil_crypt_set_thread_cnt(). */
-UNIV_INTERN
-void
-fil_crypt_threads_init()
-{
-	ut_ad(mutex_own(&fil_system->mutex));
-
-	if (fil_crypt_threads_inited) {
-		return;
-	}
-
-	fil_crypt_event = os_event_create();
-	fil_crypt_threads_event = os_event_create();
-	mutex_create(fil_crypt_threads_mutex_key,
-		     &fil_crypt_threads_mutex, SYNC_NO_ORDER_CHECK);
-
-	/* Remember the configured count and restart from zero, so that
-	fil_crypt_set_thread_cnt() sees every thread as still to be
-	created. The flag is set before the call because that function
-	calls back here while the flag is clear. */
-	const uint	wanted = srv_n_fil_crypt_threads;
-
-	srv_n_fil_crypt_threads = 0;
-	fil_crypt_threads_inited = true;
-	fil_crypt_set_thread_cnt(wanted);
-}
-
-/*********************************************************************
-Clean up key rotation threads resources.
-A no-op unless fil_crypt_threads_inited is set. Asserts that no rotation
-thread is still counted as started, then frees both events and the
-threads mutex and clears the flag. */
-UNIV_INTERN
-void
-fil_crypt_threads_cleanup()
-{
-	if (fil_crypt_threads_inited) {
-		ut_a(srv_n_fil_crypt_threads_started == 0);
-
-		os_event_free(fil_crypt_event);
-		fil_crypt_event = NULL;
-
-		os_event_free(fil_crypt_threads_event);
-		fil_crypt_threads_event = NULL;
-
-		mutex_free(&fil_crypt_threads_mutex);
-		fil_crypt_threads_inited = false;
-	}
-}
-
-/*********************************************************************
-Wait for crypt threads to stop accessing space.
-
-Returns at once when srv_encrypt_tables is off or the space has no
-crypt_data. Otherwise polls rotate_state.active_threads and
-rotate_state.flushing under crypt_data->mutex until both are clear,
-sleeping via os_thread_sleep(20000) between polls and logging a warning
-each time another 30 seconds have passed.
-NOTE(review): the loop releases and re-acquires the dict mutex around the
-sleep, so the caller presumably holds it on entry -- confirm.
-@param[in]	space		Tablespace */
-UNIV_INTERN
-void
-fil_space_crypt_close_tablespace(
-	const fil_space_t*	space)
-{
-	if (!srv_encrypt_tables || !space->crypt_data) {
-		return;
-	}
-
-	mutex_enter(&fil_crypt_threads_mutex);
-
-	fil_space_crypt_t* crypt_data = space->crypt_data;
-
-	time_t start = time(0);
-	time_t last = start;
-
-	mutex_enter(&crypt_data->mutex);
-	mutex_exit(&fil_crypt_threads_mutex);
-
-	uint cnt = crypt_data->rotate_state.active_threads;
-	bool flushing = crypt_data->rotate_state.flushing;
-
-	while (cnt > 0 || flushing) {
-		mutex_exit(&crypt_data->mutex);
-		/* release dict mutex so that scrub threads can release their
-		 * table references */
-		dict_mutex_exit_for_mysql();
-
-		/* wakeup throttle (all) sleepers */
-		os_event_set(fil_crypt_throttle_sleep_event);
-
-		os_thread_sleep(20000);
-		dict_mutex_enter_for_mysql();
-		mutex_enter(&crypt_data->mutex);
-		cnt = crypt_data->rotate_state.active_threads;
-		flushing = crypt_data->rotate_state.flushing;
-
-		time_t now = time(0);
-
-		if (now >= last + 30) {
-			/* time_t need not have the width of long; cast so
-			that the argument matches %ld and the arguments
-			after it are read correctly */
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"Waited %ld seconds to drop space: %s (" ULINTPF
-				") active threads %u flushing=%d.",
-				(long) (now - start), space->name, space->id,
-				cnt, flushing);
-			last = now;
-		}
-	}
-
-	mutex_exit(&crypt_data->mutex);
-}
-
-/*********************************************************************
-Get crypt status for a space (used by information_schema).
-The output is zeroed first, so every field that is not filled in below
-reads as 0. The rotation fields are filled only while the space has
-active rotation threads or is being flushed.
-@param[in]	space		Tablespace
-@param[out]	status		Crypt status */
-UNIV_INTERN
-void
-fil_space_crypt_get_status(
-	const fil_space_t*			space,
-	struct fil_space_crypt_status_t*	status)
-{
-	memset(status, 0, sizeof(*status));
-
-	ut_ad(space->n_pending_ops > 0);
-	fil_crypt_read_crypt_data(const_cast<fil_space_t*>(space));
-	status->space = space->id;
-
-	fil_space_crypt_t*	crypt_data = space->crypt_data;
-
-	if (crypt_data == NULL) {
-		/* no crypt data: only the space id is reported */
-		return;
-	}
-
-	mutex_enter(&crypt_data->mutex);
-
-	status->scheme = crypt_data->type;
-	status->keyserver_requests = crypt_data->keyserver_requests;
-	status->min_key_version = crypt_data->min_key_version;
-	status->key_id = crypt_data->key_id;
-
-	const bool	busy = crypt_data->rotate_state.active_threads > 0
-		|| crypt_data->rotate_state.flushing;
-
-	if (busy) {
-		status->rotating = true;
-		status->flushing = crypt_data->rotate_state.flushing;
-		status->rotate_next_page_number =
-			crypt_data->rotate_state.next_offset;
-		status->rotate_max_page_number =
-			crypt_data->rotate_state.max_offset;
-	}
-
-	mutex_exit(&crypt_data->mutex);
-
-	if (!srv_encrypt_tables && !crypt_data->min_key_version) {
-		/* current_key_version stays 0 */
-		return;
-	}
-
-	status->current_key_version =
-		fil_crypt_get_latest_key_version(crypt_data);
-}
-
-/*********************************************************************
-Return crypt statistics.
-Copies the global crypt_stat into the caller's structure while holding
-crypt_stat_mutex.
-@param[out]	stat		Crypt statistics */
-UNIV_INTERN
-void
-fil_crypt_total_stat(
-	fil_crypt_stat_t*	stat)
-{
-	mutex_enter(&crypt_stat_mutex);
-
-	/* whole-structure copy taken under the mutex */
-	*stat = crypt_stat;
-
-	mutex_exit(&crypt_stat_mutex);
-}
-
-/*********************************************************************
-Get scrub status for a space (used by information_schema).
-
-The output is zeroed first. Without crypt_data only the space id is
-reported. The current_scrub_* fields are filled only while the space has
-active threads and scrubbing.is_active is set.
-@param[in]	space		Tablespace
-@param[out]	status		Scrub status */
-UNIV_INTERN
-void
-fil_space_get_scrub_status(
-	const fil_space_t*			space,
-	struct fil_space_scrub_status_t*	status)
-{
-	memset(status, 0, sizeof(*status));
-
-	ut_ad(space->n_pending_ops > 0);
-	fil_space_crypt_t*	crypt_data = space->crypt_data;
-
-	status->space = space->id;
-
-	if (crypt_data == NULL) {
-		return;
-	}
-
-	status->compressed = fsp_flags_get_zip_size(space->flags) > 0;
-
-	mutex_enter(&crypt_data->mutex);
-
-	status->last_scrub_completed =
-		crypt_data->rotate_state.scrubbing.last_scrub_completed;
-
-	const bool	scrub_running =
-		crypt_data->rotate_state.active_threads > 0
-		&& crypt_data->rotate_state.scrubbing.is_active;
-
-	if (scrub_running) {
-		status->scrubbing = true;
-		status->current_scrub_started =
-			crypt_data->rotate_state.start_time;
-		status->current_scrub_active_threads =
-			crypt_data->rotate_state.active_threads;
-		status->current_scrub_page_number =
-			crypt_data->rotate_state.next_offset;
-		status->current_scrub_max_page_number =
-			crypt_data->rotate_state.max_offset;
-	}
-
-	mutex_exit(&crypt_data->mutex);
-}
diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc
deleted file mode 100644
index fdd09a6034e..00000000000
--- a/storage/xtradb/fil/fil0fil.cc
+++ /dev/null
@@ -1,7725 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file fil/fil0fil.cc
-The tablespace memory cache
-
-Created 10/25/1995 Heikki Tuuri
-*******************************************************/
-
-#include "fil0fil.h"
-#include "fil0pagecompress.h"
-#include "fsp0pagecompress.h"
-#include "fil0crypt.h"
-
-#include <debug_sync.h>
-#include <my_dbug.h>
-
-#include "mem0mem.h"
-#include "hash0hash.h"
-#include "os0file.h"
-#include "mach0data.h"
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "log0recv.h"
-#include "fsp0fsp.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "dict0dict.h"
-#include "page0page.h"
-#include "page0zip.h"
-#include "trx0sys.h"
-#include "row0mysql.h"
-#include "os0file.h"
-#ifndef UNIV_HOTBACKUP
-# include "buf0lru.h"
-# include "ibuf0ibuf.h"
-# include "sync0sync.h"
-# include "os0sync.h"
-#else /* !UNIV_HOTBACKUP */
-# include "srv0srv.h"
-static ulint srv_data_read, srv_data_written;
-#endif /* !UNIV_HOTBACKUP */
-
-#include "zlib.h"
-#ifdef __linux__
-#include <linux/fs.h>
-#include <sys/ioctl.h>
-#include <fcntl.h>
-#endif
-#include "row0mysql.h"
-#include "trx0purge.h"
-
-MYSQL_PLUGIN_IMPORT extern my_bool lower_case_file_system;
-
-
-/*
- IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
- =============================================
-
-The tablespace cache is responsible for providing fast read/write access to
-tablespaces and logs of the database. File creation and deletion is done
-in other modules which know more of the logic of the operation, however.
-
-A tablespace consists of a chain of files. The size of the files does not
-have to be divisible by the database block size, because we may just leave
-the last incomplete block unused. When a new file is appended to the
-tablespace, the maximum size of the file is also specified. At the moment,
-we think that it is best to extend the file to its maximum size already at
-the creation of the file, because then we can avoid dynamically extending
-the file when more space is needed for the tablespace.
-
-A block's position in the tablespace is specified with a 32-bit unsigned
-integer. The files in the chain are thought to be catenated, and the block
-corresponding to an address n is the nth block in the catenated file (where
-the first block is named the 0th block, and the incomplete block fragments
-at the end of files are not taken into account). A tablespace can be extended
-by appending a new file at the end of the chain.
-
-Our tablespace concept is similar to the one of Oracle.
-
-To acquire more speed in disk transfers, a technique called disk striping is
-sometimes used. This means that logical block addresses are divided in a
-round-robin fashion across several disks. Windows NT supports disk striping,
-so there we do not need to support it in the database. Disk striping is
-implemented in hardware in RAID disks. We conclude that it is not necessary
-to implement it in the database. Oracle 7 does not support disk striping,
-either.
-
-Another trick used at some database sites is replacing tablespace files by
-raw disks, that is, the whole physical disk drive, or a partition of it, is
-opened as a single file, and it is accessed through byte offsets calculated
-from the start of the disk or the partition. This is recommended in some
-books on database tuning to achieve more speed in i/o. Using raw disk
-certainly prevents the OS from fragmenting disk space, but it is not clear
-if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
-system + EIDE Conner disk only a negligible difference in speed when reading
-from a file, versus reading from a raw disk.
-
-To have fast access to a tablespace or a log file, we put the data structures
-to a hash table. Each tablespace and log file is given an unique 32-bit
-identifier.
-
-Some operating systems do not support many open files at the same time,
-though NT seems to tolerate at least 900 open files. Therefore, we put the
-open files in an LRU-list. If we need to open another file, we may close the
-file at the end of the LRU-list. When an i/o-operation is pending on a file,
-the file cannot be closed. We take the file nodes with pending i/o-operations
-out of the LRU-list and keep a count of pending operations. When an operation
-completes, we decrement the count and return the file node to the LRU-list if
-the count drops to zero. */
-
-/** When mysqld is run, the default directory "." is the mysqld datadir,
-but in the MySQL Embedded Server Library and mysqlbackup it is not the default
-directory, and we must set the base file path explicitly */
-UNIV_INTERN const char* fil_path_to_mysql_datadir = ".";
-
-/** The number of fsyncs done to the log */
-UNIV_INTERN ulint fil_n_log_flushes = 0;
-
-/** Number of pending redo log flushes */
-UNIV_INTERN ulint fil_n_pending_log_flushes = 0;
-/** Number of pending tablespace flushes */
-UNIV_INTERN ulint fil_n_pending_tablespace_flushes = 0;
-
-/** Number of files currently open */
-UNIV_INTERN ulint fil_n_file_opened = 0;
-
-/** The null file address: page number FIL_NULL, byte offset 0 */
-UNIV_INTERN fil_addr_t fil_addr_null = {FIL_NULL, 0};
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register fil_system_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t fil_system_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef UNIV_PFS_RWLOCK
-/* Key to register file space latch with performance schema */
-UNIV_INTERN mysql_pfs_key_t fil_space_latch_key;
-#endif /* UNIV_PFS_RWLOCK */
-
-/** The tablespace memory cache. This variable is NULL before the module is
-initialized. */
-UNIV_INTERN fil_system_t* fil_system = NULL;
-
-/** At this age or older a space/page will be rotated
-(declaration only; the variable is defined in another file) */
-UNIV_INTERN extern uint srv_fil_crypt_rotate_key_age;
-UNIV_INTERN extern ib_mutex_t fil_crypt_threads_mutex; /* declaration only; defined in another file */
-
-/** Determine if (i) is a user tablespace id or not, i.e. it is neither 0
-nor an undo tablespace id. The argument is parenthesized so that an
-expression argument is compared as a whole; note that it is evaluated
-twice. */
-# define fil_is_user_tablespace_id(i) ((i) != 0 \
-				      && !srv_is_undo_tablespace(i))
-
-/** Determine if user has explicitly disabled fsync().
-Outside __WIN__ this is true for a FIL_TABLESPACE space when the flush
-method is SRV_UNIX_O_DIRECT_NO_FSYNC, and for a FIL_LOG space when it is
-SRV_UNIX_ALL_O_DIRECT. On __WIN__ it is always 0. */
-#ifndef __WIN__
-# define fil_buffering_disabled(s) \
- (((s)->purpose == FIL_TABLESPACE \
- && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)\
- || ((s)->purpose == FIL_LOG \
- && srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT))
-
-#else /* __WIN__ */
-# define fil_buffering_disabled(s) (0)
-#endif /* __WIN__ */
-
-#ifdef UNIV_DEBUG
-/** Try fil_validate() every this many times */
-# define FIL_VALIDATE_SKIP	17
-
-/******************************************************************//**
-Checks the consistency of the tablespace cache some of the time: only
-every FIL_VALIDATE_SKIP-th call runs fil_validate(), the others return
-TRUE immediately.
-@return TRUE if ok or the check was skipped */
-static
-ibool
-fil_validate_skip(void)
-/*===================*/
-{
-	/** Calls left until the next real check. Signed on purpose: the
-	unsynchronized decrement below may race and drop below zero. */
-	static int	fil_validate_count = FIL_VALIDATE_SKIP;
-
-	/* The race is tolerated because the counter only throttles the
-	costly fil_validate() check in debug builds. */
-	if (--fil_validate_count <= 0) {
-		fil_validate_count = FIL_VALIDATE_SKIP;
-		return(fil_validate());
-	}
-
-	return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/********************************************************************//**
-Determines if the file nodes of a space belong to the least-recently-used
-list: the space must be a FIL_TABLESPACE with a user tablespace id.
-@return TRUE if the files of the space belong to fil_system->LRU */
-UNIV_INLINE
-ibool
-fil_space_belongs_in_lru(
-/*=====================*/
-	const fil_space_t*	space)	/*!< in: file space */
-{
-	if (space->purpose != FIL_TABLESPACE) {
-		return(FALSE);
-	}
-
-	return(fil_is_user_tablespace_id(space->id));
-}
-
-/********************************************************************//**
-NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
-
-Prepares a file node for i/o. Opens the file if it is closed. Updates the
-pending i/o's field in the node and the system appropriately. Takes the node
-off the LRU list if it is in the LRU list. The caller must hold the fil_system
-mutex.
-@return false if the file can't be opened, otherwise true */
-static
-bool
-fil_node_prepare_for_io(
-/*====================*/
- fil_node_t* node, /*!< in: file node */
- fil_system_t* system, /*!< in: tablespace memory cache */
- fil_space_t* space); /*!< in: space */
-/********************************************************************//**
-Updates the data structures when an i/o operation finishes. Updates the
-pending i/o's field in the node appropriately. */
-static
-void
-fil_node_complete_io(
-/*=================*/
- fil_node_t* node, /*!< in: file node */
- fil_system_t* system, /*!< in: tablespace memory cache */
- ulint type); /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
- the node as modified if
- type == OS_FILE_WRITE */
-/** Free a space object from the tablespace memory cache. Close the files in
-the chain but do not delete them. There must not be any pending i/o's or
-flushes on the files.
-The fil_system->mutex will be released.
-@param[in] id tablespace ID
-@param[in] x_latched whether the caller holds exclusive space->latch
-@return whether the tablespace existed */
-static
-bool
-fil_space_free_and_mutex_exit(ulint id, bool x_latched);
-/********************************************************************//**
-Reads data from a space to a buffer by forwarding to fil_io() with
-OS_FILE_READ. Remember that the possible incomplete blocks at the end of
-file are ignored: they are not taken into account when calculating the
-byte offset within a space.
-@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
-i/o on a tablespace which does not exist */
-UNIV_INLINE
-dberr_t
-fil_read(
-/*=====*/
-	bool	sync,		/*!< in: true if synchronous aio is desired */
-	ulint	space_id,	/*!< in: space id */
-	ulint	zip_size,	/*!< in: compressed page size in bytes;
-				0 for uncompressed pages */
-	ulint	block_offset,	/*!< in: offset in number of blocks */
-	ulint	byte_offset,	/*!< in: remainder of offset in bytes; in aio
-				this must be divisible by the OS block size */
-	ulint	len,		/*!< in: how many bytes to read; this must not
-				cross a file boundary; in aio this must be a
-				block size multiple */
-	void*	buf,		/*!< in/out: buffer where to store data read;
-				in aio this must be appropriately aligned */
-	void*	message,	/*!< in: message for aio handler if non-sync
-				aio used, else ignored */
-	ulint*	write_size)	/*!< in/out: actual write size, initialized
-				after the first successful trim operation
-				for this page; once initialized we do not
-				trim again unless the actual page size
-				decreases */
-{
-	const dberr_t	err = fil_io(OS_FILE_READ, sync, space_id, zip_size,
-				     block_offset, byte_offset, len, buf,
-				     message, write_size);
-
-	return(err);
-}
-
-/********************************************************************//**
-Writes data to a space from a buffer by forwarding to fil_io() with
-OS_FILE_WRITE. Remember that the possible incomplete blocks at the end of
-file are ignored: they are not taken into account when calculating the
-byte offset within a space.
-@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
-i/o on a tablespace which does not exist */
-UNIV_INLINE
-dberr_t
-fil_write(
-/*======*/
-	bool	sync,		/*!< in: true if synchronous aio is desired */
-	ulint	space_id,	/*!< in: space id */
-	ulint	zip_size,	/*!< in: compressed page size in bytes;
-				0 for uncompressed pages */
-	ulint	block_offset,	/*!< in: offset in number of blocks */
-	ulint	byte_offset,	/*!< in: remainder of offset in bytes; in aio
-				this must be divisible by the OS block size */
-	ulint	len,		/*!< in: how many bytes to write; this must
-				not cross a file boundary; in aio this must
-				be a block size multiple */
-	void*	buf,		/*!< in: buffer from which to write; in aio
-				this must be appropriately aligned */
-	void*	message,	/*!< in: message for aio handler if non-sync
-				aio used, else ignored */
-	ulint*	write_size)	/*!< in/out: actual write size, initialized
-				after the first successful trim operation
-				for this page; once initialized we do not
-				trim again unless the actual page size
-				decreases */
-{
-	/* writes are not expected in read-only mode */
-	ut_ad(!srv_read_only_mode);
-
-	const dberr_t	err = fil_io(OS_FILE_WRITE, sync, space_id, zip_size,
-				     block_offset, byte_offset, len, buf,
-				     message, write_size);
-
-	return(err);
-}
-
-/*******************************************************************//**
-Returns the table space by a given id, NULL if not found.
-The id is looked up in the fil_system->spaces hash table; the caller must
-hold fil_system->mutex (checked with ut_ad).
-It is unsafe to dereference the returned pointer. It is fine to check
-for NULL.
-@param[in] id Tablespace id
-@return table space or NULL */
-fil_space_t*
-fil_space_get_by_id(
-/*================*/
- ulint id) /*!< in: space id */
-{
- fil_space_t* space;
-
- ut_ad(mutex_own(&fil_system->mutex));
-
- HASH_SEARCH(hash, fil_system->spaces, id,
- fil_space_t*, space,
- ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
- space->id == id);
-
- /* The system tablespace must always be found; the only exception
- allowed by this assertion is while srv_is_being_started is set */
- ut_ad(space || id != 0 || srv_is_being_started);
- return(space);
-}
-
-/****************************************************************//**
-Get space id from fil node.
-@return id of the space that the node belongs to */
-ulint
-fil_node_get_space_id(
-/*==================*/
-	fil_node_t*	node)	/*!< in: Compressed node*/
-{
-	ut_ad(node);
-	ut_ad(node->space);
-
-	const fil_space_t*	owner = node->space;
-
-	return(owner->id);
-}
-
-/*******************************************************************//**
-Returns the table space by a given name, NULL if not found.
-The name is folded with ut_fold_string() and looked up in
-fil_system->name_hash; the caller must hold fil_system->mutex (checked
-with ut_ad).
-@return table space or NULL */
-fil_space_t*
-fil_space_get_by_name(
-/*==================*/
-	const char*	name)	/*!< in: space name */
-{
-	ut_ad(mutex_own(&fil_system->mutex));
-
-	const ulint	fold = ut_fold_string(name);
-	fil_space_t*	found;
-
-	HASH_SEARCH(name_hash, fil_system->name_hash, fold,
-		    fil_space_t*, found,
-		    ut_ad(found->magic_n == FIL_SPACE_MAGIC_N),
-		    !strcmp(name, found->name));
-
-	return(found);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Returns the version number of a tablespace, -1 if not found.
-The lookup and the read of tablespace_version both happen while holding
-fil_system->mutex.
-@return version number, -1 if the tablespace does not exist in the
-memory cache */
-UNIV_INTERN
-ib_int64_t
-fil_space_get_version(
-/*==================*/
-	ulint	id)	/*!< in: space id */
-{
-	ib_int64_t	version = -1;
-
-	ut_ad(fil_system);
-
-	mutex_enter(&fil_system->mutex);
-
-	const fil_space_t*	found = fil_space_get_by_id(id);
-
-	if (found != NULL) {
-		version = found->tablespace_version;
-	}
-
-	mutex_exit(&fil_system->mutex);
-
-	return(version);
-}
-
-/*******************************************************************//**
-Returns the latch of a file space. The space must exist (enforced with
-ut_a). The tablespace flags are copied out only when flags is not NULL.
-@return latch protecting storage allocation */
-UNIV_INTERN
-prio_rw_lock_t*
-fil_space_get_latch(
-/*================*/
-	ulint	id,	/*!< in: space id */
-	ulint*	flags)	/*!< out: tablespace flags */
-{
-	ut_ad(fil_system);
-
-	mutex_enter(&fil_system->mutex);
-
-	fil_space_t* const	found = fil_space_get_by_id(id);
-
-	ut_a(found);
-
-	if (flags != NULL) {
-		*flags = found->flags;
-	}
-
-	prio_rw_lock_t* const	latch = &found->latch;
-
-	mutex_exit(&fil_system->mutex);
-
-	return(latch);
-}
-
-/*******************************************************************//**
-Returns the type of a file space.
-The space is looked up and its purpose is read while fil_system->mutex is
-held, because the pointer returned by fil_space_get_by_id() is unsafe to
-dereference.
-@return ULINT_UNDEFINED, or FIL_TABLESPACE or FIL_LOG */
-UNIV_INTERN
-ulint
-fil_space_get_type(
-/*===============*/
-	ulint	id)	/*!< in: space id */
-{
-	fil_space_t*	space;
-	ulint		type = ULINT_UNDEFINED;
-
-	ut_ad(fil_system);
-
-	mutex_enter(&fil_system->mutex);
-
-	space = fil_space_get_by_id(id);
-
-	if (space) {
-		/* read before releasing the mutex, as
-		fil_space_get_version() does */
-		type = space->purpose;
-	}
-
-	mutex_exit(&fil_system->mutex);
-
-	return(type);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Checks if all the file nodes in a space are flushed, i.e. no node in the
-chain has a modification_counter greater than its flush_counter. The
-caller must hold the fil_system mutex.
-@return true if all are flushed */
-static
-bool
-fil_space_is_flushed(
-/*=================*/
-	fil_space_t*	space)	/*!< in: space */
-{
-	ut_ad(mutex_own(&fil_system->mutex));
-
-	for (fil_node_t* cur = UT_LIST_GET_FIRST(space->chain);
-	     cur != NULL;
-	     cur = UT_LIST_GET_NEXT(chain, cur)) {
-
-		if (cur->modification_counter <= cur->flush_counter) {
-			continue;
-		}
-
-		/* an unflushed node is not expected when buffering is
-		disabled for this space */
-		ut_ad(!fil_buffering_disabled(space));
-		return(false);
-	}
-
-	return(true);
-}
-
-/*******************************************************************//**
-Appends a new file to the chain of files of a space. File must be closed.
-
-The space is looked up before anything is allocated, so the "space not
-found" path has nothing to release (previously the node's sync_event was
-leaked there). On success the space size grows by the node size, and
-fil_system->max_assigned_id is raised when id is below
-SRV_LOG_SPACE_FIRST_ID and larger than the current maximum.
-@return pointer to the file name (the node's own copy made with
-mem_strdup), or NULL on error */
-UNIV_INTERN
-char*
-fil_node_create(
-/*============*/
-	const char*	name,	/*!< in: file name (file must be closed) */
-	ulint		size,	/*!< in: file size in database blocks, rounded
-				downwards to an integer */
-	ulint		id,	/*!< in: space id where to append */
-	ibool		is_raw)	/*!< in: TRUE if a raw device or
-				a raw disk partition */
-{
-	fil_node_t*	node;
-	fil_space_t*	space;
-
-	ut_a(fil_system);
-	ut_a(name);
-
-	mutex_enter(&fil_system->mutex);
-
-	ut_a(!is_raw || srv_start_raw_disk_in_use);
-
-	space = fil_space_get_by_id(id);
-
-	if (!space) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"  InnoDB: Error: Could not find tablespace %lu for\n"
-			"InnoDB: file ", (ulong) id);
-		ut_print_filename(stderr, name);
-		fputs(" in the tablespace memory cache.\n", stderr);
-
-		mutex_exit(&fil_system->mutex);
-
-		return(NULL);
-	}
-
-	/* the space exists: only now build the node */
-	node = static_cast<fil_node_t*>(mem_zalloc(sizeof(fil_node_t)));
-
-	node->name = mem_strdup(name);
-	node->sync_event = os_event_create();
-	node->is_raw_disk = is_raw;
-	node->size = size;
-	node->magic_n = FIL_NODE_MAGIC_N;
-
-	space->size += size;
-
-	node->space = space;
-
-	UT_LIST_ADD_LAST(chain, space->chain, node);
-
-	if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
-
-		fil_system->max_assigned_id = id;
-	}
-
-	mutex_exit(&fil_system->mutex);
-
-	return(node->name);
-}
-
-/********************************************************************//**
-Opens a file of a node of a tablespace. The caller must own the fil_system
-mutex.
-
-When node->size is 0 the file is first opened read-only to learn its
-size: the first page is read, the space id and flags stored in it are
-validated against the space object, crypt_data is read from it if the
-space has none yet, and the file is closed again. A failed read of the
-first page is now reported and makes the function return false instead
-of parsing an unfilled buffer. The file is then opened for regular i/o,
-counted as open, and put on the LRU list if the space belongs there.
-@return false if the file can't be opened, otherwise true */
-static
-bool
-fil_node_open_file(
-/*===============*/
-	fil_node_t*	node,	/*!< in: file node */
-	fil_system_t*	system,	/*!< in: tablespace memory cache */
-	fil_space_t*	space)	/*!< in: space */
-{
-	os_offset_t	size_bytes;
-	ibool		ret;
-	ibool		success;
-	byte*		buf2;
-	byte*		page;
-
-	ut_ad(mutex_own(&(system->mutex)));
-	ut_a(node->n_pending == 0);
-	ut_a(node->open == FALSE);
-
-	if (node->size == 0) {
-		/* It must be a single-table tablespace and we do not know the
-		size of the file yet. First we open the file in the normal
-		mode, no async I/O here, for simplicity. Then do some checks,
-		and close the file again.
-		NOTE that we could not use the simple file read function
-		os_file_read() in Windows to read from a file opened for
-		async I/O! */
-
-		node->handle = os_file_create_simple_no_error_handling(
-			innodb_file_data_key, node->name, OS_FILE_OPEN,
-			OS_FILE_READ_ONLY, &success, 0);
-
-		if (!success) {
-			/* The following call prints an error message */
-			os_file_get_last_error(true);
-
-			ib_logf(IB_LOG_LEVEL_WARN, "InnoDB: Error: cannot "
-				"open %s\n. InnoDB: Have you deleted .ibd "
-				"files under a running mysqld server?\n",
-				node->name);
-
-			return(false);
-		}
-
-		size_bytes = os_file_get_size(node->handle);
-		ut_a(size_bytes != (os_offset_t) -1);
-
-		node->file_block_size = os_file_get_block_size(
-			node->handle, node->name);
-		space->file_block_size = node->file_block_size;
-
-#ifdef UNIV_HOTBACKUP
-		if (space->id == 0) {
-			node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
-			os_file_close(node->handle);
-			goto add_size;
-		}
-#endif /* UNIV_HOTBACKUP */
-		ut_a(space->purpose != FIL_LOG);
-		ut_a(fil_is_user_tablespace_id(space->id));
-
-		if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
-			ib_logf(IB_LOG_LEVEL_ERROR,
-				"The size of the file %s is only " UINT64PF
-				" bytes, should be at least " ULINTPF,
-				node->name, size_bytes,
-				FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE);
-			os_file_close(node->handle);
-			return(false);
-		}
-
-		/* Read the first page of the tablespace */
-
-		buf2 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
-		/* Align the memory for file i/o if we might have O_DIRECT
-		set */
-		page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
-
-		success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE);
-		srv_stats.page0_read.add(1);
-
-		if (!success) {
-			/* Do not interpret a buffer that was never filled */
-			ib_logf(IB_LOG_LEVEL_ERROR,
-				"Cannot read the first page of the file %s",
-				node->name);
-			ut_free(buf2);
-			os_file_close(node->handle);
-			return(false);
-		}
-
-		const ulint space_id = fsp_header_get_space_id(page);
-		ulint flags = fsp_header_get_flags(page);
-
-		/* Try to read crypt_data from page 0 if it is not yet
-		read. */
-		if (!node->space->crypt_data) {
-			const ulint offset = fsp_header_get_crypt_offset(
-				fsp_flags_get_zip_size(flags));
-			node->space->crypt_data = fil_space_read_crypt_data(space_id, page, offset);
-		}
-
-		/* the page buffer and the read-only handle are no longer
-		needed; every return below relies on both being released */
-		ut_free(buf2);
-		os_file_close(node->handle);
-
-		if (!fsp_flags_is_valid(flags)) {
-			/* try to interpret the flags via
-			fsp_flags_convert_from_101() before giving up */
-			ulint cflags = fsp_flags_convert_from_101(flags);
-			if (cflags == ULINT_UNDEFINED) {
-				ib_logf(IB_LOG_LEVEL_ERROR,
-					"Expected tablespace flags 0x%x"
-					" but found 0x%x in the file %s",
-					int(space->flags), int(flags),
-					node->name);
-				return(false);
-			}
-
-			flags = cflags;
-		}
-
-		if (UNIV_UNLIKELY(space_id != space->id)) {
-			ib_logf(IB_LOG_LEVEL_ERROR,
-				"tablespace id is " ULINTPF " in the data dictionary"
-				" but in file %s it is " ULINTPF "!\n",
-				space->id, node->name, space_id);
-			return(false);
-		}
-
-		/* size in pages: compressed page size if set, else
-		UNIV_PAGE_SIZE */
-		if (ulint zip_size = fsp_flags_get_zip_size(flags)) {
-			node->size = ulint(size_bytes / zip_size);
-		} else {
-			node->size = ulint(size_bytes / UNIV_PAGE_SIZE);
-		}
-
-#ifdef UNIV_HOTBACKUP
-add_size:
-#endif /* UNIV_HOTBACKUP */
-		space->size += node->size;
-	}
-
-	ulint atomic_writes = fsp_flags_get_atomic_writes(space->flags);
-
-	/* printf("Opening file %s\n", node->name); */
-
-	/* Open the file for reading and writing, in Windows normally in the
-	unbuffered async I/O mode, though global variables may make
-	os_file_create() to fall back to the normal file I/O mode. */
-
-	if (space->purpose == FIL_LOG) {
-		node->handle = os_file_create(innodb_file_log_key,
-					      node->name, OS_FILE_OPEN,
-					      OS_FILE_AIO, OS_LOG_FILE,
-					      &ret, atomic_writes);
-	} else if (node->is_raw_disk) {
-		node->handle = os_file_create(innodb_file_data_key,
-					      node->name,
-					      OS_FILE_OPEN_RAW,
-					      OS_FILE_AIO, OS_DATA_FILE,
-					      &ret, atomic_writes);
-	} else {
-		node->handle = os_file_create(innodb_file_data_key,
-					      node->name, OS_FILE_OPEN,
-					      OS_FILE_AIO, OS_DATA_FILE,
-					      &ret, atomic_writes);
-	}
-
-	/* Make sure the open succeeded before the handle is used */
-	ut_a(ret);
-
-	if (node->file_block_size == 0) {
-		node->file_block_size = os_file_get_block_size(
-			node->handle, node->name);
-		space->file_block_size = node->file_block_size;
-	}
-
-	node->open = TRUE;
-
-	system->n_open++;
-	fil_n_file_opened++;
-
-	if (fil_space_belongs_in_lru(space)) {
-
-		/* Put the node to the LRU list */
-		UT_LIST_ADD_FIRST(LRU, system->LRU, node);
-	}
-
-	return(true);
-}
-
-/**********************************************************************//**
-Closes the operating system file handle of an open file node, decrements
-the open-file counters and, if the tablespace of the node is tracked in
-the LRU list, unlinks the node from that list. The caller must own
-system->mutex, and the node must have no pending i/o, no pending flushes
-and no extension in progress (all of this is asserted). */
-static
-void
-fil_node_close_file(
-/*================*/
-	fil_node_t*	node,	/*!< in: file node */
-	fil_system_t*	system)	/*!< in: tablespace memory cache */
-{
-	ut_ad(node && system);
-	ut_ad(mutex_own(&(system->mutex)));
-	ut_a(node->open);
-	ut_a(node->n_pending == 0);
-	ut_a(node->n_pending_flushes == 0);
-	ut_a(!node->being_extended);
-#ifndef UNIV_HOTBACKUP
-	/* Unflushed modifications are tolerated only when
-	srv_fast_shutdown == 2 */
-	ut_a(node->modification_counter == node->flush_counter
-	     || srv_fast_shutdown == 2);
-#endif /* !UNIV_HOTBACKUP */
-
-	const ibool	ret = os_file_close(node->handle);
-	ut_a(ret);
-
-	node->open = FALSE;
-
-	/* Keep the open-file bookkeeping in step with the handle */
-	ut_a(system->n_open > 0);
-	system->n_open--;
-	fil_n_file_opened--;
-
-	if (!fil_space_belongs_in_lru(node->space)) {
-
-		return;
-	}
-
-	/* The node is in the LRU list, remove it */
-	ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
-	UT_LIST_REMOVE(LRU, system->LRU, node);
-}
-
-/********************************************************************//**
-Tries to close a file in the LRU list. The caller must hold the fil_sys
-mutex. The list is scanned from its last node towards its first node, and
-the first node that has no unflushed modifications, no pending flushes and
-is not being extended is closed.
-@return TRUE if a file was closed, FALSE if the caller should retry later;
-since i/o's generally complete in < 100 ms, and as InnoDB writes at most
-128 pages from the buffer pool in a batch, and then immediately flushes
-the files, there is a good chance that a suitable node is found in the
-LRU list the next time */
-static
-ibool
-fil_try_to_close_file_in_LRU(
-/*=========================*/
-	ibool	print_info)	/*!< in: if TRUE, prints information why it
-				cannot close a file */
-{
-	ut_ad(mutex_own(&fil_system->mutex));
-
-	if (print_info) {
-		fprintf(stderr,
-			"InnoDB: fil_sys open file LRU len %lu\n",
-			(ulong) UT_LIST_GET_LEN(fil_system->LRU));
-	}
-
-	fil_node_t*	node = UT_LIST_GET_LAST(fil_system->LRU);
-
-	while (node != NULL) {
-		const bool	unflushed
-			= node->modification_counter != node->flush_counter;
-
-		if (!unflushed
-		    && node->n_pending_flushes == 0
-		    && !node->being_extended) {
-
-			fil_node_close_file(node, fil_system);
-
-			return(TRUE);
-		}
-
-		if (print_info) {
-			/* Report every reason that keeps this node open */
-
-			if (node->n_pending_flushes > 0) {
-				fputs("InnoDB: cannot close file ", stderr);
-				ut_print_filename(stderr, node->name);
-				fprintf(stderr,
-					", because n_pending_flushes %lu\n",
-					(ulong) node->n_pending_flushes);
-			}
-
-			if (unflushed) {
-				fputs("InnoDB: cannot close file ", stderr);
-				ut_print_filename(stderr, node->name);
-				fprintf(stderr,
-					", because mod_count %ld != fl_count %ld\n",
-					(long) node->modification_counter,
-					(long) node->flush_counter);
-			}
-
-			if (node->being_extended) {
-				fputs("InnoDB: cannot close file ", stderr);
-				ut_print_filename(stderr, node->name);
-				fprintf(stderr,
-					", because it is being extended\n");
-			}
-		}
-
-		node = UT_LIST_GET_PREV(LRU, node);
-	}
-
-	return(FALSE);
-}
-
-/** Flush any writes cached by the file system.
-For every file node of the tablespace whose modification_counter is ahead
-of its flush_counter, the node is flushed with os_file_flush() (skipped for
-raw disks under __WIN__, or when a concurrent flush already advanced
-flush_counter far enough), and flush_counter is then raised to the
-modification_counter value sampled before the flush.
-Nothing is flushed when fil_buffering_disabled(space) holds.
-The caller must own fil_system->mutex (debug-asserted). The mutex is
-released while os_file_flush() runs and while waiting for a concurrent
-flush of the same node; it is owned again on return.
-@param[in,out] space tablespace */
-static
-void
-fil_flush_low(fil_space_t* space)
-{
- ut_ad(mutex_own(&fil_system->mutex));
- ut_ad(space);
- ut_ad(!space->stop_new_ops);
-
- if (fil_buffering_disabled(space)) {
-
- /* No need to flush. User has explicitly disabled
- buffering. */
- ut_ad(!space->is_in_unflushed_spaces);
- ut_ad(fil_space_is_flushed(space));
- ut_ad(space->n_pending_flushes == 0);
-
-#ifdef UNIV_DEBUG
- for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
- ut_ad(node->modification_counter
- == node->flush_counter);
- ut_ad(node->n_pending_flushes == 0);
- }
-#endif /* UNIV_DEBUG */
-
- return;
- }
-
- /* Prevent dropping of the space while we are flushing */
- space->n_pending_flushes++;
-
- for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
-
- ib_int64_t old_mod_counter = node->modification_counter; /* sampled before the flush; becomes the new flush_counter */
-
- if (old_mod_counter <= node->flush_counter) {
- continue; /* nothing unflushed in this node */
- }
-
- ut_a(node->open);
-
- if (space->purpose == FIL_TABLESPACE) {
- fil_n_pending_tablespace_flushes++;
- } else {
- fil_n_pending_log_flushes++;
- fil_n_log_flushes++;
- }
-#ifdef __WIN__
- if (node->is_raw_disk) {
-
- goto skip_flush;
- }
-#endif /* __WIN__ */
-retry:
- if (node->n_pending_flushes > 0) {
- /* We want to avoid calling os_file_flush() on
- the file twice at the same time, because we do
- not know what bugs OS's may contain in file
- i/o */
-
- ib_int64_t sig_count =
- os_event_reset(node->sync_event);
-
- mutex_exit(&fil_system->mutex);
-
- os_event_wait_low(node->sync_event, sig_count);
-
- mutex_enter(&fil_system->mutex);
-
- if (node->flush_counter >= old_mod_counter) {
-
- goto skip_flush; /* the other flush already covered our modifications */
- }
-
- goto retry;
- }
-
- ut_a(node->open);
- node->n_pending_flushes++;
-
- mutex_exit(&fil_system->mutex);
-
- os_file_flush(node->handle);
-
- mutex_enter(&fil_system->mutex);
-
- os_event_set(node->sync_event); /* wakes threads blocked in os_event_wait_low() above */
-
- node->n_pending_flushes--;
-skip_flush:
- if (node->flush_counter < old_mod_counter) {
- node->flush_counter = old_mod_counter;
-
- if (space->is_in_unflushed_spaces
- && fil_space_is_flushed(space)) {
-
- space->is_in_unflushed_spaces = false;
-
- UT_LIST_REMOVE(
- unflushed_spaces,
- fil_system->unflushed_spaces,
- space);
- }
- }
-
- if (space->purpose == FIL_TABLESPACE) {
- fil_n_pending_tablespace_flushes--;
- } else {
- fil_n_pending_log_flushes--;
- }
- }
-
- space->n_pending_flushes--; /* undoes the increment made before the loop */
-}
-
-/** Try to extend a tablespace.
-The caller must own fil_system->mutex (debug-asserted). When true is
-returned, the mutex has been released (another thread was extending the
-file and this thread slept before asking for a retry); when false is
-returned, the mutex is owned by the caller.
-Depending on the platform and settings, the file is extended with
-SetFileInformationByHandle() (_WIN32), posix_fallocate(), ftruncate()
-(page-compressed tablespaces where off_t is at least 8 bytes), or by
-writing zero-filled pages in batches of at most 64 pages. Afterwards
-node->size and space->size are updated from the size actually reached and
-fil_flush_low() is called.
-@param[in,out] space tablespace to be extended
-@param[in,out] node last file of the tablespace
-@param[in] size desired size in number of pages
-@param[out] success whether the operation succeeded
-@return whether the operation should be retried */
-static UNIV_COLD __attribute__((warn_unused_result, nonnull))
-bool
-fil_space_extend_must_retry(
- fil_space_t* space,
- fil_node_t* node,
- ulint size,
- ibool* success)
-{
- ut_ad(mutex_own(&fil_system->mutex));
- ut_ad(UT_LIST_GET_LAST(space->chain) == node);
- ut_ad(size >= FIL_IBD_FILE_INITIAL_SIZE);
-
- *success = space->size >= size;
-
- if (*success) {
- /* Space already big enough */
- return(false);
- }
-
- if (node->being_extended) {
- /* Another thread is currently extending the file. Wait
- for it to finish.
- It'd have been better to use event driven mechanism but
- the entire module is peppered with polling stuff. */
- mutex_exit(&fil_system->mutex);
- os_thread_sleep(100000); /* presumably microseconds (0.1 s) - TODO confirm against os_thread_sleep() */
- return(true);
- }
-
- node->being_extended = true;
-
- if (!fil_node_prepare_for_io(node, fil_system, space)) {
- /* The tablespace data file, such as .ibd file, is missing */
- node->being_extended = false;
- return(false); /* *success is still false here */
- }
-
- /* At this point it is safe to release fil_system mutex. No
- other thread can rename, delete or close the file because
- we have set the node->being_extended flag. */
- mutex_exit(&fil_system->mutex);
-
- ulint start_page_no = space->size; /* the extension starts at the current tablespace size, in pages */
- const ulint file_start_page_no = start_page_no - node->size; /* space->size minus the size of this (last) file */
-
- /* Determine correct file block size */
- if (node->file_block_size == 0) {
- node->file_block_size = os_file_get_block_size(
- node->handle, node->name);
- space->file_block_size = node->file_block_size;
- }
-
- ulint page_size = fsp_flags_get_zip_size(space->flags);
- if (!page_size) {
- page_size = UNIV_PAGE_SIZE;
- }
-
-#ifdef _WIN32
- const ulint io_completion_type = OS_FILE_READ;
- /* Logically or physically extend the file with zero bytes,
- depending on whether it is sparse. */
-
- /* FIXME: Call DeviceIoControl(node->handle, FSCTL_SET_SPARSE, ...)
- when opening a file when FSP_FLAGS_HAS_PAGE_COMPRESSION(). */
- {
- FILE_END_OF_FILE_INFO feof;
- /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
- fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.
- Do not shrink short ROW_FORMAT=COMPRESSED files. */
- feof.EndOfFile.QuadPart = std::max(
- os_offset_t(size - file_start_page_no) * page_size,
- os_offset_t(FIL_IBD_FILE_INITIAL_SIZE
- * UNIV_PAGE_SIZE));
- *success = SetFileInformationByHandle(node->handle,
- FileEndOfFileInfo,
- &feof, sizeof feof);
- if (!*success) {
- ib_logf(IB_LOG_LEVEL_ERROR, "extending file %s"
- " from " INT64PF
- " to " INT64PF " bytes failed with %u",
- node->name,
- os_offset_t(node->size) * page_size,
- feof.EndOfFile.QuadPart, GetLastError());
- } else {
- start_page_no = size;
- }
- }
-#else
- /* We will logically extend the file with ftruncate() if
- page_compression is enabled, because the file is expected to
- be sparse in that case. Make sure that ftruncate() can deal
- with large files. */
- const bool is_sparse = sizeof(off_t) >= 8
- && FSP_FLAGS_HAS_PAGE_COMPRESSION(space->flags);
-
-# ifdef HAVE_POSIX_FALLOCATE
- /* We must complete the I/O request after invoking
- posix_fallocate() to avoid an assertion failure at shutdown.
- Because no actual writes were dispatched, a read operation
- will suffice. */
- const ulint io_completion_type = srv_use_posix_fallocate
- || is_sparse ? OS_FILE_READ : OS_FILE_WRITE; /* parsed as (a || b) ? READ : WRITE */
-
- if (srv_use_posix_fallocate && !is_sparse) {
- const os_offset_t start_offset
- = os_offset_t(start_page_no - file_start_page_no)
- * page_size;
- const ulint n_pages = size - start_page_no;
- const os_offset_t len = os_offset_t(n_pages) * page_size;
-
- int err;
- do {
- err = posix_fallocate(node->handle, start_offset, len);
- } while (err == EINTR
- && srv_shutdown_state == SRV_SHUTDOWN_NONE);
-
- *success = !err;
- if (!*success) {
- ib_logf(IB_LOG_LEVEL_ERROR, "extending file %s"
- " from " INT64PF " to " INT64PF " bytes"
- " failed with error %d",
- node->name, start_offset, len + start_offset,
- err);
- }
-
- DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- *success = FALSE;
- os_has_said_disk_full = TRUE;);
-
- if (*success) {
- os_has_said_disk_full = FALSE;
- start_page_no = size;
- }
- } else
-# else
- const ulint io_completion_type = is_sparse
- ? OS_FILE_READ : OS_FILE_WRITE;
-# endif
- if (is_sparse) { /* with HAVE_POSIX_FALLOCATE this is the body of the "else" above */
- /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
- fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.
- Do not shrink short ROW_FORMAT=COMPRESSED files. */
- off_t s = std::max(off_t(size - file_start_page_no)
- * off_t(page_size),
- off_t(FIL_IBD_FILE_INITIAL_SIZE
- * UNIV_PAGE_SIZE));
- *success = !ftruncate(node->handle, s);
- if (!*success) {
- ib_logf(IB_LOG_LEVEL_ERROR, "ftruncate of file %s"
- " from " INT64PF " to " INT64PF " bytes"
- " failed with error %d",
- node->name,
- os_offset_t(start_page_no - file_start_page_no)
- * page_size, os_offset_t(s), errno);
- } else {
- start_page_no = size;
- }
- } else {
- /* Extend at most 64 pages at a time */
- ulint buf_size = ut_min(64, size - start_page_no)
- * page_size;
- byte* buf2 = static_cast<byte*>(
- calloc(1, buf_size + page_size)); /* one extra page leaves room for aligning buf below */
- *success = buf2 != NULL;
- if (!buf2) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Cannot allocate " ULINTPF
- " bytes to extend file",
- buf_size + page_size);
- }
- byte* const buf = static_cast<byte*>(
- ut_align(buf2, page_size));
-
- while (*success && start_page_no < size) { /* not entered when the allocation failed */
- ulint n_pages
- = ut_min(buf_size / page_size,
- size - start_page_no);
-
- os_offset_t offset = static_cast<os_offset_t>(
- start_page_no - file_start_page_no)
- * page_size;
-
- *success = os_aio(OS_FILE_WRITE, 0, OS_AIO_SYNC,
- node->name, node->handle, buf,
- offset, page_size * n_pages,
- page_size, node, NULL,
- space->id, NULL, 0);
-
- DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- *success = FALSE;
- os_has_said_disk_full = TRUE;);
-
- if (*success) {
- os_has_said_disk_full = FALSE;
- }
- /* Let us measure the size of the file
- to determine how much we were able to
- extend it */
- os_offset_t fsize = os_file_get_size(node->handle);
- ut_a(fsize != os_offset_t(-1));
-
- start_page_no = ulint(fsize / page_size)
- + file_start_page_no;
- }
-
- free(buf2);
- }
-#endif
- mutex_enter(&fil_system->mutex);
-
- ut_a(node->being_extended);
- ut_a(start_page_no - file_start_page_no >= node->size); /* the file must not have become smaller */
-
- ulint file_size = start_page_no - file_start_page_no;
- space->size += file_size - node->size;
- node->size = file_size;
-
- fil_node_complete_io(node, fil_system, io_completion_type);
-
- node->being_extended = FALSE;
-
- if (space->id == 0) {
- ulint pages_per_mb = (1024 * 1024) / page_size;
-
- /* Keep the last data file size info up to date, rounded to
- full megabytes */
-
- srv_data_file_sizes[srv_n_data_files - 1]
- = (node->size / pages_per_mb) * pages_per_mb;
- }
-
- fil_flush_low(space);
- return(false);
-}
-
-/*******************************************************************//**
-Reserves the fil_system mutex and tries to make sure we can open at least one
-file while holding it. This should be called before calling
-fil_node_prepare_for_io(), because that function may need to open a file.
-The fil_system mutex is owned by the caller on every return path, also
-when the space id denotes a log space or the space is not found.
-While space->stop_ios is set, the function releases the mutex, sleeps,
-flushes the tablespaces and retries. If the last file of the space is not
-open and the open-file limit is reached, it tries to close files from the
-LRU list, giving up with a warning after a few retries.
-If space->recv_size is nonzero, the tablespace is extended to that size
-with fil_space_extend_must_retry(); a failed extension is fatal (ut_a). */
-static
-void
-fil_mutex_enter_and_prepare_for_io(
-/*===============================*/
- ulint space_id) /*!< in: space id */
-{
- fil_space_t* space;
- ulint count = 0; /* retries after failing to close a file in the LRU list */
- ulint count2 = 0; /* retries while space->stop_ios was set */
-
-retry:
- mutex_enter(&fil_system->mutex);
-
- if (space_id >= SRV_LOG_SPACE_FIRST_ID) {
- /* We keep log files always open. */
- return;
- }
-
- space = fil_space_get_by_id(space_id);
-
- if (space == NULL) {
- return;
- }
-
- if (space->stop_ios) {
- ut_ad(space->id != 0);
- /* We are going to do a rename file and want to stop new i/o's
- for a while */
-
- if (count2 > 20000) {
- fputs("InnoDB: Warning: tablespace ", stderr);
- ut_print_filename(stderr, space->name);
- fprintf(stderr,
- " has i/o ops stopped for a long time %lu\n",
- (ulong) count2);
- }
-
- mutex_exit(&fil_system->mutex);
-
-#ifndef UNIV_HOTBACKUP
-
- /* Wake the i/o-handler threads to make sure pending
- i/o's are performed */
- os_aio_simulated_wake_handler_threads();
-
- /* The sleep here is just to give IO helper threads a
- bit of time to do some work. It is not required that
- all IO related to the tablespace being renamed must
- be flushed here as we do fil_flush() in
- fil_rename_tablespace() as well. */
- os_thread_sleep(20000);
-
-#endif /* !UNIV_HOTBACKUP */
-
- /* Flush tablespaces so that we can close modified
- files in the LRU list */
- fil_flush_file_spaces(FIL_TABLESPACE);
-
- os_thread_sleep(20000);
-
- count2++;
-
- goto retry;
- }
-
- fil_node_t* node = UT_LIST_GET_LAST(space->chain);
-
- ut_ad(space->id == 0 || node == UT_LIST_GET_FIRST(space->chain)); /* spaces other than 0 have at most one file */
-
- if (space->id == 0) {
- /* We keep the system tablespace files always open;
- this is important in preventing deadlocks in this module, as
- a page read completion often performs another read from the
- insert buffer. The insert buffer is in tablespace 0, and we
- cannot end up waiting in this function. */
- } else if (!node || node->open) {
- /* If the file is already open, no need to do
- anything; if the space does not exist, we handle the
- situation in the function which called this
- function */
- } else {
- /* Too many files are open, try to close some */
- while (fil_system->n_open >= fil_system->max_n_open) {
- if (fil_try_to_close_file_in_LRU(count > 1)) {
- /* No problem */
- } else if (count >= 2) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "innodb_open_files=%lu is exceeded"
- " (%lu files stay open)",
- fil_system->max_n_open,
- fil_system->n_open);
- break; /* give up closing; continue with the limit exceeded */
- } else {
- mutex_exit(&fil_system->mutex);
-
- /* Wake the i/o-handler threads to
- make sure pending i/o's are
- performed */
- os_aio_simulated_wake_handler_threads();
- os_thread_sleep(20000);
-
- /* Flush tablespaces so that we can
- close modified files in the LRU list */
- fil_flush_file_spaces(FIL_TABLESPACE);
-
- count++;
- goto retry;
- }
- }
- }
-
- if (ulint size = UNIV_UNLIKELY(space->recv_size)) {
- ut_ad(node);
- ibool success;
- if (fil_space_extend_must_retry(space, node, size, &success)) {
- goto retry; /* the callee released the mutex; "retry" acquires it again */
- }
-
- ut_ad(mutex_own(&fil_system->mutex));
- /* Crash recovery requires the file extension to succeed. */
- ut_a(success);
- /* InnoDB data files cannot shrink. */
- ut_a(space->size >= size);
-
- /* There could be multiple concurrent I/O requests for
- this tablespace (multiple threads trying to extend
- this tablespace).
-
- Also, fil_space_set_recv_size() may have been invoked
- again during the file extension while fil_system->mutex
- was not being held by us.
-
- Only if space->recv_size matches what we read originally,
- reset the field. In this way, a subsequent I/O request
- will handle any pending fil_space_set_recv_size(). */
-
- if (size == space->recv_size) {
- space->recv_size = 0;
- }
- }
-}
-
-/** Prepare a data file object for freeing. If the file is open, it is
-marked as having no unflushed modifications, its sync_event is signalled,
-the tablespace is taken off the unflushed_spaces list when it no longer
-belongs there, and the file is closed. A file that is not open is left
-untouched. The caller must own fil_system->mutex.
-@param[in,out]	space	tablespace
-@param[in,out]	node	data file */
-static
-void
-fil_node_free_part1(fil_space_t* space, fil_node_t* node)
-{
-	ut_ad(mutex_own(&fil_system->mutex));
-	ut_a(node->magic_n == FIL_NODE_MAGIC_N);
-	ut_a(node->n_pending == 0);
-	ut_a(!node->being_extended);
-
-	if (!node->open) {
-		/* Nothing to close */
-		return;
-	}
-
-	/* We fool the assertion in fil_node_close_file() to think
-	there are no unflushed modifications in the file */
-	node->modification_counter = node->flush_counter;
-	os_event_set(node->sync_event);
-
-	if (fil_buffering_disabled(space)) {
-		/* Such a space is never in the unflushed list */
-		ut_ad(!space->is_in_unflushed_spaces);
-		ut_ad(fil_space_is_flushed(space));
-	} else if (space->is_in_unflushed_spaces
-		   && fil_space_is_flushed(space)) {
-		space->is_in_unflushed_spaces = false;
-
-		UT_LIST_REMOVE(unflushed_spaces,
-			       fil_system->unflushed_spaces,
-			       space);
-	}
-
-	fil_node_close_file(node, fil_system);
-}
-
-/** Free a data file object: unlink it from the file chain of the
-tablespace, subtract its size from the tablespace size, and release its
-sync event, its name and the object itself. The file must already be
-closed (debug-asserted).
-@param[in,out]	space	tablespace
-@param[in]	node	data file */
-static
-void
-fil_node_free_part2(fil_space_t* space, fil_node_t* node)
-{
-	ut_ad(!node->open);
-
-	/* Detach the file from its tablespace */
-	UT_LIST_REMOVE(chain, space->chain, node);
-	space->size -= node->size;
-
-	/* Release what the node owns, then the node itself */
-	mem_free(node->name);
-	os_event_free(node->sync_event);
-	mem_free(node);
-}
-
-#ifdef UNIV_LOG_ARCHIVE
-/****************************************************************//**
-Drops files from the start of a file space, so that its size is cut by
-the amount given. Files are freed one at a time from the head of the
-chain until trunc_len has been used up; the space must exist. */
-UNIV_INTERN
-void
-fil_space_truncate_start(
-/*=====================*/
-	ulint	id,		/*!< in: space id */
-	ulint	trunc_len)	/*!< in: truncate by this much; it is an error
-				if this does not equal to the combined size of
-				some initial files in the space */
-{
-	mutex_enter(&fil_system->mutex);
-
-	fil_space_t* const	space = fil_space_get_by_id(id);
-
-	ut_a(space);
-
-	for (; trunc_len > 0; ) {
-		fil_node_t* const	node
-			= UT_LIST_GET_FIRST(space->chain);
-
-		/* Only whole files can be dropped */
-		ut_a(node->size * UNIV_PAGE_SIZE <= trunc_len);
-
-		trunc_len = trunc_len - node->size * UNIV_PAGE_SIZE;
-
-		fil_node_free_part1(space, node);
-		fil_node_free_part2(space, node);
-	}
-
-	mutex_exit(&fil_system->mutex);
-}
-
-/****************************************************************//**
-Checks whether the file space with the given id has a node whose name
-equals node_name. The space must exist. The fil_system mutex is acquired
-for the duration of the lookup.
-@return TRUE if such a node exists, FALSE otherwise */
-UNIV_INTERN
-ibool
-fil_space_contains_node(
-/*====================*/
-	ulint	id,		/*!< in: space id */
-	char*	node_name)	/*!< in: node name */
-{
-	ibool	found = FALSE;
-
-	mutex_enter(&fil_system->mutex);
-
-	fil_space_t*	space = fil_space_get_by_id(id);
-
-	ut_a(space);
-
-	for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
-	     node != NULL;
-	     node = UT_LIST_GET_NEXT(chain, node)) {
-
-		if (ut_strcmp(node->name, node_name) == 0) {
-			found = TRUE;
-			break;
-		}
-	}
-
-	mutex_exit(&fil_system->mutex);
-
-	return(found);
-}
-
-#endif /* UNIV_LOG_ARCHIVE */
-
-/*******************************************************************//**
-Creates a space memory object and puts it to the 'fil system' hash table.
-If there is an error, prints an error message to the .err log.
-If a tablespace with the same name is already cached, that entry is freed
-from the cache first, unless id is 0 or purpose is not FIL_TABLESPACE, in
-which case the call fails. The call also fails if a tablespace with the
-same id is already cached. The fil_system mutex is acquired and released
-inside the function.
-@param[in]	name		Space name
-@param[in]	id		Space id
-@param[in]	flags		Tablespace flags
-@param[in]	purpose		FIL_TABLESPACE or FIL_LOG if log
-@param[in]	crypt_data	Encryption information
-@param[in]	create_table	True if this is create table (not referenced
-				in this function body)
-@param[in]	mode		Encryption mode
-@return true if success */
-UNIV_INTERN
-bool
-fil_space_create(
-	const char*		name,
-	ulint			id,
-	ulint			flags,
-	ulint			purpose,
-	fil_space_crypt_t*	crypt_data,
-	bool			create_table,
-	fil_encryption_t	mode)
-{
-	fil_space_t*	space;
-
-	DBUG_EXECUTE_IF("fil_space_create_failure", return(false););
-
-	ut_a(fil_system);
-
-	/* Look for a matching tablespace and if found free it. */
-	for (;;) {
-		mutex_enter(&fil_system->mutex);
-
-		space = fil_space_get_by_name(name);
-
-		if (space == NULL) {
-			/* No (more) entry with this name; continue with
-			fil_system->mutex held */
-			break;
-		}
-
-		ib_logf(IB_LOG_LEVEL_WARN,
-			"Tablespace '%s' exists in the cache "
-			"with id %lu != %lu",
-			name, (ulong) space->id, (ulong) id);
-
-		if (id == 0 || purpose != FIL_TABLESPACE) {
-
-			mutex_exit(&fil_system->mutex);
-
-			return(false);
-		}
-
-		/* Report the id of the entry that is really being freed,
-		which is space->id and not the id of the new tablespace */
-		ib_logf(IB_LOG_LEVEL_WARN,
-			"Freeing existing tablespace '%s' entry "
-			"from the cache with id %lu",
-			name, (ulong) space->id);
-
-		/* The call releases fil_system->mutex and frees *space;
-		the pointer must not be used afterwards, which is why the
-		loop re-reads it at the top instead of testing it here */
-		bool	success = fil_space_free_and_mutex_exit(
-			space->id, false);
-		ut_a(success);
-	}
-
-	space = fil_space_get_by_id(id);
-
-	if (space != 0) {
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Trying to add tablespace '%s' with id %lu "
-			"to the tablespace memory cache, but tablespace '%s' "
-			"with id %lu already exists in the cache!",
-			name, (ulong) id, space->name, (ulong) space->id);
-
-		mutex_exit(&fil_system->mutex);
-
-		return(false);
-	}
-
-	space = static_cast<fil_space_t*>(mem_zalloc(sizeof(*space)));
-
-	space->name = mem_strdup(name);
-	space->id = id;
-
-	fil_system->tablespace_version++;
-	space->tablespace_version = fil_system->tablespace_version;
-
-	if (purpose == FIL_TABLESPACE && !recv_recovery_on
-	    && id > fil_system->max_assigned_id) {
-
-		/* Warn only once about an id above the known maximum */
-		if (!fil_system->space_id_reuse_warned) {
-			fil_system->space_id_reuse_warned = TRUE;
-			if (!IS_XTRABACKUP()) {
-				ib_logf(IB_LOG_LEVEL_WARN,
-					"Allocated tablespace %lu, old maximum "
-					"was %lu",
-					(ulong)id,
-					(ulong)fil_system->max_assigned_id);
-			}
-		}
-
-		fil_system->max_assigned_id = id;
-	}
-
-	space->purpose = purpose;
-	space->flags = flags;
-
-	space->magic_n = FIL_SPACE_MAGIC_N;
-	space->crypt_data = crypt_data;
-
-	rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);
-
-	HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
-
-	HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
-		    ut_fold_string(name), space);
-
-	UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
-
-	/* Inform key rotation that there could be something
-	to do */
-	if (purpose == FIL_TABLESPACE && !srv_fil_crypt_rotate_key_age && fil_crypt_threads_event &&
-	    (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF ||
-	     srv_encrypt_tables)) {
-		/* Key rotation is not enabled, need to inform background
-		encryption threads. */
-		UT_LIST_ADD_LAST(rotation_list, fil_system->rotation_list, space);
-		space->is_in_rotation_list = true;
-		mutex_exit(&fil_system->mutex);
-		mutex_enter(&fil_crypt_threads_mutex);
-		os_event_set(fil_crypt_threads_event);
-		mutex_exit(&fil_crypt_threads_mutex);
-	} else {
-		mutex_exit(&fil_system->mutex);
-	}
-
-	return(true);
-}
-
-/*******************************************************************//**
-Assigns a new space id for a new single-table tablespace. This works simply by
-incrementing the global counter. If 4 billion id's is not enough, we may need
-to recycle id's. The new id is one more than the larger of *space_id and
-fil_system->max_assigned_id; on failure *space_id is set to ULINT_UNDEFINED.
-@return TRUE if assigned, FALSE if not */
-UNIV_INTERN
-ibool
-fil_assign_new_space_id(
-/*====================*/
-	ulint*	space_id)	/*!< in/out: space id */
-{
-	mutex_enter(&fil_system->mutex);
-
-	/* Continue from whichever is larger: the id passed in or the
-	largest id assigned so far */
-	ulint	id = (*space_id < fil_system->max_assigned_id)
-		? fil_system->max_assigned_id
-		: *space_id;
-
-	id++;
-
-	if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"InnoDB: Warning: you are running out of new"
-			" single-table tablespace id's.\n"
-			"InnoDB: Current counter is %lu and it"
-			" must not exceed %lu!\n"
-			"InnoDB: To reset the counter to zero"
-			" you have to dump all your tables and\n"
-			"InnoDB: recreate the whole InnoDB installation.\n",
-			(ulong) id,
-			(ulong) SRV_LOG_SPACE_FIRST_ID);
-	}
-
-	const ibool	success = (id < SRV_LOG_SPACE_FIRST_ID);
-
-	if (!success) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"InnoDB: You have run out of single-table"
-			" tablespace id's!\n"
-			"InnoDB: Current counter is %lu.\n"
-			"InnoDB: To reset the counter to zero you"
-			" have to dump all your tables and\n"
-			"InnoDB: recreate the whole InnoDB installation.\n",
-			(ulong) id);
-		*space_id = ULINT_UNDEFINED;
-	} else {
-		fil_system->max_assigned_id = id;
-		*space_id = id;
-	}
-
-	mutex_exit(&fil_system->mutex);
-
-	return(success);
-}
-
-/** Free a space object from the tablespace memory cache. Close the files in
-the chain but do not delete them. There must not be any pending i/o's or
-flushes on the files.
-The fil_system->mutex will be released.
-The caller must own fil_system->mutex on entry (debug-asserted). The space
-is first removed from the id hash, the name hash and the unflushed, rotation
-and space lists, and its open files are closed, all under the mutex. After
-the mutex is released the function polls until space->n_pending_ios is zero,
-and only then frees the file nodes, the latch, the crypt data, the name and
-the space object itself.
-@param[in] id tablespace ID
-@param[in] x_latched whether the caller holds exclusive space->latch
-(if so, the latch is x-unlocked here before it is freed)
-@return whether the tablespace existed */
-static
-bool
-fil_space_free_and_mutex_exit(ulint id, bool x_latched)
-{
- fil_space_t* space;
- fil_space_t* fnamespace;
-
- ut_ad(mutex_own(&fil_system->mutex));
-
- space = fil_space_get_by_id(id);
-
- if (!space) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "trying to remove non-existing tablespace " ULINTPF,
- id);
- mutex_exit(&fil_system->mutex);
- return(false);
- }
-
- HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space);
-
- fnamespace = fil_space_get_by_name(space->name);
- ut_a(fnamespace);
- ut_a(space == fnamespace); /* the name hash must point to the same object */
-
- HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
- ut_fold_string(space->name), space);
-
- if (space->is_in_unflushed_spaces) {
-
- ut_ad(!fil_buffering_disabled(space));
- space->is_in_unflushed_spaces = false;
-
- UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces,
- space);
- }
-
- if (space->is_in_rotation_list) {
- space->is_in_rotation_list = false;
- ut_a(UT_LIST_GET_LEN(fil_system->rotation_list) > 0);
- UT_LIST_REMOVE(rotation_list, fil_system->rotation_list, space);
- }
-
- UT_LIST_REMOVE(space_list, fil_system->space_list, space);
-
- ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
- ut_a(0 == space->n_pending_flushes);
-
- for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
- fil_node_free_part1(space, node); /* closes the file if it is open; the node stays in the chain */
- }
-
- mutex_exit(&fil_system->mutex);
-
- /* Wait for fil_space_release_for_io(); after
- fil_space_detach(), the tablespace cannot be found, so
- fil_space_acquire_for_io() would return NULL */
- while (space->n_pending_ios) {
- os_thread_sleep(100);
- }
-
- for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain);
- fil_node != NULL;
- fil_node = UT_LIST_GET_FIRST(space->chain)) { /* re-read the head: part2 unlinks and frees the node */
- fil_node_free_part2(space, fil_node);
- }
-
- ut_a(0 == UT_LIST_GET_LEN(space->chain));
-
- if (x_latched) {
- rw_lock_x_unlock(&space->latch);
- }
-
- rw_lock_free(&(space->latch));
-
- fil_space_destroy_crypt_data(&(space->crypt_data));
-
- mem_free(space->name);
- mem_free(space);
-
- return(TRUE); /* TRUE is used although the return type is bool */
-}
-
-/*******************************************************************//**
-Looks up the fil_space_t that is cached in memory for a space id. The
-fil_system mutex is held only for the duration of the lookup, so the
-returned pointer is not protected by it once the function has returned.
-@return fil_space_t pointer, NULL if space not found */
-fil_space_t*
-fil_space_get(
-/*==========*/
-	ulint	id)	/*!< in: space id */
-{
-	ut_ad(fil_system);
-
-	mutex_enter(&fil_system->mutex);
-
-	fil_space_t* const	found = fil_space_get_by_id(id);
-
-	mutex_exit(&fil_system->mutex);
-
-	return(found);
-}
-
-/*******************************************************************//**
-Returns a pointer to the fil_space_t that is in the memory cache
-associated with a space id. The caller must lock fil_system->mutex.
-If the space has purpose FIL_TABLESPACE and its size is still 0, the
-mutex is released and re-acquired through
-fil_mutex_enter_and_prepare_for_io(), and the single file of the space
-(if there is one) is opened so that the size fields get updated.
-@return fil_space_t pointer, NULL if space not found or if its file
-cannot be opened */
-UNIV_INLINE
-fil_space_t*
-fil_space_get_space(
-/*================*/
-	ulint	id)	/*!< in: space id */
-{
-	ut_ad(fil_system);
-
-	fil_space_t*	space = fil_space_get_by_id(id);
-
-	if (space == NULL) {
-		return(NULL);
-	}
-
-	if (space->size != 0 || space->purpose != FIL_TABLESPACE) {
-		/* The size is already known, or the purpose of the space
-		is not FIL_TABLESPACE */
-		return(space);
-	}
-
-	ut_a(id != 0);
-
-	mutex_exit(&fil_system->mutex);
-
-	/* It is possible that the space gets evicted at this point
-	before the fil_mutex_enter_and_prepare_for_io() acquires
-	the fil_system->mutex. Check for this after completing the
-	call to fil_mutex_enter_and_prepare_for_io(). */
-	fil_mutex_enter_and_prepare_for_io(id);
-
-	/* We are holding the fil_system->mutex again. Check if
-	the space is still in memory cache. */
-	space = fil_space_get_by_id(id);
-
-	if (space == NULL) {
-		return(NULL);
-	}
-
-	/* The following code must change when InnoDB supports
-	multiple datafiles per tablespace. Note that there is a small
-	chance that the space is found from the tablespace list but
-	no node has been created for it yet, and as we hold the
-	fil_system mutex here fil_node_create can't continue. */
-	ut_a(UT_LIST_GET_LEN(space->chain) == 1 || UT_LIST_GET_LEN(space->chain) == 0);
-
-	fil_node_t*	node = UT_LIST_GET_FIRST(space->chain);
-
-	if (node == NULL) {
-		return(space);
-	}
-
-	/* It must be a single-table tablespace and we have not opened
-	the file yet; the following calls will open it and update the
-	size fields */
-	if (!fil_node_prepare_for_io(node, fil_system, space)) {
-		/* The single-table tablespace can't be opened,
-		because the ibd file is missing. */
-		return(NULL);
-	}
-
-	fil_node_complete_io(node, fil_system, OS_FILE_READ);
-
-	return(space);
-}
-
-/*******************************************************************//**
-Returns the name (file path) of the first fil_node_t found for the space ID
-sent. The caller is responsible for freeing the memory allocated here for
-the value returned. The space ID must not be zero (asserted).
-The fil_system mutex is acquired and released inside the function.
-@return own: A copy of fil_node_t::name of the first node, NULL if the
-space is not found or has no file node yet */
-UNIV_INTERN
-char*
-fil_space_get_first_path(
-/*=====================*/
-	ulint	id)	/*!< in: space id */
-{
-	fil_space_t*	space;
-	fil_node_t*	node;
-	char*		path;
-
-	ut_ad(fil_system);
-	ut_a(id);
-
-	fil_mutex_enter_and_prepare_for_io(id);
-
-	space = fil_space_get_space(id);
-
-	if (space == NULL) {
-		mutex_exit(&fil_system->mutex);
-
-		return(NULL);
-	}
-
-	ut_ad(mutex_own(&fil_system->mutex));
-
-	node = UT_LIST_GET_FIRST(space->chain);
-
-	/* fil_space_get_space() can return a space whose chain is still
-	empty; there is no path to copy in that case */
-	path = (node != NULL) ? mem_strdup(node->name) : NULL;
-
-	mutex_exit(&fil_system->mutex);
-
-	return(path);
-}
-
-/** Set the recovered size of a tablespace in pages. The value is stored
-in fil_space_t::recv_size while holding fil_system->mutex; nothing is
-stored when fil_space_get_space() does not return the tablespace.
-@param id	tablespace ID
-@param size	recovered size in pages */
-UNIV_INTERN
-void
-fil_space_set_recv_size(ulint id, ulint size)
-{
-	mutex_enter(&fil_system->mutex);
-	ut_ad(size);
-	ut_ad(id < SRV_LOG_SPACE_FIRST_ID);
-
-	fil_space_t*	space = fil_space_get_space(id);
-
-	if (space != NULL) {
-		space->recv_size = size;
-	}
-
-	mutex_exit(&fil_system->mutex);
-}
-
-/*******************************************************************//**
-Looks up a tablespace under the fil_system mutex and reports its size in
-pages (fil_space_t::size). The tablespace must be cached in the memory
-cache.
-@return space size, 0 if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_size(
-/*===============*/
-	ulint	id)	/*!< in: space id */
-{
-	ut_ad(fil_system);
-	mutex_enter(&fil_system->mutex);
-
-	ulint			n_pages = 0;
-	const fil_space_t*	found = fil_space_get_space(id);
-
-	if (found != NULL) {
-		n_pages = found->size;
-	}
-
-	mutex_exit(&fil_system->mutex);
-
-	return(n_pages);
-}
-
-/*******************************************************************//**
-Reports fil_space_t::flags of a tablespace, read under the fil_system
-mutex. The tablespace must be cached in the memory cache. For space id 0
-the function answers 0 immediately without any lookup.
-@return flags, ULINT_UNDEFINED if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_flags(
-/*================*/
-	ulint	id)	/*!< in: space id */
-{
-	ut_ad(fil_system);
-
-	if (id == 0) {
-		return(0);
-	}
-
-	ulint	result = ULINT_UNDEFINED;
-
-	mutex_enter(&fil_system->mutex);
-
-	if (const fil_space_t* found = fil_space_get_space(id)) {
-		result = found->flags;
-	}
-
-	mutex_exit(&fil_system->mutex);
-
-	return(result);
-}
-
-/*******************************************************************//**
-Derives the compressed page size of a tablespace from its flags. Flags of
-0 and ULINT_UNDEFINED (space not found) are handed back unchanged; any
-other value is decoded with fsp_flags_get_zip_size(). The tablespace must
-be cached in the memory cache.
-@return compressed page size, 0 if the flags are 0,
-ULINT_UNDEFINED if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_zip_size(
-/*===================*/
-	ulint	id)	/*!< in: space id */
-{
-	const ulint	space_flags = fil_space_get_flags(id);
-
-	if (space_flags == 0 || space_flags == ULINT_UNDEFINED) {
-		/* Nothing to decode: pass the sentinel through. */
-		return(space_flags);
-	}
-
-	return(fsp_flags_get_zip_size(space_flags));
-}
-
-/*******************************************************************//**
-Tells whether page_no lies below the current size of the tablespace, i.e.
-whether (space, page_no) refers to an existing page. The tablespace must
-be cached in the memory cache; an unknown tablespace has size 0 and so
-never contains the page.
-@return TRUE if the address is meaningful */
-UNIV_INTERN
-ibool
-fil_check_adress_in_tablespace(
-/*===========================*/
-	ulint	id,	/*!< in: space id */
-	ulint	page_no)/*!< in: page number */
-{
-	const ulint	n_pages = fil_space_get_size(id);
-
-	return(n_pages > page_no ? TRUE : FALSE);
-}
-
-/****************************************************************//**
-Initializes the tablespace memory cache: allocates the zero-filled global
-fil_system object, creates its mutex and its two hash tables, stores the
-open-file limit and finally initializes tablespace encryption support.
-May only be called while fil_system is still NULL. */
-UNIV_INTERN
-void
-fil_init(
-/*=====*/
-	ulint	hash_size,	/*!< in: hash table size; must be > 0 */
-	ulint	max_n_open)	/*!< in: max number of open files;
-				must be > 0 */
-{
-	ut_a(fil_system == NULL);
-
-	ut_a(hash_size > 0);
-	ut_a(max_n_open > 0);
-
-	void*	mem = mem_zalloc(sizeof(fil_system_t));
-
-	fil_system = static_cast<fil_system_t*>(mem);
-
-	mutex_create(fil_system_mutex_key,
-		     &fil_system->mutex, SYNC_ANY_LATCH);
-
-	/* Both hash tables are created with the same size. */
-	fil_system->spaces = hash_create(hash_size);
-	fil_system->name_hash = hash_create(hash_size);
-
-	fil_system->max_n_open = max_n_open;
-
-	fil_space_crypt_init();
-}
-
-/*******************************************************************//**
-Opens all log files and system tablespace data files. They stay open until the
-database server shutdown. This should be called at a server startup after the
-space objects for the log and the system tablespace have been created. The
-purpose of this operation is to make sure we never run out of file descriptors
-if we need to read from the insert buffer or to write to the log.
-
-Every tablespace for which fil_space_belongs_in_lru() is false is visited
-and each of its not yet open nodes is opened; a failure to open is fatal.
-After each node a warning is printed if fewer than 10 file slots remain
-below fil_system->max_n_open. */
-UNIV_INTERN
-void
-fil_open_log_and_system_tablespace_files(void)
-/*==========================================*/
-{
-	mutex_enter(&fil_system->mutex);
-
-	fil_space_t*	space = UT_LIST_GET_FIRST(fil_system->space_list);
-
-	for (; space != NULL; space = UT_LIST_GET_NEXT(space_list, space)) {
-
-		if (fil_space_belongs_in_lru(space)) {
-			/* Files of such tablespaces are not kept
-			permanently open; skip them. */
-			continue;
-		}
-
-		fil_node_t*	node = UT_LIST_GET_FIRST(space->chain);
-
-		for (; node != NULL; node = UT_LIST_GET_NEXT(chain, node)) {
-
-			if (!node->open
-			    && !fil_node_open_file(node, fil_system, space)) {
-				/* This func is called during server's
-				startup. If some file of log or system
-				tablespace is missing, the server
-				can't start successfully. So we should
-				assert for it. */
-				ut_a(0);
-			}
-
-			if (fil_system->max_n_open
-			    >= 10 + fil_system->n_open) {
-				/* Enough head room is left. */
-				continue;
-			}
-
-			fprintf(stderr,
-				"InnoDB: Warning: you must"
-				" raise the value of"
-				" innodb_open_files in\n"
-				"InnoDB: my.cnf! Remember that"
-				" InnoDB keeps all log files"
-				" and all system\n"
-				"InnoDB: tablespace files open"
-				" for the whole time mysqld is"
-				" running, and\n"
-				"InnoDB: needs to open also"
-				" some .ibd files if the"
-				" file-per-table storage\n"
-				"InnoDB: model is used."
-				" Current open files %lu,"
-				" max allowed"
-				" open files %lu.\n",
-				(ulong) fil_system->n_open,
-				(ulong) fil_system->max_n_open);
-		}
-	}
-
-	mutex_exit(&fil_system->mutex);
-}
-
-/*******************************************************************//**
-Closes all open files. There must not be any pending i/o's or not flushed
-modifications in the files.
-
-Walks fil_system->space_list under the fil_system mutex. For every
-tablespace each open node is closed with fil_node_close_file() and the
-tablespace itself is then released with fil_space_free_and_mutex_exit(),
-after which the mutex is taken again for the next iteration. If changed
-page tracking is enabled and the redo log tracking thread has been
-started, the function first waits for srv_redo_log_tracked_event. */
-UNIV_INTERN
-void
-fil_close_all_files(void)
-/*=====================*/
-{
- fil_space_t* space;
-
- // Must check both flags as it's possible for this to be called during
- // server startup with srv_track_changed_pages == true but
- // srv_redo_log_thread_started == false
- if (srv_track_changed_pages && srv_redo_log_thread_started)
- os_event_wait(srv_redo_log_tracked_event);
-
- mutex_enter(&fil_system->mutex);
-
- space = UT_LIST_GET_FIRST(fil_system->space_list);
-
- while (space != NULL) {
- fil_node_t* node;
- fil_space_t* prev_space = space; /* the space handled in this round */
-
- for (node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
-
- if (node->open) {
- fil_node_close_file(node, fil_system);
- }
- }
-
- space = UT_LIST_GET_NEXT(space_list, space); /* step to the successor before prev_space is released below */
-
- /* This is executed during shutdown. No other thread
- can create or remove tablespaces while we are not
- holding fil_system->mutex. The call below leaves the
- mutex, so it is re-acquired right afterwards. */
- fil_space_free_and_mutex_exit(prev_space->id, false);
- mutex_enter(&fil_system->mutex);
- }
-
- mutex_exit(&fil_system->mutex);
-}
-
-/*******************************************************************//**
-Closes the redo log files. There must not be any pending i/o's or not
-flushed modifications in the files.
-
-Walks fil_system->space_list under the fil_system mutex and skips every
-tablespace whose purpose is not FIL_LOG. For a log tablespace each open
-node is closed with fil_node_close_file(); if 'free' is set the
-tablespace is additionally released with fil_space_free_and_mutex_exit()
-and the mutex is taken again. If changed page tracking is enabled and
-the redo log tracking thread has been started, the function first waits
-for srv_redo_log_tracked_event. */
-UNIV_INTERN
-void
-fil_close_log_files(
-/*================*/
- bool free) /*!< in: whether to free the memory object */
-{
- fil_space_t* space;
-
- // Must check both flags as it's possible for this to be called during
- // server startup with srv_track_changed_pages == true but
- // srv_redo_log_thread_started == false
- if (srv_track_changed_pages && srv_redo_log_thread_started)
- os_event_wait(srv_redo_log_tracked_event);
-
- mutex_enter(&fil_system->mutex);
-
- space = UT_LIST_GET_FIRST(fil_system->space_list);
-
- while (space != NULL) {
- fil_node_t* node;
- fil_space_t* prev_space = space; /* the space handled in this round */
-
- if (space->purpose != FIL_LOG) {
- space = UT_LIST_GET_NEXT(space_list, space);
- continue;
- }
-
- for (node = UT_LIST_GET_FIRST(space->chain);
- node != NULL;
- node = UT_LIST_GET_NEXT(chain, node)) {
-
- if (node->open) {
- fil_node_close_file(node, fil_system);
- }
- }
-
- space = UT_LIST_GET_NEXT(space_list, space); /* step to the successor before prev_space may be released below */
-
- if (free) {
- /* This is executed during startup. No other thread
- can create or remove tablespaces while we are not
- holding fil_system->mutex. The call below leaves the
- mutex, so it is re-acquired right afterwards. */
- fil_space_free_and_mutex_exit(prev_space->id, false);
- mutex_enter(&fil_system->mutex);
- }
- }
-
- mutex_exit(&fil_system->mutex);
-}
-
-/*******************************************************************//**
-Raises fil_system->max_assigned_id to max_id when max_id is larger than
-the value currently stored; otherwise leaves it alone. An id at or above
-SRV_LOG_SPACE_FIRST_ID is reported on stderr and treated as fatal. */
-UNIV_INTERN
-void
-fil_set_max_space_id_if_bigger(
-/*===========================*/
-	ulint	max_id)	/*!< in: maximum known id */
-{
-	if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
-		fprintf(stderr,
-			"InnoDB: Fatal error: max tablespace id"
-			" is too high, %lu\n", (ulong) max_id);
-		ut_error;
-	}
-
-	mutex_enter(&fil_system->mutex);
-
-	const bool	is_bigger = fil_system->max_assigned_id < max_id;
-
-	if (is_bigger) {
-		fil_system->max_assigned_id = max_id;
-	}
-
-	mutex_exit(&fil_system->mutex);
-}
-
-/** Write the flushed LSN to the page header of the first page in the
-system tablespace.
-
-If the system tablespace has no crypt data, or its crypt data has
-min_key_version 0, the LSN is stamped into the first page of every data
-file of the tablespace; otherwise only into the first page of the first
-data file. Each page is read, patched at
-FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION and written back, and finally the
-tablespace files are flushed.
-@param[in]	lsn	flushed LSN
-@return DB_SUCCESS, or the first error reported by fil_read()/fil_write() */
-dberr_t
-fil_write_flushed_lsn(
-	lsn_t	lsn)
-{
-	/* Stays DB_SUCCESS if there is nothing to stamp (empty chain). */
-	dberr_t	err = DB_SUCCESS;
-
-	/* Over-allocate so that a page-aligned buffer fits inside. */
-	byte*	buf1 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
-	byte*	buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE));
-
-	/* Acquire system tablespace */
-	fil_space_t*	space = fil_space_acquire(0);
-
-	/* If tablespace is not encrypted, stamp flush_lsn to
-	first page of all system tablespace datafiles to avoid
-	unnecessary error messages on possible downgrade. A tablespace
-	without any crypt data is handled as not encrypted. */
-	if (space->crypt_data == NULL
-	    || space->crypt_data->min_key_version == 0) {
-		ulint	sum_of_sizes = 0;
-
-		for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
-		     node != NULL;
-		     node = UT_LIST_GET_NEXT(chain, node)) {
-
-			dberr_t	node_err = fil_read(
-				TRUE, 0, 0, sum_of_sizes, 0,
-				UNIV_PAGE_SIZE, buf, NULL, 0);
-
-			if (node_err == DB_SUCCESS) {
-				mach_write_to_8(
-					buf
-					+ FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
-					lsn);
-
-				node_err = fil_write(
-					TRUE, 0, 0, sum_of_sizes, 0,
-					UNIV_PAGE_SIZE, buf, NULL, 0);
-			}
-
-			/* Remember the first failure instead of letting a
-			later successful node hide it. */
-			if (err == DB_SUCCESS) {
-				err = node_err;
-			}
-
-			/* Always advance to the first page of the next
-			data file, even if this one failed; otherwise the
-			following nodes would address the wrong page. */
-			sum_of_sizes += node->size;
-		}
-	} else {
-		/* When system tablespace is encrypted stamp flush_lsn to
-		only the first page of the first datafile (rest of pages
-		are encrypted). */
-		err = fil_read(TRUE, 0, 0, 0, 0,
-			       UNIV_PAGE_SIZE, buf, NULL, 0);
-
-		if (err == DB_SUCCESS) {
-			mach_write_to_8(
-				buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
-				lsn);
-
-			err = fil_write(TRUE, 0, 0, 0, 0,
-					UNIV_PAGE_SIZE, buf, NULL, 0);
-		}
-	}
-
-	fil_flush_file_spaces(FIL_TABLESPACE);
-	fil_space_release(space);
-
-	ut_free(buf1);
-
-	return(err);
-}
-
-/** Check the consistency of the first data page of a tablespace
-at database startup.
-
-The checks are, in order: the page size encoded in the flags must equal
-UNIV_PAGE_SIZE; for space id 0 with flags 0 the page must not consist of
-zero bytes only; the page must not be reported corrupted by
-buf_page_is_corrupted(); and the page must carry the expected space id
-and page number 0. All checks are skipped when srv_force_recovery is at
-least SRV_FORCE_IGNORE_CORRUPT.
-@param[in]	page		page frame
-@param[in]	space_id	tablespace identifier
-@param[in]	flags		tablespace flags
-@retval NULL on success, or if innodb_force_recovery is set
-@return pointer to an error message string */
-static MY_ATTRIBUTE((warn_unused_result))
-const char*
-fil_check_first_page(const page_t* page, ulint space_id, ulint flags)
-{
-	if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) {
-		return(NULL);
-	}
-
-	if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) {
-		/* The arguments are cast to match the %lu conversions,
-		as is done for the other diagnostics in this file. */
-		fprintf(stderr,
-			"InnoDB: Error: Current page size %lu != "
-			" page size on page %lu\n",
-			(ulong) UNIV_PAGE_SIZE,
-			(ulong) fsp_flags_get_page_size(flags));
-
-		return("innodb-page-size mismatch");
-	}
-
-	if (!space_id && !flags) {
-		/* Scan for the first nonzero byte; the counter reaches 0
-		only if every byte of the page is zero. */
-		ulint		nonzero_bytes = UNIV_PAGE_SIZE;
-		const byte*	b = page;
-
-		while (!*b && --nonzero_bytes) {
-			b++;
-		}
-
-		if (!nonzero_bytes) {
-			return("space header page consists of zero bytes");
-		}
-	}
-
-	if (buf_page_is_corrupted(
-		    false, page, fsp_flags_get_zip_size(flags), NULL)) {
-		return("checksum mismatch");
-	}
-
-	if (page_get_space_id(page) == space_id
-	    && page_get_page_no(page) == 0) {
-		return(NULL);
-	}
-
-	return("inconsistent data in space header");
-}
-
-/** Reads the flushed lsn, space_id and tablespace flag fields from
-the first page of a data file at database startup.
-
-The page is read into a temporary aligned buffer which is released on
-every return path. When one_read_already is set, the page is read but
-none of the output parameters are touched and the page is not validated.
-Otherwise flags that fail fsp_flags_is_valid() are converted with
-fsp_flags_convert_from_101(), the page is validated with
-fil_check_first_page() (except in xtrabackup backup mode), and the crypt
-data stored on the page is read.
-@param[in]	data_file		open data file
-@param[in]	one_read_already	true if first datafile is already
-					read
-@param[out]	flags			FSP_SPACE_FLAGS
-@param[out]	space_id		tablespace ID
-@param[out]	flushed_lsn		flushed lsn value; may be NULL
-@param[out]	crypt_data		encryption crypt data; may be NULL
-@retval NULL on success, or if innodb_force_recovery is set
-@return pointer to an error message string */
-UNIV_INTERN
-const char*
-fil_read_first_page(
-	pfs_os_file_t		data_file,
-	ibool			one_read_already,
-	ulint*			flags,
-	ulint*			space_id,
-	lsn_t*			flushed_lsn,
-	fil_space_crypt_t**	crypt_data)
-{
-	byte*			buf;
-	byte*			page;
-	const char*		check_msg = NULL;
-	fil_space_crypt_t*	cdata;
-
-	if (IS_XTRABACKUP() && srv_backup_mode) {
-		/* Files smaller than page size may occur
-		in xtrabackup, when server creates new file
-		but has not yet written into it, or wrote only
-		partially. Checks size here, to avoid exit in os_file_read.
-		This file will be skipped by xtrabackup if it is too small.
-		*/
-		os_offset_t	file_size;
-		file_size = os_file_get_size(data_file);
-		if (file_size < FIL_IBD_FILE_INITIAL_SIZE*UNIV_PAGE_SIZE) {
-			return "File size is less than minimum";
-		}
-	}
-
-	buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
-
-	/* Align the memory for a possible read from a raw device */
-
-	page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
-
-	os_file_read(data_file, page, 0, UNIV_PAGE_SIZE);
-
-	srv_stats.page0_read.add(1);
-
-	/* The FSP_HEADER on page 0 is only valid for the first file
-	in a tablespace. So if this is not the first datafile, leave
-	*flags and *space_id as they were read from the first file and
-	do not validate the first page. */
-	if (!one_read_already) {
-		*space_id = fsp_header_get_space_id(page);
-		*flags = fsp_header_get_flags(page);
-
-		if (flushed_lsn) {
-			*flushed_lsn = mach_read_from_8(page +
-				FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
-		}
-
-		if (!fsp_flags_is_valid(*flags)) {
-			ulint	cflags = fsp_flags_convert_from_101(*flags);
-			if (cflags == ULINT_UNDEFINED) {
-				ib_logf(IB_LOG_LEVEL_ERROR,
-					"Invalid flags 0x%x in tablespace %u",
-					unsigned(*flags), unsigned(*space_id));
-				/* Do not leak the page buffer on this
-				early exit. */
-				ut_free(buf);
-				return "invalid tablespace flags";
-			} else {
-				*flags = cflags;
-			}
-		}
-
-		if (!(IS_XTRABACKUP() && srv_backup_mode)) {
-			check_msg = fil_check_first_page(page, *space_id, *flags);
-		}
-
-		/* Possible encryption crypt data is also stored only to first page
-		of the first datafile. */
-
-		const ulint offset = fsp_header_get_crypt_offset(
-			fsp_flags_get_zip_size(*flags));
-
-		cdata = fil_space_read_crypt_data(*space_id, page, offset);
-
-		if (crypt_data) {
-			*crypt_data = cdata;
-		}
-
-		/* If file space is encrypted we need to have at least some
-		encryption service available where to get keys */
-		if (cdata && cdata->should_encrypt()) {
-
-			if (!encryption_key_id_exists(cdata->key_id)) {
-				ib_logf(IB_LOG_LEVEL_ERROR,
-					"Tablespace id " ULINTPF
-					" is encrypted but encryption service"
-					" or used key_id %u is not available. "
-					"Can't continue opening tablespace.",
-					*space_id, cdata->key_id);
-
-				/* Do not leak the page buffer on this
-				early exit. */
-				ut_free(buf);
-				return ("table encrypted but encryption service not available.");
-			}
-		}
-	}
-
-	ut_free(buf);
-
-	/* NULL unless fil_check_first_page() reported a problem. */
-	return(check_msg);
-}
-
-/*================ SINGLE-TABLE TABLESPACES ==========================*/
-
-/********************************************************//**
-Makes sure the database directory of a table exists. The directory path
-is built as fil_path_to_mysql_datadir, a '/', and the part of the table
-name in front of its first '/'; it is then normalized for Windows and
-passed to os_file_create_directory(). A name without a '/' or a failure
-to create the directory is fatal. */
-static
-void
-fil_create_directory_for_tablename(
-/*===============================*/
-	const char*	name)	/*!< in: name in the standard
-				'databasename/tablename' format */
-{
-	const ulint	datadir_len = strlen(fil_path_to_mysql_datadir);
-	const char*	slash = strchr(name, '/');
-
-	ut_a(slash);
-
-	/* Length of the database name part of 'name'. */
-	const ulint	db_len = slash - name;
-
-	/* Room for the data directory, the separator, the database
-	name and the terminating NUL. */
-	char*	dir_path = static_cast<char*>(
-		mem_alloc(datadir_len + db_len + 2));
-
-	memcpy(dir_path, fil_path_to_mysql_datadir, datadir_len);
-	dir_path[datadir_len] = '/';
-	memcpy(dir_path + datadir_len + 1, name, db_len);
-	dir_path[datadir_len + db_len + 1] = 0;
-
-	srv_normalize_path_for_win(dir_path);
-
-	ut_a(os_file_create_directory(dir_path, FALSE));
-	mem_free(dir_path);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Writes a log record about an .ibd file create/rename/delete.
-
-The record consists of the header produced by
-mlog_write_initial_log_record_for_file_op(), followed by the 4-byte
-tablespace flags (only for MLOG_FILE_CREATE2), a 2-byte length and the
-NUL-terminated name. For MLOG_FILE_RENAME a second 2-byte length and the
-NUL-terminated new name are appended. Nothing is written if mlog_open()
-returns NULL. */
-static
-void
-fil_op_write_log(
-/*=============*/
- ulint type, /*!< in: MLOG_FILE_CREATE,
- MLOG_FILE_CREATE2,
- MLOG_FILE_DELETE, or
- MLOG_FILE_RENAME */
- ulint space_id, /*!< in: space id */
- ulint log_flags, /*!< in: redo log flags (stored
- in the page number field) */
- ulint flags, /*!< in: compressed page size
- and file format
- if type==MLOG_FILE_CREATE2, or 0 */
- const char* name, /*!< in: table name in the familiar
- 'databasename/tablename' format, or
- the file path in the case of
- MLOG_FILE_DELETE */
- const char* new_name, /*!< in: if type is MLOG_FILE_RENAME,
- the new table name in the
- 'databasename/tablename' format */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- byte* log_ptr;
- ulint len;
-
- log_ptr = mlog_open(mtr, 11 + 2 + 1);
- ut_ad(fsp_flags_is_valid(flags));
-
- if (!log_ptr) {
- /* Logging in mtr is switched off during crash recovery:
- in that case mlog_open returns NULL */
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_for_file_op(
- type, space_id, log_flags, log_ptr, mtr);
- if (type == MLOG_FILE_CREATE2) {
- mach_write_to_4(log_ptr, flags);
- log_ptr += 4;
- }
- /* Let us store the strings as null-terminated for easier readability
- and handling */
-
- len = strlen(name) + 1; /* the stored length counts the terminating NUL */
-
- mach_write_to_2(log_ptr, len);
- log_ptr += 2;
- mlog_close(mtr, log_ptr);
-
- mlog_catenate_string(mtr, (byte*) name, len);
-
- if (type == MLOG_FILE_RENAME) {
- len = strlen(new_name) + 1; /* again including the terminating NUL */
- log_ptr = mlog_open(mtr, 2 + len);
- ut_a(log_ptr);
- mach_write_to_2(log_ptr, len);
- log_ptr += 2;
- mlog_close(mtr, log_ptr);
-
- mlog_catenate_string(mtr, (byte*) new_name, len);
- }
-}
-#endif
-
-/*******************************************************************//**
-Parses the body of a log record written about an .ibd file operation. That is,
-the log record part after the standard (type, space id, page no) header of the
-log record.
-
-If desired, also replays the delete or rename operation if the .ibd file
-exists and the space id in it matches. Replays the create operation if a file
-at that path does not exist yet. If the database directory for the file to be
-created does not exist, then we create the directory, too.
-
-Note that mysqlbackup --apply-log sets fil_path_to_mysql_datadir to point to
-the datadir that we should use in replaying the file operations.
-
-InnoDB recovery does not replay these fully since it always sets the space id
-to zero. But mysqlbackup does replay them. TODO: If remote tablespaces are
-used, mysqlbackup will only create tables in the default directory since
-MLOG_FILE_CREATE and MLOG_FILE_CREATE2 only know the tablename, not the path.
-
-The parsed body is: 4 bytes of tablespace flags (MLOG_FILE_CREATE2 only),
-a 2-byte name length, the name, and for MLOG_FILE_RENAME a further 2-byte
-length followed by the new name. The names are used in place as C strings
-pointing into the log buffer. A nonzero space_id is only accepted while
-recovery is on (release-build assertion), and a failed replay of a
-delete, rename or create is fatal.
-
-@return end of log record, or NULL if the record was not completely
-contained between ptr and end_ptr */
-UNIV_INTERN
-byte*
-fil_op_log_parse_or_replay(
-/*=======================*/
- byte* ptr, /*!< in: buffer containing the log record body,
- or an initial segment of it, if the record does
- not fit completely between ptr and end_ptr */
- byte* end_ptr, /*!< in: buffer end */
- ulint type, /*!< in: the type of this log record */
- ulint space_id, /*!< in: the space id of the tablespace in
- question, or 0 if the log record should
- only be parsed but not replayed */
- ulint log_flags) /*!< in: redo log flags
- (stored in the page number parameter) */
-{
- ulint name_len;
- ulint new_name_len;
- const char* name;
- const char* new_name = NULL;
- ulint flags = 0;
-
- if (type == MLOG_FILE_CREATE2) {
- if (end_ptr < ptr + 4) {
-
- return(NULL);
- }
-
- flags = mach_read_from_4(ptr);
- ptr += 4;
- }
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- name_len = mach_read_from_2(ptr);
-
- ptr += 2;
-
- if (end_ptr < ptr + name_len) {
-
- return(NULL);
- }
-
- name = (const char*) ptr;
-
- ptr += name_len;
-
- if (type == MLOG_FILE_RENAME) {
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- new_name_len = mach_read_from_2(ptr);
-
- ptr += 2;
-
- if (end_ptr < ptr + new_name_len) {
-
- return(NULL);
- }
-
- new_name = (const char*) ptr;
-
- ptr += new_name_len;
- }
-
- /* We managed to parse a full log record body */
- /*
- printf("Parsed log rec of type %lu space %lu\n"
- "name %s\n", type, space_id, name);
-
- if (type == MLOG_FILE_RENAME) {
- printf("new name %s\n", new_name);
- }
- */
- if (!space_id) {
- return(ptr); /* parse-only request: nothing is replayed */
- } else {
- /* Only replay file ops during recovery. This is a
- release-build assert to minimize any data loss risk by a
- misapplied file operation. */
- ut_a(recv_recovery_is_on());
- }
-
- /* Let us try to perform the file operation, if sensible. Note that
- mysqlbackup has at this stage already read in all space id info to the
- fil0fil.cc data structures.
-
- NOTE that our algorithm is not guaranteed to work correctly if there
- were renames of tables during the backup. See mysqlbackup code for more
- on the problem. */
-
- switch (type) {
- case MLOG_FILE_DELETE:
- if (fil_tablespace_exists_in_mem(space_id)) {
- dberr_t err = fil_delete_tablespace(
- space_id, BUF_REMOVE_FLUSH_NO_WRITE);
- ut_a(err == DB_SUCCESS);
- }
-
- break;
-
- case MLOG_FILE_RENAME:
- /* In order to replay the rename, the following must hold:
- * The new name is not already used.
- * A tablespace is open in memory with the old name.
- * The space ID for that tablespace matches this log entry.
- This will prevent unintended renames during recovery. */
-
- if (fil_get_space_id_for_table(new_name) == ULINT_UNDEFINED
- && space_id == fil_get_space_id_for_table(name)) {
- /* Create the database directory for the new name, if
- it does not exist yet */
- fil_create_directory_for_tablename(new_name);
-
- if (!fil_rename_tablespace(name, space_id,
- new_name, NULL)) {
- ut_error;
- }
- }
-
- break;
-
- case MLOG_FILE_CREATE:
- case MLOG_FILE_CREATE2:
- if (fil_tablespace_exists_in_mem(space_id)) {
- /* Do nothing */
- } else if (fil_get_space_id_for_table(name)
- != ULINT_UNDEFINED) {
- /* Do nothing */
- } else if (log_flags & MLOG_FILE_FLAG_TEMP) {
- /* Temporary table, do nothing */
- } else {
- /* Create the database directory for name, if it does
- not exist yet */
- fil_create_directory_for_tablename(name);
-
- if (fil_create_new_single_table_tablespace(
- space_id, name, NULL, flags,
- DICT_TF2_USE_TABLESPACE,
- FIL_IBD_FILE_INITIAL_SIZE,
- FIL_ENCRYPTION_DEFAULT,
- FIL_DEFAULT_ENCRYPTION_KEY) != DB_SUCCESS) {
- ut_error;
- }
- }
-
- break;
-
- default:
- ut_error;
- }
-
- return(ptr);
-}
-
-/*******************************************************************//**
-Builds the name of the EXPORT/IMPORT config file that belongs to an .ibd
-file: a copy of the given path whose last three characters are
-overwritten with "cfg". The string must be freed by caller with
-mem_free().
-@return own: file name */
-static
-char*
-fil_make_cfg_name(
-/*==============*/
-	const char*	filepath)	/*!< in: .ibd file name */
-{
-	const ulint	path_len = strlen(filepath);
-
-	ut_ad(path_len > 4);
-
-	char*	result = mem_strdup(filepath);
-
-	/* Overwrite the 3-character suffix in place; 4 bytes cover
-	"cfg" plus the terminating NUL. */
-	char*	suffix = result + path_len - 3;
-
-	ut_snprintf(suffix, 4, "cfg");
-
-	return(result);
-}
-
-/*******************************************************************//**
-Check for change buffer merges, i.e. whether space->n_pending_ops is
-nonzero. The caller must hold the fil_system mutex. Once more than 5000
-attempts have been made, a warning is logged on every further attempt.
-@return 0 if no merges (or space is NULL) else count + 1. */
-static
-ulint
-fil_ibuf_check_pending_ops(
-/*=======================*/
-	fil_space_t*	space,	/*!< in/out: Tablespace to check */
-	ulint		count)	/*!< in: number of attempts so far */
-{
-	ut_ad(mutex_own(&fil_system->mutex));
-
-	if (space == 0 || space->n_pending_ops == 0) {
-		/* Nothing to wait for. */
-		return(0);
-	}
-
-	if (count > 5000) {
-		ib_logf(IB_LOG_LEVEL_WARN,
-			"Trying to close/delete tablespace "
-			"'%s' but there are %lu pending change "
-			"buffer merges on it.",
-			space->name,
-			(ulong) space->n_pending_ops);
-	}
-
-	return(count + 1);
-}
-
-/*******************************************************************//**
-Check for pending IO on a tablespace that has exactly one file node and
-no pending operations. The caller must hold the fil_system mutex. The
-node is always handed back through 'node'. Once more than 1000 attempts
-have been made, a warning is logged on every further attempt.
-@return 0 if no pending else count + 1. */
-static
-ulint
-fil_check_pending_io(
-/*=================*/
-	fil_space_t*	space,	/*!< in/out: Tablespace to check */
-	fil_node_t**	node,	/*!< out: Node in space list */
-	ulint		count)	/*!< in: number of attempts so far */
-{
-	ut_ad(mutex_own(&fil_system->mutex));
-	ut_a(space->n_pending_ops == 0);
-
-	/* The following code must change when InnoDB supports
-	multiple datafiles per tablespace. */
-	ut_a(UT_LIST_GET_LEN(space->chain) == 1);
-
-	fil_node_t*	only_node = UT_LIST_GET_FIRST(space->chain);
-
-	*node = only_node;
-
-	const bool	busy = space->n_pending_flushes > 0
-		|| only_node->n_pending > 0;
-
-	if (!busy) {
-		return(0);
-	}
-
-	ut_a(!only_node->being_extended);
-
-	if (count > 1000) {
-		ib_logf(IB_LOG_LEVEL_WARN,
-			"Trying to close/delete tablespace '%s' "
-			"but there are %lu flushes "
-			" and %lu pending i/o's on it.",
-			space->name,
-			(ulong) space->n_pending_flushes,
-			(ulong) only_node->n_pending);
-	}
-
-	return(count + 1);
-}
-
-/*******************************************************************//**
-Check pending operations on a tablespace.
-
-Sets stop_new_ops on the tablespace, lets
-fil_space_crypt_close_tablespace() run for it, and then waits in two
-polling loops (calling os_thread_sleep(20000) between attempts): first
-until fil_ibuf_check_pending_ops() reports no pending operations, then
-until fil_check_pending_io() reports no pending flushes or i/o. On
-success *space is set to the tablespace and *path to a mem_strdup() copy
-of the name of its file node, which the caller owns.
-@return DB_SUCCESS, or DB_TABLESPACE_NOT_FOUND if the tablespace is not
-in the memory cache when the pending i/o is checked. */
-static
-dberr_t
-fil_check_pending_operations(
-/*=========================*/
- ulint id, /*!< in: space id */
- fil_space_t** space, /*!< out: tablespace instance in memory */
- char** path) /*!< out/own: tablespace path */
-{
- ulint count = 0;
-
- ut_a(id != TRX_SYS_SPACE);
- ut_ad(space);
-
- *space = 0;
-
- mutex_enter(&fil_system->mutex);
- fil_space_t* sp = fil_space_get_by_id(id);
-
- if (sp) {
- sp->stop_new_ops = true;
- /* space could be freed by other threads as soon
- as n_pending_ops reaches 0, thus increment pending
- ops here. */
- sp->n_pending_ops++;
- }
-
- mutex_exit(&fil_system->mutex);
-
- /* Wait for crypt threads to stop accessing space */
- if (sp) {
- fil_space_crypt_close_tablespace(sp);
- /* We have "acquired" this space and must
- free it now as below we compare n_pending_ops. */
- fil_space_release(sp);
- }
-
- /* Check for pending change buffer merges. */
-
- do {
- mutex_enter(&fil_system->mutex);
-
- sp = fil_space_get_by_id(id); /* looked up afresh on every attempt; may be NULL */
-
- count = fil_ibuf_check_pending_ops(sp, count);
-
- mutex_exit(&fil_system->mutex);
-
- if (count > 0) {
- os_thread_sleep(20000);
- }
-
- } while (count > 0);
-
- /* Check for pending IO. */
-
- *path = 0;
-
- do {
- mutex_enter(&fil_system->mutex);
-
- sp = fil_space_get_by_id(id);
-
- if (sp == NULL) {
- mutex_exit(&fil_system->mutex);
- return(DB_TABLESPACE_NOT_FOUND);
- }
-
- fil_node_t* node;
-
- count = fil_check_pending_io(sp, &node, count);
-
- if (count == 0) {
- *path = mem_strdup(node->name); /* copied while the mutex is still held */
- }
-
- mutex_exit(&fil_system->mutex);
-
- if (count > 0) {
- os_thread_sleep(20000);
- }
-
- } while (count > 0);
-
- ut_ad(sp);
-
- *space = sp;
- return(DB_SUCCESS);
-}
-
-/*******************************************************************//**
-Closes a single-table tablespace. The tablespace must be cached in the
-memory cache. Free all pages used by the tablespace.
-
-After fil_check_pending_operations() has succeeded, the tablespace latch
-is X-locked, the pages of the tablespace are passed to
-buf_LRU_flush_or_remove_pages() with BUF_REMOVE_FLUSH_WRITE, and the
-tablespace is freed from the memory cache. The .cfg file derived from
-the tablespace path is deleted if it exists.
-@return DB_SUCCESS or error; DB_TABLESPACE_NOT_FOUND if freeing the
-tablespace from the memory cache fails */
-UNIV_INTERN
-dberr_t
-fil_close_tablespace(
-/*=================*/
- trx_t* trx, /*!< in/out: Transaction covering the close */
- ulint id) /*!< in: space id */
-{
- char* path = 0;
- fil_space_t* space = 0;
-
- ut_a(id != TRX_SYS_SPACE);
-
- dberr_t err = fil_check_pending_operations(id, &space, &path);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- ut_a(space);
- ut_a(path != 0);
-
- rw_lock_x_lock(&space->latch);
-
-#ifndef UNIV_HOTBACKUP
- /* Invalidate in the buffer pool all pages belonging to the
- tablespace. Since we have set space->stop_new_ops = TRUE, readahead
- or ibuf merge can no longer read more pages of this tablespace to the
- buffer pool. Thus we can clean the tablespace out of the buffer pool
- completely and permanently. The flag stop_new_ops also prevents
- fil_flush() from being applied to this tablespace. */
-
- buf_LRU_flush_or_remove_pages(id, BUF_REMOVE_FLUSH_WRITE, trx);
-#endif
- mutex_enter(&fil_system->mutex);
-
- /* If the free is successful, the X lock will be released before
- the space memory data structure is freed. */
-
- if (!fil_space_free_and_mutex_exit(id, TRUE)) {
- rw_lock_x_unlock(&space->latch);
- err = DB_TABLESPACE_NOT_FOUND;
- } else {
- err = DB_SUCCESS;
- }
-
- /* If it is a delete then also delete any generated files, otherwise
- when we drop the database the remove directory will fail. */
-
- char* cfg_name = fil_make_cfg_name(path);
-
- os_file_delete_if_exists(innodb_file_data_key, cfg_name);
-
- mem_free(path); /* path was allocated by fil_check_pending_operations() */
- mem_free(cfg_name);
-
- return(err);
-}
-
-/*******************************************************************//**
-Deletes a single-table tablespace. The tablespace must be cached in the
-memory cache.
-
-After fil_check_pending_operations() has succeeded, the tablespace latch
-is X-locked, the buffer pool pages of the tablespace are handled as
-requested by buf_remove, the .cfg file and (for tablespaces with the
-DATA DIRECTORY flag) the link file are removed, the tablespace is freed
-from the memory cache and its data file is deleted. Unless running as
-xtrabackup, an MLOG_FILE_DELETE record is then written.
-@return DB_SUCCESS or error; DB_TABLESPACE_NOT_FOUND if the tablespace
-is not in the memory cache, DB_IO_ERROR if the data file could not be
-deleted */
-UNIV_INTERN
-dberr_t
-fil_delete_tablespace(
-/*==================*/
- ulint id, /*!< in: space id */
- buf_remove_t buf_remove) /*!< in: specify the action to take
- on the tables pages in the buffer
- pool */
-{
- char* path = 0;
- fil_space_t* space = 0;
-
- ut_a(id != TRX_SYS_SPACE);
-
- dberr_t err = fil_check_pending_operations(id, &space, &path);
-
- if (err != DB_SUCCESS) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot delete tablespace %lu because it is not "
- "found in the tablespace memory cache.",
- (ulong) id);
-
- return(err);
- }
-
- ut_a(space);
- ut_a(path != 0);
-
- /* Important: We rely on the data dictionary mutex to ensure
- that a race is not possible here. It should serialize the tablespace
- drop/free. We acquire an X latch only to avoid a race condition
- when accessing the tablespace instance via:
-
- fsp_get_available_space_in_free_extents().
-
- There our main motivation is to reduce the contention on the
- dictionary mutex. */
-
- rw_lock_x_lock(&space->latch);
-
-#ifndef UNIV_HOTBACKUP
- /* IMPORTANT: Because we have set space::stop_new_ops there
- can't be any new ibuf merges, reads or flushes. We are here
- because node::n_pending was zero above. However, it is still
- possible to have pending read and write requests:
-
- A read request can happen because the reader thread has
- gone through the ::stop_new_ops check in buf_page_init_for_read()
- before the flag was set and has not yet incremented ::n_pending
- when we checked it above.
-
- A write request can be issued any time because we don't check
- the ::stop_new_ops flag when queueing a block for write.
-
- We deal with pending write requests in the following function
- where we'd minimally evict all dirty pages belonging to this
- space from the flush_list. Note that if a block is IO-fixed
- we'll wait for IO to complete.
-
- We deal with potential read requests by checking the
- ::stop_new_ops flag in fil_io() */
-
- buf_LRU_flush_or_remove_pages(id, buf_remove, 0);
-
-#endif /* !UNIV_HOTBACKUP */
-
- /* If it is a delete then also delete any generated files, otherwise
- when we drop the database the remove directory will fail. */
- {
- char* cfg_name = fil_make_cfg_name(path);
- os_file_delete_if_exists(innodb_file_data_key, cfg_name);
- mem_free(cfg_name);
- }
-
- /* Delete the link file pointing to the ibd file we are deleting. */
- if (FSP_FLAGS_HAS_DATA_DIR(space->flags)) {
- fil_delete_link_file(space->name);
- }
-
- mutex_enter(&fil_system->mutex);
-
- /* Double check the sanity of pending ops after reacquiring
- the fil_system::mutex. */
- if (fil_space_get_by_id(id)) {
- ut_a(space->n_pending_ops == 0);
- ut_a(UT_LIST_GET_LEN(space->chain) == 1);
- fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
- ut_a(node->n_pending == 0);
- }
-
- if (!fil_space_free_and_mutex_exit(id, true)) {
- err = DB_TABLESPACE_NOT_FOUND;
- }
-
- if (err != DB_SUCCESS) {
- rw_lock_x_unlock(&space->latch);
- } else if (!os_file_delete(innodb_file_data_key, path)
- && !os_file_delete_if_exists(innodb_file_data_key, path)) {
-
- /* Note: This is because we have removed the
- tablespace instance from the cache. */
-
- err = DB_IO_ERROR;
- }
-
- if (err == DB_SUCCESS && !IS_XTRABACKUP()) {
-#ifndef UNIV_HOTBACKUP
- /* Write a log record about the deletion of the .ibd
- file, so that mysqlbackup can replay it in the
- --apply-log phase. We use a dummy mtr and the familiar
- log write mechanism. */
- mtr_t mtr;
-
- /* When replaying the operation in mysqlbackup, do not try
- to write any log record */
- mtr_start(&mtr);
-
- fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr);
- mtr_commit(&mtr);
-#endif
- err = DB_SUCCESS; /* no-op: err is already DB_SUCCESS in this branch */
- }
-
- mem_free(path); /* path was allocated by fil_check_pending_operations() */
-
- return(err);
-}
-
-/*******************************************************************//**
-Reports the stop_new_ops flag of a single-table tablespace, read under
-the fil_system mutex. The tablespace must exist in the memory cache;
-this is enforced with an assertion.
-@return TRUE if being deleted */
-UNIV_INTERN
-ibool
-fil_tablespace_is_being_deleted(
-/*============================*/
-	ulint	id)	/*!< in: space id */
-{
-	mutex_enter(&fil_system->mutex);
-
-	const fil_space_t*	found = fil_space_get_by_id(id);
-
-	ut_a(found != NULL);
-
-	const ibool	stopping = found->stop_new_ops;
-
-	mutex_exit(&fil_system->mutex);
-
-	return(stopping);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Discards a single-table tablespace. The tablespace must be cached in the
-memory cache. Discarding is like deleting a tablespace, but
-
- 1. We do not drop the table from the data dictionary;
-
- 2. We remove all insert buffer entries for the tablespace immediately;
- in DROP TABLE they are only removed gradually in the background;
-
- 3. Free all the pages in use by the tablespace.
-
-A failed delete is only logged as a warning; the insert buffer entries
-are removed regardless, and the delete result is handed back to the caller.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-dberr_t
-fil_discard_tablespace(
-/*===================*/
-	ulint	id)	/*!< in: space id */
-{
-	const dberr_t	result = fil_delete_tablespace(
-		id, BUF_REMOVE_ALL_NO_WRITE);
-
-	if (result == DB_IO_ERROR) {
-		ib_logf(IB_LOG_LEVEL_WARN,
-			"While deleting tablespace %lu in DISCARD TABLESPACE."
-			" File rename/delete failed: %s",
-			(ulong) id, ut_strerr(result));
-	} else if (result == DB_TABLESPACE_NOT_FOUND) {
-		ib_logf(IB_LOG_LEVEL_WARN,
-			"Cannot delete tablespace %lu in DISCARD "
-			"TABLESPACE. %s",
-			(ulong) id, ut_strerr(result));
-	} else if (result != DB_SUCCESS) {
-		/* No other result is expected from the delete. */
-		ut_error;
-	}
-
-	/* Remove all insert buffer entries for the tablespace */
-	ibuf_delete_for_discarded_space(id);
-
-	return(result);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Renames the memory cache structures of a single-table tablespace: the
-space is re-hashed in fil_system->name_hash under the new name, and the
-space name and node name strings are replaced by copies of the new ones.
-Nothing is changed if the space is not cached under its current name or
-if the new name is already cached. The caller must hold fil_system->mutex.
-@return TRUE if success */
-static
-ibool
-fil_rename_tablespace_in_mem(
-/*=========================*/
-	fil_space_t*	space,	/*!< in: tablespace memory object */
-	fil_node_t*	node,	/*!< in: file node of that tablespace */
-	const char*	new_name,	/*!< in: new name */
-	const char*	new_path)	/*!< in: new file path */
-{
-	ut_ad(mutex_own(&fil_system->mutex));
-
-	const char*	current_name = space->name;
-
-	/* The space must be reachable through its current name. */
-	if (fil_space_get_by_name(current_name) != space) {
-		fputs("InnoDB: Error: cannot find ", stderr);
-		ut_print_filename(stderr, current_name);
-		fputs(" in tablespace memory cache\n", stderr);
-
-		return(FALSE);
-	}
-
-	/* The new name must not be taken by any cached space. */
-	if (fil_space_get_by_name(new_name) != NULL) {
-		fputs("InnoDB: Error: ", stderr);
-		ut_print_filename(stderr, new_name);
-		fputs(" is already in tablespace memory cache\n", stderr);
-
-		return(FALSE);
-	}
-
-	/* Unhash under the old name before that string is released. */
-	HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
-		    ut_fold_string(current_name), space);
-
-	mem_free(space->name);
-	space->name = mem_strdup(new_name);
-
-	mem_free(node->name);
-	node->name = mem_strdup(new_path);
-
-	HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
-		    ut_fold_string(new_name), space);
-
-	return(TRUE);
-}
-
-/*******************************************************************//**
-Builds the .ibd file name of a single-table tablespace in a freshly
-allocated buffer. When is_full_path is set, ".ibd" is appended to name
-as is; otherwise the result is "<datadir>/<name>.ibd". The result is passed
-through srv_normalize_path_for_win(). The string must be freed by the
-caller with mem_free().
-@return own: file name */
-UNIV_INTERN
-char*
-fil_make_ibd_name(
-/*==============*/
-	const char*	name,		/*!< in: table name or a dir path */
-	bool		is_full_path)	/*!< in: TRUE if it is a dir path */
-{
-	const ulint	name_len = strlen(name);
-	/* Sized for the longer of the two forms: datadir + '/' + name
-	+ ".ibd" + terminating NUL. */
-	const ulint	buf_len = strlen(fil_path_to_mysql_datadir)
-		+ name_len + sizeof "/.ibd";
-	char*		ibd_path = static_cast<char*>(mem_alloc(buf_len));
-
-	if (!is_full_path) {
-		ut_snprintf(ibd_path, buf_len, "%s/%s.ibd",
-			    fil_path_to_mysql_datadir, name);
-	} else {
-		memcpy(ibd_path, name, name_len);
-		/* sizeof includes the terminating NUL of ".ibd". */
-		memcpy(ibd_path + name_len, ".ibd", sizeof ".ibd");
-	}
-
-	srv_normalize_path_for_win(ibd_path);
-
-	return(ibd_path);
-}
-
-/*******************************************************************//**
-Builds the name of a tablespace ISL file (InnoDB Symbolic Link),
-"<datadir>/<name>.isl", in a freshly allocated buffer and passes it
-through srv_normalize_path_for_win().
-The string must be freed by caller with mem_free().
-@return own: file name */
-UNIV_INTERN
-char*
-fil_make_isl_name(
-/*==============*/
-	const char*	name)	/*!< in: table name */
-{
-	/* datadir + '/' + name + ".isl" + terminating NUL */
-	const ulint	buf_len = strlen(fil_path_to_mysql_datadir)
-		+ strlen(name) + sizeof "/.isl";
-	char*		isl_path = static_cast<char*>(mem_alloc(buf_len));
-
-	ut_snprintf(isl_path, buf_len, "%s/%s.isl",
-		    fil_path_to_mysql_datadir, name);
-
-	srv_normalize_path_for_win(isl_path);
-
-	return(isl_path);
-}
-
-/** Test if a tablespace file can be renamed to a new filepath by checking
-that the old filepath exists and the new filepath does not exist.
-The source check is skipped for a discarded tablespace, and a source whose
-status cannot be determined is not treated as missing. A target whose
-status cannot be determined is refused in the same way as an existing one.
-@param[in]	space_id	tablespace id (only used in messages)
-@param[in]	old_path	old filepath
-@param[in]	new_path	new filepath
-@param[in]	is_discarded	whether the tablespace is discarded
-@return DB_SUCCESS, DB_TABLESPACE_NOT_FOUND (source missing) or
-DB_TABLESPACE_EXISTS (target present or not checkable) */
-dberr_t
-fil_rename_tablespace_check(
-	ulint		space_id,
-	const char*	old_path,
-	const char*	new_path,
-	bool		is_discarded)
-{
-	ibool		exists = FALSE;
-	os_file_type_t	ftype;
-
-	if (!is_discarded
-	    && os_file_status(old_path, &exists, &ftype)
-	    && !exists) {
-		/* space_id is cast to match %lu, as done for space ids
-		elsewhere in this file. */
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Cannot rename '%s' to '%s' for space ID %lu"
-			" because the source file does not exist.",
-			old_path, new_path, (ulong) space_id);
-
-		return(DB_TABLESPACE_NOT_FOUND);
-	}
-
-	exists = FALSE;
-	if (!os_file_status(new_path, &exists, &ftype) || exists) {
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Cannot rename '%s' to '%s' for space ID %lu"
-			" because the target file exists."
-			" Remove the target file and try again.",
-			old_path, new_path, (ulong) space_id);
-
-		return(DB_TABLESPACE_EXISTS);
-	}
-
-	return(DB_SUCCESS);
-}
-
-/*******************************************************************//**
-Renames a single-table tablespace. The tablespace must be cached in the
-tablespace memory cache.
-
-The function loops (label retry) until the single file node of the space
-has no pending i/o, no pending flushes, is not being extended and has no
-unflushed modifications. It sleeps between attempts, prints a warning on
-every 1000th iteration and gives up (returning FALSE) once the iteration
-count exceeds 25000. After that it closes the file if open, renames the
-space and node in the memory cache, renames the file on disk, and reverts
-the in-memory rename if the on-disk rename failed. On success, when not in
-recovery and not running as XtraBackup, an MLOG_FILE_RENAME record is
-written in its own mini-transaction.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_rename_tablespace(
-/*==================*/
- const char* old_name_in, /*!< in: old table name in the
- standard databasename/tablename
- format of InnoDB, or NULL if we
- do the rename based on the space
- id only */
- ulint id, /*!< in: space id */
- const char* new_name, /*!< in: new table name in the
- standard databasename/tablename
- format of InnoDB */
- const char* new_path_in) /*!< in: new full datafile path
- if the tablespace is remotely
- located, or NULL if it is located
- in the normal data directory. */
-{
- ibool success;
- fil_space_t* space;
- fil_node_t* node;
- ulint count = 0;
- char* new_path;
- char* old_name;
- char* old_path;
- const char* not_given = "(name not specified)";
-
- ut_a(id != 0);
-
-retry:
- count++;
-
- /* Report lack of progress on every 1000th pass through the loop. */
- if (!(count % 1000)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: problems renaming ", stderr);
- ut_print_filename(stderr,
- old_name_in ? old_name_in : not_given);
- fputs(" to ", stderr);
- ut_print_filename(stderr, new_name);
- fprintf(stderr, ", %lu iterations\n", (ulong) count);
- }
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- DBUG_EXECUTE_IF("fil_rename_tablespace_failure_1", space = NULL; );
-
- if (space == NULL) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot find space id %lu in the tablespace "
- "memory cache, though the table '%s' in a "
- "rename operation should have that id.",
- (ulong) id, old_name_in ? old_name_in : not_given);
- mutex_exit(&fil_system->mutex);
-
- return(FALSE);
- }
-
- /* Give up after too many retries; clear stop_ios, which an earlier
- iteration set, before returning. */
- if (count > 25000) {
- space->stop_ios = FALSE;
- mutex_exit(&fil_system->mutex);
-
- return(FALSE);
- }
-
- /* We temporarily close the .ibd file because we do not trust that
- operating systems can rename an open file. For the closing we have to
- wait until there are no pending i/o's or flushes on the file. */
-
- space->stop_ios = TRUE;
-
- /* The following code must change when InnoDB supports
- multiple datafiles per tablespace. */
- ut_a(UT_LIST_GET_LEN(space->chain) == 1);
- node = UT_LIST_GET_FIRST(space->chain);
-
- if (node->n_pending > 0
- || node->n_pending_flushes > 0
- || node->being_extended) {
- /* There are pending i/o's or flushes or the file is
- currently being extended, sleep for a while and
- retry */
-
- mutex_exit(&fil_system->mutex);
-
- os_thread_sleep(20000);
-
- goto retry;
-
- } else if (node->modification_counter > node->flush_counter) {
- /* Flush the space */
-
- mutex_exit(&fil_system->mutex);
-
- os_thread_sleep(20000);
-
- fil_flush(id);
-
- goto retry;
-
- } else if (node->open) {
- /* Close the file */
-
- fil_node_close_file(node, fil_system);
- }
-
- /* Check that the old name in the space is right */
-
- if (old_name_in) {
- old_name = mem_strdup(old_name_in);
- ut_a(strcmp(space->name, old_name) == 0);
- } else {
- old_name = mem_strdup(space->name);
- }
- /* Private copies of the old name and path are needed because the
- in-memory rename below frees space->name and node->name. */
- old_path = mem_strdup(node->name);
-
- /* Rename the tablespace and the node in the memory cache */
- new_path = new_path_in ? mem_strdup(new_path_in)
- : fil_make_ibd_name(new_name, false);
-
- success = fil_rename_tablespace_in_mem(
- space, node, new_name, new_path);
-
- if (success) {
-
- /* Debug injection: skip the on-disk rename and force failure. */
- DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
- goto skip_second_rename; );
-
- success = os_file_rename(
- innodb_file_data_key, old_path, new_path);
-
- DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
-skip_second_rename:
- success = FALSE; );
-
- if (!success) {
- /* We have to revert the changes we made
- to the tablespace memory cache */
-
- ut_a(fil_rename_tablespace_in_mem(
- space, node, old_name, old_path));
- }
- }
-
- space->stop_ios = FALSE;
-
- mutex_exit(&fil_system->mutex);
-
-#ifndef UNIV_HOTBACKUP
- /* Log the rename only for a successful rename outside recovery and
- outside XtraBackup. */
- if (success && !recv_recovery_on && !IS_XTRABACKUP()) {
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- fil_op_write_log(MLOG_FILE_RENAME, id, 0, 0, old_name, new_name,
- &mtr);
- mtr_commit(&mtr);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- mem_free(new_path);
- mem_free(old_path);
- mem_free(old_name);
-
- return(success);
-}
-
-/*******************************************************************//**
-Creates a new InnoDB Symbolic Link (ISL) file. It is always created
-under the 'datadir' of MySQL. The datadir is the directory of a
-running mysqld program. We can refer to it by simply using the path '.'.
-
-If a link file for the table already exists and already contains filepath,
-nothing is done and DB_SUCCESS is returned. If it exists with different
-content, or its status cannot be determined, DB_TABLESPACE_EXISTS is
-returned. If writing or closing the new link file fails, the incomplete
-file is removed again so that a later attempt is not blocked by it.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fil_create_link_file(
-/*=================*/
-	const char*	tablename,	/*!< in: tablename */
-	const char*	filepath)	/*!< in: pathname of tablespace */
-{
-	dberr_t		err = DB_SUCCESS;
-	char*		prev_filepath = fil_read_link_file(tablename);
-
-	ut_ad(!srv_read_only_mode);
-
-	if (prev_filepath) {
-		/* Truncate will call this with an existing
-		link file which contains the same filepath. */
-		const bool	unchanged
-			= (0 == strcmp(prev_filepath, filepath));
-
-		mem_free(prev_filepath);
-
-		if (unchanged) {
-			return(DB_SUCCESS);
-		}
-	}
-
-	char*		link_filepath = fil_make_isl_name(tablename);
-
-	/* Check if the file already exists. */
-	FILE*		file = NULL;
-	ibool		exists = FALSE;
-	os_file_type_t	ftype;
-	ulint		error = 0;
-
-	if (os_file_status(link_filepath, &exists, &ftype) && !exists) {
-		file = fopen(link_filepath, "w");
-		if (file == NULL) {
-			/* This call will print its own error message */
-			error = os_file_get_last_error(true);
-		}
-	} else {
-		error = OS_FILE_ALREADY_EXISTS;
-	}
-
-	/* Test the stream rather than the error code, so that a NULL
-	stream can never reach fwrite() even if no error code was set. */
-	if (file == NULL) {
-
-		ut_print_timestamp(stderr);
-		fputs(" InnoDB: Cannot create file ", stderr);
-		ut_print_filename(stderr, link_filepath);
-		fputs(".\n", stderr);
-
-		if (error == OS_FILE_ALREADY_EXISTS) {
-			/* Name the link file itself, not the tablespace
-			file that it would point to. */
-			fputs("InnoDB: The link file: ", stderr);
-			ut_print_filename(stderr, link_filepath);
-			fputs(" already exists.\n", stderr);
-			err = DB_TABLESPACE_EXISTS;
-		} else if (error == OS_FILE_DISK_FULL) {
-			err = DB_OUT_OF_FILE_SPACE;
-		} else if (error == OS_FILE_OPERATION_NOT_SUPPORTED) {
-			err = DB_UNSUPPORTED;
-		} else {
-			err = DB_ERROR;
-		}
-
-		/* file is not open, no need to close it. */
-		mem_free(link_filepath);
-		return(err);
-	}
-
-	const size_t	path_len = strlen(filepath);
-
-	if (fwrite(filepath, 1, path_len, file) != path_len) {
-		os_file_get_last_error(true);
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"cannot write link file "
-			"%s", link_filepath);
-		err = DB_ERROR;
-	}
-
-	/* Close the file, we only need it at startup. The stream is
-	buffered, so a write error may only surface when it is closed. */
-	if (fclose(file) != 0 && err == DB_SUCCESS) {
-		os_file_get_last_error(true);
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"cannot write link file "
-			"%s", link_filepath);
-		err = DB_ERROR;
-	}
-
-	if (err != DB_SUCCESS) {
-		/* The file did not exist before this call; do not leave
-		an incomplete link file behind. */
-		os_file_delete_if_exists(innodb_file_data_key, link_filepath);
-	}
-
-	mem_free(link_filepath);
-
-	return(err);
-}
-
-/*******************************************************************//**
-Deletes the InnoDB Symbolic Link (ISL) file of a table, if it exists.
-The file name is derived from the table name with fil_make_isl_name(). */
-UNIV_INTERN
-void
-fil_delete_link_file(
-/*=================*/
-	const char*	tablename)	/*!< in: name of table */
-{
-	char* const	isl_path = fil_make_isl_name(tablename);
-
-	os_file_delete_if_exists(innodb_file_data_key, isl_path);
-	mem_free(isl_path);
-}
-
-/*******************************************************************//**
-Reads an InnoDB Symbolic Link (ISL) file.
-It is always created under the 'datadir' of MySQL. The name is of the
-form {databasename}/{tablename}. and the isl file is expected to be in a
-'{databasename}' directory called '{tablename}.isl'. The caller must free
-the memory of the null-terminated path returned if it is not null.
-
-Trailing bytes with a value of 0x20 or less (spaces, control characters)
-are trimmed from the path, but never below a length of five bytes. Bytes
-of 0x80 and above are kept, so a path ending in a multi-byte character
-is returned intact.
-@return own: filepath found in link file, NULL if not found. */
-UNIV_INTERN
-char*
-fil_read_link_file(
-/*===============*/
-	const char*	name)	/*!< in: tablespace name */
-{
-	char*	filepath = NULL;
-	char*	link_filepath;
-	FILE*	file = NULL;
-
-	/* The .isl file is in the 'normal' tablespace location. */
-	link_filepath = fil_make_isl_name(name);
-
-	/* NOTE(review): the file is only read here, yet it is opened for
-	update ("r+b"), which needs write permission - confirm whether
-	plain "rb" would do. */
-	file = fopen(link_filepath, "r+b");
-
-	mem_free(link_filepath);
-
-	if (file) {
-		filepath = static_cast<char*>(mem_alloc(OS_FILE_MAX_PATH));
-
-		os_file_read_string(file, filepath, OS_FILE_MAX_PATH);
-		fclose(file);
-
-		const size_t	len = strlen(filepath);
-
-		if (len) {
-			/* Trim whitespace from end of filepath. Compare as
-			unsigned char: where plain char is signed, bytes
-			>= 0x80 are negative and would be taken for
-			whitespace. */
-			ulint	lastch = len - 1;
-			while (lastch > 4
-			       && static_cast<unsigned char>(
-				       filepath[lastch]) <= 0x20) {
-				filepath[lastch--] = 0x00;
-			}
-			srv_normalize_path_for_win(filepath);
-		}
-	}
-
-	return(filepath);
-}
-
-/*******************************************************************//**
-Reads the InnoDB Symbolic Link file of a table and opens, read-only, the
-tablespace file it names. On success *remote_filepath owns the path read
-from the link file. If the link file is missing, or the linked file
-cannot be opened (which is logged), *remote_filepath is NULL on return.
-@return TRUE if remote linked tablespace file is found and opened. */
-UNIV_INTERN
-ibool
-fil_open_linked_file(
-/*===============*/
-	const char*	tablename,	/*!< in: database/tablename */
-	char**		remote_filepath,/*!< out: remote filepath */
-	pfs_os_file_t*	remote_file,	/*!< out: remote file handle */
-	ulint		atomic_writes)	/*!< in: atomic writes table option
-					value */
-{
-	ibool	opened;
-
-	*remote_filepath = fil_read_link_file(tablename);
-
-	if (*remote_filepath == NULL) {
-		/* No link file for this table. */
-		return(FALSE);
-	}
-
-	/* Try to open the file that the link file points to. */
-	*remote_file = os_file_create_simple_no_error_handling(
-		innodb_file_data_key, *remote_filepath,
-		OS_FILE_OPEN, OS_FILE_READ_ONLY,
-		&opened, atomic_writes);
-
-	if (opened) {
-		return(opened);
-	}
-
-	char*	isl_path = fil_make_isl_name(tablename);
-
-	/* The following call prints an error message */
-	os_file_get_last_error(true);
-
-	ib_logf(IB_LOG_LEVEL_ERROR,
-		"A link file was found named '%s' "
-		"but the linked tablespace '%s' "
-		"could not be opened.",
-		isl_path, *remote_filepath);
-
-	mem_free(isl_path);
-
-	/* Do not hand a path to the caller when nothing was opened. */
-	mem_free(*remote_filepath);
-	*remote_filepath = NULL;
-
-	return(opened);
-}
-
-/*******************************************************************//**
-Creates a new single-table tablespace to a database directory of MySQL.
-Database directories are under the 'datadir' of MySQL. The datadir is the
-directory of a running mysqld program. We can refer to it by simply the
-path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
-dir of the mysqld server.
-
-Steps, in order: build the file path (temporary, remote or default);
-create the file; extend it to its initial size; write the first page
-carrying the space id and flags and flush it; create the .isl link file
-for a remote tablespace; optionally create crypt data; register the
-space and its file node in the memory cache; and, unless running as
-XtraBackup, write an MLOG_FILE_CREATE or MLOG_FILE_CREATE2 record.
-The file handle is closed before returning on every path that got as far
-as creating the file; on failure the file is also deleted.
-
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fil_create_new_single_table_tablespace(
-/*===================================*/
- ulint space_id, /*!< in: space id */
- const char* tablename, /*!< in: the table name in the usual
- databasename/tablename format
- of InnoDB */
- const char* dir_path, /*!< in: NULL or a dir path */
- ulint flags, /*!< in: tablespace flags */
- ulint flags2, /*!< in: table flags2 */
- ulint size, /*!< in: the initial size of the
- tablespace file in pages,
- must be >= FIL_IBD_FILE_INITIAL_SIZE */
- fil_encryption_t mode, /*!< in: encryption mode */
- ulint key_id) /*!< in: encryption key_id */
-{
- pfs_os_file_t file;
-
- ibool ret;
- dberr_t err;
- byte* buf2;
- byte* page;
- char* path;
- ibool success;
- /* TRUE if a table is created with CREATE TEMPORARY TABLE */
- bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY);
-
-
- /* For XtraBackup recovery we force remote tablespaces to be local,
- i.e. never execute the code path corresponding to has_data_dir == true.
- We don't create .isl files either, because we rely on innobackupex to
- copy them under a global lock, and use them to copy remote tablespaces
- to their proper locations on --copy-back.
-
- See also MySQL bug #72022: dir_path is always NULL for remote
- tablespaces when a MLOG_FILE_CREATE* log record is replayed (the remote
- directory is not available from MLOG_FILE_CREATE*). */
- bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags) != 0 && !IS_XTRABACKUP();
- ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags);
- fil_space_crypt_t *crypt_data = NULL;
-
- ut_a(space_id > 0);
- ut_ad(!srv_read_only_mode);
- ut_a(space_id < SRV_LOG_SPACE_FIRST_ID);
- ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
- ut_a(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK));
-
- if (is_temp) {
- /* Temporary table filepath */
- ut_ad(dir_path);
- path = fil_make_ibd_name(dir_path, true);
- } else if (has_data_dir) {
- ut_ad(dir_path);
- path = os_file_make_remote_pathname(dir_path, tablename, "ibd");
-
- /* Since this tablespace file will be created in a
- remote directory, let's create the subdirectories
- in the path, if they are not there already. */
- success = os_file_create_subdirs_if_needed(path);
- if (!success) {
- err = DB_ERROR;
- goto error_exit_3;
- }
- } else {
- path = fil_make_ibd_name(tablename, false);
- }
-
- file = os_file_create(
- innodb_file_data_key, path,
- OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
- OS_FILE_NORMAL,
- OS_DATA_FILE,
- &ret,
- atomic_writes);
-
- if (ret == FALSE) {
- /* The following call will print an error message */
- ulint error = os_file_get_last_error(true);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot create file '%s'\n", path);
-
- if (error == OS_FILE_ALREADY_EXISTS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "The file '%s' already exists though the "
- "corresponding table did not exist "
- "in the InnoDB data dictionary. "
- "Have you moved InnoDB .ibd files "
- "around without using the SQL commands "
- "DISCARD TABLESPACE and IMPORT TABLESPACE, "
- "or did mysqld crash in the middle of "
- "CREATE TABLE? "
- "You can resolve the problem by removing "
- "the file '%s' under the 'datadir' of MySQL.",
- path, path);
-
- err = DB_TABLESPACE_EXISTS;
- goto error_exit_3;
- }
-
- if (error == OS_FILE_OPERATION_NOT_SUPPORTED) {
- err = DB_UNSUPPORTED;
- goto error_exit_3;
- }
-
- if (error == OS_FILE_DISK_FULL) {
- err = DB_OUT_OF_FILE_SPACE;
- goto error_exit_3;
- }
-
- err = DB_ERROR;
- goto error_exit_3;
- }
-
- {
- /* fil_read_first_page() expects UNIV_PAGE_SIZE bytes.
- fil_node_open_file() expects at least 4 * UNIV_PAGE_SIZE bytes.
- Do not create too short ROW_FORMAT=COMPRESSED files. */
- const ulint zip_size = fsp_flags_get_zip_size(flags);
- const ulint page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
- const os_offset_t fsize = std::max(
- os_offset_t(size) * page_size,
- os_offset_t(FIL_IBD_FILE_INITIAL_SIZE
- * UNIV_PAGE_SIZE));
- /* ROW_FORMAT=COMPRESSED files never use page_compression
- (are never sparse). */
- ut_ad(!zip_size || !FSP_FLAGS_HAS_PAGE_COMPRESSION(flags));
-
- ret = os_file_set_size(path, file, fsize,
- FSP_FLAGS_HAS_PAGE_COMPRESSION(flags));
- }
-
- if (!ret) {
- err = DB_OUT_OF_FILE_SPACE;
- goto error_exit_2;
- }
-
- /* printf("Creating tablespace %s id %lu\n", path, space_id); */
-
- /* We have to write the space id to the file immediately and flush the
- file to disk. This is because in crash recovery we must be aware what
- tablespaces exist and what are their space id's, so that we can apply
- the log records to the right file. It may take quite a while until
- buffer pool flush algorithms write anything to the file and flush it to
- disk. If we would not write here anything, the file would be filled
- with zeros from the call of os_file_set_size(), until a buffer pool
- flush would write to it. */
-
- buf2 = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE));
- /* Align the memory for file i/o if we might have O_DIRECT set */
- page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
-
- memset(page, '\0', UNIV_PAGE_SIZE);
-
- flags |= FSP_FLAGS_PAGE_SSIZE();
- fsp_header_init_fields(page, space_id, flags);
- mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
-
- if (const ulint zip_size = fsp_flags_get_zip_size(flags)) {
- page_zip_des_t page_zip;
-
- page_zip_set_size(&page_zip, zip_size);
- /* The compressed page image goes into the buffer right after
- the uncompressed page, and is what gets written to the file. */
- page_zip.data = page + UNIV_PAGE_SIZE;
-#ifdef UNIV_DEBUG
- page_zip.m_start =
-#endif /* UNIV_DEBUG */
- page_zip.m_end = page_zip.m_nonempty =
- page_zip.n_blobs = 0;
- buf_flush_init_for_writing(page, &page_zip, 0);
- ret = os_file_write(path, file, page_zip.data, 0, zip_size);
- } else {
- buf_flush_init_for_writing(page, NULL, 0);
- ret = os_file_write(path, file, page, 0, UNIV_PAGE_SIZE);
- }
-
- ut_free(buf2);
-
- if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Could not write the first page to tablespace "
- "'%s'", path);
-
- err = DB_ERROR;
- goto error_exit_2;
- }
-
- ret = os_file_flush(file);
-
- if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "File flush of tablespace '%s' failed", path);
- err = DB_ERROR;
- goto error_exit_2;
- }
-
- if (has_data_dir) {
- /* Now that the IBD file is created, make the ISL file. */
- err = fil_create_link_file(tablename, path);
- if (err != DB_SUCCESS) {
- goto error_exit_2;
- }
- }
-
- /* Create crypt data if the tablespace is either encrypted or user has
- requested it to remain unencrypted. */
- if (mode == FIL_ENCRYPTION_ON || mode == FIL_ENCRYPTION_OFF ||
- srv_encrypt_tables) {
- crypt_data = fil_space_create_crypt_data(mode, key_id);
- }
-
- /* NOTE(review): if fil_space_create() or fil_node_create() fails,
- nothing in this function releases crypt_data - confirm who owns it
- on that path. */
- success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE,
- crypt_data, true, mode);
-
- if (!success || !fil_node_create(path, size, space_id, FALSE)) {
- err = DB_ERROR;
- goto error_exit_1;
- }
-
-#ifndef UNIV_HOTBACKUP
- if (!IS_XTRABACKUP())
- {
- mtr_t mtr;
- ulint mlog_file_flag = 0;
-
- if (is_temp) {
- mlog_file_flag |= MLOG_FILE_FLAG_TEMP;
- }
-
- mtr_start(&mtr);
-
- fil_op_write_log(flags
- ? MLOG_FILE_CREATE2
- : MLOG_FILE_CREATE,
- space_id, mlog_file_flag,
- flags & ~FSP_FLAGS_MEM_MASK,
- tablename, NULL, &mtr);
-
- mtr_commit(&mtr);
- }
-#endif
- err = DB_SUCCESS;
-
- /* Error code is set. Cleanup the various variables used.
- These labels reflect the order in which variables are assigned or
- actions are done. The success path falls through all three labels
- as well, which is why each destructive step is guarded by
- err != DB_SUCCESS while the close and the free are unconditional. */
-error_exit_1:
- if (has_data_dir && err != DB_SUCCESS) {
- fil_delete_link_file(tablename);
- }
-error_exit_2:
- os_file_close(file);
- if (err != DB_SUCCESS) {
- os_file_delete(innodb_file_data_key, path);
- }
-error_exit_3:
- mem_free(path);
-
- return(err);
-}
-
-#include "pars0pars.h"
-#include "que0que.h"
-#include "dict0priv.h"
-/** Removes the rows that describe a table from the InnoDB system tables
-by running internal SQL procedures in a newly allocated DDL transaction,
-which is committed and freed before returning.
-The first procedure deletes, for the named table: the SYS_FOREIGN and
-SYS_FOREIGN_COLS rows of foreign keys whose FOR_NAME is the table, the
-SYS_FIELDS and SYS_INDEXES rows of its indexes, and its SYS_COLUMNS and
-SYS_TABLES rows. The second procedure, run only if SYS_DATAFILES exists,
-deletes the SYS_TABLESPACES and SYS_DATAFILES rows of the table's space.
-The results of que_eval_sql() are not checked.
-The caller must hold dict_sys->mutex (asserted in debug builds).
-@param[in] name table name to look up in SYS_TABLES.NAME */
-static
-void
-fil_remove_invalid_table_from_data_dict(const char *name)
-{
- trx_t* trx;
- pars_info_t* info = NULL;
-
- trx = trx_allocate_for_mysql();
- trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
-
- ut_ad(mutex_own(&dict_sys->mutex));
-
- trx->op_info = "removing invalid table from data dictionary";
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "table_name", name);
-
- /* In the procedure below, "found" doubles as the loop flag: it is
- cleared to skip the foreign key loop when SYS_FOREIGN does not exist
- or when the table being removed is SYS_FOREIGN or SYS_FOREIGN_COLS
- itself, and it is set again before the index loop. */
- que_eval_sql(info,
- "PROCEDURE DROP_TABLE_PROC () IS\n"
- "sys_foreign_id CHAR;\n"
- "table_id CHAR;\n"
- "index_id CHAR;\n"
- "foreign_id CHAR;\n"
- "found INT;\n"
-
- "DECLARE CURSOR cur_fk IS\n"
- "SELECT ID FROM SYS_FOREIGN\n"
- "WHERE FOR_NAME = :table_name\n"
- "AND TO_BINARY(FOR_NAME)\n"
- " = TO_BINARY(:table_name)\n"
- "LOCK IN SHARE MODE;\n"
-
- "DECLARE CURSOR cur_idx IS\n"
- "SELECT ID FROM SYS_INDEXES\n"
- "WHERE TABLE_ID = table_id\n"
- "LOCK IN SHARE MODE;\n"
-
- "BEGIN\n"
- "SELECT ID INTO table_id\n"
- "FROM SYS_TABLES\n"
- "WHERE NAME = :table_name\n"
- "LOCK IN SHARE MODE;\n"
- "IF (SQL % NOTFOUND) THEN\n"
- " RETURN;\n"
- "END IF;\n"
- "found := 1;\n"
- "SELECT ID INTO sys_foreign_id\n"
- "FROM SYS_TABLES\n"
- "WHERE NAME = 'SYS_FOREIGN'\n"
- "LOCK IN SHARE MODE;\n"
- "IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- "END IF;\n"
- "IF (:table_name = 'SYS_FOREIGN') THEN\n"
- " found := 0;\n"
- "END IF;\n"
- "IF (:table_name = 'SYS_FOREIGN_COLS') THEN\n"
- " found := 0;\n"
- "END IF;\n"
- "OPEN cur_fk;\n"
- "WHILE found = 1 LOOP\n"
- " FETCH cur_fk INTO foreign_id;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSE\n"
- " DELETE FROM SYS_FOREIGN_COLS\n"
- " WHERE ID = foreign_id;\n"
- " DELETE FROM SYS_FOREIGN\n"
- " WHERE ID = foreign_id;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE cur_fk;\n"
- "found := 1;\n"
- "OPEN cur_idx;\n"
- "WHILE found = 1 LOOP\n"
- " FETCH cur_idx INTO index_id;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSE\n"
- " DELETE FROM SYS_FIELDS\n"
- " WHERE INDEX_ID = index_id;\n"
- " DELETE FROM SYS_INDEXES\n"
- " WHERE ID = index_id\n"
- " AND TABLE_ID = table_id;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE cur_idx;\n"
- "DELETE FROM SYS_COLUMNS\n"
- "WHERE TABLE_ID = table_id;\n"
- "DELETE FROM SYS_TABLES\n"
- "WHERE NAME = :table_name;\n"
- "END;\n"
- , FALSE, trx);
-
- /* SYS_DATAFILES and SYS_TABLESPACES do not necessarily exist
- on XtraBackup recovery. See comments around
- dict_create_or_check_foreign_constraint_tables() in
- innobase_start_or_create_for_mysql().
-
- NOTE(review): the procedure below looks the table up in SYS_TABLES
- after the procedure above has already deleted that row in the same
- transaction. If the lookup then finds nothing, it returns without
- deleting from SYS_TABLESPACES or SYS_DATAFILES - confirm whether this
- block should run before the one above. */
- if (dict_table_get_low("SYS_DATAFILES") != NULL) {
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "table_name", name);
-
- que_eval_sql(info,
- "PROCEDURE DROP_TABLE_PROC () IS\n"
- "space_id INT;\n"
-
- "BEGIN\n"
- "SELECT SPACE INTO space_id\n"
- "FROM SYS_TABLES\n"
- "WHERE NAME = :table_name;\n"
- "IF (SQL % NOTFOUND) THEN\n"
- " RETURN;\n"
- "END IF;\n"
- "DELETE FROM SYS_TABLESPACES\n"
- "WHERE SPACE = space_id;\n"
- "DELETE FROM SYS_DATAFILES\n"
- "WHERE SPACE = space_id;\n"
- "END;\n"
- , FALSE, trx);
- }
-
- trx_commit_for_mysql(trx);
-
- trx_free_for_mysql(trx);
-}
-
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Logs an error about a tablespace file that cannot be used. If a check
-message is given, it is reported together with the expected space id and
-flags; otherwise the id and flags found in the file are reported next to
-the expected ones. */
-static
-void
-fil_report_bad_tablespace(
-/*======================*/
-	const char*	filepath,	/*!< in: filepath */
-	const char*	check_msg,	/*!< in: fil_check_first_page() */
-	ulint		found_id,	/*!< in: found space ID */
-	ulint		found_flags,	/*!< in: found flags */
-	ulint		expected_id,	/*!< in: expected space id */
-	ulint		expected_flags)	/*!< in: expected flags */
-{
-	if (!check_msg) {
-		/* The first page was readable, but it does not match
-		what the data dictionary expects. */
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"In file '%s', tablespace id and flags are %lu and %lu, "
-			"but in the InnoDB data dictionary they are %lu and %lu. "
-			"Have you moved InnoDB .ibd files around without using the "
-			"commands DISCARD TABLESPACE and IMPORT TABLESPACE? "
-			"Please refer to "
-			REFMAN "innodb-troubleshooting-datadict.html "
-			"for how to resolve the issue.",
-			filepath, (ulong) found_id, (ulong) found_flags,
-			(ulong) expected_id, (ulong) expected_flags);
-		return;
-	}
-
-	/* A check message was supplied: report it with the expected
-	id and flags. */
-	ib_logf(IB_LOG_LEVEL_ERROR,
-		"Error %s in file '%s',"
-		"tablespace id=%lu, flags=%lu. "
-		"Please refer to "
-		REFMAN "innodb-troubleshooting-datadict.html "
-		"for how to resolve the issue.",
-		check_msg, filepath,
-		(ulong) expected_id, (ulong) expected_flags);
-}
-
-/** Try to adjust FSP_SPACE_FLAGS if they differ from the expectations.
-(Typically when upgrading from MariaDB 10.1.0..10.1.20.)
-Page 0 of the tablespace is fetched with an X-latch inside a
-mini-transaction; if the flags stored in its header differ from the
-desired ones, the desired flags are written with a logged 4-byte write.
-Nothing is written if the page cannot be fetched.
-@param[in]	space_id	tablespace ID
-@param[in]	flags		desired tablespace flags */
-UNIV_INTERN
-void
-fsp_flags_try_adjust(ulint space_id, ulint flags)
-{
-	ut_ad(!srv_read_only_mode);
-	ut_ad(fsp_flags_is_valid(flags));
-
-	mtr_t	mtr;
-
-	mtr_start(&mtr);
-
-	buf_block_t*	block = buf_page_get(
-		space_id, fsp_flags_get_zip_size(flags), 0, RW_X_LATCH,
-		&mtr);
-
-	if (block) {
-		const ulint	stored = fsp_header_get_flags(block->frame);
-
-		/* Suppress the message if only the DATA_DIR flag
-		differs. */
-		if ((stored ^ flags) & ~(1U << FSP_FLAGS_POS_RESERVED)) {
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"adjusting FSP_SPACE_FLAGS of tablespace "
-				ULINTPF " from 0x%x to 0x%x",
-				space_id, int(stored), int(flags));
-		}
-
-		/* The write itself happens on any difference, including
-		one in the flag that the message ignores. */
-		if (stored != flags) {
-			mlog_write_ulint(FSP_HEADER_OFFSET
-					 + FSP_SPACE_FLAGS + block->frame,
-					 flags, MLOG_4BYTES, &mtr);
-		}
-	}
-
-	mtr_commit(&mtr);
-}
-
-/********************************************************************//**
-Tries to open a single-table tablespace and optionally checks that the
-space id in it is correct. If this does not succeed, print an error message
-to the .err log. This function is used to open a tablespace when we start
-mysqld after the dictionary has been booted, and also in IMPORT TABLESPACE.
-
-NOTE that we assume this operation is used either at the database startup
-or under the protection of the dictionary mutex, so that two users cannot
-race here. This operation does not leave the file associated with the
-tablespace open, but closes it after we have looked at the space id in it.
-
-If the validate boolean is set, we read the first page of the file and
-check that the space id in the file is what we expect. We assume that
-this function runs much faster if no check is made, since accessing the
-file inode probably is much faster (the OS caches them) than accessing
-the first page of the file. This boolean may be initially FALSE, but if
-a remote tablespace is found it will be changed to true.
-
-If the fix_dict boolean is set, then it is safe to use an internal SQL
-statement to update the dictionary tables if they are incorrect.
-
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fil_open_single_table_tablespace(
-/*=============================*/
- bool validate, /*!< in: Do we validate tablespace? */
- bool fix_dict, /*!< in: Can we fix the dictionary? */
- ulint id, /*!< in: space id */
- ulint flags, /*!< in: expected FSP_SPACE_FLAGS */
- const char* tablename, /*!< in: table name in the
- databasename/tablename format */
- const char* path_in) /*!< in: table */
-{
- dberr_t err = DB_SUCCESS;
- bool dict_filepath_same_as_default = false;
- bool link_file_found = false;
- bool link_file_is_bad = false;
- fsp_open_info def;
- fsp_open_info dict;
- fsp_open_info remote;
- ulint tablespaces_found = 0;
- ulint valid_tablespaces_found = 0;
- ulint atomic_writes = 0;
- fil_space_crypt_t* crypt_data = NULL;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(!fix_dict || mutex_own(&(dict_sys->mutex)));
-
- /* Table flags can be ULINT_UNDEFINED if
- dict_tf_to_fsp_flags_failure is set. */
- if (flags == ULINT_UNDEFINED) {
- return(DB_CORRUPTION);
- }
-
- ut_ad(fsp_flags_is_valid(flags & ~FSP_FLAGS_MEM_MASK));
- atomic_writes = fsp_flags_get_atomic_writes(flags);
-
- memset(&def, 0, sizeof(def));
- memset(&dict, 0, sizeof(dict));
- memset(&remote, 0, sizeof(remote));
-
- /* Discover the correct filepath. We will always look for an ibd
- in the default location. If it is remote, it should not be here. */
- def.filepath = fil_make_ibd_name(tablename, false);
-
- /* The path_in was read from SYS_DATAFILES.
- We skip SYS_DATAFILES validation and remote tablespaces discovery for
- XtraBackup, as all tablespaces are local for XtraBackup recovery. */
- if (path_in && !IS_XTRABACKUP()) {
- if (strcmp(def.filepath, path_in)) {
- dict.filepath = mem_strdup(path_in);
- /* possibility of multiple files. */
- validate = true;
- } else {
- dict_filepath_same_as_default = true;
- }
- }
-
- link_file_found = fil_open_linked_file(
- tablename, &remote.filepath, &remote.file, atomic_writes);
- remote.success = link_file_found;
- if (remote.success) {
- /* possibility of multiple files. */
- validate = true;
- tablespaces_found++;
-
- /* A link file was found. MySQL does not allow a DATA
- DIRECTORY to be be the same as the default filepath. */
- ut_a(strcmp(def.filepath, remote.filepath));
-
- /* If there was a filepath found in SYS_DATAFILES,
- we hope it was the same as this remote.filepath found
- in the ISL file. */
- if (dict.filepath
- && (0 == strcmp(dict.filepath, remote.filepath))) {
- remote.success = FALSE;
- os_file_close(remote.file);
- mem_free(remote.filepath);
- remote.filepath = NULL;
- tablespaces_found--;
- }
- }
-
- /* Attempt to open the tablespace at other possible filepaths. */
- if (dict.filepath) {
- dict.file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, dict.filepath, OS_FILE_OPEN,
- OS_FILE_READ_ONLY, &dict.success, atomic_writes);
- if (dict.success) {
- /* possibility of multiple files. */
- validate = true;
- tablespaces_found++;
- }
- }
-
- /* Always look for a file at the default location. */
- ut_a(def.filepath);
- def.file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, def.filepath, OS_FILE_OPEN,
- OS_FILE_READ_ONLY, &def.success, atomic_writes);
-
- if (def.success) {
- tablespaces_found++;
- }
-
- /* We have now checked all possible tablespace locations and
- have a count of how many we found. If things are normal, we
- only found 1. */
- if (!validate && tablespaces_found == 1) {
- goto skip_validate;
- }
-
- /* Read the first page of the datadir tablespace, if found. */
- if (def.success) {
- def.check_msg = fil_read_first_page(
- def.file, false, &def.flags, &def.id,
- NULL, &def.crypt_data);
-
- def.valid = !def.check_msg && def.id == id
- && fsp_flags_match(flags, def.flags);
-
- if (def.valid) {
- valid_tablespaces_found++;
- } else {
- /* Do not use this tablespace. */
- fil_report_bad_tablespace(
- def.filepath, def.check_msg, def.id,
- def.flags, id, flags);
- }
- }
-
- /* Read the first page of the remote tablespace */
- if (remote.success) {
- remote.check_msg = fil_read_first_page(
- remote.file, false, &remote.flags, &remote.id,
- NULL, &remote.crypt_data);
-
- /* Validate this single-table-tablespace with SYS_TABLES. */
- remote.valid = !remote.check_msg && remote.id == id
- && fsp_flags_match(flags, remote.flags);
-
- if (remote.valid) {
- valid_tablespaces_found++;
- } else {
- /* Do not use this linked tablespace. */
- fil_report_bad_tablespace(
- remote.filepath, remote.check_msg, remote.id,
- remote.flags, id, flags);
- link_file_is_bad = true;
- }
- }
-
- /* Read the first page of the datadir tablespace, if found. */
- if (dict.success) {
- dict.check_msg = fil_read_first_page(
- dict.file, false, &dict.flags, &dict.id,
- NULL, &dict.crypt_data);
-
- /* Validate this single-table-tablespace with SYS_TABLES. */
- dict.valid = !dict.check_msg && dict.id == id
- && fsp_flags_match(flags, dict.flags);
-
- if (dict.valid) {
- valid_tablespaces_found++;
- } else {
- /* Do not use this tablespace. */
- fil_report_bad_tablespace(
- dict.filepath, dict.check_msg, dict.id,
- dict.flags, id, flags);
- }
- }
-
- /* Make sense of these three possible locations.
- First, bail out if no tablespace files were found. */
- if (valid_tablespaces_found == 0) {
- /* The following call prints an error message */
- os_file_get_last_error(true);
-
- ib_logf(IS_XTRABACKUP() ? IB_LOG_LEVEL_WARN : IB_LOG_LEVEL_ERROR,
- "Could not find a valid tablespace file for '%s'. "
- "See " REFMAN "innodb-troubleshooting-datadict.html "
- "for how to resolve the issue.",
- tablename);
-
- if (IS_XTRABACKUP() && fix_dict) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "It will be removed from the data dictionary.");
-
- if (purge_sys) {
- fil_remove_invalid_table_from_data_dict(tablename);
- }
- }
-
- err = DB_CORRUPTION;
-
- goto cleanup_and_exit;
- }
-
- /* Do not open any tablespaces if more than one tablespace with
- the correct space ID and flags were found. */
- if (tablespaces_found > 1) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "A tablespace for %s has been found in "
- "multiple places;", tablename);
-
- if (def.success) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Default location; %s"
- ", Space ID=" ULINTPF " , Flags=" ULINTPF " .",
- def.filepath,
- def.id,
- def.flags);
- }
-
- if (remote.success) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Remote location; %s"
- ", Space ID=" ULINTPF " , Flags=" ULINTPF " .",
- remote.filepath,
- remote.id,
- remote.flags);
- }
-
- if (dict.success) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Dictionary location; %s"
- ", Space ID=" ULINTPF " , Flags=" ULINTPF " .",
- dict.filepath,
- dict.id,
- dict.flags);
- }
-
- /* Force-recovery will allow some tablespaces to be
- skipped by REDO if there was more than one file found.
- Unlike during the REDO phase of recovery, we now know
- if the tablespace is valid according to the dictionary,
- which was not available then. So if we did not force
- recovery and there is only one good tablespace, ignore
- any bad tablespaces. */
- if (valid_tablespaces_found > 1 || srv_force_recovery > 0) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Will not open the tablespace for '%s'",
- tablename);
-
- if (def.success != def.valid
- || dict.success != dict.valid
- || remote.success != remote.valid) {
- err = DB_CORRUPTION;
- } else {
- err = DB_ERROR;
- }
- goto cleanup_and_exit;
- }
-
- /* There is only one valid tablespace found and we did
- not use srv_force_recovery during REDO. Use this one
- tablespace and clean up invalid tablespace pointers */
- if (def.success && !def.valid) {
- def.success = false;
- os_file_close(def.file);
- tablespaces_found--;
- }
-
- if (dict.success && !dict.valid) {
- dict.success = false;
- os_file_close(dict.file);
- /* Leave dict.filepath so that SYS_DATAFILES
- can be corrected below. */
- tablespaces_found--;
- }
- if (remote.success && !remote.valid) {
- remote.success = false;
- os_file_close(remote.file);
- mem_free(remote.filepath);
- remote.filepath = NULL;
- tablespaces_found--;
- }
- }
-
- /* At this point, there should be only one filepath. */
- ut_a(tablespaces_found == 1);
- ut_a(valid_tablespaces_found == 1);
-
- /* Only fix the dictionary at startup when there is only one thread.
- Calls to dict_load_table() can be done while holding other latches. */
- if (!fix_dict) {
- goto skip_validate;
- }
-
- /* We may need to change what is stored in SYS_DATAFILES or
- SYS_TABLESPACES or adjust the link file.
- Since a failure to update SYS_TABLESPACES or SYS_DATAFILES does
- not prevent opening and using the single_table_tablespace either
- this time or the next, we do not check the return code or fail
- to open the tablespace. But dict_update_filepath() will issue a
- warning to the log. */
- if (dict.filepath) {
- if (remote.success) {
- dict_update_filepath(id, remote.filepath);
- } else if (def.success) {
- dict_update_filepath(id, def.filepath);
- if (link_file_is_bad) {
- fil_delete_link_file(tablename);
- }
- } else if (!link_file_found || link_file_is_bad) {
- ut_ad(dict.success);
- /* Fix the link file if we got our filepath
- from the dictionary but a link file did not
- exist or it did not point to a valid file. */
- fil_delete_link_file(tablename);
- fil_create_link_file(tablename, dict.filepath);
- }
-
- } else if (remote.success && dict_filepath_same_as_default) {
- dict_update_filepath(id, remote.filepath);
-
- } else if (remote.success && path_in == NULL) {
- /* SYS_DATAFILES record for this space ID was not found. */
- dict_insert_tablespace_and_filepath(
- id, tablename, remote.filepath, flags);
- }
-
-skip_validate:
- if (remote.success)
- crypt_data = remote.crypt_data;
- else if (dict.success)
- crypt_data = dict.crypt_data;
- else if (def.success)
- crypt_data = def.crypt_data;
-
- if (err != DB_SUCCESS) {
- ; // Don't load the tablespace into the cache
- } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE,
- crypt_data, false)) {
- err = DB_ERROR;
- } else {
- /* We do not measure the size of the file, that is why
- we pass the 0 below */
-
- if (!fil_node_create(remote.success ? remote.filepath :
- dict.success ? dict.filepath :
- def.filepath, 0, id, FALSE)) {
- err = DB_ERROR;
- }
- }
-
-cleanup_and_exit:
- if (remote.success) {
- os_file_close(remote.file);
- }
- if (remote.filepath) {
- mem_free(remote.filepath);
- }
- if (remote.crypt_data && remote.crypt_data != crypt_data) {
- if (err == DB_SUCCESS) {
- fil_space_destroy_crypt_data(&remote.crypt_data);
- }
- }
- if (dict.success) {
- os_file_close(dict.file);
- }
- if (dict.filepath) {
- mem_free(dict.filepath);
- }
- if (dict.crypt_data && dict.crypt_data != crypt_data) {
- fil_space_destroy_crypt_data(&dict.crypt_data);
- }
- if (def.success) {
- os_file_close(def.file);
- }
- if (def.crypt_data && def.crypt_data != crypt_data) {
- if (err == DB_SUCCESS) {
- fil_space_destroy_crypt_data(&def.crypt_data);
- }
- }
-
- mem_free(def.filepath);
-
- /* We need to check fsp flags when no errors has happened and
- server was not started on read only mode and tablespace validation
- was requested or flags contain other table options except
- low order bits to FSP_FLAGS_POS_PAGE_SSIZE position.
- Note that flag comparison is pessimistic. Adjust is required
- only when flags contain buggy MariaDB 10.1.0 -
- MariaDB 10.1.20 flags. */
- if (err == DB_SUCCESS
- && !srv_read_only_mode
- && (validate
- || flags >= (1U << FSP_FLAGS_POS_PAGE_SSIZE))) {
- fsp_flags_try_adjust(id, flags & ~FSP_FLAGS_MEM_MASK);
- }
-
- return(err);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_HOTBACKUP
-/*******************************************************************//**
-Builds the name "<name>_ibbackup_old_vers_<timestamp>" that is given to a
-superseded copy of a single-table tablespace file.
-The buffer comes from mem_alloc(); the caller owns it and must release it
-with mem_free().
-@return own: newly allocated file name */
-static
-char*
-fil_make_ibbackup_old_name(
-/*=======================*/
-	const char*	name)	/*!< in: original file name */
-{
-	static const char	suffix[] = "_ibbackup_old_vers_";
-	const ulint		name_len = strlen(name);
-	const ulint		suffix_len = (sizeof suffix) - 1;
-
-	/* Space for the name, the suffix including its terminating NUL,
-	and 15 further bytes reserved for the timestamp text. */
-	char*	path = static_cast<char*>(
-		mem_alloc(name_len + (15 + sizeof suffix)));
-	char*	tail = path + name_len;
-
-	memcpy(path, name, name_len);
-	memcpy(tail, suffix, suffix_len);
-
-	/* The timestamp is written directly behind the suffix. */
-	ut_sprintf_timestamp_without_extra_chars(tail + suffix_len);
-
-	return(path);
-}
-#endif /* UNIV_HOTBACKUP */
-
-
-/*******************************************************************//**
-Determine the space id of the given file descriptor by reading a few
-pages from the beginning of the .ibd file.
-
-Each candidate page size from UNIV_ZIP_SIZE_MIN to UNIV_PAGE_SIZE_MAX is
-tried in turn, doubling every round. For a candidate size, up to 64 pages
-are read from the start of the file; every page that buf_page_is_corrupted()
-accepts and that stores a nonzero space id counts as one vote for that id.
-An id is chosen as soon as its votes equal the number of valid pages, or
-fall short of that number by at most 3.
-@return true if space id was successfully identified, or false. */
-static
-bool
-fil_user_tablespace_find_space_id(
-/*==============================*/
-	fsp_open_info*	fsp)	/* in/out: contains file descriptor, which is
-				used as input.  contains space_id, which is
-				the output */
-{
-	bool		st;
-	os_offset_t	file_size;
-
-	file_size = os_file_get_size(fsp->file);
-
-	if (file_size == (os_offset_t) -1) {
-		ib_logf(IB_LOG_LEVEL_ERROR, "Could not get file size: %s",
-			fsp->filepath);
-		return(false);
-	}
-
-	/* Assuming a page size, read the space_id from each page and store it
-	in a map.  Find out which space_id is agreed on by majority of the
-	pages.  Choose that space_id. */
-	for (ulint page_size = UNIV_ZIP_SIZE_MIN;
-	     page_size <= UNIV_PAGE_SIZE_MAX; page_size <<= 1) {
-
-		/* map[space_id] = count of pages */
-		std::map<ulint, ulint>	verify;
-
-		ulint	page_count = 64;
-		ulint	valid_pages = 0;
-
-		/* Adjust the number of pages to analyze based on file size */
-		while ((page_count * page_size) > file_size) {
-			--page_count;
-		}
-
-		/* ulint values are printed with ULINTPF, as elsewhere in
-		this file, so that the format matches the argument width. */
-		ib_logf(IB_LOG_LEVEL_INFO, "Page size:" ULINTPF
-			" Pages to analyze:" ULINTPF,
-			page_size, page_count);
-
-		/* Twice the page size is allocated so that a page_size
-		aligned read buffer fits inside the allocation. */
-		byte*	buf = static_cast<byte*>(ut_malloc(2*page_size));
-		byte*	page = static_cast<byte*>(ut_align(buf, page_size));
-
-		for (ulint j = 0; j < page_count; ++j) {
-
-			st = os_file_read(fsp->file, page, (j* page_size), page_size);
-
-			if (!st) {
-				ib_logf(IB_LOG_LEVEL_INFO,
-					"READ FAIL: page_no:" ULINTPF, j);
-				continue;
-			}
-
-			bool	uncompressed_ok = false;
-
-			/* For uncompressed pages, the page size must be equal
-			to UNIV_PAGE_SIZE. */
-			if (page_size == UNIV_PAGE_SIZE) {
-				uncompressed_ok = !buf_page_is_corrupted(
-					false, page, 0, NULL);
-			}
-
-			bool	compressed_ok = false;
-			if (page_size <= UNIV_PAGE_SIZE_DEF) {
-				compressed_ok = !buf_page_is_corrupted(
-					false, page, page_size, NULL);
-			}
-
-			if (uncompressed_ok || compressed_ok) {
-
-				ulint	space_id = mach_read_from_4(page
-					+ FIL_PAGE_SPACE_ID);
-
-				/* Pages that store space id 0 do not vote. */
-				if (space_id > 0) {
-					ib_logf(IB_LOG_LEVEL_INFO,
-						"VALID: space:" ULINTPF " "
-						"page_no:" ULINTPF
-						" page_size:" ULINTPF,
-						space_id, j, page_size);
-					verify[space_id]++;
-					++valid_pages;
-				}
-			}
-		}
-
-		ut_free(buf);
-
-		ib_logf(IB_LOG_LEVEL_INFO, "Page size: " ULINTPF
-			", Possible space_id "
-			"count:" ULINTPF, page_size, (ulint) verify.size());
-
-		/* First look for an id that all valid pages agree on, then
-		tolerate 1, 2 and finally 3 dissenting pages. */
-		const ulint	pages_corrupted = 3;
-		for (ulint missed = 0; missed <= pages_corrupted; ++missed) {
-
-			for (std::map<ulint, ulint>::iterator
-			     m = verify.begin(); m != verify.end(); ++m ) {
-
-				ib_logf(IB_LOG_LEVEL_INFO,
-					"space_id:" ULINTPF ", "
-					"Number of pages matched: "
-					ULINTPF "/" ULINTPF " "
-					"(" ULINTPF ")", m->first, m->second,
-					valid_pages, page_size);
-
-				if (m->second == (valid_pages - missed)) {
-
-					ib_logf(IB_LOG_LEVEL_INFO,
-						"Chosen space:" ULINTPF "\n",
-						m->first);
-
-					fsp->id = m->first;
-					return(true);
-				}
-			}
-
-		}
-	}
-
-	return(false);
-}
-
-/*******************************************************************//**
-Finds the given page_no of the given space id from the double write buffer,
-and copies it to the corresponding .ibd file.
-Nothing is written when the doublewrite buffer has no copy of the page, or
-when the FSP_SPACE_FLAGS read from the copy are neither valid nor convertible
-by fsp_flags_convert_from_101().
-@return true if copy was successful, or false. */
-bool
-fil_user_tablespace_restore_page(
-/*==============================*/
-	fsp_open_info*	fsp,		/* in: contains space id and .ibd
-					file information */
-	ulint		page_no)	/* in: page_no to obtain from double
-					write buffer */
-{
-	bool	err;
-	ulint	flags;
-	ulint	zip_size;
-	ulint	page_size;
-	ulint	buflen;
-	byte*	page;
-
-	/* ulint values are printed with ULINTPF, as elsewhere in this file,
-	so that the format matches the argument width. */
-	ib_logf(IB_LOG_LEVEL_INFO, "Restoring page " ULINTPF
-		" of tablespace " ULINTPF,
-		page_no, fsp->id);
-
-	// find if double write buffer has page_no of given space id
-	page = recv_sys->dblwr.find_page(fsp->id, page_no);
-
-	if (!page) {
-		ib_logf(IB_LOG_LEVEL_WARN, "Doublewrite does not have "
-			"page_no=" ULINTPF " of space: " ULINTPF,
-			page_no, fsp->id);
-		err = false;
-		goto out;
-	}
-
-	flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
-
-	if (!fsp_flags_is_valid(flags)) {
-		/* Give flags in the older format a chance before the
-		copy is rejected. */
-		ulint	cflags = fsp_flags_convert_from_101(flags);
-		if (cflags == ULINT_UNDEFINED) {
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"Ignoring a doublewrite copy of page "
-				ULINTPF ":" ULINTPF
-				" due to invalid flags 0x%x",
-				fsp->id, page_no, int(flags));
-			err = false;
-			goto out;
-		}
-		flags = cflags;
-		/* The flags on the page should be converted later. */
-	}
-
-	zip_size = fsp_flags_get_zip_size(flags);
-	page_size = fsp_flags_get_page_size(flags);
-
-	ut_ad(page_no == page_get_page_no(page));
-
-	/* A compressed tablespace is written in zip_size units. */
-	buflen = zip_size ? zip_size: page_size;
-
-	ib_logf(IB_LOG_LEVEL_INFO, "Writing " ULINTPF " bytes into file: %s",
-		buflen, fsp->filepath);
-
-	/* The byte offset is computed in os_offset_t rather than ulint so
-	that the product cannot wrap where ulint is narrower than a file
-	offset. NOTE(review): assumes os_offset_t is the offset type taken
-	by os_file_write() - confirm against os0file.h. */
-	err = os_file_write(fsp->filepath, fsp->file, page,
-			    (os_offset_t) buflen * page_no,
-			    buflen);
-
-	os_file_flush(fsp->file);
-out:
-	return(err);
-}
-
-/********************************************************************//**
-Checks whether an already opened .ibd file may be registered as a
-single-table tablespace.
-The first page is read with fil_read_first_page(). If that fails, and this
-is not XtraBackup, one attempt is made to work out the space id from the
-file contents and to restore page 0 from the doublewrite buffer, after which
-the first page is read once more.
-The file is rejected when its space id is 0 or ULINT_UNDEFINED, or when a
-tablespace with the same id is already found by fil_space_get_by_id().
-Sets fsp->success to TRUE if the tablespace is valid, FALSE if not. */
-static
-void
-fil_validate_single_table_tablespace(
-/*=================================*/
-	const char*	tablename,	/*!< in: database/tablename */
-	fsp_open_info*	fsp)		/*!< in/out: tablespace info */
-{
-	/* At most two passes: the second one happens only after a
-	restore from the doublewrite buffer was attempted. */
-	for (bool restore_attempted = false; ; restore_attempted = true) {
-
-		fsp->success = TRUE;
-
-		const char*	check_msg = fil_read_first_page(
-			fsp->file, false, &fsp->flags, &fsp->id,
-			NULL, &fsp->crypt_data);
-
-		if (check_msg == NULL) {
-			/* The first page was read successfully. */
-			break;
-		}
-
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"%s in tablespace %s (table %s)",
-			check_msg, fsp->filepath, tablename);
-		fsp->success = FALSE;
-
-		/* Do not attempt restore from doublewrite buffer
-		in Xtrabackup, this does not work. Outside Xtrabackup
-		the restore is tried only once. */
-		if (IS_XTRABACKUP() || restore_attempted) {
-			return;
-		}
-
-		if (!fil_user_tablespace_find_space_id(fsp)) {
-			return;
-		}
-
-		if (fsp->id > 0
-		    && !fil_user_tablespace_restore_page(fsp, 0)) {
-			return;
-		}
-	}
-
-	if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Tablespace is not sensible;"
-			" Table: %s Space ID: %lu Filepath: %s\n",
-			tablename, (ulong) fsp->id, fsp->filepath);
-		fsp->success = FALSE;
-		return;
-	}
-
-	/* Look for a tablespace that already uses this space id. */
-	mutex_enter(&fil_system->mutex);
-	fil_space_t*	existing = fil_space_get_by_id(fsp->id);
-	mutex_exit(&fil_system->mutex);
-
-	if (existing == NULL) {
-		fsp->success = TRUE;
-		return;
-	}
-
-	/* The id is taken: report both owners and refuse this file. */
-	char*	prev_filepath = fil_space_get_first_path(fsp->id);
-
-	ib_logf(IB_LOG_LEVEL_ERROR,
-		"Attempted to open a previously opened tablespace. "
-		"Previous tablespace %s uses space ID: %lu at "
-		"filepath: %s. Cannot open tablespace %s which uses "
-		"space ID: %lu at filepath: %s",
-		existing->name, (ulong) existing->id, prev_filepath,
-		tablename, (ulong) fsp->id, fsp->filepath);
-
-	mem_free(prev_filepath);
-	fsp->success = FALSE;
-}
-
-
-/********************************************************************//**
-Opens an .ibd file and adds the associated single-table tablespace to the
-InnoDB fil0fil.cc data structures.
-
-Two locations are probed: the one named by a link file, if any, and the
-default location in the datadir. Exactly one of them has to pass
-fil_validate_single_table_tablespace(). When neither or both do, or when
-the file size cannot be measured or is too small, the buffers are freed and
-the function returns if innodb_force_recovery > 0 and calls abort()
-otherwise; a failure to open a file whose name starts with tmp_file_prefix
-is ignored. */
-static
-void
-fil_load_single_table_tablespace(
-/*=============================*/
-	const char*	dbname,		/*!< in: database name */
-	const char*	filename)	/*!< in: file name (not a path),
-					including the .ibd or .isl extension */
-{
-	char*		tablename;
-	ulint		tablename_len;
-	ulint		dbname_len = strlen(dbname);
-	ulint		filename_len = strlen(filename);
-	fsp_open_info	def;
-	fsp_open_info	remote;
-	os_offset_t	size;
-	fil_space_t*	space;
-
-	fsp_open_info*	fsp;
-	ulong		minimum_size;
-	ibool		file_space_create_success;
-
-	memset(&def, 0, sizeof(def));
-	memset(&remote, 0, sizeof(remote));
-
-	/* The caller assured that the extension is ".ibd" or ".isl". */
-	ut_ad(0 == memcmp(filename + filename_len - 4, ".ibd", 4)
-	      || 0 == memcmp(filename + filename_len - 4, ".isl", 4));
-
-	/* Build up the tablename in the standard form database/table. */
-	tablename = static_cast<char*>(
-		mem_alloc(dbname_len + filename_len + 2));
-
-	/* When lower_case_table_names = 2 it is possible that the
-	dbname is in upper case ,but while storing it in fil_space_t
-	we must convert it into lower case */
-	sprintf(tablename, "%s" , dbname);
-	tablename[dbname_len] = '\0';
-
-	if (lower_case_file_system) {
-		dict_casedn_str(tablename);
-	}
-
-	/* Append "/<filename>" and cut off the 4-character extension. */
-	sprintf(tablename+dbname_len,"/%s",filename);
-	tablename_len = strlen(tablename) - strlen(".ibd");
-	tablename[tablename_len] = '\0';
-
-	/* There may be both .ibd and .isl file in the directory.
-	And it is possible that the .isl file refers to a different
-	.ibd file.  If so, we open and compare them the first time
-	one of them is sent to this function.  So if this table has
-	already been loaded, there is nothing to do.*/
-	mutex_enter(&fil_system->mutex);
-	space = fil_space_get_by_name(tablename);
-	if (space) {
-		mem_free(tablename);
-		mutex_exit(&fil_system->mutex);
-		return;
-	}
-	mutex_exit(&fil_system->mutex);
-
-	/* Build up the filepath of the .ibd tablespace in the datadir.
-	This must be freed independent of def.success. */
-	def.filepath = fil_make_ibd_name(tablename, false);
-
-#ifdef __WIN__
-# ifndef UNIV_HOTBACKUP
-	/* If lower_case_table_names is 0 or 2, then MySQL allows database
-	directory names with upper case letters. On Windows, all table and
-	database names in InnoDB are internally always in lower case. Put the
-	file path to lower case, so that we are consistent with InnoDB's
-	internal data dictionary. */
-
-	dict_casedn_str(def.filepath);
-# endif /* !UNIV_HOTBACKUP */
-#endif
-
-
-	/* Check for a link file which locates a remote tablespace. */
-	remote.success = fil_open_linked_file(
-		tablename, &remote.filepath, &remote.file, FALSE);
-
-	/* Read the first page of the remote tablespace */
-	if (remote.success) {
-		fil_validate_single_table_tablespace(tablename, &remote);
-		if (!remote.success) {
-			os_file_close(remote.file);
-			mem_free(remote.filepath);
-			/* Clear the pointer: the error paths below free
-			remote.filepath whenever it is non-NULL, so leaving
-			the stale value here would free the buffer twice. */
-			remote.filepath = NULL;
-
-			if (srv_backup_mode && (remote.id == ULINT_UNDEFINED
-						|| remote.id == 0)) {
-
-				/* Ignore files that have uninitialized space
-				IDs on the backup stage. This means that a
-				tablespace has just been created and we will
-				replay the corresponding log records on
-				prepare. */
-				goto func_exit_after_close;
-			}
-		}
-	}
-
-
-	/* Try to open the tablespace in the datadir. */
-	def.file = os_file_create_simple_no_error_handling(
-		innodb_file_data_key, def.filepath, OS_FILE_OPEN,
-		OS_FILE_READ_WRITE, &def.success, FALSE);
-
-	/* Read the first page of the datadir tablespace */
-	if (def.success) {
-		fil_validate_single_table_tablespace(tablename, &def);
-		if (!def.success) {
-			os_file_close(def.file);
-
-			if (IS_XTRABACKUP() && srv_backup_mode && (def.id == ULINT_UNDEFINED
-						|| def.id == 0)) {
-
-				/* Ignore files that have uninitialized space
-				IDs on the backup stage. This means that a
-				tablespace has just been created and we will
-				replay the corresponding log records on
-				prepare. */
-
-				goto func_exit_after_close;
-			}
-		}
-	}
-
-	if (!def.success && !remote.success) {
-
-		/* The following call prints an error message */
-		os_file_get_last_error(true);
-		fprintf(stderr,
-			"InnoDB: Error: could not open single-table"
-			" tablespace file %s\n", def.filepath);
-
-		if (!strncmp(filename,
-			     tmp_file_prefix, tmp_file_prefix_length)) {
-			/* Ignore errors for #sql tablespaces. */
-			mem_free(tablename);
-			if (remote.filepath) {
-				mem_free(remote.filepath);
-			}
-			if (def.filepath) {
-				mem_free(def.filepath);
-			}
-			return;
-		}
-		/* The size checks further down jump to this label too. */
-no_good_file:
-		fprintf(stderr,
-			"InnoDB: We do not continue the crash recovery,"
-			" because the table may become\n"
-			"InnoDB: corrupt if we cannot apply the log"
-			" records in the InnoDB log to it.\n"
-			"InnoDB: To fix the problem and start mysqld:\n"
-			"InnoDB: 1) If there is a permission problem"
-			" in the file and mysqld cannot\n"
-			"InnoDB: open the file, you should"
-			" modify the permissions.\n"
-			"InnoDB: 2) If the table is not needed, or you"
-			" can restore it from a backup,\n"
-			"InnoDB: then you can remove the .ibd file,"
-			" and InnoDB will do a normal\n"
-			"InnoDB: crash recovery and ignore that table.\n"
-			"InnoDB: 3) If the file system or the"
-			" disk is broken, and you cannot remove\n"
-			"InnoDB: the .ibd file, you can set"
-			" innodb_force_recovery > 0 in my.cnf\n"
-			"InnoDB: and force InnoDB to continue crash"
-			" recovery here.\n");
-		/* Reached as well when both locations hold a tablespace. */
-will_not_choose:
-		mem_free(tablename);
-		if (remote.filepath) {
-			mem_free(remote.filepath);
-		}
-		if (def.filepath) {
-			mem_free(def.filepath);
-		}
-
-		if (srv_force_recovery > 0) {
-			ib_logf(IB_LOG_LEVEL_INFO,
-				"innodb_force_recovery was set to %lu. "
-				"Continuing crash recovery even though we "
-				"cannot access the .ibd file of this table.",
-				srv_force_recovery);
-			return;
-		}
-
-		abort();
-	}
-
-	if (def.success && remote.success) {
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Tablespaces for %s have been found in two places;\n"
-			"Location 1: SpaceID: " ULINTPF " File: %s\n"
-			"Location 2: SpaceID: " ULINTPF " File: %s\n"
-			"You must delete one of them.",
-			tablename, def.id,
-			def.filepath, remote.id,
-			remote.filepath);
-
-		def.success = FALSE;
-		os_file_close(def.file);
-		os_file_close(remote.file);
-		goto will_not_choose;
-	}
-
-	/* At this point, only one tablespace is open */
-	ut_a(def.success == !remote.success);
-
-	fsp = def.success ? &def : &remote;
-
-	/* Get and test the file size. */
-	size = os_file_get_size(fsp->file);
-
-	if (size == (os_offset_t) -1) {
-		/* The following call prints an error message */
-		os_file_get_last_error(true);
-
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"could not measure the size of single-table "
-			"tablespace file %s", fsp->filepath);
-
-		os_file_close(fsp->file);
-		goto no_good_file;
-	}
-
-	/* Every .ibd file is created >= 4 pages in size. Smaller files
-	cannot be ok. */
-	minimum_size = FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE;
-	if (size < minimum_size) {
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"The size of single-table tablespace file %s "
-			"is only " UINT64PF ", should be at least %lu!",
-			fsp->filepath, size, minimum_size);
-		os_file_close(fsp->file);
-		goto no_good_file;
-	}
-
-#ifdef UNIV_HOTBACKUP
-	if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
-		char*	new_path;
-
-		fprintf(stderr,
-			"InnoDB: Renaming tablespace %s of id %lu,\n"
-			"InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
-			"InnoDB: because its size %" PRId64 " is too small"
-			" (< 4 pages 16 kB each),\n"
-			"InnoDB: or the space id in the file header"
-			" is not sensible.\n"
-			"InnoDB: This can happen in an mysqlbackup run,"
-			" and is not dangerous.\n",
-			fsp->filepath, fsp->id, fsp->filepath, size);
-		os_file_close(fsp->file);
-
-		new_path = fil_make_ibbackup_old_name(fsp->filepath);
-
-		bool	success = os_file_rename(
-			innodb_file_data_key, fsp->filepath, new_path);
-
-		ut_a(success);
-
-		mem_free(new_path);
-
-		goto func_exit_after_close;
-	}
-
-	/* A backup may contain the same space several times, if the space got
-	renamed at a sensitive time. Since it is enough to have one version of
-	the space, we rename the file if a space with the same space id
-	already exists in the tablespace memory cache. We rather rename the
-	file than delete it, because if there is a bug, we do not want to
-	destroy valuable data. */
-
-	mutex_enter(&fil_system->mutex);
-
-	space = fil_space_get_by_id(fsp->id);
-
-	if (space) {
-		char*	new_path;
-
-		fprintf(stderr,
-			"InnoDB: Renaming tablespace %s of id %lu,\n"
-			"InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
-			"InnoDB: because space %s with the same id\n"
-			"InnoDB: was scanned earlier. This can happen"
-			" if you have renamed tables\n"
-			"InnoDB: during an mysqlbackup run.\n",
-			fsp->filepath, fsp->id, fsp->filepath,
-			space->name);
-		os_file_close(fsp->file);
-
-		new_path = fil_make_ibbackup_old_name(fsp->filepath);
-
-		mutex_exit(&fil_system->mutex);
-
-		bool	success = os_file_rename(
-			innodb_file_data_key, fsp->filepath, new_path);
-
-		ut_a(success);
-
-		mem_free(new_path);
-
-		goto func_exit_after_close;
-	}
-	mutex_exit(&fil_system->mutex);
-#endif /* UNIV_HOTBACKUP */
-
-	/* Adjust the memory-based flags that would normally be set by
-	dict_tf_to_fsp_flags(). In recovery, we have no data dictionary. */
-	if (FSP_FLAGS_HAS_PAGE_COMPRESSION(fsp->flags)) {
-		fsp->flags |= page_zip_level
-			<< FSP_FLAGS_MEM_COMPRESSION_LEVEL;
-	}
-	/* Set on "remote" unconditionally; it only has an effect when fsp
-	points to the remote tablespace. */
-	remote.flags |= 1U << FSP_FLAGS_MEM_DATA_DIR;
-	/* We will leave atomic_writes at ATOMIC_WRITES_DEFAULT.
-	That will be adjusted in fil_space_for_table_exists_in_mem(). */
-
-	file_space_create_success = fil_space_create(
-		tablename, fsp->id, fsp->flags, FIL_TABLESPACE,
-		fsp->crypt_data, false);
-
-	if (!file_space_create_success) {
-		if (srv_force_recovery > 0) {
-			fprintf(stderr,
-				"InnoDB: innodb_force_recovery was set"
-				" to %lu. Continuing crash recovery\n"
-				"InnoDB: even though the tablespace"
-				" creation of this table failed.\n",
-				srv_force_recovery);
-			goto func_exit;
-		}
-
-		/* Exit here with a core dump, stack, etc. */
-		ut_a(file_space_create_success);
-	}
-
-	/* We do not use the size information we have about the file, because
-	the rounding formula for extents and pages is somewhat complex; we
-	let fil_node_open() do that task. */
-
-	if (!fil_node_create(fsp->filepath, 0, fsp->id, FALSE)) {
-		ut_error;
-	}
-
-func_exit:
-	/* We reuse file handles on the backup stage in XtraBackup to avoid
-	inconsistencies between the file name and the actual tablespace contents
-	if a DDL occurs between a fil_load_single_table_tablespaces() call and
-	the actual copy operation. */
-	if (IS_XTRABACKUP() && srv_backup_mode && !srv_close_files) {
-
-		fil_node_t*	node;
-		fil_space_t*	space;
-
-		mutex_enter(&fil_system->mutex);
-
-		space = fil_space_get_by_id(fsp->id);
-
-		if (space) {
-			node = UT_LIST_GET_LAST(space->chain);
-
-			/* The handle will be closed by xtrabackup in
-			xtrabackup_copy_datafile(). We set node->open to TRUE to
-			make sure no one calls fil_node_open_file()
-			(i.e. attempts to reopen the tablespace by name) during
-			the backup stage. */
-
-			node->open = TRUE;
-			node->handle = fsp->file;
-
-			/* The following is copied from fil_node_open_file() to
-			pass fil_system validity checks. We cannot use
-			fil_node_open_file() directly, as that would re-open the
-			file by name and create another file handle. */
-
-			fil_system->n_open++;
-			fil_n_file_opened++;
-
-			if (fil_space_belongs_in_lru(space)) {
-
-				/* Put the node to the LRU list */
-				UT_LIST_ADD_FIRST(LRU, fil_system->LRU, node);
-			}
-		}
-
-		mutex_exit(&fil_system->mutex);
-	}
-	else {
-		os_file_close(fsp->file);
-	}
-
-
-func_exit_after_close:
-	ut_ad(!mutex_own(&fil_system->mutex));
-
-	mem_free(tablename);
-	if (remote.success) {
-		mem_free(remote.filepath);
-	}
-	mem_free(def.filepath);
-}
-
-/***********************************************************************//**
-Fault-tolerant wrapper around os_file_readdir_next_file(): the call is
-repeated, up to 100 times in total, for as long as it returns -1. The aim is
-to read as much good data as possible and to step over bad entries. Every
-failed attempt is logged and sets *err to DB_ERROR.
-@return 0 if ok, -1 if error even after the retries, 1 if at the end
-of the directory */
-UNIV_INTERN
-int
-fil_file_readdir_next_file(
-/*=======================*/
-	dberr_t*	err,	/*!< out: this is set to DB_ERROR if an error
-				was encountered, otherwise not changed */
-	const char*	dirname,/*!< in: directory name or path */
-	os_file_dir_t	dir,	/*!< in: directory stream */
-	os_file_stat_t*	info)	/*!< in/out: buffer where the
-				info is returned */
-{
-	ulint	attempts_left = 100;
-
-	do {
-		const int	status = os_file_readdir_next_file(
-			dirname, dir, info);
-
-		if (status != -1) {
-			/* An entry was read, or the directory is exhausted. */
-			return(status);
-		}
-
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"os_file_readdir_next_file() returned -1 in "
-			"directory %s, crash recovery may have failed "
-			"for some .ibd files!", dirname);
-
-		*err = DB_ERROR;
-	} while (--attempts_left != 0);
-
-	return(-1);
-}
-
-
-/** Callback that decides whether a database directory is left out of the
-tablespace scan. fil_load_single_table_tablespaces() asserts that it is set
-and calls it with the directory path when IS_XTRABACKUP() holds; a nonzero
-result makes the scan skip that directory. */
-my_bool(*fil_check_if_skip_database_by_path)(const char* name);
-
-/** fil_load_single_table_tablespaces() looks at the clock for its progress
-message only after more than this many files were loaded since the previous
-look. */
-#define CHECK_TIME_EVERY_N_FILES 10
-/********************************************************************//**
-At the server startup, if we need crash recovery, scans the database
-directories under the MySQL datadir, looking for .ibd files. Those files are
-single-table tablespaces. We need to know the space id in each of them so that
-we know into which file we should look to check the contents of a page stored
-in the doublewrite buffer, also to know where to apply log records where the
-space id is != 0.
-@param[in]	pred	optional filter, called as pred(database directory name,
-			file name); when it is non-NULL, only files for which
-			it returns nonzero are loaded
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-dberr_t
-fil_load_single_table_tablespaces(ibool (*pred)(const char*, const char*))
-/*===================================*/
-{
-	int		ret;
-	char*		dbpath		= NULL;
-	ulint		dbpath_len	= 100;
-	ulint		files_read	= 0;
-	ulint		files_read_at_last_check = 0;
-	ib_time_t	prev_report_time = ut_time();
-	os_file_dir_t	dir;
-	os_file_dir_t	dbdir;
-	os_file_stat_t	dbinfo;
-	os_file_stat_t	fileinfo;
-	dberr_t		err		= DB_SUCCESS;
-
-	/* The datadir of MySQL is always the default directory of mysqld */
-
-	dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE);
-
-	if (dir == NULL) {
-
-		return(DB_ERROR);
-	}
-
-	dbpath = static_cast<char*>(mem_alloc(dbpath_len));
-
-	/* Scan all directories under the datadir. They are the database
-	directories of MySQL. */
-
-	ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir,
-					 &dbinfo);
-	while (ret == 0) {
-		ulint len;
-
-		if (dbinfo.type == OS_FILE_TYPE_FILE
-		    || dbinfo.type == OS_FILE_TYPE_UNKNOWN) {
-
-			goto next_datadir_item;
-		}
-
-		/* We found a symlink or a directory; try opening it to see
-		if a symlink is a directory */
-
-		/* +2: one byte for the '/' separator, one for the NUL. */
-		len = strlen(fil_path_to_mysql_datadir)
-			+ strlen (dbinfo.name) + 2;
-		if (len > dbpath_len) {
-			/* Grow the path buffer; its old contents are not
-			needed, so it is simply reallocated. */
-			dbpath_len = len;
-
-			if (dbpath) {
-				mem_free(dbpath);
-			}
-
-			dbpath = static_cast<char*>(mem_alloc(dbpath_len));
-		}
-		ut_snprintf(dbpath, dbpath_len,
-			    "%s/%s", fil_path_to_mysql_datadir, dbinfo.name);
-		srv_normalize_path_for_win(dbpath);
-
-		if (IS_XTRABACKUP()) {
-			ut_a(fil_check_if_skip_database_by_path);
-			if (fil_check_if_skip_database_by_path(dbpath)) {
-				fprintf(stderr, "Skipping db: %s\n", dbpath);
-				dbdir = NULL;
-			} else {
-				/* We want wrong directory permissions to be a fatal
-				error for XtraBackup. */
-				dbdir = os_file_opendir(dbpath, TRUE);
-			}
-		} else {
-			dbdir = os_file_opendir(dbpath, FALSE);
-		}
-
-		if (dbdir != NULL) {
-
-			/* We found a database directory; loop through it,
-			looking for possible .ibd files in it */
-
-			ret = fil_file_readdir_next_file(&err, dbpath, dbdir,
-							 &fileinfo);
-			while (ret == 0) {
-
-				if (fileinfo.type == OS_FILE_TYPE_DIR) {
-
-					goto next_file_item;
-				}
-
-				/* We found a symlink or a file
-
-				Ignore .isl files on XtraBackup
-				recovery, all tablespaces must be local. */
-				if (strlen(fileinfo.name) > 4
-				    && (0 == strcmp(fileinfo.name
-						   + strlen(fileinfo.name) - 4,
-						   ".ibd")
-					|| ((!IS_XTRABACKUP() || srv_backup_mode)
-					    && 0 == strcmp(fileinfo.name
-							   + strlen(fileinfo.name) - 4,
-							   ".isl")))
-				    && (!pred ||
-					pred(dbinfo.name, fileinfo.name))) {
-					/* The name ends in .ibd or .isl;
-					try opening the file */
-					fil_load_single_table_tablespace(
-						dbinfo.name, fileinfo.name);
-					files_read++;
-					if (files_read - files_read_at_last_check >
-					    CHECK_TIME_EVERY_N_FILES) {
-						ib_time_t cur_time= ut_time();
-						files_read_at_last_check= files_read;
-						double time_elapsed= ut_difftime(cur_time,
-										 prev_report_time);
-						if (time_elapsed > 15) {
-							/* files_read is an unsigned
-							ulint, so it is printed with
-							ULINTPF rather than "%ld". */
-							ib_logf(IB_LOG_LEVEL_INFO,
-								"Processed " ULINTPF
-								" .ibd/.isl files",
-								files_read);
-							prev_report_time= cur_time;
-						}
-					}
-				}
-next_file_item:
-				ret = fil_file_readdir_next_file(&err,
-								 dbpath, dbdir,
-								 &fileinfo);
-			}
-
-			if (0 != os_file_closedir(dbdir)) {
-				fputs("InnoDB: Warning: could not"
-				      " close database directory ", stderr);
-				ut_print_filename(stderr, dbpath);
-				putc('\n', stderr);
-
-				err = DB_ERROR;
-			}
-		}
-
-next_datadir_item:
-		ret = fil_file_readdir_next_file(&err,
-						 fil_path_to_mysql_datadir,
-						 dir, &dbinfo);
-	}
-
-	mem_free(dbpath);
-
-	if (0 != os_file_closedir(dir)) {
-		fprintf(stderr,
-			"InnoDB: Error: could not close MySQL datadir\n");
-
-		return(DB_ERROR);
-	}
-
-	return(err);
-}
-
-/*******************************************************************//**
-Tells whether a single-table tablespace is missing from the memory cache,
-is being stopped there, or has a tablespace version other than the expected
-one.
-@return TRUE if the space is missing, stopping, or of a different version */
-UNIV_INTERN
-ibool
-fil_tablespace_deleted_or_being_deleted_in_mem(
-/*===========================================*/
-	ulint		id,	/*!< in: space id */
-	ib_int64_t	version)/*!< in: expected tablespace_version; pass -1
-				to skip the version comparison */
-{
-	ibool		gone;
-
-	ut_ad(fil_system);
-
-	mutex_enter(&fil_system->mutex);
-
-	fil_space_t*	cached = fil_space_get_by_id(id);
-
-	if (cached == NULL || cached->is_stopping()) {
-		gone = TRUE;
-	} else if (version == ((ib_int64_t)-1)) {
-		/* The caller asked for the version to be ignored. */
-		gone = FALSE;
-	} else {
-		gone = (cached->tablespace_version != version) ? TRUE : FALSE;
-	}
-
-	mutex_exit(&fil_system->mutex);
-
-	return(gone);
-}
-
-/*******************************************************************//**
-Looks up a space id in the tablespace memory cache.
-@return TRUE if a tablespace with this id is cached */
-UNIV_INTERN
-ibool
-fil_tablespace_exists_in_mem(
-/*=========================*/
-	ulint	id)	/*!< in: space id */
-{
-	ut_ad(fil_system);
-
-	mutex_enter(&fil_system->mutex);
-
-	const bool	is_cached = (fil_space_get_by_id(id) != NULL);
-
-	mutex_exit(&fil_system->mutex);
-
-	return(is_cached);
-}
-
-/*******************************************************************//**
-Report that a tablespace for a table was not found. Writes a single
-error-level message naming the table and the space id that the data
-dictionary records for it. */
-static
-void
-fil_report_missing_tablespace(
-/*===========================*/
-	const char*	name,		/*!< in: table name */
-	ulint		space_id)	/*!< in: table's space id */
-{
-	/* The message has always printed the raw table name; the formatted
-	copy that used to be built here was never read, so it is gone.
-	space_id is a ulint and is therefore printed with ULINTPF. */
-	ib_logf(IB_LOG_LEVEL_ERROR,
-		"Table %s in the InnoDB data dictionary has tablespace id "
-		ULINTPF ", "
-		"but tablespace with that id or name does not exist. Have "
-		"you deleted or moved .ibd files? This may also be a table "
-		"created with CREATE TEMPORARY TABLE whose .ibd and .frm "
-		"files MySQL automatically removed, but the table still "
-		"exists in the InnoDB internal data dictionary.",
-		name, space_id);
-}
-
-/** Check if a matching tablespace exists in the InnoDB tablespace memory
-cache. Note that if we have not done a crash recovery at the database startup,
-there may be many tablespaces which are not yet in the memory cache.
-
-A tablespace matches when one is cached under the given id, its flags agree
-with the flags derived from table_flags outside FSP_FLAGS_MEM_MASK, and it is
-the same object as the one cached under the given name. On a match the
-cached flags are overwritten with the expected flags and, unless
-srv_read_only_mode is set, fsp_flags_try_adjust() is called.
-@return whether a matching tablespace exists in the memory cache */
-UNIV_INTERN
-bool
-fil_space_for_table_exists_in_mem(
-/*==============================*/
-	ulint		id,		/*!< in: space id */
-	const char*	name,		/*!< in: table name used in
-					fil_space_create().  Either the
-					standard 'dbname/tablename' format
-					or table->dir_path_of_temp_table */
-	bool		print_error_if_does_not_exist,
-					/*!< in: print detailed error
-					information to the .err log if a
-					matching tablespace is not found from
-					memory */
-	bool		remove_from_data_dict_if_does_not_exist,
-					/*!< in: remove from the data dictionary
-					if tablespace does not exist; here it
-					only selects an extra warning under
-					IS_XTRABACKUP() */
-	bool		adjust_space,	/*!< in: whether to adjust space id
-					when find table space mismatch */
-	mem_heap_t*	heap,		/*!< in: heap memory */
-	table_id_t	table_id,	/*!< in: table id */
-	ulint		table_flags)	/*!< in: table flags */
-{
-	fil_space_t*	fnamespace;
-	fil_space_t*	space;
-
-	const ulint	expected_flags = dict_tf_to_fsp_flags(table_flags);
-
-	mutex_enter(&fil_system->mutex);
-
-	/* Look if there is a space with the same id */
-
-	space = fil_space_get_by_id(id);
-
-	/* Look if there is a space with the same name; the name is the
-	directory path from the datadir to the file */
-
-	fnamespace = fil_space_get_by_name(name);
-	bool valid = space && !((space->flags ^ expected_flags)
-				& ~FSP_FLAGS_MEM_MASK);
-
-	if (!space) {
-		/* Nothing cached under this id: fall through to the
-		error reporting below. */
-	} else if (!valid || space == fnamespace) {
-		/* Found with the same file name, or got a flag mismatch. */
-		goto func_exit;
-	} else if (adjust_space
-		   && row_is_mysql_tmp_table_name(space->name)
-		   && !row_is_mysql_tmp_table_name(name)) {
-		/* Info from fnamespace comes from the ibd file
-		itself, it can be different from data obtained from
-		System tables since renaming files is not
-		transactional. We shall adjust the ibd file name
-		according to system table info. */
-		mutex_exit(&fil_system->mutex);
-
-		DBUG_EXECUTE_IF("ib_crash_before_adjust_fil_space",
-				DBUG_SUICIDE(););
-
-		/* fnamespace is NULL when no tablespace is cached under
-		"name"; in that case there is nothing to move out of the
-		way before the space found by id is renamed. */
-		if (fnamespace != NULL) {
-			char*	tmp_name = dict_mem_create_temporary_tablename(
-				heap, name, table_id);
-
-			fil_rename_tablespace(fnamespace->name, fnamespace->id,
-					      tmp_name, NULL);
-		}
-
-		DBUG_EXECUTE_IF("ib_crash_after_adjust_one_fil_space",
-				DBUG_SUICIDE(););
-
-		fil_rename_tablespace(space->name, id, name, NULL);
-
-		DBUG_EXECUTE_IF("ib_crash_after_adjust_fil_space",
-				DBUG_SUICIDE(););
-
-		mutex_enter(&fil_system->mutex);
-		fnamespace = fil_space_get_by_name(name);
-		ut_ad(space == fnamespace);
-		goto func_exit;
-	}
-
-	if (!print_error_if_does_not_exist) {
-		valid = false;
-		goto func_exit;
-	}
-
-	if (space == NULL) {
-		if (fnamespace == NULL) {
-			if (print_error_if_does_not_exist) {
-				fil_report_missing_tablespace(name, id);
-				if (IS_XTRABACKUP() && remove_from_data_dict_if_does_not_exist) {
-					ib_logf(IB_LOG_LEVEL_WARN,
-						"It will be removed from "
-						"the data dictionary.");
-				}
-			}
-		} else {
-			ut_print_timestamp(stderr);
-			fputs("  InnoDB: Error: table ", stderr);
-			ut_print_filename(stderr, name);
-			fprintf(stderr, "\n"
-				"InnoDB: in InnoDB data dictionary has"
-				" tablespace id %lu,\n"
-				"InnoDB: but a tablespace with that id"
-				" does not exist. There is\n"
-				"InnoDB: a tablespace of name %s and id %lu,"
-				" though. Have\n"
-				"InnoDB: you deleted or moved .ibd files?\n",
-				(ulong) id, fnamespace->name,
-				(ulong) fnamespace->id);
-		}
-		/* Shared tail of both error paths; the name-mismatch branch
-		below also jumps here. */
-error_exit:
-		fputs("InnoDB: Please refer to\n"
-		      "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
-		      "InnoDB: for how to resolve the issue.\n", stderr);
-		valid = false;
-		goto func_exit;
-	}
-
-	if (0 != strcmp(space->name, name)) {
-		ut_print_timestamp(stderr);
-		fputs("  InnoDB: Error: table ", stderr);
-		ut_print_filename(stderr, name);
-		fprintf(stderr, "\n"
-			"InnoDB: in InnoDB data dictionary has"
-			" tablespace id %lu,\n"
-			"InnoDB: but the tablespace with that id"
-			" has name %s.\n"
-			"InnoDB: Have you deleted or moved .ibd files?\n",
-			(ulong) id, space->name);
-
-		if (fnamespace != NULL) {
-			fputs("InnoDB: There is a tablespace"
-			      " with the right name\n"
-			      "InnoDB: ", stderr);
-			ut_print_filename(stderr, fnamespace->name);
-			fprintf(stderr, ", but its id is %lu.\n",
-				(ulong) fnamespace->id);
-		}
-
-		goto error_exit;
-	}
-
-func_exit:
-	if (valid) {
-		/* Adjust the flags that are in FSP_FLAGS_MEM_MASK.
-		FSP_SPACE_FLAGS will not be written back here. */
-		space->flags = expected_flags;
-	}
-	mutex_exit(&fil_system->mutex);
-
-	if (valid && !srv_read_only_mode) {
-		fsp_flags_try_adjust(id, expected_flags & ~FSP_FLAGS_MEM_MASK);
-	}
-
-	return(valid);
-}
-
-/*******************************************************************//**
-Looks up a table name in the tablespace memory cache and reports the id of
-the tablespace cached under that name.
-@return space id, ULINT_UNDEFINED if not found */
-UNIV_INTERN
-ulint
-fil_get_space_id_for_table(
-/*=======================*/
-	const char*	tablename)	/*!< in: table name in the standard
-					'databasename/tablename' format */
-{
-	ut_ad(fil_system);
-
-	mutex_enter(&fil_system->mutex);
-
-	/* The cache is keyed by name as well as by id. */
-	const fil_space_t*	match = fil_space_get_by_name(tablename);
-	const ulint		space_id = match ? match->id : ULINT_UNDEFINED;
-
-	mutex_exit(&fil_system->mutex);
-
-	return(space_id);
-}
-
-/**********************************************************************//**
-Tries to extend a data file so that it would accommodate the number of pages
-given. The tablespace must be cached in the memory cache. If the space is big
-enough already, does nothing. The attempt is repeated for as long as
-fil_space_extend_must_retry() asks for a retry.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_extend_space_to_desired_size(
-/*=============================*/
-	ulint*	actual_size,	/*!< out: size of the space after extension;
-				if we ran out of disk space this may be lower
-				than the desired size */
-	ulint	space_id,	/*!< in: space id */
-	ulint	size_after_extend)/*!< in: desired size in pages after the
-				extension; if the current space size is bigger
-				than this already, the function does nothing */
-{
-	fil_space_t*	space;
-	ibool		success;
-
-	ut_ad(!srv_read_only_mode);
-
-	do {
-		/* Each attempt starts by (re)acquiring the fil_system
-		mutex and looking the space up again. */
-		fil_mutex_enter_and_prepare_for_io(space_id);
-
-		space = fil_space_get_by_id(space_id);
-		ut_a(space);
-	} while (fil_space_extend_must_retry(
-			 space, UT_LIST_GET_LAST(space->chain),
-			 size_after_extend, &success));
-
-	*actual_size = space->size;
-	mutex_exit(&fil_system->mutex);
-
-	return(success);
-}
-
-#ifdef UNIV_HOTBACKUP
-/********************************************************************//**
-Extends all tablespaces to the size stored in the space header. During the
-mysqlbackup --apply-log phase we extended the spaces on-demand so that log
-records could be applied, but that may have left spaces still too small
-compared to the size stored in the space header. A space that cannot be
-extended is reported on stderr and then fails an assertion. */
-UNIV_INTERN
-void
-fil_extend_tablespaces_to_stored_len(void)
-/*======================================*/
-{
-	fil_space_t*	space;
-	byte*		buf;
-	ulint		actual_size;
-	ulint		size_in_header;
-	dberr_t		error;
-	ibool		success;
-
-	/* The result of mem_alloc() is cast explicitly, as at the other
-	allocation sites in this file. */
-	buf = static_cast<byte*>(mem_alloc(UNIV_PAGE_SIZE));
-
-	mutex_enter(&fil_system->mutex);
-
-	space = UT_LIST_GET_FIRST(fil_system->space_list);
-
-	while (space) {
-		ut_a(space->purpose == FIL_TABLESPACE);
-
-		mutex_exit(&fil_system->mutex); /* no need to protect with a
-					      mutex, because this is a
-					      single-threaded operation */
-		/* Read the first page of the space into buf; the size
-		stored in the header is taken from it below. */
-		error = fil_read(TRUE, space->id,
-				 fsp_flags_get_zip_size(space->flags),
-				 0, 0, UNIV_PAGE_SIZE, buf, NULL, 0);
-		ut_a(error == DB_SUCCESS);
-
-		size_in_header = fsp_get_size_low(buf);
-
-		success = fil_extend_space_to_desired_size(
-			&actual_size, space->id, size_in_header);
-		if (!success) {
-			/* Both sizes are ulint, hence ULINTPF. */
-			fprintf(stderr,
-				"InnoDB: Error: could not extend the"
-				" tablespace of %s\n"
-				"InnoDB: to the size stored in header,"
-				" " ULINTPF " pages;\n"
-				"InnoDB: size after extension " ULINTPF
-				" pages\n"
-				"InnoDB: Check that you have free disk space"
-				" and retry!\n",
-				space->name, size_in_header, actual_size);
-			ut_a(success);
-		}
-
-		mutex_enter(&fil_system->mutex);
-
-		space = UT_LIST_GET_NEXT(space_list, space);
-	}
-
-	mutex_exit(&fil_system->mutex);
-
-	mem_free(buf);
-}
-#endif
-
-/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
-
-/*******************************************************************//**
-Tries to reserve free extents in a file space. The reservation is granted
-only if the extents already reserved plus the requested ones do not exceed
-the number that is free now.
-@return TRUE if succeed */
-UNIV_INTERN
-ibool
-fil_space_reserve_free_extents(
-/*===========================*/
-	ulint	id,		/*!< in: space id */
-	ulint	n_free_now,	/*!< in: number of free extents now */
-	ulint	n_to_reserve)	/*!< in: how many one wants to reserve */
-{
-	ut_ad(fil_system);
-
-	mutex_enter(&fil_system->mutex);
-
-	fil_space_t*	space = fil_space_get_by_id(id);
-
-	ut_a(space);
-
-	const bool	granted =
-		space->n_reserved_extents + n_to_reserve <= n_free_now;
-
-	if (granted) {
-		space->n_reserved_extents += n_to_reserve;
-	}
-
-	mutex_exit(&fil_system->mutex);
-
-	return(granted ? TRUE : FALSE);
-}
-
-/*******************************************************************//**
-Gives back extents that were reserved in a file space. Asserts that the
-space exists and that at least n_reserved extents are currently reserved. */
-UNIV_INTERN
-void
-fil_space_release_free_extents(
-/*===========================*/
-	ulint	id,		/*!< in: space id */
-	ulint	n_reserved)	/*!< in: how many one reserved */
-{
-	ut_ad(fil_system);
-
-	mutex_enter(&fil_system->mutex);
-
-	fil_space_t*	owner = fil_space_get_by_id(id);
-
-	ut_a(owner);
-	ut_a(owner->n_reserved_extents >= n_reserved);
-
-	owner->n_reserved_extents -= n_reserved;
-
-	mutex_exit(&fil_system->mutex);
-}
-
-/*******************************************************************//**
-Reads the number of reserved extents of a file space. If the database is
-silent, this number should be zero. Asserts that the space exists.
-@return the current reserved extent count of the space */
-UNIV_INTERN
-ulint
-fil_space_get_n_reserved_extents(
-/*=============================*/
-	ulint	id)	/*!< in: space id */
-{
-	ut_ad(fil_system);
-
-	mutex_enter(&fil_system->mutex);
-
-	const fil_space_t*	owner = fil_space_get_by_id(id);
-
-	ut_a(owner);
-
-	const ulint	n_reserved = owner->n_reserved_extents;
-
-	mutex_exit(&fil_system->mutex);
-
-	return(n_reserved);
-}
-
-/*============================ FILE I/O ================================*/
-
-/********************************************************************//**
-NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
-
-Gets a file node ready for i/o: warns if the number of open files is more
-than 5 above the limit, opens the file if it is closed, removes the node from
-the LRU list when it has no pending i/o and its space belongs in the LRU,
-and finally increments the node's pending i/o count. The caller must hold
-the fil_sys mutex.
-@return false if the file can't be opened, otherwise true */
-static
-bool
-fil_node_prepare_for_io(
-/*====================*/
-	fil_node_t*	node,	/*!< in: file node */
-	fil_system_t*	system,	/*!< in: tablespace memory cache */
-	fil_space_t*	space)	/*!< in: space */
-{
-	ut_ad(node && system && space);
-	ut_ad(mutex_own(&(system->mutex)));
-
-	if (system->n_open > system->max_n_open + 5) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"  InnoDB: Warning: open files %lu"
-			" exceeds the limit %lu\n",
-			(ulong) system->n_open,
-			(ulong) system->max_n_open);
-	}
-
-	if (node->open == FALSE) {
-		/* A closed file cannot have i/o in flight. */
-		ut_a(node->n_pending == 0);
-
-		const bool	opened = fil_node_open_file(node, system, space);
-
-		if (!opened) {
-			return(false);
-		}
-	}
-
-	const bool	idle_in_lru = node->n_pending == 0
-		&& fil_space_belongs_in_lru(space);
-
-	if (idle_in_lru) {
-		/* The node sits in the LRU list; take it out while it
-		has i/o pending. */
-		ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
-
-		UT_LIST_REMOVE(LRU, system->LRU, node);
-	}
-
-	++node->n_pending;
-
-	return(true);
-}
-
-/********************************************************************//**
-Does the bookkeeping for a finished i/o operation: decrements the node's
-pending i/o count, records the modification for writes, and returns the node
-to the LRU list once nothing is pending and its space belongs in the LRU. */
-static
-void
-fil_node_complete_io(
-/*=================*/
-	fil_node_t*	node,	/*!< in: file node */
-	fil_system_t*	system,	/*!< in: tablespace memory cache */
-	ulint		type)	/*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
-				the node as modified if
-				type == OS_FILE_WRITE */
-{
-	ut_ad(node);
-	ut_ad(system);
-	ut_ad(mutex_own(&(system->mutex)));
-
-	ut_a(node->n_pending > 0);
-
-	--node->n_pending;
-
-	if (type == OS_FILE_WRITE) {
-		ut_ad(!srv_read_only_mode);
-
-		/* Stamp the node with the next global modification
-		number. */
-		++system->modification_counter;
-		node->modification_counter = system->modification_counter;
-
-		if (fil_buffering_disabled(node->space)) {
-			/* The user has explicitly disabled buffering, so
-			there are no unflushed changes to track: the node
-			counts as flushed right away. */
-			ut_ad(!node->space->is_in_unflushed_spaces);
-			node->flush_counter = node->modification_counter;
-		} else if (!node->space->is_in_unflushed_spaces) {
-			/* First unflushed write to this space: queue it. */
-			node->space->is_in_unflushed_spaces = true;
-			UT_LIST_ADD_FIRST(unflushed_spaces,
-					  system->unflushed_spaces,
-					  node->space);
-		}
-	}
-
-	const bool	back_to_lru = node->n_pending == 0
-		&& fil_space_belongs_in_lru(node->space);
-
-	if (back_to_lru) {
-		UT_LIST_ADD_FIRST(LRU, system->LRU, node);
-	}
-}
-
-/********************************************************************//**
-Logs, at IB_LOG_LEVEL_FATAL, an attempt to access a page outside the
-tablespace bounds. For space 0 before the server has started, a hint about
-the system tablespace configuration is appended to the message. */
-static
-void
-fil_report_invalid_page_access(
-/*===========================*/
-	ulint		block_offset,	/*!< in: block offset */
-	ulint		space_id,	/*!< in: space id */
-	const char*	space_name,	/*!< in: space name */
-	ulint		byte_offset,	/*!< in: byte offset */
-	ulint		len,		/*!< in: I/O length */
-	ulint		type)		/*!< in: I/O type */
-{
-	const char*	config_hint = "";
-
-	if (space_id == 0 && !srv_was_started) {
-		config_hint = "Please check that the configuration matches"
-			" the InnoDB system tablespace location (ibdata files)";
-	}
-
-	ib_logf(IB_LOG_LEVEL_FATAL,
-		"Trying to access page number " ULINTPF
-		" in space " ULINTPF
-		" space name %s,"
-		" which is outside the tablespace bounds."
-		" Byte offset " ULINTPF ", len " ULINTPF
-		" i/o type " ULINTPF ".%s",
-		block_offset, space_id, space_name,
-		byte_offset, len, type,
-		config_hint);
-}
-
-/********************************************************************//**
-Walks the node chain of a file space to find the node that contains the
-given block. The sizes of the nodes that are skipped are subtracted from
-*block_offset, so on return it is relative to the returned node.
-@return the node, or NULL if the chain ends before the block is reached */
-static
-fil_node_t*
-fil_space_get_node(
-	fil_space_t*	space,		/*!< in: file space */
-	ulint 		space_id,	/*!< in: space id (not used here) */
-	ulint* 		block_offset,	/*!< in/out: offset in number of blocks */
-	ulint 		byte_offset,	/*!< in: remainder of offset in bytes; in
-					aio this must be divisible by the OS block
-					size (not used here) */
-	ulint 		len)		/*!< in: how many bytes to read or write; this
-					must not cross a file boundary; in aio this
-					must be a block size multiple (not used
-					here) */
-{
-	ut_ad(mutex_own(&fil_system->mutex));
-
-	fil_node_t*	candidate = UT_LIST_GET_FIRST(space->chain);
-
-	while (candidate != NULL) {
-		if (fil_is_user_tablespace_id(space->id)
-		    && candidate->size == 0) {
-			/* We do not know the size of a single-table
-			tablespace before we open the file */
-			break;
-		}
-
-		if (candidate->size > *block_offset) {
-			/* The block lies inside this node. */
-			break;
-		}
-
-		*block_offset -= candidate->size;
-		candidate = UT_LIST_GET_NEXT(chain, candidate);
-	}
-
-	return(candidate);
-}
-
-/** Determine the block size of the data file that holds a page.
-The block size of the node containing the page is used; if the page is past
-the last node, the block size of the last node is used, and 512 if the space
-has no nodes. Values above 4096 are replaced by 512.
-@param[in]	space	tablespace
-@param[in]	offset	page number
-@return block size */
-UNIV_INTERN
-ulint
-fil_space_get_block_size(const fil_space_t* space, unsigned offset)
-{
-	ut_ad(space->n_pending_ios > 0);
-
-	ulint		block_size = 512;
-	fil_node_t*	node = UT_LIST_GET_FIRST(space->chain);
-
-	while (node != NULL) {
-		block_size = node->file_block_size;
-
-		if (node->size > offset) {
-			break;
-		}
-
-		offset -= node->size;
-		node = UT_LIST_GET_NEXT(chain, node);
-	}
-
-	/* Currently supporting block size up to 4K,
-	fall back to default if bigger requested. */
-	return block_size > 4096 ? 512 : block_size;
-}
-
-/********************************************************************//**
-Reads or writes data. This operation is asynchronous (aio) unless sync is
-set. The flag bits OS_FILE_LOG, OS_AIO_SIMULATED_WAKE_LATER and
-BUF_READ_IGNORE_NONEXISTENT_PAGES are stripped from type before the request
-is queued.
-@return DB_SUCCESS; DB_TABLESPACE_DELETED if we are trying to do
-i/o on a tablespace which does not exist (or whose data file cannot be
-opened); DB_ERROR if no node contains the page and
-BUF_READ_IGNORE_NONEXISTENT_PAGES was given; DB_OUT_OF_FILE_SPACE if the
-i/o call reported failure */
-UNIV_INTERN
-dberr_t
-_fil_io(
-/*===*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
- ORed to OS_FILE_LOG, if a log i/o
- and ORed to OS_AIO_SIMULATED_WAKE_LATER
- if simulated aio and we want to post a
- batch of i/os; NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- bool sync, /*!< in: true if synchronous aio is desired */
- ulint space_id, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint block_offset, /*!< in: offset in number of blocks */
- ulint byte_offset, /*!< in: remainder of offset in bytes; in
- aio this must be divisible by the OS block
- size */
- ulint len, /*!< in: how many bytes to read or write; this
- must not cross a file boundary; in aio this
- must be a block size multiple */
- void* buf, /*!< in/out: buffer where to store read data
- or from where to write; in aio this must be
- appropriately aligned */
- void* message, /*!< in: message for aio handler if non-sync
- aio used, else ignored */
- ulint* write_size, /*!< in/out: Actual write size initialized
- after first successful trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
- trx_t* trx) /*!< in: handed on unchanged to os_aio() */
-{
- ulint mode;
- fil_space_t* space;
- fil_node_t* node;
- ibool ret=TRUE;
- ulint is_log;
- ulint wake_later;
- os_offset_t offset;
- bool ignore_nonexistent_pages;
-
- /* Split the flag bits off type, leaving only the i/o direction. */
- is_log = type & OS_FILE_LOG;
- type = type & ~OS_FILE_LOG;
-
- wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
- type = type & ~OS_AIO_SIMULATED_WAKE_LATER;
-
- ignore_nonexistent_pages = type & BUF_READ_IGNORE_NONEXISTENT_PAGES;
- type &= ~BUF_READ_IGNORE_NONEXISTENT_PAGES;
-
- ut_ad(byte_offset < UNIV_PAGE_SIZE);
- ut_ad(!zip_size || !byte_offset);
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(buf);
- ut_ad(len > 0);
- ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT));
-#if (1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX
-# error "(1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX"
-#endif
-#if (1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN
-# error "(1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN"
-#endif
- ut_ad(fil_validate_skip());
-#ifndef UNIV_HOTBACKUP
-# ifndef UNIV_LOG_DEBUG
- /* ibuf bitmap pages must be read in the sync aio mode: */
- ut_ad(recv_no_ibuf_operations
- || type == OS_FILE_WRITE
- || !ibuf_bitmap_page(zip_size, block_offset)
- || sync
- || is_log);
-# endif /* UNIV_LOG_DEBUG */
- /* Pick the aio mode: sync wins over log, log over ibuf reads. */
- if (sync) {
- mode = OS_AIO_SYNC;
- } else if (is_log) {
- mode = OS_AIO_LOG;
- } else if (type == OS_FILE_READ
- && !recv_no_ibuf_operations
- && ibuf_page(space_id, zip_size, block_offset, NULL)) {
- mode = OS_AIO_IBUF;
- } else {
- mode = OS_AIO_NORMAL;
- }
-#else /* !UNIV_HOTBACKUP */
- ut_a(sync);
- mode = OS_AIO_SYNC;
-#endif /* !UNIV_HOTBACKUP */
-
- /* Update the i/o statistics before the request is issued. */
- if (type == OS_FILE_READ) {
- srv_stats.data_read.add(len);
- } else if (type == OS_FILE_WRITE) {
- ut_ad(!srv_read_only_mode);
- srv_stats.data_written.add(len);
- if (fil_page_is_index_page((byte *)buf)) {
- srv_stats.index_pages_written.inc();
- } else {
- srv_stats.non_index_pages_written.inc();
- }
- }
-
- /* Reserve the fil_system mutex and make sure that we can open at
- least one file while holding it, if the file is not already open */
-
- fil_mutex_enter_and_prepare_for_io(space_id);
-
- space = fil_space_get_by_id(space_id);
-
- /* If we are deleting a tablespace we don't allow async read operations
- on that. However, we do allow write and sync read operations */
- if (space == 0
- || (type == OS_FILE_READ
- && !sync
- && space->stop_new_ops)) {
- mutex_exit(&fil_system->mutex);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Trying to do i/o to a tablespace which does "
- "not exist. i/o type " ULINTPF
- ", space id " ULINTPF " , "
- "page no. " ULINTPF
- ", i/o length " ULINTPF " bytes",
- type, space_id, block_offset,
- len);
-
- return(DB_TABLESPACE_DELETED);
- }
-
- ut_ad(mode != OS_AIO_IBUF || space->purpose == FIL_TABLESPACE);
-
- /* On return block_offset is relative to the start of the node. */
- node = fil_space_get_node(space, space_id, &block_offset, byte_offset, len);
-
- if (!node) {
- if (ignore_nonexistent_pages) {
- mutex_exit(&fil_system->mutex);
- return(DB_ERROR);
- }
-
- /* NOTE(review): node is NULL here and is dereferenced below, so
- this presumably relies on the IB_LOG_LEVEL_FATAL message in
- fil_report_invalid_page_access() never returning -- confirm. */
- fil_report_invalid_page_access(
- block_offset, space_id, space->name,
- byte_offset, len, type);
- }
-
- /* Open file if closed */
- if (!fil_node_prepare_for_io(node, fil_system, space)) {
- if (space->purpose == FIL_TABLESPACE
- && fil_is_user_tablespace_id(space->id)) {
- mutex_exit(&fil_system->mutex);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Trying to do i/o to a tablespace which "
- "exists without .ibd data file. "
- "i/o type " ULINTPF ", space id "
- ULINTPF ", page no " ULINTPF ", "
- "i/o length " ULINTPF " bytes",
- type, space_id,
- block_offset, len);
-
- return(DB_TABLESPACE_DELETED);
- }
-
- /* The tablespace is for log. Currently, we just assert here
- to prevent handling errors along the way fil_io returns.
- Also, if the log files are missing, it would be hard to
- promise the server can continue running. */
- ut_a(0);
- }
-
- /* Check that at least the start offset is within the bounds of a
- single-table tablespace, including rollback tablespaces. */
- if (UNIV_UNLIKELY(node->size <= block_offset)
- && space->id != 0 && space->purpose == FIL_TABLESPACE) {
-
- fil_report_invalid_page_access(
- block_offset, space_id, space->name, byte_offset,
- len, type);
- }
-
- /* Now we have made the changes in the data structures of fil_system */
- mutex_exit(&fil_system->mutex);
-
- /* Calculate the byte offset of the request within the file */
-
- if (!zip_size) {
- offset = ((os_offset_t) block_offset << UNIV_PAGE_SIZE_SHIFT)
- + byte_offset;
-
- /* The request must end within the node. */
- ut_a(node->size - block_offset
- >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1))
- / UNIV_PAGE_SIZE));
- } else {
- ulint zip_size_shift;
- /* zip_size_shift is log2(zip_size). */
- switch (zip_size) {
- case 1024: zip_size_shift = 10; break;
- case 2048: zip_size_shift = 11; break;
- case 4096: zip_size_shift = 12; break;
- case 8192: zip_size_shift = 13; break;
- case 16384: zip_size_shift = 14; break;
- case 32768: zip_size_shift = 15; break;
- case 65536: zip_size_shift = 16; break;
- default: ut_error;
- }
- offset = ((os_offset_t) block_offset << zip_size_shift)
- + byte_offset;
- ut_a(node->size - block_offset
- >= (len + (zip_size - 1)) / zip_size);
- }
-
- /* Do aio */
-
- ut_a(byte_offset % OS_MIN_LOG_BLOCK_SIZE == 0);
- ut_a((len % OS_MIN_LOG_BLOCK_SIZE) == 0);
-
-#ifndef UNIV_HOTBACKUP
- if (UNIV_UNLIKELY(space->is_corrupt && srv_pass_corrupt_table)) {
-
- /* should ignore i/o for the crashed space */
- if (srv_pass_corrupt_table == 1 ||
- type == OS_FILE_WRITE) {
-
- /* No i/o is issued, so undo the pending count taken
- in fil_node_prepare_for_io(). */
- mutex_enter(&fil_system->mutex);
- fil_node_complete_io(node, fil_system, type);
- mutex_exit(&fil_system->mutex);
- if (mode == OS_AIO_NORMAL) {
- ut_a(space->purpose == FIL_TABLESPACE);
- dberr_t err = buf_page_io_complete(static_cast<buf_page_t *>
- (message));
-
- if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Write operation failed for tablespace %s ("
- ULINTPF ") offset " ULINTPF " error=%d.",
- space->name, space->id, byte_offset, err);
- }
- }
- }
-
- if (srv_pass_corrupt_table == 1 && type == OS_FILE_READ) {
-
- return(DB_TABLESPACE_DELETED);
-
- } else if (type == OS_FILE_WRITE) {
-
- return(DB_SUCCESS);
- }
- }
-
- /* Fall back to the space name when the node has no name. */
- const char* name = node->name == NULL ? space->name : node->name;
-
- /* Queue the aio request */
- ret = os_aio(type, is_log, mode | wake_later, name, node->handle, buf,
- offset, len, zip_size ? zip_size : UNIV_PAGE_SIZE, node,
- message, space_id, trx, write_size);
-
-#else
- /* In mysqlbackup do normal i/o, not aio */
- /* NOTE(review): "name" is declared only in the !UNIV_HOTBACKUP branch
- above, so the os_file_write() call below looks like it cannot compile
- when UNIV_HOTBACKUP is defined -- confirm. */
- if (type == OS_FILE_READ) {
- ret = os_file_read(node->handle, buf, offset, len);
- } else {
- ut_ad(!srv_read_only_mode);
- ret = os_file_write(name, node->handle, buf,
- offset, len);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- if (mode == OS_AIO_SYNC) {
- /* The i/o operation is already completed when we return from
- os_aio: */
-
- mutex_enter(&fil_system->mutex);
-
- fil_node_complete_io(node, fil_system, type);
-
- mutex_exit(&fil_system->mutex);
-
- ut_ad(fil_validate_skip());
- }
-
- /* Any failure of the i/o call is reported as out of file space. */
- if (!ret) {
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- return(DB_SUCCESS);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Waits for an aio operation to complete. This function is used to write the
-handler for completed requests. The aio array of pending requests is divided
-into segments (see os0file.cc for more info). The thread specifies which
-segment it wants to wait for. After the wait, the file node bookkeeping is
-updated and the request is handed to buf_page_io_complete() for tablespace
-i/o or to log_io_complete() otherwise. */
-UNIV_INTERN
-void
-fil_aio_wait(
-/*=========*/
- ulint segment) /*!< in: the number of the segment in the aio
- array to wait for */
-{
- ibool ret;
- fil_node_t* fil_node;
- void* message;
- ulint type;
- ulint space_id = 0;
-
- ut_ad(fil_validate_skip());
-
- /* Wait using whichever aio implementation is in use. */
- if (srv_use_native_aio) {
- srv_set_io_thread_op_info(segment, "native aio handle");
-#ifdef WIN_ASYNC_IO
- ret = os_aio_windows_handle(
- segment, 0, &fil_node, &message, &type, &space_id);
-#elif defined(LINUX_NATIVE_AIO)
- ret = os_aio_linux_handle(
- segment, &fil_node, &message, &type, &space_id);
-#else
- ut_error;
- ret = 0; /* Eliminate compiler warning */
-#endif /* WIN_ASYNC_IO */
- } else {
- srv_set_io_thread_op_info(segment, "simulated aio handle");
-
- ret = os_aio_simulated_handle(
- segment, &fil_node, &message, &type, &space_id);
- }
-
- ut_a(ret);
- /* A wait that returns without a node is only expected while the
- threads are being shut down (see the debug assertion). */
- if (fil_node == NULL) {
- ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
- return;
- }
-
- srv_set_io_thread_op_info(segment, "complete io for fil node");
-
- mutex_enter(&fil_system->mutex);
-
- fil_node_complete_io(fil_node, fil_system, type);
- /* Copy what is needed from the space while the mutex is held. */
- ulint purpose = fil_node->space->purpose;
- space_id = fil_node->space->id;
-
- mutex_exit(&fil_system->mutex);
-
- ut_ad(fil_validate_skip());
-
- /* Do the i/o handling */
- /* IMPORTANT: since i/o handling for reads will read also the insert
- buffer in tablespace 0, you have to be very careful not to introduce
- deadlocks in the i/o system. We keep tablespace 0 data files always
- open, and use a special i/o thread to serve insert buffer requests. */
-
- if (purpose == FIL_TABLESPACE) {
- srv_set_io_thread_op_info(segment, "complete io for buf page");
- buf_page_t* bpage = static_cast<buf_page_t*>(message);
- /* The page offset is saved before buf_page_io_complete() is
- called, so the error message below does not read bpage again. */
- ulint offset = bpage->offset;
- dberr_t err = buf_page_io_complete(bpage);
-
- if (err != DB_SUCCESS) {
- ut_ad(type == OS_FILE_READ);
- /* In crash recovery set log corruption on
- and produce only an error to fail InnoDB startup. */
- if (recv_recovery_is_on() && !srv_force_recovery) {
- recv_sys->found_corrupt_log = true;
- }
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Read operation failed for tablespace %s"
- " offset " ULINTPF " with error %s",
- fil_node->name,
- offset,
- ut_strerr(err));
- }
- } else {
- srv_set_io_thread_op_info(segment, "complete io for log");
- log_io_complete(static_cast<log_group_t*>(message));
- }
-}
-#endif /* UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Flushes to disk possible writes cached by the OS. Nothing is done when no
-space with this id is cached or when its stop_new_ops flag is set. */
-UNIV_INTERN
-void
-fil_flush(
-/*======*/
-	ulint	space_id)	/*!< in: file space id (this can be a group of
-				log files or a tablespace of the database) */
-{
-	mutex_enter(&fil_system->mutex);
-
-	fil_space_t*	space = fil_space_get_by_id(space_id);
-
-	if (space != NULL && !space->stop_new_ops) {
-		fil_flush_low(space);
-	}
-
-	mutex_exit(&fil_system->mutex);
-}
-
-/** Flush a tablespace.
-@param[in,out] space tablespace to flush */
-UNIV_INTERN
-void
-fil_flush(fil_space_t* space)
-{
- ut_ad(space->n_pending_ios > 0);
-
- if (!space->is_stopping()) {
- mutex_enter(&fil_system->mutex);
- if (!space->is_stopping()) {
- fil_flush_low(space);
- }
- mutex_exit(&fil_system->mutex);
- }
-}
-
-/** Flush to disk the writes in file spaces of the given type
-possibly cached by the OS.
-@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG */
-UNIV_INTERN
-void
-fil_flush_file_spaces(ulint purpose)
-{
- fil_space_t* space;
- ulint* space_ids;
- ulint n_space_ids;
- ulint i;
-
- mutex_enter(&fil_system->mutex);
-
- n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces);
- if (n_space_ids == 0) {
-
- mutex_exit(&fil_system->mutex);
- return;
- }
-
- /* Assemble a list of space ids to flush. Previously, we
- traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT()
- on a space that was just removed from the list by fil_flush().
- Thus, the space could be dropped and the memory overwritten. */
- space_ids = static_cast<ulint*>(
- mem_alloc(n_space_ids * sizeof *space_ids));
-
- n_space_ids = 0;
-
- for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces);
- space;
- space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
-
- if (space->purpose == purpose && !space->is_stopping()) {
- space_ids[n_space_ids++] = space->id;
- }
- }
-
- mutex_exit(&fil_system->mutex);
-
- /* Flush the spaces. It will not hurt to call fil_flush() on
- a non-existing space id. */
- for (i = 0; i < n_space_ids; i++) {
-
- fil_flush(space_ids[i]);
- }
-
- mem_free(space_ids);
-}
-
-/** Functor to validate the space list. */
-struct Check {
- void operator()(const fil_node_t* elem)
- {
- ut_a(elem->open || !elem->n_pending);
- }
-};
-
-/******************************************************************//**
-Checks the consistency of the tablespace cache.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fil_validate(void)
-/*==============*/
-{
- fil_space_t* space;
- fil_node_t* fil_node;
- ulint n_open = 0;
- ulint i;
-
- mutex_enter(&fil_system->mutex);
-
- /* Look for spaces in the hash table */
-
- for (i = 0; i < hash_get_n_cells(fil_system->spaces); i++) {
-
- for (space = static_cast<fil_space_t*>(
- HASH_GET_FIRST(fil_system->spaces, i));
- space != 0;
- space = static_cast<fil_space_t*>(
- HASH_GET_NEXT(hash, space))) {
-
- UT_LIST_VALIDATE(
- chain, fil_node_t, space->chain, Check());
-
- for (fil_node = UT_LIST_GET_FIRST(space->chain);
- fil_node != 0;
- fil_node = UT_LIST_GET_NEXT(chain, fil_node)) {
-
- if (fil_node->n_pending > 0) {
- ut_a(fil_node->open);
- }
-
- if (fil_node->open) {
- n_open++;
- }
- }
- }
- }
-
- ut_a(fil_system->n_open == n_open);
-
- UT_LIST_CHECK(LRU, fil_node_t, fil_system->LRU);
-
- for (fil_node = UT_LIST_GET_FIRST(fil_system->LRU);
- fil_node != 0;
- fil_node = UT_LIST_GET_NEXT(LRU, fil_node)) {
-
- ut_a(fil_node->n_pending == 0);
- ut_a(!fil_node->being_extended);
- ut_a(fil_node->open);
- ut_a(fil_space_belongs_in_lru(fil_node->space));
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(TRUE);
-}
-
-/********************************************************************//**
-Returns TRUE if file address is undefined.
-@return TRUE if undefined */
-UNIV_INTERN
-ibool
-fil_addr_is_null(
-/*=============*/
- fil_addr_t addr) /*!< in: address */
-{
- return(addr.page == FIL_NULL);
-}
-
-/********************************************************************//**
-Get the predecessor of a file page.
-@return FIL_PAGE_PREV */
-UNIV_INTERN
-ulint
-fil_page_get_prev(
-/*==============*/
- const byte* page) /*!< in: file page */
-{
- return(mach_read_from_4(page + FIL_PAGE_PREV));
-}
-
-/********************************************************************//**
-Get the successor of a file page.
-@return FIL_PAGE_NEXT */
-UNIV_INTERN
-ulint
-fil_page_get_next(
-/*==============*/
- const byte* page) /*!< in: file page */
-{
- return(mach_read_from_4(page + FIL_PAGE_NEXT));
-}
-
-/*********************************************************************//**
-Sets the file page type. */
-UNIV_INTERN
-void
-fil_page_set_type(
-/*==============*/
- byte* page, /*!< in/out: file page */
- ulint type) /*!< in: type */
-{
- ut_ad(page);
-
- mach_write_to_2(page + FIL_PAGE_TYPE, type);
-}
-
-/*********************************************************************//**
-Gets the file page type.
-@return type; NOTE that if the type has not been written to page, the
-return value not defined */
-UNIV_INTERN
-ulint
-fil_page_get_type(
-/*==============*/
- const byte* page) /*!< in: file page */
-{
- ut_ad(page);
-
- return(mach_read_from_2(page + FIL_PAGE_TYPE));
-}
-
-/****************************************************************//**
-Closes the tablespace memory cache. */
-UNIV_INTERN
-void
-fil_close(void)
-/*===========*/
-{
- fil_space_crypt_cleanup();
-
- mutex_free(&fil_system->mutex);
-
- hash_table_free(fil_system->spaces);
-
- hash_table_free(fil_system->name_hash);
-
- ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0);
- ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0);
- ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0);
-
- mem_free(fil_system);
-
- fil_system = NULL;
-}
-
-/********************************************************************//**
-Initializes a buffer control block when the buf_pool is created. */
-static
-void
-fil_buf_block_init(
-/*===============*/
- buf_block_t* block, /*!< in: pointer to control block */
- byte* frame) /*!< in: pointer to buffer frame */
-{
- UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
-
- block->frame = frame;
-
- block->page.io_fix = BUF_IO_NONE;
- /* There are assertions that check for this. */
- block->page.buf_fix_count = 1;
- block->page.state = BUF_BLOCK_READY_FOR_USE;
-
- page_zip_des_init(&block->page.zip);
-}
-
-struct fil_iterator_t {
- pfs_os_file_t file; /*!< File handle */
- const char* filepath; /*!< File path name */
- os_offset_t start; /*!< From where to start */
- os_offset_t end; /*!< Where to stop */
- os_offset_t file_size; /*!< File size in bytes */
- ulint page_size; /*!< Page size */
- ulint n_io_buffers; /*!< Number of pages to use
- for IO */
- byte* io_buffer; /*!< Buffer to use for IO */
- fil_space_crypt_t *crypt_data; /*!< Crypt data (if encrypted) */
- byte* crypt_io_buffer; /*!< IO buffer when encrypted */
-};
-
-/********************************************************************//**
-TODO: This can be made parallel trivially by chunking up the file and creating
-a callback per thread. . Main benefit will be to use multiple CPUs for
-checksums and compressed tables. We have to do compressed tables block by
-block right now. Secondly we need to decompress/compress and copy too much
-of data. These are CPU intensive.
-
-Iterate over all the pages in the tablespace.
-@param iter - Tablespace iterator
-@param block - block to use for IO
-@param callback - Callback to inspect and update page contents
-@retval DB_SUCCESS or error code */
-static
-dberr_t
-fil_iterate(
-/*========*/
- const fil_iterator_t& iter,
- buf_block_t* block,
- PageCallback& callback)
-{
- os_offset_t offset;
- ulint page_no = 0;
- ulint space_id = callback.get_space_id();
- ulint n_bytes = iter.n_io_buffers * iter.page_size;
-
- ut_ad(!srv_read_only_mode);
-
- /* TODO: For compressed tables we do a lot of useless
- copying for non-index pages. Unfortunately, it is
- required by buf_zip_decompress() */
- const bool row_compressed = callback.get_zip_size() > 0;
-
- for (offset = iter.start; offset < iter.end; offset += n_bytes) {
-
- byte* io_buffer = iter.io_buffer;
-
- block->frame = io_buffer;
-
- if (row_compressed) {
- page_zip_des_init(&block->page.zip);
- page_zip_set_size(&block->page.zip, iter.page_size);
- block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
- ut_d(block->page.zip.m_external = true);
- ut_ad(iter.page_size == callback.get_zip_size());
-
- /* Zip IO is done in the compressed page buffer. */
- io_buffer = block->page.zip.data;
- }
-
- /* We have to read the exact number of bytes. Otherwise the
- InnoDB IO functions croak on failed reads. */
-
- n_bytes = static_cast<ulint>(
- ut_min(static_cast<os_offset_t>(n_bytes),
- iter.end - offset));
-
- ut_ad(n_bytes > 0);
- ut_ad(!(n_bytes % iter.page_size));
-
- const bool encrypted = iter.crypt_data != NULL
- && iter.crypt_data->should_encrypt();
- /* Use additional crypt io buffer if tablespace is encrypted */
- byte* const readptr = encrypted
- ? iter.crypt_io_buffer : io_buffer;
- byte* const writeptr = readptr;
-
- if (!os_file_read(iter.file, readptr, offset, (ulint) n_bytes)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed");
-
- return(DB_IO_ERROR);
- }
-
- bool updated = false;
- os_offset_t page_off = offset;
- ulint n_pages_read = (ulint) n_bytes / iter.page_size;
- bool decrypted = false;
-
- for (ulint i = 0; i < n_pages_read; ++i) {
- ulint size = iter.page_size;
- dberr_t err = DB_SUCCESS;
- byte* src = readptr + (i * size);
- byte* dst = io_buffer + (i * size);
- bool frame_changed = false;
-
- ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE);
-
- const bool page_compressed
- = page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED
- || page_type == FIL_PAGE_PAGE_COMPRESSED;
-
- /* If tablespace is encrypted, we need to decrypt
- the page. Note that tablespaces are not in
- fil_system during import. */
- if (encrypted) {
- decrypted = fil_space_decrypt(
- iter.crypt_data,
- dst, //dst
- iter.page_size,
- src, // src
- &err); // src
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- if (decrypted) {
- updated = true;
- } else {
- if (!page_compressed && !row_compressed) {
- block->frame = src;
- frame_changed = true;
- } else {
- memcpy(dst, src, size);
- }
- }
- }
-
- /* If the original page is page_compressed, we need
- to decompress page before we can update it. */
- if (page_compressed) {
- fil_decompress_page(NULL, dst, ulong(size),
- NULL);
- updated = true;
- }
-
- buf_block_set_file_page(block, space_id, page_no++);
-
- if ((err = callback(page_off, block)) != DB_SUCCESS) {
-
- return(err);
-
- } else if (!updated) {
- updated = buf_block_get_state(block)
- == BUF_BLOCK_FILE_PAGE;
- }
-
- buf_block_set_state(block, BUF_BLOCK_NOT_USED);
- buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
-
- /* If tablespace is encrypted we use additional
- temporary scratch area where pages are read
- for decrypting readptr == crypt_io_buffer != io_buffer.
-
- Destination for decryption is a buffer pool block
- block->frame == dst == io_buffer that is updated.
- Pages that did not require decryption even when
- tablespace is marked as encrypted are not copied
- instead block->frame is set to src == readptr.
-
- For encryption we again use temporary scratch area
- writeptr != io_buffer == dst
- that is then written to the tablespace
-
- (1) For normal tables io_buffer == dst == writeptr
- (2) For only page compressed tables
- io_buffer == dst == writeptr
- (3) For encrypted (and page compressed)
- readptr != io_buffer == dst != writeptr
- */
-
- ut_ad(!encrypted && !page_compressed ?
- src == dst && dst == writeptr + (i * size):1);
- ut_ad(page_compressed && !encrypted ?
- src == dst && dst == writeptr + (i * size):1);
- ut_ad(encrypted ?
- src != dst && dst != writeptr + (i * size):1);
-
- if (encrypted) {
- memcpy(writeptr + (i * size),
- row_compressed ? block->page.zip.data :
- block->frame, size);
- }
-
- if (frame_changed) {
- block->frame = dst;
- }
-
- src = io_buffer + (i * size);
-
- if (page_compressed) {
- ulint len = 0;
-
- fil_compress_page(
- NULL,
- src,
- NULL,
- size,
- 0,/* FIXME: compression level */
- 512,/* FIXME: use proper block size */
- encrypted,
- &len);
-
- updated = true;
- }
-
- /* If tablespace is encrypted, encrypt page before we
- write it back. Note that we should not encrypt the
- buffer that is in buffer pool. */
- /* NOTE: At this stage of IMPORT the
- buffer pool is not being used at all! */
- if (decrypted && encrypted) {
- byte *dest = writeptr + (i * size);
- ulint space = mach_read_from_4(
- src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- ulint offset = mach_read_from_4(src + FIL_PAGE_OFFSET);
- ib_uint64_t lsn = mach_read_from_8(src + FIL_PAGE_LSN);
-
- byte* tmp = fil_encrypt_buf(
- iter.crypt_data,
- space,
- offset,
- lsn,
- src,
- iter.page_size == UNIV_PAGE_SIZE ? 0 : iter.page_size,
- dest);
-
- if (tmp == src) {
- /* TODO: remove unnecessary memcpy's */
- memcpy(dest, src, size);
- }
-
- updated = true;
- }
-
- page_off += iter.page_size;
- block->frame += iter.page_size;
- }
-
- /* A page was updated in the set, write back to disk. */
- if (updated
- && !os_file_write(
- iter.filepath, iter.file, writeptr,
- offset, (ulint) n_bytes)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR, "os_file_write() failed");
-
- return(DB_IO_ERROR);
- }
- }
-
- return(DB_SUCCESS);
-}
-
-/********************************************************************//**
-Iterate over all the pages in the tablespace.
-@param table - the table definiton in the server
-@param n_io_buffers - number of blocks to read and write together
-@param callback - functor that will do the page updates
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fil_tablespace_iterate(
-/*===================*/
- dict_table_t* table,
- ulint n_io_buffers,
- PageCallback& callback)
-{
- dberr_t err;
- pfs_os_file_t file;
- char* filepath;
-
- ut_a(n_io_buffers > 0);
- ut_ad(!srv_read_only_mode);
-
- DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
- return(DB_CORRUPTION););
-
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- dict_get_and_save_data_dir_path(table, false);
- ut_a(table->data_dir_path);
-
- filepath = os_file_make_remote_pathname(
- table->data_dir_path, table->name, "ibd");
- } else {
- filepath = fil_make_ibd_name(table->name, false);
- }
-
- {
- ibool success;
-
- file = os_file_create_simple_no_error_handling(
- innodb_file_data_key, filepath,
- OS_FILE_OPEN, OS_FILE_READ_WRITE, &success, FALSE);
-
- DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
- {
- static bool once;
-
- if (!once || ut_rnd_interval(0, 10) == 5) {
- once = true;
- success = FALSE;
- os_file_close(file);
- }
- });
-
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(true);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Trying to import a tablespace, but could not "
- "open the tablespace file %s", filepath);
-
- mem_free(filepath);
-
- return(DB_TABLESPACE_NOT_FOUND);
-
- } else {
- err = DB_SUCCESS;
- }
- }
-
- callback.set_file(filepath, file);
-
- os_offset_t file_size = os_file_get_size(file);
- ut_a(file_size != (os_offset_t) -1);
-
- /* The block we will use for every physical page */
- buf_block_t block;
-
- memset(&block, 0x0, sizeof(block));
-
- /* Allocate a page to read in the tablespace header, so that we
- can determine the page size and zip_size (if it is compressed).
- We allocate an extra page in case it is a compressed table. One
- page is to ensure alignement. */
-
- void* page_ptr = mem_alloc(3 * UNIV_PAGE_SIZE);
- byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
-
- fil_buf_block_init(&block, page);
-
- /* Read the first page and determine the page and zip size. */
-
- if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE)) {
-
- err = DB_IO_ERROR;
-
- } else if ((err = callback.init(file_size, &block)) == DB_SUCCESS) {
- fil_iterator_t iter;
-
- iter.file = file;
- iter.start = 0;
- iter.end = file_size;
- iter.filepath = filepath;
- iter.file_size = file_size;
- iter.n_io_buffers = n_io_buffers;
- iter.page_size = callback.get_page_size();
-
- /* In MariaDB/MySQL 5.6 tablespace does not exist
- during import, therefore we can't use space directly
- here. */
- ulint crypt_data_offset = fsp_header_get_crypt_offset(
- callback.get_zip_size());
-
- /* read (optional) crypt data */
- iter.crypt_data = fil_space_read_crypt_data(
- 0, page, crypt_data_offset);
-
- /* Compressed pages can't be optimised for block IO for now.
- We do the IMPORT page by page. */
-
- if (callback.get_zip_size() > 0) {
- iter.n_io_buffers = 1;
- ut_a(iter.page_size == callback.get_zip_size());
- }
-
- /** If tablespace is encrypted, it needs extra buffers */
- if (iter.crypt_data != NULL) {
- /* decrease io buffers so that memory
- * consumption doesnt double
- * note: the +1 is to avoid n_io_buffers getting down to 0 */
- iter.n_io_buffers = (iter.n_io_buffers + 1) / 2;
- }
-
- /** Add an extra page for compressed page scratch area. */
-
- void* io_buffer = mem_alloc(
- (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
-
- iter.io_buffer = static_cast<byte*>(
- ut_align(io_buffer, UNIV_PAGE_SIZE));
-
- void* crypt_io_buffer = NULL;
- if (iter.crypt_data != NULL) {
- crypt_io_buffer = mem_alloc(
- (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
- iter.crypt_io_buffer = static_cast<byte*>(
- ut_align(crypt_io_buffer, UNIV_PAGE_SIZE));
- }
-
- err = fil_iterate(iter, &block, callback);
-
- mem_free(io_buffer);
-
- if (crypt_io_buffer != NULL) {
- mem_free(crypt_io_buffer);
- iter.crypt_io_buffer = NULL;
- fil_space_destroy_crypt_data(&iter.crypt_data);
- }
- }
-
- if (err == DB_SUCCESS) {
-
- ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk");
-
- if (!os_file_flush(file)) {
- ib_logf(IB_LOG_LEVEL_INFO, "os_file_flush() failed!");
- err = DB_IO_ERROR;
- } else {
- ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk - done!");
- }
- }
-
- os_file_close(file);
-
- mem_free(page_ptr);
- mem_free(filepath);
-
- return(err);
-}
-
-/**
-Set the tablespace compressed table size.
-@return DB_SUCCESS if it is valie or DB_CORRUPTION if not */
-dberr_t
-PageCallback::set_zip_size(const buf_frame_t* page) UNIV_NOTHROW
-{
- m_zip_size = fsp_header_get_zip_size(page);
-
- if (!ut_is_2pow(m_zip_size) || m_zip_size > UNIV_ZIP_SIZE_MAX) {
- return(DB_CORRUPTION);
- }
-
- return(DB_SUCCESS);
-}
-
-/********************************************************************//**
-Delete the tablespace file and any related files like .cfg.
-This should not be called for temporary tables. */
-UNIV_INTERN
-void
-fil_delete_file(
-/*============*/
- const char* ibd_name) /*!< in: filepath of the ibd
- tablespace */
-{
- /* Force a delete of any stale .ibd files that are lying around. */
-
- ib_logf(IB_LOG_LEVEL_INFO, "Deleting %s", ibd_name);
-
- os_file_delete_if_exists(innodb_file_data_key, ibd_name);
-
- char* cfg_name = fil_make_cfg_name(ibd_name);
-
- os_file_delete_if_exists(innodb_file_data_key, cfg_name);
-
- mem_free(cfg_name);
-}
-
-/*************************************************************************
-Return local hash table informations. */
-
-ulint
-fil_system_hash_cells(void)
-/*=======================*/
-{
- if (fil_system) {
- return (fil_system->spaces->n_cells
- + fil_system->name_hash->n_cells);
- } else {
- return 0;
- }
-}
-
-ulint
-fil_system_hash_nodes(void)
-/*=======================*/
-{
- if (fil_system) {
- return (UT_LIST_GET_LEN(fil_system->space_list)
- * (sizeof(fil_space_t) + MEM_BLOCK_HEADER_SIZE));
- } else {
- return 0;
- }
-}
-
-/**
-Iterate over all the spaces in the space list and fetch the
-tablespace names. It will return a copy of the name that must be
-freed by the caller using: delete[].
-@return DB_SUCCESS if all OK. */
-UNIV_INTERN
-dberr_t
-fil_get_space_names(
-/*================*/
- space_name_list_t& space_name_list)
- /*!< in/out: List to append to */
-{
- fil_space_t* space;
- dberr_t err = DB_SUCCESS;
-
- mutex_enter(&fil_system->mutex);
-
- for (space = UT_LIST_GET_FIRST(fil_system->space_list);
- space != NULL;
- space = UT_LIST_GET_NEXT(space_list, space)) {
-
- if (space->purpose == FIL_TABLESPACE) {
- ulint len;
- char* name;
-
- len = strlen(space->name);
- name = new(std::nothrow) char[len + 1];
-
- if (name == 0) {
- /* Caller to free elements allocated so far. */
- err = DB_OUT_OF_MEMORY;
- break;
- }
-
- memcpy(name, space->name, len);
- name[len] = 0;
-
- space_name_list.push_back(name);
- }
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(err);
-}
-
-/** Generate redo log for swapping two .ibd files
-@param[in] old_table old table
-@param[in] new_table new table
-@param[in] tmp_name temporary table name
-@param[in,out] mtr mini-transaction
-@return innodb error code */
-UNIV_INTERN
-dberr_t
-fil_mtr_rename_log(
- const dict_table_t* old_table,
- const dict_table_t* new_table,
- const char* tmp_name,
- mtr_t* mtr)
-{
- dberr_t err = DB_SUCCESS;
- char* old_path;
-
- /* If neither table is file-per-table,
- there will be no renaming of files. */
- if (old_table->space == TRX_SYS_SPACE
- && new_table->space == TRX_SYS_SPACE) {
- return(DB_SUCCESS);
- }
-
- if (DICT_TF_HAS_DATA_DIR(old_table->flags)) {
- old_path = os_file_make_remote_pathname(
- old_table->data_dir_path, old_table->name, "ibd");
- } else {
- old_path = fil_make_ibd_name(old_table->name, false);
- }
- if (old_path == NULL) {
- return(DB_OUT_OF_MEMORY);
- }
-
- if (old_table->space != TRX_SYS_SPACE) {
- char* tmp_path;
-
- if (DICT_TF_HAS_DATA_DIR(old_table->flags)) {
- tmp_path = os_file_make_remote_pathname(
- old_table->data_dir_path, tmp_name, "ibd");
- }
- else {
- tmp_path = fil_make_ibd_name(tmp_name, false);
- }
-
- if (tmp_path == NULL) {
- mem_free(old_path);
- return(DB_OUT_OF_MEMORY);
- }
-
- /* Temp filepath must not exist. */
- err = fil_rename_tablespace_check(
- old_table->space, old_path, tmp_path,
- dict_table_is_discarded(old_table));
- mem_free(tmp_path);
- if (err != DB_SUCCESS) {
- mem_free(old_path);
- return(err);
- }
-
- fil_op_write_log(MLOG_FILE_RENAME, old_table->space,
- 0, 0, old_table->name, tmp_name, mtr);
- }
-
- if (new_table->space != TRX_SYS_SPACE) {
-
- /* Destination filepath must not exist unless this ALTER
- TABLE starts and ends with a file_per-table tablespace. */
- if (old_table->space == TRX_SYS_SPACE) {
- char* new_path = NULL;
-
- if (DICT_TF_HAS_DATA_DIR(new_table->flags)) {
- new_path = os_file_make_remote_pathname(
- new_table->data_dir_path,
- new_table->name, "ibd");
- }
- else {
- new_path = fil_make_ibd_name(
- new_table->name, false);
- }
-
- if (new_path == NULL) {
- mem_free(old_path);
- return(DB_OUT_OF_MEMORY);
- }
-
- err = fil_rename_tablespace_check(
- new_table->space, new_path, old_path,
- dict_table_is_discarded(new_table));
- mem_free(new_path);
- if (err != DB_SUCCESS) {
- mem_free(old_path);
- return(err);
- }
- }
-
- fil_op_write_log(MLOG_FILE_RENAME, new_table->space,
- 0, 0, new_table->name, old_table->name, mtr);
-
- }
-
- mem_free(old_path);
-
- return(err);
-}
-
-/*************************************************************************
-functions to access is_corrupt flag of fil_space_t*/
-
-void
-fil_space_set_corrupt(
-/*==================*/
- ulint space_id)
-{
- fil_space_t* space;
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(space_id);
-
- if (space) {
- space->is_corrupt = true;
- }
-
- mutex_exit(&fil_system->mutex);
-}
-
-/** Acquire a tablespace when it could be dropped concurrently.
-Used by background threads that do not necessarily hold proper locks
-for concurrency control.
-@param[in] id tablespace ID
-@param[in] silent whether to silently ignore missing tablespaces
-@return the tablespace
-@retval NULL if missing or being deleted or truncated */
-UNIV_INTERN
-fil_space_t*
-fil_space_acquire_low(ulint id, bool silent)
-{
- fil_space_t* space;
-
- mutex_enter(&fil_system->mutex);
-
- space = fil_space_get_by_id(id);
-
- if (space == NULL) {
- if (!silent) {
- ib_logf(IB_LOG_LEVEL_WARN, "Trying to access missing"
- " tablespace " ULINTPF ".", id);
- }
- } else if (space->is_stopping()) {
- space = NULL;
- } else {
- space->n_pending_ops++;
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(space);
-}
-
-/** Acquire a tablespace for reading or writing a block,
-when it could be dropped concurrently.
-@param[in] id tablespace ID
-@return the tablespace
-@retval NULL if missing */
-UNIV_INTERN
-fil_space_t*
-fil_space_acquire_for_io(ulint id)
-{
- mutex_enter(&fil_system->mutex);
-
- fil_space_t* space = fil_space_get_by_id(id);
-
- if (space) {
- space->n_pending_ios++;
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(space);
-}
-
-/** Release a tablespace acquired with fil_space_acquire_for_io().
-@param[in,out] space tablespace to release */
-UNIV_INTERN
-void
-fil_space_release_for_io(fil_space_t* space)
-{
- mutex_enter(&fil_system->mutex);
- ut_ad(space->magic_n == FIL_SPACE_MAGIC_N);
- ut_ad(space->n_pending_ios > 0);
- space->n_pending_ios--;
- mutex_exit(&fil_system->mutex);
-}
-
-/** Release a tablespace acquired with fil_space_acquire().
-@param[in,out] space tablespace to release */
-UNIV_INTERN
-void
-fil_space_release(fil_space_t* space)
-{
- mutex_enter(&fil_system->mutex);
- ut_ad(space->magic_n == FIL_SPACE_MAGIC_N);
- ut_ad(space->n_pending_ops > 0);
- space->n_pending_ops--;
- mutex_exit(&fil_system->mutex);
-}
-
-/** Return the next fil_space_t.
-Once started, the caller must keep calling this until it returns NULL.
-fil_space_acquire() and fil_space_release() are invoked here which
-blocks a concurrent operation from dropping the tablespace.
-@param[in] prev_space Pointer to the previous fil_space_t.
-If NULL, use the first fil_space_t on fil_system->space_list.
-@return pointer to the next fil_space_t.
-@retval NULL if this was the last*/
-UNIV_INTERN
-fil_space_t*
-fil_space_next(fil_space_t* prev_space)
-{
- fil_space_t* space=prev_space;
-
- mutex_enter(&fil_system->mutex);
-
- if (prev_space == NULL) {
- space = UT_LIST_GET_FIRST(fil_system->space_list);
-
- /* We can trust that space is not NULL because at least the
- system tablespace is always present and loaded first. */
- space->n_pending_ops++;
- } else {
- ut_ad(space->n_pending_ops > 0);
-
- /* Move on to the next fil_space_t */
- space->n_pending_ops--;
- space = UT_LIST_GET_NEXT(space_list, space);
-
- /* Skip spaces that are being created by
- fil_ibd_create(), or dropped, or !tablespace. */
- while (space != NULL
- && (UT_LIST_GET_LEN(space->chain) == 0
- || space->is_stopping()
- || space->purpose != FIL_TABLESPACE)) {
- space = UT_LIST_GET_NEXT(space_list, space);
- }
-
- if (space != NULL) {
- space->n_pending_ops++;
- }
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(space);
-}
-
-/**
-Remove space from key rotation list if there are no more
-pending operations.
-@param[in] space Tablespace */
-static
-void
-fil_space_remove_from_keyrotation(
- fil_space_t* space)
-{
- ut_ad(mutex_own(&fil_system->mutex));
- ut_ad(space);
-
- if (space->n_pending_ops == 0 && space->is_in_rotation_list) {
- space->is_in_rotation_list = false;
- ut_a(UT_LIST_GET_LEN(fil_system->rotation_list) > 0);
- UT_LIST_REMOVE(rotation_list, fil_system->rotation_list, space);
- }
-}
-
-
-/** Return the next fil_space_t from key rotation list.
-Once started, the caller must keep calling this until it returns NULL.
-fil_space_acquire() and fil_space_release() are invoked here which
-blocks a concurrent operation from dropping the tablespace.
-@param[in] prev_space Pointer to the previous fil_space_t.
-If NULL, use the first fil_space_t on fil_system->space_list.
-@return pointer to the next fil_space_t.
-@retval NULL if this was the last*/
-UNIV_INTERN
-fil_space_t*
-fil_space_keyrotate_next(
- fil_space_t* prev_space)
-{
- fil_space_t* space = prev_space;
- fil_space_t* old = NULL;
-
- mutex_enter(&fil_system->mutex);
-
- if (UT_LIST_GET_LEN(fil_system->rotation_list) == 0) {
- if (space) {
- ut_ad(space->n_pending_ops > 0);
- space->n_pending_ops--;
- fil_space_remove_from_keyrotation(space);
- }
- mutex_exit(&fil_system->mutex);
- return(NULL);
- }
-
- if (prev_space == NULL) {
- space = UT_LIST_GET_FIRST(fil_system->rotation_list);
-
- /* We can trust that space is not NULL because we
- checked list length above */
- } else {
- ut_ad(space->n_pending_ops > 0);
-
- /* Move on to the next fil_space_t */
- space->n_pending_ops--;
-
- old = space;
- space = UT_LIST_GET_NEXT(rotation_list, space);
-
- fil_space_remove_from_keyrotation(old);
- }
-
- /* Skip spaces that are being created by fil_ibd_create(),
- or dropped. Note that rotation_list contains only
- space->purpose == FIL_TABLESPACE. */
- while (space != NULL
- && (UT_LIST_GET_LEN(space->chain) == 0
- || space->is_stopping())) {
-
- old = space;
- space = UT_LIST_GET_NEXT(rotation_list, space);
- fil_space_remove_from_keyrotation(old);
- }
-
- if (space != NULL) {
- space->n_pending_ops++;
- }
-
- mutex_exit(&fil_system->mutex);
-
- return(space);
-}
diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc
deleted file mode 100644
index 2b6ae95640f..00000000000
--- a/storage/xtradb/fil/fil0pagecompress.cc
+++ /dev/null
@@ -1,745 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file fil/fil0pagecompress.cc
-Implementation for page compressed file spaces.
-
-Created 11/12/2013 Jan Lindström jan.lindstrom@mariadb.com
-Updated 14/02/2015
-***********************************************************************/
-
-#include "fil0fil.h"
-#include "fil0pagecompress.h"
-
-#include <debug_sync.h>
-#include <my_dbug.h>
-
-#include "mem0mem.h"
-#include "hash0hash.h"
-#include "os0file.h"
-#include "mach0data.h"
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "log0recv.h"
-#include "fsp0fsp.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "dict0dict.h"
-#include "page0page.h"
-#include "page0zip.h"
-#include "trx0sys.h"
-#include "row0mysql.h"
-#include "ha_prototypes.h" // IB_LOG_
-#ifndef UNIV_HOTBACKUP
-# include "buf0lru.h"
-# include "ibuf0ibuf.h"
-# include "sync0sync.h"
-# include "os0sync.h"
-#else /* !UNIV_HOTBACKUP */
-# include "srv0srv.h"
-static ulint srv_data_read, srv_data_written;
-#endif /* !UNIV_HOTBACKUP */
-#include "zlib.h"
-#ifdef __linux__
-#include <linux/fs.h>
-#include <sys/ioctl.h>
-#include <fcntl.h>
-#endif
-#include "row0mysql.h"
-#ifdef HAVE_LZ4
-#include "lz4.h"
-#endif
-#ifdef HAVE_LZO
-#include "lzo/lzo1x.h"
-#endif
-#ifdef HAVE_LZMA
-#include "lzma.h"
-#endif
-#ifdef HAVE_BZIP2
-#include "bzlib.h"
-#endif
-#ifdef HAVE_SNAPPY
-#include "snappy-c.h"
-#endif
-
-/* Used for debugging */
-//#define UNIV_PAGECOMPRESS_DEBUG 1
-
-/****************************************************************//**
-For page compressed pages compress the page before actual write
-operation.
-@return compressed page to be written*/
-UNIV_INTERN
-byte*
-fil_compress_page(
-/*==============*/
- fil_space_t* space, /*!< in,out: tablespace (NULL during IMPORT) */
- byte* buf, /*!< in: buffer from which to write; in aio
- this must be appropriately aligned */
- byte* out_buf, /*!< out: compressed buffer */
- ulint len, /*!< in: length of input buffer.*/
- ulint level, /* in: compression level */
- ulint block_size, /*!< in: block size */
- bool encrypted, /*!< in: is page also encrypted */
- ulint* out_len) /*!< out: actual length of compressed
- page */
-{
- int err = Z_OK;
- int comp_level = level;
- ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE;
- ulint write_size = 0;
- /* Cache to avoid change during function execution */
- ulint comp_method = innodb_compression_algorithm;
- bool allocated = false;
-
- /* page_compression does not apply to tables or tablespaces
- that use ROW_FORMAT=COMPRESSED */
- ut_ad(!space || !FSP_FLAGS_GET_ZIP_SSIZE(space->flags));
-
- if (encrypted) {
- header_len += FIL_PAGE_COMPRESSION_METHOD_SIZE;
- }
-
- if (!out_buf) {
- allocated = true;
- ulint size = UNIV_PAGE_SIZE;
-
- /* Both snappy and lzo compression methods require that
- output buffer used for compression is bigger than input
- buffer. Increase the allocated buffer size accordingly. */
-#if HAVE_SNAPPY
- if (comp_method == PAGE_SNAPPY_ALGORITHM) {
- size = snappy_max_compressed_length(size);
- }
-#endif
-#if HAVE_LZO
- if (comp_method == PAGE_LZO_ALGORITHM) {
- size += LZO1X_1_15_MEM_COMPRESS;
- }
-#endif
-
- out_buf = static_cast<byte *>(ut_malloc(size));
- }
-
- ut_ad(buf);
- ut_ad(out_buf);
- ut_ad(len);
- ut_ad(out_len);
-
- /* Let's not compress file space header or
- extent descriptor */
- switch (fil_page_get_type(buf)) {
- case 0:
- case FIL_PAGE_TYPE_FSP_HDR:
- case FIL_PAGE_TYPE_XDES:
- case FIL_PAGE_PAGE_COMPRESSED:
- *out_len = len;
- goto err_exit;
- }
-
- /* If no compression level was provided to this table, use system
- default level */
- if (comp_level == 0) {
- comp_level = page_zip_level;
- }
-
- DBUG_PRINT("compress",
- ("Preparing for space " ULINTPF " '%s' len " ULINTPF,
- space ? space->id : 0,
- space ? space->name : "(import)",
- len));
-
- write_size = UNIV_PAGE_SIZE - header_len;
-
- switch(comp_method) {
-#ifdef HAVE_LZ4
- case PAGE_LZ4_ALGORITHM:
-
-#ifdef HAVE_LZ4_COMPRESS_DEFAULT
- err = LZ4_compress_default((const char *)buf,
- (char *)out_buf+header_len, len, write_size);
-#else
- err = LZ4_compress_limitedOutput((const char *)buf,
- (char *)out_buf+header_len, len, write_size);
-#endif /* HAVE_LZ4_COMPRESS_DEFAULT */
- write_size = err;
-
- if (err == 0) {
- /* If error we leave the actual page as it was */
-
-#ifndef UNIV_PAGECOMPRESS_DEBUG
- if (space && !space->printed_compression_failure) {
- space->printed_compression_failure = true;
-#endif
- ib_logf(IB_LOG_LEVEL_WARN,
- "Compression failed for space " ULINTPF
- " name %s len " ULINTPF
- " err %d write_size " ULINTPF ".",
- space->id, space->name, len,
- err, write_size);
-#ifndef UNIV_PAGECOMPRESS_DEBUG
- }
-#endif
- srv_stats.pages_page_compression_error.inc();
- *out_len = len;
- goto err_exit;
- }
- break;
-#endif /* HAVE_LZ4 */
-#ifdef HAVE_LZO
- case PAGE_LZO_ALGORITHM:
- err = lzo1x_1_15_compress(
- buf, len, out_buf+header_len, &write_size, out_buf+UNIV_PAGE_SIZE);
-
- if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE-header_len) {
- if (space && !space->printed_compression_failure) {
- space->printed_compression_failure = true;
- ib_logf(IB_LOG_LEVEL_WARN,
- "Compression failed for space " ULINTPF
- " name %s len " ULINTPF
- " err %d write_size " ULINTPF ".",
- space->id, space->name, len,
- err, write_size);
- }
-
- srv_stats.pages_page_compression_error.inc();
- *out_len = len;
- goto err_exit;
- }
-
- break;
-#endif /* HAVE_LZO */
-#ifdef HAVE_LZMA
- case PAGE_LZMA_ALGORITHM: {
- size_t out_pos=0;
-
- err = lzma_easy_buffer_encode(
- comp_level,
- LZMA_CHECK_NONE,
- NULL, /* No custom allocator, use malloc/free */
- reinterpret_cast<uint8_t*>(buf),
- len,
- reinterpret_cast<uint8_t*>(out_buf + header_len),
- &out_pos,
- (size_t)write_size);
-
- if (err != LZMA_OK || out_pos > UNIV_PAGE_SIZE-header_len) {
- if (space && !space->printed_compression_failure) {
- space->printed_compression_failure = true;
- ib_logf(IB_LOG_LEVEL_WARN,
- "Compression failed for space " ULINTPF
- " name %s len " ULINTPF
- " err %d write_size " ULINTPF ".",
- space->id, space->name, len,
- err, out_pos);
- }
-
- srv_stats.pages_page_compression_error.inc();
- *out_len = len;
- goto err_exit;
- }
-
- write_size = out_pos;
-
- break;
- }
-#endif /* HAVE_LZMA */
-
-#ifdef HAVE_BZIP2
- case PAGE_BZIP2_ALGORITHM: {
-
- err = BZ2_bzBuffToBuffCompress(
- (char *)(out_buf + header_len),
- (unsigned int *)&write_size,
- (char *)buf,
- len,
- 1,
- 0,
- 0);
-
- if (err != BZ_OK || write_size > UNIV_PAGE_SIZE-header_len) {
- if (space && !space->printed_compression_failure) {
- space->printed_compression_failure = true;
- ib_logf(IB_LOG_LEVEL_WARN,
- "Compression failed for space " ULINTPF
- " name %s len " ULINTPF
- " err %d write_size " ULINTPF ".",
- space->id, space->name, len,
- err, write_size);
- }
-
- srv_stats.pages_page_compression_error.inc();
- *out_len = len;
- goto err_exit;
- }
- break;
- }
-#endif /* HAVE_BZIP2 */
-
-#ifdef HAVE_SNAPPY
- case PAGE_SNAPPY_ALGORITHM:
- {
- snappy_status cstatus;
- write_size = snappy_max_compressed_length(UNIV_PAGE_SIZE);
-
- cstatus = snappy_compress(
- (const char *)buf,
- (size_t)len,
- (char *)(out_buf+header_len),
- (size_t*)&write_size);
-
- if (cstatus != SNAPPY_OK || write_size > UNIV_PAGE_SIZE-header_len) {
- if (space && !space->printed_compression_failure) {
- space->printed_compression_failure = true;
- ib_logf(IB_LOG_LEVEL_WARN,
- "Compression failed for space " ULINTPF
- " name %s len " ULINTPF
- " err %d write_size " ULINTPF ".",
- space->id, space->name, len,
- (int)cstatus, write_size);
- }
-
- srv_stats.pages_page_compression_error.inc();
- *out_len = len;
- goto err_exit;
- }
- break;
- }
-#endif /* HAVE_SNAPPY */
-
- case PAGE_ZLIB_ALGORITHM:
- err = compress2(out_buf+header_len, (ulong*)&write_size, buf,
- uLong(len), comp_level);
-
- if (err != Z_OK) {
- /* If error we leave the actual page as it was */
-
- if (space && !space->printed_compression_failure) {
- space->printed_compression_failure = true;
- ib_logf(IB_LOG_LEVEL_WARN,
- "Compression failed for space " ULINTPF
- " name %s len " ULINTPF
- " rt %d write_size " ULINTPF ".",
- space->id, space->name, len,
- err, write_size);
- }
-
- srv_stats.pages_page_compression_error.inc();
- *out_len = len;
- goto err_exit;
- }
- break;
-
- case PAGE_UNCOMPRESSED:
- *out_len = len;
- return (buf);
- break;
- default:
- ut_error;
- break;
- }
-
- /* Set up the page header */
- memcpy(out_buf, buf, FIL_PAGE_DATA);
- /* Set up the checksum */
- mach_write_to_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC);
-
- /* Set up the compression algorithm */
- mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, comp_method);
-
- if (encrypted) {
- /* Set up the correct page type */
- mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
- mach_write_to_2(out_buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, comp_method);
- } else {
- /* Set up the correct page type */
- mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED);
- }
-
- /* Set up the actual payload lenght */
- mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size);
-
-#ifdef UNIV_DEBUG
- /* Verify */
- ut_ad(fil_page_is_compressed(out_buf) || fil_page_is_compressed_encrypted(out_buf));
- ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC);
- ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size);
- ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) == (ulint)comp_method ||
- mach_read_from_2(out_buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE) == (ulint)comp_method);
-
- /* Verify that page can be decompressed */
- {
- byte *comp_page;
- byte *uncomp_page;
-
- comp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE));
- uncomp_page = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE));
- memcpy(comp_page, out_buf, UNIV_PAGE_SIZE);
-
- fil_decompress_page(uncomp_page, comp_page, ulong(len), NULL);
-
- if (buf_page_is_corrupted(false, uncomp_page, 0, space)) {
- buf_page_print(uncomp_page, 0, 0);
- }
-
- ut_free(comp_page);
- ut_free(uncomp_page);
- }
-#endif /* UNIV_DEBUG */
-
- write_size+=header_len;
-
- if (block_size <= 0) {
- block_size = 512;
- }
-
- ut_ad(write_size > 0 && block_size > 0);
-
- /* Actual write needs to be alligned on block size */
- if (write_size % block_size) {
- size_t tmp = write_size;
- write_size = (size_t)ut_uint64_align_up((ib_uint64_t)write_size, block_size);
- /* Clean up the end of buffer */
- memset(out_buf+tmp, 0, write_size - tmp);
-#ifdef UNIV_DEBUG
- ut_a(write_size > 0 && ((write_size % block_size) == 0));
- ut_a(write_size >= tmp);
-#endif
- }
-
- DBUG_PRINT("compress",
- ("Succeeded for space " ULINTPF
- " '%s' len " ULINTPF " out_len " ULINTPF,
- space ? space->id : 0,
- space ? space->name : "(import)",
- len, write_size));
-
- srv_stats.page_compression_saved.add((len - write_size));
- srv_stats.pages_page_compressed.inc();
-
- /* If we do not persistently trim rest of page, we need to write it
- all */
- if (!srv_use_trim) {
- memset(out_buf+write_size,0,len-write_size);
- write_size = len;
- }
-
- *out_len = write_size;
-
- if (allocated) {
- /* TODO: reduce number of memcpy's */
- memcpy(buf, out_buf, len);
- } else {
- return(out_buf);
- }
-
-err_exit:
- if (allocated) {
- ut_free(out_buf);
- }
-
- return (buf);
-
-}
-
-/****************************************************************//**
-For page compressed pages decompress the page after actual read
-operation. */
-UNIV_INTERN
-void
-fil_decompress_page(
-/*================*/
- byte* page_buf, /*!< in: preallocated buffer or NULL */
- byte* buf, /*!< out: buffer from which to read; in aio
- this must be appropriately aligned */
- ulong len, /*!< in: length of output buffer.*/
- ulint* write_size, /*!< in/out: Actual payload size of
- the compressed data. */
- bool return_error) /*!< in: true if only an error should
- be produced when decompression fails.
- By default this parameter is false. */
-{
- int err = 0;
- ulint actual_size = 0;
- ulint compression_alg = 0;
- byte *in_buf;
- ulint ptype;
- ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE;
-
- ut_ad(buf);
- ut_ad(len);
-
- ptype = mach_read_from_2(buf+FIL_PAGE_TYPE);
-
- if (ptype == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
- header_len += FIL_PAGE_COMPRESSION_METHOD_SIZE;
- }
-
- /* Do not try to uncompressed pages that are not compressed */
- if (ptype != FIL_PAGE_PAGE_COMPRESSED &&
- ptype != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED &&
- ptype != FIL_PAGE_TYPE_COMPRESSED) {
- return;
- }
-
- // If no buffer was given, we need to allocate temporal buffer
- if (page_buf == NULL) {
- in_buf = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE));
- memset(in_buf, 0, UNIV_PAGE_SIZE);
- } else {
- in_buf = page_buf;
- }
-
- /* Before actual decompress, make sure that page type is correct */
-
- if (mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM) != BUF_NO_CHECKSUM_MAGIC ||
- (ptype != FIL_PAGE_PAGE_COMPRESSED &&
- ptype != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Corruption: We try to uncompress corrupted page"
- " CRC " ULINTPF " type " ULINTPF " len " ULINTPF ".",
- mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM),
- mach_read_from_2(buf+FIL_PAGE_TYPE), len);
-
- fflush(stderr);
- if (return_error) {
- goto error_return;
- }
- ut_error;
- }
-
- /* Get compression algorithm */
- if (ptype == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) {
- compression_alg = mach_read_from_2(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE);
- } else {
- compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
- }
-
- /* Get the actual size of compressed page */
- actual_size = mach_read_from_2(buf+FIL_PAGE_DATA);
- /* Check if payload size is corrupted */
- if (actual_size == 0 || actual_size > UNIV_PAGE_SIZE) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Corruption: We try to uncompress corrupted page"
- " actual size " ULINTPF " compression %s.",
- actual_size, fil_get_compression_alg_name(compression_alg));
- fflush(stderr);
- if (return_error) {
- goto error_return;
- }
- ut_error;
- }
-
- /* Store actual payload size of the compressed data. This pointer
- points to buffer pool. */
- if (write_size) {
- *write_size = actual_size;
- }
-
- DBUG_PRINT("compress",
- ("Preparing for decompress for len " ULINTPF ".",
- actual_size));
-
- switch(compression_alg) {
- case PAGE_ZLIB_ALGORITHM:
- err= uncompress(in_buf, &len, buf+header_len, (unsigned long)actual_size);
-
- /* If uncompress fails it means that page is corrupted */
- if (err != Z_OK) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Corruption: Page is marked as compressed"
- " but uncompress failed with error %d "
- " size " ULINTPF " len " ULINTPF ".",
- err, actual_size, len);
-
- fflush(stderr);
-
- if (return_error) {
- goto error_return;
- }
- ut_error;
- }
- break;
-
-#ifdef HAVE_LZ4
- case PAGE_LZ4_ALGORITHM:
- err = LZ4_decompress_fast((const char *)buf+header_len, (char *)in_buf, len);
-
- if (err != (int)actual_size) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Corruption: Page is marked as compressed"
- " but uncompress failed with error %d "
- " size " ULINTPF " len " ULINTPF ".",
- err, actual_size, len);
-
- fflush(stderr);
-
- if (return_error) {
- goto error_return;
- }
- ut_error;
- }
- break;
-#endif /* HAVE_LZ4 */
-#ifdef HAVE_LZO
- case PAGE_LZO_ALGORITHM: {
- ulint olen = 0;
- err = lzo1x_decompress((const unsigned char *)buf+header_len,
- actual_size,(unsigned char *)in_buf, &olen, NULL);
-
- if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Corruption: Page is marked as compressed"
- " but uncompress failed with error %d "
- " size " ULINTPF " len " ULINTPF ".",
- err, actual_size, len);
-
- fflush(stderr);
-
- if (return_error) {
- goto error_return;
- }
- ut_error;
- }
- break;
- }
-#endif /* HAVE_LZO */
-#ifdef HAVE_LZMA
- case PAGE_LZMA_ALGORITHM: {
-
- lzma_ret ret;
- size_t src_pos = 0;
- size_t dst_pos = 0;
- uint64_t memlimit = UINT64_MAX;
-
- ret = lzma_stream_buffer_decode(
- &memlimit,
- 0,
- NULL,
- buf+header_len,
- &src_pos,
- actual_size,
- in_buf,
- &dst_pos,
- len);
-
-
- if (ret != LZMA_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Corruption: Page is marked as compressed"
- " but decompression read only %ld bytes"
- " size " ULINTPF "len " ULINTPF ".",
- dst_pos, actual_size, len);
- fflush(stderr);
-
- if (return_error) {
- goto error_return;
- }
- ut_error;
- }
-
- break;
- }
-#endif /* HAVE_LZMA */
-#ifdef HAVE_BZIP2
- case PAGE_BZIP2_ALGORITHM: {
- unsigned int dst_pos = UNIV_PAGE_SIZE;
-
- err = BZ2_bzBuffToBuffDecompress(
- (char *)in_buf,
- &dst_pos,
- (char *)(buf+header_len),
- actual_size,
- 1,
- 0);
-
- if (err != BZ_OK || (dst_pos == 0 || dst_pos > UNIV_PAGE_SIZE)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Corruption: Page is marked as compressed"
- " but decompression read only %du bytes"
- " size " ULINTPF " len " ULINTPF " err %d.",
- dst_pos, actual_size, len, err);
- fflush(stderr);
-
- if (return_error) {
- goto error_return;
- }
- ut_error;
- }
- break;
- }
-#endif /* HAVE_BZIP2 */
-#ifdef HAVE_SNAPPY
- case PAGE_SNAPPY_ALGORITHM:
- {
- snappy_status cstatus;
- ulint olen = UNIV_PAGE_SIZE;
-
- cstatus = snappy_uncompress(
- (const char *)(buf+header_len),
- (size_t)actual_size,
- (char *)in_buf,
- (size_t*)&olen);
-
- if (cstatus != SNAPPY_OK || olen != UNIV_PAGE_SIZE) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Corruption: Page is marked as compressed"
- " but decompression read only " ULINTPF " bytes"
- " size " ULINTPF " len " ULINTPF " err %d.",
- olen, actual_size, len, (int)cstatus);
- fflush(stderr);
-
- if (return_error) {
- goto error_return;
- }
- ut_error;
- }
-
- break;
- }
-#endif /* HAVE_SNAPPY */
- default:
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Corruption: Page is marked as compressed"
- " but compression algorithm %s"
- " is not known."
- ,fil_get_compression_alg_name(compression_alg));
-
- fflush(stderr);
- if (return_error) {
- goto error_return;
- }
- ut_error;
- break;
- }
-
- srv_stats.pages_page_decompressed.inc();
-
- /* Copy the uncompressed page to the buffer pool, not
- really any other options. */
- memcpy(buf, in_buf, len);
-
-error_return:
- if (page_buf != in_buf) {
- ut_free(in_buf);
- }
-}
diff --git a/storage/xtradb/fsp/fsp0fsp.cc b/storage/xtradb/fsp/fsp0fsp.cc
deleted file mode 100644
index df8c6ffe222..00000000000
--- a/storage/xtradb/fsp/fsp0fsp.cc
+++ /dev/null
@@ -1,4171 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file fsp/fsp0fsp.cc
-File space management
-
-Created 11/29/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "fsp0fsp.h"
-
-#ifdef UNIV_NONINL
-#include "fsp0fsp.ic"
-#endif
-
-#include "buf0buf.h"
-#include "fil0fil.h"
-#include "fil0crypt.h"
-#include "mtr0log.h"
-#include "ut0byte.h"
-#include "page0page.h"
-#include "page0zip.h"
-#ifdef UNIV_HOTBACKUP
-# include "fut0lst.h"
-#else /* UNIV_HOTBACKUP */
-# include "sync0sync.h"
-# include "fut0fut.h"
-# include "srv0srv.h"
-# include "ibuf0ibuf.h"
-# include "btr0btr.h"
-# include "btr0sea.h"
-# include "dict0boot.h"
-# include "log0log.h"
-#endif /* UNIV_HOTBACKUP */
-#include "dict0mem.h"
-#include "srv0start.h"
-
-
-#ifndef UNIV_HOTBACKUP
-/** Flag to indicate if we have printed the tablespace full error. */
-static ibool fsp_tbs_full_error_printed = FALSE;
-
-/**********************************************************************//**
-Returns an extent to the free list of a space. */
-static
-void
-fsp_free_extent(
-/*============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset in the extent */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Frees an extent of a segment to the space free list. */
-static
-void
-fseg_free_extent(
-/*=============*/
- fseg_inode_t* seg_inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset in the extent */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Calculates the number of pages reserved by a segment, and how
-many pages are currently used.
-@return number of reserved pages */
-static
-ulint
-fseg_n_reserved_pages_low(
-/*======================*/
- fseg_inode_t* header, /*!< in: segment inode */
- ulint* used, /*!< out: number of pages used (not
- more than reserved) */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/********************************************************************//**
-Marks a page used. The page must reside within the extents of the given
-segment. */
-static MY_ATTRIBUTE((nonnull))
-void
-fseg_mark_page_used(
-/*================*/
- fseg_inode_t* seg_inode,/*!< in: segment inode */
- ulint page, /*!< in: page offset */
- xdes_t* descr, /*!< in: extent descriptor */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Returns the first extent descriptor for a segment. We think of the extent
-lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
--> FSEG_FREE.
-@return the first extent descriptor, or NULL if none */
-static
-xdes_t*
-fseg_get_first_extent(
-/*==================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Puts new extents to the free list if
-there are free extents above the free limit. If an extent happens
-to contain an extent descriptor page, the extent is put to
-the FSP_FREE_FRAG list with the page marked as used. */
-static
-void
-fsp_fill_free_list(
-/*===============*/
- ibool init_space, /*!< in: TRUE if this is a single-table
- tablespace and we are only initing
- the tablespace's first extent
- descriptor page and ibuf bitmap page;
- then we do not allocate more extents */
- ulint space, /*!< in: space */
- fsp_header_t* header, /*!< in/out: space header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- UNIV_COLD;
-/**********************************************************************//**
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation.
-@retval NULL if no page could be allocated
-@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
-(init_mtr == mtr, or the page was not previously freed in mtr)
-@retval block (not allocated or initialized) otherwise */
-static
-buf_block_t*
-fseg_alloc_free_page_low(
-/*=====================*/
- ulint space, /*!< in: space */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fseg_inode_t* seg_inode, /*!< in/out: segment inode */
- ulint hint, /*!< in: hint of which page would be
- desirable */
- byte direction, /*!< in: if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction
- in which the page should be initialized.
- If init_mtr!=mtr, but the page is already
- latched in mtr, do not initialize the page. */
- MY_ATTRIBUTE((warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Reads the file space size stored in the header page.
-@return tablespace size stored in the space header */
-UNIV_INTERN
-ulint
-fsp_get_size_low(
-/*=============*/
- page_t* page) /*!< in: header page (page 0 in the tablespace) */
-{
- return(mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SIZE));
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Gets a pointer to the space header and x-locks its page.
-@return pointer to the space header, page x-locked */
-UNIV_INLINE
-fsp_header_t*
-fsp_get_space_header(
-/*=================*/
- ulint id, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- buf_block_t* block;
- fsp_header_t* header;
-
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
- ut_ad(!zip_size || zip_size >= UNIV_ZIP_SIZE_MIN);
- ut_ad(id || !zip_size);
-
- block = buf_page_get(id, zip_size, 0, RW_X_LATCH, mtr);
-
- SRV_CORRUPT_TABLE_CHECK(block, return(0););
-
- header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
- buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
-
- ut_ad(id == mach_read_from_4(FSP_SPACE_ID + header));
- ut_ad(zip_size == fsp_flags_get_zip_size(
- mach_read_from_4(FSP_SPACE_FLAGS + header)));
- return(header);
-}
-
-/**********************************************************************//**
-Gets a descriptor bit of a page.
-@return TRUE if free */
-UNIV_INLINE
-ibool
-xdes_mtr_get_bit(
-/*=============*/
- const xdes_t* descr, /*!< in: descriptor */
- ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
- ulint offset, /*!< in: page offset within extent:
- 0 ... FSP_EXTENT_SIZE - 1 */
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
-
- return(xdes_get_bit(descr, bit, offset));
-}
-
-/**********************************************************************//**
-Sets a descriptor bit of a page. */
-UNIV_INLINE
-void
-xdes_set_bit(
-/*=========*/
- xdes_t* descr, /*!< in: descriptor */
- ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
- ulint offset, /*!< in: page offset within extent:
- 0 ... FSP_EXTENT_SIZE - 1 */
- ibool val, /*!< in: bit value */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint index;
- ulint byte_index;
- ulint bit_index;
- ulint descr_byte;
-
- ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
- ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT));
- ut_ad(offset < FSP_EXTENT_SIZE);
-
- index = bit + XDES_BITS_PER_PAGE * offset;
-
- byte_index = index / 8;
- bit_index = index % 8;
-
- descr_byte = mtr_read_ulint(descr + XDES_BITMAP + byte_index,
- MLOG_1BYTE, mtr);
- descr_byte = ut_bit_set_nth(descr_byte, bit_index, val);
-
- mlog_write_ulint(descr + XDES_BITMAP + byte_index, descr_byte,
- MLOG_1BYTE, mtr);
-}
-
-/**********************************************************************//**
-Looks for a descriptor bit having the desired value. Starts from hint
-and scans upward; at the end of the extent the search is wrapped to
-the start of the extent.
-@return bit index of the bit, ULINT_UNDEFINED if not found */
-UNIV_INLINE
-ulint
-xdes_find_bit(
-/*==========*/
- xdes_t* descr, /*!< in: descriptor */
- ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
- ibool val, /*!< in: desired bit value */
- ulint hint, /*!< in: hint of which bit position would
- be desirable */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint i;
-
- ut_ad(descr && mtr);
- ut_ad(val <= TRUE);
- ut_ad(hint < FSP_EXTENT_SIZE);
- ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
- for (i = hint; i < FSP_EXTENT_SIZE; i++) {
- if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) {
-
- return(i);
- }
- }
-
- for (i = 0; i < hint; i++) {
- if (val == xdes_mtr_get_bit(descr, bit, i, mtr)) {
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Returns the number of used pages in a descriptor.
-@return number of pages used */
-UNIV_INLINE
-ulint
-xdes_get_n_used(
-/*============*/
- const xdes_t* descr, /*!< in: descriptor */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint count = 0;
-
- ut_ad(descr && mtr);
- ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
- for (ulint i = 0; i < FSP_EXTENT_SIZE; ++i) {
- if (FALSE == xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
- count++;
- }
- }
-
- return(count);
-}
-
-/**********************************************************************//**
-Returns true if extent contains no used pages.
-@return TRUE if totally free */
-UNIV_INLINE
-ibool
-xdes_is_free(
-/*=========*/
- const xdes_t* descr, /*!< in: descriptor */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- if (0 == xdes_get_n_used(descr, mtr)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-Returns true if extent contains no free pages.
-@return TRUE if full */
-UNIV_INLINE
-ibool
-xdes_is_full(
-/*=========*/
- const xdes_t* descr, /*!< in: descriptor */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- if (FSP_EXTENT_SIZE == xdes_get_n_used(descr, mtr)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-Sets the state of an xdes. */
-UNIV_INLINE
-void
-xdes_set_state(
-/*===========*/
- xdes_t* descr, /*!< in/out: descriptor */
- ulint state, /*!< in: state to set */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ut_ad(descr && mtr);
- ut_ad(state >= XDES_FREE);
- ut_ad(state <= XDES_FSEG);
- ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
-
- mlog_write_ulint(descr + XDES_STATE, state, MLOG_4BYTES, mtr);
-}
-
-/**********************************************************************//**
-Gets the state of an xdes.
-@return state */
-UNIV_INLINE
-ulint
-xdes_get_state(
-/*===========*/
- const xdes_t* descr, /*!< in: descriptor */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint state;
-
- ut_ad(descr && mtr);
- ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
-
- state = mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr);
- ut_ad(state - 1 < XDES_FSEG);
- return(state);
-}
-
-/**********************************************************************//**
-Inits an extent descriptor to the free and clean state. */
-UNIV_INLINE
-void
-xdes_init(
-/*======*/
- xdes_t* descr, /*!< in: descriptor */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint i;
-
- ut_ad(descr && mtr);
- ut_ad(mtr_memo_contains_page(mtr, descr, MTR_MEMO_PAGE_X_FIX));
- ut_ad((XDES_SIZE - XDES_BITMAP) % 4 == 0);
-
- for (i = XDES_BITMAP; i < XDES_SIZE; i += 4) {
- mlog_write_ulint(descr + i, 0xFFFFFFFFUL, MLOG_4BYTES, mtr);
- }
-
- xdes_set_state(descr, XDES_FREE, mtr);
-}
-
-/********************************************************************//**
-Gets pointer to a the extent descriptor of a page. The page where the extent
-descriptor resides is x-locked. This function no longer extends the data
-file.
-@return pointer to the extent descriptor, NULL if the page does not
-exist in the space or if the offset is >= the free limit */
-UNIV_INLINE MY_ATTRIBUTE((nonnull, warn_unused_result))
-xdes_t*
-xdes_get_descriptor_with_space_hdr(
-/*===============================*/
- fsp_header_t* sp_header, /*!< in/out: space header, x-latched
- in mtr */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page offset; if equal
- to the free limit, we try to
- add new extents to the space
- free list */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint limit;
- ulint size;
- ulint zip_size;
- ulint descr_page_no;
- page_t* descr_page;
-
- ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
- MTR_MEMO_X_LOCK));
- ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX));
- ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET);
- /* Read free limit and space size */
- limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT);
- size = mach_read_from_4(sp_header + FSP_SIZE);
- zip_size = fsp_flags_get_zip_size(
- mach_read_from_4(sp_header + FSP_SPACE_FLAGS));
-
- if ((offset >= size) || (offset >= limit)) {
- return(NULL);
- }
-
- descr_page_no = xdes_calc_descriptor_page(zip_size, offset);
-
- if (descr_page_no == 0) {
- /* It is on the space header page */
-
- descr_page = page_align(sp_header);
- } else {
- buf_block_t* block;
-
- block = buf_page_get(space, zip_size, descr_page_no,
- RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
-
- descr_page = buf_block_get_frame(block);
- }
-
- return(descr_page + XDES_ARR_OFFSET
- + XDES_SIZE * xdes_calc_descriptor_index(zip_size, offset));
-}
-
-/********************************************************************//**
-Gets pointer to a the extent descriptor of a page. The page where the extent
-descriptor resides is x-locked. This function no longer extends the data
-file.
-@return pointer to the extent descriptor, NULL if the page does not
-exist in the space or if the offset exceeds the free limit */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-xdes_t*
-xdes_get_descriptor(
-/*================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint offset, /*!< in: page offset; if equal to the free limit,
- we try to add new extents to the space free list */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- buf_block_t* block;
- fsp_header_t* sp_header;
-
- block = buf_page_get(space, zip_size, 0, RW_X_LATCH, mtr);
-
- SRV_CORRUPT_TABLE_CHECK(block, return(0););
-
- buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
-
- sp_header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
- return(xdes_get_descriptor_with_space_hdr(sp_header, space, offset,
- mtr));
-}
-
-/********************************************************************//**
-Gets pointer to a the extent descriptor if the file address
-of the descriptor list node is known. The page where the
-extent descriptor resides is x-locked.
-@return pointer to the extent descriptor */
-UNIV_INLINE
-xdes_t*
-xdes_lst_get_descriptor(
-/*====================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fil_addr_t lst_node,/*!< in: file address of the list node
- contained in the descriptor */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- xdes_t* descr;
-
- ut_ad(mtr);
- ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
- MTR_MEMO_X_LOCK));
- descr = fut_get_ptr(space, zip_size, lst_node, RW_X_LATCH, mtr)
- - XDES_FLST_NODE;
-
- return(descr);
-}
-
-/********************************************************************//**
-Returns page offset of the first page in extent described by a descriptor.
-@return offset of the first page in extent */
-UNIV_INLINE
-ulint
-xdes_get_offset(
-/*============*/
- const xdes_t* descr) /*!< in: extent descriptor */
-{
- ut_ad(descr);
-
- return(page_get_page_no(page_align(descr))
- + ((page_offset(descr) - XDES_ARR_OFFSET) / XDES_SIZE)
- * FSP_EXTENT_SIZE);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Inits a file page whose prior contents should be ignored. */
-static
-void
-fsp_init_file_page_low(
-/*===================*/
- buf_block_t* block) /*!< in: pointer to a page */
-{
- page_t* page = buf_block_get_frame(block);
- page_zip_des_t* page_zip= buf_block_get_page_zip(block);
-
-#ifndef UNIV_HOTBACKUP
- block->check_index_page_at_flush = FALSE;
-#endif /* !UNIV_HOTBACKUP */
-
- if (page_zip) {
- memset(page, 0, UNIV_PAGE_SIZE);
- memset(page_zip->data, 0, page_zip_get_size(page_zip));
- mach_write_to_4(page + FIL_PAGE_OFFSET,
- buf_block_get_page_no(block));
- mach_write_to_4(page
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
- buf_block_get_space(block));
- memcpy(page_zip->data + FIL_PAGE_OFFSET,
- page + FIL_PAGE_OFFSET, 4);
- memcpy(page_zip->data + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
- page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 4);
- return;
- }
-
- memset(page, 0, UNIV_PAGE_SIZE);
- mach_write_to_4(page + FIL_PAGE_OFFSET, buf_block_get_page_no(block));
- mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
- buf_block_get_space(block));
-}
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************//**
-Inits a file page whose prior contents should be ignored. */
-static
-void
-fsp_init_file_page(
-/*===============*/
- buf_block_t* block, /*!< in: pointer to a page */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- fsp_init_file_page_low(block);
-
- mlog_write_initial_log_record(buf_block_get_frame(block),
- MLOG_INIT_FILE_PAGE, mtr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses a redo log record of a file page init.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-fsp_parse_init_file_page(
-/*=====================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */
- buf_block_t* block) /*!< in: block or NULL */
-{
- ut_ad(ptr && end_ptr);
-
- if (block) {
- fsp_init_file_page_low(block);
- }
-
- return(ptr);
-}
-
-/**********************************************************************//**
-Initializes the fsp system. */
-UNIV_INTERN
-void
-fsp_init(void)
-/*==========*/
-{
- /* FSP_EXTENT_SIZE must be a multiple of page & zip size */
- ut_a(0 == (UNIV_PAGE_SIZE % FSP_EXTENT_SIZE));
- ut_a(UNIV_PAGE_SIZE);
-
-#if UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX
-# error "UNIV_PAGE_SIZE_MAX % FSP_EXTENT_SIZE_MAX != 0"
-#endif
-#if UNIV_ZIP_SIZE_MIN % FSP_EXTENT_SIZE_MIN
-# error "UNIV_ZIP_SIZE_MIN % FSP_EXTENT_SIZE_MIN != 0"
-#endif
-
- /* Does nothing at the moment */
-}
-
-/**********************************************************************//**
-Writes the space id and flags to a tablespace header. The flags contain
-row type, physical/compressed page size, and logical/uncompressed page
-size of the tablespace. */
-UNIV_INTERN
-void
-fsp_header_init_fields(
-/*===================*/
- page_t* page, /*!< in/out: first page in the space */
- ulint space_id, /*!< in: space id */
- ulint flags) /*!< in: tablespace flags (FSP_SPACE_FLAGS) */
-{
- flags &= ~FSP_FLAGS_MEM_MASK;
- ut_a(fsp_flags_is_valid(flags));
-
- mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page,
- space_id);
- mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page,
- flags);
-}
-
-#ifndef UNIV_HOTBACKUP
-/** Initialize a tablespace header.
-@param[in] space_id space id
-@param[in] size current size in blocks
-@param[in,out] mtr mini-transaction */
-UNIV_INTERN
-void
-fsp_header_init(ulint space_id, ulint size, mtr_t* mtr)
-{
- fsp_header_t* header;
- buf_block_t* block;
- page_t* page;
- ulint flags;
- ulint zip_size;
-
- ut_ad(mtr);
-
- mtr_x_lock(fil_space_get_latch(space_id, &flags), mtr);
-
- zip_size = fsp_flags_get_zip_size(flags);
- block = buf_page_create(space_id, 0, zip_size, mtr);
- buf_page_get(space_id, zip_size, 0, RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
-
- /* The prior contents of the file page should be ignored */
-
- fsp_init_file_page(block, mtr);
- page = buf_block_get_frame(block);
-
- mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_FSP_HDR,
- MLOG_2BYTES, mtr);
-
- header = FSP_HEADER_OFFSET + page;
-
- mlog_write_ulint(header + FSP_SPACE_ID, space_id, MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_NOT_USED, 0, MLOG_4BYTES, mtr);
-
- mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_SPACE_FLAGS, flags & ~FSP_FLAGS_MEM_MASK,
- MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr);
-
- flst_init(header + FSP_FREE, mtr);
- flst_init(header + FSP_FREE_FRAG, mtr);
- flst_init(header + FSP_FULL_FRAG, mtr);
- flst_init(header + FSP_SEG_INODES_FULL, mtr);
- flst_init(header + FSP_SEG_INODES_FREE, mtr);
-
- mlog_write_ull(header + FSP_SEG_ID, 1, mtr);
-
- fsp_fill_free_list(space_id != TRX_SYS_SPACE, space_id, header, mtr);
-
- fil_space_t* space = fil_space_acquire(space_id);
- ut_ad(space);
-
- if (space->crypt_data) {
- space->crypt_data->write_page0(page, mtr);
- }
-
- fil_space_release(space);
-}
-
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Reads the space id from the first page of a tablespace.
-@return space id, ULINT UNDEFINED if error */
-UNIV_INTERN
-ulint
-fsp_header_get_space_id(
-/*====================*/
- const page_t* page) /*!< in: first page of a tablespace */
-{
- ulint fsp_id;
- ulint id;
-
- fsp_id = mach_read_from_4(FSP_HEADER_OFFSET + page + FSP_SPACE_ID);
-
- id = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-
- DBUG_EXECUTE_IF("fsp_header_get_space_id_failure",
- id = ULINT_UNDEFINED;);
-
- if (id != fsp_id) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Space id in fsp header %lu,but in the page header "
- "%lu", fsp_id, id);
-
- return(ULINT_UNDEFINED);
- }
-
- return(id);
-}
-
-/**********************************************************************//**
-Reads the space flags from the first page of a tablespace.
-@return flags */
-UNIV_INTERN
-ulint
-fsp_header_get_flags(
-/*=================*/
- const page_t* page) /*!< in: first page of a tablespace */
-{
- ut_ad(!page_offset(page));
-
- return(mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page));
-}
-
-/**********************************************************************//**
-Reads the compressed page size from the first page of a tablespace.
-@return compressed page size in bytes, or 0 if uncompressed */
-UNIV_INTERN
-ulint
-fsp_header_get_zip_size(
-/*====================*/
- const page_t* page) /*!< in: first page of a tablespace */
-{
- ulint flags = fsp_header_get_flags(page);
-
- return(fsp_flags_get_zip_size(flags));
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Increases the space size field of a space. */
-UNIV_INTERN
-void
-fsp_header_inc_size(
-/*================*/
- ulint space, /*!< in: space id */
- ulint size_inc, /*!< in: size increment in pages */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- fsp_header_t* header;
- ulint size;
- ulint flags;
-
- ut_ad(mtr);
-
- mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
-
- header = fsp_get_space_header(space,
- fsp_flags_get_zip_size(flags),
- mtr);
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
-
- mlog_write_ulint(header + FSP_SIZE, size + size_inc, MLOG_4BYTES,
- mtr);
-}
-
-/**********************************************************************//**
-Gets the size of the system tablespace from the tablespace header. If
-we do not have an auto-extending data file, this should be equal to
-the size of the data files. If there is an auto-extending data file,
-this can be smaller.
-@return size in pages */
-UNIV_INTERN
-ulint
-fsp_header_get_tablespace_size(void)
-/*================================*/
-{
- fsp_header_t* header;
- ulint size;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- mtr_x_lock(fil_space_get_latch(0, NULL), &mtr);
-
- header = fsp_get_space_header(0, 0, &mtr);
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
-
- mtr_commit(&mtr);
-
- return(size);
-}
-
-/***********************************************************************//**
-Tries to extend a single-table tablespace so that a page would fit in the
-data file.
-@return TRUE if success */
-static UNIV_COLD MY_ATTRIBUTE((nonnull, warn_unused_result))
-ibool
-fsp_try_extend_data_file_with_pages(
-/*================================*/
- ulint space, /*!< in: space */
- ulint page_no, /*!< in: page number */
- fsp_header_t* header, /*!< in/out: space header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ibool success;
- ulint actual_size;
- ulint size;
-
- ut_a(space != 0);
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
-
- ut_a(page_no >= size);
-
- success = fil_extend_space_to_desired_size(&actual_size, space,
- page_no + 1);
- /* actual_size now has the space size in pages; it may be less than
- we wanted if we ran out of disk space */
-
- mlog_write_ulint(header + FSP_SIZE, actual_size, MLOG_4BYTES, mtr);
-
- return(success);
-}
-
-/***********************************************************************//**
-Tries to extend the last data file of a tablespace if it is auto-extending.
-@return FALSE if not auto-extending */
-static UNIV_COLD MY_ATTRIBUTE((nonnull))
-ibool
-fsp_try_extend_data_file(
-/*=====================*/
- ulint* actual_increase,/*!< out: actual increase in pages, where
- we measure the tablespace size from
- what the header field says; it may be
- the actual file size rounded down to
- megabyte */
- ulint space, /*!< in: space */
- fsp_header_t* header, /*!< in/out: space header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint size;
- ulint zip_size;
- ulint new_size;
- ulint old_size;
- ulint size_increase;
- ulint actual_size;
- ibool success;
-
- *actual_increase = 0;
-
- if (space == 0 && !srv_auto_extend_last_data_file) {
-
- /* We print the error message only once to avoid
- spamming the error log. Note that we don't need
- to reset the flag to FALSE as dealing with this
- error requires server restart. */
- if (fsp_tbs_full_error_printed == FALSE) {
- fprintf(stderr,
- "InnoDB: Error: Data file(s) ran"
- " out of space.\n"
- "Please add another data file or"
- " use \'autoextend\' for the last"
- " data file.\n");
- fsp_tbs_full_error_printed = TRUE;
- }
- return(FALSE);
- }
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
- zip_size = fsp_flags_get_zip_size(
- mach_read_from_4(header + FSP_SPACE_FLAGS));
-
- old_size = size;
-
- if (space == 0) {
- if (!srv_last_file_size_max) {
- size_increase = SRV_AUTO_EXTEND_INCREMENT;
- } else {
- if (srv_last_file_size_max
- < srv_data_file_sizes[srv_n_data_files - 1]) {
-
- fprintf(stderr,
- "InnoDB: Error: Last data file size"
- " is %lu, max size allowed %lu\n",
- (ulong) srv_data_file_sizes[
- srv_n_data_files - 1],
- (ulong) srv_last_file_size_max);
- }
-
- size_increase = srv_last_file_size_max
- - srv_data_file_sizes[srv_n_data_files - 1];
- if (size_increase > SRV_AUTO_EXTEND_INCREMENT) {
- size_increase = SRV_AUTO_EXTEND_INCREMENT;
- }
- }
- } else {
- /* We extend single-table tablespaces first one extent
- at a time, but 4 at a time for bigger tablespaces. It is
- not enough to extend always by one extent, because we need
- to add at least one extent to FSP_FREE.
- A single extent descriptor page will track many extents.
- And the extent that uses its extent descriptor page is
- put onto the FSP_FREE_FRAG list. Extents that do not
- use their extent descriptor page are added to FSP_FREE.
- The physical page size is used to determine how many
- extents are tracked on one extent descriptor page. */
- ulint extent_size; /*!< one megabyte, in pages */
- ulint threshold; /*!< The size of the tablespace
- (in number of pages) where we
- start allocating more than one
- extent at a time. */
-
- if (!zip_size) {
- extent_size = FSP_EXTENT_SIZE;
- } else {
- extent_size = FSP_EXTENT_SIZE
- * UNIV_PAGE_SIZE / zip_size;
- }
-
- /* Threshold is set at 32mb except when the page
- size is small enough that it must be done sooner.
- For page size less than 4k, we may reach the
- extent contains extent descriptor page before
- 32 mb. */
- threshold = ut_min((32 * extent_size),
- (zip_size ? zip_size : UNIV_PAGE_SIZE));
-
- if (size < extent_size) {
- /* Let us first extend the file to extent_size */
- success = fsp_try_extend_data_file_with_pages(
- space, extent_size - 1, header, mtr);
- if (!success) {
- new_size = mtr_read_ulint(header + FSP_SIZE,
- MLOG_4BYTES, mtr);
-
- *actual_increase = new_size - old_size;
-
- return(FALSE);
- }
-
- size = extent_size;
- }
-
- if (size < threshold) {
- size_increase = extent_size;
- } else {
- /* Below in fsp_fill_free_list() we assume
- that we add at most FSP_FREE_ADD extents at
- a time */
- size_increase = FSP_FREE_ADD * extent_size;
- }
- }
-
- if (size_increase == 0) {
-
- return(TRUE);
- }
-
- success = fil_extend_space_to_desired_size(&actual_size, space,
- size + size_increase);
- if (!success) {
-
- return(false);
- }
-
- /* We ignore any fragments of a full megabyte when storing the size
- to the space header */
-
- if (!zip_size) {
- new_size = ut_calc_align_down(actual_size,
- (1024 * 1024) / UNIV_PAGE_SIZE);
- } else {
- new_size = ut_calc_align_down(actual_size,
- (1024 * 1024) / zip_size);
- }
- mlog_write_ulint(header + FSP_SIZE, new_size, MLOG_4BYTES, mtr);
-
- *actual_increase = new_size - old_size;
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Puts new extents to the free list if there are free extents above the free
-limit. If an extent happens to contain an extent descriptor page, the extent
-is put to the FSP_FREE_FRAG list with the page marked as used. */
-static
-void
-fsp_fill_free_list(
-/*===============*/
- ibool init_space, /*!< in: TRUE if this is a single-table
- tablespace and we are only initing
- the tablespace's first extent
- descriptor page and ibuf bitmap page;
- then we do not allocate more extents */
- ulint space, /*!< in: space */
- fsp_header_t* header, /*!< in/out: space header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint limit;
- ulint size;
- ulint zip_size;
- xdes_t* descr;
- ulint count = 0;
- ulint frag_n_used;
- ulint actual_increase;
- ulint i;
- mtr_t ibuf_mtr;
-
- ut_ad(page_offset(header) == FSP_HEADER_OFFSET);
-
- /* Check if we can fill free list from above the free list limit */
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
- limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
-
- zip_size = fsp_flags_get_zip_size(
- mach_read_from_4(FSP_SPACE_FLAGS + header));
- ut_a(ut_is_2pow(zip_size));
- ut_a(zip_size <= UNIV_ZIP_SIZE_MAX);
- ut_a(!zip_size || zip_size >= UNIV_ZIP_SIZE_MIN);
-
- if (space == 0 && srv_auto_extend_last_data_file
- && size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
-
- /* Try to increase the last data file size */
- fsp_try_extend_data_file(&actual_increase, space, header, mtr);
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
- }
-
- if (space != 0 && !init_space
- && size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
-
- /* Try to increase the .ibd file size */
- fsp_try_extend_data_file(&actual_increase, space, header, mtr);
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
- }
-
- i = limit;
-
- while ((init_space && i < 1)
- || ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD))) {
-
- ibool init_xdes;
- if (zip_size) {
- init_xdes = ut_2pow_remainder(i, zip_size) == 0;
- } else {
- init_xdes = ut_2pow_remainder(i, UNIV_PAGE_SIZE) == 0;
- }
-
- mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE,
- MLOG_4BYTES, mtr);
-
- if (UNIV_UNLIKELY(init_xdes)) {
-
- buf_block_t* block;
-
- /* We are going to initialize a new descriptor page
- and a new ibuf bitmap page: the prior contents of the
- pages should be ignored. */
-
- if (i > 0) {
- block = buf_page_create(
- space, i, zip_size, mtr);
- buf_page_get(space, zip_size, i,
- RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block,
- SYNC_FSP_PAGE);
-
- fsp_init_file_page(block, mtr);
- mlog_write_ulint(buf_block_get_frame(block)
- + FIL_PAGE_TYPE,
- FIL_PAGE_TYPE_XDES,
- MLOG_2BYTES, mtr);
- }
-
- /* Initialize the ibuf bitmap page in a separate
- mini-transaction because it is low in the latching
- order, and we must be able to release its latch
- before returning from the fsp routine */
-
- mtr_start(&ibuf_mtr);
-
- block = buf_page_create(space,
- i + FSP_IBUF_BITMAP_OFFSET,
- zip_size, &ibuf_mtr);
- buf_page_get(space, zip_size,
- i + FSP_IBUF_BITMAP_OFFSET,
- RW_X_LATCH, &ibuf_mtr);
- buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
-
- fsp_init_file_page(block, &ibuf_mtr);
-
- ibuf_bitmap_page_init(block, &ibuf_mtr);
-
- mtr_commit(&ibuf_mtr);
- }
-
- descr = xdes_get_descriptor_with_space_hdr(header, space, i,
- mtr);
- xdes_init(descr, mtr);
-
- if (UNIV_UNLIKELY(init_xdes)) {
-
- /* The first page in the extent is a descriptor page
- and the second is an ibuf bitmap page: mark them
- used */
-
- xdes_set_bit(descr, XDES_FREE_BIT, 0, FALSE, mtr);
- xdes_set_bit(descr, XDES_FREE_BIT,
- FSP_IBUF_BITMAP_OFFSET, FALSE, mtr);
- xdes_set_state(descr, XDES_FREE_FRAG, mtr);
-
- flst_add_last(header + FSP_FREE_FRAG,
- descr + XDES_FLST_NODE, mtr);
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
- MLOG_4BYTES, mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED,
- frag_n_used + 2, MLOG_4BYTES, mtr);
- } else {
- flst_add_last(header + FSP_FREE,
- descr + XDES_FLST_NODE, mtr);
- count++;
- }
-
- i += FSP_EXTENT_SIZE;
- }
-}
-
-/**********************************************************************//**
-Allocates a new free extent.
-@return extent descriptor, NULL if cannot be allocated */
-static
-xdes_t*
-fsp_alloc_free_extent(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint hint, /*!< in: hint of which extent would be desirable: any
- page offset in the extent goes; the hint must not
- be > FSP_FREE_LIMIT */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- fsp_header_t* header;
- fil_addr_t first;
- xdes_t* descr;
-
- ut_ad(mtr);
-
- header = fsp_get_space_header(space, zip_size, mtr);
-
- descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
-
- if (descr && (xdes_get_state(descr, mtr) == XDES_FREE)) {
- /* Ok, we can take this extent */
- } else {
- /* Take the first extent in the free list */
- first = flst_get_first(header + FSP_FREE, mtr);
-
- if (fil_addr_is_null(first)) {
- fsp_fill_free_list(FALSE, space, header, mtr);
-
- first = flst_get_first(header + FSP_FREE, mtr);
- }
-
- if (fil_addr_is_null(first)) {
-
- return(NULL); /* No free extents left */
- }
-
- descr = xdes_lst_get_descriptor(space, zip_size, first, mtr);
- }
-
- flst_remove(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
-
- return(descr);
-}
-
-/**********************************************************************//**
-Allocates a single free page from a space. */
-static MY_ATTRIBUTE((nonnull))
-void
-fsp_alloc_from_free_frag(
-/*=====================*/
- fsp_header_t* header, /*!< in/out: tablespace header */
- xdes_t* descr, /*!< in/out: extent descriptor */
- ulint bit, /*!< in: slot to allocate in the extent */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint frag_n_used;
-
- ut_ad(xdes_get_state(descr, mtr) == XDES_FREE_FRAG);
- ut_a(xdes_mtr_get_bit(descr, XDES_FREE_BIT, bit, mtr));
- xdes_set_bit(descr, XDES_FREE_BIT, bit, FALSE, mtr);
-
- /* Update the FRAG_N_USED field */
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
- mtr);
- frag_n_used++;
- mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES,
- mtr);
- if (xdes_is_full(descr, mtr)) {
- /* The fragment is full: move it to another list */
- flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
- mtr);
- xdes_set_state(descr, XDES_FULL_FRAG, mtr);
-
- flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
- mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED,
- frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES,
- mtr);
- }
-}
-
-/**********************************************************************//**
-Gets a buffer block for an allocated page.
-
-NOTE: If init_mtr != mtr, the block will only be initialized if it was
-not previously x-latched. It is assumed that the block has been
-x-latched only by mtr, and freed in mtr in that case.
-
-@return block, initialized if init_mtr==mtr
-or rw_lock_x_lock_count(&block->lock) == 1 */
-static
-buf_block_t*
-fsp_page_create(
-/*============*/
- ulint space, /*!< in: space id of the allocated page */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the allocated page */
- mtr_t* mtr, /*!< in: mini-transaction of the allocation */
- mtr_t* init_mtr) /*!< in: mini-transaction for initializing
- the page */
-{
- buf_block_t* block
- = buf_page_create(space, page_no, zip_size, init_mtr);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)
- == rw_lock_own(&block->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- /* Mimic buf_page_get(), but avoid the buf_pool->page_hash lookup. */
- rw_lock_x_lock(&block->lock);
- mutex_enter(&block->mutex);
- buf_block_buf_fix_inc(block, __FILE__, __LINE__);
- mutex_exit(&block->mutex);
- mtr_memo_push(init_mtr, block, MTR_MEMO_PAGE_X_FIX);
-
- if (init_mtr == mtr
- || rw_lock_get_x_lock_count(&block->lock) == 1) {
-
- /* Initialize the page, unless it was already
- X-latched in mtr. (In this case, we would want to
- allocate another page that has not been freed in mtr.) */
- ut_ad(init_mtr == mtr
- || !mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-
- fsp_init_file_page(block, init_mtr);
- }
-
- return(block);
-}
-
-/**********************************************************************//**
-Allocates a single free page from a space. The page is marked as used.
-@retval NULL if no page could be allocated
-@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
-(init_mtr == mtr, or the page was not previously freed in mtr)
-@retval block (not allocated or initialized) otherwise */
-static MY_ATTRIBUTE((warn_unused_result))
-buf_block_t*
-fsp_alloc_free_page(
-/*================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint hint, /*!< in: hint of which page would be desirable */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- mtr_t* init_mtr)/*!< in/out: mini-transaction in which the
- page should be initialized
- (may be the same as mtr) */
-{
- fsp_header_t* header;
- fil_addr_t first;
- xdes_t* descr;
- ulint free;
- ulint page_no;
- ulint space_size;
-
- header = fsp_get_space_header(space, zip_size, mtr);
-
- /* Get the hinted descriptor */
- descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);
-
- if (descr && (xdes_get_state(descr, mtr) == XDES_FREE_FRAG)) {
- /* Ok, we can take this extent */
- } else {
- /* Else take the first extent in free_frag list */
- first = flst_get_first(header + FSP_FREE_FRAG, mtr);
-
- if (fil_addr_is_null(first)) {
- /* There are no partially full fragments: allocate
- a free extent and add it to the FREE_FRAG list. NOTE
- that the allocation may have as a side-effect that an
- extent containing a descriptor page is added to the
- FREE_FRAG list. But we will allocate our page from the
- the free extent anyway. */
-
- descr = fsp_alloc_free_extent(space, zip_size,
- hint, mtr);
-
- if (descr == NULL) {
- /* No free space left */
-
- return(NULL);
- }
-
- xdes_set_state(descr, XDES_FREE_FRAG, mtr);
- flst_add_last(header + FSP_FREE_FRAG,
- descr + XDES_FLST_NODE, mtr);
- } else {
- descr = xdes_lst_get_descriptor(space, zip_size,
- first, mtr);
- }
-
- /* Reset the hint */
- hint = 0;
- }
-
- /* Now we have in descr an extent with at least one free page. Look
- for a free page in the extent. */
-
- free = xdes_find_bit(descr, XDES_FREE_BIT, TRUE,
- hint % FSP_EXTENT_SIZE, mtr);
- if (free == ULINT_UNDEFINED) {
-
- ut_print_buf(stderr, ((byte*) descr) - 500, 1000);
- putc('\n', stderr);
-
- ut_error;
- }
-
- page_no = xdes_get_offset(descr) + free;
-
- space_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
-
- if (space_size <= page_no) {
- /* It must be that we are extending a single-table tablespace
- whose size is still < 64 pages */
-
- ut_a(space != 0);
- if (page_no >= FSP_EXTENT_SIZE) {
- fprintf(stderr,
- "InnoDB: Error: trying to extend a"
- " single-table tablespace %lu\n"
- "InnoDB: by single page(s) though the"
- " space size %lu. Page no %lu.\n",
- (ulong) space, (ulong) space_size,
- (ulong) page_no);
- return(NULL);
- }
- if (!fsp_try_extend_data_file_with_pages(space, page_no,
- header, mtr)) {
- /* No disk space left */
- return(NULL);
- }
- }
-
- fsp_alloc_from_free_frag(header, descr, free, mtr);
- return(fsp_page_create(space, zip_size, page_no, mtr, init_mtr));
-}
-
-/**********************************************************************//**
-Frees a single page of a space. The page is marked as free and clean.
-The extent of the page must be in state XDES_FREE_FRAG or XDES_FULL_FRAG.
-If the descriptor is inconsistent, a diagnostic dump is written to stderr;
-the function then returns without changing anything when the extent or the
-page is already free, and otherwise aborts via ut_error. On success the
-FSP_FRAG_N_USED counter in the space header is updated, an extent that has
-become completely free is handed to fsp_free_extent(), and
-mtr->n_freed_pages is incremented. */
-static
-void
-fsp_free_page(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- fsp_header_t* header;
- xdes_t* descr;
- ulint state;
- ulint frag_n_used;
-
- ut_ad(mtr);
-
- /* fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */
-
- header = fsp_get_space_header(space, zip_size, mtr);
-
- descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
-
- state = xdes_get_state(descr, mtr);
-
- if (state != XDES_FREE_FRAG && state != XDES_FULL_FRAG) {
- fprintf(stderr,
- "InnoDB: Error: File space extent descriptor"
- " of page %lu has state %lu\n",
- (ulong) page,
- (ulong) state);
- fputs("InnoDB: Dump of descriptor: ", stderr);
- ut_print_buf(stderr, ((byte*) descr) - 50, 200);
- putc('\n', stderr);
- /* Crash in debug version, so that we get a core dump
- of this corruption. */
- ut_ad(0);
-
- if (state == XDES_FREE) {
- /* We put here some fault tolerance: if the page
- is already free, return without doing anything! */
-
- return;
- }
-
- ut_error;
- }
-
- if (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
- page % FSP_EXTENT_SIZE, mtr)) {
-
- fprintf(stderr,
- "InnoDB: Error: File space extent descriptor"
- " of page %lu says it is free\n"
- "InnoDB: Dump of descriptor: ", (ulong) page);
- ut_print_buf(stderr, ((byte*) descr) - 50, 200);
- putc('\n', stderr);
- /* Crash in debug version, so that we get a core dump
- of this corruption. */
- ut_ad(0);
-
- /* We put here some fault tolerance: if the page
- is already free, return without doing anything! */
-
- return;
- }
-
- xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
- xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
-
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
- mtr);
- if (state == XDES_FULL_FRAG) {
- /* The fragment was full: move it from the FSP_FULL_FRAG list
- to the FSP_FREE_FRAG list. The counter grows by
- FSP_EXTENT_SIZE - 1: presumably it only counts the used pages
- of extents on FSP_FREE_FRAG, so the whole extent is added
- minus the page that was just freed (TODO confirm). */
- flst_remove(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE,
- mtr);
- xdes_set_state(descr, XDES_FREE_FRAG, mtr);
- flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
- mtr);
- mlog_write_ulint(header + FSP_FRAG_N_USED,
- frag_n_used + FSP_EXTENT_SIZE - 1,
- MLOG_4BYTES, mtr);
- } else {
- ut_a(frag_n_used > 0);
- mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used - 1,
- MLOG_4BYTES, mtr);
- }
-
- if (xdes_is_free(descr, mtr)) {
- /* The extent has become free: take it off the FSP_FREE_FRAG
- list and let fsp_free_extent() put it on the space free
- list. */
- flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE,
- mtr);
- fsp_free_extent(space, zip_size, page, mtr);
- }
-
- mtr->n_freed_pages++;
-}
-
-/**********************************************************************//**
-Returns an extent to the free list of a space. The extent descriptor is
-re-initialized with xdes_init() and appended to the FSP_FREE list of the
-space header. If the extent is already in state XDES_FREE, the bytes around
-the descriptor are dumped to stderr and the server aborts via ut_error. */
-static
-void
-fsp_free_extent(
-/*============*/
-	ulint	space,	/*!< in: space id */
-	ulint	zip_size,/*!< in: compressed page size in bytes
-			or 0 for uncompressed pages */
-	ulint	page,	/*!< in: page offset in the extent */
-	mtr_t*	mtr)	/*!< in/out: mini-transaction */
-{
-	fsp_header_t*	header;
-	xdes_t*		descr;
-
-	ut_ad(mtr);
-
-	header = fsp_get_space_header(space, zip_size, mtr);
-
-	descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr);
-
-	if (xdes_get_state(descr, mtr) == XDES_FREE) {
-		/* Dump up to 500 bytes before and 500 bytes after the
-		descriptor. The leading part is clamped to the start of
-		the page frame, so that a descriptor stored near the
-		beginning of its page does not make us read memory in
-		front of the frame. When at least 500 bytes precede the
-		descriptor the output is the same 1000 bytes as before. */
-		ulint	n_before = page_offset(descr);
-
-		if (n_before > 500) {
-			n_before = 500;
-		}
-
-		ut_print_buf(stderr, (byte*) descr - n_before,
-			     n_before + 500);
-		putc('\n', stderr);
-
-		ut_error;
-	}
-
-	xdes_init(descr, mtr);
-
-	flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);
-}
-
-/**********************************************************************//**
-Computes the address of an inode slot on a segment inode page: the slot
-array starts FSEG_ARR_OFFSET bytes into the page and every slot occupies
-FSEG_INODE_SIZE bytes.
-@return pointer to the i'th segment inode on the page */
-UNIV_INLINE
-fseg_inode_t*
-fsp_seg_inode_page_get_nth_inode(
-/*=============================*/
-	page_t*	page,	/*!< in: segment inode page */
-	ulint	i,	/*!< in: inode index on page */
-	ulint	zip_size MY_ATTRIBUTE((unused)),
-			/*!< in: compressed page size, or 0 */
-	mtr_t*	mtr MY_ATTRIBUTE((unused)))
-			/*!< in/out: mini-transaction */
-{
-	ut_ad(i < FSP_SEG_INODES_PER_PAGE(zip_size));
-	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
-
-	fseg_inode_t*	first_slot = page + FSEG_ARR_OFFSET;
-
-	return(first_slot + i * FSEG_INODE_SIZE);
-}
-
-/**********************************************************************//**
-Scans the inode slots of a segment inode page from the first slot onward
-and reports the first one whose FSEG_ID field is nonzero, i.e. in use.
-@return index of the first used inode, or ULINT_UNDEFINED if every slot
-is unused */
-static
-ulint
-fsp_seg_inode_page_find_used(
-/*=========================*/
-	page_t*	page,	/*!< in: segment inode page */
-	ulint	zip_size,/*!< in: compressed page size, or 0 */
-	mtr_t*	mtr)	/*!< in/out: mini-transaction */
-{
-	for (ulint slot = 0; slot < FSP_SEG_INODES_PER_PAGE(zip_size);
-	     slot++) {
-
-		fseg_inode_t*	candidate = fsp_seg_inode_page_get_nth_inode(
-			page, slot, zip_size, mtr);
-
-		if (!mach_read_from_8(candidate + FSEG_ID)) {
-			/* A zero id marks an unused slot: keep looking */
-			continue;
-		}
-
-		ut_ad(mach_read_from_4(candidate + FSEG_MAGIC_N)
-		      == FSEG_MAGIC_N_VALUE);
-
-		return(slot);
-	}
-
-	return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Scans the inode slots of a segment inode page, starting at slot i, and
-reports the first one whose FSEG_ID field is zero, i.e. unused.
-@return index of the first unused inode at or after i, or ULINT_UNDEFINED
-if there is none (also returned by the corrupt-table check on the page) */
-static
-ulint
-fsp_seg_inode_page_find_free(
-/*=========================*/
-	page_t*	page,	/*!< in: segment inode page */
-	ulint	i,	/*!< in: search forward starting from this index */
-	ulint	zip_size,/*!< in: compressed page size, or 0 */
-	mtr_t*	mtr)	/*!< in/out: mini-transaction */
-{
-	SRV_CORRUPT_TABLE_CHECK(page, return(ULINT_UNDEFINED););
-
-	while (i < FSP_SEG_INODES_PER_PAGE(zip_size)) {
-
-		fseg_inode_t*	candidate = fsp_seg_inode_page_get_nth_inode(
-			page, i, zip_size, mtr);
-
-		if (mach_read_from_8(candidate + FSEG_ID) == 0) {
-			/* Found an unused slot */
-			return(i);
-		}
-
-		/* A used slot must carry the inode magic number */
-		ut_ad(mach_read_from_4(candidate + FSEG_MAGIC_N)
-		      == FSEG_MAGIC_N_VALUE);
-
-		i++;
-	}
-
-	return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Allocates a new file segment inode page: takes a free page from the space,
-tags it as FIL_PAGE_INODE, clears the id of every inode slot on it and
-appends the page to the FSP_SEG_INODES_FREE list of the space header.
-@return TRUE if the page could be allocated, FALSE otherwise */
-static
-ibool
-fsp_alloc_seg_inode_page(
-/*=====================*/
-	fsp_header_t*	space_header,	/*!< in: space header */
-	mtr_t*		mtr)		/*!< in/out: mini-transaction */
-{
-	ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
-
-	/* Both the space id and the compressed page size are derived
-	from the page that holds the space header */
-	const ulint	space_id = page_get_space_id(page_align(space_header));
-	const ulint	zip_size = fsp_flags_get_zip_size(
-		mach_read_from_4(FSP_SPACE_FLAGS + space_header));
-
-	buf_block_t*	new_block = fsp_alloc_free_page(
-		space_id, zip_size, 0, mtr, mtr);
-
-	if (!new_block) {
-
-		return(FALSE);
-	}
-
-	buf_block_dbg_add_level(new_block, SYNC_FSP_PAGE);
-	ut_ad(rw_lock_get_x_lock_count(&new_block->lock) == 1);
-
-	new_block->check_index_page_at_flush = FALSE;
-
-	page_t*	frame = buf_block_get_frame(new_block);
-
-	mlog_write_ulint(frame + FIL_PAGE_TYPE, FIL_PAGE_INODE,
-			 MLOG_2BYTES, mtr);
-
-	/* Mark every inode slot on the fresh page as unused */
-	ulint	slot = 0;
-
-	while (slot < FSP_SEG_INODES_PER_PAGE(zip_size)) {
-
-		fseg_inode_t*	slot_inode = fsp_seg_inode_page_get_nth_inode(
-			frame, slot, zip_size, mtr);
-
-		mlog_write_ull(slot_inode + FSEG_ID, 0, mtr);
-
-		slot++;
-	}
-
-	flst_add_last(space_header + FSP_SEG_INODES_FREE,
-		      frame + FSEG_INODE_PAGE_NODE, mtr);
-
-	return(TRUE);
-}
-
-/**********************************************************************//**
-Allocates a new file segment inode. The inode is taken from the first page
-of the FSP_SEG_INODES_FREE list; if that list is empty, a new inode page is
-allocated first. When the chosen slot is the last unused one on its page,
-the page is moved to the FSP_SEG_INODES_FULL list.
-@return segment inode, or NULL if not enough space */
-static
-fseg_inode_t*
-fsp_alloc_seg_inode(
-/*================*/
-	fsp_header_t*	space_header,	/*!< in: space header */
-	mtr_t*		mtr)		/*!< in/out: mini-transaction */
-{
-	ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
-
-	if (flst_get_len(space_header + FSP_SEG_INODES_FREE, mtr) == 0
-	    && !fsp_alloc_seg_inode_page(space_header, mtr)) {
-		/* No inode page has a free slot and a new inode page
-		could not be allocated either */
-
-		return(NULL);
-	}
-
-	const ulint	inode_page_no = flst_get_first(
-		space_header + FSP_SEG_INODES_FREE, mtr).page;
-
-	const ulint	zip_size = fsp_flags_get_zip_size(
-		mach_read_from_4(FSP_SPACE_FLAGS + space_header));
-
-	buf_block_t*	inode_block = buf_page_get(
-		page_get_space_id(page_align(space_header)),
-		zip_size, inode_page_no, RW_X_LATCH, mtr);
-	buf_block_dbg_add_level(inode_block, SYNC_FSP_PAGE);
-
-	page_t*	inode_page = buf_block_get_frame(inode_block);
-
-	SRV_CORRUPT_TABLE_CHECK(inode_page, return(0););
-
-	const ulint	slot = fsp_seg_inode_page_find_free(
-		inode_page, 0, zip_size, mtr);
-
-	ut_a(slot != ULINT_UNDEFINED);
-
-	fseg_inode_t*	new_inode = fsp_seg_inode_page_get_nth_inode(
-		inode_page, slot, zip_size, mtr);
-
-	const ulint	next_free = fsp_seg_inode_page_find_free(
-		inode_page, slot + 1, zip_size, mtr);
-
-	if (next_free == ULINT_UNDEFINED) {
-		/* The slot we are handing out was the last unused one on
-		this page: the page now belongs on the full list */
-
-		flst_remove(space_header + FSP_SEG_INODES_FREE,
-			    inode_page + FSEG_INODE_PAGE_NODE, mtr);
-
-		flst_add_last(space_header + FSP_SEG_INODES_FULL,
-			      inode_page + FSEG_INODE_PAGE_NODE, mtr);
-	}
-
-	ut_ad(!mach_read_from_8(new_inode + FSEG_ID)
-	      || mach_read_from_4(new_inode + FSEG_MAGIC_N)
-	      == FSEG_MAGIC_N_VALUE);
-
-	return(new_inode);
-}
-
-/**********************************************************************//**
-Frees a file segment inode. The id of the inode is reset to 0, which is
-what the slot scanning functions treat as "unused". The inode page is moved
-from the FSP_SEG_INODES_FULL list to the FSP_SEG_INODES_FREE list if it had
-no unused slot before this call, and the whole page is freed with
-fsp_free_page() if no used inode remains on it afterwards. */
-static
-void
-fsp_free_seg_inode(
-/*===============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fseg_inode_t* inode, /*!< in: segment inode */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- page_t* page;
- fsp_header_t* space_header;
-
- page = page_align(inode);
-
- space_header = fsp_get_space_header(space, zip_size, mtr);
-
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-
- if (ULINT_UNDEFINED
- == fsp_seg_inode_page_find_free(page, 0, zip_size, mtr)) {
-
- /* The page has no unused slot yet, so it is on the full
- list: move the page to the free list. This check has to run
- before the inode id is cleared below, because after that a
- free slot would always be found. */
-
- flst_remove(space_header + FSP_SEG_INODES_FULL,
- page + FSEG_INODE_PAGE_NODE, mtr);
-
- flst_add_last(space_header + FSP_SEG_INODES_FREE,
- page + FSEG_INODE_PAGE_NODE, mtr);
- }
-
- mlog_write_ull(inode + FSEG_ID, 0, mtr);
- mlog_write_ulint(inode + FSEG_MAGIC_N, 0xfa051ce3, MLOG_4BYTES, mtr); /* NOTE(review):
- presumably a marker that differs from FSEG_MAGIC_N_VALUE so that a
- freed inode is recognizable - confirm */
-
- if (ULINT_UNDEFINED
- == fsp_seg_inode_page_find_used(page, zip_size, mtr)) {
-
- /* There are no other used headers left on the page: free it */
-
- flst_remove(space_header + FSP_SEG_INODES_FREE,
- page + FSEG_INODE_PAGE_NODE, mtr);
-
- fsp_free_page(space, zip_size, page_get_page_no(page), mtr);
- }
-}
-
-/**********************************************************************//**
-Looks up the file segment inode that a segment header points to. The inode
-address (page number and byte offset) is read from the header and the inode
-page is fetched with an x-latch.
-@return segment inode, page x-latched; NULL if the inode is free, i.e. its
-FSEG_ID is zero (0 is also returned by the corrupt-table check) */
-static
-fseg_inode_t*
-fseg_inode_try_get(
-/*===============*/
-	fseg_header_t*	header,	/*!< in: segment header */
-	ulint		space,	/*!< in: space id */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	fil_addr_t	where;
-
-	where.page = mach_read_from_4(header + FSEG_HDR_PAGE_NO);
-	where.boffset = mach_read_from_2(header + FSEG_HDR_OFFSET);
-	ut_ad(space == mach_read_from_4(header + FSEG_HDR_SPACE));
-
-	fseg_inode_t*	found = fut_get_ptr(space, zip_size, where,
-					    RW_X_LATCH, mtr);
-
-	SRV_CORRUPT_TABLE_CHECK(found, return(0););
-
-	if (UNIV_UNLIKELY(!mach_read_from_8(found + FSEG_ID))) {
-		/* The slot is not in use */
-
-		return(NULL);
-	}
-
-	ut_ad(mach_read_from_4(found + FSEG_MAGIC_N)
-	      == FSEG_MAGIC_N_VALUE);
-
-	return(found);
-}
-
-/**********************************************************************//**
-Returns the file segment inode, page x-latched. This is
-fseg_inode_try_get() followed by SRV_CORRUPT_TABLE_CHECK on the result; the
-action passed to the check is empty, so the value obtained from
-fseg_inode_try_get() is returned unchanged.
-NOTE(review): fseg_inode_try_get() returns NULL for a free inode; whether a
-NULL can actually reach the caller depends on what SRV_CORRUPT_TABLE_CHECK
-does in that case - confirm against the macro definition.
-@return segment inode, page x-latched */
-static
-fseg_inode_t*
-fseg_inode_get(
-/*===========*/
- fseg_header_t* header, /*!< in: segment header */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- fseg_inode_t* inode
- = fseg_inode_try_get(header, space, zip_size, mtr);
- SRV_CORRUPT_TABLE_CHECK(inode, ; /* do nothing */);
- return(inode);
-}
-
-/**********************************************************************//**
-Reads the page number stored in one slot of the fragment page array of a
-segment inode. The array starts at FSEG_FRAG_ARR and each slot is
-FSEG_FRAG_SLOT_SIZE bytes wide.
-@return page number, FIL_NULL if not in use */
-UNIV_INLINE
-ulint
-fseg_get_nth_frag_page_no(
-/*======================*/
-	fseg_inode_t*	inode,	/*!< in: segment inode */
-	ulint		n,	/*!< in: slot index */
-	mtr_t*		mtr MY_ATTRIBUTE((unused)))
-				/*!< in/out: mini-transaction */
-{
-	ut_ad(inode && mtr);
-	ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
-	ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
-	ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-
-	const ulint	slot_offset = FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE;
-
-	return(mach_read_from_4(inode + slot_offset));
-}
-
-/**********************************************************************//**
-Writes a page number into one slot of the fragment page array of a segment
-inode. The write is a 4-byte logged write in the given mini-transaction. */
-UNIV_INLINE
-void
-fseg_set_nth_frag_page_no(
-/*======================*/
-	fseg_inode_t*	inode,	/*!< in: segment inode */
-	ulint		n,	/*!< in: slot index */
-	ulint		page_no,/*!< in: page number to set */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	ut_ad(inode && mtr);
-	ut_ad(n < FSEG_FRAG_ARR_N_SLOTS);
-	ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
-	ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-
-	const ulint	slot_offset = FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE;
-
-	mlog_write_ulint(inode + slot_offset, page_no, MLOG_4BYTES, mtr);
-}
-
-/**********************************************************************//**
-Searches the fragment page array of a segment inode, from the first slot
-upward, for a slot that holds FIL_NULL.
-@return index of the lowest free slot; ULINT_UNDEFINED if none found */
-static
-ulint
-fseg_find_free_frag_page_slot(
-/*==========================*/
-	fseg_inode_t*	inode,	/*!< in: segment inode */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	ut_ad(inode && mtr);
-
-	ulint	slot = 0;
-
-	while (slot < FSEG_FRAG_ARR_N_SLOTS) {
-
-		if (fseg_get_nth_frag_page_no(inode, slot, mtr)
-		    == FIL_NULL) {
-
-			return(slot);
-		}
-
-		slot++;
-	}
-
-	return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Searches the fragment page array of a segment inode, from the last slot
-downward, for a slot that holds a page number other than FIL_NULL.
-@return index of the highest used slot; ULINT_UNDEFINED if none found */
-static
-ulint
-fseg_find_last_used_frag_page_slot(
-/*===============================*/
-	fseg_inode_t*	inode,	/*!< in: segment inode */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	ut_ad(inode && mtr);
-
-	/* Walk the slots from the highest index down to 0 */
-	for (ulint slot = FSEG_FRAG_ARR_N_SLOTS; slot-- > 0; ) {
-
-		if (fseg_get_nth_frag_page_no(inode, slot, mtr)
-		    != FIL_NULL) {
-
-			return(slot);
-		}
-	}
-
-	return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Counts the slots of the fragment page array of a segment inode that hold
-a page number other than FIL_NULL.
-@return number of fragment pages */
-static
-ulint
-fseg_get_n_frag_pages(
-/*==================*/
-	fseg_inode_t*	inode,	/*!< in: segment inode */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	ut_ad(inode && mtr);
-
-	ulint	n_used = 0;
-
-	for (ulint slot = 0; slot < FSEG_FRAG_ARR_N_SLOTS; slot++) {
-
-		const ulint	page_no = fseg_get_nth_frag_page_no(
-			inode, slot, mtr);
-
-		n_used += (page_no != FIL_NULL) ? 1 : 0;
-	}
-
-	return(n_used);
-}
-
-/**********************************************************************//**
-Creates a new segment. A segment inode is allocated and initialized with
-the next segment id of the space, and a segment header pointing to that
-inode is written at byte_offset on the header page. Unless the caller has
-already reserved space, 2 extents are reserved for the duration of the call.
-@return the block where the segment header is placed, x-latched, NULL
-if could not create segment because of lack of space */
-UNIV_INTERN
-buf_block_t*
-fseg_create_general(
-/*================*/
-	ulint	space,	/*!< in: space id */
-	ulint	page,	/*!< in: page where the segment header is placed: if
-			this is != 0, the page must belong to another segment,
-			if this is 0, a new page will be allocated and it
-			will belong to the created segment */
-	ulint	byte_offset, /*!< in: byte offset of the created segment header
-			on the page */
-	ibool	has_done_reservation, /*!< in: TRUE if the caller has already
-			done the reservation for the pages with
-			fsp_reserve_free_extents (at least 2 extents: one for
-			the inode and the other for the segment) then there is
-			no need to do the check for this individual
-			operation */
-	mtr_t*	mtr)	/*!< in/out: mini-transaction */
-{
-	ulint		flags;
-	ulint		zip_size;
-	fsp_header_t*	space_header;
-	fseg_inode_t*	inode;
-	ib_id_t		seg_id;
-	buf_block_t*	block	= 0; /* remove warning */
-	fseg_header_t*	header	= 0; /* remove warning */
-	prio_rw_lock_t*	latch;
-	ibool		success;
-	ulint		n_reserved = 0;	/* only meaningful when this
-					function made the reservation */
-	ulint		i;
-
-	ut_ad(mtr);
-	ut_ad(byte_offset + FSEG_HEADER_SIZE
-	      <= UNIV_PAGE_SIZE - FIL_PAGE_DATA_END);
-
-	latch = fil_space_get_latch(space, &flags);
-	zip_size = fsp_flags_get_zip_size(flags);
-
-	if (page != 0) {
-		/* The header goes onto an existing page of another segment */
-		block = buf_page_get(space, zip_size, page, RW_X_LATCH, mtr);
-		header = byte_offset + buf_block_get_frame(block);
-	}
-
-	mtr_x_lock(latch, mtr);
-
-	if (rw_lock_get_x_lock_count(latch) == 1) {
-		/* This thread did not own the latch before this call: free
-		excess pages from the insert buffer free list */
-
-		if (space == IBUF_SPACE_ID) {
-			ibuf_free_excess_pages();
-		}
-	}
-
-	if (!has_done_reservation) {
-		success = fsp_reserve_free_extents(&n_reserved, space, 2,
-						   FSP_NORMAL, mtr);
-		if (!success) {
-			return(NULL);
-		}
-	}
-
-	space_header = fsp_get_space_header(space, zip_size, mtr);
-
-	inode = fsp_alloc_seg_inode(space_header, mtr);
-
-	if (inode == NULL) {
-		/* No segment was created. When page != 0, block already
-		points to the latched header page; it must be reset so
-		that the caller sees the documented NULL failure result
-		instead of a block that looks like success. */
-		block = NULL;
-
-		goto funct_exit;
-	}
-
-	/* Read the next segment id from space header and increment the
-	value in space header */
-
-	seg_id = mach_read_from_8(space_header + FSP_SEG_ID);
-
-	mlog_write_ull(space_header + FSP_SEG_ID, seg_id + 1, mtr);
-
-	mlog_write_ull(inode + FSEG_ID, seg_id, mtr);
-	mlog_write_ulint(inode + FSEG_NOT_FULL_N_USED, 0, MLOG_4BYTES, mtr);
-
-	flst_init(inode + FSEG_FREE, mtr);
-	flst_init(inode + FSEG_NOT_FULL, mtr);
-	flst_init(inode + FSEG_FULL, mtr);
-
-	/* The magic number has to be in place before the fragment slots
-	are written: fseg_set_nth_frag_page_no() asserts on it in debug
-	builds */
-	mlog_write_ulint(inode + FSEG_MAGIC_N, FSEG_MAGIC_N_VALUE,
-			 MLOG_4BYTES, mtr);
-	for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) {
-		fseg_set_nth_frag_page_no(inode, i, FIL_NULL, mtr);
-	}
-
-	if (page == 0) {
-		/* Allocate the header page from the new segment itself */
-		block = fseg_alloc_free_page_low(space, zip_size,
-						 inode, 0, FSP_UP, mtr, mtr);
-
-		if (block == NULL) {
-			/* Undo the inode allocation; block is NULL, so
-			the failure is reported to the caller */
-
-			fsp_free_seg_inode(space, zip_size, inode, mtr);
-
-			goto funct_exit;
-		}
-
-		ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1);
-
-		header = byte_offset + buf_block_get_frame(block);
-		mlog_write_ulint(buf_block_get_frame(block) + FIL_PAGE_TYPE,
-				 FIL_PAGE_TYPE_SYS, MLOG_2BYTES, mtr);
-	}
-
-	/* Make the segment header point to the inode */
-	mlog_write_ulint(header + FSEG_HDR_OFFSET,
-			 page_offset(inode), MLOG_2BYTES, mtr);
-
-	mlog_write_ulint(header + FSEG_HDR_PAGE_NO,
-			 page_get_page_no(page_align(inode)),
-			 MLOG_4BYTES, mtr);
-
-	mlog_write_ulint(header + FSEG_HDR_SPACE, space, MLOG_4BYTES, mtr);
-
-funct_exit:
-	if (!has_done_reservation) {
-
-		fil_space_release_free_extents(space, n_reserved);
-	}
-
-	return(block);
-}
-
-/**********************************************************************//**
-Creates a new segment. This is a convenience wrapper that calls
-fseg_create_general() with has_done_reservation = FALSE, i.e. the space
-reservation is made inside the call.
-@return the block where the segment header is placed, x-latched, NULL
-if could not create segment because of lack of space */
-UNIV_INTERN
-buf_block_t*
-fseg_create(
-/*========*/
- ulint space, /*!< in: space id */
- ulint page, /*!< in: page where the segment header is placed: if
- this is != 0, the page must belong to another segment,
- if this is 0, a new page will be allocated and it
- will belong to the created segment */
- ulint byte_offset, /*!< in: byte offset of the created segment header
- on the page */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- return(fseg_create_general(space, page, byte_offset, FALSE, mtr));
-}
-
-/**********************************************************************//**
-Calculates the number of pages reserved by a segment, and how many pages are
-currently used. Reserved pages are the fragment pages plus all pages of the
-extents on the FSEG_FREE, FSEG_NOT_FULL and FSEG_FULL lists; used pages are
-the FSEG_NOT_FULL_N_USED counter plus all pages of the FSEG_FULL extents
-plus the fragment pages.
-@return number of reserved pages */
-static
-ulint
-fseg_n_reserved_pages_low(
-/*======================*/
-	fseg_inode_t*	inode,	/*!< in: segment inode */
-	ulint*		used,	/*!< out: number of pages used (not
-				more than reserved) */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	ut_ad(inode && used && mtr);
-	ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
-
-	const ulint	n_not_full_used = mtr_read_ulint(
-		inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr);
-	const ulint	n_frag = fseg_get_n_frag_pages(inode, mtr);
-
-	const ulint	len_free = flst_get_len(inode + FSEG_FREE, mtr);
-	const ulint	len_not_full = flst_get_len(inode + FSEG_NOT_FULL,
-						     mtr);
-	const ulint	len_full = flst_get_len(inode + FSEG_FULL, mtr);
-
-	*used = n_not_full_used + FSP_EXTENT_SIZE * len_full + n_frag;
-
-	return(n_frag
-	       + FSP_EXTENT_SIZE * len_free
-	       + FSP_EXTENT_SIZE * len_not_full
-	       + FSP_EXTENT_SIZE * len_full);
-}
-
-/**********************************************************************//**
-Calculates the number of pages reserved by a segment, and how many pages are
-currently used. The tablespace latch is x-locked in the mini-transaction,
-the inode of the segment is looked up from the header, and the counting is
-delegated to fseg_n_reserved_pages_low().
-@return number of reserved pages */
-UNIV_INTERN
-ulint
-fseg_n_reserved_pages(
-/*==================*/
-	fseg_header_t*	header,	/*!< in: segment header */
-	ulint*		used,	/*!< out: number of pages used (<= reserved) */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	ulint		flags;
-
-	const ulint	space_id = page_get_space_id(page_align(header));
-	prio_rw_lock_t*	space_latch = fil_space_get_latch(space_id, &flags);
-	const ulint	zip_size = fsp_flags_get_zip_size(flags);
-
-	mtr_x_lock(space_latch, mtr);
-
-	fseg_inode_t*	seg_inode = fseg_inode_get(header, space_id,
-						   zip_size, mtr);
-
-	return(fseg_n_reserved_pages_low(seg_inode, used, mtr));
-}
-
-/*********************************************************************//**
-Tries to fill the free list of a segment with consecutive free extents.
-This happens if the segment is big enough to allow extents in the free list,
-the free list is empty, and the extents can be allocated consecutively from
-the hint onward. At most FSEG_FREE_LIST_MAX_LEN extents are added; the
-function stops at the first extent that has no descriptor or is not in state
-XDES_FREE. Each extent taken is set to state XDES_FSEG, tagged with the
-segment id and appended to the FSEG_FREE list of the inode. */
-static
-void
-fseg_fill_free_list(
-/*================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint hint, /*!< in: hint which extent would be good as
- the first extent */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- xdes_t* descr;
- ulint i;
- ib_id_t seg_id;
- ulint reserved;
- ulint used; /* filled in by fseg_n_reserved_pages_low() but not read here */
-
- ut_ad(inode && mtr);
- ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
-
- reserved = fseg_n_reserved_pages_low(inode, &used, mtr);
-
- if (reserved < FSEG_FREE_LIST_LIMIT * FSP_EXTENT_SIZE) {
-
- /* The segment is too small to allow extents in free list */
-
- return;
- }
-
- if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
- /* Free list is not empty */
-
- return;
- }
-
- for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) {
- descr = xdes_get_descriptor(space, zip_size, hint, mtr);
-
- if ((descr == NULL)
- || (XDES_FREE != xdes_get_state(descr, mtr))) {
-
- /* We cannot allocate the desired extent: stop */
-
- return;
- }
-
- descr = fsp_alloc_free_extent(space, zip_size, hint, mtr);
-
- xdes_set_state(descr, XDES_FSEG, mtr);
-
- seg_id = mach_read_from_8(inode + FSEG_ID);
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
- mlog_write_ull(descr + XDES_ID, seg_id, mtr);
-
- flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr);
- hint += FSP_EXTENT_SIZE; /* continue with the next consecutive extent */
- }
-}
-
-/*********************************************************************//**
-Allocates a free extent for the segment: looks first in the free list of the
-segment, then tries to allocate from the space free list. An extent taken
-from the space is assigned to the segment (state XDES_FSEG, segment id) and
-put on the segment free list, after which fseg_fill_free_list() is invoked
-for the extents that follow it. NOTE that the extent returned still resides
-in the segment free list, it is not yet taken off it!
-@return descriptor of the allocated extent, or NULL if the segment free
-list was empty and no extent could be allocated from the space */
-static
-xdes_t*
-fseg_alloc_free_extent(
-/*===================*/
-	fseg_inode_t*	inode,	/*!< in: segment inode */
-	ulint		space,	/*!< in: space id */
-	ulint		zip_size,/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	mtr_t*		mtr)	/*!< in/out: mini-transaction */
-{
-	ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
-	ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-
-	if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
-		/* The segment already owns a free extent: hand out the
-		first one on its free list */
-
-		fil_addr_t	first_free = flst_get_first(
-			inode + FSEG_FREE, mtr);
-
-		return(xdes_lst_get_descriptor(space, zip_size,
-					       first_free, mtr));
-	}
-
-	/* Nothing on the segment free list: take an extent from the space */
-	xdes_t*	new_extent = fsp_alloc_free_extent(space, zip_size, 0, mtr);
-
-	if (new_extent == NULL) {
-
-		return(NULL);
-	}
-
-	const ib_id_t	owner_id = mach_read_from_8(inode + FSEG_ID);
-
-	xdes_set_state(new_extent, XDES_FSEG, mtr);
-	mlog_write_ull(new_extent + XDES_ID, owner_id, mtr);
-	flst_add_last(inode + FSEG_FREE, new_extent + XDES_FLST_NODE, mtr);
-
-	/* Try to fill the segment free list with the following extents */
-	fseg_fill_free_list(inode, space, zip_size,
-			    xdes_get_offset(new_extent) + FSP_EXTENT_SIZE,
-			    mtr);
-
-	return(new_extent);
-}
-
-/**********************************************************************//**
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation. The strategy is an ordered chain of seven cases, numbered
-in the body: (1) take the hinted page itself, (2) take the free extent that
-contains the hint and then the hinted page, (3) take any free extent of the
-segment and its lowest or highest page, (4) take another free page of the
-hinted extent, (5) take any unused page already reserved for the segment,
-(6) allocate a single fragment page from the space, (7) allocate a new
-extent and take its first page.
-@retval NULL if no page could be allocated
-@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
-(init_mtr == mtr, or the page was not previously freed in mtr)
-@retval block (not allocated or initialized) otherwise */
-static
-buf_block_t*
-fseg_alloc_free_page_low(
-/*=====================*/
- ulint space, /*!< in: space */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fseg_inode_t* seg_inode, /*!< in/out: segment inode */
- ulint hint, /*!< in: hint of which page would be
- desirable */
- byte direction, /*!< in: if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction
- in which the page should be initialized.
- If init_mtr!=mtr, but the page is already
- latched in mtr, do not initialize the page. */
-{
- fsp_header_t* space_header;
- ulint space_size;
- ib_id_t seg_id;
- ulint used;
- ulint reserved;
- xdes_t* descr; /*!< extent of the hinted page */
- ulint ret_page; /*!< the allocated page offset, FIL_NULL
- if could not be allocated */
- xdes_t* ret_descr; /*!< the extent of the allocated page */
- ibool success;
- ulint n;
-
- ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));
- ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
- ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
- seg_id = mach_read_from_8(seg_inode + FSEG_ID);
-
- ut_ad(seg_id);
-
- reserved = fseg_n_reserved_pages_low(seg_inode, &used, mtr);
-
- space_header = fsp_get_space_header(space, zip_size, mtr);
-
- descr = xdes_get_descriptor_with_space_hdr(space_header, space,
- hint, mtr);
- if (descr == NULL) {
- /* Hint outside space or too high above free limit: reset
- hint */
- /* The file space header page is always allocated. */
- hint = 0;
- descr = xdes_get_descriptor(space, zip_size, hint, mtr);
- }
-
- /* In the big if-else below we look for ret_page and ret_descr */
- /*-------------------------------------------------------------*/
- if ((xdes_get_state(descr, mtr) == XDES_FSEG)
- && mach_read_from_8(descr + XDES_ID) == seg_id
- && (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
- hint % FSP_EXTENT_SIZE, mtr) == TRUE)) {
-take_hinted_page:
- /* 1. We can take the hinted page
- =================================
- (case 2 below also jumps to this label after it has assigned
- the extent of the hint to the segment) */
- ret_descr = descr;
- ret_page = hint;
- /* Skip the check for extending the tablespace. If the
- page hint were not within the size of the tablespace,
- we would have got (descr == NULL) above and reset the hint. */
- goto got_hinted_page;
- /*-----------------------------------------------------------*/
- } else if (xdes_get_state(descr, mtr) == XDES_FREE
- && reserved - used < reserved / FSEG_FILLFACTOR
- && used >= FSEG_FRAG_LIMIT) {
-
- /* 2. We allocate the free extent from space and can take
- =========================================================
- the hinted page
- ===============*/
- ret_descr = fsp_alloc_free_extent(space, zip_size, hint, mtr);
-
- ut_a(ret_descr == descr);
-
- xdes_set_state(ret_descr, XDES_FSEG, mtr);
- mlog_write_ull(ret_descr + XDES_ID, seg_id, mtr);
- flst_add_last(seg_inode + FSEG_FREE,
- ret_descr + XDES_FLST_NODE, mtr);
-
- /* Try to fill the segment free list */
- fseg_fill_free_list(seg_inode, space, zip_size,
- hint + FSP_EXTENT_SIZE, mtr);
- goto take_hinted_page;
- /*-----------------------------------------------------------*/
- } else if ((direction != FSP_NO_DIR)
- && ((reserved - used) < reserved / FSEG_FILLFACTOR)
- && (used >= FSEG_FRAG_LIMIT)
- && (!!(ret_descr
- = fseg_alloc_free_extent(seg_inode,
- space, zip_size, mtr)))) {
-
- /* 3. We take any free extent (which was already assigned above
- ===============================================================
- in the if-condition to ret_descr) and take the lowest or
- ========================================================
- highest page in it, depending on the direction
- ==============================================*/
- ret_page = xdes_get_offset(ret_descr);
-
- if (direction == FSP_DOWN) {
- ret_page += FSP_EXTENT_SIZE - 1;
- }
- /*-----------------------------------------------------------*/
- } else if ((xdes_get_state(descr, mtr) == XDES_FSEG)
- && mach_read_from_8(descr + XDES_ID) == seg_id
- && (!xdes_is_full(descr, mtr))) {
-
- /* 4. We can take the page from the same extent as the
- ======================================================
- hinted page (and the extent already belongs to the
- ==================================================
- segment)
- ========*/
- ret_descr = descr;
- ret_page = xdes_get_offset(ret_descr)
- + xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
- hint % FSP_EXTENT_SIZE, mtr);
- /*-----------------------------------------------------------*/
- } else if (reserved - used > 0) {
- /* 5. We take any unused page from the segment
- ==============================================*/
- fil_addr_t first;
-
- if (flst_get_len(seg_inode + FSEG_NOT_FULL, mtr) > 0) {
- first = flst_get_first(seg_inode + FSEG_NOT_FULL,
- mtr);
- } else if (flst_get_len(seg_inode + FSEG_FREE, mtr) > 0) {
- first = flst_get_first(seg_inode + FSEG_FREE, mtr);
- } else {
- ut_error;
- return(NULL);
- }
-
- ret_descr = xdes_lst_get_descriptor(space, zip_size,
- first, mtr);
- ret_page = xdes_get_offset(ret_descr)
- + xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE,
- 0, mtr);
- /*-----------------------------------------------------------*/
- } else if (used < FSEG_FRAG_LIMIT) {
- /* 6. We allocate an individual page from the space
- ===================================================*/
- buf_block_t* block = fsp_alloc_free_page(
- space, zip_size, hint, mtr, init_mtr);
-
- if (block != NULL) {
- /* Put the page in the fragment page array of the
- segment */
- n = fseg_find_free_frag_page_slot(seg_inode, mtr);
- ut_a(n != ULINT_UNDEFINED);
-
- fseg_set_nth_frag_page_no(
- seg_inode, n, buf_block_get_page_no(block),
- mtr);
- }
-
- /* fsp_alloc_free_page() invoked fsp_init_file_page()
- already. */
- return(block);
- /*-----------------------------------------------------------*/
- } else {
- /* 7. We allocate a new extent and take its first page
- ======================================================*/
- ret_descr = fseg_alloc_free_extent(seg_inode,
- space, zip_size, mtr);
-
- if (ret_descr == NULL) {
- ret_page = FIL_NULL;
- } else {
- ret_page = xdes_get_offset(ret_descr);
- }
- }
-
- if (ret_page == FIL_NULL) {
- /* Page could not be allocated */
-
- return(NULL);
- }
-
- if (space != 0) {
- space_size = fil_space_get_size(space);
-
- if (space_size <= ret_page) {
- /* It must be that we are extending a single-table
- tablespace whose size is still < 64 pages */
-
- if (ret_page >= FSP_EXTENT_SIZE) {
- fprintf(stderr,
- "InnoDB: Error (2): trying to extend"
- " a single-table tablespace %lu\n"
- "InnoDB: by single page(s) though"
- " the space size %lu. Page no %lu.\n",
- (ulong) space, (ulong) space_size,
- (ulong) ret_page);
- return(NULL);
- }
-
- success = fsp_try_extend_data_file_with_pages(
- space, ret_page, space_header, mtr);
- if (!success) {
- /* No disk space left */
- return(NULL);
- }
- }
- }
-
-got_hinted_page:
- /* ret_descr == NULL if the block was allocated from free_frag
- (XDES_FREE_FRAG)
- NOTE(review): case 6 above returns early and case 7 returns NULL when
- it gets no extent, so ret_descr looks to be always non-NULL when this
- point is reached - confirm before relying on it. */
- if (ret_descr != NULL) {
- /* At this point we know the extent and the page offset.
- The extent is still in the appropriate list (FSEG_NOT_FULL
- or FSEG_FREE), and the page is not yet marked as used. */
-
- ut_ad(xdes_get_descriptor(space, zip_size, ret_page, mtr)
- == ret_descr);
-
- ut_ad(xdes_mtr_get_bit(
- ret_descr, XDES_FREE_BIT,
- ret_page % FSP_EXTENT_SIZE, mtr));
-
- fseg_mark_page_used(seg_inode, ret_page, ret_descr, mtr);
- }
-
- return(fsp_page_create(
- space, fsp_flags_get_zip_size(
- mach_read_from_4(FSP_SPACE_FLAGS
- + space_header)),
- ret_page, mtr, init_mtr));
-}
-
-/**********************************************************************//**
-Allocates a single free page from a segment. The tablespace latch is
-x-locked, the inode of the segment is looked up from the segment header
-and, unless the caller has already reserved space, 2 extents are reserved
-around the call to fseg_alloc_free_page_low(), which implements the actual
-allocation strategy.
-@retval NULL if no page could be allocated
-@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
-(init_mtr == mtr, or the page was not previously freed in mtr)
-@retval block (not allocated or initialized) otherwise */
-UNIV_INTERN
-buf_block_t*
-fseg_alloc_free_page_general(
-/*=========================*/
-	fseg_header_t*	seg_header,/*!< in/out: segment header */
-	ulint		hint,	/*!< in: hint of which page would be
-				desirable */
-	byte		direction,/*!< in: if the new page is needed because
-				of an index page split, and records are
-				inserted there in order, into which
-				direction they go alphabetically: FSP_DOWN,
-				FSP_UP, FSP_NO_DIR */
-	ibool		has_done_reservation, /*!< in: TRUE if the caller has
-				already done the reservation for the page
-				with fsp_reserve_free_extents, then there
-				is no need to do the check for this individual
-				page */
-	mtr_t*		mtr,	/*!< in/out: mini-transaction */
-	mtr_t*		init_mtr)/*!< in/out: mtr or another mini-transaction
-				in which the page should be initialized.
-				If init_mtr!=mtr, but the page is already
-				latched in mtr, do not initialize the page. */
-{
-	ulint		flags;
-	ulint		n_reserved;
-
-	const ulint	space_id = page_get_space_id(page_align(seg_header));
-	prio_rw_lock_t*	space_latch = fil_space_get_latch(space_id, &flags);
-	const ulint	zip_size = fsp_flags_get_zip_size(flags);
-
-	mtr_x_lock(space_latch, mtr);
-
-	if (rw_lock_get_x_lock_count(space_latch) == 1
-	    && space_id == IBUF_SPACE_ID) {
-		/* This thread did not own the latch before this call: free
-		excess pages from the insert buffer free list */
-
-		ibuf_free_excess_pages();
-	}
-
-	fseg_inode_t*	seg_inode = fseg_inode_get(seg_header, space_id,
-						   zip_size, mtr);
-
-	if (!has_done_reservation) {
-		/* The caller made no reservation: make one for the
-		duration of this allocation */
-
-		if (!fsp_reserve_free_extents(&n_reserved, space_id, 2,
-					      FSP_NORMAL, mtr)) {
-
-			return(NULL);
-		}
-	}
-
-	buf_block_t*	new_block = fseg_alloc_free_page_low(
-		space_id, zip_size, seg_inode, hint, direction,
-		mtr, init_mtr);
-
-	if (!has_done_reservation) {
-		fil_space_release_free_extents(space_id, n_reserved);
-	}
-
-	return(new_block);
-}
-
-/**********************************************************************//**
-Checks that we have at least 2 frag pages free in the first extent of a
-single-table tablespace, and they are also physically initialized to the data
-file. That is we have already extended the data file so that those pages are
-inside the data file. If not, this function extends the tablespace with
-pages.
-@return TRUE if there were >= 3 free pages, or we were able to extend */
-static
-ibool
-fsp_reserve_free_pages(
-/*===================*/
- ulint space, /*!< in: space id, must be != 0 */
- fsp_header_t* space_header, /*!< in: header of that space,
- x-latched */
- ulint size, /*!< in: size of the tablespace in
- pages, must be < FSP_EXTENT_SIZE */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- xdes_t* descr;
- ulint n_used;
-
- ut_a(space != 0);
- ut_a(size < FSP_EXTENT_SIZE);
-
- descr = xdes_get_descriptor_with_space_hdr(space_header, space, 0,
- mtr);
- n_used = xdes_get_n_used(descr, mtr);
-
- ut_a(n_used <= size);
-
- if (size >= n_used + 2) {
-
- return(TRUE);
- }
-
- return(fsp_try_extend_data_file_with_pages(space, n_used + 1,
- space_header, mtr));
-}
-
-/**********************************************************************//**
-Reserves free pages from a tablespace. All mini-transactions which may
-use several pages from the tablespace should call this function beforehand
-and reserve enough free extents so that they certainly will be able
-to do their operation, like a B-tree page split, fully. Reservations
-must be released with function fil_space_release_free_extents!
-
-The alloc_type below has the following meaning: FSP_NORMAL means an
-operation which will probably result in more space usage, like an
-insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
-deleting rows, then this allocation will in the long run result in
-less space usage (after a purge); FSP_CLEANING means allocation done
-in a physical record delete (like in a purge) or other cleaning operation
-which will result in less space usage in the long run. We prefer the latter
-two types of allocation: when space is scarce, FSP_NORMAL allocations
-will not succeed, but the latter two allocations will succeed, if possible.
-The purpose is to avoid dead end where the database is full but the
-user cannot free any space because these freeing operations temporarily
-reserve some space.
-
-Single-table tablespaces whose size is < 32 pages are a special case. In this
-function we would liberally reserve several 64 page extents for every page
-split or merge in a B-tree. But we do not want to waste disk space if the table
-only occupies < 32 pages. That is why we apply different rules in that special
-case, just ensuring that there are 3 free pages available.
-@return TRUE if we were able to make the reservation */
-UNIV_INTERN
-ibool
-fsp_reserve_free_extents(
-/*=====================*/
- ulint* n_reserved,/*!< out: number of extents actually reserved; if we
- return TRUE and the tablespace size is < 64 pages,
- then this can be 0, otherwise it is n_ext */
- ulint space, /*!< in: space id */
- ulint n_ext, /*!< in: number of extents to reserve */
- ulint alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- fsp_header_t* space_header;
- prio_rw_lock_t* latch;
- ulint n_free_list_ext;
- ulint free_limit;
- ulint size;
- ulint flags;
- ulint zip_size;
- ulint n_free;
- ulint n_free_up;
- ulint reserve;
- ibool success;
- ulint n_pages_added;
- size_t total_reserved = 0;
-
- ut_ad(mtr);
- *n_reserved = n_ext;
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = fsp_flags_get_zip_size(flags);
-
- mtr_x_lock(latch, mtr);
-
- space_header = fsp_get_space_header(space, zip_size, mtr);
-try_again:
- size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, mtr);
-
- if (size < FSP_EXTENT_SIZE / 2) {
- /* Use different rules for small single-table tablespaces */
- *n_reserved = 0;
- return(fsp_reserve_free_pages(space, space_header, size, mtr));
- }
-
- n_free_list_ext = flst_get_len(space_header + FSP_FREE, mtr);
-
- free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT,
- MLOG_4BYTES, mtr);
-
- /* Below we play safe when counting free extents above the free limit:
- some of them will contain extent descriptor pages, and therefore
- will not be free extents */
-
- n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
-
- if (n_free_up > 0) {
- n_free_up--;
- if (!zip_size) {
- n_free_up -= n_free_up
- / (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE);
- } else {
- n_free_up -= n_free_up
- / (zip_size / FSP_EXTENT_SIZE);
- }
- }
-
- n_free = n_free_list_ext + n_free_up;
-
- if (alloc_type == FSP_NORMAL) {
- /* We reserve 1 extent + 0.5 % of the space size to undo logs
- and 1 extent + 0.5 % to cleaning operations; NOTE: this source
- code is duplicated in the function below! */
-
- reserve = 2 + ((size / FSP_EXTENT_SIZE) * 2) / 200;
-
- if (n_free <= reserve + n_ext) {
-
- goto try_to_extend;
- }
- } else if (alloc_type == FSP_UNDO) {
- /* We reserve 0.5 % of the space size to cleaning operations */
-
- reserve = 1 + ((size / FSP_EXTENT_SIZE) * 1) / 200;
-
- if (n_free <= reserve + n_ext) {
-
- goto try_to_extend;
- }
- } else {
- ut_a(alloc_type == FSP_CLEANING);
- reserve = 0;
- }
-
- success = fil_space_reserve_free_extents(space, n_free, n_ext);
- *n_reserved = n_ext;
-
- if (success) {
- return(TRUE);
- }
-try_to_extend:
- success = fsp_try_extend_data_file(&n_pages_added, space,
- space_header, mtr);
- if (success && n_pages_added > 0) {
- total_reserved += n_pages_added;
- goto try_again;
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-This function should be used to get information on how much we still
-will be able to insert new data to the database without running out the
-tablespace. Only free extents are taken into account and we also subtract
-the safety margin required by the above function fsp_reserve_free_extents.
-@return available space in kB */
-UNIV_INTERN
-ullint
-fsp_get_available_space_in_free_extents(
-/*====================================*/
- ulint space) /*!< in: space id */
-{
- fsp_header_t* space_header;
- ulint n_free_list_ext;
- ulint free_limit;
- ulint size;
- ulint flags;
- ulint zip_size;
- ulint n_free;
- ulint n_free_up;
- ulint reserve;
- prio_rw_lock_t* latch;
- mtr_t mtr;
-
- /* The convoluted mutex acquire is to overcome latching order
- issues: The problem is that the fil_mutex is at a lower level
- than the tablespace latch and the buffer pool mutexes. We have to
- first prevent any operations on the file system by acquiring the
- dictionary mutex. Then acquire the tablespace latch to obey the
- latching order and then release the dictionary mutex. That way we
- ensure that the tablespace instance can't be freed while we are
- examining its contents (see fil_space_free()).
-
- However, there is one further complication, we release the fil_mutex
- when we need to invalidate the the pages in the buffer pool and we
- reacquire the fil_mutex when deleting and freeing the tablespace
- instance in fil0fil.cc. Here we need to account for that situation
- too. */
-
- mutex_enter(&dict_sys->mutex);
-
- /* At this stage there is no guarantee that the tablespace even
- exists in the cache. */
-
- if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
-
- mutex_exit(&dict_sys->mutex);
-
- return(ULLINT_UNDEFINED);
- }
-
- mtr_start(&mtr);
-
- latch = fil_space_get_latch(space, &flags);
-
- /* This should ensure that the tablespace instance can't be freed
- by another thread. However, the tablespace pages can still be freed
- from the buffer pool. We need to check for that again. */
-
- zip_size = fsp_flags_get_zip_size(flags);
-
- mtr_x_lock(latch, &mtr);
-
- mutex_exit(&dict_sys->mutex);
-
- /* At this point it is possible for the tablespace to be deleted and
- its pages removed from the buffer pool. We need to check for that
- situation. However, the tablespace instance can't be deleted because
- our latching above should ensure that. */
-
- if (fil_tablespace_is_being_deleted(space)) {
-
- mtr_commit(&mtr);
-
- return(ULLINT_UNDEFINED);
- }
-
- /* From here on even if the user has dropped the tablespace, the
- pages _must_ still exist in the buffer pool and the tablespace
- instance _must_ be in the file system hash table. */
-
- space_header = fsp_get_space_header(space, zip_size, &mtr);
-
- size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, &mtr);
-
- n_free_list_ext = flst_get_len(space_header + FSP_FREE, &mtr);
-
- free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT,
- MLOG_4BYTES, &mtr);
- mtr_commit(&mtr);
-
- if (size < FSP_EXTENT_SIZE) {
- ut_a(space != 0); /* This must be a single-table
- tablespace */
-
- return(0); /* TODO: count free frag pages and
- return a value based on that */
- }
-
- /* Below we play safe when counting free extents above the free limit:
- some of them will contain extent descriptor pages, and therefore
- will not be free extents */
-
- n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
-
- if (n_free_up > 0) {
- n_free_up--;
- if (!zip_size) {
- n_free_up -= n_free_up
- / (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE);
- } else {
- n_free_up -= n_free_up
- / (zip_size / FSP_EXTENT_SIZE);
- }
- }
-
- n_free = n_free_list_ext + n_free_up;
-
- /* We reserve 1 extent + 0.5 % of the space size to undo logs
- and 1 extent + 0.5 % to cleaning operations; NOTE: this source
- code is duplicated in the function above! */
-
- reserve = 2 + ((size / FSP_EXTENT_SIZE) * 2) / 200;
-
- if (reserve > n_free) {
- return(0);
- }
-
- if (!zip_size) {
- return((ullint) (n_free - reserve)
- * FSP_EXTENT_SIZE
- * (UNIV_PAGE_SIZE / 1024));
- } else {
- return((ullint) (n_free - reserve)
- * FSP_EXTENT_SIZE
- * (zip_size / 1024));
- }
-}
-
-/********************************************************************//**
-Marks a page used. The page must reside within the extents of the given
-segment. */
-static MY_ATTRIBUTE((nonnull))
-void
-fseg_mark_page_used(
-/*================*/
- fseg_inode_t* seg_inode,/*!< in: segment inode */
- ulint page, /*!< in: page offset */
- xdes_t* descr, /*!< in: extent descriptor */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint not_full_n_used;
-
- ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
- ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
-
- ut_ad(mtr_read_ulint(seg_inode + FSEG_ID, MLOG_4BYTES, mtr)
- == mtr_read_ulint(descr + XDES_ID, MLOG_4BYTES, mtr));
-
- if (xdes_is_free(descr, mtr)) {
- /* We move the extent from the free list to the
- NOT_FULL list */
- flst_remove(seg_inode + FSEG_FREE, descr + XDES_FLST_NODE,
- mtr);
- flst_add_last(seg_inode + FSEG_NOT_FULL,
- descr + XDES_FLST_NODE, mtr);
- }
-
- ut_ad(xdes_mtr_get_bit(
- descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr));
-
- /* We mark the page as used */
- xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, FALSE, mtr);
-
- not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
- MLOG_4BYTES, mtr);
- not_full_n_used++;
- mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, not_full_n_used,
- MLOG_4BYTES, mtr);
- if (xdes_is_full(descr, mtr)) {
- /* We move the extent from the NOT_FULL list to the
- FULL list */
- flst_remove(seg_inode + FSEG_NOT_FULL,
- descr + XDES_FLST_NODE, mtr);
- flst_add_last(seg_inode + FSEG_FULL,
- descr + XDES_FLST_NODE, mtr);
-
- mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
- not_full_n_used - FSP_EXTENT_SIZE,
- MLOG_4BYTES, mtr);
- }
-}
-
-/**********************************************************************//**
-Frees a single page of a segment. */
-static
-void
-fseg_free_page_low(
-/*===============*/
- fseg_inode_t* seg_inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: page offset */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- xdes_t* descr;
- ulint not_full_n_used;
- ulint state;
- ib_id_t descr_id;
- ib_id_t seg_id;
- ulint i;
-
- ut_ad(seg_inode != NULL);
- ut_ad(mtr != NULL);
- ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
- ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
-
- /* Drop search system page hash index if the page is found in
- the pool and is hashed */
-
- btr_search_drop_page_hash_when_freed(space, zip_size, page);
-
- descr = xdes_get_descriptor(space, zip_size, page, mtr);
-
- SRV_CORRUPT_TABLE_CHECK(descr,
- {
- /* The page may be corrupt. pass it. */
- return;
- });
-
- if (xdes_mtr_get_bit(descr, XDES_FREE_BIT,
- page % FSP_EXTENT_SIZE, mtr)) {
- fputs("InnoDB: Dump of the tablespace extent descriptor: ",
- stderr);
- ut_print_buf(stderr, descr, 40);
-
- fprintf(stderr, "\n"
- "InnoDB: Serious error! InnoDB is trying to"
- " free page %lu\n"
- "InnoDB: though it is already marked as free"
- " in the tablespace!\n"
- "InnoDB: The tablespace free space info is corrupt.\n"
- "InnoDB: You may need to dump your"
- " InnoDB tables and recreate the whole\n"
- "InnoDB: database!\n", (ulong) page);
-crash:
- fputs("InnoDB: Please refer to\n"
- "InnoDB: " REFMAN "forcing-innodb-recovery.html\n"
- "InnoDB: about forcing recovery.\n", stderr);
- ut_error;
- }
-
- state = xdes_get_state(descr, mtr);
-
- if (state != XDES_FSEG) {
- /* The page is in the fragment pages of the segment */
-
- for (i = 0;; i++) {
- if (fseg_get_nth_frag_page_no(seg_inode, i, mtr)
- == page) {
-
- fseg_set_nth_frag_page_no(seg_inode, i,
- FIL_NULL, mtr);
- break;
- }
- }
-
- fsp_free_page(space, zip_size, page, mtr);
-
- return;
- }
-
- /* If we get here, the page is in some extent of the segment */
-
- descr_id = mach_read_from_8(descr + XDES_ID);
- seg_id = mach_read_from_8(seg_inode + FSEG_ID);
-#if 0
- fprintf(stderr,
- "InnoDB: InnoDB is freeing space %lu page %lu,\n"
- "InnoDB: which belongs to descr seg %llu\n"
- "InnoDB: segment %llu.\n",
- (ulong) space, (ulong) page,
- (ullint) descr_id,
- (ullint) seg_id);
-#endif /* 0 */
- if (UNIV_UNLIKELY(descr_id != seg_id)) {
- fputs("InnoDB: Dump of the tablespace extent descriptor: ",
- stderr);
- ut_print_buf(stderr, descr, 40);
- fputs("\nInnoDB: Dump of the segment inode: ", stderr);
- ut_print_buf(stderr, seg_inode, 40);
- putc('\n', stderr);
-
- fprintf(stderr,
- "InnoDB: Serious error: InnoDB is trying to"
- " free space %lu page %lu,\n"
- "InnoDB: which does not belong to"
- " segment %llu but belongs\n"
- "InnoDB: to segment %llu.\n",
- (ulong) space, (ulong) page,
- (ullint) descr_id,
- (ullint) seg_id);
- goto crash;
- }
-
- not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
- MLOG_4BYTES, mtr);
- if (xdes_is_full(descr, mtr)) {
- /* The fragment is full: move it to another list */
- flst_remove(seg_inode + FSEG_FULL,
- descr + XDES_FLST_NODE, mtr);
- flst_add_last(seg_inode + FSEG_NOT_FULL,
- descr + XDES_FLST_NODE, mtr);
- mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
- not_full_n_used + FSP_EXTENT_SIZE - 1,
- MLOG_4BYTES, mtr);
- } else {
- ut_a(not_full_n_used > 0);
- mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
- not_full_n_used - 1, MLOG_4BYTES, mtr);
- }
-
- xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
- xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
-
- if (xdes_is_free(descr, mtr)) {
- /* The extent has become free: free it to space */
- flst_remove(seg_inode + FSEG_NOT_FULL,
- descr + XDES_FLST_NODE, mtr);
- fsp_free_extent(space, zip_size, page, mtr);
- }
-
- mtr->n_freed_pages++;
-}
-
-/**********************************************************************//**
-Frees a single page of a segment. */
-UNIV_INTERN
-void
-fseg_free_page(
-/*===========*/
- fseg_header_t* seg_header, /*!< in: segment header */
- ulint space, /*!< in: space id */
- ulint page, /*!< in: page offset */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint flags;
- ulint zip_size;
- fseg_inode_t* seg_inode;
- prio_rw_lock_t* latch;
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = fsp_flags_get_zip_size(flags);
-
- mtr_x_lock(latch, mtr);
-
- seg_inode = fseg_inode_get(seg_header, space, zip_size, mtr);
-
- fseg_free_page_low(seg_inode, space, zip_size, page, mtr);
-
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- buf_page_set_file_page_was_freed(space, page);
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
-}
-
-/**********************************************************************//**
-Checks if a single page of a segment is free.
-@return true if free */
-UNIV_INTERN
-bool
-fseg_page_is_free(
-/*==============*/
- fseg_header_t* seg_header, /*!< in: segment header */
- ulint space, /*!< in: space id */
- ulint page) /*!< in: page offset */
-{
- mtr_t mtr;
- ibool is_free;
- ulint flags;
- prio_rw_lock_t* latch;
- xdes_t* descr;
- ulint zip_size;
- fseg_inode_t* seg_inode;
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = dict_tf_get_zip_size(flags);
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- seg_inode = fseg_inode_get(seg_header, space, zip_size, &mtr);
-
- ut_a(seg_inode);
- ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
- ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
-
- descr = xdes_get_descriptor(space, zip_size, page, &mtr);
- ut_a(descr);
-
- is_free = xdes_mtr_get_bit(
- descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, &mtr);
-
- mtr_commit(&mtr);
-
- return(is_free);
-}
-
-/**********************************************************************//**
-Frees an extent of a segment to the space free list. */
-static
-void
-fseg_free_extent(
-/*=============*/
- fseg_inode_t* seg_inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page, /*!< in: a page in the extent */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint first_page_in_extent;
- xdes_t* descr;
- ulint not_full_n_used;
- ulint descr_n_used;
- ulint i;
-
- ut_ad(seg_inode != NULL);
- ut_ad(mtr != NULL);
-
- descr = xdes_get_descriptor(space, zip_size, page, mtr);
-
- ut_a(xdes_get_state(descr, mtr) == XDES_FSEG);
- ut_a(!memcmp(descr + XDES_ID, seg_inode + FSEG_ID, 8));
- ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
- == FSEG_MAGIC_N_VALUE);
-
- first_page_in_extent = page - (page % FSP_EXTENT_SIZE);
-
- for (i = 0; i < FSP_EXTENT_SIZE; i++) {
- if (!xdes_mtr_get_bit(descr, XDES_FREE_BIT, i, mtr)) {
-
- /* Drop search system page hash index if the page is
- found in the pool and is hashed */
-
- btr_search_drop_page_hash_when_freed(
- space, zip_size, first_page_in_extent + i);
- }
- }
-
- if (xdes_is_full(descr, mtr)) {
- flst_remove(seg_inode + FSEG_FULL,
- descr + XDES_FLST_NODE, mtr);
- } else if (xdes_is_free(descr, mtr)) {
- flst_remove(seg_inode + FSEG_FREE,
- descr + XDES_FLST_NODE, mtr);
- } else {
- flst_remove(seg_inode + FSEG_NOT_FULL,
- descr + XDES_FLST_NODE, mtr);
-
- not_full_n_used = mtr_read_ulint(
- seg_inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr);
-
- descr_n_used = xdes_get_n_used(descr, mtr);
- ut_a(not_full_n_used >= descr_n_used);
- mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED,
- not_full_n_used - descr_n_used,
- MLOG_4BYTES, mtr);
- }
-
- fsp_free_extent(space, zip_size, page, mtr);
-
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- for (i = 0; i < FSP_EXTENT_SIZE; i++) {
-
- buf_page_set_file_page_was_freed(space,
- first_page_in_extent + i);
- }
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
-}
-
-/**********************************************************************//**
-Frees part of a segment. This function can be used to free a segment by
-repeatedly calling this function in different mini-transactions. Doing
-the freeing in a single mini-transaction might result in too big a
-mini-transaction.
-@return TRUE if freeing completed */
-UNIV_INTERN
-ibool
-fseg_free_step(
-/*===========*/
- fseg_header_t* header, /*!< in, own: segment header; NOTE: if the header
- resides on the first page of the frag list
- of the segment, this pointer becomes obsolete
- after the last freeing step */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint n;
- ulint page;
- xdes_t* descr;
- fseg_inode_t* inode;
- ulint space;
- ulint flags;
- ulint zip_size;
- ulint header_page;
- prio_rw_lock_t* latch;
-
- space = page_get_space_id(page_align(header));
- header_page = page_get_page_no(page_align(header));
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = fsp_flags_get_zip_size(flags);
-
- mtr_x_lock(latch, mtr);
-
- descr = xdes_get_descriptor(space, zip_size, header_page, mtr);
-
- SRV_CORRUPT_TABLE_CHECK(descr,
- {
- /* The page may be corrupt. pass it. */
- return(TRUE);
- });
-
- /* Check that the header resides on a page which has not been
- freed yet */
-
- ut_a(xdes_mtr_get_bit(descr, XDES_FREE_BIT,
- header_page % FSP_EXTENT_SIZE, mtr) == FALSE);
-
- inode = fseg_inode_try_get(header, space, zip_size, mtr);
-
- if (UNIV_UNLIKELY(inode == NULL)) {
- fprintf(stderr, "double free of inode from %u:%u\n",
- (unsigned) space, (unsigned) header_page);
- return(TRUE);
- }
-
- descr = fseg_get_first_extent(inode, space, zip_size, mtr);
-
- if (descr != NULL) {
- /* Free the extent held by the segment */
- page = xdes_get_offset(descr);
-
- fseg_free_extent(inode, space, zip_size, page, mtr);
-
- return(FALSE);
- }
-
- /* Free a frag page */
- n = fseg_find_last_used_frag_page_slot(inode, mtr);
-
- if (n == ULINT_UNDEFINED) {
- /* Freeing completed: free the segment inode */
- fsp_free_seg_inode(space, zip_size, inode, mtr);
-
- return(TRUE);
- }
-
- fseg_free_page_low(inode, space, zip_size,
- fseg_get_nth_frag_page_no(inode, n, mtr), mtr);
-
- n = fseg_find_last_used_frag_page_slot(inode, mtr);
-
- if (n == ULINT_UNDEFINED) {
- /* Freeing completed: free the segment inode */
- fsp_free_seg_inode(space, zip_size, inode, mtr);
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-Frees part of a segment. Differs from fseg_free_step because this function
-leaves the header page unfreed.
-@return TRUE if freeing completed, except the header page */
-UNIV_INTERN
-ibool
-fseg_free_step_not_header(
-/*======================*/
- fseg_header_t* header, /*!< in: segment header which must reside on
- the first fragment page of the segment */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint n;
- ulint page;
- xdes_t* descr;
- fseg_inode_t* inode;
- ulint space;
- ulint flags;
- ulint zip_size;
- ulint page_no;
- prio_rw_lock_t* latch;
-
- space = page_get_space_id(page_align(header));
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = fsp_flags_get_zip_size(flags);
-
- mtr_x_lock(latch, mtr);
-
- inode = fseg_inode_get(header, space, zip_size, mtr);
-
- SRV_CORRUPT_TABLE_CHECK(inode,
- {
- /* ignore the corruption */
- return(TRUE);
- });
-
- descr = fseg_get_first_extent(inode, space, zip_size, mtr);
-
- if (descr != NULL) {
- /* Free the extent held by the segment */
- page = xdes_get_offset(descr);
-
- fseg_free_extent(inode, space, zip_size, page, mtr);
-
- return(FALSE);
- }
-
- /* Free a frag page */
-
- n = fseg_find_last_used_frag_page_slot(inode, mtr);
-
- if (n == ULINT_UNDEFINED) {
- ut_error;
- }
-
- page_no = fseg_get_nth_frag_page_no(inode, n, mtr);
-
- if (page_no == page_get_page_no(page_align(header))) {
-
- return(TRUE);
- }
-
- fseg_free_page_low(inode, space, zip_size, page_no, mtr);
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-Returns the first extent descriptor for a segment. We think of the extent
-lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL
--> FSEG_FREE.
-@return the first extent descriptor, or NULL if none */
-static
-xdes_t*
-fseg_get_first_extent(
-/*==================*/
- fseg_inode_t* inode, /*!< in: segment inode */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- fil_addr_t first;
- xdes_t* descr;
-
- ut_ad(inode && mtr);
-
- ut_ad(space == page_get_space_id(page_align(inode)));
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-
- first = fil_addr_null;
-
- if (flst_get_len(inode + FSEG_FULL, mtr) > 0) {
-
- first = flst_get_first(inode + FSEG_FULL, mtr);
-
- } else if (flst_get_len(inode + FSEG_NOT_FULL, mtr) > 0) {
-
- first = flst_get_first(inode + FSEG_NOT_FULL, mtr);
-
- } else if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
-
- first = flst_get_first(inode + FSEG_FREE, mtr);
- }
-
- if (first.page == FIL_NULL) {
-
- return(NULL);
- }
- descr = xdes_lst_get_descriptor(space, zip_size, first, mtr);
-
- return(descr);
-}
-
-/*******************************************************************//**
-Validates a segment.
-@return TRUE if ok */
-static
-ibool
-fseg_validate_low(
-/*==============*/
- fseg_inode_t* inode, /*!< in: segment inode */
- mtr_t* mtr2) /*!< in/out: mini-transaction */
-{
- ulint space;
- ib_id_t seg_id;
- mtr_t mtr;
- xdes_t* descr;
- fil_addr_t node_addr;
- ulint n_used = 0;
- ulint n_used2 = 0;
-
- ut_ad(mtr_memo_contains_page(mtr2, inode, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-
- space = page_get_space_id(page_align(inode));
-
- seg_id = mach_read_from_8(inode + FSEG_ID);
- n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED,
- MLOG_4BYTES, mtr2);
- flst_validate(inode + FSEG_FREE, mtr2);
- flst_validate(inode + FSEG_NOT_FULL, mtr2);
- flst_validate(inode + FSEG_FULL, mtr2);
-
- /* Validate FSEG_FREE list */
- node_addr = flst_get_first(inode + FSEG_FREE, mtr2);
-
- while (!fil_addr_is_null(node_addr)) {
- ulint flags;
- ulint zip_size;
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space, &flags), &mtr);
- zip_size = fsp_flags_get_zip_size(flags);
-
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == 0);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
- ut_a(mach_read_from_8(descr + XDES_ID) == seg_id);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate FSEG_NOT_FULL list */
-
- node_addr = flst_get_first(inode + FSEG_NOT_FULL, mtr2);
-
- while (!fil_addr_is_null(node_addr)) {
- ulint flags;
- ulint zip_size;
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space, &flags), &mtr);
- zip_size = fsp_flags_get_zip_size(flags);
-
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) > 0);
- ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
- ut_a(mach_read_from_8(descr + XDES_ID) == seg_id);
-
- n_used2 += xdes_get_n_used(descr, &mtr);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate FSEG_FULL list */
-
- node_addr = flst_get_first(inode + FSEG_FULL, mtr2);
-
- while (!fil_addr_is_null(node_addr)) {
- ulint flags;
- ulint zip_size;
-
- mtr_start(&mtr);
- mtr_x_lock(fil_space_get_latch(space, &flags), &mtr);
- zip_size = fsp_flags_get_zip_size(flags);
-
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG);
- ut_a(mach_read_from_8(descr + XDES_ID) == seg_id);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- ut_a(n_used == n_used2);
-
- return(TRUE);
-}
-
-#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Validates a segment.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fseg_validate(
-/*==========*/
- fseg_header_t* header, /*!< in: segment header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- fseg_inode_t* inode;
- ibool ret;
- ulint space;
- ulint flags;
- ulint zip_size;
-
- space = page_get_space_id(page_align(header));
-
- mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
- zip_size = fsp_flags_get_zip_size(flags);
-
- inode = fseg_inode_get(header, space, zip_size, mtr);
-
- ret = fseg_validate_low(inode, mtr);
-
- return(ret);
-}
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-Writes info of a segment. */
-static
-void
-fseg_print_low(
-/*===========*/
- fseg_inode_t* inode, /*!< in: segment inode */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint space;
- ulint n_used;
- ulint n_frag;
- ulint n_free;
- ulint n_not_full;
- ulint n_full;
- ulint reserved;
- ulint used;
- ulint page_no;
- ib_id_t seg_id;
-
- ut_ad(mtr_memo_contains_page(mtr, inode, MTR_MEMO_PAGE_X_FIX));
- space = page_get_space_id(page_align(inode));
- page_no = page_get_page_no(page_align(inode));
-
- reserved = fseg_n_reserved_pages_low(inode, &used, mtr);
-
- seg_id = mach_read_from_8(inode + FSEG_ID);
-
- n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED,
- MLOG_4BYTES, mtr);
- n_frag = fseg_get_n_frag_pages(inode, mtr);
- n_free = flst_get_len(inode + FSEG_FREE, mtr);
- n_not_full = flst_get_len(inode + FSEG_NOT_FULL, mtr);
- n_full = flst_get_len(inode + FSEG_FULL, mtr);
-
- fprintf(stderr,
- "SEGMENT id %llu space %lu; page %lu;"
- " res %lu used %lu; full ext %lu\n"
- "fragm pages %lu; free extents %lu;"
- " not full extents %lu: pages %lu\n",
- (ullint) seg_id,
- (ulong) space, (ulong) page_no,
- (ulong) reserved, (ulong) used, (ulong) n_full,
- (ulong) n_frag, (ulong) n_free, (ulong) n_not_full,
- (ulong) n_used);
- ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE);
-}
-
-#ifdef UNIV_BTR_PRINT
-/*******************************************************************//**
-Writes info of a segment. */
-UNIV_INTERN
-void
-fseg_print(
-/*=======*/
- fseg_header_t* header, /*!< in: segment header */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- fseg_inode_t* inode;
- ulint space;
- ulint flags;
- ulint zip_size;
-
- space = page_get_space_id(page_align(header));
-
- mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
- zip_size = fsp_flags_get_zip_size(flags);
-
- inode = fseg_inode_get(header, space, zip_size, mtr);
-
- fseg_print_low(inode, mtr);
-}
-#endif /* UNIV_BTR_PRINT */
-
-/*******************************************************************//**
-Validates the file space system and its segments.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fsp_validate(
-/*=========*/
- ulint space) /*!< in: space id */
-{
- fsp_header_t* header;
- fseg_inode_t* seg_inode;
- page_t* seg_inode_page;
- prio_rw_lock_t* latch;
- ulint size;
- ulint flags;
- ulint zip_size;
- ulint free_limit;
- ulint frag_n_used;
- mtr_t mtr;
- mtr_t mtr2;
- xdes_t* descr;
- fil_addr_t node_addr;
- fil_addr_t next_node_addr;
- ulint descr_count = 0;
- ulint n_used = 0;
- ulint n_used2 = 0;
- ulint n_full_frag_pages;
- ulint n;
- ulint seg_inode_len_free;
- ulint seg_inode_len_full;
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = fsp_flags_get_zip_size(flags);
- ut_a(ut_is_2pow(zip_size));
- ut_a(zip_size <= UNIV_ZIP_SIZE_MAX);
- ut_a(!zip_size || zip_size >= UNIV_ZIP_SIZE_MIN);
-
- /* Start first a mini-transaction mtr2 to lock out all other threads
- from the fsp system */
- mtr_start(&mtr2);
- mtr_x_lock(latch, &mtr2);
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
- free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT,
- MLOG_4BYTES, &mtr);
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED,
- MLOG_4BYTES, &mtr);
-
- n_full_frag_pages = FSP_EXTENT_SIZE
- * flst_get_len(header + FSP_FULL_FRAG, &mtr);
-
- if (UNIV_UNLIKELY(free_limit > size)) {
-
- ut_a(space != 0);
- ut_a(size < FSP_EXTENT_SIZE);
- }
-
- flst_validate(header + FSP_FREE, &mtr);
- flst_validate(header + FSP_FREE_FRAG, &mtr);
- flst_validate(header + FSP_FULL_FRAG, &mtr);
-
- mtr_commit(&mtr);
-
- /* Validate FSP_FREE list */
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
- node_addr = flst_get_first(header + FSP_FREE, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- descr_count++;
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == 0);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FREE);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate FSP_FREE_FRAG list */
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
- node_addr = flst_get_first(header + FSP_FREE_FRAG, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- descr_count++;
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) > 0);
- ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FREE_FRAG);
-
- n_used += xdes_get_n_used(descr, &mtr);
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
-
- mtr_commit(&mtr);
- }
-
- /* Validate FSP_FULL_FRAG list */
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
- node_addr = flst_get_first(header + FSP_FULL_FRAG, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- descr_count++;
- descr = xdes_lst_get_descriptor(space, zip_size,
- node_addr, &mtr);
-
- ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE);
- ut_a(xdes_get_state(descr, &mtr) == XDES_FULL_FRAG);
-
- node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr);
- mtr_commit(&mtr);
- }
-
- /* Validate segments */
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);
-
- seg_inode_len_full = flst_get_len(header + FSP_SEG_INODES_FULL, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- n = 0;
- do {
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, zip_size, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, zip_size, &mtr);
- ut_a(mach_read_from_8(seg_inode + FSEG_ID) != 0);
- fseg_validate_low(seg_inode, &mtr);
-
- descr_count += flst_get_len(seg_inode + FSEG_FREE,
- &mtr);
- descr_count += flst_get_len(seg_inode + FSEG_FULL,
- &mtr);
- descr_count += flst_get_len(seg_inode + FSEG_NOT_FULL,
- &mtr);
-
- n_used2 += fseg_get_n_frag_pages(seg_inode, &mtr);
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
-
- node_addr = next_node_addr;
- }
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);
-
- seg_inode_len_free = flst_get_len(header + FSP_SEG_INODES_FREE, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- n = 0;
-
- do {
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, zip_size, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, zip_size, &mtr);
- if (mach_read_from_8(seg_inode + FSEG_ID)) {
- fseg_validate_low(seg_inode, &mtr);
-
- descr_count += flst_get_len(
- seg_inode + FSEG_FREE, &mtr);
- descr_count += flst_get_len(
- seg_inode + FSEG_FULL, &mtr);
- descr_count += flst_get_len(
- seg_inode + FSEG_NOT_FULL, &mtr);
- n_used2 += fseg_get_n_frag_pages(
- seg_inode, &mtr);
- }
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
-
- node_addr = next_node_addr;
- }
-
- ut_a(descr_count * FSP_EXTENT_SIZE == free_limit);
- if (!zip_size) {
- ut_a(n_used + n_full_frag_pages
- == n_used2 + 2 * ((free_limit + (UNIV_PAGE_SIZE - 1))
- / UNIV_PAGE_SIZE)
- + seg_inode_len_full + seg_inode_len_free);
- } else {
- ut_a(n_used + n_full_frag_pages
- == n_used2 + 2 * ((free_limit + (zip_size - 1))
- / zip_size)
- + seg_inode_len_full + seg_inode_len_free);
- }
- ut_a(frag_n_used == n_used);
-
- mtr_commit(&mtr2);
-
- return(TRUE);
-}
-
-/*******************************************************************//**
-Prints info of a file space. */
-UNIV_INTERN
-void
-fsp_print(
-/*======*/
- ulint space) /*!< in: space id */
-{
- fsp_header_t* header;
- fseg_inode_t* seg_inode;
- page_t* seg_inode_page;
- prio_rw_lock_t* latch;
- ulint flags;
- ulint zip_size;
- ulint size;
- ulint free_limit;
- ulint frag_n_used;
- fil_addr_t node_addr;
- fil_addr_t next_node_addr;
- ulint n_free;
- ulint n_free_frag;
- ulint n_full_frag;
- ib_id_t seg_id;
- ulint n;
- ulint n_segs = 0;
- mtr_t mtr;
- mtr_t mtr2;
-
- latch = fil_space_get_latch(space, &flags);
- zip_size = fsp_flags_get_zip_size(flags);
-
- /* Start first a mini-transaction mtr2 to lock out all other threads
- from the fsp system */
-
- mtr_start(&mtr2);
-
- mtr_x_lock(latch, &mtr2);
-
- mtr_start(&mtr);
-
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr);
-
- free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES,
- &mtr);
- frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES,
- &mtr);
- n_free = flst_get_len(header + FSP_FREE, &mtr);
- n_free_frag = flst_get_len(header + FSP_FREE_FRAG, &mtr);
- n_full_frag = flst_get_len(header + FSP_FULL_FRAG, &mtr);
-
- seg_id = mach_read_from_8(header + FSP_SEG_ID);
-
- fprintf(stderr,
- "FILE SPACE INFO: id %lu\n"
- "size %lu, free limit %lu, free extents %lu\n"
- "not full frag extents %lu: used pages %lu,"
- " full frag extents %lu\n"
- "first seg id not used %llu\n",
- (ulong) space,
- (ulong) size, (ulong) free_limit, (ulong) n_free,
- (ulong) n_free_frag, (ulong) frag_n_used, (ulong) n_full_frag,
- (ullint) seg_id);
-
- mtr_commit(&mtr);
-
- /* Print segments */
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- n = 0;
-
- do {
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, zip_size, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, zip_size, &mtr);
- ut_a(mach_read_from_8(seg_inode + FSEG_ID) != 0);
- fseg_print_low(seg_inode, &mtr);
-
- n_segs++;
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
-
- node_addr = next_node_addr;
- }
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- header = fsp_get_space_header(space, zip_size, &mtr);
-
- node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr);
-
- mtr_commit(&mtr);
-
- while (!fil_addr_is_null(node_addr)) {
-
- n = 0;
-
- do {
-
- mtr_start(&mtr);
- mtr_x_lock(latch, &mtr);
-
- seg_inode_page = fut_get_ptr(
- space, zip_size, node_addr, RW_X_LATCH, &mtr)
- - FSEG_INODE_PAGE_NODE;
-
- seg_inode = fsp_seg_inode_page_get_nth_inode(
- seg_inode_page, n, zip_size, &mtr);
- if (mach_read_from_8(seg_inode + FSEG_ID)) {
-
- fseg_print_low(seg_inode, &mtr);
- n_segs++;
- }
-
- next_node_addr = flst_get_next_addr(
- seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr);
- mtr_commit(&mtr);
- } while (++n < FSP_SEG_INODES_PER_PAGE(zip_size));
-
- node_addr = next_node_addr;
- }
-
- mtr_commit(&mtr2);
-
- fprintf(stderr, "NUMBER of file segments: %lu\n", (ulong) n_segs);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Compute offset after xdes where crypt data can be stored
-@param[in] zip_size Compressed size or 0
-@return offset */
-ulint
-fsp_header_get_crypt_offset(
- const ulint zip_size)
-{
- return (FSP_HEADER_OFFSET + (XDES_ARR_OFFSET + XDES_SIZE *
- (zip_size ? zip_size : UNIV_PAGE_SIZE) / FSP_EXTENT_SIZE));
-}
-
-/**********************************************************************//**
-Checks if a single page is free.
-@return true if free */
-UNIV_INTERN
-bool
-fsp_page_is_free_func(
-/*==============*/
- ulint space, /*!< in: space id */
- ulint page_no, /*!< in: page offset */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- const char *file,
- ulint line)
-{
- ulint flags;
-
- ut_ad(mtr);
-
- mtr_x_lock_func(fil_space_get_latch(space, &flags), file, line, mtr);
- ulint zip_size = fsp_flags_get_zip_size(flags);
-
- xdes_t* descr = xdes_get_descriptor(space, zip_size, page_no, mtr);
- ut_a(descr);
-
- return xdes_mtr_get_bit(
- descr, XDES_FREE_BIT, page_no % FSP_EXTENT_SIZE, mtr);
-}
diff --git a/storage/xtradb/fts/Makefile.query b/storage/xtradb/fts/Makefile.query
deleted file mode 100644
index 12dcd833064..00000000000
--- a/storage/xtradb/fts/Makefile.query
+++ /dev/null
@@ -1,32 +0,0 @@
-LEX=flex
-YACC=bison
-PREFIX=fts
-
-all: fts0pars.cc fts0blex.cc fts0tlex.cc
-
-fts0par.cc: fts0pars.y
-fts0blex.cc: fts0blex.l
-fts0tlex.cc: fts0tlex.l
-
-.l.cc:
- $(LEX) -P$(subst lex,,$*) -o $*.cc --header-file=../include/$*.h $<
-
-.y.cc:
- $(YACC) -p $(PREFIX) -o $*.cc -d $<
- mv $*.h ../include
-LEX=flex
-YACC=bison
-PREFIX=fts
-
-all: fts0pars.cc fts0blex.cc fts0tlex.cc
-
-fts0par.cc: fts0pars.y
-fts0blex.cc: fts0blex.l
-fts0tlex.cc: fts0tlex.l
-
-.l.cc:
- $(LEX) -P$(subst lex,,$*) -o $*.cc --header-file=../include/$*.h $<
-
-.y.cc:
- $(YACC) -p $(PREFIX) -o $*.cc -d $<
- mv $*.h ../include
diff --git a/storage/xtradb/fts/fts0ast.cc b/storage/xtradb/fts/fts0ast.cc
deleted file mode 100644
index 030b972440f..00000000000
--- a/storage/xtradb/fts/fts0ast.cc
+++ /dev/null
@@ -1,744 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file fts/fts0ast.cc
-Full Text Search parser helper file.
-
-Created 2007/3/16 Sunny Bains.
-***********************************************************************/
-
-#include "mem0mem.h"
-#include "fts0ast.h"
-#include "fts0pars.h"
-#include "fts0fts.h"
-
-/* The FTS ast visit pass. */
-enum fts_ast_visit_pass_t {
- FTS_PASS_FIRST, /*!< First visit pass,
- process operators excluding
- FTS_EXIST and FTS_IGNORE */
- FTS_PASS_EXIST, /*!< Exist visit pass,
- process operator FTS_EXIST */
- FTS_PASS_IGNORE /*!< Ignore visit pass,
- process operator FTS_IGNORE */
-};
-
-/******************************************************************//**
-Create an empty fts_ast_node_t.
-@return Create a new node */
-static
-fts_ast_node_t*
-fts_ast_node_create(void)
-/*=====================*/
-{
- fts_ast_node_t* node;
-
- node = (fts_ast_node_t*) ut_malloc(sizeof(*node));
- memset(node, 0x0, sizeof(*node));
-
- return(node);
-}
-
-/******************************************************************//**
-Create a operator fts_ast_node_t.
-@return new node */
-UNIV_INTERN
-fts_ast_node_t*
-fts_ast_create_node_oper(
-/*=====================*/
- void* arg, /*!< in: ast state instance */
- fts_ast_oper_t oper) /*!< in: ast operator */
-{
- fts_ast_node_t* node = fts_ast_node_create();
-
- node->type = FTS_AST_OPER;
- node->oper = oper;
-
- fts_ast_state_add_node((fts_ast_state_t*) arg, node);
-
- return(node);
-}
-
-/******************************************************************//**
-This function takes ownership of the ptr and is responsible
-for free'ing it
-@return new node or a node list with tokenized words */
-UNIV_INTERN
-fts_ast_node_t*
-fts_ast_create_node_term(
-/*=====================*/
- void* arg, /*!< in: ast state instance */
- const fts_ast_string_t* ptr) /*!< in: ast term string */
-{
- fts_ast_state_t* state = static_cast<fts_ast_state_t*>(arg);
- ulint len = ptr->len;
- ulint cur_pos = 0;
- fts_ast_node_t* node = NULL;
- fts_ast_node_t* node_list = NULL;
- fts_ast_node_t* first_node = NULL;
-
- /* Scan the incoming string and filter out any "non-word" characters */
- while (cur_pos < len) {
- fts_string_t str;
- ulint offset;
- ulint cur_len;
-
- cur_len = innobase_mysql_fts_get_token(
- state->charset,
- reinterpret_cast<const byte*>(ptr->str) + cur_pos,
- reinterpret_cast<const byte*>(ptr->str) + len,
- &str, &offset);
-
- if (cur_len == 0) {
- break;
- }
-
- cur_pos += cur_len;
-
- if (str.f_n_char > 0) {
- /* If the subsequent term (after the first one)'s size
- is less than fts_min_token_size or the term is greater
- than fts_max_token_size, we shall ignore that. This is
- to make consistent with MyISAM behavior */
- if ((first_node && (str.f_n_char < fts_min_token_size))
- || str.f_n_char > fts_max_token_size) {
- continue;
- }
-
- node = fts_ast_node_create();
-
- node->type = FTS_AST_TERM;
-
- node->term.ptr = fts_ast_string_create(
- str.f_str, str.f_len);
-
- fts_ast_state_add_node(
- static_cast<fts_ast_state_t*>(arg), node);
-
- if (first_node) {
- /* There is more than one word, create
- a list to organize them */
- if (!node_list) {
- node_list = fts_ast_create_node_list(
- static_cast<fts_ast_state_t*>(
- arg),
- first_node);
- }
-
- fts_ast_add_node(node_list, node);
- } else {
- first_node = node;
- }
- }
- }
-
- return((node_list != NULL) ? node_list : first_node);
-}
-
-/******************************************************************//**
-This function takes ownership of the ptr and is responsible
-for free'ing it.
-@return new node */
-UNIV_INTERN
-fts_ast_node_t*
-fts_ast_create_node_text(
-/*=====================*/
- void* arg, /*!< in: ast state instance */
- const fts_ast_string_t* ptr) /*!< in: ast text string */
-{
- ulint len = ptr->len;
- fts_ast_node_t* node = NULL;
-
- /* Once we come here, the string must have at least 2 quotes ""
- around the query string, which could be empty. Also the query
- string may contain 0x00 in it, we don't treat it as null-terminated. */
- ut_ad(len >= 2);
- ut_ad(ptr->str[0] == '\"' && ptr->str[len - 1] == '\"');
-
- if (len == 2) {
- /* If the query string contains nothing except quotes,
- it's obviously an invalid query. */
- return(NULL);
- }
-
- node = fts_ast_node_create();
-
- /*!< We ignore the actual quotes "" */
- len -= 2;
-
- node->type = FTS_AST_TEXT;
- /*!< Skip copying the first quote */
- node->text.ptr = fts_ast_string_create(
- reinterpret_cast<const byte*>(ptr->str + 1), len);
- node->text.distance = ULINT_UNDEFINED;
-
- fts_ast_state_add_node((fts_ast_state_t*) arg, node);
-
- return(node);
-}
-
-/******************************************************************//**
-This function takes ownership of the expr and is responsible
-for free'ing it.
-@return new node */
-UNIV_INTERN
-fts_ast_node_t*
-fts_ast_create_node_list(
-/*=====================*/
- void* arg, /*!< in: ast state instance */
- fts_ast_node_t* expr) /*!< in: ast expr instance */
-{
- fts_ast_node_t* node = fts_ast_node_create();
-
- node->type = FTS_AST_LIST;
- node->list.head = node->list.tail = expr;
-
- fts_ast_state_add_node((fts_ast_state_t*) arg, node);
-
- return(node);
-}
-
-/******************************************************************//**
-Create a sub-expression list node. This function takes ownership of
-expr and is responsible for deleting it.
-@return new node */
-UNIV_INTERN
-fts_ast_node_t*
-fts_ast_create_node_subexp_list(
-/*============================*/
- void* arg, /*!< in: ast state instance */
- fts_ast_node_t* expr) /*!< in: ast expr instance */
-{
- fts_ast_node_t* node = fts_ast_node_create();
-
- node->type = FTS_AST_SUBEXP_LIST;
- node->list.head = node->list.tail = expr;
-
- fts_ast_state_add_node((fts_ast_state_t*) arg, node);
-
- return(node);
-}
-
-/******************************************************************//**
-Free an expr list node elements. */
-static
-void
-fts_ast_free_list(
-/*==============*/
- fts_ast_node_t* node) /*!< in: ast node to free */
-{
- ut_a(node->type == FTS_AST_LIST
- || node->type == FTS_AST_SUBEXP_LIST);
-
- for (node = node->list.head;
- node != NULL;
- node = fts_ast_free_node(node)) {
-
- /*!< No op */
- }
-}
-
-/********************************************************************//**
-Free a fts_ast_node_t instance.
-@return next node to free */
-UNIV_INTERN
-fts_ast_node_t*
-fts_ast_free_node(
-/*==============*/
- fts_ast_node_t* node) /*!< in: the node to free */
-{
- fts_ast_node_t* next_node;
-
- switch (node->type) {
- case FTS_AST_TEXT:
- if (node->text.ptr) {
- fts_ast_string_free(node->text.ptr);
- node->text.ptr = NULL;
- }
- break;
-
- case FTS_AST_TERM:
- if (node->term.ptr) {
- fts_ast_string_free(node->term.ptr);
- node->term.ptr = NULL;
- }
- break;
-
- case FTS_AST_LIST:
- case FTS_AST_SUBEXP_LIST:
- fts_ast_free_list(node);
- node->list.head = node->list.tail = NULL;
- break;
-
- case FTS_AST_OPER:
- break;
-
- default:
- ut_error;
- }
-
- /*!< Get next node before freeing the node itself */
- next_node = node->next;
-
- ut_free(node);
-
- return(next_node);
-}
-
-/******************************************************************//**
-This AST takes ownership of the expr and is responsible
-for free'ing it.
-@return in param "list" */
-UNIV_INTERN
-fts_ast_node_t*
-fts_ast_add_node(
-/*=============*/
- fts_ast_node_t* node, /*!< in: list instance */
- fts_ast_node_t* elem) /*!< in: node to add to list */
-{
- if (!elem) {
- return(NULL);
- }
-
- ut_a(!elem->next);
- ut_a(node->type == FTS_AST_LIST
- || node->type == FTS_AST_SUBEXP_LIST);
-
- if (!node->list.head) {
- ut_a(!node->list.tail);
-
- node->list.head = node->list.tail = elem;
- } else {
- ut_a(node->list.tail);
-
- node->list.tail->next = elem;
- node->list.tail = elem;
- }
-
- return(node);
-}
-
-/******************************************************************//**
-For tracking node allocations, in case there is an error during
-parsing. */
-UNIV_INTERN
-void
-fts_ast_state_add_node(
-/*===================*/
- fts_ast_state_t*state, /*!< in: ast instance */
- fts_ast_node_t* node) /*!< in: node to add to ast */
-{
- if (!state->list.head) {
- ut_a(!state->list.tail);
-
- state->list.head = state->list.tail = node;
- } else {
- state->list.tail->next_alloc = node;
- state->list.tail = node;
- }
-}
-
-/******************************************************************//**
-Set the wildcard attribute of a term. */
-UNIV_INTERN
-void
-fts_ast_term_set_wildcard(
-/*======================*/
- fts_ast_node_t* node) /*!< in/out: set attribute of
- a term node */
-{
- if (!node) {
- return;
- }
-
- /* If it's a node list, the wildcard should be set to the tail node*/
- if (node->type == FTS_AST_LIST) {
- ut_ad(node->list.tail != NULL);
- node = node->list.tail;
- }
-
- ut_a(node->type == FTS_AST_TERM);
- ut_a(!node->term.wildcard);
-
- node->term.wildcard = TRUE;
-}
-
-/******************************************************************//**
-Set the proximity attribute of a text node. */
-UNIV_INTERN
-void
-fts_ast_term_set_distance(
-/*======================*/
- fts_ast_node_t* node, /*!< in/out: text node */
- ulint distance) /*!< in: the text proximity
- distance */
-{
- if (node == NULL) {
- return;
- }
-
- ut_a(node->type == FTS_AST_TEXT);
- ut_a(node->text.distance == ULINT_UNDEFINED);
-
- node->text.distance = distance;
-}
-
-/******************************************************************//**
-Free node and expr allocations. */
-UNIV_INTERN
-void
-fts_ast_state_free(
-/*===============*/
- fts_ast_state_t*state) /*!< in: ast state to free */
-{
- fts_ast_node_t* node = state->list.head;
-
- /* Free the nodes that were allocated during parsing. */
- while (node) {
- fts_ast_node_t* next = node->next_alloc;
-
- if (node->type == FTS_AST_TEXT && node->text.ptr) {
- fts_ast_string_free(node->text.ptr);
- node->text.ptr = NULL;
- } else if (node->type == FTS_AST_TERM && node->term.ptr) {
- fts_ast_string_free(node->term.ptr);
- node->term.ptr = NULL;
- }
-
- ut_free(node);
- node = next;
- }
-
- state->root = state->list.head = state->list.tail = NULL;
-}
-
-/******************************************************************//**
-Print an ast node. */
-UNIV_INTERN
-void
-fts_ast_node_print(
-/*===============*/
- fts_ast_node_t* node) /*!< in: ast node to print */
-{
- switch (node->type) {
- case FTS_AST_TEXT:
- printf("TEXT: ");
- fts_ast_string_print(node->text.ptr);
- break;
-
- case FTS_AST_TERM:
- printf("TERM: ");
- fts_ast_string_print(node->term.ptr);
- break;
-
- case FTS_AST_LIST:
- printf("LIST: ");
- node = node->list.head;
-
- while (node) {
- fts_ast_node_print(node);
- node = node->next;
- }
- break;
-
- case FTS_AST_SUBEXP_LIST:
- printf("SUBEXP_LIST: ");
- node = node->list.head;
-
- while (node) {
- fts_ast_node_print(node);
- node = node->next;
- }
- case FTS_AST_OPER:
- printf("OPER: %d\n", node->oper);
- break;
-
- default:
- ut_error;
- }
-}
-
-/******************************************************************//**
-Traverse the AST - in-order traversal, except for the FTX_EXIST and FTS_IGNORE
-nodes, which will be ignored in the first pass of each level, and visited in a
-second and third pass after all other nodes in the same level are visited.
-@return DB_SUCCESS if all went well */
-UNIV_INTERN
-dberr_t
-fts_ast_visit(
-/*==========*/
- fts_ast_oper_t oper, /*!< in: current operator */
- fts_ast_node_t* node, /*!< in: current root node */
- fts_ast_callback visitor, /*!< in: callback function */
- void* arg, /*!< in: arg for callback */
- bool* has_ignore) /*!< out: true, if the operator
- was ignored during processing,
- currently we ignore FTS_EXIST
- and FTS_IGNORE operators */
-{
- dberr_t error = DB_SUCCESS;
- fts_ast_node_t* oper_node = NULL;
- fts_ast_node_t* start_node;
- bool revisit = false;
- bool will_be_ignored = false;
- fts_ast_visit_pass_t visit_pass = FTS_PASS_FIRST;
-
- start_node = node->list.head;
-
- ut_a(node->type == FTS_AST_LIST
- || node->type == FTS_AST_SUBEXP_LIST);
-
- if (oper == FTS_EXIST_SKIP) {
- visit_pass = FTS_PASS_EXIST;
- } else if (oper == FTS_IGNORE_SKIP) {
- visit_pass = FTS_PASS_IGNORE;
- }
-
- /* In the first pass of the tree, at the leaf level of the
- tree, FTS_EXIST and FTS_IGNORE operation will be ignored.
- It will be repeated at the level above the leaf level.
-
- The basic idea here is that when we encounter FTS_EXIST or
- FTS_IGNORE, we will change the operator node into FTS_EXIST_SKIP
- or FTS_IGNORE_SKIP, and term node & text node with the operators
- is ignored in the first pass. We have two passes during the revisit:
- We process nodes with FTS_EXIST_SKIP in the exist pass, and then
- process nodes with FTS_IGNORE_SKIP in the ignore pass.
-
- The order should be restrictly followed, or we will get wrong results.
- For example, we have a query 'a +b -c d +e -f'.
- first pass: process 'a' and 'd' by union;
- exist pass: process '+b' and '+e' by intersection;
- ignore pass: process '-c' and '-f' by difference. */
-
- for (node = node->list.head;
- node && (error == DB_SUCCESS);
- node = node->next) {
-
- switch(node->type) {
- case FTS_AST_LIST:
- if (visit_pass != FTS_PASS_FIRST) {
- break;
- }
-
- error = fts_ast_visit(oper, node, visitor,
- arg, &will_be_ignored);
-
- /* If will_be_ignored is set to true, then
- we encountered and ignored a FTS_EXIST or FTS_IGNORE
- operator. */
- if (will_be_ignored) {
- revisit = true;
- /* Remember oper for list in case '-abc&def',
- ignored oper is from previous node of list.*/
- node->oper = oper;
- }
-
- break;
-
- case FTS_AST_OPER:
- oper = node->oper;
- oper_node = node;
-
- /* Change the operator for revisit */
- if (oper == FTS_EXIST) {
- oper_node->oper = FTS_EXIST_SKIP;
- } else if (oper == FTS_IGNORE) {
- oper_node->oper = FTS_IGNORE_SKIP;
- }
-
- break;
-
- default:
- if (node->visited) {
- continue;
- }
-
- ut_a(oper == FTS_NONE || !oper_node
- || oper_node->oper == oper
- || oper_node->oper == FTS_EXIST_SKIP
- || oper_node->oper == FTS_IGNORE_SKIP);
-
- if (oper== FTS_EXIST || oper == FTS_IGNORE) {
- *has_ignore = true;
- continue;
- }
-
- /* Process leaf node accroding to its pass.*/
- if (oper == FTS_EXIST_SKIP
- && visit_pass == FTS_PASS_EXIST) {
- error = visitor(FTS_EXIST, node, arg);
- node->visited = true;
- } else if (oper == FTS_IGNORE_SKIP
- && visit_pass == FTS_PASS_IGNORE) {
- error = visitor(FTS_IGNORE, node, arg);
- node->visited = true;
- } else if (visit_pass == FTS_PASS_FIRST) {
- error = visitor(oper, node, arg);
- node->visited = true;
- }
- }
- }
-
- if (revisit) {
- /* Exist pass processes the skipped FTS_EXIST operation. */
- for (node = start_node;
- node && error == DB_SUCCESS;
- node = node->next) {
-
- if (node->type == FTS_AST_LIST
- && node->oper != FTS_IGNORE) {
- error = fts_ast_visit(FTS_EXIST_SKIP, node,
- visitor, arg, &will_be_ignored);
- }
- }
-
- /* Ignore pass processes the skipped FTS_IGNORE operation. */
- for (node = start_node;
- node && error == DB_SUCCESS;
- node = node->next) {
-
- if (node->type == FTS_AST_LIST) {
- error = fts_ast_visit(FTS_IGNORE_SKIP, node,
- visitor, arg, &will_be_ignored);
- }
- }
- }
-
- return(error);
-}
-
-/**
-Create an ast string object, with NUL-terminator, so the string
-has one more byte than len
-@param[in] str pointer to string
-@param[in] len length of the string
-@return ast string with NUL-terminator */
-UNIV_INTERN
-fts_ast_string_t*
-fts_ast_string_create(
- const byte* str,
- ulint len)
-{
- fts_ast_string_t* ast_str;
-
- ut_ad(len > 0);
-
- ast_str = static_cast<fts_ast_string_t*>
- (ut_malloc(sizeof(fts_ast_string_t)));
- ast_str->str = static_cast<byte*>(ut_malloc(len + 1));
-
- ast_str->len = len;
- memcpy(ast_str->str, str, len);
- ast_str->str[len] = '\0';
-
- return(ast_str);
-}
-
-/**
-Free an ast string instance
-@param[in,out] ast_str string to free */
-UNIV_INTERN
-void
-fts_ast_string_free(
- fts_ast_string_t* ast_str)
-{
- if (ast_str != NULL) {
- ut_free(ast_str->str);
- ut_free(ast_str);
- }
-}
-
-/**
-Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
-@param[in] str string to translate
-@param[in] base the base
-@return translated number */
-UNIV_INTERN
-ulint
-fts_ast_string_to_ul(
- const fts_ast_string_t* ast_str,
- int base)
-{
- return(strtoul(reinterpret_cast<const char*>(ast_str->str),
- NULL, base));
-}
-
-/**
-Print the ast string
-@param[in] str string to print */
-UNIV_INTERN
-void
-fts_ast_string_print(
- const fts_ast_string_t* ast_str)
-{
- for (ulint i = 0; i < ast_str->len; ++i) {
- printf("%c", ast_str->str[i]);
- }
-
- printf("\n");
-}
-
-#ifdef UNIV_DEBUG
-const char*
-fts_ast_oper_name_get(fts_ast_oper_t oper)
-{
- switch(oper) {
- case FTS_NONE:
- return("FTS_NONE");
- case FTS_IGNORE:
- return("FTS_IGNORE");
- case FTS_EXIST:
- return("FTS_EXIST");
- case FTS_NEGATE:
- return("FTS_NEGATE");
- case FTS_INCR_RATING:
- return("FTS_INCR_RATING");
- case FTS_DECR_RATING:
- return("FTS_DECR_RATING");
- case FTS_DISTANCE:
- return("FTS_DISTANCE");
- case FTS_IGNORE_SKIP:
- return("FTS_IGNORE_SKIP");
- case FTS_EXIST_SKIP:
- return("FTS_EXIST_SKIP");
- }
- ut_ad(0);
-}
-
-const char*
-fts_ast_node_type_get(fts_ast_type_t type)
-{
- switch (type) {
- case FTS_AST_OPER:
- return("FTS_AST_OPER");
- case FTS_AST_NUMB:
- return("FTS_AST_NUMB");
- case FTS_AST_TERM:
- return("FTS_AST_TERM");
- case FTS_AST_TEXT:
- return("FTS_AST_TEXT");
- case FTS_AST_LIST:
- return("FTS_AST_LIST");
- case FTS_AST_SUBEXP_LIST:
- return("FTS_AST_SUBEXP_LIST");
- }
- ut_ad(0);
-}
-#endif /* UNIV_DEBUG */
diff --git a/storage/xtradb/fts/fts0blex.cc b/storage/xtradb/fts/fts0blex.cc
deleted file mode 100644
index 2d71934fa0e..00000000000
--- a/storage/xtradb/fts/fts0blex.cc
+++ /dev/null
@@ -1,1957 +0,0 @@
-#include "univ.i"
-#line 2 "fts0blex.cc"
-
-#line 4 "fts0blex.cc"
-
-#define YY_INT_ALIGNED short int
-
-/* A lexical scanner generated by flex */
-
-#define FLEX_SCANNER
-#define YY_FLEX_MAJOR_VERSION 2
-#define YY_FLEX_MINOR_VERSION 5
-#define YY_FLEX_SUBMINOR_VERSION 35
-#if YY_FLEX_SUBMINOR_VERSION > 0
-#define FLEX_BETA
-#endif
-
-/* First, we deal with platform-specific or compiler-specific issues. */
-
-/* begin standard C headers. */
-#include <stdio.h>
-#include <string.h>
-#include <errno.h>
-#include <stdlib.h>
-
-/* end standard C headers. */
-
-/* flex integer type definitions */
-
-#ifndef FLEXINT_H
-#define FLEXINT_H
-
-/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
-
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
-
-/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
- */
-#ifndef __STDC_LIMIT_MACROS
-#define __STDC_LIMIT_MACROS 1
-#endif
-
-#include <inttypes.h>
-typedef int8_t flex_int8_t;
-typedef uint8_t flex_uint8_t;
-typedef int16_t flex_int16_t;
-typedef uint16_t flex_uint16_t;
-typedef int32_t flex_int32_t;
-typedef uint32_t flex_uint32_t;
-#else
-typedef signed char flex_int8_t;
-typedef short int flex_int16_t;
-typedef int flex_int32_t;
-typedef unsigned char flex_uint8_t;
-typedef unsigned short int flex_uint16_t;
-typedef unsigned int flex_uint32_t;
-
-/* Limits of integral types. */
-#ifndef INT8_MIN
-#define INT8_MIN (-128)
-#endif
-#ifndef INT16_MIN
-#define INT16_MIN (-32767-1)
-#endif
-#ifndef INT32_MIN
-#define INT32_MIN (-2147483647-1)
-#endif
-#ifndef INT8_MAX
-#define INT8_MAX (127)
-#endif
-#ifndef INT16_MAX
-#define INT16_MAX (32767)
-#endif
-#ifndef INT32_MAX
-#define INT32_MAX (2147483647)
-#endif
-#ifndef UINT8_MAX
-#define UINT8_MAX (255U)
-#endif
-#ifndef UINT16_MAX
-#define UINT16_MAX (65535U)
-#endif
-#ifndef UINT32_MAX
-#define UINT32_MAX (4294967295U)
-#endif
-
-#endif /* ! C99 */
-
-#endif /* ! FLEXINT_H */
-
-#ifdef __cplusplus
-
-/* The "const" storage-class-modifier is valid. */
-#define YY_USE_CONST
-
-#else /* ! __cplusplus */
-
-/* C99 requires __STDC__ to be defined as 1. */
-#if defined (__STDC__)
-
-#define YY_USE_CONST
-
-#endif /* defined (__STDC__) */
-#endif /* ! __cplusplus */
-
-#ifdef YY_USE_CONST
-#define yyconst const
-#else
-#define yyconst
-#endif
-
-/* Returned upon end-of-file. */
-#define YY_NULL 0
-
-/* Promotes a possibly negative, possibly signed char to an unsigned
- * integer for use as an array index. If the signed char is negative,
- * we want to instead treat it as an 8-bit unsigned char, hence the
- * double cast.
- */
-#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
-
-/* An opaque pointer. */
-#ifndef YY_TYPEDEF_YY_SCANNER_T
-#define YY_TYPEDEF_YY_SCANNER_T
-typedef void* yyscan_t;
-#endif
-
-/* For convenience, these vars (plus the bison vars far below)
- are macros in the reentrant scanner. */
-#define yyin yyg->yyin_r
-#define yyout yyg->yyout_r
-#define yyextra yyg->yyextra_r
-#define yyleng yyg->yyleng_r
-#define yytext yyg->yytext_r
-#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno)
-#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column)
-#define yy_flex_debug yyg->yy_flex_debug_r
-
-/* Enter a start condition. This macro really ought to take a parameter,
- * but we do it the disgusting crufty way forced on us by the ()-less
- * definition of BEGIN.
- */
-#define BEGIN yyg->yy_start = 1 + 2 *
-
-/* Translate the current start state into a value that can be later handed
- * to BEGIN to return to the state. The YYSTATE alias is for lex
- * compatibility.
- */
-#define YY_START ((yyg->yy_start - 1) / 2)
-#define YYSTATE YY_START
-
-/* Action number for EOF rule of a given start state. */
-#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
-
-/* Special action meaning "start processing a new file". */
-#define YY_NEW_FILE fts0brestart(yyin ,yyscanner )
-
-#define YY_END_OF_BUFFER_CHAR 0
-
-/* Size of default input buffer. */
-#ifndef YY_BUF_SIZE
-#ifdef __ia64__
-/* On IA-64, the buffer size is 16k, not 8k.
- * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
- * Ditto for the __ia64__ case accordingly.
- */
-#define YY_BUF_SIZE 32768
-#else
-#define YY_BUF_SIZE 16384
-#endif /* __ia64__ */
-#endif
-
-/* The state buf must be large enough to hold one state per character in the main buffer.
- */
-#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
-
-#ifndef YY_TYPEDEF_YY_BUFFER_STATE
-#define YY_TYPEDEF_YY_BUFFER_STATE
-typedef struct yy_buffer_state *YY_BUFFER_STATE;
-#endif
-
-#define EOB_ACT_CONTINUE_SCAN 0
-#define EOB_ACT_END_OF_FILE 1
-#define EOB_ACT_LAST_MATCH 2
-
-#define YY_LESS_LINENO(n)
-
-/* Return all but the first "n" matched characters back to the input stream. */
-#define yyless(n) \
- do \
- { \
- /* Undo effects of setting up yytext. */ \
- int yyless_macro_arg = (n); \
- YY_LESS_LINENO(yyless_macro_arg);\
- *yy_cp = yyg->yy_hold_char; \
- YY_RESTORE_YY_MORE_OFFSET \
- yyg->yy_c_buf_p = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
- YY_DO_BEFORE_ACTION; /* set up yytext again */ \
- } \
- while ( 0 )
-
-#define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner )
-
-#ifndef YY_TYPEDEF_YY_SIZE_T
-#define YY_TYPEDEF_YY_SIZE_T
-typedef size_t yy_size_t;
-#endif
-
-#ifndef YY_STRUCT_YY_BUFFER_STATE
-#define YY_STRUCT_YY_BUFFER_STATE
-struct yy_buffer_state
- {
- FILE *yy_input_file;
-
- char *yy_ch_buf; /* input buffer */
- char *yy_buf_pos; /* current position in input buffer */
-
- /* Size of input buffer in bytes, not including room for EOB
- * characters.
- */
- yy_size_t yy_buf_size;
-
- /* Number of characters read into yy_ch_buf, not including EOB
- * characters.
- */
- int yy_n_chars;
-
- /* Whether we "own" the buffer - i.e., we know we created it,
- * and can realloc() it to grow it, and should free() it to
- * delete it.
- */
- int yy_is_our_buffer;
-
- /* Whether this is an "interactive" input source; if so, and
- * if we're using stdio for input, then we want to use getc()
- * instead of fread(), to make sure we stop fetching input after
- * each newline.
- */
- int yy_is_interactive;
-
- /* Whether we're considered to be at the beginning of a line.
- * If so, '^' rules will be active on the next match, otherwise
- * not.
- */
- int yy_at_bol;
-
- int yy_bs_lineno; /**< The line count. */
- int yy_bs_column; /**< The column count. */
-
- /* Whether to try to fill the input buffer when we reach the
- * end of it.
- */
- int yy_fill_buffer;
-
- int yy_buffer_status;
-
-#define YY_BUFFER_NEW 0
-#define YY_BUFFER_NORMAL 1
- /* When an EOF's been seen but there's still some text to process
- * then we mark the buffer as YY_EOF_PENDING, to indicate that we
- * shouldn't try reading from the input source any more. We might
- * still have a bunch of tokens to match, though, because of
- * possible backing-up.
- *
- * When we actually see the EOF, we change the status to "new"
- * (via fts0brestart()), so that the user can continue scanning by
- * just pointing yyin at a new input file.
- */
-#define YY_BUFFER_EOF_PENDING 2
-
- };
-#endif /* !YY_STRUCT_YY_BUFFER_STATE */
-
-/* We provide macros for accessing buffer states in case in the
- * future we want to put the buffer states in a more general
- * "scanner state".
- *
- * Returns the top of the stack, or NULL.
- */
-#define YY_CURRENT_BUFFER ( yyg->yy_buffer_stack \
- ? yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] \
- : NULL)
-
-/* Same as previous macro, but useful when we know that the buffer stack is not
- * NULL or when we need an lvalue. For internal use only.
- */
-#define YY_CURRENT_BUFFER_LVALUE yyg->yy_buffer_stack[yyg->yy_buffer_stack_top]
-
-void fts0brestart (FILE *input_file ,yyscan_t yyscanner );
-void fts0b_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
-YY_BUFFER_STATE fts0b_create_buffer (FILE *file,int size ,yyscan_t yyscanner );
-void fts0b_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
-void fts0b_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
-void fts0bpush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
-void fts0bpop_buffer_state (yyscan_t yyscanner );
-
-static void fts0bensure_buffer_stack (yyscan_t yyscanner );
-static void fts0b_load_buffer_state (yyscan_t yyscanner );
-static void fts0b_init_buffer (YY_BUFFER_STATE b,FILE *file ,yyscan_t yyscanner );
-
-#define YY_FLUSH_BUFFER fts0b_flush_buffer(YY_CURRENT_BUFFER ,yyscanner)
-
-YY_BUFFER_STATE fts0b_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner );
-YY_BUFFER_STATE fts0b_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
-YY_BUFFER_STATE fts0b_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
-
-void *fts0balloc (yy_size_t , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) );
-void *fts0brealloc (void *,yy_size_t , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) );
-void fts0bfree (void * , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) );
-
-#define yy_new_buffer fts0b_create_buffer
-
-#define yy_set_interactive(is_interactive) \
- { \
- if ( ! YY_CURRENT_BUFFER ){ \
- fts0bensure_buffer_stack (yyscanner); \
- YY_CURRENT_BUFFER_LVALUE = \
- fts0b_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \
- } \
- YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
- }
-
-#define yy_set_bol(at_bol) \
- { \
- if ( ! YY_CURRENT_BUFFER ){\
- fts0bensure_buffer_stack (yyscanner); \
- YY_CURRENT_BUFFER_LVALUE = \
- fts0b_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \
- } \
- YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
- }
-
-#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
-
-/* Begin user sect3 */
-
-#define fts0bwrap(n) 1
-#define YY_SKIP_YYWRAP
-
-typedef unsigned char YY_CHAR;
-
-typedef int yy_state_type;
-
-#define yytext_ptr yytext_r
-
-static yy_state_type yy_get_previous_state (yyscan_t yyscanner );
-static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner);
-static int yy_get_next_buffer (yyscan_t yyscanner );
-static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) );
-
-/* Done after the current pattern has been matched and before the
- * corresponding action - sets up yytext.
- */
-#define YY_DO_BEFORE_ACTION \
- yyg->yytext_ptr = yy_bp; \
- yyleng = static_cast<int>(yy_cp - yy_bp); \
- yyg->yy_hold_char = *yy_cp; \
- *yy_cp = '\0'; \
- yyg->yy_c_buf_p = yy_cp;
-
-#define YY_NUM_RULES 7
-#define YY_END_OF_BUFFER 8
-/* This struct is not used in this scanner,
- but its presence is necessary. */
-struct yy_trans_info
- {
- flex_int32_t yy_verify;
- flex_int32_t yy_nxt;
- };
-static yyconst flex_int16_t yy_accept[19] =
- { 0,
- 4, 4, 8, 4, 1, 6, 1, 7, 7, 2,
- 3, 4, 1, 1, 0, 5, 3, 0
- } ;
-
-static yyconst flex_int32_t yy_ec[256] =
- { 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 4, 1, 5, 1, 1, 6, 1, 1, 7,
- 7, 7, 7, 1, 7, 1, 1, 8, 8, 8,
- 8, 8, 8, 8, 8, 8, 8, 1, 1, 7,
- 1, 7, 1, 7, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 7, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1
- } ;
-
-static yyconst flex_int32_t yy_meta[9] =
- { 0,
- 1, 2, 3, 4, 5, 5, 5, 1
- } ;
-
-static yyconst flex_int16_t yy_base[22] =
- { 0,
- 0, 0, 22, 0, 7, 23, 0, 14, 23, 23,
- 7, 0, 0, 0, 5, 23, 0, 23, 11, 12,
- 16
- } ;
-
-static yyconst flex_int16_t yy_def[22] =
- { 0,
- 18, 1, 18, 19, 19, 18, 20, 21, 18, 18,
- 19, 19, 5, 20, 21, 18, 11, 0, 18, 18,
- 18
- } ;
-
-static yyconst flex_int16_t yy_nxt[32] =
- { 0,
- 4, 5, 6, 7, 8, 9, 10, 11, 13, 16,
- 14, 12, 12, 14, 17, 14, 15, 15, 16, 15,
- 15, 18, 3, 18, 18, 18, 18, 18, 18, 18,
- 18
- } ;
-
-static yyconst flex_int16_t yy_chk[32] =
- { 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 5, 15,
- 5, 19, 19, 20, 11, 20, 21, 21, 8, 21,
- 21, 3, 18, 18, 18, 18, 18, 18, 18, 18,
- 18
- } ;
-
-/* The intent behind this definition is that it'll catch
- * any uses of REJECT which flex missed.
- */
-#define REJECT reject_used_but_not_detected
-#define yymore() yymore_used_but_not_detected
-#define YY_MORE_ADJ 0
-#define YY_RESTORE_YY_MORE_OFFSET
-#line 1 "fts0blex.l"
-/*****************************************************************************
-
-Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-/**
- * @file fts/fts0blex.l
- * FTS parser lexical analyzer
- *
- * Created 2007/5/9 Sunny Bains
- */
-#line 27 "fts0blex.l"
-
-#include "fts0ast.h"
-#include "fts0pars.h"
-
-/* Required for reentrant parser */
-#define YY_DECL int fts_blexer(YYSTYPE* val, yyscan_t yyscanner)
-
-#define YY_NO_INPUT 1
-#line 484 "fts0blex.cc"
-
-#define INITIAL 0
-
-#ifndef YY_NO_UNISTD_H
-/* Special case for "unistd.h", since it is non-ANSI. We include it way
- * down here because we want the user's section 1 to have been scanned first.
- * The user has a chance to override it with an option.
- */
-#include <unistd.h>
-#endif
-
-#ifndef YY_EXTRA_TYPE
-#define YY_EXTRA_TYPE void *
-#endif
-
-/* Holds the entire state of the reentrant scanner. */
-struct yyguts_t
-{
-
- /* User-defined. Not touched by flex. */
- YY_EXTRA_TYPE yyextra_r;
-
- /* The rest are the same as the globals declared in the non-reentrant scanner. */
- FILE *yyin_r, *yyout_r;
- size_t yy_buffer_stack_top; /**< index of top of stack. */
- size_t yy_buffer_stack_max; /**< capacity of stack. */
- YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */
- char yy_hold_char;
- int yy_n_chars;
- int yyleng_r;
- char *yy_c_buf_p;
- int yy_init;
- int yy_start;
- int yy_did_buffer_switch_on_eof;
- int yy_start_stack_ptr;
- int yy_start_stack_depth;
- int *yy_start_stack;
- yy_state_type yy_last_accepting_state;
- char* yy_last_accepting_cpos;
-
- int yylineno_r;
- int yy_flex_debug_r;
-
- char *yytext_r;
- int yy_more_flag;
- int yy_more_len;
-
-}; /* end struct yyguts_t */
-
-static int yy_init_globals (yyscan_t yyscanner );
-
-int fts0blex_init (yyscan_t* scanner);
-
-int fts0blex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner);
-
-/* Accessor methods to globals.
- These are made visible to non-reentrant scanners for convenience. */
-
-int fts0blex_destroy (yyscan_t yyscanner );
-
-int fts0bget_debug (yyscan_t yyscanner );
-
-void fts0bset_debug (int debug_flag ,yyscan_t yyscanner );
-
-YY_EXTRA_TYPE fts0bget_extra (yyscan_t yyscanner );
-
-void fts0bset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner );
-
-FILE *fts0bget_in (yyscan_t yyscanner );
-
-void fts0bset_in (FILE * in_str ,yyscan_t yyscanner );
-
-FILE *fts0bget_out (yyscan_t yyscanner );
-
-void fts0bset_out (FILE * out_str ,yyscan_t yyscanner );
-
-int fts0bget_leng (yyscan_t yyscanner );
-
-char *fts0bget_text (yyscan_t yyscanner );
-
-int fts0bget_lineno (yyscan_t yyscanner );
-
-void fts0bset_lineno (int line_number ,yyscan_t yyscanner );
-
-/* Macros after this point can all be overridden by user definitions in
- * section 1.
- */
-
-#ifndef YY_SKIP_YYWRAP
-#ifdef __cplusplus
-extern "C" int fts0bwrap (yyscan_t yyscanner );
-#else
-extern int fts0bwrap (yyscan_t yyscanner );
-#endif
-#endif
-
-#ifndef yytext_ptr
-static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)));
-#endif
-
-#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)));
-#endif
-
-#ifndef YY_NO_INPUT
-
-#ifdef __cplusplus
-static int yyinput (yyscan_t yyscanner );
-#else
-static int input (yyscan_t yyscanner );
-#endif
-
-#endif
-
-/* Amount of stuff to slurp up with each read. */
-#ifndef YY_READ_BUF_SIZE
-#ifdef __ia64__
-/* On IA-64, the buffer size is 16k, not 8k */
-#define YY_READ_BUF_SIZE 16384
-#else
-#define YY_READ_BUF_SIZE 8192
-#endif /* __ia64__ */
-#endif
-
-/* Copy whatever the last rule matched to the standard output. */
-#ifndef ECHO
-/* This used to be an fputs(), but since the string might contain NUL's,
- * we now use fwrite().
- */
-#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0)
-#endif
-
-/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
- * is returned in "result".
- */
-#ifndef YY_INPUT
-#define YY_INPUT(buf,result,max_size) \
- if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
- { \
- int c = '*'; \
- int n; \
- for ( n = 0; n < static_cast<int>(max_size) && \
- (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
- buf[n] = (char) c; \
- if ( c == '\n' ) \
- buf[n++] = (char) c; \
- if ( c == EOF && ferror( yyin ) ) \
- YY_FATAL_ERROR( "input in flex scanner failed" ); \
- result = n; \
- } \
- else \
- { \
- errno=0; \
- while ( (result = static_cast<int>(fread(buf, 1, max_size, yyin))) \
- == 0 && ferror(yyin) ) \
- { \
- if( errno != EINTR) \
- { \
- YY_FATAL_ERROR( "input in flex scanner failed" ); \
- break; \
- } \
- errno=0; \
- clearerr(yyin); \
- } \
- }\
-\
-
-#endif
-
-/* No semi-colon after return; correct usage is to write "yyterminate();" -
- * we don't want an extra ';' after the "return" because that will cause
- * some compilers to complain about unreachable statements.
- */
-#ifndef yyterminate
-#define yyterminate() return YY_NULL
-#endif
-
-/* Number of entries by which start-condition stack grows. */
-#ifndef YY_START_STACK_INCR
-#define YY_START_STACK_INCR 25
-#endif
-
-/* Report a fatal error. */
-#ifndef YY_FATAL_ERROR
-#define YY_FATAL_ERROR(msg) yy_fatal_error( msg , yyscanner)
-#endif
-
-/* end tables serialization structures and prototypes */
-
-/* Default declaration of generated scanner - a define so the user can
- * easily add parameters.
- */
-#ifndef YY_DECL
-#define YY_DECL_IS_OURS 1
-
-extern int fts0blex (yyscan_t yyscanner);
-
-#define YY_DECL int fts0blex (yyscan_t yyscanner)
-#endif /* !YY_DECL */
-
-/* Code executed at the beginning of each rule, after yytext and yyleng
- * have been set up.
- */
-#ifndef YY_USER_ACTION
-#define YY_USER_ACTION
-#endif
-
-/* Code executed at the end of each rule. */
-#ifndef YY_BREAK
-#define YY_BREAK break;
-#endif
-
-#define YY_RULE_SETUP \
- YY_USER_ACTION
-
-/** The main scanner function which does all the work.
- */
-YY_DECL
-{
- register yy_state_type yy_current_state;
- register char *yy_cp, *yy_bp;
- register int yy_act;
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
-#line 43 "fts0blex.l"
-
-
-#line 712 "fts0blex.cc"
-
- if ( !yyg->yy_init )
- {
- yyg->yy_init = 1;
-
-#ifdef YY_USER_INIT
- YY_USER_INIT;
-#endif
-
- if ( ! yyg->yy_start )
- yyg->yy_start = 1; /* first start state */
-
- if ( ! yyin )
- yyin = stdin;
-
- if ( ! yyout )
- yyout = stdout;
-
- if ( ! YY_CURRENT_BUFFER ) {
- fts0bensure_buffer_stack (yyscanner);
- YY_CURRENT_BUFFER_LVALUE =
- fts0b_create_buffer(yyin,YY_BUF_SIZE ,yyscanner);
- }
-
- fts0b_load_buffer_state(yyscanner );
- }
-
- while ( 1 ) /* loops until end-of-file is reached */
- {
- yy_cp = yyg->yy_c_buf_p;
-
- /* Support of yytext. */
- *yy_cp = yyg->yy_hold_char;
-
- /* yy_bp points to the position in yy_ch_buf of the start of
- * the current run.
- */
- yy_bp = yy_cp;
-
- yy_current_state = yyg->yy_start;
-yy_match:
- do
- {
- register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
- if ( yy_accept[yy_current_state] )
- {
- yyg->yy_last_accepting_state = yy_current_state;
- yyg->yy_last_accepting_cpos = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 19 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- ++yy_cp;
- }
- while ( yy_current_state != 18 );
- yy_cp = yyg->yy_last_accepting_cpos;
- yy_current_state = yyg->yy_last_accepting_state;
-
-yy_find_action:
- yy_act = yy_accept[yy_current_state];
-
- YY_DO_BEFORE_ACTION;
-
-do_action: /* This label is used only to access EOF actions. */
-
- switch ( yy_act )
- { /* beginning of action switch */
- case 0: /* must back up */
- /* undo the effects of YY_DO_BEFORE_ACTION */
- *yy_cp = yyg->yy_hold_char;
- yy_cp = yyg->yy_last_accepting_cpos;
- yy_current_state = yyg->yy_last_accepting_state;
- goto yy_find_action;
-
-case 1:
-YY_RULE_SETUP
-#line 45 "fts0blex.l"
-/* Ignore whitespace */ ;
- YY_BREAK
-case 2:
-YY_RULE_SETUP
-#line 47 "fts0blex.l"
-{
- val->oper = fts0bget_text(yyscanner)[0];
-
- return(val->oper);
-}
- YY_BREAK
-case 3:
-YY_RULE_SETUP
-#line 53 "fts0blex.l"
-{
- val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner));
-
- return(FTS_NUMB);
-}
- YY_BREAK
-case 4:
-YY_RULE_SETUP
-#line 59 "fts0blex.l"
-{
- val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner));
-
- return(FTS_TERM);
-}
- YY_BREAK
-case 5:
-YY_RULE_SETUP
-#line 65 "fts0blex.l"
-{
- val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner));
-
- return(FTS_TEXT);
-}
- YY_BREAK
-case 6:
-/* rule 6 can match eol */
-YY_RULE_SETUP
-#line 71 "fts0blex.l"
-
- YY_BREAK
-case 7:
-YY_RULE_SETUP
-#line 73 "fts0blex.l"
-ECHO;
- YY_BREAK
-#line 843 "fts0blex.cc"
-case YY_STATE_EOF(INITIAL):
- yyterminate();
-
- case YY_END_OF_BUFFER:
- {
- /* Amount of text matched not including the EOB char. */
- int yy_amount_of_matched_text = (int) (yy_cp - yyg->yytext_ptr) - 1;
-
- /* Undo the effects of YY_DO_BEFORE_ACTION. */
- *yy_cp = yyg->yy_hold_char;
- YY_RESTORE_YY_MORE_OFFSET
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
- {
- /* We're scanning a new file or input source. It's
- * possible that this happened because the user
- * just pointed yyin at a new source and called
- * fts0blex(). If so, then we have to assure
- * consistency between YY_CURRENT_BUFFER and our
- * globals. Here is the right place to do so, because
- * this is the first action (other than possibly a
- * back-up) that will match for the new input source.
- */
- yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
- YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin;
- YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
- }
-
- /* Note that here we test for yy_c_buf_p "<=" to the position
- * of the first EOB in the buffer, since yy_c_buf_p will
- * already have been incremented past the NUL character
- * (since all states make transitions on EOB to the
- * end-of-buffer state). Contrast this with the test
- * in input().
- */
- if ( yyg->yy_c_buf_p <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] )
- { /* This was really a NUL. */
- yy_state_type yy_next_state;
-
- yyg->yy_c_buf_p = yyg->yytext_ptr + yy_amount_of_matched_text;
-
- yy_current_state = yy_get_previous_state( yyscanner );
-
- /* Okay, we're now positioned to make the NUL
- * transition. We couldn't have
- * yy_get_previous_state() go ahead and do it
- * for us because it doesn't know how to deal
- * with the possibility of jamming (and we don't
- * want to build jamming into it because then it
- * will run more slowly).
- */
-
- yy_next_state = yy_try_NUL_trans( yy_current_state , yyscanner);
-
- yy_bp = yyg->yytext_ptr + YY_MORE_ADJ;
-
- if ( yy_next_state )
- {
- /* Consume the NUL. */
- yy_cp = ++yyg->yy_c_buf_p;
- yy_current_state = yy_next_state;
- goto yy_match;
- }
-
- else
- {
- yy_cp = yyg->yy_last_accepting_cpos;
- yy_current_state = yyg->yy_last_accepting_state;
- goto yy_find_action;
- }
- }
-
- else switch ( yy_get_next_buffer( yyscanner ) )
- {
- case EOB_ACT_END_OF_FILE:
- {
- yyg->yy_did_buffer_switch_on_eof = 0;
-
- if ( fts0bwrap(yyscanner ) )
- {
- /* Note: because we've taken care in
- * yy_get_next_buffer() to have set up
- * yytext, we can now set up
- * yy_c_buf_p so that if some total
- * hoser (like flex itself) wants to
- * call the scanner after we return the
- * YY_NULL, it'll still work - another
- * YY_NULL will get returned.
- */
- yyg->yy_c_buf_p = yyg->yytext_ptr + YY_MORE_ADJ;
-
- yy_act = YY_STATE_EOF(YY_START);
- goto do_action;
- }
-
- else
- {
- if ( ! yyg->yy_did_buffer_switch_on_eof )
- YY_NEW_FILE;
- }
- break;
- }
-
- case EOB_ACT_CONTINUE_SCAN:
- yyg->yy_c_buf_p =
- yyg->yytext_ptr + yy_amount_of_matched_text;
-
- yy_current_state = yy_get_previous_state( yyscanner );
-
- yy_cp = yyg->yy_c_buf_p;
- yy_bp = yyg->yytext_ptr + YY_MORE_ADJ;
- goto yy_match;
-
- case EOB_ACT_LAST_MATCH:
- yyg->yy_c_buf_p =
- &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars];
-
- yy_current_state = yy_get_previous_state( yyscanner );
-
- yy_cp = yyg->yy_c_buf_p;
- yy_bp = yyg->yytext_ptr + YY_MORE_ADJ;
- goto yy_find_action;
- }
- break;
- }
-
- default:
- YY_FATAL_ERROR(
- "fatal flex scanner internal error--no action found" );
- } /* end of action switch */
- } /* end of scanning one token */
-} /* end of fts0blex */
-
-/* yy_get_next_buffer - try to read in a new buffer
- *
- * Returns a code representing an action:
- * EOB_ACT_LAST_MATCH -
- * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
- * EOB_ACT_END_OF_FILE - end of file
- */
-static int yy_get_next_buffer (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
- register char *source = yyg->yytext_ptr;
- register int number_to_move, i;
- int ret_val;
-
- if ( yyg->yy_c_buf_p > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] )
- YY_FATAL_ERROR(
- "fatal flex scanner internal error--end of buffer missed" );
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
- { /* Don't try to fill the buffer, so this is an EOF. */
- if ( yyg->yy_c_buf_p - yyg->yytext_ptr - YY_MORE_ADJ == 1 )
- {
- /* We matched a single character, the EOB, so
- * treat this as a final EOF.
- */
- return EOB_ACT_END_OF_FILE;
- }
-
- else
- {
- /* We matched some text prior to the EOB, first
- * process it.
- */
- return EOB_ACT_LAST_MATCH;
- }
- }
-
- /* Try to read more data. */
-
- /* First move last chars to start of buffer. */
- number_to_move = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr) - 1;
-
- for ( i = 0; i < number_to_move; ++i )
- *(dest++) = *(source++);
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
- /* don't do the read, it's not guaranteed to return an EOF,
- * just force an EOF
- */
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars = 0;
-
- else
- {
- int num_to_read = static_cast<int>(
- YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1);
-
- while ( num_to_read <= 0 )
- { /* Not enough room in the buffer - grow it. */
-
- /* just a shorter name for the current buffer */
- YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
-
- int yy_c_buf_p_offset =
- (int) (yyg->yy_c_buf_p - b->yy_ch_buf);
-
- if ( b->yy_is_our_buffer )
- {
- int new_size = static_cast<int>(b->yy_buf_size * 2);
-
- if ( new_size <= 0 )
- b->yy_buf_size += b->yy_buf_size / 8;
- else
- b->yy_buf_size *= 2;
-
- b->yy_ch_buf = (char *)
- /* Include room in for 2 EOB chars. */
- fts0brealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner );
- }
- else
- /* Can't grow it, we don't own it. */
- b->yy_ch_buf = 0;
-
- if ( ! b->yy_ch_buf )
- YY_FATAL_ERROR(
- "fatal error - scanner input buffer overflow" );
-
- yyg->yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset];
-
- num_to_read = static_cast<int>(
- YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1);
-
- }
-
- if ( num_to_read > YY_READ_BUF_SIZE )
- num_to_read = YY_READ_BUF_SIZE;
-
- /* Read in more data. */
- YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
- yyg->yy_n_chars, num_to_read);
-
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
- }
-
- if ( yyg->yy_n_chars == 0 )
- {
- if ( number_to_move == YY_MORE_ADJ )
- {
- ret_val = EOB_ACT_END_OF_FILE;
- fts0brestart(yyin ,yyscanner);
- }
-
- else
- {
- ret_val = EOB_ACT_LAST_MATCH;
- YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
- YY_BUFFER_EOF_PENDING;
- }
- }
-
- else
- ret_val = EOB_ACT_CONTINUE_SCAN;
-
- if ((yy_size_t) (yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
- /* Extend the array by 50%, plus the number we really need. */
- yy_size_t new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1);
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) fts0brealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner );
- if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
- YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
- }
-
- yyg->yy_n_chars += number_to_move;
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] = YY_END_OF_BUFFER_CHAR;
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR;
-
- yyg->yytext_ptr = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
-
- return ret_val;
-}
-
-/* yy_get_previous_state - get the state just before the EOB char was reached */
-
-static yy_state_type yy_get_previous_state (yyscan_t yyscanner)
-{
- register yy_state_type yy_current_state;
- register char *yy_cp;
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- yy_current_state = yyg->yy_start;
-
- for ( yy_cp = yyg->yytext_ptr + YY_MORE_ADJ; yy_cp < yyg->yy_c_buf_p; ++yy_cp )
- {
- register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
- if ( yy_accept[yy_current_state] )
- {
- yyg->yy_last_accepting_state = yy_current_state;
- yyg->yy_last_accepting_cpos = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 19 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- }
-
- return yy_current_state;
-}
-
-/* yy_try_NUL_trans - try to make a transition on the NUL character
- *
- * synopsis
- * next_state = yy_try_NUL_trans( current_state );
- */
-static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner)
-{
- register int yy_is_jam;
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */
- register char *yy_cp = yyg->yy_c_buf_p;
-
- register YY_CHAR yy_c = 1;
- if ( yy_accept[yy_current_state] )
- {
- yyg->yy_last_accepting_state = yy_current_state;
- yyg->yy_last_accepting_cpos = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 19 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- yy_is_jam = (yy_current_state == 18);
-
- return yy_is_jam ? 0 : yy_current_state;
-}
-
-#ifndef YY_NO_INPUT
-#ifdef __cplusplus
- static int yyinput (yyscan_t yyscanner)
-#else
- static int input (yyscan_t yyscanner)
-#endif
-
-{
- int c;
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- *yyg->yy_c_buf_p = yyg->yy_hold_char;
-
- if ( *yyg->yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
- {
- /* yy_c_buf_p now points to the character we want to return.
- * If this occurs *before* the EOB characters, then it's a
- * valid NUL; if not, then we've hit the end of the buffer.
- */
- if ( yyg->yy_c_buf_p < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] )
- /* This was really a NUL. */
- *yyg->yy_c_buf_p = '\0';
-
- else
- { /* need more input */
- int offset = yyg->yy_c_buf_p - yyg->yytext_ptr;
- ++yyg->yy_c_buf_p;
-
- switch ( yy_get_next_buffer( yyscanner ) )
- {
- case EOB_ACT_LAST_MATCH:
- /* This happens because yy_g_n_b()
- * sees that we've accumulated a
- * token and flags that we need to
- * try matching the token before
- * proceeding. But for input(),
- * there's no matching to consider.
- * So convert the EOB_ACT_LAST_MATCH
- * to EOB_ACT_END_OF_FILE.
- */
-
- /* Reset buffer status. */
- fts0brestart(yyin ,yyscanner);
-
- /*FALLTHROUGH*/
-
- case EOB_ACT_END_OF_FILE:
- {
- if ( fts0bwrap(yyscanner ) )
- return EOF;
-
- if ( ! yyg->yy_did_buffer_switch_on_eof )
- YY_NEW_FILE;
-#ifdef __cplusplus
- return yyinput(yyscanner);
-#else
- return input(yyscanner);
-#endif
- }
-
- case EOB_ACT_CONTINUE_SCAN:
- yyg->yy_c_buf_p = yyg->yytext_ptr + offset;
- break;
- }
- }
- }
-
- c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */
- *yyg->yy_c_buf_p = '\0'; /* preserve yytext */
- yyg->yy_hold_char = *++yyg->yy_c_buf_p;
-
- return c;
-}
-#endif /* ifndef YY_NO_INPUT */
-
-/** Immediately switch to a different input stream.
- * @param input_file A readable stream.
- * @param yyscanner The scanner object.
- * @note This function does not reset the start condition to @c INITIAL .
- */
-void fts0brestart (FILE * input_file , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- if ( ! YY_CURRENT_BUFFER ){
- fts0bensure_buffer_stack (yyscanner);
- YY_CURRENT_BUFFER_LVALUE =
- fts0b_create_buffer(yyin,YY_BUF_SIZE ,yyscanner);
- }
-
- fts0b_init_buffer(YY_CURRENT_BUFFER,input_file ,yyscanner);
- fts0b_load_buffer_state(yyscanner );
-}
-
-/** Switch to a different input buffer.
- * @param new_buffer The new input buffer.
- * @param yyscanner The scanner object.
- */
-void fts0b_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- /* TODO. We should be able to replace this entire function body
- * with
- * fts0bpop_buffer_state();
- * fts0bpush_buffer_state(new_buffer);
- */
- fts0bensure_buffer_stack (yyscanner);
- if ( YY_CURRENT_BUFFER == new_buffer )
- return;
-
- if ( YY_CURRENT_BUFFER )
- {
- /* Flush out information for old buffer. */
- *yyg->yy_c_buf_p = yyg->yy_hold_char;
- YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p;
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
- }
-
- YY_CURRENT_BUFFER_LVALUE = new_buffer;
- fts0b_load_buffer_state(yyscanner );
-
- /* We don't actually know whether we did this switch during
- * EOF (fts0bwrap()) processing, but the only time this flag
- * is looked at is after fts0bwrap() is called, so it's safe
- * to go ahead and always set it.
- */
- yyg->yy_did_buffer_switch_on_eof = 1;
-}
-
-static void fts0b_load_buffer_state (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
- yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
- yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
- yyg->yy_hold_char = *yyg->yy_c_buf_p;
-}
-
-/** Allocate and initialize an input buffer state.
- * @param file A readable stream.
- * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
- * @param yyscanner The scanner object.
- * @return the allocated buffer state.
- */
-YY_BUFFER_STATE fts0b_create_buffer (FILE * file, int size , yyscan_t yyscanner)
-{
- YY_BUFFER_STATE b;
-
- b = (YY_BUFFER_STATE) fts0balloc(sizeof( struct yy_buffer_state ) ,yyscanner );
- if ( ! b )
- YY_FATAL_ERROR( "out of dynamic memory in fts0b_create_buffer()" );
-
- b->yy_buf_size = size;
-
- /* yy_ch_buf has to be 2 characters longer than the size given because
- * we need to put in 2 end-of-buffer characters.
- */
- b->yy_ch_buf = (char *) fts0balloc(b->yy_buf_size + 2 ,yyscanner );
- if ( ! b->yy_ch_buf )
- YY_FATAL_ERROR( "out of dynamic memory in fts0b_create_buffer()" );
-
- b->yy_is_our_buffer = 1;
-
- fts0b_init_buffer(b,file ,yyscanner);
-
- return b;
-}
-
-/** Destroy the buffer.
- * @param b a buffer created with fts0b_create_buffer()
- * @param yyscanner The scanner object.
- */
-void fts0b_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- if ( ! b )
- return;
-
- if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
- YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
-
- if ( b->yy_is_our_buffer )
- fts0bfree((void *) b->yy_ch_buf ,yyscanner );
-
- fts0bfree((void *) b ,yyscanner );
-}
-
-/* Initializes or reinitializes a buffer.
- * This function is sometimes called more than once on the same buffer,
- * such as during a fts0brestart() or at EOF.
- */
-static void fts0b_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner)
-
-{
- int oerrno = errno;
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- fts0b_flush_buffer(b ,yyscanner);
-
- b->yy_input_file = file;
- b->yy_fill_buffer = 1;
-
- /* If b is the current buffer, then fts0b_init_buffer was _probably_
- * called from fts0brestart() or through yy_get_next_buffer.
- * In that case, we don't want to reset the lineno or column.
- */
- if (b != YY_CURRENT_BUFFER){
- b->yy_bs_lineno = 1;
- b->yy_bs_column = 0;
- }
-
- b->yy_is_interactive = 0;
-
- errno = oerrno;
-}
-
-/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
- * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
- * @param yyscanner The scanner object.
- */
-void fts0b_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- if ( ! b )
- return;
-
- b->yy_n_chars = 0;
-
- /* We always need two end-of-buffer characters. The first causes
- * a transition to the end-of-buffer state. The second causes
- * a jam in that state.
- */
- b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
- b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
-
- b->yy_buf_pos = &b->yy_ch_buf[0];
-
- b->yy_at_bol = 1;
- b->yy_buffer_status = YY_BUFFER_NEW;
-
- if ( b == YY_CURRENT_BUFFER )
- fts0b_load_buffer_state(yyscanner );
-}
-
-/** Pushes the new state onto the stack. The new state becomes
- * the current state. This function will allocate the stack
- * if necessary.
- * @param new_buffer The new state.
- * @param yyscanner The scanner object.
- */
-void fts0bpush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- if (new_buffer == NULL)
- return;
-
- fts0bensure_buffer_stack(yyscanner);
-
- /* This block is copied from fts0b_switch_to_buffer. */
- if ( YY_CURRENT_BUFFER )
- {
- /* Flush out information for old buffer. */
- *yyg->yy_c_buf_p = yyg->yy_hold_char;
- YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p;
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
- }
-
- /* Only push if top exists. Otherwise, replace top. */
- if (YY_CURRENT_BUFFER)
- yyg->yy_buffer_stack_top++;
- YY_CURRENT_BUFFER_LVALUE = new_buffer;
-
- /* copied from fts0b_switch_to_buffer. */
- fts0b_load_buffer_state(yyscanner );
- yyg->yy_did_buffer_switch_on_eof = 1;
-}
-
-/** Removes and deletes the top of the stack, if present.
- * The next element becomes the new top.
- * @param yyscanner The scanner object.
- */
-void fts0bpop_buffer_state (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- if (!YY_CURRENT_BUFFER)
- return;
-
- fts0b_delete_buffer(YY_CURRENT_BUFFER ,yyscanner);
- YY_CURRENT_BUFFER_LVALUE = NULL;
- if (yyg->yy_buffer_stack_top > 0)
- --yyg->yy_buffer_stack_top;
-
- if (YY_CURRENT_BUFFER) {
- fts0b_load_buffer_state(yyscanner );
- yyg->yy_did_buffer_switch_on_eof = 1;
- }
-}
-
-/* Allocates the stack if it does not exist.
- * Guarantees space for at least one push.
- */
-static void fts0bensure_buffer_stack (yyscan_t yyscanner)
-{
- int num_to_alloc;
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- if (!yyg->yy_buffer_stack) {
-
- /* First allocation is just for 2 elements, since we don't know if this
- * scanner will even need a stack. We use 2 instead of 1 to avoid an
- * immediate realloc on the next call.
- */
- num_to_alloc = 1;
- yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0balloc
- (num_to_alloc * sizeof(struct yy_buffer_state*)
- , yyscanner);
- if ( ! yyg->yy_buffer_stack )
- YY_FATAL_ERROR( "out of dynamic memory in fts0bensure_buffer_stack()" );
-
- memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*));
-
- yyg->yy_buffer_stack_max = num_to_alloc;
- yyg->yy_buffer_stack_top = 0;
- return;
- }
-
- if (yyg->yy_buffer_stack_top >= (yyg->yy_buffer_stack_max) - 1){
-
- /* Increase the buffer to prepare for a possible push. */
- int grow_size = 8 /* arbitrary grow size */;
-
- num_to_alloc = static_cast<int>(yyg->yy_buffer_stack_max + grow_size);
- yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0brealloc
- (yyg->yy_buffer_stack,
- num_to_alloc * sizeof(struct yy_buffer_state*)
- , yyscanner);
- if ( ! yyg->yy_buffer_stack )
- YY_FATAL_ERROR( "out of dynamic memory in fts0bensure_buffer_stack()" );
-
- /* zero only the new slots.*/
- memset(yyg->yy_buffer_stack + yyg->yy_buffer_stack_max, 0, grow_size * sizeof(struct yy_buffer_state*));
- yyg->yy_buffer_stack_max = num_to_alloc;
- }
-}
-
-/** Setup the input buffer state to scan directly from a user-specified character buffer.
- * @param base the character buffer
- * @param size the size in bytes of the character buffer
- * @param yyscanner The scanner object.
- * @return the newly allocated buffer state object.
- */
-YY_BUFFER_STATE fts0b_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner)
-{
- YY_BUFFER_STATE b;
-
- if ( size < 2 ||
- base[size-2] != YY_END_OF_BUFFER_CHAR ||
- base[size-1] != YY_END_OF_BUFFER_CHAR )
- /* They forgot to leave room for the EOB's. */
- return 0;
-
- b = (YY_BUFFER_STATE) fts0balloc(sizeof( struct yy_buffer_state ) ,yyscanner );
- if ( ! b )
- YY_FATAL_ERROR( "out of dynamic memory in fts0b_scan_buffer()" );
-
- b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */
- b->yy_buf_pos = b->yy_ch_buf = base;
- b->yy_is_our_buffer = 0;
- b->yy_input_file = 0;
- b->yy_n_chars = static_cast<int>(b->yy_buf_size);
- b->yy_is_interactive = 0;
- b->yy_at_bol = 1;
- b->yy_fill_buffer = 0;
- b->yy_buffer_status = YY_BUFFER_NEW;
-
- fts0b_switch_to_buffer(b ,yyscanner );
-
- return b;
-}
-
-/** Setup the input buffer state to scan a string. The next call to fts0blex() will
- * scan from a @e copy of @a str.
- * @param yystr a NUL-terminated string to scan
- * @param yyscanner The scanner object.
- * @return the newly allocated buffer state object.
- * @note If you want to scan bytes that may contain NUL values, then use
- * fts0b_scan_bytes() instead.
- */
-YY_BUFFER_STATE fts0b_scan_string (yyconst char * yystr , yyscan_t yyscanner)
-{
- return fts0b_scan_bytes(yystr,static_cast<int>(strlen(yystr)), yyscanner);
-}
-
-/** Setup the input buffer state to scan the given bytes. The next call to fts0blex() will
- * scan from a @e copy of @a bytes.
- * @param yybytes the byte buffer to scan
- * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
- * @param yyscanner The scanner object.
- * @return the newly allocated buffer state object.
- */
-YY_BUFFER_STATE fts0b_scan_bytes (yyconst char * yybytes, int _yybytes_len , yyscan_t yyscanner)
-{
- YY_BUFFER_STATE b;
- char *buf;
- yy_size_t n;
- int i;
-
- /* Get memory for full buffer, including space for trailing EOB's. */
- n = _yybytes_len + 2;
- buf = (char *) fts0balloc(n ,yyscanner );
- if ( ! buf )
- YY_FATAL_ERROR( "out of dynamic memory in fts0b_scan_bytes()" );
-
- for ( i = 0; i < _yybytes_len; ++i )
- buf[i] = yybytes[i];
-
- buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
-
- b = fts0b_scan_buffer(buf,n ,yyscanner);
- if ( ! b )
- YY_FATAL_ERROR( "bad buffer in fts0b_scan_bytes()" );
-
- /* It's okay to grow etc. this buffer, and we should throw it
- * away when we're done.
- */
- b->yy_is_our_buffer = 1;
-
- return b;
-}
-
-#ifndef YY_EXIT_FAILURE
-#define YY_EXIT_FAILURE 2
-#endif
-
-static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)))
-{
- (void) fprintf( stderr, "%s\n", msg );
- exit( YY_EXIT_FAILURE );
-}
-
-/* Redefine yyless() so it works in section 3 code. */
-
-#undef yyless
-#define yyless(n) \
- do \
- { \
- /* Undo effects of setting up yytext. */ \
- int yyless_macro_arg = (n); \
- YY_LESS_LINENO(yyless_macro_arg);\
- yytext[yyleng] = yyg->yy_hold_char; \
- yyg->yy_c_buf_p = yytext + yyless_macro_arg; \
- yyg->yy_hold_char = *yyg->yy_c_buf_p; \
- *yyg->yy_c_buf_p = '\0'; \
- yyleng = yyless_macro_arg; \
- } \
- while ( 0 )
-
-/* Accessor methods (get/set functions) to struct members. */
-
-/** Get the user-defined data for this scanner.
- * @param yyscanner The scanner object.
- */
-YY_EXTRA_TYPE fts0bget_extra (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- return yyextra;
-}
-
-/** Get the current line number.
- * @param yyscanner The scanner object.
- */
-int fts0bget_lineno (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- if (! YY_CURRENT_BUFFER)
- return 0;
-
- return yylineno;
-}
-
-/** Get the current column number.
- * @param yyscanner The scanner object.
- */
-int fts0bget_column (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- if (! YY_CURRENT_BUFFER)
- return 0;
-
- return yycolumn;
-}
-
-/** Get the input stream.
- * @param yyscanner The scanner object.
- */
-FILE *fts0bget_in (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- return yyin;
-}
-
-/** Get the output stream.
- * @param yyscanner The scanner object.
- */
-FILE *fts0bget_out (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- return yyout;
-}
-
-/** Get the length of the current token.
- * @param yyscanner The scanner object.
- */
-int fts0bget_leng (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- return yyleng;
-}
-
-/** Get the current token.
- * @param yyscanner The scanner object.
- */
-
-char *fts0bget_text (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- return yytext;
-}
-
-/** Set the user-defined data. This data is never touched by the scanner.
- * @param user_defined The data to be associated with this scanner.
- * @param yyscanner The scanner object.
- */
-void fts0bset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- yyextra = user_defined ;
-}
-
-/** Set the current line number.
- * @param line_number
- * @param yyscanner The scanner object.
- */
-void fts0bset_lineno (int line_number , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- /* lineno is only valid if an input buffer exists. */
- if (! YY_CURRENT_BUFFER )
- yy_fatal_error( "fts0bset_lineno called with no buffer" , yyscanner);
-
- yylineno = line_number;
-}
-
-/** Set the current column.
- * @param line_number
- * @param yyscanner The scanner object.
- */
-void fts0bset_column (int column_no , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- /* column is only valid if an input buffer exists. */
- if (! YY_CURRENT_BUFFER )
- yy_fatal_error( "fts0bset_column called with no buffer" , yyscanner);
-
- yycolumn = column_no;
-}
-
-/** Set the input stream. This does not discard the current
- * input buffer.
- * @param in_str A readable stream.
- * @param yyscanner The scanner object.
- * @see fts0b_switch_to_buffer
- */
-void fts0bset_in (FILE * in_str , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- yyin = in_str ;
-}
-
-void fts0bset_out (FILE * out_str , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- yyout = out_str ;
-}
-
-int fts0bget_debug (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- return yy_flex_debug;
-}
-
-void fts0bset_debug (int bdebug , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- yy_flex_debug = bdebug ;
-}
-
-/* Accessor methods for yylval and yylloc */
-
-/* User-visible API */
-
-/* fts0blex_init is special because it creates the scanner itself, so it is
- * the ONLY reentrant function that doesn't take the scanner as the last argument.
- * That's why we explicitly handle the declaration, instead of using our macros.
- */
-
-int fts0blex_init(yyscan_t* ptr_yy_globals)
-
-{
- if (ptr_yy_globals == NULL){
- errno = EINVAL;
- return 1;
- }
-
- *ptr_yy_globals = (yyscan_t) fts0balloc ( sizeof( struct yyguts_t ), NULL );
-
- if (*ptr_yy_globals == NULL){
- errno = ENOMEM;
- return 1;
- }
-
- /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. */
- memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));
-
- return yy_init_globals ( *ptr_yy_globals );
-}
-
-/* fts0blex_init_extra has the same functionality as fts0blex_init, but follows the
- * convention of taking the scanner as the last argument. Note however, that
- * this is a *pointer* to a scanner, as it will be allocated by this call (and
- * is the reason, too, why this function also must handle its own declaration).
- * The user defined value in the first argument will be available to fts0balloc in
- * the yyextra field.
- */
-
-int fts0blex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals )
-
-{
- struct yyguts_t dummy_yyguts;
-
- fts0bset_extra (yy_user_defined, &dummy_yyguts);
-
- if (ptr_yy_globals == NULL){
- errno = EINVAL;
- return 1;
- }
-
- *ptr_yy_globals = (yyscan_t) fts0balloc ( sizeof( struct yyguts_t ), &dummy_yyguts );
-
- if (*ptr_yy_globals == NULL){
- errno = ENOMEM;
- return 1;
- }
-
- /* By setting to 0xAA, we expose bugs in
- yy_init_globals. Leave at 0x00 for releases. */
- memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));
-
- fts0bset_extra (yy_user_defined, *ptr_yy_globals);
-
- return yy_init_globals ( *ptr_yy_globals );
-}
-
-static int yy_init_globals (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- /* Initialization is the same as for the non-reentrant scanner.
- * This function is called from fts0blex_destroy(), so don't allocate here.
- */
-
- yyg->yy_buffer_stack = 0;
- yyg->yy_buffer_stack_top = 0;
- yyg->yy_buffer_stack_max = 0;
- yyg->yy_c_buf_p = (char *) 0;
- yyg->yy_init = 0;
- yyg->yy_start = 0;
-
- yyg->yy_start_stack_ptr = 0;
- yyg->yy_start_stack_depth = 0;
- yyg->yy_start_stack = NULL;
-
- /* Defined in main.c */
-#ifdef YY_STDINIT
- yyin = stdin;
- yyout = stdout;
-#else
- yyin = (FILE *) 0;
- yyout = (FILE *) 0;
-#endif
-
- /* For future reference: Set errno on error, since we are called by
- * fts0blex_init()
- */
- return 0;
-}
-
-/* fts0blex_destroy is for both reentrant and non-reentrant scanners. */
-int fts0blex_destroy (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- /* Pop the buffer stack, destroying each element. */
- while(YY_CURRENT_BUFFER){
- fts0b_delete_buffer(YY_CURRENT_BUFFER ,yyscanner );
- YY_CURRENT_BUFFER_LVALUE = NULL;
- fts0bpop_buffer_state(yyscanner);
- }
-
- /* Destroy the stack itself. */
- fts0bfree(yyg->yy_buffer_stack ,yyscanner);
- yyg->yy_buffer_stack = NULL;
-
- /* Destroy the start condition stack. */
- fts0bfree(yyg->yy_start_stack ,yyscanner );
- yyg->yy_start_stack = NULL;
-
- /* Reset the globals. This is important in a non-reentrant scanner so the next time
- * fts0blex() is called, initialization will occur. */
- yy_init_globals( yyscanner);
-
- /* Destroy the main struct (reentrant only). */
- fts0bfree ( yyscanner , yyscanner );
- yyscanner = NULL;
- return 0;
-}
-
-/*
- * Internal utility routines.
- */
-
-#ifndef yytext_ptr
-static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)))
-{
- register int i;
- for ( i = 0; i < n; ++i )
- s1[i] = s2[i];
-}
-#endif
-
-#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)))
-{
- register int n;
- for ( n = 0; s[n]; ++n )
- ;
-
- return n;
-}
-#endif
-
-void *fts0balloc (yy_size_t size , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)))
-{
- return (void *) malloc( size );
-}
-
-void *fts0brealloc (void * ptr, yy_size_t size , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)))
-{
- /* The cast to (char *) in the following accommodates both
- * implementations that use char* generic pointers, and those
- * that use void* generic pointers. It works with the latter
- * because both ANSI C and C++ allow castless assignment from
- * any pointer type to void*, and deal with argument conversions
- * as though doing an assignment.
- */
- return (void *) realloc( (char *) ptr, size );
-}
-
-void fts0bfree (void * ptr , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)))
-{
- free( (char *) ptr ); /* see fts0brealloc() for (char *) cast */
-}
-
-#define YYTABLES_NAME "yytables"
-
-#line 73 "fts0blex.l"
-
diff --git a/storage/xtradb/fts/fts0blex.l b/storage/xtradb/fts/fts0blex.l
deleted file mode 100644
index ae6e8ffaa48..00000000000
--- a/storage/xtradb/fts/fts0blex.l
+++ /dev/null
@@ -1,73 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**
- * @file fts/fts0blex.l
- * FTS parser lexical analyzer
- *
- * Created 2007/5/9 Sunny Bains
- */
-
-%{
-
-#include "fts0ast.h"
-#include "fts0pars.h"
-
-/* Required for reentrant parser */
-#define YY_DECL int fts_blexer(YYSTYPE* val, yyscan_t yyscanner)
-
-%}
-
-%option noinput
-%option nounput
-%option noyywrap
-%option nostdinit
-%option reentrant
-%option never-interactive
-
-%%
-
-[\t ]+ /* Ignore whitespace */ ;
-
-[*()+\-<>~@] {
- val->oper = fts0bget_text(yyscanner)[0];
-
- return(val->oper);
-}
-
-[0-9]+ {
- val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner));
-
- return(FTS_NUMB);
-}
-
-[^" \n*()+\-<>~@%]* {
- val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner));
-
- return(FTS_TERM);
-}
-
-\"[^\"\n]*\" {
- val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0bget_text(yyscanner)), fts0bget_leng(yyscanner));
-
- return(FTS_TEXT);
-}
-
-\n
-
-%%
diff --git a/storage/xtradb/fts/fts0config.cc b/storage/xtradb/fts/fts0config.cc
deleted file mode 100644
index 5b4ae5c39f7..00000000000
--- a/storage/xtradb/fts/fts0config.cc
+++ /dev/null
@@ -1,564 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file fts/fts0config.cc
-Full Text Search configuration table.
-
-Created 2007/5/9 Sunny Bains
-***********************************************************************/
-
-#include "trx0roll.h"
-#include "row0sel.h"
-
-#include "fts0priv.h"
-
-#ifndef UNIV_NONINL
-#include "fts0types.ic"
-#include "fts0vlc.ic"
-#endif
-
-/******************************************************************//**
-Callback function for fetching the config value.
-@return always returns TRUE */
-static
-ibool
-fts_config_fetch_value(
-/*===================*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: pointer to
- ib_vector_t */
-{
- sel_node_t* node = static_cast<sel_node_t*>(row);
- fts_string_t* value = static_cast<fts_string_t*>(user_arg);
-
- dfield_t* dfield = que_node_get_val(node->select_list);
- dtype_t* type = dfield_get_type(dfield);
- ulint len = dfield_get_len(dfield);
- void* data = dfield_get_data(dfield);
-
- ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
-
- if (len != UNIV_SQL_NULL) {
- ulint max_len = ut_min(value->f_len - 1, len);
-
- memcpy(value->f_str, data, max_len);
- value->f_len = max_len;
- value->f_str[value->f_len] = '\0';
- }
-
- return(TRUE);
-}
-
-/******************************************************************//**
-Get value from the config table. The caller must ensure that enough
-space is allocated for value to hold the column contents.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_get_value(
-/*=================*/
- trx_t* trx, /*!< transaction */
- fts_table_t* fts_table, /*!< in: the indexed
- FTS table */
- const char* name, /*!< in: get config value for
- this parameter name */
- fts_string_t* value) /*!< out: value read from
- config table */
-{
- pars_info_t* info;
- que_t* graph;
- dberr_t error;
- ulint name_len = strlen(name);
-
- info = pars_info_create();
-
- *value->f_str = '\0';
- ut_a(value->f_len > 0);
-
- pars_info_bind_function(info, "my_func", fts_config_fetch_value,
- value);
-
- /* The len field of value must be set to the max bytes that
- it can hold. On a successful read, the len field will be set
- to the actual number of bytes copied to value. */
- pars_info_bind_varchar_literal(info, "name", (byte*) name, name_len);
-
- fts_table->suffix = "CONFIG";
-
- graph = fts_parse_sql(
- fts_table,
- info,
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS SELECT value FROM \"%s\""
- " WHERE key = :name;\n"
- "BEGIN\n"
- ""
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c % NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE c;");
-
- trx->op_info = "getting FTS config value";
-
- error = fts_eval_sql(trx, graph);
-
- mutex_enter(&dict_sys->mutex);
- que_graph_free(graph);
- mutex_exit(&dict_sys->mutex);
-
- return(error);
-}
-
-/*********************************************************************//**
-Create the config table name for retrieving index specific value.
-@return index config parameter name */
-UNIV_INTERN
-char*
-fts_config_create_index_param_name(
-/*===============================*/
- const char* param, /*!< in: base name of param */
- const dict_index_t* index) /*!< in: index for config */
-{
- ulint len;
- char* name;
-
- /* The format of the config name is: name_<index_id>. */
- len = strlen(param);
-
- /* Caller is responsible for deleting name. */
- name = static_cast<char*>(ut_malloc(
- len + FTS_AUX_MIN_TABLE_ID_LENGTH + 2));
- strcpy(name, param);
- name[len] = '_';
-
- fts_write_object_id(index->id, name + len + 1,
- DICT_TF2_FLAG_IS_SET(index->table,
- DICT_TF2_FTS_AUX_HEX_NAME));
-
- return(name);
-}
-
-/******************************************************************//**
-Get value specific to an FTS index from the config table. The caller
-must ensure that enough space is allocated for value to hold the
-column contents.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_get_index_value(
-/*=======================*/
- trx_t* trx, /*!< transaction */
- dict_index_t* index, /*!< in: index */
- const char* param, /*!< in: get config value for
- this parameter name */
- fts_string_t* value) /*!< out: value read from
- config table */
-{
- char* name;
- dberr_t error;
- fts_table_t fts_table;
-
- FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE,
- index->table);
-
- /* We are responsible for free'ing name. */
- name = fts_config_create_index_param_name(param, index);
-
- error = fts_config_get_value(trx, &fts_table, name, value);
-
- ut_free(name);
-
- return(error);
-}
-
-/******************************************************************//**
-Set the value in the config table for name.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_set_value(
-/*=================*/
- trx_t* trx, /*!< transaction */
- fts_table_t* fts_table, /*!< in: the indexed
- FTS table */
- const char* name, /*!< in: get config value for
- this parameter name */
- const fts_string_t*
- value) /*!< in: value to update */
-{
- pars_info_t* info;
- que_t* graph;
- dberr_t error;
- undo_no_t undo_no;
- undo_no_t n_rows_updated;
- ulint name_len = strlen(name);
-
- info = pars_info_create();
-
- pars_info_bind_varchar_literal(info, "name", (byte*) name, name_len);
- pars_info_bind_varchar_literal(info, "value",
- value->f_str, value->f_len);
-
- fts_table->suffix = "CONFIG";
-
- graph = fts_parse_sql(
- fts_table, info,
- "BEGIN UPDATE \"%s\" SET value = :value WHERE key = :name;");
-
- trx->op_info = "setting FTS config value";
-
- undo_no = trx->undo_no;
-
- error = fts_eval_sql(trx, graph);
-
- fts_que_graph_free_check_lock(fts_table, NULL, graph);
-
- n_rows_updated = trx->undo_no - undo_no;
-
- /* Check if we need to do an insert. */
- if (n_rows_updated == 0) {
- info = pars_info_create();
-
- pars_info_bind_varchar_literal(
- info, "name", (byte*) name, name_len);
-
- pars_info_bind_varchar_literal(
- info, "value", value->f_str, value->f_len);
-
- graph = fts_parse_sql(
- fts_table, info,
- "BEGIN\n"
- "INSERT INTO \"%s\" VALUES(:name, :value);");
-
- trx->op_info = "inserting FTS config value";
-
- error = fts_eval_sql(trx, graph);
-
- fts_que_graph_free_check_lock(fts_table, NULL, graph);
- }
-
- return(error);
-}
-
-/******************************************************************//**
-Set the value specific to an FTS index in the config table.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_set_index_value(
-/*=======================*/
- trx_t* trx, /*!< transaction */
- dict_index_t* index, /*!< in: index */
- const char* param, /*!< in: get config value for
- this parameter name */
- fts_string_t* value) /*!< out: value read from
- config table */
-{
- char* name;
- dberr_t error;
- fts_table_t fts_table;
-
- FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE,
- index->table);
-
- /* We are responsible for free'ing name. */
- name = fts_config_create_index_param_name(param, index);
-
- error = fts_config_set_value(trx, &fts_table, name, value);
-
- ut_free(name);
-
- return(error);
-}
-
-/******************************************************************//**
-Get an ulint value from the config table.
-@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
-dberr_t
-fts_config_get_index_ulint(
-/*=======================*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: FTS index */
- const char* name, /*!< in: param name */
- ulint* int_value) /*!< out: value */
-{
- dberr_t error;
- fts_string_t value;
-
- /* We set the length of value to the max bytes it can hold. This
- information is used by the callback that reads the value.*/
- value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
- value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1));
-
- error = fts_config_get_index_value(trx, index, name, &value);
-
- if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr, " InnoDB: Error: (%s) reading `%s'\n",
- ut_strerr(error), name);
- } else {
- *int_value = strtoul((char*) value.f_str, NULL, 10);
- }
-
- ut_free(value.f_str);
-
- return(error);
-}
-
-/******************************************************************//**
-Set an ulint value in the config table.
-@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
-dberr_t
-fts_config_set_index_ulint(
-/*=======================*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: FTS index */
- const char* name, /*!< in: param name */
- ulint int_value) /*!< in: value */
-{
- dberr_t error;
- fts_string_t value;
-
- /* We set the length of value to the max bytes it can hold. This
- information is used by the callback that reads the value.*/
- value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
- value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1));
-
- // FIXME: Get rid of snprintf
- ut_a(FTS_MAX_INT_LEN < FTS_MAX_CONFIG_VALUE_LEN);
-
- value.f_len = ut_snprintf(
- (char*) value.f_str, FTS_MAX_INT_LEN, "%lu", int_value);
-
- error = fts_config_set_index_value(trx, index, name, &value);
-
- if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr, " InnoDB: Error: (%s) writing `%s'\n",
- ut_strerr(error), name);
- }
-
- ut_free(value.f_str);
-
- return(error);
-}
-
-/******************************************************************//**
-Get an ulint value from the config table.
-@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
-dberr_t
-fts_config_get_ulint(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- fts_table_t* fts_table, /*!< in: the indexed
- FTS table */
- const char* name, /*!< in: param name */
- ulint* int_value) /*!< out: value */
-{
- dberr_t error;
- fts_string_t value;
-
- /* We set the length of value to the max bytes it can hold. This
- information is used by the callback that reads the value.*/
- value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
- value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1));
-
- error = fts_config_get_value(trx, fts_table, name, &value);
-
- if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr, " InnoDB: Error: (%s) reading `%s'\n",
- ut_strerr(error), name);
- } else {
- *int_value = strtoul((char*) value.f_str, NULL, 10);
- }
-
- ut_free(value.f_str);
-
- return(error);
-}
-
-/******************************************************************//**
-Set an ulint value in the config table.
-@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
-dberr_t
-fts_config_set_ulint(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- fts_table_t* fts_table, /*!< in: the indexed
- FTS table */
- const char* name, /*!< in: param name */
- ulint int_value) /*!< in: value */
-{
- dberr_t error;
- fts_string_t value;
-
- /* We set the length of value to the max bytes it can hold. This
- information is used by the callback that reads the value.*/
- value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
- value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1));
-
- // FIXME: Get rid of snprintf
- ut_a(FTS_MAX_INT_LEN < FTS_MAX_CONFIG_VALUE_LEN);
-
- value.f_len = snprintf(
- (char*) value.f_str, FTS_MAX_INT_LEN, "%lu", int_value);
-
- error = fts_config_set_value(trx, fts_table, name, &value);
-
- if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr, " InnoDB: Error: (%s) writing `%s'\n",
- ut_strerr(error), name);
- }
-
- ut_free(value.f_str);
-
- return(error);
-}
-
-/******************************************************************//**
-Increment the value in the config table for column name.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_increment_value(
-/*=======================*/
- trx_t* trx, /*!< transaction */
- fts_table_t* fts_table, /*!< in: the indexed
- FTS table */
- const char* name, /*!< in: increment config value
- for this parameter name */
- ulint delta) /*!< in: increment by this
- much */
-{
- dberr_t error;
- fts_string_t value;
- que_t* graph = NULL;
- ulint name_len = strlen(name);
- pars_info_t* info = pars_info_create();
-
- /* We set the length of value to the max bytes it can hold. This
- information is used by the callback that reads the value.*/
- value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
- value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1));
-
- *value.f_str = '\0';
-
- pars_info_bind_varchar_literal(info, "name", (byte*) name, name_len);
-
- pars_info_bind_function(
- info, "my_func", fts_config_fetch_value, &value);
-
- fts_table->suffix = "CONFIG";
-
- graph = fts_parse_sql(
- fts_table, info,
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS SELECT value FROM \"%s\""
- " WHERE key = :name FOR UPDATE;\n"
- "BEGIN\n"
- ""
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c % NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE c;");
-
- trx->op_info = "read FTS config value";
-
- error = fts_eval_sql(trx, graph);
-
- fts_que_graph_free_check_lock(fts_table, NULL, graph);
-
- if (UNIV_UNLIKELY(error == DB_SUCCESS)) {
- ulint int_value;
-
- int_value = strtoul((char*) value.f_str, NULL, 10);
-
- int_value += delta;
-
- ut_a(FTS_MAX_CONFIG_VALUE_LEN > FTS_MAX_INT_LEN);
-
- // FIXME: Get rid of snprintf
- value.f_len = snprintf(
- (char*) value.f_str, FTS_MAX_INT_LEN, "%lu", int_value);
-
- fts_config_set_value(trx, fts_table, name, &value);
- }
-
- if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr, " InnoDB: Error: (%s) "
- "while incrementing %s.\n", ut_strerr(error), name);
- }
-
- ut_free(value.f_str);
-
- return(error);
-}
-
-/******************************************************************//**
-Increment the per index value in the config table for column name.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_increment_index_value(
-/*=============================*/
- trx_t* trx, /*!< transaction */
- dict_index_t* index, /*!< in: FTS index */
- const char* param, /*!< in: increment config value
- for this parameter name */
- ulint delta) /*!< in: increment by this
- much */
-{
- char* name;
- dberr_t error;
- fts_table_t fts_table;
-
- FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE,
- index->table);
-
- /* We are responsible for free'ing name. */
- name = fts_config_create_index_param_name(param, index);
-
- error = fts_config_increment_value(trx, &fts_table, name, delta);
-
- ut_free(name);
-
- return(error);
-}
-
diff --git a/storage/xtradb/fts/fts0fts.cc b/storage/xtradb/fts/fts0fts.cc
deleted file mode 100644
index e1a95bcd427..00000000000
--- a/storage/xtradb/fts/fts0fts.cc
+++ /dev/null
@@ -1,7711 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, MariaDB Corporation. All Rights reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file fts/fts0fts.cc
-Full Text Search interface
-***********************************************************************/
-
-#include "trx0roll.h"
-#include "row0mysql.h"
-#include "row0upd.h"
-#include "dict0types.h"
-#include "row0sel.h"
-
-#include "fts0fts.h"
-#include "fts0priv.h"
-#include "fts0types.h"
-
-#include "fts0types.ic"
-#include "fts0vlc.ic"
-#include "dict0priv.h"
-#include "dict0stats.h"
-#include "btr0pcur.h"
-#include <vector>
-
-#include "ha_prototypes.h"
-
-#define FTS_MAX_ID_LEN 32
-
-/** Column name from the FTS config table */
-#define FTS_MAX_CACHE_SIZE_IN_MB "cache_size_in_mb"
-
-/** Verify if a aux table name is a obsolete table
-by looking up the key word in the obsolete table names */
-#define FTS_IS_OBSOLETE_AUX_TABLE(table_name) \
- (strstr((table_name), "DOC_ID") != NULL \
- || strstr((table_name), "ADDED") != NULL \
- || strstr((table_name), "STOPWORDS") != NULL)
-
-/** This is maximum FTS cache for each table and would be
-a configurable variable */
-UNIV_INTERN ulong fts_max_cache_size;
-
-/** Whether the total memory used for FTS cache is exhausted, and we will
-need a sync to free some memory */
-UNIV_INTERN bool fts_need_sync = false;
-
-/** Variable specifying the total memory allocated for FTS cache */
-UNIV_INTERN ulong fts_max_total_cache_size;
-
-/** This is FTS result cache limit for each query and would be
-a configurable variable */
-UNIV_INTERN ulong fts_result_cache_limit;
-
-/** Variable specifying the maximum FTS max token size */
-UNIV_INTERN ulong fts_max_token_size;
-
-/** Variable specifying the minimum FTS max token size */
-UNIV_INTERN ulong fts_min_token_size;
-
-
-// FIXME: testing
-ib_time_t elapsed_time = 0;
-ulint n_nodes = 0;
-
-/** Error condition reported by fts_utf8_decode() */
-const ulint UTF8_ERROR = 0xFFFFFFFF;
-
-#ifdef FTS_CACHE_SIZE_DEBUG
-/** The cache size permissible lower limit (1K) */
-static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;
-
-/** The cache size permissible upper limit (1G) */
-static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
-#endif /* FTS_CACHE_SIZE_DEBUG */
-
-/** Time to sleep after DEADLOCK error before retrying operation. */
-static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;
-
-#ifdef UNIV_PFS_RWLOCK
-UNIV_INTERN mysql_pfs_key_t fts_cache_rw_lock_key;
-UNIV_INTERN mysql_pfs_key_t fts_cache_init_rw_lock_key;
-#endif /* UNIV_PFS_RWLOCK */
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t fts_delete_mutex_key;
-UNIV_INTERN mysql_pfs_key_t fts_optimize_mutex_key;
-UNIV_INTERN mysql_pfs_key_t fts_bg_threads_mutex_key;
-UNIV_INTERN mysql_pfs_key_t fts_doc_id_mutex_key;
-UNIV_INTERN mysql_pfs_key_t fts_pll_tokenize_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/** variable to record innodb_fts_internal_tbl_name for information
-schema table INNODB_FTS_INSERTED etc. */
-UNIV_INTERN char* fts_internal_tbl_name = NULL;
-UNIV_INTERN char* fts_internal_tbl_name2 = NULL;
-
-/** InnoDB default stopword list:
-There are different versions of stopwords, the stop words listed
-below comes from "Google Stopword" list. Reference:
-http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
-The final version of InnoDB default stopword list is still pending
-for decision */
-const char *fts_default_stopword[] =
-{
- "a",
- "about",
- "an",
- "are",
- "as",
- "at",
- "be",
- "by",
- "com",
- "de",
- "en",
- "for",
- "from",
- "how",
- "i",
- "in",
- "is",
- "it",
- "la",
- "of",
- "on",
- "or",
- "that",
- "the",
- "this",
- "to",
- "was",
- "what",
- "when",
- "where",
- "who",
- "will",
- "with",
- "und",
- "the",
- "www",
- NULL
-};
-
-/** For storing table info when checking for orphaned tables. */
-struct fts_aux_table_t {
- table_id_t id; /*!< Table id */
- table_id_t parent_id; /*!< Parent table id */
- table_id_t index_id; /*!< Table FT index id */
- char* name; /*!< Name of the table */
-};
-
-/** SQL statements for creating the ancillary common FTS tables. */
-static const char* fts_create_common_tables_sql = {
- "BEGIN\n"
- ""
- "CREATE TABLE \"%s_DELETED\" (\n"
- " doc_id BIGINT UNSIGNED\n"
- ") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_DELETED\"(doc_id);\n"
- ""
- "CREATE TABLE \"%s_DELETED_CACHE\" (\n"
- " doc_id BIGINT UNSIGNED\n"
- ") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND "
- "ON \"%s_DELETED_CACHE\"(doc_id);\n"
- ""
- "CREATE TABLE \"%s_BEING_DELETED\" (\n"
- " doc_id BIGINT UNSIGNED\n"
- ") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND "
- "ON \"%s_BEING_DELETED\"(doc_id);\n"
- ""
- "CREATE TABLE \"%s_BEING_DELETED_CACHE\" (\n"
- " doc_id BIGINT UNSIGNED\n"
- ") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND "
- "ON \"%s_BEING_DELETED_CACHE\"(doc_id);\n"
- ""
- "CREATE TABLE \"%s_CONFIG\" (\n"
- " key CHAR(50),\n"
- " value CHAR(200) NOT NULL\n"
- ") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_CONFIG\"(key);\n"
-};
-
-#ifdef FTS_DOC_STATS_DEBUG
-/** Template for creating the FTS auxiliary index specific tables. This is
-mainly designed for the statistics work in the future */
-static const char* fts_create_index_tables_sql = {
- "BEGIN\n"
- ""
- "CREATE TABLE \"%s_DOC_ID\" (\n"
- " doc_id BIGINT UNSIGNED,\n"
- " word_count INTEGER UNSIGNED NOT NULL\n"
- ") COMPACT;\n"
- "CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_DOC_ID\"(doc_id);\n"
-};
-#endif
-
-/** Template for creating the ancillary FTS tables word index tables. */
-static const char* fts_create_index_sql = {
- "BEGIN\n"
- ""
- "CREATE UNIQUE CLUSTERED INDEX FTS_INDEX_TABLE_IND "
- "ON \"%s\"(word, first_doc_id);\n"
-};
-
-/** FTS auxiliary table suffixes that are common to all FT indexes. */
-static const char* fts_common_tables[] = {
- "BEING_DELETED",
- "BEING_DELETED_CACHE",
- "CONFIG",
- "DELETED",
- "DELETED_CACHE",
- NULL
-};
-
-/** FTS auxiliary INDEX split intervals. */
-const fts_index_selector_t fts_index_selector[] = {
- { 9, "INDEX_1" },
- { 65, "INDEX_2" },
- { 70, "INDEX_3" },
- { 75, "INDEX_4" },
- { 80, "INDEX_5" },
- { 85, "INDEX_6" },
- { 0 , NULL }
-};
-
-/** Default config values for FTS indexes on a table. */
-static const char* fts_config_table_insert_values_sql =
- "BEGIN\n"
- "\n"
- "INSERT INTO \"%s\" VALUES('"
- FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
- ""
- "INSERT INTO \"%s\" VALUES('"
- FTS_OPTIMIZE_LIMIT_IN_SECS "', '180');\n"
- ""
- "INSERT INTO \"%s\" VALUES ('"
- FTS_SYNCED_DOC_ID "', '0');\n"
- ""
- "INSERT INTO \"%s\" VALUES ('"
- FTS_TOTAL_DELETED_COUNT "', '0');\n"
- "" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
- "INSERT INTO \"%s\" VALUES ('"
- FTS_TABLE_STATE "', '0');\n";
-
-/** Run SYNC on the table, i.e., write out data from the cache to the
-FTS auxiliary INDEX table and clear the cache at the end.
-@param[in,out] sync sync state
-@param[in] unlock_cache whether unlock cache lock when write node
-@param[in] wait whether wait when a sync is in progress
-@param[in] has_dict whether has dict operation lock
-@return DB_SUCCESS if all OK */
-static
-dberr_t
-fts_sync(
- fts_sync_t* sync,
- bool unlock_cache,
- bool wait,
- bool has_dict);
-
-/****************************************************************//**
-Release all resources help by the words rb tree e.g., the node ilist. */
-static
-void
-fts_words_free(
-/*===========*/
- ib_rbt_t* words) /*!< in: rb tree of words */
- MY_ATTRIBUTE((nonnull));
-#ifdef FTS_CACHE_SIZE_DEBUG
-/****************************************************************//**
-Read the max cache size parameter from the config table. */
-static
-void
-fts_update_max_cache_size(
-/*======================*/
- fts_sync_t* sync); /*!< in: sync state */
-#endif
-
-/*********************************************************************//**
-This function fetches the document just inserted right before
-we commit the transaction, and tokenize the inserted text data
-and insert into FTS auxiliary table and its cache.
-@return TRUE if successful */
-static
-ulint
-fts_add_doc_by_id(
-/*==============*/
- fts_trx_table_t*ftt, /*!< in: FTS trx table */
- doc_id_t doc_id, /*!< in: doc id */
- ib_vector_t* fts_indexes MY_ATTRIBUTE((unused)));
- /*!< in: affected fts indexes */
-#ifdef FTS_DOC_STATS_DEBUG
-/****************************************************************//**
-Check whether a particular word (term) exists in the FTS index.
-@return DB_SUCCESS if all went fine */
-static
-dberr_t
-fts_is_word_in_index(
-/*=================*/
- trx_t* trx, /*!< in: FTS query state */
- que_t** graph, /*!< out: Query graph */
- fts_table_t* fts_table, /*!< in: table instance */
- const fts_string_t* word, /*!< in: the word to check */
- ibool* found) /*!< out: TRUE if exists */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* FTS_DOC_STATS_DEBUG */
-
-/******************************************************************//**
-Update the last document id. This function could create a new
-transaction to update the last document id.
-@return DB_SUCCESS if OK */
-static
-dberr_t
-fts_update_sync_doc_id(
-/*===================*/
- const dict_table_t* table, /*!< in: table */
- const char* table_name, /*!< in: table name, or NULL */
- doc_id_t doc_id, /*!< in: last document id */
- trx_t* trx) /*!< in: update trx, or NULL */
- MY_ATTRIBUTE((nonnull(1)));
-
-/****************************************************************//**
-This function loads the default InnoDB stopword list */
-static
-void
-fts_load_default_stopword(
-/*======================*/
- fts_stopword_t* stopword_info) /*!< in: stopword info */
-{
- fts_string_t str;
- mem_heap_t* heap;
- ib_alloc_t* allocator;
- ib_rbt_t* stop_words;
-
- allocator = stopword_info->heap;
- heap = static_cast<mem_heap_t*>(allocator->arg);
-
- if (!stopword_info->cached_stopword) {
- /* For default stopword, we always use fts_utf8_string_cmp() */
- stopword_info->cached_stopword = rbt_create(
- sizeof(fts_tokenizer_word_t), fts_utf8_string_cmp);
- }
-
- stop_words = stopword_info->cached_stopword;
-
- str.f_n_char = 0;
-
- for (ulint i = 0; fts_default_stopword[i]; ++i) {
- char* word;
- fts_tokenizer_word_t new_word;
-
- /* We are going to duplicate the value below. */
- word = const_cast<char*>(fts_default_stopword[i]);
-
- new_word.nodes = ib_vector_create(
- allocator, sizeof(fts_node_t), 4);
-
- str.f_len = ut_strlen(word);
- str.f_str = reinterpret_cast<byte*>(word);
-
- fts_utf8_string_dup(&new_word.text, &str, heap);
-
- rbt_insert(stop_words, &new_word, &new_word);
- }
-
- stopword_info->status = STOPWORD_FROM_DEFAULT;
-}
-
-/****************************************************************//**
-Callback function to read a single stopword value.
-@return Always return TRUE */
-static
-ibool
-fts_read_stopword(
-/*==============*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: pointer to ib_vector_t */
-{
- ib_alloc_t* allocator;
- fts_stopword_t* stopword_info;
- sel_node_t* sel_node;
- que_node_t* exp;
- ib_rbt_t* stop_words;
- dfield_t* dfield;
- fts_string_t str;
- mem_heap_t* heap;
- ib_rbt_bound_t parent;
-
- sel_node = static_cast<sel_node_t*>(row);
- stopword_info = static_cast<fts_stopword_t*>(user_arg);
-
- stop_words = stopword_info->cached_stopword;
- allocator = static_cast<ib_alloc_t*>(stopword_info->heap);
- heap = static_cast<mem_heap_t*>(allocator->arg);
-
- exp = sel_node->select_list;
-
- /* We only need to read the first column */
- dfield = que_node_get_val(exp);
-
- str.f_n_char = 0;
- str.f_str = static_cast<byte*>(dfield_get_data(dfield));
- str.f_len = dfield_get_len(dfield);
-
- /* Only create new node if it is a value not already existed */
- if (str.f_len != UNIV_SQL_NULL
- && rbt_search(stop_words, &parent, &str) != 0) {
-
- fts_tokenizer_word_t new_word;
-
- new_word.nodes = ib_vector_create(
- allocator, sizeof(fts_node_t), 4);
-
- new_word.text.f_str = static_cast<byte*>(
- mem_heap_alloc(heap, str.f_len + 1));
-
- memcpy(new_word.text.f_str, str.f_str, str.f_len);
-
- new_word.text.f_n_char = 0;
- new_word.text.f_len = str.f_len;
- new_word.text.f_str[str.f_len] = 0;
-
- rbt_insert(stop_words, &new_word, &new_word);
- }
-
- return(TRUE);
-}
-
-/******************************************************************//**
-Load user defined stopword from designated user table
-@return TRUE if load operation is successful */
-static
-ibool
-fts_load_user_stopword(
-/*===================*/
- fts_t* fts, /*!< in: FTS struct */
- const char* stopword_table_name, /*!< in: Stopword table
- name */
- fts_stopword_t* stopword_info) /*!< in: Stopword info */
-{
- pars_info_t* info;
- que_t* graph;
- dberr_t error = DB_SUCCESS;
- ibool ret = TRUE;
- trx_t* trx;
- ibool has_lock = fts->fts_status & TABLE_DICT_LOCKED;
-
- trx = trx_allocate_for_background();
- trx->op_info = "Load user stopword table into FTS cache";
-
- if (!has_lock) {
- mutex_enter(&dict_sys->mutex);
- }
-
- /* Validate the user table existence and in the right
- format */
- stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
- if (!stopword_info->charset) {
- ret = FALSE;
- goto cleanup;
- } else if (!stopword_info->cached_stopword) {
- /* Create the stopword RB tree with the stopword column
- charset. All comparison will use this charset */
- stopword_info->cached_stopword = rbt_create_arg_cmp(
- sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
- (void*)stopword_info->charset);
-
- }
-
- info = pars_info_create();
-
- pars_info_bind_id(info, TRUE, "table_stopword", stopword_table_name);
-
- pars_info_bind_function(info, "my_func", fts_read_stopword,
- stopword_info);
-
- graph = fts_parse_sql_no_dict_lock(
- NULL,
- info,
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS"
- " SELECT value "
- " FROM $table_stopword;\n"
- "BEGIN\n"
- "\n"
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c % NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE c;");
-
- for (;;) {
- error = fts_eval_sql(trx, graph);
-
- if (error == DB_SUCCESS) {
- fts_sql_commit(trx);
- stopword_info->status = STOPWORD_USER_TABLE;
- break;
- } else {
-
- fts_sql_rollback(trx);
-
- ut_print_timestamp(stderr);
-
- if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: lock wait "
- "timeout reading user stopword table. "
- "Retrying!\n");
-
- trx->error_state = DB_SUCCESS;
- } else {
- fprintf(stderr, " InnoDB: Error '%s' "
- "while reading user stopword table.\n",
- ut_strerr(error));
- ret = FALSE;
- break;
- }
- }
- }
-
- que_graph_free(graph);
-
-cleanup:
- if (!has_lock) {
- mutex_exit(&dict_sys->mutex);
- }
-
- trx_free_for_background(trx);
- return(ret);
-}
-
-/******************************************************************//**
-Initialize the index cache. */
-static
-void
-fts_index_cache_init(
-/*=================*/
- ib_alloc_t* allocator, /*!< in: the allocator to use */
- fts_index_cache_t* index_cache) /*!< in: index cache */
-{
- ulint i;
-
- ut_a(index_cache->words == NULL);
-
- index_cache->words = rbt_create_arg_cmp(
- sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
- (void*)index_cache->charset);
-
- ut_a(index_cache->doc_stats == NULL);
-
- index_cache->doc_stats = ib_vector_create(
- allocator, sizeof(fts_doc_stats_t), 4);
-
- for (i = 0; fts_index_selector[i].value; ++i) {
- ut_a(index_cache->ins_graph[i] == NULL);
- ut_a(index_cache->sel_graph[i] == NULL);
- }
-}
-
-/*********************************************************************//**
-Bring an FTS cache into its initial state: create the sync heap, zero
-the size accounting, create the deleted Doc ID vector and initialize
-every index cache registered in cache->indexes. */
-UNIV_INTERN
-void
-fts_cache_init(
-/*===========*/
-	fts_cache_t*	cache)	/*!< in: cache to initialize */
-{
-	ib_alloc_t*	sync_alloc = cache->sync_heap;
-
-	/* The sync heap must not exist at this point. */
-	ut_a(sync_alloc->arg == NULL);
-
-	sync_alloc->arg = mem_heap_create(1024);
-
-	cache->total_size = 0;
-
-	mutex_enter((ib_mutex_t*) &cache->deleted_lock);
-	cache->deleted_doc_ids = ib_vector_create(
-		sync_alloc, sizeof(fts_update_t), 4);
-	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
-
-	/* Give every FTS index a fresh word tree and stats vector. */
-	ulint	pos = 0;
-
-	while (pos < ib_vector_size(cache->indexes)) {
-		void*	elem = ib_vector_get(cache->indexes, pos);
-
-		fts_index_cache_init(
-			sync_alloc, static_cast<fts_index_cache_t*>(elem));
-
-		++pos;
-	}
-}
-
-/****************************************************************//**
-Create a FTS cache.
-The cache structure and its sync state are allocated from one heap,
-which is remembered in cache->cache_heap; both allocators are created
-on top of that same heap. The locks, mutexes and the sync event are
-created here and the cache is run through fts_cache_init() before it is
-returned. The stopword information starts out as STOPWORD_NOT_INIT.
-@return the newly created cache */
-UNIV_INTERN
-fts_cache_t*
-fts_cache_create(
-/*=============*/
- dict_table_t* table) /*!< in: table owns the FTS cache */
-{
- mem_heap_t* heap;
- fts_cache_t* cache;
-
- heap = static_cast<mem_heap_t*>(mem_heap_create(512));
-
- cache = static_cast<fts_cache_t*>(
- mem_heap_zalloc(heap, sizeof(*cache)));
-
- cache->cache_heap = heap;
-
- rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
-
- rw_lock_create(
- fts_cache_init_rw_lock_key, &cache->init_lock,
- SYNC_FTS_CACHE_INIT);
-
- mutex_create(
- fts_delete_mutex_key, &cache->deleted_lock, SYNC_FTS_OPTIMIZE);
-
- mutex_create(
- fts_optimize_mutex_key, &cache->optimize_lock,
- SYNC_FTS_OPTIMIZE);
-
- mutex_create(
- fts_doc_id_mutex_key, &cache->doc_id_lock, SYNC_FTS_OPTIMIZE);
-
- /* This is the heap used to create the cache itself. */
- cache->self_heap = ib_heap_allocator_create(heap);
-
- /* This is a transient heap, used for storing sync data. */
- cache->sync_heap = ib_heap_allocator_create(heap);
- cache->sync_heap->arg = NULL; /* fts_cache_init() asserts that this is NULL */
-
- fts_need_sync = false;
-
- cache->sync = static_cast<fts_sync_t*>(
- mem_heap_zalloc(heap, sizeof(fts_sync_t)));
-
- cache->sync->table = table;
- cache->sync->event = os_event_create();
-
- /* Create the index cache vector that will hold the inverted indexes. */
- cache->indexes = ib_vector_create(
- cache->self_heap, sizeof(fts_index_cache_t), 2);
-
- fts_cache_init(cache); /* creates the sync heap and the deleted Doc ID vector */
-
- cache->stopword_info.cached_stopword = NULL;
- cache->stopword_info.charset = NULL;
-
- cache->stopword_info.heap = cache->self_heap;
-
- cache->stopword_info.status = STOPWORD_NOT_INIT;
-
- return(cache);
-}
-
-/*******************************************************************//**
-Register a newly created FTS index: append it to table->fts->indexes
-and make sure the FTS cache has an index cache entry for it. Both steps
-run under an exclusive cache->init_lock. */
-UNIV_INTERN
-void
-fts_add_index(
-/*==========*/
-	dict_index_t*	index,	/*!< FTS index to be added */
-	dict_table_t*	table)	/*!< table */
-{
-	fts_t*	fts = table->fts;
-
-	ut_ad(fts);
-
-	fts_cache_t*	cache = table->fts->cache;
-
-	rw_lock_x_lock(&cache->init_lock);
-
-	ib_vector_push(fts->indexes, &index);
-
-	/* Only create an index cache when the cache does not know this
-	index yet. */
-	fts_index_cache_t*	index_cache = fts_find_index_cache(
-		cache, index);
-
-	if (index_cache == NULL) {
-		index_cache = fts_cache_index_cache_create(table, index);
-	}
-
-	rw_lock_x_unlock(&cache->init_lock);
-}
-
-/*******************************************************************//**
-Rebuild cache->get_docs after the set of index caches in cache->indexes
-has changed: the vector is emptied and then refilled with one zeroed
-entry per index cache, each pointing at its index cache. */
-static
-void
-fts_reset_get_doc(
-/*==============*/
-	fts_cache_t*	cache)	/*!< in: FTS index cache */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX));
-#endif
-	ib_vector_reset(cache->get_docs);
-
-	for (ulint pos = 0; pos < ib_vector_size(cache->indexes); ++pos) {
-
-		/* Append an uninitialized slot and clear it. */
-		fts_get_doc_t*	slot = static_cast<fts_get_doc_t*>(
-			ib_vector_push(cache->get_docs, NULL));
-
-		memset(slot, 0x0, sizeof(*slot));
-
-		slot->index_cache = static_cast<fts_index_cache_t*>(
-			ib_vector_get(cache->indexes, pos));
-	}
-
-	ut_ad(ib_vector_size(cache->get_docs)
-	      == ib_vector_size(cache->indexes));
-}
-
-/*******************************************************************//**
-Tell whether an index is a member of the table's index list.
-@return TRUE if it exists */
-static
-ibool
-fts_in_dict_index(
-/*==============*/
-	dict_table_t*	table,		/*!< in: Table */
-	dict_index_t*	index_check)	/*!< in: index to be checked */
-{
-	dict_index_t*	cur = dict_table_get_first_index(table);
-
-	/* Advance until the list ends or the wanted index shows up. */
-	while (cur != NULL && cur != index_check) {
-		cur = dict_table_get_next_index(cur);
-	}
-
-	if (cur != NULL) {
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/*******************************************************************//**
-Tell whether an index has an entry in the fts->cache->indexes vector.
-@return TRUE if it exists */
-static
-ibool
-fts_in_index_cache(
-/*===============*/
-	dict_table_t*	table,	/*!< in: Table */
-	dict_index_t*	index)	/*!< in: index to be checked */
-{
-	ulint	pos = 0;
-
-	while (pos < ib_vector_size(table->fts->cache->indexes)) {
-		fts_index_cache_t*	entry;
-
-		entry = static_cast<fts_index_cache_t*>(
-			ib_vector_get(table->fts->cache->indexes, pos));
-
-		if (entry->index == index) {
-			return(TRUE);
-		}
-
-		++pos;
-	}
-
-	return(FALSE);
-}
-
-/*******************************************************************//**
-Verify that every index listed in fts->indexes is also present in the
-index cache and in the table's index list. A table without FTS data or
-without an FTS cache trivially passes.
-@return TRUE if all indexes match */
-UNIV_INTERN
-ibool
-fts_check_cached_index(
-/*===================*/
-	dict_table_t*	table)	/*!< in: Table where indexes are dropped */
-{
-	if (!table->fts || !table->fts->cache) {
-		return(TRUE);
-	}
-
-	/* Both vectors must describe the same number of indexes. */
-	ut_a(ib_vector_size(table->fts->indexes)
-	     == ib_vector_size(table->fts->cache->indexes));
-
-	for (ulint pos = 0; pos < ib_vector_size(table->fts->indexes); pos++) {
-		dict_index_t*	index = static_cast<dict_index_t*>(
-			ib_vector_getp(table->fts->indexes, pos));
-
-		/* The cache is consulted first, then the table. */
-		if (!(fts_in_index_cache(table, index)
-		      && fts_in_dict_index(table, index))) {
-			return(FALSE);
-		}
-	}
-
-	return(TRUE);
-}
-
-/*******************************************************************//**
-Drop auxiliary tables related to an FTS index.
-When the index is the only FTS index of the table (or the table has no
-FTS index left), the table is removed from the optimize thread and its
-DICT_TF2_FTS flag is cleared. If in addition DICT_TF2_FTS_HAS_DOC_ID is
-not set, all FTS auxiliary tables are dropped and the FTS data of the
-table is freed; otherwise the cache is recreated with the Doc ID
-counters preserved. When other FTS indexes remain, only the index cache
-entry of this index is removed.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-dberr_t
-fts_drop_index(
-/*===========*/
-	dict_table_t*	table,	/*!< in: Table where indexes are dropped */
-	dict_index_t*	index,	/*!< in: Index to be dropped */
-	trx_t*		trx)	/*!< in: Transaction for the drop */
-{
-	ib_vector_t*	indexes = table->fts->indexes;
-	dberr_t		err = DB_SUCCESS;
-
-	ut_a(indexes);
-
-	if ((ib_vector_size(indexes) == 1
-	     && (index == static_cast<dict_index_t*>(
-			ib_vector_getp(table->fts->indexes, 0))))
-	    || ib_vector_is_empty(indexes)) {
-		doc_id_t	current_doc_id;
-		doc_id_t	first_doc_id;
-
-		/* If we are dropping the only FTS index of the table,
-		remove it from optimize thread */
-		fts_optimize_remove_table(table);
-
-		DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
-
-		/* If Doc ID column is not added internally by FTS index,
-		we can drop all FTS auxiliary tables. Otherwise, we will
-		need to keep some common table such as CONFIG table, so
-		as to keep track of incrementing Doc IDs */
-		if (!DICT_TF2_FLAG_IS_SET(
-			table, DICT_TF2_FTS_HAS_DOC_ID)) {
-			dberr_t	index_err;
-
-			err = fts_drop_tables(trx, table);
-
-			index_err = fts_drop_index_tables(trx, index);
-
-			/* Report the last error that occurred. A successful
-			second step must not hide a failure of the first
-			one, which used to be overwritten here. */
-			if (index_err != DB_SUCCESS) {
-				err = index_err;
-			}
-
-			fts_free(table);
-
-			return(err);
-		}
-
-		/* Recreate the cache from scratch, carrying the Doc ID
-		counters over to the new instance. */
-		current_doc_id = table->fts->cache->next_doc_id;
-		first_doc_id = table->fts->cache->first_doc_id;
-		fts_cache_clear(table->fts->cache);
-		fts_cache_destroy(table->fts->cache);
-		table->fts->cache = fts_cache_create(table);
-		table->fts->cache->next_doc_id = current_doc_id;
-		table->fts->cache->first_doc_id = first_doc_id;
-	} else {
-		fts_cache_t*		cache = table->fts->cache;
-		fts_index_cache_t*	index_cache;
-
-		rw_lock_x_lock(&cache->init_lock);
-
-		index_cache = fts_find_index_cache(cache, index);
-
-		if (index_cache != NULL) {
-			if (index_cache->words) {
-				fts_words_free(index_cache->words);
-				rbt_free(index_cache->words);
-			}
-
-			/* The first pointer-sized member of the entry is
-			passed as the value to look for in the vector. */
-			ib_vector_remove(cache->indexes, *(void**) index_cache);
-		}
-
-		/* The get_doc entries point into cache->indexes, so they
-		have to be rebuilt after the removal. */
-		if (cache->get_docs) {
-			fts_reset_get_doc(cache);
-		}
-
-		rw_lock_x_unlock(&cache->init_lock);
-	}
-
-	err = fts_drop_index_tables(trx, index);
-
-	ib_vector_remove(indexes, (const void*) index);
-
-	return(err);
-}
-
-/****************************************************************//**
-Free a query graph while holding dict_sys->mutex. The mutex is taken
-(and released again) here unless the fts_status of the owning table has
-TABLE_DICT_LOCKED set. The table is looked up through fts_table when it
-names one, otherwise through index_cache. */
-UNIV_INTERN
-void
-fts_que_graph_free_check_lock(
-/*==========================*/
-	fts_table_t*		fts_table,	/*!< in: FTS table */
-	const fts_index_cache_t*index_cache,	/*!< in: FTS index cache */
-	que_t*			graph)		/*!< in: query graph */
-{
-	ibool	dict_locked = FALSE;
-
-	if (fts_table != NULL && fts_table->table != NULL) {
-		const fts_t*	fts = fts_table->table->fts;
-
-		ut_ad(fts);
-
-		dict_locked = fts->fts_status & TABLE_DICT_LOCKED;
-	} else if (index_cache != NULL) {
-		const fts_t*	fts = index_cache->index->table->fts;
-
-		ut_ad(fts);
-
-		dict_locked = fts->fts_status & TABLE_DICT_LOCKED;
-	}
-
-	const bool	take_mutex = !dict_locked;
-
-	if (take_mutex) {
-		mutex_enter(&dict_sys->mutex);
-	}
-
-	ut_ad(mutex_own(&dict_sys->mutex));
-
-	que_graph_free(graph);
-
-	if (take_mutex) {
-		mutex_exit(&dict_sys->mutex);
-	}
-}
-
-/****************************************************************//**
-Get the charset of an FTS index. The charset is looked up from the
-precise type (prtype) of the column of the first index field. In
-FTS_DEBUG builds the remaining fields are checked to use the same
-charset.
-@return the charset returned by innobase_get_fts_charset() for the
-first field of the index */
-UNIV_INTERN
-CHARSET_INFO*
-fts_index_get_charset(
-/*==================*/
-	dict_index_t*	index)	/*!< in: FTS index */
-{
-	CHARSET_INFO*	charset = NULL;
-	dict_field_t*	field;
-	ulint		prtype;
-
-	field = dict_index_get_nth_field(index, 0);
-	prtype = field->col->prtype;
-
-	charset = innobase_get_fts_charset(
-		(int) (prtype & DATA_MYSQL_TYPE_MASK),
-		(uint) dtype_get_charset_coll(prtype));
-
-#ifdef FTS_DEBUG
-	/* Set up charset info for this index. Please note all
-	field of the FTS index should have the same charset */
-	for (ulint i = 1; i < index->n_fields; i++) {
-		CHARSET_INFO*	fld_charset;
-
-		field = dict_index_get_nth_field(index, i);
-		prtype = field->col->prtype;
-
-		fld_charset = innobase_get_fts_charset(
-			(int)(prtype & DATA_MYSQL_TYPE_MASK),
-			(uint) dtype_get_charset_coll(prtype));
-
-		/* All FTS columns should have the same charset */
-		if (charset) {
-			ut_a(charset == fld_charset);
-		} else {
-			charset = fld_charset;
-		}
-	}
-#endif
-
-	return(charset);
-}
-/****************************************************************//**
-Create an FTS index cache.
-A zeroed entry is appended to cache->indexes for the given index, its
-charset is looked up, the ins_graph and sel_graph arrays are allocated
-(zero-filled) from the heap behind cache->self_heap, and the entry is
-initialized with fts_index_cache_init(). The index must not have an
-entry in the cache yet. If cache->get_docs exists it is rebuilt.
-@return Index Cache */
-UNIV_INTERN
-fts_index_cache_t*
-fts_cache_index_cache_create(
-/*=========================*/
- dict_table_t* table, /*!< in: table with FTS index */
- dict_index_t* index) /*!< in: FTS index */
-{
- ulint n_bytes;
- fts_index_cache_t* index_cache;
- fts_cache_t* cache = table->fts->cache;
-
- ut_a(cache != NULL);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX));
-#endif
-
- /* Must not already exist in the cache vector. */
- ut_a(fts_find_index_cache(cache, index) == NULL);
-
- index_cache = static_cast<fts_index_cache_t*>(
- ib_vector_push(cache->indexes, NULL));
-
- memset(index_cache, 0x0, sizeof(*index_cache));
-
- index_cache->index = index;
-
- index_cache->charset = fts_index_get_charset(index);
-
- n_bytes = sizeof(que_t*) * sizeof(fts_index_selector); /* NOTE(review):
- sizeof(fts_index_selector) looks like the byte size of the selector
- array rather than its element count, so this presumably reserves more
- slots than there are selectors - confirm */
-
- index_cache->ins_graph = static_cast<que_t**>(
- mem_heap_zalloc(static_cast<mem_heap_t*>(
- cache->self_heap->arg), n_bytes));
-
- index_cache->sel_graph = static_cast<que_t**>(
- mem_heap_zalloc(static_cast<mem_heap_t*>(
- cache->self_heap->arg), n_bytes));
-
- fts_index_cache_init(cache->sync_heap, index_cache); /* creates words and doc_stats */
-
- if (cache->get_docs) {
- fts_reset_get_doc(cache);
- }
-
- return(index_cache);
-}
-
-/****************************************************************//**
-Release all resources held by the words rb tree, e.g. the node ilists.
-The tree is emptied node by node; the tree object itself is not freed
-here. */
-static
-void
-fts_words_free(
-/*===========*/
-	ib_rbt_t*	words)	/*!< in: rb tree of words */
-{
-	const ib_rbt_node_t*	cur;
-
-	/* Keep taking the first node until the tree is empty. */
-	while ((cur = rbt_first(words)) != NULL) {
-
-		fts_tokenizer_word_t*	word = rbt_value(
-			fts_tokenizer_word_t, cur);
-
-		/* Release the ilist of every node of this word. */
-		for (ulint n = 0; n < ib_vector_size(word->nodes); ++n) {
-
-			fts_node_t*	fts_node = static_cast<fts_node_t*>(
-				ib_vector_get(word->nodes, n));
-
-			ut_free(fts_node->ilist);
-			fts_node->ilist = NULL;
-		}
-
-		/* NOTE: the detached tree node is ours to free. */
-		ut_free(rbt_remove_node(words, cur));
-	}
-}
-
-/** Clear cache: free the word trees and cached query graphs of all
-index caches, free the sync heap and reset the bookkeeping fields.
-@param[in,out]	cache	fts cache */
-UNIV_INTERN
-void
-fts_cache_clear(
-	fts_cache_t*	cache)
-{
-	for (ulint pos = 0; pos < ib_vector_size(cache->indexes); ++pos) {
-		fts_index_cache_t*	index_cache;
-
-		index_cache = static_cast<fts_index_cache_t*>(
-			ib_vector_get(cache->indexes, pos));
-
-		/* Drop the words first, then the tree that held them. */
-		fts_words_free(index_cache->words);
-
-		rbt_free(index_cache->words);
-
-		index_cache->words = NULL;
-
-		for (ulint sel = 0; fts_index_selector[sel].value; ++sel) {
-
-			/* Insert graph first, select graph second. */
-			que_t**	slots[] = {
-				&index_cache->ins_graph[sel],
-				&index_cache->sel_graph[sel]
-			};
-
-			for (ulint k = 0; k < 2; ++k) {
-
-				if (*slots[k] == NULL) {
-					continue;
-				}
-
-				fts_que_graph_free_check_lock(
-					NULL, index_cache, *slots[k]);
-
-				*slots[k] = NULL;
-			}
-		}
-
-		index_cache->doc_stats = NULL;
-	}
-
-	mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
-	cache->sync_heap->arg = NULL;
-
-	cache->total_size = 0;
-
-	mutex_enter((ib_mutex_t*) &cache->deleted_lock);
-	cache->deleted_doc_ids = NULL;
-	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
-}
-
-/*********************************************************************//**
-Look up the index cache entry that belongs to a particular FTS index.
-@return the index cache else NULL */
-UNIV_INLINE
-fts_index_cache_t*
-fts_get_index_cache(
-/*================*/
-	fts_cache_t*		cache,	/*!< in: cache to search */
-	const dict_index_t*	index)	/*!< in: index to search for */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_EX)
-	      || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_EX));
-#endif
-
-	ulint	pos = 0;
-
-	while (pos < ib_vector_size(cache->indexes)) {
-		fts_index_cache_t*	entry = static_cast<fts_index_cache_t*>(
-			ib_vector_get(cache->indexes, pos));
-
-		if (entry->index == index) {
-			return(entry);
-		}
-
-		++pos;
-	}
-
-	/* No entry refers to the index. */
-	return(NULL);
-}
-
-#ifdef FTS_DEBUG
-/*********************************************************************//**
-Look up the get_doc structure whose index cache belongs to the given
-index.
-@return the fts_get_doc_t item else NULL */
-static
-fts_get_doc_t*
-fts_get_index_get_doc(
-/*==================*/
-	fts_cache_t*		cache,	/*!< in: cache to search */
-	const dict_index_t*	index)	/*!< in: index to search for */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_EX));
-#endif
-
-	ulint	pos = 0;
-
-	while (pos < ib_vector_size(cache->get_docs)) {
-		fts_get_doc_t*	entry = static_cast<fts_get_doc_t*>(
-			ib_vector_get(cache->get_docs, pos));
-
-		if (entry->index_cache->index == index) {
-			return(entry);
-		}
-
-		++pos;
-	}
-
-	/* Nothing matched. */
-	return(NULL);
-}
-#endif
-
-/**********************************************************************//**
-Free the FTS cache: its locks, mutexes and sync event, the cached
-stopword tree and the sync heap (when present), and finally the heap
-the cache itself was allocated from. */
-UNIV_INTERN
-void
-fts_cache_destroy(
-/*==============*/
-	fts_cache_t*	cache)	/*!< in: cache*/
-{
-	/* Synchronization objects go first. */
-	rw_lock_free(&cache->lock);
-	rw_lock_free(&cache->init_lock);
-	mutex_free(&cache->optimize_lock);
-	mutex_free(&cache->deleted_lock);
-	mutex_free(&cache->doc_id_lock);
-	os_event_free(cache->sync->event);
-
-	if (cache->stopword_info.cached_stopword != NULL) {
-		rbt_free(cache->stopword_info.cached_stopword);
-	}
-
-	/* The sync heap only exists while the cache is initialized. */
-	mem_heap_t*	sync_heap = static_cast<mem_heap_t*>(
-		cache->sync_heap->arg);
-
-	if (sync_heap != NULL) {
-		mem_heap_free(sync_heap);
-	}
-
-	/* This heap holds the cache structure itself; free it last. */
-	mem_heap_free(cache->cache_heap);
-}
-
-/**********************************************************************//**
-Look a word up in the word tree of an index cache and add it when it is
-not there yet. Words found in the cached stopword tree are rejected.
-@return specified word token, or NULL for a stopword */
-static
-fts_tokenizer_word_t*
-fts_tokenizer_word_get(
-/*===================*/
-	fts_cache_t*	cache,		/*!< in: cache */
-	fts_index_cache_t*
-			index_cache,	/*!< in: index cache */
-	fts_string_t*	text)		/*!< in: node text */
-{
-	ib_rbt_bound_t	bound;
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX));
-#endif
-
-	/* Stopwords are never indexed. */
-	if (cache->stopword_info.cached_stopword != NULL
-	    && rbt_search(cache->stopword_info.cached_stopword,
-			  &bound, text) == 0) {
-
-		return(NULL);
-	}
-
-	/* An exact match means the word is cached already. */
-	if (rbt_search(index_cache->words, &bound, text) == 0) {
-
-		return(rbt_value(fts_tokenizer_word_t, bound.last));
-	}
-
-	/* Not cached: build a new word on the sync heap and link it in
-	at the position the search stopped at. */
-	fts_tokenizer_word_t	fresh;
-	mem_heap_t*		sync_heap = static_cast<mem_heap_t*>(
-		cache->sync_heap->arg);
-
-	fresh.nodes = ib_vector_create(
-		cache->sync_heap, sizeof(fts_node_t), 4);
-
-	fts_utf8_string_dup(&fresh.text, text, sync_heap);
-
-	bound.last = rbt_add_node(index_cache->words, &bound, &fresh);
-
-	/* Account for the tree node, the text and the node vector. */
-	cache->total_size += sizeof(fresh)
-		+ sizeof(ib_rbt_node_t)
-		+ text->f_len
-		+ (sizeof(fts_node_t) * 4)
-		+ sizeof(*fresh.nodes);
-
-	ut_ad(rbt_validate(index_cache->words));
-
-	return(rbt_value(fts_tokenizer_word_t, bound.last));
-}
-
-/**********************************************************************//**
-Add the given doc_id/word positions to the given node's ilist.
-The fragment appended to the ilist consists of the encoded difference
-between doc_id and node->last_doc_id, followed by the encoded
-differences between successive positions (the first one relative to 0),
-followed by a single 0x00 byte. When the fragment does not fit into the
-space already allocated, a larger buffer is allocated and the old
-contents are copied over. The node's size, first/last doc id and doc
-count are updated and, when a cache is given, the fragment length is
-added to cache->total_size. */
-UNIV_INTERN
-void
-fts_cache_node_add_positions(
-/*=========================*/
- fts_cache_t* cache, /*!< in: cache */
- fts_node_t* node, /*!< in: word node */
- doc_id_t doc_id, /*!< in: doc id */
- ib_vector_t* positions) /*!< in: fts_token_t::positions */
-{
- ulint i;
- byte* ptr;
- byte* ilist;
- ulint enc_len;
- ulint last_pos;
- byte* ptr_start;
- ulint doc_id_delta;
-
-#ifdef UNIV_SYNC_DEBUG
- if (cache) {
- ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX));
- }
-#endif
- ut_ad(doc_id >= node->last_doc_id);
-
- /* Calculate the space required to store the ilist. */
- doc_id_delta = (ulint)(doc_id - node->last_doc_id);
- enc_len = fts_get_encoded_len(doc_id_delta);
-
- last_pos = 0;
- for (i = 0; i < ib_vector_size(positions); i++) {
- ulint pos = *(static_cast<ulint*>(
- ib_vector_get(positions, i)));
-
- ut_ad(last_pos == 0 || pos > last_pos);
-
- enc_len += fts_get_encoded_len(pos - last_pos);
- last_pos = pos;
- }
-
- /* The 0x00 byte at the end of the token positions list. */
- enc_len++;
-
- if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
- /* No need to allocate more space, we can fit in the new
- data at the end of the old one. */
- ilist = NULL; /* NULL means: keep using the current buffer */
- ptr = node->ilist + node->ilist_size;
- } else {
- ulint new_size = node->ilist_size + enc_len;
-
- /* Over-reserve space by a fixed size for small lengths and
- by 20% for lengths >= 48 bytes. */
- if (new_size < 16) {
- new_size = 16;
- } else if (new_size < 32) {
- new_size = 32;
- } else if (new_size < 48) {
- new_size = 48;
- } else {
- new_size = (ulint)(1.2 * new_size);
- }
-
- ilist = static_cast<byte*>(ut_malloc(new_size));
- ptr = ilist + node->ilist_size; /* leave room in front for the old contents */
-
- node->ilist_size_alloc = new_size;
- }
-
- ptr_start = ptr;
-
- /* Encode the new fragment. */
- ptr += fts_encode_int(doc_id_delta, ptr);
-
- last_pos = 0;
- for (i = 0; i < ib_vector_size(positions); i++) {
- ulint pos = *(static_cast<ulint*>(
- ib_vector_get(positions, i)));
-
- ptr += fts_encode_int(pos - last_pos, ptr);
- last_pos = pos;
- }
-
- *ptr++ = 0;
-
- ut_a(enc_len == (ulint)(ptr - ptr_start)); /* bytes written must match the length computed up front */
-
- if (ilist) {
- /* Copy old ilist to the start of the new one and switch the
- new one into place in the node. */
- if (node->ilist_size > 0) {
- memcpy(ilist, node->ilist, node->ilist_size);
- ut_free(node->ilist);
- }
-
- node->ilist = ilist;
- }
-
- node->ilist_size += enc_len;
-
- if (cache) {
- cache->total_size += enc_len;
- }
-
- if (node->first_doc_id == FTS_NULL_DOC_ID) {
- node->first_doc_id = doc_id;
- }
-
- node->last_doc_id = doc_id;
- ++node->doc_count;
-}
-
-/**********************************************************************//**
-Add document to the cache.
-Every token is removed from the tokens tree and freed as it is
-processed, so the tree is empty on return. Tokens for which
-fts_tokenizer_word_get() returns NULL are dropped without being added.
-For the others the positions are appended to the last node of the word,
-or to a newly pushed node when there is none, the last one is synced or
-above FTS_ILIST_MAX_SIZE, or doc_id is smaller than its last doc id.
-One fts_doc_stats_t entry is pushed for the document and
-cache->sync->max_doc_id is raised if doc_id is larger. Nothing is done
-when tokens is NULL. */
-static
-void
-fts_cache_add_doc(
-/*==============*/
- fts_cache_t* cache, /*!< in: cache */
- fts_index_cache_t*
- index_cache, /*!< in: index cache */
- doc_id_t doc_id, /*!< in: doc id to add */
- ib_rbt_t* tokens) /*!< in: document tokens */
-{
- const ib_rbt_node_t* node;
- ulint n_words;
- fts_doc_stats_t* doc_stats;
-
- if (!tokens) {
- return;
- }
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX));
-#endif
-
- n_words = rbt_size(tokens); /* taken before the loop below empties the tree */
-
- for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
-
- fts_tokenizer_word_t* word;
- fts_node_t* fts_node = NULL;
- fts_token_t* token = rbt_value(fts_token_t, node);
-
- /* Find and/or add token to the cache. */
- word = fts_tokenizer_word_get(
- cache, index_cache, &token->text);
-
- if (!word) {
- ut_free(rbt_remove_node(tokens, node)); /* NULL word (e.g. a stopword): discard the token */
- continue;
- }
-
- if (ib_vector_size(word->nodes) > 0) {
- fts_node = static_cast<fts_node_t*>(
- ib_vector_last(word->nodes));
- }
-
- if (fts_node == NULL || fts_node->synced
- || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
- || doc_id < fts_node->last_doc_id) {
-
- fts_node = static_cast<fts_node_t*>(
- ib_vector_push(word->nodes, NULL));
-
- memset(fts_node, 0x0, sizeof(*fts_node));
-
- cache->total_size += sizeof(*fts_node);
- }
-
- fts_cache_node_add_positions(
- cache, fts_node, doc_id, token->positions);
-
- ut_free(rbt_remove_node(tokens, node));
- }
-
- ut_a(rbt_empty(tokens));
-
- /* Add to doc ids processed so far. */
- doc_stats = static_cast<fts_doc_stats_t*>(
- ib_vector_push(index_cache->doc_stats, NULL));
-
- doc_stats->doc_id = doc_id;
- doc_stats->word_count = n_words;
-
- /* Add the doc stats memory usage too. */
- cache->total_size += sizeof(*doc_stats);
-
- if (doc_id > cache->sync->max_doc_id) {
- cache->sync->max_doc_id = doc_id;
- }
-}
-
-/****************************************************************//**
-Drops a table. If the table can't be found in the data dictionary,
-DB_FAIL is returned and nothing is dropped.
-@return DB_SUCCESS, DB_FAIL if the table does not exist, or the error
-code of the drop */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_drop_table(
-/*===========*/
-	trx_t*		trx,		/*!< in: transaction */
-	const char*	table_name)	/*!< in: table to drop */
-{
-	/* Check that the table exists in our data dictionary.
-	Similar to regular drop table case, we will open table with
-	DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
-	dict_table_t*	table = dict_table_open_on_name(
-		table_name, TRUE, FALSE,
-		static_cast<dict_err_ignore_t>(
-			DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
-
-	if (table == 0) {
-		return(DB_FAIL);
-	}
-
-	dict_table_close(table, TRUE, FALSE);
-
-	/* Pass nonatomic=false (dont allow data dict unlock),
-	because the transaction may hold locks on SYS_* tables from
-	previous calls to fts_drop_table(). */
-	dberr_t	error = row_drop_table_for_mysql(table_name, trx, true, false);
-
-	if (error != DB_SUCCESS) {
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Unable to drop FTS index aux table %s: %s",
-			table_name, ut_strerr(error));
-	}
-
-	return(error);
-}
-
-/****************************************************************//**
-Rename a single auxiliary table due to database name change.
-The new name is built from the database name part of new_name followed
-by the part of fts_table_old_name that starts at its first '/'.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_rename_one_aux_table(
-/*=====================*/
- const char* new_name, /*!< in: new parent tbl name */
- const char* fts_table_old_name, /*!< in: old aux tbl name */
- trx_t* trx) /*!< in: transaction */
-{
- char fts_table_new_name[MAX_TABLE_NAME_LEN]; /* NOTE(review): nothing below checks that the new name fits - confirm callers bound it */
- ulint new_db_name_len = dict_get_db_name_len(new_name);
- ulint old_db_name_len = dict_get_db_name_len(fts_table_old_name);
- ulint table_new_name_len = strlen(fts_table_old_name)
- + new_db_name_len - old_db_name_len;
-
- /* The new and old database names are expected to differ; a call
- with identical database names is asserted against in debug builds. */
- ut_ad((new_db_name_len != old_db_name_len)
- || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
-
- /* Get the database name from "new_name", and table name
- from the fts_table_old_name. Neither copy is expected to write a
- terminator; it is stored explicitly afterwards.
- NOTE(review): the strchr() result is used unchecked - presumably an
- aux table name always contains '/'; confirm */
- strncpy(fts_table_new_name, new_name, new_db_name_len);
- strncpy(fts_table_new_name + new_db_name_len,
- strchr(fts_table_old_name, '/'),
- table_new_name_len - new_db_name_len);
- fts_table_new_name[table_new_name_len] = 0;
-
- return(row_rename_table_for_mysql(
- fts_table_old_name, fts_table_new_name, trx, false));
-}
-
-/****************************************************************//**
-Rename the auxiliary tables of all FTS indexes of a table because the
-database name changed. The common auxiliary tables are renamed first,
-then the index specific ones; the first failure ends the operation.
-@return DB_SUCCESS or error code */
-
-dberr_t
-fts_rename_aux_tables(
-/*==================*/
-	dict_table_t*	table,		/*!< in: user Table */
-	const char*	new_name,	/*!< in: new table name */
-	trx_t*		trx)		/*!< in: transaction */
-{
-	fts_table_t	fts_table;
-
-	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
-
-	/* Rename common auxiliary tables */
-	for (ulint n = 0; fts_common_tables[n] != NULL; ++n) {
-		fts_table.suffix = fts_common_tables[n];
-
-		char*	old_name = fts_get_table_name(&fts_table);
-
-		dberr_t	err = fts_rename_one_aux_table(
-			new_name, old_name, trx);
-
-		mem_free(old_name);
-
-		if (err != DB_SUCCESS) {
-			return(err);
-		}
-	}
-
-	fts_t*	fts = table->fts;
-
-	/* Rename index specific auxiliary tables */
-	ulint	pos = 0;
-
-	while (fts->indexes != 0 && pos < ib_vector_size(fts->indexes)) {
-		dict_index_t*	index = static_cast<dict_index_t*>(
-			ib_vector_getp(fts->indexes, pos));
-
-		FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
-
-		/* One auxiliary table exists per selector entry. */
-		for (ulint sel = 0; fts_index_selector[sel].value; ++sel) {
-			fts_table.suffix = fts_get_suffix(sel);
-
-			char*	old_name = fts_get_table_name(&fts_table);
-
-			dberr_t	err = fts_rename_one_aux_table(
-				new_name, old_name, trx);
-
-			DBUG_EXECUTE_IF("fts_rename_failure",
-					err = DB_DEADLOCK;
-					fts_sql_rollback(trx););
-
-			mem_free(old_name);
-
-			if (err != DB_SUCCESS) {
-				return(err);
-			}
-		}
-
-		++pos;
-	}
-
-	return(DB_SUCCESS);
-}
-
-/****************************************************************//**
-Drops the common ancillary tables needed for supporting an FTS index
-on the given table. All tables are attempted even if one of them fails.
-row_mysql_lock_data_dictionary must have been called before this.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_drop_common_tables(
-/*===================*/
-	trx_t*		trx,		/*!< in: transaction */
-	fts_table_t*	fts_table)	/*!< in: table with an FTS
-					index */
-{
-	dberr_t	error = DB_SUCCESS;
-
-	for (ulint n = 0; fts_common_tables[n] != NULL; ++n) {
-		fts_table->suffix = fts_common_tables[n];
-
-		char*	table_name = fts_get_table_name(fts_table);
-
-		const dberr_t	err = fts_drop_table(trx, table_name);
-
-		/* Only the most recent error is reported. DB_FAIL is what
-		fts_drop_table() returns for a table that does not exist,
-		and is not treated as an error. */
-		if (!(err == DB_SUCCESS || err == DB_FAIL)) {
-			error = err;
-		}
-
-		mem_free(table_name);
-	}
-
-	return(error);
-}
-
-/****************************************************************//**
-The index table is split horizontally, so every one of the split tables
-of the index has to be dropped. All tables are attempted even if one of
-them fails.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_drop_index_split_tables(
-/*========================*/
-	trx_t*		trx,	/*!< in: transaction */
-	dict_index_t*	index)	/*!< in: fts instance */
-
-{
-	fts_table_t	fts_table;
-	dberr_t		error = DB_SUCCESS;
-
-	FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
-
-	for (ulint sel = 0; fts_index_selector[sel].value; ++sel) {
-		fts_table.suffix = fts_get_suffix(sel);
-
-		char*	table_name = fts_get_table_name(&fts_table);
-
-		const dberr_t	err = fts_drop_table(trx, table_name);
-
-		/* Only the most recent error is reported. DB_FAIL is what
-		fts_drop_table() returns for a table that does not exist,
-		and is not treated as an error. */
-		if (!(err == DB_SUCCESS || err == DB_FAIL)) {
-			error = err;
-		}
-
-		mem_free(table_name);
-	}
-
-	return(error);
-}
-
-/****************************************************************//**
-Drops FTS auxiliary tables for an FTS index: the split index tables
-and, in FTS_DOC_STATS_DEBUG builds, the DOC_ID table as well.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_drop_index_tables(
-/*==================*/
-	trx_t*		trx,	/*!< in: transaction */
-	dict_index_t*	index)	/*!< in: Index to drop */
-{
-#ifdef FTS_DOC_STATS_DEBUG
-	fts_table_t		fts_table;
-	static const char*	index_tables[] = {
-		"DOC_ID",
-		NULL
-	};
-#endif /* FTS_DOC_STATS_DEBUG */
-
-	/* The status of the split tables is the starting point; later
-	errors replace it, so only the last error is reported. */
-	dberr_t	error = fts_drop_index_split_tables(trx, index);
-
-#ifdef FTS_DOC_STATS_DEBUG
-	FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
-
-	for (ulint n = 0; index_tables[n] != NULL; ++n) {
-		fts_table.suffix = index_tables[n];
-
-		char*	table_name = fts_get_table_name(&fts_table);
-
-		const dberr_t	err = fts_drop_table(trx, table_name);
-
-		/* DB_FAIL (no such table) is not treated as an error. */
-		if (!(err == DB_SUCCESS || err == DB_FAIL)) {
-			error = err;
-		}
-
-		mem_free(table_name);
-	}
-#endif /* FTS_DOC_STATS_DEBUG */
-
-	return(error);
-}
-
-/****************************************************************//**
-Drops the index specific ancillary tables of every FTS index listed in
-fts->indexes. All indexes are attempted even if one of them fails.
-row_mysql_lock_data_dictionary must have been called before this.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_drop_all_index_tables(
-/*======================*/
-	trx_t*	trx,	/*!< in: transaction */
-	fts_t*	fts)	/*!< in: fts instance */
-{
-	dberr_t	error = DB_SUCCESS;
-	ulint	pos = 0;
-
-	while (fts->indexes != 0 && pos < ib_vector_size(fts->indexes)) {
-
-		dict_index_t*	index = static_cast<dict_index_t*>(
-			ib_vector_getp(fts->indexes, pos));
-
-		const dberr_t	err = fts_drop_index_tables(trx, index);
-
-		/* Remember the most recent failure. */
-		if (err != DB_SUCCESS) {
-			error = err;
-		}
-
-		++pos;
-	}
-
-	return(error);
-}
-
-/*********************************************************************//**
-Drops the ancillary tables needed for supporting an FTS index on a
-given table: first the common tables and, if that succeeded, the index
-specific tables of all FTS indexes. row_mysql_lock_data_dictionary must
-have been called before this.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_drop_tables(
-/*============*/
-	trx_t*		trx,	/*!< in: transaction */
-	dict_table_t*	table)	/*!< in: table has the FTS index */
-{
-	fts_table_t	fts_table;
-
-	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
-
-	/* TODO: This is not atomic and can cause problems during recovery. */
-
-	const dberr_t	common_err = fts_drop_common_tables(trx, &fts_table);
-
-	if (common_err != DB_SUCCESS) {
-		return(common_err);
-	}
-
-	return(fts_drop_all_index_tables(trx, table->fts));
-}
-
-/*********************************************************************//**
-Build an SQL string from a template by substituting the table name
-prefix of fts_table for every '%s' in the template.
-@return sql string, use mem_free() to free the memory */
-static
-char*
-fts_prepare_sql(
-/*============*/
-	fts_table_t*	fts_table,	/*!< in: table name info */
-	const char*	my_template)	/*!< in: sql template */
-{
-	char*	prefix = fts_get_table_name_prefix(fts_table);
-	char*	result = ut_strreplace(my_template, "%s", prefix);
-
-	/* The prefix is only needed for the substitution. */
-	mem_free(prefix);
-
-	return(result);
-}
-
-/*********************************************************************//**
-Creates the common ancillary tables needed for supporting an FTS index
-on the given table. row_mysql_lock_data_dictionary must have been called
-before this.
-Any existing common tables are dropped first. After the tables have
-been created, the default settings are inserted into the CONFIG table
-and, unless skip_doc_id_index is set, a unique index named
-FTS_DOC_ID_INDEX_NAME is created on the FTS_DOC_ID_COL_NAME column of
-the table given by name. On any failure trx_rollback_to_savepoint(trx,
-NULL) and row_drop_table_for_mysql() for table->name are called, and
-trx->error_state is reset to DB_SUCCESS.
-@return DB_SUCCESS if succeed */
-UNIV_INTERN
-dberr_t
-fts_create_common_tables(
-/*=====================*/
- trx_t* trx, /*!< in: transaction */
- const dict_table_t* table, /*!< in: table with FTS index */
- const char* name, /*!< in: table name normalized.*/
- bool skip_doc_id_index)/*!< in: Skip index on doc id */
-{
- char* sql;
- dberr_t error;
- que_t* graph;
- fts_table_t fts_table;
- mem_heap_t* heap = mem_heap_create(1024);
- pars_info_t* info;
-
- FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
-
- error = fts_drop_common_tables(trx, &fts_table);
-
- if (error != DB_SUCCESS) {
-
- goto func_exit;
- }
-
- /* Create the FTS tables that are common to an FTS index. */
- sql = fts_prepare_sql(&fts_table, fts_create_common_tables_sql);
- graph = fts_parse_sql_no_dict_lock(NULL, NULL, sql);
- mem_free(sql);
-
- error = fts_eval_sql(trx, graph);
-
- que_graph_free(graph);
-
- if (error != DB_SUCCESS) {
-
- goto func_exit;
- }
-
- /* Write the default settings to the config table. */
- fts_table.suffix = "CONFIG";
- graph = fts_parse_sql_no_dict_lock(
- &fts_table, NULL, fts_config_table_insert_values_sql);
-
- error = fts_eval_sql(trx, graph);
-
- que_graph_free(graph);
-
- if (error != DB_SUCCESS || skip_doc_id_index) { /* also the normal exit when no Doc ID index is wanted */
-
- goto func_exit;
- }
-
- info = pars_info_create();
-
- pars_info_bind_id(info, TRUE, "table_name", name);
- pars_info_bind_id(info, TRUE, "index_name", FTS_DOC_ID_INDEX_NAME);
- pars_info_bind_id(info, TRUE, "doc_id_col_name", FTS_DOC_ID_COL_NAME);
-
- /* Create the FTS DOC_ID index on the hidden column. Currently this
- is common for any FT index created on the table. */
- graph = fts_parse_sql_no_dict_lock(
- NULL,
- info,
- mem_heap_printf(
- heap,
- "BEGIN\n"
- ""
- "CREATE UNIQUE INDEX $index_name ON $table_name("
- "$doc_id_col_name);\n"));
-
- error = fts_eval_sql(trx, graph);
- que_graph_free(graph);
-
-func_exit:
- if (error != DB_SUCCESS) {
- /* We have special error handling here */
-
- trx->error_state = DB_SUCCESS;
-
- trx_rollback_to_savepoint(trx, NULL);
-
- row_drop_table_for_mysql(table->name, trx, FALSE, TRUE);
-
- trx->error_state = DB_SUCCESS;
- }
-
- mem_heap_free(heap);
-
- return(error);
-}
-
-/*************************************************************//**
-Wrapper function of fts_create_index_tables_low(), create auxiliary
-tables for an FTS index
-
-@see row_merge_create_fts_sort_index()
-@return: DB_SUCCESS or error code */
-static
-dict_table_t*
-fts_create_one_index_table(
-/*=======================*/
- trx_t* trx, /*!< in: transaction */
- const dict_index_t*
- index, /*!< in: the index instance */
- fts_table_t* fts_table, /*!< in: fts_table structure */
- mem_heap_t* heap) /*!< in: heap */
-{
- dict_field_t* field;
- dict_table_t* new_table = NULL;
- char* table_name = fts_get_table_name(fts_table);
- dberr_t error;
- CHARSET_INFO* charset;
- ulint flags2 = 0;
-
- ut_ad(index->type & DICT_FTS);
-
- if (srv_file_per_table) {
- flags2 = DICT_TF2_USE_TABLESPACE;
- }
-
- new_table = dict_mem_table_create(table_name, 0, 5, 1, flags2);
-
- field = dict_index_get_nth_field(index, 0);
- charset = innobase_get_fts_charset(
- (int)(field->col->prtype & DATA_MYSQL_TYPE_MASK),
- (uint) dtype_get_charset_coll(field->col->prtype));
-
- dict_mem_table_add_col(new_table, heap, "word",
- charset == &my_charset_latin1
- ? DATA_VARCHAR : DATA_VARMYSQL,
- field->col->prtype,
- FTS_MAX_WORD_LEN_IN_CHAR
- * DATA_MBMAXLEN(field->col->mbminmaxlen));
-
- dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
- DATA_NOT_NULL | DATA_UNSIGNED,
- sizeof(doc_id_t));
-
- dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
- DATA_NOT_NULL | DATA_UNSIGNED,
- sizeof(doc_id_t));
-
- dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
- DATA_NOT_NULL | DATA_UNSIGNED, 4);
-
- dict_mem_table_add_col(new_table, heap, "ilist", DATA_BLOB,
- 4130048, 0);
-
- error = row_create_table_for_mysql(new_table, trx, false, FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
-
- if (error != DB_SUCCESS) {
- trx->error_state = error;
- dict_mem_table_free(new_table);
- new_table = NULL;
- ib_logf(IB_LOG_LEVEL_WARN,
- "Fail to create FTS index table %s", table_name);
- }
-
- mem_free(table_name);
-
- return(new_table);
-}
-
-/*************************************************************//**
-Wrapper function of fts_create_index_tables_low(), create auxiliary
-tables for an FTS index
-@return: DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_create_index_tables_low(
-/*========================*/
- trx_t* trx, /*!< in: transaction */
- const dict_index_t*
- index, /*!< in: the index instance */
- const char* table_name, /*!< in: the table name */
- table_id_t table_id) /*!< in: the table id */
-
-{
- ulint i;
- que_t* graph;
- fts_table_t fts_table;
- dberr_t error = DB_SUCCESS;
- mem_heap_t* heap = mem_heap_create(1024);
-
- fts_table.type = FTS_INDEX_TABLE;
- fts_table.index_id = index->id;
- fts_table.table_id = table_id;
- fts_table.parent = table_name;
- fts_table.table = index->table;
-
-#ifdef FTS_DOC_STATS_DEBUG
- char* sql;
-
- /* Create the FTS auxiliary tables that are specific
- to an FTS index. */
- sql = fts_prepare_sql(&fts_table, fts_create_index_tables_sql);
-
- graph = fts_parse_sql_no_dict_lock(NULL, NULL, sql);
- mem_free(sql);
-
- error = fts_eval_sql(trx, graph);
- que_graph_free(graph);
-#endif /* FTS_DOC_STATS_DEBUG */
-
- for (i = 0; fts_index_selector[i].value && error == DB_SUCCESS; ++i) {
- dict_table_t* new_table;
-
- /* Create the FTS auxiliary tables that are specific
- to an FTS index. We need to preserve the table_id %s
- which fts_parse_sql_no_dict_lock() will fill in for us. */
- fts_table.suffix = fts_get_suffix(i);
-
- new_table = fts_create_one_index_table(
- trx, index, &fts_table, heap);
-
- if (!new_table) {
- error = DB_FAIL;
- break;
- }
-
- graph = fts_parse_sql_no_dict_lock(
- &fts_table, NULL, fts_create_index_sql);
-
- error = fts_eval_sql(trx, graph);
- que_graph_free(graph);
- }
-
- if (error != DB_SUCCESS) {
- /* We have special error handling here */
-
- trx->error_state = DB_SUCCESS;
-
- trx_rollback_to_savepoint(trx, NULL);
-
- row_drop_table_for_mysql(table_name, trx, FALSE, TRUE);
-
- trx->error_state = DB_SUCCESS;
- }
-
- mem_heap_free(heap);
-
- return(error);
-}
-
-/******************************************************************//**
-Creates the column specific ancillary tables needed for supporting an
-FTS index on the given table. row_mysql_lock_data_dictionary must have
-been called before this.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_create_index_tables(
-/*====================*/
- trx_t* trx, /*!< in: transaction */
- const dict_index_t* index) /*!< in: the index instance */
-{
- dberr_t err;
- dict_table_t* table;
-
- table = dict_table_get_low(index->table_name);
- ut_a(table != NULL);
-
- err = fts_create_index_tables_low(trx, index, table->name, table->id);
-
- if (err == DB_SUCCESS) {
- trx_commit(trx);
- }
-
- return(err);
-}
-#if 0
-/******************************************************************//**
-Return string representation of state. */
-static
-const char*
-fts_get_state_str(
-/*==============*/
- /* out: string representation of state */
- fts_row_state state) /*!< in: state */
-{
- switch (state) {
- case FTS_INSERT:
- return("INSERT");
-
- case FTS_MODIFY:
- return("MODIFY");
-
- case FTS_DELETE:
- return("DELETE");
-
- case FTS_NOTHING:
- return("NOTHING");
-
- case FTS_INVALID:
- return("INVALID");
-
- default:
- return("UNKNOWN");
- }
-}
-#endif
-
-/******************************************************************//**
-Calculate the new state of a row given the existing state and a new event.
-@return new state of row */
-static
-fts_row_state
-fts_trx_row_get_new_state(
-/*======================*/
- fts_row_state old_state, /*!< in: existing state of row */
- fts_row_state event) /*!< in: new event */
-{
- /* The rules for transforming states:
-
- I = inserted
- M = modified
- D = deleted
- N = nothing
-
- M+D -> D:
-
- If the row existed before the transaction started and it is modified
- during the transaction, followed by a deletion of the row, only the
- deletion will be signaled.
-
- M+ -> M:
-
- If the row existed before the transaction started and it is modified
- more than once during the transaction, only the last modification
- will be signaled.
-
- IM*D -> N:
-
- If a new row is added during the transaction (and possibly modified
- after its initial insertion) but it is deleted before the end of the
- transaction, nothing will be signaled.
-
- IM* -> I:
-
- If a new row is added during the transaction and modified after its
- initial insertion, only the addition will be signaled.
-
- M*DI -> M:
-
- If the row existed before the transaction started and it is deleted,
- then re-inserted, only a modification will be signaled. Note that
- this case is only possible if the table is using the row's primary
- key for FTS row ids, since those can be re-inserted by the user,
- which is not true for InnoDB generated row ids.
-
- It is easily seen that the above rules decompose such that we do not
- need to store the row's entire history of events. Instead, we can
- store just one state for the row and update that when new events
- arrive. Then we can implement the above rules as a two-dimensional
- look-up table, and get checking of invalid combinations "for free"
- in the process. */
-
- /* The lookup table for transforming states. old_state is the
- Y-axis, event is the X-axis. */
- static const fts_row_state table[4][4] = {
- /* I M D N */
- /* I */ { FTS_INVALID, FTS_INSERT, FTS_NOTHING, FTS_INVALID },
- /* M */ { FTS_INVALID, FTS_MODIFY, FTS_DELETE, FTS_INVALID },
- /* D */ { FTS_MODIFY, FTS_INVALID, FTS_INVALID, FTS_INVALID },
- /* N */ { FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID }
- };
-
- fts_row_state result;
-
- ut_a(old_state < FTS_INVALID);
- ut_a(event < FTS_INVALID);
-
- result = table[(int) old_state][(int) event];
- ut_a(result != FTS_INVALID);
-
- return(result);
-}
-
-/******************************************************************//**
-Create a savepoint instance.
-@return savepoint instance */
-static
-fts_savepoint_t*
-fts_savepoint_create(
-/*=================*/
- ib_vector_t* savepoints, /*!< out: InnoDB transaction */
- const char* name, /*!< in: savepoint name */
- mem_heap_t* heap) /*!< in: heap */
-{
- fts_savepoint_t* savepoint;
-
- savepoint = static_cast<fts_savepoint_t*>(
- ib_vector_push(savepoints, NULL));
-
- memset(savepoint, 0x0, sizeof(*savepoint));
-
- if (name) {
- savepoint->name = mem_heap_strdup(heap, name);
- }
-
- savepoint->tables = rbt_create(
- sizeof(fts_trx_table_t*), fts_trx_table_cmp);
-
- return(savepoint);
-}
-
-/******************************************************************//**
-Create an FTS trx.
-@return FTS trx */
-static
-fts_trx_t*
-fts_trx_create(
-/*===========*/
- trx_t* trx) /*!< in/out: InnoDB
- transaction */
-{
- fts_trx_t* ftt;
- ib_alloc_t* heap_alloc;
- mem_heap_t* heap = mem_heap_create(1024);
- trx_named_savept_t* savep;
-
- ut_a(trx->fts_trx == NULL);
-
- ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
- ftt->trx = trx;
- ftt->heap = heap;
-
- heap_alloc = ib_heap_allocator_create(heap);
-
- ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create(
- heap_alloc, sizeof(fts_savepoint_t), 4));
-
- ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create(
- heap_alloc, sizeof(fts_savepoint_t), 4));
-
- /* Default instance has no name and no heap. */
- fts_savepoint_create(ftt->savepoints, NULL, NULL);
- fts_savepoint_create(ftt->last_stmt, NULL, NULL);
-
- /* Copy savepoints that already set before. */
- for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
- savep != NULL;
- savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
-
- fts_savepoint_take(trx, ftt, savep->name);
- }
-
- return(ftt);
-}
-
-/******************************************************************//**
-Create an FTS trx table.
-@return FTS trx table */
-static
-fts_trx_table_t*
-fts_trx_table_create(
-/*=================*/
- fts_trx_t* fts_trx, /*!< in: FTS trx */
- dict_table_t* table) /*!< in: table */
-{
- fts_trx_table_t* ftt;
-
- ftt = static_cast<fts_trx_table_t*>(
- mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
-
- memset(ftt, 0x0, sizeof(*ftt));
-
- ftt->table = table;
- ftt->fts_trx = fts_trx;
-
- ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
-
- return(ftt);
-}
-
-/******************************************************************//**
-Clone an FTS trx table.
-@return FTS trx table */
-static
-fts_trx_table_t*
-fts_trx_table_clone(
-/*=================*/
- const fts_trx_table_t* ftt_src) /*!< in: FTS trx */
-{
- fts_trx_table_t* ftt;
-
- ftt = static_cast<fts_trx_table_t*>(
- mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
-
- memset(ftt, 0x0, sizeof(*ftt));
-
- ftt->table = ftt_src->table;
- ftt->fts_trx = ftt_src->fts_trx;
-
- ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
-
- /* Copy the rb tree values to the new savepoint. */
- rbt_merge_uniq(ftt->rows, ftt_src->rows);
-
- /* These are only added on commit. At this stage we only have
- the updated row state. */
- ut_a(ftt_src->added_doc_ids == NULL);
-
- return(ftt);
-}
-
-/******************************************************************//**
-Initialize the FTS trx instance.
-@return FTS trx instance */
-static
-fts_trx_table_t*
-fts_trx_init(
-/*=========*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table, /*!< in: FTS table instance */
- ib_vector_t* savepoints) /*!< in: Savepoints */
-{
- fts_trx_table_t* ftt;
- ib_rbt_bound_t parent;
- ib_rbt_t* tables;
- fts_savepoint_t* savepoint;
-
- savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
-
- tables = savepoint->tables;
- rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL);
-
- if (parent.result == 0) {
- fts_trx_table_t** fttp;
-
- fttp = rbt_value(fts_trx_table_t*, parent.last);
- ftt = *fttp;
- } else {
- ftt = fts_trx_table_create(trx->fts_trx, table);
- rbt_add_node(tables, &parent, &ftt);
- }
-
- ut_a(ftt->table == table);
-
- return(ftt);
-}
-
-/******************************************************************//**
-Notify the FTS system about an operation on an FTS-indexed table. */
-static
-void
-fts_trx_table_add_op(
-/*=================*/
- fts_trx_table_t*ftt, /*!< in: FTS trx table */
- doc_id_t doc_id, /*!< in: doc id */
- fts_row_state state, /*!< in: state of the row */
- ib_vector_t* fts_indexes) /*!< in: FTS indexes affected */
-{
- ib_rbt_t* rows;
- ib_rbt_bound_t parent;
-
- rows = ftt->rows;
- rbt_search(rows, &parent, &doc_id);
-
- /* Row id found, update state, and if new state is FTS_NOTHING,
- we delete the row from our tree. */
- if (parent.result == 0) {
- fts_trx_row_t* row = rbt_value(fts_trx_row_t, parent.last);
-
- row->state = fts_trx_row_get_new_state(row->state, state);
-
- if (row->state == FTS_NOTHING) {
- if (row->fts_indexes) {
- ib_vector_free(row->fts_indexes);
- }
-
- ut_free(rbt_remove_node(rows, parent.last));
- row = NULL;
- } else if (row->fts_indexes != NULL) {
- ib_vector_free(row->fts_indexes);
- row->fts_indexes = fts_indexes;
- }
-
- } else { /* Row-id not found, create a new one. */
- fts_trx_row_t row;
-
- row.doc_id = doc_id;
- row.state = state;
- row.fts_indexes = fts_indexes;
-
- rbt_add_node(rows, &parent, &row);
- }
-}
-
-/******************************************************************//**
-Notify the FTS system about an operation on an FTS-indexed table. */
-UNIV_INTERN
-void
-fts_trx_add_op(
-/*===========*/
- trx_t* trx, /*!< in: InnoDB transaction */
- dict_table_t* table, /*!< in: table */
- doc_id_t doc_id, /*!< in: new doc id */
- fts_row_state state, /*!< in: state of the row */
- ib_vector_t* fts_indexes) /*!< in: FTS indexes affected
- (NULL=all) */
-{
- fts_trx_table_t* tran_ftt;
- fts_trx_table_t* stmt_ftt;
-
- if (!trx->fts_trx) {
- trx->fts_trx = fts_trx_create(trx);
- }
-
- tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
- stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
-
- fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
- fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
-}
-
-/******************************************************************//**
-Fetch callback that converts a textual document id to a binary value and
-stores it in the given place.
-@return always returns NULL */
-static
-ibool
-fts_fetch_store_doc_id(
-/*===================*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: doc_id_t* to store
- doc_id in */
-{
- int n_parsed;
- sel_node_t* node = static_cast<sel_node_t*>(row);
- doc_id_t* doc_id = static_cast<doc_id_t*>(user_arg);
- dfield_t* dfield = que_node_get_val(node->select_list);
- dtype_t* type = dfield_get_type(dfield);
- ulint len = dfield_get_len(dfield);
-
- char buf[32];
-
- ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
- ut_a(len > 0 && len < sizeof(buf));
-
- memcpy(buf, dfield_get_data(dfield), len);
- buf[len] = '\0';
-
- n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
- ut_a(n_parsed == 1);
-
- return(FALSE);
-}
-
-#ifdef FTS_CACHE_SIZE_DEBUG
-/******************************************************************//**
-Get the max cache size in bytes. If there is an error reading the
-value we simply print an error message here and return the default
-value to the caller.
-@return max cache size in bytes */
-static
-ulint
-fts_get_max_cache_size(
-/*===================*/
- trx_t* trx, /*!< in: transaction */
- fts_table_t* fts_table) /*!< in: table instance */
-{
- dberr_t error;
- fts_string_t value;
- ulint cache_size_in_mb;
-
- /* Set to the default value. */
- cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
-
- /* We set the length of value to the max bytes it can hold. This
- information is used by the callback that reads the value. */
- value.f_n_char = 0;
- value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
- value.f_str = ut_malloc(value.f_len + 1);
-
- error = fts_config_get_value(
- trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);
-
- if (error == DB_SUCCESS) {
-
- value.f_str[value.f_len] = 0;
- cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10);
-
- if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Warning: FTS max cache size "
- " (%lu) out of range. Minimum value is "
- "%luMB and the maximum values is %luMB, "
- "setting cache size to upper limit\n",
- cache_size_in_mb,
- FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB,
- FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB);
-
- cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;
-
- } else if (cache_size_in_mb
- < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Warning: FTS max cache size "
- " (%lu) out of range. Minimum value is "
- "%luMB and the maximum values is %luMB, "
- "setting cache size to lower limit\n",
- cache_size_in_mb,
- FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB,
- FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB);
-
- cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
- }
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr, "InnoDB: Error: (%lu) reading max cache "
- "config value from config table\n", error);
- }
-
- ut_free(value.f_str);
-
- return(cache_size_in_mb * 1024 * 1024);
-}
-#endif
-
-#ifdef FTS_DOC_STATS_DEBUG
-/*********************************************************************//**
-Get the total number of words in the FTS for a particular FTS index.
-@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
-dberr_t
-fts_get_total_word_count(
-/*=====================*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: for this index */
- ulint* total) /* out: total words */
-{
- dberr_t error;
- fts_string_t value;
-
- *total = 0;
-
- /* We set the length of value to the max bytes it can hold. This
- information is used by the callback that reads the value. */
- value.f_n_char = 0;
- value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
- value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1));
-
- error = fts_config_get_index_value(
- trx, index, FTS_TOTAL_WORD_COUNT, &value);
-
- if (error == DB_SUCCESS) {
-
- value.f_str[value.f_len] = 0;
- *total = strtoul((char*) value.f_str, NULL, 10);
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%s) reading total words "
- "value from config table\n", ut_strerr(error));
- }
-
- ut_free(value.f_str);
-
- return(error);
-}
-#endif /* FTS_DOC_STATS_DEBUG */
-
-/*********************************************************************//**
-Update the next and last Doc ID in the CONFIG table to be the input
-"doc_id" value (+ 1). We would do so after each FTS index build or
-table truncate */
-UNIV_INTERN
-void
-fts_update_next_doc_id(
-/*===================*/
- trx_t* trx, /*!< in/out: transaction */
- const dict_table_t* table, /*!< in: table */
- const char* table_name, /*!< in: table name, or NULL */
- doc_id_t doc_id) /*!< in: DOC ID to set */
-{
- table->fts->cache->synced_doc_id = doc_id;
- table->fts->cache->next_doc_id = doc_id + 1;
-
- table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
-
- fts_update_sync_doc_id(
- table, table_name, table->fts->cache->synced_doc_id, trx);
-
-}
-
-/*********************************************************************//**
-Get the next available document id.
-@return DB_SUCCESS if OK */
-UNIV_INTERN
-dberr_t
-fts_get_next_doc_id(
-/*================*/
- const dict_table_t* table, /*!< in: table */
- doc_id_t* doc_id) /*!< out: new document id */
-{
- fts_cache_t* cache = table->fts->cache;
-
- /* If the Doc ID system has not yet been initialized, we
- will consult the CONFIG table and user table to re-establish
- the initial value of the Doc ID */
-
- if (cache->first_doc_id != 0 || !fts_init_doc_id(table)) {
- if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
- *doc_id = FTS_NULL_DOC_ID;
- return(DB_SUCCESS);
- }
-
- /* Otherwise, simply increment the value in cache */
- mutex_enter(&cache->doc_id_lock);
- *doc_id = ++cache->next_doc_id;
- mutex_exit(&cache->doc_id_lock);
- } else {
- mutex_enter(&cache->doc_id_lock);
- *doc_id = cache->next_doc_id;
- mutex_exit(&cache->doc_id_lock);
- }
-
- return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-This function fetch the Doc ID from CONFIG table, and compare with
-the Doc ID supplied. And store the larger one to the CONFIG table.
-@return DB_SUCCESS if OK */
-static MY_ATTRIBUTE((nonnull))
-dberr_t
-fts_cmp_set_sync_doc_id(
-/*====================*/
- const dict_table_t* table, /*!< in: table */
- doc_id_t doc_id_cmp, /*!< in: Doc ID to compare */
- ibool read_only, /*!< in: TRUE if read the
- synced_doc_id only */
- doc_id_t* doc_id) /*!< out: larger document id
- after comparing "doc_id_cmp"
- to the one stored in CONFIG
- table */
-{
- trx_t* trx;
- pars_info_t* info;
- dberr_t error;
- fts_table_t fts_table;
- que_t* graph = NULL;
- fts_cache_t* cache = table->fts->cache;
-retry:
- ut_a(table->fts->doc_col != ULINT_UNDEFINED);
-
- fts_table.suffix = "CONFIG";
- fts_table.table_id = table->id;
- fts_table.type = FTS_COMMON_TABLE;
- fts_table.table = table;
-
- fts_table.parent = table->name;
-
- trx = trx_allocate_for_background();
-
- trx->op_info = "update the next FTS document id";
-
- info = pars_info_create();
-
- pars_info_bind_function(
- info, "my_func", fts_fetch_store_doc_id, doc_id);
-
- graph = fts_parse_sql(
- &fts_table, info,
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS SELECT value FROM \"%s\""
- " WHERE key = 'synced_doc_id' FOR UPDATE;\n"
- "BEGIN\n"
- ""
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c % NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE c;");
-
- *doc_id = 0;
-
- error = fts_eval_sql(trx, graph);
-
- fts_que_graph_free_check_lock(&fts_table, NULL, graph);
-
- // FIXME: We need to retry deadlock errors
- if (error != DB_SUCCESS) {
- goto func_exit;
- }
-
- if (read_only) {
- goto func_exit;
- }
-
- if (doc_id_cmp == 0 && *doc_id) {
- cache->synced_doc_id = *doc_id - 1;
- } else {
- cache->synced_doc_id = ut_max(doc_id_cmp, *doc_id);
- }
-
- mutex_enter(&cache->doc_id_lock);
- /* For each sync operation, we will add next_doc_id by 1,
- so to mark a sync operation */
- if (cache->next_doc_id < cache->synced_doc_id + 1) {
- cache->next_doc_id = cache->synced_doc_id + 1;
- }
- mutex_exit(&cache->doc_id_lock);
-
- if (doc_id_cmp > *doc_id) {
- error = fts_update_sync_doc_id(
- table, table->name, cache->synced_doc_id, trx);
- }
-
- *doc_id = cache->next_doc_id;
-
-func_exit:
-
- if (error == DB_SUCCESS) {
- fts_sql_commit(trx);
- } else {
- *doc_id = 0;
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%s) "
- "while getting next doc id.\n", ut_strerr(error));
-
- fts_sql_rollback(trx);
-
- if (error == DB_DEADLOCK) {
- os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
- goto retry;
- }
- }
-
- trx_free_for_background(trx);
-
- return(error);
-}
-
-/*********************************************************************//**
-Update the last document id. This function could create a new
-transaction to update the last document id.
-@return DB_SUCCESS if OK */
-static
-dberr_t
-fts_update_sync_doc_id(
-/*===================*/
- const dict_table_t* table, /*!< in: table */
- const char* table_name, /*!< in: table name, or NULL */
- doc_id_t doc_id, /*!< in: last document id */
- trx_t* trx) /*!< in: update trx, or NULL */
-{
- byte id[FTS_MAX_ID_LEN];
- pars_info_t* info;
- fts_table_t fts_table;
- ulint id_len;
- que_t* graph = NULL;
- dberr_t error;
- ibool local_trx = FALSE;
- fts_cache_t* cache = table->fts->cache;
-
- fts_table.suffix = "CONFIG";
- fts_table.table_id = table->id;
- fts_table.type = FTS_COMMON_TABLE;
- fts_table.table = table;
- if (table_name) {
- fts_table.parent = table_name;
- } else {
- fts_table.parent = table->name;
- }
-
- if (!trx) {
- trx = trx_allocate_for_background();
-
- trx->op_info = "setting last FTS document id";
- local_trx = TRUE;
- }
-
- info = pars_info_create();
-
- id_len = ut_snprintf(
- (char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
-
- pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
-
- graph = fts_parse_sql(
- &fts_table, info,
- "BEGIN "
- "UPDATE \"%s\" SET value = :doc_id"
- " WHERE key = 'synced_doc_id';");
-
- error = fts_eval_sql(trx, graph);
-
- fts_que_graph_free_check_lock(&fts_table, NULL, graph);
-
- if (local_trx) {
- if (error == DB_SUCCESS) {
- fts_sql_commit(trx);
- cache->synced_doc_id = doc_id;
- } else {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "(%s) while updating last doc id.",
- ut_strerr(error));
-
- fts_sql_rollback(trx);
- }
- trx_free_for_background(trx);
- }
-
- return(error);
-}
-
-/*********************************************************************//**
-Create a new fts_doc_ids_t.
-@return new fts_doc_ids_t */
-UNIV_INTERN
-fts_doc_ids_t*
-fts_doc_ids_create(void)
-/*====================*/
-{
- fts_doc_ids_t* fts_doc_ids;
- mem_heap_t* heap = mem_heap_create(512);
-
- fts_doc_ids = static_cast<fts_doc_ids_t*>(
- mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
-
- fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
-
- fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create(
- fts_doc_ids->self_heap, sizeof(fts_update_t), 32));
-
- return(fts_doc_ids);
-}
-
-/*********************************************************************//**
-Free a fts_doc_ids_t. */
-
-void
-fts_doc_ids_free(
-/*=============*/
- fts_doc_ids_t* fts_doc_ids)
-{
- mem_heap_t* heap = static_cast<mem_heap_t*>(
- fts_doc_ids->self_heap->arg);
-
- memset(fts_doc_ids, 0, sizeof(*fts_doc_ids));
-
- mem_heap_free(heap);
-}
-
-/*********************************************************************//**
-Do commit-phase steps necessary for the insertion of a new row. */
-void
-fts_add(
-/*====*/
- fts_trx_table_t*ftt, /*!< in: FTS trx table */
- fts_trx_row_t* row) /*!< in: row */
-{
- dict_table_t* table = ftt->table;
- doc_id_t doc_id = row->doc_id;
-
- ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
-
- fts_add_doc_by_id(ftt, doc_id, row->fts_indexes);
-
- mutex_enter(&table->fts->cache->deleted_lock);
- ++table->fts->cache->added;
- mutex_exit(&table->fts->cache->deleted_lock);
-
- if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
- && doc_id >= table->fts->cache->next_doc_id) {
- table->fts->cache->next_doc_id = doc_id + 1;
- }
-}
-
-/*********************************************************************//**
-Do commit-phase steps necessary for the deletion of a row.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_delete(
-/*=======*/
- fts_trx_table_t*ftt, /*!< in: FTS trx table */
- fts_trx_row_t* row) /*!< in: row */
-{
- que_t* graph;
- fts_table_t fts_table;
- dberr_t error = DB_SUCCESS;
- doc_id_t write_doc_id;
- dict_table_t* table = ftt->table;
- doc_id_t doc_id = row->doc_id;
- trx_t* trx = ftt->fts_trx->trx;
- pars_info_t* info = pars_info_create();
- fts_cache_t* cache = table->fts->cache;
-
- /* we do not index Documents whose Doc ID value is 0 */
- if (doc_id == FTS_NULL_DOC_ID) {
- ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
- return(error);
- }
-
- ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
-
- FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table);
-
- /* Convert to "storage" byte order. */
- fts_write_doc_id((byte*) &write_doc_id, doc_id);
- fts_bind_doc_id(info, "doc_id", &write_doc_id);
-
- /* It is possible we update a record that has not yet been sync-ed
- into cache from last crash (delete Doc will not initialize the
- sync). Avoid any added counter accounting until the FTS cache
- is re-established and sync-ed */
- if (table->fts->fts_status & ADDED_TABLE_SYNCED
- && doc_id > cache->synced_doc_id) {
- mutex_enter(&table->fts->cache->deleted_lock);
-
- /* The Doc ID could belong to those left in
- ADDED table from last crash. So need to check
- if it is less than first_doc_id when we initialize
- the Doc ID system after reboot */
- if (doc_id >= table->fts->cache->first_doc_id
- && table->fts->cache->added > 0) {
- --table->fts->cache->added;
- }
-
- mutex_exit(&table->fts->cache->deleted_lock);
-
- /* Only if the row was really deleted. */
- ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
- }
-
- /* Note the deleted document for OPTIMIZE to purge. */
- if (error == DB_SUCCESS) {
-
- trx->op_info = "adding doc id to FTS DELETED";
-
- info->graph_owns_us = TRUE;
-
- fts_table.suffix = "DELETED";
-
- graph = fts_parse_sql(
- &fts_table,
- info,
- "BEGIN INSERT INTO \"%s\" VALUES (:doc_id);");
-
- error = fts_eval_sql(trx, graph);
-
- fts_que_graph_free(graph);
- } else {
- pars_info_free(info);
- }
-
- /* Increment the total deleted count, this is used to calculate the
- number of documents indexed. */
- if (error == DB_SUCCESS) {
- mutex_enter(&table->fts->cache->deleted_lock);
-
- ++table->fts->cache->deleted;
-
- mutex_exit(&table->fts->cache->deleted_lock);
- }
-
- return(error);
-}
-
-/*********************************************************************//**
-Do commit-phase steps necessary for the modification of a row.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_modify(
-/*=======*/
- fts_trx_table_t* ftt, /*!< in: FTS trx table */
- fts_trx_row_t* row) /*!< in: row */
-{
- dberr_t error;
-
- ut_a(row->state == FTS_MODIFY);
-
- error = fts_delete(ftt, row);
-
- if (error == DB_SUCCESS) {
- fts_add(ftt, row);
- }
-
- return(error);
-}
-
-/*********************************************************************//**
-Obtain a new document id for a row that is being inserted.
-When the table does not carry the DICT_TF2_FTS_HAS_DOC_ID flag nothing is
-written to the row; fts_get_next_doc_id() is then only called while the
-cache's first_doc_id is still FTS_NULL_DOC_ID. Otherwise the new id is
-stored, via fts_write_doc_id(), in the row's doc id column.
-@return DB_SUCCESS if all went well else error */
-UNIV_INTERN
-dberr_t
-fts_create_doc_id(
-/*==============*/
-	dict_table_t*	table,		/*!< in: row is of this table. */
-	dtuple_t*	row,		/*!< in/out: add doc id value to this
-					row. This is the current row that is
-					being inserted. */
-	mem_heap_t*	heap)		/*!< in: heap */
-{
-	doc_id_t	new_id;
-
-	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
-
-	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
-
-		if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
-			return(DB_SUCCESS);
-		}
-
-		return(fts_get_next_doc_id(table, &new_id));
-	}
-
-	const dberr_t	err = fts_get_next_doc_id(table, &new_id);
-
-	if (err != DB_SUCCESS) {
-		return(err);
-	}
-
-	ut_a(new_id > 0);
-
-	dfield_t*	doc_field = dtuple_get_nth_field(
-		row, table->fts->doc_col);
-
-	/* The value must outlive this call, so it lives on the
-	caller supplied heap. */
-	doc_id_t*	stored_id = static_cast<doc_id_t*>(
-		mem_heap_alloc(heap, sizeof(*stored_id)));
-
-	ut_a(new_id != FTS_NULL_DOC_ID);
-	ut_a(sizeof(new_id) == doc_field->type.len);
-
-	fts_write_doc_id((byte*) stored_id, new_id);
-	dfield_set_data(doc_field, stored_id, sizeof(*stored_id));
-
-	return(err);
-}
-
-/*********************************************************************//**
-The given transaction is about to be committed; do whatever is necessary
-from the FTS system's POV.
-Every row recorded in ftt->rows is dispatched on its state (insert,
-modify or delete). The work is done under a freshly allocated background
-transaction which is stored in ftt->fts_trx->trx, committed with
-fts_sql_commit() and freed before returning. The loop stops after the
-first row that reports an error.
-NOTE(review): fts_sql_commit() is called regardless of the value of
-error, and ftt->fts_trx->trx is not reset after the transaction is
-freed -- confirm both are intended.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_commit_table(
-/*=============*/
- fts_trx_table_t* ftt) /*!< in: FTS table to commit*/
-{
- const ib_rbt_node_t* node;
- ib_rbt_t* rows;
- dberr_t error = DB_SUCCESS;
- fts_cache_t* cache = ftt->table->fts->cache;
- trx_t* trx = trx_allocate_for_background();
-
- rows = ftt->rows;
-
- ftt->fts_trx->trx = trx; /* fts_add()/fts_delete() receive ftt, not trx */
-
- if (cache->get_docs == NULL) { /* unlocked pre-check */
- rw_lock_x_lock(&cache->init_lock);
- if (cache->get_docs == NULL) { /* re-check under init_lock */
- cache->get_docs = fts_get_docs_create(cache);
- }
- rw_lock_x_unlock(&cache->init_lock);
- }
-
- for (node = rbt_first(rows);
- node != NULL && error == DB_SUCCESS;
- node = rbt_next(rows, node)) {
-
- fts_trx_row_t* row = rbt_value(fts_trx_row_t, node);
-
- switch (row->state) {
- case FTS_INSERT:
- fts_add(ftt, row); /* no error code is collected here */
- break;
-
- case FTS_MODIFY:
- error = fts_modify(ftt, row);
- break;
-
- case FTS_DELETE:
- error = fts_delete(ftt, row);
- break;
-
- default:
- ut_error; /* any other row state is treated as fatal */
- }
- }
-
- fts_sql_commit(trx);
-
- trx_free_for_background(trx);
-
- return(error);
-}
-
-/*********************************************************************//**
-Commit-time FTS processing for a transaction: fts_commit_table() is run
-for each table recorded in the last savepoint of trx->fts_trx. Processing
-stops after the first table that reports an error.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_commit(
-/*=======*/
-	trx_t*	trx)	/*!< in: transaction */
-{
-	fts_savepoint_t*	last_savepoint
-		= static_cast<fts_savepoint_t*>(
-			ib_vector_last(trx->fts_trx->savepoints));
-	ib_rbt_t*		tables = last_savepoint->tables;
-	dberr_t			err = DB_SUCCESS;
-	const ib_rbt_node_t*	node = rbt_first(tables);
-
-	while (node != NULL && err == DB_SUCCESS) {
-		fts_trx_table_t**	ftt_ptr
-			= rbt_value(fts_trx_table_t*, node);
-
-		err = fts_commit_table(*ftt_ptr);
-
-		node = rbt_next(tables, node);
-	}
-
-	return(err);
-}
-
-/*********************************************************************//**
-Zero-fill a document and give it its own heap allocator (self_heap),
-backed by a newly created memory heap. */
-UNIV_INTERN
-void
-fts_doc_init(
-/*=========*/
-	fts_doc_t*	doc)	/*!< in: doc to initialize */
-{
-	mem_heap_t*	own_heap = mem_heap_create(32);
-
-	memset(doc, 0, sizeof(*doc));
-	doc->self_heap = ib_heap_allocator_create(own_heap);
-}
-
-/*********************************************************************//**
-Release a document: free its token tree, if any, and then the heap
-behind doc->self_heap. In UNIV_DEBUG builds the struct is zero-filled
-before the heap is released. */
-UNIV_INTERN
-void
-fts_doc_free(
-/*=========*/
-	fts_doc_t*	doc)	/*!< in: document */
-{
-	/* Fetch the heap pointer up front; the debug memset below wipes
-	doc->self_heap. */
-	mem_heap_t*	doc_heap = static_cast<mem_heap_t*>(
-		doc->self_heap->arg);
-
-	if (doc->tokens != NULL) {
-		rbt_free(doc->tokens);
-	}
-
-#ifdef UNIV_DEBUG
-	memset(doc, 0, sizeof(*doc));
-#endif /* UNIV_DEBUG */
-
-	mem_heap_free(doc_heap);
-}
-
-/*********************************************************************//**
-Fetch callback that copies the 8 bytes of the first selected column to
-the location user_arg points to. The column is asserted to be
-DATA_FIXBINARY with DATA_BINARY_TYPE set and a length of 8.
-@return always returns NULL */
-UNIV_INTERN
-void*
-fts_fetch_row_id(
-/*=============*/
-	void*	row,		/*!< in: sel_node_t* */
-	void*	user_arg)	/*!< in: data pointer */
-{
-	sel_node_t*	sel = static_cast<sel_node_t*>(row);
-	dfield_t*	field = que_node_get_val(sel->select_list);
-	dtype_t*	field_type = dfield_get_type(field);
-	ulint		field_len = dfield_get_len(field);
-
-	ut_a(dtype_get_mtype(field_type) == DATA_FIXBINARY);
-	ut_a(dtype_get_prtype(field_type) & DATA_BINARY_TYPE);
-	ut_a(field_len == 8);
-
-	memcpy(user_arg, dfield_get_data(field), 8);
-
-	return(NULL);
-}
-
-/*********************************************************************//**
-Callback function for fetch that hands the text of an FTS document to
-the tokenizer, one selected column at a time.
-NULL columns and externally stored columns are skipped. The first
-processed column goes through fts_tokenize_document(), later ones through
-fts_tokenize_document_next() with the accumulated byte offset doc_len.
-No character set conversion is done in this function: the column data is
-used in place and the charset is taken from result_doc or, when that is
-not set, looked up from the first non-NULL column's prtype.
-@return always FALSE */
-UNIV_INTERN
-ibool
-fts_query_expansion_fetch_doc(
-/*==========================*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: fts_doc_t* */
-{
- que_node_t* exp;
- sel_node_t* node = static_cast<sel_node_t*>(row);
- fts_doc_t* result_doc = static_cast<fts_doc_t*>(user_arg);
- dfield_t* dfield;
- ulint len;
- ulint doc_len;
- fts_doc_t doc;
- CHARSET_INFO* doc_charset = NULL;
- ulint field_no = 0;
-
- len = 0;
-
- fts_doc_init(&doc);
- doc.found = TRUE;
-
- exp = node->select_list;
- doc_len = 0;
-
- doc_charset = result_doc->charset;
-
- /* Copy each indexed column content into doc->text.f_str */
- while (exp) {
- dfield = que_node_get_val(exp);
- len = dfield_get_len(dfield);
-
- /* NULL column */
- if (len == UNIV_SQL_NULL) {
- exp = que_node_get_next(exp);
- continue;
- }
-
- if (!doc_charset) {
- ulint prtype = dfield->type.prtype;
- doc_charset = innobase_get_fts_charset(
- (int)(prtype & DATA_MYSQL_TYPE_MASK),
- (uint) dtype_get_charset_coll(prtype));
- }
-
- doc.charset = doc_charset;
-
- if (dfield_is_ext(dfield)) {
- /* We ignore columns that are stored externally, this
- could result in too many words to search */
- exp = que_node_get_next(exp);
- continue;
- } else {
- doc.text.f_n_char = 0;
-
- doc.text.f_str = static_cast<byte*>(
- dfield_get_data(dfield)); /* points at the field data, no copy */
-
- doc.text.f_len = len;
- }
-
- if (field_no == 0) {
- fts_tokenize_document(&doc, result_doc);
- } else {
- fts_tokenize_document_next(&doc, doc_len, result_doc);
- }
-
- exp = que_node_get_next(exp);
-
- doc_len += (exp) ? len + 1 : len; /* one extra position between columns, none after the last */
-
- field_no++;
- }
-
- ut_ad(doc_charset); /* NOTE(review): fires in debug builds if every column was NULL and result_doc had no charset */
-
- if (!result_doc->charset) {
- result_doc->charset = doc_charset;
- }
-
- fts_doc_free(&doc);
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-fetch and tokenize the document.
-For every field of the FTS index behind get_doc, the matching column is
-located in the clustered index record under pcur (copied to doc's heap
-when it is stored externally) and passed to the tokenizer. Fields that
-are NULL or empty are skipped. Does nothing when get_doc is NULL.
-doc->found is set to TRUE as soon as one field has been looked at, even
-if that field turns out to be NULL or empty. */
-static
-void
-fts_fetch_doc_from_rec(
-/*===================*/
- fts_get_doc_t* get_doc, /*!< in: FTS index's get_doc struct */
- dict_index_t* clust_index, /*!< in: cluster index */
- btr_pcur_t* pcur, /*!< in: cursor whose position
- has been stored */
- ulint* offsets, /*!< in: offsets */
- fts_doc_t* doc) /*!< out: fts doc to hold parsed
- documents */
-{
- dict_index_t* index;
- dict_table_t* table;
- const rec_t* clust_rec;
- ulint num_field;
- const dict_field_t* ifield;
- const dict_col_t* col;
- ulint clust_pos;
- ulint i;
- ulint doc_len = 0;
- ulint processed_doc = 0;
-
- if (!get_doc) {
- return;
- }
-
- index = get_doc->index_cache->index;
- table = get_doc->index_cache->index->table;
-
- clust_rec = btr_pcur_get_rec(pcur);
-
- num_field = dict_index_get_n_fields(index);
-
- for (i = 0; i < num_field; i++) {
- ifield = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(ifield);
- clust_pos = dict_col_get_clust_pos(col, clust_index);
-
- if (!get_doc->index_cache->charset) { /* looked up once, then cached on the index cache */
- ulint prtype = ifield->col->prtype;
-
- get_doc->index_cache->charset =
- innobase_get_fts_charset(
- (int) (prtype & DATA_MYSQL_TYPE_MASK),
- (uint) dtype_get_charset_coll(prtype));
- }
-
- if (rec_offs_nth_extern(offsets, clust_pos)) {
- doc->text.f_str =
- btr_rec_copy_externally_stored_field(
- clust_rec, offsets,
- dict_table_zip_size(table),
- clust_pos, &doc->text.f_len,
- static_cast<mem_heap_t*>(
- doc->self_heap->arg),
- NULL);
- } else {
- doc->text.f_str = (byte*) rec_get_nth_field(
- clust_rec, offsets, clust_pos,
- &doc->text.f_len);
- }
-
- doc->found = TRUE;
- doc->charset = get_doc->index_cache->charset;
-
- /* Null Field */
- if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
- continue;
- }
-
- if (processed_doc == 0) {
- fts_tokenize_document(doc, NULL);
- } else {
- fts_tokenize_document_next(doc, doc_len, NULL);
- }
-
- processed_doc++;
- doc_len += doc->text.f_len + 1; /* running offset handed to the next field's tokenization */
- }
-}
-
-/*********************************************************************//**
-This function fetches the document inserted during the committing
-transaction, and tokenize the inserted text data and insert into
-FTS auxiliary table and its cache.
-The row is located through the FTS_DOC_ID index; when that index is not
-the clustered index, the clustered record is looked up with a second
-cursor. For every entry of cache->get_docs the record is tokenized and
-the tokens are added to the cache under the cache lock; a sync is
-requested when the cache has grown past fts_max_cache_size / 10 or
-fts_need_sync is set, unless a sync is already in progress.
-The value TRUE is returned on every path, including when no matching or
-only a delete-marked record is found.
-@return TRUE if successful */
-static
-ulint
-fts_add_doc_by_id(
-/*==============*/
- fts_trx_table_t*ftt, /*!< in: FTS trx table */
- doc_id_t doc_id, /*!< in: doc id */
- ib_vector_t* fts_indexes MY_ATTRIBUTE((unused)))
- /*!< in: affected fts indexes */
-{
- mtr_t mtr;
- mem_heap_t* heap;
- btr_pcur_t pcur;
- dict_table_t* table;
- tuple_t_placeholder_never_used_see_below
- dfield_t* dfield;
- fts_get_doc_t* get_doc;
- doc_id_t temp_doc_id;
- dict_index_t* clust_index;
- dict_index_t* fts_id_index;
- ibool is_id_cluster;
- fts_cache_t* cache = ftt->table->fts->cache;
-
- ut_ad(cache->get_docs);
-
- /* If Doc ID has been supplied by the user, then the table
- might not yet be sync-ed */
-
- if (!(ftt->table->fts->fts_status & ADDED_TABLE_SYNCED)) {
- fts_init_index(ftt->table, FALSE);
- }
-
- /* Get the first FTS index's get_doc */
- get_doc = static_cast<fts_get_doc_t*>(
- ib_vector_get(cache->get_docs, 0));
- ut_ad(get_doc);
-
- table = get_doc->index_cache->index->table;
-
- heap = mem_heap_create(512);
-
- clust_index = dict_table_get_first_index(table);
- fts_id_index = dict_table_get_index_on_name(
- table, FTS_DOC_ID_INDEX_NAME);
-
- /* Check whether the index on FTS_DOC_ID is cluster index */
- is_id_cluster = (clust_index == fts_id_index);
-
- mtr_start(&mtr);
- btr_pcur_init(&pcur);
-
- /* Search based on Doc ID. Here, we'll need to consider the case
- when there is no primary index on Doc ID */
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
- dfield->type.mtype = DATA_INT;
- dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;
-
- mach_write_to_8((byte*) &temp_doc_id, doc_id);
- dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));
-
- btr_pcur_open_with_no_init(
- fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
- &pcur, 0, &mtr);
-
- /* If we have a match, add the data to doc structure */
- if (btr_pcur_get_low_match(&pcur) == 1) {
- const rec_t* rec;
- btr_pcur_t* doc_pcur;
- const rec_t* clust_rec;
- btr_pcur_t clust_pcur;
- ulint* offsets = NULL;
- ulint num_idx = ib_vector_size(cache->get_docs);
-
- rec = btr_pcur_get_rec(&pcur);
-
- /* Doc could be deleted */
- if (page_rec_is_infimum(rec)
- || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
-
- goto func_exit;
- }
-
- if (is_id_cluster) {
- clust_rec = rec;
- doc_pcur = &pcur;
- } else {
- dtuple_t* clust_ref;
- ulint n_fields;
-
- btr_pcur_init(&clust_pcur);
- n_fields = dict_index_get_n_unique(clust_index);
-
- clust_ref = dtuple_create(heap, n_fields);
- dict_index_copy_types(clust_ref, clust_index, n_fields);
-
- row_build_row_ref_in_tuple(
- clust_ref, rec, fts_id_index, NULL, NULL);
-
- btr_pcur_open_with_no_init(
- clust_index, clust_ref, PAGE_CUR_LE,
- BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);
-
- doc_pcur = &clust_pcur;
- clust_rec = btr_pcur_get_rec(&clust_pcur);
-
- }
-
- offsets = rec_get_offsets(clust_rec, clust_index,
- NULL, ULINT_UNDEFINED, &heap);
-
- for (ulint i = 0; i < num_idx; ++i) {
- fts_doc_t doc;
- dict_table_t* table; /* shadows the function-level table */
- fts_get_doc_t* get_doc; /* shadows the function-level get_doc */
-
- get_doc = static_cast<fts_get_doc_t*>(
- ib_vector_get(cache->get_docs, i));
-
- table = get_doc->index_cache->index->table;
-
- fts_doc_init(&doc);
-
- fts_fetch_doc_from_rec(
- get_doc, clust_index, doc_pcur, offsets, &doc);
-
- if (doc.found) {
- ibool success MY_ATTRIBUTE((unused));
-
- btr_pcur_store_position(doc_pcur, &mtr);
- mtr_commit(&mtr); /* the mini-transaction is ended before the cache lock is taken */
-
- rw_lock_x_lock(&table->fts->cache->lock);
-
- if (table->fts->cache->stopword_info.status
- & STOPWORD_NOT_INIT) {
- fts_load_stopword(table, NULL, NULL,
- NULL, TRUE, TRUE);
- }
-
- fts_cache_add_doc(
- table->fts->cache,
- get_doc->index_cache,
- doc_id, doc.tokens);
-
- bool need_sync = false;
- if ((cache->total_size > fts_max_cache_size / 10
- || fts_need_sync)
- && !cache->sync->in_progress) {
- need_sync = true;
- }
-
- rw_lock_x_unlock(&table->fts->cache->lock);
-
- DBUG_EXECUTE_IF(
- "fts_instrument_sync",
- fts_optimize_request_sync_table(table);
- os_event_wait(cache->sync->event);
- );
-
- DBUG_EXECUTE_IF(
- "fts_instrument_sync_debug",
- fts_sync(cache->sync, true, true, false);
- );
-
- DEBUG_SYNC_C("fts_instrument_sync_request");
- DBUG_EXECUTE_IF(
- "fts_instrument_sync_request",
- fts_optimize_request_sync_table(table);
- );
-
- if (need_sync) { /* decided under the cache lock, requested after releasing it */
- fts_optimize_request_sync_table(table);
- }
-
- mtr_start(&mtr);
-
- if (i < num_idx - 1) { /* the cursor is only restored when another index still needs the record */
-
- success = btr_pcur_restore_position(
- BTR_SEARCH_LEAF, doc_pcur,
- &mtr);
-
- ut_ad(success);
- }
- }
-
- fts_doc_free(&doc);
- }
-
- if (!is_id_cluster) {
- btr_pcur_close(doc_pcur);
- }
- }
-func_exit:
- mtr_commit(&mtr);
-
- btr_pcur_close(&pcur);
-
- mem_heap_free(heap);
- return(TRUE);
-}
-
-
-/*********************************************************************//**
-Fetch callback that decodes the first selected column with
-mach_read_from_4() and stores the result in the ulint that user_arg
-points to.
-@return always returns TRUE */
-static
-ibool
-fts_read_ulint(
-/*===========*/
-	void*	row,		/*!< in: sel_node_t* */
-	void*	user_arg)	/*!< in: pointer to ulint */
-{
-	sel_node_t*	sel = static_cast<sel_node_t*>(row);
-	dfield_t*	field = que_node_get_val(sel->select_list);
-	const byte*	raw = static_cast<const byte*>(
-		dfield_get_data(field));
-	ulint*		out = static_cast<ulint*>(user_arg);
-
-	*out = static_cast<ulint>(mach_read_from_4(raw));
-
-	return(TRUE);
-}
-
-/*********************************************************************//**
-Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists.
-The cursor is opened at the right-hand end of the index and moved
-backwards until a user record is found; the first field of that record
-is decoded with fts_read_doc_id(). If no user record is found, 0 is
-returned.
-@return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
-UNIV_INTERN
-doc_id_t
-fts_get_max_doc_id(
-/*===============*/
-	dict_table_t*	table)		/*!< in: user table */
-{
-	dict_index_t*	index;
-	dict_field_t*	dfield MY_ATTRIBUTE((unused)) = NULL;
-	doc_id_t	doc_id = 0;
-	mtr_t		mtr;
-	btr_pcur_t	pcur;
-
-	index = dict_table_get_index_on_name(table, FTS_DOC_ID_INDEX_NAME);
-
-	if (!index) {
-		return(0);
-	}
-
-	dfield = dict_index_get_nth_field(index, 0);
-
-#if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
-	ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
-#endif
-
-	mtr_start(&mtr);
-
-	/* fetch the largest indexes value */
-	btr_pcur_open_at_index_side(
-		false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
-
-	if (!page_is_empty(btr_pcur_get_page(&pcur))) {
-		const rec_t*	rec = NULL;
-		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-		ulint*		offsets = offsets_;
-		mem_heap_t*	heap = NULL;
-		ulint		len;
-		const void*	data;
-
-		rec_offs_init(offsets_);
-
-		do {
-			rec = btr_pcur_get_rec(&pcur);
-
-			if (page_rec_is_user_rec(rec)) {
-				break;
-			}
-		} while (btr_pcur_move_to_prev(&pcur, &mtr));
-
-		/* rec is never NULL here, so test what actually matters:
-		if the loop ran out without reaching a user record, rec
-		is a page infimum/supremum and must not be decoded. */
-		if (rec != NULL && page_rec_is_user_rec(rec)) {
-			offsets = rec_get_offsets(
-				rec, index, offsets, ULINT_UNDEFINED, &heap);
-
-			data = rec_get_nth_field(rec, offsets, 0, &len);
-
-			doc_id = static_cast<doc_id_t>(fts_read_doc_id(
-				static_cast<const byte*>(data)));
-
-			/* rec_get_offsets() allocates a heap when the
-			stack array is too small; release it. */
-			if (heap != NULL) {
-				mem_heap_free(heap);
-			}
-		}
-	}
-
-	btr_pcur_close(&pcur);
-	mtr_commit(&mtr);
-	return(doc_id);
-}
-
-/*********************************************************************//**
-Fetch document with the given document id.
-A cursor over the indexed columns of the table is built with the internal
-SQL parser and "callback" is invoked for each fetched row. The statement
-runs in its own background transaction, which is committed on success
-and rolled back otherwise. When get_doc is given, the parsed graph is
-cached in get_doc->get_document_graph and reused by later calls; when
-get_doc is NULL the graph is freed before returning.
-NOTE(review): a cached graph is reused whatever the value of "option",
-so the first call through a given get_doc appears to decide which of the
-two statements is used -- confirm callers do not mix options.
-NOTE(review): get_doc is dereferenced when index_to_use is NULL, so at
-least one of the two must be non-NULL.
-@return DB_SUCCESS if OK else error */
-UNIV_INTERN
-dberr_t
-fts_doc_fetch_by_doc_id(
-/*====================*/
- fts_get_doc_t* get_doc, /*!< in: state */
- doc_id_t doc_id, /*!< in: id of document to
- fetch */
- dict_index_t* index_to_use, /*!< in: caller supplied FTS index,
- or NULL */
- ulint option, /*!< in: search option, if it is
- greater than doc_id or equal */
- fts_sql_callback
- callback, /*!< in: callback to read */
- void* arg) /*!< in: callback arg */
-{
- pars_info_t* info;
- dberr_t error;
- const char* select_str;
- doc_id_t write_doc_id;
- dict_index_t* index;
- trx_t* trx = trx_allocate_for_background();
- que_t* graph;
-
- trx->op_info = "fetching indexed FTS document";
-
- /* The FTS index can be supplied by caller directly with
- "index_to_use", otherwise, get it from "get_doc" */
- index = (index_to_use) ? index_to_use : get_doc->index_cache->index;
-
- if (get_doc && get_doc->get_document_graph) {
- info = get_doc->get_document_graph->info;
- } else {
- info = pars_info_create();
- }
-
- /* Convert to "storage" byte order. */
- fts_write_doc_id((byte*) &write_doc_id, doc_id);
- fts_bind_doc_id(info, "doc_id", &write_doc_id);
- pars_info_bind_function(info, "my_func", callback, arg);
-
- select_str = fts_get_select_columns_str(index, info, info->heap);
- pars_info_bind_id(info, TRUE, "table_name", index->table_name);
-
- if (!get_doc || !get_doc->get_document_graph) {
- if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
- graph = fts_parse_sql(
- NULL,
- info,
- mem_heap_printf(info->heap,
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS"
- " SELECT %s FROM $table_name"
- " WHERE %s = :doc_id;\n"
- "BEGIN\n"
- ""
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c %% NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE c;",
- select_str, FTS_DOC_ID_COL_NAME));
- } else {
- ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);
-
- /* This is used for crash recovery of table with
- hidden DOC ID or FTS indexes. We will scan the table
- to re-processing user table rows whose DOC ID or
- FTS indexed documents have not been sync-ed to disc
- during recent crash.
- In the case that all fulltext indexes are dropped
- for a table, we will keep the "hidden" FTS_DOC_ID
- column, and this scan is to retrieve the largest
- DOC ID being used in the table to determine the
- appropriate next DOC ID.
- In the case of there exists fulltext index(es), this
- operation will re-tokenize any docs that have not
- been sync-ed to the disk, and re-prime the FTS
- cached */
- graph = fts_parse_sql(
- NULL,
- info,
- mem_heap_printf(info->heap,
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS"
- " SELECT %s, %s FROM $table_name"
- " WHERE %s > :doc_id;\n"
- "BEGIN\n"
- ""
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c %% NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE c;",
- FTS_DOC_ID_COL_NAME,
- select_str, FTS_DOC_ID_COL_NAME));
- }
- if (get_doc) {
- get_doc->get_document_graph = graph; /* cached for later calls */
- }
- } else {
- graph = get_doc->get_document_graph;
- }
-
- error = fts_eval_sql(trx, graph);
-
- if (error == DB_SUCCESS) {
- fts_sql_commit(trx);
- } else {
- fts_sql_rollback(trx);
- }
-
- trx_free_for_background(trx);
-
- if (!get_doc) { /* nobody caches the graph in this case */
- fts_que_graph_free(graph);
- }
-
- return(error);
-}
-
-/*********************************************************************//**
-Insert one (word, node) pair as a row of the FTS auxiliary INDEX table.
-The parsed INSERT statement is kept in *graph: it is built on the first
-call and only its bound values are refreshed on later calls. The time
-spent in fts_eval_sql() is added to elapsed_time and n_nodes is
-incremented.
-@return DB_SUCCESS if all OK. */
-UNIV_INTERN
-dberr_t
-fts_write_node(
-/*===========*/
-	trx_t*		trx,		/*!< in: transaction */
-	que_t**		graph,		/*!< in/out: query graph; created
-					here when *graph is NULL */
-	fts_table_t*	fts_table,	/*!< in: aux table */
-	fts_string_t*	word,		/*!< in: word in UTF-8 */
-	fts_node_t*	node)		/*!< in: node columns */
-{
-	/* Bound values, converted to "storage" byte order. */
-	doc_id_t	first_id_buf;
-	doc_id_t	last_id_buf;
-	ib_uint32_t	count_buf;
-
-	pars_info_t*	info = (*graph != NULL)
-		? (*graph)->info
-		: pars_info_create();
-
-	pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
-
-	fts_write_doc_id((byte*) &first_id_buf, node->first_doc_id);
-	fts_bind_doc_id(info, "first_doc_id", &first_id_buf);
-
-	fts_write_doc_id((byte*) &last_id_buf, node->last_doc_id);
-	fts_bind_doc_id(info, "last_doc_id", &last_id_buf);
-
-	ut_a(node->last_doc_id >= node->first_doc_id);
-
-	mach_write_to_4((byte*) &count_buf, node->doc_count);
-	pars_info_bind_int4_literal(
-		info, "doc_count", (const ib_uint32_t*) &count_buf);
-
-	pars_info_bind_literal(
-		info, "ilist", node->ilist, node->ilist_size,
-		DATA_BLOB, DATA_BINARY_TYPE);
-
-	if (*graph == NULL) {
-		*graph = fts_parse_sql(
-			fts_table,
-			info,
-			"BEGIN\n"
-			"INSERT INTO \"%s\" VALUES "
-			"(:token, :first_doc_id,"
-			" :last_doc_id, :doc_count, :ilist);");
-	}
-
-	const ib_time_t	started = ut_time();
-	const dberr_t	err = fts_eval_sql(trx, *graph);
-
-	elapsed_time += ut_time() - started;
-	++n_nodes;
-
-	return(err);
-}
-
-/*********************************************************************//**
-Insert every doc id of the (non-empty) vector into the DELETED_CACHE
-auxiliary table. The vector is sorted in place first; one parsed INSERT
-statement is reused for all ids and insertion stops at the first error.
-@return DB_SUCCESS if all went well else error code*/
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_sync_add_deleted_cache(
-/*=======================*/
-	fts_sync_t*	sync,		/*!< in: sync state */
-	ib_vector_t*	doc_ids)	/*!< in: doc ids to add */
-{
-	const ulint	n_ids = ib_vector_size(doc_ids);
-	doc_id_t	placeholder = 0;
-	fts_table_t	aux_table;
-	dberr_t		err = DB_SUCCESS;
-
-	ut_a(n_ids > 0);
-
-	ib_vector_sort(doc_ids, fts_update_doc_id_cmp);
-
-	pars_info_t*	info = pars_info_create();
-
-	/* Bind a dummy value so the statement can be parsed; the real
-	ids are bound one by one below. */
-	fts_bind_doc_id(info, "doc_id", &placeholder);
-
-	FTS_INIT_FTS_TABLE(
-		&aux_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table);
-
-	que_t*	graph = fts_parse_sql(
-		&aux_table,
-		info,
-		"BEGIN INSERT INTO \"%s\" VALUES (:doc_id);");
-
-	ulint	pos = 0;
-
-	while (pos < n_ids && err == DB_SUCCESS) {
-		doc_id_t	id_buf;
-		fts_update_t*	entry = static_cast<fts_update_t*>(
-			ib_vector_get(doc_ids, pos));
-
-		/* Convert to "storage" byte order. */
-		fts_write_doc_id((byte*) &id_buf, entry->doc_id);
-		fts_bind_doc_id(info, "doc_id", &id_buf);
-
-		err = fts_eval_sql(sync->trx, graph);
-
-		++pos;
-	}
-
-	fts_que_graph_free(graph);
-
-	return(err);
-}
-
-/** Write the words and ilist to disk.
-Every word of the index cache is visited and each of its nodes that is
-not yet marked synced is marked synced and, as long as no error has
-occurred so far, written with fts_write_node() to the auxiliary table
-selected by fts_select_index(). After the first error the iteration
-continues, but nodes are only marked and no longer written; the error is
-printed once. With unlock_cache set, the cache lock is released around
-each write and re-acquired afterwards.
-@param[in,out] trx transaction
-@param[in] index_cache index cache
-@param[in] unlock_cache whether unlock cache when write node
-@return DB_SUCCESS if all went well else error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_sync_write_words(
- trx_t* trx,
- fts_index_cache_t* index_cache,
- bool unlock_cache)
-{
- fts_table_t fts_table;
- ulint n_nodes = 0; /* local counter, used only for the diagnostic average below */
- ulint n_words = 0;
- const ib_rbt_node_t* rbt_node;
- dberr_t error = DB_SUCCESS;
- ibool print_error = FALSE;
- dict_table_t* table = index_cache->index->table;
-#ifdef FTS_DOC_STATS_DEBUG
- ulint n_new_words = 0;
-#endif /* FTS_DOC_STATS_DEBUG */
-
- FTS_INIT_INDEX_TABLE(
- &fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);
-
- n_words = rbt_size(index_cache->words);
-
- /* We iterate over the entire tree, even if there is an error,
- since we want to free the memory used during caching. */
- for (rbt_node = rbt_first(index_cache->words);
- rbt_node;
- rbt_node = rbt_next(index_cache->words, rbt_node)) {
-
- ulint i;
- ulint selected;
- fts_tokenizer_word_t* word;
-
- word = rbt_value(fts_tokenizer_word_t, rbt_node);
-
- selected = fts_select_index(
- index_cache->charset, word->text.f_str,
- word->text.f_len);
-
- fts_table.suffix = fts_get_suffix(selected);
-
-#ifdef FTS_DOC_STATS_DEBUG
- /* Check if the word exists in the FTS index and if not
- then we need to increment the total word count stats. */
- if (error == DB_SUCCESS && fts_enable_diag_print) {
- ibool found = FALSE;
-
- error = fts_is_word_in_index(
- trx,
- &index_cache->sel_graph[selected],
- &fts_table,
- &word->text, &found);
-
- if (error == DB_SUCCESS && !found) {
-
- ++n_new_words;
- }
- }
-#endif /* FTS_DOC_STATS_DEBUG */
-
- /* We iterate over all the nodes even if there was an error */
- for (i = 0; i < ib_vector_size(word->nodes); ++i) {
-
- fts_node_t* fts_node = static_cast<fts_node_t*>(
- ib_vector_get(word->nodes, i));
-
- if (fts_node->synced) {
- continue;
- } else {
- fts_node->synced = true; /* set before the write, and also when the write is skipped because of an earlier error */
- }
-
- /*FIXME: we need to handle the error properly. */
- if (error == DB_SUCCESS) {
- if (unlock_cache) {
- rw_lock_x_unlock(
- &table->fts->cache->lock);
- }
-
- error = fts_write_node(
- trx,
- &index_cache->ins_graph[selected],
- &fts_table, &word->text, fts_node);
-
- DEBUG_SYNC_C("fts_write_node");
- DBUG_EXECUTE_IF("fts_write_node_crash",
- DBUG_SUICIDE(););
-
- DBUG_EXECUTE_IF("fts_instrument_sync_sleep",
- os_thread_sleep(1000000);
- );
-
- if (unlock_cache) {
- rw_lock_x_lock(
- &table->fts->cache->lock);
- }
- }
- }
-
- n_nodes += ib_vector_size(word->nodes); /* counts all nodes of the word, synced or not */
-
- if (error != DB_SUCCESS && !print_error) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error (%s) writing "
- "word node to FTS auxiliary index "
- "table.\n", ut_strerr(error));
-
- print_error = TRUE;
- }
- }
-
-#ifdef FTS_DOC_STATS_DEBUG
- if (error == DB_SUCCESS && n_new_words > 0 && fts_enable_diag_print) {
- fts_table_t fts_table; /* NOTE(review): shadows the outer fts_table and is not passed to the call below */
-
- FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
-
- /* Increment the total number of words in the FTS index */
- error = fts_config_increment_index_value(
- trx, index_cache->index, FTS_TOTAL_WORD_COUNT,
- n_new_words);
- }
-#endif /* FTS_DOC_STATS_DEBUG */
-
- if (fts_enable_diag_print) {
- printf("Avg number of nodes: %lf\n",
- (double) n_nodes / (double) (n_words > 1 ? n_words : 1)); /* divisor clamped to at least 1 */
- }
-
- return(error);
-}
-
-#ifdef FTS_DOC_STATS_DEBUG
-/*********************************************************************//**
-Insert one document's (doc_id, word count) pair into the "DOC_ID"
-auxiliary table of the index. The parsed statement is kept in *graph and
-reused across calls. The insert is retried for as long as it fails with
-DB_LOCK_WAIT_TIMEOUT; any other error is printed and returned.
-@return DB_SUCCESS if all went well else error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_sync_write_doc_stat(
-/*====================*/
-	trx_t*			trx,		/*!< in: transaction */
-	dict_index_t*		index,		/*!< in: index */
-	que_t**			graph,		/*!< in/out: query graph;
-						created here when *graph
-						is NULL */
-	const fts_doc_stats_t*	doc_stat)	/*!< in: doc stats to write */
-{
-	/* Bound values, converted to "storage" byte order. */
-	ib_uint32_t	count_buf;
-	doc_id_t	id_buf;
-	dberr_t		err = DB_SUCCESS;
-
-	pars_info_t*	info = (*graph != NULL)
-		? (*graph)->info
-		: pars_info_create();
-
-	mach_write_to_4((byte*) &count_buf, doc_stat->word_count);
-	pars_info_bind_int4_literal(
-		info, "count", (const ib_uint32_t*) &count_buf);
-
-	fts_write_doc_id((byte*) &id_buf, doc_stat->doc_id);
-	fts_bind_doc_id(info, "doc_id", &id_buf);
-
-	if (*graph == NULL) {
-		fts_table_t	aux_table;
-
-		FTS_INIT_INDEX_TABLE(
-			&aux_table, "DOC_ID", FTS_INDEX_TABLE, index);
-
-		*graph = fts_parse_sql(
-			&aux_table,
-			info,
-			"BEGIN INSERT INTO \"%s\" VALUES (:doc_id, :count);");
-	}
-
-	do {
-		err = fts_eval_sql(trx, *graph);
-
-		if (err == DB_SUCCESS) {
-			break;
-		}
-
-		ut_print_timestamp(stderr);
-
-		if (err != DB_LOCK_WAIT_TIMEOUT) {
-			fprintf(stderr, " InnoDB: Error: (%s) "
-				"while writing to FTS doc_id.\n",
-				ut_strerr(err));
-
-			break;
-		}
-
-		fprintf(stderr, " InnoDB: Warning: lock wait "
-			"timeout writing to FTS doc_id. "
-			"Retrying!\n");
-
-		/* Clear the error so the statement can be retried. */
-		trx->error_state = DB_SUCCESS;
-	} while (true);
-
-	return(err);
-}
-
-/*********************************************************************//**
-Write document statistics to disk.
-Entries are popped from index_cache->doc_stats one at a time and written
-with fts_sync_write_doc_stat() until the vector is empty or a write
-fails. The query graph shared by the writes is freed before returning.
-@return DB_SUCCESS if all OK */
-static
-ulint
-fts_sync_write_doc_stats(
-/*=====================*/
-	trx_t*			trx,		/*!< in: transaction */
-	const fts_index_cache_t*index_cache)	/*!< in: index cache */
-{
-	dberr_t		error = DB_SUCCESS;
-	que_t*		graph = NULL;
-
-	while (!ib_vector_is_empty(index_cache->doc_stats)) {
-		/* The element type is fts_doc_stats_t; the former cast to
-		"ts_doc_stats_t" named a type that does not exist. */
-		fts_doc_stats_t*	doc_stat
-			= static_cast<fts_doc_stats_t*>(
-				ib_vector_pop(index_cache->doc_stats));
-
-		if (doc_stat == NULL) {
-			break;
-		}
-
-		error = fts_sync_write_doc_stat(
-			trx, index_cache->index, &graph, doc_stat);
-
-		if (error != DB_SUCCESS) {
-			break;
-		}
-	}
-
-	if (graph != NULL) {
-		fts_que_graph_free_check_lock(NULL, index_cache, graph);
-	}
-
-	return(error);
-}
-
-/*********************************************************************//**
-Fetch callback used to check the existence of a word: the flag that
-user_arg points to is set to TRUE when any selected column of the row is
-neither NULL nor empty. The flag is never cleared here.
-@return always FALSE */
-static
-ibool
-fts_lookup_word(
-/*============*/
-	void*	row,		/*!< in: sel_node_t* */
-	void*	user_arg)	/*!< in/out: ibool* "found" flag */
-{
-	sel_node_t*	sel = static_cast<sel_node_t*>(row);
-	ibool*		found_flag = static_cast<ibool*>(user_arg);
-
-	for (que_node_t* col = sel->select_list;
-	     col != NULL;
-	     col = que_node_get_next(col)) {
-
-		const ulint	col_len = dfield_get_len(
-			que_node_get_val(col));
-
-		if (col_len == UNIV_SQL_NULL || col_len == 0) {
-			continue;
-		}
-
-		*found_flag = TRUE;
-	}
-
-	return(FALSE);
-}
-
-/*********************************************************************//**
-Look a word (term) up in an FTS auxiliary index table. The rows matching
-the word are fetched through fts_lookup_word(), which sets *found. The
-parsed statement is kept in *graph and reused across calls. The lookup is
-retried for as long as it fails with DB_LOCK_WAIT_TIMEOUT.
-@return DB_SUCCESS if all went well else error code */
-static
-dberr_t
-fts_is_word_in_index(
-/*=================*/
-	trx_t*		trx,		/*!< in: FTS query state */
-	que_t**		graph,		/*!< in/out: query graph; created
-					here when *graph is NULL */
-	fts_table_t*	fts_table,	/*!< in: table instance */
-	const fts_string_t*
-			word,		/*!< in: the word to check */
-	ibool*		found)		/*!< out: set to TRUE if exists */
-{
-	dberr_t		err;
-
-	trx->op_info = "looking up word in FTS index";
-
-	pars_info_t*	info = (*graph != NULL)
-		? (*graph)->info
-		: pars_info_create();
-
-	pars_info_bind_function(info, "my_func", fts_lookup_word, found);
-	pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
-
-	if (*graph == NULL) {
-		*graph = fts_parse_sql(
-			fts_table,
-			info,
-			"DECLARE FUNCTION my_func;\n"
-			"DECLARE CURSOR c IS"
-			" SELECT doc_count\n"
-			" FROM \"%s\"\n"
-			" WHERE word = :word "
-			" ORDER BY first_doc_id;\n"
-			"BEGIN\n"
-			"\n"
-			"OPEN c;\n"
-			"WHILE 1 = 1 LOOP\n"
-			" FETCH c INTO my_func();\n"
-			" IF c % NOTFOUND THEN\n"
-			" EXIT;\n"
-			" END IF;\n"
-			"END LOOP;\n"
-			"CLOSE c;");
-	}
-
-	do {
-		err = fts_eval_sql(trx, *graph);
-
-		if (err == DB_SUCCESS) {
-			break;
-		}
-
-		ut_print_timestamp(stderr);
-
-		if (err != DB_LOCK_WAIT_TIMEOUT) {
-			fprintf(stderr, " InnoDB: Error: (%s) "
-				"while reading FTS index.\n",
-				ut_strerr(err));
-
-			break;
-		}
-
-		fprintf(stderr, " InnoDB: Warning: lock wait "
-			"timeout reading FTS index. "
-			"Retrying!\n");
-
-		/* Clear the error so the statement can be retried. */
-		trx->error_state = DB_SUCCESS;
-	} while (true);
-
-	return(err);
-}
-#endif /* FTS_DOC_STATS_DEBUG */
-
-/*********************************************************************//**
-Begin Sync: reset the n_nodes and elapsed_time counters, record the
-start time and allocate the background transaction the sync runs in.
-With fts_enable_diag_print set, the number of deleted doc ids and the
-cache size are logged. */
-static
-void
-fts_sync_begin(
-/*===========*/
-	fts_sync_t*	sync)	/*!< in: sync state */
-{
-	fts_cache_t*	cache = sync->table->fts->cache;
-
-	n_nodes = 0;
-	elapsed_time = 0;
-
-	sync->start_time = ut_time();
-
-	sync->trx = trx_allocate_for_background();
-
-	if (fts_enable_diag_print) {
-		/* Both counts are unsigned: print them with %lu (the
-		deleted count used to be printed with %ld). */
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"FTS SYNC for table %s, deleted count: %lu size: "
-			"%lu bytes",
-			sync->table->name,
-			ib_vector_size(cache->deleted_doc_ids),
-			cache->total_size);
-	}
-}
-
-/*********************************************************************//**
-Run SYNC on the table, i.e., write out data from the index specific
-cache to the FTS aux INDEX table and, when FTS_DOC_STATS_DEBUG is
-defined, to the FTS aux doc id stats table.
-@return DB_SUCCESS if all OK */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_sync_index(
-/*===========*/
-	fts_sync_t*		sync,		/*!< in: sync state */
-	fts_index_cache_t*	index_cache)	/*!< in: index cache */
-{
-	trx_t*		trx = sync->trx;
-	dberr_t		error = DB_SUCCESS;
-
-	trx->op_info = "doing SYNC index";
-
-	if (fts_enable_diag_print) {
-		/* The word count is unsigned: print it with %lu (it used
-		to be printed with %ld). */
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"SYNC words: %lu", rbt_size(index_cache->words));
-	}
-
-	ut_ad(rbt_validate(index_cache->words));
-
-	error = fts_sync_write_words(sync->trx, index_cache, sync->unlock_cache);
-
-#ifdef FTS_DOC_STATS_DEBUG
-	/* FTS_RESOLVE: the word counter info in auxiliary table "DOC_ID"
-	is not used currently for ranking. We disable fts_sync_write_doc_stats()
-	for now */
-	/* Write the per doc statistics that will be used for ranking. */
-	if (error == DB_SUCCESS) {
-
-		error = fts_sync_write_doc_stats(trx, index_cache);
-	}
-#endif /* FTS_DOC_STATS_DEBUG */
-
-	return(error);
-}
-
-/** Tell whether an index cache has been written out completely.
-Only the last node of every word's node vector is inspected.
-@param[in,out]	index_cache	index cache
-@return true if the last node of every word is marked as synced,
-otherwise false. */
-static
-bool
-fts_sync_index_check(
-	fts_index_cache_t*	index_cache)
-{
-	const ib_rbt_node_t*	cur = rbt_first(index_cache->words);
-
-	while (cur != NULL) {
-		fts_tokenizer_word_t*	entry;
-		fts_node_t*		tail;
-
-		entry = rbt_value(fts_tokenizer_word_t, cur);
-
-		tail = static_cast<fts_node_t*>(
-			ib_vector_last(entry->nodes));
-
-		if (!tail->synced) {
-			/* At least one word still has unwritten data. */
-			return(false);
-		}
-
-		cur = rbt_next(index_cache->words, cur);
-	}
-
-	return(true);
-}
-
-/** Clear the synced flag on the last node of every word in an index
-cache. Used when a SYNC is rolled back.
-@param[in,out]	index_cache	index cache */
-static
-void
-fts_sync_index_reset(
-	fts_index_cache_t*	index_cache)
-{
-	const ib_rbt_node_t*	cur = rbt_first(index_cache->words);
-
-	while (cur != NULL) {
-		fts_tokenizer_word_t*	entry;
-		fts_node_t*		tail;
-
-		entry = rbt_value(fts_tokenizer_word_t, cur);
-
-		tail = static_cast<fts_node_t*>(
-			ib_vector_last(entry->nodes));
-
-		tail->synced = false;
-
-		cur = rbt_next(index_cache->words, cur);
-	}
-}
-
-/** Commit the SYNC, change state of processed doc ids etc.
-Steps, in order: store sync->max_doc_id with fts_cmp_set_sync_doc_id(),
-write out the cached deleted doc ids (if there are any), clear and
-re-initialise the cache, release the cache x-lock, then commit the SYNC
-transaction (or roll it back and log the error) and free it.
-The cache x-lock released here is not acquired in this function; in this
-file fts_sync() takes it before calling here.
-@param[in,out] sync sync state
-@return DB_SUCCESS if all OK */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_sync_commit(
- fts_sync_t* sync)
-{
- dberr_t error;
- trx_t* trx = sync->trx;
- fts_cache_t* cache = sync->table->fts->cache;
- doc_id_t last_doc_id; /* output of fts_cmp_set_sync_doc_id(); not read afterwards */
-
- trx->op_info = "doing SYNC commit";
-
- /* After each Sync, update the CONFIG table about the max doc id
- we just sync-ed to index table */
- error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
- &last_doc_id);
-
- /* Get the list of deleted documents that are either in the
- cache or were headed there but were deleted before the add
- thread got to them. */
-
- if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {
-
- error = fts_sync_add_deleted_cache(
- sync, cache->deleted_doc_ids);
- }
-
- /* We need to do this within the deleted lock since fts_delete() can
- attempt to add a deleted doc id to the cache deleted id array.
- NOTE(review): no deleted_lock acquisition is visible in this
- function; only cache->lock is released below. Confirm which lock
- is meant to protect this step. The cache is cleared and
- re-initialised regardless of the value of error. */
- fts_cache_clear(cache);
- DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
- fts_cache_init(cache);
- rw_lock_x_unlock(&cache->lock);
-
- if (error == DB_SUCCESS) {
-
- fts_sql_commit(trx);
-
- } else if (error != DB_SUCCESS) { /* always true in this branch */
-
- fts_sql_rollback(trx);
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: (%s) during SYNC.\n",
- ut_strerr(error));
- }
-
- if (fts_enable_diag_print && elapsed_time) { /* elapsed_time != 0 also guards the division below */
- ib_logf(IB_LOG_LEVEL_INFO,
- "SYNC for table %s: SYNC time : %lu secs: "
- "elapsed %lf ins/sec",
- sync->table->name,
- (ulong) (ut_time() - sync->start_time),
- (double) n_nodes/ (double) elapsed_time);
- }
-
- /* Avoid assertion in trx_free(). */
- trx->dict_operation_lock_mode = 0;
- trx_free_for_background(trx);
-
- return(error);
-}
-
-/** Rollback a sync operation.
-For every index cache: reset the synced flags, then free and clear any
-ins_graph[] / sel_graph[] query graphs. Afterwards release the cache
-x-lock (it is not acquired in this function), roll back the SYNC
-transaction and free it.
-@param[in,out] sync sync state */
-static
-void
-fts_sync_rollback(
- fts_sync_t* sync)
-{
- trx_t* trx = sync->trx;
- fts_cache_t* cache = sync->table->fts->cache;
-
- for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
- ulint j;
- fts_index_cache_t* index_cache;
-
- index_cache = static_cast<fts_index_cache_t*>(
- ib_vector_get(cache->indexes, i));
-
- /* Reset synced flag so nodes will not be skipped
- in the next sync, see fts_sync_write_words(). */
- fts_sync_index_reset(index_cache);
-
- for (j = 0; fts_index_selector[j].value; ++j) { /* stops at the entry whose value is 0 */
-
- if (index_cache->ins_graph[j] != NULL) {
-
- fts_que_graph_free_check_lock(
- NULL, index_cache,
- index_cache->ins_graph[j]);
-
- index_cache->ins_graph[j] = NULL;
- }
-
- if (index_cache->sel_graph[j] != NULL) {
-
- fts_que_graph_free_check_lock(
- NULL, index_cache,
- index_cache->sel_graph[j]);
-
- index_cache->sel_graph[j] = NULL;
- }
- }
- }
-
- rw_lock_x_unlock(&cache->lock);
-
- fts_sql_rollback(trx);
-
- /* Avoid assertion in trx_free(). */
- trx->dict_operation_lock_mode = 0;
- trx_free_for_background(trx);
-}
-
-/** Run SYNC on the table, i.e., write out data from the cache to the
-FTS auxiliary INDEX table and clear the cache at the end.
-Only one SYNC runs per cache at a time: sync->in_progress is tested and
-set under the cache x-lock. A caller that finds it set either waits on
-sync->event (wait == true) or returns DB_SUCCESS without syncing
-(wait == false).
-The index caches are written in passes; after each pass every index cache
-is re-checked with fts_sync_index_check() and another pass is started if
-one of them is not fully synced. The SYNC is committed when no error
-occurred and it was not interrupted, otherwise it is rolled back.
-@param[in,out] sync sync state
-@param[in] unlock_cache whether unlock cache lock when write node
-@param[in] wait whether wait when a sync is in progress
-@param[in] has_dict whether has dict operation lock
-@return DB_SUCCESS if all OK (also when another sync is in progress and
-wait is false), otherwise the error that stopped the SYNC */
-static
-dberr_t
-fts_sync(
- fts_sync_t* sync,
- bool unlock_cache,
- bool wait,
- bool has_dict)
-{
- ulint i;
- dberr_t error = DB_SUCCESS;
- fts_cache_t* cache = sync->table->fts->cache;
-
- rw_lock_x_lock(&cache->lock);
-
- /* Check if cache is being synced.
- Note: we release cache lock in fts_sync_write_words() to
- avoid long wait for the lock by other threads. */
- while (sync->in_progress) {
- rw_lock_x_unlock(&cache->lock);
-
- if (wait) {
- os_event_wait(sync->event);
- } else {
- return(DB_SUCCESS);
- }
-
- rw_lock_x_lock(&cache->lock);
- }
-
- sync->unlock_cache = unlock_cache;
- sync->in_progress = true;
-
- DEBUG_SYNC_C("fts_sync_begin");
- fts_sync_begin(sync);
-
- /* When sync in background, we hold dict operation lock
- to prevent DDL like DROP INDEX, etc. */
- if (has_dict) {
- sync->trx->dict_operation_lock_mode = RW_S_LATCH;
- }
-
-begin_sync:
- if (cache->total_size > fts_max_cache_size) {
- /* Avoid the case: sync never finishes when
- insert/update keeps coming. */
- ut_ad(sync->unlock_cache);
- sync->unlock_cache = false;
- }
-
- for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
- fts_index_cache_t* index_cache;
-
- index_cache = static_cast<fts_index_cache_t*>(
- ib_vector_get(cache->indexes, i));
-
- if (index_cache->index->to_be_dropped) {
- continue;
- }
-
- error = fts_sync_index(sync, index_cache);
-
- if (error != DB_SUCCESS && !sync->interrupted) { /* an error on an interrupted sync does not leave the loop early */
-
- goto end_sync;
- }
- }
-
- DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
- sync->interrupted = true;
- error = DB_INTERRUPTED;
- goto end_sync;
- );
-
- /* Make sure all the caches are synced. */
- for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
- fts_index_cache_t* index_cache;
-
- index_cache = static_cast<fts_index_cache_t*>(
- ib_vector_get(cache->indexes, i));
-
- if (index_cache->index->to_be_dropped
- || fts_sync_index_check(index_cache)) {
- continue;
- }
-
- goto begin_sync;
- }
-
-end_sync:
- if (error == DB_SUCCESS && !sync->interrupted) {
- error = fts_sync_commit(sync);
- } else {
- fts_sync_rollback(sync);
- }
-
- rw_lock_x_lock(&cache->lock); /* fts_sync_commit() and fts_sync_rollback() both released it */
- sync->interrupted = false;
- sync->in_progress = false;
- os_event_set(sync->event);
- rw_lock_x_unlock(&cache->lock);
-
- /* Reset the added/deleted counters of the cache while holding
- deleted_lock.
- NOTE(review): the previous wording of this comment spoke of
- copying the two variables that control the optimize trigger;
- the code below does not copy them, it sets both to 0. */
- mutex_enter(&cache->deleted_lock);
-
- cache->added = 0;
- cache->deleted = 0;
-
- mutex_exit(&cache->deleted_lock);
-
- return(error);
-}
-
-/** Public entry point for a table SYNC: flush the FTS cache of a table to
-the FTS auxiliary INDEX table and clear the cache afterwards. Nothing is
-done for a discarded table or for a table without an FTS cache.
-@param[in,out]	table		fts table
-@param[in]	unlock_cache	whether unlock cache when write node
-@param[in]	wait		whether wait for existing sync to finish
-@param[in]	has_dict	whether has dict operation lock
-@return DB_SUCCESS on success (or when there was nothing to do),
-error code on failure. */
-UNIV_INTERN
-dberr_t
-fts_sync_table(
-	dict_table_t*	table,
-	bool		unlock_cache,
-	bool		wait,
-	bool		has_dict)
-{
-	ut_ad(table->fts);
-
-	if (dict_table_is_discarded(table) || !table->fts->cache) {
-		/* Nothing to write out. */
-		return(DB_SUCCESS);
-	}
-
-	return(fts_sync(table->fts->cache->sync,
-			unlock_cache, wait, has_dict));
-}
-
-/********************************************************************
-Process next token from document starting at the given position, i.e., add
-the token's start position to the token's list of positions.
-The token is fetched with innobase_mysql_fts_get_token(), lower-cased into
-memory taken from the result document's heap and looked up in the result
-document's token tree; a new tree entry is created the first time a word
-is seen. Tokens whose character count is outside
-[fts_min_token_size, fts_max_token_size] are skipped, but the value
-returned is still the one reported by the tokenizer.
-NOTE(review): when FTS_CHARSET_DEBUG is defined, start_pos + add_pos is
-added to the offset twice (once inside the #ifdef and once right after
-it) - confirm that this is intended.
-NOTE(review): callers use the return value to advance an index that is
-compared with doc->text.f_len and added to doc->text.f_str, so it looks
-like a byte count rather than a character count - confirm.
-@return number of characters handled in this call */
-static
-ulint
-fts_process_token(
-/*==============*/
- fts_doc_t* doc, /* in/out: document to
- tokenize */
- fts_doc_t* result, /* out: if provided, save
- result here */
- ulint start_pos, /*!< in: start position in text */
- ulint add_pos) /*!< in: add this position to all
- tokens from this tokenization */
-{
- ulint ret;
- fts_string_t str;
- ulint offset = 0;
- fts_doc_t* result_doc;
-
- /* Determine where to save the result. */
- result_doc = (result) ? result : doc;
-
- /* The length of a string in characters is set here only. */
- ret = innobase_mysql_fts_get_token(
- doc->charset, doc->text.f_str + start_pos,
- doc->text.f_str + doc->text.f_len, &str, &offset);
-
- /* Ignore string whose character number is less than
- "fts_min_token_size" or more than "fts_max_token_size" */
-
- if (str.f_n_char >= fts_min_token_size
- && str.f_n_char <= fts_max_token_size) {
-
- mem_heap_t* heap;
- fts_string_t t_str;
- fts_token_t* token;
- ib_rbt_bound_t parent;
- ulint newlen;
-
- heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg);
-
- t_str.f_n_char = str.f_n_char;
-
- t_str.f_len = str.f_len * doc->charset->casedn_multiply + 1; /* + 1 for the terminating 0 stored below */
-
- t_str.f_str = static_cast<byte*>(
- mem_heap_alloc(heap, t_str.f_len));
-
- newlen = innobase_fts_casedn_str(
- doc->charset, (char*) str.f_str, str.f_len,
- (char*) t_str.f_str, t_str.f_len);
-
- t_str.f_len = newlen;
- t_str.f_str[newlen] = 0;
-
- /* Add the word to the document statistics. If the word
- hasn't been seen before we create a new entry for it. */
- if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
- fts_token_t new_token;
-
- new_token.text.f_len = newlen;
- new_token.text.f_str = t_str.f_str;
- new_token.text.f_n_char = t_str.f_n_char;
-
- new_token.positions = ib_vector_create(
- result_doc->self_heap, sizeof(ulint), 32);
-
- ut_a(new_token.text.f_n_char >= fts_min_token_size);
- ut_a(new_token.text.f_n_char <= fts_max_token_size);
-
- parent.last = rbt_add_node(
- result_doc->tokens, &parent, &new_token); /* parent.last now refers to the new entry */
-
- ut_ad(rbt_validate(result_doc->tokens));
- }
-
-#ifdef FTS_CHARSET_DEBUG
- offset += start_pos + add_pos;
-#endif /* FTS_CHARSET_DEBUG */
-
- offset += start_pos + ret - str.f_len + add_pos;
-
- token = rbt_value(fts_token_t, parent.last); /* existing or freshly added entry */
- ib_vector_push(token->positions, &offset);
- }
-
- return(ret);
-}
-
-/******************************************************************//**
-Tokenize a document from its first byte. The token tree of the document
-must not exist yet; it is created here and then filled one token at a
-time with fts_process_token(). */
-UNIV_INTERN
-void
-fts_tokenize_document(
-/*==================*/
-	fts_doc_t*	doc,	/* in/out: document to
-				tokenize */
-	fts_doc_t*	result)	/* out: if provided, save
-				the result token here */
-{
-	ut_a(!doc->tokens);
-	ut_a(doc->charset);
-
-	doc->tokens = rbt_create_arg_cmp(
-		sizeof(fts_token_t), innobase_fts_text_cmp,
-		(void*) doc->charset);
-
-	ulint	pos = 0;
-
-	while (pos < doc->text.f_len) {
-		const ulint	consumed = fts_process_token(
-			doc, result, pos, 0);
-
-		/* The tokenizer must always make progress. */
-		ut_a(consumed > 0);
-
-		pos += consumed;
-	}
-}
-
-/******************************************************************//**
-Continue to tokenize a document whose token tree already exists. Every
-position recorded by this call is shifted by add_pos. */
-UNIV_INTERN
-void
-fts_tokenize_document_next(
-/*=======================*/
-	fts_doc_t*	doc,	/*!< in/out: document to
-				tokenize */
-	ulint		add_pos,/*!< in: add this position to all
-				tokens from this tokenization */
-	fts_doc_t*	result)	/*!< out: if provided, save
-				the result token here */
-{
-	ut_a(doc->tokens);
-
-	ulint	pos = 0;
-
-	while (pos < doc->text.f_len) {
-		const ulint	consumed = fts_process_token(
-			doc, result, pos, add_pos);
-
-		/* The tokenizer must always make progress. */
-		ut_a(consumed > 0);
-
-		pos += consumed;
-	}
-}
-
-/********************************************************************
-Create the vector of fts_get_doc_t instances.
-One zero-filled fts_get_doc_t is pushed for every element of
-cache->indexes and bound to the cache and to the matching index cache.
-The vector is created with the cache's own allocator (cache->self_heap).
-NOTE(review): other functions in this file read the elements of
-cache->indexes as fts_index_cache_t; reading them as dict_index_t** here
-presumably relies on the index pointer being the first member of
-fts_index_cache_t - confirm. */
-UNIV_INTERN
-ib_vector_t*
-fts_get_docs_create(
-/*================*/
- /* out: vector of
- fts_get_doc_t instances */
- fts_cache_t* cache) /*!< in: fts cache */
-{
- ulint i;
- ib_vector_t* get_docs;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX));
-#endif
- /* We need one instance of fts_get_doc_t per index. */
- get_docs = ib_vector_create(
- cache->self_heap, sizeof(fts_get_doc_t), 4);
-
- /* Create the get_doc instance, we need one of these
- per FTS index. */
- for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
-
- dict_index_t** index;
- fts_get_doc_t* get_doc;
-
- index = static_cast<dict_index_t**>(
- ib_vector_get(cache->indexes, i));
-
- get_doc = static_cast<fts_get_doc_t*>(
- ib_vector_push(get_docs, NULL)); /* reserve a slot; it is filled in below */
-
- memset(get_doc, 0x0, sizeof(*get_doc));
-
- get_doc->index_cache = fts_get_index_cache(cache, *index);
- get_doc->cache = cache;
-
- /* Must find the index cache. */
- ut_a(get_doc->index_cache != NULL);
- }
-
- return(get_docs);
-}
-
-/********************************************************************
-Free the document retrieval query graph of every fts_get_doc_t instance
-that has one, and clear the pointer to it. */
-static
-void
-fts_get_docs_clear(
-/*===============*/
-	ib_vector_t*	get_docs)	/*!< in: Doc retrieval vector */
-{
-	for (ulint pos = 0; pos < ib_vector_size(get_docs); ++pos) {
-
-		fts_get_doc_t*	entry = static_cast<fts_get_doc_t*>(
-			ib_vector_get(get_docs, pos));
-
-		if (entry->get_document_graph == NULL) {
-			/* No graph to release for this instance. */
-			continue;
-		}
-
-		ut_a(entry->index_cache);
-
-		fts_que_graph_free(entry->get_document_graph);
-		entry->get_document_graph = NULL;
-	}
-}
-
-/*********************************************************************//**
-Get the initial Doc ID by consulting the CONFIG table.
-The whole operation runs under the x-lock of the table's FTS cache. If
-cache->first_doc_id has already been set, nothing is done and 0 is
-returned. The return value of fts_cmp_set_sync_doc_id() is not checked
-here.
-@return initial Doc ID, or 0 if the table was already initialized */
-UNIV_INTERN
-doc_id_t
-fts_init_doc_id(
-/*============*/
- const dict_table_t* table) /*!< in: table */
-{
- doc_id_t max_doc_id = 0;
-
- rw_lock_x_lock(&table->fts->cache->lock);
-
- /* Return if the table is already initialized for DOC ID */
- if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
- rw_lock_x_unlock(&table->fts->cache->lock);
- return(0);
- }
-
- DEBUG_SYNC_C("fts_initialize_doc_id");
-
- /* Then compare this value with the ID value stored in the CONFIG
- table. The larger one will be our new initial Doc ID */
- fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
-
- /* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
- creating an index (and adding the doc id column). There is no need
- to recover documents. */
- if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
- fts_init_index((dict_table_t*) table, TRUE);
- }
-
- table->fts->fts_status |= ADDED_TABLE_SYNCED;
-
- table->fts->cache->first_doc_id = max_doc_id;
-
- rw_lock_x_unlock(&table->fts->cache->lock);
-
- ut_ad(max_doc_id > 0);
-
- return(max_doc_id);
-}
-
-#ifdef FTS_MULT_INDEX
-/*********************************************************************//**
-Tell whether the index a fts_get_doc_t is bound to is one of the affected
-FTS indexes. Indexes are compared by address.
-@return TRUE if index is updated */
-static
-ibool
-fts_is_index_updated(
-/*=================*/
-	const ib_vector_t*	fts_indexes,	/*!< in: affected FTS indexes */
-	const fts_get_doc_t*	get_doc)	/*!< in: info for reading
-						document */
-{
-	const dict_index_t*	wanted = get_doc->index_cache->index;
-
-	for (ulint pos = 0; pos < ib_vector_size(fts_indexes); ++pos) {
-		const dict_index_t*	candidate
-			= static_cast<const dict_index_t*>(
-				ib_vector_getp_const(fts_indexes, pos));
-
-		ut_a(candidate != NULL);
-
-		if (candidate == wanted) {
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-#endif
-
-/*********************************************************************//**
-Fetch COUNT(*) from specified table.
-The query runs in its own background transaction. It is retried for as
-long as it fails with DB_LOCK_WAIT_TIMEOUT; any other error is reported
-on stderr and ends the loop. The error itself is not returned: in that
-case the caller gets whatever the fetch callback stored in the counter
-(it starts at 0).
-@return the number of rows in the table */
-UNIV_INTERN
-ulint
-fts_get_rows_count(
-/*===============*/
- fts_table_t* fts_table) /*!< in: fts table to read */
-{
- trx_t* trx;
- pars_info_t* info;
- que_t* graph;
- dberr_t error;
- ulint count = 0;
-
- trx = trx_allocate_for_background();
-
- trx->op_info = "fetching FT table rows count";
-
- info = pars_info_create();
-
- pars_info_bind_function(info, "my_func", fts_read_ulint, &count); /* the callback receives &count */
-
- graph = fts_parse_sql(
- fts_table,
- info,
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS"
- " SELECT COUNT(*) "
- " FROM \"%s\";\n"
- "BEGIN\n"
- "\n"
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c % NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE c;");
-
- for (;;) {
- error = fts_eval_sql(trx, graph);
-
- if (error == DB_SUCCESS) {
- fts_sql_commit(trx);
-
- break; /* Exit the loop. */
- } else {
- fts_sql_rollback(trx);
-
- ut_print_timestamp(stderr);
-
- if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: lock wait "
- "timeout reading FTS table. "
- "Retrying!\n");
-
- trx->error_state = DB_SUCCESS; /* clear the error before the retry */
- } else {
- fprintf(stderr, " InnoDB: Error: (%s) "
- "while reading FTS table.\n",
- ut_strerr(error));
-
- break; /* Exit the loop. */
- }
- }
- }
-
- fts_que_graph_free(graph);
-
- trx_free_for_background(trx);
-
- return(count);
-}
-
-#ifdef FTS_CACHE_SIZE_DEBUG
-/*********************************************************************//**
-Read the max cache size parameter from the config table.
-The value is read in a background transaction of its own, which is
-committed and freed before returning, and is stored in
-sync->max_cache_size. Only compiled when FTS_CACHE_SIZE_DEBUG is
-defined. */
-static
-void
-fts_update_max_cache_size(
-/*======================*/
- fts_sync_t* sync) /*!< in: sync state */
-{
- trx_t* trx;
- fts_table_t fts_table;
-
- trx = trx_allocate_for_background();
-
- FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, sync->table);
-
- /* The size returned is in bytes. */
- sync->max_cache_size = fts_get_max_cache_size(trx, &fts_table);
-
- fts_sql_commit(trx);
-
- trx_free_for_background(trx);
-}
-#endif /* FTS_CACHE_SIZE_DEBUG */
-
-/*********************************************************************//**
-Free the modified rows of a table.
-Every row's fts_indexes vector (if it has one) is freed, every tree node
-is removed and freed, and finally the tree itself is freed. */
-UNIV_INLINE
-void
-fts_trx_table_rows_free(
-/*====================*/
- ib_rbt_t* rows) /*!< in: rbt of rows to free */
-{
- const ib_rbt_node_t* node;
-
- for (node = rbt_first(rows); node; node = rbt_first(rows)) { /* the current node is removed each round, so always restart from the first */
- fts_trx_row_t* row;
-
- row = rbt_value(fts_trx_row_t, node);
-
- if (row->fts_indexes != NULL) {
- /* This vector shouldn't be using the
- heap allocator. */
- ut_a(row->fts_indexes->allocator->arg == NULL);
-
- ib_vector_free(row->fts_indexes);
- row->fts_indexes = NULL;
- }
-
- ut_free(rbt_remove_node(rows, node));
- }
-
- ut_a(rbt_empty(rows));
- rbt_free(rows);
-}
-
-/*********************************************************************//**
-Free an FTS savepoint instance.
-For every table entry of the savepoint the modified rows, the added doc
-ids and the docs_added_graph query graph are released (each only if
-present), then the tree node is removed and freed. Finally the tree is
-freed and savepoint->tables is set to NULL. A savepoint without a tables
-tree is left untouched. */
-UNIV_INLINE
-void
-fts_savepoint_free(
-/*===============*/
- fts_savepoint_t* savepoint) /*!< in: savepoint instance */
-{
- const ib_rbt_node_t* node;
- ib_rbt_t* tables = savepoint->tables;
-
- /* Nothing to free! */
- if (tables == NULL) {
- return;
- }
-
- for (node = rbt_first(tables); node; node = rbt_first(tables)) { /* the current node is removed each round, so always restart from the first */
- fts_trx_table_t* ftt;
- fts_trx_table_t** fttp;
-
- fttp = rbt_value(fts_trx_table_t*, node); /* the tree stores pointers to fts_trx_table_t */
- ftt = *fttp;
-
- /* This can be NULL if a savepoint was released. */
- if (ftt->rows != NULL) {
- fts_trx_table_rows_free(ftt->rows);
- ftt->rows = NULL;
- }
-
- /* This can be NULL if a savepoint was released. */
- if (ftt->added_doc_ids != NULL) {
- fts_doc_ids_free(ftt->added_doc_ids);
- ftt->added_doc_ids = NULL;
- }
-
- /* Free the docs_added_graph query graph, if one was created.
- Unlike the two members above, the pointer is not reset. */
- if (ftt->docs_added_graph) {
- fts_que_graph_free(ftt->docs_added_graph);
- }
-
- /* NOTE: We are responsible for free'ing the node */
- ut_free(rbt_remove_node(tables, node));
- }
-
- ut_a(rbt_empty(tables));
- rbt_free(tables);
- savepoint->tables = NULL;
-}
-
-/*********************************************************************//**
-Free an FTS trx.
-Every savepoint in fts_trx->savepoints and in fts_trx->last_stmt is freed
-with fts_savepoint_free(); in both vectors the element at index 0 must
-have a NULL name. The heap of the FTS trx, if set, is freed last. */
-UNIV_INTERN
-void
-fts_trx_free(
-/*=========*/
- fts_trx_t* fts_trx) /* in, own: FTS trx */
-{
- ulint i;
-
- for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
- fts_savepoint_t* savepoint;
-
- savepoint = static_cast<fts_savepoint_t*>(
- ib_vector_get(fts_trx->savepoints, i));
-
- /* The default savepoint name must be NULL. */
- if (i == 0) {
- ut_a(savepoint->name == NULL);
- }
-
- fts_savepoint_free(savepoint);
- }
-
- for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
- fts_savepoint_t* savepoint;
-
- savepoint = static_cast<fts_savepoint_t*>(
- ib_vector_get(fts_trx->last_stmt, i));
-
- /* The default savepoint name must be NULL. */
- if (i == 0) {
- ut_a(savepoint->name == NULL);
- }
-
- fts_savepoint_free(savepoint);
- }
-
- if (fts_trx->heap) {
- mem_heap_free(fts_trx->heap);
- }
-}
-
-/*********************************************************************//**
-Read the doc id stored in the FTS hidden column of a row tuple. The
-column must be configured for the table, and the field must be a DATA_INT
-of exactly sizeof(doc_id_t) bytes.
-@return doc id that was extracted from row */
-UNIV_INTERN
-doc_id_t
-fts_get_doc_id_from_row(
-/*====================*/
-	dict_table_t*	table,	/*!< in: table */
-	dtuple_t*	row)	/*!< in: row whose FTS doc id we
-				want to extract.*/
-{
-	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
-
-	dfield_t*	doc_id_field = dtuple_get_nth_field(
-		row, table->fts->doc_col);
-
-	ut_a(dfield_get_len(doc_id_field) == sizeof(doc_id_t));
-	ut_a(dfield_get_type(doc_id_field)->mtype == DATA_INT);
-
-	const byte*	raw = static_cast<const byte*>(
-		dfield_get_data(doc_id_field));
-
-	return(fts_read_doc_id(raw));
-}
-
-/*********************************************************************//**
-Extract the doc id from the FTS hidden column.
-The record offsets are computed against the first index of the table (as
-returned by dict_table_get_first_index()); the doc id column is located
-in that index and its 8 bytes are read with mach_read_from_8().
-If the caller passes no heap and rec_get_offsets() had to create one, it
-is freed here before returning; a heap supplied by the caller is never
-freed.
-@return doc id that was extracted from rec */
-UNIV_INTERN
-doc_id_t
-fts_get_doc_id_from_rec(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- const rec_t* rec, /*!< in: rec */
- mem_heap_t* heap) /*!< in: heap */
-{
- ulint len;
- const byte* data;
- ulint col_no;
- doc_id_t doc_id = 0;
- dict_index_t* clust_index;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- mem_heap_t* my_heap = heap;
-
- ut_a(table->fts->doc_col != ULINT_UNDEFINED);
-
- clust_index = dict_table_get_first_index(table);
-
- rec_offs_init(offsets_);
-
- offsets = rec_get_offsets(
- rec, clust_index, offsets, ULINT_UNDEFINED, &my_heap);
-
- col_no = dict_col_get_clust_pos(
- &table->cols[table->fts->doc_col], clust_index);
- ut_ad(col_no != ULINT_UNDEFINED);
-
- data = rec_get_nth_field(rec, offsets, col_no, &len);
-
- ut_a(len == 8);
- ut_ad(8 == sizeof(doc_id));
- doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
-
- if (my_heap && !heap) { /* only free a heap that was created during this call */
- mem_heap_free(my_heap);
- }
-
- return(doc_id);
-}
-
-/*********************************************************************//**
-Const-friendly wrapper around fts_get_index_cache(): look up the index
-specific cache that belongs to an FTS index.
-@return the index specific cache else NULL */
-UNIV_INTERN
-fts_index_cache_t*
-fts_find_index_cache(
-/*=================*/
-	const fts_cache_t*	cache,	/*!< in: cache to search */
-	const dict_index_t*	index)	/*!< in: index to search for */
-{
-	/* The internal lookup function takes a non-const cache and
-	returns a non-const pointer, hence the const is cast away. */
-	fts_cache_t*	writable_cache = (fts_cache_t*) cache;
-
-	return(static_cast<fts_index_cache_t*>(
-		fts_get_index_cache(writable_cache, index)));
-}
-
-/*********************************************************************//**
-Look a word up in the word tree of an index cache.
-@return the word node vector if found else NULL */
-UNIV_INTERN
-const ib_vector_t*
-fts_cache_find_word(
-/*================*/
-	const fts_index_cache_t*index_cache,	/*!< in: cache to search */
-	const fts_string_t*	text)		/*!< in: word to search for */
-{
-	ib_rbt_bound_t		bound;
-#ifdef UNIV_SYNC_DEBUG
-	dict_table_t*		table = index_cache->index->table;
-	fts_cache_t*		cache = table->fts->cache;
-
-	ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_EX));
-#endif
-
-	if (rbt_search(index_cache->words, &bound, text) != 0) {
-		/* The word is not in the rb tree. */
-		return(NULL);
-	}
-
-	const fts_tokenizer_word_t*	hit
-		= rbt_value(fts_tokenizer_word_t, bound.last);
-
-	return(hit->nodes);
-}
-
-/*********************************************************************//**
-Scan the deleted doc id vector of the cache for a doc id.
-@return TRUE if deleted */
-UNIV_INTERN
-ibool
-fts_cache_is_deleted_doc_id(
-/*========================*/
-	const fts_cache_t*	cache,	/*!< in: cache to search */
-	doc_id_t		doc_id)	/*!< in: doc id to search for */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(mutex_own(&cache->deleted_lock));
-#endif
-
-	ulint	pos = 0;
-
-	while (pos < ib_vector_size(cache->deleted_doc_ids)) {
-		const fts_update_t*	entry
-			= static_cast<const fts_update_t*>(
-				ib_vector_get_const(
-					cache->deleted_doc_ids, pos));
-
-		if (entry->doc_id == doc_id) {
-
-			return(TRUE);
-		}
-
-		++pos;
-	}
-
-	return(FALSE);
-}
-
-/*********************************************************************//**
-Copy the doc id of every entry in the cache's deleted doc id vector to
-the end of the given vector. The copy is made while holding the cache's
-deleted_lock; nothing is appended when the cache has no such vector. */
-UNIV_INTERN
-void
-fts_cache_append_deleted_doc_ids(
-/*=============================*/
-	const fts_cache_t*	cache,	/*!< in: cache to use */
-	ib_vector_t*		vector)	/*!< in: append to this vector */
-{
-	/* The mutex calls take a non-const mutex, hence the cast. */
-	mutex_enter((ib_mutex_t*) &cache->deleted_lock);
-
-	if (cache->deleted_doc_ids != NULL) {
-
-		for (ulint pos = 0;
-		     pos < ib_vector_size(cache->deleted_doc_ids);
-		     ++pos) {
-
-			fts_update_t*	entry = static_cast<fts_update_t*>(
-				ib_vector_get(cache->deleted_doc_ids, pos));
-
-			ib_vector_push(vector, &entry->doc_id);
-		}
-	}
-
-	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
-}
-
-/*********************************************************************//**
-Wait for the background thread to start. We poll to detect change
-of state, which is acceptable, since the wait should happen only
-once during startup.
-The BG_THREAD_READY bit of fts->fts_status is tested under
-bg_threads_mutex; between two tests the caller sleeps for
-FTS_MAX_BACKGROUND_THREAD_WAIT. With max_wait == 0 the loop only ends
-once the bit is set. A message is written to stderr every
-FTS_BACKGROUND_THREAD_WAIT_COUNT unsuccessful polls.
-@return TRUE if the thread started else FALSE (i.e timed out) */
-UNIV_INTERN
-ibool
-fts_wait_for_background_thread_to_start(
-/*====================================*/
- dict_table_t* table, /*!< in: table to which the thread
- is attached */
- ulint max_wait) /*!< in: time in microseconds, if
- set to 0 then it disables
- timeout checking */
-{
- ulint count = 0;
- ibool done = FALSE;
-
- ut_a(max_wait == 0 || max_wait >= FTS_MAX_BACKGROUND_THREAD_WAIT); /* makes the subtraction below safe on the first round */
-
- for (;;) {
- fts_t* fts = table->fts;
-
- mutex_enter(&fts->bg_threads_mutex);
-
- if (fts->fts_status & BG_THREAD_READY) {
-
- done = TRUE;
- }
-
- mutex_exit(&fts->bg_threads_mutex);
-
- if (!done) {
- os_thread_sleep(FTS_MAX_BACKGROUND_THREAD_WAIT);
-
- if (max_wait > 0) {
-
- max_wait -= FTS_MAX_BACKGROUND_THREAD_WAIT;
-
- /* We ignore the residual value. */
- if (max_wait < FTS_MAX_BACKGROUND_THREAD_WAIT) {
- break;
- }
- }
-
- ++count;
- } else {
- break;
- }
-
- if (count >= FTS_BACKGROUND_THREAD_WAIT_COUNT) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error the background thread "
- "for the FTS table %s refuses to start\n",
- table->name);
-
- count = 0; /* start counting towards the next message */
- }
- }
-
- return(done);
-}
-
-/*********************************************************************//**
-Add the FTS document id hidden column.
-The column is named FTS_DOC_ID_COL_NAME, has main type DATA_INT, a length
-of sizeof(doc_id_t) and a precise type built from DATA_NOT_NULL,
-DATA_UNSIGNED, DATA_BINARY_TYPE and DATA_FTS_DOC_ID. The
-DICT_TF2_FTS_HAS_DOC_ID flag is set on the table afterwards. */
-UNIV_INTERN
-void
-fts_add_doc_id_column(
-/*==================*/
- dict_table_t* table, /*!< in/out: Table with FTS index */
- mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
-{
- dict_mem_table_add_col(
- table, heap,
- FTS_DOC_ID_COL_NAME,
- DATA_INT,
- dtype_form_prtype(
- DATA_NOT_NULL | DATA_UNSIGNED
- | DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0),
- sizeof(doc_id_t));
- DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
-}
-
-/*********************************************************************//**
-Update the query graph with a new document id.
-If *next_doc_id is non-zero that value is used as the Doc ID, otherwise a
-new one is requested with fts_get_next_doc_id(). On success the update
-field is pointed at the caller's next_doc_id buffer, which is overwritten
-with the Doc ID converted to storage byte order; the buffer must therefore
-stay valid for as long as the update field is in use.
-@return Doc ID used; FTS_NULL_DOC_ID if fts_get_next_doc_id() failed
-without producing one */
-UNIV_INTERN
-doc_id_t
-fts_update_doc_id(
-/*==============*/
-	dict_table_t*	table,		/*!< in: table */
-	upd_field_t*	ufield,		/*!< out: update node */
-	doc_id_t*	next_doc_id)	/*!< in/out: buffer for writing */
-{
-	/* Start from FTS_NULL_DOC_ID so that a failing
-	fts_get_next_doc_id() cannot make us return an
-	indeterminate value. */
-	doc_id_t	doc_id = FTS_NULL_DOC_ID;
-	dberr_t		error = DB_SUCCESS;
-
-	if (*next_doc_id) {
-		doc_id = *next_doc_id;
-	} else {
-		/* Get the new document id that will be added. */
-		error = fts_get_next_doc_id(table, &doc_id);
-	}
-
-	if (error == DB_SUCCESS) {
-		dict_index_t*	clust_index;
-
-		ufield->exp = NULL;
-
-		ufield->new_val.len = sizeof(doc_id);
-
-		clust_index = dict_table_get_first_index(table);
-
-		ufield->field_no = dict_col_get_clust_pos(
-			&table->cols[table->fts->doc_col], clust_index);
-
-		/* It is possible we update record that has
-		not yet be sync-ed from last crash. */
-
-		/* Convert to storage byte order. */
-		ut_a(doc_id != FTS_NULL_DOC_ID);
-		fts_write_doc_id((byte*) next_doc_id, doc_id);
-
-		ufield->new_val.data = next_doc_id;
-	}
-
-	return(doc_id);
-}
-
-/*********************************************************************//**
-Check if the table has an FTS index. This is the non-inline version
-of dict_table_has_fts_index(); it simply forwards to it.
-@return TRUE if table has an FTS index */
-UNIV_INTERN
-ibool
-fts_dict_table_has_fts_index(
-/*=========================*/
- dict_table_t* table) /*!< in: table */
-{
- return(dict_table_has_fts_index(table));
-}
-
-/*********************************************************************//**
-Create an instance of fts_t.
-The instance is allocated from a new memory heap, which it then owns
-through fts->fts_heap. Its bg_threads_mutex is created and fts->indexes
-is filled by dict_table_get_all_fts_indexes(). The function asserts that
-table->fts is not set but does not assign it; the new instance is only
-handed back as the return value.
-@return instance of fts_t */
-UNIV_INTERN
-fts_t*
-fts_create(
-/*=======*/
- dict_table_t* table) /*!< in/out: table with FTS indexes */
-{
- fts_t* fts;
- ib_alloc_t* heap_alloc;
- mem_heap_t* heap;
-
- ut_a(!table->fts);
-
- heap = mem_heap_create(512);
-
- fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts)));
-
- memset(fts, 0x0, sizeof(*fts));
-
- fts->fts_heap = heap;
-
- fts->doc_col = ULINT_UNDEFINED; /* no doc id column known yet */
-
- mutex_create(
- fts_bg_threads_mutex_key, &fts->bg_threads_mutex,
- SYNC_FTS_BG_THREADS);
-
- heap_alloc = ib_heap_allocator_create(heap);
- fts->indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
- dict_table_get_all_fts_indexes(table, fts->indexes);
-
- return(fts);
-}
-
-/*********************************************************************//**
-Free the FTS resources.
-The bg_threads_mutex is freed, the cache (if there is one) is cleared and
-destroyed, and the heap stored in fts->fts_heap is freed. table->fts is
-set to NULL at the end. */
-UNIV_INTERN
-void
-fts_free(
-/*=====*/
- dict_table_t* table) /*!< in/out: table with FTS indexes */
-{
- fts_t* fts = table->fts;
-
- mutex_free(&fts->bg_threads_mutex);
-
- ut_ad(!fts->add_wq);
-
- if (fts->cache) {
- fts_cache_clear(fts->cache);
- fts_cache_destroy(fts->cache);
- fts->cache = NULL;
- }
-
- mem_heap_free(fts->fts_heap); /* fts must not be dereferenced after this if it lives in this heap, as it does when built by fts_create() */
-
- table->fts = NULL;
-}
-
-/*********************************************************************//**
-Signal FTS threads to initiate shutdown.
-Sets the BG_THREAD_STOP bit in fts->fts_status while holding
-bg_threads_mutex. The table argument is not used by this function. */
-UNIV_INTERN
-void
-fts_start_shutdown(
-/*===============*/
- dict_table_t* table, /*!< in: table with FTS indexes */
- fts_t* fts) /*!< in: fts instance that needs
- to be informed about shutdown */
-{
- mutex_enter(&fts->bg_threads_mutex);
-
- fts->fts_status |= BG_THREAD_STOP;
-
- mutex_exit(&fts->bg_threads_mutex);
-
-}
-
-/*********************************************************************//**
-Wait for FTS threads to shutdown.
-Asserts that BG_THREAD_STOP has already been set in fts->fts_status and
-then calls dict_table_wait_for_bg_threads_to_exit(table, 20000).
-bg_threads_mutex is held for the whole duration of that call.
-NOTE(review): the unit of the 20000 argument is not visible here -
-confirm against dict_table_wait_for_bg_threads_to_exit(). */
-UNIV_INTERN
-void
-fts_shutdown(
-/*=========*/
- dict_table_t* table, /*!< in: table with FTS indexes */
- fts_t* fts) /*!< in: fts instance to shutdown */
-{
- mutex_enter(&fts->bg_threads_mutex);
-
- ut_a(fts->fts_status & BG_THREAD_STOP);
-
- dict_table_wait_for_bg_threads_to_exit(table, 20000);
-
- mutex_exit(&fts->bg_threads_mutex);
-}
-
-/*********************************************************************//**
-Copy the table entries of one FTS savepoint into another.
-Every fts_trx_table_t referenced from src->tables is cloned with
-fts_trx_table_clone() and a pointer to the clone is inserted into
-dst->tables. src->tables is used without a NULL check, so the caller
-must make sure it is set. */
-UNIV_INLINE
-void
-fts_savepoint_copy(
-/*===============*/
- const fts_savepoint_t* src, /*!< in: source savepoint */
- fts_savepoint_t* dst) /*!< out: destination savepoint */
-{
- const ib_rbt_node_t* node;
- const ib_rbt_t* tables;
-
- tables = src->tables;
-
- for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
-
- fts_trx_table_t* ftt_dst;
- const fts_trx_table_t** ftt_src;
-
- ftt_src = rbt_value(const fts_trx_table_t*, node); /* the tree stores pointers to fts_trx_table_t */
-
- ftt_dst = fts_trx_table_clone(*ftt_src);
-
- rbt_insert(dst->tables, &ftt_dst, &ftt_dst); /* the pointer serves as both key and value */
- }
-}
-
-/*********************************************************************//**
-Take a FTS savepoint.
-A new named savepoint is created in fts_trx->savepoints using the heap of
-the FTS trx. If the savepoint that was last in the vector before the call
-has a tables tree, its table entries are copied into the new savepoint.
-The trx argument is not used by this function. */
-UNIV_INTERN
-void
-fts_savepoint_take(
-/*===============*/
- trx_t* trx, /*!< in: transaction */
- fts_trx_t* fts_trx, /*!< in: fts transaction */
- const char* name) /*!< in: savepoint name */
-{
- mem_heap_t* heap;
- fts_savepoint_t* savepoint;
- fts_savepoint_t* last_savepoint;
-
- ut_a(name != NULL);
-
- heap = fts_trx->heap;
-
- /* The implied savepoint must exist. */
- ut_a(ib_vector_size(fts_trx->savepoints) > 0);
-
- last_savepoint = static_cast<fts_savepoint_t*>(
- ib_vector_last(fts_trx->savepoints)); /* fetched before the new savepoint is created */
- savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
-
- if (last_savepoint->tables != NULL) {
- fts_savepoint_copy(last_savepoint, savepoint);
- }
-}
-
-/*********************************************************************//**
-Lookup a savepoint instance by name. The element at index 0 is never
-examined (the callers in this file treat it as the implied savepoint).
-@return index of the first matching savepoint, ULINT_UNDEFINED if not found */
-UNIV_INLINE
-ulint
-fts_savepoint_lookup(
-/*==================*/
-	ib_vector_t*	savepoints,	/*!< in: savepoints */
-	const char*	name)		/*!< in: savepoint name */
-{
-	const ulint	n_savepoints = ib_vector_size(savepoints);
-
-	ut_a(n_savepoints > 0);
-
-	ulint	pos = 1;
-
-	while (pos < n_savepoints) {
-		const fts_savepoint_t*	candidate;
-
-		candidate = static_cast<fts_savepoint_t*>(
-			ib_vector_get(savepoints, pos));
-
-		if (strcmp(name, candidate->name) == 0) {
-			return(pos);
-		}
-
-		++pos;
-	}
-
-	return(ULINT_UNDEFINED);
-}
-
-/*********************************************************************//**
-Release the savepoint data identified by name. Only the named savepoint
-is passed to fts_savepoint_free() and removed from the vector; all other
-savepoints stay in place. If the named savepoint is the last element, its
-tables pointer is first swapped with that of the element just before it,
-so the previous savepoint ends up holding the released savepoint's tables
-and the previous savepoint's old tables are the ones handed to
-fts_savepoint_free(). Nothing is done when the name is not found.
-The function returns no value. */
-UNIV_INTERN
-void
-fts_savepoint_release(
-/*==================*/
- trx_t* trx, /*!< in: transaction */
- const char* name) /*!< in: savepoint name */
-{
- ut_a(name != NULL);
-
- ib_vector_t* savepoints = trx->fts_trx->savepoints;
-
- ut_a(ib_vector_size(savepoints) > 0);
-
- ulint i = fts_savepoint_lookup(savepoints, name);
- if (i != ULINT_UNDEFINED) {
- ut_a(i >= 1);
-
- fts_savepoint_t* savepoint;
- savepoint = static_cast<fts_savepoint_t*>(
- ib_vector_get(savepoints, i));
-
- if (i == ib_vector_size(savepoints) - 1) {
- /* If the savepoint is the last, we save its
- tables to the previous savepoint. */
- fts_savepoint_t* prev_savepoint;
- prev_savepoint = static_cast<fts_savepoint_t*>(
- ib_vector_get(savepoints, i - 1));
-
- ib_rbt_t* tables = savepoint->tables; /* three-step swap of the two tables pointers */
- savepoint->tables = prev_savepoint->tables;
- prev_savepoint->tables = tables;
- }
-
- fts_savepoint_free(savepoint);
- ib_vector_remove(savepoints, *(void**)savepoint); /* passes the first pointer-sized word stored in the element */
-
- /* Make sure we don't delete the implied savepoint. */
- ut_a(ib_vector_size(savepoints) > 0);
- }
-}
-
-/**********************************************************************//**
-Refresh last statement savepoint: pop and free the savepoint held in
-fts_trx->last_stmt, then push a new unnamed one in its place. */
-UNIV_INTERN
-void
-fts_savepoint_laststmt_refresh(
-/*===========================*/
-	trx_t*	trx)	/*!< in: transaction */
-{
-	ib_vector_t*		stmt_stack = trx->fts_trx->last_stmt;
-	fts_savepoint_t*	stale;
-
-	stale = static_cast<fts_savepoint_t*>(ib_vector_pop(stmt_stack));
-	fts_savepoint_free(stale);
-
-	/* Exactly one element is expected to have been in the vector. */
-	ut_ad(ib_vector_is_empty(stmt_stack));
-
-	fts_savepoint_create(stmt_stack, NULL, NULL);
-}
-
-/********************************************************************
-Undo the Doc ID add/delete operations in last stmt.
-For every row recorded in l_ftt->rows the row with the same doc_id is
-looked up in s_ftt->rows. Rows without a match are skipped. For a match
-the action depends on the state recorded in the last statement:
-  FTS_INSERT: the matching row is removed from s_ftt->rows and freed;
-  FTS_DELETE: a matching row in state FTS_NOTHING is set back to
-              FTS_INSERT, a matching row in state FTS_DELETE is removed
-              and freed, any other state is left untouched;
-  FTS_MODIFY, FTS_NOTHING: nothing is done;
-  any other value: ut_error. */
-static
-void
-fts_undo_last_stmt(
-/*===============*/
- fts_trx_table_t* s_ftt, /*!< in: Transaction FTS table */
- fts_trx_table_t* l_ftt) /*!< in: last stmt FTS table */
-{
- ib_rbt_t* s_rows;
- ib_rbt_t* l_rows;
- const ib_rbt_node_t* node;
-
- l_rows = l_ftt->rows;
- s_rows = s_ftt->rows;
-
- for (node = rbt_first(l_rows);
- node;
- node = rbt_next(l_rows, node)) {
- fts_trx_row_t* l_row = rbt_value(fts_trx_row_t, node);
- ib_rbt_bound_t parent;
-
- rbt_search(s_rows, &parent, &(l_row->doc_id));
-
- if (parent.result == 0) { /* a row with this doc_id exists in s_rows */
- fts_trx_row_t* s_row = rbt_value(
- fts_trx_row_t, parent.last);
-
- switch (l_row->state) {
- case FTS_INSERT:
- ut_free(rbt_remove_node(s_rows, parent.last));
- break;
-
- case FTS_DELETE:
- if (s_row->state == FTS_NOTHING) {
- s_row->state = FTS_INSERT;
- } else if (s_row->state == FTS_DELETE) {
- ut_free(rbt_remove_node(
- s_rows, parent.last));
- }
- break;
-
- /* FIXME: Check if FTS_MODIFY need to be addressed */
- case FTS_MODIFY:
- case FTS_NOTHING:
- break;
- default:
- ut_error;
- }
- }
- }
-}
-
-/**********************************************************************//**
-Roll back the FTS changes of the last statement. For every table recorded
-in the last element of fts_trx->last_stmt, the table with the same id is
-looked up in the last element of fts_trx->savepoints; when it is found,
-fts_undo_last_stmt() is applied to the pair. The function returns no
-value. */
-UNIV_INTERN
-void
-fts_savepoint_rollback_last_stmt(
-/*=============================*/
-	trx_t*	trx)	/*!< in: transaction */
-{
-	fts_trx_t*		fts_trx = trx->fts_trx;
-
-	fts_savepoint_t*	top_savepoint = static_cast<fts_savepoint_t*>(
-		ib_vector_last(fts_trx->savepoints));
-	fts_savepoint_t*	stmt_savepoint = static_cast<fts_savepoint_t*>(
-		ib_vector_last(fts_trx->last_stmt));
-
-	ib_rbt_t*		stmt_tables = stmt_savepoint->tables;
-	ib_rbt_t*		trx_tables = top_savepoint->tables;
-
-	const ib_rbt_node_t*	cur = rbt_first(stmt_tables);
-
-	while (cur != NULL) {
-		ib_rbt_bound_t		found;
-		fts_trx_table_t**	stmt_ftt;
-
-		stmt_ftt = rbt_value(fts_trx_table_t*, cur);
-
-		rbt_search_cmp(
-			trx_tables, &found, &(*stmt_ftt)->table->id,
-			fts_trx_table_id_cmp, NULL);
-
-		if (found.result == 0) {
-			fts_trx_table_t**	trx_ftt;
-
-			trx_ftt = rbt_value(fts_trx_table_t*, found.last);
-
-			fts_undo_last_stmt(*trx_ftt, *stmt_ftt);
-		}
-
-		cur = rbt_next(stmt_tables, cur);
-	}
-}
-
-/**********************************************************************//**
-Rollback to savepoint identified by name. All savepoints from the top of
-the vector down to and including the named one are popped, then any
-trailing savepoints without a name are popped as well (never the element
-at index 0), and finally a new savepoint with the same name is taken.
-Nothing is done when the name is not found. The function returns no
-value. */
-UNIV_INTERN
-void
-fts_savepoint_rollback(
-/*===================*/
-	trx_t*		trx,	/*!< in: transaction */
-	const char*	name)	/*!< in: savepoint name */
-{
-	ut_a(name != NULL);
-
-	ib_vector_t*	stack = trx->fts_trx->savepoints;
-	const ulint	target = fts_savepoint_lookup(stack, name);
-
-	if (target == ULINT_UNDEFINED) {
-		return;
-	}
-
-	ut_a(target > 0);
-
-	/* Pop everything from the top of the stack down to and
-	including the instance that was found. */
-	while (ib_vector_size(stack) > target) {
-		fts_savepoint_t*	popped;
-
-		popped = static_cast<fts_savepoint_t*>(ib_vector_pop(stack));
-
-		if (popped->name == NULL) {
-			continue;
-		}
-
-		/* Since name was allocated on the heap, the memory will
-		be released when the transaction completes. */
-		popped->name = NULL;
-
-		fts_savepoint_free(popped);
-	}
-
-	/* Pop all elements from the top of the stack that may have been
-	released. We have to be careful that we don't delete the implied
-	savepoint. */
-	for (;;) {
-		fts_savepoint_t*	top;
-
-		top = static_cast<fts_savepoint_t*>(ib_vector_last(stack));
-
-		if (ib_vector_size(stack) <= 1 || top->name != NULL) {
-			break;
-		}
-
-		ib_vector_pop(stack);
-	}
-
-	/* Make sure we don't delete the implied savepoint. */
-	ut_a(ib_vector_size(stack) > 0);
-
-	/* Restore the savepoint. */
-	fts_savepoint_take(trx, trx->fts_trx, name);
-}
-
-/**********************************************************************//**
-Check if a table is an FTS auxiliary table name.
-The part after the first '/' must start with "FTS_", be longer than 20
-bytes, and continue with a parent table id, an underscore and then either
-a common table suffix (including the obsolete "ADDED" and "STOPWORDS"),
-or an index id, an underscore and an index table suffix or "DOC_ID".
-table->parent_id and table->index_id are filled in as they are parsed and
-may have been written even when FALSE is returned.
-Names that are too long for the local buffer, or that end right after an
-underscore, are rejected with FALSE instead of overflowing the buffer or
-aborting the server.
-@return TRUE if the name matches an auxiliary table name pattern */
-static
-ibool
-fts_is_aux_table_name(
-/*==================*/
-	fts_aux_table_t*table,	/*!< out: table info */
-	const char*	name,	/*!< in: table name */
-	ulint		len)	/*!< in: length of table name */
-{
-	const char*	ptr;
-	char*		end;
-	char		my_name[MAX_FULL_NAME_LEN + 1];
-
-	ut_ad(len <= MAX_FULL_NAME_LEN);
-
-	/* The debug assertion above is compiled out of release builds,
-	so guard the copy into the fixed-size buffer explicitly. */
-	if (len > MAX_FULL_NAME_LEN) {
-		return(FALSE);
-	}
-
-	ut_memcpy(my_name, name, len);
-	my_name[len] = 0;
-	end = my_name + len;
-
-	ptr = static_cast<const char*>(memchr(my_name, '/', len));
-
-	if (ptr != NULL) {
-		/* We will start the match after the '/' */
-		++ptr;
-		len = end - ptr;
-	}
-
-	/* All auxiliary tables are prefixed with "FTS_" and the name
-	length will be at the very least greater than 20 bytes. */
-	if (ptr != NULL && len > 20 && strncmp(ptr, "FTS_", 4) == 0) {
-		ulint	i;
-
-		/* Skip the prefix. */
-		ptr += 4;
-		len -= 4;
-
-		/* Try and read the table id. */
-		if (!fts_read_object_id(&table->parent_id, ptr)) {
-			return(FALSE);
-		}
-
-		/* Skip the table id. */
-		ptr = static_cast<const char*>(memchr(ptr, '_', len));
-
-		if (ptr == NULL) {
-			return(FALSE);
-		}
-
-		/* Skip the underscore. */
-		++ptr;
-
-		/* Nothing follows the underscore: an ordinary table whose
-		name merely resembles the pattern, not an aux table. */
-		if (ptr >= end) {
-			return(FALSE);
-		}
-
-		len = end - ptr;
-
-		/* First search the common table suffix array. */
-		for (i = 0; fts_common_tables[i] != NULL; ++i) {
-
-			if (strncmp(ptr, fts_common_tables[i], len) == 0) {
-				return(TRUE);
-			}
-		}
-
-		/* Could be obsolete common tables. */
-		if (strncmp(ptr, "ADDED", len) == 0
-		    || strncmp(ptr, "STOPWORDS", len) == 0) {
-			return(TRUE);
-		}
-
-		/* Try and read the index id. */
-		if (!fts_read_object_id(&table->index_id, ptr)) {
-			return(FALSE);
-		}
-
-		/* Skip the index id. */
-		ptr = static_cast<const char*>(memchr(ptr, '_', len));
-
-		if (ptr == NULL) {
-			return(FALSE);
-		}
-
-		/* Skip the underscore. */
-		++ptr;
-
-		if (ptr >= end) {
-			return(FALSE);
-		}
-
-		len = end - ptr;
-
-		/* Search the FT index specific array. */
-		for (i = 0; fts_index_selector[i].value; ++i) {
-
-			if (strncmp(ptr, fts_get_suffix(i), len) == 0) {
-				return(TRUE);
-			}
-		}
-
-		/* Other FT index specific table(s). */
-		if (strncmp(ptr, "DOC_ID", len) == 0) {
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-
-/**********************************************************************//**
-Callback function to read one row of a (NAME, ID) select list.
-A zero-filled fts_aux_table_t is pushed onto the vector passed in
-user_arg. If the NAME column does not look like an FTS auxiliary table
-name, the element is popped again and the remaining columns are skipped.
-Otherwise the name is copied (NUL terminated) into the heap behind the
-vector's allocator and the 8-byte ID column is stored in table->id.
-@return Always return TRUE */
-static
-ibool
-fts_read_tables(
-/*============*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: pointer to ib_vector_t */
-{
- int i;
- fts_aux_table_t*table;
- mem_heap_t* heap;
- ibool done = FALSE;
- ib_vector_t* tables = static_cast<ib_vector_t*>(user_arg);
- sel_node_t* sel_node = static_cast<sel_node_t*>(row);
- que_node_t* exp = sel_node->select_list;
-
- /* Must be a heap allocated vector. */
- ut_a(tables->allocator->arg != NULL);
-
- /* We will use this heap for allocating strings. */
- heap = static_cast<mem_heap_t*>(tables->allocator->arg);
- table = static_cast<fts_aux_table_t*>(ib_vector_push(tables, NULL));
-
- memset(table, 0x0, sizeof(*table));
-
- /* Iterate over the columns and read the values. */
- for (i = 0; exp && !done; exp = que_node_get_next(exp), ++i) {
-
- dfield_t* dfield = que_node_get_val(exp);
- void* data = dfield_get_data(dfield);
- ulint len = dfield_get_len(dfield);
-
- ut_a(len != UNIV_SQL_NULL);
-
- /* Note: The column numbers below must match the SELECT */
- switch (i) {
- case 0: /* NAME */
-
- if (!fts_is_aux_table_name(
- table, static_cast<const char*>(data), len)) {
- ib_vector_pop(tables); /* discard the element pushed above */
- done = TRUE; /* stops the column loop */
- break;
- }
-
- table->name = static_cast<char*>(
- mem_heap_alloc(heap, len + 1));
- memcpy(table->name, data, len);
- table->name[len] = 0;
- break;
-
- case 1: /* ID */
- ut_a(len == 8);
- table->id = mach_read_from_8(
- static_cast<const byte*>(data));
- break;
-
- default:
- ut_error;
- }
- }
-
- return(TRUE);
-}
-
-/******************************************************************//**
-Callback that computes the flags2 value of a hex formatted FTS table.
-The current value is read from the first column of the select list (the
-MIX_LEN column of SYS_TABLES in the query built by the caller), the
-DICT_TF2_FTS_AUX_HEX_NAME bit is added and the result is written as a
-4-byte value to the buffer passed in user_arg.
-@return always FALSE */
-static
-ibool
-fts_set_hex_format(
-/*===============*/
-	void*	row,		/*!< in: sel_node_t* */
-	void*	user_arg)	/*!< in/out: 4-byte buffer that receives
-				the new flags2 value */
-{
-	sel_node_t*	sel = static_cast<sel_node_t*>(row);
-	dfield_t*	mix_len = que_node_get_val(sel->select_list);
-	byte*		out = static_cast<byte*>(user_arg);
-
-	ut_ad(dtype_get_mtype(dfield_get_type(mix_len)) == DATA_INT);
-	ut_ad(dfield_get_len(mix_len) == sizeof(ib_uint32_t));
-
-	/* There should be at most one matching record. So the value
-	must be the default value. */
-	ut_ad(mach_read_from_4(out) == ULINT32_UNDEFINED);
-
-	byte*	stored = static_cast<byte*>(dfield_get_data(mix_len));
-	ulint	flags2 = mach_read_from_4(stored);
-
-	flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
-	mach_write_to_4(out, flags2);
-
-	return(FALSE);
-}
-
-/*****************************************************************//**
-Update the DICT_TF2_FTS_AUX_HEX_NAME flag in SYS_TABLES.
-The procedure below fetches MIX_LEN of the row with the given table id
-through fts_set_hex_format(), which stores the new value in the local
-flags2; the same local is bound as :flags2 for the following UPDATE.
-If the transaction has no dictionary operation set yet it is marked as
-TRX_DICT_OP_INDEX.
-NOTE(review): the ut_a() after que_eval_sql() fires whenever flags2 was
-not filled in by the callback, regardless of the returned error code -
-confirm that this is intended for the failure case.
-@return DB_SUCCESS or error code. */
-UNIV_INTERN
-dberr_t
-fts_update_hex_format_flag(
-/*=======================*/
- trx_t* trx, /*!< in/out: transaction that
- covers the update */
- table_id_t table_id, /*!< in: Table for which we want
- to set the root table->flags2 */
- bool dict_locked) /*!< in: set to true if the
- caller already owns the
- dict_sys_t::mutex. */
-{
- pars_info_t* info;
- ib_uint32_t flags2;
-
- static const char sql[] =
- "PROCEDURE UPDATE_HEX_FORMAT_FLAG() IS\n"
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS\n"
- " SELECT MIX_LEN "
- " FROM SYS_TABLES "
- " WHERE ID = :table_id FOR UPDATE;"
- "\n"
- "BEGIN\n"
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c % NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "UPDATE SYS_TABLES"
- " SET MIX_LEN = :flags2"
- " WHERE ID = :table_id;\n"
- "CLOSE c;\n"
- "END;\n";
-
- flags2 = ULINT32_UNDEFINED; /* sentinel checked by the callback and by the ut_a() below */
-
- info = pars_info_create();
-
- pars_info_add_ull_literal(info, "table_id", table_id);
- pars_info_bind_int4_literal(info, "flags2", &flags2);
-
- pars_info_bind_function(
- info, "my_func", fts_set_hex_format, &flags2);
-
- if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
- trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
- }
-
- dberr_t err = que_eval_sql(info, sql, !dict_locked, trx);
-
- ut_a(flags2 != ULINT32_UNDEFINED);
-
- return (err);
-}
-
-/*********************************************************************//**
-Rename an aux table to HEX format. It's called when "%016llu" is used
-to format an object id in table name, which only happens in Windows.
-The suffix of the old name is located by skipping the database part,
-"FTS_", the table id and, for index tables (aux_table->index_id != 0),
-the index id. It must equal one of the known common or index suffixes,
-otherwise the ut_a() on fts_table.suffix fires. The new name is built by
-fts_get_table_name() and released with mem_free() before returning. The
-outcome of the rename is logged.
-@return the result of row_rename_table_for_mysql() */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_rename_one_aux_table_to_hex_format(
-/*===================================*/
- trx_t* trx, /*!< in: transaction */
- const fts_aux_table_t* aux_table, /*!< in: table info */
- const dict_table_t* parent_table) /*!< in: parent table name */
-{
- const char* ptr;
- fts_table_t fts_table;
- char* new_name;
- dberr_t error;
-
- ptr = strchr(aux_table->name, '/');
- ut_a(ptr != NULL);
- ++ptr;
- /* Skip "FTS_", table id and underscore */
- for (ulint i = 0; i < 2; ++i) {
- ptr = strchr(ptr, '_');
- ut_a(ptr != NULL);
- ++ptr;
- }
-
- fts_table.suffix = NULL;
- if (aux_table->index_id == 0) {
- fts_table.type = FTS_COMMON_TABLE;
-
- for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
- if (strcmp(ptr, fts_common_tables[i]) == 0) {
- fts_table.suffix = fts_common_tables[i];
- break;
- }
- }
- } else {
- fts_table.type = FTS_INDEX_TABLE;
-
- /* Skip index id and underscore */
- ptr = strchr(ptr, '_');
- ut_a(ptr != NULL);
- ++ptr;
-
- for (ulint i = 0; fts_index_selector[i].value; ++i) {
- if (strcmp(ptr, fts_get_suffix(i)) == 0) {
- fts_table.suffix = fts_get_suffix(i);
- break;
- }
- }
- }
-
- ut_a(fts_table.suffix != NULL);
-
- fts_table.parent = parent_table->name;
- fts_table.table_id = aux_table->parent_id;
- fts_table.index_id = aux_table->index_id;
- fts_table.table = parent_table;
-
- new_name = fts_get_table_name(&fts_table);
- ut_ad(strcmp(new_name, aux_table->name) != 0);
-
- if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
- trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
- }
-
- error = row_rename_table_for_mysql(aux_table->name, new_name, trx,
- FALSE);
-
- if (error != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Failed to rename aux table \'%s\' to "
- "new format \'%s\'. ",
- aux_table->name, new_name);
- } else {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Renamed aux table \'%s\' to \'%s\'.",
- aux_table->name, new_name);
- }
-
- mem_free(new_name);
-
- return (error);
-}
-
-/**********************************************************************//**
-Rename all aux tables of a parent table to HEX format. Also set aux tables'
-flags2 and parent table's flags2 with DICT_TF2_FTS_AUX_HEX_NAME.
-It's called when "%016llu" is used to format an object id in table name,
-which only happens in Windows.
-Note the ids in tables are correct but the names are old ambiguous ones.
-
-This function should make sure that either all the parent table and aux tables
-are set DICT_TF2_FTS_AUX_HEX_NAME with flags2 or none of them are set.
-
-Sequence: the parent's flag is updated in SYS_TABLES and in memory, then
-each aux table in the vector is flagged in memory, renamed, and flagged
-in SYS_TABLES. On the first failure trx is rolled back and the loop
-stops; the tables processed so far (indexes 0..count) are then renamed
-back, each in its own background transaction, and the in-memory flag of
-the parent is cleared again.
-@return DB_SUCCESS or the error of the step that failed */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_rename_aux_tables_to_hex_format_low(
-/*====================================*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* parent_table, /*!< in: parent table */
- ib_vector_t* tables) /*!< in: aux tables to rename. */
-{
- dberr_t error;
- ulint count;
-
- ut_ad(!DICT_TF2_FLAG_IS_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
- ut_ad(!ib_vector_is_empty(tables));
-
- error = fts_update_hex_format_flag(trx, parent_table->id, true);
-
- if (error != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Setting parent table %s to hex format failed.",
- parent_table->name);
-
- fts_sql_rollback(trx);
- return (error);
- }
-
- DICT_TF2_FLAG_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
-
- for (count = 0; count < ib_vector_size(tables); ++count) {
- dict_table_t* table;
- fts_aux_table_t* aux_table;
-
- aux_table = static_cast<fts_aux_table_t*>(
- ib_vector_get(tables, count));
-
- table = dict_table_open_on_id(aux_table->id, TRUE,
- DICT_TABLE_OP_NORMAL);
-
- ut_ad(table != NULL);
- ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_AUX_HEX_NAME));
-
- /* Set HEX_NAME flag here to make sure we can get correct
- new table name in following function */
- DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
- error = fts_rename_one_aux_table_to_hex_format(trx,
- aux_table, parent_table);
- /* We will rollback the trx if the error != DB_SUCCESS,
- so setting the flag here is the same with setting it in
- row_rename_table_for_mysql */
- DBUG_EXECUTE_IF("rename_aux_table_fail", error = DB_ERROR;);
-
- if (error != DB_SUCCESS) {
- dict_table_close(table, TRUE, FALSE);
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Failed to rename one aux table %s "
- "Will revert all successful rename "
- "operations.", aux_table->name);
-
- fts_sql_rollback(trx);
- break;
- }
-
- error = fts_update_hex_format_flag(trx, aux_table->id, true);
- dict_table_close(table, TRUE, FALSE);
-
- if (error != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Setting aux table %s to hex format failed.",
- aux_table->name);
-
- fts_sql_rollback(trx);
- break;
- }
- }
-
- if (error != DB_SUCCESS) {
- ut_ad(count != ib_vector_size(tables)); /* the loop above was left through break */
- /* If rename fails, the trx would be rolled back, we can't
- use it any more, we'll start a new background trx to do
- the reverting. */
- ut_a(trx->state == TRX_STATE_NOT_STARTED);
- bool not_rename = false;
-
- /* Try to revert those successful rename operations
- in order to revert the ibd file rename. */
- for (ulint i = 0; i <= count; ++i) { /* includes the table at which the loop above stopped */
- dict_table_t* table;
- fts_aux_table_t* aux_table;
- trx_t* trx_bg;
- dberr_t err;
-
- aux_table = static_cast<fts_aux_table_t*>(
- ib_vector_get(tables, i));
-
- table = dict_table_open_on_id(aux_table->id, TRUE,
- DICT_TABLE_OP_NORMAL);
- ut_ad(table != NULL);
-
- if (not_rename) { /* an earlier revert failed: only clear flags from now on */
- DICT_TF2_FLAG_UNSET(table,
- DICT_TF2_FTS_AUX_HEX_NAME);
- }
-
- if (!DICT_TF2_FLAG_IS_SET(table,
- DICT_TF2_FTS_AUX_HEX_NAME)) {
- dict_table_close(table, TRUE, FALSE);
- continue;
- }
-
- trx_bg = trx_allocate_for_background();
- trx_bg->op_info = "Revert half done rename";
- trx_bg->dict_operation_lock_mode = RW_X_LATCH;
- trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE);
-
- DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS_AUX_HEX_NAME);
- err = row_rename_table_for_mysql(table->name,
- aux_table->name,
- trx_bg, FALSE);
-
- trx_bg->dict_operation_lock_mode = 0;
- dict_table_close(table, TRUE, FALSE);
-
- if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN, "Failed to revert "
- "table %s. Please revert manually.",
- table->name);
- fts_sql_rollback(trx_bg);
- trx_free_for_background(trx_bg);
- /* Continue to clear aux tables' flags2 */
- not_rename = true;
- continue;
- }
-
- fts_sql_commit(trx_bg);
- trx_free_for_background(trx_bg);
- }
-
- DICT_TF2_FLAG_UNSET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
- }
-
- return (error);
-}
-
-/**********************************************************************//**
-Convert an id, which is actually a decimal number but was regarded as a
-HEX number when it was read from a string, to its real value. The id is
-printed with the hex format UINT64PFx and the resulting digits are then
-parsed again as a decimal number.
-@return the value obtained by reading the printed digits as decimal */
-static
-ib_id_t
-fts_fake_hex_to_dec(
-/*================*/
-	ib_id_t	id)	/*!< in: number to convert */
-{
-	char	digits[FTS_AUX_MIN_TABLE_ID_LENGTH];
-	ib_id_t	as_decimal = 0;
-	int	n MY_ATTRIBUTE((unused));
-
-	/* Step 1: id -> hex text. */
-	n = sprintf(digits, UINT64PFx, id);
-	ut_ad(n == 16);
-
-	/* Step 2: the same text -> decimal value. */
-#ifdef _WIN32
-	n = sscanf(digits, "%016llu", &as_decimal);
-#else
-	n = sscanf(digits, "%016" PRIu64, &as_decimal);
-#endif /* _WIN32 */
-	ut_ad(n == 1);
-
-	return as_decimal;
-}
-
-/*********************************************************************//**
-Compare two fts_aux_table_t parent_ids.
-The ids are compared directly instead of subtracting them: narrowing the
-difference of two wide ids to int can flip the sign, or yield 0 for two
-different ids, which breaks the ordering a sort relies on.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_check_aux_table_parent_id_cmp(
-/*==============================*/
-	const void*	p1,	/*!< in: id1 */
-	const void*	p2)	/*!< in: id2 */
-{
-	const fts_aux_table_t*	fa1 = static_cast<const fts_aux_table_t*>(p1);
-	const fts_aux_table_t*	fa2 = static_cast<const fts_aux_table_t*>(p2);
-
-	if (fa1->parent_id < fa2->parent_id) {
-		return(-1);
-	}
-
-	return(fa1->parent_id > fa2->parent_id ? 1 : 0);
-}
-
-/** Mark every index listed in parent_table->fts->indexes as corrupted.
-If the transaction has no dictionary operation set yet, it is marked as
-TRX_DICT_OP_INDEX first.
-@param[in]	trx		transaction
-@param[in, out]	parent_table	table whose FTS indexes are marked as
-				corrupted */
-static
-void
-fts_parent_all_index_set_corrupt(
-	trx_t*		trx,
-	dict_table_t*	parent_table)
-{
-	fts_t*	fts = parent_table->fts;
-
-	if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
-		trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
-	}
-
-	ulint	pos = 0;
-
-	while (pos < ib_vector_size(fts->indexes)) {
-		dict_index_t*	fts_index;
-
-		fts_index = static_cast<dict_index_t*>(
-			ib_vector_getp_const(fts->indexes, pos));
-
-		dict_set_corrupted(fts_index, trx, "DROP ORPHANED TABLE");
-
-		++pos;
-	}
-}
-
-/** Mark the FTS index whose id matches the given id as corrupted. Only
-the first match in table->fts->indexes is marked; nothing is marked when
-no index matches. If the transaction has no dictionary operation set yet,
-it is marked as TRX_DICT_OP_INDEX first.
-@param[in]	trx	transaction
-@param[in]	id	index id to search
-@param[in, out]	table	table whose FTS indexes are searched */
-static
-void
-fts_set_index_corrupt(
-	trx_t*		trx,
-	index_id_t	id,
-	dict_table_t*	table)
-{
-	fts_t*	fts = table->fts;
-
-	if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
-		trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
-	}
-
-	ulint	pos = 0;
-
-	while (pos < ib_vector_size(fts->indexes)) {
-		dict_index_t*	candidate;
-
-		candidate = static_cast<dict_index_t*>(
-			ib_vector_getp_const(fts->indexes, pos));
-
-		if (candidate->id == id) {
-			dict_set_corrupted(candidate, trx,
-					   "DROP ORPHANED TABLE");
-			return;
-		}
-
-		++pos;
-	}
-}
-
-/** Check whether the index an aux table belongs to is corrupted.
-The parent table is opened by aux_table->parent_id and its index list is
-searched for aux_table->index_id. The corruption state is read while the
-table handle is still open, and the handle is closed on every path.
-@param[in]	aux_table	auxiliary table
-@retval nonzero if index is corrupted, zero for valid index, and zero as
-well when the parent table or the index cannot be found */
-static
-ulint
-fts_check_corrupt_index(
-	fts_aux_table_t*	aux_table)
-{
-	dict_table_t*	table;
-	dict_index_t*	index;
-	ulint		corrupted = 0;
-
-	table = dict_table_open_on_id(
-		aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
-
-	if (table == NULL) {
-		return(0);
-	}
-
-	for (index = UT_LIST_GET_FIRST(table->indexes);
-	     index;
-	     index = UT_LIST_GET_NEXT(indexes, index)) {
-		if (index->id == aux_table->index_id) {
-			ut_ad(index->type & DICT_FTS);
-			/* Inspect the index before the table reference
-			is released, not after. */
-			corrupted = dict_index_is_corrupted(index);
-			break;
-		}
-	}
-
-	dict_table_close(table, true, false);
-
-	return(corrupted);
-}
-
-/* Get the parent table name if the given name is an FTS aux table name.
-@param[in]	aux_table_name	aux table name
-@param[in]	aux_table_len	aux table length
-@return copy of the parent table name made with mem_strdupl(), or NULL
-when the name is not an aux table name or the parent table cannot be
-opened */
-char*
-fts_get_parent_table_name(
-	const char*	aux_table_name,
-	ulint		aux_table_len)
-{
-	fts_aux_table_t	parsed;
-
-	if (!fts_is_aux_table_name(&parsed, aux_table_name, aux_table_len)) {
-		return(NULL);
-	}
-
-	dict_table_t*	parent = dict_table_open_on_id(
-		parsed.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
-
-	if (parent == NULL) {
-		return(NULL);
-	}
-
-	/* Duplicate the name before the table handle is closed. */
-	char*	name_copy = mem_strdupl(parent->name, strlen(parent->name));
-
-	dict_table_close(parent, TRUE, FALSE);
-
-	return(name_copy);
-}
-
-/** Check the validity of the parent table of an aux table. The parent is
-valid when it can be opened by aux_table->parent_id, has an fts object
-and, for aux tables with a non-zero index_id, owns an index with that id.
-@param[in]	aux_table	auxiliary table
-@return true if it is a valid table or false if it is not */
-static
-bool
-fts_valid_parent_table(
-	const fts_aux_table_t*	aux_table)
-{
-	dict_table_t*	parent = dict_table_open_on_id(
-		aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
-
-	if (parent == NULL) {
-		return(false);
-	}
-
-	bool	valid = false;
-
-	if (parent->fts != NULL) {
-		const index_id_t	wanted = aux_table->index_id;
-
-		if (wanted == 0) {
-			/* Common aux table: no index to look for. */
-			valid = true;
-		} else {
-			/* Search for the FT index in the table's list. */
-			dict_index_t*	cur = UT_LIST_GET_FIRST(parent->indexes);
-
-			while (cur != NULL && !valid) {
-				if (cur->id == wanted) {
-					valid = true;
-				} else {
-					cur = UT_LIST_GET_NEXT(indexes, cur);
-				}
-			}
-		}
-	}
-
-	dict_table_close(parent, TRUE, FALSE);
-
-	return(valid);
-}
-
-/** Try to rename all aux tables of the specified parent table.
-The rename runs in its own background DDL transaction, which is committed
-on success. On failure that transaction is rolled back and a second
-background transaction marks all FTS indexes of the parent table as
-corrupted. In both cases the vector of aux tables is reset before
-returning.
-@param[in] aux_tables aux_tables to be renamed
-@param[in] parent_table parent table of all aux
- tables stored in tables. */
-static
-void
-fts_rename_aux_tables_to_hex_format(
- ib_vector_t* aux_tables,
- dict_table_t* parent_table)
-{
- dberr_t err;
- trx_t* trx_rename = trx_allocate_for_background();
- trx_rename->op_info = "Rename aux tables to hex format";
- trx_rename->dict_operation_lock_mode = RW_X_LATCH;
- trx_start_for_ddl(trx_rename, TRX_DICT_OP_TABLE);
-
- err = fts_rename_aux_tables_to_hex_format_low(trx_rename,
- parent_table, aux_tables);
-
- trx_rename->dict_operation_lock_mode = 0;
-
- if (err != DB_SUCCESS) {
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Rollback operations on all aux tables of table %s. "
- "All the fts index associated with the table are "
- "marked as corrupted. Please rebuild the "
- "index again.", parent_table->name);
- fts_sql_rollback(trx_rename);
-
- /* Corrupting the fts index related to parent table. */
- trx_t* trx_corrupt;
- trx_corrupt = trx_allocate_for_background();
- trx_corrupt->dict_operation_lock_mode = RW_X_LATCH;
- trx_start_for_ddl(trx_corrupt, TRX_DICT_OP_TABLE);
- fts_parent_all_index_set_corrupt(trx_corrupt, parent_table);
- trx_corrupt->dict_operation_lock_mode = 0;
- fts_sql_commit(trx_corrupt);
- trx_free_for_background(trx_corrupt);
- } else {
- fts_sql_commit(trx_rename);
- }
-
- trx_free_for_background(trx_rename);
- ib_vector_reset(aux_tables); /* the batch is consumed whether or not the rename succeeded */
-}
-
-/** Set the hex format flag for the parent table.
-Nothing is done when DICT_TF2_FTS_AUX_HEX_NAME is already set on the
-table. Otherwise the flag is written to SYS_TABLES through
-fts_update_hex_format_flag() and, on success, set on the in-memory table.
-A failure is logged at IB_LOG_LEVEL_FATAL. The "parent_table_flag_fail"
-debug injection point logs the same message and returns without touching
-the flag.
-@param[in, out] parent_table parent table
-@param[in] trx transaction */
-static
-void
-fts_set_parent_hex_format_flag(
- dict_table_t* parent_table,
- trx_t* trx)
-{
- if (!DICT_TF2_FLAG_IS_SET(parent_table,
- DICT_TF2_FTS_AUX_HEX_NAME)) {
- DBUG_EXECUTE_IF("parent_table_flag_fail",
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Setting parent table %s to hex format "
- "failed. Please try to restart the server "
- "again, if it doesn't work, the system "
- "tables might be corrupted.",
- parent_table->name);
- return;);
-
- dberr_t err = fts_update_hex_format_flag(
- trx, parent_table->id, true);
-
- if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Setting parent table %s to hex format "
- "failed. Please try to restart the server "
- "again, if it doesn't work, the system "
- "tables might be corrupted.",
- parent_table->name);
- } else {
- DICT_TF2_FLAG_SET(
- parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
- }
- }
-}
-
-/** Drop the obsolete auxiliary tables listed in a vector. Every table is
-dropped in a background transaction of its own, which is committed on
-success and rolled back on failure. A failure is only logged.
-@param[in]	tables	tables to be dropped. */
-static
-void
-fts_drop_obsolete_aux_table_from_vector(
-	ib_vector_t*	tables)
-{
-	ulint	pos = 0;
-
-	while (pos < ib_vector_size(tables)) {
-		fts_aux_table_t*	obsolete;
-
-		obsolete = static_cast<fts_aux_table_t*>(
-			ib_vector_get(tables, pos));
-
-		trx_t*	drop_trx = trx_allocate_for_background();
-
-		drop_trx->op_info = "Drop obsolete aux tables";
-		drop_trx->dict_operation_lock_mode = RW_X_LATCH;
-		trx_start_for_ddl(drop_trx, TRX_DICT_OP_TABLE);
-
-		const dberr_t	drop_err = row_drop_table_for_mysql(
-			obsolete->name, drop_trx, false, true);
-
-		drop_trx->dict_operation_lock_mode = 0;
-
-		if (drop_err == DB_SUCCESS) {
-			ib_logf(IB_LOG_LEVEL_INFO,
-				"Dropped obsolete aux table '%s'.",
-				obsolete->name);
-
-			fts_sql_commit(drop_trx);
-		} else {
-			/* We don't need to worry about the failure,
-			since server would try to drop it on next
-			restart, even if the table was broken. */
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"Fail to drop obsolete aux table '%s', which "
-				"is harmless. will try to drop it on next "
-				"restart.", obsolete->name);
-
-			fts_sql_rollback(drop_trx);
-		}
-
-		trx_free_for_background(drop_trx);
-
-		++pos;
-	}
-}
-
-/** Drop those auxiliary tables in the vector whose parent table is not
-valid. Entries with a valid parent are left alone. When fts_drop_table()
-reports DB_FAIL, the .ibd file of the aux table is deleted if it exists.
-@param[in]	trx	transaction
-@param[in]	tables	tables to be checked and dropped */
-static
-void
-fts_drop_aux_table_from_vector(
-	trx_t*		trx,
-	ib_vector_t*	tables)
-{
-	ulint	pos;
-
-	for (pos = 0; pos < ib_vector_size(tables); ++pos) {
-		fts_aux_table_t*	orphan;
-
-		orphan = static_cast<fts_aux_table_t*>(
-			ib_vector_get(tables, pos));
-
-		/* Check for the validity of the parent table */
-		if (fts_valid_parent_table(orphan)) {
-			continue;
-		}
-
-		ib_logf(IB_LOG_LEVEL_WARN,
-			"Parent table of FTS auxiliary table %s not "
-			"found.", orphan->name);
-
-		const dberr_t	drop_err = fts_drop_table(trx, orphan->name);
-
-		if (drop_err != DB_FAIL) {
-			continue;
-		}
-
-		char*	ibd_path = fil_make_ibd_name(orphan->name, false);
-
-		os_file_delete_if_exists(innodb_file_data_key, ibd_path);
-		mem_free(ibd_path);
-	}
-}
-
-/**********************************************************************//**
-Check and drop all orphaned FTS auxiliary tables, those that don't have
-a parent table or FTS index defined on them.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull))
-void
-fts_check_and_drop_orphaned_tables(
-/*===============================*/
- trx_t* trx, /*!< in: transaction */
- ib_vector_t* tables) /*!< in: tables to check */
-{
- mem_heap_t* heap;
- ib_vector_t* aux_tables_to_rename;
- ib_vector_t* invalid_aux_tables;
- ib_vector_t* valid_aux_tables;
- ib_vector_t* drop_aux_tables;
- ib_vector_t* obsolete_aux_tables;
- ib_alloc_t* heap_alloc;
-
- heap = mem_heap_create(1024);
- heap_alloc = ib_heap_allocator_create(heap);
-
- /* We store all aux tables belonging to the same parent table here,
- and rename all these tables in a batch mode. */
- aux_tables_to_rename = ib_vector_create(heap_alloc,
- sizeof(fts_aux_table_t), 128);
-
- /* We store all fake auxiliary table and orphaned table here. */
- invalid_aux_tables = ib_vector_create(heap_alloc,
- sizeof(fts_aux_table_t), 128);
-
- /* We store all valid aux tables. We use this to filter the
- fake auxiliary table from invalid auxiliary tables. */
- valid_aux_tables = ib_vector_create(heap_alloc,
- sizeof(fts_aux_table_t), 128);
-
- /* We store all auxiliary tables to be dropped. */
- drop_aux_tables = ib_vector_create(heap_alloc,
- sizeof(fts_aux_table_t), 128);
-
- /* We store all obsolete auxiliary tables to be dropped. */
- obsolete_aux_tables = ib_vector_create(heap_alloc,
- sizeof(fts_aux_table_t), 128);
-
- /* Sort by parent_id first, in case rename will fail */
- ib_vector_sort(tables, fts_check_aux_table_parent_id_cmp);
-
- for (ulint i = 0; i < ib_vector_size(tables); ++i) {
- dict_table_t* parent_table;
- fts_aux_table_t* aux_table;
- bool drop = false;
- dict_table_t* table;
- fts_aux_table_t* next_aux_table = NULL;
- ib_id_t orig_parent_id = 0;
- ib_id_t orig_index_id = 0;
- bool rename = false;
-
- aux_table = static_cast<fts_aux_table_t*>(
- ib_vector_get(tables, i));
-
- table = dict_table_open_on_id(
- aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
- orig_parent_id = aux_table->parent_id;
- orig_index_id = aux_table->index_id;
-
- if (table == NULL || strcmp(table->name, aux_table->name)) {
-
- bool fake_aux = false;
-
- if (table != NULL) {
- dict_table_close(table, TRUE, FALSE);
- }
-
- if (i + 1 < ib_vector_size(tables)) {
- next_aux_table = static_cast<fts_aux_table_t*>(
- ib_vector_get(tables, i + 1));
- }
-
- /* To know whether aux table is fake fts or
- orphan fts table. */
- for (ulint count = 0;
- count < ib_vector_size(valid_aux_tables);
- count++) {
- fts_aux_table_t* valid_aux;
- valid_aux = static_cast<fts_aux_table_t*>(
- ib_vector_get(valid_aux_tables, count));
- if (strcmp(valid_aux->name,
- aux_table->name) == 0) {
- fake_aux = true;
- break;
- }
- }
-
- /* All aux tables of parent table, whose id is
- last_parent_id, have been checked, try to rename
- them if necessary. */
- if ((next_aux_table == NULL
- || orig_parent_id != next_aux_table->parent_id)
- && (!ib_vector_is_empty(aux_tables_to_rename))) {
-
- ulint parent_id = fts_fake_hex_to_dec(
- aux_table->parent_id);
-
- parent_table = dict_table_open_on_id(
- parent_id, TRUE,
- DICT_TABLE_OP_NORMAL);
-
- fts_rename_aux_tables_to_hex_format(
- aux_tables_to_rename, parent_table);
-
- dict_table_close(parent_table, TRUE,
- FALSE);
- }
-
- /* If the aux table is fake aux table. Skip it. */
- if (!fake_aux) {
- ib_vector_push(invalid_aux_tables, aux_table);
- }
-
- continue;
- } else if (!DICT_TF2_FLAG_IS_SET(table,
- DICT_TF2_FTS_AUX_HEX_NAME)) {
-
- aux_table->parent_id = fts_fake_hex_to_dec(
- aux_table->parent_id);
-
- if (aux_table->index_id != 0) {
- aux_table->index_id = fts_fake_hex_to_dec(
- aux_table->index_id);
- }
-
- ut_ad(aux_table->id > aux_table->parent_id);
-
- /* Check whether parent table id and index id
- are stored as decimal format. */
- if (fts_valid_parent_table(aux_table)) {
-
- parent_table = dict_table_open_on_id(
- aux_table->parent_id, true,
- DICT_TABLE_OP_NORMAL);
-
- ut_ad(parent_table != NULL);
- ut_ad(parent_table->fts != NULL);
-
- if (!DICT_TF2_FLAG_IS_SET(
- parent_table,
- DICT_TF2_FTS_AUX_HEX_NAME)) {
- rename = true;
- }
-
- dict_table_close(parent_table, TRUE, FALSE);
- }
-
- if (!rename) {
- /* Reassign the original value of
- aux table if it is not in decimal format */
- aux_table->parent_id = orig_parent_id;
- aux_table->index_id = orig_index_id;
- }
- }
-
- if (table != NULL) {
- dict_table_close(table, true, false);
- }
-
- if (!rename) {
- /* Check the validity of the parent table. */
- if (!fts_valid_parent_table(aux_table)) {
- drop = true;
- }
- }
-
- /* Filter out the fake aux table by comparing with the
- current valid auxiliary table name . */
- for (ulint count = 0;
- count < ib_vector_size(invalid_aux_tables); count++) {
- fts_aux_table_t* invalid_aux;
- invalid_aux = static_cast<fts_aux_table_t*>(
- ib_vector_get(invalid_aux_tables, count));
- if (strcmp(invalid_aux->name, aux_table->name) == 0) {
- ib_vector_remove(
- invalid_aux_tables,
- *reinterpret_cast<void**>(invalid_aux));
- break;
- }
- }
-
- ib_vector_push(valid_aux_tables, aux_table);
-
- /* If the index associated with aux table is corrupted,
- skip it. */
- if (fts_check_corrupt_index(aux_table) > 0) {
-
- if (i + 1 < ib_vector_size(tables)) {
- next_aux_table = static_cast<fts_aux_table_t*>(
- ib_vector_get(tables, i + 1));
- }
-
- if (next_aux_table == NULL
- || orig_parent_id != next_aux_table->parent_id) {
-
- parent_table = dict_table_open_on_id(
- aux_table->parent_id, TRUE,
- DICT_TABLE_OP_NORMAL);
-
- if (!ib_vector_is_empty(aux_tables_to_rename)) {
- fts_rename_aux_tables_to_hex_format(
- aux_tables_to_rename, parent_table);
-
- } else {
- fts_set_parent_hex_format_flag(
- parent_table, trx);
- }
-
- dict_table_close(parent_table, TRUE, FALSE);
- }
-
- continue;
- }
-
- parent_table = dict_table_open_on_id(
- aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
-
- if (drop) {
- ib_vector_push(drop_aux_tables, aux_table);
- } else {
- if (FTS_IS_OBSOLETE_AUX_TABLE(aux_table->name)) {
-
- /* Current table could be one of the three
- obsolete tables, in this case, we should
- always try to drop it but not rename it.
- This could happen when we try to upgrade
- from older server to later one, which doesn't
- contain these obsolete tables. */
- ib_vector_push(obsolete_aux_tables, aux_table);
- continue;
- }
- }
-
- /* If the aux table is in decimal format, we should
- rename it, so push it to aux_tables_to_rename */
- if (!drop && rename) {
- ib_vector_push(aux_tables_to_rename, aux_table);
- }
-
- if (i + 1 < ib_vector_size(tables)) {
- next_aux_table = static_cast<fts_aux_table_t*>(
- ib_vector_get(tables, i + 1));
- }
-
- if ((next_aux_table == NULL
- || orig_parent_id != next_aux_table->parent_id)
- && !ib_vector_is_empty(aux_tables_to_rename)) {
- /* All aux tables of parent table, whose id is
- last_parent_id, have been checked, try to rename
- them if necessary. We had better use a new background
- trx to rename rather than the original trx, in case
- any failure would cause a complete rollback. */
- ut_ad(rename);
- ut_ad(!DICT_TF2_FLAG_IS_SET(
- parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
-
- fts_rename_aux_tables_to_hex_format(
- aux_tables_to_rename,parent_table);
- }
-
- /* The IDs are already in correct hex format. */
- if (!drop && !rename) {
- dict_table_t* table;
-
- table = dict_table_open_on_id(
- aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
- if (table != NULL
- && strcmp(table->name, aux_table->name)) {
- dict_table_close(table, TRUE, FALSE);
- table = NULL;
- }
-
- if (table != NULL
- && !DICT_TF2_FLAG_IS_SET(
- table,
- DICT_TF2_FTS_AUX_HEX_NAME)) {
-
- DBUG_EXECUTE_IF("aux_table_flag_fail",
- ib_logf(IB_LOG_LEVEL_WARN,
- "Setting aux table %s to hex "
- "format failed.", table->name);
- fts_set_index_corrupt(
- trx, aux_table->index_id,
- parent_table);
- goto table_exit;);
-
- dberr_t err = fts_update_hex_format_flag(
- trx, table->id, true);
-
- if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Setting aux table %s to hex "
- "format failed.", table->name);
-
- fts_set_index_corrupt(
- trx, aux_table->index_id,
- parent_table);
- } else {
- DICT_TF2_FLAG_SET(table,
- DICT_TF2_FTS_AUX_HEX_NAME);
- }
- }
-#ifndef DBUG_OFF
-table_exit:
-#endif /* !DBUG_OFF */
-
- if (table != NULL) {
- dict_table_close(table, TRUE, FALSE);
- }
-
- ut_ad(parent_table != NULL);
-
- fts_set_parent_hex_format_flag(
- parent_table, trx);
- }
-
- if (parent_table != NULL) {
- dict_table_close(parent_table, TRUE, FALSE);
- }
- }
-
- fts_drop_aux_table_from_vector(trx, invalid_aux_tables);
- fts_drop_aux_table_from_vector(trx, drop_aux_tables);
- fts_sql_commit(trx);
-
- fts_drop_obsolete_aux_table_from_vector(obsolete_aux_tables);
-
- /* Free the memory allocated at the beginning */
- if (heap != NULL) {
- mem_heap_free(heap);
- }
-}
-
-/**********************************************************************//**
-Drop all orphaned FTS auxiliary tables, those that don't have a parent
-table or FTS index defined on them. */
-UNIV_INTERN
-void
-fts_drop_orphaned_tables(void)
-/*==========================*/
-{
- trx_t* trx;
- pars_info_t* info;
- mem_heap_t* heap;
- que_t* graph;
- ib_vector_t* tables;
- ib_alloc_t* heap_alloc;
- space_name_list_t space_name_list;
- dberr_t error = DB_SUCCESS;
-
- /* Note: We have to free the memory after we are done with the list. */
- error = fil_get_space_names(space_name_list);
-
- if (error == DB_OUT_OF_MEMORY) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Out of memory");
- ut_error;
- }
-
- heap = mem_heap_create(1024);
- heap_alloc = ib_heap_allocator_create(heap);
-
- /* We store the table ids of all the FTS indexes that were found. */
- tables = ib_vector_create(heap_alloc, sizeof(fts_aux_table_t), 128);
-
- /* Get the list of all known .ibd files and check for orphaned
- FTS auxiliary files in that list. We need to remove them because
- users can't map them back to table names and this will create
- unnecessary clutter. */
-
- for (space_name_list_t::iterator it = space_name_list.begin();
- it != space_name_list.end();
- ++it) {
-
- fts_aux_table_t* fts_aux_table;
-
- fts_aux_table = static_cast<fts_aux_table_t*>(
- ib_vector_push(tables, NULL));
-
- memset(fts_aux_table, 0x0, sizeof(*fts_aux_table));
-
- if (!fts_is_aux_table_name(fts_aux_table, *it, strlen(*it))) {
- ib_vector_pop(tables);
- } else {
- ulint len = strlen(*it);
-
- fts_aux_table->id = fil_get_space_id_for_table(*it);
-
- /* We got this list from fil0fil.cc. The tablespace
- with this name must exist. */
- ut_a(fts_aux_table->id != ULINT_UNDEFINED);
-
- fts_aux_table->name = static_cast<char*>(
- mem_heap_dup(heap, *it, len + 1));
-
- fts_aux_table->name[len] = 0;
- }
- }
-
- trx = trx_allocate_for_background();
- trx->op_info = "dropping orphaned FTS tables";
- row_mysql_lock_data_dictionary(trx);
-
- info = pars_info_create();
-
- pars_info_bind_function(info, "my_func", fts_read_tables, tables);
-
- graph = fts_parse_sql_no_dict_lock(
- NULL,
- info,
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS"
- " SELECT NAME, ID "
- " FROM SYS_TABLES;\n"
- "BEGIN\n"
- "\n"
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c % NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE c;");
-
- for (;;) {
- error = fts_eval_sql(trx, graph);
-
- if (error == DB_SUCCESS) {
- fts_check_and_drop_orphaned_tables(trx, tables);
- break; /* Exit the loop. */
- } else {
- ib_vector_reset(tables);
-
- fts_sql_rollback(trx);
-
- ut_print_timestamp(stderr);
-
- if (error == DB_LOCK_WAIT_TIMEOUT) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "lock wait timeout reading SYS_TABLES. "
- "Retrying!");
-
- trx->error_state = DB_SUCCESS;
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "(%s) while reading SYS_TABLES.",
- ut_strerr(error));
-
- break; /* Exit the loop. */
- }
- }
- }
-
- que_graph_free(graph);
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx_free_for_background(trx);
-
- if (heap != NULL) {
- mem_heap_free(heap);
- }
-
- /** Free the memory allocated to store the .ibd names. */
- for (space_name_list_t::iterator it = space_name_list.begin();
- it != space_name_list.end();
- ++it) {
-
- delete[] *it;
- }
-}
-
-/**********************************************************************//**
-Check whether user supplied stopword table is of the right format.
-Caller is responsible to hold dictionary locks.
-@return the stopword column charset if qualifies */
-UNIV_INTERN
-CHARSET_INFO*
-fts_valid_stopword_table(
-/*=====================*/
- const char* stopword_table_name) /*!< in: Stopword table
- name */
-{
- dict_table_t* table;
- dict_col_t* col = NULL;
-
- if (!stopword_table_name) {
- return(NULL);
- }
-
- table = dict_table_get_low(stopword_table_name);
-
- if (!table) {
- fprintf(stderr,
- "InnoDB: user stopword table %s does not exist.\n",
- stopword_table_name);
-
- return(NULL);
- } else {
- const char* col_name;
-
- col_name = dict_table_get_col_name(table, 0);
-
- if (ut_strcmp(col_name, "value")) {
- fprintf(stderr,
- "InnoDB: invalid column name for stopword "
- "table %s. Its first column must be named as "
- "'value'.\n", stopword_table_name);
-
- return(NULL);
- }
-
- col = dict_table_get_nth_col(table, 0);
-
- if (col->mtype != DATA_VARCHAR
- && col->mtype != DATA_VARMYSQL) {
- fprintf(stderr,
- "InnoDB: invalid column type for stopword "
- "table %s. Its first column must be of "
- "varchar type\n", stopword_table_name);
-
- return(NULL);
- }
- }
-
- ut_ad(col);
-
- return(innobase_get_fts_charset(
- static_cast<int>(col->prtype & DATA_MYSQL_TYPE_MASK),
- static_cast<uint>(dtype_get_charset_coll(col->prtype))));
-}
-
-/**********************************************************************//**
-This function loads the stopword into the FTS cache. It also
-records/fetches stopword configuration to/from FTS configure
-table, depending on whether we are creating or reloading the
-FTS.
-@return TRUE if load operation is successful */
-UNIV_INTERN
-ibool
-fts_load_stopword(
-/*==============*/
- const dict_table_t*
- table, /*!< in: Table with FTS */
- trx_t* trx, /*!< in: Transactions */
- const char* global_stopword_table, /*!< in: Global stopword table
- name */
- const char* session_stopword_table, /*!< in: Session stopword table
- name */
- ibool stopword_is_on, /*!< in: Whether stopword
- option is turned on/off */
- ibool reload) /*!< in: Whether it is
- for reloading FTS table */
-{
- fts_table_t fts_table;
- fts_string_t str;
- dberr_t error = DB_SUCCESS;
- ulint use_stopword;
- fts_cache_t* cache;
- const char* stopword_to_use = NULL;
- ibool new_trx = FALSE;
- byte str_buffer[MAX_FULL_NAME_LEN + 1];
-
- FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table);
-
- cache = table->fts->cache;
-
- if (!reload && !(cache->stopword_info.status
- & STOPWORD_NOT_INIT)) {
- return(TRUE);
- }
-
- if (!trx) {
- trx = trx_allocate_for_background();
- trx->op_info = "upload FTS stopword";
- new_trx = TRUE;
- }
-
- /* First check whether stopword filtering is turned off */
- if (reload) {
- error = fts_config_get_ulint(
- trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
- } else {
- use_stopword = (ulint) stopword_is_on;
-
- error = fts_config_set_ulint(
- trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
- }
-
- if (error != DB_SUCCESS) {
- goto cleanup;
- }
-
- /* If stopword is turned off, no need to continue to load the
- stopword into cache, but still need to do initialization */
- if (!use_stopword) {
- cache->stopword_info.status = STOPWORD_OFF;
- goto cleanup;
- }
-
- if (reload) {
- /* Fetch the stopword table name from FTS config
- table */
- str.f_n_char = 0;
- str.f_str = str_buffer;
- str.f_len = sizeof(str_buffer) - 1;
-
- error = fts_config_get_value(
- trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
-
- if (error != DB_SUCCESS) {
- goto cleanup;
- }
-
- if (strlen((char*) str.f_str) > 0) {
- stopword_to_use = (const char*) str.f_str;
- }
- } else {
- stopword_to_use = (session_stopword_table)
- ? session_stopword_table : global_stopword_table;
- }
-
- if (stopword_to_use
- && fts_load_user_stopword(table->fts, stopword_to_use,
- &cache->stopword_info)) {
- /* Save the stopword table name to the configure
- table */
- if (!reload) {
- str.f_n_char = 0;
- str.f_str = (byte*) stopword_to_use;
- str.f_len = ut_strlen(stopword_to_use);
-
- error = fts_config_set_value(
- trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
- }
- } else {
- /* Load system default stopword list */
- fts_load_default_stopword(&cache->stopword_info);
- }
-
-cleanup:
- if (new_trx) {
- if (error == DB_SUCCESS) {
- fts_sql_commit(trx);
- } else {
- fts_sql_rollback(trx);
- }
-
- trx_free_for_background(trx);
- }
-
- if (!cache->stopword_info.cached_stopword) {
- cache->stopword_info.cached_stopword = rbt_create(
- sizeof(fts_tokenizer_word_t), fts_utf8_string_cmp);
- }
-
- return(error == DB_SUCCESS);
-}
-
-/**********************************************************************//**
-Callback function when we initialize the FTS at the start up
-time. It recovers the maximum Doc IDs presented in the current table.
-@return: always returns TRUE */
-static
-ibool
-fts_init_get_doc_id(
-/*================*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: fts cache */
-{
- doc_id_t doc_id = FTS_NULL_DOC_ID;
- sel_node_t* node = static_cast<sel_node_t*>(row);
- que_node_t* exp = node->select_list;
- fts_cache_t* cache = static_cast<fts_cache_t*>(user_arg);
-
- ut_ad(ib_vector_is_empty(cache->get_docs));
-
- /* Copy each indexed column content into doc->text.f_str */
- if (exp) {
- dfield_t* dfield = que_node_get_val(exp);
- dtype_t* type = dfield_get_type(dfield);
- void* data = dfield_get_data(dfield);
-
- ut_a(dtype_get_mtype(type) == DATA_INT);
-
- doc_id = static_cast<doc_id_t>(mach_read_from_8(
- static_cast<const byte*>(data)));
-
- if (doc_id >= cache->next_doc_id) {
- cache->next_doc_id = doc_id + 1;
- }
- }
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Callback function when we initialize the FTS at the start up
-time. It recovers Doc IDs that have not sync-ed to the auxiliary
-table, and require to bring them back into FTS index.
-@return: always returns TRUE */
-static
-ibool
-fts_init_recover_doc(
-/*=================*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: fts cache */
-{
-
- fts_doc_t doc;
- ulint doc_len = 0;
- ulint field_no = 0;
- fts_get_doc_t* get_doc = static_cast<fts_get_doc_t*>(user_arg);
- doc_id_t doc_id = FTS_NULL_DOC_ID;
- sel_node_t* node = static_cast<sel_node_t*>(row);
- que_node_t* exp = node->select_list;
- fts_cache_t* cache = get_doc->cache;
-
- fts_doc_init(&doc);
- doc.found = TRUE;
-
- ut_ad(cache);
-
- /* Copy each indexed column content into doc->text.f_str */
- while (exp) {
- dfield_t* dfield = que_node_get_val(exp);
- ulint len = dfield_get_len(dfield);
-
- if (field_no == 0) {
- dtype_t* type = dfield_get_type(dfield);
- void* data = dfield_get_data(dfield);
-
- ut_a(dtype_get_mtype(type) == DATA_INT);
-
- doc_id = static_cast<doc_id_t>(mach_read_from_8(
- static_cast<const byte*>(data)));
-
- field_no++;
- exp = que_node_get_next(exp);
- continue;
- }
-
- if (len == UNIV_SQL_NULL) {
- exp = que_node_get_next(exp);
- continue;
- }
-
- ut_ad(get_doc);
-
- if (!get_doc->index_cache->charset) {
- ulint prtype = dfield->type.prtype;
-
- get_doc->index_cache->charset =
- innobase_get_fts_charset(
- (int)(prtype & DATA_MYSQL_TYPE_MASK),
- (uint) dtype_get_charset_coll(prtype));
- }
-
- doc.charset = get_doc->index_cache->charset;
-
- if (dfield_is_ext(dfield)) {
- dict_table_t* table = cache->sync->table;
- ulint zip_size = dict_table_zip_size(table);
-
- doc.text.f_str = btr_copy_externally_stored_field(
- &doc.text.f_len,
- static_cast<byte*>(dfield_get_data(dfield)),
- zip_size, len,
- static_cast<mem_heap_t*>(doc.self_heap->arg),
- NULL);
- } else {
- doc.text.f_str = static_cast<byte*>(
- dfield_get_data(dfield));
-
- doc.text.f_len = len;
- }
-
- if (field_no == 1) {
- fts_tokenize_document(&doc, NULL);
- } else {
- fts_tokenize_document_next(&doc, doc_len, NULL);
- }
-
- exp = que_node_get_next(exp);
-
- doc_len += (exp) ? len + 1 : len;
-
- field_no++;
- }
-
- fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);
-
- fts_doc_free(&doc);
-
- cache->added++;
-
- if (doc_id >= cache->next_doc_id) {
- cache->next_doc_id = doc_id + 1;
- }
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-This function brings FTS index in sync when FTS index is first
-used. There are documents that have not yet sync-ed to auxiliary
-tables from last server abnormally shutdown, we will need to bring
-such document into FTS cache before any further operations
-@return TRUE if all OK */
-UNIV_INTERN
-ibool
-fts_init_index(
-/*===========*/
- dict_table_t* table, /*!< in: Table with FTS */
- ibool has_cache_lock) /*!< in: Whether we already have
- cache lock */
-{
- dict_index_t* index;
- doc_id_t start_doc;
- fts_get_doc_t* get_doc = NULL;
- fts_cache_t* cache = table->fts->cache;
- bool need_init = false;
-
- ut_ad(!mutex_own(&dict_sys->mutex));
-
- /* First check cache->get_docs is initialized */
- if (!has_cache_lock) {
- rw_lock_x_lock(&cache->lock);
- }
-
- rw_lock_x_lock(&cache->init_lock);
- if (cache->get_docs == NULL) {
- cache->get_docs = fts_get_docs_create(cache);
- }
- rw_lock_x_unlock(&cache->init_lock);
-
- if (table->fts->fts_status & ADDED_TABLE_SYNCED) {
- goto func_exit;
- }
-
- need_init = true;
-
- start_doc = cache->synced_doc_id;
-
- if (!start_doc) {
- fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
- cache->synced_doc_id = start_doc;
- }
-
- /* No FTS index, this is the case when previous FTS index
- dropped, and we re-initialize the Doc ID system for subsequent
- insertion */
- if (ib_vector_is_empty(cache->get_docs)) {
- index = dict_table_get_index_on_name(table, FTS_DOC_ID_INDEX_NAME);
-
- ut_a(index);
-
- fts_doc_fetch_by_doc_id(NULL, start_doc, index,
- FTS_FETCH_DOC_BY_ID_LARGE,
- fts_init_get_doc_id, cache);
- } else {
- if (table->fts->cache->stopword_info.status
- & STOPWORD_NOT_INIT) {
- fts_load_stopword(table, NULL, NULL, NULL, TRUE, TRUE);
- }
-
- for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
- get_doc = static_cast<fts_get_doc_t*>(
- ib_vector_get(cache->get_docs, i));
-
- index = get_doc->index_cache->index;
-
- fts_doc_fetch_by_doc_id(NULL, start_doc, index,
- FTS_FETCH_DOC_BY_ID_LARGE,
- fts_init_recover_doc, get_doc);
- }
- }
-
- table->fts->fts_status |= ADDED_TABLE_SYNCED;
-
- fts_get_docs_clear(cache->get_docs);
-
-func_exit:
- if (!has_cache_lock) {
- rw_lock_x_unlock(&cache->lock);
- }
-
- if (need_init) {
- mutex_enter(&dict_sys->mutex);
- /* Register the table with the optimize thread. */
- fts_optimize_add_table(table);
- mutex_exit(&dict_sys->mutex);
- }
-
- return(TRUE);
-}
diff --git a/storage/xtradb/fts/fts0opt.cc b/storage/xtradb/fts/fts0opt.cc
deleted file mode 100644
index cb30122adcb..00000000000
--- a/storage/xtradb/fts/fts0opt.cc
+++ /dev/null
@@ -1,3246 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, MariaDB Corporation. All Rights reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file fts/fts0opt.cc
-Full Text Search optimize thread
-
-Created 2007/03/27 Sunny Bains
-Completed 2011/7/10 Sunny and Jimmy Yang
-
-***********************************************************************/
-
-#include "fts0fts.h"
-#include "row0sel.h"
-#include "que0types.h"
-#include "fts0priv.h"
-#include "fts0types.h"
-#include "ut0wqueue.h"
-#include "srv0start.h"
-#include "zlib.h"
-
-#ifndef UNIV_NONINL
-#include "fts0types.ic"
-#include "fts0vlc.ic"
-#endif
-
-/** The FTS optimize thread's work queue. */
-static ib_wqueue_t* fts_optimize_wq;
-
-/** Time to wait for a message. */
-static const ulint FTS_QUEUE_WAIT_IN_USECS = 5000000;
-
-/** Default optimize interval in secs. */
-static const ulint FTS_OPTIMIZE_INTERVAL_IN_SECS = 300;
-
-/** Server is shutting down, so does we exiting the optimize thread */
-static bool fts_opt_start_shutdown = false;
-
-/** Initial size of nodes in fts_word_t. */
-static const ulint FTS_WORD_NODES_INIT_SIZE = 64;
-
-/** Last time we did check whether system need a sync */
-static ib_time_t last_check_sync_time;
-
-#if 0
-/** Check each table in round robin to see whether they'd
-need to be "optimized" */
-static ulint fts_optimize_sync_iterator = 0;
-#endif
-
-/** State of a table within the optimization sub system. */
-enum fts_state_t {
- FTS_STATE_LOADED,
- FTS_STATE_RUNNING,
- FTS_STATE_SUSPENDED,
- FTS_STATE_DONE,
- FTS_STATE_EMPTY
-};
-
-/** FTS optimize thread message types. */
-enum fts_msg_type_t {
- FTS_MSG_START, /*!< Start optimizing thread */
-
- FTS_MSG_PAUSE, /*!< Pause optimizing thread */
-
- FTS_MSG_STOP, /*!< Stop optimizing and exit thread */
-
- FTS_MSG_ADD_TABLE, /*!< Add table to the optimize thread's
- work queue */
-
- FTS_MSG_OPTIMIZE_TABLE, /*!< Optimize a table */
-
- FTS_MSG_DEL_TABLE, /*!< Remove a table from the optimize
- threads work queue */
- FTS_MSG_SYNC_TABLE /*!< Sync fts cache of a table */
-};
-
-/** Compressed list of words that have been read from FTS INDEX
-that needs to be optimized. */
-struct fts_zip_t {
- lint status; /*!< Status of (un)/zip operation */
-
- ulint n_words; /*!< Number of words compressed */
-
- ulint block_sz; /*!< Size of a block in bytes */
-
- ib_vector_t* blocks; /*!< Vector of compressed blocks */
-
- ib_alloc_t* heap_alloc; /*!< Heap to use for allocations */
-
- ulint pos; /*!< Offset into blocks */
-
- ulint last_big_block; /*!< Offset of last block in the
- blocks array that is of size
- block_sz. Blocks beyond this offset
- are of size FTS_MAX_WORD_LEN */
-
- z_streamp zp; /*!< ZLib state */
-
- /*!< The value of the last word read
- from the FTS INDEX table. This is
- used to discard duplicates */
-
- fts_string_t word; /*!< UTF-8 string */
-
- ulint max_words; /*!< maximum number of words to read
- in one pase */
-};
-
-/** Prepared statemets used during optimize */
-struct fts_optimize_graph_t {
- /*!< Delete a word from FTS INDEX */
- que_t* delete_nodes_graph;
- /*!< Insert a word into FTS INDEX */
- que_t* write_nodes_graph;
- /*!< COMMIT a transaction */
- que_t* commit_graph;
- /*!< Read the nodes from FTS_INDEX */
- que_t* read_nodes_graph;
-};
-
-/** Used by fts_optimize() to store state. */
-struct fts_optimize_t {
- trx_t* trx; /*!< The transaction used for all SQL */
-
- ib_alloc_t* self_heap; /*!< Heap to use for allocations */
-
- char* name_prefix; /*!< FTS table name prefix */
-
- fts_table_t fts_index_table;/*!< Common table definition */
-
- /*!< Common table definition */
- fts_table_t fts_common_table;
-
- dict_table_t* table; /*!< Table that has to be queried */
-
- dict_index_t* index; /*!< The FTS index to be optimized */
-
- fts_doc_ids_t* to_delete; /*!< doc ids to delete, we check against
- this vector and purge the matching
- entries during the optimizing
- process. The vector entries are
- sorted on doc id */
-
- ulint del_pos; /*!< Offset within to_delete vector,
- this is used to keep track of where
- we are up to in the vector */
-
- ibool done; /*!< TRUE when optimize finishes */
-
- ib_vector_t* words; /*!< Word + Nodes read from FTS_INDEX,
- it contains instances of fts_word_t */
-
- fts_zip_t* zip; /*!< Words read from the FTS_INDEX */
-
- fts_optimize_graph_t /*!< Prepared statements used during */
- graph; /*optimize */
-
- ulint n_completed; /*!< Number of FTS indexes that have
- been optimized */
- ibool del_list_regenerated;
- /*!< BEING_DELETED list regenarated */
-};
-
-/** Used by the optimize, to keep state during compacting nodes. */
-struct fts_encode_t {
- doc_id_t src_last_doc_id;/*!< Last doc id read from src node */
- byte* src_ilist_ptr; /*!< Current ptr within src ilist */
-};
-
-/** We use this information to determine when to start the optimize
-cycle for a table. */
-struct fts_slot_t {
- dict_table_t* table; /*!< Table to optimize */
-
- table_id_t table_id; /*!< Table id */
-
- fts_state_t state; /*!< State of this slot */
-
- ulint added; /*!< Number of doc ids added since the
- last time this table was optimized */
-
- ulint deleted; /*!< Number of doc ids deleted since the
- last time this table was optimized */
-
- ib_time_t last_run; /*!< Time last run completed */
-
- ib_time_t completed; /*!< Optimize finish time */
-
- ib_time_t interval_time; /*!< Minimum time to wait before
- optimizing the table again. */
-};
-
-/** A table remove message for the FTS optimize thread. */
-struct fts_msg_del_t {
- dict_table_t* table; /*!< The table to remove */
-
- os_event_t event; /*!< Event to synchronize acknowledgement
- of receipt and processing of the
- this message by the consumer */
-};
-
-/** Stop the optimize thread. */
-struct fts_msg_optimize_t {
- dict_table_t* table; /*!< Table to optimize */
-};
-
-/** The FTS optimize message work queue message type. */
-struct fts_msg_t {
- fts_msg_type_t type; /*!< Message type */
-
- void* ptr; /*!< The message contents */
-
- mem_heap_t* heap; /*!< The heap used to allocate this
- message, the message consumer will
- free the heap. */
-};
-
-/** The number of words to read and optimize in a single pass. */
-UNIV_INTERN ulong fts_num_word_optimize;
-
-// FIXME
-UNIV_INTERN char fts_enable_diag_print;
-
-/** ZLib compressed block size.*/
-static ulint FTS_ZIP_BLOCK_SIZE = 1024;
-
-/** The amount of time optimizing in a single pass, in milliseconds. */
-static ib_time_t fts_optimize_time_limit = 0;
-
-/** SQL Statement for changing state of rows to be deleted from FTS Index. */
-static const char* fts_init_delete_sql =
- "BEGIN\n"
- "\n"
- "INSERT INTO \"%s_BEING_DELETED\"\n"
- "SELECT doc_id FROM \"%s_DELETED\";\n"
- "\n"
- "INSERT INTO \"%s_BEING_DELETED_CACHE\"\n"
- "SELECT doc_id FROM \"%s_DELETED_CACHE\";\n";
-
-static const char* fts_delete_doc_ids_sql =
- "BEGIN\n"
- "\n"
- "DELETE FROM \"%s_DELETED\" WHERE doc_id = :doc_id1;\n"
- "DELETE FROM \"%s_DELETED_CACHE\" WHERE doc_id = :doc_id2;\n";
-
-static const char* fts_end_delete_sql =
- "BEGIN\n"
- "\n"
- "DELETE FROM \"%s_BEING_DELETED\";\n"
- "DELETE FROM \"%s_BEING_DELETED_CACHE\";\n";
-
-/**********************************************************************//**
-Initialize fts_zip_t. */
-static
-void
-fts_zip_initialize(
-/*===============*/
- fts_zip_t* zip) /*!< out: zip instance to initialize */
-{
- zip->pos = 0;
- zip->n_words = 0;
-
- zip->status = Z_OK;
-
- zip->last_big_block = 0;
-
- zip->word.f_len = 0;
- *zip->word.f_str = 0;
-
- ib_vector_reset(zip->blocks);
-
- memset(zip->zp, 0, sizeof(*zip->zp));
-}
-
-/**********************************************************************//**
-Create an instance of fts_zip_t.
-@return a new instance of fts_zip_t */
-static
-fts_zip_t*
-fts_zip_create(
-/*===========*/
- mem_heap_t* heap, /*!< in: heap */
- ulint block_sz, /*!< in: size of a zip block.*/
- ulint max_words) /*!< in: max words to read */
-{
- fts_zip_t* zip;
-
- zip = static_cast<fts_zip_t*>(mem_heap_zalloc(heap, sizeof(*zip)));
-
- zip->word.f_str = static_cast<byte*>(
- mem_heap_zalloc(heap, FTS_MAX_WORD_LEN + 1));
-
- zip->block_sz = block_sz;
-
- zip->heap_alloc = ib_heap_allocator_create(heap);
-
- zip->blocks = ib_vector_create(zip->heap_alloc, sizeof(void*), 128);
-
- zip->max_words = max_words;
-
- zip->zp = static_cast<z_stream*>(
- mem_heap_zalloc(heap, sizeof(*zip->zp)));
-
- return(zip);
-}
-
-/**********************************************************************//**
-Initialize an instance of fts_zip_t. */
-static
-void
-fts_zip_init(
-/*=========*/
-
- fts_zip_t* zip) /*!< in: zip instance to init */
-{
- memset(zip->zp, 0, sizeof(*zip->zp));
-
- zip->word.f_len = 0;
- *zip->word.f_str = '\0';
-}
-
-/**********************************************************************//**
-Create a fts_optimizer_word_t instance.
-@return new instance */
-UNIV_INTERN
-fts_word_t*
-fts_word_init(
-/*==========*/
- fts_word_t* word, /*!< in: word to initialize */
- byte* utf8, /*!< in: UTF-8 string */
- ulint len) /*!< in: length of string in bytes */
-{
- mem_heap_t* heap = mem_heap_create(sizeof(fts_node_t));
-
- memset(word, 0, sizeof(*word));
-
- word->text.f_len = len;
- word->text.f_str = static_cast<byte*>(mem_heap_alloc(heap, len + 1));
-
- /* Need to copy the NUL character too. */
- memcpy(word->text.f_str, utf8, word->text.f_len);
- word->text.f_str[word->text.f_len] = 0;
-
- word->heap_alloc = ib_heap_allocator_create(heap);
-
- word->nodes = ib_vector_create(
- word->heap_alloc, sizeof(fts_node_t), FTS_WORD_NODES_INIT_SIZE);
-
- return(word);
-}
-
-/**********************************************************************//**
-Read the FTS INDEX row. A new node is appended to word->nodes and filled
-from the select list columns that follow the word column: DOC_COUNT,
-FIRST_DOC_ID, LAST_DOC_ID and ILIST. The ilist is copied into a buffer
-allocated with ut_malloc().
-@return fts_node_t instance */
-static
-fts_node_t*
-fts_optimize_read_node(
-/*===================*/
-	fts_word_t*	word,	/*!< in/out: word that receives the node */
-	que_node_t*	exp)	/*!< in: first select list column after
-				the word column */
-{
-	/* Column numbering starts from 1 since the first column has
-	been read by the caller. */
-	int		col = 1;
-	fts_node_t*	node = static_cast<fts_node_t*>(
-		ib_vector_push(word->nodes, NULL));
-
-	while (exp) {
-		dfield_t*	field = que_node_get_val(exp);
-		byte*		data = static_cast<byte*>(
-			dfield_get_data(field));
-		ulint		len = dfield_get_len(field);
-
-		ut_a(len != UNIV_SQL_NULL);
-
-		/* Note: The column numbers below must match the SELECT */
-		if (col == 1) {
-			/* DOC_COUNT */
-			node->doc_count = mach_read_from_4(data);
-		} else if (col == 2) {
-			/* FIRST_DOC_ID */
-			node->first_doc_id = fts_read_doc_id(data);
-		} else if (col == 3) {
-			/* LAST_DOC_ID */
-			node->last_doc_id = fts_read_doc_id(data);
-		} else if (col == 4) {
-			/* ILIST */
-			node->ilist_size_alloc = node->ilist_size = len;
-			node->ilist = static_cast<byte*>(ut_malloc(len));
-			memcpy(node->ilist, data, len);
-		} else {
-			ut_error;
-		}
-
-		exp = que_node_get_next(exp);
-		++col;
-	}
-
-	/* Exactly four columns must have been consumed. */
-	ut_a(col == 5);
-
-	return(node);
-}
-
-/**********************************************************************//**
-Callback function to fetch the rows in an FTS INDEX record.
-The first select list column is the word. A new fts_word_t is appended
-to the words vector when the vector is empty or when the word differs
-from the last one in the vector; the remaining columns are then read
-into a new node of that word. fetch->total_memory is increased by an
-estimate of the memory used.
-@return FALSE once fetch->total_memory has reached
-fts_result_cache_limit, TRUE otherwise */
-UNIV_INTERN
-ibool
-fts_optimize_index_fetch_node(
-/*==========================*/
-	void*		row,		/*!< in: sel_node_t* */
-	void*		user_arg)	/*!< in: pointer to fts_fetch_t whose
-					read_arg is the words vector */
-{
-	sel_node_t*	sel_node = static_cast<sel_node_t*>(row);
-	fts_fetch_t*	fetch = static_cast<fts_fetch_t*>(user_arg);
-	ib_vector_t*	words = static_cast<ib_vector_t*>(fetch->read_arg);
-	que_node_t*	exp = sel_node->select_list;
-	dfield_t*	dfield = que_node_get_val(exp);
-	void*		data = dfield_get_data(dfield);
-	ulint		dfield_len = dfield_get_len(dfield);
-	fts_word_t*	word = NULL;
-	fts_node_t*	node;
-	bool		is_new_word = true;
-
-	ut_a(dfield_len <= FTS_MAX_WORD_LEN);
-
-	/* Rows for the same word extend the last word in the vector. */
-	if (ib_vector_size(words) != 0) {
-
-		word = static_cast<fts_word_t*>(ib_vector_last(words));
-
-		is_new_word = dfield_len != word->text.f_len
-			|| memcmp(word->text.f_str, data, dfield_len) != 0;
-	}
-
-	if (is_new_word) {
-
-		word = static_cast<fts_word_t*>(ib_vector_push(words, NULL));
-		fts_word_init(word, (byte*) data, dfield_len);
-	}
-
-	node = fts_optimize_read_node(word, que_node_get_next(exp));
-
-	/* Account for the memory taken by this row. */
-	fetch->total_memory += node->ilist_size;
-
-	if (is_new_word) {
-		fetch->total_memory += sizeof(fts_word_t)
-			+ sizeof(ib_alloc_t) + sizeof(ib_vector_t) + dfield_len
-			+ sizeof(fts_node_t) * FTS_WORD_NODES_INIT_SIZE;
-	} else if (ib_vector_size(words) > FTS_WORD_NODES_INIT_SIZE) {
-		fetch->total_memory += sizeof(fts_node_t);
-	}
-
-	return(fetch->total_memory >= fts_result_cache_limit ? FALSE : TRUE);
-}
-
-/**********************************************************************//**
-Read the rows from the FTS index.
-If *graph is NULL the query is parsed against the auxiliary index table
-selected by the word and stored in *graph; otherwise the existing graph
-and its pars_info_t are reused. The query is re-run for as long as it
-fails with DB_LOCK_WAIT_TIMEOUT.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_index_fetch_nodes(
-/*==================*/
-	trx_t*		trx,		/*!< in: transaction */
-	que_t**		graph,		/*!< in/out: prepared statement */
-	fts_table_t*	fts_table,	/*!< in/out: table of the FTS INDEX */
-	const fts_string_t*
-			word,		/*!< in: the word to fetch */
-	fts_fetch_t*	fetch)		/*!< in: fetch callback.*/
-{
-	dberr_t		error;
-	pars_info_t*	info;
-	const bool	need_parse = (*graph == NULL);
-
-	trx->op_info = "fetching FTS index nodes";
-
-	info = need_parse ? pars_info_create() : (*graph)->info;
-
-	pars_info_bind_function(info, "my_func", fetch->read_record, fetch);
-	pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
-
-	if (need_parse) {
-		ut_a(fts_table->type == FTS_INDEX_TABLE);
-
-		/* Pick the auxiliary table that holds this word. */
-		ulint	selected = fts_select_index(
-			fts_table->charset, word->f_str, word->f_len);
-
-		fts_table->suffix = fts_get_suffix(selected);
-
-		*graph = fts_parse_sql(
-			fts_table,
-			info,
-			"DECLARE FUNCTION my_func;\n"
-			"DECLARE CURSOR c IS"
-			" SELECT word, doc_count, first_doc_id, last_doc_id, "
-			"ilist\n"
-			" FROM \"%s\"\n"
-			" WHERE word LIKE :word\n"
-			" ORDER BY first_doc_id;\n"
-			"BEGIN\n"
-			"\n"
-			"OPEN c;\n"
-			"WHILE 1 = 1 LOOP\n"
-			" FETCH c INTO my_func();\n"
-			" IF c % NOTFOUND THEN\n"
-			" EXIT;\n"
-			" END IF;\n"
-			"END LOOP;\n"
-			"CLOSE c;");
-	}
-
-	for (;;) {
-		error = fts_eval_sql(trx, *graph);
-
-		if (error == DB_SUCCESS) {
-			fts_sql_commit(trx);
-			break;
-		}
-
-		fts_sql_rollback(trx);
-
-		ut_print_timestamp(stderr);
-
-		if (error != DB_LOCK_WAIT_TIMEOUT) {
-			fprintf(stderr, " InnoDB: Error: (%s) "
-				"while reading FTS index.\n",
-				ut_strerr(error));
-
-			break;
-		}
-
-		/* Only a lock wait timeout is retried. */
-		fprintf(stderr, " InnoDB: Warning: lock wait "
-			"timeout reading FTS index. "
-			"Retrying!\n");
-
-		trx->error_state = DB_SUCCESS;
-	}
-
-	return(error);
-}
-
-/**********************************************************************//**
-Read a word
-Inflates the next entry of the compressed stream held in zip->blocks.
-An entry is a short holding the word length followed by that many bytes
-of word text; the text is inflated into word->f_str and NUL terminated,
-and word->f_len is set. A block is freed when the following block starts
-being consumed, and every remaining block is freed once zip->status is
-no longer Z_OK.
-@return word->f_str if zip->status is Z_OK or Z_STREAM_END on exit,
-NULL otherwise, including when zip->status was not Z_OK on entry */
-static
-byte*
-fts_zip_read_word(
-/*==============*/
- fts_zip_t* zip, /*!< in: Zip state + data */
- fts_string_t* word) /*!< out: uncompressed word */
-{
- short len = 0; /* length prefix, inflated first */
- void* null = NULL; /* value stored into the slot of a freed block */
- byte* ptr = word->f_str;
- int flush = Z_NO_FLUSH;
-
- /* Either there was an error or we are at the Z_STREAM_END. */
- if (zip->status != Z_OK) {
- return(NULL);
- }
-
- zip->zp->next_out = reinterpret_cast<byte*>(&len); /* first inflate the length */
- zip->zp->avail_out = sizeof(len);
-
- while (zip->status == Z_OK && zip->zp->avail_out > 0) {
-
- /* Finished decompressing block. */
- if (zip->zp->avail_in == 0) {
-
- /* Free the block that's been decompressed. */
- if (zip->pos > 0) {
- ulint prev = zip->pos - 1;
-
- ut_a(zip->pos < ib_vector_size(zip->blocks));
-
- ut_free(ib_vector_getp(zip->blocks, prev));
- ib_vector_set(zip->blocks, prev, &null);
- }
-
- /* Any more blocks to decompress. */
- if (zip->pos < ib_vector_size(zip->blocks)) {
-
- zip->zp->next_in = static_cast<byte*>(
- ib_vector_getp(
- zip->blocks, zip->pos));
-
- if (zip->pos > zip->last_big_block) {
- zip->zp->avail_in =
- FTS_MAX_WORD_LEN; /* blocks after last_big_block use this size instead of block_sz */
- } else {
- zip->zp->avail_in = static_cast<uInt>(zip->block_sz);
- }
-
- ++zip->pos;
- } else {
- flush = Z_FINISH; /* no input left to feed */
- }
- }
-
- switch (zip->status = inflate(zip->zp, flush)) {
- case Z_OK:
- if (zip->zp->avail_out == 0 && len > 0) { /* length read, text still to come */
-
- ut_a(len <= FTS_MAX_WORD_LEN);
- ptr[len] = 0;
-
- zip->zp->next_out = ptr; /* now inflate the text into word->f_str */
- zip->zp->avail_out = len;
-
- word->f_len = len;
- len = 0; /* so that this branch is not taken again for this word */
- }
- break;
-
- case Z_BUF_ERROR: /* No progress possible. */
- case Z_STREAM_END:
- inflateEnd(zip->zp);
- break;
-
- case Z_STREAM_ERROR:
- default:
- ut_error;
- }
- }
-
- /* All blocks must be freed at end of inflate. */
- if (zip->status != Z_OK) {
- for (ulint i = 0; i < ib_vector_size(zip->blocks); ++i) {
- if (ib_vector_getp(zip->blocks, i)) {
- ut_free(ib_vector_getp(zip->blocks, i));
- ib_vector_set(zip->blocks, i, &null);
- }
- }
- }
-
- if (ptr != NULL) {
- ut_ad(word->f_len == strlen((char*) ptr));
- }
-
- return(zip->status == Z_OK || zip->status == Z_STREAM_END ? ptr : NULL);
-}
-
-/**********************************************************************//**
-Callback function to fetch and compress the word in an FTS
-INDEX record. A word equal to the previously stored one (zip->word) is
-skipped. Otherwise the word is remembered in zip->word and appended to
-the deflate stream as a short length followed by the word bytes; output
-blocks of zip->block_sz bytes are allocated and pushed on zip->blocks as
-needed.
-@return FALSE once zip->n_words has reached zip->max_words, TRUE
-otherwise (including for a skipped duplicate) */
-static
-ibool
-fts_fetch_index_words(
-/*==================*/
-	void*		row,		/*!< in: sel_node_t* */
-	void*		user_arg)	/*!< in: pointer to fts_zip_t */
-{
-	sel_node_t*	sel_node = static_cast<sel_node_t*>(row);
-	fts_zip_t*	zip = static_cast<fts_zip_t*>(user_arg);
-	que_node_t*	exp = sel_node->select_list;
-	dfield_t*	dfield = que_node_get_val(exp);
-	void*		data = dfield_get_data(dfield);
-
-	/* Check the length before it is narrowed to a short and before it
-	is used as a byte count: an oversized value (for example
-	UNIV_SQL_NULL) could otherwise wrap to a negative short, pass a
-	signed comparison and be used as a huge size by memcmp()/memcpy(). */
-	ut_a(dfield_get_len(dfield) <= FTS_MAX_WORD_LEN);
-
-	short		len = static_cast<short>(dfield_get_len(dfield));
-
-	/* Skip the duplicate words. */
-	if (zip->word.f_len == static_cast<ulint>(len)
-	    && !memcmp(zip->word.f_str, data, len)) {
-
-		return(TRUE);
-	}
-
-	/* Remember the word so that the next duplicate can be skipped. */
-	memcpy(zip->word.f_str, data, len);
-	zip->word.f_len = len;
-
-	ut_a(zip->zp->avail_in == 0);
-	ut_a(zip->zp->next_in == NULL);
-
-	/* The string is prefixed by len. */
-	zip->zp->next_in = reinterpret_cast<byte*>(&len);
-	zip->zp->avail_in = sizeof(len);
-
-	/* Compress the word, create output blocks as necessary. */
-	while (zip->zp->avail_in > 0) {
-
-		/* No space left in output buffer, create a new one. */
-		if (zip->zp->avail_out == 0) {
-			byte*	block;
-
-			block = static_cast<byte*>(ut_malloc(zip->block_sz));
-			ib_vector_push(zip->blocks, &block);
-
-			zip->zp->next_out = block;
-			zip->zp->avail_out = static_cast<uInt>(zip->block_sz);
-		}
-
-		switch (zip->status = deflate(zip->zp, Z_NO_FLUSH)) {
-		case Z_OK:
-			if (zip->zp->avail_in == 0) {
-				/* The length prefix has been consumed; feed
-				the word bytes next. Setting len to 0 makes
-				avail_in 0 on the next visit, which ends the
-				loop. */
-				zip->zp->next_in = static_cast<byte*>(data);
-				zip->zp->avail_in = len;
-				ut_a(len <= FTS_MAX_WORD_LEN);
-				len = 0;
-			}
-			break;
-
-		case Z_STREAM_END:
-		case Z_BUF_ERROR:
-		case Z_STREAM_ERROR:
-		default:
-			ut_error;
-			break;
-		}
-	}
-
-	/* All data should have been compressed. */
-	ut_a(zip->zp->avail_in == 0);
-	zip->zp->next_in = NULL;
-
-	++zip->n_words;
-
-	return(zip->n_words >= zip->max_words ? FALSE : TRUE);
-}
-
-/**********************************************************************//**
-Finish Zip deflate. The stream is flushed with Z_FINISH; the index of the
-last block present after the first flush attempt is recorded in
-zip->last_big_block, and any further output goes into extra blocks that
-are given FTS_MAX_WORD_LEN bytes of room. The z_stream is zeroed
-afterwards. */
-static
-void
-fts_zip_deflate_end(
-/*================*/
-	fts_zip_t*	zip)	/*!< in/out: instance that should be closed */
-{
-	z_stream*	strm = zip->zp;
-
-	ut_a(strm->avail_in == 0);
-	ut_a(strm->next_in == NULL);
-
-	zip->status = deflate(strm, Z_FINISH);
-
-	ut_a(ib_vector_size(zip->blocks) > 0);
-	zip->last_big_block = ib_vector_size(zip->blocks) - 1;
-
-	/* Z_OK here means deflate() still has output pending: hand it
-	smaller block(s), since this is trailing data. */
-	while (zip->status == Z_OK) {
-		ut_a(strm->avail_out == 0);
-
-		byte*	tail = static_cast<byte*>(
-			ut_malloc(FTS_MAX_WORD_LEN + 1));
-
-		ib_vector_push(zip->blocks, &tail);
-
-		strm->next_out = tail;
-		strm->avail_out = FTS_MAX_WORD_LEN;
-
-		zip->status = deflate(strm, Z_FINISH);
-	}
-
-	ut_a(zip->status == Z_STREAM_END);
-
-	zip->status = deflateEnd(strm);
-	ut_a(zip->status == Z_OK);
-
-	/* Reset the ZLib data structure. */
-	memset(strm, 0, sizeof(*strm));
-}
-
-/**********************************************************************//**
-Read the words from the FTS INDEX.
-Starting with the auxiliary index table selected by the given word, the
-words greater than it are read in order and compressed into optim->zip
-by fts_fetch_index_words(). Following tables are scanned until at least
-n_words words have been collected or an error occurs. A query that fails
-with DB_LOCK_WAIT_TIMEOUT is retried after the zlib state has been reset.
-@return DB_SUCCESS if all OK, DB_TABLE_NOT_FOUND if no more indexes
- to search else error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_index_fetch_words(
-/*==================*/
- fts_optimize_t* optim, /*!< in: optimize scratch pad */
- const fts_string_t* word, /*!< in: get words greater than this
- word */
- ulint n_words)/*!< in: max words to read */
-{
- pars_info_t* info;
- que_t* graph;
- ulint selected;
- fts_zip_t* zip = NULL;
- dberr_t error = DB_SUCCESS;
- mem_heap_t* heap = static_cast<mem_heap_t*>(optim->self_heap->arg);
- ibool inited = FALSE; /* TRUE once deflateInit() has succeeded */
-
- optim->trx->op_info = "fetching FTS index words";
-
- if (optim->zip == NULL) {
- optim->zip = fts_zip_create(heap, FTS_ZIP_BLOCK_SIZE, n_words);
- } else {
- fts_zip_initialize(optim->zip);
- }
-
- for (selected = fts_select_index(
- optim->fts_index_table.charset, word->f_str, word->f_len);
- fts_index_selector[selected].value;
- selected++) {
-
- optim->fts_index_table.suffix = fts_get_suffix(selected);
-
- /* We've searched all indexes. */
- if (optim->fts_index_table.suffix == NULL) {
- return(DB_TABLE_NOT_FOUND);
- }
-
- info = pars_info_create();
-
- pars_info_bind_function(
- info, "my_func", fts_fetch_index_words, optim->zip);
-
- pars_info_bind_varchar_literal(
- info, "word", word->f_str, word->f_len);
-
- graph = fts_parse_sql(
- &optim->fts_index_table,
- info,
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS"
- " SELECT word\n"
- " FROM \"%s\"\n"
- " WHERE word > :word\n"
- " ORDER BY word;\n"
- "BEGIN\n"
- "\n"
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c % NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE c;");
-
- zip = optim->zip;
-
- for(;;) {
- int err;
-
- if (!inited && ((err = deflateInit(zip->zp, 9))
- != Z_OK)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: ZLib deflateInit() "
- "failed: %d\n", err);
-
- error = DB_ERROR;
- break;
- } else {
- inited = TRUE; /* stays TRUE for the following tables unless a retry resets it */
- error = fts_eval_sql(optim->trx, graph);
- }
-
- if (error == DB_SUCCESS) {
- //FIXME fts_sql_commit(optim->trx);
- break;
- } else {
- //FIXME fts_sql_rollback(optim->trx);
-
- ut_print_timestamp(stderr);
-
- if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: "
- "Warning: lock wait "
- "timeout reading document. "
- "Retrying!\n");
-
- /* We need to reset the ZLib state. Clearing
- inited makes the next iteration call
- deflateInit() again. */
- inited = FALSE;
- deflateEnd(zip->zp);
- fts_zip_init(zip);
-
- optim->trx->error_state = DB_SUCCESS;
- } else {
- fprintf(stderr, " InnoDB: Error: (%s) "
- "while reading document.\n",
- ut_strerr(error));
-
- break; /* Exit the loop. */
- }
- }
- }
-
- fts_que_graph_free(graph);
-
- /* Check if max word to fetch is exceeded */
- if (optim->zip->n_words >= n_words) {
- break;
- }
- }
-
- if (error == DB_SUCCESS && zip->status == Z_OK && zip->n_words > 0) {
-
- /* All data should have been read. */
- ut_a(zip->zp->avail_in == 0);
-
- fts_zip_deflate_end(zip);
- } else {
- deflateEnd(zip->zp); /* nothing to finish: just release the zlib state */
- }
-
- return(error);
-}
-
-/**********************************************************************//**
-Callback function to fetch the doc id from the record. An fts_update_t
-is appended to fts_doc_ids->doc_ids and filled from the single selected
-column.
-@return always returns TRUE */
-static
-ibool
-fts_fetch_doc_ids(
-/*==============*/
-	void*	row,		/*!< in: sel_node_t* */
-	void*	user_arg)	/*!< in: pointer to fts_doc_ids_t */
-{
-	sel_node_t*	sel_node = static_cast<sel_node_t*>(row);
-	fts_doc_ids_t*	fts_doc_ids = static_cast<fts_doc_ids_t*>(user_arg);
-	fts_update_t*	update = static_cast<fts_update_t*>(
-		ib_vector_push(fts_doc_ids->doc_ids, NULL));
-	que_node_t*	exp = sel_node->select_list;
-	int		col = 0;
-
-	while (exp) {
-		dfield_t*	dfield = que_node_get_val(exp);
-		void*		data = dfield_get_data(dfield);
-		ulint		len = dfield_get_len(dfield);
-
-		ut_a(len != UNIV_SQL_NULL);
-
-		/* Note: The column numbers below must match the SELECT. */
-		if (col == 0) {
-			/* DOC_ID */
-			update->fts_indexes = NULL;
-			update->doc_id = fts_read_doc_id(
-				static_cast<byte*>(data));
-		} else {
-			ut_error;
-		}
-
-		exp = que_node_get_next(exp);
-		++col;
-	}
-
-	return(TRUE);
-}
-
-/**********************************************************************//**
-Read the rows from a FTS common auxiliary table.
-Every doc_id of the table is collected into doc_ids; on success the
-collected ids are sorted with fts_update_doc_id_cmp. When trx is NULL a
-background transaction is allocated for the read and freed again before
-returning.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_table_fetch_doc_ids(
-/*====================*/
-	trx_t*		trx,		/*!< in: transaction, or NULL */
-	fts_table_t*	fts_table,	/*!< in: table */
-	fts_doc_ids_t*	doc_ids)	/*!< in/out: For collecting doc ids */
-{
-	pars_info_t*	info = pars_info_create();
-
-	ut_a(fts_table->suffix != NULL);
-	ut_a(fts_table->type == FTS_COMMON_TABLE);
-
-	/* Remember whether the transaction is ours to free. */
-	const bool	own_trx = (trx == NULL);
-
-	if (own_trx) {
-		trx = trx_allocate_for_background();
-	}
-
-	trx->op_info = "fetching FTS doc ids";
-
-	pars_info_bind_function(info, "my_func", fts_fetch_doc_ids, doc_ids);
-
-	que_t*	graph = fts_parse_sql(
-		fts_table,
-		info,
-		"DECLARE FUNCTION my_func;\n"
-		"DECLARE CURSOR c IS"
-		" SELECT doc_id FROM \"%s\";\n"
-		"BEGIN\n"
-		"\n"
-		"OPEN c;\n"
-		"WHILE 1 = 1 LOOP\n"
-		" FETCH c INTO my_func();\n"
-		" IF c % NOTFOUND THEN\n"
-		" EXIT;\n"
-		" END IF;\n"
-		"END LOOP;\n"
-		"CLOSE c;");
-
-	const dberr_t	error = fts_eval_sql(trx, graph);
-
-	/* The graph is freed while holding the dictionary mutex. */
-	mutex_enter(&dict_sys->mutex);
-	que_graph_free(graph);
-	mutex_exit(&dict_sys->mutex);
-
-	if (error != DB_SUCCESS) {
-		fts_sql_rollback(trx);
-	} else {
-		fts_sql_commit(trx);
-
-		ib_vector_sort(doc_ids->doc_ids, fts_update_doc_id_cmp);
-	}
-
-	if (own_trx) {
-		trx_free_for_background(trx);
-	}
-
-	return(error);
-}
-
-/**********************************************************************//**
-Do a binary search for a doc id in the array
-Note that a "not found" result is never 0: when the search ends at
-position 0 the function returns -1, so -1 is returned both for an empty
-range and for a doc id that belongs before the first element.
-@return +ve index if found -ve index where it should be inserted
- if not found */
-UNIV_INTERN
-int
-fts_bsearch(
-/*========*/
- fts_update_t* array, /*!< in: array to search */
- int lower, /*!< in: the array lower bound */
- int upper, /*!< in: the array upper bound */
- doc_id_t doc_id) /*!< in: the doc id to search for */
-{
- int orig_size = upper; /* upper is modified below; keep the original bound */
-
- if (upper == 0) {
- /* Nothing to search */
- return(-1);
- } else {
- while (lower < upper) {
- int i = (lower + upper) >> 1;
-
- if (doc_id > array[i].doc_id) {
- lower = i + 1;
- } else if (doc_id < array[i].doc_id) {
- upper = i - 1; /* may leave array[i - 1] unexamined; see the check after the loop */
- } else {
- return(i); /* Found. */
- }
- }
- }
-
- if (lower == upper && lower < orig_size) { /* one element left that the loop did not compare */
- if (doc_id == array[lower].doc_id) {
- return(lower);
- } else if (lower == 0) {
- return(-1);
- }
- }
-
- /* Not found. */
- return( (lower == 0) ? -1 : -lower);
-}
-
-/**********************************************************************//**
-Search in the to delete array whether any of the doc ids within
-the [first, last] range are to be deleted. first_doc_id is looked up
-with fts_bsearch(); when it is not found, the element at the returned
-insert position is accepted if its doc id does not exceed last_doc_id.
-@return +ve index if found -ve index where it should be inserted
- if not found */
-static
-int
-fts_optimize_lookup(
-/*================*/
-	ib_vector_t*	doc_ids,	/*!< in: array to search */
-	ulint		lower,		/*!< in: lower limit of array */
-	doc_id_t	first_doc_id,	/*!< in: doc id to lookup */
-	doc_id_t	last_doc_id)	/*!< in: doc id to lookup */
-{
-	fts_update_t*	array = (fts_update_t*) doc_ids->data;
-	const int	upper = static_cast<int>(ib_vector_size(doc_ids));
-	const int	pos = fts_bsearch(
-		array, static_cast<int>(lower), upper, first_doc_id);
-
-	ut_a(abs(pos) <= upper + 1);
-
-	if (pos >= 0) {
-		/* Exact match on first_doc_id. */
-		return(pos);
-	}
-
-	const int	ins = -pos;
-
-	/* If ins is 1, it could be first_doc_id is less than
-	either the first or second array item, do a
-	double check */
-	if (ins == 1
-	    && array[0].doc_id <= last_doc_id
-	    && first_doc_id < array[0].doc_id) {
-
-		return(0);
-	}
-
-	/* Check if the "next" doc id is within the
-	first & last doc id of the node. */
-	if (ins < upper && array[ins].doc_id <= last_doc_id) {
-
-		return(ins);
-	}
-
-	return(pos);
-}
-
-/**********************************************************************//**
-Encode the word pos list into the node. The doc id is appended to
-node->ilist as a delta against node->last_doc_id, followed by a verbatim
-copy of the encoded positions at enc->src_ilist_ptr up to and including
-their 0x00 terminator. The ilist buffer is allocated or enlarged as
-needed and enc->src_ilist_ptr is advanced past the copied bytes.
-@return DB_SUCCESS or error code*/
-static MY_ATTRIBUTE((nonnull))
-dberr_t
-fts_optimize_encode_node(
-/*=====================*/
-	fts_node_t*	node,	/*!< in/out: node to fill*/
-	doc_id_t	doc_id,	/*!< in: doc id to encode */
-	fts_encode_t*	enc)	/*!< in/out: encoding state.*/
-{
-	byte* const	pos_begin = enc->src_ilist_ptr;
-	byte*		pos_end = pos_begin;
-
-	if (node->first_doc_id == 0) {
-		ut_a(node->last_doc_id == 0);
-
-		node->first_doc_id = doc_id;
-	}
-
-	/* Doc ids are stored as deltas, so they must be increasing. */
-	ut_ad(doc_id > node->last_doc_id);
-
-	const doc_id_t	doc_id_delta = doc_id - node->last_doc_id;
-	ulint		enc_len = fts_get_encoded_len(
-		static_cast<ulint>(doc_id_delta));
-
-	/* Walk over the encoded positions to find where they end. */
-	while (*pos_end) {
-		fts_decode_vlc(&pos_end);
-	}
-
-	/* Include the 0x00 byte at the end of the word positions list. */
-	++pos_end;
-
-	/* Number of encoded pos bytes to copy. */
-	const ulint	pos_enc_len = pos_end - pos_begin;
-
-	/* Total number of bytes this document adds to the ilist. */
-	enc_len += pos_enc_len;
-
-	/* Make sure the destination buffer can take enc_len more bytes. */
-	if (node->ilist == NULL) {
-		ulint	new_size;
-
-		ut_a(node->ilist_size == 0);
-
-		if (enc_len > FTS_ILIST_MAX_SIZE) {
-			new_size = enc_len;
-		} else {
-			new_size = FTS_ILIST_MAX_SIZE;
-		}
-
-		node->ilist = static_cast<byte*>(ut_malloc(new_size));
-		node->ilist_size_alloc = new_size;
-
-	} else if (node->ilist_size + enc_len > node->ilist_size_alloc) {
-		const ulint	new_size = node->ilist_size + enc_len;
-		byte*		bigger = static_cast<byte*>(
-			ut_malloc(new_size));
-
-		memcpy(bigger, node->ilist, node->ilist_size);
-
-		ut_free(node->ilist);
-
-		node->ilist = bigger;
-		node->ilist_size_alloc = new_size;
-	}
-
-	byte*	dst = node->ilist + node->ilist_size;
-
-	/* Encode the doc id. Cast to ulint, the delta should be small and
-	therefore no loss of precision. */
-	dst += fts_encode_int((ulint) doc_id_delta, dst);
-
-	/* Copy the encoded pos array. */
-	memcpy(dst, pos_begin, pos_enc_len);
-
-	node->last_doc_id = doc_id;
-
-	/* Data copied up to here. */
-	node->ilist_size += enc_len;
-	enc->src_ilist_ptr += pos_enc_len;
-
-	ut_a(node->ilist_size <= node->ilist_size_alloc);
-
-	return(DB_SUCCESS);
-}
-
-/**********************************************************************//**
-Optimize the data contained in a node.
-Documents are decoded from src_node->ilist starting at
-enc->src_ilist_ptr (or at the start of the ilist if that is NULL).
-A document whose doc id equals the current entry of del_vec is skipped;
-every other document is appended to dst_node. Copying stops when the
-source is exhausted or dst_node->ilist_size reaches FTS_ILIST_MAX_SIZE.
-The position reached is saved in enc so that a later call can resume.
-@return DB_SUCCESS or error code*/
-static MY_ATTRIBUTE((nonnull))
-dberr_t
-fts_optimize_node(
-/*==============*/
- ib_vector_t* del_vec, /*!< in: vector of doc ids to delete*/
- int* del_pos, /*!< in: offset into above vector */
- fts_node_t* dst_node, /*!< in: node to fill*/
- fts_node_t* src_node, /*!< in: source node for data*/
- fts_encode_t* enc) /*!< in: encoding state */
-{
- ulint copied;
- dberr_t error = DB_SUCCESS;
- doc_id_t doc_id = enc->src_last_doc_id; /* resume from the last decoded doc id */
-
- if (!enc->src_ilist_ptr) {
- enc->src_ilist_ptr = src_node->ilist;
- }
-
- copied = enc->src_ilist_ptr - src_node->ilist;
-
- /* While there is data in the source node and space to copy
- into in the destination node. */
- while (copied < src_node->ilist_size
- && dst_node->ilist_size < FTS_ILIST_MAX_SIZE) {
-
- doc_id_t delta;
- doc_id_t del_doc_id = FTS_NULL_DOC_ID;
-
- delta = fts_decode_vlc(&enc->src_ilist_ptr);
-
-test_again:
- /* Check whether the doc id is in the delete list, if
- so then we skip the entries but we need to track the
- delta for decoding the entries following this document's
- entries. */
- if (*del_pos >= 0 && *del_pos < (int) ib_vector_size(del_vec)) {
- fts_update_t* update;
-
- update = (fts_update_t*) ib_vector_get(
- del_vec, *del_pos);
-
- del_doc_id = update->doc_id;
- }
-
- if (enc->src_ilist_ptr == src_node->ilist && doc_id == 0) {
- ut_a(delta == src_node->first_doc_id);
- }
-
- doc_id += delta; /* delta is 0 when re-entered through test_again */
-
- if (del_doc_id > 0 && doc_id == del_doc_id) {
-
- ++*del_pos;
-
- /* Skip the entries for this document. */
- while (*enc->src_ilist_ptr) {
- fts_decode_vlc(&enc->src_ilist_ptr);
- }
-
- /* Skip the end of word position marker. */
- ++enc->src_ilist_ptr;
-
- } else {
-
- /* DOC ID already becomes larger than
- del_doc_id, check the next del_doc_id.
- delta is zeroed so that doc_id is not advanced
- a second time. */
- if (del_doc_id > 0 && doc_id > del_doc_id) {
- del_doc_id = 0;
- ++*del_pos;
- delta = 0;
- goto test_again;
- }
-
- /* Decode and copy the word positions into
- the dest node. */
- fts_optimize_encode_node(dst_node, doc_id, enc);
-
- ++dst_node->doc_count;
-
- ut_a(dst_node->last_doc_id == doc_id);
- }
-
- /* Bytes copied so far from source. */
- copied = enc->src_ilist_ptr - src_node->ilist;
- }
-
- if (copied >= src_node->ilist_size) {
- ut_a(doc_id == src_node->last_doc_id);
- }
-
- enc->src_last_doc_id = doc_id;
-
- return(error);
-}
-
-/**********************************************************************//**
-Determine the starting pos within the deleted doc id vector for a word.
-The first doc id of the word's first node and the last doc id of its
-last node bound the range that is looked up in the deleted doc ids,
-starting from optim->del_pos.
-@return delete position, -1 if the deleted doc id vector is empty */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-int
-fts_optimize_deleted_pos(
-/*=====================*/
-	fts_optimize_t*	optim,	/*!< in: optimize state data */
-	fts_word_t*	word)	/*!< in: the word data to check */
-{
-	ib_vector_t*	del_vec = optim->to_delete->doc_ids;
-
-	if (ib_vector_size(del_vec) == 0) {
-		/* Note that there is nothing to delete. */
-		return(-1);
-	}
-
-	/* Get the first and last doc ids for the word, we will use
-	these values to determine which doc ids need to be removed
-	when we coalesce the nodes. This way we can reduce the number
-	of elements that need to be searched in the deleted doc ids
-	vector and secondly we can remove the doc ids during the
-	coalescing phase. */
-	const ulint	n_nodes = ib_vector_size(word->nodes);
-
-	fts_node_t*	head = (fts_node_t*) ib_vector_get(word->nodes, 0);
-	const doc_id_t	first_id = head->first_doc_id;
-
-	fts_node_t*	tail = (fts_node_t*) ib_vector_get(
-		word->nodes, n_nodes - 1);
-	const doc_id_t	last_id = tail->last_doc_id;
-
-	ut_a(first_id <= last_id);
-
-	return(fts_optimize_lookup(
-		del_vec, optim->del_pos, first_id, last_id));
-}
-
-#define FTS_DEBUG_PRINT
-/**********************************************************************//**
-Compact the nodes for a word, we also remove any doc ids during the
-compaction pass.
-The source nodes of the word are copied, in order, into new destination
-nodes; a new destination node is started once the current one has
-reached FTS_ILIST_MAX_SIZE. The ilist of each source node is freed as
-soon as it has been copied completely. The ilists of the returned nodes
-are not freed here.
-@return vector of the optimized nodes, allocated from the word's heap */
-static
-ib_vector_t*
-fts_optimize_word(
-/*==============*/
- fts_optimize_t* optim, /*!< in: optimize state data */
- fts_word_t* word) /*!< in: the word to optimize */
-{
- fts_encode_t enc;
- ib_vector_t* nodes;
- ulint i = 0;
- int del_pos;
- fts_node_t* dst_node = NULL;
- ib_vector_t* del_vec = optim->to_delete->doc_ids;
- ulint size = ib_vector_size(word->nodes);
-
- del_pos = fts_optimize_deleted_pos(optim, word);
- nodes = ib_vector_create(word->heap_alloc, sizeof(*dst_node), 128);
-
- enc.src_last_doc_id = 0;
- enc.src_ilist_ptr = NULL;
-
- if (fts_enable_diag_print) {
- word->text.f_str[word->text.f_len] = 0;
- fprintf(stderr, "FTS_OPTIMIZE: optimize \"%s\"\n",
- word->text.f_str);
- }
-
- while (i < size) {
- ulint copied;
- fts_node_t* src_node;
-
- src_node = (fts_node_t*) ib_vector_get(word->nodes, i);
-
- if (dst_node == NULL
- || dst_node->last_doc_id > src_node->first_doc_id) {
-
- dst_node = static_cast<fts_node_t*>(
- ib_vector_push(nodes, NULL));
- memset(dst_node, 0, sizeof(*dst_node));
- }
-
- /* Copy from the src to the dst node. */
- fts_optimize_node(del_vec, &del_pos, dst_node, src_node, &enc);
-
- ut_a(enc.src_ilist_ptr != NULL);
-
- /* Determine the number of bytes copied to dst_node. */
- copied = enc.src_ilist_ptr - src_node->ilist;
-
- /* Can't copy more than what's in the vlc array. */
- ut_a(copied <= src_node->ilist_size);
-
- /* We are done with this node release the resources.
- The encoding state is reset so that the next source node
- is read from its beginning. */
- if (copied == src_node->ilist_size) {
-
- enc.src_last_doc_id = 0;
- enc.src_ilist_ptr = NULL;
-
- ut_free(src_node->ilist);
-
- src_node->ilist = NULL;
- src_node->ilist_size = src_node->ilist_size_alloc = 0;
-
- src_node = NULL;
-
- ++i; /* Get next source node to OPTIMIZE. */
- }
-
- if (dst_node->ilist_size >= FTS_ILIST_MAX_SIZE || i >= size) {
-
- dst_node = NULL; /* full, or no source left: the next pass starts a new node */
- }
- }
-
- /* All dst nodes created should have been added to the vector. */
- ut_a(dst_node == NULL);
-
- /* Return the OPTIMIZED nodes. */
- return(nodes);
-}
-
-/**********************************************************************//**
-Update the FTS index table. This is a delete followed by an insert.
-All rows of the word are deleted from the auxiliary index table selected
-by the word, then each node in `nodes` is written with fts_write_node()
-for as long as no error has occurred. The ilist of every node is freed
-before returning, whether or not the writes succeeded.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_write_word(
-/*====================*/
-	trx_t*		trx,		/*!< in: transaction */
-	fts_table_t*	fts_table,	/*!< in/out: table of FTS index;
-					its suffix is set here */
-	fts_string_t*	word,		/*!< in: word data to write */
-	ib_vector_t*	nodes)		/*!< in/out: the nodes to write;
-					their ilists are freed */
-{
-	ulint		i;
-	pars_info_t*	info;
-	que_t*		graph;
-	ulint		selected;
-	dberr_t		error = DB_SUCCESS;
-
-	/* The table name is not needed in this function: the "%s" in the
-	statement below is handled by fts_parse_sql() from fts_table. The
-	name that used to be allocated here was freed without being used. */
-
-	info = pars_info_create();
-
-	ut_ad(fts_table->charset);
-
-	if (fts_enable_diag_print) {
-		fprintf(stderr, "FTS_OPTIMIZE: processed \"%s\"\n",
-			word->f_str);
-	}
-
-	pars_info_bind_varchar_literal(
-		info, "word", word->f_str, word->f_len);
-
-	/* Pick the auxiliary index table that holds this word. */
-	selected = fts_select_index(fts_table->charset,
-				    word->f_str, word->f_len);
-
-	fts_table->suffix = fts_get_suffix(selected);
-
-	graph = fts_parse_sql(
-		fts_table,
-		info,
-		"BEGIN DELETE FROM \"%s\" WHERE word = :word;");
-
-	error = fts_eval_sql(trx, graph);
-
-	if (error != DB_SUCCESS) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr, " InnoDB: Error: (%s) during optimize, "
-			"when deleting a word from the FTS index.\n",
-			ut_strerr(error));
-	}
-
-	fts_que_graph_free(graph);
-	graph = NULL;
-
-	/* Even if the operation needs to be rolled back and redone,
-	we iterate over the nodes in order to free the ilist. */
-	for (i = 0; i < ib_vector_size(nodes); ++i) {
-
-		fts_node_t*	node = (fts_node_t*) ib_vector_get(nodes, i);
-
-		if (error == DB_SUCCESS) {
-			error = fts_write_node(
-				trx, &graph, fts_table, word, node);
-
-			if (error != DB_SUCCESS) {
-				ut_print_timestamp(stderr);
-				fprintf(stderr, " InnoDB: Error: (%s) "
-					"during optimize, while adding a "
-					"word to the FTS index.\n",
-					ut_strerr(error));
-			}
-		}
-
-		ut_free(node->ilist);
-		node->ilist = NULL;
-		node->ilist_size = node->ilist_size_alloc = 0;
-	}
-
-	/* fts_write_node() may have left a prepared graph in `graph`. */
-	if (graph != NULL) {
-		fts_que_graph_free(graph);
-	}
-
-	return(error);
-}
-
-/**********************************************************************//**
-Free the memory heap that backs an fts_word_t instance. The word
-structure itself is not freed; in debug builds it is zeroed. */
-UNIV_INTERN
-void
-fts_word_free(
-/*==========*/
-	fts_word_t*	word)	/*!< in: instance to free.*/
-{
-	/* Fetch the heap first: the debug memset below wipes the
-	pointer it is reached through. */
-	mem_heap_t*	word_heap = static_cast<mem_heap_t*>(
-		word->heap_alloc->arg);
-
-#ifdef UNIV_DEBUG
-	memset(word, 0, sizeof(*word));
-#endif /* UNIV_DEBUG */
-
-	mem_heap_free(word_heap);
-}
-
-/**********************************************************************//**
-Optimize the word ilist and rewrite data to the FTS index.
-Each word in optim->words is compacted, written back and recorded as the
-last optimized word in the config table, then freed. The loop ends early
-on the first error or once optim->done is set, which happens here when
-fts_optimize_time_limit is positive and has been exceeded.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_compact(
-/*=================*/
-	fts_optimize_t*	optim,		/*!< in/out: optimize state data */
-	dict_index_t*	index,		/*!< in: current FTS being optimized */
-	ib_time_t	start_time)	/*!< in: optimize start time */
-{
-	dberr_t		error = DB_SUCCESS;
-	const ulint	n_words = ib_vector_size(optim->words);
-	ulint		i = 0;
-
-	while (i < n_words && error == DB_SUCCESS && !optim->done) {
-		trx_t*		trx = optim->trx;
-		fts_word_t*	word = (fts_word_t*) ib_vector_get(
-			optim->words, i);
-
-		/* nodes is allocated from the word heap and will be destroyed
-		when the word is freed. We however have to be careful about
-		the ilist, that needs to be freed explicitly. */
-		ib_vector_t*	nodes = fts_optimize_word(optim, word);
-
-		/* Update the data on disk. */
-		error = fts_optimize_write_word(
-			trx, &optim->fts_index_table, &word->text, nodes);
-
-		if (error == DB_SUCCESS) {
-			/* Write the last word optimized to the config table,
-			we use this value for restarting optimize. */
-			error = fts_config_set_index_value(
-				optim->trx, index,
-				FTS_LAST_OPTIMIZED_WORD, &word->text);
-		}
-
-		/* Free the word that was optimized. */
-		fts_word_free(word);
-
-		/* Stop after this word if the time budget is used up. */
-		if (fts_optimize_time_limit > 0
-		    && (ut_time() - start_time) > fts_optimize_time_limit) {
-
-			optim->done = TRUE;
-		}
-
-		++i;
-	}
-
-	return(error);
-}
-
-/**********************************************************************//**
-Create an instance of fts_optimize_t. Also create a new
-background transaction. The instance is allocated, zero filled, from a
-heap of its own, which also backs the words vector.
-@return the new instance */
-static
-fts_optimize_t*
-fts_optimize_create(
-/*================*/
-	dict_table_t*	table)	/*!< in: table with FTS indexes */
-{
-	mem_heap_t*	self_heap = mem_heap_create(128);
-	fts_optimize_t*	optim = (fts_optimize_t*) mem_heap_zalloc(
-		self_heap, sizeof(*optim));
-
-	optim->self_heap = ib_heap_allocator_create(self_heap);
-
-	optim->to_delete = fts_doc_ids_create();
-
-	optim->words = ib_vector_create(
-		optim->self_heap, sizeof(fts_word_t), 256);
-
-	optim->table = table;
-
-	optim->trx = trx_allocate_for_background();
-
-	/* Descriptor used for the common auxiliary tables. */
-	fts_table_t*	common = &optim->fts_common_table;
-
-	common->parent = table->name;
-	common->table_id = table->id;
-	common->type = FTS_COMMON_TABLE;
-	common->table = table;
-
-	/* Descriptor used for the auxiliary index tables. */
-	fts_table_t*	aux_index = &optim->fts_index_table;
-
-	aux_index->parent = table->name;
-	aux_index->table_id = table->id;
-	aux_index->type = FTS_INDEX_TABLE;
-	aux_index->table = table;
-
-	/* The common prefix for all this parent table's aux tables. */
-	optim->name_prefix = fts_get_table_name_prefix(common);
-
-	return(optim);
-}
-
-#ifdef FTS_OPTIMIZE_DEBUG
-/**********************************************************************//**
-Read the FTS_OPTIMIZE_START_TIME config value of an FTS index.
-@return DB_SUCCESS if all OK else error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_get_index_start_time(
-/*==============================*/
-	trx_t*		trx,		/*!< in: transaction */
-	dict_index_t*	index,		/*!< in: FTS index */
-	ib_time_t*	start_time)	/*!< out: time in secs */
-{
-	/* The config value is read through a ulint pointer. */
-	ulint*	value = (ulint*) start_time;
-
-	return(fts_config_get_index_ulint(
-		trx, index, FTS_OPTIMIZE_START_TIME, value));
-}
-
-/**********************************************************************//**
-Store the FTS_OPTIMIZE_START_TIME config value of an FTS index.
-@return DB_SUCCESS if all OK else error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_set_index_start_time(
-/*==============================*/
-	trx_t*		trx,		/*!< in: transaction */
-	dict_index_t*	index,		/*!< in: FTS index */
-	ib_time_t	start_time)	/*!< in: start time */
-{
-	/* The config value is written as a ulint. */
-	ulint	value = (ulint) start_time;
-
-	return(fts_config_set_index_ulint(
-		trx, index, FTS_OPTIMIZE_START_TIME, value));
-}
-
-/**********************************************************************//**
-Read the FTS_OPTIMIZE_END_TIME config value of an FTS index.
-@return DB_SUCCESS if all OK else error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_get_index_end_time(
-/*============================*/
-	trx_t*		trx,		/*!< in: transaction */
-	dict_index_t*	index,		/*!< in: FTS index */
-	ib_time_t*	end_time)	/*!< out: time in secs */
-{
-	/* The config value is read through a ulint pointer. */
-	ulint*	value = (ulint*) end_time;
-
-	return(fts_config_get_index_ulint(
-		trx, index, FTS_OPTIMIZE_END_TIME, value));
-}
-
-/**********************************************************************//**
-Store the optimize end time of an FTS index under the
-FTS_OPTIMIZE_END_TIME key, using fts_config_set_index_ulint().
-@return DB_SUCCESS if all OK else error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_set_index_end_time(
-/*============================*/
-	trx_t*		trx,		/*!< in: transaction */
-	dict_index_t*	index,		/*!< in: FTS index */
-	ib_time_t	end_time)	/*!< in: end time */
-{
-	/* The config helper takes a ulint, so convert by value first. */
-	const ulint	secs = (ulint) end_time;
-
-	dberr_t		err = fts_config_set_index_ulint(
-		trx, index, FTS_OPTIMIZE_END_TIME, secs);
-
-	return(err);
-}
-#endif
-
-/**********************************************************************//**
-Release every prepared query graph held by an optimize graph set and
-clear the corresponding pointers. Graphs that are already NULL are
-left alone. */
-static
-void
-fts_optimize_graph_free(
-/*====================*/
-	fts_optimize_graph_t*	graph)	/*!< in/out: The graph instances
-					to free */
-{
-	/* Commit graph. */
-	if (graph->commit_graph != NULL) {
-		que_graph_free(graph->commit_graph);
-		graph->commit_graph = NULL;
-	}
-
-	/* Node write graph. */
-	if (graph->write_nodes_graph != NULL) {
-		que_graph_free(graph->write_nodes_graph);
-		graph->write_nodes_graph = NULL;
-	}
-
-	/* Node delete graph. */
-	if (graph->delete_nodes_graph != NULL) {
-		que_graph_free(graph->delete_nodes_graph);
-		graph->delete_nodes_graph = NULL;
-	}
-
-	/* Node read graph. */
-	if (graph->read_nodes_graph != NULL) {
-		que_graph_free(graph->read_nodes_graph);
-		graph->read_nodes_graph = NULL;
-	}
-}
-
-/**********************************************************************//**
-Free all optimize resources: the background transaction, the list of
-doc ids to delete (if still present), the prepared graphs, the aux
-table name prefix and finally the heap that holds optim itself. */
-static
-void
-fts_optimize_free(
-/*==============*/
-	fts_optimize_t*	optim)	/*!< in: optimize instance to free; must not
-				be used after this call */
-{
-	/* Fetch the heap pointer up front: optim is allocated from this
-	heap, so it cannot be dereferenced once the heap is gone. */
-	mem_heap_t*	heap = static_cast<mem_heap_t*>(optim->self_heap->arg);
-
-	trx_free_for_background(optim->trx);
-
-	/* The doc id list may already have been released and reset to
-	NULL by fts_optimize_read_deleted_doc_id_snapshot() when the read
-	failed. Do not hand a NULL list to fts_doc_ids_free(). */
-	if (optim->to_delete != NULL) {
-		fts_doc_ids_free(optim->to_delete);
-		optim->to_delete = NULL;
-	}
-
-	fts_optimize_graph_free(&optim->graph);
-
-	mem_free(optim->name_prefix);
-
-	/* This will free the heap from which optim itself was allocated. */
-	mem_heap_free(heap);
-}
-
-/**********************************************************************//**
-Get the max time optimize should run in millisecs.
-The FTS_OPTIMIZE_LIMIT_IN_SECS config value is read into a local ulint
-and converted afterwards, rather than writing through an ib_time_t*
-that has been cast to ulint*: nothing guarantees that the two types
-have the same size.
-@return max optimize time limit in millisecs. */
-static
-ib_time_t
-fts_optimize_get_time_limit(
-/*========================*/
-	trx_t*		trx,		/*!< in: transaction */
-	fts_table_t*	fts_table)	/*!< in: aux table */
-{
-	ulint	limit_secs = 0;
-
-	/* As before, the status of the lookup is not inspected; the
-	local starts out as 0.
-	NOTE(review): presumably a failed lookup leaves limit_secs
-	untouched, giving a limit of 0 - confirm against
-	fts_config_get_ulint(). */
-	fts_config_get_ulint(
-		trx, fts_table,
-		FTS_OPTIMIZE_LIMIT_IN_SECS, &limit_secs);
-
-	/* Seconds to milliseconds. */
-	return(static_cast<ib_time_t>(limit_secs) * 1000);
-}
-
-
-/**********************************************************************//**
-Run OPTIMIZE on the given table. Note: this can take a very long time
-(hours).
-Starting from "word", each pass of the loop fetches the index nodes of
-the current word into optim->words, compacts them with
-fts_optimize_compact(), commits (or rolls back on failure), empties
-optim->words and then reads the next word from optim->zip.
-This function itself sets optim->done when no further word can be read
-from the zip buffer, or on an error other than DB_LOCK_WAIT_TIMEOUT /
-DB_DEADLOCK; the !optim->done re-check after compaction suggests that a
-callee may set it as well (not visible here). */
-static
-void
-fts_optimize_words(
-/*===============*/
- fts_optimize_t* optim, /*!< in: optimize instance */
- dict_index_t* index, /*!< in: current FTS being optimized */
- fts_string_t* word) /*!< in/out: the starting word to optimize;
- also passed to fts_zip_read_word() to receive
- each following word */
-{
- fts_fetch_t fetch;
- ib_time_t start_time;
- que_t* graph = NULL;
- CHARSET_INFO* charset = optim->fts_index_table.charset;
-
- ut_a(!optim->done);
-
- /* Get the time limit from the config table. The result is stored
- in fts_optimize_time_limit, which is not a local of this function. */
- fts_optimize_time_limit = fts_optimize_get_time_limit(
- optim->trx, &optim->fts_common_table);
-
- start_time = ut_time();
-
- /* Setup the callback to use for fetching the word ilist etc. */
- fetch.read_arg = optim->words;
- fetch.read_record = fts_optimize_index_fetch_node;
-
- fprintf(stderr, "%.*s\n", (int) word->f_len, word->f_str);
-
- while(!optim->done) {
- dberr_t error;
- trx_t* trx = optim->trx;
- ulint selected;
-
- ut_a(ib_vector_size(optim->words) == 0);
-
- selected = fts_select_index(charset, word->f_str, word->f_len);
-
- /* Read the index records to optimize. */
- fetch.total_memory = 0;
- error = fts_index_fetch_nodes(
- trx, &graph, &optim->fts_index_table, word,
- &fetch);
- ut_ad(fetch.total_memory < fts_result_cache_limit);
-
- if (error == DB_SUCCESS) {
- /* There must be some nodes to read. */
- ut_a(ib_vector_size(optim->words) > 0);
-
- /* Optimize the nodes that were read and write
- back to DB. */
- error = fts_optimize_compact(optim, index, start_time);
-
- if (error == DB_SUCCESS) {
- fts_sql_commit(optim->trx);
- } else {
- fts_sql_rollback(optim->trx);
- }
- }
-
- ib_vector_reset(optim->words);
-
- if (error == DB_SUCCESS) {
- if (!optim->done) {
- if (!fts_zip_read_word(optim->zip, word)) {
- optim->done = TRUE;
- } else if (selected
- != fts_select_index(
- charset, word->f_str,
- word->f_len)
- && graph) {
- fts_que_graph_free(graph);
- graph = NULL;
- }
- }
- } else if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, "InnoDB: Warning: lock wait timeout "
- "during optimize. Retrying!\n");
-
- trx->error_state = DB_SUCCESS;
- } else if (error == DB_DEADLOCK) {
- fprintf(stderr, "InnoDB: Warning: deadlock "
- "during optimize. Retrying!\n");
-
- trx->error_state = DB_SUCCESS;
- } else {
- optim->done = TRUE; /* Exit the loop. */
- }
- }
-
- if (graph != NULL) {
- fts_que_graph_free(graph);
- }
-}
-
-/**********************************************************************//**
-Advance "word" to the start of the next slot of fts_index_selector.
-If the next slot has value 0 the word is reset to the empty string,
-otherwise it becomes the single byte stored in that slot.
-@return TRUE if the word was reset to the empty string (last index) */
-static
-ibool
-fts_optimize_set_next_word(
-/*=======================*/
-	CHARSET_INFO*	charset,	/*!< in: charset */
-	fts_string_t*	word)		/*!< in/out: current last word */
-{
-	const ulint	next_slot = fts_select_next_index(
-		charset, word->f_str, word->f_len);
-	const ulint	first_byte = fts_index_selector[next_slot].value;
-
-	if (first_byte == 0) {
-		/* This was the last index: start over from ''. */
-		word->f_len = 0;
-		*word->f_str = 0;
-
-		return(TRUE);
-	}
-
-	ut_a(first_byte <= 0xff);
-
-	/* Continue from the first character of the next slot. */
-	word->f_len = 1;
-	*word->f_str = (byte) first_byte;
-
-	return(FALSE);
-}
-
-/**********************************************************************//**
-Optimize is complete. Set the completion time (FTS_OPTIMIZE_DEBUG
-builds only), and reset the optimize start string for this FTS index
-to "".
-@return DB_SUCCESS if all OK */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_index_completed(
-/*=========================*/
-	fts_optimize_t*	optim,	/*!< in: optimize instance */
-	dict_index_t*	index)	/*!< in: FTS index that was optimized */
-{
-	fts_string_t	word;
-	dberr_t		error;
-	byte		buf[sizeof(ulint)];
-
-#ifdef FTS_OPTIMIZE_DEBUG
-	/* The status of this call used to be overwritten unchecked by
-	the assignment further down; report the failure instead. */
-	error = fts_optimize_set_index_end_time(
-		optim->trx, index, ut_time());
-
-	if (error != DB_SUCCESS) {
-		return(error);
-	}
-#endif
-
-	/* If we've reached the end of the index then set the start
-	word to the empty string. */
-	word.f_len = 0;
-	word.f_str = buf;
-	*word.f_str = '\0';
-
-	error = fts_config_set_index_value(
-		optim->trx, index, FTS_LAST_OPTIMIZED_WORD, &word);
-
-	if (error != DB_SUCCESS) {
-
-		fprintf(stderr, "InnoDB: Error: (%s) while "
-			"updating last optimized word!\n", ut_strerr(error));
-	}
-
-	return(error);
-}
-
-
-/**********************************************************************//**
-Collect the words of the FTS auxiliary index that are to be optimized
-in this pass. The scan starts after the last optimized word recorded
-for the index, or from the beginning when the delete list has just
-been regenerated or no such record exists.
-@return DB_SUCCESS if all OK */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_index_read_words(
-/*==========================*/
-	fts_optimize_t*	optim,	/*!< in: optimize instance */
-	dict_index_t*	index,	/*!< in: table with one FTS index */
-	fts_string_t*	word)	/*!< in/out: buffer to use */
-{
-	dberr_t	err = DB_SUCCESS;
-
-	if (!optim->del_list_regenerated) {
-		/* Resume after the word stored in the config table. */
-		err = fts_config_get_index_value(
-			optim->trx, index, FTS_LAST_OPTIMIZED_WORD, word);
-	} else {
-		word->f_len = 0;
-	}
-
-	if (err == DB_RECORD_NOT_FOUND) {
-		/* Nothing recorded yet: begin at the top. */
-		word->f_len = 0;
-		err = DB_SUCCESS;
-	}
-
-	while (err == DB_SUCCESS) {
-
-		err = fts_index_fetch_words(
-			optim, word, fts_num_word_optimize);
-
-		if (err != DB_SUCCESS) {
-			break;
-		}
-
-		if (optim->zip->n_words > 0) {
-			/* Got something to work on. */
-			break;
-		}
-
-		/* Empty result: move on to the next index in the
-		horizontal split, stopping once it wraps around. */
-		fts_optimize_set_next_word(
-			optim->fts_index_table.charset, word);
-
-		if (word->f_len == 0) {
-			break;
-		}
-	}
-
-	return(err);
-}
-
-/**********************************************************************//**
-Run OPTIMIZE on the given FTS index. Note: this can take a very long
-time (hours).
-Points optim->fts_index_table at the index, reads the batch of words to
-optimize with fts_optimize_index_read_words() and hands the first word
-taken from the zip buffer to fts_optimize_words(). When the batch is
-empty (optim->zip->n_words == 0) the index is marked as completed and
-optim->n_completed is incremented.
-fts_optimize_words() returns void, so a failure inside it is not
-reflected in the value returned here.
-@return DB_SUCCESS if all OK */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_index(
-/*===============*/
- fts_optimize_t* optim, /*!< in: optimize instance */
- dict_index_t* index) /*!< in: FTS index to optimize */
-{
- fts_string_t word;
- dberr_t error;
- byte str[FTS_MAX_WORD_LEN + 1];
-
- /* Set the current index that we have to optimize. */
- optim->fts_index_table.index_id = index->id;
- optim->fts_index_table.charset = fts_index_get_charset(index);
-
- optim->done = FALSE; /* Optimize until !done */
-
- /* We need to read the last word optimized so that we start from
- the next word. */
- word.f_str = str;
-
- /* We set the length of word to the size of str since we
- need to pass the max len info to the fts_get_config_value() function.
- Note that only sizeof(str) - 1 bytes are zeroed below. */
- word.f_len = sizeof(str) - 1;
-
- memset(word.f_str, 0x0, word.f_len);
-
- /* Read the words that will be optimized in this pass. */
- error = fts_optimize_index_read_words(optim, index, &word);
-
- if (error == DB_SUCCESS) {
- int zip_error;
-
- ut_a(optim->zip->pos == 0);
- ut_a(optim->zip->zp->total_in == 0);
- ut_a(optim->zip->zp->total_out == 0);
-
- zip_error = inflateInit(optim->zip->zp);
- ut_a(zip_error == Z_OK);
-
- word.f_len = 0;
- word.f_str = str;
-
- /* Read the first word to optimize from the Zip buffer. */
- if (!fts_zip_read_word(optim->zip, &word)) {
-
- optim->done = TRUE;
- } else {
- fts_optimize_words(optim, index, &word);
- }
-
- /* If we couldn't read any records then optimize is
- complete. Increment the number of indexes that have
- been optimized and set FTS index optimize state to
- completed. */
- if (error == DB_SUCCESS && optim->zip->n_words == 0) {
-
- error = fts_optimize_index_completed(optim, index);
-
- if (error == DB_SUCCESS) {
- ++optim->n_completed;
- }
- }
- }
-
- return(error);
-}
-
-/**********************************************************************//**
-Delete the document ids in the delete, and delete cache tables.
-The statement fts_delete_doc_ids_sql is parsed once, with "%s" replaced
-by optim->name_prefix, and then evaluated once per entry of
-optim->to_delete->doc_ids with doc_id1 / doc_id2 re-bound to that
-entry. The list must not be empty (asserted). On the first failing
-statement the transaction is rolled back and the loop stops.
-@return DB_SUCCESS if all OK */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_purge_deleted_doc_ids(
-/*===============================*/
- fts_optimize_t* optim) /*!< in: optimize instance */
-{
- ulint i;
- pars_info_t* info;
- que_t* graph;
- fts_update_t* update;
- char* sql_str;
- doc_id_t write_doc_id;
- dberr_t error = DB_SUCCESS;
-
- info = pars_info_create();
-
- ut_a(ib_vector_size(optim->to_delete->doc_ids) > 0);
-
- update = static_cast<fts_update_t*>(
- ib_vector_get(optim->to_delete->doc_ids, 0));
-
- /* Convert to "storage" byte order. */
- fts_write_doc_id((byte*) &write_doc_id, update->doc_id);
-
- /* This is required for the SQL parser to work. It must be able
- to find the following variables. So we do it twice: once here, before
- parsing, and again for every doc id inside the loop below. */
- fts_bind_doc_id(info, "doc_id1", &write_doc_id);
- fts_bind_doc_id(info, "doc_id2", &write_doc_id);
-
- /* Since we only replace the table_id and don't construct the full
- name, we do substitution ourselves. Remember to free sql_str. */
- sql_str = ut_strreplace(
- fts_delete_doc_ids_sql, "%s", optim->name_prefix);
-
- graph = fts_parse_sql(NULL, info, sql_str);
-
- mem_free(sql_str);
-
- /* Delete the doc ids that were copied at the start. */
- for (i = 0; i < ib_vector_size(optim->to_delete->doc_ids); ++i) {
-
- update = static_cast<fts_update_t*>(ib_vector_get(
- optim->to_delete->doc_ids, i));
-
- /* Convert to "storage" byte order. */
- fts_write_doc_id((byte*) &write_doc_id, update->doc_id);
-
- fts_bind_doc_id(info, "doc_id1", &write_doc_id);
-
- fts_bind_doc_id(info, "doc_id2", &write_doc_id);
-
- error = fts_eval_sql(optim->trx, graph);
-
- // FIXME: Check whether delete actually succeeded!
- if (error != DB_SUCCESS) {
-
- fts_sql_rollback(optim->trx);
- break;
- }
- }
-
- fts_que_graph_free(graph);
-
- return(error);
-}
-
-/**********************************************************************//**
-Run the fts_end_delete_sql statement, which removes the doc ids that
-were moved to the pending delete state at the start of optimize.
-The statement is neither committed nor rolled back here.
-@return DB_SUCCESS if all OK */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_purge_deleted_doc_id_snapshot(
-/*=======================================*/
-	fts_optimize_t*	optim)	/*!< in: optimize instance */
-{
-	/* Only the aux table prefix is substituted for '%s'; the full
-	table names are not constructed by the parser in this case. */
-	char*	sql = ut_strreplace(
-		fts_end_delete_sql, "%s", optim->name_prefix);
-
-	que_t*	graph = fts_parse_sql(NULL, NULL, sql);
-
-	/* The text is no longer needed once it has been parsed. */
-	mem_free(sql);
-
-	const dberr_t	err = fts_eval_sql(optim->trx, graph);
-
-	fts_que_graph_free(graph);
-
-	return(err);
-}
-
-/**********************************************************************//**
-Count the rows of the BEING_DELETED common auxiliary table that belongs
-to the table being optimized.
-@return the value reported by fts_get_rows_count() for that table */
-static
-ulint
-fts_optimize_being_deleted_count(
-/*=============================*/
- fts_optimize_t* optim) /*!< in: optimize instance */
-{
- fts_table_t fts_table;
-
- FTS_INIT_FTS_TABLE(&fts_table, "BEING_DELETED", FTS_COMMON_TABLE,
- optim->table);
-
- return(fts_get_rows_count(&fts_table));
-}
-
-/*********************************************************************//**
-Take the snapshot of deleted doc ids for this optimize run by executing
-fts_init_delete_sql. The transaction is committed when the statement
-succeeds and rolled back otherwise. optim->del_list_regenerated is set
-to TRUE in both cases.
-@return DB_SUCCESS if all OK */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_create_deleted_doc_id_snapshot(
-/*========================================*/
-	fts_optimize_t*	optim)	/*!< in: optimize instance */
-{
-	/* Only the aux table prefix is substituted for '%s'; the full
-	table names are not constructed by the parser in this case. */
-	char*	sql = ut_strreplace(
-		fts_init_delete_sql, "%s", optim->name_prefix);
-
-	/* Move doc_ids that are to be deleted to state being deleted. */
-	que_t*	graph = fts_parse_sql(NULL, NULL, sql);
-
-	mem_free(sql);
-
-	const dberr_t	err = fts_eval_sql(optim->trx, graph);
-
-	fts_que_graph_free(graph);
-
-	if (err == DB_SUCCESS) {
-		fts_sql_commit(optim->trx);
-	} else {
-		fts_sql_rollback(optim->trx);
-	}
-
-	optim->del_list_regenerated = TRUE;
-
-	return(err);
-}
-
-/*********************************************************************//**
-Read in the document ids that are to be purged during optimize, first
-from BEING_DELETED and then from BEING_DELETED_CACHE, appending both to
-optim->to_delete. This function does not commit. On failure the doc id
-list is freed and optim->to_delete is set to NULL.
-@return DB_SUCCESS if all OK */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_read_deleted_doc_id_snapshot(
-/*======================================*/
-	fts_optimize_t*	optim)	/*!< in: optimize instance */
-{
-	static const char* const	suffixes[] = {
-		"BEING_DELETED",
-		"BEING_DELETED_CACHE"
-	};
-
-	dberr_t	err = DB_SUCCESS;
-
-	/* Read the tables in order, stopping at the first failure. */
-	for (ulint i = 0; i < sizeof(suffixes) / sizeof(suffixes[0]); ++i) {
-
-		optim->fts_common_table.suffix = suffixes[i];
-
-		err = fts_table_fetch_doc_ids(
-			optim->trx, &optim->fts_common_table,
-			optim->to_delete);
-
-		if (err != DB_SUCCESS) {
-			break;
-		}
-	}
-
-	if (err != DB_SUCCESS) {
-		/* Drop whatever was read so far. */
-		fts_doc_ids_free(optim->to_delete);
-		optim->to_delete = NULL;
-	}
-
-	return(err);
-}
-
-/*********************************************************************//**
-Optimize all the FTS indexes of the table, in the order in which they
-appear in fts->indexes, then commit if the last result was DB_SUCCESS
-and roll back otherwise. In FTS_OPTIMIZE_DEBUG builds the recorded
-start/end times are consulted first so that indexes which have already
-been optimized are counted as completed, since the FTS auxiliary
-indexes are not guaranteed to be of the same cardinality.
-@return DB_SUCCESS if all OK */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_indexes(
-/*=================*/
-	fts_optimize_t*	optim)	/*!< in: optimize instance */
-{
-	ulint	i;
-	dberr_t	error = DB_SUCCESS;
-	fts_t*	fts = optim->table->fts;
-
-	/* Optimize the FTS indexes. */
-	for (i = 0; i < ib_vector_size(fts->indexes); ++i) {
-		/* Look the index up before anything else: the debug
-		block below used to read this pointer while it was
-		still uninitialised. */
-		dict_index_t*	index = static_cast<dict_index_t*>(
-			ib_vector_getp(fts->indexes, i));
-
-#ifdef FTS_OPTIMIZE_DEBUG
-		ib_time_t	end_time;
-		ib_time_t	start_time;
-
-		/* Get the start and end optimize times for this index. */
-		error = fts_optimize_get_index_start_time(
-			optim->trx, index, &start_time);
-
-		if (error != DB_SUCCESS) {
-			break;
-		}
-
-		error = fts_optimize_get_index_end_time(
-			optim->trx, index, &end_time);
-
-		if (error != DB_SUCCESS) {
-			break;
-		}
-
-		/* Start time will be 0 only for the first time or after
-		completing the optimization of all FTS indexes. */
-		if (start_time == 0) {
-			start_time = ut_time();
-
-			error = fts_optimize_set_index_start_time(
-				optim->trx, index, start_time);
-		}
-
-		/* Check if this index needs to be optimized or not. */
-		if (ut_difftime(end_time, start_time) < 0) {
-			error = fts_optimize_index(optim, index);
-
-			if (error != DB_SUCCESS) {
-				break;
-			}
-		} else {
-			++optim->n_completed;
-		}
-
-		/* NOTE(review): control falls through to the
-		unconditional fts_optimize_index() call below, so in
-		debug builds an index may be optimized twice per
-		iteration - confirm whether that is intended. */
-#endif
-		/* NOTE(review): a failure here is overwritten by the
-		result of the next iteration; only the status of the
-		last index decides between commit and rollback. */
-		error = fts_optimize_index(optim, index);
-	}
-
-	if (error == DB_SUCCESS) {
-		fts_sql_commit(optim->trx);
-	} else {
-		fts_sql_rollback(optim->trx);
-	}
-
-	return(error);
-}
-
-/*********************************************************************//**
-Remove the snapshot doc ids from the master deleted tables and then
-destroy the snapshot itself. The transaction is committed when both
-steps succeed and rolled back otherwise.
-@return DB_SUCCESS if all OK */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_purge_snapshot(
-/*========================*/
-	fts_optimize_t*	optim)	/*!< in: optimize instance */
-{
-	/* Step 1: delete from the master deleted tables the doc ids
-	that were captured in the snapshot at the start of optimize. */
-	dberr_t	err = fts_optimize_purge_deleted_doc_ids(optim);
-
-	/* Step 2: only when step 1 worked, drop the snapshot. */
-	if (err == DB_SUCCESS) {
-		err = fts_optimize_purge_deleted_doc_id_snapshot(optim);
-	}
-
-	if (err != DB_SUCCESS) {
-		fts_sql_rollback(optim->trx);
-	} else {
-		fts_sql_commit(optim->trx);
-	}
-
-	return(err);
-}
-
-/*********************************************************************//**
-Reset the start time to 0 so that a new optimize can be started. The
-reset itself only happens in FTS_OPTIMIZE_DEBUG builds; in all builds
-the transaction is committed on success and rolled back on failure.
-@return DB_SUCCESS if all OK */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_optimize_reset_start_time(
-/*==========================*/
-	fts_optimize_t*	optim)	/*!< in: optimize instance */
-{
-	dberr_t	error = DB_SUCCESS;
-#ifdef FTS_OPTIMIZE_DEBUG
-	fts_t*	fts = optim->table->fts;
-
-	/* Optimization should have been completed for all indexes. */
-	ut_a(optim->n_completed == ib_vector_size(fts->indexes));
-
-	for (ulint i = 0; i < ib_vector_size(fts->indexes); ++i) {
-		/* Fetch the index before using it; it used to be
-		assigned only after the call that consumes it. */
-		dict_index_t*	index = static_cast<dict_index_t*>(
-			ib_vector_getp(fts->indexes, i));
-
-		ib_time_t	start_time = 0;
-
-		/* Reset the start time to 0 for this index. */
-		error = fts_optimize_set_index_start_time(
-			optim->trx, index, start_time);
-	}
-#endif
-
-	if (error == DB_SUCCESS) {
-		fts_sql_commit(optim->trx);
-	} else {
-		fts_sql_rollback(optim->trx);
-	}
-
-	return(error);
-}
-
-/*********************************************************************//**
-Background-thread entry for optimizing one registered table. Nothing
-is done when the slot ran less than interval_time ago; otherwise the
-table is optimized if its cache reports at least FTS_OPTIMIZE_THRESHOLD
-deletions, and the time of this run is recorded in the slot.
-@return DB_SUCCESS if all OK */
-static MY_ATTRIBUTE((nonnull))
-dberr_t
-fts_optimize_table_bk(
-/*==================*/
-	fts_slot_t*	slot)	/*!< in/out: slot of the table to optimize */
-{
-	dict_table_t*	table = slot->table;
-	fts_t*		fts = table->fts;
-	dberr_t		error = DB_SUCCESS;
-
-	/* Avoid optimizing tables that were optimized recently. */
-	if (slot->last_run > 0
-	    && (ut_time() - slot->last_run) < slot->interval_time) {
-
-		return(DB_SUCCESS);
-	}
-
-	if (fts != NULL && fts->cache != NULL
-	    && fts->cache->deleted >= FTS_OPTIMIZE_THRESHOLD) {
-
-		error = fts_optimize_table(table);
-
-		if (error == DB_SUCCESS) {
-			slot->state = FTS_STATE_DONE;
-			slot->last_run = 0;
-			slot->completed = ut_time();
-		}
-	}
-
-	/* Note time this run completed. */
-	slot->last_run = ut_time();
-
-	return(error);
-}
-/*********************************************************************//**
-Run OPTIMIZE on the given table.
-The steps are: create the optimize instance; if BEING_DELETED is empty,
-take a new snapshot of the deleted doc ids; read the snapshot into
-optim->to_delete; optimize every FTS index if the snapshot is not
-empty; and, once all indexes are reported completed, purge the snapshot
-and reset the optimize start times. The optimize instance is freed
-before returning, whatever the outcome.
-@return DB_SUCCESS if all OK */
-UNIV_INTERN
-dberr_t
-fts_optimize_table(
-/*===============*/
- dict_table_t* table) /*!< in: table to optimize */
-{
- dberr_t error = DB_SUCCESS;
- fts_optimize_t* optim = NULL;
- fts_t* fts = table->fts;
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: FTS start optimize %s\n", table->name);
-
- optim = fts_optimize_create(table);
-
- // FIXME: Call this only at the start of optimize, currently we
- // rely on DB_DUPLICATE_KEY to handle corrupting the snapshot.
-
- /* Check whether there are still records in BEING_DELETED table */
- if (fts_optimize_being_deleted_count(optim) == 0) {
- /* Take a snapshot of the deleted document ids, they are copied
- to the BEING_ tables. */
- error = fts_optimize_create_deleted_doc_id_snapshot(optim);
- }
-
- /* A duplicate error is OK, since we don't erase the
- doc ids from the being deleted state until all FTS
- indexes have been optimized. */
- if (error == DB_DUPLICATE_KEY) {
- error = DB_SUCCESS;
- }
-
- if (error == DB_SUCCESS) {
-
- /* These document ids will be filtered out during the
- index optimization phase. They are in the snapshot that we
- took above, at the start of the optimize. */
- error = fts_optimize_read_deleted_doc_id_snapshot(optim);
-
- if (error == DB_SUCCESS) {
-
- /* Commit the read of being deleted
- doc ids transaction. */
- fts_sql_commit(optim->trx);
-
- /* We would do optimization only if there
- are deleted records to be cleaned up */
- if (ib_vector_size(optim->to_delete->doc_ids) > 0) {
- error = fts_optimize_indexes(optim);
- }
-
- } else {
- ut_a(optim->to_delete == NULL);
- }
-
- /* Only after all indexes have been optimized can we
- delete the (snapshot) doc ids in the pending delete,
- and master deleted tables. */
- if (error == DB_SUCCESS
- && optim->n_completed == ib_vector_size(fts->indexes)) {
-
- if (fts_enable_diag_print) {
- fprintf(stderr, "FTS_OPTIMIZE: Completed "
- "Optimize, cleanup DELETED "
- "table\n");
- }
-
- if (ib_vector_size(optim->to_delete->doc_ids) > 0) {
-
- /* Purge the doc ids that were in the
- snapshot from the snapshot tables and
- the master deleted table. */
- error = fts_optimize_purge_snapshot(optim);
- }
-
- if (error == DB_SUCCESS) {
- /* Reset the start time of all the FTS indexes
- so that optimize can be restarted. */
- error = fts_optimize_reset_start_time(optim);
- }
- }
- }
-
- fts_optimize_free(optim);
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: FTS end optimize %s\n", table->name);
-
- return(error);
-}
-
-/********************************************************************//**
-Allocate a message for the optimize work queue. The message lives in
-a heap of its own, which is recorded in msg->heap.
-@return new message instance */
-static
-fts_msg_t*
-fts_optimize_create_msg(
-/*====================*/
-	fts_msg_type_t	type,	/*!< in: type of message */
-	void*		ptr)	/*!< in: message payload */
-{
-	/* Size the heap for the message plus one list node and a
-	little slack. */
-	mem_heap_t*	msg_heap = mem_heap_create(
-		sizeof(fts_msg_t) + sizeof(ib_list_node_t) + 16);
-
-	fts_msg_t*	new_msg = static_cast<fts_msg_t*>(
-		mem_heap_alloc(msg_heap, sizeof(fts_msg_t)));
-
-	new_msg->ptr = ptr;
-	new_msg->type = type;
-	new_msg->heap = msg_heap;
-
-	return(new_msg);
-}
-
-/**********************************************************************//**
-Queue an FTS_MSG_ADD_TABLE message for the table on the optimize work
-queue. Does nothing when the work queue does not exist. */
-UNIV_INTERN
-void
-fts_optimize_add_table(
-/*===================*/
-	dict_table_t*	table)	/*!< in: table to add */
-{
-	if (fts_optimize_wq == NULL) {
-		return;
-	}
-
-	/* A table with an FTS index must stay in the dictionary
-	cache, so take it off the LRU list if it is on it. */
-	if (table->can_be_evicted) {
-		dict_table_move_from_lru_to_non_lru(table);
-	}
-
-	fts_msg_t*	add_msg = fts_optimize_create_msg(
-		FTS_MSG_ADD_TABLE, table);
-
-	ib_wqueue_add(fts_optimize_wq, add_msg, add_msg->heap);
-}
-
-/**********************************************************************//**
-Queue an FTS_MSG_OPTIMIZE_TABLE message for the table on the optimize
-work queue. Does nothing when the work queue does not exist. */
-UNIV_INTERN
-void
-fts_optimize_do_table(
-/*==================*/
-	dict_table_t*	table)	/*!< in: table to optimize */
-{
-	/* The optimizer thread may already have been shut down. */
-	if (fts_optimize_wq == NULL) {
-		return;
-	}
-
-	fts_msg_t*	opt_msg = fts_optimize_create_msg(
-		FTS_MSG_OPTIMIZE_TABLE, table);
-
-	ib_wqueue_add(fts_optimize_wq, opt_msg, opt_msg->heap);
-}
-
-/**********************************************************************//**
-Ask the optimize thread to drop the table from its list and block
-until the request has been acknowledged through the event carried in
-the message. Returns immediately when the work queue does not exist or
-when shutdown of the optimize thread has started. */
-UNIV_INTERN
-void
-fts_optimize_remove_table(
-/*======================*/
-	dict_table_t*	table)	/*!< in: table to remove */
-{
-	/* Optimize system not initialized yet. */
-	if (fts_optimize_wq == NULL) {
-		return;
-	}
-
-	/* Nobody would answer the request any more. */
-	if (fts_opt_start_shutdown) {
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"Try to remove table %s after FTS optimize"
-			" thread exiting.", table->name);
-		return;
-	}
-
-	fts_msg_t*	del_msg = fts_optimize_create_msg(
-		FTS_MSG_DEL_TABLE, NULL);
-
-	/* The consumer signals this event when it is done. */
-	os_event_t	done_event = os_event_create();
-
-	/* The payload is allocated from the heap of the message. */
-	fts_msg_del_t*	request = static_cast<fts_msg_del_t*>(
-		mem_heap_alloc(del_msg->heap, sizeof(fts_msg_del_t)));
-
-	request->table = table;
-	request->event = done_event;
-	del_msg->ptr = request;
-
-	ib_wqueue_add(fts_optimize_wq, del_msg, del_msg->heap);
-
-	os_event_wait(done_event);
-
-	os_event_free(done_event);
-}
-
-/** Queue an FTS_MSG_SYNC_TABLE message carrying the id of the table.
-Returns without queueing anything when the work queue does not exist or
-when shutdown of the optimize thread has started.
-@param[in]	table	table to sync */
-UNIV_INTERN
-void
-fts_optimize_request_sync_table(
-	dict_table_t*	table)
-{
-	/* Optimize system not initialized yet. */
-	if (fts_optimize_wq == NULL) {
-		return;
-	}
-
-	/* Nobody would process the request any more. */
-	if (fts_opt_start_shutdown) {
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"Try to sync table %s after FTS optimize"
-			" thread exiting.", table->name);
-		return;
-	}
-
-	fts_msg_t*	sync_msg = fts_optimize_create_msg(
-		FTS_MSG_SYNC_TABLE, NULL);
-
-	/* The payload is a copy of the table id, allocated from the
-	heap of the message. */
-	table_id_t*	id_copy = static_cast<table_id_t*>(
-		mem_heap_alloc(sync_msg->heap, sizeof(table_id_t)));
-
-	*id_copy = table->id;
-	sync_msg->ptr = id_copy;
-
-	ib_wqueue_add(fts_optimize_wq, sync_msg, sync_msg->heap);
-}
-
-/**********************************************************************//**
-Find the slot for a particular table. Slots in state FTS_STATE_EMPTY
-are skipped: fts_optimize_del_table() sets slot->table to NULL when it
-empties a slot, so such a slot must not be dereferenced.
-@return slot if found else NULL. */
-static
-fts_slot_t*
-fts_optimize_find_slot(
-/*===================*/
-	ib_vector_t*		tables,	/*!< in: vector of tables */
-	const dict_table_t*	table)	/*!< in: table to look up */
-{
-	ulint	i;
-
-	for (i = 0; i < ib_vector_size(tables); ++i) {
-		fts_slot_t*	slot;
-
-		slot = static_cast<fts_slot_t*>(ib_vector_get(tables, i));
-
-		if (slot->state != FTS_STATE_EMPTY
-		    && slot->table->id == table->id) {
-			return(slot);
-		}
-	}
-
-	return(NULL);
-}
-
-/**********************************************************************//**
-Clear the last_run and completed times of the slot that belongs to
-the table. An error is printed to stderr when the table has no slot. */
-static
-void
-fts_optimize_start_table(
-/*=====================*/
-	ib_vector_t*	tables,	/*!< in/out: vector of tables */
-	dict_table_t*	table)	/*!< in: table to optimize */
-{
-	fts_slot_t*	found = fts_optimize_find_slot(tables, table);
-
-	if (found != NULL) {
-		found->last_run = 0;
-		found->completed = 0;
-		return;
-	}
-
-	ut_print_timestamp(stderr);
-	fprintf(stderr, " InnoDB: Error: table %s not registered "
-		"with the optimize thread.\n", table->name);
-}
-
-/**********************************************************************//**
-Add the table to the vector if it doesn't already exist. An empty slot
-is reused when one is available, otherwise a new slot is appended.
-@return TRUE if a slot was set up for the table, FALSE if the table
-was already registered */
-static
-ibool
-fts_optimize_new_table(
-/*===================*/
-	ib_vector_t*	tables,	/*!< in/out: vector of tables */
-	dict_table_t*	table)	/*!< in: table to add */
-{
-	ulint		i;
-	fts_slot_t*	slot;
-	ulint		empty_slot = ULINT_UNDEFINED;
-
-	/* Search for duplicates, also find a free slot if one exists. */
-	for (i = 0; i < ib_vector_size(tables); ++i) {
-
-		slot = static_cast<fts_slot_t*>(
-			ib_vector_get(tables, i));
-
-		if (slot->state == FTS_STATE_EMPTY) {
-			empty_slot = i;
-		} else if (slot->table->id == table->id) {
-			/* Already exists in our optimize queue. The
-			assertion must compare; it used to assign to
-			slot->table_id instead. */
-			ut_ad(slot->table_id == table->id);
-			return(FALSE);
-		}
-	}
-
-	/* Reuse old slot. */
-	if (empty_slot != ULINT_UNDEFINED) {
-
-		slot = static_cast<fts_slot_t*>(
-			ib_vector_get(tables, empty_slot));
-
-		ut_a(slot->state == FTS_STATE_EMPTY);
-
-	} else { /* Create a new slot. */
-
-		slot = static_cast<fts_slot_t*>(ib_vector_push(tables, NULL));
-	}
-
-	/* Start from a fully zeroed slot in both cases. */
-	memset(slot, 0x0, sizeof(*slot));
-
-	slot->table = table;
-	slot->table_id = table->id;
-	slot->state = FTS_STATE_LOADED;
-	slot->interval_time = FTS_OPTIMIZE_INTERVAL_IN_SECS;
-
-	return(TRUE);
-}
-
-/**********************************************************************//**
-Mark the slot of the table named in the message as empty, if the
-table has a slot.
-@return TRUE if a slot was emptied, FALSE if the table was not found */
-static
-ibool
-fts_optimize_del_table(
-/*===================*/
-	ib_vector_t*	tables,	/*!< in/out: vector of tables */
-	fts_msg_del_t*	msg)	/*!< in: table to delete */
-{
-	dict_table_t*	victim = msg->table;
-	const ulint	n_slots = ib_vector_size(tables);
-
-	for (ulint pos = 0; pos < n_slots; ++pos) {
-		fts_slot_t*	slot = static_cast<fts_slot_t*>(
-			ib_vector_get(tables, pos));
-
-		/* FIXME: Should we assert on this ? */
-		if (slot->state == FTS_STATE_EMPTY
-		    || slot->table->id != victim->id) {
-
-			continue;
-		}
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr, " InnoDB: FTS Optimize Removing "
-			"table %s\n", victim->name);
-
-		slot->table = NULL;
-		slot->state = FTS_STATE_EMPTY;
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/**********************************************************************//**
-Count the registered tables that are due for optimization. A slot in
-state DONE or LOADED is due when at least interval_time has passed
-since slot->completed; a RUNNING slot is due when more than
-interval_time has passed since slot->last_run. EMPTY and SUSPENDED
-slots are never counted.
-@return no. of tables to optimize */
-static
-ulint
-fts_optimize_how_many(
-/*==================*/
-	const ib_vector_t*	tables)	/*!< in: registered tables
-					vector*/
-{
-	const ib_time_t	now = ut_time();
-	ulint		n_due = 0;
-
-	for (ulint i = 0; i < ib_vector_size(tables); ++i) {
-		const fts_slot_t*	slot = static_cast<const fts_slot_t*>(
-			ib_vector_get_const(tables, i));
-
-		switch (slot->state) {
-		case FTS_STATE_DONE:
-		case FTS_STATE_LOADED: {
-			ut_a(slot->completed <= now);
-
-			const ib_time_t	since_done = now - slot->completed;
-
-			/* Skip slots that have been optimized recently. */
-			if (since_done >= slot->interval_time) {
-				++n_due;
-			}
-			break;
-		}
-
-		case FTS_STATE_RUNNING: {
-			ut_a(slot->last_run <= now);
-
-			const ib_time_t	since_run = now - slot->last_run;
-
-			if (since_run > slot->interval_time) {
-				++n_due;
-			}
-			break;
-		}
-
-		/* Nothing to do for the remaining states. */
-		case FTS_STATE_EMPTY:
-		case FTS_STATE_SUSPENDED:
-			break;
-		}
-	}
-
-	return(n_due);
-}
-
-/**********************************************************************//**
-Check if the total memory used by all FTS table exceeds the maximum limit.
-The check is skipped (false is returned) while fts_need_sync is already
-set or when the previous check ran less than 5 seconds ago. Tables
-without an FTS cache contribute nothing to the total.
-@return true if a sync is needed, false otherwise */
-static
-bool
-fts_is_sync_needed(
-/*===============*/
-	const ib_vector_t*	tables)	/*!< in: registered tables
-					vector*/
-{
-	ulint	total_memory = 0;
-	double	time_diff = difftime(ut_time(), last_check_sync_time);
-
-	if (fts_need_sync || time_diff < 5) {
-		return(false);
-	}
-
-	last_check_sync_time = ut_time();
-
-	for (ulint i = 0; i < ib_vector_size(tables); ++i) {
-		const fts_slot_t*	slot;
-
-		slot = static_cast<const fts_slot_t*>(
-			ib_vector_get_const(tables, i));
-
-		/* The cache pointer is checked as well, as the other
-		users of fts->cache in this file do, so that a table
-		without a cache cannot cause a NULL dereference. */
-		if (slot->state != FTS_STATE_EMPTY && slot->table
-		    && slot->table->fts
-		    && slot->table->fts->cache) {
-			total_memory += slot->table->fts->cache->total_size;
-		}
-
-		if (total_memory > fts_max_total_cache_size) {
-			return(true);
-		}
-	}
-
-	return(false);
-}
-
-#if 0
-/*********************************************************************//**
-Check whether a table needs to be optimized. */
-static
-void
-fts_optimize_need_sync(
-/*===================*/
- ib_vector_t* tables) /*!< in: list of tables */
-{
- dict_table_t* table = NULL;
- fts_slot_t* slot;
- ulint num_table = ib_vector_size(tables);
-
- if (!num_table) {
- return;
- }
-
- if (fts_optimize_sync_iterator >= num_table) {
- fts_optimize_sync_iterator = 0;
- }
-
- slot = ib_vector_get(tables, fts_optimize_sync_iterator);
- table = slot->table;
-
- if (!table) {
- return;
- }
-
- ut_ad(table->fts);
-
- if (table->fts->cache) {
- ulint deleted = table->fts->cache->deleted;
-
- if (table->fts->cache->added
- >= fts_optimize_add_threshold) {
- fts_sync_table(table);
- } else if (deleted >= fts_optimize_delete_threshold) {
- fts_optimize_do_table(table);
-
- mutex_enter(&table->fts->cache->deleted_lock);
- table->fts->cache->deleted -= deleted;
- mutex_exit(&table->fts->cache->deleted_lock);
- }
- }
-
- fts_optimize_sync_iterator++;
-
- return;
-}
-#endif
-
-/** Sync fts cache of a table
-@param[in] table_id table id */
-void
-fts_optimize_sync_table(
- table_id_t table_id)
-{
- dict_table_t* table = NULL;
-
- /* Prevent DROP INDEX etc. from running when we are syncing
- cache in background. */
- if (!rw_lock_s_lock_nowait(&dict_operation_lock, __FILE__, __LINE__)) {
- /* Exit when fail to get dict operation lock. */
- return;
- }
-
- table = dict_table_open_on_id(table_id, FALSE, DICT_TABLE_OP_NORMAL);
-
- if (table) {
- if (dict_table_has_fts_index(table) && table->fts->cache) {
- fts_sync_table(table, true, false, true);
- }
-
- dict_table_close(table, FALSE, FALSE);
- }
-
- rw_lock_s_unlock(&dict_operation_lock);
-}
-
-/**********************************************************************//**
-Optimize all FTS tables.
-@return Dummy return */
-UNIV_INTERN
-os_thread_ret_t
-fts_optimize_thread(
-/*================*/
- void* arg) /*!< in: work queue*/
-{
- mem_heap_t* heap;
- ib_vector_t* tables;
- ib_alloc_t* heap_alloc;
- ulint current = 0;
- ibool done = FALSE;
- ulint n_tables = 0;
- os_event_t exit_event = 0;
- ulint n_optimize = 0;
- ib_wqueue_t* wq = (ib_wqueue_t*) arg;
-
- ut_ad(!srv_read_only_mode);
- my_thread_init();
-
- heap = mem_heap_create(sizeof(dict_table_t*) * 64);
- heap_alloc = ib_heap_allocator_create(heap);
-
- tables = ib_vector_create(heap_alloc, sizeof(fts_slot_t), 4);
-
- while(!done && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
-
- /* If there is no message in the queue and we have tables
- to optimize then optimize the tables. */
-
- if (!done
- && ib_wqueue_is_empty(wq)
- && n_tables > 0
- && n_optimize > 0) {
-
- fts_slot_t* slot;
-
- ut_a(ib_vector_size(tables) > 0);
-
- slot = static_cast<fts_slot_t*>(
- ib_vector_get(tables, current));
-
- /* Handle the case of empty slots. */
- if (slot->state != FTS_STATE_EMPTY) {
-
- slot->state = FTS_STATE_RUNNING;
-
- fts_optimize_table_bk(slot);
- }
-
- ++current;
-
- /* Wrap around the counter. */
- if (current >= ib_vector_size(tables)) {
- n_optimize = fts_optimize_how_many(tables);
-
- current = 0;
- }
-
- } else if (n_optimize == 0 || !ib_wqueue_is_empty(wq)) {
- fts_msg_t* msg;
-
- msg = static_cast<fts_msg_t*>(
- ib_wqueue_timedwait(wq,
- FTS_QUEUE_WAIT_IN_USECS));
-
- /* Timeout ? */
- if (msg == NULL) {
- if (fts_is_sync_needed(tables)) {
- fts_need_sync = true;
- }
-
- continue;
- }
-
- switch (msg->type) {
- case FTS_MSG_START:
- break;
-
- case FTS_MSG_PAUSE:
- break;
-
- case FTS_MSG_STOP:
- done = TRUE;
- exit_event = (os_event_t) msg->ptr;
- break;
-
- case FTS_MSG_ADD_TABLE:
- ut_a(!done);
- if (fts_optimize_new_table(
- tables,
- static_cast<dict_table_t*>(
- msg->ptr))) {
- ++n_tables;
- }
- break;
-
- case FTS_MSG_OPTIMIZE_TABLE:
- if (!done) {
- fts_optimize_start_table(
- tables,
- static_cast<dict_table_t*>(
- msg->ptr));
- }
- break;
-
- case FTS_MSG_DEL_TABLE:
- if (fts_optimize_del_table(
- tables, static_cast<fts_msg_del_t*>(
- msg->ptr))) {
- --n_tables;
- }
-
- /* Signal the producer that we have
- removed the table. */
- os_event_set(
- ((fts_msg_del_t*) msg->ptr)->event);
- break;
-
- case FTS_MSG_SYNC_TABLE:
- fts_optimize_sync_table(
- *static_cast<table_id_t*>(msg->ptr));
- break;
-
- default:
- ut_error;
- }
-
- mem_heap_free(msg->heap);
-
- if (!done) {
- n_optimize = fts_optimize_how_many(tables);
- } else {
- n_optimize = 0;
- }
- }
- }
-
- /* Server is being shutdown, sync the data from FTS cache to disk
- if needed */
- if (n_tables > 0) {
- ulint i;
-
- for (i = 0; i < ib_vector_size(tables); i++) {
- fts_slot_t* slot;
-
- slot = static_cast<fts_slot_t*>(
- ib_vector_get(tables, i));
-
- if (slot->state != FTS_STATE_EMPTY) {
- fts_optimize_sync_table(slot->table_id);
- }
- }
- }
-
- ib_vector_free(tables);
-
- ib_logf(IB_LOG_LEVEL_INFO, "FTS optimize thread exiting.");
-
- os_event_set(exit_event);
- my_thread_end();
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/**********************************************************************//**
-Startup the optimize thread and create the work queue. */
-UNIV_INTERN
-void
-fts_optimize_init(void)
-/*===================*/
-{
- ut_ad(!srv_read_only_mode);
-
- /* For now we only support one optimize thread. */
- ut_a(fts_optimize_wq == NULL);
-
- fts_optimize_wq = ib_wqueue_create();
- ut_a(fts_optimize_wq != NULL);
- last_check_sync_time = ut_time();
-
- os_thread_create(fts_optimize_thread, fts_optimize_wq, NULL);
-}
-
-/**********************************************************************//**
-Check whether the work queue is initialized.
-@return TRUE if optimze queue is initialized. */
-UNIV_INTERN
-ibool
-fts_optimize_is_init(void)
-/*======================*/
-{
- return(fts_optimize_wq != NULL);
-}
-
-/**********************************************************************//**
-Signal the optimize thread to prepare for shutdown. */
-UNIV_INTERN
-void
-fts_optimize_start_shutdown(void)
-/*=============================*/
-{
- ut_ad(!srv_read_only_mode);
-
- fts_msg_t* msg;
- os_event_t event;
-
- /* If there is an ongoing activity on dictionary, such as
- srv_master_evict_from_table_cache(), wait for it */
- dict_mutex_enter_for_mysql();
-
- /* Tells FTS optimizer system that we are exiting from
- optimizer thread, message send their after will not be
- processed */
- fts_opt_start_shutdown = true;
- dict_mutex_exit_for_mysql();
-
- /* We tell the OPTIMIZE thread to switch to state done, we
- can't delete the work queue here because the add thread needs
- deregister the FTS tables. */
- event = os_event_create();
-
- msg = fts_optimize_create_msg(FTS_MSG_STOP, NULL);
- msg->ptr = event;
-
- ib_wqueue_add(fts_optimize_wq, msg, msg->heap);
-
- os_event_wait(event);
- os_event_free(event);
-
- ib_wqueue_free(fts_optimize_wq);
-
-}
-
-/**********************************************************************//**
-Reset the work queue. */
-UNIV_INTERN
-void
-fts_optimize_end(void)
-/*==================*/
-{
- ut_ad(!srv_read_only_mode);
-
- // FIXME: Potential race condition here: We should wait for
- // the optimize thread to confirm shutdown.
- fts_optimize_wq = NULL;
-}
diff --git a/storage/xtradb/fts/fts0pars.cc b/storage/xtradb/fts/fts0pars.cc
deleted file mode 100644
index 7f0ba4e0c1b..00000000000
--- a/storage/xtradb/fts/fts0pars.cc
+++ /dev/null
@@ -1,2010 +0,0 @@
-/* A Bison parser, made by GNU Bison 2.5. */
-
-/* Bison implementation for Yacc-like parsers in C
-
- Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-/* As a special exception, you may create a larger work that contains
- part or all of the Bison parser skeleton and distribute that work
- under terms of your choice, so long as that work isn't itself a
- parser generator using the skeleton or a modified version thereof
- as a parser skeleton. Alternatively, if you modify or redistribute
- the parser skeleton itself, you may (at your option) remove this
- special exception, which will cause the skeleton and the resulting
- Bison output files to be licensed under the GNU General Public
- License without this special exception.
-
- This special exception was added by the Free Software Foundation in
- version 2.2 of Bison. */
-
-/* C LALR(1) parser skeleton written by Richard Stallman, by
- simplifying the original so-called "semantic" parser. */
-
-/* All symbols defined below should begin with yy or YY, to avoid
- infringing on user name space. This should be done even for local
- variables, as they might otherwise be expanded by user macros.
- There are some unavoidable exceptions within include files to
- define necessary library symbols; they are noted "INFRINGES ON
- USER NAME SPACE" below. */
-
-/* Identify Bison output. */
-#define YYBISON 1
-
-/* Bison version. */
-#define YYBISON_VERSION "2.5"
-
-/* Skeleton name. */
-#define YYSKELETON_NAME "yacc.c"
-
-/* Pure parsers. */
-#define YYPURE 1
-
-/* Push parsers. */
-#define YYPUSH 0
-
-/* Pull parsers. */
-#define YYPULL 1
-
-/* Using locations. */
-#define YYLSP_NEEDED 0
-
-/* Substitute the variable and function names. */
-#define yyparse ftsparse
-#define yylex ftslex
-#define yyerror ftserror
-#define yylval ftslval
-#define yychar ftschar
-#define yydebug ftsdebug
-#define yynerrs ftsnerrs
-
-
-/* Copy the first part of user declarations. */
-
-/* Line 268 of yacc.c */
-#line 26 "fts0pars.y"
-
-
-#include "mem0mem.h"
-#include "fts0ast.h"
-#include "fts0blex.h"
-#include "fts0tlex.h"
-#include "fts0pars.h"
-
-extern int fts_lexer(YYSTYPE*, fts_lexer_t*);
-extern int fts_blexer(YYSTYPE*, yyscan_t);
-extern int fts_tlexer(YYSTYPE*, yyscan_t);
-
-typedef int (*fts_scan)();
-
-extern int ftserror(const char* p);
-
-/* Required for reentrant parser */
-#define ftslex fts_lexer
-
-#define YYERROR_VERBOSE
-
-/* For passing an argument to yyparse() */
-#define YYPARSE_PARAM state
-#define YYLEX_PARAM ((fts_ast_state_t*) state)->lexer
-
-#define YYTOKENFREE(token) fts_ast_string_free((token))
-
-typedef int (*fts_scanner_alt)(YYSTYPE* val, yyscan_t yyscanner);
-typedef int (*fts_scanner)();
-
-struct fts_lexer_t {
- fts_scanner scanner;
- void* yyscanner;
-};
-
-
-
-/* Line 268 of yacc.c */
-#line 115 "fts0pars.cc"
-
-/* Enabling traces. */
-#ifndef YYDEBUG
-# define YYDEBUG 0
-#endif
-
-/* Enabling verbose error messages. */
-#ifdef YYERROR_VERBOSE
-# undef YYERROR_VERBOSE
-# define YYERROR_VERBOSE 1
-#else
-# define YYERROR_VERBOSE 0
-#endif
-
-/* Enabling the token table. */
-#ifndef YYTOKEN_TABLE
-# define YYTOKEN_TABLE 0
-#endif
-
-
-/* Tokens. */
-#ifndef YYTOKENTYPE
-# define YYTOKENTYPE
- /* Put the tokens into the symbol table, so that GDB and other debuggers
- know about them. */
- enum yytokentype {
- FTS_OPER = 258,
- FTS_TEXT = 259,
- FTS_TERM = 260,
- FTS_NUMB = 261
- };
-#endif
-
-
-
-#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
-typedef union YYSTYPE
-{
-
-/* Line 293 of yacc.c */
-#line 61 "fts0pars.y"
-
- int oper;
- fts_ast_string_t* token;
- fts_ast_node_t* node;
-
-
-
-/* Line 293 of yacc.c */
-#line 165 "fts0pars.cc"
-} YYSTYPE;
-# define YYSTYPE_IS_TRIVIAL 1
-# define yystype YYSTYPE /* obsolescent; will be withdrawn */
-# define YYSTYPE_IS_DECLARED 1
-#endif
-
-
-/* Copy the second part of user declarations. */
-
-
-/* Line 343 of yacc.c */
-#line 177 "fts0pars.cc"
-
-#ifdef short
-# undef short
-#endif
-
-#ifdef YYTYPE_UINT8
-typedef YYTYPE_UINT8 yytype_uint8;
-#else
-typedef unsigned char yytype_uint8;
-#endif
-
-#ifdef YYTYPE_INT8
-typedef YYTYPE_INT8 yytype_int8;
-#elif (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-typedef signed char yytype_int8;
-#else
-typedef short int yytype_int8;
-#endif
-
-#ifdef YYTYPE_UINT16
-typedef YYTYPE_UINT16 yytype_uint16;
-#else
-typedef unsigned short int yytype_uint16;
-#endif
-
-#ifdef YYTYPE_INT16
-typedef YYTYPE_INT16 yytype_int16;
-#else
-typedef short int yytype_int16;
-#endif
-
-#ifndef YYSIZE_T
-# ifdef __SIZE_TYPE__
-# define YYSIZE_T __SIZE_TYPE__
-# elif defined size_t
-# define YYSIZE_T size_t
-# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-# include <stddef.h> /* INFRINGES ON USER NAME SPACE */
-# define YYSIZE_T size_t
-# else
-# define YYSIZE_T unsigned int
-# endif
-#endif
-
-#define YYSIZE_MAXIMUM ((YYSIZE_T) -1)
-
-#ifndef YY_
-# if defined YYENABLE_NLS && YYENABLE_NLS
-# if ENABLE_NLS
-# include <libintl.h> /* INFRINGES ON USER NAME SPACE */
-# define YY_(msgid) dgettext ("bison-runtime", msgid)
-# endif
-# endif
-# ifndef YY_
-# define YY_(msgid) msgid
-# endif
-#endif
-
-/* Suppress unused-variable warnings by "using" E. */
-#if ! defined lint || defined __GNUC__
-# define YYUSE(e) ((void) (e))
-#else
-# define YYUSE(e) /* empty */
-#endif
-
-/* Identity function, used to suppress warnings about constant conditions. */
-#ifndef lint
-# define YYID(n) (n)
-#else
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static int
-YYID (int yyi)
-#else
-static int
-YYID (yyi)
- int yyi;
-#endif
-{
- return yyi;
-}
-#endif
-
-#if ! defined yyoverflow || YYERROR_VERBOSE
-
-/* The parser invokes alloca or malloc; define the necessary symbols. */
-
-# ifdef YYSTACK_USE_ALLOCA
-# if YYSTACK_USE_ALLOCA
-# ifdef __GNUC__
-# define YYSTACK_ALLOC __builtin_alloca
-# elif defined __BUILTIN_VA_ARG_INCR
-# include <alloca.h> /* INFRINGES ON USER NAME SPACE */
-# elif defined _AIX
-# define YYSTACK_ALLOC __alloca
-# elif defined _MSC_VER
-# include <malloc.h> /* INFRINGES ON USER NAME SPACE */
-# define alloca _alloca
-# else
-# define YYSTACK_ALLOC alloca
-# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
-# ifndef EXIT_SUCCESS
-# define EXIT_SUCCESS 0
-# endif
-# endif
-# endif
-# endif
-# endif
-
-# ifdef YYSTACK_ALLOC
- /* Pacify GCC's `empty if-body' warning. */
-# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0))
-# ifndef YYSTACK_ALLOC_MAXIMUM
- /* The OS might guarantee only one guard page at the bottom of the stack,
- and a page size can be as small as 4096 bytes. So we cannot safely
- invoke alloca (N) if N exceeds 4096. Use a slightly smaller number
- to allow for a few compiler-allocated temporary stack slots. */
-# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */
-# endif
-# else
-# define YYSTACK_ALLOC YYMALLOC
-# define YYSTACK_FREE YYFREE
-# ifndef YYSTACK_ALLOC_MAXIMUM
-# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM
-# endif
-# if (defined __cplusplus && ! defined EXIT_SUCCESS \
- && ! ((defined YYMALLOC || defined malloc) \
- && (defined YYFREE || defined free)))
-# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
-# ifndef EXIT_SUCCESS
-# define EXIT_SUCCESS 0
-# endif
-# endif
-# ifndef YYMALLOC
-# define YYMALLOC malloc
-# if ! defined malloc && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */
-# endif
-# endif
-# ifndef YYFREE
-# define YYFREE free
-# if ! defined free && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-void free (void *); /* INFRINGES ON USER NAME SPACE */
-# endif
-# endif
-# endif
-#endif /* ! defined yyoverflow || YYERROR_VERBOSE */
-
-
-#if (! defined yyoverflow \
- && (! defined __cplusplus \
- || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))
-
-/* A type that is properly aligned for any stack member. */
-union yyalloc
-{
- yytype_int16 yyss_alloc;
- YYSTYPE yyvs_alloc;
-};
-
-/* The size of the maximum gap between one aligned stack and the next. */
-# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1)
-
-/* The size of an array large to enough to hold all stacks, each with
- N elements. */
-# define YYSTACK_BYTES(N) \
- ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \
- + YYSTACK_GAP_MAXIMUM)
-
-# define YYCOPY_NEEDED 1
-
-/* Relocate STACK from its old location to the new one. The
- local variables YYSIZE and YYSTACKSIZE give the old and new number of
- elements in the stack, and YYPTR gives the new location of the
- stack. Advance YYPTR to a properly aligned location for the next
- stack. */
-# define YYSTACK_RELOCATE(Stack_alloc, Stack) \
- do \
- { \
- YYSIZE_T yynewbytes; \
- YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \
- Stack = &yyptr->Stack_alloc; \
- yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \
- yyptr += yynewbytes / sizeof (*yyptr); \
- } \
- while (YYID (0))
-
-#endif
-
-#if defined YYCOPY_NEEDED && YYCOPY_NEEDED
-/* Copy COUNT objects from FROM to TO. The source and destination do
- not overlap. */
-# ifndef YYCOPY
-# if defined __GNUC__ && 1 < __GNUC__
-# define YYCOPY(To, From, Count) \
- __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
-# else
-# define YYCOPY(To, From, Count) \
- do \
- { \
- YYSIZE_T yyi; \
- for (yyi = 0; yyi < (Count); yyi++) \
- (To)[yyi] = (From)[yyi]; \
- } \
- while (YYID (0))
-# endif
-# endif
-#endif /* !YYCOPY_NEEDED */
-
-/* YYFINAL -- State number of the termination state. */
-#define YYFINAL 3
-/* YYLAST -- Last index in YYTABLE. */
-#define YYLAST 52
-
-/* YYNTOKENS -- Number of terminals. */
-#define YYNTOKENS 16
-/* YYNNTS -- Number of nonterminals. */
-#define YYNNTS 8
-/* YYNRULES -- Number of rules. */
-#define YYNRULES 24
-/* YYNRULES -- Number of states. */
-#define YYNSTATES 33
-
-/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
-#define YYUNDEFTOK 2
-#define YYMAXUTOK 261
-
-#define YYTRANSLATE(YYX) \
- ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
-
-/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. */
-static const yytype_uint8 yytranslate[] =
-{
- 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 12, 13, 14, 7, 2, 8, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 10, 2, 11, 2, 15, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 9, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
- 5, 6
-};
-
-#if YYDEBUG
-/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
- YYRHS. */
-static const yytype_uint8 yyprhs[] =
-{
- 0, 0, 3, 5, 6, 9, 12, 16, 21, 23,
- 25, 28, 32, 36, 39, 44, 47, 49, 51, 53,
- 55, 57, 59, 61, 64
-};
-
-/* YYRHS -- A `-1'-separated list of the rules' RHS. */
-static const yytype_int8 yyrhs[] =
-{
- 17, 0, -1, 18, -1, -1, 18, 20, -1, 18,
- 19, -1, 12, 18, 13, -1, 21, 12, 18, 13,
- -1, 22, -1, 23, -1, 22, 14, -1, 23, 15,
- 6, -1, 21, 22, 14, -1, 21, 22, -1, 21,
- 23, 15, 6, -1, 21, 23, -1, 8, -1, 7,
- -1, 9, -1, 10, -1, 11, -1, 5, -1, 6,
- -1, 14, 22, -1, 4, -1
-};
-
-/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
-static const yytype_uint8 yyrline[] =
-{
- 0, 79, 79, 85, 89, 99, 111, 119, 129, 133,
- 137, 141, 146, 152, 157, 164, 170, 174, 178, 182,
- 186, 191, 196, 202, 207
-};
-#endif
-
-#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE
-/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
- First, the terminals, then, starting at YYNTOKENS, nonterminals. */
-static const char *const yytname[] =
-{
- "$end", "error", "$undefined", "FTS_OPER", "FTS_TEXT", "FTS_TERM",
- "FTS_NUMB", "'+'", "'-'", "'~'", "'<'", "'>'", "'('", "')'", "'*'",
- "'@'", "$accept", "query", "expr_lst", "sub_expr", "expr", "prefix",
- "term", "text", 0
-};
-#endif
-
-# ifdef YYPRINT
-/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to
- token YYLEX-NUM. */
-static const yytype_uint16 yytoknum[] =
-{
- 0, 256, 257, 258, 259, 260, 261, 43, 45, 126,
- 60, 62, 40, 41, 42, 64
-};
-# endif
-
-/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
-static const yytype_uint8 yyr1[] =
-{
- 0, 16, 17, 18, 18, 18, 19, 19, 20, 20,
- 20, 20, 20, 20, 20, 20, 21, 21, 21, 21,
- 21, 22, 22, 22, 23
-};
-
-/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
-static const yytype_uint8 yyr2[] =
-{
- 0, 2, 1, 0, 2, 2, 3, 4, 1, 1,
- 2, 3, 3, 2, 4, 2, 1, 1, 1, 1,
- 1, 1, 1, 2, 1
-};
-
-/* YYDEFACT[STATE-NAME] -- Default reduction number in state STATE-NUM.
- Performed when YYTABLE doesn't specify something else to do. Zero
- means the default is an error. */
-static const yytype_uint8 yydefact[] =
-{
- 3, 0, 2, 1, 24, 21, 22, 17, 16, 18,
- 19, 20, 3, 0, 5, 4, 0, 8, 9, 0,
- 23, 3, 13, 15, 10, 0, 6, 0, 12, 0,
- 11, 7, 14
-};
-
-/* YYDEFGOTO[NTERM-NUM]. */
-static const yytype_int8 yydefgoto[] =
-{
- -1, 1, 2, 14, 15, 16, 17, 18
-};
-
-/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
- STATE-NUM. */
-#define YYPACT_NINF -5
-static const yytype_int8 yypact[] =
-{
- -5, 38, 18, -5, -5, -5, -5, -5, -5, -5,
- -5, -5, -5, 31, -5, -5, 29, 30, 32, -4,
- -5, -5, 34, 35, -5, 40, -5, 7, -5, 43,
- -5, -5, -5
-};
-
-/* YYPGOTO[NTERM-NUM]. */
-static const yytype_int8 yypgoto[] =
-{
- -5, -5, 19, -5, -5, -5, 26, 36
-};
-
-/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
- positive, shift that token. If negative, reduce the rule which
- number is the opposite. If YYTABLE_NINF, syntax error. */
-#define YYTABLE_NINF -1
-static const yytype_uint8 yytable[] =
-{
- 4, 5, 6, 7, 8, 9, 10, 11, 12, 26,
- 13, 4, 5, 6, 7, 8, 9, 10, 11, 12,
- 31, 13, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 19, 13, 4, 5, 6, 5, 6, 3, 20,
- 27, 21, 22, 13, 24, 13, 30, 25, 28, 32,
- 29, 0, 23
-};
-
-#define yypact_value_is_default(yystate) \
- ((yystate) == (-5))
-
-#define yytable_value_is_error(yytable_value) \
- YYID (0)
-
-static const yytype_int8 yycheck[] =
-{
- 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
- 14, 4, 5, 6, 7, 8, 9, 10, 11, 12,
- 13, 14, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 12, 14, 4, 5, 6, 5, 6, 0, 13,
- 21, 12, 16, 14, 14, 14, 6, 15, 14, 6,
- 15, -1, 16
-};
-
-/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
- symbol of state STATE-NUM. */
-static const yytype_uint8 yystos[] =
-{
- 0, 17, 18, 0, 4, 5, 6, 7, 8, 9,
- 10, 11, 12, 14, 19, 20, 21, 22, 23, 18,
- 22, 12, 22, 23, 14, 15, 13, 18, 14, 15,
- 6, 13, 6
-};
-
-#define yyerrok (yyerrstatus = 0)
-#define yyclearin (yychar = YYEMPTY)
-#define YYEMPTY (-2)
-#define YYEOF 0
-
-#define YYACCEPT goto yyacceptlab
-#define YYABORT goto yyabortlab
-#define YYERROR goto yyerrorlab
-
-
-/* Like YYERROR except do call yyerror. This remains here temporarily
- to ease the transition to the new meaning of YYERROR, for GCC.
- Once GCC version 2 has supplanted version 1, this can go. However,
- YYFAIL appears to be in use. Nevertheless, it is formally deprecated
- in Bison 2.4.2's NEWS entry, where a plan to phase it out is
- discussed. */
-
-#define YYFAIL goto yyerrlab
-#if defined YYFAIL
- /* This is here to suppress warnings from the GCC cpp's
- -Wunused-macros. Normally we don't worry about that warning, but
- some users do, and we want to make it easy for users to remove
- YYFAIL uses, which will produce warnings from Bison 2.5. */
-#endif
-
-#define YYRECOVERING() (!!yyerrstatus)
-
-#define YYBACKUP(Token, Value) \
-do \
- if (yychar == YYEMPTY && yylen == 1) \
- { \
- yychar = (Token); \
- yylval = (Value); \
- YYPOPSTACK (1); \
- goto yybackup; \
- } \
- else \
- { \
- yyerror (YY_("syntax error: cannot back up")); \
- YYERROR; \
- } \
-while (YYID (0))
-
-
-#define YYTERROR 1
-#define YYERRCODE 256
-
-#define YYERRCLEANUP \
-do \
- switch (yylastchar) \
- { \
- case FTS_NUMB: \
- case FTS_TEXT: \
- case FTS_TERM: \
- YYTOKENFREE(yylval.token); \
- break; \
- default: \
- break; \
- } \
-while (YYID (0))
-
-/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
- If N is 0, then set CURRENT to the empty location which ends
- the previous symbol: RHS[0] (always defined). */
-
-#define YYRHSLOC(Rhs, K) ((Rhs)[K])
-#ifndef YYLLOC_DEFAULT
-# define YYLLOC_DEFAULT(Current, Rhs, N) \
- do \
- if (YYID (N)) \
- { \
- (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
- (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
- (Current).last_line = YYRHSLOC (Rhs, N).last_line; \
- (Current).last_column = YYRHSLOC (Rhs, N).last_column; \
- } \
- else \
- { \
- (Current).first_line = (Current).last_line = \
- YYRHSLOC (Rhs, 0).last_line; \
- (Current).first_column = (Current).last_column = \
- YYRHSLOC (Rhs, 0).last_column; \
- } \
- while (YYID (0))
-#endif
-
-
-/* This macro is provided for backward compatibility. */
-
-#ifndef YY_LOCATION_PRINT
-# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
-#endif
-
-
-/* YYLEX -- calling `yylex' with the right arguments. */
-
-#ifdef YYLEX_PARAM
-# define YYLEX yylex (&yylval, YYLEX_PARAM)
-#else
-# define YYLEX yylex (&yylval)
-#endif
-
-/* Enable debugging if requested. */
-#if YYDEBUG
-
-# ifndef YYFPRINTF
-# include <stdio.h> /* INFRINGES ON USER NAME SPACE */
-# define YYFPRINTF fprintf
-# endif
-
-# define YYDPRINTF(Args) \
-do { \
- if (yydebug) \
- YYFPRINTF Args; \
-} while (YYID (0))
-
-# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \
-do { \
- if (yydebug) \
- { \
- YYFPRINTF (stderr, "%s ", Title); \
- yy_symbol_print (stderr, \
- Type, Value); \
- YYFPRINTF (stderr, "\n"); \
- } \
-} while (YYID (0))
-
-
-/*--------------------------------.
-| Print this symbol on YYOUTPUT. |
-`--------------------------------*/
-
-/*ARGSUSED*/
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
-#else
-static void
-yy_symbol_value_print (yyoutput, yytype, yyvaluep)
- FILE *yyoutput;
- int yytype;
- YYSTYPE const * const yyvaluep;
-#endif
-{
- if (!yyvaluep)
- return;
-# ifdef YYPRINT
- if (yytype < YYNTOKENS)
- YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
-# else
- YYUSE (yyoutput);
-# endif
- switch (yytype)
- {
- default:
- break;
- }
-}
-
-
-/*--------------------------------.
-| Print this symbol on YYOUTPUT. |
-`--------------------------------*/
-
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
-#else
-static void
-yy_symbol_print (yyoutput, yytype, yyvaluep)
- FILE *yyoutput;
- int yytype;
- YYSTYPE const * const yyvaluep;
-#endif
-{
- if (yytype < YYNTOKENS)
- YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
- else
- YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
-
- yy_symbol_value_print (yyoutput, yytype, yyvaluep);
- YYFPRINTF (yyoutput, ")");
-}
-
-/*------------------------------------------------------------------.
-| yy_stack_print -- Print the state stack from its BOTTOM up to its |
-| TOP (included). |
-`------------------------------------------------------------------*/
-
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop)
-#else
-static void
-yy_stack_print (yybottom, yytop)
- yytype_int16 *yybottom;
- yytype_int16 *yytop;
-#endif
-{
- YYFPRINTF (stderr, "Stack now");
- for (; yybottom <= yytop; yybottom++)
- {
- int yybot = *yybottom;
- YYFPRINTF (stderr, " %d", yybot);
- }
- YYFPRINTF (stderr, "\n");
-}
-
-# define YY_STACK_PRINT(Bottom, Top) \
-do { \
- if (yydebug) \
- yy_stack_print ((Bottom), (Top)); \
-} while (YYID (0))
-
-
-/*------------------------------------------------.
-| Report that the YYRULE is going to be reduced. |
-`------------------------------------------------*/
-
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yy_reduce_print (YYSTYPE *yyvsp, int yyrule)
-#else
-static void
-yy_reduce_print (yyvsp, yyrule)
- YYSTYPE *yyvsp;
- int yyrule;
-#endif
-{
- int yynrhs = yyr2[yyrule];
- int yyi;
- unsigned long int yylno = yyrline[yyrule];
- YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n",
- yyrule - 1, yylno);
- /* The symbols being reduced. */
- for (yyi = 0; yyi < yynrhs; yyi++)
- {
- YYFPRINTF (stderr, " $%d = ", yyi + 1);
- yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi],
- &(yyvsp[(yyi + 1) - (yynrhs)])
- );
- YYFPRINTF (stderr, "\n");
- }
-}
-
-# define YY_REDUCE_PRINT(Rule) \
-do { \
- if (yydebug) \
- yy_reduce_print (yyvsp, Rule); \
-} while (YYID (0))
-
-/* Nonzero means print parse trace. It is left uninitialized so that
- multiple parsers can coexist. */
-int yydebug;
-#else /* !YYDEBUG */
-# define YYDPRINTF(Args)
-# define YY_SYMBOL_PRINT(Title, Type, Value, Location)
-# define YY_STACK_PRINT(Bottom, Top)
-# define YY_REDUCE_PRINT(Rule)
-#endif /* !YYDEBUG */
-
-
-/* YYINITDEPTH -- initial size of the parser's stacks. */
-#ifndef YYINITDEPTH
-# define YYINITDEPTH 200
-#endif
-
-/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
- if the built-in stack extension method is used).
-
- Do not make this value too large; the results are undefined if
- YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH)
- evaluated with infinite-precision integer arithmetic. */
-
-#ifndef YYMAXDEPTH
-# define YYMAXDEPTH 10000
-#endif
-
-
-#if YYERROR_VERBOSE
-
-# ifndef yystrlen
-# if defined __GLIBC__ && defined _STRING_H
-# define yystrlen strlen
-# else
-/* Return the length of YYSTR. */
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static YYSIZE_T
-yystrlen (const char *yystr)
-#else
-static YYSIZE_T
-yystrlen (yystr)
- const char *yystr;
-#endif
-{
- YYSIZE_T yylen;
- for (yylen = 0; yystr[yylen]; yylen++)
- continue;
- return yylen;
-}
-# endif
-# endif
-
-# ifndef yystpcpy
-# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE
-# define yystpcpy stpcpy
-# else
-/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
- YYDEST. */
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static char *
-yystpcpy (char *yydest, const char *yysrc)
-#else
-static char *
-yystpcpy (yydest, yysrc)
- char *yydest;
- const char *yysrc;
-#endif
-{
- char *yyd = yydest;
- const char *yys = yysrc;
-
- while ((*yyd++ = *yys++) != '\0')
- continue;
-
- return yyd - 1;
-}
-# endif
-# endif
-
-# ifndef yytnamerr
-/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
- quotes and backslashes, so that it's suitable for yyerror. The
- heuristic is that double-quoting is unnecessary unless the string
- contains an apostrophe, a comma, or backslash (other than
- backslash-backslash). YYSTR is taken from yytname. If YYRES is
- null, do not copy; instead, return the length of what the result
- would have been. */
-static YYSIZE_T
-yytnamerr (char *yyres, const char *yystr)
-{
- if (*yystr == '"')
- {
- YYSIZE_T yyn = 0;
- char const *yyp = yystr;
-
- for (;;)
- switch (*++yyp)
- {
- case '\'':
- case ',':
- goto do_not_strip_quotes;
-
- case '\\':
- if (*++yyp != '\\')
- goto do_not_strip_quotes;
- /* Fall through. */
- default:
- if (yyres)
- yyres[yyn] = *yyp;
- yyn++;
- break;
-
- case '"':
- if (yyres)
- yyres[yyn] = '\0';
- return yyn;
- }
- do_not_strip_quotes: ;
- }
-
- if (! yyres)
- return yystrlen (yystr);
-
- return yystpcpy (yyres, yystr) - yyres;
-}
-# endif
-
-/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message
- about the unexpected token YYTOKEN for the state stack whose top is
- YYSSP.
-
- Return 0 if *YYMSG was successfully written. Return 1 if *YYMSG is
- not large enough to hold the message. In that case, also set
- *YYMSG_ALLOC to the required number of bytes. Return 2 if the
- required number of bytes is too large to store. */
-static int
-yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg,
- yytype_int16 *yyssp, int yytoken)
-{
- YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]);
- YYSIZE_T yysize = yysize0;
- YYSIZE_T yysize1;
- enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
- /* Internationalized format string. */
- const char *yyformat = 0;
- /* Arguments of yyformat. */
- char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
- /* Number of reported tokens (one for the "unexpected", one per
- "expected"). */
- int yycount = 0;
-
- /* There are many possibilities here to consider:
- - Assume YYFAIL is not used. It's too flawed to consider. See
- <http://lists.gnu.org/archive/html/bison-patches/2009-12/msg00024.html>
- for details. YYERROR is fine as it does not invoke this
- function.
- - If this state is a consistent state with a default action, then
- the only way this function was invoked is if the default action
- is an error action. In that case, don't check for expected
- tokens because there are none.
- - The only way there can be no lookahead present (in yychar) is if
- this state is a consistent state with a default action. Thus,
- detecting the absence of a lookahead is sufficient to determine
- that there is no unexpected or expected token to report. In that
- case, just report a simple "syntax error".
- - Don't assume there isn't a lookahead just because this state is a
- consistent state with a default action. There might have been a
- previous inconsistent state, consistent state with a non-default
- action, or user semantic action that manipulated yychar.
- - Of course, the expected token list depends on states to have
- correct lookahead information, and it depends on the parser not
- to perform extra reductions after fetching a lookahead from the
- scanner and before detecting a syntax error. Thus, state merging
- (from LALR or IELR) and default reductions corrupt the expected
- token list. However, the list is correct for canonical LR with
- one exception: it will still contain any token that will not be
- accepted due to an error action in a later state.
- */
- if (yytoken != YYEMPTY)
- {
- int yyn = yypact[*yyssp];
- yyarg[yycount++] = yytname[yytoken];
- if (!yypact_value_is_default (yyn))
- {
- /* Start YYX at -YYN if negative to avoid negative indexes in
- YYCHECK. In other words, skip the first -YYN actions for
- this state because they are default actions. */
- int yyxbegin = yyn < 0 ? -yyn : 0;
- /* Stay within bounds of both yycheck and yytname. */
- int yychecklim = YYLAST - yyn + 1;
- int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
- int yyx;
-
- for (yyx = yyxbegin; yyx < yyxend; ++yyx)
- if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR
- && !yytable_value_is_error (yytable[yyx + yyn]))
- {
- if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
- {
- yycount = 1;
- yysize = yysize0;
- break;
- }
- yyarg[yycount++] = yytname[yyx];
- yysize1 = yysize + yytnamerr (0, yytname[yyx]);
- if (! (yysize <= yysize1
- && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
- return 2;
- yysize = yysize1;
- }
- }
- }
-
- switch (yycount)
- {
-# define YYCASE_(N, S) \
- case N: \
- yyformat = S; \
- break
- YYCASE_(0, YY_("syntax error"));
- YYCASE_(1, YY_("syntax error, unexpected %s"));
- YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s"));
- YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s"));
- YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s"));
- YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"));
-# undef YYCASE_
- }
-
- yysize1 = yysize + yystrlen (yyformat);
- if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
- return 2;
- yysize = yysize1;
-
- if (*yymsg_alloc < yysize)
- {
- *yymsg_alloc = 2 * yysize;
- if (! (yysize <= *yymsg_alloc
- && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM))
- *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM;
- return 1;
- }
-
- /* Avoid sprintf, as that infringes on the user's name space.
- Don't have undefined behavior even if the translation
- produced a string with the wrong number of "%s"s. */
- {
- char *yyp = *yymsg;
- int yyi = 0;
- while ((*yyp = *yyformat) != '\0')
- if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount)
- {
- yyp += yytnamerr (yyp, yyarg[yyi++]);
- yyformat += 2;
- }
- else
- {
- yyp++;
- yyformat++;
- }
- }
- return 0;
-}
-#endif /* YYERROR_VERBOSE */
-
-/*-----------------------------------------------.
-| Release the memory associated to this symbol. |
-`-----------------------------------------------*/
-
-/*ARGSUSED*/
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
-#else
-static void
-yydestruct (yymsg, yytype, yyvaluep)
- const char *yymsg;
- int yytype;
- YYSTYPE *yyvaluep;
-#endif
-{
- YYUSE (yyvaluep);
-
- if (!yymsg)
- yymsg = "Deleting";
- YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
-
- switch (yytype)
- {
-
- default:
- break;
- }
-}
-
-
-/* Prevent warnings from -Wmissing-prototypes. */
-#ifdef YYPARSE_PARAM
-#if defined __STDC__ || defined __cplusplus
-int yyparse (void *YYPARSE_PARAM);
-#else
-int yyparse ();
-#endif
-#else /* ! YYPARSE_PARAM */
-#if defined __STDC__ || defined __cplusplus
-int yyparse (void);
-#else
-int yyparse ();
-#endif
-#endif /* ! YYPARSE_PARAM */
-
-
-/*----------.
-| yyparse. |
-`----------*/
-
-#ifdef YYPARSE_PARAM
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-int
-yyparse (void *YYPARSE_PARAM)
-#else
-int
-yyparse (YYPARSE_PARAM)
- void *YYPARSE_PARAM;
-#endif
-#else /* ! YYPARSE_PARAM */
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-int
-yyparse (void)
-#else
-int
-yyparse ()
-
-#endif
-#endif
-{
-/* The lookahead symbol. */
-int yychar;
-/* The backup of yychar when there is an error and we're in yyerrlab. */
-int yylastchar;
-
-/* The semantic value of the lookahead symbol. */
-YYSTYPE yylval;
-
- /* Number of syntax errors so far. */
- int yynerrs;
-
- int yystate;
- /* Number of tokens to shift before error messages enabled. */
- int yyerrstatus;
-
- /* The stacks and their tools:
- `yyss': related to states.
- `yyvs': related to semantic values.
-
- Refer to the stacks thru separate pointers, to allow yyoverflow
- to reallocate them elsewhere. */
-
- /* The state stack. */
- yytype_int16 yyssa[YYINITDEPTH];
- yytype_int16 *yyss;
- yytype_int16 *yyssp;
-
- /* The semantic value stack. */
- YYSTYPE yyvsa[YYINITDEPTH];
- YYSTYPE *yyvs;
- YYSTYPE *yyvsp;
-
- YYSIZE_T yystacksize;
-
- int yyn;
- int yyresult;
- /* Lookahead token as an internal (translated) token number. */
- int yytoken;
- /* The variables used to return semantic value and location from the
- action routines. */
- YYSTYPE yyval;
-
-#if YYERROR_VERBOSE
- /* Buffer for error messages, and its allocated size. */
- char yymsgbuf[128];
- char *yymsg = yymsgbuf;
- YYSIZE_T yymsg_alloc = sizeof yymsgbuf;
-#endif
-
-#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N))
-
- /* The number of symbols on the RHS of the reduced rule.
- Keep to zero when no symbol should be popped. */
- int yylen = 0;
-
- yytoken = 0;
- yyss = yyssa;
- yyvs = yyvsa;
- yystacksize = YYINITDEPTH;
-
- YYDPRINTF ((stderr, "Starting parse\n"));
-
- yystate = 0;
- yyerrstatus = 0;
- yynerrs = 0;
- yychar = YYEMPTY; /* Cause a token to be read. */
-
- /* Initialize stack pointers.
- Waste one element of value and location stack
- so that they stay on the same level as the state stack.
- The wasted elements are never initialized. */
- yyssp = yyss;
- yyvsp = yyvs;
-
- goto yysetstate;
-
-/*------------------------------------------------------------.
-| yynewstate -- Push a new state, which is found in yystate. |
-`------------------------------------------------------------*/
- yynewstate:
- /* In all cases, when you get here, the value and location stacks
- have just been pushed. So pushing a state here evens the stacks. */
- yyssp++;
-
- yysetstate:
- *yyssp = yystate;
-
- if (yyss + yystacksize - 1 <= yyssp)
- {
- /* Get the current used size of the three stacks, in elements. */
- YYSIZE_T yysize = yyssp - yyss + 1;
-
-#ifdef yyoverflow
- {
- /* Give user a chance to reallocate the stack. Use copies of
- these so that the &'s don't force the real ones into
- memory. */
- YYSTYPE *yyvs1 = yyvs;
- yytype_int16 *yyss1 = yyss;
-
- /* Each stack pointer address is followed by the size of the
- data in use in that stack, in bytes. This used to be a
- conditional around just the two extra args, but that might
- be undefined if yyoverflow is a macro. */
- yyoverflow (YY_("memory exhausted"),
- &yyss1, yysize * sizeof (*yyssp),
- &yyvs1, yysize * sizeof (*yyvsp),
- &yystacksize);
-
- yyss = yyss1;
- yyvs = yyvs1;
- }
-#else /* no yyoverflow */
-# ifndef YYSTACK_RELOCATE
- goto yyexhaustedlab;
-# else
- /* Extend the stack our own way. */
- if (YYMAXDEPTH <= yystacksize)
- goto yyexhaustedlab;
- yystacksize *= 2;
- if (YYMAXDEPTH < yystacksize)
- yystacksize = YYMAXDEPTH;
-
- {
- yytype_int16 *yyss1 = yyss;
- union yyalloc *yyptr =
- (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
- if (! yyptr)
- goto yyexhaustedlab;
- YYSTACK_RELOCATE (yyss_alloc, yyss);
- YYSTACK_RELOCATE (yyvs_alloc, yyvs);
-# undef YYSTACK_RELOCATE
- if (yyss1 != yyssa)
- YYSTACK_FREE (yyss1);
- }
-# endif
-#endif /* no yyoverflow */
-
- yyssp = yyss + yysize - 1;
- yyvsp = yyvs + yysize - 1;
-
- YYDPRINTF ((stderr, "Stack size increased to %lu\n",
- (unsigned long int) yystacksize));
-
- if (yyss + yystacksize - 1 <= yyssp)
- YYABORT;
- }
-
- YYDPRINTF ((stderr, "Entering state %d\n", yystate));
-
- if (yystate == YYFINAL)
- YYACCEPT;
-
- goto yybackup;
-
-/*-----------.
-| yybackup. |
-`-----------*/
-yybackup:
-
- /* Do appropriate processing given the current state. Read a
- lookahead token if we need one and don't already have one. */
-
- /* First try to decide what to do without reference to lookahead token. */
- yyn = yypact[yystate];
- if (yypact_value_is_default (yyn))
- goto yydefault;
-
- /* Not known => get a lookahead token if don't already have one. */
-
- /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */
- if (yychar == YYEMPTY)
- {
- YYDPRINTF ((stderr, "Reading a token: "));
- yychar = YYLEX;
- }
-
- if (yychar <= YYEOF)
- {
- yychar = yytoken = YYEOF;
- YYDPRINTF ((stderr, "Now at end of input.\n"));
- }
- else
- {
- yytoken = YYTRANSLATE (yychar);
- YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
- }
-
- /* If the proper action on seeing token YYTOKEN is to reduce or to
- detect an error, take that action. */
- yyn += yytoken;
- if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
- goto yydefault;
- yyn = yytable[yyn];
- if (yyn <= 0)
- {
- if (yytable_value_is_error (yyn))
- goto yyerrlab;
- yyn = -yyn;
- goto yyreduce;
- }
-
- /* Count tokens shifted since error; after three, turn off error
- status. */
- if (yyerrstatus)
- yyerrstatus--;
-
- /* Shift the lookahead token. */
- YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
-
- /* Discard the shifted token. */
- yychar = YYEMPTY;
-
- yystate = yyn;
- *++yyvsp = yylval;
-
- goto yynewstate;
-
-
-/*-----------------------------------------------------------.
-| yydefault -- do the default action for the current state. |
-`-----------------------------------------------------------*/
-yydefault:
- yyn = yydefact[yystate];
- if (yyn == 0)
- goto yyerrlab;
- goto yyreduce;
-
-
-/*-----------------------------.
-| yyreduce -- Do a reduction. |
-`-----------------------------*/
-yyreduce:
- /* yyn is the number of a rule to reduce with. */
- yylen = yyr2[yyn];
-
- /* If YYLEN is nonzero, implement the default value of the action:
- `$$ = $1'.
-
- Otherwise, the following line sets YYVAL to garbage.
- This behavior is undocumented and Bison
- users should not rely upon it. Assigning to YYVAL
- unconditionally makes the parser a bit smaller, and it avoids a
- GCC warning that YYVAL may be used uninitialized. */
- yyval = yyvsp[1-yylen];
-
-
- YY_REDUCE_PRINT (yyn);
- switch (yyn)
- {
- case 2:
-
-/* Line 1806 of yacc.c */
-#line 79 "fts0pars.y"
- {
- (yyval.node) = (yyvsp[(1) - (1)].node);
- ((fts_ast_state_t*) state)->root = (yyval.node);
- }
- break;
-
- case 3:
-
-/* Line 1806 of yacc.c */
-#line 85 "fts0pars.y"
- {
- (yyval.node) = NULL;
- }
- break;
-
- case 4:
-
-/* Line 1806 of yacc.c */
-#line 89 "fts0pars.y"
- {
- (yyval.node) = (yyvsp[(1) - (2)].node);
-
- if (!(yyval.node)) {
- (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(2) - (2)].node));
- } else {
- fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node));
- }
- }
- break;
-
- case 5:
-
-/* Line 1806 of yacc.c */
-#line 99 "fts0pars.y"
- {
- (yyval.node) = (yyvsp[(1) - (2)].node);
- (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node));
-
- if (!(yyval.node)) {
- (yyval.node) = (yyvsp[(2) - (2)].node);
- } else {
- fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node));
- }
- }
- break;
-
- case 6:
-
-/* Line 1806 of yacc.c */
-#line 111 "fts0pars.y"
- {
- (yyval.node) = (yyvsp[(2) - (3)].node);
-
- if ((yyval.node)) {
- (yyval.node) = fts_ast_create_node_subexp_list(state, (yyval.node));
- }
- }
- break;
-
- case 7:
-
-/* Line 1806 of yacc.c */
-#line 119 "fts0pars.y"
- {
- (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (4)].node));
-
- if ((yyvsp[(3) - (4)].node)) {
- fts_ast_add_node((yyval.node),
- fts_ast_create_node_subexp_list(state, (yyvsp[(3) - (4)].node)));
- }
- }
- break;
-
- case 8:
-
-/* Line 1806 of yacc.c */
-#line 129 "fts0pars.y"
- {
- (yyval.node) = (yyvsp[(1) - (1)].node);
- }
- break;
-
- case 9:
-
-/* Line 1806 of yacc.c */
-#line 133 "fts0pars.y"
- {
- (yyval.node) = (yyvsp[(1) - (1)].node);
- }
- break;
-
- case 10:
-
-/* Line 1806 of yacc.c */
-#line 137 "fts0pars.y"
- {
- fts_ast_term_set_wildcard((yyvsp[(1) - (2)].node));
- }
- break;
-
- case 11:
-
-/* Line 1806 of yacc.c */
-#line 141 "fts0pars.y"
- {
- fts_ast_term_set_distance((yyvsp[(1) - (3)].node), fts_ast_string_to_ul((yyvsp[(3) - (3)].token), 10));
- fts_ast_string_free((yyvsp[(3) - (3)].token));
- }
- break;
-
- case 12:
-
-/* Line 1806 of yacc.c */
-#line 146 "fts0pars.y"
- {
- (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (3)].node));
- fts_ast_add_node((yyval.node), (yyvsp[(2) - (3)].node));
- fts_ast_term_set_wildcard((yyvsp[(2) - (3)].node));
- }
- break;
-
- case 13:
-
-/* Line 1806 of yacc.c */
-#line 152 "fts0pars.y"
- {
- (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node));
- fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node));
- }
- break;
-
- case 14:
-
-/* Line 1806 of yacc.c */
-#line 157 "fts0pars.y"
- {
- (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (4)].node));
- fts_ast_add_node((yyval.node), (yyvsp[(2) - (4)].node));
- fts_ast_term_set_distance((yyvsp[(2) - (4)].node), fts_ast_string_to_ul((yyvsp[(4) - (4)].token), 10));
- fts_ast_string_free((yyvsp[(4) - (4)].token));
- }
- break;
-
- case 15:
-
-/* Line 1806 of yacc.c */
-#line 164 "fts0pars.y"
- {
- (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node));
- fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node));
- }
- break;
-
- case 16:
-
-/* Line 1806 of yacc.c */
-#line 170 "fts0pars.y"
- {
- (yyval.node) = fts_ast_create_node_oper(state, FTS_IGNORE);
- }
- break;
-
- case 17:
-
-/* Line 1806 of yacc.c */
-#line 174 "fts0pars.y"
- {
- (yyval.node) = fts_ast_create_node_oper(state, FTS_EXIST);
- }
- break;
-
- case 18:
-
-/* Line 1806 of yacc.c */
-#line 178 "fts0pars.y"
- {
- (yyval.node) = fts_ast_create_node_oper(state, FTS_NEGATE);
- }
- break;
-
- case 19:
-
-/* Line 1806 of yacc.c */
-#line 182 "fts0pars.y"
- {
- (yyval.node) = fts_ast_create_node_oper(state, FTS_DECR_RATING);
- }
- break;
-
- case 20:
-
-/* Line 1806 of yacc.c */
-#line 186 "fts0pars.y"
- {
- (yyval.node) = fts_ast_create_node_oper(state, FTS_INCR_RATING);
- }
- break;
-
- case 21:
-
-/* Line 1806 of yacc.c */
-#line 191 "fts0pars.y"
- {
- (yyval.node) = fts_ast_create_node_term(state, (yyvsp[(1) - (1)].token));
- fts_ast_string_free((yyvsp[(1) - (1)].token));
- }
- break;
-
- case 22:
-
-/* Line 1806 of yacc.c */
-#line 196 "fts0pars.y"
- {
- (yyval.node) = fts_ast_create_node_term(state, (yyvsp[(1) - (1)].token));
- fts_ast_string_free((yyvsp[(1) - (1)].token));
- }
- break;
-
- case 23:
-
-/* Line 1806 of yacc.c */
-#line 202 "fts0pars.y"
- {
- (yyval.node) = (yyvsp[(2) - (2)].node);
- }
- break;
-
- case 24:
-
-/* Line 1806 of yacc.c */
-#line 207 "fts0pars.y"
- {
- (yyval.node) = fts_ast_create_node_text(state, (yyvsp[(1) - (1)].token));
- fts_ast_string_free((yyvsp[(1) - (1)].token));
- }
- break;
-
-
-
-/* Line 1806 of yacc.c */
-#line 1663 "fts0pars.cc"
- default: break;
- }
- /* User semantic actions sometimes alter yychar, and that requires
- that yytoken be updated with the new translation. We take the
- approach of translating immediately before every use of yytoken.
- One alternative is translating here after every semantic action,
- but that translation would be missed if the semantic action invokes
- YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or
- if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an
- incorrect destructor might then be invoked immediately. In the
- case of YYERROR or YYBACKUP, subsequent parser actions might lead
- to an incorrect destructor call or verbose syntax error message
- before the lookahead is translated. */
- YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
-
- YYPOPSTACK (yylen);
- yylen = 0;
- YY_STACK_PRINT (yyss, yyssp);
-
- *++yyvsp = yyval;
-
- /* Now `shift' the result of the reduction. Determine what state
- that goes to, based on the state we popped back to and the rule
- number reduced by. */
-
- yyn = yyr1[yyn];
-
- yystate = yypgoto[yyn - YYNTOKENS] + *yyssp;
- if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp)
- yystate = yytable[yystate];
- else
- yystate = yydefgoto[yyn - YYNTOKENS];
-
- goto yynewstate;
-
-
-/*------------------------------------.
-| yyerrlab -- here on detecting error |
-`------------------------------------*/
-yyerrlab:
- /* Backup yychar, in case we would change it. */
- yylastchar = yychar;
- /* Make sure we have latest lookahead translation. See comments at
- user semantic actions for why this is necessary. */
- yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar);
-
- /* If not already recovering from an error, report this error. */
- if (!yyerrstatus)
- {
- ++yynerrs;
-#if ! YYERROR_VERBOSE
- yyerror (YY_("syntax error"));
-#else
-# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \
- yyssp, yytoken)
- {
- char const *yymsgp = YY_("syntax error");
- int yysyntax_error_status;
- yysyntax_error_status = YYSYNTAX_ERROR;
- if (yysyntax_error_status == 0)
- yymsgp = yymsg;
- else if (yysyntax_error_status == 1)
- {
- if (yymsg != yymsgbuf)
- YYSTACK_FREE (yymsg);
- yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc);
- if (!yymsg)
- {
- yymsg = yymsgbuf;
- yymsg_alloc = sizeof yymsgbuf;
- yysyntax_error_status = 2;
- }
- else
- {
- yysyntax_error_status = YYSYNTAX_ERROR;
- yymsgp = yymsg;
- }
- }
- yyerror (yymsgp);
- if (yysyntax_error_status == 2)
- goto yyexhaustedlab;
- }
-# undef YYSYNTAX_ERROR
-#endif
- }
-
-
-
- if (yyerrstatus == 3)
- {
- /* If just tried and failed to reuse lookahead token after an
- error, discard it. */
-
- if (yychar <= YYEOF)
- {
- /* Return failure if at end of input. */
- if (yychar == YYEOF)
- {
- /* Since we don't need the token, we have to free it first. */
- YYERRCLEANUP;
- YYABORT;
- }
- }
- else
- {
- yydestruct ("Error: discarding",
- yytoken, &yylval);
- yychar = YYEMPTY;
- }
- }
-
- /* Else will try to reuse lookahead token after shifting the error
- token. */
- goto yyerrlab1;
-
-
-/*---------------------------------------------------.
-| yyerrorlab -- error raised explicitly by YYERROR. |
-`---------------------------------------------------*/
-yyerrorlab:
-
- /* Pacify compilers like GCC when the user code never invokes
- YYERROR and the label yyerrorlab therefore never appears in user
- code. */
- if (/*CONSTCOND*/ 0)
- goto yyerrorlab;
-
- /* Do not reclaim the symbols of the rule which action triggered
- this YYERROR. */
- YYPOPSTACK (yylen);
- yylen = 0;
- YY_STACK_PRINT (yyss, yyssp);
- yystate = *yyssp;
- goto yyerrlab1;
-
-
-/*-------------------------------------------------------------.
-| yyerrlab1 -- common code for both syntax error and YYERROR. |
-`-------------------------------------------------------------*/
-yyerrlab1:
- yyerrstatus = 3; /* Each real token shifted decrements this. */
-
- for (;;)
- {
- yyn = yypact[yystate];
- if (!yypact_value_is_default (yyn))
- {
- yyn += YYTERROR;
- if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
- {
- yyn = yytable[yyn];
- if (0 < yyn)
- break;
- }
- }
-
- /* Pop the current state because it cannot handle the error token. */
- if (yyssp == yyss)
- {
- /* Since we don't need the error token, we have to free it first. */
- YYERRCLEANUP;
- YYABORT;
- }
-
-
- yydestruct ("Error: popping",
- yystos[yystate], yyvsp);
- YYPOPSTACK (1);
- yystate = *yyssp;
- YY_STACK_PRINT (yyss, yyssp);
- }
-
- *++yyvsp = yylval;
-
-
- /* Shift the error token. */
- YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
-
- yystate = yyn;
- goto yynewstate;
-
-
-/*-------------------------------------.
-| yyacceptlab -- YYACCEPT comes here. |
-`-------------------------------------*/
-yyacceptlab:
- yyresult = 0;
- goto yyreturn;
-
-/*-----------------------------------.
-| yyabortlab -- YYABORT comes here. |
-`-----------------------------------*/
-yyabortlab:
- yyresult = 1;
- goto yyreturn;
-
-#if !defined(yyoverflow) || YYERROR_VERBOSE
-/*-------------------------------------------------.
-| yyexhaustedlab -- memory exhaustion comes here. |
-`-------------------------------------------------*/
-yyexhaustedlab:
- yyerror (YY_("memory exhausted"));
- yyresult = 2;
- /* Fall through. */
-#endif
-
-yyreturn:
- if (yychar != YYEMPTY)
- {
- /* Make sure we have latest lookahead translation. See comments at
- user semantic actions for why this is necessary. */
- yytoken = YYTRANSLATE (yychar);
- yydestruct ("Cleanup: discarding lookahead",
- yytoken, &yylval);
- }
- /* Do not reclaim the symbols of the rule which action triggered
- this YYABORT or YYACCEPT. */
- YYPOPSTACK (yylen);
- YY_STACK_PRINT (yyss, yyssp);
- while (yyssp != yyss)
- {
- yydestruct ("Cleanup: popping",
- yystos[*yyssp], yyvsp);
- YYPOPSTACK (1);
- }
-#ifndef yyoverflow
- if (yyss != yyssa)
- YYSTACK_FREE (yyss);
-#endif
-#if YYERROR_VERBOSE
- if (yymsg != yymsgbuf)
- YYSTACK_FREE (yymsg);
-#endif
- /* Make sure YYID is used. */
- return YYID (yyresult);
-}
-
-
-
-/* Line 2067 of yacc.c */
-#line 212 "fts0pars.y"
-
-
-/********************************************************************
-*/
-int
-ftserror(
-/*=====*/
- const char* p)
-{
- my_printf_error(ER_PARSE_ERROR, "%s", MYF(0), p);
- return(0);
-}
-
-/********************************************************************
-Create a fts_lexer_t instance.*/
-
-fts_lexer_t*
-fts_lexer_create(
-/*=============*/
- ibool boolean_mode,
- const byte* query,
- ulint query_len)
-{
- fts_lexer_t* fts_lexer = static_cast<fts_lexer_t*>(
- ut_malloc(sizeof(fts_lexer_t)));
-
- if (boolean_mode) {
- fts0blex_init(&fts_lexer->yyscanner);
- fts0b_scan_bytes(
- reinterpret_cast<const char*>(query),
- static_cast<int>(query_len),
- fts_lexer->yyscanner);
- fts_lexer->scanner = reinterpret_cast<fts_scan>(fts_blexer);
- /* FIXME: Debugging */
- /* fts0bset_debug(1 , fts_lexer->yyscanner); */
- } else {
- fts0tlex_init(&fts_lexer->yyscanner);
- fts0t_scan_bytes(
- reinterpret_cast<const char*>(query),
- static_cast<int>(query_len),
- fts_lexer->yyscanner);
- fts_lexer->scanner = reinterpret_cast<fts_scan>(fts_tlexer);
- }
-
- return(fts_lexer);
-}
-
-/********************************************************************
-Free an fts_lexer_t instance.*/
-void
-
-fts_lexer_free(
-/*===========*/
- fts_lexer_t* fts_lexer)
-{
- if (fts_lexer->scanner == (fts_scan) fts_blexer) {
- fts0blex_destroy(fts_lexer->yyscanner);
- } else {
- fts0tlex_destroy(fts_lexer->yyscanner);
- }
-
- ut_free(fts_lexer);
-}
-
-/********************************************************************
-Call the appropaiate scanner.*/
-
-int
-fts_lexer(
-/*======*/
- YYSTYPE* val,
- fts_lexer_t* fts_lexer)
-{
- fts_scanner_alt func_ptr;
-
- func_ptr = (fts_scanner_alt) fts_lexer->scanner;
-
- return(func_ptr(val, fts_lexer->yyscanner));
-}
-
-/********************************************************************
-Parse the query.*/
-int
-fts_parse(
-/*======*/
- fts_ast_state_t* state)
-{
- return(ftsparse(state));
-}
-
diff --git a/storage/xtradb/fts/fts0pars.y b/storage/xtradb/fts/fts0pars.y
deleted file mode 100644
index e48036e82fe..00000000000
--- a/storage/xtradb/fts/fts0pars.y
+++ /dev/null
@@ -1,294 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**
- * @file fts/fts0pars.y
- * FTS parser: input file for the GNU Bison parser generator
- *
- * Created 2007/5/9 Sunny Bains
- */
-
-%{
-
-#include "mem0mem.h"
-#include "fts0ast.h"
-#include "fts0blex.h"
-#include "fts0tlex.h"
-#include "fts0pars.h"
-
-extern int fts_lexer(YYSTYPE*, fts_lexer_t*);
-extern int fts_blexer(YYSTYPE*, yyscan_t);
-extern int fts_tlexer(YYSTYPE*, yyscan_t);
-
-typedef int (*fts_scan)();
-
-extern int ftserror(const char* p);
-
-/* Required for reentrant parser */
-#define ftslex fts_lexer
-
-#define YYERROR_VERBOSE
-
-/* For passing an argument to yyparse() */
-#define YYPARSE_PARAM state
-#define YYLEX_PARAM ((fts_ast_state_t*) state)->lexer
-
-typedef int (*fts_scanner_alt)(YYSTYPE* val, yyscan_t yyscanner);
-typedef int (*fts_scanner)();
-
-struct fts_lexer_struct {
- fts_scanner scanner;
- void* yyscanner;
-};
-
-%}
-
-%union {
- int oper;
- fts_ast_string_t* token;
- fts_ast_node_t* node;
-};
-
-/* Enable re-entrant parser */
-%pure_parser
-
-%token<oper> FTS_OPER
-%token<token> FTS_TEXT FTS_TERM FTS_NUMB
-
-%type<node> prefix term text expr sub_expr expr_lst query
-
-%nonassoc '+' '-' '~' '<' '>'
-
-%%
-
-query : expr_lst {
- $$ = $1;
- ((fts_ast_state_t*) state)->root = $$;
- }
- ;
-
-expr_lst: /* Empty */ {
- $$ = NULL;
- }
-
- | expr_lst expr {
- $$ = $1;
-
- if (!$$) {
- $$ = fts_ast_create_node_list(state, $2);
- } else {
- fts_ast_add_node($$, $2);
- }
- }
-
- | expr_lst sub_expr {
- $$ = $1;
- $$ = fts_ast_create_node_list(state, $1);
-
- if (!$$) {
- $$ = $2;
- } else {
- fts_ast_add_node($$, $2);
- }
- }
- ;
-
-sub_expr: '(' expr_lst ')' {
- $$ = $2;
-
- if ($$) {
- $$ = fts_ast_create_node_subexp_list(state, $$);
- }
- }
-
- | prefix '(' expr_lst ')' {
- $$ = fts_ast_create_node_list(state, $1);
-
- if ($3) {
- fts_ast_add_node($$,
- fts_ast_create_node_subexp_list(state, $3));
- }
- }
- ;
-
-expr : term {
- $$ = $1;
- }
-
- | text {
- $$ = $1;
- }
-
- | term '*' {
- fts_ast_term_set_wildcard($1);
- }
-
- | text '@' FTS_NUMB {
- fts_ast_term_set_distance($1, fts_ast_string_to_ul($3, 10));
- fts_ast_string_free($3);
- }
-
- | prefix term '*' {
- $$ = fts_ast_create_node_list(state, $1);
- fts_ast_add_node($$, $2);
- fts_ast_term_set_wildcard($2);
- }
-
- | prefix term {
- $$ = fts_ast_create_node_list(state, $1);
- fts_ast_add_node($$, $2);
- }
-
- | prefix text '@' FTS_NUMB {
- $$ = fts_ast_create_node_list(state, $1);
- fts_ast_add_node($$, $2);
- fts_ast_term_set_distance($2, fts_ast_string_to_ul($4, 10));
- fts_ast_string_free($4);
- }
-
- | prefix text {
- $$ = fts_ast_create_node_list(state, $1);
- fts_ast_add_node($$, $2);
- }
- ;
-
-prefix : '-' {
- $$ = fts_ast_create_node_oper(state, FTS_IGNORE);
- }
-
- | '+' {
- $$ = fts_ast_create_node_oper(state, FTS_EXIST);
- }
-
- | '~' {
- $$ = fts_ast_create_node_oper(state, FTS_NEGATE);
- }
-
- | '<' {
- $$ = fts_ast_create_node_oper(state, FTS_DECR_RATING);
- }
-
- | '>' {
- $$ = fts_ast_create_node_oper(state, FTS_INCR_RATING);
- }
- ;
-
-term : FTS_TERM {
- $$ = fts_ast_create_node_term(state, $1);
- fts_ast_string_free($1);
- }
-
- | FTS_NUMB {
- $$ = fts_ast_create_node_term(state, $1);
- fts_ast_string_free($1);
- }
-
- /* Ignore leading '*' */
- | '*' term {
- $$ = $2;
- }
- ;
-
-text : FTS_TEXT {
- $$ = fts_ast_create_node_text(state, $1);
- fts_ast_string_free($1);
- }
- ;
-%%
-
-/********************************************************************
-*/
-int
-ftserror(
-/*=====*/
- const char* p)
-{
- fprintf(stderr, "%s\n", p);
- return(0);
-}
-
-/********************************************************************
-Create a fts_lexer_t instance.*/
-
-fts_lexer_t*
-fts_lexer_create(
-/*=============*/
- ibool boolean_mode,
- const byte* query,
- ulint query_len)
-{
- fts_lexer_t* fts_lexer = static_cast<fts_lexer_t*>(
- ut_malloc(sizeof(fts_lexer_t)));
-
- if (boolean_mode) {
- fts0blex_init(&fts_lexer->yyscanner);
- fts0b_scan_bytes((char*) query, query_len, fts_lexer->yyscanner);
- fts_lexer->scanner = (fts_scan) fts_blexer;
- /* FIXME: Debugging */
- /* fts0bset_debug(1 , fts_lexer->yyscanner); */
- } else {
- fts0tlex_init(&fts_lexer->yyscanner);
- fts0t_scan_bytes((char*) query, query_len, fts_lexer->yyscanner);
- fts_lexer->scanner = (fts_scan) fts_tlexer;
- }
-
- return(fts_lexer);
-}
-
-/********************************************************************
-Free an fts_lexer_t instance.*/
-void
-
-fts_lexer_free(
-/*===========*/
- fts_lexer_t* fts_lexer)
-{
- if (fts_lexer->scanner == (fts_scan) fts_blexer) {
- fts0blex_destroy(fts_lexer->yyscanner);
- } else {
- fts0tlex_destroy(fts_lexer->yyscanner);
- }
-
- ut_free(fts_lexer);
-}
-
-/********************************************************************
-Call the appropaiate scanner.*/
-
-int
-fts_lexer(
-/*======*/
- YYSTYPE* val,
- fts_lexer_t* fts_lexer)
-{
- fts_scanner_alt func_ptr;
-
- func_ptr = (fts_scanner_alt) fts_lexer->scanner;
-
- return(func_ptr(val, fts_lexer->yyscanner));
-}
-
-/********************************************************************
-Parse the query.*/
-int
-fts_parse(
-/*======*/
- fts_ast_state_t* state)
-{
- return(ftsparse(state));
-}
diff --git a/storage/xtradb/fts/fts0que.cc b/storage/xtradb/fts/fts0que.cc
deleted file mode 100644
index f24973e26fb..00000000000
--- a/storage/xtradb/fts/fts0que.cc
+++ /dev/null
@@ -1,4491 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file fts/fts0que.cc
-Full Text Search functionality.
-
-Created 2007/03/27 Sunny Bains
-Completed 2011/7/10 Sunny and Jimmy Yang
-*******************************************************/
-
-#include "dict0dict.h" /* dict_table_get_n_rows() */
-#include "ut0rbt.h"
-#include "row0sel.h"
-#include "fts0fts.h"
-#include "fts0priv.h"
-#include "fts0ast.h"
-#include "fts0pars.h"
-#include "fts0types.h"
-#include "ha_prototypes.h"
-#include <ctype.h>
-
-#ifndef UNIV_NONINL
-#include "fts0types.ic"
-#include "fts0vlc.ic"
-#endif
-
-#include <vector>
-
-/* Element (i, j) of the flat array t whose rows are n elements long.
-Arguments and the whole expansion are parenthesised so that expression
-arguments (e.g. "c + 1") index correctly. */
-#define FTS_ELEM(t, n, i, j) ((t)[(i) * (n) + (j)])
-
-#define RANK_DOWNGRADE (-1.0F)
-#define RANK_UPGRADE (1.0F)
-
-/* Maximum number of words supported in a phrase or proximity search. */
-#define MAX_PROXIMITY_ITEM 128
-
-/* Memory used by rbt itself for create and node add. The expansions are
-parenthesised so that they stay a single operand next to operators that
-bind tighter than '+'. */
-#define SIZEOF_RBT_CREATE (sizeof(ib_rbt_t) + sizeof(ib_rbt_node_t) * 2)
-#define SIZEOF_RBT_NODE_ADD (sizeof(ib_rbt_node_t))
-
-/* Initial byte length for 'words' in fts_ranking_t */
-#define RANKING_WORDS_INIT_LEN 4
-
-// FIXME: Need to have a generic iterator that traverses the ilist.
-
-/** Vector of matched words. A word's index in this vector is the position
-that fts_ranking_words_add() stores in the f_n_char field of the matching
-word_map entry. */
-typedef std::vector<fts_string_t> word_vector_t;
-
-struct fts_word_freq_t;
-
-/** State of an FTS query. */
-struct fts_query_t {
- mem_heap_t* heap; /*!< Heap to use for allocations */
-
- trx_t* trx; /*!< The query transaction */
-
- dict_index_t* index; /*!< The FTS index to search */
- /*!< FTS auxiliary common table def */
- fts_table_t fts_common_table;
-
- fts_table_t fts_index_table;/*!< FTS auxiliary index table def */
-
- ulint total_size; /*!< total memory size used by query;
- adjusted as rb tree nodes and ranking
- word bitmaps are added or removed */
-
- fts_doc_ids_t* deleted; /*!< Deleted doc ids that need to be
- filtered from the output */
-
- fts_ast_node_t* root; /*!< Abstract syntax tree */
-
- fts_ast_node_t* cur_node; /*!< Current tree node */
-
- ib_rbt_t* word_map; /*!< Matched word map for
- searching by word; each entry's
- f_n_char holds the word's position */
-
- word_vector_t* word_vector; /*!< Matched word vector for
- searching by index (position) */
-
- ib_rbt_t* doc_ids; /*!< The current set of matching
- doc ids, elements are of
- type fts_ranking_t */
-
- ib_rbt_t* intersection; /*!< The doc ids that were found in
- doc_ids, this tree will become
- the new doc_ids, elements are of type
- fts_ranking_t */
-
- /*!< Prepared statement to read the
- nodes from the FTS INDEX */
- que_t* read_nodes_graph;
-
- fts_ast_oper_t oper; /*!< Current boolean mode operator */
-
- /*!< TRUE if we want to collect the
- word positions within the document */
- ibool collect_positions;
-
- ulint flags; /*!< Specify the full text search type,
- such as boolean search, phrase
- search, proximity search etc. */
-
- ulint distance; /*!< The proximity distance of a
- phrase search. */
-
- /*!< The two doc ids below are used as
- a boundary condition when searching
- the FTS index rows */
-
- doc_id_t lower_doc_id; /*!< Lowest doc id in doc_ids */
-
- doc_id_t upper_doc_id; /*!< Highest doc id in doc_ids */
-
- bool boolean_mode; /*!< TRUE if boolean mode query */
-
- ib_vector_t* matched; /*!< Array of matching documents
- (fts_match_t) to search for a phrase */
-
- ib_vector_t** match_array; /*!< Used for proximity search, contains
- position info for each matched word
- in the word list */
-
- ib_uint64_t total_docs; /*!< The total number of documents */
-
- ulint total_words; /*!< The total number of words */
-
- dberr_t error; /*!< Error code if any, that is
- encountered during query processing */
-
- ib_rbt_t* word_freqs; /*!< RB tree of word frequencies per
- document, its elements are of type
- fts_word_freq_t */
-
- bool multi_exist; /*!< multiple FTS_EXIST oper; when set,
- fts_query_intersect_doc_id() ignores
- doc ids not already in doc_ids */
-};
-
-/** One candidate document of a phrase search. For phrase matching, the
-documents and the word positions are collected first and matched
-afterwards. */
-struct fts_match_t {
- doc_id_t doc_id; /*!< Document id */
-
- ulint start; /*!< Start the phrase match from
- this offset within the positions
- vector. */
-
- ib_vector_t* positions; /*!< Offsets of a word in a
- document */
-};
-
-/** For matching tokens in a phrase search. This data structure is used in
-the callback that determines whether a document should be accepted or
-rejected for a phrase search. */
-struct fts_select_t {
- doc_id_t doc_id; /*!< The document id to match */
-
- ulint min_pos; /*!< For found to be TRUE at least
- one position must be greater than
- min_pos. */
-
- ibool found; /*!< TRUE if found */
-
- fts_word_freq_t*
- word_freq; /*!< Word frequency instance of the
- current word being looked up in
- the FTS index */
-};
-
-/** Vector of positions, used for the range bounds in fts_proximity_t. */
-typedef std::vector<ulint> pos_vector_t;
-
-/** Defines a set of ranges for original documents, each of which
-has a minimum position and a maximum position. The text in such a range
-should contain all words in the proximity search. The words in such a
-range need to be counted to make sure their number is less than the
-specified distance of the proximity search. */
-struct fts_proximity_t {
- ulint n_pos; /*!< number of position set, defines
- a range (min to max) containing all
- matching words */
- pos_vector_t min_pos; /*!< the minimum position (in bytes)
- of the range */
- pos_vector_t max_pos; /*!< the maximum position (in bytes)
- of the range */
-};
-
-/** The match positions and tokens to match */
-struct fts_phrase_t {
- ibool found; /*!< Match result */
-
- const fts_match_t*
- match; /*!< Positions within text */
-
- const ib_vector_t*
- tokens; /*!< Tokens to match */
-
- ulint distance; /*!< For matching on proximity
- distance. Can be 0 for exact match */
- CHARSET_INFO* charset; /*!< Phrase match charset */
- mem_heap_t* heap; /*!< Heap for word processing */
- ulint zip_size; /*!< row zip size */
- fts_proximity_t*proximity_pos; /*!< position info for proximity
- search verification. Records the min
- and max position of words matched */
-};
-
-/** For storing the frequency of a word/term in a document. doc_id is the
-first member: fts_query_add_doc_freq() searches the tree of these elements
-with a pointer to a bare doc_id_t as the key. */
-struct fts_doc_freq_t {
- doc_id_t doc_id; /*!< Document id */
- ulint freq; /*!< Frequency of a word in a document */
-};
-
-/** To determine the word frequency per document. Instances are created by
-fts_query_add_word_freq() and stored in fts_query_t::word_freqs. */
-struct fts_word_freq_t {
- fts_string_t word; /*!< Word for which we need the freq,
- it's allocated on the query heap */
-
- ib_rbt_t* doc_freqs; /*!< RB Tree for storing per document
- word frequencies. The elements are
- of type fts_doc_freq_t */
- ib_uint64_t doc_count; /*!< Total number of documents that
- contain this word */
- double idf; /*!< Inverse document frequency */
-};
-
-/********************************************************************
-Callback function to fetch the rows in an FTS INDEX record.
-@return always TRUE */
-static
-ibool
-fts_query_index_fetch_nodes(
-/*========================*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg); /*!< in: pointer to ib_vector_t */
-
-/********************************************************************
-Read and filter nodes.
-@return status of type dberr_t; callers in this file store it in
-query->error and compare it with DB_SUCCESS */
-static
-dberr_t
-fts_query_filter_doc_ids(
-/*=====================*/
- fts_query_t* query, /*!< in: query instance */
- const fts_string_t* word, /*!< in: the current word */
- fts_word_freq_t* word_freq, /*!< in/out: word frequency */
- const fts_node_t* node, /*!< in: current FTS node */
- void* data, /*!< in: doc id ilist */
- ulint len, /*!< in: doc id ilist size */
- ibool calc_doc_count);/*!< in: whether to remember doc
- count */
-
-#if 0
-/*****************************************************************//***
-Find a doc_id in a word's ilist.
-@return TRUE if found. */
-static
-ibool
-fts_query_find_doc_id(
-/*==================*/
- fts_select_t* select, /*!< in/out: search the doc id selected,
- update the frequency if found. */
- void* data, /*!< in: doc id ilist */
- ulint len); /*!< in: doc id ilist size */
-#endif
-
-/*************************************************************//**
-This function implements a simple "blind" query expansion search:
-words in documents found in the first search pass will be used as
-search arguments to search the document again, thus "expand"
-the search result set.
-@return DB_SUCCESS if success, otherwise the error code */
-static
-dberr_t
-fts_expand_query(
-/*=============*/
- dict_index_t* index, /*!< in: FTS index to search */
- fts_query_t* query) /*!< in: query result, to be freed
- by the client */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*************************************************************//**
-This function finds documents that contain all words in a
-phrase or proximity search. And if proximity search, verify
-the words are close enough to each other, as in specified distance.
-This function is called for phrase and proximity search.
-@return TRUE if documents are found, FALSE if otherwise */
-static
-ibool
-fts_phrase_or_proximity_search(
-/*===========================*/
- fts_query_t* query, /*!< in/out: query instance
- query->doc_ids might be instantiated
- with qualified doc IDs */
- ib_vector_t* tokens); /*!< in: Tokens contain words */
-/*************************************************************//**
-This function checks whether words in result documents are close to
-each other (within proximity range as specified by "distance").
-If "distance" is MAX_ULINT, then it will find all combinations of
-positions of matching words and store min and max positions
-in the "qualified_pos" for later verification.
-@return true if words are close to each other, false if otherwise */
-static
-bool
-fts_proximity_get_positions(
-/*========================*/
- fts_match_t** match, /*!< in: array of pointers to
- fts_match_t; NOTE(review): this
- was documented as "query instance",
- which does not fit the type */
- ulint num_match, /*!< in: number of matching
- items */
- ulint distance, /*!< in: distance value
- for proximity search */
- fts_proximity_t* qualified_pos); /*!< out: the position info
- records ranges containing
- all matching words. */
-#if 0
-/********************************************************************
-Get the total number of words in a documents. */
-static
-ulint
-fts_query_terms_in_document(
-/*========================*/
- /*!< out: DB_SUCCESS if all go well
- else error code */
- fts_query_t* query, /*!< in: FTS query state */
- doc_id_t doc_id, /*!< in: the word to check */
- ulint* total); /*!< out: total words in document */
-#endif
-
-/********************************************************************
-Compare two fts_doc_freq_t elements by their doc_id.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_freq_doc_id_cmp(
-/*================*/
-	const void*	p1,	/*!< in: id1 */
-	const void*	p2)	/*!< in: id2 */
-{
-	const fts_doc_freq_t*	fq1 = (const fts_doc_freq_t*) p1;
-	const fts_doc_freq_t*	fq2 = (const fts_doc_freq_t*) p2;
-
-	/* Compare explicitly instead of returning the difference cast to
-	int: a narrowed difference of two doc ids can have the wrong sign,
-	or be 0 for distinct ids, when the ids are wider than int. */
-	if (fq1->doc_id < fq2->doc_id) {
-		return(-1);
-	} else if (fq1->doc_id > fq2->doc_id) {
-		return(1);
-	}
-
-	return(0);
-}
-
-#if 0
-/*******************************************************************//**
-Print the table used for calculating LCS, one row per output line, each
-row preceded by a newline. */
-static
-void
-fts_print_lcs_table(
-/*================*/
-	const ulint*	table,	/*!< in: array to print */
-	ulint		n_rows,	/*!< in: total no. of rows */
-	ulint		n_cols)	/*!< in: total no. of cols */
-{
-	for (ulint row = 0; row < n_rows; ++row) {
-		/* First element of the current row in the flat array. */
-		const ulint*	cells = table + row * n_cols;
-
-		printf("\n");
-
-		for (ulint col = 0; col < n_cols; ++col) {
-			printf("%2lu ", cells[col]);
-		}
-	}
-}
-
-/********************************************************************
-Find the longest common subsequence between the query string and
-the document. This code is compiled out (it sits inside "#if 0").
-NOTE(review): the loops start at i == len_p1 and j == len_p2 and read
-p1[i] and p2[j], so both arrays presumably carry a trailing (ulint) -1
-sentinel element -- confirm before enabling.
-NOTE(review): the table is allocated with (r + 1) * (c + 1) elements but
-FTS_ELEM is invoked with a row length of c, not c + 1 -- verify the
-indexing before enabling. */
-static
-ulint
-fts_query_lcs(
-/*==========*/
- /*!< out: LCS (length) between
- two ilists */
- const ulint* p1, /*!< in: word positions of query */
- ulint len_p1, /*!< in: no. of elements in p1 */
- const ulint* p2, /*!< in: word positions within document */
- ulint len_p2) /*!< in: no. of elements in p2 */
-{
- int i;
- ulint len = 0;
- ulint r = len_p1;
- ulint c = len_p2;
- ulint size = (r + 1) * (c + 1) * sizeof(ulint);
- ulint* table = (ulint*) ut_malloc(size);
-
- /* Traverse the table backwards, from the last row to the first and
- also from the last column to the first. We compute the smaller
- common subsequences first, then use the calculated values to determine
- the longest common subsequence. The result will be in TABLE[0][0]. */
- for (i = r; i >= 0; --i) {
- int j;
-
- for (j = c; j >= 0; --j) {
-
- if (p1[i] == (ulint) -1 || p2[j] == (ulint) -1) {
-
- FTS_ELEM(table, c, i, j) = 0;
-
- } else if (p1[i] == p2[j]) {
-
- FTS_ELEM(table, c, i, j) = FTS_ELEM(
- table, c, i + 1, j + 1) + 1;
-
- } else {
-
- ulint value;
-
- value = ut_max(
- FTS_ELEM(table, c, i + 1, j),
- FTS_ELEM(table, c, i, j + 1));
-
- FTS_ELEM(table, c, i, j) = value;
- }
- }
- }
-
- len = FTS_ELEM(table, c, 0, 0);
-
- /* Debug output of the table and of the computed length. */
- fts_print_lcs_table(table, r, c);
- printf("\nLen=%lu\n", len);
-
- ut_free(table);
-
- return(len);
-}
-#endif
-
-/*******************************************************************//**
-Compare two fts_ranking_t instances on their rank value and doc ids, in
-descending order on the rank and ascending order on doc id.
-@return 0 if p1 == p2, < 0 if p1 sorts before p2, > 0 if p1 sorts after p2 */
-static
-int
-fts_query_compare_rank(
-/*===================*/
-	const void*	p1,	/*!< in: pointer to elem */
-	const void*	p2)	/*!< in: pointer to elem */
-{
-	const fts_ranking_t*	r1 = (const fts_ranking_t*) p1;
-	const fts_ranking_t*	r2 = (const fts_ranking_t*) p2;
-
-	if (r2->rank < r1->rank) {
-		/* Higher rank sorts first. */
-		return(-1);
-	} else if (r2->rank == r1->rank) {
-
-		/* Equal rank: order by ascending doc id. The smaller doc id
-		must yield a negative result; returning 1 for both "less"
-		and "greater" made cmp(a, b) and cmp(b, a) both positive. */
-		if (r1->doc_id < r2->doc_id) {
-			return(-1);
-		} else if (r1->doc_id > r2->doc_id) {
-			return(1);
-		}
-
-		return(0);
-	}
-
-	return(1);
-}
-
-#ifdef FTS_UTF8_DEBUG
-/*******************************************************************//**
-Convert string to lowercase. The source bytes are copied into a new
-buffer of len + 1 bytes, NUL terminated and lower-cased in place with
-fts_utf8_tolower().
-@return lower case string, callers responsibility to delete using
-ut_free() */
-static
-byte*
-fts_tolower(
-/*========*/
-	const byte*	src,	/*!< in: src string */
-	ulint		len)	/*!< in: src string length */
-{
-	fts_string_t	str;
-	/* The explicit cast matches the other allocations in this file;
-	an implicit conversion from an untyped pointer is not valid C++. */
-	byte*		lc_str = static_cast<byte*>(ut_malloc(len + 1));
-
-	str.f_len = len;
-	str.f_str = lc_str;
-
-	memcpy(str.f_str, src, len);
-
-	/* Make sure the last byte is NUL terminated */
-	str.f_str[len] = '\0';
-
-	fts_utf8_tolower(&str);
-
-	return(lc_str);
-}
-
-/*******************************************************************//**
-Do a case insensitive search. Doesn't check for NUL byte end marker
-only relies on len. Convert str2 to lower case before comparing.
-Note that str2 is modified: it is lower-cased in place, and the byte at
-index str2->f_len is temporarily overwritten, so that byte must be
-addressable and writable.
-@return 0 if p1 == p2, < 0 if p1 < p2, > 0 if p1 > p2 */
-static
-int
-fts_utf8_strcmp(
-/*============*/
- const fts_string_t*
- str1, /*!< in: should be lower case*/
-
- fts_string_t* str2) /*!< in/out: any case. We will use the length
- of this string during compare as it
- should be the min of the two strings */
-{
- /* Remember the byte just past the string; it is replaced by a NUL
- below and restored afterwards. */
- byte b = str2->f_str[str2->f_len];
-
- ut_a(str2->f_len <= str1->f_len);
-
- /* We need to write a NUL byte at the end of the string because the
- string is converted to lowercase by a MySQL function which doesn't
- care about the length. */
- str2->f_str[str2->f_len] = 0;
-
- fts_utf8_tolower(str2);
-
- /* Restore the value we replaced above. */
- str2->f_str[str2->f_len] = b;
-
- return(memcmp(str1->f_str, str2->f_str, str2->f_len));
-}
-#endif
-
-/*******************************************************************//**
-Give a ranking its initial, zero-filled 'words' bitmap of
-RANKING_WORDS_INIT_LEN bytes, allocated from the query heap. */
-static
-void
-fts_ranking_words_create(
-/*=====================*/
-	fts_query_t*	query,		/*!< in: query instance */
-	fts_ranking_t*	ranking)	/*!< in/out: ranking instance */
-{
-	const ulint	init_len = RANKING_WORDS_INIT_LEN;
-	void*		bitmap = mem_heap_zalloc(query->heap, init_len);
-
-	ranking->words = static_cast<byte*>(bitmap);
-	ranking->words_len = init_len;
-}
-
-/*
-The optimization here is to use a char array (bitmap) in place of the words
-rb tree in fts_ranking_t.
-
-It can save a lot of memory, except in some cases of QUERY EXPANSION.
-
-'word_map' is used as a word dictionary in which the key is a word and the
-value is a number. In 'fts_ranking_words_add', we first check whether the
-word is in 'word_map'. If it is not, we add it to 'word_map' and give it a
-position (actually a number). Then we set the bit at that position in the
-char array 'words' to '1'.
-
-'word_vector' is a useful backup of 'word_map': with it we can get a word by
-its position more quickly than by searching 'word_map' by value. We use
-'word_vector' in 'fts_query_calculate_ranking' and 'fts_expand_query'. In
-these two functions we need to scan the bitmap 'words' and, whenever a bit
-is '1', get the word and then get its word_freq by that word.
-*/
-
-/*******************************************************************//**
-Record a word in a ranking. The word's position is looked up in
-query->word_map (and assigned, appending to both word_map and
-word_vector, when the word is new); then the bit at that position is set
-in the ranking's 'words' bitmap, which is grown on the query heap when it
-is too short. */
-static
-void
-fts_ranking_words_add(
-/*==================*/
-	fts_query_t*		query,	/*!< in: query instance */
-	fts_ranking_t*		ranking,/*!< in/out: ranking instance */
-	const fts_string_t*	word)	/*!< in: term/word to add */
-{
-	ib_rbt_bound_t	bound;
-	ulint		word_pos;
-
-	/* Note: we suppose the word map and vector are append-only. */
-	ut_ad(query->word_vector->size() == rbt_size(query->word_map));
-
-	/* The rb tree simulates a map; f_n_char holds the position. */
-	if (rbt_search(query->word_map, &bound, word) != 0) {
-		/* Unknown word: copy it to the query heap and append it. */
-		fts_string_t	entry;
-
-		word_pos = rbt_size(query->word_map);
-
-		entry.f_str = static_cast<byte*>(
-			mem_heap_alloc(query->heap, word->f_len + 1));
-		memcpy(entry.f_str, word->f_str, word->f_len);
-		entry.f_str[word->f_len] = 0;
-		entry.f_len = word->f_len;
-		entry.f_n_char = word_pos;
-
-		rbt_add_node(query->word_map, &bound, &entry);
-		ut_ad(rbt_validate(query->word_map));
-		query->word_vector->push_back(entry);
-	} else {
-		fts_string_t*	known;
-
-		known = rbt_value(fts_string_t, bound.last);
-		word_pos = known->f_n_char;
-		ut_ad(word_pos < rbt_size(query->word_map));
-	}
-
-	/* Make sure the bitmap has a byte for this position. */
-	const ulint	byte_idx = word_pos / CHAR_BIT;
-
-	if (byte_idx >= ranking->words_len) {
-		ulint	new_len = ranking->words_len;
-
-		do {
-			new_len *= 2;
-		} while (byte_idx >= new_len);
-
-		byte*	grown = static_cast<byte*>(
-			mem_heap_zalloc(query->heap, new_len));
-
-		ut_memcpy(grown, ranking->words, ranking->words_len);
-		ranking->words = grown;
-		ranking->words_len = new_len;
-	}
-
-	/* Set the bit of this word. */
-	ut_ad(byte_idx < ranking->words_len);
-	ranking->words[byte_idx] |= 1 << (word_pos % CHAR_BIT);
-}
-
-/*******************************************************************//**
-Fetch the next word of a ranking: scan the 'words' bitmap from *pos for
-the next set bit, copy the word at that index of query->word_vector into
-*word and leave *pos just past it. When no further bit is set, *pos ends
-up at the end of the bitmap.
-@return true if a word was found */
-static
-bool
-fts_ranking_words_get_next(
-/*=======================*/
-	const fts_query_t*	query,	/*!< in: query instance */
-	fts_ranking_t*		ranking,/*!< in: ranking instance */
-	ulint*			pos,	/*!< in/out: word start pos */
-	fts_string_t*		word)	/*!< out: the word found */
-{
-	const ulint	n_bits = ranking->words_len * CHAR_BIT;
-
-	for (; *pos < n_bits; *pos += 1) {
-		const ulint	idx = *pos;
-
-		if (!(ranking->words[idx / CHAR_BIT]
-		      & (1 << (idx % CHAR_BIT)))) {
-			continue;
-		}
-
-		/* Bit is set: hand out the word stored at this index. */
-		ut_ad(idx < query->word_vector->size());
-		*word = query->word_vector->at((size_t) idx);
-		*pos = idx + 1;
-
-		return true;
-	}
-
-	return false;
-}
-
-/*******************************************************************//**
-Look a word up in query->word_freqs and add it when it is missing. A new
-entry gets a NUL terminated copy of the word on the query heap and an
-empty doc_freqs rb tree, and the query's memory total is increased.
-@return pointer to the (existing or new) fts_word_freq_t in the tree */
-static
-fts_word_freq_t*
-fts_query_add_word_freq(
-/*====================*/
-	fts_query_t*		query,	/*!< in: query instance */
-	const fts_string_t*	word)	/*!< in: term/word to add */
-{
-	ib_rbt_bound_t	bound;
-
-	if (rbt_search(query->word_freqs, &bound, word) == 0) {
-		/* Already known. */
-		return(rbt_value(fts_word_freq_t, bound.last));
-	}
-
-	fts_word_freq_t	fresh;
-
-	memset(&fresh, 0, sizeof(fresh));
-
-	fresh.word.f_str = static_cast<byte*>(
-		mem_heap_alloc(query->heap, word->f_len + 1));
-	memcpy(fresh.word.f_str, word->f_str, word->f_len);
-	fresh.word.f_str[word->f_len] = 0;
-	fresh.word.f_len = word->f_len;
-
-	fresh.doc_count = 0;
-
-	fresh.doc_freqs = rbt_create(
-		sizeof(fts_doc_freq_t), fts_freq_doc_id_cmp);
-
-	bound.last = rbt_add_node(query->word_freqs, &bound, &fresh);
-
-	query->total_size += word->f_len
-		+ SIZEOF_RBT_CREATE
-		+ SIZEOF_RBT_NODE_ADD
-		+ sizeof(fts_word_freq_t);
-
-	return(rbt_value(fts_word_freq_t, bound.last));
-}
-
-/*******************************************************************//**
-Look a doc id up in a doc freq rb tree and add a zero-frequency entry for
-it when it is missing; the query's memory total is increased for a new
-entry.
-@return pointer to the (existing or new) fts_doc_freq_t in the tree */
-static
-fts_doc_freq_t*
-fts_query_add_doc_freq(
-/*===================*/
-	fts_query_t*	query,		/*!< in: query instance */
-	ib_rbt_t*	doc_freqs,	/*!< in: rb tree of fts_doc_freq_t */
-	doc_id_t	doc_id)		/*!< in: doc id to add */
-{
-	ib_rbt_bound_t	bound;
-
-	if (rbt_search(doc_freqs, &bound, &doc_id) == 0) {
-		/* Already known. */
-		return(rbt_value(fts_doc_freq_t, bound.last));
-	}
-
-	fts_doc_freq_t	fresh;
-
-	memset(&fresh, 0, sizeof(fresh));
-
-	fresh.freq = 0;
-	fresh.doc_id = doc_id;
-
-	bound.last = rbt_add_node(doc_freqs, &bound, &fresh);
-
-	query->total_size += SIZEOF_RBT_NODE_ADD
-		+ sizeof(fts_doc_freq_t);
-
-	return(rbt_value(fts_doc_freq_t, bound.last));
-}
-
-/*******************************************************************//**
-Insert a ranking entry for doc_id into query->doc_ids, unless the doc id
-is found in the deleted array or is already present in the set. */
-static
-void
-fts_query_union_doc_id(
-/*===================*/
-	fts_query_t*	query,	/*!< in: query instance */
-	doc_id_t	doc_id,	/*!< in: the doc id to add */
-	fts_rank_t	rank)	/*!< in: rank stored with a new entry */
-{
-	ib_rbt_bound_t	bound;
-	ulint		n_deleted = ib_vector_size(query->deleted->doc_ids);
-	fts_update_t*	deleted
-		= (fts_update_t*) query->deleted->doc_ids->data;
-
-	/* Nothing to do for doc ids located in the deleted array. */
-	if (fts_bsearch(deleted, 0, static_cast<int>(n_deleted), doc_id)
-	    >= 0) {
-		return;
-	}
-
-	/* Nothing to do either when the doc id is already in the set. */
-	if (rbt_search(query->doc_ids, &bound, &doc_id) == 0) {
-		return;
-	}
-
-	fts_ranking_t	entry;
-
-	entry.rank = rank;
-	entry.doc_id = doc_id;
-	fts_ranking_words_create(query, &entry);
-
-	rbt_add_node(query->doc_ids, &bound, &entry);
-
-	query->total_size += SIZEOF_RBT_NODE_ADD
-		+ sizeof(fts_ranking_t) + RANKING_WORDS_INIT_LEN;
-}
-
-/*******************************************************************//**
-Remove the ranking entry of doc_id from query->doc_ids. Nothing happens
-when the doc id is found in the deleted array or is not in the set. */
-static
-void
-fts_query_remove_doc_id(
-/*====================*/
-	fts_query_t*	query,	/*!< in: query instance */
-	doc_id_t	doc_id)	/*!< in: the doc id to remove */
-{
-	ib_rbt_bound_t	bound;
-	ulint		n_deleted = ib_vector_size(query->deleted->doc_ids);
-	fts_update_t*	deleted
-		= (fts_update_t*) query->deleted->doc_ids->data;
-
-	/* Doc ids located in the deleted array are left alone. */
-	if (fts_bsearch(deleted, 0, static_cast<int>(n_deleted), doc_id)
-	    >= 0) {
-		return;
-	}
-
-	/* Only a doc id that is in the set can be removed. */
-	if (rbt_search(query->doc_ids, &bound, &doc_id) != 0) {
-		return;
-	}
-
-	ut_free(rbt_remove_node(query->doc_ids, bound.last));
-
-	ut_ad(query->total_size >=
-	      SIZEOF_RBT_NODE_ADD + sizeof(fts_ranking_t));
-	query->total_size -= SIZEOF_RBT_NODE_ADD
-		+ sizeof(fts_ranking_t);
-}
-
-/*******************************************************************//**
-Find the doc id in the query set (but not in the deleted array) and
-artificially downgrade or upgrade its ranking by RANK_DOWNGRADE or
-RANK_UPGRADE, keeping the result within -1.0 .. 1.0. This is used for
-Boolean Search operators such as the Negation operator, which makes a
-word's contribution to the row's relevance negative. */
-static
-void
-fts_query_change_ranking(
-/*====================*/
-	fts_query_t*	query,		/*!< in: query instance */
-	doc_id_t	doc_id,		/*!< in: the doc id to adjust */
-	ibool		downgrade)	/*!< in: Whether to downgrade ranking */
-{
-	ib_rbt_bound_t	bound;
-	ulint		n_deleted = ib_vector_size(query->deleted->doc_ids);
-	fts_update_t*	deleted
-		= (fts_update_t*) query->deleted->doc_ids->data;
-
-	/* Doc ids located in the deleted array are left alone. */
-	if (fts_bsearch(deleted, 0, static_cast<int>(n_deleted), doc_id)
-	    >= 0) {
-		return;
-	}
-
-	/* Only a doc id that is in the set has a ranking to change. */
-	if (rbt_search(query->doc_ids, &bound, &doc_id) != 0) {
-		return;
-	}
-
-	fts_ranking_t*	entry = rbt_value(fts_ranking_t, bound.last);
-
-	if (downgrade) {
-		entry->rank += RANK_DOWNGRADE;
-	} else {
-		entry->rank += RANK_UPGRADE;
-	}
-
-	/* Clamp the adjusted rank to the range -1.0 .. 1.0. */
-	if (entry->rank >= 1.0F) {
-		entry->rank = 1.0F;
-	} else if (entry->rank <= -1.0F) {
-		entry->rank = -1.0F;
-	}
-}
-
-/*******************************************************************//**
-Check the doc id in the query set only if it's not in the
-deleted array. The doc ids that were found are stored in
-another rb tree (fts_query_t::intersection). */
-static
-void
-fts_query_intersect_doc_id(
-/*=======================*/
- fts_query_t* query, /*!< in: query instance */
- doc_id_t doc_id, /*!< in: the doc id to add */
- fts_rank_t rank) /*!< in: if non-zero, it is the
- rank associated with the doc_id */
-{
- ib_rbt_bound_t parent;
- ulint size = ib_vector_size(query->deleted->doc_ids);
- fts_update_t* array = (fts_update_t*) query->deleted->doc_ids->data;
- fts_ranking_t* ranking= NULL;
-
- /* There are three types of intersect:
- 1. '+a': doc_ids is empty, add doc into intersect if it matches 'a'.
- 2. 'a +b': docs match 'a' is in doc_ids, add doc into intersect
- if it matches 'b'. if the doc is also in doc_ids, then change the
- doc's rank, and add 'a' in doc's words.
- 3. '+a +b': docs matching '+a' is in doc_ids, add doc into intersect
- if it matches 'b' and it's in doc_ids.(multi_exist = true). */
-
- /* Proceed only if the doc id is not found in the deleted array */
- if (fts_bsearch(array, 0, static_cast<int>(size), doc_id) < 0) {
- fts_ranking_t new_ranking;
-
- if (rbt_search(query->doc_ids, &parent, &doc_id) != 0) {
- /* Not in doc_ids: with multi_exist the doc must
- already be there, so it is dropped; otherwise start
- without a words bitmap (one is created below). */
- if (query->multi_exist) {
- return;
- } else {
- new_ranking.words = NULL;
- }
- } else {
- ranking = rbt_value(fts_ranking_t, parent.last);
-
- /* We've just checked the doc id before: a NULL words
- pointer marks an entry whose bitmap was already handed
- over to the intersection (see below). */
- if (ranking->words == NULL) {
- ut_ad(rbt_search(query->intersection, &parent,
- ranking) == 0);
- return;
- }
-
- /* Merge rank, clamped to the range -1.0 .. 1.0 */
- rank += ranking->rank;
- if (rank >= 1.0F) {
- rank = 1.0F;
- } else if (rank <= -1.0F) {
- rank = -1.0F;
- }
-
- /* Take words */
- new_ranking.words = ranking->words;
- new_ranking.words_len = ranking->words_len;
- }
-
- new_ranking.rank = rank;
- new_ranking.doc_id = doc_id;
-
- /* Add to the intersection only if the doc id is not there yet */
- if (rbt_search(query->intersection, &parent,
- &new_ranking) != 0) {
- if (new_ranking.words == NULL) {
- fts_ranking_words_create(query, &new_ranking);
-
- query->total_size += RANKING_WORDS_INIT_LEN;
- } else {
- /* Note that the intersection has taken
- ownership of the ranking data. */
- ranking->words = NULL;
- }
-
- rbt_add_node(query->intersection,
- &parent, &new_ranking);
-
- query->total_size += SIZEOF_RBT_NODE_ADD
- + sizeof(fts_ranking_t);
- }
- }
-}
-
-/*******************************************************************//**
-Free a document ranking rb tree: remove and free every node, free the
-tree itself and subtract the corresponding sizes from the query's memory
-total. The 'words' bitmaps are only detached here, not freed. */
-static
-void
-fts_query_free_doc_ids(
-/*===================*/
-	fts_query_t*	query,		/*!< in: query instance */
-	ib_rbt_t*	doc_ids)	/*!< in, own: rb tree to free */
-{
-	const ib_rbt_node_t*	head;
-
-	/* Keep removing the first node until the tree is empty. */
-	while ((head = rbt_first(doc_ids)) != NULL) {
-
-		rbt_value(fts_ranking_t, head)->words = NULL;
-
-		ut_free(rbt_remove_node(doc_ids, head));
-
-		ut_ad(query->total_size >=
-		      SIZEOF_RBT_NODE_ADD + sizeof(fts_ranking_t));
-		query->total_size -= SIZEOF_RBT_NODE_ADD
-			+ sizeof(fts_ranking_t);
-	}
-
-	rbt_free(doc_ids);
-
-	ut_ad(query->total_size >= SIZEOF_RBT_CREATE);
-	query->total_size -= SIZEOF_RBT_CREATE;
-}
-
-/**
-Free the rb tree held in query->intersection and reset the pointer to NULL.
-@param[in,out]	query	query instance */
-static
-void
-fts_query_free_intersection(
-	fts_query_t*	query)
-{
-	ib_rbt_t*	tree = query->intersection;
-
-	fts_query_free_doc_ids(query, tree);
-	query->intersection = NULL;
-}
-
-/*******************************************************************//**
-Add the word to the "list" of matching words of a document. The
-document's ranking is looked up in query->intersection first (when that
-tree exists) and in query->doc_ids otherwise; nothing is done when the
-query flags equal FTS_OPT_RANKING or the document is in neither tree. */
-static
-void
-fts_query_add_word_to_document(
-/*===========================*/
-	fts_query_t*		query,	/*!< in: query to update */
-	doc_id_t		doc_id,	/*!< in: the document to update */
-	const fts_string_t*	word)	/*!< in: the token to add */
-{
-	if (query->flags == FTS_OPT_RANKING) {
-		return;
-	}
-
-	ib_rbt_bound_t	bound;
-	fts_ranking_t*	target = NULL;
-
-	/* The intersection tree is searched first, as it could have
-	taken ownership of the ranking's words. */
-	if (query->intersection != NULL
-	    && rbt_search(query->intersection, &bound, &doc_id) == 0) {
-
-		target = rbt_value(fts_ranking_t, bound.last);
-
-	} else if (rbt_search(query->doc_ids, &bound, &doc_id) == 0) {
-
-		target = rbt_value(fts_ranking_t, bound.last);
-	}
-
-	if (target == NULL) {
-		return;
-	}
-
-	fts_ranking_words_add(query, target, word);
-}
-
-/*******************************************************************//**
-Check the ilist of a node: unless the node can be skipped, pass it to
-fts_query_filter_doc_ids() and store the outcome in query->error. For
-the FTS_EXIST operator a node is skipped when its doc id range lies
-outside the query's lower/upper doc id bounds. */
-static
-void
-fts_query_check_node(
-/*=================*/
-	fts_query_t*		query,	/*!< in: query to update */
-	const fts_string_t*	token,	/*!< in: the token to search */
-	const fts_node_t*	node)	/*!< in: node to check */
-{
-	/* Skip nodes whose doc ids are out of range. */
-	if (query->oper == FTS_EXIST) {
-		if (query->upper_doc_id > 0
-		    && node->first_doc_id > query->upper_doc_id) {
-			return;
-		}
-
-		if (query->lower_doc_id > 0
-		    && node->last_doc_id < query->lower_doc_id) {
-			return;
-		}
-	}
-
-	ib_rbt_bound_t	bound;
-	const ulint	n_bytes = node->ilist_size;
-
-	/* The word must exist. */
-	const int	missing = rbt_search(query->word_freqs, &bound, token);
-	ut_a(missing == 0);
-
-	query->error = fts_query_filter_doc_ids(
-		query, token, rbt_value(fts_word_freq_t, bound.last), node,
-		node->ilist, n_bytes, TRUE);
-}
-
-/*****************************************************************//**
-Search index cache for word with wildcard match. A trailing '%' of the
-token is dropped and the remaining text is used as a prefix: starting at
-the cache word found by the prefix search, the cache is walked backwards
-and then forwards while the words still match the prefix, and the nodes
-of every matching word are passed to fts_query_filter_doc_ids().
-NOTE(review): token->f_len is assumed to be non-zero (the last byte is
-read unconditionally) and the prefix is assumed to fit into
-FTS_MAX_WORD_LEN bytes -- neither is checked here; confirm with callers.
-@return number of words matched, or 0 if filtering reported an error
-(the error is left in query->error) */
-static
-ulint
-fts_cache_find_wildcard(
-/*====================*/
- fts_query_t* query, /*!< in: query instance */
- const fts_index_cache_t*index_cache, /*!< in: cache to search */
- const fts_string_t* token) /*!< in: token to search */
-{
- ib_rbt_bound_t parent;
- const ib_vector_t* nodes = NULL;
- fts_string_t srch_text;
- byte term[FTS_MAX_WORD_LEN + 1];
- ulint num_word = 0;
-
- /* Length of the search text without a trailing '%' */
- srch_text.f_len = (token->f_str[token->f_len - 1] == '%')
- ? token->f_len - 1
- : token->f_len;
-
- /* Copy the prefix into the local buffer and NUL terminate it */
- strncpy((char*) term, (char*) token->f_str, srch_text.f_len);
- term[srch_text.f_len] = '\0';
- srch_text.f_str = term;
-
- /* Lookup the word in the rb tree */
- if (rbt_search_cmp(index_cache->words, &parent, &srch_text, NULL,
- innobase_fts_text_cmp_prefix) == 0) {
- const fts_tokenizer_word_t* word;
- ulint i;
- const ib_rbt_node_t* cur_node;
- ibool forward = FALSE;
-
- word = rbt_value(fts_tokenizer_word_t, parent.last);
- cur_node = parent.last;
-
- /* First pass: from the found word backwards. Second pass
- (entered through the goto below): forwards from the word
- after the found one. */
- while (innobase_fts_text_cmp_prefix(
- index_cache->charset, &srch_text, &word->text) == 0) {
-
- nodes = word->nodes;
-
- for (i = 0; nodes && i < ib_vector_size(nodes); ++i) {
- int ret;
- const fts_node_t* node;
- ib_rbt_bound_t freq_parent;
- fts_word_freq_t* word_freqs;
-
- node = static_cast<const fts_node_t*>(
- ib_vector_get_const(nodes, i));
-
- /* The frequency entry is looked up by the
- search text (the prefix), not by the matched
- cache word; it must already exist. */
- ret = rbt_search(query->word_freqs,
- &freq_parent,
- &srch_text);
-
- ut_a(ret == 0);
-
- word_freqs = rbt_value(
- fts_word_freq_t,
- freq_parent.last);
-
- query->error = fts_query_filter_doc_ids(
- query, &srch_text,
- word_freqs, node,
- node->ilist, node->ilist_size, TRUE);
-
- if (query->error != DB_SUCCESS) {
- return(0);
- }
- }
-
- num_word++;
-
- if (!forward) {
- cur_node = rbt_prev(
- index_cache->words, cur_node);
- } else {
-cont_search:
- cur_node = rbt_next(
- index_cache->words, cur_node);
- }
-
- if (!cur_node) {
- break;
- }
-
- word = rbt_value(fts_tokenizer_word_t, cur_node);
- }
-
- /* The backward pass is done: restart at the found word and
- jump into the loop body to continue in forward direction. */
- if (!forward) {
- forward = TRUE;
- cur_node = parent.last;
- goto cont_search;
- }
- }
-
- return(num_word);
-}
-
-/*****************************************************************//**
-Set difference.
-Removes from query->doc_ids the documents that contain the token. The index
-cache is searched first while holding cache->lock, then the remaining nodes
-are read through fts_index_fetch_nodes(). Nothing is done when the current
-doc id set is missing or empty. Only valid while query->oper == FTS_IGNORE.
-@return DB_SUCCESS if all go well, otherwise the value of query->error */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_query_difference(
-/*=================*/
- fts_query_t* query, /*!< in: query instance */
- const fts_string_t* token) /*!< in: token to search */
-{
- ulint n_doc_ids= 0;
- trx_t* trx = query->trx;
- dict_table_t* table = query->index->table;
-
- ut_a(query->oper == FTS_IGNORE);
-
-#ifdef FTS_INTERNAL_DIAG_PRINT
- fprintf(stderr, "DIFFERENCE: Searching: '%.*s'\n",
- (int) token->f_len, token->f_str);
-#endif
-
- /* Remember the starting size for the sanity check at the end. */
- if (query->doc_ids) {
- n_doc_ids = rbt_size(query->doc_ids);
- }
-
- /* There is nothing we can subtract from an empty set. */
- if (query->doc_ids && !rbt_empty(query->doc_ids)) {
- ulint i;
- fts_fetch_t fetch;
- const ib_vector_t* nodes;
- const fts_index_cache_t*index_cache;
- que_t* graph = NULL;
- fts_cache_t* cache = table->fts->cache;
- dberr_t error;
-
- rw_lock_x_lock(&cache->lock);
-
- index_cache = fts_find_index_cache(cache, query->index);
-
- /* Must find the index cache */
- ut_a(index_cache != NULL);
-
- /* Search the cache for a matching word first. */
- if (query->cur_node->term.wildcard
- && query->flags != FTS_PROXIMITY
- && query->flags != FTS_PHRASE) {
- fts_cache_find_wildcard(query, index_cache, token);
- } else {
- nodes = fts_cache_find_word(index_cache, token);
-
- for (i = 0; nodes && i < ib_vector_size(nodes)
- && query->error == DB_SUCCESS; ++i) {
- const fts_node_t* node;
-
- node = static_cast<const fts_node_t*>(
- ib_vector_get_const(nodes, i));
-
- fts_query_check_node(query, token, node);
- }
- }
-
- rw_lock_x_unlock(&cache->lock);
-
- /* error is passed by 'query->error' */
- if (query->error != DB_SUCCESS) {
- ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
- return(query->error);
- }
-
- /* Setup the callback args for filtering and
- consolidating the ilist. */
- fetch.read_arg = query;
- fetch.read_record = fts_query_index_fetch_nodes;
-
- error = fts_index_fetch_nodes(
- trx, &graph, &query->fts_index_table, token, &fetch);
-
- /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */
- ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS));
- if (error != DB_SUCCESS) {
- query->error = error;
- }
-
- fts_que_graph_free(graph);
- }
-
- /* The size can't increase. The set is treated as optional above,
- so do not dereference it here when it is absent. */
- ut_a(query->doc_ids == NULL
- || rbt_size(query->doc_ids) <= n_doc_ids);
-
- return(query->error);
-}
-
-/*****************************************************************//**
-Intersect the token doc ids with the current set.
-Matches are collected into a freshly created query->intersection tree,
-first from the index cache (while holding cache->lock) and then through
-fts_index_fetch_nodes(). On success that tree replaces query->doc_ids and
-the old set is freed; on failure the intersection tree is freed instead.
-Only valid while query->oper == FTS_EXIST.
-@return DB_SUCCESS if all go well, otherwise the value of query->error */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_query_intersect(
-/*================*/
- fts_query_t* query, /*!< in: query instance */
- const fts_string_t* token) /*!< in: the token to search */
-{
- trx_t* trx = query->trx;
- dict_table_t* table = query->index->table;
-
- ut_a(query->oper == FTS_EXIST);
-
-#ifdef FTS_INTERNAL_DIAG_PRINT
- fprintf(stderr, "INTERSECT: Searching: '%.*s'\n",
- (int) token->f_len, token->f_str);
-#endif
-
- /* If the current doc id set is empty and multi_exist is set,
- we know the intersection set is empty in advance, so the whole
- search below is skipped. */
- if (!(rbt_empty(query->doc_ids) && query->multi_exist)) {
- ulint n_doc_ids = 0;
- ulint i;
- fts_fetch_t fetch;
- const ib_vector_t* nodes;
- const fts_index_cache_t*index_cache;
- que_t* graph = NULL;
- fts_cache_t* cache = table->fts->cache;
- dberr_t error;
-
- ut_a(!query->intersection);
-
- n_doc_ids = rbt_size(query->doc_ids);
-
- /* Create the rb tree that will hold the doc ids of
- the intersection. */
- query->intersection = rbt_create(
- sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
-
- query->total_size += SIZEOF_RBT_CREATE;
-
- /* This is to avoid decompressing the ilist if the
- node's ilist doc ids are out of range: the first and last
- doc ids of the current set become the lower/upper bounds.
- A bound of 0 means "no bound". */
- if (!rbt_empty(query->doc_ids) && query->multi_exist) {
- const ib_rbt_node_t* node;
- doc_id_t* doc_id;
-
- node = rbt_first(query->doc_ids);
- doc_id = rbt_value(doc_id_t, node);
- query->lower_doc_id = *doc_id;
-
- node = rbt_last(query->doc_ids);
- doc_id = rbt_value(doc_id_t, node);
- query->upper_doc_id = *doc_id;
-
- } else {
- query->lower_doc_id = 0;
- query->upper_doc_id = 0;
- }
-
- /* Search the cache for a matching word first. */
-
- rw_lock_x_lock(&cache->lock);
-
- /* Search for the index specific cache. */
- index_cache = fts_find_index_cache(cache, query->index);
-
- /* Must find the index cache. */
- ut_a(index_cache != NULL);
-
- if (query->cur_node->term.wildcard) {
- /* Wildcard search the index cache */
- fts_cache_find_wildcard(query, index_cache, token);
- } else {
- nodes = fts_cache_find_word(index_cache, token);
-
- for (i = 0; nodes && i < ib_vector_size(nodes)
- && query->error == DB_SUCCESS; ++i) {
- const fts_node_t* node;
-
- node = static_cast<const fts_node_t*>(
- ib_vector_get_const(nodes, i));
-
- fts_query_check_node(query, token, node);
- }
- }
-
- rw_lock_x_unlock(&cache->lock);
-
- /* error is passed by 'query->error'; the partially built
- intersection is discarded before returning. */
- if (query->error != DB_SUCCESS) {
- ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
- fts_query_free_intersection(query);
- return(query->error);
- }
-
- /* Setup the callback args for filtering and
- consolidating the ilist. */
- fetch.read_arg = query;
- fetch.read_record = fts_query_index_fetch_nodes;
-
- error = fts_index_fetch_nodes(
- trx, &graph, &query->fts_index_table, token, &fetch);
-
- /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */
- ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS));
- if (error != DB_SUCCESS) {
- query->error = error;
- }
-
- fts_que_graph_free(graph);
-
- if (query->error == DB_SUCCESS) {
- /* Make the intersection (rb tree) the current doc id
- set and free the old set. */
- fts_query_free_doc_ids(query, query->doc_ids);
- query->doc_ids = query->intersection;
- query->intersection = NULL;
-
- ut_a(!query->multi_exist || (query->multi_exist
- && rbt_size(query->doc_ids) <= n_doc_ids));
- } else if (query->intersection != NULL) {
- fts_query_free_intersection(query);
- }
- }
-
- return(query->error);
-}
-
-/*****************************************************************//**
-Query index cache.
-Looks the token up in the in-memory cache of query->index while holding
-the cache lock exclusively. Wildcard terms go through the prefix search
-unless the query is a proximity or phrase query; everything else is an
-exact word lookup whose nodes are checked one by one until an error shows
-up in query->error.
-@return the value of query->error (DB_SUCCESS if all go well) */
-static
-dberr_t
-fts_query_cache(
-/*============*/
- fts_query_t* query, /*!< in/out: query instance */
- const fts_string_t* token) /*!< in: token to search */
-{
- fts_cache_t* fts_cache = query->index->table->fts->cache;
- const fts_index_cache_t* icache;
-
- rw_lock_x_lock(&fts_cache->lock);
-
- /* The per-index cache must exist for the queried index. */
- icache = fts_find_index_cache(fts_cache, query->index);
- ut_a(icache != NULL);
-
- if (!query->cur_node->term.wildcard
- || query->flags == FTS_PROXIMITY
- || query->flags == FTS_PHRASE) {
- /* Exact word lookup. */
- const ib_vector_t* word_nodes;
- ulint idx = 0;
-
- word_nodes = fts_cache_find_word(icache, token);
-
- while (word_nodes
- && idx < ib_vector_size(word_nodes)
- && query->error == DB_SUCCESS) {
-
- fts_query_check_node(
- query, token,
- static_cast<const fts_node_t*>(
- ib_vector_get_const(word_nodes, idx)));
-
- ++idx;
- }
- } else {
- /* Prefix (wildcard) lookup. */
- fts_cache_find_wildcard(query, icache, token);
- }
-
- rw_lock_x_unlock(&fts_cache->lock);
-
- return(query->error);
-}
-
-/*****************************************************************//**
-Set union.
-Adds the documents containing the token to the current result: the index
-cache is searched through fts_query_cache(), then the nodes are read from
-disk with fts_index_fetch_nodes(). An empty token is a no-op. Only valid
-for the operators FTS_NONE, FTS_DECR_RATING, FTS_NEGATE and
-FTS_INCR_RATING.
-@return DB_SUCCESS if all go well, otherwise the value of query->error */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_query_union(
-/*============*/
- fts_query_t* query, /*!< in: query instance */
- fts_string_t* token) /*!< in: token to search */
-{
- fts_fetch_t fetch;
- ulint n_doc_ids = 0;
- trx_t* trx = query->trx;
- que_t* graph = NULL;
- dberr_t error;
-
- ut_a(query->oper == FTS_NONE || query->oper == FTS_DECR_RATING ||
- query->oper == FTS_NEGATE || query->oper == FTS_INCR_RATING);
-
-#ifdef FTS_INTERNAL_DIAG_PRINT
- fprintf(stderr, "UNION: Searching: '%.*s'\n",
- (int) token->f_len, token->f_str);
-#endif
-
- /* Remember the starting size for the sanity check at the end. */
- if (query->doc_ids) {
- n_doc_ids = rbt_size(query->doc_ids);
- }
-
- if (token->f_len == 0) {
- return(query->error);
- }
-
- /* Single '%' would confuse parser in pars_like_rebind(). In addition,
- our wildcard search only supports prefix search */
- ut_ad(*token->f_str != '%');
-
- /* A failure of the cache search is reported through query->error.
- Stop here in that case, the same way fts_query_difference() and
- fts_query_intersect() do, instead of going on to read from disk. */
- if (fts_query_cache(query, token) != DB_SUCCESS) {
- return(query->error);
- }
-
- /* Setup the callback args for filtering and
- consolidating the ilist. */
- fetch.read_arg = query;
- fetch.read_record = fts_query_index_fetch_nodes;
-
- /* Read the nodes from disk. */
- error = fts_index_fetch_nodes(
- trx, &graph, &query->fts_index_table, token, &fetch);
-
- /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */
- ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS));
- if (error != DB_SUCCESS) {
- query->error = error;
- }
-
- fts_que_graph_free(graph);
-
- if (query->error == DB_SUCCESS) {
-
- /* The size can't decrease. The set is treated as optional
- above, so do not dereference it here when it is absent. */
- ut_a(query->doc_ids == NULL
- || rbt_size(query->doc_ids) >= n_doc_ids);
- }
-
- return(query->error);
-}
-
-/*****************************************************************//**
-Apply the current query operator to a single doc id.
-Nothing is done when query->flags equals FTS_OPT_RANKING. Otherwise the
-doc id is unioned, intersected, removed or re-ranked according to
-query->oper, and the accumulated result size is compared against
-fts_result_cache_limit.
-@return DB_SUCCESS if all go well,
-or DB_FTS_EXCEED_RESULT_CACHE_LIMIT once query->total_size exceeds the
-limit */
-static
-dberr_t
-fts_query_process_doc_id(
-/*=====================*/
- fts_query_t* query, /*!< in: query instance */
- doc_id_t doc_id, /*!< in: doc id to process */
- fts_rank_t rank) /*!< in: if non-zero, it is the
- rank associated with the doc_id */
-{
- if (query->flags == FTS_OPT_RANKING) {
- return(DB_SUCCESS);
- }
-
- switch (query->oper) {
- case FTS_NONE:
- /* Plain term: add to the result set. */
- fts_query_union_doc_id(query, doc_id, rank);
- break;
-
- case FTS_EXIST:
- /* Required term: keep only common doc ids. */
- fts_query_intersect_doc_id(query, doc_id, rank);
- break;
-
- case FTS_IGNORE:
- /* Excluded term: drop the doc id. */
- fts_query_remove_doc_id(query, doc_id);
- break;
-
- case FTS_NEGATE:
- /* Only the ranking is adjusted. */
- fts_query_change_ranking(query, doc_id, TRUE);
- break;
-
- case FTS_DECR_RATING:
- case FTS_INCR_RATING:
- /* Both add the doc id first; they differ only in the
- flag handed to the ranking adjustment. */
- fts_query_union_doc_id(query, doc_id, rank);
-
- if (query->oper == FTS_DECR_RATING) {
- fts_query_change_ranking(query, doc_id, TRUE);
- } else {
- fts_query_change_ranking(query, doc_id, FALSE);
- }
- break;
-
- default:
- ut_error;
- }
-
- return((query->total_size > fts_result_cache_limit)
- ? DB_FTS_EXCEED_RESULT_CACHE_LIMIT
- : DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Merge two result sets.
-Every ranking in doc_ids is fed through fts_query_process_doc_id() under
-the current query->oper, and the words attached to that ranking are added
-to the document in the query. For FTS_EXIST a temporary
-query->intersection tree is created first and swapped in as
-query->doc_ids once all elements have been processed.
-@return DB_SUCCESS, or the error from fts_query_process_doc_id() (also
-stored in query->error), in which case the intersection tree is freed */
-static
-dberr_t
-fts_merge_doc_ids(
-/*==============*/
- fts_query_t* query, /*!< in,out: query instance */
- const ib_rbt_t* doc_ids) /*!< in: result set to merge */
-{
- const ib_rbt_node_t* node;
-
- DBUG_ENTER("fts_merge_doc_ids");
-
- ut_a(!query->intersection);
-
- /* To process FTS_EXIST operation (intersection), we need
- to create a new result set for fts_query_intersect(). */
- if (query->oper == FTS_EXIST) {
-
- query->intersection = rbt_create(
- sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
-
- query->total_size += SIZEOF_RBT_CREATE;
- }
-
- /* Merge the elements to the result set. */
- for (node = rbt_first(doc_ids); node; node = rbt_next(doc_ids, node)) {
- fts_ranking_t* ranking;
- ulint pos = 0;
- fts_string_t word;
-
- ranking = rbt_value(fts_ranking_t, node);
-
- query->error = fts_query_process_doc_id(
- query, ranking->doc_id, ranking->rank);
-
- /* On failure drop the half-built intersection (only present
- for FTS_EXIST) and report the error to the caller. */
- if (query->error != DB_SUCCESS) {
- if (query->intersection != NULL)
- {
- ut_a(query->oper == FTS_EXIST);
- fts_query_free_intersection(query);
- }
- DBUG_RETURN(query->error);
- }
-
- /* Merge words. Don't need to take operator into account. */
- ut_a(ranking->words);
- while (fts_ranking_words_get_next(query, ranking, &pos, &word)) {
- fts_query_add_word_to_document(query, ranking->doc_id,
- &word);
- }
- }
-
- /* If it is an intersection operation, reset query->doc_ids
- to query->intersection and free the old result list. */
- if (query->oper == FTS_EXIST && query->intersection != NULL) {
- fts_query_free_doc_ids(query, query->doc_ids);
- query->doc_ids = query->intersection;
- query->intersection = NULL;
- }
-
- DBUG_RETURN(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Skip non-whitespace in a string. Move ptr to the next word boundary.
-Scanning stops at the first byte for which ispunct() or isspace() is
-true, or at end, whichever comes first.
-@return pointer to first whitespace/punctuation character or end */
-UNIV_INLINE
-byte*
-fts_query_skip_word(
-/*================*/
- byte* ptr, /*!< in: start of scan */
- const byte* end) /*!< in: pointer to end of string */
-{
- byte* cur;
-
- /* TODO: Does this have to be UTF-8 too ? */
- for (cur = ptr; cur < end; ++cur) {
- if (ispunct(*cur) || isspace(*cur)) {
- break;
- }
- }
-
- return(cur);
-}
-
-/*****************************************************************//**
-Check whether the remaining terms in the phrase match the text.
-Tokens are read from the text one at a time and compared, in order, with
-phrase->tokens starting at index 1. phrase->found is set to TRUE only when
-every remaining token has been matched. *start is advanced to the point
-where scanning stopped.
-@return TRUE if matched else FALSE */
-static
-ibool
-fts_query_match_phrase_terms(
-/*=========================*/
- fts_phrase_t* phrase, /*!< in: phrase to match */
- byte** start, /*!< in/out: text to search, we can't
- make this const because we need to
- first convert the string to
- lowercase */
- const byte* end, /*!< in: pointer to the end of
- the string to search */
- mem_heap_t* heap) /*!< in: heap */
-{
- ulint i;
- byte* ptr = *start;
- const ib_vector_t* tokens = phrase->tokens;
- ulint distance = phrase->distance;
-
- /* We check only from the second term onwards, since the first
- must have matched otherwise we wouldn't be here. */
- for (i = 1; ptr < end && i < ib_vector_size(tokens); /* No op */) {
- fts_string_t match;
- fts_string_t cmp_str;
- const fts_string_t* token;
- int result;
- ulint ret;
- ulint offset;
-
- /* ret is the number of bytes the tokenizer advanced by; it is
- used below to step ptr forward. */
- ret = innobase_mysql_fts_get_token(
- phrase->charset, ptr, (byte*) end,
- &match, &offset);
-
- if (match.f_len > 0) {
- /* Get next token to match. */
- token = static_cast<const fts_string_t*>(
- ib_vector_get_const(tokens, i));
-
- fts_utf8_string_dup(&cmp_str, &match, heap);
-
- result = innobase_fts_text_case_cmp(
- phrase->charset, token, &cmp_str);
-
- /* Skip the rest of the tokens if this one doesn't
- match and the proximity distance is exceeded
- (distance is undefined or has counted down to 0). */
- if (result
- && (distance == ULINT_UNDEFINED
- || distance == 0)) {
-
- break;
- }
-
- /* This token matched move to the next token. */
- if (result == 0) {
- /* Advance the text to search by the length
- of the last token. */
- ptr += ret;
-
- /* Advance to the next token. */
- ++i;
- } else {
-
- ut_a(distance != ULINT_UNDEFINED);
-
- ptr = fts_query_skip_word(ptr, end);
- }
-
- /* Distance can be 0 for exact matches. */
- if (distance != ULINT_UNDEFINED && distance > 0) {
- --distance;
- }
- } else {
- /* No token was produced here: just step over the bytes
- the tokenizer consumed. */
- ptr += ret;
- }
- }
-
- *start = ptr;
-
- /* Can't be greater than the number of elements. */
- ut_a(i <= ib_vector_size(tokens));
-
- /* This is the case for multiple words. */
- if (i == ib_vector_size(tokens)) {
- phrase->found = TRUE;
- }
-
- return(phrase->found);
-}
-
-/*****************************************************************//**
-Count the number of words in each position range recorded in
-phrase->proximity_pos and see whether the word count is within the
-specified "phrase->distance".
-For each (min_pos[i], max_pos[i]) pair the text is tokenized starting at
-min_pos[i]; counting stops when the position passes max_pos[i], when the
-tokenizer returns 0, or when the count exceeds phrase->distance.
-@return true if some range contains at least one word and no more than
-"distance" words */
-static
-bool
-fts_proximity_is_word_in_range(
-/*===========================*/
- const fts_phrase_t*
- phrase, /*!< in: phrase with the search info */
- byte* start, /*!< in: text to search */
- ulint total_len) /*!< in: length of text */
-{
- fts_proximity_t* proximity_pos = phrase->proximity_pos;
-
- ut_ad(proximity_pos->n_pos == proximity_pos->min_pos.size());
- ut_ad(proximity_pos->n_pos == proximity_pos->max_pos.size());
-
- /* Search each matched position pair (with min and max positions)
- and count the number of words in the range */
- for (ulint i = 0; i < proximity_pos->n_pos; i++) {
- ulint cur_pos = proximity_pos->min_pos[i];
- ulint n_word = 0;
-
- ut_ad(proximity_pos->max_pos[i] <= total_len);
-
- /* Walk through words in the range and count them */
- while (cur_pos <= proximity_pos->max_pos[i]) {
- ulint len;
- fts_string_t str;
- ulint offset = 0;
-
- len = innobase_mysql_fts_get_token(
- phrase->charset,
- start + cur_pos,
- start + total_len, &str, &offset);
-
- /* Nothing was consumed: stop scanning this range. */
- if (len == 0) {
- break;
- }
-
- /* Advances position with "len" bytes */
- cur_pos += len;
-
- /* Record the number of words */
- if (str.f_n_char > 0) {
- n_word++;
- }
-
- /* Already too many words: no need to count further. */
- if (n_word > phrase->distance) {
- break;
- }
- }
-
- /* Check if the number of words is no more than the specified
- "distance" (and that at least one word was seen) */
- if (n_word && n_word <= phrase->distance) {
- return(true);
- }
- }
-
- return(false);
-}
-
-/*****************************************************************//**
-Search one piece of document text for the phrase.
-For each recorded position of the first token (phrase->match->positions,
-starting at phrase->match->start) the text at that position is tokenized
-and compared with the first phrase token; on a match the remaining tokens
-are checked with fts_query_match_phrase_terms(). Positions smaller than
-prev_len are skipped and the rest are made relative to this text by
-subtracting prev_len.
-@return TRUE if matched else FALSE */
-static
-ibool
-fts_query_match_phrase(
-/*===================*/
- fts_phrase_t* phrase, /*!< in: phrase to match */
- byte* start, /*!< in: text to search, we can't make
- this const because we need to first
- convert the string to lowercase */
- ulint cur_len, /*!< in: length of text */
- ulint prev_len, /*!< in: total length for searched
- doc fields*/
- mem_heap_t* heap) /* heap */
-{
- ulint i;
- const fts_string_t* first;
- const byte* end = start + cur_len;
- const ib_vector_t* tokens = phrase->tokens;
- const ib_vector_t* positions = phrase->match->positions;
-
- ut_a(!phrase->found);
- ut_a(phrase->match->doc_id > 0);
- ut_a(ib_vector_size(tokens) > 0);
- ut_a(ib_vector_size(positions) > 0);
-
- first = static_cast<const fts_string_t*>(
- ib_vector_get_const(tokens, 0));
-
- ut_a(phrase->match->start < ib_vector_size(positions));
-
- for (i = phrase->match->start; i < ib_vector_size(positions); ++i) {
- ulint pos;
- fts_string_t match;
- fts_string_t cmp_str;
- byte* ptr = start;
- ulint ret;
- ulint offset;
-
- pos = *(ulint*) ib_vector_get_const(positions, i);
-
- /* ULINT_UNDEFINED ends the list of positions to examine. */
- if (pos == ULINT_UNDEFINED) {
- break;
- }
-
- /* This position lies before the text being searched. */
- if (pos < prev_len) {
- continue;
- }
-
- /* Document positions are calculated from the beginning
- of the first field, need to save the length for each
- searched field to adjust the doc position when search
- phrases. */
- pos -= prev_len;
- ptr = match.f_str = start + pos;
-
- /* Within limits ? */
- if (ptr >= end) {
- break;
- }
-
- ret = innobase_mysql_fts_get_token(
- phrase->charset, start + pos, (byte*) end,
- &match, &offset);
-
- if (match.f_len == 0) {
- break;
- }
-
- fts_utf8_string_dup(&cmp_str, &match, heap);
-
- if (innobase_fts_text_case_cmp(
- phrase->charset, first, &cmp_str) == 0) {
-
- /* This is the case for the single word
- in the phrase. */
- if (ib_vector_size(phrase->tokens) == 1) {
- phrase->found = TRUE;
- break;
- }
-
- /* Step past the first token before matching the rest. */
- ptr += ret;
-
- /* Match the remaining terms in the phrase. */
- if (fts_query_match_phrase_terms(phrase, &ptr,
- end, heap)) {
- break;
- }
- }
- }
-
- return(phrase->found);
-}
-
-/*****************************************************************//**
-Callback function to fetch and search the document.
-Walks the select list of the fetched row. For a phrase search each
-non-empty field is matched on its own with fts_query_match_phrase() and
-the walk stops at the first hit. For a proximity search
-(phrase->proximity_pos set) all non-empty fields are first copied into one
-zero-filled buffer allocated from phrase->heap, and the word distance is
-then checked once with fts_proximity_is_word_in_range().
-@return whether the phrase is found */
-static
-ibool
-fts_query_fetch_document(
-/*=====================*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: fts_phrase_t* */
-{
-
- que_node_t* exp;
- sel_node_t* node = static_cast<sel_node_t*>(row);
- fts_phrase_t* phrase = static_cast<fts_phrase_t*>(user_arg);
- ulint prev_len = 0;
- ulint total_len = 0;
- byte* document_text = NULL;
-
- exp = node->select_list;
-
- phrase->found = FALSE;
-
- /* For proximity search, we will need to get the whole document
- from all fields, so first count the total length of the document
- from all the fields */
- if (phrase->proximity_pos) {
- while (exp) {
- ulint field_len;
- dfield_t* dfield = que_node_get_val(exp);
- byte* data = static_cast<byte*>(
- dfield_get_data(dfield));
-
- if (dfield_is_ext(dfield)) {
- /* Externally stored field: the length is read
- from the field reference that occupies the last
- BTR_EXTERN_FIELD_REF_SIZE bytes of the local
- data. */
- ulint local_len = dfield_get_len(dfield);
-
- local_len -= BTR_EXTERN_FIELD_REF_SIZE;
-
- field_len = mach_read_from_4(
- data + local_len + BTR_EXTERN_LEN + 4);
- } else {
- field_len = dfield_get_len(dfield);
- }
-
- /* One extra byte is reserved per non-NULL field; the
- copy loop below advances by cur_len + 1 as well. */
- if (field_len != UNIV_SQL_NULL) {
- total_len += field_len + 1;
- }
-
- exp = que_node_get_next(exp);
- }
-
- document_text = static_cast<byte*>(mem_heap_zalloc(
- phrase->heap, total_len));
-
- if (!document_text) {
- return(FALSE);
- }
- }
-
- /* Second pass over the select list: copy or match each field. */
- exp = node->select_list;
-
- while (exp) {
- dfield_t* dfield = que_node_get_val(exp);
- byte* data = static_cast<byte*>(
- dfield_get_data(dfield));
- ulint cur_len;
-
- if (dfield_is_ext(dfield)) {
- data = btr_copy_externally_stored_field(
- &cur_len, data, phrase->zip_size,
- dfield_get_len(dfield), phrase->heap,
- NULL);
- } else {
- cur_len = dfield_get_len(dfield);
- }
-
- /* NULL and empty fields are skipped entirely; they do not
- advance prev_len. */
- if (cur_len != UNIV_SQL_NULL && cur_len != 0) {
- if (phrase->proximity_pos) {
- ut_ad(prev_len + cur_len <= total_len);
- memcpy(document_text + prev_len, data, cur_len);
- } else {
- /* For phrase search */
- phrase->found =
- fts_query_match_phrase(
- phrase,
- static_cast<byte*>(data),
- cur_len, prev_len,
- phrase->heap);
- }
-
- /* Document positions are calculated from the beginning
- of the first field, need to save the length for each
- searched field to adjust the doc position when search
- phrases. */
- prev_len += cur_len + 1;
- }
-
- if (phrase->found) {
- break;
- }
-
- exp = que_node_get_next(exp);
- }
-
- /* Proximity search: evaluate the assembled text in one go. */
- if (phrase->proximity_pos) {
- ut_ad(prev_len <= total_len);
-
- phrase->found = fts_proximity_is_word_in_range(
- phrase, document_text, total_len);
- }
-
- return(phrase->found);
-}
-
-#if 0
-/********************************************************************
-Callback function to check whether a record was found or not.
-Column 0 of the select list (DOC_COUNT) is added to
-select->word_freq->doc_count and column 1 (ILIST) is handed to
-fts_query_find_doc_id(); any further column triggers ut_error. The walk
-stops as soon as select->found is set.
-NOTE(review): this function sits inside an "#if 0" region, so it is not
-compiled. It assigns void* to typed pointers without a cast.
-@return always FALSE */
-static
-ibool
-fts_query_select(
-/*=============*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: fts_select_t* */
-{
- int i;
- que_node_t* exp;
- sel_node_t* node = row;
- fts_select_t* select = user_arg;
-
- ut_a(select->word_freq);
- ut_a(select->word_freq->doc_freqs);
-
- exp = node->select_list;
-
- for (i = 0; exp && !select->found; ++i) {
- dfield_t* dfield = que_node_get_val(exp);
- void* data = dfield_get_data(dfield);
- ulint len = dfield_get_len(dfield);
-
- switch (i) {
- case 0: /* DOC_COUNT */
- if (len != UNIV_SQL_NULL && len != 0) {
-
- select->word_freq->doc_count +=
- mach_read_from_4(data);
- }
- break;
-
- case 1: /* ILIST */
- if (len != UNIV_SQL_NULL && len != 0) {
-
- fts_query_find_doc_id(select, data, len);
- }
- break;
-
- default:
- ut_error;
- }
-
- exp = que_node_get_next(exp);
- }
-
- return(FALSE);
-}
-
-/********************************************************************
-Read the rows from the FTS index, that match word and where the
-doc id is between first and last doc id.
-The statement is parsed only when *graph is NULL; otherwise the existing
-graph and its pars info are reused. The same doc id is bound as both
-:min_doc_id and :max_doc_id. Evaluation is retried for as long as it
-fails with DB_LOCK_WAIT_TIMEOUT. The graph is not freed here.
-NOTE(review): this function sits inside an "#if 0" region, so it is not
-compiled. The error message formats a dberr_t with "%lu" -- confirm the
-specifier before re-enabling.
-@return DB_SUCCESS if all go well else error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_query_find_term(
-/*================*/
- fts_query_t* query, /*!< in: FTS query state */
- que_t** graph, /*!< in: prepared statement */
- const fts_string_t* word, /*!< in: the word to fetch */
- doc_id_t doc_id, /*!< in: doc id to match */
- ulint* min_pos,/*!< in/out: pos found must be
- greater than this minimum value. */
- ibool* found) /*!< out: TRUE if found else FALSE */
-{
- pars_info_t* info;
- dberr_t error;
- fts_select_t select;
- doc_id_t match_doc_id;
- trx_t* trx = query->trx;
-
- trx->op_info = "fetching FTS index matching nodes";
-
- if (*graph) {
- info = (*graph)->info;
- } else {
- info = pars_info_create();
- }
-
- select.found = FALSE;
- select.doc_id = doc_id;
- select.min_pos = *min_pos;
- select.word_freq = fts_query_add_word_freq(query, word->f_str);
-
- pars_info_bind_function(info, "my_func", fts_query_select, &select);
- pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
-
- /* Convert to "storage" byte order. */
- fts_write_doc_id((byte*) &match_doc_id, doc_id);
-
- fts_bind_doc_id(info, "min_doc_id", &match_doc_id);
-
- fts_bind_doc_id(info, "max_doc_id", &match_doc_id);
-
- if (!*graph) {
- ulint selected;
-
- /* The index table suffix is chosen from the first byte of
- the word. */
- selected = fts_select_index(*word->f_str);
-
- query->fts_index_table.suffix = fts_get_suffix(selected);
-
- *graph = fts_parse_sql(
- &query->fts_index_table,
- info,
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS"
- " SELECT doc_count, ilist\n"
- " FROM \"%s\"\n"
- " WHERE word LIKE :word AND "
- " first_doc_id <= :min_doc_id AND "
- " last_doc_id >= :max_doc_id\n"
- " ORDER BY first_doc_id;\n"
- "BEGIN\n"
- "\n"
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c % NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE c;");
- }
-
- /* Retry on lock wait timeout; leave the loop on success or on any
- other error. */
- for(;;) {
- error = fts_eval_sql(trx, *graph);
-
- if (error == DB_SUCCESS) {
-
- break; /* Exit the loop. */
- } else {
- ut_print_timestamp(stderr);
-
- if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: lock wait "
- "timeout reading FTS index. "
- "Retrying!\n");
-
- trx->error_state = DB_SUCCESS;
- } else {
- fprintf(stderr, " InnoDB: Error: %lu "
- "while reading FTS index.\n", error);
-
- break; /* Exit the loop. */
- }
- }
- }
-
- /* Value to return */
- *found = select.found;
-
- /* min_pos is only updated when a match was found. */
- if (*found) {
- *min_pos = select.min_pos;
- }
-
- return(error);
-}
-
-/********************************************************************
-Callback aggregator for int columns.
-Adds the 4-byte value of every non-NULL, non-empty column in the select
-list to *total.
-NOTE(review): this function sits inside an "#if 0" region, so it is not
-compiled. It assigns void* to typed pointers without a cast. */
-static
-ibool
-fts_query_sum(
-/*==========*/
- /*!< out: always returns TRUE */
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: ulint* */
-{
-
- que_node_t* exp;
- sel_node_t* node = row;
- ulint* total = user_arg;
-
- exp = node->select_list;
-
- while (exp) {
- dfield_t* dfield = que_node_get_val(exp);
- void* data = dfield_get_data(dfield);
- ulint len = dfield_get_len(dfield);
-
- if (len != UNIV_SQL_NULL && len != 0) {
- *total += mach_read_from_4(data);
- }
-
- exp = que_node_get_next(exp);
- }
-
- return(TRUE);
-}
-
-/********************************************************************
-Calculate the total documents that contain a particular word (term).
-*total is reset to 0 and then accumulated by fts_query_sum() over the
-doc_count column of every row whose word equals the given word. The
-statement graph is created and freed inside this function. Evaluation is
-retried for as long as it fails with DB_LOCK_WAIT_TIMEOUT.
-NOTE(review): this function sits inside an "#if 0" region, so it is not
-compiled. Unlike the sibling statements, the table name placeholder here
-is not wrapped in double quotes, and the error message formats a dberr_t
-with "%lu" -- confirm both before re-enabling.
-@return DB_SUCCESS if all go well else error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_query_total_docs_containing_term(
-/*=================================*/
- fts_query_t* query, /*!< in: FTS query state */
- const fts_string_t* word, /*!< in: the word to check */
- ulint* total) /*!< out: documents containing word */
-{
- pars_info_t* info;
- dberr_t error;
- que_t* graph;
- ulint selected;
- trx_t* trx = query->trx;
-
- trx->op_info = "fetching FTS index document count";
-
- *total = 0;
-
- info = pars_info_create();
-
- pars_info_bind_function(info, "my_func", fts_query_sum, total);
- pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
-
- /* The index table suffix is chosen from the first byte of the
- word. */
- selected = fts_select_index(*word->f_str);
-
- query->fts_index_table.suffix = fts_get_suffix(selected);
-
- graph = fts_parse_sql(
- &query->fts_index_table,
- info,
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS"
- " SELECT doc_count\n"
- " FROM %s\n"
- " WHERE word = :word "
- " ORDER BY first_doc_id;\n"
- "BEGIN\n"
- "\n"
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c % NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE c;");
-
- /* Retry on lock wait timeout; leave the loop on success or on any
- other error. */
- for(;;) {
- error = fts_eval_sql(trx, graph);
-
- if (error == DB_SUCCESS) {
-
- break; /* Exit the loop. */
- } else {
- ut_print_timestamp(stderr);
-
- if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: lock wait "
- "timeout reading FTS index. "
- "Retrying!\n");
-
- trx->error_state = DB_SUCCESS;
- } else {
- fprintf(stderr, " InnoDB: Error: %lu "
- "while reading FTS index.\n", error);
-
- break; /* Exit the loop. */
- }
- }
- }
-
- fts_que_graph_free(graph);
-
- return(error);
-}
-
-/********************************************************************
-Get the total number of words in a document.
-*total is reset to 0 and then accumulated by fts_query_sum() over the
-"count" column of the rows whose doc_id equals the given doc id, read from
-the table with the "DOC_ID" suffix. The statement graph is created and
-freed inside this function. Evaluation is retried for as long as it fails
-with DB_LOCK_WAIT_TIMEOUT.
-NOTE(review): this function sits inside an "#if 0" region, so it is not
-compiled. The cursor declaration below has no ';' before "BEGIN", unlike
-the sibling statements, and the error message formats a dberr_t with
-"%lu" -- confirm both before re-enabling.
-@return DB_SUCCESS if all go well else error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_query_terms_in_document(
-/*========================*/
- fts_query_t* query, /*!< in: FTS query state */
- doc_id_t doc_id, /*!< in: the doc id to check */
- ulint* total) /*!< out: total words in document */
-{
- pars_info_t* info;
- dberr_t error;
- que_t* graph;
- doc_id_t read_doc_id;
- trx_t* trx = query->trx;
-
- trx->op_info = "fetching FTS document term count";
-
- *total = 0;
-
- info = pars_info_create();
-
- pars_info_bind_function(info, "my_func", fts_query_sum, total);
-
- /* Convert to "storage" byte order. */
- fts_write_doc_id((byte*) &read_doc_id, doc_id);
- fts_bind_doc_id(info, "doc_id", &read_doc_id);
-
- query->fts_index_table.suffix = "DOC_ID";
-
- graph = fts_parse_sql(
- &query->fts_index_table,
- info,
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS"
- " SELECT count\n"
- " FROM \"%s\"\n"
- " WHERE doc_id = :doc_id "
- "BEGIN\n"
- "\n"
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c % NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE c;");
-
- /* Retry on lock wait timeout; leave the loop on success or on any
- other error. */
- for(;;) {
- error = fts_eval_sql(trx, graph);
-
- if (error == DB_SUCCESS) {
-
- break; /* Exit the loop. */
- } else {
- ut_print_timestamp(stderr);
-
- if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: lock wait "
- "timeout reading FTS doc id table. "
- "Retrying!\n");
-
- trx->error_state = DB_SUCCESS;
- } else {
- fprintf(stderr, " InnoDB: Error: %lu "
- "while reading FTS doc id table.\n",
- error);
-
- break; /* Exit the loop. */
- }
- }
- }
-
- fts_que_graph_free(graph);
-
- return(error);
-}
-#endif
-
-/*****************************************************************//**
-Retrieve the document and match the phrase tokens.
-A zeroed fts_phrase_t is filled in with the match, tokens, distance,
-charset and zip size, together with a private 512-byte memory heap that
-is freed before returning. The document identified by match->doc_id is
-fetched with fts_query_fetch_document() as the per-row callback.
-*found is FALSE whenever the fetch fails.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_query_match_document(
-/*=====================*/
- ib_vector_t* tokens, /*!< in: phrase tokens */
- fts_get_doc_t* get_doc, /*!< in: table and prepared statements */
- fts_match_t* match, /*!< in: doc id and positions */
- ulint distance, /*!< in: proximity distance */
- ibool* found) /*!< out: TRUE if phrase found */
-{
- dberr_t error;
- fts_phrase_t phrase;
-
- memset(&phrase, 0x0, sizeof(phrase));
-
- phrase.match = match; /* Positions to match */
- phrase.tokens = tokens; /* Tokens to match */
- phrase.distance = distance;
- phrase.charset = get_doc->index_cache->charset;
- phrase.zip_size = dict_table_zip_size(
- get_doc->index_cache->index->table);
- phrase.heap = mem_heap_create(512);
-
- *found = phrase.found = FALSE;
-
- error = fts_doc_fetch_by_doc_id(
- get_doc, match->doc_id, NULL, FTS_FETCH_DOC_BY_ID_EQUAL,
- fts_query_fetch_document, &phrase);
-
- if (error != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr, "InnoDB: Error: (%s) matching document.\n",
- ut_strerr(error));
- } else {
- *found = phrase.found;
- }
-
- mem_heap_free(phrase.heap);
-
- return(error);
-}
-
-/*****************************************************************//**
-This function fetches the original document and counts the
-words in between matching words to see whether they are within the
-specified distance.
-The index cache is looked up under cache->lock, the document with doc id
-match[0]->doc_id is fetched with fts_query_fetch_document() as the per-row
-callback, and the prepared statement and the private heap are released
-before returning. A fetch error is logged and reported as "not in range".
-@return true if the fetch succeeded and the words are within the
-proximity distance, false otherwise */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-fts_query_is_in_proximity_range(
-/*============================*/
- const fts_query_t* query, /*!< in: query instance */
- fts_match_t** match, /*!< in: matches; only match[0]->doc_id
- is read here */
- fts_proximity_t* qualified_pos) /*!< in: position info for
- qualified ranges */
-{
- fts_get_doc_t get_doc;
- fts_cache_t* cache = query->index->table->fts->cache;
- dberr_t err;
- fts_phrase_t phrase;
-
- memset(&get_doc, 0x0, sizeof(get_doc));
- memset(&phrase, 0x0, sizeof(phrase));
-
- rw_lock_x_lock(&cache->lock);
- get_doc.index_cache = fts_find_index_cache(cache, query->index);
- rw_lock_x_unlock(&cache->lock);
- ut_a(get_doc.index_cache != NULL);
-
- phrase.distance = query->distance;
- phrase.charset = get_doc.index_cache->charset;
- phrase.zip_size = dict_table_zip_size(
- get_doc.index_cache->index->table);
- phrase.heap = mem_heap_create(512);
- phrase.proximity_pos = qualified_pos; /* selects the proximity
- branch of the fetch callback */
- phrase.found = FALSE;
-
- err = fts_doc_fetch_by_doc_id(
- &get_doc, match[0]->doc_id, NULL, FTS_FETCH_DOC_BY_ID_EQUAL,
- fts_query_fetch_document, &phrase);
-
- if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Error: (%s) in verification phase of proximity "
- "search", ut_strerr(err));
- }
-
- /* Free the prepared statement. */
- if (get_doc.get_document_graph) {
- fts_que_graph_free(get_doc.get_document_graph);
- get_doc.get_document_graph = NULL;
- }
-
- mem_heap_free(phrase.heap);
-
- return(err == DB_SUCCESS && phrase.found);
-}
-
-/*****************************************************************//**
-Walk the candidate matches in query->matched, fetch each candidate
-document and search its text for the actual phrase. Documents that
-contain the phrase are added to the query result, together with the
-non-stopword tokens.
-@return query->error, DB_SUCCESS if all OK */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_query_search_phrase(
-/*====================*/
-	fts_query_t*	query,		/*!< in: query instance */
-	ib_vector_t*	orig_tokens,	/*!< in: tokens to search,
-					with any stopwords in the
-					original phrase */
-	ib_vector_t*	tokens)		/*!< in: tokens that do
-					not include stopwords and
-					can be used to calculate
-					ranking */
-{
-	fts_cache_t*	cache = query->index->table->fts->cache;
-	const ulint	n_candidates = ib_vector_size(query->matched);
-	fts_get_doc_t	get_doc;
-
-	/* Prepare the document retrieval state. */
-	memset(&get_doc, 0x0, sizeof(get_doc));
-
-	rw_lock_x_lock(&cache->lock);
-
-	get_doc.index_cache = fts_find_index_cache(cache, query->index);
-
-	/* The index cache has to exist. */
-	ut_a(get_doc.index_cache != NULL);
-
-	rw_lock_x_unlock(&cache->lock);
-
-#ifdef FTS_INTERNAL_DIAG_PRINT
-	ut_print_timestamp(stderr);
-	fprintf(stderr, " Start phrase search\n");
-#endif
-
-	/* Fetch every candidate document and run the real phrase
-	match on it; hits go into the current doc id set. */
-	for (ulint idx = 0;
-	     idx < n_candidates && query->error == DB_SUCCESS;
-	     ++idx) {
-		ibool		hit = FALSE;
-		fts_match_t*	candidate = static_cast<fts_match_t*>(
-			ib_vector_get(query->matched, idx));
-
-		/* A zero doc id marks an entry that an earlier pass
-		filtered out. */
-		if (candidate->doc_id == 0) {
-			continue;
-		}
-
-		query->error = fts_query_match_document(
-			orig_tokens, &get_doc,
-			candidate, query->distance, &hit);
-
-		if (query->error != DB_SUCCESS || !hit) {
-			continue;
-		}
-
-		query->error = fts_query_process_doc_id(
-			query, candidate->doc_id, 0);
-
-		if (query->error != DB_SUCCESS) {
-			break;
-		}
-
-		for (ulint w = 0; w < ib_vector_size(tokens); w++) {
-			fts_string_t*	word = static_cast<fts_string_t*>(
-				ib_vector_get(tokens, w));
-
-			fts_query_add_word_to_document(
-				query, candidate->doc_id, word);
-		}
-	}
-
-	/* Release the prepared statement, if one was created. */
-	if (get_doc.get_document_graph) {
-		fts_que_graph_free(get_doc.get_document_graph);
-		get_doc.get_document_graph = NULL;
-	}
-
-	return(query->error);
-}
-
-/*****************************************************************//**
-Text/Phrase search. Splits the phrase into tokens, fetches the index
-nodes for each retained token and then runs either the proximity check
-or the exact phrase match. The token vectors and the match arrays are
-allocated from a local heap that is freed before returning.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_query_phrase_search(
-/*====================*/
- fts_query_t* query, /*!< in: query instance */
- const fts_string_t* phrase) /*!< in: phrase text to tokenize and search */
-{
- ib_vector_t* tokens;
- ib_vector_t* orig_tokens;
- mem_heap_t* heap = mem_heap_create(sizeof(fts_string_t));
- ulint len = phrase->f_len;
- ulint cur_pos = 0;
- ib_alloc_t* heap_alloc;
- ulint num_token;
- CHARSET_INFO* charset;
-
- charset = query->fts_index_table.charset;
-
- heap_alloc = ib_heap_allocator_create(heap);
-
- tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4);
- orig_tokens = ib_vector_create(heap_alloc, sizeof(fts_string_t), 4);
-
- /* A defined, non-zero distance selects proximity search; anything
- else is treated as an exact phrase search. */
- if (query->distance != ULINT_UNDEFINED && query->distance > 0) {
- query->flags = FTS_PROXIMITY;
- } else {
- query->flags = FTS_PHRASE;
- }
-
- /* Split the phrase into tokens. */
- while (cur_pos < len) {
- fts_cache_t* cache = query->index->table->fts->cache;
- ib_rbt_bound_t parent;
- ulint offset;
- ulint cur_len;
- fts_string_t result_str;
-
- cur_len = innobase_mysql_fts_get_token(
- charset,
- reinterpret_cast<const byte*>(phrase->f_str) + cur_pos,
- reinterpret_cast<const byte*>(phrase->f_str) + len,
- &result_str, &offset);
-
- /* The tokenizer returned 0: stop scanning. */
- if (cur_len == 0) {
- break;
- }
-
- cur_pos += cur_len;
-
- /* A token without characters is skipped. */
- if (result_str.f_n_char == 0) {
- continue;
- }
-
- fts_string_t* token = static_cast<fts_string_t*>(
- ib_vector_push(tokens, NULL));
-
- /* Copy the token text into the local heap and NUL terminate it. */
- token->f_str = static_cast<byte*>(
- mem_heap_alloc(heap, result_str.f_len + 1));
- ut_memcpy(token->f_str, result_str.f_str, result_str.f_len);
-
- token->f_len = result_str.f_len;
- token->f_str[token->f_len] = 0;
-
- /* Keep the token only if a stopword tree is cached, the token
- is not found in it (rbt_search() != 0) and its character count
- lies within [fts_min_token_size, fts_max_token_size]. */
- if (cache->stopword_info.cached_stopword
- && rbt_search(cache->stopword_info.cached_stopword,
- &parent, token) != 0
- && result_str.f_n_char >= fts_min_token_size
- && result_str.f_n_char <= fts_max_token_size) {
- /* Add the word to the RB tree so that we can
- calculate its frequency within a document. */
- fts_query_add_word_freq(query, token);
- } else {
- /* NOTE(review): "token" still points at the popped slot
- and is read below; this relies on ib_vector_pop() leaving
- the slot contents intact - confirm. */
- ib_vector_pop(tokens);
- }
-
- /* we will start to store all words including stopwords
- in the "orig_tokens" vector, but skip any leading words
- that are stopwords */
- if (!ib_vector_is_empty(tokens)) {
- fts_string_t* orig_token = static_cast<fts_string_t*>(
- ib_vector_push(orig_tokens, NULL));
-
- orig_token->f_str = token->f_str;
- orig_token->f_len = token->f_len;
- }
- }
-
- num_token = ib_vector_size(tokens);
- if (num_token > MAX_PROXIMITY_ITEM) {
- query->error = DB_FTS_TOO_MANY_WORDS_IN_PHRASE;
- goto func_exit;
- }
-
- ut_ad(ib_vector_size(orig_tokens) >= num_token);
-
- /* Ignore empty strings. */
- if (num_token > 0) {
- fts_string_t* token;
- fts_fetch_t fetch;
- trx_t* trx = query->trx;
- fts_ast_oper_t oper = query->oper;
- que_t* graph = NULL;
- ulint i;
- dberr_t error;
-
- /* Create the vector for storing matching document ids
- and the positions of the first token of the phrase. */
- if (!query->matched) {
- ib_alloc_t* heap_alloc;
-
- heap_alloc = ib_heap_allocator_create(heap);
-
- /* NOTE(review): query->flags was assigned FTS_PROXIMITY or
- FTS_PHRASE at the top of this function, so this first branch
- looks unreachable unless the flags change in between -
- confirm. */
- if (!(query->flags & FTS_PROXIMITY)
- && !(query->flags & FTS_PHRASE)) {
- query->matched = ib_vector_create(
- heap_alloc, sizeof(fts_match_t),
- 64);
- } else {
- ut_a(num_token <= MAX_PROXIMITY_ITEM);
- /* One match vector per token; sizeof(query->matched)
- is the size of one vector pointer slot. */
- query->match_array =
- (ib_vector_t**) mem_heap_alloc(
- heap,
- num_token *
- sizeof(query->matched));
-
- for (i = 0; i < num_token; i++) {
- query->match_array[i] =
- ib_vector_create(
- heap_alloc, sizeof(fts_match_t),
- 64);
- }
-
- query->matched = query->match_array[0];
- }
- }
-
- /* Setup the callback args for filtering and consolidating
- the ilist. */
- fetch.read_arg = query;
- fetch.read_record = fts_query_index_fetch_nodes;
-
- for (i = 0; i < num_token; i++) {
- /* Search for the i-th word from the phrase. */
- token = static_cast<fts_string_t*>(
- ib_vector_get(tokens, i));
-
- /* Collect the matches of this token in its own vector. */
- if (query->flags & FTS_PROXIMITY
- || query->flags & FTS_PHRASE) {
- query->matched = query->match_array[i];
- }
-
- error = fts_index_fetch_nodes(
- trx, &graph, &query->fts_index_table,
- token, &fetch);
-
- /* DB_FTS_EXCEED_RESULT_CACHE_LIMIT passed by 'query->error' */
- ut_ad(!(query->error != DB_SUCCESS && error != DB_SUCCESS));
- if (error != DB_SUCCESS) {
- query->error = error;
- }
-
- fts_que_graph_free(graph);
- graph = NULL;
-
- fts_query_cache(query, token);
-
- if (!(query->flags & FTS_PHRASE)
- && !(query->flags & FTS_PROXIMITY)) {
- break;
- }
-
- /* If any of the token can't be found,
- no need to continue match */
- if (ib_vector_is_empty(query->match_array[i])
- || query->error != DB_SUCCESS) {
- goto func_exit;
- }
- }
-
- /* Just a single word, no need to fetch the original
- documents to do phrase matching */
- if (ib_vector_size(orig_tokens) == 1
- && !ib_vector_is_empty(query->match_array[0])) {
- fts_match_t* match;
- ulint n_matched;
-
- n_matched = ib_vector_size(query->match_array[0]);
-
- for (i = 0; i < n_matched; i++) {
- match = static_cast<fts_match_t*>(
- ib_vector_get(
- query->match_array[0], i));
-
- query->error = fts_query_process_doc_id(
- query, match->doc_id, 0);
- if (query->error != DB_SUCCESS) {
- goto func_exit;
- }
-
- /* "token" is the last token fetched by the loop above. */
- fts_query_add_word_to_document(
- query, match->doc_id, token);
- }
- query->oper = oper;
- goto func_exit;
- }
-
- /* If we are doing proximity search, verify the distance
- between all words, and check they are in specified distance. */
- if (query->flags & FTS_PROXIMITY) {
- fts_phrase_or_proximity_search(query, tokens);
- } else {
- ibool matched;
-
- /* Phrase Search case:
- We filter out the doc ids that don't contain
- all the tokens in the phrase. It's cheaper to
- search the ilist than bringing the documents in
- and then doing a search through the text. Isolated
- testing shows this also helps in mitigating disruption
- of the buffer cache. */
- matched = fts_phrase_or_proximity_search(query, tokens);
- query->matched = query->match_array[0];
-
- /* Read the actual text in and search for the phrase. */
- if (matched) {
- ut_ad(query->error == DB_SUCCESS);
- query->error = fts_query_search_phrase(
- query, orig_tokens, tokens);
- }
- }
-
- /* Restore original operation. */
- query->oper = oper;
-
- if (query->error != DB_SUCCESS) {
- goto func_exit;
- }
- }
-
-func_exit:
- /* NOTE(review): query->match_array was allocated from this heap and
- is not reset here; only query->matched is cleared below. */
- mem_heap_free(heap);
-
- /* Don't need it anymore. */
- query->matched = NULL;
-
- return(query->error);
-}
-
-/*****************************************************************//**
-Evaluate a single token against the current result set. The set
-operation is chosen by query->oper; the outcome is stored in
-query->error. An unknown operator is a fatal error.
-@return query->error, DB_SUCCESS if all go well */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_query_execute(
-/*==============*/
-	fts_query_t*	query,	/*!< in: query instance */
-	fts_string_t*	token)	/*!< in: token to search */
-{
-	switch (query->oper) {
-	case FTS_EXIST:
-		/* The token must be present: intersect. */
-		query->error = fts_query_intersect(query, token);
-		break;
-
-	case FTS_IGNORE:
-		/* The token must be absent: subtract. */
-		query->error = fts_query_difference(query, token);
-		break;
-
-	case FTS_NONE:
-	case FTS_NEGATE:
-	case FTS_INCR_RATING:
-	case FTS_DECR_RATING:
-		/* All of these add the token's documents: union. */
-		query->error = fts_query_union(query, token);
-		break;
-
-	default:
-		ut_error;
-	}
-
-	return(query->error);
-}
-
-/*****************************************************************//**
-Set up the search token for a term node. For a wildcard term a new
-buffer holding the term followed by '%' and a terminating NUL is
-allocated with ut_malloc(); it is the responsibility of the caller to
-free it. Otherwise the token points directly at the term string of
-the node and nothing is allocated.
-@return ptr to allocated memory, or NULL if nothing was allocated */
-static
-byte*
-fts_query_get_token(
-/*================*/
-	fts_ast_node_t*	node,	/*!< in: the current sub tree */
-	fts_string_t*	token)	/*!< out: token to create */
-{
-	ulint	str_len;
-	byte*	new_ptr = NULL;
-
-	/* Check the node type before the term member is read. */
-	ut_a(node->type == FTS_AST_TERM);
-
-	str_len = node->term.ptr->len;
-
-	token->f_len = str_len;
-	token->f_str = node->term.ptr->str;
-
-	if (node->term.wildcard) {
-
-		/* Room for the term, the '%' and the terminating NUL. */
-		token->f_str = static_cast<byte*>(ut_malloc(str_len + 2));
-		token->f_len = str_len + 1;
-
-		memcpy(token->f_str, node->term.ptr->str, str_len);
-
-		token->f_str[str_len] = '%';
-		token->f_str[token->f_len] = 0;
-
-		new_ptr = token->f_str;
-	}
-
-	return(new_ptr);
-}
-
-/*****************************************************************//**
-Visit one node of the AST: run a phrase search for a text node, a
-token search for a term node, or descend into a sub-expression list.
-Any other node type is a fatal error.
-@return query->error, DB_SUCCESS if all go well */
-static
-dberr_t
-fts_query_visitor(
-/*==============*/
- fts_ast_oper_t oper, /*!< in: current operator */
- fts_ast_node_t* node, /*!< in: The root of the current subtree*/
- void* arg) /*!< in: callback arg, a fts_query_t* */
-{
- byte* ptr;
- fts_string_t token;
- fts_query_t* query = static_cast<fts_query_t*>(arg);
-
- ut_a(node);
- DBUG_ENTER("fts_query_visitor");
- DBUG_PRINT("fts", ("nodetype: %s", fts_ast_node_type_get(node->type)));
-
- /* Record the operator and the node being processed in the query. */
- token.f_n_char = 0;
- query->oper = oper;
- query->cur_node = node;
-
- switch (node->type) {
- case FTS_AST_TEXT:
- token.f_str = node->text.ptr->str;
- token.f_len = node->text.ptr->len;
-
- /* For FTS_EXIST a fresh intersection tree is created before
- the phrase search; it replaces query->doc_ids afterwards. */
- if (query->oper == FTS_EXIST) {
- ut_ad(query->intersection == NULL);
- query->intersection = rbt_create(
- sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
-
- query->total_size += SIZEOF_RBT_CREATE;
- }
-
- /* Set the current proximity distance. */
- query->distance = node->text.distance;
-
- /* Force collection of doc ids and the positions. */
- query->collect_positions = TRUE;
-
- query->error = fts_query_phrase_search(query, &token);
-
- query->collect_positions = FALSE;
-
- /* Swap in the intersection as the new result set. */
- if (query->oper == FTS_EXIST) {
- fts_query_free_doc_ids(query, query->doc_ids);
- query->doc_ids = query->intersection;
- query->intersection = NULL;
- }
-
- break;
-
- case FTS_AST_TERM:
- token.f_str = node->term.ptr->str;
- token.f_len = node->term.ptr->len;
-
- /* Add the word to our RB tree that will be used to
- calculate this terms per document frequency. */
- fts_query_add_word_freq(query, &token);
-
- /* ptr is non-NULL only if a wildcard buffer was allocated
- for the token; it is freed after the search. */
- ptr = fts_query_get_token(node, &token);
- query->error = fts_query_execute(query, &token);
-
- if (ptr) {
- ut_free(ptr);
- }
- break;
-
- case FTS_AST_SUBEXP_LIST:
- query->error = fts_ast_visit_sub_exp(node, fts_query_visitor, arg);
- break;
-
- default:
- ut_error;
- }
-
- /* Remember that an FTS_EXIST node has been processed. */
- if (query->oper == FTS_EXIST) {
- query->multi_exist = true;
- }
-
- DBUG_RETURN(query->error);
-}
-
-/*****************************************************************//**
-Process (nested) sub-expression, create a new result set to store the
-sub-expression result by processing nodes under current sub-expression
-list. Merge the sub-expression result with that of parent expression list.
-The operator, the result set and the multi_exist flag of the parent are
-saved before and reinstated after the sub-expression is visited.
-@return DB_SUCCESS if all well */
-UNIV_INTERN
-dberr_t
-fts_ast_visit_sub_exp(
-/*==================*/
- fts_ast_node_t* node, /*!< in,out: current root node */
- fts_ast_callback visitor, /*!< in: callback function */
- void* arg) /*!< in,out: arg for callback, a fts_query_t* */
-{
- fts_ast_oper_t cur_oper;
- fts_query_t* query = static_cast<fts_query_t*>(arg);
- ib_rbt_t* parent_doc_ids;
- ib_rbt_t* subexpr_doc_ids;
- dberr_t error = DB_SUCCESS;
- /* Output argument of fts_ast_visit(); not read afterwards. */
- bool will_be_ignored = false;
- bool multi_exist;
-
- DBUG_ENTER("fts_ast_visit_sub_exp");
-
- ut_a(node->type == FTS_AST_SUBEXP_LIST);
-
- cur_oper = query->oper;
-
- /* Save current result set */
- parent_doc_ids = query->doc_ids;
-
- /* Create new result set to store the sub-expression result. We
- will merge this result set with the parent after processing. */
- query->doc_ids = rbt_create(sizeof(fts_ranking_t),
- fts_ranking_doc_id_cmp);
-
- query->total_size += SIZEOF_RBT_CREATE;
-
- /* The sub-expression starts with a cleared multi_exist flag. */
- multi_exist = query->multi_exist;
- query->multi_exist = false;
- /* Process nodes in current sub-expression and store its
- result set in query->doc_ids we created above. */
- error = fts_ast_visit(FTS_NONE, node, visitor,
- arg, &will_be_ignored);
-
- /* Reinstate parent node state */
- query->multi_exist = multi_exist;
- query->oper = cur_oper;
-
- /* Merge the sub-expression result with the parent result set.
- The merge is skipped if visiting the sub-expression failed. */
- subexpr_doc_ids = query->doc_ids;
- query->doc_ids = parent_doc_ids;
- if (error == DB_SUCCESS) {
- error = fts_merge_doc_ids(query, subexpr_doc_ids);
- }
-
- /* Free current result set. Result already merged into parent. */
- fts_query_free_doc_ids(query, subexpr_doc_ids);
-
- DBUG_RETURN(error);
-}
-
-#if 0
-/* Disabled code: everything up to the matching #endif is compiled out. */
-/*****************************************************************//***
-Check if the doc id exists in the ilist. The ilist is decoded entry by
-entry until the selected doc id is found or len bytes are consumed.
-@return TRUE if doc id found */
-static
-ulint
-fts_query_find_doc_id(
-/*==================*/
- fts_select_t* select, /*!< in/out: contains the doc id to
- find, we update the word freq if
- document found */
- void* data, /*!< in: doc id ilist */
- ulint len) /*!< in: doc id ilist size */
-{
- byte* ptr = data;
- doc_id_t doc_id = 0;
- ulint decoded = 0;
-
- /* Decode the ilist and search for selected doc_id. We also
- calculate the frequency of the word in the document if found. */
- while (decoded < len && !select->found) {
- ulint freq = 0;
- ulint min_pos = 0;
- ulint last_pos = 0;
- ulint pos = fts_decode_vlc(&ptr);
-
- /* Add the delta. */
- doc_id += pos;
-
- /* The position list of an entry ends with a zero byte. */
- while (*ptr) {
- ++freq;
- last_pos += fts_decode_vlc(&ptr);
-
- /* Only if min_pos is not set and the current
- term exists in a position greater than the
- min_pos of the previous term. */
- if (min_pos == 0 && last_pos > select->min_pos) {
- min_pos = last_pos;
- }
- }
-
- /* Skip the end of word position marker. */
- ++ptr;
-
- /* Bytes decoded so far. */
- decoded = ptr - (byte*) data;
-
- /* A word may exist in the document but we only consider a
- match if it exists in a position that is greater than the
- position of the previous term. */
- if (doc_id == select->doc_id && min_pos > 0) {
- fts_doc_freq_t* doc_freq;
-
- /* Add the doc id to the doc freq rb tree, if
- the doc id doesn't exist it will be created. */
- doc_freq = fts_query_add_doc_freq(
- select->word_freq->doc_freqs, doc_id);
-
- /* Avoid duplicating the frequency tally */
- if (doc_freq->freq == 0) {
- doc_freq->freq = freq;
- }
-
- select->found = TRUE;
- select->min_pos = min_pos;
- }
- }
-
- return(select->found);
-}
-#endif
-
-/*****************************************************************//**
-Read and filter nodes. Decodes the doc id ilist of one index node:
-every entry is a doc id delta followed by a zero-terminated list of
-position deltas. Depending on query->collect_positions the entries are
-either collected in query->matched for later phrase matching or added
-to the query result right away.
-@return DB_SUCCESS if all go well,
-or return DB_FTS_EXCEED_RESULT_CACHE_LIMIT */
-static
-dberr_t
-fts_query_filter_doc_ids(
-/*=====================*/
- fts_query_t* query, /*!< in: query instance */
- const fts_string_t* word, /*!< in: the current word */
- fts_word_freq_t* word_freq, /*!< in/out: word frequency */
- const fts_node_t* node, /*!< in: current FTS node */
- void* data, /*!< in: doc id ilist */
- ulint len, /*!< in: doc id ilist size */
- ibool calc_doc_count) /*!< in: whether to remember doc count */
-{
- byte* ptr = static_cast<byte*>(data);
- doc_id_t doc_id = 0;
- ulint decoded = 0;
- ib_rbt_t* doc_freqs = word_freq->doc_freqs;
-
- /* Decode the ilist and add the doc ids to the query doc_id set. */
- while (decoded < len) {
- ulint freq = 0;
- fts_doc_freq_t* doc_freq;
- fts_match_t* match = NULL;
- ulint last_pos = 0;
- ulint pos = fts_decode_vlc(&ptr);
-
- /* Some sanity checks. */
- if (doc_id == 0) {
- ut_a(pos == node->first_doc_id);
- }
-
- /* Add the delta. */
- doc_id += pos;
-
- if (calc_doc_count) {
- word_freq->doc_count++;
- }
-
- /* We simply collect the matching instances here. */
- if (query->collect_positions) {
- ib_alloc_t* heap_alloc;
-
- /* Create a new fts_match_t instance. */
- match = static_cast<fts_match_t*>(
- ib_vector_push(query->matched, NULL));
-
- match->start = 0;
- match->doc_id = doc_id;
- heap_alloc = ib_vector_allocator(query->matched);
-
- /* Allocate from the same heap as the
- parent container. */
- match->positions = ib_vector_create(
- heap_alloc, sizeof(ulint), 64);
-
- /* Account for the match and its initial position vector. */
- query->total_size += sizeof(fts_match_t)
- + sizeof(ib_vector_t)
- + sizeof(ulint) * 64;
- }
-
- /* Unpack the positions within the document. */
- /* NOTE(review): this scan stops only at a zero byte and is not
- checked against len - confirm the ilist is always terminated. */
- while (*ptr) {
- last_pos += fts_decode_vlc(&ptr);
-
- /* Collect the matching word positions, for phrase
- matching later. */
- if (query->collect_positions) {
- ib_vector_push(match->positions, &last_pos);
- }
-
- ++freq;
- }
-
- /* End of list marker. */
- last_pos = (ulint) -1;
-
- if (query->collect_positions) {
- ut_a(match != NULL);
- ib_vector_push(match->positions, &last_pos);
- }
-
- /* Add the doc id to the doc freq rb tree, if the doc id
- doesn't exist it will be created. */
- doc_freq = fts_query_add_doc_freq(query, doc_freqs, doc_id);
-
- /* Avoid duplicating frequency tally. */
- if (doc_freq->freq == 0) {
- doc_freq->freq = freq;
- }
-
- /* Skip the end of word position marker. */
- ++ptr;
-
- /* Bytes decoded so far */
- decoded = ptr - (byte*) data;
-
- /* We simply collect the matching documents and the
- positions here and match later. */
- if (!query->collect_positions) {
- /* We ignore error here and will check it later */
- fts_query_process_doc_id(query, doc_id, 0);
-
- /* Add the word to the document's matched RB tree. */
- fts_query_add_word_to_document(query, doc_id, word);
- }
- }
-
- /* Some sanity checks. */
- ut_a(doc_id == node->last_doc_id);
-
- /* The cache limit is checked once, after the whole ilist. */
- if (query->total_size > fts_result_cache_limit) {
- return(DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
- } else {
- return(DB_SUCCESS);
- }
-}
-
-/*****************************************************************//**
-Read the FTS INDEX row. Looks up the word frequency entry for the
-current word (or, for a wildcard term, for the search string) and then
-walks the remaining columns of the row: DOC_COUNT, FIRST_DOC_ID,
-LAST_DOC_ID and ILIST.
-@return DB_SUCCESS if all go well, otherwise the error returned by
-fts_query_filter_doc_ids() */
-static
-dberr_t
-fts_query_read_node(
-/*================*/
- fts_query_t* query, /*!< in: query instance */
- const fts_string_t* word, /*!< in: current word */
- que_node_t* exp) /*!< in: query graph node */
-{
- int i;
- int ret;
- fts_node_t node;
- ib_rbt_bound_t parent;
- fts_word_freq_t* word_freq;
- ibool skip = FALSE;
- fts_string_t term;
- byte buf[FTS_MAX_WORD_LEN + 1];
- dberr_t error = DB_SUCCESS;
-
- ut_a(query->cur_node->type == FTS_AST_TERM ||
- query->cur_node->type == FTS_AST_TEXT);
-
- memset(&node, 0, sizeof(node));
- term.f_str = buf;
-
- /* Need to consider the wildcard search case, the word frequency
- is created on the search string not the actual word. So we need
- to assign the frequency on search string behalf. */
- /* NOTE(review): ut_ad() is presumably a debug-only assertion; if
- so, nothing bounds the memcpy() into buf in other builds -
- confirm. */
- if (query->cur_node->type == FTS_AST_TERM
- && query->cur_node->term.wildcard) {
- term.f_len = query->cur_node->term.ptr->len;
- ut_ad(FTS_MAX_WORD_LEN >= term.f_len);
- memcpy(term.f_str, query->cur_node->term.ptr->str, term.f_len);
- } else {
- term.f_len = word->f_len;
- ut_ad(FTS_MAX_WORD_LEN >= word->f_len);
- memcpy(term.f_str, word->f_str, word->f_len);
- }
-
- /* Lookup the word in our rb tree, it must exist. */
- ret = rbt_search(query->word_freqs, &parent, &term);
-
- ut_a(ret == 0);
-
- word_freq = rbt_value(fts_word_freq_t, parent.last);
-
- /* Start from 1 since the first column has been read by the caller.
- Also, we rely on the order of the columns projected, to filter
- out ilists that are out of range and we always want to read
- the doc_count irrespective of the suitability of the row. */
-
- for (i = 1; exp && !skip; exp = que_node_get_next(exp), ++i) {
-
- dfield_t* dfield = que_node_get_val(exp);
- byte* data = static_cast<byte*>(
- dfield_get_data(dfield));
- ulint len = dfield_get_len(dfield);
-
- ut_a(len != UNIV_SQL_NULL);
-
- /* Note: The column numbers below must match the SELECT. */
-
- switch (i) {
- case 1: /* DOC_COUNT */
- word_freq->doc_count += mach_read_from_4(data);
- break;
-
- case 2: /* FIRST_DOC_ID */
- node.first_doc_id = fts_read_doc_id(data);
-
- /* Skip nodes whose doc ids are out range. */
- if (query->oper == FTS_EXIST
- && query->upper_doc_id > 0
- && node.first_doc_id > query->upper_doc_id) {
- skip = TRUE;
- }
- break;
-
- case 3: /* LAST_DOC_ID */
- node.last_doc_id = fts_read_doc_id(data);
-
- /* Skip nodes whose doc ids are out range. */
- if (query->oper == FTS_EXIST
- && query->lower_doc_id > 0
- && node.last_doc_id < query->lower_doc_id) {
- skip = TRUE;
- }
- break;
-
- case 4: /* ILIST */
-
- /* The doc count was already taken from column 1, hence
- calc_doc_count is FALSE here. */
- error = fts_query_filter_doc_ids(
- query, &word_freq->word, word_freq,
- &node, data, len, FALSE);
-
- break;
-
- default:
- ut_error;
- }
- }
-
- if (!skip) {
- /* Make sure all columns were read. */
-
- ut_a(i == 5);
- }
-
- return error;
-}
-
-/*****************************************************************//**
-Callback function to fetch the rows in an FTS INDEX record. The first
-selected column is the word; the remaining columns are handed to
-fts_query_read_node(). The error, if any, is passed out through
-query->error.
-@return TRUE if the row was processed successfully, FALSE on error */
-static
-ibool
-fts_query_index_fetch_nodes(
-/*========================*/
-	void*	row,		/*!< in: sel_node_t* */
-	void*	user_arg)	/*!< in: pointer to fts_fetch_t */
-{
-	fts_fetch_t*	fetch = static_cast<fts_fetch_t*>(user_arg);
-	fts_query_t*	query = static_cast<fts_query_t*>(fetch->read_arg);
-	sel_node_t*	sel_node = static_cast<sel_node_t*>(row);
-	que_node_t*	word_col = sel_node->select_list;
-	dfield_t*	word_field = que_node_get_val(word_col);
-	fts_string_t	key;
-
-	/* The word of this row becomes the lookup key. */
-	key.f_len = dfield_get_len(word_field);
-	key.f_str = static_cast<byte*>(dfield_get_data(word_field));
-
-	ut_a(key.f_len <= FTS_MAX_WORD_LEN);
-
-	/* Note: we pass error out by 'query->error' */
-	query->error = fts_query_read_node(
-		query, &key, que_node_get_next(word_col));
-
-	if (query->error == DB_SUCCESS) {
-		return(TRUE);
-	}
-
-	ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
-
-	return(FALSE);
-}
-
-/*****************************************************************//**
-Calculate the inverse document frequency (IDF) for all the terms in
-query->word_freqs. The idf of a term whose doc_count is zero is left
-untouched. */
-static
-void
-fts_query_calculate_idf(
-/*====================*/
-	fts_query_t*	query)	/*!< in: Query state */
-{
-	const ib_uint64_t	n_docs = query->total_docs;
-	const ib_rbt_node_t*	cursor = rbt_first(query->word_freqs);
-
-	/* Visit every word frequency entry of the query. */
-	while (cursor != NULL) {
-		fts_word_freq_t*	freq;
-
-		freq = rbt_value(fts_word_freq_t, cursor);
-
-		if (freq->doc_count > 0) {
-			/* The query processor assumes ranking > 0 for a
-			match. Since log10(1) = 0, a word found in all
-			documents gets an arbitrary very small IDF
-			instead of zero. */
-			freq->idf = (n_docs == freq->doc_count)
-				? log10(1.0001)
-				: log10(n_docs / (double) freq->doc_count);
-		}
-
-		if (fts_enable_diag_print) {
-			fprintf(stderr,"'%s' -> " UINT64PF "/" UINT64PF
-				" %6.5lf\n",
-				freq->word.f_str,
-				query->total_docs, freq->doc_count,
-				freq->idf);
-		}
-
-		cursor = rbt_next(query->word_freqs, cursor);
-	}
-}
-
-/*****************************************************************//**
-Calculate the ranking of the document: for every word recorded for
-the document, freq * idf * idf is added to ranking->rank. */
-static
-void
-fts_query_calculate_ranking(
-/*========================*/
-	const fts_query_t*	query,		/*!< in: query state */
-	fts_ranking_t*		ranking)	/*!< in/out: Document to rank */
-{
-	ulint		cursor = 0;
-	fts_string_t	word;
-
-	/* At this stage, ranking->rank should not exceed the 1.0
-	bound */
-	ut_ad(ranking->rank <= 1.0 && ranking->rank >= -1.0);
-	ut_ad(rbt_size(query->word_map) == query->word_vector->size());
-
-	while (fts_ranking_words_get_next(query, ranking, &cursor, &word)) {
-		ib_rbt_bound_t		word_pos;
-		ib_rbt_bound_t		doc_pos;
-		fts_word_freq_t*	word_freq;
-		fts_doc_freq_t*		doc_freq;
-		int			not_found;
-
-		/* The word must have a frequency entry. */
-		not_found = rbt_search(query->word_freqs, &word_pos, &word);
-		ut_a(not_found == 0);
-
-		word_freq = rbt_value(fts_word_freq_t, word_pos.last);
-
-		/* The document must be known for this word. */
-		not_found = rbt_search(
-			word_freq->doc_freqs, &doc_pos, &ranking->doc_id);
-		ut_a(not_found == 0);
-
-		doc_freq = rbt_value(fts_doc_freq_t, doc_pos.last);
-
-		const double	weight = (double) doc_freq->freq
-			* word_freq->idf;
-
-		ranking->rank += (fts_rank_t) (weight * word_freq->idf);
-	}
-}
-
-/*****************************************************************//**
-Add ranking to the result set. If the ranking tree already holds an
-entry for the document, the new rank is added to it; otherwise a new
-node is inserted and accounted for in query->total_size. */
-static
-void
-fts_query_add_ranking(
-/*==================*/
-	fts_query_t*		query,		/*!< in: query state */
-	ib_rbt_t*		ranking_tree,	/*!< in: ranking tree */
-	const fts_ranking_t*	new_ranking)	/*!< in: ranking of a document */
-{
-	ib_rbt_bound_t	slot;
-
-	if (rbt_search(ranking_tree, &slot, new_ranking) != 0) {
-		/* Not in the tree yet: insert a copy. */
-		rbt_add_node(ranking_tree, &slot, new_ranking);
-
-		query->total_size += SIZEOF_RBT_NODE_ADD
-			+ sizeof(fts_ranking_t);
-
-		return;
-	}
-
-	/* Already present: accumulate the rank. */
-	fts_ranking_t*	existing = rbt_value(fts_ranking_t, slot.last);
-
-	existing->rank += new_ranking->rank;
-
-	ut_a(existing->words == NULL);
-}
-
-/*****************************************************************//**
-Retrieve the FTS Relevance Ranking result for doc with doc_id
-@return the relevance ranking value, 0 if there is no result, no
-ranking tree, or no ranking value for the doc */
-float
-fts_retrieve_ranking(
-/*=================*/
-	fts_result_t*	result,	/*!< in: FTS result structure */
-	doc_id_t	doc_id)	/*!< in: doc_id of the item to retrieve */
-{
-	ib_rbt_bound_t	pos;
-	fts_ranking_t	key;
-
-	DBUG_ENTER("fts_retrieve_ranking");
-
-	if (result == NULL || result->rankings_by_id == NULL) {
-		DBUG_RETURN(0);
-	}
-
-	/* Only the doc id of the search key is filled in. */
-	key.doc_id = doc_id;
-
-	if (rbt_search(result->rankings_by_id, &pos, &key) != 0) {
-		/* No ranking stored for this doc id. */
-		DBUG_RETURN(0);
-	}
-
-	fts_ranking_t*	hit = rbt_value(fts_ranking_t, pos.last);
-
-	DBUG_RETURN(hit->rank);
-}
-
-/*****************************************************************//**
-Create the result and copy the data to it. If no result is passed in,
-a new one is allocated with ut_malloc(). On exceeding the result cache
-limit query->error is set to DB_FTS_EXCEED_RESULT_CACHE_LIMIT, the
-result is freed (also when it was passed in by the caller) and NULL is
-returned.
-@return the result, or NULL if the result cache limit was exceeded */
-static
-fts_result_t*
-fts_query_prepare_result(
-/*=====================*/
- fts_query_t* query, /*!< in: Query state */
- fts_result_t* result) /*!< in: result this can contain
- data from a previous search on
- another FTS index */
-{
- const ib_rbt_node_t* node;
- /* Set to true when the result is allocated here. */
- bool result_is_null = false;
-
- DBUG_ENTER("fts_query_prepare_result");
-
- if (result == NULL) {
- result = static_cast<fts_result_t*>(ut_malloc(sizeof(*result)));
-
- memset(result, 0x0, sizeof(*result));
-
- result->rankings_by_id = rbt_create(
- sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
-
- query->total_size += sizeof(fts_result_t) + SIZEOF_RBT_CREATE;
- result_is_null = true;
- }
-
- /* Optimized ranking: only the first word frequency entry is used. */
- if (query->flags == FTS_OPT_RANKING) {
- fts_word_freq_t* word_freq;
- ulint size = ib_vector_size(query->deleted->doc_ids);
- fts_update_t* array =
- (fts_update_t*) query->deleted->doc_ids->data;
-
- /* NOTE(review): a missing first node is only caught by ut_ad()
- before it is dereferenced - confirm it cannot be NULL here. */
- node = rbt_first(query->word_freqs);
- ut_ad(node);
- word_freq = rbt_value(fts_word_freq_t, node);
-
- for (node = rbt_first(word_freq->doc_freqs);
- node;
- node = rbt_next(word_freq->doc_freqs, node)) {
- fts_doc_freq_t* doc_freq;
- fts_ranking_t ranking;
-
- doc_freq = rbt_value(fts_doc_freq_t, node);
-
- /* Don't put deleted docs into result */
- if (fts_bsearch(array, 0, static_cast<int>(size),
- doc_freq->doc_id) >= 0) {
- /* one less matching doc count */
- --word_freq->doc_count;
- continue;
- }
-
- /* The rank starts out as the raw frequency; it is scaled
- by the IDF further below. */
- ranking.doc_id = doc_freq->doc_id;
- ranking.rank = static_cast<fts_rank_t>(doc_freq->freq);
- ranking.words = NULL;
-
- fts_query_add_ranking(query, result->rankings_by_id,
- &ranking);
-
- if (query->total_size > fts_result_cache_limit) {
- query->error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT;
- fts_query_free_result(result);
- DBUG_RETURN(NULL);
- }
- }
-
- /* Calculate IDF only after we exclude the deleted items */
- fts_query_calculate_idf(query);
-
- node = rbt_first(query->word_freqs);
- word_freq = rbt_value(fts_word_freq_t, node);
-
- /* Calculate the ranking for each doc */
- for (node = rbt_first(result->rankings_by_id);
- node != NULL;
- node = rbt_next(result->rankings_by_id, node)) {
-
- fts_ranking_t* ranking;
-
- ranking = rbt_value(fts_ranking_t, node);
-
- ranking->rank = static_cast<fts_rank_t>(
- ranking->rank * word_freq->idf * word_freq->idf);
- }
-
- DBUG_RETURN(result);
- }
-
- ut_a(rbt_size(query->doc_ids) > 0);
-
- for (node = rbt_first(query->doc_ids);
- node;
- node = rbt_next(query->doc_ids, node)) {
-
- fts_ranking_t* ranking;
-
- ranking = rbt_value(fts_ranking_t, node);
- fts_query_calculate_ranking(query, ranking);
-
- // FIXME: I think we may require this information to improve the
- // ranking of doc ids which have more word matches from
- // different FTS indexes.
-
- /* The words are not needed anymore; only the pointer is
- cleared here. */
- ranking->words = NULL;
-
- /* A result passed in by the caller gets the rankings merged
- into its existing tree. */
- if (!result_is_null) {
- fts_query_add_ranking(query, result->rankings_by_id, ranking);
-
- if (query->total_size > fts_result_cache_limit) {
- query->error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT;
- fts_query_free_result(result);
- DBUG_RETURN(NULL);
- }
- }
- }
-
- if (result_is_null) {
- /* Use doc_ids directly: the tree created above is dropped and
- ownership of query->doc_ids moves to the result. */
- rbt_free(result->rankings_by_id);
- result->rankings_by_id = query->doc_ids;
- query->doc_ids = NULL;
- }
-
- DBUG_RETURN(result);
-}
-
-/*****************************************************************//**
-Get the result of the query. Calculate the similarity coefficient. */
-static
-fts_result_t*
-fts_query_get_result(
-/*=================*/
- fts_query_t* query, /*!< in: query instance */
- fts_result_t* result) /*!< in: result */
-{
- DBUG_ENTER("fts_query_get_result");
-
- if (rbt_size(query->doc_ids) > 0 || query->flags == FTS_OPT_RANKING) {
- /* Copy the doc ids to the result. */
- result = fts_query_prepare_result(query, result);
- } else {
- /* Create an empty result instance. */
- result = static_cast<fts_result_t*>(ut_malloc(sizeof(*result)));
- memset(result, 0, sizeof(*result));
- }
-
- DBUG_RETURN(result);
-}
-
-/*****************************************************************//**
-FTS Query free resources and reset. */
-static
-void
-fts_query_free(
-/*===========*/
- fts_query_t* query) /*!< in: query instance to free*/
-{
-
- if (query->read_nodes_graph) {
- fts_que_graph_free(query->read_nodes_graph);
- }
-
- if (query->root) {
- fts_ast_free_node(query->root);
- }
-
- if (query->deleted) {
- fts_doc_ids_free(query->deleted);
- }
-
- if (query->doc_ids) {
- fts_query_free_doc_ids(query, query->doc_ids);
- }
-
- if (query->word_freqs) {
- const ib_rbt_node_t* node;
-
- /* We need to free any instances of fts_doc_freq_t that we
- may have allocated. */
- for (node = rbt_first(query->word_freqs);
- node;
- node = rbt_next(query->word_freqs, node)) {
-
- fts_word_freq_t* word_freq;
-
- word_freq = rbt_value(fts_word_freq_t, node);
-
- /* We need to cast away the const. */
- rbt_free(word_freq->doc_freqs);
- }
-
- rbt_free(query->word_freqs);
- }
-
- ut_a(!query->intersection);
-
- if (query->word_map) {
- rbt_free(query->word_map);
- }
-
- if (query->word_vector) {
- delete query->word_vector;
- }
-
- if (query->heap) {
- mem_heap_free(query->heap);
- }
-
- memset(query, 0, sizeof(*query));
-}
-
-/*****************************************************************//**
-Parse the query using flex/bison. */
-static
-fts_ast_node_t*
-fts_query_parse(
-/*============*/
- fts_query_t* query, /*!< in: query instance */
- byte* query_str, /*!< in: query string */
- ulint query_len) /*!< in: query string length */
-{
- int error;
- fts_ast_state_t state;
- bool mode = query->boolean_mode;
- DBUG_ENTER("fts_query_parse");
-
- memset(&state, 0x0, sizeof(state));
-
- /* Setup the scanner to use, this depends on the mode flag. */
- state.lexer = fts_lexer_create(mode, query_str, query_len);
- state.charset = query->fts_index_table.charset;
- error = fts_parse(&state);
- fts_lexer_free(state.lexer);
- state.lexer = NULL;
-
- /* Error during parsing ? */
- if (error) {
- /* Free the nodes that were allocated during parsing. */
- fts_ast_state_free(&state);
- } else {
- query->root = state.root;
- }
-
- DBUG_RETURN(state.root);
-}
-
-/*******************************************************************//**
-FTS Query optimization
-Set FTS_OPT_RANKING if it is a simple term query */
-static
-void
-fts_query_can_optimize(
-/*===================*/
- fts_query_t* query, /*!< in/out: query instance */
- uint flags) /*!< In: FTS search mode */
-{
- fts_ast_node_t* node = query->root;
-
- if (flags & FTS_EXPAND) {
- return;
- }
-
- /* Check if it has only a term without oper */
- ut_ad(node->type == FTS_AST_LIST);
- node = node->list.head;
- if (node != NULL && node->type == FTS_AST_TERM && node->next == NULL) {
- query->flags = FTS_OPT_RANKING;
- }
-}
-
-/*******************************************************************//**
-Pre-process the query string
-1) make it lower case
-2) in boolean mode, if there is '-' or '+' that is immediately proceeded
-and followed by valid word, make it a space
-@return the processed string */
-static
-byte*
-fts_query_str_preprocess(
-/*=====================*/
- const byte* query_str, /*!< in: FTS query */
- ulint query_len, /*!< in: FTS query string len */
- ulint *result_len, /*!< out: result string length */
- CHARSET_INFO* charset, /*!< in: string charset */
- bool boolean_mode) /*!< in: is boolean mode */
-{
- ulint cur_pos = 0;
- ulint str_len;
- byte* str_ptr;
- bool in_phrase = false;
-
- /* Convert the query string to lower case before parsing. We own
- the ut_malloc'ed result and so remember to free it before return. */
-
- str_len = query_len * charset->casedn_multiply + 1;
- str_ptr = static_cast<byte*>(ut_malloc(str_len));
-
- *result_len = innobase_fts_casedn_str(
- charset, const_cast<char*>(reinterpret_cast<const char*>(
- query_str)), query_len,
- reinterpret_cast<char*>(str_ptr), str_len);
-
- ut_ad(*result_len < str_len);
-
- str_ptr[*result_len] = 0;
-
- /* If it is boolean mode, no need to check for '-/+' */
- if (!boolean_mode) {
- return(str_ptr);
- }
-
- /* Otherwise, we travese the string to find any '-/+' that are
- immediately proceeded and followed by valid search word.
- NOTE: we should not do so for CJK languages, this should
- be taken care of in our CJK implementation */
- while (cur_pos < *result_len) {
- fts_string_t str;
- ulint offset;
- ulint cur_len;
-
- cur_len = innobase_mysql_fts_get_token(
- charset, str_ptr + cur_pos, str_ptr + *result_len,
- &str, &offset);
-
- if (cur_len == 0 || str.f_str == NULL) {
- /* No valid word found */
- break;
- }
-
- /* Check if we are in a phrase, if so, no need to do
- replacement of '-/+'. */
- for (byte* ptr = str_ptr + cur_pos; ptr < str.f_str; ptr++) {
- if ((char) (*ptr) == '"' ) {
- in_phrase = !in_phrase;
- }
- }
-
- /* Find those are not leading '-/+' and also not in a phrase */
- if (cur_pos > 0 && str.f_str - str_ptr - cur_pos == 1
- && !in_phrase) {
- char* last_op = reinterpret_cast<char*>(
- str_ptr + cur_pos);
-
- if (*last_op == '-' || *last_op == '+') {
- *last_op = ' ';
- }
- }
-
- cur_pos += cur_len;
- }
-
- return(str_ptr);
-}
-
-/*******************************************************************//**
-FTS Query entry point.
-@return DB_SUCCESS if successful otherwise error code */
-UNIV_INTERN
-dberr_t
-fts_query(
-/*======*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: The FTS index to search */
- uint flags, /*!< in: FTS search mode */
- const byte* query_str, /*!< in: FTS query */
- ulint query_len, /*!< in: FTS query string len
- in bytes */
- fts_result_t** result) /*!< in/out: result doc ids */
-{
- fts_query_t query;
- dberr_t error = DB_SUCCESS;
- byte* lc_query_str;
- ulint result_len;
- bool boolean_mode;
- trx_t* query_trx;
- CHARSET_INFO* charset;
- ulint start_time_ms;
- bool will_be_ignored = false;
-
- boolean_mode = flags & FTS_BOOL;
-
- *result = NULL;
- memset(&query, 0x0, sizeof(query));
- query_trx = trx_allocate_for_background();
- query_trx->op_info = "FTS query";
-
- start_time_ms = ut_time_ms();
-
- query.trx = query_trx;
- query.index = index;
- query.boolean_mode = boolean_mode;
- query.deleted = fts_doc_ids_create();
- query.cur_node = NULL;
-
- query.fts_common_table.type = FTS_COMMON_TABLE;
- query.fts_common_table.table_id = index->table->id;
- query.fts_common_table.parent = index->table->name;
- query.fts_common_table.table = index->table;
-
- charset = fts_index_get_charset(index);
-
- query.fts_index_table.type = FTS_INDEX_TABLE;
- query.fts_index_table.index_id = index->id;
- query.fts_index_table.table_id = index->table->id;
- query.fts_index_table.parent = index->table->name;
- query.fts_index_table.charset = charset;
- query.fts_index_table.table = index->table;
-
- query.word_map = rbt_create_arg_cmp(
- sizeof(fts_string_t), innobase_fts_text_cmp, (void*) charset);
- query.word_vector = new word_vector_t;
- query.error = DB_SUCCESS;
-
- /* Setup the RB tree that will be used to collect per term
- statistics. */
- query.word_freqs = rbt_create_arg_cmp(
- sizeof(fts_word_freq_t), innobase_fts_text_cmp, (void*) charset);
-
- query.total_size += SIZEOF_RBT_CREATE;
-
- query.total_docs = dict_table_get_n_rows(index->table);
-
-#ifdef FTS_DOC_STATS_DEBUG
- if (ft_enable_diag_print) {
- error = fts_get_total_word_count(
- trx, query.index, &query.total_words);
-
- if (error != DB_SUCCESS) {
- goto func_exit;
- }
-
- fprintf(stderr, "Total docs: " UINT64PF " Total words: %lu\n",
- query.total_docs, query.total_words);
- }
-#endif /* FTS_DOC_STATS_DEBUG */
-
- query.fts_common_table.suffix = "DELETED";
-
- /* Read the deleted doc_ids, we need these for filtering. */
- error = fts_table_fetch_doc_ids(
- NULL, &query.fts_common_table, query.deleted);
-
- if (error != DB_SUCCESS) {
- goto func_exit;
- }
-
- query.fts_common_table.suffix = "DELETED_CACHE";
-
- error = fts_table_fetch_doc_ids(
- NULL, &query.fts_common_table, query.deleted);
-
- if (error != DB_SUCCESS) {
- goto func_exit;
- }
-
- /* Get the deleted doc ids that are in the cache. */
- fts_cache_append_deleted_doc_ids(
- index->table->fts->cache, query.deleted->doc_ids);
- DEBUG_SYNC_C("fts_deleted_doc_ids_append");
-
- /* Sort the vector so that we can do a binary search over the ids. */
- ib_vector_sort(query.deleted->doc_ids, fts_update_doc_id_cmp);
-
-#if 0
- /* Convert the query string to lower case before parsing. We own
- the ut_malloc'ed result and so remember to free it before return. */
-
- lc_query_str_len = query_len * charset->casedn_multiply + 1;
- lc_query_str = static_cast<byte*>(ut_malloc(lc_query_str_len));
-
- result_len = innobase_fts_casedn_str(
- charset, (char*) query_str, query_len,
- (char*) lc_query_str, lc_query_str_len);
-
- ut_ad(result_len < lc_query_str_len);
-
- lc_query_str[result_len] = 0;
-
-#endif
-
- lc_query_str = fts_query_str_preprocess(
- query_str, query_len, &result_len, charset, boolean_mode);
-
- query.heap = mem_heap_create(128);
-
- /* Create the rb tree for the doc id (current) set. */
- query.doc_ids = rbt_create(
- sizeof(fts_ranking_t), fts_ranking_doc_id_cmp);
-
- query.total_size += SIZEOF_RBT_CREATE;
-
- /* Parse the input query string. */
- if (fts_query_parse(&query, lc_query_str, result_len)) {
- fts_ast_node_t* ast = query.root;
-
- /* Optimize query to check if it's a single term */
- fts_query_can_optimize(&query, flags);
-
- DBUG_EXECUTE_IF("fts_instrument_result_cache_limit",
- fts_result_cache_limit = 2048;
- );
-
- /* Traverse the Abstract Syntax Tree (AST) and execute
- the query. */
- query.error = fts_ast_visit(
- FTS_NONE, ast, fts_query_visitor,
- &query, &will_be_ignored);
-
- /* If query expansion is requested, extend the search
- with first search pass result */
- if (query.error == DB_SUCCESS && (flags & FTS_EXPAND)) {
- query.error = fts_expand_query(index, &query);
- }
-
- /* Calculate the inverse document frequency of the terms. */
- if (query.error == DB_SUCCESS
- && query.flags != FTS_OPT_RANKING) {
- fts_query_calculate_idf(&query);
- }
-
- /* Copy the result from the query state, so that we can
- return it to the caller. */
- if (query.error == DB_SUCCESS) {
- *result = fts_query_get_result(&query, *result);
- }
-
- error = query.error;
- } else {
- /* still return an empty result set */
- *result = static_cast<fts_result_t*>(
- ut_malloc(sizeof(**result)));
- memset(*result, 0, sizeof(**result));
- }
-
- ut_free(lc_query_str);
-
- if (fts_enable_diag_print && (*result)) {
- ulint diff_time = ut_time_ms() - start_time_ms;
- fprintf(stderr, "FTS Search Processing time: %ld secs:"
- " %ld millisec: row(s) %d \n",
- diff_time / 1000, diff_time % 1000,
- (*result)->rankings_by_id
- ? (int) rbt_size((*result)->rankings_by_id)
- : -1);
-
- /* Log memory consumption & result size */
- ib_logf(IB_LOG_LEVEL_INFO,
- "Full Search Memory: "
- "%lu (bytes), Row: %lu .",
- query.total_size,
- (*result)->rankings_by_id
- ? rbt_size((*result)->rankings_by_id)
- : 0);
- }
-
-func_exit:
- fts_query_free(&query);
-
- trx_free_for_background(query_trx);
-
- return(error);
-}
-
-/*****************************************************************//**
-FTS Query free result, returned by fts_query(). */
-
-void
-fts_query_free_result(
-/*==================*/
- fts_result_t* result) /*!< in: result instance to free.*/
-{
- if (result) {
- if (result->rankings_by_id != NULL) {
- rbt_free(result->rankings_by_id);
- result->rankings_by_id = NULL;
- }
- if (result->rankings_by_rank != NULL) {
- rbt_free(result->rankings_by_rank);
- result->rankings_by_rank = NULL;
- }
-
- ut_free(result);
- result = NULL;
- }
-}
-
-/*****************************************************************//**
-FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. */
-
-void
-fts_query_sort_result_on_rank(
-/*==========================*/
- fts_result_t* result) /*!< out: result instance to sort.*/
-{
- const ib_rbt_node_t* node;
- ib_rbt_t* ranked;
-
- ut_a(result->rankings_by_id != NULL);
- if (result->rankings_by_rank) {
- rbt_free(result->rankings_by_rank);
- }
-
- ranked = rbt_create(sizeof(fts_ranking_t), fts_query_compare_rank);
-
- /* We need to free any instances of fts_doc_freq_t that we
- may have allocated. */
- for (node = rbt_first(result->rankings_by_id);
- node;
- node = rbt_next(result->rankings_by_id, node)) {
-
- fts_ranking_t* ranking;
-
- ranking = rbt_value(fts_ranking_t, node);
-
- ut_a(ranking->words == NULL);
-
- rbt_insert(ranked, ranking, ranking);
- }
-
- /* Reset the current node too. */
- result->current = NULL;
- result->rankings_by_rank = ranked;
-}
-
-#ifdef UNIV_DEBUG
-/*******************************************************************//**
-A debug function to print result doc_id set. */
-static
-void
-fts_print_doc_id(
-/*=============*/
- fts_query_t* query) /*!< in : tree that stores doc_ids.*/
-{
- const ib_rbt_node_t* node;
-
- /* Iterate each member of the doc_id set */
- for (node = rbt_first(query->doc_ids);
- node;
- node = rbt_next(query->doc_ids, node)) {
- fts_ranking_t* ranking;
- ranking = rbt_value(fts_ranking_t, node);
-
- ib_logf(IB_LOG_LEVEL_INFO, "doc_ids info, doc_id: %ld \n",
- (ulint) ranking->doc_id);
-
- ulint pos = 0;
- fts_string_t word;
-
- while (fts_ranking_words_get_next(query, ranking, &pos, &word)) {
- ib_logf(IB_LOG_LEVEL_INFO, "doc_ids info, value: %s \n", word.f_str);
- }
- }
-}
-#endif
-
-/*************************************************************//**
-This function implements a simple "blind" query expansion search:
-words in documents found in the first search pass will be used as
-search arguments to search the document again, thus "expand"
-the search result set.
-@return DB_SUCCESS if success, otherwise the error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-fts_expand_query(
-/*=============*/
- dict_index_t* index, /*!< in: FTS index to search */
- fts_query_t* query) /*!< in: FTS query instance */
-{
- const ib_rbt_node_t* node;
- const ib_rbt_node_t* token_node;
- fts_doc_t result_doc;
- dberr_t error = DB_SUCCESS;
- const fts_index_cache_t*index_cache;
-
- /* If no doc is found in first search pass, return */
- if (!rbt_size(query->doc_ids)) {
- return(error);
- }
-
- /* Init "result_doc", to hold words from the first search pass */
- fts_doc_init(&result_doc);
-
- rw_lock_x_lock(&index->table->fts->cache->lock);
- index_cache = fts_find_index_cache(index->table->fts->cache, index);
- rw_lock_x_unlock(&index->table->fts->cache->lock);
-
- ut_a(index_cache);
-
- result_doc.tokens = rbt_create_arg_cmp(
- sizeof(fts_token_t), innobase_fts_text_cmp,
- (void *)index_cache->charset);
-
- result_doc.charset = index_cache->charset;
-
- query->total_size += SIZEOF_RBT_CREATE;
-#ifdef UNIV_DEBUG
- fts_print_doc_id(query);
-#endif
-
- for (node = rbt_first(query->doc_ids);
- node;
- node = rbt_next(query->doc_ids, node)) {
-
- fts_ranking_t* ranking;
- ulint pos;
- fts_string_t word;
- ulint prev_token_size;
- ulint estimate_size;
-
- prev_token_size = rbt_size(result_doc.tokens);
-
- ranking = rbt_value(fts_ranking_t, node);
-
- /* Fetch the documents with the doc_id from the
- result of first seach pass. Since we do not
- store document-to-word mapping, we need to
- fetch the original document and parse them.
- Future optimization could be done here if we
- support some forms of document-to-word mapping */
- fts_doc_fetch_by_doc_id(NULL, ranking->doc_id, index,
- FTS_FETCH_DOC_BY_ID_EQUAL,
- fts_query_expansion_fetch_doc,
- &result_doc);
-
- /* Remove words that have already been searched in the
- first pass */
- pos = 0;
- while (fts_ranking_words_get_next(query, ranking, &pos,
- &word)) {
- ibool ret;
-
- ret = rbt_delete(result_doc.tokens, &word);
-
- /* The word must exist in the doc we found */
- if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Did not "
- "find word %s in doc %ld for query "
- "expansion search.\n", word.f_str,
- (ulint) ranking->doc_id);
- }
- }
-
- /* Estimate memory used, see fts_process_token and fts_token_t.
- We ignore token size here. */
- estimate_size = (rbt_size(result_doc.tokens) - prev_token_size)
- * (SIZEOF_RBT_NODE_ADD + sizeof(fts_token_t)
- + sizeof(ib_vector_t) + sizeof(ulint) * 32);
- query->total_size += estimate_size;
-
- if (query->total_size > fts_result_cache_limit) {
- error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT;
- goto func_exit;
- }
- }
-
- /* Search the table the second time with expanded search list */
- for (token_node = rbt_first(result_doc.tokens);
- token_node;
- token_node = rbt_next(result_doc.tokens, token_node)) {
- fts_token_t* mytoken;
- mytoken = rbt_value(fts_token_t, token_node);
-
- ut_ad(mytoken->text.f_str[mytoken->text.f_len] == 0);
- fts_query_add_word_freq(query, &mytoken->text);
- error = fts_query_union(query, &mytoken->text);
-
- if (error != DB_SUCCESS) {
- break;
- }
- }
-
-func_exit:
- fts_doc_free(&result_doc);
-
- return(error);
-}
-/*************************************************************//**
-This function finds documents that contain all words in a
-phrase or proximity search. And if proximity search, verify
-the words are close enough to each other, as in specified distance.
-This function is called for phrase and proximity search.
-@return TRUE if documents are found, FALSE if otherwise */
-static
-ibool
-fts_phrase_or_proximity_search(
-/*===========================*/
- fts_query_t* query, /*!< in/out: query instance.
- query->doc_ids might be instantiated
- with qualified doc IDs */
- ib_vector_t* tokens) /*!< in: Tokens contain words */
-{
- ulint n_matched;
- ulint i;
- ibool matched = FALSE;
- ulint num_token = ib_vector_size(tokens);
- fts_match_t* match[MAX_PROXIMITY_ITEM];
- ibool end_list = FALSE;
-
- /* Number of matched documents for the first token */
- n_matched = ib_vector_size(query->match_array[0]);
-
- /* We have a set of match list for each word, we shall
- walk through the list and find common documents that
- contain all the matching words. */
- for (i = 0; i < n_matched; i++) {
- ulint j;
- ulint k = 0;
- fts_proximity_t qualified_pos;
-
- match[0] = static_cast<fts_match_t*>(
- ib_vector_get(query->match_array[0], i));
-
- /* For remaining match list for the token(word), we
- try to see if there is a document with the same
- doc id */
- for (j = 1; j < num_token; j++) {
- match[j] = static_cast<fts_match_t*>(
- ib_vector_get(query->match_array[j], k));
-
- while (match[j]->doc_id < match[0]->doc_id
- && k < ib_vector_size(query->match_array[j])) {
- match[j] = static_cast<fts_match_t*>(
- ib_vector_get(
- query->match_array[j], k));
- k++;
- }
-
- if (match[j]->doc_id > match[0]->doc_id) {
- /* no match */
- if (query->flags & FTS_PHRASE) {
- match[0]->doc_id = 0;
- }
- break;
- }
-
- if (k == ib_vector_size(query->match_array[j])) {
- end_list = TRUE;
-
- if (match[j]->doc_id != match[0]->doc_id) {
- /* no match */
- if (query->flags & FTS_PHRASE) {
- ulint s;
-
- match[0]->doc_id = 0;
-
- for (s = i + 1; s < n_matched;
- s++) {
- match[0] = static_cast<
- fts_match_t*>(
- ib_vector_get(
- query->match_array[0],
- s));
- match[0]->doc_id = 0;
- }
- }
-
- goto func_exit;
- }
- }
-
- /* FIXME: A better solution will be a counter array
- remember each run's last position. So we don't
- reset it here very time */
- k = 0;
- }
-
- if (j != num_token) {
- continue;
- }
-
- /* For this matching doc, we need to further
- verify whether the words in the doc are close
- to each other, and within the distance specified
- in the proximity search */
- if (query->flags & FTS_PHRASE) {
- matched = TRUE;
- } else if (fts_proximity_get_positions(
- match, num_token, ULINT_MAX, &qualified_pos)) {
-
- /* Fetch the original documents and count the
- words in between matching words to see that is in
- specified distance */
- if (fts_query_is_in_proximity_range(
- query, match, &qualified_pos)) {
- /* If so, mark we find a matching doc */
- query->error = fts_query_process_doc_id(
- query, match[0]->doc_id, 0);
- if (query->error != DB_SUCCESS) {
- matched = FALSE;
- goto func_exit;
- }
-
- matched = TRUE;
- for (ulint z = 0; z < num_token; z++) {
- fts_string_t* token;
- token = static_cast<fts_string_t*>(
- ib_vector_get(tokens, z));
- fts_query_add_word_to_document(
- query, match[0]->doc_id, token);
- }
- }
- }
-
- if (end_list) {
- break;
- }
- }
-
-func_exit:
- return(matched);
-}
-
-/*************************************************************//**
-This function checks whether words in result documents are close to
-each other (within proximity range as specified by "distance").
-If "distance" is MAX_ULINT, then it will find all combinations of
-positions of matching words and store min and max positions
-in the "qualified_pos" for later verification.
-@return true if words are close to each other, false if otherwise */
-static
-bool
-fts_proximity_get_positions(
-/*========================*/
- fts_match_t** match, /*!< in: query instance */
- ulint num_match, /*!< in: number of matching
- items */
- ulint distance, /*!< in: distance value
- for proximity search */
- fts_proximity_t* qualified_pos) /*!< out: the position info
- records ranges containing
- all matching words. */
-{
- ulint i;
- ulint idx[MAX_PROXIMITY_ITEM];
- ulint num_pos[MAX_PROXIMITY_ITEM];
- ulint min_idx;
-
- qualified_pos->n_pos = 0;
-
- ut_a(num_match <= MAX_PROXIMITY_ITEM);
-
- /* Each word could appear multiple times in a doc. So
- we need to walk through each word's position list, and find
- closest distance between different words to see if
- they are in the proximity distance. */
-
- /* Assume each word's position list is sorted, we
- will just do a walk through to all words' lists
- similar to a the merge phase of a merge sort */
- for (i = 0; i < num_match; i++) {
- /* idx is the current position we are checking
- for a particular word */
- idx[i] = 0;
-
- /* Number of positions for this word */
- num_pos[i] = ib_vector_size(match[i]->positions);
- }
-
- /* Start with the first word */
- min_idx = 0;
-
- while (idx[min_idx] < num_pos[min_idx]) {
- ulint position[MAX_PROXIMITY_ITEM];
- ulint min_pos = ULINT_MAX;
- ulint max_pos = 0;
-
- /* Check positions in each word position list, and
- record the max/min position */
- for (i = 0; i < num_match; i++) {
- position[i] = *(ulint*) ib_vector_get_const(
- match[i]->positions, idx[i]);
-
- if (position[i] == ULINT_UNDEFINED) {
- break;
- }
-
- if (position[i] < min_pos) {
- min_pos = position[i];
- min_idx = i;
- }
-
- if (position[i] > max_pos) {
- max_pos = position[i];
- }
- }
-
- /* If max and min position are within range, we
- find a good match */
- if (max_pos - min_pos <= distance
- && (i >= num_match || position[i] != ULINT_UNDEFINED)) {
- /* The charset has variable character
- length encoding, record the min_pos and
- max_pos, we will need to verify the actual
- number of characters */
- qualified_pos->min_pos.push_back(min_pos);
- qualified_pos->max_pos.push_back(max_pos);
- qualified_pos->n_pos++;
- }
-
- /* Otherwise, move to the next position is the
- list for the word with the smallest position */
- idx[min_idx]++;
- }
-
- return(qualified_pos->n_pos != 0);
-}
diff --git a/storage/xtradb/fts/fts0sql.cc b/storage/xtradb/fts/fts0sql.cc
deleted file mode 100644
index cb8eff3cacc..00000000000
--- a/storage/xtradb/fts/fts0sql.cc
+++ /dev/null
@@ -1,363 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file fts/fts0sql.cc
-Full Text Search functionality.
-
-Created 2007-03-27 Sunny Bains
-*******************************************************/
-
-#include "que0que.h"
-#include "trx0roll.h"
-#include "pars0pars.h"
-#include "dict0dict.h"
-#include "fts0types.h"
-#include "fts0priv.h"
-
-#ifndef UNIV_NONINL
-#include "fts0types.ic"
-#include "fts0vlc.ic"
-#endif
-
-/** SQL statements for creating the ancillary FTS tables. %s must be replaced
-with the indexed table's id. */
-
-/** Preamble to all SQL statements. */
-static const char* fts_sql_begin=
- "PROCEDURE P() IS\n";
-
-/** Postamble to non-committing SQL statements. */
-static const char* fts_sql_end=
- "\n"
- "END;\n";
-
-/******************************************************************//**
-Get the table id.
-@return number of bytes written */
-UNIV_INTERN
-int
-fts_get_table_id(
-/*=============*/
- const fts_table_t*
- fts_table, /*!< in: FTS Auxiliary table */
- char* table_id) /*!< out: table id, must be at least
- FTS_AUX_MIN_TABLE_ID_LENGTH bytes
- long */
-{
- int len;
- bool hex_name = DICT_TF2_FLAG_IS_SET(fts_table->table,
- DICT_TF2_FTS_AUX_HEX_NAME);
-
- ut_a(fts_table->table != NULL);
-
- switch (fts_table->type) {
- case FTS_COMMON_TABLE:
- len = fts_write_object_id(fts_table->table_id, table_id,
- hex_name);
- break;
-
- case FTS_INDEX_TABLE:
-
- len = fts_write_object_id(fts_table->table_id, table_id,
- hex_name);
-
- table_id[len] = '_';
- ++len;
- table_id += len;
-
- len += fts_write_object_id(fts_table->index_id, table_id,
- hex_name);
- break;
-
- default:
- ut_error;
- }
-
- ut_a(len >= 16);
- ut_a(len < FTS_AUX_MIN_TABLE_ID_LENGTH);
-
- return(len);
-}
-
-/******************************************************************//**
-Construct the prefix name of an FTS table.
-@return own: table name, must be freed with mem_free() */
-UNIV_INTERN
-char*
-fts_get_table_name_prefix(
-/*======================*/
- const fts_table_t*
- fts_table) /*!< in: Auxiliary table type */
-{
- int len;
- const char* slash;
- char* prefix_name;
- int dbname_len = 0;
- int prefix_name_len;
- char table_id[FTS_AUX_MIN_TABLE_ID_LENGTH];
-
- slash = static_cast<const char*>(
- memchr(fts_table->parent, '/', strlen(fts_table->parent)));
-
- if (slash) {
- /* Print up to and including the separator. */
- dbname_len = static_cast<int>(slash - fts_table->parent) + 1;
- }
-
- len = fts_get_table_id(fts_table, table_id);
-
- prefix_name_len = dbname_len + 4 + len + 1;
-
- prefix_name = static_cast<char*>(mem_alloc(prefix_name_len));
-
- len = sprintf(prefix_name, "%.*sFTS_%s",
- dbname_len, fts_table->parent, table_id);
-
- ut_a(len > 0);
- ut_a(len == prefix_name_len - 1);
-
- return(prefix_name);
-}
-
-/******************************************************************//**
-Construct the name of an ancillary FTS table.
-@return own: table name, must be freed with mem_free() */
-UNIV_INTERN
-char*
-fts_get_table_name(
-/*===============*/
- const fts_table_t* fts_table)
- /*!< in: Auxiliary table type */
-{
- int len;
- char* name;
- int name_len;
- char* prefix_name;
-
- prefix_name = fts_get_table_name_prefix(fts_table);
-
- name_len = static_cast<int>(
- strlen(prefix_name) + 1 + strlen(fts_table->suffix) + 1);
-
- name = static_cast<char*>(mem_alloc(name_len));
-
- len = sprintf(name, "%s_%s", prefix_name, fts_table->suffix);
-
- ut_a(len > 0);
- ut_a(len == name_len - 1);
-
- mem_free(prefix_name);
-
- return(name);
-}
-
-/******************************************************************//**
-Parse an SQL string. %s is replaced with the table's id.
-@return query graph */
-UNIV_INTERN
-que_t*
-fts_parse_sql(
-/*==========*/
- fts_table_t* fts_table, /*!< in: FTS auxiliarry table info */
- pars_info_t* info, /*!< in: info struct, or NULL */
- const char* sql) /*!< in: SQL string to evaluate */
-{
- char* str;
- que_t* graph;
- char* str_tmp;
- ibool dict_locked;
-
- if (fts_table != NULL) {
- char* table_name;
-
- table_name = fts_get_table_name(fts_table);
- str_tmp = ut_strreplace(sql, "%s", table_name);
- mem_free(table_name);
- } else {
- ulint sql_len = strlen(sql) + 1;
-
- str_tmp = static_cast<char*>(mem_alloc(sql_len));
- strcpy(str_tmp, sql);
- }
-
- str = ut_str3cat(fts_sql_begin, str_tmp, fts_sql_end);
- mem_free(str_tmp);
-
- dict_locked = (fts_table && fts_table->table->fts
- && (fts_table->table->fts->fts_status
- & TABLE_DICT_LOCKED));
-
- if (!dict_locked) {
- ut_ad(!mutex_own(&(dict_sys->mutex)));
-
- /* The InnoDB SQL parser is not re-entrant. */
- mutex_enter(&dict_sys->mutex);
- }
-
- graph = pars_sql(info, str);
- ut_a(graph);
-
- if (!dict_locked) {
- mutex_exit(&dict_sys->mutex);
- }
-
- mem_free(str);
-
- return(graph);
-}
-
-/******************************************************************//**
-Parse an SQL string. %s is replaced with the table's id.
-@return query graph */
-UNIV_INTERN
-que_t*
-fts_parse_sql_no_dict_lock(
-/*=======================*/
- fts_table_t* fts_table, /*!< in: FTS aux table info */
- pars_info_t* info, /*!< in: info struct, or NULL */
- const char* sql) /*!< in: SQL string to evaluate */
-{
- char* str;
- que_t* graph;
- char* str_tmp = NULL;
-
-#ifdef UNIV_DEBUG
- ut_ad(mutex_own(&dict_sys->mutex));
-#endif
-
- if (fts_table != NULL) {
- char* table_name;
-
- table_name = fts_get_table_name(fts_table);
- str_tmp = ut_strreplace(sql, "%s", table_name);
- mem_free(table_name);
- }
-
- if (str_tmp != NULL) {
- str = ut_str3cat(fts_sql_begin, str_tmp, fts_sql_end);
- mem_free(str_tmp);
- } else {
- str = ut_str3cat(fts_sql_begin, sql, fts_sql_end);
- }
-
- //fprintf(stderr, "%s\n", str);
-
- graph = pars_sql(info, str);
- ut_a(graph);
-
- mem_free(str);
-
- return(graph);
-}
-
-/******************************************************************//**
-Evaluate an SQL query graph.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_eval_sql(
-/*=========*/
- trx_t* trx, /*!< in: transaction */
- que_t* graph) /*!< in: Query graph to evaluate */
-{
- que_thr_t* thr;
-
- graph->trx = trx;
- graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
-
- ut_a(thr = que_fork_start_command(graph));
-
- que_run_threads(thr);
-
- return(trx->error_state);
-}
-
-/******************************************************************//**
-Construct the column specification part of the SQL string for selecting the
-indexed FTS columns for the given table. Adds the necessary bound
-ids to the given 'info' and returns the SQL string. Examples:
-
-One indexed column named "text":
-
- "$sel0",
- info/ids: sel0 -> "text"
-
-Two indexed columns named "subject" and "content":
-
- "$sel0, $sel1",
- info/ids: sel0 -> "subject", sel1 -> "content",
-@return heap-allocated WHERE string */
-UNIV_INTERN
-const char*
-fts_get_select_columns_str(
-/*=======================*/
- dict_index_t* index, /*!< in: index */
- pars_info_t* info, /*!< in/out: parser info */
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ulint i;
- const char* str = "";
-
- for (i = 0; i < index->n_user_defined_cols; i++) {
- char* sel_str;
-
- dict_field_t* field = dict_index_get_nth_field(index, i);
-
- sel_str = mem_heap_printf(heap, "sel%lu", (ulong) i);
-
- /* Set copy_name to TRUE since it's dynamic. */
- pars_info_bind_id(info, TRUE, sel_str, field->name);
-
- str = mem_heap_printf(
- heap, "%s%s$%s", str, (*str) ? ", " : "", sel_str);
- }
-
- return(str);
-}
-
-/******************************************************************//**
-Commit a transaction.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_sql_commit(
-/*===========*/
- trx_t* trx) /*!< in: transaction */
-{
- dberr_t error;
-
- error = trx_commit_for_mysql(trx);
-
- /* Commit should always succeed */
- ut_a(error == DB_SUCCESS);
-
- return(DB_SUCCESS);
-}
-
-/******************************************************************//**
-Rollback a transaction.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_sql_rollback(
-/*=============*/
- trx_t* trx) /*!< in: transaction */
-{
- return(trx_rollback_to_savepoint(trx, NULL));
-}
diff --git a/storage/xtradb/fts/fts0tlex.cc b/storage/xtradb/fts/fts0tlex.cc
deleted file mode 100644
index d4d9b4c48d1..00000000000
--- a/storage/xtradb/fts/fts0tlex.cc
+++ /dev/null
@@ -1,1952 +0,0 @@
-#include "univ.i"
-#line 2 "fts0tlex.cc"
-
-#line 4 "fts0tlex.cc"
-
-#define YY_INT_ALIGNED short int
-
-/* A lexical scanner generated by flex */
-
-#define FLEX_SCANNER
-#define YY_FLEX_MAJOR_VERSION 2
-#define YY_FLEX_MINOR_VERSION 5
-#define YY_FLEX_SUBMINOR_VERSION 35
-#if YY_FLEX_SUBMINOR_VERSION > 0
-#define FLEX_BETA
-#endif
-
-/* First, we deal with platform-specific or compiler-specific issues. */
-
-/* begin standard C headers. */
-#include <stdio.h>
-#include <string.h>
-#include <errno.h>
-#include <stdlib.h>
-
-/* end standard C headers. */
-
-/* flex integer type definitions */
-
-#ifndef FLEXINT_H
-#define FLEXINT_H
-
-/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
-
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
-
-/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
- */
-#ifndef __STDC_LIMIT_MACROS
-#define __STDC_LIMIT_MACROS 1
-#endif
-
-#include <inttypes.h>
-typedef int8_t flex_int8_t;
-typedef uint8_t flex_uint8_t;
-typedef int16_t flex_int16_t;
-typedef uint16_t flex_uint16_t;
-typedef int32_t flex_int32_t;
-typedef uint32_t flex_uint32_t;
-#else
-typedef signed char flex_int8_t;
-typedef short int flex_int16_t;
-typedef int flex_int32_t;
-typedef unsigned char flex_uint8_t;
-typedef unsigned short int flex_uint16_t;
-typedef unsigned int flex_uint32_t;
-
-/* Limits of integral types. */
-#ifndef INT8_MIN
-#define INT8_MIN (-128)
-#endif
-#ifndef INT16_MIN
-#define INT16_MIN (-32767-1)
-#endif
-#ifndef INT32_MIN
-#define INT32_MIN (-2147483647-1)
-#endif
-#ifndef INT8_MAX
-#define INT8_MAX (127)
-#endif
-#ifndef INT16_MAX
-#define INT16_MAX (32767)
-#endif
-#ifndef INT32_MAX
-#define INT32_MAX (2147483647)
-#endif
-#ifndef UINT8_MAX
-#define UINT8_MAX (255U)
-#endif
-#ifndef UINT16_MAX
-#define UINT16_MAX (65535U)
-#endif
-#ifndef UINT32_MAX
-#define UINT32_MAX (4294967295U)
-#endif
-
-#endif /* ! C99 */
-
-#endif /* ! FLEXINT_H */
-
-#ifdef __cplusplus
-
-/* The "const" storage-class-modifier is valid. */
-#define YY_USE_CONST
-
-#else /* ! __cplusplus */
-
-/* C99 requires __STDC__ to be defined as 1. */
-#if defined (__STDC__)
-
-#define YY_USE_CONST
-
-#endif /* defined (__STDC__) */
-#endif /* ! __cplusplus */
-
-#ifdef YY_USE_CONST
-#define yyconst const
-#else
-#define yyconst
-#endif
-
-/* Returned upon end-of-file. */
-#define YY_NULL 0
-
-/* Promotes a possibly negative, possibly signed char to an unsigned
- * integer for use as an array index. If the signed char is negative,
- * we want to instead treat it as an 8-bit unsigned char, hence the
- * double cast.
- */
-#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
-
-/* An opaque pointer. */
-#ifndef YY_TYPEDEF_YY_SCANNER_T
-#define YY_TYPEDEF_YY_SCANNER_T
-typedef void* yyscan_t;
-#endif
-
-/* For convenience, these vars (plus the bison vars far below)
- are macros in the reentrant scanner. */
-#define yyin yyg->yyin_r
-#define yyout yyg->yyout_r
-#define yyextra yyg->yyextra_r
-#define yyleng yyg->yyleng_r
-#define yytext yyg->yytext_r
-#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno)
-#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column)
-#define yy_flex_debug yyg->yy_flex_debug_r
-
-/* Enter a start condition. This macro really ought to take a parameter,
- * but we do it the disgusting crufty way forced on us by the ()-less
- * definition of BEGIN.
- */
-#define BEGIN yyg->yy_start = 1 + 2 *
-
-/* Translate the current start state into a value that can be later handed
- * to BEGIN to return to the state. The YYSTATE alias is for lex
- * compatibility.
- */
-#define YY_START ((yyg->yy_start - 1) / 2)
-#define YYSTATE YY_START
-
-/* Action number for EOF rule of a given start state. */
-#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
-
-/* Special action meaning "start processing a new file". */
-#define YY_NEW_FILE fts0trestart(yyin ,yyscanner )
-
-#define YY_END_OF_BUFFER_CHAR 0
-
-/* Size of default input buffer. */
-#ifndef YY_BUF_SIZE
-#ifdef __ia64__
-/* On IA-64, the buffer size is 16k, not 8k.
- * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
- * Ditto for the __ia64__ case accordingly.
- */
-#define YY_BUF_SIZE 32768
-#else
-#define YY_BUF_SIZE 16384
-#endif /* __ia64__ */
-#endif
-
-/* The state buf must be large enough to hold one state per character in the main buffer.
- */
-#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
-
-#ifndef YY_TYPEDEF_YY_BUFFER_STATE
-#define YY_TYPEDEF_YY_BUFFER_STATE
-typedef struct yy_buffer_state *YY_BUFFER_STATE;
-#endif
-
-#define EOB_ACT_CONTINUE_SCAN 0
-#define EOB_ACT_END_OF_FILE 1
-#define EOB_ACT_LAST_MATCH 2
-
- #define YY_LESS_LINENO(n)
-
-/* Return all but the first "n" matched characters back to the input stream. */
-#define yyless(n) \
- do \
- { \
- /* Undo effects of setting up yytext. */ \
- int yyless_macro_arg = (n); \
- YY_LESS_LINENO(yyless_macro_arg);\
- *yy_cp = yyg->yy_hold_char; \
- YY_RESTORE_YY_MORE_OFFSET \
- yyg->yy_c_buf_p = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
- YY_DO_BEFORE_ACTION; /* set up yytext again */ \
- } \
- while ( 0 )
-
-#define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner )
-
-#ifndef YY_TYPEDEF_YY_SIZE_T
-#define YY_TYPEDEF_YY_SIZE_T
-typedef size_t yy_size_t;
-#endif
-
-#ifndef YY_STRUCT_YY_BUFFER_STATE
-#define YY_STRUCT_YY_BUFFER_STATE
-struct yy_buffer_state
- {
- FILE *yy_input_file;
-
- char *yy_ch_buf; /* input buffer */
- char *yy_buf_pos; /* current position in input buffer */
-
- /* Size of input buffer in bytes, not including room for EOB
- * characters.
- */
- yy_size_t yy_buf_size;
-
- /* Number of characters read into yy_ch_buf, not including EOB
- * characters.
- */
- int yy_n_chars;
-
- /* Whether we "own" the buffer - i.e., we know we created it,
- * and can realloc() it to grow it, and should free() it to
- * delete it.
- */
- int yy_is_our_buffer;
-
- /* Whether this is an "interactive" input source; if so, and
- * if we're using stdio for input, then we want to use getc()
- * instead of fread(), to make sure we stop fetching input after
- * each newline.
- */
- int yy_is_interactive;
-
- /* Whether we're considered to be at the beginning of a line.
- * If so, '^' rules will be active on the next match, otherwise
- * not.
- */
- int yy_at_bol;
-
- int yy_bs_lineno; /**< The line count. */
- int yy_bs_column; /**< The column count. */
-
- /* Whether to try to fill the input buffer when we reach the
- * end of it.
- */
- int yy_fill_buffer;
-
- int yy_buffer_status;
-
-#define YY_BUFFER_NEW 0
-#define YY_BUFFER_NORMAL 1
- /* When an EOF's been seen but there's still some text to process
- * then we mark the buffer as YY_EOF_PENDING, to indicate that we
- * shouldn't try reading from the input source any more. We might
- * still have a bunch of tokens to match, though, because of
- * possible backing-up.
- *
- * When we actually see the EOF, we change the status to "new"
- * (via fts0trestart()), so that the user can continue scanning by
- * just pointing yyin at a new input file.
- */
-#define YY_BUFFER_EOF_PENDING 2
-
- };
-#endif /* !YY_STRUCT_YY_BUFFER_STATE */
-
-/* We provide macros for accessing buffer states in case in the
- * future we want to put the buffer states in a more general
- * "scanner state".
- *
- * Returns the top of the stack, or NULL.
- */
-#define YY_CURRENT_BUFFER ( yyg->yy_buffer_stack \
- ? yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] \
- : NULL)
-
-/* Same as previous macro, but useful when we know that the buffer stack is not
- * NULL or when we need an lvalue. For internal use only.
- */
-#define YY_CURRENT_BUFFER_LVALUE yyg->yy_buffer_stack[yyg->yy_buffer_stack_top]
-
-void fts0trestart (FILE *input_file ,yyscan_t yyscanner );
-void fts0t_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
-YY_BUFFER_STATE fts0t_create_buffer (FILE *file,int size ,yyscan_t yyscanner );
-void fts0t_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
-void fts0t_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
-void fts0tpush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
-void fts0tpop_buffer_state (yyscan_t yyscanner );
-
-static void fts0tensure_buffer_stack (yyscan_t yyscanner );
-static void fts0t_load_buffer_state (yyscan_t yyscanner );
-static void fts0t_init_buffer (YY_BUFFER_STATE b,FILE *file ,yyscan_t yyscanner );
-
-#define YY_FLUSH_BUFFER fts0t_flush_buffer(YY_CURRENT_BUFFER ,yyscanner)
-
-YY_BUFFER_STATE fts0t_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner );
-YY_BUFFER_STATE fts0t_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
-YY_BUFFER_STATE fts0t_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
-
-void *fts0talloc (yy_size_t , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) );
-void *fts0trealloc (void *,yy_size_t , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) );
-void fts0tfree (void * , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) );
-
-#define yy_new_buffer fts0t_create_buffer
-
-#define yy_set_interactive(is_interactive) \
- { \
- if ( ! YY_CURRENT_BUFFER ){ \
- fts0tensure_buffer_stack (yyscanner); \
- YY_CURRENT_BUFFER_LVALUE = \
- fts0t_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \
- } \
- YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
- }
-
-#define yy_set_bol(at_bol) \
- { \
- if ( ! YY_CURRENT_BUFFER ){\
- fts0tensure_buffer_stack (yyscanner); \
- YY_CURRENT_BUFFER_LVALUE = \
- fts0t_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \
- } \
- YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
- }
-
-#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
-
-/* Begin user sect3 */
-
-#define fts0twrap(n) 1
-#define YY_SKIP_YYWRAP
-
-typedef unsigned char YY_CHAR;
-
-typedef int yy_state_type;
-
-#define yytext_ptr yytext_r
-
-static yy_state_type yy_get_previous_state (yyscan_t yyscanner );
-static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner);
-static int yy_get_next_buffer (yyscan_t yyscanner );
-static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) );
-
-/* Done after the current pattern has been matched and before the
- * corresponding action - sets up yytext.
- */
-#define YY_DO_BEFORE_ACTION \
- yyg->yytext_ptr = yy_bp; \
- yyleng = static_cast<int>(yy_cp - yy_bp); \
- yyg->yy_hold_char = *yy_cp; \
- *yy_cp = '\0'; \
- yyg->yy_c_buf_p = yy_cp;
-
-#define YY_NUM_RULES 7
-#define YY_END_OF_BUFFER 8
-/* This struct is not used in this scanner,
- but its presence is necessary. */
-struct yy_trans_info
- {
- flex_int32_t yy_verify;
- flex_int32_t yy_nxt;
- };
-static yyconst flex_int16_t yy_accept[17] =
- { 0,
- 4, 4, 8, 4, 1, 6, 1, 5, 5, 2,
- 4, 1, 1, 0, 3, 0
- } ;
-
-static yyconst flex_int32_t yy_ec[256] =
- { 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 4, 1, 5, 1, 1, 6, 1, 1, 1,
- 1, 7, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1
- } ;
-
-static yyconst flex_int32_t yy_meta[8] =
- { 0,
- 1, 2, 3, 4, 5, 5, 1
- } ;
-
-static yyconst flex_int16_t yy_base[20] =
- { 0,
- 0, 0, 18, 0, 6, 21, 0, 9, 21, 0,
- 0, 0, 0, 4, 21, 21, 10, 11, 15
- } ;
-
-static yyconst flex_int16_t yy_def[20] =
- { 0,
- 16, 1, 16, 17, 17, 16, 18, 19, 16, 17,
- 17, 5, 18, 19, 16, 0, 16, 16, 16
- } ;
-
-static yyconst flex_int16_t yy_nxt[29] =
- { 0,
- 4, 5, 6, 7, 8, 9, 10, 12, 15, 13,
- 11, 11, 13, 15, 13, 14, 14, 16, 14, 14,
- 3, 16, 16, 16, 16, 16, 16, 16
- } ;
-
-static yyconst flex_int16_t yy_chk[29] =
- { 0,
- 1, 1, 1, 1, 1, 1, 1, 5, 14, 5,
- 17, 17, 18, 8, 18, 19, 19, 3, 19, 19,
- 16, 16, 16, 16, 16, 16, 16, 16
- } ;
-
-/* The intent behind this definition is that it'll catch
- * any uses of REJECT which flex missed.
- */
-#define REJECT reject_used_but_not_detected
-#define yymore() yymore_used_but_not_detected
-#define YY_MORE_ADJ 0
-#define YY_RESTORE_YY_MORE_OFFSET
-#line 1 "fts0tlex.l"
-/*****************************************************************************
-
-Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-/**
- * @file fts/fts0tlex.l
- * FTS parser lexical analyzer
- *
- * Created 2007/5/9 Sunny Bains
- */
-#line 27 "fts0tlex.l"
-
-#include "fts0ast.h"
-#include "fts0pars.h"
-
-/* Required for reentrant parser */
-#define YY_DECL int fts_tlexer(YYSTYPE* val, yyscan_t yyscanner)
-
-#define YY_NO_INPUT 1
-#line 480 "fts0tlex.cc"
-
-#define INITIAL 0
-
-#ifndef YY_NO_UNISTD_H
-/* Special case for "unistd.h", since it is non-ANSI. We include it way
- * down here because we want the user's section 1 to have been scanned first.
- * The user has a chance to override it with an option.
- */
-#include <unistd.h>
-#endif
-
-#ifndef YY_EXTRA_TYPE
-#define YY_EXTRA_TYPE void *
-#endif
-
-/* Holds the entire state of the reentrant scanner. */
-struct yyguts_t
- {
-
- /* User-defined. Not touched by flex. */
- YY_EXTRA_TYPE yyextra_r;
-
- /* The rest are the same as the globals declared in the non-reentrant scanner. */
- FILE *yyin_r, *yyout_r;
- size_t yy_buffer_stack_top; /**< index of top of stack. */
- size_t yy_buffer_stack_max; /**< capacity of stack. */
- YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */
- char yy_hold_char;
- int yy_n_chars;
- int yyleng_r;
- char *yy_c_buf_p;
- int yy_init;
- int yy_start;
- int yy_did_buffer_switch_on_eof;
- int yy_start_stack_ptr;
- int yy_start_stack_depth;
- int *yy_start_stack;
- yy_state_type yy_last_accepting_state;
- char* yy_last_accepting_cpos;
-
- int yylineno_r;
- int yy_flex_debug_r;
-
- char *yytext_r;
- int yy_more_flag;
- int yy_more_len;
-
- }; /* end struct yyguts_t */
-
-static int yy_init_globals (yyscan_t yyscanner );
-
-int fts0tlex_init (yyscan_t* scanner);
-
-int fts0tlex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner);
-
-/* Accessor methods to globals.
- These are made visible to non-reentrant scanners for convenience. */
-
-int fts0tlex_destroy (yyscan_t yyscanner );
-
-int fts0tget_debug (yyscan_t yyscanner );
-
-void fts0tset_debug (int debug_flag ,yyscan_t yyscanner );
-
-YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner );
-
-void fts0tset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner );
-
-FILE *fts0tget_in (yyscan_t yyscanner );
-
-void fts0tset_in (FILE * in_str ,yyscan_t yyscanner );
-
-FILE *fts0tget_out (yyscan_t yyscanner );
-
-void fts0tset_out (FILE * out_str ,yyscan_t yyscanner );
-
-int fts0tget_leng (yyscan_t yyscanner );
-
-char *fts0tget_text (yyscan_t yyscanner );
-
-int fts0tget_lineno (yyscan_t yyscanner );
-
-void fts0tset_lineno (int line_number ,yyscan_t yyscanner );
-
-/* Macros after this point can all be overridden by user definitions in
- * section 1.
- */
-
-#ifndef YY_SKIP_YYWRAP
-#ifdef __cplusplus
-extern "C" int fts0twrap (yyscan_t yyscanner );
-#else
-extern int fts0twrap (yyscan_t yyscanner );
-#endif
-#endif
-
-#ifndef yytext_ptr
-static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)));
-#endif
-
-#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)));
-#endif
-
-#ifndef YY_NO_INPUT
-
-#ifdef __cplusplus
-static int yyinput (yyscan_t yyscanner );
-#else
-static int input (yyscan_t yyscanner );
-#endif
-
-#endif
-
-/* Amount of stuff to slurp up with each read. */
-#ifndef YY_READ_BUF_SIZE
-#ifdef __ia64__
-/* On IA-64, the buffer size is 16k, not 8k */
-#define YY_READ_BUF_SIZE 16384
-#else
-#define YY_READ_BUF_SIZE 8192
-#endif /* __ia64__ */
-#endif
-
-/* Copy whatever the last rule matched to the standard output. */
-#ifndef ECHO
-/* This used to be an fputs(), but since the string might contain NUL's,
- * we now use fwrite().
- */
-#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0)
-#endif
-
-/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
- * is returned in "result".
- */
-#ifndef YY_INPUT
-#define YY_INPUT(buf,result,max_size) \
- if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
- { \
- int c = '*'; \
- int n; \
- for ( n = 0; n < static_cast<int>(max_size) && \
- (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
- buf[n] = (char) c; \
- if ( c == '\n' ) \
- buf[n++] = (char) c; \
- if ( c == EOF && ferror( yyin ) ) \
- YY_FATAL_ERROR( "input in flex scanner failed" ); \
- result = n; \
- } \
- else \
- { \
- errno=0; \
- while ( (result = static_cast<int>(fread(buf, 1, max_size, yyin)))==0 \
- && ferror(yyin)) \
- { \
- if( errno != EINTR) \
- { \
- YY_FATAL_ERROR( "input in flex scanner failed" ); \
- break; \
- } \
- errno=0; \
- clearerr(yyin); \
- } \
- }\
-\
-
-#endif
-
-/* No semi-colon after return; correct usage is to write "yyterminate();" -
- * we don't want an extra ';' after the "return" because that will cause
- * some compilers to complain about unreachable statements.
- */
-#ifndef yyterminate
-#define yyterminate() return YY_NULL
-#endif
-
-/* Number of entries by which start-condition stack grows. */
-#ifndef YY_START_STACK_INCR
-#define YY_START_STACK_INCR 25
-#endif
-
-/* Report a fatal error. */
-#ifndef YY_FATAL_ERROR
-#define YY_FATAL_ERROR(msg) yy_fatal_error( msg , yyscanner)
-#endif
-
-/* end tables serialization structures and prototypes */
-
-/* Default declaration of generated scanner - a define so the user can
- * easily add parameters.
- */
-#ifndef YY_DECL
-#define YY_DECL_IS_OURS 1
-
-extern int fts0tlex (yyscan_t yyscanner);
-
-#define YY_DECL int fts0tlex (yyscan_t yyscanner)
-#endif /* !YY_DECL */
-
-/* Code executed at the beginning of each rule, after yytext and yyleng
- * have been set up.
- */
-#ifndef YY_USER_ACTION
-#define YY_USER_ACTION
-#endif
-
-/* Code executed at the end of each rule. */
-#ifndef YY_BREAK
-#define YY_BREAK break;
-#endif
-
-#define YY_RULE_SETUP \
- YY_USER_ACTION
-
-/** The main scanner function which does all the work.
- */
-YY_DECL
-{
- register yy_state_type yy_current_state;
- register char *yy_cp, *yy_bp;
- register int yy_act;
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
-#line 44 "fts0tlex.l"
-
-
-#line 707 "fts0tlex.cc"
-
- if ( !yyg->yy_init )
- {
- yyg->yy_init = 1;
-
-#ifdef YY_USER_INIT
- YY_USER_INIT;
-#endif
-
- if ( ! yyg->yy_start )
- yyg->yy_start = 1; /* first start state */
-
- if ( ! yyin )
- yyin = stdin;
-
- if ( ! yyout )
- yyout = stdout;
-
- if ( ! YY_CURRENT_BUFFER ) {
- fts0tensure_buffer_stack (yyscanner);
- YY_CURRENT_BUFFER_LVALUE =
- fts0t_create_buffer(yyin,YY_BUF_SIZE ,yyscanner);
- }
-
- fts0t_load_buffer_state(yyscanner );
- }
-
- while ( 1 ) /* loops until end-of-file is reached */
- {
- yy_cp = yyg->yy_c_buf_p;
-
- /* Support of yytext. */
- *yy_cp = yyg->yy_hold_char;
-
- /* yy_bp points to the position in yy_ch_buf of the start of
- * the current run.
- */
- yy_bp = yy_cp;
-
- yy_current_state = yyg->yy_start;
-yy_match:
- do
- {
- register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
- if ( yy_accept[yy_current_state] )
- {
- yyg->yy_last_accepting_state = yy_current_state;
- yyg->yy_last_accepting_cpos = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 17 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- ++yy_cp;
- }
- while ( yy_current_state != 16 );
- yy_cp = yyg->yy_last_accepting_cpos;
- yy_current_state = yyg->yy_last_accepting_state;
-
-yy_find_action:
- yy_act = yy_accept[yy_current_state];
-
- YY_DO_BEFORE_ACTION;
-
-do_action: /* This label is used only to access EOF actions. */
-
- switch ( yy_act )
- { /* beginning of action switch */
- case 0: /* must back up */
- /* undo the effects of YY_DO_BEFORE_ACTION */
- *yy_cp = yyg->yy_hold_char;
- yy_cp = yyg->yy_last_accepting_cpos;
- yy_current_state = yyg->yy_last_accepting_state;
- goto yy_find_action;
-
-case 1:
-YY_RULE_SETUP
-#line 46 "fts0tlex.l"
-/* Ignore whitespace */ ;
- YY_BREAK
-case 2:
-YY_RULE_SETUP
-#line 48 "fts0tlex.l"
-{
- val->oper = fts0tget_text(yyscanner)[0];
-
- return(val->oper);
-}
- YY_BREAK
-case 3:
-YY_RULE_SETUP
-#line 54 "fts0tlex.l"
-{
- val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner));
-
- return(FTS_TEXT);
-}
- YY_BREAK
-case 4:
-YY_RULE_SETUP
-#line 60 "fts0tlex.l"
-{
- val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner));
-
- return(FTS_TERM);
-}
- YY_BREAK
-case 5:
-YY_RULE_SETUP
-#line 65 "fts0tlex.l"
-;
- YY_BREAK
-case 6:
-/* rule 6 can match eol */
-YY_RULE_SETUP
-#line 66 "fts0tlex.l"
-
- YY_BREAK
-case 7:
-YY_RULE_SETUP
-#line 68 "fts0tlex.l"
-ECHO;
- YY_BREAK
-#line 834 "fts0tlex.cc"
-case YY_STATE_EOF(INITIAL):
- yyterminate();
-
- case YY_END_OF_BUFFER:
- {
- /* Amount of text matched not including the EOB char. */
- int yy_amount_of_matched_text = (int) (yy_cp - yyg->yytext_ptr) - 1;
-
- /* Undo the effects of YY_DO_BEFORE_ACTION. */
- *yy_cp = yyg->yy_hold_char;
- YY_RESTORE_YY_MORE_OFFSET
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
- {
- /* We're scanning a new file or input source. It's
- * possible that this happened because the user
- * just pointed yyin at a new source and called
- * fts0tlex(). If so, then we have to assure
- * consistency between YY_CURRENT_BUFFER and our
- * globals. Here is the right place to do so, because
- * this is the first action (other than possibly a
- * back-up) that will match for the new input source.
- */
- yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
- YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin;
- YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
- }
-
- /* Note that here we test for yy_c_buf_p "<=" to the position
- * of the first EOB in the buffer, since yy_c_buf_p will
- * already have been incremented past the NUL character
- * (since all states make transitions on EOB to the
- * end-of-buffer state). Contrast this with the test
- * in input().
- */
- if ( yyg->yy_c_buf_p <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] )
- { /* This was really a NUL. */
- yy_state_type yy_next_state;
-
- yyg->yy_c_buf_p = yyg->yytext_ptr + yy_amount_of_matched_text;
-
- yy_current_state = yy_get_previous_state( yyscanner );
-
- /* Okay, we're now positioned to make the NUL
- * transition. We couldn't have
- * yy_get_previous_state() go ahead and do it
- * for us because it doesn't know how to deal
- * with the possibility of jamming (and we don't
- * want to build jamming into it because then it
- * will run more slowly).
- */
-
- yy_next_state = yy_try_NUL_trans( yy_current_state , yyscanner);
-
- yy_bp = yyg->yytext_ptr + YY_MORE_ADJ;
-
- if ( yy_next_state )
- {
- /* Consume the NUL. */
- yy_cp = ++yyg->yy_c_buf_p;
- yy_current_state = yy_next_state;
- goto yy_match;
- }
-
- else
- {
- yy_cp = yyg->yy_last_accepting_cpos;
- yy_current_state = yyg->yy_last_accepting_state;
- goto yy_find_action;
- }
- }
-
- else switch ( yy_get_next_buffer( yyscanner ) )
- {
- case EOB_ACT_END_OF_FILE:
- {
- yyg->yy_did_buffer_switch_on_eof = 0;
-
- if ( fts0twrap(yyscanner ) )
- {
- /* Note: because we've taken care in
- * yy_get_next_buffer() to have set up
- * yytext, we can now set up
- * yy_c_buf_p so that if some total
- * hoser (like flex itself) wants to
- * call the scanner after we return the
- * YY_NULL, it'll still work - another
- * YY_NULL will get returned.
- */
- yyg->yy_c_buf_p = yyg->yytext_ptr + YY_MORE_ADJ;
-
- yy_act = YY_STATE_EOF(YY_START);
- goto do_action;
- }
-
- else
- {
- if ( ! yyg->yy_did_buffer_switch_on_eof )
- YY_NEW_FILE;
- }
- break;
- }
-
- case EOB_ACT_CONTINUE_SCAN:
- yyg->yy_c_buf_p =
- yyg->yytext_ptr + yy_amount_of_matched_text;
-
- yy_current_state = yy_get_previous_state( yyscanner );
-
- yy_cp = yyg->yy_c_buf_p;
- yy_bp = yyg->yytext_ptr + YY_MORE_ADJ;
- goto yy_match;
-
- case EOB_ACT_LAST_MATCH:
- yyg->yy_c_buf_p =
- &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars];
-
- yy_current_state = yy_get_previous_state( yyscanner );
-
- yy_cp = yyg->yy_c_buf_p;
- yy_bp = yyg->yytext_ptr + YY_MORE_ADJ;
- goto yy_find_action;
- }
- break;
- }
-
- default:
- YY_FATAL_ERROR(
- "fatal flex scanner internal error--no action found" );
- } /* end of action switch */
- } /* end of scanning one token */
-} /* end of fts0tlex */
-
-/* yy_get_next_buffer - try to read in a new buffer
- *
- * Returns a code representing an action:
- * EOB_ACT_LAST_MATCH -
- * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
- * EOB_ACT_END_OF_FILE - end of file
- */
-static int yy_get_next_buffer (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
- register char *source = yyg->yytext_ptr;
- register int number_to_move, i;
- int ret_val;
-
- if ( yyg->yy_c_buf_p > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] )
- YY_FATAL_ERROR(
- "fatal flex scanner internal error--end of buffer missed" );
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
- { /* Don't try to fill the buffer, so this is an EOF. */
- if ( yyg->yy_c_buf_p - yyg->yytext_ptr - YY_MORE_ADJ == 1 )
- {
- /* We matched a single character, the EOB, so
- * treat this as a final EOF.
- */
- return EOB_ACT_END_OF_FILE;
- }
-
- else
- {
- /* We matched some text prior to the EOB, first
- * process it.
- */
- return EOB_ACT_LAST_MATCH;
- }
- }
-
- /* Try to read more data. */
-
- /* First move last chars to start of buffer. */
- number_to_move = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr) - 1;
-
- for ( i = 0; i < number_to_move; ++i )
- *(dest++) = *(source++);
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
- /* don't do the read, it's not guaranteed to return an EOF,
- * just force an EOF
- */
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars = 0;
-
- else
- {
- int num_to_read =static_cast<int>(
- YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1);
-
- while ( num_to_read <= 0 )
- { /* Not enough room in the buffer - grow it. */
-
- /* just a shorter name for the current buffer */
- YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
-
- int yy_c_buf_p_offset =
- (int) (yyg->yy_c_buf_p - b->yy_ch_buf);
-
- if ( b->yy_is_our_buffer )
- {
- int new_size = static_cast<int>(b->yy_buf_size * 2);
-
- if ( new_size <= 0 )
- b->yy_buf_size += b->yy_buf_size / 8;
- else
- b->yy_buf_size *= 2;
-
- b->yy_ch_buf = (char *)
- /* Include room in for 2 EOB chars. */
- fts0trealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner );
- }
- else
- /* Can't grow it, we don't own it. */
- b->yy_ch_buf = 0;
-
- if ( ! b->yy_ch_buf )
- YY_FATAL_ERROR(
- "fatal error - scanner input buffer overflow" );
-
- yyg->yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset];
-
- num_to_read = static_cast<int>(
- YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1);
-
- }
-
- if ( num_to_read > YY_READ_BUF_SIZE )
- num_to_read = YY_READ_BUF_SIZE;
-
- /* Read in more data. */
- YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
- yyg->yy_n_chars, num_to_read);
-
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
- }
-
- if ( yyg->yy_n_chars == 0 )
- {
- if ( number_to_move == YY_MORE_ADJ )
- {
- ret_val = EOB_ACT_END_OF_FILE;
- fts0trestart(yyin ,yyscanner);
- }
-
- else
- {
- ret_val = EOB_ACT_LAST_MATCH;
- YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
- YY_BUFFER_EOF_PENDING;
- }
- }
-
- else
- ret_val = EOB_ACT_CONTINUE_SCAN;
-
- if ((yy_size_t) (yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
- /* Extend the array by 50%, plus the number we really need. */
- yy_size_t new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1);
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) fts0trealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner );
- if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
- YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
- }
-
- yyg->yy_n_chars += number_to_move;
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] = YY_END_OF_BUFFER_CHAR;
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR;
-
- yyg->yytext_ptr = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
-
- return ret_val;
-}
-
-/* yy_get_previous_state - get the state just before the EOB char was reached */
-
- static yy_state_type yy_get_previous_state (yyscan_t yyscanner)
-{
- register yy_state_type yy_current_state;
- register char *yy_cp;
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- yy_current_state = yyg->yy_start;
-
- for ( yy_cp = yyg->yytext_ptr + YY_MORE_ADJ; yy_cp < yyg->yy_c_buf_p; ++yy_cp )
- {
- register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
- if ( yy_accept[yy_current_state] )
- {
- yyg->yy_last_accepting_state = yy_current_state;
- yyg->yy_last_accepting_cpos = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 17 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- }
-
- return yy_current_state;
-}
-
-/* yy_try_NUL_trans - try to make a transition on the NUL character
- *
- * synopsis
- * next_state = yy_try_NUL_trans( current_state );
- */
- static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner)
-{
- register int yy_is_jam;
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */
- register char *yy_cp = yyg->yy_c_buf_p;
-
- register YY_CHAR yy_c = 1;
- if ( yy_accept[yy_current_state] )
- {
- yyg->yy_last_accepting_state = yy_current_state;
- yyg->yy_last_accepting_cpos = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 17 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- yy_is_jam = (yy_current_state == 16);
-
- return yy_is_jam ? 0 : yy_current_state;
-}
-
-#ifndef YY_NO_INPUT
-#ifdef __cplusplus
- static int yyinput (yyscan_t yyscanner)
-#else
- static int input (yyscan_t yyscanner)
-#endif
-
-{
- int c;
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- *yyg->yy_c_buf_p = yyg->yy_hold_char;
-
- if ( *yyg->yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
- {
- /* yy_c_buf_p now points to the character we want to return.
- * If this occurs *before* the EOB characters, then it's a
- * valid NUL; if not, then we've hit the end of the buffer.
- */
- if ( yyg->yy_c_buf_p < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] )
- /* This was really a NUL. */
- *yyg->yy_c_buf_p = '\0';
-
- else
- { /* need more input */
- int offset = yyg->yy_c_buf_p - yyg->yytext_ptr;
- ++yyg->yy_c_buf_p;
-
- switch ( yy_get_next_buffer( yyscanner ) )
- {
- case EOB_ACT_LAST_MATCH:
- /* This happens because yy_g_n_b()
- * sees that we've accumulated a
- * token and flags that we need to
- * try matching the token before
- * proceeding. But for input(),
- * there's no matching to consider.
- * So convert the EOB_ACT_LAST_MATCH
- * to EOB_ACT_END_OF_FILE.
- */
-
- /* Reset buffer status. */
- fts0trestart(yyin ,yyscanner);
-
- /*FALLTHROUGH*/
-
- case EOB_ACT_END_OF_FILE:
- {
- if ( fts0twrap(yyscanner ) )
- return EOF;
-
- if ( ! yyg->yy_did_buffer_switch_on_eof )
- YY_NEW_FILE;
-#ifdef __cplusplus
- return yyinput(yyscanner);
-#else
- return input(yyscanner);
-#endif
- }
-
- case EOB_ACT_CONTINUE_SCAN:
- yyg->yy_c_buf_p = yyg->yytext_ptr + offset;
- break;
- }
- }
- }
-
- c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */
- *yyg->yy_c_buf_p = '\0'; /* preserve yytext */
- yyg->yy_hold_char = *++yyg->yy_c_buf_p;
-
- return c;
-}
-#endif /* ifndef YY_NO_INPUT */
-
-/** Immediately switch to a different input stream.
- * @param input_file A readable stream.
- * @param yyscanner The scanner object.
- * @note This function does not reset the start condition to @c INITIAL .
- */
- void fts0trestart (FILE * input_file , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- if ( ! YY_CURRENT_BUFFER ){
- fts0tensure_buffer_stack (yyscanner);
- YY_CURRENT_BUFFER_LVALUE =
- fts0t_create_buffer(yyin,YY_BUF_SIZE ,yyscanner);
- }
-
- fts0t_init_buffer(YY_CURRENT_BUFFER,input_file ,yyscanner);
- fts0t_load_buffer_state(yyscanner );
-}
-
-/** Switch to a different input buffer.
- * @param new_buffer The new input buffer.
- * @param yyscanner The scanner object.
- */
- void fts0t_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- /* TODO. We should be able to replace this entire function body
- * with
- * fts0tpop_buffer_state();
- * fts0tpush_buffer_state(new_buffer);
- */
- fts0tensure_buffer_stack (yyscanner);
- if ( YY_CURRENT_BUFFER == new_buffer )
- return;
-
- if ( YY_CURRENT_BUFFER )
- {
- /* Flush out information for old buffer. */
- *yyg->yy_c_buf_p = yyg->yy_hold_char;
- YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p;
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
- }
-
- YY_CURRENT_BUFFER_LVALUE = new_buffer;
- fts0t_load_buffer_state(yyscanner );
-
- /* We don't actually know whether we did this switch during
- * EOF (fts0twrap()) processing, but the only time this flag
- * is looked at is after fts0twrap() is called, so it's safe
- * to go ahead and always set it.
- */
- yyg->yy_did_buffer_switch_on_eof = 1;
-}
-
-static void fts0t_load_buffer_state (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
- yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
- yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
- yyg->yy_hold_char = *yyg->yy_c_buf_p;
-}
-
-/** Allocate and initialize an input buffer state.
- * @param file A readable stream.
- * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
- * @param yyscanner The scanner object.
- * @return the allocated buffer state.
- */
- YY_BUFFER_STATE fts0t_create_buffer (FILE * file, int size , yyscan_t yyscanner)
-{
- YY_BUFFER_STATE b;
-
- b = (YY_BUFFER_STATE) fts0talloc(sizeof( struct yy_buffer_state ) ,yyscanner );
- if ( ! b )
- YY_FATAL_ERROR( "out of dynamic memory in fts0t_create_buffer()" );
-
- b->yy_buf_size = size;
-
- /* yy_ch_buf has to be 2 characters longer than the size given because
- * we need to put in 2 end-of-buffer characters.
- */
- b->yy_ch_buf = (char *) fts0talloc(b->yy_buf_size + 2 ,yyscanner );
- if ( ! b->yy_ch_buf )
- YY_FATAL_ERROR( "out of dynamic memory in fts0t_create_buffer()" );
-
- b->yy_is_our_buffer = 1;
-
- fts0t_init_buffer(b,file ,yyscanner);
-
- return b;
-}
-
-/** Destroy the buffer.
- * @param b a buffer created with fts0t_create_buffer()
- * @param yyscanner The scanner object.
- */
- void fts0t_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- if ( ! b )
- return;
-
- if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
- YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
-
- if ( b->yy_is_our_buffer )
- fts0tfree((void *) b->yy_ch_buf ,yyscanner );
-
- fts0tfree((void *) b ,yyscanner );
-}
-
-/* Initializes or reinitializes a buffer.
- * This function is sometimes called more than once on the same buffer,
- * such as during a fts0trestart() or at EOF.
- */
- static void fts0t_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner)
-
-{
- int oerrno = errno;
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- fts0t_flush_buffer(b ,yyscanner);
-
- b->yy_input_file = file;
- b->yy_fill_buffer = 1;
-
- /* If b is the current buffer, then fts0t_init_buffer was _probably_
- * called from fts0trestart() or through yy_get_next_buffer.
- * In that case, we don't want to reset the lineno or column.
- */
- if (b != YY_CURRENT_BUFFER){
- b->yy_bs_lineno = 1;
- b->yy_bs_column = 0;
- }
-
- b->yy_is_interactive = 0;
-
- errno = oerrno;
-}
-
-/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
- * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
- * @param yyscanner The scanner object.
- */
- void fts0t_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- if ( ! b )
- return;
-
- b->yy_n_chars = 0;
-
- /* We always need two end-of-buffer characters. The first causes
- * a transition to the end-of-buffer state. The second causes
- * a jam in that state.
- */
- b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
- b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
-
- b->yy_buf_pos = &b->yy_ch_buf[0];
-
- b->yy_at_bol = 1;
- b->yy_buffer_status = YY_BUFFER_NEW;
-
- if ( b == YY_CURRENT_BUFFER )
- fts0t_load_buffer_state(yyscanner );
-}
-
-/** Pushes the new state onto the stack. The new state becomes
- * the current state. This function will allocate the stack
- * if necessary.
- * @param new_buffer The new state.
- * @param yyscanner The scanner object.
- */
-void fts0tpush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- if (new_buffer == NULL)
- return;
-
- fts0tensure_buffer_stack(yyscanner);
-
- /* This block is copied from fts0t_switch_to_buffer. */
- if ( YY_CURRENT_BUFFER )
- {
- /* Flush out information for old buffer. */
- *yyg->yy_c_buf_p = yyg->yy_hold_char;
- YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p;
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars;
- }
-
- /* Only push if top exists. Otherwise, replace top. */
- if (YY_CURRENT_BUFFER)
- yyg->yy_buffer_stack_top++;
- YY_CURRENT_BUFFER_LVALUE = new_buffer;
-
- /* copied from fts0t_switch_to_buffer. */
- fts0t_load_buffer_state(yyscanner );
- yyg->yy_did_buffer_switch_on_eof = 1;
-}
-
-/** Removes and deletes the top of the stack, if present.
- * The next element becomes the new top.
- * @param yyscanner The scanner object.
- */
-void fts0tpop_buffer_state (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- if (!YY_CURRENT_BUFFER)
- return;
-
- fts0t_delete_buffer(YY_CURRENT_BUFFER ,yyscanner);
- YY_CURRENT_BUFFER_LVALUE = NULL;
- if (yyg->yy_buffer_stack_top > 0)
- --yyg->yy_buffer_stack_top;
-
- if (YY_CURRENT_BUFFER) {
- fts0t_load_buffer_state(yyscanner );
- yyg->yy_did_buffer_switch_on_eof = 1;
- }
-}
-
-/* Allocates the stack if it does not exist.
- * Guarantees space for at least one push.
- */
-static void fts0tensure_buffer_stack (yyscan_t yyscanner)
-{
- int num_to_alloc;
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- if (!yyg->yy_buffer_stack) {
-
- /* First allocation is just for 2 elements, since we don't know if this
- * scanner will even need a stack. We use 2 instead of 1 to avoid an
- * immediate realloc on the next call.
- */
- num_to_alloc = 1;
- yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0talloc
- (num_to_alloc * sizeof(struct yy_buffer_state*)
- , yyscanner);
- if ( ! yyg->yy_buffer_stack )
- YY_FATAL_ERROR( "out of dynamic memory in fts0tensure_buffer_stack()" );
-
- memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*));
-
- yyg->yy_buffer_stack_max = num_to_alloc;
- yyg->yy_buffer_stack_top = 0;
- return;
- }
-
- if (yyg->yy_buffer_stack_top >= (yyg->yy_buffer_stack_max) - 1){
-
- /* Increase the buffer to prepare for a possible push. */
- int grow_size = 8 /* arbitrary grow size */;
-
- num_to_alloc = static_cast<int>(yyg->yy_buffer_stack_max + grow_size);
- yyg->yy_buffer_stack = (struct yy_buffer_state**)fts0trealloc
- (yyg->yy_buffer_stack,
- num_to_alloc * sizeof(struct yy_buffer_state*)
- , yyscanner);
- if ( ! yyg->yy_buffer_stack )
- YY_FATAL_ERROR( "out of dynamic memory in fts0tensure_buffer_stack()" );
-
- /* zero only the new slots.*/
- memset(yyg->yy_buffer_stack + yyg->yy_buffer_stack_max, 0, grow_size * sizeof(struct yy_buffer_state*));
- yyg->yy_buffer_stack_max = num_to_alloc;
- }
-}
-
-/** Setup the input buffer state to scan directly from a user-specified character buffer.
- * @param base the character buffer
- * @param size the size in bytes of the character buffer
- * @param yyscanner The scanner object.
- * @return the newly allocated buffer state object.
- */
-YY_BUFFER_STATE fts0t_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner)
-{
- YY_BUFFER_STATE b;
-
- if ( size < 2 ||
- base[size-2] != YY_END_OF_BUFFER_CHAR ||
- base[size-1] != YY_END_OF_BUFFER_CHAR )
- /* They forgot to leave room for the EOB's. */
- return 0;
-
- b = (YY_BUFFER_STATE) fts0talloc(sizeof( struct yy_buffer_state ) ,yyscanner );
- if ( ! b )
- YY_FATAL_ERROR( "out of dynamic memory in fts0t_scan_buffer()" );
-
- b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */
- b->yy_buf_pos = b->yy_ch_buf = base;
- b->yy_is_our_buffer = 0;
- b->yy_input_file = 0;
- b->yy_n_chars = static_cast<int>(b->yy_buf_size);
- b->yy_is_interactive = 0;
- b->yy_at_bol = 1;
- b->yy_fill_buffer = 0;
- b->yy_buffer_status = YY_BUFFER_NEW;
-
- fts0t_switch_to_buffer(b ,yyscanner );
-
- return b;
-}
-
-/** Setup the input buffer state to scan a string. The next call to fts0tlex() will
- * scan from a @e copy of @a str.
- * @param yystr a NUL-terminated string to scan
- * @param yyscanner The scanner object.
- * @return the newly allocated buffer state object.
- * @note If you want to scan bytes that may contain NUL values, then use
- * fts0t_scan_bytes() instead.
- */
-YY_BUFFER_STATE fts0t_scan_string (yyconst char * yystr , yyscan_t yyscanner)
-{
-
- return fts0t_scan_bytes(yystr,static_cast<int>(strlen(yystr)) ,yyscanner);
-}
-
-/** Setup the input buffer state to scan the given bytes. The next call to fts0tlex() will
- * scan from a @e copy of @a bytes.
- * @param yybytes the byte buffer to scan
- * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
- * @param yyscanner The scanner object.
- * @return the newly allocated buffer state object.
- */
-YY_BUFFER_STATE fts0t_scan_bytes (yyconst char * yybytes, int _yybytes_len , yyscan_t yyscanner)
-{
- YY_BUFFER_STATE b;
- char *buf;
- yy_size_t n;
- int i;
-
- /* Get memory for full buffer, including space for trailing EOB's. */
- n = _yybytes_len + 2;
- buf = (char *) fts0talloc(n ,yyscanner );
- if ( ! buf )
- YY_FATAL_ERROR( "out of dynamic memory in fts0t_scan_bytes()" );
-
- for ( i = 0; i < _yybytes_len; ++i )
- buf[i] = yybytes[i];
-
- buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
-
- b = fts0t_scan_buffer(buf,n ,yyscanner);
- if ( ! b )
- YY_FATAL_ERROR( "bad buffer in fts0t_scan_bytes()" );
-
- /* It's okay to grow etc. this buffer, and we should throw it
- * away when we're done.
- */
- b->yy_is_our_buffer = 1;
-
- return b;
-}
-
-#ifndef YY_EXIT_FAILURE
-#define YY_EXIT_FAILURE 2
-#endif
-
-static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)))
-{
- (void) fprintf( stderr, "%s\n", msg );
- exit( YY_EXIT_FAILURE );
-}
-
-/* Redefine yyless() so it works in section 3 code. */
-
-#undef yyless
-#define yyless(n) \
- do \
- { \
- /* Undo effects of setting up yytext. */ \
- int yyless_macro_arg = (n); \
- YY_LESS_LINENO(yyless_macro_arg);\
- yytext[yyleng] = yyg->yy_hold_char; \
- yyg->yy_c_buf_p = yytext + yyless_macro_arg; \
- yyg->yy_hold_char = *yyg->yy_c_buf_p; \
- *yyg->yy_c_buf_p = '\0'; \
- yyleng = yyless_macro_arg; \
- } \
- while ( 0 )
-
-/* Accessor methods (get/set functions) to struct members. */
-
-/** Get the user-defined data for this scanner.
- * @param yyscanner The scanner object.
- */
-YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- return yyextra;
-}
-
-/** Get the current line number.
- * @param yyscanner The scanner object.
- */
-int fts0tget_lineno (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- if (! YY_CURRENT_BUFFER)
- return 0;
-
- return yylineno;
-}
-
-/** Get the current column number.
- * @param yyscanner The scanner object.
- */
-int fts0tget_column (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- if (! YY_CURRENT_BUFFER)
- return 0;
-
- return yycolumn;
-}
-
-/** Get the input stream.
- * @param yyscanner The scanner object.
- */
-FILE *fts0tget_in (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- return yyin;
-}
-
-/** Get the output stream.
- * @param yyscanner The scanner object.
- */
-FILE *fts0tget_out (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- return yyout;
-}
-
-/** Get the length of the current token.
- * @param yyscanner The scanner object.
- */
-int fts0tget_leng (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- return yyleng;
-}
-
-/** Get the current token.
- * @param yyscanner The scanner object.
- */
-
-char *fts0tget_text (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- return yytext;
-}
-
-/** Set the user-defined data. This data is never touched by the scanner.
- * @param user_defined The data to be associated with this scanner.
- * @param yyscanner The scanner object.
- */
-void fts0tset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- yyextra = user_defined ;
-}
-
-/** Set the current line number.
- * @param line_number
- * @param yyscanner The scanner object.
- */
-void fts0tset_lineno (int line_number , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- /* lineno is only valid if an input buffer exists. */
- if (! YY_CURRENT_BUFFER )
- yy_fatal_error( "fts0tset_lineno called with no buffer" , yyscanner);
-
- yylineno = line_number;
-}
-
-/** Set the current column.
- * @param line_number
- * @param yyscanner The scanner object.
- */
-void fts0tset_column (int column_no , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- /* column is only valid if an input buffer exists. */
- if (! YY_CURRENT_BUFFER )
- yy_fatal_error( "fts0tset_column called with no buffer" , yyscanner);
-
- yycolumn = column_no;
-}
-
-/** Set the input stream. This does not discard the current
- * input buffer.
- * @param in_str A readable stream.
- * @param yyscanner The scanner object.
- * @see fts0t_switch_to_buffer
- */
-void fts0tset_in (FILE * in_str , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- yyin = in_str ;
-}
-
-void fts0tset_out (FILE * out_str , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- yyout = out_str ;
-}
-
-int fts0tget_debug (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- return yy_flex_debug;
-}
-
-void fts0tset_debug (int bdebug , yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- yy_flex_debug = bdebug ;
-}
-
-/* Accessor methods for yylval and yylloc */
-
-/* User-visible API */
-
-/* fts0tlex_init is special because it creates the scanner itself, so it is
- * the ONLY reentrant function that doesn't take the scanner as the last argument.
- * That's why we explicitly handle the declaration, instead of using our macros.
- */
-
-int fts0tlex_init(yyscan_t* ptr_yy_globals)
-
-{
- if (ptr_yy_globals == NULL){
- errno = EINVAL;
- return 1;
- }
-
- *ptr_yy_globals = (yyscan_t) fts0talloc ( sizeof( struct yyguts_t ), NULL );
-
- if (*ptr_yy_globals == NULL){
- errno = ENOMEM;
- return 1;
- }
-
- /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. */
- memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));
-
- return yy_init_globals ( *ptr_yy_globals );
-}
-
-/* fts0tlex_init_extra has the same functionality as fts0tlex_init, but follows the
- * convention of taking the scanner as the last argument. Note however, that
- * this is a *pointer* to a scanner, as it will be allocated by this call (and
- * is the reason, too, why this function also must handle its own declaration).
- * The user defined value in the first argument will be available to fts0talloc in
- * the yyextra field.
- */
-
-int fts0tlex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals )
-
-{
- struct yyguts_t dummy_yyguts;
-
- fts0tset_extra (yy_user_defined, &dummy_yyguts);
-
- if (ptr_yy_globals == NULL){
- errno = EINVAL;
- return 1;
- }
-
- *ptr_yy_globals = (yyscan_t) fts0talloc ( sizeof( struct yyguts_t ), &dummy_yyguts );
-
- if (*ptr_yy_globals == NULL){
- errno = ENOMEM;
- return 1;
- }
-
- /* By setting to 0xAA, we expose bugs in
- yy_init_globals. Leave at 0x00 for releases. */
- memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t));
-
- fts0tset_extra (yy_user_defined, *ptr_yy_globals);
-
- return yy_init_globals ( *ptr_yy_globals );
-}
-
-static int yy_init_globals (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
- /* Initialization is the same as for the non-reentrant scanner.
- * This function is called from fts0tlex_destroy(), so don't allocate here.
- */
-
- yyg->yy_buffer_stack = 0;
- yyg->yy_buffer_stack_top = 0;
- yyg->yy_buffer_stack_max = 0;
- yyg->yy_c_buf_p = (char *) 0;
- yyg->yy_init = 0;
- yyg->yy_start = 0;
-
- yyg->yy_start_stack_ptr = 0;
- yyg->yy_start_stack_depth = 0;
- yyg->yy_start_stack = NULL;
-
-/* Defined in main.c */
-#ifdef YY_STDINIT
- yyin = stdin;
- yyout = stdout;
-#else
- yyin = (FILE *) 0;
- yyout = (FILE *) 0;
-#endif
-
- /* For future reference: Set errno on error, since we are called by
- * fts0tlex_init()
- */
- return 0;
-}
-
-/* fts0tlex_destroy is for both reentrant and non-reentrant scanners. */
-int fts0tlex_destroy (yyscan_t yyscanner)
-{
- struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-
- /* Pop the buffer stack, destroying each element. */
- while(YY_CURRENT_BUFFER){
- fts0t_delete_buffer(YY_CURRENT_BUFFER ,yyscanner );
- YY_CURRENT_BUFFER_LVALUE = NULL;
- fts0tpop_buffer_state(yyscanner);
- }
-
- /* Destroy the stack itself. */
- fts0tfree(yyg->yy_buffer_stack ,yyscanner);
- yyg->yy_buffer_stack = NULL;
-
- /* Destroy the start condition stack. */
- fts0tfree(yyg->yy_start_stack ,yyscanner );
- yyg->yy_start_stack = NULL;
-
- /* Reset the globals. This is important in a non-reentrant scanner so the next time
- * fts0tlex() is called, initialization will occur. */
- yy_init_globals( yyscanner);
-
- /* Destroy the main struct (reentrant only). */
- fts0tfree ( yyscanner , yyscanner );
- yyscanner = NULL;
- return 0;
-}
-
-/*
- * Internal utility routines.
- */
-
-#ifndef yytext_ptr
-static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)))
-{
- register int i;
- for ( i = 0; i < n; ++i )
- s1[i] = s2[i];
-}
-#endif
-
-#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)))
-{
- register int n;
- for ( n = 0; s[n]; ++n )
- ;
-
- return n;
-}
-#endif
-
-void *fts0talloc (yy_size_t size , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)))
-{
- return (void *) malloc( size );
-}
-
-void *fts0trealloc (void * ptr, yy_size_t size , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)))
-{
- /* The cast to (char *) in the following accommodates both
- * implementations that use char* generic pointers, and those
- * that use void* generic pointers. It works with the latter
- * because both ANSI C and C++ allow castless assignment from
- * any pointer type to void*, and deal with argument conversions
- * as though doing an assignment.
- */
- return (void *) realloc( (char *) ptr, size );
-}
-
-void fts0tfree (void * ptr , yyscan_t yyscanner MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)) MY_ATTRIBUTE((unused)))
-{
- free( (char *) ptr ); /* see fts0trealloc() for (char *) cast */
-}
-
-#define YYTABLES_NAME "yytables"
-
-#line 68 "fts0tlex.l"
-
-
-
diff --git a/storage/xtradb/fts/fts0tlex.l b/storage/xtradb/fts/fts0tlex.l
deleted file mode 100644
index 4f55a83afe5..00000000000
--- a/storage/xtradb/fts/fts0tlex.l
+++ /dev/null
@@ -1,68 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**
- * @file fts/fts0tlex.l
- * FTS parser lexical analyzer
- *
- * Created 2007/5/9 Sunny Bains
- */
-
-%{
-
-#include "fts0ast.h"
-#include "fts0pars.h"
-
-/* Required for reentrant parser */
-#define YY_DECL int fts_tlexer(YYSTYPE* val, yyscan_t yyscanner)
-
-%}
-
-%option noinput
-%option nounput
-%option noyywrap
-%option nostdinit
-%option reentrant
-%option never-interactive
-
-
-%%
-
-[\t ]+ /* Ignore whitespace */ ;
-
-[*] {
- val->oper = fts0tget_text(yyscanner)[0];
-
- return(val->oper);
-}
-
-\"[^\"\n]*\" {
- val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner));
-
- return(FTS_TEXT);
-}
-
-[^" \n\%]* {
- val->token = fts_ast_string_create(reinterpret_cast<const byte*>(fts0tget_text(yyscanner)), fts0tget_leng(yyscanner));
-
- return(FTS_TERM);
-}
-. ;
-\n
-
-%%
diff --git a/storage/xtradb/fts/make_parser.sh b/storage/xtradb/fts/make_parser.sh
deleted file mode 100755
index 52b63eff674..00000000000
--- a/storage/xtradb/fts/make_parser.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/sh
-#
-# Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
-#
-# This program is free software; you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free Software
-# Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-
-TMPF=t.$$
-
-make -f Makefile.query
-
-echo '#include "univ.i"' > $TMPF
-
-# This is to avoid compiler warning about unused parameters.
-# FIXME: gcc extension "MY_ATTRIBUTE" causing compilation errors on windows
-# platform. Quote them out for now.
-sed -e '
-s/^\(static.*void.*yy_fatal_error.*msg.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/;
-s/^\(static.*void.*yy_flex_strncpy.*n.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/;
-s/^\(static.*int.*yy_flex_strlen.*s.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/;
-s/^\(\(static\|void\).*fts0[bt]alloc.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/;
-s/^\(\(static\|void\).*fts0[bt]realloc.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/;
-s/^\(\(static\|void\).*fts0[bt]free.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/;
-' < fts0blex.cc >> $TMPF
-
-mv $TMPF fts0blex.cc
-
-echo '#include "univ.i"' > $TMPF
-
-sed -e '
-s/^\(static.*void.*yy_fatal_error.*msg.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/;
-s/^\(static.*void.*yy_flex_strncpy.*n.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/;
-s/^\(static.*int.*yy_flex_strlen.*s.*,\)\(.*yyscanner\)/\1 \2 MY_ATTRIBUTE((unused))/;
-s/^\(\(static\|void\).*fts0[bt]alloc.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/;
-s/^\(\(static\|void\).*fts0[bt]realloc.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/;
-s/^\(\(static\|void\).*fts0[bt]free.*,\)\(.*yyscanner\)/\1 \3 MY_ATTRIBUTE((unused))/;
-' < fts0tlex.cc >> $TMPF
-
-mv $TMPF fts0tlex.cc
diff --git a/storage/xtradb/fut/fut0fut.cc b/storage/xtradb/fut/fut0fut.cc
deleted file mode 100644
index 9bb1c512182..00000000000
--- a/storage/xtradb/fut/fut0fut.cc
+++ /dev/null
@@ -1,31 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file fut/fut0fut.cc
-File-based utilities
-
-Created 12/13/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "fut0fut.h"
-
-#ifdef UNIV_NONINL
-#include "fut0fut.ic"
-#endif
-
diff --git a/storage/xtradb/fut/fut0lst.cc b/storage/xtradb/fut/fut0lst.cc
deleted file mode 100644
index dd3fa1238d9..00000000000
--- a/storage/xtradb/fut/fut0lst.cc
+++ /dev/null
@@ -1,432 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file fut/fut0lst.cc
-File-based list utilities
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "fut0lst.h"
-
-#ifdef UNIV_NONINL
-#include "fut0lst.ic"
-#endif
-
-#include "buf0buf.h"
-#include "page0page.h"
-
-/********************************************************************//**
-Adds a node to an empty list. */
-static
-void
-flst_add_to_empty(
-/*==============*/
- flst_base_node_t* base, /*!< in: pointer to base node of
- empty list */
- flst_node_t* node, /*!< in: node to add */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint space;
- fil_addr_t node_addr;
- ulint len;
-
- ut_ad(mtr && base && node);
- ut_ad(base != node);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
- len = flst_get_len(base, mtr);
- ut_a(len == 0);
-
- buf_ptr_get_fsp_addr(node, &space, &node_addr);
-
- /* Update first and last fields of base node */
- flst_write_addr(base + FLST_FIRST, node_addr, mtr);
- flst_write_addr(base + FLST_LAST, node_addr, mtr);
-
- /* Set prev and next fields of node to add */
- flst_write_addr(node + FLST_PREV, fil_addr_null, mtr);
- flst_write_addr(node + FLST_NEXT, fil_addr_null, mtr);
-
- /* Update len of base node */
- mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
-}
-
-/********************************************************************//**
-Adds a node as the last node in a list. */
-UNIV_INTERN
-void
-flst_add_last(
-/*==========*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node, /*!< in: node to add */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint space;
- fil_addr_t node_addr;
- ulint len;
- fil_addr_t last_addr;
- flst_node_t* last_node;
-
- ut_ad(mtr && base && node);
- ut_ad(base != node);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
- len = flst_get_len(base, mtr);
- last_addr = flst_get_last(base, mtr);
-
- buf_ptr_get_fsp_addr(node, &space, &node_addr);
-
- /* If the list is not empty, call flst_insert_after */
- if (len != 0) {
- if (last_addr.page == node_addr.page) {
- last_node = page_align(node) + last_addr.boffset;
- } else {
- ulint zip_size = fil_space_get_zip_size(space);
-
- last_node = fut_get_ptr(space, zip_size, last_addr,
- RW_X_LATCH, mtr);
- }
-
- flst_insert_after(base, last_node, node, mtr);
- } else {
- /* else call flst_add_to_empty */
- flst_add_to_empty(base, node, mtr);
- }
-}
-
-/********************************************************************//**
-Adds a node as the first node in a list. */
-UNIV_INTERN
-void
-flst_add_first(
-/*===========*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node, /*!< in: node to add */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint space;
- fil_addr_t node_addr;
- ulint len;
- fil_addr_t first_addr;
- flst_node_t* first_node;
-
- ut_ad(mtr && base && node);
- ut_ad(base != node);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node, MTR_MEMO_PAGE_X_FIX));
- len = flst_get_len(base, mtr);
- first_addr = flst_get_first(base, mtr);
-
- buf_ptr_get_fsp_addr(node, &space, &node_addr);
-
- /* If the list is not empty, call flst_insert_before */
- if (len != 0) {
- if (first_addr.page == node_addr.page) {
- first_node = page_align(node) + first_addr.boffset;
- } else {
- ulint zip_size = fil_space_get_zip_size(space);
-
- first_node = fut_get_ptr(space, zip_size, first_addr,
- RW_X_LATCH, mtr);
- }
-
- flst_insert_before(base, node, first_node, mtr);
- } else {
- /* else call flst_add_to_empty */
- flst_add_to_empty(base, node, mtr);
- }
-}
-
-/********************************************************************//**
-Inserts a node after another in a list. */
-UNIV_INTERN
-void
-flst_insert_after(
-/*==============*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node1, /*!< in: node to insert after */
- flst_node_t* node2, /*!< in: node to add */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint space;
- fil_addr_t node1_addr;
- fil_addr_t node2_addr;
- flst_node_t* node3;
- fil_addr_t node3_addr;
- ulint len;
-
- ut_ad(mtr && node1 && node2 && base);
- ut_ad(base != node1);
- ut_ad(base != node2);
- ut_ad(node2 != node1);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node1, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
-
- buf_ptr_get_fsp_addr(node1, &space, &node1_addr);
- buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
-
- node3_addr = flst_get_next_addr(node1, mtr);
-
- /* Set prev and next fields of node2 */
- flst_write_addr(node2 + FLST_PREV, node1_addr, mtr);
- flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
-
- if (!fil_addr_is_null(node3_addr)) {
- /* Update prev field of node3 */
- ulint zip_size = fil_space_get_zip_size(space);
-
- node3 = fut_get_ptr(space, zip_size,
- node3_addr, RW_X_LATCH, mtr);
- flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
- } else {
- /* node1 was last in list: update last field in base */
- flst_write_addr(base + FLST_LAST, node2_addr, mtr);
- }
-
- /* Set next field of node1 */
- flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
-
- /* Update len of base node */
- len = flst_get_len(base, mtr);
- mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
-}
-
-/********************************************************************//**
-Inserts a node before another in a list. */
-UNIV_INTERN
-void
-flst_insert_before(
-/*===============*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node2, /*!< in: node to insert */
- flst_node_t* node3, /*!< in: node to insert before */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint space;
- flst_node_t* node1;
- fil_addr_t node1_addr;
- fil_addr_t node2_addr;
- fil_addr_t node3_addr;
- ulint len;
-
- ut_ad(mtr && node2 && node3 && base);
- ut_ad(base != node2);
- ut_ad(base != node3);
- ut_ad(node2 != node3);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node3, MTR_MEMO_PAGE_X_FIX));
-
- buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
- buf_ptr_get_fsp_addr(node3, &space, &node3_addr);
-
- node1_addr = flst_get_prev_addr(node3, mtr);
-
- /* Set prev and next fields of node2 */
- flst_write_addr(node2 + FLST_PREV, node1_addr, mtr);
- flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr);
-
- if (!fil_addr_is_null(node1_addr)) {
- ulint zip_size = fil_space_get_zip_size(space);
- /* Update next field of node1 */
- node1 = fut_get_ptr(space, zip_size, node1_addr,
- RW_X_LATCH, mtr);
- flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr);
- } else {
- /* node3 was first in list: update first field in base */
- flst_write_addr(base + FLST_FIRST, node2_addr, mtr);
- }
-
- /* Set prev field of node3 */
- flst_write_addr(node3 + FLST_PREV, node2_addr, mtr);
-
- /* Update len of base node */
- len = flst_get_len(base, mtr);
- mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr);
-}
-
-/********************************************************************//**
-Removes a node. */
-UNIV_INTERN
-void
-flst_remove(
-/*========*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node2, /*!< in: node to remove */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint space;
- ulint zip_size;
- flst_node_t* node1;
- fil_addr_t node1_addr;
- fil_addr_t node2_addr;
- flst_node_t* node3;
- fil_addr_t node3_addr;
- ulint len;
-
- ut_ad(mtr && node2 && base);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, node2, MTR_MEMO_PAGE_X_FIX));
-
- buf_ptr_get_fsp_addr(node2, &space, &node2_addr);
- zip_size = fil_space_get_zip_size(space);
-
- node1_addr = flst_get_prev_addr(node2, mtr);
- node3_addr = flst_get_next_addr(node2, mtr);
-
- if (!fil_addr_is_null(node1_addr)) {
-
- /* Update next field of node1 */
-
- if (node1_addr.page == node2_addr.page) {
-
- node1 = page_align(node2) + node1_addr.boffset;
- } else {
- node1 = fut_get_ptr(space, zip_size,
- node1_addr, RW_X_LATCH, mtr);
- }
-
- ut_ad(node1 != node2);
-
- flst_write_addr(node1 + FLST_NEXT, node3_addr, mtr);
- } else {
- /* node2 was first in list: update first field in base */
- flst_write_addr(base + FLST_FIRST, node3_addr, mtr);
- }
-
- if (!fil_addr_is_null(node3_addr)) {
- /* Update prev field of node3 */
-
- if (node3_addr.page == node2_addr.page) {
-
- node3 = page_align(node2) + node3_addr.boffset;
- } else {
- node3 = fut_get_ptr(space, zip_size,
- node3_addr, RW_X_LATCH, mtr);
- }
-
- ut_ad(node2 != node3);
-
- flst_write_addr(node3 + FLST_PREV, node1_addr, mtr);
- } else {
- /* node2 was last in list: update last field in base */
- flst_write_addr(base + FLST_LAST, node1_addr, mtr);
- }
-
- /* Update len of base node */
- len = flst_get_len(base, mtr);
- ut_ad(len > 0);
-
- mlog_write_ulint(base + FLST_LEN, len - 1, MLOG_4BYTES, mtr);
-}
-
-/********************************************************************//**
-Validates a file-based list.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-flst_validate(
-/*==========*/
- const flst_base_node_t* base, /*!< in: pointer to base node of list */
- mtr_t* mtr1) /*!< in: mtr */
-{
- ulint space;
- ulint zip_size;
- const flst_node_t* node;
- fil_addr_t node_addr;
- fil_addr_t base_addr;
- ulint len;
- ulint i;
- mtr_t mtr2;
-
- ut_ad(base);
- ut_ad(mtr_memo_contains_page(mtr1, base, MTR_MEMO_PAGE_X_FIX));
-
- /* We use two mini-transaction handles: the first is used to
- lock the base node, and prevent other threads from modifying the
- list. The second is used to traverse the list. We cannot run the
- second mtr without committing it at times, because if the list
- is long, then the x-locked pages could fill the buffer resulting
- in a deadlock. */
-
- /* Find out the space id */
- buf_ptr_get_fsp_addr(base, &space, &base_addr);
- zip_size = fil_space_get_zip_size(space);
-
- len = flst_get_len(base, mtr1);
- node_addr = flst_get_first(base, mtr1);
-
- for (i = 0; i < len; i++) {
- mtr_start(&mtr2);
-
- node = fut_get_ptr(space, zip_size,
- node_addr, RW_X_LATCH, &mtr2);
- node_addr = flst_get_next_addr(node, &mtr2);
-
- mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
- becoming full */
- }
-
- ut_a(fil_addr_is_null(node_addr));
-
- node_addr = flst_get_last(base, mtr1);
-
- for (i = 0; i < len; i++) {
- mtr_start(&mtr2);
-
- node = fut_get_ptr(space, zip_size,
- node_addr, RW_X_LATCH, &mtr2);
- node_addr = flst_get_prev_addr(node, &mtr2);
-
- mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer
- becoming full */
- }
-
- ut_a(fil_addr_is_null(node_addr));
-
- return(TRUE);
-}
-
-/********************************************************************//**
-Prints info of a file-based list. */
-UNIV_INTERN
-void
-flst_print(
-/*=======*/
- const flst_base_node_t* base, /*!< in: pointer to base node of list */
- mtr_t* mtr) /*!< in: mtr */
-{
- const buf_frame_t* frame;
- ulint len;
-
- ut_ad(base && mtr);
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
- frame = page_align((byte*) base);
-
- len = flst_get_len(base, mtr);
-
- fprintf(stderr,
- "FILE-BASED LIST:\n"
- "Base node in space %lu page %lu byte offset %lu; len %lu\n",
- (ulong) page_get_space_id(frame),
- (ulong) page_get_page_no(frame),
- (ulong) page_offset(base), (ulong) len);
-}
diff --git a/storage/xtradb/ha/ha0ha.cc b/storage/xtradb/ha/ha0ha.cc
deleted file mode 100644
index 3674260f173..00000000000
--- a/storage/xtradb/ha/ha0ha.cc
+++ /dev/null
@@ -1,528 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file ha/ha0ha.cc
-The hash table with external chains
-
-Created 8/22/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "ha0ha.h"
-#ifdef UNIV_NONINL
-#include "ha0ha.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_DEBUG
-# include "buf0buf.h"
-#endif /* UNIV_DEBUG */
-# include "btr0sea.h"
-#include "page0page.h"
-
-/*************************************************************//**
-Creates a hash table with at least n array cells. The actual number
-of cells is chosen to be a prime number slightly bigger than n.
-@return own: created table */
-UNIV_INTERN
-hash_table_t*
-ha_create_func(
-/*===========*/
- ulint n, /*!< in: number of array cells */
-#ifdef UNIV_SYNC_DEBUG
- ulint sync_level, /*!< in: level of the mutexes or rw_locks
- in the latching order: this is used in the
- debug version */
-#endif /* UNIV_SYNC_DEBUG */
- ulint n_sync_obj, /*!< in: number of mutexes or rw_locks
- to protect the hash table: must be a
- power of 2, or 0 */
- ulint type) /*!< in: type of datastructure for which
- the memory heap is going to be used e.g.:
- MEM_HEAP_FOR_BTR_SEARCH or
- MEM_HEAP_FOR_PAGE_HASH */
-{
- hash_table_t* table;
- ulint i;
-
- ut_a(type == MEM_HEAP_FOR_BTR_SEARCH
- || type == MEM_HEAP_FOR_PAGE_HASH);
-
- ut_ad(ut_is_2pow(n_sync_obj));
- table = hash_create(n);
-
- /* Creating MEM_HEAP_BTR_SEARCH type heaps can potentially fail,
- but in practise it never should in this case, hence the asserts. */
-
- if (n_sync_obj == 0) {
- table->heap = mem_heap_create_typed(
- ut_min(4096, MEM_MAX_ALLOC_IN_BUF), type);
- ut_a(table->heap);
-
- return(table);
- }
-
- if (type == MEM_HEAP_FOR_PAGE_HASH) {
- /* We create a hash table protected by rw_locks for
- buf_pool->page_hash. */
- hash_create_sync_obj(table, HASH_TABLE_SYNC_RW_LOCK,
- n_sync_obj, sync_level);
- } else {
- hash_create_sync_obj(table, HASH_TABLE_SYNC_MUTEX,
- n_sync_obj, sync_level);
- }
-
- table->heaps = static_cast<mem_heap_t**>(
- mem_alloc(n_sync_obj * sizeof(void*)));
-
- for (i = 0; i < n_sync_obj; i++) {
- table->heaps[i] = mem_heap_create_typed(4096, type);
- ut_a(table->heaps[i]);
- }
-
- return(table);
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/*************************************************************//**
-Verifies that the specified hash table is a part of adaptive hash index and
-that its corresponding latch is X-latched by the current thread. */
-static
-bool
-ha_assert_btr_x_locked(
-/*===================*/
- const hash_table_t* table) /*!<in: hash table to check */
-{
- ulint i;
-
- ut_ad(table->adaptive);
-
- for (i = 0; i < btr_search_index_num; i++) {
- if (btr_search_sys->hash_tables[i] == table) {
- break;
- }
- }
-
- ut_ad(i < btr_search_index_num);
- ut_ad(rw_lock_own(&btr_search_latch_arr[i], RW_LOCK_EX));
-
- return(true);
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/*************************************************************//**
-Empties a hash table and frees the memory heaps. */
-UNIV_INTERN
-void
-ha_clear(
-/*=====*/
- hash_table_t* table) /*!< in, own: hash table */
-{
- ulint i;
- ulint n;
-
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!table->adaptive || ha_assert_btr_x_locked(table));
-#endif /* UNIV_SYNC_DEBUG */
-
- /* Free the memory heaps. */
- n = table->n_sync_obj;
-
- for (i = 0; i < n; i++) {
- mem_heap_free(table->heaps[i]);
- }
-
- if (table->heaps) {
- mem_free(table->heaps);
- }
-
- switch (table->type) {
- case HASH_TABLE_SYNC_MUTEX:
- for (ulint i = 0; i < table->n_sync_obj; i++)
- mutex_free(table->sync_obj.mutexes + i);
- mem_free(table->sync_obj.mutexes);
- table->sync_obj.mutexes = NULL;
- break;
-
- case HASH_TABLE_SYNC_RW_LOCK:
- for (ulint i = 0; i < table->n_sync_obj; i++)
- rw_lock_free(table->sync_obj.rw_locks + i);
- mem_free(table->sync_obj.rw_locks);
- table->sync_obj.rw_locks = NULL;
- break;
-
- case HASH_TABLE_SYNC_NONE:
- /* do nothing */
- break;
- }
-
- table->n_sync_obj = 0;
- table->type = HASH_TABLE_SYNC_NONE;
-
-
- /* Clear the hash table. */
- n = hash_get_n_cells(table);
-
- for (i = 0; i < n; i++) {
- hash_get_nth_cell(table, i)->node = NULL;
- }
-}
-
-/*************************************************************//**
-Inserts an entry into a hash table. If an entry with the same fold number
-is found, its node is updated to point to the new data, and no new node
-is inserted. If btr_search_enabled is set to FALSE, we will only allow
-updating existing nodes, but no new node is allowed to be added.
-@return TRUE if succeed, FALSE if no more memory could be allocated */
-UNIV_INTERN
-ibool
-ha_insert_for_fold_func(
-/*====================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold, /*!< in: folded value of data; if a node with
- the same fold value already exists, it is
- updated to point to the same data, and no new
- node is created! */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- buf_block_t* block, /*!< in: buffer block containing the data */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- const rec_t* data) /*!< in: data, must not be NULL */
-{
- hash_cell_t* cell;
- ha_node_t* node;
- ha_node_t* prev_node;
- ulint hash;
-
- ut_ad(data);
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- ut_a(block->frame == page_align(data));
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- hash_assert_can_modify(table, fold);
- ut_ad(btr_search_enabled);
-
- hash = hash_calc_hash(fold, table);
-
- cell = hash_get_nth_cell(table, hash);
-
- prev_node = static_cast<ha_node_t*>(cell->node);
-
- while (prev_node != NULL) {
- if (prev_node->fold == fold) {
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- if (table->adaptive) {
- buf_block_t* prev_block = prev_node->block;
- ut_a(prev_block->frame
- == page_align(prev_node->data));
- ut_a(prev_block->n_pointers > 0);
- prev_block->n_pointers--;
- block->n_pointers++;
- }
-
- prev_node->block = block;
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- prev_node->data = data;
-
- return(TRUE);
- }
-
- prev_node = prev_node->next;
- }
-
- /* We have to allocate a new chain node */
-
- node = static_cast<ha_node_t*>(
- mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t)));
-
- if (node == NULL) {
- /* It was a btr search type memory heap and at the moment
- no more memory could be allocated: return */
-
- ut_ad(hash_get_heap(table, fold)->type & MEM_HEAP_BTR_SEARCH);
-
- return(FALSE);
- }
-
- ha_node_set_data(node, block, data);
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- if (table->adaptive) {
- block->n_pointers++;
- }
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-
- node->fold = fold;
-
- node->next = NULL;
-
- prev_node = static_cast<ha_node_t*>(cell->node);
-
- if (prev_node == NULL) {
-
- cell->node = node;
-
- return(TRUE);
- }
-
- while (prev_node->next != NULL) {
-
- prev_node = prev_node->next;
- }
-
- prev_node->next = node;
-
- return(TRUE);
-}
-
-/***********************************************************//**
-Deletes a hash node. */
-UNIV_INTERN
-void
-ha_delete_hash_node(
-/*================*/
- hash_table_t* table, /*!< in: hash table */
- ha_node_t* del_node) /*!< in: node to be deleted */
-{
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(ha_assert_btr_x_locked(table));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(btr_search_enabled);
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- if (table->adaptive) {
- ut_a(del_node->block->frame = page_align(del_node->data));
- ut_a(del_node->block->n_pointers > 0);
- del_node->block->n_pointers--;
- }
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-
- HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node);
-}
-
-/*********************************************************//**
-Looks for an element when we know the pointer to the data, and updates
-the pointer to data, if found.
-@return TRUE if found */
-UNIV_INTERN
-ibool
-ha_search_and_update_if_found_func(
-/*===============================*/
- hash_table_t* table, /*!< in/out: hash table */
- ulint fold, /*!< in: folded value of the searched data */
- const rec_t* data, /*!< in: pointer to the data */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- buf_block_t* new_block,/*!< in: block containing new_data */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- const rec_t* new_data)/*!< in: new pointer to the data */
-{
- ha_node_t* node;
-
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- hash_assert_can_modify(table, fold);
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- ut_a(new_block->frame == page_align(new_data));
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(ha_assert_btr_x_locked(table));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (!btr_search_enabled) {
- return(FALSE);
- }
-
- node = ha_search_with_data(table, fold, data);
-
- if (node) {
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- if (table->adaptive) {
- ut_a(node->block->n_pointers > 0);
- node->block->n_pointers--;
- new_block->n_pointers++;
- }
-
- node->block = new_block;
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- node->data = new_data;
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*****************************************************************//**
-Removes from the chain determined by fold all nodes whose data pointer
-points to the page given. */
-UNIV_INTERN
-void
-ha_remove_all_nodes_to_page(
-/*========================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold, /*!< in: fold value */
- const page_t* page) /*!< in: buffer page */
-{
- ha_node_t* node;
-
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- hash_assert_can_modify(table, fold);
- ut_ad(btr_search_enabled);
-
- node = ha_chain_get_first(table, fold);
-
- while (node) {
- if (page_align(ha_node_get_data(node)) == page) {
-
- /* Remove the hash node */
-
- ha_delete_hash_node(table, node);
-
- /* Start again from the first node in the chain
- because the deletion may compact the heap of
- nodes and move other nodes! */
-
- node = ha_chain_get_first(table, fold);
- } else {
- node = ha_chain_get_next(node);
- }
- }
-#ifdef UNIV_DEBUG
- /* Check that all nodes really got deleted */
-
- node = ha_chain_get_first(table, fold);
-
- while (node) {
- ut_a(page_align(ha_node_get_data(node)) != page);
-
- node = ha_chain_get_next(node);
- }
-#endif
-}
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-/*************************************************************//**
-Validates a given range of the cells in hash table.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-ha_validate(
-/*========*/
- hash_table_t* table, /*!< in: hash table */
- ulint start_index, /*!< in: start index */
- ulint end_index) /*!< in: end index */
-{
- ibool ok = TRUE;
- ulint i;
-
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- ut_a(start_index <= end_index);
- ut_a(start_index < hash_get_n_cells(table));
- ut_a(end_index < hash_get_n_cells(table));
-
- for (i = start_index; i <= end_index; i++) {
- ha_node_t* node;
- hash_cell_t* cell;
-
- cell = hash_get_nth_cell(table, i);
-
- for (node = static_cast<ha_node_t*>(cell->node);
- node != 0;
- node = node->next) {
-
- if (hash_calc_hash(node->fold, table) != i) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- "InnoDB: Error: hash table node"
- " fold value %lu does not\n"
- "InnoDB: match the cell number %lu.\n",
- (ulong) node->fold, (ulong) i);
-
- ok = FALSE;
- }
- }
- }
-
- return(ok);
-}
-#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
-
-/*************************************************************//**
-Prints info of a hash table. */
-UNIV_INTERN
-void
-ha_print_info(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- hash_table_t* table) /*!< in: hash table */
-{
-#ifdef UNIV_DEBUG
-/* Some of the code here is disabled for performance reasons in production
-builds, see http://bugs.mysql.com/36941 */
-#define PRINT_USED_CELLS
-#endif /* UNIV_DEBUG */
-
-#ifdef PRINT_USED_CELLS
- hash_cell_t* cell;
- ulint cells = 0;
- ulint i;
-#endif /* PRINT_USED_CELLS */
- ulint n_bufs;
-
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
-#ifdef PRINT_USED_CELLS
- for (i = 0; i < hash_get_n_cells(table); i++) {
-
- cell = hash_get_nth_cell(table, i);
-
- if (cell->node) {
-
- cells++;
- }
- }
-#endif /* PRINT_USED_CELLS */
-
- fprintf(file, "Hash table size %lu",
- (ulong) hash_get_n_cells(table));
-
-#ifdef PRINT_USED_CELLS
- fprintf(file, ", used cells %lu", (ulong) cells);
-#endif /* PRINT_USED_CELLS */
-
- if (table->heaps == NULL && table->heap != NULL) {
-
- /* This calculation is intended for the adaptive hash
- index: how many buffer frames we have reserved? */
-
- n_bufs = UT_LIST_GET_LEN(table->heap->base) - 1;
-
- if (table->heap->free_block) {
- n_bufs++;
- }
-
- fprintf(file, ", node heap has %lu buffer(s)\n",
- (ulong) n_bufs);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/ha/ha0storage.cc b/storage/xtradb/ha/ha0storage.cc
deleted file mode 100644
index 6820591f316..00000000000
--- a/storage/xtradb/ha/ha0storage.cc
+++ /dev/null
@@ -1,184 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file ha/ha0storage.cc
-Hash storage.
-Provides a data structure that stores chunks of data in
-its own storage, avoiding duplicates.
-
-Created September 22, 2007 Vasil Dimov
-*******************************************************/
-
-#include "univ.i"
-#include "ha0storage.h"
-#include "hash0hash.h"
-#include "mem0mem.h"
-#include "ut0rnd.h"
-
-#ifdef UNIV_NONINL
-#include "ha0storage.ic"
-#endif
-
-/*******************************************************************//**
-Retrieves a data from a storage. If it is present, a pointer to the
-stored copy of data is returned, otherwise NULL is returned. */
-static
-const void*
-ha_storage_get(
-/*===========*/
- ha_storage_t* storage, /*!< in: hash storage */
- const void* data, /*!< in: data to check for */
- ulint data_len) /*!< in: data length */
-{
- ha_storage_node_t* node;
- ulint fold;
-
- /* avoid repetitive calls to ut_fold_binary() in the HASH_SEARCH
- macro */
- fold = ut_fold_binary(static_cast<const byte*>(data), data_len);
-
-#define IS_FOUND \
- node->data_len == data_len && memcmp(node->data, data, data_len) == 0
-
- HASH_SEARCH(
- next, /* node->"next" */
- storage->hash, /* the hash table */
- fold, /* key */
- ha_storage_node_t*, /* type of node->next */
- node, /* auxiliary variable */
- , /* assertion */
- IS_FOUND); /* search criteria */
-
- if (node == NULL) {
-
- return(NULL);
- }
- /* else */
-
- return(node->data);
-}
-
-/*******************************************************************//**
-Copies data into the storage and returns a pointer to the copy. If the
-same data chunk is already present, then pointer to it is returned.
-Data chunks are considered to be equal if len1 == len2 and
-memcmp(data1, data2, len1) == 0. If "data" is not present (and thus
-data_len bytes need to be allocated) and the size of storage is going to
-become more than "memlim" then "data" is not added and NULL is returned.
-To disable this behavior "memlim" can be set to 0, which stands for
-"no limit". */
-UNIV_INTERN
-const void*
-ha_storage_put_memlim(
-/*==================*/
- ha_storage_t* storage, /*!< in/out: hash storage */
- const void* data, /*!< in: data to store */
- ulint data_len, /*!< in: data length */
- ulint memlim) /*!< in: memory limit to obey */
-{
- void* raw;
- ha_storage_node_t* node;
- const void* data_copy;
- ulint fold;
-
- /* check if data chunk is already present */
- data_copy = ha_storage_get(storage, data, data_len);
- if (data_copy != NULL) {
-
- return(data_copy);
- }
-
- /* not present */
-
- /* check if we are allowed to allocate data_len bytes */
- if (memlim > 0
- && ha_storage_get_size(storage) + data_len > memlim) {
-
- return(NULL);
- }
-
- /* we put the auxiliary node struct and the data itself in one
- continuous block */
- raw = mem_heap_alloc(storage->heap,
- sizeof(ha_storage_node_t) + data_len);
-
- node = (ha_storage_node_t*) raw;
- data_copy = (byte*) raw + sizeof(*node);
-
- memcpy((byte*) raw + sizeof(*node), data, data_len);
-
- node->data_len = data_len;
- node->data = data_copy;
-
- /* avoid repetitive calls to ut_fold_binary() in the HASH_INSERT
- macro */
- fold = ut_fold_binary(static_cast<const byte*>(data), data_len);
-
- HASH_INSERT(
- ha_storage_node_t, /* type used in the hash chain */
- next, /* node->"next" */
- storage->hash, /* the hash table */
- fold, /* key */
- node); /* add this data to the hash */
-
- /* the output should not be changed because it will spoil the
- hash table */
- return(data_copy);
-}
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-void
-test_ha_storage()
-{
- ha_storage_t* storage;
- char buf[1024];
- int i;
- const void* stored[256];
- const void* p;
-
- storage = ha_storage_create(0, 0);
-
- for (i = 0; i < 256; i++) {
-
- memset(buf, i, sizeof(buf));
- stored[i] = ha_storage_put(storage, buf, sizeof(buf));
- }
-
- //ha_storage_empty(&storage);
-
- for (i = 255; i >= 0; i--) {
-
- memset(buf, i, sizeof(buf));
- p = ha_storage_put(storage, buf, sizeof(buf));
-
- if (p != stored[i]) {
-
- fprintf(stderr, "ha_storage_put() returned %p "
- "instead of %p, i=%d\n", p, stored[i], i);
- return;
- }
- }
-
- fprintf(stderr, "all ok\n");
-
- ha_storage_free(storage);
-}
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/xtradb/ha/hash0hash.cc b/storage/xtradb/ha/hash0hash.cc
deleted file mode 100644
index 6f5b98e5e98..00000000000
--- a/storage/xtradb/ha/hash0hash.cc
+++ /dev/null
@@ -1,403 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file ha/hash0hash.cc
-The simple hash table utility
-
-Created 5/20/1997 Heikki Tuuri
-*******************************************************/
-
-#include "hash0hash.h"
-#ifdef UNIV_NONINL
-#include "hash0hash.ic"
-#endif
-
-#include "mem0mem.h"
-
-#ifndef UNIV_HOTBACKUP
-
-# ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t hash_table_mutex_key;
-# endif /* UNIV_PFS_MUTEX */
-
-# ifdef UNIV_PFS_RWLOCK
-UNIV_INTERN mysql_pfs_key_t hash_table_rw_lock_key;
-# endif /* UNIV_PFS_RWLOCK */
-/************************************************************//**
-Reserves the mutex for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_mutex_enter(
-/*=============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
- ut_ad(table->type == HASH_TABLE_SYNC_MUTEX);
- mutex_enter(hash_get_mutex(table, fold));
-}
-
-/************************************************************//**
-Releases the mutex for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_mutex_exit(
-/*============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
- ut_ad(table->type == HASH_TABLE_SYNC_MUTEX);
- mutex_exit(hash_get_mutex(table, fold));
-}
-
-/************************************************************//**
-Reserves all the mutexes of a hash table, in an ascending order. */
-UNIV_INTERN
-void
-hash_mutex_enter_all(
-/*=================*/
- hash_table_t* table) /*!< in: hash table */
-{
- ulint i;
-
- ut_ad(table->type == HASH_TABLE_SYNC_MUTEX);
- for (i = 0; i < table->n_sync_obj; i++) {
-
- mutex_enter(table->sync_obj.mutexes + i);
- }
-}
-
-/************************************************************//**
-Releases all the mutexes of a hash table. */
-UNIV_INTERN
-void
-hash_mutex_exit_all(
-/*================*/
- hash_table_t* table) /*!< in: hash table */
-{
- ulint i;
-
- ut_ad(table->type == HASH_TABLE_SYNC_MUTEX);
- for (i = 0; i < table->n_sync_obj; i++) {
-
- mutex_exit(table->sync_obj.mutexes + i);
- }
-}
-
-/************************************************************//**
-Releases all but the passed in mutex of a hash table. */
-UNIV_INTERN
-void
-hash_mutex_exit_all_but(
-/*====================*/
- hash_table_t* table, /*!< in: hash table */
- ib_prio_mutex_t* keep_mutex) /*!< in: mutex to keep */
-{
- ulint i;
-
- ut_ad(table->type == HASH_TABLE_SYNC_MUTEX);
- for (i = 0; i < table->n_sync_obj; i++) {
-
- ib_prio_mutex_t* mutex = table->sync_obj.mutexes + i;
- if (UNIV_LIKELY(keep_mutex != mutex)) {
- mutex_exit(mutex);
- }
- }
-
- ut_ad(mutex_own(keep_mutex));
-}
-
-/************************************************************//**
-s-lock a lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_lock_s(
-/*========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
-
- prio_rw_lock_t* lock = hash_get_lock(table, fold);
-
- ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- ut_ad(lock);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_s_lock(lock);
-}
-
-/************************************************************//**
-x-lock a lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_lock_x(
-/*========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
-
- prio_rw_lock_t* lock = hash_get_lock(table, fold);
-
- ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- ut_ad(lock);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_x_lock(lock);
-}
-
-/************************************************************//**
-unlock an s-lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_unlock_s(
-/*==========*/
-
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
-
- prio_rw_lock_t* lock = hash_get_lock(table, fold);
-
- ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- ut_ad(lock);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_s_unlock(lock);
-}
-
-/************************************************************//**
-unlock x-lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_unlock_x(
-/*==========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
- prio_rw_lock_t* lock = hash_get_lock(table, fold);
-
- ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- ut_ad(lock);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_x_unlock(lock);
-}
-
-/************************************************************//**
-Reserves all the locks of a hash table, in an ascending order. */
-UNIV_INTERN
-void
-hash_lock_x_all(
-/*============*/
- hash_table_t* table) /*!< in: hash table */
-{
- ulint i;
-
- ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- for (i = 0; i < table->n_sync_obj; i++) {
-
- prio_rw_lock_t* lock = table->sync_obj.rw_locks + i;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_x_lock(lock);
- }
-}
-
-/************************************************************//**
-Releases all the locks of a hash table, in an ascending order. */
-UNIV_INTERN
-void
-hash_unlock_x_all(
-/*==============*/
- hash_table_t* table) /*!< in: hash table */
-{
- ulint i;
-
- ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- for (i = 0; i < table->n_sync_obj; i++) {
-
- prio_rw_lock_t* lock = table->sync_obj.rw_locks + i;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- rw_lock_x_unlock(lock);
- }
-}
-
-/************************************************************//**
-Releases all but passed in lock of a hash table, */
-UNIV_INTERN
-void
-hash_unlock_x_all_but(
-/*==================*/
- hash_table_t* table, /*!< in: hash table */
- prio_rw_lock_t* keep_lock) /*!< in: lock to keep */
-{
- ulint i;
-
- ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- for (i = 0; i < table->n_sync_obj; i++) {
-
- prio_rw_lock_t* lock = table->sync_obj.rw_locks + i;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (UNIV_LIKELY(keep_lock != lock)) {
- rw_lock_x_unlock(lock);
- }
- }
-}
-
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************//**
-Creates a hash table with >= n array cells. The actual number of cells is
-chosen to be a prime number slightly bigger than n.
-@return own: created table */
-UNIV_INTERN
-hash_table_t*
-hash_create(
-/*========*/
- ulint n) /*!< in: number of array cells */
-{
- hash_cell_t* array;
- ulint prime;
- hash_table_t* table;
-
- prime = ut_find_prime(n);
-
- table = static_cast<hash_table_t*>(mem_alloc(sizeof(hash_table_t)));
-
- array = static_cast<hash_cell_t*>(
- ut_malloc(sizeof(hash_cell_t) * prime));
-
- /* The default type of hash_table is HASH_TABLE_SYNC_NONE i.e.:
- the caller is responsible for access control to the table. */
- table->type = HASH_TABLE_SYNC_NONE;
- table->array = array;
- table->n_cells = prime;
-#ifndef UNIV_HOTBACKUP
-# if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- table->adaptive = FALSE;
-# endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- table->n_sync_obj = 0;
- table->sync_obj.mutexes = NULL;
- table->heaps = NULL;
-#endif /* !UNIV_HOTBACKUP */
- table->heap = NULL;
- ut_d(table->magic_n = HASH_TABLE_MAGIC_N);
-
- /* Initialize the cell array */
- hash_table_clear(table);
-
- return(table);
-}
-
-/*************************************************************//**
-Frees a hash table. */
-UNIV_INTERN
-void
-hash_table_free(
-/*============*/
- hash_table_t* table) /*!< in, own: hash table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
-
- ut_free(table->array);
- mem_free(table);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Creates a sync object array to protect a hash table.
-::sync_obj can be mutexes or rw_locks depening on the type of
-hash table. */
-UNIV_INTERN
-void
-hash_create_sync_obj_func(
-/*======================*/
- hash_table_t* table, /*!< in: hash table */
- enum hash_table_sync_t type, /*!< in: HASH_TABLE_SYNC_MUTEX
- or HASH_TABLE_SYNC_RW_LOCK */
-#ifdef UNIV_SYNC_DEBUG
- ulint sync_level,/*!< in: latching order level
- of the mutexes: used in the
- debug version */
-#endif /* UNIV_SYNC_DEBUG */
- ulint n_sync_obj)/*!< in: number of sync objects,
- must be a power of 2 */
-{
- ulint i;
-
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- ut_a(n_sync_obj > 0);
- ut_a(ut_is_2pow(n_sync_obj));
-
- table->type = type;
-
- switch (type) {
- case HASH_TABLE_SYNC_MUTEX:
- table->sync_obj.mutexes = static_cast<ib_prio_mutex_t*>(
- mem_alloc(n_sync_obj * sizeof(ib_prio_mutex_t)));
-
- for (i = 0; i < n_sync_obj; i++) {
- mutex_create(hash_table_mutex_key,
- table->sync_obj.mutexes + i, sync_level);
- }
-
- break;
-
- case HASH_TABLE_SYNC_RW_LOCK:
- table->sync_obj.rw_locks = static_cast<prio_rw_lock_t*>(
- mem_alloc(n_sync_obj * sizeof(prio_rw_lock_t)));
-
- for (i = 0; i < n_sync_obj; i++) {
- rw_lock_create(hash_table_rw_lock_key,
- table->sync_obj.rw_locks + i, sync_level);
- }
-
- break;
-
- case HASH_TABLE_SYNC_NONE:
- ut_error;
- }
-
- table->n_sync_obj = n_sync_obj;
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/ha_innodb.def b/storage/xtradb/ha_innodb.def
deleted file mode 100644
index e0faa62deb1..00000000000
--- a/storage/xtradb/ha_innodb.def
+++ /dev/null
@@ -1,4 +0,0 @@
-EXPORTS
- _mysql_plugin_interface_version_
- _mysql_sizeof_struct_st_plugin_
- _mysql_plugin_declarations_
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
deleted file mode 100644
index 6097f87b43d..00000000000
--- a/storage/xtradb/handler/ha_innodb.cc
+++ /dev/null
@@ -1,22318 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2000, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-Copyright (c) 2008, 2009 Google Inc.
-Copyright (c) 2009, Percona Inc.
-Copyright (c) 2012, Facebook Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-#define MYSQL_SERVER
-
-#include <sql_table.h> // explain_filename, nz2, EXPLAIN_PARTITIONS_AS_COMMENT,
- // EXPLAIN_FILENAME_MAX_EXTRA_LENGTH
-
-#include <sql_acl.h> // PROCESS_ACL
-#include <debug_sync.h> // DEBUG_SYNC
-#include <my_base.h> // HA_OPTION_*
-#include <mysys_err.h>
-#include <innodb_priv.h>
-#include <table_cache.h>
-#include <my_check_opt.h>
-
-#ifdef _WIN32
-#include <io.h>
-#endif
-
-#include <my_systemd.h>
-
-/** @file ha_innodb.cc */
-
-/* Include necessary InnoDB headers */
-#include "univ.i"
-#include "buf0dump.h"
-#include "buf0lru.h"
-#include "buf0flu.h"
-#include "buf0dblwr.h"
-#include "btr0sea.h"
-#include "btr0defragment.h"
-#include "os0file.h"
-#include "os0thread.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-#include "trx0roll.h"
-#include "trx0trx.h"
-#include "trx0sys.h"
-#include "rem0types.h"
-#include "row0ins.h"
-#include "row0mysql.h"
-#include "row0sel.h"
-#include "row0upd.h"
-#include "log0log.h"
-#include "log0online.h"
-#include "lock0lock.h"
-#include "dict0crea.h"
-#include "btr0cur.h"
-#include "btr0btr.h"
-#include "fsp0fsp.h"
-#include "sync0sync.h"
-#include "fil0fil.h"
-#include "fil0crypt.h"
-#include "trx0xa.h"
-#include "row0merge.h"
-#include "dict0boot.h"
-#include "dict0stats.h"
-#include "dict0stats_bg.h"
-#include "ha_prototypes.h"
-#include "ut0mem.h"
-#include "ut0timer.h"
-#include "ibuf0ibuf.h"
-#include "dict0dict.h"
-#include "srv0mon.h"
-#include "api0api.h"
-#include "api0misc.h"
-#include "pars0pars.h"
-#include "fts0fts.h"
-#include "fts0types.h"
-#include "row0import.h"
-#include "row0quiesce.h"
-#include "row0mysql.h"
-#ifdef UNIV_DEBUG
-#include "trx0purge.h"
-#endif /* UNIV_DEBUG */
-#include "fts0priv.h"
-#include "page0zip.h"
-#include "fil0pagecompress.h"
-
-#define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X))
-
-#ifdef MYSQL_DYNAMIC_PLUGIN
-#define tc_size 400
-#define tdc_size 400
-#endif
-
-#include "ha_innodb.h"
-#include "i_s.h"
-#include "xtradb_i_s.h"
-
-#include <string>
-#include <sstream>
-
-#include <mysql/plugin.h>
-#include <mysql/service_wsrep.h>
-
-# ifndef MYSQL_PLUGIN_IMPORT
-# define MYSQL_PLUGIN_IMPORT /* nothing */
-# endif /* MYSQL_PLUGIN_IMPORT */
-
-#ifdef WITH_WSREP
-#include "dict0priv.h"
-#include "../storage/innobase/include/ut0byte.h"
-#include <mysql/service_md5.h>
-
-class binlog_trx_data;
-extern handlerton *binlog_hton;
-
-extern MYSQL_PLUGIN_IMPORT mysql_mutex_t LOCK_wsrep_rollback;
-extern MYSQL_PLUGIN_IMPORT mysql_cond_t COND_wsrep_rollback;
-extern MYSQL_PLUGIN_IMPORT wsrep_aborting_thd_t wsrep_aborting_thd;
-
-static inline wsrep_ws_handle_t*
-wsrep_ws_handle(THD* thd, const trx_t* trx) {
- return wsrep_ws_handle_for_trx(wsrep_thd_ws_handle(thd),
- (wsrep_trx_id_t)trx->id);
-}
-
-extern TC_LOG* tc_log;
-extern void wsrep_cleanup_transaction(THD *thd);
-static int
-wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd,
- my_bool signal);
-static void
-wsrep_fake_trx_id(handlerton* hton, THD *thd);
-static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid);
-static int innobase_wsrep_get_checkpoint(handlerton* hton, XID* xid);
-#endif /* WITH_WSREP */
-
-/** to protect innobase_open_files */
-static mysql_mutex_t innobase_share_mutex;
-/** to force correct commit order in binlog */
-static ulong commit_threads = 0;
-static mysql_cond_t commit_cond;
-static mysql_mutex_t commit_cond_m;
-static mysql_mutex_t pending_checkpoint_mutex;
-static bool innodb_inited = 0;
-
-#define INSIDE_HA_INNOBASE_CC
-
-#define EQ_CURRENT_THD(thd) ((thd) == current_thd)
-
-static struct handlerton* innodb_hton_ptr;
-
-static const long AUTOINC_OLD_STYLE_LOCKING = 0;
-static const long AUTOINC_NEW_STYLE_LOCKING = 1;
-static const long AUTOINC_NO_LOCKING = 2;
-
-static long innobase_mirrored_log_groups;
-static long innobase_log_buffer_size;
-static long innobase_additional_mem_pool_size;
-static long innobase_file_io_threads;
-static long innobase_open_files;
-static long innobase_autoinc_lock_mode;
-static ulong innobase_commit_concurrency = 0;
-static ulong innobase_read_io_threads;
-static ulong innobase_write_io_threads;
-static long innobase_buffer_pool_instances = 1;
-
-static ulong innobase_log_block_size;
-
-static long long innobase_buffer_pool_size, innobase_log_file_size;
-/** Deprecated option that has no effect. */
-static my_bool innodb_buffer_pool_populate;
-
-/** Percentage of the buffer pool to reserve for 'old' blocks.
-Connected to buf_LRU_old_ratio. */
-static uint innobase_old_blocks_pct;
-
-/** Maximum on-disk size of change buffer in terms of percentage
-of the buffer pool. */
-static uint innobase_change_buffer_max_size = CHANGE_BUFFER_DEFAULT_SIZE;
-
-/* The default values for the following char* start-up parameters
-are determined in innobase_init below: */
-
-static char* innobase_data_home_dir = NULL;
-static char* innobase_data_file_path = NULL;
-static char* innobase_file_format_name = NULL;
-static char* innobase_change_buffering = NULL;
-static char* innobase_enable_monitor_counter = NULL;
-static char* innobase_disable_monitor_counter = NULL;
-static char* innobase_reset_monitor_counter = NULL;
-static char* innobase_reset_all_monitor_counter = NULL;
-
-/* The highest file format being used in the database. The value can be
-set by user, however, it will be adjusted to the newer file format if
-a table of such format is created/opened. */
-static char* innobase_file_format_max = NULL;
-
-/** Default value of innodb_file_format */
-static const char* innodb_file_format_default = "Barracuda";
-/** Default value of innodb_file_format_max */
-static const char* innodb_file_format_max_default = "Antelope";
-
-static char* innobase_file_flush_method = NULL;
-
-/* This variable can be set in the server configure file, specifying
-stopword table to be used */
-static char* innobase_server_stopword_table = NULL;
-
-/* Below we have boolean-valued start-up parameters, and their default
-values */
-
-static ulong innobase_fast_shutdown = 1;
-static my_bool innobase_file_format_check = TRUE;
-#ifdef UNIV_LOG_ARCHIVE
-static my_bool innobase_log_archive = FALSE;
-static char* innobase_log_arch_dir = NULL;
-#endif /* UNIV_LOG_ARCHIVE */
-static my_bool innobase_use_atomic_writes = FALSE;
-static my_bool innobase_use_fallocate = TRUE;
-static my_bool innobase_use_doublewrite = TRUE;
-static my_bool innobase_use_checksums = TRUE;
-static my_bool innobase_locks_unsafe_for_binlog = FALSE;
-static my_bool innobase_rollback_on_timeout = FALSE;
-static my_bool innobase_create_status_file = FALSE;
-static my_bool innobase_stats_on_metadata = TRUE;
-static my_bool innobase_large_prefix = FALSE;
-static my_bool innodb_optimize_fulltext_only = FALSE;
-
-static char* internal_innobase_data_file_path = NULL;
-
-static char* innodb_version_str = (char*) INNODB_VERSION_STR;
-
-extern uint srv_fil_crypt_rotate_key_age;
-extern uint srv_n_fil_crypt_iops;
-
-extern my_bool srv_immediate_scrub_data_uncompressed;
-extern my_bool srv_background_scrub_data_uncompressed;
-extern my_bool srv_background_scrub_data_compressed;
-extern uint srv_background_scrub_data_interval;
-extern uint srv_background_scrub_data_check_interval;
-#ifdef UNIV_DEBUG
-extern my_bool srv_scrub_force_testing;
-#endif
-
-/** Possible values for system variable "innodb_stats_method". The values
-are defined the same as its corresponding MyISAM system variable
-"myisam_stats_method"(see "myisam_stats_method_names"), for better usability */
-static const char* innodb_stats_method_names[] = {
- "nulls_equal",
- "nulls_unequal",
- "nulls_ignored",
- NullS
-};
-
-/** Used to define an enumerate type of the system variable innodb_stats_method.
-This is the same as "myisam_stats_method_typelib" */
-static TYPELIB innodb_stats_method_typelib = {
- array_elements(innodb_stats_method_names) - 1,
- "innodb_stats_method_typelib",
- innodb_stats_method_names,
- NULL
-};
-
-/** Possible values for system variables "innodb_checksum_algorithm" and
-"innodb_log_checksum_algorithm". */
-UNIV_INTERN
-const char* innodb_checksum_algorithm_names[] = {
- "CRC32",
- "STRICT_CRC32",
- "INNODB",
- "STRICT_INNODB",
- "NONE",
- "STRICT_NONE",
- NullS
-};
-
-/** Used to define an enumerate type of the system variables
-innodb_checksum_algorithm and innodb_log_checksum_algorithm. */
-UNIV_INTERN
-TYPELIB innodb_checksum_algorithm_typelib = {
- array_elements(innodb_checksum_algorithm_names) - 1,
- "innodb_checksum_algorithm_typelib",
- innodb_checksum_algorithm_names,
- NULL
-};
-
-/** Possible values for system variable "innodb_cleaner_lsn_age_factor". */
-static const char* innodb_cleaner_lsn_age_factor_names[] = {
- "LEGACY",
- "HIGH_CHECKPOINT",
- NullS
-};
-
-/** Enumeration for innodb_cleaner_lsn_age_factor. */
-static TYPELIB innodb_cleaner_lsn_age_factor_typelib = {
- array_elements(innodb_cleaner_lsn_age_factor_names) - 1,
- "innodb_cleaner_lsn_age_factor_typelib",
- innodb_cleaner_lsn_age_factor_names,
- NULL
-};
-
-/** Possible values for system variable "innodb_foreground_preflush". */
-static const char* innodb_foreground_preflush_names[] = {
- "SYNC_PREFLUSH",
- "EXPONENTIAL_BACKOFF",
- NullS
-};
-
-/* Enumeration for innodb_foreground_preflush. */
-static TYPELIB innodb_foreground_preflush_typelib = {
- array_elements(innodb_foreground_preflush_names) - 1,
- "innodb_foreground_preflush_typelib",
- innodb_foreground_preflush_names,
- NULL
-};
-
-/** Possible values for system variable "innodb_empty_free_list_algorithm". */
-static const char* innodb_empty_free_list_algorithm_names[] = {
- "LEGACY",
- "BACKOFF",
- NullS
-};
-
-/** Enumeration for innodb_empty_free_list_algorithm. */
-static TYPELIB innodb_empty_free_list_algorithm_typelib = {
- array_elements(innodb_empty_free_list_algorithm_names) - 1,
- "innodb_empty_free_list_algorithm_typelib",
- innodb_empty_free_list_algorithm_names,
- NULL
-};
-
-/** Possible values of the parameter innodb_lock_schedule_algorithm */
-static const char* innodb_lock_schedule_algorithm_names[] = {
- "fcfs",
- "vats",
- NullS
-};
-
-/** Used to define an enumerate type of the system variable
-innodb_lock_schedule_algorithm. */
-static TYPELIB innodb_lock_schedule_algorithm_typelib = {
- array_elements(innodb_lock_schedule_algorithm_names) - 1,
- "innodb_lock_schedule_algorithm_typelib",
- innodb_lock_schedule_algorithm_names,
- NULL
-};
-
-
-/* The following counter is used to convey information to InnoDB
-about server activity: in case of normal DML ops it is not
-sensible to call srv_active_wake_master_thread after each
-operation, we only do it every INNOBASE_WAKE_INTERVAL'th step. */
-
-#define INNOBASE_WAKE_INTERVAL 32
-static ulong innobase_active_counter = 0;
-
-static hash_table_t* innobase_open_tables;
-
-/** Allowed values of innodb_change_buffering */
-static const char* innobase_change_buffering_values[IBUF_USE_COUNT] = {
- "none", /* IBUF_USE_NONE */
- "inserts", /* IBUF_USE_INSERT */
- "deletes", /* IBUF_USE_DELETE_MARK */
- "changes", /* IBUF_USE_INSERT_DELETE_MARK */
- "purges", /* IBUF_USE_DELETE */
- "all" /* IBUF_USE_ALL */
-};
-
-/* Call back function array defined by MySQL and used to
-retrieve FTS results. */
-const struct _ft_vft ft_vft_result = {NULL,
- innobase_fts_find_ranking,
- innobase_fts_close_ranking,
- innobase_fts_retrieve_ranking,
- NULL};
-
-const struct _ft_vft_ext ft_vft_ext_result = {innobase_fts_get_version,
- innobase_fts_flags,
- innobase_fts_retrieve_docid,
- innobase_fts_count_matches};
-
-#ifdef HAVE_PSI_INTERFACE
-/* Keys to register pthread mutexes/cond in the current file with
-performance schema */
-static mysql_pfs_key_t innobase_share_mutex_key;
-static mysql_pfs_key_t commit_cond_mutex_key;
-static mysql_pfs_key_t commit_cond_key;
-static mysql_pfs_key_t pending_checkpoint_mutex_key;
-
-static PSI_mutex_info all_pthread_mutexes[] = {
- {&commit_cond_mutex_key, "commit_cond_mutex", 0},
- {&innobase_share_mutex_key, "innobase_share_mutex", 0},
- {&pending_checkpoint_mutex_key, "pending_checkpoint_mutex", 0}
-};
-
-static PSI_cond_info all_innodb_conds[] = {
- {&commit_cond_key, "commit_cond", 0}
-};
-
-# ifdef UNIV_PFS_MUTEX
-/* all_innodb_mutexes array contains mutexes that are
-performance schema instrumented if "UNIV_PFS_MUTEX"
-is defined */
-static PSI_mutex_info all_innodb_mutexes[] = {
- {&autoinc_mutex_key, "autoinc_mutex", 0},
-# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
- {&buffer_block_mutex_key, "buffer_block_mutex", 0},
-# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
- {&buf_pool_zip_mutex_key, "buf_pool_zip_mutex", 0},
- {&buf_pool_LRU_list_mutex_key, "buf_pool_LRU_list_mutex", 0},
- {&buf_pool_free_list_mutex_key, "buf_pool_free_list_mutex", 0},
- {&buf_pool_zip_free_mutex_key, "buf_pool_zip_free_mutex", 0},
- {&buf_pool_zip_hash_mutex_key, "buf_pool_zip_hash_mutex", 0},
- {&buf_pool_flush_state_mutex_key, "buf_pool_flush_state_mutex", 0},
- {&cache_last_read_mutex_key, "cache_last_read_mutex", 0},
- {&dict_foreign_err_mutex_key, "dict_foreign_err_mutex", 0},
- {&dict_sys_mutex_key, "dict_sys_mutex", 0},
- {&file_format_max_mutex_key, "file_format_max_mutex", 0},
- {&fil_system_mutex_key, "fil_system_mutex", 0},
- {&flush_list_mutex_key, "flush_list_mutex", 0},
- {&fts_bg_threads_mutex_key, "fts_bg_threads_mutex", 0},
- {&fts_delete_mutex_key, "fts_delete_mutex", 0},
- {&fts_optimize_mutex_key, "fts_optimize_mutex", 0},
- {&fts_doc_id_mutex_key, "fts_doc_id_mutex", 0},
- {&fts_pll_tokenize_mutex_key, "fts_pll_tokenize_mutex", 0},
- {&log_flush_order_mutex_key, "log_flush_order_mutex", 0},
- {&hash_table_mutex_key, "hash_table_mutex", 0},
- {&ibuf_bitmap_mutex_key, "ibuf_bitmap_mutex", 0},
- {&ibuf_mutex_key, "ibuf_mutex", 0},
- {&ibuf_pessimistic_insert_mutex_key,
- "ibuf_pessimistic_insert_mutex", 0},
-# ifndef HAVE_ATOMIC_BUILTINS
- {&server_mutex_key, "server_mutex", 0},
-# endif /* !HAVE_ATOMIC_BUILTINS */
- {&log_bmp_sys_mutex_key, "log_bmp_sys_mutex", 0},
- {&log_sys_mutex_key, "log_sys_mutex", 0},
-# ifdef UNIV_MEM_DEBUG
- {&mem_hash_mutex_key, "mem_hash_mutex", 0},
-# endif /* UNIV_MEM_DEBUG */
- {&mem_pool_mutex_key, "mem_pool_mutex", 0},
- {&mutex_list_mutex_key, "mutex_list_mutex", 0},
- {&page_zip_stat_per_index_mutex_key, "page_zip_stat_per_index_mutex", 0},
- {&purge_sys_bh_mutex_key, "purge_sys_bh_mutex", 0},
- {&recv_sys_mutex_key, "recv_sys_mutex", 0},
- {&recv_writer_mutex_key, "recv_writer_mutex", 0},
- {&rseg_mutex_key, "rseg_mutex", 0},
-# ifdef UNIV_SYNC_DEBUG
- {&rw_lock_debug_mutex_key, "rw_lock_debug_mutex", 0},
-# endif /* UNIV_SYNC_DEBUG */
- {&rw_lock_list_mutex_key, "rw_lock_list_mutex", 0},
- {&rw_lock_mutex_key, "rw_lock_mutex", 0},
- {&srv_dict_tmpfile_mutex_key, "srv_dict_tmpfile_mutex", 0},
- {&srv_innodb_monitor_mutex_key, "srv_innodb_monitor_mutex", 0},
- {&srv_misc_tmpfile_mutex_key, "srv_misc_tmpfile_mutex", 0},
- {&srv_monitor_file_mutex_key, "srv_monitor_file_mutex", 0},
-# ifdef UNIV_SYNC_DEBUG
- {&sync_thread_mutex_key, "sync_thread_mutex", 0},
-# endif /* UNIV_SYNC_DEBUG */
- {&buf_dblwr_mutex_key, "buf_dblwr_mutex", 0},
- {&trx_undo_mutex_key, "trx_undo_mutex", 0},
- {&srv_sys_mutex_key, "srv_sys_mutex", 0},
- {&lock_sys_mutex_key, "lock_mutex", 0},
- {&lock_sys_wait_mutex_key, "lock_wait_mutex", 0},
- {&trx_mutex_key, "trx_mutex", 0},
- {&srv_sys_tasks_mutex_key, "srv_threads_mutex", 0},
- /* mutex with os_fast_mutex_ interfaces */
-# ifndef PFS_SKIP_EVENT_MUTEX
- {&event_os_mutex_key, "event_os_mutex", 0},
-# endif /* PFS_SKIP_EVENT_MUTEX */
- {&os_mutex_key, "os_mutex", 0},
-#ifndef HAVE_ATOMIC_BUILTINS
- {&srv_conc_mutex_key, "srv_conc_mutex", 0},
-#endif /* !HAVE_ATOMIC_BUILTINS */
-#ifndef HAVE_ATOMIC_BUILTINS_64
- {&monitor_mutex_key, "monitor_mutex", 0},
-#endif /* !HAVE_ATOMIC_BUILTINS_64 */
- {&ut_list_mutex_key, "ut_list_mutex", 0},
- {&trx_sys_mutex_key, "trx_sys_mutex", 0},
- {&zip_pad_mutex_key, "zip_pad_mutex", 0},
-};
-# endif /* UNIV_PFS_MUTEX */
-
-# ifdef UNIV_PFS_RWLOCK
-/* all_innodb_rwlocks array contains rwlocks that are
-performance schema instrumented if "UNIV_PFS_RWLOCK"
-is defined */
-static PSI_rwlock_info all_innodb_rwlocks[] = {
-# ifdef UNIV_LOG_ARCHIVE
- {&archive_lock_key, "archive_lock", 0},
-# endif /* UNIV_LOG_ARCHIVE */
- {&btr_search_latch_key, "btr_search_latch", 0},
-# ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
- {&buf_block_lock_key, "buf_block_lock", 0},
-# endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
-# ifdef UNIV_SYNC_DEBUG
- {&buf_block_debug_latch_key, "buf_block_debug_latch", 0},
-# endif /* UNIV_SYNC_DEBUG */
- {&dict_operation_lock_key, "dict_operation_lock", 0},
- {&fil_space_latch_key, "fil_space_latch", 0},
- {&checkpoint_lock_key, "checkpoint_lock", 0},
- {&fts_cache_rw_lock_key, "fts_cache_rw_lock", 0},
- {&fts_cache_init_rw_lock_key, "fts_cache_init_rw_lock", 0},
- {&trx_i_s_cache_lock_key, "trx_i_s_cache_lock", 0},
- {&trx_purge_latch_key, "trx_purge_latch", 0},
- {&index_tree_rw_lock_key, "index_tree_rw_lock", 0},
- {&index_online_log_key, "index_online_log", 0},
- {&dict_table_stats_key, "dict_table_stats", 0},
- {&hash_table_rw_lock_key, "hash_table_locks", 0}
-};
-# endif /* UNIV_PFS_RWLOCK */
-
-# ifdef UNIV_PFS_THREAD
-/* all_innodb_threads array contains threads that are
-performance schema instrumented if "UNIV_PFS_THREAD"
-is defined */
-static PSI_thread_info all_innodb_threads[] = {
- {&trx_rollback_clean_thread_key, "trx_rollback_clean_thread", 0},
- {&io_handler_thread_key, "io_handler_thread", 0},
- {&srv_lock_timeout_thread_key, "srv_lock_timeout_thread", 0},
- {&srv_error_monitor_thread_key, "srv_error_monitor_thread", 0},
- {&srv_monitor_thread_key, "srv_monitor_thread", 0},
- {&srv_master_thread_key, "srv_master_thread", 0},
- {&srv_purge_thread_key, "srv_purge_thread", 0},
- {&buf_page_cleaner_thread_key, "page_cleaner_thread", 0},
- {&buf_lru_manager_thread_key, "lru_manager_thread", 0},
- {&recv_writer_thread_key, "recv_writer_thread", 0},
- {&srv_log_tracking_thread_key, "srv_redo_log_follow_thread", 0}
-};
-# endif /* UNIV_PFS_THREAD */
-
-# ifdef UNIV_PFS_IO
-/* all_innodb_files array contains the type of files that are
-performance schema instrumented if "UNIV_PFS_IO" is defined */
-static PSI_file_info all_innodb_files[] = {
- {&innodb_file_data_key, "innodb_data_file", 0},
- {&innodb_file_log_key, "innodb_log_file", 0},
- {&innodb_file_temp_key, "innodb_temp_file", 0},
- {&innodb_file_bmp_key, "innodb_bmp_file", 0}
-};
-# endif /* UNIV_PFS_IO */
-#endif /* HAVE_PSI_INTERFACE */
-
-/** Always normalize table name to lower case on Windows */
-#ifdef __WIN__
-#define normalize_table_name(norm_name, name) \
- normalize_table_name_low(norm_name, name, TRUE)
-#else
-#define normalize_table_name(norm_name, name) \
- normalize_table_name_low(norm_name, name, FALSE)
-#endif /* __WIN__ */
-
-/** Set up InnoDB API callback function array.
-Every entry is one of the ib_* API functions cast to the generic
-ib_cb_t type.
-NOTE(review): consumers presumably address the entries by position, so
-the order here would have to match the layout they expect -- confirm
-before inserting, removing or reordering entries. */
-ib_cb_t innodb_api_cb[] = {
- (ib_cb_t) ib_cursor_open_table,
- (ib_cb_t) ib_cursor_read_row,
- (ib_cb_t) ib_cursor_insert_row,
- (ib_cb_t) ib_cursor_delete_row,
- (ib_cb_t) ib_cursor_update_row,
- (ib_cb_t) ib_cursor_moveto,
- (ib_cb_t) ib_cursor_first,
- (ib_cb_t) ib_cursor_next,
- (ib_cb_t) ib_cursor_last,
- (ib_cb_t) ib_cursor_set_match_mode,
- (ib_cb_t) ib_sec_search_tuple_create,
- (ib_cb_t) ib_clust_read_tuple_create,
- (ib_cb_t) ib_tuple_delete,
- (ib_cb_t) ib_tuple_copy,
- (ib_cb_t) ib_tuple_read_u8,
- (ib_cb_t) ib_tuple_write_u8,
- (ib_cb_t) ib_tuple_read_u16,
- (ib_cb_t) ib_tuple_write_u16,
- (ib_cb_t) ib_tuple_read_u32,
- (ib_cb_t) ib_tuple_write_u32,
- (ib_cb_t) ib_tuple_read_u64,
- (ib_cb_t) ib_tuple_write_u64,
- (ib_cb_t) ib_tuple_read_i8,
- (ib_cb_t) ib_tuple_write_i8,
- (ib_cb_t) ib_tuple_read_i16,
- (ib_cb_t) ib_tuple_write_i16,
- (ib_cb_t) ib_tuple_read_i32,
- (ib_cb_t) ib_tuple_write_i32,
- (ib_cb_t) ib_tuple_read_i64,
- (ib_cb_t) ib_tuple_write_i64,
- (ib_cb_t) ib_tuple_get_n_cols,
- (ib_cb_t) ib_col_set_value,
- (ib_cb_t) ib_col_get_value,
- (ib_cb_t) ib_col_get_meta,
- (ib_cb_t) ib_trx_begin,
- (ib_cb_t) ib_trx_commit,
- (ib_cb_t) ib_trx_rollback,
- (ib_cb_t) ib_trx_start,
- (ib_cb_t) ib_trx_release,
- (ib_cb_t) ib_trx_state,
- (ib_cb_t) ib_cursor_lock,
- (ib_cb_t) ib_cursor_close,
- (ib_cb_t) ib_cursor_new_trx,
- (ib_cb_t) ib_cursor_reset,
- (ib_cb_t) ib_open_table_by_name,
- (ib_cb_t) ib_col_get_name,
- (ib_cb_t) ib_table_truncate,
- (ib_cb_t) ib_cursor_open_index_using_name,
- (ib_cb_t) ib_close_thd,
- (ib_cb_t) ib_cfg_get_cfg,
- (ib_cb_t) ib_cursor_set_memcached_sync,
- (ib_cb_t) ib_cursor_set_cluster_access,
- (ib_cb_t) ib_cursor_commit_trx,
- (ib_cb_t) ib_cfg_trx_level,
- (ib_cb_t) ib_tuple_get_n_user_cols,
- (ib_cb_t) ib_cursor_set_lock_mode,
- (ib_cb_t) ib_cursor_clear_trx,
- (ib_cb_t) ib_get_idx_field_name,
- (ib_cb_t) ib_trx_get_start_time,
- (ib_cb_t) ib_cfg_bk_commit_interval,
- (ib_cb_t) ib_cursor_stmt_begin,
- (ib_cb_t) ib_trx_read_only
-};
-
-/* Forward declaration; the definition is not in this part of the file. */
-static void innodb_remember_check_sysvar_funcs();
-/* NOTE(review): presumably set by innodb_remember_check_sysvar_funcs()
-to the server's check function for enum system variables -- confirm. */
-mysql_var_check_func check_sysvar_enum;
-
-/* Session variable default_encryption_key_id (unsigned integer) with no
-check or update callback. It is referenced by the ENCRYPTION_KEY_ID
-entry of innodb_table_option_list.
-NOTE(review): the trailing numeric arguments are presumably
-default, minimum, maximum and block size -- confirm against plugin.h. */
-static MYSQL_THDVAR_UINT(default_encryption_key_id, PLUGIN_VAR_RQCMDARG,
- "Default encryption key id used for table encryption.",
- NULL, NULL,
- FIL_DEFAULT_ENCRYPTION_KEY, 1, UINT_MAX32, 0);
-
-/**
- Structure for CREATE TABLE options (table options).
- It needs to be called ha_table_option_struct.
-
- The option values can be specified in the CREATE TABLE at the end:
- CREATE TABLE ( ... ) *here*
-
- innodb_table_option_list[] names each supported option and is
- terminated by HA_TOPTION_END.
- NOTE(review): the second macro argument presumably names the
- ha_table_option_struct member that receives the value -- confirm.
-*/
-
-ha_create_table_option innodb_table_option_list[]=
-{
- /* With this option the user can enable the page compression feature
- for the table */
- HA_TOPTION_BOOL("PAGE_COMPRESSED", page_compressed, 0),
- /* With this option the user can set the zip compression level for
- page compression for this table.
- NOTE(review): the arguments 0, 1, 9, 1 are presumably default,
- minimum, maximum and step -- confirm. */
- HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, 0, 1, 9, 1),
- /* With this option the user can enable the atomic writes feature for
- this table */
- HA_TOPTION_ENUM("ATOMIC_WRITES", atomic_writes, "DEFAULT,ON,OFF", 0),
- /* With this option the user can enable encryption for the table */
- HA_TOPTION_ENUM("ENCRYPTED", encryption, "DEFAULT,YES,NO", 0),
- /* With this option the user defines the key identifier used for
- encryption; the option is tied to the default_encryption_key_id
- session variable */
- HA_TOPTION_SYSVAR("ENCRYPTION_KEY_ID", encryption_key_id, default_encryption_key_id),
-
- HA_TOPTION_END
-};
-
-/**
-  Compare a file path against the mysql data directory path.
-
-  Both paths are normalized with convert_dirname() and the data
-  directory is then matched against the leading part of the given path.
-  Note that, despite the function name, a true result means the paths
-  DIFFER.
-
-  @param path  null terminated character string
-
-  @return
-    @retval TRUE   The path is different from mysql data directory.
-    @retval FALSE  The path is same as mysql data directory, or
-                   path is NULL.
-*/
-static bool is_mysql_datadir_path(const char *path)
-{
-    char datadir_norm[FN_REFLEN];
-    char candidate_norm[FN_REFLEN];
-
-    if (path == NULL) {
-        return false;
-    }
-
-    convert_dirname(candidate_norm, path, NullS);
-    convert_dirname(datadir_norm, mysql_unpacked_real_data_home, NullS);
-
-    const size_t datadir_len = dirname_length(datadir_norm);
-    const size_t candidate_len = dirname_length(candidate_norm);
-
-    /* A directory part shorter than the data directory is reported
-    as different without comparing any bytes. */
-    if (candidate_len < datadir_len) {
-        return true;
-    }
-
-    if (lower_case_file_system) {
-        /* Compare through the file name collation; the final TRUE
-        argument asks for the data directory to be treated as a
-        prefix of the candidate. */
-        return files_charset_info->coll->strnncoll(
-            files_charset_info,
-            (uchar *) candidate_norm, candidate_len,
-            (uchar *) datadir_norm, datadir_len,
-            TRUE) != 0;
-    }
-
-    /* Byte-wise comparison of the first datadir_len bytes. */
-    return memcmp(datadir_norm, candidate_norm, datadir_len) != 0;
-}
-
-
-/**
-  Create a temporary file in the given directory via create_temp_file().
-
-  On non-Windows builds the file name is unlinked immediately after a
-  successful creation, so only the returned descriptor refers to the
-  file.
-
-  @param path    directory for the file; must not be NULL
-  @param prefix  prefix passed on to create_temp_file()
-
-  @return the descriptor returned by create_temp_file(); a value below
-          zero is treated here as "no file was created"
-*/
-static int mysql_tmpfile_path(const char *path, const char *prefix)
-{
-    DBUG_ASSERT(path != NULL);
-    DBUG_ASSERT((strlen(path) + strlen(prefix)) <= FN_REFLEN);
-
-    char tmp_name[FN_REFLEN];
-
-    const int open_flags =
-#ifdef __WIN__
-        O_BINARY | O_TRUNC | O_SEQUENTIAL |
-        O_SHORT_LIVED |
-#endif /* __WIN__ */
-        O_CREAT | O_EXCL | O_RDWR | O_TEMPORARY;
-
-    const File handle = create_temp_file(tmp_name, path, prefix,
-                                         open_flags, MYF(MY_WME));
-
-#ifndef __WIN__
-    /*
-      This can be removed once the following bug is fixed:
-      Bug #28903  create_temp_file() doesn't honor O_TEMPORARY option
-                  (file not removed) (Unix)
-    */
-    if (handle >= 0) {
-        unlink(tmp_name);
-    }
-#endif /* !__WIN__ */
-
-    return handle;
-}
-
-/*************************************************************//**
-Check whether valid argument given to innodb_ft_*_stopword_table.
-This function is registered as a callback with MySQL.
-@return 0 for valid stopword table */
-static
-int
-innodb_stopword_table_validate(
-/*===========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value); /*!< in: incoming string */
-
-/** Validate passed-in "value" is a valid directory name.
-This function is registered as a callback with MySQL.
-
-A NULL value is accepted as is. Any other value must resolve to an
-existing directory that the server can read and write and that is not
-located under the mysql data directory; the session also needs the
-global FILE privilege. On success a copy of the resolved path, made
-with thd_memdup(), is stored in *save. On every rejection a warning is
-pushed to the session and *save is set to NULL.
-@param[in,out] thd thread handle
-@param[in] var pointer to system variable
-@param[out] save immediate result for update
-@param[in] value incoming string
-@return 0 for valid name, 1 otherwise */
-static
-int
-innodb_tmpdir_validate(
-    THD* thd,
-    struct st_mysql_sys_var* var,
-    void* save,
-    struct st_mysql_value* value)
-{
-
-    char* alter_tmp_dir;
-    char* innodb_tmp_dir;
-    char buff[OS_FILE_MAX_PATH];
-    int len = sizeof(buff);
-    char tmp_abs_path[FN_REFLEN + 2];
-
-    ut_ad(save != NULL);
-    ut_ad(value != NULL);
-
-    if (check_global_access(thd, FILE_ACL)) {
-        push_warning_printf(
-            thd, Sql_condition::WARN_LEVEL_WARN,
-            ER_WRONG_ARGUMENTS,
-            "InnoDB: FILE Permissions required");
-        *static_cast<const char**>(save) = NULL;
-        return(1);
-    }
-
-    alter_tmp_dir = (char*) value->val_str(value, buff, &len);
-
-    /* A NULL value is passed through unchanged. */
-    if (!alter_tmp_dir) {
-        *static_cast<const char**>(save) = alter_tmp_dir;
-        return(0);
-    }
-
-    if (strlen(alter_tmp_dir) > FN_REFLEN) {
-        push_warning_printf(
-            thd, Sql_condition::WARN_LEVEL_WARN,
-            ER_WRONG_ARGUMENTS,
-            "Path length should not exceed %d bytes", FN_REFLEN);
-        *static_cast<const char**>(save) = NULL;
-        return(1);
-    }
-
-    /* The return value of my_realpath() is not checked here; a path
-    that does not exist is rejected by the my_access() test below. */
-    my_realpath(tmp_abs_path, alter_tmp_dir, 0);
-    size_t tmp_abs_len = strlen(tmp_abs_path);
-
-    if (my_access(tmp_abs_path, F_OK)) {
-
-        push_warning_printf(
-            thd, Sql_condition::WARN_LEVEL_WARN,
-            ER_WRONG_ARGUMENTS,
-            "InnoDB: Path doesn't exist.");
-        *static_cast<const char**>(save) = NULL;
-        return(1);
-    } else if (my_access(tmp_abs_path, R_OK | W_OK)) {
-        push_warning_printf(
-            thd, Sql_condition::WARN_LEVEL_WARN,
-            ER_WRONG_ARGUMENTS,
-            "InnoDB: Server doesn't have permission in "
-            "the given location.");
-        *static_cast<const char**>(save) = NULL;
-        return(1);
-    }
-
-    MY_STAT stat_info_dir;
-
-    /* If my_stat() fails the directory test is skipped. */
-    if (my_stat(tmp_abs_path, &stat_info_dir, MYF(0))) {
-        /* Compare the file type under the S_IFMT mask. The S_IFDIR
-        bit is also part of the type codes of block devices and
-        sockets on common Unix platforms, so testing that bit alone
-        would accept them as directories. */
-        if ((stat_info_dir.st_mode & S_IFMT) != S_IFDIR) {
-
-            push_warning_printf(
-                thd, Sql_condition::WARN_LEVEL_WARN,
-                ER_WRONG_ARGUMENTS,
-                "Given path is not a directory. ");
-            *static_cast<const char**>(save) = NULL;
-            return(1);
-        }
-    }
-
-    /* is_mysql_datadir_path() returns false when the path matches the
-    data directory, which is not allowed here. */
-    if (!is_mysql_datadir_path(tmp_abs_path)) {
-
-        push_warning_printf(
-            thd, Sql_condition::WARN_LEVEL_WARN,
-            ER_WRONG_ARGUMENTS,
-            "InnoDB: Path Location should not be same as "
-            "mysql data directory location.");
-        *static_cast<const char**>(save) = NULL;
-        return(1);
-    }
-
-    innodb_tmp_dir = static_cast<char*>(
-        thd_memdup(thd, tmp_abs_path, tmp_abs_len + 1));
-    *static_cast<const char**>(save) = innodb_tmp_dir;
-    return(0);
-}
-
-/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default
-system clustered index when there is no primary key.
-NOTE(review): presumably user-defined indexes must not use this name --
-confirm where that check is enforced. */
-const char innobase_index_reserve_name[] = "GEN_CLUST_INDEX";
-/************************************************************//**
-Synchronously read and parse the redo log up to the last
-checkpoint to write the changed page bitmap.
-@return 0 to indicate success. Current implementation cannot fail. */
-static
-my_bool
-innobase_flush_changed_page_bitmaps() __attribute__((unused));
-/*==================================*/
-/************************************************************//**
-Delete all the bitmap files for data less than the specified LSN.
-If called with lsn == 0 (i.e. set by RESET request) or
-IB_ULONGLONG_MAX, restart the bitmap file sequence, otherwise
-continue it.
-@return 0 to indicate success, 1 for failure. */
-static
-my_bool
-innobase_purge_changed_page_bitmaps(
-/*================================*/
- ulonglong lsn) __attribute__((unused)); /*!< in: LSN to purge files up to */
-
-/** Empty free list algorithm.
-Decide whether the buffer pool is big enough to enable the backoff
-variant of the empty free list algorithm.
-The backoff algorithm requires free pages from the LRU for the best
-performance. buf_LRU_buf_pool_running_out cancels a query if 1/4 of the
-buffer pool belongs to the LRU or the free list, while
-buf_flush_LRU_list_batch keeps up to BUF_LRU_MIN_LEN pages in the LRU.
-To avoid a deadlock, backoff requires the buffer pool to hold at least
-4*BUF_LRU_MIN_LEN pages; flush performance is bad at that size because
-of thrashing, so an additional BUF_LRU_MIN_LEN pages are requested.
-The page count is taken per buffer pool instance.
-@param[in] algorithm desired algorithm from srv_empty_free_list_t
-@return true if the algorithm may be enabled: always for an algorithm
-other than backoff, and for backoff only when the pool is big enough */
-static inline
-bool
-innodb_empty_free_list_algorithm_allowed(
-    srv_empty_free_list_t algorithm)
-{
-    const long long pages_per_instance =
-        srv_buf_pool_size / srv_page_size / srv_buf_pool_instances;
-
-    if (algorithm != SRV_EMPTY_FREE_LIST_BACKOFF) {
-        return(true);
-    }
-
-    return(pages_per_instance >= BUF_LRU_MIN_LEN * (4 + 1));
-}
-
-/** Get the list of foreign keys referencing a specified table.
-@param thd The thread handle
-@param path Path to the table
-@param[out] f_key_list The list of foreign keys
-
-@return error code or zero for success */
-static
-int
-innobase_get_parent_fk_list(
- THD* thd,
- const char* path,
- List<FOREIGN_KEY_INFO>* f_key_list) __attribute__((unused));
-
-/******************************************************************//**
-Maps a MySQL trx isolation level code to the InnoDB isolation level code
-@return InnoDB isolation level */
-static inline
-ulint
-innobase_map_isolation_level(
-/*=========================*/
- enum_tx_isolation iso); /*!< in: MySQL isolation level code */
-
-/** Enable / disable checkpoints.
-@param hton handlerton (not used by this function)
-@param disable true to call log_disable_checkpoint(), false to call
-log_enable_checkpoint()
-@return always 0 */
-static int innobase_checkpoint_state(handlerton *hton, bool disable)
-{
-    if (!disable) {
-        log_enable_checkpoint();
-        return 0;
-    }
-
-    /* The result of log_disable_checkpoint() is deliberately discarded. */
-    (void) log_disable_checkpoint();
-    return 0;
-}
-
-/*************************************************************//**
-Check for a valid value of innobase_compression_algorithm.
-@return 0 for valid innodb_compression_algorithm. */
-static
-int
-innodb_compression_algorithm_validate(
-/*==================================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value); /*!< in: incoming string */
-
-static
-int
-innodb_encrypt_tables_validate(
-/*==================================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value); /*!< in: incoming string */
-
-/* Name string of the storage engine. */
-static const char innobase_hton_name[]= "InnoDB";
-
-/* Session (per-THD) variables follow. For the BOOL variants the
-arguments after the help text are check function, update function and
-default value, as annotated on the first two definitions.
-NOTE(review): for the ULONG variants the trailing numbers are presumably
-default, minimum, maximum and block size -- confirm against plugin.h. */
-
-/* support_xa: boolean, default TRUE, no callbacks. */
-static MYSQL_THDVAR_BOOL(support_xa, PLUGIN_VAR_OPCMDARG,
- "Enable InnoDB support for the XA two-phase commit",
- /* check_func */ NULL, /* update_func */ NULL,
- /* default */ TRUE);
-
-/* table_locks: boolean, default TRUE, no callbacks. */
-static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
- "Enable InnoDB locking in LOCK TABLES",
- /* check_func */ NULL, /* update_func */ NULL,
- /* default */ TRUE);
-
-/* strict_mode: boolean, default TRUE, no callbacks. */
-static MYSQL_THDVAR_BOOL(strict_mode, PLUGIN_VAR_OPCMDARG,
- "Use strict mode when evaluating create options.",
- NULL, NULL, TRUE);
-
-/* ft_enable_stopword: boolean, default TRUE, no callbacks. */
-static MYSQL_THDVAR_BOOL(ft_enable_stopword, PLUGIN_VAR_OPCMDARG,
- "Create FTS index with stopword.",
- NULL, NULL,
- /* default */ TRUE);
-
-/* lock_wait_timeout: unsigned long, no callbacks; numeric arguments
-50, 1, 1024 * 1024 * 1024, 0. */
-static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG,
- "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back. Values above 100000000 disable the timeout.",
- NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
-
-/* ft_user_stopword_table: string, checked by
-innodb_stopword_table_validate(); no update function. */
-static MYSQL_THDVAR_STR(ft_user_stopword_table,
- PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC,
- "User supplied stopword table name, effective in the session level.",
- innodb_stopword_table_validate, NULL, NULL);
-
-/* flush_log_at_trx_commit: unsigned long, no callbacks; numeric
-arguments 1, 0, 3, 0. The help text lists the meaning of 0 to 3. */
-static MYSQL_THDVAR_ULONG(flush_log_at_trx_commit, PLUGIN_VAR_OPCMDARG,
- "Controls the durability/speed trade-off for commits."
- " Set to 0 (write and flush redo log to disk only once per second),"
- " 1 (flush to disk at each commit),"
- " 2 (write to log at commit but flush to disk only once per second)"
- " or 3 (flush to disk at prepare and at commit, slower and usually redundant)."
- " 1 and 3 guarantees that after a crash, committed transactions will"
- " not be lost and will be consistent with the binlog and other transactional"
- " engines. 2 can get inconsistent and lose transactions if there is a"
- " power failure or kernel crash but not if mysqld crashes. 0 has no"
- " guarantees in case of crash. 0 and 2 can be faster than 1 or 3.",
- NULL, NULL, 1, 0, 3, 0);
-
-/* fake_changes: boolean, default FALSE, no callbacks. */
-static MYSQL_THDVAR_BOOL(fake_changes, PLUGIN_VAR_OPCMDARG,
- "In the transaction after enabled, UPDATE, INSERT and DELETE only move the cursor to the records "
- "and do nothing other operations (no changes, no ibuf, no undo, no transaction log) in the transaction. "
- "This is to cause replication prefetch IO. ATTENTION: the transaction started after enabled is affected.",
- NULL, NULL, FALSE);
-
-/* tmpdir: string, checked by innodb_tmpdir_validate(); no update
-function. */
-static MYSQL_THDVAR_STR(tmpdir,
- PLUGIN_VAR_OPCMDARG|PLUGIN_VAR_MEMALLOC,
- "Directory for temporary non-tablespace files.",
- innodb_tmpdir_validate, NULL, NULL);
-
-/* Flags set to 1 or 0 at compile time by the IF_LZO / IF_LZ4 / IF_LZMA /
-IF_BZIP2 / IF_SNAPPY macros. They are exported through the have_*
-entries of innodb_status_variables.
-NOTE(review): presumably 1 means the server was built with the
-corresponding compression library -- confirm in the macro definitions. */
-static ibool innodb_have_lzo=IF_LZO(1, 0);
-static ibool innodb_have_lz4=IF_LZ4(1, 0);
-static ibool innodb_have_lzma=IF_LZMA(1, 0);
-static ibool innodb_have_bzip2=IF_BZIP2(1, 0);
-static ibool innodb_have_snappy=IF_SNAPPY(1, 0);
-
-/* Status variables exported by InnoDB. Each entry holds the variable
-name, the address of the value (a field of export_vars or one of the
-innodb_have_* flags) cast to char*, and the SHOW_* display type.
-Some entries exist only in UNIV_DEBUG builds. The list is terminated by
-the {NullS, NullS, SHOW_LONG} entry. */
-static SHOW_VAR innodb_status_variables[]= {
- {"available_undo_logs",
- (char*) &export_vars.innodb_available_undo_logs, SHOW_LONG},
- {"background_log_sync",
- (char*) &export_vars.innodb_background_log_sync, SHOW_LONG},
- {"buffer_pool_bytes_data",
- (char*) &export_vars.innodb_buffer_pool_bytes_data, SHOW_LONG},
- {"buffer_pool_bytes_dirty",
- (char*) &export_vars.innodb_buffer_pool_bytes_dirty, SHOW_LONG},
- {"buffer_pool_dump_status",
- (char*) &export_vars.innodb_buffer_pool_dump_status, SHOW_CHAR},
- {"buffer_pool_load_status",
- (char*) &export_vars.innodb_buffer_pool_load_status, SHOW_CHAR},
- {"buffer_pool_pages_data",
- (char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG},
- {"buffer_pool_pages_dirty",
- (char*) &export_vars.innodb_buffer_pool_pages_dirty, SHOW_LONG},
- {"buffer_pool_pages_flushed",
- (char*) &export_vars.innodb_buffer_pool_pages_flushed, SHOW_LONG},
- {"buffer_pool_pages_free",
- (char*) &export_vars.innodb_buffer_pool_pages_free, SHOW_LONG},
-#ifdef UNIV_DEBUG
- {"buffer_pool_pages_latched",
- (char*) &export_vars.innodb_buffer_pool_pages_latched, SHOW_LONG},
-#endif /* UNIV_DEBUG */
- {"buffer_pool_pages_LRU_flushed",
- (char*) &export_vars.innodb_buffer_pool_pages_LRU_flushed, SHOW_LONG},
- {"buffer_pool_pages_made_not_young",
- (char*) &export_vars.innodb_buffer_pool_pages_made_not_young, SHOW_LONG},
- {"buffer_pool_pages_made_young",
- (char*) &export_vars.innodb_buffer_pool_pages_made_young, SHOW_LONG},
- {"buffer_pool_pages_misc",
- (char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG},
- {"buffer_pool_pages_old",
- (char*) &export_vars.innodb_buffer_pool_pages_old, SHOW_LONG},
- {"buffer_pool_pages_total",
- (char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG},
- {"buffer_pool_read_ahead",
- (char*) &export_vars.innodb_buffer_pool_read_ahead, SHOW_LONG},
- {"buffer_pool_read_ahead_evicted",
- (char*) &export_vars.innodb_buffer_pool_read_ahead_evicted, SHOW_LONG},
- {"buffer_pool_read_ahead_rnd",
- (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
- {"buffer_pool_read_requests",
- (char*) &export_vars.innodb_buffer_pool_read_requests, SHOW_LONG},
- {"buffer_pool_reads",
- (char*) &export_vars.innodb_buffer_pool_reads, SHOW_LONG},
- {"buffer_pool_wait_free",
- (char*) &export_vars.innodb_buffer_pool_wait_free, SHOW_LONG},
- {"buffer_pool_write_requests",
- (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
- {"checkpoint_age",
- (char*) &export_vars.innodb_checkpoint_age, SHOW_LONG},
- {"checkpoint_max_age",
- (char*) &export_vars.innodb_checkpoint_max_age, SHOW_LONG},
- {"data_fsyncs",
- (char*) &export_vars.innodb_data_fsyncs, SHOW_LONG},
- {"data_pending_fsyncs",
- (char*) &export_vars.innodb_data_pending_fsyncs, SHOW_LONG},
- {"data_pending_reads",
- (char*) &export_vars.innodb_data_pending_reads, SHOW_LONG},
- {"data_pending_writes",
- (char*) &export_vars.innodb_data_pending_writes, SHOW_LONG},
- {"data_read",
- (char*) &export_vars.innodb_data_read, SHOW_LONG},
- {"data_reads",
- (char*) &export_vars.innodb_data_reads, SHOW_LONG},
- {"data_writes",
- (char*) &export_vars.innodb_data_writes, SHOW_LONG},
- {"data_written",
- (char*) &export_vars.innodb_data_written, SHOW_LONG},
- {"dblwr_pages_written",
- (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG},
- {"dblwr_writes",
- (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG},
- {"deadlocks",
- (char*) &export_vars.innodb_deadlocks, SHOW_LONG},
- {"have_atomic_builtins",
- (char*) &export_vars.innodb_have_atomic_builtins, SHOW_BOOL},
- {"history_list_length",
- (char*) &export_vars.innodb_history_list_length, SHOW_LONG},
- {"ibuf_discarded_delete_marks",
- (char*) &export_vars.innodb_ibuf_discarded_delete_marks, SHOW_LONG},
- {"ibuf_discarded_deletes",
- (char*) &export_vars.innodb_ibuf_discarded_deletes, SHOW_LONG},
- {"ibuf_discarded_inserts",
- (char*) &export_vars.innodb_ibuf_discarded_inserts, SHOW_LONG},
- {"ibuf_free_list",
- (char*) &export_vars.innodb_ibuf_free_list, SHOW_LONG},
- {"ibuf_merged_delete_marks",
- (char*) &export_vars.innodb_ibuf_merged_delete_marks, SHOW_LONG},
- {"ibuf_merged_deletes",
- (char*) &export_vars.innodb_ibuf_merged_deletes, SHOW_LONG},
- {"ibuf_merged_inserts",
- (char*) &export_vars.innodb_ibuf_merged_inserts, SHOW_LONG},
- {"ibuf_merges",
- (char*) &export_vars.innodb_ibuf_merges, SHOW_LONG},
- {"ibuf_segment_size",
- (char*) &export_vars.innodb_ibuf_segment_size, SHOW_LONG},
- {"ibuf_size",
- (char*) &export_vars.innodb_ibuf_size, SHOW_LONG},
- {"log_waits",
- (char*) &export_vars.innodb_log_waits, SHOW_LONG},
- {"log_write_requests",
- (char*) &export_vars.innodb_log_write_requests, SHOW_LONG},
- {"log_writes",
- (char*) &export_vars.innodb_log_writes, SHOW_LONG},
- {"lsn_current",
- (char*) &export_vars.innodb_lsn_current, SHOW_LONGLONG},
- {"lsn_flushed",
- (char*) &export_vars.innodb_lsn_flushed, SHOW_LONGLONG},
- {"lsn_last_checkpoint",
- (char*) &export_vars.innodb_lsn_last_checkpoint, SHOW_LONGLONG},
- {"master_thread_active_loops",
- (char*) &export_vars.innodb_master_thread_active_loops, SHOW_LONG},
- {"master_thread_idle_loops",
- (char*) &export_vars.innodb_master_thread_idle_loops, SHOW_LONG},
- {"max_trx_id",
- (char*) &export_vars.innodb_max_trx_id, SHOW_LONGLONG},
- {"mem_adaptive_hash",
- (char*) &export_vars.innodb_mem_adaptive_hash, SHOW_LONG},
- {"mem_dictionary",
- (char*) &export_vars.innodb_mem_dictionary, SHOW_LONG},
- {"mem_total",
- (char*) &export_vars.innodb_mem_total, SHOW_LONG},
- {"mutex_os_waits",
- (char*) &export_vars.innodb_mutex_os_waits, SHOW_LONGLONG},
- {"mutex_spin_rounds",
- (char*) &export_vars.innodb_mutex_spin_rounds, SHOW_LONGLONG},
- {"mutex_spin_waits",
- (char*) &export_vars.innodb_mutex_spin_waits, SHOW_LONGLONG},
- {"oldest_view_low_limit_trx_id",
- (char*) &export_vars.innodb_oldest_view_low_limit_trx_id, SHOW_LONGLONG},
- {"os_log_fsyncs",
- (char*) &export_vars.innodb_os_log_fsyncs, SHOW_LONG},
- {"os_log_pending_fsyncs",
- (char*) &export_vars.innodb_os_log_pending_fsyncs, SHOW_LONG},
- {"os_log_pending_writes",
- (char*) &export_vars.innodb_os_log_pending_writes, SHOW_LONG},
- {"os_log_written",
- (char*) &export_vars.innodb_os_log_written, SHOW_LONGLONG},
- {"page_size",
- (char*) &export_vars.innodb_page_size, SHOW_LONG},
- {"pages_created",
- (char*) &export_vars.innodb_pages_created, SHOW_LONG},
- {"pages_read",
- (char*) &export_vars.innodb_pages_read, SHOW_LONG},
- {"pages0_read",
- (char*) &export_vars.innodb_page0_read, SHOW_LONG},
- {"pages_written",
- (char*) &export_vars.innodb_pages_written, SHOW_LONG},
- {"purge_trx_id",
- (char*) &export_vars.innodb_purge_trx_id, SHOW_LONGLONG},
-#ifdef UNIV_DEBUG
- {"purge_trx_id_age",
- (char*) &export_vars.innodb_purge_trx_id_age, SHOW_LONG},
-#endif /* UNIV_DEBUG */
- {"purge_undo_no",
- (char*) &export_vars.innodb_purge_undo_no, SHOW_LONGLONG},
-#ifdef UNIV_DEBUG
- {"purge_view_trx_id_age",
- (char*) &export_vars.innodb_purge_view_trx_id_age, SHOW_LONG},
-#endif /* UNIV_DEBUG */
- {"read_views_memory",
- (char*) &export_vars.innodb_read_views_memory, SHOW_LONG},
- {"row_lock_current_waits",
- (char*) &export_vars.innodb_row_lock_current_waits, SHOW_LONG},
- {"row_lock_time",
- (char*) &export_vars.innodb_row_lock_time, SHOW_LONGLONG},
- {"row_lock_time_avg",
- (char*) &export_vars.innodb_row_lock_time_avg, SHOW_LONG},
- {"row_lock_time_max",
- (char*) &export_vars.innodb_row_lock_time_max, SHOW_LONG},
- {"row_lock_waits",
- (char*) &export_vars.innodb_row_lock_waits, SHOW_LONG},
- {"rows_deleted",
- (char*) &export_vars.innodb_rows_deleted, SHOW_LONG},
- {"rows_inserted",
- (char*) &export_vars.innodb_rows_inserted, SHOW_LONG},
- {"rows_read",
- (char*) &export_vars.innodb_rows_read, SHOW_LONG},
- {"rows_updated",
- (char*) &export_vars.innodb_rows_updated, SHOW_LONG},
- {"system_rows_deleted",
- (char*) &export_vars.innodb_system_rows_deleted, SHOW_LONG},
- {"system_rows_inserted",
- (char*) &export_vars.innodb_system_rows_inserted, SHOW_LONG},
- {"system_rows_read",
- (char*) &export_vars.innodb_system_rows_read, SHOW_LONG},
- {"system_rows_updated",
- (char*) &export_vars.innodb_system_rows_updated, SHOW_LONG},
- {"s_lock_os_waits",
- (char*) &export_vars.innodb_s_lock_os_waits, SHOW_LONGLONG},
- {"s_lock_spin_rounds",
- (char*) &export_vars.innodb_s_lock_spin_rounds, SHOW_LONGLONG},
- {"s_lock_spin_waits",
- (char*) &export_vars.innodb_s_lock_spin_waits, SHOW_LONGLONG},
- {"truncated_status_writes",
- (char*) &export_vars.innodb_truncated_status_writes, SHOW_LONG},
- {"x_lock_os_waits",
- (char*) &export_vars.innodb_x_lock_os_waits, SHOW_LONGLONG},
- {"x_lock_spin_rounds",
- (char*) &export_vars.innodb_x_lock_spin_rounds, SHOW_LONGLONG},
- {"x_lock_spin_waits",
- (char*) &export_vars.innodb_x_lock_spin_waits, SHOW_LONGLONG},
-
- /* Status variables for page compression */
- {"page_compression_saved",
- (char*) &export_vars.innodb_page_compression_saved, SHOW_LONGLONG},
- {"page_compression_trim_sect512",
- (char*) &export_vars.innodb_page_compression_trim_sect512, SHOW_LONGLONG},
- {"page_compression_trim_sect1024",
- (char*) &export_vars.innodb_page_compression_trim_sect1024, SHOW_LONGLONG},
- {"page_compression_trim_sect2048",
- (char*) &export_vars.innodb_page_compression_trim_sect2048, SHOW_LONGLONG},
- {"page_compression_trim_sect4096",
- (char*) &export_vars.innodb_page_compression_trim_sect4096, SHOW_LONGLONG},
- {"page_compression_trim_sect8192",
- (char*) &export_vars.innodb_page_compression_trim_sect8192, SHOW_LONGLONG},
- {"page_compression_trim_sect16384",
- (char*) &export_vars.innodb_page_compression_trim_sect16384, SHOW_LONGLONG},
- {"page_compression_trim_sect32768",
- (char*) &export_vars.innodb_page_compression_trim_sect32768, SHOW_LONGLONG},
- {"num_index_pages_written",
- (char*) &export_vars.innodb_index_pages_written, SHOW_LONGLONG},
- {"num_non_index_pages_written",
- (char*) &export_vars.innodb_non_index_pages_written, SHOW_LONGLONG},
- {"num_pages_page_compressed",
- (char*) &export_vars.innodb_pages_page_compressed, SHOW_LONGLONG},
- {"num_page_compressed_trim_op",
- (char*) &export_vars.innodb_page_compressed_trim_op, SHOW_LONGLONG},
- {"num_page_compressed_trim_op_saved",
- (char*) &export_vars.innodb_page_compressed_trim_op_saved, SHOW_LONGLONG},
- {"num_pages_page_decompressed",
- (char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG},
- {"num_pages_page_compression_error",
- (char*) &export_vars.innodb_pages_page_compression_error, SHOW_LONGLONG},
- {"num_pages_encrypted",
- (char*) &export_vars.innodb_pages_encrypted, SHOW_LONGLONG},
- {"num_pages_decrypted",
- (char*) &export_vars.innodb_pages_decrypted, SHOW_LONGLONG},
- {"have_lz4",
- (char*) &innodb_have_lz4, SHOW_BOOL},
- {"have_lzo",
- (char*) &innodb_have_lzo, SHOW_BOOL},
- {"have_lzma",
- (char*) &innodb_have_lzma, SHOW_BOOL},
- {"have_bzip2",
- (char*) &innodb_have_bzip2, SHOW_BOOL},
- {"have_snappy",
- (char*) &innodb_have_snappy, SHOW_BOOL},
-
- /* Defragment */
- {"defragment_compression_failures",
- (char*) &export_vars.innodb_defragment_compression_failures, SHOW_LONG},
- {"defragment_failures",
- (char*) &export_vars.innodb_defragment_failures, SHOW_LONG},
- {"defragment_count",
- (char*) &export_vars.innodb_defragment_count, SHOW_LONG},
-
- /* Online alter table status variables */
- {"onlineddl_rowlog_rows",
- (char*) &export_vars.innodb_onlineddl_rowlog_rows, SHOW_LONG},
- {"onlineddl_rowlog_pct_used",
- (char*) &export_vars.innodb_onlineddl_rowlog_pct_used, SHOW_LONG},
- {"onlineddl_pct_progress",
- (char*) &export_vars.innodb_onlineddl_pct_progress, SHOW_LONG},
-
- /* Times secondary index lookup triggered cluster lookup and
- times prefix optimization avoided triggering cluster lookup */
- {"secondary_index_triggered_cluster_reads",
- (char*) &export_vars.innodb_sec_rec_cluster_reads, SHOW_LONG},
- {"secondary_index_triggered_cluster_reads_avoided",
- (char*) &export_vars.innodb_sec_rec_cluster_reads_avoided, SHOW_LONG},
-
- /* Encryption */
- {"encryption_rotation_pages_read_from_cache",
- (char*) &export_vars.innodb_encryption_rotation_pages_read_from_cache,
- SHOW_LONG},
- {"encryption_rotation_pages_read_from_disk",
- (char*) &export_vars.innodb_encryption_rotation_pages_read_from_disk,
- SHOW_LONG},
- {"encryption_rotation_pages_modified",
- (char*) &export_vars.innodb_encryption_rotation_pages_modified,
- SHOW_LONG},
- {"encryption_rotation_pages_flushed",
- (char*) &export_vars.innodb_encryption_rotation_pages_flushed,
- SHOW_LONG},
- {"encryption_rotation_estimated_iops",
- (char*) &export_vars.innodb_encryption_rotation_estimated_iops,
- SHOW_LONG},
- {"encryption_key_rotation_list_length",
- (char*)&export_vars.innodb_key_rotation_list_length,
- SHOW_LONGLONG},
-
- /* Scrubbing feature */
- {"scrub_background_page_reorganizations",
- (char*) &export_vars.innodb_scrub_page_reorganizations,
- SHOW_LONG},
- {"scrub_background_page_splits",
- (char*) &export_vars.innodb_scrub_page_splits,
- SHOW_LONG},
- {"scrub_background_page_split_failures_underflow",
- (char*) &export_vars.innodb_scrub_page_split_failures_underflow,
- SHOW_LONG},
- {"scrub_background_page_split_failures_out_of_filespace",
- (char*) &export_vars.innodb_scrub_page_split_failures_out_of_filespace,
- SHOW_LONG},
- {"scrub_background_page_split_failures_missing_index",
- (char*) &export_vars.innodb_scrub_page_split_failures_missing_index,
- SHOW_LONG},
- {"scrub_background_page_split_failures_unknown",
- (char*) &export_vars.innodb_scrub_page_split_failures_unknown,
- SHOW_LONG},
- {"encryption_num_key_requests",
- (char*) &export_vars.innodb_encryption_key_requests, SHOW_LONGLONG},
-
- /* End-of-list marker */
- {NullS, NullS, SHOW_LONG}
-};
-
-/************************************************************************//**
-Handling the shared INNOBASE_SHARE structure that is needed to provide table
-locking. Register the table name if it doesn't exist in the hash table. */
-static
-INNOBASE_SHARE*
-get_share(
-/*======*/
- const char* table_name); /*!< in: table to lookup */
-
-/************************************************************************//**
-Free the shared object that was registered with get_share(). */
-static
-void
-free_share(
-/*=======*/
- INNOBASE_SHARE* share); /*!< in/own: share to free */
-
-/*****************************************************************//**
-Frees a possible InnoDB trx object associated with the current THD.
-@return 0 or error number */
-static
-int
-innobase_close_connection(
-/*======================*/
- handlerton* hton, /*!< in/out: Innodb handlerton */
- THD* thd); /*!< in: MySQL thread handle for
- which to close the connection */
-
-static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
-static void innobase_checkpoint_request(handlerton *hton, void *cookie);
-
-/*****************************************************************//**
-Cancel any pending lock request associated with the current THD. */
-static
-void
-innobase_kill_connection(
-/*======================*/
- handlerton* hton, /*!< in: innobase handlerton */
- THD* thd, /*!< in: handle to the MySQL thread being killed */
- thd_kill_levels);
-
-/*****************************************************************//**
-Commits a transaction in an InnoDB database or marks an SQL statement
-ended.
-@return 0 */
-static
-int
-innobase_commit(
-/*============*/
- handlerton* hton, /*!< in/out: Innodb handlerton */
- THD* thd, /*!< in: MySQL thread handle of the
- user for whom the transaction should
- be committed */
- bool commit_trx); /*!< in: true - commit transaction
- false - the current SQL statement
- ended */
-
-/*****************************************************************//**
-Rolls back a transaction or the current SQL statement.
-@return 0 or error number */
-static
-int
-innobase_rollback(
-/*==============*/
- handlerton* hton, /*!< in/out: Innodb handlerton */
- THD* thd, /*!< in: handle to the MySQL thread
- of the user whose transaction should
- be rolled back */
- bool rollback_trx); /*!< in: TRUE - rollback entire
- transaction FALSE - rollback the current
- statement only */
-
-/*****************************************************************//**
-Rolls back a transaction to a savepoint.
-@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
-given name */
-static
-int
-innobase_rollback_to_savepoint(
-/*===========================*/
- handlerton* hton, /*!< in/out: InnoDB handlerton */
- THD* thd, /*!< in: handle to the MySQL thread of
- the user whose XA transaction should
- be rolled back to savepoint */
- void* savepoint); /*!< in: savepoint data */
-
-/*****************************************************************//**
-Check whether innodb state allows to safely release MDL locks after
-rollback to savepoint.
-@return true if it is safe, false if its not safe. */
-static
-bool
-innobase_rollback_to_savepoint_can_release_mdl(
-/*===========================================*/
- handlerton* hton, /*!< in/out: InnoDB handlerton */
- THD* thd); /*!< in: handle to the MySQL thread of
- the user whose XA transaction should
- be rolled back to savepoint */
-
-/*****************************************************************//**
-Sets a transaction savepoint.
-@return always 0, that is, always succeeds */
-static
-int
-innobase_savepoint(
-/*===============*/
- handlerton* hton, /*!< in/out: InnoDB handlerton */
- THD* thd, /*!< in: handle to the MySQL thread of
- the user's XA transaction for which
- we need to take a savepoint */
- void* savepoint); /*!< in: savepoint data */
-
-/*****************************************************************//**
-Release transaction savepoint name.
-@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
-given name */
-static
-int
-innobase_release_savepoint(
-/*=======================*/
- handlerton* hton, /*!< in/out: handlerton for Innodb */
- THD* thd, /*!< in: handle to the MySQL thread
- of the user whose transaction's
- savepoint should be released */
- void* savepoint); /*!< in: savepoint data */
-
-/************************************************************************//**
-Function for constructing an InnoDB table handler instance. */
-static
-handler*
-innobase_create_handler(
-/*====================*/
- handlerton* hton, /*!< in/out: handlerton for Innodb */
- TABLE_SHARE* table,
- MEM_ROOT* mem_root);
-
-/** @brief Initialize the default value of innodb_commit_concurrency.
-
-Once InnoDB is running, the innodb_commit_concurrency must not change
-from zero to nonzero. (Bug #42101)
-
-The initial default value is 0, and without this extra initialization,
-SET GLOBAL innodb_commit_concurrency=DEFAULT would set the parameter
-to 0, even if it was initially set to nonzero at the command line
-or configuration file. */
-static
-void
-innobase_commit_concurrency_init_default();
-/*=======================================*/
-
-/** @brief Initialize the default and max value of innodb_undo_logs.
-
-Once InnoDB is running, the default value and the max value of
-innodb_undo_logs must be equal to the available undo logs,
-given by srv_available_undo_logs. */
-static
-void
-innobase_undo_logs_init_default_max();
-/*==================================*/
-
-/************************************************************//**
-Validate the file format name and return its corresponding id.
-@return valid file format id */
-static
-uint
-innobase_file_format_name_lookup(
-/*=============================*/
- const char* format_name); /*!< in: pointer to file format
- name */
-/************************************************************//**
-Validate the file format check config parameters, as a side effect it
-sets the srv_max_file_format_at_startup variable.
-@return the format_id if valid config value, otherwise, return -1 */
-static
-int
-innobase_file_format_validate_and_set(
-/*==================================*/
- const char* format_max); /*!< in: parameter value */
-
-/*******************************************************************//**
-This function is used to prepare an X/Open XA distributed transaction.
-@return 0 or error number */
-static
-int
-innobase_xa_prepare(
-/*================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- THD* thd, /*!< in: handle to the MySQL thread of
- the user whose XA transaction should
- be prepared */
- bool all); /*!< in: true - prepare transaction
- false - the current SQL statement
- ended */
-/*******************************************************************//**
-This function is used to recover X/Open XA distributed transactions.
-@return number of prepared transactions stored in xid_list */
-static
-int
-innobase_xa_recover(
-/*================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- XID* xid_list, /*!< in/out: prepared transactions */
- uint len); /*!< in: number of slots in xid_list */
-/*******************************************************************//**
-This function is used to commit one X/Open XA distributed transaction
-which is in the prepared state
-@return 0 or error number */
-static
-int
-innobase_commit_by_xid(
-/*===================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- XID* xid); /*!< in: X/Open XA transaction
- identification */
-/*******************************************************************//**
-This function is used to rollback one X/Open XA distributed transaction
-which is in the prepared state
-@return 0 or error number */
-static
-int
-innobase_rollback_by_xid(
-/*=====================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- XID* xid); /*!< in: X/Open XA transaction
- identification */
-/*******************************************************************//**
-Create a consistent view for a cursor based on current transaction
-which is created if the corresponding MySQL thread still lacks one.
-This consistent view is then used inside of MySQL when accessing records
-using a cursor.
-@return pointer to cursor view or NULL */
-static
-void*
-innobase_create_cursor_view(
-/*========================*/
- handlerton* hton, /*!< in: innobase hton */
- THD* thd); /*!< in: user thread handle */
-/*******************************************************************//**
-Set the given consistent cursor view to a transaction which is created
-if the corresponding MySQL thread still lacks one. If the given
-consistent cursor view is NULL global read view of a transaction is
-restored to a transaction read view. */
-static
-void
-innobase_set_cursor_view(
-/*=====================*/
- handlerton* hton, /*!< in: handlerton of Innodb */
- THD* thd, /*!< in: user thread handle */
- void* curview); /*!< in: Consistent cursor view to
- be set */
-/*******************************************************************//**
-Close the given consistent cursor view of a transaction and restore
-global read view to a transaction read view. Transaction is created if the
-corresponding MySQL thread still lacks one. */
-static
-void
-innobase_close_cursor_view(
-/*=======================*/
- handlerton* hton, /*!< in: handlerton of Innodb */
- THD* thd, /*!< in: user thread handle */
- void* curview); /*!< in: Consistent read view to be
- closed */
-/*****************************************************************//**
-Removes all tables in the named database inside InnoDB. */
-static
-void
-innobase_drop_database(
-/*===================*/
- handlerton* hton, /*!< in: handlerton of Innodb */
- char* path); /*!< in: database path; inside InnoDB
- the name of the last directory in
- the path is used as the database name:
- for example, in 'mysql/data/test' the
- database name is 'test' */
-/** Shut down the InnoDB storage engine.
-@return 0 */
-static
-int
-innobase_end(handlerton*, ha_panic_function);
-
-#if NOT_USED
-/*****************************************************************//**
-Stores the current binlog coordinates in the trx system header. */
-static
-int
-innobase_store_binlog_info(
-/*=======================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- THD* thd); /*!< in: MySQL thread handle */
-#endif
-
-/*****************************************************************//**
-Creates an InnoDB transaction struct for the thd if it does not yet have one.
-Starts a new InnoDB transaction if a transaction is not yet started. And
-assigns a new snapshot for a consistent read if the transaction does not yet
-have one.
-@return 0 */
-static
-int
-innobase_start_trx_and_assign_read_view(
-/*====================================*/
- handlerton* hton, /* in: Innodb handlerton */
- THD* thd); /* in: MySQL thread handle of the
- user for whom the transaction should
- be committed */
-/****************************************************************//**
-Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
-the logs, and the name of this function should be innobase_checkpoint.
-@return TRUE if error */
-static
-bool
-innobase_flush_logs(
-/*================*/
- handlerton* hton); /*!< in: InnoDB handlerton */
-
-/************************************************************************//**
-Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the
-InnoDB Monitor to the client.
-@return 0 on success */
-static
-int
-innodb_show_status(
-/*===============*/
- handlerton* hton, /*!< in: the innodb handlerton */
- THD* thd, /*!< in: the MySQL query thread of
- the caller */
- stat_print_fn* stat_print);
-/************************************************************************//**
-Return 0 on success and non-zero on failure. Note: the bool return type
-seems to be abused here, should be an int. */
-static
-bool
-innobase_show_status(
-/*=================*/
- handlerton* hton, /*!< in: the innodb handlerton */
- THD* thd, /*!< in: the MySQL query thread of
- the caller */
- stat_print_fn* stat_print,
- enum ha_stat_type stat_type);
-
-/*****************************************************************//**
-Commits a transaction in an InnoDB database. */
-static
-void
-innobase_commit_low(
-/*================*/
- trx_t* trx); /*!< in: transaction handle */
-
-/****************************************************************//**
-Parse and enable InnoDB monitor counters during server startup.
-User can enable monitor counters/groups by specifying
-"loose-innodb_monitor_enable = monitor_name1;monitor_name2..."
-in server configuration file or at the command line. */
-static
-void
-innodb_enable_monitor_at_startup(
-/*=============================*/
- char* str); /*!< in: monitor counter enable list */
-
-/*********************************************************************
-Normalizes a table name string. A normalized name consists of the
-database name catenated to '/' and table name. An example:
-test/mytable. On Windows normalization puts both the database name and the
-table name always to lower case if "set_lower_case" is set to TRUE. */
-void
-normalize_table_name_low(
-/*=====================*/
- char* norm_name, /* out: normalized name as a
- null-terminated string */
- const char* name, /* in: table name string */
- ibool set_lower_case); /* in: TRUE if we want to set
- name to lower case */
-
-#ifdef NOT_USED
-/** Remove old archived transaction log files.
-Only one criterion is applied per call: a positive before_date takes
-precedence; otherwise to_filename is used when it is not NULL.
-@param[in]	hton		InnoDB handlerton (not used by this function)
-@param[in]	before_date	all files modified before this timestamp
-				should be removed; ignored unless positive
-@param[in]	to_filename	this and earlier files should be removed;
-				must start with IB_ARCHIVED_LOGS_PREFIX,
-				followed by the decimal LSN of the file
-@return true on error */
-static bool innobase_purge_archive_logs(
-	handlerton	*hton,
-	time_t		before_date,
-	const char*	to_filename)
-{
-	ulint	err = DB_ERROR;
-
-	if (before_date > 0) {
-		err = purge_archived_logs(before_date, 0);
-	} else if (to_filename
-		   && is_prefix(to_filename, IB_ARCHIVED_LOGS_PREFIX)) {
-		/* The LSN is unsigned, so parse it with strtoull(): an
-		out-of-range value then saturates at ULLONG_MAX and is
-		rejected by the range check below. strtoll() would clamp
-		it to LLONG_MAX, which the check silently accepts. */
-		unsigned long long	log_file_lsn = strtoull(
-			to_filename + IB_ARCHIVED_LOGS_PREFIX_LEN,
-			NULL, 10);
-
-		if (log_file_lsn > 0 && log_file_lsn < ULLONG_MAX) {
-			err = purge_archived_logs(0, log_file_lsn);
-		}
-	}
-
-	return(err != DB_SUCCESS);
-}
-#endif
-
-
-/** Validate a new value for innobase_commit_concurrency.
-The new value is always stored in *save; it is accepted only if it keeps
-the setting on the same side of zero (zero stays zero, nonzero stays
-nonzero).
-@param[in]	thd	thread handle
-@param[in]	var	pointer to system variable
-@param[out]	save	immediate result for update function
-@param[in]	value	incoming value
-@return 0 for valid innodb_commit_concurrency, 1 otherwise */
-static
-int
-innobase_commit_concurrency_validate(
-	THD*				thd,
-	struct st_mysql_sys_var*	var,
-	void*				save,
-	struct st_mysql_value*		value)
-{
-	long long	requested;
-
-	DBUG_ENTER("innobase_commit_concurrency_validate");
-
-	if (value->val_int(value, &requested)) {
-		/* A NULL value is never acceptable. */
-		DBUG_RETURN(1);
-	}
-
-	const ulong	new_value = static_cast<ulong>(requested);
-
-	*reinterpret_cast<ulong*>(save) = new_value;
-
-	/* Reject any change that would flip the setting between zero
-	and nonzero. */
-	const bool	new_is_zero = (new_value == 0);
-	const bool	cur_is_zero = (innobase_commit_concurrency == 0);
-
-	DBUG_RETURN(new_is_zero != cur_is_zero);
-}
-
-/** Construct an InnoDB table handler instance.
-@param[in]	hton		InnoDB handlerton
-@param[in]	table		table share passed to the ha_innobase
-				constructor
-@param[in]	mem_root	memory root the handler object is
-				allocated from (placement new)
-@return the newly constructed ha_innobase object */
-static
-handler*
-innobase_create_handler(
-	handlerton*	hton,
-	TABLE_SHARE*	table,
-	MEM_ROOT*	mem_root)
-{
-	handler*	new_handler = new (mem_root) ha_innobase(hton, table);
-
-	return(new_handler);
-}
-
-/* General functions */
-
-/** Check that a page size is correct for InnoDB, i.e. that it is a
-power of two whose exponent lies in
-[UNIV_PAGE_SIZE_SHIFT_MIN, UNIV_PAGE_SIZE_SHIFT_MAX].
-@param[in]	page_size	page size to evaluate
-@return the associated page size shift if valid, 0 if invalid */
-inline
-int
-innodb_page_size_validate(
-	ulong	page_size)
-{
-	ulong	shift = UNIV_PAGE_SIZE_SHIFT_MIN;
-
-	DBUG_ENTER("innodb_page_size_validate");
-
-	while (shift <= UNIV_PAGE_SIZE_SHIFT_MAX) {
-		if ((ulong) (1 << shift) == page_size) {
-			DBUG_RETURN(shift);
-		}
-
-		++shift;
-	}
-
-	/* No supported shift produces this page size. */
-	DBUG_RETURN(0);
-}
-
-/** Tell whether the thread is the replication thread on the slave
-server. The replication thread is treated differently from other
-threads when deciding whether it may enter InnoDB.
-@param[in]	thd	thread handle, may be NULL
-@return true if thd is the replication thread, false if thd is NULL */
-UNIV_INTERN
-ibool
-thd_is_replication_slave_thread(
-	THD*	thd)
-{
-	if (thd == NULL) {
-		return(FALSE);
-	}
-
-	return(thd_slave_thread(thd) ? TRUE : FALSE);
-}
-
-/** Get the durability property requested by a thread. Used when
-writing either a prepare or commit record to the log buffer.
-@param[in]	thd	thread handle
-@return the durability property */
-UNIV_INTERN
-enum durability_properties
-thd_requested_durability(
-	const THD*	thd)
-{
-	const enum durability_properties	requested
-		= thd_get_durability_property(thd);
-
-	return(requested);
-}
-
-/** Tell whether the transaction should be flagged as read-only.
-@param[in]	thd	thread handle, may be NULL
-@return true if the thd is marked as read-only, false if thd is NULL */
-UNIV_INTERN
-ibool
-thd_trx_is_read_only(
-	THD*	thd)
-{
-	if (thd == NULL) {
-		return(FALSE);
-	}
-
-	return(thd_tx_is_read_only(thd) ? TRUE : FALSE);
-}
-
-/** Check if the transaction is an auto-commit transaction. TRUE also
-implies that it is a SELECT (read-only) transaction.
-@param[in]	thd	thread handle, can be NULL
-@return true if the transaction is an auto commit read-only
-transaction */
-UNIV_INTERN
-ibool
-thd_trx_is_auto_commit(
-	THD*	thd)
-{
-	if (thd == NULL) {
-		return(FALSE);
-	}
-
-	/* An explicit BEGIN or autocommit=0 disqualifies the thread. */
-	if (thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-		return(FALSE);
-	}
-
-	return(thd_is_select(thd) ? TRUE : FALSE);
-}
-
-/** Enter InnoDB subject to the srv_thread_concurrency limit. The
-cheap checks are done here, inline, so that srv_conc_enter_innodb()
-is only called when it is really needed.
-@param[in,out]	trx	transaction handle */
-static inline
-void
-innobase_srv_conc_enter_innodb(
-	trx_t*	trx)
-{
-#ifdef WITH_WSREP
-	if (wsrep_on(trx->mysql_thd)
-	    && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
-		return;
-	}
-#endif /* WITH_WSREP */
-
-	if (!srv_thread_concurrency) {
-		/* Concurrency throttling is disabled. */
-		return;
-	}
-
-	if (trx->n_tickets_to_enter_innodb > 0) {
-		/* The trx still has 'free tickets' to enter the
-		engine: spend one of them. */
-		--trx->n_tickets_to_enter_innodb;
-		return;
-	}
-
-	if (trx->mysql_thd != NULL
-	    && thd_is_replication_slave_thread(trx->mysql_thd)) {
-		/* The replication thread only waits, bounded by
-		srv_replication_delay, for the number of active
-		threads to drop below the limit. */
-		UT_WAIT_FOR(
-			srv_conc_get_active_threads()
-			< srv_thread_concurrency,
-			srv_replication_delay * 1000);
-		return;
-	}
-
-	srv_conc_enter_innodb(trx);
-}
-
-/** Note that the thread wants to leave InnoDB, but only if it has no
-spare tickets left.
-@param[in,out]	trx	transaction handle */
-static inline
-void
-innobase_srv_conc_exit_innodb(
-	trx_t*	trx)
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
-#ifdef WITH_WSREP
-	if (wsrep_on(trx->mysql_thd)
-	    && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
-		return;
-	}
-#endif /* WITH_WSREP */
-
-	/* Checking here avoids an unnecessary function call. */
-	if (!trx->declared_to_be_inside_innodb) {
-		return;
-	}
-
-	if (trx->n_tickets_to_enter_innodb != 0) {
-		return;
-	}
-
-	srv_conc_force_exit_innodb(trx);
-}
-
-/** Force a thread to leave InnoDB even if it has spare tickets.
-@param[in,out]	trx	transaction handle */
-static inline
-void
-innobase_srv_conc_force_exit_innodb(
-	trx_t*	trx)
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
-
-	/* Checking here avoids an unnecessary function call. */
-	if (!trx->declared_to_be_inside_innodb) {
-		return;
-	}
-
-	srv_conc_force_exit_innodb(trx);
-}
-
-/** Get the NUL terminated value of glob_hostname.
-@return pointer to glob_hostname */
-UNIV_INTERN
-const char*
-server_get_hostname()
-{
-	const char*	hostname = glob_hostname;
-
-	return(hostname);
-}
-
-/** Tell whether the transaction this thread is processing has edited
-non-transactional tables. Used by the deadlock detector when deciding
-which transaction to roll back in case of a deadlock: rolling back
-transactions that have edited non-transactional tables is avoided.
-@param[in]	thd	thread handle
-@return true if non-transactional tables have been edited */
-UNIV_INTERN
-ibool
-thd_has_edited_nontrans_tables(
-	THD*	thd)
-{
-	const ibool	edited = (ibool) thd_non_transactional_update(thd);
-
-	return(edited);
-}
-
-/** Get the high resolution timestamp for the start of the current
-query.
-@param[in]	thd	thread handle
-@return the value reported by thd_start_utime() for thd */
-UNIV_INTERN
-unsigned long long
-thd_query_start_micro(
-	const THD*	thd)
-{
-	const unsigned long long	start_time = thd_start_utime(thd);
-
-	return(start_time);
-}
-
-/** Tell whether the thread is executing a SELECT statement.
-@param[in]	thd	thread handle
-@return true if thd is executing SELECT */
-UNIV_INTERN
-ibool
-thd_is_select(
-	const THD*	thd)
-{
-	if (thd_sql_command(thd) == SQLCOM_SELECT) {
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/** Tell whether the thread supports XA.
-@param[in]	thd	thread handle, or NULL to query the global
-			innodb_supports_xa
-@return true if thd has XA support; always false when thd is NULL
-and IS_XTRABACKUP() holds */
-UNIV_INTERN
-ibool
-thd_supports_xa(
-	THD*	thd)
-{
-	/* THDVAR cannot be used in xtrabackup: the plugin variables
-	for innodb are not loaded there. */
-	if (thd == NULL && IS_XTRABACKUP()) {
-		return(FALSE);
-	}
-
-	return(THDVAR(thd, support_xa));
-}
-
-/** Get the value of innodb_tmpdir.
-@param[in]	thd	thread handle, or NULL to query
-			the global innodb_tmpdir.
-@return the configured directory
-@retval NULL if innodb_tmpdir is unset or "" */
-UNIV_INTERN
-const char*
-thd_innodb_tmpdir(
-	THD*	thd)
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!sync_thread_levels_nonempty_trx(false));
-#endif /* UNIV_SYNC_DEBUG */
-
-	const char*	dir = THDVAR(thd, tmpdir);
-
-	/* An empty string is reported the same way as "not set". */
-	if (dir == NULL || *dir == '\0') {
-		return(NULL);
-	}
-
-	return(dir);
-}
-/** Check the status of fake changes mode (innodb_fake_changes).
-@param[in]	thd	thread handle, or NULL to query the global
-			innodb_fake_changes
-@return true if fake change mode is enabled; always false when thd is
-NULL and IS_XTRABACKUP() holds */
-UNIV_INTERN
-ibool
-thd_fake_changes(
-	THD*	thd)
-{
-	/* THDVAR cannot be used in xtrabackup: the plugin variables
-	for innodb are not loaded there. */
-	if (thd == NULL && IS_XTRABACKUP()) {
-		return(FALSE);
-	}
-
-	return(THDVAR((THD*) thd, fake_changes));
-}
-
-/** Get the lock wait timeout for the current connection.
-@param[in]	thd	thread handle, or NULL to query the global
-			innodb_lock_wait_timeout
-@return the lock wait timeout, in seconds */
-UNIV_INTERN
-ulong
-thd_lock_wait_timeout(
-	THD*	thd)
-{
-	/* According to <mysql/plugin.h>, passing thd == NULL
-	returns the global value of the session variable. */
-	const ulong	timeout = THDVAR(thd, lock_wait_timeout);
-
-	return(timeout);
-}
-
-/** Set the time waited for the lock for the current query.
-Does nothing when thd is NULL.
-@param[in,out]	thd	thread handle, may be NULL
-@param[in]	value	time waited for the lock */
-UNIV_INTERN
-void
-thd_set_lock_wait_time(
-	THD*	thd,
-	ulint	value)
-{
-	if (thd == NULL) {
-		return;
-	}
-
-	thd_storage_lock_wait(thd, value);
-}
-
-/** Get the flush_log_at_trx_commit session variable of a thread.
-@param[in]	thd	thread handle (a THD* passed as void*), or NULL
-@return the value of THDVAR(thd, flush_log_at_trx_commit), or 0 when
-thd is NULL and IS_XTRABACKUP() holds */
-UNIV_INTERN
-ulong
-thd_flush_log_at_trx_commit(
-	void*	thd)
-{
-	/* THDVAR cannot be used in xtrabackup: the plugin variables
-	for innodb are not loaded there, which makes xtrabackup crash
-	when trying to use them. */
-	if (thd == NULL && IS_XTRABACKUP()) {
-		return(0);
-	}
-
-	return(THDVAR((THD*) thd, flush_log_at_trx_commit));
-}
-
-/** Obtain the InnoDB transaction of a MySQL thread.
-The result is a reference to the pointer slot that thd_ha_data()
-returns for innodb_hton_ptr, so the caller can both read and replace
-the transaction pointer.
-@param[in]	thd	MySQL thread
-@return reference to transaction pointer */
-MY_ATTRIBUTE((warn_unused_result, nonnull))
-static inline
-trx_t*&
-thd_to_trx(
-	THD*	thd)
-{
-	trx_t**	slot = (trx_t**) thd_ha_data(thd, innodb_hton_ptr);
-
-	return(*slot);
-}
-
-#ifdef WITH_WSREP
-/** Get the id of the InnoDB transaction of a MySQL thread.
-@param[in]	thd	MySQL thread
-@return the id field of the transaction returned by thd_to_trx() */
-ulonglong
-thd_to_trx_id(
-	THD*	thd)
-{
-	const trx_t*	trx = thd_to_trx(thd);
-
-	return(trx->id);
-}
-#endif /* WITH_WSREP */
-
-/** Tell whether the InnoDB transaction of a thread runs with fake
-changes.
-@param[in]	thd	MySQL thread
-@return true if thd has a transaction and its fake_changes field is
-set */
-my_bool
-ha_innobase::is_fake_change_enabled(THD* thd)
-{
-	const trx_t*	trx = thd_to_trx(thd);
-
-	if (trx == NULL) {
-		return(FALSE);
-	}
-
-	return(UNIV_UNLIKELY(trx->fake_changes) ? TRUE : FALSE);
-}
-
-/** Increment innobase_active_counter and, on every
-INNOBASE_WAKE_INTERVALth call, invoke srv_active_wake_master_thread().
-This function should be used when a single database operation may
-introduce a small need for server utility activity, like
-checkpointing. */
-static inline
-void
-innobase_active_small(void)
-{
-	++innobase_active_counter;
-
-	if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) != 0) {
-		return;
-	}
-
-	srv_active_wake_master_thread();
-}
-
-/********************************************************************//**
-Converts an InnoDB error code to a MySQL error code and also tells to MySQL
-about a possible transaction rollback inside InnoDB caused by a lock wait
-timeout or a deadlock.
-Besides the plain mapping, some cases have side effects: a warning is
-pushed for DB_FOREIGN_EXCEED_MAX_CASCADE, an error message is raised for
-DB_TEMP_FILE_WRITE_FAILURE, DB_TOO_BIG_RECORD, DB_TOO_BIG_FOR_REDO and
-DB_TOO_BIG_INDEX_COL, and the transaction is marked for rollback for
-DB_DEADLOCK, DB_LOCK_WAIT_TIMEOUT and DB_LOCK_TABLE_FULL (when thd is
-not NULL). Any error code without an explicit case maps to -1.
-@return MySQL error code */
-static
-int
-convert_error_code_to_mysql(
-/*========================*/
- dberr_t error, /*!< in: InnoDB error code */
- ulint flags, /*!< in: InnoDB table flags, or 0 */
- THD* thd) /*!< in: user thread handle or NULL */
-{
- switch (error) {
- case DB_SUCCESS:
- return(0);
-
- case DB_INTERRUPTED:
- return(HA_ERR_ABORTED_BY_USER);
-
- case DB_FOREIGN_EXCEED_MAX_CASCADE:
- /* NOTE(review): unlike the other cases that use thd, this one
- has no "if (thd)" guard around the call; it relies on ut_ad()
- only. Confirm that callers never pass thd == NULL here. */
- ut_ad(thd);
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_ROW_IS_REFERENCED,
- "InnoDB: Cannot delete/update "
- "rows with cascading foreign key "
- "constraints that exceed max "
- "depth of %d. Please "
- "drop extra constraints and try "
- "again", DICT_FK_MAX_RECURSIVE_LOAD);
-
- /* fall through */
-
- case DB_ERROR:
- default:
- return(-1); /* unspecified error */
-
- case DB_DUPLICATE_KEY:
- /* Be cautious with returning this error, since
- mysql could re-enter the storage layer to get
- duplicated key info, the operation requires a
- valid table handle and/or transaction information,
- which might not always be available in the error
- handling stage. */
- return(HA_ERR_FOUND_DUPP_KEY);
-
- case DB_READ_ONLY:
- return(HA_ERR_TABLE_READONLY);
-
- case DB_FOREIGN_DUPLICATE_KEY:
- return(HA_ERR_FOREIGN_DUPLICATE_KEY);
-
- case DB_MISSING_HISTORY:
- return(HA_ERR_TABLE_DEF_CHANGED);
-
- case DB_RECORD_NOT_FOUND:
- return(HA_ERR_NO_ACTIVE_RECORD);
-
- case DB_SEARCH_ABORTED_BY_USER:
- return(HA_ERR_ABORTED_BY_USER);
-
- case DB_DEADLOCK:
- /* Since we rolled back the whole transaction, we must
- tell it also to MySQL so that MySQL knows to empty the
- cached binlog for this transaction */
-
- if (thd) {
- thd_mark_transaction_to_rollback(thd, TRUE);
- }
-
- return(HA_ERR_LOCK_DEADLOCK);
-
- case DB_LOCK_WAIT_TIMEOUT:
- /* Starting from 5.0.13, we let MySQL just roll back the
- latest SQL statement in a lock wait timeout. Previously, we
- rolled back the whole transaction. */
-
- if (thd) {
- thd_mark_transaction_to_rollback(
- thd, (bool) row_rollback_on_timeout);
- }
-
- return(HA_ERR_LOCK_WAIT_TIMEOUT);
-
- case DB_NO_REFERENCED_ROW:
- return(HA_ERR_NO_REFERENCED_ROW);
-
- case DB_ROW_IS_REFERENCED:
- return(HA_ERR_ROW_IS_REFERENCED);
-
- case DB_CANNOT_ADD_CONSTRAINT:
- case DB_CHILD_NO_INDEX:
- case DB_PARENT_NO_INDEX:
- return(HA_ERR_CANNOT_ADD_FOREIGN);
-
- case DB_CANNOT_DROP_CONSTRAINT:
-
- return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
- misleading, a new MySQL error
- code should be introduced */
-
- case DB_CORRUPTION:
- return(HA_ERR_CRASHED);
-
- case DB_OUT_OF_FILE_SPACE:
- return(HA_ERR_RECORD_FILE_FULL);
-
- case DB_TEMP_FILE_WRITE_FAILURE:
- my_error(ER_GET_ERRMSG, MYF(0),
- DB_TEMP_FILE_WRITE_FAILURE,
- ut_strerr(DB_TEMP_FILE_WRITE_FAILURE),
- "InnoDB");
- return(HA_ERR_INTERNAL_ERROR);
-
- case DB_TABLE_IN_FK_CHECK:
- return(HA_ERR_TABLE_IN_FK_CHECK);
-
- case DB_TABLE_IS_BEING_USED:
- return(HA_ERR_WRONG_COMMAND);
-
- case DB_TABLESPACE_DELETED:
- case DB_TABLE_NOT_FOUND:
- return(HA_ERR_NO_SUCH_TABLE);
-
- case DB_DECRYPTION_FAILED:
- return(HA_ERR_DECRYPTION_FAILED);
-
- /* Same result as DB_TABLESPACE_DELETED and DB_TABLE_NOT_FOUND
- above. */
- case DB_TABLESPACE_NOT_FOUND:
- return(HA_ERR_NO_SUCH_TABLE);
-
- case DB_TOO_BIG_RECORD: {
- /* If prefix is true then a 768-byte prefix is stored
- locally for BLOB fields. Refer to dict_table_get_format() */
- bool prefix = (dict_tf_get_format(flags) == UNIV_FORMAT_A);
- my_printf_error(ER_TOO_BIG_ROWSIZE,
- "Row size too large (> %lu). Changing some columns "
- "to TEXT or BLOB %smay help. In current row "
- "format, BLOB prefix of %d bytes is stored inline.",
- MYF(0),
- page_get_free_space_of_empty(flags &
- DICT_TF_COMPACT) / 2,
- prefix ? "or using ROW_FORMAT=DYNAMIC "
- "or ROW_FORMAT=COMPRESSED ": "",
- prefix ? DICT_MAX_FIXED_COL_LEN : 0);
- return(HA_ERR_TO_BIG_ROW);
- }
-
-
- case DB_TOO_BIG_FOR_REDO:
- /* The text is passed as an argument to a "%s" format, so the
- literal '%' in "10%" is not parsed as a conversion. */
- my_printf_error(ER_TOO_BIG_ROWSIZE, "%s" , MYF(0),
- "The size of BLOB/TEXT data inserted"
- " in one transaction is greater than"
- " 10% of redo log size. Increase the"
- " redo log size using innodb_log_file_size.");
- return(HA_ERR_TO_BIG_ROW);
-
- case DB_TOO_BIG_INDEX_COL:
- my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
- DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags));
- return(HA_ERR_INDEX_COL_TOO_LONG);
-
- case DB_NO_SAVEPOINT:
- return(HA_ERR_NO_SAVEPOINT);
-
- case DB_LOCK_TABLE_FULL:
- /* Since we rolled back the whole transaction, we must
- tell it also to MySQL so that MySQL knows to empty the
- cached binlog for this transaction */
-
- if (thd) {
- thd_mark_transaction_to_rollback(thd, TRUE);
- }
-
- return(HA_ERR_LOCK_TABLE_FULL);
-
- case DB_FTS_INVALID_DOCID:
- return(HA_FTS_INVALID_DOCID);
- case DB_FTS_EXCEED_RESULT_CACHE_LIMIT:
- return(HA_ERR_OUT_OF_MEM);
- case DB_TOO_MANY_CONCURRENT_TRXS:
- return(HA_ERR_TOO_MANY_CONCURRENT_TRXS);
- case DB_UNSUPPORTED:
- return(HA_ERR_UNSUPPORTED);
- case DB_INDEX_CORRUPT:
- return(HA_ERR_INDEX_CORRUPT);
- case DB_UNDO_RECORD_TOO_BIG:
- return(HA_ERR_UNDO_REC_TOO_BIG);
- case DB_OUT_OF_MEMORY:
- return(HA_ERR_OUT_OF_MEM);
- case DB_TABLESPACE_EXISTS:
- return(HA_ERR_TABLESPACE_EXISTS);
- case DB_IDENTIFIER_TOO_LONG:
- return(HA_ERR_INTERNAL_ERROR);
- case DB_FTS_TOO_MANY_WORDS_IN_PHRASE:
- return(HA_ERR_FTS_TOO_MANY_WORDS_IN_PHRASE);
- }
-}
-
-/** Print info of a THD object (== user session thread) to the given
-file, followed by a newline.
-@param[in]	f		output stream
-@param[in]	thd		MySQL THD object
-@param[in]	max_query_len	max query length to print, or 0 to use
-				the default max length */
-UNIV_INTERN
-void
-innobase_mysql_print_thd(
-	FILE*	f,
-	THD*	thd,
-	uint	max_query_len)
-{
-	char		buffer[1024];
-	const char*	description = thd_get_error_context_description(
-		thd, buffer, sizeof buffer, max_query_len);
-
-	fputs(description, f);
-	putc('\n', f);
-}
-
-/******************************************************************//**
-Get the error message format string.
-@return the format string or 0 if not found. */
-UNIV_INTERN
-const char*
-innobase_get_err_msg(
-/*=================*/
- int error_code) /*!< in: MySQL error code */
-{
- return(my_get_err_msg(error_code));
-}
-
-/** Get the variable length bounds of the given character set.
-If the charset-collation is not known, both bounds are set to 0. An
-unknown nonzero code is tolerated only while executing DROP TABLE
-(with an optional warning); otherwise it triggers an assertion.
-@param[in]	cset		MySQL charset-collation code
-@param[out]	mbminlen	minimum length of a char (in bytes)
-@param[out]	mbmaxlen	maximum length of a char (in bytes) */
-UNIV_INTERN
-void
-innobase_get_cset_width(
-	ulint	cset,
-	ulint*	mbminlen,
-	ulint*	mbmaxlen)
-{
-	CHARSET_INFO*	cs;
-
-	ut_ad(cset <= MAX_CHAR_COLL_NUM);
-	ut_ad(mbminlen);
-	ut_ad(mbmaxlen);
-
-	cs = all_charsets[cset];
-
-	if (cs != NULL) {
-		*mbminlen = cs->mbminlen;
-		*mbmaxlen = cs->mbmaxlen;
-		ut_ad(*mbminlen < DATA_MBMAX);
-		ut_ad(*mbmaxlen < DATA_MBMAX);
-		return;
-	}
-
-	THD*		thd = current_thd;
-	const bool	dropping_table
-		= thd && thd_sql_command(thd) == SQLCOM_DROP_TABLE;
-
-	if (!dropping_table) {
-		ut_a(cset == 0);
-	} else if ((global_system_variables.log_warnings)
-		   && (cset != 0)) {
-		/* Fix bug#46256: allow tables to be dropped if the
-		collation is not found, but issue a warning. */
-		sql_print_warning(
-			"Unknown collation #%lu.", cset);
-	}
-
-	*mbminlen = *mbmaxlen = 0;
-}
-
-/** Convert an identifier to a table name, i.e. from the given
-character set to my_charset_filename.
-@param[in]	cs	the 'from' character set
-@param[out]	to	converted identifier
-@param[in]	from	NUL-terminated identifier to convert
-@param[in]	len	length of 'to', in bytes */
-UNIV_INTERN
-void
-innobase_convert_from_table_id(
-	struct charset_info_st*	cs,
-	char*			to,
-	const char*		from,
-	ulint			len)
-{
-	uint		errors;
-	const size_t	from_len = strlen(from);
-	const uint	to_size = (uint) len;
-
-	strconvert(cs, from, from_len, &my_charset_filename,
-		   to, to_size, &errors);
-}
-
-/** Check if the length of the identifier exceeds the maximum allowed.
-The identifier is measured with well_formed_len() of
-system_charset_info, limited to NAME_CHAR_LEN characters. On failure
-ER_TOO_LONG_IDENT is raised through my_error().
-@param[in]	id	FK identifier to check excluding the database
-			portion
-@return true when length of identifier is too long */
-UNIV_INTERN
-my_bool
-innobase_check_identifier_length(
-	const char*	id)
-{
-	int		well_formed_error = 0;
-	CHARSET_INFO*	cs = system_charset_info;
-	DBUG_ENTER("innobase_check_identifier_length");
-
-	const char*	id_end = id + strlen(id);
-	size_t		len = cs->cset->well_formed_len(
-		cs, id, id_end, NAME_CHAR_LEN, &well_formed_error);
-
-	const bool	too_long
-		= well_formed_error || len == NAME_CHAR_LEN;
-
-	if (!too_long) {
-		DBUG_RETURN(false);
-	}
-
-	my_error(ER_TOO_LONG_IDENT, MYF(0), id);
-	DBUG_RETURN(true);
-}
-
-/** Convert an identifier from the given character set to
-system_charset_info (UTF-8).
-@param[in]	cs	the 'from' character set
-@param[out]	to	converted identifier
-@param[in]	from	NUL-terminated identifier to convert
-@param[in]	len	length of 'to', in bytes */
-UNIV_INTERN
-void
-innobase_convert_from_id(
-	struct charset_info_st*	cs,
-	char*			to,
-	const char*		from,
-	ulint			len)
-{
-	uint		errors;
-	const size_t	from_len = strlen(from);
-	const uint	to_size = (uint) len;
-
-	strconvert(cs, from, from_len, system_charset_info,
-		   to, to_size, &errors);
-}
-
-/** Compare NUL-terminated UTF-8 strings case insensitively.
-A NULL string sorts before any non-NULL string, and two NULL strings
-compare equal.
-@param[in]	a	first string to compare, may be NULL
-@param[in]	b	second string to compare, may be NULL
-@return 0 if a=b, <0 if a<b, >0 if a>b */
-UNIV_INTERN
-int
-innobase_strcasecmp(
-	const char*	a,
-	const char*	b)
-{
-	if (a == NULL) {
-		return(b == NULL ? 0 : -1);
-	}
-
-	if (b == NULL) {
-		return(1);
-	}
-
-	return(my_strcasecmp(system_charset_info, a, b));
-}
-
-/** Compare NUL-terminated UTF-8 strings case insensitively. The
-second string contains wildcards.
-@param[in]	a	string to compare
-@param[in]	b	wildcard string to compare
-@return 0 if a match is found, 1 if not */
-UNIV_INTERN
-int
-innobase_wildcasecmp(
-	const char*	a,
-	const char*	b)
-{
-	const int	result = wild_case_compare(system_charset_info, a, b);
-
-	return(result);
-}
-
-/** Strip the dir name from a full path name and return only the file
-name.
-@param[in]	path_name	full path name
-@return file name or "null" if no file name */
-UNIV_INTERN
-const char*
-innobase_basename(
-	const char*	path_name)
-{
-	const char*	file_name = base_name(path_name);
-
-	if (file_name) {
-		return(file_name);
-	}
-
-	return("null");
-}
-
-/******************************************************************//**
-Makes all characters in a NUL-terminated UTF-8 string lower case.
-The work is delegated to my_casedn_str() with system_charset_info. */
-UNIV_INTERN
-void
-innobase_casedn_str(
-/*================*/
- char* a) /*!< in/out: string to put in lower case */
-{
- my_casedn_str(system_charset_info, a);
-}
-
-/**********************************************************************//**
-Determines the connection character set by asking the server through
-thd_charset().
-@return connection character set */
-UNIV_INTERN
-struct charset_info_st*
-innobase_get_charset(
-/*=================*/
- THD* mysql_thd) /*!< in: MySQL thread handle */
-{
- return(thd_charset(mysql_thd));
-}
-
-/**********************************************************************//**
-Determines the current SQL statement.
-When the thread has no query string, NULL is returned and *length is set
-to 0 so that a caller never reads an uninitialized length.
-@return SQL statement string, or NULL if none is available */
-UNIV_INTERN
-const char*
-innobase_get_stmt(
-/*==============*/
- THD* thd, /*!< in: MySQL thread handle */
- size_t* length) /*!< out: length of the SQL statement;
- 0 when NULL is returned */
-{
- if (const LEX_STRING *stmt = thd_query_string(thd)) {
- *length = stmt->length;
- return stmt->str;
- }
-
- /* No statement: still give the out parameter a defined value. */
- *length = 0;
- return NULL;
-}
-
-/**********************************************************************//**
-Get the current setting of the table_def_size global parameter. We do
-a dirty read because for one there is no synchronization object and
-secondly there is little harm in doing so even if we get a torn read.
-The value is taken from the server global tdc_size.
-@return value of table_def_size */
-UNIV_INTERN
-ulint
-innobase_get_table_cache_size(void)
-/*===============================*/
-{
- return(tdc_size);
-}
-
-/**********************************************************************//**
-Get the current setting of the lower_case_table_names global parameter from
-mysqld.cc. We do a dirty read because for one there is no synchronization
-object and secondly there is little harm in doing so even if we get a torn
-read. The global is returned as is, converted to ulint.
-@return value of lower_case_table_names */
-UNIV_INTERN
-ulint
-innobase_get_lower_case_table_names(void)
-/*=====================================*/
-{
- return(lower_case_table_names);
-}
-
-/** Create a temporary file in the location specified by the parameter
-path. If the path is null, then it will be created in tmpdir.
-The descriptor handed back is a duplicate of the one obtained from
-mysql_tmpfile() / mysql_tmpfile_path(); the original descriptor is closed
-with my_close() before returning. If duplication fails, the error is
-reported through my_error() and a negative value is returned.
-@param[in] path location for creating temporary file
-@return temporary file descriptor, or < 0 on error */
-UNIV_INTERN
-int
-innobase_mysql_tmpfile(
- const char* path)
-{
-#ifdef WITH_INNODB_DISALLOW_WRITES
- os_event_wait(srv_allow_writes_event);
-#endif /* WITH_INNODB_DISALLOW_WRITES */
- int fd2 = -1;
- File fd;
-
- DBUG_EXECUTE_IF(
- "innobase_tmpfile_creation_failure",
- return(-1);
- );
-
- if (path == NULL) {
- fd = mysql_tmpfile("ib");
- } else {
- fd = mysql_tmpfile_path(path, "ib");
- }
-
- if (fd >= 0) {
- /* Copy the file descriptor, so that the additional resources
- allocated by create_temp_file() can be freed by invoking
- my_close().
-
- Because the file descriptor returned by this function
- will be passed to fdopen(), it will be closed by invoking
- fclose(), which in turn will invoke close() instead of
- my_close(). */
-
-#ifdef _WIN32
- /* Note that on Windows, the integer returned by mysql_tmpfile
- has no relation to C runtime file descriptor. Here, we need
- to call my_get_osfhandle to get the HANDLE and then convert it
- to C runtime filedescriptor. */
- {
- HANDLE hFile = my_get_osfhandle(fd);
- HANDLE hDup;
- BOOL bOK = DuplicateHandle(
- GetCurrentProcess(),
- hFile, GetCurrentProcess(),
- &hDup, 0, FALSE, DUPLICATE_SAME_ACCESS);
- if (bOK) {
- fd2 = _open_osfhandle((intptr_t) hDup, 0);
- if (fd2 < 0) {
- /* The C runtime owns hDup only after a
- successful _open_osfhandle(); on failure
- the duplicate must be closed here or it
- leaks. */
- CloseHandle(hDup);
- }
- } else {
- my_osmaperr(GetLastError());
- fd2 = -1;
- }
- }
-#else
- fd2 = dup(fd);
-#endif
- if (fd2 < 0) {
- /* Capture errno first: the tracing call below may
- overwrite it. */
- my_errno=errno;
- DBUG_PRINT("error",("Got error %d on dup",fd2));
- my_error(EE_OUT_OF_FILERESOURCES,
- MYF(ME_BELL+ME_WAITTANG),
- "ib*", my_errno);
- }
- my_close(fd, MYF(MY_WME));
- }
- return(fd2);
-}
-
-/*********************************************************************//**
-Wrapper around MySQL's copy_and_convert function.
-The buffer pointers are cast to char pointers and both lengths are
-narrowed to uint32 before the call.
-@return number of bytes copied to 'to' */
-UNIV_INTERN
-ulint
-innobase_convert_string(
-/*====================*/
- void* to, /*!< out: converted string */
- ulint to_length, /*!< in: number of bytes reserved
- for the converted string */
- CHARSET_INFO* to_cs, /*!< in: character set to convert to */
- const void* from, /*!< in: string to convert */
- ulint from_length, /*!< in: number of bytes to convert */
- CHARSET_INFO* from_cs, /*!< in: character set to convert
- from */
- uint* errors) /*!< out: number of errors encountered
- during the conversion */
-{
- return(copy_and_convert(
- (char*) to, (uint32) to_length, to_cs,
- (const char*) from, (uint32) from_length, from_cs,
- errors));
-}
-
-/*******************************************************************//**
-Formats the raw data in "data" (in InnoDB on-disk format) that is of
-type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
-the result to "buf". The result is converted to "system_charset_info".
-Not more than "buf_size" bytes are written to "buf".
-The result is always NUL-terminated (provided buf_size > 0) and the
-number of bytes that were written to "buf" is returned (including the
-terminating NUL).
-The conversion goes through a fixed 8192-byte stack buffer, so the
-converted text is limited to that size regardless of buf_size.
-@return number of bytes that were written */
-UNIV_INTERN
-ulint
-innobase_raw_format(
-/*================*/
- const char* data, /*!< in: raw data */
- ulint data_len, /*!< in: raw data length
- in bytes */
- ulint charset_coll, /*!< in: charset collation */
- char* buf, /*!< out: output buffer */
- ulint buf_size) /*!< in: output buffer size
- in bytes */
-{
- /* XXX we use a hard limit instead of allocating
- buf_size bytes from the heap */
- CHARSET_INFO* data_cs;
- char buf_tmp[8192];
- ulint buf_tmp_used;
- /* Conversion error count; collected but not examined. */
- uint num_errors;
-
- /* NOTE(review): charset_coll indexes all_charsets directly; no
- range or NULL check is visible here - confirm callers guarantee a
- valid, loaded collation id. */
- data_cs = all_charsets[charset_coll];
-
- buf_tmp_used = innobase_convert_string(buf_tmp, sizeof(buf_tmp),
- system_charset_info,
- data, data_len, data_cs,
- &num_errors);
-
- return(ut_str_sql_format(buf_tmp, buf_tmp_used, buf, buf_size));
-}
-
-/*********************************************************************//**
-Compute the next autoinc value.
-
-For MySQL replication the autoincrement values can be partitioned among
-the nodes. The offset is the start or origin of the autoincrement value
-for a particular node. For n nodes the increment will be n and the offset
-will be in the interval [1, n]. The formula tries to allocate the next
-value for a particular node.
-
-Note: This function is also called with increment set to the number of
-values we want to reserve for multi-value inserts e.g.,
-
- INSERT INTO T VALUES(), (), ();
-
-innobase_next_autoinc() will be called with increment set to 3 where
-autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for
-the multi-value INSERT above.
-
-The max_value argument is only asserted to be non-zero; it is then replaced
-by the maximum ulonglong, which therefore is the saturation value returned
-when the computation would overflow.
-@return the next value */
-UNIV_INTERN
-ulonglong
-innobase_next_autoinc(
-/*==================*/
- ulonglong current, /*!< in: Current value */
- ulonglong need, /*!< in: count of values needed */
- ulonglong step, /*!< in: AUTOINC increment step */
- ulonglong offset, /*!< in: AUTOINC offset */
- ulonglong max_value) /*!< in: max value for type */
-{
- ulonglong next_value;
- /* Size of the range to reserve. The product is not checked for
- wrap-around before it is used. */
- ulonglong block = need * step;
-
- /* Should never be 0. */
- ut_a(need > 0);
- ut_a(block > 0);
- ut_a(max_value > 0);
-
- /*
- Allow auto_increment to go over max_value up to max ulonglong.
- This allows us to detect that all values are exhausted.
- If we don't do this, we will return max_value several times
- and get duplicate key errors instead of auto increment value
- out of range.
- */
- max_value= (~(ulonglong) 0);
-
- /* According to MySQL documentation, if the offset is greater than
- the step then the offset is ignored.
- NOTE(review): the test below compares against block (need * step),
- not step - confirm this is intended. */
- if (offset > block) {
- offset = 0;
- }
-
- /* Check for overflow. Current can be > max_value if the value is
- in reality a negative value.The visual studio compilers converts
- large double values automatically into unsigned long long datatype
- maximum value */
-
- if (block >= max_value
- || offset > max_value
- || current >= max_value
- || max_value - offset <= offset) {
-
- next_value = max_value;
- } else {
- ut_a(max_value > current);
-
- ulonglong free = max_value - current;
-
- if (free < offset || free - offset <= block) {
- next_value = max_value;
- } else {
- /* 0 marks "not saturated": the value is computed
- in the branch below. */
- next_value = 0;
- }
- }
-
- if (next_value == 0) {
- ulonglong next;
-
- if (current >= offset) {
- /* Number of whole steps between offset and current. */
- next = (current - offset) / step;
- } else {
- next = 0;
- block -= step;
- }
-
- ut_a(max_value > next);
- next_value = next * step;
- /* Check for multiplication overflow. */
- ut_a(next_value >= next);
- ut_a(max_value > next_value);
-
- /* Check for overflow */
- if (max_value - next_value >= block) {
-
- next_value += block;
-
- if (max_value - next_value >= offset) {
- next_value += offset;
- } else {
- next_value = max_value;
- }
- } else {
- next_value = max_value;
- }
- }
-
- ut_a(next_value != 0);
- ut_a(next_value <= max_value);
-
- return(next_value);
-}
-
-/*********************************************************************//**
-Initializes some fields in an InnoDB transaction object.
-The fields set are check_foreigns, check_unique_secondary, take_stats and,
-only while the transaction is not started, fake_changes. All are derived
-from the session options of 'thd'. */
-static
-void
-innobase_trx_init(
-/*==============*/
- THD* thd, /*!< in: user thread handle */
- trx_t* trx) /*!< in/out: InnoDB transaction handle */
-{
- DBUG_ENTER("innobase_trx_init");
- DBUG_ASSERT(thd == trx->mysql_thd);
-
- /* Foreign key checks stay on unless the session disabled them. */
- trx->check_foreigns = !thd_test_options(
- thd, OPTION_NO_FOREIGN_KEY_CHECKS);
-
- /* Likewise for unique checks on secondary indexes. */
- trx->check_unique_secondary = !thd_test_options(
- thd, OPTION_RELAXED_UNIQUE_CHECKS);
-
- /* Transaction on start caches the fake_changes state and uses it for
- complete transaction lifetime.
- There are some APIs that doesn't need an active transaction object
- but transaction object are just use as a cache object/data carrier.
- Before using transaction object for such APIs refresh the state of
- fake_changes. */
- if (trx->state == TRX_STATE_NOT_STARTED) {
- trx->fake_changes = thd_fake_changes(thd);
- }
-
-#ifdef EXTENDED_SLOWLOG
- /* Collect per-transaction statistics only when the session's slow
- log verbosity has the SLOG_V_INNODB bit set. */
- if (thd_log_slow_verbosity(thd) & (1ULL << SLOG_V_INNODB)) {
- trx->take_stats = TRUE;
- } else {
- trx->take_stats = FALSE;
- }
-#else
- trx->take_stats = FALSE;
-#endif
-
- DBUG_VOID_RETURN;
-}
-
-/*********************************************************************//**
-Allocates an InnoDB transaction for a MySQL handler object for DML.
-The new transaction is bound to 'thd' through trx->mysql_thd and then
-initialized with innobase_trx_init(). It is not stored in the THD here;
-that is left to the caller.
-@return InnoDB transaction handle */
-UNIV_INTERN
-trx_t*
-innobase_trx_allocate(
-/*==================*/
- THD* thd) /*!< in: user thread handle */
-{
- trx_t* trx;
-
- DBUG_ENTER("innobase_trx_allocate");
- DBUG_ASSERT(thd != NULL);
- DBUG_ASSERT(EQ_CURRENT_THD(thd));
-
- trx = trx_allocate_for_mysql();
-
- /* Must be set before innobase_trx_init(), which asserts on it. */
- trx->mysql_thd = thd;
-
- innobase_trx_init(thd, trx);
-
- DBUG_RETURN(trx);
-}
-
-/*********************************************************************//**
-Gets the InnoDB transaction handle for a MySQL handler object, creates
-an InnoDB transaction struct if the corresponding MySQL thread struct still
-lacks one. An existing handle is checked against TRX_MAGIC_N and the
-server is aborted via ut_error if the check fails. In every case the
-session-dependent fields are refreshed with innobase_trx_init().
-@return InnoDB transaction handle */
-static inline
-trx_t*
-check_trx_exists(
-/*=============*/
- THD* thd) /*!< in: user thread handle */
-{
- /* Reference to the slot inside the THD, so the assignment below
- updates the value seen through thd_to_trx(). */
- trx_t*& trx = thd_to_trx(thd);
-
- if (trx == NULL) {
- trx = innobase_trx_allocate(thd);
- thd_set_ha_data(thd, innodb_hton_ptr, trx);
- } else if (UNIV_UNLIKELY(trx->magic_n != TRX_MAGIC_N)) {
- mem_analyze_corruption(trx);
- ut_error;
- }
-
- /* Also runs for a freshly allocated trx, which
- innobase_trx_allocate() has already initialized once. */
- innobase_trx_init(thd, trx);
-
- return(trx);
-}
-
-/*************************************************************************
-Gets the InnoDB transaction attached to the current thread.
-@return value stored for current_thd by thd_to_trx(), or NULL when there
-is no current thread */
-trx_t*
-innobase_get_trx()
-{
- THD* cur_thd = current_thd;
-
- /* Keep the branch hint: having a current thread is the common case. */
- if (!likely(cur_thd != 0)) {
- return(NULL);
- }
-
- return(thd_to_trx(cur_thd));
-}
-
-/*************************************************************************
-Returns the value of thd_opt_slow_log() converted to ibool (presumably
-whether slow query logging is enabled - confirm against its definition).
-Builds without EXTENDED_SLOWLOG always return FALSE.
-@return slow log setting, or FALSE without EXTENDED_SLOWLOG */
-ibool
-innobase_get_slow_log()
-{
-#ifdef EXTENDED_SLOWLOG
- return((ibool) thd_opt_slow_log());
-#else
- return(FALSE);
-#endif
-}
-
-/*********************************************************************//**
-Checks whether a transaction has been registered with MySQL, i.e. whether
-its is_registered flag equals 1.
-@return true if transaction is registered with MySQL 2PC coordinator */
-static inline
-bool
-trx_is_registered_for_2pc(
-/*=========================*/
- const trx_t* trx) /* in: transaction */
-{
- return(trx->is_registered == 1);
-}
-
-/*********************************************************************//**
-Note that innobase_commit_ordered() was run.
-The transaction must already be registered for 2PC; this is enforced with
-ut_a(). */
-static inline
-void
-trx_set_active_commit_ordered(
-/*==============================*/
- trx_t* trx) /* in: transaction */
-{
- ut_a(trx_is_registered_for_2pc(trx));
- trx->active_commit_ordered = 1;
-}
-
-/*********************************************************************//**
-Note that a transaction has been registered with MySQL 2PC coordinator.
-Sets is_registered to 1; active_commit_ordered is expected to be 0 at this
-point (checked by a debug assertion only). */
-static inline
-void
-trx_register_for_2pc(
-/*==================*/
- trx_t* trx) /* in: transaction */
-{
- trx->is_registered = 1;
- ut_ad(trx->active_commit_ordered == 0);
-}
-
-/*********************************************************************//**
-Note that a transaction has been deregistered.
-Clears both the is_registered and the active_commit_ordered flag. */
-static inline
-void
-trx_deregister_from_2pc(
-/*====================*/
- trx_t* trx) /* in: transaction */
-{
- trx->is_registered = 0;
- trx->active_commit_ordered = 0;
-}
-
-/*********************************************************************//**
-Check whether a transaction has active_commit_ordered set
-@return true if the active_commit_ordered flag equals 1 */
-static inline
-bool
-trx_is_active_commit_ordered(
-/*=========================*/
- const trx_t* trx) /* in: transaction */
-{
- return(trx->active_commit_ordered == 1);
-}
-
-/*********************************************************************//**
-Check if transaction is started.
-@return true if the transaction state is anything other than
-TRX_STATE_NOT_STARTED */
-static
-bool
-trx_is_started(
-/*===========*/
- trx_t* trx) /* in: transaction */
-{
- return(trx->state != TRX_STATE_NOT_STARTED);
-}
-
-/****************************************************************//**
-Update log_checksum_algorithm_ptr with a pointer to the function corresponding
-to a given checksum algorithm. The strict and non-strict variant of each
-algorithm select the same function. An unknown algorithm value aborts the
-server through ut_a(0). */
-
-void
-innodb_log_checksum_func_update(
-/*============================*/
- ulint algorithm) /*!< in: algorithm */
-{
- switch (algorithm) {
- case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
- case SRV_CHECKSUM_ALGORITHM_INNODB:
- log_checksum_algorithm_ptr=log_block_calc_checksum_innodb;
- break;
- case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
- case SRV_CHECKSUM_ALGORITHM_CRC32:
- log_checksum_algorithm_ptr=log_block_calc_checksum_crc32;
- break;
- case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
- case SRV_CHECKSUM_ALGORITHM_NONE:
- log_checksum_algorithm_ptr=log_block_calc_checksum_none;
- break;
- default:
- /* Not one of the known algorithm values. */
- ut_a(0);
- }
-}
-
-/****************************************************************//**
-On update hook for the innodb_log_checksum_algorithm variable.
-Switches the checksum function and stores the new algorithm in
-srv_log_checksum_algorithm, both while holding log_sys->mutex. The thd,
-var and var_ptr arguments are not used by this function. */
-static
-void
-innodb_log_checksum_algorithm_update(
-/*=================================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- srv_checksum_algorithm_t algorithm;
-
- /* 'save' points to the new value, stored as an ulong. */
- algorithm = (srv_checksum_algorithm_t)
- (*static_cast<const ulong*>(save));
-
- /* Make sure we are the only log user */
- mutex_enter(&log_sys->mutex);
-
- innodb_log_checksum_func_update(algorithm);
-
- srv_log_checksum_algorithm = algorithm;
-
- mutex_exit(&log_sys->mutex);
-}
-
-/*********************************************************************//**
-Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object.
-Those flags are stored in .frm file and end up in the MySQL table object,
-but are frequently used inside InnoDB so we keep their copies into the
-InnoDB table object.
-Three settings are copied: persistent statistics on/off, statistics
-auto-recalculation on/off, and the number of statistics sample pages. */
-UNIV_INTERN
-void
-innobase_copy_frm_flags_from_create_info(
-/*=====================================*/
- dict_table_t* innodb_table, /*!< in/out: InnoDB table */
- const HA_CREATE_INFO* create_info) /*!< in: create info */
-{
- ibool ps_on;
- ibool ps_off;
-
- if (dict_table_is_temporary(innodb_table)) {
- /* Temp tables do not use persistent stats. */
- ps_on = FALSE;
- ps_off = TRUE;
- } else {
- /* Non-zero when the respective option bit is set; the
- values are the masked bits, not normalized to 0/1. */
- ps_on = create_info->table_options
- & HA_OPTION_STATS_PERSISTENT;
- ps_off = create_info->table_options
- & HA_OPTION_NO_STATS_PERSISTENT;
- }
-
- dict_stats_set_persistent(innodb_table, ps_on, ps_off);
-
- dict_stats_auto_recalc_set(
- innodb_table,
- create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON,
- create_info->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF);
-
- innodb_table->stats_sample_pages = create_info->stats_sample_pages;
-}
-
-/*********************************************************************//**
-Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object.
-Those flags are stored in .frm file and end up in the MySQL table object,
-but are frequently used inside InnoDB so we keep their copies into the
-InnoDB table object.
-Mirrors innobase_copy_frm_flags_from_create_info(), reading
-db_create_options, stats_auto_recalc and stats_sample_pages from the
-table share instead. */
-UNIV_INTERN
-void
-innobase_copy_frm_flags_from_table_share(
-/*=====================================*/
- dict_table_t* innodb_table, /*!< in/out: InnoDB table */
- const TABLE_SHARE* table_share) /*!< in: table share */
-{
- ibool ps_on;
- ibool ps_off;
-
- if (dict_table_is_temporary(innodb_table)) {
- /* Temp tables do not use persistent stats */
- ps_on = FALSE;
- ps_off = TRUE;
- } else {
- /* Non-zero when the respective option bit is set; the
- values are the masked bits, not normalized to 0/1. */
- ps_on = table_share->db_create_options
- & HA_OPTION_STATS_PERSISTENT;
- ps_off = table_share->db_create_options
- & HA_OPTION_NO_STATS_PERSISTENT;
- }
-
- dict_stats_set_persistent(innodb_table, ps_on, ps_off);
-
- dict_stats_auto_recalc_set(
- innodb_table,
- table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_ON,
- table_share->stats_auto_recalc == HA_STATS_AUTO_RECALC_OFF);
-
- innodb_table->stats_sample_pages = table_share->stats_sample_pages;
-}
-
-/*********************************************************************//**
-Construct ha_innobase handler.
-Both arguments are forwarded to the handler base class. The capability
-flags in int_table_flags are fixed here; HA_REQUIRE_PRIMARY_KEY is included
-only if srv_force_primary_key is set at the time of construction. */
-UNIV_INTERN
-ha_innobase::ha_innobase(
-/*=====================*/
- handlerton* hton, /*!< in: handlerton passed to handler */
- TABLE_SHARE* table_arg) /*!< in: table share passed to handler */
- :handler(hton, table_arg),
- int_table_flags(HA_REC_NOT_IN_SEQ |
- HA_NULL_IN_KEY | HA_CAN_VIRTUAL_COLUMNS |
- HA_CAN_INDEX_BLOBS | HA_CONCURRENT_OPTIMIZE |
- HA_CAN_SQL_HANDLER |
- HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
- HA_PRIMARY_KEY_IN_READ_INDEX |
- HA_BINLOG_ROW_CAPABLE |
- HA_CAN_GEOMETRY | HA_PARTIAL_COLUMN_READ |
- HA_TABLE_SCAN_ON_INDEX | HA_CAN_FULLTEXT |
- (srv_force_primary_key ? HA_REQUIRE_PRIMARY_KEY : 0 ) |
- HA_CAN_FULLTEXT_EXT | HA_CAN_EXPORT),
- start_of_scan(0),
- num_write_row(0),
- ha_partition_stats(NULL)
-{}
-
-/*********************************************************************//**
-Destruct ha_innobase handler. The body is empty: nothing is released
-here. */
-UNIV_INTERN
-ha_innobase::~ha_innobase()
-/*======================*/
-{
-}
-
-/*********************************************************************//**
-Updates the user_thd field in a handle and also allocates a new InnoDB
-transaction handle if needed, and updates the transaction fields in the
-prebuilt struct. */
-UNIV_INTERN inline
-void
-ha_innobase::update_thd(
-/*====================*/
- THD* thd) /*!< in: thd to use the handle */
-{
- trx_t* trx;
-
- DBUG_ENTER("ha_innobase::update_thd");
- DBUG_PRINT("ha_innobase::update_thd", ("user_thd: %p -> %p",
- user_thd, thd));
-
- /* The table should have been opened in ha_innobase::open(). */
- DBUG_ASSERT(prebuilt->table->n_ref_count > 0);
-
- /* Fetches the transaction of 'thd', creating it if necessary. */
- trx = check_trx_exists(thd);
-
- /* Re-point prebuilt only when it refers to a different trx. */
- if (prebuilt->trx != trx) {
-
- row_update_prebuilt_trx(prebuilt, trx);
- }
-
- user_thd = thd;
- DBUG_VOID_RETURN;
-}
-
-/*********************************************************************//**
-Updates the user_thd field in a handle and also allocates a new InnoDB
-transaction handle if needed, and updates the transaction fields in the
-prebuilt struct. This overload takes the thread from ha_thd() and
-forwards to update_thd(THD*). */
-UNIV_INTERN
-void
-ha_innobase::update_thd()
-/*=====================*/
-{
- THD* thd = ha_thd();
-
- /* Debug check that the handler's thread is the current thread. */
- ut_ad(EQ_CURRENT_THD(thd));
- update_thd(thd);
-}
-
-/*********************************************************************//**
-Registers an InnoDB transaction with the MySQL 2PC coordinator, so that
-the MySQL XA code knows to call the InnoDB prepare and commit, or rollback
-for the transaction. This MUST be called for every transaction for which
-the user may call commit or rollback. Calling this several times to register
-the same transaction is allowed, too. This function also registers the
-current SQL statement. */
-static inline
-void
-innobase_register_trx(
-/*==================*/
- handlerton* hton, /* in: Innobase handlerton */
- THD* thd, /* in: MySQL thd (connection) object */
- trx_t* trx) /* in: transaction to register */
-{
- /* Statement-level registration (second argument FALSE): done on
- every call. */
- trans_register_ha(thd, FALSE, hton);
-
- /* Transaction-level registration (second argument TRUE): only the
- first time, and only when the session is not in autocommit mode or
- is inside an explicit BEGIN. */
- if (!trx_is_registered_for_2pc(trx)
- && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
- trans_register_ha(thd, TRUE, hton);
- }
-
- trx_register_for_2pc(trx);
-}
-
-/* BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
- ------------------------------------------------------------
-
-1) The use of the query cache for TBL is disabled when there is an
-uncommitted change to TBL.
-
-2) When a change to TBL commits, InnoDB stores the current value of
-its global trx id counter, let us denote it by INV_TRX_ID, to the table object
-in the InnoDB data dictionary, and does only allow such transactions whose
-id <= INV_TRX_ID to use the query cache.
-
-3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
-modification because of an ON DELETE CASCADE, we invalidate the MySQL query
-cache of TBL immediately.
-
-How this is implemented inside InnoDB:
-
-1) Since every modification always sets an IX type table lock on the InnoDB
-table, it is easy to check if there can be uncommitted modifications for a
-table: just check if there are locks in the lock list of the table.
-
-2) When a transaction inside InnoDB commits, it reads the global trx id
-counter and stores the value INV_TRX_ID to the tables on which it had a lock.
-
-3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
-InnoDB calls an invalidate method for the MySQL query cache for that table.
-
-How this is implemented inside sql_cache.cc:
-
-1) The query cache for an InnoDB table TBL is invalidated immediately at an
-INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
-invalidation to the transaction commit.
-
-2) To store or retrieve a value from the query cache of an InnoDB table TBL,
-any query must first ask InnoDB's permission. We must pass the thd as a
-parameter because InnoDB will look at the trx id, if any, associated with
-that thd. Also the full_name which is used as key to search for the table
-object. The full_name is a string containing the normalized path to the
-table in the canonical format.
-
-3) Use of the query cache for InnoDB tables is now allowed also when
-AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
-put restrictions on the use of the query cache.
-*/
-
-/******************************************************************//**
-The MySQL query cache uses this to check from InnoDB if the query cache at
-the moment is allowed to operate on an InnoDB table. The SQL query must
-be a non-locking SELECT.
-
-The query cache is allowed to operate on certain query only if this function
-returns TRUE for all tables in the query.
-
-If thd is not in the autocommit state, this function also starts a new
-transaction for thd if there is no active trx yet, and assigns a consistent
-read view to it if there is no read view yet.
-
-Why a deadlock of threads is not possible: the query cache calls this function
-at the start of a SELECT processing. Then the calling thread cannot be
-holding any InnoDB semaphores. The calling thread is holding the
-query cache mutex, and this function will reserve the InnoDB trx_sys->mutex.
-Thus, the 'rank' in sync0sync.h of the MySQL query cache mutex is above
-the InnoDB trx_sys->mutex.
-@return TRUE if permitted, FALSE if not; note that the value FALSE
-does not mean we should invalidate the query cache: invalidation is
-called explicitly */
-static
-my_bool
-innobase_query_caching_of_table_permitted(
-/*======================================*/
- THD* thd, /*!< in: thd of the user who is trying to
- store a result to the query cache or
- retrieve it */
- const char* full_name, /*!< in: normalized path to the table */
- uint full_name_len, /*!< in: length of the normalized path
- to the table */
- ulonglong *unused) /*!< unused for this engine */
-{
- ibool is_autocommit;
- trx_t* trx;
- char norm_name[1000];
-
- /* norm_name is a fixed-size buffer; refuse to continue if the
- incoming name could not fit into it. */
- ut_a(full_name_len < 999);
-
- trx = check_trx_exists(thd);
-
- if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
- /* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
- plain SELECT if AUTOCOMMIT is not on. */
-
- return((my_bool)FALSE);
- }
-
- /* Holding the adaptive search latch here is unexpected: log it,
- then release the latch below and carry on. */
- if (UNIV_UNLIKELY(trx->has_search_latch)) {
- sql_print_error("The calling thread is holding the adaptive "
- "search, latch though calling "
- "innobase_query_caching_of_table_permitted.");
- trx_print(stderr, trx, 1024);
- }
-
- trx_search_latch_release_if_reserved(trx);
-
- innobase_srv_conc_force_exit_innodb(trx);
-
- /* Autocommit means neither OPTION_NOT_AUTOCOMMIT nor OPTION_BEGIN
- is set for the session. */
- if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
- is_autocommit = TRUE;
- } else {
- is_autocommit = FALSE;
-
- }
-
- if (is_autocommit && trx->n_mysql_tables_in_use == 0) {
- /* We are going to retrieve the query result from the query
- cache. This cannot be a store operation to the query cache
- because then MySQL would have locks on tables already.
-
- TODO: if the user has used LOCK TABLES to lock the table,
- then we open a transaction in the call of row_.. below.
- That trx can stay open until UNLOCK TABLES. The same problem
- exists even if we do not use the query cache. MySQL should be
- modified so that it ALWAYS calls some cleanup function when
- the processing of a query ends!
-
- We can imagine we instantaneously serialize this consistent
- read trx to the current trx id counter. If trx2 would have
- changed the tables of a query result stored in the cache, and
- trx2 would have already committed, making the result obsolete,
- then trx2 would have already invalidated the cache. Thus we
- can trust the result in the cache is ok for this query. */
-
- return((my_bool)TRUE);
- }
-
- /* Normalize the table name to InnoDB format */
- normalize_table_name(norm_name, full_name);
-
- innobase_register_trx(innodb_hton_ptr, thd, trx);
-
- /* The final decision is made by the row layer for this trx and
- table. */
- if (row_search_check_if_query_cache_permitted(trx, norm_name)) {
-
- /* printf("Query cache for %s permitted\n", norm_name); */
-
- return((my_bool)TRUE);
- }
-
- /* printf("Query cache for %s NOT permitted\n", norm_name); */
-
- return((my_bool)FALSE);
-}
-
-/*****************************************************************//**
-Invalidates the MySQL query cache for the table.
-Without HAVE_QUERY_CACHE the function body is empty. The full_name_len
-argument is not read: the lengths are recomputed from the two
-NUL-terminated parts of full_name. */
-UNIV_INTERN
-void
-innobase_invalidate_query_cache(
-/*============================*/
- trx_t* trx, /*!< in: transaction which
- modifies the table */
- const char* full_name, /*!< in: concatenation of
- database name, null char NUL,
- table name, null char NUL;
- NOTE that in Windows this is
- always in LOWER CASE! */
- ulint full_name_len) /*!< in: full name length where
- also the null chars count */
-{
- /* Note that the sync0sync.h rank of the query cache mutex is just
- above the InnoDB trx_sys_t->lock. The caller of this function must
- not have latches of a lower rank. */
-
-#ifdef HAVE_QUERY_CACHE
- char qcache_key_name[2 * (NAME_LEN + 1)];
- size_t tabname_len;
- size_t dbname_len;
-
- /* Construct the key("db-name\0table$name\0") for the query cache using
- the path name("db@002dname\0table@0024name\0") of the table in its
- canonical form. */
- dbname_len = filename_to_tablename(full_name, qcache_key_name,
- sizeof(qcache_key_name));
- /* The table name starts right after the NUL that ends the database
- name, in both the input and the key being built. */
- tabname_len = filename_to_tablename(full_name + strlen(full_name) + 1,
- qcache_key_name + dbname_len + 1,
- sizeof(qcache_key_name)
- - dbname_len - 1);
-
- /* Argument TRUE below means we are using transactions */
- mysql_query_cache_invalidate4(trx->mysql_thd,
- qcache_key_name,
- (dbname_len + tabname_len + 2),
- TRUE);
-#endif
-}
-
-/*****************************************************************//**
-Convert an SQL identifier to the MySQL system_charset_info (UTF-8)
-and quote it if needed.
-The output is truncated to buflen bytes and is NOT NUL-terminated; the
-caller learns the length from the returned pointer.
-@return pointer to the end of buf */
-static
-char*
-innobase_convert_identifier(
-/*========================*/
- char* buf, /*!< out: buffer for converted identifier */
- ulint buflen, /*!< in: length of buf, in bytes */
- const char* id, /*!< in: identifier to convert */
- ulint idlen, /*!< in: length of id, in bytes */
- THD* thd, /*!< in: MySQL connection thread, or NULL */
- ibool file_id)/*!< in: TRUE=id is a table or database name;
- FALSE=id is an UTF-8 string */
-{
- char nz2[MAX_TABLE_NAME_LEN + 1];
- const char* s = id;
- int q;
-
- if (file_id) {
-
- char nz[MAX_TABLE_NAME_LEN + 1];
-
- /* Decode the table name. The MySQL function expects
- a NUL-terminated string. The input and output strings
- buffers must not be shared. */
- ut_a(idlen <= MAX_TABLE_NAME_LEN);
- memcpy(nz, id, idlen);
- nz[idlen] = 0;
-
- /* From here on the decoded name in nz2 is the source and
- idlen is its length; it is copied out without quoting. */
- s = nz2;
- idlen = explain_filename(thd, nz, nz2, sizeof nz2,
- EXPLAIN_PARTITIONS_AS_COMMENT);
- goto no_quote;
- }
-
- /* See if the identifier needs to be quoted. */
- if (UNIV_UNLIKELY(!thd)) {
- q = '"';
- } else {
- q = get_quote_char_for_identifier(thd, s, (int) idlen);
- }
-
- if (q == EOF) {
-no_quote:
- /* Plain copy, cut off at the buffer size. */
- if (UNIV_UNLIKELY(idlen > buflen)) {
- idlen = buflen;
- }
- memcpy(buf, s, idlen);
- return(buf + idlen);
- }
-
- /* Quote the identifier. */
- if (buflen < 2) {
- /* No room for the two quote characters: write nothing. */
- return(buf);
- }
-
- *buf++ = q;
- buflen--;
-
- /* Each iteration keeps at least one byte free for the closing
- quote, hence the "< 3" and "< 2" limits. */
- for (; idlen; idlen--) {
- int c = *s++;
- if (UNIV_UNLIKELY(c == q)) {
- if (UNIV_UNLIKELY(buflen < 3)) {
- break;
- }
-
- /* A quote character inside the name is doubled. */
- *buf++ = c;
- *buf++ = c;
- buflen -= 2;
- } else {
- if (UNIV_UNLIKELY(buflen < 2)) {
- break;
- }
-
- *buf++ = c;
- buflen--;
- }
- }
-
- *buf++ = q;
- return(buf);
-}
-
-/*****************************************************************//**
-Convert a table or index name to the MySQL system_charset_info (UTF-8)
-and quote it if needed.
-A table name of the form "db/table" is emitted as the two converted parts
-joined by '.'. An index name starting with TEMP_INDEX_PREFIX is emitted
-without the prefix and with "--temporary--" appended if it fits. The output
-is not NUL-terminated.
-@return pointer to the end of buf */
-UNIV_INTERN
-char*
-innobase_convert_name(
-/*==================*/
- char* buf, /*!< out: buffer for converted identifier */
- ulint buflen, /*!< in: length of buf, in bytes */
- const char* id, /*!< in: identifier to convert */
- ulint idlen, /*!< in: length of id, in bytes */
- THD* thd, /*!< in: MySQL connection thread, or NULL */
- ibool table_id)/*!< in: TRUE=id is a table or database name;
- FALSE=id is an index name */
-{
- char* s = buf;
- const char* bufend = buf + buflen;
-
- if (table_id) {
- const char* slash = (const char*) memchr(id, '/', idlen);
- if (!slash) {
-
- /* No database part: convert the whole id as one
- identifier. */
- goto no_db_name;
- }
-
- /* Print the database name and table name separately. */
- s = innobase_convert_identifier(s, bufend - s, id, slash - id,
- thd, TRUE);
- /* The separator and table name are added only while there
- is still room in the buffer. */
- if (UNIV_LIKELY(s < bufend)) {
- *s++ = '.';
- s = innobase_convert_identifier(s, bufend - s,
- slash + 1, idlen
- - (slash - id) - 1,
- thd, TRUE);
- }
- } else if (UNIV_UNLIKELY(*id == TEMP_INDEX_PREFIX)) {
- /* Temporary index name (smart ALTER TABLE) */
- const char temp_index_suffix[]= "--temporary--";
-
- s = innobase_convert_identifier(buf, buflen, id + 1, idlen - 1,
- thd, FALSE);
- /* The suffix is appended whole or not at all. */
- if (s - buf + (sizeof temp_index_suffix - 1) < buflen) {
- memcpy(s, temp_index_suffix,
- sizeof temp_index_suffix - 1);
- s += sizeof temp_index_suffix - 1;
- }
- } else {
-no_db_name:
- s = innobase_convert_identifier(buf, buflen, id, idlen,
- thd, table_id);
- }
-
- return(s);
-}
-
-/*****************************************************************//**
-A wrapper function of innobase_convert_name(), convert a table or
-index name to the MySQL system_charset_info (UTF-8) and quote it if needed.
-Unlike innobase_convert_name(), the result is NUL-terminated whenever
-buflen > 0. If the converted name fills the whole buffer, its last byte is
-replaced by the terminating NUL. Nothing is written when buflen is 0. */
-UNIV_INTERN
-void
-innobase_format_name(
-/*==================*/
- char* buf, /*!< out: buffer for converted identifier */
- ulint buflen, /*!< in: length of buf, in bytes */
- const char* name, /*!< in: index or table name to format */
- ibool is_index_name) /*!< in: index name */
-{
- const char* bufend;
- ulint len;
-
- if (buflen == 0) {
- /* Not even room for the terminating NUL. */
- return;
- }
-
- bufend = innobase_convert_name(buf, buflen, name, strlen(name),
- NULL, !is_index_name);
-
- len = (ulint) (bufend - buf);
-
- ut_ad(len < buflen);
-
- /* ut_ad() is a debug-only assertion, so also guard the write in
- non-debug builds: never store the NUL past the end of buf. */
- if (len >= buflen) {
- len = buflen - 1;
- }
-
- buf[len] = '\0';
-}
-
-/**********************************************************************//**
-Tells whether the connection that owns the given transaction has been
-killed. A NULL transaction, or one without a MySQL thread attached, is
-never reported as interrupted.
-@return TRUE if interrupted */
-UNIV_INTERN
-ibool
-trx_is_interrupted(
-/*===============*/
-	const trx_t*	trx)	/*!< in: transaction */
-{
-	if (!trx || !trx->mysql_thd) {
-		return(FALSE);
-	}
-
-	/* Any non-zero kill level counts as an interruption. */
-	if (thd_kill_level((THD*) trx->mysql_thd)) {
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/**********************************************************************//**
-Tells whether the session that owns the given transaction has the
-strict_mode session variable enabled. A NULL transaction, or one without
-a MySQL thread attached, is never strict.
-@return TRUE if strict */
-UNIV_INTERN
-ibool
-trx_is_strict(
-/*==========*/
-	trx_t*	trx)	/*!< in: transaction */
-{
-	if (!trx || !trx->mysql_thd) {
-		return(FALSE);
-	}
-
-	return(THDVAR(trx->mysql_thd, strict_mode) ? TRUE : FALSE);
-}
-
-/**************************************************************//**
-Clears the per-statement retrieval flags of the prebuilt struct and, if
-an index condition was pushed down, drops it and invalidates the cached
-MySQL row template. */
-inline
-void
-ha_innobase::reset_template(void)
-/*=============================*/
-{
-	ut_ad(prebuilt->magic_n == ROW_PREBUILT_ALLOCATED);
-	ut_ad(prebuilt->magic_n2 == prebuilt->magic_n);
-
-	/* Debug hook; it has to look at in_fts_query before the flag is
-	cleared below. Force table to be freed in close_thread_table(). */
-	DBUG_EXECUTE_IF("free_table_in_fts_query",
-		if (prebuilt->in_fts_query) {
-			table->m_needs_reopen = true;
-		}
-		);
-
-	prebuilt->in_fts_query = 0;
-	prebuilt->read_just_key = 0;
-	prebuilt->keep_other_fields_on_keyread = 0;
-
-	if (!prebuilt->idx_cond) {
-		/* No index condition pushdown state to reset. */
-		return;
-	}
-
-	prebuilt->idx_cond = NULL;
-	prebuilt->idx_cond_n_cols = 0;
-
-	/* Invalidate prebuilt->mysql_template
-	in ha_innobase::write_row(). */
-	prebuilt->template_type = ROW_MYSQL_NO_TEMPLATE;
-}
-
-/*****************************************************************//**
-Call this when you have opened a new table handle in HANDLER, before you
-call index_read_idx() etc. Actually, we can let the cursor stay open even
-over a transaction commit! Then you should call this before every operation,
-fetch next etc. This function inits the necessary things even after a
-transaction commit: it refreshes the trx pointers, starts the transaction
-and assigns a read view if needed, registers the transaction, and sets up
-the prebuilt struct for non-locking reads that fetch all columns. */
-UNIV_INTERN
-void
-ha_innobase::init_table_handle_for_HANDLER(void)
-/*============================================*/
-{
- /* If current thd does not yet have a trx struct, create one.
- If the current handle does not yet have a prebuilt struct, create
- one. Update the trx pointers in the prebuilt struct. Normally
- this operation is done in external_lock. */
-
- update_thd(ha_thd());
-
- /* Initialize the prebuilt struct much like it would be inited in
- external_lock */
-
- trx_search_latch_release_if_reserved(prebuilt->trx);
-
- innobase_srv_conc_force_exit_innodb(prebuilt->trx);
-
- /* If the transaction is not started yet, start it */
-
- trx_start_if_not_started_xa(prebuilt->trx);
-
- /* Assign a read view if the transaction does not have it yet */
-
- trx_assign_read_view(prebuilt->trx);
-
- innobase_register_trx(ht, user_thd, prebuilt->trx);
-
- /* We did the necessary inits in this function, no need to repeat them
- in row_search_for_mysql */
-
- prebuilt->sql_stat_start = FALSE;
-
- /* We always let HANDLER do its reads as consistent reads, even
- if the trx isolation level would have been specified as SERIALIZABLE */
-
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
-
- /* Always fetch all columns in the index record */
-
- prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
-
- /* We want always to fetch all columns in the whole row? Or do
- we???? (Open question left by the original author.) */
-
- prebuilt->used_in_HANDLER = TRUE;
- reset_template();
-}
-
-/****************************************************************//**
-File extensions associated with an InnoDB single-table tablespace.
-The list is terminated by NullS. innobase_init() publishes it through
-the handlerton's tablefile_extensions when srv_file_per_table is set. */
-static const char* ha_innobase_exts[] = {
- ".ibd",
- ".isl",
- NullS
-};
-
-/*********************************************************************//**
-Opens an InnoDB database: fills in the handlerton callbacks, validates the
-startup parameters, copies them into the srv_* configuration variables,
-starts (or creates) the database and initializes the handler-level mutexes,
-condition variable and monitor counters.
-On a validation failure that happens after the data file path copy has been
-made, the copy and the parsed paths are released via mem_free_and_error.
-@return 0 (FALSE) on success, 1 (TRUE) on failure */
-static
-int
-innobase_init(
-/*==========*/
- void *p) /*!< in: InnoDB handlerton */
-{
- static char current_dir[3]; /*!< Set if using current lib */
- int err;
- bool ret;
- char *default_path;
- uint format_id;
- ulong num_pll_degree;
-
- DBUG_ENTER("innobase_init");
- handlerton *innobase_hton= (handlerton*) p;
- innodb_hton_ptr = innobase_hton;
-
- innobase_hton->state = SHOW_OPTION_YES;
- innobase_hton->db_type= DB_TYPE_INNODB;
- innobase_hton->savepoint_offset = sizeof(trx_named_savept_t);
- innobase_hton->close_connection = innobase_close_connection;
- innobase_hton->savepoint_set = innobase_savepoint;
- innobase_hton->savepoint_rollback = innobase_rollback_to_savepoint;
- innobase_hton->savepoint_rollback_can_release_mdl =
- innobase_rollback_to_savepoint_can_release_mdl;
- innobase_hton->savepoint_release = innobase_release_savepoint;
- innobase_hton->commit_ordered=innobase_commit_ordered;
- innobase_hton->commit = innobase_commit;
- innobase_hton->rollback = innobase_rollback;
- innobase_hton->prepare = innobase_xa_prepare;
- innobase_hton->recover = innobase_xa_recover;
- innobase_hton->commit_by_xid = innobase_commit_by_xid;
- innobase_hton->rollback_by_xid = innobase_rollback_by_xid;
- innobase_hton->commit_checkpoint_request=innobase_checkpoint_request;
- innobase_hton->checkpoint_state= innobase_checkpoint_state;
- innobase_hton->create_cursor_read_view = innobase_create_cursor_view;
- innobase_hton->set_cursor_read_view = innobase_set_cursor_view;
- innobase_hton->close_cursor_read_view = innobase_close_cursor_view;
- innobase_hton->create = innobase_create_handler;
- innobase_hton->drop_database = innobase_drop_database;
- innobase_hton->panic = innobase_end;
-
- innobase_hton->start_consistent_snapshot =
- innobase_start_trx_and_assign_read_view;
-
- /*innobase_hton->store_binlog_info =
- innobase_store_binlog_info;*/
-
- innobase_hton->flush_logs = innobase_flush_logs;
- innobase_hton->show_status = innobase_show_status;
- innobase_hton->flags = HTON_SUPPORTS_EXTENDED_KEYS |
- HTON_SUPPORTS_FOREIGN_KEYS;
-
- innobase_hton->kill_query = innobase_kill_connection;
-
- if (srv_file_per_table)
- innobase_hton->tablefile_extensions = ha_innobase_exts;
-
- innobase_hton->table_options = innodb_table_option_list;
-#ifdef WITH_WSREP
- innobase_hton->abort_transaction=wsrep_abort_transaction;
- innobase_hton->set_checkpoint=innobase_wsrep_set_checkpoint;
- innobase_hton->get_checkpoint=innobase_wsrep_get_checkpoint;
- innobase_hton->fake_trx_id=wsrep_fake_trx_id;
-#endif /* WITH_WSREP */
-
- innodb_remember_check_sysvar_funcs();
-
- ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
-
-#ifndef DBUG_OFF
- /* Debug-only self test: the file name "-@" must round-trip through
- filename_to_tablename() as the MySQL 5.0 prefix followed by "-@". */
- static const char test_filename[] = "-@";
- char test_tablename[sizeof test_filename
- + sizeof(srv_mysql50_table_name_prefix) - 1];
- if ((sizeof(test_tablename)) - 1
- != filename_to_tablename(test_filename,
- test_tablename,
- sizeof(test_tablename), true)
- || strncmp(test_tablename,
- srv_mysql50_table_name_prefix,
- sizeof(srv_mysql50_table_name_prefix) - 1)
- || strcmp(test_tablename
- + sizeof(srv_mysql50_table_name_prefix) - 1,
- test_filename)) {
-
- sql_print_error("tablename encoding has been changed");
-
- goto error;
- }
-#endif /* DBUG_OFF */
-
- /* srv_log_block_size stays 0 if innodb_log_block_size is not one of
- the accepted powers of two; that case is rejected further below. */
- srv_log_block_size = 0;
- if (innobase_log_block_size != (1 << 9)) { /*!=512*/
- uint n_shift;
-
- fprintf(stderr,
- "InnoDB: Warning: innodb_log_block_size has been "
- "changed from default value 512. (###EXPERIMENTAL### "
- "operation)\n");
- for (n_shift = 9; n_shift <= UNIV_PAGE_SIZE_SHIFT_MAX;
- n_shift++) {
- if (innobase_log_block_size == ((ulong)1 << n_shift)) {
- srv_log_block_size = (1 << n_shift);
- fprintf(stderr,
- "InnoDB: The log block size is set to "
- ULINTPF ".\n",srv_log_block_size);
- break;
- }
- }
- } else {
- srv_log_block_size = 512;
- }
-
- /* The buffer pool needs to be able to accommodate enough many
- pages, even for larger pages */
- if (UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF
- && innobase_buffer_pool_size < (24 * 1024 * 1024)) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "innodb_page_size= " ULINTPF " requires "
- "innodb_buffer_pool_size > 24M current %lld. ",
- UNIV_PAGE_SIZE,
- innobase_buffer_pool_size);
- goto error;
- }
-
- if (!srv_log_block_size) {
- fprintf(stderr,
- "InnoDB: Error: %lu is not a valid value for "
- "innodb_log_block_size.\n"
- "InnoDB: Error: A valid value for "
- "innodb_log_block_size is\n"
- "InnoDB: Error: a power of 2 from 512 to 16384.\n",
- innobase_log_block_size);
- goto error;
- }
-
- /* Assert only after the user-facing validation above, so that an
- invalid innodb_log_block_size is reported as an error in debug
- builds too instead of tripping this assertion first. */
- ut_ad (srv_log_block_size >= OS_MIN_LOG_BLOCK_SIZE);
-
- /* Check that values don't overflow on 32-bit systems. */
- if (sizeof(ulint) == 4) {
- if (innobase_buffer_pool_size > UINT_MAX32) {
- sql_print_error(
- "innobase_buffer_pool_size can't be over 4GB"
- " on 32-bit systems");
-
- goto error;
- }
- }
-
-#ifndef HAVE_LZ4
- if (innodb_compression_algorithm == PAGE_LZ4_ALGORITHM) {
- sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
- "InnoDB: liblz4 is not installed. \n",
- innodb_compression_algorithm);
- goto error;
- }
-#endif
-
-#ifndef HAVE_LZO
- if (innodb_compression_algorithm == PAGE_LZO_ALGORITHM) {
- sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
- "InnoDB: liblzo is not installed. \n",
- innodb_compression_algorithm);
- goto error;
- }
-#endif
-
-#ifndef HAVE_LZMA
- if (innodb_compression_algorithm == PAGE_LZMA_ALGORITHM) {
- sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
- "InnoDB: liblzma is not installed. \n",
- innodb_compression_algorithm);
- goto error;
- }
-#endif
-
-#ifndef HAVE_BZIP2
- if (innodb_compression_algorithm == PAGE_BZIP2_ALGORITHM) {
- sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
- "InnoDB: libbz2 is not installed. \n",
- innodb_compression_algorithm);
- goto error;
- }
-#endif
-
-#ifndef HAVE_SNAPPY
- if (innodb_compression_algorithm == PAGE_SNAPPY_ALGORITHM) {
- sql_print_error("InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
- "InnoDB: libsnappy is not installed. \n",
- innodb_compression_algorithm);
- goto error;
- }
-#endif
-
- if ((srv_encrypt_tables || srv_encrypt_log)
- && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) {
- sql_print_error("InnoDB: cannot enable encryption, "
- "encryption plugin is not available");
- goto error;
- }
-
- os_innodb_umask = (ulint) my_umask;
-
- /* First calculate the default path for innodb_data_home_dir etc.,
- in case the user has not given any value.
-
- Note that when using the embedded server, the datadirectory is not
- necessarily the current directory of this program. */
-
- if (mysqld_embedded) {
- default_path = mysql_real_data_home;
- fil_path_to_mysql_datadir = mysql_real_data_home;
- } else {
- /* It's better to use current lib, to keep paths short */
- current_dir[0] = FN_CURLIB;
- current_dir[1] = FN_LIBCHAR;
- current_dir[2] = 0;
- default_path = current_dir;
- }
-
- ut_a(default_path);
-
- /* Set InnoDB initialization parameters according to the values
- read from MySQL .cnf file */
-
- /*--------------- Data files -------------------------*/
-
- /* The default dir for data files is the datadir of MySQL */
-
- srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
- default_path);
-
-
- /* Set default InnoDB data file size to 12 MB and let it be
- auto-extending. Thus users can use InnoDB in >= 4.0 without having
- to specify any startup options. */
-
- if (!innobase_data_file_path) {
- innobase_data_file_path = (char*) "ibdata1:12M:autoextend";
- }
-
- /* Since InnoDB edits the argument in the next call, we make another
- copy of it: */
-
- internal_innobase_data_file_path = my_strdup(innobase_data_file_path,
- MYF(MY_FAE));
-
- ret = (bool) srv_parse_data_file_paths_and_sizes(
- internal_innobase_data_file_path);
- if (ret == FALSE) {
- sql_print_error(
- "InnoDB: syntax error in innodb_data_file_path"
- " or size specified is less than 1 megabyte");
- /* Shared failure exit for every error detected from here on: releases
- the parsed paths and the copy made above before reporting failure. */
-mem_free_and_error:
- srv_free_paths_and_sizes();
- my_free(internal_innobase_data_file_path);
- goto error;
- }
-
- /* -------------- All log files ---------------------------*/
-
- /* The default dir for log files is the datadir of MySQL */
-
- if (!srv_log_group_home_dir) {
- srv_log_group_home_dir = default_path;
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- if (!innobase_log_arch_dir) {
- innobase_log_arch_dir = srv_log_group_home_dir;
- }
- srv_arch_dir = innobase_log_arch_dir;
-#endif /* UNIV_LOG_ARCHIVE */
-
- srv_normalize_path_for_win(srv_log_group_home_dir);
-
- if (strchr(srv_log_group_home_dir, ';')) {
- sql_print_error("syntax error in innodb_log_group_home_dir");
- goto mem_free_and_error;
- }
-
- if (innobase_mirrored_log_groups == 1) {
- sql_print_warning(
- "innodb_mirrored_log_groups is an unimplemented "
- "feature and the variable will be completely "
- "removed in a future version.");
- }
-
- if (innobase_mirrored_log_groups > 1) {
- sql_print_error(
- "innodb_mirrored_log_groups is an unimplemented feature and "
- "the variable will be completely removed in a future version. "
- "Using values other than 1 is not supported.");
- goto mem_free_and_error;
- }
-
- if (innobase_mirrored_log_groups == 0) {
- /* To throw a deprecation warning message when the option is
- passed, the default was changed to '0' (as a workaround). Since
- the only value accepted for this option is '1', reset it to 1 */
- innobase_mirrored_log_groups = 1;
- }
-
- /* Validate the file format by animal name */
- if (innobase_file_format_name != NULL) {
-
- format_id = innobase_file_format_name_lookup(
- innobase_file_format_name);
-
- if (format_id > UNIV_FORMAT_MAX) {
-
- sql_print_error("InnoDB: wrong innodb_file_format.");
-
- goto mem_free_and_error;
- }
- } else {
- /* Set it to the default file format id. Though this
- should never happen. */
- format_id = 0;
- }
-
- srv_file_format = format_id;
-
- /* Given the type of innobase_file_format_name we have little
- choice but to cast away the constness from the returned name.
- innobase_file_format_name is used in the MySQL set variable
- interface and so can't be const. */
-
- innobase_file_format_name =
- (char*) trx_sys_file_format_id_to_name(format_id);
-
- /* Check innobase_file_format_check variable */
- if (!innobase_file_format_check) {
-
- /* Set the value to disable checking. */
- srv_max_file_format_at_startup = UNIV_FORMAT_MAX + 1;
-
- } else {
-
- /* Set the value to the lowest supported format. */
- srv_max_file_format_at_startup = UNIV_FORMAT_MIN;
- }
-
- /* Did the user specify a format name that we support?
- As a side effect it will update the variable
- srv_max_file_format_at_startup */
- if (innobase_file_format_validate_and_set(
- innobase_file_format_max) < 0) {
-
- sql_print_error("InnoDB: invalid "
- "innodb_file_format_max value: "
- "should be any value up to %s or its "
- "equivalent numeric id",
- trx_sys_file_format_id_to_name(
- UNIV_FORMAT_MAX));
-
- goto mem_free_and_error;
- }
-
- if (innobase_change_buffering) {
- ulint use;
-
- for (use = 0;
- use < UT_ARR_SIZE(innobase_change_buffering_values);
- use++) {
- if (!innobase_strcasecmp(
- innobase_change_buffering,
- innobase_change_buffering_values[use])) {
- ibuf_use = (ibuf_use_t) use;
- goto innobase_change_buffering_inited_ok;
- }
- }
-
- sql_print_error("InnoDB: invalid value "
- "innodb_change_buffering=%s",
- innobase_change_buffering);
- goto mem_free_and_error;
- }
-
-innobase_change_buffering_inited_ok:
- ut_a((ulint) ibuf_use < UT_ARR_SIZE(innobase_change_buffering_values));
- innobase_change_buffering = (char*)
- innobase_change_buffering_values[ibuf_use];
-
- /* Check that interdependent parameters have sane values. */
- if (srv_max_buf_pool_modified_pct < srv_max_dirty_pages_pct_lwm) {
- sql_print_warning("InnoDB: innodb_max_dirty_pages_pct_lwm"
- " cannot be set higher than"
- " innodb_max_dirty_pages_pct.\n"
- "InnoDB: Setting"
- " innodb_max_dirty_pages_pct_lwm to %lf\n",
- srv_max_buf_pool_modified_pct);
-
- srv_max_dirty_pages_pct_lwm = srv_max_buf_pool_modified_pct;
- }
-
- if (srv_max_io_capacity == SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT) {
-
- if (srv_io_capacity >= SRV_MAX_IO_CAPACITY_LIMIT / 2) {
- /* Avoid overflow. */
- srv_max_io_capacity = SRV_MAX_IO_CAPACITY_LIMIT;
- } else {
- /* The user has not set the value. We should
- set it based on innodb_io_capacity. */
- srv_max_io_capacity = static_cast<ulong>(
- ut_max(2 * srv_io_capacity, 2000));
- }
-
- } else if (srv_max_io_capacity < srv_io_capacity) {
- sql_print_warning("InnoDB: innodb_io_capacity"
- " cannot be set higher than"
- " innodb_io_capacity_max.\n"
- "InnoDB: Setting"
- " innodb_io_capacity to %lu\n",
- srv_max_io_capacity);
-
- srv_io_capacity = srv_max_io_capacity;
- }
-
- if (!is_filename_allowed(srv_buf_dump_filename,
- strlen(srv_buf_dump_filename), FALSE)) {
- sql_print_error("InnoDB: innodb_buffer_pool_filename"
- " cannot have colon (:) in the file name.");
- goto mem_free_and_error;
- }
-
- /* --------------------------------------------------*/
-
- srv_file_flush_method_str = innobase_file_flush_method;
-
- srv_log_file_size = (ib_uint64_t) innobase_log_file_size;
-
-#ifdef UNIV_LOG_ARCHIVE
- srv_log_archive_on = (ulint) innobase_log_archive;
-#endif /* UNIV_LOG_ARCHIVE */
-
- /* Check that the value of system variable innodb_page_size was
- set correctly. Its value was put into srv_page_size. If valid,
- return the associated srv_page_size_shift.*/
- srv_page_size_shift = innodb_page_size_validate(srv_page_size);
- if (!srv_page_size_shift) {
- sql_print_error("InnoDB: Invalid page size=%lu.\n",
- srv_page_size);
- goto mem_free_and_error;
- }
-
- if (UNIV_PAGE_SIZE_DEF != srv_page_size) {
- ib_logf(IB_LOG_LEVEL_INFO,
- " innodb-page-size has been changed"
- " from the default value %d to " ULINTPF " .",
- UNIV_PAGE_SIZE_DEF, srv_page_size);
- }
-
- srv_log_buffer_size = (ulint) innobase_log_buffer_size;
-
- if (innobase_buffer_pool_instances == 0) {
- innobase_buffer_pool_instances = 8;
-
-#if defined(__WIN__) && !defined(_WIN64)
- if (innobase_buffer_pool_size > 1331 * 1024 * 1024) {
- innobase_buffer_pool_instances
- = ut_min(MAX_BUFFER_POOLS,
- (long) (innobase_buffer_pool_size
- / (128 * 1024 * 1024)));
- }
-#endif /* defined(__WIN__) && !defined(_WIN64) */
- }
- srv_buf_pool_size = (ulint) innobase_buffer_pool_size;
- srv_buf_pool_instances = (ulint) innobase_buffer_pool_instances;
-
- srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
-
- if (innobase_additional_mem_pool_size
- != 8*1024*1024L /* the default */ ) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: Using "
- "innodb_additional_mem_pool_size is DEPRECATED. "
- "This option may be removed in future releases, "
- "together with the option innodb_use_sys_malloc "
- "and with the InnoDB's internal memory "
- "allocator.\n");
- }
-
- if (!srv_use_sys_malloc ) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: Setting "
- "innodb_use_sys_malloc to FALSE is DEPRECATED. "
- "This option may be removed in future releases, "
- "together with the InnoDB's internal memory "
- "allocator.\n");
- }
-
- if (innodb_buffer_pool_populate) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: Setting "
- "innodb_buffer_pool_populate is DEPRECATED"
- " and has no effect. "
- "This option will be removed in MariaDB 10.2.3.\n");
- }
-
- srv_n_file_io_threads = (ulint) innobase_file_io_threads;
- srv_n_read_io_threads = (ulint) innobase_read_io_threads;
- srv_n_write_io_threads = (ulint) innobase_write_io_threads;
-
- srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
-
- if (!innobase_use_checksums) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: Setting "
- "innodb_checksums to OFF is DEPRECATED. "
- "This option may be removed in future releases. "
- "You should set innodb_checksum_algorithm=NONE "
- "instead.\n");
- srv_checksum_algorithm = SRV_CHECKSUM_ALGORITHM_NONE;
- }
-
- innodb_log_checksum_func_update(srv_log_checksum_algorithm);
-
-#ifdef HAVE_LARGE_PAGES
- if ((os_use_large_pages = (ibool) my_use_large_pages)) {
- os_large_page_size = (ulint) opt_large_page_size;
- }
-#endif
-
- row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout;
-
- srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
- if (innobase_locks_unsafe_for_binlog) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: Using "
- "innodb_locks_unsafe_for_binlog is DEPRECATED. "
- "This option may be removed in future releases. "
- "Please use READ COMMITTED transaction isolation "
- "level instead, see " REFMAN "set-transaction.html.\n");
- }
-
- if (innobase_open_files < 10) {
- innobase_open_files = 300;
- if (srv_file_per_table && tc_size > 300) {
- innobase_open_files = tc_size;
- }
- }
-
- if (innobase_open_files > (long) open_files_limit) {
- fprintf(stderr,
- "innodb_open_files should not be greater"
- " than the open_files_limit.\n");
- if (innobase_open_files > (long) tc_size) {
- innobase_open_files = tc_size;
- }
- }
-
- srv_max_n_open_files = (ulint) innobase_open_files;
- srv_innodb_status = (ibool) innobase_create_status_file;
-
- srv_print_verbose_log = mysqld_embedded ? 0 : 1;
-
- /* Round up fts_sort_pll_degree to nearest power of 2 number */
- for (num_pll_degree = 1;
- num_pll_degree < fts_sort_pll_degree;
- num_pll_degree <<= 1) {
-
- /* No op */
- }
-
- fts_sort_pll_degree = num_pll_degree;
-
- /* Store the default charset-collation number of this MySQL
- installation */
-
- data_mysql_default_charset_coll = (ulint) default_charset_info->number;
-
- ut_a(DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL ==
- my_charset_latin1.number);
- ut_a(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number);
-
- /* Store the latin1_swedish_ci character ordering table to InnoDB. For
- non-latin1_swedish_ci charsets we use the MySQL comparison functions,
- and consequently we do not need to know the ordering internally in
- InnoDB. */
-
- srv_latin1_ordering = my_charset_latin1.sort_order;
-
- innobase_commit_concurrency_init_default();
-
-#ifdef HAVE_POSIX_FALLOCATE
- srv_use_posix_fallocate = (ibool) innobase_use_fallocate;
-#endif
- /* Do not enable backoff algorithm for small buffer pool. */
- if (!innodb_empty_free_list_algorithm_allowed(
- static_cast<srv_empty_free_list_t>(
- srv_empty_free_list_algorithm))) {
- sql_print_information(
- "InnoDB: innodb_empty_free_list_algorithm "
- "has been changed to legacy "
- "because of small buffer pool size. "
- "In order to use backoff, "
- "increase buffer pool at least up to 20MB.\n");
- srv_empty_free_list_algorithm
- = SRV_EMPTY_FREE_LIST_LEGACY;
- }
-
- srv_use_atomic_writes = (ibool) innobase_use_atomic_writes;
- if (innobase_use_atomic_writes) {
- ib_logf(IB_LOG_LEVEL_INFO, "using atomic writes.");
-
- /* Force doublewrite buffer off, atomic writes replace it. */
- if (srv_use_doublewrite_buf) {
- ib_logf(IB_LOG_LEVEL_INFO, "switching off doublewrite "
- "buffer because of atomic writes.");
- innobase_use_doublewrite = FALSE;
- srv_use_doublewrite_buf = FALSE;
- }
-
- /* Force O_DIRECT on Unixes (on Windows writes are always
- unbuffered)*/
-#ifndef _WIN32
- if(!innobase_file_flush_method ||
- !strstr(innobase_file_flush_method, "O_DIRECT")) {
- innobase_file_flush_method =
- srv_file_flush_method_str = (char*)"O_DIRECT";
- ib_logf(IB_LOG_LEVEL_INFO,
- "using O_DIRECT due to atomic writes.");
- }
-#endif
-#ifdef HAVE_POSIX_FALLOCATE
- /* Due to a bug in directFS, using atomics needs
- posix_fallocate() to extend the file, because pwrite() past the
- end of the file won't work */
- srv_use_posix_fallocate = TRUE;
-#endif
- }
-
-#ifdef HAVE_PSI_INTERFACE
- /* Register keys with MySQL performance schema */
- int count;
-
- count = array_elements(all_pthread_mutexes);
- mysql_mutex_register("innodb", all_pthread_mutexes, count);
-
-# ifdef UNIV_PFS_MUTEX
- count = array_elements(all_innodb_mutexes);
- mysql_mutex_register("innodb", all_innodb_mutexes, count);
-# endif /* UNIV_PFS_MUTEX */
-
-# ifdef UNIV_PFS_RWLOCK
- count = array_elements(all_innodb_rwlocks);
- mysql_rwlock_register("innodb", all_innodb_rwlocks, count);
-# endif /* UNIV_PFS_RWLOCK */
-
-# ifdef UNIV_PFS_THREAD
- count = array_elements(all_innodb_threads);
- mysql_thread_register("innodb", all_innodb_threads, count);
-# endif /* UNIV_PFS_THREAD */
-
-# ifdef UNIV_PFS_IO
- count = array_elements(all_innodb_files);
- mysql_file_register("innodb", all_innodb_files, count);
-# endif /* UNIV_PFS_IO */
-
- count = array_elements(all_innodb_conds);
- mysql_cond_register("innodb", all_innodb_conds, count);
-#endif /* HAVE_PSI_INTERFACE */
-
- /* Since we in this module access directly the fields of a trx
- struct, and due to different headers and flags it might happen that
- ib_mutex_t has a different size in this module and in InnoDB
- modules, we check at run time that the size is the same in
- these compilation modules.
- NOTE(review): no such size check is visible in this function any
- more; this comment may be stale. */
-
- err = innobase_start_or_create_for_mysql();
-
- if (err != DB_SUCCESS) {
- goto mem_free_and_error;
- }
-
- /* Adjust the innodb_undo_logs config object */
- innobase_undo_logs_init_default_max();
-
- innobase_old_blocks_pct = static_cast<uint>(
- buf_LRU_old_ratio_update(innobase_old_blocks_pct, TRUE));
-
- ibuf_max_size_update(innobase_change_buffer_max_size);
-
- innobase_open_tables = hash_create(200);
- mysql_mutex_init(innobase_share_mutex_key,
- &innobase_share_mutex,
- MY_MUTEX_INIT_FAST);
- mysql_mutex_init(commit_cond_mutex_key,
- &commit_cond_m, MY_MUTEX_INIT_FAST);
- mysql_cond_init(commit_cond_key, &commit_cond, NULL);
- mysql_mutex_init(pending_checkpoint_mutex_key,
- &pending_checkpoint_mutex,
- MY_MUTEX_INIT_FAST);
- innodb_inited= 1;
-#ifdef MYSQL_DYNAMIC_PLUGIN
- if (innobase_hton != p) {
- innobase_hton = reinterpret_cast<handlerton*>(p);
- *innobase_hton = *innodb_hton_ptr;
- }
-#endif /* MYSQL_DYNAMIC_PLUGIN */
-
- /* Get the current high water mark format. */
- innobase_file_format_max = (char*) trx_sys_file_format_max_get();
-
- /* Currently, monitor counter information are not persistent. */
- memset(monitor_set_tbl, 0, sizeof monitor_set_tbl);
-
- memset(innodb_counter_value, 0, sizeof innodb_counter_value);
-
- /* Do this as late as possible so server is fully starts up,
- since we might get some initial stats if user choose to turn
- on some counters from start up */
- if (innobase_enable_monitor_counter) {
- innodb_enable_monitor_at_startup(
- innobase_enable_monitor_counter);
- }
-
- /* Turn on monitor counters that are default on */
- srv_mon_default_on();
-
- DBUG_RETURN(FALSE);
-error:
- DBUG_RETURN(TRUE);
-}
-
-/** Shut down the InnoDB storage engine. Does nothing unless
-innobase_init() completed successfully (innodb_inited is set).
-@return 0 */
-static
-int
-innobase_end(handlerton*, ha_panic_function)
-{
-	DBUG_ENTER("innobase_end");
-
-	if (!innodb_inited) {
-		DBUG_RETURN(0);
-	}
-
-	/* A current THD may exist here, e.g. for an UNINSTALL PLUGIN
-	statement; release the transaction attached to it, if any. */
-	THD*	cur_thd = current_thd;
-
-	if (cur_thd != NULL) {
-		trx_t*	attached = thd_to_trx(cur_thd);
-
-		if (attached != NULL) {
-			trx_free_for_mysql(attached);
-		}
-	}
-
-	srv_fast_shutdown = (ulint) innobase_fast_shutdown;
-
-	innodb_inited = 0;
-
-	hash_table_free(innobase_open_tables);
-	innobase_open_tables = NULL;
-
-	innodb_shutdown();
-
-	/* Release what innobase_init() allocated and initialized. */
-	srv_free_paths_and_sizes();
-	my_free(internal_innobase_data_file_path);
-
-	mysql_mutex_destroy(&innobase_share_mutex);
-	mysql_mutex_destroy(&commit_cond_m);
-	mysql_cond_destroy(&commit_cond);
-	mysql_mutex_destroy(&pending_checkpoint_mutex);
-
-	DBUG_RETURN(0);
-}
-
-/****************************************************************//**
-Flushes the InnoDB log buffer to disk, unless the server runs in
-read-only mode. (The original author notes that a commit already flushes
-the logs and that innobase_checkpoint would be a better name.)
-@return TRUE if error; this implementation always returns false */
-static
-bool
-innobase_flush_logs(
-/*================*/
-	handlerton*	hton)	/*!< in/out: InnoDB handlerton */
-{
-	DBUG_ENTER("innobase_flush_logs");
-	DBUG_ASSERT(hton == innodb_hton_ptr);
-
-	if (srv_read_only_mode) {
-		/* Nothing is flushed in read-only mode. */
-		DBUG_RETURN(false);
-	}
-
-	log_buffer_flush_to_disk();
-
-	DBUG_RETURN(false);
-}
-
-/************************************************************//**
-Synchronously read and parse the redo log up to the last
-checkpoint to write the changed page bitmap. Does nothing when changed
-page tracking (srv_track_changed_pages) is off.
-@return 0 to indicate success. Current implementation cannot fail. */
-my_bool
-innobase_flush_changed_page_bitmaps()
-/*=================================*/
-{
-	if (!srv_track_changed_pages) {
-		return FALSE;
-	}
-
-	os_event_reset(srv_checkpoint_completed_event);
-	log_online_follow_redo_log();
-
-	return FALSE;
-}
-
-/************************************************************//**
-Delete all the bitmap files for data less than the specified LSN.
-If called with lsn == IB_ULONGLONG_MAX (i.e. set by RESET request),
-restart the bitmap file sequence, otherwise continue it.
-Forwards to log_online_purge_changed_page_bitmaps().
-@return 0 to indicate success, 1 for failure. */
-static
-my_bool
-innobase_purge_changed_page_bitmaps(
-/*================================*/
-	ulonglong	lsn)	/*!< in: LSN to purge files up to */
-{
-	const my_bool	failed
-		= (my_bool)log_online_purge_changed_page_bitmaps(lsn);
-
-	return failed;
-}
-
-/*****************************************************************//**
-Commits a transaction in an InnoDB database. The commit is performed only
-if the transaction has been started. With WSREP enabled for the owning
-connection, the connection's proc info is set for the duration of the
-commit and restored afterwards. */
-static
-void
-innobase_commit_low(
-/*================*/
-	trx_t*	trx)	/*!< in: transaction handle */
-{
-#ifdef WITH_WSREP
-	THD*		thd = (THD*)trx->mysql_thd;
-	const char*	tmp = 0;	/* previous proc info, restored below */
-	if (thd && wsrep_on(thd)) {
-#ifdef WSREP_PROC_INFO
-		char info[64];
-		info[sizeof(info) - 1] = '\0';
-		snprintf(info, sizeof(info) - 1,
-			 "innobase_commit_low():trx_commit_for_mysql(%lld)",
-			 (long long) wsrep_thd_trx_seqno(thd));
-		tmp = thd_proc_info(thd, info);
-
-#else
-		tmp = thd_proc_info(thd, "innobase_commit_low()");
-#endif /* WSREP_PROC_INFO */
-	}
-#endif /* WITH_WSREP */
-	if (trx_is_started(trx)) {
-
-		trx_commit_for_mysql(trx);
-	}
-#ifdef WITH_WSREP
-	/* Guard against a NULL thd exactly as when the proc info was set
-	above; the transaction may have no MySQL thread attached. */
-	if (thd && wsrep_on(thd)) { thd_proc_info(thd, tmp); }
-#endif /* WITH_WSREP */
-}
-
-#if NOT_USED
-/*****************************************************************//**
-Stores the current binlog coordinates in the trx system header: reads the
-binlog file name and position of thd, writes them within a
-mini-transaction and then flushes the logs.
-This function is compiled only when NOT_USED is defined to a non-zero
-value; its handlerton assignment in innobase_init() is commented out.
-@return 0 */
-static
-int
-innobase_store_binlog_info(
-/*=======================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- THD* thd) /*!< in: MySQL thread handle */
-
-{
- const char* file_name;
- unsigned long long pos;
- mtr_t mtr;
-
- DBUG_ENTER("innobase_store_binlog_info");
-
- thd_binlog_pos(thd, &file_name, &pos);
-
- mtr_start(&mtr);
-
- trx_sys_update_mysql_binlog_offset(file_name, pos,
- TRX_SYS_MYSQL_LOG_INFO, &mtr);
-
- mtr_commit(&mtr);
-
- innobase_flush_logs(hton);
-
- DBUG_RETURN(0);
-}
-#endif
-
-/*****************************************************************//**
-Makes sure the thd has an InnoDB transaction struct, starts the
-transaction if it is not started yet and, under REPEATABLE READ, assigns
-it a consistent read snapshot if it does not have one. Under any other
-isolation level a warning is pushed to the client instead.
-@return 0 */
-static
-int
-innobase_start_trx_and_assign_read_view(
-/*====================================*/
-	handlerton*	hton,	/*!< in: Innodb handlerton */
-	THD*		thd)	/*!< in: MySQL thread handle of the user
-				whose transaction is started */
-{
-	DBUG_ENTER("innobase_start_trx_and_assign_read_view");
-	DBUG_ASSERT(hton == innodb_hton_ptr);
-
-	/* Look up the trx struct of thd, creating it on first use. */
-	trx_t*	trx = check_trx_exists(thd);
-
-	/* Play safe: give up a possible search latch and FIFO ticket.
-	The trx_sys->mutex may be reserved below, and the latching order
-	requires the search system latch to be released before that. */
-	trx_search_latch_release_if_reserved(trx);
-	innobase_srv_conc_force_exit_innodb(trx);
-
-	/* Start the transaction unless it is already running. */
-	trx_start_if_not_started_xa(trx);
-
-	trx->isolation_level = innobase_map_isolation_level(
-		thd_get_trx_isolation(thd));
-
-	/* A read view is assigned here only for REPEATABLE READ. */
-	if (trx->isolation_level != TRX_ISO_REPEATABLE_READ) {
-		push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
-				    HA_ERR_UNSUPPORTED,
-				    "InnoDB: WITH CONSISTENT SNAPSHOT "
-				    "was ignored because this phrase "
-				    "can only be used with "
-				    "REPEATABLE READ isolation level.");
-	} else {
-		trx_assign_read_view(trx);
-	}
-
-	/* Tell MySQL that there is an active transaction. */
-	innobase_register_trx(hton, current_thd, trx);
-
-	DBUG_RETURN(0);
-}
-
-static
-void
-innobase_commit_ordered_2(
-/*============*/
- trx_t* trx, /*!< in: Innodb transaction */
- THD* thd) /*!< in: MySQL thread handle */
-{
- DBUG_ENTER("innobase_commit_ordered_2");
-
- /* Commits trx through innobase_commit_low() with flush_log_later set,
- after storing the binlog file name and offset of thd in trx. When
- innobase_commit_concurrency > 0, entry is throttled: a thread that
- would push commit_threads above the limit undoes its increment, waits
- on commit_cond and then retries from the label below.
-
- We need current binlog position for mysqlbackup to work. */
-retry:
- if (innobase_commit_concurrency > 0) {
- mysql_mutex_lock(&commit_cond_m);
- commit_threads++;
-
- if (commit_threads > innobase_commit_concurrency) {
- commit_threads--;
- mysql_cond_wait(&commit_cond,
- &commit_cond_m);
- mysql_mutex_unlock(&commit_cond_m);
- goto retry;
- }
- else {
- mysql_mutex_unlock(&commit_cond_m);
- }
- }
-
- /* The following call read the binary log position of
- the transaction being committed.
-
- Binary logging of other engines is not relevant to
- InnoDB as all InnoDB requires is that committing
- InnoDB transactions appear in the same order in the
- MySQL binary log as they appear in InnoDB logs, which
- is guaranteed by the server.
-
- If the binary log is not enabled, or the transaction
- is not written to the binary log, the file name will
- be a NULL pointer. */
- unsigned long long pos;
- thd_binlog_pos(thd, &trx->mysql_log_file_name, &pos);
- trx->mysql_log_offset= static_cast<ib_int64_t>(pos);
- /* Don't do write + flush right now. For group commit
- to work we want to do the flush later. */
- trx->flush_log_later = TRUE;
- innobase_commit_low(trx);
- trx->flush_log_later = FALSE;
-
- if (innobase_commit_concurrency > 0) {
- mysql_mutex_lock(&commit_cond_m);
- commit_threads--;
- mysql_cond_signal(&commit_cond);
- mysql_mutex_unlock(&commit_cond_m);
- }
-
- /* The write + flush of the logs is deliberately not done in this
- function; see flush_log_later above. */
- DBUG_VOID_RETURN;
-}
-
-/*****************************************************************//**
-Perform the first, fast part of InnoDB commit.
-
-Doing it in this call ensures that we get the same commit order here
-as in binlog and any other participating transactional storage engines.
-
-Note that we want to do as little as really needed here, as we run
-under a global mutex. The expensive fsync() is done later, in
-innobase_commit(), without a lock so group commit can take place.
-
-Note also that this method can be called from a different thread than
-the one handling the rest of the transaction.
-
-If the transaction is started but not registered for 2PC, nothing is
-committed here. Otherwise innobase_commit_ordered_2() is run and the
-transaction is flagged with trx_set_active_commit_ordered(), which
-innobase_commit() checks so that the fast part is not run twice. */
-static
-void
-innobase_commit_ordered(
-/*============*/
- handlerton *hton, /*!< in: Innodb handlerton */
- THD* thd, /*!< in: MySQL thread handle of the user for whom
- the transaction should be committed */
- bool all) /*!< in: TRUE - commit transaction
- FALSE - the current SQL statement ended */
-{
- trx_t* trx;
- DBUG_ENTER("innobase_commit_ordered");
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- trx = check_trx_exists(thd);
-
- /* Since we will reserve the kernel mutex, we must not be holding the
- search system latch, or we will disobey the latching order. But we
- already released it in innobase_xa_prepare() (if not before), so just
- have an assert here.*/
- ut_ad(!trx->has_search_latch);
-
- if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
- /* We cannot throw error here; instead we will catch this error
- again in innobase_commit() and report it from there. */
- DBUG_VOID_RETURN;
- }
-
- /* commit_ordered is only called when committing the whole transaction
- (or an SQL statement when autocommit is on). */
- DBUG_ASSERT(all ||
- (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)));
-
- innobase_commit_ordered_2(trx, thd);
-
- trx_set_active_commit_ordered(trx);
-
- DBUG_VOID_RETURN;
-}
-
-/*****************************************************************//**
-Commits a transaction in an InnoDB database or marks an SQL statement
-ended.
-@return 0, or HA_ERR_WRONG_COMMAND when a fake-changes transaction is
-rolled back here instead of being committed */
-static
-int
-innobase_commit(
-/*============*/
- handlerton* hton, /*!< in: Innodb handlerton */
- THD* thd, /*!< in: MySQL thread handle of the
- user for whom the transaction should
- be committed */
- bool commit_trx) /*!< in: true - commit transaction
- false - the current SQL statement
- ended */
-{
- trx_t* trx;
-
- DBUG_ENTER("innobase_commit");
- DBUG_ASSERT(hton == innodb_hton_ptr);
- DBUG_PRINT("trans", ("ending transaction"));
-
- trx = check_trx_exists(thd);
-
- /* Since we will reserve the trx_sys->mutex, we have to release
- the search system latch first to obey the latching order. */
-
- /* No-op in XtraDB */
- trx_search_latch_release_if_reserved(trx);
-
- /* If fake-changes mode = ON then allow
- SELECT (they are read-only) and
- CREATE ... SELECT * from table (Well this doesn't open up DDL for InnoDB
- as ha_innobase::create will return appropriate error if fake-change = ON
- but if create is trying to use other SE and SELECT is executing on
- InnoDB table then we allow SELECT to proceed.
- Ideally, statement like this should be marked CREATE_SELECT like
- INSERT_SELECT but unfortunately it doesn't).
- Any other statement that would commit the whole transaction (or an
- autocommit statement) is rolled back below and reported as an error. */
- if (UNIV_UNLIKELY(trx->fake_changes
- && (thd_sql_command(thd) != SQLCOM_SELECT
- && thd_sql_command(thd) != SQLCOM_CREATE_TABLE)
- && (commit_trx || (!thd_test_options(thd,
- OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))))) {
-
- /* rollback implicitly */
- innobase_rollback(hton, thd, commit_trx);
-
- /* because debug assertion code complains, if something left */
- thd->get_stmt_da()->reset_diagnostics_area();
-
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
- }
- /* Transaction is deregistered only in a commit or a rollback. If
- it is deregistered we know there cannot be resources to be freed
- and we could return immediately. For the time being, we play safe
- and do the cleanup though there should be nothing to clean up. */
-
- if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
-
- sql_print_error("Transaction not registered for MySQL 2PC, "
- "but transaction is active");
- }
-
- if (commit_trx
- || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
-
- DBUG_EXECUTE_IF("crash_innodb_before_commit",
- DBUG_SUICIDE(););
-
- /* Run the fast part of commit if we did not already. */
- if (!trx_is_active_commit_ordered(trx)) {
- innobase_commit_ordered_2(trx, thd);
- }
-
- /* We were instructed to commit the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
- /* At this point commit order is fixed and transaction is
- visible to others. So we can wakeup other commits waiting for
- this one, to allow them to group commit with us. */
- thd_wakeup_subsequent_commits(thd, 0);
-
- trx_commit_complete_for_mysql(trx);
- trx_deregister_from_2pc(trx);
- } else {
- /* We just mark the SQL statement ended and do not do a
- transaction commit */
-
- /* If we had reserved the auto-inc lock for some
- table in this SQL statement we release it now */
-
- lock_unlock_table_autoinc(trx);
-
- /* Store the current undo_no of the transaction so that we
- know where to roll back if we have to roll back the next
- SQL statement */
-
- trx_mark_sql_stat_end(trx);
- }
-
- trx->n_autoinc_rows = 0; /* Reset the number AUTO-INC rows required */
-
- /* This is a statement level variable. */
- trx->fts_next_doc_id = 0;
-
- innobase_srv_conc_force_exit_innodb(trx);
-
- DBUG_RETURN(0);
-}
-
-/*****************************************************************//**
-Undoes either the whole transaction of a session or only its most
-recent SQL statement.
-@return 0 or error number */
-static
-int
-innobase_rollback(
-/*==============*/
-	handlerton*	hton,		/*!< in: Innodb handlerton */
-	THD*		thd,		/*!< in: handle to the MySQL thread
-					of the user whose transaction should
-					be rolled back */
-	bool		rollback_trx)	/*!< in: TRUE - rollback entire
-					transaction FALSE - rollback the
-					current statement only */
-{
-	DBUG_ENTER("innobase_rollback");
-	DBUG_ASSERT(hton == innodb_hton_ptr);
-	DBUG_PRINT("trans", ("aborting transaction"));
-
-	trx_t*	trx = check_trx_exists(thd);
-
-	/* The trx_sys->mutex will be reserved further down, so a possible
-	FIFO ticket and the search system latch have to go first in order
-	to respect the latching order. */
-	trx_search_latch_release_if_reserved(trx);
-	innobase_srv_conc_force_exit_innodb(trx);
-
-	/* Reset the number AUTO-INC rows required */
-	trx->n_autoinc_rows = 0;
-
-	/* An auto-inc lock possibly reserved by the statement being rolled
-	back is released before the rollback, which may take long. */
-	lock_unlock_table_autoinc(trx);
-
-	/* This is a statement level variable. */
-	trx->fts_next_doc_id = 0;
-
-	dberr_t	error;
-
-	if (!rollback_trx
-	    && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-		/* Inside an explicit transaction: only the last statement. */
-		error = trx_rollback_last_sql_stat_for_mysql(trx);
-	} else {
-		error = trx_rollback_for_mysql(trx);
-		trx_deregister_from_2pc(trx);
-	}
-
-	DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
-}
-
-/*****************************************************************//**
-Rolls back the given transaction as a whole. A transaction flagged
-read_only is not passed to trx_rollback_for_mysql() and is reported
-as a success.
-@return 0 or error number */
-static
-int
-innobase_rollback_trx(
-/*==================*/
-	trx_t*	trx)	/*!< in: transaction */
-{
-	DBUG_ENTER("innobase_rollback_trx");
-	DBUG_PRINT("trans", ("aborting transaction"));
-
-	/* The trx_sys->mutex will be reserved further down, so a possible
-	FIFO ticket and the search system latch have to go first in order
-	to respect the latching order. */
-	trx_search_latch_release_if_reserved(trx);
-	innobase_srv_conc_force_exit_innodb(trx);
-
-	/* Release a possibly reserved auto-inc lock before the rollback,
-	which may take long. */
-	lock_unlock_table_autoinc(trx);
-
-	const dberr_t	error = trx->read_only
-		? DB_SUCCESS
-		: trx_rollback_for_mysql(trx);
-
-	DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
-}
-
-
-struct pending_checkpoint { /* one queued commit checkpoint request */
- struct pending_checkpoint *next; /* next queued request; NULL for the last one */
- handlerton *hton; /* handed back to commit_checkpoint_notify_ha() */
- void *cookie; /* handed back to commit_checkpoint_notify_ha() */
- ib_uint64_t lsn; /* log_get_lsn() value sampled when the request was queued */
-};
-static struct pending_checkpoint *pending_checkpoint_list; /* head of the queue; NULL when empty */
-static struct pending_checkpoint *pending_checkpoint_list_end; /* tail of the queue; NULL when empty */
-
-/*****************************************************************//**
-Handle a commit checkpoint request from server layer.
-We put the request in a queue, so that we can notify upper layer about
-checkpoint complete when we have flushed the redo log.
-If we have already flushed all relevant redo log, we notify immediately.
-
-hton and cookie are stored in the queued entry and handed back unchanged
-to commit_checkpoint_notify_ha(). If the entry cannot be allocated, an
-error is logged and the request is dropped without any notification. */
-static
-void
-innobase_checkpoint_request(
- handlerton *hton, /*!< in: handlerton passed back in the notification */
- void *cookie) /*!< in: opaque value passed back in the notification */
-{
- ib_uint64_t lsn;
- ib_uint64_t flush_lsn;
- struct pending_checkpoint * entry;
-
- /* Do the allocation outside of lock to reduce contention. The normal
- case is that not everything is flushed, so we will need to enqueue. */
- entry = static_cast<struct pending_checkpoint *>
- (my_malloc(sizeof(*entry), MYF(MY_WME)));
- if (!entry) {
- sql_print_error("Failed to allocate %u bytes."
- " Commit checkpoint will be skipped.",
- static_cast<unsigned>(sizeof(*entry)));
- return;
- }
-
- entry->next = NULL;
- entry->hton = hton;
- entry->cookie = cookie;
-
- mysql_mutex_lock(&pending_checkpoint_mutex);
- lsn = log_get_lsn();
- flush_lsn = log_get_flush_lsn();
- if (lsn > flush_lsn) {
- /* Put the request in queue.
- When the log gets flushed past the lsn, we will remove the
- entry from the queue and notify the upper layer. */
- entry->lsn = lsn;
- if (pending_checkpoint_list_end) {
- pending_checkpoint_list_end->next = entry;
- /* There is no need to order the entries in the list
- by lsn. The upper layer can accept notifications in
- any order, and short delays in notifications do not
- significantly impact performance. */
- } else {
- pending_checkpoint_list = entry;
- }
- pending_checkpoint_list_end = entry;
- /* The queue owns the entry now; NULL tells the code below
- that nothing is left to notify or free here. */
- entry = NULL;
- }
- mysql_mutex_unlock(&pending_checkpoint_mutex);
-
- if (entry) {
- /* We are already flushed. Notify the checkpoint immediately. */
- commit_checkpoint_notify_ha(entry->hton, entry->cookie);
- my_free(entry);
- }
-}
-
-/*****************************************************************//**
-Log code calls this whenever log has been written and/or flushed up
-to a new position. We use this to notify upper layer of a new commit
-checkpoint when necessary.
-
-The queue is scanned from its head; the leading run of entries whose lsn
-is not above flush_lsn is unlinked under pending_checkpoint_mutex. With
-the mutex released again, each unlinked entry is reported through
-commit_checkpoint_notify_ha() and freed. */
-UNIV_INTERN
-void
-innobase_mysql_log_notify(
-/*===============*/
-	ib_uint64_t	write_lsn,	/*!< in: LSN written to log file
-					(not used by this function) */
-	ib_uint64_t	flush_lsn)	/*!< in: LSN flushed to disk */
-{
-	struct pending_checkpoint *	pending;
-	struct pending_checkpoint *	entry;
-	struct pending_checkpoint *	last_ready;
-
-	/* It is safe to do a quick check for NULL first without lock.
-	Even if we should race, we will at most skip one checkpoint and
-	take the next one, which is harmless. */
-	if (!pending_checkpoint_list) {
-		return;
-	}
-
-	mysql_mutex_lock(&pending_checkpoint_mutex);
-	pending = pending_checkpoint_list;
-	if (!pending) {
-		mysql_mutex_unlock(&pending_checkpoint_mutex);
-		return;
-	}
-
-	last_ready = NULL;
-	for (entry = pending; entry != NULL; entry = entry->next) {
-		/* Notify checkpoints up until the first entry that has not
-		been fully flushed to the redo log. Since we do not maintain
-		the list ordered, in principle there could be more entries
-		later that were also flushed. But there is no harm in
-		delaying notifications for those a bit. And in practice, the
-		list is unlikely to have more than one element anyway, as we
-		flush the redo log at least once every second. */
-		if (entry->lsn > flush_lsn) {
-			break;
-		}
-		last_ready = entry;
-	}
-
-	if (last_ready) {
-		/* We found some pending checkpoints that are now flushed to
-		disk. So remove them from the list. Here entry is the first
-		element that stays queued, or NULL if none does. */
-		pending_checkpoint_list = entry;
-		if (!entry) {
-			pending_checkpoint_list_end = NULL;
-		}
-	}
-
-	mysql_mutex_unlock(&pending_checkpoint_mutex);
-
-	if (!last_ready) {
-		return;
-	}
-
-	/* Now that we have released the lock, notify upper layer about all
-	commit checkpoints that have now completed. */
-	for (;;) {
-		entry = pending;
-		pending = pending->next;
-
-		/* Decide whether this is the final ready entry before it is
-		freed: a pointer must not be inspected any more once the
-		object it referred to has been released. */
-		const bool	is_last = (entry == last_ready);
-
-		commit_checkpoint_notify_ha(entry->hton, entry->cookie);
-
-		my_free(entry);
-
-		if (is_last) {
-			break;
-		}
-	}
-}
-
-/*****************************************************************//**
-Rolls the transaction of a session back to a previously set savepoint.
-The savepoint name handed to InnoDB is derived from the address of the
-savepoint data area with longlong2str().
-@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
-given name */
-static
-int
-innobase_rollback_to_savepoint(
-/*===========================*/
-	handlerton*	hton,		/*!< in: Innodb handlerton */
-	THD*		thd,		/*!< in: handle to the MySQL thread
-					of the user whose transaction should
-					be rolled back to savepoint */
-	void*		savepoint)	/*!< in: savepoint data */
-{
-	DBUG_ENTER("innobase_rollback_to_savepoint");
-	DBUG_ASSERT(hton == innodb_hton_ptr);
-
-	trx_t*	trx = check_trx_exists(thd);
-
-	/* The trx_sys->mutex will be reserved further down, so a possible
-	FIFO ticket and the search system latch have to go first in order
-	to respect the latching order. */
-	trx_search_latch_release_if_reserved(trx);
-	innobase_srv_conc_force_exit_innodb(trx);
-
-	/* TODO: use provided savepoint data area to store savepoint data */
-	char	savepoint_name[64];
-
-	longlong2str((ulint) savepoint, savepoint_name, 36);
-
-	ib_int64_t	binlog_cache_pos;
-	const dberr_t	err = trx_rollback_to_savepoint_for_mysql(
-		trx, savepoint_name, &binlog_cache_pos);
-
-	/* Keep the full-text state in step with the rolled back savepoint. */
-	if (err == DB_SUCCESS && trx->fts_trx != NULL) {
-		fts_savepoint_rollback(trx, savepoint_name);
-	}
-
-	DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL));
-}
-
-/*****************************************************************//**
-Tells the server whether MDL locks acquired after a savepoint may be
-released once the transaction has been rolled back to that savepoint.
-When binlog is on, such MDL locks are kept as long as the transaction
-holds any lock inside InnoDB.
-@return true if it is safe, false if its not safe. */
-static
-bool
-innobase_rollback_to_savepoint_can_release_mdl(
-/*===========================================*/
-	handlerton*	hton,	/*!< in: InnoDB handlerton */
-	THD*		thd)	/*!< in: handle to the MySQL thread
-				of the user whose transaction should
-				be rolled back to savepoint */
-{
-	DBUG_ENTER("innobase_rollback_to_savepoint_can_release_mdl");
-	DBUG_ASSERT(hton == innodb_hton_ptr);
-
-	trx_t*	trx = check_trx_exists(thd);
-	ut_ad(trx);
-
-	/* Releasing MDL is safe exactly when the transaction's lock list
-	is empty. */
-	const bool	holds_no_locks
-		= (UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
-
-	DBUG_RETURN(holds_no_locks);
-}
-
-/*****************************************************************//**
-Releases a savepoint of the session's transaction. The savepoint name
-handed to InnoDB is derived from the address of the savepoint data area
-with longlong2str().
-@return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the
-given name */
-static
-int
-innobase_release_savepoint(
-/*=======================*/
-	handlerton*	hton,		/*!< in: handlerton for Innodb */
-	THD*		thd,		/*!< in: handle to the MySQL thread
-					of the user whose transaction's
-					savepoint should be released */
-	void*		savepoint)	/*!< in: savepoint data */
-{
-	DBUG_ENTER("innobase_release_savepoint");
-	DBUG_ASSERT(hton == innodb_hton_ptr);
-
-	trx_t*	trx = check_trx_exists(thd);
-
-	/* Start the transaction first if it has not been started yet. */
-	if (trx->state == TRX_STATE_NOT_STARTED) {
-		trx_start_if_not_started(trx);
-	}
-
-	/* TODO: use provided savepoint data area to store savepoint data */
-	char	savepoint_name[64];
-
-	longlong2str((ulint) savepoint, savepoint_name, 36);
-
-	const dberr_t	err = trx_release_savepoint_for_mysql(
-		trx, savepoint_name);
-
-	/* Keep the full-text state in step with the released savepoint. */
-	if (err == DB_SUCCESS && trx->fts_trx != NULL) {
-		fts_savepoint_release(trx, savepoint_name);
-	}
-
-	DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL));
-}
-
-/*****************************************************************//**
-Sets a savepoint in the session's transaction. The savepoint name handed
-to InnoDB is derived from the address of the savepoint data area with
-longlong2str().
-@return 0 or error number: the result of trx_savepoint_for_mysql() as
-converted by convert_error_code_to_mysql() */
-static
-int
-innobase_savepoint(
-/*===============*/
-	handlerton*	hton,		/*!< in: handle to the Innodb
-					handlerton */
-	THD*		thd,		/*!< in: handle to the MySQL thread */
-	void*		savepoint)	/*!< in: savepoint data */
-{
-	DBUG_ENTER("innobase_savepoint");
-	DBUG_ASSERT(hton == innodb_hton_ptr);
-
-	/* Setting a savepoint makes no sense in autocommit mode (unless we
-	are in a sub-statement), so the SQL layer ensures that this method
-	is never called in such a situation. */
-	trx_t*	trx = check_trx_exists(thd);
-
-	/* The trx_sys->mutex will be reserved further down, so a possible
-	FIFO ticket and the search system latch have to go first in order
-	to respect the latching order. */
-	trx_search_latch_release_if_reserved(trx);
-	innobase_srv_conc_force_exit_innodb(trx);
-
-	/* Cannot happen outside of transaction */
-	DBUG_ASSERT(trx_is_registered_for_2pc(trx));
-
-	/* TODO: use provided savepoint data area to store savepoint data */
-	char	savepoint_name[64];
-
-	longlong2str((ulint) savepoint, savepoint_name, 36);
-
-	const dberr_t	err = trx_savepoint_for_mysql(
-		trx, savepoint_name, (ib_int64_t)0);
-
-	/* Take the matching full-text savepoint as well. */
-	if (err == DB_SUCCESS && trx->fts_trx != NULL) {
-		fts_savepoint_take(trx, trx->fts_trx, savepoint_name);
-	}
-
-	DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL));
-}
-
-/*****************************************************************//**
-Frees a possible InnoDB trx object associated with the current THD.
-The trx must exist (asserted with ut_a). It is rolled back through
-innobase_rollback_trx() and then freed; a warning is logged first if it
-is still started and log_warnings is enabled. The result of the rollback
-is not propagated to the caller.
-@return 0 */
-static
-int
-innobase_close_connection(
-/*======================*/
- handlerton* hton, /*!< in: innobase handlerton */
- THD* thd) /*!< in: handle to the MySQL thread of the user
- whose resources should be free'd */
-{
- trx_t* trx;
-
- DBUG_ENTER("innobase_close_connection");
- DBUG_ASSERT(hton == innodb_hton_ptr);
- trx = thd_to_trx(thd);
-
- ut_a(trx);
-
- if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
-
- sql_print_error("Transaction not registered for MySQL 2PC, "
- "but transaction is active");
- }
-
- if (trx_is_started(trx) && global_system_variables.log_warnings) {
-
- sql_print_warning(
- "MySQL is closing a connection that has an active "
- "InnoDB transaction. " TRX_ID_FMT " row modifications "
- "will roll back.",
- trx->undo_no);
- }
-
- innobase_rollback_trx(trx);
-
- trx_free_for_mysql(trx);
-
- DBUG_RETURN(0);
-}
-
-/*****************************************************************//**
-Closes the InnoDB side of a session: when the THD has a trx attached,
-the work is delegated to innobase_close_connection(); otherwise there
-is nothing to do.
-@return 0 or error number */
-UNIV_INTERN
-int
-innobase_close_thd(
-/*===============*/
-	THD*	thd)	/*!< in: handle to the MySQL thread of the user
-			whose resources should be free'd */
-{
-	/* No trx attached to this session: nothing to release. */
-	if (thd_to_trx(thd) == NULL) {
-		return(0);
-	}
-
-	return(innobase_close_connection(innodb_hton_ptr, thd));
-}
-
-/*************************************************************************//**
-** InnoDB database tables
-*****************************************************************************/
-
-/****************************************************************//**
-Maps the record format stored in the table flags of the open table to
-the corresponding row type of the server layer. Without an open table,
-or for a format not listed below, a debug assertion fires and
-ROW_TYPE_NOT_USED is returned.
-@return one of ROW_TYPE_REDUNDANT, ROW_TYPE_COMPACT,
-ROW_TYPE_COMPRESSED, ROW_TYPE_DYNAMIC */
-UNIV_INTERN
-enum row_type
-ha_innobase::get_row_type() const
-/*=============================*/
-{
-	if (prebuilt == NULL || prebuilt->table == NULL) {
-		ut_ad(0);
-		return(ROW_TYPE_NOT_USED);
-	}
-
-	switch (dict_tf_get_rec_format(prebuilt->table->flags)) {
-	case REC_FORMAT_REDUNDANT:
-		return(ROW_TYPE_REDUNDANT);
-	case REC_FORMAT_COMPACT:
-		return(ROW_TYPE_COMPACT);
-	case REC_FORMAT_COMPRESSED:
-		return(ROW_TYPE_COMPRESSED);
-	case REC_FORMAT_DYNAMIC:
-		return(ROW_TYPE_DYNAMIC);
-	}
-
-	/* Not one of the formats handled above. */
-	ut_ad(0);
-	return(ROW_TYPE_NOT_USED);
-}
-
-/*****************************************************************//**
-Cancel any pending lock request associated with the current THD.
-Nothing is done when the session has no trx or its trx is not waiting
-for a lock. Which mutexes are taken here depends on trx->abort_type and
-on whether the trx's thread is a wsrep BF thread; see the body. */
-static
-void
-innobase_kill_connection(
-/*======================*/
- handlerton* hton, /*!< in: innobase handlerton */
- THD* thd, /*!< in: handle to the MySQL thread being killed */
- thd_kill_levels) /*!< in: kill level; unnamed and not used here */
-{
- trx_t* trx;
-
- DBUG_ENTER("innobase_kill_connection");
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
-#ifdef WITH_WSREP
- wsrep_thd_LOCK(thd);
- if (wsrep_thd_get_conflict_state(thd) != NO_CONFLICT) {
- /* if victim has been signaled by BF thread and/or aborting
- is already progressing, following query aborting is not necessary
- any more.
- Also, BF thread should own trx mutex for the victim, which would
- conflict with trx_mutex_enter() below
- */
- wsrep_thd_UNLOCK(thd);
- DBUG_VOID_RETURN;
- }
- wsrep_thd_UNLOCK(thd);
-#endif /* WITH_WSREP */
- trx = thd_to_trx(thd);
-
- if (trx && trx->lock.wait_lock) {
- /* In wsrep BF we have already taken lock_sys and trx
- mutex either on wsrep_abort_transaction() or
- before wsrep_kill_victim(). In replication we
- could own lock_sys mutex taken in
- lock_deadlock_check_and_resolve().*/
-
- WSREP_DEBUG("Killing victim trx %p BF %d trx BF %d trx_id " TRX_ID_FMT " ABORT %d thd %p"
- " current_thd %p BF %d wait_lock_modes: %s\n",
- trx, wsrep_thd_is_BF(trx->mysql_thd, FALSE),
- wsrep_thd_is_BF(thd, FALSE),
- trx->id, trx->abort_type,
- trx->mysql_thd,
- current_thd,
- wsrep_thd_is_BF(current_thd, FALSE),
- lock_get_info(trx->lock.wait_lock).c_str());
-
- if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
- trx->abort_type == TRX_SERVER_ABORT) {
- ut_ad(!lock_mutex_own());
- lock_mutex_enter();
- }
-
- if (trx->abort_type != TRX_WSREP_ABORT) {
- trx_mutex_enter(trx);
- }
-
- ut_ad(lock_mutex_own());
- ut_ad(trx_mutex_own(trx));
-
- if (trx->lock.wait_lock) { /* checked again now that both mutexes are owned */
- lock_cancel_waiting_and_release(trx->lock.wait_lock);
- }
-
- if (trx->abort_type != TRX_WSREP_ABORT) {
- trx_mutex_exit(trx);
- }
-
- if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
- trx->abort_type == TRX_SERVER_ABORT) {
- lock_mutex_exit();
- }
- }
-
- DBUG_VOID_RETURN;
-}
-
-
-
-/****************************************************************//**
-Computes the table flags that apply to the current statement.
-HA_BINLOG_STMT_CAPABLE is added to int_table_flags only when the
-session's isolation level is above READ COMMITTED.
-@return table flags */
-UNIV_INTERN
-handler::Table_flags
-ha_innobase::table_flags() const
-/*============================*/
-{
-	/* The session's tx_isolation has to be consulted because this
-	method is (also) called before prebuilt is inited. */
-	const ulong	isolation = thd_tx_isolation(ha_thd());
-
-	if (isolation > ISO_READ_COMMITTED) {
-		return(int_table_flags | HA_BINLOG_STMT_CAPABLE);
-	}
-
-	return(int_table_flags);
-}
-
-/****************************************************************//**
-Reports the storage engine name as the table type.
-@return table type */
-UNIV_INTERN
-const char*
-ha_innobase::table_type() const
-/*===========================*/
-{
-	return innobase_hton_name;
-}
-
-/****************************************************************//**
-Names the type of the given index.
-@return "FULLTEXT" when the index is found and has DICT_FTS set in its
-type, "BTREE" otherwise */
-UNIV_INTERN
-const char*
-ha_innobase::index_type(
-/*====================*/
-	uint	keynr)	/*!< in: index number */
-{
-	const dict_index_t*	found = innobase_get_index(keynr);
-	const bool		is_fulltext
-		= (found != NULL) && (found->type & DICT_FTS);
-
-	return(is_fulltext ? "FULLTEXT" : "BTREE");
-}
-
-/****************************************************************//**
-Reports which operations the given index supports. A FULLTEXT key
-supports none of them; any other key supports ordered and range reads,
-key-only reads and index condition pushdown, and the primary key is
-additionally marked as the clustered index.
-@return flags of supported operations */
-UNIV_INTERN
-ulong
-ha_innobase::index_flags(
-/*=====================*/
-	uint	key,
-	uint,
-	bool) const
-{
-	if (table_share->key_info[key].algorithm == HA_KEY_ALG_FULLTEXT) {
-		return(0);
-	}
-
-	ulong	supported = HA_READ_NEXT | HA_READ_PREV | HA_READ_ORDER
-		| HA_READ_RANGE | HA_KEYREAD_ONLY
-		| HA_DO_INDEX_COND_PUSHDOWN;
-
-	if (key == table_share->primary_key) {
-		supported |= HA_CLUSTERED_INDEX;
-	}
-
-	return(supported);
-}
-
-/****************************************************************//**
-Reports how many keys a table may have at most.
-@return MAX_KEY */
-UNIV_INTERN
-uint
-ha_innobase::max_supported_keys() const
-/*===================================*/
-{
-	return MAX_KEY;
-}
-
-/****************************************************************//**
-Returns the maximum key length.
-@return maximum supported key length, in bytes: 768 for 4k pages,
-1536 for 8k pages, 3500 for every other page size */
-UNIV_INTERN
-uint
-ha_innobase::max_supported_key_length() const
-/*=========================================*/
-{
-	/* An InnoDB page must store >= 2 keys; a secondary key record
-	must also contain the primary key value. Therefore, if both
-	the primary key and the secondary key are at this maximum length,
-	it must be less than 1/4th of the free space on a page including
-	record overhead.
-
-	MySQL imposes its own limit to this number; MAX_KEY_LENGTH = 3072.
-
-	For page sizes = 16k, InnoDB historically reported 3500 bytes here,
-	But the MySQL limit of 3072 was always used through the handler
-	interface.
-
-	Note: Handle 16k and 32k pages the same here since the limits
-	are higher than imposed by MySQL. */
-
-	switch (UNIV_PAGE_SIZE) {
-	case 4096:
-		return(768);
-	case 8192:
-		return(1536);
-	default:
-		/* The value used to be selected by #ifdef WITH_WSREP, but
-		both branches returned 3500, so no conditional is needed. */
-		return(3500);
-	}
-}
-
-/****************************************************************//**
-Reports which keys may be used for scanning; every key qualifies.
-@return key_map_full */
-UNIV_INTERN
-const key_map*
-ha_innobase::keys_to_use_for_scanning()
-/*===================================*/
-{
-	return &key_map_full;
-}
-
-/****************************************************************//**
-Reports the caching mode of the table to the server layer.
-@return HA_CACHE_TBL_ASKTRANSACT */
-UNIV_INTERN
-uint8
-ha_innobase::table_cache_type()
-/*===========================*/
-{
-	return HA_CACHE_TBL_ASKTRANSACT;
-}
-
-/****************************************************************//**
-Reports whether the primary key is the clustered index.
-@return true */
-UNIV_INTERN
-bool
-ha_innobase::primary_key_is_clustered()
-/*===================================*/
-{
-	return true;
-}
-
-/*****************************************************************//**
-Builds the normalized form of a table name: the last path component
-before the table name (the database name), a '/', and the table name.
-Example: test/mytable. Both '/' and '\\' count as path separators in the
-input, and a run of separators counts as one.
-On Windows normalization puts both the database name and the
-table name always to lower case if "set_lower_case" is set to TRUE. */
-void
-normalize_table_name_low(
-/*=====================*/
-	char*		norm_name,	/*!< out: normalized name as a
-					null-terminated string */
-	const char*	name,		/*!< in: table name string */
-	ibool		set_lower_case)	/*!< in: TRUE if we want to set name
-					to lower case */
-{
-	/* Walk backwards from the last character of the input. */
-	const char*	cursor = strend(name) - 1;
-
-	/* Step over the table name, stopping at the last separator. */
-	for (; cursor >= name && *cursor != '\\' && *cursor != '/';
-	     cursor--) {
-	}
-
-	const char* const	table_part = cursor + 1;
-	const ulint		table_len = strlen(table_part);
-
-	/* Step over the separator run between database and table name. */
-	for (; cursor >= name && (*cursor == '\\' || *cursor == '/');
-	     cursor--) {
-	}
-
-	DBUG_ASSERT(cursor >= name);
-
-	/* Step over the database name, counting its characters, until the
-	last but one separator or one char before the beginning of name. */
-	ulint	schema_len = 0;
-
-	for (; cursor >= name && *cursor != '\\' && *cursor != '/';
-	     cursor--) {
-		schema_len++;
-	}
-
-	const char* const	schema_part = cursor + 1;
-
-	const ulint	total_len = schema_len + table_len + sizeof "/";
-	ut_a(total_len < FN_REFLEN - 1);
-
-	memcpy(norm_name, schema_part, schema_len);
-	norm_name[schema_len] = '/';
-
-	/* The table name is copied together with its null-byte. */
-	memcpy(norm_name + schema_len + 1, table_part, table_len + 1);
-
-	if (set_lower_case) {
-		innobase_casedn_str(norm_name);
-	}
-}
-
-#if !defined(DBUG_OFF)
-/*********************************************************************
-Test normalize_table_name_low().
-Runs every input of the table below through normalize_table_name_low()
-with set_lower_case FALSE, reports each outcome with printf() and
-invokes ut_error on the first result that differs from the expectation. */
-static
-void
-test_normalize_table_name_low()
-/*===========================*/
-{
- char norm_name[FN_REFLEN];
- const char* test_data[][2] = {
- /* input, expected result */
- {"./mysqltest/t1", "mysqltest/t1"},
- {"./test/#sql-842b_2", "test/#sql-842b_2"},
- {"./test/#sql-85a3_10", "test/#sql-85a3_10"},
- {"./test/#sql2-842b-2", "test/#sql2-842b-2"},
- {"./test/bug29807", "test/bug29807"},
- {"./test/foo", "test/foo"},
- {"./test/innodb_bug52663", "test/innodb_bug52663"},
- {"./test/t", "test/t"},
- {"./test/t1", "test/t1"},
- {"./test/t10", "test/t10"},
- {"/a/b/db/table", "db/table"},
- {"/a/b/db///////table", "db/table"},
- {"/a/b////db///////table", "db/table"},
- {"/var/tmp/mysqld.1/#sql842b_2_10", "mysqld.1/#sql842b_2_10"},
- {"db/table", "db/table"},
- {"ddd/t", "ddd/t"},
- {"d/ttt", "d/ttt"},
- {"d/t", "d/t"},
- {".\\mysqltest\\t1", "mysqltest/t1"},
- {".\\test\\#sql-842b_2", "test/#sql-842b_2"},
- {".\\test\\#sql-85a3_10", "test/#sql-85a3_10"},
- {".\\test\\#sql2-842b-2", "test/#sql2-842b-2"},
- {".\\test\\bug29807", "test/bug29807"},
- {".\\test\\foo", "test/foo"},
- {".\\test\\innodb_bug52663", "test/innodb_bug52663"},
- {".\\test\\t", "test/t"},
- {".\\test\\t1", "test/t1"},
- {".\\test\\t10", "test/t10"},
- {"C:\\a\\b\\db\\table", "db/table"},
- {"C:\\a\\b\\db\\\\\\\\\\\\\\table", "db/table"},
- {"C:\\a\\b\\\\\\\\db\\\\\\\\\\\\\\table", "db/table"},
- {"C:\\var\\tmp\\mysqld.1\\#sql842b_2_10", "mysqld.1/#sql842b_2_10"},
- {"db\\table", "db/table"},
- {"ddd\\t", "ddd/t"},
- {"d\\ttt", "d/ttt"},
- {"d\\t", "d/t"},
- };
-
- for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) {
- printf("test_normalize_table_name_low(): "
- "testing \"%s\", expected \"%s\"... ",
- test_data[i][0], test_data[i][1]);
-
- normalize_table_name_low(norm_name, test_data[i][0], FALSE);
-
- if (strcmp(norm_name, test_data[i][1]) == 0) {
- printf("ok\n");
- } else {
- printf("got \"%s\"\n", norm_name);
- ut_error;
- }
- }
-}
-
-/*********************************************************************
-Test ut_format_name().
-Formats every name of the table below into buf with the given buffer
-size, checks that ut_format_name() returns buf, and compares the buffer
-contents with the expectation. Each outcome is reported on stderr and
-ut_error is invoked on the first mismatch. */
-static
-void
-test_ut_format_name()
-/*=================*/
-{
-	char	buf[NAME_LEN * 3];
-
-	struct {
-		const char*	name;
-		ibool		is_table;
-		ulint		buf_size;
-		const char*	expected;
-	} test_data[] = {
-		{"test/t1", TRUE, sizeof(buf), "\"test\".\"t1\""},
-		{"test/t1", TRUE, 12, "\"test\".\"t1\""},
-		{"test/t1", TRUE, 11, "\"test\".\"t1"},
-		{"test/t1", TRUE, 10, "\"test\".\"t"},
-		{"test/t1", TRUE, 9, "\"test\".\""},
-		{"test/t1", TRUE, 8, "\"test\"."},
-		{"test/t1", TRUE, 7, "\"test\""},
-		{"test/t1", TRUE, 6, "\"test"},
-		{"test/t1", TRUE, 5, "\"tes"},
-		{"test/t1", TRUE, 4, "\"te"},
-		{"test/t1", TRUE, 3, "\"t"},
-		{"test/t1", TRUE, 2, "\""},
-		{"test/t1", TRUE, 1, ""},
-		{"test/t1", TRUE, 0, "BUF_NOT_CHANGED"},
-		{"table", TRUE, sizeof(buf), "\"table\""},
-		{"ta'le", TRUE, sizeof(buf), "\"ta'le\""},
-		{"ta\"le", TRUE, sizeof(buf), "\"ta\"\"le\""},
-		{"ta`le", TRUE, sizeof(buf), "\"ta`le\""},
-		{"index", FALSE, sizeof(buf), "\"index\""},
-		{"ind/ex", FALSE, sizeof(buf), "\"ind/ex\""},
-	};
-
-	for (size_t i = 0; i < UT_ARR_SIZE(test_data); i++) {
-
-		/* Pre-fill the buffer so that the zero-size case can be
-		recognised by its untouched contents. */
-		memcpy(buf, "BUF_NOT_CHANGED", strlen("BUF_NOT_CHANGED") + 1);
-
-		char*	ret;
-
-		ret = ut_format_name(test_data[i].name,
-				     test_data[i].is_table,
-				     buf,
-				     test_data[i].buf_size);
-
-		ut_a(ret == buf);
-
-		/* buf_size is a ulint, while the format strings below use
-		%lu. The explicit conversion keeps the argument type in line
-		with the conversion specifier even if ulint is not
-		unsigned long on the build platform. */
-		const unsigned long	size_for_print
-			= static_cast<unsigned long>(test_data[i].buf_size);
-
-		if (strcmp(buf, test_data[i].expected) == 0) {
-			fprintf(stderr,
-				"ut_format_name(%s, %s, buf, %lu), "
-				"expected %s, OK\n",
-				test_data[i].name,
-				test_data[i].is_table ? "TRUE" : "FALSE",
-				size_for_print,
-				test_data[i].expected);
-		} else {
-			fprintf(stderr,
-				"ut_format_name(%s, %s, buf, %lu), "
-				"expected %s, ERROR: got %s\n",
-				test_data[i].name,
-				test_data[i].is_table ? "TRUE" : "FALSE",
-				size_for_print,
-				test_data[i].expected,
-				buf);
-			ut_error;
-		}
-	}
-}
-#endif /* !DBUG_OFF */
-
-/********************************************************************//**
-Get the upper limit of the MySQL integral and floating-point type.
-The limit is selected by the key type of the field; for FLOAT and DOUBLE
-the values 2^24 and 2^53 are used. Any other key type triggers ut_error.
-@return maximum allowed value for the field */
-UNIV_INTERN
-ulonglong
-innobase_get_int_col_max_value(
-/*===========================*/
-	const Field*	field)	/*!< in: MySQL field */
-{
-	switch (field->key_type()) {
-	/* TINY: unsigned, then signed */
-	case HA_KEYTYPE_BINARY:
-		return(0xFFULL);
-	case HA_KEYTYPE_INT8:
-		return(0x7FULL);
-
-	/* SHORT: unsigned, then signed */
-	case HA_KEYTYPE_USHORT_INT:
-		return(0xFFFFULL);
-	case HA_KEYTYPE_SHORT_INT:
-		return(0x7FFFULL);
-
-	/* MEDIUM: unsigned, then signed */
-	case HA_KEYTYPE_UINT24:
-		return(0xFFFFFFULL);
-	case HA_KEYTYPE_INT24:
-		return(0x7FFFFFULL);
-
-	/* LONG: unsigned, then signed */
-	case HA_KEYTYPE_ULONG_INT:
-		return(0xFFFFFFFFULL);
-	case HA_KEYTYPE_LONG_INT:
-		return(0x7FFFFFFFULL);
-
-	/* BIG: unsigned, then signed */
-	case HA_KEYTYPE_ULONGLONG:
-		return(0xFFFFFFFFFFFFFFFFULL);
-	case HA_KEYTYPE_LONGLONG:
-		return(0x7FFFFFFFFFFFFFFFULL);
-
-	case HA_KEYTYPE_FLOAT:
-		/* We use the maximum as per IEEE754-2008 standard, 2^24 */
-		return(0x1000000ULL);
-	case HA_KEYTYPE_DOUBLE:
-		/* We use the maximum as per IEEE754-2008 standard, 2^53 */
-		return(0x20000000000000ULL);
-
-	default:
-		ut_error;
-	}
-
-	/* Only reached after the default branch above. */
-	return(0);
-}
-
-/*******************************************************************//**
-Verifies that the column types of a MySQL key definition agree with the
-column types of the given InnoDB index. Column names are deliberately not
-compared, and the columns are assumed to appear in the same order on
-both sides.
-@return TRUE if all column types match. */
-static
-ibool
-innobase_match_index_columns(
-/*=========================*/
-	const KEY*		key_info,	/*!< in: Index info
-						from mysql */
-	const dict_index_t*	index_info)	/*!< in: Index info
-						from Innodb */
-{
-	DBUG_ENTER("innobase_match_index_columns");
-
-	/* The user defined column counts have to agree before the
-	individual columns are worth looking at. */
-	if (key_info->user_defined_key_parts
-	    != index_info->n_user_defined_cols) {
-		DBUG_RETURN(FALSE);
-	}
-
-	const KEY_PART_INFO*		part = key_info->key_part;
-	const KEY_PART_INFO* const	part_end
-		= part + key_info->user_defined_key_parts;
-	const dict_field_t*		ib_fld = index_info->fields;
-	const dict_field_t* const	ib_fld_end
-		= ib_fld + index_info->n_fields;
-
-	/* Walk both column lists in step. A renamed column on the MySQL
-	side is not propagated to InnoDB, which is why only the types are
-	compared here. */
-	while (part != part_end) {
-		ibool	is_unsigned;
-		ulint	ib_type = ib_fld->col->mtype;
-
-		/* The MySQL type has to be translated to the InnoDB column
-		type before the two can be compared. */
-		const ulint	mysql_type = get_innobase_type_from_mysql_type(
-			&is_unsigned, part->field);
-
-		/* Step over InnoDB specific system columns; running off
-		the end of the InnoDB field array means no match. */
-		while (ib_type == DATA_SYS) {
-			if (++ib_fld >= ib_fld_end) {
-				DBUG_RETURN(FALSE);
-			}
-
-			ib_type = ib_fld->col->mtype;
-		}
-
-		if (mysql_type != ib_type) {
-			/* Column type mismatch */
-			DBUG_RETURN(FALSE);
-		}
-
-		++ib_fld;
-		++part;
-	}
-
-	DBUG_RETURN(TRUE);
-}
-
-/*******************************************************************//**
-This function builds a translation table in INNOBASE_SHARE
-structure for fast index location with mysql array number from its
-table->key_info structure. This also provides the necessary translation
-between the key order in mysql key_info and Innodb ib_table->indexes if
-they are not fully matched with each other.
-Note we do not have any mutex protecting the translation table
-building based on the assumption that there is no concurrent
-index creation/drop and DMLs that requires index lookup. All table
-handle will be closed before the index creation/drop.
-The whole function runs under dict_sys->mutex. On any failure the
-mapping array stored in the share is freed and array_size/index_count
-are reset to 0.
-@return TRUE if index translation table built successfully */
-UNIV_INTERN
-ibool
-innobase_build_index_translation(
-/*=============================*/
-	const TABLE*		table,	/*!< in: table in MySQL data
-					dictionary */
-	dict_table_t*		ib_table,/*!< in: table in Innodb data
-					dictionary */
-	INNOBASE_SHARE*		share)	/*!< in/out: share structure
-					where index translation table
-					will be constructed in. */
-{
-	ulint		mysql_num_index;
-	ulint		ib_num_index;
-	dict_index_t**	index_mapping;
-	ibool		ret = TRUE;
-
-	DBUG_ENTER("innobase_build_index_translation");
-
-	mutex_enter(&dict_sys->mutex);
-
-	mysql_num_index = table->s->keys;
-	ib_num_index = UT_LIST_GET_LEN(ib_table->indexes);
-
-	index_mapping = share->idx_trans_tbl.index_mapping;
-
-	/* If there exists inconsistency between MySQL and InnoDB dictionary
-	(metadata) information, the number of index defined in MySQL
-	could exceed that in InnoDB, do not build index translation
-	table in such case */
-	if (UNIV_UNLIKELY(ib_num_index < mysql_num_index)) {
-		ret = FALSE;
-		goto func_exit;
-	}
-
-	/* If index entry count is non-zero, nothing has
-	changed since last update, directly return TRUE */
-	if (share->idx_trans_tbl.index_count) {
-		/* Index entry count should still match mysql_num_index */
-		ut_a(share->idx_trans_tbl.index_count == mysql_num_index);
-		goto func_exit;
-	}
-
-	/* The number of index increased, rebuild the mapping table */
-	if (mysql_num_index > share->idx_trans_tbl.array_size) {
-		/* Reallocate through a temporary: if my_realloc() fails,
-		index_mapping must keep pointing at the old array so that
-		the cleanup at func_exit frees it instead of leaking it. */
-		dict_index_t**	new_mapping = (dict_index_t**) my_realloc(
-			index_mapping,
-			mysql_num_index * sizeof(*index_mapping),
-			MYF(MY_ALLOW_ZERO_PTR));
-
-		if (!new_mapping) {
-			/* Report an error if the allocation failed
-			and mysql_num_index is a non-zero value */
-			sql_print_error("InnoDB: fail to allocate memory for "
-					"index translation table. Number of "
-					"Index:%lu, array size:%lu",
-					(ulong) mysql_num_index,
-					(ulong) share->idx_trans_tbl.array_size);
-			ret = FALSE;
-			goto func_exit;
-		}
-
-		index_mapping = new_mapping;
-		share->idx_trans_tbl.array_size = mysql_num_index;
-	}
-
-	/* For each index in the mysql key_info array, fetch its
-	corresponding InnoDB index pointer into index_mapping
-	array. */
-	for (ulint count = 0; count < mysql_num_index; count++) {
-
-		/* Fetch index pointers into index_mapping according to mysql
-		index sequence */
-		index_mapping[count] = dict_table_get_index_on_name(
-			ib_table, table->key_info[count].name);
-
-		if (!index_mapping[count]) {
-			sql_print_error("Cannot find index %s in InnoDB "
-					"index dictionary.",
-					table->key_info[count].name);
-			ret = FALSE;
-			goto func_exit;
-		}
-
-		/* Double check fetched index has the same
-		column info as those in mysql key_info. */
-		if (!innobase_match_index_columns(&table->key_info[count],
-						  index_mapping[count])) {
-			sql_print_error("Found index %s whose column info "
-					"does not match that of MySQL.",
-					table->key_info[count].name);
-			ret = FALSE;
-			goto func_exit;
-		}
-	}
-
-	/* Successfully built the translation table */
-	share->idx_trans_tbl.index_count = mysql_num_index;
-
-func_exit:
-	if (!ret) {
-		/* Build translation table failed. */
-		my_free(index_mapping);
-
-		share->idx_trans_tbl.array_size = 0;
-		share->idx_trans_tbl.index_count = 0;
-		index_mapping = NULL;
-	}
-
-	/* Publish the (possibly reallocated or cleared) array pointer. */
-	share->idx_trans_tbl.index_mapping = index_mapping;
-
-	mutex_exit(&dict_sys->mutex);
-
-	DBUG_RETURN(ret);
-}
-
-/*******************************************************************//**
-Looks up an InnoDB index through the index translation table kept in
-the share.
-Note we do not have mutex protection for the index translation table
-access, it is based on the assumption that there is no concurrent
-translation table rebuild (after create/drop index) and DMLs that
-require index lookup.
-@return dict_index_t structure for requested index. NULL if the
-translation table is missing or keynr is outside of it. */
-static
-dict_index_t*
-innobase_index_lookup(
-/*==================*/
-	INNOBASE_SHARE*	share,	/*!< in: share structure for index
-				translation table. */
-	uint		keynr)	/*!< in: index number for the requested
-				index */
-{
-	dict_index_t**	mapping = share->idx_trans_tbl.index_mapping;
-
-	if (mapping != NULL && keynr < share->idx_trans_tbl.index_count) {
-		return(mapping[keynr]);
-	}
-
-	return(NULL);
-}
-
-/************************************************************************
-Initializes the AUTOINC counter of prebuilt->table. This should only be
-called once from ha_innobase::open(). Therefore there's no need for a
-covering lock.
-
-The value passed to dict_table_autoinc_initialize() is chosen as follows:
-- 0 when srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE;
-- 0 when table->found_next_number_field is NULL (an error is also
-  raised with my_error() unless the force recovery case above applies);
-- 0 when row_search_max_autoinc() reports DB_RECORD_NOT_FOUND;
-- otherwise the result of innobase_next_autoinc() computed from the
-  value read by row_search_max_autoinc(), with increment 1 and capped
-  by the column maximum. */
-UNIV_INTERN
-void
-ha_innobase::innobase_initialize_autoinc()
-/*======================================*/
-{
- ulonglong auto_inc;
- const Field* field = table->found_next_number_field;
-
- if (field != NULL) {
- auto_inc = innobase_get_int_col_max_value(field);
- } else {
- /* We have no idea what's been passed in to us as the
- autoinc column. We set it to 0, effectively disabling
- updates to the table. */
- auto_inc = 0;
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Unable to determine the AUTOINC "
- "column name\n");
- }
-
- if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
- /* If the recovery level is set so high that writes
- are disabled we force the AUTOINC counter to 0
- value effectively disabling writes to the table.
- Secondly, we avoid reading the table in case the read
- results in failure due to a corrupted table/index.
-
- We will not return an error to the client, so that the
- tables can be dumped with minimal hassle. If an error
- were returned in this case, the first attempt to read
- the table would fail and subsequent SELECTs would succeed. */
- auto_inc = 0;
- } else if (field == NULL) {
- /* This is a far more serious error, best to avoid
- opening the table and return failure. */
- my_error(ER_AUTOINC_READ_FAILED, MYF(0));
- } else {
- dict_index_t* index;
- const char* col_name;
- ib_uint64_t read_auto_inc;
- ulint err;
-
- update_thd(ha_thd());
-
- ut_a(prebuilt->trx == thd_to_trx(user_thd));
-
- col_name = field->field_name.str;
- index = innobase_get_index(table->s->next_number_index);
-
- /* Execute SELECT MAX(col_name) FROM TABLE; */
- err = row_search_max_autoinc(index, col_name, &read_auto_inc);
-
- switch (err) {
- case DB_SUCCESS: {
- ulonglong col_max_value;
-
- col_max_value = innobase_get_int_col_max_value(field);
-
- /* At this stage we do not know the increment
- nor the offset, so use a default increment of 1. */
-
- auto_inc = innobase_next_autoinc(
- read_auto_inc, 1, 1, 0, col_max_value);
-
- break;
- }
- case DB_RECORD_NOT_FOUND:
- char buf[MAX_FULL_NAME_LEN];
- ut_format_name(index->table->name, TRUE, buf, sizeof(buf));
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "MySQL and InnoDB data "
- "dictionaries are out of sync."
- " Unable to find the AUTOINC column "
- " %s in the InnoDB table %s."
- " We set the next AUTOINC column "
- "value to 0"
- " in effect disabling the AUTOINC "
- "next value generation."
- " You can either set the next "
- "AUTOINC value explicitly using ALTER TABLE "
- " or fix the data dictionary by "
- "recreating the table.",
- col_name, buf);
-
- /* This will disable the AUTOINC generation. */
- auto_inc = 0;
-
- /* We want the open to succeed, so that the user can
- take corrective action. ie. reads should succeed but
- updates should fail. Note that err is not read again
- after this assignment within this function. */
- err = DB_SUCCESS;
- break;
- default:
- /* row_search_max_autoinc() should only return
- one of DB_SUCCESS or DB_RECORD_NOT_FOUND. */
- ut_error;
- }
- }
-
- dict_table_autoinc_initialize(prebuilt->table, auto_inc); /* reached on
- every path above, including the field == NULL error path where
- auto_inc is 0 */
-}
-
-/*****************************************************************//**
-Creates and opens a handle to a table which already exists in an InnoDB
-database.
-
-Outline of the steps performed, in order:
-1) normalize the name and obtain the INNOBASE_SHARE via get_share();
-2) open the table in the InnoDB dictionary cache and compare its user
-   column count with the MySQL definition, marking the table corrupted
-   on mismatch;
-3) for partitioned tables that were not found, retry the lookup with a
-   differently-cased name when lower_case_table_names is 1;
-4) refuse the open when the tablespace is unreadable and the statement
-   is not a tablespace operation;
-5) create the prebuilt struct, build the index translation table and
-   determine ref_length;
-6) initialize the table lock data, update the max file format, and
-   initialize the AUTOINC counter if it has not been set yet.
-
-@return 0 on success; 1 if the share cannot be obtained; otherwise one
-of HA_ERR_CRASHED_ON_USAGE, HA_ERR_NO_SUCH_TABLE or
-HA_ERR_DECRYPTION_FAILED */
-UNIV_INTERN
-int
-ha_innobase::open(
-/*==============*/
- const char* name, /*!< in: table name */
- int mode, /*!< in: not used */
- uint test_if_locked) /*!< in: not used */
-{
- dict_table_t* ib_table;
- char norm_name[FN_REFLEN];
- THD* thd;
- char* is_part = NULL;
- ibool par_case_name_set = FALSE;
- char par_case_name[FN_REFLEN];
- dict_err_ignore_t ignore_err = DICT_ERR_IGNORE_NONE;
-
- DBUG_ENTER("ha_innobase::open");
-
- UT_NOT_USED(mode);
- UT_NOT_USED(test_if_locked);
-
- thd = ha_thd();
-
- normalize_table_name(norm_name, name);
-
- user_thd = NULL;
-
- if (!(share=get_share(name))) {
-
- DBUG_RETURN(1);
- }
-
- if (UNIV_UNLIKELY(share->ib_table && share->ib_table->corrupted &&
- srv_pass_corrupt_table <= 1)) {
- free_share(share);
-
- DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
- }
-
- /* Will be allocated if it is needed in ::update_row() */
- upd_buf = NULL;
- upd_buf_size = 0;
-
- /* We look for pattern #P# to see if the table is partitioned
- MySQL table. */
-#ifdef __WIN__
- is_part = strstr(norm_name, "#p#");
-#else
- is_part = strstr(norm_name, "#P#");
-#endif /* __WIN__ */
-
- /* Check whether FOREIGN_KEY_CHECKS is set to 0. If so, the table
- can be opened even if some FK indexes are missing. If not, the table
- can't be opened in the same situation */
- if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
- ignore_err = DICT_ERR_IGNORE_FK_NOKEY;
- }
-
- /* Get pointer to a table object in InnoDB dictionary cache */
- ib_table = dict_table_open_on_name(norm_name, FALSE, TRUE, ignore_err);
-
- if (ib_table
- && ((!DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID)
- && table->s->stored_fields != dict_table_get_n_user_cols(ib_table))
- || (DICT_TF2_FLAG_IS_SET(ib_table, DICT_TF2_FTS_HAS_DOC_ID)
- && (table->s->fields
- != dict_table_get_n_user_cols(ib_table) - 1)))) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "table %s contains " ULINTPF " user defined columns "
- "in InnoDB, but %u columns in MySQL. Please "
- "check INFORMATION_SCHEMA.INNODB_SYS_COLUMNS and "
- REFMAN "innodb-troubleshooting.html "
- "for how to resolve it",
- norm_name, dict_table_get_n_user_cols(ib_table),
- table->s->fields); /* NOTE(review): the non-FTS branch of the
- check above compares stored_fields, while the message
- prints fields - confirm which count is intended */
-
- /* Mark this table as corrupted, so the drop table
- or force recovery can still use it, but not others. */
- ib_table->corrupted = true;
- ib_table->file_unreadable = true;
- dict_table_close(ib_table, FALSE, FALSE);
- ib_table = NULL;
- is_part = NULL;
- }
-
- if (UNIV_UNLIKELY(ib_table && ib_table->corrupted &&
- srv_pass_corrupt_table <= 1)) {
- free_share(share);
- my_free(upd_buf);
- upd_buf = NULL;
- upd_buf_size = 0;
-
- DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE); /* NOTE(review): ib_table
- was obtained from dict_table_open_on_name() above and is not
- passed to dict_table_close() on this path, unlike the
- no_tablespace return further below - confirm whether a
- table reference is leaked here */
- }
-
- share->ib_table = ib_table;
-
- if (NULL == ib_table) {
- if (is_part) {
- /* MySQL partition engine hard codes the file name
- separator as "#P#". The text case is fixed even if
- lower_case_table_names is set to 1 or 2. This is true
- for sub-partition names as well. InnoDB always
- normalises file names to lower case on Windows, this
- can potentially cause problems when copying/moving
- tables between platforms.
-
- 1) If boot against an installation from Windows
- platform, then its partition table name could
- be in lower case in system tables. So we will
- need to check lower case name when load table.
-
- 2) If we boot an installation from other case
- sensitive platform in Windows, we might need to
- check the existence of table name without lower
- case in the system table. */
- if (innobase_get_lower_case_table_names() == 1) {
-
- if (!par_case_name_set) {
-#ifndef __WIN__
- /* Check for the table using lower
- case name, including the partition
- separator "P" */
- strcpy(par_case_name, norm_name);
- innobase_casedn_str(par_case_name);
-#else
- /* On Windows platform, check
- whether there exists table name in
- system table whose name is
- not being normalized to lower case */
- normalize_table_name_low(
- par_case_name, name, FALSE);
-#endif
- par_case_name_set = TRUE;
- }
-
- ib_table = dict_table_open_on_name(
- par_case_name, FALSE, TRUE,
- ignore_err);
- }
-
- if (ib_table) {
-#ifndef __WIN__
- sql_print_warning("Partition table %s opened "
- "after converting to lower "
- "case. The table may have "
- "been moved from a case "
- "in-sensitive file system. "
- "Please recreate table in "
- "the current file system\n",
- norm_name);
-#else
- sql_print_warning("Partition table %s opened "
- "after skipping the step to "
- "lower case the table name. "
- "The table may have been "
- "moved from a case sensitive "
- "file system. Please "
- "recreate table in the "
- "current file system\n",
- norm_name);
-#endif
- /* We allow use of table if it is found.
- this is consistent to current behavior
- to innodb_plugin */
- share->ib_table = ib_table;
- goto table_opened;
- }
- }
-
- if (is_part) {
- sql_print_error("Failed to open table %s.\n",
- norm_name);
- }
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Cannot open table %s from the internal data "
- "dictionary of InnoDB though the .frm file "
- "for the table exists. See "
- REFMAN "innodb-troubleshooting.html for how "
- "you can resolve the problem.", norm_name);
-
- free_share(share);
- my_errno = ENOENT;
-
- DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
- }
-
-table_opened:
-
- innobase_copy_frm_flags_from_table_share(ib_table, table->s);
-
- /* No point to init any statistics if tablespace is still encrypted. */
- if (ib_table->is_readable()) {
- dict_stats_init(ib_table);
- } else {
- ib_table->stat_initialized = 1;
- }
-
- MONITOR_INC(MONITOR_TABLE_OPEN);
-
- bool no_tablespace = false;
- bool encrypted = false;
- FilSpace space;
-
- if (dict_table_is_discarded(ib_table)) {
-
- ib_senderrf(thd,
- IB_LOG_LEVEL_WARN, ER_TABLESPACE_DISCARDED,
- table->s->table_name.str);
-
- /* Allow an open because a proper DISCARD should have set
- all the flags and index root page numbers to FIL_NULL that
- should prevent any DML from running but it should allow DDL
- operations. */
-
- no_tablespace = false;
-
- } else if (!ib_table->is_readable()) {
- space = fil_space_acquire_silent(ib_table->space);
-
- if (space()) {
- if (space()->crypt_data && space()->crypt_data->is_encrypted()) {
- /* This means that tablespace was found but we could not
- decrypt encrypted page. */
- no_tablespace = true;
- encrypted = true;
- } else {
- no_tablespace = true;
- }
- } else {
- ib_senderrf(
- thd, IB_LOG_LEVEL_WARN,
- ER_TABLESPACE_MISSING, norm_name);
-
- /* This means we have no idea what happened to the tablespace
- file, best to play it safe. */
-
- no_tablespace = true;
- }
- }
-
- if (!thd_tablespace_op(thd) && no_tablespace) {
- free_share(share);
- my_errno = ENOENT;
- int ret_err = HA_ERR_NO_SUCH_TABLE;
-
- /* If table has no tablespace but it has crypt data, check
- whether the tablespace was made inaccessible because the
- encryption service or the used key_id is not available. */
- if (encrypted) {
- bool warning_pushed = false;
- char buf[MAX_FULL_NAME_LEN];
- ut_format_name(ib_table->name, TRUE, buf, sizeof(buf));
-
- if (!encryption_key_id_exists(space()->crypt_data->key_id)) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_DECRYPTION_FAILED,
- "Table %s in file %s is encrypted but encryption service or"
- " used key_id %u is not available. "
- " Can't continue reading table.",
- buf, space()->chain.start->name,
- space()->crypt_data->key_id);
- ret_err = HA_ERR_DECRYPTION_FAILED;
- warning_pushed = true;
- }
-
- /* If table is marked as encrypted then we push
- warning if it has not been already done as used
- key_id might be found but it is incorrect. */
- if (!warning_pushed) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_DECRYPTION_FAILED,
- "Table %s in file %s is encrypted but encryption service or"
- " used key_id is not available. "
- " Can't continue reading table.",
- buf, space()->chain.start->name);
- ret_err = HA_ERR_DECRYPTION_FAILED;
- }
- }
-
- dict_table_close(ib_table, FALSE, FALSE);
-
- DBUG_RETURN(ret_err);
- }
-
- prebuilt = row_create_prebuilt(ib_table, table->s->stored_rec_length);
-
- prebuilt->default_rec = table->s->default_values;
- ut_ad(prebuilt->default_rec);
-
- /* Looks like MySQL-3.23 sometimes has primary key number != 0 */
- primary_key = table->s->primary_key;
- key_used_on_scan = primary_key;
-
- if (!innobase_build_index_translation(table, ib_table, share)) {
- sql_print_error("Build InnoDB index translation table for"
- " Table %s failed", name);
- }
-
- /* Allocate a buffer for a 'row reference'. A row reference is
- a string of bytes of length ref_length which uniquely specifies
- a row in our table. Note that MySQL may also compare two row
- references for equality by doing a simple memcmp on the strings
- of length ref_length! */
-
- if (!row_table_got_default_clust_index(ib_table)) {
-
- prebuilt->clust_index_was_generated = FALSE;
-
- if (UNIV_UNLIKELY(primary_key >= MAX_KEY)) {
- ib_table->dict_frm_mismatch = DICT_FRM_NO_PK;
-
- /* This mismatch could cause further problems
- if not attended, bring this to the user's attention
- by printing a warning in addition to log a message
- in the errorlog */
-
- ib_push_frm_error(thd, ib_table, table, 0, true);
-
- /* If primary_key >= MAX_KEY, its (primary_key)
- value could be out of bound if continue to index
- into key_info[] array. Find InnoDB primary index,
- and assign its key_length to ref_length.
- In addition, since MySQL indexes are sorted starting
- with primary index, unique index etc., initialize
- ref_length to the first index key length in
- case we fail to find InnoDB cluster index.
-
- Please note, this will not resolve the primary
- index mismatch problem, other side effects are
- possible if users continue to use the table.
- However, we allow this table to be opened so
- that user can adopt necessary measures for the
- mismatch while still being accessible to the table
- data. */
- if (!table->key_info) {
- ut_ad(!table->s->keys);
- ref_length = 0;
- } else {
- ref_length = table->key_info[0].key_length;
- }
-
- /* Find corresponding cluster index
- key length in MySQL's key_info[] array */
- for (uint i = 0; i < table->s->keys; i++) {
- dict_index_t* index;
- index = innobase_get_index(i);
- if (dict_index_is_clust(index)) {
- ref_length =
- table->key_info[i].key_length;
- }
- }
- } else {
- /* MySQL allocates the buffer for ref.
- key_info->key_length includes space for all key
- columns + one byte for each column that may be
- NULL. ref_length must be as exact as possible to
- save space, because all row reference buffers are
- allocated based on ref_length. */
-
- ref_length = table->key_info[primary_key].key_length;
- }
- } else {
- if (primary_key != MAX_KEY) {
-
- ib_table->dict_frm_mismatch = DICT_NO_PK_FRM_HAS;
-
- /* This mismatch could cause further problems
- if not attended, bring this to the user attention
- by printing a warning in addition to log a message
- in the errorlog */
- ib_push_frm_error(thd, ib_table, table, 0, true);
- }
-
- prebuilt->clust_index_was_generated = TRUE;
-
- ref_length = DATA_ROW_ID_LEN;
-
- /* If we automatically created the clustered index, then
- MySQL does not know about it, and MySQL must NOT be aware
- of the index used on scan, to make it avoid checking if we
- update the column of the index. That is why we warn below
- when key_used_on_scan is not the undefined value MAX_KEY.
- The column is the row id in the automatic generation case,
- and it will never be updated anyway. */
-
- if (key_used_on_scan != MAX_KEY) {
- sql_print_warning(
- "Table %s key_used_on_scan is %lu even "
- "though there is no primary key inside "
- "InnoDB.", name, (ulong) key_used_on_scan);
- }
- }
-
- /* Index block size in InnoDB: used by MySQL in query optimization */
- stats.block_size = UNIV_PAGE_SIZE;
-
- /* Init table lock structure */
- thr_lock_data_init(&share->lock,&lock,(void*) 0);
-
- if (prebuilt->table) {
- /* We update the highest file format in the system table
- space, if this table has higher file format setting. */
-
- trx_sys_file_format_max_upgrade(
- (const char**) &innobase_file_format_max,
- dict_table_get_format(prebuilt->table));
- }
-
- /* Only if the table has an AUTOINC column. */
- if (prebuilt->table != NULL
- && prebuilt->table->is_readable()
- && table->found_next_number_field != NULL) {
- dict_table_autoinc_lock(prebuilt->table);
-
- /* Since a table can already be "open" in InnoDB's internal
- data dictionary, we only init the autoinc counter once, the
- first time the table is loaded. We can safely reuse the
- autoinc value from a previous MySQL open. */
- if (dict_table_autoinc_read(prebuilt->table) == 0) {
-
- innobase_initialize_autoinc();
- }
-
- dict_table_autoinc_unlock(prebuilt->table);
- }
-
- info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
-
- DBUG_RETURN(0);
-}
-
-/******************************************************************//**
-Clones this handler through handler::clone() and copies the current
-select_lock_type of the prebuilt struct into the clone.
-@return the new handler, or NULL if handler::clone() returned NULL */
-UNIV_INTERN
-handler*
-ha_innobase::clone(
-/*===============*/
-	const char*	name,		/*!< in: table name */
-	MEM_ROOT*	mem_root)	/*!< in: memory context */
-{
-	DBUG_ENTER("ha_innobase::clone");
-
-	handler*	base_copy = handler::clone(name, mem_root);
-	ha_innobase*	copy = static_cast<ha_innobase*>(base_copy);
-
-	if (copy != NULL) {
-		DBUG_ASSERT(copy->prebuilt != NULL);
-
-		/* The clone must use the same lock type for reads as
-		the handler it was created from. */
-		copy->prebuilt->select_lock_type
-			= prebuilt->select_lock_type;
-	}
-
-	DBUG_RETURN(copy);
-}
-
-/******************************************************************//**
-Returns the maximum length of a single key part. The limit depends on
-innobase_large_prefix only; a table format specific index column length
-check will be performed at ha_innobase::add_index() and
-row_create_index_for_mysql().
-@return REC_VERSION_56_MAX_INDEX_COL_LEN if innobase_large_prefix is
-set, else REC_ANTELOPE_MAX_INDEX_COL_LEN - 1 */
-UNIV_INTERN
-uint
-ha_innobase::max_supported_key_part_length() const
-/*==============================================*/
-{
-	if (innobase_large_prefix) {
-		return(REC_VERSION_56_MAX_INDEX_COL_LEN);
-	}
-
-	return(REC_ANTELOPE_MAX_INDEX_COL_LEN - 1);
-}
-
-/******************************************************************//**
-Closes a handle to an InnoDB table: frees the prebuilt struct and the
-update buffer (if one was allocated), releases the share and wakes up
-the master thread.
-@return 0 */
-UNIV_INTERN
-int
-ha_innobase::close()
-/*================*/
-{
-	THD*	thd;
-
-	DBUG_ENTER("ha_innobase::close");
-
-	thd = ha_thd();
-
-	row_prebuilt_free(prebuilt, FALSE);
-
-	if (upd_buf) {
-		/* A non-NULL buffer always has a non-zero size. */
-		ut_ad(upd_buf_size != 0);
-
-		my_free(upd_buf);
-
-		upd_buf_size = 0;
-		upd_buf = NULL;
-	}
-
-	free_share(share);
-
-	MONITOR_INC(MONITOR_TABLE_CLOSE);
-
-	/* There might now be work for the utility threads, so let the
-	InnoDB server know. */
-	srv_active_wake_master_thread();
-
-	DBUG_RETURN(0);
-}
-
-/* The following accessor functions should really be inside MySQL code! */
-
-/**************************************************************//**
-Gets the offset of a field, computed as the distance between the
-field's data pointer and the start of table->record[0].
-@return offset */
-static inline
-uint
-get_field_offset(
-/*=============*/
-	const TABLE*	table,	/*!< in: MySQL table object */
-	const Field*	field)	/*!< in: MySQL field object */
-{
-	return(static_cast<uint>(field->ptr - table->record[0]));
-}
-
-#ifdef WITH_WSREP
-UNIV_INTERN
-int
-wsrep_innobase_mysql_sort(
-/*===============*/
- /* out: str contains sort string
- Transforms the data field in str, in place, into its collation sort
- string by calling the strnxfrm method of the charset selected by
- charset_number. Only the string-like MySQL types listed in the
- first case group are transformed; for every other type str is left
- untouched.
- Returns str_length, except when wsrep_protocol_version >= 3 and the
- type is string-like, in which case the length reported by the final
- strnxfrm call is returned. */
- int mysql_type, /* in: MySQL type */
- uint charset_number, /* in: number of the charset */
- unsigned char* str, /* in: data field */
- unsigned int str_length, /* in: data field length,
- not UNIV_SQL_NULL */
- unsigned int buf_length) /* in: total str buffer length */
-
-{
- CHARSET_INFO* charset;
- enum_field_types mysql_tp;
- int ret_length = str_length;
-
- DBUG_ASSERT(str_length != UNIV_SQL_NULL);
-
- mysql_tp = (enum_field_types) mysql_type;
-
- switch (mysql_tp) {
-
- case MYSQL_TYPE_BIT:
- case MYSQL_TYPE_STRING:
- case MYSQL_TYPE_VAR_STRING:
- case MYSQL_TYPE_TINY_BLOB:
- case MYSQL_TYPE_MEDIUM_BLOB:
- case MYSQL_TYPE_BLOB:
- case MYSQL_TYPE_LONG_BLOB:
- case MYSQL_TYPE_VARCHAR:
- {
- uchar tmp_str[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'};
- uint tmp_length = REC_VERSION_56_MAX_INDEX_COL_LEN;
-
- /* Use the charset number to pick the right charset struct for
- the comparison. Since the MySQL function get_charset may be
- slow before Bar removes the mutex operation there, we first
- look at 2 common charsets directly. */
-
- if (charset_number == default_charset_info->number) {
- charset = default_charset_info;
- } else if (charset_number == my_charset_latin1.number) {
- charset = &my_charset_latin1;
- } else {
- charset = get_charset(charset_number, MYF(MY_WME));
-
- if (charset == NULL) {
- sql_print_error("InnoDB needs charset %lu for doing "
- "a comparison, but MySQL cannot "
- "find that charset.",
- (ulong) charset_number);
- ut_a(0);
- }
- }
-
- ut_a(str_length <= tmp_length); /* the copy below must fit tmp_str */
- memcpy(tmp_str, str, str_length);
-
- tmp_length = charset->coll->strnxfrm(charset, str, str_length,
- str_length, tmp_str,
- tmp_length, 0);
- DBUG_ASSERT(tmp_length <= str_length); /* NOTE(review): the call
- above is made unconditionally and already writes into str;
- both branches below then call strnxfrm again with tmp_str as
- input, and in the < 3 branch tmp_length has by then been
- replaced by this call's result. This looks redundant, but
- the output is returned to the wsrep caller - confirm the
- expected result before changing anything here. */
- if (wsrep_protocol_version < 3) {
- tmp_length = charset->coll->strnxfrm(
- charset, str, str_length,
- str_length, tmp_str, tmp_length, 0);
- DBUG_ASSERT(tmp_length <= str_length);
- } else {
- /* strnxfrm will expand the destination string,
- protocols < 3 truncated the sorted string
- protocols >= 3 gets full sorted string
- */
- tmp_length = charset->coll->strnxfrm(
- charset, str, buf_length,
- str_length, tmp_str, str_length, 0);
- DBUG_ASSERT(tmp_length <= buf_length);
- ret_length = tmp_length;
- }
-
- break;
- }
- case MYSQL_TYPE_DECIMAL :
- case MYSQL_TYPE_TINY :
- case MYSQL_TYPE_SHORT :
- case MYSQL_TYPE_LONG :
- case MYSQL_TYPE_FLOAT :
- case MYSQL_TYPE_DOUBLE :
- case MYSQL_TYPE_NULL :
- case MYSQL_TYPE_TIMESTAMP :
- case MYSQL_TYPE_LONGLONG :
- case MYSQL_TYPE_INT24 :
- case MYSQL_TYPE_DATE :
- case MYSQL_TYPE_TIME :
- case MYSQL_TYPE_DATETIME :
- case MYSQL_TYPE_YEAR :
- case MYSQL_TYPE_NEWDATE :
- case MYSQL_TYPE_NEWDECIMAL :
- case MYSQL_TYPE_ENUM :
- case MYSQL_TYPE_SET :
- case MYSQL_TYPE_GEOMETRY :
- break;
- default:
- break;
- }
-
- return ret_length;
-}
-#endif /* WITH_WSREP */
-
-/*************************************************************//**
-InnoDB uses this function to compare two data fields for which the data type
-is such that we must use MySQL code to compare them. NOTE that the prototype
-of this function is in rem0cmp.cc in InnoDB source code! If you change this
-function, remember to update the prototype there!
-Only the string-like MySQL types are accepted; any other type triggers
-ut_error.
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */
-UNIV_INTERN
-int
-innobase_mysql_cmp(
-/*===============*/
-	int			mysql_type,	/*!< in: MySQL type */
-	uint			charset_number,	/*!< in: number of the charset */
-	const unsigned char*	a,		/*!< in: data field */
-	unsigned int		a_length,	/*!< in: data field length,
-						not UNIV_SQL_NULL */
-	const unsigned char*	b,		/*!< in: data field */
-	unsigned int		b_length)	/*!< in: data field length,
-						not UNIV_SQL_NULL */
-{
-	DBUG_ASSERT(a_length != UNIV_SQL_NULL);
-	DBUG_ASSERT(b_length != UNIV_SQL_NULL);
-
-	/* Reject every type that is not compared through a collation. */
-	switch (static_cast<enum_field_types>(mysql_type)) {
-	case MYSQL_TYPE_BIT:
-	case MYSQL_TYPE_STRING:
-	case MYSQL_TYPE_VAR_STRING:
-	case MYSQL_TYPE_TINY_BLOB:
-	case MYSQL_TYPE_MEDIUM_BLOB:
-	case MYSQL_TYPE_BLOB:
-	case MYSQL_TYPE_LONG_BLOB:
-	case MYSQL_TYPE_VARCHAR:
-		break;
-	default:
-		ut_error;
-		return(0);
-	}
-
-	/* Use the charset number to pick the right charset struct for
-	the comparison. get_charset() may be slow, so the two common
-	charsets are checked directly first. */
-	CHARSET_INFO*	cs;
-
-	if (charset_number == default_charset_info->number) {
-		cs = default_charset_info;
-	} else if (charset_number == my_charset_latin1.number) {
-		cs = &my_charset_latin1;
-	} else {
-		cs = get_charset(charset_number, MYF(MY_WME));
-
-		if (cs == NULL) {
-			sql_print_error("InnoDB needs charset %lu for doing "
-					"a comparison, but MySQL cannot "
-					"find that charset.",
-					(ulong) charset_number);
-			ut_a(0);
-		}
-	}
-
-	/* Starting from 4.1.3, we use strnncollsp() in comparisons of
-	non-latin1_swedish_ci strings. NOTE that the collation order
-	changes then: 'b\0\0...' is ordered BEFORE 'b ...'. Users
-	having indexes on such data need to rebuild their tables! */
-	const int	order = cs->coll->strnncollsp(
-		cs, a, a_length, b, b_length, 0);
-
-	/* Normalize any negative/positive result to -1/1. */
-	return((order > 0) - (order < 0));
-}
-
-
-/*************************************************************//**
-Looks up the charset struct that corresponds to a charset number, for a
-column of one of the string-like MySQL types listed in the switch below.
-Any other MySQL type trips ut_error. A charset number that get_charset()
-cannot resolve is reported with sql_print_error() and then asserted on.
-@return the charset matching charset_number */
-UNIV_INTERN
-CHARSET_INFO*
-innobase_get_fts_charset(
-/*=====================*/
-	int	mysql_type,	/*!< in: MySQL type */
-	uint	charset_number)	/*!< in: number of the charset */
-{
-	switch ((enum_field_types) mysql_type) {
-
-	case MYSQL_TYPE_BIT:
-	case MYSQL_TYPE_STRING:
-	case MYSQL_TYPE_VAR_STRING:
-	case MYSQL_TYPE_TINY_BLOB:
-	case MYSQL_TYPE_MEDIUM_BLOB:
-	case MYSQL_TYPE_BLOB:
-	case MYSQL_TYPE_LONG_BLOB:
-	case MYSQL_TYPE_VARCHAR:
-		/* The two common charsets are tested directly so that
-		the get_charset() call, which the original authors
-		considered potentially slow, is only a fallback. */
-
-		if (charset_number == default_charset_info->number) {
-			return(default_charset_info);
-		}
-
-		if (charset_number == my_charset_latin1.number) {
-			return(&my_charset_latin1);
-		}
-
-		{
-			CHARSET_INFO*	found = get_charset(
-				charset_number, MYF(MY_WME));
-
-			if (found == NULL) {
-				sql_print_error("InnoDB needs charset %lu for doing "
-						"a comparison, but MySQL cannot "
-						"find that charset.",
-						(ulong) charset_number);
-				ut_a(0);
-			}
-
-			return(found);
-		}
-	default:
-		ut_error;
-	}
-
-	return(NULL);
-}
-
-/*************************************************************//**
-Compares two data fields with ha_compare_text(), using the charset that
-innobase_get_fts_charset() resolves for the given type and charset number.
-The comparison is requested with the argument following b_length set to 1
-and the last argument set to 0. NOTE that, according to the original
-comment, the prototype of this function is in rem0cmp.c in InnoDB source
-code and has to be updated whenever this definition changes.
-@return the value returned by ha_compare_text(); per the original
-comment 1, 0, -1, if a is greater, equal, less than b, respectively */
-UNIV_INTERN
-int
-innobase_mysql_cmp_prefix(
-/*======================*/
-	int			mysql_type,	/*!< in: MySQL type */
-	uint			charset_number,	/*!< in: number of the charset */
-	const unsigned char*	a,		/*!< in: first data field */
-	unsigned int		a_length,	/*!< in: length of a,
-						not UNIV_SQL_NULL */
-	const unsigned char*	b,		/*!< in: second data field */
-	unsigned int		b_length)	/*!< in: length of b,
-						not UNIV_SQL_NULL */
-{
-	CHARSET_INFO*	cs = innobase_get_fts_charset(
-		mysql_type, charset_number);
-
-	return(ha_compare_text(cs, (uchar*) a, a_length,
-			       (uchar*) b, b_length, 1, 0));
-}
-/******************************************************************//**
-Compares two fts_string_t values with ha_compare_text() under the given
-charset. All three arguments arrive as untyped pointers.
-@return the value returned by ha_compare_text() */
-UNIV_INTERN
-int
-innobase_fts_text_cmp(
-/*==================*/
-	const void*	cs,	/*!< in: Character set (CHARSET_INFO) */
-	const void*	p1,	/*!< in: key (fts_string_t) */
-	const void*	p2)	/*!< in: node (fts_string_t) */
-{
-	const fts_string_t*	key = static_cast<const fts_string_t*>(p1);
-	const fts_string_t*	node = static_cast<const fts_string_t*>(p2);
-	const uint		key_len = static_cast<uint>(key->f_len);
-	const uint		node_len = static_cast<uint>(node->f_len);
-
-	return(ha_compare_text(
-		       static_cast<const CHARSET_INFO*>(cs),
-		       key->f_str, key_len,
-		       node->f_str, node_len, 0, 0));
-}
-/******************************************************************//**
-Compares two fts_string_t values case insensitively under the given
-charset. NOTE: the string of p2 is converted to lower case IN PLACE with
-my_casedn_str() before the comparison, and its length is then taken with
-strlen() rather than from p2's f_len.
-@return the value returned by ha_compare_text() */
-UNIV_INTERN
-int
-innobase_fts_text_case_cmp(
-/*=======================*/
-	const void*	cs,	/*!< in: Character set (CHARSET_INFO) */
-	const void*	p1,	/*!< in: key (fts_string_t) */
-	const void*	p2)	/*!< in: node (fts_string_t); its string
-				is lower-cased in place */
-{
-	const CHARSET_INFO*	charset = static_cast<const CHARSET_INFO*>(cs);
-	const fts_string_t*	key = static_cast<const fts_string_t*>(p1);
-	const fts_string_t*	node = static_cast<const fts_string_t*>(p2);
-
-	my_casedn_str(charset, (char*) node->f_str);
-
-	const ulint	node_len = strlen((const char*) node->f_str);
-
-	return(ha_compare_text(
-		       charset,
-		       key->f_str, static_cast<uint>(key->f_len),
-		       node->f_str, static_cast<uint>(node_len), 0, 0));
-}
-/******************************************************************//**
-Get the first character's code position for FTS index partition.
-The string is transformed with my_strnxfrm() into a 2-byte buffer, the
-buffer is read with mach_read_from_2(), and a value above 255 is divided
-by 256.
-@return the resulting value, or 0 if str is NULL or len is 0 */
-UNIV_INTERN
-ulint
-innobase_strnxfrm(
-/*==============*/
-	const CHARSET_INFO*
-			cs,	/*!< in: Character set */
-	const uchar*	str,	/*!< in: string */
-	const ulint	len)	/*!< in: string length */
-{
-	if (len == 0 || !str) {
-		return(0);
-	}
-
-	uchar	sort_key[2];
-
-	my_strnxfrm(cs, (uchar*) sort_key, 2, str, len);
-
-	const ulint	weight = mach_read_from_2(sort_key);
-
-	return(weight > 255 ? weight / 256 : weight);
-}
-
-/******************************************************************//**
-Compares a prefix key against a value under the given charset. The two
-strings are handed to ha_compare_text() in swapped order (value first,
-prefix key second, with the argument following the second length set to
-1), so the result is negated before it is returned.
-@return the negated value returned by ha_compare_text() */
-UNIV_INTERN
-int
-innobase_fts_text_cmp_prefix(
-/*=========================*/
-	const void*	cs,	/*!< in: Character set (CHARSET_INFO) */
-	const void*	p1,	/*!< in: prefix key (fts_string_t) */
-	const void*	p2)	/*!< in: value to compare (fts_string_t) */
-{
-	const fts_string_t*	prefix = static_cast<const fts_string_t*>(p1);
-	const fts_string_t*	value = static_cast<const fts_string_t*>(p2);
-
-	const int	swapped = ha_compare_text(
-		static_cast<const CHARSET_INFO*>(cs),
-		value->f_str, static_cast<uint>(value->f_len),
-		prefix->f_str, static_cast<uint>(prefix->f_len), 1, 0);
-
-	/* The operands were passed in reverse order, so flip the sign. */
-	return(-swapped);
-}
-
-/******************************************************************//**
-Makes all characters in a string lower case, writing the result to dst.
-When cs->casedn_multiply is 1 the source is copied to dst, NUL-terminated
-and lower-cased in place with my_casedn_str(); otherwise the casedn()
-method of the charset converts from src to dst.
-NOTE(review): on the casedn_multiply == 1 path dst_len is not consulted;
-src_len + 1 bytes of dst are written. Presumably callers size dst
-accordingly - confirm.
-@return length of the lower-cased string in dst */
-UNIV_INTERN
-size_t
-innobase_fts_casedn_str(
-/*====================*/
-	CHARSET_INFO*	cs,	/*!< in: Character set */
-	char*		src,	/*!< in: string to put in lower case */
-	size_t		src_len,/*!< in: input string length */
-	char*		dst,	/*!< out: buffer for result string */
-	size_t		dst_len)/*!< in: buffer size */
-{
-	if (cs->casedn_multiply != 1) {
-		return(cs->cset->casedn(cs, src, src_len, dst, dst_len));
-	}
-
-	memcpy(dst, src, src_len);
-	dst[src_len] = 0;
-
-	my_casedn_str(cs, dst);
-
-	return(strlen(dst));
-}
-
-#define true_word_char(c, ch) ((c) & (_MY_U | _MY_L | _MY_NMR) || (ch) == '_') /* nonzero if the ctype mask c has any of the _MY_U, _MY_L, _MY_NMR bits set, or if ch is an underscore */
-
-#define misc_word_char(X) 0 /* always 0: no character is treated as a "misc" word character */
-
-/*************************************************************//**
-Get the next token from the given string and store it in *token.
-It is mostly copied from MyISAM's doc parsing function ft_simple_get_word()
-Characters are skipped until the first one accepted by true_word_char();
-the token then runs up to the first character that is not accepted, or to
-'end'. token->f_str points into the input text (nothing is copied). If no
-word character is found before 'end', token->f_str stays NULL and
-token->f_len and token->f_n_char stay 0.
-@return length of string processed */
-UNIV_INTERN
-ulint
-innobase_mysql_fts_get_token(
-/*=========================*/
- CHARSET_INFO* cs, /*!< in: Character set */
- const byte* start, /*!< in: start of text */
- const byte* end, /*!< in: one character past end of
- text */
- fts_string_t* token, /*!< out: token's text */
- ulint* offset) /*!< out: offset to token,
- measured as characters from
- 'start'. NOTE(review): the body of
- this function never writes *offset */
-{
- int mbl;
- const uchar* doc = start;
-
- ut_a(cs);
-
- token->f_n_char = token->f_len = 0;
- token->f_str = NULL;
-
- for (;;) { /* phase 1: skip everything that is not a word character */
-
- if (doc >= end) {
- return(doc - start); /* no token found; *token was reset above */
- }
-
- int ctype;
-
- mbl = cs->cset->ctype(
- cs, &ctype, doc, (const uchar*) end);
-
- if (true_word_char(ctype, *doc)) {
- break;
- }
-
- doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1); /* step by |mbl| bytes, or by 1 byte when mbl is 0 */
- }
-
- ulint mwc = 0;
- ulint length = 0;
-
- token->f_str = const_cast<byte*>(doc);
-
- while (doc < end) { /* phase 2: consume the token */
-
- int ctype;
-
- mbl = cs->cset->ctype(
- cs, &ctype, (uchar*) doc, (uchar*) end);
- if (true_word_char(ctype, *doc)) {
- mwc = 0;
- } else if (!misc_word_char(*doc) || mwc) { /* with misc_word_char() defined as 0 this is always taken for a non-word character */
- break;
- } else {
- ++mwc;
- }
-
- ++length; /* counts characters, not bytes */
-
- doc += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1); /* step by |mbl| bytes, or by 1 byte when mbl is 0 */
- }
-
- token->f_len = (uint) (doc - token->f_str) - mwc; /* byte length of the token */
- token->f_n_char = length;
-
- return(doc - start);
-}
-
-/**************************************************************//**
-Maps the type of a MySQL field to an InnoDB main type ('mtype') and
-reports through *unsigned_flag whether the InnoDB column is to be treated
-as unsigned. According to the original comment, InnoDB differentiates
-between MySQL's old <= 4.1 VARCHAR and the new true VARCHAR in >= 5.0.3
-by the 'prtype', not by the value returned here.
-@return DATA_BINARY, DATA_VARCHAR, ...; 0 for MYSQL_TYPE_NULL */
-UNIV_INTERN
-ulint
-get_innobase_type_from_mysql_type(
-/*==============================*/
-	ulint*		unsigned_flag,	/*!< out: DATA_UNSIGNED if an
-					'unsigned type', otherwise 0;
-					ENUM, SET and fields carrying
-					UNSIGNED_FLAG are 'unsigned
-					types' */
-	const void*	f)		/*!< in: MySQL Field */
-{
-	const Field*	field = static_cast<const Field*>(f);
-
-	/* The MySQL type code is expected to fit in 8 bits: per the
-	original comment this is relied on in ibuf and when DATA_NOT_NULL
-	is ORed to the type. */
-
-	DBUG_ASSERT((ulint)MYSQL_TYPE_STRING < 256);
-	DBUG_ASSERT((ulint)MYSQL_TYPE_VAR_STRING < 256);
-	DBUG_ASSERT((ulint)MYSQL_TYPE_DOUBLE < 256);
-	DBUG_ASSERT((ulint)MYSQL_TYPE_FLOAT < 256);
-	DBUG_ASSERT((ulint)MYSQL_TYPE_DECIMAL < 256);
-
-	const enum_field_types	real_type = field->real_type();
-
-	if (real_type == MYSQL_TYPE_ENUM || real_type == MYSQL_TYPE_SET) {
-		/* field->type() reports a string type for ENUM and SET,
-		and MySQL leaves its own unsigned flag at zero, but the
-		value is stored internally as an unsigned integer code. */
-
-		*unsigned_flag = DATA_UNSIGNED;
-
-		return(DATA_INT);
-	}
-
-	*unsigned_flag = (field->flags & UNSIGNED_FLAG) ? DATA_UNSIGNED : 0;
-
-	switch (field->type()) {
-		/* NOTE that only string types are mapped to DATA_MYSQL
-		and DATA_VARMYSQL */
-	case MYSQL_TYPE_VAR_STRING:	/* old <= 4.1 VARCHAR */
-	case MYSQL_TYPE_VARCHAR:	/* new >= 5.0.3 true VARCHAR */
-		if (field->binary()) {
-			return(DATA_BINARY);
-		}
-
-		return(field->charset() == &my_charset_latin1
-		       ? DATA_VARCHAR
-		       : DATA_VARMYSQL);
-
-	case MYSQL_TYPE_BIT:
-	case MYSQL_TYPE_STRING:
-		if (field->binary()) {
-			return(DATA_FIXBINARY);
-		}
-
-		return(field->charset() == &my_charset_latin1
-		       ? DATA_CHAR
-		       : DATA_MYSQL);
-
-	case MYSQL_TYPE_NEWDECIMAL:
-		return(DATA_FIXBINARY);
-
-	case MYSQL_TYPE_LONG:
-	case MYSQL_TYPE_LONGLONG:
-	case MYSQL_TYPE_TINY:
-	case MYSQL_TYPE_SHORT:
-	case MYSQL_TYPE_INT24:
-	case MYSQL_TYPE_DATE:
-	case MYSQL_TYPE_YEAR:
-	case MYSQL_TYPE_NEWDATE:
-		return(DATA_INT);
-
-	case MYSQL_TYPE_TIME:
-	case MYSQL_TYPE_DATETIME:
-	case MYSQL_TYPE_TIMESTAMP:
-		/* Temporal fields whose key type is binary are stored as
-		fixed-length binary, the others as integers. */
-		return(field->key_type() == HA_KEYTYPE_BINARY
-		       ? DATA_FIXBINARY
-		       : DATA_INT);
-
-	case MYSQL_TYPE_FLOAT:
-		return(DATA_FLOAT);
-
-	case MYSQL_TYPE_DOUBLE:
-		return(DATA_DOUBLE);
-
-	case MYSQL_TYPE_DECIMAL:
-		return(DATA_DECIMAL);
-
-	case MYSQL_TYPE_GEOMETRY:
-	case MYSQL_TYPE_TINY_BLOB:
-	case MYSQL_TYPE_MEDIUM_BLOB:
-	case MYSQL_TYPE_BLOB:
-	case MYSQL_TYPE_LONG_BLOB:
-		return(DATA_BLOB);
-
-	case MYSQL_TYPE_NULL:
-		/* The "NULL" datatype is tolerated here instead of
-		triggering an assertion failure; 0 is returned for it. */
-		return(0);
-
-	default:
-		ut_error;
-	}
-
-	return(0);
-}
-
-/*******************************************************************//**
-Stores an unsigned integer value < 64k into 2 bytes, least significant
-byte first (little-endian storage format). */
-static inline
-void
-innobase_write_to_2_little_endian(
-/*==============================*/
-	byte*	buf,	/*!< out: where to store; 2 bytes are written */
-	ulint	val)	/*!< in: value to write, must be < 64k */
-{
-	ut_a(val < 256 * 256);
-
-	const byte	low = (byte)(val & 0xFF);
-	const byte	high = (byte)(val >> 8);
-
-	buf[0] = low;
-	buf[1] = high;
-}
-
-/*******************************************************************//**
-Reads an unsigned integer value < 64k from 2 bytes stored least
-significant byte first (little-endian storage format).
-@return value */
-static inline
-uint
-innobase_read_from_2_little_endian(
-/*===============================*/
-	const uchar*	buf)	/*!< in: from where to read; 2 bytes are read */
-{
-	const ulint	low = (ulint) buf[0];
-	const ulint	high = (ulint) buf[1];
-
-	return((uint) (low | (high << 8)));
-}
-
-#ifdef WITH_WSREP
-/*******************************************************************//**
-Clamps the length of a key part to the space that is left in the key
-buffer. A "key truncated" message with the query text of the thread is
-written to stderr when the part does not fit.
-@return len if it fits, otherwise buff_space */
-static inline
-ulint
-wsrep_key_part_fit(
-/*===============*/
-	THD*	thd,		/*!< in: thread whose query text is logged */
-	ulint	len,		/*!< in: number of bytes wanted */
-	uint	buff_space)	/*!< in: number of bytes left in the buffer */
-{
-	if (len <= buff_space) {
-		return(len);
-	}
-
-	fprintf (stderr, "WSREP: key truncated: %s\n",
-		 wsrep_thd_query(thd));
-
-	return(buff_space);
-}
-
-/*******************************************************************//**
-Stores a key value for a row to a buffer.
-The buffer is zero-filled first. For every user defined key part a NULL
-indicator byte is written if the part is nullable, followed by the column
-bytes: string-like values are first passed through
-wsrep_innobase_mysql_sort(), other values are copied verbatim from the
-record. When wsrep_protocol_version > 1 every write is limited to the
-space left in the buffer, and a "key truncated" message is logged when a
-part has to be cut.
-NOTE(review): for wsrep_protocol_version <= 1 the write position is
-advanced by the full key part length without tracking the remaining
-space; that legacy layout is left as it was.
-@return key value length as stored in buff */
-UNIV_INTERN
-uint
-wsrep_store_key_val_for_row(
-/*===============================*/
-	THD*		thd,	/*!< in: thread whose query text is
-				logged when the key is truncated */
-	TABLE*		table,	/*!< in: MySQL table object that owns
-				the key */
-	uint		keynr,	/*!< in: key number */
-	char*		buff,	/*!< in/out: buffer for the key value (in MySQL
-				format) */
-	uint		buff_len,/*!< in: buffer length */
-	const uchar*	record,	/*!< in: row the key value is taken
-				from */
-	row_prebuilt_t*	prebuilt,	/*!< in: InnoDB prebuilt struct
-					(not referenced by this function) */
-	ibool*		key_is_null)/*!< out: full key was null */
-{
-	KEY*		key_info	= table->key_info + keynr;
-	KEY_PART_INFO*	key_part	= key_info->key_part;
-	KEY_PART_INFO*	end		= key_part + key_info->user_defined_key_parts;
-	char*		buff_start	= buff;
-	enum_field_types mysql_type;
-	Field*		field;
-	uint		buff_space	= buff_len;
-
-	DBUG_ENTER("wsrep_store_key_val_for_row");
-
-	memset(buff, 0, buff_len);
-	*key_is_null = TRUE;
-
-	for (; key_part != end; key_part++) {
-
-		/* Scratch copy of the column value; the sort key is
-		produced in place in this array. */
-		uchar	sorted[REC_VERSION_56_MAX_INDEX_COL_LEN] = {'\0'};
-		ibool	part_is_null = FALSE;
-
-		if (key_part->null_bit) {
-			if (buff_space > 0) {
-				if (record[key_part->null_offset]
-				    & key_part->null_bit) {
-					*buff = 1;
-					part_is_null = TRUE;
-				} else {
-					*buff = 0;
-				}
-				buff++;
-				buff_space--;
-			} else {
-				fprintf (stderr, "WSREP: key truncated: %s\n",
-					 wsrep_thd_query(thd));
-			}
-		}
-		if (!part_is_null) *key_is_null = FALSE;
-
-		field = key_part->field;
-		mysql_type = field->type();
-
-		if (mysql_type == MYSQL_TYPE_VARCHAR) {
-			/* >= 5.0.3 true VARCHAR */
-			ulint			lenlen;
-			ulint			len;
-			const byte*		data;
-			ulint			key_len;
-			ulint			true_len;
-			const CHARSET_INFO*	cs;
-			int			error=0;
-
-			key_len = key_part->length;
-
-			if (part_is_null) {
-				/* Skip the 2 length bytes and the data
-				bytes; they stay zero. */
-				true_len = wsrep_key_part_fit(
-					thd, key_len + 2, buff_space);
-				buff += true_len;
-				buff_space -= true_len;
-				continue;
-			}
-			cs = field->charset();
-
-			lenlen = (ulint)
-				(((Field_varstring*)field)->length_bytes);
-
-			data = row_mysql_read_true_varchar(&len,
-				(byte*) (record
-				+ (ulint)get_field_offset(table, field)),
-				lenlen);
-
-			true_len = len;
-
-			/* For multi byte character sets we need to calculate
-			the true length of the key */
-
-			if (len > 0 && cs->mbmaxlen > 1) {
-				true_len = (ulint) cs->cset->well_formed_len(cs,
-						(const char *) data,
-						(const char *) data + len,
-						(uint) (key_len /
-							cs->mbmaxlen),
-						&error);
-			}
-
-			/* In a column prefix index, we may need to truncate
-			the stored value: */
-
-			if (true_len > key_len) {
-				true_len = key_len;
-			}
-
-			memcpy(sorted, data, true_len);
-			true_len = wsrep_innobase_mysql_sort(
-				mysql_type, cs->number, sorted, true_len,
-				REC_VERSION_56_MAX_INDEX_COL_LEN);
-
-			if (wsrep_protocol_version > 1) {
-				/* Only the bytes of the sort key are stored;
-				the rest of the buffer was reset to zero in
-				the memset() call above. */
-				true_len = wsrep_key_part_fit(
-					thd, true_len, buff_space);
-				memcpy(buff, sorted, true_len);
-				buff += true_len;
-				buff_space -= true_len;
-			} else {
-				buff += key_len;
-			}
-		} else if (mysql_type == MYSQL_TYPE_TINY_BLOB
-			|| mysql_type == MYSQL_TYPE_MEDIUM_BLOB
-			|| mysql_type == MYSQL_TYPE_BLOB
-			|| mysql_type == MYSQL_TYPE_LONG_BLOB
-			/* MYSQL_TYPE_GEOMETRY data is treated
-			as BLOB data in innodb. */
-			|| mysql_type == MYSQL_TYPE_GEOMETRY) {
-
-			const CHARSET_INFO*	cs;
-			ulint			key_len;
-			ulint			true_len;
-			int			error=0;
-			ulint			blob_len;
-			const byte*		blob_data;
-
-			ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
-
-			key_len = key_part->length;
-
-			if (part_is_null) {
-				/* Skip the 2 length bytes and the data
-				bytes; they stay zero. */
-				true_len = wsrep_key_part_fit(
-					thd, key_len + 2, buff_space);
-				buff += true_len;
-				buff_space -= true_len;
-
-				continue;
-			}
-
-			cs = field->charset();
-
-			blob_data = row_mysql_read_blob_ref(&blob_len,
-				(byte*) (record
-				+ (ulint) get_field_offset(table, field)),
-				(ulint) field->pack_length());
-
-			true_len = blob_len;
-
-			ut_a(get_field_offset(table, field)
-			     == key_part->offset);
-
-			/* For multi byte character sets we need to calculate
-			the true length of the key */
-
-			if (blob_len > 0 && cs->mbmaxlen > 1) {
-				true_len = (ulint) cs->cset->well_formed_len(cs,
-						(const char *) blob_data,
-						(const char *) blob_data
-							+ blob_len,
-						(uint) (key_len /
-							cs->mbmaxlen),
-						&error);
-			}
-
-			/* All indexes on BLOB and TEXT are column prefix
-			indexes, and we may need to truncate the data to be
-			stored in the key value: */
-
-			if (true_len > key_len) {
-				true_len = key_len;
-			}
-
-			memcpy(sorted, blob_data, true_len);
-			true_len = wsrep_innobase_mysql_sort(
-				mysql_type, cs->number, sorted, true_len,
-				REC_VERSION_56_MAX_INDEX_COL_LEN);
-
-			if (wsrep_protocol_version > 1) {
-				/* Copy BEFORE advancing the write position,
-				exactly as the VARCHAR branch does. The copy
-				used to be made after buff had been moved
-				forward, which put the data true_len bytes
-				past its slot and, when the length had been
-				clamped to buff_space, past the end of the
-				buffer. */
-				true_len = wsrep_key_part_fit(
-					thd, true_len, buff_space);
-				memcpy(buff, sorted, true_len);
-				buff += true_len;
-				buff_space -= true_len;
-			} else {
-				/* NOTE(review): legacy layout kept as it
-				was: the bytes are written after the slot of
-				key_len bytes and buff_space is not updated.
-				Changing this would change the key bytes
-				produced for protocol versions <= 1. */
-				buff += key_len;
-				memcpy(buff, sorted, true_len);
-			}
-		} else {
-			/* Here we handle all other data types except the
-			true VARCHAR, BLOB and TEXT. Note that the column
-			value we store may be also in a column prefix
-			index. */
-
-			const CHARSET_INFO*	cs = NULL;
-			ulint			true_len;
-			ulint			key_len;
-			const uchar*		src_start;
-			int			error=0;
-			enum_field_types	real_type;
-
-			key_len = key_part->length;
-
-			if (part_is_null) {
-				true_len = wsrep_key_part_fit(
-					thd, key_len, buff_space);
-				buff += true_len;
-				buff_space -= true_len;
-
-				continue;
-			}
-
-			src_start = record + key_part->offset;
-			real_type = field->real_type();
-			true_len = key_len;
-
-			/* Character set for the field is defined only
-			to fields whose type is string and real field
-			type is not enum or set. For these fields check
-			if character set is multi byte. */
-
-			if (real_type != MYSQL_TYPE_ENUM
-				&& real_type != MYSQL_TYPE_SET
-				&& ( mysql_type == MYSQL_TYPE_VAR_STRING
-					|| mysql_type == MYSQL_TYPE_STRING)) {
-
-				cs = field->charset();
-
-				/* For multi byte character sets we need to
-				calculate the true length of the key */
-
-				if (key_len > 0 && cs->mbmaxlen > 1) {
-
-					true_len = (ulint)
-						cs->cset->well_formed_len(cs,
-							(const char *)src_start,
-							(const char *)src_start
-								+ key_len,
-							(uint) (key_len /
-								cs->mbmaxlen),
-							&error);
-				}
-				memcpy(sorted, src_start, true_len);
-				true_len = wsrep_innobase_mysql_sort(
-					mysql_type, cs->number, sorted, true_len,
-					REC_VERSION_56_MAX_INDEX_COL_LEN);
-
-				true_len = wsrep_key_part_fit(
-					thd, true_len, buff_space);
-				memcpy(buff, sorted, true_len);
-			} else {
-				/* Non-string values get the same limit as
-				every other part. Without it the copy could
-				run past the end of the buffer and the
-				unsigned buff_space would wrap around in the
-				subtraction below. */
-				true_len = wsrep_key_part_fit(
-					thd, true_len, buff_space);
-				memcpy(buff, src_start, true_len);
-			}
-			buff += true_len;
-			buff_space -= true_len;
-		}
-	}
-
-	ut_a(buff <= buff_start + buff_len);
-
-	DBUG_RETURN((uint)(buff - buff_start));
-}
-#endif /* WITH_WSREP */
-
-/*******************************************************************//**
-Stores a key value for a row to a buffer.
-The buffer is zero-filled first and every user defined key part of key
-number keynr is then appended in the format described at the top of the
-function body. No bounds check is made while the parts are written; only
-the ut_a() at the end verifies that the write position did not move past
-buff + buff_len.
-@return key value length as stored in buff */
-UNIV_INTERN
-uint
-ha_innobase::store_key_val_for_row(
-/*===============================*/
- uint keynr, /*!< in: key number */
- char* buff, /*!< in/out: buffer for the key value (in MySQL
- format) */
- uint buff_len,/*!< in: buffer length */
- const uchar* record)/*!< in: row in MySQL format */
-{
- KEY* key_info = table->key_info + keynr;
- KEY_PART_INFO* key_part = key_info->key_part;
- KEY_PART_INFO* end =
- key_part + key_info->user_defined_key_parts;
- char* buff_start = buff;
- enum_field_types mysql_type;
- Field* field;
- ibool is_null;
-
- DBUG_ENTER("store_key_val_for_row");
-
- /* The format for storing a key field in MySQL is the following:
-
- 1. If the column can be NULL, then in the first byte we put 1 if the
- field value is NULL, 0 otherwise.
-
- 2. If the column is of a BLOB type (it must be a column prefix field
- in this case), then we put the length of the data in the field to the
- next 2 bytes, in the little-endian format. If the field is SQL NULL,
- then these 2 bytes are set to 0. Note that the length of data in the
- field is <= column prefix length.
-
- 3. In a column prefix field, prefix_len next bytes are reserved for
- data. In a normal field the max field length next bytes are reserved
- for data. For a VARCHAR(n) the max field length is n. If the stored
- value is the SQL NULL then these data bytes are set to 0.
-
- 4. We always use a 2 byte length for a true >= 5.0.3 VARCHAR. Note that
- in the MySQL row format, the length is stored in 1 or 2 bytes,
- depending on the maximum allowed length. But in the MySQL key value
- format, the length always takes 2 bytes.
-
- We have to zero-fill the buffer so that MySQL is able to use a
- simple memcmp to compare two key values to determine if they are
- equal. MySQL does this to compare contents of two 'ref' values. */
-
- memset(buff, 0, buff_len);
-
- for (; key_part != end; key_part++) {
- is_null = FALSE;
-
- if (key_part->null_bit) { /* nullable part: emit the NULL indicator byte (item 1 above) */
- if (record[key_part->null_offset]
- & key_part->null_bit) {
- *buff = 1;
- is_null = TRUE;
- } else {
- *buff = 0;
- }
- buff++;
- }
-
- field = key_part->field;
- mysql_type = field->type();
-
- if (mysql_type == MYSQL_TYPE_VARCHAR) {
- /* >= 5.0.3 true VARCHAR */
- ulint lenlen;
- ulint len;
- const byte* data;
- ulint key_len;
- ulint true_len;
- const CHARSET_INFO* cs;
- int error=0;
-
- key_len = key_part->length;
-
- if (is_null) {
- buff += key_len + 2; /* skip the 2 length bytes and the data bytes; they stay zero from the memset() above */
-
- continue;
- }
- cs = field->charset();
-
- lenlen = (ulint)
- (((Field_varstring*) field)->length_bytes);
-
- data = row_mysql_read_true_varchar(&len,
- (byte*) (record
- + (ulint) get_field_offset(table, field)),
- lenlen);
-
- true_len = len;
-
- /* For multi byte character sets we need to calculate
- the true length of the key */
-
- if (len > 0 && cs->mbmaxlen > 1) {
- true_len = (ulint) cs->cset->well_formed_len(cs,
- (const char*) data,
- (const char*) data + len,
- (uint) (key_len / cs->mbmaxlen),
- &error);
- }
-
- /* In a column prefix index, we may need to truncate
- the stored value: */
-
- if (true_len > key_len) {
- true_len = key_len;
- }
-
- /* The length in a key value is always stored in 2
- bytes */
-
- row_mysql_store_true_var_len((byte*) buff, true_len, 2);
- buff += 2;
-
- memcpy(buff, data, true_len);
-
- /* Note that we always reserve the maximum possible
- length of the true VARCHAR in the key value, though
- only len first bytes after the 2 length bytes contain
- actual data. The rest of the space was reset to zero
- in the memset() call above. */
-
- buff += key_len;
-
- } else if (mysql_type == MYSQL_TYPE_TINY_BLOB
- || mysql_type == MYSQL_TYPE_MEDIUM_BLOB
- || mysql_type == MYSQL_TYPE_BLOB
- || mysql_type == MYSQL_TYPE_LONG_BLOB
- /* MYSQL_TYPE_GEOMETRY data is treated
- as BLOB data in innodb. */
- || mysql_type == MYSQL_TYPE_GEOMETRY) {
-
- const CHARSET_INFO* cs;
- ulint key_len;
- ulint true_len;
- int error=0;
- ulint blob_len;
- const byte* blob_data;
-
- ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
-
- key_len = key_part->length;
-
- if (is_null) {
- buff += key_len + 2; /* skip the 2 length bytes and the data bytes; they stay zero from the memset() above */
-
- continue;
- }
-
- cs = field->charset();
-
- blob_data = row_mysql_read_blob_ref(&blob_len,
- (byte*) (record
- + (ulint) get_field_offset(table, field)),
- (ulint) field->pack_length());
-
- true_len = blob_len;
-
- ut_a(get_field_offset(table, field)
- == key_part->offset);
-
- /* For multi byte character sets we need to calculate
- the true length of the key */
-
- if (blob_len > 0 && cs->mbmaxlen > 1) {
- true_len = (ulint) cs->cset->well_formed_len(cs,
- (const char*) blob_data,
- (const char*) blob_data
- + blob_len,
- (uint) (key_len / cs->mbmaxlen),
- &error);
- }
-
- /* All indexes on BLOB and TEXT are column prefix
- indexes, and we may need to truncate the data to be
- stored in the key value: */
-
- if (true_len > key_len) {
- true_len = key_len;
- }
-
- /* MySQL reserves 2 bytes for the length and the
- storage of the number is little-endian */
-
- innobase_write_to_2_little_endian(
- (byte*) buff, true_len);
- buff += 2;
-
- memcpy(buff, blob_data, true_len);
-
- /* Note that we always reserve the maximum possible
- length of the BLOB prefix in the key value. */
-
- buff += key_len;
- } else {
- /* Here we handle all other data types except the
- true VARCHAR, BLOB and TEXT. Note that the column
- value we store may be also in a column prefix
- index. */
-
- const CHARSET_INFO* cs = NULL;
- ulint true_len;
- ulint key_len;
- const uchar* src_start;
- int error=0;
- enum_field_types real_type;
-
- key_len = key_part->length;
-
- if (is_null) {
- buff += key_len; /* no length bytes for these types; the data bytes stay zero from the memset() above */
-
- continue;
- }
-
- src_start = record + key_part->offset;
- real_type = field->real_type();
- true_len = key_len;
-
- /* Character set for the field is defined only
- to fields whose type is string and real field
- type is not enum or set. For these fields check
- if character set is multi byte. */
-
- if (real_type != MYSQL_TYPE_ENUM
- && real_type != MYSQL_TYPE_SET
- && ( mysql_type == MYSQL_TYPE_VAR_STRING
- || mysql_type == MYSQL_TYPE_STRING)) {
-
- cs = field->charset();
-
- /* For multi byte character sets we need to
- calculate the true length of the key */
-
- if (key_len > 0 && cs->mbmaxlen > 1) {
-
- true_len = (ulint)
- cs->cset->well_formed_len(cs,
- (const char*) src_start,
- (const char*) src_start
- + key_len,
- (uint) (key_len
- / cs->mbmaxlen),
- &error);
- }
- }
-
- memcpy(buff, src_start, true_len);
- buff += true_len;
-
- /* Pad the unused space with spaces. true_len can only
- be smaller than key_len when well_formed_len() shortened
- it above, and cs has been set on that path; the
- ut_a(cs != NULL) below relies on this. */
-
- if (true_len < key_len) {
- ulint pad_len = key_len - true_len;
- ut_a(cs != NULL);
- ut_a(!(pad_len % cs->mbminlen));
-
- cs->cset->fill(cs, buff, pad_len,
- 0x20 /* space */);
- buff += pad_len;
- }
- }
- }
-
- ut_a(buff <= buff_start + buff_len);
-
- DBUG_RETURN((uint)(buff - buff_start));
-}
-
-/**************************************************************//**
-Decides whether a column has to be part of the prebuilt struct
-'template'. The rules are applied in this order:
-1. a column outside the index is never needed in a 'key read';
-2. a column inside the index is needed when all index fields are fetched;
-3. a column set in the read_set or write_set of the table is needed;
-4. a clustered key column is needed when primary key columns are fetched.
-@return field to use, or NULL if the field is not needed */
-static
-const Field*
-build_template_needs_field(
-/*=======================*/
-	ibool		index_contains,	/*!< in:
-					dict_index_contains_col_or_prefix(
-					index, i) */
-	ibool		read_just_key,	/*!< in: TRUE when MySQL calls
-					ha_innobase::extra with the
-					argument HA_EXTRA_KEYREAD; it is enough
-					to read just columns defined in
-					the index (i.e., no read of the
-					clustered index record necessary) */
-	ibool		fetch_all_in_key,
-					/*!< in: true=fetch all fields in
-					the index */
-	ibool		fetch_primary_key_cols,
-					/*!< in: true=fetch the
-					primary key columns */
-	dict_index_t*	index,		/*!< in: InnoDB index to use */
-	const TABLE*	table,		/*!< in: MySQL table object */
-	ulint		i,		/*!< in: field index in InnoDB table */
-	ulint		sql_idx)	/*!< in: field index in SQL table */
-{
-	const Field*	candidate = table->field[sql_idx];
-
-	ut_ad(index_contains == dict_index_contains_col_or_prefix(index, i));
-
-	if (index_contains) {
-		if (fetch_all_in_key) {
-			/* Every field of the index is wanted. */
-			return(candidate);
-		}
-	} else if (read_just_key) {
-		/* A 'key read' has no use for columns outside the key. */
-		return(NULL);
-	}
-
-	const uint	bit = static_cast<uint>(sql_idx);
-
-	/* Needed when the query reads or writes the column ... */
-	const bool	in_query = bitmap_is_set(table->read_set, bit)
-		|| bitmap_is_set(table->write_set, bit);
-
-	/* ... or when it is a requested primary key column. */
-	const bool	wanted = in_query
-		|| (fetch_primary_key_cols
-		    && dict_table_col_in_clustered_key(index->table, i));
-
-	return(wanted ? candidate : NULL);
-}
-
-/**************************************************************//**
-Checks whether column i is contained, fully or as a prefix, in the index
-of the prebuilt struct (prebuilt->index). When 'index' is that very index
-the already computed 'contains' value is reused instead of searching the
-index again.
-@return whether the field is needed for index condition pushdown */
-inline
-bool
-build_template_needs_field_in_icp(
-/*==============================*/
-	const dict_index_t*	index,	/*!< in: InnoDB index */
-	const row_prebuilt_t*	prebuilt,/*!< in: row fetch template */
-	bool			contains,/*!< in: whether the index contains
-					column i */
-	ulint			i)	/*!< in: column number */
-{
-	ut_ad(contains == dict_index_contains_col_or_prefix(index, i));
-
-	if (index == prebuilt->index) {
-		return(contains);
-	}
-
-	return(dict_index_contains_col_or_prefix(prebuilt->index, i));
-}
-
-/**************************************************************//**
-Adds a field to a prebuilt struct 'template'.
-@return the field template */
-static
-mysql_row_templ_t*
-build_template_field(
-/*=================*/
- row_prebuilt_t* prebuilt, /*!< in/out: template */
- dict_index_t* clust_index, /*!< in: InnoDB clustered index */
- dict_index_t* index, /*!< in: InnoDB index to use */
- TABLE* table, /*!< in: MySQL table object */
- const Field* field, /*!< in: field in MySQL table */
- ulint i) /*!< in: field index in InnoDB table */
-{
- mysql_row_templ_t* templ;
- const dict_col_t* col;
-
- //ut_ad(field == table->field[i]);
- ut_ad(clust_index->table == index->table);
-
- col = dict_table_get_nth_col(index->table, i);
-
- templ = prebuilt->mysql_template + prebuilt->n_template++;
- UNIV_MEM_INVALID(templ, sizeof *templ);
- templ->col_no = i;
- templ->clust_rec_field_no = dict_col_get_clust_pos(col, clust_index);
-
- /* If clustered index record field is not found, lets print out
- field names and all the rest to understand why field is not found. */
- if (templ->clust_rec_field_no == ULINT_UNDEFINED) {
- const char* tb_col_name = dict_table_get_col_name(clust_index->table, i);
- dict_field_t* field=NULL;
- size_t size = 0;
-
- for(ulint j=0; j < clust_index->n_user_defined_cols; j++) {
- dict_field_t* ifield = &(clust_index->fields[j]);
- if (ifield && !memcmp(tb_col_name, ifield->name,
- strlen(tb_col_name))) {
- field = ifield;
- break;
- }
- }
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Looking for field %lu name %s from table %s",
- i,
- (tb_col_name ? tb_col_name : "NULL"),
- clust_index->table->name);
-
-
- for(ulint j=0; j < clust_index->n_user_defined_cols; j++) {
- dict_field_t* ifield = &(clust_index->fields[j]);
- ib_logf(IB_LOG_LEVEL_INFO,
- "InnoDB Table %s field %lu name %s",
- clust_index->table->name,
- j,
- (ifield ? ifield->name : "NULL"));
- }
-
- for(ulint j=0; j < table->s->stored_fields; j++) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "MySQL table %s field %lu name %s",
- table->s->table_name.str,
- j,
- table->field[j]->field_name.str);
- }
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Clustered record field for column %lu"
- " not found table n_user_defined %d"
- " index n_user_defined %d"
- " InnoDB table %s field name %s"
- " MySQL table %s field name %s n_fields %d"
- " query %s",
- i,
- clust_index->n_user_defined_cols,
- clust_index->table->n_cols - DATA_N_SYS_COLS,
- clust_index->table->name,
- (field ? field->name : "NULL"),
- table->s->table_name.str,
- (tb_col_name ? tb_col_name : "NULL"),
- table->s->stored_fields,
- innobase_get_stmt(current_thd, &size));
-
- ut_a(templ->clust_rec_field_no != ULINT_UNDEFINED);
- }
- templ->rec_field_is_prefix = FALSE;
-
- if (dict_index_is_clust(index)) {
- templ->rec_field_is_prefix = false;
- templ->rec_field_no = templ->clust_rec_field_no;
- templ->rec_prefix_field_no = ULINT_UNDEFINED;
- } else {
- /* If we're in a secondary index, keep track of the original
- index position even if this is just a prefix index; we will use
- this later to avoid a cluster index lookup in some cases.*/
-
- templ->rec_field_no = dict_index_get_nth_col_pos(index, i,
- &templ->rec_prefix_field_no);
- templ->rec_field_is_prefix
- = (templ->rec_field_no == ULINT_UNDEFINED)
- && (templ->rec_prefix_field_no != ULINT_UNDEFINED);
-#ifdef UNIV_DEBUG
- if (templ->rec_prefix_field_no != ULINT_UNDEFINED)
- {
- const dict_field_t* field = dict_index_get_nth_field(
- index,
- templ->rec_prefix_field_no);
- ut_ad(templ->rec_field_is_prefix
- == (field->prefix_len != 0));
- } else {
- ut_ad(!templ->rec_field_is_prefix);
- }
-#endif
- }
-
- if (field->real_maybe_null()) {
- templ->mysql_null_byte_offset =
- field->null_offset();
-
- templ->mysql_null_bit_mask = (ulint) field->null_bit;
- } else {
- templ->mysql_null_bit_mask = 0;
- }
-
- templ->mysql_col_offset = (ulint) get_field_offset(table, field);
-
- templ->mysql_col_len = (ulint) field->pack_length();
- templ->type = col->mtype;
- templ->mysql_type = (ulint) field->type();
-
- if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
- templ->mysql_length_bytes = (ulint)
- (((Field_varstring*) field)->length_bytes);
- }
-
- templ->charset = dtype_get_charset_coll(col->prtype);
- templ->mbminlen = dict_col_get_mbminlen(col);
- templ->mbmaxlen = dict_col_get_mbmaxlen(col);
- templ->is_unsigned = col->prtype & DATA_UNSIGNED;
-
- if (!dict_index_is_clust(index)
- && templ->rec_field_no == ULINT_UNDEFINED) {
- prebuilt->need_to_access_clustered = TRUE;
-
- if (templ->rec_prefix_field_no != ULINT_UNDEFINED) {
- dict_field_t* field = dict_index_get_nth_field(
- index,
- templ->rec_prefix_field_no);
- templ->rec_field_is_prefix = (field->prefix_len != 0);
- }
- }
-
- if (prebuilt->mysql_prefix_len < templ->mysql_col_offset
- + templ->mysql_col_len) {
- prebuilt->mysql_prefix_len = templ->mysql_col_offset
- + templ->mysql_col_len;
- }
-
- if (templ->type == DATA_BLOB) {
- prebuilt->templ_contains_blob = TRUE;
- }
-
- return(templ);
-}
-
-/**************************************************************//**
-Builds a 'template' to the prebuilt struct. The template is used in fast
-retrieval of just those column values MySQL needs in its processing.
-
-Outline of what the code below does:
-1) Decide whether the whole row is needed. It always is when
-   prebuilt->select_lock_type == LOCK_X; otherwise the decision follows
-   prebuilt->hint_need_to_fetch_extra_cols and prebuilt->read_just_key.
-2) Pick the index the template is built against: the clustered index for a
-   whole-row template, prebuilt->index otherwise.
-3) Allocate prebuilt->mysql_template if it does not exist yet and reset the
-   per-template counters and flags in prebuilt.
-4) Call build_template_field() for every stored column that is needed. When
-   the active index is the one with a pushed index condition, the columns
-   needed by the condition are added in a first pass (and counted in
-   prebuilt->idx_cond_n_cols), the remaining columns in a second pass.
-5) If a secondary index is used but the clustered index must be accessed,
-   rewrite every rec_field_no to the clustered index position. */
-UNIV_INTERN
-void
-ha_innobase::build_template(
-/*========================*/
- bool whole_row) /*!< in: true=ROW_MYSQL_WHOLE_ROW,
- false=ROW_MYSQL_REC_FIELDS */
-{
- dict_index_t* index;
- dict_index_t* clust_index;
- ulint n_stored_fields;
- ibool fetch_all_in_key = FALSE;
- ibool fetch_primary_key_cols = FALSE;
- ulint i, sql_idx; /* i: InnoDB column number; sql_idx: position in table->field[] */
-
- if (prebuilt->select_lock_type == LOCK_X) {
- /* We always retrieve the whole clustered index record if we
- use exclusive row level locks, for example, if the read is
- done in an UPDATE statement. */
-
- whole_row = true;
- } else if (!whole_row) {
- if (prebuilt->hint_need_to_fetch_extra_cols
- == ROW_RETRIEVE_ALL_COLS) {
-
- /* We know we must at least fetch all columns in the
- key, or all columns in the table */
-
- if (prebuilt->read_just_key) {
- /* MySQL has instructed us that it is enough
- to fetch the columns in the key; looks like
- MySQL can set this flag also when there is
- only a prefix of the column in the key: in
- that case we retrieve the whole column from
- the clustered index */
-
- fetch_all_in_key = TRUE;
- } else {
- whole_row = true;
- }
- } else if (prebuilt->hint_need_to_fetch_extra_cols
- == ROW_RETRIEVE_PRIMARY_KEY) {
- /* We must at least fetch all primary key cols. Note
- that if the clustered index was internally generated
- by InnoDB on the row id (no primary key was
- defined), then row_search_for_mysql() will always
- retrieve the row id to a special buffer in the
- prebuilt struct. */
-
- fetch_primary_key_cols = TRUE;
- }
- }
-
- clust_index = dict_table_get_first_index(prebuilt->table);
-
- index = whole_row ? clust_index : prebuilt->index;
-
- prebuilt->need_to_access_clustered = (index == clust_index); /* initial value; the per-column checks below may still require the clustered index */
-
- /* Either prebuilt->index should be a secondary index, or it
- should be the clustered index. */
- ut_ad(dict_index_is_clust(index) == (index == clust_index));
-
- /* Below we check column by column if we need to access
- the clustered index. */
-
- n_stored_fields= (ulint)table->s->stored_fields; /* number of stored columns */
-
- if (!prebuilt->mysql_template) { /* allocated only when no template array exists yet */
- prebuilt->mysql_template = (mysql_row_templ_t*)
- mem_alloc(n_stored_fields * sizeof(mysql_row_templ_t));
- }
-
- prebuilt->template_type = whole_row
- ? ROW_MYSQL_WHOLE_ROW : ROW_MYSQL_REC_FIELDS;
- prebuilt->null_bitmap_len = table->s->null_bytes;
-
- /* Prepare to build prebuilt->mysql_template[]. */
- prebuilt->templ_contains_blob = FALSE;
- prebuilt->mysql_prefix_len = 0;
- prebuilt->n_template = 0;
- prebuilt->idx_cond_n_cols = 0;
-
- /* Note that in InnoDB, i is the column number in the table.
- MySQL calls columns 'fields'. */
-
- if (active_index != MAX_KEY && active_index == pushed_idx_cond_keyno) {
- /* Push down an index condition or an end_range check. */
- for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) {
-
- while (!table->field[sql_idx]->stored_in_db) { /* step over fields that are not stored */
- sql_idx++;
- }
-
- const ibool index_contains
- = dict_index_contains_col_or_prefix(index, i);
-
- /* Test if an end_range or an index condition
- refers to the field. Note that "index" and
- "index_contains" may refer to the clustered index.
- Index condition pushdown is relative to prebuilt->index
- (the index that is being looked up first). */
-
- /* When join_read_always_key() invokes this
- code via handler::ha_index_init() and
- ha_innobase::index_init(), end_range is not
- yet initialized. Because of that, we must
- always check for index_contains, instead of
- the subset
- field->part_of_key.is_set(active_index)
- which would be acceptable if end_range==NULL. */
- if (build_template_needs_field_in_icp(
- index, prebuilt, index_contains, i)) {
- /* Needed in ICP */
- const Field* field;
- mysql_row_templ_t* templ;
-
- if (whole_row) {
- field = table->field[sql_idx];
- } else {
- field = build_template_needs_field(
- index_contains,
- prebuilt->read_just_key,
- fetch_all_in_key,
- fetch_primary_key_cols,
- index, table, i, sql_idx);
- if (!field) {
- continue;
- }
- }
-
- templ = build_template_field(
- prebuilt, clust_index, index,
- table, field, i);
- prebuilt->idx_cond_n_cols++;
- ut_ad(prebuilt->idx_cond_n_cols
- == prebuilt->n_template);
-
- if (index == prebuilt->index) {
- templ->icp_rec_field_no
- = templ->rec_field_no;
- } else {
- templ->icp_rec_field_no
- = dict_index_get_nth_col_pos(
- prebuilt->index, i,
- NULL);
- }
-
- if (dict_index_is_clust(prebuilt->index)) {
- ut_ad(templ->icp_rec_field_no
- != ULINT_UNDEFINED);
- /* If the primary key includes
- a column prefix, use it in
- index condition pushdown,
- because the condition is
- evaluated before fetching any
- off-page (externally stored)
- columns. */
- if (templ->icp_rec_field_no
- < prebuilt->index->n_uniq) {
- /* This is a key column;
- all set. */
- continue;
- }
- } else if (templ->icp_rec_field_no
- != ULINT_UNDEFINED) {
- continue;
- }
-
- /* This is a column prefix index.
- The column prefix can be used in
- an end_range comparison. */
-
- templ->icp_rec_field_no
- = dict_index_get_nth_col_or_prefix_pos(
- prebuilt->index, i, TRUE, NULL);
- ut_ad(templ->icp_rec_field_no
- != ULINT_UNDEFINED);
-
- /* Index condition pushdown can be used on
- all columns of a secondary index, and on
- the PRIMARY KEY columns. On the clustered
- index, it must never be used on other than
- PRIMARY KEY columns, because those columns
- may be stored off-page, and we will not
- fetch externally stored columns before
- checking the index condition. */
- /* TODO: test the above with an assertion
- like this. Note that index conditions are
- currently pushed down as part of the
- "optimizer phase" while end_range is done
- as part of the execution phase. Therefore,
- we were unable to use an accurate condition
- for end_range in the "if" condition above,
- and the following assertion would fail.
- ut_ad(!dict_index_is_clust(prebuilt->index)
- || templ->rec_field_no
- < prebuilt->index->n_uniq);
- */
- }
- }
-
- ut_ad(prebuilt->idx_cond_n_cols > 0);
- ut_ad(prebuilt->idx_cond_n_cols == prebuilt->n_template);
-
- /* Include the fields that are not needed in index condition
- pushdown. */
- for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) {
-
- while (!table->field[sql_idx]->stored_in_db) { /* step over fields that are not stored */
- sql_idx++;
- }
-
- const ibool index_contains
- = dict_index_contains_col_or_prefix(index, i);
-
- if (!build_template_needs_field_in_icp(
- index, prebuilt, index_contains, i)) {
- /* Not needed in ICP */
- const Field* field;
-
- if (whole_row) {
- field = table->field[sql_idx];
- } else {
- field = build_template_needs_field(
- index_contains,
- prebuilt->read_just_key,
- fetch_all_in_key,
- fetch_primary_key_cols,
- index, table, i, sql_idx);
- if (!field) {
- continue;
- }
- }
-
- build_template_field(prebuilt,
- clust_index, index,
- table, field, i);
- }
- }
-
- prebuilt->idx_cond = this;
- } else {
- /* No index condition pushdown */
- prebuilt->idx_cond = NULL;
-
- for (i = 0, sql_idx = 0; i < n_stored_fields; i++, sql_idx++) {
- const Field* field;
-
- while (!table->field[sql_idx]->stored_in_db) { /* step over fields that are not stored */
- sql_idx++;
- }
-
- if (whole_row) {
- field = table->field[sql_idx];
- } else {
- field = build_template_needs_field(
- dict_index_contains_col_or_prefix(
- index, i),
- prebuilt->read_just_key,
- fetch_all_in_key,
- fetch_primary_key_cols,
- index, table, i, sql_idx);
- if (!field) {
- continue;
- }
- }
-
- build_template_field(prebuilt, clust_index, index,
- table, field, i);
- }
- }
-
- if (index != clust_index && prebuilt->need_to_access_clustered) {
- /* Change rec_field_no's to correspond to the clustered index
- record */
- for (i = 0; i < prebuilt->n_template; i++) {
-
- mysql_row_templ_t* templ
- = &prebuilt->mysql_template[i];
-
- templ->rec_field_no = templ->clust_rec_field_no;
- }
- }
-}
-
-/********************************************************************//**
-Takes the AUTOINC protection that the configured
-innobase_autoinc_lock_mode asks for. The special handling exists only to
-work around a limitation of MySQL binlogging: for INSERT ... SELECT type of
-statements the binlog stores just the min value of the autoinc interval, so
-the resulting non-determinism has to be eliminated here. Once that is fixed
-the special lock handling can be removed.
-On DB_SUCCESS the AUTOINC mutex of prebuilt->table is held on return.
-@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
-dberr_t
-ha_innobase::innobase_lock_autoinc(void)
-/*====================================*/
-{
-	DBUG_ENTER("ha_innobase::innobase_lock_autoinc");
-	dberr_t		err = DB_SUCCESS;
-
-	ut_ad(!srv_read_only_mode);
-
-	switch (innobase_autoinc_lock_mode) {
-	case AUTOINC_NO_LOCKING:
-		/* The AUTOINC mutex alone is enough in this mode. */
-		dict_table_autoinc_lock(prebuilt->table);
-		break;
-
-	case AUTOINC_NEW_STYLE_LOCKING: {
-		/* Simple (single/multi) row INSERTs/REPLACEs and RBR
-		events take only the mutex, unless some other transaction
-		already holds or waits for the AUTOINC table lock on behalf
-		of a LOAD FILE or INSERT ... SELECT etc. type of statement. */
-		const int	cmd = thd_sql_command(user_thd);
-
-		if (cmd == SQLCOM_INSERT
-		    || cmd == SQLCOM_REPLACE
-		    || cmd == SQLCOM_END /* RBR event */) {
-
-			dict_table_t*	tbl = prebuilt->table;
-
-			dict_table_autoinc_lock(tbl);
-
-			if (!tbl->n_waiting_or_granted_auto_inc_locks) {
-				/* Nobody else is involved: the mutex
-				suffices, no old style locking needed. */
-				break;
-			}
-
-			/* Give the mutex back to avoid deadlocks and
-			continue with old style locking below. */
-			dict_table_autoinc_unlock(tbl);
-		}
-	}
-	/* fall through */
-	case AUTOINC_OLD_STYLE_LOCKING:
-		DBUG_EXECUTE_IF("die_if_autoinc_old_lock_style_used",
-				ut_ad(0););
-		err = row_lock_table_autoinc_for_mysql(prebuilt);
-
-		if (err == DB_SUCCESS) {
-			/* Table lock granted; now take the mutex too. */
-			dict_table_autoinc_lock(prebuilt->table);
-		}
-		break;
-
-	default:
-		ut_error;
-	}
-
-	DBUG_RETURN(err);
-}
-
-/********************************************************************//**
-Resets the autoinc counter of prebuilt->table to the given value. The
-counter is written only after innobase_lock_autoinc() succeeded, and the
-AUTOINC mutex is released again before returning.
-@return DB_SUCCESS if all went well else error code */
-UNIV_INTERN
-dberr_t
-ha_innobase::innobase_reset_autoinc(
-/*================================*/
-	ulonglong	autoinc)	/*!< in: value to store */
-{
-	const dberr_t	lock_err = innobase_lock_autoinc();
-
-	if (lock_err != DB_SUCCESS) {
-		/* Locking failed: leave the counter untouched. */
-		return(lock_err);
-	}
-
-	dict_table_autoinc_initialize(prebuilt->table, autoinc);
-	dict_table_autoinc_unlock(prebuilt->table);
-
-	return(DB_SUCCESS);
-}
-
-/********************************************************************//**
-Raises the autoinc counter of prebuilt->table to the given value. The
-update goes through dict_table_autoinc_update_if_greater(), so the value is
-only stored when it is greater than the one already in the table. Nothing
-is changed when innobase_lock_autoinc() fails.
-@return DB_SUCCESS if all went well else error code */
-UNIV_INTERN
-dberr_t
-ha_innobase::innobase_set_max_autoinc(
-/*==================================*/
-	ulonglong	auto_inc)	/*!< in: value to store */
-{
-	const dberr_t	lock_err = innobase_lock_autoinc();
-
-	if (lock_err != DB_SUCCESS) {
-		/* Locking failed: leave the counter untouched. */
-		return(lock_err);
-	}
-
-	dict_table_autoinc_update_if_greater(prebuilt->table, auto_inc);
-	dict_table_autoinc_unlock(prebuilt->table);
-
-	return(DB_SUCCESS);
-}
-
-/********************************************************************//**
-Stores a row in an InnoDB database, to the table specified in this
-handle.
-
-Steps performed, in order:
-1) Refuse the write when high_level_read_only is set; abort (ut_error) when
-   prebuilt->trx differs from the transaction of the current thread.
-2) For ALTER TABLE / OPTIMIZE / CREATE INDEX / DROP INDEX (and, with WSREP,
-   LOAD DATA splitting), commit the transaction once num_write_row has
-   reached 10000 and re-acquire the needed locks.
-3) If the table has an auto-increment column and record is
-   table->record[0], call update_auto_increment().
-4) Build a whole-row template if none is present, then insert the row with
-   row_insert_for_mysql().
-5) If an auto-increment value was used, possibly advance the table's
-   autoinc counter via innobase_set_max_autoinc().
-6) Convert the InnoDB error code with convert_error_code_to_mysql(); with
-   WSREP, append the row keys on success.
-@return error code */
-UNIV_INTERN
-int
-ha_innobase::write_row(
-/*===================*/
- uchar* record) /*!< in: a row in MySQL format */
-{
- dberr_t error;
- int error_result= 0;
- ibool auto_inc_used= FALSE;
-#ifdef WITH_WSREP
- ibool auto_inc_inserted= FALSE; /* if NULL was inserted */
-#endif
- ulint sql_command;
- trx_t* trx = thd_to_trx(user_thd);
-
- DBUG_ENTER("ha_innobase::write_row");
-
- if (high_level_read_only) {
- ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
- DBUG_RETURN(HA_ERR_TABLE_READONLY);
- } else if (prebuilt->trx != trx) {
- sql_print_error("The transaction object for the table handle "
- "is at %p, but for the current thread it is at "
- "%p",
- (const void*) prebuilt->trx, (const void*) trx);
-
- fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr);
- ut_print_buf(stderr, ((const byte*) prebuilt) - 100, 200);
- fputs("\n"
- "InnoDB: Dump of 200 bytes around ha_data: ",
- stderr);
- ut_print_buf(stderr, ((const byte*) trx) - 100, 200);
- putc('\n', stderr);
- ut_error;
- } else if (!trx_is_started(trx)) {
- ++trx->will_lock;
- }
-
- ha_statistic_increment(&SSV::ha_write_count);
-
- sql_command = thd_sql_command(user_thd);
-
- if ((sql_command == SQLCOM_ALTER_TABLE
- || sql_command == SQLCOM_OPTIMIZE
- || sql_command == SQLCOM_CREATE_INDEX
-#ifdef WITH_WSREP
- || (wsrep_on(user_thd) && wsrep_load_data_splitting &&
- sql_command == SQLCOM_LOAD &&
- !thd_test_options(
- user_thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
-#endif /* WITH_WSREP */
- || sql_command == SQLCOM_DROP_INDEX)
- && num_write_row >= 10000) {
-#ifdef WITH_WSREP
- if (wsrep_on(user_thd) && sql_command == SQLCOM_LOAD) {
- WSREP_DEBUG("forced trx split for LOAD: %s",
- wsrep_thd_query(user_thd));
- }
-#endif /* WITH_WSREP */
- /* ALTER TABLE is COMMITted at every 10000 copied rows.
- The IX table lock for the original table has to be re-issued.
- As this method will be called on a temporary table where the
- contents of the original table is being copied to, it is
- a bit tricky to determine the source table. The cursor
- position in the source table need not be adjusted after the
- intermediate COMMIT, since writes by other transactions are
- being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */
-
- dict_table_t* src_table;
- enum lock_mode mode;
-
- num_write_row = 0;
-
- /* Commit the transaction. This will release the table
- locks, so they have to be acquired again. */
-
- /* Altering an InnoDB table */
- /* Get the source table. */
- src_table = lock_get_src_table(
- prebuilt->trx, prebuilt->table, &mode);
- if (!src_table) {
-no_commit:
- /* Unknown situation: do not commit */
- /*
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ALTER TABLE is holding lock"
- " on %lu tables!\n",
- prebuilt->trx->mysql_n_tables_locked);
- */
- ;
- } else if (src_table == prebuilt->table) {
-#ifdef WITH_WSREP
- if (wsrep_on(user_thd) &&
- wsrep_load_data_splitting &&
- sql_command == SQLCOM_LOAD &&
- !thd_test_options(user_thd,
- OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
- {
- switch (wsrep_run_wsrep_commit(user_thd, 1))
- {
- case WSREP_TRX_OK:
- break;
- case WSREP_TRX_SIZE_EXCEEDED:
- case WSREP_TRX_CERT_FAIL:
- case WSREP_TRX_ERROR:
- DBUG_RETURN(1);
- }
-
- if (binlog_hton->commit(binlog_hton, user_thd, 1))
- DBUG_RETURN(1);
- wsrep_post_commit(user_thd, TRUE);
- }
-#endif /* WITH_WSREP */
- /* Source table is not in InnoDB format:
- no need to re-acquire locks on it. */
-
- /* Altering to InnoDB format */
- innobase_commit(ht, user_thd, 1);
- /* Note that this transaction is still active. */
- trx_register_for_2pc(prebuilt->trx);
- /* We will need an IX lock on the destination table. */
- prebuilt->sql_stat_start = TRUE;
- } else {
-#ifdef WITH_WSREP
- if (wsrep_on(user_thd) &&
- wsrep_load_data_splitting &&
- sql_command == SQLCOM_LOAD &&
- !thd_test_options(user_thd,
- OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
- {
- switch (wsrep_run_wsrep_commit(user_thd, 1))
- {
- case WSREP_TRX_OK:
- break;
- case WSREP_TRX_SIZE_EXCEEDED:
- case WSREP_TRX_CERT_FAIL:
- case WSREP_TRX_ERROR:
- DBUG_RETURN(1);
- }
-
- if (binlog_hton->commit(binlog_hton, user_thd, 1))
- DBUG_RETURN(1);
- wsrep_post_commit(user_thd, TRUE);
- }
-#endif /* WITH_WSREP */
- /* Ensure that there are no other table locks than
- LOCK_IX and LOCK_AUTO_INC on the destination table. */
-
- if (!lock_is_table_exclusive(prebuilt->table,
- prebuilt->trx)) {
- goto no_commit;
- }
-
- /* Commit the transaction. This will release the table
- locks, so they have to be acquired again. */
- innobase_commit(ht, user_thd, 1);
- /* Note that this transaction is still active. */
- trx_register_for_2pc(prebuilt->trx);
- /* Re-acquire the table lock on the source table. */
- row_lock_table_for_mysql(prebuilt, src_table, mode);
- /* We will need an IX lock on the destination table. */
- prebuilt->sql_stat_start = TRUE;
- }
- }
-
- num_write_row++;
-
- /* This is the case where the table has an auto-increment column */
- if (table->next_number_field && record == table->record[0]) {
-
- /* Reset the error code before calling
- innobase_get_auto_increment(). */
- prebuilt->autoinc_error = DB_SUCCESS;
-
-#ifdef WITH_WSREP
- auto_inc_inserted= (table->next_number_field->val_int() == 0);
-#endif
-
- if ((error_result = update_auto_increment())) {
- /* We don't want to mask autoinc overflow errors. */
-
- /* Handle the case where the AUTOINC sub-system
- failed during initialization. */
- if (prebuilt->autoinc_error == DB_UNSUPPORTED) {
- error_result = ER_AUTOINC_READ_FAILED;
- /* Set the error message to report too. */
- my_error(ER_AUTOINC_READ_FAILED, MYF(0));
- goto func_exit;
- } else if (prebuilt->autoinc_error != DB_SUCCESS) {
- error = prebuilt->autoinc_error;
- goto report_error;
- }
-
- /* MySQL errors are passed straight back, except for
- ER_AUTOINC_READ_FAILED. This can only happen
- for values out of range.
- */
- goto func_exit;
- }
-
- auto_inc_used = TRUE;
- }
-
- if (prebuilt->mysql_template == NULL
- || prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
-
- /* Build the template used in converting quickly between
- the two database formats */
-
- build_template(true);
- }
-
- innobase_srv_conc_enter_innodb(prebuilt->trx);
-
- error = row_insert_for_mysql((byte*) record, prebuilt);
- DEBUG_SYNC(user_thd, "ib_after_row_insert");
-
- /* Handle duplicate key errors */
- if (auto_inc_used) {
- ulonglong auto_inc;
- ulonglong col_max_value;
-
- /* Note the number of rows processed for this statement, used
- by get_auto_increment() to determine the number of AUTO-INC
- values to reserve. This is only useful for a multi-value INSERT
- and is a statement level counter.*/
- if (trx->n_autoinc_rows > 0) {
- --trx->n_autoinc_rows;
- }
-
- /* We need the upper limit of the col type to check for
- whether we update the table autoinc counter or not. */
- col_max_value = innobase_get_int_col_max_value(
- table->next_number_field);
-
- /* Get the value that MySQL attempted to store in the table.*/
- auto_inc = table->next_number_field->val_int();
-
- switch (error) {
- case DB_DUPLICATE_KEY:
-
- /* A REPLACE command and LOAD DATA INFILE REPLACE
- handle a duplicate key error themselves, but we
- must update the autoinc counter if we are performing
- those statements. */
-
- switch (sql_command) {
- case SQLCOM_LOAD:
- if (trx->duplicates) {
-
- goto set_max_autoinc;
- }
- break;
-
- case SQLCOM_REPLACE:
- case SQLCOM_INSERT_SELECT:
- case SQLCOM_REPLACE_SELECT:
- goto set_max_autoinc;
-
-#ifdef WITH_WSREP
- /* workaround for LP bug #355000, retrying the insert */
- case SQLCOM_INSERT:
-
- WSREP_DEBUG("DUPKEY error for autoinc\n"
- "THD %ld, value %llu, off %llu inc %llu",
- thd_get_thread_id(current_thd),
- auto_inc,
- prebuilt->autoinc_offset,
- prebuilt->autoinc_increment);
-
- if (wsrep_on(current_thd) &&
- auto_inc_inserted &&
- wsrep_drupal_282555_workaround &&
- wsrep_thd_retry_counter(current_thd) == 0 &&
- !thd_test_options(current_thd,
- OPTION_NOT_AUTOCOMMIT |
- OPTION_BEGIN)) {
- WSREP_DEBUG(
- "retrying insert: %s",
- (*wsrep_thd_query(current_thd)) ?
- wsrep_thd_query(current_thd) :
- (char *)"void");
- error= DB_SUCCESS;
- wsrep_thd_set_conflict_state(
- current_thd, MUST_ABORT);
- innobase_srv_conc_exit_innodb(prebuilt->trx);
- /* jump straight to func exit over
- * later wsrep hooks */
- goto func_exit;
- }
- break;
-#endif /* WITH_WSREP */
-
- default:
- break;
- }
-
- break;
-
- case DB_SUCCESS:
- /* If the actual value inserted is greater than
- the upper limit of the interval, then we try and
- update the table upper limit. Note: last_value
- will be 0 if get_auto_increment() was not called.*/
-
- if (auto_inc >= prebuilt->autoinc_last_value) {
-set_max_autoinc:
- /* This should filter out the negative
- values set explicitly by the user. */
- if (auto_inc <= col_max_value) {
- ut_a(prebuilt->autoinc_increment > 0);
-
- ulonglong offset;
- ulonglong increment;
- dberr_t err;
-
- offset = prebuilt->autoinc_offset;
- increment = prebuilt->autoinc_increment;
-
- auto_inc = innobase_next_autoinc(
- auto_inc,
- 1, increment, offset,
- col_max_value);
-
- err = innobase_set_max_autoinc(
- auto_inc);
-
- if (err != DB_SUCCESS) {
- error = err;
- }
- }
- }
- break;
- default:
- break;
- }
- }
-
- innobase_srv_conc_exit_innodb(prebuilt->trx);
-
-report_error:
- if (error == DB_TABLESPACE_DELETED) {
- ib_senderrf(
- trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_TABLESPACE_DISCARDED,
- table->s->table_name.str);
- }
-
- error_result = convert_error_code_to_mysql(error,
- prebuilt->table->flags,
- user_thd);
-
-#ifdef WITH_WSREP
- if (!error_result &&
- wsrep_thd_exec_mode(user_thd) == LOCAL_STATE &&
- wsrep_on(user_thd) &&
- !wsrep_consistency_check(user_thd) &&
- !wsrep_thd_ignore_table(user_thd))
- {
- if (wsrep_append_keys(user_thd, false, record, NULL))
- {
- DBUG_PRINT("wsrep", ("row key failed"));
- error_result = HA_ERR_INTERNAL_ERROR;
- goto wsrep_error; /* the label follows immediately, so this jump skips nothing */
- }
- }
-wsrep_error:
-#endif /* WITH_WSREP */
-
- if (error_result == HA_FTS_INVALID_DOCID) {
- my_error(HA_FTS_INVALID_DOCID, MYF(0));
- }
-
-func_exit:
- innobase_active_small();
-
- DBUG_RETURN(error_result);
-}
-
-/**********************************************************************//**
-Checks which fields have changed in a row and stores information
-of them to an update vector.
-
-Only fields with stored_in_db set are compared; sql_idx walks
-table->field[] while innodb_idx walks prebuilt->table->cols[]. For every
-changed field the new value is converted to InnoDB format into upd_buff and
-an entry is appended to uvect->fields. When the table has FTS data and an
-FTS indexed column or the Doc ID column changed, one more entry carrying
-the Doc ID is appended through fts_update_doc_id(), and
-trx->fts_next_doc_id is set accordingly.
-
-DB_FTS_INVALID_DOCID is returned when the new FTS_DOC_ID column value is 0,
-when a user-managed Doc ID was not changed together with an FTS indexed
-column, or when the new Doc ID is smaller than the cache's next_doc_id.
-@return DB_SUCCESS or error code */
-static
-dberr_t
-calc_row_difference(
-/*================*/
- upd_t* uvect, /*!< in/out: update vector */
- uchar* old_row, /*!< in: old row in MySQL format */
- uchar* new_row, /*!< in: new row in MySQL format */
- TABLE* table, /*!< in: table in MySQL data
- dictionary */
- uchar* upd_buff, /*!< in: buffer to use */
- ulint buff_len, /*!< in: buffer length */
- row_prebuilt_t* prebuilt, /*!< in: InnoDB prebuilt struct */
- THD* thd) /*!< in: user thread */
-{
- uchar* original_upd_buff = upd_buff;
- Field* field;
- enum_field_types field_mysql_type;
- uint n_fields;
- ulint o_len;
- ulint n_len;
- ulint col_pack_len;
- const byte* new_mysql_row_col;
- const byte* o_ptr;
- const byte* n_ptr;
- byte* buf;
- upd_field_t* ufield;
- ulint col_type;
- ulint n_changed = 0;
- dfield_t dfield;
- dict_index_t* clust_index;
- uint sql_idx, innodb_idx= 0; /* sql_idx: position in table->field[]; innodb_idx: position in prebuilt->table->cols[] */
- ibool changes_fts_column = FALSE;
- ibool changes_fts_doc_col = FALSE;
- trx_t* trx = thd_to_trx(thd);
- doc_id_t doc_id = FTS_NULL_DOC_ID;
-
- ut_ad(!srv_read_only_mode);
-
- n_fields = table->s->fields;
- clust_index = dict_table_get_first_index(prebuilt->table);
-
- /* We use upd_buff to convert changed fields */
- buf = (byte*) upd_buff;
-
- for (sql_idx = 0; sql_idx < n_fields; sql_idx++) {
- field = table->field[sql_idx];
- if (!field->stored_in_db)
- continue;
-
- o_ptr = (const byte*) old_row + get_field_offset(table, field);
- n_ptr = (const byte*) new_row + get_field_offset(table, field);
-
- /* Use new_mysql_row_col and col_pack_len to save the values */
-
- new_mysql_row_col = n_ptr;
- col_pack_len = field->pack_length();
-
- o_len = col_pack_len;
- n_len = col_pack_len;
-
- /* We use o_ptr and n_ptr to dig up the actual data for
- comparison. */
-
- field_mysql_type = field->type();
-
- col_type = prebuilt->table->cols[innodb_idx].mtype;
-
- switch (col_type) {
-
- case DATA_BLOB:
- /* Do not compress blob column while comparing*/
- o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
- n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
-
- break;
-
- case DATA_VARCHAR:
- case DATA_BINARY:
- case DATA_VARMYSQL:
- if (field_mysql_type == MYSQL_TYPE_VARCHAR) {
- /* This is a >= 5.0.3 type true VARCHAR where
- the real payload data length is stored in
- 1 or 2 bytes */
-
- o_ptr = row_mysql_read_true_varchar(
- &o_len, o_ptr,
- (ulint)
- (((Field_varstring*) field)->length_bytes));
-
- n_ptr = row_mysql_read_true_varchar(
- &n_len, n_ptr,
- (ulint)
- (((Field_varstring*) field)->length_bytes));
- }
-
- break;
- default:
- ;
- }
-
- if (field_mysql_type == MYSQL_TYPE_LONGLONG
- && prebuilt->table->fts
- && innobase_strcasecmp(
- field->field_name.str, FTS_DOC_ID_COL_NAME) == 0) {
- doc_id = (doc_id_t) mach_read_from_n_little_endian(
- n_ptr, 8);
- if (doc_id == 0) {
- return(DB_FTS_INVALID_DOCID);
- }
- }
-
-
- if (field->real_maybe_null()) {
- if (field->is_null_in_record(old_row)) {
- o_len = UNIV_SQL_NULL;
- }
-
- if (field->is_null_in_record(new_row)) {
- n_len = UNIV_SQL_NULL;
- }
- }
-
- if (o_len != n_len || (o_len != 0 && o_len != UNIV_SQL_NULL
- && 0 != memcmp(o_ptr, n_ptr, o_len))) {
- /* The field has changed */
-
- ufield = uvect->fields + n_changed;
- UNIV_MEM_INVALID(ufield, sizeof *ufield);
-
- /* Let us use a dummy dfield to make the conversion
- from the MySQL column format to the InnoDB format */
-
- if (n_len != UNIV_SQL_NULL) {
- dict_col_copy_type(prebuilt->table->cols + innodb_idx,
- dfield_get_type(&dfield));
-
- buf = row_mysql_store_col_in_innobase_format(
- &dfield,
- (byte*) buf,
- TRUE,
- new_mysql_row_col,
- col_pack_len,
- dict_table_is_comp(prebuilt->table));
- dfield_copy(&ufield->new_val, &dfield);
- } else {
- dfield_set_null(&ufield->new_val);
- }
-
- ufield->exp = NULL;
- ufield->orig_len = 0;
- ufield->field_no = dict_col_get_clust_pos(
- &prebuilt->table->cols[innodb_idx], clust_index);
- n_changed++;
-
- /* If an FTS indexed column was changed by this
- UPDATE then we need to inform the FTS sub-system.
-
- NOTE: Currently we re-index all FTS indexed columns
- even if only a subset of the FTS indexed columns
- have been updated. That is the reason we are
- checking only once here. Later we will need to
- note which columns have been updated and do
- selective processing. */
- if (prebuilt->table->fts != NULL) {
- ulint offset;
- dict_table_t* innodb_table;
-
- innodb_table = prebuilt->table;
-
- if (!changes_fts_column) {
- offset = row_upd_changes_fts_column(
- innodb_table, ufield);
-
- if (offset != ULINT_UNDEFINED) {
- changes_fts_column = TRUE;
- }
- }
-
- if (!changes_fts_doc_col) {
- changes_fts_doc_col =
- row_upd_changes_doc_id(
- innodb_table, ufield);
- }
- }
- }
- if (field->stored_in_db) /* always true here: fields not stored were skipped at the top of the loop */
- innodb_idx++;
- }
-
- /* If the update changes a column with an FTS index on it, we
- then add an update column node with a new document id to the
- other changes. We piggy back our changes on the normal UPDATE
- to reduce processing and IO overhead. */
- if (!prebuilt->table->fts) {
- trx->fts_next_doc_id = 0;
- } else if (changes_fts_column || changes_fts_doc_col) {
- dict_table_t* innodb_table = prebuilt->table;
-
- ufield = uvect->fields + n_changed;
-
- if (!DICT_TF2_FLAG_IS_SET(
- innodb_table, DICT_TF2_FTS_HAS_DOC_ID)) {
-
- /* If Doc ID is managed by user, and if any
- FTS indexed column has been updated, its corresponding
- Doc ID must also be updated. Otherwise, return
- error */
- if (changes_fts_column && !changes_fts_doc_col) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: A new Doc ID"
- " must be supplied while updating"
- " FTS indexed columns.\n");
- return(DB_FTS_INVALID_DOCID);
- }
-
- /* Doc ID must monotonically increase */
- ut_ad(innodb_table->fts->cache);
- if (doc_id < prebuilt->table->fts->cache->next_doc_id) {
- fprintf(stderr,
- "InnoDB: FTS Doc ID must be larger than"
- " " IB_ID_FMT " for table",
- innodb_table->fts->cache->next_doc_id
- - 1);
- ut_print_name(stderr, trx,
- TRUE, innodb_table->name);
- putc('\n', stderr);
-
- return(DB_FTS_INVALID_DOCID);
- } else if ((doc_id
- - prebuilt->table->fts->cache->next_doc_id)
- >= FTS_DOC_ID_MAX_STEP) {
- fprintf(stderr,
- "InnoDB: Doc ID " UINT64PF " is too"
- " big. Its difference with largest"
- " Doc ID used " UINT64PF " cannot"
- " exceed or equal to %d\n",
- doc_id,
- prebuilt->table->fts->cache->next_doc_id - 1,
- FTS_DOC_ID_MAX_STEP);
- } /* NOTE: this case is only logged; no error is returned and the update proceeds */
-
-
- trx->fts_next_doc_id = doc_id;
- } else {
- /* If the Doc ID is a hidden column, it can't be
- changed by user */
- ut_ad(!changes_fts_doc_col);
-
- /* Doc ID column is hidden, a new Doc ID will be
- generated by following fts_update_doc_id() call */
- trx->fts_next_doc_id = 0;
- }
-
- fts_update_doc_id(
- innodb_table, ufield, &trx->fts_next_doc_id);
-
- ++n_changed;
- } else {
- /* We have a Doc ID column, but none of FTS indexed
- columns are touched, nor the Doc ID column, so set
- fts_next_doc_id to UINT64_UNDEFINED, which means do not
- update the Doc ID column */
- trx->fts_next_doc_id = UINT64_UNDEFINED;
- }
-
- uvect->n_fields = n_changed;
- uvect->info_bits = 0;
-
- ut_a(buf <= (byte*) original_upd_buff + buff_len); /* the converted values must not have run past the end of upd_buff */
-
- return(DB_SUCCESS);
-}
-
-#ifdef WITH_WSREP
-/**********************************************************************//**
-Computes an MD5 digest over the stored columns of a row in MySQL format.
-For every stored field one marker byte is hashed (0 when the field is NULL
-in the row, 1 otherwise), followed, for non-NULL fields, by the field's
-payload: the referenced data for DATA_BLOB columns, the true VARCHAR
-payload for MYSQL_TYPE_VARCHAR fields, and the packed field bytes otherwise.
-
-Fields that are not stored in the database (stored_in_db == false) have no
-counterpart in prebuilt->table->cols[], so they are skipped and the InnoDB
-column array is walked with its own index, the same way
-calc_row_difference() does. Indexing cols[] with the MySQL field number
-would pick the wrong column type, or read past the end of the array, on
-tables that contain such fields.
-
-NOTE(review): for tables with non-stored fields the digest differs from the
-one produced before this fix; confirm that mixed-version clusters are not a
-concern for the callers.
-@return 0 (always) */
-static
-int
-wsrep_calc_row_hash(
-/*================*/
-	byte*		digest,		/*!< in/out: md5 sum */
-	const uchar*	row,		/*!< in: row in MySQL format */
-	TABLE*		table,		/*!< in: table in MySQL data
-					dictionary */
-	row_prebuilt_t*	prebuilt,	/*!< in: InnoDB prebuilt struct */
-	THD*		thd)		/*!< in: user thread (unused) */
-{
-	Field*			field;
-	enum_field_types	field_mysql_type;
-	uint			n_fields;
-	ulint			len;
-	const byte*		ptr;
-	ulint			col_type;
-	uint			sql_idx;
-	uint			innodb_idx = 0;
-
-	void *ctx = alloca(my_md5_context_size());
-	my_md5_init(ctx);
-
-	n_fields = table->s->fields;
-
-	for (sql_idx = 0; sql_idx < n_fields; sql_idx++) {
-		byte null_byte=0;
-		byte true_byte=1;
-
-		field = table->field[sql_idx];
-
-		if (!field->stored_in_db) {
-			/* No InnoDB column exists for this field. */
-			continue;
-		}
-
-		ptr = (const byte*) row + get_field_offset(table, field);
-		len = field->pack_length();
-
-		field_mysql_type = field->type();
-
-		col_type = prebuilt->table->cols[innodb_idx].mtype;
-		innodb_idx++;
-
-		switch (col_type) {
-
-		case DATA_BLOB:
-			ptr = row_mysql_read_blob_ref(&len, ptr, len);
-			break;
-
-		case DATA_VARCHAR:
-		case DATA_BINARY:
-		case DATA_VARMYSQL:
-			if (field_mysql_type == MYSQL_TYPE_VARCHAR) {
-				/* This is a >= 5.0.3 type true VARCHAR where
-				the real payload data length is stored in
-				1 or 2 bytes */
-
-				ptr = row_mysql_read_true_varchar(
-					&len, ptr,
-					(ulint)
-					(((Field_varstring*)field)->length_bytes));
-
-			}
-
-			break;
-		default:
-			;
-		}
-
-		if (field->is_null_in_record(row)) {
-			my_md5_input(ctx, &null_byte, 1);
-		} else {
-			my_md5_input(ctx, &true_byte, 1);
-			my_md5_input(ctx, ptr, len);
-		}
-	}
-
-	my_md5_result(ctx, digest);
-
-	return(0);
-}
-#endif /* WITH_WSREP */
-/**********************************************************************//**
-Updates a row to a new value. Both complete row images are passed in, not
-only the changed fields, so some CPU is spent on finding out which fields
-actually differ.
-TODO: currently InnoDB does not prevent the 'Halloween problem':
-in a searched update a single row can get updated several times
-if its index columns are updated!
-@return error number or 0 */
-UNIV_INTERN
-int
-ha_innobase::update_row(
-/*====================*/
-	const uchar*	old_row,	/*!< in: old row in MySQL format */
-	const uchar*	new_row)	/*!< in: new row in MySQL format */
-{
-	upd_t*		uvect;
-	dberr_t		error;
-	trx_t*		trx = thd_to_trx(user_thd);
-
-	DBUG_ENTER("ha_innobase::update_row");
-
-	ut_a(prebuilt->trx == trx);
-
-	if (high_level_read_only) {
-		ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
-		DBUG_RETURN(HA_ERR_TABLE_READONLY);
-	}
-
-	if (!trx_is_started(trx)) {
-		++trx->will_lock;
-	}
-
-	if (upd_buf == NULL) {
-		ut_ad(upd_buf_size == 0);
-
-		/* Allocate the buffer used for packing the fields of a
-		record. table->stored_rec_length alone is not enough,
-		because packed char fields become 1 byte longer when the
-		string length is stored as the first byte. */
-
-		upd_buf_size = table->s->stored_rec_length
-			+ table->s->max_key_length
-			+ MAX_REF_PARTS * 3;
-		upd_buf = (uchar*) my_malloc(upd_buf_size, MYF(MY_WME));
-
-		if (upd_buf == NULL) {
-			upd_buf_size = 0;
-			DBUG_RETURN(HA_ERR_OUT_OF_MEM);
-		}
-	}
-
-	ha_statistic_increment(&SSV::ha_update_count);
-
-	uvect = prebuilt->upd_node
-		? prebuilt->upd_node->update
-		: row_get_prebuilt_update_vector(prebuilt);
-
-	/* Fill the update vector with the fields that differ between
-	the two row images (uses upd_buf of the handle) */
-
-	error = calc_row_difference(uvect, (uchar*) old_row, new_row, table,
-				    upd_buf, upd_buf_size, prebuilt, user_thd);
-
-	if (error != DB_SUCCESS) {
-		goto func_exit;
-	}
-
-	/* This is an update, not a delete */
-	prebuilt->upd_node->is_delete = FALSE;
-
-	ut_a(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
-
-	innobase_srv_conc_enter_innodb(trx);
-
-	error = row_update_for_mysql((byte*) old_row, prebuilt);
-
-	/* Special AUTOINC handling is needed for
-
-	INSERT INTO t (c1,c2) VALUES(x,y) ON DUPLICATE KEY UPDATE ...
-
-	where the AUTOINC counter must follow the value that MySQL
-	actually used in the UPDATE part, which can differ from the
-	value used in the INSERT part. */
-
-	if (error == DB_SUCCESS
-	    && table->next_number_field
-	    && new_row == table->record[0]
-	    && thd_sql_command(user_thd) == SQLCOM_INSERT
-	    && trx->duplicates) {
-
-		ulonglong	auto_inc
-			= table->next_number_field->val_int();
-
-		/* The upper limit of the column type decides whether
-		the table autoinc counter is updated at all. */
-		ulonglong	col_max_value
-			= innobase_get_int_col_max_value(
-				table->next_number_field);
-
-		if (auto_inc != 0 && auto_inc <= col_max_value) {
-
-			auto_inc = innobase_next_autoinc(
-				auto_inc, 1,
-				prebuilt->autoinc_increment,
-				prebuilt->autoinc_offset,
-				col_max_value);
-
-			error = innobase_set_max_autoinc(auto_inc);
-		}
-	}
-
-	innobase_srv_conc_exit_innodb(trx);
-
-func_exit:
-	int err = convert_error_code_to_mysql(error,
-					    prebuilt->table->flags, user_thd);
-
-	if (err == 0 && uvect->n_fields == 0) {
-
-		/* Success, but no column was changed: tell MySQL
-		that the row is not really updated so that it does
-		not increase the count of updated rows.
-		This is fix for http://bugs.mysql.com/29157 */
-		err = HA_ERR_RECORD_IS_THE_SAME;
-	} else if (err == HA_FTS_INVALID_DOCID) {
-		my_error(HA_FTS_INVALID_DOCID, MYF(0));
-	}
-
-	/* Tell InnoDB server that there might be work for
-	utility threads: */
-
-	innobase_active_small();
-
-#ifdef WITH_WSREP
-	if (error == DB_SUCCESS
-	    && wsrep_thd_exec_mode(user_thd) == LOCAL_STATE
-	    && wsrep_on(user_thd)
-	    && !wsrep_thd_ignore_table(user_thd)) {
-
-		DBUG_PRINT("wsrep", ("update row key"));
-
-		if (wsrep_append_keys(user_thd, false, old_row, new_row)) {
-			WSREP_DEBUG("WSREP: UPDATE_ROW_KEY FAILED");
-			DBUG_PRINT("wsrep", ("row key failed"));
-			err = HA_ERR_INTERNAL_ERROR;
-		}
-	}
-#endif /* WITH_WSREP */
-
-	DBUG_RETURN(err);
-}
-
-/**********************************************************************//**
-Deletes a row given as the parameter. The delete is executed through the
-prebuilt update node with its is_delete flag set.
-@return error number or 0; HA_ERR_TABLE_READONLY in read-only mode, and
-(WITH_WSREP only) HA_ERR_INTERNAL_ERROR if appending the row keys fails */
-UNIV_INTERN
-int
-ha_innobase::delete_row(
-/*====================*/
-	const uchar*	record)	/*!< in: a row in MySQL format */
-{
-	dberr_t		error;
-	trx_t*		trx = thd_to_trx(user_thd);
-
-	DBUG_ENTER("ha_innobase::delete_row");
-
-	ut_a(prebuilt->trx == trx);
-
-	if (high_level_read_only) {
-		ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
-		DBUG_RETURN(HA_ERR_TABLE_READONLY);
-	} else if (!trx_is_started(trx)) {
-		++trx->will_lock;
-	}
-
-	ha_statistic_increment(&SSV::ha_delete_count);
-
-	if (!prebuilt->upd_node) {
-		row_get_prebuilt_update_vector(prebuilt);
-	}
-
-	/* This is a delete */
-
-	prebuilt->upd_node->is_delete = TRUE;
-
-	innobase_srv_conc_enter_innodb(trx);
-
-	error = row_update_for_mysql((byte*) record, prebuilt);
-
-	innobase_srv_conc_exit_innodb(trx);
-
-	/* Tell the InnoDB server that there might be work for
-	utility threads: */
-
-	innobase_active_small();
-
-#ifdef WITH_WSREP
-	if (error == DB_SUCCESS &&
-	    wsrep_thd_exec_mode(user_thd) == LOCAL_STATE &&
-	    wsrep_on(user_thd) &&
-	    !wsrep_thd_ignore_table(user_thd))
-	{
-		if (wsrep_append_keys(user_thd, false, record, NULL)) {
-			DBUG_PRINT("wsrep", ("delete fail"));
-			/* HA_ERR_INTERNAL_ERROR is a handler error code,
-			not a dberr_t: return it directly, as update_row()
-			does, instead of passing it through
-			convert_error_code_to_mysql(). */
-			DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
-		}
-	}
-#endif /* WITH_WSREP */
-
-	DBUG_RETURN(convert_error_code_to_mysql(
-			    error, prebuilt->table->flags, user_thd));
-}
-
-/**********************************************************************//**
-Removes a new lock set on a row, if it was not read optimistically. This can
-be called after a row has been read in the processing of an UPDATE or a DELETE
-query, if the option innodb_locks_unsafe_for_binlog is set. */
-UNIV_INTERN
-void
-ha_innobase::unlock_row(void)
-/*=========================*/
-{
-	DBUG_ENTER("ha_innobase::unlock_row");
-
-	/* A consistent read takes no locks, so there is nothing
-	to release. */
-
-	if (prebuilt->select_lock_type == LOCK_NONE) {
-		DBUG_VOID_RETURN;
-	}
-
-	/* This assertion would ideally be the first statement, but the
-	SQL layer sometimes calls this function while the transaction is
-	in state TRX_STATE_NOT_STARTED. The select_lock_type check above
-	gets around this issue. */
-	ut_ad(trx_state_eq(prebuilt->trx, TRX_STATE_ACTIVE));
-
-	switch (prebuilt->row_read_type) {
-	case ROW_READ_DID_SEMI_CONSISTENT:
-		prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
-		break;
-	case ROW_READ_WITH_LOCKS:
-		/* Unlock only if innodb_locks_unsafe_for_binlog is set
-		or the isolation level is at most READ COMMITTED. */
-		if (srv_locks_unsafe_for_binlog
-		    || prebuilt->trx->isolation_level
-		    <= TRX_ISO_READ_COMMITTED) {
-			row_unlock_for_mysql(prebuilt, FALSE);
-		}
-		break;
-	case ROW_READ_TRY_SEMI_CONSISTENT:
-		row_unlock_for_mysql(prebuilt, FALSE);
-		break;
-	}
-
-	DBUG_VOID_RETURN;
-}
-
-/* See handler.h and row0mysql.h for docs on this function.
-Tells whether the row read type of the handle is currently
-ROW_READ_DID_SEMI_CONSISTENT. */
-UNIV_INTERN
-bool
-ha_innobase::was_semi_consistent_read(void)
-/*=======================================*/
-{
-	const bool	did_semi_consistent
-		= (prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT);
-
-	return(did_semi_consistent);
-}
-
-/* See handler.h and row0mysql.h for docs on this function.
-Selects the row read type of the handle: semi-consistent read is tried only
-when MySQL requested it and either innodb_locks_unsafe_for_binlog is used
-or the session runs at READ COMMITTED isolation level or lower; otherwise
-rows are read with locks. */
-UNIV_INTERN
-void
-ha_innobase::try_semi_consistent_read(bool yes)
-/*===========================================*/
-{
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
-	if (!yes) {
-		prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
-		return;
-	}
-
-	if (srv_locks_unsafe_for_binlog
-	    || prebuilt->trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
-		prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
-		return;
-	}
-
-	prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
-}
-
-/******************************************************************//**
-Initializes a handle to use an index by making it the active index.
-@return 0 or error number */
-UNIV_INTERN
-int
-ha_innobase::index_init(
-/*====================*/
-	uint	keynr,	/*!< in: key (index) number */
-	bool	sorted)	/*!< in: 1 if result MUST be sorted according to
-			index; not used here */
-{
-	DBUG_ENTER("index_init");
-
-	const int	rc = change_active_index(keynr);
-
-	DBUG_RETURN(rc);
-}
-
-/******************************************************************//**
-Ends index use on the handle: resets the active index to MAX_KEY, clears
-the pushed-down range check flag and closes the DS-MRR state.
-@return 0 */
-UNIV_INTERN
-int
-ha_innobase::index_end(void)
-/*========================*/
-{
-	DBUG_ENTER("index_end");
-
-	active_index = MAX_KEY;
-	in_range_check_pushed_down = FALSE;
-	ds_mrr.dsmrr_close();
-
-	DBUG_RETURN(0);
-}
-
-/*********************************************************************//**
-Converts a search mode flag understood by MySQL to a flag understood
-by InnoDB.
-@return PAGE_CUR_GE, PAGE_CUR_LE, PAGE_CUR_G or PAGE_CUR_L, or
-PAGE_CUR_UNSUPP for the MBR modes and for unknown flags */
-static inline
-ulint
-convert_search_mode_to_innobase(
-/*============================*/
-	enum ha_rkey_function	find_flag)
-{
-	/* In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always
-	pass a complete-field prefix of a key value as the search
-	tuple. I.e., it is not allowed that the last field would
-	just contain n first bytes of the full field value.
-	MySQL uses a 'padding' trick to convert LIKE 'abc%'
-	type queries so that it can use as a search tuple
-	a complete-field-prefix of a key value. Thus, the InnoDB
-	search mode PAGE_CUR_LE_OR_EXTENDS is never used.
-	TODO: when/if MySQL starts to use also partial-field
-	prefixes, we have to deal with stripping of spaces
-	and comparison of non-latin1 char type fields in
-	innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to
-	work correctly. */
-
-	switch (find_flag) {
-	case HA_READ_KEY_EXACT:
-		/* this does not require the index to be UNIQUE */
-	case HA_READ_KEY_OR_NEXT:
-	case HA_READ_PREFIX:
-		return(PAGE_CUR_GE);
-
-	case HA_READ_KEY_OR_PREV:
-	case HA_READ_PREFIX_LAST:
-	case HA_READ_PREFIX_LAST_OR_PREV:
-		return(PAGE_CUR_LE);
-
-	case HA_READ_AFTER_KEY:
-		return(PAGE_CUR_G);
-
-	case HA_READ_BEFORE_KEY:
-		return(PAGE_CUR_L);
-
-	case HA_READ_MBR_CONTAIN:
-	case HA_READ_MBR_INTERSECT:
-	case HA_READ_MBR_WITHIN:
-	case HA_READ_MBR_DISJOINT:
-	case HA_READ_MBR_EQUAL:
-		return(PAGE_CUR_UNSUPP);
-
-	/* There is deliberately no "default:" label, so that gcc warns
-	about an enumeration value not handled in the switch
-	(if -Wswitch or -Wall is used) */
-	}
-
-	my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "this functionality");
-
-	return(PAGE_CUR_UNSUPP);
-}
-
-/*
- BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
- ---------------------------------------------------
-The following does not cover all the details, but explains how we determine
-the start of a new SQL statement, and what is associated with it.
-
-For each table in the database the MySQL interpreter may have several
-table handle instances in use, also in a single SQL query. For each table
-handle instance there is an InnoDB 'prebuilt' struct which contains most
-of the InnoDB data associated with this table handle instance.
-
- A) if the user has not explicitly set any MySQL table level locks:
-
- 1) MySQL calls ::external_lock to set an 'intention' table level lock on
-the table of the handle instance. There we set
-prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
-true if we are taking this table handle instance to use in a new SQL
-statement issued by the user. We also increment trx->n_mysql_tables_in_use.
-
- 2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
-instructions to prebuilt->template of the table handle instance in
-::index_read. The template is used to save CPU time in large joins.
-
- 3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we
-allocate a new consistent read view for the trx if it does not yet have one,
-or in the case of a locking read, set an InnoDB 'intention' table level
-lock on the table.
-
- 4) We do the SELECT. MySQL may repeatedly call ::index_read for the
-same table handle instance, if it is a join.
-
- 5) When the SELECT ends, MySQL removes its intention table level locks
-in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
- (a) we execute a COMMIT there if the autocommit is on,
- (b) we also release possible 'SQL statement level resources' InnoDB may
-have for this SQL statement. The MySQL interpreter does NOT execute
-autocommit for pure read transactions, though it should. That is why the
-table handler in that case has to execute the COMMIT in ::external_lock.
-
- B) If the user has explicitly set MySQL table level locks, then MySQL
-does NOT call ::external_lock at the start of the statement. To determine
-when we are at the start of a new SQL statement we at the start of
-::index_read also compare the query id to the latest query id where the
-table handle instance was used. If it has changed, we know we are at the
-start of a new SQL statement. Since the query id can theoretically
-wrap around, we use this test only as a secondary way of determining the
-start of a new SQL statement. */
-
-
-/**********************************************************************//**
-Positions an index cursor to the index specified in the handle. Fetches the
-row if any.
-@return 0, HA_ERR_KEY_NOT_FOUND, or error number */
-UNIV_INTERN
-int
-ha_innobase::index_read(
-/*====================*/
-	uchar*		buf,		/*!< in/out: buffer for the returned
-					row */
-	const uchar*	key_ptr,	/*!< in: key value; if this is NULL
-					we position the cursor at the
-					start or end of index; this can
-					also contain an InnoDB row id, in
-					which case key_len is the InnoDB
-					row id length; the key value can
-					also be a prefix of a full key value,
-					and the last column can be a prefix
-					of a full column */
-	uint		key_len,/*!< in: key value length */
-	enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */
-{
-	ulint		mode;
-	dict_index_t*	index;
-	ulint		match_mode = 0;
-	int		error;
-	dberr_t		ret;
-
-	DBUG_ENTER("index_read");
-	DEBUG_SYNC_C("ha_innobase_index_read_begin");
-
-	ut_a(prebuilt->trx == thd_to_trx(user_thd));
-	ut_ad(key_len != 0 || find_flag != HA_READ_KEY_EXACT);
-
-	ha_statistic_increment(&SSV::ha_read_key_count);
-
-	index = prebuilt->index;
-
-	if (UNIV_UNLIKELY(index == NULL) || dict_index_is_corrupted(index)) {
-		prebuilt->index_usable = FALSE;
-		DBUG_RETURN(HA_ERR_CRASHED);
-	}
-
-	if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
-		DBUG_RETURN(dict_index_is_corrupted(index)
-			    ? HA_ERR_INDEX_CORRUPT
-			    : HA_ERR_TABLE_DEF_CHANGED);
-	}
-
-	/* This function does not search full-text indexes. */
-	if (index->type & DICT_FTS) {
-		DBUG_RETURN(HA_ERR_KEY_NOT_FOUND);
-	}
-
-	/* Note that the index for which the search template is built is
-	not necessarily prebuilt->index, but can also be the clustered
-	index */
-
-	if (prebuilt->sql_stat_start) {
-		build_template(false);
-	}
-
-	if (key_ptr) {
-		/* Convert the search key value to InnoDB format into
-		prebuilt->search_tuple */
-
-		row_sel_convert_mysql_key_to_innobase(
-			prebuilt->search_tuple,
-			prebuilt->srch_key_val1,
-			prebuilt->srch_key_val_len,
-			index,
-			(byte*) key_ptr,
-			(ulint) key_len,
-			prebuilt->trx);
-		DBUG_ASSERT(prebuilt->search_tuple->n_fields > 0);
-	} else {
-		/* We position the cursor to the last or the first entry
-		in the index */
-
-		dtuple_set_n_fields(prebuilt->search_tuple, 0);
-	}
-
-	mode = convert_search_mode_to_innobase(find_flag);
-
-	if (find_flag == HA_READ_KEY_EXACT) {
-
-		match_mode = ROW_SEL_EXACT;
-
-	} else if (find_flag == HA_READ_PREFIX
-		   || find_flag == HA_READ_PREFIX_LAST) {
-
-		match_mode = ROW_SEL_EXACT_PREFIX;
-	}
-
-	/* Remembered for later index_next_same() calls. */
-	last_match_mode = (uint) match_mode;
-
-	if (mode != PAGE_CUR_UNSUPP) {
-
-		innobase_srv_conc_enter_innodb(prebuilt->trx);
-
-		ret = row_search_for_mysql((byte*) buf, mode, prebuilt,
-					   match_mode, 0);
-
-		innobase_srv_conc_exit_innodb(prebuilt->trx);
-	} else {
-
-		ret = DB_UNSUPPORTED;
-	}
-
-	switch (ret) {
-	case DB_SUCCESS:
-		error = 0;
-		table->status = 0;
-		if (prebuilt->table->is_system_db) {
-			srv_stats.n_system_rows_read.add(
-				(size_t) prebuilt->trx->id, 1);
-		} else {
-			srv_stats.n_rows_read.add(
-				(size_t) prebuilt->trx->id, 1);
-		}
-		break;
-	case DB_RECORD_NOT_FOUND:
-	case DB_END_OF_INDEX:
-		error = HA_ERR_KEY_NOT_FOUND;
-		table->status = STATUS_NOT_FOUND;
-		break;
-	case DB_TABLESPACE_DELETED:
-
-		ib_senderrf(
-			prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
-			ER_TABLESPACE_DISCARDED,
-			table->s->table_name.str);
-
-		table->status = STATUS_NOT_FOUND;
-		error = HA_ERR_NO_SUCH_TABLE;
-		break;
-	case DB_TABLESPACE_NOT_FOUND:
-
-		/* Only the table name is passed as a message argument,
-		as in general_fetch(); a MYF(0) here would be consumed
-		as the first message argument in place of the name. */
-		ib_senderrf(
-			prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
-			ER_TABLESPACE_MISSING,
-			table->s->table_name.str);
-
-		table->status = STATUS_NOT_FOUND;
-		error = HA_ERR_NO_SUCH_TABLE;
-		break;
-	default:
-		error = convert_error_code_to_mysql(
-			ret, prebuilt->table->flags, user_thd);
-
-		table->status = STATUS_NOT_FOUND;
-		break;
-	}
-
-	DBUG_RETURN(error);
-}
-
-/*******************************************************************//**
-Works like index_read(), but finds the last row with the current key
-value or prefix: delegates to index_read() with HA_READ_PREFIX_LAST.
-@return 0, HA_ERR_KEY_NOT_FOUND, or an error code */
-UNIV_INTERN
-int
-ha_innobase::index_read_last(
-/*=========================*/
-	uchar*		buf,	/*!< out: fetched row */
-	const uchar*	key_ptr,/*!< in: key value, or a prefix of a full
-				key value */
-	uint		key_len)/*!< in: length of the key val or prefix
-				in bytes */
-{
-	const int	rc = index_read(
-		buf, key_ptr, key_len, HA_READ_PREFIX_LAST);
-
-	return(rc);
-}
-
-/********************************************************************//**
-Get the index for a handle. Does not change active index.
-For a real key number the index is looked up with innobase_index_lookup(),
-falling back to a lookup by key name in the InnoDB table; otherwise the
-first index of the InnoDB table is returned. A failed lookup is logged.
-@return NULL or index instance. */
-UNIV_INTERN
-dict_index_t*
-ha_innobase::innobase_get_index(
-/*============================*/
-	uint		keynr)	/*!< in: use this index; MAX_KEY means always
-				clustered index, even if it was internally
-				generated by InnoDB */
-{
-	KEY*		key = 0;
-	dict_index_t*	index = 0;
-
-	DBUG_ENTER("innobase_get_index");
-
-	if (keynr != MAX_KEY && table->s->keys > 0) {
-		key = table->key_info + keynr;
-
-		index = innobase_index_lookup(share, keynr);
-
-		if (index) {
-
-			if (!key || ut_strcmp(index->name, key->name) != 0) {
-				fprintf(stderr, "InnoDB: [Error] Index for key no %u"
-					" mysql name %s , InnoDB name %s for table %s\n",
-					keynr, key ? key->name : "NULL",
-					index->name,
-					prebuilt->table->name);
-
-				/* Dump the whole key mapping for diagnosis.
-				Separate locals are used so that 'index' and
-				'key' stay intact for the assertion below,
-				and every iteration reports its own key
-				number and name. */
-				for (uint i = 0; i < table->s->keys; i++) {
-					const dict_index_t*	other_index
-						= innobase_index_lookup(
-							share, i);
-					const KEY*		other_key
-						= table->key_info + i;
-
-					if (other_index) {
-
-						fprintf(stderr, "InnoDB: [Note] Index for key no %u"
-							" mysql name %s , InnoDB name %s for table %s\n",
-							i, other_key->name,
-							other_index->name,
-							prebuilt->table->name);
-					}
-				}
-			}
-
-			/* A name mismatch between the MySQL key and the
-			InnoDB index is fatal. */
-			ut_a(ut_strcmp(index->name, key->name) == 0);
-		} else {
-			/* Can't find index with keynr in the translation
-			table. Only print message if the index translation
-			table exists */
-			if (share->idx_trans_tbl.index_mapping) {
-				sql_print_warning("InnoDB could not find "
-						  "index %s key no %u for "
-						  "table %s through its "
-						  "index translation table",
-						  key ? key->name : "NULL",
-						  keynr,
-						  prebuilt->table->name);
-			}
-
-			index = dict_table_get_index_on_name(prebuilt->table,
-							     key->name);
-		}
-	} else {
-		index = dict_table_get_first_index(prebuilt->table);
-	}
-
-	if (!index) {
-		sql_print_error(
-			"Innodb could not find key n:o %u with name %s "
-			"from dict cache for table %s",
-			keynr, key ? key->name : "NULL",
-			prebuilt->table->name);
-	}
-
-	DBUG_RETURN(index);
-}
-
-/********************************************************************//**
-Changes the active index of a handle: looks up the InnoDB index, checks
-that it is usable by the transaction, resizes and types the search tuple
-for it and rebuilds the row template.
-@return 0 or error code */
-UNIV_INTERN
-int
-ha_innobase::change_active_index(
-/*=============================*/
-	uint	keynr)	/*!< in: use this index; MAX_KEY means always clustered
-			index, even if it was internally generated by
-			InnoDB */
-{
-	DBUG_ENTER("change_active_index");
-
-	ut_ad(user_thd == ha_thd());
-	ut_a(prebuilt->trx == thd_to_trx(user_thd));
-
-	active_index = keynr;
-	prebuilt->index = innobase_get_index(keynr);
-
-	if (UNIV_UNLIKELY(!prebuilt->index)) {
-		sql_print_warning("InnoDB: change_active_index(%u) failed",
-				  keynr);
-		prebuilt->index_usable = FALSE;
-		DBUG_RETURN(1);
-	}
-
-	prebuilt->index_usable = row_merge_is_index_usable(prebuilt->trx,
-							   prebuilt->index);
-
-	if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
-		if (!dict_index_is_corrupted(prebuilt->index)) {
-			push_warning_printf(
-				user_thd, Sql_condition::WARN_LEVEL_WARN,
-				HA_ERR_TABLE_DEF_CHANGED,
-				"InnoDB: insufficient history for index %u",
-				keynr);
-
-			/* The caller seems to ignore this. Thus, we must
-			check this again in row_search_for_mysql(). */
-			DBUG_RETURN(convert_error_code_to_mysql(
-					    DB_MISSING_HISTORY, 0, NULL));
-		}
-
-		/* The index is marked as corrupted: warn with the
-		formatted index and table names. */
-		char	index_name[MAX_FULL_NAME_LEN + 1];
-		char	table_name[MAX_FULL_NAME_LEN + 1];
-
-		innobase_format_name(
-			index_name, sizeof index_name,
-			prebuilt->index->name, TRUE);
-
-		innobase_format_name(
-			table_name, sizeof table_name,
-			prebuilt->index->table->name, FALSE);
-
-		push_warning_printf(
-			user_thd, Sql_condition::WARN_LEVEL_WARN,
-			HA_ERR_INDEX_CORRUPT,
-			"InnoDB: Index %s for table %s is"
-			" marked as corrupted",
-			index_name, table_name);
-		DBUG_RETURN(HA_ERR_INDEX_CORRUPT);
-	}
-
-	ut_a(prebuilt->search_tuple != 0);
-
-	const ulint	n_index_fields = prebuilt->index->n_fields;
-
-	dtuple_set_n_fields(prebuilt->search_tuple, n_index_fields);
-
-	dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
-			      n_index_fields);
-
-	/* MySQL changes the active index for a handle also during some
-	queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
-	and then calculates the sum. Previously we played safe and used
-	the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
-	copying. Starting from MySQL-4.1 we use a more efficient flag here. */
-
-	build_template(false);
-
-	DBUG_RETURN(0);
-}
-
-/**********************************************************************//**
-Makes keynr the active index and then positions an index cursor on it with
-index_read(). Fetches the row if any.
-??? This is only used to read whole keys ???
-@return error number or 0; 1 if the active index could not be changed */
-UNIV_INTERN
-int
-ha_innobase::index_read_idx(
-/*========================*/
-	uchar*		buf,		/*!< in/out: buffer for the returned
-					row */
-	uint		keynr,		/*!< in: use this index */
-	const uchar*	key,		/*!< in: key value; if this is NULL
-					we position the cursor at the
-					start or end of index */
-	uint		key_len,	/*!< in: key value length */
-	enum ha_rkey_function find_flag)/*!< in: search flags from my_base.h */
-{
-	const int	switch_rc = change_active_index(keynr);
-
-	if (switch_rc != 0) {
-		/* Any failure is reported as 1. */
-		return(1);
-	}
-
-	return(index_read(buf, key, key_len, find_flag));
-}
-
-/***********************************************************************//**
-Reads the next or previous row from a cursor, which must have previously been
-positioned using index_read.
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
-int
-ha_innobase::general_fetch(
-/*=======================*/
-	uchar*	buf,		/*!< in/out: buffer for next row in MySQL
-				format */
-	uint	direction,	/*!< in: ROW_SEL_NEXT or ROW_SEL_PREV */
-	uint	match_mode)	/*!< in: 0, ROW_SEL_EXACT, or
-				ROW_SEL_EXACT_PREFIX */
-{
-	dberr_t	ret;
-	int	error;
-
-	DBUG_ENTER("general_fetch");
-
-	/* Do not continue if the transaction is not started; return an
-	error code instead. NOTE(review): state 1 is presumably
-	TRX_STATE_ACTIVE - confirm against trx0types.h. */
-	if (!prebuilt->sql_stat_start
-	    && (!prebuilt->trx || prebuilt->trx->state != 1)) {
-		DBUG_RETURN(HA_ERR_END_OF_FILE);
-	}
-
-	ut_a(prebuilt->trx == thd_to_trx(user_thd));
-
-	if (!prebuilt->table->is_readable()) {
-		if (prebuilt->table->corrupted) {
-			DBUG_RETURN(HA_ERR_CRASHED);
-		}
-
-		/* Not corrupted: the error code depends on whether the
-		tablespace lookup yields a space. */
-		FilSpace	space(prebuilt->table->space, true);
-
-		if (space()) {
-			DBUG_RETURN(HA_ERR_DECRYPTION_FAILED);
-		}
-
-		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
-	}
-
-	innobase_srv_conc_enter_innodb(prebuilt->trx);
-
-	ret = row_search_for_mysql(
-		(byte*) buf, 0, prebuilt, match_mode, direction);
-
-	innobase_srv_conc_exit_innodb(prebuilt->trx);
-
-	switch (ret) {
-	case DB_SUCCESS:
-		error = 0;
-		table->status = 0;
-		srv_stats.n_rows_read.add((size_t) prebuilt->trx->id, 1);
-		break;
-	case DB_RECORD_NOT_FOUND:
-	case DB_END_OF_INDEX:
-		error = HA_ERR_END_OF_FILE;
-		table->status = STATUS_NOT_FOUND;
-		break;
-	case DB_TABLESPACE_DELETED:
-	case DB_TABLESPACE_NOT_FOUND:
-		/* The two cases differ only in the message code. */
-		if (ret == DB_TABLESPACE_DELETED) {
-			ib_senderrf(
-				prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
-				ER_TABLESPACE_DISCARDED,
-				table->s->table_name.str);
-		} else {
-			ib_senderrf(
-				prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
-				ER_TABLESPACE_MISSING,
-				table->s->table_name.str);
-		}
-
-		table->status = STATUS_NOT_FOUND;
-		error = HA_ERR_NO_SUCH_TABLE;
-		break;
-	default:
-		error = convert_error_code_to_mysql(
-			ret, prebuilt->table->flags, user_thd);
-
-		table->status = STATUS_NOT_FOUND;
-		break;
-	}
-
-	DBUG_RETURN(error);
-}
-
-/***********************************************************************//**
-Reads the next row from a cursor, which must have previously been
-positioned using index_read. Fetches forward with no match mode.
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
-int
-ha_innobase::index_next(
-/*====================*/
-	uchar*		buf)	/*!< in/out: buffer for next row in MySQL
-				format */
-{
-	const int	rc = general_fetch(buf, ROW_SEL_NEXT, 0);
-
-	return(rc);
-}
-
-/*******************************************************************//**
-Reads the next row matching to the key value given as the parameter.
-The key arguments are not used here; the fetch reuses last_match_mode,
-the match mode stored by index_read().
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
-int
-ha_innobase::index_next_same(
-/*=========================*/
-	uchar*		buf,	/*!< in/out: buffer for the row */
-	const uchar*	key,	/*!< in: key value */
-	uint		keylen)	/*!< in: key value length */
-{
-	const int	rc = general_fetch(
-		buf, ROW_SEL_NEXT, last_match_mode);
-
-	return(rc);
-}
-
-/***********************************************************************//**
-Reads the previous row from a cursor, which must have previously been
-positioned using index_read. Fetches backward with no match mode.
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
-int
-ha_innobase::index_prev(
-/*====================*/
-	uchar*		buf)	/*!< in/out: buffer for previous row in MySQL
-				format */
-{
-	const int	rc = general_fetch(buf, ROW_SEL_PREV, 0);
-
-	return(rc);
-}
-
-/********************************************************************//**
-Positions a cursor on the first record in an index and reads the
-corresponding row to buf.
-@return 0, HA_ERR_END_OF_FILE, or error code */
-UNIV_INTERN
-int
-ha_innobase::index_first(
-/*=====================*/
-	uchar*	buf)	/*!< in/out: buffer for the row */
-{
-	DBUG_ENTER("index_first");
-
-	const int	rc = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
-
-	/* MySQL does not seem to allow this to return
-	HA_ERR_KEY_NOT_FOUND, so it is reported as end of file. */
-
-	DBUG_RETURN(rc == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : rc);
-}
-
-/********************************************************************//**
-Positions a cursor on the last record in an index and reads the
-corresponding row to buf.
-@return 0, HA_ERR_END_OF_FILE, or error code */
-UNIV_INTERN
-int
-ha_innobase::index_last(
-/*====================*/
-	uchar*	buf)	/*!< in/out: buffer for the row */
-{
-	DBUG_ENTER("index_last");
-
-	const int	rc = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
-
-	/* MySQL does not seem to allow this to return
-	HA_ERR_KEY_NOT_FOUND, so it is reported as end of file. */
-
-	DBUG_RETURN(rc == HA_ERR_KEY_NOT_FOUND ? HA_ERR_END_OF_FILE : rc);
-}
-
-/****************************************************************//**
-Initialize a table scan: makes the clustered index the active index
-(MAX_KEY when InnoDB generated it, the primary key otherwise) and marks
-the start of the scan.
-@return 0 or error number */
-UNIV_INTERN
-int
-ha_innobase::rnd_init(
-/*==================*/
-	bool	scan)	/*!< in: TRUE if table/index scan FALSE otherwise */
-{
-	int	err;
-
-	if (!prebuilt->clust_index_was_generated) {
-		err = change_active_index(primary_key);
-	} else {
-		err = change_active_index(MAX_KEY);
-	}
-
-	/* Semi-consistent read must not be used in random row reads
-	(by position), so it is disabled when scan is false */
-
-	if (!scan) {
-		try_semi_consistent_read(0);
-	}
-
-	start_of_scan = 1;
-
-	return(err);
-}
-
-/*****************************************************************//**
-Ends a table scan by delegating to index_end().
-@return 0 or error number */
-UNIV_INTERN
-int
-ha_innobase::rnd_end(void)
-/*======================*/
-{
-	const int	rc = index_end();
-
-	return(rc);
-}
-
-/*****************************************************************//**
-Reads the next row in a table scan (also used to read the FIRST row
-in a table scan).
-@return 0, HA_ERR_END_OF_FILE, or error number */
-UNIV_INTERN
-int
-ha_innobase::rnd_next(
-/*==================*/
-	uchar*	buf)	/*!< in/out: returns the row in this buffer,
-			in MySQL format */
-{
-	DBUG_ENTER("rnd_next");
-
-	if (!start_of_scan) {
-		/* The scan is already positioned: step forward. */
-		DBUG_RETURN(general_fetch(buf, ROW_SEL_NEXT, 0));
-	}
-
-	/* First call of the scan: position on the first record. */
-	int	error = index_first(buf);
-
-	if (error == HA_ERR_KEY_NOT_FOUND) {
-		error = HA_ERR_END_OF_FILE;
-	}
-
-	start_of_scan = 0;
-
-	DBUG_RETURN(error);
-}
-
-/**********************************************************************//**
-Fetches a row from the table based on a row reference, using an exact-key
-index_read() on the active index.
-@return 0, HA_ERR_KEY_NOT_FOUND, or error code */
-UNIV_INTERN
-int
-ha_innobase::rnd_pos(
-/*=================*/
-	uchar*	buf,	/*!< in/out: buffer for the row */
-	uchar*	pos)	/*!< in: primary key value of the row in the
-			MySQL format, or the row id if the clustered
-			index was internally generated by InnoDB; the
-			length of data in pos has to be ref_length */
-{
-	DBUG_ENTER("rnd_pos");
-	DBUG_DUMP("key", pos, ref_length);
-
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
-	/* The length of the row reference is assumed to be fixed
-	for the table, and equal to ref_length */
-
-	const int	rc = index_read(
-		buf, pos, ref_length, HA_READ_KEY_EXACT);
-
-	if (rc != 0) {
-		DBUG_PRINT("error", ("Got error: %d", rc));
-	}
-
-	DBUG_RETURN(rc);
-}
-
-/**********************************************************************//**
-Initialize FT index scan. Marks a not yet started transaction as one that
-will lock, then initializes a non-scan random read.
-@return 0 or error number */
-UNIV_INTERN
-int
-ha_innobase::ft_init()
-/*==================*/
-{
-	DBUG_ENTER("ft_init");
-
-	trx_t*	ft_trx = check_trx_exists(ha_thd());
-
-	/* FTS queries are not treated as autocommit non-locking selects.
-	This is because the FTS implementation can acquire locks behind
-	the scenes. This has not been verified but it is safer to treat
-	them as regular read only transactions for now. */
-
-	if (!trx_is_started(ft_trx)) {
-		ft_trx->will_lock++;
-	}
-
-	const int	rc = rnd_init(false);
-
-	DBUG_RETURN(rc);
-}
-
-/**********************************************************************//**
-Runs a full-text query against an FTS index of the table and wraps the
-query result in a newly allocated FT_INFO handle. A utf16 or utf32 query
-string is converted to utf8 in a local buffer before it is searched for.
-On every failure an error is raised with my_error() and no handle or query
-result is left allocated.
-@return FT_INFO structure if successful or NULL */
-UNIV_INTERN
-FT_INFO*
-ha_innobase::ft_init_ext(
-/*=====================*/
-	uint			flags,	/* in: search flags; FT_BOOL selects
-					a boolean search */
-	uint			keynr,	/* in: key number, or NO_SUCH_KEY to
-					use the first FTS index of the table */
-	String*			key)	/* in: query string */
-{
-	trx_t*			trx;
-	dict_table_t*		ft_table;
-	dberr_t			error;
-	byte*			query = (byte*) key->ptr();
-	ulint			query_len = key->length();
-	const CHARSET_INFO*	char_set = key->charset();
-	NEW_FT_INFO*		fts_hdl = NULL;
-	dict_index_t*		index;
-	fts_result_t*		result;
-	char			buf_tmp[8192];
-	ulint			buf_tmp_used;
-	uint			num_errors;
-
-	if (fts_enable_diag_print) {
-		fprintf(stderr, "keynr=%u, '%.*s'\n",
-			keynr, (int) key->length(), (byte*) key->ptr());
-
-		if (flags & FT_BOOL) {
-			fprintf(stderr, "BOOL search\n");
-		} else {
-			fprintf(stderr, "NL search\n");
-		}
-	}
-
-	/* FIXME: utf32 and utf16 are not compatible with some of the
-	string functions used, so convert them to utf8 before
-	proceeding. One byte of buf_tmp is reserved for the terminating
-	NUL written below. */
-	if (strcmp(char_set->csname, "utf32") == 0
-	    || strcmp(char_set->csname, "utf16") == 0) {
-		buf_tmp_used = innobase_convert_string(
-			buf_tmp, sizeof(buf_tmp) - 1,
-			&my_charset_utf8_general_ci,
-			query, query_len, (CHARSET_INFO*) char_set,
-			&num_errors);
-
-		query = (byte*) buf_tmp;
-		query_len = buf_tmp_used;
-		query[query_len] = 0;
-	}
-
-	trx = prebuilt->trx;
-
-	/* FTS queries are not treated as autocommit non-locking selects.
-	This is because the FTS implementation can acquire locks behind
-	the scenes. This has not been verified but it is safer to treat
-	them as regular read only transactions for now. */
-
-	if (!trx_is_started(trx)) {
-		++trx->will_lock;
-	}
-
-	ft_table = prebuilt->table;
-
-	/* Table does not have an FTS index */
-	if (!ft_table->fts || ib_vector_is_empty(ft_table->fts->indexes)) {
-		my_error(ER_TABLE_HAS_NO_FT, MYF(0));
-		return(NULL);
-	}
-
-	/* If tablespace is discarded, we should return here */
-	if (dict_table_is_discarded(ft_table)) {
-		my_error(ER_NO_SUCH_TABLE, MYF(0), table->s->db.str,
-			 table->s->table_name.str);
-		return(NULL);
-	}
-
-	if (keynr == NO_SUCH_KEY) {
-		/* FIXME: Investigate the NO_SUCH_KEY usage */
-		index = (dict_index_t*) ib_vector_getp(ft_table->fts->indexes, 0);
-	} else {
-		index = innobase_get_index(keynr);
-	}
-
-	if (!index || index->type != DICT_FTS) {
-		my_error(ER_TABLE_HAS_NO_FT, MYF(0));
-		return(NULL);
-	}
-
-	if (!(ft_table->fts->fts_status & ADDED_TABLE_SYNCED)) {
-		fts_init_index(ft_table, FALSE);
-
-		ft_table->fts->fts_status |= ADDED_TABLE_SYNCED;
-	}
-
-	/* Allocate the FTS handler before running the query, so that a
-	failed allocation can neither be dereferenced nor leave behind a
-	query result that nobody owns. */
-	fts_hdl = static_cast<NEW_FT_INFO*>(my_malloc(sizeof(NEW_FT_INFO),
-						      MYF(0)));
-
-	if (fts_hdl == NULL) {
-		my_error(ER_OUT_OF_RESOURCES, MYF(0));
-		return(NULL);
-	}
-
-	error = fts_query(trx, index, flags, query, query_len, &result);
-
-	if (error != DB_SUCCESS) {
-		my_free(fts_hdl);
-		my_error(convert_error_code_to_mysql(error, 0, NULL),
-			 MYF(0));
-		return(NULL);
-	}
-
-	/* Instantiate the handler before return */
-	fts_hdl->please = const_cast<_ft_vft*>(&ft_vft_result);
-	fts_hdl->could_you = const_cast<_ft_vft_ext*>(&ft_vft_ext_result);
-	fts_hdl->ft_prebuilt = prebuilt;
-	fts_hdl->ft_result = result;
-
-	/* FIXME: Re-evaluate the condition when Bug 14469540
-	is resolved */
-	prebuilt->in_fts_query = true;
-
-	return((FT_INFO*) fts_hdl);
-}
-
-/*****************************************************************//**
-Builds the search tuple used to look a document up through the
-FTS_DOC_ID_INDEX by its Doc ID. The Doc ID is rewritten in place into
-storage byte order and becomes the first (and only compared) field of the
-tuple; all remaining fields of the tuple are set to NULL. */
-static
-void
-innobase_fts_create_doc_id_key(
-/*===========================*/
-	dtuple_t*	tuple,		/* in/out: prebuilt->search_tuple */
-	const dict_index_t*
-			index,		/* in: index (FTS_DOC_ID_INDEX) */
-	doc_id_t*	doc_id)		/* in/out: doc id to search, value
-					could be changed to storage format
-					used for search. */
-{
-	doc_id_t	stored_id;
-	dfield_t*	key_field = dtuple_get_nth_field(tuple, 0);
-
-	ut_a(dict_index_get_n_unique(index) == 1);
-
-	dtuple_set_n_fields(tuple, index->n_fields);
-	dict_index_copy_types(tuple, index, index->n_fields);
-
-#ifdef UNIV_DEBUG
-	/* The unique Doc ID field should be an eight-bytes integer */
-	dict_field_t*	field = dict_index_get_nth_field(index, 0);
-	ut_a(field->col->mtype == DATA_INT);
-	ut_ad(sizeof(*doc_id) == field->fixed_len);
-	ut_ad(innobase_strcasecmp(index->name, FTS_DOC_ID_INDEX_NAME) == 0);
-#endif /* UNIV_DEBUG */
-
-	/* Rewrite the caller's Doc ID in storage byte order; the tuple
-	field points at the caller's variable, not at a copy. */
-	mach_write_to_8(reinterpret_cast<byte*>(&stored_id), *doc_id);
-	*doc_id = stored_id;
-	dfield_set_data(key_field, doc_id, sizeof(*doc_id));
-
-	/* Only the Doc ID takes part in the comparison */
-	dtuple_set_n_fields_cmp(tuple, 1);
-
-	for (ulint pos = 1; pos < index->n_fields; pos++) {
-		dfield_set_null(dtuple_get_nth_field(tuple, pos));
-	}
-}
-
-/**********************************************************************//**
-Fetch next result from the FT result set. On the first call (no current
-ranking node yet) the rankings are sorted by rank and the scan starts at
-the first node; on later calls the scan advances to the next node. Unless
-prebuilt->read_just_key is set, the row of the current node is then read
-through the FTS_DOC_ID_INDEX by its Doc ID; nodes for which no record is
-found are skipped.
-@return 0 on success, HA_ERR_END_OF_FILE when there is no further result,
-or another error code */
-UNIV_INTERN
-int
-ha_innobase::ft_read(
-/*=================*/
- uchar* buf) /*!< in/out: buf contain result row */
-{
- fts_result_t* result;
- int error;
- row_prebuilt_t* ft_prebuilt;
-
- ft_prebuilt = ((NEW_FT_INFO*) ft_handler)->ft_prebuilt;
-
- ut_a(ft_prebuilt == prebuilt);
-
- result = ((NEW_FT_INFO*) ft_handler)->ft_result;
-
- if (result->current == NULL) {
- /* No current node yet: either the ranking still has to be
- sorted (below), or the FTS query did not contain any
- matching documents. */
- if (result->rankings_by_id != NULL) {
- /* Now that we have the complete result, we
- need to sort the document ids on their rank
- calculation. */
-
- fts_query_sort_result_on_rank(result);
-
- result->current = const_cast<ib_rbt_node_t*>(
- rbt_first(result->rankings_by_rank));
- } else {
- ut_a(result->current == NULL);
- }
- } else {
- result->current = const_cast<ib_rbt_node_t*>(
- rbt_next(result->rankings_by_rank, result->current));
- }
-
-next_record:
-
- if (result->current != NULL) {
- dict_index_t* index;
- dtuple_t* tuple = prebuilt->search_tuple;
- doc_id_t search_doc_id;
-
- /* If we only need information from result we can return
- without fetching the table row */
- if (ft_prebuilt->read_just_key) {
- table->status= 0;
- return(0);
- }
-
- index = dict_table_get_index_on_name(
- prebuilt->table, FTS_DOC_ID_INDEX_NAME);
-
- /* Must find the index */
- ut_a(index);
-
- /* Switch to the FTS doc id index */
- prebuilt->index = index;
-
- fts_ranking_t* ranking = rbt_value(
- fts_ranking_t, result->current);
-
- search_doc_id = ranking->doc_id;
-
- /* We pass a pointer of search_doc_id because it will be
- converted to storage byte order used in the search
- tuple. */
- innobase_fts_create_doc_id_key(tuple, index, &search_doc_id);
-
- innobase_srv_conc_enter_innodb(prebuilt->trx);
-
- dberr_t ret = row_search_for_mysql(
- (byte*) buf, PAGE_CUR_GE, prebuilt, ROW_SEL_EXACT, 0);
-
- innobase_srv_conc_exit_innodb(prebuilt->trx);
-
- switch (ret) {
- case DB_SUCCESS:
- error = 0;
- table->status = 0;
- break;
- case DB_RECORD_NOT_FOUND:
- result->current = const_cast<ib_rbt_node_t*>(
- rbt_next(result->rankings_by_rank,
- result->current));
-
- if (!result->current) {
- /* exhaust the result set, should return
- HA_ERR_END_OF_FILE just like
- ha_innobase::general_fetch() and/or
- ha_innobase::index_first() etc. */
- error = HA_ERR_END_OF_FILE;
- table->status = STATUS_NOT_FOUND;
- } else {
- goto next_record;
- }
- break;
- case DB_END_OF_INDEX:
- error = HA_ERR_END_OF_FILE;
- table->status = STATUS_NOT_FOUND;
- break;
- case DB_TABLESPACE_DELETED:
-
- ib_senderrf(
- prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_TABLESPACE_DISCARDED,
- table->s->table_name.str);
-
- table->status = STATUS_NOT_FOUND;
- error = HA_ERR_NO_SUCH_TABLE;
- break;
- case DB_TABLESPACE_NOT_FOUND:
-
- ib_senderrf(
- prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_TABLESPACE_MISSING,
- table->s->table_name.str);
-
- table->status = STATUS_NOT_FOUND;
- error = HA_ERR_NO_SUCH_TABLE;
- break;
- default:
- error = convert_error_code_to_mysql(
- ret, 0, user_thd);
-
- table->status = STATUS_NOT_FOUND;
- break;
- }
-
- return(error);
- }
-
- return(HA_ERR_END_OF_FILE);
-}
-
-/*************************************************************************
-Ends a full-text scan: writes the trace line "ft_end()" to stderr and then
-finishes the scan through rnd_end(). */
-
-void
-ha_innobase::ft_end()
-{
-	fputs("ft_end()\n", stderr);
-
-	rnd_end();
-}
-#ifdef WITH_WSREP
-extern dict_index_t*
-wsrep_dict_foreign_find_index(
- dict_table_t* table,
- const char** col_names,
- const char** columns,
- ulint n_cols,
- dict_index_t* types_idx,
- ibool check_charsets,
- ulint check_null);
-
-
-/*****************************************************************//**
-Appends to the wsrep write set of the transaction a key that identifies
-the row on the other side of a foreign key constraint. The key consists
-of the table name split into database and table parts, followed by an
-index ordinal byte and the key value extracted from rec. Nothing is
-appended, and DB_SUCCESS is returned, when wsrep is not on for the
-connection or the connection is not in LOCAL_STATE execution mode.
-@return DB_SUCCESS or DB_ERROR */
-extern dberr_t
-wsrep_append_foreign_key(
-/*===========================*/
-	trx_t*		trx,		/*!< in: trx */
-	dict_foreign_t*	foreign,	/*!< in: foreign key constraint */
-	const rec_t*	rec,		/*!<in: clustered index record */
-	dict_index_t*	index,		/*!<in: clustered index */
-	ibool		referenced,	/*!<in: is check for referenced table */
-	ibool		shared)		/*!<in: is shared access */
-{
-	ut_a(trx);
-	THD*		thd = (THD*)trx->mysql_thd;
-	ulint		rcode = DB_SUCCESS;
-	char		cache_key[513] = {'\0'};
-	int		cache_key_len;
-	bool const	copy = true;
-
-	if (!wsrep_on(trx->mysql_thd) ||
-	    wsrep_thd_exec_mode(thd) != LOCAL_STATE)
-		return DB_SUCCESS;
-
-	if (!thd || !foreign ||
-	    (!foreign->referenced_table && !foreign->foreign_table))
-	{
-		WSREP_INFO("FK: %s missing in: %s",
-			(!thd)      ?  "thread"     :
-			((!foreign) ?  "constraint" :
-			((!foreign->referenced_table) ?
-			     "referenced table" : "foreign table")),
-			   (thd && wsrep_thd_query(thd)) ?
-			   wsrep_thd_query(thd) : "void");
-		return DB_ERROR;
-	}
-
-	/* If the table on the requested side of the constraint is not
-	attached to the constraint yet, look it up in the dictionary
-	cache together with the matching index. */
-	if ( !((referenced) ?
-		foreign->referenced_table : foreign->foreign_table))
-	{
-		WSREP_DEBUG("pulling %s table into cache",
-			    (referenced) ? "referenced" : "foreign");
-		mutex_enter(&(dict_sys->mutex));
-		if (referenced)
-		{
-			foreign->referenced_table =
-				dict_table_get_low(
-					foreign->referenced_table_name_lookup);
-			if (foreign->referenced_table)
-			{
-				foreign->referenced_index =
-					wsrep_dict_foreign_find_index(
-						foreign->referenced_table, NULL,
-						foreign->referenced_col_names,
-						foreign->n_fields,
-						foreign->foreign_index,
-						TRUE, FALSE);
-			}
-		}
-		else
-		{
-			foreign->foreign_table =
-				dict_table_get_low(
-					foreign->foreign_table_name_lookup);
-			if (foreign->foreign_table)
-			{
-				foreign->foreign_index =
-					wsrep_dict_foreign_find_index(
-						foreign->foreign_table, NULL,
-						foreign->foreign_col_names,
-						foreign->n_fields,
-						foreign->referenced_index,
-						TRUE, FALSE);
-			}
-		}
-		mutex_exit(&(dict_sys->mutex));
-	}
-
-	if ( !((referenced) ?
-		foreign->referenced_table : foreign->foreign_table))
-	{
-		WSREP_WARN("FK: %s missing in query: %s",
-			   (!foreign->referenced_table) ?
-			   "referenced table" : "foreign table",
-			   (wsrep_thd_query(thd)) ?
-			   wsrep_thd_query(thd) : "void");
-		return DB_ERROR;
-	}
-	byte	key[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
-	ulint	len = WSREP_MAX_SUPPORTED_KEY_LENGTH;
-
-	/* The first key byte is the ordinal of the target index among the
-	indexes of its table, not counting indexes that carry the reserved
-	name innobase_index_reserve_name. */
-	dict_index_t *idx_target = (referenced) ?
-		foreign->referenced_index : index;
-	dict_index_t *idx = (referenced) ?
-		UT_LIST_GET_FIRST(foreign->referenced_table->indexes) :
-		UT_LIST_GET_FIRST(foreign->foreign_table->indexes);
-	int i = 0;
-	while (idx != NULL && idx != idx_target) {
-		if (innobase_strcasecmp (idx->name, innobase_index_reserve_name) != 0) {
-			i++;
-		}
-		idx = UT_LIST_GET_NEXT(indexes, idx);
-	}
-	ut_a(idx);
-	key[0] = (char)i;
-
-	rcode = wsrep_rec_get_foreign_key(
-		&key[1], &len, rec, index, idx,
-		wsrep_protocol_version > 1);
-	if (rcode != DB_SUCCESS) {
-		WSREP_ERROR(
-			"FK key set failed: %lu (%lu %lu), index: %s %s, %s",
-			rcode, referenced, shared,
-			(index && index->name)       ? index->name :
-				"void index",
-			(index && index->table_name) ? index->table_name :
-				"void table",
-			(wsrep_thd_query(thd)) ?
-			wsrep_thd_query(thd) : "void");
-		return DB_ERROR;
-	}
-
-	/* Protocol versions up to 1 always name the foreign table in the
-	key, also when the referenced side is being processed. Only the
-	table on the requested side has been verified above, so the
-	foreign table may still be missing here. */
-	const dict_table_t*	key_table =
-		(wsrep_protocol_version > 1 && referenced) ?
-		foreign->referenced_table : foreign->foreign_table;
-	if (!key_table) {
-		WSREP_WARN("FK: %s missing in query: %s",
-			   "foreign table",
-			   (wsrep_thd_query(thd)) ?
-			   wsrep_thd_query(thd) : "void");
-		return DB_ERROR;
-	}
-
-	/* cache_key is zero-initialized and one byte larger than the copy
-	limit, so the copied name is always NUL-terminated. */
-	strncpy(cache_key, key_table->name, sizeof(cache_key) - 1);
-	cache_key_len = strlen(cache_key);
-#ifdef WSREP_DEBUG_PRINT
-	ulint j;
-	fprintf(stderr, "FK parent key, table: %s %s len: %lu ",
-		cache_key, (shared) ? "shared" : "exclusive", len+1);
-	for (j=0; j<len+1; j++) {
-		fprintf(stderr, " %hhX, ", key[j]);
-	}
-	fprintf(stderr, "\n");
-#endif
-	/* Replace the '/' between database and table name by a NUL. The
-	length computed above is kept, so the key passed on below still
-	spans both parts and the final terminator. */
-	char *p = strchr(cache_key, '/');
-	if (p) {
-		*p = '\0';
-	} else {
-		/* Only one of the two tables is known to be present here */
-		WSREP_WARN("unexpected foreign key table %s %s",
-			   foreign->referenced_table ?
-			   foreign->referenced_table->name : "void",
-			   foreign->foreign_table ?
-			   foreign->foreign_table->name : "void");
-	}
-
-	wsrep_buf_t wkey_part[3];
-        wsrep_key_t wkey = {wkey_part, 3};
-	if (!wsrep_prepare_key(
-		(const uchar*)cache_key,
-		cache_key_len +  1,
-		(const uchar*)key, len+1,
-		wkey_part,
-		(size_t*)&wkey.key_parts_num)) {
-		WSREP_WARN("key prepare failed for cascaded FK: %s",
-			   (wsrep_thd_query(thd)) ?
-			   wsrep_thd_query(thd) : "void");
-		return DB_ERROR;
-	}
-	wsrep_t *wsrep= get_wsrep();
-	rcode = (int)wsrep->append_key(
-		wsrep,
-		wsrep_ws_handle(thd, trx),
-		&wkey,
-		1,
-		shared ? WSREP_KEY_SHARED : WSREP_KEY_EXCLUSIVE,
-		copy);
-	if (rcode) {
-		DBUG_PRINT("wsrep", ("row key failed: %lu", rcode));
-		WSREP_ERROR("Appending cascaded fk row key failed: %s, %lu",
-			    (wsrep_thd_query(thd)) ?
-			    wsrep_thd_query(thd) : "void", rcode);
-		return DB_ERROR;
-	}
-
-	return DB_SUCCESS;
-}
-
-/** Appends one key, made of the table cache key and a key value, to the
-wsrep write set of the transaction.
-@return 0 on success, HA_ERR_INTERNAL_ERROR if the key could not be
-prepared or appended */
-static int
-wsrep_append_key(
-/*==================*/
-	THD		*thd,
-	trx_t		*trx,
-	TABLE_SHARE	*table_share,
-	TABLE		*table,
-	const char*	key,
-	uint16_t	key_len,
-	bool		shared
-)
-{
-	DBUG_ENTER("wsrep_append_key");
-#ifdef WSREP_DEBUG_PRINT
-	fprintf(stderr, "%s conn %ld, trx %llu, keylen %d, table %s\n Query: %s ",
-		(shared) ? "Shared" : "Exclusive",
-		thd_get_thread_id(thd), (long long)trx->id, key_len,
-		table_share->table_name.str, wsrep_thd_query(thd));
-	for (int pos = 0; pos < key_len; pos++) {
-		fprintf(stderr, "%hhX, ", key[pos]);
-	}
-	fprintf(stderr, "\n");
-#endif
-	wsrep_buf_t	parts[3];
-	wsrep_key_t	ws_key = {parts, 3};
-
-	/* Split the table cache key and the key value into key parts */
-	const bool	prepared = wsrep_prepare_key(
-		(const uchar*)table_share->table_cache_key.str,
-		table_share->table_cache_key.length,
-		(const uchar*)key, key_len,
-		parts,
-		(size_t*)&ws_key.key_parts_num);
-
-	if (!prepared) {
-		WSREP_WARN("key prepare failed for: %s",
-			   (wsrep_thd_query(thd)) ?
-			   wsrep_thd_query(thd) : "void");
-		DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
-	}
-
-	wsrep_t*	provider = get_wsrep();
-	const int	rc = (int)provider->append_key(
-		provider,
-		wsrep_ws_handle(thd, trx),
-		&ws_key,
-		1,
-		shared ? WSREP_KEY_SHARED : WSREP_KEY_EXCLUSIVE,
-		true);
-
-	if (rc != 0) {
-		DBUG_PRINT("wsrep", ("row key failed: %d", rc));
-		WSREP_WARN("Appending row key failed: %s, %d",
-			   (wsrep_thd_query(thd)) ?
-			   wsrep_thd_query(thd) : "void", rc);
-		DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
-	}
-
-	DBUG_RETURN(0);
-}
-
-/** Tells whether some foreign key constraint in the referenced set of the
-table uses the given index as its referenced index.
-@return true if such a constraint exists */
-static bool
-referenced_by_foreign_key2(dict_table_t* table,
-			   dict_index_t* index) {
-	ut_ad(table != NULL);
-	ut_ad(index != NULL);
-
-	const dict_foreign_set&	refs = table->referenced_set;
-
-	for (dict_foreign_set::const_iterator cur = refs.begin();
-	     cur != refs.end();
-	     ++cur) {
-		dict_foreign_t*	foreign = *cur;
-
-		if (foreign->referenced_index == index) {
-			ut_ad(table == foreign->referenced_table);
-			return true;
-		}
-	}
-
-	return false;
-}
-
-int
-ha_innobase::wsrep_append_keys(
-/*==================*/
- THD *thd,
- bool shared,
- const uchar* record0, /* in: row in MySQL format; key values
- and, if needed, the row hash are always
- computed from this row */
- const uchar* record1) /* in: row in MySQL format, or NULL if
- there is no second row to append keys
- for */
-{
- int rcode;
- DBUG_ENTER("wsrep_append_keys");
-
- bool key_appended = false;
- trx_t *trx = thd_to_trx(thd);
-
- if (table_share && table_share->tmp_table != NO_TMP_TABLE) {
- WSREP_DEBUG("skipping tmp table DML: THD: %lu tmp: %d SQL: %s",
- thd_get_thread_id(thd),
- table_share->tmp_table,
- (wsrep_thd_query(thd)) ?
- wsrep_thd_query(thd) : "void");
- DBUG_RETURN(0);
- }
-
- if (wsrep_protocol_version == 0) {
- uint len;
- char keyval[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
- char *key = &keyval[0];
- ibool is_null;
-
- len = wsrep_store_key_val_for_row(
- thd, table, 0, key, WSREP_MAX_SUPPORTED_KEY_LENGTH,
- record0, prebuilt, &is_null);
-
- if (!is_null) {
- rcode = wsrep_append_key(
- thd, trx, table_share, table, keyval,
- len, shared);
- if (rcode) DBUG_RETURN(rcode);
- }
- else
- {
- WSREP_DEBUG("NULL key skipped (proto 0): %s",
- wsrep_thd_query(thd));
- }
- } else {
- ut_a(table->s->keys <= 256);
- uint i;
- bool hasPK= false;
-
- for (i=0; i<table->s->keys; ++i) {
- KEY* key_info = table->key_info + i;
- if (key_info->flags & HA_NOSAME) {
- hasPK = true;
- }
- }
-
- for (i=0; i<table->s->keys; ++i) {
- uint len;
- char keyval0[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
- char keyval1[WSREP_MAX_SUPPORTED_KEY_LENGTH+1] = {'\0'};
- char* key0 = &keyval0[1];
- char* key1 = &keyval1[1];
- KEY* key_info = table->key_info + i;
- ibool is_null;
-
- dict_index_t* idx = innobase_get_index(i);
- dict_table_t* tab = (idx) ? idx->table : NULL;
-
- keyval0[0] = (char)i;
- keyval1[0] = (char)i;
-
- if (!tab) {
- WSREP_WARN("MySQL-InnoDB key mismatch %s %s",
- table->s->table_name.str,
- key_info->name);
- }
- /* !hasPK == table with no PK, must append all non-unique keys.
- hasPK is set above as soon as any key of the table has the
- HA_NOSAME flag. Otherwise a key is appended only if it has
- HA_NOSAME itself or is referenced by a foreign key. The first
- byte of each appended key is the key number i; the value built
- from record1 is appended only if it is not NULL and differs
- from the value built from record0. */
- if (!hasPK || key_info->flags & HA_NOSAME ||
- ((tab &&
- referenced_by_foreign_key2(tab, idx)) ||
- (!tab && referenced_by_foreign_key()))) {
-
- len = wsrep_store_key_val_for_row(
- thd, table, i, key0,
- WSREP_MAX_SUPPORTED_KEY_LENGTH,
- record0, prebuilt, &is_null);
- if (!is_null) {
- rcode = wsrep_append_key(
- thd, trx, table_share, table,
- keyval0, len+1, shared);
- if (rcode) DBUG_RETURN(rcode);
-
- if (key_info->flags & HA_NOSAME || shared)
- key_appended = true;
- }
- else
- {
- WSREP_DEBUG("NULL key skipped: %s",
- wsrep_thd_query(thd));
- }
- if (record1) {
- len = wsrep_store_key_val_for_row(
- thd, table, i, key1,
- WSREP_MAX_SUPPORTED_KEY_LENGTH,
- record1, prebuilt, &is_null);
- if (!is_null && memcmp(key0, key1, len)) {
- rcode = wsrep_append_key(
- thd, trx, table_share,
- table,
- keyval1, len+1, shared);
- if (rcode) DBUG_RETURN(rcode);
- }
- }
- }
- }
- }
-
- /* if no PK, calculate hash of full row, to be the key value.
- This is done only when wsrep_certify_nonPK is set and
- key_appended was not set above; the 16-byte digest of record0,
- and of record1 when it is given, is appended as the key. */
- if (!key_appended && wsrep_certify_nonPK) {
- uchar digest[16];
- int rcode;
-
- wsrep_calc_row_hash(digest, record0, table, prebuilt, thd);
- if ((rcode = wsrep_append_key(thd, trx, table_share, table,
- (const char*) digest, 16,
- shared))) {
- DBUG_RETURN(rcode);
- }
-
- if (record1) {
- wsrep_calc_row_hash(
- digest, record1, table, prebuilt, thd);
- if ((rcode = wsrep_append_key(thd, trx, table_share,
- table,
- (const char*) digest,
- 16, shared))) {
- DBUG_RETURN(rcode);
- }
- }
- DBUG_RETURN(0);
- }
-
- DBUG_RETURN(0);
-}
-#endif /* WITH_WSREP */
-
-/*********************************************************************//**
-Saves a reference to the current row in the 'ref' field of the handle.
-When InnoDB generated the clustered index itself, the reference is the
-internal row id, which 'record' does not contain; the row id kept in
-prebuilt is stored instead, on the assumption that 'record' is the row
-the handle was last positioned on. Otherwise the reference is the primary
-key value built from 'record'. */
-UNIV_INTERN
-void
-ha_innobase::position(
-/*==================*/
-	const uchar*	record)	/*!< in: row in MySQL format */
-{
-	uint	stored_len;
-
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
-	if (!prebuilt->clust_index_was_generated) {
-		stored_len = store_key_val_for_row(primary_key, (char*) ref,
-						   ref_length, record);
-	} else {
-		/* The table has no primary key that MySQL knows of: the
-		clustered index was generated from the row id, so the row
-		id itself serves as the row reference. */
-
-		stored_len = DATA_ROW_ID_LEN;
-
-		memcpy(ref, prebuilt->row_id, stored_len);
-	}
-
-	/* The reference length is expected to be the same for every row
-	of a table; a mismatch is only reported. */
-
-	if (stored_len == ref_length) {
-		return;
-	}
-
-	sql_print_error("Stored ref len is %lu, but table ref len is "
-			"%lu", (ulong) stored_len, (ulong) ref_length);
-}
-
-/*****************************************************************//**
-Looks for a column whose name matches the reserved "FTS_DOC_ID" name
-(compared without regard to case) and checks that it is usable as the
-InnoDB FTS Doc ID column. An unusable column raises a warning and an
-error.
-@return true if there exist a "FTS_DOC_ID" column */
-static
-bool
-create_table_check_doc_id_col(
-/*==========================*/
-	trx_t*		trx,		/*!< in: InnoDB transaction handle */
-	const TABLE*	form,		/*!< in: information on table
-					columns and indexes */
-	ulint*		doc_id_col)	/*!< out: Doc ID column number if
-					there exist a FTS_DOC_ID column,
-					ULINT_UNDEFINED if column is of the
-					wrong type/name/size */
-{
-	for (ulint pos = 0; pos < form->s->fields; pos++) {
-		const Field*	col = form->field[pos];
-		ulint		is_unsigned;
-
-		const ulint	mtype = get_innobase_type_from_mysql_type(
-			&is_unsigned, col);
-		const ulint	len = col->pack_length();
-
-		if (innobase_strcasecmp(col->field_name.str,
-					FTS_DOC_ID_COL_NAME) != 0) {
-			continue;
-		}
-
-		/* The internal query parser is case sensitive, so the
-		name has to match exactly, not just ignoring case. */
-		const bool	usable = mtype == DATA_INT
-			&& !col->real_maybe_null()
-			&& len == sizeof(doc_id_t)
-			&& (strcmp(col->field_name.str,
-				   FTS_DOC_ID_COL_NAME) == 0);
-
-		if (usable) {
-			*doc_id_col = pos;
-			return(true);
-		}
-
-		push_warning_printf(
-			trx->mysql_thd,
-			Sql_condition::WARN_LEVEL_WARN,
-			ER_ILLEGAL_HA_CREATE_OPTION,
-			"InnoDB: FTS_DOC_ID column must be "
-			"of BIGINT NOT NULL type, and named "
-			"in all capitalized characters");
-		my_error(ER_WRONG_COLUMN_NAME, MYF(0),
-			 col->field_name.str);
-		*doc_id_col = ULINT_UNDEFINED;
-
-		return(true);
-	}
-
-	return(false);
-}
-
-/*****************************************************************//**
-Creates a table definition to an InnoDB database. The table name is
-validated, an in-memory table object is built with one column for every
-MySQL field that is stored in the database (plus the hidden FTS Doc ID
-column when an FTS table does not define one itself), and the object is
-passed to row_create_table_for_mysql().
-@return 0 on success, otherwise a MySQL error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-int
-create_table_def(
-/*=============*/
-	trx_t*		trx,		/*!< in: InnoDB transaction handle */
-	const TABLE*	form,		/*!< in: information on table
-					columns and indexes */
-	const char*	table_name,	/*!< in: table name */
-	const char*	temp_path,	/*!< in: if this is a table explicitly
-					created by the user with the
-					TEMPORARY keyword, then this
-					parameter is the dir path where the
-					table should be placed if we create
-					an .ibd file for it (no .ibd extension
-					in the path, though). Otherwise this
-					is a zero length-string */
-	const char*	remote_path,	/*!< in: Remote path or zero length-string */
-	ulint		flags,		/*!< in: table flags */
-	ulint		flags2,		/*!< in: table flags2 */
-	fil_encryption_t mode,		/*!< in: encryption mode */
-	ulint		key_id)		/*!< in: encryption key_id */
-{
-	THD*		thd = trx->mysql_thd;
-	dict_table_t*	table;
-	ulint		n_cols, s_cols;
-	dberr_t		err;
-	ulint		col_type;
-	ulint		col_len;
-	ulint		nulls_allowed;
-	ulint		unsigned_type;
-	ulint		binary_type;
-	ulint		long_true_varchar;
-	ulint		charset_no;
-	ulint		i;
-	ulint		doc_id_col = 0;
-	ibool		has_doc_id_col = FALSE;
-	mem_heap_t*	heap;
-
-	DBUG_ENTER("create_table_def");
-	DBUG_PRINT("enter", ("table_name: %s", table_name));
-
-	DBUG_ASSERT(thd != NULL);
-
-	/* MySQL does the name length check. But we do additional check
-	on the name length here */
-	const size_t	table_name_len = strlen(table_name);
-	if (table_name_len > MAX_FULL_NAME_LEN) {
-		push_warning_printf(
-			thd, Sql_condition::WARN_LEVEL_WARN,
-			ER_TABLE_NAME,
-			"InnoDB: Table Name or Database Name is too long");
-
-		DBUG_RETURN(ER_TABLE_NAME);
-	}
-
-	/* A zero-length name is rejected first, so that the test of the
-	last character never reads before the start of the string. */
-	if (table_name_len == 0
-	    || table_name[table_name_len - 1] == '/') {
-		push_warning_printf(
-			thd, Sql_condition::WARN_LEVEL_WARN,
-			ER_TABLE_NAME,
-			"InnoDB: Table name is empty");
-
-		DBUG_RETURN(ER_WRONG_TABLE_NAME);
-	}
-
-	n_cols = form->s->fields;
-	s_cols = form->s->stored_fields;
-
-	/* Check whether there already exists a FTS_DOC_ID column */
-	if (create_table_check_doc_id_col(trx, form, &doc_id_col)){
-
-		/* Raise error if the Doc ID column is of wrong type or name */
-		if (doc_id_col == ULINT_UNDEFINED) {
-			trx_commit_for_mysql(trx);
-
-			err = DB_ERROR;
-			goto error_ret;
-		} else {
-			has_doc_id_col = TRUE;
-		}
-	}
-
-	/* We pass 0 as the space id, and determine at a lower level the space
-	id where to store the table */
-
-	if (flags2 & DICT_TF2_FTS) {
-		/* Adjust for the FTS hidden field */
-		if (!has_doc_id_col) {
-			table = dict_mem_table_create(table_name, 0, s_cols + 1,
-						      flags, flags2);
-
-			/* Set the hidden doc_id column. */
-			table->fts->doc_col = s_cols;
-		} else {
-			table = dict_mem_table_create(table_name, 0, s_cols,
-						      flags, flags2);
-			table->fts->doc_col = doc_id_col;
-		}
-	} else {
-		table = dict_mem_table_create(table_name, 0, s_cols,
-					      flags, flags2);
-	}
-
-	if (flags2 & DICT_TF2_TEMPORARY) {
-		ut_a(strlen(temp_path));
-		table->dir_path_of_temp_table =
-			mem_heap_strdup(table->heap, temp_path);
-	}
-
-	if (DICT_TF_HAS_DATA_DIR(flags)) {
-		ut_a(strlen(remote_path));
-		table->data_dir_path = mem_heap_strdup(table->heap, remote_path);
-	} else {
-		table->data_dir_path = NULL;
-	}
-	heap = mem_heap_create(1000);
-
-	for (i = 0; i < n_cols; i++) {
-		Field*	field = form->field[i];
-		/* Only fields stored in the database become columns */
-		if (!field->stored_in_db)
-			continue;
-
-		col_type = get_innobase_type_from_mysql_type(&unsigned_type,
-							     field);
-
-		if (!col_type) {
-			push_warning_printf(
-				thd, Sql_condition::WARN_LEVEL_WARN,
-				ER_CANT_CREATE_TABLE,
-				"Error creating table '%s' with "
-				"column '%s'. Please check its "
-				"column type and try to re-create "
-				"the table with an appropriate "
-				"column type.",
-				table->name, field->field_name.str);
-			goto err_col;
-		}
-
-		nulls_allowed = field->real_maybe_null() ? 0 : DATA_NOT_NULL;
-		binary_type = field->binary() ? DATA_BINARY_TYPE : 0;
-
-		charset_no = 0;
-
-		if (dtype_is_string_type(col_type)) {
-
-			charset_no = (ulint) field->charset()->number;
-
-			if (UNIV_UNLIKELY(charset_no > MAX_CHAR_COLL_NUM)) {
-				/* in data0type.h we assume that the
-				number fits in one byte in prtype */
-				push_warning_printf(
-					thd, Sql_condition::WARN_LEVEL_WARN,
-					ER_CANT_CREATE_TABLE,
-					"In InnoDB, charset-collation codes"
-					" must be below 256."
-					" Unsupported code %lu.",
-					(ulong) charset_no);
-				/* The table object has not been handed
-				over to row_create_table_for_mysql() yet,
-				so it has to be released here as well. */
-				dict_mem_table_free(table);
-				mem_heap_free(heap);
-				DBUG_RETURN(ER_CANT_CREATE_TABLE);
-			}
-		}
-
-		/* we assume in dtype_form_prtype() that this fits in
-		two bytes */
-		ut_a(static_cast<uint>(field->type()) <= MAX_CHAR_COLL_NUM);
-		col_len = field->pack_length();
-
-		/* The MySQL pack length contains 1 or 2 bytes length field
-		for a true VARCHAR. Let us subtract that, so that the InnoDB
-		column length in the InnoDB data dictionary is the real
-		maximum byte length of the actual data. */
-
-		long_true_varchar = 0;
-
-		if (field->type() == MYSQL_TYPE_VARCHAR) {
-			col_len -= ((Field_varstring*) field)->length_bytes;
-
-			if (((Field_varstring*) field)->length_bytes == 2) {
-				long_true_varchar = DATA_LONG_TRUE_VARCHAR;
-			}
-		}
-
-		/* First check whether the column to be added has a
-		system reserved name. */
-		if (dict_col_name_is_reserved(field->field_name.str)){
-			my_error(ER_WRONG_COLUMN_NAME, MYF(0),
-				 field->field_name.str);
-err_col:
-			dict_mem_table_free(table);
-			mem_heap_free(heap);
-			trx_commit_for_mysql(trx);
-
-			err = DB_ERROR;
-			goto error_ret;
-		}
-
-		dict_mem_table_add_col(table, heap,
-			field->field_name.str,
-			col_type,
-			dtype_form_prtype(
-				(ulint) field->type()
-				| nulls_allowed | unsigned_type
-				| binary_type | long_true_varchar,
-				charset_no),
-			col_len);
-	}
-
-	/* Add the FTS doc_id hidden column. */
-	if (flags2 & DICT_TF2_FTS && !has_doc_id_col) {
-		fts_add_doc_id_column(table, heap);
-	}
-
-	err = row_create_table_for_mysql(table, trx, false, mode, key_id);
-
-	mem_heap_free(heap);
-
-	DBUG_EXECUTE_IF("ib_create_err_tablespace_exist",
-			err = DB_TABLESPACE_EXISTS;);
-
-	if (err == DB_DUPLICATE_KEY || err == DB_TABLESPACE_EXISTS) {
-		char display_name[FN_REFLEN];
-		char* buf_end = innobase_convert_identifier(
-			display_name, sizeof(display_name) - 1,
-			table_name, strlen(table_name),
-			thd, TRUE);
-
-		*buf_end = '\0';
-
-		my_error(err == DB_DUPLICATE_KEY
-			 ? ER_TABLE_EXISTS_ERROR
-			 : ER_TABLESPACE_EXISTS, MYF(0), display_name);
-	}
-
-	if (err == DB_SUCCESS && (flags2 & DICT_TF2_FTS)) {
-		fts_optimize_add_table(table);
-	}
-
-error_ret:
-	DBUG_RETURN(convert_error_code_to_mysql(err, flags, thd));
-}
-
-/*****************************************************************//**
-Creates an index in an InnoDB database. A FULLTEXT key is created as a
-DICT_FTS index on the key part columns and returns right away. Any other
-key becomes clustered if it is the primary key of the table and unique if
-it has the HA_NOSAME flag; a prefix length is recorded for each key part
-that covers only part of its column, and the key part lengths are handed
-to row_create_index_for_mysql() for its own length check.
-@return 0 or a MySQL error code converted from the InnoDB error */
-static
-int
-create_index(
-/*=========*/
- trx_t* trx, /*!< in: InnoDB transaction handle */
- const TABLE* form, /*!< in: information on table
- columns and indexes */
- ulint flags, /*!< in: InnoDB table flags */
- const char* table_name, /*!< in: table name */
- uint key_num) /*!< in: index number */
-{
- dict_index_t* index;
- int error;
- const KEY* key;
- ulint ind_type;
- ulint* field_lengths;
-
- DBUG_ENTER("create_index");
-
- key = form->key_info + key_num;
-
- /* Assert that "GEN_CLUST_INDEX" cannot be used as non-primary index */
- ut_a(innobase_strcasecmp(key->name, innobase_index_reserve_name) != 0);
-
- if (key->flags & HA_FULLTEXT) {
- index = dict_mem_index_create(table_name, key->name, 0,
- DICT_FTS,
- key->user_defined_key_parts);
-
- for (ulint i = 0; i < key->user_defined_key_parts; i++) {
- KEY_PART_INFO* key_part = key->key_part + i;
- dict_mem_index_add_field(
- index, key_part->field->field_name.str, 0);
- }
-
- DBUG_RETURN(convert_error_code_to_mysql(
- row_create_index_for_mysql(
- index, trx, NULL),
- flags, NULL));
-
- }
-
- ind_type = 0;
-
- if (key_num == form->s->primary_key) {
- ind_type |= DICT_CLUSTERED;
- }
-
- if (key->flags & HA_NOSAME) {
- ind_type |= DICT_UNIQUE;
- }
-
- field_lengths = (ulint*) my_malloc(
- key->user_defined_key_parts * sizeof *
- field_lengths, MYF(MY_FAE));
-
- /* We pass 0 as the space id, and determine at a lower level the space
- id where to store the table */
-
- index = dict_mem_index_create(table_name, key->name, 0,
- ind_type, key->user_defined_key_parts);
-
- for (ulint i = 0; i < key->user_defined_key_parts; i++) {
- KEY_PART_INFO* key_part = key->key_part + i;
- ulint prefix_len;
- ulint col_type;
- ulint is_unsigned;
-
-
- /* (The flag HA_PART_KEY_SEG denotes in MySQL a
- column prefix field in an index: we only store a
- specified number of first bytes of the column to
- the index field.) The flag does not seem to be
- properly set by MySQL. Let us fall back on testing
- the length of the key part versus the column. */
-
- Field* field = NULL;
-
- for (ulint j = 0; j < form->s->fields; j++) {
-
- field = form->field[j];
-
- if (0 == innobase_strcasecmp(
- field->field_name.str,
- key_part->field->field_name.str)) {
- /* Found the corresponding column */
-
- goto found;
- }
- }
-
- ut_error;
-found:
- col_type = get_innobase_type_from_mysql_type(
- &is_unsigned, key_part->field);
-
- if (DATA_BLOB == col_type
- || (key_part->length < field->pack_length()
- && field->type() != MYSQL_TYPE_VARCHAR)
- || (field->type() == MYSQL_TYPE_VARCHAR
- && key_part->length < field->pack_length()
- - ((Field_varstring*) field)->length_bytes)) {
-
- switch (col_type) {
- default:
- prefix_len = key_part->length;
- break;
- case DATA_INT:
- case DATA_FLOAT:
- case DATA_DOUBLE:
- case DATA_DECIMAL:
- sql_print_error(
- "MySQL is trying to create a column "
- "prefix index field, on an "
- "inappropriate data type. Table "
- "name %s, column name %s.",
- table_name,
- key_part->field->field_name.str);
-
- prefix_len = 0;
- }
- } else {
- prefix_len = 0;
- }
-
- field_lengths[i] = key_part->length;
-
- dict_mem_index_add_field(
- index, key_part->field->field_name.str, prefix_len);
- }
-
- ut_ad(key->flags & HA_FULLTEXT || !(index->type & DICT_FTS));
-
- /* Even though we've defined max_supported_key_part_length, we
- still do our own checking using field_lengths to be absolutely
- sure we don't create too long indexes. */
-
- error = convert_error_code_to_mysql(
- row_create_index_for_mysql(index, trx, field_lengths),
- flags, NULL);
-
- my_free(field_lengths);
-
- DBUG_RETURN(error);
-}
-
-/*****************************************************************//**
-Creates the clustered index of an InnoDB table for which the user has
-not defined a primary index. The index gets the reserved name
-innobase_index_reserve_name and has no fields.
-@return 0 or a MySQL error code converted from the InnoDB error */
-static
-int
-create_clustered_index_when_no_primary(
-/*===================================*/
-	trx_t*		trx,		/*!< in: InnoDB transaction handle */
-	ulint		flags,		/*!< in: InnoDB table flags */
-	const char*	table_name)	/*!< in: table name */
-{
-	/* The space id is passed as 0; the space where the table is
-	stored is determined at a lower level. */
-	dict_index_t* const	clust_index = dict_mem_index_create(
-		table_name, innobase_index_reserve_name,
-		0, DICT_CLUSTERED, 0);
-
-	const dberr_t	err = row_create_index_for_mysql(
-		clust_index, trx, NULL);
-
-	return(convert_error_code_to_mysql(err, flags, NULL));
-}
-
-/*****************************************************************//**
-Maps a row format to the name used for it in messages. Row formats
-without a name of their own are reported as "NOT USED".
-@return row format name */
-UNIV_INTERN
-const char*
-get_row_format_name(
-/*================*/
-	enum row_type	row_format)	/*!< in: Row Format */
-{
-	const char*	name = "NOT USED";
-
-	switch (row_format) {
-	case ROW_TYPE_COMPACT:
-		name = "COMPACT";
-		break;
-	case ROW_TYPE_COMPRESSED:
-		name = "COMPRESSED";
-		break;
-	case ROW_TYPE_DYNAMIC:
-		name = "DYNAMIC";
-		break;
-	case ROW_TYPE_REDUNDANT:
-		name = "REDUNDANT";
-		break;
-	case ROW_TYPE_DEFAULT:
-		name = "DEFAULT";
-		break;
-	case ROW_TYPE_FIXED:
-		name = "FIXED";
-		break;
-	case ROW_TYPE_PAGE:
-	case ROW_TYPE_NOT_USED:
-	default:
-		/* keep the fallback name */
-		break;
-	}
-
-	return(name);
-}
-
-/** If file-per-table is missing, issue warning and set ret false */
-#define CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE(use_tablespace)\
- if (!use_tablespace) { \
- push_warning_printf( \
- thd, Sql_condition::WARN_LEVEL_WARN, \
- ER_ILLEGAL_HA_CREATE_OPTION, \
- "InnoDB: ROW_FORMAT=%s requires" \
- " innodb_file_per_table.", \
- get_row_format_name(row_format)); \
- ret = "ROW_FORMAT"; \
- }
-
-/** If file-format is Antelope, issue warning and set ret false */
-#define CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE \
- if (srv_file_format < UNIV_FORMAT_B) { \
- push_warning_printf( \
- thd, Sql_condition::WARN_LEVEL_WARN, \
- ER_ILLEGAL_HA_CREATE_OPTION, \
- "InnoDB: ROW_FORMAT=%s requires" \
- " innodb_file_format > Antelope.", \
- get_row_format_name(row_format)); \
- ret = "ROW_FORMAT"; \
- }
-
-
-/*****************************************************************//**
-Validates the create options when innodb_strict_mode is set.
-Checks KEY_BLOCK_SIZE, ROW_FORMAT, DATA DIRECTORY and INDEX DIRECTORY.
-Every violation pushes a warning to the connection; checking continues
-after a violation, so the returned string names the last offending
-option that was found.
-If innodb_strict_mode is not set then this function is a no-op
-@return NULL if valid, string if not. */
-UNIV_INTERN
-const char*
-create_options_are_invalid(
-/*=======================*/
-	THD*		thd,		/*!< in: connection thread. */
-	TABLE*		form,		/*!< in: information on table
-					columns and indexes */
-	HA_CREATE_INFO*	create_info,	/*!< in: create info. */
-	bool		use_tablespace)	/*!< in: srv_file_per_table */
-{
-	ibool		kbs_specified	= FALSE;
-	const char*	ret		= NULL;
-	enum row_type	row_format;
-
-	/* Assert on the pointers before any of them is dereferenced. */
-	ut_ad(thd != NULL);
-	ut_ad(form != NULL);
-	ut_ad(create_info != NULL);
-
-	/* If innodb_strict_mode is not set don't do any validation. */
-	if (!(THDVAR(thd, strict_mode))) {
-		return(NULL);
-	}
-
-	/* row_format, thd and ret are also read and written by the
-	CHECK_ERROR_ROW_TYPE_* macros used below. */
-	row_format = form->s->row_type;
-
-	/* First check if a non-zero KEY_BLOCK_SIZE was specified. */
-	if (create_info->key_block_size) {
-		ulint	kbs_max;
-
-		kbs_specified = TRUE;
-
-		switch (create_info->key_block_size) {
-		case 1:
-		case 2:
-		case 4:
-		case 8:
-		case 16:
-			/* Valid KEY_BLOCK_SIZE, check its dependencies. */
-			if (!use_tablespace) {
-				push_warning(
-					thd, Sql_condition::WARN_LEVEL_WARN,
-					ER_ILLEGAL_HA_CREATE_OPTION,
-					"InnoDB: KEY_BLOCK_SIZE requires"
-					" innodb_file_per_table.");
-				ret = "KEY_BLOCK_SIZE";
-			}
-			if (srv_file_format < UNIV_FORMAT_B) {
-				push_warning(
-					thd, Sql_condition::WARN_LEVEL_WARN,
-					ER_ILLEGAL_HA_CREATE_OPTION,
-					"InnoDB: KEY_BLOCK_SIZE requires"
-					" innodb_file_format > Antelope.");
-				ret = "KEY_BLOCK_SIZE";
-			}
-
-			/* The maximum KEY_BLOCK_SIZE (KBS) is 16. But if
-			UNIV_PAGE_SIZE is smaller than 16k, the maximum
-			KBS is also smaller. */
-			kbs_max = ut_min(
-				1 << (UNIV_PAGE_SSIZE_MAX - 1),
-				1 << (PAGE_ZIP_SSIZE_MAX - 1));
-			if (create_info->key_block_size > kbs_max) {
-				/* Both values are unsigned, so they are
-				printed with %lu like in the default
-				branch below. */
-				push_warning_printf(
-					thd, Sql_condition::WARN_LEVEL_WARN,
-					ER_ILLEGAL_HA_CREATE_OPTION,
-					"InnoDB: KEY_BLOCK_SIZE=%lu"
-					" cannot be larger than %lu.",
-					create_info->key_block_size,
-					kbs_max);
-				ret = "KEY_BLOCK_SIZE";
-			}
-			break;
-		default:
-			push_warning_printf(
-				thd, Sql_condition::WARN_LEVEL_WARN,
-				ER_ILLEGAL_HA_CREATE_OPTION,
-				"InnoDB: invalid KEY_BLOCK_SIZE = %lu."
-				" Valid values are [1, 2, 4, 8, 16]",
-				create_info->key_block_size);
-			ret = "KEY_BLOCK_SIZE";
-			break;
-		}
-	}
-
-	/* Check for a valid Innodb ROW_FORMAT specifier and
-	other incompatibilities. */
-	switch (row_format) {
-	case ROW_TYPE_COMPRESSED:
-		CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE(use_tablespace);
-		CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE;
-		break;
-	case ROW_TYPE_DYNAMIC:
-		CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE(use_tablespace);
-		CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE;
-		/* ROW_FORMAT=DYNAMIC also shuns KEY_BLOCK_SIZE */
-		/* fall through */
-	case ROW_TYPE_COMPACT:
-	case ROW_TYPE_REDUNDANT:
-		if (kbs_specified) {
-			push_warning_printf(
-				thd, Sql_condition::WARN_LEVEL_WARN,
-				ER_ILLEGAL_HA_CREATE_OPTION,
-				"InnoDB: cannot specify ROW_FORMAT = %s"
-				" with KEY_BLOCK_SIZE.",
-				get_row_format_name(row_format));
-			ret = "KEY_BLOCK_SIZE";
-		}
-		break;
-	case ROW_TYPE_DEFAULT:
-		break;
-	case ROW_TYPE_FIXED:
-	case ROW_TYPE_PAGE:
-	case ROW_TYPE_NOT_USED:
-	default:
-		push_warning(
-			thd, Sql_condition::WARN_LEVEL_WARN,
-			ER_ILLEGAL_HA_CREATE_OPTION,
-			"InnoDB: invalid ROW_FORMAT specifier.");
-		ret = "ROW_TYPE";
-		break;
-	}
-
-	/* Use DATA DIRECTORY only with file-per-table. */
-	if (create_info->data_file_name && !use_tablespace) {
-		push_warning(
-			thd, Sql_condition::WARN_LEVEL_WARN,
-			ER_ILLEGAL_HA_CREATE_OPTION,
-			"InnoDB: DATA DIRECTORY requires"
-			" innodb_file_per_table.");
-		ret = "DATA DIRECTORY";
-	}
-
-	/* Do not use DATA DIRECTORY with TEMPORARY TABLE. */
-	if (create_info->data_file_name
-	    && create_info->options & HA_LEX_CREATE_TMP_TABLE) {
-		push_warning(
-			thd, Sql_condition::WARN_LEVEL_WARN,
-			ER_ILLEGAL_HA_CREATE_OPTION,
-			"InnoDB: DATA DIRECTORY cannot be used"
-			" for TEMPORARY tables.");
-		ret = "DATA DIRECTORY";
-	}
-
-	/* Do not allow INDEX_DIRECTORY. The message is a constant, so
-	the non-formatting push_warning() is used. */
-	if (create_info->index_file_name) {
-		push_warning(
-			thd, Sql_condition::WARN_LEVEL_WARN,
-			ER_ILLEGAL_HA_CREATE_OPTION,
-			"InnoDB: INDEX DIRECTORY is not supported");
-		ret = "INDEX DIRECTORY";
-	}
-
-	/* Compressed tables are refused when the page size exceeds
-	16k (1 << 14). */
-	if ((kbs_specified || row_format == ROW_TYPE_COMPRESSED)
-	    && UNIV_PAGE_SIZE > (1<<14)) {
-		push_warning(
-			thd, Sql_condition::WARN_LEVEL_WARN,
-			ER_ILLEGAL_HA_CREATE_OPTION,
-			"InnoDB: Cannot create a COMPRESSED table"
-			" when innodb_page_size > 16k.");
-
-		if (kbs_specified) {
-			ret = "KEY_BLOCK_SIZE";
-		} else {
-			ret = "ROW_TYPE";
-		}
-	}
-
-	return(ret);
-}
-
-/*****************************************************************//**
-Fills in the parts of create_info that InnoDB knows about: the
-AUTO_INCREMENT value (unless the caller marked it as already supplied)
-and the DATA DIRECTORY path. Used in SHOW CREATE TABLE et al. */
-UNIV_INTERN
-void
-ha_innobase::update_create_info(
-/*============================*/
-	HA_CREATE_INFO*	create_info)	/*!< in/out: create info */
-{
-	const bool	autoinc_supplied =
-		(create_info->used_fields & HA_CREATE_USED_AUTO) != 0;
-
-	if (!autoinc_supplied) {
-		/* info(HA_STATUS_AUTO) is called first, then the value
-		is copied out of stats. */
-		ha_innobase::info(HA_STATUS_AUTO);
-		create_info->auto_increment_value =
-			stats.auto_increment_value;
-	}
-
-	/* Update the DATA DIRECTORY name from SYS_DATAFILES. */
-	dict_get_and_save_data_dir_path(prebuilt->table, false);
-
-	if (prebuilt->table->data_dir_path != NULL) {
-		create_info->data_file_name =
-			prebuilt->table->data_dir_path;
-	}
-}
-
-/*****************************************************************//**
-Initialize the table FTS stopword list.
-Forwards to fts_load_stopword() with the server stopword table and the
-session values of ft_user_stopword_table and ft_enable_stopword.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-innobase_fts_load_stopword(
-/*=======================*/
-	dict_table_t*	table,	/*!< in: Table has the FTS */
-	trx_t*		trx,	/*!< in: transaction */
-	THD*		thd)	/*!< in: current thread */
-{
-	const ibool	loaded = fts_load_stopword(
-		table,
-		trx,
-		innobase_server_stopword_table,
-		THDVAR(thd, ft_user_stopword_table),
-		THDVAR(thd, ft_enable_stopword),
-		FALSE);
-
-	return(loaded);
-}
-
-/*****************************************************************//**
-Parses the table name into normal name and either temp path or remote path
-if needed.
-norm_name always receives the normalized name. temp_path is filled only
-for TEMPORARY tables and remote_path only for an accepted DATA DIRECTORY;
-otherwise they are set to the empty string. temp_path and remote_path
-must each have room for FN_REFLEN bytes; longer input is truncated and
-the result is always NUL-terminated.
-@return 0 if successful, otherwise, error number */
-UNIV_INTERN
-int
-ha_innobase::parse_table_name(
-/*==========================*/
-	const char*	name,		/*!< in: table name provided*/
-	HA_CREATE_INFO*	create_info,	/*!< in: more information of the
-					created table, contains also the
-					create statement string */
-	ulint		flags,		/*!< in: flags*/
-	ulint		flags2,		/*!< in: flags2*/
-	char*		norm_name,	/*!< out: normalized table name */
-	char*		temp_path,	/*!< out: absolute path of table */
-	char*		remote_path)	/*!< out: remote path of table */
-{
-	THD*		thd = ha_thd();
-	bool		use_tablespace = flags2 & DICT_TF2_USE_TABLESPACE;
-	DBUG_ENTER("ha_innobase::parse_table_name");
-
-#ifdef __WIN__
-	/* Names passed in from server are in two formats:
-	1. <database_name>/<table_name>: for normal table creation
-	2. full path: for temp table creation, or DATA DIRECTORY.
-
-	When srv_file_per_table is on and mysqld_embedded is off,
-	check for full path pattern, i.e.
-	X:\dir\..., X is a driver letter, or
-	\\dir1\dir2\..., UNC path
-	returns error if it is in full path format, but not creating a temp.
-	table. Currently InnoDB does not support symbolic link on Windows. */
-
-	if (use_tablespace
-	    && !mysqld_embedded
-	    && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)) {
-
-		if ((name[1] == ':')
-		    || (name[0] == '\\' && name[1] == '\\')) {
-			sql_print_error("Cannot create table %s\n", name);
-			DBUG_RETURN(HA_ERR_GENERIC);
-		}
-	}
-#endif
-
-	normalize_table_name(norm_name, name);
-	temp_path[0] = '\0';
-	remote_path[0] = '\0';
-
-	/* A full path is used for TEMPORARY TABLE and DATA DIRECTORY.
-	In the case of;
-	  CREATE TEMPORARY TABLE ... DATA DIRECTORY={path} ... ;
-	We ignore the DATA DIRECTORY. */
-	if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
-		strncpy(temp_path, name, FN_REFLEN - 1);
-		/* strncpy() writes no terminator when the source has
-		FN_REFLEN - 1 or more characters, so add one. */
-		temp_path[FN_REFLEN - 1] = '\0';
-	}
-
-	if (create_info->data_file_name) {
-		bool ignore = false;
-
-		/* Use DATA DIRECTORY only with file-per-table. */
-		if (!use_tablespace) {
-			push_warning(
-				thd, Sql_condition::WARN_LEVEL_WARN,
-				ER_ILLEGAL_HA_CREATE_OPTION,
-				"InnoDB: DATA DIRECTORY requires"
-				" innodb_file_per_table.");
-			ignore = true;
-		}
-
-		/* Do not use DATA DIRECTORY with TEMPORARY TABLE. */
-		if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
-			push_warning(
-				thd, Sql_condition::WARN_LEVEL_WARN,
-				ER_ILLEGAL_HA_CREATE_OPTION,
-				"InnoDB: DATA DIRECTORY cannot be"
-				" used for TEMPORARY tables.");
-			ignore = true;
-		}
-
-		if (ignore) {
-			my_error(WARN_OPTION_IGNORED, ME_JUST_WARNING,
-				 "DATA DIRECTORY");
-		} else {
-			strncpy(remote_path, create_info->data_file_name,
-				FN_REFLEN - 1);
-			/* Same termination guarantee as for temp_path. */
-			remote_path[FN_REFLEN - 1] = '\0';
-		}
-	}
-
-	/* INDEX DIRECTORY is never honoured; only a warning is raised. */
-	if (create_info->index_file_name) {
-		my_error(WARN_OPTION_IGNORED, ME_JUST_WARNING,
-			 "INDEX DIRECTORY");
-	}
-
-	DBUG_RETURN(0);
-}
-
-/*****************************************************************//**
-Determines InnoDB table flags.
-Derives the DICT_TF bits (*flags) and DICT_TF2 bits (*flags2) of a table
-from its ROW_FORMAT, KEY_BLOCK_SIZE, page compression and atomic write
-options and from its FULLTEXT keys. Unsupported ROW_FORMAT or
-KEY_BLOCK_SIZE requests are downgraded with a warning instead of being
-rejected; the only failures are the FULLTEXT-related ones, each of which
-is reported through my_error().
-@retval true if successful, false if error */
-UNIV_INTERN
-bool
-innobase_table_flags(
-/*=================*/
- const TABLE* form, /*!< in: table */
- const HA_CREATE_INFO* create_info, /*!< in: information
- on table columns and indexes */
- THD* thd, /*!< in: connection */
- bool use_tablespace, /*!< in: whether to create
- outside system tablespace */
- ulint* flags, /*!< out: DICT_TF flags */
- ulint* flags2) /*!< out: DICT_TF2 flags */
-{
- DBUG_ENTER("innobase_table_flags");
-
- const char* fts_doc_id_index_bad = NULL; /* name of a malformed FTS_DOC_ID_INDEX key, if one was seen */
- bool zip_allowed = true; /* cleared whenever compression has to be given up */
- ulint zip_ssize = 0; /* compressed page shift size; 0 = not compressed */
- enum row_type row_format;
- rec_format_t innodb_row_format = REC_FORMAT_COMPACT;
- bool use_data_dir;
- ha_table_option_struct *options= form->s->option_struct;
-
- /* Cache the value of innodb_file_format, in case it is
- modified by another thread while the table is being created. */
- const ulint file_format_allowed = srv_file_format;
-
- /* Cache the value of innobase_compression_level, in case it is
- modified by another thread while the table is being created. */
- const ulint default_compression_level = page_zip_level;
-
- *flags = 0;
- *flags2 = 0;
-
- /* Check if there are any FTS indexes defined on this table. */
- for (uint i = 0; i < form->s->keys; i++) {
- const KEY* key = &form->key_info[i];
-
- if (key->flags & HA_FULLTEXT) {
- *flags2 |= DICT_TF2_FTS;
-
- /* We don't support FTS indexes in temporary
- tables. */
- if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
-
- my_error(ER_INNODB_NO_FT_TEMP_TABLE, MYF(0));
- DBUG_RETURN(false);
- }
-
- if (key->flags & HA_USES_PARSER) {
- my_error(ER_INNODB_NO_FT_USES_PARSER, MYF(0));
- DBUG_RETURN(false);
- }
-
- if (fts_doc_id_index_bad) { /* a bad doc id index was recorded for an earlier key */
- goto index_bad;
- }
- }
-
- if (innobase_strcasecmp(key->name, FTS_DOC_ID_INDEX_NAME)) {
- continue;
- }
-
- /* Do a pre-check on FTS DOC ID index */
- if (!(key->flags & HA_NOSAME)
- || strcmp(key->name, FTS_DOC_ID_INDEX_NAME) /* exact, case-sensitive match required here */
- || strcmp(key->key_part[0].field->field_name.str,
- FTS_DOC_ID_COL_NAME)) {
- fts_doc_id_index_bad = key->name;
- }
-
- if (fts_doc_id_index_bad && (*flags2 & DICT_TF2_FTS)) {
-index_bad: /* also reached by the goto in the HA_FULLTEXT branch above */
- my_error(ER_INNODB_FT_WRONG_DOCID_INDEX, MYF(0),
- fts_doc_id_index_bad);
- DBUG_RETURN(false);
- }
- }
-
- row_format = form->s->row_type;
-
- if (create_info->key_block_size) {
- /* The requested compressed page size (key_block_size)
- is given in kilobytes. If it is a valid number, store
- that value as the number of log2 shifts from 512 in
- zip_ssize. Zero means it is not compressed. */
- ulint zssize; /* Zip Shift Size */
- ulint kbsize; /* Key Block Size */
- for (zssize = kbsize = 1; /* kbsize doubles each round: 1, 2, 4, ... */
- zssize <= ut_min(UNIV_PAGE_SSIZE_MAX,
- PAGE_ZIP_SSIZE_MAX);
- zssize++, kbsize <<= 1) {
- if (kbsize == create_info->key_block_size) {
- zip_ssize = zssize;
- break;
- }
- }
-
- /* Make sure compressed row format is allowed. */
- if (!use_tablespace) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: KEY_BLOCK_SIZE requires"
- " innodb_file_per_table.");
- zip_allowed = FALSE;
- }
-
- if (file_format_allowed < UNIV_FORMAT_B) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: KEY_BLOCK_SIZE requires"
- " innodb_file_format > Antelope.");
- zip_allowed = FALSE;
- }
-
- if (!zip_allowed
- || zssize > ut_min(UNIV_PAGE_SSIZE_MAX, /* loop above ended without a match */
- PAGE_ZIP_SSIZE_MAX)) {
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: ignoring KEY_BLOCK_SIZE=%lu.",
- create_info->key_block_size);
- }
- }
-
- if (zip_ssize && zip_allowed) {
- /* if ROW_FORMAT is set to default,
- automatically change it to COMPRESSED.*/
- if (row_format == ROW_TYPE_DEFAULT) {
- row_format = ROW_TYPE_COMPRESSED;
- } else if (row_format != ROW_TYPE_COMPRESSED) {
- /* ROW_FORMAT other than COMPRESSED
- ignores KEY_BLOCK_SIZE. It does not
- make sense to reject conflicting
- KEY_BLOCK_SIZE and ROW_FORMAT, because
- such combinations can be obtained
- with ALTER TABLE anyway. */
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: ignoring KEY_BLOCK_SIZE=%lu"
- " unless ROW_FORMAT=COMPRESSED.",
- create_info->key_block_size);
- zip_allowed = FALSE;
- }
- } else {
- /* zip_ssize == 0 means no KEY_BLOCK_SIZE.*/
- if (row_format == ROW_TYPE_COMPRESSED && zip_allowed) {
- /* ROW_FORMAT=COMPRESSED without KEY_BLOCK_SIZE
- implies half the maximum KEY_BLOCK_SIZE(*1k) or
- UNIV_PAGE_SIZE, whichever is less. */
- zip_ssize = ut_min(UNIV_PAGE_SSIZE_MAX,
- PAGE_ZIP_SSIZE_MAX) - 1;
- }
- }
-
- /* Validate the row format. Correct it if necessary */
- switch (row_format) {
- case ROW_TYPE_REDUNDANT:
- innodb_row_format = REC_FORMAT_REDUNDANT;
- break;
-
- case ROW_TYPE_COMPRESSED:
- case ROW_TYPE_DYNAMIC:
- if (!use_tablespace) {
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: ROW_FORMAT=%s requires"
- " innodb_file_per_table.",
- get_row_format_name(row_format));
- } else if (file_format_allowed == UNIV_FORMAT_A) {
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: ROW_FORMAT=%s requires"
- " innodb_file_format > Antelope.",
- get_row_format_name(row_format));
- } else {
- switch(row_format) {
- case ROW_TYPE_COMPRESSED:
- innodb_row_format = REC_FORMAT_COMPRESSED;
- break;
- case ROW_TYPE_DYNAMIC:
- innodb_row_format = REC_FORMAT_DYNAMIC;
- break;
- default:
- /* Not possible, avoid compiler warning */
- break;
- }
- break; /* Correct row_format */
- }
- zip_allowed = FALSE; /* only reached when one of the two warnings above was pushed */
- /* Set ROW_FORMAT = COMPACT */
- /* fall through */
- case ROW_TYPE_NOT_USED:
- case ROW_TYPE_FIXED:
- case ROW_TYPE_PAGE:
- default:
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: assuming ROW_FORMAT=COMPACT.");
- /* fall through */
- case ROW_TYPE_DEFAULT:
- /* If we fell through, set row format to Compact. */
- row_format = ROW_TYPE_COMPACT; /* fall through; innodb_row_format keeps its REC_FORMAT_COMPACT initial value */
- case ROW_TYPE_COMPACT:
- break;
- }
-
- /* Don't support compressed table when page size > 16k. */
- if (zip_allowed && zip_ssize && UNIV_PAGE_SIZE > UNIV_PAGE_SIZE_DEF) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_ILLEGAL_HA_CREATE_OPTION,
- "InnoDB: Cannot create a COMPRESSED table"
- " when innodb_page_size > 16k."
- " Assuming ROW_FORMAT=COMPACT.");
- zip_allowed = FALSE;
- }
-
- /* Set the table flags */
- if (!zip_allowed) {
- zip_ssize = 0;
- }
-
- use_data_dir = use_tablespace /* DATA DIRECTORY counts only with file-per-table and for non-temporary tables */
- && ((create_info->data_file_name != NULL)
- && !(create_info->options & HA_LEX_CREATE_TMP_TABLE));
-
- /* Set up table dictionary flags */
- dict_tf_set(flags,
- innodb_row_format,
- zip_ssize,
- use_data_dir,
- options->page_compressed,
- options->page_compression_level == 0 ? /* level 0 selects the cached server default */
- default_compression_level : options->page_compression_level,
- options->atomic_writes);
-
- if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
- *flags2 |= DICT_TF2_TEMPORARY;
- }
-
- if (use_tablespace) {
- *flags2 |= DICT_TF2_USE_TABLESPACE;
- }
-
- /* Set the flags2 when create table or alter tables */
- *flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
- DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
- *flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;);
-
- DBUG_RETURN(true);
-}
-
-/*****************************************************************//**
-Check engine specific table options not handled by SQL-parser.
-The checks cover ENCRYPTED, PAGE_COMPRESSED, PAGE_COMPRESSION_LEVEL,
-ENCRYPTION_KEY_ID and ATOMIC_WRITES. They run in a fixed order and stop
-at the first violation: a warning is pushed and the name of the offending
-option is returned. One case is corrected instead of rejected: with
-ENCRYPTED=OFF, an ENCRYPTION_KEY_ID that differs from the session default
-is reset to FIL_DEFAULT_ENCRYPTION_KEY after a warning.
-@return NULL if valid, string if not */
-UNIV_INTERN
-const char*
-ha_innobase::check_table_options(
- THD *thd, /*!< in: thread handle */
- TABLE* table, /*!< in: information on table
- columns and indexes */
- HA_CREATE_INFO* create_info, /*!< in: more information of the
- created table, contains also the
- create statement string */
- const bool use_tablespace, /*!< in: use file per table */
- const ulint file_format) /*!< in: file format, compared against UNIV_FORMAT_B */
-{
- enum row_type row_format = table->s->row_type;
- ha_table_option_struct *options= table->s->option_struct;
- atomic_writes_t awrites = (atomic_writes_t)options->atomic_writes;
- fil_encryption_t encrypt = (fil_encryption_t)options->encryption;
-
- if (encrypt != FIL_ENCRYPTION_DEFAULT && !use_tablespace) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: ENCRYPTED requires innodb_file_per_table");
- return "ENCRYPTED";
- }
-
- if (encrypt == FIL_ENCRYPTION_OFF && srv_encrypt_tables == 2) { /* 2 is the FORCE setting named in the warning below */
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: ENCRYPTED=OFF cannot be used when innodb_encrypt_tables=FORCE");
- return "ENCRYPTED";
- }
-
- /* Check page compression requirements */
- if (options->page_compressed) {
-
- if (row_format == ROW_TYPE_COMPRESSED) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: PAGE_COMPRESSED table can't have"
- " ROW_TYPE=COMPRESSED");
- return "PAGE_COMPRESSED";
- }
-
- if (row_format == ROW_TYPE_REDUNDANT) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: PAGE_COMPRESSED table can't have"
- " ROW_TYPE=REDUNDANT");
- return "PAGE_COMPRESSED";
- }
-
- if (!use_tablespace) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: PAGE_COMPRESSED requires"
- " innodb_file_per_table.");
- return "PAGE_COMPRESSED";
- }
-
- if (file_format < UNIV_FORMAT_B) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: PAGE_COMPRESSED requires"
- " innodb_file_format > Antelope.");
- return "PAGE_COMPRESSED";
- }
-
- if (create_info->key_block_size) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: PAGE_COMPRESSED table can't have"
- " key_block_size");
- return "PAGE_COMPRESSED";
- }
- }
-
- /* Check page compression level requirements, some of them are
- already checked above */
- if (options->page_compression_level != 0) {
- if (options->page_compressed == false) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: PAGE_COMPRESSION_LEVEL requires"
- " PAGE_COMPRESSED");
- return "PAGE_COMPRESSION_LEVEL";
- }
-
- if (options->page_compression_level < 1 || options->page_compression_level > 9) { /* 0 was already excluded by the enclosing test */
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu."
- " Valid values are [1, 2, 3, 4, 5, 6, 7, 8, 9]",
- options->page_compression_level);
- return "PAGE_COMPRESSION_LEVEL";
- }
- }
-
- /* If encryption is set up make sure that used key_id is found */
- if (encrypt == FIL_ENCRYPTION_ON ||
- (encrypt == FIL_ENCRYPTION_DEFAULT && srv_encrypt_tables)) {
- if (!encryption_key_id_exists((unsigned int)options->encryption_key_id)) {
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: ENCRYPTION_KEY_ID %u not available",
- (uint)options->encryption_key_id
- );
- return "ENCRYPTION_KEY_ID";
-
- }
- }
-
- /* Ignore nondefault key_id if encryption is set off */
- if (encrypt == FIL_ENCRYPTION_OFF &&
- options->encryption_key_id != THDVAR(thd, default_encryption_key_id)) {
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: Ignored ENCRYPTION_KEY_ID %u when encryption is disabled",
- (uint)options->encryption_key_id
- );
- options->encryption_key_id = FIL_DEFAULT_ENCRYPTION_KEY; /* modifies the option struct reached through table->s; no error is returned */
- }
-
- /* If default encryption is used make sure that used key is found
- from key file. */
- if (encrypt == FIL_ENCRYPTION_DEFAULT &&
- !srv_encrypt_tables &&
- options->encryption_key_id != FIL_DEFAULT_ENCRYPTION_KEY) {
- if (!encryption_key_id_exists((unsigned int)options->encryption_key_id)) {
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: ENCRYPTION_KEY_ID %u not available",
- (uint)options->encryption_key_id
- );
- return "ENCRYPTION_KEY_ID";
-
- }
- }
-
- /* Check atomic writes requirements */
- if (awrites == ATOMIC_WRITES_ON ||
- (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) {
- if (!use_tablespace) {
- push_warning(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_WRONG_CREATE_OPTION,
- "InnoDB: ATOMIC_WRITES requires"
- " innodb_file_per_table.");
- return "ATOMIC_WRITES";
- }
- }
-
- return 0; /* every check passed */
-}
-
-/*****************************************************************//**
-Creates a new table to an InnoDB database.
-The table options are validated and the dictionary flags derived first.
-Then, in a freshly allocated transaction that holds the data dictionary
-lock, the function creates the table definition, the clustered index,
-the FTS common tables (when the table has FULLTEXT keys), the remaining
-indexes and the foreign key constraints. A failure in that phase jumps
-to the cleanup label, which rolls the transaction back. After the commit
-the table is reopened to copy the frm flags, initialize statistics,
-load the FTS stopwords and set the AUTO_INCREMENT start value.
-@return error number */
-UNIV_INTERN
-int
-ha_innobase::create(
-/*================*/
- const char* name, /*!< in: table name */
- TABLE* form, /*!< in: information on table
- columns and indexes */
- HA_CREATE_INFO* create_info) /*!< in: more information of the
- created table, contains also the
- create statement string */
-{
- int error;
- trx_t* parent_trx;
- trx_t* trx;
- int primary_key_no;
- uint i;
- char norm_name[FN_REFLEN]; /* {database}/{tablename} */
- char temp_path[FN_REFLEN]; /* absolute path of temp frm */
- char remote_path[FN_REFLEN]; /* absolute path of table */
- THD* thd = ha_thd();
- ib_int64_t auto_inc_value;
-
- /* Cache the global variable "srv_file_per_table" to a local
- variable before using it. Note that "srv_file_per_table"
- is not under dict_sys mutex protection, and could be changed
- while creating the table. So we read the current value here
- and make all further decisions based on this. */
- bool use_tablespace = srv_file_per_table;
- const ulint file_format = srv_file_format;
-
- /* Zip Shift Size - log2 - 9 of compressed page size,
- zero for uncompressed */
- ulint flags;
- ulint flags2;
- dict_table_t* innobase_table = NULL;
-
- const char* stmt;
- size_t stmt_len;
- /* Cache table options */
- ha_table_option_struct *options= form->s->option_struct;
- fil_encryption_t encrypt = (fil_encryption_t)options->encryption;
- uint key_id = (uint)options->encryption_key_id;
-
- DBUG_ENTER("ha_innobase::create");
-
- DBUG_ASSERT(thd != NULL);
- DBUG_ASSERT(create_info != NULL);
-
- if (form->s->stored_fields > REC_MAX_N_USER_FIELDS) {
- DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS);
- } else if (high_level_read_only) {
- DBUG_RETURN(HA_ERR_TABLE_READONLY);
- }
-
- /* Create the table definition in InnoDB */
-
- /* Validate table options not handled by the SQL-parser */
- if(check_table_options(thd, form, create_info, use_tablespace,
- file_format)) {
- DBUG_RETURN(HA_WRONG_CREATE_OPTION);
- }
-
- /* Validate create options if innodb_strict_mode is set. */
- if (create_options_are_invalid(
- thd, form, create_info, use_tablespace)) {
- DBUG_RETURN(HA_WRONG_CREATE_OPTION);
- }
-
- if (!innobase_table_flags(form, create_info,
- thd, use_tablespace,
- &flags, &flags2)) {
- DBUG_RETURN(-1);
- }
-
- error = parse_table_name(name, create_info, flags, flags2,
- norm_name, temp_path, remote_path);
- if (error) {
- DBUG_RETURN(error);
- }
-
- /* Look for a primary key */
- primary_key_no = (form->s->primary_key != MAX_KEY ?
- (int) form->s->primary_key :
- -1); /* -1 stands for "no primary key" below */
-
- /* Our function innobase_get_mysql_key_number_for_index assumes
- the primary key is always number 0, if it exists */
- ut_a(primary_key_no == -1 || primary_key_no == 0);
-
- /* Check for name conflicts (with reserved name) for
- any user indices to be created. */
- if (innobase_index_name_is_reserved(thd, form->key_info,
- form->s->keys)) {
- DBUG_RETURN(-1);
- }
-
- if (row_is_magic_monitor_table(norm_name)) {
- push_warning_printf(thd,
- Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_WRONG_COMMAND,
- "Using the table name %s to enable "
- "diagnostic output is deprecated "
- "and may be removed in future releases. "
- "Use INFORMATION_SCHEMA or "
- "PERFORMANCE_SCHEMA tables or "
- "SET GLOBAL innodb_status_output=ON.",
- dict_remove_db_name(norm_name));
-
- /* Limit innodb monitor access to users with PROCESS privilege.
- See http://bugs.mysql.com/32710 why we chose PROCESS. */
- if (check_global_access(thd, PROCESS_ACL)) {
- DBUG_RETURN(HA_ERR_GENERIC);
- }
- }
-
- /* Get the transaction associated with the current thd, or create one
- if not yet created */
-
- parent_trx = check_trx_exists(thd);
-
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
-
- trx_search_latch_release_if_reserved(parent_trx);
-
- trx = innobase_trx_allocate(thd); /* separate transaction used for the whole create; freed on every exit path below */
-
- if (UNIV_UNLIKELY(trx->fake_changes)) {
- innobase_commit_low(trx);
- trx_free_for_mysql(trx);
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
- }
-
- /* Latch the InnoDB data dictionary exclusively so that no deadlocks
- or lock waits can happen in it during a table create operation.
- Drop table etc. do this latching in row0mysql.cc. */
-
- row_mysql_lock_data_dictionary(trx);
-
- error = create_table_def(trx, form, norm_name, temp_path,
- remote_path, flags, flags2, encrypt, key_id);
- if (error) {
- goto cleanup;
- }
-
- /* Create the keys */
-
- if (form->s->keys == 0 || primary_key_no == -1) {
- /* Create an index which is used as the clustered index;
- order the rows by their row id which is internally generated
- by InnoDB */
-
- error = create_clustered_index_when_no_primary(
- trx, flags, norm_name);
- if (error) {
- goto cleanup;
- }
- }
-
- if (primary_key_no != -1) {
- /* In InnoDB the clustered index must always be created
- first */
- if ((error = create_index(trx, form, flags, norm_name,
- (uint) primary_key_no))) {
- goto cleanup;
- }
- }
-
- /* Create the ancillary tables that are common to all FTS indexes on
- this table. */
- if (flags2 & DICT_TF2_FTS) {
- enum fts_doc_id_index_enum ret;
-
- innobase_table = dict_table_open_on_name(
- norm_name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
-
- ut_a(innobase_table);
-
- /* Check whether there already exists FTS_DOC_ID_INDEX */
- ret = innobase_fts_check_doc_id_index_in_def(
- form->s->keys, form->key_info);
-
- switch (ret) {
- case FTS_INCORRECT_DOC_ID_INDEX:
- push_warning_printf(thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_NAME_FOR_INDEX,
- " InnoDB: Index name %s is reserved"
- " for the unique index on"
- " FTS_DOC_ID column for FTS"
- " Document ID indexing"
- " on table %s. Please check"
- " the index definition to"
- " make sure it is of correct"
- " type\n",
- FTS_DOC_ID_INDEX_NAME,
- innobase_table->name);
-
- if (innobase_table->fts) {
- fts_free(innobase_table);
- }
-
- dict_table_close(innobase_table, TRUE, FALSE);
- my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
- FTS_DOC_ID_INDEX_NAME);
- error = -1;
- goto cleanup;
- case FTS_EXIST_DOC_ID_INDEX:
- case FTS_NOT_EXIST_DOC_ID_INDEX:
- break;
- }
-
- dberr_t err = fts_create_common_tables(
- trx, innobase_table, norm_name,
- (ret == FTS_EXIST_DOC_ID_INDEX));
-
- error = convert_error_code_to_mysql(err, 0, NULL);
-
- dict_table_close(innobase_table, TRUE, FALSE); /* closed before the error check, so both outcomes release the handle */
-
- if (error) {
- goto cleanup;
- }
- }
-
- for (i = 0; i < form->s->keys; i++) {
-
- if (i != static_cast<uint>(primary_key_no)) { /* the primary key was created above; a -1 cast to uint does not match any i that occurs in practice */
-
- if ((error = create_index(trx, form, flags,
- norm_name, i))) {
- goto cleanup;
- }
- }
- }
-
- /* Cache all the FTS indexes on this table in the FTS specific
- structure. They are used for FTS indexed column update handling. */
- if (flags2 & DICT_TF2_FTS) {
- fts_t* fts = innobase_table->fts; /* NOTE(review): innobase_table was already passed to dict_table_close() above; presumably still valid because the dictionary lock is held - confirm */
-
- ut_a(fts != NULL);
-
- dict_table_get_all_fts_indexes(innobase_table, fts->indexes);
- }
-
- stmt = innobase_get_stmt(thd, &stmt_len);
-
- if (stmt) {
- dberr_t err = row_table_add_foreign_constraints(
- trx, stmt, stmt_len, norm_name,
- create_info->options & HA_LEX_CREATE_TMP_TABLE);
-
- switch (err) {
-
- case DB_PARENT_NO_INDEX:
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_CANNOT_ADD_FOREIGN,
- "Create table '%s' with foreign key constraint"
- " failed. There is no index in the referenced"
- " table where the referenced columns appear"
- " as the first columns.\n", norm_name);
- break;
-
- case DB_CHILD_NO_INDEX:
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_CANNOT_ADD_FOREIGN,
- "Create table '%s' with foreign key constraint"
- " failed. There is no index in the referencing"
- " table where referencing columns appear"
- " as the first columns.\n", norm_name);
- break;
- default:
- break;
- }
-
- error = convert_error_code_to_mysql(err, flags, NULL);
-
- if (error) {
- goto cleanup;
- }
- }
-
- innobase_commit_low(trx); /* from here on the cleanup label is no longer used */
-
- row_mysql_unlock_data_dictionary(trx);
-
- /* Flush the log to reduce probability that the .frm files and
- the InnoDB data dictionary get out-of-sync if the user runs
- with innodb_flush_log_at_trx_commit = 0 */
-
- log_buffer_flush_to_disk();
-
- innobase_table = dict_table_open_on_name(
- norm_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
-
- DBUG_ASSERT(innobase_table != 0);
-
- innobase_copy_frm_flags_from_create_info(innobase_table, create_info);
-
- dict_stats_update(innobase_table, DICT_STATS_EMPTY_TABLE);
-
- if (innobase_table) { /* NOTE(review): innobase_table has already been passed to the two calls above before this NULL test - confirm whether the test is still needed */
- /* We update the highest file format in the system table
- space, if this table has higher file format setting. */
-
- trx_sys_file_format_max_upgrade(
- (const char**) &innobase_file_format_max,
- dict_table_get_format(innobase_table));
- }
-
- /* Load server stopword into FTS cache */
- if (flags2 & DICT_TF2_FTS) {
- if (!innobase_fts_load_stopword(innobase_table, NULL, thd)) {
- dict_table_close(innobase_table, FALSE, FALSE);
- srv_active_wake_master_thread();
- trx_free_for_mysql(trx);
- DBUG_RETURN(-1); /* the create transaction was already committed above; nothing is rolled back here */
- }
- }
-
- /* Note: We can't call update_thd() as prebuilt will not be
- setup at this stage and so we use thd. */
-
- /* We need to copy the AUTOINC value from the old table if
- this is an ALTER|OPTIMIZE TABLE or CREATE INDEX because CREATE INDEX
- does a table copy too. If query was one of :
-
- CREATE TABLE ...AUTO_INCREMENT = x; or
- ALTER TABLE...AUTO_INCREMENT = x; or
- OPTIMIZE TABLE t; or
- CREATE INDEX x on t(...);
-
- Find out a table definition from the dictionary and get
- the current value of the auto increment field. Set a new
- value to the auto increment field if the value is greater
- than the maximum value in the column. */
-
- if (((create_info->used_fields & HA_CREATE_USED_AUTO)
- || thd_sql_command(thd) == SQLCOM_ALTER_TABLE
- || thd_sql_command(thd) == SQLCOM_OPTIMIZE
- || thd_sql_command(thd) == SQLCOM_CREATE_INDEX)
- && create_info->auto_increment_value > 0) {
-
- auto_inc_value = create_info->auto_increment_value;
-
- dict_table_autoinc_lock(innobase_table);
- dict_table_autoinc_initialize(innobase_table, auto_inc_value);
- dict_table_autoinc_unlock(innobase_table);
- }
-
- dict_table_close(innobase_table, FALSE, FALSE);
-
- /* Tell the InnoDB server that there might be work for
- utility threads: */
-
- srv_active_wake_master_thread();
-
- trx_free_for_mysql(trx);
-
- DBUG_RETURN(0);
-
-cleanup: /* failure while the dictionary was locked: roll back, unlock, free, and return the recorded error */
- trx_rollback_for_mysql(trx);
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx_free_for_mysql(trx);
-
- DBUG_RETURN(error);
-}
-
-/*****************************************************************//**
-Discards or imports an InnoDB tablespace.
-The table is locked exclusively, the requested operation is carried out
-by row_discard_tablespace_for_mysql() or row_import_for_mysql(), and the
-handler's transaction is committed to release the table lock. After a
-successful import the autoinc counter and handler info are refreshed and,
-if persistent statistics are enabled for the table, they are recalculated.
-@return 0 on success, otherwise a HA_ERR_* code or the value produced by
-convert_error_code_to_mysql() */
-UNIV_INTERN
-int
-ha_innobase::discard_or_import_tablespace(
-/*======================================*/
- my_bool discard) /*!< in: TRUE if discard, else import */
-{
- dberr_t err;
- dict_table_t* dict_table;
-
- DBUG_ENTER("ha_innobase::discard_or_import_tablespace");
-
- /* The handler must already be bound to the transaction of the
- current connection. */
- ut_a(prebuilt->trx);
- ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
- ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
- if (high_level_read_only) {
- DBUG_RETURN(HA_ERR_TABLE_READONLY);
- }
-
- if (UNIV_UNLIKELY(prebuilt->trx->fake_changes)) {
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
- }
-
- dict_table = prebuilt->table;
-
- /* A table stored in the system tablespace is rejected for both
- DISCARD and IMPORT. */
- if (dict_table->space == TRX_SYS_SPACE) {
-
- ib_senderrf(
- prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_TABLE_IN_SYSTEM_TABLESPACE,
- table->s->table_name.str);
-
- DBUG_RETURN(HA_ERR_TABLE_NEEDS_UPGRADE);
- }
-
- trx_start_if_not_started(prebuilt->trx);
-
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads. */
- trx_search_latch_release_if_reserved(prebuilt->trx);
-
- /* Obtain an exclusive lock on the table. */
- err = row_mysql_lock_table(
- prebuilt->trx, dict_table, LOCK_X,
- discard ? "setting table lock for DISCARD TABLESPACE"
- : "setting table lock for IMPORT TABLESPACE");
-
- if (err != DB_SUCCESS) {
- /* unable to lock the table: do nothing */
- } else if (discard) {
-
- /* Discarding an already discarded tablespace should be an
- idempotent operation. Also, if the .ibd file is missing the
- user may want to set the DISCARD flag in order to IMPORT
- a new tablespace. */
-
- /* An unreadable table only produces a warning; the discard
- is still attempted below. */
- if (!dict_table->is_readable()) {
- ib_senderrf(
- prebuilt->trx->mysql_thd,
- IB_LOG_LEVEL_WARN, ER_TABLESPACE_MISSING,
- table->s->table_name.str);
- }
-
- err = row_discard_tablespace_for_mysql(
- dict_table->name, prebuilt->trx);
-
- } else if (dict_table->is_readable()) {
- /* IMPORT was requested although the table is readable:
- report ER_TABLESPACE_EXISTS and bail out. */
-
- /* Commit the transaction in order to
- release the table lock. */
- trx_commit_for_mysql(prebuilt->trx);
-
- ib_senderrf(
- prebuilt->trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_TABLESPACE_EXISTS, table->s->table_name.str);
-
- DBUG_RETURN(HA_ERR_TABLE_EXIST);
- } else {
- err = row_import_for_mysql(dict_table, prebuilt);
-
- if (err == DB_SUCCESS) {
-
- /* Re-initialize the autoinc counter under the
- autoinc lock when the MySQL table has an
- auto-increment field. */
- if (table->found_next_number_field) {
- dict_table_autoinc_lock(dict_table);
- innobase_initialize_autoinc();
- dict_table_autoinc_unlock(dict_table);
- }
-
- /* Refresh the handler's cached info. */
- info(HA_STATUS_TIME
- | HA_STATUS_CONST
- | HA_STATUS_VARIABLE
- | HA_STATUS_AUTO);
-
- fil_crypt_set_encrypt_tables(srv_encrypt_tables);
- }
- }
-
- /* Commit the transaction in order to release the table lock. */
- trx_commit_for_mysql(prebuilt->trx);
-
- /* Only a successful IMPORT triggers a statistics recalculation. */
- if (err == DB_SUCCESS && !discard
- && dict_stats_is_persistent_enabled(dict_table)) {
- dberr_t ret;
-
- /* Adjust the persistent statistics. */
- ret = dict_stats_update(dict_table,
- DICT_STATS_RECALC_PERSISTENT);
-
- /* A statistics failure is reported as a warning only; it
- does not change the value returned to the caller. */
- if (ret != DB_SUCCESS) {
- push_warning_printf(
- ha_thd(),
- Sql_condition::WARN_LEVEL_WARN,
- ER_ALTER_INFO,
- "Error updating stats for table '%s'"
- " after table rebuild: %s",
- dict_table->name, ut_strerr(ret));
- }
- }
-
- DBUG_RETURN(convert_error_code_to_mysql(err, dict_table->flags, NULL));
-}
-
-/*****************************************************************//**
-Empties an InnoDB table by delegating to row_truncate_table_for_mysql().
-The request is refused with HA_ERR_TABLE_READONLY in high-level read-only
-mode and with HA_ERR_WRONG_COMMAND for a fake-changes transaction.
-@return error number: HA_ERR_NO_SUCH_TABLE when the tablespace is
-discarded or missing, otherwise the converted InnoDB error code */
-UNIV_INTERN
-int
-ha_innobase::truncate()
-/*===================*/
-{
-	DBUG_ENTER("ha_innobase::truncate");
-
-	if (high_level_read_only) {
-		DBUG_RETURN(HA_ERR_TABLE_READONLY);
-	}
-
-	/* Bind prebuilt->trx to the transaction of the current thd
-	(creating one if necessary) before it is inspected. */
-	update_thd(ha_thd());
-
-	DBUG_ASSERT(share->ib_table == prebuilt->table);
-
-	trx_t* const	user_trx = prebuilt->trx;
-
-	if (UNIV_UNLIKELY(user_trx->fake_changes)) {
-		DBUG_RETURN(HA_ERR_WRONG_COMMAND);
-	}
-
-	/* A transaction that has not started yet is flagged as one
-	that is going to take locks. */
-	if (!trx_is_started(user_trx)) {
-		++user_trx->will_lock;
-	}
-
-	const dberr_t	err = row_truncate_table_for_mysql(
-		prebuilt->table, user_trx);
-
-	const bool	tablespace_gone =
-		(err == DB_TABLESPACE_DELETED
-		 || err == DB_TABLESPACE_NOT_FOUND);
-
-	if (tablespace_gone) {
-		/* Tell the client whether the tablespace was discarded
-		or is simply missing. */
-		ib_senderrf(
-			user_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
-			(err == DB_TABLESPACE_DELETED ?
-			 ER_TABLESPACE_DISCARDED : ER_TABLESPACE_MISSING),
-			table->s->table_name.str);
-
-		table->status = STATUS_NOT_FOUND;
-
-		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
-	}
-
-	const int	mysql_error = convert_error_code_to_mysql(
-		err, prebuilt->table->flags, user_trx->mysql_thd);
-
-	table->status = STATUS_NOT_FOUND;
-
-	DBUG_RETURN(mysql_error);
-}
-
-/*****************************************************************//**
-Drops a table from an InnoDB database. Before calling this function,
-MySQL calls innobase_commit to commit the transaction of the current user.
-Then the current user cannot have locks set on the table. Drop table
-operation inside InnoDB will remove all locks any user has on the table
-inside InnoDB.
-The drop runs in a separate transaction allocated here, which is committed
-and freed before returning. If the table is not found and
-lower_case_table_names is 1, the drop is retried once for partitioned
-table names using a differently-cased name.
-@return error number */
-UNIV_INTERN
-int
-ha_innobase::delete_table(
-/*======================*/
- const char* name) /*!< in: table name */
-{
- ulint name_len;
- dberr_t err;
- trx_t* parent_trx;
- trx_t* trx;
- THD* thd = ha_thd();
- char norm_name[FN_REFLEN];
-
- DBUG_ENTER("ha_innobase::delete_table");
-
- /* Debug-only hooks that run self tests of the name helpers. */
- DBUG_EXECUTE_IF(
- "test_normalize_table_name_low",
- test_normalize_table_name_low();
- );
- DBUG_EXECUTE_IF(
- "test_ut_format_name",
- test_ut_format_name();
- );
-
- /* Strangely, MySQL passes the table name without the '.frm'
- extension, in contrast to ::create */
- normalize_table_name(norm_name, name);
-
- /* Refuse the drop in read-only mode or at a high forced-recovery
- level; dropping one of the "magic" monitor tables additionally
- depends on the check_global_access(thd, PROCESS_ACL) result. */
- if (srv_read_only_mode
- || srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) {
- DBUG_RETURN(HA_ERR_TABLE_READONLY);
- } else if (row_is_magic_monitor_table(norm_name)
- && check_global_access(thd, PROCESS_ACL)) {
- DBUG_RETURN(HA_ERR_GENERIC);
- }
-
- parent_trx = check_trx_exists(thd);
-
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
-
- trx_search_latch_release_if_reserved(parent_trx);
-
- /* The drop itself runs in its own transaction object. */
- trx = innobase_trx_allocate(thd);
-
- if (UNIV_UNLIKELY(trx->fake_changes)) {
- innobase_commit_low(trx);
- trx_free_for_mysql(trx);
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
- }
-
- /* name_len is only used for the sanity assertion below. */
- name_len = strlen(name);
-
- ut_a(name_len < 1000);
-
- /* Either the transaction is already flagged as a locking transaction
- or it hasn't been started yet. */
-
- ut_a(!trx_is_started(trx) || trx->will_lock > 0);
-
- /* We are doing a DDL operation. */
- ++trx->will_lock;
- trx->ddl = true;
-
- /* Drop the table in InnoDB */
- err = row_drop_table_for_mysql(
- norm_name, trx, thd_sql_command(thd) == SQLCOM_DROP_DB,
- FALSE);
-
-
- /* Second chance for partitioned tables whose stored name differs
- only in letter case from the normalized one. */
- if (err == DB_TABLE_NOT_FOUND
- && innobase_get_lower_case_table_names() == 1) {
- char* is_part = NULL;
-#ifdef __WIN__
- is_part = strstr(norm_name, "#p#");
-#else
- is_part = strstr(norm_name, "#P#");
-#endif /* __WIN__ */
-
- if (is_part) {
- char par_case_name[FN_REFLEN];
-
-#ifndef __WIN__
- /* Check for the table using lower
- case name, including the partition
- separator "P" */
- strcpy(par_case_name, norm_name);
- innobase_casedn_str(par_case_name);
-#else
- /* On Windows platform, check
- whether there exists table name in
- system table whose name is
- not being normalized to lower case */
- normalize_table_name_low(
- par_case_name, name, FALSE);
-#endif
- err = row_drop_table_for_mysql(
- par_case_name, trx,
- thd_sql_command(thd) == SQLCOM_DROP_DB,
- FALSE);
- }
- }
-
- /* Flush the log to reduce probability that the .frm files and
- the InnoDB data dictionary get out-of-sync if the user runs
- with innodb_flush_log_at_trx_commit = 0 */
-
- log_buffer_flush_to_disk();
-
- innobase_commit_low(trx);
-
- trx_free_for_mysql(trx);
-
- DBUG_RETURN(convert_error_code_to_mysql(err, 0, NULL));
-}
-
-/*****************************************************************//**
-Defragment table.
-Opens the table by name and hands its indexes (or only the index called
-index_name) to the defragmentation queue via btr_defragment_add_index().
-Corrupted indexes and indexes without a root page are skipped.
-@return 0 on success, otherwise an error number: ER_FEATURE_DISABLED if
-srv_defragment is off, ER_NO_SUCH_TABLE if the table cannot be opened,
-ER_SP_ALREADY_EXISTS if an index is already queued, ER_QUERY_INTERRUPTED
-if the waiting query was killed, ER_NO_SUCH_INDEX if index_name matched
-no index, or a converted InnoDB error */
-UNIV_INTERN
-int
-ha_innobase::defragment_table(
-/*==========================*/
-	const char*	name,		/*!< in: table name */
-	const char*	index_name,	/*!< in: index name, or NULL to
-					process every index of the table */
-	bool		async)		/*!< in: if false, wait for each
-					queued index to finish before
-					continuing */
-{
-	char		norm_name[FN_REFLEN];
-	dict_table_t*	table = NULL;
-	dict_index_t*	index = NULL;
-	ibool		one_index = (index_name != 0);
-	int		ret = 0;
-	dberr_t		err = DB_SUCCESS;
-
-	if (!srv_defragment) {
-		return ER_FEATURE_DISABLED;
-	}
-
-	normalize_table_name(norm_name, name);
-
-	table = dict_table_open_on_name(norm_name, FALSE,
-					FALSE, DICT_ERR_IGNORE_NONE);
-
-	/* The open may fail to produce a table object; without this
-	check both the index walk and dict_table_close() below would
-	dereference a NULL pointer. */
-	if (table == NULL) {
-		return ER_NO_SUCH_TABLE;
-	}
-
-	for (index = dict_table_get_first_index(table); index;
-	     index = dict_table_get_next_index(index)) {
-
-		if (dict_index_is_corrupted(index)) {
-			continue;
-		}
-
-		if (index->page == FIL_NULL) {
-			/* Do not defragment auxiliary tables related
-			to FULLTEXT INDEX. */
-			ut_ad(index->type & DICT_FTS);
-			continue;
-		}
-
-		if (one_index && strcasecmp(index_name, index->name) != 0) {
-			continue;
-		}
-
-		if (btr_defragment_find_index(index)) {
-			/* We borrow this error code. When the same index is
-			already in the defragmentation queue, issuing another
-			defragmentation only introduces overhead. We return
-			an error here to let the user know this is not
-			necessary. Note that this will fail a query that is
-			trying to defragment a full table if one of the
-			indexes in that table is already in defragmentation.
-			We choose this behavior so the user is aware of this
-			rather than silently defragmenting the other indexes
-			of that table. */
-			ret = ER_SP_ALREADY_EXISTS;
-			break;
-		}
-
-		os_event_t	event = btr_defragment_add_index(
-			index, async, &err);
-
-		if (err != DB_SUCCESS) {
-			push_warning_printf(
-				current_thd,
-				Sql_condition::WARN_LEVEL_WARN,
-				ER_NO_SUCH_TABLE,
-				"Table %s is encrypted but encryption service or"
-				" used key_id is not available. "
-				" Can't continue checking table.",
-				index->table->name);
-
-			ret = convert_error_code_to_mysql(err, 0, current_thd);
-			break;
-		}
-
-		if (!async && event) {
-			/* Synchronous mode: wait on the event in bounded
-			slices so that a killed query is noticed
-			(presumably a non-zero wait result means the wait
-			timed out -- TODO confirm). */
-			while (os_event_wait_time(event, 1000000)) {
-				if (thd_killed(current_thd)) {
-					btr_defragment_remove_index(index);
-					ret = ER_QUERY_INTERRUPTED;
-					break;
-				}
-			}
-			os_event_free(event);
-		}
-
-		if (ret) {
-			break;
-		}
-
-		if (one_index) {
-			/* The requested index was found and processed;
-			clearing the flag suppresses ER_NO_SUCH_INDEX
-			below. */
-			one_index = FALSE;
-			break;
-		}
-	}
-
-	dict_table_close(table, FALSE, FALSE);
-
-	/* one_index is still set only if no index matched index_name. */
-	if (ret == 0 && one_index) {
-		ret = ER_NO_SUCH_INDEX;
-	}
-
-	return ret;
-}
-
-/*****************************************************************//**
-Removes all tables in the named database inside InnoDB.
-The database name is taken from the last directory component of path and
-passed, with a trailing '/', to row_drop_database_for_mysql(). Nothing is
-done in read-only mode, for a fake-changes transaction, or if the name
-buffer cannot be allocated. */
-static
-void
-innobase_drop_database(
-/*===================*/
-	handlerton*	hton,	/*!< in: handlerton of Innodb */
-	char*		path)	/*!< in: database path; inside InnoDB the name
-				of the last directory in the path is used as
-				the database name: for example, in
-				'mysql/data/test' the database name is 'test' */
-{
-	ulint	len = 0;
-	trx_t*	trx;
-	char*	ptr;
-	char*	namebuf;
-	THD*	thd = current_thd;
-
-	DBUG_ASSERT(hton == innodb_hton_ptr);
-
-	if (srv_read_only_mode) {
-		return;
-	}
-
-	/* In the Windows plugin, thd = current_thd is always NULL */
-	if (thd) {
-		/* Get the transaction associated with the current thd,
-		or create one if not yet created */
-		trx_t*	parent_trx = check_trx_exists(thd);
-
-		/* In case MySQL calls this in the middle of a SELECT
-		query, release possible adaptive hash latch to avoid
-		deadlocks of threads */
-
-		trx_search_latch_release_if_reserved(parent_trx);
-	}
-
-	/* Start at the character before the last one (the last one is
-	skipped) and walk backwards to the previous path separator,
-	counting the length of the final directory name. */
-	ptr = strend(path) - 2;
-
-	while (ptr >= path && *ptr != '\\' && *ptr != '/') {
-		ptr--;
-		len++;
-	}
-
-	ptr++;
-	namebuf = (char*) my_malloc((uint) len + 2, MYF(0));
-
-	if (namebuf == NULL) {
-		/* Without this check the memcpy() below would write
-		through a NULL pointer when the allocation fails. No
-		transaction has been allocated yet, so there is nothing
-		to clean up. */
-		sql_print_error("InnoDB: out of memory while dropping"
-				" database '%s'", path);
-		return;
-	}
-
-	/* Build "<database name>/". */
-	memcpy(namebuf, ptr, len);
-	namebuf[len] = '/';
-	namebuf[len + 1] = '\0';
-#ifdef __WIN__
-	innobase_casedn_str(namebuf);
-#endif
-	trx = innobase_trx_allocate(thd);
-
-	if (UNIV_UNLIKELY(trx->fake_changes)) {
-		my_free(namebuf);
-		innobase_commit_low(trx);
-		trx_free_for_mysql(trx);
-		return; /* ignore */
-	}
-
-	/* Either the transaction is already flagged as a locking transaction
-	or it hasn't been started yet. */
-
-	ut_a(!trx_is_started(trx) || trx->will_lock > 0);
-
-	/* We are doing a DDL operation. */
-	++trx->will_lock;
-
-	row_drop_database_for_mysql(namebuf, trx);
-
-	my_free(namebuf);
-
-	/* Flush the log to reduce probability that the .frm files and
-	the InnoDB data dictionary get out-of-sync if the user runs
-	with innodb_flush_log_at_trx_commit = 0 */
-
-	log_buffer_flush_to_disk();
-
-	innobase_commit_low(trx);
-	trx_free_for_mysql(trx);
-}
-
-/*********************************************************************//**
-Renames an InnoDB table.
-Both names are normalized, the rename is performed by
-row_rename_table_for_mysql() while the data dictionary is locked, and the
-log buffer is flushed afterwards. If the table is not found and
-lower_case_table_names is 1, the rename is retried once for partitioned
-table names using a differently-cased source name. The transaction is
-neither committed nor freed here.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-innobase_rename_table(
-/*==================*/
- trx_t* trx, /*!< in: transaction */
- const char* from, /*!< in: old name of the table */
- const char* to) /*!< in: new name of the table */
-{
- dberr_t error;
- char norm_to[FN_REFLEN];
- char norm_from[FN_REFLEN];
-
- DBUG_ENTER("innobase_rename_table");
- DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
-
- ut_ad(!srv_read_only_mode);
-
- normalize_table_name(norm_to, to);
- normalize_table_name(norm_from, from);
-
- DEBUG_SYNC_C("innodb_rename_table_ready");
-
- trx_start_if_not_started(trx);
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations. */
-
- row_mysql_lock_data_dictionary(trx);
-
- /* Transaction must be flagged as a locking transaction or it hasn't
- been started yet. */
-
- ut_a(trx->will_lock > 0);
-
- error = row_rename_table_for_mysql(
- norm_from, norm_to, trx, TRUE);
-
- if (error != DB_SUCCESS) {
- /* Second chance for partitioned tables whose stored name
- differs only in letter case from the normalized one. */
- if (error == DB_TABLE_NOT_FOUND
- && innobase_get_lower_case_table_names() == 1) {
- char* is_part = NULL;
-#ifdef __WIN__
- is_part = strstr(norm_from, "#p#");
-#else
- is_part = strstr(norm_from, "#P#");
-#endif /* __WIN__ */
-
- if (is_part) {
- char par_case_name[FN_REFLEN];
-#ifndef __WIN__
- /* Check for the table using lower
- case name, including the partition
- separator "P" */
- strcpy(par_case_name, norm_from);
- innobase_casedn_str(par_case_name);
-#else
- /* On Windows platform, check
- whether there exists table name in
- system table whose name is
- not being normalized to lower case */
- normalize_table_name_low(
- par_case_name, from, FALSE);
-#endif
- trx_start_if_not_started(trx);
- error = row_rename_table_for_mysql(
- par_case_name, norm_to, trx, TRUE);
- }
- }
-
- /* Reached with DB_SUCCESS only when the retry above
- succeeded: log how the rename finally went through. */
- if (error == DB_SUCCESS) {
-#ifndef __WIN__
- sql_print_warning("Rename partition table %s "
- "succeeds after converting to lower "
- "case. The table may have "
- "been moved from a case "
- "in-sensitive file system.\n",
- norm_from);
-#else
- sql_print_warning("Rename partition table %s "
- "succeeds after skipping the step to "
- "lower case the table name. "
- "The table may have been "
- "moved from a case sensitive "
- "file system.\n",
- norm_from);
-#endif /* __WIN__ */
- }
- }
-
- row_mysql_unlock_data_dictionary(trx);
-
- /* Flush the log to reduce probability that the .frm
- files and the InnoDB data dictionary get out-of-sync
- if the user runs with innodb_flush_log_at_trx_commit = 0 */
-
- log_buffer_flush_to_disk();
-
- DBUG_RETURN(error);
-}
-
-/*********************************************************************//**
-Renames an InnoDB table (handler entry point).
-Allocates a separate DDL transaction, delegates the rename to
-innobase_rename_table(), commits and frees that transaction, and on
-success also renames the table in the statistics storage via
-dict_stats_rename_table(). A duplicate-key result is reported to the
-client as ER_TABLE_EXISTS_ERROR and converted to a generic error.
-@return 0 or error code */
-UNIV_INTERN
-int
-ha_innobase::rename_table(
-/*======================*/
- const char* from, /*!< in: old name of the table */
- const char* to) /*!< in: new name of the table */
-{
- trx_t* trx;
- dberr_t error;
- trx_t* parent_trx;
- THD* thd = ha_thd();
-
- DBUG_ENTER("ha_innobase::rename_table");
-
- if (high_level_read_only) {
- ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
- DBUG_RETURN(HA_ERR_TABLE_READONLY);
- }
-
- /* Get the transaction associated with the current thd, or create one
- if not yet created */
-
- parent_trx = check_trx_exists(thd);
-
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
-
- trx_search_latch_release_if_reserved(parent_trx);
-
- /* The rename itself runs in its own transaction object. */
- trx = innobase_trx_allocate(thd);
- if (UNIV_UNLIKELY(trx->fake_changes)) {
- innobase_commit_low(trx);
- trx_free_for_mysql(trx);
- DBUG_RETURN(HA_ERR_WRONG_COMMAND);
- }
-
- /* We are doing a DDL operation. */
- ++trx->will_lock;
- trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
-
- error = innobase_rename_table(trx, from, to);
-
- DEBUG_SYNC(thd, "after_innobase_rename_table");
-
- innobase_commit_low(trx);
- trx_free_for_mysql(trx);
-
- /* The statistics are renamed only after the DDL transaction has
- been committed and freed. */
- if (error == DB_SUCCESS) {
- char norm_from[MAX_FULL_NAME_LEN];
- char norm_to[MAX_FULL_NAME_LEN];
- char errstr[512];
- dberr_t ret;
-
- normalize_table_name(norm_from, from);
- normalize_table_name(norm_to, to);
-
- ret = dict_stats_rename_table(norm_from, norm_to,
- errstr, sizeof(errstr));
-
- /* A failure here is logged and pushed as a warning (using
- the ER_LOCK_WAIT_TIMEOUT code); it does not change the
- value returned for the rename itself. */
- if (ret != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: %s\n", errstr);
-
- push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_LOCK_WAIT_TIMEOUT, errstr);
- }
- }
-
- /* Add a special case to handle the Duplicated Key error
- and return DB_ERROR instead.
- This is to avoid a possible SIGSEGV error from mysql error
- handling code. Currently, mysql handles the Duplicated Key
- error by re-entering the storage layer and getting dup key
- info by calling get_dup_key(). This operation requires a valid
- table handle ('row_prebuilt_t' structure) which could no
- longer be available in the error handling stage. The suggested
- solution is to report a 'table exists' error message (since
- the dup key error here is due to an existing table whose name
- is the one we are trying to rename to) and return the generic
- error code. */
- if (error == DB_DUPLICATE_KEY) {
- my_error(ER_TABLE_EXISTS_ERROR, MYF(0), to);
-
- error = DB_ERROR;
- }
-
- DBUG_RETURN(convert_error_code_to_mysql(error, 0, NULL));
-}
-
-/*********************************************************************//**
-Estimates the number of index records in a range.
-The MySQL key bounds are converted to InnoDB tuples and the estimate is
-obtained from btr_estimate_n_rows_in_range(). HA_POS_ERROR is returned
-when the table is discarded, the index cannot be found, or a search mode
-is unsupported. An estimate of 0 is reported as 1.
-@return estimated number of rows */
-UNIV_INTERN
-ha_rows
-ha_innobase::records_in_range(
-/*==========================*/
- uint keynr, /*!< in: index number */
- key_range *min_key, /*!< in: start key value of the
- range, may also be 0 */
- key_range *max_key) /*!< in: range end key val, may
- also be 0 */
-{
- KEY* key;
- dict_index_t* index;
- dtuple_t* range_start;
- dtuple_t* range_end;
- ib_int64_t n_rows;
- ulint mode1;
- ulint mode2;
- mem_heap_t* heap;
-
- DBUG_ENTER("records_in_range");
-
- ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
-
- prebuilt->trx->op_info = (char*)"estimating records in index range";
-
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
-
- trx_search_latch_release_if_reserved(prebuilt->trx);
-
- /* Note: this changes the handler's active index as a side effect. */
- active_index = keynr;
-
- key = table->key_info + active_index;
-
- index = innobase_get_index(keynr);
-
- /* There exists possibility of not being able to find requested
- index due to inconsistency between MySQL and InnoDB dictionary info.
- Necessary message should have been printed in innobase_get_index() */
- if (dict_table_is_discarded(prebuilt->table)) {
- n_rows = HA_POS_ERROR;
- goto func_exit;
- }
- if (UNIV_UNLIKELY(!index)) {
- n_rows = HA_POS_ERROR;
- goto func_exit;
- }
- /* NOTE(review): the two branches below store a HA_ERR_* error code
- in n_rows, which is then returned as if it were a row estimate.
- Confirm whether callers rely on this before changing it. */
- if (dict_index_is_corrupted(index)) {
- n_rows = HA_ERR_INDEX_CORRUPT;
- goto func_exit;
- }
- if (UNIV_UNLIKELY(!row_merge_is_index_usable(prebuilt->trx, index))) {
- n_rows = HA_ERR_TABLE_DEF_CHANGED;
- goto func_exit;
- }
-
- /* One heap sized for both range tuples. */
- heap = mem_heap_create(2 * (key->ext_key_parts * sizeof(dfield_t)
- + sizeof(dtuple_t)));
-
- range_start= dtuple_create(heap, key->ext_key_parts);
- dict_index_copy_types(range_start, index, key->ext_key_parts);
-
- range_end= dtuple_create(heap, key->ext_key_parts);
- dict_index_copy_types(range_end, index, key->ext_key_parts);
-
- /* Convert the lower bound; a missing min_key is passed as a null
- key of length 0. */
- row_sel_convert_mysql_key_to_innobase(
- range_start,
- prebuilt->srch_key_val1,
- prebuilt->srch_key_val_len,
- index,
- (byte*) (min_key ? min_key->key :
- (const uchar*) 0),
- (ulint) (min_key ? min_key->length : 0),
- prebuilt->trx);
- DBUG_ASSERT(min_key
- ? range_start->n_fields > 0
- : range_start->n_fields == 0);
-
- /* Convert the upper bound in the same way. */
- row_sel_convert_mysql_key_to_innobase(
- range_end,
- prebuilt->srch_key_val2,
- prebuilt->srch_key_val_len,
- index,
- (byte*) (max_key ? max_key->key :
- (const uchar*) 0),
- (ulint) (max_key ? max_key->length : 0),
- prebuilt->trx);
- DBUG_ASSERT(max_key
- ? range_end->n_fields > 0
- : range_end->n_fields == 0);
-
- /* A missing bound is treated as HA_READ_KEY_EXACT when choosing
- the InnoDB search mode. */
- mode1 = convert_search_mode_to_innobase(min_key ? min_key->flag :
- HA_READ_KEY_EXACT);
- mode2 = convert_search_mode_to_innobase(max_key ? max_key->flag :
- HA_READ_KEY_EXACT);
-
- if (mode1 != PAGE_CUR_UNSUPP && mode2 != PAGE_CUR_UNSUPP) {
-
- n_rows = btr_estimate_n_rows_in_range(index, range_start,
- mode1, range_end,
- mode2, prebuilt->trx);
- } else {
-
- n_rows = HA_POS_ERROR;
- }
-
- mem_heap_free(heap);
-
-func_exit:
-
- prebuilt->trx->op_info = (char*)"";
-
- /* The MySQL optimizer seems to believe an estimate of 0 rows is
- always accurate and may return the result 'Empty set' based on that.
- The accuracy is not guaranteed, and even if it were, for a locking
- read we should anyway perform the search to set the next-key lock.
- Add 1 to the value to make sure MySQL does not make the assumption! */
-
- if (n_rows == 0) {
- n_rows = 1;
- }
-
- DBUG_RETURN((ha_rows) n_rows);
-}
-
-/*********************************************************************//**
-Gives an UPPER BOUND to the number of rows in a table. This is used in
-filesort.cc. The bound is twice the byte size of the leaf pages of the
-table's first index divided by the minimum record length of that index.
-@return upper bound of rows */
-UNIV_INTERN
-ha_rows
-ha_innobase::estimate_rows_upper_bound()
-/*====================================*/
-{
-	DBUG_ENTER("estimate_rows_upper_bound");
-
-	/* It is not known whether MySQL may call this before
-	external_lock(), so re-bind the handler to the current thd. */
-	update_thd(ha_thd());
-
-	prebuilt->trx->op_info = "calculating upper bound for table rows";
-
-	/* If we are called in the middle of a SELECT, give up a possibly
-	held adaptive hash latch so that threads cannot deadlock. */
-	trx_search_latch_release_if_reserved(prebuilt->trx);
-
-	const dict_index_t*	first_index =
-		dict_table_get_first_index(prebuilt->table);
-
-	const ulint	n_leaf_pages = first_index->stat_n_leaf_pages;
-
-	ut_a(n_leaf_pages > 0);
-
-	const ulonglong	leaf_bytes =
-		((ulonglong) n_leaf_pages) * UNIV_PAGE_SIZE;
-
-	/* Statistics are only recalculated in row0mysql.cc once a table
-	has grown by a threshold factor, hence the safety factor 2 in
-	front of the size / minimum-record-length quotient. */
-	ulonglong	upper_bound = 2 * leaf_bytes
-		/ dict_index_calc_min_rec_len(first_index);
-
-	prebuilt->trx->op_info = "";
-
-	/* Debug hook: report fewer rows than MERGEBUFF to simulate not
-	having enough space to merge the externally sorted file blocks. */
-	DBUG_EXECUTE_IF("set_num_rows_lt_MERGEBUFF",
-			upper_bound = 2;
-			DBUG_SET("-d,set_num_rows_lt_MERGEBUFF");
-		       );
-
-	DBUG_RETURN((ha_rows) upper_bound);
-}
-
-/*********************************************************************//**
-How many seeks it will take to read through the table. This is to be
-comparable to the number returned by records_in_range so that we can
-decide if we should scan the table or use keys. The value reported is
-the table's stat_clustered_index_size.
-@return estimated time measured in disk seeks */
-UNIV_INTERN
-double
-ha_innobase::scan_time()
-/*====================*/
-{
-	/* MySQL seems to prefer table scans over index searches too
-	eagerly, so a sequential read is priced like a random disk read:
-	the size is deliberately not divided by 10, even though that
-	would be physically realistic. */
-
-	/* The statistics latch is intentionally not taken (the code is
-	kept under #if 0) for performance reasons. The price is that an
-	uninitialized value could reach the caller, which in the worst
-	case makes a query plan go bogus or triggers a Valgrind warning. */
-#if 0
-	/* avoid potential lock order violation with dict_table_stats_lock()
-	below */
-	update_thd(ha_thd());
-	trx_search_latch_release_if_reserved(prebuilt->trx);
-
-	dict_table_stats_lock(prebuilt->table, RW_S_LATCH);
-#endif
-
-	ut_a(prebuilt->table->stat_initialized);
-
-	const ulint	clust_index_size =
-		prebuilt->table->stat_clustered_index_size;
-
-#if 0
-	dict_table_stats_unlock(prebuilt->table, RW_S_LATCH);
-#endif
-
-	return((double) clust_index_size);
-}
-
-/******************************************************************//**
-Calculate the time it takes to read a set of ranges through an index.
-This enables us to optimise reads for clustered indexes; any other index
-is costed by handler::read_time().
-@return estimated time measured in disk seeks */
-UNIV_INTERN
-double
-ha_innobase::read_time(
-/*===================*/
-	uint	index,	/*!< in: key number */
-	uint	ranges,	/*!< in: how many ranges */
-	ha_rows	rows)	/*!< in: estimated number of rows in the ranges */
-{
-	const bool	is_clustered = (index == table->s->primary_key);
-
-	if (!is_clustered) {
-		return(handler::read_time(index, ranges, rows));
-	}
-
-	/* Model: cost is proportional to the share of a full scan that
-	the requested rows represent, plus at most one seek per range. */
-	const double	full_scan_cost = scan_time();
-	const ha_rows	max_rows = estimate_rows_upper_bound();
-
-	if (max_rows < rows) {
-		/* More rows requested than the table can hold according
-		to the upper bound: cap the cost at a full scan. */
-		return(full_scan_cost);
-	}
-
-	return(ranges + (double) rows / (double) max_rows * full_scan_cost);
-}
-
-/******************************************************************//**
-Return the size of the InnoDB memory buffer.
-@return the value of innobase_buffer_pool_size */
-UNIV_INTERN
-longlong
-ha_innobase::get_memory_buffer_size() const
-/*=======================================*/
-{
-	return innobase_buffer_pool_size;
-}
-
-/*********************************************************************//**
-Calculates the key number used inside MySQL for an Innobase index. We will
-first check the "index translation table" for a match of the index to get
-the index number. If there does not exist an "index translation table",
-or not able to find the index in the translation table, then we will fall back
-to the traditional way of looping through dict_index_t list to find a
-match. In this case, we have to take into account if we generated a
-default clustered index for the table
-@return the key number used inside MySQL, or -1 if the index is present in
-the InnoDB index list but has no MySQL key number */
-static
-int
-innobase_get_mysql_key_number_for_index(
-/*====================================*/
- INNOBASE_SHARE* share, /*!< in: share structure for index
- translation table. */
- const TABLE* table, /*!< in: table in MySQL data
- dictionary */
- dict_table_t* ib_table,/*!< in: table in Innodb data
- dictionary */
- const dict_index_t* index) /*!< in: index */
-{
- const dict_index_t* ind;
- unsigned int i;
-
- ut_a(index);
-
- /* If index does not belong to the table object of share structure
- (ib_table comes from the share structure) search the index->table
- object instead */
- if (index->table != ib_table) {
- i = 0;
- ind = dict_table_get_first_index(index->table);
-
- /* Count the position of index in its own table's index list.
- The loop has no end-of-list check, so it relies on index
- being present in that list. */
- while (index != ind) {
- ind = dict_table_get_next_index(ind);
- i++;
- }
-
- /* A generated default clustered index occupies the first
- position but has no MySQL key number: shift down by one. */
- if (row_table_got_default_clust_index(index->table)) {
- ut_a(i > 0);
- i--;
- }
-
- return(i);
- }
-
- /* If index translation table exists, we will first check
- the index through index translation table for a match. */
- if (share->idx_trans_tbl.index_mapping) {
- for (i = 0; i < share->idx_trans_tbl.index_count; i++) {
- if (share->idx_trans_tbl.index_mapping[i] == index) {
- return(i);
- }
- }
-
- /* Print an error message if we cannot find the index
- in the "index translation table". */
- if (*index->name != TEMP_INDEX_PREFIX) {
- sql_print_error("Cannot find index %s in InnoDB index "
- "translation table.", index->name);
- }
- }
-
- /* If we do not have an "index translation table", or not able
- to find the index in the translation table, we'll directly find
- matching index with information from mysql TABLE structure and
- InnoDB dict_index_t list */
- for (i = 0; i < table->s->keys; i++) {
- ind = dict_table_get_index_on_name(
- ib_table, table->key_info[i].name);
-
- if (index == ind) {
- return(i);
- }
- }
-
- /* Last resort: walk the InnoDB index list of the table to check
- whether the index exists there at all. If it does, it has no MySQL
- key number and -1 is returned. */
- for (ind = dict_table_get_first_index(ib_table);
- ind != NULL;
- ind = dict_table_get_next_index(ind)) {
- if (index == ind) {
- /* Temp index is internal to InnoDB, that is
- not present in the MySQL index list, so no
- need to print such mismatch warning. */
- if (*(index->name) != TEMP_INDEX_PREFIX) {
- sql_print_warning(
- "Find index %s in InnoDB index list "
- "but not its MySQL index number "
- "It could be an InnoDB internal index.",
- index->name);
- }
- return(-1);
- }
- }
-
- /* The index was found nowhere: treated as a fatal error. */
- ut_error;
-
- return(-1);
-}
-
-/*********************************************************************//**
-Calculate Record Per Key value. Need to exclude the NULL value if
-innodb_stats_method is set to "nulls_ignored".
-With no recorded distinct values the total record count is returned;
-otherwise the result is records divided by the number of distinct values,
-with NULL rows taken out of both figures in "nulls_ignored" mode.
-@return estimated record per key value */
-static
-ha_rows
-innodb_rec_per_key(
-/*===============*/
-	dict_index_t*	index,		/*!< in: dict_index_t structure */
-	ulint		i,		/*!< in: the column we are
-					calculating rec per key */
-	ha_rows		records)	/*!< in: estimated total records */
-{
-	ut_a(index->table->stat_initialized);
-
-	ut_ad(i < dict_index_get_n_unique(index));
-
-	const ib_uint64_t	n_distinct = index->stat_n_diff_key_vals[i];
-
-	if (n_distinct == 0) {
-		/* No distinct values recorded. */
-		return(records);
-	}
-
-	if (srv_innodb_stats_method != SRV_STATS_NULLS_IGNORED) {
-		DEBUG_SYNC_C("after_checking_for_0");
-
-		return((ha_rows) (records / n_distinct));
-	}
-
-	/* "nulls_ignored": derive the NULL count from the non-NULL count.
-	In theory the non-NULL count never exceeds the record count, but
-	these are statistics and may be slightly off, so the difference
-	is clamped at zero instead of being allowed to wrap around. */
-	const ib_uint64_t	n_not_null =
-		index->stat_n_non_null_key_vals[i];
-
-	const ib_uint64_t	n_nulls =
-		(records < n_not_null) ? 0 : records - n_not_null;
-
-	if (n_distinct <= n_nulls) {
-		/* At least as many NULLs as distinct values: the table
-		is considered to consist mostly of NULL values. */
-		return(1);
-	}
-
-	/* Leave the NULL rows out of both the numerator and the
-	denominator. */
-	return((ha_rows) ((records - n_nulls) / (n_distinct - n_nulls)));
-}
-
-/*********************************************************************//**
-Returns statistics information of the table to the MySQL interpreter,
-in various fields of the handle object.
-Each HA_STATUS_* bit in flag selects one section below:
-HA_STATUS_TIME (optionally refresh the statistics), HA_STATUS_VARIABLE
-(row count and sizes), HA_STATUS_CONST (per-index rec_per_key and create
-time), HA_STATUS_ERRKEY (index of the last error) and HA_STATUS_AUTO
-(auto-increment value).
-@return HA_ERR_* error code or 0 */
-UNIV_INTERN
-int
-ha_innobase::info_low(
-/*==================*/
- uint flag, /*!< in: what information is requested */
- bool is_analyze) /*!< in: true when called for ANALYZE (see
- ha_innobase::analyze()), false otherwise */
-{
- dict_table_t* ib_table;
- ha_rows rec_per_key;
- ib_uint64_t n_rows;
- os_file_stat_t stat_info;
-
- DBUG_ENTER("info");
-
- /* If we are forcing recovery at a high level, we will suppress
- statistics calculation on tables, because that may crash the
- server if an index is badly corrupted. */
-
- /* We do not know if MySQL can call this function before calling
- external_lock(). To be safe, update the thd of the current table
- handle. */
-
- update_thd(ha_thd());
-
- /* In case MySQL calls this in the middle of a SELECT query, release
- possible adaptive hash latch to avoid deadlocks of threads */
-
- prebuilt->trx->op_info = (char*)"returning various info to MySQL";
-
- trx_search_latch_release_if_reserved(prebuilt->trx);
-
- ib_table = prebuilt->table;
- DBUG_ASSERT(ib_table->n_ref_count > 0);
-
- /* HA_STATUS_TIME: refresh the statistics first, but only for
- ANALYZE or when innobase_stats_on_metadata is set. */
- if (flag & HA_STATUS_TIME) {
- if (is_analyze || innobase_stats_on_metadata) {
-
- dict_stats_upd_option_t opt;
- dberr_t ret;
-
- prebuilt->trx->op_info = "updating table statistics";
-
- if (dict_stats_is_persistent_enabled(ib_table)) {
-
- if (is_analyze) {
-
- /* If this table is already queued for
- background analyze, remove it from the
- queue as we are about to do the same */
- if (!srv_read_only_mode) {
-
- dict_mutex_enter_for_mysql();
- dict_stats_recalc_pool_del(
- ib_table);
- dict_mutex_exit_for_mysql();
- }
-
- opt = DICT_STATS_RECALC_PERSISTENT;
- } else {
- /* This is e.g. 'SHOW INDEXES', fetch
- the persistent stats from disk. */
- opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY;
- }
- } else {
- opt = DICT_STATS_RECALC_TRANSIENT;
- }
-
- ut_ad(!mutex_own(&dict_sys->mutex));
- ret = dict_stats_update(ib_table, opt);
-
- if (ret != DB_SUCCESS) {
- /* Clear the status string before the early
- return, as func_exit is bypassed here. */
- prebuilt->trx->op_info = "";
- DBUG_RETURN(HA_ERR_GENERIC);
- }
-
- prebuilt->trx->op_info =
- "returning various info to MySQL";
- }
-
- }
-
- /* HA_STATUS_VARIABLE: row count, data/index sizes, free space. */
- if (flag & HA_STATUS_VARIABLE) {
-
- ulint page_size;
- ulint stat_clustered_index_size;
- ulint stat_sum_of_other_index_sizes;
-
- /* Copy the three statistics under the table stats S-latch
- unless the caller asked for no locking. */
- if (!(flag & HA_STATUS_NO_LOCK)) {
- dict_table_stats_lock(ib_table, RW_S_LATCH);
- }
-
- ut_a(ib_table->stat_initialized);
-
- n_rows = ib_table->stat_n_rows;
-
- stat_clustered_index_size
- = ib_table->stat_clustered_index_size;
-
- stat_sum_of_other_index_sizes
- = ib_table->stat_sum_of_other_index_sizes;
-
- if (!(flag & HA_STATUS_NO_LOCK)) {
- dict_table_stats_unlock(ib_table, RW_S_LATCH);
- }
-
- /*
- The MySQL optimizer seems to assume in a left join that n_rows
- is an accurate estimate if it is zero. Of course, it is not,
- since we do not have any locks on the rows yet at this phase.
- Since SHOW TABLE STATUS seems to call this function with the
- HA_STATUS_TIME flag set, while the left join optimizer does not
- set that flag, we add one to a zero value if the flag is not
- set. That way SHOW TABLE STATUS will show the best estimate,
- while the optimizer never sees the table empty. */
-
- if (n_rows == 0 && !(flag & HA_STATUS_TIME)) {
- n_rows++;
- }
-
- /* Fix bug#40386: Not flushing query cache after truncate.
- n_rows can not be 0 unless the table is empty, set to 1
- instead. The original problem of bug#29507 is actually
- fixed in the server code. */
- if (thd_sql_command(user_thd) == SQLCOM_TRUNCATE) {
-
- n_rows = 1;
-
- /* We need to reset the prebuilt value too, otherwise
- checks for values greater than the last value written
- to the table will fail and the autoinc counter will
- not be updated. This will force write_row() into
- attempting an update of the table's AUTOINC counter. */
-
- prebuilt->autoinc_last_value = 0;
- }
-
- /* A zero return from dict_table_zip_size() is replaced by
- UNIV_PAGE_SIZE. */
- page_size = dict_table_zip_size(ib_table);
- if (page_size == 0) {
- page_size = UNIV_PAGE_SIZE;
- }
-
- /* The index sizes are multiplied by the page size to get
- the lengths reported to the server. */
- stats.records = (ha_rows) n_rows;
- stats.deleted = 0;
- stats.data_file_length
- = ((ulonglong) stat_clustered_index_size)
- * page_size;
- stats.index_file_length
- = ((ulonglong) stat_sum_of_other_index_sizes)
- * page_size;
-
- /* Since fsp_get_available_space_in_free_extents() is
- acquiring latches inside InnoDB, we do not call it if we
- are asked by MySQL to avoid locking. Another reason to
- avoid the call is that it uses quite a lot of CPU.
- See Bug#38185. */
- if (flag & HA_STATUS_NO_LOCK
- || !(flag & HA_STATUS_VARIABLE_EXTRA)) {
- /* We do not update delete_length if no
- locking is requested so the "old" value can
- remain. delete_length is initialized to 0 in
- the ha_statistics' constructor. Also we only
- need delete_length to be set when
- HA_STATUS_VARIABLE_EXTRA is set */
- } else if (UNIV_UNLIKELY
- (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE)) {
- /* Avoid accessing the tablespace if
- innodb_crash_recovery is set to a high value. */
- stats.delete_length = 0;
- } else {
- ullint avail_space;
-
- avail_space = fsp_get_available_space_in_free_extents(
- ib_table->space);
-
- if (avail_space == ULLINT_UNDEFINED) {
- THD* thd;
-
- thd = ha_thd();
-
- push_warning_printf(
- thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_CANT_GET_STAT,
- "InnoDB: Trying to get the free "
- "space for table %s but its "
- "tablespace has been discarded or "
- "the .ibd file is missing. Setting "
- "the free space to zero. "
- "(errno: %M)",
- ib_table->name, errno);
-
- stats.delete_length = 0;
- } else {
- /* NOTE(review): the factor 1024 suggests
- avail_space is in kilobytes - confirm
- against the fsp function. */
- stats.delete_length = avail_space * 1024;
- }
- }
-
- stats.check_time = 0;
- stats.mrr_length_per_rec = ref_length + sizeof(void*);
-
- /* Guard the division below against an empty table. */
- if (stats.records == 0) {
- stats.mean_rec_length = 0;
- } else {
- stats.mean_rec_length = (ulong)
- (stats.data_file_length / stats.records);
- }
- }
-
- /* HA_STATUS_CONST: index count consistency check, per-index
- rec_per_key estimates and the table creation time. */
- if (flag & HA_STATUS_CONST) {
- ulong i;
- char path[FN_REFLEN];
- /* Verify the number of index in InnoDB and MySQL
- matches up. If prebuilt->clust_index_was_generated
- holds, InnoDB defines GEN_CLUST_INDEX internally */
- ulint num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes)
- - prebuilt->clust_index_was_generated;
- if (table->s->keys < num_innodb_index) {
- /* If there are too many indexes defined
- inside InnoDB, ignore those that are being
- created, because MySQL will only consider
- the fully built indexes here. */
-
- for (const dict_index_t* index
- = UT_LIST_GET_FIRST(ib_table->indexes);
- index != NULL;
- index = UT_LIST_GET_NEXT(indexes, index)) {
-
- /* First, online index creation is
- completed inside InnoDB, and then
- MySQL attempts to upgrade the
- meta-data lock so that it can rebuild
- the .frm file. If we get here in that
- time frame, dict_index_is_online_ddl()
- would not hold and the index would
- still not be included in TABLE_SHARE. */
- if (*index->name == TEMP_INDEX_PREFIX) {
- num_innodb_index--;
- }
- }
-
- /* If InnoDB still has one index too many and an
- FTS doc id index exists, do not count it either. */
- if (table->s->keys < num_innodb_index
- && innobase_fts_check_doc_id_index(
- ib_table, NULL, NULL)
- == FTS_EXIST_DOC_ID_INDEX) {
- num_innodb_index--;
- }
- }
-
- /* A remaining difference is recorded on the table and
- reported through ib_push_frm_error(). */
- if (table->s->keys != num_innodb_index) {
- ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS;
- ib_push_frm_error(user_thd, ib_table, table, num_innodb_index, true);
- }
-
- if (!(flag & HA_STATUS_NO_LOCK)) {
- dict_table_stats_lock(ib_table, RW_S_LATCH);
- }
-
- ut_a(ib_table->stat_initialized);
-
- for (i = 0; i < table->s->keys; i++) {
- ulong j;
- rec_per_key = 1;
- /* We could get index quickly through internal
- index mapping with the index translation table.
- The identity of index (match up index name with
- that of table->key_info[i]) is already verified in
- innobase_get_index(). */
- dict_index_t* index = innobase_get_index(i);
-
- if (index == NULL) {
- ib_table->dict_frm_mismatch = DICT_FRM_INCONSISTENT_KEYS;
- ib_push_frm_error(user_thd, ib_table, table, num_innodb_index, true);
- break;
- }
-
- for (j = 0; j < table->key_info[i].ext_key_parts; j++) {
-
- if (table->key_info[i].flags & HA_FULLTEXT) {
- /* The whole concept has no validity
- for FTS indexes. */
- table->key_info[i].rec_per_key[j] = 1;
- continue;
- }
-
- /* The server asks about more key parts than
- InnoDB has unique columns for this index:
- report it and stop with this index. */
- if (j + 1 > index->n_uniq) {
- sql_print_error(
- "Index %s of %s has %lu columns"
- " unique inside InnoDB, but "
- "MySQL is asking statistics for"
- " %lu columns. Have you mixed "
- "up .frm files from different "
- "installations? "
- "See " REFMAN
- "innodb-troubleshooting.html\n",
- index->name,
- ib_table->name,
- (unsigned long)
- index->n_uniq, j + 1);
- break;
- }
-
- DBUG_EXECUTE_IF("ib_ha_innodb_stat_not_initialized",
- index->table->stat_initialized = FALSE;);
-
- if (!ib_table->stat_initialized ||
- (index->table != ib_table ||
- !index->table->stat_initialized)) {
- fprintf(stderr,
- "InnoDB: Warning: Index %s points to table %s"
- " and ib_table %s statistics is initialized %d "
- " but index table %s initialized %d "
- " mysql table is %s. Have you mixed "
- "up .frm files from different "
- "installations? "
- "See " REFMAN
- "innodb-troubleshooting.html\n",
- index->name,
- index->table->name,
- ib_table->name,
- ib_table->stat_initialized,
- index->table->name,
- index->table->stat_initialized,
- table->s->table_name.str
- );
-
- /* This is better than
- assert on below function */
- dict_stats_init(index->table);
- }
-
- rec_per_key = innodb_rec_per_key(
- index, j, stats.records);
-
- /* Since MySQL seems to favor table scans
- too much over index searches, we pretend
- index selectivity is 2 times better than
- our estimate: */
-
- rec_per_key = rec_per_key / 2;
-
- if (rec_per_key == 0) {
- rec_per_key = 1;
- }
-
- /* Clamp to the largest ulong value before
- narrowing from ha_rows. */
- table->key_info[i].rec_per_key[j] =
- rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 :
- (ulong) rec_per_key;
- }
-
- }
-
- if (!(flag & HA_STATUS_NO_LOCK)) {
- dict_table_stats_unlock(ib_table, RW_S_LATCH);
- }
-
- /* Build the path of the table definition file
- (data home + normalized path + reg_ext) and use its
- ctime as the create time. */
- my_snprintf(path, sizeof(path), "%s/%s%s",
- mysql_data_home,
- table->s->normalized_path.str,
- reg_ext);
-
- unpack_filename(path,path);
-
- /* Note that we do not know the access time of the table,
- nor the CHECK TABLE time, nor the UPDATE or INSERT time. */
-
- if (os_file_get_status(path, &stat_info, false) == DB_SUCCESS) {
- stats.create_time = (ulong) stat_info.ctime;
- }
- }
-
- /* With a high srv_force_recovery setting the HA_STATUS_ERRKEY and
- HA_STATUS_AUTO sections below are skipped. */
- if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
-
- goto func_exit;
- }
-
- /* HA_STATUS_ERRKEY: report which key the last error refers to. */
- if (flag & HA_STATUS_ERRKEY) {
- const dict_index_t* err_index;
-
- ut_a(prebuilt->trx);
- ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
-
- err_index = trx_get_error_info(prebuilt->trx);
-
- if (err_index) {
- errkey = innobase_get_mysql_key_number_for_index(
- share, table, ib_table, err_index);
- } else {
- /* No index object: fall back to the key number
- stored in the transaction, mapping
- ULINT_UNDEFINED to ~0. */
- errkey = (unsigned int) (
- (prebuilt->trx->error_key_num
- == ULINT_UNDEFINED)
- ? ~0
- : prebuilt->trx->error_key_num);
- }
- }
-
- /* HA_STATUS_AUTO: only meaningful when the table has an
- auto-increment field. */
- if ((flag & HA_STATUS_AUTO) && table->found_next_number_field) {
- stats.auto_increment_value = innobase_peek_autoinc();
- }
-
-func_exit:
- prebuilt->trx->op_info = (char*)"";
-
- DBUG_RETURN(0);
-}
-
-/*********************************************************************//**
-Fills the statistics fields of the handle object for the MySQL
-interpreter. Delegates to info_low() with the ANALYZE mode switched off.
-@return HA_ERR_* error code or 0 */
-UNIV_INTERN
-int
-ha_innobase::info(
-/*==============*/
-	uint	flag)	/*!< in: what information is requested */
-{
-	/* A plain info() request never forces a recalculation the way
-	ANALYZE does. */
-	const bool	is_analyze = false;
-
-	return(info_low(flag, is_analyze));
-}
-
-/**********************************************************************//**
-Updates index cardinalities of the table, based on random dives into
-each index tree. This does NOT calculate exact statistics on the table.
-@return HA_ADMIN_OK on success, HA_ADMIN_FAILED otherwise */
-UNIV_INTERN
-int
-ha_innobase::analyze(
-/*=================*/
-	THD*		thd,		/*!< in: connection thread handle */
-	HA_CHECK_OPT*	check_opt)	/*!< in: currently ignored */
-{
-	DBUG_ASSERT(share->ib_table == prebuilt->table);
-
-	/* Ask info_low() for every kind of statistics and tell it that
-	this is ANALYZE, so that the statistics are recalculated. */
-	const uint	all_stats
-		= HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE;
-
-	const int	err = this->info_low(all_stats, true);
-
-	DBUG_ASSERT(share->ib_table == prebuilt->table);
-
-	return(err == 0 ? HA_ADMIN_OK : HA_ADMIN_FAILED);
-}
-
-/**********************************************************************//**
-Handles OPTIMIZE TABLE. Depending on the settings this defragments the
-table, optimizes only its fulltext data, or asks the server to rebuild
-the table through ALTER TABLE.
-@return HA_ADMIN_OK or HA_ADMIN_TRY_ALTER */
-UNIV_INTERN
-int
-ha_innobase::optimize(
-/*==================*/
-	THD*		thd,		/*!< in: connection thread handle */
-	HA_CHECK_OPT*	check_opt)	/*!< in: currently ignored */
-{
-	/*FTS-FIXME: MySQL has no engine-specific commands, so the
-	OPTIMIZE command is reused to exercise the admin commands of
-	InnoDB's FTS support. The drawback is that MySQL locks the
-	entire table during OPTIMIZE, which is undesirable. */
-
-	if (srv_defragment) {
-		const int	err = defragment_table(
-			prebuilt->table->name, NULL, false);
-
-		if (err == 0) {
-			return(HA_ADMIN_OK);
-		}
-
-		push_warning_printf(
-			thd, Sql_condition::WARN_LEVEL_WARN, err,
-			"InnoDB: Cannot defragment table %s: returned error code %d\n",
-			prebuilt->table->name, err);
-
-		/* ER_SP_ALREADY_EXISTS is reported as success; any other
-		error falls back to a table rebuild. */
-		return(err == ER_SP_ALREADY_EXISTS
-		       ? HA_ADMIN_OK
-		       : HA_ADMIN_TRY_ALTER);
-	}
-
-	if (!innodb_optimize_fulltext_only) {
-		return(HA_ADMIN_TRY_ALTER);
-	}
-
-	dict_table_t*	ib_table = prebuilt->table;
-
-	if (ib_table->fts && ib_table->fts->cache
-	    && !dict_table_is_discarded(ib_table)) {
-		fts_sync_table(ib_table, false, true, false);
-		fts_optimize_table(ib_table);
-	}
-
-	return(HA_ADMIN_OK);
-}
-
-/*******************************************************************//**
-Tries to check that an InnoDB table is not corrupted. If corruption is
-noticed, prints to stderr information about it. In case of corruption
-may also assert a failure and crash the server.
-Every index that is not being created or dropped is validated and its
-record count compared with that of the first (clustered) index.
-@return HA_ADMIN_CORRUPT or HA_ADMIN_OK */
-UNIV_INTERN
-int
-ha_innobase::check(
-/*===============*/
- THD* thd, /*!< in: user thread handle */
- HA_CHECK_OPT* check_opt) /*!< in: check options */
-{
- dict_index_t* index;
- ulint n_rows;
- ulint n_rows_in_table = ULINT_UNDEFINED;
- bool is_ok = true;
- ulint old_isolation_level;
- ibool table_corrupted;
-
- DBUG_ENTER("ha_innobase::check");
- DBUG_ASSERT(thd == ha_thd());
- ut_a(prebuilt->trx);
- ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
- ut_a(prebuilt->trx == thd_to_trx(thd));
-
- if (prebuilt->mysql_template == NULL) {
- /* Build the template; we will use a dummy template
- in index scans done in checking */
-
- build_template(true);
- }
-
- /* A discarded or unreadable/missing tablespace cannot be checked:
- report it and return HA_ADMIN_CORRUPT. */
- if (dict_table_is_discarded(prebuilt->table)) {
-
- ib_senderrf(
- thd,
- IB_LOG_LEVEL_ERROR,
- ER_TABLESPACE_DISCARDED,
- table->s->table_name.str);
-
- DBUG_RETURN(HA_ADMIN_CORRUPT);
-
- } else if (!prebuilt->table->is_readable() &&
- fil_space_get(prebuilt->table->space) == NULL) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR,
- ER_TABLESPACE_MISSING,
- table->s->table_name.str);
-
- DBUG_RETURN(HA_ADMIN_CORRUPT);
- }
-
- if (prebuilt->table->corrupted) {
- char index_name[MAX_FULL_NAME_LEN + 1];
- /* If some previous operation has marked the table as
- corrupted in memory, and has not propagated such to
- clustered index, we will do so here */
- index = dict_table_get_first_index(prebuilt->table);
-
- if (!dict_index_is_corrupted(index)) {
- row_mysql_lock_data_dictionary(prebuilt->trx);
- dict_set_corrupted(index, prebuilt->trx, "CHECK TABLE");
- row_mysql_unlock_data_dictionary(prebuilt->trx);
- }
-
- innobase_format_name(index_name, sizeof index_name,
- index->name, TRUE);
-
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_INDEX_CORRUPT,
- "InnoDB: Index %s is marked as"
- " corrupted", index_name);
-
- /* Now that the table is already marked as corrupted,
- there is no need to check any index of this table */
- prebuilt->trx->op_info = "";
-
- DBUG_RETURN(HA_ADMIN_CORRUPT);
- }
-
- prebuilt->trx->op_info = "checking table";
-
- /* Saved here and restored after the index loop. */
- old_isolation_level = prebuilt->trx->isolation_level;
-
- /* We must run the index record counts at an isolation level
- >= READ COMMITTED, because a dirty read can see a wrong number
- of records in some index; to play safe, we use always
- REPEATABLE READ here */
-
- prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ;
-
- /* Check whether the table is already marked as corrupted
- before running the check table */
- table_corrupted = prebuilt->table->corrupted;
-
- /* Reset table->corrupted bit so that check table can proceed to
- do additional check */
- prebuilt->table->corrupted = FALSE;
-
- for (index = dict_table_get_first_index(prebuilt->table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
- char index_name[MAX_FULL_NAME_LEN + 1];
-
- /* If this is an index being created or dropped, skip */
- if (*index->name == TEMP_INDEX_PREFIX) {
- continue;
- }
-
- /* The B-tree structure is validated only when the QUICK
- option was not given. */
- if (!(check_opt->flags & T_QUICK)) {
- /* Enlarge the fatal lock wait timeout during
- CHECK TABLE. */
- os_increment_counter_by_amount(
- server_mutex,
- srv_fatal_semaphore_wait_threshold,
- SRV_SEMAPHORE_WAIT_EXTENSION);
-
- dberr_t err = btr_validate_index(index, prebuilt->trx);
-
- /* Restore the fatal lock wait timeout after
- CHECK TABLE. */
- os_decrement_counter_by_amount(
- server_mutex,
- srv_fatal_semaphore_wait_threshold,
- SRV_SEMAPHORE_WAIT_EXTENSION);
-
- if (err != DB_SUCCESS) {
- is_ok = false;
-
- innobase_format_name(
- index_name, sizeof index_name,
- index->name, TRUE);
-
- if (err == DB_DECRYPTION_FAILED) {
- push_warning_printf(
- thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_NO_SUCH_TABLE,
- "Table %s is encrypted but encryption service or"
- " used key_id is not available. "
- " Can't continue checking table.",
- index->table->name);
- } else {
- push_warning_printf(
- thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_NOT_KEYFILE,
- "InnoDB: The B-tree of"
- " index %s is corrupted.",
- index_name);
- }
-
- /* Skip the record count for this index and
- move on to the next one. */
- continue;
- }
- }
-
- /* Instead of invoking change_active_index(), set up
- a dummy template for non-locking reads, disabling
- access to the clustered index. */
- prebuilt->index = index;
-
- prebuilt->index_usable = row_merge_is_index_usable(
- prebuilt->trx, prebuilt->index);
-
- /* Debug-only injection: mark every secondary index as
- corrupted and unusable. */
- DBUG_EXECUTE_IF(
- "dict_set_index_corrupted",
- if (!dict_index_is_clust(index)) {
- prebuilt->index_usable = FALSE;
- row_mysql_lock_data_dictionary(prebuilt->trx);
- dict_set_corrupted(index, prebuilt->trx, "dict_set_index_corrupted");
- row_mysql_unlock_data_dictionary(prebuilt->trx);
- });
-
- if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
- innobase_format_name(
- index_name, sizeof index_name,
- prebuilt->index->name, TRUE);
-
- /* Only an index flagged as corrupted fails the
- check; the other case below just pushes a warning
- without touching is_ok. */
- if (dict_index_is_corrupted(prebuilt->index)) {
- push_warning_printf(
- user_thd,
- Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_INDEX_CORRUPT,
- "InnoDB: Index %s is marked as"
- " corrupted",
- index_name);
- is_ok = false;
- } else {
- push_warning_printf(
- thd,
- Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_TABLE_DEF_CHANGED,
- "InnoDB: Insufficient history for"
- " index %s",
- index_name);
- }
- continue;
- }
-
- /* Prepare prebuilt for a non-locking scan with the dummy
- template and an empty search tuple. */
- prebuilt->sql_stat_start = TRUE;
- prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
- prebuilt->n_template = 0;
- prebuilt->need_to_access_clustered = FALSE;
-
- dtuple_set_n_fields(prebuilt->search_tuple, 0);
-
- prebuilt->select_lock_type = LOCK_NONE;
-
- /* n_rows is passed by address here and compared with the
- clustered index count further down. */
- bool check_result
- = row_check_index_for_mysql(prebuilt, index, &n_rows);
- DBUG_EXECUTE_IF(
- "dict_set_index_corrupted",
- if (!(index->type & DICT_CLUSTERED)) {
- check_result = false;
- });
-
- if (!check_result) {
- innobase_format_name(
- index_name, sizeof index_name,
- index->name, TRUE);
-
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_NOT_KEYFILE,
- "InnoDB: The B-tree of"
- " index %s is corrupted.",
- index_name);
- is_ok = false;
- dict_set_corrupted(
- index, prebuilt->trx, "CHECK TABLE-check index");
- }
-
- /* Stop checking further indexes if the statement was
- killed. */
- if (thd_kill_level(user_thd)) {
- break;
- }
-
-#if 0
- fprintf(stderr, "%lu entries in index %s\n", n_rows,
- index->name);
-#endif
-
- /* The first index supplies the reference row count; every
- other non-FTS index must contain the same number of
- entries. */
- if (index == dict_table_get_first_index(prebuilt->table)) {
- n_rows_in_table = n_rows;
- } else if (!(index->type & DICT_FTS)
- && (n_rows != n_rows_in_table)) {
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_NOT_KEYFILE,
- "InnoDB: Index '%-.200s' contains %lu"
- " entries, should be %lu.",
- index->name,
- (ulong) n_rows,
- (ulong) n_rows_in_table);
- is_ok = false;
- dict_set_corrupted(
- index, prebuilt->trx,
- "CHECK TABLE; Wrong count");
- }
- }
-
- if (table_corrupted) {
- /* If some previous operation has marked the table as
- corrupted in memory, and has not propagated such to
- clustered index, we will do so here */
- index = dict_table_get_first_index(prebuilt->table);
-
- if (!dict_index_is_corrupted(index)) {
- dict_set_corrupted(
- index, prebuilt->trx, "CHECK TABLE");
- }
- /* Put back the flag that was cleared before the loop. */
- prebuilt->table->corrupted = TRUE;
- }
-
- /* Restore the original isolation level */
- prebuilt->trx->isolation_level = old_isolation_level;
-
- /* We validate the whole adaptive hash index for all tables
- at every CHECK TABLE only when QUICK flag is not present. */
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- if (!(check_opt->flags & T_QUICK) && !btr_search_validate()) {
- push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_NOT_KEYFILE,
- "InnoDB: The adaptive hash index is corrupted.");
- is_ok = false;
- }
-#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
-
- prebuilt->trx->op_info = "";
- if (thd_kill_level(user_thd)) {
- my_error(ER_QUERY_INTERRUPTED, MYF(0));
- }
-
- /* A table flagged as corrupted is reported as such regardless of
- is_ok. */
- if (UNIV_UNLIKELY(prebuilt->table && prebuilt->table->corrupted)) {
- DBUG_RETURN(HA_ADMIN_CORRUPT);
- }
-
- DBUG_RETURN(is_ok ? HA_ADMIN_OK : HA_ADMIN_CORRUPT);
-}
-
-/*************************************************************//**
-Builds the table comment shown by SHOW TABLE STATUS: the user's comment
-followed by the free space of the InnoDB tablespace and a description of
-the foreign keys. The combined text is capped at 64000 bytes including
-the separator and the terminating NUL.
-@return newly allocated comment string, or the unchanged input pointer
-when the input is too long or the allocation fails */
-UNIV_INTERN
-char*
-ha_innobase::update_table_comment(
-/*==============================*/
-	const char*	comment)/*!< in: table comment defined by user */
-{
-	const uint	length = (uint) strlen(comment);
-
-	if (length > 64000 - 3) {
-		/* No room left to append anything. */
-		return((char*) comment);
-	}
-
-	/* We do not know if MySQL can call this function before calling
-	external_lock(). To be safe, update the thd of the current table
-	handle. */
-	update_thd(ha_thd());
-
-	prebuilt->trx->op_info = (char*)"returning table comment";
-
-	/* In case MySQL calls this in the middle of a SELECT query, release
-	possible adaptive hash latch to avoid deadlocks of threads */
-	trx_search_latch_release_if_reserved(prebuilt->trx);
-
-#define SSTR( x ) reinterpret_cast< std::ostringstream & >( \
- ( std::ostringstream() << std::dec << x ) ).str()
-
-	std::string	fk_str("InnoDB free: ");
-
-	fk_str += SSTR(fsp_get_available_space_in_free_extents(
-			       prebuilt->table->space));
-
-	fk_str += dict_print_info_on_foreign_keys(
-		FALSE, prebuilt->trx, prebuilt->table);
-
-	/* Number of bytes of fk_str that will be copied, truncated so
-	that the whole comment stays within the 64000 byte cap. */
-	long	flen = fk_str.length();
-
-	if (flen < 0) {
-		flen = 0;
-	} else if (length + flen + 3 > 64000) {
-		flen = 64000 - 3 - length;
-	}
-
-	/* Room for the user comment, "; ", the generated text and NUL. */
-	char*	str = (char*) my_malloc(length + flen + 3, MYF(0));
-
-	if (str != NULL) {
-		char*	tail = str;
-
-		if (length != 0) {
-			memcpy(str, comment, length);
-			tail += length;
-			*tail++ = ';';
-			*tail++ = ' ';
-		}
-
-		memcpy(tail, fk_str.c_str(), flen);
-		tail[flen] = 0;
-	}
-
-	prebuilt->trx->op_info = (char*)"";
-
-	if (str == NULL) {
-		return((char*) comment);
-	}
-
-	return(str);
-}
-
-/*******************************************************************//**
-Gets the foreign key create info for a table stored in InnoDB.
-@return own: NUL-terminated string in the form which can be inserted to
-the CREATE TABLE statement, or NULL if the buffer could not be
-allocated; MUST be freed with ha_innobase::free_foreign_key_create_info */
-UNIV_INTERN
-char*
-ha_innobase::get_foreign_key_create_info(void)
-/*==========================================*/
-{
-	ut_a(prebuilt != NULL);
-
-	/* We do not know if MySQL can call this function before calling
-	external_lock(). To be safe, update the thd of the current table
-	handle. */
-	update_thd(ha_thd());
-
-	prebuilt->trx->op_info = (char*)"getting info on foreign keys";
-
-	/* In case MySQL calls this in the middle of a SELECT query,
-	release possible adaptive hash latch to avoid
-	deadlocks of threads */
-	trx_search_latch_release_if_reserved(prebuilt->trx);
-
-	/* Collect the foreign key description in memory. */
-	const std::string	fk_text = dict_print_info_on_foreign_keys(
-		TRUE, prebuilt->trx, prebuilt->table);
-
-	prebuilt->trx->op_info = (char*)"";
-
-	/* Hand the caller a copy it owns, with room for the NUL. */
-	const size_t	n_bytes = fk_text.length();
-	char*		fk_str = (char*) my_malloc(n_bytes + 1, MYF(0));
-
-	if (fk_str == NULL) {
-		return(NULL);
-	}
-
-	memcpy(fk_str, fk_text.c_str(), n_bytes);
-	fk_str[n_bytes] = '\0';
-
-	return(fk_str);
-}
-
-
-
-/***********************************************************************//**
-Maps a InnoDB foreign key constraint to a equivalent MySQL foreign key info.
-All strings, and the returned structure itself, are obtained through the
-thd_make_lex_string()/thd_memdup() services of the given thread handle.
-@return pointer to foreign key info; callers in this file check the
-result for NULL before using it */
-static
-FOREIGN_KEY_INFO*
-get_foreign_key_info(
-/*=================*/
-	THD*		thd,		/*!< in: user thread handle */
-	dict_foreign_t*	foreign)	/*!< in: foreign key constraint */
-{
-	FOREIGN_KEY_INFO	f_key_info;
-	FOREIGN_KEY_INFO*	pf_key_info;
-	uint			i = 0;
-	ulint			len;
-	char			tmp_buff[NAME_LEN+1];
-	char			name_buff[NAME_LEN+1];
-	const char*		ptr;
-	LEX_STRING*		name;
-
-	/* Every thd_make_lex_string() call below requests a freshly
-	allocated LEX_STRING (last argument 1), so the lex_str argument
-	is passed as NULL throughout. Earlier code passed members of the
-	still uninitialized f_key_info there, which read indeterminate
-	pointer values. */
-
-	ptr = dict_remove_db_name(foreign->id);
-	f_key_info.foreign_id = thd_make_lex_string(thd, NULL, ptr,
-						    (uint) strlen(ptr), 1);
-
-	/* Name format: database name, '/', table name, '\0' */
-
-	/* Referenced (parent) database name */
-	len = dict_get_db_name_len(foreign->referenced_table_name);
-	ut_a(len < sizeof(tmp_buff));
-	ut_memcpy(tmp_buff, foreign->referenced_table_name, len);
-	tmp_buff[len] = 0;
-
-	len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff));
-	f_key_info.referenced_db = thd_make_lex_string(
-		thd, NULL, name_buff, static_cast<unsigned int>(len), 1);
-
-	/* Referenced (parent) table name */
-	ptr = dict_remove_db_name(foreign->referenced_table_name);
-	len = filename_to_tablename(ptr, name_buff, sizeof(name_buff));
-	f_key_info.referenced_table = thd_make_lex_string(
-		thd, NULL, name_buff, static_cast<unsigned int>(len), 1);
-
-	/* Dependent (child) database name */
-	len = dict_get_db_name_len(foreign->foreign_table_name);
-	ut_a(len < sizeof(tmp_buff));
-	ut_memcpy(tmp_buff, foreign->foreign_table_name, len);
-	tmp_buff[len] = 0;
-
-	len = filename_to_tablename(tmp_buff, name_buff, sizeof(name_buff));
-	f_key_info.foreign_db = thd_make_lex_string(
-		thd, NULL, name_buff, static_cast<unsigned int>(len), 1);
-
-	/* Dependent (child) table name */
-	ptr = dict_remove_db_name(foreign->foreign_table_name);
-	len = filename_to_tablename(ptr, name_buff, sizeof(name_buff));
-	f_key_info.foreign_table = thd_make_lex_string(
-		thd, NULL, name_buff, static_cast<unsigned int>(len), 1);
-
-	/* Column names of the child and parent side, pairwise. The loop
-	body runs at least once, i.e. it relies on n_fields >= 1. */
-	do {
-		ptr = foreign->foreign_col_names[i];
-		name = thd_make_lex_string(thd, NULL, ptr,
-					   (uint) strlen(ptr), 1);
-		f_key_info.foreign_fields.push_back(name);
-		ptr = foreign->referenced_col_names[i];
-		name = thd_make_lex_string(thd, NULL, ptr,
-					   (uint) strlen(ptr), 1);
-		f_key_info.referenced_fields.push_back(name);
-	} while (++i < foreign->n_fields);
-
-	/* ON DELETE action; RESTRICT when no flag is set. len is the
-	length of the literal assigned to ptr. */
-	if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) {
-		len = 7;
-		ptr = "CASCADE";
-	} else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) {
-		len = 8;
-		ptr = "SET NULL";
-	} else if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) {
-		len = 9;
-		ptr = "NO ACTION";
-	} else {
-		len = 8;
-		ptr = "RESTRICT";
-	}
-
-	f_key_info.delete_method = thd_make_lex_string(
-		thd, NULL, ptr, static_cast<unsigned int>(len), 1);
-
-	/* ON UPDATE action; RESTRICT when no flag is set. */
-	if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) {
-		len = 7;
-		ptr = "CASCADE";
-	} else if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) {
-		len = 8;
-		ptr = "SET NULL";
-	} else if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) {
-		len = 9;
-		ptr = "NO ACTION";
-	} else {
-		len = 8;
-		ptr = "RESTRICT";
-	}
-
-	f_key_info.update_method = thd_make_lex_string(
-		thd, NULL, ptr, static_cast<unsigned int>(len), 1);
-
-	/* Name of the index on the parent table, if one is attached. */
-	if (foreign->referenced_index && foreign->referenced_index->name) {
-		f_key_info.referenced_key_name = thd_make_lex_string(
-			thd, NULL,
-			foreign->referenced_index->name,
-			(uint) strlen(foreign->referenced_index->name),
-			1);
-	} else {
-		f_key_info.referenced_key_name = NULL;
-	}
-
-	/* Return a copy made with thd_memdup() instead of the local
-	object. */
-	pf_key_info = (FOREIGN_KEY_INFO*) thd_memdup(thd, &f_key_info,
-						      sizeof(FOREIGN_KEY_INFO));
-
-	return(pf_key_info);
-}
-
-/** Append to a list the foreign keys that reference a given table.
-The caller must hold dict_sys->mutex.
-@param thd		The thread handle
-@param table		Table whose referenced_set is walked
-@param f_key_list[out]	The list of foreign keys */
-static
-void
-fill_foreign_key_list(THD* thd,
-		      const dict_table_t* table,
-		      List<FOREIGN_KEY_INFO>* f_key_list)
-{
-	ut_ad(mutex_own(&dict_sys->mutex));
-
-	dict_foreign_set::const_iterator	it
-		= table->referenced_set.begin();
-
-	while (it != table->referenced_set.end()) {
-
-		FOREIGN_KEY_INFO*	info
-			= get_foreign_key_info(thd, *it);
-
-		/* Constraints whose info could not be built are
-		left out of the list. */
-		if (info != NULL) {
-			f_key_list->push_back(info);
-		}
-
-		++it;
-	}
-}
-
-/** Get the list of foreign keys referencing a specified table.
-The table is looked up by its normalized name, opened for the duration
-of the call and closed again before returning.
-@param thd		The thread handle
-@param path		Path to the table
-@param f_key_list[out]	The list of foreign keys
-
-@return HA_ERR_NO_SUCH_TABLE if the table cannot be opened, zero for
-success */
-static
-int
-innobase_get_parent_fk_list(
-	THD*			thd,
-	const char*		path,
-	List<FOREIGN_KEY_INFO>*	f_key_list)
-{
-	ut_a(strlen(path) <= FN_REFLEN);
-	char	norm_name[FN_REFLEN + 1];
-	normalize_table_name(norm_name, path);
-
-	trx_t*	parent_trx = check_trx_exists(thd);
-	parent_trx->op_info = "getting list of referencing foreign keys";
-	trx_search_latch_release_if_reserved(parent_trx);
-
-	mutex_enter(&dict_sys->mutex);
-
-	dict_table_t*	table
-		= dict_table_open_on_name(norm_name, TRUE, FALSE,
-					  static_cast<dict_err_ignore_t>(
-						  DICT_ERR_IGNORE_INDEX_ROOT
-						  | DICT_ERR_IGNORE_CORRUPT));
-	if (!table) {
-		mutex_exit(&dict_sys->mutex);
-		/* Clear the status string on the error path as well, so
-		that it does not keep describing an operation that has
-		already ended. */
-		parent_trx->op_info = "";
-		return(HA_ERR_NO_SUCH_TABLE);
-	}
-
-	fill_foreign_key_list(thd, table, f_key_list);
-
-	dict_table_close(table, TRUE, FALSE);
-
-	mutex_exit(&dict_sys->mutex);
-	parent_trx->op_info = "";
-	return(0);
-}
-
-/*******************************************************************//**
-Collects the foreign key constraints defined on this table (the entries
-of its foreign_set) into a list.
-@return always 0, that is, always succeeds */
-UNIV_INTERN
-int
-ha_innobase::get_foreign_key_list(
-/*==============================*/
-	THD*			thd,		/*!< in: user thread handle */
-	List<FOREIGN_KEY_INFO>*	f_key_list)	/*!< out: foreign key list */
-{
-	ut_a(prebuilt != NULL);
-	update_thd(ha_thd());
-
-	prebuilt->trx->op_info = "getting list of foreign keys";
-
-	trx_search_latch_release_if_reserved(prebuilt->trx);
-
-	/* The set is walked while holding the dictionary mutex. */
-	mutex_enter(&(dict_sys->mutex));
-
-	dict_foreign_set::iterator	it
-		= prebuilt->table->foreign_set.begin();
-
-	while (it != prebuilt->table->foreign_set.end()) {
-
-		FOREIGN_KEY_INFO*	info
-			= get_foreign_key_info(thd, *it);
-
-		if (info != NULL) {
-			f_key_list->push_back(info);
-		}
-
-		++it;
-	}
-
-	mutex_exit(&(dict_sys->mutex));
-
-	prebuilt->trx->op_info = "";
-
-	return(0);
-}
-
-/*******************************************************************//**
-Collects the foreign key constraints in which this table is the
-referenced (parent) table.
-@return always 0, that is, always succeeds */
-UNIV_INTERN
-int
-ha_innobase::get_parent_foreign_key_list(
-/*=====================================*/
-	THD*			thd,		/*!< in: user thread handle */
-	List<FOREIGN_KEY_INFO>*	f_key_list)	/*!< out: foreign key list */
-{
-	ut_a(prebuilt != NULL);
-	update_thd(ha_thd());
-
-	prebuilt->trx->op_info = "getting list of referencing foreign keys";
-
-	trx_search_latch_release_if_reserved(prebuilt->trx);
-
-	/* fill_foreign_key_list() expects the dictionary mutex to be
-	held by the caller. */
-	mutex_enter(&(dict_sys->mutex));
-
-	fill_foreign_key_list(thd, prebuilt->table, f_key_list);
-
-	mutex_exit(&(dict_sys->mutex));
-
-	prebuilt->trx->op_info = "";
-
-	return(0);
-}
-
-/*****************************************************************//**
-Tells whether ALTER TABLE may move this table to another storage engine.
-That is refused as soon as the table takes part in any foreign key
-constraint, either as the referenced (parent) or the referencing (child)
-table.
-@return TRUE if can switch engines */
-UNIV_INTERN
-bool
-ha_innobase::can_switch_engines(void)
-/*=================================*/
-{
-	DBUG_ENTER("ha_innobase::can_switch_engines");
-	update_thd();
-
-	prebuilt->trx->op_info =
-		"determining if there are foreign key constraints";
-
-	/* Both sets are inspected while the data dictionary is frozen. */
-	row_mysql_freeze_data_dictionary(prebuilt->trx);
-
-	const bool	has_foreign_keys
-		= !prebuilt->table->referenced_set.empty()
-		|| !prebuilt->table->foreign_set.empty();
-
-	row_mysql_unfreeze_data_dictionary(prebuilt->trx);
-
-	prebuilt->trx->op_info = "";
-
-	DBUG_RETURN(!has_foreign_keys);
-}
-
-/*******************************************************************//**
-Reports whether some foreign key references this table. The MySQL manual
-states that a REPLACE is either equivalent to an INSERT, or DELETE(s) +
-INSERT. Only a delete is then allowed internally to resolve a duplicate key
-conflict in REPLACE, not an update.
-@return 1 if referenced by a FOREIGN KEY, 0 otherwise */
-UNIV_INTERN
-uint
-ha_innobase::referenced_by_foreign_key(void)
-/*========================================*/
-{
-	return(dict_table_is_referenced_by_foreign_key(prebuilt->table)
-	       ? 1 : 0);
-}
-
-/*******************************************************************//**
-Releases the foreign key create info string of a table stored in InnoDB.
-A NULL string is accepted and ignored. */
-UNIV_INTERN
-void
-ha_innobase::free_foreign_key_create_info(
-/*======================================*/
-	char*	str)	/*!< in, own: create info string to free */
-{
-	if (str == NULL) {
-		/* Nothing was allocated. */
-		return;
-	}
-
-	my_free(str);
-}
-
-/*******************************************************************//**
-Receives an additional hint from the server about how the handler should
-operate. Operations not listed in the switch are ignored.
-@return always 0 */
-UNIV_INTERN
-int
-ha_innobase::extra(
-/*===============*/
-	enum ha_extra_function operation)
-			/*!< in: HA_EXTRA_FLUSH or some other flag */
-{
-	check_trx_exists(ha_thd());
-
-	/* Warning: MySQL is not guaranteed to have called external_lock
-	before this function, so prebuilt->trx may be obsolete here! We
-	must not invoke update_thd() either, because the calling threads
-	may change. The transaction is therefore always looked up through
-	thd_to_trx(ha_thd()) below.
-	CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR! */
-
-	switch (operation) {
-	/* Key-only read hints. */
-	case HA_EXTRA_KEYREAD:
-		prebuilt->read_just_key = 1;
-		break;
-
-	case HA_EXTRA_NO_KEYREAD:
-		prebuilt->read_just_key = 0;
-		break;
-
-	case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
-		prebuilt->keep_other_fields_on_keyread = 1;
-		break;
-
-	/* Handle state. */
-	case HA_EXTRA_FLUSH:
-		if (prebuilt->blob_heap != NULL) {
-			row_mysql_prebuilt_free_blob_heap(prebuilt);
-		}
-		break;
-
-	case HA_EXTRA_RESET_STATE:
-		reset_template();
-		thd_to_trx(ha_thd())->duplicates = 0;
-		break;
-
-	/* Duplicate handling flags of the transaction. */
-	case HA_EXTRA_INSERT_WITH_UPDATE:
-		thd_to_trx(ha_thd())->duplicates |= TRX_DUP_IGNORE;
-		break;
-
-	case HA_EXTRA_NO_IGNORE_DUP_KEY:
-		thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_IGNORE;
-		break;
-
-	case HA_EXTRA_WRITE_CAN_REPLACE:
-		thd_to_trx(ha_thd())->duplicates |= TRX_DUP_REPLACE;
-		break;
-
-	case HA_EXTRA_WRITE_CANNOT_REPLACE:
-		thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_REPLACE;
-		break;
-
-	default:
-		/* Do nothing */
-		break;
-	}
-
-	return(0);
-}
-
-/******************************************************************//**
-Resets the handle: frees the blob heap if there is one, resets the
-template, closes the DS-MRR state and clears the statement level
-AUTOINC value.
-@return always 0 */
-UNIV_INTERN
-int
-ha_innobase::reset()
-/*================*/
-{
-	if (prebuilt->blob_heap != NULL) {
-		row_mysql_prebuilt_free_blob_heap(prebuilt);
-	}
-
-	reset_template();
-
-	ds_mrr.dsmrr_close();
-
-	/* autoinc_last_value is a statement level counter.
-	TODO: it should really be cleared in reset_template(), but for
-	now it is safer to do it explicitly here. */
-	prebuilt->autoinc_last_value = 0;
-
-	return(0);
-}
-
-/******************************************************************//**
-MySQL calls this function at the start of each SQL statement inside LOCK
-TABLES. Inside LOCK TABLES the ::external_lock method does not work to
-mark SQL statement borders. Note also a special case: if a temporary table
-is created inside LOCK TABLES, MySQL has not called external_lock() at all
-on that table.
-MySQL-5.0 also calls this before each statement in an execution of a stored
-procedure. To make the execution more deterministic for binlogging, MySQL-5.0
-locks all tables involved in a stored procedure with full explicit table
-locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the
-procedure.
-The function resets the per-statement state of the handle (the AUTOINC row
-counter of the transaction, sql_stat_start, the fetch hint and the
-template), chooses prebuilt->select_lock_type for the statement and
-registers the transaction with innobase_register_trx().
-@return 0, or the MySQL error code converted from a failed
-row_lock_table_for_mysql() call */
-UNIV_INTERN
-int
-ha_innobase::start_stmt(
-/*====================*/
- THD* thd, /*!< in: handle to the user thread */
- thr_lock_type lock_type) /*!< in: lock type; only compared against TL_READ below */
-{
- trx_t* trx;
- DBUG_ENTER("ha_innobase::start_stmt");
-
- update_thd(thd);
-
- trx = prebuilt->trx;
-
- /* Here we release the search latch and the InnoDB thread FIFO ticket
- if they were reserved. They should have been released already at the
- end of the previous statement, but because inside LOCK TABLES the
- lock count method does not work to mark the end of a SELECT statement,
- that may not be the case. We MUST release the search latch before an
- INSERT, for example. */
-
- trx_search_latch_release_if_reserved(trx);
-
- innobase_srv_conc_force_exit_innodb(trx);
-
- /* Reset the AUTOINC statement level counter for multi-row INSERTs. */
- trx->n_autoinc_rows = 0;
-
- prebuilt->sql_stat_start = TRUE;
- prebuilt->hint_need_to_fetch_extra_cols = 0;
- reset_template();
-
- if (dict_table_is_temporary(prebuilt->table)
- && prebuilt->mysql_has_locked
- && prebuilt->select_lock_type == LOCK_NONE) {
- dberr_t error;
-
- switch (thd_sql_command(thd)) { /* any other command leaves the lock type untouched here */
- case SQLCOM_INSERT:
- case SQLCOM_UPDATE:
- case SQLCOM_DELETE:
- init_table_handle_for_HANDLER();
- prebuilt->select_lock_type = LOCK_X;
- prebuilt->stored_select_lock_type = LOCK_X;
- error = row_lock_table_for_mysql(prebuilt, NULL, 1);
-
- if (error != DB_SUCCESS) {
- int st = convert_error_code_to_mysql(
- error, 0, thd);
- DBUG_RETURN(st);
- }
- break;
- }
- }
-
- if (!prebuilt->mysql_has_locked) {
- /* This handle is for a temporary table created inside
- this same LOCK TABLES; since MySQL does NOT call external_lock
- in this case, we must use x-row locks inside InnoDB to be
- prepared for an update of a row */
-
- prebuilt->select_lock_type = LOCK_X;
-
- } else if (trx->isolation_level != TRX_ISO_SERIALIZABLE
- && thd_sql_command(thd) == SQLCOM_SELECT
- && lock_type == TL_READ) {
-
- /* For other than temporary tables, we obtain
- no lock for consistent read (plain SELECT). */
-
- prebuilt->select_lock_type = LOCK_NONE;
- } else {
- /* Not a consistent read: restore the
- select_lock_type value. The value of
- stored_select_lock_type was decided in:
- 1) ::store_lock(),
- 2) ::external_lock(),
- 3) ::init_table_handle_for_HANDLER(), and
- 4) ::transactional_table_lock(). */
-
- ut_a(prebuilt->stored_select_lock_type != LOCK_NONE_UNSET);
- prebuilt->select_lock_type = prebuilt->stored_select_lock_type;
- }
-
- *trx->detailed_error = 0; /* clear the detailed error text of the previous statement */
-
- innobase_register_trx(ht, thd, trx);
-
- if (!trx_is_started(trx)) {
- ++trx->will_lock;
- }
-
- DBUG_RETURN(0);
-}
-
-/******************************************************************//**
-Translates a MySQL transaction isolation level code into the matching
-InnoDB TRX_ISO_* code. An unknown code triggers ut_error.
-@return InnoDB isolation level */
-static inline
-ulint
-innobase_map_isolation_level(
-/*=========================*/
-	enum_tx_isolation	iso)	/*!< in: MySQL isolation level code */
-{
-	if (iso == ISO_REPEATABLE_READ) {
-		return(TRX_ISO_REPEATABLE_READ);
-	}
-
-	if (iso == ISO_READ_COMMITTED) {
-		return(TRX_ISO_READ_COMMITTED);
-	}
-
-	if (iso == ISO_SERIALIZABLE) {
-		return(TRX_ISO_SERIALIZABLE);
-	}
-
-	if (iso == ISO_READ_UNCOMMITTED) {
-		return(TRX_ISO_READ_UNCOMMITTED);
-	}
-
-	/* None of the known isolation levels matched. */
-	ut_error;
-
-	return(0);
-}
-
-/******************************************************************//**
-As MySQL will execute an external lock for every new table it uses when it
-starts to process an SQL statement (an exception is when MySQL calls
-start_stmt for the handle) we can use this function to store the pointer to
-the THD in the handle. We will also use this function to communicate
-to InnoDB that a new SQL statement has started and that we must store a
-savepoint to our transaction handle, so that we are able to roll back
-the SQL statement in case of an error.
-The function has two halves: for lock_type != F_UNLCK it sets up the lock
-mode of the handle and counts the table as in use; for F_UNLCK it releases
-the table and, once n_mysql_tables_in_use drops to zero, ends the statement
-(committing the transaction when neither OPTION_NOT_AUTOCOMMIT nor
-OPTION_BEGIN is set).
-@return 0 on success, HA_ERR_LOGGING_IMPOSSIBLE, HA_ERR_TABLE_READONLY,
-or the MySQL error code converted from a failed row_lock_table_for_mysql() */
-UNIV_INTERN
-int
-ha_innobase::external_lock(
-/*=======================*/
- THD* thd, /*!< in: handle to the user thread */
- int lock_type) /*!< in: lock type */
-{
- trx_t* trx;
-
- DBUG_ENTER("ha_innobase::external_lock");
- DBUG_PRINT("enter",("lock_type: %d", lock_type));
-
- update_thd(thd);
-
- /* Statement based binlogging does not work in isolation level
- READ UNCOMMITTED and READ COMMITTED since the necessary
- locks cannot be taken. In this case, we print an
- informative error message and return with an error.
- Note: decide_logging_format would give the same error message,
- except it cannot give the extra details. */
-
- if (lock_type == F_WRLCK
- && !(table_flags() & HA_BINLOG_STMT_CAPABLE)
- && thd_binlog_format(thd) == BINLOG_FORMAT_STMT
- && thd_binlog_filter_ok(thd)
- && thd_sqlcom_can_generate_row_events(thd)) {
- bool skip = 0;
- /* used by test case */
- DBUG_EXECUTE_IF("no_innodb_binlog_errors", skip = true;);
- if (!skip) {
-#ifdef WITH_WSREP
- if (!wsrep_on(thd) || wsrep_thd_exec_mode(thd) == LOCAL_STATE)
- {
-#endif /* WITH_WSREP */
- my_error(ER_BINLOG_STMT_MODE_AND_ROW_ENGINE, MYF(0),
- " InnoDB is limited to row-logging when "
- "transaction isolation level is "
- "READ COMMITTED or READ UNCOMMITTED.");
- DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE);
-#ifdef WITH_WSREP
- }
-#endif /* WITH_WSREP */
- }
- }
-
- /* Check for UPDATEs in read-only mode. */
- if (srv_read_only_mode
- && (thd_sql_command(thd) == SQLCOM_UPDATE
- || thd_sql_command(thd) == SQLCOM_INSERT
- || thd_sql_command(thd) == SQLCOM_REPLACE
- || thd_sql_command(thd) == SQLCOM_DROP_TABLE
- || thd_sql_command(thd) == SQLCOM_ALTER_TABLE
- || thd_sql_command(thd) == SQLCOM_OPTIMIZE
- || (thd_sql_command(thd) == SQLCOM_CREATE_TABLE
- && lock_type == F_WRLCK)
- || thd_sql_command(thd) == SQLCOM_CREATE_INDEX
- || thd_sql_command(thd) == SQLCOM_DROP_INDEX
- || thd_sql_command(thd) == SQLCOM_DELETE)) {
-
- if (thd_sql_command(thd) == SQLCOM_CREATE_TABLE) /* NOTE(review): both branches below are identical */
- {
- ib_senderrf(thd, IB_LOG_LEVEL_WARN,
- ER_READ_ONLY_MODE);
- DBUG_RETURN(HA_ERR_TABLE_READONLY);
- } else {
- ib_senderrf(thd, IB_LOG_LEVEL_WARN,
- ER_READ_ONLY_MODE);
- DBUG_RETURN(HA_ERR_TABLE_READONLY);
- }
-
- }
-
- trx = prebuilt->trx;
-
- prebuilt->sql_stat_start = TRUE;
- prebuilt->hint_need_to_fetch_extra_cols = 0;
-
- reset_template();
-
- switch (prebuilt->table->quiesce) {
- case QUIESCE_START:
- /* Check for FLUSH TABLE t WITH READ LOCK; */
- if (!srv_read_only_mode
- && thd_sql_command(thd) == SQLCOM_FLUSH
- && lock_type == F_RDLCK) {
-
- row_quiesce_table_start(prebuilt->table, trx);
-
- /* Use the transaction instance to track UNLOCK
- TABLES. It can be done via START TRANSACTION; too
- implicitly. */
-
- ++trx->flush_tables;
- }
- break;
-
- case QUIESCE_COMPLETE:
- /* Check for UNLOCK TABLES; implicit or explicit
- or trx interruption. */
- if (trx->flush_tables > 0
- && (lock_type == F_UNLCK || trx_is_interrupted(trx))) {
-
- row_quiesce_table_complete(prebuilt->table, trx);
-
- ut_a(trx->flush_tables > 0);
- --trx->flush_tables;
- }
-
- break;
-
- case QUIESCE_NONE:
- break;
- }
-
- if (lock_type == F_WRLCK) {
-
- /* If this is a SELECT, then it is in UPDATE TABLE ...
- or SELECT ... FOR UPDATE */
- prebuilt->select_lock_type = LOCK_X;
- prebuilt->stored_select_lock_type = LOCK_X;
- }
-
- if (lock_type != F_UNLCK) {
- /* MySQL is setting a new table lock */
-
- *trx->detailed_error = 0;
-
- innobase_register_trx(ht, thd, trx);
-
- if (trx->isolation_level == TRX_ISO_SERIALIZABLE
- && prebuilt->select_lock_type == LOCK_NONE
- && thd_test_options(
- thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
- /* To get serializable execution, we let InnoDB
- conceptually add 'LOCK IN SHARE MODE' to all SELECTs
- which otherwise would have been consistent reads. An
- exception is consistent reads in the AUTOCOMMIT=1 mode:
- we know that they are read-only transactions, and they
- can be serialized also if performed as consistent
- reads. */
-
- prebuilt->select_lock_type = LOCK_S;
- prebuilt->stored_select_lock_type = LOCK_S;
- }
-
- /* Starting from 4.1.9, no InnoDB table lock is taken in LOCK
- TABLES if AUTOCOMMIT=1. It does not make much sense to acquire
- an InnoDB table lock if it is released immediately at the end
- of LOCK TABLES, and InnoDB's table locks in that case cause
- VERY easily deadlocks.
-
- We do not set InnoDB table locks if user has not explicitly
- requested a table lock. Note that thd_in_lock_tables(thd)
- can hold in some cases, e.g., at the start of a stored
- procedure call (SQLCOM_CALL). */
-
- if (prebuilt->select_lock_type != LOCK_NONE) {
-
- if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES
- && THDVAR(thd, table_locks)
- && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)
- && thd_in_lock_tables(thd)) {
-
- dberr_t error = row_lock_table_for_mysql(
- prebuilt, NULL, 0);
-
- if (error != DB_SUCCESS) {
- DBUG_RETURN(
- convert_error_code_to_mysql(
- error, 0, thd));
- }
- }
-
- trx->mysql_n_tables_locked++;
- }
-
- trx->n_mysql_tables_in_use++;
- prebuilt->mysql_has_locked = TRUE;
-
- if (!trx_is_started(trx)
- && (prebuilt->select_lock_type != LOCK_NONE
- || prebuilt->stored_select_lock_type != LOCK_NONE)) {
-
- ++trx->will_lock;
- }
-
- DBUG_RETURN(0);
- }
-
- /* MySQL is releasing a table lock */
-
- trx->n_mysql_tables_in_use--;
- prebuilt->mysql_has_locked = FALSE;
-
- /* Release a possible FIFO ticket and search latch. Since we
- may reserve the trx_sys->mutex, we have to release the search
- system latch first to obey the latching order. */
-
- trx_search_latch_release_if_reserved(trx);
-
- innobase_srv_conc_force_exit_innodb(trx);
-
- /* If the MySQL lock count drops to zero we know that the current SQL
- statement has ended */
-
- if (trx->n_mysql_tables_in_use == 0) {
-#ifdef EXTENDED_SLOWLOG
- if (UNIV_UNLIKELY(trx->take_stats)) { /* hand the per-statement counters to the THD, then zero them */
- increment_thd_innodb_stats(thd,
- (unsigned long long) trx->id,
- trx->io_reads,
- trx->io_read,
- trx->io_reads_wait_timer,
- trx->lock_que_wait_timer,
- trx->innodb_que_wait_timer,
- trx->distinct_page_access);
-
- trx->io_reads = 0;
- trx->io_read = 0;
- trx->io_reads_wait_timer = 0;
- trx->lock_que_wait_timer = 0;
- trx->innodb_que_wait_timer = 0;
- trx->distinct_page_access = 0;
- if (trx->distinct_page_access_hash)
- memset(trx->distinct_page_access_hash, 0,
- DPAH_SIZE);
- }
-#endif
-
- trx->mysql_n_tables_locked = 0;
- prebuilt->used_in_HANDLER = FALSE;
-
- if (!thd_test_options(
- thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
- if (trx_is_started(trx)) {
- innobase_commit(ht, thd, TRUE);
- }
-
- } else if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
- && trx->global_read_view) {
-
- /* At low transaction isolation levels we let
- each consistent read set its own snapshot */
-
- read_view_close_for_mysql(trx);
- }
- }
-
- if (!trx_is_started(trx)
- && (prebuilt->select_lock_type != LOCK_NONE
- || prebuilt->stored_select_lock_type != LOCK_NONE)) {
-
- ++trx->will_lock;
- }
-
- DBUG_RETURN(0);
-}
-
-/******************************************************************//**
-With this function MySQL request a transactional lock to a table when
-user issued query LOCK TABLES..WHERE ENGINE = InnoDB.
-Unless the statement is a tablespace operation, a discarded or unreadable
-table is reported to the client and rejected. Otherwise the lock mode of
-the handle is set from lock_type (F_WRLCK -> LOCK_X, F_RDLCK -> LOCK_S),
-the transaction is registered and, when the table_locks session variable
-is set and the thread is inside LOCK TABLES, an InnoDB table lock is taken.
-@return 0 on success, HA_ERR_CRASHED for a discarded or unreadable table
-or an unknown lock type, or the MySQL error code converted from a failed
-row_lock_table_for_mysql() */
-UNIV_INTERN
-int
-ha_innobase::transactional_table_lock(
-/*==================================*/
-	THD*	thd,		/*!< in: handle to the user thread */
-	int	lock_type)	/*!< in: lock type, F_WRLCK or F_RDLCK */
-{
-	trx_t*		trx;
-
-	DBUG_ENTER("ha_innobase::transactional_table_lock");
-	DBUG_PRINT("enter",("lock_type: %d", lock_type));
-
-	/* We do not know if MySQL can call this function before calling
-	external_lock(). To be safe, update the thd of the current table
-	handle. */
-
-	update_thd(thd);
-
-	DBUG_ASSERT(share->ib_table == prebuilt->table);
-
-	if (!thd_tablespace_op(thd)) {
-
-		/* Fail only when the table really is unusable. The error
-		return used to follow the if/else chain unconditionally,
-		which rejected every lock request that was not part of a
-		tablespace operation, healthy tables included. */
-
-		if (dict_table_is_discarded(prebuilt->table)) {
-
-			ib_senderrf(
-				thd, IB_LOG_LEVEL_ERROR,
-				ER_TABLESPACE_DISCARDED,
-				table->s->table_name.str);
-
-			DBUG_RETURN(HA_ERR_CRASHED);
-
-		} else if (!prebuilt->table->is_readable()) {
-
-			ib_senderrf(
-				thd, IB_LOG_LEVEL_ERROR,
-				ER_TABLESPACE_MISSING,
-				table->s->table_name.str);
-
-			DBUG_RETURN(HA_ERR_CRASHED);
-		}
-	}
-
-	trx = prebuilt->trx;
-
-	prebuilt->sql_stat_start = TRUE;
-	prebuilt->hint_need_to_fetch_extra_cols = 0;
-
-	reset_template();
-
-	if (lock_type == F_WRLCK) {
-		prebuilt->select_lock_type = LOCK_X;
-		prebuilt->stored_select_lock_type = LOCK_X;
-	} else if (lock_type == F_RDLCK) {
-		prebuilt->select_lock_type = LOCK_S;
-		prebuilt->stored_select_lock_type = LOCK_S;
-	} else {
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"MySQL is trying to set transactional table lock "
-			"with corrupted lock type to table %s, lock type "
-			"%d does not exist.",
-			table->s->table_name.str, lock_type);
-
-		DBUG_RETURN(HA_ERR_CRASHED);
-	}
-
-	/* MySQL is setting a new transactional table lock */
-
-	innobase_register_trx(ht, thd, trx);
-
-	if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) {
-		dberr_t	error;
-
-		error = row_lock_table_for_mysql(prebuilt, NULL, 0);
-
-		if (error != DB_SUCCESS) {
-			DBUG_RETURN(
-				convert_error_code_to_mysql(
-					error, prebuilt->table->flags, thd));
-		}
-
-		if (thd_test_options(
-			thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
-			/* Store the current undo_no of the transaction
-			so that we know where to roll back if we have
-			to roll back the next SQL statement */
-
-			trx_mark_sql_stat_end(trx);
-		}
-	}
-
-	DBUG_RETURN(0);
-}
-
-/************************************************************************//**
-Exports the InnoDB status variables to MySQL. Does nothing while
-innodb_inited is not set. */
-static
-void
-innodb_export_status()
-/*==================*/
-{
-	if (!innodb_inited) {
-		return;
-	}
-
-	srv_export_innodb_status();
-}
-
-/************************************************************************//**
-Implements the SHOW ENGINE INNODB STATUS command. Sends the output of the
-InnoDB Monitor to the client.
-The monitor output is written to srv_monitor_file under
-srv_monitor_file_mutex and read back into a heap buffer. At most
-MAX_STATUS_SIZE bytes are sent: when the output is longer, either the
-beginning of the list of active transactions is replaced by a truncation
-marker or the end of the output is cut off.
-@return 0 on success (also in read-only mode, where nothing is printed),
-1 if the buffer cannot be allocated, otherwise the value returned by
-stat_print */
-static
-int
-innodb_show_status(
-/*===============*/
-	handlerton*	hton,	/*!< in: the innodb handlerton */
-	THD*		thd,	/*!< in: the MySQL query thread of the caller */
-	stat_print_fn*	stat_print)
-				/*!< in: function that sends the text */
-{
-	trx_t*			trx;
-	static const char	truncated_msg[] = "... truncated...\n";
-	const long		MAX_STATUS_SIZE = 1048576;
-	ulint			trx_list_start = ULINT_UNDEFINED;
-	ulint			trx_list_end = ULINT_UNDEFINED;
-	bool			ret_val;
-
-	DBUG_ENTER("innodb_show_status");
-	DBUG_ASSERT(hton == innodb_hton_ptr);
-
-	/* We don't create the temp files or associated
-	mutexes in read-only-mode */
-
-	if (srv_read_only_mode) {
-		DBUG_RETURN(0);
-	}
-
-	trx = check_trx_exists(thd);
-
-	trx_search_latch_release_if_reserved(trx);
-
-	innobase_srv_conc_force_exit_innodb(trx);
-
-	/* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE
-	bytes of text. */
-
-	char*	str;
-	ssize_t	flen, usable_len;
-
-	mutex_enter(&srv_monitor_file_mutex);
-	rewind(srv_monitor_file);
-
-	srv_printf_innodb_monitor(srv_monitor_file, FALSE,
-				  &trx_list_start, &trx_list_end);
-
-	os_file_set_eof(srv_monitor_file);
-
-	/* A failing ftell() is treated as an empty file. */
-	if ((flen = ftell(srv_monitor_file)) < 0) {
-		flen = 0;
-	}
-
-	if (flen > MAX_STATUS_SIZE) {
-		usable_len = MAX_STATUS_SIZE;
-		srv_truncated_status_writes++;
-	} else {
-		usable_len = flen;
-	}
-
-	/* allocate buffer for the string, and
-	read the contents of the temporary file */
-
-	if (!(str = (char*) my_malloc(usable_len + 1, MYF(0)))) {
-		mutex_exit(&srv_monitor_file_mutex);
-		DBUG_RETURN(1);
-	}
-
-	rewind(srv_monitor_file);
-
-	/* The comparison must be the exact complement of the
-	"flen > MAX_STATUS_SIZE" test above: an output of exactly
-	MAX_STATUS_SIZE bytes fits the buffer (usable_len == flen) and is
-	not counted as truncated, so it must be displayed in full. */
-	if (flen <= MAX_STATUS_SIZE) {
-		/* Display the entire output. */
-		flen = fread(str, 1, flen, srv_monitor_file);
-	} else if (trx_list_end < (ulint) flen
-		   && trx_list_start < trx_list_end
-		   && trx_list_start + (flen - trx_list_end)
-		   < MAX_STATUS_SIZE - sizeof truncated_msg - 1) {
-
-		/* Omit the beginning of the list of active transactions. */
-		ssize_t	len = fread(str, 1, trx_list_start, srv_monitor_file);
-
-		/* The marker is copied without its terminating NUL. */
-		memcpy(str + len, truncated_msg, sizeof truncated_msg - 1);
-		len += sizeof truncated_msg - 1;
-		usable_len = (MAX_STATUS_SIZE - 1) - len;
-		/* Continue with the last usable_len bytes of the file. */
-		fseek(srv_monitor_file,
-		      static_cast<long>(flen - usable_len), SEEK_SET);
-		len += fread(str + len, 1, usable_len, srv_monitor_file);
-		flen = len;
-	} else {
-		/* Omit the end of the output. */
-		flen = fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file);
-	}
-
-	mutex_exit(&srv_monitor_file_mutex);
-
-	/* str is not NUL-terminated; its length is passed explicitly. */
-	ret_val= stat_print(
-		thd, innobase_hton_name,
-		static_cast<uint>(strlen(innobase_hton_name)),
-		STRING_WITH_LEN(""), str, static_cast<uint>(flen));
-
-	my_free(str);
-
-	DBUG_RETURN(ret_val);
-}
-
-/************************************************************************//**
-Formats one status row, consisting of a name (optionally prefixed with
-"combined ") and an "os_waits=N" value, and passes it to stat_print.
-@return the value returned by stat_print; non-zero means failure */
-static
-bool
-innodb_print_os_waits(
-/*==================*/
-	THD*		thd,		/*!< in: the MySQL query thread */
-	stat_print_fn*	stat_print,	/*!< in: function for printing
-					statistics */
-	uint		hton_name_len,	/*!< in: length of
-					innobase_hton_name */
-	bool		combined,	/*!< in: true to print the name as
-					"combined <name>" */
-	const char*	name,		/*!< in: name of the mutex or
-					rw-lock */
-	ulint		os_waits)	/*!< in: OS wait count to print */
-{
-	char	name_buf[IO_SIZE];
-	char	waits_buf[IO_SIZE];
-
-	const uint	name_len = (uint) my_snprintf(
-		name_buf, sizeof(name_buf),
-		combined ? "combined %s" : "%s", name);
-
-	const uint	waits_len = (uint) my_snprintf(
-		waits_buf, sizeof(waits_buf), "os_waits=%lu",
-		(ulong) os_waits);
-
-	return(stat_print(thd, innobase_hton_name, hton_name_len,
-			  name_buf, name_len, waits_buf, waits_len));
-}
-
-/************************************************************************//**
-Implements the SHOW MUTEX STATUS command. One row is printed for every
-mutex and rw-lock that has a non-zero OS wait count; the buffer pool block
-mutexes and block rw-locks are summed up and printed as one "combined" row
-each.
-@return 0 on success. */
-static
-int
-innodb_mutex_show_status(
-/*=====================*/
-	handlerton*	hton,		/*!< in: the innodb handlerton */
-	THD*		thd,		/*!< in: the MySQL query thread of the
-					caller */
-	stat_print_fn*	stat_print)	/*!< in: function for printing
-					statistics */
-{
-	ib_mutex_t*	block_mutex = NULL;
-	rw_lock_t*	block_lock = NULL;
-	ulint		block_mutex_oswait_count = 0;
-	ulint		block_lock_oswait_count = 0;
-	const uint	hton_name_len = (uint) strlen(innobase_hton_name);
-
-	DBUG_ENTER("innodb_mutex_show_status");
-	DBUG_ASSERT(hton == innodb_hton_ptr);
-
-	/* Pass 1: the mutexes. */
-	mutex_enter(&mutex_list_mutex);
-
-	for (ib_mutex_t* mutex = UT_LIST_GET_FIRST(mutex_list);
-	     mutex != NULL;
-	     mutex = UT_LIST_GET_NEXT(list, mutex)) {
-
-		if (mutex->count_os_wait == 0) {
-			/* Never waited for: not reported. */
-		} else if (buf_pool_is_block_mutex(mutex)) {
-			/* Accumulated into the combined row. */
-			block_mutex = mutex;
-			block_mutex_oswait_count += mutex->count_os_wait;
-		} else if (innodb_print_os_waits(
-				   thd, stat_print, hton_name_len, false,
-				   mutex->cmutex_name,
-				   mutex->count_os_wait)) {
-			mutex_exit(&mutex_list_mutex);
-			DBUG_RETURN(1);
-		}
-	}
-
-	if (block_mutex != NULL
-	    && innodb_print_os_waits(thd, stat_print, hton_name_len, true,
-				     block_mutex->cmutex_name,
-				     block_mutex_oswait_count)) {
-		mutex_exit(&mutex_list_mutex);
-		DBUG_RETURN(1);
-	}
-
-	mutex_exit(&mutex_list_mutex);
-
-	/* Pass 2: the rw-locks. */
-	mutex_enter(&rw_lock_list_mutex);
-
-	for (rw_lock_t* lock = UT_LIST_GET_FIRST(rw_lock_list);
-	     lock != NULL;
-	     lock = UT_LIST_GET_NEXT(list, lock)) {
-
-		if (lock->count_os_wait == 0) {
-			/* Never waited for: not reported. */
-		} else if (buf_pool_is_block_lock(lock)) {
-			/* Accumulated into the combined row. */
-			block_lock = lock;
-			block_lock_oswait_count += lock->count_os_wait;
-		} else if (innodb_print_os_waits(
-				   thd, stat_print, hton_name_len, false,
-				   lock->lock_name,
-				   lock->count_os_wait)) {
-			mutex_exit(&rw_lock_list_mutex);
-			DBUG_RETURN(1);
-		}
-	}
-
-	if (block_lock != NULL
-	    && innodb_print_os_waits(thd, stat_print, hton_name_len, true,
-				     block_lock->lock_name,
-				     block_lock_oswait_count)) {
-		mutex_exit(&rw_lock_list_mutex);
-		DBUG_RETURN(1);
-	}
-
-	mutex_exit(&rw_lock_list_mutex);
-
-#ifdef UNIV_DEBUG
-	{
-		/* These counters are never updated in this function, so
-		the row below always carries zero values. */
-		ulint		rw_lock_count= 0;
-		ulint		rw_lock_count_spin_loop= 0;
-		ulint		rw_lock_count_spin_rounds= 0;
-		ulint		rw_lock_count_os_wait= 0;
-		ulint		rw_lock_count_os_yield= 0;
-		ulonglong	rw_lock_wait_time= 0;
-		char		buf2[IO_SIZE];
-
-		const uint	buf2len = static_cast<uint>(my_snprintf(
-			buf2, sizeof buf2,
-			"count=%lu, spin_waits=%lu, spin_rounds=%lu, "
-			"os_waits=%lu, os_yields=%lu, os_wait_times=%lu",
-			(ulong) rw_lock_count,
-			(ulong) rw_lock_count_spin_loop,
-			(ulong) rw_lock_count_spin_rounds,
-			(ulong) rw_lock_count_os_wait,
-			(ulong) rw_lock_count_os_yield,
-			(ulong) (rw_lock_wait_time / 1000)));
-
-		if (stat_print(thd, innobase_hton_name, hton_name_len,
-			       STRING_WITH_LEN("rw_lock_mutexes"),
-			       buf2, buf2len)) {
-			DBUG_RETURN(1);
-		}
-	}
-#endif /* UNIV_DEBUG */
-
-	/* Success */
-	DBUG_RETURN(0);
-}
-
-/************************************************************************//**
-Dispatches a SHOW ENGINE request to the status or the mutex printer.
-HA_ENGINE_LOGS and any other type are not handled and count as success.
-Note: the bool return type seems to be abused here, should be an int.
-@return false on success, true on failure */
-static
-bool
-innobase_show_status(
-/*=================*/
-	handlerton*		hton,	/*!< in: the innodb handlerton */
-	THD*			thd,	/*!< in: the MySQL query thread
-					of the caller */
-	stat_print_fn*		stat_print,
-					/*!< in: function for printing
-					the output */
-	enum ha_stat_type	stat_type)
-					/*!< in: which status to show */
-{
-	DBUG_ASSERT(hton == innodb_hton_ptr);
-
-	/* A non-zero value from either printer means there was an
-	error. */
-
-	if (stat_type == HA_ENGINE_STATUS) {
-		return(innodb_show_status(hton, thd, stat_print) != 0);
-	}
-
-	if (stat_type == HA_ENGINE_MUTEX) {
-		return(innodb_mutex_show_status(hton, thd, stat_print) != 0);
-	}
-
-	/* HA_ENGINE_LOGS: not handled. Success */
-	return(false);
-}
-
-/************************************************************************//**
-Looks up the shared INNOBASE_SHARE structure of a table, which is needed
-to provide table locking, and creates and registers it in
-innobase_open_tables if the table name is not there yet. The use count of
-the share is incremented in both cases.
-@return the share of the table */
-static
-INNOBASE_SHARE*
-get_share(
-/*======*/
-	const char*	table_name)	/*!< in: name of the table */
-{
-	INNOBASE_SHARE*	share;
-
-	mysql_mutex_lock(&innobase_share_mutex);
-
-	const ulint	fold = ut_fold_string(table_name);
-
-	HASH_SEARCH(table_name_hash, innobase_open_tables, fold,
-		    INNOBASE_SHARE*, share,
-		    ut_ad(share->use_count > 0),
-		    strcmp(share->table_name, table_name) == 0);
-
-	if (share == NULL) {
-
-		const uint	name_len = (uint) strlen(table_name);
-
-		/* TODO: invoke HASH_MIGRATE if innobase_open_tables
-		grows too big */
-
-		/* The share and the copy of the name (with its
-		terminating NUL) live in one zero-filled allocation;
-		the name is stored right after the struct. */
-		share = static_cast<INNOBASE_SHARE*>(
-			my_malloc(sizeof(*share) + name_len + 1,
-				  MYF(MY_FAE | MY_ZEROFILL)));
-
-		char*	name_copy = reinterpret_cast<char*>(share + 1);
-
-		memcpy(name_copy, table_name, name_len + 1);
-		share->table_name = name_copy;
-
-		HASH_INSERT(INNOBASE_SHARE, table_name_hash,
-			    innobase_open_tables, fold, share);
-
-		thr_lock_init(&share->lock);
-
-		/* Start with an empty index translation table. */
-		share->idx_trans_tbl.array_size = 0;
-		share->idx_trans_tbl.index_count = 0;
-		share->idx_trans_tbl.index_mapping = NULL;
-	}
-
-	++share->use_count;
-
-	mysql_mutex_unlock(&innobase_share_mutex);
-
-	return(share);
-}
-
-/************************************************************************//**
-Drops one reference to a share that was registered with get_share(). When
-the use count reaches zero the share is removed from innobase_open_tables
-and freed together with its index translation table. */
-static
-void
-free_share(
-/*=======*/
-	INNOBASE_SHARE*	share)	/*!< in/own: table share to free */
-{
-	mysql_mutex_lock(&innobase_share_mutex);
-
-#ifdef UNIV_DEBUG
-	/* The share must be the one registered under its own name. */
-	INNOBASE_SHARE*	share2;
-	ulint	fold = ut_fold_string(share->table_name);
-
-	HASH_SEARCH(table_name_hash, innobase_open_tables, fold,
-		    INNOBASE_SHARE*, share2,
-		    ut_ad(share->use_count > 0),
-		    strcmp(share->table_name, share2->table_name) == 0);
-
-	ut_a(share2 == share);
-#endif /* UNIV_DEBUG */
-
-	--share->use_count;
-
-	if (share->use_count == 0) {
-		/* That was the last user: unregister and destroy. */
-		ulint	fold = ut_fold_string(share->table_name);
-
-		HASH_DELETE(INNOBASE_SHARE, table_name_hash,
-			    innobase_open_tables, fold, share);
-
-		thr_lock_delete(&share->lock);
-
-		/* Free any memory from index translation table */
-		my_free(share->idx_trans_tbl.index_mapping);
-
-		my_free(share);
-
-		/* TODO: invoke HASH_MIGRATE if innobase_open_tables
-		shrinks too much */
-	}
-
-	mysql_mutex_unlock(&innobase_share_mutex);
-}
-
-/*****************************************************************//**
-Converts a MySQL table lock stored in the 'lock' field of the handle to
-a proper type before storing pointer to the lock into an array of pointers.
-MySQL also calls this if it wants to reset some table locks to a not-locked
-state during the processing of an SQL query. An example is that during a
-SELECT the read lock is released early on the 'const' tables where we only
-fetch one row. MySQL does not call this when it releases all locks at the
-end of an SQL statement.
-@return pointer to the next element in the 'to' array */
-UNIV_INTERN
-THR_LOCK_DATA**
-ha_innobase::store_lock(
-/*====================*/
- THD* thd, /*!< in: user thread handle */
- THR_LOCK_DATA** to, /*!< in: pointer to an array
- of pointers to lock structs;
- pointer to the 'lock' field
- of current handle is stored
- next to this array */
- enum thr_lock_type lock_type) /*!< in: lock type to store in
- 'lock'; this may also be
- TL_IGNORE */
-{
- trx_t* trx;
-
- /* Note that trx in this function is NOT necessarily prebuilt->trx
- because we call update_thd() later, in ::external_lock()! Failure to
- understand this caused a serious memory corruption bug in 5.1.11. */
-
- trx = check_trx_exists(thd);
-
- /* NOTE: MySQL can call this function with lock 'type' TL_IGNORE!
- Be careful to ignore TL_IGNORE if we are going to do something with
- only 'real' locks! */
-
- /* If no MySQL table is in use, we need to set the isolation level
- of the transaction. */
-
- if (lock_type != TL_IGNORE
- && trx->n_mysql_tables_in_use == 0) {
- trx->isolation_level = innobase_map_isolation_level(
- (enum_tx_isolation) thd_tx_isolation(thd));
-
- if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
- && trx->global_read_view) {
-
- /* At low transaction isolation levels we let
- each consistent read set its own snapshot */
-
- read_view_close_for_mysql(trx);
- }
- }
-
- DBUG_ASSERT(EQ_CURRENT_THD(thd));
- const bool in_lock_tables = thd_in_lock_tables(thd);
- const uint sql_command = thd_sql_command(thd);
-
- if (srv_read_only_mode
- && (sql_command == SQLCOM_UPDATE
- || sql_command == SQLCOM_INSERT
- || sql_command == SQLCOM_REPLACE
- || sql_command == SQLCOM_DROP_TABLE
- || sql_command == SQLCOM_ALTER_TABLE
- || sql_command == SQLCOM_OPTIMIZE
- || (sql_command == SQLCOM_CREATE_TABLE
- && (lock_type >= TL_WRITE_CONCURRENT_INSERT
- && lock_type <= TL_WRITE))
- || sql_command == SQLCOM_CREATE_INDEX
- || sql_command == SQLCOM_DROP_INDEX
- || sql_command == SQLCOM_DELETE)) {
-
- ib_senderrf(trx->mysql_thd,
- IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
-
- } else if (sql_command == SQLCOM_FLUSH
- && lock_type == TL_READ_NO_INSERT) {
-
- /* Check for FLUSH TABLES ... WITH READ LOCK */
-
- /* Note: This call can fail, but there is no way to return
- the error to the caller. We simply ignore it for now here
- and push the error code to the caller where the error is
- detected in the function. */
-
- dberr_t err = row_quiesce_set_state(
- prebuilt->table, QUIESCE_START, trx);
-
- ut_a(err == DB_SUCCESS || err == DB_UNSUPPORTED);
-
- if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
- prebuilt->select_lock_type = LOCK_S;
- prebuilt->stored_select_lock_type = LOCK_S;
- } else {
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
- }
-
- /* Check for DROP TABLE */
- } else if (sql_command == SQLCOM_DROP_TABLE) {
-
- /* MySQL calls this function in DROP TABLE though this table
- handle may belong to another thd that is running a query. Let
- us in that case skip any changes to the prebuilt struct. */
-
- /* Check for LOCK TABLE t1,...,tn WITH SHARED LOCKS */
- } else if ((lock_type == TL_READ && in_lock_tables)
- || (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables)
- || lock_type == TL_READ_WITH_SHARED_LOCKS
- || lock_type == TL_READ_NO_INSERT
- || (lock_type != TL_IGNORE
- && sql_command != SQLCOM_SELECT)) {
-
- /* The OR cases above are in this order:
- 1) MySQL is doing LOCK TABLES ... READ LOCAL, or we
- are processing a stored procedure or function, or
- 2) (we do not know when TL_READ_HIGH_PRIORITY is used), or
- 3) this is a SELECT ... IN SHARE MODE, or
- 4) we are doing a complex SQL statement like
- INSERT INTO ... SELECT ... and the logical logging (MySQL
- binlog) requires the use of a locking read, or
- MySQL is doing LOCK TABLES ... READ.
- 5) we let InnoDB do locking reads for all SQL statements that
- are not simple SELECTs; note that select_lock_type in this
- case may get strengthened in ::external_lock() to LOCK_X.
- Note that we MUST use a locking read in all data modifying
- SQL statements, because otherwise the execution would not be
- serializable, and also the results from the update could be
- unexpected if an obsolete consistent read view would be
- used. */
-
- /* Use consistent read for checksum table */
-
- if (sql_command == SQLCOM_CHECKSUM
- || sql_command == SQLCOM_CHECKSUM
- || ((srv_locks_unsafe_for_binlog
- || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
- && trx->isolation_level != TRX_ISO_SERIALIZABLE
- && (lock_type == TL_READ
- || lock_type == TL_READ_NO_INSERT)
- && (sql_command == SQLCOM_INSERT_SELECT
- || sql_command == SQLCOM_REPLACE_SELECT
- || sql_command == SQLCOM_UPDATE
- || sql_command == SQLCOM_CREATE_TABLE))) {
-
- /* If we either have innobase_locks_unsafe_for_binlog
- option set or this session is using READ COMMITTED
- isolation level and isolation level of the transaction
- is not set to serializable and MySQL is doing
- INSERT INTO...SELECT or REPLACE INTO...SELECT
- or UPDATE ... = (SELECT ...) or CREATE ...
- SELECT... without FOR UPDATE or IN SHARE
- MODE in select, then we use consistent read
- for select. */
-
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
- } else {
- prebuilt->select_lock_type = LOCK_S;
- prebuilt->stored_select_lock_type = LOCK_S;
- }
-
- } else if (lock_type != TL_IGNORE) {
-
- /* We set possible LOCK_X value in external_lock, not yet
- here even if this would be SELECT ... FOR UPDATE */
-
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE;
- }
-
- if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {
-
- /* Starting from 5.0.7, we weaken also the table locks
- set at the start of a MySQL stored procedure call, just like
- we weaken the locks set at the start of an SQL statement.
- MySQL does set in_lock_tables TRUE there, but in reality
- we do not need table locks to make the execution of a
- single transaction stored procedure call deterministic
- (if it does not use a consistent read). */
-
- if (lock_type == TL_READ
- && sql_command == SQLCOM_LOCK_TABLES) {
- /* We come here if MySQL is processing LOCK TABLES
- ... READ LOCAL. MyISAM under that table lock type
- reads the table as it was at the time the lock was
- granted (new inserts are allowed, but not seen by the
- reader). To get a similar effect on an InnoDB table,
- we must use LOCK TABLES ... READ. We convert the lock
- type here, so that for InnoDB, READ LOCAL is
- equivalent to READ. This will change the InnoDB
- behavior in mysqldump, so that dumps of InnoDB tables
- are consistent with dumps of MyISAM tables. */
-
- lock_type = TL_READ_NO_INSERT;
- }
-
- /* If we are not doing a LOCK TABLE, DISCARD/IMPORT
- TABLESPACE or TRUNCATE TABLE then allow multiple
- writers. Note that ALTER TABLE uses a TL_WRITE_ALLOW_READ
- < TL_WRITE_CONCURRENT_INSERT.
-
- We especially allow multiple writers if MySQL is at the
- start of a stored procedure call (SQLCOM_CALL) or a
- stored function call (MySQL does have in_lock_tables
- TRUE there). */
-
- if ((lock_type >= TL_WRITE_CONCURRENT_INSERT
- && lock_type <= TL_WRITE)
- && !(in_lock_tables
- && sql_command == SQLCOM_LOCK_TABLES)
- && !thd_tablespace_op(thd)
- && sql_command != SQLCOM_TRUNCATE
- && sql_command != SQLCOM_OPTIMIZE
- && sql_command != SQLCOM_CREATE_TABLE) {
-
- lock_type = TL_WRITE_ALLOW_WRITE;
- }
-
- /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
- MySQL would use the lock TL_READ_NO_INSERT on t2, and that
- would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
- to t2. Convert the lock to a normal read lock to allow
- concurrent inserts to t2.
-
- We especially allow concurrent inserts if MySQL is at the
- start of a stored procedure call (SQLCOM_CALL)
- (MySQL does have thd_in_lock_tables() TRUE there). */
-
- if (lock_type == TL_READ_NO_INSERT
- && sql_command != SQLCOM_LOCK_TABLES) {
-
- lock_type = TL_READ;
- }
-
- lock.type = lock_type;
- }
-
- *to++= &lock;
-
- if (!trx_is_started(trx)
- && (prebuilt->select_lock_type != LOCK_NONE
- || prebuilt->stored_select_lock_type != LOCK_NONE)) {
-
- ++trx->will_lock;
- }
-
- return(to);
-}
-
-/*********************************************************************//**
-Read the next autoinc value. Acquire the relevant locks before reading
-the AUTOINC value. If SUCCESS then the table AUTOINC mutex will be locked
-on return and all relevant locks acquired.
-The outcome of the call is also stored in prebuilt->autoinc_error.
-If the counter reads as 0 the result is DB_UNSUPPORTED and
-dict_table_autoinc_unlock() is called before returning, so the caller
-only has to unlock after DB_SUCCESS.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-ha_innobase::innobase_get_autoinc(
-/*==============================*/
- ulonglong* value) /*!< out: autoinc value; set to 0 first, so
- it stays 0 when locking fails */
-{
- *value = 0;
-
- prebuilt->autoinc_error = innobase_lock_autoinc();
-
- if (prebuilt->autoinc_error == DB_SUCCESS) {
-
- /* Determine the first value of the interval */
- *value = dict_table_autoinc_read(prebuilt->table);
-
- /* It should have been initialized during open. A value of 0 is
- therefore reported as an error and the lock is given back. */
- if (*value == 0) {
- prebuilt->autoinc_error = DB_UNSUPPORTED;
- dict_table_autoinc_unlock(prebuilt->table);
- }
- }
-
- return(prebuilt->autoinc_error);
-}
-
-/*******************************************************************//**
-This function reads the global auto-inc counter. It doesn't use the
-AUTOINC lock even if the lock mode is set to TRADITIONAL.
-The read is bracketed by dict_table_autoinc_lock() and
-dict_table_autoinc_unlock() on prebuilt->table. A counter value of 0 is
-reported on stderr and still returned to the caller.
-@return the autoinc value */
-UNIV_INTERN
-ulonglong
-ha_innobase::innobase_peek_autoinc(void)
-/*====================================*/
-{
- ulonglong auto_inc;
- dict_table_t* innodb_table;
-
- ut_a(prebuilt != NULL);
- ut_a(prebuilt->table != NULL);
-
- innodb_table = prebuilt->table;
-
- dict_table_autoinc_lock(innodb_table);
-
- auto_inc = dict_table_autoinc_read(innodb_table);
-
- if (auto_inc == 0) { /* only diagnosed here, not turned into an error */
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: AUTOINC next value generation "
- "is disabled for '%s'\n", innodb_table->name);
- }
-
- dict_table_autoinc_unlock(innodb_table);
-
- return(auto_inc);
-}
-
-/*********************************************************************//**
-Returns the value of the auto-inc counter in *first_value and ~0 on failure.
-
-Outline of what the body does:
-1) innobase_get_autoinc() fetches the table counter; on error *first_value
-   is set to ~0 and the function returns immediately.
-2) When increment > 1 (and the statement is not ALTER TABLE) the counter is
-   aligned to the offset/increment pair, falling back to the unaligned
-   value if the aligned one reaches the column maximum.
-3) *first_value is raised to the counter when this is the first call of
-   the statement or when no interval is currently remembered.
-4) If *first_value is beyond the column maximum, *nb_reserved_values is
-   set to 0, the AUTOINC lock is released and the function returns.
-5) Otherwise *nb_reserved_values is set to trx->n_autoinc_rows and, unless
-   old style AUTOINC locking is configured, the end of the reserved
-   interval is computed and pushed into the table counter.
-6) offset and increment are remembered in prebuilt and the AUTOINC lock
-   is released. */
-UNIV_INTERN
-void
-ha_innobase::get_auto_increment(
-/*============================*/
- ulonglong offset, /*!< in: table autoinc offset */
- ulonglong increment, /*!< in: table autoinc
- increment */
- ulonglong nb_desired_values, /*!< in: number of values
- reqd */
- ulonglong* first_value, /*!< out: the autoinc value */
- ulonglong* nb_reserved_values) /*!< out: count of reserved
- values */
-{
- trx_t* trx;
- dberr_t error;
- ulonglong autoinc = 0;
-
- /* Prepare prebuilt->trx in the table handle */
- update_thd(ha_thd());
-
- error = innobase_get_autoinc(&autoinc);
-
- if (error != DB_SUCCESS) {
- *first_value = (~(ulonglong) 0);
- return;
- }
-
- /* This is a hack, since nb_desired_values seems to be accurate only
- for the first call to get_auto_increment() for multi-row INSERT and
- meaningless for other statements e.g, LOAD etc. Subsequent calls to
- this method for the same statement results in different values which
- don't make sense. Therefore we store the value the first time we are
- called and count down from that as rows are written (see write_row()).
- */
-
- trx = prebuilt->trx;
-
- /* Note: We can't rely on *first_value since some MySQL engines,
- in particular the partition engine, don't initialize it to 0 when
- invoking this method. So we are not sure if it's guaranteed to
- be 0 or not. */
-
- /* We need the upper limit of the col type to check for
- whether we update the table autoinc counter or not. */
- ulonglong col_max_value = innobase_get_int_col_max_value(
- table->next_number_field);
-
- /** The following logic is needed to avoid duplicate key error
- for autoincrement column.
-
- (1) InnoDB gives the current autoincrement value with respect
- to increment and offset value.
-
- (2) Basically it does compute_next_insert_id() logic inside InnoDB
- to avoid the current auto increment value changed by handler layer.
-
- (3) It is restricted only for insert operations. */
-
- if (increment > 1 && thd_sql_command(user_thd) != SQLCOM_ALTER_TABLE
- && autoinc < col_max_value) {
-
- ulonglong prev_auto_inc = autoinc;
-
- autoinc = ((autoinc - 1) + increment - offset)/ increment;
-
- autoinc = autoinc * increment + offset;
-
- /* If autoinc exceeds the col_max_value then reset
- to old autoinc value. Because in case of non-strict
- sql mode, boundary value is not considered as error. */
-
- if (autoinc >= col_max_value) {
- autoinc = prev_auto_inc;
- }
-
- ut_ad(autoinc > 0);
- }
-
- /* Called for the first time ? */
- if (trx->n_autoinc_rows == 0) {
-
- trx->n_autoinc_rows = (ulint) nb_desired_values;
-
- /* It's possible for nb_desired_values to be 0:
- e.g., INSERT INTO T1(C) SELECT C FROM T2; */
- if (nb_desired_values == 0) {
-
- trx->n_autoinc_rows = 1;
- }
-
- set_if_bigger(*first_value, autoinc);
- /* Not in the middle of a multi-row INSERT. */
- } else if (prebuilt->autoinc_last_value == 0) {
- set_if_bigger(*first_value, autoinc);
- }
-
- if (*first_value > col_max_value)
- {
- /* Out of range number. Let handler::update_auto_increment()
- take care of this. Note that this early return leaves
- prebuilt->autoinc_offset and prebuilt->autoinc_increment
- untouched. */
- prebuilt->autoinc_last_value = 0;
- dict_table_autoinc_unlock(prebuilt->table);
- *nb_reserved_values = 0;
- return;
- }
- *nb_reserved_values = trx->n_autoinc_rows;
-
- /* With old style AUTOINC locking we only update the table's
- AUTOINC counter after attempting to insert the row. */
- if (innobase_autoinc_lock_mode != AUTOINC_OLD_STYLE_LOCKING) {
- ulonglong current;
- ulonglong next_value;
-
- current = *first_value;
-
- if (prebuilt->autoinc_increment != increment) { /* the increment
- differs from the one remembered by the previous call */
-
- WSREP_DEBUG("autoinc decrease: %llu -> %llu\n"
- "THD: %ld, current: %llu, autoinc: %llu",
- prebuilt->autoinc_increment,
- increment,
- thd_get_thread_id(ha_thd()),
- current, autoinc);
- if (!wsrep_on(ha_thd()))
- {
- current = autoinc - prebuilt->autoinc_increment;
- }
-
- current = innobase_next_autoinc(
- current, 1, increment, offset, col_max_value);
-
- dict_table_autoinc_initialize(prebuilt->table, current);
-
- *first_value = current;
- }
-
- /* Compute the last value in the interval */
- next_value = innobase_next_autoinc(
- current, *nb_reserved_values, increment, offset,
- col_max_value);
-
- prebuilt->autoinc_last_value = next_value;
-
- if (prebuilt->autoinc_last_value < *first_value) {
- *first_value = (~(ulonglong) 0); /* same failure marker as
- used when innobase_get_autoinc() fails */
- } else {
- /* Update the table autoinc variable */
- dict_table_autoinc_update_if_greater(
- prebuilt->table, prebuilt->autoinc_last_value);
- }
- } else {
- /* This will force write_row() into attempting an update
- of the table's AUTOINC counter. */
- prebuilt->autoinc_last_value = 0;
- }
-
- /* The increment to be used to increase the AUTOINC value, we use
- this in write_row() and update_row() to increase the autoinc counter
- for columns that are filled by the user. We need the offset and
- the increment. */
- prebuilt->autoinc_offset = offset;
- prebuilt->autoinc_increment = increment;
-
- dict_table_autoinc_unlock(prebuilt->table);
-}
-
-/*******************************************************************//**
-Reset the auto-increment counter to the given value, i.e. the next row
-inserted will get the given value. This is called e.g. after TRUNCATE
-is emulated by doing a 'DELETE FROM t'. HA_ERR_WRONG_COMMAND is
-returned by storage engines that don't support this operation.
-The AUTOINC table lock is requested first through
-row_lock_table_autoinc_for_mysql(); if that fails, its error is converted
-with convert_error_code_to_mysql() and returned without touching the
-counter. A requested value of 0 is replaced by 1.
-@return 0 or error code */
-UNIV_INTERN
-int
-ha_innobase::reset_auto_increment(
-/*==============================*/
- ulonglong value) /*!< in: new value for table autoinc */
-{
- DBUG_ENTER("ha_innobase::reset_auto_increment");
-
- dberr_t error;
-
- update_thd(ha_thd());
-
- error = row_lock_table_autoinc_for_mysql(prebuilt);
-
- if (error != DB_SUCCESS) {
- DBUG_RETURN(convert_error_code_to_mysql(
- error, prebuilt->table->flags, user_thd));
- }
-
- /* The next value can never be 0. */
- if (value == 0) {
- value = 1;
- }
-
- innobase_reset_autoinc(value);
-
- DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-See comment in handler.cc
-Copies an error text into buf: a fixed explanation for
-HA_ERR_DECRYPTION_FAILED, otherwise the detailed_error string of the
-transaction returned by check_trx_exists() for the current thread.
-@return always FALSE */
-UNIV_INTERN
-bool
-ha_innobase::get_error_message(
-/*===========================*/
- int error, /*!< in: handler error code */
- String* buf) /*!< out: receives the message text */
-{
- trx_t* trx = check_trx_exists(ha_thd());
-
- if (error == HA_ERR_DECRYPTION_FAILED) {
- const char *msg = "Table encrypted but decryption failed. This could be because correct encryption management plugin is not loaded, used encryption key is not available or encryption method does not match.";
- buf->copy(msg, (uint)strlen(msg), system_charset_info);
- } else {
- buf->copy(trx->detailed_error, (uint) strlen(trx->detailed_error),
- system_charset_info);
- }
-
- return(FALSE);
-}
-
-/*******************************************************************//**
- Retrieves the names of the table and the key for which there was a
- duplicate entry in the case of HA_ERR_FOREIGN_DUPLICATE_KEY.
-
- If any of the names is not available, then this method will return
- false and will not change any of child_table_name or child_key_name.
-
- The index is taken from trx_get_error_info(prebuilt->trx). The table
- name written out is the part of the InnoDB table name after the first
- '/' (or the whole name if there is no '/'), converted with
- filename_to_tablename().
-
- @param child_table_name[out] Table name
- @param child_table_name_len[in] Table name buffer size
- @param child_key_name[out] Key name
- @param child_key_name_len[in] Key name buffer size
-
- @retval true table and key names were available
- and were written into the corresponding
- out parameters.
- @retval false table and key names were not available,
- the out parameters were not touched.
-*/
-bool
-ha_innobase::get_foreign_dup_key(
-/*=============================*/
- char* child_table_name,
- uint child_table_name_len,
- char* child_key_name,
- uint child_key_name_len)
-{
- const dict_index_t* err_index;
-
- ut_a(prebuilt->trx != NULL);
- ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
-
- err_index = trx_get_error_info(prebuilt->trx);
-
- if (err_index == NULL) {
- return(false);
- }
- /* else */
-
- /* copy table name (and convert from filename-safe encoding to
- system_charset_info) */
- char* p;
- p = strchr(err_index->table->name, '/');
- /* strip ".../" prefix if any */
- if (p != NULL) {
- p++;
- } else {
- p = err_index->table->name;
- }
- uint len;
- len = filename_to_tablename(p, child_table_name, child_table_name_len);
- child_table_name[len] = '\0'; /* NOTE(review): relies on len being
- smaller than child_table_name_len - confirm against
- filename_to_tablename() */
-
- /* copy index name */
- ut_snprintf(child_key_name, child_key_name_len, "%s", err_index->name);
-
- return(true);
-}
-
-/*******************************************************************//**
-Compares two 'refs'. A 'ref' is the (internal) primary key value of the row.
-If there is no explicitly declared non-null unique key or a primary key, then
-InnoDB internally uses the row id as the primary key.
-In that generated-index case the two refs are compared with memcmp() over
-DATA_ROW_ID_LEN bytes. Otherwise the key parts of the primary key are
-compared one after the other and the first non-zero result is returned.
-@return < 0 if ref1 < ref2, 0 if equal, else > 0 */
-UNIV_INTERN
-int
-ha_innobase::cmp_ref(
-/*=================*/
- const uchar* ref1, /*!< in: an (internal) primary key value in the
- MySQL key value format */
- const uchar* ref2) /*!< in: an (internal) primary key value in the
- MySQL key value format */
-{
- enum_field_types mysql_type;
- Field* field;
- KEY_PART_INFO* key_part;
- KEY_PART_INFO* key_part_end;
- uint len1;
- uint len2;
- int result;
-
- if (prebuilt->clust_index_was_generated) {
- /* The 'ref' is an InnoDB row id */
-
- return(memcmp(ref1, ref2, DATA_ROW_ID_LEN));
- }
-
- /* Do a type-aware comparison of primary key fields. PK fields
- are always NOT NULL, so no checks for NULL are performed. */
-
- key_part = table->key_info[table->s->primary_key].key_part;
-
- key_part_end = key_part
- + table->key_info[table->s->primary_key].user_defined_key_parts;
-
- for (; key_part != key_part_end; ++key_part) {
- field = key_part->field;
- mysql_type = field->type();
-
- if (mysql_type == MYSQL_TYPE_TINY_BLOB
- || mysql_type == MYSQL_TYPE_MEDIUM_BLOB
- || mysql_type == MYSQL_TYPE_BLOB
- || mysql_type == MYSQL_TYPE_LONG_BLOB) {
-
- /* In the MySQL key value format, a column prefix of
- a BLOB is preceded by a 2-byte length field */
-
- len1 = innobase_read_from_2_little_endian(ref1);
- len2 = innobase_read_from_2_little_endian(ref2);
-
- result = ((Field_blob*) field)->cmp(
- ref1 + 2, len1, ref2 + 2, len2);
- } else {
- result = field->key_cmp(ref1, ref2);
- }
-
- if (result) {
-
- return(result);
- }
-
- /* Advance both refs to the start of the next key part */
- ref1 += key_part->store_length;
- ref2 += key_part->store_length;
- }
-
- return(0);
-}
-
-/*******************************************************************//**
-Ask InnoDB if a query to a table can be cached.
-Besides answering the question, the function hands back
-innobase_query_caching_of_table_permitted through *call_back and resets
-*engine_data to 0 before that function is consulted.
-@return TRUE if query caching of the table is permitted */
-UNIV_INTERN
-my_bool
-ha_innobase::register_query_cache_table(
-/*====================================*/
- THD* thd, /*!< in: user thread handle */
- const char* table_key, /*!< in: normalized path to the
- table */
- uint key_length, /*!< in: length of the normalized
- path to the table */
- qc_engine_callback*
- call_back, /*!< out: pointer to function for
- checking if query caching
- is permitted */
- ulonglong *engine_data) /*!< in/out: data to call_back */
-{
- *call_back = innobase_query_caching_of_table_permitted;
- *engine_data = 0;
- return(innobase_query_caching_of_table_permitted(thd, table_key,
- key_length,
- engine_data));
-}
-
-/*******************************************************************//**
-Get the bin log name.
-@return the value of trx_sys_mysql_bin_log_name */
-UNIV_INTERN
-const char*
-ha_innobase::get_mysql_bin_log_name()
-/*=================================*/
-{
- return(trx_sys_mysql_bin_log_name);
-}
-
-/*******************************************************************//**
-Get the bin log offset (or file position).
-@return the value of trx_sys_mysql_bin_log_pos, converted to ulonglong */
-UNIV_INTERN
-ulonglong
-ha_innobase::get_mysql_bin_log_pos()
-/*================================*/
-{
- /* trx... is ib_int64_t, which is a typedef for a 64-bit integer
- (__int64 or longlong) so it's ok to cast it to ulonglong. */
-
- return(trx_sys_mysql_bin_log_pos);
-}
-
-/******************************************************************//**
-This function is used to find the storage length in bytes of the first n
-characters for prefix indexes using a multibyte character set. The function
-finds charset information and returns length of prefix_len characters in the
-index field in bytes.
-For a charset whose mbmaxlen is 1 the result is simply the smaller of
-data_len and prefix_len. In every case the result is at most data_len.
-@return number of bytes occupied by the first n characters */
-UNIV_INTERN
-ulint
-innobase_get_at_most_n_mbchars(
-/*===========================*/
- ulint charset_id, /*!< in: character set id */
- ulint prefix_len, /*!< in: prefix length in bytes of the index
- (this has to be divided by mbmaxlen to get the
- number of CHARACTERS n in the prefix) */
- ulint data_len, /*!< in: length of the string in bytes */
- const char* str) /*!< in: character string */
-{
- ulint char_length; /*!< character length in bytes */
- ulint n_chars; /*!< number of characters in prefix */
- CHARSET_INFO* charset; /*!< charset used in the field */
-
- charset = get_charset((uint) charset_id, MYF(MY_WME));
-
- ut_ad(charset);
- ut_ad(charset->mbmaxlen);
-
- /* Calculate how many characters at most the prefix index contains */
-
- n_chars = prefix_len / charset->mbmaxlen;
-
- /* If the charset is multi-byte, then we must find the length of the
- first at most n chars in the string. If the string contains less
- characters than n, then we return the length to the end of the last
- character. */
-
- if (charset->mbmaxlen > 1) {
- /* my_charpos() returns the byte length of the first n_chars
- characters, or a value bigger than the length of str, if
- there were not enough full characters in str.
-
- Why does the code below work:
- Suppose that we are looking for n UTF-8 characters.
-
- 1) If the string is long enough, then the prefix contains at
- least n complete UTF-8 characters + maybe some extra
- characters + an incomplete UTF-8 character. No problem in
- this case. The function returns the pointer to the
- end of the nth character.
-
- 2) If the string is not long enough, then the string contains
- the complete value of a column, that is, only complete UTF-8
- characters, and we can store in the column prefix index the
- whole string. */
-
- char_length = my_charpos(charset, str,
- str + data_len, (int) n_chars);
- if (char_length > data_len) { /* case 2) above: clamp to the
- whole string */
- char_length = data_len;
- }
- } else {
- if (data_len < prefix_len) {
- char_length = data_len;
- } else {
- char_length = prefix_len;
- }
- }
-
- return(char_length);
-}
-
-/*******************************************************************//**
-This function is used to prepare an X/Open XA distributed transaction.
-Return paths visible in the body:
-- 0 straight away when trx->support_xa is not set;
-- for a transaction with fake_changes set, HA_ERR_WRONG_COMMAND when the
-  whole transaction is to be prepared or the statement ends under
-  autocommit, otherwise 0;
-- otherwise the local `error', which is only ever assigned 0.
-@return 0 or error number */
-static
-int
-innobase_xa_prepare(
-/*================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- THD* thd, /*!< in: handle to the MySQL thread of
- the user whose XA transaction should
- be prepared */
- bool prepare_trx) /*!< in: true - prepare transaction
- false - the current SQL statement
- ended */
-{
- int error = 0;
- trx_t* trx = check_trx_exists(thd);
-
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- /* we use support_xa value as it was seen at transaction start
- time, not the current session variable value. Any possible changes
- to the session variable take effect only in the next transaction */
- if (!trx->support_xa) {
-#ifdef WITH_WSREP
- thd_get_xid(thd, (MYSQL_XID*) &trx->xid);
-#endif // WITH_WSREP
-
- return(0);
- }
-
- if (UNIV_UNLIKELY(trx->fake_changes)) {
-
- if (prepare_trx
- || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT
- | OPTION_BEGIN))) {
-
- thd->get_stmt_da()->reset_diagnostics_area();
- return(HA_ERR_WRONG_COMMAND);
- }
- return(0);
- }
-
- thd_get_xid(thd, (MYSQL_XID*) &trx->xid);
-
- /* Release a possible FIFO ticket and search latch. Since we will
- reserve the trx_sys->mutex, we have to release the search system
- latch first to obey the latching order. */
-
- trx_search_latch_release_if_reserved(trx);
-
- innobase_srv_conc_force_exit_innodb(trx);
-
- if (!trx_is_registered_for_2pc(trx) && trx_is_started(trx)) {
-
- sql_print_error("Transaction not registered for MySQL 2PC, "
- "but transaction is active");
- }
-
- if (prepare_trx
- || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
-
- /* We were instructed to prepare the whole transaction, or
- this is an SQL statement end and autocommit is on */
-
- ut_ad(trx_is_registered_for_2pc(trx));
-
- trx_prepare_for_mysql(trx);
-
- DBUG_EXECUTE_IF("crash_innodb_after_prepare",
- DBUG_SUICIDE(););
-
- error = 0;
- } else {
- /* We just mark the SQL statement ended and do not do a
- transaction prepare */
-
- /* If we had reserved the auto-inc lock for some
- table in this SQL statement we release it now */
-
- lock_unlock_table_autoinc(trx);
-
- /* Store the current undo_no of the transaction so that we
- know where to roll back if we have to roll back the next
- SQL statement */
-
- trx_mark_sql_stat_end(trx);
- }
-
- if (thd_sql_command(thd) != SQLCOM_XA_PREPARE
- && (prepare_trx
- || !thd_test_options(
- thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
-
- /* For mysqlbackup to work the order of transactions in binlog
- and InnoDB must be the same. Consider the situation
-
- thread1> prepare; write to binlog; ...
- <context switch>
- thread2> prepare; write to binlog; commit
- thread1> ... commit
-
- The server guarantees that writes to the binary log
- and commits are in the same order, so we do not have
- to handle this case.
-
- This branch therefore contains no statements; only the
- condition above is evaluated. */
- }
-
- return(error);
-}
-
-/*******************************************************************//**
-This function is used to recover X/Open XA distributed transactions.
-Nothing is attempted (and 0 is returned) when xid_list is NULL or len
-is 0; otherwise the work is delegated to trx_recover_for_mysql().
-@return number of prepared transactions stored in xid_list */
-static
-int
-innobase_xa_recover(
-/*================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- XID* xid_list,/*!< in/out: prepared transactions */
- uint len) /*!< in: number of slots in xid_list */
-{
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- if (len == 0 || xid_list == NULL) {
-
- return(0);
- }
-
- return(trx_recover_for_mysql(xid_list, len));
-}
-
-/*******************************************************************//**
-This function is used to commit one X/Open XA distributed transaction
-which is in the prepared state
-The transaction is looked up with trx_get_trx_by_xid(); when found it is
-committed with innobase_commit_low() and then released with
-trx_free_for_background().
-@return XA_OK if a transaction with the given XID was found,
-XAER_NOTA otherwise */
-static
-int
-innobase_commit_by_xid(
-/*===================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- XID* xid) /*!< in: X/Open XA transaction identification */
-{
- trx_t* trx;
-
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- trx = trx_get_trx_by_xid(xid);
-
- if (trx) {
- innobase_commit_low(trx);
- trx_free_for_background(trx);
- return(XA_OK);
- } else {
- return(XAER_NOTA);
- }
-}
-
-/*******************************************************************//**
-This function is used to rollback one X/Open XA distributed transaction
-which is in the prepared state
-The transaction is looked up with trx_get_trx_by_xid(); when found it is
-rolled back with innobase_rollback_trx() and then released with
-trx_free_for_background().
-@return the result of innobase_rollback_trx() if a transaction with the
-given XID was found, XAER_NOTA otherwise */
-static
-int
-innobase_rollback_by_xid(
-/*=====================*/
- handlerton* hton, /*!< in: InnoDB handlerton */
- XID* xid) /*!< in: X/Open XA transaction
- identification */
-{
- trx_t* trx;
-
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- trx = trx_get_trx_by_xid(xid);
-
- if (trx) {
- int ret = innobase_rollback_trx(trx);
- trx_free_for_background(trx);
- return(ret);
- } else {
- return(XAER_NOTA);
- }
-}
-
-/*******************************************************************//**
-Create a consistent view for a cursor based on current transaction
-which is created if the corresponding MySQL thread still lacks one.
-This consistent view is then used inside of MySQL when accessing records
-using a cursor.
-The view itself is produced by read_cursor_view_create_for_mysql() on the
-transaction obtained from check_trx_exists(thd).
-@return pointer to cursor view or NULL */
-static
-void*
-innobase_create_cursor_view(
-/*========================*/
- handlerton* hton, /*!< in: innobase hton */
- THD* thd) /*!< in: user thread handle */
-{
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- return(read_cursor_view_create_for_mysql(check_trx_exists(thd)));
-}
-
-/*******************************************************************//**
-Close the given consistent cursor view of a transaction and restore
-global read view to a transaction read view. Transaction is created if the
-corresponding MySQL thread still lacks one.
-The work is delegated to read_cursor_view_close_for_mysql(), with curview
-cast to cursor_view_t*. */
-static
-void
-innobase_close_cursor_view(
-/*=======================*/
- handlerton* hton, /*!< in: innobase hton */
- THD* thd, /*!< in: user thread handle */
- void* curview)/*!< in: Consistent read view to be closed */
-{
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- read_cursor_view_close_for_mysql(check_trx_exists(thd),
- (cursor_view_t*) curview);
-}
-
-/*******************************************************************//**
-Set the given consistent cursor view to a transaction which is created
-if the corresponding MySQL thread still lacks one. If the given
-consistent cursor view is NULL global read view of a transaction is
-restored to a transaction read view.
-The work is delegated to read_cursor_set_for_mysql(), with curview cast
-to cursor_view_t*. */
-static
-void
-innobase_set_cursor_view(
-/*=====================*/
- handlerton* hton, /*!< in: innobase hton */
- THD* thd, /*!< in: user thread handle */
- void* curview)/*!< in: Consistent cursor view to be set */
-{
- DBUG_ASSERT(hton == innodb_hton_ptr);
-
- read_cursor_set_for_mysql(check_trx_exists(thd),
- (cursor_view_t*) curview);
-}
-
-/*******************************************************************//**
-Reports whether the requested table definition is compatible with the
-existing one. COMPATIBLE_DATA_NO is returned when any of the following
-holds, checked in this order:
-- table_changes is not IS_EQUAL_YES;
-- an AUTO_INCREMENT value was given and is not 0;
-- a ROW_FORMAT was given and differs from get_row_type();
-- a KEY_BLOCK_SIZE was given;
-- page_compressed, page_compression_level or atomic_writes differ
-  between the new and the old engine-specific options.
-Before any check, innobase_copy_frm_flags_from_create_info() is called
-on prebuilt->table with the new create info.
-@return COMPATIBLE_DATA_YES or COMPATIBLE_DATA_NO
-*/
-UNIV_INTERN
-bool
-ha_innobase::check_if_incompatible_data(
-/*====================================*/
- HA_CREATE_INFO* info, /*!< in: requested create info */
- uint table_changes) /*!< in: compared against
- IS_EQUAL_YES */
-{
- ha_table_option_struct *param_old, *param_new;
-
- /* Cache engine specific options */
- param_new = info->option_struct;
- param_old = table->s->option_struct;
-
- innobase_copy_frm_flags_from_create_info(prebuilt->table, info);
-
- if (table_changes != IS_EQUAL_YES) {
-
- return(COMPATIBLE_DATA_NO);
- }
-
- /* Check that auto_increment value was not changed */
- if ((info->used_fields & HA_CREATE_USED_AUTO) &&
- info->auto_increment_value != 0) {
-
- return(COMPATIBLE_DATA_NO);
- }
-
- /* Check that row format didn't change */
- if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT)
- && info->row_type != get_row_type()) {
-
- return(COMPATIBLE_DATA_NO);
- }
-
- /* Specifying KEY_BLOCK_SIZE requests a rebuild of the table. */
- if (info->used_fields & HA_CREATE_USED_KEY_BLOCK_SIZE) {
- return(COMPATIBLE_DATA_NO);
- }
-
- /* Changes to engine specific table options request a rebuild of the table. */
- if (param_new->page_compressed != param_old->page_compressed ||
- param_new->page_compression_level != param_old->page_compression_level ||
- param_new->atomic_writes != param_old->atomic_writes) {
- return(COMPATIBLE_DATA_NO);
- }
-
- return(COMPATIBLE_DATA_YES);
-}
-
-/****************************************************************//**
-Update the system variable innodb_io_capacity_max using the "saved"
-value. This function is registered as a callback with MySQL.
-If the new maximum is below the current srv_io_capacity, two warnings are
-pushed and srv_io_capacity is lowered to the new maximum as well. The new
-value is always stored in srv_max_io_capacity. */
-static
-void
-innodb_io_capacity_max_update(
-/*===========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes
- (not referenced in
- this function) */
- const void* save) /*!< in: immediate result
- from check function */
-{
- ulong in_val = *static_cast<const ulong*>(save);
- if (in_val < srv_io_capacity) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "Setting innodb_io_capacity_max %lu"
- " lower than innodb_io_capacity %lu.",
- in_val, srv_io_capacity);
-
- srv_io_capacity = in_val;
-
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "Setting innodb_io_capacity to %lu",
- srv_io_capacity);
- }
-
- srv_max_io_capacity = in_val;
-}
-
-/****************************************************************//**
-Update the system variable innodb_io_capacity using the "saved"
-value. This function is registered as a callback with MySQL.
-If the new value exceeds the current srv_max_io_capacity, two warnings
-are pushed and srv_max_io_capacity is raised to twice the new value. The
-new value is always stored in srv_io_capacity. */
-static
-void
-innodb_io_capacity_update(
-/*======================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes
- (not referenced in
- this function) */
- const void* save) /*!< in: immediate result
- from check function */
-{
- ulong in_val = *static_cast<const ulong*>(save);
- if (in_val > srv_max_io_capacity) {
-
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "Setting innodb_io_capacity to %lu"
- " higher than innodb_io_capacity_max %lu",
- in_val, srv_max_io_capacity);
-
- srv_max_io_capacity = in_val * 2;
-
- /* NOTE(review): the message below spells the variable
- "innodb_max_io_capacity" while the message above uses
- "innodb_io_capacity_max" - confirm which name is intended. */
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "Setting innodb_max_io_capacity to %lu",
- srv_max_io_capacity);
- }
-
- srv_io_capacity = in_val;
-}
-
-/****************************************************************//**
-Update the system variable innodb_log_arch_expire_sec using
-the "saved" value. This function is registered as a callback with MySQL.
-The saved value is read as a ulint and stored in
-srv_log_arch_expire_sec; no other argument is referenced. */
-static
-void
-innodb_log_archive_expire_update(
-/*==============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr, /*!< out: unused */
- const void* save) /*!< in: immediate result
- from check function */
-{
- srv_log_arch_expire_sec = *(ulint*) save;
-}
-
-static
-void
-innodb_log_archive_update(
-/*======================*/
- THD* thd, /*!< in: not referenced here */
- struct st_mysql_sys_var* var, /*!< in: not referenced here */
- void* var_ptr, /*!< not referenced here */
- const void* save) /*!< in: new setting, read as my_bool */
-{ /* Switches log archiving on or off according to the saved
- boolean: both innobase_log_archive and srv_log_archive_on are
- set to 1 or 0 and log_archive_archivelog() or
- log_archive_noarchivelog() is called. Nothing is done when
- srv_read_only_mode is set. */
- if (srv_read_only_mode)
- return;
-
- my_bool in_val = *static_cast<const my_bool*>(save);
-
- if (in_val) {
- /* turn archiving on */
- innobase_log_archive = srv_log_archive_on = 1;
- log_archive_archivelog();
- } else {
- /* turn archiving off */
- innobase_log_archive = srv_log_archive_on = 0;
- log_archive_noarchivelog();
- }
-}
-
-/****************************************************************//**
-Update the system variable innodb_max_dirty_pages_pct using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_max_dirty_pages_pct_update(
-/*==============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- double in_val = *static_cast<const double*>(save);
- if (in_val < srv_max_dirty_pages_pct_lwm) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "innodb_max_dirty_pages_pct cannot be"
- " set lower than"
- " innodb_max_dirty_pages_pct_lwm.");
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "Lowering"
- " innodb_max_dirty_page_pct_lwm to %lf",
- in_val);
-
- srv_max_dirty_pages_pct_lwm = in_val;
- }
-
- srv_max_buf_pool_modified_pct = in_val;
-}
-
-/****************************************************************//**
-Update the system variable innodb_max_dirty_pages_pct_lwm using the
-"saved" value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_max_dirty_pages_pct_lwm_update(
-/*==================================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- double in_val = *static_cast<const double*>(save);
- if (in_val > srv_max_buf_pool_modified_pct) {
- in_val = srv_max_buf_pool_modified_pct;
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "innodb_max_dirty_pages_pct_lwm"
- " cannot be set higher than"
- " innodb_max_dirty_pages_pct.");
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "Setting innodb_max_dirty_page_pct_lwm"
- " to %lf",
- in_val);
- }
-
- srv_max_dirty_pages_pct_lwm = in_val;
-}
-
-UNIV_INTERN
-void
-ha_innobase::set_partition_owner_stats(ha_statistics *stats)
-{
- ha_partition_stats= stats;
-}
-
-/************************************************************//**
-Validate the file format name and return its corresponding id.
-@return valid file format id */
-static
-uint
-innobase_file_format_name_lookup(
-/*=============================*/
- const char* format_name) /*!< in: pointer to file format name */
-{
- char* endp;
- uint format_id;
-
- ut_a(format_name != NULL);
-
- /* The format name can contain the format id itself instead of
- the name and we check for that. */
- format_id = (uint) strtoul(format_name, &endp, 10);
-
- /* Check for valid parse. */
- if (*endp == '\0' && *format_name != '\0') {
-
- if (format_id <= UNIV_FORMAT_MAX) {
-
- return(format_id);
- }
- } else {
-
- for (format_id = 0; format_id <= UNIV_FORMAT_MAX;
- format_id++) {
- const char* name;
-
- name = trx_sys_file_format_id_to_name(format_id);
-
- if (!innobase_strcasecmp(format_name, name)) {
-
- return(format_id);
- }
- }
- }
-
- return(UNIV_FORMAT_MAX + 1);
-}
-
-/************************************************************//**
-Validate the file format check config parameters, as a side effect it
-sets the srv_max_file_format_at_startup variable.
-@return the format_id if valid config value, otherwise, return -1 */
-static
-int
-innobase_file_format_validate_and_set(
-/*==================================*/
- const char* format_max) /*!< in: parameter value */
-{
- uint format_id;
-
- format_id = innobase_file_format_name_lookup(format_max);
-
- if (format_id < UNIV_FORMAT_MAX + 1) {
- srv_max_file_format_at_startup = format_id;
-
- return((int) format_id);
- } else {
- return(-1);
- }
-}
-
-/*************************************************************//**
-Check if it is a valid file format. This function is registered as
-a callback with MySQL.
-@return 0 for valid file format */
-static
-int
-innodb_file_format_name_validate(
-/*=============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value) /*!< in: incoming string */
-{
- const char* file_format_input;
- char buff[STRING_BUFFER_USUAL_SIZE];
- int len = sizeof(buff);
-
- ut_a(save != NULL);
- ut_a(value != NULL);
-
- file_format_input = value->val_str(value, buff, &len);
-
- if (file_format_input != NULL) {
- uint format_id;
-
- format_id = innobase_file_format_name_lookup(
- file_format_input);
-
- if (format_id <= UNIV_FORMAT_MAX) {
-
- /* Save a pointer to the name in the
- 'file_format_name_map' constant array. */
- *static_cast<const char**>(save) =
- trx_sys_file_format_id_to_name(format_id);
-
- return(0);
- }
- }
-
- *static_cast<const char**>(save) = NULL;
- return(1);
-}
-
-/****************************************************************//**
-Update the system variable innodb_file_format using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_file_format_name_update(
-/*===========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr, /*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- const char* format_name;
-
- ut_a(var_ptr != NULL);
- ut_a(save != NULL);
-
- format_name = *static_cast<const char*const*>(save);
-
- if (format_name) {
- uint format_id;
-
- format_id = innobase_file_format_name_lookup(format_name);
-
- if (format_id <= UNIV_FORMAT_MAX) {
- srv_file_format = format_id;
- }
- }
-
- *static_cast<const char**>(var_ptr)
- = trx_sys_file_format_id_to_name(srv_file_format);
-}
-
-/*************************************************************//**
-Check if valid argument to innodb_file_format_max. This function
-is registered as a callback with MySQL.
-@return 0 for valid file format */
-static
-int
-innodb_file_format_max_validate(
-/*============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value) /*!< in: incoming string */
-{
- const char* file_format_input;
- char buff[STRING_BUFFER_USUAL_SIZE];
- int len = sizeof(buff);
- int format_id;
-
- ut_a(save != NULL);
- ut_a(value != NULL);
-
- file_format_input = value->val_str(value, buff, &len);
-
- if (file_format_input != NULL) {
-
- format_id = innobase_file_format_validate_and_set(
- file_format_input);
-
- if (format_id >= 0) {
- /* Save a pointer to the name in the
- 'file_format_name_map' constant array. */
- *static_cast<const char**>(save) =
- trx_sys_file_format_id_to_name(
- (uint) format_id);
-
- return(0);
-
- } else {
- push_warning_printf(thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "InnoDB: invalid innodb_file_format_max "
- "value; can be any format up to %s "
- "or equivalent id of %d",
- trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX),
- UNIV_FORMAT_MAX);
- }
- }
-
- *static_cast<const char**>(save) = NULL;
- return(1);
-}
-
-/****************************************************************//**
-Update the system variable innodb_file_format_max using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_file_format_max_update(
-/*==========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- const char* format_name_in;
- const char** format_name_out;
- uint format_id;
-
- ut_a(save != NULL);
- ut_a(var_ptr != NULL);
-
- format_name_in = *static_cast<const char*const*>(save);
-
- if (!format_name_in) {
-
- return;
- }
-
- format_id = innobase_file_format_name_lookup(format_name_in);
-
- if (format_id > UNIV_FORMAT_MAX) {
- /* DEFAULT is "on", which is invalid at runtime. */
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "Ignoring SET innodb_file_format=%s",
- format_name_in);
- return;
- }
-
- format_name_out = static_cast<const char**>(var_ptr);
-
- /* Update the max format id in the system tablespace. */
- if (trx_sys_file_format_max_set(format_id, format_name_out)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " [Info] InnoDB: the file format in the system "
- "tablespace is now set to %s.\n", *format_name_out);
- }
-}
-
-/*************************************************************//**
-Check whether valid argument given to innobase_*_stopword_table.
-This function is registered as a callback with MySQL.
-@return 0 for valid stopword table */
-static
-int
-innodb_stopword_table_validate(
-/*===========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value) /*!< in: incoming string */
-{
- const char* stopword_table_name;
- char buff[STRING_BUFFER_USUAL_SIZE];
- int len = sizeof(buff);
- trx_t* trx;
- int ret = 1;
-
- ut_a(save != NULL);
- ut_a(value != NULL);
-
- stopword_table_name = value->val_str(value, buff, &len);
-
- trx = check_trx_exists(thd);
-
- row_mysql_lock_data_dictionary(trx);
-
- /* Validate the stopword table's (if supplied) existence and
- of the right format */
- if (!stopword_table_name
- || fts_valid_stopword_table(stopword_table_name)) {
- *static_cast<const char**>(save) = stopword_table_name;
- ret = 0;
- }
-
- row_mysql_unlock_data_dictionary(trx);
-
- return(ret);
-}
-
-/*************************************************************//**
-Check whether valid argument given to "innodb_fts_internal_tbl_name"
-This function is registered as a callback with MySQL.
-@return 0 for valid stopword table */
-static
-int
-innodb_internal_table_validate(
-/*===========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value) /*!< in: incoming string */
-{
- const char* table_name;
- char buff[STRING_BUFFER_USUAL_SIZE];
- int len = sizeof(buff);
- int ret = 1;
- dict_table_t* user_table;
-
- ut_a(save != NULL);
- ut_a(value != NULL);
-
- table_name = value->val_str(value, buff, &len);
-
- if (!table_name) {
- *static_cast<const char**>(save) = NULL;
- return(0);
- }
-
- user_table = dict_table_open_on_name(
- table_name, FALSE, TRUE, DICT_ERR_IGNORE_NONE);
-
- if (user_table) {
- if (dict_table_has_fts_index(user_table)) {
- *static_cast<const char**>(save) = table_name;
- ret = 0;
- }
-
- dict_table_close(user_table, FALSE, TRUE);
-
- DBUG_EXECUTE_IF("innodb_evict_autoinc_table",
- mutex_enter(&dict_sys->mutex);
- dict_table_remove_from_cache_low(user_table, TRUE);
- mutex_exit(&dict_sys->mutex);
- );
- }
-
- return(ret);
-}
-
-/****************************************************************//**
-Update global variable "fts_internal_tbl_name" with the "saved"
-stopword table name value. This function is registered as a callback
-with MySQL. */
-static
-void
-innodb_internal_table_update(
-/*=========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- const char* table_name;
- char* old;
-
- ut_a(save != NULL);
- ut_a(var_ptr != NULL);
-
- table_name = *static_cast<const char*const*>(save);
- old = *(char**) var_ptr;
-
- if (table_name) {
- *(char**) var_ptr = my_strdup(table_name, MYF(0));
- } else {
- *(char**) var_ptr = NULL;
- }
-
- if (old) {
- my_free(old);
- }
-
- fts_internal_tbl_name2 = *(char**) var_ptr;
- if (fts_internal_tbl_name2 == NULL) {
- fts_internal_tbl_name = const_cast<char*>("default");
- } else {
- fts_internal_tbl_name = fts_internal_tbl_name2;
- }
-}
-
-/****************************************************************//**
-Update the system variable innodb_adaptive_hash_index using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_adaptive_hash_index_update(
-/*==============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- if (*(my_bool*) save) {
- btr_search_enable();
- } else {
- btr_search_disable();
- }
-}
-
-/****************************************************************//**
-Update the system variable innodb_cmp_per_index using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_cmp_per_index_update(
-/*========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- /* Reset the stats whenever we enable the table
- INFORMATION_SCHEMA.innodb_cmp_per_index. */
- if (!srv_cmp_per_index_enabled && *(my_bool*) save) {
- page_zip_reset_stat_per_index();
- }
-
- srv_cmp_per_index_enabled = !!(*(my_bool*) save);
-}
-
-/****************************************************************//**
-Update the system variable innodb_old_blocks_pct using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_old_blocks_pct_update(
-/*=========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- innobase_old_blocks_pct = static_cast<uint>(
- buf_LRU_old_ratio_update(
- *static_cast<const uint*>(save), TRUE));
-}
-
-/****************************************************************//**
-Update the system variable innodb_old_blocks_pct using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_change_buffer_max_size_update(
-/*=================================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- innobase_change_buffer_max_size =
- (*static_cast<const uint*>(save));
- ibuf_max_size_update(innobase_change_buffer_max_size);
-}
-
-#ifdef UNIV_DEBUG
-ulong srv_fil_make_page_dirty_debug = 0;
-ulong srv_saved_page_number_debug = 0;
-
-/****************************************************************//**
-Save an InnoDB page number. */
-static
-void
-innodb_save_page_no(
-/*================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- srv_saved_page_number_debug = *static_cast<const ulong*>(save);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Saving InnoDB page number: %lu",
- srv_saved_page_number_debug);
-}
-
-/****************************************************************//**
-Make the first page of given user tablespace dirty. */
-static
-void
-innodb_make_page_dirty(
-/*===================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- mtr_t mtr;
- ulong space_id = *static_cast<const ulong*>(save);
-
- mtr_start(&mtr);
-
- buf_block_t* block = buf_page_get(
- space_id, 0, srv_saved_page_number_debug, RW_X_LATCH, &mtr);
-
- if (block) {
- byte* page = block->frame;
- ib_logf(IB_LOG_LEVEL_INFO,
- "Dirtying page:%lu of space:%lu",
- page_get_page_no(page),
- page_get_space_id(page));
- mlog_write_ulint(page + FIL_PAGE_TYPE,
- fil_page_get_type(page),
- MLOG_2BYTES, &mtr);
- }
- mtr_commit(&mtr);
-}
-#endif // UNIV_DEBUG
-
-/*************************************************************//**
-Find the corresponding ibuf_use_t value that indexes into
-innobase_change_buffering_values[] array for the input
-change buffering option name.
-@return corresponding IBUF_USE_* value for the input variable
-name, or IBUF_USE_COUNT if not able to find a match */
-static
-ibuf_use_t
-innodb_find_change_buffering_value(
-/*===============================*/
- const char* input_name) /*!< in: input change buffering
- option name */
-{
- ulint use;
-
- for (use = 0; use < UT_ARR_SIZE(innobase_change_buffering_values);
- use++) {
- /* found a match */
- if (!innobase_strcasecmp(
- input_name, innobase_change_buffering_values[use])) {
- return((ibuf_use_t) use);
- }
- }
-
- /* Did not find any match */
- return(IBUF_USE_COUNT);
-}
-
-/*************************************************************//**
-Check if it is a valid value of innodb_change_buffering. This function is
-registered as a callback with MySQL.
-@return 0 for valid innodb_change_buffering */
-static
-int
-innodb_change_buffering_validate(
-/*=============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value) /*!< in: incoming string */
-{
- const char* change_buffering_input;
- char buff[STRING_BUFFER_USUAL_SIZE];
- int len = sizeof(buff);
-
- ut_a(save != NULL);
- ut_a(value != NULL);
-
- change_buffering_input = value->val_str(value, buff, &len);
-
- if (change_buffering_input != NULL) {
- ibuf_use_t use;
-
- use = innodb_find_change_buffering_value(
- change_buffering_input);
-
- if (use != IBUF_USE_COUNT) {
- /* Find a matching change_buffering option value. */
- *static_cast<const char**>(save) =
- innobase_change_buffering_values[use];
-
- return(0);
- }
- }
-
- /* No corresponding change buffering option for user supplied
- "change_buffering_input" */
- return(1);
-}
-
-/****************************************************************//**
-Update the system variable innodb_change_buffering using the "saved"
-value. This function is registered as a callback with MySQL. */
-static
-void
-innodb_change_buffering_update(
-/*===========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- ibuf_use_t use;
-
- ut_a(var_ptr != NULL);
- ut_a(save != NULL);
-
- use = innodb_find_change_buffering_value(
- *static_cast<const char*const*>(save));
-
- ut_a(use < IBUF_USE_COUNT);
-
- ibuf_use = use;
- *static_cast<const char**>(var_ptr) =
- *static_cast<const char*const*>(save);
-}
-
-/*************************************************************//**
-Just emit a warning that the usage of the variable is deprecated.
-@return 0 */
-static
-void
-innodb_stats_sample_pages_update(
-/*=============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
-#define STATS_SAMPLE_PAGES_DEPRECATED_MSG \
- "Using innodb_stats_sample_pages is deprecated and " \
- "the variable may be removed in future releases. " \
- "Please use innodb_stats_transient_sample_pages " \
- "instead."
-
- push_warning(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_WRONG_COMMAND, STATS_SAMPLE_PAGES_DEPRECATED_MSG);
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: %s\n",
- STATS_SAMPLE_PAGES_DEPRECATED_MSG);
-
- srv_stats_transient_sample_pages =
- *static_cast<const unsigned long long*>(save);
-}
-
-/****************************************************************//**
-Update the monitor counter according to the "set_option", turn
-on/off or reset specified monitor counter. */
-static
-void
-innodb_monitor_set_option(
-/*======================*/
- const monitor_info_t* monitor_info,/*!< in: monitor info for the monitor
- to set */
- mon_option_t set_option) /*!< in: Turn on/off reset the
- counter */
-{
- monitor_id_t monitor_id = monitor_info->monitor_id;
-
- /* If module type is MONITOR_GROUP_MODULE, it cannot be
- turned on/off individually. It should never use this
- function to set options */
- ut_a(!(monitor_info->monitor_type & MONITOR_GROUP_MODULE));
-
- switch (set_option) {
- case MONITOR_TURN_ON:
- MONITOR_ON(monitor_id);
- MONITOR_INIT(monitor_id);
- MONITOR_SET_START(monitor_id);
-
- /* If the monitor to be turned on uses
- exisitng monitor counter (status variable),
- make special processing to remember existing
- counter value. */
- if (monitor_info->monitor_type
- & MONITOR_EXISTING) {
- srv_mon_process_existing_counter(
- monitor_id, MONITOR_TURN_ON);
- }
- break;
-
- case MONITOR_TURN_OFF:
- if (monitor_info->monitor_type & MONITOR_EXISTING) {
- srv_mon_process_existing_counter(
- monitor_id, MONITOR_TURN_OFF);
- }
-
- MONITOR_OFF(monitor_id);
- MONITOR_SET_OFF(monitor_id);
- break;
-
- case MONITOR_RESET_VALUE:
- srv_mon_reset(monitor_id);
- break;
-
- case MONITOR_RESET_ALL_VALUE:
- srv_mon_reset_all(monitor_id);
- break;
-
- default:
- ut_error;
- }
-}
-
-/****************************************************************//**
-Find matching InnoDB monitor counters and update their status
-according to the "set_option", turn on/off or reset specified
-monitor counter. */
-static
-void
-innodb_monitor_update_wildcard(
-/*===========================*/
- const char* name, /*!< in: monitor name to match */
- mon_option_t set_option) /*!< in: the set option, whether
- to turn on/off or reset the counter */
-{
- ut_a(name);
-
- for (ulint use = 0; use < NUM_MONITOR; use++) {
- ulint type;
- monitor_id_t monitor_id = static_cast<monitor_id_t>(use);
- monitor_info_t* monitor_info;
-
- if (!innobase_wildcasecmp(
- srv_mon_get_name(monitor_id), name)) {
- monitor_info = srv_mon_get_info(monitor_id);
-
- type = monitor_info->monitor_type;
-
- /* If the monitor counter is of MONITOR_MODULE
- type, skip it. Except for those also marked with
- MONITOR_GROUP_MODULE flag, which can be turned
- on only as a module. */
- if (!(type & MONITOR_MODULE)
- && !(type & MONITOR_GROUP_MODULE)) {
- innodb_monitor_set_option(monitor_info,
- set_option);
- }
-
- /* Need to special handle counters marked with
- MONITOR_GROUP_MODULE, turn on the whole module if
- any one of it comes here. Currently, only
- "module_buf_page" is marked with MONITOR_GROUP_MODULE */
- if (type & MONITOR_GROUP_MODULE) {
- if ((monitor_id >= MONITOR_MODULE_BUF_PAGE)
- && (monitor_id < MONITOR_MODULE_OS)) {
- if (set_option == MONITOR_TURN_ON
- && MONITOR_IS_ON(
- MONITOR_MODULE_BUF_PAGE)) {
- continue;
- }
-
- srv_mon_set_module_control(
- MONITOR_MODULE_BUF_PAGE,
- set_option);
- } else {
- /* If new monitor is added with
- MONITOR_GROUP_MODULE, it needs
- to be added here. */
- ut_ad(0);
- }
- }
- }
- }
-}
-
-/*************************************************************//**
-Given a configuration variable name, find corresponding monitor counter
-and return its monitor ID if found.
-@return monitor ID if found, MONITOR_NO_MATCH if there is no match */
-static
-ulint
-innodb_monitor_id_by_name_get(
-/*==========================*/
- const char* name) /*!< in: monitor counter namer */
-{
- ut_a(name);
-
- /* Search for wild character '%' in the name, if
- found, we treat it as a wildcard match. We do not search for
- single character wildcard '_' since our monitor names already contain
- such character. To avoid confusion, we request user must include
- at least one '%' character to activate the wildcard search. */
- if (strchr(name, '%')) {
- return(MONITOR_WILDCARD_MATCH);
- }
-
- /* Not wildcard match, check for an exact match */
- for (ulint i = 0; i < NUM_MONITOR; i++) {
- if (!innobase_strcasecmp(
- name, srv_mon_get_name(static_cast<monitor_id_t>(i)))) {
- return(i);
- }
- }
-
- return(MONITOR_NO_MATCH);
-}
-/*************************************************************//**
-Validate that the passed in monitor name matches at least one
-monitor counter name with wildcard compare.
-@return TRUE if at least one monitor name matches */
-static
-ibool
-innodb_monitor_validate_wildcard_name(
-/*==================================*/
- const char* name) /*!< in: monitor counter namer */
-{
- for (ulint i = 0; i < NUM_MONITOR; i++) {
- if (!innobase_wildcasecmp(
- srv_mon_get_name(static_cast<monitor_id_t>(i)), name)) {
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-/*************************************************************//**
-Validate the passed in monitor name, find and save the
-corresponding monitor name in the function parameter "save".
-@return 0 if monitor name is valid */
-static
-int
-innodb_monitor_valid_byname(
-/*========================*/
- void* save, /*!< out: immediate result
- for update function */
- const char* name) /*!< in: incoming monitor name */
-{
- ulint use;
- monitor_info_t* monitor_info;
-
- if (!name) {
- return(1);
- }
-
- use = innodb_monitor_id_by_name_get(name);
-
- /* No monitor name matches, nor it is wildcard match */
- if (use == MONITOR_NO_MATCH) {
- return(1);
- }
-
- if (use < NUM_MONITOR) {
- monitor_info = srv_mon_get_info((monitor_id_t) use);
-
- /* If the monitor counter is marked with
- MONITOR_GROUP_MODULE flag, then this counter
- cannot be turned on/off individually, instead
- it shall be turned on/off as a group using
- its module name */
- if ((monitor_info->monitor_type & MONITOR_GROUP_MODULE)
- && (!(monitor_info->monitor_type & MONITOR_MODULE))) {
- sql_print_warning(
- "Monitor counter '%s' cannot"
- " be turned on/off individually."
- " Please use its module name"
- " to turn on/off the counters"
- " in the module as a group.\n",
- name);
-
- return(1);
- }
-
- } else {
- ut_a(use == MONITOR_WILDCARD_MATCH);
-
- /* For wildcard match, if there is not a single monitor
- counter name that matches, treat it as an invalid
- value for the system configuration variables */
- if (!innodb_monitor_validate_wildcard_name(name)) {
- return(1);
- }
- }
-
- /* Save the configure name for innodb_monitor_update() */
- *static_cast<const char**>(save) = name;
-
- return(0);
-}
-/*************************************************************//**
-Validate passed-in "value" is a valid monitor counter name.
-This function is registered as a callback with MySQL.
-@return 0 for valid name */
-static
-int
-innodb_monitor_validate(
-/*====================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value) /*!< in: incoming string */
-{
- const char* name;
- char* monitor_name;
- char buff[STRING_BUFFER_USUAL_SIZE];
- int len = sizeof(buff);
- int ret;
-
- ut_a(save != NULL);
- ut_a(value != NULL);
-
- name = value->val_str(value, buff, &len);
-
- /* monitor_name could point to memory from MySQL
- or buff[]. Always dup the name to memory allocated
- by InnoDB, so we can access it in another callback
- function innodb_monitor_update() and free it appropriately */
- if (name) {
- monitor_name = my_strdup(name, MYF(0));
- } else {
- return(1);
- }
-
- ret = innodb_monitor_valid_byname(save, monitor_name);
-
- if (ret) {
- /* Validation failed */
- my_free(monitor_name);
- } else {
- /* monitor_name will be freed in separate callback function
- innodb_monitor_update(). Assert "save" point to
- the "monitor_name" variable */
- ut_ad(*static_cast<char**>(save) == monitor_name);
- }
-
- return(ret);
-}
-
-/****************************************************************//**
-Update the system variable innodb_enable(disable/reset/reset_all)_monitor
-according to the "set_option" and turn on/off or reset specified monitor
-counter. */
-static
-void
-innodb_monitor_update(
-/*==================*/
- THD* thd, /*!< in: thread handle */
- void* var_ptr, /*!< out: where the
- formal string goes */
- const void* save, /*!< in: immediate result
- from check function */
- mon_option_t set_option, /*!< in: the set option,
- whether to turn on/off or
- reset the counter */
- ibool free_mem) /*!< in: whether we will
- need to free the memory */
-{
- monitor_info_t* monitor_info;
- ulint monitor_id;
- ulint err_monitor = 0;
- const char* name;
-
- ut_a(save != NULL);
-
- name = *static_cast<const char*const*>(save);
-
- if (!name) {
- monitor_id = MONITOR_DEFAULT_START;
- } else {
- monitor_id = innodb_monitor_id_by_name_get(name);
-
- /* Double check we have a valid monitor ID */
- if (monitor_id == MONITOR_NO_MATCH) {
- return;
- }
- }
-
- if (monitor_id == MONITOR_DEFAULT_START) {
- /* If user set the variable to "default", we will
- print a message and make this set operation a "noop".
- The check is being made here is because "set default"
- does not go through validation function */
- if (thd) {
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN,
- ER_NO_DEFAULT,
- "Default value is not defined for "
- "this set option. Please specify "
- "correct counter or module name.");
- } else {
- sql_print_error(
- "Default value is not defined for "
- "this set option. Please specify "
- "correct counter or module name.\n");
- }
-
- if (var_ptr) {
- *(const char**) var_ptr = NULL;
- }
- } else if (monitor_id == MONITOR_WILDCARD_MATCH) {
- innodb_monitor_update_wildcard(name, set_option);
- } else {
- monitor_info = srv_mon_get_info(
- static_cast<monitor_id_t>(monitor_id));
-
- ut_a(monitor_info);
-
- /* If monitor is already truned on, someone could already
- collect monitor data, exit and ask user to turn off the
- monitor before turn it on again. */
- if (set_option == MONITOR_TURN_ON
- && MONITOR_IS_ON(monitor_id)) {
- err_monitor = monitor_id;
- goto exit;
- }
-
- if (var_ptr) {
- *(const char**) var_ptr = monitor_info->monitor_name;
- }
-
- /* Depending on the monitor name is for a module or
- a counter, process counters in the whole module or
- individual counter. */
- if (monitor_info->monitor_type & MONITOR_MODULE) {
- srv_mon_set_module_control(
- static_cast<monitor_id_t>(monitor_id),
- set_option);
- } else {
- innodb_monitor_set_option(monitor_info, set_option);
- }
- }
-exit:
- /* Only if we are trying to turn on a monitor that already
- been turned on, we will set err_monitor. Print related
- information */
- if (err_monitor) {
- sql_print_warning("Monitor %s is already enabled.",
- srv_mon_get_name((monitor_id_t) err_monitor));
- }
-
- if (free_mem && name) {
- my_free((void*) name);
- }
-
- return;
-}
-
-#ifdef __WIN__
-/*************************************************************//**
-Validate if passed-in "value" is a valid value for
-innodb_buffer_pool_filename. On Windows, file names with colon (:)
-are not allowed.
-The candidate string is fetched with value->val_str() and checked with
-is_filename_allowed(). On success the string pointer is stored in *save.
-When the check fails a warning is pushed to the client; a NULL string is
-rejected without a warning. When __WIN__ is not defined, this name is
-defined as NULL instead of a function.
-
-@return 0 for valid name, 1 otherwise */
-static
-int
-innodb_srv_buf_dump_filename_validate(
-/*==================================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value) /*!< in: incoming string */
-{
- const char* buf_name;
- char buff[OS_FILE_MAX_PATH];
- int len= sizeof(buff);
-
- ut_a(save != NULL);
- ut_a(value != NULL);
-
- buf_name = value->val_str(value, buff, &len);
-
- if (buf_name) {
- if (is_filename_allowed(buf_name, len, FALSE)){
- *static_cast<const char**>(save) = buf_name; /* NOTE(review):
- buf_name may point into the local buff[]; confirm that val_str()
- never hands back that buffer here, or copy the string first */
- return(0);
- } else {
- push_warning_printf(thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "InnoDB: innodb_buffer_pool_filename "
- "cannot have colon (:) in the file name.");
-
- }
- }
-
- return(1);
-}
-#else /* __WIN__ */
-# define innodb_srv_buf_dump_filename_validate NULL
-#endif /* __WIN__ */
-
-#ifdef UNIV_DEBUG
-static char* srv_buffer_pool_evict;
-
-/****************************************************************//**
-Evict all uncompressed pages of compressed tables from the buffer pool.
-Keep the compressed pages in the buffer pool.
-Every buffer pool instance is scanned from the tail of its unzip_LRU
-list. The result of each instance is combined, so a page that could not
-be evicted in one instance is not hidden by a later instance in which
-every page was evicted.
-@return whether all uncompressed pages were evicted */
-static MY_ATTRIBUTE((warn_unused_result))
-bool
-innodb_buffer_pool_evict_uncompressed(void)
-/*=======================================*/
-{
-	bool	all_evicted = true;
-
-	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
-		buf_pool_t*	buf_pool = &buf_pool_ptr[i];
-		/* Outcome of the most recent eviction attempt in this
-		instance. It stays true when the list is empty. */
-		bool		last_freed = true;
-
-		mutex_enter(&buf_pool->LRU_list_mutex);
-
-		for (buf_block_t* block = UT_LIST_GET_LAST(
-			     buf_pool->unzip_LRU);
-		     block != NULL; ) {
-			buf_block_t*	prev_block = UT_LIST_GET_PREV(
-				unzip_LRU, block);
-			ut_ad(buf_block_get_state(block)
-			      == BUF_BLOCK_FILE_PAGE);
-			ut_ad(block->in_unzip_LRU_list);
-			ut_ad(block->page.in_LRU_list);
-
-			mutex_enter(&block->mutex);
-			last_freed = buf_LRU_free_page(&block->page, false);
-			mutex_exit(&block->mutex);
-
-			if (last_freed) {
-				/* The LRU list mutex is taken again here,
-				presumably because a successful
-				buf_LRU_free_page() released it (TODO
-				confirm). Restart from the tail. */
-				mutex_enter(&buf_pool->LRU_list_mutex);
-				block = UT_LIST_GET_LAST(buf_pool->unzip_LRU);
-			} else {
-
-				block = prev_block;
-			}
-		}
-
-		mutex_exit(&buf_pool->LRU_list_mutex);
-
-		/* The scan of one instance ends either right after a
-		successful eviction that emptied the list, or after a
-		failed attempt. Remember a failure across instances. */
-		if (!last_freed) {
-			all_evicted = false;
-		}
-	}
-
-	return(all_evicted);
-}
-
-/****************************************************************//**
-Update callback for SET GLOBAL innodb_buffer_pool_evict=...
-Only the value 'uncompressed' triggers any work: the uncompressed page
-frames of compressed tablespaces are evicted by calling
-innodb_buffer_pool_evict_uncompressed() repeatedly, with an
-os_thread_sleep(10000) pause between attempts, for at most 10000
-attempts. A NULL value or any other string is ignored. If the pages
-still could not be evicted after the last attempt, a debug assertion
-fires. */
-static
-void
-innodb_buffer_pool_evict_update(
-/*============================*/
-	THD*			thd,	/*!< in: thread handle */
-	struct st_mysql_sys_var* var,	/*!< in: pointer to system variable */
-	void*			var_ptr,/*!< out: ignored */
-	const void*		save)	/*!< in: immediate result
-					from check function */
-{
-	const char* const	requested
-		= *static_cast<const char*const*>(save);
-
-	if (requested == NULL || strcmp(requested, "uncompressed") != 0) {
-		return;
-	}
-
-	uint	attempt = 0;
-
-	while (attempt < 10000) {
-		if (innodb_buffer_pool_evict_uncompressed()) {
-			return;
-		}
-
-		os_thread_sleep(10000);
-		++attempt;
-	}
-
-	/* Every attempt left some uncompressed page behind. */
-	ut_ad(0);
-}
-#endif /* UNIV_DEBUG */
-
-/****************************************************************//**
-Update the system variable innodb_monitor_enable and enable
-specified monitor counter.
-Thin wrapper: forwards thd, var_ptr and save unchanged to
-innodb_monitor_update() with the MONITOR_TURN_ON option and TRUE as the
-last argument; "var" is not used here.
-This function is registered as a callback with MySQL. */
-static
-void
-innodb_enable_monitor_update(
-/*=========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- innodb_monitor_update(thd, var_ptr, save, MONITOR_TURN_ON, TRUE);
-}
-
-/****************************************************************//**
-Update the system variable innodb_monitor_disable and turn
-off specified monitor counter.
-Thin wrapper: forwards thd, var_ptr and save unchanged to
-innodb_monitor_update() with the MONITOR_TURN_OFF option and TRUE as the
-last argument; "var" is not used here. */
-static
-void
-innodb_disable_monitor_update(
-/*==========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- innodb_monitor_update(thd, var_ptr, save, MONITOR_TURN_OFF, TRUE);
-}
-
-/****************************************************************//**
-Update the system variable innodb_monitor_reset and reset
-specified monitor counter(s).
-Thin wrapper: forwards thd, var_ptr and save unchanged to
-innodb_monitor_update() with the MONITOR_RESET_VALUE option and TRUE as
-the last argument; "var" is not used here.
-This function is registered as a callback with MySQL. */
-static
-void
-innodb_reset_monitor_update(
-/*========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- innodb_monitor_update(thd, var_ptr, save, MONITOR_RESET_VALUE, TRUE);
-}
-
-/****************************************************************//**
-Update the system variable innodb_monitor_reset_all and reset
-all value related monitor counter.
-Thin wrapper: forwards thd, var_ptr and save unchanged to
-innodb_monitor_update() with the MONITOR_RESET_ALL_VALUE option and TRUE
-as the last argument; "var" is not used here.
-This function is registered as a callback with MySQL. */
-static
-void
-innodb_reset_all_monitor_update(
-/*============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- innodb_monitor_update(thd, var_ptr, save, MONITOR_RESET_ALL_VALUE,
- TRUE);
-}
-
-/****************************************************************//**
-Update callback for the defragmentation frequency setting.
-Stores the new value in srv_defragment_frequency and recomputes
-srv_defragment_interval as ut_microseconds_to_timer(1000000.0 /
-frequency). thd, var and var_ptr are not used.
-NOTE(review): a frequency of 0 would divide by zero here; presumably
-the variable's registered minimum rules that out - confirm. */
-static
-void
-innodb_defragment_frequency_update(
-/*===============================*/
-	THD*				thd,	/*!< in: thread handle */
-	struct st_mysql_sys_var*	var,	/*!< in: pointer to
-						system variable */
-	void*				var_ptr,/*!< out: not used */
-	const void*			save)	/*!< in: immediate result
-						from check function */
-{
-	const uint	new_frequency = *static_cast<const uint*>(save);
-
-	srv_defragment_frequency = new_frequency;
-
-	/* The interval is derived from the value just stored. */
-	srv_defragment_interval = ut_microseconds_to_timer(
-		1000000.0 / srv_defragment_frequency);
-}
-
-/****************************************************************//**
-Parse the startup list of InnoDB monitor counters and turn each one on.
-The list comes from "loose-innodb_monitor_enable=name1;name2..." in the
-server configuration file or on the command line. Names may be separated
-by ";", "," or a space. Each name is checked with
-innodb_monitor_valid_byname(); valid names are passed to
-innodb_monitor_update() with MONITOR_TURN_ON, invalid ones produce a
-warning in the error log. The input string is modified by strtok_r(). */
-static
-void
-innodb_enable_monitor_at_startup(
-/*=============================*/
-	char*	str)	/*!< in/out: monitor counter enable list */
-{
-	static const char*	sep = " ;,";
-	char*			last;
-
-	ut_a(str);
-
-	char*	option = strtok_r(str, sep, &last);
-
-	while (option != NULL) {
-		char*	option_name;
-
-		/* A zero return value means the name is valid. */
-		if (innodb_monitor_valid_byname(&option_name, option) == 0) {
-			innodb_monitor_update(NULL, NULL, &option,
-					      MONITOR_TURN_ON, FALSE);
-		} else {
-			sql_print_warning("Invalid monitor counter"
-					  " name: '%s'", option);
-		}
-
-		option = strtok_r(NULL, sep, &last);
-	}
-}
-
-#ifdef UNIV_LINUX
-
-/****************************************************************//**
-Update the innodb_sched_priority_cleaner variable and set the thread
-priorities accordingly.
-In read-only mode the call returns without doing anything. Otherwise the
-requested priority is applied first to the LRU manager thread and then to
-the page cleaner thread. For each thread a warning is pushed when the
-priority reported by os_thread_set_priority() differs from the request and
-actual_priority + nice differs from it as well.
-srv_sched_priority_cleaner is assigned only when the LRU manager thread
-reports exactly the requested priority; the page cleaner result does not
-affect it. */
-static
-void
-innodb_sched_priority_cleaner_update(
-/*=================================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- if (srv_read_only_mode) {
- return;
- }
-
- ulint priority = *static_cast<const ulint *>(save);
- ulint actual_priority;
- ulint nice = 0;
-
- /* Set the priority for the LRU manager thread */
- ut_ad(buf_lru_manager_is_active);
- nice = os_thread_get_priority(srv_lru_manager_tid); /* value before the change */
- actual_priority = os_thread_set_priority(srv_lru_manager_tid,
- priority);
-
- if (UNIV_UNLIKELY(actual_priority != priority)) {
-
- if (actual_priority+nice != priority) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "Failed to set the LRU manager thread "
- "priority to %lu, "
- "the nice is %lu and used priority is %lu", priority,
- nice, actual_priority);
- }
- } else {
-
- srv_sched_priority_cleaner = priority;
- }
-
- /* Set the priority for the page cleaner thread */
-
- ut_ad(buf_page_cleaner_is_active);
- nice = os_thread_get_priority(srv_cleaner_tid);
- actual_priority = os_thread_set_priority(srv_cleaner_tid, priority);
- if (UNIV_UNLIKELY(actual_priority != priority)) {
- if (actual_priority+nice != priority) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "Failed to set the page cleaner thread "
- "priority to %lu, "
- "the nice is %lu and used priority is %lu", priority,
- nice, actual_priority);
- }
- }
-}
-
-#if defined(UNIV_DEBUG) || (UNIV_PERF_DEBUG)
-
-/****************************************************************//**
-Update the innodb_sched_priority_purge variable and set the thread
-priorities accordingly.
-In read-only mode the call returns without doing anything. Otherwise the
-requested priority is applied to each of the srv_n_purge_threads purge
-threads in turn. At the first thread whose reported priority differs from
-the request, and for which actual_priority + nice differs too, a warning
-is pushed and the function returns without assigning
-srv_sched_priority_purge; threads already processed are not restored. */
-static
-void
-innodb_sched_priority_purge_update(
-/*===============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- ulint priority = *static_cast<const ulint *>(save);
-
- if (srv_read_only_mode) {
- return;
- }
-
- for (ulint i = 0; i < srv_n_purge_threads; i++) {
- ulint nice = os_thread_get_priority(srv_purge_tids[i]);
- ulint actual_priority
- = os_thread_set_priority(srv_purge_tids[i], priority);
- if (UNIV_UNLIKELY(actual_priority != priority)) {
- if (actual_priority+nice != priority) {
- push_warning_printf(thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "Failed to set the purge "
- "thread priority to %lu, the "
- "nice is %lu the current priority is %lu, "
- "aborting priority update",
- priority, nice, actual_priority);
- return;
- }
- }
- }
-
- srv_sched_priority_purge = priority; /* reached only if no warning was raised */
-}
-
-/****************************************************************//**
-Update the innodb_sched_priority_io variable and set the thread
-priorities accordingly.
-The requested priority is applied to each of the srv_n_file_io_threads
-I/O threads in turn; unlike the sibling callbacks, srv_read_only_mode is
-not checked here. At the first thread whose reported priority differs
-from the request, and for which actual_priority + nice differs too, a
-warning is pushed and the function returns without assigning
-srv_sched_priority_io; threads already processed are not restored. */
-static
-void
-innodb_sched_priority_io_update(
-/*============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- ulint priority = *static_cast<const ulint *>(save);
-
- for (ulint i = 0; i < srv_n_file_io_threads; i++) {
- ulint nice = os_thread_get_priority(srv_io_tids[i]);
- ulint actual_priority = os_thread_set_priority(srv_io_tids[i],
- priority);
-
- if (UNIV_UNLIKELY(actual_priority != priority)) {
-
- if (actual_priority+nice != priority) {
- push_warning_printf(thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_WRONG_ARGUMENTS,
- "Failed to set the I/O "
- "thread priority to %lu, the "
- "nice is %lu the current priority is %lu, "
- "aborting priority update",
- priority, nice, actual_priority);
- return;
- }
- }
- }
-
- srv_sched_priority_io = priority; /* reached only if no warning was raised */
-}
-
-/****************************************************************//**
-Update the innodb_sched_priority_master variable and set the master
-thread priority accordingly.
-In read-only mode the call returns without doing anything. Otherwise the
-requested priority is applied to the master thread. When the priority
-reported by os_thread_set_priority() equals the request,
-srv_sched_priority_master is updated. When it differs and
-actual_priority + nice differs from the request as well, a warning is
-pushed to the client. */
-static
-void
-innodb_sched_priority_master_update(
-/*================================*/
-	THD*				thd,	/*!< in: thread handle */
-	struct st_mysql_sys_var*	var,	/*!< in: pointer to
-						system variable */
-	void*				var_ptr,/*!< out: not used */
-	const void*			save)	/*!< in: immediate result
-						from check function */
-{
-	/* Read the request as ulint, matching the variable it is stored
-	in and the other innodb_sched_priority_*_update callbacks. */
-	ulint	priority = *static_cast<const ulint *>(save);
-	ulint	actual_priority;
-	ulint	nice;
-
-	if (srv_read_only_mode) {
-		return;
-	}
-
-	/* Sample the current value before changing the priority. */
-	nice = os_thread_get_priority(srv_master_tid);
-	actual_priority = os_thread_set_priority(srv_master_tid, priority);
-	if (UNIV_UNLIKELY(actual_priority != priority)) {
-		if (actual_priority+nice != priority) {
-			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
-				ER_WRONG_ARGUMENTS,
-				"Failed to set the master thread "
-				"priority to %lu, "
-				"the nice is %lu and the current priority is %lu", priority,
-				nice, actual_priority);
-		}
-	} else {
-
-		srv_sched_priority_master = priority;
-	}
-}
-
-#endif /* defined(UNIV_DEBUG) || (UNIV_PERF_DEBUG) */
-
-#endif /* UNIV_LINUX */
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Check if it is a valid value of innodb_track_changed_pages.
-Changed pages tracking is not working correctly without initialization
-procedure on server startup. The function allows to temporary
-disable tracking, but only if the feature was enabled on startup.
-A NULL value is rejected. When the redo log tracking thread has been
-started, any value is accepted. Otherwise only a value equal to the
-current srv_track_changed_pages is accepted. Every accepted value is
-written to *save, so the update function never reads an unset result.
-This function is registered as a callback with MySQL.
-@return 0 for valid innodb_track_changed_pages */
-static
-int
-innodb_track_changed_pages_validate(
-	THD*				thd,	/*!< in: thread handle */
-	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
-						variable */
-	void*				save,	/*!< out: immediate result
-						for update function */
-	struct st_mysql_value*		value)	/*!< in: incoming bool */
-{
-	long long	intbuf = 0;
-
-	if (value->val_int(value, &intbuf)) {
-		/* The value is NULL. That is invalid. */
-		return 1;
-	}
-
-	if (!srv_redo_log_thread_started
-	    && intbuf != srv_track_changed_pages) {
-		/* Tracking was not initialized at startup, so the
-		setting cannot be changed now. */
-		return 1;
-	}
-
-	/* Accepted: either the tracking thread runs, or the request
-	keeps the current setting. Hand the value to the update step
-	in both cases. */
-	*reinterpret_cast<ulong*>(save)
-		= static_cast<ulong>(intbuf);
-
-	return 0;
-}
-#endif
-
-/****************************************************************//**
-Callback function for accessing the InnoDB variables from MySQL:
-SHOW VARIABLES.
-Calls innodb_export_status() and then points "var" at the
-innodb_status_variables array with type SHOW_ARRAY. thd and buff are not
-used.
-NOTE(review): the data exported here is the status variable array, so
-this presumably serves SHOW STATUS rather than SHOW VARIABLES - confirm.
-@return always 0 */
-static
-int
-show_innodb_vars(
-/*=============*/
- THD* thd,
- SHOW_VAR* var,
- char* buff)
-{
- innodb_export_status();
- var->type = SHOW_ARRAY;
- var->value = (char*) &innodb_status_variables;
-
- return(0);
-}
-
-/****************************************************************//**
-Compare every index name of a table, ignoring case, with the reserved
-name of the system default primary index, 'GEN_CLUST_INDEX'. On the
-first match a warning is pushed to the client, the error
-ER_WRONG_NAME_FOR_INDEX is raised, and the scan stops.
-@return true if the index name matches the reserved name */
-UNIV_INTERN
-bool
-innobase_index_name_is_reserved(
-/*============================*/
-	THD*		thd,		/*!< in/out: MySQL connection */
-	const KEY*	key_info,	/*!< in: Indexes to be created */
-	ulint		num_of_keys)	/*!< in: Number of indexes to
-					be created. */
-{
-	for (uint key_num = 0; key_num < num_of_keys; key_num++) {
-		const KEY*	candidate = key_info + key_num;
-
-		if (innobase_strcasecmp(candidate->name,
-					innobase_index_reserve_name) != 0) {
-			/* Not the reserved name, look at the next key. */
-			continue;
-		}
-
-		/* Push warning to mysql */
-		push_warning_printf(thd,
-				    Sql_condition::WARN_LEVEL_WARN,
-				    ER_WRONG_NAME_FOR_INDEX,
-				    "Cannot Create Index with name "
-				    "'%s'. The name is reserved "
-				    "for the system default primary "
-				    "index.",
-				    innobase_index_reserve_name);
-
-		my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
-			 innobase_index_reserve_name);
-
-		return(true);
-	}
-
-	return(false);
-}
-
-/***********************************************************************
-Return the FTS relevance ranking for the current document.
-When the prebuilt struct has read_just_key set, the rank is taken from
-the ranking node that result->current points to. Otherwise the rank is
-looked up by prebuilt->fts_doc_id with fts_retrieve_ranking().
-@return the relevance ranking value */
-UNIV_INTERN
-float
-innobase_fts_retrieve_ranking(
-/*============================*/
-	FT_INFO *	fts_hdl)	/*!< in: FTS handler */
-{
-	NEW_FT_INFO*	handle = (NEW_FT_INFO*) fts_hdl;
-	fts_result_t*	result = handle->ft_result;
-	row_prebuilt_t*	ft_prebuilt = handle->ft_prebuilt;
-
-	if (!ft_prebuilt->read_just_key) {
-		/* Look the rank up by the doc id of the current row. */
-		return(fts_retrieve_ranking(result,
-					    ft_prebuilt->fts_doc_id));
-	}
-
-	fts_ranking_t*	ranking =
-		rbt_value(fts_ranking_t, result->current);
-
-	return(ranking->rank);
-}
-
-/***********************************************************************
-Release an FTS handler: free its query result with
-fts_query_free_result() and then free the handler itself with
-my_free(). */
-UNIV_INTERN
-void
-innobase_fts_close_ranking(
-/*=======================*/
-	FT_INFO *	fts_hdl)	/*!< in: FTS handler to free */
-{
-	NEW_FT_INFO*	handle = (NEW_FT_INFO*) fts_hdl;
-
-	fts_query_free_result(handle->ft_result);
-
-	my_free((uchar*) fts_hdl);
-}
-
-/***********************************************************************
-Look up the FTS relevance ranking for the document whose id is
-prebuilt->fts_doc_id, using fts_retrieve_ranking() on the handler's
-result set.
-@return the relevance ranking value */
-UNIV_INTERN
-float
-innobase_fts_find_ranking(
-/*======================*/
-	FT_INFO*	fts_hdl,	/*!< in: FTS handler */
-	uchar*		record,		/*!< in: Unused */
-	uint		len)		/*!< in: Unused */
-{
-	NEW_FT_INFO*	handle = (NEW_FT_INFO*) fts_hdl;
-
-	return(fts_retrieve_ranking(handle->ft_result,
-				    handle->ft_prebuilt->fts_doc_id));
-}
-
-#ifdef UNIV_DEBUG
-static my_bool innodb_purge_run_now = TRUE; /* presumably backing storage for a debug system variable - confirm */
-static my_bool innodb_purge_stop_now = TRUE; /* as above */
-static my_bool innodb_log_checkpoint_now = TRUE; /* as above */
-static my_bool innodb_buf_flush_list_now = TRUE; /* as above */
-static my_bool innodb_track_redo_log_now = TRUE; /* as above */
-
-/****************************************************************//**
-Update callback that switches purge to the RUN state.
-trx_purge_run() is called only when the new value is true and purge is
-not in PURGE_STATE_DISABLED; otherwise nothing happens.
-This function is registered as a callback with MySQL. */
-static
-void
-purge_run_now_set(
-/*==============*/
-	THD*				thd	/*!< in: thread handle */
-					MY_ATTRIBUTE((unused)),
-	struct st_mysql_sys_var*	var	/*!< in: pointer to system
-						variable */
-					MY_ATTRIBUTE((unused)),
-	void*				var_ptr	/*!< out: where the formal
-						string goes */
-					MY_ATTRIBUTE((unused)),
-	const void*			save)	/*!< in: immediate result from
-						check function */
-{
-	if (!*(my_bool*) save) {
-		return;
-	}
-
-	if (trx_purge_state() == PURGE_STATE_DISABLED) {
-		return;
-	}
-
-	trx_purge_run();
-}
-
-/****************************************************************//**
-Update callback that switches purge to the STOP state.
-trx_purge_stop() is called only when the new value is true and purge is
-not in PURGE_STATE_DISABLED; otherwise nothing happens.
-This function is registered as a callback with MySQL. */
-static
-void
-purge_stop_now_set(
-/*===============*/
-	THD*				thd	/*!< in: thread handle */
-					MY_ATTRIBUTE((unused)),
-	struct st_mysql_sys_var*	var	/*!< in: pointer to system
-						variable */
-					MY_ATTRIBUTE((unused)),
-	void*				var_ptr	/*!< out: where the formal
-						string goes */
-					MY_ATTRIBUTE((unused)),
-	const void*			save)	/*!< in: immediate result from
-						check function */
-{
-	if (!*(my_bool*) save) {
-		return;
-	}
-
-	if (trx_purge_state() == PURGE_STATE_DISABLED) {
-		return;
-	}
-
-	trx_purge_stop();
-}
-
-/****************************************************************//**
-Update callback that forces InnoDB to checkpoint.
-When the new value is true, checkpoints are made and the log files are
-flushed until log_sys->last_checkpoint_lsn has caught up with
-log_sys->lsn. Then the flushed LSN is written with
-fil_write_flushed_lsn(); a failure of that write is reported in the
-error log. A false value does nothing. */
-static
-void
-checkpoint_now_set(
-/*===============*/
-	THD*				thd	/*!< in: thread handle */
-					MY_ATTRIBUTE((unused)),
-	struct st_mysql_sys_var*	var	/*!< in: pointer to system
-						variable */
-					MY_ATTRIBUTE((unused)),
-	void*				var_ptr	/*!< out: where the formal
-						string goes */
-					MY_ATTRIBUTE((unused)),
-	const void*			save)	/*!< in: immediate result from
-						check function */
-{
-	if (!*(my_bool*) save) {
-		return;
-	}
-
-	/* Repeat until the checkpoint has reached the current LSN. */
-	while (log_sys->last_checkpoint_lsn < log_sys->lsn) {
-		log_make_checkpoint_at(LSN_MAX, TRUE);
-		fil_flush_file_spaces(FIL_LOG);
-	}
-
-	const dberr_t	err = fil_write_flushed_lsn(log_sys->lsn);
-
-	if (err == DB_SUCCESS) {
-		return;
-	}
-
-	ib_logf(IB_LOG_LEVEL_ERROR,
-		"Failed to write flush lsn to the "
-		"system tablespace at checkpoint err=%s",
-		ut_strerr(err));
-}
-
-/****************************************************************//**
-Update callback that forces a flush of dirty pages now.
-When the new value is true, buf_flush_list() is called with ULINT_MAX
-and LSN_MAX, and then buf_flush_wait_batch_end() is called for
-BUF_FLUSH_LIST. A false value does nothing. */
-static
-void
-buf_flush_list_now_set(
-/*===================*/
-	THD*				thd	/*!< in: thread handle */
-					MY_ATTRIBUTE((unused)),
-	struct st_mysql_sys_var*	var	/*!< in: pointer to system
-						variable */
-					MY_ATTRIBUTE((unused)),
-	void*				var_ptr	/*!< out: where the formal
-						string goes */
-					MY_ATTRIBUTE((unused)),
-	const void*			save)	/*!< in: immediate result from
-						check function */
-{
-	if (!*(my_bool*) save) {
-		return;
-	}
-
-	buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
-	buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
-}
-
-/****************************************************************//**
-Update callback that makes the log tracker follow the redo log
-synchronously. log_online_follow_redo_log() is called only when the new
-value is true and srv_track_changed_pages is set. */
-static
-void
-track_redo_log_now_set(
-/*===================*/
-	THD*				thd	/*!< in: thread handle */
-					MY_ATTRIBUTE((unused)),
-	struct st_mysql_sys_var*	var	/*!< in: pointer to system
-						variable */
-					MY_ATTRIBUTE((unused)),
-	void*				var_ptr	/*!< out: where the formal
-						string goes */
-					MY_ATTRIBUTE((unused)),
-	const void*			save)	/*!< in: immediate result from
-						check function */
-{
-	if (!*(my_bool*) save) {
-		return;
-	}
-
-	if (!srv_track_changed_pages) {
-		return;
-	}
-
-	log_online_follow_redo_log();
-}
-
-#endif /* UNIV_DEBUG */
-
-/***********************************************************************
-Report the version of the extended FTS API implemented here.
-@return version of the extended FTS API; currently always 2 */
-uint
-innobase_fts_get_version()
-/*======================*/
-{
- /* Currently this does not make much sense, as returning
- HA_CAN_FULLTEXT_EXT automatically means this version is supported.
- This is supposed to ease future extensions. */
- return(2);
-}
-
-/***********************************************************************
-Report the supported parts of the extended FTS API as a bit mask.
-@return Which part of the extended FTS API is supported; always
-FTS_ORDERED_RESULT | FTS_DOCID_IN_RESULT */
-ulonglong
-innobase_fts_flags()
-/*================*/
-{
- return(FTS_ORDERED_RESULT | FTS_DOCID_IN_RESULT);
-}
-
-
-/***********************************************************************
-Return the FTS doc_id of the current result row.
-When the prebuilt struct has read_just_key set, the id is taken from the
-ranking node that result->current points to. Otherwise
-prebuilt->fts_doc_id is returned.
-@return the document ID */
-ulonglong
-innobase_fts_retrieve_docid(
-/*========================*/
-	FT_INFO_EXT *	fts_hdl)	/*!< in: FTS handler */
-{
-	NEW_FT_INFO*	handle = (NEW_FT_INFO *) fts_hdl;
-	row_prebuilt_t*	ft_prebuilt = handle->ft_prebuilt;
-
-	if (!ft_prebuilt->read_just_key) {
-		return(ft_prebuilt->fts_doc_id);
-	}
-
-	fts_result_t*	result = handle->ft_result;
-	fts_ranking_t*	ranking =
-		rbt_value(fts_ranking_t, result->current);
-
-	return(ranking->doc_id);
-}
-
-
-/***********************************************************************
-Return the size of the current FTS result: the number of nodes in
-ft_result->rankings_by_id, or 0 when that tree does not exist.
-@return number of matching rows */
-ulonglong
-innobase_fts_count_matches(
-/*=======================*/
-	FT_INFO_EXT*	fts_hdl)	/*!< in: FTS handler */
-{
-	NEW_FT_INFO*	handle = (NEW_FT_INFO *) fts_hdl;
-
-	if (handle->ft_result->rankings_by_id == 0) {
-		/* No ranking tree, so there are no matches to count. */
-		return(0);
-	}
-
-	return rbt_size(handle->ft_result->rankings_by_id);
-}
-
-/* These variables are never read or changed by InnoDB itself. They are
-dummies that the MySQL infrastructure needs so that the user can invoke
-buffer_pool_dump_now(), buffer_pool_load_now() and buffer_pool_load_abort()
-by doing:
- SET GLOBAL innodb_buffer_pool_dump_now=ON;
- SET GLOBAL innodb_buffer_pool_load_now=ON;
- SET GLOBAL innodb_buffer_pool_load_abort=ON;
-Their values are read by MySQL and displayed to the user when the variables
-are queried, e.g.:
- SELECT @@innodb_buffer_pool_dump_now;
- SELECT @@innodb_buffer_pool_load_now;
- SELECT @@innodb_buffer_pool_load_abort; */
-static my_bool innodb_buffer_pool_dump_now = FALSE;
-static my_bool innodb_buffer_pool_load_now = FALSE;
-static my_bool innodb_buffer_pool_load_abort = FALSE;
-
-/****************************************************************//**
-Update callback for innodb_buffer_pool_dump_now.
-buf_dump_start() is called when the new value is ON and the server is
-not in read-only mode; otherwise nothing happens.
-This function is registered as a callback with MySQL. */
-static
-void
-buffer_pool_dump_now(
-/*=================*/
-	THD*				thd	/*!< in: thread handle */
-					MY_ATTRIBUTE((unused)),
-	struct st_mysql_sys_var*	var	/*!< in: pointer to system
-						variable */
-					MY_ATTRIBUTE((unused)),
-	void*				var_ptr	/*!< out: where the formal
-						string goes */
-					MY_ATTRIBUTE((unused)),
-	const void*			save)	/*!< in: immediate result from
-						check function */
-{
-	if (!*(my_bool*) save) {
-		return;
-	}
-
-	if (srv_read_only_mode) {
-		return;
-	}
-
-	buf_dump_start();
-}
-
-/****************************************************************//**
-Update callback for innodb_buffer_pool_load_now.
-buf_load_start() is called when the new value is ON; an OFF value does
-nothing.
-This function is registered as a callback with MySQL. */
-static
-void
-buffer_pool_load_now(
-/*=================*/
-	THD*				thd	/*!< in: thread handle */
-					__attribute__((unused)),
-	struct st_mysql_sys_var*	var	/*!< in: pointer to system
-						variable */
-					__attribute__((unused)),
-	void*				var_ptr	/*!< out: where the formal
-						string goes */
-					__attribute__((unused)),
-	const void*			save)	/*!< in: immediate result from
-						check function */
-{
-	const my_bool	requested = *(my_bool*) save;
-
-	if (!requested) {
-		return;
-	}
-
-	buf_load_start();
-}
-
-/****************************************************************//**
-Update callback for innodb_buffer_pool_load_abort.
-buf_load_abort() is called when the new value is ON; an OFF value does
-nothing.
-This function is registered as a callback with MySQL. */
-static
-void
-buffer_pool_load_abort(
-/*===================*/
-	THD*				thd	/*!< in: thread handle */
-					__attribute__((unused)),
-	struct st_mysql_sys_var*	var	/*!< in: pointer to system
-						variable */
-					__attribute__((unused)),
-	void*				var_ptr	/*!< out: where the formal
-						string goes */
-					__attribute__((unused)),
-	const void*			save)	/*!< in: immediate result from
-						check function */
-{
-	const my_bool	requested = *(my_bool*) save;
-
-	if (!requested) {
-		return;
-	}
-
-	buf_load_abort();
-}
-
-/** Update callback shared by innodb_status_output and
-innodb_status_output_locks, which control InnoDB "status monitor"
-output to the error log. Copies the new value into the variable and
-then signals srv_monitor_event. var_ptr and save are both used, even
-though the signature marks them with the unused attribute.
-@param[in]	thd	thread handle (not used)
-@param[in]	var	system variable (not used)
-@param[out]	var_ptr	current value, overwritten with the new one
-@param[in]	save	to-be-assigned value */
-static
-void
-innodb_status_output_update(
-/*========================*/
-	THD*				thd __attribute__((unused)),
-	struct st_mysql_sys_var*	var __attribute__((unused)),
-	void*				var_ptr __attribute__((unused)),
-	const void*			save __attribute__((unused)))
-{
-	const my_bool	new_value = *static_cast<const my_bool*>(save);
-
-	*static_cast<my_bool*>(var_ptr) = new_value;
-
-	/* Wakeup server monitor thread. */
-	os_event_set(srv_monitor_event);
-}
-
-/******************************************************************
-Update the system variable innodb_encryption_threads.
-Reads the new value from "save" as a uint and passes it to
-fil_crypt_set_thread_cnt(). thd, var and var_ptr are not used. */
-static
-void
-innodb_encryption_threads_update(
-/*=============================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- fil_crypt_set_thread_cnt(*static_cast<const uint*>(save));
-}
-
-/******************************************************************
-Update the system variable innodb_encryption_rotate_key_age.
-Reads the new value from "save" as a uint and passes it to
-fil_crypt_set_rotate_key_age(). thd, var and var_ptr are not used. */
-static
-void
-innodb_encryption_rotate_key_age_update(
-/*====================================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- fil_crypt_set_rotate_key_age(*static_cast<const uint*>(save));
-}
-
-/******************************************************************
-Update the system variable innodb_encryption_rotation_iops.
-Reads the new value from "save" as a uint and passes it to
-fil_crypt_set_rotation_iops(). thd, var and var_ptr are not used. */
-static
-void
-innodb_encryption_rotation_iops_update(
-/*===================================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- fil_crypt_set_rotation_iops(*static_cast<const uint*>(save));
-}
-
-/******************************************************************
-Update the system variable innodb_encrypt_tables.
-Reads the new value from "save" as a ulong (not a uint, unlike the other
-encryption callbacks) and passes it to fil_crypt_set_encrypt_tables().
-thd, var and var_ptr are not used. */
-static
-void
-innodb_encrypt_tables_update(
-/*=========================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to
- system variable */
- void* var_ptr,/*!< out: where the
- formal string goes */
- const void* save) /*!< in: immediate result
- from check function */
-{
- fil_crypt_set_encrypt_tables(*static_cast<const ulong*>(save));
-}
-
-static SHOW_VAR innodb_status_variables_export[]= { /* exported status entry table */
- {"Innodb", (char*) &show_innodb_vars, SHOW_FUNC}, /* values are produced by show_innodb_vars() */
- {NullS, NullS, SHOW_LONG} /* presumably the terminating entry - confirm */
-};
-
-static struct st_mysql_storage_engine innobase_storage_engine=
-{ MYSQL_HANDLERTON_INTERFACE_VERSION }; /* storage engine descriptor, initialized with the handlerton interface version */
-
-#ifdef WITH_WSREP
-/** Report that a transaction tried to abort a slave transaction and
-terminate the process. Logs both sequence numbers together with the
-possible causes through WSREP_ERROR and then calls abort(), so this
-function does not return.
-@param[in]	bf_seqno	sequence number of the aborting transaction
-@param[in]	victim_seqno	sequence number of the slave transaction */
-void
-wsrep_abort_slave_trx(wsrep_seqno_t bf_seqno, wsrep_seqno_t victim_seqno)
-{
-	const long long	aborter = (long long)bf_seqno;
-	const long long	victim = (long long)victim_seqno;
-
-	WSREP_ERROR("Trx %lld tries to abort slave trx %lld. This could be "
-		"caused by:\n\t"
-		"1) unsupported configuration options combination, please check documentation.\n\t"
-		"2) a bug in the code.\n\t"
-		"3) a database corruption.\n Node consistency compromized, "
-		"need to abort. Restart the node to resync with cluster.",
-		aborter, victim);
-
-	abort();
-}
-/*******************************************************************//**
-This function is used to kill one transaction in BF. */
-UNIV_INTERN
-int
-wsrep_innobase_kill_one_trx(
- void * const bf_thd_ptr,
- const trx_t * const bf_trx,
- trx_t *victim_trx,
- ibool signal)
-{
- ut_ad(lock_mutex_own());
- ut_ad(trx_mutex_own(victim_trx));
- ut_ad(bf_thd_ptr);
- ut_ad(victim_trx);
-
- DBUG_ENTER("wsrep_innobase_kill_one_trx");
- THD *bf_thd = bf_thd_ptr ? (THD*) bf_thd_ptr : NULL;
- THD *thd = (THD *) victim_trx->mysql_thd;
- int64_t bf_seqno = (bf_thd) ? wsrep_thd_trx_seqno(bf_thd) : 0;
-
- if (!thd) {
- DBUG_PRINT("wsrep", ("no thd for conflicting lock"));
- WSREP_WARN("no THD for trx: " TRX_ID_FMT, victim_trx->id);
- DBUG_RETURN(1);
- }
-
- if (!bf_thd) {
- DBUG_PRINT("wsrep", ("no BF thd for conflicting lock"));
- WSREP_WARN("no BF THD for trx: " TRX_ID_FMT,
- bf_trx ? bf_trx->id : 0);
- DBUG_RETURN(1);
- }
-
- WSREP_LOG_CONFLICT(bf_thd, thd, TRUE);
-
- WSREP_DEBUG("BF kill (%lu, seqno: %lld), victim: (%lu) trx: "
- TRX_ID_FMT,
- signal, (long long)bf_seqno,
- thd_get_thread_id(thd),
- victim_trx->id);
-
- WSREP_DEBUG("Aborting query: %s",
- (thd && wsrep_thd_query(thd)) ? wsrep_thd_query(thd) : "void");
-
- wsrep_thd_LOCK(thd);
- DBUG_EXECUTE_IF("sync.wsrep_after_BF_victim_lock",
- {
- const char act[]=
- "now "
- "wait_for signal.wsrep_after_BF_victim_lock";
- DBUG_ASSERT(!debug_sync_set_action(bf_thd,
- STRING_WITH_LEN(act)));
- };);
-
-
- if (wsrep_thd_query_state(thd) == QUERY_EXITING) {
- WSREP_DEBUG("kill trx EXITING for " TRX_ID_FMT,
- victim_trx->id);
- wsrep_thd_UNLOCK(thd);
- DBUG_RETURN(0);
- }
-
- if(wsrep_thd_exec_mode(thd) != LOCAL_STATE) {
- WSREP_DEBUG("withdraw for BF trx: " TRX_ID_FMT ", state: %d",
- victim_trx->id,
- wsrep_thd_get_conflict_state(thd));
- }
-
- switch (wsrep_thd_get_conflict_state(thd)) {
- case NO_CONFLICT:
- wsrep_thd_set_conflict_state(thd, MUST_ABORT);
- break;
- case MUST_ABORT:
- WSREP_DEBUG("victim " TRX_ID_FMT " in MUST ABORT state",
- victim_trx->id);
- wsrep_thd_UNLOCK(thd);
- wsrep_thd_awake(thd, signal);
- DBUG_RETURN(0);
- break;
- case ABORTED:
- case ABORTING: // fall through
- default:
- WSREP_DEBUG("victim " TRX_ID_FMT " in state %d",
- victim_trx->id, wsrep_thd_get_conflict_state(thd));
- wsrep_thd_UNLOCK(thd);
- DBUG_RETURN(0);
- break;
- }
-
- switch (wsrep_thd_query_state(thd)) {
- case QUERY_COMMITTING:
- enum wsrep_status rcode;
-
- WSREP_DEBUG("kill query for: %ld",
- thd_get_thread_id(thd));
- WSREP_DEBUG("kill trx QUERY_COMMITTING for " TRX_ID_FMT,
- victim_trx->id);
-
- if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
- wsrep_abort_slave_trx(bf_seqno,
- wsrep_thd_trx_seqno(thd));
- } else {
- wsrep_t *wsrep= get_wsrep();
- rcode = wsrep->abort_pre_commit(
- wsrep, bf_seqno,
- (wsrep_trx_id_t)victim_trx->id
- );
-
- switch (rcode) {
- case WSREP_WARNING:
- WSREP_DEBUG("cancel commit warning: "
- TRX_ID_FMT,
- victim_trx->id);
- wsrep_thd_UNLOCK(thd);
- wsrep_thd_awake(thd, signal);
- DBUG_RETURN(1);
- break;
- case WSREP_OK:
- break;
- default:
- WSREP_ERROR(
- "cancel commit bad exit: %d "
- TRX_ID_FMT,
- rcode,
- victim_trx->id);
- /* unable to interrupt, must abort */
- /* note: kill_mysql() will block, if we cannot.
- * kill the lock holder first.
- */
- abort();
- break;
- }
- }
- wsrep_thd_UNLOCK(thd);
- wsrep_thd_awake(thd, signal);
- break;
- case QUERY_EXEC:
- /* it is possible that victim trx is itself waiting for some
- * other lock. We need to cancel this waiting
- */
- WSREP_DEBUG("kill trx QUERY_EXEC for " TRX_ID_FMT,
- victim_trx->id);
-
- victim_trx->lock.was_chosen_as_deadlock_victim= TRUE;
- if (victim_trx->lock.wait_lock) {
- WSREP_DEBUG("victim has wait flag: %ld",
- thd_get_thread_id(thd));
- lock_t* wait_lock = victim_trx->lock.wait_lock;
- if (wait_lock) {
- WSREP_DEBUG("canceling wait lock");
- victim_trx->lock.was_chosen_as_deadlock_victim= TRUE;
- lock_cancel_waiting_and_release(wait_lock);
- }
-
- wsrep_thd_UNLOCK(thd);
- wsrep_thd_awake(thd, signal);
- } else {
- /* abort currently executing query */
- DBUG_PRINT("wsrep",("sending KILL_QUERY to: %ld",
- thd_get_thread_id(thd)));
- WSREP_DEBUG("kill query for: %ld",
- thd_get_thread_id(thd));
- /* Note that innobase_kill_connection will take lock_mutex
- and trx_mutex */
- wsrep_thd_UNLOCK(thd);
- wsrep_thd_awake(thd, signal);
-
- /* for BF thd, we need to prevent him from committing */
- if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
- wsrep_abort_slave_trx(bf_seqno,
- wsrep_thd_trx_seqno(thd));
- }
- }
- break;
- case QUERY_IDLE:
- {
- WSREP_DEBUG("kill IDLE for " TRX_ID_FMT, victim_trx->id);
-
- if (wsrep_thd_exec_mode(thd) == REPL_RECV) {
- WSREP_DEBUG("kill BF IDLE, seqno: %lld",
- (long long)wsrep_thd_trx_seqno(thd));
- wsrep_thd_UNLOCK(thd);
- wsrep_abort_slave_trx(bf_seqno,
- wsrep_thd_trx_seqno(thd));
- DBUG_RETURN(0);
- }
- /* This will lock thd from proceeding after net_read() */
- wsrep_thd_set_conflict_state(thd, ABORTING);
-
- wsrep_lock_rollback();
-
- if (wsrep_aborting_thd_contains(thd)) {
- WSREP_WARN("duplicate thd aborter %lu",
- thd_get_thread_id(thd));
- } else {
- wsrep_aborting_thd_enqueue(thd);
- DBUG_PRINT("wsrep",("enqueuing trx abort for %lu",
- thd_get_thread_id(thd)));
- WSREP_DEBUG("enqueuing trx abort for (%lu)",
- thd_get_thread_id(thd));
- }
-
- DBUG_PRINT("wsrep",("signalling wsrep rollbacker"));
- WSREP_DEBUG("signaling aborter");
- wsrep_unlock_rollback();
- wsrep_thd_UNLOCK(thd);
-
- break;
- }
- default:
- WSREP_WARN("bad wsrep query state: %d",
- wsrep_thd_query_state(thd));
- wsrep_thd_UNLOCK(thd);
- break;
- }
-
- DBUG_RETURN(0);
-}
-
-/** Abort a victim transaction on behalf of a brute-force (BF) thread.
-If the victim THD has an InnoDB transaction, the kill is delegated to
-wsrep_innobase_kill_one_trx() while holding the lock mutex and the victim's
-trx mutex. Otherwise the victim THD is only flagged MUST_ABORT and awoken.
-@param hton		handlerton (not used by this function)
-@param bf_thd		BF thread requesting the abort, may be NULL
-@param victim_thd	thread whose transaction is to be aborted
-@param signal		passed through to the kill / awake calls
-@return the result of wsrep_innobase_kill_one_trx() when the victim has a
-transaction, -1 otherwise */
-static int
-wsrep_abort_transaction(handlerton* hton, THD *bf_thd, THD *victim_thd,
-			my_bool signal)
-{
-	/* NOTE(review): the DBUG tag differs from the function name; kept
-	as is because debug trace filters may depend on it. */
-	DBUG_ENTER("wsrep_innobase_abort_thd");
-	trx_t* victim_trx = thd_to_trx(victim_thd);
-	trx_t* bf_trx = (bf_thd) ? thd_to_trx(bf_thd) : NULL;
-
-	/* bf_thd may be NULL and a THD may have no query text, so never hand
-	a NULL pointer to the %s conversions below. Same fallback as used in
-	wsrep_innobase_kill_one_trx(). */
-	const char* bf_query =
-		(bf_thd && wsrep_thd_query(bf_thd))
-		? wsrep_thd_query(bf_thd) : "void";
-	const char* victim_query =
-		(victim_thd && wsrep_thd_query(victim_thd))
-		? wsrep_thd_query(victim_thd) : "void";
-
-	WSREP_DEBUG("abort transaction: BF: %s victim: %s",
-		    bf_query,
-		    victim_query);
-
-	if (victim_trx) {
-		lock_mutex_enter();
-		trx_mutex_enter(victim_trx);
-		victim_trx->abort_type = TRX_WSREP_ABORT;
-		int rcode = wsrep_innobase_kill_one_trx(bf_thd, bf_trx,
-							victim_trx, signal);
-		trx_mutex_exit(victim_trx);
-		lock_mutex_exit();
-		/* NOTE(review): abort_type is reset after both mutexes have
-		been released - confirm that this unprotected write is
-		intended. */
-		victim_trx->abort_type = TRX_SERVER_ABORT;
-		wsrep_srv_conc_cancel_wait(victim_trx);
-		DBUG_RETURN(rcode);
-	} else {
-		WSREP_DEBUG("victim does not have transaction");
-		wsrep_thd_LOCK(victim_thd);
-		wsrep_thd_set_conflict_state(victim_thd, MUST_ABORT);
-		wsrep_thd_UNLOCK(victim_thd);
-		wsrep_thd_awake(victim_thd, signal);
-	}
-
-	DBUG_RETURN(-1);
-}
-
-/** Write a wsrep XID into the transaction system header as the current
-checkpoint and flush the logs afterwards.
-@param hton	InnoDB handlerton (asserted to be innodb_hton_ptr)
-@param xid	XID to store
-@return 0 when the XID was stored, 1 when xid is not a wsrep XID */
-static int innobase_wsrep_set_checkpoint(handlerton* hton, const XID* xid)
-{
-	DBUG_ASSERT(hton == innodb_hton_ptr);
-
-	/* Anything that is not a wsrep XID is rejected up front. */
-	if (!wsrep_is_wsrep_xid(xid)) {
-		return 1;
-	}
-
-	/* The header update runs inside its own mini-transaction. */
-	mtr_t	mtr;
-
-	mtr_start(&mtr);
-	trx_sys_update_wsrep_checkpoint(xid, trx_sysf_get(&mtr), &mtr);
-	mtr_commit(&mtr);
-
-	innobase_flush_logs(hton);
-
-	return 0;
-}
-
-/** Read the stored wsrep checkpoint into the caller's XID.
-@param hton	InnoDB handlerton (asserted to be innodb_hton_ptr)
-@param xid	receives the checkpoint via trx_sys_read_wsrep_checkpoint()
-@return always 0 */
-static
-int
-innobase_wsrep_get_checkpoint(
-	handlerton*	hton,
-	XID*		xid)
-{
-	DBUG_ASSERT(hton == innodb_hton_ptr);
-
-	trx_sys_read_wsrep_checkpoint(xid);
-
-	return 0;
-}
-
-
-/** Allocate a fresh transaction id and attach it to the write set handle
-of the given thread. The id is drawn while holding trx_sys->mutex; the
-handle is updated after the mutex has been released.
-@param hton	handlerton (not used by this function)
-@param thd	user thread handle */
-static
-void
-wsrep_fake_trx_id(
-	handlerton*	hton,
-	THD*		thd)
-{
-	trx_id_t	fresh_id;
-
-	mutex_enter(&trx_sys->mutex);
-	fresh_id = trx_sys_get_new_trx_id();
-	mutex_exit(&trx_sys->mutex);
-
-	wsrep_ws_handle_for_trx(wsrep_thd_ws_handle(thd), fresh_id);
-}
-
-#endif /* WITH_WSREP */
-
-
-/*************************************************************//**
-Validation callback for the empty free list algorithm system variable.
-The incoming string is matched case-insensitively against
-innodb_empty_free_list_algorithm_names; the last slot of that array is not
-searched (presumably a NULL terminator - confirm against its definition).
-A matching algorithm must also pass
-innodb_empty_free_list_algorithm_allowed() before it is stored in *save.
-@return 0 for valid algorithm, 1 otherwise */
-static
-int
-innodb_srv_empty_free_list_algorithm_validate(
-/*===========================*/
-	THD*				thd,	/*!< in: thread handle */
-	struct st_mysql_sys_var*	var,	/*!< in: pointer to system
-						variable */
-	void*				save,	/*!< out: immediate result
-						for update function */
-	struct st_mysql_value*		value)	/*!< in: incoming string */
-{
-	char		name_buf[STRING_BUFFER_USUAL_SIZE];
-	int		name_len = sizeof(name_buf);
-	const char*	requested = value->val_str(value, name_buf, &name_len);
-
-	if (requested == NULL) {
-		return(1);
-	}
-
-	/* Number of searchable names: everything but the last array slot. */
-	const ulint	n_names = array_elements(
-		innodb_empty_free_list_algorithm_names) - 1;
-	ulint		idx = 0;
-
-	while (idx < n_names
-	       && innobase_strcasecmp(
-		       requested,
-		       innodb_empty_free_list_algorithm_names[idx]) != 0) {
-		idx++;
-	}
-
-	if (idx == n_names) {
-		/* No name matched. */
-		return(1);
-	}
-
-	const srv_empty_free_list_t	chosen
-		= static_cast<srv_empty_free_list_t>(idx);
-
-	if (!innodb_empty_free_list_algorithm_allowed(chosen)) {
-		sql_print_warning(
-			"InnoDB: innodb_empty_free_list_algorithm "
-			"= 'backoff' requires at least"
-			" 20MB buffer pool instances.\n");
-		return(1);
-	}
-
-	*reinterpret_cast<ulong*>(save) = static_cast<ulong>(chosen);
-
-	return(0);
-}
-
-/* plugin options */
-
-static MYSQL_SYSVAR_ENUM(checksum_algorithm, srv_checksum_algorithm,
- PLUGIN_VAR_RQCMDARG,
- "The algorithm InnoDB uses for page checksumming. Possible values are "
- "CRC32 (hardware accelerated if the CPU supports it) "
- "write crc32, allow any of the other checksums to match when reading; "
- "STRICT_CRC32 "
- "write crc32, do not allow other algorithms to match when reading; "
- "INNODB "
- "write a software calculated checksum, allow any other checksums "
- "to match when reading; "
- "STRICT_INNODB "
- "write a software calculated checksum, do not allow other algorithms "
- "to match when reading; "
- "NONE "
- "write a constant magic number, do not do any checksum verification "
- "when reading (same as innodb_checksums=OFF); "
- "STRICT_NONE "
- "write a constant magic number, do not allow values other than that "
- "magic number when reading; "
- "Files updated when this option is set to crc32 or strict_crc32 will "
- "not be readable by MySQL versions older than 5.6.3",
- NULL, NULL, SRV_CHECKSUM_ALGORITHM_CRC32,
- &innodb_checksum_algorithm_typelib);
-
-
-/* innodb_log_checksum_algorithm: algorithm used for log block checksums.
-Shares its value list with innodb_checksum_algorithm; changes are applied
-through innodb_log_checksum_algorithm_update. */
-static MYSQL_SYSVAR_ENUM(log_checksum_algorithm, srv_log_checksum_algorithm,
- PLUGIN_VAR_RQCMDARG,
- "The algorithm InnoDB uses for log block checksums. Possible values are "
- "CRC32 (hardware accelerated if the CPU supports it) "
- "write crc32, allow any of the other checksums to match when reading; "
- "STRICT_CRC32 "
- "write crc32, do not allow other algorithms to match when reading; "
- "INNODB "
- "write a software calculated checksum, allow any other checksums "
- "to match when reading; "
- "STRICT_INNODB "
- "write a software calculated checksum, do not allow other algorithms "
- "to match when reading; "
- "NONE "
- "write a constant magic number, do not do any checksum verification "
- "when reading (same as innodb_checksums=OFF); "
- "STRICT_NONE "
- "write a constant magic number, do not allow values other than that "
- "magic number when reading; "
- "Logs created when this option is set to crc32/strict_crc32/none/strict_none "
- "will not be readable by any MySQL version or Percona Server versions that do "
- "not support this feature",
- NULL, innodb_log_checksum_algorithm_update, SRV_CHECKSUM_ALGORITHM_INNODB,
- &innodb_checksum_algorithm_typelib);
-
-
-static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "DEPRECATED. Use innodb_checksum_algorithm=NONE instead of setting "
- "this to OFF",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_ULONG(log_block_size, innobase_log_block_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "###EXPERIMENTAL###: The log block size of the transaction log file. Changing for created log file is not supported. Use on your own risk!",
- NULL, NULL, (1 << 9)/*512*/, OS_MIN_LOG_BLOCK_SIZE,
- (1 << UNIV_PAGE_SIZE_SHIFT_MAX), 0);
-
-static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
- PLUGIN_VAR_READONLY,
- "The common part for InnoDB table spaces.",
- NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Enable InnoDB doublewrite buffer (enabled by default). "
- "Disable with --skip-innodb-doublewrite.",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_BOOL(stats_include_delete_marked,
- srv_stats_include_delete_marked,
- PLUGIN_VAR_OPCMDARG,
- "Scan delete marked records for persistent stat",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_BOOL(use_atomic_writes, innobase_use_atomic_writes,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Prevent partial page writes, via atomic writes (beta). "
- "The option is used to prevent partial writes in case of a crash/poweroff, "
- "as faster alternative to doublewrite buffer. "
- "Currently this option works only "
- "on Linux only with FusionIO device, and directFS filesystem.",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_BOOL(use_fallocate, innobase_use_fallocate,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Preallocate files fast, using operating system functionality. On POSIX systems, posix_fallocate system call is used.",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
- PLUGIN_VAR_RQCMDARG,
- "Number of IOPs the server can do. Tunes the background IO rate",
- NULL, innodb_io_capacity_update, 200, 100, ~0UL, 0);
-
-static MYSQL_SYSVAR_ULONG(io_capacity_max, srv_max_io_capacity,
- PLUGIN_VAR_RQCMDARG,
- "Limit to which innodb_io_capacity can be inflated.",
- NULL, innodb_io_capacity_max_update,
- SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT, 100,
- SRV_MAX_IO_CAPACITY_LIMIT, 0);
-
-static MYSQL_SYSVAR_ULONG(idle_flush_pct,
- srv_idle_flush_pct,
- PLUGIN_VAR_RQCMDARG,
- "Up to what percentage of dirty pages should be flushed when innodb "
- "finds it has spare resources to do so.",
- NULL, NULL, 100, 0, 100, 0);
-
-#ifdef UNIV_DEBUG
-static MYSQL_SYSVAR_BOOL(purge_run_now, innodb_purge_run_now,
- PLUGIN_VAR_OPCMDARG,
- "Set purge state to RUN",
- NULL, purge_run_now_set, FALSE);
-
-static MYSQL_SYSVAR_BOOL(purge_stop_now, innodb_purge_stop_now,
- PLUGIN_VAR_OPCMDARG,
- "Set purge state to STOP",
- NULL, purge_stop_now_set, FALSE);
-
-static MYSQL_SYSVAR_BOOL(log_checkpoint_now, innodb_log_checkpoint_now,
- PLUGIN_VAR_OPCMDARG,
- "Force checkpoint now",
- NULL, checkpoint_now_set, FALSE);
-
-static MYSQL_SYSVAR_BOOL(buf_flush_list_now, innodb_buf_flush_list_now,
- PLUGIN_VAR_OPCMDARG,
- "Force dirty page flush now",
- NULL, buf_flush_list_now_set, FALSE);
-
-static MYSQL_SYSVAR_BOOL(track_redo_log_now,
- innodb_track_redo_log_now,
- PLUGIN_VAR_OPCMDARG,
- "Force log tracker to catch up with checkpoint now",
- NULL, track_redo_log_now_set, FALSE);
-
-#endif /* UNIV_DEBUG */
-
-static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
- PLUGIN_VAR_OPCMDARG,
- "Number of UNDO log pages to purge in one batch from the history list.",
- NULL, NULL,
- 300, /* Default setting */
- 1, /* Minimum value */
- 5000, 0); /* Maximum value */
-
-/* innodb_purge_threads: read-only number of purge threads. The help text
-must state the same default as the declaration below. */
-static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Purge threads can be from 1 to 32. Default is 4.",
- NULL, NULL,
- 4, /* Default setting */
- 1, /* Minimum value */
- SRV_MAX_N_PURGE_THREADS, 0); /* Maximum value */
-
-static MYSQL_SYSVAR_ULONG(sync_array_size, srv_sync_array_size,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Size of the mutex/lock wait array.",
- NULL, NULL,
- 1, /* Default setting */
- 1, /* Minimum value */
- 1024, 0); /* Maximum value */
-
-static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
- PLUGIN_VAR_OPCMDARG,
- "Speeds up the shutdown process of the InnoDB storage engine. Possible "
- "values are 0, 1 (faster) or 2 (fastest - crash-like).",
- NULL, NULL, 1, 0, 2, 0);
-
-static MYSQL_SYSVAR_BOOL(file_per_table, srv_file_per_table,
- PLUGIN_VAR_NOCMDARG,
- "Stores each InnoDB table to an .ibd file in the database dir.",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_STR(file_format, innobase_file_format_name,
- PLUGIN_VAR_RQCMDARG,
- "File format to use for new tables in .ibd files.",
- innodb_file_format_name_validate,
- innodb_file_format_name_update, innodb_file_format_default);
-
-/* "innobase_file_format_check" decides whether we would continue
-booting the server if the file format stamped on the system
-table space exceeds the maximum file format supported
-by the server. Can be set during server startup at command
-line or configure file, and a read only variable after
-server startup */
-static MYSQL_SYSVAR_BOOL(file_format_check, innobase_file_format_check,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Whether to perform system file format check.",
- NULL, NULL, TRUE);
-
-/* If a new file format is introduced, the file format
-name needs to be updated accordingly. Please refer to
-file_format_name_map[] defined in trx0sys.cc for the next
-file format name. */
-static MYSQL_SYSVAR_STR(file_format_max, innobase_file_format_max,
- PLUGIN_VAR_OPCMDARG,
- "The highest file format in the tablespace.",
- innodb_file_format_max_validate,
- innodb_file_format_max_update, innodb_file_format_max_default);
-
-static MYSQL_SYSVAR_STR(ft_server_stopword_table, innobase_server_stopword_table,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC,
- "The user supplied stopword table name.",
- innodb_stopword_table_validate,
- NULL,
- NULL);
-
-static MYSQL_SYSVAR_UINT(flush_log_at_timeout, srv_flush_log_at_timeout,
- PLUGIN_VAR_OPCMDARG,
- "Write and flush logs every (n) second.",
- NULL, NULL, 1, 0, 2700, 0);
-
-/* Changed to the THDVAR */
-//static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
-// PLUGIN_VAR_OPCMDARG,
-// "Set to 0 (write and flush once per second),"
-// " 1 (write and flush at each commit)"
-// " or 2 (write at commit, flush once per second).",
-// NULL, NULL, 1, 0, 2, 0);
-
-static MYSQL_SYSVAR_BOOL(use_global_flush_log_at_trx_commit, srv_use_global_flush_log_at_trx_commit,
- PLUGIN_VAR_NOCMDARG,
- "Use global innodb_flush_log_at_trx_commit value. (default: ON).",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "With which method to flush data.", NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_BOOL(large_prefix, innobase_large_prefix,
- PLUGIN_VAR_NOCMDARG,
- "Support large index prefix length of REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes.",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_BOOL(force_load_corrupted, srv_load_corrupted,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Force InnoDB to load metadata of corrupted table.",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "DEPRECATED. This option may be removed in future releases. "
- "Please use READ COMMITTED transaction isolation level instead. "
- "Force InnoDB to not use next-key locking, to use only row-level locking.",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_ULONG(show_verbose_locks, srv_show_verbose_locks,
- PLUGIN_VAR_OPCMDARG,
- "Whether to show records locked in SHOW INNODB STATUS.",
- NULL, NULL, 0, 0, 1, 0);
-
-static MYSQL_SYSVAR_ULONG(show_locks_held, srv_show_locks_held,
- PLUGIN_VAR_RQCMDARG,
- "Number of locks held to print for each InnoDB transaction in SHOW INNODB STATUS.",
- NULL, NULL, 10, 0, 1000, 0);
-
-#ifdef UNIV_LOG_ARCHIVE
-static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Where full logs should be archived.", NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_BOOL(log_archive, innobase_log_archive,
- PLUGIN_VAR_OPCMDARG,
- "Set to 1 if you want to have logs archived.",
- NULL, innodb_log_archive_update, FALSE);
-#endif /* UNIV_LOG_ARCHIVE */
-
-static MYSQL_SYSVAR_STR(log_group_home_dir, srv_log_group_home_dir,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Path to InnoDB log files.", NULL, NULL, NULL);
-
-static MYSQL_SYSVAR_ULONG(log_arch_expire_sec,
- srv_log_arch_expire_sec, PLUGIN_VAR_OPCMDARG,
- "Expiration time for archived innodb transaction logs.",
- NULL, innodb_log_archive_expire_update, 0, 0, ~0UL, 0);
-
-static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
- PLUGIN_VAR_RQCMDARG,
- "Percentage of dirty pages allowed in bufferpool.",
- NULL, innodb_max_dirty_pages_pct_update, 75.0, 0.001, 99.999, 0);
-
-static MYSQL_SYSVAR_DOUBLE(max_dirty_pages_pct_lwm,
- srv_max_dirty_pages_pct_lwm,
- PLUGIN_VAR_RQCMDARG,
- "Percentage of dirty pages at which flushing kicks in.",
- NULL, innodb_max_dirty_pages_pct_lwm_update, 0.001, 0.000, 99.999, 0);
-
-static MYSQL_SYSVAR_DOUBLE(adaptive_flushing_lwm,
- srv_adaptive_flushing_lwm,
- PLUGIN_VAR_RQCMDARG,
- "Percentage of log capacity below which no adaptive flushing happens.",
- NULL, NULL, 10.0, 0.0, 70.0, 0);
-
-static MYSQL_SYSVAR_BOOL(adaptive_flushing, srv_adaptive_flushing,
- PLUGIN_VAR_NOCMDARG,
- "Attempt flushing dirty pages to avoid IO bursts at checkpoints.",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_ULONG(flushing_avg_loops,
- srv_flushing_avg_loops,
- PLUGIN_VAR_RQCMDARG,
- "Number of iterations over which the background flushing is averaged.",
- NULL, NULL, 30, 1, 1000, 0);
-
-static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
- PLUGIN_VAR_RQCMDARG,
- "Desired maximum length of the purge queue (0 = no limit)",
- NULL, NULL, 0, 0, ~0UL, 0);
-
-static MYSQL_SYSVAR_ULONG(max_purge_lag_delay, srv_max_purge_lag_delay,
- PLUGIN_VAR_RQCMDARG,
- "Maximum delay of user threads in micro-seconds",
- NULL, NULL,
- 0L, /* Default setting */
- 0L, /* Minimum value */
- 10000000UL, 0); /* Maximum value (10 seconds, in microseconds) */
-
-static MYSQL_SYSVAR_BOOL(rollback_on_timeout, innobase_rollback_on_timeout,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Roll back the complete transaction on lock wait timeout, for 4.x compatibility (disabled by default)",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_BOOL(status_file, innobase_create_status_file,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NOSYSVAR,
- "Enable SHOW ENGINE INNODB STATUS output in the innodb_status.<pid> file",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata,
- PLUGIN_VAR_OPCMDARG,
- "Enable statistics gathering for metadata commands such as "
- "SHOW TABLE STATUS for tables that use transient statistics (off by default)",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_ULONGLONG(stats_sample_pages, srv_stats_transient_sample_pages,
- PLUGIN_VAR_RQCMDARG,
- "Deprecated, use innodb_stats_transient_sample_pages instead",
- NULL, innodb_stats_sample_pages_update, 8, 1, ~0ULL, 0);
-
-static MYSQL_SYSVAR_ULONGLONG(stats_transient_sample_pages,
- srv_stats_transient_sample_pages,
- PLUGIN_VAR_RQCMDARG,
- "The number of leaf index pages to sample when calculating transient "
- "statistics (if persistent statistics are not used, default 8)",
- NULL, NULL, 8, 1, ~0ULL, 0);
-
-static MYSQL_SYSVAR_BOOL(stats_persistent, srv_stats_persistent,
- PLUGIN_VAR_OPCMDARG,
- "InnoDB persistent statistics enabled for all tables unless overridden "
- "at table level",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_BOOL(stats_auto_recalc, srv_stats_auto_recalc,
- PLUGIN_VAR_OPCMDARG,
- "InnoDB automatic recalculation of persistent statistics enabled for all "
- "tables unless overridden at table level (automatic recalculation is only "
- "done when InnoDB decides that the table has changed too much and needs a "
- "new statistics)",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_ULONGLONG(stats_persistent_sample_pages,
- srv_stats_persistent_sample_pages,
- PLUGIN_VAR_RQCMDARG,
- "The number of leaf index pages to sample when calculating persistent "
- "statistics (by ANALYZE, default 20)",
- NULL, NULL, 20, 1, ~0ULL, 0);
-
-static MYSQL_SYSVAR_ULONGLONG(stats_modified_counter, srv_stats_modified_counter,
- PLUGIN_VAR_RQCMDARG,
- "The number of rows modified before we calculate new statistics (default 0 = current limits)",
- NULL, NULL, 0, 0, ~0ULL, 0);
-
-static MYSQL_SYSVAR_BOOL(stats_traditional, srv_stats_sample_traditional,
- PLUGIN_VAR_RQCMDARG,
- "Enable traditional statistic calculation based on number of configured pages (default true)",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_BOOL(adaptive_hash_index, btr_search_enabled,
- PLUGIN_VAR_OPCMDARG,
- "Enable InnoDB adaptive hash index (enabled by default). "
- "Disable with --skip-innodb-adaptive-hash-index.",
- NULL, innodb_adaptive_hash_index_update, TRUE);
-
-/* btr_search_index_num is constrained to machine word size for historical
-reasons. This limitation can be easily removed later. */
-static MYSQL_SYSVAR_ULINT(adaptive_hash_index_partitions, btr_search_index_num,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of InnoDB adaptive hash index partitions (default 1: disable "
- "partitioning)",
- NULL, NULL, 1, 1, sizeof(ulint) * 8, 0);
-
-static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay,
- PLUGIN_VAR_RQCMDARG,
- "Replication thread delay (ms) on the slave server if "
- "innodb_thread_concurrency is reached (0 by default)",
- NULL, NULL, 0, 0, ~0UL, 0);
-
-static MYSQL_SYSVAR_BOOL(log_compressed_pages, page_zip_log_pages,
- PLUGIN_VAR_OPCMDARG,
- "Enables/disables the logging of entire compressed page images."
- " InnoDB logs the compressed pages to prevent corruption if"
- " the zlib compression algorithm changes."
- " When turned OFF, InnoDB will assume that the zlib"
- " compression algorithm doesn't change.",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "DEPRECATED. This option may be removed in future releases, "
- "together with the option innodb_use_sys_malloc and with the InnoDB's "
- "internal memory allocator. "
- "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.",
- NULL, NULL, 8*1024*1024L, 512*1024L, LONG_MAX, 1024);
-
-static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment,
- PLUGIN_VAR_RQCMDARG,
- "Data file autoextend increment in megabytes",
- NULL, NULL, 64L, 1L, 1000L, 0);
-
-static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
- NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L);
-
-static MYSQL_SYSVAR_BOOL(buffer_pool_populate, innodb_buffer_pool_populate,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Deprecated. This option has no effect and "
- "will be removed in MariaDB 10.2.3.",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_ENUM(foreground_preflush, srv_foreground_preflush,
- PLUGIN_VAR_OPCMDARG,
- "The algorithm InnoDB uses for the query threads at sync preflush. "
- "Possible values are "
- "SYNC_PREFLUSH: perform a sync preflush as Oracle MySQL; "
- "EXPONENTIAL_BACKOFF: (default) wait for the page cleaner flush.",
- NULL, NULL, SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF,
- &innodb_foreground_preflush_typelib);
-
-#ifdef UNIV_LINUX
-
-static MYSQL_SYSVAR_ULONG(sched_priority_cleaner, srv_sched_priority_cleaner,
- PLUGIN_VAR_RQCMDARG,
- "Nice value for the cleaner and LRU manager thread scheduling",
- NULL, innodb_sched_priority_cleaner_update, 19, 0, 39, 0);
-
-#endif /* UNIV_LINUX */
-
-#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
-static MYSQL_SYSVAR_ULONG(page_hash_locks, srv_n_page_hash_locks,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Number of rw_locks protecting buffer pool page_hash. Rounded up to the next power of 2",
- NULL, NULL, 16, 1, MAX_PAGE_HASH_LOCKS, 0);
-
-static MYSQL_SYSVAR_ULONG(doublewrite_batch_size, srv_doublewrite_batch_size,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Number of pages reserved in doublewrite buffer for batch flushing",
- NULL, NULL, 120, 1, 127, 0);
-
-#ifdef UNIV_LINUX
-
-static MYSQL_SYSVAR_ULONG(sched_priority_purge, srv_sched_priority_purge,
- PLUGIN_VAR_RQCMDARG,
- "Nice value for the purge thread scheduling",
- NULL, innodb_sched_priority_purge_update, 19, 0, 39, 0);
-
-static MYSQL_SYSVAR_ULONG(sched_priority_io, srv_sched_priority_io,
- PLUGIN_VAR_RQCMDARG,
- "Nice value for the I/O handler thread scheduling",
- NULL, innodb_sched_priority_io_update, 19, 0, 39, 0);
-
-static MYSQL_SYSVAR_ULONG(sched_priority_master, srv_sched_priority_master,
- PLUGIN_VAR_RQCMDARG,
- "Nice value for the master thread scheduling",
- NULL, innodb_sched_priority_master_update, 19, 0, 39, 0);
-
-static MYSQL_SYSVAR_BOOL(priority_purge, srv_purge_thread_priority,
- PLUGIN_VAR_OPCMDARG,
- "Make purge coordinator and worker threads acquire shared resources with "
- "priority", NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_BOOL(priority_io, srv_io_thread_priority,
- PLUGIN_VAR_OPCMDARG,
- "Make I/O threads acquire shared resources with priority",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_BOOL(priority_cleaner, srv_cleaner_thread_priority,
- PLUGIN_VAR_OPCMDARG,
- "Make buffer pool cleaner and LRU manager threads acquire shared resources "
- "with priority",
- NULL, NULL, FALSE);
-
-/* innodb_priority_master: bound to srv_master_thread_priority, so the help
-text describes the master thread (not the buffer pool cleaner, which is
-covered by innodb_priority_cleaner). */
-static MYSQL_SYSVAR_BOOL(priority_master, srv_master_thread_priority,
- PLUGIN_VAR_OPCMDARG,
- "Make master thread acquire shared resources with priority",
- NULL, NULL, FALSE);
-
-#endif /* UNIV_LINUX */
-
-/* innodb_cleaner_max_lru_time: time limit, in milliseconds per the help
-text, for one LRU tail flush iteration of the page cleaner. */
-static MYSQL_SYSVAR_ULINT(cleaner_max_lru_time, srv_cleaner_max_lru_time,
- PLUGIN_VAR_RQCMDARG,
- "The maximum time limit for a single LRU tail flush iteration by the page "
- "cleaner thread in milliseconds",
- NULL, NULL, 1000, 0, ~0UL, 0);
-
-/* innodb_cleaner_max_flush_time: time limit, in milliseconds per the help
-text, for one flush list flush iteration of the page cleaner. */
-static MYSQL_SYSVAR_ULINT(cleaner_max_flush_time, srv_cleaner_max_flush_time,
- PLUGIN_VAR_RQCMDARG,
- "The maximum time limit for a single flush list flush iteration by the page "
- "cleaner thread in milliseconds",
- NULL, NULL, 1000, 0, ~0UL, 0);
-
-static MYSQL_SYSVAR_ULINT(cleaner_flush_chunk_size,
- srv_cleaner_flush_chunk_size,
- PLUGIN_VAR_RQCMDARG,
- "Divide page cleaner flush list flush batches into chunks of this size",
- NULL, NULL, 100, 1, ~0UL, 0);
-
-static MYSQL_SYSVAR_ULINT(cleaner_lru_chunk_size,
- srv_cleaner_lru_chunk_size,
- PLUGIN_VAR_RQCMDARG,
- "Divide page cleaner LRU list flush batches into chunks of this size",
- NULL, NULL, 100, 1, ~0UL, 0);
-
-static MYSQL_SYSVAR_ULINT(cleaner_free_list_lwm, srv_cleaner_free_list_lwm,
- PLUGIN_VAR_RQCMDARG,
- "Page cleaner will keep on flushing the same buffer pool instance if its "
- "free list length is below this percentage of innodb_lru_scan_depth",
- NULL, NULL, 10, 0, 100, 0);
-
-static MYSQL_SYSVAR_BOOL(cleaner_eviction_factor, srv_cleaner_eviction_factor,
- PLUGIN_VAR_OPCMDARG,
- "Make page cleaner LRU flushes use evicted instead of flushed page counts "
- "for its heuristics",
- NULL, NULL, FALSE);
-
-#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
-
-static MYSQL_SYSVAR_ENUM(cleaner_lsn_age_factor,
- srv_cleaner_lsn_age_factor,
- PLUGIN_VAR_OPCMDARG,
- "The formula for LSN age factor for page cleaner adaptive flushing. "
- "LEGACY: Original Oracle MySQL 5.6 formula. "
- "HIGH_CHECKPOINT: (the default) Percona Server 5.6 formula.",
- NULL, NULL, SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT,
- &innodb_cleaner_lsn_age_factor_typelib);
-
-static MYSQL_SYSVAR_ENUM(empty_free_list_algorithm,
- srv_empty_free_list_algorithm,
- PLUGIN_VAR_OPCMDARG,
- "The algorithm to use for empty free list handling. Allowed values: "
- "LEGACY: Original Oracle MySQL 5.6 handling with single page flushes; "
- "BACKOFF: (default) Wait until cleaner produces a free page.",
- innodb_srv_empty_free_list_algorithm_validate, NULL, SRV_EMPTY_FREE_LIST_BACKOFF,
- &innodb_empty_free_list_algorithm_typelib);
-
-static MYSQL_SYSVAR_ENUM(lock_schedule_algorithm, innodb_lock_schedule_algorithm,
- PLUGIN_VAR_RQCMDARG,
- "The algorithm Innodb uses for deciding which locks to grant next when"
- " a lock is released. Possible values are"
- " FCFS"
- " grant the locks in First-Come-First-Served order;"
- " VATS"
- " use the Variance-Aware-Transaction-Scheduling algorithm, which"
- " uses an Eldest-Transaction-First heuristic.",
- NULL, NULL, INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS,
- &innodb_lock_schedule_algorithm_typelib);
-
-static MYSQL_SYSVAR_LONG(buffer_pool_instances, innobase_buffer_pool_instances,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of buffer pool instances, set to higher value on high-end machines to increase scalability",
- NULL, NULL, 0L, 0L, MAX_BUFFER_POOLS, 1L);
-
-static MYSQL_SYSVAR_STR(buffer_pool_filename, srv_buf_dump_filename,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC,
- "Filename to/from which to dump/load the InnoDB buffer pool",
- innodb_srv_buf_dump_filename_validate, NULL, SRV_BUF_DUMP_FILENAME_DEFAULT);
-
-static MYSQL_SYSVAR_BOOL(buffer_pool_dump_now, innodb_buffer_pool_dump_now,
- PLUGIN_VAR_RQCMDARG,
- "Trigger an immediate dump of the buffer pool into a file named @@innodb_buffer_pool_filename",
- NULL, buffer_pool_dump_now, FALSE);
-
-static MYSQL_SYSVAR_BOOL(buffer_pool_dump_at_shutdown, srv_buffer_pool_dump_at_shutdown,
- PLUGIN_VAR_RQCMDARG,
- "Dump the buffer pool into a file named @@innodb_buffer_pool_filename",
- NULL, NULL, TRUE);
-
-static MYSQL_SYSVAR_ULONG(buffer_pool_dump_pct, srv_buf_pool_dump_pct,
- PLUGIN_VAR_RQCMDARG,
- "Dump only the hottest N% of each buffer pool, defaults to 25",
- NULL, NULL, 25, 1, 100, 0);
-
-#ifdef UNIV_DEBUG
-static MYSQL_SYSVAR_STR(buffer_pool_evict, srv_buffer_pool_evict,
- PLUGIN_VAR_RQCMDARG,
- "Evict pages from the buffer pool",
- NULL, innodb_buffer_pool_evict_update, "");
-#endif /* UNIV_DEBUG */
-
-static MYSQL_SYSVAR_BOOL(buffer_pool_load_now, innodb_buffer_pool_load_now,
- PLUGIN_VAR_RQCMDARG,
- "Trigger an immediate load of the buffer pool from a file named @@innodb_buffer_pool_filename",
- NULL, buffer_pool_load_now, FALSE);
-
-static MYSQL_SYSVAR_BOOL(buffer_pool_load_abort, innodb_buffer_pool_load_abort,
- PLUGIN_VAR_RQCMDARG,
- "Abort a currently running load of the buffer pool",
- NULL, buffer_pool_load_abort, FALSE);
-
-/* there is no point in changing this during runtime, thus readonly */
-static MYSQL_SYSVAR_BOOL(buffer_pool_load_at_startup, srv_buffer_pool_load_at_startup,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Load the buffer pool from a file named @@innodb_buffer_pool_filename",
- NULL, NULL, TRUE);
-
-/* innodb_defragment: global on/off switch for defragmentation. Adjacent
-string literals are concatenated, so each fragment that ends a sentence
-must carry its own trailing space. */
-static MYSQL_SYSVAR_BOOL(defragment, srv_defragment,
- PLUGIN_VAR_RQCMDARG,
- "Enable/disable InnoDB defragmentation (default FALSE). When set to FALSE, all existing "
- "defragmentation will be paused. And new defragmentation command will fail. "
- "Paused defragmentation commands will resume when this variable is set to "
- "true again.",
- NULL, NULL, FALSE);
-
-static MYSQL_SYSVAR_UINT(defragment_n_pages, srv_defragment_n_pages,
- PLUGIN_VAR_RQCMDARG,
- "Number of pages considered at once when merging multiple pages to "
- "defragment",
- NULL, NULL, 7, 2, 32, 0);
-
-static MYSQL_SYSVAR_UINT(defragment_stats_accuracy,
- srv_defragment_stats_accuracy,
- PLUGIN_VAR_RQCMDARG,
- "How many defragment stats changes there are before the stats "
- "are written to persistent storage. Set to 0 meaning disable "
- "defragment stats tracking.",
- NULL, NULL, 0, 0, ~0U, 0);
-
-/* innodb_defragment_fill_factor_n_recs: stored in
-srv_defragment_fill_factor_n_recs; default 20, accepted range 1..100.
-Companion of innodb_defragment_fill_factor (see help text). */
-static MYSQL_SYSVAR_UINT(
- defragment_fill_factor_n_recs, srv_defragment_fill_factor_n_recs,
- PLUGIN_VAR_RQCMDARG,
- "How many records of space defragmentation should leave on the page. "
- "This variable, together with innodb_defragment_fill_factor, is introduced "
- "so defragmentation won't pack the page too full and cause page split on "
- "the next insert on every page. The variable indicating more defragmentation"
- " gain is the one effective.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 20, /* min */ 1, /* max */ 100, 0);
-
-/* innodb_defragment_fill_factor: double stored in
-srv_defragment_fill_factor; default 0.9, accepted range 0.7..1.
-The help text is assembled from adjacent string literals, so every fragment
-that ends a sentence must carry its own trailing space. */
-static MYSQL_SYSVAR_DOUBLE(defragment_fill_factor, srv_defragment_fill_factor,
- PLUGIN_VAR_RQCMDARG,
- "A number between [0.7, 1] that tells defragmentation how full it should "
- "fill a page. Default is 0.9. Number below 0.7 won't make much sense. "
- "This variable, together with innodb_defragment_fill_factor_n_recs, is "
- "introduced so defragmentation won't pack the page too full and cause "
- "page split on the next insert on every page. The variable indicating more "
- "defragmentation gain is the one effective.",
- NULL, NULL, 0.9, 0.7, 1, 0);
-
-/* innodb_defragment_frequency: stored in srv_defragment_frequency;
-default SRV_DEFRAGMENT_FREQUENCY_DEFAULT, accepted range 1..1000.
-Assignments go through innodb_defragment_frequency_update.
-The help text is assembled from adjacent string literals, so every fragment
-that ends a sentence must carry its own trailing space. */
-static MYSQL_SYSVAR_UINT(defragment_frequency, srv_defragment_frequency,
- PLUGIN_VAR_RQCMDARG,
- "Do not defragment a single index more than this number of times per second. "
- "This controls the number of times defragmentation thread can request X_LOCK "
- "on an index. Defragmentation thread will check whether "
- "1/defragment_frequency (s) has passed since it worked on this index last "
- "time, and put the index back to the queue if not enough time has passed. "
- "The actual frequency can only be lower than this given number.",
- NULL, innodb_defragment_frequency_update,
- SRV_DEFRAGMENT_FREQUENCY_DEFAULT, 1, 1000, 0);
-
-
-/* innodb_lru_scan_depth: stored in srv_LRU_scan_depth;
-default 1024, minimum 100, maximum ~0UL. */
-static MYSQL_SYSVAR_ULONG(
- lru_scan_depth, srv_LRU_scan_depth, PLUGIN_VAR_RQCMDARG,
- "How deep to scan LRU to keep it clean",
- /* check */ NULL, /* update */ NULL,
- /* default */ 1024, /* min */ 100, /* max */ ~0UL, 0);
-
-/* innodb_flush_neighbors: stored in srv_flush_neighbors;
-default 1, accepted values 0..2 as described in the help text. */
-static MYSQL_SYSVAR_ULONG(
- flush_neighbors, srv_flush_neighbors, PLUGIN_VAR_OPCMDARG,
- "Set to 0 (don't flush neighbors from buffer pool),"
- " 1 (flush contiguous neighbors from buffer pool)"
- " or 2 (flush neighbors from buffer pool),"
- " when flushing a block",
- /* check */ NULL, /* update */ NULL,
- /* default */ 1, /* min */ 0, /* max */ 2, 0);
-
-/* innodb_commit_concurrency: stored in innobase_commit_concurrency;
-default 0, accepted range 0..1000. New values are vetted by
-innobase_commit_concurrency_validate. */
-static MYSQL_SYSVAR_ULONG(
- commit_concurrency, innobase_commit_concurrency, PLUGIN_VAR_RQCMDARG,
- "Helps in performance tuning in heavily concurrent environments.",
- /* check */ innobase_commit_concurrency_validate, /* update */ NULL,
- /* default */ 0, /* min */ 0, /* max */ 1000, 0);
-
-/* innodb_concurrency_tickets: stored in srv_n_free_tickets_to_enter;
-default 5000, minimum 1, maximum ~0UL. */
-static MYSQL_SYSVAR_ULONG(
- concurrency_tickets, srv_n_free_tickets_to_enter, PLUGIN_VAR_RQCMDARG,
- "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket",
- /* check */ NULL, /* update */ NULL,
- /* default */ 5000L, /* min */ 1L, /* max */ ~0UL, 0);
-
-/* The help text of innodb_kill_idle_transaction depends on whether the
-build defines EXTENDED_FOR_KILLIDLE; the variable itself is always
-registered. */
-#ifdef EXTENDED_FOR_KILLIDLE
-#define kill_idle_help_text "If non-zero value, the idle session with transaction which is idle over the value in seconds is killed by InnoDB."
-#else
-#define kill_idle_help_text "No effect for this build."
-#endif
-/* innodb_kill_idle_transaction: stored in srv_kill_idle_transaction;
-default 0, accepted range 0..LONG_MAX. */
-static MYSQL_SYSVAR_LONGLONG(
- kill_idle_transaction, srv_kill_idle_transaction,
- PLUGIN_VAR_RQCMDARG,
- kill_idle_help_text,
- /* check */ NULL, /* update */ NULL,
- /* default */ 0, /* min */ 0, /* max */ LONG_MAX, 0);
-
-/* innodb_file_io_threads: stored in innobase_file_io_threads;
-default 4, accepted range 4..64. Flagged read-only and
-PLUGIN_VAR_NOSYSVAR. */
-static MYSQL_SYSVAR_LONG(
- file_io_threads, innobase_file_io_threads,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR,
- "Number of file I/O threads in InnoDB.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 4, /* min */ 4, /* max */ 64, 0);
-
-/* innodb_ft_enable_diag_print: stored in fts_enable_diag_print,
-default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- ft_enable_diag_print, fts_enable_diag_print, PLUGIN_VAR_OPCMDARG,
- "Whether to enable additional FTS diagnostic printout ",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_disable_sort_file_cache: stored in srv_disable_sort_file_cache,
-default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- disable_sort_file_cache, srv_disable_sort_file_cache, PLUGIN_VAR_OPCMDARG,
- "Whether to disable OS system file cache for sort I/O",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_ft_aux_table: string stored in fts_internal_tbl_name2, default
-NULL. Values pass through innodb_internal_table_validate and are applied by
-innodb_internal_table_update. */
-static MYSQL_SYSVAR_STR(
- ft_aux_table, fts_internal_tbl_name2, PLUGIN_VAR_NOCMDARG,
- "FTS internal auxiliary table to be checked",
- /* check */ innodb_internal_table_validate,
- /* update */ innodb_internal_table_update,
- /* default */ NULL);
-
-/* innodb_ft_cache_size: read-only, stored in fts_max_cache_size;
-default 8000000, accepted range 1600000..80000000. */
-static MYSQL_SYSVAR_ULONG(
- ft_cache_size, fts_max_cache_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "InnoDB Fulltext search cache size in bytes",
- /* check */ NULL, /* update */ NULL,
- /* default */ 8000000, /* min */ 1600000, /* max */ 80000000, 0);
-
-/* innodb_ft_total_cache_size: read-only, stored in
-fts_max_total_cache_size; default 640000000, accepted range
-32000000..1600000000. */
-static MYSQL_SYSVAR_ULONG(
- ft_total_cache_size, fts_max_total_cache_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Total memory allocated for InnoDB Fulltext Search cache",
- /* check */ NULL, /* update */ NULL,
- /* default */ 640000000, /* min */ 32000000, /* max */ 1600000000, 0);
-
-/* innodb_ft_result_cache_limit: stored in fts_result_cache_limit;
-default 2000000000, accepted range 1000000..4294967295. */
-static MYSQL_SYSVAR_ULONG(
- ft_result_cache_limit, fts_result_cache_limit, PLUGIN_VAR_RQCMDARG,
- "InnoDB Fulltext search query result cache limit in bytes",
- /* check */ NULL, /* update */ NULL,
- /* default */ 2000000000L, /* min */ 1000000L, /* max */ 4294967295UL, 0);
-
-/* innodb_ft_min_token_size: read-only, stored in fts_min_token_size;
-default 3, accepted range 0..16. */
-static MYSQL_SYSVAR_ULONG(
- ft_min_token_size, fts_min_token_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "InnoDB Fulltext search minimum token size in characters",
- /* check */ NULL, /* update */ NULL,
- /* default */ 3, /* min */ 0, /* max */ 16, 0);
-
-/* innodb_ft_max_token_size: read-only, stored in fts_max_token_size;
-default and maximum are both FTS_MAX_WORD_LEN_IN_CHAR, minimum 10. */
-static MYSQL_SYSVAR_ULONG(
- ft_max_token_size, fts_max_token_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "InnoDB Fulltext search maximum token size in characters",
- /* check */ NULL, /* update */ NULL,
- /* default */ FTS_MAX_WORD_LEN_IN_CHAR, /* min */ 10,
- /* max */ FTS_MAX_WORD_LEN_IN_CHAR, 0);
-
-
-/* innodb_ft_num_word_optimize: stored in fts_num_word_optimize;
-default 2000, accepted range 1000..10000. */
-static MYSQL_SYSVAR_ULONG(
- ft_num_word_optimize, fts_num_word_optimize, PLUGIN_VAR_OPCMDARG,
- "InnoDB Fulltext search number of words to optimize for each optimize table call ",
- /* check */ NULL, /* update */ NULL,
- /* default */ 2000, /* min */ 1000, /* max */ 10000, 0);
-
-/* innodb_ft_sort_pll_degree: read-only, stored in fts_sort_pll_degree;
-default 2, accepted range 1..16. */
-static MYSQL_SYSVAR_ULONG(
- ft_sort_pll_degree, fts_sort_pll_degree,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "InnoDB Fulltext search parallel sort degree, will round up to nearest power of 2 number",
- /* check */ NULL, /* update */ NULL,
- /* default */ 2, /* min */ 1, /* max */ 16, 0);
-
-/* innodb_sort_buffer_size: read-only, stored in srv_sort_buf_size;
-default 1048576, accepted range 65536..(64<<20). */
-static MYSQL_SYSVAR_ULONG(
- sort_buffer_size, srv_sort_buf_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Memory buffer size for index creation",
- /* check */ NULL, /* update */ NULL,
- /* default */ 1048576, /* min */ 65536, /* max */ 64<<20, 0);
-
-/* innodb_online_alter_log_max_size: stored in srv_online_max_size;
-default 128<<20, minimum 65536, maximum ~0ULL. */
-static MYSQL_SYSVAR_ULONGLONG(
- online_alter_log_max_size, srv_online_max_size, PLUGIN_VAR_RQCMDARG,
- "Maximum modification log file size for online index creation",
- /* check */ NULL, /* update */ NULL,
- /* default */ 128<<20, /* min */ 65536, /* max */ ~0ULL, 0);
-
-/* innodb_optimize_fulltext_only: stored in innodb_optimize_fulltext_only,
-default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- optimize_fulltext_only, innodb_optimize_fulltext_only,
- PLUGIN_VAR_NOCMDARG,
- "Only optimize the Fulltext index of the table",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_read_io_threads: read-only, stored in innobase_read_io_threads;
-default 4, accepted range 1..64. */
-static MYSQL_SYSVAR_ULONG(
- read_io_threads, innobase_read_io_threads,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of background read I/O threads in InnoDB.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 4, /* min */ 1, /* max */ 64, 0);
-
-/* innodb_write_io_threads: read-only, stored in innobase_write_io_threads;
-default 4, accepted range 1..64. */
-static MYSQL_SYSVAR_ULONG(
- write_io_threads, innobase_write_io_threads,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of background write I/O threads in InnoDB.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 4, /* min */ 1, /* max */ 64, 0);
-
-/* innodb_force_recovery: read-only, stored in srv_force_recovery;
-default 0, accepted range 0..6. */
-static MYSQL_SYSVAR_ULONG(
- force_recovery, srv_force_recovery,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Helps to save your data in case the disk image of the database becomes corrupt.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 0, /* min */ 0, /* max */ 6, 0);
-
-/* innodb_page_size: read-only, stored in srv_page_size; default and bounds
-come from the UNIV_PAGE_SIZE_DEF/MIN/MAX constants. */
-static MYSQL_SYSVAR_ULONG(
- page_size, srv_page_size,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Page size to use for all InnoDB tablespaces.",
- /* check */ NULL, /* update */ NULL,
- /* default */ UNIV_PAGE_SIZE_DEF,
- /* min */ UNIV_PAGE_SIZE_MIN, /* max */ UNIV_PAGE_SIZE_MAX, 0);
-
-/* innodb_log_buffer_size: read-only, stored in innobase_log_buffer_size;
-default 16 MiB, minimum 256 KiB, maximum LONG_MAX; final argument 1024. */
-static MYSQL_SYSVAR_LONG(
- log_buffer_size, innobase_log_buffer_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "The size of the buffer which InnoDB uses to write log to the log files on disk.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 16*1024*1024L, /* min */ 256*1024L, /* max */ LONG_MAX,
- 1024);
-
-/* innodb_log_file_size: read-only, stored in innobase_log_file_size;
-default 48 MiB, minimum 1 MiB, maximum LONGLONG_MAX; final argument 1 MiB. */
-static MYSQL_SYSVAR_LONGLONG(
- log_file_size, innobase_log_file_size,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Size of each log file in a log group.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 48*1024*1024L, /* min */ 1*1024*1024L,
- /* max */ LONGLONG_MAX, 1024*1024L);
-
-/* innodb_log_files_in_group: read-only, stored in srv_n_log_files;
-default 2, accepted range 2..SRV_N_LOG_FILES_MAX. */
-static MYSQL_SYSVAR_ULONG(
- log_files_in_group, srv_n_log_files,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of log files in the log group. InnoDB writes to the files in a circular fashion.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 2, /* min */ 2, /* max */ SRV_N_LOG_FILES_MAX, 0);
-
-/* innodb_mirrored_log_groups: read-only, stored in
-innobase_mirrored_log_groups. Default and minimum are deliberately 0 so
-that passing the option can be detected and a deprecation message
-printed. */
-static MYSQL_SYSVAR_LONG(
- mirrored_log_groups, innobase_mirrored_log_groups,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 0, /* min */ 0, /* max */ 10, 0);
-
-/* innodb_old_blocks_pct: stored in innobase_old_blocks_pct; default
-100 * 3 / 8, accepted range 5..95. Assignments are applied by
-innodb_old_blocks_pct_update. */
-static MYSQL_SYSVAR_UINT(
- old_blocks_pct, innobase_old_blocks_pct, PLUGIN_VAR_RQCMDARG,
- "Percentage of the buffer pool to reserve for 'old' blocks.",
- /* check */ NULL, /* update */ innodb_old_blocks_pct_update,
- /* default */ 100 * 3 / 8, /* min */ 5, /* max */ 95, 0);
-
-/* innodb_old_blocks_time: stored in buf_LRU_old_threshold_ms;
-default 1000, accepted range 0..UINT_MAX32. */
-static MYSQL_SYSVAR_UINT(
- old_blocks_time, buf_LRU_old_threshold_ms, PLUGIN_VAR_RQCMDARG,
- "Move blocks to the 'new' end of the buffer pool if the first access"
- " was at least this many milliseconds ago."
- " The timeout is disabled if 0.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 1000, /* min */ 0, /* max */ UINT_MAX32, 0);
-
-/* innodb_open_files: read-only, stored in innobase_open_files;
-default 0, accepted range 0..LONG_MAX. */
-static MYSQL_SYSVAR_LONG(
- open_files, innobase_open_files,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "How many files at the maximum InnoDB keeps open at the same time.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 0L, /* min */ 0L, /* max */ LONG_MAX, 0);
-
-/* innodb_sync_spin_loops: stored in srv_n_spin_wait_rounds;
-default 30, accepted range 0..~0UL. */
-static MYSQL_SYSVAR_ULONG(
- sync_spin_loops, srv_n_spin_wait_rounds, PLUGIN_VAR_RQCMDARG,
- "Count of spin-loop rounds in InnoDB mutexes (30 by default)",
- /* check */ NULL, /* update */ NULL,
- /* default */ 30L, /* min */ 0L, /* max */ ~0UL, 0);
-
-/* innodb_spin_wait_delay: stored in srv_spin_wait_delay;
-default 6, accepted range 0..~0UL. */
-static MYSQL_SYSVAR_ULONG(
- spin_wait_delay, srv_spin_wait_delay, PLUGIN_VAR_OPCMDARG,
- "Maximum delay between polling for a spin lock (6 by default)",
- /* check */ NULL, /* update */ NULL,
- /* default */ 6L, /* min */ 0L, /* max */ ~0UL, 0);
-
-/* innodb_thread_concurrency: stored in srv_thread_concurrency;
-default 0, accepted range 0..1000. */
-static MYSQL_SYSVAR_ULONG(
- thread_concurrency, srv_thread_concurrency, PLUGIN_VAR_RQCMDARG,
- "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 0, /* min */ 0, /* max */ 1000, 0);
-
-#ifdef HAVE_ATOMIC_BUILTINS
-/* innodb_adaptive_max_sleep_delay: only registered when
-HAVE_ATOMIC_BUILTINS is defined. Stored in srv_adaptive_max_sleep_delay;
-default 150000, accepted range 0..1000000. */
-static MYSQL_SYSVAR_ULONG(adaptive_max_sleep_delay,
- srv_adaptive_max_sleep_delay,
- PLUGIN_VAR_RQCMDARG,
- "The upper limit of the sleep delay in usec. Value of 0 disables it.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 150000, /* min */ 0, /* max */ 1000000, 0);
-#endif /* HAVE_ATOMIC_BUILTINS */
-
-/* innodb_prefix_index_cluster_optimization: stored in
-srv_prefix_index_cluster_optimization, default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- prefix_index_cluster_optimization, srv_prefix_index_cluster_optimization,
- PLUGIN_VAR_OPCMDARG,
- "Enable prefix optimization to sometimes avoid cluster index lookups.",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_thread_sleep_delay: stored in srv_thread_sleep_delay;
-default 10000, accepted range 0..1000000 (microseconds per help text). */
-static MYSQL_SYSVAR_ULONG(
- thread_sleep_delay, srv_thread_sleep_delay, PLUGIN_VAR_RQCMDARG,
- "Time of innodb thread sleeping before joining InnoDB queue (usec). "
- "Value 0 disable a sleep",
- /* check */ NULL, /* update */ NULL,
- /* default */ 10000L, /* min */ 0L, /* max */ 1000000L, 0);
-
-/* innodb_data_file_path: read-only string stored in
-innobase_data_file_path, default NULL. */
-static MYSQL_SYSVAR_STR(
- data_file_path, innobase_data_file_path,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Path to individual files and their sizes.",
- /* check */ NULL, /* update */ NULL, /* default */ NULL);
-
-/* innodb_undo_directory: read-only string stored in srv_undo_dir,
-default ".". */
-static MYSQL_SYSVAR_STR(
- undo_directory, srv_undo_dir,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Directory where undo tablespace files live, this path can be absolute.",
- /* check */ NULL, /* update */ NULL, /* default */ ".");
-
-/* innodb_undo_tablespaces: read-only, stored in srv_undo_tablespaces;
-default 0, accepted range 0..126. */
-static MYSQL_SYSVAR_ULONG(
- undo_tablespaces, srv_undo_tablespaces,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of undo tablespaces to use. ",
- /* check */ NULL, /* update */ NULL,
- /* default setting */ 0L,
- /* minimum value */ 0L,
- /* maximum value */ 126L, 0);
-
-/* innodb_undo_logs: stored in srv_undo_logs; default and maximum are
-TRX_SYS_N_RSEGS, minimum 1. */
-static MYSQL_SYSVAR_ULONG(
- undo_logs, srv_undo_logs, PLUGIN_VAR_OPCMDARG,
- "Number of undo logs to use.",
- /* check */ NULL, /* update */ NULL,
- /* default setting */ TRX_SYS_N_RSEGS,
- /* minimum value */ 1,
- /* maximum value */ TRX_SYS_N_RSEGS, 0);
-
-/* innodb_rollback_segments: deprecated alias for innodb_undo_logs. It is
-backed by the same srv_undo_logs variable and uses the same bounds. */
-static MYSQL_SYSVAR_ULONG(
- rollback_segments, srv_undo_logs, PLUGIN_VAR_OPCMDARG,
- "Number of undo logs to use (deprecated).",
- /* check */ NULL, /* update */ NULL,
- /* default setting */ TRX_SYS_N_RSEGS,
- /* minimum value */ 1,
- /* maximum value */ TRX_SYS_N_RSEGS, 0);
-
-/* innodb_autoinc_lock_mode: read-only, stored in
-innobase_autoinc_lock_mode. The range is bounded by
-AUTOINC_OLD_STYLE_LOCKING (minimum) and AUTOINC_NO_LOCKING (maximum);
-the default is AUTOINC_NEW_STYLE_LOCKING. */
-static MYSQL_SYSVAR_LONG(
- autoinc_lock_mode, innobase_autoinc_lock_mode,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "The AUTOINC lock modes supported by InnoDB: "
- "0 => Old style AUTOINC locking (for backward"
- " compatibility) "
- "1 => New style AUTOINC locking "
- "2 => No AUTOINC locking (unsafe for SBR)",
- /* check */ NULL, /* update */ NULL,
- /* default setting */ AUTOINC_NEW_STYLE_LOCKING,
- /* minimum value */ AUTOINC_OLD_STYLE_LOCKING,
- /* maximum value */ AUTOINC_NO_LOCKING, 0);
-
-/* innodb_version: read-only string stored in innodb_version_str,
-defaulting to INNODB_VERSION_STR; flagged PLUGIN_VAR_NOCMDOPT. */
-static MYSQL_SYSVAR_STR(
- version, innodb_version_str,
- PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY,
- "Percona-InnoDB-plugin version",
- /* check */ NULL, /* update */ NULL,
- /* default */ INNODB_VERSION_STR);
-
-/* innodb_use_sys_malloc: read-only, stored in srv_use_sys_malloc,
-default TRUE. Marked deprecated in its help text. */
-static MYSQL_SYSVAR_BOOL(
- use_sys_malloc, srv_use_sys_malloc,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "DEPRECATED. This option may be removed in future releases, "
- "together with the InnoDB's internal memory allocator. "
- "Use OS memory allocator instead of InnoDB's internal memory allocator",
- /* check */ NULL, /* update */ NULL, /* default */ TRUE);
-
-/* innodb_use_native_aio: read-only, stored in srv_use_native_aio,
-default TRUE. */
-static MYSQL_SYSVAR_BOOL(
- use_native_aio, srv_use_native_aio,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Use native AIO if supported on this platform.",
- /* check */ NULL, /* update */ NULL, /* default */ TRUE);
-
-#ifdef HAVE_LIBNUMA
-/* innodb_numa_interleave: only registered when HAVE_LIBNUMA is defined.
-Read-only, stored in srv_numa_interleave, default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- numa_interleave, srv_numa_interleave,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Use NUMA interleave memory policy to allocate InnoDB buffer pool.",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-#endif // HAVE_LIBNUMA
-
-/* innodb_api_enable_binlog: read-only, stored in ib_binlog_enabled,
-default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- api_enable_binlog, ib_binlog_enabled,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Enable binlog for applications direct access InnoDB through InnoDB APIs",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_api_enable_mdl: read-only, stored in ib_mdl_enabled,
-default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- api_enable_mdl, ib_mdl_enabled,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Enable MDL for applications direct access InnoDB through InnoDB APIs",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_api_disable_rowlock: read-only, stored in ib_disable_row_lock,
-default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- api_disable_rowlock, ib_disable_row_lock,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Disable row lock when direct access InnoDB through InnoDB APIs",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_api_trx_level: stored in ib_trx_level_setting;
-default 0, accepted range 0..3. */
-static MYSQL_SYSVAR_ULONG(
- api_trx_level, ib_trx_level_setting, PLUGIN_VAR_OPCMDARG,
- "InnoDB API transaction isolation level",
- /* check */ NULL, /* update */ NULL,
- /* default setting */ 0,
- /* minimum value */ 0,
- /* maximum value */ 3, 0);
-
-/* innodb_api_bk_commit_interval: stored in ib_bk_commit_interval;
-default 5, accepted range 1..1024 * 1024 * 1024. */
-static MYSQL_SYSVAR_ULONG(
- api_bk_commit_interval, ib_bk_commit_interval, PLUGIN_VAR_OPCMDARG,
- "Background commit interval in seconds",
- /* check */ NULL, /* update */ NULL,
- /* default setting */ 5,
- /* minimum value */ 1,
- /* maximum value */ 1024 * 1024 * 1024, 0);
-
-/* innodb_change_buffering: string stored in innobase_change_buffering,
-default "all". Values are vetted by innodb_change_buffering_validate and
-applied by innodb_change_buffering_update.
-NOTE(review): the default "all" is not among the values listed in the help
-text; confirm the accepted spellings against the validate function. */
-static MYSQL_SYSVAR_STR(
- change_buffering, innobase_change_buffering, PLUGIN_VAR_RQCMDARG,
- "Buffer changes to reduce random access: "
- "OFF, ON, inserting, deleting, changing, or purging.",
- /* check */ innodb_change_buffering_validate,
- /* update */ innodb_change_buffering_update,
- /* default */ "all");
-
-/* innodb_change_buffer_max_size: stored in
-innobase_change_buffer_max_size; default CHANGE_BUFFER_DEFAULT_SIZE,
-accepted range 0..50. Applied by innodb_change_buffer_max_size_update. */
-static MYSQL_SYSVAR_UINT(
- change_buffer_max_size, innobase_change_buffer_max_size,
- PLUGIN_VAR_RQCMDARG,
- "Maximum on-disk size of change buffer in terms of percentage"
- " of the buffer pool.",
- /* check */ NULL, /* update */ innodb_change_buffer_max_size_update,
- /* default */ CHANGE_BUFFER_DEFAULT_SIZE, /* min */ 0, /* max */ 50, 0);
-
-/* innodb_stats_method: enum stored in srv_innodb_stats_method; default
-SRV_STATS_NULLS_EQUAL, names taken from innodb_stats_method_typelib. */
-static MYSQL_SYSVAR_ENUM(
- stats_method, srv_innodb_stats_method, PLUGIN_VAR_RQCMDARG,
- "Specifies how InnoDB index statistics collection code should "
- "treat NULLs",
- /* check */ NULL, /* update */ NULL,
- /* default */ SRV_STATS_NULLS_EQUAL,
- /* typelib */ &innodb_stats_method_typelib);
-
-/* Flags and check callback for innodb_track_changed_pages. Debug builds
-leave the variable dynamic (with a validator) to provide a testcase sync
-facility; all other builds make it read-only with no validator. */
-#ifdef UNIV_DEBUG
-#define track_changed_pages_flags PLUGIN_VAR_NOCMDARG
-#define track_changed_pages_check innodb_track_changed_pages_validate
-#else
-#define track_changed_pages_flags PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY
-#define track_changed_pages_check NULL
-#endif
-/* innodb_track_changed_pages: stored in srv_track_changed_pages,
-default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- track_changed_pages, srv_track_changed_pages,
- track_changed_pages_flags,
- "Track the redo log for changed pages and output a changed page bitmap",
- /* check */ track_changed_pages_check,
- /* update */ NULL,
- /* default */ FALSE);
-
-/* innodb_max_bitmap_file_size: stored in srv_max_bitmap_file_size;
-default 100 MiB, minimum 4096, maximum ULONGLONG_MAX. */
-static MYSQL_SYSVAR_ULONGLONG(
- max_bitmap_file_size, srv_max_bitmap_file_size, PLUGIN_VAR_RQCMDARG,
- "The maximum size of changed page bitmap files",
- /* check */ NULL, /* update */ NULL,
- /* default */ 100*1024*1024ULL, /* min */ 4096ULL,
- /* max */ ULONGLONG_MAX, 0);
-
-/* innodb_max_changed_pages: stored in srv_max_changed_pages;
-default 1000000, accepted range 0..~0ULL (0 means unlimited per the help
-text). */
-static MYSQL_SYSVAR_ULONGLONG(
- max_changed_pages, srv_max_changed_pages, PLUGIN_VAR_RQCMDARG,
- "The maximum number of rows for "
- "INFORMATION_SCHEMA.INNODB_CHANGED_PAGES table, "
- "0 - unlimited",
- /* check */ NULL, /* update */ NULL,
- /* default */ 1000000, /* min */ 0, /* max */ ~0ULL, 0);
-
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-/* innodb_change_buffering_debug: stored in ibuf_debug;
-default 0, accepted range 0..2. */
-static MYSQL_SYSVAR_UINT(
- change_buffering_debug, ibuf_debug, PLUGIN_VAR_RQCMDARG,
- "Debug flags for InnoDB change buffering (0=none, 2=crash at merge)",
- /* check */ NULL, /* update */ NULL,
- /* default */ 0, /* min */ 0, /* max */ 2, 0);
-
-/* innodb_disable_background_merge: stored in
-srv_ibuf_disable_background_merge, default FALSE.
-NOTE(review): the flags combine PLUGIN_VAR_NOCMDARG with
-PLUGIN_VAR_RQCMDARG, which look mutually exclusive; confirm which
-argument mode is intended. */
-static MYSQL_SYSVAR_BOOL(
- disable_background_merge, srv_ibuf_disable_background_merge,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_RQCMDARG,
- "Disable change buffering merges by the master thread",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
-/* innodb_buf_dump_status_frequency: stored in
-srv_buf_dump_status_frequency; default 0, accepted range 0..100. */
-static MYSQL_SYSVAR_ULONG(buf_dump_status_frequency, srv_buf_dump_status_frequency,
- PLUGIN_VAR_RQCMDARG,
- "A number between [0, 100] that tells how often buffer pool dump status "
- "in percentages should be printed. E.g. 10 means that buffer pool dump "
- "status is printed when every 10% of number of buffer pool pages are "
- "dumped. Default is 0 (only start and end status is printed).",
- NULL, NULL, 0, 0, 100, 0);
-
-#ifdef WITH_INNODB_DISALLOW_WRITES
-/*******************************************************
- * innobase_disallow_writes variable definition *
- *******************************************************/
-
-/* Must always init to FALSE. */
-static my_bool innobase_disallow_writes = FALSE;
-
-/**************************************************************************
-An "update" method for innobase_disallow_writes variable.
-Copies the new value into the variable and then resets
-srv_allow_writes_event when writes are being disallowed, or sets it when
-they are allowed again. Asserts that the event exists. */
-static
-void
-innobase_disallow_writes_update(
-/*============================*/
- THD* thd, /* in: thread handle */
- st_mysql_sys_var* var, /* in: pointer to system
- variable */
- void* var_ptr, /* out: pointer to dynamic
- variable */
- const void* save) /* in: temporary storage */
-{
- /* Read the incoming value through a const pointer; "save" is
- input-only and must not have its constness cast away. */
- const my_bool disallow = *(const my_bool*) save;
-
- *(my_bool*) var_ptr = disallow;
- ut_a(srv_allow_writes_event);
-
- if (disallow) {
-  os_event_reset(srv_allow_writes_event);
- } else {
-  os_event_set(srv_allow_writes_event);
- }
-}
-
-/* innodb_disallow_writes: stored in innobase_disallow_writes,
-default FALSE; assignments go through innobase_disallow_writes_update. */
-static MYSQL_SYSVAR_BOOL(disallow_writes, innobase_disallow_writes,
- PLUGIN_VAR_NOCMDOPT,
- "Tell InnoDB to stop any writes to disk",
- NULL, innobase_disallow_writes_update, FALSE);
-#endif /* WITH_INNODB_DISALLOW_WRITES */
-/* innodb_random_read_ahead: stored in srv_random_read_ahead,
-default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- random_read_ahead, srv_random_read_ahead, PLUGIN_VAR_NOCMDARG,
- "Whether to use read ahead for random access within an extent.",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_read_ahead_threshold: stored in srv_read_ahead_threshold;
-default 56, accepted range 0..64. */
-static MYSQL_SYSVAR_ULONG(
- read_ahead_threshold, srv_read_ahead_threshold, PLUGIN_VAR_RQCMDARG,
- "Number of pages that must be accessed sequentially for InnoDB to "
- "trigger a readahead.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 56, /* min */ 0, /* max */ 64, 0);
-
-/* The four innodb_monitor_* string variables below share
-innodb_monitor_validate as their check callback and default to NULL; each
-has its own update callback and backing variable. */
-
-/* innodb_monitor_enable */
-static MYSQL_SYSVAR_STR(
- monitor_enable, innobase_enable_monitor_counter, PLUGIN_VAR_RQCMDARG,
- "Turn on a monitor counter",
- /* check */ innodb_monitor_validate,
- /* update */ innodb_enable_monitor_update,
- /* default */ NULL);
-
-/* innodb_monitor_disable */
-static MYSQL_SYSVAR_STR(
- monitor_disable, innobase_disable_monitor_counter, PLUGIN_VAR_RQCMDARG,
- "Turn off a monitor counter",
- /* check */ innodb_monitor_validate,
- /* update */ innodb_disable_monitor_update,
- /* default */ NULL);
-
-/* innodb_monitor_reset */
-static MYSQL_SYSVAR_STR(
- monitor_reset, innobase_reset_monitor_counter, PLUGIN_VAR_RQCMDARG,
- "Reset a monitor counter",
- /* check */ innodb_monitor_validate,
- /* update */ innodb_reset_monitor_update,
- /* default */ NULL);
-
-/* innodb_monitor_reset_all */
-static MYSQL_SYSVAR_STR(
- monitor_reset_all, innobase_reset_all_monitor_counter, PLUGIN_VAR_RQCMDARG,
- "Reset all values for a monitor counter",
- /* check */ innodb_monitor_validate,
- /* update */ innodb_reset_all_monitor_update,
- /* default */ NULL);
-
-/* innodb_status_output: stored in srv_print_innodb_monitor, default FALSE;
-applied by innodb_status_output_update. */
-static MYSQL_SYSVAR_BOOL(
- status_output, srv_print_innodb_monitor, PLUGIN_VAR_OPCMDARG,
- "Enable InnoDB monitor output to the error log.",
- /* check */ NULL, /* update */ innodb_status_output_update,
- /* default */ FALSE);
-
-/* innodb_status_output_locks: stored in srv_print_innodb_lock_monitor,
-default FALSE; shares innodb_status_output_update with
-innodb_status_output. */
-static MYSQL_SYSVAR_BOOL(
- status_output_locks, srv_print_innodb_lock_monitor, PLUGIN_VAR_OPCMDARG,
- "Enable InnoDB lock monitor output to the error log."
- " Requires innodb_status_output=ON.",
- /* check */ NULL, /* update */ innodb_status_output_update,
- /* default */ FALSE);
-
-/* innodb_print_all_deadlocks: stored in srv_print_all_deadlocks,
-default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- print_all_deadlocks, srv_print_all_deadlocks, PLUGIN_VAR_OPCMDARG,
- "Print all deadlocks to MySQL error log (off by default)",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_compression_failure_threshold_pct: stored in
-zip_failure_threshold_pct; default 5, accepted range 0..100. */
-static MYSQL_SYSVAR_ULONG(
- compression_failure_threshold_pct, zip_failure_threshold_pct,
- PLUGIN_VAR_OPCMDARG,
- "If the compression failure rate of a table is greater than this number"
- " more padding is added to the pages to reduce the failures. A value of"
- " zero implies no padding",
- /* check */ NULL, /* update */ NULL,
- /* default */ 5, /* min */ 0, /* max */ 100, 0);
-
-/* innodb_compression_pad_pct_max: stored in zip_pad_max;
-default 50, accepted range 0..75. */
-static MYSQL_SYSVAR_ULONG(
- compression_pad_pct_max, zip_pad_max,
- PLUGIN_VAR_OPCMDARG,
- "Percentage of empty space on a data page that can be reserved"
- " to make the page compressible.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 50, /* min */ 0, /* max */ 75, 0);
-
-/* innodb_read_only: read-only variable stored in srv_read_only_mode,
-default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- read_only, srv_read_only_mode,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Start InnoDB in read only mode (off by default)",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_cmp_per_index_enabled: stored in srv_cmp_per_index_enabled,
-default FALSE; applied by innodb_cmp_per_index_update. */
-static MYSQL_SYSVAR_BOOL(
- cmp_per_index_enabled, srv_cmp_per_index_enabled, PLUGIN_VAR_OPCMDARG,
- "Enable INFORMATION_SCHEMA.innodb_cmp_per_index, "
- "may have negative impact on performance (off by default)",
- /* check */ NULL, /* update */ innodb_cmp_per_index_update,
- /* default */ FALSE);
-
-#ifdef UNIV_DEBUG
-/* Debug-only variables; all of them are compiled out unless UNIV_DEBUG is
-defined. */
-
-/* innodb_trx_rseg_n_slots_debug: stored in trx_rseg_n_slots_debug;
-default 0, accepted range 0..1024. */
-static MYSQL_SYSVAR_UINT(
- trx_rseg_n_slots_debug, trx_rseg_n_slots_debug,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_NOCMDOPT,
- "Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()",
- /* check */ NULL, /* update */ NULL,
- /* default */ 0, /* min */ 0, /* max */ 1024, 0);
-
-/* innodb_limit_optimistic_insert_debug: stored in
-btr_cur_limit_optimistic_insert_debug; default 0, maximum UINT_MAX32. */
-static MYSQL_SYSVAR_UINT(
- limit_optimistic_insert_debug, btr_cur_limit_optimistic_insert_debug,
- PLUGIN_VAR_RQCMDARG,
- "Artificially limit the number of records per B-tree page (0=unlimited).",
- /* check */ NULL, /* update */ NULL,
- /* default */ 0, /* min */ 0, /* max */ UINT_MAX32, 0);
-
-/* innodb_trx_purge_view_update_only_debug: stored in
-srv_purge_view_update_only_debug, default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- trx_purge_view_update_only_debug, srv_purge_view_update_only_debug,
- PLUGIN_VAR_NOCMDOPT,
- "Pause actual purging any delete-marked records, but merely update the purge view. "
- "It is to create artificially the situation the purge view have been updated "
- "but the each purges were not done yet.",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_data_file_size_debug: read-only, stored in
-srv_sys_space_size_debug; default 0, maximum UINT_MAX32. */
-static MYSQL_SYSVAR_UINT(
- data_file_size_debug, srv_sys_space_size_debug,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "InnoDB system tablespace size to be set in recovery.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 0, /* min */ 0, /* max */ UINT_MAX32, 0);
-
-/* innodb_fil_make_page_dirty_debug: stored in
-srv_fil_make_page_dirty_debug; assignments invoke
-innodb_make_page_dirty. Default 0, maximum UINT_MAX32. */
-static MYSQL_SYSVAR_ULONG(
- fil_make_page_dirty_debug, srv_fil_make_page_dirty_debug,
- PLUGIN_VAR_OPCMDARG,
- "Make the first page of the given tablespace dirty.",
- /* check */ NULL, /* update */ innodb_make_page_dirty,
- /* default */ 0, /* min */ 0, /* max */ UINT_MAX32, 0);
-
-/* innodb_saved_page_number_debug: stored in srv_saved_page_number_debug;
-assignments invoke innodb_save_page_no. Default 0, maximum UINT_MAX32. */
-static MYSQL_SYSVAR_ULONG(
- saved_page_number_debug, srv_saved_page_number_debug,
- PLUGIN_VAR_OPCMDARG,
- "An InnoDB page number.",
- /* check */ NULL, /* update */ innodb_save_page_no,
- /* default */ 0, /* min */ 0, /* max */ UINT_MAX32, 0);
-
-#endif /* UNIV_DEBUG */
-
-/* innodb_simulate_comp_failures: stored in srv_simulate_comp_failures;
-default 0, accepted range 0..99. */
-static MYSQL_SYSVAR_UINT(
- simulate_comp_failures, srv_simulate_comp_failures, PLUGIN_VAR_NOCMDARG,
- "Simulate compression failures.",
- /* check */ NULL, /* update */ NULL,
- /* default */ 0, /* min */ 0, /* max */ 99, 0);
-
-/* innodb_force_primary_key: stored in srv_force_primary_key,
-default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- force_primary_key, srv_force_primary_key, PLUGIN_VAR_OPCMDARG,
- "Do not allow to create table without primary key (off by default)",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* Names accepted by innodb_corrupt_table_action; the array index is the
-stored enum value. The list is NullS-terminated. */
-const char *corrupt_table_action_names[]=
-{
- "assert", /* 0 */
- "warn", /* 1 */
- "salvage", /* 2 */
- NullS
-};
-/* Typelib over the names above; the count excludes the NullS terminator. */
-TYPELIB corrupt_table_action_typelib=
-{
- array_elements(corrupt_table_action_names) - 1, "corrupt_table_action_typelib",
- corrupt_table_action_names, NULL
-};
-/* innodb_corrupt_table_action: enum stored in srv_pass_corrupt_table,
-default 0 ("assert"). */
-static MYSQL_SYSVAR_ENUM(corrupt_table_action, srv_pass_corrupt_table,
- PLUGIN_VAR_RQCMDARG,
- "Warn corruptions of user tables as 'corrupt table' instead of crashing itself, "
- "when used with file_per_table. "
- "All file io for the datafile after detected as corrupt are disabled, "
- "except for the deletion.",
- NULL, NULL, 0, &corrupt_table_action_typelib);
-
-/* innodb_locking_fake_changes: stored in srv_fake_changes_locks,
-default TRUE. */
-static MYSQL_SYSVAR_BOOL(
- locking_fake_changes, srv_fake_changes_locks, PLUGIN_VAR_NOCMDARG,
- "###EXPERIMENTAL### if enabled, transactions will get S row locks instead "
- "of X locks for fake changes. If disabled, fake change transactions will "
- "not take any locks at all.",
- /* check */ NULL, /* update */ NULL, /* default */ TRUE);
-
-/* innodb_use_stacktrace: read-only, stored in srv_use_stacktrace,
-default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- use_stacktrace, srv_use_stacktrace,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Print stacktrace on long semaphore wait (off by default supported only on linux)",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_compression_level: stored in page_zip_level; default
-DEFAULT_COMPRESSION_LEVEL, accepted range 0..9. */
-static MYSQL_SYSVAR_UINT(
- compression_level, page_zip_level, PLUGIN_VAR_RQCMDARG,
- "Compression level used for zlib compression. 0 is no compression"
- ", 1 is fastest, 9 is best compression and default is 6.",
- /* check */ NULL, /* update */ NULL,
- /* default */ DEFAULT_COMPRESSION_LEVEL, /* min */ 0, /* max */ 9, 0);
-
-/* innodb_use_trim: stored in srv_use_trim, default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- use_trim, srv_use_trim, PLUGIN_VAR_OPCMDARG,
- "Use trim. Default FALSE.",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* Names accepted by innodb_compression_algorithm; the array index is the
-stored enum value. The list is 0-terminated. Keep the help text of the
-system variable below in sync with this list. */
-static const char *page_compression_algorithms[]= { "none", "zlib", "lz4", "lzo", "lzma", "bzip2", "snappy", 0 };
-/* Typelib over the names above; the count excludes the terminator. */
-static TYPELIB page_compression_algorithms_typelib=
-{
- array_elements(page_compression_algorithms) - 1, 0,
- page_compression_algorithms, 0
-};
-/* innodb_compression_algorithm: enum stored in
-innodb_compression_algorithm, default PAGE_ZLIB_ALGORITHM. New values are
-vetted by innodb_compression_algorithm_validate. */
-static MYSQL_SYSVAR_ENUM(compression_algorithm, innodb_compression_algorithm,
- PLUGIN_VAR_OPCMDARG,
- "Compression algorithm used on page compression. One of: none, zlib, lz4, lzo, lzma, bzip2, or snappy",
- innodb_compression_algorithm_validate, NULL,
- /* We use here the largest number of supported compression method to
- enable all those methods that are available. Availability of compression
- method is verified on innodb_compression_algorithm_validate function.
- NOTE(review): the default passed below is PAGE_ZLIB_ALGORITHM, which
- may not be the largest method number; confirm this comment is current. */
- PAGE_ZLIB_ALGORITHM,
- &page_compression_algorithms_typelib);
-
-/* innodb_mtflush_threads: read-only, stored in srv_mtflush_threads;
-default MTFLUSH_DEFAULT_WORKER, accepted range 1..MTFLUSH_MAX_WORKER. */
-static MYSQL_SYSVAR_LONG(
- mtflush_threads, srv_mtflush_threads,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Number of multi-threaded flush threads",
- /* check */ NULL, /* update */ NULL,
- /* default setting */ MTFLUSH_DEFAULT_WORKER,
- /* minimum setting */ 1,
- /* max setting */ MTFLUSH_MAX_WORKER,
- 0);
-
-/* innodb_use_mtflush: read-only, stored in srv_use_mtflush,
-default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- use_mtflush, srv_use_mtflush,
- PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
- "Use multi-threaded flush. Default FALSE.",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_fatal_semaphore_wait_threshold: read-only, stored in
-srv_fatal_semaphore_wait_threshold; default
-DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT, accepted range 1..UINT_MAX32. */
-static MYSQL_SYSVAR_ULONG(
- fatal_semaphore_wait_threshold, srv_fatal_semaphore_wait_threshold,
- PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
- "Maximum number of seconds that semaphore times out in InnoDB.",
- /* check */ NULL, /* update */ NULL,
- /* default setting */ DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT,
- /* minimum setting */ 1,
- /* maximum setting */ UINT_MAX32,
- 0);
-
-/* Names accepted by innodb_encrypt_tables; the array index is the stored
-enum value. The list is 0-terminated. */
-static const char* srv_encrypt_tables_names[] = { "OFF", "ON", "FORCE", 0 };
-/* Typelib over the names above; the count excludes the terminator. */
-static TYPELIB srv_encrypt_tables_typelib = {
- array_elements(srv_encrypt_tables_names)-1,
- 0,
- srv_encrypt_tables_names,
- NULL
-};
-/* innodb_encrypt_tables: enum stored in srv_encrypt_tables, default 0
-("OFF"). Vetted by innodb_encrypt_tables_validate and applied by
-innodb_encrypt_tables_update. */
-static MYSQL_SYSVAR_ENUM(
- encrypt_tables, srv_encrypt_tables, PLUGIN_VAR_OPCMDARG,
- "Enable encryption for tables. "
- "Don't forget to enable --innodb-encrypt-log too",
- /* check */ innodb_encrypt_tables_validate,
- /* update */ innodb_encrypt_tables_update,
- /* default */ 0,
- /* typelib */ &srv_encrypt_tables_typelib);
-
-/* innodb_encryption_threads: stored in srv_n_fil_crypt_threads, whose
-current value is also passed as the default; maximum UINT_MAX32. Applied
-by innodb_encryption_threads_update. */
-static MYSQL_SYSVAR_UINT(
- encryption_threads, srv_n_fil_crypt_threads, PLUGIN_VAR_RQCMDARG,
- "Number of threads performing background key rotation and "
- "scrubbing",
- /* check */ NULL,
- /* update */ innodb_encryption_threads_update,
- /* default */ srv_n_fil_crypt_threads, /* min */ 0,
- /* max */ UINT_MAX32, 0);
-
-/* innodb_encryption_rotate_key_age: stored in
-srv_fil_crypt_rotate_key_age; default 1, accepted range 0..UINT_MAX32.
-Applied by innodb_encryption_rotate_key_age_update. */
-static MYSQL_SYSVAR_UINT(
- encryption_rotate_key_age, srv_fil_crypt_rotate_key_age,
- PLUGIN_VAR_RQCMDARG,
- "Key rotation - re-encrypt in background "
- "all pages that were encrypted with a key that "
- "many (or more) versions behind. Value 0 indicates "
- "that key rotation is disabled.",
- /* check */ NULL,
- /* update */ innodb_encryption_rotate_key_age_update,
- /* default */ 1, /* min */ 0, /* max */ UINT_MAX32, 0);
-
-/* innodb_encryption_rotation_iops: stored in srv_n_fil_crypt_iops, whose
-current value is also passed as the default; maximum UINT_MAX32. Applied
-by innodb_encryption_rotation_iops_update. */
-static MYSQL_SYSVAR_UINT(
- encryption_rotation_iops, srv_n_fil_crypt_iops, PLUGIN_VAR_RQCMDARG,
- "Use this many iops for background key rotation",
- /* check */ NULL,
- /* update */ innodb_encryption_rotation_iops_update,
- /* default */ srv_n_fil_crypt_iops, /* min */ 0,
- /* max */ UINT_MAX32, 0);
-
-/* innodb_scrub_log: read-only, stored in srv_scrub_log; no callbacks,
-default 0. */
-static MYSQL_SYSVAR_BOOL(
- scrub_log, srv_scrub_log,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Enable background redo log (ib_logfile0, ib_logfile1...) scrubbing",
- /* check */ 0, /* update */ 0, /* default */ 0);
-
-/* innodb_scrub_log_speed: stored in innodb_scrub_log_speed;
-default 256, accepted range 1..50000 (bytes/sec per the help text). */
-static MYSQL_SYSVAR_ULONGLONG(
- scrub_log_speed, innodb_scrub_log_speed, PLUGIN_VAR_OPCMDARG,
- "Background redo log scrubbing speed in bytes/sec",
- /* check */ NULL, /* update */ NULL,
- /* default: 256 bytes/sec, corresponds to 2000 ms scrub_log_interval */
- 256,
- /* min */
- 1,
- /* max: 50Kbyte/sec, corresponds to 10 ms scrub_log_interval */
- 50000, 0);
-
-/* innodb_encrypt_log: read-only, stored in srv_encrypt_log,
-default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- encrypt_log, srv_encrypt_log,
- PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Enable redo log encryption",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* The three scrubbing switches below are registered with flags 0, no
-callbacks and a default of FALSE. */
-
-/* innodb_immediate_scrub_data_uncompressed: stored in
-srv_immediate_scrub_data_uncompressed. */
-static MYSQL_SYSVAR_BOOL(
- immediate_scrub_data_uncompressed, srv_immediate_scrub_data_uncompressed,
- /* flags */ 0,
- "Enable scrubbing of data",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_background_scrub_data_uncompressed: stored in
-srv_background_scrub_data_uncompressed. */
-static MYSQL_SYSVAR_BOOL(
- background_scrub_data_uncompressed, srv_background_scrub_data_uncompressed,
- /* flags */ 0,
- "Enable scrubbing of uncompressed data by "
- "background threads (same as encryption_threads)",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_background_scrub_data_compressed: stored in
-srv_background_scrub_data_compressed. */
-static MYSQL_SYSVAR_BOOL(
- background_scrub_data_compressed, srv_background_scrub_data_compressed,
- /* flags */ 0,
- "Enable scrubbing of compressed data by "
- "background threads (same as encryption_threads)",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-
-/* innodb_background_scrub_data_check_interval: stored in
-srv_background_scrub_data_check_interval, whose current value is also
-passed as the default; accepted range 1..UINT_MAX32. Flags are 0. */
-static MYSQL_SYSVAR_UINT(
- background_scrub_data_check_interval,
- srv_background_scrub_data_check_interval,
- /* flags */ 0,
- "check if spaces needs scrubbing every "
- "innodb_background_scrub_data_check_interval "
- "seconds",
- /* check */ NULL, /* update */ NULL,
- /* default */ srv_background_scrub_data_check_interval,
- /* min */ 1,
- /* max */ UINT_MAX32, 0);
-
-/* innodb_background_scrub_data_interval: stored in
-srv_background_scrub_data_interval, whose current value is also passed as
-the default; accepted range 1..UINT_MAX32. Flags are 0.
-The help text is assembled from adjacent string literals; only one of two
-neighbouring fragments may carry the separating space. */
-static MYSQL_SYSVAR_UINT(background_scrub_data_interval,
- srv_background_scrub_data_interval,
- 0,
- "scrub spaces that were last scrubbed longer than "
- "innodb_background_scrub_data_interval seconds ago",
- NULL, NULL,
- srv_background_scrub_data_interval,
- 1,
- UINT_MAX32, 0);
-
-#ifdef UNIV_DEBUG
-/* innodb_debug_force_scrubbing (debug builds only): stored in
-srv_scrub_force_testing, flags 0, default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- debug_force_scrubbing, srv_scrub_force_testing,
- /* flags */ 0,
- "Perform extra scrubbing to increase test exposure",
- /* check */ NULL, /* update */ NULL, /* default */ FALSE);
-#endif /* UNIV_DEBUG */
-
-/* innodb_instrument_semaphores: stored in srv_instrument_semaphores;
-no callbacks, default FALSE. */
-static MYSQL_SYSVAR_BOOL(
- instrument_semaphores, srv_instrument_semaphores, PLUGIN_VAR_OPCMDARG,
- "Enable semaphore request instrumentation. This could have some effect on performance but allows better"
- " information on long semaphore wait problems. (Default: not enabled)",
- /* check */ 0, /* update */ 0, /* default */ FALSE);
-
-static struct st_mysql_sys_var* innobase_system_variables[]= {
- MYSQL_SYSVAR(log_block_size),
- MYSQL_SYSVAR(additional_mem_pool_size),
- MYSQL_SYSVAR(api_trx_level),
- MYSQL_SYSVAR(api_bk_commit_interval),
- MYSQL_SYSVAR(autoextend_increment),
- MYSQL_SYSVAR(buffer_pool_size),
- MYSQL_SYSVAR(buffer_pool_populate),
- MYSQL_SYSVAR(buffer_pool_instances),
- MYSQL_SYSVAR(buffer_pool_filename),
- MYSQL_SYSVAR(buffer_pool_dump_now),
- MYSQL_SYSVAR(buffer_pool_dump_at_shutdown),
- MYSQL_SYSVAR(buffer_pool_dump_pct),
-#ifdef UNIV_DEBUG
- MYSQL_SYSVAR(buffer_pool_evict),
-#endif /* UNIV_DEBUG */
- MYSQL_SYSVAR(buffer_pool_load_now),
- MYSQL_SYSVAR(buffer_pool_load_abort),
- MYSQL_SYSVAR(buffer_pool_load_at_startup),
- MYSQL_SYSVAR(defragment),
- MYSQL_SYSVAR(defragment_n_pages),
- MYSQL_SYSVAR(defragment_stats_accuracy),
- MYSQL_SYSVAR(defragment_fill_factor),
- MYSQL_SYSVAR(defragment_fill_factor_n_recs),
- MYSQL_SYSVAR(defragment_frequency),
- MYSQL_SYSVAR(lru_scan_depth),
- MYSQL_SYSVAR(flush_neighbors),
- MYSQL_SYSVAR(checksum_algorithm),
- MYSQL_SYSVAR(log_checksum_algorithm),
- MYSQL_SYSVAR(checksums),
- MYSQL_SYSVAR(commit_concurrency),
- MYSQL_SYSVAR(concurrency_tickets),
- MYSQL_SYSVAR(compression_level),
- MYSQL_SYSVAR(kill_idle_transaction),
- MYSQL_SYSVAR(data_file_path),
- MYSQL_SYSVAR(data_home_dir),
- MYSQL_SYSVAR(doublewrite),
- MYSQL_SYSVAR(stats_include_delete_marked),
- MYSQL_SYSVAR(api_enable_binlog),
- MYSQL_SYSVAR(api_enable_mdl),
- MYSQL_SYSVAR(api_disable_rowlock),
- MYSQL_SYSVAR(use_atomic_writes),
- MYSQL_SYSVAR(use_fallocate),
- MYSQL_SYSVAR(fast_shutdown),
- MYSQL_SYSVAR(file_io_threads),
- MYSQL_SYSVAR(read_io_threads),
- MYSQL_SYSVAR(write_io_threads),
- MYSQL_SYSVAR(file_per_table),
- MYSQL_SYSVAR(file_format),
- MYSQL_SYSVAR(file_format_check),
- MYSQL_SYSVAR(file_format_max),
- MYSQL_SYSVAR(flush_log_at_timeout),
- MYSQL_SYSVAR(flush_log_at_trx_commit),
- MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
- MYSQL_SYSVAR(flush_method),
- MYSQL_SYSVAR(force_recovery),
- MYSQL_SYSVAR(ft_cache_size),
- MYSQL_SYSVAR(ft_total_cache_size),
- MYSQL_SYSVAR(ft_result_cache_limit),
- MYSQL_SYSVAR(ft_enable_stopword),
- MYSQL_SYSVAR(ft_max_token_size),
- MYSQL_SYSVAR(ft_min_token_size),
- MYSQL_SYSVAR(ft_num_word_optimize),
- MYSQL_SYSVAR(ft_sort_pll_degree),
- MYSQL_SYSVAR(large_prefix),
- MYSQL_SYSVAR(force_load_corrupted),
- MYSQL_SYSVAR(lock_schedule_algorithm),
- MYSQL_SYSVAR(locks_unsafe_for_binlog),
- MYSQL_SYSVAR(lock_wait_timeout),
-#ifdef UNIV_LOG_ARCHIVE
- MYSQL_SYSVAR(log_arch_dir),
- MYSQL_SYSVAR(log_archive),
- MYSQL_SYSVAR(log_arch_expire_sec),
-#endif /* UNIV_LOG_ARCHIVE */
- MYSQL_SYSVAR(page_size),
- MYSQL_SYSVAR(log_buffer_size),
- MYSQL_SYSVAR(log_file_size),
- MYSQL_SYSVAR(log_files_in_group),
- MYSQL_SYSVAR(log_group_home_dir),
- MYSQL_SYSVAR(log_compressed_pages),
- MYSQL_SYSVAR(max_dirty_pages_pct),
- MYSQL_SYSVAR(max_dirty_pages_pct_lwm),
- MYSQL_SYSVAR(adaptive_flushing_lwm),
- MYSQL_SYSVAR(adaptive_flushing),
- MYSQL_SYSVAR(flushing_avg_loops),
- MYSQL_SYSVAR(max_purge_lag),
- MYSQL_SYSVAR(max_purge_lag_delay),
- MYSQL_SYSVAR(mirrored_log_groups),
- MYSQL_SYSVAR(old_blocks_pct),
- MYSQL_SYSVAR(old_blocks_time),
- MYSQL_SYSVAR(open_files),
- MYSQL_SYSVAR(optimize_fulltext_only),
- MYSQL_SYSVAR(rollback_on_timeout),
- MYSQL_SYSVAR(ft_aux_table),
- MYSQL_SYSVAR(ft_enable_diag_print),
- MYSQL_SYSVAR(ft_server_stopword_table),
- MYSQL_SYSVAR(ft_user_stopword_table),
- MYSQL_SYSVAR(disable_sort_file_cache),
- MYSQL_SYSVAR(stats_on_metadata),
- MYSQL_SYSVAR(stats_sample_pages),
- MYSQL_SYSVAR(stats_transient_sample_pages),
- MYSQL_SYSVAR(stats_persistent),
- MYSQL_SYSVAR(stats_persistent_sample_pages),
- MYSQL_SYSVAR(stats_auto_recalc),
- MYSQL_SYSVAR(stats_modified_counter),
- MYSQL_SYSVAR(stats_traditional),
- MYSQL_SYSVAR(adaptive_hash_index),
- MYSQL_SYSVAR(adaptive_hash_index_partitions),
- MYSQL_SYSVAR(stats_method),
- MYSQL_SYSVAR(replication_delay),
- MYSQL_SYSVAR(status_file),
- MYSQL_SYSVAR(strict_mode),
- MYSQL_SYSVAR(support_xa),
- MYSQL_SYSVAR(sort_buffer_size),
- MYSQL_SYSVAR(online_alter_log_max_size),
- MYSQL_SYSVAR(sync_spin_loops),
- MYSQL_SYSVAR(spin_wait_delay),
- MYSQL_SYSVAR(table_locks),
- MYSQL_SYSVAR(thread_concurrency),
-#ifdef HAVE_ATOMIC_BUILTINS
- MYSQL_SYSVAR(adaptive_max_sleep_delay),
-#endif /* HAVE_ATOMIC_BUILTINS */
- MYSQL_SYSVAR(prefix_index_cluster_optimization),
- MYSQL_SYSVAR(thread_sleep_delay),
- MYSQL_SYSVAR(autoinc_lock_mode),
- MYSQL_SYSVAR(show_verbose_locks),
- MYSQL_SYSVAR(show_locks_held),
- MYSQL_SYSVAR(version),
- MYSQL_SYSVAR(use_sys_malloc),
- MYSQL_SYSVAR(use_native_aio),
-#ifdef HAVE_LIBNUMA
- MYSQL_SYSVAR(numa_interleave),
-#endif // HAVE_LIBNUMA
- MYSQL_SYSVAR(change_buffering),
- MYSQL_SYSVAR(change_buffer_max_size),
- MYSQL_SYSVAR(track_changed_pages),
- MYSQL_SYSVAR(max_bitmap_file_size),
- MYSQL_SYSVAR(max_changed_pages),
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
- MYSQL_SYSVAR(change_buffering_debug),
- MYSQL_SYSVAR(disable_background_merge),
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-#ifdef WITH_INNODB_DISALLOW_WRITES
- MYSQL_SYSVAR(disallow_writes),
-#endif /* WITH_INNODB_DISALLOW_WRITES */
- MYSQL_SYSVAR(random_read_ahead),
- MYSQL_SYSVAR(read_ahead_threshold),
- MYSQL_SYSVAR(read_only),
- MYSQL_SYSVAR(io_capacity),
- MYSQL_SYSVAR(io_capacity_max),
- MYSQL_SYSVAR(idle_flush_pct),
- MYSQL_SYSVAR(monitor_enable),
- MYSQL_SYSVAR(monitor_disable),
- MYSQL_SYSVAR(monitor_reset),
- MYSQL_SYSVAR(monitor_reset_all),
- MYSQL_SYSVAR(purge_threads),
- MYSQL_SYSVAR(purge_batch_size),
-#ifdef UNIV_DEBUG
- MYSQL_SYSVAR(purge_run_now),
- MYSQL_SYSVAR(purge_stop_now),
- MYSQL_SYSVAR(log_checkpoint_now),
- MYSQL_SYSVAR(buf_flush_list_now),
- MYSQL_SYSVAR(track_redo_log_now),
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_LINUX
- MYSQL_SYSVAR(sched_priority_cleaner),
-#endif
-#if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG
- MYSQL_SYSVAR(page_hash_locks),
- MYSQL_SYSVAR(doublewrite_batch_size),
-#ifdef UNIV_LINUX
- MYSQL_SYSVAR(sched_priority_purge),
- MYSQL_SYSVAR(sched_priority_io),
- MYSQL_SYSVAR(sched_priority_master),
- MYSQL_SYSVAR(priority_purge),
- MYSQL_SYSVAR(priority_io),
- MYSQL_SYSVAR(priority_cleaner),
- MYSQL_SYSVAR(priority_master),
-#endif /* UNIV_LINUX */
- MYSQL_SYSVAR(cleaner_max_lru_time),
- MYSQL_SYSVAR(cleaner_max_flush_time),
- MYSQL_SYSVAR(cleaner_flush_chunk_size),
- MYSQL_SYSVAR(cleaner_lru_chunk_size),
- MYSQL_SYSVAR(cleaner_free_list_lwm),
- MYSQL_SYSVAR(cleaner_eviction_factor),
-#endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
- MYSQL_SYSVAR(status_output),
- MYSQL_SYSVAR(status_output_locks),
- MYSQL_SYSVAR(cleaner_lsn_age_factor),
- MYSQL_SYSVAR(foreground_preflush),
- MYSQL_SYSVAR(empty_free_list_algorithm),
- MYSQL_SYSVAR(print_all_deadlocks),
- MYSQL_SYSVAR(cmp_per_index_enabled),
- MYSQL_SYSVAR(undo_logs),
- MYSQL_SYSVAR(rollback_segments),
- MYSQL_SYSVAR(undo_directory),
- MYSQL_SYSVAR(undo_tablespaces),
- MYSQL_SYSVAR(sync_array_size),
- MYSQL_SYSVAR(compression_failure_threshold_pct),
- MYSQL_SYSVAR(compression_pad_pct_max),
-#ifdef UNIV_DEBUG
- MYSQL_SYSVAR(trx_rseg_n_slots_debug),
- MYSQL_SYSVAR(limit_optimistic_insert_debug),
- MYSQL_SYSVAR(trx_purge_view_update_only_debug),
- MYSQL_SYSVAR(data_file_size_debug),
- MYSQL_SYSVAR(fil_make_page_dirty_debug),
- MYSQL_SYSVAR(saved_page_number_debug),
-#endif /* UNIV_DEBUG */
- MYSQL_SYSVAR(simulate_comp_failures),
- MYSQL_SYSVAR(corrupt_table_action),
- MYSQL_SYSVAR(fake_changes),
- MYSQL_SYSVAR(locking_fake_changes),
- MYSQL_SYSVAR(tmpdir),
- MYSQL_SYSVAR(use_stacktrace),
- MYSQL_SYSVAR(force_primary_key),
- MYSQL_SYSVAR(fatal_semaphore_wait_threshold),
- /* Table page compression feature */
- MYSQL_SYSVAR(use_trim),
- MYSQL_SYSVAR(compression_algorithm),
- MYSQL_SYSVAR(mtflush_threads),
- MYSQL_SYSVAR(use_mtflush),
- /* Encryption feature */
- MYSQL_SYSVAR(encrypt_tables),
- MYSQL_SYSVAR(encryption_threads),
- MYSQL_SYSVAR(encryption_rotate_key_age),
- MYSQL_SYSVAR(encryption_rotation_iops),
- MYSQL_SYSVAR(scrub_log),
- MYSQL_SYSVAR(scrub_log_speed),
- MYSQL_SYSVAR(encrypt_log),
- MYSQL_SYSVAR(default_encryption_key_id),
- /* Scrubing feature */
- MYSQL_SYSVAR(immediate_scrub_data_uncompressed),
- MYSQL_SYSVAR(background_scrub_data_uncompressed),
- MYSQL_SYSVAR(background_scrub_data_compressed),
- MYSQL_SYSVAR(background_scrub_data_interval),
- MYSQL_SYSVAR(background_scrub_data_check_interval),
-#ifdef UNIV_DEBUG
- MYSQL_SYSVAR(debug_force_scrubbing),
-#endif
- MYSQL_SYSVAR(instrument_semaphores),
- MYSQL_SYSVAR(buf_dump_status_frequency),
- NULL
-};
-
-maria_declare_plugin(xtradb)
-{ /* InnoDB */
- MYSQL_STORAGE_ENGINE_PLUGIN,
- &innobase_storage_engine,
- innobase_hton_name,
- plugin_author,
- "Percona-XtraDB, Supports transactions, row-level locking, foreign keys and encryption for tables",
- PLUGIN_LICENSE_GPL,
- innobase_init, /* Plugin Init */
- NULL, /* Plugin Deinit */
- INNODB_VERSION_SHORT,
- innodb_status_variables_export,/* status variables */
- innobase_system_variables, /* system variables */
- INNODB_VERSION_STR, /* string version */
- MariaDB_PLUGIN_MATURITY_STABLE /* maturity */
-},
-i_s_xtradb_read_view,
-i_s_xtradb_internal_hash_tables,
-i_s_xtradb_rseg,
-i_s_innodb_trx,
-i_s_innodb_locks,
-i_s_innodb_lock_waits,
-i_s_innodb_cmp,
-i_s_innodb_cmp_reset,
-i_s_innodb_cmpmem,
-i_s_innodb_cmpmem_reset,
-i_s_innodb_cmp_per_index,
-i_s_innodb_cmp_per_index_reset,
-i_s_innodb_buffer_page,
-i_s_innodb_buffer_page_lru,
-i_s_innodb_buffer_stats,
-i_s_innodb_metrics,
-i_s_innodb_ft_default_stopword,
-i_s_innodb_ft_deleted,
-i_s_innodb_ft_being_deleted,
-i_s_innodb_ft_config,
-i_s_innodb_ft_index_cache,
-i_s_innodb_ft_index_table,
-i_s_innodb_sys_tables,
-i_s_innodb_sys_tablestats,
-i_s_innodb_sys_indexes,
-i_s_innodb_sys_columns,
-i_s_innodb_sys_fields,
-i_s_innodb_sys_foreign,
-i_s_innodb_sys_foreign_cols,
-i_s_innodb_sys_tablespaces,
-i_s_innodb_sys_datafiles,
-i_s_innodb_changed_pages,
-i_s_innodb_mutexes,
-i_s_innodb_sys_semaphore_waits,
-i_s_innodb_tablespaces_encryption,
-i_s_innodb_tablespaces_scrubbing,
-i_s_innodb_changed_page_bitmaps
-maria_declare_plugin_end;
-
-/** @brief Initialize the default value of innodb_commit_concurrency.
-
-Once InnoDB is running, the innodb_commit_concurrency must not change
-from zero to nonzero. (Bug #42101)
-
-The initial default value is 0, and without this extra initialization,
-SET GLOBAL innodb_commit_concurrency=DEFAULT would set the parameter
-to 0, even if it was initially set to nonzero at the command line
-or configuration file. */
-static
-void
-innobase_commit_concurrency_init_default()
-/*======================================*/
-{
- MYSQL_SYSVAR_NAME(commit_concurrency).def_val
- = innobase_commit_concurrency;
-}
-
-/** @brief Initialize the default and max value of innodb_undo_logs.
-
-Once InnoDB is running, the default value and the max value of
-innodb_undo_logs must be equal to the available undo logs,
-given by srv_available_undo_logs. */
-static
-void
-innobase_undo_logs_init_default_max()
-/*=================================*/
-{
- MYSQL_SYSVAR_NAME(undo_logs).max_val
- = MYSQL_SYSVAR_NAME(undo_logs).def_val
- = static_cast<unsigned long>(srv_available_undo_logs);
-}
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-struct innobase_convert_name_test_t {
- char* buf;
- ulint buflen;
- const char* id;
- ulint idlen;
- void* thd;
- ibool file_id;
-
- const char* expected;
-};
-
-void
-test_innobase_convert_name()
-{
- char buf[1024];
- ulint i;
-
- innobase_convert_name_test_t test_input[] = {
- {buf, sizeof(buf), "abcd", 4, NULL, TRUE, "\"abcd\""},
- {buf, 7, "abcd", 4, NULL, TRUE, "\"abcd\""},
- {buf, 6, "abcd", 4, NULL, TRUE, "\"abcd\""},
- {buf, 5, "abcd", 4, NULL, TRUE, "\"abc\""},
- {buf, 4, "abcd", 4, NULL, TRUE, "\"ab\""},
-
- {buf, sizeof(buf), "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
- {buf, 9, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
- {buf, 8, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
- {buf, 7, "ab@0060cd", 9, NULL, TRUE, "\"ab`cd\""},
- {buf, 6, "ab@0060cd", 9, NULL, TRUE, "\"ab`c\""},
- {buf, 5, "ab@0060cd", 9, NULL, TRUE, "\"ab`\""},
- {buf, 4, "ab@0060cd", 9, NULL, TRUE, "\"ab\""},
-
- {buf, sizeof(buf), "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\"\"cd\""},
- {buf, 17, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\"\"cd\""},
- {buf, 16, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\"\"c\""},
- {buf, 15, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\"\"\""},
- {buf, 14, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\""},
- {buf, 13, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#ab\""},
- {buf, 12, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#a\""},
- {buf, 11, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50#\""},
- {buf, 10, "ab\"cd", 5, NULL, TRUE,
- "\"#mysql50\""},
-
- {buf, sizeof(buf), "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""},
- {buf, 9, "ab/cd", 5, NULL, TRUE, "\"ab\".\"cd\""},
- {buf, 8, "ab/cd", 5, NULL, TRUE, "\"ab\".\"c\""},
- {buf, 7, "ab/cd", 5, NULL, TRUE, "\"ab\".\"\""},
- {buf, 6, "ab/cd", 5, NULL, TRUE, "\"ab\"."},
- {buf, 5, "ab/cd", 5, NULL, TRUE, "\"ab\"."},
- {buf, 4, "ab/cd", 5, NULL, TRUE, "\"ab\""},
- {buf, 3, "ab/cd", 5, NULL, TRUE, "\"a\""},
- {buf, 2, "ab/cd", 5, NULL, TRUE, "\"\""},
- /* XXX probably "" is a better result in this case
- {buf, 1, "ab/cd", 5, NULL, TRUE, "."},
- */
- {buf, 0, "ab/cd", 5, NULL, TRUE, ""},
- };
-
- for (i = 0; i < sizeof(test_input) / sizeof(test_input[0]); i++) {
-
- char* end;
- ibool ok = TRUE;
- size_t res_len;
-
- fprintf(stderr, "TESTING %lu, %s, %lu, %s\n",
- test_input[i].buflen,
- test_input[i].id,
- test_input[i].idlen,
- test_input[i].expected);
-
- end = innobase_convert_name(
- test_input[i].buf,
- test_input[i].buflen,
- test_input[i].id,
- test_input[i].idlen,
- test_input[i].thd,
- test_input[i].file_id);
-
- res_len = (size_t) (end - test_input[i].buf);
-
- if (res_len != strlen(test_input[i].expected)) {
-
- fprintf(stderr, "unexpected len of the result: %u, "
- "expected: %u\n", (unsigned) res_len,
- (unsigned) strlen(test_input[i].expected));
- ok = FALSE;
- }
-
- if (memcmp(test_input[i].buf,
- test_input[i].expected,
- strlen(test_input[i].expected)) != 0
- || !ok) {
-
- fprintf(stderr, "unexpected result: %.*s, "
- "expected: %s\n", (int) res_len,
- test_input[i].buf,
- test_input[i].expected);
- ok = FALSE;
- }
-
- if (ok) {
- fprintf(stderr, "OK: res: %.*s\n\n", (int) res_len,
- buf);
- } else {
- fprintf(stderr, "FAILED\n\n");
- return;
- }
- }
-}
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
-
-/**
- * Index Condition Pushdown interface implementation
- */
-
-/*************************************************************//**
-InnoDB index push-down condition check
-@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */
-UNIV_INTERN
-enum icp_result
-innobase_index_cond(
-/*================*/
- void* file) /*!< in/out: pointer to ha_innobase */
-{
- return handler_index_cond_check(file);
-}
-
-/** Attempt to push down an index condition.
-* @param[in] keyno MySQL key number
-* @param[in] idx_cond Index condition to be checked
-* @return Part of idx_cond which the handler will not evaluate
-*/
-UNIV_INTERN
-class Item*
-ha_innobase::idx_cond_push(
- uint keyno,
- class Item* idx_cond)
-{
- DBUG_ENTER("ha_innobase::idx_cond_push");
- DBUG_ASSERT(keyno != MAX_KEY);
- DBUG_ASSERT(idx_cond != NULL);
-
- pushed_idx_cond = idx_cond;
- pushed_idx_cond_keyno = keyno;
- in_range_check_pushed_down = TRUE;
- /* We will evaluate the condition entirely */
- DBUG_RETURN(NULL);
-}
-
-/******************************************************************//**
-Use this when the args are passed to the format string from
-errmsg-utf8.txt directly as is.
-
-Push a warning message to the client, it is a wrapper around:
-
-void push_warning_printf(
- THD *thd, Sql_condition::enum_warning_level level,
- uint code, const char *format, ...);
-*/
-UNIV_INTERN
-void
-ib_senderrf(
-/*========*/
- THD* thd, /*!< in/out: session */
- ib_log_level_t level, /*!< in: warning level */
- ib_uint32_t code, /*!< MySQL error code */
- ...) /*!< Args */
-{
- va_list args;
- const char* format = innobase_get_err_msg(code);
-
- /* If the caller wants to push a message to the client then
- the caller must pass a valid session handle. */
-
- ut_a(thd != 0);
-
- /* The error code must exist in the errmsg-utf8.txt file. */
- ut_a(format != 0);
-
- va_start(args, code);
-
- myf l=0;
-
- switch(level) {
- case IB_LOG_LEVEL_INFO:
- l = ME_JUST_INFO;
- break;
- case IB_LOG_LEVEL_WARN:
- l = ME_JUST_WARNING;
- break;
- case IB_LOG_LEVEL_ERROR:
- case IB_LOG_LEVEL_FATAL:
- l = 0;
- break;
- default:
- l = 0;
- break;
- }
-
- my_printv_error(code, format, MYF(l), args);
-
- va_end(args);
-
- if (level == IB_LOG_LEVEL_FATAL) {
- ut_error;
- }
-}
-
-/******************************************************************//**
-Use this when the args are first converted to a formatted string and then
-passed to the format string from errmsg-utf8.txt. The error message format
-must be: "Some string ... %s".
-
-Push a warning message to the client, it is a wrapper around:
-
-void push_warning_printf(
- THD *thd, Sql_condition::enum_warning_level level,
- uint code, const char *format, ...);
-*/
-UNIV_INTERN
-void
-ib_errf(
-/*====*/
- THD* thd, /*!< in/out: session */
- ib_log_level_t level, /*!< in: warning level */
- ib_uint32_t code, /*!< MySQL error code */
- const char* format, /*!< printf format */
- ...) /*!< Args */
-{
- char* str;
- va_list args;
-
- /* If the caller wants to push a message to the client then
- the caller must pass a valid session handle. */
-
- ut_a(thd != 0);
- ut_a(format != 0);
-
- va_start(args, format);
-
-#ifdef __WIN__
- int size = _vscprintf(format, args) + 1;
- str = static_cast<char*>(malloc(size));
- str[size - 1] = 0x0;
- vsnprintf(str, size, format, args);
-#elif HAVE_VASPRINTF
- int ret;
- ret = vasprintf(&str, format, args);
- ut_a(ret != -1);
-#else
- /* Use a fixed length string. */
- str = static_cast<char*>(malloc(BUFSIZ));
- my_vsnprintf(str, BUFSIZ, format, args);
-#endif /* __WIN__ */
-
- ib_senderrf(thd, level, code, str);
-
- va_end(args);
- free(str);
-}
-
-/******************************************************************//**
-Write a message to the MySQL log, prefixed with "InnoDB: " */
-UNIV_INTERN
-void
-ib_logf(
-/*====*/
- ib_log_level_t level, /*!< in: warning level */
- const char* format, /*!< printf format */
- ...) /*!< Args */
-{
- char* str;
- va_list args;
-
- va_start(args, format);
-
-#ifdef __WIN__
- int size = _vscprintf(format, args) + 1;
- str = static_cast<char*>(malloc(size));
- str[size - 1] = 0x0;
- vsnprintf(str, size, format, args);
-#elif HAVE_VASPRINTF
- int ret;
- ret = vasprintf(&str, format, args);
- ut_a(ret != -1);
-#else
- /* Use a fixed length string. */
- str = static_cast<char*>(malloc(BUFSIZ));
- my_vsnprintf(str, BUFSIZ, format, args);
-#endif /* __WIN__ */
- if (!IS_XTRABACKUP()) {
- switch (level) {
- case IB_LOG_LEVEL_INFO:
- sql_print_information("InnoDB: %s", str);
- break;
- case IB_LOG_LEVEL_WARN:
- sql_print_warning("InnoDB: %s", str);
- break;
- case IB_LOG_LEVEL_ERROR:
- sql_print_error("InnoDB: %s", str);
- sd_notifyf(0, "STATUS=InnoDB: Error: %s", str);
- break;
- case IB_LOG_LEVEL_FATAL:
- sql_print_error("InnoDB: %s", str);
- sd_notifyf(0, "STATUS=InnoDB: Fatal: %s", str);
- break;
- }
- }
- else {
- /* Don't use server logger for XtraBackup, just print to stderr. */
- fprintf(stderr, "InnoDB: %s\n", str);
- }
-
- va_end(args);
- free(str);
-
- if (level == IB_LOG_LEVEL_FATAL) {
- ut_error;
- }
-}
-
-/**********************************************************************
-Converts an identifier from my_charset_filename to UTF-8 charset.
-@return result string length, as returned by strconvert() */
-uint
-innobase_convert_to_filename_charset(
-/*=================================*/
- char* to, /* out: converted identifier */
- const char* from, /* in: identifier to convert */
- ulint len) /* in: length of 'to', in bytes */
-{
- uint errors;
- CHARSET_INFO* cs_to = &my_charset_filename;
- CHARSET_INFO* cs_from = system_charset_info;
-
- return(strconvert(cs_from, from, strlen(from), cs_to, to,
- static_cast<uint>(len), &errors));
-}
-
-/**********************************************************************
-Converts an identifier from my_charset_filename to UTF-8 charset.
-@return result string length, as returned by strconvert() */
-uint
-innobase_convert_to_system_charset(
-/*===============================*/
- char* to, /* out: converted identifier */
- const char* from, /* in: identifier to convert */
- ulint len, /* in: length of 'to', in bytes */
- uint* errors) /* out: error return */
-{
- CHARSET_INFO* cs1 = &my_charset_filename;
- CHARSET_INFO* cs2 = system_charset_info;
-
- return(strconvert(cs1, from, strlen(from), cs2, to,
- static_cast<uint>(len), errors));
-}
-
-
-/****************************************************************************
- * DS-MRR implementation
- ***************************************************************************/
-
-/**
- * Multi Range Read interface, DS-MRR calls
- */
-
-int ha_innobase::multi_range_read_init(RANGE_SEQ_IF *seq, void *seq_init_param,
- uint n_ranges, uint mode,
- HANDLER_BUFFER *buf)
-{
- return ds_mrr.dsmrr_init(this, seq, seq_init_param, n_ranges, mode, buf);
-}
-
-int ha_innobase::multi_range_read_next(range_id_t *range_info)
-{
- return ds_mrr.dsmrr_next(range_info);
-}
-
-ha_rows ha_innobase::multi_range_read_info_const(uint keyno, RANGE_SEQ_IF *seq,
- void *seq_init_param,
- uint n_ranges, uint *bufsz,
- uint *flags,
- Cost_estimate *cost)
-{
- /* See comments in ha_myisam::multi_range_read_info_const */
- ds_mrr.init(this, table);
-
- if (prebuilt->select_lock_type != LOCK_NONE)
- *flags |= HA_MRR_USE_DEFAULT_IMPL;
-
- ha_rows res= ds_mrr.dsmrr_info_const(keyno, seq, seq_init_param, n_ranges,
- bufsz, flags, cost);
- return res;
-}
-
-ha_rows ha_innobase::multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint key_parts, uint *bufsz,
- uint *flags, Cost_estimate *cost)
-{
- ds_mrr.init(this, table);
- ha_rows res= ds_mrr.dsmrr_info(keyno, n_ranges, keys, key_parts, bufsz,
- flags, cost);
- return res;
-}
-
-int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str, size_t size)
-{
- return ds_mrr.dsmrr_explain_info(mrr_mode, str, size);
-}
-
-/*
- A helper function used only in index_cond_func_innodb
-*/
-
-bool ha_innobase::is_thd_killed()
-{
- return thd_kill_level(user_thd);
-}
-
-/**********************************************************************
-Issue a warning that the row is too big. */
-UNIV_INTERN
-void
-ib_warn_row_too_big(const dict_table_t* table)
-{
- /* If prefix is true then a 768-byte prefix is stored
- locally for BLOB fields. Refer to dict_table_get_format() */
- const bool prefix = (dict_tf_get_format(table->flags)
- == UNIV_FORMAT_A);
-
- const ulint free_space = page_get_free_space_of_empty(
- table->flags & DICT_TF_COMPACT) / 2;
-
- THD* thd = current_thd;
-
- if (thd == NULL) {
- return;
- }
-
- push_warning_printf(
- thd, Sql_condition::WARN_LEVEL_WARN, HA_ERR_TO_BIG_ROW,
- "Row size too large (> %lu). Changing some columns to TEXT"
- " or BLOB %smay help. In current row format, BLOB prefix of"
- " %d bytes is stored inline.", free_space
- , prefix ? "or using ROW_FORMAT=DYNAMIC or"
- " ROW_FORMAT=COMPRESSED ": ""
- , prefix ? DICT_MAX_FIXED_COL_LEN : 0);
-}
-
-/*************************************************************//**
-Check for a valid value of innobase_compression_algorithm.
-@return 0 for valid innodb_compression_algorithm. */
-static
-int
-innodb_compression_algorithm_validate(
-/*==================================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value) /*!< in: incoming string */
-{
- long compression_algorithm;
- DBUG_ENTER("innobase_compression_algorithm_validate");
-
- if (check_sysvar_enum(thd, var, save, value)) {
- DBUG_RETURN(1);
- }
-
- compression_algorithm = *reinterpret_cast<ulong*>(save);
- (void)compression_algorithm;
-
-#ifndef HAVE_LZ4
- if (compression_algorithm == PAGE_LZ4_ALGORITHM) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_UNSUPPORTED,
- "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
- "InnoDB: liblz4 is not installed. \n",
- compression_algorithm);
- DBUG_RETURN(1);
- }
-#endif
-
-#ifndef HAVE_LZO
- if (compression_algorithm == PAGE_LZO_ALGORITHM) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_UNSUPPORTED,
- "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
- "InnoDB: liblzo is not installed. \n",
- compression_algorithm);
- DBUG_RETURN(1);
- }
-#endif
-
-#ifndef HAVE_LZMA
- if (compression_algorithm == PAGE_LZMA_ALGORITHM) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_UNSUPPORTED,
- "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
- "InnoDB: liblzma is not installed. \n",
- compression_algorithm);
- DBUG_RETURN(1);
- }
-#endif
-
-#ifndef HAVE_BZIP2
- if (compression_algorithm == PAGE_BZIP2_ALGORITHM) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_UNSUPPORTED,
- "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
- "InnoDB: libbz2 is not installed. \n",
- compression_algorithm);
- DBUG_RETURN(1);
- }
-#endif
-
-#ifndef HAVE_SNAPPY
- if (compression_algorithm == PAGE_SNAPPY_ALGORITHM) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_UNSUPPORTED,
- "InnoDB: innodb_compression_algorithm = %lu unsupported.\n"
- "InnoDB: libsnappy is not installed. \n",
- compression_algorithm);
- DBUG_RETURN(1);
- }
-#endif
- DBUG_RETURN(0);
-}
-
-static
-int
-innodb_encrypt_tables_validate(
-/*=================================*/
- THD* thd, /*!< in: thread handle */
- struct st_mysql_sys_var* var, /*!< in: pointer to system
- variable */
- void* save, /*!< out: immediate result
- for update function */
- struct st_mysql_value* value) /*!< in: incoming string */
-{
- if (check_sysvar_enum(thd, var, save, value)) {
- return 1;
- }
-
- ulong encrypt_tables = *(ulong*)save;
-
- if (encrypt_tables
- && !encryption_key_id_exists(FIL_DEFAULT_ENCRYPTION_KEY)) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_UNSUPPORTED,
- "InnoDB: cannot enable encryption, "
- "encryption plugin is not available");
- return 1;
- }
-
- if (!srv_fil_crypt_rotate_key_age) {
- const char *msg = (encrypt_tables ? "enable" : "disable");
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- HA_ERR_UNSUPPORTED,
- "InnoDB: cannot %s encryption, "
- "innodb_encryption_rotate_key_age=0"
- " i.e. key rotation disabled", msg);
- return 1;
- }
-
- return 0;
-}
-
-static void innodb_remember_check_sysvar_funcs()
-{
- /* remember build-in sysvar check functions */
- ut_ad((MYSQL_SYSVAR_NAME(checksum_algorithm).flags & 0x1FF) == PLUGIN_VAR_ENUM);
- check_sysvar_enum = MYSQL_SYSVAR_NAME(checksum_algorithm).check;
-}
-
-/********************************************************************//**
-Helper function to push warnings from InnoDB internals to SQL-layer. */
-UNIV_INTERN
-void
-ib_push_warning(
- trx_t* trx, /*!< in: trx */
- ulint error, /*!< in: error code to push as warning */
- const char *format,/*!< in: warning message */
- ...)
-{
- if (trx && trx->mysql_thd) {
- THD *thd = (THD *)trx->mysql_thd;
- va_list args;
- char *buf;
-#define MAX_BUF_SIZE 4*1024
-
- va_start(args, format);
- buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME));
- vsprintf(buf,format, args);
-
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- convert_error_code_to_mysql((dberr_t)error, 0, thd),
- buf);
- my_free(buf);
- va_end(args);
- }
-}
-
-/********************************************************************//**
-Helper function to push warnings from InnoDB internals to SQL-layer. */
-UNIV_INTERN
-void
-ib_push_warning(
- void* ithd, /*!< in: thd */
- ulint error, /*!< in: error code to push as warning */
- const char *format,/*!< in: warning message */
- ...)
-{
- va_list args;
- THD *thd = (THD *)ithd;
- char *buf;
-#define MAX_BUF_SIZE 4*1024
-
- if (ithd == NULL) {
- thd = current_thd;
- }
-
- if (thd) {
- va_start(args, format);
- buf = (char *)my_malloc(MAX_BUF_SIZE, MYF(MY_WME));
- vsprintf(buf,format, args);
-
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- convert_error_code_to_mysql((dberr_t)error, 0, thd),
- buf);
- my_free(buf);
- va_end(args);
- }
-}
-
-/********************************************************************//**
-Helper function to push frm mismatch error to error log and
-if needed to sql-layer. */
-UNIV_INTERN
-void
-ib_push_frm_error(
-/*==============*/
- THD* thd, /*!< in: MySQL thd */
- dict_table_t* ib_table, /*!< in: InnoDB table */
- TABLE* table, /*!< in: MySQL table */
- ulint n_keys, /*!< in: InnoDB #keys */
- bool push_warning) /*!< in: print warning ? */
-{
- switch (ib_table->dict_frm_mismatch) {
- case DICT_FRM_NO_PK:
- sql_print_error("Table %s has a primary key in "
- "InnoDB data dictionary, but not "
- "in MySQL!"
- " Have you mixed up "
- ".frm files from different "
- "installations? See "
- REFMAN
- "innodb-troubleshooting.html\n",
- ib_table->name);
-
- if (push_warning) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_NO_SUCH_INDEX,
- "InnoDB: Table %s has a "
- "primary key in InnoDB data "
- "dictionary, but not in "
- "MySQL!", ib_table->name);
- }
- break;
- case DICT_NO_PK_FRM_HAS:
- sql_print_error(
- "Table %s has no primary key in InnoDB data "
- "dictionary, but has one in MySQL! If you "
- "created the table with a MySQL version < "
- "3.23.54 and did not define a primary key, "
- "but defined a unique key with all non-NULL "
- "columns, then MySQL internally treats that "
- "key as the primary key. You can fix this "
- "error by dump + DROP + CREATE + reimport "
- "of the table.", ib_table->name);
-
- if (push_warning) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_NO_SUCH_INDEX,
- "InnoDB: Table %s has no "
- "primary key in InnoDB data "
- "dictionary, but has one in "
- "MySQL!",
- ib_table->name);
- }
- break;
-
- case DICT_FRM_INCONSISTENT_KEYS:
- sql_print_error("InnoDB: Table %s contains %lu "
- "indexes inside InnoDB, which "
- "is different from the number of "
- "indexes %u defined in the MySQL "
- " Have you mixed up "
- ".frm files from different "
- "installations? See "
- REFMAN
- "innodb-troubleshooting.html\n",
- ib_table->name, n_keys,
- table->s->keys);
-
- if (push_warning) {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_NO_SUCH_INDEX,
- "InnoDB: Table %s contains %lu "
- "indexes inside InnoDB, which "
- "is different from the number of "
- "indexes %u defined in the MySQL ",
- ib_table->name, n_keys,
- table->s->keys);
- }
- break;
-
- case DICT_FRM_CONSISTENT:
- default:
- sql_print_error("InnoDB: Table %s is consistent "
- "on InnoDB data dictionary and MySQL "
- " FRM file.",
- ib_table->name);
- ut_error;
- break;
- }
-}
diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h
deleted file mode 100644
index 3bb67532954..00000000000
--- a/storage/xtradb/handler/ha_innodb.h
+++ /dev/null
@@ -1,746 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2000, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*
- This file is based on ha_berkeley.h of MySQL distribution
-
- This file defines the Innodb handler: the interface between MySQL and
- Innodb
-*/
-
-#include "dict0stats.h"
-
-/* Structure defines translation table between mysql index and innodb
-index structures */
-struct innodb_idx_translate_t {
- ulint index_count; /*!< number of valid index entries
- in the index_mapping array */
- ulint array_size; /*!< array size of index_mapping */
- dict_index_t** index_mapping; /*!< index pointer array directly
- maps to index in Innodb from MySQL
- array index */
-};
-
-
-/** InnoDB table share */
-typedef struct st_innobase_share {
- THR_LOCK lock; /*!< MySQL lock protecting
- this structure */
- const char* table_name; /*!< InnoDB table name */
- uint use_count; /*!< reference count,
- incremented in get_share()
- and decremented in
- free_share() */
- void* table_name_hash;/*!< hash table chain node */
- innodb_idx_translate_t idx_trans_tbl; /*!< index translation
- table between MySQL and
- Innodb */
- dict_table_t* ib_table;
-} INNOBASE_SHARE;
-
-
-/** Prebuilt structures in an InnoDB table handle used within MySQL */
-struct row_prebuilt_t;
-
-/** Engine specific table options are defined using this struct */
-struct ha_table_option_struct
-{
- bool page_compressed; /*!< Table is using page compression
- if this option is true. */
- ulonglong page_compression_level; /*!< Table page compression level
- 0-9. */
- uint atomic_writes; /*!< Use atomic writes for this
- table if this options is ON or
- in DEFAULT if
- srv_use_atomic_writes=1.
- Atomic writes are not used if
- value OFF.*/
- uint encryption; /*!< DEFAULT, ON, OFF */
- ulonglong encryption_key_id; /*!< encryption key id */
-};
-
-/** The class defining a handle to an Innodb table */
-class ha_innobase: public handler
-{
- row_prebuilt_t* prebuilt; /*!< prebuilt struct in InnoDB, used
- to save CPU time with prebuilt data
- structures*/
- THD* user_thd; /*!< the thread handle of the user
- currently using the handle; this is
- set in external_lock function */
- THR_LOCK_DATA lock;
- INNOBASE_SHARE* share; /*!< information for MySQL
- table locking */
-
- uchar* upd_buf; /*!< buffer used in updates */
- ulint upd_buf_size; /*!< the size of upd_buf in bytes */
- Table_flags int_table_flags;
- uint primary_key;
- ulong start_of_scan; /*!< this is set to 1 when we are
- starting a table scan but have not
- yet fetched any row, else 0 */
- uint last_match_mode;/* match mode of the latest search:
- ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX,
- or undefined */
- uint num_write_row; /*!< number of write_row() calls */
-
- ha_statistics* ha_partition_stats; /*!< stats of the partition owner
- handler (if there is one) */
- uint store_key_val_for_row(uint keynr, char* buff, uint buff_len,
- const uchar* record);
- inline void update_thd(THD* thd);
- void update_thd();
- int change_active_index(uint keynr);
- int general_fetch(uchar* buf, uint direction, uint match_mode);
- dberr_t innobase_lock_autoinc();
- ulonglong innobase_peek_autoinc();
- dberr_t innobase_set_max_autoinc(ulonglong auto_inc);
- dberr_t innobase_reset_autoinc(ulonglong auto_inc);
- dberr_t innobase_get_autoinc(ulonglong* value);
- void innobase_initialize_autoinc();
- dict_index_t* innobase_get_index(uint keynr);
-
-#ifdef WITH_WSREP
- int wsrep_append_keys(THD *thd, bool shared,
- const uchar* record0, const uchar* record1);
-#endif
- /* Init values for the class: */
- public:
- ha_innobase(handlerton *hton, TABLE_SHARE *table_arg);
- ~ha_innobase();
- /*
- Get the row type from the storage engine. If this method returns
- ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used.
- */
- enum row_type get_row_type() const;
-
- const char* table_type() const;
- const char* index_type(uint key_number);
- Table_flags table_flags() const;
- ulong index_flags(uint idx, uint part, bool all_parts) const;
- uint max_supported_keys() const;
- uint max_supported_key_length() const;
- uint max_supported_key_part_length() const;
- const key_map* keys_to_use_for_scanning();
-
- int open(const char *name, int mode, uint test_if_locked);
- handler* clone(const char *name, MEM_ROOT *mem_root);
- int close(void);
- double scan_time();
- double read_time(uint index, uint ranges, ha_rows rows);
- longlong get_memory_buffer_size() const;
- my_bool is_fake_change_enabled(THD *thd);
-
- int write_row(uchar * buf);
- int update_row(const uchar * old_data, const uchar * new_data);
- int delete_row(const uchar * buf);
- bool was_semi_consistent_read();
- void try_semi_consistent_read(bool yes);
- void unlock_row();
-
- int index_init(uint index, bool sorted);
- int index_end();
- int index_read(uchar * buf, const uchar * key,
- uint key_len, enum ha_rkey_function find_flag);
- int index_read_idx(uchar * buf, uint index, const uchar * key,
- uint key_len, enum ha_rkey_function find_flag);
- int index_read_last(uchar * buf, const uchar * key, uint key_len);
- int index_next(uchar * buf);
- int index_next_same(uchar * buf, const uchar *key, uint keylen);
- int index_prev(uchar * buf);
- int index_first(uchar * buf);
- int index_last(uchar * buf);
-
- bool has_gap_locks() const { return true; }
-
- int rnd_init(bool scan);
- int rnd_end();
- int rnd_next(uchar *buf);
- int rnd_pos(uchar * buf, uchar *pos);
-
- int ft_init();
- void ft_end();
- FT_INFO *ft_init_ext(uint flags, uint inx, String* key);
- int ft_read(uchar* buf);
-
- void position(const uchar *record);
- int info(uint);
- int analyze(THD* thd,HA_CHECK_OPT* check_opt);
- int optimize(THD* thd,HA_CHECK_OPT* check_opt);
- int discard_or_import_tablespace(my_bool discard);
- int extra(enum ha_extra_function operation);
- int reset();
- int external_lock(THD *thd, int lock_type);
- int transactional_table_lock(THD *thd, int lock_type);
- int start_stmt(THD *thd, thr_lock_type lock_type);
- void position(uchar *record);
- ha_rows records_in_range(uint inx, key_range *min_key, key_range
- *max_key);
- ha_rows estimate_rows_upper_bound();
-
- void update_create_info(HA_CREATE_INFO* create_info);
- int parse_table_name(const char*name,
- HA_CREATE_INFO* create_info,
- ulint flags,
- ulint flags2,
- char* norm_name,
- char* temp_path,
- char* remote_path);
- const char* check_table_options(THD *thd, TABLE* table,
- HA_CREATE_INFO* create_info, const bool use_tablespace, const ulint file_format);
- int create(const char *name, register TABLE *form,
- HA_CREATE_INFO *create_info);
- int truncate();
- int delete_table(const char *name);
- int rename_table(const char* from, const char* to);
- int defragment_table(const char* name, const char* index_name,
- bool async);
- int check(THD* thd, HA_CHECK_OPT* check_opt);
- char* update_table_comment(const char* comment);
- char* get_foreign_key_create_info();
- int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list);
- int get_parent_foreign_key_list(THD *thd,
- List<FOREIGN_KEY_INFO> *f_key_list);
- bool can_switch_engines();
- uint referenced_by_foreign_key();
- void free_foreign_key_create_info(char* str);
- THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
- enum thr_lock_type lock_type);
- void init_table_handle_for_HANDLER();
- virtual void get_auto_increment(ulonglong offset, ulonglong increment,
- ulonglong nb_desired_values,
- ulonglong *first_value,
- ulonglong *nb_reserved_values);
- int reset_auto_increment(ulonglong value);
-
- virtual bool get_error_message(int error, String *buf);
- virtual bool get_foreign_dup_key(char*, uint, char*, uint);
- uint8 table_cache_type();
- /*
- ask handler about permission to cache table during query registration
- */
- my_bool register_query_cache_table(THD *thd, const char *table_key,
- uint key_length,
- qc_engine_callback *call_back,
- ulonglong *engine_data);
- static const char *get_mysql_bin_log_name();
- static ulonglong get_mysql_bin_log_pos();
- bool primary_key_is_clustered();
- int cmp_ref(const uchar *ref1, const uchar *ref2);
- /** On-line ALTER TABLE interface @see handler0alter.cc @{ */
-
- /** Check if InnoDB supports a particular alter table in-place
- @param altered_table TABLE object for new version of table.
- @param ha_alter_info Structure describing changes to be done
- by ALTER TABLE and holding data used during in-place alter.
-
- @retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported
- @retval HA_ALTER_INPLACE_NO_LOCK Supported
- @retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE
- Supported, but requires lock
- during main phase and exclusive
- lock during prepare phase.
- @retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE
- Supported, prepare phase
- requires exclusive lock.
- */
- enum_alter_inplace_result check_if_supported_inplace_alter(
- TABLE* altered_table,
- Alter_inplace_info* ha_alter_info);
- /** Allows InnoDB to update internal structures with concurrent
- writes blocked (provided that check_if_supported_inplace_alter()
- did not return HA_ALTER_INPLACE_NO_LOCK).
- This will be invoked before inplace_alter_table().
-
- @param altered_table TABLE object for new version of table.
- @param ha_alter_info Structure describing changes to be done
- by ALTER TABLE and holding data used during in-place alter.
-
- @retval true Failure
- @retval false Success
- */
- bool prepare_inplace_alter_table(
- TABLE* altered_table,
- Alter_inplace_info* ha_alter_info);
-
- /** Alter the table structure in-place with operations
- specified using HA_ALTER_FLAGS and Alter_inplace_information.
- The level of concurrency allowed during this operation depends
- on the return value from check_if_supported_inplace_alter().
-
- @param altered_table TABLE object for new version of table.
- @param ha_alter_info Structure describing changes to be done
- by ALTER TABLE and holding data used during in-place alter.
-
- @retval true Failure
- @retval false Success
- */
- bool inplace_alter_table(
- TABLE* altered_table,
- Alter_inplace_info* ha_alter_info);
-
- /** Commit or rollback the changes made during
- prepare_inplace_alter_table() and inplace_alter_table() inside
- the storage engine. Note that the allowed level of concurrency
- during this operation will be the same as for
- inplace_alter_table() and thus might be higher than during
- prepare_inplace_alter_table(). (E.g concurrent writes were
- blocked during prepare, but might not be during commit).
- @param altered_table TABLE object for new version of table.
- @param ha_alter_info Structure describing changes to be done
- by ALTER TABLE and holding data used during in-place alter.
- @param commit true => Commit, false => Rollback.
- @retval true Failure
- @retval false Success
- */
- bool commit_inplace_alter_table(
- TABLE* altered_table,
- Alter_inplace_info* ha_alter_info,
- bool commit);
- /** @} */
- void set_partition_owner_stats(ha_statistics *stats);
- bool check_if_incompatible_data(HA_CREATE_INFO *info,
- uint table_changes);
-
- bool check_if_supported_virtual_columns(void) { return TRUE; }
-
-private:
- /** Builds a 'template' to the prebuilt struct.
-
- The template is used in fast retrieval of just those column
- values MySQL needs in its processing.
- @param whole_row true if access is needed to a whole row,
- false if accessing individual fields is enough */
- void build_template(bool whole_row);
- /** Resets a query execution 'template'.
- @see build_template() */
- inline void reset_template();
-
- int info_low(uint, bool);
-
-public:
- /** @name Multi Range Read interface @{ */
- /** Initialize multi range read @see DsMrr_impl::dsmrr_init
- * @param seq
- * @param seq_init_param
- * @param n_ranges
- * @param mode
- * @param buf
- */
- int multi_range_read_init(RANGE_SEQ_IF* seq,
- void* seq_init_param,
- uint n_ranges, uint mode,
- HANDLER_BUFFER* buf);
- /** Process next multi range read @see DsMrr_impl::dsmrr_next
- * @param range_info
- */
- int multi_range_read_next(range_id_t *range_info);
- /** Initialize multi range read and get information.
- * @see ha_myisam::multi_range_read_info_const
- * @see DsMrr_impl::dsmrr_info_const
- * @param keyno
- * @param seq
- * @param seq_init_param
- * @param n_ranges
- * @param bufsz
- * @param flags
- * @param cost
- */
- ha_rows multi_range_read_info_const(uint keyno, RANGE_SEQ_IF* seq,
- void* seq_init_param,
- uint n_ranges, uint* bufsz,
- uint* flags, Cost_estimate* cost);
- /** Initialize multi range read and get information.
- * @see DsMrr_impl::dsmrr_info
- * @param keyno
- * @param seq
- * @param seq_init_param
- * @param n_ranges
- * @param bufsz
- * @param flags
- * @param cost
- */
- ha_rows multi_range_read_info(uint keyno, uint n_ranges, uint keys,
- uint key_parts, uint* bufsz, uint* flags,
- Cost_estimate* cost);
- int multi_range_read_explain_info(uint mrr_mode,
- char *str, size_t size);
-
- /** Attempt to push down an index condition.
- * @param[in] keyno MySQL key number
- * @param[in] idx_cond Index condition to be checked
- * @return idx_cond if pushed; NULL if not pushed
- */
- class Item* idx_cond_push(uint keyno, class Item* idx_cond);
-
- /* An helper function for index_cond_func_innodb: */
- bool is_thd_killed();
-
-private:
- /** The multi range read session object */
- DsMrr_impl ds_mrr;
- /* @} */
-};
-
-/* Some accessor functions which the InnoDB plugin needs, but which
-can not be added to mysql/plugin.h as part of the public interface;
-the definitions are bracketed with #ifdef INNODB_COMPATIBILITY_HOOKS */
-
-#ifndef INNODB_COMPATIBILITY_HOOKS
-#error InnoDB needs MySQL to be built with #define INNODB_COMPATIBILITY_HOOKS
-#endif
-
-extern "C" {
-
-struct charset_info_st *thd_charset(MYSQL_THD thd);
-LEX_STRING *thd_query_string(MYSQL_THD thd);
-
-/**
- Check if a user thread is a replication slave thread
- @param thd user thread
- @retval 0 the user thread is not a replication slave thread
- @retval 1 the user thread is a replication slave thread
-*/
-int thd_slave_thread(const MYSQL_THD thd);
-
-/**
- Check if a user thread is running a non-transactional update
- @param thd user thread
- @retval 0 the user thread is not running a non-transactional update
- @retval 1 the user thread is running a non-transactional update
-*/
-int thd_non_transactional_update(const MYSQL_THD thd);
-
-/**
- Get high resolution timestamp for the current query start time.
- The timestamp is not anchored to any specific point in time,
- but can be used for comparison.
-
- @retval timestamp in microseconds precision
-*/
-unsigned long long thd_start_utime(const MYSQL_THD thd);
-
-/**
- Get the user thread's binary logging format
- @param thd user thread
- @return Value to be used as index into the binlog_format_names array
-*/
-int thd_binlog_format(const MYSQL_THD thd);
-
-/**
- Mark transaction to rollback and mark error as fatal to a sub-statement.
- @param thd Thread handle
- @param all TRUE <=> rollback main transaction.
-*/
-void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all);
-
-/**
- Check if binary logging is filtered for thread's current db.
- @param thd Thread handle
- @retval 1 the query is not filtered, 0 otherwise.
-*/
-bool thd_binlog_filter_ok(const MYSQL_THD thd);
-
-/**
- Check if the query may generate row changes which
- may end up in the binary.
- @param thd Thread handle
- @return 1 the query may generate row changes, 0 otherwise.
-*/
-bool thd_sqlcom_can_generate_row_events(const MYSQL_THD thd);
-
-/**
- Gets information on the durability property requested by
- a thread.
- @param thd Thread handle
- @return a durability property.
-*/
-enum durability_properties thd_get_durability_property(const MYSQL_THD thd);
-
-/** Is strict sql_mode set.
-@param thd Thread object
-@return True if sql_mode has strict mode (all or trans), false otherwise.
-*/
-bool thd_is_strict_mode(const MYSQL_THD thd)
-MY_ATTRIBUTE((nonnull));
-} /* extern "C" */
-
-/** Get the file name and position of the MySQL binlog corresponding to the
- * current commit.
- */
-extern void mysql_bin_log_commit_pos(THD *thd, ulonglong *out_pos, const char **out_file);
-
-struct trx_t;
-#ifdef WITH_WSREP
-#include <wsrep_mysqld.h>
-//extern "C" int wsrep_trx_order_before(void *thd1, void *thd2);
-
-extern "C" bool wsrep_thd_is_wsrep_on(THD *thd);
-
-
-extern "C" void wsrep_thd_set_exec_mode(THD *thd, enum wsrep_exec_mode mode);
-extern "C" void wsrep_thd_set_query_state(
- THD *thd, enum wsrep_query_state state);
-
-extern "C" void wsrep_thd_set_trx_to_replay(THD *thd, uint64 trx_id);
-
-extern "C" uint32 wsrep_thd_wsrep_rand(THD *thd);
-extern "C" time_t wsrep_thd_query_start(THD *thd);
-extern "C" query_id_t wsrep_thd_query_id(THD *thd);
-extern "C" query_id_t wsrep_thd_wsrep_last_query_id(THD *thd);
-extern "C" void wsrep_thd_set_wsrep_last_query_id(THD *thd, query_id_t id);
-#endif
-
-extern const struct _ft_vft ft_vft_result;
-
-/* Structure Returned by ha_innobase::ft_init_ext() */
-typedef struct new_ft_info
-{
- struct _ft_vft *please;
- struct _ft_vft_ext *could_you;
- row_prebuilt_t* ft_prebuilt;
- fts_result_t* ft_result;
-} NEW_FT_INFO;
-
-/*********************************************************************//**
-Allocates an InnoDB transaction for a MySQL handler object.
-@return InnoDB transaction handle */
-trx_t*
-innobase_trx_allocate(
-/*==================*/
- MYSQL_THD thd); /*!< in: user thread handle */
-
-/*********************************************************************//**
-This function checks each index name for a table against reserved
-system default primary index name 'GEN_CLUST_INDEX'. If a name
-matches, this function pushes an warning message to the client,
-and returns true.
-@return true if the index name matches the reserved name */
-UNIV_INTERN
-bool
-innobase_index_name_is_reserved(
-/*============================*/
- THD* thd, /*!< in/out: MySQL connection */
- const KEY* key_info, /*!< in: Indexes to be created */
- ulint num_of_keys) /*!< in: Number of indexes to
- be created. */
- MY_ATTRIBUTE((nonnull(1), warn_unused_result));
-
-/*****************************************************************//**
-#ifdef WITH_WSREP
-extern "C" int wsrep_trx_is_aborting(void *thd_ptr);
-#endif
-Determines InnoDB table flags.
-@retval true if successful, false if error */
-UNIV_INTERN
-bool
-innobase_table_flags(
-/*=================*/
- const TABLE* form, /*!< in: table */
- const HA_CREATE_INFO* create_info, /*!< in: information
- on table columns and indexes */
- THD* thd, /*!< in: connection */
- bool use_tablespace, /*!< in: whether to create
- outside system tablespace */
- ulint* flags, /*!< out: DICT_TF flags */
- ulint* flags2) /*!< out: DICT_TF2 flags */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/*****************************************************************//**
-Validates the create options. We may build on this function
-in future. For now, it checks two specifiers:
-KEY_BLOCK_SIZE and ROW_FORMAT
-If innodb_strict_mode is not set then this function is a no-op
-@return NULL if valid, string if not. */
-UNIV_INTERN
-const char*
-create_options_are_invalid(
-/*=======================*/
- THD* thd, /*!< in: connection thread. */
- TABLE* form, /*!< in: information on table
- columns and indexes */
- HA_CREATE_INFO* create_info, /*!< in: create info. */
- bool use_tablespace) /*!< in: srv_file_per_table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/*********************************************************************//**
-Retrieve the FTS Relevance Ranking result for doc with doc_id
-of prebuilt->fts_doc_id
-@return the relevance ranking value */
-UNIV_INTERN
-float
-innobase_fts_retrieve_ranking(
-/*==========================*/
- FT_INFO* fts_hdl); /*!< in: FTS handler */
-
-/*********************************************************************//**
-Find and Retrieve the FTS Relevance Ranking result for doc with doc_id
-of prebuilt->fts_doc_id
-@return the relevance ranking value */
-UNIV_INTERN
-float
-innobase_fts_find_ranking(
-/*======================*/
- FT_INFO* fts_hdl, /*!< in: FTS handler */
- uchar* record, /*!< in: Unused */
- uint len); /*!< in: Unused */
-/*********************************************************************//**
-Free the memory for the FTS handler */
-UNIV_INTERN
-void
-innobase_fts_close_ranking(
-/*=======================*/
- FT_INFO* fts_hdl) /*!< in: FTS handler */
- MY_ATTRIBUTE((nonnull));
-/*****************************************************************//**
-Initialize the table FTS stopword list
-@return TRUE if success */
-UNIV_INTERN
-ibool
-innobase_fts_load_stopword(
-/*=======================*/
- dict_table_t* table, /*!< in: Table has the FTS */
- trx_t* trx, /*!< in: transaction */
- THD* thd) /*!< in: current thread */
- MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
-
-/** Some defines for innobase_fts_check_doc_id_index() return value */
-enum fts_doc_id_index_enum {
- FTS_INCORRECT_DOC_ID_INDEX,
- FTS_EXIST_DOC_ID_INDEX,
- FTS_NOT_EXIST_DOC_ID_INDEX
-};
-
-/*******************************************************************//**
-Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
-on the Doc ID column.
-@return the status of the FTS_DOC_ID index */
-UNIV_INTERN
-enum fts_doc_id_index_enum
-innobase_fts_check_doc_id_index(
-/*============================*/
- const dict_table_t* table, /*!< in: table definition */
- const TABLE* altered_table, /*!< in: MySQL table
- that is being altered */
- ulint* fts_doc_col_no) /*!< out: The column number for
- Doc ID */
- MY_ATTRIBUTE((warn_unused_result));
-
-/*******************************************************************//**
-Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
-on the Doc ID column in MySQL create index definition.
-@return FTS_EXIST_DOC_ID_INDEX if there exists the FTS_DOC_ID index,
-FTS_INCORRECT_DOC_ID_INDEX if the FTS_DOC_ID index is of wrong format */
-UNIV_INTERN
-enum fts_doc_id_index_enum
-innobase_fts_check_doc_id_index_in_def(
-/*===================================*/
- ulint n_key, /*!< in: Number of keys */
- const KEY* key_info) /*!< in: Key definitions */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/***********************************************************************
-@return version of the extended FTS API */
-uint
-innobase_fts_get_version();
-
-/***********************************************************************
-@return Which part of the extended FTS API is supported */
-ulonglong
-innobase_fts_flags();
-
-/***********************************************************************
-Find and Retrieve the FTS doc_id for the current result row
-@return the document ID */
-ulonglong
-innobase_fts_retrieve_docid(
-/*============================*/
- FT_INFO_EXT* fts_hdl); /*!< in: FTS handler */
-
-/***********************************************************************
-Find and retrieve the size of the current result
-@return number of matching rows */
-ulonglong
-innobase_fts_count_matches(
-/*============================*/
- FT_INFO_EXT* fts_hdl); /*!< in: FTS handler */
-
-/** "GEN_CLUST_INDEX" is the name reserved for InnoDB default
-system clustered index when there is no primary key. */
-extern const char innobase_index_reserve_name[];
-
-/*********************************************************************//**
-Copy table flags from MySQL's HA_CREATE_INFO into an InnoDB table object.
-Those flags are stored in .frm file and end up in the MySQL table object,
-but are frequently used inside InnoDB so we keep their copies into the
-InnoDB table object. */
-UNIV_INTERN
-void
-innobase_copy_frm_flags_from_create_info(
-/*=====================================*/
- dict_table_t* innodb_table, /*!< in/out: InnoDB table */
- const HA_CREATE_INFO* create_info); /*!< in: create info */
-
-/*********************************************************************//**
-Copy table flags from MySQL's TABLE_SHARE into an InnoDB table object.
-Those flags are stored in .frm file and end up in the MySQL table object,
-but are frequently used inside InnoDB so we keep their copies into the
-InnoDB table object. */
-UNIV_INTERN
-void
-innobase_copy_frm_flags_from_table_share(
-/*=====================================*/
- dict_table_t* innodb_table, /*!< in/out: InnoDB table */
- const TABLE_SHARE* table_share); /*!< in: table share */
-
-/*******************************************************************//**
-This function builds a translation table in INNOBASE_SHARE
-structure for fast index location with mysql array number from its
-table->key_info structure. This also provides the necessary translation
-between the key order in mysql key_info and Innodb ib_table->indexes if
-they are not fully matched with each other.
-Note we do not have any mutex protecting the translation table
-building based on the assumption that there is no concurrent
-index creation/drop and DMLs that requires index lookup. All table
-handle will be closed before the index creation/drop.
-@return TRUE if index translation table built successfully */
-UNIV_INTERN
-ibool
-innobase_build_index_translation(
-/*=============================*/
- const TABLE* table, /*!< in: table in MySQL data
- dictionary */
- dict_table_t* ib_table, /*!< in: table in Innodb data
- dictionary */
- INNOBASE_SHARE* share); /*!< in/out: share structure
- where index translation table
- will be constructed in. */
-
-/********************************************************************//**
-Helper function to push frm mismatch error to error log and
-if needed to sql-layer. */
-UNIV_INTERN
-void
-ib_push_frm_error(
-/*==============*/
- THD* thd, /*!< in: MySQL thd */
- dict_table_t* ib_table, /*!< in: InnoDB table */
- TABLE* table, /*!< in: MySQL table */
- ulint n_keys, /*!< in: InnoDB #keys */
- bool push_warning); /*!< in: print warning ? */
diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc
deleted file mode 100644
index 0e7cc9a655b..00000000000
--- a/storage/xtradb/handler/handler0alter.cc
+++ /dev/null
@@ -1,6431 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file handler/handler0alter.cc
-Smart ALTER TABLE
-*******************************************************/
-
-#include <my_global.h>
-#include <unireg.h>
-#include <mysqld_error.h>
-#include <log.h>
-#include <debug_sync.h>
-#include <innodb_priv.h>
-#include <sql_alter.h>
-#include <sql_class.h>
-#include <sql_table.h>
-
-#include "dict0crea.h"
-#include "dict0dict.h"
-#include "dict0priv.h"
-#include "dict0stats.h"
-#include "dict0stats_bg.h"
-#include "log0log.h"
-#include "rem0types.h"
-#include "row0log.h"
-#include "row0merge.h"
-#include "srv0srv.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "ha_prototypes.h"
-#include "handler0alter.h"
-#include "srv0mon.h"
-#include "fts0priv.h"
-#include "pars0pars.h"
-#include "row0sel.h"
-#include "ha_innodb.h"
-#ifdef WITH_WSREP
-//#include "wsrep_api.h"
-#include <sql_acl.h> // PROCESS_ACL
-#endif
-
-/** Operations for creating secondary indexes (no rebuild needed):
-adding a non-unique or a unique index. */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ONLINE_CREATE
- = Alter_inplace_info::ADD_INDEX
- | Alter_inplace_info::ADD_UNIQUE_INDEX;
-
-/** Operations for rebuilding a table in place.
-innobase_need_rebuild() tests handler_flags against this mask. */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_REBUILD
- = Alter_inplace_info::ADD_PK_INDEX
- | Alter_inplace_info::DROP_PK_INDEX
- | Alter_inplace_info::CHANGE_CREATE_OPTION
- /* CHANGE_CREATE_OPTION needs to check innobase_need_rebuild() */
- | Alter_inplace_info::ALTER_COLUMN_NULLABLE
- | Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE
- | Alter_inplace_info::ALTER_COLUMN_ORDER
- | Alter_inplace_info::DROP_COLUMN
- | Alter_inplace_info::ADD_COLUMN
- | Alter_inplace_info::RECREATE_TABLE
- /*
- Currently excluded from the mask (commented out):
- | Alter_inplace_info::ALTER_COLUMN_TYPE
- | Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH
- */
- ;
-
-/** Operations that require changes to data: the union of
-INNOBASE_ONLINE_CREATE and INNOBASE_ALTER_REBUILD. */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_DATA
- = INNOBASE_ONLINE_CREATE | INNOBASE_ALTER_REBUILD;
-
-/** Operations for altering a table that InnoDB does not care about.
-These bits are masked out of handler_flags before the in-place
-decisions are made. */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_INPLACE_IGNORE
- = Alter_inplace_info::ALTER_COLUMN_DEFAULT
- | Alter_inplace_info::ALTER_PARTITIONED
- | Alter_inplace_info::ALTER_COLUMN_COLUMN_FORMAT
- | Alter_inplace_info::ALTER_COLUMN_STORAGE_TYPE
- | Alter_inplace_info::ALTER_RENAME;
-
-/** Operations on foreign key definitions (changing the schema only) */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_FOREIGN_OPERATIONS
- = Alter_inplace_info::DROP_FOREIGN_KEY
- | Alter_inplace_info::ADD_FOREIGN_KEY;
-
-/** Operations that InnoDB cares about and can perform without rebuild:
-online index creation, foreign key changes, dropping indexes and
-renaming columns. */
-static const Alter_inplace_info::HA_ALTER_FLAGS INNOBASE_ALTER_NOREBUILD
- = INNOBASE_ONLINE_CREATE
- | INNOBASE_FOREIGN_OPERATIONS
- | Alter_inplace_info::DROP_INDEX
- | Alter_inplace_info::DROP_UNIQUE_INDEX
- | Alter_inplace_info::ALTER_COLUMN_NAME;
-
-/* Report an InnoDB error to the client by invoking my_error(). */
-static UNIV_COLD MY_ATTRIBUTE((nonnull))
-void
-my_error_innodb(
-/*============*/
- dberr_t error, /*!< in: InnoDB error code */
- const char* table, /*!< in: table name */
- ulint flags) /*!< in: table flags */
-{
- switch (error) {
- case DB_MISSING_HISTORY:
- my_error(ER_TABLE_DEF_CHANGED, MYF(0));
- break;
- case DB_RECORD_NOT_FOUND:
- my_error(ER_KEY_NOT_FOUND, MYF(0), table);
- break;
- case DB_DEADLOCK:
- my_error(ER_LOCK_DEADLOCK, MYF(0));
- break;
- case DB_LOCK_WAIT_TIMEOUT:
- my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0));
- break;
- case DB_INTERRUPTED:
- my_error(ER_QUERY_INTERRUPTED, MYF(0));
- break;
- case DB_OUT_OF_MEMORY:
- my_error(ER_OUT_OF_RESOURCES, MYF(0));
- break;
- case DB_OUT_OF_FILE_SPACE:
- my_error(ER_RECORD_FILE_FULL, MYF(0), table);
- break;
- case DB_TEMP_FILE_WRITE_FAILURE:
- my_error(ER_GET_ERRMSG, MYF(0),
- DB_TEMP_FILE_WRITE_FAILURE,
- ut_strerr(DB_TEMP_FILE_WRITE_FAILURE),
- "InnoDB");
- break;
- case DB_TOO_BIG_INDEX_COL:
- my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
- DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags));
- break;
- case DB_TOO_MANY_CONCURRENT_TRXS:
- my_error(ER_TOO_MANY_CONCURRENT_TRXS, MYF(0));
- break;
- case DB_LOCK_TABLE_FULL:
- my_error(ER_LOCK_TABLE_FULL, MYF(0));
- break;
- case DB_UNDO_RECORD_TOO_BIG:
- my_error(ER_UNDO_RECORD_TOO_BIG, MYF(0));
- break;
- case DB_CORRUPTION:
- my_error(ER_NOT_KEYFILE, MYF(0), table);
- break;
- case DB_TOO_BIG_RECORD:
- my_error(ER_TOO_BIG_ROWSIZE, MYF(0),
- page_get_free_space_of_empty(
- flags & DICT_TF_COMPACT) / 2);
- break;
- case DB_INVALID_NULL:
- /* TODO: report the row, as we do for DB_DUPLICATE_KEY */
- my_error(ER_INVALID_USE_OF_NULL, MYF(0));
- break;
- case DB_TABLESPACE_EXISTS:
- my_error(ER_TABLESPACE_EXISTS, MYF(0), table);
- break;
-
-#ifdef UNIV_DEBUG
- case DB_SUCCESS:
- case DB_DUPLICATE_KEY:
- case DB_ONLINE_LOG_TOO_BIG:
- /* These codes should not be passed here. */
- ut_error;
-#endif /* UNIV_DEBUG */
- default:
- my_error(ER_GET_ERRNO, MYF(0), error, "InnoDB");
- break;
- }
-}
-
-/** Tell whether the MySQL table definition contains a FULLTEXT index.
-@param table MySQL table
-@return whether fulltext indexes exist on the table */
-static
-bool
-innobase_fulltext_exist(
-/*====================*/
-	const TABLE*	table)
-{
-	const uint	n_keys = table->s->keys;
-	uint		pos = 0;
-
-	while (pos < n_keys) {
-		const KEY&	key = table->key_info[pos];
-
-		if (key.flags & HA_FULLTEXT) {
-			return(true);
-		}
-
-		++pos;
-	}
-
-	return(false);
-}
-
-/*******************************************************************//**
-Decide whether an ALTER TABLE operation has to rebuild the table.
-@param ha_alter_info the DDL operation
-@param altered_table MySQL original table
-@return whether it is necessary to rebuild the table */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-innobase_need_rebuild(
-/*==================*/
-	const Alter_inplace_info*	ha_alter_info,
-	const TABLE*			altered_table)
-{
-	const Alter_inplace_info::HA_ALTER_FLAGS	all_flags
-		= ha_alter_info->handler_flags;
-
-	/* Operations that InnoDB ignores play no role in deciding
-	whether CHANGE_CREATE_OPTION is the only request. */
-	if ((all_flags & ~(INNOBASE_INPLACE_IGNORE))
-	    == Alter_inplace_info::CHANGE_CREATE_OPTION) {
-		const bool	format_changed
-			= (ha_alter_info->create_info->used_fields
-			   & (HA_CREATE_USED_ROW_FORMAT
-			      | HA_CREATE_USED_KEY_BLOCK_SIZE)) != 0;
-
-		if (!format_changed) {
-			/* Any other CHANGE_CREATE_OPTION than changing
-			ROW_FORMAT or KEY_BLOCK_SIZE is ignored. */
-			return(false);
-		}
-	}
-
-	const bool	renames_column
-		= (all_flags & Alter_inplace_info::ALTER_COLUMN_NAME) != 0;
-	const bool	adds_key
-		= (all_flags & (Alter_inplace_info::ADD_INDEX
-				| Alter_inplace_info::ADD_FOREIGN_KEY)) != 0;
-
-	/* Column renames are normally applied after the indexes have
-	been created, so an added index that refers to a column renamed
-	by this same statement forces a rebuild. */
-	if (renames_column && adds_key) {
-		for (ulint i = 0; i < ha_alter_info->index_add_count; i++) {
-			const KEY&	key = ha_alter_info->key_info_buffer[
-				ha_alter_info->index_add_buffer[i]];
-
-			for (ulint j = 0;
-			     j < key.user_defined_key_parts; j++) {
-				const Field*	field = altered_table->field[
-					key.key_part[j].fieldnr];
-
-				if (field != NULL
-				    && (field->flags & FIELD_IS_RENAMED)) {
-					return(true);
-				}
-			}
-		}
-	}
-
-	return((all_flags & INNOBASE_ALTER_REBUILD) != 0);
-}
-
-/** Check if InnoDB supports a particular alter table in-place
-
-The checks below run in order. The first one that fails returns
-HA_ALTER_INPLACE_NOT_SUPPORTED, in most cases after storing a message
-in ha_alter_info->unsupported_reason. When only operations listed in
-INNOBASE_INPLACE_IGNORE are requested, HA_ALTER_INPLACE_NO_LOCK is
-returned. Otherwise the result is chosen by the local flag "online".
-The key parts in ha_alter_info->key_info_buffer are also updated to
-point to the fields of altered_table.
-
-@param altered_table TABLE object for new version of table.
-@param ha_alter_info Structure describing changes to be done
-by ALTER TABLE and holding data used during in-place alter.
-
-@retval HA_ALTER_INPLACE_NOT_SUPPORTED Not supported
-@retval HA_ALTER_INPLACE_NO_LOCK Supported
-@retval HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE Supported, but requires
-lock during main phase and exclusive lock during prepare phase.
-@retval HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE Supported, prepare phase
-requires exclusive lock (any transactions that have accessed the table
-must commit or roll back first, and no transactions can access the table
-while prepare_inplace_alter_table() is executing)
-*/
-UNIV_INTERN
-enum_alter_inplace_result
-ha_innobase::check_if_supported_inplace_alter(
-/*==========================================*/
- TABLE* altered_table,
- Alter_inplace_info* ha_alter_info)
-{
- DBUG_ENTER("check_if_supported_inplace_alter");
-
- if (high_level_read_only) {
- ha_alter_info->unsupported_reason =
- innobase_get_err_msg(ER_READ_ONLY_MODE);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- } else if (srv_created_new_raw || srv_force_recovery) {
-
- ha_alter_info->unsupported_reason =
- innobase_get_err_msg(ER_READ_ONLY_MODE);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
-
- if (altered_table->s->stored_fields > REC_MAX_N_USER_FIELDS) {
- /* Deny the inplace ALTER TABLE. MySQL will try to
- re-create the table and ha_innobase::create() will
- return an error too. This is how we effectively
- deny adding too many columns to a table. */
- ha_alter_info->unsupported_reason =
- innobase_get_err_msg(ER_TOO_MANY_FIELDS);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
-
- update_thd();
- trx_search_latch_release_if_reserved(prebuilt->trx);
-
- /* Change on engine specific table options require rebuild of the
- table */
- if (ha_alter_info->handler_flags
- & Alter_inplace_info::CHANGE_CREATE_OPTION) {
- ha_table_option_struct *new_options= ha_alter_info->create_info->option_struct;
- ha_table_option_struct *old_options= table->s->option_struct;
-
- if (new_options->page_compressed != old_options->page_compressed ||
- new_options->page_compression_level != old_options->page_compression_level ||
- new_options->atomic_writes != old_options->atomic_writes) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
-
- if (new_options->encryption != old_options->encryption ||
- new_options->encryption_key_id != old_options->encryption_key_id) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
- }
-
- if (ha_alter_info->handler_flags
- & ~(INNOBASE_INPLACE_IGNORE
- | INNOBASE_ALTER_NOREBUILD
- | INNOBASE_ALTER_REBUILD)) {
-
- if (ha_alter_info->handler_flags
- & (Alter_inplace_info::ALTER_COLUMN_EQUAL_PACK_LENGTH
- | Alter_inplace_info::ALTER_COLUMN_TYPE))
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_COLUMN_TYPE);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); /* not part of the brace-less if above: always executed inside the outer if */
- }
-
- /* Only support online add foreign key constraint when
- check_foreigns is turned off */
- if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_FOREIGN_KEY)
- && prebuilt->trx->check_foreigns) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FK_CHECK);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
-
- if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) {
- DBUG_RETURN(HA_ALTER_INPLACE_NO_LOCK);
- }
-
- /* Only support NULL -> NOT NULL change if strict table sql_mode
- is set. Fall back to COPY for conversion if not strict tables.
- In-Place will fail with an error when trying to convert
- NULL to a NOT NULL value. */
- if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE)
- && !thd_is_strict_mode(user_thd)) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
-
- /* InnoDB cannot IGNORE when creating unique indexes. IGNORE
- should silently delete some duplicate rows. Our inplace_alter
- code will not delete anything from existing indexes. */
- if (ha_alter_info->ignore
- && (ha_alter_info->handler_flags
- & (Alter_inplace_info::ADD_PK_INDEX
- | Alter_inplace_info::ADD_UNIQUE_INDEX))) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_IGNORE);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
-
- /* DROP PRIMARY KEY is only allowed in combination with ADD
- PRIMARY KEY. */
- if ((ha_alter_info->handler_flags
- & (Alter_inplace_info::ADD_PK_INDEX
- | Alter_inplace_info::DROP_PK_INDEX))
- == Alter_inplace_info::DROP_PK_INDEX) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOPK);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
-
- /* If a column changes from NOT NULL to NULL,
- and there is an implicit pk on this column, the
- table should be rebuilt. The change should
- only go through the "Copy" method. */
- if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NULLABLE)) {
- uint primary_key = altered_table->s->primary_key;
-
- /* See if MYSQL table has no pk but we do.*/
- if (UNIV_UNLIKELY(primary_key >= MAX_KEY)
- && !row_table_got_default_clust_index(prebuilt->table)) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_PRIMARY_CANT_HAVE_NULL);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
- }
-
- /*
- InnoDB in different MariaDB versions was generating different mtype
- codes for certain types. In some cases the signed/unsigned bit was
- generated differently too.
-
- Online ALTER would change the mtype/unsigned_flag (to what the
- current code generates) without changing the underlying data
- representation, and it might result in data corruption.
-
- Don't do online ALTER if mtype/unsigned_flag are wrong.
-
- The loop walks the fields of the current table; icol counts only
- the fields that are stored in the database.
- */
- for (ulint i = 0, icol= 0; i < table->s->fields; i++) {
- const Field* field = table->field[i];
- const dict_col_t* col = dict_table_get_nth_col(prebuilt->table, icol);
- ulint unsigned_flag;
- if (!field->stored_in_db())
- continue;
- icol++;
-
- if (col->mtype != get_innobase_type_from_mysql_type(&unsigned_flag, field)) {
-
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); /* unsupported_reason is not set on this path */
- }
-
- if ((col->prtype & DATA_UNSIGNED) != unsigned_flag) {
-
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); /* unsupported_reason is not set on this path */
- }
- }
-
- /* If we have column that has changed from NULL -> NOT NULL
- and column default has changed we need to do additional
- check. */
- if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NOT_NULLABLE) &&
- (ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_DEFAULT)) {
- Alter_info *alter_info = ha_alter_info->alter_info;
- List_iterator<Create_field> def_it(alter_info->create_list);
- Create_field *def;
- while ((def=def_it++)) {
-
- /* If this is first column definition whose SQL type
- is TIMESTAMP and it is defined as NOT NULL and
- it has either constant default or function default
- we must use "Copy" method. Only the first TIMESTAMP
- column is examined: the loop stops after it. */
- if (def->is_timestamp_type()) {
- if ((def->flags & NOT_NULL_FLAG) != 0 && // NOT NULL
- (def->default_value != NULL || // constant default ?
- def->unireg_check != Field::NONE)) { // function default
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_NOT_NULL);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
- break;
- }
- }
- }
-
- ulint n_indexes = UT_LIST_GET_LEN((prebuilt->table)->indexes);
-
- /* If InnoDB dictionary and MySQL frm file are not consistent
- use "Copy" method. */
- if (prebuilt->table->dict_frm_mismatch) {
-
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_NO_SUCH_INDEX);
- ib_push_frm_error(user_thd, prebuilt->table, altered_table,
- n_indexes, true);
-
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
-
- /* We should be able to do the operation in-place.
- See if we can do it online (LOCK=NONE). */
- bool online = true;
-
- List_iterator_fast<Create_field> cf_it(
- ha_alter_info->alter_info->create_list);
-
- /* Fix the key parts: point every key part of the new key
- definitions to the corresponding field of altered_table. */
- for (KEY* new_key = ha_alter_info->key_info_buffer;
- new_key < ha_alter_info->key_info_buffer
- + ha_alter_info->key_count;
- new_key++) {
- for (KEY_PART_INFO* key_part = new_key->key_part;
- key_part < new_key->key_part + new_key->user_defined_key_parts;
- key_part++) {
- const Create_field* new_field;
-
- DBUG_ASSERT(key_part->fieldnr
- < altered_table->s->fields);
-
- cf_it.rewind();
- for (uint fieldnr = 0; (new_field = cf_it++);
- fieldnr++) {
- if (fieldnr == key_part->fieldnr) {
- break;
- }
- }
-
- DBUG_ASSERT(new_field);
-
- key_part->field = altered_table->field[
- key_part->fieldnr];
- /* In some special cases InnoDB emits "false"
- duplicate key errors with NULL key values. Let
- us play safe and ensure that we can correctly
- print key values even in such cases. */
- key_part->null_offset = key_part->field->null_offset();
- key_part->null_bit = key_part->field->null_bit;
-
- if (new_field->field) {
- /* This is an existing column. */
- continue;
- }
-
- /* This is an added column. */
- DBUG_ASSERT(ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_COLUMN);
-
- /* We cannot replace a hidden FTS_DOC_ID
- with a user-visible FTS_DOC_ID. */
- if (prebuilt->table->fts
- && innobase_fulltext_exist(altered_table)
- && !my_strcasecmp(
- system_charset_info,
- key_part->field->field_name.str,
- FTS_DOC_ID_COL_NAME)) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_HIDDEN_FTS);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
-
- DBUG_ASSERT((MTYP_TYPENR(key_part->field->unireg_check)
- == Field::NEXT_NUMBER)
- == !!(key_part->field->flags
- & AUTO_INCREMENT_FLAG));
-
- if (key_part->field->flags & AUTO_INCREMENT_FLAG) {
- /* We cannot assign an AUTO_INCREMENT
- column values during online ALTER. */
- DBUG_ASSERT(key_part->field == altered_table
- -> found_next_number_field);
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_AUTOINC);
- online = false;
- }
- }
- }
-
- DBUG_ASSERT(!prebuilt->table->fts || prebuilt->table->fts->doc_col
- <= table->s->stored_fields);
- DBUG_ASSERT(!prebuilt->table->fts || prebuilt->table->fts->doc_col
- < dict_table_get_n_user_cols(prebuilt->table));
-
- if (prebuilt->table->fts
- && innobase_fulltext_exist(altered_table)) {
- /* FULLTEXT indexes are supposed to remain. */
- /* Disallow DROP INDEX FTS_DOC_ID_INDEX */
-
- for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
- if (!my_strcasecmp(
- system_charset_info,
- ha_alter_info->index_drop_buffer[i]->name,
- FTS_DOC_ID_INDEX_NAME)) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_CHANGE_FTS);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
- }
-
- /* InnoDB can have a hidden FTS_DOC_ID_INDEX on a
- visible FTS_DOC_ID column as well. Prevent dropping or
- renaming the FTS_DOC_ID. */
-
- for (Field** fp = table->field; *fp; fp++) {
- if (!((*fp)->flags
- & (FIELD_IS_RENAMED | FIELD_IS_DROPPED))) {
- continue;
- }
-
- if (!my_strcasecmp(
- system_charset_info,
- (*fp)->field_name.str,
- FTS_DOC_ID_COL_NAME)) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_CHANGE_FTS);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
- }
- }
-
- prebuilt->trx->will_lock++;
-
- if (!online) {
- /* We already determined above that the operation cannot
- be performed online (online == false), so
- HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE will be returned. */
- } else if (((ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_PK_INDEX)
- || innobase_need_rebuild(ha_alter_info, table))
- && (innobase_fulltext_exist(altered_table))) {
- /* Refuse to rebuild the table online, if
- fulltext indexes are to survive the rebuild. */
- online = false;
- /* If the table already contains fulltext indexes,
- refuse to rebuild the table natively altogether. */
- if (prebuilt->table->fts) {
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_INNODB_FT_LIMIT);
- DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
- }
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS);
- } else if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_INDEX)) {
- /* Building a full-text index requires a lock.
- We could do without a lock if the table already contains
- an FTS_DOC_ID column, but in that case we would have
- to apply the modification log to the full-text indexes. */
-
- for (uint i = 0; i < ha_alter_info->index_add_count; i++) {
- const KEY* key =
- &ha_alter_info->key_info_buffer[
- ha_alter_info->index_add_buffer[i]];
- if (key->flags & HA_FULLTEXT) {
- DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK
- & ~(HA_FULLTEXT
- | HA_PACK_KEY
- | HA_GENERATED_KEY
- | HA_BINARY_PACK_KEY)));
- ha_alter_info->unsupported_reason = innobase_get_err_msg(
- ER_ALTER_OPERATION_NOT_SUPPORTED_REASON_FTS);
- online = false;
- break;
- }
- }
- }
-
- DBUG_RETURN(online
- ? HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE
- : HA_ALTER_INPLACE_SHARED_LOCK_AFTER_PREPARE);
-}
-
-/*************************************************************//**
-Fill in a dict_foreign_t structure from the supplied table, index and
-column information. The id, the table names and the column names are
-copied into foreign->heap.
-@return true if added, false if duplicate foreign->id */
-static MY_ATTRIBUTE((nonnull(1,3,5,7)))
-bool
-innobase_init_foreign(
-/*==================*/
-	dict_foreign_t*	foreign,		/*!< in/out: structure to
-						initialize */
-	char*		constraint_name,	/*!< in/out: constraint name if
-						exists */
-	dict_table_t*	table,			/*!< in: foreign table */
-	dict_index_t*	index,			/*!< in: foreign key index */
-	const char**	column_names,		/*!< in: foreign key column
-						names */
-	ulint		num_field,		/*!< in: number of columns */
-	const char*	referenced_table_name,	/*!< in: referenced table
-						name */
-	dict_table_t*	referenced_table,	/*!< in: referenced table */
-	dict_index_t*	referenced_index,	/*!< in: referenced index */
-	const char**	referenced_column_names,/*!< in: referenced column
-						names */
-	ulint		referenced_num_field)	/*!< in: number of referenced
-						columns */
-{
-	ut_ad(mutex_own(&dict_sys->mutex));
-
-	if (constraint_name != NULL) {
-		/* The id is "databasename/constraint_name": the
-		constraint is considered to belong to the same MySQL
-		'database' as the table itself. */
-		const ulint	db_len = dict_get_db_name_len(table->name);
-		char*		id = static_cast<char*>(mem_heap_alloc(
-			foreign->heap,
-			db_len + strlen(constraint_name) + 2));
-
-		ut_memcpy(id, table->name, db_len);
-		id[db_len] = '/';
-		strcpy(id + db_len + 1, constraint_name);
-
-		foreign->id = id;
-
-		/* A user-supplied name may collide with the id of an
-		existing foreign key of the table. */
-		if (table->foreign_set.find(foreign)
-		    != table->foreign_set.end()) {
-			return(false);
-		}
-	}
-
-	/* The referencing (child) side. */
-	foreign->foreign_table = table;
-	foreign->foreign_table_name = mem_heap_strdup(
-		foreign->heap, table->name);
-	dict_mem_foreign_table_name_lookup_set(foreign, TRUE);
-
-	foreign->foreign_index = index;
-	foreign->n_fields = (unsigned int) num_field;
-
-	const char**	child_cols = static_cast<const char**>(
-		mem_heap_alloc(foreign->heap, num_field * sizeof(void*)));
-
-	foreign->foreign_col_names = child_cols;
-
-	for (ulint pos = 0; pos < foreign->n_fields; pos++) {
-		child_cols[pos] = mem_heap_strdup(
-			foreign->heap, column_names[pos]);
-	}
-
-	/* The referenced (parent) side. */
-	foreign->referenced_index = referenced_index;
-	foreign->referenced_table = referenced_table;
-
-	foreign->referenced_table_name = mem_heap_strdup(
-		foreign->heap, referenced_table_name);
-	dict_mem_referenced_table_name_lookup_set(foreign, TRUE);
-
-	const char**	parent_cols = static_cast<const char**>(
-		mem_heap_alloc(foreign->heap,
-			       referenced_num_field * sizeof(void*)));
-
-	foreign->referenced_col_names = parent_cols;
-
-	for (ulint pos = 0; pos < foreign->n_fields; pos++) {
-		parent_cols[pos] = mem_heap_strdup(
-			foreign->heap, referenced_column_names[pos]);
-	}
-
-	return(true);
-}
-
-/*************************************************************//**
-Check that the foreign key options are legitimate: a SET NULL action
-must not be combined with a NOT NULL column of the foreign key index.
-@return true if it is */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-innobase_check_fk_option(
-/*=====================*/
-	const dict_foreign_t*	foreign)	/*!< in: foreign key */
-{
-	const ulint	set_null_actions = DICT_FOREIGN_ON_UPDATE_SET_NULL
-		| DICT_FOREIGN_ON_DELETE_SET_NULL;
-
-	/* Nothing to verify without an index or without SET NULL. */
-	if (foreign->foreign_index == NULL
-	    || !(foreign->type & set_null_actions)) {
-		return(true);
-	}
-
-	for (ulint pos = 0; pos < foreign->n_fields; pos++) {
-		const dict_col_t*	col = dict_index_get_nth_col(
-			foreign->foreign_index, pos);
-
-		if (col->prtype & DATA_NOT_NULL) {
-			/* It is not sensible to define SET NULL
-			if the column is not allowed to be NULL! */
-			return(false);
-		}
-	}
-
-	return(true);
-}
-
-/*************************************************************//**
-Translate the ON DELETE and ON UPDATE options of a MySQL foreign key
-into foreign->type, then validate the result with
-innobase_check_fk_option().
-@return true if successfully set */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-innobase_set_foreign_key_option(
-/*============================*/
-	dict_foreign_t*	foreign,	/*!< in:InnoDB Foreign key */
-	Foreign_key*	fk_key)		/*!< in: Foreign key info from
-					MySQL */
-{
-	ut_ad(!foreign->type);
-
-	/* ON DELETE: this assignment replaces foreign->type. */
-	switch (fk_key->delete_opt) {
-	case Foreign_key::FK_OPTION_SET_NULL:
-		foreign->type = DICT_FOREIGN_ON_DELETE_SET_NULL;
-		break;
-	case Foreign_key::FK_OPTION_CASCADE:
-		foreign->type = DICT_FOREIGN_ON_DELETE_CASCADE;
-		break;
-	case Foreign_key::FK_OPTION_DEFAULT:
-	case Foreign_key::FK_OPTION_RESTRICT:
-	case Foreign_key::FK_OPTION_NO_ACTION:
-		foreign->type = DICT_FOREIGN_ON_DELETE_NO_ACTION;
-		break;
-	default:
-		/* Any other value leaves foreign->type untouched. */
-		break;
-	}
-
-	/* ON UPDATE: the bits are added to the ON DELETE ones. */
-	switch (fk_key->update_opt) {
-	case Foreign_key::FK_OPTION_SET_NULL:
-		foreign->type |= DICT_FOREIGN_ON_UPDATE_SET_NULL;
-		break;
-	case Foreign_key::FK_OPTION_CASCADE:
-		foreign->type |= DICT_FOREIGN_ON_UPDATE_CASCADE;
-		break;
-	case Foreign_key::FK_OPTION_DEFAULT:
-	case Foreign_key::FK_OPTION_RESTRICT:
-	case Foreign_key::FK_OPTION_NO_ACTION:
-		foreign->type |= DICT_FOREIGN_ON_UPDATE_NO_ACTION;
-		break;
-	default:
-		/* Any other value leaves foreign->type untouched. */
-		break;
-	}
-
-	return(innobase_check_fk_option(foreign));
-}
-
-/*******************************************************************//**
-Look among the indexes that are being created for one that a foreign
-key constraint on the given columns could use.
-@return useable index, or NULL if none found */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-const KEY*
-innobase_find_equiv_index(
-/*======================*/
-	const char*const*	col_names,
-					/*!< in: column names */
-	uint			n_cols,	/*!< in: number of columns */
-	const KEY*		keys,	/*!< in: index information */
-	const uint*		add,	/*!< in: indexes being created */
-	uint			n_add)	/*!< in: number of indexes to create */
-{
-	for (uint i = 0; i < n_add; i++) {
-		const KEY*	candidate = &keys[add[i]];
-
-		/* The index must have at least n_cols key parts. */
-		bool	usable = candidate->user_defined_key_parts >= n_cols;
-
-		for (uint j = 0; usable && j < n_cols; j++) {
-			const KEY_PART_INFO&	part = candidate->key_part[j];
-			uint32			full_len
-				= part.field->pack_length();
-
-			/* The MySQL pack length contains 1 or 2 bytes
-			length field for a true VARCHAR. */
-			if (part.field->type() == MYSQL_TYPE_VARCHAR) {
-				full_len -= static_cast<const Field_varstring*>(
-					part.field)->length_bytes;
-			}
-
-			/* Column prefix indexes cannot be used for
-			FOREIGN KEY constraints, and the column names
-			have to match in order. */
-			if (part.length < full_len
-			    || innobase_strcasecmp(
-				    col_names[j],
-				    part.field->field_name.str)) {
-				usable = false;
-			}
-		}
-
-		if (usable) {
-			return(candidate);
-		}
-	}
-
-	return(NULL);
-}
-
-/*************************************************************//**
-Find an index whose first fields are the columns in the array
-in the same order and which is not among the indexes to be dropped.
-Full-text indexes are never returned.
-@return matching index, NULL if not found */
-static MY_ATTRIBUTE((nonnull(1,2,6), warn_unused_result))
-dict_index_t*
-innobase_find_fk_index(
-/*===================*/
-	Alter_inplace_info*	ha_alter_info,
-					/*!< in: alter table info */
-	dict_table_t*		table,	/*!< in: table */
-	const char**		col_names,
-					/*!< in: column names, or NULL
-					to use table->col_names */
-	dict_index_t**		drop_index,
-					/*!< in: indexes to be dropped */
-	ulint			n_drop_index,
-					/*!< in: size of drop_index[] */
-	const char**		columns,/*!< in: array of column names */
-	ulint			n_cols)	/*!< in: number of columns */
-{
-	for (dict_index_t* candidate = dict_table_get_first_index(table);
-	     candidate != NULL;
-	     candidate = dict_table_get_next_index(candidate)) {
-
-		if (candidate->type & DICT_FTS) {
-			continue;
-		}
-
-		if (!dict_foreign_qualify_index(
-			    table, col_names, columns, n_cols,
-			    candidate, NULL, true, 0, NULL, NULL, NULL)) {
-			continue;
-		}
-
-		/* Skip to-be-dropped indexes. */
-		bool	to_be_dropped = false;
-
-		for (ulint i = 0; i < n_drop_index; i++) {
-			if (candidate == drop_index[i]) {
-				to_be_dropped = true;
-				break;
-			}
-		}
-
-		if (!to_be_dropped) {
-			return(candidate);
-		}
-	}
-
-	return(NULL);
-}
-
-/*************************************************************//**
-Create InnoDB foreign key structure from MySQL alter_info
-@retval true if successful
-@retval false on error (will call my_error()) */
-static MY_ATTRIBUTE((nonnull(1,2,3,7,8), warn_unused_result))
-bool
-innobase_get_foreign_key_info(
-/*==========================*/
- Alter_inplace_info*
- ha_alter_info, /*!< in: alter table info */
- const TABLE_SHARE*
- table_share, /*!< in: the TABLE_SHARE */
- dict_table_t* table, /*!< in: table */
- const char** col_names, /*!< in: column names, or NULL
- to use table->col_names */
- dict_index_t** drop_index, /*!< in: indexes to be dropped */
- ulint n_drop_index, /*!< in: size of drop_index[] */
- dict_foreign_t**add_fk, /*!< out: foreign constraint added */
- ulint* n_add_fk, /*!< out: number of foreign
- constraints added */
- const trx_t* trx) /*!< in: user transaction */
-{
- Key* key;
- Foreign_key* fk_key;
- dict_table_t* referenced_table = NULL;
- char* referenced_table_name = NULL;
- ulint num_fk = 0;
- Alter_info* alter_info = ha_alter_info->alter_info;
-
- *n_add_fk = 0;
-
- List_iterator<Key> key_iterator(alter_info->key_list);
-
- while ((key=key_iterator++)) {
- if (key->type != Key::FOREIGN_KEY) {
- continue;
- }
-
- const char* column_names[MAX_NUM_FK_COLUMNS];
- dict_index_t* index = NULL;
- const char* referenced_column_names[MAX_NUM_FK_COLUMNS];
- dict_index_t* referenced_index = NULL;
- ulint num_col = 0;
- ulint referenced_num_col = 0;
- bool correct_option;
- char* db_namep = NULL;
- char* tbl_namep = NULL;
- ulint db_name_len = 0;
- ulint tbl_name_len = 0;
- char db_name[MAX_DATABASE_NAME_LEN];
- char tbl_name[MAX_TABLE_NAME_LEN];
-
- fk_key = static_cast<Foreign_key*>(key);
-
- if (fk_key->columns.elements > 0) {
- ulint i = 0;
- Key_part_spec* column;
- List_iterator<Key_part_spec> key_part_iterator(
- fk_key->columns);
-
- /* Get all the foreign key column info for the
- current table */
- while ((column = key_part_iterator++)) {
- column_names[i] = column->field_name.str;
- ut_ad(i < MAX_NUM_FK_COLUMNS);
- i++;
- }
-
- index = innobase_find_fk_index(
- ha_alter_info,
- table, col_names,
- drop_index, n_drop_index,
- column_names, i);
-
- /* MySQL would add a index in the creation
- list if no such index for foreign table,
- so we have to use DBUG_EXECUTE_IF to simulate
- the scenario */
- DBUG_EXECUTE_IF("innodb_test_no_foreign_idx",
- index = NULL;);
-
- /* Check whether such an index exists
- in the index create clause */
- if (!index && !innobase_find_equiv_index(
- column_names, static_cast<uint>(i),
- ha_alter_info->key_info_buffer,
- ha_alter_info->index_add_buffer,
- ha_alter_info->index_add_count)) {
- my_error(
- ER_FK_NO_INDEX_CHILD,
- MYF(0),
- fk_key->name.str
- ? fk_key->name.str : "",
- table_share->table_name.str);
- goto err_exit;
- }
-
- num_col = i;
- }
-
- add_fk[num_fk] = dict_mem_foreign_create();
-
-#ifndef __WIN__
- if(fk_key->ref_db.str) {
- tablename_to_filename(fk_key->ref_db.str, db_name,
- MAX_DATABASE_NAME_LEN);
- db_namep = db_name;
- db_name_len = strlen(db_name);
- }
- if (fk_key->ref_table.str) {
- tablename_to_filename(fk_key->ref_table.str, tbl_name,
- MAX_TABLE_NAME_LEN);
- tbl_namep = tbl_name;
- tbl_name_len = strlen(tbl_name);
- }
-#else
- ut_ad(fk_key->ref_table.str);
- tablename_to_filename(fk_key->ref_table.str, tbl_name,
- MAX_TABLE_NAME_LEN);
- innobase_casedn_str(tbl_name);
- tbl_name_len = strlen(tbl_name);
- tbl_namep = &tbl_name[0];
-
- if (fk_key->ref_db.str != NULL) {
- tablename_to_filename(fk_key->ref_db.str, db_name,
- MAX_DATABASE_NAME_LEN);
- innobase_casedn_str(db_name);
- db_name_len = strlen(db_name);
- db_namep = &db_name[0];
- }
-#endif
- mutex_enter(&dict_sys->mutex);
-
- referenced_table_name = dict_get_referenced_table(
- table->name,
- db_namep,
- db_name_len,
- tbl_namep,
- tbl_name_len,
- &referenced_table,
- add_fk[num_fk]->heap);
-
- /* Test the case when referenced_table failed to
- open, if trx->check_foreigns is not set, we should
- still be able to add the foreign key */
- DBUG_EXECUTE_IF("innodb_test_open_ref_fail",
- referenced_table = NULL;);
-
- if (!referenced_table && trx->check_foreigns) {
- mutex_exit(&dict_sys->mutex);
- my_error(ER_FK_CANNOT_OPEN_PARENT,
- MYF(0), tbl_namep);
-
- goto err_exit;
- }
-
- if (fk_key->ref_columns.elements > 0) {
- ulint i = 0;
- Key_part_spec* column;
- List_iterator<Key_part_spec> key_part_iterator(
- fk_key->ref_columns);
-
- while ((column = key_part_iterator++)) {
- referenced_column_names[i] =
- column->field_name.str;
- ut_ad(i < MAX_NUM_FK_COLUMNS);
- i++;
- }
-
- if (referenced_table) {
- referenced_index =
- dict_foreign_find_index(
- referenced_table, 0,
- referenced_column_names,
- i, index,
- TRUE, FALSE, NULL, NULL, NULL);
-
- DBUG_EXECUTE_IF(
- "innodb_test_no_reference_idx",
- referenced_index = NULL;);
-
- /* Check whether there exist such
- index in the the index create clause */
- if (!referenced_index) {
- mutex_exit(&dict_sys->mutex);
- my_error(ER_FK_NO_INDEX_PARENT, MYF(0),
- fk_key->name.str
- ? fk_key->name.str : "",
- tbl_namep);
- goto err_exit;
- }
- } else {
- ut_a(!trx->check_foreigns);
- }
-
- referenced_num_col = i;
- } else {
- /* Not possible to add a foreign key without a
- referenced column */
- mutex_exit(&dict_sys->mutex);
- my_error(ER_CANNOT_ADD_FOREIGN, MYF(0), tbl_namep);
- goto err_exit;
- }
-
- if (!innobase_init_foreign(
- add_fk[num_fk], fk_key->name.str,
- table, index, column_names,
- num_col, referenced_table_name,
- referenced_table, referenced_index,
- referenced_column_names, referenced_num_col)) {
- mutex_exit(&dict_sys->mutex);
- my_error(
- ER_DUP_CONSTRAINT_NAME,
- MYF(0),
- "FOREIGN KEY", add_fk[num_fk]->id);
- goto err_exit;
- }
-
- mutex_exit(&dict_sys->mutex);
-
- correct_option = innobase_set_foreign_key_option(
- add_fk[num_fk], fk_key);
-
- DBUG_EXECUTE_IF("innodb_test_wrong_fk_option",
- correct_option = false;);
-
- if (!correct_option) {
- my_error(ER_FK_INCORRECT_OPTION,
- MYF(0),
- table_share->table_name.str,
- add_fk[num_fk]->id);
- goto err_exit;
- }
-
- num_fk++;
- }
-
- *n_add_fk = num_fk;
-
- return(true);
-err_exit:
- for (ulint i = 0; i <= num_fk; i++) {
- if (add_fk[i]) {
- dict_foreign_free(add_fk[i]);
- }
- }
-
- return(false);
-}
-
-/*************************************************************//**
-Copies an InnoDB column to a MySQL field. This function is
-adapted from row_sel_field_store_in_mysql_format().
-
-The conversion is chosen by col->mtype and the result is written to
-field->ptr:
-- DATA_INT: the bytes are copied in reverse order and, unless the field
-  carries UNSIGNED_FLAG, bit 0x80 of the last destination byte is flipped.
-- DATA_VARCHAR, DATA_VARMYSQL, DATA_BINARY: the field is reset; for a
-  MYSQL_TYPE_VARCHAR field the length is first stored through
-  row_mysql_store_true_var_len(), then the data bytes are copied.
-- DATA_BLOB: the field is set to NULL and no data is copied.
-- anything else: the data bytes are copied with memcpy().
-Builds with UNIV_DEBUG additionally assert the expected relation between
-len and field->pack_length() for each type. */
-static
-void
-innobase_col_to_mysql(
-/*==================*/
- const dict_col_t* col, /*!< in: InnoDB column */
- const uchar* data, /*!< in: InnoDB column data */
- ulint len, /*!< in: length of data, in bytes */
- Field* field) /*!< in/out: MySQL field */
-{
- uchar* ptr;
- uchar* dest = field->ptr;
- ulint flen = field->pack_length();
-
- switch (col->mtype) {
- case DATA_INT:
- ut_ad(len == flen);
-
- /* Convert integer data from Innobase to little-endian
- format, sign bit restored to normal */
-
- for (ptr = dest + len; ptr != dest; ) { /* dest is filled from its end */
- *--ptr = *data++;
- }
-
- if (!(field->flags & UNSIGNED_FLAG)) {
- ((byte*) dest)[len - 1] ^= 0x80;
- }
-
- break;
-
- case DATA_VARCHAR:
- case DATA_VARMYSQL:
- case DATA_BINARY:
- field->reset();
-
- if (field->type() == MYSQL_TYPE_VARCHAR) {
- /* This is a >= 5.0.3 type true VARCHAR. Store the
- length of the data to the first byte or the first
- two bytes of dest. The returned pointer replaces dest,
- so the memcpy() below writes after the stored length. */
-
- dest = row_mysql_store_true_var_len(
- dest, len, flen - field->key_length());
- }
-
- /* Copy the actual data */
- memcpy(dest, data, len);
- break;
-
- case DATA_BLOB:
- /* Skip MySQL BLOBs when reporting an erroneous row
- during index creation or table rebuild. */
- field->set_null();
- break;
-
-#ifdef UNIV_DEBUG
- case DATA_MYSQL:
- ut_ad(flen >= len);
- ut_ad(DATA_MBMAXLEN(col->mbminmaxlen)
- >= DATA_MBMINLEN(col->mbminmaxlen));
- memcpy(dest, data, len);
- break;
-
- default:
- case DATA_SYS_CHILD:
- case DATA_SYS:
- /* These column types should never be shipped to MySQL.
- There is no break after the assertion, so control continues
- into the cases below. */
- ut_ad(0);
-
- case DATA_FLOAT:
- case DATA_DOUBLE:
- case DATA_DECIMAL:
- /* Above are the valid column types for MySQL data. */
- ut_ad(flen == len);
- /* fall through */
- case DATA_FIXBINARY:
- case DATA_CHAR:
- /* We may have flen > len when there is a shorter
- prefix on the CHAR and BINARY column. */
- ut_ad(flen >= len);
-#else /* UNIV_DEBUG */
- default:
-#endif /* UNIV_DEBUG */
- memcpy(dest, data, len); /* shared by both the debug and non-debug label sets */
- }
-}
-
-/*************************************************************//**
-Copies an InnoDB record to table->record[0].
-
-The loop runs table->s->stored_fields times. In each round the next
-MySQL field for which stored_in_db() is true is picked (sql_idx skips
-the others) and reset. The field is then set to NULL when
-dict_index_get_nth_col_or_prefix_pos() returns ULINT_UNDEFINED, when
-rec_offs_nth_extern() is true for the position, or when the field length
-is UNIV_SQL_NULL; otherwise it is marked not-NULL and filled by
-innobase_col_to_mysql(). */
-UNIV_INTERN
-void
-innobase_rec_to_mysql(
-/*==================*/
- struct TABLE* table, /*!< in/out: MySQL table */
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: index */
- const ulint* offsets)/*!< in: rec_get_offsets(
- rec, index, ...) */
-{
- uint n_fields = table->s->stored_fields;
- uint sql_idx = 0;
-
- ut_ad(n_fields == dict_table_get_n_user_cols(index->table)
- - !!(DICT_TF2_FLAG_IS_SET(index->table,
- DICT_TF2_FTS_HAS_DOC_ID)));
-
- for (uint i = 0; i < n_fields; i++, sql_idx++) {
- Field* field;
- ulint ipos;
- ulint ilen;
- const uchar* ifield;
-
- while (!((field= table->field[sql_idx])->stored_in_db())) /* skip fields that are not stored */
- sql_idx++;
-
- field->reset();
-
- ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE,
- NULL);
-
- if (ipos == ULINT_UNDEFINED
- || rec_offs_nth_extern(offsets, ipos)) {
-null_field: /* also reached by goto from the SQL NULL check below */
- field->set_null();
- continue;
- }
-
- ifield = rec_get_nth_field(rec, offsets, ipos, &ilen);
-
- /* Assign the NULL flag */
- if (ilen == UNIV_SQL_NULL) {
- ut_ad(field->real_maybe_null());
- goto null_field;
- }
-
- field->set_notnull();
-
- innobase_col_to_mysql(
- dict_field_get_col(
- dict_index_get_nth_field(index, ipos)),
- ifield, ilen, field);
- }
-}
-
-/*************************************************************//**
-Copies an InnoDB index entry to table->record[0].
-
-The loop runs table->s->stored_fields times. In each round the next
-MySQL field for which stored_in_db() is true is picked and reset. The
-field is set to NULL when dict_index_get_nth_col_or_prefix_pos() returns
-ULINT_UNDEFINED or when fields[ipos] satisfies dfield_is_ext() or
-dfield_is_null(); otherwise it is marked not-NULL and filled by
-innobase_col_to_mysql() from the data and length of fields[ipos]. */
-UNIV_INTERN
-void
-innobase_fields_to_mysql(
-/*=====================*/
- struct TABLE* table, /*!< in/out: MySQL table */
- const dict_index_t* index, /*!< in: InnoDB index */
- const dfield_t* fields) /*!< in: InnoDB index fields */
-{
- uint n_fields = table->s->stored_fields;
- uint sql_idx = 0;
-
- ut_ad(n_fields == dict_table_get_n_user_cols(index->table)
- - !!(DICT_TF2_FLAG_IS_SET(index->table,
- DICT_TF2_FTS_HAS_DOC_ID)));
-
- for (uint i = 0; i < n_fields; i++, sql_idx++) {
- Field* field;
- ulint ipos;
-
- while (!((field= table->field[sql_idx])->stored_in_db())) /* skip fields that are not stored */
- sql_idx++;
-
- field->reset();
-
- ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE,
- NULL);
-
- if (ipos == ULINT_UNDEFINED
- || dfield_is_ext(&fields[ipos])
- || dfield_is_null(&fields[ipos])) {
-
- field->set_null();
- } else {
- field->set_notnull();
-
- const dfield_t* df = &fields[ipos];
-
- innobase_col_to_mysql(
- dict_field_get_col(
- dict_index_get_nth_field(index, ipos)),
- static_cast<const uchar*>(dfield_get_data(df)),
- dfield_get_len(df), field);
- }
- }
-}
-
-/*************************************************************//**
-Copies an InnoDB row to table->record[0].
-
-Field i of row (for i below table->s->stored_fields) is paired with the
-next MySQL field for which stored_in_db() is true. That field is reset
-and then set to NULL when the InnoDB field satisfies dfield_is_ext() or
-dfield_is_null(); otherwise it is marked not-NULL and filled by
-innobase_col_to_mysql() using column i of itab. */
-UNIV_INTERN
-void
-innobase_row_to_mysql(
-/*==================*/
- struct TABLE* table, /*!< in/out: MySQL table */
- const dict_table_t* itab, /*!< in: InnoDB table */
- const dtuple_t* row) /*!< in: InnoDB row */
-{
- uint n_fields = table->s->stored_fields;
- uint sql_idx = 0;
-
- /* The InnoDB row may contain an extra FTS_DOC_ID column at the end. */
- ut_ad(row->n_fields == dict_table_get_n_cols(itab));
- ut_ad(n_fields == row->n_fields - DATA_N_SYS_COLS
- - !!(DICT_TF2_FLAG_IS_SET(itab, DICT_TF2_FTS_HAS_DOC_ID)));
-
- for (uint i = 0; i < n_fields; i++, sql_idx++) {
- Field* field;
- const dfield_t* df = dtuple_get_nth_field(row, i);
-
- while (!((field= table->field[sql_idx])->stored_in_db())) /* skip fields that are not stored */
- sql_idx++;
-
- field->reset();
-
- if (dfield_is_ext(df) || dfield_is_null(df)) {
- field->set_null();
- } else {
- field->set_notnull();
-
- innobase_col_to_mysql(
- dict_table_get_nth_col(itab, i),
- static_cast<const uchar*>(dfield_get_data(df)),
- dfield_get_len(df), field);
- }
- }
-}
-
-/*************************************************************//**
-Puts every field of table->record[0] back to its default value by
-calling set_default() on each of the table->s->fields fields, in
-ascending field order. */
-UNIV_INTERN
-void
-innobase_rec_reset(
-/*===============*/
-	TABLE*	table)	/*!< in/out: MySQL table */
-{
-	const uint	field_count = table->s->fields;
-	uint		pos = 0;
-
-	while (pos < field_count) {
-		table->field[pos++]->set_default();
-	}
-}
-
-/*******************************************************************//**
-This function checks that index keys are sensible.
-
-For every index in info->index_add_buffer the following is verified, and
-the first violation is reported through my_error() and returned:
-- ER_WRONG_NAME_FOR_INDEX: the name equals that of an earlier index in
-  the add list, or equals the name of an index of innodb_table that is
-  not also listed in info->index_drop_buffer;
-- ER_WRONG_KEY_COLUMN: a key part on a DATA_INT, DATA_FLOAT, DATA_DOUBLE
-  or DATA_DECIMAL column is shorter than the field (for a
-  MYSQL_TYPE_VARCHAR field the length bytes are excluded), or the same
-  fieldnr occurs twice within one key.
-@return 0 or error number */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-int
-innobase_check_index_keys(
-/*======================*/
- const Alter_inplace_info* info,
- /*!< in: indexes to be created or dropped */
- const dict_table_t* innodb_table)
- /*!< in: Existing indexes */
-{
- for (uint key_num = 0; key_num < info->index_add_count;
- key_num++) {
- const KEY& key = info->key_info_buffer[
- info->index_add_buffer[key_num]];
-
- /* Check that the same index name does not appear
- twice in indexes to be created. */
-
- for (ulint i = 0; i < key_num; i++) {
- const KEY& key2 = info->key_info_buffer[
- info->index_add_buffer[i]];
-
- if (0 == strcmp(key.name, key2.name)) {
- my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
- key.name);
-
- return(ER_WRONG_NAME_FOR_INDEX);
- }
- }
-
- /* Check that the same index name does not already exist. */
-
- const dict_index_t* index;
-
- for (index = dict_table_get_first_index(innodb_table);
- index; index = dict_table_get_next_index(index)) {
-
- if (!strcmp(key.name, index->name)) {
- break;
- }
- }
-
- if (index) { /* non-NULL only if the loop above stopped on a name match */
- /* If a key by the same name is being created and
- dropped, the name clash is OK. */
- for (uint i = 0; i < info->index_drop_count;
- i++) {
- const KEY* drop_key
- = info->index_drop_buffer[i];
-
- if (0 == strcmp(key.name, drop_key->name)) {
- goto name_ok;
- }
- }
-
- my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0), key.name);
-
- return(ER_WRONG_NAME_FOR_INDEX);
- }
-
-name_ok:
- for (ulint i = 0; i < key.user_defined_key_parts; i++) {
- const KEY_PART_INFO& key_part1
- = key.key_part[i];
- const Field* field
- = key_part1.field;
- ibool is_unsigned;
-
- switch (get_innobase_type_from_mysql_type(
- &is_unsigned, field)) {
- default:
- break;
- case DATA_INT:
- case DATA_FLOAT:
- case DATA_DOUBLE:
- case DATA_DECIMAL:
- /* Check that MySQL does not try to
- create a column prefix index field on
- an inappropriate data type. Each break
- below accepts the key part; falling out of
- the if/else reports the error. */
-
- if (field->type() == MYSQL_TYPE_VARCHAR) {
- if (key_part1.length
- >= field->pack_length()
- - ((Field_varstring*) field)
- ->length_bytes) {
- break;
- }
- } else {
- if (key_part1.length
- >= field->pack_length()) {
- break;
- }
- }
-
- my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
- field->field_name.str);
- return(ER_WRONG_KEY_COLUMN);
- }
-
- /* Check that the same column does not appear
- twice in the index. */
-
- for (ulint j = 0; j < i; j++) {
- const KEY_PART_INFO& key_part2
- = key.key_part[j];
-
- if (key_part1.fieldnr != key_part2.fieldnr) {
- continue;
- }
-
- my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
- field->field_name.str);
- return(ER_WRONG_KEY_COLUMN);
- }
- }
- }
-
- return(0);
-}
-
-/*******************************************************************//**
-Create index field definition for key part.
-
-Fills in col_no, col_name and prefix_len of index_field. The MySQL field
-is looked up in altered_table by key_part->fieldnr when altered_table is
-given, and taken from key_part->field otherwise; in the latter case the
-column name comes from fields[key_part->fieldnr]. prefix_len is set to
-key_part->length when the InnoDB type is DATA_BLOB or when the key part
-is shorter than the field (for MYSQL_TYPE_VARCHAR the length bytes are
-excluded from the comparison), and to 0 otherwise. */
-static MY_ATTRIBUTE((nonnull(2,3)))
-void
-innobase_create_index_field_def(
-/*============================*/
- const TABLE* altered_table, /*!< in: MySQL table that is
- being altered, or NULL
- if a new clustered index is
- not being created */
- const KEY_PART_INFO* key_part, /*!< in: MySQL key definition */
- index_field_t* index_field, /*!< out: index field
- definition for key_part */
- const Field** fields) /*!< in: MySQL table fields;
- read only when altered_table
- is NULL */
-{
- const Field* field;
- ibool is_unsigned;
- ulint col_type;
-
- DBUG_ENTER("innobase_create_index_field_def");
-
- ut_ad(key_part);
- ut_ad(index_field);
-
- field = altered_table
- ? altered_table->field[key_part->fieldnr]
- : key_part->field;
- ut_a(field);
-
- index_field->col_no = key_part->fieldnr;
- index_field->col_name = altered_table ? field->field_name.str : fields[key_part->fieldnr]->field_name.str;
-
- col_type = get_innobase_type_from_mysql_type(&is_unsigned, field);
-
- if (DATA_BLOB == col_type
- || (key_part->length < field->pack_length()
- && field->type() != MYSQL_TYPE_VARCHAR)
- || (field->type() == MYSQL_TYPE_VARCHAR
- && key_part->length < field->pack_length()
- - ((Field_varstring*) field)->length_bytes)) {
-
- index_field->prefix_len = key_part->length;
- } else {
- index_field->prefix_len = 0; /* the key part is not a prefix */
- }
-
- DBUG_VOID_RETURN;
-}
-
-/*******************************************************************//**
-Create index definition for key.
-
-Fills *index from keys[key_number]. The field array and the name are
-allocated from heap. When new_clustered is false the name is prefixed
-with TEMP_INDEX_PREFIX and altered_table is replaced by NULL before the
-per-field definitions are built. ind_type gets DICT_UNIQUE when the key
-has HA_NOSAME, plus DICT_CLUSTERED when key_clustered is set, or else
-DICT_FTS when the key has HA_FULLTEXT. */
-static MY_ATTRIBUTE((nonnull))
-void
-innobase_create_index_def(
-/*======================*/
- const TABLE* altered_table, /*!< in: MySQL table that is
- being altered */
- const KEY* keys, /*!< in: key definitions */
- ulint key_number, /*!< in: MySQL key number */
- bool new_clustered, /*!< in: true if generating
- a new clustered index
- on the table */
- bool key_clustered, /*!< in: true if this is
- the new clustered index */
- index_def_t* index, /*!< out: index definition */
- mem_heap_t* heap, /*!< in: heap where memory
- is allocated */
- const Field** fields) /*!< in: MySQL table fields
- */
-{
- const KEY* key = &keys[key_number];
- ulint i;
- ulint len;
- ulint n_fields = key->user_defined_key_parts;
- char* index_name;
-
- DBUG_ENTER("innobase_create_index_def");
- DBUG_ASSERT(!key_clustered || new_clustered);
-
- index->fields = static_cast<index_field_t*>(
- mem_heap_alloc(heap, n_fields * sizeof *index->fields));
-
- memset(index->fields, 0, n_fields * sizeof *index->fields);
-
- index->ind_type = 0;
- index->key_number = key_number;
- index->n_fields = n_fields;
- len = strlen(key->name) + 1; /* includes the terminating NUL */
- index->name = index_name = static_cast<char*>(
- mem_heap_alloc(heap, len + !new_clustered)); /* one more byte for the prefix if needed */
-
- if (!new_clustered) {
- *index_name++ = TEMP_INDEX_PREFIX;
- }
-
- memcpy(index_name, key->name, len);
-
- if (key->flags & HA_NOSAME) {
- index->ind_type |= DICT_UNIQUE;
- }
-
- if (key_clustered) {
- DBUG_ASSERT(!(key->flags & HA_FULLTEXT));
- index->ind_type |= DICT_CLUSTERED;
- } else if (key->flags & HA_FULLTEXT) {
- DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK
- & ~(HA_FULLTEXT
- | HA_PACK_KEY
- | HA_BINARY_PACK_KEY)));
- DBUG_ASSERT(!(key->flags & HA_NOSAME));
- DBUG_ASSERT(!index->ind_type);
- index->ind_type |= DICT_FTS;
- }
-
- if (!new_clustered) {
- altered_table = NULL; /* makes the field helper use key_part->field and fields[] */
- }
-
- for (i = 0; i < n_fields; i++) {
- innobase_create_index_field_def(
- altered_table, &key->key_part[i], &index->fields[i], fields);
- }
-
- DBUG_VOID_RETURN;
-}
-
-/*******************************************************************//**
-Check whether the table has the FTS_DOC_ID column.
-
-The stored fields of altered_table are searched first with a
-case-insensitive comparison. On a match the function returns true, and
-*fts_doc_col_no is set to the stored-field position only if the name
-matches FTS_DOC_ID_COL_NAME exactly and the field is MYSQL_TYPE_LONGLONG
-with pack_length() 8, not nullable and UNSIGNED_FLAG set; otherwise
-my_error() is raised (ER_WRONG_COLUMN_NAME or
-ER_INNODB_FT_WRONG_DOCID_COLUMN) and *fts_doc_col_no stays
-ULINT_UNDEFINED. If no MySQL field matches and table is not NULL, the
-remaining InnoDB columns before the last DATA_N_SYS_COLS ones are
-compared by exact name.
-@return whether there exists an FTS_DOC_ID column */
-static
-bool
-innobase_fts_check_doc_id_col(
-/*==========================*/
- const dict_table_t* table, /*!< in: InnoDB table with
- fulltext index, or NULL */
- const TABLE* altered_table,
- /*!< in: MySQL table with
- fulltext index */
- ulint* fts_doc_col_no)
- /*!< out: The column number for
- Doc ID, or ULINT_UNDEFINED
- if it is of wrong type */
-{
- *fts_doc_col_no = ULINT_UNDEFINED;
-
- const uint n_cols = altered_table->s->stored_fields;
- uint sql_idx = 0;
- uint i;
-
- for (i = 0; i < n_cols; i++, sql_idx++) {
- const Field* field;
- while (!((field= altered_table->field[sql_idx])->
- stored_in_db()))
- sql_idx++;
- if (my_strcasecmp(system_charset_info,
- field->field_name.str, FTS_DOC_ID_COL_NAME)) {
- continue;
- }
-
- if (strcmp(field->field_name.str, FTS_DOC_ID_COL_NAME)) { /* same name but different letter case */
- my_error(ER_WRONG_COLUMN_NAME, MYF(0),
- field->field_name.str);
- } else if (field->type() != MYSQL_TYPE_LONGLONG
- || field->pack_length() != 8
- || field->real_maybe_null()
- || !(field->flags & UNSIGNED_FLAG)) {
- my_error(ER_INNODB_FT_WRONG_DOCID_COLUMN, MYF(0),
- field->field_name.str);
- } else {
- *fts_doc_col_no = i;
- }
-
- return(true);
- }
-
- if (!table) {
- return(false);
- }
-
- for (; i + DATA_N_SYS_COLS < (uint) table->n_cols; i++) { /* i continues from the loop above */
- const char* name = dict_table_get_col_name(table, i);
-
- if (strcmp(name, FTS_DOC_ID_COL_NAME) == 0) {
-#ifdef UNIV_DEBUG
- const dict_col_t* col;
-
- col = dict_table_get_nth_col(table, i);
-
- /* Because the FTS_DOC_ID does not exist in
- the MySQL data dictionary, this must be the
- internally created FTS_DOC_ID column. */
- ut_ad(col->mtype == DATA_INT);
- ut_ad(col->len == 8);
- ut_ad(col->prtype & DATA_NOT_NULL);
- ut_ad(col->prtype & DATA_UNSIGNED);
-#endif /* UNIV_DEBUG */
- *fts_doc_col_no = i;
- return(true);
- }
- }
-
- return(false);
-}
-
-/*******************************************************************//**
-Check whether the table has a unique index with FTS_DOC_ID_INDEX_NAME
-on the Doc ID column.
-
-If altered_table is given, its keys are checked first: the first key
-whose name matches FTS_DOC_ID_INDEX_NAME case-insensitively decides the
-result. Otherwise, or if no such key exists, the indexes of table are
-checked the same way; with table == NULL the result is
-FTS_NOT_EXIST_DOC_ID_INDEX. A matching index is accepted only if it is
-unique, has a single unique column, carries the exact name, and is on
-FTS_DOC_ID_COL_NAME (for an InnoDB index additionally DATA_INT, length 8
-and DATA_NOT_NULL).
-@return the status of the FTS_DOC_ID index */
-UNIV_INTERN
-enum fts_doc_id_index_enum
-innobase_fts_check_doc_id_index(
-/*============================*/
- const dict_table_t* table, /*!< in: table definition,
- or NULL */
- const TABLE* altered_table, /*!< in: MySQL table
- that is being altered,
- or NULL */
- ulint* fts_doc_col_no) /*!< out: The column number for
- Doc ID, or ULINT_UNDEFINED
- if it is being created in
- ha_alter_info; may be NULL,
- in which case nothing is
- stored */
-{
- const dict_index_t* index;
- const dict_field_t* field;
-
- if (altered_table) {
- /* Check if a unique index with the name of
- FTS_DOC_ID_INDEX_NAME is being created. */
-
- for (uint i = 0; i < altered_table->s->keys; i++) {
- const KEY& key = altered_table->key_info[i];
-
- if (innobase_strcasecmp(
- key.name, FTS_DOC_ID_INDEX_NAME)) {
- continue;
- }
-
- if ((key.flags & HA_NOSAME)
- && key.user_defined_key_parts == 1
- && !strcmp(key.name, FTS_DOC_ID_INDEX_NAME)
- && !strcmp(key.key_part[0].field->field_name.str,
- FTS_DOC_ID_COL_NAME)) {
- if (fts_doc_col_no) {
- *fts_doc_col_no = ULINT_UNDEFINED;
- }
- return(FTS_EXIST_DOC_ID_INDEX);
- } else {
- return(FTS_INCORRECT_DOC_ID_INDEX);
- }
- }
- }
-
- if (!table) {
- return(FTS_NOT_EXIST_DOC_ID_INDEX);
- }
-
- for (index = dict_table_get_first_index(table);
- index; index = dict_table_get_next_index(index)) {
-
- /* Check if there exists a unique index with the name of
- FTS_DOC_ID_INDEX_NAME */
- if (innobase_strcasecmp(index->name, FTS_DOC_ID_INDEX_NAME)) {
- continue;
- }
-
- if (!dict_index_is_unique(index)
- || dict_index_get_n_unique(index) > 1
- || strcmp(index->name, FTS_DOC_ID_INDEX_NAME)) {
- return(FTS_INCORRECT_DOC_ID_INDEX);
- }
-
- /* Check whether the index has FTS_DOC_ID as its
- first column */
- field = dict_index_get_nth_field(index, 0);
-
- /* The column would be of a BIGINT data type */
- if (strcmp(field->name, FTS_DOC_ID_COL_NAME) == 0
- && field->col->mtype == DATA_INT
- && field->col->len == 8
- && field->col->prtype & DATA_NOT_NULL) {
- if (fts_doc_col_no) {
- *fts_doc_col_no = dict_col_get_no(field->col);
- }
- return(FTS_EXIST_DOC_ID_INDEX);
- } else {
- return(FTS_INCORRECT_DOC_ID_INDEX);
- }
- }
-
-
- /* Not found */
- return(FTS_NOT_EXIST_DOC_ID_INDEX);
-}
-/*******************************************************************//**
-Look for an index named FTS_DOC_ID_INDEX_NAME (compared without regard
-to letter case) among the MySQL key definitions and validate the first
-one found.
-@return FTS_EXIST_DOC_ID_INDEX if that key is unique, has exactly one
-user-defined key part, is spelled exactly FTS_DOC_ID_INDEX_NAME and is on
-column FTS_DOC_ID_COL_NAME; FTS_INCORRECT_DOC_ID_INDEX if it fails any of
-these; FTS_NOT_EXIST_DOC_ID_INDEX if no key has that name */
-UNIV_INTERN
-enum fts_doc_id_index_enum
-innobase_fts_check_doc_id_index_in_def(
-/*===================================*/
-	ulint		n_key,		/*!< in: Number of keys */
-	const KEY*	key_info)	/*!< in: Key definition */
-{
-	const KEY* const	end = key_info + n_key;
-
-	for (const KEY* candidate = key_info; candidate != end; ++candidate) {
-
-		/* Only a key with the reserved name is of interest. */
-		if (innobase_strcasecmp(candidate->name,
-					FTS_DOC_ID_INDEX_NAME) != 0) {
-			continue;
-		}
-
-		/* The reserved name was found: the key must be unique,
-		single-column, exactly named and on the Doc ID column. */
-		const bool	well_formed
-			= (candidate->flags & HA_NOSAME)
-			&& candidate->user_defined_key_parts == 1
-			&& !strcmp(candidate->name, FTS_DOC_ID_INDEX_NAME)
-			&& !strcmp(candidate->key_part[0].field->field_name.str,
-				   FTS_DOC_ID_COL_NAME);
-
-		return(well_formed
-		       ? FTS_EXIST_DOC_ID_INDEX
-		       : FTS_INCORRECT_DOC_ID_INDEX);
-	}
-
-	return(FTS_NOT_EXIST_DOC_ID_INDEX);
-}
-
-/*******************************************************************//**
-Create an index table where indexes are ordered as follows:
-
-IF a new primary key is defined for the table THEN
-
- 1) New primary key
- 2) The remaining keys in key_info
-
-ELSE
-
- 1) All new indexes in the order they arrive from MySQL
-
-ENDIF
-
-The table is treated as rebuilt when a new primary key is created, when
-add_fts_doc_id is set, or when innobase_need_rebuild() says so. In that
-case all ha_alter_info->key_count keys are defined and n_add is
-recomputed; otherwise only the n_add keys listed in index_add_buffer are
-defined. A definition for FTS_DOC_ID_INDEX_NAME is appended when
-add_fts_doc_idx is set on entry or becomes set during the rebuild branch.
-
-@return key definitions */
-static MY_ATTRIBUTE((nonnull, warn_unused_result, malloc))
-index_def_t*
-innobase_create_key_defs(
-/*=====================*/
- mem_heap_t* heap,
- /*!< in/out: memory heap where space for key
- definitions are allocated */
- const Alter_inplace_info* ha_alter_info,
- /*!< in: alter operation */
- const TABLE* altered_table,
- /*!< in: MySQL table that is being altered */
- ulint& n_add,
- /*!< in/out: number of indexes to be created */
- ulint& n_fts_add,
- /*!< out: number of FTS indexes to be created */
- bool got_default_clust,
- /*!< in: whether the table lacks a primary key */
- ulint& fts_doc_id_col,
- /*!< in/out: The column number for Doc ID; set to
- altered_table->s->stored_fields when this function
- decides that a Doc ID column must be added */
- bool& add_fts_doc_id,
- /*!< in/out: whether we need to add new DOC ID
- column for FTS index */
- bool& add_fts_doc_idx,
- /*!< in/out: whether we need to add new DOC ID
- index for FTS index */
- const TABLE* table)
- /*!< in: MySQL table that is being altered;
- passed to innobase_need_rebuild() */
-{
- index_def_t* indexdef;
- index_def_t* indexdefs;
- bool new_primary;
- const uint*const add
- = ha_alter_info->index_add_buffer;
- const KEY*const key_info
- = ha_alter_info->key_info_buffer;
-
- DBUG_ENTER("innobase_create_key_defs");
- DBUG_ASSERT(!add_fts_doc_id || add_fts_doc_idx);
- DBUG_ASSERT(ha_alter_info->index_add_count == n_add);
-
- /* If there is a primary key, it is always the first index
- defined for the innodb_table. */
-
- new_primary = n_add > 0
- && !my_strcasecmp(system_charset_info,
- key_info[*add].name, "PRIMARY");
- n_fts_add = 0;
-
- /* If there is a UNIQUE INDEX consisting entirely of NOT NULL
- columns and if the index does not contain column prefix(es)
- (only prefix/part of the column is indexed), MySQL will treat the
- index as a PRIMARY KEY unless the table already has one. */
-
- ut_ad(altered_table->s->primary_key == 0
- || altered_table->s->primary_key == MAX_KEY);
-
- if (got_default_clust && !new_primary) {
- new_primary = (altered_table->s->primary_key != MAX_KEY);
- }
-
- const bool rebuild = new_primary || add_fts_doc_id
- || innobase_need_rebuild(ha_alter_info, table);
-
- /* Reserve key_count slots, plus one more if the table is
- rebuilt and one more if got_default_clust is set; we might
- need to add the FTS_DOC_ID_INDEX */
- indexdef = indexdefs = static_cast<index_def_t*>(
- mem_heap_alloc(
- heap, sizeof *indexdef
- * (ha_alter_info->key_count
- + rebuild
- + got_default_clust)));
-
- if (rebuild) {
- ulint primary_key_number;
-
- if (new_primary) {
- if (n_add == 0) {
- DBUG_ASSERT(got_default_clust);
- DBUG_ASSERT(altered_table->s->primary_key
- == 0);
- primary_key_number = 0;
- } else {
- primary_key_number = *add;
- }
- } else if (got_default_clust) {
- /* Create the GEN_CLUST_INDEX */
- index_def_t* index = indexdef++;
-
- index->fields = NULL;
- index->n_fields = 0;
- index->ind_type = DICT_CLUSTERED;
- index->name = mem_heap_strdup(
- heap, innobase_index_reserve_name);
- index->key_number = ~0;
- primary_key_number = ULINT_UNDEFINED; /* so no key is skipped in the copy loop below */
- goto created_clustered;
- } else {
- primary_key_number = 0;
- }
-
- /* Create the PRIMARY key index definition */
- innobase_create_index_def(
- altered_table, key_info, primary_key_number,
- TRUE, TRUE, indexdef++, heap, (const Field **)altered_table->field);
-
-created_clustered:
- n_add = 1; /* counts the clustered index definition created above */
-
- for (ulint i = 0; i < ha_alter_info->key_count; i++) {
- if (i == primary_key_number) {
- continue;
- }
- /* Copy the index definitions. */
- innobase_create_index_def(
- altered_table, key_info, i, TRUE, FALSE,
- indexdef, heap, (const Field **)altered_table->field);
-
- if (indexdef->ind_type & DICT_FTS) {
- n_fts_add++;
- }
-
- indexdef++;
- n_add++;
- }
-
- if (n_fts_add > 0) {
- if (!add_fts_doc_id
- && !innobase_fts_check_doc_id_col(
- NULL, altered_table,
- &fts_doc_id_col)) {
- fts_doc_id_col =
- altered_table->s->stored_fields;
- add_fts_doc_id = true;
- }
-
- if (!add_fts_doc_idx) {
- fts_doc_id_index_enum ret;
- ulint doc_col_no;
-
- ret = innobase_fts_check_doc_id_index(
- NULL, altered_table, &doc_col_no);
-
- /* This should have been checked before */
- ut_ad(ret != FTS_INCORRECT_DOC_ID_INDEX);
-
- if (ret == FTS_NOT_EXIST_DOC_ID_INDEX) {
- add_fts_doc_idx = true;
- } else {
- ut_ad(ret == FTS_EXIST_DOC_ID_INDEX);
- ut_ad(doc_col_no == ULINT_UNDEFINED
- || doc_col_no == fts_doc_id_col);
- }
- }
- }
- } else {
- /* Create definitions for added secondary indexes. */
-
- for (ulint i = 0; i < n_add; i++) {
- innobase_create_index_def(
- altered_table, key_info, add[i], FALSE, FALSE,
- indexdef, heap, (const Field **)altered_table->field);
-
- if (indexdef->ind_type & DICT_FTS) {
- n_fts_add++;
- }
-
- indexdef++;
- }
- }
-
- DBUG_ASSERT(indexdefs + n_add == indexdef);
-
- if (add_fts_doc_idx) {
- index_def_t* index = indexdef++;
-
- index->fields = static_cast<index_field_t*>(
- mem_heap_alloc(heap, sizeof *index->fields));
- memset(index->fields, 0, sizeof *index->fields);
- index->n_fields = 1;
- index->fields->col_no = fts_doc_id_col;
- index->fields->prefix_len = 0;
- index->ind_type = DICT_UNIQUE;
-
- if (rebuild) {
- index->name = mem_heap_strdup(
- heap, FTS_DOC_ID_INDEX_NAME);
- ut_ad(!add_fts_doc_id
- || fts_doc_id_col == altered_table->s->stored_fields);
- } else {
- char* index_name;
- index->name = index_name = static_cast<char*>(
- mem_heap_alloc(
- heap,
- 1 + sizeof FTS_DOC_ID_INDEX_NAME)); /* prefix byte + name + NUL */
- *index_name++ = TEMP_INDEX_PREFIX;
- memcpy(index_name, FTS_DOC_ID_INDEX_NAME,
- sizeof FTS_DOC_ID_INDEX_NAME);
- }
-
- /* TODO: assign a real MySQL key number for this */
- index->key_number = ULINT_UNDEFINED;
- n_add++;
- }
-
- DBUG_ASSERT(indexdef > indexdefs);
- DBUG_ASSERT((ulint) (indexdef - indexdefs)
- <= ha_alter_info->key_count
- + add_fts_doc_idx + got_default_clust);
- DBUG_ASSERT(ha_alter_info->index_add_count <= n_add);
- DBUG_RETURN(indexdefs);
-}
-
-/*******************************************************************//**
-Tell whether any user-defined key part of an index is longer than the
-given limit.
-@return true if at least one key part length exceeds max_col_len */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-innobase_check_column_length(
-/*=========================*/
-	ulint		max_col_len,	/*!< in: maximum column length */
-	const KEY*	key_info)	/*!< in: index whose key parts
-					are checked */
-{
-	bool	too_long = false;
-
-	/* Stop at the first key part that is over the limit. */
-	for (ulint part_no = 0;
-	     !too_long && part_no < key_info->user_defined_key_parts;
-	     part_no++) {
-		too_long = key_info->key_part[part_no].length > max_col_len;
-	}
-
-	return(too_long);
-}
-
-struct ha_innobase_inplace_ctx : public inplace_alter_handler_ctx
-{ /* Bookkeeping for one in-place ALTER TABLE: the indexes and foreign
- key constraints being added or dropped, the old and new tables, and a
- memory heap that the destructor frees. Copying is disabled. */
- /** Dummy query graph */
- que_thr_t* thr;
- /** reference to the prebuilt struct of the creating instance */
- row_prebuilt_t*&prebuilt;
- /** InnoDB indexes being created */
- dict_index_t** add_index;
- /** MySQL key numbers for the InnoDB indexes that are being created */
- const ulint* add_key_numbers;
- /** number of InnoDB indexes being created */
- ulint num_to_add_index;
- /** InnoDB indexes being dropped */
- dict_index_t** drop_index;
- /** number of InnoDB indexes being dropped */
- const ulint num_to_drop_index;
- /** InnoDB foreign key constraints being dropped */
- dict_foreign_t** drop_fk;
- /** number of InnoDB foreign key constraints being dropped */
- const ulint num_to_drop_fk;
- /** InnoDB foreign key constraints being added */
- dict_foreign_t** add_fk;
- /** number of InnoDB foreign key constraints being added */
- const ulint num_to_add_fk;
- /** whether to create the indexes online */
- bool online;
- /** memory heap */
- mem_heap_t* heap;
- /** dictionary transaction */
- trx_t* trx;
- /** original table (if rebuilt, differs from new_table) */
- dict_table_t* old_table;
- /** table where the indexes are being created or dropped */
- dict_table_t* new_table;
- /** mapping of old column numbers to new ones, or NULL */
- const ulint* col_map;
- /** new column names, or NULL if nothing was renamed */
- const char** col_names;
- /** added AUTO_INCREMENT column position, or ULINT_UNDEFINED */
- const ulint add_autoinc;
- /** default values of ADD COLUMN, or NULL */
- const dtuple_t* add_cols;
- /** autoinc sequence to use */
- ib_sequence_t sequence;
- /** maximum auto-increment value */
- ulonglong max_autoinc;
- /** temporary table name to use for old table when renaming tables */
- const char* tmp_name;
-
- ha_innobase_inplace_ctx(row_prebuilt_t*& prebuilt_arg,
- dict_index_t** drop_arg,
- ulint num_to_drop_arg,
- dict_foreign_t** drop_fk_arg,
- ulint num_to_drop_fk_arg,
- dict_foreign_t** add_fk_arg,
- ulint num_to_add_fk_arg,
- bool online_arg,
- mem_heap_t* heap_arg,
- dict_table_t* new_table_arg,
- const char** col_names_arg,
- ulint add_autoinc_arg,
- ulonglong autoinc_col_min_value_arg,
- ulonglong autoinc_col_max_value_arg) :
- inplace_alter_handler_ctx(),
- prebuilt (prebuilt_arg),
- add_index (0), add_key_numbers (0), num_to_add_index (0),
- drop_index (drop_arg), num_to_drop_index (num_to_drop_arg),
- drop_fk (drop_fk_arg), num_to_drop_fk (num_to_drop_fk_arg),
- add_fk (add_fk_arg), num_to_add_fk (num_to_add_fk_arg),
- online (online_arg), heap (heap_arg), trx (0),
- old_table (prebuilt_arg->table),
- new_table (new_table_arg),
- col_map (0), col_names (col_names_arg),
- add_autoinc (add_autoinc_arg),
- add_cols (0),
- sequence(prebuilt->trx->mysql_thd,
- autoinc_col_min_value_arg, autoinc_col_max_value_arg),
- max_autoinc (0),
- tmp_name (0)
- {
-#ifdef UNIV_DEBUG
- for (ulint i = 0; i < num_to_add_index; i++) { /* num_to_add_index was initialized to 0 above, so this body does not run here */
- ut_ad(!add_index[i]->to_be_dropped);
- }
- for (ulint i = 0; i < num_to_drop_index; i++) {
- ut_ad(drop_index[i]->to_be_dropped);
- }
-#endif /* UNIV_DEBUG */
-
- thr = pars_complete_graph_for_exec(NULL, prebuilt->trx, heap);
- }
-
- ~ha_innobase_inplace_ctx()
- {
- mem_heap_free(heap); /* the heap passed to the constructor is released with the context */
- }
-
- /** Determine if the table will be rebuilt.
- @return whether the table will be rebuilt */
- bool need_rebuild () const { return(old_table != new_table); }
-
-private:
- // Disable copying
- ha_innobase_inplace_ctx(const ha_innobase_inplace_ctx&);
- ha_innobase_inplace_ctx& operator=(const ha_innobase_inplace_ctx&);
-};
-
-/********************************************************************//**
-Drop any indexes that we were not able to free previously due to
-open table handles.
-
-Debug assertions require that the caller owns dict_sys->mutex, that
-trx->dict_operation_lock_mode is RW_X_LATCH and that the dictionary
-operation of trx is TRX_DICT_OP_INDEX. row_merge_drop_indexes() is called
-only when table->drop_aborted is set; otherwise nothing is done. */
-static
-void
-online_retry_drop_indexes_low(
-/*==========================*/
- dict_table_t* table, /*!< in/out: table */
- trx_t* trx) /*!< in/out: transaction */
-{
- ut_ad(mutex_own(&dict_sys->mutex));
- ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
-
- /* We can have table->n_ref_count > 1, because other threads
- may have prebuilt->table pointing to the table. However, these
- other threads should be between statements, waiting for the
- next statement to execute, or for a meta-data lock. */
- ut_ad(table->n_ref_count >= 1);
-
- if (table->drop_aborted) {
- row_merge_drop_indexes(trx, table, TRUE);
- }
-}
-
-/** Drop, inside a DDL transaction of its own, the indexes that could
-not be freed before because of open table handles. Debug builds then
-check the table for duplicate indexes and require that drop_aborted
-has been cleared.
-@param table	in/out: table
-@param user_thd	in/out: MySQL connection that the transaction is
-		allocated for */
-static MY_ATTRIBUTE((nonnull))
-void
-online_retry_drop_indexes(
-	dict_table_t*	table,
-	THD*		user_thd)
-{
-	if (table->drop_aborted) {
-		trx_t*	ddl_trx = innobase_trx_allocate(user_thd);
-
-		trx_start_for_ddl(ddl_trx, TRX_DICT_OP_INDEX);
-
-		/* The low-level routine runs between locking and
-		unlocking the data dictionary. */
-		row_mysql_lock_data_dictionary(ddl_trx);
-		online_retry_drop_indexes_low(table, ddl_trx);
-		trx_commit_for_mysql(ddl_trx);
-		row_mysql_unlock_data_dictionary(ddl_trx);
-
-		trx_free_for_mysql(ddl_trx);
-	}
-
-#ifdef UNIV_DEBUG
-	mutex_enter(&dict_sys->mutex);
-	dict_table_check_for_dup_indexes(table, CHECK_ALL_COMPLETE);
-	mutex_exit(&dict_sys->mutex);
-	ut_a(!table->drop_aborted);
-#endif /* UNIV_DEBUG */
-}
-
-/** Reuse an existing, not yet started dictionary transaction to drop
-the indexes that could not be freed before because of open table
-handles, and commit it.
-@param table	in/out: table
-@param trx	in/out: transaction; asserted to be in state
-		TRX_STATE_NOT_STARTED with dict_operation_lock_mode
-		RW_X_LATCH */
-static MY_ATTRIBUTE((nonnull))
-void
-online_retry_drop_indexes_with_trx(
-	dict_table_t*	table,
-	trx_t*		trx)
-{
-	ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
-	ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
-
-	/* The dictionary is locked now, so this is the moment to get
-	rid of incompletely created indexes that
-	rollback_inplace_alter_table() may have left behind. */
-	if (!table->drop_aborted) {
-		return;
-	}
-
-	trx->table_id = 0;
-
-	trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
-
-	online_retry_drop_indexes_low(table, trx);
-	trx_commit_for_mysql(trx);
-}
-
-/** Tells whether a foreign key constraint is among those being dropped.
-The lookup compares pointers, not constraint names.
-@param foreign	the constraint to look for
-@param drop_fk	array of the constraints being dropped; it is not
-		accessed when n_drop_fk is 0
-@param n_drop_fk	number of elements in drop_fk
-@return whether foreign occurs in drop_fk[0..n_drop_fk-1] */
-MY_ATTRIBUTE((pure, nonnull(1), warn_unused_result))
-inline
-bool
-innobase_dropping_foreign(
-	const dict_foreign_t*	foreign,
-	dict_foreign_t**	drop_fk,
-	ulint			n_drop_fk)
-{
-	for (ulint pos = 0; pos < n_drop_fk; pos++) {
-		if (drop_fk[pos] == foreign) {
-			return(true);
-		}
-	}
-
-	return(false);
-}
-
-/** Determines if an InnoDB FOREIGN KEY constraint depends on a
-column that is being dropped or modified to NOT NULL.
-
-Constraints listed in drop_fk are ignored. When only NOT NULL is being
-set (drop=false), just the constraints of user_table itself that carry
-an ON DELETE SET NULL or ON UPDATE SET NULL action are considered.
-
-The verdict is reported through my_error(), which is an effect that is
-observable besides the return value. For that reason the function is
-not declared with the "pure" attribute.
-@param user_table InnoDB table as it is before the ALTER operation
-@param drop_fk constraints being dropped
-@param n_drop_fk number of constraints that are being dropped
-@param col_name Name of the column being altered
-@param drop true=drop column, false=set NOT NULL
-@retval true Not allowed (will call my_error())
-@retval false Allowed
-*/
-MY_ATTRIBUTE((nonnull(1,4), warn_unused_result))
-static
-bool
-innobase_check_foreigns_low(
- const dict_table_t* user_table,
- dict_foreign_t** drop_fk,
- ulint n_drop_fk,
- const char* col_name,
- bool drop)
-{
- dict_foreign_t* foreign;
- ut_ad(mutex_own(&dict_sys->mutex));
-
- /* Check if any FOREIGN KEY constraints are defined on this
- column. */
-
- for (dict_foreign_set::const_iterator it = user_table->foreign_set.begin();
- it != user_table->foreign_set.end();
- ++it) {
-
- foreign = *it;
-
- /* For NOT NULL, only the SET NULL actions can conflict. */
- if (!drop && !(foreign->type
- & (DICT_FOREIGN_ON_DELETE_SET_NULL
- | DICT_FOREIGN_ON_UPDATE_SET_NULL))) {
- continue;
- }
-
- if (innobase_dropping_foreign(foreign, drop_fk, n_drop_fk)) {
- continue;
- }
-
- for (unsigned f = 0; f < foreign->n_fields; f++) {
- if (!strcmp(foreign->foreign_col_names[f],
- col_name)) {
- my_error(drop
- ? ER_FK_COLUMN_CANNOT_DROP
- : ER_FK_COLUMN_NOT_NULL, MYF(0),
- col_name, foreign->id);
- return(true);
- }
- }
- }
-
- if (!drop) {
- /* SET NULL clauses on foreign key constraints of
- child tables affect the child tables, not the parent table.
- The column can be NOT NULL in the parent table. */
- return(false);
- }
-
- /* Check if any FOREIGN KEY constraints in other tables are
- referring to the column that is being dropped. */
- for (dict_foreign_set::const_iterator it
- = user_table->referenced_set.begin();
- it != user_table->referenced_set.end();
- ++it) {
-
- foreign = *it;
-
- if (innobase_dropping_foreign(foreign, drop_fk, n_drop_fk)) {
- continue;
- }
-
- for (unsigned f = 0; f < foreign->n_fields; f++) {
- char display_name[FN_REFLEN];
-
- if (strcmp(foreign->referenced_col_names[f],
- col_name)) {
- continue;
- }
-
- /* One byte of the buffer is held back for the
- terminating NUL that is stored at buf_end. */
- char* buf_end = innobase_convert_name(
- display_name, (sizeof display_name) - 1,
- foreign->foreign_table_name,
- strlen(foreign->foreign_table_name),
- NULL, TRUE);
- *buf_end = '\0';
- my_error(ER_FK_COLUMN_CANNOT_DROP_CHILD,
- MYF(0), col_name, foreign->id,
- display_name);
-
- return(true);
- }
- }
-
- return(false);
-}
-
-/** Determines if an InnoDB FOREIGN KEY constraint depends on a
-column that is being dropped or modified to NOT NULL.
-
-Every column of old_table is looked up in the create list of the ALTER
-statement. A column that is missing there is treated as dropped; a
-column that is present with NOT_NULL_FLAG is treated as set NOT NULL.
-Both cases are handed to innobase_check_foreigns_low().
-
-Errors are reported through my_error() by that callee, which is an
-effect that is observable besides the return value. For that reason
-the function is not declared with the "pure" attribute.
-@param ha_alter_info Data used during in-place alter
-@param altered_table MySQL table that is being altered
-(not referenced by the function body)
-@param old_table MySQL table as it is before the ALTER operation
-@param user_table InnoDB table as it is before the ALTER operation
-@param drop_fk constraints being dropped
-@param n_drop_fk number of constraints that are being dropped
-@retval true Not allowed (will call my_error())
-@retval false Allowed
-*/
-MY_ATTRIBUTE((nonnull(1,2,3,4), warn_unused_result))
-static
-bool
-innobase_check_foreigns(
- Alter_inplace_info* ha_alter_info,
- const TABLE* altered_table,
- const TABLE* old_table,
- const dict_table_t* user_table,
- dict_foreign_t** drop_fk,
- ulint n_drop_fk)
-{
- List_iterator_fast<Create_field> cf_it(
- ha_alter_info->alter_info->create_list);
-
- for (Field** fp = old_table->field; *fp; fp++) {
- cf_it.rewind();
- const Create_field* new_field;
-
- ut_ad(!(*fp)->real_maybe_null()
- == !!((*fp)->flags & NOT_NULL_FLAG));
-
- /* Leaves new_field NULL when the old column does not occur
- in the create list. */
- while ((new_field = cf_it++)) {
- if (new_field->field == *fp) {
- break;
- }
- }
-
- if (!new_field || (new_field->flags & NOT_NULL_FLAG)) {
- if (innobase_check_foreigns_low(
- user_table, drop_fk, n_drop_fk,
- (*fp)->field_name.str, !new_field)) {
- return(true);
- }
- }
- }
-
- return(false);
-}
-
-/** Store the default value of an added column in an InnoDB data field.
-
-@param heap	Memory heap from which the conversion buffer is allocated
-@param dfield	InnoDB data field to fill in
-@param field	MySQL field holding the value for the column
-@param comp	nonzero if in compact format
-@param prebuilt	not referenced by the function body */
-static MY_ATTRIBUTE((nonnull))
-void
-innobase_build_col_map_add(
-	mem_heap_t*	heap,
-	dfield_t*	dfield,
-	const Field*	field,
-	ulint		comp,
-	row_prebuilt_t*	prebuilt)
-{
-	if (field->is_real_null()) {
-		dfield_set_null(dfield);
-	} else {
-		/* Convert into a buffer of pack_length() bytes that is
-		taken from the heap. */
-		ulint	len = field->pack_length();
-		void*	mem = mem_heap_alloc(heap, len);
-		byte*	dest = static_cast<byte*>(mem);
-
-		row_mysql_store_col_in_innobase_format(
-			dfield, dest, TRUE, field->ptr, len, comp);
-	}
-}
-
-/** Build the column translation table that is used when columns are
-reordered, dropped or added.
-
-Only columns that are stored in the database take part in the
-numbering. For a column of the create list that has no counterpart in
-table, the default value is stored into add_cols by
-innobase_build_col_map_add().
-
-@param ha_alter_info	Data used during in-place alter
-@param altered_table	MySQL table that is being altered
-@param table		MySQL table as it is before the ALTER operation
-@param new_table	InnoDB table corresponding to MySQL altered_table
-@param old_table	InnoDB table corresponding to MYSQL table
-@param add_cols		Default values for ADD COLUMN, or NULL if no
-			ADD COLUMN
-@param heap		Memory heap where allocated
-@param prebuilt		passed through to innobase_build_col_map_add()
-@return array of integers, mapping column numbers in the table
-to column numbers in altered_table; dropped columns map to
-ULINT_UNDEFINED */
-static MY_ATTRIBUTE((nonnull(1,2,3,4,5,7), warn_unused_result))
-const ulint*
-innobase_build_col_map(
-	Alter_inplace_info*	ha_alter_info,
-	const TABLE*		altered_table,
-	const TABLE*		table,
-	const dict_table_t*	new_table,
-	const dict_table_t*	old_table,
-	dtuple_t*		add_cols,
-	mem_heap_t*		heap,
-	row_prebuilt_t*		prebuilt)
-{
-	DBUG_ENTER("innobase_build_col_map");
-	DBUG_ASSERT(altered_table != table);
-	DBUG_ASSERT(new_table != old_table);
-	DBUG_ASSERT(dict_table_get_n_cols(new_table)
-		    >= altered_table->s->stored_fields + DATA_N_SYS_COLS);
-	DBUG_ASSERT(dict_table_get_n_cols(old_table)
-		    >= table->s->stored_fields + DATA_N_SYS_COLS);
-	DBUG_ASSERT(!!add_cols == !!(ha_alter_info->handler_flags
-				     & Alter_inplace_info::ADD_COLUMN));
-	DBUG_ASSERT(!add_cols || dtuple_get_n_fields(add_cols)
-		    == dict_table_get_n_cols(new_table));
-
-	ulint*	col_map = static_cast<ulint*>(
-		mem_heap_alloc(heap, old_table->n_cols * sizeof *col_map));
-
-	List_iterator_fast<Create_field> cf_it(
-		ha_alter_info->alter_info->create_list);
-
-	/* Until proven otherwise, every user column counts as dropped. */
-	for (uint c = 0; c + DATA_N_SYS_COLS < old_table->n_cols; c++) {
-		col_map[c] = ULINT_UNDEFINED;
-	}
-
-	/* new_pos counts the stored columns of the altered table,
-	sql_idx counts all of its fields. */
-	uint	new_pos = 0;
-	uint	sql_idx = 0;
-
-	while (const Create_field* new_field = cf_it++) {
-		if (new_field->stored_in_db()) {
-			bool	in_old_table = false;
-			uint	old_pos = 0;
-
-			/* Look for the column among the stored fields
-			of the old table. */
-			for (uint k = 0; table->field[k]; k++) {
-				const Field*	old_field = table->field[k];
-
-				if (!old_field->stored_in_db()) {
-					continue;
-				}
-
-				if (new_field->field == old_field) {
-					col_map[old_pos] = new_pos;
-					in_old_table = true;
-					break;
-				}
-
-				old_pos++;
-			}
-
-			if (!in_old_table) {
-				/* An added column: record its default. */
-				innobase_build_col_map_add(
-					heap,
-					dtuple_get_nth_field(add_cols, new_pos),
-					altered_table->field[sql_idx],
-					dict_table_is_comp(new_table),
-					prebuilt);
-			}
-
-			new_pos++;
-		}
-
-		sql_idx++;
-	}
-
-	DBUG_ASSERT(new_pos == altered_table->s->stored_fields);
-
-	uint	tail = table->s->stored_fields;
-
-	if (tail + DATA_N_SYS_COLS >= old_table->n_cols) {
-		DBUG_ASSERT(!DICT_TF2_FLAG_IS_SET(
-				    old_table,
-				    DICT_TF2_FTS_HAS_DOC_ID));
-	} else {
-		/* The old table carries the InnoDB hidden FTS_DOC_ID
-		column; there should be exactly one such extra field. */
-		DBUG_ASSERT(DICT_TF2_FLAG_IS_SET(old_table,
-						 DICT_TF2_FTS_HAS_DOC_ID));
-		DBUG_ASSERT(tail + DATA_N_SYS_COLS + 1 == old_table->n_cols);
-		DBUG_ASSERT(!strcmp(dict_table_get_col_name(
-					    old_table, table->s->stored_fields),
-				    FTS_DOC_ID_COL_NAME));
-
-		if (altered_table->s->stored_fields + DATA_N_SYS_COLS
-		    < new_table->n_cols) {
-			DBUG_ASSERT(DICT_TF2_FLAG_IS_SET(
-					    new_table,
-					    DICT_TF2_FTS_HAS_DOC_ID));
-			DBUG_ASSERT(altered_table->s->stored_fields
-				    + DATA_N_SYS_COLS + 1
-				    == new_table->n_cols);
-			col_map[tail] = altered_table->s->stored_fields;
-		} else {
-			DBUG_ASSERT(!DICT_TF2_FLAG_IS_SET(
-					    new_table,
-					    DICT_TF2_FTS_HAS_DOC_ID));
-			col_map[tail] = ULINT_UNDEFINED;
-		}
-
-		tail++;
-	}
-
-	/* The remaining columns keep their distance from the end of
-	the column array. */
-	for (; tail < old_table->n_cols; tail++) {
-		col_map[tail] = tail + new_table->n_cols - old_table->n_cols;
-	}
-
-	DBUG_RETURN(col_map);
-}
-
-/** Drop the auxiliary tables of every FTS index of a table. This is
-used for newly created FTS indexes during the FIC create index
-process, before fts_add_index is called. A failure for one index does
-not stop the processing of the remaining indexes.
-@param table	table that was being rebuilt online
-@param trx	transaction
-@return DB_SUCCESS if successful, otherwise last error code
-*/
-static
-dberr_t
-innobase_drop_fts_index_table(
-	dict_table_t*	table,
-	trx_t*		trx)
-{
-	dberr_t		last_err = DB_SUCCESS;
-	dict_index_t*	index = dict_table_get_first_index(table);
-
-	while (index != NULL) {
-		if (index->type & DICT_FTS) {
-			const dberr_t	err
-				= fts_drop_index_tables(trx, index);
-
-			if (err != DB_SUCCESS) {
-				/* Remember the failure and carry on. */
-				last_err = err;
-			}
-		}
-
-		index = dict_table_get_next_index(index);
-	}
-
-	return(last_err);
-}
-
-/** Collect the column names that apply after columns were renamed.
-
-The result has user_table->n_def entries and is allocated zero-filled
-from heap. An entry for a MySQL field of table is set only when that
-field occurs in the create list of the ALTER statement. The entries
-from position table->s->fields onwards are filled with the internal
-column names of user_table.
-@param ha_alter_info	Data used during in-place alter
-@param altered_table	MySQL table that is being altered
-@param table		MySQL table as it is before the ALTER operation
-@param user_table	InnoDB table as it is before the ALTER operation
-@param heap		Memory heap for the allocation
-@return array of new column names in rebuilt_table, or NULL if not renamed */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-const char**
-innobase_get_col_names(
-	Alter_inplace_info*	ha_alter_info,
-	const TABLE*		altered_table,
-	const TABLE*		table,
-	const dict_table_t*	user_table,
-	mem_heap_t*		heap)
-{
-	DBUG_ENTER("innobase_get_col_names");
-	DBUG_ASSERT(user_table->n_def > table->s->fields);
-	DBUG_ASSERT(ha_alter_info->handler_flags
-		    & Alter_inplace_info::ALTER_COLUMN_NAME);
-
-	const char**	names = static_cast<const char**>(
-		mem_heap_zalloc(heap, user_table->n_def * sizeof *names));
-
-	List_iterator_fast<Create_field> cf_it(
-		ha_alter_info->alter_info->create_list);
-	uint	n_seen = 0;
-
-	while (const Create_field* new_field = cf_it++) {
-		DBUG_ASSERT(n_seen < altered_table->s->fields);
-
-		uint	old_i = 0;
-
-		/* Find the old field that this definition refers to. */
-		while (table->field[old_i]
-		       && new_field->field != table->field[old_i]) {
-			old_i++;
-		}
-
-		if (table->field[old_i]) {
-			names[old_i] = new_field->field_name.str;
-		}
-
-		n_seen++;
-	}
-
-	/* Copy the internal column names. Each one is located right
-	behind the NUL terminator of the name before it. */
-	const uint	first_internal = table->s->fields;
-
-	names[first_internal] = dict_table_get_col_name(
-		user_table, first_internal);
-
-	for (uint pos = first_internal + 1; pos < user_table->n_def; pos++) {
-		names[pos] = names[pos - 1] + strlen(names[pos - 1]) + 1;
-	}
-
-	DBUG_RETURN(names);
-}
-
-/** Update internal structures with concurrent writes blocked,
-while preparing ALTER TABLE.
-
-The in-place context is taken from ha_alter_info->handler_ctx. A
-separate dictionary transaction (ctx->trx) is allocated for the
-changes to the data dictionary. If a new clustered index is defined,
-a copy of the table is created under a temporary name and the column
-map is built. Then the indexes to be added are created and, for an
-online operation, the modification logs are allocated.
-
-On failure the work done so far is undone, my_error() is called, the
-context is deleted and ha_alter_info->handler_ctx is set to NULL.
-
-@param ha_alter_info Data used during in-place alter
-@param altered_table MySQL table that is being altered
-@param old_table MySQL table as it is before the ALTER operation
-@param table_name Table name in MySQL
-@param flags Table and tablespace flags
-@param flags2 Additional table flags
-@param fts_doc_id_col The column number of FTS_DOC_ID
-@param add_fts_doc_id Flag: add column FTS_DOC_ID?
-@param add_fts_doc_id_idx Flag: add index FTS_DOC_ID_INDEX (FTS_DOC_ID)?
-@param prebuilt InnoDB prebuilt struct; only passed on to
-innobase_build_col_map()
-
-@retval true Failure
-@retval false Success
-*/
-static MY_ATTRIBUTE((warn_unused_result, nonnull(1,2,3,4)))
-bool
-prepare_inplace_alter_table_dict(
-/*=============================*/
- Alter_inplace_info* ha_alter_info,
- const TABLE* altered_table,
- const TABLE* old_table,
- const char* table_name,
- ulint flags,
- ulint flags2,
- ulint fts_doc_id_col,
- bool add_fts_doc_id,
- bool add_fts_doc_id_idx,
- row_prebuilt_t* prebuilt)
-{
- bool dict_locked = false;
- ulint* add_key_nums; /* MySQL key numbers */
- index_def_t* index_defs; /* index definitions */
- dict_table_t* user_table;
- dict_index_t* fts_index = NULL;
- ulint new_clustered = 0;
- dberr_t error;
- ulint num_fts_index;
- ha_innobase_inplace_ctx*ctx;
- uint sql_idx;
-
- DBUG_ENTER("prepare_inplace_alter_table_dict");
-
- ctx = static_cast<ha_innobase_inplace_ctx*>
- (ha_alter_info->handler_ctx);
-
- DBUG_ASSERT((ctx->add_autoinc != ULINT_UNDEFINED)
- == (ctx->sequence.m_max_value > 0));
- DBUG_ASSERT(!ctx->num_to_drop_index == !ctx->drop_index);
- DBUG_ASSERT(!ctx->num_to_drop_fk == !ctx->drop_fk);
- DBUG_ASSERT(!add_fts_doc_id || add_fts_doc_id_idx);
- DBUG_ASSERT(!add_fts_doc_id_idx
- || innobase_fulltext_exist(altered_table));
- DBUG_ASSERT(!ctx->add_cols);
- DBUG_ASSERT(!ctx->add_index);
- DBUG_ASSERT(!ctx->add_key_numbers);
- DBUG_ASSERT(!ctx->num_to_add_index);
-
- /* At this point ctx->new_table still refers to the table as it
- is before the ALTER; it is replaced below if the table is rebuilt. */
- user_table = ctx->new_table;
-
- trx_start_if_not_started_xa(ctx->prebuilt->trx);
-
- /* Create a background transaction for the operations on
- the data dictionary tables. */
- ctx->trx = innobase_trx_allocate(ctx->prebuilt->trx->mysql_thd);
- trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX);
-
- DBUG_ASSERT(!ctx->trx->fake_changes);
-
- /* Create table containing all indexes to be built in this
- ALTER TABLE ADD INDEX so that they are in the correct order
- in the table. */
-
- ctx->num_to_add_index = ha_alter_info->index_add_count;
-
- ut_ad(ctx->prebuilt->trx->mysql_thd != NULL);
- const char* path = thd_innodb_tmpdir(
- ctx->prebuilt->trx->mysql_thd);
-
- index_defs = innobase_create_key_defs(
- ctx->heap, ha_alter_info, altered_table, ctx->num_to_add_index,
- num_fts_index,
- row_table_got_default_clust_index(ctx->new_table),
- fts_doc_id_col, add_fts_doc_id, add_fts_doc_id_idx,
- old_table);
-
- new_clustered = DICT_CLUSTERED & index_defs[0].ind_type;
-
- if (num_fts_index > 1) {
- my_error(ER_INNODB_FT_LIMIT, MYF(0));
- goto error_handled;
- }
-
- if (!ctx->online) {
- /* This is not an online operation (LOCK=NONE). */
- } else if (ctx->add_autoinc == ULINT_UNDEFINED
- && num_fts_index == 0
- && (!innobase_need_rebuild(ha_alter_info, old_table)
- || !innobase_fulltext_exist(altered_table))) {
- /* InnoDB can perform an online operation (LOCK=NONE). */
- } else {
- /* This should have been blocked in
- check_if_supported_inplace_alter(). */
- ut_ad(0);
- my_error(ER_NOT_SUPPORTED_YET, MYF(0),
- thd_query_string(ctx->prebuilt->trx->mysql_thd)->str);
- goto error_handled;
- }
-
- /* The primary index would be rebuilt if a FTS Doc ID
- column is to be added, and the primary index definition
- is just copied from old table and stored in index_defs[0] */
- DBUG_ASSERT(!add_fts_doc_id || new_clustered);
- DBUG_ASSERT(!!new_clustered ==
- (innobase_need_rebuild(ha_alter_info, old_table)
- || add_fts_doc_id));
-
- /* Allocate memory for dictionary index definitions */
-
- ctx->add_index = static_cast<dict_index_t**>(
- mem_heap_alloc(ctx->heap, ctx->num_to_add_index
- * sizeof *ctx->add_index));
- ctx->add_key_numbers = add_key_nums = static_cast<ulint*>(
- mem_heap_alloc(ctx->heap, ctx->num_to_add_index
- * sizeof *ctx->add_key_numbers));
-
- /* This transaction should be dictionary operation, so that
- the data dictionary will be locked during crash recovery. */
-
- ut_ad(ctx->trx->dict_operation == TRX_DICT_OP_INDEX);
-
- /* Acquire a lock on the table before creating any indexes. */
-
- if (ctx->online) {
- error = DB_SUCCESS;
- } else {
- error = row_merge_lock_table(
- ctx->prebuilt->trx, ctx->new_table, LOCK_S);
-
- if (error != DB_SUCCESS) {
-
- goto error_handling;
- }
- }
-
- /* Latch the InnoDB data dictionary exclusively so that no deadlocks
- or lock waits can happen in it during an index create operation. */
-
- row_mysql_lock_data_dictionary(ctx->trx);
- dict_locked = true;
-
- /* Wait for background stats processing to stop using the table that
- we are going to alter. We know bg stats will not start using it again
- until we are holding the data dict locked and we are holding it here
- at least until checking ut_ad(user_table->n_ref_count == 1) below.
- XXX what may happen if bg stats opens the table after we
- have unlocked data dictionary below? */
- dict_stats_wait_bg_to_stop_using_table(user_table, ctx->trx);
-
- online_retry_drop_indexes_low(ctx->new_table, ctx->trx);
-
- ut_d(dict_table_check_for_dup_indexes(
- ctx->new_table, CHECK_ABORTED_OK));
-
- /* If a new clustered index is defined for the table we need
- to rebuild the table with a temporary name. */
-
- if (new_clustered) {
- const char* new_table_name
- = dict_mem_create_temporary_tablename(
- ctx->heap,
- ctx->new_table->name,
- ctx->new_table->id);
- ulint n_cols;
- dtuple_t* add_cols;
- ulint key_id = FIL_DEFAULT_ENCRYPTION_KEY;
- fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT;
-
- /* Inherit the encryption settings of the old tablespace,
- if there are any. The tablespace might not be available:
- further below this function still tests
- user_table->is_readable() and
- dict_table_is_discarded(user_table). Therefore the result
- is checked before it is dereferenced, and crypt_data is
- only read while the reference to the tablespace is held.
- NOTE(review): this assumes that fil_space_acquire()
- returns NULL when it cannot hand out the tablespace;
- confirm against its declaration. */
- fil_space_t* space = fil_space_acquire(ctx->prebuilt->table->space);
-
- if (space != NULL) {
- const fil_space_crypt_t* crypt_data = space->crypt_data;
-
- if (crypt_data != NULL) {
- key_id = crypt_data->key_id;
- mode = crypt_data->encryption;
- }
-
- fil_space_release(space);
- }
-
- if (innobase_check_foreigns(
- ha_alter_info, altered_table, old_table,
- user_table, ctx->drop_fk, ctx->num_to_drop_fk)) {
- goto new_clustered_failed;
- }
-
- n_cols = altered_table->s->stored_fields;
-
- if (add_fts_doc_id) {
- n_cols++;
- DBUG_ASSERT(flags2 & DICT_TF2_FTS);
- DBUG_ASSERT(add_fts_doc_id_idx);
- flags2 |= DICT_TF2_FTS_ADD_DOC_ID
- | DICT_TF2_FTS_HAS_DOC_ID
- | DICT_TF2_FTS;
- }
-
- DBUG_ASSERT(!add_fts_doc_id_idx || (flags2 & DICT_TF2_FTS));
-
- /* Create the table. */
- trx_set_dict_operation(ctx->trx, TRX_DICT_OP_TABLE);
-
- if (dict_table_get_low(new_table_name)) {
- my_error(ER_TABLE_EXISTS_ERROR, MYF(0),
- new_table_name);
- goto new_clustered_failed;
- }
-
- /* The initial space id 0 may be overridden later. */
- ctx->new_table = dict_mem_table_create(
- new_table_name, 0, n_cols, flags, flags2);
- /* The rebuilt indexed_table will use the renamed
- column names. */
- ctx->col_names = NULL;
-
- if (DICT_TF_HAS_DATA_DIR(flags)) {
- ctx->new_table->data_dir_path =
- mem_heap_strdup(ctx->new_table->heap,
- user_table->data_dir_path);
- }
-
- /* i counts the stored columns, sql_idx all the fields of
- altered_table; fields that are not stored are skipped. */
- sql_idx= 0;
- for (uint i = 0; i < altered_table->s->stored_fields; i++, sql_idx++) {
- const Field* field;
- while (!((field= altered_table->field[sql_idx])->
- stored_in_db()))
- sql_idx++;
- ulint is_unsigned;
- ulint field_type
- = (ulint) field->type();
- ulint col_type
- = get_innobase_type_from_mysql_type(
- &is_unsigned, field);
- ulint charset_no;
- ulint col_len;
-
- /* we assume in dtype_form_prtype() that this
- fits in two bytes */
- ut_a(field_type <= MAX_CHAR_COLL_NUM);
-
- if (!field->real_maybe_null()) {
- field_type |= DATA_NOT_NULL;
- }
-
- if (field->binary()) {
- field_type |= DATA_BINARY_TYPE;
- }
-
- if (is_unsigned) {
- field_type |= DATA_UNSIGNED;
- }
-
- if (dtype_is_string_type(col_type)) {
- charset_no = (ulint) field->charset()->number;
-
- if (charset_no > MAX_CHAR_COLL_NUM) {
- dict_mem_table_free(
- ctx->new_table);
- my_error(ER_WRONG_KEY_COLUMN, MYF(0),
- field->field_name.str);
- goto new_clustered_failed;
- }
- } else {
- charset_no = 0;
- }
-
- col_len = field->pack_length();
-
- /* The MySQL pack length contains 1 or 2 bytes
- length field for a true VARCHAR. Let us
- subtract that, so that the InnoDB column
- length in the InnoDB data dictionary is the
- real maximum byte length of the actual data. */
-
- if (field->type() == MYSQL_TYPE_VARCHAR) {
- uint32 length_bytes
- = static_cast<const Field_varstring*>(
- field)->length_bytes;
-
- col_len -= length_bytes;
-
- if (length_bytes == 2) {
- field_type |= DATA_LONG_TRUE_VARCHAR;
- }
- }
-
- if (dict_col_name_is_reserved(field->field_name.str)) {
- dict_mem_table_free(ctx->new_table);
- my_error(ER_WRONG_COLUMN_NAME, MYF(0),
- field->field_name.str);
- goto new_clustered_failed;
- }
-
- dict_mem_table_add_col(
- ctx->new_table, ctx->heap,
- field->field_name.str,
- col_type,
- dtype_form_prtype(field_type, charset_no),
- col_len);
- }
-
- if (add_fts_doc_id) {
- fts_add_doc_id_column(ctx->new_table, ctx->heap);
- ctx->new_table->fts->doc_col = fts_doc_id_col;
- ut_ad(fts_doc_id_col == altered_table->s->stored_fields);
- } else if (ctx->new_table->fts) {
- ctx->new_table->fts->doc_col = fts_doc_id_col;
- }
-
- error = row_create_table_for_mysql(
- ctx->new_table, ctx->trx, false, mode, key_id);
-
- switch (error) {
- dict_table_t* temp_table;
- case DB_SUCCESS:
- /* We need to bump up the table ref count and
- before we can use it we need to open the
- table. The new_table must be in the data
- dictionary cache, because we are still holding
- the dict_sys->mutex. */
- ut_ad(mutex_own(&dict_sys->mutex));
- temp_table = dict_table_open_on_name(
- ctx->new_table->name, TRUE, FALSE,
- DICT_ERR_IGNORE_NONE);
- ut_a(ctx->new_table == temp_table);
- /* n_ref_count must be 1, because purge cannot
- be executing on this very table as we are
- holding dict_operation_lock X-latch. */
- DBUG_ASSERT(ctx->new_table->n_ref_count == 1);
- break;
- case DB_TABLESPACE_EXISTS:
- my_error(ER_TABLESPACE_EXISTS, MYF(0),
- new_table_name);
- goto new_clustered_failed;
- case DB_DUPLICATE_KEY:
- my_error(HA_ERR_TABLE_EXIST, MYF(0),
- altered_table->s->table_name.str);
- goto new_clustered_failed;
- default:
- my_error_innodb(error, table_name, flags);
- /* Common exit for every failure before the new table
- has been created successfully; the error has already
- been reported by the code that jumps here. */
- new_clustered_failed:
- DBUG_ASSERT(ctx->trx != ctx->prebuilt->trx);
- trx_rollback_to_savepoint(ctx->trx, NULL);
-
- ut_ad(user_table->n_ref_count == 1);
-
- online_retry_drop_indexes_with_trx(
- user_table, ctx->trx);
- goto err_exit;
- }
-
- if (ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_COLUMN) {
- add_cols = dtuple_create(
- ctx->heap,
- dict_table_get_n_cols(ctx->new_table));
-
- dict_table_copy_types(add_cols, ctx->new_table);
- } else {
- add_cols = NULL;
- }
-
- ctx->col_map = innobase_build_col_map(
- ha_alter_info, altered_table, old_table,
- ctx->new_table, user_table,
- add_cols, ctx->heap, prebuilt);
- ctx->add_cols = add_cols;
- } else {
- DBUG_ASSERT(!innobase_need_rebuild(ha_alter_info, old_table));
- DBUG_ASSERT(old_table->s->primary_key
- == altered_table->s->primary_key);
-
- if (!ctx->new_table->fts
- && innobase_fulltext_exist(altered_table)) {
- ctx->new_table->fts = fts_create(
- ctx->new_table);
- ctx->new_table->fts->doc_col = fts_doc_id_col;
- }
- }
-
- /* Assign table_id, so that no table id of
- fts_create_index_tables() will be written to the undo logs. */
- DBUG_ASSERT(ctx->new_table->id != 0);
- ctx->trx->table_id = ctx->new_table->id;
-
- /* Create the indexes in SYS_INDEXES and load into dictionary. */
-
- for (ulint a = 0; a < ctx->num_to_add_index; a++) {
-
- ctx->add_index[a] = row_merge_create_index(
- ctx->trx, ctx->new_table,
- &index_defs[a], ctx->col_names);
-
- add_key_nums[a] = index_defs[a].key_number;
-
- if (!ctx->add_index[a]) {
- error = ctx->trx->error_state;
- DBUG_ASSERT(error != DB_SUCCESS);
- goto error_handling;
- }
-
- if (ctx->add_index[a]->type & DICT_FTS) {
- DBUG_ASSERT(num_fts_index);
- DBUG_ASSERT(!fts_index);
- DBUG_ASSERT(ctx->add_index[a]->type == DICT_FTS);
- fts_index = ctx->add_index[a];
- }
-
- /* If only online ALTER TABLE operations have been
- requested, allocate a modification log. If the table
- will be locked anyway, the modification
- log is unnecessary. When rebuilding the table
- (new_clustered), we will allocate the log for the
- clustered index of the old table, later. */
- if (new_clustered
- || !ctx->online
- || !user_table->is_readable()
- || dict_table_is_discarded(user_table)) {
- /* No need to allocate a modification log. */
- ut_ad(!ctx->add_index[a]->online_log);
- } else if (ctx->add_index[a]->type & DICT_FTS) {
- /* Fulltext indexes are not covered
- by a modification log. */
- } else {
- DBUG_EXECUTE_IF("innodb_OOM_prepare_inplace_alter",
- error = DB_OUT_OF_MEMORY;
- goto error_handling;);
- rw_lock_x_lock(&ctx->add_index[a]->lock);
-
- bool ok = row_log_allocate(ctx->add_index[a],
- NULL, true, NULL,
- NULL, path);
- rw_lock_x_unlock(&ctx->add_index[a]->lock);
-
- if (!ok) {
- error = DB_OUT_OF_MEMORY;
- goto error_handling;
- }
- }
- }
-
- ut_ad(new_clustered == ctx->need_rebuild());
-
- DBUG_EXECUTE_IF("innodb_OOM_prepare_inplace_alter",
- error = DB_OUT_OF_MEMORY;
- goto error_handling;);
-
- if (new_clustered && ctx->online) {
- /* Allocate a log for online table rebuild. */
- dict_index_t* clust_index = dict_table_get_first_index(
- user_table);
-
- rw_lock_x_lock(&clust_index->lock);
- bool ok = row_log_allocate(
- clust_index, ctx->new_table,
- !(ha_alter_info->handler_flags
- & Alter_inplace_info::ADD_PK_INDEX),
- ctx->add_cols, ctx->col_map, path);
- rw_lock_x_unlock(&clust_index->lock);
-
- if (!ok) {
- error = DB_OUT_OF_MEMORY;
- goto error_handling;
- }
- }
-
- if (ctx->online) {
- /* Assign a consistent read view for
- row_merge_read_clustered_index(). */
- trx_assign_read_view(ctx->prebuilt->trx);
- }
-
- if (fts_index) {
- /* Ensure that the dictionary operation mode will
- not change while creating the auxiliary tables. */
- trx_dict_op_t op = trx_get_dict_operation(ctx->trx);
-
-#ifdef UNIV_DEBUG
- switch (op) {
- case TRX_DICT_OP_NONE:
- break;
- case TRX_DICT_OP_TABLE:
- case TRX_DICT_OP_INDEX:
- goto op_ok;
- }
- ut_error;
-op_ok:
-#endif /* UNIV_DEBUG */
- ut_ad(ctx->trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- DICT_TF2_FLAG_SET(ctx->new_table, DICT_TF2_FTS);
-
- /* This function will commit the transaction and reset
- the trx_t::dict_operation flag on success. */
-
- error = fts_create_index_tables(ctx->trx, fts_index);
-
- DBUG_EXECUTE_IF("innodb_test_fail_after_fts_index_table",
- error = DB_LOCK_WAIT_TIMEOUT;
- goto error_handling;);
-
- if (error != DB_SUCCESS) {
- goto error_handling;
- }
-
- trx_start_for_ddl(ctx->trx, op);
-
- if (!ctx->new_table->fts
- || ib_vector_size(ctx->new_table->fts->indexes) == 0) {
- error = fts_create_common_tables(
- ctx->trx, ctx->new_table,
- user_table->name, TRUE);
-
- DBUG_EXECUTE_IF(
- "innodb_test_fail_after_fts_common_table",
- error = DB_LOCK_WAIT_TIMEOUT;);
-
- if (error != DB_SUCCESS) {
- goto error_handling;
- }
-
- ctx->new_table->fts->fts_status
- |= TABLE_DICT_LOCKED;
-
- error = innobase_fts_load_stopword(
- ctx->new_table, ctx->trx,
- ctx->prebuilt->trx->mysql_thd)
- ? DB_SUCCESS : DB_ERROR;
- ctx->new_table->fts->fts_status
- &= ~TABLE_DICT_LOCKED;
-
- if (error != DB_SUCCESS) {
- goto error_handling;
- }
- }
-
- ut_ad(trx_get_dict_operation(ctx->trx) == op);
- }
-
- DBUG_ASSERT(error == DB_SUCCESS);
-
- /* Commit the data dictionary transaction in order to release
- the table locks on the system tables. This means that if
- MySQL crashes while creating a new primary key inside
- row_merge_build_indexes(), ctx->new_table will not be dropped
- by trx_rollback_active(). It will have to be recovered or
- dropped by the database administrator. */
- trx_commit_for_mysql(ctx->trx);
-
- row_mysql_unlock_data_dictionary(ctx->trx);
- dict_locked = false;
-
- ut_a(ctx->trx->lock.n_active_thrs == 0);
-
- DBUG_EXECUTE_IF("crash_innodb_add_index_after", DBUG_SUICIDE(););
-
-error_handling:
- /* After an error, remove all those index definitions from the
- dictionary which were defined. */
-
- switch (error) {
- case DB_SUCCESS:
- ut_a(!dict_locked);
-
- ut_d(mutex_enter(&dict_sys->mutex));
- ut_d(dict_table_check_for_dup_indexes(
- user_table, CHECK_PARTIAL_OK));
- ut_d(mutex_exit(&dict_sys->mutex));
- DBUG_RETURN(false);
- case DB_TABLESPACE_EXISTS:
- my_error(ER_TABLESPACE_EXISTS, MYF(0), "(unknown)");
- break;
- case DB_DUPLICATE_KEY:
- my_error(ER_DUP_KEY, MYF(0), "SYS_INDEXES");
- break;
- case DB_OUT_OF_FILE_SPACE:
- my_error_innodb(error, table_name, user_table->flags);
- break;
- default:
- my_error_innodb(error, table_name, user_table->flags);
- }
-
- /* Entered directly by the failures above that have already
- reported their error. */
-error_handled:
-
- ctx->prebuilt->trx->error_info = NULL;
- ctx->trx->error_state = DB_SUCCESS;
-
- if (!dict_locked) {
- row_mysql_lock_data_dictionary(ctx->trx);
- }
-
- if (new_clustered) {
- if (ctx->need_rebuild()) {
-
- if (DICT_TF2_FLAG_IS_SET(
- ctx->new_table, DICT_TF2_FTS)) {
- innobase_drop_fts_index_table(
- ctx->new_table, ctx->trx);
- }
-
- dict_table_close(ctx->new_table, TRUE, FALSE);
-
-#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG
- /* Nobody should have initialized the stats of the
- newly created table yet. When this is the case, we
- know that it has not been added for background stats
- gathering. */
- ut_a(!ctx->new_table->stat_initialized);
-#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */
-
- row_merge_drop_table(ctx->trx, ctx->new_table);
-
- /* Free the log for online table rebuild, if
- one was allocated. */
-
- dict_index_t* clust_index = dict_table_get_first_index(
- user_table);
-
- rw_lock_x_lock(&clust_index->lock);
-
- if (clust_index->online_log) {
- ut_ad(ctx->online);
- row_log_abort_sec(clust_index);
- clust_index->online_status
- = ONLINE_INDEX_COMPLETE;
- }
-
- rw_lock_x_unlock(&clust_index->lock);
- }
-
- trx_commit_for_mysql(ctx->trx);
- /* n_ref_count must be 1, because purge cannot
- be executing on this very table as we are
- holding dict_operation_lock X-latch. */
- DBUG_ASSERT(user_table->n_ref_count == 1 || ctx->online);
-
- online_retry_drop_indexes_with_trx(user_table, ctx->trx);
- } else {
- ut_ad(!ctx->need_rebuild());
- row_merge_drop_indexes(ctx->trx, user_table, TRUE);
- trx_commit_for_mysql(ctx->trx);
- }
-
- ut_d(dict_table_check_for_dup_indexes(user_table, CHECK_ALL_COMPLETE));
- ut_ad(!user_table->drop_aborted);
-
-err_exit:
- /* Clear the to_be_dropped flag in the data dictionary cache. */
- for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
- DBUG_ASSERT(*ctx->drop_index[i]->name != TEMP_INDEX_PREFIX);
- DBUG_ASSERT(ctx->drop_index[i]->to_be_dropped);
- ctx->drop_index[i]->to_be_dropped = 0;
- }
-
- row_mysql_unlock_data_dictionary(ctx->trx);
-
- trx_free_for_mysql(ctx->trx);
- trx_commit_for_mysql(ctx->prebuilt->trx);
-
- /* The context is of no use after a failed prepare; its
- destructor frees ctx->heap. */
- delete ctx;
- ha_alter_info->handler_ctx = NULL;
-
- DBUG_RETURN(true);
-}
-
-/** Check whether dropping an index would leave a FOREIGN KEY constraint
-without an index to rely on.
-Two sets of constraints are examined: those of other tables that reference
-indexed_table through the index, and those defined on indexed_table that
-use the index. A constraint does not block the drop when
-dict_foreign_find_index() finds a substitute index in the table, or when
-innobase_find_equiv_index() finds one among the indexes being added.
-Constraints of indexed_table that are themselves being dropped are ignored.
-When the drop must be refused, trx->error_info is set to the index.
-@return true if the index is needed and can't be dropped */
-static MY_ATTRIBUTE((nonnull(1,2,3,5), warn_unused_result))
-bool
-innobase_check_foreign_key_index(
-/*=============================*/
-	Alter_inplace_info*	ha_alter_info,	/*!< in: Structure describing
-						changes to be done by ALTER
-						TABLE */
-	dict_index_t*		index,		/*!< in: index to check */
-	dict_table_t*		indexed_table,	/*!< in: table that owns the
-						foreign keys */
-	const char**		col_names,	/*!< in: column names, or NULL
-						for indexed_table->col_names */
-	trx_t*			trx,		/*!< in/out: transaction */
-	dict_foreign_t**	drop_fk,	/*!< in: Foreign key constraints
-						to drop */
-	ulint			n_drop_fk)	/*!< in: Number of foreign keys
-						to drop */
-{
-	ut_ad(index != NULL);
-	ut_ad(indexed_table != NULL);
-
-	/* Pass 1: constraints of other tables that reference this table
-	through the index. */
-	const dict_foreign_set&	referencing = indexed_table->referenced_set;
-
-	for (dict_foreign_set::const_iterator fk_it = referencing.begin();
-	     fk_it != referencing.end(); ++fk_it) {
-
-		dict_foreign_t*	fk = *fk_it;
-
-		if (fk->referenced_index != index) {
-			continue;
-		}
-
-		ut_ad(indexed_table == fk->referenced_table);
-
-		if (dict_foreign_find_index(
-			    indexed_table, col_names,
-			    fk->referenced_col_names,
-			    fk->n_fields, index,
-			    /*check_charsets=*/TRUE,
-			    /*check_null=*/FALSE,
-			    0,0,0) != NULL) {
-			continue;
-		}
-
-		if (innobase_find_equiv_index(
-			    fk->referenced_col_names,
-			    fk->n_fields,
-			    ha_alter_info->key_info_buffer,
-			    ha_alter_info->index_add_buffer,
-			    ha_alter_info->index_add_count) != NULL) {
-			continue;
-		}
-
-		/* Index cannot be dropped. */
-		trx->error_info = index;
-		return(true);
-	}
-
-	/* Pass 2: constraints defined on this table that use the index. */
-	const dict_foreign_set&	own = indexed_table->foreign_set;
-
-	for (dict_foreign_set::const_iterator fk_it = own.begin();
-	     fk_it != own.end(); ++fk_it) {
-
-		dict_foreign_t*	fk = *fk_it;
-
-		if (fk->foreign_index != index) {
-			continue;
-		}
-
-		ut_ad(indexed_table == fk->foreign_table);
-
-		if (innobase_dropping_foreign(fk, drop_fk, n_drop_fk)) {
-			/* The constraint goes away together with the
-			index. */
-			continue;
-		}
-
-		if (dict_foreign_find_index(
-			    indexed_table, col_names,
-			    fk->foreign_col_names,
-			    fk->n_fields, index,
-			    /*check_charsets=*/TRUE,
-			    /*check_null=*/FALSE,
-			    0, 0, 0) != NULL) {
-			continue;
-		}
-
-		if (innobase_find_equiv_index(
-			    fk->foreign_col_names,
-			    fk->n_fields,
-			    ha_alter_info->key_info_buffer,
-			    ha_alter_info->index_add_buffer,
-			    ha_alter_info->index_add_count) != NULL) {
-			continue;
-		}
-
-		/* Index cannot be dropped. */
-		trx->error_info = index;
-		return(true);
-	}
-
-	return(false);
-}
-
-/** Allows InnoDB to update internal structures with concurrent
-writes blocked (provided that check_if_supported_inplace_alter()
-did not return HA_ALTER_INPLACE_NO_LOCK).
-This will be invoked before inplace_alter_table().
-
-The function validates the request (table readability and corruption,
-table options, reserved index names, index keys, column renames, index
-column lengths, corrupted indexes), collects the indexes and FOREIGN KEY
-constraints to drop or add, and stores the result in
-ha_alter_info->handler_ctx. When the change involves INNOBASE_ALTER_DATA
-work, the remaining preparation is delegated to
-prepare_inplace_alter_table_dict().
-
-Indexes selected for dropping get their to_be_dropped flag set while the
-data dictionary is locked; the flag is cleared again on the err_exit path.
-
-@param altered_table	TABLE object for new version of table.
-@param ha_alter_info	Structure describing changes to be done
-by ALTER TABLE and holding data used during in-place alter.
-
-@retval true		Failure
-@retval false		Success
-*/
-UNIV_INTERN
-bool
-ha_innobase::prepare_inplace_alter_table(
-/*=====================================*/
-	TABLE*			altered_table,
-	Alter_inplace_info*	ha_alter_info)
-{
-	dict_index_t**	drop_index;	/*!< Index to be dropped */
-	ulint		n_drop_index;	/*!< Number of indexes to drop */
-	dict_foreign_t**drop_fk;	/*!< Foreign key constraints to drop */
-	ulint		n_drop_fk;	/*!< Number of foreign keys to drop */
-	dict_foreign_t**add_fk = NULL;	/*!< Foreign key constraints to add */
-	ulint		n_add_fk;	/*!< Number of foreign keys to add */
-	dict_table_t*	indexed_table;	/*!< Table where indexes are created */
-	mem_heap_t*	heap;
-	const char**	col_names;
-	int		error;
-	ulint		flags;
-	ulint		flags2;
-	ulint		max_col_len;
-	ulint		add_autoinc_col_no = ULINT_UNDEFINED;
-	ulonglong	autoinc_col_max_value = 0;
-	ulint		fts_doc_col_no = ULINT_UNDEFINED;
-	bool		add_fts_doc_id = false;
-	bool		add_fts_doc_id_idx = false;
-	bool		add_fts_idx = false;
-
-	DBUG_ENTER("prepare_inplace_alter_table");
-	DBUG_ASSERT(!ha_alter_info->handler_ctx);
-	DBUG_ASSERT(ha_alter_info->create_info);
-	DBUG_ASSERT(!srv_read_only_mode);
-
-	if (UNIV_UNLIKELY(prebuilt->trx->fake_changes)) {
-		DBUG_RETURN(true);
-	}
-
-	/* Init online ddl status variables */
-	onlineddl_rowlog_rows = 0;
-	onlineddl_rowlog_pct_used = 0;
-	onlineddl_pct_progress = 0;
-
-	MONITOR_ATOMIC_INC(MONITOR_PENDING_ALTER_TABLE);
-
-#ifdef UNIV_DEBUG
-	for (dict_index_t* index = dict_table_get_first_index(prebuilt->table);
-	     index;
-	     index = dict_table_get_next_index(index)) {
-		ut_ad(!index->to_be_dropped);
-	}
-#endif /* UNIV_DEBUG */
-
-	ut_d(mutex_enter(&dict_sys->mutex));
-	ut_d(dict_table_check_for_dup_indexes(
-		     prebuilt->table, CHECK_ABORTED_OK));
-	ut_d(mutex_exit(&dict_sys->mutex));
-
-	if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) {
-		/* Nothing to do */
-		goto func_exit;
-	}
-
-	indexed_table = prebuilt->table;
-
-	/* An unreadable table that is not flagged as corrupted may be
-	encrypted with an unavailable key. A corrupted table is handled
-	by the check that follows this block. */
-	if (!indexed_table->is_readable() && !indexed_table->corrupted) {
-		FilSpace space(indexed_table->space, true);
-
-		if (space()) {
-			String str;
-			const char* engine= table_type();
-			char	buf[MAX_FULL_NAME_LEN];
-			ut_format_name(indexed_table->name, TRUE, buf, sizeof(buf));
-
-			push_warning_printf(user_thd, Sql_condition::WARN_LEVEL_WARN,
-				HA_ERR_DECRYPTION_FAILED,
-				"Table %s in file %s is encrypted but encryption service or"
-				" used key_id is not available. "
-				" Can't continue reading table.",
-				buf, space()->chain.start->name);
-
-			/* Fill in the message text before reporting it, as
-			inplace_alter_table() does; otherwise ER_GET_ERRMSG
-			would be raised with an empty description. */
-			get_error_message(HA_ERR_DECRYPTION_FAILED, &str);
-			my_error(ER_GET_ERRMSG, MYF(0), HA_ERR_DECRYPTION_FAILED, str.c_ptr(), engine);
-			DBUG_RETURN(true);
-		}
-	}
-
-	if (indexed_table->corrupted
-	    || dict_table_get_first_index(indexed_table) == NULL
-	    || dict_index_is_corrupted(
-		    dict_table_get_first_index(indexed_table))) {
-		/* The clustered index is corrupted. */
-		my_error(ER_CHECK_NO_SUCH_TABLE, MYF(0));
-		DBUG_RETURN(true);
-	}
-
-	if (ha_alter_info->handler_flags
-	    & Alter_inplace_info::CHANGE_CREATE_OPTION) {
-		/* Check engine specific table options */
-		if (const char* invalid_tbopt = check_table_options(
-			    user_thd, altered_table,
-			    ha_alter_info->create_info,
-			    prebuilt->table->space != 0,
-			    srv_file_format)) {
-			my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0),
-				 table_type(), invalid_tbopt);
-			goto err_exit_no_heap;
-		}
-
-		if (const char* invalid_opt = create_options_are_invalid(
-			    user_thd, altered_table,
-			    ha_alter_info->create_info,
-			    prebuilt->table->space != 0)) {
-			my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0),
-				 table_type(), invalid_opt);
-			goto err_exit_no_heap;
-		}
-	}
-
-	/* Check if any index name is reserved. */
-	if (innobase_index_name_is_reserved(
-		    user_thd,
-		    ha_alter_info->key_info_buffer,
-		    ha_alter_info->key_count)) {
-err_exit_no_heap:
-		/* Common failure exit for paths that own no heap (or have
-		already freed it). */
-		DBUG_ASSERT(prebuilt->trx->dict_operation_lock_mode == 0);
-		if (ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) {
-			online_retry_drop_indexes(prebuilt->table, user_thd);
-		}
-		DBUG_RETURN(true);
-	}
-
-	indexed_table = prebuilt->table;
-
-	/* Check that index keys are sensible */
-	error = innobase_check_index_keys(ha_alter_info, indexed_table);
-
-	if (error) {
-		goto err_exit_no_heap;
-	}
-
-	/* Prohibit renaming a column to something that the table
-	already contains. */
-	if (ha_alter_info->handler_flags
-	    & Alter_inplace_info::ALTER_COLUMN_NAME) {
-		List_iterator_fast<Create_field> cf_it(
-			ha_alter_info->alter_info->create_list);
-
-		for (Field** fp = table->field; *fp; fp++) {
-			if (!((*fp)->flags & FIELD_IS_RENAMED)) {
-				continue;
-			}
-
-			const char* name = 0;
-
-			cf_it.rewind();
-			while (Create_field* cf = cf_it++) {
-				if (cf->field == *fp) {
-					name = cf->field_name.str;
-					goto check_if_ok_to_rename;
-				}
-			}
-
-			ut_error;
-check_if_ok_to_rename:
-			/* Prohibit renaming a column from FTS_DOC_ID
-			if full-text indexes exist. */
-			if (!my_strcasecmp(system_charset_info,
-					   (*fp)->field_name.str,
-					   FTS_DOC_ID_COL_NAME)
-			    && innobase_fulltext_exist(altered_table)) {
-				my_error(ER_INNODB_FT_WRONG_DOCID_COLUMN,
-					 MYF(0), name);
-				goto err_exit_no_heap;
-			}
-
-			/* Prohibit renaming a column to an internal column. */
-			const char*	s = prebuilt->table->col_names;
-			unsigned j;
-			/* Skip user columns.
-			MySQL should have checked these already.
-			We want to allow renaming of c1 to c2, c2 to c1. */
-			for (j = 0; j < table->s->fields; j++) {
-				s += strlen(s) + 1;
-			}
-
-			for (; j < prebuilt->table->n_def; j++) {
-				if (!my_strcasecmp(
-					    system_charset_info, name, s)) {
-					my_error(ER_WRONG_COLUMN_NAME, MYF(0),
-						 s);
-					goto err_exit_no_heap;
-				}
-
-				s += strlen(s) + 1;
-			}
-		}
-	}
-
-	if (!innobase_table_flags(altered_table,
-				  ha_alter_info->create_info,
-				  user_thd,
-				  srv_file_per_table
-				  || indexed_table->space != 0,
-				  &flags, &flags2)) {
-		goto err_exit_no_heap;
-	}
-
-	max_col_len = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags);
-
-	/* Check each index's column length to make sure they do not
-	exceed limit */
-	for (ulint i = 0; i < ha_alter_info->index_add_count; i++) {
-		const KEY* key = &ha_alter_info->key_info_buffer[
-			ha_alter_info->index_add_buffer[i]];
-
-		if (key->flags & HA_FULLTEXT) {
-			/* The column length does not matter for
-			fulltext search indexes. But, UNIQUE
-			fulltext indexes are not supported. */
-			DBUG_ASSERT(!(key->flags & HA_NOSAME));
-			DBUG_ASSERT(!(key->flags & HA_KEYFLAG_MASK
-				      & ~(HA_FULLTEXT
-					  | HA_PACK_KEY
-					  | HA_BINARY_PACK_KEY)));
-			add_fts_idx = true;
-			continue;
-		}
-
-		if (innobase_check_column_length(max_col_len, key)) {
-			my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
-				 max_col_len);
-			goto err_exit_no_heap;
-		}
-	}
-
-	/* We won't be allowed to add fts index to a table with
-	fts indexes already but without AUX_HEX_NAME set.
-	This means the aux tables of the table failed to
-	rename to hex format but new created aux tables
-	shall be in hex format, which is contradictory. */
-	if (!DICT_TF2_FLAG_IS_SET(indexed_table, DICT_TF2_FTS_AUX_HEX_NAME)
-	    && indexed_table->fts != NULL && add_fts_idx) {
-		my_error(ER_INNODB_FT_AUX_NOT_HEX_ID, MYF(0));
-		goto err_exit_no_heap;
-	}
-
-	/* Check existing index definitions for too-long column
-	prefixes as well, in case max_col_len shrunk. */
-	for (const dict_index_t* index
-		     = dict_table_get_first_index(indexed_table);
-	     index;
-	     index = dict_table_get_next_index(index)) {
-		if (index->type & DICT_FTS) {
-			DBUG_ASSERT(index->type == DICT_FTS
-				    || (index->type & DICT_CORRUPT));
-
-			/* We need to drop any corrupted fts indexes
-			before we add a new fts index. */
-			if (add_fts_idx && index->type & DICT_CORRUPT) {
-				ib_errf(user_thd, IB_LOG_LEVEL_ERROR,
-					ER_INNODB_INDEX_CORRUPT,
-					"Fulltext index '%s' is corrupt. "
-					"you should drop this index first.",
-					index->name);
-
-				goto err_exit_no_heap;
-			}
-
-			continue;
-		}
-
-		for (ulint i = 0; i < dict_index_get_n_fields(index); i++) {
-			const dict_field_t* field
-				= dict_index_get_nth_field(index, i);
-			if (field->prefix_len > max_col_len) {
-				my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
-					 max_col_len);
-				goto err_exit_no_heap;
-			}
-		}
-	}
-
-	n_drop_index = 0;
-	n_drop_fk = 0;
-
-	/* From here on, failures that may own the heap or may have flagged
-	indexes as to_be_dropped must leave through err_exit. */
-	if (ha_alter_info->handler_flags
-	    & (INNOBASE_ALTER_NOREBUILD | INNOBASE_ALTER_REBUILD)) {
-		heap = mem_heap_create(1024);
-
-		if (ha_alter_info->handler_flags
-		    & Alter_inplace_info::ALTER_COLUMN_NAME) {
-			col_names = innobase_get_col_names(
-				ha_alter_info, altered_table, table,
-				indexed_table, heap);
-		} else {
-			col_names = NULL;
-		}
-	} else {
-		heap = NULL;
-		col_names = NULL;
-	}
-
-	if (ha_alter_info->handler_flags
-	    & Alter_inplace_info::DROP_FOREIGN_KEY) {
-		DBUG_ASSERT(ha_alter_info->alter_info->drop_list.elements > 0);
-
-		drop_fk = static_cast<dict_foreign_t**>(
-			mem_heap_alloc(
-				heap,
-				ha_alter_info->alter_info->drop_list.elements
-				* sizeof(dict_foreign_t*)));
-
-		List_iterator<Alter_drop> drop_it(
-			ha_alter_info->alter_info->drop_list);
-
-		while (Alter_drop* drop = drop_it++) {
-			if (drop->type != Alter_drop::FOREIGN_KEY) {
-				continue;
-			}
-
-			for (dict_foreign_set::iterator it
-				     = prebuilt->table->foreign_set.begin();
-			     it != prebuilt->table->foreign_set.end();
-			     ++it) {
-
-				dict_foreign_t*	foreign = *it;
-				const char* fid = strchr(foreign->id, '/');
-
-				DBUG_ASSERT(fid);
-				/* If no database/ prefix was present in
-				the FOREIGN KEY constraint name, compare
-				to the full constraint name. */
-				fid = fid ? fid + 1 : foreign->id;
-
-				if (!my_strcasecmp(system_charset_info,
-						   fid, drop->name)) {
-					drop_fk[n_drop_fk++] = foreign;
-					goto found_fk;
-				}
-			}
-
-			my_error(ER_CANT_DROP_FIELD_OR_KEY, MYF(0),
-				 drop->type_name(), drop->name);
-			goto err_exit;
-found_fk:
-			continue;
-		}
-
-		DBUG_ASSERT(n_drop_fk > 0);
-		DBUG_ASSERT(n_drop_fk
-			    == ha_alter_info->alter_info->drop_list.elements);
-	} else {
-		drop_fk = NULL;
-	}
-
-	if (ha_alter_info->index_drop_count) {
-		dict_index_t*	drop_primary = NULL;
-
-		DBUG_ASSERT(ha_alter_info->handler_flags
-			    & (Alter_inplace_info::DROP_INDEX
-			       | Alter_inplace_info::DROP_UNIQUE_INDEX
-			       | Alter_inplace_info::DROP_PK_INDEX));
-		/* Check which indexes to drop. One extra slot is reserved
-		for the internal FTS_DOC_ID_INDEX that may be appended
-		below. */
-		drop_index = static_cast<dict_index_t**>(
-			mem_heap_alloc(
-				heap, (ha_alter_info->index_drop_count + 1)
-				* sizeof *drop_index));
-
-		for (uint i = 0; i < ha_alter_info->index_drop_count; i++) {
-			const KEY*	key
-				= ha_alter_info->index_drop_buffer[i];
-			dict_index_t*	index
-				= dict_table_get_index_on_name_and_min_id(
-					indexed_table, key->name);
-
-			if (!index) {
-				push_warning_printf(
-					user_thd,
-					Sql_condition::WARN_LEVEL_WARN,
-					HA_ERR_WRONG_INDEX,
-					"InnoDB could not find key "
-					"with name %s", key->name);
-			} else {
-				ut_ad(!index->to_be_dropped);
-				if (!dict_index_is_clust(index)) {
-					drop_index[n_drop_index++] = index;
-				} else {
-					drop_primary = index;
-				}
-			}
-		}
-
-		/* If all FULLTEXT indexes were removed, drop an
-		internal FTS_DOC_ID_INDEX as well, unless it exists in
-		the table. */
-
-		if (innobase_fulltext_exist(table)
-		    && !innobase_fulltext_exist(altered_table)
-		    && !DICT_TF2_FLAG_IS_SET(
-			indexed_table, DICT_TF2_FTS_HAS_DOC_ID)) {
-			dict_index_t*	fts_doc_index
-				= dict_table_get_index_on_name(
-					indexed_table, FTS_DOC_ID_INDEX_NAME);
-
-			// Add some fault tolerance for non-debug builds.
-			if (fts_doc_index == NULL) {
-				goto check_if_can_drop_indexes;
-			}
-
-			DBUG_ASSERT(!fts_doc_index->to_be_dropped);
-
-			for (uint i = 0; i < table->s->keys; i++) {
-				if (!my_strcasecmp(
-					    system_charset_info,
-					    FTS_DOC_ID_INDEX_NAME,
-					    table->key_info[i].name)) {
-					/* The index exists in the MySQL
-					data dictionary. Do not drop it,
-					even though it is no longer needed
-					by InnoDB fulltext search. */
-					goto check_if_can_drop_indexes;
-				}
-			}
-
-			drop_index[n_drop_index++] = fts_doc_index;
-		}
-
-check_if_can_drop_indexes:
-		/* Check if the indexes can be dropped. */
-
-		/* Prevent a race condition between DROP INDEX and
-		CREATE TABLE adding FOREIGN KEY constraints. */
-		row_mysql_lock_data_dictionary(prebuilt->trx);
-
-		if (!n_drop_index) {
-			drop_index = NULL;
-		} else {
-			/* Flag all indexes that are to be dropped. */
-			for (ulint i = 0; i < n_drop_index; i++) {
-				ut_ad(!drop_index[i]->to_be_dropped);
-				drop_index[i]->to_be_dropped = 1;
-			}
-		}
-
-		if (prebuilt->trx->check_foreigns) {
-			for (uint i = 0; i < n_drop_index; i++) {
-				dict_index_t*	index = drop_index[i];
-
-				if (innobase_check_foreign_key_index(
-					    ha_alter_info, index,
-					    indexed_table, col_names,
-					    prebuilt->trx, drop_fk, n_drop_fk)) {
-					row_mysql_unlock_data_dictionary(
-						prebuilt->trx);
-					prebuilt->trx->error_info = index;
-					print_error(HA_ERR_DROP_INDEX_FK,
-						    MYF(0));
-					goto err_exit;
-				}
-			}
-
-			/* If a primary index is dropped, need to check
-			any depending foreign constraints get affected */
-			if (drop_primary
-			    && innobase_check_foreign_key_index(
-				    ha_alter_info, drop_primary,
-				    indexed_table, col_names,
-				    prebuilt->trx, drop_fk, n_drop_fk)) {
-				row_mysql_unlock_data_dictionary(prebuilt->trx);
-				print_error(HA_ERR_DROP_INDEX_FK, MYF(0));
-				goto err_exit;
-			}
-		}
-
-		row_mysql_unlock_data_dictionary(prebuilt->trx);
-	} else {
-		drop_index = NULL;
-	}
-
-	/* Check if any of the existing indexes are marked as corruption
-	and if they are, refuse adding more indexes. */
-	if (ha_alter_info->handler_flags & Alter_inplace_info::ADD_INDEX) {
-		for (dict_index_t* index = dict_table_get_first_index(indexed_table);
-		     index != NULL; index = dict_table_get_next_index(index)) {
-
-			if (!index->to_be_dropped && dict_index_is_corrupted(index)) {
-				char	index_name[MAX_FULL_NAME_LEN + 1];
-
-				innobase_format_name(index_name, sizeof index_name,
-						     index->name, TRUE);
-
-				my_error(ER_INDEX_CORRUPT, MYF(0), index_name);
-				goto err_exit;
-			}
-		}
-	}
-
-	n_add_fk = 0;
-
-	if (ha_alter_info->handler_flags
-	    & Alter_inplace_info::ADD_FOREIGN_KEY) {
-		ut_ad(!prebuilt->trx->check_foreigns);
-
-		add_fk = static_cast<dict_foreign_t**>(
-			mem_heap_zalloc(
-				heap,
-				ha_alter_info->alter_info->key_list.elements
-				* sizeof(dict_foreign_t*)));
-
-		if (!innobase_get_foreign_key_info(
-			    ha_alter_info, table_share,
-			    prebuilt->table, col_names,
-			    drop_index, n_drop_index,
-			    add_fk, &n_add_fk, prebuilt->trx)) {
-err_exit:
-			/* Common failure exit once the heap may exist:
-			undo the to_be_dropped flags, release the heap, then
-			continue with the heap-less exit. */
-			if (n_drop_index) {
-				row_mysql_lock_data_dictionary(prebuilt->trx);
-
-				/* Clear the to_be_dropped flags, which might
-				have been set at this point. */
-				for (ulint i = 0; i < n_drop_index; i++) {
-					DBUG_ASSERT(*drop_index[i]->name
-						    != TEMP_INDEX_PREFIX);
-					drop_index[i]->to_be_dropped = 0;
-				}
-
-				row_mysql_unlock_data_dictionary(prebuilt->trx);
-			}
-
-			if (heap) {
-				mem_heap_free(heap);
-			}
-
-			goto err_exit_no_heap;
-		}
-	}
-
-	if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)
-	    || ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
-		== Alter_inplace_info::CHANGE_CREATE_OPTION
-		&& !innobase_need_rebuild(ha_alter_info, table))) {
-
-		/* No data needs to be touched. A context is created only
-		when a heap was allocated above. */
-		if (heap) {
-			ha_alter_info->handler_ctx
-				= new ha_innobase_inplace_ctx(
-					prebuilt,
-					drop_index, n_drop_index,
-					drop_fk, n_drop_fk,
-					add_fk, n_add_fk,
-					ha_alter_info->online,
-					heap, indexed_table,
-					col_names, ULINT_UNDEFINED, 0, 0);
-		}
-
-func_exit:
-		DBUG_ASSERT(prebuilt->trx->dict_operation_lock_mode == 0);
-		if (ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE) {
-			online_retry_drop_indexes(prebuilt->table, user_thd);
-		}
-		DBUG_RETURN(false);
-	}
-
-	/* If we are to build a full-text search index, check whether
-	the table already has a DOC ID column.  If not, we will need to
-	add a Doc ID hidden column and rebuild the primary index */
-	if (innobase_fulltext_exist(altered_table)) {
-		ulint	doc_col_no;
-
-		if (!innobase_fts_check_doc_id_col(
-			    prebuilt->table, altered_table, &fts_doc_col_no)) {
-			fts_doc_col_no = altered_table->s->stored_fields;
-			add_fts_doc_id = true;
-			add_fts_doc_id_idx = true;
-
-			push_warning_printf(
-				user_thd,
-				Sql_condition::WARN_LEVEL_WARN,
-				HA_ERR_WRONG_INDEX,
-				"InnoDB rebuilding table to add column "
-				FTS_DOC_ID_COL_NAME);
-		} else if (fts_doc_col_no == ULINT_UNDEFINED) {
-			goto err_exit;
-		}
-
-		switch (innobase_fts_check_doc_id_index(
-				prebuilt->table, altered_table, &doc_col_no)) {
-		case FTS_NOT_EXIST_DOC_ID_INDEX:
-			add_fts_doc_id_idx = true;
-			break;
-		case FTS_INCORRECT_DOC_ID_INDEX:
-			my_error(ER_INNODB_FT_WRONG_DOCID_INDEX, MYF(0),
-				 FTS_DOC_ID_INDEX_NAME);
-			goto err_exit;
-		case FTS_EXIST_DOC_ID_INDEX:
-			DBUG_ASSERT(doc_col_no == fts_doc_col_no
-				    || doc_col_no == ULINT_UNDEFINED
-				    || (ha_alter_info->handler_flags
-					& (Alter_inplace_info::ALTER_COLUMN_ORDER
-					   | Alter_inplace_info::DROP_COLUMN
-					   | Alter_inplace_info::ADD_COLUMN)));
-		}
-	}
-
-	/* See if an AUTO_INCREMENT column was added. i counts all columns
-	of the altered table, innodb_idx only those stored in the database. */
-	uint i = 0, innodb_idx= 0;
-	List_iterator_fast<Create_field> cf_it(
-		ha_alter_info->alter_info->create_list);
-	while (const Create_field* new_field = cf_it++) {
-		const Field*	field;
-		if (!new_field->stored_in_db()) {
-			i++;
-			continue;
-		}
-
-		DBUG_ASSERT(i < altered_table->s->fields);
-		DBUG_ASSERT(innodb_idx < altered_table->s->stored_fields);
-
-		for (uint old_i = 0; table->field[old_i]; old_i++) {
-			if (!table->field[old_i]->stored_in_db())
-				continue;
-			if (new_field->field == table->field[old_i]) {
-				goto found_col;
-			}
-		}
-
-		/* This is an added column. */
-		DBUG_ASSERT(!new_field->field);
-		DBUG_ASSERT(ha_alter_info->handler_flags
-			    & Alter_inplace_info::ADD_COLUMN);
-
-		field = altered_table->field[i];
-
-		DBUG_ASSERT((MTYP_TYPENR(field->unireg_check)
-			     == Field::NEXT_NUMBER)
-			    == !!(field->flags & AUTO_INCREMENT_FLAG));
-
-		if (field->flags & AUTO_INCREMENT_FLAG) {
-			if (add_autoinc_col_no != ULINT_UNDEFINED) {
-				/* This should have been blocked earlier. */
-				ut_ad(0);
-				my_error(ER_WRONG_AUTO_KEY, MYF(0));
-				goto err_exit;
-			}
-			add_autoinc_col_no = innodb_idx;
-
-			autoinc_col_max_value = innobase_get_int_col_max_value(
-				field);
-		}
-found_col:
-		i++;
-		innodb_idx++;
-	}
-
-	DBUG_ASSERT(heap);
-	DBUG_ASSERT(user_thd == prebuilt->trx->mysql_thd);
-	DBUG_ASSERT(!ha_alter_info->handler_ctx);
-
-	ha_alter_info->handler_ctx = new ha_innobase_inplace_ctx(
-		prebuilt,
-		drop_index, n_drop_index,
-		drop_fk, n_drop_fk, add_fk, n_add_fk,
-		ha_alter_info->online,
-		heap, prebuilt->table, col_names,
-		add_autoinc_col_no,
-		ha_alter_info->create_info->auto_increment_value,
-		autoinc_col_max_value);
-
-	DBUG_RETURN(prepare_inplace_alter_table_dict(
-			    ha_alter_info, altered_table, table,
-			    table_share->table_name.str,
-			    flags, flags2,
-			    fts_doc_col_no, add_fts_doc_id,
-			    add_fts_doc_id_idx, prebuilt));
-}
-
-/** Get the name of an erroneous key.
-@param[in] error_key_num InnoDB number of the erroneus key
-@param[in] ha_alter_info changes that were being performed
-@param[in] table InnoDB table
-@return the name of the erroneous key */
-static
-const char*
-get_error_key_name(
- ulint error_key_num,
- const Alter_inplace_info* ha_alter_info,
- const dict_table_t* table)
-{
- if (error_key_num == ULINT_UNDEFINED) {
- return(FTS_DOC_ID_INDEX_NAME);
- } else if (ha_alter_info->key_count == 0) {
- return(dict_table_get_first_index(table)->name);
- } else {
- return(ha_alter_info->key_info_buffer[error_key_num].name);
- }
-}
-
-/** Alter the table structure in-place with operations
-specified using Alter_inplace_info.
-The level of concurrency allowed during this operation depends
-on the return value from check_if_supported_inplace_alter().
-
-Nothing is built, and success is returned right away, when handler_flags
-has no INNOBASE_ALTER_DATA bit set, or when the only non-ignored change is
-CHANGE_CREATE_OPTION and innobase_need_rebuild() reports that no rebuild
-is needed. The build is also skipped, and success returned, when the
-table is unreadable or discarded.
-
-Otherwise the indexes are built with row_merge_build_indexes() and, for
-an online table rebuild, the row log is applied with
-row_log_table_apply(). On failure the error is reported to the SQL layer,
-prebuilt->trx->error_info is cleared and ctx->trx->error_state is reset
-to DB_SUCCESS before true is returned.
-
-@param altered_table TABLE object for new version of table.
-@param ha_alter_info Structure describing changes to be done
-by ALTER TABLE and holding data used during in-place alter.
-
-@retval true Failure
-@retval false Success
-*/
-UNIV_INTERN
-bool
-ha_innobase::inplace_alter_table(
-/*=============================*/
- TABLE* altered_table,
- Alter_inplace_info* ha_alter_info)
-{
- dberr_t error;
-
- DBUG_ENTER("inplace_alter_table");
- DBUG_ASSERT(!srv_read_only_mode);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
- ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- DEBUG_SYNC(user_thd, "innodb_inplace_alter_table_enter");
-
- if (!(ha_alter_info->handler_flags & INNOBASE_ALTER_DATA)) {
-ok_exit: /* shared success exit, also reached by goto from below */
- DEBUG_SYNC(user_thd, "innodb_after_inplace_alter_table");
- DBUG_RETURN(false);
- }
-
- if ((ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)
- == Alter_inplace_info::CHANGE_CREATE_OPTION
- && !innobase_need_rebuild(ha_alter_info, table)) {
- goto ok_exit;
- }
-
- ha_innobase_inplace_ctx* ctx
- = static_cast<ha_innobase_inplace_ctx*>
- (ha_alter_info->handler_ctx);
-
- DBUG_ASSERT(ctx);
- DBUG_ASSERT(ctx->trx);
- DBUG_ASSERT(ctx->prebuilt == prebuilt);
-
- if (prebuilt->table->file_unreadable
- || dict_table_is_discarded(prebuilt->table)) {
- goto all_done; /* skip the build; continue at the DB_SUCCESS case */
- }
-
- /* Read the clustered index of the table and build
- indexes based on this information using temporary
- files and merge sort. */
- DBUG_EXECUTE_IF("innodb_OOM_inplace_alter",
- error = DB_OUT_OF_MEMORY; goto oom;);
-
- error = row_merge_build_indexes(
- prebuilt->trx,
- prebuilt->table, ctx->new_table,
- ctx->online,
- ctx->add_index, ctx->add_key_numbers, ctx->num_to_add_index,
- altered_table, ctx->add_cols, ctx->col_map,
- ctx->add_autoinc, ctx->sequence);
-#ifndef DBUG_OFF
-oom: /* target of the debug-only fault injection above */
-#endif /* !DBUG_OFF */
- if (error == DB_SUCCESS && ctx->online && ctx->need_rebuild()) {
- DEBUG_SYNC_C("row_log_table_apply1_before");
- error = row_log_table_apply(
- ctx->thr, prebuilt->table, altered_table);
- }
-
- /* Reset the online DDL status variables now that the build
- phase is over. */
- onlineddl_rowlog_rows = 0;
- onlineddl_rowlog_pct_used = 0;
- onlineddl_pct_progress = 0;
-
- DEBUG_SYNC_C("inplace_after_index_build");
-
- DBUG_EXECUTE_IF("create_index_fail",
- error = DB_DUPLICATE_KEY;
- prebuilt->trx->error_key_num = ULINT_UNDEFINED;);
-
- /* After an error, remove all those index definitions
- from the dictionary which were defined.
- NOTE(review): the switch below only reports the error; the
- removal itself is presumably done by the rollback path - confirm. */
-
- switch (error) {
- KEY* dup_key;
- all_done: /* entered by goto when the build was skipped */
- case DB_SUCCESS:
- ut_d(mutex_enter(&dict_sys->mutex));
- ut_d(dict_table_check_for_dup_indexes(
- prebuilt->table, CHECK_PARTIAL_OK));
- ut_d(mutex_exit(&dict_sys->mutex));
- /* prebuilt->table->n_ref_count can be anything here,
- given that we hold at most a shared lock on the table. */
- goto ok_exit;
- case DB_DUPLICATE_KEY:
- if (prebuilt->trx->error_key_num == ULINT_UNDEFINED
- || ha_alter_info->key_count == 0) {
- /* This should be the hidden index on
- FTS_DOC_ID, or there is no PRIMARY KEY in the
- table. Either way, we should be seeing and
- reporting a bogus duplicate key error. */
- dup_key = NULL;
- } else {
- DBUG_ASSERT(prebuilt->trx->error_key_num
- < ha_alter_info->key_count);
- dup_key = &ha_alter_info->key_info_buffer[
- prebuilt->trx->error_key_num];
- }
- print_keydup_error(altered_table, dup_key, MYF(0));
- break;
- case DB_ONLINE_LOG_TOO_BIG:
- DBUG_ASSERT(ctx->online);
- my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0),
- get_error_key_name(prebuilt->trx->error_key_num,
- ha_alter_info, prebuilt->table));
- break;
- case DB_INDEX_CORRUPT:
- my_error(ER_INDEX_CORRUPT, MYF(0),
- get_error_key_name(prebuilt->trx->error_key_num,
- ha_alter_info, prebuilt->table));
- break;
- case DB_DECRYPTION_FAILED: {
- String str;
- const char* engine= table_type();
- get_error_message(HA_ERR_DECRYPTION_FAILED, &str);
- my_error(ER_GET_ERRMSG, MYF(0), HA_ERR_DECRYPTION_FAILED, str.c_ptr(), engine);
- break;
- }
- default:
- my_error_innodb(error,
- table_share->table_name.str,
- prebuilt->table->flags);
- }
-
- /* prebuilt->table->n_ref_count can be anything here, given
- that we hold at most a shared lock on the table.
- All error cases of the switch end up here. */
- prebuilt->trx->error_info = NULL;
- ctx->trx->error_state = DB_SUCCESS;
-
- DBUG_RETURN(true);
-}
-
-/** Release the modification log of an online table rebuild, if the
-clustered index of the table still has one, and mark the clustered index
-as ONLINE_INDEX_COMPLETE. The work is done under the X-latch of the
-clustered index.
-@param table	table that was being rebuilt online */
-static
-void
-innobase_online_rebuild_log_free(
-/*=============================*/
-	dict_table_t*	table)
-{
-	ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	dict_index_t*	primary = dict_table_get_first_index(table);
-
-	rw_lock_x_lock(&primary->lock);
-
-	if (primary->online_log != NULL) {
-		/* A log can only be attached while the rebuild is in
-		progress. */
-		ut_ad(dict_index_get_online_status(primary)
-		      == ONLINE_INDEX_CREATION);
-
-		primary->online_status = ONLINE_INDEX_COMPLETE;
-		row_log_free(primary->online_log);
-
-		DEBUG_SYNC_C("innodb_online_rebuild_log_free_aborted");
-	}
-
-	DBUG_ASSERT(dict_index_get_online_status(primary)
-		    == ONLINE_INDEX_COMPLETE);
-
-	rw_lock_x_unlock(&primary->lock);
-}
-
-/** Roll back a secondary index creation: drop the indexes that carry the
-temporary index prefix, and release the table's FTS data when nothing
-needs it any more.
-@param user_table	InnoDB table
-@param table		the TABLE
-@param locked		TRUE=table locked, FALSE=may need to do a lazy drop
-@param trx		the transaction
-*/
-static MY_ATTRIBUTE((nonnull))
-void
-innobase_rollback_sec_index(
-/*========================*/
-	dict_table_t*	user_table,
-	const TABLE*	table,
-	ibool		locked,
-	trx_t*		trx)
-{
-	row_merge_drop_indexes(trx, user_table, locked);
-
-	if (!user_table->fts) {
-		/* There is no FTS data to release. */
-		return;
-	}
-
-	if (DICT_TF2_FLAG_IS_SET(user_table, DICT_TF2_FTS_HAS_DOC_ID)) {
-		/* The table has an FTS_DOC_ID column; keep
-		table->fts. */
-		return;
-	}
-
-	if (innobase_fulltext_exist(table)) {
-		/* The TABLE still has a fulltext index. */
-		return;
-	}
-
-	fts_free(user_table);
-}
-
-/** Undo inside the storage engine what prepare_inplace_alter_table()
-and inplace_alter_table() have done. Note that the allowed level of
-concurrency during this operation will be the same as for
-inplace_alter_table() and thus might be higher than during
-prepare_inplace_alter_table(). (E.g concurrent writes were blocked
-during prepare, but might not be during commit).
-
-For a table rebuild, the online rebuild log is freed and the new table
-(with its FTS auxiliary tables, if any) is dropped. Otherwise the
-partially created secondary indexes are dropped. Afterwards the
-FOREIGN KEY objects that were to be added are freed and the to_be_dropped
-flags of the indexes that were to be dropped are cleared.
-
-@param ha_alter_info	Data used during in-place alter.
-@param table		the TABLE
-@param prebuilt		the prebuilt struct
-@retval true		Failure
-@retval false		Success
-*/
-inline MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-rollback_inplace_alter_table(
-/*=========================*/
-	Alter_inplace_info*	ha_alter_info,
-	const TABLE*		table,
-	row_prebuilt_t*		prebuilt)
-{
-	bool	fail = false;
-
-	ha_innobase_inplace_ctx*	ctx
-		= static_cast<ha_innobase_inplace_ctx*>(
-			ha_alter_info->handler_ctx);
-
-	DBUG_ENTER("rollback_inplace_alter_table");
-
-	/* If we have not started a transaction yet, (almost) nothing
-	has been or needs to be done, and this whole part is skipped. */
-	if (ctx && ctx->trx) {
-		row_mysql_lock_data_dictionary(ctx->trx);
-
-		if (!ctx->need_rebuild()) {
-			DBUG_ASSERT(!(ha_alter_info->handler_flags
-				      & Alter_inplace_info::ADD_PK_INDEX));
-			DBUG_ASSERT(ctx->new_table == prebuilt->table);
-
-			trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX);
-
-			innobase_rollback_sec_index(
-				prebuilt->table, table, FALSE, ctx->trx);
-		} else {
-			/* Remember the flags for error reporting before
-			the table is dropped. */
-			const ulint	new_flags = ctx->new_table->flags;
-			dberr_t		drop_err;
-
-			/* DML threads can access ctx->new_table via the
-			online rebuild log. Free it first. */
-			innobase_online_rebuild_log_free(prebuilt->table);
-
-			/* Since the FTS index specific auxiliary tables has
-			not yet registered with "table->fts" by
-			fts_add_index(), we will need explicitly delete
-			them here */
-			if (DICT_TF2_FLAG_IS_SET(
-				    ctx->new_table, DICT_TF2_FTS)) {
-
-				drop_err = innobase_drop_fts_index_table(
-					ctx->new_table, ctx->trx);
-
-				if (drop_err != DB_SUCCESS) {
-					my_error_innodb(
-						drop_err,
-						table->s->table_name.str,
-						new_flags);
-					fail = true;
-				}
-			}
-
-			/* Drop the table. */
-			dict_table_close(ctx->new_table, TRUE, FALSE);
-
-#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG
-			/* Nobody should have initialized the stats of the
-			newly created table yet. When this is the case, we
-			know that it has not been added for background stats
-			gathering. */
-			ut_a(!ctx->new_table->stat_initialized);
-#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */
-
-			drop_err = row_merge_drop_table(
-				ctx->trx, ctx->new_table);
-
-			if (drop_err != DB_SUCCESS) {
-				my_error_innodb(
-					drop_err, table->s->table_name.str,
-					new_flags);
-				fail = true;
-			}
-		}
-
-		trx_commit_for_mysql(ctx->trx);
-		row_mysql_unlock_data_dictionary(ctx->trx);
-		trx_free_for_mysql(ctx->trx);
-	}
-
-#ifndef DBUG_OFF
-	dict_index_t* clust_index = dict_table_get_first_index(
-		prebuilt->table);
-	DBUG_ASSERT(!clust_index->online_log);
-	DBUG_ASSERT(dict_index_get_online_status(clust_index)
-		    == ONLINE_INDEX_COMPLETE);
-#endif /* !DBUG_OFF */
-
-	if (ctx) {
-		DBUG_ASSERT(ctx->prebuilt == prebuilt);
-
-		/* Free the FOREIGN KEY objects that were to be added. */
-		for (ulint fk_no = 0; fk_no < ctx->num_to_add_fk; fk_no++) {
-			dict_foreign_free(ctx->add_fk[fk_no]);
-		}
-
-		if (ctx->num_to_drop_index) {
-			row_mysql_lock_data_dictionary(prebuilt->trx);
-
-			/* Clear the to_be_dropped flags
-			in the data dictionary cache.
-			The flags may already have been cleared,
-			in case an error was detected in
-			commit_inplace_alter_table(). */
-			for (ulint idx_no = 0;
-			     idx_no < ctx->num_to_drop_index; idx_no++) {
-				dict_index_t*	doomed
-					= ctx->drop_index[idx_no];
-				DBUG_ASSERT(*doomed->name
-					    != TEMP_INDEX_PREFIX);
-
-				doomed->to_be_dropped = 0;
-			}
-
-			row_mysql_unlock_data_dictionary(prebuilt->trx);
-		}
-	}
-
-	trx_commit_for_mysql(prebuilt->trx);
-	MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
-	DBUG_RETURN(fail);
-}
-
-/** Drop a FOREIGN KEY constraint from the data dictionary tables.
-Evaluates an internal SQL procedure inside trx that deletes the rows
-whose ID equals foreign_id from SYS_FOREIGN and SYS_FOREIGN_COLS.
-On failure the error is reported with my_error_innodb() and
-trx->error_state is reset to DB_SUCCESS before returning.
-The debug assertions expect trx to be a TRX_DICT_OP_INDEX transaction
-with dict_operation_lock_mode == RW_X_LATCH, and dict_sys->mutex to be
-owned by the caller.
-@param trx data dictionary transaction
-@param table_name Table name in MySQL (used here only for error reporting)
-@param foreign_id Foreign key constraint identifier
-@retval true Failure
-@retval false Success */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-innobase_drop_foreign_try(
-/*======================*/
- trx_t* trx,
- const char* table_name,
- const char* foreign_id)
-{
- DBUG_ENTER("innobase_drop_foreign_try");
-
- DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
- ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- /* Drop the constraint from the data dictionary.
- The :id placeholder is bound to foreign_id below. */
- static const char sql[] =
- "PROCEDURE DROP_FOREIGN_PROC () IS\n"
- "BEGIN\n"
- "DELETE FROM SYS_FOREIGN WHERE ID=:id;\n"
- "DELETE FROM SYS_FOREIGN_COLS WHERE ID=:id;\n"
- "END;\n";
-
- dberr_t error;
- pars_info_t* info;
-
- info = pars_info_create();
- pars_info_add_str_literal(info, "id", foreign_id);
-
- trx->op_info = "dropping foreign key constraint from dictionary";
- error = que_eval_sql(info, sql, FALSE, trx);
- trx->op_info = "";
-
- DBUG_EXECUTE_IF("ib_drop_foreign_error",
- error = DB_OUT_OF_FILE_SPACE;); /* debug-only fault injection */
-
- if (error != DB_SUCCESS) {
- my_error_innodb(error, table_name, 0);
- trx->error_state = DB_SUCCESS;
- DBUG_RETURN(true);
- }
-
- DBUG_RETURN(false);
-}
-
-/** Rename a column in the data dictionary tables.
-Unless new_clustered is set, the column name is updated in SYS_COLUMNS
-and in every matching SYS_FIELDS row of the indexes of user_table.
-In all cases the name is then updated in SYS_FOREIGN_COLS, both for the
-constraints in user_table->foreign_set (FOR_COL_NAME) and for those in
-user_table->referenced_set (REF_COL_NAME). When new_clustered is set,
-the constraints that were modified are finally removed from the
-dictionary cache with dict_foreign_remove_from_cache().
-On any SQL error the error is reported with my_error_innodb(),
-trx->error_state is reset to DB_SUCCESS and true is returned.
-@param user_table InnoDB table that was being altered
-@param trx data dictionary transaction
-@param table_name Table name in MySQL (used here only for error reporting)
-@param nth_col 0-based index of the column
-@param from old column name
-@param to new column name
-@param new_clustered whether the table has been rebuilt
-@retval true Failure
-@retval false Success */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-innobase_rename_column_try(
-/*=======================*/
- const dict_table_t* user_table,
- trx_t* trx,
- const char* table_name,
- ulint nth_col,
- const char* from,
- const char* to,
- bool new_clustered)
-{
- pars_info_t* info;
- dberr_t error;
-
- DBUG_ENTER("innobase_rename_column_try");
-
- DBUG_ASSERT(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
- ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_ad(mutex_own(&dict_sys->mutex));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (new_clustered) {
- goto rename_foreign; /* skip the SYS_COLUMNS and SYS_FIELDS updates */
- }
-
- info = pars_info_create();
-
- pars_info_add_ull_literal(info, "tableid", user_table->id);
- pars_info_add_int4_literal(info, "nth", nth_col);
- pars_info_add_str_literal(info, "old", from);
- pars_info_add_str_literal(info, "new", to);
-
- trx->op_info = "renaming column in SYS_COLUMNS";
-
- error = que_eval_sql(
- info,
- "PROCEDURE RENAME_SYS_COLUMNS_PROC () IS\n"
- "BEGIN\n"
- "UPDATE SYS_COLUMNS SET NAME=:new\n"
- "WHERE TABLE_ID=:tableid AND NAME=:old\n"
- "AND POS=:nth;\n"
- "END;\n",
- FALSE, trx);
-
- DBUG_EXECUTE_IF("ib_rename_column_error",
- error = DB_OUT_OF_FILE_SPACE;); /* debug-only fault injection */
-
- if (error != DB_SUCCESS) {
-err_exit: /* shared error exit; also reached by goto from the loops below */
- my_error_innodb(error, table_name, 0);
- trx->error_state = DB_SUCCESS;
- trx->op_info = "";
- DBUG_RETURN(true);
- }
-
- trx->op_info = "renaming column in SYS_FIELDS";
-
- for (const dict_index_t* index = dict_table_get_first_index(
- user_table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
-
- for (ulint i = 0; i < dict_index_get_n_fields(index); i++) {
- if (strcmp(dict_index_get_nth_field(index, i)->name,
- from)) {
- continue;
- }
-
- info = pars_info_create();
-
- pars_info_add_ull_literal(info, "indexid", index->id);
- pars_info_add_int4_literal(info, "nth", i);
- pars_info_add_str_literal(info, "old", from);
- pars_info_add_str_literal(info, "new", to);
-
- error = que_eval_sql(
- info,
- "PROCEDURE RENAME_SYS_FIELDS_PROC () IS\n"
- "BEGIN\n"
-
- "UPDATE SYS_FIELDS SET COL_NAME=:new\n"
- "WHERE INDEX_ID=:indexid AND COL_NAME=:old\n"
- "AND POS=:nth;\n"
-
- /* Try again, in case there is a prefix_len
- encoded in SYS_FIELDS.POS. The second UPDATE
- matches POS values in the range
- [65536*nth, 65536*(nth+1)). */
-
- "UPDATE SYS_FIELDS SET COL_NAME=:new\n"
- "WHERE INDEX_ID=:indexid AND COL_NAME=:old\n"
- "AND POS>=65536*:nth AND POS<65536*(:nth+1);\n"
-
- "END;\n",
- FALSE, trx);
-
- if (error != DB_SUCCESS) {
- goto err_exit;
- }
- }
- }
-
-rename_foreign:
- trx->op_info = "renaming column in SYS_FOREIGN_COLS";
-
- std::list<dict_foreign_t*> fk_evict; /* constraints whose SYS_FOREIGN_COLS rows were updated */
- bool foreign_modified;
-
- for (dict_foreign_set::const_iterator it = user_table->foreign_set.begin();
- it != user_table->foreign_set.end();
- ++it) {
-
- dict_foreign_t* foreign = *it;
- foreign_modified = false;
-
- for (unsigned i = 0; i < foreign->n_fields; i++) {
- if (strcmp(foreign->foreign_col_names[i], from)) {
- continue;
- }
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "id", foreign->id);
- pars_info_add_int4_literal(info, "nth", i);
- pars_info_add_str_literal(info, "old", from);
- pars_info_add_str_literal(info, "new", to);
-
- error = que_eval_sql(
- info,
- "PROCEDURE RENAME_SYS_FOREIGN_F_PROC () IS\n"
- "BEGIN\n"
- "UPDATE SYS_FOREIGN_COLS\n"
- "SET FOR_COL_NAME=:new\n"
- "WHERE ID=:id AND POS=:nth\n"
- "AND FOR_COL_NAME=:old;\n"
- "END;\n",
- FALSE, trx);
-
- if (error != DB_SUCCESS) {
- goto err_exit;
- }
- foreign_modified = true;
- }
-
- if (foreign_modified) {
- fk_evict.push_back(foreign);
- }
- }
-
- for (dict_foreign_set::const_iterator it
- = user_table->referenced_set.begin();
- it != user_table->referenced_set.end();
- ++it) {
-
- foreign_modified = false;
- dict_foreign_t* foreign = *it;
-
- for (unsigned i = 0; i < foreign->n_fields; i++) {
- if (strcmp(foreign->referenced_col_names[i], from)) {
- continue;
- }
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "id", foreign->id);
- pars_info_add_int4_literal(info, "nth", i);
- pars_info_add_str_literal(info, "old", from);
- pars_info_add_str_literal(info, "new", to);
-
- error = que_eval_sql(
- info,
- "PROCEDURE RENAME_SYS_FOREIGN_R_PROC () IS\n"
- "BEGIN\n"
- "UPDATE SYS_FOREIGN_COLS\n"
- "SET REF_COL_NAME=:new\n"
- "WHERE ID=:id AND POS=:nth\n"
- "AND REF_COL_NAME=:old;\n"
- "END;\n",
- FALSE, trx);
-
- if (error != DB_SUCCESS) {
- goto err_exit;
- }
- foreign_modified = true;
- }
-
- if (foreign_modified) {
- fk_evict.push_back(foreign);
- }
- }
-
- if (new_clustered) { /* only after a rebuild are the modified constraints dropped from the cache */
- std::for_each(fk_evict.begin(), fk_evict.end(),
- dict_foreign_remove_from_cache);
- }
-
- trx->op_info = "";
- DBUG_RETURN(false);
-}
-
-/** Rename columns in the data dictionary tables.
-For every field of table that is flagged FIELD_IS_RENAMED and is
-stored in the database, the matching Create_field is looked up in
-ha_alter_info->alter_info->create_list and
-innobase_rename_column_try() is called on ctx->old_table with the old
-and the new column name. The position passed is the index of the field
-in table->field. If no matching Create_field exists, ut_error is hit.
-@param ha_alter_info Data used during in-place alter.
-@param ctx In-place ALTER TABLE context
-@param table the TABLE
-@param trx data dictionary transaction
-@param table_name Table name in MySQL
-@retval true Failure
-@retval false Success */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-innobase_rename_columns_try(
-/*========================*/
- Alter_inplace_info* ha_alter_info,
- ha_innobase_inplace_ctx*ctx,
- const TABLE* table,
- trx_t* trx,
- const char* table_name)
-{
- List_iterator_fast<Create_field> cf_it(
- ha_alter_info->alter_info->create_list);
- uint i = 0;
-
- DBUG_ASSERT(ctx);
- DBUG_ASSERT(ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NAME);
-
- for (Field** fp = table->field; *fp; fp++, i++) {
- if (!((*fp)->flags & FIELD_IS_RENAMED) || !((*fp)->stored_in_db())) {
- continue;
- }
-
- cf_it.rewind();
- while (Create_field* cf = cf_it++) {
- if (cf->field == *fp) {
- if (innobase_rename_column_try(
- ctx->old_table, trx, table_name, i,
- cf->field->field_name.str,
- cf->field_name.str,
- ctx->need_rebuild())) {
- return(true);
- }
- goto processed_field;
- }
- }
-
- ut_error; /* a renamed field must have a Create_field */
-processed_field:
- continue;
- }
-
- return(false);
-}
-
-/** Rename columns in the data dictionary cache
-as part of commit_cache_norebuild().
-Does nothing unless ALTER_COLUMN_NAME is set in the handler flags.
-Otherwise, for every field of table flagged FIELD_IS_RENAMED, the
-matching Create_field is looked up in the create_list and
-dict_mem_table_col_rename() is called with the old and new name.
-If no matching Create_field exists, ut_error is hit.
-NOTE(review): unlike innobase_rename_columns_try(), this loop does not
-test stored_in_db() on the field; confirm that this is intended.
-@param ha_alter_info Data used during in-place alter.
-@param table the TABLE
-@param user_table InnoDB table that was being altered */
-static MY_ATTRIBUTE((nonnull))
-void
-innobase_rename_columns_cache(
-/*==========================*/
- Alter_inplace_info* ha_alter_info,
- const TABLE* table,
- dict_table_t* user_table)
-{
- if (!(ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NAME)) {
- return;
- }
-
- List_iterator_fast<Create_field> cf_it(
- ha_alter_info->alter_info->create_list);
- uint i = 0;
-
- for (Field** fp = table->field; *fp; fp++, i++) {
- if (!((*fp)->flags & FIELD_IS_RENAMED)) {
- continue;
- }
-
- cf_it.rewind();
- while (Create_field* cf = cf_it++) {
- if (cf->field == *fp) {
- dict_mem_table_col_rename(user_table, i,
- cf->field->field_name.str,
- cf->field_name.str);
- goto processed_field;
- }
- }
-
- ut_error; /* a renamed field must have a Create_field */
-processed_field:
- continue;
- }
-}
-
-/** Get the auto-increment value of the table on commit.
-The value is chosen by the first case that applies:
-(1) altered_table has no AUTO_INCREMENT column: 0;
-(2) an AUTO_INCREMENT column was added (ctx->add_autoinc is defined):
-the last value of ctx->sequence;
-(3) the ALTER supplied an AUTO_INCREMENT value (CHANGE_CREATE_OPTION
-with HA_CREATE_USED_AUTO): the supplied value, or, if that is not
-greater than the maximum found in the index on the column, the next
-value after that maximum; 0 if the maximum could not be read;
-(4) otherwise: the current ctx->old_table->autoinc counter.
-Cases (3) and (4) take the autoinc lock of ctx->old_table while reading.
-@param ha_alter_info Data used during in-place alter
-@param ctx In-place ALTER TABLE context
-@param altered_table MySQL table that is being altered
-@param old_table MySQL table as it is before the ALTER operation
-@return the next auto-increment value (0 if not present) */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-ulonglong
-commit_get_autoinc(
-/*===============*/
- Alter_inplace_info* ha_alter_info,
- ha_innobase_inplace_ctx*ctx,
- const TABLE* altered_table,
- const TABLE* old_table)
-{
- ulonglong max_autoinc;
-
- DBUG_ENTER("commit_get_autoinc");
-
- if (!altered_table->found_next_number_field) {
- /* There is no AUTO_INCREMENT column in the table
- after the ALTER operation. */
- max_autoinc = 0;
- } else if (ctx->add_autoinc != ULINT_UNDEFINED) {
- /* An AUTO_INCREMENT column was added. Get the last
- value from the sequence, which may be based on a
- supplied AUTO_INCREMENT value. */
- max_autoinc = ctx->sequence.last();
- } else if ((ha_alter_info->handler_flags
- & Alter_inplace_info::CHANGE_CREATE_OPTION)
- && (ha_alter_info->create_info->used_fields
- & HA_CREATE_USED_AUTO)) {
- /* An AUTO_INCREMENT value was supplied, but the table was not
- rebuilt. Get the user-supplied value or the last value from the
- sequence. */
- ib_uint64_t max_value_table;
- dberr_t err;
-
- Field* autoinc_field =
- old_table->found_next_number_field;
- KEY* autoinc_key =
- old_table->key_info + old_table->s->next_number_index;
-
- dict_index_t* index = dict_table_get_index_on_name(
- ctx->old_table, autoinc_key->name);
-
- max_autoinc = ha_alter_info->create_info->auto_increment_value;
-
- dict_table_autoinc_lock(ctx->old_table);
-
- err = row_search_max_autoinc(
- index, autoinc_field->field_name.str, &max_value_table);
-
- if (err != DB_SUCCESS) {
- ut_ad(0);
- max_autoinc = 0;
- } else if (max_autoinc <= max_value_table) { /* supplied value is not above the existing maximum */
- ulonglong col_max_value;
- ulonglong offset;
-
- col_max_value = innobase_get_int_col_max_value(
- old_table->found_next_number_field);
-
- offset = ctx->prebuilt->autoinc_offset;
- max_autoinc = innobase_next_autoinc(
- max_value_table, 1, 1, offset,
- col_max_value);
- }
- dict_table_autoinc_unlock(ctx->old_table);
- } else {
- /* An AUTO_INCREMENT value was not specified.
- Read the old counter value from the table. */
- ut_ad(old_table->found_next_number_field);
- dict_table_autoinc_lock(ctx->old_table);
- max_autoinc = ctx->old_table->autoinc;
- dict_table_autoinc_unlock(ctx->old_table);
- }
-
- DBUG_RETURN(max_autoinc);
-}
-
-/** Add or drop foreign key constraints to the data dictionary tables,
-but do not touch the data dictionary cache.
-For each constraint in ctx->add_fk an identifier is generated, the
-foreign index is looked up in ctx->new_table if it is not set yet, and
-the constraint is written to the dictionary tables under the name of
-ctx->old_table. Each constraint in ctx->drop_fk is then deleted with
-innobase_drop_foreign_try(). Processing stops at the first failure,
-after the error has been reported to the client.
-@param ctx In-place ALTER TABLE context
-@param trx Data dictionary transaction
-@param table_name Table name in MySQL
-@retval true Failure
-@retval false Success
-*/
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-innobase_update_foreign_try(
-/*========================*/
- ha_innobase_inplace_ctx*ctx,
- trx_t* trx,
- const char* table_name)
-{
- ulint foreign_id;
- ulint i;
-
- DBUG_ENTER("innobase_update_foreign_try");
- DBUG_ASSERT(ctx);
-
- foreign_id = dict_table_get_highest_foreign_id(ctx->new_table);
-
- foreign_id++; /* first number after the highest one found in ctx->new_table */
-
- for (i = 0; i < ctx->num_to_add_fk; i++) {
- dict_foreign_t* fk = ctx->add_fk[i];
-
- ut_ad(fk->foreign_table == ctx->new_table
- || fk->foreign_table == ctx->old_table);
-
- dberr_t error = dict_create_add_foreign_id(
- &foreign_id, ctx->old_table->name, fk);
-
- if (error != DB_SUCCESS) {
- my_error(ER_TOO_LONG_IDENT, MYF(0),
- fk->id);
- DBUG_RETURN(true);
- }
-
- if (!fk->foreign_index) {
- fk->foreign_index = dict_foreign_find_index(
- ctx->new_table, ctx->col_names,
- fk->foreign_col_names,
- fk->n_fields, fk->referenced_index, TRUE,
- fk->type
- & (DICT_FOREIGN_ON_DELETE_SET_NULL
- | DICT_FOREIGN_ON_UPDATE_SET_NULL),
- NULL, NULL, NULL);
- if (!fk->foreign_index) {
- my_error(ER_FK_INCORRECT_OPTION,
- MYF(0), table_name, fk->id);
- DBUG_RETURN(true);
- }
- }
-
- /* The fk->foreign_col_names[] uses renamed column
- names, while the columns in ctx->old_table have not
- been renamed yet. */
- error = dict_create_add_foreign_to_dictionary(
- (dict_table_t*)ctx->old_table, ctx->old_table->name, fk, trx);
-
- DBUG_EXECUTE_IF(
- "innodb_test_cannot_add_fk_system",
- error = DB_ERROR;); /* debug-only fault injection */
-
- if (error != DB_SUCCESS) {
- my_error(ER_FK_FAIL_ADD_SYSTEM, MYF(0),
- fk->id);
- DBUG_RETURN(true);
- }
- }
-
- for (i = 0; i < ctx->num_to_drop_fk; i++) {
- dict_foreign_t* fk = ctx->drop_fk[i];
-
- DBUG_ASSERT(fk->foreign_table == ctx->old_table);
-
- if (innobase_drop_foreign_try(trx, table_name, fk->id)) {
- DBUG_RETURN(true);
- }
- }
-
- DBUG_RETURN(false);
-}
-
-/** Update the foreign key constraint definitions in the data dictionary cache
-after the changes to data dictionary tables were committed.
-The in-memory objects of the added constraints are freed, the dropped
-constraints are removed from the cache (only when the table was not
-rebuilt), and the constraints are then reloaded from the data
-dictionary with dict_load_foreigns(). If that load fails with
-DB_CANNOT_ADD_CONSTRAINT it is retried once with a different fourth
-argument, and a warning is pushed to user_thd if the retry succeeds.
-@param ctx In-place ALTER TABLE context
-@param user_thd MySQL connection
-@return InnoDB error code (should always be DB_SUCCESS) */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-innobase_update_foreign_cache(
-/*==========================*/
- ha_innobase_inplace_ctx* ctx,
- THD* user_thd)
-{
- dict_table_t* user_table;
- dberr_t err = DB_SUCCESS;
-
- DBUG_ENTER("innobase_update_foreign_cache");
-
- user_table = ctx->old_table;
-
- /* Discard the added foreign keys, because we will
- load them from the data dictionary. */
- for (ulint i = 0; i < ctx->num_to_add_fk; i++) {
- dict_foreign_t* fk = ctx->add_fk[i];
- dict_foreign_free(fk);
- }
-
- if (ctx->need_rebuild()) {
- /* The rebuilt table is already using the renamed
- column names. No need to pass col_names or to drop
- constraints from the data dictionary cache. */
- DBUG_ASSERT(!ctx->col_names);
- DBUG_ASSERT(user_table->foreign_set.empty());
- DBUG_ASSERT(user_table->referenced_set.empty());
- user_table = ctx->new_table;
- } else {
- /* Drop the foreign key constraints if the
- table was not rebuilt. If the table is rebuilt,
- there would not be any foreign key constraints for
- it yet in the data dictionary cache. */
- for (ulint i = 0; i < ctx->num_to_drop_fk; i++) {
- dict_foreign_t* fk = ctx->drop_fk[i];
- dict_foreign_remove_from_cache(fk);
- }
- }
-
- /* Load the old or added foreign keys from the data dictionary
- and prevent the table from being evicted from the data
- dictionary cache (work around the lack of WL#6049). */
- err = dict_load_foreigns(user_table->name,
- ctx->col_names, false, true,
- DICT_ERR_IGNORE_NONE);
-
- if (err == DB_CANNOT_ADD_CONSTRAINT) {
- /* It is possible that existing foreign keys were
- loaded with "foreign_key_checks" off, so retry the
- loading with the charset check off. The only difference
- from the first call is the fourth argument. */
- err = dict_load_foreigns(user_table->name,
- ctx->col_names, false, false,
- DICT_ERR_IGNORE_NONE);
-
- /* The load with "charset_check" off was successful; warn
- the user that the foreign keys were loaded with a mismatched
- charset. */
- if (err == DB_SUCCESS) {
- push_warning_printf(
- user_thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_ALTER_INFO,
- "Foreign key constraints for table '%s'"
- " are loaded with charset check off",
- user_table->name);
- }
- }
-
- DBUG_RETURN(err);
-}
-
-/** Commit the changes made during prepare_inplace_alter_table()
-and inplace_alter_table() inside the data dictionary tables,
-when rebuilding the table.
-In order, this function: fails if any index of the rebuilt table is
-flagged corrupted; applies the foreign key changes with
-innobase_update_foreign_try(); clears to_be_dropped on the indexes in
-ctx->drop_index; for an online ALTER applies the remaining rebuild log
-with row_log_table_apply(); renames columns in the dictionary tables if
-ALTER_COLUMN_NAME is set; copies the discarded state of the old table
-to the rebuilt table; and swaps the table names in the dictionary
-tables with row_merge_rename_tables_dict(). If the old table still has
-more than one reference afterwards, the result is forced to
-DB_LOCK_WAIT_TIMEOUT. Every failure is reported to the client before
-returning true.
-@param ha_alter_info Data used during in-place alter
-@param ctx In-place ALTER TABLE context
-@param altered_table MySQL table that is being altered
-@param old_table MySQL table as it is before the ALTER operation
-@param trx Data dictionary transaction
-@param table_name Table name in MySQL
-@retval true Failure
-@retval false Success
-*/
-inline MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-commit_try_rebuild(
-/*===============*/
- Alter_inplace_info* ha_alter_info,
- ha_innobase_inplace_ctx*ctx,
- TABLE* altered_table,
- const TABLE* old_table,
- trx_t* trx,
- const char* table_name)
-{
- dict_table_t* rebuilt_table = ctx->new_table;
- dict_table_t* user_table = ctx->old_table;
-
- DBUG_ENTER("commit_try_rebuild");
- DBUG_ASSERT(ctx->need_rebuild());
- DBUG_ASSERT(trx->dict_operation_lock_mode == RW_X_LATCH);
- DBUG_ASSERT(!(ha_alter_info->handler_flags
- & Alter_inplace_info::DROP_FOREIGN_KEY)
- || ctx->num_to_drop_fk > 0);
- DBUG_ASSERT(ctx->num_to_drop_fk
- == ha_alter_info->alter_info->drop_list.elements);
-
- for (dict_index_t* index = dict_table_get_first_index(rebuilt_table);
- index;
- index = dict_table_get_next_index(index)) {
- DBUG_ASSERT(dict_index_get_online_status(index)
- == ONLINE_INDEX_COMPLETE);
- DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
- if (dict_index_is_corrupted(index)) {
- my_error(ER_INDEX_CORRUPT, MYF(0),
- index->name);
- DBUG_RETURN(true);
- }
- }
-
- if (innobase_update_foreign_try(ctx, trx, table_name)) {
- DBUG_RETURN(true);
- }
-
- dberr_t error;
-
- /* Clear the to_be_dropped flag in the data dictionary cache
- of user_table. */
- for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
- dict_index_t* index = ctx->drop_index[i];
- DBUG_ASSERT(index->table == user_table);
- DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
- DBUG_ASSERT(index->to_be_dropped);
- index->to_be_dropped = 0;
- }
-
- /* We copied the table. Any indexes that were requested to be
- dropped were not created in the copy of the table. Apply any
- last bit of the rebuild log and then rename the tables. */
-
- if (ctx->online) {
- DEBUG_SYNC_C("row_log_table_apply2_before");
- error = row_log_table_apply(
- ctx->thr, user_table, altered_table);
- ulint err_key = thr_get_trx(ctx->thr)->error_key_num;
-
- switch (error) {
- KEY* dup_key;
- case DB_SUCCESS:
- break;
- case DB_DUPLICATE_KEY:
- if (err_key == ULINT_UNDEFINED) {
- /* This should be the hidden index on
- FTS_DOC_ID. */
- dup_key = NULL;
- } else {
- DBUG_ASSERT(err_key <
- ha_alter_info->key_count);
- dup_key = &ha_alter_info
- ->key_info_buffer[err_key];
- }
- print_keydup_error(altered_table, dup_key, MYF(0));
- DBUG_RETURN(true);
- case DB_ONLINE_LOG_TOO_BIG:
- my_error(ER_INNODB_ONLINE_LOG_TOO_BIG, MYF(0),
- get_error_key_name(err_key, ha_alter_info,
- rebuilt_table));
- DBUG_RETURN(true);
- case DB_INDEX_CORRUPT:
- my_error(ER_INDEX_CORRUPT, MYF(0),
- get_error_key_name(err_key, ha_alter_info,
- rebuilt_table));
- DBUG_RETURN(true);
- default:
- my_error_innodb(error, table_name, user_table->flags);
- DBUG_RETURN(true);
- }
- }
-
- if ((ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NAME)
- && innobase_rename_columns_try(ha_alter_info, ctx, old_table,
- trx, table_name)) {
- DBUG_RETURN(true);
- }
-
- DBUG_EXECUTE_IF("ib_ddl_crash_before_rename", DBUG_SUICIDE(););
-
- /* The new table must inherit the flag from the
- "parent" table. */
- if (dict_table_is_discarded(user_table)) {
- rebuilt_table->file_unreadable = true;
- rebuilt_table->flags2 |= DICT_TF2_DISCARDED;
- }
-
- /* We can now rename the old table as a temporary table,
- rename the new temporary table as the old table and drop the
- old table. First, we only do this in the data dictionary
- tables. The actual renaming will be performed in
- commit_cache_rebuild(), once the data dictionary transaction
- has been successfully committed. */
-
- error = row_merge_rename_tables_dict(
- user_table, rebuilt_table, ctx->tmp_name, trx);
-
- /* We must be still holding a table handle. */
- DBUG_ASSERT(user_table->n_ref_count >= 1);
-
- DBUG_EXECUTE_IF("ib_ddl_crash_after_rename", DBUG_SUICIDE(););
- DBUG_EXECUTE_IF("ib_rebuild_cannot_rename", error = DB_ERROR;);
-
- if (user_table->n_ref_count > 1) {
- /* This should only occur when an innodb_memcached
- connection with innodb_api_enable_mdl=off was started
- before commit_inplace_alter_table() locked the data
- dictionary. We must roll back the ALTER TABLE, because
- we cannot drop a table while it is being used. */
-
- /* Normally, n_ref_count must be 1, because purge
- cannot be executing on this very table as we are
- holding dict_operation_lock X-latch. */
-
- error = DB_LOCK_WAIT_TIMEOUT;
- }
-
- switch (error) {
- case DB_SUCCESS:
- DBUG_RETURN(false);
- case DB_TABLESPACE_EXISTS:
- ut_a(rebuilt_table->n_ref_count == 1);
- my_error(ER_TABLESPACE_EXISTS, MYF(0), ctx->tmp_name);
- DBUG_RETURN(true);
- case DB_DUPLICATE_KEY:
- ut_a(rebuilt_table->n_ref_count == 1);
- my_error(ER_TABLE_EXISTS_ERROR, MYF(0), ctx->tmp_name);
- DBUG_RETURN(true);
- default:
- my_error_innodb(error, table_name, user_table->flags);
- DBUG_RETURN(true);
- }
-}
-
-/** Apply the changes made during commit_try_rebuild(),
-to the data dictionary cache and the file system.
-The old table is renamed to ctx->tmp_name and the rebuilt table then
-takes over the original name of the old table, which is copied into
-ctx->heap before the first rename. Both renames are asserted with
-ut_a() to succeed.
-@param ctx In-place ALTER TABLE context */
-inline MY_ATTRIBUTE((nonnull))
-void
-commit_cache_rebuild(
-/*=================*/
- ha_innobase_inplace_ctx* ctx)
-{
- dberr_t error;
-
- DBUG_ENTER("commit_cache_rebuild");
- DBUG_ASSERT(ctx->need_rebuild());
- DBUG_ASSERT(dict_table_is_discarded(ctx->old_table)
- == dict_table_is_discarded(ctx->new_table));
-
- const char* old_name = mem_heap_strdup(
- ctx->heap, ctx->old_table->name); /* copy taken before old_table is renamed */
-
- /* We already committed and redo logged the renames,
- so this must succeed. */
- error = dict_table_rename_in_cache(
- ctx->old_table, ctx->tmp_name, FALSE);
- ut_a(error == DB_SUCCESS);
-
- DEBUG_SYNC_C("commit_cache_rebuild_middle");
-
- error = dict_table_rename_in_cache(
- ctx->new_table, old_name, FALSE);
- ut_a(error == DB_SUCCESS);
-
- DBUG_VOID_RETURN;
-}
-
-/** Collect the column numbers of the user-defined columns of all
-indexes which are being dropped (ctx->drop_index). Columns that occur
-in several dropped indexes are stored once, because the result is a set.
-@param[in] ctx In-place ALTER TABLE context
-@param[out] drop_col_list set to which the column numbers are added,
- containing columns which are part of an index being dropped */
-static
-void
-get_col_list_to_be_dropped(
- ha_innobase_inplace_ctx* ctx,
- std::set<ulint>& drop_col_list)
-{
- for (ulint index_count = 0; index_count < ctx->num_to_drop_index;
- index_count++) {
- dict_index_t* index = ctx->drop_index[index_count];
-
- for (ulint col = 0; col < index->n_user_defined_cols; col++) {
- ulint col_no = dict_index_get_nth_col_no(index, col);
- drop_col_list.insert(col_no);
- }
- }
-}
-
-/** Check whether a column is one of the user-defined columns of any
-index of the table that is not going to be dropped. Indexes whose
-to_be_dropped flag is set are skipped.
-@param[in] table table object
-@param[in] col_no column number of the column which is to be checked
-@retval true column exists in some index that is not being dropped
-@retval false column does not exist in any such index. */
-static
-bool
-check_col_exists_in_indexes(
- const dict_table_t* table,
- ulint col_no)
-{
- for (dict_index_t* index = dict_table_get_first_index(table); index;
- index = dict_table_get_next_index(index)) {
-
- if (index->to_be_dropped) {
- continue;
- }
-
- for (ulint col = 0; col < index->n_user_defined_cols; col++) {
-
- ulint index_col_no = dict_index_get_nth_col_no(
- index, col);
- if (col_no == index_col_no) {
- return(true);
- }
- }
- }
-
- return(false);
-}
-
-/** Commit the changes made during prepare_inplace_alter_table()
-and inplace_alter_table() inside the data dictionary tables,
-when not rebuilding the table.
-In order, this function: fails if any index in ctx->add_index is
-flagged corrupted; applies the foreign key changes with
-innobase_update_foreign_try(); renames each added index with
-row_merge_rename_index_to_add(); renames each index in ctx->drop_index
-with row_merge_rename_index_to_drop(); and, if ALTER_COLUMN_NAME is
-set, renames the columns with innobase_rename_columns_try().
-Every failure is reported to the client before returning true.
-@param ha_alter_info Data used during in-place alter
-@param ctx In-place ALTER TABLE context
-@param old_table MySQL table as it is before the ALTER operation
-@param trx Data dictionary transaction
-@param table_name Table name in MySQL
-@retval true Failure
-@retval false Success
-*/
-inline MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-commit_try_norebuild(
-/*=================*/
- Alter_inplace_info* ha_alter_info,
- ha_innobase_inplace_ctx*ctx,
- const TABLE* old_table,
- trx_t* trx,
- const char* table_name)
-{
- DBUG_ENTER("commit_try_norebuild");
- DBUG_ASSERT(!ctx->need_rebuild());
- DBUG_ASSERT(trx->dict_operation_lock_mode == RW_X_LATCH);
- DBUG_ASSERT(!(ha_alter_info->handler_flags
- & Alter_inplace_info::DROP_FOREIGN_KEY)
- || ctx->num_to_drop_fk > 0);
- DBUG_ASSERT(ctx->num_to_drop_fk
- == ha_alter_info->alter_info->drop_list.elements);
-
- for (ulint i = 0; i < ctx->num_to_add_index; i++) {
- dict_index_t* index = ctx->add_index[i];
- DBUG_ASSERT(dict_index_get_online_status(index)
- == ONLINE_INDEX_COMPLETE);
- DBUG_ASSERT(*index->name == TEMP_INDEX_PREFIX);
- if (dict_index_is_corrupted(index)) {
- /* Report a duplicate key
- error for the index that was
- flagged corrupted, most likely
- because a duplicate value was
- inserted (directly or by
- rollback) after
- ha_innobase::inplace_alter_table()
- completed.
- TODO: report this as a corruption
- with a detailed reason once
- WL#6379 has been implemented.
- The name is reported without its
- leading TEMP_INDEX_PREFIX character. */
- my_error(ER_DUP_UNKNOWN_IN_INDEX,
- MYF(0), index->name + 1);
- DBUG_RETURN(true);
- }
- }
-
- if (innobase_update_foreign_try(ctx, trx, table_name)) {
- DBUG_RETURN(true);
- }
-
- dberr_t error;
-
- /* We altered the table in place. */
- /* Lose the TEMP_INDEX_PREFIX. */
- for (ulint i = 0; i < ctx->num_to_add_index; i++) {
- dict_index_t* index = ctx->add_index[i];
- DBUG_ASSERT(dict_index_get_online_status(index)
- == ONLINE_INDEX_COMPLETE);
- DBUG_ASSERT(*index->name
- == TEMP_INDEX_PREFIX);
- error = row_merge_rename_index_to_add(
- trx, ctx->new_table->id, index->id);
- if (error != DB_SUCCESS) {
- sql_print_error(
- "InnoDB: rename index to add: %lu\n",
- (ulong) error);
- DBUG_ASSERT(0);
- my_error(ER_INTERNAL_ERROR, MYF(0),
- "rename index to add");
- DBUG_RETURN(true);
- }
- }
-
- /* Drop any indexes that were requested to be dropped.
- Rename them to TEMP_INDEX_PREFIX in the data
- dictionary first. We do not bother to rename
- index->name in the dictionary cache, because the index
- is about to be freed after row_merge_drop_indexes_dict(). */
-
- for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
- dict_index_t* index = ctx->drop_index[i];
- DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
- DBUG_ASSERT(index->table == ctx->new_table);
- DBUG_ASSERT(index->to_be_dropped);
-
- error = row_merge_rename_index_to_drop(
- trx, index->table->id, index->id);
- if (error != DB_SUCCESS) {
- sql_print_error(
- "InnoDB: rename index to drop: %lu\n",
- (ulong) error);
- DBUG_ASSERT(0);
- my_error(ER_INTERNAL_ERROR, MYF(0),
- "rename index to drop");
- DBUG_RETURN(true);
- }
- }
-
- if (!(ha_alter_info->handler_flags
- & Alter_inplace_info::ALTER_COLUMN_NAME)) {
- DBUG_RETURN(false);
- }
-
- DBUG_RETURN(innobase_rename_columns_try(ha_alter_info, ctx,
- old_table, trx, table_name));
-}
-
-/** Commit the changes to the data dictionary cache
-after a successful commit_try_norebuild() call.
-Clears ord_part on columns that are no longer part of any remaining
-index, strips the TEMP_INDEX_PREFIX character from the names of the
-added indexes and, if any indexes are being dropped, replaces them in
-foreign key constraints, marks them dropped, drops them from the data
-dictionary inside trx and removes them from the dictionary cache.
-@param ctx In-place ALTER TABLE context
-@param table the TABLE before the ALTER
-(not referenced in the function body)
-@param trx Data dictionary transaction object
-(will be started and committed, but only when indexes are dropped)
-@return whether all replacements were found for dropped indexes */
-inline MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-commit_cache_norebuild(
-/*===================*/
- ha_innobase_inplace_ctx*ctx,
- const TABLE* table,
- trx_t* trx)
-{
- DBUG_ENTER("commit_cache_norebuild");
-
- bool found = true;
-
- DBUG_ASSERT(!ctx->need_rebuild());
-
- std::set<ulint> drop_list;
- std::set<ulint>::const_iterator col_it;
-
- /* Check if the column, part of an index to be dropped, is part of any
- other index which is not being dropped. If it is not, then set the
- ord_part of the column to 0. */
- get_col_list_to_be_dropped(ctx, drop_list);
-
- for(col_it = drop_list.begin(); col_it != drop_list.end(); ++col_it) {
- if (!check_col_exists_in_indexes(ctx->new_table, *col_it)) {
- ctx->new_table->cols[*col_it].ord_part = 0;
- }
- }
-
- for (ulint i = 0; i < ctx->num_to_add_index; i++) {
- dict_index_t* index = ctx->add_index[i];
- DBUG_ASSERT(dict_index_get_online_status(index)
- == ONLINE_INDEX_COMPLETE);
- DBUG_ASSERT(*index->name == TEMP_INDEX_PREFIX);
- index->name++; /* skip the leading TEMP_INDEX_PREFIX character */
- }
-
- if (ctx->num_to_drop_index) {
- /* Really drop the indexes that were dropped.
- The transaction had to be committed first
- (after renaming the indexes), so that in the
- event of a crash, crash recovery will drop the
- indexes, because it drops all indexes whose
- names start with TEMP_INDEX_PREFIX. Once we
- have started dropping an index tree, there is
- no way to roll it back. */
-
- for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
- dict_index_t* index = ctx->drop_index[i];
- DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
- DBUG_ASSERT(index->table == ctx->new_table);
- DBUG_ASSERT(index->to_be_dropped);
-
- /* Replace the indexes in foreign key
- constraints if needed. A failed replacement
- is remembered in the return value but does
- not stop the loop. */
-
- if (!dict_foreign_replace_index(
- index->table, ctx->col_names, index)) {
- found = false;
- }
-
- /* Mark the index dropped
- in the data dictionary cache. */
- rw_lock_x_lock(dict_index_get_lock(index));
- index->page = FIL_NULL;
- rw_lock_x_unlock(dict_index_get_lock(index));
- }
-
- trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
- row_merge_drop_indexes_dict(trx, ctx->new_table->id);
-
- for (ulint i = 0; i < ctx->num_to_drop_index; i++) {
- dict_index_t* index = ctx->drop_index[i];
- DBUG_ASSERT(*index->name != TEMP_INDEX_PREFIX);
- DBUG_ASSERT(index->table == ctx->new_table);
-
- if (index->type & DICT_FTS) {
- DBUG_ASSERT(index->type == DICT_FTS
- || (index->type
- & DICT_CORRUPT));
- DBUG_ASSERT(index->table->fts);
- fts_drop_index(index->table, index, trx);
- }
-
- dict_index_remove_from_cache(index->table, index);
- }
-
- trx_commit_for_mysql(trx);
- }
-
- DBUG_RETURN(found);
-}
-
-/** Adjust the persistent statistics after non-rebuilding ALTER TABLE.
-Remove statistics for dropped indexes and add statistics for created
-indexes. FULLTEXT indexes are skipped in both steps. Nothing is done
-unless persistent statistics are enabled for ctx->new_table.
-A failure to drop the statistics of an index is reported to the client
-as a warning only.
-NOTE(review): no statistics are renamed in this function body;
-altered_table and table_name are not referenced in it either.
-@param ha_alter_info Data used during in-place alter
-@param ctx In-place ALTER TABLE context
-@param altered_table MySQL table that is being altered
-@param table_name Table name in MySQL
-@param thd MySQL connection
-*/
-static
-void
-alter_stats_norebuild(
-/*==================*/
- Alter_inplace_info* ha_alter_info,
- ha_innobase_inplace_ctx* ctx,
- TABLE* altered_table,
- const char* table_name,
- THD* thd)
-{
- ulint i;
-
- DBUG_ENTER("alter_stats_norebuild");
- DBUG_ASSERT(!ctx->need_rebuild());
-
- if (!dict_stats_is_persistent_enabled(ctx->new_table)) {
- DBUG_VOID_RETURN;
- }
-
- /* TODO: This will not drop the (unused) statistics for
- FTS_DOC_ID_INDEX if it was a hidden index, dropped together
- with the last remaining FULLTEXT index. */
- for (i = 0; i < ha_alter_info->index_drop_count; i++) {
- const KEY* key = ha_alter_info->index_drop_buffer[i];
-
- if (key->flags & HA_FULLTEXT) {
- /* There are no index cardinality
- statistics for FULLTEXT indexes. */
- continue;
- }
-
- char errstr[1024];
-
- if (dict_stats_drop_index(
- ctx->new_table->name, key->name,
- errstr, sizeof errstr) != DB_SUCCESS) {
- push_warning(thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_LOCK_WAIT_TIMEOUT, errstr);
- }
- }
-
- for (i = 0; i < ctx->num_to_add_index; i++) {
- dict_index_t* index = ctx->add_index[i];
- DBUG_ASSERT(index->table == ctx->new_table);
-
- if (!(index->type & DICT_FTS)) {
- dict_stats_init(ctx->new_table);
- dict_stats_update_for_index(index);
- }
- }
-
- DBUG_VOID_RETURN;
-}
-
-/** Recalculate the persistent statistics after a rebuilding ALTER TABLE.
-Nothing is done when the table is discarded or when persistent statistics
-are not enabled for it. If the recalculation fails, a warning naming the
-table and the error is pushed to the connection.
-@param table		InnoDB table that was rebuilt by ALTER TABLE
-@param table_name	Table name in MySQL, used in the warning text
-@param thd		MySQL connection, receives the warning
-*/
-static
-void
-alter_stats_rebuild(
-/*================*/
-	dict_table_t*	table,
-	const char*	table_name,
-	THD*		thd)
-{
-	DBUG_ENTER("alter_stats_rebuild");
-
-	if (dict_table_is_discarded(table)) {
-		DBUG_VOID_RETURN;
-	}
-
-	if (!dict_stats_is_persistent_enabled(table)) {
-		DBUG_VOID_RETURN;
-	}
-
-	const dberr_t	err = dict_stats_update(
-		table, DICT_STATS_RECALC_PERSISTENT);
-
-	if (err == DB_SUCCESS) {
-		DBUG_VOID_RETURN;
-	}
-
-	push_warning_printf(
-		thd,
-		Sql_condition::WARN_LEVEL_WARN,
-		ER_ALTER_INFO,
-		"Error updating stats for table '%s' "
-		"after table rebuild: %s",
-		table_name, ut_strerr(err));
-
-	DBUG_VOID_RETURN;
-}
-
-/** Crash injection point for debug builds. Formats the DBUG keyword
-"<prefix>_<count>" into a local buffer and, if that keyword is enabled,
-executes DBUG_SUICIDE(). Expands to nothing when DBUG_OFF is defined.
-@param prefix	string literal forming the first part of the keyword
-@param count	unsigned value appended to the prefix after an underscore */
-#ifdef DBUG_OFF
-# define DBUG_INJECT_CRASH(prefix, count)
-#else
-# define DBUG_INJECT_CRASH(prefix, count)				\
-do {									\
-	char	dbug_keyword[32];					\
-	ut_snprintf(dbug_keyword, sizeof dbug_keyword,			\
-		    prefix "_%u", count);				\
-	DBUG_EXECUTE_IF(dbug_keyword, DBUG_SUICIDE(););			\
-} while (0)
-#endif
-
-/** Commit or rollback the changes made during
-prepare_inplace_alter_table() and inplace_alter_table() inside
-the storage engine. Note that the allowed level of concurrency
-during this operation will be the same as for
-inplace_alter_table() and thus might be higher than during
-prepare_inplace_alter_table(). (E.g., concurrent writes were
-blocked during prepare, but might not be during commit).
-
-Overview of the steps taken below:
-- If commit is false, the work is delegated to
-  rollback_inplace_alter_table().
-- If no handler flag outside INNOBASE_INPLACE_IGNORE is set, nothing is
-  changed and success is returned.
-- Otherwise every context in the array (ha_alter_info->group_commit_ctx
-  if it is set, else only the context of this handler) gets its old
-  table locked in LOCK_X mode, and the data dictionary is latched
-  exclusively.
-- The dictionary changes are applied with commit_try_rebuild() or
-  commit_try_norebuild() and then committed or rolled back.
-- Finally the in-memory structures are updated and, unless the operation
-  failed, the persistent statistics are adjusted.
-@param altered_table TABLE object for new version of table.
-@param ha_alter_info Structure describing changes to be done
-by ALTER TABLE and holding data used during in-place alter.
-@param commit true => Commit, false => Rollback.
-@retval true Failure
-@retval false Success
-*/
-UNIV_INTERN
-bool
-ha_innobase::commit_inplace_alter_table(
-/*====================================*/
- TABLE* altered_table,
- Alter_inplace_info* ha_alter_info,
- bool commit)
-{
- dberr_t error;
- ha_innobase_inplace_ctx* ctx0
- = static_cast<ha_innobase_inplace_ctx*>
- (ha_alter_info->handler_ctx);
-#ifndef DBUG_OFF
- uint crash_inject_count = 1;
- uint crash_fail_inject_count = 1;
- uint failure_inject_count = 1;
-#endif
-
- DBUG_ENTER("commit_inplace_alter_table");
- DBUG_ASSERT(!srv_read_only_mode);
- DBUG_ASSERT(!ctx0 || ctx0->prebuilt == prebuilt);
- DBUG_ASSERT(!ctx0 || ctx0->old_table == prebuilt->table);
-
- DEBUG_SYNC_C("innodb_commit_inplace_alter_table_enter");
-
- DEBUG_SYNC_C("innodb_commit_inplace_alter_table_wait");
-
- if (!commit) {
- /* A rollback is being requested. So far we may at
- most have created some indexes. If any indexes were to
- be dropped, they would actually be dropped in this
- method if commit=true. */
- DBUG_RETURN(rollback_inplace_alter_table(
- ha_alter_info, table, prebuilt));
- }
-
- if (!(ha_alter_info->handler_flags & ~INNOBASE_INPLACE_IGNORE)) {
- DBUG_ASSERT(!ctx0);
- MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
- ha_alter_info->group_commit_ctx = NULL;
- DBUG_RETURN(false);
- }
-
- DBUG_ASSERT(ctx0);
-
- inplace_alter_handler_ctx** ctx_array;
- inplace_alter_handler_ctx* ctx_single[2]; /* ctx0 followed by a NULL terminator */
-
- if (ha_alter_info->group_commit_ctx) {
- ctx_array = ha_alter_info->group_commit_ctx;
- } else {
- ctx_single[0] = ctx0;
- ctx_single[1] = NULL;
- ctx_array = ctx_single;
- }
-
- DBUG_ASSERT(ctx0 == ctx_array[0]);
- ut_ad(prebuilt->table == ctx0->old_table);
- ha_alter_info->group_commit_ctx = NULL;
-
- /* Free the ctx->trx of other partitions, if any. We will only
- use the ctx0->trx here. Others may have been allocated in
- the prepare stage. */
-
- for (inplace_alter_handler_ctx** pctx = &ctx_array[1]; *pctx;
- pctx++) {
- ha_innobase_inplace_ctx* ctx
- = static_cast<ha_innobase_inplace_ctx*>(*pctx);
-
- if (ctx->trx) {
- trx_free_for_mysql(ctx->trx);
- ctx->trx = NULL;
- }
- }
-
- trx_start_if_not_started_xa(prebuilt->trx);
-
- for (inplace_alter_handler_ctx** pctx = ctx_array; *pctx; pctx++) {
- ha_innobase_inplace_ctx* ctx
- = static_cast<ha_innobase_inplace_ctx*>(*pctx);
- DBUG_ASSERT(ctx->prebuilt->trx == prebuilt->trx);
-
- /* If decryption failed for old table or new table
- fail here. */
- if ((ctx->old_table->file_unreadable &&
- fil_space_get(ctx->old_table->space) != NULL)||
- (ctx->new_table->file_unreadable &&
- fil_space_get(ctx->new_table->space) != NULL)) {
- String str;
- const char* engine= table_type();
- get_error_message(HA_ERR_DECRYPTION_FAILED, &str);
- my_error(ER_GET_ERRMSG, MYF(0), HA_ERR_DECRYPTION_FAILED, str.c_ptr(), engine);
- DBUG_RETURN(true);
- }
-
- /* Exclusively lock the table, to ensure that no other
- transaction is holding locks on the table while we
- change the table definition. The MySQL meta-data lock
- should normally guarantee that no conflicting locks
- exist. However, FOREIGN KEY constraints checks and any
- transactions collected during crash recovery could be
- holding InnoDB locks only, not MySQL locks. */
-
- error = row_merge_lock_table(
- prebuilt->trx, ctx->old_table, LOCK_X);
-
- if (error != DB_SUCCESS) {
- my_error_innodb(
- error, table_share->table_name.str, 0);
- DBUG_RETURN(true);
- }
- }
-
- DEBUG_SYNC(user_thd, "innodb_alter_commit_after_lock_table");
-
- const bool new_clustered = ctx0->need_rebuild();
- trx_t* trx = ctx0->trx;
- bool fail = false;
-
- if (new_clustered) {
- for (inplace_alter_handler_ctx** pctx = ctx_array;
- *pctx; pctx++) {
- ha_innobase_inplace_ctx* ctx
- = static_cast<ha_innobase_inplace_ctx*>(*pctx);
- DBUG_ASSERT(ctx->need_rebuild());
-
- if (ctx->old_table->fts) {
- ut_ad(!ctx->old_table->fts->add_wq);
- fts_optimize_remove_table(
- ctx->old_table);
- }
-
- if (ctx->new_table->fts) {
- ut_ad(!ctx->new_table->fts->add_wq);
- fts_optimize_remove_table(
- ctx->new_table);
- }
- }
- }
-
- if (!trx) {
- DBUG_ASSERT(!new_clustered);
- trx = innobase_trx_allocate(user_thd);
- }
-
- trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
- /* Latch the InnoDB data dictionary exclusively so that no deadlocks
- or lock waits can happen in it during the data dictionary operation. */
- row_mysql_lock_data_dictionary(trx);
-
- /* Prevent the background statistics collection from accessing
- the tables. The loop is repeated, yielding with
- DICT_STATS_BG_YIELD() in between, until dict_stats_stop_bg() has
- returned true for every table in one pass. */
- for (;;) {
- bool retry = false;
-
- for (inplace_alter_handler_ctx** pctx = ctx_array;
- *pctx; pctx++) {
- ha_innobase_inplace_ctx* ctx
- = static_cast<ha_innobase_inplace_ctx*>(*pctx);
-
- DBUG_ASSERT(new_clustered == ctx->need_rebuild());
-
- if (new_clustered
- && !dict_stats_stop_bg(ctx->old_table)) {
- retry = true;
- }
-
- if (!dict_stats_stop_bg(ctx->new_table)) {
- retry = true;
- }
- }
-
- if (!retry) {
- break;
- }
-
- DICT_STATS_BG_YIELD(trx);
- }
-
- /* Apply the changes to the data dictionary tables, for all
- partitions. */
-
- for (inplace_alter_handler_ctx** pctx = ctx_array;
- *pctx && !fail; pctx++) {
- ha_innobase_inplace_ctx* ctx
- = static_cast<ha_innobase_inplace_ctx*>(*pctx);
-
- DBUG_ASSERT(new_clustered == ctx->need_rebuild());
-
- ctx->max_autoinc = commit_get_autoinc(
- ha_alter_info, ctx, altered_table, table);
-
- if (ctx->need_rebuild()) {
- ctx->tmp_name = dict_mem_create_temporary_tablename(
- ctx->heap, ctx->new_table->name,
- ctx->new_table->id);
-
- fail = commit_try_rebuild(
- ha_alter_info, ctx, altered_table, table,
- trx, table_share->table_name.str);
- } else {
- fail = commit_try_norebuild(
- ha_alter_info, ctx, table, trx,
- table_share->table_name.str);
- }
- DBUG_INJECT_CRASH("ib_commit_inplace_crash",
- crash_inject_count++);
-#ifndef DBUG_OFF
- {
- /* Generate a dynamic dbug text. */
- char buf[32];
- ut_snprintf(buf, sizeof buf, "ib_commit_inplace_fail_%u",
- failure_inject_count++);
- DBUG_EXECUTE_IF(buf,
- my_error(ER_INTERNAL_ERROR, MYF(0),
- "Injected error!");
- fail = true;
- );
- }
-#endif
- }
-
- /* Commit or roll back the changes to the data dictionary. */
-
- if (fail) {
- trx_rollback_for_mysql(trx);
- } else if (!new_clustered) {
- trx_commit_for_mysql(trx);
- } else {
- mtr_t mtr;
- mtr_start(&mtr);
-
- for (inplace_alter_handler_ctx** pctx = ctx_array;
- *pctx; pctx++) {
- ha_innobase_inplace_ctx* ctx
- = static_cast<ha_innobase_inplace_ctx*>(*pctx);
-
- DBUG_ASSERT(ctx->need_rebuild());
- /* Check for any possible problems for any
- file operations that will be performed in
- commit_cache_rebuild(), and if none, generate
- the redo log for these operations. */
- error = fil_mtr_rename_log(ctx->old_table,
- ctx->new_table,
- ctx->tmp_name, &mtr);
- if (error != DB_SUCCESS) {
- /* Out of memory or a problem will occur
- when renaming files. */
- fail = true;
- my_error_innodb(error, ctx->old_table->name,
- ctx->old_table->flags);
- }
- DBUG_INJECT_CRASH("ib_commit_inplace_crash",
- crash_inject_count++);
- }
-
- /* Test what happens on crash if the redo logs
- are flushed to disk here. The log records
- about the rename should not be committed, and
- the data dictionary transaction should be
- rolled back, restoring the old table. */
- DBUG_EXECUTE_IF("innodb_alter_commit_crash_before_commit",
- log_buffer_flush_to_disk();
- DBUG_SUICIDE(););
- ut_ad(!trx->fts_trx);
-
- if (fail) {
- mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
- mtr_commit(&mtr);
- trx_rollback_for_mysql(trx);
- } else {
- /* The following call commits the
- mini-transaction, making the data dictionary
- transaction committed at mtr.end_lsn. The
- transaction becomes 'durable' by the time when
- log_buffer_flush_to_disk() returns. In the
- logical sense the commit in the file-based
- data structures happens here. */
- ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
- ut_ad(trx->insert_undo || trx->update_undo);
-
- trx_commit_low(trx, &mtr);
- }
-
- /* If server crashes here, the dictionary in
- InnoDB and MySQL will differ. The .ibd files
- and the .frm files must be swapped manually by
- the administrator. No loss of data. */
- DBUG_EXECUTE_IF("innodb_alter_commit_crash_after_commit",
- log_buffer_flush_to_disk();
- DBUG_SUICIDE(););
- }
-
- /* Flush the log to reduce probability that the .frm files and
- the InnoDB data dictionary get out-of-sync if the user runs
- with innodb_flush_log_at_trx_commit = 0 */
-
- log_buffer_flush_to_disk();
-
- /* At this point, the changes to the persistent storage have
- been committed or rolled back. What remains to be done is to
- update the in-memory structures, close some handles, release
- temporary files, and (unless we rolled back) update persistent
- statistics. */
-
- for (inplace_alter_handler_ctx** pctx = ctx_array;
- *pctx; pctx++) {
- ha_innobase_inplace_ctx* ctx
- = static_cast<ha_innobase_inplace_ctx*>(*pctx);
-
- DBUG_ASSERT(ctx->need_rebuild() == new_clustered);
-
- if (new_clustered) {
- innobase_online_rebuild_log_free(ctx->old_table);
- }
-
- if (fail) {
- if (new_clustered) {
- dict_table_close(ctx->new_table,
- TRUE, FALSE);
-
-#if defined UNIV_DEBUG || defined UNIV_DDL_DEBUG
- /* Nobody should have initialized the
- stats of the newly created table
- yet. When this is the case, we know
- that it has not been added for
- background stats gathering. */
- ut_a(!ctx->new_table->stat_initialized);
-#endif /* UNIV_DEBUG || UNIV_DDL_DEBUG */
-
- trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
- row_merge_drop_table(trx, ctx->new_table);
- trx_commit_for_mysql(trx);
- ctx->new_table = NULL;
- } else {
- /* We failed, but did not rebuild the table.
- Roll back any ADD INDEX, or get rid of garbage
- ADD INDEX that was left over from a previous
- ALTER TABLE statement. */
- trx_start_for_ddl(trx, TRX_DICT_OP_INDEX);
- innobase_rollback_sec_index(
- ctx->new_table, table, TRUE, trx);
- trx_commit_for_mysql(trx);
- }
- DBUG_INJECT_CRASH("ib_commit_inplace_crash_fail",
- crash_fail_inject_count++);
-
- continue;
- }
-
- innobase_copy_frm_flags_from_table_share(
- ctx->new_table, altered_table->s);
-
- if (new_clustered) {
- /* We will reload and refresh the
- in-memory foreign key constraint
- metadata. This is a rename operation
- in preparing for dropping the old
- table. Set the table to_be_dropped bit
- here, so to make sure DML foreign key
- constraint check does not use the
- stale dict_foreign_t. This is done
- because WL#6049 (FK MDL) has not been
- implemented yet. */
- ctx->old_table->to_be_dropped = true;
-
- /* Rename the tablespace files. */
- commit_cache_rebuild(ctx);
-
- error = innobase_update_foreign_cache(ctx, user_thd);
- if (error != DB_SUCCESS) {
- goto foreign_fail;
- }
- } else {
- error = innobase_update_foreign_cache(ctx, user_thd);
-
- if (error != DB_SUCCESS) {
-foreign_fail:
- /* The data dictionary cache
- should be corrupted now. The
- best solution should be to
- kill and restart the server,
- but the *.frm file has not
- been replaced yet. */
- my_error(ER_CANNOT_ADD_FOREIGN,
- MYF(0));
- sql_print_error(
- "InnoDB: dict_load_foreigns()"
- " returned %u for %s",
- (unsigned) error,
- thd_query_string(user_thd)
- ->str);
- ut_ad(0);
- } else {
- if (!commit_cache_norebuild(
- ctx, table, trx)) {
- ut_a(!prebuilt->trx->check_foreigns);
- }
-
- innobase_rename_columns_cache(
- ha_alter_info, table,
- ctx->new_table);
- }
- }
- DBUG_INJECT_CRASH("ib_commit_inplace_crash",
- crash_inject_count++);
- }
-
- /* Invalidate the index translation table. In partitioned
- tables, there is one TABLE_SHARE (and also only one TABLE)
- covering all partitions. */
- share->idx_trans_tbl.index_count = 0;
-
- if (trx == ctx0->trx) {
- ctx0->trx = NULL;
- }
-
- /* Tell the InnoDB server that there might be work for
- utility threads: */
-
- srv_active_wake_master_thread();
-
- if (fail) {
- for (inplace_alter_handler_ctx** pctx = ctx_array;
- *pctx; pctx++) {
- ha_innobase_inplace_ctx* ctx
- = static_cast<ha_innobase_inplace_ctx*>
- (*pctx);
- DBUG_ASSERT(ctx->need_rebuild() == new_clustered);
-
- ut_d(dict_table_check_for_dup_indexes(
- ctx->old_table,
- CHECK_ABORTED_OK));
- ut_a(fts_check_cached_index(ctx->old_table));
- DBUG_INJECT_CRASH("ib_commit_inplace_crash_fail",
- crash_fail_inject_count++);
- }
-
- row_mysql_unlock_data_dictionary(trx);
- trx_free_for_mysql(trx);
- DBUG_RETURN(true);
- }
-
- /* Release the table locks. */
- trx_commit_for_mysql(prebuilt->trx);
-
- DBUG_EXECUTE_IF("ib_ddl_crash_after_user_trx_commit", DBUG_SUICIDE(););
-
- for (inplace_alter_handler_ctx** pctx = ctx_array;
- *pctx; pctx++) {
- ha_innobase_inplace_ctx* ctx
- = static_cast<ha_innobase_inplace_ctx*>
- (*pctx);
- DBUG_ASSERT(ctx->need_rebuild() == new_clustered);
-
- if (altered_table->found_next_number_field) {
- dict_table_t* t = ctx->new_table;
-
- dict_table_autoinc_lock(t);
- dict_table_autoinc_initialize(t, ctx->max_autoinc);
- dict_table_autoinc_unlock(t);
- }
-
- bool add_fts = false;
-
- /* Publish the created fulltext index, if any.
- Note that a fulltext index can be created without
- creating the clustered index, if there already exists
- a suitable FTS_DOC_ID column. If not, one will be
- created, implying new_clustered */
- for (ulint i = 0; i < ctx->num_to_add_index; i++) {
- dict_index_t* index = ctx->add_index[i];
-
- if (index->type & DICT_FTS) {
- DBUG_ASSERT(index->type == DICT_FTS);
- /* We reset DICT_TF2_FTS here because the bit
- is left unset when a drop precedes the add. */
- DICT_TF2_FLAG_SET(ctx->new_table, DICT_TF2_FTS);
- fts_add_index(index, ctx->new_table);
- add_fts = true;
- }
- }
-
- ut_d(dict_table_check_for_dup_indexes(
- ctx->new_table, CHECK_ALL_COMPLETE));
-
- if (add_fts) {
- fts_optimize_add_table(ctx->new_table);
- }
-
- ut_d(dict_table_check_for_dup_indexes(
- ctx->new_table, CHECK_ABORTED_OK));
- ut_a(fts_check_cached_index(ctx->new_table));
-
- if (new_clustered) {
- /* Since the table has been rebuilt, we remove
- all persistent statistics corresponding to the
- old copy of the table (which was renamed to
- ctx->tmp_name). */
-
- char errstr[1024];
-
- DBUG_ASSERT(0 == strcmp(ctx->old_table->name,
- ctx->tmp_name));
-
- if (dict_stats_drop_table(
- ctx->new_table->name,
- errstr, sizeof(errstr))
- != DB_SUCCESS) {
- push_warning_printf(
- user_thd,
- Sql_condition::WARN_LEVEL_WARN,
- ER_ALTER_INFO,
- "Deleting persistent statistics"
- " for rebuilt table '%s' in"
- " InnoDB failed: %s",
- table->s->table_name.str,
- errstr);
- }
-
- DBUG_EXECUTE_IF("ib_ddl_crash_before_commit",
- DBUG_SUICIDE(););
-
- trx_t* const user_trx = prebuilt->trx;
-
- row_prebuilt_free(ctx->prebuilt, TRUE);
-
- /* Drop the copy of the old table, which was
- renamed to ctx->tmp_name at the atomic DDL
- transaction commit. If the system crashes
- before this is completed, some orphan tables
- with ctx->tmp_name may be recovered. */
- trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
- row_merge_drop_table(trx, ctx->old_table);
- trx_commit_for_mysql(trx);
-
- /* Rebuild the prebuilt object. */
- ctx->prebuilt = row_create_prebuilt(
- ctx->new_table, altered_table->s->reclength);
- trx_start_if_not_started(user_trx);
- user_trx->will_lock++;
- prebuilt->trx = user_trx;
- }
- DBUG_INJECT_CRASH("ib_commit_inplace_crash",
- crash_inject_count++);
- }
-
- row_mysql_unlock_data_dictionary(trx);
- trx_free_for_mysql(trx);
-
- /* Rebuild index translation table now for temporary tables if we are
- restoring secondary keys, as ha_innobase::open will not be called for
- the next access. */
- if (dict_table_is_temporary(ctx0->new_table)
- && ctx0->num_to_add_index > 0) {
- ut_ad(!ctx0->num_to_drop_index);
- ut_ad(!ctx0->num_to_drop_fk);
- if (!innobase_build_index_translation(altered_table,
- ctx0->new_table,
- share)) {
- MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
- DBUG_RETURN(true);
- }
- }
-
- /* TODO: The following code could be executed
- while allowing concurrent access to the table
- (MDL downgrade). */
-
- if (new_clustered) {
- for (inplace_alter_handler_ctx** pctx = ctx_array;
- *pctx; pctx++) {
- ha_innobase_inplace_ctx* ctx
- = static_cast<ha_innobase_inplace_ctx*>
- (*pctx);
- DBUG_ASSERT(ctx->need_rebuild());
-
- alter_stats_rebuild(
- ctx->new_table, table->s->table_name.str,
- user_thd);
- DBUG_INJECT_CRASH("ib_commit_inplace_crash",
- crash_inject_count++);
- }
- } else {
- for (inplace_alter_handler_ctx** pctx = ctx_array;
- *pctx; pctx++) {
- ha_innobase_inplace_ctx* ctx
- = static_cast<ha_innobase_inplace_ctx*>
- (*pctx);
- DBUG_ASSERT(!ctx->need_rebuild());
-
- alter_stats_norebuild(
- ha_alter_info, ctx, altered_table,
- table->s->table_name.str, user_thd);
- DBUG_INJECT_CRASH("ib_commit_inplace_crash",
- crash_inject_count++);
- }
- }
-
- /* TODO: Also perform DROP TABLE and DROP INDEX after
- the MDL downgrade. */
-
-#ifndef DBUG_OFF
- dict_index_t* clust_index = dict_table_get_first_index(
- prebuilt->table);
- DBUG_ASSERT(!clust_index->online_log);
- DBUG_ASSERT(dict_index_get_online_status(clust_index)
- == ONLINE_INDEX_COMPLETE);
-
- for (dict_index_t* index = dict_table_get_first_index(
- prebuilt->table);
- index;
- index = dict_table_get_next_index(index)) {
- DBUG_ASSERT(!index->to_be_dropped);
- }
-#endif /* !DBUG_OFF */
-
- MONITOR_ATOMIC_DEC(MONITOR_PENDING_ALTER_TABLE);
- DBUG_RETURN(false);
-}
-
-/**
-Set up a sequence of auto-increment values.
-The sequence is marked exhausted (m_eof) right away when no session is
-given or when max_value is 0. Otherwise the increment and offset are read
-from the session with thd_get_autoinc(); if either of them is greater
-than 1 the first value is computed by innobase_next_autoinc(), and
-otherwise a start value of 0 is replaced by 1.
-@param thd - the session
-@param start_value - the lower bound
-@param max_value - the upper bound (inclusive) */
-UNIV_INTERN
-ib_sequence_t::ib_sequence_t(
-	THD*		thd,
-	ulonglong	start_value,
-	ulonglong	max_value)
-	:
-	m_max_value(max_value),
-	m_increment(0),
-	m_offset(0),
-	m_next_value(start_value),
-	m_eof(false)
-{
-	if (thd == 0 || !(m_max_value > 0)) {
-		m_eof = true;
-		return;
-	}
-
-	thd_get_autoinc(thd, &m_offset, &m_increment);
-
-	const bool	custom_step = (m_increment > 1 || m_offset > 1);
-
-	if (custom_step) {
-		/* An offset or increment has been specified, so the
-		exact next value has to be worked out. */
-		m_next_value = innobase_next_autoinc(
-			start_value, 1,
-			m_increment, m_offset, m_max_value);
-	} else if (start_value == 0) {
-		/* The next value can never be 0. */
-		m_next_value = 1;
-	}
-}
-
-/**
-Postfix increment.
-Hands out the current value and advances the sequence with
-innobase_next_autoinc(). The sequence is marked exhausted (m_eof) when
-the value did not advance and equals m_max_value.
-@return the next value to insert */
-UNIV_INTERN
-ulonglong
-ib_sequence_t::operator++(int) UNIV_NOTHROW
-{
-	ut_ad(!m_eof);
-	ut_ad(m_max_value > 0);
-
-	const ulonglong	handed_out = m_next_value;
-
-	m_next_value = innobase_next_autoinc(
-		handed_out, 1, m_increment, m_offset, m_max_value);
-
-	const bool	stuck_at_max = (handed_out == m_next_value
-					&& handed_out == m_max_value);
-
-	if (stuck_at_max) {
-		m_eof = true;
-	}
-
-	return(handed_out);
-}
diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc
deleted file mode 100644
index 9cef04c4244..00000000000
--- a/storage/xtradb/handler/i_s.cc
+++ /dev/null
@@ -1,9647 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file handler/i_s.cc
-InnoDB INFORMATION SCHEMA tables interface to MySQL.
-
-Created July 18, 2007 Vasil Dimov
-Modified Dec 29, 2014 Jan Lindström (Added sys_semaphore_waits)
-*******************************************************/
-#include "univ.i"
-#include <my_global.h>
-#ifndef MYSQL_SERVER
-#define MYSQL_SERVER /* For Item_* classes */
-#include <item.h>
-/* Prevent influence of this definition to other headers */
-#undef MYSQL_SERVER
-#else
-#include <mysql_priv.h>
-#endif //MYSQL_SERVER
-
-#include <ctype.h> /*toupper*/
-#include <mysqld_error.h>
-#include <sql_acl.h>
-
-#include <m_ctype.h>
-#include <hash.h>
-#include <myisampack.h>
-#include <mysys_err.h>
-#include <my_sys.h>
-#include "i_s.h"
-#include <sql_plugin.h>
-#include <innodb_priv.h>
-
-#include "btr0pcur.h"
-#include "btr0types.h"
-#include "dict0dict.h"
-#include "dict0load.h"
-#include "buf0buddy.h"
-#include "buf0buf.h"
-#include "ibuf0ibuf.h"
-#include "dict0mem.h"
-#include "dict0types.h"
-#include "ha_prototypes.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-#include "trx0i_s.h"
-#include "trx0trx.h"
-#include "srv0mon.h"
-#include "fut0fut.h"
-#include "pars0pars.h"
-#include "fts0types.h"
-#include "fts0opt.h"
-#include "fts0priv.h"
-#include "log0online.h"
-#include "btr0btr.h"
-#include "page0zip.h"
-#include "sync0arr.h"
-#include "fil0fil.h"
-#include "fil0crypt.h"
-
-/** Structure that associates a name string with a file page type and/or
-buffer page state. */
-struct buf_page_desc_t{
- const char* type_str; /*!< String explaining the page
- type/state */
- ulint type_value; /*!< Page type or page state */
-};
-
-/** Change buffer B-tree page; defined as the first value after
-FIL_PAGE_TYPE_LAST */
-#define I_S_PAGE_TYPE_IBUF (FIL_PAGE_TYPE_LAST + 1)
-
-/** Any states greater than I_S_PAGE_TYPE_IBUF would be treated as
-unknown. */
-#define I_S_PAGE_TYPE_UNKNOWN (I_S_PAGE_TYPE_IBUF + 1)
-
-/** We also define I_S_PAGE_TYPE_INDEX as the Index Page's position
-in the i_s_page_type[] array */
-#define I_S_PAGE_TYPE_INDEX 1
-
-/** Name strings for the file page types. The "INDEX" entry has to stay
-at position I_S_PAGE_TYPE_INDEX of this array. */
-static buf_page_desc_t i_s_page_type[] = {
- {"ALLOCATED", FIL_PAGE_TYPE_ALLOCATED},
- {"INDEX", FIL_PAGE_INDEX},
- {"UNDO_LOG", FIL_PAGE_UNDO_LOG},
- {"INODE", FIL_PAGE_INODE},
- {"IBUF_FREE_LIST", FIL_PAGE_IBUF_FREE_LIST},
- {"IBUF_BITMAP", FIL_PAGE_IBUF_BITMAP},
- {"SYSTEM", FIL_PAGE_TYPE_SYS},
- {"TRX_SYSTEM", FIL_PAGE_TYPE_TRX_SYS},
- {"FILE_SPACE_HEADER", FIL_PAGE_TYPE_FSP_HDR},
- {"EXTENT_DESCRIPTOR", FIL_PAGE_TYPE_XDES},
- {"BLOB", FIL_PAGE_TYPE_BLOB},
- {"COMPRESSED_BLOB", FIL_PAGE_TYPE_ZBLOB},
- {"COMPRESSED_BLOB2", FIL_PAGE_TYPE_ZBLOB2},
- {"IBUF_INDEX", I_S_PAGE_TYPE_IBUF},
- {"PAGE COMPRESSED", FIL_PAGE_PAGE_COMPRESSED},
- {"UNKNOWN", I_S_PAGE_TYPE_UNKNOWN}
-};
-
-/* Check that all page types can be held in a 4-bit value */
-#if I_S_PAGE_TYPE_UNKNOWN > 1<<4
-# error "i_s_page_type[] is too large"
-#endif
-
-/** This structure defines the information we will fetch from pages
-currently cached in the buffer pool. It will be used to populate
-table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE */
-struct buf_page_info_t{
- ulint block_id; /*!< Buffer Pool block ID */
- unsigned space_id:32; /*!< Tablespace ID */
- unsigned page_num:32; /*!< Page number/offset */
- unsigned access_time:32; /*!< Time of first access */
- unsigned pool_id:MAX_BUFFER_POOLS_BITS;
- /*!< Buffer Pool ID. Must be less than
- MAX_BUFFER_POOLS */
- unsigned flush_type:2; /*!< Flush type */
- unsigned io_fix:2; /*!< Type of pending I/O operation */
- unsigned fix_count:19; /*!< Count of how manyfold this block
- is bufferfixed */
- unsigned hashed:1; /*!< Whether hash index has been
- built on this page */
- unsigned is_old:1; /*!< TRUE if the block is in the old
- blocks in buf_pool->LRU_old */
- unsigned freed_page_clock:31; /*!< The value of
- buf_pool->freed_page_clock */
- unsigned zip_ssize:PAGE_ZIP_SSIZE_BITS;
- /*!< Compressed page size */
- unsigned page_state:BUF_PAGE_STATE_BITS; /*!< Page state */
- unsigned page_type:4; /*!< Page type; stored in 4 bits */
- unsigned num_recs:UNIV_PAGE_SIZE_SHIFT_MAX-2;
- /*!< Number of records on Page */
- unsigned data_size:UNIV_PAGE_SIZE_SHIFT_MAX;
- /*!< Sum of the sizes of the records */
- lsn_t newest_mod; /*!< Log sequence number of
- the youngest modification */
- lsn_t oldest_mod; /*!< Log sequence number of
- the oldest modification */
- index_id_t index_id; /*!< Index ID if an index page */
-};
-
-/*
-Use the following types mapping:
-
-C type ST_FIELD_INFO::field_type
----------------------------------
-long MYSQL_TYPE_LONGLONG
-(field_length=MY_INT64_NUM_DECIMAL_DIGITS)
-
-long unsigned MYSQL_TYPE_LONGLONG
-(field_length=MY_INT64_NUM_DECIMAL_DIGITS, field_flags=MY_I_S_UNSIGNED)
-
-char* MYSQL_TYPE_STRING
-(field_length=n)
-
-float MYSQL_TYPE_FLOAT
-(field_length=0 is ignored)
-
-void* MYSQL_TYPE_LONGLONG
-(field_length=MY_INT64_NUM_DECIMAL_DIGITS, field_flags=MY_I_S_UNSIGNED)
-
-boolean (if else) MYSQL_TYPE_LONG
-(field_length=1)
-
-time_t MYSQL_TYPE_DATETIME
-(field_length=0 ignored)
----------------------------------
-*/
-
-/** Implemented in sync0arr.cc */
-/*******************************************************************//**
-Function to populate INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
-Loop through each item on sync array, and extract the column
-information and fill the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
-@return 0 on success */
-UNIV_INTERN
-int
-sync_arr_fill_sys_semphore_waits_table(
-/*===================================*/
- THD* thd, /*!< in: thread */
- TABLE_LIST* tables, /*!< in/out: tables to fill */
- Item* ); /*!< in: condition (not used) */
-
-/*******************************************************************//**
-Common function to fill any of the dynamic tables:
-INFORMATION_SCHEMA.innodb_trx
-INFORMATION_SCHEMA.innodb_locks
-INFORMATION_SCHEMA.innodb_lock_waits
-@return 0 on success */
-static
-int
-trx_i_s_common_fill_table(
-/*======================*/
- THD* thd, /*!< in: thread */
- TABLE_LIST* tables, /*!< in/out: tables to fill */
- Item* ); /*!< in: condition (not used) */
-
-/*******************************************************************//**
-Unbind a dynamic INFORMATION_SCHEMA table.
-@return 0 on success */
-static
-int
-i_s_common_deinit(
-/*==============*/
- void* p); /*!< in/out: table schema object */
-/*******************************************************************//**
-Auxiliary function to store a time_t value in a MYSQL_TYPE_DATETIME
-field. A zero time is stored as an all-zero MYSQL_TIME; any other value
-is converted with localtime_r() and localtime_to_TIME().
-@return 0 on success */
-static
-int
-field_store_time_t(
-/*===============*/
-	Field*	field,	/*!< in/out: target field for storage */
-	time_t	time)	/*!< in: value to store */
-{
-	MYSQL_TIME	my_time;
-
-	if (!time) {
-		memset(&my_time, 0, sizeof(my_time));
-
-		return(field->store_time(&my_time));
-	}
-
-	/* thd->variables.time_zone->gmt_sec_to_TIME(&my_time,
-	(my_time_t) time) could be used here instead, but only if one
-	is sure that `variables' and `time_zone' are always
-	initialized. */
-	struct tm	tm_time;
-
-	localtime_r(&time, &tm_time);
-	localtime_to_TIME(&my_time, &tm_time);
-	my_time.time_type = MYSQL_TIMESTAMP_DATETIME;
-
-	return(field->store_time(&my_time));
-}
-
-/*******************************************************************//**
-Auxiliary function to store a char* value in a MYSQL_TYPE_STRING field.
-A NULL pointer sets the field to SQL NULL; otherwise the string is stored
-using system_charset_info and the field is marked NOT NULL.
-@return 0 on success */
-int
-field_store_string(
-/*===============*/
-	Field*		field,	/*!< in/out: target field for storage */
-	const char*	str)	/*!< in: NUL-terminated utf-8 string,
-				or NULL */
-{
-	if (str == NULL) {
-		field->set_null();
-
-		return(0);	/* success */
-	}
-
-	const uint	len = static_cast<uint>(strlen(str));
-	const int	ret = field->store(str, len, system_charset_info);
-
-	field->set_notnull();
-
-	return(ret);
-}
-
-/*******************************************************************//**
-Store the name of an index in a MYSQL_TYPE_VARCHAR field.
-Handles the names of incomplete secondary indexes: a leading
-TEMP_INDEX_PREFIX byte is replaced by '?' before the name is stored.
-The converted name is built in a local buffer of NAME_LEN + 1 bytes;
-a name that would not fit is truncated to NAME_LEN bytes instead of
-being copied past the end of the buffer.
-@return 0 on success */
-static
-int
-field_store_index_name(
-/*===================*/
-	Field*		field,		/*!< in/out: target field for
-					storage */
-	const char*	index_name)	/*!< in: NUL-terminated utf-8
-					index name, possibly starting with
-					TEMP_INDEX_PREFIX */
-{
-	ut_ad(index_name != NULL);
-	ut_ad(field->real_type() == MYSQL_TYPE_VARCHAR);
-
-	char		buf[NAME_LEN + 1];
-	const char*	name = index_name;
-	size_t		len = strlen(index_name);
-
-	/* Since TEMP_INDEX_PREFIX is not a valid UTF8, we need to convert
-	it to something else. */
-	if (index_name[0] == TEMP_INDEX_PREFIX) {
-		/* The first byte matched the prefix, so len >= 1 here.
-		Keep room for the terminating NUL in buf. */
-		if (len > sizeof(buf) - 1) {
-			len = sizeof(buf) - 1;
-		}
-
-		buf[0] = '?';
-		memcpy(buf + 1, index_name + 1, len - 1);
-		buf[len] = '\0';
-		name = buf;
-	}
-
-	const int	ret = field->store(
-		name, static_cast<uint>(len), system_charset_info);
-
-	field->set_notnull();
-
-	return(ret);
-}
-
-/*******************************************************************//**
-Auxiliary function to store ulint value in MYSQL_TYPE_LONGLONG field.
-If the value is ULINT_UNDEFINED then the field is set to NULL.
-The value is stored through the integer overload of Field::store() as an
-unsigned number, so that values which a double cannot represent exactly
-(above 2^53 when ulint is 64 bits wide) are not rounded.
-@return 0 on success */
-int
-field_store_ulint(
-/*==============*/
-	Field*	field,	/*!< in/out: target field for storage */
-	ulint	n)	/*!< in: value to store */
-{
-	int	ret;
-
-	if (n != ULINT_UNDEFINED) {
-
-		/* unsigned_val=true: interpret the bits as unsigned */
-		ret = field->store(static_cast<longlong>(n), true);
-		field->set_notnull();
-	} else {
-
-		ret = 0; /* success */
-		field->set_null();
-	}
-
-	return(ret);
-}
-
-/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_trx.
-Each IDX_TRX_* macro is the position in this array of the entry that
-follows it; fill_innodb_trx_from_cache() uses these macros to index
-table->field, so the macro values must stay in step with the entry order.
-NOTE: IDX_TRX_CONNCURRENCY_TICKETS is misspelled ("CONN"); the fill
-function uses the same spelling, so any rename must touch both places. */
-static ST_FIELD_INFO innodb_trx_fields_info[] =
-{
-#define IDX_TRX_ID 0
- {STRUCT_FLD(field_name, "trx_id"),
- STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_STATE 1
- {STRUCT_FLD(field_name, "trx_state"),
- STRUCT_FLD(field_length, TRX_QUE_STATE_STR_MAX_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_STARTED 2
- {STRUCT_FLD(field_name, "trx_started"),
- STRUCT_FLD(field_length, 0),
- STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_REQUESTED_LOCK_ID 3
- {STRUCT_FLD(field_name, "trx_requested_lock_id"),
- STRUCT_FLD(field_length, TRX_I_S_LOCK_ID_MAX_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_WAIT_STARTED 4
- {STRUCT_FLD(field_name, "trx_wait_started"),
- STRUCT_FLD(field_length, 0),
- STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_WEIGHT 5
- {STRUCT_FLD(field_name, "trx_weight"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_MYSQL_THREAD_ID 6
- {STRUCT_FLD(field_name, "trx_mysql_thread_id"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_QUERY 7
- {STRUCT_FLD(field_name, "trx_query"),
- STRUCT_FLD(field_length, TRX_I_S_TRX_QUERY_MAX_LEN),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_OPERATION_STATE 8
- {STRUCT_FLD(field_name, "trx_operation_state"),
- STRUCT_FLD(field_length, TRX_I_S_TRX_OP_STATE_MAX_LEN),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_TABLES_IN_USE 9
- {STRUCT_FLD(field_name, "trx_tables_in_use"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_TABLES_LOCKED 10
- {STRUCT_FLD(field_name, "trx_tables_locked"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_LOCK_STRUCTS 11
- {STRUCT_FLD(field_name, "trx_lock_structs"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_LOCK_MEMORY_BYTES 12
- {STRUCT_FLD(field_name, "trx_lock_memory_bytes"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_ROWS_LOCKED 13
- {STRUCT_FLD(field_name, "trx_rows_locked"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_ROWS_MODIFIED 14
- {STRUCT_FLD(field_name, "trx_rows_modified"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_CONNCURRENCY_TICKETS 15
- {STRUCT_FLD(field_name, "trx_concurrency_tickets"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_ISOLATION_LEVEL 16
- {STRUCT_FLD(field_name, "trx_isolation_level"),
- STRUCT_FLD(field_length, TRX_I_S_TRX_ISOLATION_LEVEL_MAX_LEN),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_UNIQUE_CHECKS 17
- {STRUCT_FLD(field_name, "trx_unique_checks"),
- STRUCT_FLD(field_length, 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 1),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_FOREIGN_KEY_CHECKS 18
- {STRUCT_FLD(field_name, "trx_foreign_key_checks"),
- STRUCT_FLD(field_length, 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 1),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_LAST_FOREIGN_KEY_ERROR 19
- {STRUCT_FLD(field_name, "trx_last_foreign_key_error"),
- STRUCT_FLD(field_length, TRX_I_S_TRX_FK_ERROR_MAX_LEN),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_ADAPTIVE_HASH_LATCHED 20
- {STRUCT_FLD(field_name, "trx_adaptive_hash_latched"),
- STRUCT_FLD(field_length, 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_ADAPTIVE_HASH_TIMEOUT 21
- {STRUCT_FLD(field_name, "trx_adaptive_hash_timeout"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_READ_ONLY 22
- {STRUCT_FLD(field_name, "trx_is_read_only"),
- STRUCT_FLD(field_length, 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TRX_AUTOCOMMIT_NON_LOCKING 23
- {STRUCT_FLD(field_name, "trx_autocommit_non_locking"),
- STRUCT_FLD(field_length, 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/*******************************************************************//**
-Copy every INNODB_TRX row held in the cache into the
-INFORMATION_SCHEMA.innodb_trx table, storing one record per cached row.
-Any failing store (checked through OK()) ends the function early.
-@return 0 on success */
-static
-int
-fill_innodb_trx_from_cache(
-/*=======================*/
-	trx_i_s_cache_t*	cache,	/*!< in: cache to read from */
-	THD*			thd,	/*!< in: used to call
-					schema_table_store_record() */
-	TABLE*			table)	/*!< in/out: fill this table */
-{
-	DBUG_ENTER("fill_innodb_trx_from_cache");
-
-	Field** const	fields = table->field;
-	const ulint	n_rows = trx_i_s_cache_get_rows_used(
-		cache, I_S_INNODB_TRX);
-	char		lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1];
-
-	for (ulint n = 0; n < n_rows; n++) {
-
-		char		trx_id[TRX_ID_MAX_LEN + 1];
-		i_s_trx_row_t*	row = (i_s_trx_row_t*)
-			trx_i_s_cache_get_nth_row(cache, I_S_INNODB_TRX, n);
-
-		/* trx_id: formatted into a string first */
-		ut_snprintf(trx_id, sizeof(trx_id), TRX_ID_FMT, row->trx_id);
-		OK(field_store_string(fields[IDX_TRX_ID], trx_id));
-
-		/* trx_state */
-		OK(field_store_string(fields[IDX_TRX_STATE],
-				      row->trx_state));
-
-		/* trx_started */
-		OK(field_store_time_t(fields[IDX_TRX_STARTED],
-				      static_cast<time_t>(row->trx_started)));
-
-		/* trx_requested_lock_id and trx_wait_started are either
-		both NULL (transaction is not waiting) or both set */
-		if (row->trx_wait_started == 0) {
-
-			fields[IDX_TRX_REQUESTED_LOCK_ID]->set_null();
-			fields[IDX_TRX_WAIT_STARTED]->set_null();
-		} else {
-
-			/* field_store_string() marks the field as
-			not NULL */
-			OK(field_store_string(
-				   fields[IDX_TRX_REQUESTED_LOCK_ID],
-				   trx_i_s_create_lock_id(
-					   row->requested_lock_row,
-					   lock_id, sizeof(lock_id))));
-
-			OK(field_store_time_t(
-				   fields[IDX_TRX_WAIT_STARTED],
-				   static_cast<time_t>(
-					   row->trx_wait_started)));
-			fields[IDX_TRX_WAIT_STARTED]->set_notnull();
-		}
-
-		/* trx_weight */
-		OK(fields[IDX_TRX_WEIGHT]->store(
-			   static_cast<longlong>(row->trx_weight), true));
-
-		/* trx_mysql_thread_id */
-		OK(fields[IDX_TRX_MYSQL_THREAD_ID]->store(
-			   static_cast<double>(row->trx_mysql_thread_id)));
-
-		/* trx_query: the result of store() is not checked here;
-		store() performs the character set conversion check */
-		if (row->trx_query == NULL) {
-			fields[IDX_TRX_QUERY]->set_null();
-		} else {
-			fields[IDX_TRX_QUERY]->store(
-				row->trx_query,
-				static_cast<uint>(strlen(row->trx_query)),
-				row->trx_query_cs);
-			fields[IDX_TRX_QUERY]->set_notnull();
-		}
-
-		/* trx_operation_state */
-		OK(field_store_string(fields[IDX_TRX_OPERATION_STATE],
-				      row->trx_operation_state));
-
-		/* trx_tables_in_use */
-		OK(fields[IDX_TRX_TABLES_IN_USE]->store(
-			   static_cast<longlong>(row->trx_tables_in_use),
-			   true));
-
-		/* trx_tables_locked */
-		OK(fields[IDX_TRX_TABLES_LOCKED]->store(
-			   static_cast<longlong>(row->trx_tables_locked),
-			   true));
-
-		/* trx_lock_structs */
-		OK(fields[IDX_TRX_LOCK_STRUCTS]->store(
-			   static_cast<longlong>(row->trx_lock_structs),
-			   true));
-
-		/* trx_lock_memory_bytes */
-		OK(fields[IDX_TRX_LOCK_MEMORY_BYTES]->store(
-			   static_cast<longlong>(row->trx_lock_memory_bytes),
-			   true));
-
-		/* trx_rows_locked */
-		OK(fields[IDX_TRX_ROWS_LOCKED]->store(
-			   static_cast<longlong>(row->trx_rows_locked),
-			   true));
-
-		/* trx_rows_modified */
-		OK(fields[IDX_TRX_ROWS_MODIFIED]->store(
-			   static_cast<longlong>(row->trx_rows_modified),
-			   true));
-
-		/* trx_concurrency_tickets */
-		OK(fields[IDX_TRX_CONNCURRENCY_TICKETS]->store(
-			   static_cast<longlong>(
-				   row->trx_concurrency_tickets),
-			   true));
-
-		/* trx_isolation_level */
-		OK(field_store_string(fields[IDX_TRX_ISOLATION_LEVEL],
-				      row->trx_isolation_level));
-
-		/* trx_unique_checks */
-		OK(fields[IDX_TRX_UNIQUE_CHECKS]->store(
-			   static_cast<double>(row->trx_unique_checks)));
-
-		/* trx_foreign_key_checks */
-		OK(fields[IDX_TRX_FOREIGN_KEY_CHECKS]->store(
-			   static_cast<double>(
-				   row->trx_foreign_key_checks)));
-
-		/* trx_last_foreign_key_error */
-		OK(field_store_string(
-			   fields[IDX_TRX_LAST_FOREIGN_KEY_ERROR],
-			   row->trx_foreign_key_error));
-
-		/* trx_adaptive_hash_latched */
-		OK(fields[IDX_TRX_ADAPTIVE_HASH_LATCHED]->store(
-			   static_cast<double>(row->trx_has_search_latch)));
-
-		/* trx_adaptive_hash_timeout */
-		OK(fields[IDX_TRX_ADAPTIVE_HASH_TIMEOUT]->store(
-			   static_cast<longlong>(
-				   row->trx_search_latch_timeout),
-			   true));
-
-		/* trx_is_read_only */
-		OK(fields[IDX_TRX_READ_ONLY]->store(
-			   static_cast<longlong>(row->trx_is_read_only),
-			   true));
-
-		/* trx_autocommit_non_locking */
-		OK(fields[IDX_TRX_AUTOCOMMIT_NON_LOCKING]->store(
-			   static_cast<longlong>(
-				   row->trx_is_autocommit_non_locking),
-			   true));
-
-		/* the row is complete: hand it over to the server */
-		OK(schema_table_store_record(thd, table));
-	}
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Plugin init callback for INFORMATION_SCHEMA.innodb_trx: attaches the
-column definitions and the shared fill function to the schema table.
-@return 0 on success */
-static
-int
-innodb_trx_init(
-/*============*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_trx_init");
-
-	ST_SCHEMA_TABLE* const	schema_table
-		= static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fields_info = innodb_trx_fields_info;
-	schema_table->fill_table = trx_i_s_common_fill_table;
-
-	DBUG_RETURN(0);
-}
-
-static struct st_mysql_information_schema i_s_info =
-{
- MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION /* the only initializer; this
- one descriptor is shared: the plugin structs in this file
- (i_s_innodb_trx, i_s_innodb_locks, i_s_innodb_lock_waits, ...) pass
- &i_s_info as their "info" member */
-};
-
-UNIV_INTERN struct st_maria_plugin i_s_innodb_trx =
-{
- /* Plugin descriptor for INFORMATION_SCHEMA.INNODB_TRX. Loading the
- plugin invokes innodb_trx_init(), which binds the column list and the
- fill function to the schema table object.
-
- First member: the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_TRX"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "InnoDB transactions"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, innodb_trx_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL),
-
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE /* positional
- initializers without STRUCT_FLD; presumably the version string and
- the maturity level -- confirm against struct st_maria_plugin */
-};
-
-/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_locks.
-Each IDX_LOCK_* macro is the position in this array of the entry that
-follows it; fill_innodb_locks_from_cache() uses these macros to index
-table->field, so the macro values must stay in step with the entry order.
-Entries flagged MY_I_S_MAYBE_NULL are the ones the fill function may set
-to NULL (directly or through field_store_ulint()). */
-static ST_FIELD_INFO innodb_locks_fields_info[] =
-{
-#define IDX_LOCK_ID 0
- {STRUCT_FLD(field_name, "lock_id"),
- STRUCT_FLD(field_length, TRX_I_S_LOCK_ID_MAX_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_LOCK_TRX_ID 1
- {STRUCT_FLD(field_name, "lock_trx_id"),
- STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_LOCK_MODE 2
- {STRUCT_FLD(field_name, "lock_mode"),
- /* S[,GAP] X[,GAP] IS[,GAP] IX[,GAP] AUTO_INC UNKNOWN */
- STRUCT_FLD(field_length, 32),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_LOCK_TYPE 3
- {STRUCT_FLD(field_name, "lock_type"),
- STRUCT_FLD(field_length, 32 /* RECORD|TABLE|UNKNOWN */),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_LOCK_TABLE 4
- {STRUCT_FLD(field_name, "lock_table"),
- STRUCT_FLD(field_length, 1024),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_LOCK_INDEX 5
- {STRUCT_FLD(field_name, "lock_index"),
- STRUCT_FLD(field_length, 1024),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_LOCK_SPACE 6
- {STRUCT_FLD(field_name, "lock_space"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_LOCK_PAGE 7
- {STRUCT_FLD(field_name, "lock_page"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_LOCK_REC 8
- {STRUCT_FLD(field_name, "lock_rec"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_LOCK_DATA 9
- {STRUCT_FLD(field_name, "lock_data"),
- STRUCT_FLD(field_length, TRX_I_S_LOCK_DATA_MAX_LEN),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/*******************************************************************//**
-Copy every INNODB_LOCKS row held in the cache into the
-INFORMATION_SCHEMA.innodb_locks table, storing one record per cached row.
-Any failing store (checked through OK()) ends the function early.
-@return 0 on success */
-static
-int
-fill_innodb_locks_from_cache(
-/*=========================*/
-	trx_i_s_cache_t*	cache,	/*!< in: cache to read from */
-	THD*			thd,	/*!< in: MySQL client connection */
-	TABLE*			table)	/*!< in/out: fill this table */
-{
-	DBUG_ENTER("fill_innodb_locks_from_cache");
-
-	Field** const	fields = table->field;
-	const ulint	n_rows = trx_i_s_cache_get_rows_used(
-		cache, I_S_INNODB_LOCKS);
-	char		lock_id[TRX_I_S_LOCK_ID_MAX_LEN + 1];
-
-	for (ulint n = 0; n < n_rows; n++) {
-
-		char		table_name[MAX_FULL_NAME_LEN + 1];
-		char		trx_id[TRX_ID_MAX_LEN + 1];
-		const char*	name_end;
-		i_s_locks_row_t*	row = (i_s_locks_row_t*)
-			trx_i_s_cache_get_nth_row(
-				cache, I_S_INNODB_LOCKS, n);
-
-		/* lock_id */
-		trx_i_s_create_lock_id(row, lock_id, sizeof(lock_id));
-		OK(field_store_string(fields[IDX_LOCK_ID], lock_id));
-
-		/* lock_trx_id: formatted into a string first */
-		ut_snprintf(trx_id, sizeof(trx_id),
-			    TRX_ID_FMT, row->lock_trx_id);
-		OK(field_store_string(fields[IDX_LOCK_TRX_ID], trx_id));
-
-		/* lock_mode */
-		OK(field_store_string(fields[IDX_LOCK_MODE],
-				      row->lock_mode));
-
-		/* lock_type */
-		OK(field_store_string(fields[IDX_LOCK_TYPE],
-				      row->lock_type));
-
-		/* lock_table: converted by innobase_convert_name(), which
-		returns the end of the converted text in the buffer */
-		name_end = innobase_convert_name(
-			table_name, sizeof(table_name),
-			row->lock_table, strlen(row->lock_table),
-			thd, TRUE);
-		OK(fields[IDX_LOCK_TABLE]->store(
-			   table_name,
-			   static_cast<uint>(name_end - table_name),
-			   system_charset_info));
-
-		/* lock_index: NULL when the row carries no index name */
-		if (row->lock_index == NULL) {
-			fields[IDX_LOCK_INDEX]->set_null();
-		} else {
-			OK(field_store_index_name(fields[IDX_LOCK_INDEX],
-						  row->lock_index));
-		}
-
-		/* lock_space */
-		OK(field_store_ulint(fields[IDX_LOCK_SPACE],
-				     row->lock_space));
-
-		/* lock_page */
-		OK(field_store_ulint(fields[IDX_LOCK_PAGE],
-				     row->lock_page));
-
-		/* lock_rec */
-		OK(field_store_ulint(fields[IDX_LOCK_REC],
-				     row->lock_rec));
-
-		/* lock_data */
-		OK(field_store_string(fields[IDX_LOCK_DATA],
-				      row->lock_data));
-
-		/* the row is complete: hand it over to the server */
-		OK(schema_table_store_record(thd, table));
-	}
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Plugin init callback for INFORMATION_SCHEMA.innodb_locks: attaches the
-column definitions and the shared fill function to the schema table.
-@return 0 on success */
-static
-int
-innodb_locks_init(
-/*==============*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_locks_init");
-
-	ST_SCHEMA_TABLE* const	schema_table
-		= static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fields_info = innodb_locks_fields_info;
-	schema_table->fill_table = trx_i_s_common_fill_table;
-
-	DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_maria_plugin i_s_innodb_locks =
-{
- /* Plugin descriptor for INFORMATION_SCHEMA.INNODB_LOCKS. Loading the
- plugin invokes innodb_locks_init(), which binds the column list and
- the fill function to the schema table object.
-
- First member: the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_LOCKS"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "InnoDB conflicting locks"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, innodb_locks_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL),
-
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE /* positional
- initializers without STRUCT_FLD; presumably the version string and
- the maturity level -- confirm against struct st_maria_plugin */
-};
-
-/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_lock_waits.
-Each IDX_* macro is the position in this array of the entry that follows
-it; fill_innodb_lock_waits_from_cache() uses these macros to index
-table->field, so the macro values must stay in step with the entry order.
-All four columns are strings and none is flagged MY_I_S_MAYBE_NULL. */
-static ST_FIELD_INFO innodb_lock_waits_fields_info[] =
-{
-#define IDX_REQUESTING_TRX_ID 0
- {STRUCT_FLD(field_name, "requesting_trx_id"),
- STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_REQUESTED_LOCK_ID 1
- {STRUCT_FLD(field_name, "requested_lock_id"),
- STRUCT_FLD(field_length, TRX_I_S_LOCK_ID_MAX_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BLOCKING_TRX_ID 2
- {STRUCT_FLD(field_name, "blocking_trx_id"),
- STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BLOCKING_LOCK_ID 3
- {STRUCT_FLD(field_name, "blocking_lock_id"),
- STRUCT_FLD(field_length, TRX_I_S_LOCK_ID_MAX_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/*******************************************************************//**
-Copy every INNODB_LOCK_WAITS row held in the cache into the
-INFORMATION_SCHEMA.innodb_lock_waits table, storing one record per
-cached row. Any failing store (checked through OK()) ends the function
-early.
-@return 0 on success */
-static
-int
-fill_innodb_lock_waits_from_cache(
-/*==============================*/
-	trx_i_s_cache_t*	cache,	/*!< in: cache to read from */
-	THD*			thd,	/*!< in: used to call
-					schema_table_store_record() */
-	TABLE*			table)	/*!< in/out: fill this table */
-{
-	DBUG_ENTER("fill_innodb_lock_waits_from_cache");
-
-	Field** const	fields = table->field;
-	const ulint	n_rows = trx_i_s_cache_get_rows_used(
-		cache, I_S_INNODB_LOCK_WAITS);
-
-	for (ulint n = 0; n < n_rows; n++) {
-
-		/* scratch buffers for the textual ids of this row */
-		char	requesting_trx[TRX_ID_MAX_LEN + 1];
-		char	requested_lock[TRX_I_S_LOCK_ID_MAX_LEN + 1];
-		char	blocking_trx[TRX_ID_MAX_LEN + 1];
-		char	blocking_lock[TRX_I_S_LOCK_ID_MAX_LEN + 1];
-
-		i_s_lock_waits_row_t*	row = (i_s_lock_waits_row_t*)
-			trx_i_s_cache_get_nth_row(
-				cache, I_S_INNODB_LOCK_WAITS, n);
-
-		/* requesting_trx_id */
-		ut_snprintf(requesting_trx, sizeof(requesting_trx),
-			    TRX_ID_FMT,
-			    row->requested_lock_row->lock_trx_id);
-		OK(field_store_string(fields[IDX_REQUESTING_TRX_ID],
-				      requesting_trx));
-
-		/* requested_lock_id */
-		OK(field_store_string(
-			   fields[IDX_REQUESTED_LOCK_ID],
-			   trx_i_s_create_lock_id(
-				   row->requested_lock_row,
-				   requested_lock,
-				   sizeof(requested_lock))));
-
-		/* blocking_trx_id */
-		ut_snprintf(blocking_trx, sizeof(blocking_trx),
-			    TRX_ID_FMT,
-			    row->blocking_lock_row->lock_trx_id);
-		OK(field_store_string(fields[IDX_BLOCKING_TRX_ID],
-				      blocking_trx));
-
-		/* blocking_lock_id */
-		OK(field_store_string(
-			   fields[IDX_BLOCKING_LOCK_ID],
-			   trx_i_s_create_lock_id(
-				   row->blocking_lock_row,
-				   blocking_lock,
-				   sizeof(blocking_lock))));
-
-		/* the row is complete: hand it over to the server */
-		OK(schema_table_store_record(thd, table));
-	}
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Plugin init callback for INFORMATION_SCHEMA.innodb_lock_waits: attaches
-the column definitions and the shared fill function to the schema table.
-@return 0 on success */
-static
-int
-innodb_lock_waits_init(
-/*===================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_lock_waits_init");
-
-	ST_SCHEMA_TABLE* const	schema_table
-		= static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fields_info = innodb_lock_waits_fields_info;
-	schema_table->fill_table = trx_i_s_common_fill_table;
-
-	DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_maria_plugin i_s_innodb_lock_waits =
-{
- /* Plugin descriptor for INFORMATION_SCHEMA.INNODB_LOCK_WAITS. Loading
- the plugin invokes innodb_lock_waits_init(), which binds the column
- list and the fill function to the schema table object.
-
- First member: the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_LOCK_WAITS"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "InnoDB which lock is blocking which"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, innodb_lock_waits_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL),
-
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE /* positional
- initializers without STRUCT_FLD; presumably the version string and
- the maturity level -- confirm against struct st_maria_plugin */
-};
-
-/*******************************************************************//**
-Shared fill_table callback for the dynamic tables
-INFORMATION_SCHEMA.innodb_trx
-INFORMATION_SCHEMA.innodb_locks
-INFORMATION_SCHEMA.innodb_lock_waits
-It refreshes the cache, then picks the matching fill function by the
-schema table name. The failure status of the fill is currently not
-propagated to the caller; see the comment at the end of the function.
-@return 0 on success */
-static
-int
-trx_i_s_common_fill_table(
-/*======================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	typedef int	(*fill_func_t)(trx_i_s_cache_t*, THD*, TABLE*);
-
-	DBUG_ENTER("trx_i_s_common_fill_table");
-
-	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	/* touch the global cache variable in one place only */
-	trx_i_s_cache_t* const	cache = trx_i_s_cache;
-
-	/* name of the table to fill;
-	tables->schema_table->table_name would be an alternative */
-	const char* const	table_name = tables->schema_table_name;
-
-	RETURN_IF_INNODB_NOT_STARTED(table_name);
-
-	/* update the cache */
-	trx_i_s_cache_start_write(cache);
-	trx_i_s_possibly_fetch_data_into_cache(cache);
-	trx_i_s_cache_end_write(cache);
-
-	if (trx_i_s_cache_is_truncated(cache)) {
-		/* XXX show warning to user if possible */
-		fprintf(stderr, "Warning: data in %s truncated due to "
-			"memory limit of %d bytes\n", table_name,
-			TRX_I_S_MEM_LIMIT);
-	}
-
-	trx_i_s_cache_start_read(cache);
-
-	/* select the fill function that belongs to the table name */
-	fill_func_t	fill = NULL;
-
-	if (innobase_strcasecmp(table_name, "innodb_trx") == 0) {
-		fill = fill_innodb_trx_from_cache;
-	} else if (innobase_strcasecmp(table_name, "innodb_locks") == 0) {
-		fill = fill_innodb_locks_from_cache;
-	} else if (innobase_strcasecmp(table_name,
-				       "innodb_lock_waits") == 0) {
-		fill = fill_innodb_lock_waits_from_cache;
-	}
-
-	int	ret;
-
-	if (fill != NULL) {
-		ret = (fill(cache, thd, tables->table) != 0) ? 1 : 0;
-	} else {
-		/* huh! what happened!? */
-		fprintf(stderr,
-			"InnoDB: trx_i_s_common_fill_table() was "
-			"called to fill unknown table: %s.\n"
-			"This function only knows how to fill "
-			"innodb_trx, innodb_locks and "
-			"innodb_lock_waits tables.\n", table_name);
-
-		ret = 1;
-	}
-
-	trx_i_s_cache_end_read(cache);
-
-#if 0
-	DBUG_RETURN(ret);
-#else
-	/* if this function returns something else than 0 then a
-	deadlock occurs between the mysqld server and mysql client,
-	see http://bugs.mysql.com/29900 ; when that bug is resolved
-	we can enable the DBUG_RETURN(ret) above */
-	(void) ret;	/* keep the otherwise unused status "used" */
-	DBUG_RETURN(0);
-#endif
-}
-
-/* Fields of the dynamic table information_schema.innodb_cmp.
-The same array is bound by both i_s_cmp_init() and i_s_cmp_reset_init(),
-so innodb_cmp and innodb_cmp_reset share these columns.
-There are no IDX_ macros here: i_s_cmp_fill_low() addresses the columns
-by their numeric position (table->field[0] .. table->field[5]), so the
-entry order must not change without updating that function. */
-static ST_FIELD_INFO i_s_cmp_fields_info[] =
-{
- {STRUCT_FLD(field_name, "page_size"),
- STRUCT_FLD(field_length, 5),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, "Compressed Page Size"),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "compress_ops"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, "Total Number of Compressions"),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "compress_ops_ok"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, "Total Number of"
- " Successful Compressions"),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "compress_time"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, "Total Duration of Compressions,"
- " in Seconds"),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "uncompress_ops"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, "Total Number of Decompressions"),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "uncompress_time"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, "Total Duration of Decompressions,"
- " in Seconds"),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-
-/*******************************************************************//**
-Fill the dynamic table information_schema.innodb_cmp or
-innodb_cmp_reset: one record per entry of page_zip_stat[], optionally
-zeroing each entry after it has been read.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_cmp_fill_low(
-/*=============*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		,	/*!< in: condition (ignored) */
-	ibool		reset)	/*!< in: TRUE=reset cumulated counts */
-{
-	DBUG_ENTER("i_s_cmp_fill_low");
-
-	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	TABLE* const	table = (TABLE*) tables->table;
-	Field** const	fields = table->field;
-
-	for (uint ssize = 0; ssize < PAGE_ZIP_SSIZE_MAX; ssize++) {
-		page_zip_stat_t* const	stat = &page_zip_stat[ssize];
-
-		/* page_size */
-		fields[0]->store(UNIV_ZIP_SIZE_MIN << ssize);
-
-		/* The cumulated counts are not protected by any
-		mutex. Thus, some operation in page0zip.cc could
-		increment a counter between the time we read it and
-		clear it. We could introduce mutex protection, but it
-		could cause a measurable performance hit in
-		page0zip.cc. */
-
-		/* compress_ops, compress_ops_ok */
-		fields[1]->store(static_cast<double>(stat->compressed));
-		fields[2]->store(static_cast<double>(stat->compressed_ok));
-
-		/* compress_time: microseconds reported as whole seconds */
-		fields[3]->store(
-			static_cast<double>(stat->compressed_usec / 1000000));
-
-		/* uncompress_ops */
-		fields[4]->store(static_cast<double>(stat->decompressed));
-
-		/* uncompress_time: microseconds reported as whole seconds */
-		fields[5]->store(
-			static_cast<double>(
-				stat->decompressed_usec / 1000000));
-
-		if (reset) {
-			memset(stat, 0, sizeof *stat);
-		}
-
-		if (schema_table_store_record(thd, table)) {
-			/* stop at the first record that cannot be stored */
-			DBUG_RETURN(1);
-		}
-	}
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Fill the dynamic table information_schema.innodb_cmp; the cumulated
-counters are left untouched.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_cmp_fill(
-/*=========*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		cond)	/*!< in: condition (ignored) */
-{
-	const ibool	reset_counters = FALSE;
-
-	return(i_s_cmp_fill_low(thd, tables, cond, reset_counters));
-}
-
-/*******************************************************************//**
-Fill the dynamic table information_schema.innodb_cmp_reset; the
-cumulated counters are reset as they are read.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_cmp_reset_fill(
-/*===============*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		cond)	/*!< in: condition (ignored) */
-{
-	const ibool	reset_counters = TRUE;
-
-	return(i_s_cmp_fill_low(thd, tables, cond, reset_counters));
-}
-
-/*******************************************************************//**
-Plugin init hook for information_schema.innodb_cmp: installs the column
-definitions and the fill callback on the schema table object.
-@return 0 on success */
-static
-int
-i_s_cmp_init(
-/*=========*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("i_s_cmp_init");
-
-	ST_SCHEMA_TABLE* const	schema_table
-		= static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fill_table = i_s_cmp_fill;
-	schema_table->fields_info = i_s_cmp_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Plugin init hook for information_schema.innodb_cmp_reset: installs the
-column definitions (shared with innodb_cmp) and the resetting fill
-callback on the schema table object.
-@return 0 on success */
-static
-int
-i_s_cmp_reset_init(
-/*===============*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("i_s_cmp_reset_init");
-
-	ST_SCHEMA_TABLE* const	schema_table
-		= static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fill_table = i_s_cmp_reset_fill;
-	schema_table->fields_info = i_s_cmp_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_maria_plugin i_s_innodb_cmp = /* plugin descriptor for
-the INFORMATION_SCHEMA table INNODB_CMP; its init hook i_s_cmp_init()
-binds the column list and the non-resetting fill callback */
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_CMP"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "Statistics for the InnoDB compression"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded; sets
- fields_info and fill_table on the schema table object */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_cmp_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var*; no status variables */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var**; no system variables */
- STRUCT_FLD(system_vars, NULL),
-
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE /* last two members,
- given positionally without STRUCT_FLD; the per-index descriptors in
- this file label the same two values version_info and maturity */
-};
-
-UNIV_INTERN struct st_maria_plugin i_s_innodb_cmp_reset = /* plugin
-descriptor for the INFORMATION_SCHEMA table INNODB_CMP_RESET; its init
-hook i_s_cmp_reset_init() binds the column list and the resetting fill
-callback */
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_CMP_RESET"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "Statistics for the InnoDB compression;"
- " reset cumulated counts"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded; sets
- fields_info and fill_table on the schema table object */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_cmp_reset_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var*; no status variables */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var**; no system variables */
- STRUCT_FLD(system_vars, NULL),
-
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE /* last two members,
- given positionally without STRUCT_FLD; the per-index descriptors in
- this file label the same two values version_info and maturity */
-};
-
-/* Fields of the dynamic tables
-information_schema.innodb_cmp_per_index and
-information_schema.innodb_cmp_per_index_reset.
-Each IDX_* macro gives the position of the column defined right after
-it; i_s_cmp_per_index_fill_low() uses these macros to index
-table->field[], so the macro values and the entry order must stay in
-step. The three name columns are strings of length 192, the five
-counters are MYSQL_TYPE_LONG. The fill function stores the *_usec
-counters divided by 1000000 into the two *_time columns (seconds, going
-by the _usec naming). END_OF_ST_FIELD_INFO terminates the list. */
-static ST_FIELD_INFO i_s_cmp_per_index_fields_info[] =
-{
-#define IDX_DATABASE_NAME 0
- {STRUCT_FLD(field_name, "database_name"),
- STRUCT_FLD(field_length, 192),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_TABLE_NAME 1
- {STRUCT_FLD(field_name, "table_name"),
- STRUCT_FLD(field_length, 192),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_INDEX_NAME 2 /* holds "index_id:<id>" when the index is not found */
- {STRUCT_FLD(field_name, "index_name"),
- STRUCT_FLD(field_length, 192),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_COMPRESS_OPS 3
- {STRUCT_FLD(field_name, "compress_ops"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_COMPRESS_OPS_OK 4
- {STRUCT_FLD(field_name, "compress_ops_ok"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_COMPRESS_TIME 5 /* filled from compressed_usec / 1000000 */
- {STRUCT_FLD(field_name, "compress_time"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_UNCOMPRESS_OPS 6
- {STRUCT_FLD(field_name, "uncompress_ops"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_UNCOMPRESS_TIME 7 /* filled from decompressed_usec / 1000000 */
- {STRUCT_FLD(field_name, "uncompress_time"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/*******************************************************************//**
-Produce the rows of information_schema.innodb_cmp_per_index or
-information_schema.innodb_cmp_per_index_reset: one row for every entry
-in a snapshot of the per-index compression statistics.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_cmp_per_index_fill_low(
-/*=======================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		,	/*!< in: condition (ignored) */
-	ibool		reset)	/*!< in: TRUE=reset cumulated counts */
-{
-	int	status = 0;
-	TABLE*	table = tables->table;
-	Field**	fields = table->field;
-
-	DBUG_ENTER("i_s_cmp_per_index_fill_low");
-
-	/* A caller that fails the PROCESS_ACL global access check gets
-	an empty result, reported as success. */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* Copy the statistics while holding only their own mutex, so
-	that it is never held together with dict_sys->mutex; this avoids
-	lock order violations in the loop below. */
-	mutex_enter(&page_zip_stat_per_index_mutex);
-	page_zip_stat_per_index_t	snap (page_zip_stat_per_index);
-	mutex_exit(&page_zip_stat_per_index_mutex);
-
-	mutex_enter(&dict_sys->mutex);
-
-	ulint					n_rows = 0;
-	page_zip_stat_per_index_t::iterator	it = snap.begin();
-
-	while (it != snap.end()) {
-
-		dict_index_t*	index = dict_index_find_on_id_low(it->first);
-
-		if (index == NULL) {
-			/* index not found: report its id in place of
-			a name */
-			char	id_label[192];
-
-			ut_snprintf(id_label, sizeof(id_label),
-				    "index_id:" IB_ID_FMT, it->first);
-			field_store_string(fields[IDX_DATABASE_NAME],
-					   "unknown");
-			field_store_string(fields[IDX_TABLE_NAME],
-					   "unknown");
-			field_store_string(fields[IDX_INDEX_NAME],
-					   id_label);
-		} else {
-			char	db_utf8[MAX_DB_UTF8_LEN];
-			char	table_utf8[MAX_TABLE_UTF8_LEN];
-
-			dict_fs2utf8(index->table_name,
-				     db_utf8, sizeof(db_utf8),
-				     table_utf8, sizeof(table_utf8));
-
-			field_store_string(fields[IDX_DATABASE_NAME], db_utf8);
-			field_store_string(fields[IDX_TABLE_NAME], table_utf8);
-			field_store_index_name(fields[IDX_INDEX_NAME],
-					       index->name);
-		}
-
-		/* Operation counters are stored as they are; the
-		microsecond timers are divided by 1000000 first. */
-		fields[IDX_COMPRESS_OPS]->store(
-			static_cast<double>(it->second.compressed));
-		fields[IDX_COMPRESS_OPS_OK]->store(
-			static_cast<double>(it->second.compressed_ok));
-		fields[IDX_COMPRESS_TIME]->store(
-			static_cast<double>(
-				it->second.compressed_usec / 1000000));
-		fields[IDX_UNCOMPRESS_OPS]->store(
-			static_cast<double>(it->second.decompressed));
-		fields[IDX_UNCOMPRESS_TIME]->store(
-			static_cast<double>(
-				it->second.decompressed_usec / 1000000));
-
-		if (schema_table_store_record(thd, table)) {
-			status = 1;
-			break;
-		}
-
-		/* Every 1000 rows (starting with the first one), drop
-		and retake the dict mutex so that other threads can
-		proceed. The reported contents may become inconsistent
-		as a result, which is an accepted compromise. */
-		if (n_rows % 1000 == 0) {
-			mutex_exit(&dict_sys->mutex);
-			mutex_enter(&dict_sys->mutex);
-		}
-
-		++it;
-		++n_rows;
-	}
-
-	mutex_exit(&dict_sys->mutex);
-
-	if (reset) {
-		page_zip_reset_stat_per_index();
-	}
-
-	DBUG_RETURN(status);
-}
-
-/*******************************************************************//**
-Fill callback for information_schema.innodb_cmp_per_index. Delegates to
-i_s_cmp_per_index_fill_low() with the reset flag FALSE, so the
-cumulated counts are kept.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_cmp_per_index_fill(
-/*===================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		cond)	/*!< in: condition (ignored) */
-{
-	const ibool	reset_counts = FALSE;
-
-	return(i_s_cmp_per_index_fill_low(thd, tables, cond, reset_counts));
-}
-
-/*******************************************************************//**
-Fill callback for information_schema.innodb_cmp_per_index_reset.
-Delegates to i_s_cmp_per_index_fill_low() with the reset flag TRUE, so
-the cumulated counts are reset after being reported.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_cmp_per_index_reset_fill(
-/*=========================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		cond)	/*!< in: condition (ignored) */
-{
-	const ibool	reset_counts = TRUE;
-
-	return(i_s_cmp_per_index_fill_low(thd, tables, cond, reset_counts));
-}
-
-/*******************************************************************//**
-Bind the dynamic table information_schema.innodb_cmp_per_index:
-install its column definitions and its (non-resetting) fill callback
-on the schema table object.
-@return 0 on success */
-static
-int
-i_s_cmp_per_index_init(
-/*===================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	/* The DBUG label must name this function; it used to carry the
-	label of i_s_cmp_init, which made debug traces misleading. */
-	DBUG_ENTER("i_s_cmp_per_index_init");
-	ST_SCHEMA_TABLE*	schema = (ST_SCHEMA_TABLE*) p;
-
-	schema->fields_info = i_s_cmp_per_index_fields_info;
-	schema->fill_table = i_s_cmp_per_index_fill;
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Bind the dynamic table information_schema.innodb_cmp_per_index_reset:
-install its column definitions (shared with innodb_cmp_per_index) and
-its resetting fill callback on the schema table object.
-@return 0 on success */
-static
-int
-i_s_cmp_per_index_reset_init(
-/*=========================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	/* The DBUG label must name this function; it used to carry the
-	label of i_s_cmp_reset_init, which made debug traces misleading. */
-	DBUG_ENTER("i_s_cmp_per_index_reset_init");
-	ST_SCHEMA_TABLE*	schema = (ST_SCHEMA_TABLE*) p;
-
-	schema->fields_info = i_s_cmp_per_index_fields_info;
-	schema->fill_table = i_s_cmp_per_index_reset_fill;
-
-	DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmp_per_index = /* plugin
-descriptor for the INFORMATION_SCHEMA table INNODB_CMP_PER_INDEX; its
-init hook i_s_cmp_per_index_init() binds the column list and the
-non-resetting fill callback.
-NOTE(review): declared as st_mysql_plugin while the other descriptors
-in this part of the file use st_maria_plugin - confirm this is
-intended */
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_CMP_PER_INDEX"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "Statistics for the InnoDB compression (per index)"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded; sets
- fields_info and fill_table on the schema table object */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_cmp_per_index_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var*; no status variables */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var**; no system variables */
- STRUCT_FLD(system_vars, NULL),
-
- STRUCT_FLD(version_info, INNODB_VERSION_STR), /* version string */
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), /* maturity level */
-};
-
-UNIV_INTERN struct st_mysql_plugin i_s_innodb_cmp_per_index_reset = /* plugin
-descriptor for the INFORMATION_SCHEMA table INNODB_CMP_PER_INDEX_RESET;
-its init hook i_s_cmp_per_index_reset_init() binds the column list and
-the resetting fill callback.
-NOTE(review): declared as st_mysql_plugin while the other descriptors
-in this part of the file use st_maria_plugin - confirm this is
-intended */
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_CMP_PER_INDEX_RESET"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "Statistics for the InnoDB compression (per index);"
- " reset cumulated counts"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded; sets
- fields_info and fill_table on the schema table object */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_cmp_per_index_reset_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var*; no status variables */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var**; no system variables */
- STRUCT_FLD(system_vars, NULL),
-
- STRUCT_FLD(version_info, INNODB_VERSION_STR), /* version string */
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), /* maturity level */
-};
-
-/* Fields of the dynamic table information_schema.innodb_cmpmem.
-The same list is bound to innodb_cmpmem_reset by
-i_s_cmpmem_reset_init(). There are no index macros for this table:
-i_s_cmpmem_fill_low() addresses the columns by the literal positions
-0..5, so the entry order below must not change. The old_name strings
-carry a human-readable description of each column.
-END_OF_ST_FIELD_INFO terminates the list. */
-static ST_FIELD_INFO i_s_cmpmem_fields_info[] =
-{
- {STRUCT_FLD(field_name, "page_size"), /* field[0]: BUF_BUDDY_LOW << x */
- STRUCT_FLD(field_length, 5),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, "Buddy Block Size"),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "buffer_pool_instance"), /* field[1]: instance number */
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, "Buffer Pool Id"),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "pages_used"), /* field[2]: buddy_stat->used */
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, "Currently in Use"),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "pages_free"), /* field[3]: length of zip_free[x] */
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, "Currently Available"),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "relocation_ops"), /* field[4]: buddy_stat->relocated */
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, "Total Number of Relocations"),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "relocation_time"), /* field[5]: relocated_usec / 1000000 */
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, "Total Duration of Relocations,"
- " in Seconds"),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/*******************************************************************//**
-Produce the rows of information_schema.innodb_cmpmem or
-innodb_cmpmem_reset: for every buffer pool instance, one row per buddy
-block size.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_cmpmem_fill_low(
-/*================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		,	/*!< in: condition (ignored) */
-	ibool		reset)	/*!< in: TRUE=reset cumulated counts */
-{
-	TABLE*	table = (TABLE*) tables->table;
-	int	status = 0;
-
-	DBUG_ENTER("i_s_cmpmem_fill_low");
-
-	/* A caller that fails the PROCESS_ACL global access check gets
-	an empty result, reported as success. */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* Stop visiting further instances as soon as a row could not
-	be stored. */
-	for (ulint i = 0; status == 0 && i < srv_buf_pool_instances; i++) {
-		buf_pool_t*	buf_pool = buf_pool_from_array(i);
-
-		mutex_enter(&buf_pool->zip_free_mutex);
-
-		for (uint x = 0; x <= BUF_BUDDY_SIZES; x++) {
-			buf_buddy_stat_t*	buddy_stat
-				= &buf_pool->buddy_stat[x];
-
-			/* Only sizes below BUF_BUDDY_SIZES have a
-			zip_free list; the last size reports 0. */
-			double	n_free = 0;
-
-			if (x < BUF_BUDDY_SIZES) {
-				n_free = static_cast<double>(
-					UT_LIST_GET_LEN(
-						buf_pool->zip_free[x]));
-			}
-
-			table->field[0]->store(BUF_BUDDY_LOW << x);
-			table->field[1]->store(static_cast<double>(i));
-			table->field[2]->store(
-				static_cast<double>(buddy_stat->used));
-			table->field[3]->store(n_free);
-			table->field[4]->store(
-				(longlong) buddy_stat->relocated, true);
-			table->field[5]->store(
-				static_cast<double>(
-					buddy_stat->relocated_usec / 1000000));
-
-			if (reset) {
-				/* This is protected by
-				buf_pool->zip_free_mutex. */
-				buddy_stat->relocated = 0;
-				buddy_stat->relocated_usec = 0;
-			}
-
-			if (schema_table_store_record(thd, table)) {
-				status = 1;
-				break;
-			}
-		}
-
-		mutex_exit(&buf_pool->zip_free_mutex);
-	}
-
-	DBUG_RETURN(status);
-}
-
-/*******************************************************************//**
-Fill callback for information_schema.innodb_cmpmem. Delegates to
-i_s_cmpmem_fill_low() with the reset flag FALSE, so the relocation
-counters are kept.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_cmpmem_fill(
-/*============*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		cond)	/*!< in: condition (ignored) */
-{
-	const ibool	reset_counts = FALSE;
-
-	return(i_s_cmpmem_fill_low(thd, tables, cond, reset_counts));
-}
-
-/*******************************************************************//**
-Fill callback for information_schema.innodb_cmpmem_reset. Delegates to
-i_s_cmpmem_fill_low() with the reset flag TRUE, so the relocation
-counters are zeroed as they are reported.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_cmpmem_reset_fill(
-/*==================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		cond)	/*!< in: condition (ignored) */
-{
-	const ibool	reset_counts = TRUE;
-
-	return(i_s_cmpmem_fill_low(thd, tables, cond, reset_counts));
-}
-
-/*******************************************************************//**
-Plugin init hook for information_schema.innodb_cmpmem: installs the
-column definitions and the fill callback on the schema table object.
-@return 0 on success */
-static
-int
-i_s_cmpmem_init(
-/*============*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("i_s_cmpmem_init");
-
-	ST_SCHEMA_TABLE* const	schema_table
-		= static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fill_table = i_s_cmpmem_fill;
-	schema_table->fields_info = i_s_cmpmem_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Plugin init hook for information_schema.innodb_cmpmem_reset: installs
-the column definitions (shared with innodb_cmpmem) and the resetting
-fill callback on the schema table object.
-@return 0 on success */
-static
-int
-i_s_cmpmem_reset_init(
-/*==================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("i_s_cmpmem_reset_init");
-
-	ST_SCHEMA_TABLE* const	schema_table
-		= static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fill_table = i_s_cmpmem_reset_fill;
-	schema_table->fields_info = i_s_cmpmem_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_maria_plugin i_s_innodb_cmpmem = /* plugin descriptor
-for the INFORMATION_SCHEMA table INNODB_CMPMEM; its init hook
-i_s_cmpmem_init() binds the column list and the non-resetting fill
-callback */
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_CMPMEM"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "Statistics for the InnoDB compressed buffer pool"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded; sets
- fields_info and fill_table on the schema table object */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_cmpmem_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var*; no status variables */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var**; no system variables */
- STRUCT_FLD(system_vars, NULL),
-
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE /* last two members,
- given positionally without STRUCT_FLD; the per-index descriptors in
- this file label the same two values version_info and maturity */
-};
-
-UNIV_INTERN struct st_maria_plugin i_s_innodb_cmpmem_reset = /* plugin
-descriptor for the INFORMATION_SCHEMA table INNODB_CMPMEM_RESET; its
-init hook i_s_cmpmem_reset_init() binds the column list and the
-resetting fill callback */
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_CMPMEM_RESET"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "Statistics for the InnoDB compressed buffer pool;"
- " reset cumulated counts"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded; sets
- fields_info and fill_table on the schema table object */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_cmpmem_reset_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var*; no status variables */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var**; no system variables */
- STRUCT_FLD(system_vars, NULL),
-
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE /* last two members,
- given positionally without STRUCT_FLD; the per-index descriptors in
- this file label the same two values version_info and maturity */
-};
-
-/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_metrics.
-Each METRIC_* macro gives the position of the column defined right
-after it; i_s_metrics_fill() uses these macros to index the table's
-field array, so the macro values and the entry order must stay in
-step. Columns flagged MY_I_S_MAYBE_NULL are nullable; the others are
-not. The *_START macros name the plain columns (COUNT, MAX_COUNT, ...),
-which the fill function loads with values measured since the counter
-was started; the *_RESET macros name the *_RESET columns.
-END_OF_ST_FIELD_INFO terminates the list. */
-static ST_FIELD_INFO innodb_metrics_fields_info[] =
-{
-#define METRIC_NAME 0 /* monitor_info->monitor_name */
- {STRUCT_FLD(field_name, "NAME"),
- STRUCT_FLD(field_length, NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_SUBSYS 1 /* monitor_info->monitor_module */
- {STRUCT_FLD(field_name, "SUBSYSTEM"),
- STRUCT_FLD(field_length, NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_VALUE_START 2 /* MONITOR_VALUE_SINCE_START() */
- {STRUCT_FLD(field_name, "COUNT"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_MAX_VALUE_START 3
- {STRUCT_FLD(field_name, "MAX_COUNT"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_MIN_VALUE_START 4
- {STRUCT_FLD(field_name, "MIN_COUNT"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_AVG_VALUE_START 5
- {STRUCT_FLD(field_name, "AVG_COUNT"),
- STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
- STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_VALUE_RESET 6 /* MONITOR_VALUE() */
- {STRUCT_FLD(field_name, "COUNT_RESET"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_MAX_VALUE_RESET 7
- {STRUCT_FLD(field_name, "MAX_COUNT_RESET"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_MIN_VALUE_RESET 8
- {STRUCT_FLD(field_name, "MIN_COUNT_RESET"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_AVG_VALUE_RESET 9
- {STRUCT_FLD(field_name, "AVG_COUNT_RESET"),
- STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
- STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_START_TIME 10 /* mon_start_time; NULL if never enabled */
- {STRUCT_FLD(field_name, "TIME_ENABLED"),
- STRUCT_FLD(field_length, 0),
- STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_STOP_TIME 11 /* set to NULL while the monitor is on */
- {STRUCT_FLD(field_name, "TIME_DISABLED"),
- STRUCT_FLD(field_length, 0),
- STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_TIME_ELAPSED 12 /* difftime() result; NULL if never enabled */
- {STRUCT_FLD(field_name, "TIME_ELAPSED"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_RESET_TIME 13 /* mon_reset_time */
- {STRUCT_FLD(field_name, "TIME_RESET"),
- STRUCT_FLD(field_length, 0),
- STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_STATUS 14
- {STRUCT_FLD(field_name, "STATUS"),
- STRUCT_FLD(field_length, NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_TYPE 15
- {STRUCT_FLD(field_name, "TYPE"),
- STRUCT_FLD(field_length, NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define METRIC_DESC 16 /* monitor_info->monitor_desc */
- {STRUCT_FLD(field_name, "COMMENT"),
- STRUCT_FLD(field_length, NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/**********************************************************************//**
-Fill the information schema metrics table.
-@return 0 on success */
-static
-int
-i_s_metrics_fill(
-/*=============*/
- THD* thd, /*!< in: thread */
- TABLE* table_to_fill) /*!< in/out: fill this table */
-{
- int count;
- Field** fields;
- double time_diff = 0;
- monitor_info_t* monitor_info;
- mon_type_t min_val;
- mon_type_t max_val;
-
- DBUG_ENTER("i_s_metrics_fill");
- fields = table_to_fill->field;
-
- for (count = 0; count < NUM_MONITOR; count++) {
- monitor_info = srv_mon_get_info((monitor_id_t) count);
-
- /* A good place to sanity check the Monitor ID */
- ut_a(count == monitor_info->monitor_id);
-
- /* If the item refers to a Module, nothing to fill,
- continue. */
- if ((monitor_info->monitor_type & MONITOR_MODULE)
- || (monitor_info->monitor_type & MONITOR_HIDDEN)) {
- continue;
- }
-
- /* If this is an existing "status variable", and
- its corresponding counter is still on, we need
- to calculate the result from its corresponding
- counter. */
- if (monitor_info->monitor_type & MONITOR_EXISTING
- && MONITOR_IS_ON(count)) {
- srv_mon_process_existing_counter((monitor_id_t) count,
- MONITOR_GET_VALUE);
- }
-
- /* Fill in counter's basic information */
- OK(field_store_string(fields[METRIC_NAME],
- monitor_info->monitor_name));
-
- OK(field_store_string(fields[METRIC_SUBSYS],
- monitor_info->monitor_module));
-
- OK(field_store_string(fields[METRIC_DESC],
- monitor_info->monitor_desc));
-
- /* Fill in counter values */
- OK(fields[METRIC_VALUE_RESET]->store(
- MONITOR_VALUE(count), FALSE));
-
- OK(fields[METRIC_VALUE_START]->store(
- MONITOR_VALUE_SINCE_START(count), FALSE));
-
- /* If the max value is MAX_RESERVED, counter max
- value has not been updated. Set the column value
- to NULL. */
- if (MONITOR_MAX_VALUE(count) == MAX_RESERVED
- || MONITOR_MAX_MIN_NOT_INIT(count)) {
- fields[METRIC_MAX_VALUE_RESET]->set_null();
- } else {
- OK(fields[METRIC_MAX_VALUE_RESET]->store(
- MONITOR_MAX_VALUE(count), FALSE));
- fields[METRIC_MAX_VALUE_RESET]->set_notnull();
- }
-
- /* If the min value is MIN_RESERVED, counter min
- value has not been updated. Set the column value
- to NULL. */
- if (MONITOR_MIN_VALUE(count) == MIN_RESERVED
- || MONITOR_MAX_MIN_NOT_INIT(count)) {
- fields[METRIC_MIN_VALUE_RESET]->set_null();
- } else {
- OK(fields[METRIC_MIN_VALUE_RESET]->store(
- MONITOR_MIN_VALUE(count), FALSE));
- fields[METRIC_MIN_VALUE_RESET]->set_notnull();
- }
-
- /* Calculate the max value since counter started */
- max_val = srv_mon_calc_max_since_start((monitor_id_t) count);
-
- if (max_val == MAX_RESERVED
- || MONITOR_MAX_MIN_NOT_INIT(count)) {
- fields[METRIC_MAX_VALUE_START]->set_null();
- } else {
- OK(fields[METRIC_MAX_VALUE_START]->store(
- max_val, FALSE));
- fields[METRIC_MAX_VALUE_START]->set_notnull();
- }
-
- /* Calculate the min value since counter started */
- min_val = srv_mon_calc_min_since_start((monitor_id_t) count);
-
- if (min_val == MIN_RESERVED
- || MONITOR_MAX_MIN_NOT_INIT(count)) {
- fields[METRIC_MIN_VALUE_START]->set_null();
- } else {
- OK(fields[METRIC_MIN_VALUE_START]->store(
- min_val, FALSE));
-
- fields[METRIC_MIN_VALUE_START]->set_notnull();
- }
-
- /* If monitor has been enabled (no matter it is disabled
- or not now), fill METRIC_START_TIME and METRIC_TIME_ELAPSED
- field */
- if (MONITOR_FIELD(count, mon_start_time)) {
- OK(field_store_time_t(fields[METRIC_START_TIME],
- (time_t)MONITOR_FIELD(count, mon_start_time)));
- fields[METRIC_START_TIME]->set_notnull();
-
- /* If monitor is enabled, the TIME_ELAPSED is the
- time difference between current and time when monitor
- is enabled. Otherwise, it is the time difference
- between time when monitor is enabled and time
- when it is disabled */
- if (MONITOR_IS_ON(count)) {
- time_diff = difftime(time(NULL),
- MONITOR_FIELD(count, mon_start_time));
- } else {
- time_diff = difftime(
- MONITOR_FIELD(count, mon_stop_time),
- MONITOR_FIELD(count, mon_start_time));
- }
-
- OK(fields[METRIC_TIME_ELAPSED]->store(
- time_diff));
- fields[METRIC_TIME_ELAPSED]->set_notnull();
- } else {
- fields[METRIC_START_TIME]->set_null();
- fields[METRIC_TIME_ELAPSED]->set_null();
- time_diff = 0;
- }
-
- /* Unless MONITOR_NO_AVERAGE is marked, we will need
- to calculate the average value. If this is a monitor set
- owner marked by MONITOR_SET_OWNER, divide
- the value by another counter (number of calls) designated
- by monitor_info->monitor_related_id.
- Otherwise average the counter value by the time between the
- time that the counter is enabled and time it is disabled
- or time it is sampled. */
- if (!(monitor_info->monitor_type & MONITOR_NO_AVERAGE)
- && (monitor_info->monitor_type & MONITOR_SET_OWNER)
- && monitor_info->monitor_related_id) {
- mon_type_t value_start
- = MONITOR_VALUE_SINCE_START(
- monitor_info->monitor_related_id);
-
- if (value_start) {
- OK(fields[METRIC_AVG_VALUE_START]->store(
- MONITOR_VALUE_SINCE_START(count)
- / value_start, FALSE));
-
- fields[METRIC_AVG_VALUE_START]->set_notnull();
- } else {
- fields[METRIC_AVG_VALUE_START]->set_null();
- }
-
- if (MONITOR_VALUE(monitor_info->monitor_related_id)) {
- OK(fields[METRIC_AVG_VALUE_RESET]->store(
- MONITOR_VALUE(count)
- / MONITOR_VALUE(
- monitor_info->monitor_related_id),
- FALSE));
- } else {
- fields[METRIC_AVG_VALUE_RESET]->set_null();
- }
- } else if (!(monitor_info->monitor_type & MONITOR_NO_AVERAGE)
- && !(monitor_info->monitor_type
- & MONITOR_DISPLAY_CURRENT)) {
- if (time_diff) {
- OK(fields[METRIC_AVG_VALUE_START]->store(
- (double) MONITOR_VALUE_SINCE_START(
- count) / time_diff));
- fields[METRIC_AVG_VALUE_START]->set_notnull();
- } else {
- fields[METRIC_AVG_VALUE_START]->set_null();
- }
-
- if (MONITOR_FIELD(count, mon_reset_time)) {
- /* calculate the time difference since last
- reset */
- if (MONITOR_IS_ON(count)) {
- time_diff = difftime(
- time(NULL), MONITOR_FIELD(
- count, mon_reset_time));
- } else {
- time_diff = difftime(
- MONITOR_FIELD(count, mon_stop_time),
- MONITOR_FIELD(count, mon_reset_time));
- }
- } else {
- time_diff = 0;
- }
-
- if (time_diff) {
- OK(fields[METRIC_AVG_VALUE_RESET]->store(
- static_cast<double>(
- MONITOR_VALUE(count) / time_diff)));
- fields[METRIC_AVG_VALUE_RESET]->set_notnull();
- } else {
- fields[METRIC_AVG_VALUE_RESET]->set_null();
- }
- } else {
- fields[METRIC_AVG_VALUE_START]->set_null();
- fields[METRIC_AVG_VALUE_RESET]->set_null();
- }
-
-
- if (MONITOR_IS_ON(count)) {
- /* If monitor is on, the stop time will set to NULL */
- fields[METRIC_STOP_TIME]->set_null();
-
- /* Display latest Monitor Reset Time only if Monitor
- counter is on. */
- if (MONITOR_FIELD(count, mon_reset_time)) {
- OK(field_store_time_t(
- fields[METRIC_RESET_TIME],
- (time_t)MONITOR_FIELD(
- count, mon_reset_time)));
- fields[METRIC_RESET_TIME]->set_notnull();
- } else {
- fields[METRIC_RESET_TIME]->set_null();
- }
-
- /* Display the monitor status as "enabled" */
- OK(field_store_string(fields[METRIC_STATUS],
- "enabled"));
- } else {
- if (MONITOR_FIELD(count, mon_stop_time)) {
- OK(field_store_time_t(fields[METRIC_STOP_TIME],
- (time_t)MONITOR_FIELD(count, mon_stop_time)));
- fields[METRIC_STOP_TIME]->set_notnull();
- } else {
- fields[METRIC_STOP_TIME]->set_null();
- }
-
- fields[METRIC_RESET_TIME]->set_null();
-
- OK(field_store_string(fields[METRIC_STATUS],
- "disabled"));
- }
-
- if (monitor_info->monitor_type & MONITOR_DISPLAY_CURRENT) {
- OK(field_store_string(fields[METRIC_TYPE],
- "value"));
- } else if (monitor_info->monitor_type & MONITOR_EXISTING) {
- OK(field_store_string(fields[METRIC_TYPE],
- "status_counter"));
- } else if (monitor_info->monitor_type & MONITOR_SET_OWNER) {
- OK(field_store_string(fields[METRIC_TYPE],
- "set_owner"));
- } else if ( monitor_info->monitor_type & MONITOR_SET_MEMBER) {
- OK(field_store_string(fields[METRIC_TYPE],
- "set_member"));
- } else {
- OK(field_store_string(fields[METRIC_TYPE],
- "counter"));
- }
-
- OK(schema_table_store_record(thd, table_to_fill));
- }
-
- DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Function to fill information schema metrics tables.
-A caller for which check_global_access(thd, PROCESS_ACL, true) reports a
-failure gets an empty table instead of an error.
-@return 0 on success or when access is denied, otherwise the non-zero
-result of i_s_metrics_fill() */
-static
-int
-i_s_metrics_fill_table(
-/*===================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	DBUG_ENTER("i_s_metrics_fill_table");
-
-	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	/* Hand the fill result back to the caller: a failed field or
-	record store must not be reported as a successful fill of a
-	partially populated table. */
-	DBUG_RETURN(i_s_metrics_fill(thd, tables->table));
-}
-/*******************************************************************//**
-Plugin init hook for INFORMATION_SCHEMA.innodb_metrics: attaches the
-column definitions and the fill callback to the schema table object.
-@return 0 on success */
-static
-int
-innodb_metrics_init(
-/*================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_metrics_init");
-
-	ST_SCHEMA_TABLE* const	metrics_schema
-		= static_cast<ST_SCHEMA_TABLE*>(p);
-
-	metrics_schema->fill_table = i_s_metrics_fill_table;
-	metrics_schema->fields_info = innodb_metrics_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_mysql_plugin i_s_innodb_metrics =
-{
- /* Descriptor registering INFORMATION_SCHEMA.INNODB_METRICS as an
- information schema plugin; innodb_metrics_init() binds its columns
- and fill callback when the plugin is loaded.
-
- First member: the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_METRICS"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "InnoDB Metrics Info"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, innodb_metrics_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* (none exported by this plugin) */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** (none exported by this plugin) */
- STRUCT_FLD(system_vars, NULL),
-
- STRUCT_FLD(version_info, INNODB_VERSION_STR), /* version as a string */
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), /* maturity level */
-};
-/* Fields of the dynamic table INFORMATION_SCHEMA.innodb_ft_default_stopword:
-one string column named "value", found at index STOPWORD_VALUE of the row.
-NOTE(review): the column length is TRX_ID_MAX_LEN + 1, a constant named
-after transaction ids - confirm this is the intended width for a stopword. */
-static ST_FIELD_INFO i_s_stopword_fields_info[] =
-{
-#define STOPWORD_VALUE 0
- {STRUCT_FLD(field_name, "value"),
- STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/*******************************************************************//**
-Emit one row of information_schema.innodb_ft_default_stopword for every
-entry of the array fts_default_stopword, stopping at its first null entry.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_stopword_fill(
-/*==============*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	TABLE*	table = (TABLE*) tables->table;
-
-	DBUG_ENTER("i_s_stopword_fill");
-
-	Field**	fields = table->field;
-
-	/* Walk the server default stopword list; each entry becomes the
-	"value" column of one stored record. */
-	for (ulint n = 0; fts_default_stopword[n]; n++) {
-		OK(field_store_string(fields[STOPWORD_VALUE],
-				      fts_default_stopword[n]));
-
-		OK(schema_table_store_record(thd, table));
-	}
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Plugin init hook for information_schema.innodb_ft_default_stopword:
-registers the column list and the fill callback on the schema object.
-@return 0 on success */
-static
-int
-i_s_stopword_init(
-/*==============*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("i_s_stopword_init");
-
-	ST_SCHEMA_TABLE* const	stopword_schema
-		= static_cast<ST_SCHEMA_TABLE*>(p);
-
-	stopword_schema->fill_table = i_s_stopword_fill;
-	stopword_schema->fields_info = i_s_stopword_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_default_stopword =
-{
- /* Descriptor registering INFORMATION_SCHEMA.INNODB_FT_DEFAULT_STOPWORD
- as an information schema plugin; i_s_stopword_init() binds its
- columns and fill callback when the plugin is loaded.
-
- First member: the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_FT_DEFAULT_STOPWORD"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "Default stopword list for InnoDB Full Text Search"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_stopword_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* (none exported by this plugin) */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** (none exported by this plugin) */
- STRUCT_FLD(system_vars, NULL),
-
- STRUCT_FLD(version_info, INNODB_VERSION_STR), /* version as a string */
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), /* maturity level */
-};
-
-/* Fields of the dynamic tables INFORMATION_SCHEMA.INNODB_FT_DELETED and
-INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED: a single unsigned LONGLONG
-column named DOC_ID, found at index I_S_FTS_DOC_ID of the row. */
-static ST_FIELD_INFO i_s_fts_doc_fields_info[] =
-{
-#define I_S_FTS_DOC_ID 0
- {STRUCT_FLD(field_name, "DOC_ID"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/*******************************************************************//**
-Shared worker for INFORMATION_SCHEMA.INNODB_FT_DELETED and
-INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED. Opens the table named by
-fts_internal_tbl_name, fetches the doc ids of its "DELETED" or
-"BEING_DELETED" FTS common table and stores one row per doc id.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_fts_deleted_generic_fill(
-/*=========================*/
-	THD*		thd,		/*!< in: thread */
-	TABLE_LIST*	tables,		/*!< in/out: tables to fill */
-	ibool		being_deleted)	/*!< in: TRUE to read the
-					BEING_DELETED table, FALSE to
-					read the DELETED table */
-{
-	TABLE*	table = (TABLE*) tables->table;
-
-	DBUG_ENTER("i_s_fts_deleted_generic_fill");
-
-	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	/* No table selected through fts_internal_tbl_name: the result
-	stays empty. */
-	if (!fts_internal_tbl_name) {
-		DBUG_RETURN(0);
-	}
-
-	/* Prevent DDL to drop fts aux tables. */
-	rw_lock_s_lock(&dict_operation_lock);
-
-	dict_table_t*	user_table = dict_table_open_on_name(
-		fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
-
-	if (!user_table) {
-		rw_lock_s_unlock(&dict_operation_lock);
-
-		DBUG_RETURN(0);
-	}
-
-	if (!dict_table_has_fts_index(user_table)) {
-		/* The table exists but carries no FTS index: release it
-		before dropping the lock. */
-		dict_table_close(user_table, FALSE, FALSE);
-
-		rw_lock_s_unlock(&dict_operation_lock);
-
-		DBUG_RETURN(0);
-	}
-
-	fts_doc_ids_t*	deleted = fts_doc_ids_create();
-
-	trx_t*	trx = trx_allocate_for_background();
-	trx->op_info = "Select for FTS DELETE TABLE";
-
-	fts_table_t	fts_table;
-
-	FTS_INIT_FTS_TABLE(&fts_table,
-			   (being_deleted) ? "BEING_DELETED" : "DELETED",
-			   FTS_COMMON_TABLE, user_table);
-
-	fts_table_fetch_doc_ids(trx, &fts_table, deleted);
-
-	Field**	fields = table->field;
-	int	ret = 0;
-	ulint	n = 0;
-
-	/* One record per fetched doc id; the first failing store ends
-	the loop and becomes the return value. */
-	while (n < ib_vector_size(deleted->doc_ids)) {
-		doc_id_t	doc_id = *(doc_id_t*) ib_vector_get_const(
-			deleted->doc_ids, n);
-
-		BREAK_IF(ret = fields[I_S_FTS_DOC_ID]->store(doc_id, true));
-
-		BREAK_IF(ret = schema_table_store_record(thd, table));
-
-		++n;
-	}
-
-	/* Release everything in the same order on success and failure. */
-	trx_free_for_background(trx);
-
-	fts_doc_ids_free(deleted);
-
-	dict_table_close(user_table, FALSE, FALSE);
-
-	rw_lock_s_unlock(&dict_operation_lock);
-
-	DBUG_RETURN(ret);
-}
-
-/*******************************************************************//**
-Fill callback of INFORMATION_SCHEMA.INNODB_FT_DELETED; delegates to
-i_s_fts_deleted_generic_fill() with being_deleted = FALSE.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_fts_deleted_fill(
-/*=================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (ignored) */
-{
-	DBUG_ENTER("i_s_fts_deleted_fill");
-
-	const ibool	read_being_deleted = FALSE;
-
-	DBUG_RETURN(i_s_fts_deleted_generic_fill(
-			    thd, tables, read_being_deleted));
-}
-
-/*******************************************************************//**
-Plugin init hook for INFORMATION_SCHEMA.INNODB_FT_DELETED: registers the
-DOC_ID column list and the fill callback on the schema object.
-@return 0 on success */
-static
-int
-i_s_fts_deleted_init(
-/*=================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("i_s_fts_deleted_init");
-
-	ST_SCHEMA_TABLE* const	deleted_schema
-		= static_cast<ST_SCHEMA_TABLE*>(p);
-
-	deleted_schema->fill_table = i_s_fts_deleted_fill;
-	deleted_schema->fields_info = i_s_fts_doc_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_deleted =
-{
- /* Descriptor registering INFORMATION_SCHEMA.INNODB_FT_DELETED as an
- information schema plugin; i_s_fts_deleted_init() binds its columns
- and fill callback when the plugin is loaded.
-
- First member: the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_FT_DELETED"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "INNODB AUXILIARY FTS DELETED TABLE"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_fts_deleted_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* (none exported by this plugin) */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** (none exported by this plugin) */
- STRUCT_FLD(system_vars, NULL),
-
- STRUCT_FLD(version_info, INNODB_VERSION_STR), /* version as a string */
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), /* maturity level */
-};
-
-/*******************************************************************//**
-Fill callback of INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED; delegates to
-i_s_fts_deleted_generic_fill() with being_deleted = TRUE.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_fts_being_deleted_fill(
-/*=======================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (ignored) */
-{
-	DBUG_ENTER("i_s_fts_being_deleted_fill");
-
-	const ibool	read_being_deleted = TRUE;
-
-	DBUG_RETURN(i_s_fts_deleted_generic_fill(
-			    thd, tables, read_being_deleted));
-}
-
-/*******************************************************************//**
-Bind the dynamic table INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED:
-install its column list (shared with INNODB_FT_DELETED) and its fill
-callback on the schema object.
-@return 0 on success */
-static
-int
-i_s_fts_being_deleted_init(
-/*=======================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	/* The debug trace tag must name this function; it previously
-	carried the tag of i_s_fts_deleted_init. */
-	DBUG_ENTER("i_s_fts_being_deleted_init");
-	ST_SCHEMA_TABLE*	schema = (ST_SCHEMA_TABLE*) p;
-
-	schema->fields_info = i_s_fts_doc_fields_info;
-	schema->fill_table = i_s_fts_being_deleted_fill;
-
-	DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_being_deleted =
-{
- /* Descriptor registering INFORMATION_SCHEMA.INNODB_FT_BEING_DELETED
- as an information schema plugin; i_s_fts_being_deleted_init() binds
- its columns and fill callback when the plugin is loaded.
-
- First member: the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_FT_BEING_DELETED"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "INNODB AUXILIARY FTS BEING DELETED TABLE"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_fts_being_deleted_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* (none exported by this plugin) */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** (none exported by this plugin) */
- STRUCT_FLD(system_vars, NULL),
-
- STRUCT_FLD(version_info, INNODB_VERSION_STR), /* version as a string */
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), /* maturity level */
-};
-
-/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHED and
-INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE.
-Each I_S_FTS_* macro below is the position of the column that follows it;
-the fill functions index the row's field array with these macros, so the
-macro values and the order of the entries must stay in step. */
-static ST_FIELD_INFO i_s_fts_index_fields_info[] =
-{
-#define I_S_FTS_WORD 0
- {STRUCT_FLD(field_name, "WORD"),
- STRUCT_FLD(field_length, FTS_MAX_WORD_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define I_S_FTS_FIRST_DOC_ID 1
- {STRUCT_FLD(field_name, "FIRST_DOC_ID"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define I_S_FTS_LAST_DOC_ID 2
- {STRUCT_FLD(field_name, "LAST_DOC_ID"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define I_S_FTS_DOC_COUNT 3
- {STRUCT_FLD(field_name, "DOC_COUNT"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define I_S_FTS_ILIST_DOC_ID 4
- {STRUCT_FLD(field_name, "DOC_ID"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define I_S_FTS_ILIST_DOC_POS 5
- {STRUCT_FLD(field_name, "POSITION"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/*******************************************************************//**
-Go through the Doc Node and its ilist, fill the dynamic table
-INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHED for one FTS index on the table.
-One record is stored for every position decoded from the ilist of every
-node of every word in index_cache->words.
-NOTE(review): after a charset conversion a terminating zero is written at
-conv_str->f_str[conv_str->f_n_char], and only f_n_char <= f_len is asserted,
-so the buffer needs room for f_len + 1 bytes - confirm against the caller.
-NOTE(review): ret is never assigned after its initialisation; failures
-leave through the OK() macro instead - confirm against its definition.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_fts_index_cache_fill_one_index(
-/*===============================*/
- fts_index_cache_t* index_cache, /*!< in: FTS index cache */
- THD* thd, /*!< in: thread */
- fts_string_t* conv_str, /*!< in/out: buffer */
- TABLE_LIST* tables) /*!< in/out: tables to fill */
-{
- TABLE* table = (TABLE*) tables->table;
- Field** fields;
- CHARSET_INFO* index_charset;
- const ib_rbt_node_t* rbt_node;
- uint dummy_errors;
- char* word_str;
-
- DBUG_ENTER("i_s_fts_index_cache_fill_one_index");
-
- fields = table->field;
-
- index_charset = index_cache->charset;
- conv_str->f_n_char = 0;
-
- int ret = 0;
-
- /* Go through each word in the index cache */
- for (rbt_node = rbt_first(index_cache->words);
- rbt_node;
- rbt_node = rbt_next(index_cache->words, rbt_node)) {
- fts_tokenizer_word_t* word;
-
- word = rbt_value(fts_tokenizer_word_t, rbt_node);
-
- /* Convert word from index charset to system_charset_info;
- when both use the same cset the word text is used directly */
- if (index_charset->cset != system_charset_info->cset) {
- conv_str->f_n_char = my_convert(
- reinterpret_cast<char*>(conv_str->f_str),
- static_cast<uint32>(conv_str->f_len),
- system_charset_info,
- reinterpret_cast<char*>(word->text.f_str),
- static_cast<uint32>(word->text.f_len),
- index_charset, &dummy_errors);
- ut_ad(conv_str->f_n_char <= conv_str->f_len);
- conv_str->f_str[conv_str->f_n_char] = 0;
- word_str = reinterpret_cast<char*>(conv_str->f_str);
- } else {
- word_str = reinterpret_cast<char*>(word->text.f_str);
- }
-
- /* Decode the ilist, and display Doc ID and word position */
- for (ulint i = 0; i < ib_vector_size(word->nodes); i++) {
- fts_node_t* node;
- byte* ptr;
- ulint decoded = 0;
- doc_id_t doc_id = 0;
-
- node = static_cast<fts_node_t*> (ib_vector_get(
- word->nodes, i));
-
- ptr = node->ilist;
-
- while (decoded < node->ilist_size) {
- ulint pos = fts_decode_vlc(&ptr);
-
- doc_id += pos; /* decoded value is added to the previous doc id */
-
- /* Get position info: positions are read until a
- zero byte, which the ++ptr after this loop skips */
- while (*ptr) {
- pos = fts_decode_vlc(&ptr);
-
- OK(field_store_string(
- fields[I_S_FTS_WORD],
- word_str));
-
- OK(fields[I_S_FTS_FIRST_DOC_ID]->store(
- (longlong) node->first_doc_id,
- true));
-
- OK(fields[I_S_FTS_LAST_DOC_ID]->store(
- (longlong) node->last_doc_id,
- true));
-
- OK(fields[I_S_FTS_DOC_COUNT]->store(
- static_cast<double>(node->doc_count)));
-
- OK(fields[I_S_FTS_ILIST_DOC_ID]->store(
- (longlong) doc_id, true));
-
- OK(fields[I_S_FTS_ILIST_DOC_POS]->store(
- static_cast<double>(pos)));
-
- OK(schema_table_store_record(
- thd, table));
- }
-
- ++ptr;
-
- decoded = ptr - (byte*) node->ilist; /* bytes of the ilist consumed so far */
- }
- }
- }
-
- DBUG_RETURN(ret);
-}
-/*******************************************************************//**
-Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHED
-from the FTS cache of the table named by fts_internal_tbl_name. Every
-index cache found in cache->indexes is passed to
-i_s_fts_index_cache_fill_one_index(); the first failure stops the scan.
-The result stays empty when access is denied, no table name is set, the
-table cannot be opened, or the table has no FTS cache.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_fts_index_cache_fill(
-/*=====================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (ignored) */
-{
-	dict_table_t*	user_table;
-	fts_cache_t*	cache;
-
-	DBUG_ENTER("i_s_fts_index_cache_fill");
-
-	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	if (!fts_internal_tbl_name) {
-		DBUG_RETURN(0);
-	}
-
-	user_table = dict_table_open_on_name(
-		fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
-
-	if (!user_table) {
-		DBUG_RETURN(0);
-	}
-
-	if (user_table->fts == NULL || user_table->fts->cache == NULL) {
-		dict_table_close(user_table, FALSE, FALSE);
-
-		DBUG_RETURN(0);
-	}
-
-	cache = user_table->fts->cache;
-
-	ut_a(cache);
-
-	int		ret = 0;
-	fts_string_t	conv_str;
-	conv_str.f_len = system_charset_info->mbmaxlen
-		* FTS_MAX_WORD_LEN_IN_CHAR;
-	/* Allocate one byte more than f_len: after converting a word,
-	i_s_fts_index_cache_fill_one_index() writes a terminating zero at
-	f_str[f_n_char], and f_n_char may be as large as f_len. */
-	conv_str.f_str = static_cast<byte*>(ut_malloc(conv_str.f_len + 1));
-
-	for (ulint i = 0; i < ib_vector_size(cache->indexes); i++) {
-		fts_index_cache_t*	index_cache;
-
-		index_cache = static_cast<fts_index_cache_t*> (
-			ib_vector_get(cache->indexes, i));
-
-		BREAK_IF(ret = i_s_fts_index_cache_fill_one_index(
-				 index_cache, thd, &conv_str, tables));
-	}
-
-	ut_free(conv_str.f_str);
-
-	dict_table_close(user_table, FALSE, FALSE);
-
-	DBUG_RETURN(ret);
-}
-
-/*******************************************************************//**
-Plugin init hook for INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE: registers
-the FTS index column list and the cache fill callback on the schema object.
-@return 0 on success */
-static
-int
-i_s_fts_index_cache_init(
-/*=====================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("i_s_fts_index_cache_init");
-
-	ST_SCHEMA_TABLE* const	cache_schema
-		= static_cast<ST_SCHEMA_TABLE*>(p);
-
-	cache_schema->fill_table = i_s_fts_index_cache_fill;
-	cache_schema->fields_info = i_s_fts_index_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_index_cache =
-{
- /* Descriptor registering INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHE as
- an information schema plugin; i_s_fts_index_cache_init() binds its
- columns and fill callback when the plugin is loaded.
-
- First member: the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_FT_INDEX_CACHE"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "INNODB AUXILIARY FTS INDEX CACHED"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_fts_index_cache_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* (none exported by this plugin) */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** (none exported by this plugin) */
- STRUCT_FLD(system_vars, NULL),
-
- STRUCT_FLD(version_info, INNODB_VERSION_STR), /* version as a string */
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE), /* maturity level */
-};
-
-/*******************************************************************//**
-Go through a FTS index auxiliary table, fetch its rows and fill
-FTS word cache structure.
-Rows with word >= :word are read from the auxiliary table chosen by
-`selected` in a background transaction; every fetched row is handed to
-fts_optimize_index_fetch_node() with `words` as its read argument.
-A lock wait timeout is retried without limit; any other error ends the
-fetch. When fetch.total_memory has reached fts_result_cache_limit the
-result is replaced by DB_FTS_EXCEED_RESULT_CACHE_LIMIT, whatever the SQL
-evaluation returned.
-@return DB_SUCCESS on success, otherwise error code */
-static
-dberr_t
-i_s_fts_index_table_fill_selected(
-/*==============================*/
- dict_index_t* index, /*!< in: FTS index */
- ib_vector_t* words, /*!< in/out: vector to hold
- fetched words */
- ulint selected, /*!< in: selected FTS index */
- fts_string_t* word) /*!< in: word to select */
-{
- pars_info_t* info;
- fts_table_t fts_table;
- trx_t* trx;
- que_t* graph;
- dberr_t error;
- fts_fetch_t fetch;
-
- info = pars_info_create();
-
- fetch.read_arg = words;
- fetch.read_record = fts_optimize_index_fetch_node;
- fetch.total_memory = 0;
-
- DBUG_EXECUTE_IF("fts_instrument_result_cache_limit",
- fts_result_cache_limit = 8192;
- );
-
- trx = trx_allocate_for_background();
-
- trx->op_info = "fetching FTS index nodes";
-
- pars_info_bind_function(info, "my_func", fetch.read_record, &fetch);
- pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
-
- FTS_INIT_INDEX_TABLE(&fts_table, fts_get_suffix(selected),
- FTS_INDEX_TABLE, index);
-
- graph = fts_parse_sql(
- &fts_table, info,
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS"
- " SELECT word, doc_count, first_doc_id, last_doc_id, "
- "ilist\n"
- " FROM %s WHERE word >= :word;\n"
- "BEGIN\n"
- "\n"
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c % NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE c;");
-
- for(;;) {
- error = fts_eval_sql(trx, graph);
-
- if (error == DB_SUCCESS) {
- fts_sql_commit(trx);
-
- break;
- } else {
- fts_sql_rollback(trx);
-
- ut_print_timestamp(stderr);
-
- if (error == DB_LOCK_WAIT_TIMEOUT) {
- fprintf(stderr, " InnoDB: Warning: "
- "lock wait timeout reading "
- "FTS index. Retrying!\n");
-
- trx->error_state = DB_SUCCESS;
- } else {
- fprintf(stderr, " InnoDB: Error: %d "
- "while reading FTS index.\n", error);
- break;
- }
- }
- }
-
- mutex_enter(&dict_sys->mutex);
- que_graph_free(graph);
- mutex_exit(&dict_sys->mutex);
-
- trx_free_for_background(trx);
-
- if (fetch.total_memory >= fts_result_cache_limit) {
- error = DB_FTS_EXCEED_RESULT_CACHE_LIMIT;
- }
-
- return(error);
-}
-
-/*******************************************************************//**
-Release the words collected by one fetch: free the ilist of every node,
-free each word, then reset the vector so that it can be filled again. */
-static
-void
-i_s_fts_index_table_free_one_fetch(
-/*===============================*/
-	ib_vector_t*	words)	/*!< in: words fetched */
-{
-	ulint	word_no = 0;
-
-	while (word_no < ib_vector_size(words)) {
-		fts_word_t*	fetched = static_cast<fts_word_t*>(
-			ib_vector_get(words, word_no));
-
-		/* The ilist buffers are released one by one before the
-		word that holds the nodes is freed. */
-		ulint	node_no = 0;
-
-		while (node_no < ib_vector_size(fetched->nodes)) {
-			fts_node_t*	entry = static_cast<fts_node_t*>(
-				ib_vector_get(fetched->nodes, node_no));
-
-			ut_free(entry->ilist);
-
-			++node_no;
-		}
-
-		fts_word_free(fetched);
-
-		++word_no;
-	}
-
-	ib_vector_reset(words);
-}
-
-/*******************************************************************//**
-Go through words, fill INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE.
-One record is stored for every position decoded from the ilist of every
-node of every processed word. When has_more is set, the last word of the
-vector is left out because it was not fetched completely.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_fts_index_table_fill_one_fetch(
-/*===============================*/
-	CHARSET_INFO*	index_charset,	/*!< in: FTS index charset */
-	THD*		thd,		/*!< in: thread */
-	TABLE_LIST*	tables,		/*!< in/out: tables to fill */
-	ib_vector_t*	words,		/*!< in: words fetched */
-	fts_string_t*	conv_str,	/*!< in: string for conversion*/
-	bool		has_more)	/*!< in: has more to fetch */
-{
-	TABLE*		table = (TABLE*) tables->table;
-	Field**		fields;
-	uint		dummy_errors;
-	char*		word_str;
-	ulint		words_size;
-	int		ret = 0;
-
-	DBUG_ENTER("i_s_fts_index_table_fill_one_fetch");
-
-	fields = table->field;
-
-	words_size = ib_vector_size(words);
-	if (has_more) {
-		/* the last word is not fetched completely. */
-		ut_ad(words_size > 1);
-		words_size -= 1;
-	}
-
-	/* Go through each word in the index cache */
-	for (ulint i = 0; i < words_size; i++) {
-		fts_word_t*	word;
-
-		word = static_cast<fts_word_t*>(ib_vector_get(words, i));
-
-		/* Terminate the word text in place so that it can be
-		stored as a C string when no conversion is needed. */
-		word->text.f_str[word->text.f_len] = 0;
-
-		/* Convert word from index charset to system_charset_info */
-		if (index_charset->cset != system_charset_info->cset) {
-			conv_str->f_n_char = my_convert(
-				reinterpret_cast<char*>(conv_str->f_str),
-				static_cast<uint32>(conv_str->f_len),
-				system_charset_info,
-				reinterpret_cast<char*>(word->text.f_str),
-				static_cast<uint32>(word->text.f_len),
-				index_charset, &dummy_errors);
-			ut_ad(conv_str->f_n_char <= conv_str->f_len);
-			conv_str->f_str[conv_str->f_n_char] = 0;
-			word_str = reinterpret_cast<char*>(conv_str->f_str);
-		} else {
-			word_str = reinterpret_cast<char*>(word->text.f_str);
-		}
-
-		/* Decode the ilist, and display Doc ID and word position.
-		The node index has its own name (j) so that it does not
-		shadow the word index (i) of the enclosing loop. */
-		for (ulint j = 0; j < ib_vector_size(word->nodes); j++) {
-			fts_node_t*	node;
-			byte*		ptr;
-			ulint		decoded = 0;
-			doc_id_t	doc_id = 0;
-
-			node = static_cast<fts_node_t*> (ib_vector_get(
-				word->nodes, j));
-
-			ptr = node->ilist;
-
-			while (decoded < node->ilist_size) {
-				ulint	pos = fts_decode_vlc(&ptr);
-
-				/* The decoded value is added to the
-				previous doc id of this node. */
-				doc_id += pos;
-
-				/* Get position info: positions are read
-				until a zero byte, skipped by ++ptr below */
-				while (*ptr) {
-					pos = fts_decode_vlc(&ptr);
-
-					OK(field_store_string(
-						   fields[I_S_FTS_WORD],
-						   word_str));
-
-					OK(fields[I_S_FTS_FIRST_DOC_ID]->store(
-						   (longlong) node->first_doc_id,
-						   true));
-
-					OK(fields[I_S_FTS_LAST_DOC_ID]->store(
-						   (longlong) node->last_doc_id,
-						   true));
-
-					OK(fields[I_S_FTS_DOC_COUNT]->store(
-						   static_cast<double>(node->doc_count)));
-
-					OK(fields[I_S_FTS_ILIST_DOC_ID]->store(
-						   (longlong) doc_id, true));
-
-					OK(fields[I_S_FTS_ILIST_DOC_POS]->store(
-						   static_cast<double>(pos)));
-
-					OK(schema_table_store_record(
-						   thd, table));
-				}
-
-				++ptr;
-
-				decoded = ptr - (byte*) node->ilist;
-			}
-		}
-	}
-
-	DBUG_RETURN(ret);
-}
-
-/*******************************************************************//**
-Fill INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE for one FTS index: visit
-each auxiliary table listed in fts_index_selector, fetch its rows in
-batches and store the rows of every batch.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_fts_index_table_fill_one_index(
-/*===============================*/
-	dict_index_t*	index,		/*!< in: FTS index */
-	THD*		thd,		/*!< in: thread */
-	fts_string_t*	conv_str,	/*!< in/out: buffer */
-	TABLE_LIST*	tables)		/*!< in/out: tables to fill */
-{
-	DBUG_ENTER("i_s_fts_index_table_fill_one_index");
-	DBUG_ASSERT(!dict_index_is_online_ddl(index));
-
-	int		ret = 0;
-	mem_heap_t*	heap = mem_heap_create(1024);
-	ib_vector_t*	words = ib_vector_create(
-		ib_heap_allocator_create(heap), sizeof(fts_word_t), 256);
-
-	/* Start key of the next fetch; empty before the first fetch. */
-	fts_string_t	word;
-
-	word.f_str = NULL;
-	word.f_len = 0;
-	word.f_n_char = 0;
-
-	CHARSET_INFO*	index_charset = fts_index_get_charset(index);
-
-	/* Iterate through each auxiliary table as described in
-	fts_index_selector */
-	for (ulint selected = 0; fts_index_selector[selected].value;
-	     selected++) {
-		bool	has_more = false;
-
-		do {
-			/* Fetch from index */
-			const dberr_t	error
-				= i_s_fts_index_table_fill_selected(
-					index, words, selected, &word);
-
-			if (error != DB_SUCCESS
-			    && error != DB_FTS_EXCEED_RESULT_CACHE_LIMIT) {
-				/* Any other result aborts the whole fill. */
-				i_s_fts_index_table_free_one_fetch(words);
-				ret = 1;
-				goto func_exit;
-			}
-
-			/* A batch cut short by the result cache limit
-			means another fetch is needed. */
-			has_more = (error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT);
-
-			if (has_more) {
-				/* Prepare start point for next fetch */
-				fts_word_t*	last_word
-					= static_cast<fts_word_t*>(
-						ib_vector_last(words));
-
-				ut_ad(last_word != NULL);
-				fts_utf8_string_dup(
-					&word, &last_word->text, heap);
-			}
-
-			/* Fill into tables */
-			ret = i_s_fts_index_table_fill_one_fetch(
-				index_charset, thd, tables, words, conv_str,
-				has_more);
-			i_s_fts_index_table_free_one_fetch(words);
-
-			if (ret != 0) {
-				goto func_exit;
-			}
-		} while (has_more);
-	}
-
-func_exit:
-	mem_heap_free(heap);
-
-	DBUG_RETURN(ret);
-}
-/*******************************************************************//**
-Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE
-from every index of type DICT_FTS on the table named by
-fts_internal_tbl_name. dict_operation_lock is held in shared mode while
-the table is open. The result stays empty when access is denied, no
-table name is set, or the table cannot be opened.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_fts_index_table_fill(
-/*=====================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (ignored) */
-{
-	dict_table_t*	user_table;
-	dict_index_t*	index;
-
-	DBUG_ENTER("i_s_fts_index_table_fill");
-
-	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	if (!fts_internal_tbl_name) {
-		DBUG_RETURN(0);
-	}
-
-	/* Prevent DDL to drop fts aux tables. */
-	rw_lock_s_lock(&dict_operation_lock);
-
-	user_table = dict_table_open_on_name(
-		fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
-
-	if (!user_table) {
-		rw_lock_s_unlock(&dict_operation_lock);
-
-		DBUG_RETURN(0);
-	}
-
-	int		ret = 0;
-	fts_string_t	conv_str;
-	conv_str.f_len = system_charset_info->mbmaxlen
-		* FTS_MAX_WORD_LEN_IN_CHAR;
-	/* Allocate one byte more than f_len: after converting a word,
-	i_s_fts_index_table_fill_one_fetch() writes a terminating zero at
-	f_str[f_n_char], and f_n_char may be as large as f_len. */
-	conv_str.f_str = static_cast<byte*>(ut_malloc(conv_str.f_len + 1));
-
-	for (index = dict_table_get_first_index(user_table);
-	     index; index = dict_table_get_next_index(index)) {
-		if (index->type & DICT_FTS) {
-			BREAK_IF(ret = i_s_fts_index_table_fill_one_index(
-					 index, thd, &conv_str, tables));
-		}
-	}
-
-	dict_table_close(user_table, FALSE, FALSE);
-
-	rw_lock_s_unlock(&dict_operation_lock);
-
-	ut_free(conv_str.f_str);
-
-	DBUG_RETURN(ret);
-}
-
-/*******************************************************************//**
-Plugin init hook for INFORMATION_SCHEMA.INNODB_FT_INDEX_TABLE: installs
-the column definitions and the fill callback on the schema table object.
-@return 0 on success */
-static
-int
-i_s_fts_index_table_init(
-/*=====================*/
- void* p) /*!< in/out: table schema object */
-{
- DBUG_ENTER("i_s_fts_index_table_init");
-
- ST_SCHEMA_TABLE* const schema_table = static_cast<ST_SCHEMA_TABLE*>(p);
-
- /* The two assignments are independent of each other */
- schema_table->fill_table = i_s_fts_index_table_fill;
- schema_table->fields_info = i_s_fts_index_fields_info;
-
- DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_index_table =
-{ /* Plugin descriptor for the INFORMATION_SCHEMA plugin named
- "INNODB_FT_INDEX_TABLE"; i_s_fts_index_table_init is its init hook and
- i_s_common_deinit its deinit hook.
- NOTE(review): declared as st_mysql_plugin but also initializes
- version_info and maturity, while i_s_innodb_buffer_stats in this file is
- declared as st_maria_plugin - confirm the two struct names are
- interchangeable in this build. */
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_FT_INDEX_TABLE"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "INNODB AUXILIARY FTS INDEX TABLE"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_fts_index_table_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL), /* no status variables */
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL), /* no system variables */
-
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
-};
-
-/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_FT_CONFIG.
-Two string columns, KEY and VALUE, each declared with length NAME_LEN + 1.
-FTS_CONFIG_KEY and FTS_CONFIG_VALUE are the positions of the two entries
-and are used by i_s_fts_config_fill() to index the Field array, so they
-must stay in step with the order of the entries below. */
-static ST_FIELD_INFO i_s_fts_config_fields_info[] =
-{
-#define FTS_CONFIG_KEY 0
- {STRUCT_FLD(field_name, "KEY"),
- STRUCT_FLD(field_length, NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define FTS_CONFIG_VALUE 1
- {STRUCT_FLD(field_name, "VALUE"),
- STRUCT_FLD(field_length, NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-static const char* fts_config_key[] = { /* NULL-terminated list of the configuration keys that i_s_fts_config_fill() looks up, one output row per key */
- FTS_OPTIMIZE_LIMIT_IN_SECS,
- FTS_SYNCED_DOC_ID,
- FTS_STOPWORD_TABLE_NAME,
- FTS_USE_STOPWORD,
- NULL /* terminator: i_s_fts_config_fill() loops until it reaches this entry */
-};
-
-/*******************************************************************//**
-Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_CONFIG.
-For each name in fts_config_key[], calls fts_config_get_value() against an
-fts_table_t initialised with "CONFIG" / FTS_COMMON_TABLE for the table
-named by fts_internal_tbl_name, and stores one (KEY, VALUE) row.
-Returns 0 without storing rows when the PROCESS_ACL check fails, when
-fts_internal_tbl_name is unset, when the table cannot be opened, or when
-dict_table_has_fts_index() is false for it. dict_operation_lock is held in
-S mode from before the table is opened until after it is closed.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_fts_config_fill(
-/*================*/
- THD* thd, /*!< in: thread */
- TABLE_LIST* tables, /*!< in/out: tables to fill */
- Item* ) /*!< in: condition (ignored) */
-{
- Field** fields;
- TABLE* table = (TABLE*) tables->table;
- trx_t* trx;
- fts_table_t fts_table;
- dict_table_t* user_table;
- ulint i = 0;
- dict_index_t* index = NULL;
- unsigned char str[FTS_MAX_CONFIG_VALUE_LEN + 1]; /* backing storage for every value read in the loop below */
-
- DBUG_ENTER("i_s_fts_config_fill");
-
- /* If the PROCESS_ACL check does not pass, return 0 (not an error)
- without filling anything */
- if (check_global_access(thd, PROCESS_ACL, true)) {
- DBUG_RETURN(0);
- }
-
- if (!fts_internal_tbl_name) {
- DBUG_RETURN(0); /* fts_internal_tbl_name is unset: nothing to report */
- }
-
- DEBUG_SYNC_C("i_s_fts_config_fille_check");
-
- fields = table->field;
-
- /* Prevent DDL to drop fts aux tables. */
- rw_lock_s_lock(&dict_operation_lock);
-
- user_table = dict_table_open_on_name(
- fts_internal_tbl_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
-
- if (!user_table) {
- rw_lock_s_unlock(&dict_operation_lock);
-
- DBUG_RETURN(0);
- } else if (!dict_table_has_fts_index(user_table)) {
- dict_table_close(user_table, FALSE, FALSE); /* undo the open before the early return */
-
- rw_lock_s_unlock(&dict_operation_lock);
-
- DBUG_RETURN(0);
- }
-
- trx = trx_allocate_for_background();
- trx->op_info = "Select for FTS CONFIG TABLE";
-
- FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, user_table);
-
- if (!ib_vector_is_empty(user_table->fts->indexes)) {
- index = (dict_index_t*) ib_vector_getp_const(
- user_table->fts->indexes, 0); /* only the first entry of the vector is used */
- DBUG_ASSERT(!dict_index_is_online_ddl(index));
- }
-
- int ret = 0;
-
- while (fts_config_key[i]) {
- fts_string_t value;
- char* key_name;
- ulint allocated = FALSE; /* TRUE when key_name must be released with ut_free() */
-
- value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
-
- value.f_str = str;
-
- if (index
- && strcmp(fts_config_key[i], FTS_TOTAL_WORD_COUNT) == 0) { /* NOTE(review): FTS_TOTAL_WORD_COUNT is not one of the names listed in fts_config_key[]; unless one of those macros expands to the same string this branch never runs - confirm */
- key_name = fts_config_create_index_param_name(
- fts_config_key[i], index);
- allocated = TRUE;
- } else {
- key_name = (char*) fts_config_key[i];
- }
-
- fts_config_get_value(trx, &fts_table, key_name, &value); /* NOTE(review): the result is not checked - confirm value.f_str is always a usable string afterwards */
-
- if (allocated) {
- ut_free(key_name);
- }
-
- BREAK_IF(ret = field_store_string(
- fields[FTS_CONFIG_KEY], fts_config_key[i])); /* BREAK_IF is defined outside this view; presumably leaves the loop when ret is non-zero, so the cleanup below still runs */
-
- BREAK_IF(ret = field_store_string(
- fields[FTS_CONFIG_VALUE],
- reinterpret_cast<const char*>(value.f_str)));
-
- BREAK_IF(ret = schema_table_store_record(thd, table));
-
- i++;
- }
-
- fts_sql_commit(trx);
-
- trx_free_for_background(trx);
-
- dict_table_close(user_table, FALSE, FALSE);
-
- rw_lock_s_unlock(&dict_operation_lock);
-
- DBUG_RETURN(ret);
-}
-
-/*******************************************************************//**
-Plugin init hook for INFORMATION_SCHEMA.INNODB_FT_CONFIG: installs the
-column definitions and the fill callback on the schema table object.
-@return 0 on success */
-static
-int
-i_s_fts_config_init(
-/*=================*/
- void* p) /*!< in/out: table schema object */
-{
- DBUG_ENTER("i_s_fts_config_init");
-
- ST_SCHEMA_TABLE* const schema_table = static_cast<ST_SCHEMA_TABLE*>(p);
-
- /* The two assignments are independent of each other */
- schema_table->fill_table = i_s_fts_config_fill;
- schema_table->fields_info = i_s_fts_config_fields_info;
-
- DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_mysql_plugin i_s_innodb_ft_config =
-{ /* Plugin descriptor for the INFORMATION_SCHEMA plugin named
- "INNODB_FT_CONFIG"; i_s_fts_config_init is its init hook and
- i_s_common_deinit its deinit hook.
- NOTE(review): declared as st_mysql_plugin but also initializes
- version_info and maturity, while i_s_innodb_buffer_stats in this file is
- declared as st_maria_plugin - confirm the two struct names are
- interchangeable in this build. */
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_FT_CONFIG"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "INNODB AUXILIARY FTS CONFIG TABLE"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_fts_config_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL), /* no status variables */
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL), /* no system variables */
-
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
-};
-
-/* Fields of the dynamic table INNODB_BUFFER_POOL_STATS.
-Each IDX_BUF_STATS_* macro is the position of the entry that follows it
-and is used by i_s_innodb_stats_fill() to index the Field array, so the
-macro values must stay in step with the order of the entries. The columns
-are MYSQL_TYPE_LONGLONG with MY_I_S_UNSIGNED, except the seven *_RATE
-columns other than HIT_RATE, which are MYSQL_TYPE_FLOAT with no flags. */
-static ST_FIELD_INFO i_s_innodb_buffer_stats_fields_info[] =
-{
-#define IDX_BUF_STATS_POOL_ID 0
- {STRUCT_FLD(field_name, "POOL_ID"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_POOL_SIZE 1
- {STRUCT_FLD(field_name, "POOL_SIZE"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_FREE_BUFFERS 2
- {STRUCT_FLD(field_name, "FREE_BUFFERS"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_LRU_LEN 3
- {STRUCT_FLD(field_name, "DATABASE_PAGES"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_OLD_LRU_LEN 4
- {STRUCT_FLD(field_name, "OLD_DATABASE_PAGES"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_FLUSH_LIST_LEN 5
- {STRUCT_FLD(field_name, "MODIFIED_DATABASE_PAGES"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_PENDING_ZIP 6
- {STRUCT_FLD(field_name, "PENDING_DECOMPRESS"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_PENDING_READ 7
- {STRUCT_FLD(field_name, "PENDING_READS"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_FLUSH_LRU 8
- {STRUCT_FLD(field_name, "PENDING_FLUSH_LRU"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_FLUSH_LIST 9
- {STRUCT_FLD(field_name, "PENDING_FLUSH_LIST"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_PAGE_YOUNG 10
- {STRUCT_FLD(field_name, "PAGES_MADE_YOUNG"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_PAGE_NOT_YOUNG 11
- {STRUCT_FLD(field_name, "PAGES_NOT_MADE_YOUNG"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_PAGE_YOUNG_RATE 12
- {STRUCT_FLD(field_name, "PAGES_MADE_YOUNG_RATE"),
- STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
- STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_PAGE_NOT_YOUNG_RATE 13
- {STRUCT_FLD(field_name, "PAGES_MADE_NOT_YOUNG_RATE"),
- STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
- STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_PAGE_READ 14
- {STRUCT_FLD(field_name, "NUMBER_PAGES_READ"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_PAGE_CREATED 15
- {STRUCT_FLD(field_name, "NUMBER_PAGES_CREATED"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_PAGE_WRITTEN 16
- {STRUCT_FLD(field_name, "NUMBER_PAGES_WRITTEN"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_PAGE_READ_RATE 17
- {STRUCT_FLD(field_name, "PAGES_READ_RATE"),
- STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
- STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_PAGE_CREATE_RATE 18
- {STRUCT_FLD(field_name, "PAGES_CREATE_RATE"),
- STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
- STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_PAGE_WRITTEN_RATE 19
- {STRUCT_FLD(field_name, "PAGES_WRITTEN_RATE"),
- STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
- STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_GET 20
- {STRUCT_FLD(field_name, "NUMBER_PAGES_GET"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_HIT_RATE 21
- {STRUCT_FLD(field_name, "HIT_RATE"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_MADE_YOUNG_PCT 22
- {STRUCT_FLD(field_name, "YOUNG_MAKE_PER_THOUSAND_GETS"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_NOT_MADE_YOUNG_PCT 23
- {STRUCT_FLD(field_name, "NOT_YOUNG_MAKE_PER_THOUSAND_GETS"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_READ_AHREAD 24 /* (sic) i_s_innodb_stats_fill() refers to the macro by this spelling */
- {STRUCT_FLD(field_name, "NUMBER_PAGES_READ_AHEAD"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_READ_AHEAD_EVICTED 25
- {STRUCT_FLD(field_name, "NUMBER_READ_AHEAD_EVICTED"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_READ_AHEAD_RATE 26
- {STRUCT_FLD(field_name, "READ_AHEAD_RATE"),
- STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
- STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_READ_AHEAD_EVICT_RATE 27
- {STRUCT_FLD(field_name, "READ_AHEAD_EVICTED_RATE"),
- STRUCT_FLD(field_length, MAX_FLOAT_STR_LENGTH),
- STRUCT_FLD(field_type, MYSQL_TYPE_FLOAT),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_LRU_IO_SUM 28
- {STRUCT_FLD(field_name, "LRU_IO_TOTAL"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_LRU_IO_CUR 29
- {STRUCT_FLD(field_name, "LRU_IO_CURRENT"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_UNZIP_SUM 30
- {STRUCT_FLD(field_name, "UNCOMPRESS_TOTAL"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_STATS_UNZIP_CUR 31
- {STRUCT_FLD(field_name, "UNCOMPRESS_CURRENT"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/*******************************************************************//**
-Fill one row of Information Schema table INNODB_BUFFER_POOL_STATS from
-the statistics of a particular buffer pool.
-The plain counters are stored with the (value, unsigned) overload of
-Field::store(), the same way i_s_innodb_buffer_page_fill() stores its
-integer columns, instead of being converted to double first: a double
-represents integers exactly only up to 2^53, so a large counter could be
-rounded on its way into an unsigned MYSQL_TYPE_LONGLONG column. The rate
-columns are declared MYSQL_TYPE_FLOAT and are stored exactly as before,
-as are the three per-thousand ratios derived from n_page_get_delta.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_innodb_stats_fill(
-/*==================*/
- THD* thd, /*!< in: thread */
- TABLE_LIST* tables, /*!< in/out: tables to fill */
- const buf_pool_info_t* info) /*!< in: buffer pool
- information */
-{
- TABLE* table;
- Field** fields;
-
- DBUG_ENTER("i_s_innodb_stats_fill");
-
- table = tables->table;
-
- fields = table->field;
-
- /* Pool identity and list lengths */
- OK(fields[IDX_BUF_STATS_POOL_ID]->store(
- info->pool_unique_id, true));
-
- OK(fields[IDX_BUF_STATS_POOL_SIZE]->store(
- info->pool_size, true));
-
- OK(fields[IDX_BUF_STATS_LRU_LEN]->store(
- info->lru_len, true));
-
- OK(fields[IDX_BUF_STATS_OLD_LRU_LEN]->store(
- info->old_lru_len, true));
-
- OK(fields[IDX_BUF_STATS_FREE_BUFFERS]->store(
- info->free_list_len, true));
-
- OK(fields[IDX_BUF_STATS_FLUSH_LIST_LEN]->store(
- info->flush_list_len, true));
-
- /* Pending operations */
- OK(fields[IDX_BUF_STATS_PENDING_ZIP]->store(
- info->n_pend_unzip, true));
-
- OK(fields[IDX_BUF_STATS_PENDING_READ]->store(
- info->n_pend_reads, true));
-
- OK(fields[IDX_BUF_STATS_FLUSH_LRU]->store(
- info->n_pending_flush_lru, true));
-
- OK(fields[IDX_BUF_STATS_FLUSH_LIST]->store(
- info->n_pending_flush_list, true));
-
- /* Page counters and their rates */
- OK(fields[IDX_BUF_STATS_PAGE_YOUNG]->store(
- info->n_pages_made_young, true));
-
- OK(fields[IDX_BUF_STATS_PAGE_NOT_YOUNG]->store(
- info->n_pages_not_made_young, true));
-
- OK(fields[IDX_BUF_STATS_PAGE_YOUNG_RATE]->store(
- info->page_made_young_rate));
-
- OK(fields[IDX_BUF_STATS_PAGE_NOT_YOUNG_RATE]->store(
- info->page_not_made_young_rate));
-
- OK(fields[IDX_BUF_STATS_PAGE_READ]->store(
- info->n_pages_read, true));
-
- OK(fields[IDX_BUF_STATS_PAGE_CREATED]->store(
- info->n_pages_created, true));
-
- OK(fields[IDX_BUF_STATS_PAGE_WRITTEN]->store(
- info->n_pages_written, true));
-
- OK(fields[IDX_BUF_STATS_GET]->store(
- info->n_page_gets, true));
-
- OK(fields[IDX_BUF_STATS_PAGE_READ_RATE]->store(
- info->pages_read_rate));
-
- OK(fields[IDX_BUF_STATS_PAGE_CREATE_RATE]->store(
- info->pages_created_rate));
-
- OK(fields[IDX_BUF_STATS_PAGE_WRITTEN_RATE]->store(
- info->pages_written_rate));
-
- /* The per-thousand ratios divide by n_page_get_delta, so they are
- only computed when it is non-zero; otherwise 0 is stored. */
- if (info->n_page_get_delta) {
- /* Guard against 1000 - x going below zero when more pages
- were read than were requested in the interval. */
- if (info->page_read_delta <= info->n_page_get_delta) {
- OK(fields[IDX_BUF_STATS_HIT_RATE]->store(
- static_cast<double>(
- 1000 - (1000 * info->page_read_delta
- / info->n_page_get_delta))));
- } else {
- OK(fields[IDX_BUF_STATS_HIT_RATE]->store(0));
- }
-
- OK(fields[IDX_BUF_STATS_MADE_YOUNG_PCT]->store(
- static_cast<double>(
- 1000 * info->young_making_delta
- / info->n_page_get_delta)));
-
- OK(fields[IDX_BUF_STATS_NOT_MADE_YOUNG_PCT]->store(
- static_cast<double>(
- 1000 * info->not_young_making_delta
- / info->n_page_get_delta)));
- } else {
- OK(fields[IDX_BUF_STATS_HIT_RATE]->store(0));
- OK(fields[IDX_BUF_STATS_MADE_YOUNG_PCT]->store(0));
- OK(fields[IDX_BUF_STATS_NOT_MADE_YOUNG_PCT]->store(0));
- }
-
- /* Read-ahead counters and rates */
- OK(fields[IDX_BUF_STATS_READ_AHREAD]->store(
- info->n_ra_pages_read, true));
-
- OK(fields[IDX_BUF_STATS_READ_AHEAD_EVICTED]->store(
- info->n_ra_pages_evicted, true));
-
- OK(fields[IDX_BUF_STATS_READ_AHEAD_RATE]->store(
- info->pages_readahead_rate));
-
- OK(fields[IDX_BUF_STATS_READ_AHEAD_EVICT_RATE]->store(
- info->pages_evicted_rate));
-
- /* LRU I/O and decompression counters */
- OK(fields[IDX_BUF_STATS_LRU_IO_SUM]->store(
- info->io_sum, true));
-
- OK(fields[IDX_BUF_STATS_LRU_IO_CUR]->store(
- info->io_cur, true));
-
- OK(fields[IDX_BUF_STATS_UNZIP_SUM]->store(
- info->unzip_sum, true));
-
- OK(fields[IDX_BUF_STATS_UNZIP_CUR]->store(
- info->unzip_cur, true));
-
- DBUG_RETURN(schema_table_store_record(thd, table));
-}
-
-/*******************************************************************//**
-Loop through each buffer pool instance, fetch its statistics and add one
-row per instance to information schema table
-I_S_INNODB_BUFFER_POOL_STATS. Returns 0 without adding rows when the
-PROCESS_ACL check fails. The DBUG trace label is this function's own
-name; it previously read "i_s_innodb_buffer_fill_general", which made
-DBUG traces point at a function name that does not match this one.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_innodb_buffer_stats_fill_table(
-/*===============================*/
- THD* thd, /*!< in: thread */
- TABLE_LIST* tables, /*!< in/out: tables to fill */
- Item* ) /*!< in: condition (ignored) */
-{
- int status = 0;
- buf_pool_info_t* pool_info;
-
- DBUG_ENTER("i_s_innodb_buffer_stats_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
- /* Only allow the PROCESS privilege holder to access the stats */
- if (check_global_access(thd, PROCESS_ACL, true)) {
- DBUG_RETURN(0);
- }
-
- /* One zero-filled buf_pool_info_t per buffer pool instance */
- pool_info = (buf_pool_info_t*) mem_zalloc(
- srv_buf_pool_instances * sizeof *pool_info);
-
- /* Walk through each buffer pool */
- for (ulint i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool;
-
- buf_pool = buf_pool_from_array(i);
-
- /* Fetch individual buffer pool info. The whole array is
- passed together with the index i; the row is then filled from
- pool_info[i], so the callee presumably writes that slot. */
- buf_stats_get_pool_info(buf_pool, i, pool_info);
-
- status = i_s_innodb_stats_fill(thd, tables, &pool_info[i]);
-
- /* If something goes wrong, break and return */
- if (status) {
- break;
- }
- }
-
- mem_free(pool_info);
-
- DBUG_RETURN(status);
-}
-
-/*******************************************************************//**
-Plugin init hook for INFORMATION_SCHEMA.INNODB_BUFFER_POOL_STATS: installs
-the column definitions and the fill callback on the schema table object.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_innodb_buffer_pool_stats_init(
-/*==============================*/
- void* p) /*!< in/out: table schema object */
-{
- DBUG_ENTER("i_s_innodb_buffer_pool_stats_init");
-
- ST_SCHEMA_TABLE* const schema_table =
- reinterpret_cast<ST_SCHEMA_TABLE*>(p);
-
- /* The two assignments are independent of each other */
- schema_table->fill_table = i_s_innodb_buffer_stats_fill_table;
- schema_table->fields_info = i_s_innodb_buffer_stats_fields_info;
-
- DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_stats =
-{ /* Plugin descriptor for the INFORMATION_SCHEMA plugin named
- "INNODB_BUFFER_POOL_STATS"; i_s_innodb_buffer_pool_stats_init is its
- init hook and i_s_common_deinit its deinit hook. */
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_BUFFER_POOL_STATS"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "InnoDB Buffer Pool Statistics Information "),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_innodb_buffer_pool_stats_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL), /* no status variables */
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL), /* no system variables */
-
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE /* given positionally; presumably version_info and maturity, matching the last two STRUCT_FLD entries of i_s_innodb_ft_config - confirm */
-};
-
-/* Fields of the dynamic table INNODB_BUFFER_POOL_PAGE.
-Each IDX_BUFFER_* macro is the position of the entry that follows it and
-is used by i_s_innodb_buffer_page_fill() to index the Field array, so the
-macro values must stay in step with the order of the entries. Numeric
-columns are MYSQL_TYPE_LONGLONG with MY_I_S_UNSIGNED; the string columns
-are all flagged MY_I_S_MAYBE_NULL.
-NOTE(review): the comment on i_s_innodb_buffer_page_fill() calls this
-table INNODB_BUFFER_PAGE - confirm which name is the registered one. */
-static ST_FIELD_INFO i_s_innodb_buffer_page_fields_info[] =
-{
-#define IDX_BUFFER_POOL_ID 0
- {STRUCT_FLD(field_name, "POOL_ID"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_BLOCK_ID 1
- {STRUCT_FLD(field_name, "BLOCK_ID"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_SPACE 2
- {STRUCT_FLD(field_name, "SPACE"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_NUM 3
- {STRUCT_FLD(field_name, "PAGE_NUMBER"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_TYPE 4
- {STRUCT_FLD(field_name, "PAGE_TYPE"),
- STRUCT_FLD(field_length, 64),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_FLUSH_TYPE 5
- {STRUCT_FLD(field_name, "FLUSH_TYPE"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_FIX_COUNT 6
- {STRUCT_FLD(field_name, "FIX_COUNT"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_HASHED 7
- {STRUCT_FLD(field_name, "IS_HASHED"),
- STRUCT_FLD(field_length, 3),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_NEWEST_MOD 8
- {STRUCT_FLD(field_name, "NEWEST_MODIFICATION"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_OLDEST_MOD 9
- {STRUCT_FLD(field_name, "OLDEST_MODIFICATION"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_ACCESS_TIME 10
- {STRUCT_FLD(field_name, "ACCESS_TIME"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_TABLE_NAME 11
- {STRUCT_FLD(field_name, "TABLE_NAME"),
- STRUCT_FLD(field_length, 1024),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_INDEX_NAME 12
- {STRUCT_FLD(field_name, "INDEX_NAME"),
- STRUCT_FLD(field_length, 1024),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_NUM_RECS 13
- {STRUCT_FLD(field_name, "NUMBER_RECORDS"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_DATA_SIZE 14
- {STRUCT_FLD(field_name, "DATA_SIZE"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_ZIP_SIZE 15
- {STRUCT_FLD(field_name, "COMPRESSED_SIZE"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_STATE 16
- {STRUCT_FLD(field_name, "PAGE_STATE"),
- STRUCT_FLD(field_length, 64),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_IO_FIX 17
- {STRUCT_FLD(field_name, "IO_FIX"),
- STRUCT_FLD(field_length, 64),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_IS_OLD 18
- {STRUCT_FLD(field_name, "IS_OLD"),
- STRUCT_FLD(field_length, 3),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUFFER_PAGE_FREE_CLOCK 19
- {STRUCT_FLD(field_name, "FREE_PAGE_CLOCK"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/*******************************************************************//**
-Fill Information Schema table INNODB_BUFFER_PAGE with information
-cached in the buf_page_info_t array. One row is stored per array
-element. TABLE_NAME and INDEX_NAME stay NULL unless the page is an index
-page whose index is found in the dictionary cache.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_innodb_buffer_page_fill(
-/*========================*/
-	THD*			thd,		/*!< in: thread */
-	TABLE_LIST*		tables,		/*!< in/out: tables to fill */
-	const buf_page_info_t*	info_array,	/*!< in: array cached page
-						info */
-	ulint			num_page)	/*!< in: number of page info
-						cached */
-{
-	TABLE*			table;
-	Field**			fields;
-
-	DBUG_ENTER("i_s_innodb_buffer_page_fill");
-
-	table = tables->table;
-
-	fields = table->field;
-
-	/* Iterate through the cached array and fill the I_S table rows */
-	for (ulint i = 0; i < num_page; i++) {
-		const buf_page_info_t*	page_info;
-		char			table_name[MAX_FULL_NAME_LEN + 1];
-		const char*		table_name_end = NULL;
-		const char*		state_str;
-		enum buf_page_state	state;
-
-		page_info = info_array + i;
-
-		state_str = NULL;
-
-		OK(fields[IDX_BUFFER_POOL_ID]->store(
-			page_info->pool_id, true));
-
-		OK(fields[IDX_BUFFER_BLOCK_ID]->store(
-			page_info->block_id, true));
-
-		OK(fields[IDX_BUFFER_PAGE_SPACE]->store(
-			page_info->space_id, true));
-
-		OK(fields[IDX_BUFFER_PAGE_NUM]->store(
-			page_info->page_num, true));
-
-		OK(field_store_string(
-			fields[IDX_BUFFER_PAGE_TYPE],
-			i_s_page_type[page_info->page_type].type_str));
-
-		OK(fields[IDX_BUFFER_PAGE_FLUSH_TYPE]->store(
-			page_info->flush_type, true));
-
-		OK(fields[IDX_BUFFER_PAGE_FIX_COUNT]->store(
-			page_info->fix_count, true));
-
-		OK(field_store_string(fields[IDX_BUFFER_PAGE_HASHED],
-				      page_info->hashed ? "YES" : "NO"));
-
-		OK(fields[IDX_BUFFER_PAGE_NEWEST_MOD]->store(
-			(longlong) page_info->newest_mod, true));
-
-		OK(fields[IDX_BUFFER_PAGE_OLDEST_MOD]->store(
-			(longlong) page_info->oldest_mod, true));
-
-		OK(fields[IDX_BUFFER_PAGE_ACCESS_TIME]->store(
-			page_info->access_time, true));
-
-		/* Both name columns default to NULL for every row */
-		fields[IDX_BUFFER_PAGE_TABLE_NAME]->set_null();
-
-		fields[IDX_BUFFER_PAGE_INDEX_NAME]->set_null();
-
-		/* If this is an index page, fetch the index name
-		and table name */
-		if (page_info->page_type == I_S_PAGE_TYPE_INDEX) {
-			bool	ret = false;
-			bool	index_found = false;
-
-			/* The dictionary cache lookup and the use of the
-			returned index are done under dict_sys->mutex */
-			mutex_enter(&dict_sys->mutex);
-
-			if (const dict_index_t*	index =
-			    dict_index_get_if_in_cache_low(
-				    page_info->index_id)) {
-				index_found = true;
-
-				table_name_end = innobase_convert_name(
-					table_name, sizeof(table_name),
-					index->table_name,
-					strlen(index->table_name),
-					thd, TRUE);
-
-				ret = fields[IDX_BUFFER_PAGE_TABLE_NAME]
-					->store(table_name,
-						static_cast<uint>(
-							table_name_end
-							- table_name),
-						system_charset_info)
-					|| field_store_index_name(
-						fields
-						[IDX_BUFFER_PAGE_INDEX_NAME],
-						index->name);
-			}
-
-			mutex_exit(&dict_sys->mutex);
-
-			OK(ret);
-
-			/* Clear the NULL flag only when a table name was
-			actually stored. Doing it unconditionally would
-			expose whatever name a previous row left in the
-			field buffer when the index is not cached. */
-			if (index_found) {
-				fields[IDX_BUFFER_PAGE_TABLE_NAME]
-					->set_notnull();
-			}
-		}
-
-		OK(fields[IDX_BUFFER_PAGE_NUM_RECS]->store(
-			page_info->num_recs, true));
-
-		OK(fields[IDX_BUFFER_PAGE_DATA_SIZE]->store(
-			page_info->data_size, true));
-
-		/* zip_ssize == 0 means the page has no compressed copy */
-		OK(fields[IDX_BUFFER_PAGE_ZIP_SIZE]->store(
-			page_info->zip_ssize
-			? (UNIV_ZIP_SIZE_MIN >> 1) << page_info->zip_ssize
-			: 0, true));
-
-#if BUF_PAGE_STATE_BITS > 3
-# error "BUF_PAGE_STATE_BITS > 3, please ensure that all 1<<BUF_PAGE_STATE_BITS values are checked for"
-#endif
-		state = static_cast<enum buf_page_state>(page_info->page_state);
-
-		switch (state) {
-		/* First three states are for compression pages and
-		are not states we would get as we scan pages through
-		buffer blocks */
-		case BUF_BLOCK_POOL_WATCH:
-		case BUF_BLOCK_ZIP_PAGE:
-		case BUF_BLOCK_ZIP_DIRTY:
-			state_str = NULL;
-			break;
-		case BUF_BLOCK_NOT_USED:
-			state_str = "NOT_USED";
-			break;
-		case BUF_BLOCK_READY_FOR_USE:
-			state_str = "READY_FOR_USE";
-			break;
-		case BUF_BLOCK_FILE_PAGE:
-			state_str = "FILE_PAGE";
-			break;
-		case BUF_BLOCK_MEMORY:
-			state_str = "MEMORY";
-			break;
-		case BUF_BLOCK_REMOVE_HASH:
-			state_str = "REMOVE_HASH";
-			break;
-		};
-
-		OK(field_store_string(fields[IDX_BUFFER_PAGE_STATE],
-				      state_str));
-
-		/* Do not let the PAGE_STATE text leak into IO_FIX if
-		io_fix ever holds a value not listed below */
-		state_str = NULL;
-
-		switch (page_info->io_fix) {
-		case BUF_IO_NONE:
-			state_str = "IO_NONE";
-			break;
-		case BUF_IO_READ:
-			state_str = "IO_READ";
-			break;
-		case BUF_IO_WRITE:
-			state_str = "IO_WRITE";
-			break;
-		case BUF_IO_PIN:
-			state_str = "IO_PIN";
-			break;
-		}
-
-		OK(field_store_string(fields[IDX_BUFFER_PAGE_IO_FIX],
-				      state_str));
-
-		OK(field_store_string(fields[IDX_BUFFER_PAGE_IS_OLD],
-				      (page_info->is_old) ? "YES" : "NO"));
-
-		/* The column is declared MY_I_S_UNSIGNED, so store the
-		value as unsigned like every other numeric column here */
-		OK(fields[IDX_BUFFER_PAGE_FREE_CLOCK]->store(
-			page_info->freed_page_clock, true));
-
-		OK(schema_table_store_record(thd, table));
-	}
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Translate the raw page type read from a buffer frame into the page type
-stored in a buf_page_info_t structure. For index pages the index id,
-data size and record count are filled in as well. */
-static
-void
-i_s_innodb_set_page_type(
-/*=====================*/
-	buf_page_info_t*page_info,	/*!< in/out: structure to fill with
-					scanned info */
-	ulint		page_type,	/*!< in: page type */
-	const byte*	frame)		/*!< in: buffer frame */
-{
-	if (page_type != FIL_PAGE_INDEX) {
-		if (page_type > FIL_PAGE_TYPE_LAST) {
-			/* A page type this table does not know about */
-			page_info->page_type = I_S_PAGE_TYPE_UNKNOWN;
-		} else {
-			/* The raw type is used directly as the slot in
-			i_s_page_type[]; insist that the slot agrees */
-			ut_a(page_type == i_s_page_type[page_type].type_value);
-
-			page_info->page_type = page_type;
-		}
-	} else {
-		const page_t*	index_page = (const page_t*) frame;
-
-		page_info->index_id = btr_page_get_index_id(index_page);
-
-		/* FIL_PAGE_INDEX is defined as 17855 and therefore
-		cannot serve as a subscript of i_s_page_type[]. Index
-		pages are filed under I_S_PAGE_TYPE_INDEX, except the
-		change buffer index which gets I_S_PAGE_TYPE_IBUF. */
-		if (page_info->index_id
-		    != static_cast<index_id_t>(DICT_IBUF_ID_MIN
-					       + IBUF_SPACE_ID)) {
-			page_info->page_type = I_S_PAGE_TYPE_INDEX;
-		} else {
-			page_info->page_type = I_S_PAGE_TYPE_IBUF;
-		}
-
-		/* Heap top minus the fixed part up to the end of the
-		supremum record, minus the garbage bytes */
-		page_info->data_size = (ulint)(page_header_get_field(
-			index_page, PAGE_HEAP_TOP)
-			- (page_is_comp(index_page)
-			   ? PAGE_NEW_SUPREMUM_END
-			   : PAGE_OLD_SUPREMUM_END)
-			- page_header_get_field(index_page, PAGE_GARBAGE));
-
-		page_info->num_recs = page_get_n_recs(index_page);
-	}
-
-	/* For compressed BLOB pages the page number and space id are
-	taken from the frame header */
-	if (page_info->page_type == FIL_PAGE_TYPE_ZBLOB
-	    || page_info->page_type == FIL_PAGE_TYPE_ZBLOB2) {
-		page_info->page_num = mach_read_from_4(
-			frame + FIL_PAGE_OFFSET);
-		page_info->space_id = mach_read_from_4(
-			frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-	}
-}
-/*******************************************************************//**
-Collect the general information of one buffer pool page into a
-buf_page_info_t structure. The structure is expected to arrive
-zero-filled, so every member this function does not assign stays 0.
-The page mutex is held while the page is inspected. */
-static
-void
-i_s_innodb_buffer_page_get_info(
-/*============================*/
-	const buf_page_t*bpage,		/*!< in: buffer pool page to scan */
-	ulint		pool_id,	/*!< in: buffer pool id */
-	ulint		pos,		/*!< in: buffer block position in
-					buffer pool or in the LRU list */
-	buf_page_info_t*page_info)	/*!< in: zero filled info structure;
-					out: structure filled with scanned
-					info */
-{
-	ib_mutex_t*	page_mutex = buf_page_get_mutex(bpage);
-
-	ut_ad(pool_id < MAX_BUFFER_POOLS);
-
-	page_info->pool_id = pool_id;
-	page_info->block_id = pos;
-
-	mutex_enter(page_mutex);
-
-	page_info->page_state = buf_page_get_state(bpage);
-
-	/* Only buffers that map to a tablespace carry further details,
-	that is, pages in state BUF_BLOCK_ZIP_PAGE, BUF_BLOCK_ZIP_DIRTY
-	or BUF_BLOCK_FILE_PAGE */
-	if (!buf_page_in_file(bpage)) {
-		page_info->page_type = I_S_PAGE_TYPE_UNKNOWN;
-		mutex_exit(page_mutex);
-		return;
-	}
-
-	page_info->space_id = buf_page_get_space(bpage);
-	page_info->page_num = buf_page_get_page_no(bpage);
-	page_info->flush_type = bpage->flush_type;
-	page_info->fix_count = bpage->buf_fix_count;
-	page_info->newest_mod = bpage->newest_modification;
-	page_info->oldest_mod = bpage->oldest_modification;
-	page_info->access_time = bpage->access_time;
-	page_info->zip_ssize = bpage->zip.ssize;
-	page_info->io_fix = bpage->io_fix;
-	page_info->is_old = bpage->old;
-	page_info->freed_page_clock = bpage->freed_page_clock;
-
-	/* A page with a pending read is reported as unknown; the frame
-	is not inspected (presumably because its contents are not
-	complete yet) */
-	if (buf_page_get_io_fix(bpage) == BUF_IO_READ) {
-		page_info->page_type = I_S_PAGE_TYPE_UNKNOWN;
-		mutex_exit(page_mutex);
-		return;
-	}
-
-	const byte*	frame;
-
-	if (page_info->page_state != BUF_BLOCK_FILE_PAGE) {
-		/* Compressed-only page: look at the compressed frame */
-		ut_ad(page_info->zip_ssize);
-		frame = bpage->zip.data;
-	} else {
-		const buf_block_t*	block
-			= reinterpret_cast<const buf_block_t*>(bpage);
-
-		frame = block->frame;
-		page_info->hashed = (block->index != NULL);
-	}
-
-	i_s_innodb_set_page_type(page_info, fil_page_get_type(frame), frame);
-
-	mutex_exit(page_mutex);
-}
-
-
-/*******************************************************************//**
-This is the function that goes through each block of the buffer pool
-and fetch information to information schema tables: INNODB_BUFFER_PAGE.
-Blocks are processed in batches of at most MAX_BUF_INFO_CACHED entries;
-the scan stops at the first batch that fails to be stored.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_innodb_fill_buffer_pool(
-/*========================*/
-	THD*			thd,		/*!< in: thread */
-	TABLE_LIST*		tables,		/*!< in/out: tables to fill */
-	buf_pool_t*		buf_pool,	/*!< in: buffer pool to scan */
-	const ulint		pool_id)	/*!< in: buffer pool id */
-{
-	int			status	= 0;
-	mem_heap_t*		heap;
-
-	DBUG_ENTER("i_s_innodb_fill_buffer_pool");
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	heap = mem_heap_create(10000);
-
-	/* Go through each chunk of buffer pool. Currently, we only
-	have one single chunk for each buffer pool. The status test in
-	the loop condition makes a failure inside the batch loop end the
-	whole scan; a plain break there would only leave the inner loop
-	and let a later chunk overwrite the error status. */
-	for (ulint n = 0; status == 0 && n < buf_pool->n_chunks; n++) {
-		const buf_block_t*	block;
-		ulint			n_blocks;
-		buf_page_info_t*	info_buffer;
-		ulint			num_page;
-		ulint			mem_size;
-		ulint			chunk_size;
-		ulint			num_to_process = 0;
-		ulint			block_id = 0;
-
-		/* Get buffer block of the nth chunk */
-		block = buf_get_nth_chunk_block(buf_pool, n, &chunk_size);
-		num_page = 0;
-
-		while (chunk_size > 0) {
-			/* we cache maximum MAX_BUF_INFO_CACHED number of
-			buffer page info */
-			num_to_process = ut_min(chunk_size,
-						MAX_BUF_INFO_CACHED);
-
-			mem_size = num_to_process * sizeof(buf_page_info_t);
-
-			/* For each chunk, we'll pre-allocate information
-			structures to cache the page information read from
-			the buffer pool. Doing so before obtain any mutex */
-			info_buffer = (buf_page_info_t*) mem_heap_zalloc(
-				heap, mem_size);
-
-			/* GO through each block in the chunk */
-			for (n_blocks = num_to_process; n_blocks--; block++) {
-				i_s_innodb_buffer_page_get_info(
-					&block->page, pool_id, block_id,
-					info_buffer + num_page);
-				block_id++;
-				num_page++;
-			}
-
-			/* Fill in information schema table with information
-			just collected from the buffer chunk scan */
-			status = i_s_innodb_buffer_page_fill(
-				thd, tables, info_buffer,
-				num_page);
-
-			/* If something goes wrong, stop; the outer loop
-			condition ends the chunk scan as well */
-			if (status) {
-				break;
-			}
-
-			/* Recycle the heap for the next batch */
-			mem_heap_empty(heap);
-			chunk_size -= num_to_process;
-			num_page = 0;
-		}
-	}
-
-	mem_heap_free(heap);
-
-	DBUG_RETURN(status);
-}
-
-/*******************************************************************//**
-Populate the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE with
-one row per page of every InnoDB buffer pool instance. Callers without
-the PROCESS privilege get an empty result.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_innodb_buffer_page_fill_table(
-/*==============================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (ignored) */
-{
-	DBUG_ENTER("i_s_innodb_buffer_page_fill_table");
-
-	/* deny access to user without PROCESS privilege */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	int	status = 0;
-
-	/* Visit the buffer pool instances in order, fetching the page
-	information of each one into the I_S table; the first failure
-	ends the walk */
-	for (ulint i = 0; status == 0 && i < srv_buf_pool_instances; i++) {
-		status = i_s_innodb_fill_buffer_pool(
-			thd, tables, buf_pool_from_array(i), i);
-	}
-
-	DBUG_RETURN(status);
-}
-
-/*******************************************************************//**
-Plugin init callback for INFORMATION_SCHEMA.INNODB_BUFFER_PAGE: installs
-the column definitions and the fill function in the schema object.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_innodb_buffer_page_init(
-/*========================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("i_s_innodb_buffer_page_init");
-
-	ST_SCHEMA_TABLE*	schema_table
-		= reinterpret_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fields_info = i_s_innodb_buffer_page_fields_info;
-	schema_table->fill_table = i_s_innodb_buffer_page_fill_table;
-
-	DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_page =
-{ /* Plugin descriptor of the INFORMATION_SCHEMA table INNODB_BUFFER_PAGE.
- The initializers are positional, so their order must not be changed. */
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_BUFFER_PAGE"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "InnoDB Buffer Page Information"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded;
- binds the column list and the fill callback */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_innodb_buffer_page_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL),
-
- /* NOTE(review): presumably the version string and the maturity
- level members of st_maria_plugin - confirm against the plugin API */
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE
-};
-
-static ST_FIELD_INFO i_s_innodb_buf_page_lru_fields_info[] =
-{ /* Column definitions of the INFORMATION_SCHEMA table
- INNODB_BUFFER_PAGE_LRU. Each IDX_BUF_LRU_* macro is the array position
- of the entry that follows it and is used to index the Field array when
- rows are filled, so macros and entries must stay in step. */
-#define IDX_BUF_LRU_POOL_ID 0
- {STRUCT_FLD(field_name, "POOL_ID"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_POS 1
- {STRUCT_FLD(field_name, "LRU_POSITION"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_SPACE 2
- {STRUCT_FLD(field_name, "SPACE"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_NUM 3
- {STRUCT_FLD(field_name, "PAGE_NUMBER"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_TYPE 4
- {STRUCT_FLD(field_name, "PAGE_TYPE"),
- STRUCT_FLD(field_length, 64),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_FLUSH_TYPE 5
- {STRUCT_FLD(field_name, "FLUSH_TYPE"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_FIX_COUNT 6
- {STRUCT_FLD(field_name, "FIX_COUNT"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_HASHED 7
- {STRUCT_FLD(field_name, "IS_HASHED"), /* filled with "YES" or "NO" */
- STRUCT_FLD(field_length, 3),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_NEWEST_MOD 8
- {STRUCT_FLD(field_name, "NEWEST_MODIFICATION"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_OLDEST_MOD 9
- {STRUCT_FLD(field_name, "OLDEST_MODIFICATION"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_ACCESS_TIME 10
- {STRUCT_FLD(field_name, "ACCESS_TIME"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_TABLE_NAME 11
- {STRUCT_FLD(field_name, "TABLE_NAME"), /* set only for index pages */
- STRUCT_FLD(field_length, 1024),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_INDEX_NAME 12
- {STRUCT_FLD(field_name, "INDEX_NAME"), /* set only for index pages */
- STRUCT_FLD(field_length, 1024),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_NUM_RECS 13
- {STRUCT_FLD(field_name, "NUMBER_RECORDS"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_DATA_SIZE 14
- {STRUCT_FLD(field_name, "DATA_SIZE"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_ZIP_SIZE 15
- {STRUCT_FLD(field_name, "COMPRESSED_SIZE"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_STATE 16
- {STRUCT_FLD(field_name, "COMPRESSED"), /* despite the macro name this is
- a "YES"/"NO" flag derived from the page state, NULL for other states */
- STRUCT_FLD(field_length, 3),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_IO_FIX 17
- {STRUCT_FLD(field_name, "IO_FIX"),
- STRUCT_FLD(field_length, 64),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_IS_OLD 18
- {STRUCT_FLD(field_name, "IS_OLD"), /* filled with "YES" or "NO" */
- STRUCT_FLD(field_length, 3),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define IDX_BUF_LRU_PAGE_FREE_CLOCK 19
- {STRUCT_FLD(field_name, "FREE_PAGE_CLOCK"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/*******************************************************************//**
-Fill Information Schema table INNODB_BUFFER_PAGE_LRU with information
-cached in the buf_page_info_t array. One row is stored per array
-element. TABLE_NAME and INDEX_NAME stay NULL unless the page is an index
-page whose index is found in the dictionary cache.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_innodb_buf_page_lru_fill(
-/*=========================*/
-	THD*			thd,		/*!< in: thread */
-	TABLE_LIST*		tables,		/*!< in/out: tables to fill */
-	const buf_page_info_t*	info_array,	/*!< in: array cached page
-						info */
-	ulint			num_page)	/*!< in: number of page info
-						cached */
-{
-	DBUG_ENTER("i_s_innodb_buf_page_lru_fill");
-
-	TABLE*	table	= tables->table;
-	Field**	fields	= table->field;
-
-	/* Iterate through the cached array and fill the I_S table rows */
-	for (ulint i = 0; i < num_page; i++) {
-		const buf_page_info_t*	page_info;
-		char			table_name[MAX_FULL_NAME_LEN + 1];
-		const char*		table_name_end = NULL;
-		const char*		state_str;
-		enum buf_page_state	state;
-
-		state_str = NULL;
-
-		page_info = info_array + i;
-
-		OK(fields[IDX_BUF_LRU_POOL_ID]->store(
-			page_info->pool_id, true));
-		OK(fields[IDX_BUF_LRU_POS]->store(
-			page_info->block_id, true));
-
-		OK(fields[IDX_BUF_LRU_PAGE_SPACE]->store(
-			page_info->space_id, true));
-
-		OK(fields[IDX_BUF_LRU_PAGE_NUM]->store(
-			page_info->page_num, true));
-
-		OK(field_store_string(
-			fields[IDX_BUF_LRU_PAGE_TYPE],
-			i_s_page_type[page_info->page_type].type_str));
-
-		OK(fields[IDX_BUF_LRU_PAGE_FLUSH_TYPE]->store(
-			page_info->flush_type, true));
-
-		OK(fields[IDX_BUF_LRU_PAGE_FIX_COUNT]->store(
-			page_info->fix_count, true));
-
-		OK(field_store_string(fields[IDX_BUF_LRU_PAGE_HASHED],
-				      page_info->hashed ? "YES" : "NO"));
-
-		OK(fields[IDX_BUF_LRU_PAGE_NEWEST_MOD]->store(
-			page_info->newest_mod, true));
-
-		OK(fields[IDX_BUF_LRU_PAGE_OLDEST_MOD]->store(
-			page_info->oldest_mod, true));
-
-		OK(fields[IDX_BUF_LRU_PAGE_ACCESS_TIME]->store(
-			page_info->access_time, true));
-
-		/* Both name columns default to NULL for every row */
-		fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->set_null();
-
-		fields[IDX_BUF_LRU_PAGE_INDEX_NAME]->set_null();
-
-		/* If this is an index page, fetch the index name
-		and table name */
-		if (page_info->page_type == I_S_PAGE_TYPE_INDEX) {
-			bool	ret = false;
-			bool	index_found = false;
-
-			/* The dictionary cache lookup and the use of the
-			returned index are done under dict_sys->mutex */
-			mutex_enter(&dict_sys->mutex);
-
-			if (const dict_index_t*	index =
-			    dict_index_get_if_in_cache_low(
-				    page_info->index_id)) {
-				index_found = true;
-
-				table_name_end = innobase_convert_name(
-					table_name, sizeof(table_name),
-					index->table_name,
-					strlen(index->table_name),
-					thd, TRUE);
-
-				ret = fields[IDX_BUF_LRU_PAGE_TABLE_NAME]
-					->store(table_name,
-						static_cast<uint>(
-							table_name_end
-							- table_name),
-						system_charset_info)
-					|| field_store_index_name(
-						fields
-						[IDX_BUF_LRU_PAGE_INDEX_NAME],
-						index->name);
-			}
-
-			mutex_exit(&dict_sys->mutex);
-
-			OK(ret);
-
-			/* Clear the NULL flag only when a table name was
-			actually stored. Doing it unconditionally would
-			expose whatever name a previous row left in the
-			field buffer when the index is not cached. */
-			if (index_found) {
-				fields[IDX_BUF_LRU_PAGE_TABLE_NAME]
-					->set_notnull();
-			}
-		}
-
-		OK(fields[IDX_BUF_LRU_PAGE_NUM_RECS]->store(
-			page_info->num_recs, true));
-
-		OK(fields[IDX_BUF_LRU_PAGE_DATA_SIZE]->store(
-			page_info->data_size, true));
-
-		/* zip_ssize == 0 means the page has no compressed copy.
-		The size is derived from UNIV_ZIP_SIZE_MIN in the same way
-		as for INNODB_BUFFER_PAGE instead of a literal 512. */
-		OK(fields[IDX_BUF_LRU_PAGE_ZIP_SIZE]->store(
-			page_info->zip_ssize
-			? (UNIV_ZIP_SIZE_MIN >> 1) << page_info->zip_ssize
-			: 0, true));
-
-		state = static_cast<enum buf_page_state>(page_info->page_state);
-
-		switch (state) {
-		/* Compressed page */
-		case BUF_BLOCK_ZIP_PAGE:
-		case BUF_BLOCK_ZIP_DIRTY:
-			state_str = "YES";
-			break;
-		/* Uncompressed page */
-		case BUF_BLOCK_FILE_PAGE:
-			state_str = "NO";
-			break;
-		/* We should not see following states */
-		case BUF_BLOCK_POOL_WATCH:
-		case BUF_BLOCK_READY_FOR_USE:
-		case BUF_BLOCK_NOT_USED:
-		case BUF_BLOCK_MEMORY:
-		case BUF_BLOCK_REMOVE_HASH:
-			state_str = NULL;
-			break;
-		};
-
-		OK(field_store_string(fields[IDX_BUF_LRU_PAGE_STATE],
-				      state_str));
-
-		/* Do not let the COMPRESSED text leak into IO_FIX if
-		io_fix ever holds a value not listed below */
-		state_str = NULL;
-
-		switch (page_info->io_fix) {
-		case BUF_IO_NONE:
-			state_str = "IO_NONE";
-			break;
-		case BUF_IO_READ:
-			state_str = "IO_READ";
-			break;
-		case BUF_IO_WRITE:
-			state_str = "IO_WRITE";
-			break;
-		case BUF_IO_PIN:
-			state_str = "IO_PIN";
-			break;
-		}
-
-		OK(field_store_string(fields[IDX_BUF_LRU_PAGE_IO_FIX],
-				      state_str));
-
-		OK(field_store_string(fields[IDX_BUF_LRU_PAGE_IS_OLD],
-				      page_info->is_old ? "YES" : "NO"));
-
-		OK(fields[IDX_BUF_LRU_PAGE_FREE_CLOCK]->store(
-			page_info->freed_page_clock, true));
-
-		OK(schema_table_store_record(thd, table));
-	}
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Walk the LRU list of one buffer pool from its last element towards its
-first, snapshot every page into a temporary array while holding the
-LRU list mutex, and then store the snapshot as rows of
-INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_innodb_fill_buffer_lru(
-/*=======================*/
-	THD*			thd,		/*!< in: thread */
-	TABLE_LIST*		tables,		/*!< in/out: tables to fill */
-	buf_pool_t*		buf_pool,	/*!< in: buffer pool to scan */
-	const ulint		pool_id)	/*!< in: buffer pool id */
-{
-	DBUG_ENTER("i_s_innodb_fill_buffer_lru");
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* The list length may change at any time, so the mutex is taken
-	before the length is read and the array is sized from it */
-	mutex_enter(&buf_pool->LRU_list_mutex);
-
-	const ulint		lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
-
-	/* MY_WME makes my_malloc() report an allocation failure */
-	buf_page_info_t*	info_buffer = (buf_page_info_t*) my_malloc(
-		lru_len * sizeof *info_buffer, MYF(MY_WME));
-
-	if (info_buffer == NULL) {
-		mutex_exit(&buf_pool->LRU_list_mutex);
-		DBUG_RETURN(1);
-	}
-
-	/* The info collector relies on a zero-filled structure */
-	memset(info_buffer, 0, lru_len * sizeof *info_buffer);
-
-	ulint			lru_pos = 0;
-
-	/* Reuse the collector of INNODB_BUFFER_PAGE for every page on
-	the list; the list position doubles as the block id */
-	for (const buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-	     bpage != NULL;
-	     bpage = UT_LIST_GET_PREV(LRU, bpage)) {
-		i_s_innodb_buffer_page_get_info(bpage, pool_id, lru_pos,
-						&info_buffer[lru_pos]);
-		lru_pos++;
-	}
-
-	ut_ad(lru_pos == lru_len);
-	ut_ad(lru_pos == UT_LIST_GET_LEN(buf_pool->LRU));
-
-	mutex_exit(&buf_pool->LRU_list_mutex);
-
-	/* Rows are stored from the snapshot after the mutex is released */
-	const int		status = i_s_innodb_buf_page_lru_fill(
-		thd, tables, info_buffer, lru_len);
-
-	my_free(info_buffer);
-
-	DBUG_RETURN(status);
-}
-
-/*******************************************************************//**
-Populate the dynamic table INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU
-with the pages found on the LRU list of every InnoDB buffer pool
-instance. Callers without the PROCESS privilege get an empty result.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_innodb_buf_page_lru_fill_table(
-/*===============================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (ignored) */
-{
-	DBUG_ENTER("i_s_innodb_buf_page_lru_fill_table");
-
-	/* deny access to any users that do not hold PROCESS_ACL */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	int	status = 0;
-
-	/* Visit the buffer pool instances in order, fetching the LRU
-	list of each one into the I_S table; the first failure ends the
-	walk */
-	for (ulint i = 0; status == 0 && i < srv_buf_pool_instances; i++) {
-		status = i_s_innodb_fill_buffer_lru(
-			thd, tables, buf_pool_from_array(i), i);
-	}
-
-	DBUG_RETURN(status);
-}
-
-/*******************************************************************//**
-Plugin init callback for INFORMATION_SCHEMA.INNODB_BUFFER_PAGE_LRU:
-installs the column definitions and the fill function in the schema
-object.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_innodb_buffer_page_lru_init(
-/*============================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("i_s_innodb_buffer_page_lru_init");
-
-	ST_SCHEMA_TABLE*	schema_table
-		= reinterpret_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fields_info = i_s_innodb_buf_page_lru_fields_info;
-	schema_table->fill_table = i_s_innodb_buf_page_lru_fill_table;
-
-	DBUG_RETURN(0);
-}
-
-UNIV_INTERN struct st_maria_plugin i_s_innodb_buffer_page_lru =
-{ /* Plugin descriptor of the INFORMATION_SCHEMA table
- INNODB_BUFFER_PAGE_LRU. The initializers are positional, so their order
- must not be changed. */
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_BUFFER_PAGE_LRU"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "InnoDB Buffer Page in LRU"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded;
- binds the column list and the fill callback */
- /* int (*)(void*); */
- STRUCT_FLD(init, i_s_innodb_buffer_page_lru_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL),
-
- /* NOTE(review): presumably the version string and the maturity
- level members of st_maria_plugin - confirm against the plugin API */
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE
-};
-
-/*******************************************************************//**
-Shared deinit callback of the dynamic INFORMATION_SCHEMA tables in this
-file. There is nothing to release, so the schema object is left alone.
-@return 0 on success */
-static
-int
-i_s_common_deinit(
-/*==============*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("i_s_common_deinit");
-
-	/* Intentionally empty: the tables own no resources */
-
-	DBUG_RETURN(0);
-}
-
-/** SYS_TABLES ***************************************************/
-/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_TABLES,
-which is populated from SYS_TABLES. Each SYS_TABLES_* macro is the array
-position of the entry that follows it, so macros and entries must stay
-in step. */
-static ST_FIELD_INFO innodb_sys_tables_fields_info[] =
-{
-#define SYS_TABLES_ID 0
- {STRUCT_FLD(field_name, "TABLE_ID"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_TABLES_NAME 1
- {STRUCT_FLD(field_name, "NAME"),
- STRUCT_FLD(field_length, MAX_FULL_NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_TABLES_FLAG 2
- {STRUCT_FLD(field_name, "FLAG"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_TABLES_NUM_COLUMN 3
- {STRUCT_FLD(field_name, "N_COLS"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_TABLES_SPACE 4
- {STRUCT_FLD(field_name, "SPACE"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_TABLES_FILE_FORMAT 5
- {STRUCT_FLD(field_name, "FILE_FORMAT"),
- STRUCT_FLD(field_length, 10),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_TABLES_ROW_FORMAT 6
- {STRUCT_FLD(field_name, "ROW_FORMAT"),
- STRUCT_FLD(field_length, 12),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_TABLES_ZIP_PAGE_SIZE 7
- {STRUCT_FLD(field_name, "ZIP_PAGE_SIZE"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/**********************************************************************//**
-Store one row of information_schema.innodb_sys_tables, built from the
-given dict_table_t. FILE_FORMAT is the name returned by
-trx_sys_file_format_id_to_name() for the table's atomic-blobs flag value,
-and ROW_FORMAT is derived from the COMPACT, ATOMIC_BLOBS and ZIP_SSIZE
-bits of table->flags.
-@return 0 on success; any other value comes from the OK() macro, which is
-defined outside this function */
-static
-int
-i_s_dict_fill_sys_tables(
-/*=====================*/
-	THD*		thd,		/*!< in: thread */
-	dict_table_t*	table,		/*!< in: table */
-	TABLE*		table_to_fill)	/*!< in/out: fill this table */
-{
-	Field**		fields;
-	ulint		compact;
-	ulint		atomic_blobs;
-	ulint		zip_size;
-	const char*	file_format;
-	const char*	row_format;
-
-	/* Open the debug trace frame before any work is done, so that
-	everything below is attributed to this function in the trace. */
-	DBUG_ENTER("i_s_dict_fill_sys_tables");
-
-	compact = DICT_TF_GET_COMPACT(table->flags);
-	atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(table->flags);
-	zip_size = dict_tf_get_zip_size(table->flags);
-
-	file_format = trx_sys_file_format_id_to_name(atomic_blobs);
-
-	/* The condition is parenthesized here rather than relying on
-	the macro expansion to supply the parentheses "if" requires. */
-	if (!compact) {
-		row_format = "Redundant";
-	} else if (!atomic_blobs) {
-		row_format = "Compact";
-	} else if (DICT_TF_GET_ZIP_SSIZE(table->flags)) {
-		row_format = "Compressed";
-	} else {
-		row_format = "Dynamic";
-	}
-
-	fields = table_to_fill->field;
-
-	OK(fields[SYS_TABLES_ID]->store(longlong(table->id), TRUE));
-
-	OK(field_store_string(fields[SYS_TABLES_NAME], table->name));
-
-	OK(fields[SYS_TABLES_FLAG]->store(table->flags));
-
-	OK(fields[SYS_TABLES_NUM_COLUMN]->store(table->n_cols));
-
-	OK(fields[SYS_TABLES_SPACE]->store(table->space));
-
-	OK(field_store_string(fields[SYS_TABLES_FILE_FORMAT], file_format));
-
-	OK(field_store_string(fields[SYS_TABLES_ROW_FORMAT], row_format));
-
-	OK(fields[SYS_TABLES_ZIP_PAGE_SIZE]->store(
-		static_cast<double>(zip_size)));
-
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Scan every record of the SYS_TABLES system table and, for each record
-that dict_process_sys_tables_rec_and_mtr_commit() accepts, add one row to
-information_schema.innodb_sys_tables. When that call returns an error
-message, a warning is pushed instead of a row. The result of
-i_s_dict_fill_sys_tables() is not checked, so a row that fails to be
-stored does not stop the scan.
-@return 0 once the scan has run; also 0, with no rows added, when
-check_global_access() denies PROCESS_ACL */
-static
-int
-i_s_sys_tables_fill_table(
-/*======================*/
- THD* thd, /*!< in: thread */
- TABLE_LIST* tables, /*!< in/out: tables to fill */
- Item* ) /*!< in: condition (not used) */
-{
- btr_pcur_t pcur;
- const rec_t* rec;
- mem_heap_t* heap;
- mtr_t mtr;
-
- DBUG_ENTER("i_s_sys_tables_fill_table");
- /* Macro defined outside this function; presumably returns early
- when InnoDB is not running (confirm at its definition) */
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
- /* deny access to user without PROCESS_ACL privilege */
- if (check_global_access(thd, PROCESS_ACL, true)) {
- DBUG_RETURN(0);
- }
-
- /* dict_sys->mutex is held and a mini-transaction is active
- whenever a record is fetched or parsed */
- heap = mem_heap_create(1000);
- mutex_enter(&(dict_sys->mutex));
- mtr_start(&mtr);
-
- rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES);
-
- while (rec) {
- const char* err_msg;
- dict_table_t* table_rec;
-
- /* Create and populate a dict_table_t structure with
- information from SYS_TABLES row */
- err_msg = dict_process_sys_tables_rec_and_mtr_commit(
- heap, rec, &table_rec,
- DICT_TABLE_LOAD_FROM_RECORD, &mtr);
-
- /* The row is stored without holding dict_sys->mutex */
- mutex_exit(&dict_sys->mutex);
-
- if (!err_msg) {
- i_s_dict_fill_sys_tables(thd, table_rec, tables->table);
- } else {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_CANT_FIND_SYSTEM_REC, "%s",
- err_msg);
- }
-
- /* Since dict_process_sys_tables_rec_and_mtr_commit()
- is called with DICT_TABLE_LOAD_FROM_RECORD, the table_rec
- is created in dict_process_sys_tables_rec(), we will
- need to free it */
- if (table_rec) {
- dict_mem_table_free(table_rec);
- }
-
- /* Release the per-record allocations but keep the heap */
- mem_heap_empty(heap);
-
- /* Get the next record */
- mutex_enter(&dict_sys->mutex);
- mtr_start(&mtr);
- rec = dict_getnext_system(&pcur, &mtr);
- }
-
- /* Commits the mini-transaction started before the fetch that
- returned NULL and ended the loop */
- mtr_commit(&mtr);
- mutex_exit(&dict_sys->mutex);
- mem_heap_free(heap);
-
- DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Plugin init hook for INFORMATION_SCHEMA.innodb_sys_tables: installs the
-column definitions and the fill callback on the schema table object.
-@return 0 on success */
-static
-int
-innodb_sys_tables_init(
-/*===================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_sys_tables_init");
-
-	ST_SCHEMA_TABLE* const	schema_table
-		= static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fill_table = i_s_sys_tables_fill_table;
-	schema_table->fields_info = innodb_sys_tables_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/* Plugin descriptor for INFORMATION_SCHEMA.INNODB_SYS_TABLES. The
-initializers are positional, so their order must not change. */
-UNIV_INTERN struct st_maria_plugin	i_s_innodb_sys_tables =
-{
-	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), /* int: plugin type (a MYSQL_XXX_PLUGIN value) */
-	STRUCT_FLD(info, &i_s_info),		/* void*: type-specific plugin descriptor */
-	STRUCT_FLD(name, "INNODB_SYS_TABLES"),	/* const char*: plugin name */
-	STRUCT_FLD(author, plugin_author),	/* const char*: author (for SHOW PLUGINS) */
-	STRUCT_FLD(descr, "InnoDB SYS_TABLES"),	/* const char*: description (for SHOW PLUGINS) */
-	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),	/* int: license (PLUGIN_LICENSE_XXX) */
-	STRUCT_FLD(init, innodb_sys_tables_init),	/* int (*)(void*): called when the plugin is loaded */
-	STRUCT_FLD(deinit, i_s_common_deinit),	/* int (*)(void*): called when the plugin is unloaded */
-	STRUCT_FLD(version, INNODB_VERSION_SHORT),	/* unsigned int: version (for SHOW PLUGINS) */
-	STRUCT_FLD(status_vars, NULL),		/* struct st_mysql_show_var* */
-	STRUCT_FLD(system_vars, NULL),		/* struct st_mysql_sys_var** */
-	INNODB_VERSION_STR,
-	MariaDB_PLUGIN_MATURITY_STABLE
-};
-
-/** SYS_TABLESTATS ***********************************************/
-/* Column positions of INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS. Each
-value must equal the index of the matching entry in the array below. */
-#define SYS_TABLESTATS_ID		0
-#define SYS_TABLESTATS_NAME		1
-#define SYS_TABLESTATS_INIT		2
-#define SYS_TABLESTATS_NROW		3
-#define SYS_TABLESTATS_CLUST_SIZE	4
-#define SYS_TABLESTATS_INDEX_SIZE	5
-#define SYS_TABLESTATS_MODIFIED		6
-#define SYS_TABLESTATS_AUTONINC		7
-#define SYS_TABLESTATS_TABLE_REF_COUNT	8
-
-/* Column definitions of the dynamic table
-INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS */
-static ST_FIELD_INFO innodb_sys_tablestats_fields_info[] =
-{
-	/* SYS_TABLESTATS_ID */
-	{STRUCT_FLD(field_name, "TABLE_ID"),
-	 STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_TABLESTATS_NAME */
-	{STRUCT_FLD(field_name, "NAME"),
-	 STRUCT_FLD(field_length, NAME_LEN + 1),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_STRING), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, 0), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_TABLESTATS_INIT */
-	{STRUCT_FLD(field_name, "STATS_INITIALIZED"),
-	 STRUCT_FLD(field_length, NAME_LEN + 1),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_STRING), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, 0), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_TABLESTATS_NROW */
-	{STRUCT_FLD(field_name, "NUM_ROWS"),
-	 STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_TABLESTATS_CLUST_SIZE */
-	{STRUCT_FLD(field_name, "CLUST_INDEX_SIZE"),
-	 STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_TABLESTATS_INDEX_SIZE */
-	{STRUCT_FLD(field_name, "OTHER_INDEX_SIZE"),
-	 STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_TABLESTATS_MODIFIED */
-	{STRUCT_FLD(field_name, "MODIFIED_COUNTER"),
-	 STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_TABLESTATS_AUTONINC */
-	{STRUCT_FLD(field_name, "AUTOINC"),
-	 STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_TABLESTATS_TABLE_REF_COUNT */
-	{STRUCT_FLD(field_name, "REF_COUNT"),
-	 STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, 0), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	END_OF_ST_FIELD_INFO
-};
-
-/**********************************************************************//**
-Store one row of information_schema.innodb_sys_tablestats, built from the
-statistics members of the given dict_table_t. The statistics columns are
-read under the table's shared statistics latch; when
-table->stat_initialized is not set they are stored as 0 and
-STATS_INITIALIZED is "Uninitialized".
-@return 0 on success; any other value comes from the OK() macro, which is
-defined outside this function */
-static
-int
-i_s_dict_fill_sys_tablestats(
-/*=========================*/
-	THD*		thd,		/*!< in: thread */
-	dict_table_t*	table,		/*!< in: table */
-	TABLE*		table_to_fill)	/*!< in/out: fill this table */
-{
-	/* Holds the shared statistics latch for exactly as long as the
-	guard object lives. The OK() calls made while the latch is held
-	may leave the function early (the macro is defined elsewhere in
-	this file and is presumed to return on a non-zero result); the
-	destructor guarantees the latch is released on that path too. */
-	struct stats_latch_guard {
-		explicit stats_latch_guard(dict_table_t* t) : m_table(t)
-		{
-			dict_table_stats_lock(m_table, RW_S_LATCH);
-		}
-
-		~stats_latch_guard()
-		{
-			dict_table_stats_unlock(m_table, RW_S_LATCH);
-		}
-
-		dict_table_t*	m_table;
-	};
-
-	Field**		fields;
-
-	DBUG_ENTER("i_s_dict_fill_sys_tablestats");
-
-	fields = table_to_fill->field;
-
-	OK(fields[SYS_TABLESTATS_ID]->store(longlong(table->id), TRUE));
-
-	OK(field_store_string(fields[SYS_TABLESTATS_NAME], table->name));
-
-	{
-		/* Latched region: ends at the closing brace */
-		stats_latch_guard	latch(table);
-
-		if (table->stat_initialized) {
-			OK(field_store_string(fields[SYS_TABLESTATS_INIT],
-					      "Initialized"));
-
-			OK(fields[SYS_TABLESTATS_NROW]->store(
-				table->stat_n_rows, TRUE));
-
-			OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store(
-				static_cast<double>(
-					table->stat_clustered_index_size)));
-
-			OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store(
-				static_cast<double>(
-					table->stat_sum_of_other_index_sizes)));
-
-			OK(fields[SYS_TABLESTATS_MODIFIED]->store(
-				static_cast<double>(
-					table->stat_modified_counter)));
-		} else {
-			OK(field_store_string(fields[SYS_TABLESTATS_INIT],
-					      "Uninitialized"));
-
-			OK(fields[SYS_TABLESTATS_NROW]->store(0, TRUE));
-
-			OK(fields[SYS_TABLESTATS_CLUST_SIZE]->store(0));
-
-			OK(fields[SYS_TABLESTATS_INDEX_SIZE]->store(0));
-
-			OK(fields[SYS_TABLESTATS_MODIFIED]->store(0));
-		}
-	}
-
-	OK(fields[SYS_TABLESTATS_AUTONINC]->store(table->autoinc, TRUE));
-
-	OK(fields[SYS_TABLESTATS_TABLE_REF_COUNT]->store(
-		static_cast<double>(table->n_ref_count)));
-
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Scan every record of the SYS_TABLES system table and, for each record
-that dict_process_sys_tables_rec_and_mtr_commit() accepts, add one row of
-table statistics to information_schema.innodb_sys_tablestats. When that
-call returns an error message, a warning is pushed instead of a row. The
-result of i_s_dict_fill_sys_tablestats() is not checked, so a row that
-fails to be stored does not stop the scan.
-@return 0 once the scan has run; also 0, with no rows added, when
-check_global_access() denies PROCESS_ACL */
-static
-int
-i_s_sys_tables_fill_table_stats(
-/*============================*/
- THD* thd, /*!< in: thread */
- TABLE_LIST* tables, /*!< in/out: tables to fill */
- Item* ) /*!< in: condition (not used) */
-{
- btr_pcur_t pcur;
- const rec_t* rec;
- mem_heap_t* heap;
- mtr_t mtr;
-
- DBUG_ENTER("i_s_sys_tables_fill_table_stats");
- /* Macro defined outside this function; presumably returns early
- when InnoDB is not running (confirm at its definition) */
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
- /* deny access to user without PROCESS_ACL privilege */
- if (check_global_access(thd, PROCESS_ACL, true)) {
- DBUG_RETURN(0);
- }
-
- /* dict_sys->mutex is held and a mini-transaction is active
- whenever a record is fetched or parsed */
- heap = mem_heap_create(1000);
- mutex_enter(&dict_sys->mutex);
- mtr_start(&mtr);
-
- rec = dict_startscan_system(&pcur, &mtr, SYS_TABLES);
-
- while (rec) {
- const char* err_msg;
- dict_table_t* table_rec;
-
- /* Fetch the dict_table_t structure corresponding to
- this SYS_TABLES record */
- err_msg = dict_process_sys_tables_rec_and_mtr_commit(
- heap, rec, &table_rec,
- DICT_TABLE_LOAD_FROM_CACHE, &mtr);
-
- mutex_exit(&dict_sys->mutex);
-
- /* NOTE(review): table_rec was requested with
- DICT_TABLE_LOAD_FROM_CACHE and is read below after
- dict_sys->mutex has been released, and it is not freed
- here; confirm that nothing can free the cached object in
- between and that it is non-NULL whenever err_msg is NULL */
- if (!err_msg) {
- i_s_dict_fill_sys_tablestats(thd, table_rec,
- tables->table);
- } else {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_CANT_FIND_SYSTEM_REC, "%s",
- err_msg);
- }
-
- /* Release the per-record allocations but keep the heap */
- mem_heap_empty(heap);
-
- /* Get the next record */
- mutex_enter(&dict_sys->mutex);
- mtr_start(&mtr);
- rec = dict_getnext_system(&pcur, &mtr);
- }
-
- /* Commits the mini-transaction started before the fetch that
- returned NULL and ended the loop */
- mtr_commit(&mtr);
- mutex_exit(&dict_sys->mutex);
- mem_heap_free(heap);
-
- DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Plugin init hook for INFORMATION_SCHEMA.innodb_sys_tablestats: installs
-the column definitions and the fill callback on the schema table object.
-@return 0 on success */
-static
-int
-innodb_sys_tablestats_init(
-/*=======================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_sys_tablestats_init");
-
-	ST_SCHEMA_TABLE* const	schema_table
-		= static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fill_table = i_s_sys_tables_fill_table_stats;
-	schema_table->fields_info = innodb_sys_tablestats_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/* Plugin descriptor for INFORMATION_SCHEMA.INNODB_SYS_TABLESTATS. The
-initializers are positional, so their order must not change. */
-UNIV_INTERN struct st_maria_plugin	i_s_innodb_sys_tablestats =
-{
-	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), /* int: plugin type (a MYSQL_XXX_PLUGIN value) */
-	STRUCT_FLD(info, &i_s_info),		/* void*: type-specific plugin descriptor */
-	STRUCT_FLD(name, "INNODB_SYS_TABLESTATS"),	/* const char*: plugin name */
-	STRUCT_FLD(author, plugin_author),	/* const char*: author (for SHOW PLUGINS) */
-	STRUCT_FLD(descr, "InnoDB SYS_TABLESTATS"),	/* const char*: description (for SHOW PLUGINS) */
-	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),	/* int: license (PLUGIN_LICENSE_XXX) */
-	STRUCT_FLD(init, innodb_sys_tablestats_init),	/* int (*)(void*): called when the plugin is loaded */
-	STRUCT_FLD(deinit, i_s_common_deinit),	/* int (*)(void*): called when the plugin is unloaded */
-	STRUCT_FLD(version, INNODB_VERSION_SHORT),	/* unsigned int: version (for SHOW PLUGINS) */
-	STRUCT_FLD(status_vars, NULL),		/* struct st_mysql_show_var* */
-	STRUCT_FLD(system_vars, NULL),		/* struct st_mysql_sys_var** */
-	INNODB_VERSION_STR,
-	MariaDB_PLUGIN_MATURITY_STABLE
-};
-
-/** SYS_INDEXES **************************************************/
-/* Column positions of INFORMATION_SCHEMA.INNODB_SYS_INDEXES. Each value
-must equal the index of the matching entry in the array below. */
-#define SYS_INDEX_ID		0
-#define SYS_INDEX_NAME		1
-#define SYS_INDEX_TABLE_ID	2
-#define SYS_INDEX_TYPE		3
-#define SYS_INDEX_NUM_FIELDS	4
-#define SYS_INDEX_PAGE_NO	5
-#define SYS_INDEX_SPACE		6
-
-/* Column definitions of the dynamic table
-INFORMATION_SCHEMA.INNODB_SYS_INDEXES */
-static ST_FIELD_INFO innodb_sysindex_fields_info[] =
-{
-	/* SYS_INDEX_ID */
-	{STRUCT_FLD(field_name, "INDEX_ID"),
-	 STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_INDEX_NAME */
-	{STRUCT_FLD(field_name, "NAME"),
-	 STRUCT_FLD(field_length, NAME_LEN + 1),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_STRING), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, 0), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_INDEX_TABLE_ID */
-	{STRUCT_FLD(field_name, "TABLE_ID"),
-	 STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_INDEX_TYPE */
-	{STRUCT_FLD(field_name, "TYPE"),
-	 STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, 0), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_INDEX_NUM_FIELDS */
-	{STRUCT_FLD(field_name, "N_FIELDS"),
-	 STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, 0), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_INDEX_PAGE_NO */
-	{STRUCT_FLD(field_name, "PAGE_NO"),
-	 STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, 0), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_INDEX_SPACE */
-	{STRUCT_FLD(field_name, "SPACE"),
-	 STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, 0), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	END_OF_ST_FIELD_INFO
-};
-
-/**********************************************************************//**
-Store one row of information_schema.innodb_sys_indexes, built from the
-given dict_index_t and the id of the table it belongs to. PAGE_NO is
-stored as -1 when index->page equals FIL_NULL.
-@return 0 on success */
-static
-int
-i_s_dict_fill_sys_indexes(
-/*======================*/
-	THD*		thd,		/*!< in: thread */
-	table_id_t	table_id,	/*!< in: table id */
-	dict_index_t*	index,		/*!< in: populated dict_index_t
-					struct with index info */
-	TABLE*		table_to_fill)	/*!< in/out: fill this table */
-{
-	Field** const	cols = table_to_fill->field;
-
-	DBUG_ENTER("i_s_dict_fill_sys_indexes");
-
-	OK(field_store_index_name(cols[SYS_INDEX_NAME], index->name));
-	OK(cols[SYS_INDEX_ID]->store(longlong(index->id), TRUE));
-	OK(cols[SYS_INDEX_TABLE_ID]->store(longlong(table_id), TRUE));
-	OK(cols[SYS_INDEX_TYPE]->store(index->type));
-	OK(cols[SYS_INDEX_NUM_FIELDS]->store(index->n_fields));
-
-	/* FIL_NULL is ULINT32_UNDEFINED; it is reported as -1 */
-	if (index->page != FIL_NULL) {
-		OK(cols[SYS_INDEX_PAGE_NO]->store(index->page));
-	} else {
-		OK(cols[SYS_INDEX_PAGE_NO]->store(-1));
-	}
-
-	OK(cols[SYS_INDEX_SPACE]->store(index->space));
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Scan every record of the SYS_INDEXES system table and, for each record
-that dict_process_sys_indexes_rec() accepts, add one row to
-information_schema.innodb_sys_indexes. When that call returns an error
-message, a warning is pushed instead of a row. The result of
-i_s_dict_fill_sys_indexes() is not checked, so a row that fails to be
-stored does not stop the scan.
-@return 0 once the scan has run; also 0, with no rows added, when
-check_global_access() denies PROCESS_ACL */
-static
-int
-i_s_sys_indexes_fill_table(
-/*=======================*/
- THD* thd, /*!< in: thread */
- TABLE_LIST* tables, /*!< in/out: tables to fill */
- Item* ) /*!< in: condition (not used) */
-{
- btr_pcur_t pcur;
- const rec_t* rec;
- mem_heap_t* heap;
- mtr_t mtr;
-
- DBUG_ENTER("i_s_sys_indexes_fill_table");
- /* Macro defined outside this function; presumably returns early
- when InnoDB is not running (confirm at its definition) */
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
- /* deny access to user without PROCESS_ACL privilege */
- if (check_global_access(thd, PROCESS_ACL, true)) {
- DBUG_RETURN(0);
- }
-
- /* dict_sys->mutex is held and a mini-transaction is active
- whenever a record is fetched or parsed */
- heap = mem_heap_create(1000);
- mutex_enter(&dict_sys->mutex);
- mtr_start(&mtr);
-
- /* Start scan the SYS_INDEXES table */
- rec = dict_startscan_system(&pcur, &mtr, SYS_INDEXES);
-
- /* Process each record in the table */
- while (rec) {
- const char* err_msg;
- table_id_t table_id;
- dict_index_t index_rec;
-
- /* Populate a dict_index_t structure with information from
- a SYS_INDEXES row */
- err_msg = dict_process_sys_indexes_rec(heap, rec, &index_rec,
- &table_id);
-
- /* The row is stored with the mini-transaction committed and
- dict_sys->mutex released */
- mtr_commit(&mtr);
- mutex_exit(&dict_sys->mutex);
-
- if (!err_msg) {
- i_s_dict_fill_sys_indexes(thd, table_id, &index_rec,
- tables->table);
- } else {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_CANT_FIND_SYSTEM_REC, "%s",
- err_msg);
- }
-
- /* Release the per-record allocations but keep the heap */
- mem_heap_empty(heap);
-
- /* Get the next record */
- mutex_enter(&dict_sys->mutex);
- mtr_start(&mtr);
- rec = dict_getnext_system(&pcur, &mtr);
- }
-
- /* Commits the mini-transaction started before the fetch that
- returned NULL and ended the loop */
- mtr_commit(&mtr);
- mutex_exit(&dict_sys->mutex);
- mem_heap_free(heap);
-
- DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Plugin init hook for INFORMATION_SCHEMA.innodb_sys_indexes: installs the
-column definitions and the fill callback on the schema table object.
-@return 0 on success */
-static
-int
-innodb_sys_indexes_init(
-/*====================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_sys_indexes_init");
-
-	ST_SCHEMA_TABLE* const	schema_table
-		= static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fill_table = i_s_sys_indexes_fill_table;
-	schema_table->fields_info = innodb_sysindex_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/* Plugin descriptor for INFORMATION_SCHEMA.INNODB_SYS_INDEXES. The
-initializers are positional, so their order must not change. */
-UNIV_INTERN struct st_maria_plugin	i_s_innodb_sys_indexes =
-{
-	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), /* int: plugin type (a MYSQL_XXX_PLUGIN value) */
-	STRUCT_FLD(info, &i_s_info),		/* void*: type-specific plugin descriptor */
-	STRUCT_FLD(name, "INNODB_SYS_INDEXES"),	/* const char*: plugin name */
-	STRUCT_FLD(author, plugin_author),	/* const char*: author (for SHOW PLUGINS) */
-	STRUCT_FLD(descr, "InnoDB SYS_INDEXES"),	/* const char*: description (for SHOW PLUGINS) */
-	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),	/* int: license (PLUGIN_LICENSE_XXX) */
-	STRUCT_FLD(init, innodb_sys_indexes_init),	/* int (*)(void*): called when the plugin is loaded */
-	STRUCT_FLD(deinit, i_s_common_deinit),	/* int (*)(void*): called when the plugin is unloaded */
-	STRUCT_FLD(version, INNODB_VERSION_SHORT),	/* unsigned int: version (for SHOW PLUGINS) */
-	STRUCT_FLD(status_vars, NULL),		/* struct st_mysql_show_var* */
-	STRUCT_FLD(system_vars, NULL),		/* struct st_mysql_sys_var** */
-	INNODB_VERSION_STR,
-	MariaDB_PLUGIN_MATURITY_STABLE
-};
-
-/** SYS_COLUMNS **************************************************/
-/* Column positions of INFORMATION_SCHEMA.INNODB_SYS_COLUMNS. Each value
-must equal the index of the matching entry in the array below. */
-#define SYS_COLUMN_TABLE_ID	0
-#define SYS_COLUMN_NAME		1
-#define SYS_COLUMN_POSITION	2
-#define SYS_COLUMN_MTYPE	3
-#define SYS_COLUMN__PRTYPE	4
-#define SYS_COLUMN_COLUMN_LEN	5
-
-/* Column definitions of the dynamic table
-INFORMATION_SCHEMA.INNODB_SYS_COLUMNS */
-static ST_FIELD_INFO innodb_sys_columns_fields_info[] =
-{
-	/* SYS_COLUMN_TABLE_ID */
-	{STRUCT_FLD(field_name, "TABLE_ID"),
-	 STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_COLUMN_NAME */
-	{STRUCT_FLD(field_name, "NAME"),
-	 STRUCT_FLD(field_length, NAME_LEN + 1),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_STRING), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, 0), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_COLUMN_POSITION */
-	{STRUCT_FLD(field_name, "POS"),
-	 STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_COLUMN_MTYPE */
-	{STRUCT_FLD(field_name, "MTYPE"),
-	 STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, 0), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_COLUMN__PRTYPE */
-	{STRUCT_FLD(field_name, "PRTYPE"),
-	 STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, 0), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_COLUMN_COLUMN_LEN */
-	{STRUCT_FLD(field_name, "LEN"),
-	 STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, 0), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	END_OF_ST_FIELD_INFO
-};
-
-/**********************************************************************//**
-Store one row of information_schema.innodb_sys_columns, built from the
-given table id, column name and dict_col_t.
-@return 0 on success */
-static
-int
-i_s_dict_fill_sys_columns(
-/*======================*/
-	THD*		thd,		/*!< in: thread */
-	table_id_t	table_id,	/*!< in: table ID */
-	const char*	col_name,	/*!< in: column name */
-	dict_col_t*	column,		/*!< in: dict_col_t struct holding
-					more column information */
-	TABLE*		table_to_fill)	/*!< in/out: fill this table */
-{
-	Field** const	cols = table_to_fill->field;
-
-	DBUG_ENTER("i_s_dict_fill_sys_columns");
-
-	OK(cols[SYS_COLUMN_TABLE_ID]->store(longlong(table_id), TRUE));
-	OK(field_store_string(cols[SYS_COLUMN_NAME], col_name));
-	OK(cols[SYS_COLUMN_POSITION]->store(column->ind));
-	OK(cols[SYS_COLUMN_MTYPE]->store(column->mtype));
-	OK(cols[SYS_COLUMN__PRTYPE]->store(column->prtype));
-	OK(cols[SYS_COLUMN_COLUMN_LEN]->store(column->len));
-
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Scan every record of the SYS_COLUMNS system table and, for each record
-that dict_process_sys_columns_rec() accepts, add one row to
-information_schema.innodb_sys_columns. When that call returns an error
-message, a warning is pushed instead of a row. The result of
-i_s_dict_fill_sys_columns() is not checked, so a row that fails to be
-stored does not stop the scan.
-@return 0 once the scan has run; also 0, with no rows added, when
-check_global_access() denies PROCESS_ACL */
-static
-int
-i_s_sys_columns_fill_table(
-/*=======================*/
- THD* thd, /*!< in: thread */
- TABLE_LIST* tables, /*!< in/out: tables to fill */
- Item* ) /*!< in: condition (not used) */
-{
- btr_pcur_t pcur;
- const rec_t* rec;
- const char* col_name;
- mem_heap_t* heap;
- mtr_t mtr;
-
- DBUG_ENTER("i_s_sys_columns_fill_table");
- /* Macro defined outside this function; presumably returns early
- when InnoDB is not running (confirm at its definition) */
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
- /* deny access to user without PROCESS_ACL privilege */
- if (check_global_access(thd, PROCESS_ACL, true)) {
- DBUG_RETURN(0);
- }
-
- /* dict_sys->mutex is held and a mini-transaction is active
- whenever a record is fetched or parsed */
- heap = mem_heap_create(1000);
- mutex_enter(&dict_sys->mutex);
- mtr_start(&mtr);
-
- rec = dict_startscan_system(&pcur, &mtr, SYS_COLUMNS);
-
- while (rec) {
- const char* err_msg;
- dict_col_t column_rec;
- table_id_t table_id;
-
- /* populate a dict_col_t structure with information from
- a SYS_COLUMNS row */
- err_msg = dict_process_sys_columns_rec(heap, rec, &column_rec,
- &table_id, &col_name);
-
- /* The row is stored with the mini-transaction committed and
- dict_sys->mutex released */
- mtr_commit(&mtr);
- mutex_exit(&dict_sys->mutex);
-
- if (!err_msg) {
- i_s_dict_fill_sys_columns(thd, table_id, col_name,
- &column_rec,
- tables->table);
- } else {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_CANT_FIND_SYSTEM_REC, "%s",
- err_msg);
- }
-
- /* Release the per-record allocations but keep the heap */
- mem_heap_empty(heap);
-
- /* Get the next record */
- mutex_enter(&dict_sys->mutex);
- mtr_start(&mtr);
- rec = dict_getnext_system(&pcur, &mtr);
- }
-
- /* Commits the mini-transaction started before the fetch that
- returned NULL and ended the loop */
- mtr_commit(&mtr);
- mutex_exit(&dict_sys->mutex);
- mem_heap_free(heap);
-
- DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Plugin init hook for INFORMATION_SCHEMA.innodb_sys_columns: installs the
-column definitions and the fill callback on the schema table object.
-@return 0 on success */
-static
-int
-innodb_sys_columns_init(
-/*====================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_sys_columns_init");
-
-	ST_SCHEMA_TABLE* const	schema_table
-		= static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fill_table = i_s_sys_columns_fill_table;
-	schema_table->fields_info = innodb_sys_columns_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/* Plugin descriptor for INFORMATION_SCHEMA.INNODB_SYS_COLUMNS. The
-initializers are positional, so their order must not change. */
-UNIV_INTERN struct st_maria_plugin	i_s_innodb_sys_columns =
-{
-	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN), /* int: plugin type (a MYSQL_XXX_PLUGIN value) */
-	STRUCT_FLD(info, &i_s_info),		/* void*: type-specific plugin descriptor */
-	STRUCT_FLD(name, "INNODB_SYS_COLUMNS"),	/* const char*: plugin name */
-	STRUCT_FLD(author, plugin_author),	/* const char*: author (for SHOW PLUGINS) */
-	STRUCT_FLD(descr, "InnoDB SYS_COLUMNS"),	/* const char*: description (for SHOW PLUGINS) */
-	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),	/* int: license (PLUGIN_LICENSE_XXX) */
-	STRUCT_FLD(init, innodb_sys_columns_init),	/* int (*)(void*): called when the plugin is loaded */
-	STRUCT_FLD(deinit, i_s_common_deinit),	/* int (*)(void*): called when the plugin is unloaded */
-	STRUCT_FLD(version, INNODB_VERSION_SHORT),	/* unsigned int: version (for SHOW PLUGINS) */
-	STRUCT_FLD(status_vars, NULL),		/* struct st_mysql_show_var* */
-	STRUCT_FLD(system_vars, NULL),		/* struct st_mysql_sys_var** */
-	INNODB_VERSION_STR,
-	MariaDB_PLUGIN_MATURITY_STABLE
-};
-
-/** SYS_FIELDS ***************************************************/
-/* Column positions of INFORMATION_SCHEMA.INNODB_SYS_FIELDS. Each value
-must equal the index of the matching entry in the array below. */
-#define SYS_FIELD_INDEX_ID	0
-#define SYS_FIELD_NAME		1
-#define SYS_FIELD_POS		2
-
-/* Column definitions of the dynamic table
-INFORMATION_SCHEMA.INNODB_SYS_FIELDS */
-static ST_FIELD_INFO innodb_sys_fields_fields_info[] =
-{
-	/* SYS_FIELD_INDEX_ID */
-	{STRUCT_FLD(field_name, "INDEX_ID"),
-	 STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_FIELD_NAME */
-	{STRUCT_FLD(field_name, "NAME"),
-	 STRUCT_FLD(field_length, NAME_LEN + 1),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_STRING), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, 0), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	/* SYS_FIELD_POS */
-	{STRUCT_FLD(field_name, "POS"),
-	 STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
-	 STRUCT_FLD(field_type, MYSQL_TYPE_LONG), STRUCT_FLD(value, 0),
-	 STRUCT_FLD(field_flags, MY_I_S_UNSIGNED), STRUCT_FLD(old_name, ""),
-	 STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-	END_OF_ST_FIELD_INFO
-};
-
-/**********************************************************************//**
-Store one row of information_schema.innodb_sys_fields, built from the
-given index id, dict_field_t and field position.
-@return 0 on success */
-static
-int
-i_s_dict_fill_sys_fields(
-/*=====================*/
-	THD*		thd,		/*!< in: thread */
-	index_id_t	index_id,	/*!< in: index id for the field */
-	dict_field_t*	field,		/*!< in: index field; only its
-					name is read */
-	ulint		pos,		/*!< in: Field position */
-	TABLE*		table_to_fill)	/*!< in/out: fill this table */
-{
-	Field** const	cols = table_to_fill->field;
-
-	DBUG_ENTER("i_s_dict_fill_sys_fields");
-
-	OK(cols[SYS_FIELD_INDEX_ID]->store(longlong(index_id), TRUE));
-	OK(field_store_string(cols[SYS_FIELD_NAME], field->name));
-	OK(cols[SYS_FIELD_POS]->store(static_cast<double>(pos)));
-
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Scan every record of the SYS_FIELDS system table and, for each record
-that dict_process_sys_fields_rec() accepts, add one row to
-information_schema.innodb_sys_fields. When that call returns an error
-message, a warning is pushed instead of a row. The result of
-i_s_dict_fill_sys_fields() is not checked, so a row that fails to be
-stored does not stop the scan.
-@return 0 once the scan has run; also 0, with no rows added, when
-check_global_access() denies PROCESS_ACL */
-static
-int
-i_s_sys_fields_fill_table(
-/*======================*/
- THD* thd, /*!< in: thread */
- TABLE_LIST* tables, /*!< in/out: tables to fill */
- Item* ) /*!< in: condition (not used) */
-{
- btr_pcur_t pcur;
- const rec_t* rec;
- mem_heap_t* heap;
- index_id_t last_id;
- mtr_t mtr;
-
- DBUG_ENTER("i_s_sys_fields_fill_table");
- /* Macro defined outside this function; presumably returns early
- when InnoDB is not running (confirm at its definition) */
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
- /* deny access to user without PROCESS_ACL privilege */
- if (check_global_access(thd, PROCESS_ACL, true)) {
-
- DBUG_RETURN(0);
- }
-
- /* dict_sys->mutex is held and a mini-transaction is active
- whenever a record is fetched or parsed */
- heap = mem_heap_create(1000);
- mutex_enter(&dict_sys->mutex);
- mtr_start(&mtr);
-
- /* will save last index id so that we know whether we move to
- the next index. This is used to calculate prefix length */
- last_id = 0;
-
- rec = dict_startscan_system(&pcur, &mtr, SYS_FIELDS);
-
- while (rec) {
- ulint pos;
- const char* err_msg;
- index_id_t index_id;
- dict_field_t field_rec;
-
- /* Populate a dict_field_t structure with information from
- a SYS_FIELDS row */
- err_msg = dict_process_sys_fields_rec(heap, rec, &field_rec,
- &pos, &index_id, last_id);
-
- /* The row is stored with the mini-transaction committed and
- dict_sys->mutex released */
- mtr_commit(&mtr);
- mutex_exit(&dict_sys->mutex);
-
- if (!err_msg) {
- i_s_dict_fill_sys_fields(thd, index_id, &field_rec,
- pos, tables->table);
- /* last_id advances only for records that were parsed
- without an error message */
- last_id = index_id;
- } else {
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
- ER_CANT_FIND_SYSTEM_REC, "%s",
- err_msg);
- }
-
- /* Release the per-record allocations but keep the heap */
- mem_heap_empty(heap);
-
- /* Get the next record */
- mutex_enter(&dict_sys->mutex);
- mtr_start(&mtr);
- rec = dict_getnext_system(&pcur, &mtr);
- }
-
- /* Commits the mini-transaction started before the fetch that
- returned NULL and ended the loop */
- mtr_commit(&mtr);
- mutex_exit(&dict_sys->mutex);
- mem_heap_free(heap);
-
- DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Plugin init hook for INFORMATION_SCHEMA.innodb_sys_fields: installs the
-column definitions and the fill callback on the schema table object.
-@return 0 on success */
-static
-int
-innodb_sys_fields_init(
-/*===================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	ST_SCHEMA_TABLE*	schema;
-
-	/* The trace tag must match the function name so that debug
-	traces and function-name filters find this frame. */
-	DBUG_ENTER("innodb_sys_fields_init");
-
-	schema = (ST_SCHEMA_TABLE*) p;
-
-	schema->fields_info = innodb_sys_fields_fields_info;
-	schema->fill_table = i_s_sys_fields_fill_table;
-
-	DBUG_RETURN(0);
-}
-
-/** Plugin descriptor of INFORMATION_SCHEMA.INNODB_SYS_FIELDS. The load
-hook innodb_sys_fields_init() binds the column list and fill callback;
-unloading goes through the shared i_s_common_deinit(). */
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_fields =
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_SYS_FIELDS"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "InnoDB SYS_FIELDS"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, innodb_sys_fields_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL),
-
- /* version string and maturity level, given positionally */
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE
-};
-
-/** SYS_FOREIGN ********************************************/
-/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_FOREIGN.
-Each SYS_FOREIGN_* macro is the position of the column defined right
-after it; i_s_dict_fill_sys_foreign() uses these macros to index
-table->field[], so macro values and entry order must stay in step. */
-static ST_FIELD_INFO innodb_sys_foreign_fields_info[] =
-{
-#define SYS_FOREIGN_ID 0
- {STRUCT_FLD(field_name, "ID"),
- STRUCT_FLD(field_length, NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_FOREIGN_FOR_NAME 1
- {STRUCT_FLD(field_name, "FOR_NAME"),
- STRUCT_FLD(field_length, NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_FOREIGN_REF_NAME 2
- {STRUCT_FLD(field_name, "REF_NAME"),
- STRUCT_FLD(field_length, NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_FOREIGN_NUM_COL 3
- {STRUCT_FLD(field_name, "N_COLS"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_FOREIGN_TYPE 4
- {STRUCT_FLD(field_name, "TYPE"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/**********************************************************************//**
-Store one foreign key constraint, as read from SYS_FOREIGN, as a row of
-information_schema.innodb_sys_foreign.
-@return 0 on success */
-static
-int
-i_s_dict_fill_sys_foreign(
-/*======================*/
-	THD*		thd,		/*!< in: thread */
-	dict_foreign_t*	foreign,	/*!< in: foreign key to report */
-	TABLE*		table_to_fill)	/*!< in/out: fill this table */
-{
-	DBUG_ENTER("i_s_dict_fill_sys_foreign");
-
-	Field** const	cols = table_to_fill->field;
-
-	/* Constraint id, then the referencing and referenced table names */
-	OK(field_store_string(cols[SYS_FOREIGN_ID], foreign->id));
-	OK(field_store_string(cols[SYS_FOREIGN_FOR_NAME],
-			      foreign->foreign_table_name));
-	OK(field_store_string(cols[SYS_FOREIGN_REF_NAME],
-			      foreign->referenced_table_name));
-
-	/* Number of columns in the constraint and its type value */
-	OK(cols[SYS_FOREIGN_NUM_COL]->store(foreign->n_fields));
-	OK(cols[SYS_FOREIGN_TYPE]->store(foreign->type));
-
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Function to populate INFORMATION_SCHEMA.innodb_sys_foreign table. Loop
-through each record in SYS_FOREIGN, and extract the foreign key
-information. A record that cannot be parsed is reported as a warning
-and the scan continues.
-@return 0 on success */
-static
-int
-i_s_sys_foreign_fill_table(
-/*=======================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	btr_pcur_t	pcur;
-	const rec_t*	rec;
-	mem_heap_t*	heap;
-	mtr_t		mtr;
-
-	DBUG_ENTER("i_s_sys_foreign_fill_table");
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* deny access to user without PROCESS_ACL privilege */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-
-		DBUG_RETURN(0);
-	}
-
-	heap = mem_heap_create(1000);
-	mutex_enter(&dict_sys->mutex);
-	mtr_start(&mtr);
-
-	rec = dict_startscan_system(&pcur, &mtr, SYS_FOREIGN);
-
-	while (rec) {
-		const char*	err_msg;
-		dict_foreign_t	foreign_rec;
-
-		/* Populate a dict_foreign_t structure with information from
-		a SYS_FOREIGN row */
-		err_msg = dict_process_sys_foreign_rec(heap, rec, &foreign_rec);
-
-		/* Release the mini-transaction and the dictionary mutex
-		before handing the row to the server layer */
-		mtr_commit(&mtr);
-		mutex_exit(&dict_sys->mutex);
-
-		if (!err_msg) {
-			i_s_dict_fill_sys_foreign(thd, &foreign_rec,
-						  tables->table);
-		} else {
-			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
-					    ER_CANT_FIND_SYSTEM_REC, "%s",
-					    err_msg);
-		}
-
-		mem_heap_empty(heap);
-
-		/* Get the next record. Take the dictionary mutex first and
-		only then start the mini-transaction: this is the order used
-		at the top of this function and the mirror image of the
-		release order above. */
-		mutex_enter(&dict_sys->mutex);
-		mtr_start(&mtr);
-		rec = dict_getnext_system(&pcur, &mtr);
-	}
-
-	mtr_commit(&mtr);
-	mutex_exit(&dict_sys->mutex);
-	mem_heap_free(heap);
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_foreign: install its
-column definitions and its fill callback on the schema table object.
-@return 0 on success */
-static
-int
-innodb_sys_foreign_init(
-/*====================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_sys_foreign_init");
-
-	ST_SCHEMA_TABLE* const	schema_table =
-		static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fill_table = i_s_sys_foreign_fill_table;
-	schema_table->fields_info = innodb_sys_foreign_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/** Plugin descriptor of INFORMATION_SCHEMA.INNODB_SYS_FOREIGN. The load
-hook innodb_sys_foreign_init() binds the column list and fill callback;
-unloading goes through the shared i_s_common_deinit(). */
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_foreign =
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_SYS_FOREIGN"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "InnoDB SYS_FOREIGN"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, innodb_sys_foreign_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL),
-
- /* version string and maturity level, given positionally */
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE
-};
-
-/** SYS_FOREIGN_COLS ********************************************/
-/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_FOREIGN_COLS.
-Each SYS_FOREIGN_COL_* macro is the position of the column defined right
-after it; i_s_dict_fill_sys_foreign_cols() uses these macros to index
-table->field[], so macro values and entry order must stay in step. */
-static ST_FIELD_INFO innodb_sys_foreign_cols_fields_info[] =
-{
-#define SYS_FOREIGN_COL_ID 0
- {STRUCT_FLD(field_name, "ID"),
- STRUCT_FLD(field_length, NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_FOREIGN_COL_FOR_NAME 1
- {STRUCT_FLD(field_name, "FOR_COL_NAME"),
- STRUCT_FLD(field_length, NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_FOREIGN_COL_REF_NAME 2
- {STRUCT_FLD(field_name, "REF_COL_NAME"),
- STRUCT_FLD(field_length, NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_FOREIGN_COL_POS 3
- {STRUCT_FLD(field_name, "POS"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/**********************************************************************//**
-Store one foreign key column pair, as read from SYS_FOREIGN_COLS, as a row
-of information_schema.innodb_sys_foreign_cols.
-@return 0 on success */
-static
-int
-i_s_dict_fill_sys_foreign_cols(
-/*==========================*/
-	THD*		thd,		/*!< in: thread */
-	const char*	name,		/*!< in: foreign key constraint name */
-	const char*	for_col_name,	/*!< in: referencing column name */
-	const char*	ref_col_name,	/*!< in: referenced column name */
-	ulint		pos,		/*!< in: column position */
-	TABLE*		table_to_fill)	/*!< in/out: fill this table */
-{
-	DBUG_ENTER("i_s_dict_fill_sys_foreign_cols");
-
-	Field** const	cols = table_to_fill->field;
-
-	/* The three name columns */
-	OK(field_store_string(cols[SYS_FOREIGN_COL_ID], name));
-	OK(field_store_string(cols[SYS_FOREIGN_COL_FOR_NAME], for_col_name));
-	OK(field_store_string(cols[SYS_FOREIGN_COL_REF_NAME], ref_col_name));
-
-	/* The position is stored through the double overload of store() */
-	const double	pos_value = static_cast<double>(pos);
-	OK(cols[SYS_FOREIGN_COL_POS]->store(pos_value));
-
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Populate INFORMATION_SCHEMA.innodb_sys_foreign_cols by scanning
-SYS_FOREIGN_COLS and emitting one row per foreign key column record. A
-record that cannot be parsed is reported as a warning and the scan
-continues.
-@return 0 on success */
-static
-int
-i_s_sys_foreign_cols_fill_table(
-/*============================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	DBUG_ENTER("i_s_sys_foreign_cols_fill_table");
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* Without the PROCESS privilege nothing is filled in */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	btr_pcur_t	pcur;
-	mtr_t		mtr;
-	mem_heap_t*	heap = mem_heap_create(1000);
-
-	mutex_enter(&dict_sys->mutex);
-	mtr_start(&mtr);
-
-	const rec_t*	rec = dict_startscan_system(&pcur, &mtr,
-						    SYS_FOREIGN_COLS);
-
-	while (rec != NULL) {
-		const char*	name;
-		const char*	for_col_name;
-		const char*	ref_col_name;
-		ulint		pos;
-
-		/* Pull the reported values out of the SYS_FOREIGN_COLS row */
-		const char* const	err_msg =
-			dict_process_sys_foreign_col_rec(
-				heap, rec, &name, &for_col_name,
-				&ref_col_name, &pos);
-
-		/* The mini-transaction and the dictionary mutex are
-		released before the row is handed to the server layer. */
-		mtr_commit(&mtr);
-		mutex_exit(&dict_sys->mutex);
-
-		if (err_msg != NULL) {
-			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
-					    ER_CANT_FIND_SYSTEM_REC, "%s",
-					    err_msg);
-		} else {
-			i_s_dict_fill_sys_foreign_cols(
-				thd, name, for_col_name, ref_col_name, pos,
-				tables->table);
-		}
-
-		mem_heap_empty(heap);
-
-		/* Re-acquire and step to the next record */
-		mutex_enter(&dict_sys->mutex);
-		mtr_start(&mtr);
-		rec = dict_getnext_system(&pcur, &mtr);
-	}
-
-	mtr_commit(&mtr);
-	mutex_exit(&dict_sys->mutex);
-	mem_heap_free(heap);
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Bind the dynamic table INFORMATION_SCHEMA.innodb_sys_foreign_cols: install
-its column definitions and its fill callback on the schema table object.
-@return 0 on success */
-static
-int
-innodb_sys_foreign_cols_init(
-/*========================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_sys_foreign_cols_init");
-
-	ST_SCHEMA_TABLE* const	schema_table =
-		static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fill_table = i_s_sys_foreign_cols_fill_table;
-	schema_table->fields_info = innodb_sys_foreign_cols_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/** Plugin descriptor of INFORMATION_SCHEMA.INNODB_SYS_FOREIGN_COLS. The
-load hook innodb_sys_foreign_cols_init() binds the column list and fill
-callback; unloading goes through the shared i_s_common_deinit(). */
-UNIV_INTERN struct st_maria_plugin i_s_innodb_sys_foreign_cols =
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_SYS_FOREIGN_COLS"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "InnoDB SYS_FOREIGN_COLS"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, innodb_sys_foreign_cols_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL),
-
- /* version string and maturity level, given positionally */
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE
-};
-
-/** SYS_TABLESPACES ********************************************/
-/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES.
-Each SYS_TABLESPACES_* macro is the position of the column defined right
-after it; i_s_dict_fill_sys_tablespaces() uses these macros to index
-table->field[], so macro values and entry order must stay in step.
-FILE_FORMAT and ROW_FORMAT are the only columns flagged as nullable. */
-static ST_FIELD_INFO innodb_sys_tablespaces_fields_info[] =
-{
-#define SYS_TABLESPACES_SPACE 0
- {STRUCT_FLD(field_name, "SPACE"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_TABLESPACES_NAME 1
- {STRUCT_FLD(field_name, "NAME"),
- STRUCT_FLD(field_length, MAX_FULL_NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_TABLESPACES_FLAGS 2
- {STRUCT_FLD(field_name, "FLAG"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_TABLESPACES_FILE_FORMAT 3
- {STRUCT_FLD(field_name, "FILE_FORMAT"),
- STRUCT_FLD(field_length, 10),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_TABLESPACES_ROW_FORMAT 4
- {STRUCT_FLD(field_name, "ROW_FORMAT"),
- STRUCT_FLD(field_length, 22),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_TABLESPACES_PAGE_SIZE 5
- {STRUCT_FLD(field_name, "PAGE_SIZE"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_TABLESPACES_ZIP_PAGE_SIZE 6
- {STRUCT_FLD(field_name, "ZIP_PAGE_SIZE"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-
-};
-
-/**********************************************************************//**
-Function to fill INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES with information
-collected by scanning SYS_TABLESPACES table. The file format name, row
-format name, page size and compressed page size are all derived from the
-tablespace flags.
-@return 0 on success */
-static
-int
-i_s_dict_fill_sys_tablespaces(
-/*==========================*/
-	THD*		thd,		/*!< in: thread */
-	ulint		space,		/*!< in: space ID */
-	const char*	name,		/*!< in: tablespace name */
-	ulint		flags,		/*!< in: tablespace flags */
-	TABLE*		table_to_fill)	/*!< in/out: fill this table */
-{
-	Field**		fields;
-	ulint		atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags);
-	ulint		page_size = fsp_flags_get_page_size(flags);
-	ulint		zip_size = fsp_flags_get_zip_size(flags);
-	const char*	file_format;
-	const char*	row_format;
-
-	DBUG_ENTER("i_s_dict_fill_sys_tablespaces");
-
-	/* The atomic-blobs bit doubles as the file format id */
-	file_format = trx_sys_file_format_id_to_name(atomic_blobs);
-
-	/* flags holds tablespace flags (everything above decodes it with
-	FSP_FLAGS_xxx / fsp_flags_xxx), so the compressed case is detected
-	from the compressed page size decoded from those same flags rather
-	than through a DICT_TF_xxx accessor.
-	NOTE(review): assumes fsp_flags_get_zip_size() returns 0 exactly
-	when the tablespace is not compressed - confirm. */
-	if (!atomic_blobs) {
-		row_format = "Compact or Redundant";
-	} else if (zip_size != 0) {
-		row_format = "Compressed";
-	} else {
-		row_format = "Dynamic";
-	}
-
-	fields = table_to_fill->field;
-
-	OK(fields[SYS_TABLESPACES_SPACE]->store(
-		   static_cast<double>(space)));
-
-	OK(field_store_string(fields[SYS_TABLESPACES_NAME], name));
-
-	OK(fields[SYS_TABLESPACES_FLAGS]->store(
-		   static_cast<double>(flags)));
-
-	OK(field_store_string(fields[SYS_TABLESPACES_FILE_FORMAT],
-			      file_format));
-
-	OK(field_store_string(fields[SYS_TABLESPACES_ROW_FORMAT],
-			      row_format));
-
-	OK(fields[SYS_TABLESPACES_PAGE_SIZE]->store(
-		   static_cast<double>(page_size)));
-
-	OK(fields[SYS_TABLESPACES_ZIP_PAGE_SIZE]->store(
-		   static_cast<double>(zip_size)));
-
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Populate INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES by scanning
-SYS_TABLESPACES and emitting one row per tablespace record. A record that
-cannot be parsed is reported as a warning and the scan continues.
-@return 0 on success */
-static
-int
-i_s_sys_tablespaces_fill_table(
-/*===========================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	DBUG_ENTER("i_s_sys_tablespaces_fill_table");
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* Without the PROCESS privilege nothing is filled in */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	btr_pcur_t	pcur;
-	mtr_t		mtr;
-	mem_heap_t*	heap = mem_heap_create(1000);
-
-	mutex_enter(&dict_sys->mutex);
-	mtr_start(&mtr);
-
-	const rec_t*	rec = dict_startscan_system(&pcur, &mtr,
-						    SYS_TABLESPACES);
-
-	while (rec != NULL) {
-		ulint		space;
-		const char*	name;
-		ulint		flags;
-
-		/* Pull the reported values out of the SYS_TABLESPACES row */
-		const char* const	err_msg = dict_process_sys_tablespaces(
-			heap, rec, &space, &name, &flags);
-
-		/* The mini-transaction and the dictionary mutex are
-		released before the row is handed to the server layer. */
-		mtr_commit(&mtr);
-		mutex_exit(&dict_sys->mutex);
-
-		if (err_msg != NULL) {
-			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
-					    ER_CANT_FIND_SYSTEM_REC, "%s",
-					    err_msg);
-		} else {
-			i_s_dict_fill_sys_tablespaces(
-				thd, space, name, flags,
-				tables->table);
-		}
-
-		mem_heap_empty(heap);
-
-		/* Re-acquire and step to the next record */
-		mutex_enter(&dict_sys->mutex);
-		mtr_start(&mtr);
-		rec = dict_getnext_system(&pcur, &mtr);
-	}
-
-	mtr_commit(&mtr);
-	mutex_exit(&dict_sys->mutex);
-	mem_heap_free(heap);
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Bind the dynamic table INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES: install
-its column definitions and its fill callback on the schema table object.
-@return 0 on success */
-static
-int
-innodb_sys_tablespaces_init(
-/*========================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_sys_tablespaces_init");
-
-	ST_SCHEMA_TABLE* const	schema_table =
-		static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fill_table = i_s_sys_tablespaces_fill_table;
-	schema_table->fields_info = innodb_sys_tablespaces_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/** Plugin descriptor of INFORMATION_SCHEMA.INNODB_SYS_TABLESPACES. The
-load hook innodb_sys_tablespaces_init() binds the column list and fill
-callback; unloading goes through the shared i_s_common_deinit().
-NOTE(review): this descriptor is declared as struct st_mysql_plugin yet
-initialises version_info and maturity, whereas the SYS_FIELDS, SYS_FOREIGN
-and SYS_FOREIGN_COLS descriptors use struct st_maria_plugin - confirm the
-two struct names are interchangeable in this build. */
-UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_tablespaces =
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_SYS_TABLESPACES"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "InnoDB SYS_TABLESPACES"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, innodb_sys_tablespaces_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL),
-
- /* version string and maturity level, given by field name here */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
-};
-
-/** SYS_DATAFILES ************************************************/
-/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_DATAFILES.
-Each SYS_DATAFILES_* macro is the position of the column defined right
-after it; i_s_dict_fill_sys_datafiles() uses these macros to index
-table->field[], so macro values and entry order must stay in step. */
-static ST_FIELD_INFO innodb_sys_datafiles_fields_info[] =
-{
-#define SYS_DATAFILES_SPACE 0
- {STRUCT_FLD(field_name, "SPACE"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define SYS_DATAFILES_PATH 1
- {STRUCT_FLD(field_name, "PATH"),
- STRUCT_FLD(field_length, OS_FILE_MAX_PATH),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/**********************************************************************//**
-Store one data file entry, as read from SYS_DATAFILES, as a row of
-INFORMATION_SCHEMA.INNODB_SYS_DATAFILES.
-@return 0 on success */
-static
-int
-i_s_dict_fill_sys_datafiles(
-/*========================*/
-	THD*		thd,		/*!< in: thread */
-	ulint		space,		/*!< in: space ID */
-	const char*	path,		/*!< in: absolute path */
-	TABLE*		table_to_fill)	/*!< in/out: fill this table */
-{
-	DBUG_ENTER("i_s_dict_fill_sys_datafiles");
-
-	Field** const	cols = table_to_fill->field;
-
-	/* Space id first, then the file path */
-	OK(field_store_ulint(cols[SYS_DATAFILES_SPACE], space));
-	OK(field_store_string(cols[SYS_DATAFILES_PATH], path));
-
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Populate INFORMATION_SCHEMA.INNODB_SYS_DATAFILES by scanning SYS_DATAFILES
-and emitting one row per data file record. A record that cannot be parsed
-is reported as a warning and the scan continues.
-@return 0 on success */
-static
-int
-i_s_sys_datafiles_fill_table(
-/*=========================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	DBUG_ENTER("i_s_sys_datafiles_fill_table");
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* Without the PROCESS privilege nothing is filled in */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-		DBUG_RETURN(0);
-	}
-
-	btr_pcur_t	pcur;
-	mtr_t		mtr;
-	mem_heap_t*	heap = mem_heap_create(1000);
-
-	mutex_enter(&dict_sys->mutex);
-	mtr_start(&mtr);
-
-	const rec_t*	rec = dict_startscan_system(&pcur, &mtr,
-						    SYS_DATAFILES);
-
-	while (rec != NULL) {
-		ulint		space;
-		const char*	path;
-
-		/* Pull the reported values out of the SYS_DATAFILES row */
-		const char* const	err_msg = dict_process_sys_datafiles(
-			heap, rec, &space, &path);
-
-		/* The mini-transaction and the dictionary mutex are
-		released before the row is handed to the server layer. */
-		mtr_commit(&mtr);
-		mutex_exit(&dict_sys->mutex);
-
-		if (err_msg != NULL) {
-			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
-					    ER_CANT_FIND_SYSTEM_REC, "%s",
-					    err_msg);
-		} else {
-			i_s_dict_fill_sys_datafiles(
-				thd, space, path, tables->table);
-		}
-
-		mem_heap_empty(heap);
-
-		/* Re-acquire and step to the next record */
-		mutex_enter(&dict_sys->mutex);
-		mtr_start(&mtr);
-		rec = dict_getnext_system(&pcur, &mtr);
-	}
-
-	mtr_commit(&mtr);
-	mutex_exit(&dict_sys->mutex);
-	mem_heap_free(heap);
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Bind the dynamic table INFORMATION_SCHEMA.INNODB_SYS_DATAFILES: install
-its column definitions and its fill callback on the schema table object.
-@return 0 on success */
-static
-int
-innodb_sys_datafiles_init(
-/*======================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_sys_datafiles_init");
-
-	ST_SCHEMA_TABLE* const	schema_table =
-		static_cast<ST_SCHEMA_TABLE*>(p);
-
-	schema_table->fill_table = i_s_sys_datafiles_fill_table;
-	schema_table->fields_info = innodb_sys_datafiles_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/** Plugin descriptor of INFORMATION_SCHEMA.INNODB_SYS_DATAFILES. The load
-hook innodb_sys_datafiles_init() binds the column list and fill callback;
-unloading goes through the shared i_s_common_deinit().
-NOTE(review): declared as struct st_mysql_plugin while the SYS_FIELDS,
-SYS_FOREIGN and SYS_FOREIGN_COLS descriptors use struct st_maria_plugin
-with the same initialiser layout - confirm the two struct names are
-interchangeable in this build. */
-UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_datafiles =
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_SYS_DATAFILES"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "InnoDB SYS_DATAFILES"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, innodb_sys_datafiles_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL),
-
- /* version string and maturity level, given positionally */
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE
-};
-
-/* Fields of the changed-pages dynamic table: space_id, page_id, start_lsn
-and end_lsn, in that order. limit_lsn_range_from_condition() identifies
-start_lsn and end_lsn purely by position (table->field[2] and
-table->field[3]), so the column order here must not change. */
-static ST_FIELD_INFO i_s_innodb_changed_pages_info[] =
-{
- {STRUCT_FLD(field_name, "space_id"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "page_id"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "start_lsn"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "end_lsn"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/***********************************************************************
- This function implements ICP for I_S.INNODB_CHANGED_PAGES by parsing a
- condition and getting lower and upper bounds for start and end LSNs if the
- condition corresponds to a certain pattern.
-
- In the most general form, we understand queries like
-
- SELECT * FROM INNODB_CHANGED_PAGES
- WHERE START_LSN > num1 AND START_LSN < num2
- AND END_LSN > num3 AND END_LSN < num4;
-
- That's why the pattern syntax is:
-
- pattern: comp | and_comp;
- comp: lsn < int_num | lsn <= int_num | int_num > lsn | int_num >= lsn;
- lsn: start_lsn | end_lsn;
- and_comp: expression AND expression | expression AND and_comp;
- expression: comp | any_other_expression;
-
- Every comparison is first normalised to the form "left < right" or
- "left <= right" by swapping the operands of ">" and ">=". Only
- comparisons between a start_lsn/end_lsn column of this table and an
- integer constant are considered; everything else is ignored, as is any
- condition that is not an AND tree of such comparisons.
-
- The two bounds are handled differently: the lower bound is used to find the
- correct starting _file_, the upper bound the last _block_ that needs reading.
-
- Lower bound: only start_lsn contributes. start_lsn >= X raises *start_lsn
- to X if X is larger; start_lsn > X is treated as start_lsn >= X + 1.
- A lower bound on end_lsn is not a valid lower bound for the scan and is
- ignored.
-
- Upper bound: only end_lsn contributes. end_lsn < X lowers *end_lsn to X if
- X is smaller; end_lsn <= X is treated as end_lsn < X + 1. An upper bound on
- start_lsn is not a valid upper bound for the scan and is ignored.
-
- In both cases the "+ 1" adjustment is skipped when the constant already
- equals IB_UINT64_MAX.
-
- Example:
-
- SELECT * FROM INNODB_CHANGED_PAGES
- WHERE
- start_lsn > 10 AND
- end_lsn <= 1111 AND
- 555 > end_lsn AND
- page_id = 100;
-
- end_lsn will be set to 555, start_lsn will be set 11.
-
- Support for other functions (equal, NULL-safe equal, BETWEEN, IN, etc.) will
- be added on demand.
-
-*/
-static
-void
-limit_lsn_range_from_condition(
-/*===========================*/
-	TABLE*		table,		/*!<in: table */
-	Item*		cond,		/*!<in: condition */
-	ib_uint64_t*	start_lsn,	/*!<in/out: minimum LSN */
-	ib_uint64_t*	end_lsn)	/*!<in/out: maximum LSN */
-{
-	enum Item_func::Functype	func_type;
-
-	if (cond->type() != Item::COND_ITEM &&
-	    cond->type() != Item::FUNC_ITEM)
-		return;
-
-	func_type = ((Item_func*) cond)->functype();
-
-	switch (func_type)
-	{
-	case Item_func::COND_AND_FUNC:
-	{
-		/* Every conjunct may tighten the range further */
-		List_iterator<Item>	li(*((Item_cond*) cond)
-					   ->argument_list());
-		Item			*item;
-
-		while ((item= li++)) {
-			limit_lsn_range_from_condition(table, item, start_lsn,
-						       end_lsn);
-		}
-		break;
-	}
-	case Item_func::LT_FUNC:
-	case Item_func::LE_FUNC:
-	case Item_func::GT_FUNC:
-	case Item_func::GE_FUNC:
-	{
-		Item		*left;
-		Item		*right;
-		Item_field	*item_field;
-		ib_uint64_t	tmp_result;
-		ibool		is_end_lsn;
-
-		/* a <= b equals to b >= a that's why we just exchange "left"
-		and "right" in the case of ">" or ">=" function. We don't
-		touch the operation itself. */
-		if (func_type == Item_func::LT_FUNC
-		    || func_type == Item_func::LE_FUNC) {
-			left = ((Item_func*) cond)->arguments()[0];
-			right = ((Item_func*) cond)->arguments()[1];
-		} else {
-			left = ((Item_func*) cond)->arguments()[1];
-			right = ((Item_func*) cond)->arguments()[0];
-		}
-
-		if (left->type() == Item::FIELD_ITEM) {
-			item_field = (Item_field *)left;
-		} else if (right->type() == Item::FIELD_ITEM) {
-			item_field = (Item_field *)right;
-		} else {
-			return;
-		}
-
-		/* Check if the current field belongs to our table */
-		if (table != item_field->field->table) {
-			return;
-		}
-
-		/* Check if the field is START_LSN or END_LSN */
-		/* END_LSN */
-		is_end_lsn = table->field[3]->eq(item_field->field);
-
-		if (/* START_LSN */ !table->field[2]->eq(item_field->field)
-		    && !is_end_lsn) {
-			return;
-		}
-
-		if (left->type() == Item::FIELD_ITEM
-		    && right->type() == Item::INT_ITEM) {
-
-			/* The case of start_lsn|end_lsn <|<= const
-			  "end_lsn <=? const" gives a valid upper bound.
-			  "start_lsn <=? const" is not a valid upper bound.
-			*/
-
-			if (is_end_lsn) {
-				tmp_result = right->val_int();
-				/* "<=" (written either way round) becomes
-				"<" of the next value */
-				if (((func_type == Item_func::LE_FUNC)
-				     || (func_type == Item_func::GE_FUNC))
-				    && (tmp_result != IB_UINT64_MAX)) {
-
-					tmp_result++;
-				}
-				if (tmp_result < *end_lsn) {
-					*end_lsn = tmp_result;
-				}
-			}
-
-		} else if (left->type() == Item::INT_ITEM
-			   && right->type() == Item::FIELD_ITEM) {
-
-			/* The case of const <|<= start_lsn|end_lsn
-			  turning it around: start_lsn|end_lsn >|>= const
-			  "start_lsn >=? const " is a valid lower bound.
-			  "end_lsn >=? const" is not a valid lower bound.
-			*/
-
-			if (!is_end_lsn) {
-				tmp_result = left->val_int();
-				/* ">" (written either way round) becomes
-				">=" of the next value */
-				if (((func_type == Item_func::LT_FUNC)
-				     || (func_type == Item_func::GT_FUNC))
-				    && (tmp_result != IB_UINT64_MAX)) {
-
-					tmp_result++;
-				}
-				if (tmp_result > *start_lsn) {
-					*start_lsn = tmp_result;
-				}
-			}
-		}
-
-		break;
-	}
-	default:;
-	}
-}
-
-/***********************************************************************
-Populate information_schema.innodb_changed_pages from the changed-page
-bitmap files. The LSN window that is scanned is narrowed from the WHERE
-condition when one is supplied, and capped at the currently tracked LSN
-while the tracker is running. At most srv_max_changed_pages rows are
-produced (0 means no limit).
-@return 0 on success, 1 on failure */
-static
-int
-i_s_innodb_changed_pages_fill(
-/*==========================*/
-	THD*		thd,	/*!<in: thread */
-	TABLE_LIST*	tables,	/*!<in/out: tables to fill */
-	Item*		cond)	/*!<in: condition */
-{
-	TABLE*			table = (TABLE *) tables->table;
-	log_bitmap_iterator_t	bmp_iter;
-	ib_uint64_t		rows_stored = 0UL;
-	lsn_t			max_lsn = LSN_MAX;
-	lsn_t			min_lsn = 0ULL;
-
-	DBUG_ENTER("i_s_innodb_changed_pages_fill");
-
-	/* Users lacking PROCESS_ACL get an empty result, not an error. */
-	if (check_global_access(thd, PROCESS_ACL, true)) {
-
-		DBUG_RETURN(0);
-	}
-
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	if (cond) {
-		limit_lsn_range_from_condition(table, cond, &min_lsn,
-					       &max_lsn);
-	}
-
-	/* While the tracker is running the tracked LSN keeps advancing;
-	never ask for more than what has been tracked so far, otherwise a
-	partially written run could be read. */
-	if (srv_track_changed_pages) {
-		ib_uint64_t	tracked_lsn = log_get_tracked_lsn();
-
-		if (tracked_lsn < max_lsn) {
-			max_lsn = tracked_lsn;
-		}
-	}
-
-	if (!log_online_bitmap_iterator_init(&bmp_iter, min_lsn, max_lsn)) {
-		my_error(ER_CANT_FIND_SYSTEM_REC, MYF(0));
-		DBUG_RETURN(1);
-	}
-
-	for (;;) {
-		/* Advance first, then test the row limit: this is the same
-		evaluation order as "next() && (no limit || below limit)". */
-		if (!log_online_bitmap_iterator_next(&bmp_iter)) {
-			break;
-		}
-
-		if (srv_max_changed_pages
-		    && rows_stored >= srv_max_changed_pages) {
-			break;
-		}
-
-		if (!LOG_BITMAP_ITERATOR_PAGE_CHANGED(bmp_iter)) {
-			continue;
-		}
-
-		/* SPACE_ID */
-		table->field[0]->store(
-			LOG_BITMAP_ITERATOR_SPACE_ID(bmp_iter));
-		/* PAGE_ID */
-		table->field[1]->store(
-			LOG_BITMAP_ITERATOR_PAGE_NUM(bmp_iter));
-		/* START_LSN */
-		table->field[2]->store(
-			LOG_BITMAP_ITERATOR_START_LSN(bmp_iter), true);
-		/* END_LSN */
-		table->field[3]->store(
-			LOG_BITMAP_ITERATOR_END_LSN(bmp_iter), true);
-
-		/* I_S tables live in memory, so a large bitmap file could
-		make the result very big. Evaluate the WHERE condition
-		against the values just stored and keep only matching rows.
-		The server evaluates the condition again during query
-		execution; a THD flag could perhaps avoid that double
-		check. */
-		if (cond && !cond->val_int()) {
-			continue;
-		}
-
-		if (schema_table_store_record(thd, table)) {
-			log_online_bitmap_iterator_release(&bmp_iter);
-			my_error(ER_CANT_FIND_SYSTEM_REC, MYF(0));
-			DBUG_RETURN(1);
-		}
-
-		++rows_stored;
-	}
-
-	/* The iterator records a read failure in its "failed" member. */
-	const int	ret = bmp_iter.failed ? 1 : 0;
-
-	if (ret) {
-		my_error(ER_CANT_FIND_SYSTEM_REC, MYF(0));
-	}
-
-	log_online_bitmap_iterator_release(&bmp_iter);
-	DBUG_RETURN(ret);
-}
-
-/***********************************************************************
-Bind the dynamic table information_schema.innodb_changed_pages: install
-its column definitions and its fill callback on the schema table object.
-@return 0 (always) */
-static
-int
-i_s_innodb_changed_pages_init(
-/*==========================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("i_s_innodb_changed_pages_init");
-
-	ST_SCHEMA_TABLE* const	i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
-
-	i_s_table->fill_table = i_s_innodb_changed_pages_fill;
-	i_s_table->fields_info = i_s_innodb_changed_pages_info;
-
-	DBUG_RETURN(0);
-}
-
-/* Plugin descriptor for INFORMATION_SCHEMA.INNODB_CHANGED_PAGES.
-The last two initializers are positional, so the entries must stay in
-this order. */
-UNIV_INTERN struct st_maria_plugin i_s_innodb_changed_pages =
-{
- /* the plugin type: an INFORMATION_SCHEMA table */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
- /* type-specific plugin descriptor */
- STRUCT_FLD(info, &i_s_info),
- /* plugin name */
- STRUCT_FLD(name, "INNODB_CHANGED_PAGES"),
- /* plugin author */
- STRUCT_FLD(author, "Percona"),
- /* general descriptive text */
- STRUCT_FLD(descr, "InnoDB CHANGED_PAGES table"),
- /* the plugin license */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
- /* called when the plugin is loaded */
- STRUCT_FLD(init, i_s_innodb_changed_pages_init),
- /* called when the plugin is unloaded */
- STRUCT_FLD(deinit, i_s_common_deinit),
- /* plugin version */
- STRUCT_FLD(version, 0x0100 /* 1.0 */),
- /* no status or system variables */
- STRUCT_FLD(status_vars, NULL),
- STRUCT_FLD(system_vars, NULL),
- /* Maria extension, written without STRUCT_FLD: version info string
- followed by the maturity level (named version_info / maturity in the
- sibling descriptors of this file) */
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE
-};
-
-/** TABLESPACES_ENCRYPTION ********************************************/
-/* Fields of the table INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION */
-/* Each #define below is the zero-based position of the column that
-follows it. i_s_dict_fill_tablespaces_encryption() indexes the Field array
-with these constants, so the numbers must track the array order. */
-static ST_FIELD_INFO innodb_tablespaces_encryption_fields_info[] =
-{
-#define TABLESPACES_ENCRYPTION_SPACE 0
- {STRUCT_FLD(field_name, "SPACE"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define TABLESPACES_ENCRYPTION_NAME 1
- {STRUCT_FLD(field_name, "NAME"),
- STRUCT_FLD(field_length, MAX_FULL_NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define TABLESPACES_ENCRYPTION_ENCRYPTION_SCHEME 2
- {STRUCT_FLD(field_name, "ENCRYPTION_SCHEME"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define TABLESPACES_ENCRYPTION_KEYSERVER_REQUESTS 3
- {STRUCT_FLD(field_name, "KEYSERVER_REQUESTS"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define TABLESPACES_ENCRYPTION_MIN_KEY_VERSION 4
- {STRUCT_FLD(field_name, "MIN_KEY_VERSION"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define TABLESPACES_ENCRYPTION_CURRENT_KEY_VERSION 5
- {STRUCT_FLD(field_name, "CURRENT_KEY_VERSION"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- /* The two KEY_ROTATION_* columns are nullable: the fill function
- stores NULL in both unless status.rotating is set. */
-#define TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER 6
- {STRUCT_FLD(field_name, "KEY_ROTATION_PAGE_NUMBER"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER 7
- {STRUCT_FLD(field_name, "KEY_ROTATION_MAX_PAGE_NUMBER"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define TABLESPACES_ENCRYPTION_CURRENT_KEY_ID 8
- {STRUCT_FLD(field_name, "CURRENT_KEY_ID"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- /* Filled with 1 when status.rotating or status.flushing is set,
- otherwise 0. */
-#define TABLESPACES_ENCRYPTION_ROTATING_OR_FLUSHING 9
- {STRUCT_FLD(field_name, "ROTATING_OR_FLUSHING"),
- STRUCT_FLD(field_length, 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/**********************************************************************//**
-Store one row of INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION that
-describes a single tablespace, using the encryption status reported by
-fil_space_crypt_get_status().
-@param[in]	thd		thread handle
-@param[in]	space		Tablespace
-@param[in]	table_to_fill	I_S table to fill
-@return 0 on success */
-static
-int
-i_s_dict_fill_tablespaces_encryption(
-	THD*		thd,
-	fil_space_t*	space,
-	TABLE*		table_to_fill)
-{
-	struct fil_space_crypt_status_t	crypt_stat;
-
-	DBUG_ENTER("i_s_dict_fill_tablespaces_encryption");
-
-	Field** const	cols = table_to_fill->field;
-
-	fil_space_crypt_get_status(space, &crypt_stat);
-
-	/* Identity of the tablespace. */
-	OK(cols[TABLESPACES_ENCRYPTION_SPACE]->store(space->id));
-
-	OK(field_store_string(cols[TABLESPACES_ENCRYPTION_NAME],
-			      space->name));
-
-	/* Key and scheme information. */
-	OK(cols[TABLESPACES_ENCRYPTION_ENCRYPTION_SCHEME]->store(
-		   crypt_stat.scheme));
-	OK(cols[TABLESPACES_ENCRYPTION_KEYSERVER_REQUESTS]->store(
-		   crypt_stat.keyserver_requests));
-	OK(cols[TABLESPACES_ENCRYPTION_MIN_KEY_VERSION]->store(
-		   crypt_stat.min_key_version));
-	OK(cols[TABLESPACES_ENCRYPTION_CURRENT_KEY_VERSION]->store(
-		   crypt_stat.current_key_version));
-	OK(cols[TABLESPACES_ENCRYPTION_CURRENT_KEY_ID]->store(
-		   crypt_stat.key_id));
-	OK(cols[TABLESPACES_ENCRYPTION_ROTATING_OR_FLUSHING]->store(
-		   (crypt_stat.rotating || crypt_stat.flushing) ? 1 : 0));
-
-	/* Rotation progress is only meaningful while a rotation is
-	running; both columns are NULL otherwise. */
-	Field* const	rot_page
-		= cols[TABLESPACES_ENCRYPTION_KEY_ROTATION_PAGE_NUMBER];
-	Field* const	rot_max_page
-		= cols[TABLESPACES_ENCRYPTION_KEY_ROTATION_MAX_PAGE_NUMBER];
-
-	if (!crypt_stat.rotating) {
-		rot_page->set_null();
-		rot_max_page->set_null();
-	} else {
-		rot_page->set_notnull();
-		OK(rot_page->store(crypt_stat.rotate_next_page_number));
-		rot_max_page->set_notnull();
-		OK(rot_max_page->store(crypt_stat.rotate_max_page_number));
-	}
-
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Function to populate INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION table.
-Loop through each record in SYS_TABLESPACES, acquire the corresponding
-tablespace and store one row with its encryption status. If the scan did
-not return tablespace 0, a row for it is added separately.
-Errors from storing individual rows are not propagated (best effort).
-@return 0 on success */
-static
-int
-i_s_tablespaces_encryption_fill_table(
-/*===========================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	btr_pcur_t	pcur;
-	const rec_t*	rec;
-	mem_heap_t*	heap;
-	mtr_t		mtr;
-	bool		found_space_0 = false;
-
-	DBUG_ENTER("i_s_tablespaces_encryption_fill_table");
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* deny access to user without SUPER_ACL privilege */
-	if (check_global_access(thd, SUPER_ACL)) {
-		DBUG_RETURN(0);
-	}
-
-	heap = mem_heap_create(1000);
-	mutex_enter(&dict_sys->mutex);
-	mtr_start(&mtr);
-
-	rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES);
-
-	while (rec) {
-		const char*	err_msg;
-		ulint		space_id;
-		const char*	name;
-		ulint		flags;
-
-		/* Extract necessary information from a SYS_TABLESPACES row */
-		err_msg = dict_process_sys_tablespaces(
-			heap, rec, &space_id, &name, &flags);
-
-		/* Do not hold the dictionary mutex or the mini-transaction
-		while the I_S row is being produced. */
-		mtr_commit(&mtr);
-		mutex_exit(&dict_sys->mutex);
-
-		if (space_id == 0) {
-			found_space_0 = true;
-		}
-
-		fil_space_t*	space = fil_space_acquire_silent(space_id);
-
-		if (err_msg) {
-			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
-					    ER_CANT_FIND_SYSTEM_REC, "%s",
-					    err_msg);
-		} else if (space) {
-			i_s_dict_fill_tablespaces_encryption(
-				thd, space, tables->table);
-		}
-		/* else: the record is valid but the tablespace could not
-		be acquired. There is no error text to report, so skip the
-		row instead of pushing a warning with a NULL message. */
-
-		if (space) {
-			fil_space_release(space);
-		}
-
-		mem_heap_empty(heap);
-
-		/* Get the next record */
-		mutex_enter(&dict_sys->mutex);
-		mtr_start(&mtr);
-		rec = dict_getnext_system(&pcur, &mtr);
-	}
-
-	mtr_commit(&mtr);
-	mutex_exit(&dict_sys->mutex);
-	mem_heap_free(heap);
-
-	if (found_space_0 == false) {
-		/* space 0 does for what ever unknown reason not show up
-		 * in iteration above, add it manually */
-
-		fil_space_t*	space = fil_space_acquire_silent(0);
-
-		/* The acquire can return NULL (the loop above checks for
-		that as well); never dereference or release a NULL space. */
-		if (space) {
-			i_s_dict_fill_tablespaces_encryption(
-				thd, space, tables->table);
-
-			fil_space_release(space);
-		}
-	}
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Bind the dynamic table INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION:
-install its column definitions and its fill callback.
-@return 0 on success */
-static
-int
-innodb_tablespaces_encryption_init(
-/*========================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_tablespaces_encryption_init");
-
-	ST_SCHEMA_TABLE* const	i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
-
-	i_s_table->fill_table = i_s_tablespaces_encryption_fill_table;
-	i_s_table->fields_info = innodb_tablespaces_encryption_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/* Plugin descriptor for INFORMATION_SCHEMA.INNODB_TABLESPACES_ENCRYPTION.
-The last two initializers are positional, so the entries must stay in
-this order. */
-UNIV_INTERN struct st_maria_plugin i_s_innodb_tablespaces_encryption =
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_TABLESPACES_ENCRYPTION"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, "Google Inc"),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "InnoDB TABLESPACES_ENCRYPTION"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_BSD),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, innodb_tablespaces_encryption_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL),
-
- /* Maria extension, written without STRUCT_FLD: version info string
- followed by the maturity level (named version_info / maturity in the
- sibling descriptors of this file) */
- INNODB_VERSION_STR, MariaDB_PLUGIN_MATURITY_STABLE
-};
-
-/** TABLESPACES_SCRUBBING ********************************************/
-/* Fields of the table INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING */
-/* Each #define below is the zero-based position of the column that
-follows it. i_s_dict_fill_tablespaces_scrubbing() indexes the Field array
-with these constants, so the numbers must track the array order. */
-static ST_FIELD_INFO innodb_tablespaces_scrubbing_fields_info[] =
-{
-#define TABLESPACES_SCRUBBING_SPACE 0
- {STRUCT_FLD(field_name, "SPACE"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define TABLESPACES_SCRUBBING_NAME 1
- {STRUCT_FLD(field_name, "NAME"),
- STRUCT_FLD(field_length, MAX_FULL_NAME_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- /* Filled with 1 or 0 from status.compressed. */
-#define TABLESPACES_SCRUBBING_COMPRESSED 2
- {STRUCT_FLD(field_name, "COMPRESSED"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- /* NULL when status.last_scrub_completed is 0. */
-#define TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED 3
- {STRUCT_FLD(field_name, "LAST_SCRUB_COMPLETED"),
- STRUCT_FLD(field_length, 0),
- STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- /* The four CURRENT_SCRUB_* columns below are marked NULL by the fill
- function whenever status.scrubbing is not set. */
-#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED 4
- {STRUCT_FLD(field_name, "CURRENT_SCRUB_STARTED"),
- STRUCT_FLD(field_length, 0),
- STRUCT_FLD(field_type, MYSQL_TYPE_DATETIME),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS 5
- {STRUCT_FLD(field_name, "CURRENT_SCRUB_ACTIVE_THREADS"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED | MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- /* NOTE(review): this column and the next one are not flagged
- MY_I_S_MAYBE_NULL although the fill function calls set_null() on them
- when no scrub is running - confirm the intended nullability. */
-#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER 6
- {STRUCT_FLD(field_name, "CURRENT_SCRUB_PAGE_NUMBER"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER 7
- {STRUCT_FLD(field_name, "CURRENT_SCRUB_MAX_PAGE_NUMBER"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/**********************************************************************//**
-Store one row of INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING that
-describes a single tablespace, using the scrub status reported by
-fil_space_get_scrub_status().
-LAST_SCRUB_COMPLETED is NULL when no scrub has completed; the four
-CURRENT_SCRUB_* columns are only filled while a scrub is in progress.
-@param[in]	thd		Thread handle
-@param[in]	space		Tablespace
-@param[in]	table_to_fill	I_S table
-@return 0 on success */
-static
-int
-i_s_dict_fill_tablespaces_scrubbing(
-	THD*		thd,
-	fil_space_t*	space,
-	TABLE*		table_to_fill)
-{
-	Field**	fields;
-	struct fil_space_scrub_status_t status;
-
-	DBUG_ENTER("i_s_dict_fill_tablespaces_scrubbing");
-
-	fields = table_to_fill->field;
-
-	fil_space_get_scrub_status(space, &status);
-
-	OK(fields[TABLESPACES_SCRUBBING_SPACE]->store(space->id));
-
-	OK(field_store_string(fields[TABLESPACES_SCRUBBING_NAME],
-			      space->name));
-
-	OK(fields[TABLESPACES_SCRUBBING_COMPRESSED]->store(
-		   status.compressed ? 1 : 0));
-
-	if (status.last_scrub_completed == 0) {
-		fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED]->set_null();
-	} else {
-		fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED]
-			->set_notnull();
-		OK(field_store_time_t(
-			   fields[TABLESPACES_SCRUBBING_LAST_SCRUB_COMPLETED],
-			   status.last_scrub_completed));
-	}
-
-	/* Columns that describe the scrub currently in progress. */
-	int field_numbers[] = {
-		TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED,
-		TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS,
-		TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER,
-		TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER };
-
-	if (status.scrubbing) {
-		for (uint i = 0; i < array_elements(field_numbers); i++) {
-			fields[field_numbers[i]]->set_notnull();
-		}
-
-		OK(field_store_time_t(
-			   fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_STARTED],
-			   status.current_scrub_started));
-		OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_ACTIVE_THREADS]
-		   ->store(status.current_scrub_active_threads));
-		OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER]
-		   ->store(status.current_scrub_page_number));
-		OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER]
-		   ->store(status.current_scrub_max_page_number));
-	} else {
-		/* No scrub in progress. The two page-number columns are
-		declared without MY_I_S_MAYBE_NULL, and set_null() is
-		expected to have no effect on a column that cannot be NULL.
-		Nothing in this function resets the record buffer, so zero
-		them explicitly; otherwise the row would repeat the page
-		numbers stored for a previously reported tablespace. */
-		OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_PAGE_NUMBER]
-		   ->store(0.0));
-		OK(fields[TABLESPACES_SCRUBBING_CURRENT_SCRUB_MAX_PAGE_NUMBER]
-		   ->store(0.0));
-
-		for (uint i = 0; i < array_elements(field_numbers); i++) {
-			fields[field_numbers[i]]->set_null();
-		}
-	}
-
-	OK(schema_table_store_record(thd, table_to_fill));
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Function to populate INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING table.
-Loop through each record in SYS_TABLESPACES, acquire the corresponding
-tablespace and store one row with its scrubbing status. If the scan did
-not return tablespace 0, a row for it is added separately.
-Errors from storing individual rows are not propagated (best effort).
-@return 0 on success */
-static
-int
-i_s_tablespaces_scrubbing_fill_table(
-/*===========================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	btr_pcur_t	pcur;
-	const rec_t*	rec;
-	mem_heap_t*	heap;
-	mtr_t		mtr;
-	bool		found_space_0 = false;
-
-	DBUG_ENTER("i_s_tablespaces_scrubbing_fill_table");
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* deny access to user without SUPER_ACL privilege */
-	if (check_global_access(thd, SUPER_ACL)) {
-		DBUG_RETURN(0);
-	}
-
-	heap = mem_heap_create(1000);
-	mutex_enter(&dict_sys->mutex);
-	mtr_start(&mtr);
-
-	rec = dict_startscan_system(&pcur, &mtr, SYS_TABLESPACES);
-
-	while (rec) {
-		const char*	err_msg;
-		ulint		space_id;
-		const char*	name;
-		ulint		flags;
-
-		/* Extract necessary information from a SYS_TABLESPACES row */
-		err_msg = dict_process_sys_tablespaces(
-			heap, rec, &space_id, &name, &flags);
-
-		/* Do not hold the dictionary mutex or the mini-transaction
-		while the I_S row is being produced. */
-		mtr_commit(&mtr);
-		mutex_exit(&dict_sys->mutex);
-
-		if (space_id == 0) {
-			found_space_0 = true;
-		}
-
-		fil_space_t*	space = fil_space_acquire_silent(space_id);
-
-		if (err_msg) {
-			push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN,
-					    ER_CANT_FIND_SYSTEM_REC, "%s",
-					    err_msg);
-		} else if (space) {
-			i_s_dict_fill_tablespaces_scrubbing(
-				thd, space, tables->table);
-		}
-		/* else: the record is valid but the tablespace could not
-		be acquired. There is no error text to report, so skip the
-		row instead of pushing a warning with a NULL message. */
-
-		if (space) {
-			fil_space_release(space);
-		}
-
-		mem_heap_empty(heap);
-
-		/* Get the next record */
-		mutex_enter(&dict_sys->mutex);
-		mtr_start(&mtr);
-		rec = dict_getnext_system(&pcur, &mtr);
-	}
-
-	mtr_commit(&mtr);
-	mutex_exit(&dict_sys->mutex);
-	mem_heap_free(heap);
-
-	if (found_space_0 == false) {
-		/* space 0 does for what ever unknown reason not show up
-		 * in iteration above, add it manually */
-		fil_space_t*	space = fil_space_acquire_silent(0);
-
-		/* The acquire can return NULL (the loop above checks for
-		that as well); never dereference or release a NULL space. */
-		if (space) {
-			i_s_dict_fill_tablespaces_scrubbing(
-				thd, space, tables->table);
-
-			fil_space_release(space);
-		}
-	}
-
-	DBUG_RETURN(0);
-}
-/*******************************************************************//**
-Bind the dynamic table INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING:
-install its column definitions and its fill callback.
-@return 0 on success */
-static
-int
-innodb_tablespaces_scrubbing_init(
-/*========================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_tablespaces_scrubbing_init");
-
-	ST_SCHEMA_TABLE* const	i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
-
-	i_s_table->fill_table = i_s_tablespaces_scrubbing_fill_table;
-	i_s_table->fields_info = innodb_tablespaces_scrubbing_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/* Plugin descriptor for INFORMATION_SCHEMA.INNODB_TABLESPACES_SCRUBBING.
-Loading the plugin runs innodb_tablespaces_scrubbing_init(), which binds
-the column list and the fill function of the table. */
-UNIV_INTERN struct st_maria_plugin i_s_innodb_tablespaces_scrubbing =
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_TABLESPACES_SCRUBBING"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, "Google Inc"),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "InnoDB TABLESPACES_SCRUBBING"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_BSD),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, innodb_tablespaces_scrubbing_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL),
-
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE)
-};
-
-/** INNODB_MUTEXES *********************************************/
-/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_MUTEXES */
-/* Each #define below is the zero-based position of the column that
-follows it. i_s_innodb_mutexes_fill_table() indexes the Field array with
-these constants, so the numbers must track the array order. */
-static ST_FIELD_INFO innodb_mutexes_fields_info[] =
-{
- /* name of the mutex or rw-lock */
-#define MUTEXES_NAME 0
- {STRUCT_FLD(field_name, "NAME"),
- STRUCT_FLD(field_length, OS_FILE_MAX_PATH),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
- /* base name of the source file recorded in the object (cfile_name);
- prefixed with "combined " for the aggregated buffer-pool block row */
-#define MUTEXES_CREATE_FILE 1
- {STRUCT_FLD(field_name, "CREATE_FILE"),
- STRUCT_FLD(field_length, OS_FILE_MAX_PATH),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
- /* source line recorded in the object (cline) */
-#define MUTEXES_CREATE_LINE 2
- {STRUCT_FLD(field_name, "CREATE_LINE"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
- /* count_os_wait of the object; summed over all buffer-pool block
- mutexes / rw-locks for the aggregated rows */
-#define MUTEXES_OS_WAITS 3
- {STRUCT_FLD(field_name, "OS_WAITS"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/*******************************************************************//**
-Store one row of INFORMATION_SCHEMA.INNODB_MUTEXES.
-Kept separate from the list walk so that a failed store is reported to
-the caller through the return value, which lets the caller release the
-list mutex it holds before it returns.
-@return 0 on success, 1 on failure */
-static
-int
-i_s_innodb_mutexes_store_row(
-/*=========================*/
-	THD*		thd,		/*!< in: thread */
-	TABLE*		table,		/*!< in/out: table to fill */
-	const char*	name,		/*!< in: mutex or rw-lock name */
-	const char*	create_file,	/*!< in: CREATE_FILE column text */
-	ulint		create_line,	/*!< in: creation line */
-	ulint		os_waits)	/*!< in: number of OS waits */
-{
-	Field**	fields = table->field;
-
-	DBUG_ENTER("i_s_innodb_mutexes_store_row");
-
-	OK(field_store_string(fields[MUTEXES_NAME], name));
-	OK(field_store_string(fields[MUTEXES_CREATE_FILE], create_file));
-	OK(field_store_ulint(fields[MUTEXES_CREATE_LINE], create_line));
-	OK(field_store_ulint(fields[MUTEXES_OS_WAITS], os_waits));
-	OK(schema_table_store_record(thd, table));
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Function to populate INFORMATION_SCHEMA.INNODB_MUTEXES table.
-Loop through each record in mutex and rw_lock lists, and extract the column
-information and fill the INFORMATION_SCHEMA.INNODB_MUTEXES table.
-Objects that never had an OS wait are skipped. Buffer pool block mutexes
-and rw-locks are not listed one by one; their OS waits are summed and
-reported in a single "combined" row per kind.
-@return 0 on success, 1 if a row could not be stored */
-static
-int
-i_s_innodb_mutexes_fill_table(
-/*==========================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	ib_mutex_t*	mutex;
-	rw_lock_t*	lock;
-	ulint		block_mutex_oswait_count = 0;
-	ulint		block_lock_oswait_count = 0;
-	ib_mutex_t*	block_mutex = NULL;
-	rw_lock_t*	block_lock = NULL;
-	int		err = 0;
-
-	DBUG_ENTER("i_s_innodb_mutexes_fill_table");
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* deny access to user without PROCESS_ACL privilege */
-	if (check_global_access(thd, PROCESS_ACL)) {
-		DBUG_RETURN(0);
-	}
-
-	mutex_enter(&mutex_list_mutex);
-
-	for (mutex = UT_LIST_GET_FIRST(mutex_list); mutex != NULL;
-	     mutex = UT_LIST_GET_NEXT(list, mutex)) {
-		if (mutex->count_os_wait == 0) {
-			continue;
-		}
-
-		if (buf_pool_is_block_mutex(mutex)) {
-			/* Remember one representative and accumulate. */
-			block_mutex = mutex;
-			block_mutex_oswait_count += mutex->count_os_wait;
-			continue;
-		}
-
-		err = i_s_innodb_mutexes_store_row(
-			thd, tables->table, mutex->cmutex_name,
-			innobase_basename(mutex->cfile_name),
-			mutex->cline, mutex->count_os_wait);
-
-		if (err) {
-			break;
-		}
-	}
-
-	if (!err && block_mutex) {
-		char buf1[IO_SIZE];
-
-		my_snprintf(buf1, sizeof buf1, "combined %s",
-			    innobase_basename(block_mutex->cfile_name));
-
-		err = i_s_innodb_mutexes_store_row(
-			thd, tables->table, block_mutex->cmutex_name, buf1,
-			block_mutex->cline, block_mutex_oswait_count);
-	}
-
-	/* Always release the list mutex, also when a store failed. */
-	mutex_exit(&mutex_list_mutex);
-
-	if (err) {
-		DBUG_RETURN(1);
-	}
-
-	mutex_enter(&rw_lock_list_mutex);
-
-	for (lock = UT_LIST_GET_FIRST(rw_lock_list); lock != NULL;
-	     lock = UT_LIST_GET_NEXT(list, lock)) {
-		if (lock->count_os_wait == 0) {
-			continue;
-		}
-
-		if (buf_pool_is_block_lock(lock)) {
-			/* Remember one representative and accumulate. */
-			block_lock = lock;
-			block_lock_oswait_count += lock->count_os_wait;
-			continue;
-		}
-
-		err = i_s_innodb_mutexes_store_row(
-			thd, tables->table, lock->lock_name,
-			innobase_basename(lock->cfile_name),
-			lock->cline, lock->count_os_wait);
-
-		if (err) {
-			break;
-		}
-	}
-
-	if (!err && block_lock) {
-		char buf1[IO_SIZE];
-
-		my_snprintf(buf1, sizeof buf1, "combined %s",
-			    innobase_basename(block_lock->cfile_name));
-
-		err = i_s_innodb_mutexes_store_row(
-			thd, tables->table, block_lock->lock_name, buf1,
-			block_lock->cline, block_lock_oswait_count);
-	}
-
-	/* Always release the list mutex, also when a store failed. */
-	mutex_exit(&rw_lock_list_mutex);
-
-	DBUG_RETURN(err ? 1 : 0);
-}
-
-/*******************************************************************//**
-Bind the dynamic table INFORMATION_SCHEMA.INNODB_MUTEXES: install its
-column definitions and its fill callback.
-@return 0 on success */
-static
-int
-innodb_mutexes_init(
-/*================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_mutexes_init");
-
-	ST_SCHEMA_TABLE* const	i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
-
-	i_s_table->fill_table = i_s_innodb_mutexes_fill_table;
-	i_s_table->fields_info = innodb_mutexes_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/* Plugin descriptor for INFORMATION_SCHEMA.INNODB_MUTEXES.
-NOTE(review): this is declared as st_mysql_plugin although it initializes
-the Maria-only version_info / maturity members, and the sibling
-descriptors in this file use st_maria_plugin - confirm against the extern
-declaration before changing the type. */
-UNIV_INTERN struct st_mysql_plugin	i_s_innodb_mutexes =
-{
-	/* the plugin type (a MYSQL_XXX_PLUGIN value) */
-	/* int */
-	STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
-	/* pointer to type-specific plugin descriptor */
-	/* void* */
-	STRUCT_FLD(info, &i_s_info),
-
-	/* plugin name */
-	/* const char* */
-	STRUCT_FLD(name, "INNODB_MUTEXES"),
-
-	/* plugin author (for SHOW PLUGINS) */
-	/* const char* */
-	STRUCT_FLD(author, plugin_author),
-
-	/* general descriptive text (for SHOW PLUGINS) */
-	/* const char* */
-	/* was "InnoDB SYS_DATAFILES", which names a different table */
-	STRUCT_FLD(descr, "InnoDB MUTEXES"),
-
-	/* the plugin license (PLUGIN_LICENSE_XXX) */
-	/* int */
-	STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
-	/* the function to invoke when plugin is loaded */
-	/* int (*)(void*); */
-	STRUCT_FLD(init, innodb_mutexes_init),
-
-	/* the function to invoke when plugin is unloaded */
-	/* int (*)(void*); */
-	STRUCT_FLD(deinit, i_s_common_deinit),
-
-	/* plugin version (for SHOW PLUGINS) */
-	/* unsigned int */
-	STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
-	/* struct st_mysql_show_var* */
-	STRUCT_FLD(status_vars, NULL),
-
-	/* struct st_mysql_sys_var** */
-	STRUCT_FLD(system_vars, NULL),
-
-	/* Maria extension */
-	STRUCT_FLD(version_info, INNODB_VERSION_STR),
-	STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
-};
-
-/** SYS_SEMAPHORE_WAITS ************************************************/
-/* Fields of the dynamic table INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS.
-The comment in front of each entry gives the name and value of the matching
-SYS_SEMAPHORE_WAITS_* column index macro from i_s.h, so the order of the
-entries should be kept in step with those macros. The array is terminated by
-END_OF_ST_FIELD_INFO. */
-static ST_FIELD_INFO innodb_sys_semaphore_waits_fields_info[] =
-{
- // SYS_SEMAPHORE_WAITS_THREAD_ID 0
- {STRUCT_FLD(field_name, "THREAD_ID"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_OBJECT_NAME 1
- {STRUCT_FLD(field_name, "OBJECT_NAME"),
- STRUCT_FLD(field_length, OS_FILE_MAX_PATH),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_FILE 2
- {STRUCT_FLD(field_name, "FILE"),
- STRUCT_FLD(field_length, OS_FILE_MAX_PATH),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_LINE 3
- {STRUCT_FLD(field_name, "LINE"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_WAIT_TIME 4
- {STRUCT_FLD(field_name, "WAIT_TIME"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_WAIT_OBJECT 5
- {STRUCT_FLD(field_name, "WAIT_OBJECT"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_WAIT_TYPE 6
- {STRUCT_FLD(field_name, "WAIT_TYPE"),
- STRUCT_FLD(field_length, 16),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID 7
- {STRUCT_FLD(field_name, "HOLDER_THREAD_ID"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_HOLDER_FILE 8
- {STRUCT_FLD(field_name, "HOLDER_FILE"),
- STRUCT_FLD(field_length, OS_FILE_MAX_PATH),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_HOLDER_LINE 9
- {STRUCT_FLD(field_name, "HOLDER_LINE"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_CREATED_FILE 10
- {STRUCT_FLD(field_name, "CREATED_FILE"),
- STRUCT_FLD(field_length, OS_FILE_MAX_PATH),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_CREATED_LINE 11
- {STRUCT_FLD(field_name, "CREATED_LINE"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_WRITER_THREAD 12
- {STRUCT_FLD(field_name, "WRITER_THREAD"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_RESERVATION_MODE 13
- {STRUCT_FLD(field_name, "RESERVATION_MODE"),
- STRUCT_FLD(field_length, 16),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_READERS 14
- {STRUCT_FLD(field_name, "READERS"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_WAITERS_FLAG 15
- {STRUCT_FLD(field_name, "WAITERS_FLAG"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_LOCK_WORD 16
- {STRUCT_FLD(field_name, "LOCK_WORD"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_LAST_READER_FILE 17
- {STRUCT_FLD(field_name, "LAST_READER_FILE"),
- STRUCT_FLD(field_length, OS_FILE_MAX_PATH),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_LAST_READER_LINE 18
- {STRUCT_FLD(field_name, "LAST_READER_LINE"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE 19
- {STRUCT_FLD(field_name, "LAST_WRITER_FILE"),
- STRUCT_FLD(field_length, OS_FILE_MAX_PATH),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_MAYBE_NULL),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE 20
- {STRUCT_FLD(field_name, "LAST_WRITER_LINE"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- // SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT 21
- {STRUCT_FLD(field_name, "OS_WAIT_COUNT"),
- STRUCT_FLD(field_length, MY_INT32_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-
-
-/*******************************************************************//**
-Plugin init callback for INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS:
-installs the column definitions and the fill function on the schema table.
-@return always 0 */
-static
-int
-innodb_sys_semaphore_waits_init(
-/*============================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_sys_semaphore_waits_init");
-
-	ST_SCHEMA_TABLE*	i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
-
-	i_s_table->fill_table = sync_arr_fill_sys_semphore_waits_table;
-	i_s_table->fields_info = innodb_sys_semaphore_waits_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/** Plugin descriptor that registers the table
-INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS; the table is bound to its
-columns and fill function by innodb_sys_semaphore_waits_init(). */
-UNIV_INTERN struct st_mysql_plugin i_s_innodb_sys_semaphore_waits =
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "INNODB_SYS_SEMAPHORE_WAITS"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, maria_plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "InnoDB SYS_SEMAPHORE_WAITS"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, innodb_sys_semaphore_waits_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL),
-
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
-};
-
-/* Fields of the dynamic table INFORMATION_SCHEMA.CHANGED_PAGE_BITMAPS:
-a single unsigned MYSQL_TYPE_LONGLONG column named "dummy". */
-static ST_FIELD_INFO innodb_changed_page_bitmaps_fields_info[] =
-{
- {STRUCT_FLD(field_name, "dummy"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
- END_OF_ST_FIELD_INFO
-};
-
-/*******************************************************************//**
-Fill INFORMATION_SCHEMA.CHANGED_PAGE_BITMAPS with its single row, whose only
-column is set to 0. No row is produced when InnoDB has not been started or
-when the caller lacks the PROCESS privilege.
-@return 0 on success, 1 if storing the field or the row failed */
-static
-int
-fill_changed_page_bitmaps_table(
-/*============================*/
-	THD*		thd,	/*!< in: thread */
-	TABLE_LIST*	tables,	/*!< in/out: tables to fill */
-	Item*		)	/*!< in: condition (not used) */
-{
-	DBUG_ENTER("fill_changed_page_bitmaps");
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	/* only users holding the PROCESS_ACL privilege get the row */
-	if (!check_global_access(thd, PROCESS_ACL)) {
-		TABLE*	i_s_table = tables->table;
-
-		OK(field_store_ulint(i_s_table->field[0], 0));
-		OK(schema_table_store_record(thd, i_s_table));
-	}
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Reset callback of the changed_page_bitmaps table. When changed page tracking
-(srv_track_changed_pages) is enabled, resets srv_checkpoint_completed_event
-and then calls log_online_follow_redo_log(); otherwise does nothing.
-@return always 0 */
-static
-int
-flush_changed_page_bitmaps()
-/*========================*/
-{
-	DBUG_ENTER("flush_changed_page_bitmaps");
-
-	if (!srv_track_changed_pages) {
-		DBUG_RETURN(0);
-	}
-
-	os_event_reset(srv_checkpoint_completed_event);
-	log_online_follow_redo_log();
-
-	DBUG_RETURN(0);
-}
-
-/*******************************************************************//**
-Plugin init callback for INFORMATION_SCHEMA.CHANGED_PAGE_BITMAPS: installs
-the column definitions, the fill function and the reset (flush) function on
-the schema table.
-@return always 0 */
-static
-int
-innodb_changed_page_bitmaps_init(
-/*=============================*/
-	void*	p)	/*!< in/out: table schema object */
-{
-	DBUG_ENTER("innodb_changed_page_bitmaps_init");
-
-	ST_SCHEMA_TABLE*	i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
-
-	i_s_table->reset_table = flush_changed_page_bitmaps;
-	i_s_table->fill_table = fill_changed_page_bitmaps_table;
-	i_s_table->fields_info = innodb_changed_page_bitmaps_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/** Plugin descriptor that registers the table
-INFORMATION_SCHEMA.CHANGED_PAGE_BITMAPS; the table is bound to its column,
-fill function and reset function by innodb_changed_page_bitmaps_init(). */
-UNIV_INTERN struct st_mysql_plugin i_s_innodb_changed_page_bitmaps =
-{
- /* the plugin type (a MYSQL_XXX_PLUGIN value) */
- /* int */
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
-
- /* pointer to type-specific plugin descriptor */
- /* void* */
- STRUCT_FLD(info, &i_s_info),
-
- /* plugin name */
- /* const char* */
- STRUCT_FLD(name, "CHANGED_PAGE_BITMAPS"),
-
- /* plugin author (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(author, maria_plugin_author),
-
- /* general descriptive text (for SHOW PLUGINS) */
- /* const char* */
- STRUCT_FLD(descr, "XtraDB dummy changed_page_bitmaps table"),
-
- /* the plugin license (PLUGIN_LICENSE_XXX) */
- /* int */
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
-
- /* the function to invoke when plugin is loaded */
- /* int (*)(void*); */
- STRUCT_FLD(init, innodb_changed_page_bitmaps_init),
-
- /* the function to invoke when plugin is unloaded */
- /* int (*)(void*); */
- STRUCT_FLD(deinit, i_s_common_deinit),
-
- /* plugin version (for SHOW PLUGINS) */
- /* unsigned int */
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
-
- /* struct st_mysql_show_var* */
- STRUCT_FLD(status_vars, NULL),
-
- /* struct st_mysql_sys_var** */
- STRUCT_FLD(system_vars, NULL),
-
- /* Maria extension */
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
-};
-
diff --git a/storage/xtradb/handler/i_s.h b/storage/xtradb/handler/i_s.h
deleted file mode 100644
index 4bb3ea33462..00000000000
--- a/storage/xtradb/handler/i_s.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file handler/i_s.h
-InnoDB INFORMATION SCHEMA tables interface to MySQL.
-
-Created July 18, 2007 Vasil Dimov
-Modified Dec 29, 2014 Jan Lindström
-*******************************************************/
-
-#ifndef i_s_h
-#define i_s_h
-
-/** Author strings for the "author" member of the plugin descriptors. */
-const char plugin_author[] = "Oracle Corporation";
-const char maria_plugin_author[] = "MariaDB Corporation";
-
-/* From here on the preprocessor turns every "st_mysql_plugin" into
-"st_maria_plugin", so all descriptors declared below have the same type
-whichever spelling their declaration uses. */
-#define st_mysql_plugin st_maria_plugin
-
-/** INFORMATION_SCHEMA plugin descriptors. Each descriptor is declared
-exactly once. */
-extern struct st_mysql_plugin i_s_innodb_trx;
-extern struct st_mysql_plugin i_s_innodb_locks;
-extern struct st_mysql_plugin i_s_innodb_lock_waits;
-extern struct st_mysql_plugin i_s_innodb_cmp;
-extern struct st_mysql_plugin i_s_innodb_cmp_reset;
-extern struct st_mysql_plugin i_s_innodb_cmp_per_index;
-extern struct st_mysql_plugin i_s_innodb_cmp_per_index_reset;
-extern struct st_mysql_plugin i_s_innodb_cmpmem;
-extern struct st_mysql_plugin i_s_innodb_cmpmem_reset;
-extern struct st_mysql_plugin i_s_innodb_metrics;
-extern struct st_mysql_plugin i_s_innodb_ft_default_stopword;
-extern struct st_mysql_plugin i_s_innodb_ft_deleted;
-extern struct st_mysql_plugin i_s_innodb_ft_being_deleted;
-extern struct st_mysql_plugin i_s_innodb_ft_index_cache;
-extern struct st_mysql_plugin i_s_innodb_ft_index_table;
-extern struct st_mysql_plugin i_s_innodb_ft_config;
-extern struct st_mysql_plugin i_s_innodb_buffer_page;
-extern struct st_mysql_plugin i_s_innodb_buffer_page_lru;
-extern struct st_mysql_plugin i_s_innodb_buffer_stats;
-extern struct st_mysql_plugin i_s_innodb_sys_tables;
-extern struct st_mysql_plugin i_s_innodb_sys_tablestats;
-extern struct st_mysql_plugin i_s_innodb_sys_indexes;
-extern struct st_mysql_plugin i_s_innodb_sys_columns;
-extern struct st_mysql_plugin i_s_innodb_sys_fields;
-extern struct st_mysql_plugin i_s_innodb_sys_foreign;
-extern struct st_mysql_plugin i_s_innodb_sys_foreign_cols;
-extern struct st_mysql_plugin i_s_innodb_sys_tablespaces;
-extern struct st_mysql_plugin i_s_innodb_sys_datafiles;
-extern struct st_mysql_plugin i_s_innodb_changed_pages;
-extern struct st_mysql_plugin i_s_innodb_mutexes;
-extern struct st_maria_plugin i_s_innodb_tablespaces_encryption;
-extern struct st_maria_plugin i_s_innodb_tablespaces_scrubbing;
-extern struct st_mysql_plugin i_s_innodb_sys_semaphore_waits;
-extern struct st_mysql_plugin i_s_innodb_changed_page_bitmaps;
-
-/** Maximum number of buffer page info records we would cache. */
-#define MAX_BUF_INFO_CACHED 10000
-
-/** Evaluate expr once and, if the result is non-zero, return 1 from the
-enclosing function through DBUG_RETURN. The body is wrapped in
-do { } while (0) so that "OK(x);" is exactly one statement: it cannot
-capture an "else" that follows it and it is safe as the unbraced body of an
-"if" or a loop. */
-#define OK(expr) \
-	do { \
-		if ((expr) != 0) { \
-			DBUG_RETURN(1); \
-		} \
-	} while (0)
-
-/* Leave the enclosing loop or switch when expr is true. Because this is a
-bare "if", an "else" written directly after it would bind to the macro; a
-do { } while (0) wrapper is not an option here, since "break" would then
-only leave the wrapper. */
-#define BREAK_IF(expr) if ((expr)) break
-
-/* If srv_was_started is false, push a warning that names the
-INFORMATION_SCHEMA table (plugin_name) and return 0 from the enclosing
-function through DBUG_RETURN. The macro refers to a variable called "thd",
-which must therefore be in scope at the place of use. */
-#define RETURN_IF_INNODB_NOT_STARTED(plugin_name) \
-do { \
- if (!srv_was_started) { \
- push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, \
- ER_CANT_FIND_SYSTEM_REC, \
- "InnoDB: SELECTing from " \
- "INFORMATION_SCHEMA.%s but " \
- "the InnoDB storage engine " \
- "is not installed", plugin_name); \
- DBUG_RETURN(0); \
- } \
-} while (0)
-
-/* STRUCT_FLD(name, value) writes one member initializer of an aggregate.
-With GCC 3 or newer (not in strict ANSI mode, and not the Intel compiler or
-clang) the member is named explicitly: ".name = value" when
-HAVE_C99_INITIALIZERS is defined, otherwise the older "name: value" form.
-With every other compiler only "value" is emitted, so initializers must
-always be listed in the declaration order of the members. */
-#if !defined __STRICT_ANSI__ && defined __GNUC__ && (__GNUC__) > 2 && !defined __INTEL_COMPILER && !defined __clang__
-#ifdef HAVE_C99_INITIALIZERS
-#define STRUCT_FLD(name, value) .name = value
-#else
-#define STRUCT_FLD(name, value) name: value
-#endif /* HAVE_C99_INITIALIZERS */
-#else
-#define STRUCT_FLD(name, value) value
-#endif
-
-/* Terminating entry of an ST_FIELD_INFO array: field_name is NULL and
-field_type is MYSQL_TYPE_NULL.
-Don't use a static const variable here, as some C++ compilers (notably
-HPUX aCC: HP ANSI C++ B3910B A.03.65) can't handle it. */
-#define END_OF_ST_FIELD_INFO \
- {STRUCT_FLD(field_name, NULL), \
- STRUCT_FLD(field_length, 0), \
- STRUCT_FLD(field_type, MYSQL_TYPE_NULL), \
- STRUCT_FLD(value, 0), \
- STRUCT_FLD(field_flags, 0), \
- STRUCT_FLD(old_name, ""), \
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)}
-
-/** Column indexes of the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
-Each value is the position of the column in the table's ST_FIELD_INFO array
-(innodb_sys_semaphore_waits_fields_info). */
-#define SYS_SEMAPHORE_WAITS_THREAD_ID 0
-#define SYS_SEMAPHORE_WAITS_OBJECT_NAME 1
-#define SYS_SEMAPHORE_WAITS_FILE 2
-#define SYS_SEMAPHORE_WAITS_LINE 3
-#define SYS_SEMAPHORE_WAITS_WAIT_TIME 4
-#define SYS_SEMAPHORE_WAITS_WAIT_OBJECT 5
-#define SYS_SEMAPHORE_WAITS_WAIT_TYPE 6
-#define SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID 7
-#define SYS_SEMAPHORE_WAITS_HOLDER_FILE 8
-#define SYS_SEMAPHORE_WAITS_HOLDER_LINE 9
-#define SYS_SEMAPHORE_WAITS_CREATED_FILE 10
-#define SYS_SEMAPHORE_WAITS_CREATED_LINE 11
-#define SYS_SEMAPHORE_WAITS_WRITER_THREAD 12
-#define SYS_SEMAPHORE_WAITS_RESERVATION_MODE 13
-#define SYS_SEMAPHORE_WAITS_READERS 14
-#define SYS_SEMAPHORE_WAITS_WAITERS_FLAG 15
-#define SYS_SEMAPHORE_WAITS_LOCK_WORD 16
-#define SYS_SEMAPHORE_WAITS_LAST_READER_FILE 17
-#define SYS_SEMAPHORE_WAITS_LAST_READER_LINE 18
-#define SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE 19
-#define SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE 20
-#define SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT 21
-
-/*******************************************************************//**
-Auxiliary function to store ulint value in MYSQL_TYPE_LONGLONG field.
-If the value is ULINT_UNDEFINED then the field is set to NULL.
-@return 0 on success */
-int
-field_store_ulint(
-/*==============*/
- Field* field, /*!< in/out: target field for storage */
- ulint n); /*!< in: value to store */
-
-/*******************************************************************//**
-Auxiliary function to store char* value in MYSQL_TYPE_STRING field.
-NOTE(review): str may be NULL; presumably the field is then set to SQL NULL,
-but the definition is not part of this header - confirm.
-@return 0 on success */
-int
-field_store_string(
-/*===============*/
- Field* field, /*!< in/out: target field for storage */
- const char* str); /*!< in: NUL-terminated utf-8 string,
- or NULL */
-#endif /* i_s_h */
diff --git a/storage/xtradb/handler/xtradb_i_s.cc b/storage/xtradb/handler/xtradb_i_s.cc
deleted file mode 100644
index eaf7da733bf..00000000000
--- a/storage/xtradb/handler/xtradb_i_s.cc
+++ /dev/null
@@ -1,544 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2010-2012, Percona Inc. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-#include "univ.i"
-#include <mysqld_error.h>
-#include <sql_acl.h> // PROCESS_ACL
-
-#include <m_ctype.h>
-#include <hash.h>
-#include <myisampack.h>
-#include <mysys_err.h>
-#include <my_sys.h>
-#include "i_s.h"
-#include <sql_plugin.h>
-#include <innodb_priv.h>
-
-#include <read0i_s.h>
-#include <trx0i_s.h>
-#include "srv0start.h" /* for srv_was_started */
-#include <btr0pcur.h> /* btr_pcur_t */
-#include <btr0sea.h> /* btr_search_sys */
-#include <log0recv.h> /* recv_sys */
-#include <fil0fil.h>
-
-/* for XTRADB_RSEG table */
-#include "trx0trx.h" /* for TRX_QUE_STATE_STR_MAX_LEN */
-#include "trx0rseg.h" /* for trx_rseg_struct */
-#include "trx0sys.h" /* for trx_sys */
-
-/** Author string used by the XTRADB_* plugin descriptors in this file. */
-#define PLUGIN_AUTHOR "Percona Inc."
-
-static int field_store_blob(Field*, const char*, uint) __attribute__((unused));
-/** Store a (pointer, length) value in a MYSQL_TYPE_BLOB field. A NULL
-pointer sets the field to SQL NULL; otherwise the bytes are stored using
-system_charset_info and the field is marked NOT NULL.
-@return 0 when data is NULL, otherwise the result of Field::store() */
-static
-int
-field_store_blob(
-	Field*		field,		/*!< in/out: target field for storage */
-	const char*	data,		/*!< in: pointer to data, or NULL */
-	uint		data_len)	/*!< in: data length */
-{
-	if (data == NULL) {
-		field->set_null();
-
-		return(0);	/* success */
-	}
-
-	const int	err = field->store(data, data_len, system_charset_info);
-
-	field->set_notnull();
-
-	return(err);
-}
-
-/** Deinit callback shared by the plugin descriptors in this file. It does
-not use its argument and has nothing to release.
-@return always 0 */
-static
-int
-i_s_common_deinit(
-/*==============*/
- void* p) /*!< in/out: table schema object */
-{
- DBUG_ENTER("i_s_common_deinit");
-
- /* Do nothing */
-
- DBUG_RETURN(0);
-}
-
-/* Fields of the dynamic table INFORMATION_SCHEMA.XTRADB_READ_VIEW. The
-#define in front of each entry is the index of that column in the array.
-Note that the macro names are shorter than the column names: e.g.
-READ_VIEW_LOW_LIMIT_NUMBER indexes column "READ_VIEW_LOW_LIMIT_TRX_NUMBER". */
-static ST_FIELD_INFO xtradb_read_view_fields_info[] =
-{
-#define READ_VIEW_UNDO_NUMBER 0
- {STRUCT_FLD(field_name, "READ_VIEW_UNDO_NUMBER"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define READ_VIEW_LOW_LIMIT_NUMBER 1
- {STRUCT_FLD(field_name, "READ_VIEW_LOW_LIMIT_TRX_NUMBER"),
- STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define READ_VIEW_UPPER_LIMIT_ID 2
- {STRUCT_FLD(field_name, "READ_VIEW_UPPER_LIMIT_TRX_ID"),
- STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define READ_VIEW_LOW_LIMIT_ID 3
- {STRUCT_FLD(field_name, "READ_VIEW_LOW_LIMIT_TRX_ID"),
-
- STRUCT_FLD(field_length, TRX_ID_MAX_LEN + 1),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/** Fill INFORMATION_SCHEMA.XTRADB_READ_VIEW with one row taken from
-read_fill_i_s_xtradb_read_view(): the undo number as an integer and the three
-transaction id limits formatted with TRX_ID_FMT. No row is produced when the
-caller lacks the PROCESS privilege, when InnoDB has not been started, or when
-read_fill_i_s_xtradb_read_view() returns NULL.
-@return 0 on success, 1 if storing a field or the row failed */
-static int xtradb_read_view_fill_table(THD* thd, TABLE_LIST* tables, Item*)
-{
-	DBUG_ENTER("xtradb_read_view_fill_table");
-
-	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
-		DBUG_RETURN(0);
-	}
-
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	TABLE*			i_s_table = tables->table;
-	Field**			columns = i_s_table->field;
-	i_s_xtradb_read_view_t	view;
-
-	if (read_fill_i_s_xtradb_read_view(&view) == NULL) {
-		DBUG_RETURN(0);
-	}
-
-	OK(field_store_ulint(columns[READ_VIEW_UNDO_NUMBER], view.undo_no));
-
-	/* The id columns are strings; one buffer is reused for all three. */
-	char	id_text[TRX_ID_MAX_LEN + 1];
-
-	ut_snprintf(id_text, sizeof(id_text), TRX_ID_FMT, view.low_limit_no);
-	OK(field_store_string(columns[READ_VIEW_LOW_LIMIT_NUMBER], id_text));
-
-	ut_snprintf(id_text, sizeof(id_text), TRX_ID_FMT, view.up_limit_id);
-	OK(field_store_string(columns[READ_VIEW_UPPER_LIMIT_ID], id_text));
-
-	ut_snprintf(id_text, sizeof(id_text), TRX_ID_FMT, view.low_limit_id);
-	OK(field_store_string(columns[READ_VIEW_LOW_LIMIT_ID], id_text));
-
-	OK(schema_table_store_record(thd, i_s_table));
-
-	DBUG_RETURN(0);
-}
-
-
-/** Plugin init callback for INFORMATION_SCHEMA.XTRADB_READ_VIEW: installs
-the column definitions and the fill function on the schema table.
-@return always 0 */
-static int xtradb_read_view_init(void* p)
-{
-	DBUG_ENTER("xtradb_read_view_init");
-
-	ST_SCHEMA_TABLE*	i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
-
-	i_s_table->fill_table = xtradb_read_view_fill_table;
-	i_s_table->fields_info = xtradb_read_view_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/** Type-specific descriptor referenced by the "info" member of every plugin
-descriptor in this file; it carries only the interface version. */
-static struct st_mysql_information_schema i_s_info =
-{
- MYSQL_INFORMATION_SCHEMA_INTERFACE_VERSION
-};
-
-/** Plugin descriptor that registers INFORMATION_SCHEMA.XTRADB_READ_VIEW;
-the table is bound to its columns and fill function by
-xtradb_read_view_init(). */
-UNIV_INTERN struct st_mysql_plugin i_s_xtradb_read_view =
-{
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
- STRUCT_FLD(info, &i_s_info),
- STRUCT_FLD(name, "XTRADB_READ_VIEW"),
- STRUCT_FLD(author, PLUGIN_AUTHOR),
- STRUCT_FLD(descr, "InnoDB Read View information"),
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
- STRUCT_FLD(init, xtradb_read_view_init),
- STRUCT_FLD(deinit, i_s_common_deinit),
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
- STRUCT_FLD(status_vars, NULL),
- STRUCT_FLD(system_vars, NULL),
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
-};
-
-/* Fields of the dynamic table INFORMATION_SCHEMA.XTRADB_INTERNAL_HASH_TABLES:
-a name column followed by three unsigned memory-size columns. The #define in
-front of each entry is the index of that column in the array. */
-static ST_FIELD_INFO xtradb_internal_hash_tables_fields_info[] =
-{
-#define INT_HASH_TABLES_NAME 0
- {STRUCT_FLD(field_name, "INTERNAL_HASH_TABLE_NAME"),
- STRUCT_FLD(field_length, 100),
- STRUCT_FLD(field_type, MYSQL_TYPE_STRING),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, 0),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define INT_HASH_TABLES_TOTAL 1
- {STRUCT_FLD(field_name, "TOTAL_MEMORY"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define INT_HASH_TABLES_CONSTANT 2
- {STRUCT_FLD(field_name, "CONSTANT_MEMORY"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
-#define INT_HASH_TABLES_VARIABLE 3
- {STRUCT_FLD(field_name, "VARIABLE_MEMORY"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/** Fill INFORMATION_SCHEMA.XTRADB_INTERNAL_HASH_TABLES with one row per
-internal hash table: adaptive hash index, page hash of buffer pool 0,
-dictionary cache (if dict_sys exists), file system, lock system and recovery
-system (if recv_sys exists). Each row reports total, constant and variable
-memory.
-
-Every size is computed once and kept in ulint, the type that
-field_store_ulint() takes. Sizes are not narrowed through ulong, which is
-only 32 bits wide on LLP64 platforms such as 64-bit Windows, and TOTAL is
-always exactly CONSTANT + VARIABLE for a row.
-@return 0 on success (also when access is denied or InnoDB is not started),
-1 if storing a field or a row failed */
-static int xtradb_internal_hash_tables_fill_table(THD* thd, TABLE_LIST* tables, Item*)
-{
-	const char*	table_name;
-	Field**		fields;
-	TABLE*		table;
-	ulint		btr_search_sys_constant = 0;
-	ulint		btr_search_sys_variable = 0;
-
-	DBUG_ENTER("xtradb_internal_hash_tables_fill_table");
-
-	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
-
-		DBUG_RETURN(0);
-	}
-
-	table_name = tables->schema_table_name;
-	table = tables->table;
-	fields = table->field;
-
-	RETURN_IF_INNODB_NOT_STARTED(table_name);
-
-	/* Calculate AHI constant and variable memory allocations */
-
-	ut_ad(btr_search_sys->hash_tables);
-
-	for (ulint i = 0; i < btr_search_index_num; i++) {
-		hash_table_t*	ht = btr_search_sys->hash_tables[i];
-
-		ut_ad(ht);
-		ut_ad(ht->heap);
-
-		/* Multiple mutexes/heaps are currently never used for adaptive
-		hash index tables. */
-		ut_ad(!ht->n_sync_obj);
-		ut_ad(!ht->heaps);
-
-		btr_search_sys_variable += mem_heap_get_size(ht->heap);
-		btr_search_sys_constant += ht->n_cells * sizeof(hash_cell_t);
-	}
-
-	OK(field_store_string(fields[INT_HASH_TABLES_NAME],
-			      "Adaptive hash index"));
-	OK(field_store_ulint(fields[INT_HASH_TABLES_TOTAL],
-			     btr_search_sys_variable + btr_search_sys_constant));
-	OK(field_store_ulint(fields[INT_HASH_TABLES_CONSTANT],
-			     btr_search_sys_constant));
-	OK(field_store_ulint(fields[INT_HASH_TABLES_VARIABLE],
-			     btr_search_sys_variable));
-	OK(schema_table_store_record(thd, table));
-
-	{
-		/* The page hash has no variable part: TOTAL == CONSTANT. */
-		const ulint	page_hash_constant
-			= buf_pool_from_array(0)->page_hash->n_cells
-			* sizeof(hash_cell_t);
-
-		OK(field_store_string(fields[INT_HASH_TABLES_NAME],
-				      "Page hash (buffer pool 0 only)"));
-		OK(field_store_ulint(fields[INT_HASH_TABLES_TOTAL],
-				     page_hash_constant));
-		OK(field_store_ulint(fields[INT_HASH_TABLES_CONSTANT],
-				     page_hash_constant));
-		OK(field_store_ulint(fields[INT_HASH_TABLES_VARIABLE], 0));
-		OK(schema_table_store_record(thd, table));
-	}
-
-	if (dict_sys)
-	{
-		const ulint	dict_constant
-			= (dict_sys->table_hash->n_cells
-			   + dict_sys->table_id_hash->n_cells)
-			* sizeof(hash_cell_t);
-		const ulint	dict_variable = dict_sys->size;
-
-		OK(field_store_string(fields[INT_HASH_TABLES_NAME],
-				      "Dictionary Cache"));
-		OK(field_store_ulint(fields[INT_HASH_TABLES_TOTAL],
-				     dict_constant + dict_variable));
-		OK(field_store_ulint(fields[INT_HASH_TABLES_CONSTANT],
-				     dict_constant));
-		OK(field_store_ulint(fields[INT_HASH_TABLES_VARIABLE],
-				     dict_variable));
-		OK(schema_table_store_record(thd, table));
-	}
-
-	{
-		/* Query the file system once per value so that the three
-		columns of the row are consistent with each other. */
-		const ulint	fil_constant
-			= (ulint) fil_system_hash_cells() * sizeof(hash_cell_t);
-		const ulint	fil_variable = (ulint) fil_system_hash_nodes();
-
-		OK(field_store_string(fields[INT_HASH_TABLES_NAME],
-				      "File system"));
-		OK(field_store_ulint(fields[INT_HASH_TABLES_TOTAL],
-				     fil_constant + fil_variable));
-		OK(field_store_ulint(fields[INT_HASH_TABLES_CONSTANT],
-				     fil_constant));
-		OK(field_store_ulint(fields[INT_HASH_TABLES_VARIABLE],
-				     fil_variable));
-		OK(schema_table_store_record(thd, table));
-	}
-
-	{
-		ulint	lock_sys_constant, lock_sys_variable;
-
-		trx_i_s_get_lock_sys_memory_usage(&lock_sys_constant,
-						  &lock_sys_variable);
-
-		OK(field_store_string(fields[INT_HASH_TABLES_NAME], "Lock System"));
-		OK(field_store_ulint(fields[INT_HASH_TABLES_TOTAL],
-				     lock_sys_constant + lock_sys_variable));
-		OK(field_store_ulint(fields[INT_HASH_TABLES_CONSTANT],
-				     lock_sys_constant));
-		OK(field_store_ulint(fields[INT_HASH_TABLES_VARIABLE],
-				     lock_sys_variable));
-		OK(schema_table_store_record(thd, table));
-	}
-
-	if (recv_sys)
-	{
-		/* Both parts are reported as 0 while recv_sys has no address
-		hash; the heap is only measured when the hash exists. */
-		ulint	recv_sys_constant = 0;
-		ulint	recv_sys_variable = 0;
-
-		if (recv_sys->addr_hash) {
-			recv_sys_constant = recv_sys->addr_hash->n_cells
-				* sizeof(hash_cell_t);
-			recv_sys_variable = mem_heap_get_size(recv_sys->heap);
-		}
-
-		OK(field_store_string(fields[INT_HASH_TABLES_NAME], "Recovery System"));
-		OK(field_store_ulint(fields[INT_HASH_TABLES_TOTAL],
-				     recv_sys_constant + recv_sys_variable));
-		OK(field_store_ulint(fields[INT_HASH_TABLES_CONSTANT],
-				     recv_sys_constant));
-		OK(field_store_ulint(fields[INT_HASH_TABLES_VARIABLE],
-				     recv_sys_variable));
-		OK(schema_table_store_record(thd, table));
-	}
-
-	DBUG_RETURN(0);
-}
-
-/** Plugin init callback for INFORMATION_SCHEMA.XTRADB_INTERNAL_HASH_TABLES:
-installs the column definitions and the fill function on the schema table.
-@return always 0 */
-static int xtradb_internal_hash_tables_init(void* p)
-{
-	DBUG_ENTER("xtradb_internal_hash_tables_init");
-
-	ST_SCHEMA_TABLE*	i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
-
-	i_s_table->fill_table = xtradb_internal_hash_tables_fill_table;
-	i_s_table->fields_info = xtradb_internal_hash_tables_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/** Plugin descriptor that registers
-INFORMATION_SCHEMA.XTRADB_INTERNAL_HASH_TABLES; the table is bound to its
-columns and fill function by xtradb_internal_hash_tables_init(). */
-UNIV_INTERN struct st_mysql_plugin i_s_xtradb_internal_hash_tables =
-{
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
- STRUCT_FLD(info, &i_s_info),
- STRUCT_FLD(name, "XTRADB_INTERNAL_HASH_TABLES"),
- STRUCT_FLD(author, PLUGIN_AUTHOR),
- STRUCT_FLD(descr, "InnoDB internal hash tables information"),
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
- STRUCT_FLD(init, xtradb_internal_hash_tables_init),
- STRUCT_FLD(deinit, i_s_common_deinit),
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
- STRUCT_FLD(status_vars, NULL),
- STRUCT_FLD(system_vars, NULL),
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
-};
-
-
-/***********************************************************************
-Fields of the dynamic table INFORMATION_SCHEMA.XTRADB_RSEG. All six columns
-are unsigned MYSQL_TYPE_LONGLONG; their positions 0..5 in this array are the
-field indexes that i_s_xtradb_rseg_fill() stores into. */
-static ST_FIELD_INFO i_s_xtradb_rseg_fields_info[] =
-{
- {STRUCT_FLD(field_name, "rseg_id"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "space_id"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "zip_size"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "page_no"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "max_size"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- {STRUCT_FLD(field_name, "curr_size"),
- STRUCT_FLD(field_length, MY_INT64_NUM_DECIMAL_DIGITS),
- STRUCT_FLD(field_type, MYSQL_TYPE_LONGLONG),
- STRUCT_FLD(value, 0),
- STRUCT_FLD(field_flags, MY_I_S_UNSIGNED),
- STRUCT_FLD(old_name, ""),
- STRUCT_FLD(open_method, SKIP_OPEN_TABLE)},
-
- END_OF_ST_FIELD_INFO
-};
-
-/** Fill INFORMATION_SCHEMA.XTRADB_RSEG with one row for every non-NULL slot
-of trx_sys->rseg_array. No row is produced when the caller lacks the PROCESS
-privilege or when InnoDB has not been started.
-NOTE(review): the results of the Field::store() calls are not checked here;
-only a failure to store the row is reported.
-@return 0 on success, 1 if a row could not be stored */
-static
-int
-i_s_xtradb_rseg_fill(
-/*=================*/
-	THD*		thd,	/* in: thread */
-	TABLE_LIST*	tables,	/* in/out: tables to fill */
-	Item*		)	/* in: condition (ignored) */
-{
-	DBUG_ENTER("i_s_xtradb_rseg_fill");
-
-	/* deny access to non-superusers */
-	if (check_global_access(thd, PROCESS_ACL)) {
-		DBUG_RETURN(0);
-	}
-
-	RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
-	TABLE*	i_s_table = (TABLE *) tables->table;
-	Field**	columns = i_s_table->field;
-
-	for (int slot = 0; slot < TRX_SYS_N_RSEGS; slot++) {
-		trx_rseg_t*	rseg = trx_sys->rseg_array[slot];
-
-		if (rseg) {
-			/* column order follows i_s_xtradb_rseg_fields_info */
-			columns[0]->store(rseg->id);
-			columns[1]->store(rseg->space);
-			columns[2]->store(rseg->zip_size);
-			columns[3]->store(rseg->page_no);
-			columns[4]->store(rseg->max_size);
-			columns[5]->store(rseg->curr_size);
-
-			if (schema_table_store_record(thd, i_s_table)) {
-				DBUG_RETURN(1);
-			}
-		}
-	}
-
-	DBUG_RETURN(0);
-}
-
-/** Plugin init callback for INFORMATION_SCHEMA.XTRADB_RSEG: installs the
-column definitions and the fill function on the schema table.
-@return always 0 */
-static
-int
-i_s_xtradb_rseg_init(
-/*=================*/
-			/* out: 0 on success */
-	void*	p)	/* in/out: table schema object */
-{
-	DBUG_ENTER("i_s_xtradb_rseg_init");
-
-	ST_SCHEMA_TABLE*	i_s_table = static_cast<ST_SCHEMA_TABLE*>(p);
-
-	i_s_table->fill_table = i_s_xtradb_rseg_fill;
-	i_s_table->fields_info = i_s_xtradb_rseg_fields_info;
-
-	DBUG_RETURN(0);
-}
-
-/** Plugin descriptor that registers INFORMATION_SCHEMA.XTRADB_RSEG; the
-table is bound to its columns and fill function by i_s_xtradb_rseg_init(). */
-UNIV_INTERN struct st_mysql_plugin i_s_xtradb_rseg =
-{
- STRUCT_FLD(type, MYSQL_INFORMATION_SCHEMA_PLUGIN),
- STRUCT_FLD(info, &i_s_info),
- STRUCT_FLD(name, "XTRADB_RSEG"),
- STRUCT_FLD(author, PLUGIN_AUTHOR),
- STRUCT_FLD(descr, "InnoDB rollback segment information"),
- STRUCT_FLD(license, PLUGIN_LICENSE_GPL),
- STRUCT_FLD(init, i_s_xtradb_rseg_init),
- STRUCT_FLD(deinit, i_s_common_deinit),
- STRUCT_FLD(version, INNODB_VERSION_SHORT),
- STRUCT_FLD(status_vars, NULL),
- STRUCT_FLD(system_vars, NULL),
- STRUCT_FLD(version_info, INNODB_VERSION_STR),
- STRUCT_FLD(maturity, MariaDB_PLUGIN_MATURITY_STABLE),
-};
diff --git a/storage/xtradb/handler/xtradb_i_s.h b/storage/xtradb/handler/xtradb_i_s.h
deleted file mode 100644
index 994bc11c1b8..00000000000
--- a/storage/xtradb/handler/xtradb_i_s.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2010-2012, Percona Inc. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-#ifndef XTRADB_I_S_H
-#define XTRADB_I_S_H
-
-extern struct st_mysql_plugin i_s_xtradb_read_view;
-extern struct st_mysql_plugin i_s_xtradb_internal_hash_tables;
-extern struct st_mysql_plugin i_s_xtradb_rseg;
-
-#endif /* XTRADB_I_S_H */
diff --git a/storage/xtradb/ibuf/ibuf0ibuf.cc b/storage/xtradb/ibuf/ibuf0ibuf.cc
deleted file mode 100644
index 0445bb557e1..00000000000
--- a/storage/xtradb/ibuf/ibuf0ibuf.cc
+++ /dev/null
@@ -1,5276 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file ibuf/ibuf0ibuf.cc
-Insert buffer
-
-Created 7/19/1997 Heikki Tuuri
-*******************************************************/
-
-#include "ibuf0ibuf.h"
-
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-UNIV_INTERN my_bool srv_ibuf_disable_background_merge;
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
-/** Number of bits describing a single page */
-#define IBUF_BITS_PER_PAGE 4
-#if IBUF_BITS_PER_PAGE % 2
-# error "IBUF_BITS_PER_PAGE must be an even number!"
-#endif
-/** The start address for an insert buffer bitmap page bitmap */
-#define IBUF_BITMAP PAGE_DATA
-
-#ifdef UNIV_NONINL
-#include "ibuf0ibuf.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-
-#include "buf0buf.h"
-#include "buf0rea.h"
-#include "fsp0fsp.h"
-#include "trx0sys.h"
-#include "fil0fil.h"
-#include "rem0rec.h"
-#include "btr0cur.h"
-#include "btr0pcur.h"
-#include "btr0btr.h"
-#include "btr0sea.h"
-#include "row0upd.h"
-#include "sync0sync.h"
-#include "dict0boot.h"
-#include "fut0lst.h"
-#include "lock0lock.h"
-#include "log0recv.h"
-#include "que0que.h"
-#include "srv0start.h" /* srv_shutdown_state */
-#include "ha_prototypes.h"
-#include "rem0cmp.h"
-
-/* STRUCTURE OF AN INSERT BUFFER RECORD
-
-In versions < 4.1.x:
-
-1. The first field is the page number.
-2. The second field is an array which stores type info for each subsequent
- field. We store the information which affects the ordering of records, and
- also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it
- is 10 bytes.
-3. Next we have the fields of the actual index record.
-
-In versions >= 4.1.x:
-
-Note that contrary to what we planned in the 1990s, there will only be one
-insert buffer tree, and that is in the system tablespace of InnoDB.
-
-1. The first field is the space id.
-2. The second field is a one-byte marker (0) which differentiates records from
- the < 4.1.x storage format.
-3. The third field is the page number.
-4. The fourth field contains the type info, where we have also added 2 bytes to
- store the charset. In the compressed table format of 5.0.x we must add more
- information here so that we can build a dummy 'index' struct which 5.0.x
- can use in the binary search on the index page in the ibuf merge phase.
-5. The rest of the fields contain the fields of the actual index record.
-
-In versions >= 5.0.3:
-
-The first byte of the fourth field is an additional marker (0) if the record
-is in the compact format. The presence of this marker can be detected by
-looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE.
-
-The high-order bit of the character set field in the type info is the
-"nullable" flag for the field.
-
-In versions >= 5.5:
-
-The optional marker byte at the start of the fourth field is replaced by
-mandatory 3 fields, totaling 4 bytes:
-
- 1. 2 bytes: Counter field, used to sort records within a (space id, page
- no) in the order they were added. This is needed so that for example the
- sequence of operations "INSERT x, DEL MARK x, INSERT x" is handled
- correctly.
-
- 2. 1 byte: Operation type (see ibuf_op_t).
-
- 3. 1 byte: Flags. Currently only one flag exists, IBUF_REC_COMPACT.
-
-To ensure older records, which do not have counters to enforce correct
-sorting, are merged before any new records, ibuf_insert checks if we're
-trying to insert to a position that contains old-style records, and if so,
-refuses the insert. Thus, ibuf pages are gradually converted to the new
-format as their corresponding buffer pool pages are read into memory.
-*/
-
-
-/* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
-
-If an OS thread performs any operation that brings in disk pages from
-non-system tablespaces into the buffer pool, or creates such a page there,
-then the operation may have as a side effect an insert buffer index tree
-compression. Thus, the tree latch of the insert buffer tree may be acquired
-in the x-mode, and also the file space latch of the system tablespace may
-be acquired in the x-mode.
-
-Also, an insert to an index in a non-system tablespace can have the same
-effect. How do we know this cannot lead to a deadlock of OS threads? There
-is a problem with the i/o-handler threads: they break the latching order
-because they own x-latches to pages which are on a lower level than the
-insert buffer tree latch, its page latches, and the tablespace latch an
-insert buffer operation can reserve.
-
-The solution is the following: Let all the tree and page latches connected
-with the insert buffer be later in the latching order than the fsp latch and
-fsp page latches.
-
-Insert buffer pages must be such that the insert buffer is never invoked
-when these pages are accessed as this would result in a recursion violating
-the latching order. We let a special i/o-handler thread take care of i/o to
-the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
-pages and the first inode page, which contains the inode of the ibuf tree: let
-us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead
-access both non-ibuf and ibuf pages.
-
-Then an i/o-handler for the insert buffer never needs to access recursively the
-insert buffer tree and thus obeys the latching order. On the other hand, other
-i/o-handlers for other tablespaces may require access to the insert buffer,
-but because all kinds of latches they need to access there are later in the
-latching order, no violation of the latching order occurs in this case,
-either.
-
-A problem is how to grow and contract an insert buffer tree. As it is later
-in the latching order than the fsp management, we have to reserve the fsp
-latch first, before adding or removing pages from the insert buffer tree.
-We let the insert buffer tree have its own file space management: a free
-list of pages linked to the tree root. To prevent recursive using of the
-insert buffer when adding pages to the tree, we must first load these pages
-to memory, obtaining a latch on them, and only after that add them to the
-free list of the insert buffer tree. More difficult is removing of pages
-from the free list. If there is an excess of pages in the free list of the
-ibuf tree, they might be needed if some thread reserves the fsp latch,
-intending to allocate more file space. So we do the following: if a thread
-reserves the fsp latch, we check the writer count field of the latch. If
-this field has value 1, it means that the thread did not own the latch
-before entering the fsp system, and the mtr of the thread contains no
-modifications to the fsp pages. Now we are free to reserve the ibuf latch,
-and check if there is an excess of pages in the free list. We can then, in a
-separate mini-transaction, take them out of the free list and free them to
-the fsp system.
-
-To avoid deadlocks in the ibuf system, we divide file pages into three levels:
-
-(1) non-ibuf pages,
-(2) ibuf tree pages and the pages in the ibuf tree free list, and
-(3) ibuf bitmap pages.
-
-No OS thread is allowed to access higher level pages if it has latches to
-lower level pages; even if the thread owns a B-tree latch it must not access
-the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
-is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
-exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively
-level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
-it uses synchronous aio, it can access any pages, as long as it obeys the
-access order rules. */
-
-/** Table name for the insert buffer. */
-#define IBUF_TABLE_NAME "SYS_IBUF_TABLE"
-
-/** Operations that can currently be buffered. */
-UNIV_INTERN ibuf_use_t ibuf_use = IBUF_USE_ALL;
-
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-/** Flag to control insert buffer debugging. */
-UNIV_INTERN uint ibuf_debug;
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
-/** The insert buffer control structure */
-UNIV_INTERN ibuf_t* ibuf = NULL;
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
-UNIV_INTERN mysql_pfs_key_t ibuf_mutex_key;
-UNIV_INTERN mysql_pfs_key_t ibuf_bitmap_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
-/** Number of tablespaces in the ibuf_counts array */
-#define IBUF_COUNT_N_SPACES 4
-/** Number of pages within each tablespace in the ibuf_counts array */
-#define IBUF_COUNT_N_PAGES 130000
-
-/** Buffered entry counts for file pages, used in debugging */
-static ulint ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
-
-/******************************************************************//**
-Checks that the indexes to ibuf_counts[][] are within limits.
-If either index is out of range, a diagnostic is written to stderr and
-ut_error is invoked. */
-UNIV_INLINE
-void
-ibuf_count_check(
-/*=============*/
- ulint space_id, /*!< in: space identifier */
- ulint page_no) /*!< in: page number */
-{
- if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) {
- return;
- }
-
- /* The arguments are cast to unsigned long because that is the
- type the %lu conversion requires; ulint is not guaranteed to be
- unsigned long on every platform. */
- fprintf(stderr,
- "InnoDB: UNIV_IBUF_COUNT_DEBUG limits space_id and page_no\n"
- "InnoDB: and breaks crash recovery.\n"
- "InnoDB: space_id=%lu, should be 0<=space_id<%lu\n"
- "InnoDB: page_no=%lu, should be 0<=page_no<%lu\n",
- (unsigned long) space_id, (unsigned long) IBUF_COUNT_N_SPACES,
- (unsigned long) page_no, (unsigned long) IBUF_COUNT_N_PAGES);
- ut_error;
-}
-#endif
-
-/** @name Offsets to the per-page bits in the insert buffer bitmap */
-/* @{ */
-#define IBUF_BITMAP_FREE 0 /*!< Bits indicating the
- amount of free space */
-#define IBUF_BITMAP_BUFFERED 2 /*!< TRUE if there are buffered
- changes for the page */
-#define IBUF_BITMAP_IBUF 3 /*!< TRUE if page is a part of
- the ibuf tree, excluding the
- root page, or is in the free
- list of the ibuf */
-/* @} */
-
-#define IBUF_REC_FIELD_SPACE 0 /*!< in the pre-4.1 format,
- the page number. later, the space_id */
-#define IBUF_REC_FIELD_MARKER 1 /*!< starting with 4.1, a marker
- consisting of 1 byte that is 0 */
-#define IBUF_REC_FIELD_PAGE 2 /*!< starting with 4.1, the
- page number */
-#define IBUF_REC_FIELD_METADATA 3 /* the metadata field */
-#define IBUF_REC_FIELD_USER 4 /* first user field */
-
-/* Various constants for checking the type of an ibuf record and extracting
-data from it. For details, see the description of the record format at the
-top of this file. */
-
-/** @name Format of the IBUF_REC_FIELD_METADATA of an insert buffer record
-The fourth column in the MySQL 5.5 format contains an operation
-type, counter, and some flags. */
-/* @{ */
-#define IBUF_REC_INFO_SIZE 4 /*!< Combined size of info fields at
- the beginning of the fourth field */
-#if IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
-# error "IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
-
-/* Offsets for the fields at the beginning of the fourth field */
-#define IBUF_REC_OFFSET_COUNTER 0 /*!< Operation counter */
-#define IBUF_REC_OFFSET_TYPE 2 /*!< Type of operation */
-#define IBUF_REC_OFFSET_FLAGS 3 /*!< Additional flags */
-
-/* Record flag masks */
-#define IBUF_REC_COMPACT 0x1 /*!< Set in
- IBUF_REC_OFFSET_FLAGS if the
- user index is in COMPACT
- format or later */
-
-
-/** The mutex used to block pessimistic inserts to ibuf trees */
-static ib_mutex_t ibuf_pessimistic_insert_mutex;
-
-/** The mutex protecting the insert buffer structs */
-static ib_mutex_t ibuf_mutex;
-
-/** The mutex protecting the insert buffer bitmaps */
-static ib_mutex_t ibuf_bitmap_mutex;
-
-/** The area in pages from which contract looks for page numbers for merge */
-#define IBUF_MERGE_AREA 8UL
-
-/** Inside the merge area, pages which have at most 1 per this number less
-buffered entries compared to maximum volume that can be buffered for a single
-page are merged along with the page whose buffer became full */
-#define IBUF_MERGE_THRESHOLD 4
-
-/** In ibuf_contract at most this number of pages is read to memory in one
-batch, in order to merge the entries for them in the insert buffer */
-#define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA
-
-/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
-many pages, we start to contract it in connection to inserts there, using
-non-synchronous contract */
-#define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0
-
-/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
-many pages, we start to contract it in connection to inserts there, using
-synchronous contract */
-#define IBUF_CONTRACT_ON_INSERT_SYNC 5
-
-/** If the combined size of the ibuf trees exceeds ibuf->max_size by
-this many pages, we start to contract it using synchronous contract, but do
-not insert */
-#define IBUF_CONTRACT_DO_NOT_INSERT 10
-
-/* TODO: how to cope with drop table if there are records in the insert
-buffer for the indexes of the table? Is there actually any problem,
-because ibuf merge is done to a page when it is read in, and it is
-still physically like the index page even if the index would have been
-dropped! So, there seems to be no problem. */
-
-/******************************************************************//**
-Marks the mini-transaction as executing inside an insert buffer routine.
-Debug builds assert that the mark is not set yet. */
-UNIV_INLINE
-void
-ibuf_enter(
-/*=======*/
- mtr_t* mtr) /*!< in/out: mini-transaction to be marked */
-{
- /* Entering twice without an intervening ibuf_exit() is a bug. */
- ut_ad(!mtr->inside_ibuf);
-
- mtr->inside_ibuf = TRUE;
-}
-
-/******************************************************************//**
-Clears the mark in the mini-transaction that says we are executing
-inside an insert buffer routine. Debug builds assert that the mark
-is currently set. */
-UNIV_INLINE
-void
-ibuf_exit(
-/*======*/
- mtr_t* mtr) /*!< in/out: mini-transaction to be unmarked */
-{
- /* Leaving without a preceding ibuf_enter() is a bug. */
- ut_ad(mtr->inside_ibuf);
-
- mtr->inside_ibuf = FALSE;
-}
-
-/**************************************************************//**
-Commits an insert buffer mini-transaction through
-btr_pcur_commit_specify_mtr(). In debug builds the inside-ibuf mark of
-the mini-transaction is cleared first. */
-UNIV_INLINE
-void
-ibuf_btr_pcur_commit_specify_mtr(
-/*=============================*/
- btr_pcur_t* pcur, /*!< in/out: persistent cursor */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- /* ut_d() makes this statement debug-only. */
- ut_d(ibuf_exit(mtr));
-
- btr_pcur_commit_specify_mtr(pcur, mtr);
-}
-
-/******************************************************************//**
-Fetches the ibuf header page with an x-latch.
-@return insert buffer header page, or NULL if the fetched block is
-flagged as encrypted */
-static
-page_t*
-ibuf_header_page_get(
-/*=================*/
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ut_ad(!ibuf_inside(mtr));
-
- buf_block_t* header_block = buf_page_get(
- IBUF_SPACE_ID, 0, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
-
- if (header_block->page.encrypted) {
- /* No usable frame: report the failure to the caller. */
- return NULL;
- }
-
- buf_block_dbg_add_level(header_block, SYNC_IBUF_HEADER);
-
- return buf_block_get_frame(header_block);
-}
-
-/******************************************************************//**
-Gets the root page of the insert buffer tree and x-latches it.
-Before the page is fetched, the lock of ibuf->index is x-locked in the
-same mini-transaction. Debug builds assert that the caller is inside an
-ibuf routine and owns ibuf_mutex, that the fetched page carries the
-expected space id and page number, and that ibuf->empty agrees with
-page_is_empty() of the root.
-@return insert buffer tree root page */
-static
-page_t*
-ibuf_tree_root_get(
-/*===============*/
- mtr_t* mtr) /*!< in/out: mini-transaction that takes the latches */
-{
- buf_block_t* block;
- page_t* root;
-
- ut_ad(ibuf_inside(mtr));
- ut_ad(mutex_own(&ibuf_mutex));
-
- mtr_x_lock(dict_index_get_lock(ibuf->index), mtr);
-
- block = buf_page_get(
- IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, mtr);
-
- buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW);
-
- root = buf_block_get_frame(block);
-
- ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
- ut_ad(page_get_page_no(root) == FSP_IBUF_TREE_ROOT_PAGE_NO);
- ut_ad(ibuf->empty == page_is_empty(root));
-
- return(root);
-}
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
-/******************************************************************//**
-Reads the debug counter of buffered entries kept for a page. The indexes
-are validated with ibuf_count_check() before the array is accessed.
-@return value stored in ibuf_counts[][] for the page */
-UNIV_INTERN
-ulint
-ibuf_count_get(
-/*===========*/
- ulint space, /*!< in: space id */
- ulint page_no)/*!< in: page number */
-{
- ibuf_count_check(space, page_no);
-
- const ulint n_buffered = ibuf_counts[space][page_no];
-
- return(n_buffered);
-}
-
-/******************************************************************//**
-Stores the debug counter of buffered entries for a page. The indexes are
-validated with ibuf_count_check(), and the value is asserted to be
-below UNIV_PAGE_SIZE, before the array is written. */
-static
-void
-ibuf_count_set(
-/*===========*/
- ulint space, /*!< in: space id */
- ulint page_no,/*!< in: page number */
- ulint val) /*!< in: value to set */
-{
- ibuf_count_check(space, page_no);
-
- ut_a(val < UNIV_PAGE_SIZE);
-
- ulint* const slot = &ibuf_counts[space][page_no];
-
- *slot = val;
-}
-#endif
-
-/******************************************************************//**
-Closes insert buffer and frees the data structures.
-Each of the three ibuf mutexes is freed and its storage zero-filled;
-then the ibuf control structure is freed and the global pointer reset
-to NULL. */
-UNIV_INTERN
-void
-ibuf_close(void)
-/*============*/
-{
- mutex_free(&ibuf_pessimistic_insert_mutex);
- memset(&ibuf_pessimistic_insert_mutex,
- 0x0, sizeof(ibuf_pessimistic_insert_mutex));
-
- mutex_free(&ibuf_mutex);
- memset(&ibuf_mutex, 0x0, sizeof(ibuf_mutex));
-
- mutex_free(&ibuf_bitmap_mutex);
- /* Size the fill by the object being cleared, not by a sibling
- mutex that merely happens to have the same type. */
- memset(&ibuf_bitmap_mutex, 0x0, sizeof(ibuf_bitmap_mutex));
-
- mem_free(ibuf);
- ibuf = NULL;
-}
-
-/******************************************************************//**
-Copies the insert buffer counters kept in the ibuf control structure
-into the caller-supplied output variables. */
-UNIV_INTERN
-void
-ibuf_export_ibuf_status(
-/*====================*/
- ulint* size, /*!< out: ibuf->size */
- ulint* free_list, /*!< out: ibuf->free_list_len */
- ulint* segment_size, /*!< out: ibuf->seg_size */
- ulint* merges, /*!< out: ibuf->n_merges */
- ulint* merged_inserts, /*!< out: merged IBUF_OP_INSERT */
- ulint* merged_delete_marks, /*!< out: merged IBUF_OP_DELETE_MARK */
- ulint* merged_deletes, /*!< out: merged IBUF_OP_DELETE */
- ulint* discarded_inserts, /*!< out: discarded IBUF_OP_INSERT */
- ulint* discarded_delete_marks, /*!< out: discarded
- IBUF_OP_DELETE_MARK */
- ulint* discarded_deletes) /*!< out: discarded IBUF_OP_DELETE */
-{
- /* Size figures */
- *size = ibuf->size;
- *free_list = ibuf->free_list_len;
- *segment_size = ibuf->seg_size;
-
- /* Merge counters */
- *merges = ibuf->n_merges;
- *merged_inserts = ibuf->n_merged_ops[IBUF_OP_INSERT];
- *merged_delete_marks = ibuf->n_merged_ops[IBUF_OP_DELETE_MARK];
- *merged_deletes = ibuf->n_merged_ops[IBUF_OP_DELETE];
-
- /* Discard counters */
- *discarded_inserts = ibuf->n_discarded_ops[IBUF_OP_INSERT];
- *discarded_delete_marks = ibuf->n_discarded_ops[IBUF_OP_DELETE_MARK];
- *discarded_deletes = ibuf->n_discarded_ops[IBUF_OP_DELETE];
-}
-
-/******************************************************************//**
-Recomputes ibuf->free_list_len, ibuf->height and ibuf->size from the
-tree root page. ibuf->seg_size is only read, i.e. the segment size is
-assumed not to have changed. The caller must own ibuf_mutex. */
-static
-void
-ibuf_size_update(
-/*=============*/
- const page_t* root, /*!< in: ibuf tree root */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(mutex_own(&ibuf_mutex));
-
- const ulint n_free = flst_get_len(
- root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr);
-
- ibuf->free_list_len = n_free;
-
- ibuf->height = btr_page_get_level(root, mtr) + 1;
-
- /* Neither the free list pages nor the ibuf header page (the
- extra 1) count towards the size of the tree. */
- ibuf->size = ibuf->seg_size - (n_free + 1);
-}
-
-/******************************************************************//**
-Creates the insert buffer data structure at a database startup and initializes
-the data structures for the insert buffer.
-If the ibuf header page cannot be obtained (ibuf_header_page_get() returns
-NULL), ibuf_mutex is released and the mini-transaction is committed before
-the error is returned, so that nothing stays latched on the failure path.
-@return DB_SUCCESS or DB_DECRYPTION_FAILED */
-UNIV_INTERN
-dberr_t
-ibuf_init_at_db_start(void)
-/*=======================*/
-{
- page_t* root;
- mtr_t mtr;
- dict_table_t* table;
- mem_heap_t* heap;
- dict_index_t* index;
- ulint n_used;
- page_t* header_page;
- dberr_t error= DB_SUCCESS;
-
- ibuf = static_cast<ibuf_t*>(mem_zalloc(sizeof(ibuf_t)));
-
- /* At startup we initialize ibuf to have a maximum of
- CHANGE_BUFFER_DEFAULT_SIZE in terms of percentage of the
- buffer pool size. Once ibuf struct is initialized this
- value is updated with the user supplied size by calling
- ibuf_max_size_update(). */
- ibuf->max_size = ((buf_pool_get_curr_size() / UNIV_PAGE_SIZE)
- * CHANGE_BUFFER_DEFAULT_SIZE) / 100;
-
- mutex_create(ibuf_pessimistic_insert_mutex_key,
- &ibuf_pessimistic_insert_mutex,
- SYNC_IBUF_PESS_INSERT_MUTEX);
-
- mutex_create(ibuf_mutex_key,
- &ibuf_mutex, SYNC_IBUF_MUTEX);
-
- mutex_create(ibuf_bitmap_mutex_key,
- &ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
-
- mtr_start(&mtr);
-
- mutex_enter(&ibuf_mutex);
-
- mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, NULL), &mtr);
-
- header_page = ibuf_header_page_get(&mtr);
-
- if (!header_page) {
- /* Do not leave ibuf_mutex held or the mini-transaction
- open. ibuf_enter() has not been called yet, so the plain
- mtr_commit() is used rather than ibuf_mtr_commit(). */
- mutex_exit(&ibuf_mutex);
- mtr_commit(&mtr);
-
- return (DB_DECRYPTION_FAILED);
- }
-
- fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
- &n_used, &mtr);
- ibuf_enter(&mtr);
-
- ut_ad(n_used >= 2);
-
- ibuf->seg_size = n_used;
-
- {
- buf_block_t* block;
-
- block = buf_page_get(
- IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
-
- root = buf_block_get_frame(block);
- }
-
- ibuf_size_update(root, &mtr);
- mutex_exit(&ibuf_mutex);
-
- ibuf->empty = page_is_empty(root);
- ibuf_mtr_commit(&mtr);
-
- heap = mem_heap_create(450);
-
- /* Use old-style record format for the insert buffer. */
- table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0, 0);
-
- dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);
-
- table->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
-
- dict_table_add_to_cache(table, FALSE, heap);
- mem_heap_free(heap);
-
- index = dict_mem_index_create(
- IBUF_TABLE_NAME, "CLUST_IND",
- IBUF_SPACE_ID, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 1);
-
- dict_mem_index_add_field(index, "DUMMY_COLUMN", 0);
-
- index->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
- btr_search_index_init(index);
-
- error = dict_index_add_to_cache(table, index,
- FSP_IBUF_TREE_ROOT_PAGE_NO, FALSE);
- ut_a(error == DB_SUCCESS);
-
- ibuf->index = dict_table_get_first_index(table);
- return (error);
-}
-
-/*********************************************************************//**
-Recomputes ibuf->max_size from a percentage of the current buffer pool
-size, expressed in pages, and stores it under ibuf_mutex. */
-UNIV_INTERN
-void
-ibuf_max_size_update(
-/*=================*/
- ulint new_val) /*!< in: new value in terms of
- percentage of the buffer pool size */
-{
- const ulint pool_pages = buf_pool_get_curr_size() / UNIV_PAGE_SIZE;
- const ulint limit = (pool_pages * new_val) / 100;
-
- mutex_enter(&ibuf_mutex);
-
- ibuf->max_size = limit;
-
- mutex_exit(&ibuf_mutex);
-}
-
-
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Initializes an ibuf bitmap page: sets the page type to
-FIL_PAGE_IBUF_BITMAP and zero-fills the bitmap, which holds
-IBUF_BITS_PER_PAGE bits for each page it describes. Unless
-UNIV_HOTBACKUP is defined, an MLOG_IBUF_BITMAP_INIT record is written. */
-UNIV_INTERN
-void
-ibuf_bitmap_page_init(
-/*==================*/
- buf_block_t* block, /*!< in: bitmap page */
- mtr_t* mtr) /*!< in: mtr */
-{
- const ulint zip_size = buf_block_get_zip_size(block);
-
- ut_a(ut_is_2pow(zip_size));
-
- page_t* page = buf_block_get_frame(block);
-
- fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
-
- /* Number of pages described by this bitmap page: the compressed
- page size if there is one, else the uncompressed page size. */
- const ulint n_pages = zip_size ? zip_size : UNIV_PAGE_SIZE;
-
- /* Write all zeros to the bitmap */
- memset(page + IBUF_BITMAP, 0,
- UT_BITS_IN_BYTES(n_pages * IBUF_BITS_PER_PAGE));
-
- /* The remaining area (up to the page trailer) is uninitialized. */
-
-#ifndef UNIV_HOTBACKUP
- mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr);
-#endif /* !UNIV_HOTBACKUP */
-}
-
-/*********************************************************************//**
-Parses a redo log record of an ibuf bitmap page init. The record is not
-advanced over: ptr is returned unchanged. When a block is given, the
-bitmap page is initialized in it.
-@return ptr, as passed in */
-UNIV_INTERN
-byte*
-ibuf_parse_bitmap_init(
-/*===================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */
- buf_block_t* block, /*!< in: block or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ut_ad(ptr && end_ptr);
-
- if (block != NULL) {
- /* Apply the record to the page. */
- ibuf_bitmap_page_init(block, mtr);
- }
-
- return(ptr);
-}
-#ifndef UNIV_HOTBACKUP
-# ifdef UNIV_DEBUG
-/** Gets the desired bits for a given page from a bitmap page.
-@param page in: bitmap page
-@param offset in: page whose bits to get
-@param zs in: compressed page size in bytes; 0 for uncompressed pages
-@param bit in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
-@param mtr in: mini-transaction holding an x-latch on the bitmap page
-@return value of bits */
-# define ibuf_bitmap_page_get_bits(page, offset, zs, bit, mtr) \
- ibuf_bitmap_page_get_bits_low(page, offset, zs, \
- MTR_MEMO_PAGE_X_FIX, mtr, bit)
-# else /* UNIV_DEBUG */
-/** Gets the desired bits for a given page from a bitmap page.
-@param page in: bitmap page
-@param offset in: page whose bits to get
-@param zs in: compressed page size in bytes; 0 for uncompressed pages
-@param bit in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
-@param mtr in: mini-transaction holding an x-latch on the bitmap page
-@return value of bits */
-# define ibuf_bitmap_page_get_bits(page, offset, zs, bit, mtr) \
- ibuf_bitmap_page_get_bits_low(page, offset, zs, bit)
-# endif /* UNIV_DEBUG */
-
-/********************************************************************//**
-Gets the desired bits for a given page from a bitmap page.
-The bit position is (page_no modulo the number of pages described by
-one bitmap page) * IBUF_BITS_PER_PAGE + bit, where that number of pages
-is zip_size for compressed pages and UNIV_PAGE_SIZE otherwise.
-For IBUF_BITMAP_FREE two consecutive bits are read and combined as
-(first bit) * 2 + (second bit); for other values of bit a single bit
-is returned. The latch_type and mtr parameters exist only in
-UNIV_DEBUG builds, where they are used in an assertion.
-@return value of bits */
-UNIV_INLINE
-ulint
-ibuf_bitmap_page_get_bits_low(
-/*==========================*/
- const page_t* page, /*!< in: bitmap page */
- ulint page_no,/*!< in: page whose bits to get */
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
-#ifdef UNIV_DEBUG
- ulint latch_type,
- /*!< in: MTR_MEMO_PAGE_X_FIX,
- MTR_MEMO_BUF_FIX, ... */
- mtr_t* mtr, /*!< in: mini-transaction holding latch_type
- on the bitmap page */
-#endif /* UNIV_DEBUG */
- ulint bit) /*!< in: IBUF_BITMAP_FREE,
- IBUF_BITMAP_BUFFERED, ... */
-{
- ulint byte_offset;
- ulint bit_offset;
- ulint map_byte;
- ulint value;
-
- ut_ad(bit < IBUF_BITS_PER_PAGE);
-#if IBUF_BITS_PER_PAGE % 2
-# error "IBUF_BITS_PER_PAGE % 2 != 0"
-#endif
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(mtr_memo_contains_page(mtr, page, latch_type));
-
- if (!zip_size) {
- bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
- + bit;
- } else {
- bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
- + bit;
- }
-
- byte_offset = bit_offset / 8;
- bit_offset = bit_offset % 8;
-
- ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
-
- map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
-
- value = ut_bit_get_nth(map_byte, bit_offset);
-
- if (bit == IBUF_BITMAP_FREE) {
- ut_ad(bit_offset + 1 < 8);
-
- value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1);
- }
-
- return(value);
-}
-
-/********************************************************************//**
-Sets the desired bit for a given page in a bitmap page.
-The bit position is (page_no modulo the number of pages described by
-one bitmap page) * IBUF_BITS_PER_PAGE + bit, where that number of pages
-is zip_size for compressed pages and UNIV_PAGE_SIZE otherwise.
-For IBUF_BITMAP_FREE two consecutive bits are written: val / 2 into the
-first and val % 2 into the second; for other values of bit a single bit
-is written. The byte containing the bits is read, modified and written
-back with mlog_write_ulint() as an MLOG_1BYTE write. */
-static
-void
-ibuf_bitmap_page_set_bits(
-/*======================*/
- page_t* page, /*!< in: bitmap page */
- ulint page_no,/*!< in: page whose bits to set */
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint bit, /*!< in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
- ulint val, /*!< in: value to set; debug builds assert val <= 3
- for IBUF_BITMAP_FREE and val <= 1 otherwise */
- mtr_t* mtr) /*!< in: mtr containing an x-latch to the bitmap page */
-{
- ulint byte_offset;
- ulint bit_offset;
- ulint map_byte;
-
- ut_ad(bit < IBUF_BITS_PER_PAGE);
-#if IBUF_BITS_PER_PAGE % 2
-# error "IBUF_BITS_PER_PAGE % 2 != 0"
-#endif
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
- || (0 == ibuf_count_get(page_get_space_id(page),
- page_no)));
-#endif
- if (!zip_size) {
- bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
- + bit;
- } else {
- bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
- + bit;
- }
-
- byte_offset = bit_offset / 8;
- bit_offset = bit_offset % 8;
-
- ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
-
- map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
-
- if (bit == IBUF_BITMAP_FREE) {
- ut_ad(bit_offset + 1 < 8);
- ut_ad(val <= 3);
-
- map_byte = ut_bit_set_nth(map_byte, bit_offset, val / 2);
- map_byte = ut_bit_set_nth(map_byte, bit_offset + 1, val % 2);
- } else {
- ut_ad(val <= 1);
- map_byte = ut_bit_set_nth(map_byte, bit_offset, val);
- }
-
- mlog_write_ulint(page + IBUF_BITMAP + byte_offset, map_byte,
- MLOG_1BYTE, mtr);
-}
-
-/********************************************************************//**
-Calculates the bitmap page number for a given page number: page_no is
-rounded down to a multiple of the number of pages described by one
-bitmap page (zip_size for compressed pages, UNIV_PAGE_SIZE otherwise)
-and FSP_IBUF_BITMAP_OFFSET is added.
-@return the bitmap page number where the file page is mapped */
-UNIV_INLINE
-ulint
-ibuf_bitmap_page_no_calc(
-/*=====================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no) /*!< in: tablespace page number */
-{
- ut_ad(ut_is_2pow(zip_size));
-
- const ulint pages_per_bitmap = zip_size ? zip_size : UNIV_PAGE_SIZE;
-
- /* pages_per_bitmap is treated as a power of two, so masking off
- the low bits rounds page_no down to the start of its group. */
- const ulint group_start = page_no & ~(pages_per_bitmap - 1);
-
- return(FSP_IBUF_BITMAP_OFFSET + group_start);
-}
-
-/********************************************************************//**
-Fetches, with an x-latch, the ibuf bitmap page that holds the
-descriptor bits of a given file page.
-@return frame of the bitmap page, or NULL if buf_page_get_gen() did not
-report DB_SUCCESS */
-static
-page_t*
-ibuf_bitmap_get_map_page_func(
-/*==========================*/
- ulint space, /*!< in: space id of the file page */
- ulint page_no,/*!< in: page number of the file page */
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr */
-{
- dberr_t err = DB_SUCCESS;
- const ulint bitmap_page_no = ibuf_bitmap_page_no_calc(zip_size,
- page_no);
-
- buf_block_t* bitmap_block = buf_page_get_gen(
- space, zip_size, bitmap_page_no,
- RW_X_LATCH, NULL, BUF_GET,
- file, line, mtr, &err);
-
- if (err == DB_SUCCESS) {
- buf_block_dbg_add_level(bitmap_block, SYNC_IBUF_BITMAP);
-
- return(buf_block_get_frame(bitmap_block));
- }
-
- return NULL;
-}
-
-/********************************************************************//**
-Gets the ibuf bitmap page where the bits describing a given file page are
-stored.
-@return bitmap page where the file page is mapped, that is, the bitmap
-page containing the descriptor bits for the file page; the bitmap page
-is x-latched
-@param space in: space id of the file page
-@param page_no in: page number of the file page
-@param zip_size in: compressed page size in bytes; 0 for uncompressed pages
-@param mtr in: mini-transaction */
-#define ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr) \
- ibuf_bitmap_get_map_page_func(space, page_no, zip_size, \
- __FILE__, __LINE__, mtr)
-
-/************************************************************************//**
-Sets the free bits of the page in the ibuf bitmap. This is done in a separate
-mini-transaction, hence this operation does not restrict further work to only
-ibuf bitmap operations, which would result if the latch to the bitmap page
-were kept. */
-UNIV_INLINE
-void
-ibuf_set_free_bits_low(
-/*===================*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- const buf_block_t* block, /*!< in: index page; free bits are set if
- the index is non-clustered and page
- level is 0 */
- ulint val, /*!< in: value to set: < 4 */
- mtr_t* mtr) /*!< in/out: mtr */
-{
- page_t* bitmap_page;
- ulint space;
- ulint page_no;
- buf_frame_t* frame;
-
- if (!block) {
- return;
- }
-
- frame = buf_block_get_frame(block);
-
- if (!frame || !page_is_leaf(frame)) {
- return;
- }
-
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
-#ifdef UNIV_IBUF_DEBUG
-# if 0
- fprintf(stderr,
- "Setting space %lu page %lu free bits to %lu should be %lu\n",
- space, page_no, val,
- ibuf_index_page_calc_free(zip_size, block));
-# endif
-
- ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
-#endif /* UNIV_IBUF_DEBUG */
- ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, val, mtr);
-}
-
-/************************************************************************//**
-Sets the free bit of the page in the ibuf bitmap. This is done in a separate
-mini-transaction, hence this operation does not restrict further work to only
-ibuf bitmap operations, which would result if the latch to the bitmap page
-were kept. */
-UNIV_INTERN
-void
-ibuf_set_free_bits_func(
-/*====================*/
- buf_block_t* block, /*!< in: index page of a non-clustered index;
- free bit is reset if page level is 0 */
-#ifdef UNIV_IBUF_DEBUG
- ulint max_val,/*!< in: ULINT_UNDEFINED or a maximum
- value which the bits must have before
- setting; this is for debugging */
-#endif /* UNIV_IBUF_DEBUG */
- ulint val) /*!< in: value to set: < 4 */
-{
- mtr_t mtr;
- page_t* page;
- page_t* bitmap_page;
- ulint space;
- ulint page_no;
- ulint zip_size;
-
- page = buf_block_get_frame(block);
-
- if (!page_is_leaf(page)) {
-
- return;
- }
-
- mtr_start(&mtr);
-
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
- zip_size = buf_block_get_zip_size(block);
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, &mtr);
-
-#ifdef UNIV_IBUF_DEBUG
- if (max_val != ULINT_UNDEFINED) {
- ulint old_val;
-
- old_val = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, &mtr);
-# if 0
- if (old_val != max_val) {
- fprintf(stderr,
- "Ibuf: page %lu old val %lu max val %lu\n",
- page_get_page_no(page),
- old_val, max_val);
- }
-# endif
-
- ut_a(old_val <= max_val);
- }
-# if 0
- fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n",
- page_get_page_no(page), val,
- ibuf_index_page_calc_free(zip_size, block));
-# endif
-
- ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
-#endif /* UNIV_IBUF_DEBUG */
- ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, val, &mtr);
- mtr_commit(&mtr);
-}
-
-/************************************************************************//**
-Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict
-further work to only ibuf bitmap operations, which would result if the
-latch to the bitmap page were kept. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to decrement or reset the bits in the bitmap in a mini-transaction
-that is committed before the mini-transaction that affects the free
-space. */
-UNIV_INTERN
-void
-ibuf_reset_free_bits(
-/*=================*/
- buf_block_t* block) /*!< in: index page; free bits are set to 0
- if the index is a non-clustered
- non-unique, and page level is 0 */
-{
- ibuf_set_free_bits(block, 0, ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Updates the free bits for an uncompressed page to reflect the present
-state. Does this in the mtr given, which means that the latching
-order rules virtually prevent any further operations for this OS
-thread until mtr is committed. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to set the free bits in the same mini-transaction that updated the
-page. */
-UNIV_INTERN
-void
-ibuf_update_free_bits_low(
-/*======================*/
- const buf_block_t* block, /*!< in: index page */
- ulint max_ins_size, /*!< in: value of
- maximum insert size
- with reorganize before
- the latest operation
- performed to the page */
- mtr_t* mtr) /*!< in/out: mtr */
-{
- ulint before;
- ulint after;
-
- ut_a(!buf_block_get_page_zip(block));
-
- before = ibuf_index_page_calc_free_bits(0, max_ins_size);
-
- after = ibuf_index_page_calc_free(0, block);
-
- /* This approach cannot be used on compressed pages, since the
- computed value of "before" often does not match the current
- state of the bitmap. This is because the free space may
- increase or decrease when a compressed page is reorganized. */
- if (before != after) {
- ibuf_set_free_bits_low(0, block, after, mtr);
- }
-}
-
-/**********************************************************************//**
-Updates the free bits for a compressed page to reflect the present
-state. Does this in the mtr given, which means that the latching
-order rules virtually prevent any further operations for this OS
-thread until mtr is committed. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to set the free bits in the same mini-transaction that updated the
-page. */
-UNIV_INTERN
-void
-ibuf_update_free_bits_zip(
-/*======================*/
- buf_block_t* block, /*!< in/out: index page */
- mtr_t* mtr) /*!< in/out: mtr */
-{
- page_t* bitmap_page;
- ulint space;
- ulint page_no;
- ulint zip_size;
- ulint after;
-
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
- zip_size = buf_block_get_zip_size(block);
-
- ut_a(block);
-
- buf_frame_t* frame = buf_block_get_frame(block);
-
- ut_a(frame && page_is_leaf(frame));
- ut_a(zip_size);
-
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
-
- after = ibuf_index_page_calc_free_zip(zip_size, block);
-
- if (after == 0) {
- /* We move the page to the front of the buffer pool LRU list:
- the purpose of this is to prevent those pages to which we
- cannot make inserts using the insert buffer from slipping
- out of the buffer pool */
-
- buf_page_make_young(&block->page);
- }
-
- ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, after, mtr);
-}
-
-/**********************************************************************//**
-Updates the free bits for the two pages to reflect the present state.
-Does this in the mtr given, which means that the latching order rules
-virtually prevent any further operations until mtr is committed.
-NOTE: The free bits in the insert buffer bitmap must never exceed the
-free space on a page. It is safe to set the free bits in the same
-mini-transaction that updated the pages. */
-UNIV_INTERN
-void
-ibuf_update_free_bits_for_two_pages_low(
-/*====================================*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- buf_block_t* block1, /*!< in: index page */
- buf_block_t* block2, /*!< in: index page */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint state;
-
- /* As we have to x-latch two random bitmap pages, we have to acquire
- the bitmap mutex to prevent a deadlock with a similar operation
- performed by another OS thread. */
-
- mutex_enter(&ibuf_bitmap_mutex);
-
- state = ibuf_index_page_calc_free(zip_size, block1);
-
- ibuf_set_free_bits_low(zip_size, block1, state, mtr);
-
- state = ibuf_index_page_calc_free(zip_size, block2);
-
- ibuf_set_free_bits_low(zip_size, block2, state, mtr);
-
- mutex_exit(&ibuf_bitmap_mutex);
-}
-
-/**********************************************************************//**
-Returns TRUE if the page is one of the fixed address ibuf pages.
-@return TRUE if a fixed address ibuf i/o page */
-UNIV_INLINE
-ibool
-ibuf_fixed_addr_page(
-/*=================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no)/*!< in: page number */
-{
- return((space == IBUF_SPACE_ID && page_no == IBUF_TREE_ROOT_PAGE_NO)
- || ibuf_bitmap_page(zip_size, page_no));
-}
-
-/***********************************************************************//**
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
-Must not be called when recv_no_ibuf_operations==TRUE.
-@return TRUE if level 2 or level 3 page */
-UNIV_INTERN
-ibool
-ibuf_page_low(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no,/*!< in: page number */
-#ifdef UNIV_DEBUG
- ibool x_latch,/*!< in: FALSE if relaxed check
- (avoid latching the bitmap page) */
-#endif /* UNIV_DEBUG */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr which will contain an
- x-latch to the bitmap page if the page
- is not one of the fixed address ibuf
- pages, or NULL, in which case a new
- transaction is created. */
-{
- ibool ret;
- mtr_t local_mtr;
- page_t* bitmap_page;
-
- ut_ad(!recv_no_ibuf_operations);
- ut_ad(x_latch || mtr == NULL);
-
- if (ibuf_fixed_addr_page(space, zip_size, page_no)) {
-
- return(TRUE);
- } else if (space != IBUF_SPACE_ID) {
-
- return(FALSE);
- }
-
- ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TABLESPACE);
-
-#ifdef UNIV_DEBUG
- if (!x_latch) {
- mtr_start(&local_mtr);
-
- /* Get the bitmap page without a page latch, so that
- we will not be violating the latching order when
- another bitmap page has already been latched by this
- thread. The page will be buffer-fixed, and thus it
- cannot be removed or relocated while we are looking at
- it. The contents of the page could change, but the
- IBUF_BITMAP_IBUF bit that we are interested in should
- not be modified by any other thread. Nobody should be
- calling ibuf_add_free_page() or ibuf_remove_free_page()
- while the page is linked to the insert buffer b-tree. */
-
- bitmap_page = buf_block_get_frame(
- buf_page_get_gen(
- space, zip_size,
- ibuf_bitmap_page_no_calc(zip_size, page_no),
- RW_NO_LATCH, NULL, BUF_GET_NO_LATCH,
- file, line, &local_mtr));
-
- ret = ibuf_bitmap_page_get_bits_low(
- bitmap_page, page_no, zip_size,
- MTR_MEMO_BUF_FIX, &local_mtr, IBUF_BITMAP_IBUF);
-
- mtr_commit(&local_mtr);
- return(ret);
- }
-#endif /* UNIV_DEBUG */
-
- if (mtr == NULL) {
- mtr = &local_mtr;
- mtr_start(mtr);
- }
-
- bitmap_page = ibuf_bitmap_get_map_page_func(space, page_no, zip_size,
- file, line, mtr);
-
- ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
- IBUF_BITMAP_IBUF, mtr);
-
- if (mtr == &local_mtr) {
- mtr_commit(mtr);
- }
-
- return(ret);
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_rec_get_page_no(mtr,rec) ibuf_rec_get_page_no_func(mtr,rec)
-#else /* UNIV_DEBUG */
-# define ibuf_rec_get_page_no(mtr,rec) ibuf_rec_get_page_no_func(rec)
-#endif /* UNIV_DEBUG */
-
-/********************************************************************//**
-Returns the page number field of an ibuf record.
-@return page number */
-static
-ulint
-ibuf_rec_get_page_no_func(
-/*======================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction owning rec */
-#endif /* UNIV_DEBUG */
- const rec_t* rec) /*!< in: ibuf record */
-{
- const byte* field;
- ulint len;
-
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
- ut_ad(rec_get_n_fields_old(rec) > 2);
-
- field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
-
- ut_a(len == 1);
-
- field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len);
-
- ut_a(len == 4);
-
- return(mach_read_from_4(field));
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_rec_get_space(mtr,rec) ibuf_rec_get_space_func(mtr,rec)
-#else /* UNIV_DEBUG */
-# define ibuf_rec_get_space(mtr,rec) ibuf_rec_get_space_func(rec)
-#endif /* UNIV_DEBUG */
-
-/********************************************************************//**
-Returns the space id field of an ibuf record. For < 4.1.x format records
-returns 0.
-@return space id */
-static
-ulint
-ibuf_rec_get_space_func(
-/*====================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction owning rec */
-#endif /* UNIV_DEBUG */
- const rec_t* rec) /*!< in: ibuf record */
-{
- const byte* field;
- ulint len;
-
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
- ut_ad(rec_get_n_fields_old(rec) > 2);
-
- field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
-
- ut_a(len == 1);
-
- field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len);
-
- ut_a(len == 4);
-
- return(mach_read_from_4(field));
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_rec_get_info(mtr,rec,op,comp,info_len,counter) \
- ibuf_rec_get_info_func(mtr,rec,op,comp,info_len,counter)
-#else /* UNIV_DEBUG */
-# define ibuf_rec_get_info(mtr,rec,op,comp,info_len,counter) \
- ibuf_rec_get_info_func(rec,op,comp,info_len,counter)
-#endif
-/****************************************************************//**
-Get various information about an ibuf record in >= 4.1.x format. */
-static
-void
-ibuf_rec_get_info_func(
-/*===================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction owning rec */
-#endif /* UNIV_DEBUG */
- const rec_t* rec, /*!< in: ibuf record */
- ibuf_op_t* op, /*!< out: operation type, or NULL */
- ibool* comp, /*!< out: compact flag, or NULL */
- ulint* info_len, /*!< out: length of info fields at the
- start of the fourth field, or
- NULL */
- ulint* counter) /*!< in: counter value, or NULL */
-{
- const byte* types;
- ulint fields;
- ulint len;
-
- /* Local variables to shadow arguments. */
- ibuf_op_t op_local;
- ibool comp_local;
- ulint info_len_local;
- ulint counter_local;
-
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
- fields = rec_get_n_fields_old(rec);
- ut_a(fields > IBUF_REC_FIELD_USER);
-
- types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
-
- info_len_local = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
-
- switch (info_len_local) {
- case 0:
- case 1:
- op_local = IBUF_OP_INSERT;
- comp_local = info_len_local;
- ut_ad(!counter);
- counter_local = ULINT_UNDEFINED;
- break;
-
- case IBUF_REC_INFO_SIZE:
- op_local = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
- comp_local = types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT;
- counter_local = mach_read_from_2(
- types + IBUF_REC_OFFSET_COUNTER);
- break;
-
- default:
- ut_error;
- }
-
- ut_a(op_local < IBUF_OP_COUNT);
- ut_a((len - info_len_local) ==
- (fields - IBUF_REC_FIELD_USER)
- * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
-
- if (op) {
- *op = op_local;
- }
-
- if (comp) {
- *comp = comp_local;
- }
-
- if (info_len) {
- *info_len = info_len_local;
- }
-
- if (counter) {
- *counter = counter_local;
- }
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_rec_get_op_type(mtr,rec) ibuf_rec_get_op_type_func(mtr,rec)
-#else /* UNIV_DEBUG */
-# define ibuf_rec_get_op_type(mtr,rec) ibuf_rec_get_op_type_func(rec)
-#endif
-
-/****************************************************************//**
-Returns the operation type field of an ibuf record.
-@return operation type */
-static
-ibuf_op_t
-ibuf_rec_get_op_type_func(
-/*======================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction owning rec */
-#endif /* UNIV_DEBUG */
- const rec_t* rec) /*!< in: ibuf record */
-{
- ulint len;
-
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
- ut_ad(rec_get_n_fields_old(rec) > 2);
-
- (void) rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
-
- if (len > 1) {
- /* This is a < 4.1.x format record */
-
- return(IBUF_OP_INSERT);
- } else {
- ibuf_op_t op;
-
- ibuf_rec_get_info(mtr, rec, &op, NULL, NULL, NULL);
-
- return(op);
- }
-}
-
-/****************************************************************//**
-Read the first two bytes from a record's fourth field (counter field in new
-records; something else in older records).
-@return "counter" field, or ULINT_UNDEFINED if for some reason it
-can't be read */
-UNIV_INTERN
-ulint
-ibuf_rec_get_counter(
-/*=================*/
- const rec_t* rec) /*!< in: ibuf record */
-{
- const byte* ptr;
- ulint len;
-
- if (rec_get_n_fields_old(rec) <= IBUF_REC_FIELD_METADATA) {
-
- return(ULINT_UNDEFINED);
- }
-
- ptr = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
-
- if (len >= 2) {
-
- return(mach_read_from_2(ptr));
- } else {
-
- return(ULINT_UNDEFINED);
- }
-}
-
-/****************************************************************//**
-Add accumulated operation counts to a permanent array. Both arrays must be
-of size IBUF_OP_COUNT. */
-static
-void
-ibuf_add_ops(
-/*=========*/
- ulint* arr, /*!< in/out: array to modify */
- const ulint* ops) /*!< in: operation counts */
-
-{
- ulint i;
-
-#ifndef HAVE_ATOMIC_BUILTINS
- ut_ad(mutex_own(&ibuf_mutex));
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
- for (i = 0; i < IBUF_OP_COUNT; i++) {
-#ifdef HAVE_ATOMIC_BUILTINS
- os_atomic_increment_ulint(&arr[i], ops[i]);
-#else /* HAVE_ATOMIC_BUILTINS */
- arr[i] += ops[i];
-#endif /* HAVE_ATOMIC_BUILTINS */
- }
-}
-
-/****************************************************************//**
-Print operation counts. The array must be of size IBUF_OP_COUNT. */
-static
-void
-ibuf_print_ops(
-/*===========*/
- const ulint* ops, /*!< in: operation counts */
- FILE* file) /*!< in: file where to print */
-{
- static const char* op_names[] = {
- "insert",
- "delete mark",
- "delete"
- };
- ulint i;
-
- ut_a(UT_ARR_SIZE(op_names) == IBUF_OP_COUNT);
-
- for (i = 0; i < IBUF_OP_COUNT; i++) {
- fprintf(file, "%s %lu%s", op_names[i],
- (ulong) ops[i], (i < (IBUF_OP_COUNT - 1)) ? ", " : "");
- }
-
- putc('\n', file);
-}
-
-/********************************************************************//**
-Creates a dummy index for inserting a record to a non-clustered index.
-@return dummy index */
-static
-dict_index_t*
-ibuf_dummy_index_create(
-/*====================*/
- ulint n, /*!< in: number of fields */
- ibool comp) /*!< in: TRUE=use compact record format */
-{
- dict_table_t* table;
- dict_index_t* index;
-
- table = dict_mem_table_create("IBUF_DUMMY",
- DICT_HDR_SPACE, n,
- comp ? DICT_TF_COMPACT : 0, 0);
-
- index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY",
- DICT_HDR_SPACE, 0, n);
-
- index->table = table;
-
- /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
- index->cached = TRUE;
-
- return(index);
-}
-/********************************************************************//**
-Add a column to the dummy index */
-static
-void
-ibuf_dummy_index_add_col(
-/*=====================*/
- dict_index_t* index, /*!< in: dummy index */
- const dtype_t* type, /*!< in: the data type of the column */
- ulint len) /*!< in: length of the column */
-{
- ulint i = index->table->n_def;
- dict_mem_table_add_col(index->table, NULL, NULL,
- dtype_get_mtype(type),
- dtype_get_prtype(type),
- dtype_get_len(type));
- dict_index_add_col(index, index->table,
- dict_table_get_nth_col(index->table, i), len);
-}
-/********************************************************************//**
-Deallocates a dummy index for inserting a record to a non-clustered index. */
-static
-void
-ibuf_dummy_index_free(
-/*==================*/
- dict_index_t* index) /*!< in, own: dummy index */
-{
- dict_table_t* table = index->table;
-
- dict_mem_index_free(index);
- dict_mem_table_free(table);
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_build_entry_from_ibuf_rec(mtr,ibuf_rec,heap,pindex) \
- ibuf_build_entry_from_ibuf_rec_func(mtr,ibuf_rec,heap,pindex)
-#else /* UNIV_DEBUG */
-# define ibuf_build_entry_from_ibuf_rec(mtr,ibuf_rec,heap,pindex) \
- ibuf_build_entry_from_ibuf_rec_func(ibuf_rec,heap,pindex)
-#endif
-
-/*********************************************************************//**
-Builds the entry used to
-
-1) IBUF_OP_INSERT: insert into a non-clustered index
-
-2) IBUF_OP_DELETE_MARK: find the record whose delete-mark flag we need to
- activate
-
-3) IBUF_OP_DELETE: find the record we need to delete
-
-when we have the corresponding record in an ibuf index.
-
-NOTE that as we copy pointers to fields in ibuf_rec, the caller must
-hold a latch to the ibuf_rec page as long as the entry is used!
-
-@return own: entry to insert to a non-clustered index */
-static
-dtuple_t*
-ibuf_build_entry_from_ibuf_rec_func(
-/*================================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction owning rec */
-#endif /* UNIV_DEBUG */
- const rec_t* ibuf_rec, /*!< in: record in an insert buffer */
- mem_heap_t* heap, /*!< in: heap where built */
- dict_index_t** pindex) /*!< out, own: dummy index that
- describes the entry */
-{
- dtuple_t* tuple;
- dfield_t* field;
- ulint n_fields;
- const byte* types;
- const byte* data;
- ulint len;
- ulint info_len;
- ulint i;
- ulint comp;
- dict_index_t* index;
-
- ut_ad(mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
-
- data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len);
-
- ut_a(len == 1);
- ut_a(*data == 0);
- ut_a(rec_get_n_fields_old(ibuf_rec) > IBUF_REC_FIELD_USER);
-
- n_fields = rec_get_n_fields_old(ibuf_rec) - IBUF_REC_FIELD_USER;
-
- tuple = dtuple_create(heap, n_fields);
-
- types = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_METADATA, &len);
-
- ibuf_rec_get_info(mtr, ibuf_rec, NULL, &comp, &info_len, NULL);
-
- index = ibuf_dummy_index_create(n_fields, comp);
-
- len -= info_len;
- types += info_len;
-
- ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
-
- for (i = 0; i < n_fields; i++) {
- field = dtuple_get_nth_field(tuple, i);
-
- data = rec_get_nth_field_old(
- ibuf_rec, i + IBUF_REC_FIELD_USER, &len);
-
- dfield_set_data(field, data, len);
-
- dtype_new_read_for_order_and_null_size(
- dfield_get_type(field),
- types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
-
- ibuf_dummy_index_add_col(index, dfield_get_type(field), len);
- }
-
- /* Prevent an ut_ad() failure in page_zip_write_rec() by
- adding system columns to the dummy table pointed to by the
- dummy secondary index. The insert buffer is only used for
- secondary indexes, whose records never contain any system
- columns, such as DB_TRX_ID. */
- ut_d(dict_table_add_system_columns(index->table, index->table->heap));
-
- *pindex = index;
-
- return(tuple);
-}
-
-/******************************************************************//**
-Get the data size.
-@return size of fields */
-UNIV_INLINE
-ulint
-ibuf_rec_get_size(
-/*==============*/
- const rec_t* rec, /*!< in: ibuf record */
- const byte* types, /*!< in: fields */
- ulint n_fields, /*!< in: number of fields */
- ulint comp) /*!< in: 0=ROW_FORMAT=REDUNDANT,
- nonzero=ROW_FORMAT=COMPACT */
-{
- ulint i;
- ulint field_offset;
- ulint types_offset;
- ulint size = 0;
-
- field_offset = IBUF_REC_FIELD_USER;
- types_offset = DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
-
- for (i = 0; i < n_fields; i++) {
- ulint len;
- dtype_t dtype;
-
- rec_get_nth_field_offs_old(rec, i + field_offset, &len);
-
- if (len != UNIV_SQL_NULL) {
- size += len;
- } else {
- dtype_new_read_for_order_and_null_size(&dtype, types);
-
- size += dtype_get_sql_null_size(&dtype, comp);
- }
-
- types += types_offset;
- }
-
- return(size);
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_rec_get_volume(mtr,rec) ibuf_rec_get_volume_func(mtr,rec)
-#else /* UNIV_DEBUG */
-# define ibuf_rec_get_volume(mtr,rec) ibuf_rec_get_volume_func(rec)
-#endif
-
-/********************************************************************//**
-Returns the space taken by a stored non-clustered index entry if converted to
-an index record.
-@return size of index record in bytes + an upper limit of the space
-taken in the page directory */
-static
-ulint
-ibuf_rec_get_volume_func(
-/*=====================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction owning rec */
-#endif /* UNIV_DEBUG */
- const rec_t* ibuf_rec)/*!< in: ibuf record */
-{
- ulint len;
- const byte* data;
- const byte* types;
- ulint n_fields;
- ulint data_size;
- ulint comp;
- ibuf_op_t op;
- ulint info_len;
-
- ut_ad(mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
- ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
-
- data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len);
- ut_a(len == 1);
- ut_a(*data == 0);
-
- types = rec_get_nth_field_old(
- ibuf_rec, IBUF_REC_FIELD_METADATA, &len);
-
- ibuf_rec_get_info(mtr, ibuf_rec, &op, &comp, &info_len, NULL);
-
- if (op == IBUF_OP_DELETE_MARK || op == IBUF_OP_DELETE) {
- /* Delete-marking a record doesn't take any
- additional space, and while deleting a record
- actually frees up space, we have to play it safe and
- pretend it takes no additional space (the record
- might not exist, etc.). */
-
- return(0);
- } else if (comp) {
- dtuple_t* entry;
- ulint volume;
- dict_index_t* dummy_index;
- mem_heap_t* heap = mem_heap_create(500);
-
- entry = ibuf_build_entry_from_ibuf_rec(mtr, ibuf_rec,
- heap, &dummy_index);
-
- volume = rec_get_converted_size(dummy_index, entry, 0);
-
- ibuf_dummy_index_free(dummy_index);
- mem_heap_free(heap);
-
- return(volume + page_dir_calc_reserved_space(1));
- }
-
- types += info_len;
- n_fields = rec_get_n_fields_old(ibuf_rec)
- - IBUF_REC_FIELD_USER;
-
- data_size = ibuf_rec_get_size(ibuf_rec, types, n_fields, comp);
-
- return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
- + page_dir_calc_reserved_space(1));
-}
-
-/*********************************************************************//**
-Builds the tuple to insert to an ibuf tree when we have an entry for a
-non-clustered index.
-
-NOTE that the original entry must be kept because we copy pointers to
-its fields.
-
-@return own: entry to insert into an ibuf index tree */
-static
-dtuple_t*
-ibuf_entry_build(
-/*=============*/
- ibuf_op_t op, /*!< in: operation type */
- dict_index_t* index, /*!< in: non-clustered index */
- const dtuple_t* entry, /*!< in: entry for a non-clustered index */
- ulint space, /*!< in: space id */
- ulint page_no,/*!< in: index page number where entry should
- be inserted */
- ulint counter,/*!< in: counter value;
- ULINT_UNDEFINED=not used */
- mem_heap_t* heap) /*!< in: heap into which to build */
-{
- dtuple_t* tuple;
- dfield_t* field;
- const dfield_t* entry_field;
- ulint n_fields;
- byte* buf;
- byte* ti;
- byte* type_info;
- ulint i;
-
- ut_ad(counter != ULINT_UNDEFINED || op == IBUF_OP_INSERT);
- ut_ad(counter == ULINT_UNDEFINED || counter <= 0xFFFF);
- ut_ad(op < IBUF_OP_COUNT);
-
- /* We have to build a tuple with the following fields:
-
- 1-4) These are described at the top of this file.
-
- 5) The rest of the fields are copied from the entry.
-
- All fields in the tuple are ordered like the type binary in our
- insert buffer tree. */
-
- n_fields = dtuple_get_n_fields(entry);
-
- tuple = dtuple_create(heap, n_fields + IBUF_REC_FIELD_USER);
-
- /* 1) Space Id */
-
- field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_SPACE);
-
- buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-
- mach_write_to_4(buf, space);
-
- dfield_set_data(field, buf, 4);
-
- /* 2) Marker byte */
-
- field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_MARKER);
-
- buf = static_cast<byte*>(mem_heap_alloc(heap, 1));
-
- /* We set the marker byte zero */
-
- mach_write_to_1(buf, 0);
-
- dfield_set_data(field, buf, 1);
-
- /* 3) Page number */
-
- field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_PAGE);
-
- buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-
- mach_write_to_4(buf, page_no);
-
- dfield_set_data(field, buf, 4);
-
- /* 4) Type info, part #1 */
-
- if (counter == ULINT_UNDEFINED) {
- i = dict_table_is_comp(index->table) ? 1 : 0;
- } else {
- ut_ad(counter <= 0xFFFF);
- i = IBUF_REC_INFO_SIZE;
- }
-
- ti = type_info = static_cast<byte*>(
- mem_heap_alloc(
- heap,
- i + n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE));
-
- switch (i) {
- default:
- ut_error;
- break;
- case 1:
- /* set the flag for ROW_FORMAT=COMPACT */
- *ti++ = 0;
- /* fall through */
- case 0:
- /* the old format does not allow delete buffering */
- ut_ad(op == IBUF_OP_INSERT);
- break;
- case IBUF_REC_INFO_SIZE:
- mach_write_to_2(ti + IBUF_REC_OFFSET_COUNTER, counter);
-
- ti[IBUF_REC_OFFSET_TYPE] = (byte) op;
- ti[IBUF_REC_OFFSET_FLAGS] = dict_table_is_comp(index->table)
- ? IBUF_REC_COMPACT : 0;
- ti += IBUF_REC_INFO_SIZE;
- break;
- }
-
- /* 5+) Fields from the entry */
-
- for (i = 0; i < n_fields; i++) {
- ulint fixed_len;
- const dict_field_t* ifield;
-
- field = dtuple_get_nth_field(tuple, i + IBUF_REC_FIELD_USER);
- entry_field = dtuple_get_nth_field(entry, i);
- dfield_copy(field, entry_field);
-
- ifield = dict_index_get_nth_field(index, i);
- /* Prefix index columns of fixed-length columns are of
- fixed length. However, in the function call below,
- dfield_get_type(entry_field) contains the fixed length
- of the column in the clustered index. Replace it with
- the fixed length of the secondary index column. */
- fixed_len = ifield->fixed_len;
-
-#ifdef UNIV_DEBUG
- if (fixed_len) {
- /* dict_index_add_col() should guarantee these */
- ut_ad(fixed_len <= (ulint)
- dfield_get_type(entry_field)->len);
- if (ifield->prefix_len) {
- ut_ad(ifield->prefix_len == fixed_len);
- } else {
- ut_ad(fixed_len == (ulint)
- dfield_get_type(entry_field)->len);
- }
- }
-#endif /* UNIV_DEBUG */
-
- dtype_new_store_for_order_and_null_size(
- ti, dfield_get_type(entry_field), fixed_len);
- ti += DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
- }
-
- /* 4) Type info, part #2 */
-
- field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_METADATA);
-
- dfield_set_data(field, type_info, ti - type_info);
-
- /* Set all the types in the new tuple binary */
-
- dtuple_set_types_binary(tuple, n_fields + IBUF_REC_FIELD_USER);
-
- return(tuple);
-}
-
-/*********************************************************************//**
-Creates the key used to look up the changes buffered for one index page.
-The key consists of the space id field, the new-format marker field and
-the page number field. This is for >= 4.1.x format records.
-@return own: search tuple, built in heap */
-static
-dtuple_t*
-ibuf_search_tuple_build(
-/*====================*/
-	ulint		space,	/*!< in: space id */
-	ulint		page_no,/*!< in: index page number */
-	mem_heap_t*	heap)	/*!< in: heap into which to build */
-{
-	dtuple_t*	key = dtuple_create(heap, IBUF_REC_FIELD_METADATA);
-
-	/* Reserve the field buffers in key order: 4 bytes for the space
-	id, 1 byte for the marker and 4 bytes for the page number. */
-
-	byte*	space_buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-	byte*	marker_buf = static_cast<byte*>(mem_heap_alloc(heap, 1));
-	byte*	page_buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-
-	/* Space id */
-
-	mach_write_to_4(space_buf, space);
-	dfield_set_data(dtuple_get_nth_field(key, IBUF_REC_FIELD_SPACE),
-			space_buf, 4);
-
-	/* The marker byte of a new format record is zero */
-
-	mach_write_to_1(marker_buf, 0);
-	dfield_set_data(dtuple_get_nth_field(key, IBUF_REC_FIELD_MARKER),
-			marker_buf, 1);
-
-	/* Page number */
-
-	mach_write_to_4(page_buf, page_no);
-	dfield_set_data(dtuple_get_nth_field(key, IBUF_REC_FIELD_PAGE),
-			page_buf, 4);
-
-	dtuple_set_types_binary(key, IBUF_REC_FIELD_METADATA);
-
-	return(key);
-}
-
-/*********************************************************************//**
-Tells whether the free list of the ibuf tree is long enough that we dare
-to start a pessimistic insert to the insert buffer.
-The caller must own ibuf_mutex.
-@return TRUE if enough free pages in list */
-UNIV_INLINE
-ibool
-ibuf_data_enough_free_for_insert(void)
-/*==================================*/
-{
-	ut_ad(mutex_own(&ibuf_mutex));
-
-	/* A generous margin of free pages is required. A B-tree can
-	sometimes grow even when records are deleted from it, because the
-	node pointers can change, and it must always be possible to delete
-	the inserts buffered for pages that are read to the buffer pool
-	without any risk of running out of free space in the insert
-	buffer. */
-
-	if (ibuf->free_list_len < (ibuf->size / 2) + 3 * ibuf->height) {
-
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-
-/*********************************************************************//**
-Tells whether the free list of the ibuf tree has grown so long that pages
-should be removed from it and freed to the file space management.
-The caller must own ibuf_mutex.
-@return TRUE if enough free pages in list */
-UNIV_INLINE
-ibool
-ibuf_data_too_much_free(void)
-/*=========================*/
-{
-	ut_ad(mutex_own(&ibuf_mutex));
-
-	if (ibuf->free_list_len < 3 + (ibuf->size / 2) + 3 * ibuf->height) {
-
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-
-/*********************************************************************//**
-Allocates a new page from the ibuf file segment and adds it to the free
-list. On success the page is appended to the end of the free list kept in
-the ibuf tree root page, its page type is set to FIL_PAGE_IBUF_FREE_LIST,
-ibuf->seg_size and ibuf->free_list_len are incremented, and the
-IBUF_BITMAP_IBUF bit of the page is set in the ibuf bitmap. All of this
-is done within a single mini-transaction.
-@return TRUE on success, FALSE if no space left (fseg_alloc_free_page()
-returned NULL) */
-static
-ibool
-ibuf_add_free_page(void)
-/*====================*/
-{
- mtr_t mtr;
- page_t* header_page;
- ulint flags;
- ulint zip_size;
- buf_block_t* block;
- page_t* page;
- page_t* root;
- page_t* bitmap_page;
-
- mtr_start(&mtr);
-
- /* Acquire the fsp latch before the ibuf header, obeying the latching
- order */
- mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
- zip_size = fsp_flags_get_zip_size(flags);
-
- header_page = ibuf_header_page_get(&mtr);
-
- /* Allocate a new page: NOTE that if the page has been a part of a
- non-clustered index which has subsequently been dropped, then the
- page may have buffered inserts in the insert buffer, and these
- should be deleted from there. These get deleted when the page
- allocation creates the page in buffer. Thus the call below may end
- up calling the insert buffer routines and, as we yet have no latches
- to insert buffer tree pages, these routines can run without a risk
- of a deadlock. This is the reason why we created a special ibuf
- header page apart from the ibuf tree. */
-
- block = fseg_alloc_free_page(
- header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
- &mtr);
-
- if (block == NULL) {
- mtr_commit(&mtr);
-
- return(FALSE);
- }
-
- ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1);
- ibuf_enter(&mtr);
- mutex_enter(&ibuf_mutex);
- root = ibuf_tree_root_get(&mtr);
-
- buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW);
- page = buf_block_get_frame(block);
-
- /* Add the page to the free list and update the ibuf size data.
- The counters in ibuf are changed while ibuf_mutex is held. */
-
- flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
-
- mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_IBUF_FREE_LIST,
- MLOG_2BYTES, &mtr);
-
- ibuf->seg_size++;
- ibuf->free_list_len++;
-
- /* Set the bit indicating that this page is now an ibuf tree page
- (level 2 page). The bitmap page is looked up while ibuf_mutex is
- still held; the bit itself is written after the mutex has been
- released. */
-
- bitmap_page = ibuf_bitmap_get_map_page(
- IBUF_SPACE_ID, buf_block_get_page_no(block), zip_size, &mtr);
-
- mutex_exit(&ibuf_mutex);
-
- ibuf_bitmap_page_set_bits(
- bitmap_page, buf_block_get_page_no(block), zip_size,
- IBUF_BITMAP_IBUF, TRUE, &mtr);
-
- ibuf_mtr_commit(&mtr);
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Removes a page from the free list and frees it to the fsp system.
-The function returns without doing anything unless
-ibuf_data_too_much_free() holds. The page that is removed is the last
-page of the free list kept in the ibuf tree root page. Pessimistic
-inserts are blocked with ibuf_pessimistic_insert_mutex from the check
-until the page has been unlinked from the free list. After that
-ibuf->seg_size and ibuf->free_list_len are decremented and the
-IBUF_BITMAP_IBUF bit of the page is cleared in the ibuf bitmap. */
-static
-void
-ibuf_remove_free_page(void)
-/*=======================*/
-{
- mtr_t mtr;
- mtr_t mtr2;
- page_t* header_page;
- ulint flags;
- ulint zip_size;
- ulint page_no;
- page_t* page;
- page_t* root;
- page_t* bitmap_page;
-
- mtr_start(&mtr);
-
- /* Acquire the fsp latch before the ibuf header, obeying the latching
- order */
- mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
- zip_size = fsp_flags_get_zip_size(flags);
-
- header_page = ibuf_header_page_get(&mtr);
-
- /* Prevent pessimistic inserts to insert buffer trees for a while */
- ibuf_enter(&mtr);
- mutex_enter(&ibuf_pessimistic_insert_mutex);
- mutex_enter(&ibuf_mutex);
-
- if (!ibuf_data_too_much_free()) {
-
- mutex_exit(&ibuf_mutex);
- mutex_exit(&ibuf_pessimistic_insert_mutex);
-
- ibuf_mtr_commit(&mtr);
-
- return;
- }
-
- ibuf_mtr_start(&mtr2);
-
- root = ibuf_tree_root_get(&mtr2);
-
- mutex_exit(&ibuf_mutex);
-
- page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- &mtr2).page;
-
- /* NOTE that we must release the latch on the ibuf tree root
- because in fseg_free_page we access level 1 pages, and the root
- is a level 2 page. This is why the root was read in the separate
- mini-transaction mtr2, which is committed here. */
-
- ibuf_mtr_commit(&mtr2);
- ibuf_exit(&mtr);
-
- /* Since pessimistic inserts were prevented, we know that the
- page is still in the free list. NOTE that also deletes may take
- pages from the free list, but they take them from the start, and
- the free list was so long that they cannot have taken the last
- page from it. */
-
- fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
- IBUF_SPACE_ID, page_no, &mtr);
-
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no);
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
-
- ibuf_enter(&mtr);
-
- mutex_enter(&ibuf_mutex);
-
- root = ibuf_tree_root_get(&mtr);
-
- ut_ad(page_no == flst_get_last(root + PAGE_HEADER
- + PAGE_BTR_IBUF_FREE_LIST, &mtr).page);
-
- {
- buf_block_t* block;
-
- block = buf_page_get(
- IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
-
- buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
-
- page = buf_block_get_frame(block);
- }
-
- /* Remove the page from the free list and update the ibuf size data.
- Pessimistic inserts are allowed again as soon as the page has been
- unlinked from the list. */
-
- flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
- page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
-
- mutex_exit(&ibuf_pessimistic_insert_mutex);
-
- ibuf->seg_size--;
- ibuf->free_list_len--;
-
- /* Set the bit indicating that this page is no more an ibuf tree page
- (level 2 page). The bitmap page is looked up while ibuf_mutex is
- still held; the bit itself is written after the mutex has been
- released. */
-
- bitmap_page = ibuf_bitmap_get_map_page(
- IBUF_SPACE_ID, page_no, zip_size, &mtr);
-
- mutex_exit(&ibuf_mutex);
-
- ibuf_bitmap_page_set_bits(
- bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr);
-
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no);
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
- ibuf_mtr_commit(&mtr);
-}
-
-/***********************************************************************//**
-Trims the ibuf free list when it has grown too long. This function is
-called when an OS thread calls fsp services to allocate a new file segment,
-or a new page to a file segment, and the thread did not own the fsp latch
-before this call. At most four pages are freed by one call. */
-UNIV_INTERN
-void
-ibuf_free_excess_pages(void)
-/*========================*/
-{
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(fil_space_get_latch(IBUF_SPACE_ID, NULL),
-			  RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	ut_ad(rw_lock_get_x_lock_count(
-		fil_space_get_latch(IBUF_SPACE_ID, NULL)) == 1);
-
-	/* NOTE: the thread must not have owned the latch before the
-	call; only then is it known that the correct latching order for
-	ibuf latches can be obeyed. */
-
-	if (!ibuf) {
-		/* Not yet initialized; it is unclear whether this can
-		happen, but the check does no harm. */
-
-		return;
-	}
-
-	/* Only a few pages are freed at a time, so that the requested
-	service is not delayed too much. */
-
-	for (ulint n_left = 4; n_left > 0; n_left--) {
-
-		mutex_enter(&ibuf_mutex);
-		const ibool	excess = ibuf_data_too_much_free();
-		mutex_exit(&ibuf_mutex);
-
-		if (!excess) {
-			break;
-		}
-
-		ibuf_remove_free_page();
-	}
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,vers,pages,n_stored) \
- ibuf_get_merge_page_nos_func(contract,rec,mtr,ids,vers,pages,n_stored)
-#else /* UNIV_DEBUG */
-# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,vers,pages,n_stored) \
- ibuf_get_merge_page_nos_func(contract,rec,ids,vers,pages,n_stored)
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Reads page numbers from a leaf in an ibuf tree.
-The scan is done in two passes within the leaf page that contains rec.
-First the function steps backwards from rec for as long as the records
-belong to the 'merge area' of the starting record (same space id and
-same value of page number / IBUF_MERGE_AREA) and fewer than 'limit'
-distinct pages have been seen, where limit is the smaller of
-IBUF_MAX_N_PAGES_MERGED and buf_pool_get_curr_size() / 4. Then it steps
-forwards from that point and, each time the (space id, page number) pair
-changes, decides whether to store the page that was just finished. A page
-is stored if contract is TRUE, or if it is the page of the starting
-record, or if the volume buffered for it exceeds a threshold computed
-from IBUF_MERGE_THRESHOLD, UNIV_PAGE_SIZE and
-IBUF_PAGE_SIZE_PER_FREE_SPACE. The forward pass ends when 'limit' pages
-have been stored, when a record outside the merge area is met, or when
-the page supremum is reached.
-@return a lower limit for the combined volume of records which will be
-merged; this is the sum of the volumes counted for the stored pages, and
-0 if the page contains no user records */
-static
-ulint
-ibuf_get_merge_page_nos_func(
-/*=========================*/
- ibool contract,/*!< in: TRUE if this function is called to
- contract the tree, FALSE if this is called
- when a single page becomes full and we look
- if it pays to read also nearby pages */
- const rec_t* rec, /*!< in: insert buffer record */
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction holding rec */
-#endif /* UNIV_DEBUG */
- ulint* space_ids,/*!< in/out: space id's of the pages */
- ib_int64_t* space_versions,/*!< in/out: tablespace version
- timestamps; used to prevent reading in old
- pages after DISCARD + IMPORT tablespace */
- ulint* page_nos,/*!< in/out: buffer for at least
- IBUF_MAX_N_PAGES_MERGED many page numbers;
- the page numbers are in an ascending order */
- ulint* n_stored)/*!< out: number of page numbers stored to
- page_nos in this function */
-{
- ulint prev_page_no;
- ulint prev_space_id;
- ulint first_page_no;
- ulint first_space_id;
- ulint rec_page_no;
- ulint rec_space_id;
- ulint sum_volumes;
- ulint volume_for_page;
- ulint rec_volume;
- ulint limit;
- ulint n_pages;
-
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
-
- *n_stored = 0;
-
- limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool_get_curr_size() / 4);
-
- if (page_rec_is_supremum(rec)) {
-
- rec = page_rec_get_prev_const(rec);
- }
-
- if (page_rec_is_infimum(rec)) {
-
- rec = page_rec_get_next_const(rec);
- }
-
- if (page_rec_is_supremum(rec)) {
-
- return(0);
- }
-
- first_page_no = ibuf_rec_get_page_no(mtr, rec);
- first_space_id = ibuf_rec_get_space(mtr, rec);
- n_pages = 0;
- prev_page_no = 0;
- prev_space_id = 0;
-
- /* Go backwards from the first rec until we reach the border of the
- 'merge area', or the page start or the limit of storable pages is
- reached */
-
- while (!page_rec_is_infimum(rec) && UNIV_LIKELY(n_pages < limit)) {
-
- rec_page_no = ibuf_rec_get_page_no(mtr, rec);
- rec_space_id = ibuf_rec_get_space(mtr, rec);
-
- if (rec_space_id != first_space_id
- || (rec_page_no / IBUF_MERGE_AREA)
- != (first_page_no / IBUF_MERGE_AREA)) {
-
- break;
- }
-
- if (rec_page_no != prev_page_no
- || rec_space_id != prev_space_id) {
- n_pages++;
- }
-
- prev_page_no = rec_page_no;
- prev_space_id = rec_space_id;
-
- rec = page_rec_get_prev_const(rec);
- }
-
- rec = page_rec_get_next_const(rec);
-
- /* At the loop start there is no prev page; we mark this with a pair
- of space id, page no (0, 0) for which there can never be entries in
- the insert buffer */
-
- prev_page_no = 0;
- prev_space_id = 0;
- sum_volumes = 0;
- volume_for_page = 0;
-
- while (*n_stored < limit) {
- if (page_rec_is_supremum(rec)) {
- /* When no more records available, mark this with
- another 'impossible' pair of space id, page no */
- rec_page_no = 1;
- rec_space_id = 0;
- } else {
- rec_page_no = ibuf_rec_get_page_no(mtr, rec);
- rec_space_id = ibuf_rec_get_space(mtr, rec);
- /* In the system tablespace, the smallest
- possible secondary index leaf page number is
- bigger than IBUF_TREE_ROOT_PAGE_NO (4). In
- other tablespaces, the clustered index tree is
- created at page 3, which makes page 4 the
- smallest possible secondary index leaf page
- (and that only after DROP INDEX). */
- ut_ad(rec_page_no
- > (ulint) IBUF_TREE_ROOT_PAGE_NO
- - (rec_space_id != 0));
- }
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED);
-#endif
- if ((rec_space_id != prev_space_id
- || rec_page_no != prev_page_no)
- && (prev_space_id != 0 || prev_page_no != 0)) {
-
- if (contract
- || (prev_page_no == first_page_no
- && prev_space_id == first_space_id)
- || (volume_for_page
- > ((IBUF_MERGE_THRESHOLD - 1)
- * 4 * UNIV_PAGE_SIZE
- / IBUF_PAGE_SIZE_PER_FREE_SPACE)
- / IBUF_MERGE_THRESHOLD)) {
-
- space_ids[*n_stored] = prev_space_id;
- space_versions[*n_stored]
- = fil_space_get_version(prev_space_id);
- page_nos[*n_stored] = prev_page_no;
-
- (*n_stored)++;
-
- sum_volumes += volume_for_page;
- }
-
- if (rec_space_id != first_space_id
- || rec_page_no / IBUF_MERGE_AREA
- != first_page_no / IBUF_MERGE_AREA) {
-
- break;
- }
-
- volume_for_page = 0;
- }
-
- if (rec_page_no == 1 && rec_space_id == 0) {
- /* Supremum record */
-
- break;
- }
-
- rec_volume = ibuf_rec_get_volume(mtr, rec);
-
- volume_for_page += rec_volume;
-
- prev_page_no = rec_page_no;
- prev_space_id = rec_space_id;
-
- rec = page_rec_get_next_const(rec);
- }
-
-#ifdef UNIV_IBUF_DEBUG
- ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED);
-#endif
-#if 0
- fprintf(stderr, "Ibuf merge batch %lu pages %lu volume\n",
- *n_stored, sum_volumes);
-#endif
- return(sum_volumes);
-}
-
-/*******************************************************************//**
-Positions the cursor on a user record. If the cursor is not on a user
-record, it is moved forward with btr_pcur_move_to_next() until it is, or
-until the cursor cannot be moved any further.
-@return the user record under the cursor, or NULL if none was found */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-const rec_t*
-ibuf_get_user_rec(
-/*===============*/
-	btr_pcur_t*	pcur,	/*!< in: the current cursor */
-	mtr_t*		mtr)	/*!< in: mini transaction */
-{
-	for (;;) {
-		const rec_t*	candidate = btr_pcur_get_rec(pcur);
-
-		if (page_rec_is_user_rec(candidate)) {
-
-			return(candidate);
-		}
-
-		if (!btr_pcur_move_to_next(pcur, mtr)) {
-
-			return(NULL);
-		}
-	}
-}
-
-/*********************************************************************//**
-Reads page numbers for a space id from an ibuf tree, starting from the
-current cursor position. The scan stops at the first record that belongs
-to another space, when no more user records are found, or when limit
-distinct pages have been collected. Consecutive records for the same page
-produce a single entry in the output arrays.
-@return a lower limit for the combined volume of records which will be
-merged */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-ulint
-ibuf_get_merge_pages(
-/*=================*/
-	btr_pcur_t*	pcur,	/*!< in/out: cursor */
-	ulint		space,	/*!< in: space for which to merge */
-	ulint		limit,	/*!< in: max page numbers to read */
-	ulint*		pages,	/*!< out: pages read */
-	ulint*		spaces,	/*!< out: spaces read */
-	ib_int64_t*	versions,/*!< out: space versions read */
-	ulint*		n_pages,/*!< out: number of pages read */
-	mtr_t*		mtr)	/*!< in: mini transaction */
-{
-	ulint		total_volume = 0;
-	ib_int64_t	space_version = fil_space_get_version(space);
-
-	ut_a(space != ULINT_UNDEFINED);
-
-	*n_pages = 0;
-
-	for (;;) {
-		const rec_t*	rec = ibuf_get_user_rec(pcur, mtr);
-
-		if (rec == NULL
-		    || ibuf_rec_get_space(mtr, rec) != space
-		    || *n_pages >= limit) {
-
-			break;
-		}
-
-		const ulint	page_no = ibuf_rec_get_page_no(mtr, rec);
-		const ulint	n_so_far = *n_pages;
-
-		/* Store the page only if it differs from the one stored
-		last. */
-		if (n_so_far == 0 || pages[n_so_far - 1] != page_no) {
-			spaces[n_so_far] = space;
-			pages[n_so_far] = page_no;
-			versions[n_so_far] = space_version;
-			*n_pages = n_so_far + 1;
-		}
-
-		total_volume += ibuf_rec_get_volume(mtr, rec);
-
-		btr_pcur_move_to_next(pcur, mtr);
-	}
-
-	return(total_volume);
-}
-
-/*********************************************************************//**
-Contracts insert buffer trees by reading pages to the buffer pool.
-A cursor is opened at a random position of a randomly chosen leaf of the
-ibuf tree, the pages to merge are collected from that leaf with
-ibuf_get_merge_page_nos() in 'contract' mode, and the collected pages are
-handed to buf_read_ibuf_merge_pages() after the mini-transaction has been
-committed and the cursor closed.
-@return a lower limit for the combined size in bytes of entries which
-will be merged from ibuf trees to the pages read, 0 if ibuf is
-empty; when the chosen leaf is not empty the value returned is the
-collected volume plus 1, so it is never 0 in that case */
-static
-ulint
-ibuf_merge_pages(
-/*=============*/
- ulint* n_pages, /*!< out: number of pages to which merged */
- bool sync) /*!< in: true if the caller wants to wait for
- the issued read with the highest tablespace
- address to complete */
-{
- mtr_t mtr;
- btr_pcur_t pcur;
- ulint sum_sizes;
- ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
- ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
- ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
-
- *n_pages = 0;
-
- ibuf_mtr_start(&mtr);
-
- /* Open a cursor to a randomly chosen leaf of the tree, at a random
- position within the leaf */
-
- btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr);
-
- ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
-
- if (page_is_empty(btr_pcur_get_page(&pcur))) {
- /* If a B-tree page is empty, it must be the root page
- and the whole B-tree must be empty. InnoDB does not
- allow empty B-tree pages other than the root. */
- ut_ad(ibuf->empty);
- ut_ad(page_get_space_id(btr_pcur_get_page(&pcur))
- == IBUF_SPACE_ID);
- ut_ad(page_get_page_no(btr_pcur_get_page(&pcur))
- == FSP_IBUF_TREE_ROOT_PAGE_NO);
-
- ibuf_mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-
- return(0);
- }
-
- sum_sizes = ibuf_get_merge_page_nos(TRUE,
- btr_pcur_get_rec(&pcur), &mtr,
- space_ids, space_versions,
- page_nos, n_pages);
-#if 0 /* defined UNIV_IBUF_DEBUG */
- fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n",
- sync, *n_pages, sum_sizes);
-#endif
- ibuf_mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-
- buf_read_ibuf_merge_pages(
- sync, space_ids, space_versions, page_nos, *n_pages);
-
- return(sum_sizes + 1);
-}
-
-/*********************************************************************//**
-Contracts insert buffer trees by reading pages referring to space_id
-to the buffer pool. The cursor is positioned on the first ibuf record
-whose key is greater than or equal to (space, page 0), up to
-IBUF_MAX_N_PAGES_MERGED pages of that space are collected, and a read
-with sync=true is issued for them with buf_read_ibuf_merge_pages().
-@return number of pages for which the merge read was issued */
-UNIV_INTERN
-ulint
-ibuf_merge_space(
-/*=============*/
-	ulint	space)	/*!< in: tablespace id to merge */
-{
-	mtr_t		mtr;
-	btr_pcur_t	pcur;
-	mem_heap_t*	heap = mem_heap_create(512);
-	dtuple_t*	tuple = ibuf_search_tuple_build(space, 0, heap);
-	ulint		n_pages = 0;
-
-	ut_ad(space < SRV_LOG_SPACE_FIRST_ID);
-
-	ibuf_mtr_start(&mtr);
-
-	/* Position the cursor on the first matching record. */
-
-	btr_pcur_open(
-		ibuf->index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur,
-		&mtr);
-
-	/* The search tuple is not needed once the cursor is open. */
-	mem_heap_free(heap);
-
-	ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
-
-	ulint		sum_sizes = 0;
-	ulint		pages[IBUF_MAX_N_PAGES_MERGED];
-	ulint		spaces[IBUF_MAX_N_PAGES_MERGED];
-	ib_int64_t	versions[IBUF_MAX_N_PAGES_MERGED];
-
-	if (page_is_empty(btr_pcur_get_page(&pcur))) {
-		/* If a B-tree page is empty, it must be the root page
-		and the whole B-tree must be empty. InnoDB does not
-		allow empty B-tree pages other than the root. */
-		ut_ad(ibuf->empty);
-		ut_ad(page_get_space_id(btr_pcur_get_page(&pcur))
-		      == IBUF_SPACE_ID);
-		ut_ad(page_get_page_no(btr_pcur_get_page(&pcur))
-		      == FSP_IBUF_TREE_ROOT_PAGE_NO);
-
-	} else {
-
-		sum_sizes = ibuf_get_merge_pages(
-			&pcur, space, IBUF_MAX_N_PAGES_MERGED,
-			&pages[0], &spaces[0], &versions[0], &n_pages,
-			&mtr);
-		/* sum_sizes is a ulint, which need not have the width
-		of unsigned long; cast it so that the argument matches
-		the %lu conversion. */
-		ib_logf(IB_LOG_LEVEL_INFO,"\n Size of pages merged %lu"
-			,(unsigned long) sum_sizes);
-	}
-
-	ibuf_mtr_commit(&mtr);
-
-	btr_pcur_close(&pcur);
-
-	if (n_pages > 0) {
-
-#ifdef UNIV_DEBUG
-		ut_ad(n_pages <= UT_ARR_SIZE(pages));
-
-		for (ulint i = 0; i < n_pages; ++i) {
-			ut_ad(spaces[i] == space);
-			ut_ad(i == 0 || versions[i] == versions[i - 1]);
-		}
-#endif /* UNIV_DEBUG */
-
-		buf_read_ibuf_merge_pages(
-			true, spaces, versions, pages, n_pages);
-	}
-
-	return(n_pages);
-}
-
-/** Contract the change buffer by reading pages to the buffer pool.
-Nothing is read if the change buffer looks empty and no shutdown is in
-progress, or (in debug builds) if ibuf_debug is set.
-@param[out]	n_pages	number of pages to which merged
-@param[in]	sync	whether the caller wants to wait for the issued read
-			with the highest tablespace address to complete
-@return a lower limit for the combined size in bytes of entries which
-will be merged from ibuf trees to the pages read, 0 if ibuf is
-empty */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-ulint
-ibuf_merge(
-/*=======*/
-	ulint*	n_pages,
-	bool	sync)
-{
-	*n_pages = 0;
-
-	/* ibuf->empty is read here without latching the insert buffer
-	root page. This dirty read is trusted unless a slow shutdown is
-	being executed; during a slow shutdown the insert buffer merge
-	must be completed. */
-
-	if (ibuf->empty && !srv_shutdown_state) {
-
-		return(0);
-	}
-
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-	if (ibuf_debug) {
-
-		return(0);
-	}
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
-	return(ibuf_merge_pages(n_pages, sync));
-}
-
-/** Contract the change buffer by reading pages to the buffer pool.
-This is ibuf_merge_pages() for callers that do not need the page count.
-@param[in]	sync	whether the caller wants to wait for the issued read
-			with the highest tablespace address to complete
-@return a lower limit for the combined size in bytes of entries which
-will be merged from ibuf trees to the pages read, 0 if ibuf is empty */
-static
-ulint
-ibuf_contract(
-/*==========*/
-	bool	sync)
-{
-	ulint	n_pages_ignored;
-
-	const ulint	merged_bytes = ibuf_merge_pages(
-		&n_pages_ignored, sync);
-
-	return(merged_bytes);
-}
-
-/** Contract the change buffer by reading pages to the buffer pool.
-Batches are merged with ibuf_merge() until the targeted number of pages
-has been reached or a batch reports that nothing was merged.
-@param[in]	full	If true, do a full contraction based
-on PCT_IO(100). If false, the size of contract batch is determined
-based on the current size of the change buffer.
-@return a lower limit for the combined size in bytes of entries which
-will be merged from ibuf trees to the pages read, 0 if ibuf is
-empty */
-UNIV_INTERN
-ulint
-ibuf_merge_in_background(
-/*=====================*/
-	bool	full)
-{
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-	if (srv_ibuf_disable_background_merge) {
-		return(0);
-	}
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
-	/* A full batch uses the whole io_capacity; otherwise the default
-	is a batch of 5% of the io_capacity. */
-	ulint	target_pages = full ? PCT_IO(100) : PCT_IO(5);
-
-	if (!full) {
-		mutex_enter(&ibuf_mutex);
-
-		/* If ibuf->size is more than half of max_size, the
-		contraction is made more aggressive. The +1 avoids a
-		division by zero. */
-		if (ibuf->size > ibuf->max_size / 2) {
-			const ulint	excess
-				= ibuf->size - ibuf->max_size / 2;
-
-			target_pages += PCT_IO((excess * 100)
-					       / (ibuf->max_size + 1));
-		}
-
-		mutex_exit(&ibuf_mutex);
-	}
-
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-	if (ibuf_debug) {
-		return(0);
-	}
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
-	ulint	total_bytes = 0;
-	ulint	batch_pages;
-
-	for (ulint done_pages = 0;
-	     done_pages < target_pages;
-	     done_pages += batch_pages) {
-
-		const ulint	batch_bytes = ibuf_merge(&batch_pages, false);
-
-		if (batch_bytes == 0) {
-			break;
-		}
-
-		total_bytes += batch_bytes;
-
-		srv_inc_activity_count(true);
-	}
-
-	return(total_bytes);
-}
-
-/*********************************************************************//**
-Contract insert buffer trees after insert if they are too big.
-Nothing is done while ibuf->size is below
-ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC. Otherwise
-ibuf_contract() is called repeatedly until at least entry_size bytes have
-been reported as merged or a call reports 0. */
-UNIV_INLINE
-void
-ibuf_contract_after_insert(
-/*=======================*/
-	ulint	entry_size)	/*!< in: size of a record which was inserted
-				into an ibuf tree */
-{
-	/* ibuf->size and ibuf->max_size are read without holding
-	ibuf_mutex, to reduce contention on it. ibuf->max_size remains
-	constant after ibuf_init_at_db_start(), but ibuf->size should be
-	protected by ibuf_mutex. As ibuf->size fits in a machine word
-	this should be OK; at worst some excessive ibuf_contract() is
-	done, or an ibuf_contract() is occasionally skipped. */
-	const ulint	cur_size = ibuf->size;
-	const ulint	cur_max_size = ibuf->max_size;
-
-	if (cur_size < cur_max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
-		return;
-	}
-
-	const bool	sync
-		= (cur_size >= cur_max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
-
-	/* Contract at least entry_size many bytes */
-	ulint	contracted = 0;
-
-	for (;;) {
-		const ulint	merged = ibuf_contract(sync);
-
-		contracted += merged;
-
-		if (merged == 0 || contracted >= entry_size) {
-			break;
-		}
-	}
-}
-
-/*********************************************************************//**
-Determine if an insert buffer record has been encountered already.
-The user record data is folded with ut_fold_binary() and the fold selects
-one bit of the hash array; the bit is set if it was not set already.
-@return TRUE if a new record, FALSE if possible duplicate */
-static
-ibool
-ibuf_get_volume_buffered_hash(
-/*==========================*/
-	const rec_t*	rec,	/*!< in: ibuf record in post-4.1 format */
-	const byte*	types,	/*!< in: fields */
-	const byte*	data,	/*!< in: start of user record data */
-	ulint		comp,	/*!< in: 0=ROW_FORMAT=REDUNDANT,
-				nonzero=ROW_FORMAT=COMPACT */
-	ulint*		hash,	/*!< in/out: hash array */
-	ulint		size)	/*!< in: number of elements in hash array */
-{
-	const ulint	data_len = ibuf_rec_get_size(
-		rec, types,
-		rec_get_n_fields_old(rec) - IBUF_REC_FIELD_USER, comp);
-	const ulint	fold = ut_fold_binary(data, data_len);
-
-	/* Pick the array element and the bit within it. */
-	ulint*		slot = hash
-		+ (fold / (CHAR_BIT * sizeof *hash)) % size;
-	const ulint	bit = static_cast<ulint>(1)
-		<< (fold % (CHAR_BIT * sizeof(*hash)));
-
-	if ((*slot & bit) != 0) {
-		/* The bit is taken: a possible duplicate. */
-
-		return(FALSE);
-	}
-
-	/* The record has not been seen yet. Remember it. */
-	*slot |= bit;
-
-	return(TRUE);
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_get_volume_buffered_count(mtr,rec,hash,size,n_recs) \
- ibuf_get_volume_buffered_count_func(mtr,rec,hash,size,n_recs)
-#else /* UNIV_DEBUG */
-# define ibuf_get_volume_buffered_count(mtr,rec,hash,size,n_recs) \
- ibuf_get_volume_buffered_count_func(rec,hash,size,n_recs)
-#endif
-/*********************************************************************//**
-Update the estimate of the number of records on a page, and
-get the space taken by merging the buffered record to the index page.
-The result depends on the kind of the buffered record:
-a delete-marked ibuf record counts as 0 and leaves *n_recs alone;
-a record without an operation counter is sized as an insert and is not
-counted in *n_recs;
-IBUF_OP_INSERT is sized as an insert, and increments *n_recs if the
-record was not seen before according to the hash array;
-IBUF_OP_DELETE_MARK counts as 0, and increments *n_recs in the same way;
-IBUF_OP_DELETE counts as 0 and decrements *n_recs.
-*n_recs is only touched when n_recs is not NULL.
-@return size of index record in bytes + an upper limit of the space
-taken in the page directory */
-static
-ulint
-ibuf_get_volume_buffered_count_func(
-/*================================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in: mini-transaction owning rec */
-#endif /* UNIV_DEBUG */
- const rec_t* rec, /*!< in: insert buffer record */
- ulint* hash, /*!< in/out: hash array */
- ulint size, /*!< in: number of elements in hash array */
- lint* n_recs) /*!< in/out: estimated number of records
- on the page that rec points to */
-{
- ulint len;
- ibuf_op_t ibuf_op;
- const byte* types;
- ulint n_fields;
-
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
- ut_ad(ibuf_inside(mtr));
-
- n_fields = rec_get_n_fields_old(rec);
- ut_ad(n_fields > IBUF_REC_FIELD_USER);
- n_fields -= IBUF_REC_FIELD_USER;
-
- rec_get_nth_field_offs_old(rec, 1, &len);
- /* This function is only invoked when buffering new
- operations. All pre-4.1 records should have been merged
- when the database was started up. */
- ut_a(len == 1);
-
- if (rec_get_deleted_flag(rec, 0)) {
- /* This record has been merged already,
- but apparently the system crashed before
- the change was discarded from the buffer.
- Pretend that the record does not exist. */
- return(0);
- }
-
- types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
-
- switch (UNIV_EXPECT(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
- IBUF_REC_INFO_SIZE)) {
- default:
- ut_error;
- case 0:
- /* This ROW_FORMAT=REDUNDANT record does not include an
- operation counter. Exclude it from the *n_recs,
- because deletes cannot be buffered if there are
- old-style inserts buffered for the page. */
-
- len = ibuf_rec_get_size(rec, types, n_fields, 0);
-
- return(len
- + rec_get_converted_extra_size(len, n_fields, 0)
- + page_dir_calc_reserved_space(1));
- case 1:
- /* This ROW_FORMAT=COMPACT record does not include an
- operation counter. Exclude it from the *n_recs,
- because deletes cannot be buffered if there are
- old-style inserts buffered for the page. */
- goto get_volume_comp;
-
- case IBUF_REC_INFO_SIZE:
- ibuf_op = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
- break;
- }
-
- switch (ibuf_op) {
- case IBUF_OP_INSERT:
- /* Inserts can be done by updating a delete-marked record.
- Because delete-mark and insert operations can be pointing to
- the same records, we must not count duplicates.
- This case deliberately falls through to the next one. */
- case IBUF_OP_DELETE_MARK:
- /* There must be a record to delete-mark.
- See if this record has been already buffered. */
- if (n_recs && ibuf_get_volume_buffered_hash(
- rec, types + IBUF_REC_INFO_SIZE,
- types + len,
- types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT,
- hash, size)) {
- (*n_recs)++;
- }
-
- if (ibuf_op == IBUF_OP_DELETE_MARK) {
- /* Setting the delete-mark flag does not
- affect the available space on the page. */
- return(0);
- }
- break;
- case IBUF_OP_DELETE:
- /* A record will be removed from the page. */
- if (n_recs) {
- (*n_recs)--;
- }
- /* While deleting a record actually frees up space,
- we have to play it safe and pretend that it takes no
- additional space (the record might not exist, etc.). */
- return(0);
- default:
- ut_error;
- }
-
- ut_ad(ibuf_op == IBUF_OP_INSERT);
-
-get_volume_comp:
- {
- dtuple_t* entry;
- ulint volume;
- dict_index_t* dummy_index;
- mem_heap_t* heap = mem_heap_create(500);
-
- entry = ibuf_build_entry_from_ibuf_rec(
- mtr, rec, heap, &dummy_index);
-
- volume = rec_get_converted_size(dummy_index, entry, 0);
-
- ibuf_dummy_index_free(dummy_index);
- mem_heap_free(heap);
-
- return(volume + page_dir_calc_reserved_space(1));
- }
-}
-
-/*********************************************************************//**
-Gets an upper limit for the combined size of entries buffered in the insert
-buffer for a given page.
-The records for (space, page_no) are counted first backwards and then
-forwards from the cursor position. In each direction the scan covers the
-rest of the cursor page and, if that is exhausted, the neighbouring leaf
-page, which is x-latched in mtr. If the neighbouring page is exhausted as
-well, the function gives up and returns UNIV_PAGE_SIZE.
-@return upper limit for the volume of buffered inserts for the index
-page, in bytes; UNIV_PAGE_SIZE, if the entries for the index page span
-several pages in the insert buffer */
-static
-ulint
-ibuf_get_volume_buffered(
-/*=====================*/
- const btr_pcur_t*pcur, /*!< in: pcur positioned at a place in an
- insert buffer tree where we would insert an
- entry for the index page whose number is
- page_no, latch mode has to be BTR_MODIFY_PREV
- or BTR_MODIFY_TREE */
- ulint space, /*!< in: space id */
- ulint page_no,/*!< in: page number of an index page */
- lint* n_recs, /*!< in/out: minimum number of records on the
- page after the buffered changes have been
- applied, or NULL to disable the counting */
- mtr_t* mtr) /*!< in: mini-transaction of pcur */
-{
- ulint volume;
- const rec_t* rec;
- const page_t* page;
- ulint prev_page_no;
- const page_t* prev_page;
- ulint next_page_no;
- const page_t* next_page;
- /* bitmap of buffered recs; it is cleared below only when
- n_recs is not NULL */
- ulint hash_bitmap[128 / sizeof(ulint)];
-
- ut_ad((pcur->latch_mode == BTR_MODIFY_PREV)
- || (pcur->latch_mode == BTR_MODIFY_TREE));
-
- /* Count the volume of inserts earlier in the alphabetical order than
- pcur */
-
- volume = 0;
-
- if (n_recs) {
- memset(hash_bitmap, 0, sizeof hash_bitmap);
- }
-
- rec = btr_pcur_get_rec(pcur);
- page = page_align(rec);
- ut_ad(page_validate(page, ibuf->index));
-
- if (page_rec_is_supremum(rec)) {
- rec = page_rec_get_prev_const(rec);
- }
-
- for (; !page_rec_is_infimum(rec);
- rec = page_rec_get_prev_const(rec)) {
- ut_ad(page_align(rec) == page);
-
- if (page_no != ibuf_rec_get_page_no(mtr, rec)
- || space != ibuf_rec_get_space(mtr, rec)) {
-
- goto count_later;
- }
-
- volume += ibuf_get_volume_buffered_count(
- mtr, rec,
- hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
- }
-
- /* Look at the previous page */
-
- prev_page_no = btr_page_get_prev(page, mtr);
-
- if (prev_page_no == FIL_NULL) {
-
- goto count_later;
- }
-
- {
- buf_block_t* block;
-
- block = buf_page_get(
- IBUF_SPACE_ID, 0, prev_page_no, RW_X_LATCH,
- mtr);
-
- buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
-
-
- prev_page = buf_block_get_frame(block);
- ut_ad(page_validate(prev_page, ibuf->index));
- }
-
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_next(prev_page, mtr) == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- rec = page_get_supremum_rec(prev_page);
- rec = page_rec_get_prev_const(rec);
-
- for (;; rec = page_rec_get_prev_const(rec)) {
- ut_ad(page_align(rec) == prev_page);
-
- if (page_rec_is_infimum(rec)) {
-
- /* We cannot go to yet a previous page, because we
- do not have the x-latch on it, and cannot acquire one
- because of the latching order: we have to give up */
-
- return(UNIV_PAGE_SIZE);
- }
-
- if (page_no != ibuf_rec_get_page_no(mtr, rec)
- || space != ibuf_rec_get_space(mtr, rec)) {
-
- goto count_later;
- }
-
- volume += ibuf_get_volume_buffered_count(
- mtr, rec,
- hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
- }
-
-count_later:
- rec = btr_pcur_get_rec(pcur);
-
- if (!page_rec_is_supremum(rec)) {
- rec = page_rec_get_next_const(rec);
- }
-
- for (; !page_rec_is_supremum(rec);
- rec = page_rec_get_next_const(rec)) {
- if (page_no != ibuf_rec_get_page_no(mtr, rec)
- || space != ibuf_rec_get_space(mtr, rec)) {
-
- return(volume);
- }
-
- volume += ibuf_get_volume_buffered_count(
- mtr, rec,
- hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
- }
-
- /* Look at the next page */
-
- next_page_no = btr_page_get_next(page, mtr);
-
- if (next_page_no == FIL_NULL) {
-
- return(volume);
- }
-
- {
- buf_block_t* block;
-
- block = buf_page_get(
- IBUF_SPACE_ID, 0, next_page_no, RW_X_LATCH,
- mtr);
-
- buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
-
-
- next_page = buf_block_get_frame(block);
- ut_ad(page_validate(next_page, ibuf->index));
- }
-
-#ifdef UNIV_BTR_DEBUG
- ut_a(btr_page_get_prev(next_page, mtr) == page_get_page_no(page));
-#endif /* UNIV_BTR_DEBUG */
-
- rec = page_get_infimum_rec(next_page);
- rec = page_rec_get_next_const(rec);
-
- for (;; rec = page_rec_get_next_const(rec)) {
- ut_ad(page_align(rec) == next_page);
-
- if (page_rec_is_supremum(rec)) {
-
- /* We give up: the records for the page may continue
- on yet another leaf page, which is not examined */
-
- return(UNIV_PAGE_SIZE);
- }
-
- if (page_no != ibuf_rec_get_page_no(mtr, rec)
- || space != ibuf_rec_get_space(mtr, rec)) {
-
- return(volume);
- }
-
- volume += ibuf_get_volume_buffered_count(
- mtr, rec,
- hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
- }
-}
-
-/*********************************************************************//**
-Looks up the last user record at the high end of the insert buffer tree,
-reads the tablespace id stored in it and hands that id (or 0 if the tree
-holds no records) to fil_set_max_space_id_if_bigger(). */
-UNIV_INTERN
-void
-ibuf_update_max_tablespace_id(void)
-/*===============================*/
-{
-	btr_pcur_t	pcur;
-	mtr_t		mtr;
-	/* Stays 0 when the tree turns out to be empty. */
-	ulint		max_space_id = 0;
-
-	ut_a(!dict_table_is_comp(ibuf->index->table));
-
-	ibuf_mtr_start(&mtr);
-
-	btr_pcur_open_at_index_side(
-		false, ibuf->index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
-
-	ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
-
-	/* Step back from the end position onto the last record, if any. */
-	btr_pcur_move_to_prev(&pcur, &mtr);
-
-	if (!btr_pcur_is_before_first_on_page(&pcur)) {
-		ulint		len;
-		const rec_t*	last_rec = btr_pcur_get_rec(&pcur);
-		const byte*	space_field = rec_get_nth_field_old(
-			last_rec, IBUF_REC_FIELD_SPACE, &len);
-
-		/* The space id is stored as a 4-byte field. */
-		ut_a(len == 4);
-
-		max_space_id = mach_read_from_4(space_field);
-	}
-
-	ibuf_mtr_commit(&mtr);
-
-	fil_set_max_space_id_if_bigger(max_space_id);
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_get_entry_counter_low(mtr,rec,space,page_no) \
-	ibuf_get_entry_counter_low_func(mtr,rec,space,page_no)
-#else /* UNIV_DEBUG */
-# define ibuf_get_entry_counter_low(mtr,rec,space,page_no) \
-	ibuf_get_entry_counter_low_func(rec,space,page_no)
-#endif
-/****************************************************************//**
-Helper of ibuf_get_entry_counter_func. Inspects one insert buffer
-record: if it belongs to (space, page_no) and carries a counter, the
-counter of the next entry (stored value + 1) is returned.
-@retval ULINT_UNDEFINED if the record does not contain any counter
-@retval 0 if the record is not for (space, page_no)
-@retval 1 + previous counter value, otherwise */
-static
-ulint
-ibuf_get_entry_counter_low_func(
-/*============================*/
-#ifdef UNIV_DEBUG
-	mtr_t*		mtr,	/*!< in: mini-transaction of rec */
-#endif /* UNIV_DEBUG */
-	const rec_t*	rec,	/*!< in: insert buffer record */
-	ulint		space,	/*!< in: space id */
-	ulint		page_no)/*!< in: page number */
-{
-	const byte*	field;
-	ulint		len;
-	ulint		info_len;
-
-	ut_ad(ibuf_inside(mtr));
-	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
-	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
-	ut_ad(rec_get_n_fields_old(rec) > 2);
-
-	/* The marker field must be exactly one byte long. */
-	field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
-	ut_a(len == 1);
-
-	/* Does the record belong to the requested tablespace? */
-	field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len);
-	ut_a(len == 4);
-
-	if (space != mach_read_from_4(field)) {
-		return(0);
-	}
-
-	/* Does the record belong to the requested page? */
-	field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len);
-	ut_a(len == 4);
-
-	if (page_no != mach_read_from_4(field)) {
-		return(0);
-	}
-
-	/* The length of the metadata field tells whether a counter
-	is present. */
-	field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
-	info_len = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
-
-	if (info_len == IBUF_REC_INFO_SIZE) {
-		const ulint	counter = mach_read_from_2(
-			field + IBUF_REC_OFFSET_COUNTER);
-
-		ut_a(counter < 0xFFFF);
-		return(counter + 1);
-	}
-
-	if (info_len > 1) {
-		/* Neither a counter-less nor a counter-carrying layout */
-		ut_error;
-	}
-
-	/* 0: ROW_FORMAT=REDUNDANT, 1: ROW_FORMAT=COMPACT; no counter */
-	return(ULINT_UNDEFINED);
-}
-
-#ifdef UNIV_DEBUG
-# define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \
-	ibuf_get_entry_counter_func(space,page_no,rec,mtr,exact_leaf)
-#else /* UNIV_DEBUG */
-# define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \
-	ibuf_get_entry_counter_func(space,page_no,rec,exact_leaf)
-#endif
-
-/****************************************************************//**
-Determines the counter value for a new entry from the record that
-currently precedes the insertion point for (space, page_no).
-@return the counter field, or ULINT_UNDEFINED
-if we should abort this insertion to ibuf */
-static
-ulint
-ibuf_get_entry_counter_func(
-/*========================*/
-	ulint		space,		/*!< in: space id of entry */
-	ulint		page_no,	/*!< in: page number of entry */
-	const rec_t*	rec,		/*!< in: the record preceding the
-					insertion point */
-#ifdef UNIV_DEBUG
-	mtr_t*		mtr,		/*!< in: mini-transaction */
-#endif /* UNIV_DEBUG */
-	ibool		only_leaf)	/*!< in: TRUE if this is the only
-					leaf page that can contain entries
-					for (space,page_no), that is, there
-					was no exact match for (space,page_no)
-					in the node pointer */
-{
-	ut_ad(ibuf_inside(mtr));
-	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
-	ut_ad(page_validate(page_align(rec), ibuf->index));
-
-	if (page_rec_is_supremum(rec)) {
-		/* Not expected: the caller should pass the page
-		infimum or a user record. Handled for safety only. */
-		ut_ad(0);
-		return(ULINT_UNDEFINED);
-	}
-
-	if (!page_rec_is_infimum(rec)) {
-		/* A user record: derive the counter from it. */
-		return(ibuf_get_entry_counter_low(mtr, rec, space, page_no));
-	}
-
-	/* rec is the page infimum. If this is the only leaf page that
-	may hold entries for (space, page_no), or there is no previous
-	page at all, no entries exist yet and counting starts at 0.
-	Otherwise the previous page would have to be read, which would
-	break the latching order because the caller has buffer-fixed an
-	insert buffer bitmap page; give up instead. */
-	return((only_leaf
-		|| fil_page_get_prev(page_align(rec)) == FIL_NULL)
-	       ? 0
-	       : ULINT_UNDEFINED);
-}
-
-/*********************************************************************//**
-Buffer an operation in the insert/delete buffer, instead of doing it
-directly to the disk page, if this is possible.
-
-Outline, as implemented below:
-1. Give up early (DB_STRONG_FAIL) if ibuf->max_size is 0, or if ibuf->size
-has reached ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT; in the latter
-case ibuf_contract(true) is invoked first.
-2. Build the insert buffer entry and, for BTR_MODIFY_TREE, acquire
-ibuf_pessimistic_insert_mutex and ibuf_mutex, calling ibuf_add_free_page()
-until ibuf_data_enough_free_for_insert() holds.
-3. Open a cursor on the insert buffer tree and compute the volume that is
-already buffered for (space, page_no).
-4. Refuse to buffer (fail_exit, DB_STRONG_FAIL) if buf_page_peek() or
-lock_rec_expl_exist_on_page() reports the page, if an IBUF_OP_INSERT may
-not fit according to the free bits of the bitmap page, if an IBUF_OP_DELETE
-sees min_n_recs < 2 or a fired buffer pool watch, or if no counter value
-can be determined for the entry.
-5. Set the IBUF_BITMAP_BUFFERED bit if it is not set yet and insert the
-entry: optimistically first and, for BTR_MODIFY_TREE, pessimistically if
-the optimistic insert returned DB_FAIL.
-@return DB_SUCCESS, DB_STRONG_FAIL or other error */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-ibuf_insert_low(
-/*============*/
- ulint mode, /*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
- ibuf_op_t op, /*!< in: operation type */
- ibool no_counter,
- /*!< in: TRUE=use 5.0.3 format;
- FALSE=allow delete buffering */
- const dtuple_t* entry, /*!< in: index entry to insert */
- ulint entry_size,
- /*!< in: rec_get_converted_size(index, entry) */
- dict_index_t* index, /*!< in: index where to insert; must not be
- unique or clustered */
- ulint space, /*!< in: space id where to insert */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no,/*!< in: page number where to insert */
- que_thr_t* thr) /*!< in: query thread */
-{
- big_rec_t* dummy_big_rec;
- btr_pcur_t pcur;
- btr_cur_t* cursor;
- dtuple_t* ibuf_entry;
- mem_heap_t* offsets_heap = NULL;
- mem_heap_t* heap;
- ulint* offsets = NULL;
- ulint buffered; /* volume already buffered for (space, page_no) */
- lint min_n_recs; /* filled in by ibuf_get_volume_buffered(), which
- is given its address only for IBUF_OP_DELETE */
- rec_t* ins_rec;
- ibool old_bit_value;
- page_t* bitmap_page;
- buf_block_t* block;
- page_t* root;
- dberr_t err;
- ibool do_merge; /* TRUE: after func_exit, read in the pages collected
- in space_ids[], space_versions[], page_nos[] */
- ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
- ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
- ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
- ulint n_stored; /* assigned only by ibuf_get_merge_page_nos() */
- mtr_t mtr; /* used for the insert buffer tree cursor */
- mtr_t bitmap_mtr; /* used for the insert buffer bitmap page */
-
- ut_a(!dict_index_is_clust(index));
- ut_ad(dtuple_check_typed(entry));
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(!no_counter || op == IBUF_OP_INSERT);
- ut_a(op < IBUF_OP_COUNT);
-
- ut_ad(!(thr_get_trx(thr)->fake_changes));
-
- do_merge = FALSE;
-
- /* Perform dirty reads of ibuf->size and ibuf->max_size, to
- reduce ibuf_mutex contention. Given that ibuf->max_size and
- ibuf->size fit in a machine word, this should be OK; at worst
- we are doing some excessive ibuf_contract() or occasionally
- skipping an ibuf_contract(). */
- if (ibuf->max_size == 0) {
- return(DB_STRONG_FAIL);
- }
-
- if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
- /* Insert buffer is now too big, contract it but do not try
- to insert */
-
-
-#ifdef UNIV_IBUF_DEBUG
- fputs("Ibuf too big\n", stderr);
-#endif
- ibuf_contract(true);
-
- return(DB_STRONG_FAIL);
- }
-
- heap = mem_heap_create(1024); /* freed on every exit path below */
-
- /* Build the entry which contains the space id and the page number
- as the first fields and the type information for other fields, and
- which will be inserted to the insert buffer. Using a counter value
- of 0xFFFF we find the last record for (space, page_no), from which
- we can then read the counter value N and use N + 1 in the record we
- insert. (We patch the ibuf_entry's counter field to the correct
- value just before actually inserting the entry.) */
-
- ibuf_entry = ibuf_entry_build(
- op, index, entry, space, page_no,
- no_counter ? ULINT_UNDEFINED : 0xFFFF, heap);
-
- /* Open a cursor to the insert buffer tree to calculate if we can add
- the new entry to it without exceeding the free space limit for the
- page. */
-
- if (mode == BTR_MODIFY_TREE) {
- /* Loop until the free-space check passes; the loop is left with
- both mutexes still held, and they are released either at
- fail_exit or after the insert. */
- for (;;) {
- mutex_enter(&ibuf_pessimistic_insert_mutex);
- mutex_enter(&ibuf_mutex);
-
- if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) {
-
- break;
- }
-
- mutex_exit(&ibuf_mutex);
- mutex_exit(&ibuf_pessimistic_insert_mutex);
-
- if (UNIV_UNLIKELY(!ibuf_add_free_page())) {
-
- mem_heap_free(heap);
- return(DB_STRONG_FAIL);
- }
- }
- }
-
- ibuf_mtr_start(&mtr);
-
- btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
- ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
-
- /* Find out the volume of already buffered inserts for the same index
- page */
- min_n_recs = 0;
- buffered = ibuf_get_volume_buffered(&pcur, space, page_no,
- op == IBUF_OP_DELETE
- ? &min_n_recs
- : NULL, &mtr);
-
- if (op == IBUF_OP_DELETE
- && (min_n_recs < 2
- || buf_pool_watch_occurred(space, page_no))) {
- /* The page could become empty after the record is
- deleted, or the page has been read in to the buffer
- pool. Refuse to buffer the operation. */
-
- /* The buffer pool watch is needed for IBUF_OP_DELETE
- because of latching order considerations. We can
- check buf_pool_watch_occurred() only after latching
- the insert buffer B-tree pages that contain buffered
- changes for the page. We never buffer IBUF_OP_DELETE,
- unless some IBUF_OP_INSERT or IBUF_OP_DELETE_MARK have
- been previously buffered for the page. Because there
- are buffered operations for the page, the insert
- buffer B-tree page latches held by mtr will guarantee
- that no changes for the user page will be merged
- before mtr_commit(&mtr). We must not mtr_commit(&mtr)
- until after the IBUF_OP_DELETE has been buffered. */
-
-fail_exit:
- if (mode == BTR_MODIFY_TREE) {
- mutex_exit(&ibuf_mutex);
- mutex_exit(&ibuf_pessimistic_insert_mutex);
- }
-
- err = DB_STRONG_FAIL;
- goto func_exit;
- }
-
- /* After this point, the page could still be loaded to the
- buffer pool, but we do not have to care about it, since we are
- holding a latch on the insert buffer leaf page that contains
- buffered changes for (space, page_no). If the page enters the
- buffer pool, buf_page_io_complete() for (space, page_no) will
- have to acquire a latch on the same insert buffer leaf page,
- which it cannot do until we have buffered the IBUF_OP_DELETE
- and done mtr_commit(&mtr) to release the latch. */
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a((buffered == 0) || ibuf_count_get(space, page_no));
-#endif
- ibuf_mtr_start(&bitmap_mtr);
-
- bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
- zip_size, &bitmap_mtr);
-
- /* We check if the index page is suitable for buffered entries */
-
- if (buf_page_peek(space, page_no)
- || lock_rec_expl_exist_on_page(space, page_no)) {
-
- ibuf_mtr_commit(&bitmap_mtr);
- goto fail_exit;
- }
-
- if (op == IBUF_OP_INSERT) {
- ulint bits = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, zip_size, IBUF_BITMAP_FREE,
- &bitmap_mtr);
-
- if (buffered + entry_size + page_dir_calc_reserved_space(1)
- > ibuf_index_page_calc_free_from_bits(zip_size, bits)) {
- /* Release the bitmap page latch early. */
- ibuf_mtr_commit(&bitmap_mtr);
-
- /* It may not fit */
- do_merge = TRUE;
-
- ibuf_get_merge_page_nos(FALSE,
- btr_pcur_get_rec(&pcur), &mtr,
- space_ids, space_versions,
- page_nos, &n_stored);
-
- goto fail_exit; /* do_merge is acted upon after func_exit */
- }
- }
-
- if (!no_counter) {
- /* Patch correct counter value to the entry to
- insert. This can change the insert position, which can
- result in the need to abort in some cases. */
- ulint counter = ibuf_get_entry_counter(
- space, page_no, btr_pcur_get_rec(&pcur), &mtr,
- btr_pcur_get_btr_cur(&pcur)->low_match
- < IBUF_REC_FIELD_METADATA);
- dfield_t* field;
-
- if (counter == ULINT_UNDEFINED) {
- ibuf_mtr_commit(&bitmap_mtr);
- goto fail_exit;
- }
-
- field = dtuple_get_nth_field(
- ibuf_entry, IBUF_REC_FIELD_METADATA);
- mach_write_to_2(
- (byte*) dfield_get_data(field)
- + IBUF_REC_OFFSET_COUNTER, counter);
- }
-
- /* Set the bitmap bit denoting that the insert buffer contains
- buffered entries for this index page, if the bit is not set yet */
-
- old_bit_value = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, zip_size,
- IBUF_BITMAP_BUFFERED, &bitmap_mtr);
-
- if (!old_bit_value) {
- ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
- IBUF_BITMAP_BUFFERED, TRUE,
- &bitmap_mtr);
- }
-
- ibuf_mtr_commit(&bitmap_mtr);
-
- cursor = btr_pcur_get_btr_cur(&pcur);
-
- if (mode == BTR_MODIFY_PREV) {
- err = btr_cur_optimistic_insert(
- BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG,
- cursor, &offsets, &offsets_heap,
- ibuf_entry, &ins_rec,
- &dummy_big_rec, 0, thr, &mtr);
- block = btr_cur_get_block(cursor);
- ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
-
- /* If this is the root page, update ibuf->empty. */
- if (UNIV_UNLIKELY(buf_block_get_page_no(block)
- == FSP_IBUF_TREE_ROOT_PAGE_NO)) {
- const page_t* root = buf_block_get_frame(block); /* shadows
- the function-level variable root */
-
- ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
- ut_ad(page_get_page_no(root)
- == FSP_IBUF_TREE_ROOT_PAGE_NO);
-
- ibuf->empty = page_is_empty(root);
- }
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
-
- /* We acquire an x-latch to the root page before the insert,
- because a pessimistic insert releases the tree x-latch,
- which would cause the x-latching of the root after that to
- break the latching order. */
-
- root = ibuf_tree_root_get(&mtr);
-
- err = btr_cur_optimistic_insert(
- BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG,
- cursor, &offsets, &offsets_heap,
- ibuf_entry, &ins_rec,
- &dummy_big_rec, 0, thr, &mtr);
-
- if (err == DB_FAIL) {
- err = btr_cur_pessimistic_insert(
- BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG,
- cursor, &offsets, &offsets_heap,
- ibuf_entry, &ins_rec,
- &dummy_big_rec, 0, thr, &mtr);
- }
-
- mutex_exit(&ibuf_pessimistic_insert_mutex);
- ibuf_size_update(root, &mtr);
- mutex_exit(&ibuf_mutex);
- ibuf->empty = page_is_empty(root);
-
- block = btr_cur_get_block(cursor);
- ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
- }
-
- if (offsets_heap) {
- mem_heap_free(offsets_heap);
- }
-
- if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
- /* Update the page max trx id field */
- page_update_max_trx_id(block, NULL,
- thr_get_trx(thr)->id, &mtr);
- }
-
-func_exit:
-#ifdef UNIV_IBUF_COUNT_DEBUG
- if (err == DB_SUCCESS) {
- fprintf(stderr,
- "Incrementing ibuf count of space %lu page %lu\n"
- "from %lu by 1\n", space, page_no,
- ibuf_count_get(space, page_no));
-
- ibuf_count_set(space, page_no,
- ibuf_count_get(space, page_no) + 1);
- }
-#endif
-
- ibuf_mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-
- mem_heap_free(heap);
-
- if (err == DB_SUCCESS && mode == BTR_MODIFY_TREE) {
- ibuf_contract_after_insert(entry_size);
- }
-
- if (do_merge) {
-#ifdef UNIV_IBUF_DEBUG
- ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
-#endif
- buf_read_ibuf_merge_pages(false, space_ids, space_versions,
- page_nos, n_stored);
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Buffer an operation in the insert/delete buffer, instead of doing it
-directly to the disk page, if this is possible. Does not do it if the index
-is clustered or unique.
-
-FALSE is returned without buffering when the current value of ibuf_use
-does not allow the operation type, when (for IBUF_OP_INSERT and
-IBUF_OP_DELETE_MARK) buf_page_get_also_watch() finds the page or a watch
-for it, when the converted entry is at least half of the free space of an
-empty page, or when ibuf_insert_low() does not return DB_SUCCESS.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-ibuf_insert(
-/*========*/
-	ibuf_op_t	op,	/*!< in: operation type */
-	const dtuple_t*	entry,	/*!< in: index entry to insert */
-	dict_index_t*	index,	/*!< in: index where to insert */
-	ulint		space,	/*!< in: space id where to insert */
-	ulint		zip_size,/*!< in: compressed page size in bytes, or 0 */
-	ulint		page_no,/*!< in: page number where to insert */
-	que_thr_t*	thr)	/*!< in: query thread */
-{
-	dberr_t		err;
-	ulint		entry_size;
-	ibool		no_counter;
-	/* Read the settable global variable ibuf_use only once in
-	this function, so that we will have a consistent view of it. */
-	ibuf_use_t	use = ibuf_use;
-	DBUG_ENTER("ibuf_insert");
-
-	/* space and page_no are ulint, which need not have the width
-	of long; cast explicitly so that the arguments always match
-	the conversion specifiers. */
-	DBUG_PRINT("ibuf", ("op: %d, space: %lu, page_no: %lu",
-			    op, (ulong) space, (ulong) page_no));
-
-	ut_ad(dtuple_check_typed(entry));
-	ut_ad(ut_is_2pow(zip_size));
-
-	ut_a(!dict_index_is_clust(index));
-
-	/* Entries carry a counter only if something other than plain
-	inserts may be buffered. */
-	no_counter = use <= IBUF_USE_INSERT;
-
-	switch (op) {
-	case IBUF_OP_INSERT:
-		switch (use) {
-		case IBUF_USE_NONE:
-		case IBUF_USE_DELETE:
-		case IBUF_USE_DELETE_MARK:
-			DBUG_RETURN(FALSE);
-		case IBUF_USE_INSERT:
-		case IBUF_USE_INSERT_DELETE_MARK:
-		case IBUF_USE_ALL:
-			goto check_watch;
-		case IBUF_USE_COUNT:
-			break;
-		}
-		break;
-	case IBUF_OP_DELETE_MARK:
-		switch (use) {
-		case IBUF_USE_NONE:
-		case IBUF_USE_INSERT:
-			DBUG_RETURN(FALSE);
-		case IBUF_USE_DELETE_MARK:
-		case IBUF_USE_DELETE:
-		case IBUF_USE_INSERT_DELETE_MARK:
-		case IBUF_USE_ALL:
-			ut_ad(!no_counter);
-			goto check_watch;
-		case IBUF_USE_COUNT:
-			break;
-		}
-		break;
-	case IBUF_OP_DELETE:
-		switch (use) {
-		case IBUF_USE_NONE:
-		case IBUF_USE_INSERT:
-		case IBUF_USE_INSERT_DELETE_MARK:
-			DBUG_RETURN(FALSE);
-		case IBUF_USE_DELETE_MARK:
-		case IBUF_USE_DELETE:
-		case IBUF_USE_ALL:
-			ut_ad(!no_counter);
-			goto skip_watch;
-		case IBUF_USE_COUNT:
-			break;
-		}
-		break;
-	case IBUF_OP_COUNT:
-		break;
-	}
-
-	/* unknown op or use */
-	ut_error;
-
-check_watch:
-	/* If a thread attempts to buffer an insert on a page while a
-	purge is in progress on the same page, the purge must not be
-	buffered, because it could remove a record that was
-	re-inserted later. For simplicity, we block the buffering of
-	all operations on a page that has a purge pending.
-
-	We do not check this in the IBUF_OP_DELETE case, because that
-	would always trigger the buffer pool watch during purge and
-	thus prevent the buffering of delete operations. We assume
-	that the issuer of IBUF_OP_DELETE has called
-	buf_pool_watch_set(space, page_no). */
-
-	{
-		buf_page_t*	bpage;
-		buf_pool_t*	buf_pool = buf_pool_get(space, page_no);
-		bpage = buf_page_get_also_watch(buf_pool, space, page_no);
-
-		if (UNIV_LIKELY_NULL(bpage)) {
-			/* A buffer pool watch has been set or the
-			page has been read into the buffer pool.
-			Do not buffer the request. If a purge operation
-			is being buffered, have this request executed
-			directly on the page in the buffer pool after the
-			buffered entries for this page have been merged. */
-			DBUG_RETURN(FALSE);
-		}
-	}
-
-skip_watch:
-	entry_size = rec_get_converted_size(index, entry, 0);
-
-	/* Do not buffer entries that would take up half or more of
-	the free space of an empty page. */
-	if (entry_size
-	    >= page_get_free_space_of_empty(dict_table_is_comp(index->table))
-	    / 2) {
-
-		DBUG_RETURN(FALSE);
-	}
-
-	/* Try the cheaper mode first; fall back to BTR_MODIFY_TREE
-	only if that attempt returned DB_FAIL. */
-	err = ibuf_insert_low(BTR_MODIFY_PREV, op, no_counter,
-			      entry, entry_size,
-			      index, space, zip_size, page_no, thr);
-	if (err == DB_FAIL) {
-		err = ibuf_insert_low(BTR_MODIFY_TREE, op, no_counter,
-				      entry, entry_size,
-				      index, space, zip_size, page_no, thr);
-	}
-
-	if (err == DB_SUCCESS) {
-#ifdef UNIV_IBUF_DEBUG
-		/* fprintf(stderr, "Ibuf insert for page no %lu of index %s\n",
-		page_no, index->name); */
-#endif
-		DBUG_RETURN(TRUE);
-
-	} else {
-		ut_a(err == DB_STRONG_FAIL || err == DB_TOO_BIG_RECORD);
-
-		DBUG_RETURN(FALSE);
-	}
-}
-
-/********************************************************************//**
-During merge, places a secondary index entry taken from the insert
-buffer on an index page. If the first attempt fails, the page is
-reorganized and the insert is retried once; if that fails as well,
-diagnostics are written to stderr and NULL is returned.
-@return newly inserted record */
-static MY_ATTRIBUTE((nonnull))
-rec_t*
-ibuf_insert_to_index_page_low(
-/*==========================*/
-	const dtuple_t*	entry,	/*!< in: buffered entry to insert */
-	buf_block_t*	block,	/*!< in/out: index page where the buffered
-				entry should be placed */
-	dict_index_t*	index,	/*!< in: record descriptor */
-	ulint**		offsets,/*!< out: offsets on *rec */
-	mem_heap_t*	heap,	/*!< in/out: memory heap */
-	mtr_t*		mtr,	/*!< in/out: mtr */
-	page_cur_t*	page_cur)/*!< in/out: cursor positioned on the record
-				after which to insert the buffered entry */
-{
-	rec_t*	ins_rec;
-	DBUG_ENTER("ibuf_insert_to_index_page_low");
-
-	ins_rec = page_cur_tuple_insert(page_cur, entry, index,
-					offsets, &heap, 0, mtr);
-
-	if (ins_rec == NULL) {
-		/* page_cur_tuple_insert() should already have tried
-		page reorganization or recompression. Besides, per
-		ibuf_index_page_calc_free_zip() the page should not
-		have been recompressed or reorganized. */
-		ut_ad(!buf_block_get_page_zip(block));
-
-		/* The record did not fit: reorganize the page and
-		try once more. This time the record must fit. */
-		btr_page_reorganize(page_cur, index, mtr);
-
-		ins_rec = page_cur_tuple_insert(page_cur, entry, index,
-						offsets, &heap, 0, mtr);
-	}
-
-	if (ins_rec != NULL) {
-		DBUG_RETURN(ins_rec);
-	}
-
-	/* Both attempts failed: report as much as we know. */
-	const page_t*	page = buf_block_get_frame(block);
-
-	ut_print_timestamp(stderr);
-
-	fprintf(stderr,
-		" InnoDB: Error: Insert buffer insert fails;"
-		" page free %lu, dtuple size %lu\n",
-		(ulong) page_get_max_insert_size(page, 1),
-		(ulong) rec_get_converted_size(index, entry, 0));
-	fputs("InnoDB: Cannot insert index record ", stderr);
-	dtuple_print(stderr, entry);
-	fputs("\nInnoDB: The table where this index record belongs\n"
-	      "InnoDB: is now probably corrupt. Please run CHECK TABLE on\n"
-	      "InnoDB: that table.\n", stderr);
-
-	const ulint	space = page_get_space_id(page);
-	const ulint	zip_size = buf_block_get_zip_size(block);
-	const ulint	page_no = page_get_page_no(page);
-
-	const page_t*	bitmap_page = ibuf_bitmap_get_map_page(
-		space, page_no, zip_size, mtr);
-	const ulint	old_bits = ibuf_bitmap_page_get_bits(
-		bitmap_page, page_no, zip_size, IBUF_BITMAP_FREE, mtr);
-
-	fprintf(stderr,
-		"InnoDB: space %lu, page %lu, zip_size %lu, bitmap bits %lu\n",
-		(ulong) space, (ulong) page_no,
-		(ulong) zip_size, (ulong) old_bits);
-
-	fputs("InnoDB: Submit a detailed bug report"
-	      " to http://bugs.mysql.com\n", stderr);
-	ut_ad(0);
-	DBUG_RETURN(NULL);
-}
-
-/************************************************************************
-During merge, inserts to an index page a secondary index entry extracted
-from the insert buffer.
-
-If the page format or the number of fields does not match the entry, or
-the page is empty, the page and the entry are dumped to stderr and nothing
-is inserted. If a record matching all fields of the entry already exists,
-it is expected to be delete-marked and is updated in place when possible,
-or else deleted and replaced by the buffered entry. Otherwise the entry is
-inserted as a new record. */
-static
-void
-ibuf_insert_to_index_page(
-/*======================*/
-	const dtuple_t*	entry,	/*!< in: buffered entry to insert */
-	buf_block_t*	block,	/*!< in/out: index page where the buffered entry
-				should be placed */
-	dict_index_t*	index,	/*!< in: record descriptor */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	page_cur_t	page_cur;
-	ulint		low_match;
-	page_t*		page		= buf_block_get_frame(block);
-	rec_t*		rec;
-	ulint*		offsets;
-	mem_heap_t*	heap;
-
-	DBUG_ENTER("ibuf_insert_to_index_page");
-
-	/* The page number is a ulint, which need not have the width
-	of long; cast explicitly to match the conversion specifier. */
-	DBUG_PRINT("ibuf", ("page_no: %lu",
-			    (ulong) buf_block_get_page_no(block)));
-	DBUG_PRINT("ibuf", ("index name: %s", index->name));
-	DBUG_PRINT("ibuf", ("online status: %d",
-			    dict_index_get_online_status(index)));
-
-	ut_ad(ibuf_inside(mtr));
-	ut_ad(dtuple_check_typed(entry));
-	ut_ad(!buf_block_align(page)->index);
-
-	if (UNIV_UNLIKELY(dict_table_is_comp(index->table)
-			  != (ibool)!!page_is_comp(page))) {
-		fputs("InnoDB: Trying to insert a record from"
-		      " the insert buffer to an index page\n"
-		      "InnoDB: but the 'compact' flag does not match!\n",
-		      stderr);
-		goto dump;
-	}
-
-	rec = page_rec_get_next(page_get_infimum_rec(page));
-
-	if (page_rec_is_supremum(rec)) {
-		fputs("InnoDB: Trying to insert a record from"
-		      " the insert buffer to an index page\n"
-		      "InnoDB: but the index page is empty!\n",
-		      stderr);
-		goto dump;
-	}
-
-	if (UNIV_UNLIKELY(rec_get_n_fields(rec, index)
-			  != dtuple_get_n_fields(entry))) {
-		fputs("InnoDB: Trying to insert a record from"
-		      " the insert buffer to an index page\n"
-		      "InnoDB: but the number of fields does not match!\n",
-		      stderr);
-dump:
-		buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
-
-		dtuple_print(stderr, entry);
-
-		fputs("InnoDB: The table where"
-		      " this index record belongs\n"
-		      "InnoDB: is now probably corrupt."
-		      " Please run CHECK TABLE on\n"
-		      "InnoDB: your tables.\n"
-		      "InnoDB: Submit a detailed bug report to"
-		      " http://bugs.mysql.com!\n", stderr);
-
-		/* Assert only after the complete diagnostic has been
-		written, so that debug builds emit it as well. */
-		ut_ad(0);
-
-		DBUG_VOID_RETURN;
-	}
-
-	low_match = page_cur_search(block, index, entry,
-				    PAGE_CUR_LE, &page_cur);
-
-	/* Size the heap for one update vector and one offsets array
-	covering all fields of the entry. */
-	heap = mem_heap_create(
-		sizeof(upd_t)
-		+ REC_OFFS_HEADER_SIZE * sizeof(*offsets)
-		+ dtuple_get_n_fields(entry)
-		* (sizeof(upd_field_t) + sizeof *offsets));
-
-	if (UNIV_UNLIKELY(low_match == dtuple_get_n_fields(entry))) {
-		upd_t*		update;
-		page_zip_des_t*	page_zip;
-
-		rec = page_cur_get_rec(&page_cur);
-
-		/* This is based on
-		row_ins_sec_index_entry_by_modify(BTR_MODIFY_LEAF). */
-		ut_ad(rec_get_deleted_flag(rec, page_is_comp(page)));
-
-		offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED,
-					  &heap);
-		update = row_upd_build_sec_rec_difference_binary(
-			rec, index, offsets, entry, heap);
-
-		page_zip = buf_block_get_page_zip(block);
-
-		if (update->n_fields == 0) {
-			/* The records only differ in the delete-mark.
-			Clear the delete-mark, like we did before
-			Bug #56680 was fixed. */
-			btr_cur_set_deleted_flag_for_ibuf(
-				rec, page_zip, FALSE, mtr);
-			goto updated_in_place;
-		}
-
-		/* Copy the info bits. Clear the delete-mark. */
-		update->info_bits = rec_get_info_bits(rec, page_is_comp(page));
-		update->info_bits &= ~REC_INFO_DELETED_FLAG;
-
-		/* We cannot invoke btr_cur_optimistic_update() here,
-		because we do not have a btr_cur_t or que_thr_t,
-		as the insert buffer merge occurs at a very low level. */
-		if (!row_upd_changes_field_size_or_external(index, offsets,
-							    update)
-		    && (!page_zip || btr_cur_update_alloc_zip(
-				page_zip, &page_cur, index, offsets,
-				rec_offs_size(offsets), false, mtr, NULL))) {
-			/* This is the easy case. Do something similar
-			to btr_cur_update_in_place(). */
-			rec = page_cur_get_rec(&page_cur);
-			row_upd_rec_in_place(rec, index, offsets,
-					     update, page_zip);
-
-			/* Log the update in place operation. During recovery
-			MLOG_COMP_REC_UPDATE_IN_PLACE/MLOG_REC_UPDATE_IN_PLACE
-			expects trx_id, roll_ptr for secondary indexes. So we
-			just write dummy trx_id(0), roll_ptr(0) */
-			btr_cur_update_in_place_log(BTR_KEEP_SYS_FLAG, rec,
-						    index, update, 0, 0, mtr);
-			DBUG_EXECUTE_IF(
-				"crash_after_log_ibuf_upd_inplace",
-				log_buffer_flush_to_disk();
-				ib_logf(IB_LOG_LEVEL_INFO,
-					"Wrote log record for ibuf update in "
-					"place operation");
-				DBUG_SUICIDE();
-			);
-
-			goto updated_in_place;
-		}
-
-		/* btr_cur_update_alloc_zip() may have changed this */
-		rec = page_cur_get_rec(&page_cur);
-
-		/* A collation may identify values that differ in
-		storage length.
-		Some examples (1 or 2 bytes):
-		utf8_turkish_ci: I = U+0131 LATIN SMALL LETTER DOTLESS I
-		utf8_general_ci: S = U+00DF LATIN SMALL LETTER SHARP S
-		utf8_general_ci: A = U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
-
-		latin1_german2_ci: SS = U+00DF LATIN SMALL LETTER SHARP S
-
-		Examples of a character (3-byte UTF-8 sequence)
-		identified with 2 or 4 characters (1-byte UTF-8 sequences):
-
-		utf8_unicode_ci: 'II' = U+2171 SMALL ROMAN NUMERAL TWO
-		utf8_unicode_ci: '(10)' = U+247D PARENTHESIZED NUMBER TEN
-		*/
-
-		/* Delete the different-length record, and insert the
-		buffered one. The record locks are parked on the page
-		infimum while the record is being replaced. */
-
-		lock_rec_store_on_page_infimum(block, rec);
-		page_cur_delete_rec(&page_cur, index, offsets, mtr);
-		page_cur_move_to_prev(&page_cur);
-		rec = ibuf_insert_to_index_page_low(entry, block, index,
-						    &offsets, heap, mtr,
-						    &page_cur);
-
-		/* NOTE(review): ibuf_insert_to_index_page_low() returns
-		NULL when the entry cannot be inserted; rec is used
-		below without a check. Confirm whether that case needs
-		handling in non-debug builds. */
-		ut_ad(!cmp_dtuple_rec(entry, rec, offsets));
-		lock_rec_restore_from_page_infimum(block, rec, block);
-	} else {
-		offsets = NULL;
-		ibuf_insert_to_index_page_low(entry, block, index,
-					      &offsets, heap, mtr,
-					      &page_cur);
-	}
-updated_in_place:
-	mem_heap_free(heap);
-
-	DBUG_VOID_RETURN;
-}
-
-/****************************************************************//**
-During merge, delete-marks the index record that matches a buffered
-secondary index entry. If no record matches all fields of the entry,
-a diagnostic is written to stderr and the page is left untouched. */
-static
-void
-ibuf_set_del_mark(
-/*==============*/
-	const dtuple_t*		entry,	/*!< in: entry */
-	buf_block_t*		block,	/*!< in/out: block */
-	const dict_index_t*	index,	/*!< in: record descriptor */
-	mtr_t*			mtr)	/*!< in: mtr */
-{
-	page_cur_t	cur;
-
-	ut_ad(ibuf_inside(mtr));
-	ut_ad(dtuple_check_typed(entry));
-
-	const ulint	n_matched = page_cur_search(
-		block, index, entry, PAGE_CUR_LE, &cur);
-
-	if (n_matched != dtuple_get_n_fields(entry)) {
-		/* No record matches the entry: report and give up. */
-		const page_t*		page = page_cur_get_page(&cur);
-		const buf_block_t*	cur_block = page_cur_get_block(&cur);
-
-		ut_print_timestamp(stderr);
-		fputs("  InnoDB: unable to find a record to delete-mark\n",
-		      stderr);
-		fputs("InnoDB: tuple ", stderr);
-		dtuple_print(stderr, entry);
-		fputs("\n"
-		      "InnoDB: record ", stderr);
-		rec_print(stderr, page_cur_get_rec(&cur), index);
-		fprintf(stderr, "\nspace %u offset %u"
-			" (%u records, index id %llu)\n"
-			"InnoDB: Submit a detailed bug report"
-			" to http://bugs.mysql.com\n",
-			(unsigned) buf_block_get_space(cur_block),
-			(unsigned) buf_block_get_page_no(cur_block),
-			(unsigned) page_get_n_recs(page),
-			(ulonglong) btr_page_get_index_id(page));
-		ut_ad(0);
-		return;
-	}
-
-	rec_t*		rec = page_cur_get_rec(&cur);
-	page_zip_des_t*	page_zip = page_cur_get_page_zip(&cur);
-
-	/* Delete mark the old index record. According to a
-	comment in row_upd_sec_index_entry(), it can already
-	have been delete marked if a lock wait occurred in
-	row_ins_sec_index_entry() in a previous invocation of
-	row_upd_sec_index_entry(). */
-
-	if (UNIV_LIKELY
-	    (!rec_get_deleted_flag(
-		    rec, dict_table_is_comp(index->table)))) {
-		btr_cur_set_deleted_flag_for_ibuf(rec, page_zip,
-						  TRUE, mtr);
-	}
-}
-
-/****************************************************************//**
-During merge, delete a record for a secondary index entry.
-
-If a record matching all fields of the entry is found, it is removed
-from the page and the free bits of the page are updated. The removal is
-refused, with a diagnostic on stderr, if the record is the only one on
-the page or is not delete-marked. If no matching record is found,
-nothing is done. */
-static
-void
-ibuf_delete(
-/*========*/
-	const dtuple_t*	entry,	/*!< in: entry */
-	buf_block_t*	block,	/*!< in/out: block */
-	dict_index_t*	index,	/*!< in: record descriptor */
-	mtr_t*		mtr)	/*!< in/out: mtr; must be committed
-				before latching any further pages */
-{
-	page_cur_t	page_cur;
-	ulint		low_match;
-
-	ut_ad(ibuf_inside(mtr));
-	ut_ad(dtuple_check_typed(entry));
-
-	low_match = page_cur_search(
-		block, index, entry, PAGE_CUR_LE, &page_cur);
-
-	if (low_match == dtuple_get_n_fields(entry)) {
-		page_zip_des_t*	page_zip = buf_block_get_page_zip(block);
-		page_t*		page = buf_block_get_frame(block);
-		rec_t*		rec = page_cur_get_rec(&page_cur);
-
-		/* TODO: the below should probably be a separate function,
-		it's a bastardized version of btr_cur_optimistic_delete. */
-
-		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-		ulint*		offsets = offsets_;
-		mem_heap_t*	heap = NULL;
-		ulint		max_ins_size = 0;
-
-		rec_offs_init(offsets_);
-
-		/* heap is passed by address, so it may be non-NULL
-		after this call; every path below must free it. */
-		offsets = rec_get_offsets(
-			rec, index, offsets, ULINT_UNDEFINED, &heap);
-
-		if (page_get_n_recs(page) <= 1
-		    || !(REC_INFO_DELETED_FLAG
-			 & rec_get_info_bits(rec, page_is_comp(page)))) {
-			/* Refuse to purge the last record or a
-			record that has not been marked for deletion. */
-			ut_print_timestamp(stderr);
-			fputs("  InnoDB: unable to purge a record\n",
-			      stderr);
-			fputs("InnoDB: tuple ", stderr);
-			dtuple_print(stderr, entry);
-			fputs("\n"
-			      "InnoDB: record ", stderr);
-			rec_print_new(stderr, rec, offsets);
-			fprintf(stderr, "\nspace %u offset %u"
-				" (%u records, index id %llu)\n"
-				"InnoDB: Submit a detailed bug report"
-				" to http://bugs.mysql.com\n",
-				(unsigned) buf_block_get_space(block),
-				(unsigned) buf_block_get_page_no(block),
-				(unsigned) page_get_n_recs(page),
-				(ulonglong) btr_page_get_index_id(page));
-
-			ut_ad(0);
-
-			/* Release the offsets heap on this early
-			exit as well, so that it is not leaked. */
-			if (UNIV_LIKELY_NULL(heap)) {
-				mem_heap_free(heap);
-			}
-
-			return;
-		}
-
-		lock_update_delete(block, rec);
-
-		if (!page_zip) {
-			/* Computed before the delete and passed to
-			ibuf_update_free_bits_low() afterwards. */
-			max_ins_size
-				= page_get_max_insert_size_after_reorganize(
-					page, 1);
-		}
-#ifdef UNIV_ZIP_DEBUG
-		ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-		page_cur_delete_rec(&page_cur, index, offsets, mtr);
-#ifdef UNIV_ZIP_DEBUG
-		ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
-		if (page_zip) {
-			ibuf_update_free_bits_zip(block, mtr);
-		} else {
-			ibuf_update_free_bits_low(block, max_ins_size, mtr);
-		}
-
-		if (UNIV_LIKELY_NULL(heap)) {
-			mem_heap_free(heap);
-		}
-	} else {
-		/* The record must have been purged already. */
-	}
-}
-
-/*********************************************************************//**
-Restores insert buffer tree cursor position
-@return TRUE if the position was restored; FALSE if not */
-static MY_ATTRIBUTE((nonnull))
-ibool
-ibuf_restore_pos(
-/*=============*/
- ulint space, /*!< in: space id */
- ulint page_no,/*!< in: index page number where the record
- should belong */
- const dtuple_t* search_tuple,
- /*!< in: search tuple for entries of page_no */
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
- btr_pcur_t* pcur, /*!< in/out: persistent cursor whose
- position is to be restored */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE);
-
- if (btr_pcur_restore_position(mode, pcur, mtr)) {
-
- return(TRUE);
- }
-
- if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
- /* The tablespace has been dropped. It is possible
- that another thread has deleted the insert buffer
- entry. Do not complain. */
- ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
- } else {
- fprintf(stderr,
- "InnoDB: ERROR: Submit the output to"
- " http://bugs.mysql.com\n"
- "InnoDB: ibuf cursor restoration fails!\n"
- "InnoDB: ibuf record inserted to page %lu:%lu\n",
- (ulong) space, (ulong) page_no);
- fflush(stderr);
-
- rec_print_old(stderr, btr_pcur_get_rec(pcur));
- rec_print_old(stderr, pcur->old_rec);
- dtuple_print(stderr, search_tuple);
-
- rec_print_old(stderr,
- page_rec_get_next(btr_pcur_get_rec(pcur)));
- fflush(stderr);
-
- ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
- ut_ad(0);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Deletes from ibuf the record on which pcur is positioned. If we have to
-resort to a pessimistic delete, this function commits mtr and closes
-the cursor.
-@return TRUE if mtr was committed and pcur closed in this operation */
-static MY_ATTRIBUTE((warn_unused_result))
-ibool
-ibuf_delete_rec(
-/*============*/
- ulint space, /*!< in: space id */
- ulint page_no,/*!< in: index page number that the record
- should belong to */
- btr_pcur_t* pcur, /*!< in: pcur positioned on the record to
- delete, having latch mode BTR_MODIFY_LEAF */
- const dtuple_t* search_tuple,
- /*!< in: search tuple for entries of page_no */
- mtr_t* mtr) /*!< in: mtr */
-{
- ibool success;
- page_t* root;
- dberr_t err;
-
- ut_ad(ibuf_inside(mtr));
- ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
- ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur)) == page_no);
- ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == space);
-
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
- if (ibuf_debug == 2) {
- /* Inject a fault (crash). We do this before trying
- optimistic delete, because a pessimistic delete in the
- change buffer would require a larger test case. */
-
- /* Flag the buffered record as processed, to avoid
- an assertion failure after crash recovery. */
- btr_cur_set_deleted_flag_for_ibuf(
- btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
- ibuf_mtr_commit(mtr);
- log_write_up_to(LSN_MAX, LOG_WAIT_ALL_GROUPS, TRUE);
- DBUG_SUICIDE();
- }
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
- success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur),
- 0, mtr);
-
- if (success) {
- if (page_is_empty(btr_pcur_get_page(pcur))) {
- /* If a B-tree page is empty, it must be the root page
- and the whole B-tree must be empty. InnoDB does not
- allow empty B-tree pages other than the root. */
- root = btr_pcur_get_page(pcur);
-
- ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
- ut_ad(page_get_page_no(root)
- == FSP_IBUF_TREE_ROOT_PAGE_NO);
-
- /* ibuf->empty is protected by the root page latch.
- Before the deletion, it had to be FALSE. */
- ut_ad(!ibuf->empty);
- ibuf->empty = true;
- }
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- fprintf(stderr,
- "Decrementing ibuf count of space %lu page %lu\n"
- "from %lu by 1\n", space, page_no,
- ibuf_count_get(space, page_no));
- ibuf_count_set(space, page_no,
- ibuf_count_get(space, page_no) - 1);
-#endif
- return(FALSE);
- }
-
- ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
- ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur)) == page_no);
- ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == space);
-
- /* We have to resort to a pessimistic delete from ibuf.
- Delete-mark the record so that it will not be applied again,
- in case the server crashes before the pessimistic delete is
- made persistent. */
- btr_cur_set_deleted_flag_for_ibuf(
- btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
-
- btr_pcur_store_position(pcur, mtr);
- ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
-
- ibuf_mtr_start(mtr);
- mutex_enter(&ibuf_mutex);
-
- if (!ibuf_restore_pos(space, page_no, search_tuple,
- BTR_MODIFY_TREE, pcur, mtr)) {
-
- mutex_exit(&ibuf_mutex);
- ut_ad(mtr->state == MTR_COMMITTED);
- goto func_exit;
- }
-
- root = ibuf_tree_root_get(mtr);
-
- btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), 0,
- RB_NONE, mtr);
- ut_a(err == DB_SUCCESS);
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
-#endif
- ibuf_size_update(root, mtr);
- mutex_exit(&ibuf_mutex);
-
- ibuf->empty = page_is_empty(root);
- ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
-
-func_exit:
- ut_ad(mtr->state == MTR_COMMITTED);
- btr_pcur_close(pcur);
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-When an index page is read from a disk to the buffer pool, this function
-applies any buffered operations to the page and deletes the entries from the
-insert buffer. If the page is not read, but created in the buffer pool, this
-function deletes its buffered entries from the insert buffer; there can
-exist entries for such a page if the page belonged to an index which
-subsequently was dropped. */
-UNIV_INTERN
-void
-ibuf_merge_or_delete_for_page(
-/*==========================*/
- buf_block_t* block, /*!< in: if page has been read from
- disk, pointer to the page x-latched,
- else NULL */
- ulint space_id,/*!< in: space id of the index page */
- ulint page_no,/*!< in: page number of the index page */
- ulint zip_size,/*!< in: compressed page size in bytes,
- or 0 */
- ibool update_ibuf_bitmap)/*!< in: normally this is set
- to TRUE, but if we have deleted or are
- deleting the tablespace, then we
- naturally do not want to update a
- non-existent bitmap page */
-{
- mem_heap_t* heap;
- btr_pcur_t pcur;
- dtuple_t* search_tuple;
-#ifdef UNIV_IBUF_DEBUG
- ulint volume = 0;
-#endif
- page_zip_des_t* page_zip = NULL;
- ibool corruption_noticed = FALSE;
- mtr_t mtr;
- fil_space_t* space = NULL;
-
- /* Counts for merged & discarded operations. */
- ulint mops[IBUF_OP_COUNT];
- ulint dops[IBUF_OP_COUNT];
-
- ut_ad(!block || buf_block_get_space(block) == space_id);
- ut_ad(!block || buf_block_get_page_no(block) == page_no);
- ut_ad(!block || buf_block_get_zip_size(block) == zip_size);
- ut_ad(!block || buf_block_get_io_fix_unlocked(block) == BUF_IO_READ);
-
- if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE
- || trx_sys_hdr_page(space_id, page_no)) {
- return;
- }
-
- /* We cannot refer to zip_size in the following, because
- zip_size is passed as ULINT_UNDEFINED (it is unknown) when
- buf_read_ibuf_merge_pages() is merging (discarding) changes
- for a dropped tablespace. When block != NULL or
- update_ibuf_bitmap is specified, the zip_size must be known.
- That is why we will repeat the check below, with zip_size in
- place of 0. Passing zip_size as 0 assumes that the
- uncompressed page size always is a power-of-2 multiple of the
- compressed page size. */
-
- if (ibuf_fixed_addr_page(space_id, 0, page_no)
- || fsp_descr_page(0, page_no)) {
- return;
- }
-
- if (UNIV_LIKELY(update_ibuf_bitmap)) {
- ut_a(ut_is_2pow(zip_size));
-
- if (ibuf_fixed_addr_page(space_id, zip_size, page_no)
- || fsp_descr_page(zip_size, page_no)) {
- return;
- }
-
- /* If the following returns space, we get the counter
- incremented, and must decrement it when we leave this
- function. When the counter is > 0, that prevents tablespace
- from being dropped. */
-
- space = fil_space_acquire(space_id);
-
- if (UNIV_UNLIKELY(!space)) {
- /* Do not try to read the bitmap page from space;
- just delete the ibuf records for the page */
-
- block = NULL;
- update_ibuf_bitmap = FALSE;
- } else {
- page_t* bitmap_page = NULL;
- ulint bitmap_bits = 0;
-
- ibuf_mtr_start(&mtr);
-
- bitmap_page = ibuf_bitmap_get_map_page(
- space_id, page_no, zip_size, &mtr);
-
- if (bitmap_page &&
- fil_page_get_type(bitmap_page) != FIL_PAGE_TYPE_ALLOCATED) {
- bitmap_bits = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, zip_size,
- IBUF_BITMAP_BUFFERED, &mtr);
- }
-
- ibuf_mtr_commit(&mtr);
-
- if (!bitmap_bits) {
- /* No inserts buffered for this page */
-
- if (space) {
- fil_space_release(space);
- }
-
- return;
- }
- }
- } else if (block
- && (ibuf_fixed_addr_page(space_id, zip_size, page_no)
- || fsp_descr_page(zip_size, page_no))) {
-
- return;
- }
-
- heap = mem_heap_create(512);
-
- search_tuple = ibuf_search_tuple_build(space_id, page_no, heap);
-
- if (block) {
- /* Move the ownership of the x-latch on the page to this OS
- thread, so that we can acquire a second x-latch on it. This
- is needed for the insert operations to the index page to pass
- the debug checks. */
-
- rw_lock_x_lock_move_ownership(&(block->lock));
- page_zip = buf_block_get_page_zip(block);
-
- if (UNIV_UNLIKELY(fil_page_get_type(block->frame)
- != FIL_PAGE_INDEX)
- || UNIV_UNLIKELY(!page_is_leaf(block->frame))) {
-
- page_t* bitmap_page;
-
- corruption_noticed = TRUE;
-
- ut_print_timestamp(stderr);
-
- ibuf_mtr_start(&mtr);
-
- fputs(" InnoDB: Dump of the ibuf bitmap page:\n",
- stderr);
-
- bitmap_page = ibuf_bitmap_get_map_page(space_id, page_no,
- zip_size, &mtr);
- if (bitmap_page == NULL)
- {
- fputs("InnoDB: cannot retrieve bitmap page\n",
- stderr);
- } else {
- buf_page_print(bitmap_page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
- }
- ibuf_mtr_commit(&mtr);
-
- fputs("\nInnoDB: Dump of the page:\n", stderr);
-
- buf_page_print(block->frame, 0,
- BUF_PAGE_PRINT_NO_CRASH);
-
- fprintf(stderr,
- "InnoDB: Error: corruption in the tablespace."
- " Bitmap shows insert\n"
- "InnoDB: buffer records to page n:o %lu"
- " though the page\n"
- "InnoDB: type is %lu, which is"
- " not an index leaf page!\n"
- "InnoDB: We try to resolve the problem"
- " by skipping the insert buffer\n"
- "InnoDB: merge for this page."
- " Please run CHECK TABLE on your tables\n"
- "InnoDB: to determine if they are corrupt"
- " after this.\n\n"
- "InnoDB: Please submit a detailed bug report"
- " to http://bugs.mysql.com\n\n",
- (ulong) page_no,
- (ulong)
- fil_page_get_type(block->frame));
- ut_ad(0);
- }
- }
-
- memset(mops, 0, sizeof(mops));
- memset(dops, 0, sizeof(dops));
-
-loop:
- ibuf_mtr_start(&mtr);
-
- /* Position pcur in the insert buffer at the first entry for this
- index page */
- btr_pcur_open_on_user_rec(
- ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
- &pcur, &mtr);
-
- if (block) {
- ibool success;
-
- success = buf_page_get_known_nowait(
- RW_X_LATCH, block,
- BUF_KEEP_OLD, __FILE__, __LINE__, &mtr);
-
- ut_a(success);
-
- /* This is a user page (secondary index leaf page),
- but we pretend that it is a change buffer page in
- order to obey the latching order. This should be OK,
- because buffered changes are applied immediately while
- the block is io-fixed. Other threads must not try to
- latch an io-fixed block. */
- buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
- }
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
-
- goto reset_bit;
- }
-
- for (;;) {
- rec_t* rec;
-
- ut_ad(btr_pcur_is_on_user_rec(&pcur));
-
- rec = btr_pcur_get_rec(&pcur);
-
- /* Check if the entry is for this index page */
- if (ibuf_rec_get_page_no(&mtr, rec) != page_no
- || ibuf_rec_get_space(&mtr, rec) != space_id) {
-
- if (block) {
- page_header_reset_last_insert(
- block->frame, page_zip, &mtr);
- }
-
- goto reset_bit;
- }
-
- if (UNIV_UNLIKELY(corruption_noticed)) {
- fputs("InnoDB: Discarding record\n ", stderr);
- rec_print_old(stderr, rec);
- fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
- } else if (block && !rec_get_deleted_flag(rec, 0)) {
- /* Now we have at pcur a record which should be
- applied on the index page; NOTE that the call below
- copies pointers to fields in rec, and we must
- keep the latch to the rec page until the
- insertion is finished! */
- dtuple_t* entry;
- trx_id_t max_trx_id;
- dict_index_t* dummy_index;
- ibuf_op_t op = ibuf_rec_get_op_type(&mtr, rec);
-
- max_trx_id = page_get_max_trx_id(page_align(rec));
- page_update_max_trx_id(block, page_zip, max_trx_id,
- &mtr);
-
- ut_ad(page_validate(page_align(rec), ibuf->index));
-
- entry = ibuf_build_entry_from_ibuf_rec(
- &mtr, rec, heap, &dummy_index);
-
- ut_ad(page_validate(block->frame, dummy_index));
-
- switch (op) {
- ibool success;
- case IBUF_OP_INSERT:
-#ifdef UNIV_IBUF_DEBUG
- volume += rec_get_converted_size(
- dummy_index, entry, 0);
-
- volume += page_dir_calc_reserved_space(1);
-
- ut_a(volume <= 4 * UNIV_PAGE_SIZE
- / IBUF_PAGE_SIZE_PER_FREE_SPACE);
-#endif
- ibuf_insert_to_index_page(
- entry, block, dummy_index, &mtr);
- break;
-
- case IBUF_OP_DELETE_MARK:
- ibuf_set_del_mark(
- entry, block, dummy_index, &mtr);
- break;
-
- case IBUF_OP_DELETE:
- ibuf_delete(entry, block, dummy_index, &mtr);
- /* Because ibuf_delete() will latch an
- insert buffer bitmap page, commit mtr
- before latching any further pages.
- Store and restore the cursor position. */
- ut_ad(rec == btr_pcur_get_rec(&pcur));
- ut_ad(page_rec_is_user_rec(rec));
- ut_ad(ibuf_rec_get_page_no(&mtr, rec)
- == page_no);
- ut_ad(ibuf_rec_get_space(&mtr, rec) == space_id);
-
- /* Mark the change buffer record processed,
- so that it will not be merged again in case
- the server crashes between the following
- mtr_commit() and the subsequent mtr_commit()
- of deleting the change buffer record. */
-
- btr_cur_set_deleted_flag_for_ibuf(
- btr_pcur_get_rec(&pcur), NULL,
- TRUE, &mtr);
-
- btr_pcur_store_position(&pcur, &mtr);
- ibuf_btr_pcur_commit_specify_mtr(&pcur, &mtr);
-
- ibuf_mtr_start(&mtr);
-
- success = buf_page_get_known_nowait(
- RW_X_LATCH, block,
- BUF_KEEP_OLD,
- __FILE__, __LINE__, &mtr);
- ut_a(success);
-
- /* This is a user page (secondary
- index leaf page), but it should be OK
- to use too low latching order for it,
- as the block is io-fixed. */
- buf_block_dbg_add_level(
- block, SYNC_IBUF_TREE_NODE);
-
- if (!ibuf_restore_pos(space_id, page_no,
- search_tuple,
- BTR_MODIFY_LEAF,
- &pcur, &mtr)) {
-
- ut_ad(mtr.state == MTR_COMMITTED);
- mops[op]++;
- ibuf_dummy_index_free(dummy_index);
- goto loop;
- }
-
- break;
- default:
- ut_error;
- }
-
- mops[op]++;
-
- ibuf_dummy_index_free(dummy_index);
- } else {
- dops[ibuf_rec_get_op_type(&mtr, rec)]++;
- }
-
- /* Delete the record from ibuf */
- if (ibuf_delete_rec(space_id, page_no, &pcur, search_tuple,
- &mtr)) {
- /* Deletion was pessimistic and mtr was committed:
- we start from the beginning again */
-
- ut_ad(mtr.state == MTR_COMMITTED);
- goto loop;
- } else if (btr_pcur_is_after_last_on_page(&pcur)) {
- ibuf_mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-
- goto loop;
- }
- }
-
-reset_bit:
- if (UNIV_LIKELY(update_ibuf_bitmap)) {
- page_t* bitmap_page;
-
- bitmap_page = ibuf_bitmap_get_map_page(
- space_id, page_no, zip_size, &mtr);
-
- ibuf_bitmap_page_set_bits(
- bitmap_page, page_no, zip_size,
- IBUF_BITMAP_BUFFERED, FALSE, &mtr);
-
- if (block) {
- ulint old_bits = ibuf_bitmap_page_get_bits(
- bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, &mtr);
-
- ulint new_bits = ibuf_index_page_calc_free(
- zip_size, block);
-
- if (old_bits != new_bits) {
- ibuf_bitmap_page_set_bits(
- bitmap_page, page_no, zip_size,
- IBUF_BITMAP_FREE, new_bits, &mtr);
- }
- }
- }
-
- ibuf_mtr_commit(&mtr);
- btr_pcur_close(&pcur);
- mem_heap_free(heap);
-
-#ifdef HAVE_ATOMIC_BUILTINS
- os_atomic_increment_ulint(&ibuf->n_merges, 1);
- ibuf_add_ops(ibuf->n_merged_ops, mops);
- ibuf_add_ops(ibuf->n_discarded_ops, dops);
-#else /* HAVE_ATOMIC_BUILTINS */
- /* Protect our statistics keeping from race conditions */
- mutex_enter(&ibuf_mutex);
-
- ibuf->n_merges++;
- ibuf_add_ops(ibuf->n_merged_ops, mops);
- ibuf_add_ops(ibuf->n_discarded_ops, dops);
-
- mutex_exit(&ibuf_mutex);
-#endif /* HAVE_ATOMIC_BUILTINS */
-
- if (space) {
- fil_space_release(space);
- }
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ut_a(ibuf_count_get(space_id, page_no) == 0);
-#endif
-}
-
-/*********************************************************************//**
-Deletes all entries in the insert buffer for a given space id. This is used
-in DISCARD TABLESPACE and IMPORT TABLESPACE.
-NOTE: this does not update the page free bitmaps in the space. The space will
-become CORRUPT when you call this function! */
-UNIV_INTERN
-void
-ibuf_delete_for_discarded_space(
-/*============================*/
- ulint space) /*!< in: space id */
-{
- mem_heap_t* heap;
- btr_pcur_t pcur;
- dtuple_t* search_tuple;
- const rec_t* ibuf_rec;
- ulint page_no;
- mtr_t mtr;
-
- /* Counts for discarded operations. */
- ulint dops[IBUF_OP_COUNT];
-
- heap = mem_heap_create(512);
-
- /* Use page number 0 to build the search tuple so that we get the
- cursor positioned at the first entry for this space id */
-
- search_tuple = ibuf_search_tuple_build(space, 0, heap);
-
- memset(dops, 0, sizeof(dops));
-loop:
- ibuf_mtr_start(&mtr);
-
- /* Position pcur in the insert buffer at the first entry for the
- space */
- btr_pcur_open_on_user_rec(
- ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
- &pcur, &mtr);
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
-
- goto leave_loop;
- }
-
- for (;;) {
- ut_ad(btr_pcur_is_on_user_rec(&pcur));
-
- ibuf_rec = btr_pcur_get_rec(&pcur);
-
- /* Check if the entry is for this space */
- if (ibuf_rec_get_space(&mtr, ibuf_rec) != space) {
-
- goto leave_loop;
- }
-
- page_no = ibuf_rec_get_page_no(&mtr, ibuf_rec);
-
- dops[ibuf_rec_get_op_type(&mtr, ibuf_rec)]++;
-
- /* Delete the record from ibuf */
- if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,
- &mtr)) {
- /* Deletion was pessimistic and mtr was committed:
- we start from the beginning again */
-
- ut_ad(mtr.state == MTR_COMMITTED);
- goto loop;
- }
-
- if (btr_pcur_is_after_last_on_page(&pcur)) {
- ibuf_mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-
- goto loop;
- }
- }
-
-leave_loop:
- ibuf_mtr_commit(&mtr);
- btr_pcur_close(&pcur);
-
-#ifdef HAVE_ATOMIC_BUILTINS
- ibuf_add_ops(ibuf->n_discarded_ops, dops);
-#else /* HAVE_ATOMIC_BUILTINS */
- /* Protect our statistics keeping from race conditions */
- mutex_enter(&ibuf_mutex);
- ibuf_add_ops(ibuf->n_discarded_ops, dops);
- mutex_exit(&ibuf_mutex);
-#endif /* HAVE_ATOMIC_BUILTINS */
-
- mem_heap_free(heap);
-}
-
-/******************************************************************//**
-Looks if the insert buffer is empty.
-@return true if empty */
-UNIV_INTERN
-bool
-ibuf_is_empty(void)
-/*===============*/
-{
- bool is_empty;
- const page_t* root;
- mtr_t mtr;
-
- ibuf_mtr_start(&mtr);
-
- mutex_enter(&ibuf_mutex);
- root = ibuf_tree_root_get(&mtr);
- mutex_exit(&ibuf_mutex);
-
- is_empty = page_is_empty(root);
- ut_a(is_empty == ibuf->empty);
- ibuf_mtr_commit(&mtr);
-
- return(is_empty);
-}
-
-/******************************************************************//**
-Prints info of ibuf. */
-UNIV_INTERN
-void
-ibuf_print(
-/*=======*/
- FILE* file) /*!< in: file where to print */
-{
-#ifdef UNIV_IBUF_COUNT_DEBUG
- ulint i;
- ulint j;
-#endif
-
- mutex_enter(&ibuf_mutex);
-
- fprintf(file,
- "Ibuf: size %lu, free list len %lu,"
- " seg size %lu, %lu merges\n",
- (ulong) ibuf->size,
- (ulong) ibuf->free_list_len,
- (ulong) ibuf->seg_size,
- (ulong) ibuf->n_merges);
-
- fputs("merged operations:\n ", file);
- ibuf_print_ops(ibuf->n_merged_ops, file);
-
- fputs("discarded operations:\n ", file);
- ibuf_print_ops(ibuf->n_discarded_ops, file);
-
-#ifdef UNIV_IBUF_COUNT_DEBUG
- for (i = 0; i < IBUF_COUNT_N_SPACES; i++) {
- for (j = 0; j < IBUF_COUNT_N_PAGES; j++) {
- ulint count = ibuf_count_get(i, j);
-
- if (count > 0) {
- fprintf(stderr,
- "Ibuf count for space/page %lu/%lu"
- " is %lu\n",
- (ulong) i, (ulong) j, (ulong) count);
- }
- }
- }
-#endif /* UNIV_IBUF_COUNT_DEBUG */
-
- mutex_exit(&ibuf_mutex);
-}
-
-/******************************************************************//**
-Checks the insert buffer bitmaps on IMPORT TABLESPACE.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-ibuf_check_bitmap_on_import(
-/*========================*/
- const trx_t* trx, /*!< in: transaction */
- ulint space_id) /*!< in: tablespace identifier */
-{
- ulint zip_size;
- ulint page_size;
- ulint size;
- ulint page_no;
-
- ut_ad(space_id);
- ut_ad(trx->mysql_thd);
-
- zip_size = fil_space_get_zip_size(space_id);
-
- if (zip_size == ULINT_UNDEFINED) {
- return(DB_TABLE_NOT_FOUND);
- }
-
- size = fil_space_get_size(space_id);
-
- if (size == 0) {
- return(DB_TABLE_NOT_FOUND);
- }
-
- mutex_enter(&ibuf_mutex);
-
- page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
-
- for (page_no = 0; page_no < size; page_no += page_size) {
- mtr_t mtr;
- page_t* bitmap_page;
- ulint i;
-
- if (trx_is_interrupted(trx)) {
- mutex_exit(&ibuf_mutex);
- return(DB_INTERRUPTED);
- }
-
- mtr_start(&mtr);
-
- mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
-
- ibuf_enter(&mtr);
-
- bitmap_page = ibuf_bitmap_get_map_page(
- space_id, page_no, zip_size, &mtr);
-
- for (i = FSP_IBUF_BITMAP_OFFSET + 1; i < page_size; i++) {
- const ulint offset = page_no + i;
-
- if (ibuf_bitmap_page_get_bits(
- bitmap_page, offset, zip_size,
- IBUF_BITMAP_IBUF, &mtr)) {
-
- mutex_exit(&ibuf_mutex);
- ibuf_exit(&mtr);
- mtr_commit(&mtr);
-
- ib_errf(trx->mysql_thd,
- IB_LOG_LEVEL_ERROR,
- ER_INNODB_INDEX_CORRUPT,
- "Space %u page %u"
- " is wrongly flagged to belong to the"
- " insert buffer",
- (unsigned) space_id,
- (unsigned) offset);
-
- return(DB_CORRUPTION);
- }
-
- if (ibuf_bitmap_page_get_bits(
- bitmap_page, offset, zip_size,
- IBUF_BITMAP_BUFFERED, &mtr)) {
-
- ib_errf(trx->mysql_thd,
- IB_LOG_LEVEL_WARN,
- ER_INNODB_INDEX_CORRUPT,
- "Buffered changes"
- " for space %u page %u are lost",
- (unsigned) space_id,
- (unsigned) offset);
-
- /* Tolerate this error, so that
- slightly corrupted tables can be
- imported and dumped. Clear the bit. */
- ibuf_bitmap_page_set_bits(
- bitmap_page, offset, zip_size,
- IBUF_BITMAP_BUFFERED, FALSE, &mtr);
- }
- }
-
- ibuf_exit(&mtr);
- mtr_commit(&mtr);
- }
-
- mutex_exit(&ibuf_mutex);
- return(DB_SUCCESS);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/api0api.h b/storage/xtradb/include/api0api.h
deleted file mode 100644
index 500bf4fe3b2..00000000000
--- a/storage/xtradb/include/api0api.h
+++ /dev/null
@@ -1,1312 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/api0api.h
-InnoDB Native API
-
-2008-08-01 Created by Sunny Bains.
-3/20/2011 Jimmy Yang extracted from Embedded InnoDB
-*******************************************************/
-
-#ifndef api0api_h
-#define api0api_h
-
-#include "db0err.h"
-#include <stdio.h>
-
-#ifdef _MSC_VER
-#define strncasecmp _strnicmp
-#define strcasecmp _stricmp
-#endif
-
-#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
-#define UNIV_NO_IGNORE MY_ATTRIBUTE ((warn_unused_result))
-#else
-#define UNIV_NO_IGNORE
-#endif /* __GNUC__ && __GNUC__ > 2 && !__INTEL_COMPILER */
-
-/* See comment about ib_bool_t as to why the two macros are unsigned long. */
-/** The boolean value of "true" used internally within InnoDB */
-#define IB_TRUE 0x1UL
-/** The boolean value of "false" used internally within InnoDB */
-#define IB_FALSE 0x0UL
-
-/* Basic types used by the InnoDB API. */
-/** All InnoDB error codes are represented by ib_err_t */
-typedef enum dberr_t ib_err_t;
-/** Representation of a byte within InnoDB */
-typedef unsigned char ib_byte_t;
-/** Representation of an unsigned long int within InnoDB */
-typedef unsigned long int ib_ulint_t;
-
-/* We assume C99 support except when using VisualStudio. */
-#if !defined(_MSC_VER)
-#include <stdint.h>
-#endif /* _MSC_VER */
-
-/* Integer types used by the API. Microsft VS defines its own types
-and we use the Microsoft types when building with Visual Studio. */
-#if defined(_MSC_VER)
-/** A signed 8 bit integral type. */
-typedef __int8 ib_i8_t;
-#else
-/** A signed 8 bit integral type. */
-typedef int8_t ib_i8_t;
-#endif
-
-#if defined(_MSC_VER)
-/** An unsigned 8 bit integral type. */
-typedef unsigned __int8 ib_u8_t;
-#else
-/** An unsigned 8 bit integral type. */
-typedef uint8_t ib_u8_t;
-#endif
-
-#if defined(_MSC_VER)
-/** A signed 16 bit integral type. */
-typedef __int16 ib_i16_t;
-#else
-/** A signed 16 bit integral type. */
-typedef int16_t ib_i16_t;
-#endif
-
-#if defined(_MSC_VER)
-/** An unsigned 16 bit integral type. */
-typedef unsigned __int16 ib_u16_t;
-#else
-/** An unsigned 16 bit integral type. */
-typedef uint16_t ib_u16_t;
-#endif
-
-#if defined(_MSC_VER)
-/** A signed 32 bit integral type. */
-typedef __int32 ib_i32_t;
-#else
-/** A signed 32 bit integral type. */
-typedef int32_t ib_i32_t;
-#endif
-
-#if defined(_MSC_VER)
-/** An unsigned 32 bit integral type. */
-typedef unsigned __int32 ib_u32_t;
-#else
-/** An unsigned 32 bit integral type. */
-typedef uint32_t ib_u32_t;
-#endif
-
-#if defined(_MSC_VER)
-/** A signed 64 bit integral type. */
-typedef __int64 ib_i64_t;
-#else
-/** A signed 64 bit integral type. */
-typedef int64_t ib_i64_t;
-#endif
-
-#if defined(_MSC_VER)
-/** An unsigned 64 bit integral type. */
-typedef unsigned __int64 ib_u64_t;
-#else
-/** An unsigned 64 bit integral type. */
-typedef uint64_t ib_u64_t;
-#endif
-
-typedef void* ib_opaque_t;
-typedef ib_opaque_t ib_charset_t;
-typedef ib_ulint_t ib_bool_t;
-typedef ib_u64_t ib_id_u64_t;
-
-/** @enum ib_cfg_type_t Possible types for a configuration variable. */
-typedef enum {
- IB_CFG_IBOOL, /*!< The configuration parameter is
- of type ibool */
-
- /* XXX Can we avoid having different types for ulint and ulong?
- - On Win64 "unsigned long" is 32 bits
- - ulong is always defined as "unsigned long"
- - On Win64 ulint is defined as 64 bit integer
- => On Win64 ulint != ulong.
- If we typecast all ulong and ulint variables to the smaller type
- ulong, then we will cut the range of the ulint variables.
- This is not a problem for most ulint variables because their max
- allowed values do not exceed 2^32-1 (e.g. log_groups is ulint
- but its max allowed value is 10). BUT buffer_pool_size and
- log_file_size allow up to 2^64-1. */
-
- IB_CFG_ULINT, /*!< The configuration parameter is
- of type ulint */
-
- IB_CFG_ULONG, /*!< The configuration parameter is
- of type ulong */
-
- IB_CFG_TEXT, /*!< The configuration parameter is
- of type char* */
-
- IB_CFG_CB /*!< The configuration parameter is
- a callback parameter */
-} ib_cfg_type_t;
-
-/** @enum ib_col_type_t column types that are supported. */
-typedef enum {
- IB_VARCHAR = 1, /*!< Character varying length. The
- column is not padded. */
-
- IB_CHAR = 2, /*!< Fixed length character string. The
- column is padded to the right. */
-
- IB_BINARY = 3, /*!< Fixed length binary, similar to
- IB_CHAR but the column is not padded
- to the right. */
-
- IB_VARBINARY = 4, /*!< Variable length binary */
-
- IB_BLOB = 5, /*!< Binary large object, or
- a TEXT type */
-
- IB_INT = 6, /*!< Integer: can be any size
- from 1 - 8 bytes. If the size is
- 1, 2, 4 and 8 bytes then you can use
- the typed read and write functions. For
- other sizes you will need to use the
- ib_col_get_value() function and do the
- conversion yourself. */
-
- IB_SYS = 8, /*!< System column, this column can
- be one of DATA_TRX_ID, DATA_ROLL_PTR
- or DATA_ROW_ID. */
-
- IB_FLOAT = 9, /*!< C (float) floating point value. */
-
- IB_DOUBLE = 10, /*!> C (double) floating point value. */
-
- IB_DECIMAL = 11, /*!< Decimal stored as an ASCII
- string */
-
- IB_VARCHAR_ANYCHARSET = 12, /*!< Any charset, varying length */
-
- IB_CHAR_ANYCHARSET = 13 /*!< Any charset, fixed length */
-
-} ib_col_type_t;
-
-/** @enum ib_tbl_fmt_t InnoDB table format types */
-typedef enum {
- IB_TBL_REDUNDANT, /*!< Redundant row format, the column
- type and length is stored in the row.*/
-
- IB_TBL_COMPACT, /*!< Compact row format, the column
- type is not stored in the row. The
- length is stored in the row but the
- storage format uses a compact format
- to store the length of the column data
- and record data storage format also
- uses less storage. */
-
- IB_TBL_DYNAMIC, /*!< Compact row format. BLOB prefixes
- are not stored in the clustered index */
-
- IB_TBL_COMPRESSED /*!< Similar to dynamic format but
- with pages compressed */
-} ib_tbl_fmt_t;
-
-/** @enum ib_col_attr_t InnoDB column attributes */
-typedef enum {
- IB_COL_NONE = 0, /*!< No special attributes. */
-
- IB_COL_NOT_NULL = 1, /*!< Column data can't be NULL. */
-
- IB_COL_UNSIGNED = 2, /*!< Column is IB_INT and unsigned. */
-
- IB_COL_NOT_USED = 4, /*!< Future use, reserved. */
-
- IB_COL_CUSTOM1 = 8, /*!< Custom precision type, this is
- a bit that is ignored by InnoDB and so
- can be set and queried by users. */
-
- IB_COL_CUSTOM2 = 16, /*!< Custom precision type, this is
- a bit that is ignored by InnoDB and so
- can be set and queried by users. */
-
- IB_COL_CUSTOM3 = 32 /*!< Custom precision type, this is
- a bit that is ignored by InnoDB and so
- can be set and queried by users. */
-} ib_col_attr_t;
-
-/* Note: must match lock0types.h */
-/** @enum ib_lck_mode_t InnoDB lock modes. */
-typedef enum {
- IB_LOCK_IS = 0, /*!< Intention shared, an intention
- lock should be used to lock tables */
-
- IB_LOCK_IX, /*!< Intention exclusive, an intention
- lock should be used to lock tables */
-
- IB_LOCK_S, /*!< Shared locks should be used to
- lock rows */
-
- IB_LOCK_X, /*!< Exclusive locks should be used to
- lock rows*/
-
- IB_LOCK_TABLE_X, /*!< exclusive table lock */
-
- IB_LOCK_NONE, /*!< This is used internally to note
- consistent read */
-
- IB_LOCK_NUM = IB_LOCK_NONE /*!< number of lock modes */
-} ib_lck_mode_t;
-
-typedef enum {
- IB_CLUSTERED = 1, /*!< clustered index */
- IB_UNIQUE = 2 /*!< unique index */
-} ib_index_type_t;
-
-/** @enum ib_srch_mode_t InnoDB cursor search modes for ib_cursor_moveto().
-Note: Values must match those found in page0cur.h */
-typedef enum {
- IB_CUR_G = 1, /*!< If search key is not found then
- position the cursor on the row that
- is greater than the search key */
-
- IB_CUR_GE = 2, /*!< If search key is not found then
- position the cursor on the row that
- is greater than or equal to the search
- key */
-
- IB_CUR_L = 3, /*!< If search key is not found then
- position the cursor on the row that
- is less than the search key */
-
- IB_CUR_LE = 4 /*!< If search key is not found then
- position the cursor on the row that
- is less than or equal to the search
- key */
-} ib_srch_mode_t;
-
-/** @enum ib_match_mode_t Various match modes used by ib_cursor_moveto() */
-typedef enum {
- IB_CLOSEST_MATCH, /*!< Closest match possible */
-
- IB_EXACT_MATCH, /*!< Search using a complete key
- value */
-
- IB_EXACT_PREFIX /*!< Search using a key prefix which
- must match to rows: the prefix may
- contain an incomplete field (the
- last field in prefix may be just
- a prefix of a fixed length column) */
-} ib_match_mode_t;
-
-/** @struct ib_col_meta_t InnoDB column meta data. */
-typedef struct {
- ib_col_type_t type; /*!< Type of the column */
-
- ib_col_attr_t attr; /*!< Column attributes */
-
- ib_u32_t type_len; /*!< Length of type */
-
- ib_u16_t client_type; /*!< 16 bits of data relevant only to
- the client. InnoDB doesn't care */
-
- ib_charset_t* charset; /*!< Column charset */
-} ib_col_meta_t;
-
-/* Note: Must be in sync with trx0trx.h */
-/** @enum ib_trx_state_t The transaction state can be queried using the
-ib_trx_state() function. The InnoDB deadlock monitor can roll back a
-transaction and users should be prepared for this, especially where there
-is high contention. The way to determine the state of the transaction is to
-query its state and check. */
-typedef enum {
- IB_TRX_NOT_STARTED, /*!< Has not started yet, the
- transaction has not been started yet.*/
-
- IB_TRX_ACTIVE, /*!< The transaction is currently
- active and needs to be either
- committed or rolled back. */
-
- IB_TRX_COMMITTED_IN_MEMORY, /*!< Not committed to disk yet */
-
- IB_TRX_PREPARED /*!< Support for 2PC/XA */
-} ib_trx_state_t;
-
-/* Note: Must be in sync with trx0trx.h */
-/** @enum ib_trx_level_t Transaction isolation levels */
-typedef enum {
- IB_TRX_READ_UNCOMMITTED = 0, /*!< Dirty read: non-locking SELECTs are
- performed so that we do not look at a
- possible earlier version of a record;
- thus they are not 'consistent' reads
- under this isolation level; otherwise
- like level 2 */
-
- IB_TRX_READ_COMMITTED = 1, /*!< Somewhat Oracle-like isolation,
- except that in range UPDATE and DELETE
- we must block phantom rows with
- next-key locks; SELECT ... FOR UPDATE
- and ... LOCK IN SHARE MODE only lock
- the index records, NOT the gaps before
- them, and thus allow free inserting;
- each consistent read reads its own
- snapshot */
-
- IB_TRX_REPEATABLE_READ = 2, /*!< All consistent reads in the same
- trx read the same snapshot; full
- next-key locking used in locking reads
- to block insertions into gaps */
-
- IB_TRX_SERIALIZABLE = 3 /*!< All plain SELECTs are converted to
- LOCK IN SHARE MODE reads */
-} ib_trx_level_t;
-
-/** Generic InnoDB callback prototype. */
-typedef void (*ib_cb_t)(void);
-
-#define IB_CFG_BINLOG_ENABLED 0x1
-#define IB_CFG_MDL_ENABLED 0x2
-#define IB_CFG_DISABLE_ROWLOCK 0x4
-
-/** The first argument to the InnoDB message logging function. By default
-it's set to stderr. You should treat ib_msg_stream_t as a void*, since
-it will probably change in the future. */
-typedef FILE* ib_msg_stream_t;
-
-/** All log messages are written to this function. It should have the same
-behavior as fprintf(3). */
-typedef int (*ib_msg_log_t)(ib_msg_stream_t, const char*, ...);
-
-/* Note: This is to make it easy for API users to have type
-checking for arguments to our functions. Making it ib_opaque_t
-by itself will result in pointer decay resulting in subverting
-of the compiler's type checking. */
-
-/** InnoDB tuple handle. This handle can refer to either a cluster index
-tuple or a secondary index tuple. There are two types of tuples for each
-type of index, making a total of four types of tuple handles. There
-is a tuple for reading the entire row contents and another for searching
-on the index key. */
-typedef struct ib_tuple_t* ib_tpl_t;
-
-/** InnoDB transaction handle, all database operations need to be covered
-by transactions. This handle represents a transaction. The handle can be
-created with ib_trx_begin(), you commit your changes with ib_trx_commit()
-and undo your changes using ib_trx_rollback(). If the InnoDB deadlock
-monitor rolls back the transaction then you need to free the transaction
-using the function ib_trx_release(). You can query the state of an InnoDB
-transaction by calling ib_trx_state(). */
-typedef struct trx_t* ib_trx_t;
-
-/** InnoDB cursor handle */
-typedef struct ib_cursor_t* ib_crsr_t;
-
-/*************************************************************//**
-This function is used to compare two data fields for which the data type
-is such that we must use the client code to compare them.
-
-@param col_meta column meta data
-@param p1 key
-@param p1_len key length
-@param p2 second key
-@param p2_len second key length
-@return 1, 0, -1, if p1 is greater than, equal to, or less than p2, respectively */
-
-typedef int (*ib_client_cmp_t)(
- const ib_col_meta_t* col_meta,
- const ib_byte_t* p1,
- ib_ulint_t p1_len,
- const ib_byte_t* p2,
- ib_ulint_t p2_len);
-
-/* This should be the same as univ.i */
-/** Represents SQL_NULL length */
-#define IB_SQL_NULL 0xFFFFFFFF
-/** The number of system columns in a row. */
-#define IB_N_SYS_COLS 3
-
-/** The maximum length of a text column. */
-#define MAX_TEXT_LEN 4096
-
-/* MySQL uses 3 byte UTF-8 encoding. */
-/** The maximum length of a column name in a table schema. */
-#define IB_MAX_COL_NAME_LEN (64 * 3)
-
-/** The maximum length of a table name (plus database name). */
-#define IB_MAX_TABLE_NAME_LEN (64 * 3) * 2
-
-/*****************************************************************//**
-Start a transaction that's been rolled back. This special function
-exists for the case when InnoDB's deadlock detector has rolled back
-a transaction. While the transaction has been rolled back the handle
-is still valid and can be reused by calling this function. If you
-don't want to reuse the transaction handle then you can free the handle
-by calling ib_trx_release().
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_trx_start(
-/*=========*/
- ib_trx_t ib_trx, /*!< in: transaction to restart */
- ib_trx_level_t ib_trx_level, /*!< in: trx isolation level */
- ib_bool_t read_write, /*!< in: true if read write
- transaction */
- ib_bool_t auto_commit, /*!< in: auto commit after each
- single DML */
- void* thd); /*!< in: THD */
-
-/*****************************************************************//**
-Begin a transaction. This will allocate a new transaction handle and
-put the transaction in the active state.
-@return innobase txn handle */
-
-ib_trx_t
-ib_trx_begin(
-/*=========*/
- ib_trx_level_t ib_trx_level, /*!< in: trx isolation level */
- ib_bool_t read_write, /*!< in: true if read write
- transaction */
- ib_bool_t auto_commit); /*!< in: auto commit after each
- single DML */
-
-/*****************************************************************//**
-Query the transaction's state. This function can be used to check for
-the state of the transaction in case it has been rolled back by the
-InnoDB deadlock detector. Note that when a transaction is selected as
-a victim for rollback, InnoDB will always return an appropriate error
-code indicating this. @see DB_DEADLOCK, @see DB_LOCK_TABLE_FULL and
-@see DB_LOCK_WAIT_TIMEOUT
-@return transaction state */
-
-ib_trx_state_t
-ib_trx_state(
-/*=========*/
- ib_trx_t ib_trx); /*!< in: trx handle */
-
-
-/*****************************************************************//**
-Check if the transaction is read_only */
-ib_u32_t
-ib_trx_read_only(
-/*=============*/
- ib_trx_t ib_trx); /*!< in: trx handle */
-
-/*****************************************************************//**
-Release the resources of the transaction. If the transaction was
-selected as a victim by InnoDB and rolled back then use this function
-to free the transaction handle.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_trx_release(
-/*===========*/
- ib_trx_t ib_trx); /*!< in: trx handle */
-
-/*****************************************************************//**
-Commit a transaction. This function will release the schema latches too.
-It will also free the transaction handle.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_trx_commit(
-/*==========*/
- ib_trx_t ib_trx); /*!< in: trx handle */
-
-/*****************************************************************//**
-Rollback a transaction. This function will release the schema latches too.
-It will also free the transaction handle.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_trx_rollback(
-/*============*/
- ib_trx_t ib_trx); /*!< in: trx handle */
-
-/*****************************************************************//**
-Open an InnoDB table and return a cursor handle to it.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_open_table_using_id(
-/*==========================*/
- ib_id_u64_t table_id, /*!< in: table id of table to open */
- ib_trx_t ib_trx, /*!< in: Current transaction handle
- can be NULL */
- ib_crsr_t* ib_crsr); /*!< out,own: InnoDB cursor */
-
-/*****************************************************************//**
-Open an InnoDB index and return a cursor handle to it.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_open_index_using_id(
-/*==========================*/
- ib_id_u64_t index_id, /*!< in: index id of index to open */
- ib_trx_t ib_trx, /*!< in: Current transaction handle
- can be NULL */
- ib_crsr_t* ib_crsr); /*!< out: InnoDB cursor */
-
-/*****************************************************************//**
-Open an InnoDB secondary index cursor and return a cursor handle to it.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_open_index_using_name(
-/*============================*/
- ib_crsr_t ib_open_crsr, /*!< in: open/active cursor */
- const char* index_name, /*!< in: secondary index name */
- ib_crsr_t* ib_crsr, /*!< out,own: InnoDB index cursor */
- int* idx_type, /*!< out: index is cluster index */
- ib_id_u64_t* idx_id); /*!< out: index id */
-
-/*****************************************************************//**
-Open an InnoDB table by name and return a cursor handle to it.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_open_table(
-/*=================*/
- const char* name, /*!< in: table name */
- ib_trx_t ib_trx, /*!< in: Current transaction handle
- can be NULL */
- ib_crsr_t* ib_crsr); /*!< out,own: InnoDB cursor */
-
-/*****************************************************************//**
-Reset the cursor.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_reset(
-/*============*/
- ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
-
-
-/*****************************************************************//**
-set a cursor trx to NULL*/
-
-void
-ib_cursor_clear_trx(
-/*================*/
- ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
-
-/*****************************************************************//**
-Close an InnoDB table and free the cursor.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_close(
-/*============*/
- ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
-
-/*****************************************************************//**
-Close the table, decrement n_ref_count count.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_close_table(
-/*==================*/
- ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
-
-/*****************************************************************//**
-update the cursor with new transactions and also reset the cursor
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_new_trx(
-/*==============*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
- ib_trx_t ib_trx); /*!< in: transaction */
-
-/*****************************************************************//**
-Commit the transaction in a cursor
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_commit_trx(
-/*=================*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
- ib_trx_t ib_trx); /*!< in: transaction */
-
-/********************************************************************//**
-Open a table using the table name, if found then increment table ref count.
-@return table instance if found */
-
-void*
-ib_open_table_by_name(
-/*==================*/
- const char* name); /*!< in: table name to lookup */
-
-/*****************************************************************//**
-Insert a row to a table.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_insert_row(
-/*=================*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor instance */
- const ib_tpl_t ib_tpl); /*!< in: tuple to insert */
-
-/*****************************************************************//**
-Update a row in a table.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_update_row(
-/*=================*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- const ib_tpl_t ib_old_tpl, /*!< in: Old tuple in table */
- const ib_tpl_t ib_new_tpl); /*!< in: New tuple to update */
-
-/*****************************************************************//**
-Delete a row in a table.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_delete_row(
-/*=================*/
- ib_crsr_t ib_crsr); /*!< in: cursor instance */
-
-/*****************************************************************//**
-Read current row.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_read_row(
-/*===============*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- ib_tpl_t ib_tpl, /*!< out: read cols into this tuple */
- void** row_buf, /*!< in/out: row buffer */
- ib_ulint_t* row_len); /*!< in/out: row buffer len */
-
-/*****************************************************************//**
-Move cursor to the first record in the table.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_first(
-/*============*/
- ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
-
-/*****************************************************************//**
-Move cursor to the last record in the table.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_last(
-/*===========*/
- ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
-
-/*****************************************************************//**
-Move cursor to the next record in the table.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_next(
-/*===========*/
- ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
-
-/*****************************************************************//**
-Search for key.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_cursor_moveto(
-/*=============*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- ib_tpl_t ib_tpl, /*!< in: Key to search for */
- ib_srch_mode_t ib_srch_mode); /*!< in: search mode */
-
-/*****************************************************************//**
-Set the match mode for ib_cursor_moveto(). */
-
-void
-ib_cursor_set_match_mode(
-/*=====================*/
- ib_crsr_t ib_crsr, /*!< in: Cursor instance */
- ib_match_mode_t match_mode); /*!< in: ib_cursor_moveto match mode */
-
-/*****************************************************************//**
-Set a column of the tuple. Make a copy using the tuple's heap.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_col_set_value(
-/*=============*/
- ib_tpl_t ib_tpl, /*!< in: tuple instance */
- ib_ulint_t col_no, /*!< in: column index in tuple */
- const void* src, /*!< in: data value */
- ib_ulint_t len, /*!< in: data value len */
- ib_bool_t need_cpy); /*!< in: if need memcpy */
-
-
-/*****************************************************************//**
-Get the size of the data available in the column of the tuple.
-@return bytes avail or IB_SQL_NULL */
-
-ib_ulint_t
-ib_col_get_len(
-/*===========*/
- ib_tpl_t ib_tpl, /*!< in: tuple instance */
- ib_ulint_t i); /*!< in: column index in tuple */
-
-/*****************************************************************//**
-Copy a column value from the tuple.
-@return bytes copied or IB_SQL_NULL */
-
-ib_ulint_t
-ib_col_copy_value(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: tuple instance */
- ib_ulint_t i, /*!< in: column index in tuple */
- void* dst, /*!< out: copied data value */
- ib_ulint_t len); /*!< in: max data value len to copy */
-
-/*************************************************************//**
-Read a signed int 8 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_i8(
-/*=============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_i8_t* ival); /*!< out: integer value */
-
-/*************************************************************//**
-Read an unsigned int 8 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_u8(
-/*=============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_u8_t* ival); /*!< out: integer value */
-
-/*************************************************************//**
-Read a signed int 16 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_i16(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_i16_t* ival); /*!< out: integer value */
-
-/*************************************************************//**
-Read an unsigned int 16 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_u16(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_u16_t* ival); /*!< out: integer value */
-
-/*************************************************************//**
-Read a signed int 32 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_i32(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_i32_t* ival); /*!< out: integer value */
-
-/*************************************************************//**
-Read an unsigned int 32 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_u32(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_u32_t* ival); /*!< out: integer value */
-
-/*************************************************************//**
-Read a signed int 64 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_i64(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_i64_t* ival); /*!< out: integer value */
-
-/*************************************************************//**
-Read an unsigned int 64 bit column from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_u64(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_u64_t* ival); /*!< out: integer value */
-
-/*****************************************************************//**
-Get a column value pointer from the tuple.
-@return NULL or pointer to buffer */
-
-const void*
-ib_col_get_value(
-/*=============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i); /*!< in: column number */
-
-/*****************************************************************//**
-Get a column type, length and attributes from the tuple.
-@return len of column data */
-
-ib_ulint_t
-ib_col_get_meta(
-/*============*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t i, /*!< in: column number */
- ib_col_meta_t* ib_col_meta); /*!< out: column meta data */
-
-/*****************************************************************//**
-"Clear" or reset an InnoDB tuple. We free the heap and recreate the tuple.
-@return new tuple, or NULL */
-
-ib_tpl_t
-ib_tuple_clear(
-/*============*/
- ib_tpl_t ib_tpl); /*!< in: InnoDB tuple */
-
-/*****************************************************************//**
-Create a new cluster key search tuple and copy the contents of the
-secondary index key tuple columns that refer to the cluster index record
-to the cluster key. It does a deep copy of the column data.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_tuple_get_cluster_key(
-/*=====================*/
- ib_crsr_t ib_crsr, /*!< in: secondary index cursor */
- ib_tpl_t* ib_dst_tpl, /*!< out,own: destination tuple */
- const ib_tpl_t ib_src_tpl); /*!< in: source tuple */
-
-/*****************************************************************//**
-Copy the contents of source tuple to destination tuple. The tuples
-must be of the same type and belong to the same table/index.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_tuple_copy(
-/*==========*/
- ib_tpl_t ib_dst_tpl, /*!< in: destination tuple */
- const ib_tpl_t ib_src_tpl); /*!< in: source tuple */
-
-/*****************************************************************//**
-Create an InnoDB tuple used for index/table search.
-@return tuple for current index */
-
-ib_tpl_t
-ib_sec_search_tuple_create(
-/*=======================*/
- ib_crsr_t ib_crsr); /*!< in: Cursor instance */
-
-/*****************************************************************//**
-Create an InnoDB tuple used for index/table search.
-@return tuple for current index */
-
-ib_tpl_t
-ib_sec_read_tuple_create(
-/*=====================*/
- ib_crsr_t ib_crsr); /*!< in: Cursor instance */
-
-/*****************************************************************//**
-Create an InnoDB tuple used for table key operations.
-@return tuple for current table */
-
-ib_tpl_t
-ib_clust_search_tuple_create(
-/*=========================*/
- ib_crsr_t ib_crsr); /*!< in: Cursor instance */
-
-/*****************************************************************//**
-Create an InnoDB tuple for table row operations.
-@return tuple for current table */
-
-ib_tpl_t
-ib_clust_read_tuple_create(
-/*=======================*/
- ib_crsr_t ib_crsr); /*!< in: Cursor instance */
-
-/*****************************************************************//**
-Return the number of user columns in the tuple definition.
-@return number of user columns */
-
-ib_ulint_t
-ib_tuple_get_n_user_cols(
-/*=====================*/
- const ib_tpl_t ib_tpl); /*!< in: Tuple for current table */
-
-/*****************************************************************//**
-Return the number of columns in the tuple definition.
-@return number of columns */
-
-ib_ulint_t
-ib_tuple_get_n_cols(
-/*================*/
- const ib_tpl_t ib_tpl); /*!< in: Tuple for current table */
-
-/*****************************************************************//**
-Destroy an InnoDB tuple. */
-
-void
-ib_tuple_delete(
-/*============*/
- ib_tpl_t ib_tpl); /*!< in,own: Tuple instance to delete */
-
-/*****************************************************************//**
-Truncate a table. The cursor handle will be closed and set to NULL
-on success.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_cursor_truncate(
-/*===============*/
- ib_crsr_t* ib_crsr, /*!< in/out: cursor for table
- to truncate */
- ib_id_u64_t* table_id); /*!< out: new table id */
-
-/*****************************************************************//**
-Get a table id.
-@return DB_SUCCESS if found */
-
-ib_err_t
-ib_table_get_id(
-/*============*/
- const char* table_name, /*!< in: table to find */
- ib_id_u64_t* table_id); /*!< out: table id if found */
-
-/*****************************************************************//**
-Get an index id.
-@return DB_SUCCESS if found */
-
-ib_err_t
-ib_index_get_id(
-/*============*/
- const char* table_name, /*!< in: find index for this table */
- const char* index_name, /*!< in: index to find */
- ib_id_u64_t* index_id); /*!< out: index id if found */
-
-/*****************************************************************//**
-Check if cursor is positioned.
-@return IB_TRUE if positioned */
-
-ib_bool_t
-ib_cursor_is_positioned(
-/*====================*/
- const ib_crsr_t ib_crsr); /*!< in: InnoDB cursor instance */
-
-/*****************************************************************//**
-Checks if the data dictionary is latched in exclusive mode by a
-user transaction.
-@return TRUE if exclusive latch */
-
-ib_bool_t
-ib_schema_lock_is_exclusive(
-/*========================*/
- const ib_trx_t ib_trx); /*!< in: transaction */
-
-/*****************************************************************//**
-Lock an InnoDB cursor/table.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_cursor_lock(
-/*===========*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
- ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */
-
-/*****************************************************************//**
-Lock an InnoDB table using the table id.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_table_lock(
-/*===========*/
- ib_trx_t ib_trx, /*!< in/out: transaction */
- ib_id_u64_t table_id, /*!< in: table id */
- ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */
-
-/*****************************************************************//**
-Set the Lock mode of the cursor.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_cursor_set_lock_mode(
-/*====================*/
- ib_crsr_t ib_crsr, /*!< in/out: InnoDB cursor */
- ib_lck_mode_t ib_lck_mode); /*!< in: InnoDB lock mode */
-
-/*****************************************************************//**
-Set need to access clustered index record flag. */
-
-void
-ib_cursor_set_cluster_access(
-/*=========================*/
- ib_crsr_t ib_crsr); /*!< in/out: InnoDB cursor */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_i8(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_i8_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_i16(
-/*=================*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_i16_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_i32(
-/*===============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_i32_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_i64(
-/*===============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_i64_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_u8(
-/*==============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_u8_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_u16(
-/*===============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_u16_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_u32(
-/*=================*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_u32_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Write an integer value to a column. Integers are stored in big-endian
-format and will need to be converted from the host format.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_u64(
-/*===============*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- ib_u64_t val); /*!< in: value to write */
-
-/*****************************************************************//**
-Inform the cursor that it's the start of an SQL statement. */
-
-void
-ib_cursor_stmt_begin(
-/*=================*/
- ib_crsr_t ib_crsr); /*!< in: cursor */
-
-/*****************************************************************//**
-Write a double value to a column.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_double(
-/*==================*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- int col_no, /*!< in: column number */
- double val); /*!< in: value to write */
-
-/*************************************************************//**
-Read a double column value from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_double(
-/*=================*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t col_no, /*!< in: column number */
- double* dval); /*!< out: double value */
-
-/*****************************************************************//**
-Write a float value to a column.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_write_float(
-/*=================*/
- ib_tpl_t ib_tpl, /*!< in/out: tuple to write to */
- int col_no, /*!< in: column number */
- float val); /*!< in: value to write */
-
-/*************************************************************//**
-Read a float value from an InnoDB tuple.
-@return DB_SUCCESS or error */
-
-ib_err_t
-ib_tuple_read_float(
-/*================*/
- ib_tpl_t ib_tpl, /*!< in: InnoDB tuple */
- ib_ulint_t col_no, /*!< in: column number */
- float* fval); /*!< out: float value */
-
-/*****************************************************************//**
-Get a column name from the cursor.
-@return name of the column */
-
-const char*
-ib_col_get_name(
-/*============*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- ib_ulint_t i); /*!< in: column index in tuple */
-
-/*****************************************************************//**
-Get an index field name from the cursor.
-@return name of the field */
-
-const char*
-ib_get_idx_field_name(
-/*==================*/
- ib_crsr_t ib_crsr, /*!< in: InnoDB cursor instance */
- ib_ulint_t i); /*!< in: column index in tuple */
-
-/*****************************************************************//**
-Truncate a table.
-@return DB_SUCCESS or error code */
-
-ib_err_t
-ib_table_truncate(
-/*==============*/
- const char* table_name, /*!< in: table name */
- ib_id_u64_t* table_id); /*!< out: new table id */
-
-/*****************************************************************//**
-Frees a possible InnoDB trx object associated with the current THD.
-@return DB_SUCCESS or error number */
-
-ib_err_t
-ib_close_thd(
-/*=========*/
- void* thd); /*!< in: handle to the MySQL
- thread of the user whose resources
- should be free'd */
-
-/*****************************************************************//**
-Get generic configure status
-@return configure status*/
-
-int
-ib_cfg_get_cfg();
-/*============*/
-
-/*****************************************************************//**
-Increase/decrease the memcached sync count of table to sync memcached
-DML with SQL DDLs.
-@return DB_SUCCESS or error number */
-ib_err_t
-ib_cursor_set_memcached_sync(
-/*=========================*/
- ib_crsr_t ib_crsr, /*!< in: cursor */
- ib_bool_t flag); /*!< in: true for increasing */
-
-/*****************************************************************//**
-Check whether the table name conforms to our requirements. Currently
-we only do a simple check for the presence of a '/'.
-@return DB_SUCCESS or err code */
-
-ib_err_t
-ib_table_name_check(
-/*================*/
- const char* name); /*!< in: table name to check */
-
-/*****************************************************************//**
-Return isolation configuration set by "innodb_api_trx_level"
-@return trx isolation level*/
-
-ib_trx_state_t
-ib_cfg_trx_level();
-/*==============*/
-
-/*****************************************************************//**
-Return configure value for background commit interval (in seconds)
-@return background commit interval (in seconds) */
-
-ib_ulint_t
-ib_cfg_bk_commit_interval();
-/*=======================*/
-
-/*****************************************************************//**
-Get a trx start time.
-@return trx start_time */
-
-ib_u64_t
-ib_trx_get_start_time(
-/*==================*/
- ib_trx_t ib_trx); /*!< in: transaction */
-
-#endif /* api0api_h */
diff --git a/storage/xtradb/include/api0misc.h b/storage/xtradb/include/api0misc.h
deleted file mode 100644
index fcd748390d1..00000000000
--- a/storage/xtradb/include/api0misc.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2008, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/api0misc.h
-InnoDB Native API
-
-3/20/2011 Jimmy Yang extracted from Embedded InnoDB
-2008 Created by Sunny Bains
-*******************************************************/
-
-#ifndef api0misc_h
-#define api0misc_h
-
-#include "univ.i"
-#include "os0file.h"
-#include "que0que.h"
-#include "trx0trx.h"
-
-/** Whether binlog is enabled for applications using InnoDB APIs */
-extern my_bool ib_binlog_enabled;
-
-/** Whether MySQL MDL is enabled for applications using InnoDB APIs */
-extern my_bool ib_mdl_enabled;
-
-/** Whether InnoDB row lock is disabled for applications using InnoDB APIs */
-extern my_bool ib_disable_row_lock;
-
-/** configure value for transaction isolation level */
-extern ulong ib_trx_level_setting;
-
-/** configure value for background commit interval (in seconds) */
-extern ulong ib_bk_commit_interval;
-
-/********************************************************************
-Handles user errors and lock waits detected by the database engine.
-@return TRUE if it was a lock wait and we should continue running
-the query thread */
-UNIV_INTERN
-ibool
-ib_handle_errors(
-/*=============*/
- dberr_t* new_err, /*!< out: possible new error
- encountered in lock wait, or if
- no new error, the value of
- trx->error_state at the entry of this
- function */
- trx_t* trx, /*!< in: transaction */
- que_thr_t* thr, /*!< in: query thread */
- trx_savept_t* savept); /*!< in: savepoint or NULL */
-
-/*************************************************************************
-Sets a lock on a table.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-ib_trx_lock_table_with_retry(
-/*=========================*/
- trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table, /*!< in: table to lock */
- enum lock_mode mode); /*!< in: lock mode */
-
-#endif /* api0misc_h */
diff --git a/storage/xtradb/include/btr0btr.h b/storage/xtradb/include/btr0btr.h
deleted file mode 100644
index 9ab62f7739f..00000000000
--- a/storage/xtradb/include/btr0btr.h
+++ /dev/null
@@ -1,883 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2014, 2015, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/btr0btr.h
-The B-tree
-
-Created 6/2/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef btr0btr_h
-#define btr0btr_h
-
-#include "univ.i"
-
-#include "dict0dict.h"
-#include "data0data.h"
-#include "page0cur.h"
-#include "mtr0mtr.h"
-#include "btr0types.h"
-
-#ifndef UNIV_HOTBACKUP
-/** Maximum record size which can be stored on a page, without using the
-special big record storage structure */
-#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200)
-
-/** @brief Maximum depth of a B-tree in InnoDB.
-
-Note that this isn't a maximum as such; none of the tree operations
-avoid producing trees bigger than this. It is instead a "max depth
-that other code must work with", useful for e.g. fixed-size arrays
-that must store some information about each level in a tree. In other
-words: if a B-tree with bigger depth than this is encountered, it is
-not acceptable for it to lead to mysterious memory corruption, but it
-is acceptable for the program to die with a clear assert failure. */
-#define BTR_MAX_LEVELS 100
-
-/** Latching modes for btr_cur_search_to_nth_level(). */
-enum btr_latch_mode {
- /** Search a record on a leaf page and S-latch it. */
- BTR_SEARCH_LEAF = RW_S_LATCH,
- /** (Prepare to) modify a record on a leaf page and X-latch it. */
- BTR_MODIFY_LEAF = RW_X_LATCH,
- /** Obtain no latches. */
- BTR_NO_LATCHES = RW_NO_LATCH,
- /** Start modifying the entire B-tree. */
- BTR_MODIFY_TREE = 33,
- /** Continue modifying the entire B-tree. */
- BTR_CONT_MODIFY_TREE = 34,
- /** Search the previous record. */
- BTR_SEARCH_PREV = 35,
- /** Modify the previous record. */
- BTR_MODIFY_PREV = 36,
- /** Weaker BTR_MODIFY_TREE that does not lock the leaf page siblings,
- used for fake changes. */
- BTR_SEARCH_TREE = 37 /* BTR_MODIFY_TREE | 4 */
-};
-
-/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */
-
-/** If this is ORed to btr_latch_mode, it means that the search tuple
-will be inserted to the index, at the searched position.
-When the record is not in the buffer pool, try to use the insert buffer. */
-#define BTR_INSERT 512
-
-/** This flag ORed to btr_latch_mode says that we do the search in query
-optimization */
-#define BTR_ESTIMATE 1024
-
-/** This flag ORed to BTR_INSERT says that we can ignore possible
-UNIQUE definition on secondary indexes when we decide if we can use
-the insert buffer to speed up inserts */
-#define BTR_IGNORE_SEC_UNIQUE 2048
-
-/** Try to delete mark the record at the searched position using the
-insert/delete buffer when the record is not in the buffer pool. */
-#define BTR_DELETE_MARK 4096
-
-/** Try to purge the record at the searched position using the insert/delete
-buffer when the record is not in the buffer pool. */
-#define BTR_DELETE 8192
-
-/** In the case of BTR_SEARCH_LEAF or BTR_MODIFY_LEAF, the caller is
-already holding an S latch on the index tree */
-#define BTR_ALREADY_S_LATCHED 16384
-
-#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \
- ((latch_mode) & ~(BTR_INSERT \
- | BTR_DELETE_MARK \
- | BTR_DELETE \
- | BTR_ESTIMATE \
- | BTR_IGNORE_SEC_UNIQUE \
- | BTR_ALREADY_S_LATCHED))
-#endif /* UNIV_HOTBACKUP */
-
-/**************************************************************//**
-Report that an index page is corrupted. */
-UNIV_INTERN
-void
-btr_corruption_report(
-/*==================*/
- const buf_block_t* block, /*!< in: corrupted block */
- const dict_index_t* index) /*!< in: index tree */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
-
-/** Assert that a B-tree page is not corrupted.
-@param block buffer block containing a B-tree page
-@param index the B-tree index */
-#define btr_assert_not_corrupted(block, index) \
- if ((ibool) !!page_is_comp(buf_block_get_frame(block)) \
- != dict_table_is_comp((index)->table)) { \
- btr_corruption_report(block, index); \
- ut_error; \
- }
-
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_BLOB_DEBUG
-# include "ut0rbt.h"
-/** An index->blobs entry for keeping track of off-page column references */
-struct btr_blob_dbg_t
-{
- unsigned blob_page_no:32; /*!< first BLOB page number */
- unsigned ref_page_no:32; /*!< referring page number */
- unsigned ref_heap_no:16; /*!< referring heap number */
- unsigned ref_field_no:10; /*!< referring field number */
- unsigned owner:1; /*!< TRUE if BLOB owner */
- unsigned always_owner:1; /*!< TRUE if always
- has been the BLOB owner;
- reset to TRUE on B-tree
- page splits and merges */
- unsigned del:1; /*!< TRUE if currently
- delete-marked */
-};
-
-/**************************************************************//**
-Add a reference to an off-page column to the index->blobs map. */
-UNIV_INTERN
-void
-btr_blob_dbg_add_blob(
-/*==================*/
- const rec_t* rec, /*!< in: clustered index record */
- ulint field_no, /*!< in: number of off-page column */
- ulint page_no, /*!< in: start page of the column */
- dict_index_t* index, /*!< in/out: index tree */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Display the references to off-page columns.
-This function is to be called from a debugger,
-for example when a breakpoint on ut_dbg_assertion_failed is hit. */
-UNIV_INTERN
-void
-btr_blob_dbg_print(
-/*===============*/
- const dict_index_t* index) /*!< in: index tree */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Check that there are no references to off-page columns from or to
-the given page. Invoked when freeing or clearing a page.
-@return TRUE when no orphan references exist */
-UNIV_INTERN
-ibool
-btr_blob_dbg_is_empty(
-/*==================*/
- dict_index_t* index, /*!< in: index */
- ulint page_no) /*!< in: page number */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/**************************************************************//**
-Modify the 'deleted' flag of a record. */
-UNIV_INTERN
-void
-btr_blob_dbg_set_deleted_flag(
-/*==========================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in/out: index */
- const ulint* offsets,/*!< in: rec_get_offs(rec, index) */
- ibool del) /*!< in: TRUE=deleted, FALSE=exists */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Change the ownership of an off-page column. */
-UNIV_INTERN
-void
-btr_blob_dbg_owner(
-/*===============*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in/out: index */
- const ulint* offsets,/*!< in: rec_get_offs(rec, index) */
- ulint i, /*!< in: ith field in rec */
- ibool own) /*!< in: TRUE=owned, FALSE=disowned */
- MY_ATTRIBUTE((nonnull));
-/** Assert that there are no BLOB references to or from the given page. */
-# define btr_blob_dbg_assert_empty(index, page_no) \
- ut_a(btr_blob_dbg_is_empty(index, page_no))
-#else /* UNIV_BLOB_DEBUG */
-# define btr_blob_dbg_add_blob(rec, field_no, page, index, ctx) ((void) 0)
-# define btr_blob_dbg_set_deleted_flag(rec, index, offsets, del)((void) 0)
-# define btr_blob_dbg_owner(rec, index, offsets, i, val) ((void) 0)
-# define btr_blob_dbg_assert_empty(index, page_no) ((void) 0)
-#endif /* UNIV_BLOB_DEBUG */
-
-/**************************************************************//**
-Gets the root node of a tree and x-latches it.
-@return root page, x-latched */
-UNIV_INTERN
-page_t*
-btr_root_get(
-/*=========*/
- const dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull));
-
-/**************************************************************//**
-Checks and adjusts the root node of a tree during IMPORT TABLESPACE.
-@return error code, or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-btr_root_adjust_on_import(
-/*======================*/
- const dict_index_t* index) /*!< in: index tree */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/**************************************************************//**
-Gets the height of the B-tree (the level of the root, when the leaf
-level is assumed to be 0). The caller must hold an S or X latch on
-the index.
-@return tree height (level of the root) */
-UNIV_INTERN
-ulint
-btr_height_get(
-/*===========*/
- dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**************************************************************//**
-Gets a buffer page and declares its latching order level. */
-UNIV_INLINE
-buf_block_t*
-btr_block_get_func(
-/*===============*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- ulint mode, /*!< in: latch mode */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- dict_index_t* index, /*!< in: index tree, may be NULL
- if it is not an insert buffer tree */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-# ifdef UNIV_SYNC_DEBUG
-/** Gets a buffer page and declares its latching order level.
-@param space tablespace identifier
-@param zip_size compressed page size in bytes or 0 for uncompressed pages
-@param page_no page number
-@param mode latch mode
-@param index index tree, may be NULL if not the insert buffer tree
-@param mtr mini-transaction handle
-@return the block descriptor */
-# define btr_block_get(space,zip_size,page_no,mode,index,mtr) \
- btr_block_get_func(space,zip_size,page_no,mode, \
- __FILE__,__LINE__,index,mtr)
-# else /* UNIV_SYNC_DEBUG */
-/** Gets a buffer page and declares its latching order level.
-@param space tablespace identifier
-@param zip_size compressed page size in bytes or 0 for uncompressed pages
-@param page_no page number
-@param mode latch mode
-@param idx index tree, may be NULL if not the insert buffer tree
-@param mtr mini-transaction handle
-@return the block descriptor */
-# define btr_block_get(space,zip_size,page_no,mode,idx,mtr) \
- btr_block_get_func(space,zip_size,page_no,mode, \
- __FILE__,__LINE__,idx,mtr)
-# endif /* UNIV_SYNC_DEBUG */
-/** Gets a buffer page and declares its latching order level.
-@param space tablespace identifier
-@param zip_size compressed page size in bytes or 0 for uncompressed pages
-@param page_no page number
-@param mode latch mode
-@param idx index tree, may be NULL if not the insert buffer tree
-@param mtr mini-transaction handle
-@return the uncompressed page frame */
-UNIV_INLINE
-page_t*
-btr_page_get(
-/*=========*/
- ulint space,
- ulint zip_size,
- ulint root_page_no,
- ulint mode,
- dict_index_t* index,
- mtr_t* mtr)
- MY_ATTRIBUTE((warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
-/**************************************************************//**
-Gets the index id field of a page.
-@return index id */
-UNIV_INLINE
-index_id_t
-btr_page_get_index_id(
-/*==================*/
- const page_t* page) /*!< in: index page */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Gets the node level field in an index page.
-@return level, leaf level == 0 */
-UNIV_INLINE
-ulint
-btr_page_get_level_low(
-/*===================*/
- const page_t* page) /*!< in: index page */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-#define btr_page_get_level(page, mtr) btr_page_get_level_low(page)
-/********************************************************//**
-Gets the next index page number.
-@return next page number */
-UNIV_INLINE
-ulint
-btr_page_get_next(
-/*==============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr) /*!< in: mini-transaction handle */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************//**
-Gets the previous index page number.
-@return prev page number */
-UNIV_INLINE
-ulint
-btr_page_get_prev(
-/*==============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr) /*!< in: mini-transaction handle */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*************************************************************//**
-Gets pointer to the previous user record in the tree. It is assumed
-that the caller has appropriate latches on the page and its neighbor.
-@return previous user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_prev_user_rec(
-/*==================*/
- rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
- needed, also to the previous page */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*************************************************************//**
-Gets pointer to the next user record in the tree. It is assumed
-that the caller has appropriate latches on the page and its neighbor.
-@return next user record, NULL if there is none */
-UNIV_INTERN
-rec_t*
-btr_get_next_user_rec(
-/*==================*/
- rec_t* rec, /*!< in: record on leaf level */
- mtr_t* mtr) /*!< in: mtr holding a latch on the page, and if
- needed, also to the next page */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**************************************************************//**
-Releases the latch on a leaf page and bufferunfixes it. */
-UNIV_INLINE
-void
-btr_leaf_page_release(
-/*==================*/
- buf_block_t* block, /*!< in: buffer block */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
- BTR_MODIFY_LEAF */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Gets the child node file address in a node pointer.
-NOTE: the offsets array must contain all offsets for the record since
-we read the last field according to offsets and assume that it contains
-the child page number. In other words offsets must have been retrieved
-with rec_get_offsets(n_fields=ULINT_UNDEFINED).
-@return child node address */
-UNIV_INLINE
-ulint
-btr_node_ptr_get_child_page_no(
-/*===========================*/
- const rec_t* rec, /*!< in: node pointer record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/************************************************************//**
-Creates the root node for a new index tree.
-@return page number of the created root, FIL_NULL if did not succeed */
-UNIV_INTERN
-ulint
-btr_create(
-/*=======*/
- ulint type, /*!< in: type of the index */
- ulint space, /*!< in: space where created */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- index_id_t index_id,/*!< in: index id */
- dict_index_t* index, /*!< in: index */
- mtr_t* mtr) /*!< in: mini-transaction handle */
- MY_ATTRIBUTE((nonnull));
-/************************************************************//**
-Frees a B-tree except the root page, which MUST be freed after this
-by calling btr_free_root. */
-UNIV_INTERN
-void
-btr_free_but_not_root(
-/*==================*/
- ulint space, /*!< in: space where created */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint root_page_no); /*!< in: root page number */
-/************************************************************//**
-Frees the B-tree root page. Other tree MUST already have been freed. */
-UNIV_INTERN
-void
-btr_free_root(
-/*==========*/
- ulint space, /*!< in: space where created */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint root_page_no, /*!< in: root page number */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-/*************************************************************//**
-Makes tree one level higher by splitting the root, and inserts
-the tuple. It is assumed that mtr contains an x-latch on the tree.
-NOTE that the operation of this function must always succeed,
-we cannot reverse it: therefore enough free disk space must be
-guaranteed to be available before this function is called.
-@return inserted record */
-UNIV_INTERN
-rec_t*
-btr_root_raise_and_insert(
-/*======================*/
- ulint flags, /*!< in: undo logging and locking flags */
- btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
- on the root page; when the function returns,
- the cursor is positioned on the predecessor
- of the inserted record */
- ulint** offsets,/*!< out: offsets on inserted record */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap
- that can be emptied, or NULL */
- const dtuple_t* tuple, /*!< in: tuple to insert */
- ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr) /*!< in: mtr */
- __attribute__((nonnull(2,3,4,7), warn_unused_result));
-/*************************************************************//**
-Reorganizes an index page.
-
-IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index. This has to
-be done either within the same mini-transaction, or by invoking
-ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
-IBUF_BITMAP_FREE is unaffected by reorganization.
-
-@retval true if the operation was successful
-@retval false if it is a compressed page, and recompression failed */
-UNIV_INTERN
-bool
-btr_page_reorganize_low(
-/*====================*/
- bool recovery,/*!< in: true if called in recovery:
- locks should not be updated, i.e.,
- there cannot exist locks on the
- page, and a hash index should not be
- dropped: it cannot exist */
- ulint z_level,/*!< in: compression level to be used
- if dealing with compressed page */
- page_cur_t* cursor, /*!< in/out: page cursor */
- dict_index_t* index, /*!< in: the index tree of the page */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*************************************************************//**
-Reorganizes an index page.
-
-IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index. This has to
-be done either within the same mini-transaction, or by invoking
-ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
-IBUF_BITMAP_FREE is unaffected by reorganization.
-
-@retval true if the operation was successful
-@retval false if it is a compressed page, and recompression failed */
-UNIV_INTERN
-bool
-btr_page_reorganize(
-/*================*/
- page_cur_t* cursor, /*!< in/out: page cursor */
- dict_index_t* index, /*!< in: the index tree of the page */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-/*************************************************************//**
-Decides if the page should be split at the convergence point of
-inserts converging to left.
-@return TRUE if split recommended */
-UNIV_INTERN
-ibool
-btr_page_get_split_rec_to_left(
-/*===========================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec)/*!< out: if split recommended,
- the first record on upper half page,
- or NULL if tuple should be first */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*************************************************************//**
-Decides if the page should be split at the convergence point of
-inserts converging to right.
-@return TRUE if split recommended */
-UNIV_INTERN
-ibool
-btr_page_get_split_rec_to_right(
-/*============================*/
- btr_cur_t* cursor, /*!< in: cursor at which to insert */
- rec_t** split_rec)/*!< out: if split recommended,
- the first record on upper half page,
- or NULL if tuple should be first */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*************************************************************//**
-Splits an index page to halves and inserts the tuple. It is assumed
-that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
-released within this function! NOTE that the operation of this
-function must always succeed, we cannot reverse it: therefore enough
-free disk space (2 pages) must be guaranteed to be available before
-this function is called.
-
-@return inserted record */
-UNIV_INTERN
-rec_t*
-btr_page_split_and_insert(
-/*======================*/
- ulint flags, /*!< in: undo logging and locking flags */
- btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
- function returns, the cursor is positioned
- on the predecessor of the inserted record */
- ulint** offsets,/*!< out: offsets on inserted record */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap
- that can be emptied, or NULL */
- const dtuple_t* tuple, /*!< in: tuple to insert */
- ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr) /*!< in: mtr */
- __attribute__((nonnull(2,3,4,7), warn_unused_result));
-/*******************************************************//**
-Inserts a data tuple to a tree on a non-leaf level. It is assumed
-that mtr holds an x-latch on the tree. */
-UNIV_INTERN
-void
-btr_insert_on_non_leaf_level_func(
-/*==============================*/
- ulint flags, /*!< in: undo logging and locking flags */
- dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: level, must be > 0 */
- dtuple_t* tuple, /*!< in: the record to be inserted */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull));
-# define btr_insert_on_non_leaf_level(f,i,l,t,m) \
- btr_insert_on_non_leaf_level_func(f,i,l,t,__FILE__,__LINE__,m)
-#endif /* !UNIV_HOTBACKUP */
-/****************************************************************//**
-Sets a record as the predefined minimum record. */
-UNIV_INTERN
-void
-btr_set_min_rec_mark(
-/*=================*/
- rec_t* rec, /*!< in/out: record */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull));
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Deletes on the upper level the node pointer to a page. */
-UNIV_INTERN
-void
-btr_node_ptr_delete(
-/*================*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: page whose node pointer is deleted */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull));
-#ifdef UNIV_DEBUG
-/************************************************************//**
-Checks that the node pointer to a page is appropriate.
-@return TRUE */
-UNIV_INTERN
-ibool
-btr_check_node_ptr(
-/*===============*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: index page */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* UNIV_DEBUG */
-/*************************************************************//**
-Tries to merge the page first to the left immediate brother if such a
-brother exists, and the node pointers to the current page and to the
-brother reside on the same page. If the left brother does not satisfy these
-conditions, looks at the right brother. If the page is the only one on that
-level lifts the records of the page to the father page, thus reducing the
-tree height. It is assumed that mtr holds an x-latch on the tree and on the
-page. If cursor is on the leaf level, mtr must also hold x-latches to
-the brothers, if they exist.
-@return TRUE on success */
-UNIV_INTERN
-ibool
-btr_compress(
-/*=========*/
- btr_cur_t* cursor, /*!< in/out: cursor on the page to merge
- or lift; the page must not be empty:
- when deleting records, use btr_discard_page()
- if the page would become empty */
- ibool adjust, /*!< in: TRUE if should adjust the
- cursor position even if compression occurs */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-/*************************************************************//**
-Discards a page from a B-tree. This is used to remove the last record from
-a B-tree page: the whole page must be removed at the same time. This cannot
-be used for the root page, which is allowed to be empty. */
-UNIV_INTERN
-void
-btr_discard_page(
-/*=============*/
- btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
- the root page */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
-/****************************************************************//**
-Parses the redo log record for setting an index record as the predefined
-minimum record.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-btr_parse_set_min_rec_mark(
-/*=======================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- ulint comp, /*!< in: nonzero=compact page format */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
- MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
-/***********************************************************//**
-Parses a redo log record of reorganizing a page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-btr_parse_page_reorganize(
-/*======================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- dict_index_t* index, /*!< in: record descriptor */
- bool compressed,/*!< in: true if compressed page */
- buf_block_t* block, /*!< in: page to be reorganized, or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
- MY_ATTRIBUTE((nonnull(1,2,3), warn_unused_result));
-#ifndef UNIV_HOTBACKUP
-/**************************************************************//**
-Gets the number of pages in a B-tree.
-@return number of pages, or ULINT_UNDEFINED if the index is unavailable */
-UNIV_INTERN
-ulint
-btr_get_size(
-/*=========*/
- dict_index_t* index, /*!< in: index */
- ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
- mtr_t* mtr) /*!< in/out: mini-transaction where index
- is s-latched */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**************************************************************//**
-Gets the number of reserved and used pages in a B-tree.
-@return number of pages reserved, or ULINT_UNDEFINED if the index
-is unavailable */
-UNIV_INTERN
-ulint
-btr_get_size_and_reserved(
-/*======================*/
- dict_index_t* index, /*!< in: index */
- ulint flag, /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
- ulint* used, /*!< out: number of pages used (<= reserved) */
- mtr_t* mtr) /*!< in/out: mini-transaction where index
- is s-latched */
- __attribute__((nonnull));
-
-/**************************************************************//**
-Allocates a new file page to be used in an index tree. NOTE: we assume
-that the caller has made the reservation for free extents!
-@retval NULL if no page could be allocated
-@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
-(init_mtr == mtr, or the page was not previously freed in mtr)
-@retval block (not allocated or initialized) otherwise */
-UNIV_INTERN
-buf_block_t*
-btr_page_alloc(
-/*===========*/
- dict_index_t* index, /*!< in: index tree */
- ulint hint_page_no, /*!< in: hint of a good page */
- byte file_direction, /*!< in: direction where a possible
- page split is made */
- ulint level, /*!< in: level where the page is placed
- in the tree */
- mtr_t* mtr, /*!< in/out: mini-transaction
- for the allocation */
- mtr_t* init_mtr) /*!< in/out: mini-transaction
- for x-latching and initializing
- the page */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**************************************************************//**
-Frees a file page used in an index tree. NOTE: cannot free field external
-storage pages because the page must contain info on its level. */
-UNIV_INTERN
-void
-btr_page_free(
-/*==========*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: block to be freed, x-latched */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Frees a file page used in an index tree. Can be used also to BLOB
-external storage pages, because the page level 0 can be given as an
-argument. */
-UNIV_INTERN
-void
-btr_page_free_low(
-/*==============*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: block to be freed, x-latched */
- ulint level, /*!< in: page level */
- bool blob, /*!< in: blob page */
- mtr_t* mtr) /*!< in: mtr */
- __attribute__((nonnull));
-/*************************************************************//**
-Reorganizes an index page.
-
-IMPORTANT: On success, the caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index. This has to
-be done either within the same mini-transaction, or by invoking
-ibuf_reset_free_bits() before mtr_commit(). On uncompressed pages,
-IBUF_BITMAP_FREE is unaffected by reorganization.
-
-@retval true if the operation was successful
-@retval false if it is a compressed page, and recompression failed */
-UNIV_INTERN
-bool
-btr_page_reorganize_block(
-/*======================*/
- bool recovery,/*!< in: true if called in recovery:
- locks should not be updated, i.e.,
- there cannot exist locks on the
- page, and a hash index should not be
- dropped: it cannot exist */
- ulint z_level,/*!< in: compression level to be used
- if dealing with compressed page */
- buf_block_t* block, /*!< in/out: B-tree page */
- dict_index_t* index, /*!< in: the index tree of the page */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- __attribute__((nonnull));
-
-#ifdef UNIV_BTR_PRINT
-/*************************************************************//**
-Prints size info of a B-tree. */
-UNIV_INTERN
-void
-btr_print_size(
-/*===========*/
- dict_index_t* index) /*!< in: index tree */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Prints directories and other info of all nodes in the index. */
-UNIV_INTERN
-void
-btr_print_index(
-/*============*/
- dict_index_t* index, /*!< in: index */
- ulint width) /*!< in: print this many entries from start
- and end */
- MY_ATTRIBUTE((nonnull));
-#endif /* UNIV_BTR_PRINT */
-/************************************************************//**
-Checks the size and number of fields in a record based on the definition of
-the index.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-btr_index_rec_validate(
-/*===================*/
- const rec_t* rec, /*!< in: index record */
- const dict_index_t* index, /*!< in: index */
- ibool dump_on_error) /*!< in: TRUE if the function
- should print hex dump of record
- and page on error */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**************************************************************//**
-Checks the consistency of an index tree.
-@return DB_SUCCESS if ok, error code if not */
-UNIV_INTERN
-dberr_t
-btr_validate_index(
-/*===============*/
- dict_index_t* index, /*!< in: index */
- const trx_t* trx) /*!< in: transaction or 0 */
- MY_ATTRIBUTE((nonnull(1), warn_unused_result));
-
-#ifdef UNIV_SYNC_DEBUG
-/*************************************************************//**
-Removes a page from the level list of pages.
-@param space in: space where removed
-@param zip_size in: compressed page size in bytes, or 0 for uncompressed
-@param page in/out: page to remove
-@param index in: index tree
-@param mtr in/out: mini-transaction */
-# define btr_level_list_remove(space,zip_size,page,index,mtr) \
- btr_level_list_remove_func(space,zip_size,page,index,mtr)
-#else /* UNIV_SYNC_DEBUG */
-/*************************************************************//**
-Removes a page from the level list of pages.
-@param space in: space where removed
-@param zip_size in: compressed page size in bytes, or 0 for uncompressed
-@param page in/out: page to remove
-@param index in: index tree
-@param mtr in/out: mini-transaction */
-# define btr_level_list_remove(space,zip_size,page,index,mtr) \
- btr_level_list_remove_func(space,zip_size,page,index,mtr)
-#endif /* UNIV_SYNC_DEBUG */
-
-/*************************************************************//**
-Removes a page from the level list of pages. */
-UNIV_INTERN
-void
-btr_level_list_remove_func(
-/*=======================*/
- ulint space, /*!< in: space where removed */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- page_t* page, /*!< in/out: page to remove */
- dict_index_t* index, /*!< in: index tree */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-
-/*************************************************************//**
-If page is the only on its level, this function moves its records to the
-father page, thus reducing the tree height.
-@return father block */
-UNIV_INTERN
-buf_block_t*
-btr_lift_page_up(
-/*=============*/
- dict_index_t* index, /*!< in: index tree */
- buf_block_t* block, /*!< in: page which is the only on its level;
- must not be empty: use
- btr_discard_only_page_on_level if the last
- record from the page should be removed */
- mtr_t* mtr) /*!< in: mtr */
- __attribute__((nonnull));
-
-#define BTR_N_LEAF_PAGES 1
-#define BTR_TOTAL_SIZE 2
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_NONINL
-#include "btr0btr.ic"
-#endif
-
-/****************************************************************
-Global variable controlling if scrubbing should be performed */
-extern my_bool srv_immediate_scrub_data_uncompressed;
-
-#endif
diff --git a/storage/xtradb/include/btr0btr.ic b/storage/xtradb/include/btr0btr.ic
deleted file mode 100644
index 0f5f025d6a3..00000000000
--- a/storage/xtradb/include/btr0btr.ic
+++ /dev/null
@@ -1,335 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/btr0btr.ic
-The B-tree
-
-Created 6/2/1994 Heikki Tuuri
-*******************************************************/
-
-#include "mach0data.h"
-#ifndef UNIV_HOTBACKUP
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "page0zip.h"
-
-#define BTR_MAX_NODE_LEVEL 50 /*!< Maximum B-tree page level
- (not really a hard limit).
- Used in debug assertions
- in btr_page_set_level and
- btr_page_get_level_low */
-
-/**************************************************************//**
-Gets a buffer page and declares its latching order level. */
-UNIV_INLINE
-buf_block_t*
-btr_block_get_func(
-/*===============*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- ulint mode, /*!< in: latch mode */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- dict_index_t* index, /*!< in: index tree, may be NULL
- if it is not an insert buffer tree */
- mtr_t* mtr) /*!< in/out: mtr */
-{
- buf_block_t* block;
- dberr_t err;
-
- block = buf_page_get_gen(space, zip_size, page_no, mode,
- NULL, BUF_GET, file, line, mtr, &err);
-
- if (err == DB_DECRYPTION_FAILED) {
- if (index && index->table) {
- index->table->file_unreadable = true;
- }
- }
-
- if (block) {
- if (mode != RW_NO_LATCH) {
-
- buf_block_dbg_add_level(
- block, index != NULL && dict_index_is_ibuf(index)
- ? SYNC_IBUF_TREE_NODE : SYNC_TREE_NODE);
- }
- }
-
- return(block);
-}
-
-/**************************************************************//**
-Sets the index id field of a page. */
-UNIV_INLINE
-void
-btr_page_set_index_id(
-/*==================*/
- page_t* page, /*!< in: page to be created */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- index_id_t id, /*!< in: index id */
- mtr_t* mtr) /*!< in: mtr */
-{
- if (page_zip) {
- mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), id);
- page_zip_write_header(page_zip,
- page + (PAGE_HEADER + PAGE_INDEX_ID),
- 8, mtr);
- } else {
- mlog_write_ull(page + (PAGE_HEADER + PAGE_INDEX_ID), id, mtr);
- }
-}
-
-/** Gets a buffer page and declares its latching order level.
-@param space tablespace identifier
-@param zip_size compressed page size in bytes or 0 for uncompressed pages
-@param page_no page number
-@param mode latch mode
-@param idx index tree, may be NULL if not the insert buffer tree
-@param mtr mini-transaction handle
-@return the uncompressed page frame */
-UNIV_INLINE
-page_t*
-btr_page_get(
-/*=========*/
- ulint space,
- ulint zip_size,
- ulint root_page_no,
- ulint mode,
- dict_index_t* index,
- mtr_t* mtr)
-{
- buf_block_t* block=NULL;
- buf_frame_t* frame=NULL;
-
- block = btr_block_get(space, zip_size, root_page_no, mode, index, mtr);
-
- if (block) {
- frame = buf_block_get_frame(block);
- }
-
- return ((page_t*)frame);
-}
-
-#endif /* !UNIV_HOTBACKUP */
-
-/**************************************************************//**
-Gets the index id field of a page.
-@return index id */
-UNIV_INLINE
-index_id_t
-btr_page_get_index_id(
-/*==================*/
- const page_t* page) /*!< in: index page */
-{
- return(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID));
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Gets the node level field in an index page.
-@return level, leaf level == 0 */
-UNIV_INLINE
-ulint
-btr_page_get_level_low(
-/*===================*/
- const page_t* page) /*!< in: index page */
-{
- ulint level;
-
- ut_ad(page);
-
- level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL);
-
- ut_ad(level <= BTR_MAX_NODE_LEVEL);
-
- return(level);
-}
-
-/********************************************************//**
-Sets the node level field in an index page. */
-UNIV_INLINE
-void
-btr_page_set_level(
-/*===============*/
- page_t* page, /*!< in: index page */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- ulint level, /*!< in: level, leaf level == 0 */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ut_ad(page && mtr);
- ut_ad(level <= BTR_MAX_NODE_LEVEL);
-
- if (page_zip) {
- mach_write_to_2(page + (PAGE_HEADER + PAGE_LEVEL), level);
- page_zip_write_header(page_zip,
- page + (PAGE_HEADER + PAGE_LEVEL),
- 2, mtr);
- } else {
- mlog_write_ulint(page + (PAGE_HEADER + PAGE_LEVEL), level,
- MLOG_2BYTES, mtr);
- }
-}
-
-/********************************************************//**
-Gets the next index page number.
-@return next page number */
-UNIV_INLINE
-ulint
-btr_page_get_next(
-/*==============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr MY_ATTRIBUTE((unused)))
- /*!< in: mini-transaction handle */
-{
- ut_ad(page != NULL);
- ut_ad(mtr != NULL);
-#ifndef UNIV_INNOCHECKSUM
- ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX));
-#endif /* UNIV_INNOCHECKSUM */
- return(mach_read_from_4(page + FIL_PAGE_NEXT));
-}
-
-/********************************************************//**
-Sets the next index page field. */
-UNIV_INLINE
-void
-btr_page_set_next(
-/*==============*/
- page_t* page, /*!< in: index page */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- ulint next, /*!< in: next page number */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ut_ad(page != NULL);
- ut_ad(mtr != NULL);
-
- if (page_zip) {
- mach_write_to_4(page + FIL_PAGE_NEXT, next);
- page_zip_write_header(page_zip, page + FIL_PAGE_NEXT, 4, mtr);
- } else {
- mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr);
- }
-}
-
-/********************************************************//**
-Gets the previous index page number.
-@return prev page number */
-UNIV_INLINE
-ulint
-btr_page_get_prev(
-/*==============*/
- const page_t* page, /*!< in: index page */
- mtr_t* mtr MY_ATTRIBUTE((unused))) /*!< in: mini-transaction handle */
-{
- ut_ad(page != NULL);
- ut_ad(mtr != NULL);
-
- return(mach_read_from_4(page + FIL_PAGE_PREV));
-}
-
-/********************************************************//**
-Sets the previous index page field. */
-UNIV_INLINE
-void
-btr_page_set_prev(
-/*==============*/
- page_t* page, /*!< in: index page */
- page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
- part will be updated, or NULL */
- ulint prev, /*!< in: previous page number */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ut_ad(page != NULL);
- ut_ad(mtr != NULL);
-
- if (page_zip) {
- mach_write_to_4(page + FIL_PAGE_PREV, prev);
- page_zip_write_header(page_zip, page + FIL_PAGE_PREV, 4, mtr);
- } else {
- mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr);
- }
-}
-
-/**************************************************************//**
-Gets the child node file address in a node pointer.
-NOTE: the offsets array must contain all offsets for the record since
-we read the last field according to offsets and assume that it contains
-the child page number. In other words offsets must have been retrieved
-with rec_get_offsets(n_fields=ULINT_UNDEFINED).
-@return child node address */
-UNIV_INLINE
-ulint
-btr_node_ptr_get_child_page_no(
-/*===========================*/
- const rec_t* rec, /*!< in: node pointer record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- const byte* field;
- ulint len;
- ulint page_no;
-
- ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
-
- /* The child address is in the last field */
- field = rec_get_nth_field(rec, offsets,
- rec_offs_n_fields(offsets) - 1, &len);
-
- ut_ad(len == 4);
-
- page_no = mach_read_from_4(field);
-
- if (page_no == 0) {
- fprintf(stderr,
- "InnoDB: a nonsensical page number 0"
- " in a node ptr record at offset %lu\n",
- (ulong) page_offset(rec));
- buf_page_print(page_align(rec), 0, 0);
- ut_ad(0);
- }
-
- return(page_no);
-}
-
-/**************************************************************//**
-Releases the latches on a leaf page and bufferunfixes it. */
-UNIV_INLINE
-void
-btr_leaf_page_release(
-/*==================*/
- buf_block_t* block, /*!< in: buffer block */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
- BTR_MODIFY_LEAF */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);
- ut_ad(!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY));
-
- mtr_memo_release(mtr, block,
- latch_mode == BTR_SEARCH_LEAF
- ? MTR_MEMO_PAGE_S_FIX
- : MTR_MEMO_PAGE_X_FIX);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h
deleted file mode 100644
index e478b33bf8e..00000000000
--- a/storage/xtradb/include/btr0cur.h
+++ /dev/null
@@ -1,946 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/btr0cur.h
-The index tree cursor
-
-Created 10/16/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef btr0cur_h
-#define btr0cur_h
-
-#include "univ.i"
-#include "dict0dict.h"
-#include "page0cur.h"
-#include "btr0types.h"
-
-/** Mode flags for btr_cur operations; these can be ORed */
-enum {
- /** do no undo logging */
- BTR_NO_UNDO_LOG_FLAG = 1,
- /** do no record lock checking */
- BTR_NO_LOCKING_FLAG = 2,
- /** sys fields will be found in the update vector or inserted
- entry */
- BTR_KEEP_SYS_FLAG = 4,
- /** btr_cur_pessimistic_update() must keep cursor position
- when moving columns to big_rec */
- BTR_KEEP_POS_FLAG = 8,
- /** the caller is creating the index or wants to bypass the
- index->info.online creation log */
- BTR_CREATE_FLAG = 16,
- /** the caller of btr_cur_optimistic_update() or
- btr_cur_update_in_place() will take care of
- updating IBUF_BITMAP_FREE */
- BTR_KEEP_IBUF_BITMAP = 32
-};
-
-#ifndef UNIV_HOTBACKUP
-#include "que0types.h"
-#include "row0types.h"
-#include "ha0ha.h"
-
-#define BTR_CUR_ADAPT
-#define BTR_CUR_HASH_ADAPT
-
-#ifdef UNIV_DEBUG
-/*********************************************************//**
-Returns the page cursor component of a tree cursor.
-@return pointer to page cursor component */
-UNIV_INLINE
-page_cur_t*
-btr_cur_get_page_cur(
-/*=================*/
- const btr_cur_t* cursor);/*!< in: tree cursor */
-/*********************************************************//**
-Returns the buffer block on which the tree cursor is positioned.
-@return pointer to buffer block */
-UNIV_INLINE
-buf_block_t*
-btr_cur_get_block(
-/*==============*/
- const btr_cur_t* cursor);/*!< in: tree cursor */
-/*********************************************************//**
-Returns the record pointer of a tree cursor.
-@return pointer to record */
-UNIV_INLINE
-rec_t*
-btr_cur_get_rec(
-/*============*/
- const btr_cur_t* cursor);/*!< in: tree cursor */
-#else /* UNIV_DEBUG */
-# define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur)
-# define btr_cur_get_block(cursor) ((cursor)->page_cur.block)
-# define btr_cur_get_rec(cursor) ((cursor)->page_cur.rec)
-#endif /* UNIV_DEBUG */
-/*********************************************************//**
-Returns the compressed page on which the tree cursor is positioned.
-@return pointer to compressed page, or NULL if the page is not compressed */
-UNIV_INLINE
-page_zip_des_t*
-btr_cur_get_page_zip(
-/*=================*/
- btr_cur_t* cursor);/*!< in: tree cursor */
-/*********************************************************//**
-Invalidates a tree cursor by setting record pointer to NULL. */
-UNIV_INLINE
-void
-btr_cur_invalidate(
-/*===============*/
- btr_cur_t* cursor);/*!< in: tree cursor */
-/*********************************************************//**
-Returns the page of a tree cursor.
-@return pointer to page */
-UNIV_INLINE
-page_t*
-btr_cur_get_page(
-/*=============*/
- btr_cur_t* cursor);/*!< in: tree cursor */
-/*********************************************************//**
-Returns the index of a cursor.
-@param cursor b-tree cursor
-@return index */
-#define btr_cur_get_index(cursor) ((cursor)->index)
-/*********************************************************//**
-Positions a tree cursor at a given record. */
-UNIV_INLINE
-void
-btr_cur_position(
-/*=============*/
- dict_index_t* index, /*!< in: index */
- rec_t* rec, /*!< in: record in tree */
- buf_block_t* block, /*!< in: buffer block of rec */
- btr_cur_t* cursor);/*!< in: cursor */
-/********************************************************************//**
-Searches an index tree and positions a tree cursor on a given level.
-NOTE: n_fields_cmp in tuple must be set so that it cannot be compared
-to node pointer page number fields on the upper levels of the tree!
-Note that if mode is PAGE_CUR_LE, which is used in inserts, then
-cursor->up_match and cursor->low_match both will have sensible values.
-If mode is PAGE_CUR_GE, then up_match will a have a sensible value. */
-UNIV_INTERN
-dberr_t
-btr_cur_search_to_nth_level(
-/*========================*/
- dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: the tree level of search */
- const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
- tuple must be set so that it cannot get
- compared to the node ptr page number field! */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be PAGE_CUR_LE,
- not PAGE_CUR_GE, as the latter may end up on
- the previous page of the record! Inserts
- should always be made using PAGE_CUR_LE to
- search the position! */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
- at most one of BTR_INSERT, BTR_DELETE_MARK,
- BTR_DELETE, or BTR_ESTIMATE;
- cursor->left_block is used to store a pointer
- to the left neighbor page, in the cases
- BTR_SEARCH_PREV and BTR_MODIFY_PREV;
- NOTE that if has_search_latch
- is != 0, we maybe do not have a latch set
- on the cursor page, we assume
- the caller uses his search latch
- to protect the record! */
- btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
- s- or x-latched, but see also above! */
- ulint has_search_latch,/*!< in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, or 0 */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Opens a cursor at either end of an index. */
-UNIV_INTERN
-dberr_t
-btr_cur_open_at_index_side_func(
-/*============================*/
- bool from_left, /*!< in: true if open to the low end,
- false if to the high end */
- dict_index_t* index, /*!< in: index */
- ulint latch_mode, /*!< in: latch mode */
- btr_cur_t* cursor, /*!< in/out: cursor */
- ulint level, /*!< in: level to search for
- (0=leaf) */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-#define btr_cur_open_at_index_side(f,i,l,c,lv,m) \
- btr_cur_open_at_index_side_func(f,i,l,c,lv,__FILE__,__LINE__,m)
-/**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree. */
-UNIV_INTERN
-void
-btr_cur_open_at_rnd_pos_func(
-/*=========================*/
- dict_index_t* index, /*!< in: index */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /*!< in/out: B-tree cursor */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mtr */
-#define btr_cur_open_at_rnd_pos(i,l,c,m) \
- btr_cur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
-/*************************************************************//**
-Tries to perform an insert to a page in an index tree, next to cursor.
-It is assumed that mtr holds an x-latch on the page. The operation does
-not succeed if there is too little space on the page. If there is just
-one record on the page, the insert will always succeed; this is to
-prevent trying to split a page with just one record.
-@return DB_SUCCESS, DB_WAIT_LOCK, DB_FAIL, or error number */
-UNIV_INTERN
-dberr_t
-btr_cur_optimistic_insert(
-/*======================*/
- ulint flags, /*!< in: undo logging and locking flags: if not
- zero, the parameters index and thr should be
- specified */
- btr_cur_t* cursor, /*!< in: cursor on page after which to insert;
- cursor stays valid */
- ulint** offsets,/*!< out: offsets on *rec */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap */
- dtuple_t* entry, /*!< in/out: entry to insert */
- rec_t** rec, /*!< out: pointer to inserted record if
- succeed */
- big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
- be stored externally by the caller */
- ulint n_ext, /*!< in: number of externally stored columns */
- que_thr_t* thr, /*!< in/out: query thread; can be NULL if
- !(~flags
- & (BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG)) */
- mtr_t* mtr) /*!< in/out: mini-transaction;
- if this function returns DB_SUCCESS on
- a leaf page of a secondary index in a
- compressed tablespace, the caller must
- mtr_commit(mtr) before latching
- any further pages */
- MY_ATTRIBUTE((nonnull(2,3,4,5,6,7,10), warn_unused_result));
-/*************************************************************//**
-Performs an insert on a page of an index tree. It is assumed that mtr
-holds an x-latch on the tree and on the cursor page. If the insert is
-made on the leaf level, to avoid deadlocks, mtr must also own x-latches
-to brothers of page, if those brothers exist.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-dberr_t
-btr_cur_pessimistic_insert(
-/*=======================*/
- ulint flags, /*!< in: undo logging and locking flags: if not
- zero, the parameter thr should be
- specified; if no undo logging is specified,
- then the caller must have reserved enough
- free extents in the file space so that the
- insertion will certainly succeed */
- btr_cur_t* cursor, /*!< in: cursor after which to insert;
- cursor stays valid */
- ulint** offsets,/*!< out: offsets on *rec */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap
- that can be emptied */
- dtuple_t* entry, /*!< in/out: entry to insert */
- rec_t** rec, /*!< out: pointer to inserted record if
- succeed */
- big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
- be stored externally by the caller */
- ulint n_ext, /*!< in: number of externally stored columns */
- que_thr_t* thr, /*!< in/out: query thread; can be NULL if
- !(~flags
- & (BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG)) */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull(2,3,4,5,6,7,10), warn_unused_result));
-/*************************************************************//**
-See if there is enough place in the page modification log to log
-an update-in-place.
-
-@retval false if out of space; IBUF_BITMAP_FREE will be reset
-outside mtr if the page was recompressed
-@retval true if enough place;
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is
-a secondary index leaf page. This has to be done either within the
-same mini-transaction, or by invoking ibuf_reset_free_bits() before
-mtr_commit(mtr). */
-UNIV_INTERN
-bool
-btr_cur_update_alloc_zip_func(
-/*==========================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- page_cur_t* cursor, /*!< in/out: B-tree page cursor */
- dict_index_t* index, /*!< in: the index corresponding to cursor */
-#ifdef UNIV_DEBUG
- ulint* offsets,/*!< in/out: offsets of the cursor record */
-#endif /* UNIV_DEBUG */
- ulint length, /*!< in: size needed */
- bool create, /*!< in: true=delete-and-insert,
- false=update-in-place */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- trx_t* trx) /*!< in: NULL or transaction */
- MY_ATTRIBUTE((warn_unused_result));
-
-#ifdef UNIV_DEBUG
-# define btr_cur_update_alloc_zip(page_zip,cursor,index,offsets,len,cr,mtr,trx) \
- btr_cur_update_alloc_zip_func(page_zip,cursor,index,offsets,len,cr,mtr,trx)
-#else /* UNIV_DEBUG */
-# define btr_cur_update_alloc_zip(page_zip,cursor,index,offsets,len,cr,mtr,trx) \
- btr_cur_update_alloc_zip_func(page_zip,cursor,index,len,cr,mtr,trx)
-#endif /* UNIV_DEBUG */
-/*************************************************************//**
-Updates a record when the update causes no size changes in its fields.
-@return locking or undo log related error code, or
-@retval DB_SUCCESS on success
-@retval DB_ZIP_OVERFLOW if there is not enough space left
-on the compressed page (IBUF_BITMAP_FREE was reset outside mtr) */
-UNIV_INTERN
-dberr_t
-btr_cur_update_in_place(
-/*====================*/
- ulint flags, /*!< in: undo logging and locking flags */
- btr_cur_t* cursor, /*!< in: cursor on the record to update;
- cursor stays valid and positioned on the
- same record */
- ulint* offsets,/*!< in/out: offsets on cursor->page_cur.rec */
- const upd_t* update, /*!< in: update vector */
- ulint cmpl_info,/*!< in: compiler info on secondary index
- updates */
- que_thr_t* thr, /*!< in: query thread */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in/out: mini-transaction; if this
- is a secondary index, the caller must
- mtr_commit(mtr) before latching any
- further pages */
- MY_ATTRIBUTE((warn_unused_result, nonnull));
-/***********************************************************//**
-Writes a redo log record of updating a record in-place. */
-UNIV_INTERN
-void
-btr_cur_update_in_place_log(
-/*========================*/
- ulint flags, /*!< in: flags */
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: index of the record */
- const upd_t* update, /*!< in: update vector */
- trx_id_t trx_id, /*!< in: transaction id */
- roll_ptr_t roll_ptr, /*!< in: roll ptr */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull));
-/*************************************************************//**
-Tries to update a record on a page in an index tree. It is assumed that mtr
-holds an x-latch on the page. The operation does not succeed if there is too
-little space on the page or if the update would result in too empty a page,
-so that tree compression is recommended.
-@return error code, including
-@retval DB_SUCCESS on success
-@retval DB_OVERFLOW if the updated record does not fit
-@retval DB_UNDERFLOW if the page would become too empty
-@retval DB_ZIP_OVERFLOW if there is not enough space left
-on the compressed page */
-UNIV_INTERN
-dberr_t
-btr_cur_optimistic_update(
-/*======================*/
- ulint flags, /*!< in: undo logging and locking flags */
- btr_cur_t* cursor, /*!< in: cursor on the record to update;
- cursor stays valid and positioned on the
- same record */
- ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
- mem_heap_t** heap, /*!< in/out: pointer to NULL or memory heap */
- const upd_t* update, /*!< in: update vector; this must also
- contain trx id and roll ptr fields */
- ulint cmpl_info,/*!< in: compiler info on secondary index
- updates */
- que_thr_t* thr, /*!< in: query thread */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in/out: mini-transaction; if this
- is a secondary index, the caller must
- mtr_commit(mtr) before latching any
- further pages */
- MY_ATTRIBUTE((warn_unused_result, nonnull));
-/*************************************************************//**
-Performs an update of a record on a page of a tree. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. If the
-update is made on the leaf level, to avoid deadlocks, mtr must also
-own x-latches to brothers of page, if those brothers exist.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-btr_cur_pessimistic_update(
-/*=======================*/
- ulint flags, /*!< in: undo logging, locking, and rollback
- flags */
- btr_cur_t* cursor, /*!< in/out: cursor on the record to update;
- cursor may become invalid if *big_rec == NULL
- || !(flags & BTR_KEEP_POS_FLAG) */
- ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
- mem_heap_t** offsets_heap,
- /*!< in/out: pointer to memory heap
- that can be emptied */
- mem_heap_t* entry_heap,
- /*!< in/out: memory heap for allocating
- big_rec and the index tuple */
- big_rec_t** big_rec,/*!< out: big rec vector whose fields have to
- be stored externally by the caller */
- const upd_t* update, /*!< in: update vector; this is allowed also
- contain trx id and roll ptr fields, but
- the values in update vector have no effect */
- ulint cmpl_info,/*!< in: compiler info on secondary index
- updates */
- que_thr_t* thr, /*!< in: query thread */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in/out: mini-transaction; must be committed
- before latching any further pages */
- MY_ATTRIBUTE((warn_unused_result, nonnull));
-/***********************************************************//**
-Marks a clustered index record deleted. Writes an undo log record to
-undo log on this delete marking. Writes in the trx id field the id
-of the deleting transaction, and in the roll ptr field pointer to the
-undo log record created.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
-dberr_t
-btr_cur_del_mark_set_clust_rec(
-/*===========================*/
- buf_block_t* block, /*!< in/out: buffer block of the record */
- rec_t* rec, /*!< in/out: record */
- dict_index_t* index, /*!< in: clustered index of the record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((warn_unused_result));
-/***********************************************************//**
-Sets a secondary index record delete mark to TRUE or FALSE.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
-UNIV_INTERN
-dberr_t
-btr_cur_del_mark_set_sec_rec(
-/*=========================*/
- ulint flags, /*!< in: locking flag */
- btr_cur_t* cursor, /*!< in: cursor */
- ibool val, /*!< in: value to set */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((warn_unused_result));
-/*************************************************************//**
-Tries to compress a page of the tree if it seems useful. It is assumed
-that mtr holds an x-latch on the tree and on the cursor page. To avoid
-deadlocks, mtr must also own x-latches to brothers of page, if those
-brothers exist. NOTE: it is assumed that the caller has reserved enough
-free extents so that the compression will always succeed if done!
-@return TRUE if compression occurred */
-UNIV_INTERN
-ibool
-btr_cur_compress_if_useful(
-/*=======================*/
- btr_cur_t* cursor, /*!< in/out: cursor on the page to compress;
- cursor does not stay valid if compression
- occurs */
- ibool adjust, /*!< in: TRUE if should adjust the
- cursor position even if compression occurs */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-/*******************************************************//**
-Removes the record on which the tree cursor is positioned. It is assumed
-that the mtr has an x-latch on the page where the cursor is positioned,
-but no latch on the whole tree.
-@return TRUE if success, i.e., the page did not become too empty */
-UNIV_INTERN
-ibool
-btr_cur_optimistic_delete_func(
-/*===========================*/
- btr_cur_t* cursor, /*!< in: cursor on the record to delete;
- cursor stays valid: if deletion succeeds,
- on function exit it points to the successor
- of the deleted record */
-# ifdef UNIV_DEBUG
- ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
-# endif /* UNIV_DEBUG */
- mtr_t* mtr) /*!< in: mtr; if this function returns
- TRUE on a leaf page of a secondary
- index, the mtr must be committed
- before latching any further pages */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-# ifdef UNIV_DEBUG
-# define btr_cur_optimistic_delete(cursor, flags, mtr) \
- btr_cur_optimistic_delete_func(cursor, flags, mtr)
-# else /* UNIV_DEBUG */
-# define btr_cur_optimistic_delete(cursor, flags, mtr) \
- btr_cur_optimistic_delete_func(cursor, mtr)
-# endif /* UNIV_DEBUG */
-/*************************************************************//**
-Removes the record on which the tree cursor is positioned. Tries
-to compress the page if its fillfactor drops below a threshold
-or if it is the only page on the level. It is assumed that mtr holds
-an x-latch on the tree and on the cursor page. To avoid deadlocks,
-mtr must also own x-latches to brothers of page, if those brothers
-exist.
-@return TRUE if compression occurred */
-UNIV_INTERN
-ibool
-btr_cur_pessimistic_delete(
-/*=======================*/
- dberr_t* err, /*!< out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE;
- the latter may occur because we may have
- to update node pointers on upper levels,
- and in the case of variable length keys
- these may actually grow in size */
- ibool has_reserved_extents, /*!< in: TRUE if the
- caller has already reserved enough free
- extents so that he knows that the operation
- will succeed */
- btr_cur_t* cursor, /*!< in: cursor on the record to delete;
- if compression does not occur, the cursor
- stays valid: it points to successor of
- deleted record on function exit */
- ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************//**
-Parses a redo log record of updating a record in-place.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-btr_cur_parse_update_in_place(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in/out: page or NULL */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- dict_index_t* index); /*!< in: index corresponding to page */
-/****************************************************************//**
-Parses the redo log record for delete marking or unmarking of a clustered
-index record.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-btr_cur_parse_del_mark_set_clust_rec(
-/*=================================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in/out: page or NULL */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- dict_index_t* index); /*!< in: index corresponding to page */
-/****************************************************************//**
-Parses the redo log record for delete marking or unmarking of a secondary
-index record.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-btr_cur_parse_del_mark_set_sec_rec(
-/*===============================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in/out: page or NULL */
- page_zip_des_t* page_zip);/*!< in/out: compressed page, or NULL */
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Estimates the number of rows in a given index range.
-@return estimated number of rows */
-UNIV_INTERN
-ib_int64_t
-btr_estimate_n_rows_in_range(
-/*=========================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* tuple1, /*!< in: range start, may also be empty tuple */
- ulint mode1, /*!< in: search mode for range start */
- const dtuple_t* tuple2, /*!< in: range end, may also be empty tuple */
- ulint mode2, /*!< in: search mode for range end */
- trx_t* trx); /*!< in: trx */
-/*******************************************************************//**
-Estimates the number of different key values in a given index, for
-each n-column prefix of the index where 1 <= n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals[] (indexed
-0..n_uniq-1) and the number of pages that were sampled is saved in
-index->stat_n_sample_sizes[].
-If innodb_stats_method is nulls_ignored, we also record the number of
-non-null values for each prefix and stored the estimates in
-array index->stat_n_non_null_key_vals. */
-UNIV_INTERN
-void
-btr_estimate_number_of_different_key_vals(
-/*======================================*/
- dict_index_t* index); /*!< in: index */
-
-/** Gets the externally stored size of a record, in units of a database page.
-@param[in] rec record
-@param[in] offsets array returned by rec_get_offsets()
-@return externally stored part, in units of a database page */
-
-ulint
-btr_rec_get_externally_stored_len(
- const rec_t* rec,
- const ulint* offsets);
-
-/*******************************************************************//**
-Marks non-updated off-page fields as disowned by this record. The ownership
-must be transferred to the updated record which is inserted elsewhere in the
-index tree. In purge only the owner of externally stored field is allowed
-to free the field. */
-UNIV_INTERN
-void
-btr_cur_disown_inherited_fields(
-/*============================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
- part will be updated, or NULL */
- rec_t* rec, /*!< in/out: record in a clustered index */
- dict_index_t* index, /*!< in: index of the page */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- const upd_t* update, /*!< in: update vector */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-
-/** Operation code for btr_store_big_rec_extern_fields(). */
-enum blob_op {
- /** Store off-page columns for a freshly inserted record */
- BTR_STORE_INSERT = 0,
- /** Store off-page columns for an insert by update */
- BTR_STORE_INSERT_UPDATE,
- /** Store off-page columns for an update */
- BTR_STORE_UPDATE
-};
-
-/*******************************************************************//**
-Determine if an operation on off-page columns is an update.
-@return TRUE if op != BTR_STORE_INSERT */
-UNIV_INLINE
-ibool
-btr_blob_op_is_update(
-/*==================*/
- enum blob_op op) /*!< in: operation */
- MY_ATTRIBUTE((warn_unused_result));
-
-/*******************************************************************//**
-Stores the fields in big_rec_vec to the tablespace and puts pointers to
-them in rec. The extern flags in rec will have to be set beforehand.
-The fields are stored on pages allocated from leaf node
-file segment of the index tree.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-UNIV_INTERN
-dberr_t
-btr_store_big_rec_extern_fields(
-/*============================*/
- dict_index_t* index, /*!< in: index of rec; the index tree
- MUST be X-latched */
- buf_block_t* rec_block, /*!< in/out: block containing rec */
- rec_t* rec, /*!< in/out: record */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index);
- the "external storage" flags in offsets
- will not correspond to rec when
- this function returns */
- const big_rec_t*big_rec_vec, /*!< in: vector containing fields
- to be stored externally */
- mtr_t* btr_mtr, /*!< in: mtr containing the
- latches to the clustered index */
- enum blob_op op) /*! in: operation code */
- MY_ATTRIBUTE((warn_unused_result));
-
-/*******************************************************************//**
-Frees the space in an externally stored field to the file space
-management if the field in data is owned the externally stored field,
-in a rollback we may have the additional condition that the field must
-not be inherited. */
-UNIV_INTERN
-void
-btr_free_externally_stored_field(
-/*=============================*/
- dict_index_t* index, /*!< in: index of the data, the index
- tree MUST be X-latched; if the tree
- height is 1, then also the root page
- must be X-latched! (this is relevant
- in the case this function is called
- from purge where 'data' is located on
- an undo log page, not an index
- page) */
- byte* field_ref, /*!< in/out: field reference */
- const rec_t* rec, /*!< in: record containing field_ref, for
- page_zip_write_blob_ptr(), or NULL */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
- or NULL */
- page_zip_des_t* page_zip, /*!< in: compressed page corresponding
- to rec, or NULL if rec == NULL */
- ulint i, /*!< in: field number of field_ref;
- ignored if rec == NULL */
- enum trx_rb_ctx rb_ctx, /*!< in: rollback context */
- mtr_t* local_mtr); /*!< in: mtr containing the latch to
- data an an X-latch to the index
- tree */
-/*******************************************************************//**
-Copies the prefix of an externally stored field of a record. The
-clustered index record must be protected by a lock or a page latch.
-@return the length of the copied field, or 0 if the column was being
-or has been deleted */
-UNIV_INTERN
-ulint
-btr_copy_externally_stored_field_prefix(
-/*====================================*/
- byte* buf, /*!< out: the field, or a prefix of it */
- ulint len, /*!< in: length of buf, in bytes */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- const byte* data, /*!< in: 'internally' stored part of the
- field containing also the reference to
- the external part; must be protected by
- a lock or a page latch */
- ulint local_len,/*!< in: length of data, in bytes */
- trx_t* trx); /*!< in: transaction handle */
-/*******************************************************************//**
-Copies an externally stored field of a record to mem heap. The
-clustered index record must be protected by a lock or a page latch.
-@return the whole field copied to heap */
-UNIV_INTERN
-byte*
-btr_copy_externally_stored_field(
-/*=============================*/
- ulint* len, /*!< out: length of the whole field */
- const byte* data, /*!< in: 'internally' stored part of the
- field containing also the reference to
- the external part; must be protected by
- a lock or a page latch */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- ulint local_len,/*!< in: length of data */
- mem_heap_t* heap, /*!< in: mem heap */
- trx_t* trx); /*!< in: transaction handle */
-/*******************************************************************//**
-Copies an externally stored field of a record to mem heap.
-@return the field copied to heap, or NULL if the field is incomplete */
-UNIV_INTERN
-byte*
-btr_rec_copy_externally_stored_field(
-/*=================================*/
- const rec_t* rec, /*!< in: record in a clustered index;
- must be protected by a lock or a page latch */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint zip_size,/*!< in: nonzero=compressed BLOB page size,
- zero for uncompressed BLOBs */
- ulint no, /*!< in: field number */
- ulint* len, /*!< out: length of the field */
- mem_heap_t* heap, /*!< in: mem heap */
- trx_t* trx); /*!< in: transaction handle */
-/*******************************************************************//**
-Flags the data tuple fields that are marked as extern storage in the
-update vector. We use this function to remember which fields we must
-mark as extern storage in a record inserted for an update.
-@return number of flagged external columns */
-UNIV_INTERN
-ulint
-btr_push_update_extern_fields(
-/*==========================*/
- dtuple_t* tuple, /*!< in/out: data tuple */
- const upd_t* update, /*!< in: update vector */
- mem_heap_t* heap); /*!< in: memory heap */
-/***********************************************************//**
-Sets a secondary index record's delete mark to the given value. This
-function is only used by the insert buffer merge mechanism. */
-UNIV_INTERN
-void
-btr_cur_set_deleted_flag_for_ibuf(
-/*==============================*/
- rec_t* rec, /*!< in/out: record */
- page_zip_des_t* page_zip, /*!< in/out: compressed page
- corresponding to rec, or NULL
- when the tablespace is
- uncompressed */
- ibool val, /*!< in: value to set */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/*######################################################################*/
-
-/** In the pessimistic delete, if the page data size drops below this
-limit, merging it to a neighbor is tried */
-#define BTR_CUR_PAGE_COMPRESS_LIMIT (UNIV_PAGE_SIZE / 2)
-
-/** A slot in the path array. We store here info on a search path down the
-tree. Each slot contains data on a single level of the tree. */
-
-struct btr_path_t{
- ulint nth_rec; /*!< index of the record
- where the page cursor stopped on
- this level (index in alphabetical
- order); value ULINT_UNDEFINED
- denotes array end */
- ulint n_recs; /*!< number of records on the page */
- ulint page_no; /*!< no of the page containing the record */
- ulint page_level; /*!< level of the page, if later we fetch
- the page under page_no and it is no different
- level then we know that the tree has been
- reorganized */
-};
-
-#define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */
-
-/** Values for the flag documenting the used search method */
-enum btr_cur_method {
- BTR_CUR_HASH = 1, /*!< successful shortcut using
- the hash index */
- BTR_CUR_HASH_FAIL, /*!< failure using hash, success using
- binary search: the misleading hash
- reference is stored in the field
- hash_node, and might be necessary to
- update */
- BTR_CUR_BINARY, /*!< success using the binary search */
- BTR_CUR_INSERT_TO_IBUF, /*!< performed the intended insert to
- the insert buffer */
- BTR_CUR_DEL_MARK_IBUF, /*!< performed the intended delete
- mark in the insert/delete buffer */
- BTR_CUR_DELETE_IBUF, /*!< performed the intended delete in
- the insert/delete buffer */
- BTR_CUR_DELETE_REF /*!< row_purge_poss_sec() failed */
-};
-
-/** The tree cursor: the definition appears here only for the compiler
-to know struct size! */
-struct btr_cur_t {
- dict_index_t* index; /*!< index where positioned */
- page_cur_t page_cur; /*!< page cursor */
- purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */
- buf_block_t* left_block; /*!< this field is used to store
- a pointer to the left neighbor
- page, in the cases
- BTR_SEARCH_PREV and
- BTR_MODIFY_PREV */
- /*------------------------------*/
- que_thr_t* thr; /*!< this field is only used
- when btr_cur_search_to_nth_level
- is called for an index entry
- insertion: the calling query
- thread is passed here to be
- used in the insert buffer */
- /*------------------------------*/
- /** The following fields are used in
- btr_cur_search_to_nth_level to pass information: */
- /* @{ */
- enum btr_cur_method flag; /*!< Search method used */
- ulint tree_height; /*!< Tree height if the search is done
- for a pessimistic insert or update
- operation */
- ulint up_match; /*!< If the search mode was PAGE_CUR_LE,
- the number of matched fields to the
- the first user record to the right of
- the cursor record after
- btr_cur_search_to_nth_level;
- for the mode PAGE_CUR_GE, the matched
- fields to the first user record AT THE
- CURSOR or to the right of it;
- NOTE that the up_match and low_match
- values may exceed the correct values
- for comparison to the adjacent user
- record if that record is on a
- different leaf page! (See the note in
- row_ins_duplicate_error_in_clust.) */
- ulint up_bytes; /*!< number of matched bytes to the
- right at the time cursor positioned;
- only used internally in searches: not
- defined after the search */
- ulint low_match; /*!< if search mode was PAGE_CUR_LE,
- the number of matched fields to the
- first user record AT THE CURSOR or
- to the left of it after
- btr_cur_search_to_nth_level;
- NOT defined for PAGE_CUR_GE or any
- other search modes; see also the NOTE
- in up_match! */
- ulint low_bytes; /*!< number of matched bytes to the
- right at the time cursor positioned;
- only used internally in searches: not
- defined after the search */
- ulint n_fields; /*!< prefix length used in a hash
- search if hash_node != NULL */
- ulint n_bytes; /*!< hash prefix bytes if hash_node !=
- NULL */
- ulint fold; /*!< fold value used in the search if
- flag is BTR_CUR_HASH */
- /* @} */
- btr_path_t* path_arr; /*!< in estimating the number of
- rows in range, we store in this array
- information of the path through
- the tree */
-};
-
-/** If pessimistic delete fails because of lack of file space, there
-is still a good change of success a little later. Try this many
-times. */
-#define BTR_CUR_RETRY_DELETE_N_TIMES 100
-/** If pessimistic delete fails because of lack of file space, there
-is still a good change of success a little later. Sleep this many
-microseconds between retries. */
-#define BTR_CUR_RETRY_SLEEP_TIME 50000
-
-/** The reference in a field for which data is stored on a different page.
-The reference is at the end of the 'locally' stored part of the field.
-'Locally' means storage in the index record.
-We store locally a long enough prefix of each column so that we can determine
-the ordering parts of each index record without looking into the externally
-stored part. */
-/*-------------------------------------- @{ */
-#define BTR_EXTERN_SPACE_ID 0 /*!< space id where stored */
-#define BTR_EXTERN_PAGE_NO 4 /*!< page no where stored */
-#define BTR_EXTERN_OFFSET 8 /*!< offset of BLOB header
- on that page */
-#define BTR_EXTERN_LEN 12 /*!< 8 bytes containing the
- length of the externally
- stored part of the BLOB.
- The 2 highest bits are
- reserved to the flags below. */
-/*-------------------------------------- @} */
-/* #define BTR_EXTERN_FIELD_REF_SIZE 20 // moved to btr0types.h */
-
-/** The most significant bit of BTR_EXTERN_LEN (i.e., the most
-significant bit of the byte at smallest address) is set to 1 if this
-field does not 'own' the externally stored field; only the owner field
-is allowed to free the field in purge! */
-#define BTR_EXTERN_OWNER_FLAG 128
-/** If the second most significant bit of BTR_EXTERN_LEN (i.e., the
-second most significant bit of the byte at smallest address) is 1 then
-it means that the externally stored field was inherited from an
-earlier version of the row. In rollback we are not allowed to free an
-inherited external field. */
-#define BTR_EXTERN_INHERITED_FLAG 64
-
-/** Number of searches down the B-tree in btr_cur_search_to_nth_level(). */
-extern ulint btr_cur_n_non_sea;
-/** Number of successful adaptive hash index lookups in
-btr_cur_search_to_nth_level(). */
-extern ulint btr_cur_n_sea;
-/** Old value of btr_cur_n_non_sea. Copied by
-srv_refresh_innodb_monitor_stats(). Referenced by
-srv_printf_innodb_monitor(). */
-extern ulint btr_cur_n_non_sea_old;
-/** Old value of btr_cur_n_sea. Copied by
-srv_refresh_innodb_monitor_stats(). Referenced by
-srv_printf_innodb_monitor(). */
-extern ulint btr_cur_n_sea_old;
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-/* Flag to limit optimistic insert records */
-extern uint btr_cur_limit_optimistic_insert_debug;
-#endif /* UNIV_DEBUG */
-
-#ifndef UNIV_NONINL
-#include "btr0cur.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/btr0cur.ic b/storage/xtradb/include/btr0cur.ic
deleted file mode 100644
index 43ee3304c0e..00000000000
--- a/storage/xtradb/include/btr0cur.ic
+++ /dev/null
@@ -1,223 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/btr0cur.ic
-The index tree cursor
-
-Created 10/16/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef UNIV_HOTBACKUP
-#include "btr0btr.h"
-
-#ifdef UNIV_DEBUG
-# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)\
-if (btr_cur_limit_optimistic_insert_debug > 1\
- && (NREC) >= (ulint)btr_cur_limit_optimistic_insert_debug) {\
- CODE;\
-}
-#else
-# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)
-#endif /* UNIV_DEBUG */
-
-#ifdef UNIV_DEBUG
-/*********************************************************//**
-Returns the page cursor component of a tree cursor.
-@return pointer to page cursor component */
-UNIV_INLINE
-page_cur_t*
-btr_cur_get_page_cur(
-/*=================*/
- const btr_cur_t* cursor) /*!< in: tree cursor */
-{
- return(&((btr_cur_t*) cursor)->page_cur);
-}
-
-/*********************************************************//**
-Returns the buffer block on which the tree cursor is positioned.
-@return pointer to buffer block */
-UNIV_INLINE
-buf_block_t*
-btr_cur_get_block(
-/*==============*/
- const btr_cur_t* cursor) /*!< in: tree cursor */
-{
- return(page_cur_get_block(btr_cur_get_page_cur(cursor)));
-}
-
-/*********************************************************//**
-Returns the record pointer of a tree cursor.
-@return pointer to record */
-UNIV_INLINE
-rec_t*
-btr_cur_get_rec(
-/*============*/
- const btr_cur_t* cursor) /*!< in: tree cursor */
-{
- return(page_cur_get_rec(btr_cur_get_page_cur(cursor)));
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************//**
-Returns the compressed page on which the tree cursor is positioned.
-@return pointer to compressed page, or NULL if the page is not compressed */
-UNIV_INLINE
-page_zip_des_t*
-btr_cur_get_page_zip(
-/*=================*/
- btr_cur_t* cursor) /*!< in: tree cursor */
-{
- return(buf_block_get_page_zip(btr_cur_get_block(cursor)));
-}
-
-/*********************************************************//**
-Invalidates a tree cursor by setting record pointer to NULL. */
-UNIV_INLINE
-void
-btr_cur_invalidate(
-/*===============*/
- btr_cur_t* cursor) /*!< in: tree cursor */
-{
- page_cur_invalidate(&(cursor->page_cur));
-}
-
-/*********************************************************//**
-Returns the page of a tree cursor.
-@return pointer to page */
-UNIV_INLINE
-page_t*
-btr_cur_get_page(
-/*=============*/
- btr_cur_t* cursor) /*!< in: tree cursor */
-{
- return(page_align(page_cur_get_rec(&(cursor->page_cur))));
-}
-
-/*********************************************************//**
-Positions a tree cursor at a given record. */
-UNIV_INLINE
-void
-btr_cur_position(
-/*=============*/
- dict_index_t* index, /*!< in: index */
- rec_t* rec, /*!< in: record in tree */
- buf_block_t* block, /*!< in: buffer block of rec */
- btr_cur_t* cursor) /*!< out: cursor */
-{
- ut_ad(page_align(rec) == block->frame);
-
- page_cur_position(rec, block, btr_cur_get_page_cur(cursor));
-
- cursor->index = index;
-}
-
-/*********************************************************************//**
-Checks if compressing an index page where a btr cursor is placed makes
-sense.
-@return TRUE if compression is recommended */
-UNIV_INLINE
-ibool
-btr_cur_compress_recommendation(
-/*============================*/
- btr_cur_t* cursor, /*!< in: btr cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- const page_t* page;
-
- ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX));
-
- page = btr_cur_get_page(cursor);
-
- LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2,
- return(FALSE));
-
- if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
- || ((btr_page_get_next(page, mtr) == FIL_NULL)
- && (btr_page_get_prev(page, mtr) == FIL_NULL))) {
-
- /* The page fillfactor has dropped below a predefined
- minimum value OR the level in the B-tree contains just
- one page: we recommend compression if this is not the
- root page. */
-
- return(dict_index_get_page(cursor->index)
- != page_get_page_no(page));
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Checks if the record on which the cursor is placed can be deleted without
-making tree compression necessary (or, recommended).
-@return TRUE if can be deleted without recommended compression */
-UNIV_INLINE
-ibool
-btr_cur_can_delete_without_compress(
-/*================================*/
- btr_cur_t* cursor, /*!< in: btr cursor */
- ulint rec_size,/*!< in: rec_get_size(btr_cur_get_rec(cursor))*/
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* page;
-
- ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX));
-
- page = btr_cur_get_page(cursor);
-
- if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT)
- || ((btr_page_get_next(page, mtr) == FIL_NULL)
- && (btr_page_get_prev(page, mtr) == FIL_NULL))
- || (page_get_n_recs(page) < 2)) {
-
- /* The page fillfactor will drop below a predefined
- minimum value, OR the level in the B-tree contains just
- one page, OR the page will become empty: we recommend
- compression if this is not the root page. */
-
- return(dict_index_get_page(cursor->index)
- == page_get_page_no(page));
- }
-
- return(TRUE);
-}
-
-/*******************************************************************//**
-Determine if an operation on off-page columns is an update.
-@return TRUE if op != BTR_STORE_INSERT */
-UNIV_INLINE
-ibool
-btr_blob_op_is_update(
-/*==================*/
- enum blob_op op) /*!< in: operation */
-{
- switch (op) {
- case BTR_STORE_INSERT:
- return(FALSE);
- case BTR_STORE_INSERT_UPDATE:
- case BTR_STORE_UPDATE:
- return(TRUE);
- }
-
- ut_ad(0);
- return(FALSE);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/btr0defragment.h b/storage/xtradb/include/btr0defragment.h
deleted file mode 100644
index 477824c1a35..00000000000
--- a/storage/xtradb/include/btr0defragment.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2013, 2014 Facebook, Inc. All Rights Reserved.
-Copyright (C) 2014, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-#ifndef btr0defragment_h
-#define btr0defragment_h
-
-#include "univ.i"
-
-#ifndef UNIV_HOTBACKUP
-
-#include "btr0pcur.h"
-
-/* Max number of pages to consider at once during defragmentation. */
-#define BTR_DEFRAGMENT_MAX_N_PAGES 32
-
-/** stats in btr_defragment */
-extern ulint btr_defragment_compression_failures;
-extern ulint btr_defragment_failures;
-extern ulint btr_defragment_count;
-
-/** Item in the work queue for btr_degrament_thread. */
-struct btr_defragment_item_t
-{
- btr_pcur_t* pcur; /* persistent cursor where
- btr_defragment_n_pages should start */
- os_event_t event; /* if not null, signal after work
- is done */
- bool removed; /* Mark an item as removed */
- ulonglong last_processed; /* timestamp of last time this index
- is processed by defragment thread */
-
- btr_defragment_item_t(btr_pcur_t* pcur, os_event_t event);
- ~btr_defragment_item_t();
-};
-
-/******************************************************************//**
-Initialize defragmentation. */
-void
-btr_defragment_init(void);
-/******************************************************************//**
-Shutdown defragmentation. */
-void
-btr_defragment_shutdown();
-/******************************************************************//**
-Check whether the given index is in btr_defragment_wq. */
-bool
-btr_defragment_find_index(
- dict_index_t* index); /*!< Index to find. */
-/******************************************************************//**
-Add an index to btr_defragment_wq. Return a pointer to os_event if this
-is a synchronized defragmentation. */
-os_event_t
-btr_defragment_add_index(
- dict_index_t* index, /*!< index to be added */
- bool async, /*!< whether this is an async
- defragmentation */
- dberr_t* err); /*!< out: error code */
-/******************************************************************//**
-When table is dropped, this function is called to mark a table as removed in
-btr_efragment_wq. The difference between this function and the remove_index
-function is this will not NULL the event. */
-void
-btr_defragment_remove_table(
- dict_table_t* table); /*!< Index to be removed. */
-/******************************************************************//**
-Mark an index as removed from btr_defragment_wq. */
-void
-btr_defragment_remove_index(
- dict_index_t* index); /*!< Index to be removed. */
-/*********************************************************************//**
-Check whether we should save defragmentation statistics to persistent storage.*/
-UNIV_INTERN
-void
-btr_defragment_save_defrag_stats_if_needed(
- dict_index_t* index); /*!< in: index */
-
-/** Merge consecutive b-tree pages into fewer pages to defragment indexes */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(btr_defragment_thread)(void*);
-
-/** Whether btr_defragment_thread is active */
-extern bool btr_defragment_thread_active;
-
-#endif /* !UNIV_HOTBACKUP */
-#endif
diff --git a/storage/xtradb/include/btr0pcur.h b/storage/xtradb/include/btr0pcur.h
deleted file mode 100644
index dafe14ce556..00000000000
--- a/storage/xtradb/include/btr0pcur.h
+++ /dev/null
@@ -1,548 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/btr0pcur.h
-The index tree persistent cursor
-
-Created 2/23/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef btr0pcur_h
-#define btr0pcur_h
-
-#include "univ.i"
-#include "dict0dict.h"
-#include "data0data.h"
-#include "mtr0mtr.h"
-#include "page0cur.h"
-#include "btr0cur.h"
-#include "btr0btr.h"
-#include "btr0types.h"
-
-/* Relative positions for a stored cursor position */
-#define BTR_PCUR_ON 1
-#define BTR_PCUR_BEFORE 2
-#define BTR_PCUR_AFTER 3
-/* Note that if the tree is not empty, btr_pcur_store_position does not
-use the following, but only uses the above three alternatives, where the
-position is stored relative to a specific record: this makes implementation
-of a scroll cursor easier */
-#define BTR_PCUR_BEFORE_FIRST_IN_TREE 4 /* in an empty tree */
-#define BTR_PCUR_AFTER_LAST_IN_TREE 5 /* in an empty tree */
-
-/**************************************************************//**
-Allocates memory for a persistent cursor object and initializes the cursor.
-@return own: persistent cursor */
-UNIV_INTERN
-btr_pcur_t*
-btr_pcur_create_for_mysql(void);
-/*============================*/
-
-/**************************************************************//**
-Resets a persistent cursor object, freeing ::old_rec_buf if it is
-allocated and resetting the other members to their initial values. */
-UNIV_INTERN
-void
-btr_pcur_reset(
-/*===========*/
- btr_pcur_t* cursor);/*!< in, out: persistent cursor */
-
-/**************************************************************//**
-Frees the memory for a persistent cursor object. */
-UNIV_INTERN
-void
-btr_pcur_free_for_mysql(
-/*====================*/
- btr_pcur_t* cursor); /*!< in, own: persistent cursor */
-/**************************************************************//**
-Copies the stored position of a pcur to another pcur. */
-UNIV_INTERN
-void
-btr_pcur_copy_stored_position(
-/*==========================*/
- btr_pcur_t* pcur_receive, /*!< in: pcur which will receive the
- position info */
- btr_pcur_t* pcur_donate); /*!< in: pcur from which the info is
- copied */
-/**************************************************************//**
-Sets the old_rec_buf field to NULL. */
-UNIV_INLINE
-void
-btr_pcur_init(
-/*==========*/
- btr_pcur_t* pcur); /*!< in: persistent cursor */
-/**************************************************************//**
-Initializes and opens a persistent cursor to an index tree. It should be
-closed with btr_pcur_close. */
-UNIV_INLINE
-void
-btr_pcur_open_low(
-/*==============*/
- dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: level in the btree */
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page from the
- record! */
- ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mtr */
-#define btr_pcur_open(i,t,md,l,c,m) \
- btr_pcur_open_low(i,0,t,md,l,c,__FILE__,__LINE__,m)
-/**************************************************************//**
-Opens a persistent cursor to an index tree without initializing the
-cursor. */
-UNIV_INLINE
-dberr_t
-btr_pcur_open_with_no_init_func(
-/*============================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page of the
- record! */
- ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
- NOTE that if has_search_latch != 0 then
- we maybe do not acquire a latch on the cursor
- page, but assume that the caller uses his
- btr search latch to protect the record! */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- ulint has_search_latch,/*!< in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, or 0 */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mtr */
-#define btr_pcur_open_with_no_init(ix,t,md,l,cur,has,m) \
- btr_pcur_open_with_no_init_func(ix,t,md,l,cur,has,__FILE__,__LINE__,m)
-
-/*****************************************************************//**
-Opens a persistent cursor at either end of an index. */
-UNIV_INLINE
-dberr_t
-btr_pcur_open_at_index_side(
-/*========================*/
- bool from_left, /*!< in: true if open to the low end,
- false if to the high end */
- dict_index_t* index, /*!< in: index */
- ulint latch_mode, /*!< in: latch mode */
- btr_pcur_t* pcur, /*!< in/out: cursor */
- bool init_pcur, /*!< in: whether to initialize pcur */
- ulint level, /*!< in: level to search for
- (0=leaf) */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Gets the up_match value for a pcur after a search.
-@return number of matched fields at the cursor or to the right if
-search mode was PAGE_CUR_GE, otherwise undefined */
-UNIV_INLINE
-ulint
-btr_pcur_get_up_match(
-/*==================*/
- const btr_pcur_t* cursor); /*!< in: persistent cursor */
-/**************************************************************//**
-Gets the low_match value for a pcur after a search.
-@return number of matched fields at the cursor or to the left if
-search mode was PAGE_CUR_LE, otherwise undefined */
-UNIV_INLINE
-ulint
-btr_pcur_get_low_match(
-/*===================*/
- const btr_pcur_t* cursor); /*!< in: persistent cursor */
-/**************************************************************//**
-If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
-user record satisfying the search condition, in the case PAGE_CUR_L or
-PAGE_CUR_LE, on the last user record. If no such user record exists, then
-in the first case sets the cursor after last in tree, and in the latter case
-before first in tree. The latching mode must be BTR_SEARCH_LEAF or
-BTR_MODIFY_LEAF. */
-UNIV_INTERN
-void
-btr_pcur_open_on_user_rec_func(
-/*===========================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ... */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
- BTR_MODIFY_LEAF */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent
- cursor */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mtr */
-#define btr_pcur_open_on_user_rec(i,t,md,l,c,m) \
- btr_pcur_open_on_user_rec_func(i,t,md,l,c,__FILE__,__LINE__,m)
-/**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree. */
-UNIV_INLINE
-void
-btr_pcur_open_at_rnd_pos_func(
-/*==========================*/
- dict_index_t* index, /*!< in: index */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in/out: B-tree pcur */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mtr */
-#define btr_pcur_open_at_rnd_pos(i,l,c,m) \
- btr_pcur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
-/**************************************************************//**
-Frees the possible memory heap of a persistent cursor and sets the latch
-mode of the persistent cursor to BTR_NO_LATCHES.
-WARNING: this function does not release the latch on the page where the
-cursor is currently positioned. The latch is acquired by the
-"move to next/previous" family of functions. Since recursive shared locks
-are not allowed, you must take care (if using the cursor in S-mode) to
-manually release the latch by either calling
-btr_leaf_page_release(btr_pcur_get_block(&pcur), pcur.latch_mode, mtr)
-or by committing the mini-transaction right after btr_pcur_close().
-A subsequent attempt to crawl the same page in the same mtr would cause
-an assertion failure. */
-UNIV_INLINE
-void
-btr_pcur_close(
-/*===========*/
- btr_pcur_t* cursor); /*!< in: persistent cursor */
-/**************************************************************//**
-The position of the cursor is stored by taking an initial segment of the
-record the cursor is positioned on, before, or after, and copying it to the
-cursor data structure, or just setting a flag if the cursor is before the
-first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the
-page where the cursor is positioned must not be empty if the index tree is
-not totally empty! */
-UNIV_INTERN
-void
-btr_pcur_store_position(
-/*====================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
-/**************************************************************//**
-Restores the stored position of a persistent cursor bufferfixing the page and
-obtaining the specified latches. If the cursor position was saved when the
-(1) cursor was positioned on a user record: this function restores the position
-to the last record LESS OR EQUAL to the stored record;
-(2) cursor was positioned on a page infimum record: restores the position to
-the last record LESS than the user record which was the successor of the page
-infimum;
-(3) cursor was positioned on the page supremum: restores to the first record
-GREATER than the user record which was the predecessor of the supremum.
-(4) cursor was positioned before the first or after the last in an empty tree:
-restores to before first or after the last in the tree.
-@return TRUE if the cursor position was stored when it was on a user
-record and it can be restored on a user record whose ordering fields
-are identical to the ones of the original user record */
-UNIV_INTERN
-ibool
-btr_pcur_restore_position_func(
-/*===========================*/
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in: detached persistent cursor */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mtr */
-#define btr_pcur_restore_position(l,cur,mtr) \
- btr_pcur_restore_position_func(l,cur,__FILE__,__LINE__,mtr)
-/*********************************************************//**
-Gets the rel_pos field for a cursor whose position has been stored.
-@return BTR_PCUR_ON, ... */
-UNIV_INLINE
-ulint
-btr_pcur_get_rel_pos(
-/*=================*/
- const btr_pcur_t* cursor);/*!< in: persistent cursor */
-/**************************************************************//**
-Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES,
-that is, the cursor becomes detached.
-Function btr_pcur_store_position should be used before calling this,
-if restoration of cursor is wanted later. */
-UNIV_INLINE
-void
-btr_pcur_commit_specify_mtr(
-/*========================*/
- btr_pcur_t* pcur, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr to commit */
-/*********************************************************//**
-Moves the persistent cursor to the next record in the tree. If no records are
-left, the cursor stays 'after last in tree'.
-@return TRUE if the cursor was not after last in tree */
-UNIV_INLINE
-ibool
-btr_pcur_move_to_next(
-/*==================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Moves the persistent cursor to the previous record in the tree. If no records
-are left, the cursor stays 'before first in tree'.
-@return TRUE if the cursor was not before first in tree */
-UNIV_INTERN
-ibool
-btr_pcur_move_to_prev(
-/*==================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Moves the persistent cursor to the last record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_last_on_page(
-/*==========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Moves the persistent cursor to the next user record in the tree. If no user
-records are left, the cursor ends up 'after last in tree'.
-@return TRUE if the cursor moved forward, ending on a user record */
-UNIV_INLINE
-ibool
-btr_pcur_move_to_next_user_rec(
-/*===========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Moves the persistent cursor to the first record on the next page.
-Releases the latch on the current page, and bufferunfixes it.
-Note that there must not be modifications on the current page,
-as then the x-latch can be released only in mtr_commit. */
-UNIV_INTERN
-void
-btr_pcur_move_to_next_page(
-/*=======================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; must be on the
- last record of the current page */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Moves the persistent cursor backward if it is on the first record
-of the page. Releases the latch on the current page, and bufferunfixes
-it. Note that to prevent a possible deadlock, the operation first
-stores the position of the cursor, releases the leaf latch, acquires
-necessary latches and restores the cursor position again before returning.
-The alphabetical position of the cursor is guaranteed to be sensible
-on return, but it may happen that the cursor is not positioned on the
-last record of any page, because the structure of the tree may have
-changed while the cursor had no latches. */
-UNIV_INTERN
-void
-btr_pcur_move_backward_from_page(
-/*=============================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor, must be on the
- first record of the current page */
- mtr_t* mtr); /*!< in: mtr */
-#ifdef UNIV_DEBUG
-/*********************************************************//**
-Returns the btr cursor component of a persistent cursor.
-@return pointer to btr cursor component */
-UNIV_INLINE
-btr_cur_t*
-btr_pcur_get_btr_cur(
-/*=================*/
- const btr_pcur_t* cursor); /*!< in: persistent cursor */
-/*********************************************************//**
-Returns the page cursor component of a persistent cursor.
-@return pointer to page cursor component */
-UNIV_INLINE
-page_cur_t*
-btr_pcur_get_page_cur(
-/*==================*/
- const btr_pcur_t* cursor); /*!< in: persistent cursor */
-/*********************************************************//**
-Returns the page of a persistent cursor.
-@return pointer to the page */
-UNIV_INLINE
-page_t*
-btr_pcur_get_page(
-/*==============*/
- const btr_pcur_t* cursor);/*!< in: persistent cursor */
-/*********************************************************//**
-Returns the buffer block of a persistent cursor.
-@return pointer to the block */
-UNIV_INLINE
-buf_block_t*
-btr_pcur_get_block(
-/*===============*/
- const btr_pcur_t* cursor);/*!< in: persistent cursor */
-/*********************************************************//**
-Returns the record of a persistent cursor.
-@return pointer to the record */
-UNIV_INLINE
-rec_t*
-btr_pcur_get_rec(
-/*=============*/
- const btr_pcur_t* cursor);/*!< in: persistent cursor */
-#else /* UNIV_DEBUG */
-# define btr_pcur_get_btr_cur(cursor) (&(cursor)->btr_cur)
-# define btr_pcur_get_page_cur(cursor) (&(cursor)->btr_cur.page_cur)
-# define btr_pcur_get_page(cursor) ((cursor)->btr_cur.page_cur.block->frame)
-# define btr_pcur_get_block(cursor) ((cursor)->btr_cur.page_cur.block)
-# define btr_pcur_get_rec(cursor) ((cursor)->btr_cur.page_cur.rec)
-#endif /* UNIV_DEBUG */
-/*********************************************************//**
-Checks if the persistent cursor is on a user record. */
-UNIV_INLINE
-ibool
-btr_pcur_is_on_user_rec(
-/*====================*/
- const btr_pcur_t* cursor);/*!< in: persistent cursor */
-/*********************************************************//**
-Checks if the persistent cursor is after the last user record on
-a page. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_on_page(
-/*===========================*/
- const btr_pcur_t* cursor);/*!< in: persistent cursor */
-/*********************************************************//**
-Checks if the persistent cursor is before the first user record on
-a page. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_on_page(
-/*=============================*/
- const btr_pcur_t* cursor);/*!< in: persistent cursor */
-/*********************************************************//**
-Checks if the persistent cursor is before the first user record in
-the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_in_tree(
-/*=============================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Checks if the persistent cursor is after the last user record in
-the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_in_tree(
-/*===========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************//**
-Moves the persistent cursor to the next record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_next_on_page(
-/*==========================*/
- btr_pcur_t* cursor);/*!< in/out: persistent cursor */
-/*********************************************************//**
-Moves the persistent cursor to the previous record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_prev_on_page(
-/*==========================*/
- btr_pcur_t* cursor);/*!< in/out: persistent cursor */
-/*********************************************************//**
-Moves the persistent cursor to the infimum record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_before_first_on_page(
-/*===============================*/
- btr_pcur_t* cursor); /*!< in/out: persistent cursor */
-
-/** Position state of persistent B-tree cursor. */
-enum pcur_pos_t {
- /** The persistent cursor is not positioned. */
- BTR_PCUR_NOT_POSITIONED = 0,
- /** The persistent cursor was previously positioned.
- TODO: currently, the state can be BTR_PCUR_IS_POSITIONED,
- though it really should be BTR_PCUR_WAS_POSITIONED,
- because we have no obligation to commit the cursor with
- mtr; similarly latch_mode may be out of date. This can
- lead to problems if btr_pcur is not used the right way;
- all current code should be ok. */
- BTR_PCUR_WAS_POSITIONED,
- /** The persistent cursor is positioned by optimistic get to the same
- record as it was positioned at. Not used for rel_pos == BTR_PCUR_ON.
- It may need adjustment depending on previous/current search direction
- and rel_pos. */
- BTR_PCUR_IS_POSITIONED_OPTIMISTIC,
- /** The persistent cursor is positioned by index search.
- Or optimistic get for rel_pos == BTR_PCUR_ON. */
- BTR_PCUR_IS_POSITIONED
-};
-
-/* The persistent B-tree cursor structure. This is used mainly for SQL
-selects, updates, and deletes. */
-
-struct btr_pcur_t{
- btr_cur_t btr_cur; /*!< a B-tree cursor */
- ulint latch_mode; /*!< see TODO note in pcur_pos_t!
- BTR_SEARCH_LEAF, BTR_MODIFY_LEAF,
- BTR_MODIFY_TREE, or BTR_NO_LATCHES,
- depending on the latching state of
- the page and tree where the cursor is
- positioned; BTR_NO_LATCHES means that
- the cursor is not currently positioned:
- we say then that the cursor is
- detached; it can be restored to
- attached if the old position was
- stored in old_rec */
- ulint old_stored; /*!< BTR_PCUR_OLD_STORED
- or BTR_PCUR_OLD_NOT_STORED */
- rec_t* old_rec; /*!< if cursor position is stored,
- contains an initial segment of the
- latest record cursor was positioned
- either on, before, or after */
- ulint old_n_fields; /*!< number of fields in old_rec */
- ulint rel_pos; /*!< BTR_PCUR_ON, BTR_PCUR_BEFORE, or
- BTR_PCUR_AFTER, depending on whether
- cursor was on, before, or after the
- old_rec record */
- buf_block_t* block_when_stored;/*!< buffer block when the position was
- stored */
- ib_uint64_t modify_clock; /*!< the modify clock value of the
- buffer block when the cursor position
- was stored */
- enum pcur_pos_t pos_state; /*!< btr_pcur_store_position() and
- btr_pcur_restore_position() state. */
- ulint search_mode; /*!< PAGE_CUR_G, ... */
- trx_t* trx_if_known; /*!< the transaction, if we know it;
- otherwise this field is not defined;
- can ONLY BE USED in error prints in
- fatal assertion failures! */
- /*-----------------------------*/
- /* NOTE that the following fields may possess dynamically allocated
- memory which should be freed if not needed anymore! */
-
- byte* old_rec_buf; /*!< NULL, or a dynamically allocated
- buffer for old_rec */
- ulint buf_size; /*!< old_rec_buf size if old_rec_buf
- is not NULL */
-};
-
-#define BTR_PCUR_OLD_STORED 908467085
-#define BTR_PCUR_OLD_NOT_STORED 122766467
-
-#ifndef UNIV_NONINL
-#include "btr0pcur.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/btr0pcur.ic b/storage/xtradb/include/btr0pcur.ic
deleted file mode 100644
index 1cd13824542..00000000000
--- a/storage/xtradb/include/btr0pcur.ic
+++ /dev/null
@@ -1,612 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/btr0pcur.ic
-The index tree persistent cursor
-
-Created 2/23/1996 Heikki Tuuri
-*******************************************************/
-
-
-/*********************************************************//**
-Gets the rel_pos field for a cursor whose position has been stored.
-@return BTR_PCUR_ON, ... */
-UNIV_INLINE
-ulint
-btr_pcur_get_rel_pos(
-/*=================*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor);
- ut_ad(cursor->old_rec);
- ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED);
- ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED
- || cursor->pos_state == BTR_PCUR_IS_POSITIONED);
-
- return(cursor->rel_pos);
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************//**
-Returns the btr cursor component of a persistent cursor.
-@return pointer to btr cursor component */
-UNIV_INLINE
-btr_cur_t*
-btr_pcur_get_btr_cur(
-/*=================*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- const btr_cur_t* btr_cur = &cursor->btr_cur;
- return((btr_cur_t*) btr_cur);
-}
-
-/*********************************************************//**
-Returns the page cursor component of a persistent cursor.
-@return pointer to page cursor component */
-UNIV_INLINE
-page_cur_t*
-btr_pcur_get_page_cur(
-/*==================*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- return(btr_cur_get_page_cur(btr_pcur_get_btr_cur(cursor)));
-}
-
-/*********************************************************//**
-Returns the page of a persistent cursor.
-@return pointer to the page */
-UNIV_INLINE
-page_t*
-btr_pcur_get_page(
-/*==============*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
-
- return(btr_cur_get_page(btr_pcur_get_btr_cur(cursor)));
-}
-
-/*********************************************************//**
-Returns the buffer block of a persistent cursor.
-@return pointer to the block */
-UNIV_INLINE
-buf_block_t*
-btr_pcur_get_block(
-/*===============*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
-
- return(btr_cur_get_block(btr_pcur_get_btr_cur(cursor)));
-}
-
-/*********************************************************//**
-Returns the record of a persistent cursor.
-@return pointer to the record */
-UNIV_INLINE
-rec_t*
-btr_pcur_get_rec(
-/*=============*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- return(btr_cur_get_rec(btr_pcur_get_btr_cur(cursor)));
-}
-#endif /* UNIV_DEBUG */
-
-/**************************************************************//**
-Gets the up_match value for a pcur after a search.
-@return number of matched fields at the cursor or to the right if
-search mode was PAGE_CUR_GE, otherwise undefined */
-UNIV_INLINE
-ulint
-btr_pcur_get_up_match(
-/*==================*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- const btr_cur_t* btr_cursor;
-
- ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
- || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
-
- btr_cursor = btr_pcur_get_btr_cur(cursor);
-
- ut_ad(btr_cursor->up_match != ULINT_UNDEFINED);
-
- return(btr_cursor->up_match);
-}
-
-/**************************************************************//**
-Gets the low_match value for a pcur after a search.
-@return number of matched fields at the cursor or to the left if
-search mode was PAGE_CUR_LE, otherwise undefined */
-UNIV_INLINE
-ulint
-btr_pcur_get_low_match(
-/*===================*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- const btr_cur_t* btr_cursor;
-
- ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
- || (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
-
- btr_cursor = btr_pcur_get_btr_cur(cursor);
- ut_ad(btr_cursor->low_match != ULINT_UNDEFINED);
-
- return(btr_cursor->low_match);
-}
-
-/*********************************************************//**
-Checks if the persistent cursor is after the last user record on
-a page. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_on_page(
-/*===========================*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
-}
-
-/*********************************************************//**
-Checks if the persistent cursor is before the first user record on
-a page. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_on_page(
-/*=============================*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
-}
-
-/*********************************************************//**
-Checks if the persistent cursor is on a user record. */
-UNIV_INLINE
-ibool
-btr_pcur_is_on_user_rec(
-/*====================*/
- const btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- if (btr_pcur_is_before_first_on_page(cursor)
- || btr_pcur_is_after_last_on_page(cursor)) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/*********************************************************//**
-Checks if the persistent cursor is before the first user record in
-the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_in_tree(
-/*=============================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
-
- return(FALSE);
- }
-
- return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
-}
-
-/*********************************************************//**
-Checks if the persistent cursor is after the last user record in
-the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_in_tree(
-/*===========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
-
- return(FALSE);
- }
-
- return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the next record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_next_on_page(
-/*==========================*/
- btr_pcur_t* cursor) /*!< in/out: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- page_cur_move_to_next(btr_pcur_get_page_cur(cursor));
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the previous record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_prev_on_page(
-/*==========================*/
- btr_pcur_t* cursor) /*!< in/out: persistent cursor */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- page_cur_move_to_prev(btr_pcur_get_page_cur(cursor));
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the last record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_to_last_on_page(
-/*==========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
-{
- UT_NOT_USED(mtr);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- page_cur_set_after_last(btr_pcur_get_block(cursor),
- btr_pcur_get_page_cur(cursor));
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the next user record in the tree. If no user
-records are left, the cursor ends up 'after last in tree'.
-@return TRUE if the cursor moved forward, ending on a user record */
-UNIV_INLINE
-ibool
-btr_pcur_move_to_next_user_rec(
-/*===========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-loop:
- if (btr_pcur_is_after_last_on_page(cursor)) {
-
- if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
-
- return(FALSE);
- }
-
- btr_pcur_move_to_next_page(cursor, mtr);
- } else {
- btr_pcur_move_to_next_on_page(cursor);
- }
-
- if (btr_pcur_is_on_user_rec(cursor)) {
-
- return(TRUE);
- }
-
- goto loop;
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the next record in the tree. If no records are
-left, the cursor stays 'after last in tree'.
-@return TRUE if the cursor was not after last in tree */
-UNIV_INLINE
-ibool
-btr_pcur_move_to_next(
-/*==================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor; NOTE that the
- function may release the page latch */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- if (btr_pcur_is_after_last_on_page(cursor)) {
-
- if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
-
- return(FALSE);
- }
-
- btr_pcur_move_to_next_page(cursor, mtr);
-
- return(TRUE);
- }
-
- btr_pcur_move_to_next_on_page(cursor);
-
- return(TRUE);
-}
-
-/**************************************************************//**
-Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES,
-that is, the cursor becomes detached.
-Function btr_pcur_store_position should be used before calling this,
-if restoration of cursor is wanted later. */
-UNIV_INLINE
-void
-btr_pcur_commit_specify_mtr(
-/*========================*/
- btr_pcur_t* pcur, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr to commit */
-{
- ut_ad(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
-
- pcur->latch_mode = BTR_NO_LATCHES;
-
- mtr_commit(mtr);
-
- pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
-}
-
-/**************************************************************//**
-Sets the old_rec_buf field to NULL. */
-UNIV_INLINE
-void
-btr_pcur_init(
-/*==========*/
- btr_pcur_t* pcur) /*!< in: persistent cursor */
-{
- pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
- pcur->old_rec_buf = NULL;
- pcur->old_rec = NULL;
-}
-
-/**************************************************************//**
-Initializes and opens a persistent cursor to an index tree. It should be
-closed with btr_pcur_close. */
-UNIV_INLINE
-void
-btr_pcur_open_low(
-/*==============*/
- dict_index_t* index, /*!< in: index */
- ulint level, /*!< in: level in the btree */
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page from the
- record! */
- ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr */
-{
- btr_cur_t* btr_cursor;
-
- /* Initialize the cursor */
-
- btr_pcur_init(cursor);
-
- cursor->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
- cursor->search_mode = mode;
-
- /* Search with the tree cursor */
-
- btr_cursor = btr_pcur_get_btr_cur(cursor);
-
- btr_cur_search_to_nth_level(index, level, tuple, mode, latch_mode,
- btr_cursor, 0, file, line, mtr);
- cursor->pos_state = BTR_PCUR_IS_POSITIONED;
-
- cursor->trx_if_known = NULL;
-}
-
-/**************************************************************//**
-Opens an persistent cursor to an index tree without initializing the
-cursor. */
-UNIV_INLINE
-dberr_t
-btr_pcur_open_with_no_init_func(
-/*============================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* tuple, /*!< in: tuple on which search done */
- ulint mode, /*!< in: PAGE_CUR_L, ...;
- NOTE that if the search is made using a unique
- prefix of a record, mode should be
- PAGE_CUR_LE, not PAGE_CUR_GE, as the latter
- may end up on the previous page of the
- record! */
- ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
- NOTE that if has_search_latch != 0 then
- we maybe do not acquire a latch on the cursor
- page, but assume that the caller uses his
- btr search latch to protect the record! */
- btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- ulint has_search_latch,/*!< in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, or 0 */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr */
-{
- btr_cur_t* btr_cursor;
- dberr_t err = DB_SUCCESS;
-
- cursor->latch_mode = latch_mode;
- cursor->search_mode = mode;
-
- /* Search with the tree cursor */
-
- btr_cursor = btr_pcur_get_btr_cur(cursor);
-
- err = btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
- btr_cursor, has_search_latch,
- file, line, mtr);
- cursor->pos_state = BTR_PCUR_IS_POSITIONED;
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- cursor->trx_if_known = NULL;
- return err;
-}
-
-/*****************************************************************//**
-Opens a persistent cursor at either end of an index. */
-UNIV_INLINE
-dberr_t
-btr_pcur_open_at_index_side(
-/*========================*/
- bool from_left, /*!< in: true if open to the low end,
- false if to the high end */
- dict_index_t* index, /*!< in: index */
- ulint latch_mode, /*!< in: latch mode */
- btr_pcur_t* pcur, /*!< in/out: cursor */
- bool init_pcur, /*!< in: whether to initialize pcur */
- ulint level, /*!< in: level to search for
- (0=leaf) */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- dberr_t err = DB_SUCCESS;
-
- pcur->latch_mode = BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode);
-
- pcur->search_mode = from_left ? PAGE_CUR_G : PAGE_CUR_L;
-
- if (init_pcur) {
- btr_pcur_init(pcur);
- }
-
- err = btr_cur_open_at_index_side(from_left, index, latch_mode,
- btr_pcur_get_btr_cur(pcur), level, mtr);
- pcur->pos_state = BTR_PCUR_IS_POSITIONED;
-
- pcur->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- pcur->trx_if_known = NULL;
-
- return (err);
-}
-
-/**********************************************************************//**
-Positions a cursor at a randomly chosen position within a B-tree. */
-UNIV_INLINE
-void
-btr_pcur_open_at_rnd_pos_func(
-/*==========================*/
- dict_index_t* index, /*!< in: index */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_pcur_t* cursor, /*!< in/out: B-tree pcur */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr */
-{
- /* Initialize the cursor */
-
- cursor->latch_mode = latch_mode;
- cursor->search_mode = PAGE_CUR_G;
-
- btr_pcur_init(cursor);
-
- btr_cur_open_at_rnd_pos_func(index, latch_mode,
- btr_pcur_get_btr_cur(cursor),
- file, line, mtr);
- cursor->pos_state = BTR_PCUR_IS_POSITIONED;
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- cursor->trx_if_known = NULL;
-}
-
-/**************************************************************//**
-Frees the possible memory heap of a persistent cursor and sets the latch
-mode of the persistent cursor to BTR_NO_LATCHES.
-WARNING: this function does not release the latch on the page where the
-cursor is currently positioned. The latch is acquired by the
-"move to next/previous" family of functions. Since recursive shared locks
-are not allowed, you must take care (if using the cursor in S-mode) to
-manually release the latch by either calling
-btr_leaf_page_release(btr_pcur_get_block(&pcur), pcur.latch_mode, mtr)
-or by committing the mini-transaction right after btr_pcur_close().
-A subsequent attempt to crawl the same page in the same mtr would cause
-an assertion failure. */
-UNIV_INLINE
-void
-btr_pcur_close(
-/*===========*/
- btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- if (cursor->old_rec_buf != NULL) {
-
- mem_free(cursor->old_rec_buf);
-
- cursor->old_rec = NULL;
- cursor->old_rec_buf = NULL;
- }
-
- cursor->btr_cur.page_cur.rec = NULL;
- cursor->btr_cur.page_cur.block = NULL;
- cursor->old_rec = NULL;
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-
- cursor->latch_mode = BTR_NO_LATCHES;
- cursor->pos_state = BTR_PCUR_NOT_POSITIONED;
-
- cursor->trx_if_known = NULL;
-}
-
-/*********************************************************//**
-Moves the persistent cursor to the infimum record on the same page. */
-UNIV_INLINE
-void
-btr_pcur_move_before_first_on_page(
-/*===============================*/
- btr_pcur_t* cursor) /*!< in/out: persistent cursor */
-{
- ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
-
- page_cur_set_before_first(btr_pcur_get_block(cursor),
- btr_pcur_get_page_cur(cursor));
-
- cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
-}
diff --git a/storage/xtradb/include/btr0scrub.h b/storage/xtradb/include/btr0scrub.h
deleted file mode 100644
index 608266c206d..00000000000
--- a/storage/xtradb/include/btr0scrub.h
+++ /dev/null
@@ -1,166 +0,0 @@
-// Copyright 2014 Google
-
-#ifndef btr0scrub_h
-#define btr0scrub_h
-
-#include "univ.i"
-
-#include "dict0dict.h"
-#include "data0data.h"
-#include "page0cur.h"
-#include "mtr0mtr.h"
-#include "btr0types.h"
-
-/**
- * enum describing page allocation status
- */
-enum btr_scrub_page_allocation_status_t {
- BTR_SCRUB_PAGE_FREE,
- BTR_SCRUB_PAGE_ALLOCATED,
- BTR_SCRUB_PAGE_ALLOCATION_UNKNOWN
-};
-
-/**
-* constants returned by btr_page_needs_scrubbing & btr_scrub_recheck_page
-*/
-#define BTR_SCRUB_PAGE 1 /* page should be scrubbed */
-#define BTR_SCRUB_SKIP_PAGE 2 /* no scrub & no action */
-#define BTR_SCRUB_SKIP_PAGE_AND_CLOSE_TABLE 3 /* no scrub & close table */
-#define BTR_SCRUB_SKIP_PAGE_AND_COMPLETE_SPACE 4 /* no scrub & complete space */
-#define BTR_SCRUB_TURNED_OFF 5 /* we detected that scrubbing
- was disabled by global
- variable */
-
-/**************************************************************//**
-struct for keeping scrub statistics. */
-struct btr_scrub_stat_t {
- /* page reorganizations */
- ulint page_reorganizations;
- /* page splits */
- ulint page_splits;
- /* scrub failures */
- ulint page_split_failures_underflow;
- ulint page_split_failures_out_of_filespace;
- ulint page_split_failures_missing_index;
- ulint page_split_failures_unknown;
-};
-
-/**************************************************************//**
-struct for thread local scrub state. */
-struct btr_scrub_t {
-
- /* current space */
- ulint space;
-
- /* is scrubbing enabled for this space */
- bool scrubbing;
-
- /* is current space compressed */
- bool compressed;
-
- dict_table_t* current_table;
- dict_index_t* current_index;
- /* savepoint for X_LATCH of block */
- ulint savepoint;
-
- /* statistic counters */
- btr_scrub_stat_t scrub_stat;
-};
-
-/*********************************************************************
-Init scrub global variables */
-UNIV_INTERN
-void
-btr_scrub_init();
-
-/*********************************************************************
-Cleanup scrub globals */
-UNIV_INTERN
-void
-btr_scrub_cleanup();
-
-/***********************************************************************
-Return crypt statistics */
-UNIV_INTERN
-void
-btr_scrub_total_stat(
-/*==================*/
- btr_scrub_stat_t *stat); /*!< out: stats to update */
-
-/**************************************************************//**
-Check if a page needs scrubbing
-* @return BTR_SCRUB_PAGE if page should be scrubbed
-* else btr_scrub_skip_page should be called
-* with this return value (and without any latches held)
-*/
-UNIV_INTERN
-int
-btr_page_needs_scrubbing(
-/*=====================*/
- btr_scrub_t* scrub_data, /*!< in: scrub data */
- buf_block_t* block, /*!< in: block to check, latched */
- btr_scrub_page_allocation_status_t allocated); /*!< in: is block
- allocated, free or
- unknown */
-
-/****************************************************************
-Recheck if a page needs scrubbing, and if it does load appropriate
-table and index
-* @return BTR_SCRUB_PAGE if page should be scrubbed
-* else btr_scrub_skip_page should be called
-* with this return value (and without any latches held)
-*/
-UNIV_INTERN
-int
-btr_scrub_recheck_page(
-/*====================*/
- btr_scrub_t* scrub_data, /*!< inut: scrub data */
- buf_block_t* block, /*!< in: block */
- btr_scrub_page_allocation_status_t allocated, /*!< in: is block
- allocated or free */
- mtr_t* mtr); /*!< in: mtr */
-
-/****************************************************************
-Perform actual scrubbing of page */
-UNIV_INTERN
-int
-btr_scrub_page(
-/*============*/
- btr_scrub_t* scrub_data, /*!< in/out: scrub data */
- buf_block_t* block, /*!< in: block */
- btr_scrub_page_allocation_status_t allocated, /*!< in: is block
- allocated or free */
- mtr_t* mtr); /*!< in: mtr */
-
-/****************************************************************
-Perform cleanup needed for a page not needing scrubbing */
-UNIV_INTERN
-void
-btr_scrub_skip_page(
-/*============*/
- btr_scrub_t* scrub_data, /*!< in/out: scrub data */
- int needs_scrubbing); /*!< in: return value from
- btr_page_needs_scrubbing or
- btr_scrub_recheck_page which encodes what kind
- of cleanup is needed */
-
-/****************************************************************
-Start iterating a space
-* @return true if scrubbing is turned on */
-UNIV_INTERN
-bool
-btr_scrub_start_space(
-/*===================*/
- ulint space, /*!< in: space */
- btr_scrub_t* scrub_data); /*!< in/out: scrub data */
-
-/****************************************************************
-Complete iterating a space
-* @return true if space was scrubbed */
-UNIV_INTERN
-bool
-btr_scrub_complete_space(
-/*=====================*/
- btr_scrub_t* scrub_data); /*!< in/out: scrub data */
-
-#endif
diff --git a/storage/xtradb/include/btr0sea.h b/storage/xtradb/include/btr0sea.h
deleted file mode 100644
index 66c27607013..00000000000
--- a/storage/xtradb/include/btr0sea.h
+++ /dev/null
@@ -1,356 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/btr0sea.h
-The index tree adaptive search
-
-Created 2/17/1996 Heikki Tuuri
-*************************************************************************/
-
-#ifndef btr0sea_h
-#define btr0sea_h
-
-#include "univ.i"
-
-#include "rem0rec.h"
-#include "dict0dict.h"
-#include "btr0types.h"
-#include "mtr0mtr.h"
-#include "ha0ha.h"
-
-/*****************************************************************//**
-Creates and initializes the adaptive search system at a database start. */
-UNIV_INTERN
-void
-btr_search_sys_create(
-/*==================*/
- ulint hash_size); /*!< in: hash index hash table size */
-/*****************************************************************//**
-Frees the adaptive search system at a database shutdown. */
-UNIV_INTERN
-void
-btr_search_sys_free(void);
-/*=====================*/
-
-/********************************************************************//**
-Disable the adaptive hash search system and empty the index. */
-UNIV_INTERN
-void
-btr_search_disable(void);
-/*====================*/
-/********************************************************************//**
-Enable the adaptive hash search system. */
-UNIV_INTERN
-void
-btr_search_enable(void);
-/*====================*/
-
-/********************************************************************//**
-Returns search info for an index.
-@return search info; search mutex reserved */
-UNIV_INLINE
-btr_search_t*
-btr_search_get_info(
-/*================*/
- dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull));
-/*****************************************************************//**
-Creates and initializes a search info struct.
-@return own: search info struct */
-UNIV_INTERN
-btr_search_t*
-btr_search_info_create(
-/*===================*/
- mem_heap_t* heap); /*!< in: heap where created */
-/*****************************************************************//**
-Returns the value of ref_count. The value is protected by
-the latch of the AHI partition corresponding to this index.
-@return ref_count value. */
-UNIV_INTERN
-ulint
-btr_search_info_get_ref_count(
-/*==========================*/
- btr_search_t* info, /*!< in: search info. */
- dict_index_t* index); /*!< in: index */
-/*********************************************************************//**
-Updates the search info. */
-UNIV_INLINE
-void
-btr_search_info_update(
-/*===================*/
- dict_index_t* index, /*!< in: index of the cursor */
- btr_cur_t* cursor);/*!< in: cursor which was just positioned */
-/******************************************************************//**
-Tries to guess the right search position based on the hash search info
-of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts,
-and the function returns TRUE, then cursor->up_match and cursor->low_match
-both have sensible values.
-@return TRUE if succeeded */
-UNIV_INTERN
-ibool
-btr_search_guess_on_hash(
-/*=====================*/
- dict_index_t* index, /*!< in: index */
- btr_search_t* info, /*!< in: index search info */
- const dtuple_t* tuple, /*!< in: logical record */
- ulint mode, /*!< in: PAGE_CUR_L, ... */
- ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ... */
- btr_cur_t* cursor, /*!< out: tree cursor */
- ulint has_search_latch,/*!< in: latch mode the caller
- currently has on btr_search_latch:
- RW_S_LATCH, RW_X_LATCH, or 0 */
- mtr_t* mtr); /*!< in: mtr */
-/********************************************************************//**
-Moves or deletes hash entries for moved records. If new_page is already hashed,
-then the hash index for page, if any, is dropped. If new_page is not hashed,
-and page is hashed, then a new hash index is built to new_page with the same
-parameters as page (this often happens when a page is split). */
-UNIV_INTERN
-void
-btr_search_move_or_delete_hash_entries(
-/*===================================*/
- buf_block_t* new_block, /*!< in: records are copied
- to this page */
- buf_block_t* block, /*!< in: index page from which
- records were copied, and the
- copied records will be deleted
- from this page */
- dict_index_t* index); /*!< in: record descriptor */
-/********************************************************************//**
-Drops a page hash index. */
-UNIV_INTERN
-void
-btr_search_drop_page_hash_index(
-/*============================*/
- buf_block_t* block); /*!< in: block containing index page,
- s- or x-latched, or an index page
- for which we know that
- block->buf_fix_count == 0 */
-/********************************************************************//**
-Drops a possible page hash index when a page is evicted from the buffer pool
-or freed in a file segment. */
-UNIV_INTERN
-void
-btr_search_drop_page_hash_when_freed(
-/*=================================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no); /*!< in: page number */
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
-void
-btr_search_update_hash_node_on_insert(
-/*==================================*/
- btr_cur_t* cursor);/*!< in: cursor which was positioned to the
- place to insert using btr_cur_search_...,
- and the new record has been inserted next
- to the cursor */
-/********************************************************************//**
-Updates the page hash index when a single record is inserted on a page. */
-UNIV_INTERN
-void
-btr_search_update_hash_on_insert(
-/*=============================*/
- btr_cur_t* cursor);/*!< in: cursor which was positioned to the
- place to insert using btr_cur_search_...,
- and the new record has been inserted next
- to the cursor */
-/********************************************************************//**
-Updates the page hash index when a single record is deleted from a page. */
-UNIV_INTERN
-void
-btr_search_update_hash_on_delete(
-/*=============================*/
- btr_cur_t* cursor);/*!< in: cursor which was positioned on the
- record to delete using btr_cur_search_...,
- the record is not yet deleted */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-/********************************************************************//**
-Validates the search system.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-btr_search_validate(void);
-/*======================*/
-#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
-
-/********************************************************************//**
-Returns the adaptive hash index table for a given index key.
-@return the adaptive hash index table for a given index key */
-UNIV_INLINE
-hash_table_t*
-btr_search_get_hash_table(
-/*======================*/
- const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((warn_unused_result));
-
-/********************************************************************//**
-Returns the adaptive hash index latch for a given index key.
-@return the adaptive hash index latch for a given index key */
-UNIV_INLINE
-prio_rw_lock_t*
-btr_search_get_latch(
-/*=================*/
- const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((warn_unused_result));
-
-/*********************************************************************//**
-Returns the AHI partition number corresponding to a given index ID. */
-UNIV_INLINE
-ulint
-btr_search_get_key(
-/*===============*/
- index_id_t index_id) /*!< in: index ID */
- MY_ATTRIBUTE((pure,warn_unused_result));
-
-/*********************************************************************//**
-Initializes AHI-related fields in a newly created index. */
-UNIV_INLINE
-void
-btr_search_index_init(
-/*===============*/
- dict_index_t* index); /*!< in: index */
-
-/********************************************************************//**
-Latches all adaptive hash index latches in exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_lock_all(void);
-/*========================*/
-
-/********************************************************************//**
-Unlatches all adaptive hash index latches in exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_unlock_all(void);
-/*==========================*/
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Checks if the thread has locked all the adaptive hash index latches in the
-specified mode.
-
-@return true if all latches are locked by the current thread, false
-otherwise. */
-UNIV_INLINE
-bool
-btr_search_own_all(
-/*===============*/
- ulint lock_type)
- MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Checks if the thread owns any adaptive hash latches in either S or X mode.
-@return true if the thread owns at least one latch in any mode. */
-UNIV_INLINE
-bool
-btr_search_own_any(void)
-/*=====================*/
- MY_ATTRIBUTE((warn_unused_result));
-#endif
-
-/** The search info struct in an index */
-struct btr_search_t{
- ulint ref_count; /*!< Number of blocks in this index tree
- that have search index built
- i.e. block->index points to this index.
- Protected by btr_search_latch except
- when during initialization in
- btr_search_info_create(). */
-
- /* @{ The following fields are not protected by any latch.
- Unfortunately, this means that they must be aligned to
- the machine word, i.e., they cannot be turned into bit-fields. */
- buf_block_t* root_guess;/*!< the root page frame when it was last time
- fetched, or NULL */
- ulint hash_analysis; /*!< when this exceeds
- BTR_SEARCH_HASH_ANALYSIS, the hash
- analysis starts; this is reset if no
- success noticed */
- ibool last_hash_succ; /*!< TRUE if the last search would have
- succeeded, or did succeed, using the hash
- index; NOTE that the value here is not exact:
- it is not calculated for every search, and the
- calculation itself is not always accurate! */
- ulint n_hash_potential;
- /*!< number of consecutive searches
- which would have succeeded, or did succeed,
- using the hash index;
- the range is 0 .. BTR_SEARCH_BUILD_LIMIT + 5 */
- /* @} */
- /*---------------------- @{ */
- ulint n_fields; /*!< recommended prefix length for hash search:
- number of full fields */
- ulint n_bytes; /*!< recommended prefix: number of bytes in
- an incomplete field
- @see BTR_PAGE_MAX_REC_SIZE */
- ibool left_side; /*!< TRUE or FALSE, depending on whether
- the leftmost record of several records with
- the same prefix should be indexed in the
- hash index */
- /*---------------------- @} */
-#ifdef UNIV_SEARCH_PERF_STAT
- ulint n_hash_succ; /*!< number of successful hash searches thus
- far */
- ulint n_hash_fail; /*!< number of failed hash searches */
- ulint n_patt_succ; /*!< number of successful pattern searches thus
- far */
- ulint n_searches; /*!< number of searches */
-#endif /* UNIV_SEARCH_PERF_STAT */
-#ifdef UNIV_DEBUG
- ulint magic_n; /*!< magic number @see BTR_SEARCH_MAGIC_N */
-/** value of btr_search_t::magic_n, used in assertions */
-# define BTR_SEARCH_MAGIC_N 1112765
-#endif /* UNIV_DEBUG */
-};
-
-/** The hash index system */
-struct btr_search_sys_t{
- hash_table_t** hash_tables; /*!< the array of adaptive hash index
- tables, mapping dtuple_fold values to
- rec_t pointers on index pages */
-};
-
-/** The adaptive hash index */
-extern btr_search_sys_t* btr_search_sys;
-
-/** After change in n_fields or n_bytes in info, this many rounds are waited
-before starting the hash analysis again: this is to save CPU time when there
-is no hope in building a hash index. */
-#define BTR_SEARCH_HASH_ANALYSIS 17
-
-/** Limit of consecutive searches for trying a search shortcut on the search
-pattern */
-#define BTR_SEARCH_ON_PATTERN_LIMIT 3
-
-/** Limit of consecutive searches for trying a search shortcut using
-the hash index */
-#define BTR_SEARCH_ON_HASH_LIMIT 3
-
-/** We do this many searches before trying to keep the search latch
-over calls from MySQL. If we notice someone waiting for the latch, we
-again set this much timeout. This is to reduce contention. */
-#define BTR_SEA_TIMEOUT 10000
-
-#ifndef UNIV_NONINL
-#include "btr0sea.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/btr0sea.ic b/storage/xtradb/include/btr0sea.ic
deleted file mode 100644
index e963d8a8449..00000000000
--- a/storage/xtradb/include/btr0sea.ic
+++ /dev/null
@@ -1,210 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/btr0sea.ic
-The index tree adaptive search
-
-Created 2/17/1996 Heikki Tuuri
-*************************************************************************/
-
-#include "dict0mem.h"
-#include "btr0cur.h"
-#include "buf0buf.h"
-
-/*********************************************************************//**
-Updates the search info. */
-UNIV_INTERN
-void
-btr_search_info_update_slow(
-/*========================*/
- btr_search_t* info, /*!< in/out: search info */
- btr_cur_t* cursor);/*!< in: cursor which was just positioned */
-
-/********************************************************************//**
-Returns search info for an index.
-@return search info; search mutex reserved */
-UNIV_INLINE
-btr_search_t*
-btr_search_get_info(
-/*================*/
- dict_index_t* index) /*!< in: index */
-{
- return(index->search_info);
-}
-
-/*********************************************************************//**
-Updates the search info. */
-UNIV_INLINE
-void
-btr_search_info_update(
-/*===================*/
- dict_index_t* index, /*!< in: index of the cursor */
- btr_cur_t* cursor) /*!< in: cursor which was just positioned */
-{
- btr_search_t* info;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(btr_search_get_latch(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- info = btr_search_get_info(index);
-
- info->hash_analysis++;
-
- if (info->hash_analysis < BTR_SEARCH_HASH_ANALYSIS) {
-
- /* Do nothing */
-
- return;
-
- }
-
- ut_ad(cursor->flag != BTR_CUR_HASH);
-
- btr_search_info_update_slow(info, cursor);
-}
-
-/********************************************************************//**
-Returns the adaptive hash index table for a given index key.
-@return the adaptive hash index table for a given index key */
-UNIV_INLINE
-hash_table_t*
-btr_search_get_hash_table(
-/*======================*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index->search_table);
-
- return(index->search_table);
-}
-
-/********************************************************************//**
-Returns the adaptive hash index latch for a given index key.
-@return the adaptive hash index latch for a given index key */
-UNIV_INLINE
-prio_rw_lock_t*
-btr_search_get_latch(
-/*=================*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index->search_latch >= btr_search_latch_arr &&
- index->search_latch < btr_search_latch_arr +
- btr_search_index_num);
-
- return(index->search_latch);
-}
-
-/*********************************************************************//**
-Returns the AHI partition number corresponding to a given index ID. */
-UNIV_INLINE
-ulint
-btr_search_get_key(
-/*===============*/
- index_id_t index_id) /*!< in: index ID */
-{
- return(index_id % btr_search_index_num);
-}
-
-/*********************************************************************//**
-Initializes AHI-related fields in a newly created index. */
-UNIV_INLINE
-void
-btr_search_index_init(
-/*===============*/
- dict_index_t* index) /*!< in: index */
-{
- index->search_latch =
- &btr_search_latch_arr[btr_search_get_key(index->id)];
- index->search_table =
- btr_search_sys->hash_tables[btr_search_get_key(index->id)];
-}
-
-/********************************************************************//**
-Latches all adaptive hash index latches in exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_lock_all(void)
-/*=======================*/
-{
- ulint i;
-
- for (i = 0; i < btr_search_index_num; i++) {
- rw_lock_x_lock(&btr_search_latch_arr[i]);
- }
-}
-
-/********************************************************************//**
-Unlatches all adaptive hash index latches in exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_unlock_all(void)
-/*==========================*/
-{
- ulint i;
-
- for (i = 0; i < btr_search_index_num; i++) {
- rw_lock_x_unlock(&btr_search_latch_arr[i]);
- }
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Checks if the thread has locked all the adaptive hash index latches in the
-specified mode.
-
-@return true if all latches are locked by the current thread, false
-otherwise. */
-UNIV_INLINE
-bool
-btr_search_own_all(
-/*===============*/
- ulint lock_type)
-{
- ulint i;
-
- for (i = 0; i < btr_search_index_num; i++) {
- if (!rw_lock_own(&btr_search_latch_arr[i], lock_type)) {
- return(false);
- }
- }
-
- return(true);
-}
-
-/********************************************************************//**
-Checks if the thread owns any adaptive hash latches in either S or X mode.
-@return true if the thread owns at least one latch in any mode. */
-UNIV_INLINE
-bool
-btr_search_own_any(void)
-/*====================*/
-{
- ulint i;
-
- for (i = 0; i < btr_search_index_num; i++) {
- if (rw_lock_own(&btr_search_latch_arr[i], RW_LOCK_SHARED) ||
- rw_lock_own(&btr_search_latch_arr[i], RW_LOCK_EX)) {
- return(true);
- }
- }
-
- return(false);
-}
-#endif /* UNIV_SYNC_DEBUG */
diff --git a/storage/xtradb/include/btr0types.h b/storage/xtradb/include/btr0types.h
deleted file mode 100644
index 4bc9c72eccc..00000000000
--- a/storage/xtradb/include/btr0types.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/btr0types.h
-The index tree general types
-
-Created 2/17/1996 Heikki Tuuri
-*************************************************************************/
-
-#ifndef btr0types_h
-#define btr0types_h
-
-#include "univ.i"
-
-#include "rem0types.h"
-#include "page0types.h"
-#include "sync0rw.h"
-
-/** Persistent cursor */
-struct btr_pcur_t;
-/** B-tree cursor */
-struct btr_cur_t;
-/** B-tree search information for the adaptive hash index */
-struct btr_search_t;
-
-#ifndef UNIV_HOTBACKUP
-
-/** @brief The array of latches protecting the adaptive search partitions
-
-These latches protect the
-(1) hash index from the corresponding AHI partition;
-(2) columns of a record to which we have a pointer in the hash index;
-
-but do NOT protect:
-
-(3) next record offset field in a record;
-(4) next or previous records on the same page.
-
-Bear in mind (3) and (4) when using the hash indexes.
-*/
-
-extern prio_rw_lock_t* btr_search_latch_arr;
-
-#endif /* UNIV_HOTBACKUP */
-
-/** Flag: has the search system been enabled?
-Protected by btr_search_latch. */
-extern char btr_search_enabled;
-
-/** Number of adaptive hash index partitions */
-extern ulint btr_search_index_num;
-
-#ifdef UNIV_BLOB_DEBUG
-# include "buf0types.h"
-/** An index->blobs entry for keeping track of off-page column references */
-struct btr_blob_dbg_t;
-
-/** Insert to index->blobs a reference to an off-page column.
-@param index the index tree
-@param b the reference
-@param ctx context (for logging) */
-UNIV_INTERN
-void
-btr_blob_dbg_rbt_insert(
-/*====================*/
- dict_index_t* index, /*!< in/out: index tree */
- const btr_blob_dbg_t* b, /*!< in: the reference */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
-
-/** Remove from index->blobs a reference to an off-page column.
-@param index the index tree
-@param b the reference
-@param ctx context (for logging) */
-UNIV_INTERN
-void
-btr_blob_dbg_rbt_delete(
-/*====================*/
- dict_index_t* index, /*!< in/out: index tree */
- const btr_blob_dbg_t* b, /*!< in: the reference */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
-
-/**************************************************************//**
-Add to index->blobs any references to off-page columns from a record.
-@return number of references added */
-UNIV_INTERN
-ulint
-btr_blob_dbg_add_rec(
-/*=================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in/out: index */
- const ulint* offsets,/*!< in: offsets */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Remove from index->blobs any references to off-page columns from a record.
-@return number of references removed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_remove_rec(
-/*====================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in/out: index */
- const ulint* offsets,/*!< in: offsets */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Count and add to index->blobs any references to off-page columns
-from records on a page.
-@return number of references added */
-UNIV_INTERN
-ulint
-btr_blob_dbg_add(
-/*=============*/
- const page_t* page, /*!< in: rewritten page */
- dict_index_t* index, /*!< in/out: index */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Count and remove from index->blobs any references to off-page columns
-from records on a page.
-Used when reorganizing a page, before copying the records.
-@return number of references removed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_remove(
-/*================*/
- const page_t* page, /*!< in: b-tree page */
- dict_index_t* index, /*!< in/out: index */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Restore in index->blobs any references to off-page columns
-Used when page reorganize fails due to compressed page overflow. */
-UNIV_INTERN
-void
-btr_blob_dbg_restore(
-/*=================*/
- const page_t* npage, /*!< in: page that failed to compress */
- const page_t* page, /*!< in: copy of original page */
- dict_index_t* index, /*!< in/out: index */
- const char* ctx) /*!< in: context (for logging) */
- MY_ATTRIBUTE((nonnull));
-
-/** Operation that processes the BLOB references of an index record
-@param[in] rec record on index page
-@param[in,out] index the index tree of the record
-@param[in] offsets rec_get_offsets(rec,index)
-@param[in] ctx context (for logging)
-@return number of BLOB references processed */
-typedef ulint (*btr_blob_dbg_op_f)
-(const rec_t* rec,dict_index_t* index,const ulint* offsets,const char* ctx);
-
-/**************************************************************//**
-Count and process all references to off-page columns on a page.
-@return number of references processed */
-UNIV_INTERN
-ulint
-btr_blob_dbg_op(
-/*============*/
- const page_t* page, /*!< in: B-tree leaf page */
- const rec_t* rec, /*!< in: record to start from
- (NULL to process the whole page) */
- dict_index_t* index, /*!< in/out: index */
- const char* ctx, /*!< in: context (for logging) */
- const btr_blob_dbg_op_f op) /*!< in: operation on records */
- MY_ATTRIBUTE((nonnull(1,3,4,5)));
-#else /* UNIV_BLOB_DEBUG */
-# define btr_blob_dbg_add_rec(rec, index, offsets, ctx) ((void) 0)
-# define btr_blob_dbg_add(page, index, ctx) ((void) 0)
-# define btr_blob_dbg_remove_rec(rec, index, offsets, ctx) ((void) 0)
-# define btr_blob_dbg_remove(page, index, ctx) ((void) 0)
-# define btr_blob_dbg_restore(npage, page, index, ctx) ((void) 0)
-# define btr_blob_dbg_op(page, rec, index, ctx, op) ((void) 0)
-#endif /* UNIV_BLOB_DEBUG */
-
-/** The size of a reference to data stored on a different page.
-The reference is stored at the end of the prefix of the field
-in the index record. */
-#define BTR_EXTERN_FIELD_REF_SIZE 20
-
-/** A BLOB field reference full of zero, for use in assertions and tests.
-Initially, BLOB field references are set to zero, in
-dtuple_convert_big_rec(). */
-extern const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE];
-
-#endif
diff --git a/storage/xtradb/include/buf0buddy.h b/storage/xtradb/include/buf0buddy.h
deleted file mode 100644
index 09768dda92f..00000000000
--- a/storage/xtradb/include/buf0buddy.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0buddy.h
-Binary buddy allocator for compressed pages
-
-Created December 2006 by Marko Makela
-*******************************************************/
-
-#ifndef buf0buddy_h
-#define buf0buddy_h
-
-#ifdef UNIV_MATERIALIZE
-# undef UNIV_INLINE
-# define UNIV_INLINE
-#endif
-
-#include "univ.i"
-#include "buf0types.h"
-
-/**********************************************************************//**
-Allocate a block. The thread calling this function must hold
-buf_pool->LRU_list_mutex and must not hold buf_pool->zip_mutex or any
-block->mutex. The buf_pool->LRU_list_mutex may be released and reacquired.
-This function should only be used for allocating compressed page frames.
-@return allocated block, never NULL */
-UNIV_INLINE
-byte*
-buf_buddy_alloc(
-/*============*/
- buf_pool_t* buf_pool, /*!< in/out: buffer pool in which
- the page resides */
- ulint size, /*!< in: compressed page size
- (between UNIV_ZIP_SIZE_MIN and
- UNIV_PAGE_SIZE) */
- ibool* lru) /*!< in: pointer to a variable
- that will be assigned TRUE if
- storage was allocated from the
- LRU list and buf_pool->LRU_list_mutex
- was temporarily released */
- MY_ATTRIBUTE((malloc, nonnull));
-
-/**********************************************************************//**
-Deallocate a block. */
-UNIV_INLINE
-void
-buf_buddy_free(
-/*===========*/
- buf_pool_t* buf_pool, /*!< in/out: buffer pool in which
- the block resides */
- void* buf, /*!< in: block to be freed, must not
- be pointed to by the buffer pool */
- ulint size) /*!< in: block size,
- up to UNIV_PAGE_SIZE */
- MY_ATTRIBUTE((nonnull));
-
-#ifndef UNIV_NONINL
-# include "buf0buddy.ic"
-#endif
-
-#endif /* buf0buddy_h */
diff --git a/storage/xtradb/include/buf0buddy.ic b/storage/xtradb/include/buf0buddy.ic
deleted file mode 100644
index a5fb510dd19..00000000000
--- a/storage/xtradb/include/buf0buddy.ic
+++ /dev/null
@@ -1,142 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0buddy.ic
-Binary buddy allocator for compressed pages
-
-Created December 2006 by Marko Makela
-*******************************************************/
-
-#ifdef UNIV_MATERIALIZE
-# undef UNIV_INLINE
-# define UNIV_INLINE
-#endif
-
-#include "buf0buf.h"
-#include "buf0buddy.h"
-#include "ut0ut.h"
-#include "sync0sync.h"
-
-/**********************************************************************//**
-Allocate a block. The thread calling this function must hold
-buf_pool->LRU_list_mutex and must not hold buf_pool->zip_mutex or any
-block->mutex. The buf_pool->LRU_list_mutex may be released and reacquired.
-@return allocated block, never NULL */
-UNIV_INTERN
-void*
-buf_buddy_alloc_low(
-/*================*/
- buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
- ulint i, /*!< in: index of buf_pool->zip_free[],
- or BUF_BUDDY_SIZES */
- ibool* lru) /*!< in: pointer to a variable that
- will be assigned TRUE if storage was
- allocated from the LRU list and
- buf_pool->LRU_list_mutex was
- temporarily released */
- MY_ATTRIBUTE((malloc));
-
-/**********************************************************************//**
-Deallocate a block. */
-UNIV_INTERN
-void
-buf_buddy_free_low(
-/*===============*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- void* buf, /*!< in: block to be freed, must not be
- pointed to by the buffer pool */
- ulint i) /*!< in: index of buf_pool->zip_free[],
- or BUF_BUDDY_SIZES */
- MY_ATTRIBUTE((nonnull));
-
-/**********************************************************************//**
-Maps a block size to its slot in buf_pool->zip_free[]. Starting from
-BUF_BUDDY_LOW, the candidate size is doubled until it is no smaller than
-the requested size; the number of doublings is the slot.
-@return index of buf_pool->zip_free[], or BUF_BUDDY_SIZES */
-UNIV_INLINE
-ulint
-buf_buddy_get_slot(
-/*===============*/
- ulint size) /*!< in: block size */
-{
- ulint slot;
- ulint slot_size;
-
- ut_ad(size >= UNIV_ZIP_SIZE_MIN);
-
- slot = 0;
- slot_size = BUF_BUDDY_LOW;
-
- while (slot_size < size) {
-  slot++;
-  slot_size <<= 1;
- }
-
- ut_ad(slot <= BUF_BUDDY_SIZES);
- return(slot);
-}
-
-/**********************************************************************//**
-Allocates a block for a compressed page frame by translating the size to
-a buf_pool->zip_free[] slot and delegating to buf_buddy_alloc_low().
-The calling thread must hold buf_pool->LRU_list_mutex and must not hold
-buf_pool->zip_mutex or any block->mutex. The buf_pool->LRU_list_mutex may
-be released and reacquired.
-This function should only be used for allocating compressed page frames.
-@return allocated block, never NULL */
-UNIV_INLINE
-byte*
-buf_buddy_alloc(
-/*============*/
- buf_pool_t* buf_pool, /*!< in/out: buffer pool in which
- the page resides */
- ulint size, /*!< in: compressed page size
- (between UNIV_ZIP_SIZE_MIN and
- UNIV_PAGE_SIZE) */
- ibool* lru) /*!< in: pointer to a variable
- that will be assigned TRUE if
- storage was allocated from the
- LRU list and buf_pool->LRU_list_mutex
- was temporarily released */
-{
- ulint slot;
- void* block;
-
- /* Debug-only sanity checks on the latching state and the size. */
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
- ut_ad(ut_is_2pow(size));
- ut_ad(size >= UNIV_ZIP_SIZE_MIN);
- ut_ad(size <= UNIV_PAGE_SIZE);
-
- slot = buf_buddy_get_slot(size);
- block = buf_buddy_alloc_low(buf_pool, slot, lru);
-
- return(static_cast<byte*>(block));
-}
-
-/**********************************************************************//**
-Deallocates a block by translating its size to a buf_pool->zip_free[]
-slot and delegating to buf_buddy_free_low(). */
-UNIV_INLINE
-void
-buf_buddy_free(
-/*===========*/
- buf_pool_t* buf_pool, /*!< in/out: buffer pool in which
- the block resides */
- void* buf, /*!< in: block to be freed, must not
- be pointed to by the buffer pool */
- ulint size) /*!< in: block size,
- up to UNIV_PAGE_SIZE */
-{
- ulint slot;
-
- /* Debug-only sanity checks on the size. */
- ut_ad(ut_is_2pow(size));
- ut_ad(size >= UNIV_ZIP_SIZE_MIN);
- ut_ad(size <= UNIV_PAGE_SIZE);
-
- slot = buf_buddy_get_slot(size);
-
- buf_buddy_free_low(buf_pool, buf, slot);
-}
-
-#ifdef UNIV_MATERIALIZE
-# undef UNIV_INLINE
-# define UNIV_INLINE UNIV_INLINE_ORIGINAL
-#endif
diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h
deleted file mode 100644
index 1899165ace0..00000000000
--- a/storage/xtradb/include/buf0buf.h
+++ /dev/null
@@ -1,2353 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0buf.h
-The database buffer pool high-level routines
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0buf_h
-#define buf0buf_h
-
-#include "univ.i"
-#include "fil0fil.h"
-#include "mtr0types.h"
-#include "buf0types.h"
-#include "hash0hash.h"
-#include "ut0byte.h"
-#include "page0types.h"
-#ifndef UNIV_HOTBACKUP
-#include "ut0rbt.h"
-#include "os0proc.h"
-#include "log0log.h"
-
-/** @name Modes for buf_page_get_gen */
-/* @{ */
-#define BUF_GET 10 /*!< get always */
-#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */
-#define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make
- the block young in the LRU list */
-#define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but
- set no latch; we have
- separated this case, because
- it is error-prone programming
- not to set a latch, and it
- should be used with care */
-#define BUF_GET_IF_IN_POOL_OR_WATCH 15
- /*!< Get the page only if it's in the
- buffer pool, if not then set a watch
- on the page. */
-#define BUF_GET_POSSIBLY_FREED 16
- /*!< Like BUF_GET, but do not mind
- if the file page has been freed. */
-/* @} */
-/** @name Modes for buf_page_get_known_nowait */
-/* @{ */
-#define BUF_MAKE_YOUNG 51 /*!< Move the block to the
- start of the LRU list if there
- is a danger that the block
- would drift out of the buffer
- pool*/
-#define BUF_KEEP_OLD 52 /*!< Preserve the current LRU
- position of the block. */
-/* @} */
-
-#define MAX_BUFFER_POOLS_BITS 6 /*!< Number of bits used to represent
- a buffer pool ID */
-
-#define MAX_BUFFER_POOLS (1 << MAX_BUFFER_POOLS_BITS)
- /*!< The maximum number of buffer
- pools that can be defined */
-
-#define BUF_POOL_WATCH_SIZE (srv_n_purge_threads + 1)
- /*!< Maximum number of concurrent
- buffer pool watches */
-#define MAX_PAGE_HASH_LOCKS 1024 /*!< The maximum number of
- page_hash locks */
-
-extern buf_pool_t* buf_pool_ptr; /*!< The buffer pools
- of the database */
-#ifdef UNIV_DEBUG
-extern ibool buf_debug_prints;/*!< If this is set TRUE, the program
- prints info whenever read or flush
- occurs */
-#endif /* UNIV_DEBUG */
-extern ulint srv_buf_pool_instances;
-extern ulint srv_buf_pool_curr_size;
-#else /* !UNIV_HOTBACKUP */
-extern buf_block_t* back_block1; /*!< first block, for --apply-log */
-extern buf_block_t* back_block2; /*!< second block, for page reorganize */
-#endif /* !UNIV_HOTBACKUP */
-
-/** @brief States of a control block
-@see buf_page_t
-
-The enumeration values must be 0..7. */
-enum buf_page_state {
- BUF_BLOCK_POOL_WATCH, /*!< a sentinel for the buffer pool
- watch, element of buf_pool->watch[] */
- BUF_BLOCK_ZIP_PAGE, /*!< contains a clean
- compressed page */
- BUF_BLOCK_ZIP_DIRTY, /*!< contains a compressed
- page that is in the
- buf_pool->flush_list */
-
- BUF_BLOCK_NOT_USED, /*!< is in the free list;
- must be after the BUF_BLOCK_ZIP_
- constants for compressed-only pages
- @see buf_block_state_valid() */
- BUF_BLOCK_READY_FOR_USE, /*!< when buf_LRU_get_free_block
- returns a block, it is in this state */
- BUF_BLOCK_FILE_PAGE, /*!< contains a buffered file page */
- BUF_BLOCK_MEMORY, /*!< contains some main memory
- object */
- BUF_BLOCK_REMOVE_HASH /*!< hash index should be removed
- before putting to the free list */
-};
-
-
-/** This structure defines information we will fetch from each buffer pool. It
-will be used to print table IO stats */
-struct buf_pool_info_t{
- /* General buffer pool info */
- ulint pool_unique_id; /*!< Buffer Pool ID */
- ulint pool_size; /*!< Buffer Pool size in pages */
- ulint pool_size_bytes;
- ulint lru_len; /*!< Length of buf_pool->LRU */
- ulint old_lru_len; /*!< buf_pool->LRU_old_len */
- ulint free_list_len; /*!< Length of buf_pool->free list */
- ulint flush_list_len; /*!< Length of buf_pool->flush_list */
- ulint n_pend_unzip; /*!< buf_pool->n_pend_unzip, pages
- pending decompress */
- ulint n_pend_reads; /*!< buf_pool->n_pend_reads, pages
- pending read */
- ulint n_pending_flush_lru; /*!< Pages pending flush in LRU */
- ulint n_pending_flush_single_page;/*!< Pages pending to be
- flushed as part of single page
- flushes issued by various user
- threads */
- ulint n_pending_flush_list; /*!< Pages pending flush in FLUSH
- LIST */
- ulint n_pages_made_young; /*!< number of pages made young */
- ulint n_pages_not_made_young; /*!< number of pages not made young */
- ulint n_pages_read; /*!< buf_pool->n_pages_read */
- ulint n_pages_created; /*!< buf_pool->n_pages_created */
- ulint n_pages_written; /*!< buf_pool->n_pages_written */
- ulint n_page_gets; /*!< buf_pool->n_page_gets */
- ulint n_ra_pages_read_rnd; /*!< buf_pool->n_ra_pages_read_rnd,
- number of pages readahead */
- ulint n_ra_pages_read; /*!< buf_pool->n_ra_pages_read, number
- of pages readahead */
- ulint n_ra_pages_evicted; /*!< buf_pool->n_ra_pages_evicted,
- number of readahead pages evicted
- without access */
- ulint n_page_get_delta; /*!< num of buffer pool page gets since
- last printout */
-
- /* Buffer pool access stats */
- double page_made_young_rate; /*!< page made young rate in pages
- per second */
- double page_not_made_young_rate;/*!< page not made young rate
- in pages per second */
- double pages_read_rate; /*!< num of pages read per second */
- double pages_created_rate; /*!< num of pages create per second */
- double pages_written_rate; /*!< num of pages written per second */
- ulint page_read_delta; /*!< num of pages read since last
- printout */
- ulint young_making_delta; /*!< num of pages made young since
- last printout */
- ulint not_young_making_delta; /*!< num of pages not make young since
- last printout */
-
- /* Statistics about read ahead algorithm. */
- double pages_readahead_rnd_rate;/*!< random readahead rate in pages per
- second */
- double pages_readahead_rate; /*!< readahead rate in pages per
- second */
- double pages_evicted_rate; /*!< rate of readahead page evicted
- without access, in pages per second */
-
- /* Stats about LRU eviction */
- ulint unzip_lru_len; /*!< length of buf_pool->unzip_LRU
- list */
- /* Counters for LRU policy */
- ulint io_sum; /*!< buf_LRU_stat_sum.io */
- ulint io_cur; /*!< buf_LRU_stat_cur.io, num of IO
- for current interval */
- ulint unzip_sum; /*!< buf_LRU_stat_sum.unzip */
- ulint unzip_cur; /*!< buf_LRU_stat_cur.unzip, num
- pages decompressed in current
- interval */
-};
-
-/** The occupied bytes of lists in all buffer pools */
-struct buf_pools_list_size_t {
- ulint LRU_bytes; /*!< LRU size in bytes */
- ulint unzip_LRU_bytes; /*!< unzip_LRU size in bytes */
- ulint flush_list_bytes; /*!< flush_list size in bytes */
-};
-
-#ifndef UNIV_HOTBACKUP
-
-/********************************************************************//**
-Creates the buffer pool.
-@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
-UNIV_INTERN
-dberr_t
-buf_pool_init(
-/*=========*/
- ulint size, /*!< in: Size of the total pool in bytes */
- ulint n_instances); /*!< in: Number of instances */
-/********************************************************************//**
-Frees the buffer pool at shutdown. This must not be invoked before
-freeing all mutexes. */
-UNIV_INTERN
-void
-buf_pool_free(
-/*==========*/
- ulint n_instances); /*!< in: number of instances to free */
-
-/********************************************************************//**
-Clears the adaptive hash index on all pages in the buffer pool. */
-UNIV_INTERN
-void
-buf_pool_clear_hash_index(void);
-/*===========================*/
-
-/********************************************************************//**
-Relocate a buffer control block. Relocates the block on the LRU list
-and in buf_pool->page_hash. Does not relocate bpage->list.
-The caller must take care of relocating bpage->list. */
-UNIV_INTERN
-void
-buf_relocate(
-/*=========*/
- buf_page_t* bpage, /*!< in/out: control block being relocated;
- buf_page_get_state(bpage) must be
- BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
- buf_page_t* dpage); /*!< in/out: destination control block */
-/*********************************************************************//**
-Gets the current size of buffer buf_pool in bytes.
-@return size in bytes */
-UNIV_INLINE
-ulint
-buf_pool_get_curr_size(void);
-/*========================*/
-/*********************************************************************//**
-Gets the current size of buffer buf_pool in frames.
-@return size in pages */
-UNIV_INLINE
-ulint
-buf_pool_get_n_pages(void);
-/*=======================*/
-/********************************************************************//**
-Gets the smallest oldest_modification lsn for any page in the pool. Returns
-zero if all modified pages have been flushed to disk.
-@return oldest modification in pool, zero if none */
-UNIV_INTERN
-lsn_t
-buf_pool_get_oldest_modification(void);
-/*==================================*/
-
-/********************************************************************//**
-Gets the smallest oldest_modification lsn for any page in the pool. Returns
-zero if all modified pages have been flushed to disk.
-@return oldest modification in pool, zero if none */
-UNIV_INTERN
-lsn_t
-buf_pool_get_oldest_modification_peek(void);
-/*=======================================*/
-
-/********************************************************************//**
-Allocates a buf_page_t descriptor. This function must succeed. In case
-of failure we assert in this function. */
-UNIV_INLINE
-buf_page_t*
-buf_page_alloc_descriptor(void)
-/*===========================*/
- MY_ATTRIBUTE((malloc));
-/********************************************************************//**
-Free a buf_page_t descriptor. */
-UNIV_INLINE
-void
-buf_page_free_descriptor(
-/*=====================*/
- buf_page_t* bpage) /*!< in: bpage descriptor to free. */
- MY_ATTRIBUTE((nonnull));
-
-/********************************************************************//**
-Allocates a buffer block.
-@return own: the allocated block, in state BUF_BLOCK_MEMORY */
-UNIV_INTERN
-buf_block_t*
-buf_block_alloc(
-/*============*/
- buf_pool_t* buf_pool); /*!< in: buffer pool instance,
- or NULL for round-robin selection
- of the buffer pool */
-/********************************************************************//**
-Frees a buffer block which does not contain a file page. */
-UNIV_INLINE
-void
-buf_block_free(
-/*===========*/
- buf_block_t* block); /*!< in, own: block to be freed */
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Copies contents of a buffer frame to a given buffer.
-@return buf */
-UNIV_INLINE
-byte*
-buf_frame_copy(
-/*===========*/
- byte* buf, /*!< in: buffer to copy to */
- const buf_frame_t* frame); /*!< in: buffer frame */
-#ifndef UNIV_HOTBACKUP
-/**************************************************************//**
-NOTE! The following macros should be used instead of buf_page_get_gen,
-to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
-in LA! */
-#define buf_page_get(SP, ZS, OF, LA, MTR) buf_page_get_gen(\
- SP, ZS, OF, LA, NULL,\
- BUF_GET, __FILE__, __LINE__, MTR)
-/**************************************************************//**
-Use these macros to bufferfix a page with no latching. Remember not to
-read the contents of the page unless you know it is safe. Do not modify
-the contents of the page! We have separated this case, because it is
-error-prone programming not to set a latch, and it should be used
-with care. */
-#define buf_page_get_with_no_latch(SP, ZS, OF, MTR) buf_page_get_gen(\
- SP, ZS, OF, RW_NO_LATCH, NULL,\
- BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
-/********************************************************************//**
-This is the general function used to get optimistic access to a database
-page.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-buf_page_optimistic_get(
-/*====================*/
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
- buf_block_t* block, /*!< in: guessed block */
- ib_uint64_t modify_clock,/*!< in: modify clock value */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mini-transaction */
-/********************************************************************//**
-This is used to get access to a known database page, when no waiting can be
-done.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-buf_page_get_known_nowait(
-/*======================*/
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
- buf_block_t* block, /*!< in: the known page */
- ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mini-transaction */
-
-/*******************************************************************//**
-Given a tablespace id and page number tries to get that page. If the
-page is not in the buffer pool it is not loaded and NULL is returned.
-Suitable for using when holding the lock_sys_t::mutex. */
-UNIV_INTERN
-buf_block_t*
-buf_page_try_get_func(
-/*==================*/
- ulint space_id,/*!< in: tablespace id */
- ulint page_no,/*!< in: page number */
- ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
- bool possibly_freed, /*!< in: don't mind if page is freed */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr); /*!< in: mini-transaction */
-
-/** Tries to get a page. If the page is not in the buffer pool it is
-not loaded. Suitable for using when holding the lock_sys_t::mutex.
-The page is requested through buf_page_try_get_func() with RW_S_LATCH
-and possibly_freed = false.
-The expansion is a plain expression with no trailing semicolon; the
-caller supplies the statement terminator, so the macro can be used
-inside larger expressions and unbraced if/else bodies.
-@param space_id in: tablespace id
-@param page_no in: page number
-@param mtr in: mini-transaction
-@return the page if in buffer pool, NULL if not */
-#define buf_page_try_get(space_id, page_no, mtr) \
- buf_page_try_get_func(space_id, page_no, RW_S_LATCH, false, \
- __FILE__, __LINE__, mtr)
-
-/********************************************************************//**
-Get read access to a compressed page (usually of type
-FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
-The page must be released with buf_page_release_zip().
-NOTE: the page is not protected by any latch. Mutual exclusion has to
-be implemented at a higher level. In other words, all possible
-accesses to a given page through this function must be protected by
-the same set of mutexes or latches.
-@return pointer to the block, or NULL if not compressed */
-UNIV_INTERN
-buf_page_t*
-buf_page_get_zip(
-/*=============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size */
- ulint offset);/*!< in: page number */
-/********************************************************************//**
-This is the general function used to get access to a database page.
-@return pointer to the block or NULL */
-UNIV_INTERN
-buf_block_t*
-buf_page_get_gen(
-/*=============*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint offset, /*!< in: page number */
- ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
- buf_block_t* guess, /*!< in: guessed block or NULL */
- ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
- BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH,
- BUF_GET_IF_IN_POOL_OR_WATCH or
- BUF_GET_POSSIBLY_FREED */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr, /*!< in: mini-transaction */
- dberr_t* err = NULL); /*!< out: error code; the argument
- may be omitted, in which case it
- defaults to NULL */
-/********************************************************************//**
-Initializes a page to the buffer buf_pool. The page is usually not read
-from a file even if it cannot be found in the buffer buf_pool. This is one
-of the functions which perform to a block a state transition NOT_USED =>
-FILE_PAGE (the other is buf_page_get_gen).
-@return pointer to the block, page bufferfixed */
-UNIV_INTERN
-buf_block_t*
-buf_page_create(
-/*============*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space in units of
- a page */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-#else /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Inits a page to the buffer buf_pool, for use in mysqlbackup --restore. */
-UNIV_INTERN
-void
-buf_page_init_for_backup_restore(
-/*=============================*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space
- in units of a page */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- buf_block_t* block); /*!< in: block to init */
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Releases a compressed-only page acquired with buf_page_get_zip(). */
-UNIV_INLINE
-void
-buf_page_release_zip(
-/*=================*/
- buf_page_t* bpage); /*!< in: buffer block */
-/********************************************************************//**
-Decrements the bufferfix count of a buffer control block and releases
-a latch, if specified. */
-UNIV_INLINE
-void
-buf_page_release(
-/*=============*/
- buf_block_t* block, /*!< in: buffer block */
- ulint rw_latch); /*!< in: RW_S_LATCH, RW_X_LATCH,
- RW_NO_LATCH */
-/********************************************************************//**
-Moves a page to the start of the buffer pool LRU list. This high-level
-function can be used to prevent an important page from slipping out of
-the buffer pool. */
-UNIV_INTERN
-void
-buf_page_make_young(
-/*================*/
- buf_page_t* bpage); /*!< in: buffer block of a file page */
-/********************************************************************//**
-Returns TRUE if the page can be found in the buffer pool hash table.
-
-NOTE that it is possible that the page is not yet read from disk,
-though.
-
-@return TRUE if found in the page hash table */
-UNIV_INLINE
-ibool
-buf_page_peek(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: page number */
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
-/********************************************************************//**
-Sets file_page_was_freed TRUE if the page is found in the buffer pool.
-This function should be called when we free a file page and want the
-debug version to check that it is not accessed any more unless
-reallocated.
-@return control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_set_file_page_was_freed(
-/*=============================*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: page number */
-/********************************************************************//**
-Sets file_page_was_freed FALSE if the page is found in the buffer pool.
-This function should be called when a freed file page is reallocated, so
-that the debug version stops checking that the page is not accessed.
-@return control block if found in page hash table, otherwise NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_reset_file_page_was_freed(
-/*===============================*/
- ulint space, /*!< in: space id */
- ulint offset); /*!< in: page number */
-#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
-/********************************************************************//**
-Reads the freed_page_clock of a buffer block.
-@return freed_page_clock */
-UNIV_INLINE
-ulint
-buf_page_get_freed_page_clock(
-/*==========================*/
- const buf_page_t* bpage) /*!< in: block */
- MY_ATTRIBUTE((pure));
-/********************************************************************//**
-Reads the freed_page_clock of a buffer block.
-@return freed_page_clock */
-UNIV_INLINE
-ulint
-buf_block_get_freed_page_clock(
-/*===========================*/
- const buf_block_t* block) /*!< in: block */
- MY_ATTRIBUTE((pure));
-
-/********************************************************************//**
-Tells if a block is still close enough to the MRU end of the LRU list
-meaning that it is not in danger of getting evicted and also implying
-that it has been accessed recently.
-Note that this is for heuristics only and does not reserve buffer pool
-mutex.
-@return TRUE if block is close to MRU end of LRU */
-UNIV_INLINE
-ibool
-buf_page_peek_if_young(
-/*===================*/
- const buf_page_t* bpage); /*!< in: block */
-/********************************************************************//**
-Recommends a move of a block to the start of the LRU list if there is danger
-of dropping from the buffer pool. NOTE: does not reserve the buffer pool
-mutex.
-@return TRUE if should be made younger */
-UNIV_INLINE
-ibool
-buf_page_peek_if_too_old(
-/*=====================*/
- const buf_page_t* bpage); /*!< in: block to make younger */
-/********************************************************************//**
-Gets the youngest modification log sequence number for a frame.
-Returns zero if not file page or no modification occurred yet.
-@return newest modification to page */
-UNIV_INLINE
-lsn_t
-buf_page_get_newest_modification(
-/*=============================*/
- const buf_page_t* bpage); /*!< in: block containing the
- page frame */
-/********************************************************************//**
-Increments the modify clock of a frame by 1. The caller must either (1) own
-the LRU list mutex while the block's bufferfix count is zero, or (2) own an
-x-lock on the block. */
-UNIV_INLINE
-void
-buf_block_modify_clock_inc(
-/*=======================*/
- buf_block_t* block); /*!< in: block */
-/********************************************************************//**
-Returns the value of the modify clock. The caller must have an s-lock
-or x-lock on the block.
-@return value */
-UNIV_INLINE
-ib_uint64_t
-buf_block_get_modify_clock(
-/*=======================*/
- buf_block_t* block); /*!< in: block */
-/*******************************************************************//**
-Increments the bufferfix count. */
-UNIV_INLINE
-void
-buf_block_buf_fix_inc_func(
-/*=======================*/
-# ifdef UNIV_SYNC_DEBUG
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line */
-# endif /* UNIV_SYNC_DEBUG */
- buf_block_t* block) /*!< in/out: block to bufferfix */
- MY_ATTRIBUTE((nonnull));
-
-/*******************************************************************//**
-Increments the bufferfix count. */
-UNIV_INLINE
-void
-buf_block_fix(
-/*===========*/
- buf_block_t* block); /*!< in/out: block to bufferfix */
-
-/*******************************************************************//**
-Decrements the bufferfix count. */
-UNIV_INLINE
-void
-buf_block_unfix(
-/*===========*/
- buf_block_t* block); /*!< in/out: block to unfix */
-
-# ifdef UNIV_SYNC_DEBUG
-/** Increments the bufferfix count.
-Forwards to buf_block_buf_fix_inc_func(); note that the arguments are
-passed on in the order (f, l, b).
-@param b in/out: block to bufferfix
-@param f in: file name where requested
-@param l in: line number where requested */
-# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
-# else /* UNIV_SYNC_DEBUG */
-/** Increments the bufferfix count.
-Forwards to buf_block_buf_fix_inc_func() with the block only.
-@param b in/out: block to bufferfix
-@param f in: file name where requested (dropped by this variant of the
-macro; not passed on)
-@param l in: line number where requested (dropped by this variant of the
-macro; not passed on) */
-# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
-# endif /* UNIV_SYNC_DEBUG */
-#else /* !UNIV_HOTBACKUP */
-# define buf_block_modify_clock_inc(block) ((void) 0) /* no-op stub used when UNIV_HOTBACKUP is defined */
-#endif /* !UNIV_HOTBACKUP */
-
-/** Checks if the page is in crc32 checksum format.
-@param[in] read_buf database page
-@param[in] checksum_field1 new checksum field
-@param[in] checksum_field2 old checksum field
-@return true if the page is in crc32 checksum format */
-bool
-buf_page_is_checksum_valid_crc32(
- const byte* read_buf,
- ulint checksum_field1,
- ulint checksum_field2)
- MY_ATTRIBUTE((warn_unused_result));
-
-/** Checks if the page is in innodb checksum format.
-@param[in] read_buf database page
-@param[in] checksum_field1 new checksum field
-@param[in] checksum_field2 old checksum field
-@return true if the page is in innodb checksum format */
-bool
-buf_page_is_checksum_valid_innodb(
- const byte* read_buf,
- ulint checksum_field1,
- ulint checksum_field2)
- MY_ATTRIBUTE((warn_unused_result));
-
-/** Checks if the page is in none checksum format.
-@param[in] read_buf database page
-@param[in] checksum_field1 new checksum field
-@param[in] checksum_field2 old checksum field
-@return true if the page is in none checksum format */
-bool
-buf_page_is_checksum_valid_none(
- const byte* read_buf,
- ulint checksum_field1,
- ulint checksum_field2)
- MY_ATTRIBUTE((warn_unused_result));
-
-/** Check if a page is corrupt.
-@param[in] check_lsn true if LSN should be checked
-@param[in] read_buf Page to be checked
-@param[in] zip_size compressed size or 0
-@param[in] space Pointer to tablespace
-@return true if corrupted, false if not */
-UNIV_INTERN
-bool
-buf_page_is_corrupted(
- bool check_lsn,
- const byte* read_buf,
- ulint zip_size,
- const fil_space_t* space)
- MY_ATTRIBUTE((warn_unused_result));
-/** Check if a page is all zeroes.
-@param[in] read_buf database page
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
-@return whether the page is all zeroes */
-UNIV_INTERN
-bool
-buf_page_is_zeroes(const byte* read_buf, ulint zip_size);
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Gets the space id, page offset, and byte offset within page of a
-pointer pointing to a buffer frame containing a file page. */
-UNIV_INLINE
-void
-buf_ptr_get_fsp_addr(
-/*=================*/
- const void* ptr, /*!< in: pointer to a buffer frame */
- ulint* space, /*!< out: space id */
- fil_addr_t* addr); /*!< out: page offset and byte offset */
-/**********************************************************************//**
-Gets the hash value of a block. This can be used in searches in the
-lock hash table.
-@return lock hash value */
-UNIV_INLINE
-ulint
-buf_block_get_lock_hash_val(
-/*========================*/
- const buf_block_t* block) /*!< in: block */
- MY_ATTRIBUTE((pure));
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Finds a block in the buffer pool that points to a
-given compressed page.
-@return buffer block pointing to the compressed page, or NULL */
-UNIV_INTERN
-buf_block_t*
-buf_pool_contains_zip(
-/*==================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- const void* data); /*!< in: pointer to compressed page */
-#endif /* UNIV_DEBUG */
-
-/***********************************************************************
-FIXME_FTS: Gets the frame the pointer is pointing to. */
-UNIV_INLINE
-buf_frame_t*
-buf_frame_align(
-/*============*/
- /* out: pointer to frame */
- byte* ptr); /* in: pointer to a frame */
-
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/*********************************************************************//**
-Validates the buffer pool data structure.
-@return TRUE */
-UNIV_INTERN
-ibool
-buf_validate(void);
-/*==============*/
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/*********************************************************************//**
-Prints info of the buffer pool data structure. */
-UNIV_INTERN
-void
-buf_print(void);
-/*============*/
-#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-enum buf_page_print_flags { /* values accepted by the flags argument of buf_page_print() */
- /** Do not crash at the end of buf_page_print(). */
- BUF_PAGE_PRINT_NO_CRASH = 1,
- /** Do not print the full page dump. */
- BUF_PAGE_PRINT_NO_FULL = 2
-};
-
-/********************************************************************//**
-Prints a page to stderr. */
-UNIV_INTERN
-void
-buf_page_print(
-/*===========*/
- const byte* read_buf, /*!< in: a database page */
- ulint zip_size, /*!< in: compressed page size, or
- 0 for uncompressed pages */
- ulint flags) /*!< in: 0 or
- BUF_PAGE_PRINT_NO_CRASH or
- BUF_PAGE_PRINT_NO_FULL */
- UNIV_COLD;
-/********************************************************************//**
-Decompress a block.
-@return TRUE if successful */
-UNIV_INTERN
-ibool
-buf_zip_decompress(
-/*===============*/
- buf_block_t* block, /*!< in/out: block */
- ibool check); /*!< in: TRUE=verify the page checksum */
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Returns the number of latched pages in the buffer pool.
-@return number of latched pages */
-UNIV_INTERN
-ulint
-buf_get_latched_pages_number(void);
-/*==============================*/
-#endif /* UNIV_DEBUG */
-/*********************************************************************//**
-Returns the number of pending buf pool read ios.
-@return number of pending read I/O operations */
-UNIV_INTERN
-ulint
-buf_get_n_pending_read_ios(void);
-/*============================*/
-/*********************************************************************//**
-Prints info of the buffer i/o. */
-UNIV_INTERN
-void
-buf_print_io(
-/*=========*/
- FILE* file); /*!< in: file where to print */
-/*******************************************************************//**
-Collect buffer pool stats information for a buffer pool. Also
-record aggregated stats if there is more than one buffer pool
-in the server */
-UNIV_INTERN
-void
-buf_stats_get_pool_info(
-/*====================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool */
- ulint pool_id, /*!< in: buffer pool ID */
- buf_pool_info_t* all_pool_info); /*!< in/out: buffer pool info
- to fill */
-/*********************************************************************//**
-Returns the ratio in percents of modified pages in the buffer pool /
-database pages in the buffer pool.
-@return modified page percentage ratio */
-UNIV_INTERN
-double
-buf_get_modified_ratio_pct(void);
-/*============================*/
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
-void
-buf_refresh_io_stats(
-/*=================*/
- buf_pool_t* buf_pool); /*!< buffer pool instance */
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages for all
-buffer pool instances. */
-UNIV_INTERN
-void
-buf_refresh_io_stats_all(void);
-/*=================*/
-/*********************************************************************//**
-Asserts that all file pages in the buffer are in a replaceable state.
-@return TRUE */
-UNIV_INTERN
-ibool
-buf_all_freed(void);
-/*===============*/
-/*********************************************************************//**
-Checks that there currently are no pending i/o-operations for the buffer
-pool.
-@return number of pending i/o operations */
-UNIV_INTERN
-ulint
-buf_pool_check_no_pending_io(void);
-/*==============================*/
-/*********************************************************************//**
-Invalidates the file pages in the buffer pool when an archive recovery is
-completed. All the file pages buffered must be in a replaceable state when
-this function is called: not latched and not modified. */
-UNIV_INTERN
-void
-buf_pool_invalidate(void);
-/*=====================*/
-#endif /* !UNIV_HOTBACKUP */
-
-/*========================================================================
---------------------------- LOWER LEVEL ROUTINES -------------------------
-=========================================================================*/
-
-#ifdef UNIV_SYNC_DEBUG
-/*********************************************************************//**
-Adds latch level info for the rw-lock protecting the buffer frame. This
-should be called in the debug version after a successful latching of a
-page if we know the latching order level of the acquired latch. */
-UNIV_INLINE
-void
-buf_block_dbg_add_level(
-/*====================*/
- buf_block_t* block, /*!< in: buffer page
- where we have acquired latch */
- ulint level); /*!< in: latching order level */
-#else /* UNIV_SYNC_DEBUG */
-# define buf_block_dbg_add_level(block, level) /* nothing: expands to no code when UNIV_SYNC_DEBUG is not defined */
-#endif /* UNIV_SYNC_DEBUG */
-/*********************************************************************//**
-Gets the state of a block.
-@return state */
-UNIV_INLINE
-enum buf_page_state
-buf_page_get_state(
-/*===============*/
- const buf_page_t* bpage); /*!< in: pointer to the control
- block */
-/*********************************************************************//**
-Gets the name of the state of a block.
-@return name or "CORRUPTED" */
-UNIV_INLINE
-const char*
-buf_get_state_name(
-/*===============*/
- const buf_block_t* block); /*!< in: pointer to the control
- block */
-/*********************************************************************//**
-Gets the state of a block.
-@return state */
-UNIV_INLINE
-enum buf_page_state
-buf_block_get_state(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Sets the state of a block. */
-UNIV_INLINE
-void
-buf_page_set_state(
-/*===============*/
- buf_page_t* bpage, /*!< in/out: pointer to control block */
- enum buf_page_state state); /*!< in: state */
-/*********************************************************************//**
-Sets the state of a block. */
-UNIV_INLINE
-void
-buf_block_set_state(
-/*================*/
- buf_block_t* block, /*!< in/out: pointer to control block */
- enum buf_page_state state); /*!< in: state */
-/*********************************************************************//**
-Determines if a block is mapped to a tablespace.
-@return TRUE if mapped */
-UNIV_INLINE
-ibool
-buf_page_in_file(
-/*=============*/
- const buf_page_t* bpage) /*!< in: pointer to control block */
- MY_ATTRIBUTE((pure));
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Determines if a block should be on unzip_LRU list.
-@return TRUE if block belongs to unzip_LRU */
-UNIV_INLINE
-ibool
-buf_page_belongs_to_unzip_LRU(
-/*==========================*/
- const buf_page_t* bpage) /*!< in: pointer to control block */
- MY_ATTRIBUTE((pure));
-
-/*********************************************************************//**
-Gets the mutex of a block.
-@return pointer to mutex protecting bpage */
-UNIV_INLINE
-ib_mutex_t*
-buf_page_get_mutex(
-/*===============*/
- const buf_page_t* bpage) /*!< in: pointer to control block */
- MY_ATTRIBUTE((pure));
-
-/*********************************************************************//**
-Get the flush type of a page.
-@return flush type */
-UNIV_INLINE
-buf_flush_t
-buf_page_get_flush_type(
-/*====================*/
- const buf_page_t* bpage) /*!< in: buffer page */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Set the flush type of a page. */
-UNIV_INLINE
-void
-buf_page_set_flush_type(
-/*====================*/
- buf_page_t* bpage, /*!< in: buffer page */
- buf_flush_t flush_type); /*!< in: flush type */
-/*********************************************************************//**
-Map a block to a file page. */
-UNIV_INLINE
-void
-buf_block_set_file_page(
-/*====================*/
- buf_block_t* block, /*!< in/out: pointer to control block */
- ulint space, /*!< in: tablespace id */
- ulint page_no);/*!< in: page number */
-/*********************************************************************//**
-Gets the io_fix state of a block.
-@return io_fix state */
-UNIV_INLINE
-enum buf_io_fix
-buf_page_get_io_fix(
-/*================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the io_fix state of a block.
-@return io_fix state */
-UNIV_INLINE
-enum buf_io_fix
-buf_block_get_io_fix(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the io_fix state of a block. Does not assert that the
-buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
-not to hold it.
-@return io_fix state */
-UNIV_INLINE
-enum buf_io_fix
-buf_page_get_io_fix_unlocked(
-/*=========================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Sets the io_fix state of a block. */
-UNIV_INLINE
-void
-buf_page_set_io_fix(
-/*================*/
- buf_page_t* bpage, /*!< in/out: control block */
- enum buf_io_fix io_fix);/*!< in: io_fix state */
-/*********************************************************************//**
-Sets the io_fix state of a block. */
-UNIV_INLINE
-void
-buf_block_set_io_fix(
-/*=================*/
- buf_block_t* block, /*!< in/out: control block */
- enum buf_io_fix io_fix);/*!< in: io_fix state */
-/*********************************************************************//**
-Makes a block sticky. A sticky block implies that even after we release
-the buf_pool->LRU_list_mutex and the block->mutex:
-* it cannot be removed from the flush_list
-* the block descriptor cannot be relocated
-* it cannot be removed from the LRU list
-Note that:
-* the block can still change its position in the LRU list
-* the next and previous pointers can change. */
-UNIV_INLINE
-void
-buf_page_set_sticky(
-/*================*/
- buf_page_t* bpage); /*!< in/out: control block */
-/*********************************************************************//**
-Removes stickiness of a block. */
-UNIV_INLINE
-void
-buf_page_unset_sticky(
-/*==================*/
- buf_page_t* bpage); /*!< in/out: control block */
-/********************************************************************//**
-Determine if a buffer block can be relocated in memory. The block
-can be dirty, but it must not be I/O-fixed or bufferfixed. */
-UNIV_INLINE
-ibool
-buf_page_can_relocate(
-/*==================*/
- const buf_page_t* bpage) /*!< control block being relocated */
- MY_ATTRIBUTE((pure));
-
-/*********************************************************************//**
-Determine if a block has been flagged old.
-@return TRUE if old */
-UNIV_INLINE
-ibool
-buf_page_is_old(
-/*============*/
- const buf_page_t* bpage) /*!< in: control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Flag a block old. */
-UNIV_INLINE
-void
-buf_page_set_old(
-/*=============*/
- buf_page_t* bpage, /*!< in/out: control block */
- ibool old); /*!< in: old */
-/*********************************************************************//**
-Determine the time of first access of a block in the buffer pool.
-@return ut_time_ms() at the time of first access, 0 if not accessed */
-UNIV_INLINE
-unsigned
-buf_page_is_accessed(
-/*=================*/
- const buf_page_t* bpage) /*!< in: control block */
- MY_ATTRIBUTE((nonnull, pure));
-/*********************************************************************//**
-Flag a block accessed. */
-UNIV_INLINE
-void
-buf_page_set_accessed(
-/*==================*/
- buf_page_t* bpage) /*!< in/out: control block */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Gets the buf_block_t handle of a buffered file block if an uncompressed
-page frame exists, or NULL. Note: even though bpage is not declared a
-const we don't update its value. It is safe to make this pure.
-@return control block, or NULL */
-UNIV_INLINE
-buf_block_t*
-buf_page_get_block(
-/*===============*/
- buf_page_t* bpage) /*!< in: control block, or NULL */
- MY_ATTRIBUTE((pure));
-#endif /* !UNIV_HOTBACKUP */
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets a pointer to the memory frame of a block.
-@return pointer to the frame */
-UNIV_INLINE
-buf_frame_t*
-buf_block_get_frame(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-
-/*********************************************************************//**
-Gets a pointer to the memory frame of a block, where block is known not to be
-NULL.
-@return pointer to the frame */
-UNIV_INLINE
-buf_frame_t*
-buf_nonnull_block_get_frame(
- const buf_block_t* block) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-
-#else /* UNIV_DEBUG */
-# define buf_block_get_frame(block) (block ? (block)->frame : 0) /* yields 0 for a NULL block; NOTE(review): the tested `block` operand is not parenthesized */
-# define buf_nonnull_block_get_frame(block) ((block)->frame) /* no NULL check: block must not be NULL */
-#endif /* UNIV_DEBUG */
-/*********************************************************************//**
-Gets the space id of a block.
-@return space id */
-UNIV_INLINE
-ulint
-buf_page_get_space(
-/*===============*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the space id of a block.
-@return space id */
-UNIV_INLINE
-ulint
-buf_block_get_space(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the page number of a block.
-@return page number */
-UNIV_INLINE
-ulint
-buf_page_get_page_no(
-/*=================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the page number of a block.
-@return page number */
-UNIV_INLINE
-ulint
-buf_block_get_page_no(
-/*==================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_page_get_zip_size(
-/*==================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_block_get_zip_size(
-/*===================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
- MY_ATTRIBUTE((pure));
-/*********************************************************************//**
-Gets the compressed page descriptor corresponding to an uncompressed page
-if applicable.
-@param block in: control block; the macro may evaluate this argument
-twice, so it should have no side effects
-@return &block->page.zip if block->page.zip.data is non-NULL, else NULL */
-#define buf_block_get_page_zip(block) \
- ((block)->page.zip.data ? &(block)->page.zip : NULL)
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Gets the block to whose frame the pointer is pointing to.
-@return pointer to block, never NULL */
-UNIV_INTERN
-buf_block_t*
-buf_block_align(
-/*============*/
- const byte* ptr); /*!< in: pointer to a frame */
-/********************************************************************//**
-Find out if a pointer belongs to a buf_block_t. It can be a pointer to
-the buf_block_t itself or a member of it
-@return TRUE if ptr belongs to a buf_block_t struct */
-UNIV_INTERN
-ibool
-buf_pointer_is_block_field(
-/*=======================*/
- const void* ptr); /*!< in: pointer not
- dereferenced */
-/** Find out if a pointer corresponds to a buf_block_t::mutex.
-Implemented as a plain call to buf_pointer_is_block_field(); the macro
-itself adds no check that is specific to the mutex member.
-@param m in: mutex candidate
-@return TRUE if m is a buf_block_t::mutex */
-#define buf_pool_is_block_mutex(m) \
- buf_pointer_is_block_field((const void*)(m))
-/** Find out if a pointer corresponds to a buf_block_t::lock.
-Implemented as a plain call to buf_pointer_is_block_field(); the macro
-itself adds no check that is specific to the lock member.
-@param l in: rw-lock candidate
-@return TRUE if l is a buf_block_t::lock */
-#define buf_pool_is_block_lock(l) \
- buf_pointer_is_block_field((const void*)(l))
-
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
-/*********************************************************************//**
-Gets the compressed page descriptor corresponding to an uncompressed page
-if applicable.
-@return compressed page descriptor, or NULL */
-UNIV_INLINE
-const page_zip_des_t*
-buf_frame_get_page_zip(
-/*===================*/
- const byte* ptr); /*!< in: pointer to the page */
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-/********************************************************************//**
-Function which inits a page for read to the buffer buf_pool. If the page is
-(1) already in buf_pool, or
-(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
-(3) if the space is deleted or being deleted,
-then this function does nothing.
-Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
-on the buffer frame. The io-handler must take care that the flag is cleared
-and the lock released later.
-@return pointer to the block or NULL */
-UNIV_INTERN
-buf_page_t*
-buf_page_init_for_read(
-/*===================*/
- dberr_t* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
- ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size, or 0 */
- ibool unzip, /*!< in: TRUE=request uncompressed page */
- ib_int64_t tablespace_version,/*!< in: prevents reading from a wrong
- version of the tablespace in case we have done
- DISCARD + IMPORT */
- ulint offset);/*!< in: page number */
-/** Complete a read or write request of a file page to or from the buffer pool.
-@param[in,out] bpage Page to complete
-@return whether the operation succeeded
-@retval DB_SUCCESS always when writing, or if a read page was OK
-@retval DB_PAGE_CORRUPTED if the checksum fails on a page read
-@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
- after decryption normal page checksum does
- not match */
-UNIV_INTERN
-dberr_t
-buf_page_io_complete(buf_page_t* bpage)
- MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Calculates a folded value of a file page address to use in the page hash
-table.
-@return the folded value */
-UNIV_INLINE
-ulint
-buf_page_address_fold(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: offset of the page within space */
- MY_ATTRIBUTE((const));
-/********************************************************************//**
-Calculates the index of a buffer pool to the buf_pool[] array.
-@return the position of the buffer pool in buf_pool[] */
-UNIV_INLINE
-ulint
-buf_pool_index(
-/*===========*/
- const buf_pool_t* buf_pool) /*!< in: buffer pool */
- MY_ATTRIBUTE((nonnull, const));
-/******************************************************************//**
-Returns the buffer pool instance given a page instance
-@return buf_pool */
-UNIV_INLINE
-buf_pool_t*
-buf_pool_from_bpage(
-/*================*/
- const buf_page_t* bpage); /*!< in: buffer pool page */
-/******************************************************************//**
-Returns the buffer pool instance given a block instance
-@return buf_pool */
-UNIV_INLINE
-buf_pool_t*
-buf_pool_from_block(
-/*================*/
- const buf_block_t* block); /*!< in: block */
-/******************************************************************//**
-Returns the buffer pool instance given space and offset of page
-@return buffer pool */
-UNIV_INLINE
-buf_pool_t*
-buf_pool_get(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: offset of the page within space */
-/******************************************************************//**
-Returns the buffer pool instance given its array index
-@return buffer pool */
-UNIV_INLINE
-buf_pool_t*
-buf_pool_from_array(
-/*================*/
- ulint index); /*!< in: array index to get
- buffer pool instance from */
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
-@return block, NULL if not found */
-UNIV_INLINE
-buf_page_t*
-buf_page_hash_get_low(
-/*==================*/
- buf_pool_t* buf_pool,/*!< buffer pool instance */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: offset of the page within space */
- ulint fold); /*!< in: buf_page_address_fold(space, offset) */
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
-If the block is found and lock is not NULL then the appropriate
-page_hash lock is acquired in the specified lock mode. Otherwise,
-mode value is ignored. It is up to the caller to release the
-lock. If the block is found and the lock is NULL then the page_hash
-lock is released by this function.
-@return block, NULL if not found, or watch sentinel (if watch is true) */
-UNIV_INLINE
-buf_page_t*
-buf_page_hash_get_locked(
-/*=====================*/
- /*!< out: pointer to the bpage,
- or NULL; if NULL, hash_lock
- is also NULL. */
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page number */
- prio_rw_lock_t** lock, /*!< in/out: lock of the page
- hash acquired if bpage is
- found. NULL otherwise. If NULL
- is passed then the hash_lock
- is released by this function */
- ulint lock_mode, /*!< in: RW_LOCK_EX or
- RW_LOCK_SHARED. Ignored if
- lock == NULL */
- bool watch = false); /*!< in: if true, return watch
- sentinel also. */
-/******************************************************************//**
-Returns the control block of a file page, NULL if not found.
-If the block is found and lock is not NULL then the appropriate
-page_hash lock is acquired in the specified lock mode. Otherwise,
-mode value is ignored. It is up to the caller to release the
-lock. If the block is found and the lock is NULL then the page_hash
-lock is released by this function.
-@return block, NULL if not found */
-UNIV_INLINE
-buf_block_t*
-buf_block_hash_get_locked(
-/*=====================*/
- /*!< out: pointer to the bpage,
- or NULL; if NULL, hash_lock
- is also NULL. */
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page number */
- prio_rw_lock_t** lock, /*!< in/out: lock of the page
- hash acquired if bpage is
- found. NULL otherwise. If NULL
- is passed then the hash_lock
- is released by this function */
- ulint lock_mode); /*!< in: RW_LOCK_EX or
- RW_LOCK_SHARED. Ignored if
- lock == NULL */
-/* There are four different ways we can try to get a bpage or block
-from the page hash:
-1) Caller already holds the appropriate page hash lock: in that case call
-the buf_page_hash_get_low() function directly.
-2) Caller wants to hold page hash lock in x-mode: use the *_x_locked macros.
-3) Caller wants to hold page hash lock in s-mode: use the *_s_locked macros.
-4) Caller doesn't want to hold page hash lock: use the *_hash_get macros. */
-#define buf_page_hash_get_s_locked(b, s, o, l) \
- buf_page_hash_get_locked(b, s, o, l, RW_LOCK_SHARED)
-#define buf_page_hash_get_x_locked(b, s, o, l) \
- buf_page_hash_get_locked(b, s, o, l, RW_LOCK_EX)
-#define buf_page_hash_get(b, s, o) \
- buf_page_hash_get_locked(b, s, o, NULL, 0)
-#define buf_page_get_also_watch(b, s, o) \
- buf_page_hash_get_locked(b, s, o, NULL, 0, true)
-
-#define buf_block_hash_get_s_locked(b, s, o, l) \
- buf_block_hash_get_locked(b, s, o, l, RW_LOCK_SHARED)
-#define buf_block_hash_get_x_locked(b, s, o, l) \
- buf_block_hash_get_locked(b, s, o, l, RW_LOCK_EX)
-#define buf_block_hash_get(b, s, o) \
- buf_block_hash_get_locked(b, s, o, NULL, 0)
-
-/*********************************************************************//**
-Gets the current length of the free list of buffer blocks.
-@return length of the free list */
-UNIV_INTERN
-ulint
-buf_get_free_list_len(void);
-/*=======================*/
-
-/********************************************************************//**
-Determine if a block is a sentinel for a buffer pool watch.
-@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
-UNIV_INTERN
-ibool
-buf_pool_watch_is_sentinel(
-/*=======================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- const buf_page_t* bpage) /*!< in: block */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/****************************************************************//**
-Add watch for the given page to be read in. Caller must have
-appropriate hash_lock for the bpage and hold the LRU list mutex to avoid a race
-condition with buf_LRU_free_page inserting the same page into the page hash.
-This function may release the hash_lock and reacquire it.
-@return NULL if watch set, block if the page is in the buffer pool */
-UNIV_INTERN
-buf_page_t*
-buf_pool_watch_set(
-/*===============*/
- ulint space, /*!< in: space id */
- ulint offset, /*!< in: page number */
- ulint fold) /*!< in: buf_page_address_fold(space, offset) */
- MY_ATTRIBUTE((warn_unused_result));
-/****************************************************************//**
-Stop watching if the page has been read in.
-buf_pool_watch_set(space,offset) must have returned NULL before. */
-UNIV_INTERN
-void
-buf_pool_watch_unset(
-/*=================*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: page number */
-/****************************************************************//**
-Check if the page has been read in.
-This may only be called after buf_pool_watch_set(space,offset)
-has returned NULL and before invoking buf_pool_watch_unset(space,offset).
-@return FALSE if the given page was not read in, TRUE if it was */
-UNIV_INTERN
-ibool
-buf_pool_watch_occurred(
-/*====================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: page number */
- MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Get the total lengths of the LRU, free and flush lists of all buffer pools. */
-UNIV_INTERN
-void
-buf_get_total_list_len(
-/*===================*/
- ulint* LRU_len, /*!< out: length of all LRU lists */
- ulint* free_len, /*!< out: length of all free lists */
- ulint* flush_list_len);/*!< out: length of all flush lists */
-/********************************************************************//**
-Get total list size in bytes from all buffer pools. */
-UNIV_INTERN
-void
-buf_get_total_list_size_in_bytes(
-/*=============================*/
- buf_pools_list_size_t* buf_pools_list_size); /*!< out: list sizes
- in all buffer pools */
-/********************************************************************//**
-Get total buffer pool statistics. */
-UNIV_INTERN
-void
-buf_get_total_stat(
-/*===============*/
- buf_pool_stat_t*tot_stat); /*!< out: buffer pool stats */
-/*********************************************************************//**
-Get the nth chunk's buffer block in the specified buffer pool.
-@return the nth chunk's buffer block. */
-UNIV_INLINE
-buf_block_t*
-buf_get_nth_chunk_block(
-/*====================*/
- const buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- ulint n, /*!< in: nth chunk in the buffer pool */
- ulint* chunk_size); /*!< in: chunk size */
-
-/********************************************************************//**
-Calculate the checksum of a page from compressed table and update the page. */
-UNIV_INTERN
-void
-buf_flush_update_zip_checksum(
-/*==========================*/
- buf_frame_t* page, /*!< in/out: Page to update */
- ulint zip_size, /*!< in: Compressed page size */
- lsn_t lsn); /*!< in: Lsn to stamp on the page */
-
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Checks if buf_pool->zip_mutex is owned and is serving for a given page as its
-block mutex.
-@return true if buf_pool->zip_mutex is owned. */
-UNIV_INLINE
-bool
-buf_own_zip_mutex_for_page(
-/*=======================*/
- const buf_page_t* bpage)
- MY_ATTRIBUTE((nonnull,warn_unused_result));
-#endif /* UNIV_DEBUG */
-
-/** Encryption and page_compression hook that is called just before
-a page is written to disk.
-@param[in,out] space tablespace
-@param[in,out] bpage buffer page
-@param[in] src_frame physical page frame that is being encrypted
-@return page frame to be written to file
-(may be src_frame or an encrypted/compressed copy of it) */
-UNIV_INTERN
-byte*
-buf_page_encrypt_before_write(
- fil_space_t* space,
- buf_page_t* bpage,
- byte* src_frame);
-
-/**********************************************************************
-The hook that is called after page is written to disk.
-The function releases any resources needed for encryption that was allocated
-in buf_page_encrypt_before_write */
-UNIV_INTERN
-ibool
-buf_page_encrypt_after_write(
-/*=========================*/
- buf_page_t* page); /*!< in/out: buffer page that was flushed */
-
-/********************************************************************//**
-The hook that is called just before a page is read from disk.
-The function allocates memory that is used to temporarily store disk content
-before getting decrypted */
-UNIV_INTERN
-byte*
-buf_page_decrypt_before_read(
-/*=========================*/
- buf_page_t* page, /*!< in/out: buffer page read from disk */
- ulint zip_size); /*!< in: compressed page size, or 0 */
-
-/********************************************************************//**
-The hook that is called just after a page is read from disk.
-The function decrypts disk content into buf_page_t and releases the
-temporary buffer that was allocated in buf_page_decrypt_before_read */
-UNIV_INTERN
-bool
-buf_page_decrypt_after_read(
-/*========================*/
- buf_page_t* page); /*!< in/out: buffer page read from disk */
-
-/** @brief A slot of temporary memory used for page encryption/compression.
-
-NOTE! The definition appears here only for other modules of this
-directory (buf) to see it. Do not use from outside! */
-
-typedef struct {
- bool reserved; /*!< true if this slot is reserved
- */
- byte* crypt_buf; /*!< for encryption the data needs to be
- copied to a separate buffer before it is
- encrypted and written, because the page
- can be read while it is being flushed */
- byte* comp_buf; /*!< for compression we need a
- temporary buffer because the page
- can be read while it is being flushed */
- byte* out_buf; /*!< resulting buffer after
- encryption/compression. This is a
- pointer and not allocated. */
-} buf_tmp_buffer_t;
-
-/** The common buffer control block structure
-for compressed and uncompressed frames */
-
-/** Number of bits used for buffer page states. */
-#define BUF_PAGE_STATE_BITS 3
-
-struct buf_page_t{
- /** @name General fields
- None of these bit-fields must be modified without holding
- buf_page_get_mutex() [buf_block_t::mutex or
- buf_pool->zip_mutex], since they can be stored in the same
- machine word. */
- /* @{ */
-
- ib_uint32_t space; /*!< tablespace id. */
- ib_uint32_t offset; /*!< page number. */
- /** count of how many times this block is currently bufferfixed */
-#ifdef PAGE_ATOMIC_REF_COUNT
- ib_uint32_t buf_fix_count;
-
- /** type of pending I/O operation; Transitions from BUF_IO_NONE to
- BUF_IO_WRITE and back are protected by the buf_page_get_mutex() mutex
- and the corresponding flush state mutex. The flush state mutex
- protection for io_fix and flush_type is not strictly required, but it
- ensures consistent buffer pool instance state snapshots in
- buf_pool_validate_instance(). @see enum buf_io_fix */
- byte io_fix;
-
- byte state; /*!< state of the control block; @see enum buf_page_state */
-#else
- unsigned buf_fix_count:19;
-
- /** type of pending I/O operation; also protected by
- buf_pool->mutex for writes only @see enum buf_io_fix */
- unsigned io_fix:2;
-
- /** state of the control block.
- State transitions from BUF_BLOCK_READY_FOR_USE to BUF_BLOCK_MEMORY
- need not be protected by buf_page_get_mutex(). @see enum buf_page_state.
- State changes that are relevant to page_hash are additionally protected
- by the appropriate page_hash mutex i.e.: if a page is in page_hash or
- is being added to/removed from page_hash then the corresponding changes
- must also be protected by page_hash mutex. */
- unsigned state:BUF_PAGE_STATE_BITS;
-
-#endif /* PAGE_ATOMIC_REF_COUNT */
-
-#ifndef UNIV_HOTBACKUP
- unsigned flush_type:2; /*!< if this block is currently being
- flushed to disk, this tells the
- flush_type. Writes during flushing
- protected by buf_page_get_mutex_enter()
- mutex and the corresponding flush state
- mutex.
- @see buf_flush_t */
- unsigned buf_pool_index:6;/*!< index number of the buffer pool
- that this block belongs to */
-# if MAX_BUFFER_POOLS > 64
-# error "MAX_BUFFER_POOLS > 64; redefine buf_pool_index:6"
-# endif
- /* @} */
-#endif /* !UNIV_HOTBACKUP */
- page_zip_des_t zip; /*!< compressed page; zip.data
- (but not the data it points to) is
- protected by buf_pool->zip_mutex;
- state == BUF_BLOCK_ZIP_PAGE and
- zip.data == NULL means an active
- buf_pool->watch */
-
- ulint write_size; /*!< Write size is set when this
- page is written for the first time; if
- it is written again we check whether a
- TRIM operation is needed. */
-
- bool encrypted; /*!< page is still encrypted */
-
- ulint real_size; /*!< Real size of the page
- Normal pages == UNIV_PAGE_SIZE
- page compressed pages, payload
- size aligned to sector boundary.
- */
-
- buf_tmp_buffer_t* slot; /*!< Slot for temporary memory
- used for encryption/compression
- or NULL */
-#ifndef UNIV_HOTBACKUP
- buf_page_t* hash; /*!< node used in chaining to
- buf_pool->page_hash or
- buf_pool->zip_hash */
-#ifdef UNIV_DEBUG
- ibool in_page_hash; /*!< TRUE if in buf_pool->page_hash */
- ibool in_zip_hash; /*!< TRUE if in buf_pool->zip_hash */
-#endif /* UNIV_DEBUG */
-
- /** @name Page flushing fields */
- /* @{ */
-
- UT_LIST_NODE_T(buf_page_t) list;
- /*!< based on state, this is a
- list node, protected either by
- a corresponding list mutex,
- in one of the following lists in
- buf_pool:
-
- - BUF_BLOCK_NOT_USED: free
- - BUF_BLOCK_FILE_PAGE: flush_list
- - BUF_BLOCK_ZIP_DIRTY: flush_list
- - BUF_BLOCK_ZIP_PAGE: zip_clean
-
- If bpage is part of flush_list
- then the node pointers are
- covered by buf_pool->flush_list_mutex.
- Otherwise these pointers are
- protected by a corresponding list
- mutex.
-
- The contents of the list node
- is undefined if !in_flush_list
- && state == BUF_BLOCK_FILE_PAGE,
- or if state is one of
- BUF_BLOCK_MEMORY,
- BUF_BLOCK_REMOVE_HASH or
- BUF_BLOCK_READY_FOR_USE. */
-
-#ifdef UNIV_DEBUG
- ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
- when buf_pool->flush_list_mutex is
- free, the following should hold:
- in_flush_list
- == (state == BUF_BLOCK_FILE_PAGE
- || state == BUF_BLOCK_ZIP_DIRTY)
- Writes to this field must be
- covered by both block->mutex
- and buf_pool->flush_list_mutex. Hence
- reads can happen while holding
- any one of the two mutexes */
- ibool in_free_list; /*!< TRUE if in buf_pool->free; when
- buf_pool->free_list_mutex is free, the
- following should hold: in_free_list
- == (state == BUF_BLOCK_NOT_USED) */
-#endif /* UNIV_DEBUG */
- lsn_t newest_modification;
- /*!< log sequence number of
- the youngest modification to
- this block, zero if not
- modified. Protected by block
- mutex */
- lsn_t oldest_modification;
- /*!< log sequence number of
- the START of the log entry
- written of the oldest
- modification to this block
- which has not yet been flushed
- on disk; zero if all
- modifications are on disk.
- Writes to this field must be
- covered by both block->mutex
- and buf_pool->flush_list_mutex. Hence
- reads can happen while holding
- any one of the two mutexes */
- /* @} */
- /** @name LRU replacement algorithm fields */
- /* @{ */
-
- UT_LIST_NODE_T(buf_page_t) LRU;
- /*!< node of the LRU list */
-#ifdef UNIV_DEBUG
- ibool in_LRU_list; /*!< TRUE if the page is in
- the LRU list; used in
- debugging */
-#endif /* UNIV_DEBUG */
- unsigned old:1; /*!< TRUE if the block is in the old
- blocks in buf_pool->LRU_old. Protected
- by the LRU list mutex. May be read for
- heuristics purposes under the block
- mutex instead. */
- unsigned freed_page_clock:31;/*!< the value of
- buf_pool->freed_page_clock
- when this block was the last
- time put to the head of the
- LRU list; a thread is allowed
- to read this for heuristic
- purposes without holding any
- mutex or latch */
- /* @} */
- unsigned access_time; /*!< time of first access, or
- 0 if the block was never accessed
- in the buffer pool. Protected by
- block mutex */
- ibool is_corrupt; /*!< NOTE(review): undocumented; presumably flags a page found corrupt -- confirm */
-# if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- ibool file_page_was_freed;
- /*!< this is set to TRUE when
- fsp frees a page in buffer pool;
- protected by buf_pool->zip_mutex
- or buf_block_t::mutex. */
-# endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-};
-
-/** The buffer control block structure */
-
-struct buf_block_t{
-
- /** @name General fields */
- /* @{ */
-
- buf_page_t page; /*!< page information; this must
- be the first field, so that
- buf_pool->page_hash can point
- to buf_page_t or buf_block_t */
- byte* frame; /*!< pointer to buffer frame which
- is of size UNIV_PAGE_SIZE, and
- aligned to an address divisible by
- UNIV_PAGE_SIZE */
-#ifndef UNIV_HOTBACKUP
- UT_LIST_NODE_T(buf_block_t) unzip_LRU;
- /*!< node of the decompressed LRU list;
- a block is in the unzip_LRU list
- if page.state == BUF_BLOCK_FILE_PAGE
- and page.zip.data != NULL */
-#ifdef UNIV_DEBUG
- ibool in_unzip_LRU_list;/*!< TRUE if the page is in the
- decompressed LRU list;
- used in debugging */
-#endif /* UNIV_DEBUG */
- ib_mutex_t mutex; /*!< mutex protecting this block:
- state, io_fix, buf_fix_count,
- and access_time; we introduce this new
- mutex in InnoDB-5.1 to relieve
- contention on the buffer pool mutex */
- rw_lock_t lock; /*!< read-write lock of the buffer
- frame */
- unsigned lock_hash_val:32;/*!< hashed value of the page address
- in the record lock hash table;
- protected by buf_block_t::lock
- (or buf_block_t::mutex in
- buf_page_get_gen(),
- buf_page_init_for_read()
- and buf_page_create()) */
- ibool check_index_page_at_flush;
- /*!< TRUE if we know that this is
- an index page, and want the database
- to check its consistency before flush;
- note that there may be pages in the
- buffer pool which are index pages,
- but this flag is not set because
- we do not keep track of all pages;
- NOT protected by any mutex */
- /* @} */
- /** @name Optimistic search field */
- /* @{ */
-
- ib_uint64_t modify_clock; /*!< this clock is incremented every
- time a pointer to a record on the
- page may become obsolete; this is
- used in the optimistic cursor
- positioning: if the modify clock has
- not changed, we know that the pointer
- is still valid; this field may be
- changed if the thread (1) owns the LRU
- list mutex and the page is not
- bufferfixed, or (2) the thread has an
- x-latch on the block */
- /* @} */
- /** @name Hash search fields (unprotected)
- NOTE that these fields are NOT protected by any semaphore! */
- /* @{ */
-
- ulint n_hash_helps; /*!< counter which controls building
- of a new hash index for the page */
- ulint n_fields; /*!< recommended prefix length for hash
- search: number of full fields */
- ulint n_bytes; /*!< recommended prefix: number of bytes
- in an incomplete field */
- ibool left_side; /*!< TRUE or FALSE, depending on
- whether the leftmost record of several
- records with the same prefix should be
- indexed in the hash index */
- /* @} */
-
- /** @name Hash search fields
- These 5 fields may only be modified when we have
- an x-latch on btr_search_latch AND
- - we are holding an s-latch or x-latch on buf_block_t::lock or
- - we know that buf_block_t::buf_fix_count == 0.
-
- An exception to this is when we init or create a page
- in the buffer pool in buf0buf.cc.
-
- Another exception is that assigning block->index = NULL
- is allowed whenever holding an x-latch on btr_search_latch. */
-
- /* @{ */
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- ulint n_pointers; /*!< used in debugging: the number of
- pointers in the adaptive hash index
- pointing to this frame */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- unsigned curr_n_fields:10;/*!< prefix length for hash indexing:
- number of full fields */
- unsigned curr_n_bytes:15;/*!< number of bytes in hash
- indexing */
- unsigned curr_left_side:1;/*!< TRUE or FALSE in hash indexing */
- dict_index_t* index; /*!< Index for which the
- adaptive hash index has been
- created, or NULL if the page
- does not exist in the
- index. Note that it does not
- guarantee that the index is
- complete, though: there may
- have been hash collisions,
- record deletions, etc. */
- /* @} */
-# ifdef UNIV_SYNC_DEBUG
- /** @name Debug fields */
- /* @{ */
- rw_lock_t debug_latch; /*!< in the debug version, each thread
- which bufferfixes the block acquires
- an s-latch here; so we can use the
- debug utilities in sync0rw */
- /* @} */
-# endif
-#endif /* !UNIV_HOTBACKUP */
-};
-
-/** Check if a buf_block_t object is in a valid state
-@param block buffer block (expanded twice, so it must have no side effects)
-@return TRUE if BUF_BLOCK_NOT_USED <= state <= BUF_BLOCK_REMOVE_HASH */
-#define buf_block_state_valid(block) \
-(buf_block_get_state(block) >= BUF_BLOCK_NOT_USED \
- && (buf_block_get_state(block) <= BUF_BLOCK_REMOVE_HASH))
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Compute the zip_hash fold value of a block: frame address / UNIV_PAGE_SIZE. */
-/* @{ */
-#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
-#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
-#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
-/* @} */
-
-/** Struct that is embedded in the free zip blocks */
-struct buf_buddy_free_t {
- union {
- ulint size; /*!< size of the block */
- byte bytes[FIL_PAGE_DATA];
- /*!< stamp[FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID]
- == BUF_BUDDY_FREE_STAMP denotes a free
- block. If the space_id field of buddy
- block != BUF_BUDDY_FREE_STAMP, the block
- is not in any zip_free list. If the
- space_id is BUF_BUDDY_FREE_STAMP then
- stamp.size will contain the
- buddy block size. */
- } stamp;
-
- buf_page_t bpage; /*!< Embedded bpage descriptor */
- UT_LIST_NODE_T(buf_buddy_free_t) list;
- /*!< Node of zip_free list */
-};
-
-/** @brief The buffer pool statistics structure. */
-struct buf_pool_stat_t{
- ulint n_page_gets; /*!< number of page gets performed;
- also successful searches through
- the adaptive hash index are
- counted as page gets. */
- ulint n_pages_read; /*!< number of read operations. Accessed
- atomically. */
- ulint n_pages_written;/*!< number of write operations. Accessed
- atomically.*/
- ulint n_pages_created;/*!< number of pages created
- in the pool with no read */
- ulint n_ra_pages_read_rnd;/*!< number of pages read in
- as part of random read ahead */
- ulint n_ra_pages_read;/*!< number of pages read in
- as part of read ahead */
- ulint n_ra_pages_evicted;/*!< number of read ahead
- pages that are evicted without
- being accessed */
- ulint n_pages_made_young; /*!< number of pages made young, in
- calls to buf_LRU_make_block_young() */
- ulint n_pages_not_made_young; /*!< number of pages not made
- young because the first access
- was not long enough ago, in
- buf_page_peek_if_too_old() */
- ulint LRU_bytes; /*!< LRU size in bytes */
- ulint flush_list_bytes;/*!< flush_list size in bytes */
- ulint buf_lru_flush_page_count; /*!< NOTE(review): undocumented; presumably pages flushed from the LRU -- confirm */
-};
-
-/** Statistics of buddy blocks of a given size (one entry per block size). */
-struct buf_buddy_stat_t {
- /** Number of blocks currently allocated from the buddy system. */
- ulint used;
- /** Number of blocks relocated by the buddy system. */
- ib_uint64_t relocated;
- /** Total duration of block relocations, in microseconds. */
- ib_uint64_t relocated_usec;
-};
-
-/** @brief The temporary memory array structure (array of buf_tmp_buffer_t).
-
-NOTE! The definition appears here only for other modules of this
-directory (buf) to see it. Do not use from outside! */
-
-typedef struct {
- ulint n_slots; /*!< Total number of slots */
- buf_tmp_buffer_t *slots; /*!< Pointer to the first of the n_slots
- slots in the array */
-} buf_tmp_array_t;
-
-/** @brief The buffer pool structure.
-
-NOTE! The definition appears here only for other modules of this
-directory (buf) to see it. Do not use from outside! */
-
-struct buf_pool_t{
-
- /** @name General fields */
- /* @{ */
- ib_mutex_t zip_mutex; /*!< Zip mutex of this buffer
- pool instance, protects compressed
- only pages (of type buf_page_t, not
- buf_block_t) */
- ib_prio_mutex_t LRU_list_mutex; /*!< protects the LRU and unzip_LRU lists */
- ib_prio_mutex_t free_list_mutex; /*!< mutex of the free list */
- ib_mutex_t zip_free_mutex; /*!< protects buddy_stat[] */
- ib_mutex_t zip_hash_mutex; /*!< presumably protects zip_hash -- confirm */
- ib_mutex_t flush_state_mutex; /*!< Flush state protection
- mutex */
- ulint instance_no; /*!< Array index of this buffer
- pool instance */
- ulint old_pool_size; /*!< Old pool size in bytes */
- ulint curr_pool_size; /*!< Current pool size in bytes */
- ulint LRU_old_ratio; /*!< Reserve this much of the buffer
- pool for "old" blocks */
-#ifdef UNIV_DEBUG
- ulint buddy_n_frames; /*!< Number of frames allocated from
- the buffer pool to the buddy system */
-#endif
- ulint n_chunks; /*!< number of buffer pool chunks */
- buf_chunk_t* chunks; /*!< buffer pool chunks */
- ulint curr_size; /*!< current pool size in pages */
- ulint read_ahead_area;/*!< size in pages of the area which
- the read-ahead algorithms read if
- invoked */
- hash_table_t* page_hash; /*!< hash table of buf_page_t or
- buf_block_t file pages,
- buf_page_in_file() == TRUE,
- indexed by (space_id, offset).
- page_hash is protected by an
- array of mutexes. */
- hash_table_t* zip_hash; /*!< hash table of buf_block_t blocks
- whose frames are allocated to the
- zip buddy system,
- indexed by block->frame */
- ulint n_pend_reads; /*!< number of pending read
- operations. Accessed atomically */
- ulint n_pend_unzip; /*!< number of pending decompressions.
- Accessed atomically */
-
- time_t last_printout_time;
- /*!< when buf_print_io was last time
- called. Accesses not protected */
- buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
- /*!< Statistics of buddy system,
- indexed by block size. Protected by
- zip_free_mutex. */
- buf_pool_stat_t stat; /*!< current statistics */
- buf_pool_stat_t old_stat; /*!< old statistics */
-
- /* @} */
-
- /** @name Page flushing algorithm fields */
-
- /* @{ */
-
- ib_mutex_t flush_list_mutex;/*!< mutex protecting the
- flush list access. This mutex
- protects flush_list, flush_rbt
- and bpage::list pointers when
- the bpage is on flush_list. It
- also protects writes to
- bpage::oldest_modification and
- flush_list_hp */
- const buf_page_t* flush_list_hp;/*!< "hazard pointer"
- used during scan of flush_list
- while doing flush list batch.
- Protected by flush_list_mutex */
- UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
- /*!< base node of the modified block
- list */
- ibool init_flush[BUF_FLUSH_N_TYPES];
- /*!< this is TRUE when a flush of the
- given type is being initialized.
- Protected by flush_state_mutex. */
- ulint n_flush[BUF_FLUSH_N_TYPES];
- /*!< this is the number of pending
- writes in the given flush type.
- Protected by flush_state_mutex. */
- os_event_t no_flush[BUF_FLUSH_N_TYPES];
- /*!< this is in the set state
- when there is no flush batch
- of the given type running;
- os_event_set() and os_event_reset()
- are protected by
- buf_pool_t::flush_state_mutex */
- ib_rbt_t* flush_rbt; /*!< a red-black tree is used
- exclusively during recovery to
- speed up insertions in the
- flush_list. This tree contains
- blocks in order of
- oldest_modification LSN and is
- kept in sync with the
- flush_list.
- Each member of the tree MUST
- also be on the flush_list.
- This tree is relevant only in
- recovery and is set to NULL
- once the recovery is over.
- Protected by flush_list_mutex */
- ulint freed_page_clock;/*!< a sequence number used
- to count the number of buffer
- blocks removed from the end of
- the LRU list; NOTE that this
- counter may wrap around at 4
- billion! A thread is allowed
- to read this for heuristic
- purposes without holding any
- mutex or latch. For non-heuristic
- purposes protected by LRU_list_mutex */
- ibool try_LRU_scan; /*!< Set to FALSE when an LRU
- scan for free block fails. This
- flag is used to avoid repeated
- scans of LRU list when we know
- that there is no free block
- available in the scan depth for
- eviction. Set to TRUE whenever
- we flush a batch from the
- buffer pool. Accessed atomically. */
- /* @} */
-
- /** @name LRU replacement algorithm fields */
- /* @{ */
-
- UT_LIST_BASE_NODE_T(buf_page_t) free;
- /*!< base node of the free
- block list */
- UT_LIST_BASE_NODE_T(buf_page_t) LRU;
- /*!< base node of the LRU list */
- buf_page_t* LRU_old; /*!< pointer to the about
- LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
- oldest blocks in the LRU list;
- NULL if LRU length less than
- BUF_LRU_OLD_MIN_LEN;
- NOTE: when LRU_old != NULL, its length
- should always equal LRU_old_len */
- ulint LRU_old_len; /*!< length of the LRU list from
- the block to which LRU_old points
- onward, including that block;
- see buf0lru.cc for the restrictions
- on this value; 0 if LRU_old == NULL;
- NOTE: LRU_old_len must be adjusted
- whenever LRU_old shrinks or grows! */
-
- UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU;
- /*!< base node of the
- unzip_LRU list. The list is protected
- by LRU list mutex. */
-
- /* @} */
- /** @name Buddy allocator fields
- The buddy allocator is used for allocating compressed page
- frames and buf_page_t descriptors of blocks that exist
- in the buffer pool only in compressed form. */
- /* @{ */
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
- UT_LIST_BASE_NODE_T(buf_page_t) zip_clean;
- /*!< unmodified compressed pages */
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
- UT_LIST_BASE_NODE_T(buf_buddy_free_t) zip_free[BUF_BUDDY_SIZES_MAX];
- /*!< buddy free lists */
-
- buf_page_t* watch;
- /*!< Sentinel records for buffer
- pool watches. */
-
- buf_tmp_array_t* tmp_arr;
- /*!< Array for temporary memory
- used in compression and encryption */
-
-#if BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN
-# error "BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN"
-#endif
- /* @} */
-};
-
-/** @name Accessors for buffer pool mutexes
-Use these instead of accessing buffer pool mutexes directly. */
-/* @{ */
-
-/** Test if flush list mutex is owned (b is expanded unparenthesized). */
-#define buf_flush_list_mutex_own(b) mutex_own(&b->flush_list_mutex)
-
-/** Acquire the flush list mutex of buffer pool b. */
-#define buf_flush_list_mutex_enter(b) do { \
- mutex_enter(&b->flush_list_mutex); \
-} while (0)
-/** Release the flush list mutex of buffer pool b. */
-# define buf_flush_list_mutex_exit(b) do { \
- mutex_exit(&b->flush_list_mutex); \
-} while (0)
-
-/** Test if block->mutex is owned. */
-#define buf_block_mutex_own(b) mutex_own(&(b)->mutex)
-
-/** Acquire the block->mutex. */
-#define buf_block_mutex_enter(b) do { \
- mutex_enter(&(b)->mutex); \
-} while (0)
-
-/** Release the block->mutex. */
-#define buf_block_mutex_exit(b) do { \
- mutex_exit(&(b)->mutex); \
-} while (0)
-
-
-/** Get the page_hash lock for fold value f in buffer pool b. */
-# define buf_page_hash_lock_get(b, f) \
- hash_get_lock(b->page_hash, f)
-
-#ifdef UNIV_SYNC_DEBUG
-/** Test if the page_hash lock of page p in pool b is held in s-mode. */
-# define buf_page_hash_lock_held_s(b, p) \
- rw_lock_own(buf_page_hash_lock_get(b, \
- buf_page_address_fold(p->space, \
- p->offset)), \
- RW_LOCK_SHARED)
-
-/** Test if the page_hash lock of page p in pool b is held in x-mode. */
-# define buf_page_hash_lock_held_x(b, p) \
- rw_lock_own(buf_page_hash_lock_get(b, \
- buf_page_address_fold(p->space, \
- p->offset)), \
- RW_LOCK_EX)
-
-/** Test if page_hash lock is held in x or s-mode. */
-# define buf_page_hash_lock_held_s_or_x(b, p) \
- (buf_page_hash_lock_held_s(b, p) \
- || buf_page_hash_lock_held_x(b, p))
-
-# define buf_block_hash_lock_held_s(b, p) \
- buf_page_hash_lock_held_s(b, &(p->page))
-
-# define buf_block_hash_lock_held_x(b, p) \
- buf_page_hash_lock_held_x(b, &(p->page))
-
-# define buf_block_hash_lock_held_s_or_x(b, p) \
- buf_page_hash_lock_held_s_or_x(b, &(p->page))
-#else /* UNIV_SYNC_DEBUG: without it every check evaluates to TRUE */
-# define buf_page_hash_lock_held_s(b, p) (TRUE)
-# define buf_page_hash_lock_held_x(b, p) (TRUE)
-# define buf_page_hash_lock_held_s_or_x(b, p) (TRUE)
-# define buf_block_hash_lock_held_s(b, p) (TRUE)
-# define buf_block_hash_lock_held_x(b, p) (TRUE)
-# define buf_block_hash_lock_held_s_or_x(b, p) (TRUE)
-#endif /* UNIV_SYNC_DEBUG */
-
-#endif /* !UNIV_HOTBACKUP */
-/* @} */
-
-/**********************************************************************
-Let us list the consistency conditions for different control block states.
-
-NOT_USED: is in free list, not in LRU list, not in flush list, nor
- page hash table
-READY_FOR_USE: is not in free list, LRU list, or flush list, nor page
- hash table
-MEMORY: is not in free list, LRU list, or flush list, nor page
- hash table
-FILE_PAGE: space and offset are defined, is in page hash table
- if io_fix == BUF_IO_WRITE,
- pool: no_flush[flush_type] is in reset state,
- pool: n_flush[flush_type] > 0
-
- (1) if buf_fix_count == 0, then
- is in LRU list, not in free list
- is in flush list,
- if and only if oldest_modification > 0
- is x-locked,
- if and only if io_fix == BUF_IO_READ
- is s-locked,
- if and only if io_fix == BUF_IO_WRITE
-
- (2) if buf_fix_count > 0, then
- is not in LRU list, not in free list
- is in flush list,
- if and only if oldest_modification > 0
- if io_fix == BUF_IO_READ,
- is x-locked
- if io_fix == BUF_IO_WRITE,
- is s-locked
-
-State transitions:
-
-NOT_USED => READY_FOR_USE
-READY_FOR_USE => MEMORY
-READY_FOR_USE => FILE_PAGE
-MEMORY => NOT_USED
-FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if
- (1) buf_fix_count == 0,
- (2) oldest_modification == 0, and
- (3) io_fix == 0.
-*/
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/** Functor to validate the LRU list: asserts elem->in_LRU_list. */
-struct CheckInLRUList {
- void operator()(const buf_page_t* elem) const
- {
- ut_a(elem->in_LRU_list);
- }
-};
-
-/** Functor to validate the free list: asserts elem->in_free_list. */
-struct CheckInFreeList {
- void operator()(const buf_page_t* elem) const
- {
- ut_a(elem->in_free_list);
- }
-};
-
-struct CheckUnzipLRUAndLRUList { /* functor to validate the unzip_LRU list */
- void operator()(const buf_block_t* elem) const
- {
- ut_a(elem->page.in_LRU_list); /* block must be flagged as in the LRU list */
- ut_a(elem->in_unzip_LRU_list); /* ... and as in the unzip_LRU list */
- }
-};
-#endif /* UNIV_DEBUG || defined UNIV_BUF_DEBUG */
-
-/*********************************************************************//**
-Acquire LRU list mutex */
-void
-buf_pool_mutex_enter(
-/*=================*/
- buf_pool_t* buf_pool); /*!< in: buffer pool */
-/*********************************************************************//**
-Exit LRU list mutex */
-void
-buf_pool_mutex_exit(
-/*================*/
- buf_pool_t* buf_pool); /*!< in: buffer pool */
-
-#ifndef UNIV_NONINL
-#include "buf0buf.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic
deleted file mode 100644
index 20721b28ef2..00000000000
--- a/storage/xtradb/include/buf0buf.ic
+++ /dev/null
@@ -1,1561 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-Copyright (c) 2014, 2015, MariaDB Corporation.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0buf.ic
-The database buffer buf_pool
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "mtr0mtr.h"
-#ifndef UNIV_HOTBACKUP
-#include "buf0flu.h"
-#include "buf0lru.h"
-#include "buf0rea.h"
-
-/** A chunk of buffers. The buffer pool is allocated in chunks. */
-struct buf_chunk_t{
- ulint mem_size; /*!< allocated size of the chunk */
- ulint size; /*!< size of frames[] and blocks[] */
- void* mem; /*!< pointer to the memory area which
- was allocated for the frames */
- buf_block_t* blocks; /*!< array of buffer control blocks */
-};
-
-
-#include "srv0srv.h"
-
-/*********************************************************************//**
-Gets the current size of buffer buf_pool in bytes.
-@return size in bytes */
-UNIV_INLINE
-ulint
-buf_pool_get_curr_size(void)
-/*========================*/
-{
- return(srv_buf_pool_curr_size);
-}
-
-/********************************************************************//**
-Calculates the index of a buffer pool to the buf_pool[] array.
-@return the position of the buffer pool in buf_pool[] */
-UNIV_INLINE
-ulint
-buf_pool_index(
-/*===========*/
- const buf_pool_t* buf_pool) /*!< in: buffer pool */
-{
- ulint i = buf_pool - buf_pool_ptr;
- ut_ad(i < MAX_BUFFER_POOLS);
- ut_ad(i < srv_buf_pool_instances);
- return(i);
-}
-
-/******************************************************************//**
-Returns the buffer pool instance given a page instance
-@return buf_pool */
-UNIV_INLINE
-buf_pool_t*
-buf_pool_from_bpage(
-/*================*/
- const buf_page_t* bpage) /*!< in: buffer pool page */
-{
- ulint i;
- i = bpage->buf_pool_index;
- ut_ad(i < srv_buf_pool_instances);
- return(&buf_pool_ptr[i]);
-}
-
-/******************************************************************//**
-Returns the buffer pool instance given a block instance
-@return buf_pool */
-UNIV_INLINE
-buf_pool_t*
-buf_pool_from_block(
-/*================*/
- const buf_block_t* block) /*!< in: block */
-{
- return(buf_pool_from_bpage(&block->page));
-}
-
-/*********************************************************************//**
-Gets the current size of buffer buf_pool in pages.
-@return size in pages*/
-UNIV_INLINE
-ulint
-buf_pool_get_n_pages(void)
-/*======================*/
-{
- return(buf_pool_get_curr_size() / UNIV_PAGE_SIZE);
-}
-
-/********************************************************************//**
-Reads the freed_page_clock of a buffer block.
-@return freed_page_clock */
-UNIV_INLINE
-ulint
-buf_page_get_freed_page_clock(
-/*==========================*/
- const buf_page_t* bpage) /*!< in: block */
-{
- /* This is sometimes read without holding any buffer pool mutex. */
- return(bpage->freed_page_clock);
-}
-
-/********************************************************************//**
-Reads the freed_page_clock of a buffer block.
-@return freed_page_clock */
-UNIV_INLINE
-ulint
-buf_block_get_freed_page_clock(
-/*===========================*/
- const buf_block_t* block) /*!< in: block */
-{
- return(buf_page_get_freed_page_clock(&block->page));
-}
-
-/********************************************************************//**
-Tells if a block is still close enough to the MRU end of the LRU list
-meaning that it is not in danger of getting evicted and also implying
-that it has been accessed recently.
-Note that this is for heuristics only and does not reserve buffer pool
-mutex.
-@return TRUE if block is close to MRU end of LRU */
-UNIV_INLINE
-ibool
-buf_page_peek_if_young(
-/*===================*/
- const buf_page_t* bpage) /*!< in: block */
-{
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
- /* FIXME: bpage->freed_page_clock is 31 bits */
- return((buf_pool->freed_page_clock & ((1UL << 31) - 1))
- < ((ulint) bpage->freed_page_clock
- + (buf_pool->curr_size
- * (BUF_LRU_OLD_RATIO_DIV - buf_pool->LRU_old_ratio)
- / (BUF_LRU_OLD_RATIO_DIV * 4))));
-}
-
-/********************************************************************//**
-Recommends a move of a block to the start of the LRU list if there is danger
-of dropping from the buffer pool. NOTE: does not reserve the buffer pool
-mutex.
-@return TRUE if should be made younger */
-UNIV_INLINE
-ibool
-buf_page_peek_if_too_old(
-/*=====================*/
- const buf_page_t* bpage) /*!< in: block to make younger */
-{
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
- if (buf_pool->freed_page_clock == 0) {
- /* If eviction has not started yet, do not update the
- statistics or move blocks in the LRU list. This is
- either the warm-up phase or an in-memory workload. */
- return(FALSE);
- } else if (buf_LRU_old_threshold_ms && bpage->old) {
- unsigned access_time = buf_page_is_accessed(bpage);
-
- if (access_time > 0
- && ((ib_uint32_t) (ut_time_ms() - access_time))
- >= buf_LRU_old_threshold_ms) {
- return(TRUE);
- }
-
- buf_pool->stat.n_pages_not_made_young++;
- return(FALSE);
- } else {
- return(!buf_page_peek_if_young(bpage));
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Gets the state of a block.
-@return state */
-UNIV_INLINE
-enum buf_page_state
-buf_page_get_state(
-/*===============*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
-{
- enum buf_page_state state = (enum buf_page_state) bpage->state;
-
-#ifdef UNIV_DEBUG
- switch (state) {
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_FILE_PAGE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- break;
- default:
- ut_error;
- }
-#endif /* UNIV_DEBUG */
-
- return(state);
-}
-/*********************************************************************//**
-Gets the state of a block.
-@return state */
-UNIV_INLINE
-enum buf_page_state
-buf_block_get_state(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- return(buf_page_get_state(&block->page));
-}
-
-/*********************************************************************//**
-Gets the state name for state of a block
-@return name or "CORRUPTED" */
-UNIV_INLINE
-const char*
-buf_get_state_name(
-/*===============*/
- const buf_block_t* block) /*!< in: pointer to the control
- block */
-{
- enum buf_page_state state = buf_page_get_state(&block->page);
-
- switch (state) {
- case BUF_BLOCK_POOL_WATCH:
- return (const char *) "BUF_BLOCK_POOL_WATCH";
- case BUF_BLOCK_ZIP_PAGE:
- return (const char *) "BUF_BLOCK_ZIP_PAGE";
- case BUF_BLOCK_ZIP_DIRTY:
- return (const char *) "BUF_BLOCK_ZIP_DIRTY";
- case BUF_BLOCK_NOT_USED:
- return (const char *) "BUF_BLOCK_NOT_USED";
- case BUF_BLOCK_READY_FOR_USE:
- return (const char *) "BUF_BLOCK_NOT_USED";
- case BUF_BLOCK_FILE_PAGE:
- return (const char *) "BUF_BLOCK_FILE_PAGE";
- case BUF_BLOCK_MEMORY:
- return (const char *) "BUF_BLOCK_MEMORY";
- case BUF_BLOCK_REMOVE_HASH:
- return (const char *) "BUF_BLOCK_REMOVE_HASH";
- default:
- return (const char *) "CORRUPTED";
- }
-}
-
-/*********************************************************************//**
-Sets the state of a block. */
-UNIV_INLINE
-void
-buf_page_set_state(
-/*===============*/
- buf_page_t* bpage, /*!< in/out: pointer to control block */
- enum buf_page_state state) /*!< in: state */
-{
-#ifdef UNIV_DEBUG
- enum buf_page_state old_state = buf_page_get_state(bpage);
-
- switch (old_state) {
- case BUF_BLOCK_POOL_WATCH:
- ut_error;
- break;
- case BUF_BLOCK_ZIP_PAGE:
- ut_a(state == BUF_BLOCK_ZIP_DIRTY);
- break;
- case BUF_BLOCK_ZIP_DIRTY:
- ut_a(state == BUF_BLOCK_ZIP_PAGE);
- break;
- case BUF_BLOCK_NOT_USED:
- ut_a(state == BUF_BLOCK_READY_FOR_USE);
- break;
- case BUF_BLOCK_READY_FOR_USE:
- ut_a(state == BUF_BLOCK_MEMORY
- || state == BUF_BLOCK_FILE_PAGE
- || state == BUF_BLOCK_NOT_USED);
- break;
- case BUF_BLOCK_MEMORY:
- ut_a(state == BUF_BLOCK_NOT_USED);
- break;
- case BUF_BLOCK_FILE_PAGE:
- if (!(state == BUF_BLOCK_NOT_USED
- || state == BUF_BLOCK_REMOVE_HASH)) {
- const char *old_state_name = buf_get_state_name((buf_block_t*)bpage);
- bpage->state = state;
-
- fprintf(stderr,
- "InnoDB: Error: block old state %d (%s) "
- " new state %d (%s) not correct\n",
- old_state,
- old_state_name,
- state,
- buf_get_state_name((buf_block_t*)bpage));
- }
-
- ut_a(state == BUF_BLOCK_NOT_USED
- || state == BUF_BLOCK_REMOVE_HASH);
- break;
- case BUF_BLOCK_REMOVE_HASH:
- ut_a(state == BUF_BLOCK_MEMORY);
- break;
- }
-#endif /* UNIV_DEBUG */
-
- bpage->state = state;
-}
-
-/*********************************************************************//**
-Sets the state of a block. */
-UNIV_INLINE
-void
-buf_block_set_state(
-/*================*/
- buf_block_t* block, /*!< in/out: pointer to control block */
- enum buf_page_state state) /*!< in: state */
-{
- buf_page_set_state(&block->page, state);
-}
-
-/*********************************************************************//**
-Determines if a block is mapped to a tablespace.
-@return TRUE if mapped */
-UNIV_INLINE
-ibool
-buf_page_in_file(
-/*=============*/
- const buf_page_t* bpage) /*!< in: pointer to control block */
-{
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_POOL_WATCH:
- ut_error;
- break;
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- case BUF_BLOCK_FILE_PAGE:
- return(TRUE);
- case BUF_BLOCK_NOT_USED:
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- break;
- }
-
- return(FALSE);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Determines if a block should be on unzip_LRU list.
-@return TRUE if block belongs to unzip_LRU */
-UNIV_INLINE
-ibool
-buf_page_belongs_to_unzip_LRU(
-/*==========================*/
- const buf_page_t* bpage) /*!< in: pointer to control block */
-{
- ut_ad(buf_page_in_file(bpage));
-
- return(bpage->zip.data
- && buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
-}
-
-/*********************************************************************//**
-Gets the mutex of a block.
-@return pointer to mutex protecting bpage */
-UNIV_INLINE
-ib_mutex_t*
-buf_page_get_mutex(
-/*===============*/
- const buf_page_t* bpage) /*!< in: pointer to control block */
-{
- switch (buf_page_get_state(bpage)) {
- case BUF_BLOCK_POOL_WATCH:
- ut_error;
- return(NULL);
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY: {
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
- return(&buf_pool->zip_mutex);
- }
- default:
- return(&((buf_block_t*) bpage)->mutex);
- }
-}
-
-/*********************************************************************//**
-Get the flush type of a page.
-@return flush type */
-UNIV_INLINE
-buf_flush_t
-buf_page_get_flush_type(
-/*====================*/
- const buf_page_t* bpage) /*!< in: buffer page */
-{
- buf_flush_t flush_type = (buf_flush_t) bpage->flush_type;
-
-#ifdef UNIV_DEBUG
- switch (flush_type) {
- case BUF_FLUSH_LRU:
- case BUF_FLUSH_LIST:
- case BUF_FLUSH_SINGLE_PAGE:
- return(flush_type);
- case BUF_FLUSH_N_TYPES:
- ut_error;
- }
- ut_error;
-#endif /* UNIV_DEBUG */
- return(flush_type);
-}
-/*********************************************************************//**
-Set the flush type of a page. */
-UNIV_INLINE
-void
-buf_page_set_flush_type(
-/*====================*/
- buf_page_t* bpage, /*!< in: buffer page */
- buf_flush_t flush_type) /*!< in: flush type */
-{
- bpage->flush_type = flush_type;
- ut_ad(buf_page_get_flush_type(bpage) == flush_type);
-}
-
-/*********************************************************************//**
-Map a block to a file page. */
-UNIV_INLINE
-void
-buf_block_set_file_page(
-/*====================*/
- buf_block_t* block, /*!< in/out: pointer to control block */
- ulint space, /*!< in: tablespace id */
- ulint page_no)/*!< in: page number */
-{
- buf_block_set_state(block, BUF_BLOCK_FILE_PAGE);
- block->page.space = static_cast<ib_uint32_t>(space);
- block->page.offset = static_cast<ib_uint32_t>(page_no);
-}
-
-/*********************************************************************//**
-Gets the io_fix state of a block.
-@return io_fix state */
-UNIV_INLINE
-enum buf_io_fix
-buf_page_get_io_fix(
-/*================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
-{
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- return buf_page_get_io_fix_unlocked(bpage);
-}
-
-/*********************************************************************//**
-Gets the io_fix state of a block. Does not assert that the
-buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
-not to hold it.
-@return io_fix state */
-UNIV_INLINE
-enum buf_io_fix
-buf_page_get_io_fix_unlocked(
-/*=========================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
-{
- enum buf_io_fix io_fix = (enum buf_io_fix) bpage->io_fix;
-#ifdef UNIV_DEBUG
- switch (io_fix) {
- case BUF_IO_NONE:
- case BUF_IO_READ:
- case BUF_IO_WRITE:
- case BUF_IO_PIN:
- return(io_fix);
- }
- ut_error;
-#endif /* UNIV_DEBUG */
- return(io_fix);
-}
-
-/*********************************************************************//**
-Gets the io_fix state of a block.
-@return io_fix state */
-UNIV_INLINE
-enum buf_io_fix
-buf_block_get_io_fix(
-/*=================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- return(buf_page_get_io_fix(&block->page));
-}
-
-/*********************************************************************//**
-Gets the io_fix state of a block. Does not assert that the
-buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
-not to hold it.
-@return io_fix state */
-UNIV_INLINE
-enum buf_io_fix
-buf_block_get_io_fix_unlocked(
-/*==========================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- return(buf_page_get_io_fix_unlocked(&block->page));
-}
-
-
-/*********************************************************************//**
-Sets the io_fix state of a block. */
-UNIV_INLINE
-void
-buf_page_set_io_fix(
-/*================*/
- buf_page_t* bpage, /*!< in/out: control block */
- enum buf_io_fix io_fix) /*!< in: io_fix state */
-{
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-
- bpage->io_fix = io_fix;
- ut_ad(buf_page_get_io_fix(bpage) == io_fix);
-}
-
-/*********************************************************************//**
-Sets the io_fix state of a block. */
-UNIV_INLINE
-void
-buf_block_set_io_fix(
-/*=================*/
- buf_block_t* block, /*!< in/out: control block */
- enum buf_io_fix io_fix) /*!< in: io_fix state */
-{
- buf_page_set_io_fix(&block->page, io_fix);
-}
-
-/*********************************************************************//**
-Makes a block sticky. A sticky block implies that even after we release
-the buf_pool->LRU_list_mutex and the block->mutex:
-* it cannot be removed from the flush_list
-* the block descriptor cannot be relocated
-* it cannot be removed from the LRU list
-Note that:
-* the block can still change its position in the LRU list
-* the next and previous pointers can change. */
-UNIV_INLINE
-void
-buf_page_set_sticky(
-/*================*/
- buf_page_t* bpage) /*!< in/out: control block */
-{
-#ifdef UNIV_DEBUG
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
-#endif
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
- ut_ad(bpage->in_LRU_list);
-
- bpage->io_fix = BUF_IO_PIN;
-}
-
-/*********************************************************************//**
-Removes stickiness of a block. */
-UNIV_INLINE
-void
-buf_page_unset_sticky(
-/*==================*/
- buf_page_t* bpage) /*!< in/out: control block */
-{
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_PIN);
-
- bpage->io_fix = BUF_IO_NONE;
-}
-
-/********************************************************************//**
-Determine if a buffer block can be relocated in memory. The block
-can be dirty, but it must not be I/O-fixed or bufferfixed. */
-UNIV_INLINE
-ibool
-buf_page_can_relocate(
-/*==================*/
- const buf_page_t* bpage) /*!< control block being relocated */
-{
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
- ut_ad(buf_page_in_file(bpage));
- ut_ad(bpage->in_LRU_list);
-
- return(buf_page_get_io_fix(bpage) == BUF_IO_NONE
- && bpage->buf_fix_count == 0);
-}
-
-/*********************************************************************//**
-Determine if a block has been flagged old.
-@return TRUE if old */
-UNIV_INLINE
-ibool
-buf_page_is_old(
-/*============*/
- const buf_page_t* bpage) /*!< in: control block */
-{
-#ifdef UNIV_DEBUG
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-#endif
- /* Buffer page mutex is not strictly required here for heuristic
- purposes even if LRU mutex is not being held. Keep the assertion
- for now since all the callers hold it. */
- ut_ad(mutex_own(buf_page_get_mutex(bpage))
- || mutex_own(&buf_pool->LRU_list_mutex));
- ut_ad(buf_page_in_file(bpage));
-
- return(bpage->old);
-}
-
-/*********************************************************************//**
-Flag a block old. */
-UNIV_INLINE
-void
-buf_page_set_old(
-/*=============*/
- buf_page_t* bpage, /*!< in/out: control block */
- ibool old) /*!< in: old */
-{
-#ifdef UNIV_DEBUG
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-#endif /* UNIV_DEBUG */
- ut_a(buf_page_in_file(bpage));
- ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
- ut_ad(bpage->in_LRU_list);
-
-#ifdef UNIV_LRU_DEBUG
- ut_a((buf_pool->LRU_old_len == 0) == (buf_pool->LRU_old == NULL));
- /* If a block is flagged "old", the LRU_old list must exist. */
- ut_a(!old || buf_pool->LRU_old);
-
- if (UT_LIST_GET_PREV(LRU, bpage) && UT_LIST_GET_NEXT(LRU, bpage)) {
- const buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage);
- const buf_page_t* next = UT_LIST_GET_NEXT(LRU, bpage);
- if (prev->old == next->old) {
- ut_a(prev->old == old);
- } else {
- ut_a(!prev->old);
- ut_a(buf_pool->LRU_old == (old ? bpage : next));
- }
- }
-#endif /* UNIV_LRU_DEBUG */
-
- bpage->old = old;
-}
-
-/*********************************************************************//**
-Determine the time of first access of a block in the buffer pool.
-@return ut_time_ms() at the time of first access, 0 if not accessed */
-UNIV_INLINE
-unsigned
-buf_page_is_accessed(
-/*=================*/
- const buf_page_t* bpage) /*!< in: control block */
-{
- ut_ad(buf_page_in_file(bpage));
-
- return(bpage->access_time);
-}
-
-/*********************************************************************//**
-Flag a block accessed. */
-UNIV_INLINE
-void
-buf_page_set_accessed(
-/*==================*/
- buf_page_t* bpage) /*!< in/out: control block */
-{
- ut_ad(mutex_own(buf_page_get_mutex(bpage)));
-
- ut_a(buf_page_in_file(bpage));
-
- if (bpage->access_time == 0) {
- /* Make this the time of the first access. */
- bpage->access_time = static_cast<uint>(ut_time_ms());
- }
-}
-
-/*********************************************************************//**
-Gets the buf_block_t handle of a buffered file block if an uncompressed
-page frame exists, or NULL.
-@return control block, or NULL */
-UNIV_INLINE
-buf_block_t*
-buf_page_get_block(
-/*===============*/
- buf_page_t* bpage) /*!< in: control block, or NULL */
-{
- if (bpage != NULL) {
-#ifdef UNIV_DEBUG
- buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ut_ad(buf_page_hash_lock_held_s_or_x(buf_pool, bpage)
- || mutex_own(&buf_pool->LRU_list_mutex));
-#endif
- ut_ad(buf_page_in_file(bpage));
-
- if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
- return((buf_block_t*) bpage);
- }
- }
-
- return(NULL);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets a pointer to the memory frame of a block.
-@return pointer to the frame */
-UNIV_INLINE
-buf_frame_t*
-buf_block_get_frame(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- if (!block) {
- return NULL;
- }
-
- SRV_CORRUPT_TABLE_CHECK(block, return(0););
-
- return(buf_nonnull_block_get_frame(block));
-}
-
-/*********************************************************************//**
-Gets a pointer to the memory frame of a block, where block is known not to be
-NULL.
-@return pointer to the frame */
-UNIV_INLINE
-buf_frame_t*
-buf_nonnull_block_get_frame(
-/*========================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- switch (buf_block_get_state(block)) {
- case BUF_BLOCK_POOL_WATCH:
- case BUF_BLOCK_ZIP_PAGE:
- case BUF_BLOCK_ZIP_DIRTY:
- case BUF_BLOCK_NOT_USED:
- if (block->page.encrypted) {
- goto ok;
- }
- ut_error;
- break;
- case BUF_BLOCK_FILE_PAGE:
-# ifndef UNIV_HOTBACKUP
- ut_a(block->page.buf_fix_count > 0);
-# endif /* !UNIV_HOTBACKUP */
- /* fall through */
- case BUF_BLOCK_READY_FOR_USE:
- case BUF_BLOCK_MEMORY:
- case BUF_BLOCK_REMOVE_HASH:
- goto ok;
- }
- ut_error;
-ok:
- return((buf_frame_t*) block->frame);
-}
-
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Gets the space id of a block.
-@return space id */
-UNIV_INLINE
-ulint
-buf_page_get_space(
-/*===============*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
-{
- ut_ad(bpage);
- ut_a(buf_page_in_file(bpage));
-
- return(bpage->space);
-}
-
-/*********************************************************************//**
-Gets the space id of a block.
-@return space id */
-UNIV_INLINE
-ulint
-buf_block_get_space(
-/*================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- ut_ad(block);
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
- return(block->page.space);
-}
-
-/*********************************************************************//**
-Gets the page number of a block.
-@return page number */
-UNIV_INLINE
-ulint
-buf_page_get_page_no(
-/*=================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
-{
- ut_ad(bpage);
- ut_a(buf_page_in_file(bpage));
-
- return(bpage->offset);
-}
-/***********************************************************************
-FIXME_FTS Gets the frame the pointer is pointing to. */
-UNIV_INLINE
-buf_frame_t*
-buf_frame_align(
-/*============*/
- /* out: pointer to frame */
- byte* ptr) /* in: pointer to a frame */
-{
- buf_frame_t* frame;
-
- ut_ad(ptr);
-
- frame = (buf_frame_t*) ut_align_down(ptr, UNIV_PAGE_SIZE);
-
- return(frame);
-}
-
-/*********************************************************************//**
-Gets the page number of a block.
-@return page number */
-UNIV_INLINE
-ulint
-buf_block_get_page_no(
-/*==================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- ut_ad(block);
- ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
- return(block->page.offset);
-}
-
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_page_get_zip_size(
-/*==================*/
- const buf_page_t* bpage) /*!< in: pointer to the control block */
-{
- return(bpage->zip.ssize
- ? (UNIV_ZIP_SIZE_MIN >> 1) << bpage->zip.ssize : 0);
-}
-
-/*********************************************************************//**
-Gets the compressed page size of a block.
-@return compressed page size, or 0 */
-UNIV_INLINE
-ulint
-buf_block_get_zip_size(
-/*===================*/
- const buf_block_t* block) /*!< in: pointer to the control block */
-{
- return(block->page.zip.ssize
- ? (UNIV_ZIP_SIZE_MIN >> 1) << block->page.zip.ssize : 0);
-}
-
-#ifndef UNIV_HOTBACKUP
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
-/*********************************************************************//**
-Gets the compressed page descriptor corresponding to an uncompressed page
-if applicable.
-@return compressed page descriptor, or NULL */
-UNIV_INLINE
-const page_zip_des_t*
-buf_frame_get_page_zip(
-/*===================*/
- const byte* ptr) /*!< in: pointer to the page */
-{
- return(buf_block_get_page_zip(buf_block_align(ptr)));
-}
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Gets the space id, page offset, and byte offset within page of a
-pointer pointing to a buffer frame containing a file page. */
-UNIV_INLINE
-void
-buf_ptr_get_fsp_addr(
-/*=================*/
- const void* ptr, /*!< in: pointer to a buffer frame */
- ulint* space, /*!< out: space id */
- fil_addr_t* addr) /*!< out: page offset and byte offset */
-{
- const page_t* page = (const page_t*) ut_align_down(ptr,
- UNIV_PAGE_SIZE);
-
- *space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- addr->page = mach_read_from_4(page + FIL_PAGE_OFFSET);
- addr->boffset = ut_align_offset(ptr, UNIV_PAGE_SIZE);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Gets the hash value of the page the pointer is pointing to. This can be used
-in searches in the lock hash table.
-@return lock hash value */
-UNIV_INLINE
-ulint
-buf_block_get_lock_hash_val(
-/*========================*/
- const buf_block_t* block) /*!< in: block */
-{
- ut_ad(block);
- ut_ad(buf_page_in_file(&block->page));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_EXCLUSIVE)
- || rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
- return(block->lock_hash_val);
-}
-
-/********************************************************************//**
-Allocates a buf_page_t descriptor. This function must succeed. In case
-of failure we assert in this function.
-@return: the allocated descriptor. */
-UNIV_INLINE
-buf_page_t*
-buf_page_alloc_descriptor(void)
-/*===========================*/
-{
- buf_page_t* bpage;
-
- bpage = (buf_page_t*) ut_malloc(sizeof *bpage);
- ut_d(memset(bpage, 0, sizeof *bpage));
- UNIV_MEM_ALLOC(bpage, sizeof *bpage);
-
- return(bpage);
-}
-
-/********************************************************************//**
-Free a buf_page_t descriptor. */
-UNIV_INLINE
-void
-buf_page_free_descriptor(
-/*=====================*/
- buf_page_t* bpage) /*!< in: bpage descriptor to free. */
-{
- ut_free(bpage);
-}
-
-/********************************************************************//**
-Frees a buffer block which does not contain a file page. */
-UNIV_INLINE
-void
-buf_block_free(
-/*===========*/
- buf_block_t* block) /*!< in, own: block to be freed */
-{
- mutex_enter(&block->mutex);
-
- ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
-
- buf_LRU_block_free_non_file_page(block);
-
- mutex_exit(&block->mutex);
-}
-
-/********************************************************************//**
-Get buf frame. */
-UNIV_INLINE
-void *
-buf_page_get_frame(
-/*===============*/
- const buf_page_t* bpage) /*!< in: buffer pool page */
-{
- /* In encryption/compression buffer pool page may contain extra
- buffer where result is stored. */
- if (bpage->slot && bpage->slot->out_buf) {
- return bpage->slot->out_buf;
- } else if (bpage->zip.data) {
- return bpage->zip.data;
- } else {
- return ((buf_block_t*) bpage)->frame;
- }
-}
-
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Copies contents of a buffer frame to a given buffer.
-@return buf */
-UNIV_INLINE
-byte*
-buf_frame_copy(
-/*===========*/
- byte* buf, /*!< in: buffer to copy to */
- const buf_frame_t* frame) /*!< in: buffer frame */
-{
- ut_ad(buf && frame);
-
- ut_memcpy(buf, frame, UNIV_PAGE_SIZE);
-
- return(buf);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Calculates a folded value of a file page address to use in the page hash
-table.
-@return the folded value */
-UNIV_INLINE
-ulint
-buf_page_address_fold(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: offset of the page within space */
-{
- return((space << 20) + space + offset);
-}
-
-/********************************************************************//**
-Gets the youngest modification log sequence number for a frame.
-Returns zero if not file page or no modification occurred yet.
-@return newest modification to page */
-UNIV_INLINE
-lsn_t
-buf_page_get_newest_modification(
-/*=============================*/
- const buf_page_t* bpage) /*!< in: block containing the
- page frame */
-{
- lsn_t lsn;
- ib_mutex_t* block_mutex = buf_page_get_mutex(bpage);
-
- mutex_enter(block_mutex);
-
- if (buf_page_in_file(bpage)) {
- lsn = bpage->newest_modification;
- } else {
- lsn = 0;
- }
-
- mutex_exit(block_mutex);
-
- return(lsn);
-}
-
-/********************************************************************//**
-Increments the modify clock of a frame by 1. The caller must (1) own the
-LRU list mutex and block bufferfix count has to be zero, (2) or own an x-lock
-on the block. */
-UNIV_INLINE
-void
-buf_block_modify_clock_inc(
-/*=======================*/
- buf_block_t* block) /*!< in: block */
-{
-#ifdef UNIV_SYNC_DEBUG
- buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*) block);
-
- ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
- && (block->page.buf_fix_count == 0))
- || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
-
- block->modify_clock++;
-}
-
-/********************************************************************//**
-Returns the value of the modify clock. The caller must have an s-lock
-or x-lock on the block.
-@return value */
-UNIV_INLINE
-ib_uint64_t
-buf_block_get_modify_clock(
-/*=======================*/
- buf_block_t* block) /*!< in: block */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED)
- || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
-#endif /* UNIV_SYNC_DEBUG */
-
- return(block->modify_clock);
-}
-
-/*******************************************************************//**
-Increments the bufferfix count. */
-UNIV_INLINE
-void
-buf_block_fix(
-/*===========*/
- buf_block_t* block) /*!< in/out: block to bufferfix */
-{
- ut_ad(!mutex_own(buf_page_get_mutex(&block->page)));
-#ifdef PAGE_ATOMIC_REF_COUNT
- os_atomic_increment_uint32(&block->page.buf_fix_count, 1);
-#else
- ib_mutex_t* block_mutex = buf_page_get_mutex(&block->page);
-
- mutex_enter(block_mutex);
- ++block->page.buf_fix_count;
- mutex_exit(block_mutex);
-#endif /* PAGE_ATOMIC_REF_COUNT */
-}
-
-/*******************************************************************//**
-Increments the bufferfix count of a block. With UNIV_SYNC_DEBUG an s-lock
-on the block's debug latch is taken first, without waiting, and must
-succeed. Without PAGE_ATOMIC_REF_COUNT the caller must own the block
-mutex. */
-UNIV_INLINE
-void
-buf_block_buf_fix_inc_func(
-/*=======================*/
-#ifdef UNIV_SYNC_DEBUG
-	const char*	file,	/*!< in: file name */
-	ulint		line,	/*!< in: line */
-#endif /* UNIV_SYNC_DEBUG */
-	buf_block_t*	block)	/*!< in/out: block to bufferfix */
-{
-#ifdef UNIV_SYNC_DEBUG
-	ibool	latched = rw_lock_s_lock_nowait(&block->debug_latch,
-						file, line);
-
-	ut_a(latched);
-#endif /* UNIV_SYNC_DEBUG */
-
-#ifdef PAGE_ATOMIC_REF_COUNT
-	os_atomic_increment_uint32(&block->page.buf_fix_count, 1);
-#else
-	ut_ad(mutex_own(&block->mutex));
-
-	block->page.buf_fix_count++;
-#endif /* PAGE_ATOMIC_REF_COUNT */
-}
-
-/*******************************************************************//**
-Decrements the bufferfix count of a block, which must be positive on
-entry. The caller must not own the page mutex: without
-PAGE_ATOMIC_REF_COUNT this function acquires it itself around the
-decrement. */
-UNIV_INLINE
-void
-buf_block_unfix(
-/*============*/
-	buf_block_t*	block)	/*!< in/out: block to bufferunfix */
-{
-	buf_page_t*	bpage = &block->page;
-
-	ut_ad(bpage->buf_fix_count > 0);
-	ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
-
-#ifdef PAGE_ATOMIC_REF_COUNT
-	os_atomic_decrement_uint32(&bpage->buf_fix_count, 1);
-#else
-	ib_mutex_t*	page_mutex = buf_page_get_mutex(bpage);
-
-	mutex_enter(page_mutex);
-	bpage->buf_fix_count--;
-	mutex_exit(page_mutex);
-#endif /* PAGE_ATOMIC_REF_COUNT */
-}
-
-/*******************************************************************//**
-Decrements the bufferfix count of a block, which must be positive on
-entry. Without PAGE_ATOMIC_REF_COUNT the block mutex is acquired around
-the decrement. With UNIV_SYNC_DEBUG the s-lock on the block's debug
-latch is released afterwards. */
-UNIV_INLINE
-void
-buf_block_buf_fix_dec(
-/*==================*/
-	buf_block_t*	block)	/*!< in/out: block to bufferunfix */
-{
-	ut_ad(block->page.buf_fix_count > 0);
-
-#ifdef PAGE_ATOMIC_REF_COUNT
-	os_atomic_decrement_uint32(&block->page.buf_fix_count, 1);
-#else
-	mutex_enter(&block->mutex);
-	block->page.buf_fix_count--;
-	mutex_exit(&block->mutex);
-#endif /* PAGE_ATOMIC_REF_COUNT */
-
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_s_unlock(&(block->debug_latch));
-#endif /* UNIV_SYNC_DEBUG */
-}
-
-/******************************************************************//**
-Returns the buffer pool instance that a page address maps to.
-@return buffer pool */
-UNIV_INLINE
-buf_pool_t*
-buf_pool_get(
-/*==========*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: offset of the page within space */
-{
-	/* Drop the low 6 bits of the offset (2log of BUF_READ_AHEAD_AREA,
-	64) before folding, so that offsets differing only in those bits
-	select the same instance. */
-	const ulint	area_no = offset >> 6;
-	const ulint	fold = buf_page_address_fold(space, area_no);
-
-	return(buf_pool_ptr + (fold % srv_buf_pool_instances));
-}
-
-/******************************************************************//**
-Returns the buffer pool instance stored at a given array index.
-@return buffer pool */
-UNIV_INLINE
-buf_pool_t*
-buf_pool_from_array(
-/*================*/
-	ulint	index)	/*!< in: array index to get
-			buffer pool instance from */
-{
-	/* The index must be below both the compile-time maximum and the
-	configured number of instances. */
-	ut_ad(index < MAX_BUFFER_POOLS);
-	ut_ad(index < srv_buf_pool_instances);
-
-	return(buf_pool_ptr + index);
-}
-
-/******************************************************************//**
-Looks up the control block of a file page in the page hash of a buffer
-pool instance. In UNIV_SYNC_DEBUG builds it is asserted that the caller
-holds the page_hash lock covering fold in shared or exclusive mode.
-@return block, NULL if not found */
-UNIV_INLINE
-buf_page_t*
-buf_page_hash_get_low(
-/*==================*/
-	buf_pool_t*	buf_pool,/*!< buffer pool instance */
-	ulint		space,	/*!< in: space id */
-	ulint		offset,	/*!< in: offset of the page within space */
-	ulint		fold)	/*!< in: buf_page_address_fold(space, offset) */
-{
-	buf_page_t*	bpage;
-
-#ifdef UNIV_SYNC_DEBUG
-	/* The fold passed in must match the page address. */
-	ut_ad(buf_page_address_fold(space, offset) == fold);
-
-	prio_rw_lock_t*	hash_lock = hash_get_lock(buf_pool->page_hash, fold);
-
-	ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX)
-	      || rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
-	/* Search the hash chain for an entry with a matching page address */
-
-	HASH_SEARCH(hash, buf_pool->page_hash, fold, buf_page_t*, bpage,
-		    ut_ad(bpage->in_page_hash && !bpage->in_zip_hash
-			  && buf_page_in_file(bpage)),
-		    bpage->space == space && bpage->offset == offset);
-
-	if (bpage != NULL) {
-		ut_a(buf_page_in_file(bpage));
-		ut_ad(bpage->in_page_hash);
-		ut_ad(!bpage->in_zip_hash);
-	}
-
-	return(bpage);
-}
-
-/******************************************************************//**
-Looks up the control block of a file page under the page_hash lock.
-The lock is taken in lock_mode when lock is not NULL, and in shared mode
-otherwise. If a real page (not a watch sentinel) is found and lock is not
-NULL, the page_hash lock stays held and is handed to the caller through
-*lock; the caller must release it. In every other case (nothing found,
-watch sentinel found, or lock == NULL) the page_hash lock is released
-before returning, and *lock, if supplied, is NULL.
-@return block, NULL if not found, or watch sentinel (if watch is true) */
-UNIV_INLINE
-buf_page_t*
-buf_page_hash_get_locked(
-/*=====================*/
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	ulint		space,		/*!< in: space id */
-	ulint		offset,		/*!< in: page number */
-	prio_rw_lock_t**	lock,	/*!< in/out: set to the held
-					page_hash lock if a page is
-					found, to NULL otherwise. If
-					NULL is passed, the page_hash
-					lock is released by this
-					function */
-	ulint		lock_mode,	/*!< in: RW_LOCK_EX or
-					RW_LOCK_SHARED. Ignored if
-					lock == NULL */
-	bool		watch)		/*!< in: if true, return watch
-					sentinel also. */
-{
-	ulint	mode = RW_LOCK_SHARED;
-
-	if (lock != NULL) {
-		*lock = NULL;
-		ut_ad(lock_mode == RW_LOCK_EX
-		      || lock_mode == RW_LOCK_SHARED);
-		mode = lock_mode;
-	}
-
-	const ulint	fold = buf_page_address_fold(space, offset);
-	prio_rw_lock_t*	hash_lock = hash_get_lock(buf_pool->page_hash, fold);
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)
-	      && !rw_lock_own(hash_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
-	if (mode == RW_LOCK_SHARED) {
-		rw_lock_s_lock(hash_lock);
-	} else {
-		rw_lock_x_lock(hash_lock);
-	}
-
-	buf_page_t*	bpage = buf_page_hash_get_low(buf_pool, space,
-						      offset, fold);
-
-	const bool	is_real_page = bpage != NULL
-		&& !buf_pool_watch_is_sentinel(buf_pool, bpage);
-
-	if (is_real_page) {
-		ut_ad(buf_page_in_file(bpage));
-		ut_ad(offset == bpage->offset);
-		ut_ad(space == bpage->space);
-
-		if (lock != NULL) {
-			/* Hand the held lock over to the caller */
-			*lock = hash_lock;
-
-			return(bpage);
-		}
-
-		/* lock == NULL: the caller wants us to release the
-		page_hash lock, which is done below */
-	} else if (!watch) {
-		/* Nothing found, or a watch sentinel the caller did not
-		ask for */
-		bpage = NULL;
-	}
-
-	if (mode == RW_LOCK_SHARED) {
-		rw_lock_s_unlock(hash_lock);
-	} else {
-		rw_lock_x_unlock(hash_lock);
-	}
-
-	return(bpage);
-}
-
-/******************************************************************//**
-Returns the uncompressed block of a file page, NULL if not found.
-The lookup is done with buf_page_hash_get_locked(). If a block is found
-and lock is not NULL, the page_hash lock is still held in lock_mode on
-return and *lock points to it; it is up to the caller to release it.
-If the page found is not a block (buf_page_get_block() returns NULL for
-it), any page_hash lock still held through *lock is released here, *lock
-is reset to NULL, and NULL is returned. If lock is NULL, the page_hash
-lock has already been released by buf_page_hash_get_locked().
-@return block, NULL if not found */
-UNIV_INLINE
-buf_block_t*
-buf_block_hash_get_locked(
-/*=====================*/
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
-	ulint		space,		/*!< in: space id */
-	ulint		offset,		/*!< in: page number */
-	prio_rw_lock_t**	lock,	/*!< in/out: set to the held
-					page_hash lock if a block is
-					found, to NULL otherwise. May
-					be NULL, in which case no lock
-					is held on return */
-	ulint		lock_mode)	/*!< in: RW_LOCK_EX or
-					RW_LOCK_SHARED. Ignored if
-					lock == NULL */
-{
-	buf_page_t*	bpage = buf_page_hash_get_locked(buf_pool,
-							 space,
-							 offset,
-							 lock,
-							 lock_mode);
-	buf_block_t*	block = buf_page_get_block(bpage);
-
-	if (block) {
-		ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-#ifdef UNIV_SYNC_DEBUG
-		ut_ad(!lock || rw_lock_own(*lock, lock_mode));
-#endif /* UNIV_SYNC_DEBUG */
-		return(block);
-	} else if (bpage) {
-		/* It is not a block. Just a bpage */
-		ut_ad(buf_page_in_file(bpage));
-
-		if (lock) {
-			if (lock_mode == RW_LOCK_SHARED) {
-				rw_lock_s_unlock(*lock);
-			} else {
-				rw_lock_x_unlock(*lock);
-			}
-
-			/* Reset the out parameter only when the caller
-			supplied one: lock may legitimately be NULL, and
-			writing through it unconditionally would
-			dereference a NULL pointer. */
-			*lock = NULL;
-		}
-
-		return(NULL);
-	}
-
-	ut_ad(!bpage);
-	ut_ad(lock == NULL || *lock == NULL);
-	return(NULL);
-}
-
-/********************************************************************//**
-Tells whether a page can be found in the buffer pool hash table.
-
-NOTE that a page present in the hash table is not necessarily read
-from disk yet.
-
-@return TRUE if found in the page hash table */
-UNIV_INLINE
-ibool
-buf_page_peek(
-/*==========*/
-	ulint	space,	/*!< in: space id */
-	ulint	offset)	/*!< in: page number */
-{
-	buf_pool_t*		pool = buf_pool_get(space, offset);
-	const buf_page_t*	found = buf_page_hash_get(pool, space, offset);
-
-	return(found != NULL);
-}
-
-/********************************************************************//**
-Releases a compressed-only page acquired with buf_page_get_zip().
-The page is unfixed if its state is BUF_BLOCK_FILE_PAGE,
-BUF_BLOCK_ZIP_PAGE or BUF_BLOCK_ZIP_DIRTY; for BUF_BLOCK_FILE_PAGE the
-debug latch is released first in UNIV_SYNC_DEBUG builds. Any other
-state ends in ut_error. */
-UNIV_INLINE
-void
-buf_page_release_zip(
-/*=================*/
-	buf_page_t*	bpage)	/*!< in: buffer block */
-{
-	buf_block_t*	block = (buf_block_t*) bpage;
-
-	switch (buf_page_get_state(bpage)) {
-	case BUF_BLOCK_FILE_PAGE:
-#ifdef UNIV_SYNC_DEBUG
-		rw_lock_s_unlock(&(block->debug_latch));
-#endif /* UNIV_SYNC_DEBUG */
-		/* Fall through */
-	case BUF_BLOCK_ZIP_DIRTY:
-	case BUF_BLOCK_ZIP_PAGE:
-		buf_block_unfix(block);
-		return;
-
-	case BUF_BLOCK_NOT_USED:
-	case BUF_BLOCK_READY_FOR_USE:
-	case BUF_BLOCK_MEMORY:
-	case BUF_BLOCK_REMOVE_HASH:
-	case BUF_BLOCK_POOL_WATCH:
-		/* Not a state a page being released here may be in */
-		break;
-	}
-
-	ut_error;
-}
-
-/********************************************************************//**
-Releases the latch given by rw_latch, if any, on a buffer control
-block, and then decrements the block's bufferfix count. The block must
-be in state BUF_BLOCK_FILE_PAGE. */
-UNIV_INLINE
-void
-buf_page_release(
-/*=============*/
-	buf_block_t*	block,		/*!< in: buffer block */
-	ulint		rw_latch)	/*!< in: RW_S_LATCH, RW_X_LATCH,
-					RW_NO_LATCH */
-{
-	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_s_unlock(&block->debug_latch);
-#endif /* UNIV_SYNC_DEBUG */
-
-	switch (rw_latch) {
-	case RW_S_LATCH:
-		rw_lock_s_unlock(&block->lock);
-		break;
-	case RW_X_LATCH:
-		rw_lock_x_unlock(&block->lock);
-		break;
-	default:
-		/* No page latch to release */
-		break;
-	}
-
-	buf_block_unfix(block);
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/*********************************************************************//**
-Records latching order level information for the rw-lock that protects
-the buffer frame of a block. Meant to be called in the debug version
-after a page has been latched successfully, when the latching order
-level of the acquired latch is known. */
-UNIV_INLINE
-void
-buf_block_dbg_add_level(
-/*====================*/
-	buf_block_t*	block,	/*!< in: buffer page
-				where we have acquired latch */
-	ulint		level)	/*!< in: latching order level */
-{
-	sync_thread_add_level(&(block->lock), level, FALSE);
-}
-
-#endif /* UNIV_SYNC_DEBUG */
-/*********************************************************************//**
-Gets the block array of the nth chunk in the specified buffer pool and
-reports the size of that chunk.
-@return the nth chunk's buffer block. */
-UNIV_INLINE
-buf_block_t*
-buf_get_nth_chunk_block(
-/*====================*/
-	const buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
-	ulint			n,		/*!< in: nth chunk in the buffer pool */
-	ulint*			chunk_size)	/*!< out: size of the nth chunk */
-{
-	const buf_chunk_t*	nth_chunk = &buf_pool->chunks[n];
-
-	*chunk_size = nth_chunk->size;
-
-	return(nth_chunk->blocks);
-}
-
-
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Checks whether the current thread owns buf_pool->zip_mutex of the buffer
-pool that a page belongs to. The page must be in state
-BUF_BLOCK_ZIP_PAGE or BUF_BLOCK_ZIP_DIRTY, and the zip_mutex must be
-the mutex serving as its block mutex.
-@return true if buf_pool->zip_mutex is owned. */
-UNIV_INLINE
-bool
-buf_own_zip_mutex_for_page(
-/*=======================*/
-	const buf_page_t*	bpage)	/*!< in: compressed-only page */
-{
-	buf_pool_t*	pool = buf_pool_from_bpage(bpage);
-
-	ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE
-	      || buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY);
-	ut_ad(&pool->zip_mutex == buf_page_get_mutex(bpage));
-
-	return(mutex_own(&pool->zip_mutex));
-}
-#endif /* UNIV_DEBUG */
-
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/buf0checksum.h b/storage/xtradb/include/buf0checksum.h
deleted file mode 100644
index 6818345f965..00000000000
--- a/storage/xtradb/include/buf0checksum.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0checksum.h
-Buffer pool checksum functions, also linked from /extra/innochecksum.cc
-
-Created Aug 11, 2011 Vasil Dimov
-*******************************************************/
-
-#ifndef buf0checksum_h
-#define buf0checksum_h
-
-#include "univ.i"
-
-#include "buf0types.h"
-
-/** Magic value to use instead of checksums when they are disabled */
-#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
-
-/********************************************************************//**
-Calculates a page CRC32 which is stored to the page when it is written
-to a file. Note that we must be careful to calculate the same value on
-32-bit and 64-bit architectures.
-@return checksum */
-UNIV_INTERN
-ib_uint32_t
-buf_calc_page_crc32(
-/*================*/
- const byte* page); /*!< in: buffer page */
-
-/********************************************************************//**
-Calculates a page checksum which is stored to the page when it is written
-to a file. Note that we must be careful to calculate the same value on
-32-bit and 64-bit architectures.
-@return checksum */
-UNIV_INTERN
-ulint
-buf_calc_page_new_checksum(
-/*=======================*/
- const byte* page); /*!< in: buffer page */
-
-/********************************************************************//**
-In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
-looked at the first few bytes of the page. This calculates that old
-checksum.
-NOTE: we must first store the new formula checksum to
-FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
-because this takes that field as an input!
-@return checksum */
-UNIV_INTERN
-ulint
-buf_calc_page_old_checksum(
-/*=======================*/
- const byte* page); /*!< in: buffer page */
-
-/********************************************************************//**
-Return a printable string describing the checksum algorithm.
-@return algorithm name */
-UNIV_INTERN
-const char*
-buf_checksum_algorithm_name(
-/*========================*/
- srv_checksum_algorithm_t algo); /*!< in: algorithm */
-
-extern ulong srv_checksum_algorithm;
-
-#endif /* buf0checksum_h */
diff --git a/storage/xtradb/include/buf0dblwr.h b/storage/xtradb/include/buf0dblwr.h
deleted file mode 100644
index 7b7464761cc..00000000000
--- a/storage/xtradb/include/buf0dblwr.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0dblwr.h
-Doublewrite buffer module
-
-Created 2011/12/19 Inaam Rana
-*******************************************************/
-
-#ifndef buf0dblwr_h
-#define buf0dblwr_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "log0log.h"
-#include "log0recv.h"
-
-#ifndef UNIV_HOTBACKUP
-
-/** Doublewrite system */
-extern buf_dblwr_t* buf_dblwr;
-/** Set to TRUE when the doublewrite buffer is being created */
-extern ibool buf_dblwr_being_created;
-
-/** Create the doublewrite buffer if the doublewrite buffer header
-is not present in the TRX_SYS page.
-@return whether the operation succeeded
-@retval true if the doublewrite buffer exists or was created
-@retval false if the creation failed (too small first data file) */
-UNIV_INTERN
-bool
-buf_dblwr_create()
- MY_ATTRIBUTE((warn_unused_result));
-
-/****************************************************************//**
-At a database startup initializes the doublewrite buffer memory structure if
-we already have a doublewrite buffer created in the data files. If we are
-upgrading to an InnoDB version which supports multiple tablespaces, then this
-function performs the necessary update operations. If we are in a crash
-recovery, this function loads the pages from double write buffer into memory. */
-void
-buf_dblwr_init_or_load_pages(
-/*=========================*/
- pfs_os_file_t file,
- char* path,
- bool load_corrupt_pages);
-
-/****************************************************************//**
-Process the double write buffer pages. */
-void
-buf_dblwr_process(void);
-/*===================*/
-
-/****************************************************************//**
-Frees the doublewrite buffer. */
-UNIV_INTERN
-void
-buf_dblwr_free(void);
-/*================*/
-/********************************************************************//**
-Updates the doublewrite buffer when an IO request is completed. */
-UNIV_INTERN
-void
-buf_dblwr_update(
-/*=============*/
- const buf_page_t* bpage, /*!< in: buffer block descriptor */
- buf_flush_t flush_type);/*!< in: flush type */
-/****************************************************************//**
-Determines if a page number is located inside the doublewrite buffer.
-@return TRUE if the location is inside the two blocks of the
-doublewrite buffer */
-UNIV_INTERN
-ibool
-buf_dblwr_page_inside(
-/*==================*/
- ulint page_no); /*!< in: page number */
-/********************************************************************//**
-Posts a buffer page for writing. If the doublewrite memory buffer is
-full, calls buf_dblwr_flush_buffered_writes and waits for free
-space to appear. */
-UNIV_INTERN
-void
-buf_dblwr_add_to_batch(
-/*====================*/
- buf_page_t* bpage); /*!< in: buffer block to write */
-/********************************************************************//**
-Flushes possible buffered writes from the doublewrite memory buffer to disk,
-and also wakes up the aio thread if simulated aio is used. It is very
-important to call this function after a batch of writes has been posted,
-and also when we may have to wait for a page latch! Otherwise a deadlock
-of threads can occur. */
-UNIV_INTERN
-void
-buf_dblwr_flush_buffered_writes(void);
-/*=================================*/
-/********************************************************************//**
-Writes a page to the doublewrite buffer on disk, sync it, then write
-the page to the datafile and sync the datafile. This function is used
-for single page flushes. If all the buffers allocated for single page
-flushes in the doublewrite buffer are in use we wait here for one to
-become free. We are guaranteed that a slot will become free because any
-thread that is using a slot must also release the slot before leaving
-this function. */
-UNIV_INTERN
-void
-buf_dblwr_write_single_page(
-/*========================*/
- buf_page_t* bpage, /*!< in: buffer block to write */
- bool sync); /*!< in: true if sync IO requested */
-
-/** Doublewrite control struct: the locations of the two doublewrite
-blocks, the slot bookkeeping for batch and single page flushes, and the
-in-memory write buffer together with the pages cached in it. */
-struct buf_dblwr_t{
- ib_mutex_t mutex; /*!< mutex protecting the first_free
- field and write_buf */
- ulint block1; /*!< the page number of the first
- doublewrite block (64 pages) */
- ulint block2; /*!< page number of the second block */
- ulint first_free;/*!< first free position in write_buf
- measured in units of UNIV_PAGE_SIZE */
- ulint b_reserved;/*!< number of slots currently reserved
- for batch flush. */
- os_event_t b_event;/*!< event where threads wait for a
- batch flush to end;
- os_event_set() and os_event_reset()
- are protected by buf_dblwr_t::mutex */
- ulint s_reserved;/*!< number of slots currently
- reserved for single page flushes. */
- os_event_t s_event;/*!< event where threads wait for a
- single page flush slot. Protected by mutex. */
- bool* in_use; /*!< flag used to indicate if a slot is
- in use. Only used for single page
- flushes. */
- bool batch_running;/*!< set to true if currently a batch
- is being written from the doublewrite
- buffer. */
- byte* write_buf;/*!< write buffer used in writing to the
- doublewrite buffer, aligned to an
- address divisible by UNIV_PAGE_SIZE
- (which is required by Windows aio) */
- byte* write_buf_unaligned;/*!< pointer to write_buf,
- but unaligned */
- buf_page_t** buf_block_arr;/*!< array to store pointers to
- the buffer blocks which have been
- cached to write_buf */
-};
-
-
-#endif /* UNIV_HOTBACKUP */
-
-#endif
diff --git a/storage/xtradb/include/buf0dump.h b/storage/xtradb/include/buf0dump.h
deleted file mode 100644
index c704a8e97e0..00000000000
--- a/storage/xtradb/include/buf0dump.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0dump.h
-Implements a buffer pool dump/load.
-
-Created April 08, 2011 Vasil Dimov
-*******************************************************/
-
-#ifndef buf0dump_h
-#define buf0dump_h
-
-#include "univ.i"
-
-/*****************************************************************//**
-Wakes up the buffer pool dump/load thread and instructs it to start
-a dump. This function is called by MySQL code via buffer_pool_dump_now()
-and it should return immediately because the whole MySQL is frozen during
-its execution. */
-UNIV_INTERN
-void
-buf_dump_start();
-/*============*/
-
-/*****************************************************************//**
-Wakes up the buffer pool dump/load thread and instructs it to start
-a load. This function is called by MySQL code via buffer_pool_load_now()
-and it should return immediately because the whole MySQL is frozen during
-its execution. */
-UNIV_INTERN
-void
-buf_load_start();
-/*============*/
-
-/*****************************************************************//**
-Aborts a currently running buffer pool load. This function is called by
-MySQL code via buffer_pool_load_abort() and it should return immediately
-because the whole MySQL is frozen during its execution. */
-UNIV_INTERN
-void
-buf_load_abort();
-/*============*/
-
-/*****************************************************************//**
-This is the main thread for buffer pool dump/load. It waits for an
-event and, when woken up, performs either a dump or a load and then
-sleeps again.
-@return this function does not return, it calls os_thread_exit() */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(buf_dump_thread)(
-/*============================*/
- void* arg); /*!< in: a dummy parameter
- required by os_thread_create */
-
-#endif /* buf0dump_h */
diff --git a/storage/xtradb/include/buf0flu.h b/storage/xtradb/include/buf0flu.h
deleted file mode 100644
index 6089baf81e8..00000000000
--- a/storage/xtradb/include/buf0flu.h
+++ /dev/null
@@ -1,382 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0flu.h
-The database buffer pool flush algorithm
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0flu_h
-#define buf0flu_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "log0log.h"
-#ifndef UNIV_HOTBACKUP
-#include "mtr0types.h"
-#include "buf0types.h"
-
-/** Flag indicating if the page_cleaner is in active state. */
-extern bool buf_page_cleaner_is_active;
-
-/** Flag indicating if the lru_manager is in active state. */
-extern bool buf_lru_manager_is_active;
-
-/** Handled page counters for a single flush */
-struct flush_counters_t {
- ulint flushed; /*!< number of dirty pages flushed */
- ulint evicted; /*!< number of clean pages evicted */
- ulint unzip_LRU_evicted;/*!< number of uncompressed page images
- evicted */
-};
-
-/********************************************************************//**
-Remove a block from the flush list of modified blocks. */
-UNIV_INTERN
-void
-buf_flush_remove(
-/*=============*/
- buf_page_t* bpage); /*!< in: pointer to the block in question */
-/*******************************************************************//**
-Relocates a buffer control block on the flush_list.
-Note that it is assumed that the contents of bpage has already been
-copied to dpage. */
-UNIV_INTERN
-void
-buf_flush_relocate_on_flush_list(
-/*=============================*/
- buf_page_t* bpage, /*!< in/out: control block being moved */
- buf_page_t* dpage); /*!< in/out: destination block */
-/********************************************************************//**
-Updates the flush system data structures when a write is completed. */
-UNIV_INTERN
-void
-buf_flush_write_complete(
-/*=====================*/
- buf_page_t* bpage); /*!< in: pointer to the block in question */
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Initializes a page for writing to the tablespace. */
-UNIV_INTERN
-void
-buf_flush_init_for_writing(
-/*=======================*/
- byte* page, /*!< in/out: page */
- void* page_zip_, /*!< in/out: compressed page, or NULL */
- lsn_t newest_lsn); /*!< in: newest modification lsn
- to the page */
-#ifndef UNIV_HOTBACKUP
-# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-/********************************************************************//**
-Writes a flushable page asynchronously from the buffer pool to a file.
-NOTE: block and LRU list mutexes must be held upon entering this function, and
-they will be released by this function after flushing. This is loosely based on
-buf_flush_batch() and buf_flush_page().
-@return TRUE if the page was flushed and the mutexes released */
-UNIV_INTERN
-ibool
-buf_flush_page_try(
-/*===============*/
- buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
- buf_block_t* block) /*!< in/out: buffer control block */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the flush list of
-all buffer pool instances.
-NOTE: The calling thread is not allowed to own any latches on pages!
-@return true if a batch was queued successfully for each buffer pool
-instance. false if another batch of same type was already running in
-at least one of the buffer pool instances */
-UNIV_INTERN
-bool
-buf_flush_list(
-/*===========*/
- ulint min_n, /*!< in: wished minimum number of blocks
- flushed (it is not guaranteed that the
- actual number is that big, though) */
- lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST all
- blocks whose oldest_modification is
- smaller than this should be flushed
- (if their number does not exceed
- min_n), otherwise ignored */
- ulint* n_processed); /*!< out: the number of pages
- which were processed is passed
- back to caller. Ignored if NULL */
-/******************************************************************//**
-This function picks up a single dirty page from the tail of the LRU
-list, flushes it, removes it from page_hash and LRU list and puts
-it on the free list. It is called from user threads when they are
-unable to find a replaceable page at the tail of the LRU list i.e.:
-when the background LRU flushing in the page_cleaner thread is not
-fast enough to keep pace with the workload.
-@return TRUE if success. */
-UNIV_INTERN
-ibool
-buf_flush_single_page_from_LRU(
-/*===========================*/
- buf_pool_t* buf_pool); /*!< in/out: buffer pool instance */
-/******************************************************************//**
-Waits until a flush batch of the given type ends */
-UNIV_INTERN
-void
-buf_flush_wait_batch_end(
-/*=====================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_flush_t type); /*!< in: BUF_FLUSH_LRU
- or BUF_FLUSH_LIST */
-/******************************************************************//**
-Waits until a flush batch of the given type ends. This is called by
-a thread that only wants to wait for a flush to end but doesn't do
-any flushing itself. */
-UNIV_INTERN
-void
-buf_flush_wait_batch_end_wait_only(
-/*===============================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_flush_t type); /*!< in: BUF_FLUSH_LRU
- or BUF_FLUSH_LIST */
-/********************************************************************//**
-This function should be called at a mini-transaction commit, if a page was
-modified in it. Puts the block to the list of modified blocks, if it is not
-already in it. */
-UNIV_INLINE
-void
-buf_flush_note_modification(
-/*========================*/
- buf_block_t* block, /*!< in: block which is modified */
- mtr_t* mtr); /*!< in: mtr */
-/********************************************************************//**
-This function should be called when recovery has modified a buffer page. */
-UNIV_INLINE
-void
-buf_flush_recv_note_modification(
-/*=============================*/
- buf_block_t* block, /*!< in: block which is modified */
- lsn_t start_lsn, /*!< in: start lsn of the first mtr in a
- set of mtr's */
- lsn_t end_lsn); /*!< in: end lsn of the last mtr in the
- set of mtr's */
-/********************************************************************//**
-Returns TRUE if the file page block is immediately suitable for replacement,
-i.e., transition FILE_PAGE => NOT_USED allowed.
-@return TRUE if can replace immediately */
-UNIV_INTERN
-ibool
-buf_flush_ready_for_replace(
-/*========================*/
- buf_page_t* bpage); /*!< in: buffer control block, must be
- buf_page_in_file(bpage) and in the LRU list */
-/******************************************************************//**
-page_cleaner thread tasked with flushing dirty pages from the buffer
-pool flush lists. As of now we'll have only one instance of this thread.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(buf_flush_page_cleaner_thread)(
-/*==========================================*/
- void* arg); /*!< in: a dummy parameter required by
- os_thread_create */
-/******************************************************************//**
-lru_manager thread tasked with performing LRU flushes and evictions to refill
-the buffer pool free lists. As of now we'll have only one instance of this
-thread.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(buf_flush_lru_manager_thread)(
-/*=========================================*/
- void* arg); /*!< in: a dummy parameter required by
- os_thread_create */
-/*********************************************************************//**
-Clears up tail of the LRU lists:
-* Put replaceable pages at the tail of LRU to the free list
-* Flush dirty pages at the tail of LRU to the disk
-The depth to which we scan each buffer pool is controlled by dynamic
-config parameter innodb_LRU_scan_depth.
-@return number of pages flushed */
-UNIV_INTERN
-ulint
-buf_flush_LRU_tail(void);
-/*====================*/
-/*********************************************************************//**
-Wait for any possible LRU flushes that are in progress to end. */
-UNIV_INTERN
-void
-buf_flush_wait_LRU_batch_end(void);
-/*==============================*/
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/******************************************************************//**
-Validates the flush list.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-buf_flush_validate(
-/*===============*/
- buf_pool_t* buf_pool);
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-/********************************************************************//**
-Initialize the red-black tree to speed up insertions into the flush_list
-during recovery process. Should be called at the start of recovery
-process before any page has been read/written. */
-UNIV_INTERN
-void
-buf_flush_init_flush_rbt(void);
-/*==========================*/
-
-/********************************************************************//**
-Frees up the red-black tree. */
-UNIV_INTERN
-void
-buf_flush_free_flush_rbt(void);
-/*==========================*/
-
-/********************************************************************//**
-Writes a flushable page asynchronously from the buffer pool to a file.
-NOTE: in simulated aio we must call
-os_aio_simulated_wake_handler_threads after we have posted a batch of
-writes! NOTE: buf_page_get_mutex(bpage) must be held upon entering this
-function, and it will be released by this function if it returns true.
-LRU_list_mutex must be held iff performing a single page flush and will be
-released by the function if it returns true.
-@return TRUE if the page was flushed */
-UNIV_INTERN
-bool
-buf_flush_page(
-/*===========*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_page_t* bpage, /*!< in: buffer control block */
- buf_flush_t flush_type, /*!< in: type of flush */
- bool sync); /*!< in: true if sync IO request */
-/********************************************************************//**
-Returns true if the block is modified and ready for flushing.
-@return true if can flush immediately */
-UNIV_INTERN
-bool
-buf_flush_ready_for_flush(
-/*======================*/
- buf_page_t* bpage, /*!< in: buffer control block, must be
- buf_page_in_file(bpage) */
- buf_flush_t flush_type)/*!< in: type of flush */
- MY_ATTRIBUTE((warn_unused_result));
-
-#ifdef UNIV_DEBUG
-/******************************************************************//**
-Check if there are any dirty pages that belong to a space id in the flush
-list in a particular buffer pool.
-@return number of dirty pages present in a single buffer pool */
-UNIV_INTERN
-ulint
-buf_pool_get_dirty_pages_count(
-/*===========================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool */
- ulint id); /*!< in: space id to check */
-/******************************************************************//**
-Check if there are any dirty pages that belong to a space id in the flush list.
-@return count of dirty pages present in all the buffer pools */
-UNIV_INTERN
-ulint
-buf_flush_get_dirty_pages_count(
-/*============================*/
- ulint id); /*!< in: space id to check */
-#endif /* UNIV_DEBUG */
-
-#endif /* !UNIV_HOTBACKUP */
-
-/******************************************************************//**
-Check if a flush list flush is in progress for any buffer pool instance, or if
-all the instances are clean, for heuristic purposes.
-@return true if flush list flush is in progress or buffer pool is clean */
-UNIV_INLINE
-bool
-buf_flush_flush_list_in_progress(void)
-/*==================================*/
- MY_ATTRIBUTE((warn_unused_result));
-
-/** If LRU list of a buf_pool is less than this size then LRU eviction
-should not happen. This is because when we do LRU flushing we also put
-the blocks on free list. If LRU list is very small then we can end up
-in thrashing. */
-#define BUF_LRU_MIN_LEN 256
-
-/******************************************************************//**
-Start a buffer flush batch for LRU or flush list */
-ibool
-buf_flush_start(
-/*============*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU
- or BUF_FLUSH_LIST */
-
-/******************************************************************//**
-End a buffer flush batch for LRU or flush list */
-void
-buf_flush_end(
-/*==========*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU
- or BUF_FLUSH_LIST */
-
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the LRU list or flush_list.
-NOTE 1: in the case of an LRU flush the calling thread may own latches to
-pages: to avoid deadlocks, this function must be written so that it cannot
-end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages!
-The flushed/evicted page counts are passed back through the out-parameter n. */
-__attribute__((nonnull))
-void
-buf_flush_batch(
-/*============*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or
- BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
- then the caller must not own any
- latches on pages */
- ulint min_n, /*!< in: wished minimum number of blocks
- flushed (it is not guaranteed that the
- actual number is that big, though) */
- lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST
- all blocks whose oldest_modification is
- smaller than this should be flushed
- (if their number does not exceed
- min_n), otherwise ignored */
- bool limited_lru_scan,/*!< in: for LRU flushes, if true,
- allow to scan only up to
- srv_LRU_scan_depth pages in total */
- flush_counters_t* n); /*!< out: flushed/evicted page
- counts */
-
-
-/******************************************************************//**
-Gather the aggregated stats for both flush list and LRU list flushing */
-void
-buf_flush_common(
-/*=============*/
- buf_flush_t flush_type, /*!< in: type of flush */
- ulint page_count); /*!< in: number of pages flushed */
-
-#ifndef UNIV_NONINL
-#include "buf0flu.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/buf0flu.ic b/storage/xtradb/include/buf0flu.ic
deleted file mode 100644
index 06fa49754cd..00000000000
--- a/storage/xtradb/include/buf0flu.ic
+++ /dev/null
@@ -1,167 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0flu.ic
-The database buffer pool flush algorithm
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef UNIV_HOTBACKUP
-#include "buf0buf.h"
-#include "mtr0mtr.h"
-#include "srv0srv.h"
-
-/********************************************************************//**
-Inserts a modified block into the flush list. */
-UNIV_INTERN
-void
-buf_flush_insert_into_flush_list(
-/*=============================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- buf_block_t* block, /*!< in/out: block which is modified */
- lsn_t lsn); /*!< in: oldest modification */
-/********************************************************************//**
-Inserts a modified block into the flush list in the right sorted position.
-This function is used by recovery, because there the modifications do not
-necessarily come in the order of lsn's. */
-UNIV_INTERN
-void
-buf_flush_insert_sorted_into_flush_list(
-/*====================================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- buf_block_t* block, /*!< in/out: block which is modified */
- lsn_t lsn); /*!< in: oldest modification */
-
-/********************************************************************//**
-This function should be called at a mini-transaction commit, if a page was
-modified in it. Puts the block to the list of modified blocks, if it is not
-already in it. */
-UNIV_INLINE
-void
-buf_flush_note_modification(
-/*========================*/
- buf_block_t* block, /*!< in: block which is modified */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_pool_t* buf_pool = buf_pool_from_block(block);
-
- ut_ad(!srv_read_only_mode);
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->page.buf_fix_count > 0);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_ad(!buf_flush_list_mutex_own(buf_pool));
- ut_ad(!mtr->made_dirty || log_flush_order_mutex_own());
-
- ut_ad(mtr->start_lsn != 0);
- ut_ad(mtr->modifications);
-
- mutex_enter(&block->mutex);
- ut_ad(block->page.newest_modification <= mtr->end_lsn);
-
- block->page.newest_modification = mtr->end_lsn;
-
- if (!block->page.oldest_modification) {
- ut_a(mtr->made_dirty);
- ut_ad(log_flush_order_mutex_own());
- buf_flush_insert_into_flush_list(
- buf_pool, block, mtr->start_lsn);
- } else {
- ut_ad(block->page.oldest_modification <= mtr->start_lsn);
- }
-
- mutex_exit(&block->mutex);
-
- srv_stats.buf_pool_write_requests.inc();
-}
-
-/********************************************************************//**
-This function should be called when recovery has modified a buffer page. */
-UNIV_INLINE
-void
-buf_flush_recv_note_modification(
-/*=============================*/
- buf_block_t* block, /*!< in: block which is modified */
- lsn_t start_lsn, /*!< in: start lsn of the first mtr in a
- set of mtr's */
- lsn_t end_lsn) /*!< in: end lsn of the last mtr in the
- set of mtr's */
-{
- buf_pool_t* buf_pool = buf_pool_from_block(block);
-
- ut_ad(!srv_read_only_mode);
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->page.buf_fix_count > 0);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_ad(!buf_flush_list_mutex_own(buf_pool));
- ut_ad(log_flush_order_mutex_own());
-
- ut_ad(start_lsn != 0);
- ut_ad(block->page.newest_modification <= end_lsn);
-
- mutex_enter(&block->mutex);
- block->page.newest_modification = end_lsn;
-
- if (!block->page.oldest_modification) {
- buf_flush_insert_sorted_into_flush_list(
- buf_pool, block, start_lsn);
- } else {
- ut_ad(block->page.oldest_modification <= start_lsn);
- }
-
- mutex_exit(&block->mutex);
-
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/******************************************************************//**
-Check if a flush list flush is in progress for any buffer pool instance, or if
-all the instances are clean, for heuristic purposes.
-@return true if flush list flush is in progress or buffer pool is clean */
-UNIV_INLINE
-bool
-buf_flush_flush_list_in_progress(void)
-/*==================================*/
-{
- bool all_clean = true;
-
- for (ulint i = 0; i < srv_buf_pool_instances; i++) {
-
- const buf_pool_t* buf_pool = buf_pool_from_array(i);
- if (buf_pool->init_flush[BUF_FLUSH_LIST]
- || buf_pool->n_flush[BUF_FLUSH_LIST]) {
-
- return(true);
- }
-
- if (all_clean) {
-
- all_clean = (UT_LIST_GET_LEN(buf_pool->flush_list)
- == 0);
- }
-
- }
- return(all_clean);
-}
diff --git a/storage/xtradb/include/buf0lru.h b/storage/xtradb/include/buf0lru.h
deleted file mode 100644
index f056c6c4116..00000000000
--- a/storage/xtradb/include/buf0lru.h
+++ /dev/null
@@ -1,314 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0lru.h
-The database buffer pool LRU replacement algorithm
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0lru_h
-#define buf0lru_h
-
-#include "univ.i"
-#ifndef UNIV_HOTBACKUP
-#ifndef UNIV_INNOCHECKSUM
-#include "ut0byte.h"
-#endif
-#include "buf0types.h"
-
-// Forward declaration
-struct trx_t;
-
-/******************************************************************//**
-Returns TRUE if less than 25 % of the buffer pool is available. This can be
-used in heuristics to prevent huge transactions eating up the whole buffer
-pool for their locks.
-@return TRUE if less than 25 % of buffer pool left */
-UNIV_INTERN
-ibool
-buf_LRU_buf_pool_running_out(void);
-/*==============================*/
-
-/*#######################################################################
-These are low-level functions
-#########################################################################*/
-
-/** Minimum LRU list length for which the LRU_old pointer is defined */
-#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */
-
-/******************************************************************//**
-Flushes all dirty pages or removes all pages belonging
-to a given tablespace. A PROBLEM: if readahead is being started, what
-guarantees that it will not try to read in pages after this operation
-has completed? */
-UNIV_INTERN
-void
-buf_LRU_flush_or_remove_pages(
-/*==========================*/
- ulint id, /*!< in: space id */
- buf_remove_t buf_remove, /*!< in: remove or flush strategy */
- const trx_t* trx); /*!< to check if the operation must
- be interrupted */
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/********************************************************************//**
-Insert a compressed block into buf_pool->zip_clean in the LRU order. */
-UNIV_INTERN
-void
-buf_LRU_insert_zip_clean(
-/*=====================*/
- buf_page_t* bpage); /*!< in: pointer to the block in question */
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-/******************************************************************//**
-Try to free a block. If bpage is a descriptor of a compressed-only
-page, the descriptor object will be freed as well.
-
-NOTE: If this function returns true, it will release the LRU list mutex,
-and temporarily release and relock the buf_page_get_mutex() mutex.
-Furthermore, the page frame will no longer be accessible via bpage. If this
-function returns false, the buf_page_get_mutex() might be temporarily released
-and relocked too.
-
-The caller must hold the LRU list and buf_page_get_mutex() mutexes.
-
-@return true if freed, false otherwise. */
-UNIV_INTERN
-bool
-buf_LRU_free_page(
-/*==============*/
- buf_page_t* bpage, /*!< in: block to be freed */
- bool zip) /*!< in: true if should remove also the
- compressed page of an uncompressed page */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Try to free a replaceable block.
-@return TRUE if found and freed */
-UNIV_INTERN
-ibool
-buf_LRU_scan_and_free_block(
-/*========================*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- ibool scan_all) /*!< in: scan whole LRU list
- if TRUE, otherwise scan only
- 'old' blocks. */
- MY_ATTRIBUTE((nonnull,warn_unused_result));
-/******************************************************************//**
-Returns a free block from the buf_pool. The block is taken off the
-free list. If it is empty, returns NULL.
-@return a free control block, or NULL if the buf_pool->free list is empty */
-UNIV_INTERN
-buf_block_t*
-buf_LRU_get_free_only(
-/*==================*/
- buf_pool_t* buf_pool); /*!< buffer pool instance */
-/******************************************************************//**
-Returns a free block from the buf_pool. The block is taken off the
-free list. If it is empty, blocks are moved from the end of the
-LRU list to the free list.
-This function is called from a user thread when it needs a clean
-block to read in a page. Note that we only ever get a block from
-the free list. Even when we flush a page or find a page in LRU scan
-we put it to free list to be used.
-* iteration 0:
- * get a block from free list, success:done
- * if there is an LRU flush batch in progress:
- * wait for batch to end: retry free list
- * if buf_pool->try_LRU_scan is set
- * scan LRU up to srv_LRU_scan_depth to find a clean block
- * the above will put the block on free list
- * success:retry the free list
- * flush one dirty page from tail of LRU to disk
- * the above will put the block on free list
- * success: retry the free list
-* iteration 1:
- * same as iteration 0 except:
- * scan whole LRU list
- * scan LRU list even if buf_pool->try_LRU_scan is not set
-* iteration > 1:
- * same as iteration 1 but sleep 100ms
-@return the free control block, in state BUF_BLOCK_READY_FOR_USE */
-UNIV_INTERN
-buf_block_t*
-buf_LRU_get_free_block(
-/*===================*/
- buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */
- MY_ATTRIBUTE((nonnull,warn_unused_result));
-/******************************************************************//**
-Determines if the unzip_LRU list should be used for evicting a victim
-instead of the general LRU list.
-@return TRUE if should use unzip_LRU */
-UNIV_INTERN
-ibool
-buf_LRU_evict_from_unzip_LRU(
-/*=========================*/
- buf_pool_t* buf_pool);
-/******************************************************************//**
-Puts a block back to the free list. */
-UNIV_INTERN
-void
-buf_LRU_block_free_non_file_page(
-/*=============================*/
- buf_block_t* block); /*!< in: block, must not contain a file page */
-/******************************************************************//**
-Adds a block to the LRU list. Please make sure that the zip_size is
-already set into the page zip when invoking the function, so that we
-can get correct zip_size from the buffer page when adding a block
-into LRU */
-UNIV_INTERN
-void
-buf_LRU_add_block(
-/*==============*/
- buf_page_t* bpage, /*!< in: control block */
- ibool old); /*!< in: TRUE if should be put to the old
- blocks in the LRU list, else put to the
- start; if the LRU list is very short, added to
- the start regardless of this parameter */
-/******************************************************************//**
-Adds a block to the LRU list of decompressed zip pages. */
-UNIV_INTERN
-void
-buf_unzip_LRU_add_block(
-/*====================*/
- buf_block_t* block, /*!< in: control block */
- ibool old); /*!< in: TRUE if should be put to the end
- of the list, else put to the start */
-/******************************************************************//**
-Moves a block to the start of the LRU list. */
-UNIV_INTERN
-void
-buf_LRU_make_block_young(
-/*=====================*/
- buf_page_t* bpage); /*!< in: control block */
-/******************************************************************//**
-Moves a block to the end of the LRU list. */
-UNIV_INTERN
-void
-buf_LRU_make_block_old(
-/*===================*/
- buf_page_t* bpage); /*!< in: control block */
-/**********************************************************************//**
-Updates buf_pool->LRU_old_ratio.
-@return updated old_pct */
-UNIV_INTERN
-ulint
-buf_LRU_old_ratio_update(
-/*=====================*/
- uint old_pct,/*!< in: Reserve this percentage of
- the buffer pool for "old" blocks. */
- ibool adjust);/*!< in: TRUE=adjust the LRU list;
- FALSE=just assign buf_pool->LRU_old_ratio
- during the initialization of InnoDB */
-/********************************************************************//**
-Update the historical stats that we are collecting for LRU eviction
-policy at the end of each interval. */
-UNIV_INTERN
-void
-buf_LRU_stat_update(void);
-/*=====================*/
-
-/******************************************************************//**
-Remove one page from LRU list and put it to free list */
-UNIV_INTERN
-void
-buf_LRU_free_one_page(
-/*==================*/
- buf_page_t* bpage) /*!< in/out: block, must contain a file page and
- be in a state where it can be freed; there
- may or may not be a hash index to the page */
- MY_ATTRIBUTE((nonnull));
-
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/**********************************************************************//**
-Validates the LRU list.
-@return TRUE */
-UNIV_INTERN
-ibool
-buf_LRU_validate(void);
-/*==================*/
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/**********************************************************************//**
-Prints the LRU list. */
-UNIV_INTERN
-void
-buf_LRU_print(void);
-/*===============*/
-#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
-
-/** @name Heuristics for detecting index scan @{ */
-/** The denominator of buf_pool->LRU_old_ratio. */
-#define BUF_LRU_OLD_RATIO_DIV 1024
-/** Maximum value of buf_pool->LRU_old_ratio.
-@see buf_LRU_old_adjust_len
-@see buf_LRU_old_ratio_update */
-#define BUF_LRU_OLD_RATIO_MAX BUF_LRU_OLD_RATIO_DIV
-/** Minimum value of buf_pool->LRU_old_ratio.
-@see buf_LRU_old_adjust_len
-@see buf_LRU_old_ratio_update
-The minimum must exceed
-(BUF_LRU_OLD_TOLERANCE + 5) * BUF_LRU_OLD_RATIO_DIV / BUF_LRU_OLD_MIN_LEN. */
-#define BUF_LRU_OLD_RATIO_MIN 51
-
-#if BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX
-# error "BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX"
-#endif
-#if BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV
-# error "BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV"
-#endif
-
-/** Move blocks to "new" LRU list only if the first access was at
-least this many milliseconds ago. Not protected by any mutex or latch. */
-extern uint buf_LRU_old_threshold_ms;
-/* @} */
-
-/** @brief Statistics for selecting the LRU list for eviction.
-
-These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O
-and page_zip_decompress() operations. Based on the statistics we decide
-if we want to evict from buf_pool->unzip_LRU or buf_pool->LRU. */
-struct buf_LRU_stat_t
-{
- ulint io; /**< Counter of buffer pool I/O operations. */
- ulint unzip; /**< Counter of page_zip_decompress operations. */
-};
-
-/** Current operation counters. Not protected by any mutex.
-Cleared by buf_LRU_stat_update(). */
-extern buf_LRU_stat_t buf_LRU_stat_cur;
-
-/** Running sum of past values of buf_LRU_stat_cur.
-Updated by buf_LRU_stat_update(). */
-extern buf_LRU_stat_t buf_LRU_stat_sum;
-
-/********************************************************************//**
-Increments the I/O counter in buf_LRU_stat_cur. */
-#define buf_LRU_stat_inc_io() buf_LRU_stat_cur.io++
-/********************************************************************//**
-Increments the page_zip_decompress() counter in buf_LRU_stat_cur. */
-#define buf_LRU_stat_inc_unzip() buf_LRU_stat_cur.unzip++
-
-#ifndef UNIV_NONINL
-#include "buf0lru.ic"
-#endif
-
-#endif /* !UNIV_HOTBACKUP */
-
-#endif
diff --git a/storage/xtradb/include/buf0lru.ic b/storage/xtradb/include/buf0lru.ic
deleted file mode 100644
index 6e0da7a2588..00000000000
--- a/storage/xtradb/include/buf0lru.ic
+++ /dev/null
@@ -1,25 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0lru.ic
-The database buffer replacement algorithm
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/xtradb/include/buf0mtflu.h b/storage/xtradb/include/buf0mtflu.h
deleted file mode 100644
index 0475335bbf5..00000000000
--- a/storage/xtradb/include/buf0mtflu.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2014 SkySQL Ab. All Rights Reserved.
-Copyright (C) 2014 Fusion-io. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/buf0mtflu.h
-Multi-threaded flush method interface function prototypes
-
-Created 06/02/2014 Jan Lindström jan.lindstrom@skysql.com
- Dhananjoy Das DDas@fusionio.com
-***********************************************************************/
-
-#ifndef buf0mtflu_h
-#define buf0mtflu_h
-
-/******************************************************************//**
-Add exit work item to work queue to signal multi-threaded flush
-threads that they should exit.
-*/
-void
-buf_mtflu_io_thread_exit(void);
-/*===========================*/
-
-/******************************************************************//**
-Initialize multi-threaded flush thread synchronization data.
-@return Initialized multi-threaded flush thread synchronization data. */
-void*
-buf_mtflu_handler_init(
-/*===================*/
- ulint n_threads, /*!< in: Number of threads to create */
- ulint wrk_cnt); /*!< in: Number of work items */
-
-/******************************************************************//**
-Return true if multi-threaded flush is initialized
-@return true if initialized, false if not */
-bool
-buf_mtflu_init_done(void);
-/*======================*/
-
-/*********************************************************************//**
-Clears up tail of the LRU lists:
-* Put replaceable pages at the tail of LRU to the free list
-* Flush dirty pages at the tail of LRU to the disk
-The depth to which we scan each buffer pool is controlled by dynamic
-config parameter innodb_LRU_scan_depth.
-@return total pages flushed */
-UNIV_INTERN
-ulint
-buf_mtflu_flush_LRU_tail(void);
-/*===========================*/
-
-/*******************************************************************//**
-Multi-threaded version of buf_flush_list
-*/
-bool
-buf_mtflu_flush_list(
-/*=================*/
- ulint min_n, /*!< in: wished minimum number of blocks
- flushed (it is not guaranteed that the
- actual number is that big, though) */
- lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all
- blocks whose oldest_modification is
- smaller than this should be flushed
- (if their number does not exceed
- min_n), otherwise ignored */
- ulint* n_processed); /*!< out: the number of pages
- which were processed is passed
- back to caller. Ignored if NULL */
-
-/*********************************************************************//**
-Set correct thread identifiers to io thread array based on
-information we have. */
-void
-buf_mtflu_set_thread_ids(
-/*=====================*/
- ulint n_threads, /*!<in: Number of threads to fill */
- void* ctx, /*!<in: thread context */
- os_thread_id_t* thread_ids); /*!<in: thread id array */
-
-#endif
diff --git a/storage/xtradb/include/buf0rea.h b/storage/xtradb/include/buf0rea.h
deleted file mode 100644
index ab73108a71e..00000000000
--- a/storage/xtradb/include/buf0rea.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0rea.h
-The database buffer read
-
-Created 11/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0rea_h
-#define buf0rea_h
-
-#include "univ.i"
-#include "buf0types.h"
-
-/********************************************************************//**
-High-level function which reads a page asynchronously from a file to the
-buffer buf_pool if it is not already there. Sets the io_fix flag and sets
-an exclusive lock on the buffer frame. The flag is cleared and the x-lock
-released by the i/o-handler thread.
-
-@param[in] space space_id
-@param[in] zip_size compressed page size in bytes, or 0
-@param[in] offset page number
-@param[in] trx transaction
-@return DB_SUCCESS if page has been read and is not corrupted,
-@retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
-@retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
-after decryption normal page checksum does not match.
-@retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
-UNIV_INTERN
-dberr_t
-buf_read_page(
- ulint space,
- ulint zip_size,
- ulint offset,
- trx_t* trx);
-
-/********************************************************************//**
-High-level function which reads a page asynchronously from a file to the
-buffer buf_pool if it is not already there. Sets the io_fix flag and sets
-an exclusive lock on the buffer frame. The flag is cleared and the x-lock
-released by the i/o-handler thread.
-@param[in] space Tablespace id
-@param[in] offset Page number */
-UNIV_INTERN
-void
-buf_read_page_async(
- ulint space,
- ulint offset);
-
-/********************************************************************//**
-Applies a random read-ahead in buf_pool if there are at least a threshold
-value of accessed pages from the random read-ahead area. Does not read any
-page, not even the one at the position (space, offset), if the read-ahead
-mechanism is not activated. NOTE 1: the calling thread may own latches on
-pages: to avoid deadlocks this function must be written such that it cannot
-end up waiting for these latches! NOTE 2: the calling thread must want
-access to the page given: this rule is set to prevent unintended read-aheads
-performed by ibuf routines, a situation which could result in a deadlock if
-the OS does not support asynchronous i/o.
-@return number of page read requests issued; NOTE that if we read ibuf
-pages, it may happen that the page at the given page number does not
-get read even if we return a positive value!
-@return number of page read requests issued */
-UNIV_INTERN
-ulint
-buf_read_ahead_random(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes,
- or 0 */
- ulint offset, /*!< in: page number of a page which
- the current thread wants to access */
- ibool inside_ibuf, /*!< in: TRUE if we are inside ibuf
- routine */
- trx_t* trx);
-/********************************************************************//**
-Applies linear read-ahead if in the buf_pool the page is a border page of
-a linear read-ahead area and all the pages in the area have been accessed.
-Does not read any page if the read-ahead mechanism is not activated. Note
-that the algorithm looks at the 'natural' adjacent successor and
-predecessor of the page, which on the leaf level of a B-tree are the next
-and previous page in the chain of leaves. To know these, the page specified
-in (space, offset) must already be present in the buf_pool. Thus, the
-natural way to use this function is to call it when a page in the buf_pool
-is accessed the first time, calling this function just after it has been
-bufferfixed.
-NOTE 1: as this function looks at the natural predecessor and successor
-fields on the page, what happens, if these are not initialized to any
-sensible value? No problem, before applying read-ahead we check that the
-area to read is within the span of the space, if not, read-ahead is not
-applied. An uninitialized value may result in a useless read operation, but
-only very improbably.
-NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
-function must be written such that it cannot end up waiting for these
-latches!
-NOTE 3: the calling thread must want access to the page given: this rule is
-set to prevent unintended read-aheads performed by ibuf routines, a situation
-which could result in a deadlock if the OS does not support asynchronous io.
-@return number of page read requests issued */
-UNIV_INTERN
-ulint
-buf_read_ahead_linear(
-/*==================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes, or 0 */
- ulint offset, /*!< in: page number; see NOTE 3 above */
- ibool inside_ibuf, /*!< in: TRUE if we are inside ibuf routine */
- trx_t* trx);
-/********************************************************************//**
-Issues read requests for pages which the ibuf module wants to read in, in
-order to contract the insert buffer tree. Technically, this function is like
-a read-ahead function. */
-UNIV_INTERN
-void
-buf_read_ibuf_merge_pages(
-/*======================*/
- bool sync, /*!< in: true if the caller
- wants this function to wait
- for the highest address page
- to get read in, before this
- function returns */
- const ulint* space_ids, /*!< in: array of space ids */
- const ib_int64_t* space_versions,/*!< in: the spaces must have
- this version number
- (timestamp), otherwise we
- discard the read; we use this
- to cancel reads if DISCARD +
- IMPORT may have changed the
- tablespace size */
- const ulint* page_nos, /*!< in: array of page numbers
- to read, with the highest page
- number the last in the
- array */
- ulint n_stored); /*!< in: number of elements
- in the arrays */
-/********************************************************************//**
-Issues read requests for pages which recovery wants to read in. */
-UNIV_INTERN
-void
-buf_read_recv_pages(
-/*================*/
- ibool sync, /*!< in: TRUE if the caller
- wants this function to wait
- for the highest address page
- to get read in, before this
- function returns */
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in
- bytes, or 0 */
- const ulint* page_nos, /*!< in: array of page numbers
- to read, with the highest page
- number the last in the
- array */
- ulint n_stored); /*!< in: number of page numbers
- in the array */
-
-/** The size in pages of the area which the read-ahead algorithms read if
-invoked */
-#define BUF_READ_AHEAD_AREA(b) ((b)->read_ahead_area)
-
-/** @name Modes used in read-ahead @{ */
-/** read only pages belonging to the insert buffer tree */
-#define BUF_READ_IBUF_PAGES_ONLY 131
-/** read any page */
-#define BUF_READ_ANY_PAGE 132
-/** read any page, but ignore (return an error) if a page does not exist
-instead of crashing like BUF_READ_ANY_PAGE does */
-#define BUF_READ_IGNORE_NONEXISTENT_PAGES 1024
-/* @} */
-
-#endif
diff --git a/storage/xtradb/include/buf0types.h b/storage/xtradb/include/buf0types.h
deleted file mode 100644
index 4eb5ea18cef..00000000000
--- a/storage/xtradb/include/buf0types.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/buf0types.h
-The database buffer pool global types for the directory
-
-Created 11/17/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef buf0types_h
-#define buf0types_h
-
-#if defined(INNODB_PAGE_ATOMIC_REF_COUNT) && defined(HAVE_ATOMIC_BUILTINS)
-#define PAGE_ATOMIC_REF_COUNT
-#endif /* INNODB_PAGE_ATOMIC_REF_COUNT && HAVE_ATOMIC_BUILTINS */
-
-/** Buffer page (uncompressed or compressed) */
-struct buf_page_t;
-/** Buffer block for which an uncompressed page exists */
-struct buf_block_t;
-/** Buffer pool chunk comprising buf_block_t */
-struct buf_chunk_t;
-/** Buffer pool comprising buf_chunk_t */
-struct buf_pool_t;
-/** Buffer pool statistics struct */
-struct buf_pool_stat_t;
-/** Buffer pool buddy statistics struct */
-struct buf_buddy_stat_t;
-/** Doublewrite memory struct */
-struct buf_dblwr_t;
-
-/** A buffer frame. @see page_t */
-typedef byte buf_frame_t;
-
-/** Flags for flush types */
-enum buf_flush_t {
- BUF_FLUSH_LRU = 0, /*!< flush via the LRU list */
- BUF_FLUSH_LIST, /*!< flush via the flush list
- of dirty blocks */
- BUF_FLUSH_SINGLE_PAGE, /*!< flush via the LRU list
- but only a single page */
- BUF_FLUSH_N_TYPES /*!< index of last element + 1 */
-};
-
-/** Algorithm to remove the pages for a tablespace from the buffer pool.
-See buf_LRU_flush_or_remove_pages(). */
-enum buf_remove_t {
- BUF_REMOVE_ALL_NO_WRITE, /*!< Remove all pages from the buffer
- pool, don't write or sync to disk */
- BUF_REMOVE_FLUSH_NO_WRITE, /*!< Remove only, from the flush list,
- don't write or sync to disk */
- BUF_REMOVE_FLUSH_WRITE /*!< Flush dirty pages to disk only
- don't remove from the buffer pool */
-};
-
-/** Flags for io_fix types */
-enum buf_io_fix {
- BUF_IO_NONE = 0, /**< no pending I/O */
- BUF_IO_READ, /**< read pending */
- BUF_IO_WRITE, /**< write pending */
- BUF_IO_PIN /**< disallow relocation of
- block and its removal from
- the flush_list */
-};
-
-/** Alternatives for srv_checksum_algorithm, which can be changed by
-setting innodb_checksum_algorithm */
-enum srv_checksum_algorithm_t {
- SRV_CHECKSUM_ALGORITHM_CRC32, /*!< Write crc32, allow crc32,
- innodb or none when reading */
- SRV_CHECKSUM_ALGORITHM_STRICT_CRC32, /*!< Write crc32, allow crc32
- when reading */
- SRV_CHECKSUM_ALGORITHM_INNODB, /*!< Write innodb, allow crc32,
- innodb or none when reading */
- SRV_CHECKSUM_ALGORITHM_STRICT_INNODB, /*!< Write innodb, allow
- innodb when reading */
- SRV_CHECKSUM_ALGORITHM_NONE, /*!< Write none, allow crc32,
- innodb or none when reading */
- SRV_CHECKSUM_ALGORITHM_STRICT_NONE /*!< Write none, allow none
- when reading */
-};
-
-/** Alternatives for srv_cleaner_lsn_age_factor, set through
-innodb_cleaner_lsn_age_factor variable */
-enum srv_cleaner_lsn_age_factor_t {
- SRV_CLEANER_LSN_AGE_FACTOR_LEGACY, /*!< Original Oracle MySQL 5.6
- formula */
- SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT
- /*!< Percona Server 5.6 formula
- that returns lower values than
- legacy option for low
- checkpoint ages, and higher
- values for high ages. This has
- the effect of stabilizing the
- checkpoint age higher. */
-};
-
-/** Alternatives for srv_foreground_preflush, set through
-innodb_foreground_preflush variable */
-enum srv_foreground_preflush_t {
- SRV_FOREGROUND_PREFLUSH_SYNC_PREFLUSH, /*!< Original Oracle MySQL 5.6
- behavior of performing a sync
- flush list flush */
- SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF /*!< Exponential backoff wait
- for the page cleaner to flush
- for us */
-};
-
-/** Alternatives for srv_empty_free_list_algorithm, set through
-innodb_empty_free_list_algorithm variable */
-enum srv_empty_free_list_t {
- SRV_EMPTY_FREE_LIST_LEGACY, /*!< Original Oracle MySQL 5.6
- algorithm */
- SRV_EMPTY_FREE_LIST_BACKOFF /*!< Percona Server 5.6 algorithm that
- loops in a progressive backoff until a
- free page is produced by the cleaner
- thread */
-};
-
-/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
-/* @{ */
-/** Zip shift value for the smallest page size */
-#define BUF_BUDDY_LOW_SHIFT UNIV_ZIP_SIZE_SHIFT_MIN
-
-/** Smallest buddy page size */
-#define BUF_BUDDY_LOW (1U << BUF_BUDDY_LOW_SHIFT)
-
-/** Actual number of buddy sizes based on current page size */
-#define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
-
-/** Maximum number of buddy sizes based on the max page size */
-#define BUF_BUDDY_SIZES_MAX (UNIV_PAGE_SIZE_SHIFT_MAX \
- - BUF_BUDDY_LOW_SHIFT)
-
-/** twice the maximum block size of the buddy system;
-the underlying memory is aligned by this amount:
-this must be equal to UNIV_PAGE_SIZE */
-#define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
-/* @} */
-
-#endif /* buf0types.h */
diff --git a/storage/xtradb/include/data0data.h b/storage/xtradb/include/data0data.h
deleted file mode 100644
index 1d954bfc07c..00000000000
--- a/storage/xtradb/include/data0data.h
+++ /dev/null
@@ -1,536 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/data0data.h
-SQL data field and tuple
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef data0data_h
-#define data0data_h
-
-#include "univ.i"
-
-#include "data0types.h"
-#include "data0type.h"
-#include "mem0mem.h"
-#include "dict0types.h"
-
-/** Storage for overflow data in a big record, that is, a clustered
-index record which needs external storage of data fields */
-struct big_rec_t;
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets pointer to the type struct of SQL data field.
-@return pointer to the type struct */
-UNIV_INLINE
-dtype_t*
-dfield_get_type(
-/*============*/
- const dfield_t* field) /*!< in: SQL data field */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Gets pointer to the data in a field.
-@return pointer to data */
-UNIV_INLINE
-void*
-dfield_get_data(
-/*============*/
- const dfield_t* field) /*!< in: field */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#else /* UNIV_DEBUG */
-# define dfield_get_type(field) (&(field)->type)
-# define dfield_get_data(field) ((field)->data)
-#endif /* UNIV_DEBUG */
-/*********************************************************************//**
-Sets the type struct of SQL data field. */
-UNIV_INLINE
-void
-dfield_set_type(
-/*============*/
- dfield_t* field, /*!< in: SQL data field */
- const dtype_t* type) /*!< in: pointer to data type struct */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Gets length of field data.
-@return length of data; UNIV_SQL_NULL if SQL null data */
-UNIV_INLINE
-ulint
-dfield_get_len(
-/*===========*/
- const dfield_t* field) /*!< in: field */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Sets length in a field. */
-UNIV_INLINE
-void
-dfield_set_len(
-/*===========*/
- dfield_t* field, /*!< in: field */
- ulint len) /*!< in: length or UNIV_SQL_NULL */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Determines if a field is SQL NULL
-@return nonzero if SQL null data */
-UNIV_INLINE
-ulint
-dfield_is_null(
-/*===========*/
- const dfield_t* field) /*!< in: field */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Determines if a field is externally stored
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-dfield_is_ext(
-/*==========*/
- const dfield_t* field) /*!< in: field */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Sets the "external storage" flag */
-UNIV_INLINE
-void
-dfield_set_ext(
-/*===========*/
- dfield_t* field) /*!< in/out: field */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Sets pointer to the data and length in a field. */
-UNIV_INLINE
-void
-dfield_set_data(
-/*============*/
- dfield_t* field, /*!< in: field */
- const void* data, /*!< in: data */
- ulint len) /*!< in: length or UNIV_SQL_NULL */
- MY_ATTRIBUTE((nonnull(1)));
-/*********************************************************************//**
-Sets a data field to SQL NULL. */
-UNIV_INLINE
-void
-dfield_set_null(
-/*============*/
- dfield_t* field) /*!< in/out: field */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Writes an SQL null field full of zeros. */
-UNIV_INLINE
-void
-data_write_sql_null(
-/*================*/
- byte* data, /*!< in: pointer to a buffer of size len */
- ulint len) /*!< in: SQL null size in bytes */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Copies the data and len fields. */
-UNIV_INLINE
-void
-dfield_copy_data(
-/*=============*/
- dfield_t* field1, /*!< out: field to copy to */
- const dfield_t* field2) /*!< in: field to copy from */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Copies a data field to another. */
-UNIV_INLINE
-void
-dfield_copy(
-/*========*/
- dfield_t* field1, /*!< out: field to copy to */
- const dfield_t* field2) /*!< in: field to copy from */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Copies the data pointed to by a data field. */
-UNIV_INLINE
-void
-dfield_dup(
-/*=======*/
- dfield_t* field, /*!< in/out: data field */
- mem_heap_t* heap) /*!< in: memory heap where allocated */
- MY_ATTRIBUTE((nonnull));
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Tests if two data fields are equal.
-If len==0, tests the data length and content for equality.
-If len>0, tests the first len bytes of the content for equality.
-@return TRUE if both fields are NULL or if they are equal */
-UNIV_INLINE
-ibool
-dfield_datas_are_binary_equal(
-/*==========================*/
- const dfield_t* field1, /*!< in: field */
- const dfield_t* field2, /*!< in: field */
- ulint len) /*!< in: maximum prefix to compare,
- or 0 to compare the whole field length */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Tests if dfield data length and content is equal to the given.
-@return TRUE if equal */
-UNIV_INLINE
-ibool
-dfield_data_is_binary_equal(
-/*========================*/
- const dfield_t* field, /*!< in: field */
- ulint len, /*!< in: data length or UNIV_SQL_NULL */
- const byte* data) /*!< in: data */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Gets number of fields in a data tuple.
-@return number of fields */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields(
-/*================*/
- const dtuple_t* tuple) /*!< in: tuple */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Gets nth field of a tuple.
-@return nth field */
-UNIV_INLINE
-dfield_t*
-dtuple_get_nth_field(
-/*=================*/
- const dtuple_t* tuple, /*!< in: tuple */
- ulint n); /*!< in: index of field */
-#else /* UNIV_DEBUG */
-# define dtuple_get_nth_field(tuple, n) ((tuple)->fields + (n))
-#endif /* UNIV_DEBUG */
-/*********************************************************************//**
-Gets info bits in a data tuple.
-@return info bits */
-UNIV_INLINE
-ulint
-dtuple_get_info_bits(
-/*=================*/
- const dtuple_t* tuple) /*!< in: tuple */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Sets info bits in a data tuple. */
-UNIV_INLINE
-void
-dtuple_set_info_bits(
-/*=================*/
- dtuple_t* tuple, /*!< in: tuple */
- ulint info_bits) /*!< in: info bits */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Gets number of fields used in record comparisons.
-@return number of fields used in comparisons in rem0cmp.* */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields_cmp(
-/*====================*/
- const dtuple_t* tuple) /*!< in: tuple */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Sets number of fields used in record comparisons. */
-UNIV_INLINE
-void
-dtuple_set_n_fields_cmp(
-/*====================*/
- dtuple_t* tuple, /*!< in: tuple */
- ulint n_fields_cmp) /*!< in: number of fields used in
- comparisons in rem0cmp.* */
- MY_ATTRIBUTE((nonnull));
-
-/* Estimate the number of bytes that are going to be allocated when
-creating a new dtuple_t object */
-#define DTUPLE_EST_ALLOC(n_fields) \
- (sizeof(dtuple_t) + (n_fields) * sizeof(dfield_t))
-
-/**********************************************************//**
-Creates a data tuple from an already allocated chunk of memory.
-The size of the chunk must be at least DTUPLE_EST_ALLOC(n_fields).
-The default value for number of fields used in record comparisons
-for this tuple is n_fields.
-@return created tuple (inside buf) */
-UNIV_INLINE
-dtuple_t*
-dtuple_create_from_mem(
-/*===================*/
- void* buf, /*!< in, out: buffer to use */
- ulint buf_size, /*!< in: buffer size */
- ulint n_fields) /*!< in: number of fields */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/**********************************************************//**
-Creates a data tuple to a memory heap. The default value for number
-of fields used in record comparisons for this tuple is n_fields.
-@return own: created tuple */
-UNIV_INLINE
-dtuple_t*
-dtuple_create(
-/*==========*/
- mem_heap_t* heap, /*!< in: memory heap where the tuple
- is created, DTUPLE_EST_ALLOC(n_fields)
- bytes will be allocated from this heap */
- ulint n_fields)/*!< in: number of fields */
- MY_ATTRIBUTE((nonnull, malloc));
-
-/*********************************************************************//**
-Sets number of fields used in a tuple. Normally this is set in
-dtuple_create, but if you want later to set it smaller, you can use this. */
-UNIV_INTERN
-void
-dtuple_set_n_fields(
-/*================*/
- dtuple_t* tuple, /*!< in: tuple */
- ulint n_fields) /*!< in: number of fields */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Copies a data tuple to another. This is a shallow copy; if a deep copy
-is desired, dfield_dup() will have to be invoked on each field.
-@return own: copy of tuple */
-UNIV_INLINE
-dtuple_t*
-dtuple_copy(
-/*========*/
- const dtuple_t* tuple, /*!< in: tuple to copy from */
- mem_heap_t* heap) /*!< in: memory heap
- where the tuple is created */
- MY_ATTRIBUTE((nonnull, malloc));
-/**********************************************************//**
-The following function returns the sum of data lengths of a tuple. The space
-occupied by the field structs or the tuple struct is not counted.
-@return sum of data lens */
-UNIV_INLINE
-ulint
-dtuple_get_data_size(
-/*=================*/
- const dtuple_t* tuple, /*!< in: typed data tuple */
- ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Computes the number of externally stored fields in a data tuple.
-@return number of fields */
-UNIV_INLINE
-ulint
-dtuple_get_n_ext(
-/*=============*/
- const dtuple_t* tuple) /*!< in: tuple */
- MY_ATTRIBUTE((nonnull));
-/************************************************************//**
-Compare two data tuples, respecting the collation of character fields.
-@return 1, 0, -1 if tuple1 is greater, equal, less, respectively,
-than tuple2 */
-UNIV_INTERN
-int
-dtuple_coll_cmp(
-/*============*/
- const dtuple_t* tuple1, /*!< in: tuple 1 */
- const dtuple_t* tuple2) /*!< in: tuple 2 */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/************************************************************//**
-Folds a prefix given as the number of fields of a tuple.
-@return the folded value */
-UNIV_INLINE
-ulint
-dtuple_fold(
-/*========*/
- const dtuple_t* tuple, /*!< in: the tuple */
- ulint n_fields,/*!< in: number of complete fields to fold */
- ulint n_bytes,/*!< in: number of bytes to fold in an
- incomplete last field */
- index_id_t tree_id)/*!< in: index tree id */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/*******************************************************************//**
-Sets the data type of the first n fields of a tuple to DATA_BINARY. */
-UNIV_INLINE
-void
-dtuple_set_types_binary(
-/*====================*/
- dtuple_t* tuple, /*!< in/out: data tuple whose field types are overwritten */
- ulint n) /*!< in: number of fields to set */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Checks if a dtuple contains an SQL null value.
-@return TRUE if some field is SQL null */
-UNIV_INLINE
-ibool
-dtuple_contains_null(
-/*=================*/
- const dtuple_t* tuple) /*!< in: dtuple */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************//**
-Checks that a data field is typed. Asserts an error if not.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dfield_check_typed(
-/*===============*/
- const dfield_t* field) /*!< in: data field */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************//**
-Checks that a data tuple is typed. Asserts an error if not.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtuple_check_typed(
-/*===============*/
- const dtuple_t* tuple) /*!< in: tuple */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************//**
-Checks that a data tuple is typed.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtuple_check_typed_no_assert(
-/*=========================*/
- const dtuple_t* tuple) /*!< in: tuple */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifdef UNIV_DEBUG
-/**********************************************************//**
-Validates the consistency of a tuple which must be complete, i.e.,
-all fields must have been set.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtuple_validate(
-/*============*/
- const dtuple_t* tuple) /*!< in: tuple */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* UNIV_DEBUG */
-/*************************************************************//**
-Pretty prints a dfield value according to its data type. */
-UNIV_INTERN
-void
-dfield_print(
-/*=========*/
- const dfield_t* dfield) /*!< in: dfield */
- MY_ATTRIBUTE((nonnull));
-/*************************************************************//**
-Pretty prints a dfield value according to its data type. Also the hex string
-is printed if a string contains non-printable characters. */
-UNIV_INTERN
-void
-dfield_print_also_hex(
-/*==================*/
- const dfield_t* dfield) /*!< in: dfield */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************//**
-The following function prints the contents of a tuple. */
-UNIV_INTERN
-void
-dtuple_print(
-/*=========*/
- FILE* f, /*!< in: output stream */
- const dtuple_t* tuple) /*!< in: tuple */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Moves parts of long fields in entry to the big record vector so that
-the size of tuple drops below the maximum record size allowed in the
-database. Moves data only from those fields which are not necessary
-to determine uniquely the insertion place of the tuple in the index.
-@return own: created big record vector, NULL if we are not able to
-shorten the entry enough, i.e., if there are too many fixed-length or
-short fields in entry or the index is clustered */
-UNIV_INTERN
-big_rec_t*
-dtuple_convert_big_rec(
-/*===================*/
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in/out: index entry */
- ulint* n_ext) /*!< in/out: number of
- externally stored columns */
- MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
-/**************************************************************//**
-Puts back to entry the data stored in vector. Note that to ensure the
-fields in entry can accommodate the data, vector must have been created
-from entry with dtuple_convert_big_rec. */
-UNIV_INTERN
-void
-dtuple_convert_back_big_rec(
-/*========================*/
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in/out: entry whose data was put to vector;
- the data is put back into it */
- big_rec_t* vector) /*!< in, own: big rec vector; it is
- freed in this function */
- MY_ATTRIBUTE((nonnull));
-/**************************************************************//**
-Frees the memory in a big rec vector. */
-UNIV_INLINE
-void
-dtuple_big_rec_free(
-/*================*/
- big_rec_t* vector) /*!< in, own: big rec vector; it is
- freed in this function */
- MY_ATTRIBUTE((nonnull));
-
-/*######################################################################*/
-
-/** Structure for an SQL data field: a pointer to the value, its length,
-an "externally stored" flag and the data type. The inline accessors in
-data0data.ic (dfield_get_len(), dfield_set_data(), ...) read and write
-these members. */
-struct dfield_t{
- void* data; /*!< pointer to data; dfield_set_null() sets it to NULL */
- unsigned ext:1; /*!< TRUE=externally stored, FALSE=local;
- cleared by dfield_set_len() and dfield_set_data(),
- set by dfield_set_ext() */
- unsigned len:32; /*!< data length; UNIV_SQL_NULL if SQL null.
- NOTE(review): the setters assign a ulint to this
- 32-bit bit-field, so a wider value would be
- truncated -- confirm lengths always fit */
- dtype_t type; /*!< type of data */
-};
-
-/** Structure for an SQL data tuple of fields (logical record).
-dtuple_create_from_mem() initializes every member except tuple_list. */
-struct dtuple_t {
- ulint info_bits; /*!< info bits of an index record:
- the default is 0; this field is used
- if an index record is built from
- a data tuple */
- ulint n_fields; /*!< number of fields in dtuple */
- ulint n_fields_cmp; /*!< number of fields which should
- be used in comparison services
- of rem0cmp.*; the index search
- is performed by comparing only these
- fields, others are ignored; the
- default value in dtuple creation is
- the same value as n_fields;
- dtuple_set_n_fields_cmp() asserts
- that it never exceeds n_fields */
- dfield_t* fields; /*!< fields; dtuple_create_from_mem()
- points this at the memory directly
- following the dtuple_t in its buffer */
- UT_LIST_NODE_T(dtuple_t) tuple_list;
- /*!< data tuples can be linked into a
- list using this field */
-#ifdef UNIV_DEBUG
- ulint magic_n; /*!< magic number, used in
- debug assertions; set to
- DATA_TUPLE_MAGIC_N on creation */
-/** Value of dtuple_t::magic_n */
-# define DATA_TUPLE_MAGIC_N 65478679
-#endif /* UNIV_DEBUG */
-};
-
-/** A slot for a field in a big rec vector; one element of
-big_rec_t::fields */
-struct big_rec_field_t {
- ulint field_no; /*!< field number in record */
- ulint len; /*!< stored data length, in bytes */
- const void* data; /*!< stored data */
-};
-
-/** Storage format for overflow data in a big record, that is, a
-clustered index record which needs external storage of data fields.
-dtuple_big_rec_free() releases a vector by freeing its heap.
-NOTE(review): presumably the big_rec_t itself also lives in that heap --
-confirm against dtuple_convert_big_rec(). */
-struct big_rec_t {
- mem_heap_t* heap; /*!< memory heap from which
- allocated */
- ulint n_fields; /*!< number of stored fields */
- big_rec_field_t*fields; /*!< stored fields */
-};
-
-#ifndef UNIV_NONINL
-#include "data0data.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/data0data.ic b/storage/xtradb/include/data0data.ic
deleted file mode 100644
index 11499ab928c..00000000000
--- a/storage/xtradb/include/data0data.ic
+++ /dev/null
@@ -1,651 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/data0data.ic
-SQL data field and tuple
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "mem0mem.h"
-#include "ut0rnd.h"
-
-#ifdef UNIV_DEBUG
-/** Dummy variable to catch access to uninitialized fields. In the
-debug version, dtuple_create() will make all fields of dtuple_t point
-to data_error. */
-extern byte data_error;
-
-/*********************************************************************//**
-Returns the address of the type descriptor embedded in a data field.
-The const qualifier of the argument is cast away, so the caller receives
-a writable pointer.
-@return pointer to field->type */
-UNIV_INLINE
-dtype_t*
-dfield_get_type(
-/*============*/
-	const dfield_t*	field)	/*!< in: SQL data field */
-{
-	dtype_t*	type;
-
-	ut_ad(field);
-
-	type = (dtype_t*) &field->type;
-
-	return(type);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Overwrites the type descriptor of a data field with a copy of *type. */
-UNIV_INLINE
-void
-dfield_set_type(
-/*============*/
-	dfield_t*	field,	/*!< in/out: SQL data field */
-	const dtype_t*	type)	/*!< in: data type struct to copy from */
-{
-	dtype_t*	dst;
-
-	ut_ad(field != NULL);
-	ut_ad(type != NULL);
-
-	/* Struct assignment: the whole descriptor is copied by value. */
-	dst = &field->type;
-	*dst = *type;
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Returns the data pointer stored in a field. In this debug-only version
-it is asserted that a non-NULL (in the SQL sense) field does not still
-point to the data_error dummy variable.
-@return pointer to data */
-UNIV_INLINE
-void*
-dfield_get_data(
-/*============*/
-	const dfield_t*	field)	/*!< in: field */
-{
-	void*	data;
-
-	ut_ad(field);
-	ut_ad((field->len == UNIV_SQL_NULL)
-	      || (field->data != &data_error));
-
-	data = (void*) field->data;
-
-	return(data);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Returns the length stored in a field.
-@return data length, or UNIV_SQL_NULL if the field is SQL NULL */
-UNIV_INLINE
-ulint
-dfield_get_len(
-/*===========*/
-	const dfield_t*	field)	/*!< in: field */
-{
-	ulint	len;
-
-	ut_ad(field);
-	ut_ad((field->len == UNIV_SQL_NULL)
-	      || (field->data != &data_error));
-
-	len = field->len;
-
-	return(len);
-}
-
-/*********************************************************************//**
-Stores a new length in a field. As a side effect the "externally
-stored" flag of the field is cleared. */
-UNIV_INLINE
-void
-dfield_set_len(
-/*===========*/
-	dfield_t*	field,	/*!< in/out: field */
-	ulint		len)	/*!< in: length or UNIV_SQL_NULL */
-{
-	ut_ad(field);
-
-#ifdef UNIV_VALGRIND_DEBUG
-	if (len != UNIV_SQL_NULL) {
-		UNIV_MEM_ASSERT_RW(field->data, len);
-	}
-#endif /* UNIV_VALGRIND_DEBUG */
-
-	field->len = len;
-	field->ext = 0;
-}
-
-/*********************************************************************//**
-Tells whether a field holds SQL NULL, i.e., whether its length equals
-UNIV_SQL_NULL.
-@return nonzero if SQL null data */
-UNIV_INLINE
-ulint
-dfield_is_null(
-/*===========*/
-	const dfield_t*	field)	/*!< in: field */
-{
-	ulint	is_null;
-
-	ut_ad(field);
-
-	is_null = (field->len == UNIV_SQL_NULL);
-
-	return(is_null);
-}
-
-/*********************************************************************//**
-Reads the "externally stored" flag of a field.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-dfield_is_ext(
-/*==========*/
-	const dfield_t*	field)	/*!< in: field */
-{
-	ulint	ext;
-
-	ut_ad(field);
-
-	ext = field->ext;
-
-	return(ext);
-}
-
-/*********************************************************************//**
-Marks a field as externally stored by raising its ext flag. */
-UNIV_INLINE
-void
-dfield_set_ext(
-/*===========*/
-	dfield_t*	field)	/*!< in/out: field */
-{
-	ut_ad(field);
-	field->ext = 1;
-}
-
-/*********************************************************************//**
-Stores a data pointer and a length in a field and clears the
-"externally stored" flag. The data itself is not copied. */
-UNIV_INLINE
-void
-dfield_set_data(
-/*============*/
-	dfield_t*	field,	/*!< in/out: field */
-	const void*	data,	/*!< in: data */
-	ulint		len)	/*!< in: length or UNIV_SQL_NULL */
-{
-	ut_ad(field);
-
-#ifdef UNIV_VALGRIND_DEBUG
-	if (len != UNIV_SQL_NULL) {
-		UNIV_MEM_ASSERT_RW(data, len);
-	}
-#endif /* UNIV_VALGRIND_DEBUG */
-
-	field->len = len;
-	field->ext = 0;
-	field->data = (void*) data;
-}
-
-/*********************************************************************//**
-Turns a data field into SQL NULL: the data pointer becomes NULL, the
-length becomes UNIV_SQL_NULL and the "externally stored" flag is
-cleared. */
-UNIV_INLINE
-void
-dfield_set_null(
-/*============*/
-	dfield_t*	field)	/*!< in/out: field */
-{
-	ut_ad(field);
-
-	/* Same stores as dfield_set_data(field, NULL, UNIV_SQL_NULL). */
-	field->data = NULL;
-	field->ext = 0;
-	field->len = UNIV_SQL_NULL;
-}
-
-/*********************************************************************//**
-Copies the data pointer, the length and the "externally stored" flag
-from one field to another. The type of field1 is left untouched and the
-pointed-to data is shared, not duplicated. */
-UNIV_INLINE
-void
-dfield_copy_data(
-/*=============*/
-	dfield_t*	field1,	/*!< out: field to copy to */
-	const dfield_t*	field2)	/*!< in: field to copy from */
-{
-	ut_ad(field1 != NULL);
-	ut_ad(field2 != NULL);
-
-	field1->ext = field2->ext;
-	field1->len = field2->len;
-	field1->data = field2->data;
-}
-
-/*********************************************************************//**
-Copies a whole data field (data pointer, length, ext flag and type) to
-another by struct assignment. The pointed-to data is not duplicated. */
-UNIV_INLINE
-void
-dfield_copy(
-/*========*/
-	dfield_t*	field1,	/*!< out: field to copy to */
-	const dfield_t*	field2)	/*!< in: field to copy from */
-{
-	*field1 = *field2;
-}
-
-/*********************************************************************//**
-Replaces the data pointer of a field with a pointer to a copy of the
-data made with mem_heap_dup() from the given heap. SQL NULL fields are
-left unchanged. */
-UNIV_INLINE
-void
-dfield_dup(
-/*=======*/
-	dfield_t*	field,	/*!< in/out: data field */
-	mem_heap_t*	heap)	/*!< in: memory heap where allocated */
-{
-	if (dfield_is_null(field)) {
-		/* Nothing to duplicate. */
-		return;
-	}
-
-	UNIV_MEM_ASSERT_RW(field->data, field->len);
-	field->data = mem_heap_dup(heap, field->data, field->len);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Compares the contents of two data fields byte by byte.
-If len==0, the full lengths and contents must match.
-If len>0, at most the first len bytes of each field are considered; a
-field that is shorter than len, or SQL NULL, takes part with its own
-length.
-@return TRUE if both fields are NULL or if they are equal */
-UNIV_INLINE
-ibool
-dfield_datas_are_binary_equal(
-/*==========================*/
-	const dfield_t*	field1,	/*!< in: field */
-	const dfield_t*	field2,	/*!< in: field */
-	ulint		len)	/*!< in: maximum prefix to compare,
-				or 0 to compare the whole field length */
-{
-	ulint	cmp_len1 = len;
-	ulint	cmp_len2 = len;
-	ibool	equal;
-
-	if (len == 0 || field1->len == UNIV_SQL_NULL || field1->len < len) {
-		cmp_len1 = field1->len;
-	}
-
-	if (len == 0 || field2->len == UNIV_SQL_NULL || field2->len < len) {
-		cmp_len2 = field2->len;
-	}
-
-	equal = (cmp_len1 == cmp_len2);
-
-	/* The bytes are only inspected when the effective lengths agree
-	and the fields are not SQL NULL. */
-	if (equal && cmp_len1 != UNIV_SQL_NULL) {
-		equal = !memcmp(field1->data, field2->data, cmp_len1);
-	}
-
-	return(equal);
-}
-
-/*********************************************************************//**
-Checks whether the length and the bytes of a data field match the given
-length and buffer. Two SQL NULL values compare as equal.
-@return TRUE if equal */
-UNIV_INLINE
-ibool
-dfield_data_is_binary_equal(
-/*========================*/
-	const dfield_t*	field,	/*!< in: field */
-	ulint		len,	/*!< in: data length or UNIV_SQL_NULL */
-	const byte*	data)	/*!< in: data */
-{
-	ibool	equal = (len == dfield_get_len(field));
-
-	/* The contents are only compared when the lengths agree and the
-	value is not SQL NULL. */
-	if (equal && len != UNIV_SQL_NULL) {
-		equal = !memcmp(dfield_get_data(field), data, len);
-	}
-
-	return(equal);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Reads the info bits stored in a data tuple.
-@return info bits */
-UNIV_INLINE
-ulint
-dtuple_get_info_bits(
-/*=================*/
-	const dtuple_t*	tuple)	/*!< in: tuple */
-{
-	ulint	bits;
-
-	ut_ad(tuple);
-
-	bits = tuple->info_bits;
-
-	return(bits);
-}
-
-/*********************************************************************//**
-Stores new info bits in a data tuple, replacing the previous value. */
-UNIV_INLINE
-void
-dtuple_set_info_bits(
-/*=================*/
-	dtuple_t*	tuple,		/*!< in/out: tuple */
-	ulint		info_bits)	/*!< in: info bits */
-{
-	ut_ad(tuple);
-	tuple->info_bits = info_bits;
-}
-
-/*********************************************************************//**
-Reads how many fields of the tuple take part in record comparisons.
-@return number of fields used in comparisons in rem0cmp.* */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields_cmp(
-/*====================*/
-	const dtuple_t*	tuple)	/*!< in: tuple */
-{
-	ulint	n_cmp;
-
-	ut_ad(tuple);
-
-	n_cmp = tuple->n_fields_cmp;
-
-	return(n_cmp);
-}
-
-/*********************************************************************//**
-Sets how many fields of the tuple take part in record comparisons.
-The value may not exceed the number of fields in the tuple (asserted in
-debug builds). */
-UNIV_INLINE
-void
-dtuple_set_n_fields_cmp(
-/*====================*/
-	dtuple_t*	tuple,		/*!< in/out: tuple */
-	ulint		n_fields_cmp)	/*!< in: number of fields used in
-					comparisons in rem0cmp.* */
-{
-	ut_ad(tuple);
-	ut_ad(n_fields_cmp <= tuple->n_fields);
-	tuple->n_fields_cmp = n_fields_cmp;
-}
-
-/*********************************************************************//**
-Reads the number of fields stored in a data tuple.
-@return number of fields */
-UNIV_INLINE
-ulint
-dtuple_get_n_fields(
-/*================*/
-	const dtuple_t*	tuple)	/*!< in: tuple */
-{
-	ulint	n;
-
-	ut_ad(tuple);
-
-	n = tuple->n_fields;
-
-	return(n);
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Returns a pointer to the field at position n of a tuple. In this
-debug-only version the index is asserted to be below the field count.
-@return nth field */
-UNIV_INLINE
-dfield_t*
-dtuple_get_nth_field(
-/*=================*/
-	const dtuple_t*	tuple,	/*!< in: tuple */
-	ulint		n)	/*!< in: index of field */
-{
-	ut_ad(tuple);
-	ut_ad(n < tuple->n_fields);
-
-	return(&tuple->fields[n]);
-}
-#endif /* UNIV_DEBUG */
-
-/**********************************************************//**
-Creates a data tuple from an already allocated chunk of memory.
-The size of the chunk must be at least DTUPLE_EST_ALLOC(n_fields).
-The default value for number of fields used in record comparisons
-for this tuple is n_fields.
-The tuple header is placed at the start of buf and the field array
-directly behind it; info_bits is initialized to 0. The tuple_list
-member is not initialized here.
-@return created tuple (inside buf) */
-UNIV_INLINE
-dtuple_t*
-dtuple_create_from_mem(
-/*===================*/
- void* buf, /*!< in, out: buffer to use */
- ulint buf_size, /*!< in: buffer size */
- ulint n_fields) /*!< in: number of fields */
-{
- dtuple_t* tuple;
-
- ut_ad(buf != NULL);
- /* NOTE(review): ut_a rather than ut_ad -- presumably this size check
- is also active in non-debug builds; confirm in ut0dbg.h */
- ut_a(buf_size >= DTUPLE_EST_ALLOC(n_fields));
-
- tuple = (dtuple_t*) buf;
- tuple->info_bits = 0;
- tuple->n_fields = n_fields;
- tuple->n_fields_cmp = n_fields;
- /* The field array starts right after the tuple header inside buf. */
- tuple->fields = (dfield_t*) &tuple[1];
-
-#ifdef UNIV_DEBUG
- tuple->magic_n = DATA_TUPLE_MAGIC_N;
-
- { /* In the debug version, initialize fields to an error value */
- ulint i;
-
- for (i = 0; i < n_fields; i++) {
- dfield_t* field;
-
- field = dtuple_get_nth_field(tuple, i);
-
- /* dfield_set_len() also clears field->ext. */
- dfield_set_len(field, UNIV_SQL_NULL);
- field->data = &data_error;
- dfield_get_type(field)->mtype = DATA_ERROR;
- }
- }
-#endif
- /* NOTE(review): presumably memory-checker instrumentation that
- verifies the field array is addressable and then marks its contents
- as undefined -- confirm against the UNIV_MEM_* definitions */
- UNIV_MEM_ASSERT_W(tuple->fields, n_fields * sizeof *tuple->fields);
- UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields);
- return(tuple);
-}
-
-/**********************************************************//**
-Allocates DTUPLE_EST_ALLOC(n_fields) bytes from a memory heap and
-builds a data tuple in them with dtuple_create_from_mem(). The number
-of fields used in record comparisons defaults to n_fields.
-@return own: created tuple */
-UNIV_INLINE
-dtuple_t*
-dtuple_create(
-/*==========*/
-	mem_heap_t*	heap,	/*!< in: memory heap where the tuple
-				is created, DTUPLE_EST_ALLOC(n_fields)
-				bytes will be allocated from this heap */
-	ulint		n_fields) /*!< in: number of fields */
-{
-	ulint	size;
-	void*	mem;
-
-	ut_ad(heap);
-
-	size = DTUPLE_EST_ALLOC(n_fields);
-	mem = mem_heap_alloc(heap, size);
-
-	return(dtuple_create_from_mem(mem, size, n_fields));
-}
-
-/*********************************************************************//**
-Creates a new tuple in the given heap and copies every field struct of
-the source tuple into it. The copy is shallow: the field data pointers
-are shared with the source; call dfield_dup() on each field if a deep
-copy is needed.
-@return own: copy of tuple */
-UNIV_INLINE
-dtuple_t*
-dtuple_copy(
-/*========*/
-	const dtuple_t*	tuple,	/*!< in: tuple to copy from */
-	mem_heap_t*	heap)	/*!< in: memory heap
-				where the tuple is created */
-{
-	const ulint	n = dtuple_get_n_fields(tuple);
-	dtuple_t*	copy = dtuple_create(heap, n);
-	ulint		field_no = 0;
-
-	while (field_no < n) {
-		dfield_t*	dst = dtuple_get_nth_field(copy, field_no);
-		const dfield_t*	src = dtuple_get_nth_field(tuple, field_no);
-
-		dfield_copy(dst, src);
-		field_no++;
-	}
-
-	return(copy);
-}
-
-/**********************************************************//**
-Adds up the data lengths of all fields of a tuple. An SQL NULL field
-contributes dtype_get_sql_null_size() of its type. The field structs,
-the tuple struct and any externally stored parts of a field are not
-counted.
-@return sum of data lengths */
-UNIV_INLINE
-ulint
-dtuple_get_data_size(
-/*=================*/
-	const dtuple_t*	tuple,	/*!< in: typed data tuple */
-	ulint		comp)	/*!< in: nonzero=ROW_FORMAT=COMPACT */
-{
-	ulint	total = 0;
-	ulint	n;
-	ulint	field_no;
-
-	ut_ad(tuple);
-	ut_ad(dtuple_check_typed(tuple));
-	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
-
-	n = tuple->n_fields;
-
-	for (field_no = 0; field_no < n; field_no++) {
-		const dfield_t*	f = dtuple_get_nth_field(tuple, field_no);
-		ulint		f_len = dfield_get_len(f);
-
-		if (f_len == UNIV_SQL_NULL) {
-			f_len = dtype_get_sql_null_size(
-				dfield_get_type(f), comp);
-		}
-
-		total += f_len;
-	}
-
-	return(total);
-}
-
-/*********************************************************************//**
-Counts the fields of a data tuple whose "externally stored" flag
-(dfield_t::ext) is set.
-@return number of externally stored fields */
-UNIV_INLINE
-ulint
-dtuple_get_n_ext(
-/*=============*/
-	const dtuple_t*	tuple)	/*!< in: tuple */
-{
-	ulint	n_ext = 0;
-	ulint	n_fields;
-	ulint	i;
-
-	/* Validate the tuple before reading any of its members, so that
-	a NULL tuple trips the debug assertion instead of being
-	dereferenced first. */
-	ut_ad(tuple);
-	ut_ad(dtuple_check_typed(tuple));
-	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
-
-	n_fields = tuple->n_fields;
-
-	for (i = 0; i < n_fields; i++) {
-		/* ext is a 1-bit flag, so each field adds 0 or 1. */
-		n_ext += dtuple_get_nth_field(tuple, i)->ext;
-	}
-
-	return(n_ext);
-}
-
-/*******************************************************************//**
-Sets the type of the first n fields of a tuple to DATA_BINARY by
-calling dtype_set(type, DATA_BINARY, 0, 0) on each of them. */
-UNIV_INLINE
-void
-dtuple_set_types_binary(
-/*====================*/
-	dtuple_t*	tuple,	/*!< in/out: data tuple */
-	ulint		n)	/*!< in: number of fields to set */
-{
-	ulint	field_no;
-
-	for (field_no = 0; field_no < n; field_no++) {
-		dfield_t*	f = dtuple_get_nth_field(tuple, field_no);
-
-		dtype_set(dfield_get_type(f), DATA_BINARY, 0, 0);
-	}
-}
-
-/************************************************************//**
-Computes a fold value over a prefix of a tuple. The fold starts from
-the index tree id, then mixes in each of the first n_fields complete
-fields and, if n_bytes > 0, the first n_bytes bytes of the field that
-follows them. SQL NULL fields are skipped.
-@return the folded value */
-UNIV_INLINE
-ulint
-dtuple_fold(
-/*========*/
-	const dtuple_t*	tuple,	/*!< in: the tuple */
-	ulint		n_fields,/*!< in: number of complete fields to fold */
-	ulint		n_bytes,/*!< in: number of bytes to fold in an
-				incomplete last field */
-	index_id_t	tree_id)/*!< in: index tree id */
-{
-	ulint	result;
-	ulint	field_no;
-
-	ut_ad(tuple);
-	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
-	ut_ad(dtuple_check_typed(tuple));
-
-	result = ut_fold_ull(tree_id);
-
-	/* Complete fields. */
-	for (field_no = 0; field_no < n_fields; field_no++) {
-		const dfield_t*	f = dtuple_get_nth_field(tuple, field_no);
-		const byte*	ptr = (const byte*) dfield_get_data(f);
-		ulint		f_len = dfield_get_len(f);
-
-		if (f_len == UNIV_SQL_NULL) {
-			continue;
-		}
-
-		result = ut_fold_ulint_pair(result,
-					    ut_fold_binary(ptr, f_len));
-	}
-
-	if (n_bytes == 0) {
-		return(result);
-	}
-
-	/* Incomplete last field: the one right after the complete ones,
-	folded over at most n_bytes bytes. */
-	{
-		const dfield_t*	f = dtuple_get_nth_field(tuple, n_fields);
-		const byte*	ptr = (const byte*) dfield_get_data(f);
-		ulint		f_len = dfield_get_len(f);
-
-		if (f_len != UNIV_SQL_NULL) {
-			if (f_len > n_bytes) {
-				f_len = n_bytes;
-			}
-
-			result = ut_fold_ulint_pair(
-				result, ut_fold_binary(ptr, f_len));
-		}
-	}
-
-	return(result);
-}
-
-/**********************************************************************//**
-Fills a buffer with len zero bytes; this is how an SQL null field is
-written. */
-UNIV_INLINE
-void
-data_write_sql_null(
-/*================*/
-	byte*	data,	/*!< out: pointer to a buffer of size len */
-	ulint	len)	/*!< in: SQL null size in bytes */
-{
-	memset(data, '\0', len);
-}
-
-/**********************************************************************//**
-Scans the fields of a dtuple and stops at the first one that is SQL
-null.
-@return TRUE if some field is SQL null */
-UNIV_INLINE
-ibool
-dtuple_contains_null(
-/*=================*/
-	const dtuple_t*	tuple)	/*!< in: dtuple */
-{
-	const ulint	n_fields = dtuple_get_n_fields(tuple);
-	ulint		field_no = 0;
-	ibool		found = FALSE;
-
-	while (!found && field_no < n_fields) {
-		if (dfield_is_null(dtuple_get_nth_field(tuple, field_no))) {
-			found = TRUE;
-		}
-
-		field_no++;
-	}
-
-	return(found);
-}
-
-/**************************************************************//**
-Releases a big rec vector by freeing the memory heap it refers to. */
-UNIV_INLINE
-void
-dtuple_big_rec_free(
-/*================*/
-	big_rec_t*	vector)	/*!< in, own: big rec vector; it is
-				freed in this function */
-{
-	mem_heap_t*	heap = vector->heap;
-
-	mem_heap_free(heap);
-}
diff --git a/storage/xtradb/include/data0type.h b/storage/xtradb/include/data0type.h
deleted file mode 100644
index df6b6a41c11..00000000000
--- a/storage/xtradb/include/data0type.h
+++ /dev/null
@@ -1,545 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/data0type.h
-Data types
-
-Created 1/16/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef data0type_h
-#define data0type_h
-
-#include "univ.i"
-
-extern ulint data_mysql_default_charset_coll;
-#define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8
-#define DATA_MYSQL_BINARY_CHARSET_COLL 63
-
-/* SQL data type struct */
-struct dtype_t;
-
-/* SQL Like operator comparison types */
-enum ib_like_t {
- IB_LIKE_EXACT, /* e.g. STRING */
- IB_LIKE_PREFIX, /* e.g., STRING% */
- IB_LIKE_SUFFIX, /* e.g., %STRING */
- IB_LIKE_SUBSTR, /* e.g., %STRING% */
- IB_LIKE_REGEXP /* Future */
-};
-
-/*-------------------------------------------*/
-/* The 'MAIN TYPE' of a column */
-#define DATA_MISSING 0 /* missing column */
-#define DATA_VARCHAR 1 /* character varying of the
- latin1_swedish_ci charset-collation; note
- that the MySQL format for this, DATA_BINARY,
- DATA_VARMYSQL, is also affected by whether the
- 'precise type' contains
- DATA_MYSQL_TRUE_VARCHAR */
-#define DATA_CHAR 2 /* fixed length character of the
- latin1_swedish_ci charset-collation */
-#define DATA_FIXBINARY 3 /* binary string of fixed length */
-#define DATA_BINARY 4 /* binary string */
-#define DATA_BLOB 5 /* binary large object, or a TEXT type;
- if prtype & DATA_BINARY_TYPE == 0, then this is
- actually a TEXT column (or a BLOB created
- with < 4.0.14; since column prefix indexes
- came only in 4.0.14, the missing flag in BLOBs
- created before that does not cause any harm) */
-#define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */
-#define DATA_SYS_CHILD 7 /* address of the child page in node pointer */
-#define DATA_SYS 8 /* system column */
-
-/* Data types >= DATA_FLOAT must be compared using the whole field, not as
-binary strings */
-
-#define DATA_FLOAT 9
-#define DATA_DOUBLE 10
-#define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */
-#define DATA_VARMYSQL 12 /* any charset varying length char */
-#define DATA_MYSQL 13 /* any charset fixed length char */
- /* NOTE that 4.1.1 used DATA_MYSQL and
- DATA_VARMYSQL for all character sets, and the
- charset-collation for tables created with it
- can also be latin1_swedish_ci */
-#define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size()
- requires the values are <= 63 */
-/*-------------------------------------------*/
-/* The 'PRECISE TYPE' of a column */
-/*
-Tables created by a MySQL user have the following convention:
-
-- In the least significant byte in the precise type we store the MySQL type
-code (not applicable for system columns).
-
-- In the second least significant byte we OR flags DATA_NOT_NULL,
-DATA_UNSIGNED, DATA_BINARY_TYPE.
-
-- In the third least significant byte of the precise type of string types we
-store the MySQL charset-collation code. In DATA_BLOB columns created with
-< 4.0.14 we do not actually know if it is a BLOB or a TEXT column. Since there
-are no indexes on prefixes of BLOB or TEXT columns in < 4.0.14, this is no
-problem, though.
-
-Note that versions < 4.1.2 or < 5.0.1 did not store the charset code to the
-precise type, since the charset was always the default charset of the MySQL
-installation. If the stored charset code is 0 in the system table SYS_COLUMNS
-of InnoDB, that means that the default charset of this MySQL installation
-should be used.
-
-When loading a table definition from the system tables to the InnoDB data
-dictionary cache in main memory, InnoDB versions >= 4.1.2 and >= 5.0.1 check
-if the stored charset-collation is 0, and if that is the case and the type is
-a non-binary string, replace that 0 by the default charset-collation code of
-this MySQL installation. In short, in old tables, the charset-collation code
-in the system tables on disk can be 0, but in in-memory data structures
-(dtype_t), the charset-collation code is always != 0 for non-binary string
-types.
-
-In new tables, in binary string types, the charset-collation code is the
-MySQL code for the 'binary charset', that is, != 0.
-
-For binary string types and for DATA_CHAR, DATA_VARCHAR, and for those
-DATA_BLOB which are binary or have the charset-collation latin1_swedish_ci,
-InnoDB performs all comparisons internally, without resorting to the MySQL
-comparison functions. This is to save CPU time.
-
-InnoDB's own internal system tables have different precise types for their
-columns, and for them the precise type is usually not used at all.
-*/
-
-#define DATA_ENGLISH 4 /* English language character string: this
- is a relic from pre-MySQL time and only used
- for InnoDB's own system tables */
-#define DATA_ERROR 111 /* another relic from pre-MySQL time */
-
-#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL
- type from the precise type */
-#define DATA_MYSQL_TRUE_VARCHAR 15 /* MySQL type code for the >= 5.0.3
- format true VARCHAR */
-
-/* Precise data types for system columns and the length of those columns;
-NOTE: the values must run from 0 up in the order given! All codes must
-be less than 256 */
-#define DATA_ROW_ID 0 /* row id: a 48-bit integer */
-#define DATA_ROW_ID_LEN 6 /* stored length for row id */
-
-#define DATA_TRX_ID 1 /* transaction id: 6 bytes */
-#define DATA_TRX_ID_LEN 6
-
-#define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */
-#define DATA_ROLL_PTR_LEN 7
-
-#define DATA_N_SYS_COLS 3 /* number of system columns defined above */
-
-#define DATA_FTS_DOC_ID 3 /* Used as FTS DOC ID column */
-
-#define DATA_SYS_PRTYPE_MASK 0xF /* mask to extract the above from prtype */
-
-/* Flags ORed to the precise data type */
-#define DATA_NOT_NULL 256 /* this is ORed to the precise type when
- the column is declared as NOT NULL */
-#define DATA_UNSIGNED 512 /* this id ORed to the precise type when
- we have an unsigned integer type */
-#define DATA_BINARY_TYPE 1024 /* if the data type is a binary character
- string, this is ORed to the precise type:
- this only holds for tables created with
- >= MySQL-4.0.14 */
-/* #define DATA_NONLATIN1 2048 This is a relic from < 4.1.2 and < 5.0.1.
- In earlier versions this was set for some
- BLOB columns.
-*/
-#define DATA_LONG_TRUE_VARCHAR 4096 /* this is ORed to the precise data
- type when the column is true VARCHAR where
- MySQL uses 2 bytes to store the data len;
- for shorter VARCHARs MySQL uses only 1 byte */
-/*-------------------------------------------*/
-
-/* This many bytes we need to store the type information affecting the
-alphabetical order for a single field and decide the storage size of an
-SQL null*/
-#define DATA_ORDER_NULL_TYPE_BUF_SIZE 4
-/* In the >= 4.1.x storage format we add 2 bytes more so that we can also
-store the charset-collation number; one byte is left unused, though */
-#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE 6
-
-/* Maximum multi-byte character length in bytes, plus 1 */
-#define DATA_MBMAX 5
-
-/* Pack mbminlen, mbmaxlen to mbminmaxlen. */
-#define DATA_MBMINMAXLEN(mbminlen, mbmaxlen) \
- ((mbmaxlen) * DATA_MBMAX + (mbminlen))
-/* Get mbminlen from mbminmaxlen. Cast the result of UNIV_EXPECT to ulint
-because in GCC it returns a long. */
-#define DATA_MBMINLEN(mbminmaxlen) ((ulint) \
- UNIV_EXPECT(((mbminmaxlen) % DATA_MBMAX), \
- 1))
-/* Get mbmaxlen from mbminmaxlen. */
-#define DATA_MBMAXLEN(mbminmaxlen) ((ulint) ((mbminmaxlen) / DATA_MBMAX))
-
-/* We now support 15 bits (up to 32767) collation number */
-#define MAX_CHAR_COLL_NUM 32767
-
-/* Mask to get the Charset Collation number (0x7fff) */
-#define CHAR_COLL_MASK MAX_CHAR_COLL_NUM
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Gets the MySQL type code from a dtype.
-@return MySQL type code; this is NOT an InnoDB type code! */
-UNIV_INLINE
-ulint
-dtype_get_mysql_type(
-/*=================*/
- const dtype_t* type); /*!< in: type struct */
-/*********************************************************************//**
-Determine how many bytes the first n characters of the given string occupy.
-If the string is shorter than n characters, returns the number of bytes
-the characters in the string occupy.
-@return length of the prefix, in bytes */
-UNIV_INTERN
-ulint
-dtype_get_at_most_n_mbchars(
-/*========================*/
- ulint prtype, /*!< in: precise type */
- ulint mbminmaxlen, /*!< in: minimum and maximum length of
- a multi-byte character */
- ulint prefix_len, /*!< in: length of the requested
- prefix, in characters, multiplied by
- dtype_get_mbmaxlen(dtype) */
- ulint data_len, /*!< in: length of str (in bytes) */
- const char* str); /*!< in: the string whose prefix
- length is being determined */
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Checks if a data main type is a string type. Also a BLOB is considered a
-string type.
-@return TRUE if string type */
-UNIV_INTERN
-ibool
-dtype_is_string_type(
-/*=================*/
- ulint mtype); /*!< in: InnoDB main data type code: DATA_CHAR, ... */
-/*********************************************************************//**
-Checks if a type is a binary string type. Note that for tables created with
-< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For
-those DATA_BLOB columns this function currently returns FALSE.
-@return TRUE if binary string type */
-UNIV_INTERN
-ibool
-dtype_is_binary_string_type(
-/*========================*/
- ulint mtype, /*!< in: main data type */
- ulint prtype);/*!< in: precise type */
-/*********************************************************************//**
-Checks if a type is a non-binary string type. That is, dtype_is_string_type is
-TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created
-with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column.
-For those DATA_BLOB columns this function currently returns TRUE.
-@return TRUE if non-binary string type */
-UNIV_INTERN
-ibool
-dtype_is_non_binary_string_type(
-/*============================*/
- ulint mtype, /*!< in: main data type */
- ulint prtype);/*!< in: precise type */
-/*********************************************************************//**
-Sets a data type structure. */
-UNIV_INLINE
-void
-dtype_set(
-/*======*/
- dtype_t* type, /*!< in: type struct to init */
- ulint mtype, /*!< in: main data type */
- ulint prtype, /*!< in: precise type */
- ulint len); /*!< in: precision of type */
-/*********************************************************************//**
-Copies a data type structure. */
-UNIV_INLINE
-void
-dtype_copy(
-/*=======*/
- dtype_t* type1, /*!< in: type struct to copy to */
- const dtype_t* type2); /*!< in: type struct to copy from */
-/*********************************************************************//**
-Gets the SQL main data type.
-@return SQL main data type */
-UNIV_INLINE
-ulint
-dtype_get_mtype(
-/*============*/
- const dtype_t* type); /*!< in: data type */
-/*********************************************************************//**
-Gets the precise data type.
-@return precise data type */
-UNIV_INLINE
-ulint
-dtype_get_prtype(
-/*=============*/
- const dtype_t* type); /*!< in: data type */
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Compute the mbminlen and mbmaxlen members of a data type structure. */
-UNIV_INLINE
-void
-dtype_get_mblen(
-/*============*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type (and collation) */
- ulint* mbminlen, /*!< out: minimum length of a
- multi-byte character */
- ulint* mbmaxlen); /*!< out: maximum length of a
- multi-byte character */
-/*********************************************************************//**
-Gets the MySQL charset-collation code for MySQL string types.
-@return MySQL charset-collation code */
-UNIV_INLINE
-ulint
-dtype_get_charset_coll(
-/*===================*/
- ulint prtype);/*!< in: precise data type */
-/*********************************************************************//**
-Forms a precise type from the < 4.1.2 format precise type plus the
-charset-collation code.
-@return precise type, including the charset-collation code */
-UNIV_INTERN
-ulint
-dtype_form_prtype(
-/*==============*/
- ulint old_prtype, /*!< in: the MySQL type code and the flags
- DATA_BINARY_TYPE etc. */
- ulint charset_coll); /*!< in: MySQL charset-collation code */
-/*********************************************************************//**
-Determines if a MySQL string type is a subset of UTF-8. This function
-may return false negatives, in case further character-set collation
-codes are introduced in MySQL later.
-@return TRUE if a subset of UTF-8 */
-UNIV_INLINE
-ibool
-dtype_is_utf8(
-/*==========*/
- ulint prtype);/*!< in: precise data type */
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Gets the type length.
-@return fixed length of the type, in bytes, or 0 if variable-length */
-UNIV_INLINE
-ulint
-dtype_get_len(
-/*==========*/
- const dtype_t* type); /*!< in: data type */
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Gets the minimum length of a character, in bytes.
-@return minimum length of a char, in bytes, or 0 if this is not a
-character type */
-UNIV_INLINE
-ulint
-dtype_get_mbminlen(
-/*===============*/
- const dtype_t* type); /*!< in: type */
-/*********************************************************************//**
-Gets the maximum length of a character, in bytes.
-@return maximum length of a char, in bytes, or 0 if this is not a
-character type */
-UNIV_INLINE
-ulint
-dtype_get_mbmaxlen(
-/*===============*/
- const dtype_t* type); /*!< in: type */
-/*********************************************************************//**
-Sets the minimum and maximum length of a character, in bytes. */
-UNIV_INLINE
-void
-dtype_set_mbminmaxlen(
-/*==================*/
- dtype_t* type, /*!< in/out: type */
- ulint mbminlen, /*!< in: minimum length of a char,
- in bytes, or 0 if this is not
- a character type */
- ulint mbmaxlen); /*!< in: maximum length of a char,
- in bytes, or 0 if this is not
- a character type */
-/*********************************************************************//**
-Gets the padding character code for the type.
-@return padding character code, or ULINT_UNDEFINED if no padding specified */
-UNIV_INLINE
-ulint
-dtype_get_pad_char(
-/*===============*/
- ulint mtype, /*!< in: main type */
- ulint prtype); /*!< in: precise type */
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************//**
-Returns the size of a fixed size data type, 0 if not a fixed size type.
-@return fixed size, or 0 */
-UNIV_INLINE
-ulint
-dtype_get_fixed_size_low(
-/*=====================*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- ulint len, /*!< in: length */
- ulint mbminmaxlen, /*!< in: minimum and maximum length of a
- multibyte character, in bytes */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Returns the minimum size of a data type.
-@return minimum size */
-UNIV_INLINE
-ulint
-dtype_get_min_size_low(
-/*===================*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- ulint len, /*!< in: length */
- ulint mbminmaxlen); /*!< in: minimum and maximum length of a
- multibyte character */
-/***********************************************************************//**
-Returns the maximum size of a data type. Note: types in system tables may be
-incomplete and return incorrect information.
-@return maximum size */
-UNIV_INLINE
-ulint
-dtype_get_max_size_low(
-/*===================*/
- ulint mtype, /*!< in: main type */
- ulint len); /*!< in: length */
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************//**
-Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
-For fixed length types it is the fixed length of the type, otherwise 0.
-@return SQL null storage size in ROW_FORMAT=REDUNDANT */
-UNIV_INLINE
-ulint
-dtype_get_sql_null_size(
-/*====================*/
- const dtype_t* type, /*!< in: type */
- ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. */
-UNIV_INLINE
-void
-dtype_read_for_order_and_null_size(
-/*===============================*/
- dtype_t* type, /*!< in: type struct */
- const byte* buf); /*!< in: buffer for the stored order info */
-/**********************************************************************//**
-Stores for a type the information which determines its alphabetical ordering
-and the storage size of an SQL NULL value. This is the >= 4.1.x storage
-format. */
-UNIV_INLINE
-void
-dtype_new_store_for_order_and_null_size(
-/*====================================*/
- byte* buf, /*!< in: buffer for
- DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
- bytes where we store the info */
- const dtype_t* type, /*!< in: type struct */
- ulint prefix_len);/*!< in: prefix length to
- replace type->len, or 0 */
-/**********************************************************************//**
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. This is the 4.1.x storage
-format. */
-UNIV_INLINE
-void
-dtype_new_read_for_order_and_null_size(
-/*===================================*/
- dtype_t* type, /*!< in: type struct */
- const byte* buf); /*!< in: buffer for stored type order info */
-
-/*********************************************************************//**
-Returns the type's SQL name (e.g. BIGINT UNSIGNED) from mtype,prtype,len
-@return the SQL type name */
-UNIV_INLINE
-char*
-dtype_sql_name(
-/*===========*/
- unsigned mtype, /*!< in: mtype */
- unsigned prtype, /*!< in: prtype */
- unsigned len, /*!< in: len */
- char* name, /*!< out: SQL name */
- unsigned name_sz);/*!< in: size of the name buffer */
-
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Validates a data type structure.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dtype_validate(
-/*===========*/
- const dtype_t* type); /*!< in: type struct to validate */
-/*********************************************************************//**
-Prints a data type structure. */
-UNIV_INTERN
-void
-dtype_print(
-/*========*/
- const dtype_t* type); /*!< in: type */
-
-/* Structure for an SQL data type.
-If you add fields to this structure, be sure to initialize them everywhere.
-This structure is initialized in the following functions:
-dtype_set()
-dtype_read_for_order_and_null_size()
-dtype_new_read_for_order_and_null_size()
-sym_tab_add_null_lit() */
-
-struct dtype_t{
- unsigned prtype:32; /*!< precise type; MySQL data
- type, charset code, flags to
- indicate nullability,
- signedness, whether this is a
- binary string, whether this is
- a true VARCHAR where MySQL
- uses 2 bytes to store the length */
- unsigned mtype:8; /*!< main data type */
-
- /* the remaining fields do not affect alphabetical ordering: */
-
- unsigned len:16; /*!< length; for MySQL data this
- is field->pack_length(),
- except that for a >= 5.0.3
- type true VARCHAR this is the
- maximum byte length of the
- string data (in addition to
- the string, MySQL uses 1 or 2
- bytes to store the string length) */
-#ifndef UNIV_HOTBACKUP
- unsigned mbminmaxlen:5; /*!< minimum and maximum length of a
- character, in bytes;
- DATA_MBMINMAXLEN(mbminlen,mbmaxlen);
- mbminlen=DATA_MBMINLEN(mbminmaxlen);
- mbmaxlen=DATA_MBMINLEN(mbminmaxlen) */
-#endif /* !UNIV_HOTBACKUP */
-};
-
-#ifndef UNIV_NONINL
-#include "data0type.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/data0type.ic b/storage/xtradb/include/data0type.ic
deleted file mode 100644
index 8f5cee0fd5f..00000000000
--- a/storage/xtradb/include/data0type.ic
+++ /dev/null
@@ -1,714 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/data0type.ic
-Data types
-
-Created 1/16/1996 Heikki Tuuri
-*******************************************************/
-
-#include <string.h> /* strlen() */
-
-#include "mach0data.h"
-#include "rem0types.h" /* ZIP_COLUMN_HEADER_LENGTH */
-#ifndef UNIV_HOTBACKUP
-# include "ha_prototypes.h"
-
-/*********************************************************************//**
-Gets the MySQL charset-collation code for MySQL string types.
-@return MySQL charset-collation code */
-UNIV_INLINE
-ulint
-dtype_get_charset_coll(
-/*===================*/
- ulint prtype) /*!< in: precise data type */
-{
- return((prtype >> 16) & CHAR_COLL_MASK);
-}
-
-/*********************************************************************//**
-Determines if a MySQL string type is a subset of UTF-8. This function
-may return false negatives, in case further character-set collation
-codes are introduced in MySQL later.
-@return TRUE if a subset of UTF-8 */
-UNIV_INLINE
-ibool
-dtype_is_utf8(
-/*==========*/
- ulint prtype) /*!< in: precise data type */
-{
- /* These codes have been copied from strings/ctype-extra.c
- and strings/ctype-utf8.c. */
- switch (dtype_get_charset_coll(prtype)) {
- case 11: /* ascii_general_ci */
- case 65: /* ascii_bin */
- case 33: /* utf8_general_ci */
- case 83: /* utf8_bin */
- case 254: /* utf8_general_cs */
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Gets the MySQL type code from a dtype.
-@return MySQL type code; this is NOT an InnoDB type code! */
-UNIV_INLINE
-ulint
-dtype_get_mysql_type(
-/*=================*/
- const dtype_t* type) /*!< in: type struct */
-{
- return(type->prtype & 0xFFUL);
-}
-
-/*********************************************************************//**
-Compute the mbminlen and mbmaxlen members of a data type structure. */
-UNIV_INLINE
-void
-dtype_get_mblen(
-/*============*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type (and collation) */
- ulint* mbminlen, /*!< out: minimum length of a
- multi-byte character */
- ulint* mbmaxlen) /*!< out: maximum length of a
- multi-byte character */
-{
- if (dtype_is_string_type(mtype)) {
- innobase_get_cset_width(dtype_get_charset_coll(prtype),
- mbminlen, mbmaxlen);
- ut_ad(*mbminlen <= *mbmaxlen);
- ut_ad(*mbminlen < DATA_MBMAX);
- ut_ad(*mbmaxlen < DATA_MBMAX);
- } else {
- *mbminlen = *mbmaxlen = 0;
- }
-}
-
-/*********************************************************************//**
-Sets the minimum and maximum length of a character, in bytes. */
-UNIV_INLINE
-void
-dtype_set_mbminmaxlen(
-/*==================*/
- dtype_t* type, /*!< in/out: type */
- ulint mbminlen, /*!< in: minimum length of a char,
- in bytes, or 0 if this is not
- a character type */
- ulint mbmaxlen) /*!< in: maximum length of a char,
- in bytes, or 0 if this is not
- a character type */
-{
- ut_ad(mbminlen < DATA_MBMAX);
- ut_ad(mbmaxlen < DATA_MBMAX);
- ut_ad(mbminlen <= mbmaxlen);
-
- type->mbminmaxlen = DATA_MBMINMAXLEN(mbminlen, mbmaxlen);
-}
-
-/*********************************************************************//**
-Compute the mbminlen and mbmaxlen members of a data type structure. */
-UNIV_INLINE
-void
-dtype_set_mblen(
-/*============*/
- dtype_t* type) /*!< in/out: type */
-{
- ulint mbminlen;
- ulint mbmaxlen;
-
- dtype_get_mblen(type->mtype, type->prtype, &mbminlen, &mbmaxlen);
- dtype_set_mbminmaxlen(type, mbminlen, mbmaxlen);
-
- ut_ad(dtype_validate(type));
-}
-#else /* !UNIV_HOTBACKUP */
-# define dtype_set_mblen(type) (void) 0
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Sets a data type structure. */
-UNIV_INLINE
-void
-dtype_set(
-/*======*/
- dtype_t* type, /*!< in: type struct to init */
- ulint mtype, /*!< in: main data type */
- ulint prtype, /*!< in: precise type */
- ulint len) /*!< in: precision of type */
-{
- ut_ad(type);
- ut_ad(mtype <= DATA_MTYPE_MAX);
-
- type->mtype = mtype;
- type->prtype = prtype;
- type->len = len;
-
- dtype_set_mblen(type);
-}
-
-/*********************************************************************//**
-Copies a data type structure. */
-UNIV_INLINE
-void
-dtype_copy(
-/*=======*/
- dtype_t* type1, /*!< in: type struct to copy to */
- const dtype_t* type2) /*!< in: type struct to copy from */
-{
- *type1 = *type2;
-
- ut_ad(dtype_validate(type1));
-}
-
-/*********************************************************************//**
-Gets the SQL main data type.
-@return SQL main data type */
-UNIV_INLINE
-ulint
-dtype_get_mtype(
-/*============*/
- const dtype_t* type) /*!< in: data type */
-{
- ut_ad(type);
-
- return(type->mtype);
-}
-
-/*********************************************************************//**
-Gets the precise data type.
-@return precise data type */
-UNIV_INLINE
-ulint
-dtype_get_prtype(
-/*=============*/
- const dtype_t* type) /*!< in: data type */
-{
- ut_ad(type);
-
- return(type->prtype);
-}
-
-/*********************************************************************//**
-Gets the type length.
-@return fixed length of the type, in bytes, or 0 if variable-length */
-UNIV_INLINE
-ulint
-dtype_get_len(
-/*==========*/
- const dtype_t* type) /*!< in: data type */
-{
- ut_ad(type);
-
- return(type->len);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Gets the minimum length of a character, in bytes.
-@return minimum length of a char, in bytes, or 0 if this is not a
-character type */
-UNIV_INLINE
-ulint
-dtype_get_mbminlen(
-/*===============*/
- const dtype_t* type) /*!< in: type */
-{
- ut_ad(type);
- return(DATA_MBMINLEN(type->mbminmaxlen));
-}
-/*********************************************************************//**
-Gets the maximum length of a character, in bytes.
-@return maximum length of a char, in bytes, or 0 if this is not a
-character type */
-UNIV_INLINE
-ulint
-dtype_get_mbmaxlen(
-/*===============*/
- const dtype_t* type) /*!< in: type */
-{
- ut_ad(type);
- return(DATA_MBMAXLEN(type->mbminmaxlen));
-}
-
-/*********************************************************************//**
-Gets the padding character code for a type.
-@return padding character code, or ULINT_UNDEFINED if no padding specified */
-UNIV_INLINE
-ulint
-dtype_get_pad_char(
-/*===============*/
- ulint mtype, /*!< in: main type */
- ulint prtype) /*!< in: precise type */
-{
- switch (mtype) {
- case DATA_FIXBINARY:
- case DATA_BINARY:
- if (dtype_get_charset_coll(prtype)
- == DATA_MYSQL_BINARY_CHARSET_COLL) {
- /* Starting from 5.0.18, do not pad
- VARBINARY or BINARY columns. */
- return(ULINT_UNDEFINED);
- }
- /* Fall through */
- case DATA_CHAR:
- case DATA_VARCHAR:
- case DATA_MYSQL:
- case DATA_VARMYSQL:
- /* Space is the padding character for all char and binary
- strings, and starting from 5.0.3, also for TEXT strings. */
-
- return(0x20);
- case DATA_BLOB:
- if (!(prtype & DATA_BINARY_TYPE)) {
- return(0x20);
- }
- /* Fall through */
- default:
- /* No padding specified */
- return(ULINT_UNDEFINED);
- }
-}
-
-/**********************************************************************//**
-Stores for a type the information which determines its alphabetical ordering
-and the storage size of an SQL NULL value. This is the >= 4.1.x storage
-format. */
-UNIV_INLINE
-void
-dtype_new_store_for_order_and_null_size(
-/*====================================*/
- byte* buf, /*!< in: buffer for
- DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
- bytes where we store the info */
- const dtype_t* type, /*!< in: type struct */
- ulint prefix_len)/*!< in: prefix length to
- replace type->len, or 0 */
-{
-#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
-#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
- ulint len;
-
- ut_ad(type);
- ut_ad(type->mtype >= DATA_VARCHAR);
- ut_ad(type->mtype <= DATA_MYSQL);
-
- buf[0] = (byte)(type->mtype & 0xFFUL);
-
- if (type->prtype & DATA_BINARY_TYPE) {
- buf[0] |= 128;
- }
-
- /* In versions < 4.1.2 we had: if (type->prtype & DATA_NONLATIN1) {
- buf[0] |= 64;
- }
- */
-
- buf[1] = (byte)(type->prtype & 0xFFUL);
-
- len = prefix_len ? prefix_len : type->len;
-
- mach_write_to_2(buf + 2, len & 0xFFFFUL);
-
- ut_ad(dtype_get_charset_coll(type->prtype) <= MAX_CHAR_COLL_NUM);
- mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype));
-
- if (type->prtype & DATA_NOT_NULL) {
- buf[4] |= 128;
- }
-}
-
-/**********************************************************************//**
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. This is the < 4.1.x
-storage format. */
-UNIV_INLINE
-void
-dtype_read_for_order_and_null_size(
-/*===============================*/
- dtype_t* type, /*!< in: type struct */
- const byte* buf) /*!< in: buffer for stored type order info */
-{
-#if 4 != DATA_ORDER_NULL_TYPE_BUF_SIZE
-# error "4 != DATA_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
-
- type->mtype = buf[0] & 63;
- type->prtype = buf[1];
-
- if (buf[0] & 128) {
- type->prtype |= DATA_BINARY_TYPE;
- }
-
- type->len = mach_read_from_2(buf + 2);
-
- type->prtype = dtype_form_prtype(type->prtype,
- data_mysql_default_charset_coll);
- dtype_set_mblen(type);
-}
-
-/**********************************************************************//**
-Reads to a type the stored information which determines its alphabetical
-ordering and the storage size of an SQL NULL value. This is the >= 4.1.x
-storage format. */
-UNIV_INLINE
-void
-dtype_new_read_for_order_and_null_size(
-/*===================================*/
- dtype_t* type, /*!< in: type struct */
- const byte* buf) /*!< in: buffer for stored type order info */
-{
- ulint charset_coll;
-
-#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
-#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
-
- type->mtype = buf[0] & 63;
- type->prtype = buf[1];
-
- if (buf[0] & 128) {
- type->prtype |= DATA_BINARY_TYPE;
- }
-
- if (buf[4] & 128) {
- type->prtype |= DATA_NOT_NULL;
- }
-
- type->len = mach_read_from_2(buf + 2);
-
- charset_coll = mach_read_from_2(buf + 4) & CHAR_COLL_MASK;
-
- if (dtype_is_string_type(type->mtype)) {
- ut_a(charset_coll <= MAX_CHAR_COLL_NUM);
-
- if (charset_coll == 0) {
- /* This insert buffer record was inserted with MySQL
- version < 4.1.2, and the charset-collation code was not
- explicitly stored to dtype->prtype at that time. It
- must be the default charset-collation of this MySQL
- installation. */
-
- charset_coll = data_mysql_default_charset_coll;
- }
-
- type->prtype = dtype_form_prtype(type->prtype, charset_coll);
- }
- dtype_set_mblen(type);
-}
-
-/*********************************************************************//**
-Returns the type's SQL name (e.g. BIGINT UNSIGNED) from mtype,prtype,len
-@return the SQL type name */
-UNIV_INLINE
-char*
-dtype_sql_name(
-/*===========*/
- unsigned mtype, /*!< in: mtype */
- unsigned prtype, /*!< in: prtype */
- unsigned len, /*!< in: len */
- char* name, /*!< out: SQL name */
- unsigned name_sz)/*!< in: size of the name buffer */
-{
-
-#define APPEND_UNSIGNED() \
- do { \
- if (prtype & DATA_UNSIGNED) { \
- ut_snprintf(name + strlen(name), \
- name_sz - strlen(name), \
- " UNSIGNED"); \
- } \
- } while (0)
-
- ut_snprintf(name, name_sz, "UNKNOWN");
-
- switch (mtype) {
- case DATA_INT:
- switch (len) {
- case 1:
- ut_snprintf(name, name_sz, "TINYINT");
- break;
- case 2:
- ut_snprintf(name, name_sz, "SMALLINT");
- break;
- case 3:
- ut_snprintf(name, name_sz, "MEDIUMINT");
- break;
- case 4:
- ut_snprintf(name, name_sz, "INT");
- break;
- case 8:
- ut_snprintf(name, name_sz, "BIGINT");
- break;
- }
- APPEND_UNSIGNED();
- break;
- case DATA_FLOAT:
- ut_snprintf(name, name_sz, "FLOAT");
- APPEND_UNSIGNED();
- break;
- case DATA_DOUBLE:
- ut_snprintf(name, name_sz, "DOUBLE");
- APPEND_UNSIGNED();
- break;
- case DATA_FIXBINARY:
- ut_snprintf(name, name_sz, "BINARY(%u)", len);
- break;
- case DATA_CHAR:
- case DATA_MYSQL:
- ut_snprintf(name, name_sz, "CHAR(%u)", len);
- break;
- case DATA_VARCHAR:
- case DATA_VARMYSQL:
- ut_snprintf(name, name_sz, "VARCHAR(%u)", len);
- break;
- case DATA_BINARY:
- ut_snprintf(name, name_sz, "VARBINARY(%u)", len);
- break;
- case DATA_BLOB:
- switch (len) {
- case 9:
- ut_snprintf(name, name_sz, "TINYBLOB");
- break;
- case 10:
- ut_snprintf(name, name_sz, "BLOB");
- break;
- case 11:
- ut_snprintf(name, name_sz, "MEDIUMBLOB");
- break;
- case 12:
- ut_snprintf(name, name_sz, "LONGBLOB");
- break;
- }
- }
-
- if (prtype & DATA_NOT_NULL) {
- ut_snprintf(name + strlen(name),
- name_sz - strlen(name),
- " NOT NULL");
- }
-
- return(name);
-}
-
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************************//**
-Returns the size of a fixed size data type, 0 if not a fixed size type.
-@return fixed size, or 0 */
-UNIV_INLINE
-ulint
-dtype_get_fixed_size_low(
-/*=====================*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- ulint len, /*!< in: length */
- ulint mbminmaxlen, /*!< in: minimum and maximum length of
- a multibyte character, in bytes */
- ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
-{
- switch (mtype) {
- case DATA_SYS:
-#ifdef UNIV_DEBUG
- switch (prtype & DATA_MYSQL_TYPE_MASK) {
- case DATA_ROW_ID:
- ut_ad(len == DATA_ROW_ID_LEN);
- break;
- case DATA_TRX_ID:
- ut_ad(len == DATA_TRX_ID_LEN);
- break;
- case DATA_ROLL_PTR:
- ut_ad(len == DATA_ROLL_PTR_LEN);
- break;
- default:
- ut_ad(0);
- return(0);
- }
-#endif /* UNIV_DEBUG */
- case DATA_CHAR:
- case DATA_FIXBINARY:
- case DATA_INT:
- case DATA_FLOAT:
- case DATA_DOUBLE:
- return(len);
- case DATA_MYSQL:
-#ifndef UNIV_HOTBACKUP
- if (prtype & DATA_BINARY_TYPE) {
- return(len);
- } else if (!comp) {
- return(len);
- } else {
-#ifdef UNIV_DEBUG
- ulint i_mbminlen, i_mbmaxlen;
-
- innobase_get_cset_width(
- dtype_get_charset_coll(prtype),
- &i_mbminlen, &i_mbmaxlen);
-
- ut_ad(DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen)
- == mbminmaxlen);
-#endif /* UNIV_DEBUG */
- if (DATA_MBMINLEN(mbminmaxlen)
- == DATA_MBMAXLEN(mbminmaxlen)) {
- return(len);
- }
- }
-#else /* !UNIV_HOTBACKUP */
- return(len);
-#endif /* !UNIV_HOTBACKUP */
- /* Treat as variable-length. */
- /* Fall through */
- case DATA_VARCHAR:
- case DATA_BINARY:
- case DATA_DECIMAL:
- case DATA_VARMYSQL:
- case DATA_BLOB:
- return(0);
- default:
- ut_error;
- }
-
- return(0);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Returns the minimum size of a data type.
-@return minimum size */
-UNIV_INLINE
-ulint
-dtype_get_min_size_low(
-/*===================*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- ulint len, /*!< in: length */
- ulint mbminmaxlen) /*!< in: minimum and maximum length of a
- multi-byte character */
-{
- switch (mtype) {
- case DATA_SYS:
-#ifdef UNIV_DEBUG
- switch (prtype & DATA_MYSQL_TYPE_MASK) {
- case DATA_ROW_ID:
- ut_ad(len == DATA_ROW_ID_LEN);
- break;
- case DATA_TRX_ID:
- ut_ad(len == DATA_TRX_ID_LEN);
- break;
- case DATA_ROLL_PTR:
- ut_ad(len == DATA_ROLL_PTR_LEN);
- break;
- default:
- ut_ad(0);
- return(0);
- }
-#endif /* UNIV_DEBUG */
- case DATA_CHAR:
- case DATA_FIXBINARY:
- case DATA_INT:
- case DATA_FLOAT:
- case DATA_DOUBLE:
- return(len);
- case DATA_MYSQL:
- if (prtype & DATA_BINARY_TYPE) {
- return(len);
- } else {
- ulint mbminlen = DATA_MBMINLEN(mbminmaxlen);
- ulint mbmaxlen = DATA_MBMAXLEN(mbminmaxlen);
-
- if (mbminlen == mbmaxlen) {
- return(len);
- }
-
- /* this is a variable-length character set */
- ut_a(mbminlen > 0);
- ut_a(mbmaxlen > mbminlen);
- ut_a(len % mbmaxlen == 0);
- return(len * mbminlen / mbmaxlen);
- }
- case DATA_VARCHAR:
- case DATA_BINARY:
- case DATA_DECIMAL:
- case DATA_VARMYSQL:
- case DATA_BLOB:
- return(0);
- default:
- ut_error;
- }
-
- return(0);
-}
-
-/***********************************************************************//**
-Returns the maximum size of a data type. Note: types in system tables may be
-incomplete and return incorrect information.
-@return maximum size */
-UNIV_INLINE
-ulint
-dtype_get_max_size_low(
-/*===================*/
- ulint mtype, /*!< in: main type */
- ulint len) /*!< in: length */
-{
- switch (mtype) {
- case DATA_SYS:
- case DATA_CHAR:
- case DATA_FIXBINARY:
- case DATA_INT:
- case DATA_FLOAT:
- case DATA_DOUBLE:
- case DATA_MYSQL:
- case DATA_VARCHAR:
- case DATA_BINARY:
- case DATA_DECIMAL:
- case DATA_VARMYSQL:
- return(len);
- case DATA_BLOB:
- break;
- default:
- ut_error;
- }
-
- return(ULINT_MAX);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************************//**
-Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type.
-For fixed length types it is the fixed length of the type, otherwise 0.
-@return SQL null storage size in ROW_FORMAT=REDUNDANT */
-UNIV_INLINE
-ulint
-dtype_get_sql_null_size(
-/*====================*/
- const dtype_t* type, /*!< in: type */
- ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
-{
-#ifndef UNIV_HOTBACKUP
- return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
- type->mbminmaxlen, comp));
-#else /* !UNIV_HOTBACKUP */
- return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
- 0, 0));
-#endif /* !UNIV_HOTBACKUP */
-}
diff --git a/storage/xtradb/include/data0types.h b/storage/xtradb/include/data0types.h
deleted file mode 100644
index bd2bb577611..00000000000
--- a/storage/xtradb/include/data0types.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2000, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/data0types.h
-Some type definitions
-
-Created 9/21/2000 Heikki Tuuri
-*************************************************************************/
-
-#ifndef data0types_h
-#define data0types_h
-
-/* SQL data field struct */
-struct dfield_t;
-
-/* SQL data tuple struct */
-struct dtuple_t;
-
-#endif
-
diff --git a/storage/xtradb/include/db0err.h b/storage/xtradb/include/db0err.h
deleted file mode 100644
index 8bd3beda110..00000000000
--- a/storage/xtradb/include/db0err.h
+++ /dev/null
@@ -1,170 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2014, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/db0err.h
-Global error codes for the database
-
-Created 5/24/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef db0err_h
-#define db0err_h
-
-
-enum dberr_t {
- DB_SUCCESS_LOCKED_REC = 9, /*!< like DB_SUCCESS, but a new
- explicit record lock was created */
- DB_SUCCESS = 10,
-
- /* The following are error codes */
- DB_ERROR,
- DB_INTERRUPTED,
- DB_OUT_OF_MEMORY,
- DB_OUT_OF_FILE_SPACE,
- DB_LOCK_WAIT,
- DB_DEADLOCK,
- DB_ROLLBACK,
- DB_DUPLICATE_KEY,
- DB_QUE_THR_SUSPENDED,
- DB_MISSING_HISTORY, /*!< required history data has been
- deleted due to lack of space in
- rollback segment */
- DB_CLUSTER_NOT_FOUND = 30,
- DB_TABLE_NOT_FOUND,
- DB_MUST_GET_MORE_FILE_SPACE, /*!< the database has to be stopped
- and restarted with more file space */
- DB_TABLE_IS_BEING_USED,
- DB_TOO_BIG_RECORD, /*!< a record in an index would not fit
- on a compressed page, or it would
- become bigger than 1/2 free space in
- an uncompressed page frame */
- DB_LOCK_WAIT_TIMEOUT, /*!< lock wait lasted too long */
- DB_NO_REFERENCED_ROW, /*!< referenced key value not found
- for a foreign key in an insert or
- update of a row */
- DB_ROW_IS_REFERENCED, /*!< cannot delete or update a row
- because it contains a key value
- which is referenced */
- DB_CANNOT_ADD_CONSTRAINT, /*!< adding a foreign key constraint
- to a table failed */
- DB_CORRUPTION, /*!< data structure corruption noticed */
- DB_CANNOT_DROP_CONSTRAINT, /*!< dropping a foreign key constraint
- from a table failed */
- DB_NO_SAVEPOINT, /*!< no savepoint exists with the given
- name */
- DB_TABLESPACE_EXISTS, /*!< we cannot create a new single-table
- tablespace because a file of the same
- name already exists */
- DB_TABLESPACE_DELETED, /*!< tablespace was deleted or is
- being dropped right now */
- DB_TABLESPACE_NOT_FOUND, /*<! Attempt to delete a tablespace
- instance that was not found in the
- tablespace hash table */
- DB_LOCK_TABLE_FULL, /*!< lock structs have exhausted the
- buffer pool (for big transactions,
- InnoDB stores the lock structs in the
- buffer pool) */
- DB_FOREIGN_DUPLICATE_KEY, /*!< foreign key constraints
- activated by the operation would
- lead to a duplicate key in some
- table */
- DB_TOO_MANY_CONCURRENT_TRXS, /*!< when InnoDB runs out of the
- preconfigured undo slots, this can
- only happen when there are too many
- concurrent transactions */
- DB_UNSUPPORTED, /*!< when InnoDB sees any artefact or
- a feature that it can't recoginize or
- work with e.g., FT indexes created by
- a later version of the engine. */
-
- DB_INVALID_NULL, /*!< a NOT NULL column was found to
- be NULL during table rebuild */
-
- DB_STATS_DO_NOT_EXIST, /*!< an operation that requires the
- persistent storage, used for recording
- table and index statistics, was
- requested but this storage does not
- exist itself or the stats for a given
- table do not exist */
- DB_FOREIGN_EXCEED_MAX_CASCADE, /*!< Foreign key constraint related
- cascading delete/update exceeds
- maximum allowed depth */
- DB_CHILD_NO_INDEX, /*!< the child (foreign) table does
- not have an index that contains the
- foreign keys as its prefix columns */
- DB_PARENT_NO_INDEX, /*!< the parent table does not
- have an index that contains the
- foreign keys as its prefix columns */
- DB_TOO_BIG_INDEX_COL, /*!< index column size exceeds
- maximum limit */
- DB_INDEX_CORRUPT, /*!< we have corrupted index */
- DB_UNDO_RECORD_TOO_BIG, /*!< the undo log record is too big */
- DB_READ_ONLY, /*!< Update operation attempted in
- a read-only transaction */
- DB_FTS_INVALID_DOCID, /* FTS Doc ID cannot be zero */
- DB_TABLE_IN_FK_CHECK, /* table is being used in foreign
- key check */
- DB_ONLINE_LOG_TOO_BIG, /*!< Modification log grew too big
- during online index creation */
-
- DB_IO_ERROR, /*!< Generic IO error */
- DB_IDENTIFIER_TOO_LONG, /*!< Identifier name too long */
- DB_FTS_EXCEED_RESULT_CACHE_LIMIT, /*!< FTS query memory
- exceeds result cache limit */
- DB_TEMP_FILE_WRITE_FAILURE, /*!< Temp file write failure */
- DB_FTS_TOO_MANY_WORDS_IN_PHRASE,
- /*< Too many words in a phrase */
- DB_TOO_BIG_FOR_REDO, /* Record length greater than 10%
- of redo log */
- DB_DECRYPTION_FAILED, /* Tablespace encrypted and
- decrypt operation failed because
- of missing key management plugin,
- or missing or incorrect key or
- incorret AES method or algorithm. */
- DB_PAGE_CORRUPTED, /* Page read from tablespace is
- corrupted. */
- /* The following are partial failure codes */
- DB_FAIL = 1000,
- DB_OVERFLOW,
- DB_UNDERFLOW,
- DB_STRONG_FAIL,
- DB_ZIP_OVERFLOW,
- DB_RECORD_NOT_FOUND = 1500,
- DB_END_OF_INDEX,
- DB_DICT_CHANGED, /*!< Some part of table dictionary has
- changed. Such as index dropped or
- foreign key dropped */
-
- DB_SEARCH_ABORTED_BY_USER= 1533,
-
- /* The following are API only error codes. */
- DB_DATA_MISMATCH = 2000, /*!< Column update or read failed
- because the types mismatch */
-
- DB_SCHEMA_NOT_LOCKED, /*!< If an API function expects the
- schema to be locked in exclusive mode
- and if it's not then that API function
- will return this error code */
-
- DB_NOT_FOUND /*!< Generic error code for "Not found"
- type of errors */
-};
-
-#endif
diff --git a/storage/xtradb/include/dict0boot.h b/storage/xtradb/include/dict0boot.h
deleted file mode 100644
index 4fd9b0b7f98..00000000000
--- a/storage/xtradb/include/dict0boot.h
+++ /dev/null
@@ -1,343 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0boot.h
-Data dictionary creation and booting
-
-Created 4/18/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0boot_h
-#define dict0boot_h
-
-#include "univ.i"
-
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "ut0byte.h"
-#include "buf0buf.h"
-#include "fsp0fsp.h"
-#include "dict0dict.h"
-
-typedef byte dict_hdr_t;
-
-/**********************************************************************//**
-Gets a pointer to the dictionary header and x-latches its page.
-@return pointer to the dictionary header, page x-latched */
-UNIV_INTERN
-dict_hdr_t*
-dict_hdr_get(
-/*=========*/
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-Returns a new table, index, or space id. */
-UNIV_INTERN
-void
-dict_hdr_get_new_id(
-/*================*/
- table_id_t* table_id, /*!< out: table id
- (not assigned if NULL) */
- index_id_t* index_id, /*!< out: index id
- (not assigned if NULL) */
- ulint* space_id); /*!< out: space id
- (not assigned if NULL) */
-/**********************************************************************//**
-Writes the current value of the row id counter to the dictionary header file
-page. */
-UNIV_INTERN
-void
-dict_hdr_flush_row_id(void);
-/*=======================*/
-/**********************************************************************//**
-Returns a new row id.
-@return the new id */
-UNIV_INLINE
-row_id_t
-dict_sys_get_new_row_id(void);
-/*=========================*/
-/**********************************************************************//**
-Reads a row id from a record or other 6-byte stored form.
-@return row id */
-UNIV_INLINE
-row_id_t
-dict_sys_read_row_id(
-/*=================*/
- const byte* field); /*!< in: record field */
-/**********************************************************************//**
-Writes a row id to a record or other 6-byte stored form. */
-UNIV_INLINE
-void
-dict_sys_write_row_id(
-/*==================*/
- byte* field, /*!< in: record field */
- row_id_t row_id);/*!< in: row id */
-/*****************************************************************//**
-Initializes the data dictionary memory structures when the database is
-started. This function is also called when the data dictionary is created.
-@return DB_SUCCESS or error code. */
-UNIV_INTERN
-dberr_t
-dict_boot(void)
-/*===========*/
- MY_ATTRIBUTE((warn_unused_result));
-
-/*****************************************************************//**
-Creates and initializes the data dictionary at the server bootstrap.
-@return DB_SUCCESS or error code. */
-UNIV_INTERN
-dberr_t
-dict_create(void)
-/*=============*/
- MY_ATTRIBUTE((warn_unused_result));
-
-/*********************************************************************//**
-Check if a table id belongs to system table.
-@return true if the table id belongs to a system table. */
-UNIV_INLINE
-bool
-dict_is_sys_table(
-/*==============*/
- table_id_t id) /*!< in: table id to check */
- MY_ATTRIBUTE((warn_unused_result));
-
-/* Space id and page no where the dictionary header resides */
-#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
-#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
-
-/* The ids for the basic system tables and their indexes */
-#define DICT_TABLES_ID 1
-#define DICT_COLUMNS_ID 2
-#define DICT_INDEXES_ID 3
-#define DICT_FIELDS_ID 4
-/* The following is a secondary index on SYS_TABLES */
-#define DICT_TABLE_IDS_ID 5
-
-#define DICT_HDR_FIRST_ID 10 /* the ids for tables etc. start
- from this number, except for basic
- system tables and their above defined
- indexes; ibuf tables and indexes are
- assigned as the id the number
- DICT_IBUF_ID_MIN plus the space id */
-
-/* The offset of the dictionary header on the page */
-#define DICT_HDR FSEG_PAGE_DATA
-
-/*-------------------------------------------------------------*/
-/* Dictionary header offsets */
-#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */
-#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */
-#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */
-#define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id,or 0*/
-#define DICT_HDR_MIX_ID_LOW 28 /* Obsolete,always DICT_HDR_FIRST_ID*/
-#define DICT_HDR_TABLES 32 /* Root of SYS_TABLES clust index */
-#define DICT_HDR_TABLE_IDS 36 /* Root of SYS_TABLE_IDS sec index */
-#define DICT_HDR_COLUMNS 40 /* Root of SYS_COLUMNS clust index */
-#define DICT_HDR_INDEXES 44 /* Root of SYS_INDEXES clust index */
-#define DICT_HDR_FIELDS 48 /* Root of SYS_FIELDS clust index */
-
-#define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace
- segment into which the dictionary
- header is created */
-/*-------------------------------------------------------------*/
-
-/* The columns in SYS_TABLES */
-enum dict_col_sys_tables_enum {
- DICT_COL__SYS_TABLES__NAME = 0,
- DICT_COL__SYS_TABLES__ID = 1,
- DICT_COL__SYS_TABLES__N_COLS = 2,
- DICT_COL__SYS_TABLES__TYPE = 3,
- DICT_COL__SYS_TABLES__MIX_ID = 4,
- DICT_COL__SYS_TABLES__MIX_LEN = 5,
- DICT_COL__SYS_TABLES__CLUSTER_ID = 6,
- DICT_COL__SYS_TABLES__SPACE = 7,
- DICT_NUM_COLS__SYS_TABLES = 8
-};
-/* The field numbers in the SYS_TABLES clustered index */
-enum dict_fld_sys_tables_enum {
- DICT_FLD__SYS_TABLES__NAME = 0,
- DICT_FLD__SYS_TABLES__DB_TRX_ID = 1,
- DICT_FLD__SYS_TABLES__DB_ROLL_PTR = 2,
- DICT_FLD__SYS_TABLES__ID = 3,
- DICT_FLD__SYS_TABLES__N_COLS = 4,
- DICT_FLD__SYS_TABLES__TYPE = 5,
- DICT_FLD__SYS_TABLES__MIX_ID = 6,
- DICT_FLD__SYS_TABLES__MIX_LEN = 7,
- DICT_FLD__SYS_TABLES__CLUSTER_ID = 8,
- DICT_FLD__SYS_TABLES__SPACE = 9,
- DICT_NUM_FIELDS__SYS_TABLES = 10
-};
-/* The field numbers in the SYS_TABLE_IDS index */
-enum dict_fld_sys_table_ids_enum {
- DICT_FLD__SYS_TABLE_IDS__ID = 0,
- DICT_FLD__SYS_TABLE_IDS__NAME = 1,
- DICT_NUM_FIELDS__SYS_TABLE_IDS = 2
-};
-/* The columns in SYS_COLUMNS */
-enum dict_col_sys_columns_enum {
- DICT_COL__SYS_COLUMNS__TABLE_ID = 0,
- DICT_COL__SYS_COLUMNS__POS = 1,
- DICT_COL__SYS_COLUMNS__NAME = 2,
- DICT_COL__SYS_COLUMNS__MTYPE = 3,
- DICT_COL__SYS_COLUMNS__PRTYPE = 4,
- DICT_COL__SYS_COLUMNS__LEN = 5,
- DICT_COL__SYS_COLUMNS__PREC = 6,
- DICT_NUM_COLS__SYS_COLUMNS = 7
-};
-/* The field numbers in the SYS_COLUMNS clustered index */
-enum dict_fld_sys_columns_enum {
- DICT_FLD__SYS_COLUMNS__TABLE_ID = 0,
- DICT_FLD__SYS_COLUMNS__POS = 1,
- DICT_FLD__SYS_COLUMNS__DB_TRX_ID = 2,
- DICT_FLD__SYS_COLUMNS__DB_ROLL_PTR = 3,
- DICT_FLD__SYS_COLUMNS__NAME = 4,
- DICT_FLD__SYS_COLUMNS__MTYPE = 5,
- DICT_FLD__SYS_COLUMNS__PRTYPE = 6,
- DICT_FLD__SYS_COLUMNS__LEN = 7,
- DICT_FLD__SYS_COLUMNS__PREC = 8,
- DICT_NUM_FIELDS__SYS_COLUMNS = 9
-};
-/* The columns in SYS_INDEXES */
-enum dict_col_sys_indexes_enum {
- DICT_COL__SYS_INDEXES__TABLE_ID = 0,
- DICT_COL__SYS_INDEXES__ID = 1,
- DICT_COL__SYS_INDEXES__NAME = 2,
- DICT_COL__SYS_INDEXES__N_FIELDS = 3,
- DICT_COL__SYS_INDEXES__TYPE = 4,
- DICT_COL__SYS_INDEXES__SPACE = 5,
- DICT_COL__SYS_INDEXES__PAGE_NO = 6,
- DICT_NUM_COLS__SYS_INDEXES = 7
-};
-/* The field numbers in the SYS_INDEXES clustered index */
-enum dict_fld_sys_indexes_enum {
- DICT_FLD__SYS_INDEXES__TABLE_ID = 0,
- DICT_FLD__SYS_INDEXES__ID = 1,
- DICT_FLD__SYS_INDEXES__DB_TRX_ID = 2,
- DICT_FLD__SYS_INDEXES__DB_ROLL_PTR = 3,
- DICT_FLD__SYS_INDEXES__NAME = 4,
- DICT_FLD__SYS_INDEXES__N_FIELDS = 5,
- DICT_FLD__SYS_INDEXES__TYPE = 6,
- DICT_FLD__SYS_INDEXES__SPACE = 7,
- DICT_FLD__SYS_INDEXES__PAGE_NO = 8,
- DICT_NUM_FIELDS__SYS_INDEXES = 9
-};
-/* The columns in SYS_FIELDS */
-enum dict_col_sys_fields_enum {
- DICT_COL__SYS_FIELDS__INDEX_ID = 0,
- DICT_COL__SYS_FIELDS__POS = 1,
- DICT_COL__SYS_FIELDS__COL_NAME = 2,
- DICT_NUM_COLS__SYS_FIELDS = 3
-};
-/* The field numbers in the SYS_FIELDS clustered index */
-enum dict_fld_sys_fields_enum {
- DICT_FLD__SYS_FIELDS__INDEX_ID = 0,
- DICT_FLD__SYS_FIELDS__POS = 1,
- DICT_FLD__SYS_FIELDS__DB_TRX_ID = 2,
- DICT_FLD__SYS_FIELDS__DB_ROLL_PTR = 3,
- DICT_FLD__SYS_FIELDS__COL_NAME = 4,
- DICT_NUM_FIELDS__SYS_FIELDS = 5
-};
-/* The columns in SYS_FOREIGN */
-enum dict_col_sys_foreign_enum {
- DICT_COL__SYS_FOREIGN__ID = 0,
- DICT_COL__SYS_FOREIGN__FOR_NAME = 1,
- DICT_COL__SYS_FOREIGN__REF_NAME = 2,
- DICT_COL__SYS_FOREIGN__N_COLS = 3,
- DICT_NUM_COLS__SYS_FOREIGN = 4
-};
-/* The field numbers in the SYS_FOREIGN clustered index */
-enum dict_fld_sys_foreign_enum {
- DICT_FLD__SYS_FOREIGN__ID = 0,
- DICT_FLD__SYS_FOREIGN__DB_TRX_ID = 1,
- DICT_FLD__SYS_FOREIGN__DB_ROLL_PTR = 2,
- DICT_FLD__SYS_FOREIGN__FOR_NAME = 3,
- DICT_FLD__SYS_FOREIGN__REF_NAME = 4,
- DICT_FLD__SYS_FOREIGN__N_COLS = 5,
- DICT_NUM_FIELDS__SYS_FOREIGN = 6
-};
-/* The field numbers in the SYS_FOREIGN_FOR_NAME secondary index */
-enum dict_fld_sys_foreign_for_name_enum {
- DICT_FLD__SYS_FOREIGN_FOR_NAME__NAME = 0,
- DICT_FLD__SYS_FOREIGN_FOR_NAME__ID = 1,
- DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME = 2
-};
-/* The columns in SYS_FOREIGN_COLS */
-enum dict_col_sys_foreign_cols_enum {
- DICT_COL__SYS_FOREIGN_COLS__ID = 0,
- DICT_COL__SYS_FOREIGN_COLS__POS = 1,
- DICT_COL__SYS_FOREIGN_COLS__FOR_COL_NAME = 2,
- DICT_COL__SYS_FOREIGN_COLS__REF_COL_NAME = 3,
- DICT_NUM_COLS__SYS_FOREIGN_COLS = 4
-};
-/* The field numbers in the SYS_FOREIGN_COLS clustered index */
-enum dict_fld_sys_foreign_cols_enum {
- DICT_FLD__SYS_FOREIGN_COLS__ID = 0,
- DICT_FLD__SYS_FOREIGN_COLS__POS = 1,
- DICT_FLD__SYS_FOREIGN_COLS__DB_TRX_ID = 2,
- DICT_FLD__SYS_FOREIGN_COLS__DB_ROLL_PTR = 3,
- DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME = 4,
- DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME = 5,
- DICT_NUM_FIELDS__SYS_FOREIGN_COLS = 6
-};
-/* The columns in SYS_TABLESPACES */
-enum dict_col_sys_tablespaces_enum {
- DICT_COL__SYS_TABLESPACES__SPACE = 0,
- DICT_COL__SYS_TABLESPACES__NAME = 1,
- DICT_COL__SYS_TABLESPACES__FLAGS = 2,
- DICT_NUM_COLS__SYS_TABLESPACES = 3
-};
-/* The field numbers in the SYS_TABLESPACES clustered index */
-enum dict_fld_sys_tablespaces_enum {
- DICT_FLD__SYS_TABLESPACES__SPACE = 0,
- DICT_FLD__SYS_TABLESPACES__DB_TRX_ID = 1,
- DICT_FLD__SYS_TABLESPACES__DB_ROLL_PTR = 2,
- DICT_FLD__SYS_TABLESPACES__NAME = 3,
- DICT_FLD__SYS_TABLESPACES__FLAGS = 4,
- DICT_NUM_FIELDS__SYS_TABLESPACES = 5
-};
-/* The columns in SYS_DATAFILES */
-enum dict_col_sys_datafiles_enum {
- DICT_COL__SYS_DATAFILES__SPACE = 0,
- DICT_COL__SYS_DATAFILES__PATH = 1,
- DICT_NUM_COLS__SYS_DATAFILES = 2
-};
-/* The field numbers in the SYS_DATAFILES clustered index */
-enum dict_fld_sys_datafiles_enum {
- DICT_FLD__SYS_DATAFILES__SPACE = 0,
- DICT_FLD__SYS_DATAFILES__DB_TRX_ID = 1,
- DICT_FLD__SYS_DATAFILES__DB_ROLL_PTR = 2,
- DICT_FLD__SYS_DATAFILES__PATH = 3,
- DICT_NUM_FIELDS__SYS_DATAFILES = 4
-};
-
-/* A number of the columns above occur in multiple tables. These are the
-length of thos fields. */
-#define DICT_FLD_LEN_SPACE 4
-#define DICT_FLD_LEN_FLAGS 4
-
-/* When a row id which is zero modulo this number (which must be a power of
-two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
-updated */
-#define DICT_HDR_ROW_ID_WRITE_MARGIN 256
-
-#ifndef UNIV_NONINL
-#include "dict0boot.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/dict0boot.ic b/storage/xtradb/include/dict0boot.ic
deleted file mode 100644
index 42e91ee930e..00000000000
--- a/storage/xtradb/include/dict0boot.ic
+++ /dev/null
@@ -1,95 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0boot.ic
-Data dictionary creation and booting
-
-Created 4/18/1996 Heikki Tuuri
-*******************************************************/
-
-/**********************************************************************//**
-Returns a new row id.
-@return the new id */
-UNIV_INLINE
-row_id_t
-dict_sys_get_new_row_id(void)
-/*=========================*/
-{
- row_id_t id;
-
- mutex_enter(&(dict_sys->mutex));
-
- id = dict_sys->row_id;
-
- if (0 == (id % DICT_HDR_ROW_ID_WRITE_MARGIN)) {
-
- dict_hdr_flush_row_id();
- }
-
- dict_sys->row_id++;
-
- mutex_exit(&(dict_sys->mutex));
-
- return(id);
-}
-
-/**********************************************************************//**
-Reads a row id from a record or other 6-byte stored form.
-@return row id */
-UNIV_INLINE
-row_id_t
-dict_sys_read_row_id(
-/*=================*/
- const byte* field) /*!< in: record field */
-{
-#if DATA_ROW_ID_LEN != 6
-# error "DATA_ROW_ID_LEN != 6"
-#endif
-
- return(mach_read_from_6(field));
-}
-
-/**********************************************************************//**
-Writes a row id to a record or other 6-byte stored form. */
-UNIV_INLINE
-void
-dict_sys_write_row_id(
-/*==================*/
- byte* field, /*!< in: record field */
- row_id_t row_id) /*!< in: row id */
-{
-#if DATA_ROW_ID_LEN != 6
-# error "DATA_ROW_ID_LEN != 6"
-#endif
-
- mach_write_to_6(field, row_id);
-}
-
-/*********************************************************************//**
-Check if a table id belongs to system table.
-@return true if the table id belongs to a system table. */
-UNIV_INLINE
-bool
-dict_is_sys_table(
-/*==============*/
- table_id_t id) /*!< in: table id to check */
-{
- return(id < DICT_HDR_FIRST_ID);
-}
diff --git a/storage/xtradb/include/dict0crea.h b/storage/xtradb/include/dict0crea.h
deleted file mode 100644
index 082048b8bbd..00000000000
--- a/storage/xtradb/include/dict0crea.h
+++ /dev/null
@@ -1,266 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0crea.h
-Database object creation
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0crea_h
-#define dict0crea_h
-
-#include "univ.i"
-#include "dict0types.h"
-#include "dict0dict.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "mtr0mtr.h"
-#include "fil0crypt.h"
-
-/*********************************************************************//**
-Creates a table create graph.
-@return own: table create node */
-UNIV_INTERN
-tab_node_t*
-tab_create_graph_create(
-/*====================*/
- dict_table_t* table, /*!< in: table to create, built as a memory data
- structure */
- mem_heap_t* heap, /*!< in: heap where created */
- bool commit, /*!< in: true if the commit node should be
- added to the query graph */
- fil_encryption_t mode, /*!< in: encryption mode */
- ulint key_id);/*!< in: encryption key_id */
-/*********************************************************************//**
-Creates an index create graph.
-@return own: index create node */
-UNIV_INTERN
-ind_node_t*
-ind_create_graph_create(
-/*====================*/
- dict_index_t* index, /*!< in: index to create, built as a memory data
- structure */
- mem_heap_t* heap, /*!< in: heap where created */
- bool commit);/*!< in: true if the commit node should be
- added to the query graph */
-/***********************************************************//**
-Creates a table. This is a high-level function used in SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-dict_create_table_step(
-/*===================*/
- que_thr_t* thr); /*!< in: query thread */
-/***********************************************************//**
-Creates an index. This is a high-level function used in SQL execution
-graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-dict_create_index_step(
-/*===================*/
- que_thr_t* thr); /*!< in: query thread */
-/*******************************************************************//**
-Truncates the index tree associated with a row in SYS_INDEXES table.
-@return new root page number, or FIL_NULL on failure */
-UNIV_INTERN
-ulint
-dict_truncate_index_tree(
-/*=====================*/
- dict_table_t* table, /*!< in: the table the index belongs to */
- ulint space, /*!< in: 0=truncate,
- nonzero=create the index tree in the
- given tablespace */
- btr_pcur_t* pcur, /*!< in/out: persistent cursor pointing to
- record in the clustered index of
- SYS_INDEXES table. The cursor may be
- repositioned in this call. */
- mtr_t* mtr); /*!< in: mtr having the latch
- on the record page. The mtr may be
- committed and restarted in this call. */
-/*******************************************************************//**
-Drops the index tree associated with a row in SYS_INDEXES table. */
-UNIV_INTERN
-void
-dict_drop_index_tree(
-/*=================*/
- rec_t* rec, /*!< in/out: record in the clustered index
- of SYS_INDEXES table */
- mtr_t* mtr); /*!< in: mtr having the latch on the record page */
-/****************************************************************//**
-Creates the foreign key constraints system tables inside InnoDB
-at server bootstrap or server start if they are not found or are
-not of the right form.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_create_or_check_foreign_constraint_tables(void);
-/*================================================*/
-
-/********************************************************************//**
-Construct foreign key constraint definition from data dictionary information.
-*/
-UNIV_INTERN
-char*
-dict_foreign_def_get(
-/*=================*/
- dict_foreign_t* foreign,/*!< in: foreign */
- trx_t* trx); /*!< in: trx */
-
-/********************************************************************//**
-Generate a foreign key constraint name when it was not named by the user.
-A generated constraint has a name of the format dbname/tablename_ibfk_NUMBER,
-where the numbers start from 1, and are given locally for this table, that is,
-the number is not global, as it used to be before MySQL 4.0.18. */
-UNIV_INLINE
-dberr_t
-dict_create_add_foreign_id(
-/*=======================*/
- ulint* id_nr, /*!< in/out: number to use in id generation;
- incremented if used */
- const char* name, /*!< in: table name */
- dict_foreign_t* foreign)/*!< in/out: foreign key */
- MY_ATTRIBUTE((nonnull));
-
-/** Adds the given set of foreign key objects to the dictionary tables
-in the database. This function does not modify the dictionary cache. The
-caller must ensure that all foreign key objects contain a valid constraint
-name in foreign->id.
-@param[in] local_fk_set set of foreign key objects, to be added to
-the dictionary tables
-@param[in] table table to which the foreign key objects in
-local_fk_set belong to
-@param[in,out] trx transaction
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-dict_create_add_foreigns_to_dictionary(
-/*===================================*/
- const dict_foreign_set& local_fk_set,
- const dict_table_t* table,
- trx_t* trx)
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/****************************************************************//**
-Creates the tablespaces and datafiles system tables inside InnoDB
-at server bootstrap or server start if they are not found or are
-not of the right form.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_create_or_check_sys_tablespace(void);
-/*=====================================*/
-
-/********************************************************************//**
-Add a single tablespace definition to the data dictionary tables in the
-database.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-dict_create_add_tablespace_to_dictionary(
-/*=====================================*/
- ulint space, /*!< in: tablespace id */
- const char* name, /*!< in: tablespace name */
- ulint flags, /*!< in: tablespace flags */
- const char* path, /*!< in: tablespace path */
- trx_t* trx, /*!< in: transaction */
- bool commit); /*!< in: if true then commit the
- transaction */
-
-/********************************************************************//**
-Add a foreign key definition to the data dictionary tables.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-dict_create_add_foreign_to_dictionary(
-/*==================================*/
- dict_table_t* table, /*!< in: table */
- const char* name, /*!< in: table name */
- const dict_foreign_t* foreign,/*!< in: foreign key */
- trx_t* trx) /*!< in/out: dictionary transaction */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/** Table create node structure (query graph node for creating a table) */
-struct tab_node_t{
- que_common_t common; /*!< node type: QUE_NODE_TABLE_CREATE */
- dict_table_t* table; /*!< table to create, built as a memory data
- structure with dict_mem_... functions */
- ins_node_t* tab_def; /*!< child node which does the insert of
- the table definition; the row to be inserted
- is built by the parent node */
- ins_node_t* col_def; /*!< child node which does the inserts of
- the column definitions; the row to be inserted
- is built by the parent node */
- commit_node_t* commit_node;
- /*!< child node which performs a commit after
- a successful table creation */
- /*----------------------*/
- /* Local storage for this graph node */
- ulint state; /*!< node execution state */
- ulint col_no; /*!< next column definition to insert */
- ulint key_id; /*!< encryption key_id */
- fil_encryption_t mode; /*!< encryption mode */
- mem_heap_t* heap; /*!< memory heap used as auxiliary storage */
-};
-
-/* Table create node states (presumably the values of tab_node_t::state) */
-#define TABLE_BUILD_TABLE_DEF 1
-#define TABLE_BUILD_COL_DEF 2
-#define TABLE_COMMIT_WORK 3
-#define TABLE_ADD_TO_CACHE 4
-#define TABLE_COMPLETED 5
-
-/** Index create node structure (query graph node for creating an index) */
-
-struct ind_node_t{
- que_common_t common; /*!< node type: QUE_NODE_INDEX_CREATE */
- dict_index_t* index; /*!< index to create, built as a memory data
- structure with dict_mem_... functions */
- ins_node_t* ind_def; /*!< child node which does the insert of
- the index definition; the row to be inserted
- is built by the parent node */
- ins_node_t* field_def; /*!< child node which does the inserts of
- the field definitions; the row to be inserted
- is built by the parent node */
- commit_node_t* commit_node;
- /*!< child node which performs a commit after
- a successful index creation */
- /*----------------------*/
- /* Local storage for this graph node */
- ulint state; /*!< node execution state */
- ulint page_no;/*!< root page number of the index */
- dict_table_t* table; /*!< table which owns the index */
- dtuple_t* ind_row;/*!< index definition row built */
- ulint field_no;/*!< next field definition to insert */
- mem_heap_t* heap; /*!< memory heap used as auxiliary storage */
-};
-
-/* Index create node states (presumably the values of ind_node_t::state) */
-#define INDEX_BUILD_INDEX_DEF 1
-#define INDEX_BUILD_FIELD_DEF 2
-#define INDEX_CREATE_INDEX_TREE 3
-#define INDEX_COMMIT_WORK 4
-#define INDEX_ADD_TO_CACHE 5
-
-#ifndef UNIV_NONINL
-#include "dict0crea.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/dict0crea.ic b/storage/xtradb/include/dict0crea.ic
deleted file mode 100644
index 1cbaa47032b..00000000000
--- a/storage/xtradb/include/dict0crea.ic
+++ /dev/null
@@ -1,98 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0crea.ic
-Database object creation
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#include "mem0mem.h"
-
-/*********************************************************************//**
-Checks if a table name contains the string "/#sql" which denotes temporary
-tables in MySQL.
-@return true if temporary table */
-UNIV_INTERN
-bool
-row_is_mysql_tmp_table_name(
-/*========================*/
- const char* name) MY_ATTRIBUTE((warn_unused_result));
- /*!< in: table name in the form
- 'database/tablename' */
-
-
-/********************************************************************//**
-Generate a foreign key constraint name when it was not named by the user.
-A generated constraint has a name of the format dbname/tablename_ibfk_NUMBER,
-where the numbers start from 1, and are given locally for this table, that is,
-the number is not global, as it used to be before MySQL 4.0.18.
-@return DB_SUCCESS, or DB_IDENTIFIER_TOO_LONG if the generated id is too long */
-UNIV_INLINE
-dberr_t
-dict_create_add_foreign_id(
-/*=======================*/
-	ulint*		id_nr,	/*!< in/out: number to use in id generation;
-				incremented if used */
-	const char*	name,	/*!< in: table name */
-	dict_foreign_t*	foreign)/*!< in/out: foreign key */
-{
-	if (foreign->id == NULL) {
-		/* Generate a new constraint id */
-		ulint	namelen	= strlen(name);
-		char*	id	= static_cast<char*>(
-			mem_heap_alloc(foreign->heap, namelen + 20));
-
-		if (row_is_mysql_tmp_table_name(name)) {
-			/* no overflow if number < 1e13 */
-			sprintf(id, "%s_ibfk_%lu", name, (ulong) (*id_nr)++);
-		} else {
-			char	table_name[MAX_TABLE_NAME_LEN + 20] = "";
-			uint	errors = 0;
-
-			/* strncpy() leaves the destination unterminated when
-			the source fills it, so always terminate explicitly. */
-			strncpy(table_name, name, sizeof table_name - 1);
-			table_name[sizeof table_name - 1] = '\0';
-
-			innobase_convert_to_system_charset(
-				strchr(table_name, '/') + 1,
-				strchr(name, '/') + 1,
-				MAX_TABLE_NAME_LEN, &errors);
-			if (errors) {
-				strncpy(table_name, name,
-					sizeof table_name - 1);
-				table_name[sizeof table_name - 1] = '\0';
-			}
-
-			/* no overflow if number < 1e13 */
-			sprintf(id, "%s_ibfk_%lu", table_name,
-				(ulong) (*id_nr)++);
-
-			if (innobase_check_identifier_length(
-				    strchr(id,'/') + 1)) {
-				return(DB_IDENTIFIER_TOO_LONG);
-			}
-		}
-		foreign->id = id;
-	}
-
-	return(DB_SUCCESS);
-}
-
diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h
deleted file mode 100644
index 0290b884ece..00000000000
--- a/storage/xtradb/include/dict0dict.h
+++ /dev/null
@@ -1,1907 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0dict.h
-Data dictionary system
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0dict_h
-#define dict0dict_h
-
-#include "univ.i"
-#include "db0err.h"
-#include "dict0types.h"
-#include "dict0mem.h"
-#include "data0type.h"
-#include "data0data.h"
-#include "mem0mem.h"
-#include "rem0types.h"
-#include "ut0mem.h"
-#include "ut0lst.h"
-#include "hash0hash.h"
-#include "ut0rnd.h"
-#include "ut0byte.h"
-#include "trx0types.h"
-#include "row0types.h"
-#include "fsp0fsp.h"
-#include "dict0pagecompress.h"
-
-extern bool innodb_table_stats_not_found;
-extern bool innodb_index_stats_not_found;
-
-#ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
-# include "sync0rw.h"
-/******************************************************************//**
-Makes all characters in a NUL-terminated UTF-8 string lower case. */
-UNIV_INTERN
-void
-dict_casedn_str(
-/*============*/
- char* a) /*!< in/out: string to put in lower case */
- MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Get the database name length in a table name.
-@return database name length */
-UNIV_INTERN
-ulint
-dict_get_db_name_len(
-/*=================*/
- const char* name) /*!< in: table name in the form
- dbname '/' tablename */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Open a table from its database and table name, this is currently used by
-foreign constraint parser to get the referenced table.
-@return complete table name with database and table name, allocated from
-heap memory passed in */
-UNIV_INTERN
-char*
-dict_get_referenced_table(
-/*======================*/
- const char* name, /*!< in: foreign key table name */
- const char* database_name, /*!< in: table db name */
- ulint database_name_len,/*!< in: db name length */
- const char* table_name, /*!< in: table name */
- ulint table_name_len, /*!< in: table name length */
- dict_table_t** table, /*!< out: table object or NULL */
- mem_heap_t* heap); /*!< in: heap memory */
-/*********************************************************************//**
-Frees a foreign key struct. */
-
-void
-dict_foreign_free(
-/*==============*/
- dict_foreign_t* foreign); /*!< in, own: foreign key struct */
-/*********************************************************************//**
-Finds the highest [number] for foreign key constraints of the table. Looks
-only at the >= 4.0.18-format id's, which are of the form
-databasename/tablename_ibfk_[number].
-@return highest number, 0 if table has no new format foreign key constraints */
-UNIV_INTERN
-ulint
-dict_table_get_highest_foreign_id(
-/*==============================*/
- dict_table_t* table); /*!< in: table in the dictionary
- memory cache */
-/********************************************************************//**
-Return the end of table name where we have removed dbname and '/'.
-@return table name */
-UNIV_INTERN
-const char*
-dict_remove_db_name(
-/*================*/
- const char* name) /*!< in: table name in the form
- dbname '/' tablename */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/** Operation to perform when opening a table (see dict_table_open_on_id()) */
-enum dict_table_op_t {
- /** Expect the tablespace to exist. */
- DICT_TABLE_OP_NORMAL = 0,
- /** Drop any orphan indexes after an aborted online index creation. */
- DICT_TABLE_OP_DROP_ORPHAN,
- /** Silently load the tablespace if it does not exist,
- and do not load the definitions of incomplete indexes. */
- DICT_TABLE_OP_LOAD_TABLESPACE,
- /** Open the table only if it is in the table cache. */
- DICT_TABLE_OP_OPEN_ONLY_IF_CACHED
-};
-
-/**********************************************************************//**
-Returns a table object based on table id.
-@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_open_on_id(
-/*==================*/
- table_id_t table_id, /*!< in: table id */
- ibool dict_locked, /*!< in: TRUE=data dictionary locked */
- dict_table_op_t table_op) /*!< in: operation to perform */
- __attribute__((warn_unused_result));
-
-/**********************************************************************//**
-Returns a table object based on table id.
-@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_open_on_index_id(
-/*==================*/
- table_id_t table_id, /*!< in: table id */
- bool dict_locked) /*!< in: TRUE=data dictionary locked */
- __attribute__((warn_unused_result));
-/********************************************************************//**
-Decrements the count of open handles to a table. */
-UNIV_INTERN
-void
-dict_table_close(
-/*=============*/
- dict_table_t* table, /*!< in/out: table */
- ibool dict_locked, /*!< in: TRUE=data dictionary locked */
- ibool try_drop) /*!< in: TRUE=try to drop any orphan
- indexes after an aborted online
- index creation */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Inits the data dictionary module. */
-UNIV_INTERN
-void
-dict_init(void);
-/*===========*/
-/********************************************************************//**
-Gets the space id of every table of the data dictionary and makes a linear
-list and a hash table of them to the data dictionary cache. This function
-can be called at database startup if we did not need to do a crash recovery.
-In crash recovery we must scan the space id's from the .ibd files in MySQL
-database directories. */
-UNIV_INTERN
-void
-dict_load_space_id_list(void);
-/*=========================*/
-/*********************************************************************//**
-Gets the minimum number of bytes per character.
-@return minimum multi-byte char size, in bytes */
-UNIV_INLINE
-ulint
-dict_col_get_mbminlen(
-/*==================*/
- const dict_col_t* col) /*!< in: column */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Gets the maximum number of bytes per character.
-@return maximum multi-byte char size, in bytes */
-UNIV_INLINE
-ulint
-dict_col_get_mbmaxlen(
-/*==================*/
- const dict_col_t* col) /*!< in: column */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Sets the minimum and maximum number of bytes per character. */
-UNIV_INLINE
-void
-dict_col_set_mbminmaxlen(
-/*=====================*/
- dict_col_t* col, /*!< in/out: column */
- ulint mbminlen, /*!< in: minimum multi-byte
- character size, in bytes */
- ulint mbmaxlen) /*!< in: maximum multi-byte
- character size, in bytes */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Gets the column data type. */
-UNIV_INLINE
-void
-dict_col_copy_type(
-/*===============*/
- const dict_col_t* col, /*!< in: column */
- dtype_t* type) /*!< out: data type */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Determine bytes of column prefix to be stored in the undo log. Please
-note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix
-needs to be stored in the undo log.
-@return bytes of column prefix to be stored in the undo log */
-UNIV_INLINE
-ulint
-dict_max_field_len_store_undo(
-/*==========================*/
- dict_table_t* table, /*!< in: table */
- const dict_col_t* col) /*!< in: column which index prefix
- is based on */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Assert that a column and a data type match.
-@return TRUE */
-UNIV_INLINE
-ibool
-dict_col_type_assert_equal(
-/*=======================*/
- const dict_col_t* col, /*!< in: column */
- const dtype_t* type) /*!< in: data type */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Returns the minimum size of the column.
-@return minimum size */
-UNIV_INLINE
-ulint
-dict_col_get_min_size(
-/*==================*/
- const dict_col_t* col) /*!< in: column */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***********************************************************************//**
-Returns the maximum size of the column.
-@return maximum size */
-UNIV_INLINE
-ulint
-dict_col_get_max_size(
-/*==================*/
- const dict_col_t* col) /*!< in: column */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***********************************************************************//**
-Returns the size of a fixed size column, 0 if not a fixed size column.
-@return fixed size, or 0 */
-UNIV_INLINE
-ulint
-dict_col_get_fixed_size(
-/*====================*/
- const dict_col_t* col, /*!< in: column */
- ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***********************************************************************//**
-Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
-For fixed length types it is the fixed length of the type, otherwise 0.
-@return SQL null storage size in ROW_FORMAT=REDUNDANT */
-UNIV_INLINE
-ulint
-dict_col_get_sql_null_size(
-/*=======================*/
- const dict_col_t* col, /*!< in: column */
- ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Gets the column number.
-@return col->ind, table column position (starting from 0) */
-UNIV_INLINE
-ulint
-dict_col_get_no(
-/*============*/
- const dict_col_t* col) /*!< in: column */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Gets the column position in the clustered index. */
-UNIV_INLINE
-ulint
-dict_col_get_clust_pos(
-/*===================*/
- const dict_col_t* col, /*!< in: table column */
- const dict_index_t* clust_index) /*!< in: clustered index */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/****************************************************************//**
-If the given column name is reserved for InnoDB system columns, return
-TRUE.
-@return TRUE if name is reserved */
-UNIV_INTERN
-ibool
-dict_col_name_is_reserved(
-/*======================*/
- const char* name) /*!< in: column name */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Acquire the autoinc lock. */
-UNIV_INTERN
-void
-dict_table_autoinc_lock(
-/*====================*/
- dict_table_t* table) /*!< in/out: table */
- MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Unconditionally set the autoinc counter. */
-UNIV_INTERN
-void
-dict_table_autoinc_initialize(
-/*==========================*/
- dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value) /*!< in: next value to assign to a row */
- MY_ATTRIBUTE((nonnull));
-
-/** Store autoinc value when the table is evicted.
-@param[in] table table evicted */
-UNIV_INTERN
-void
-dict_table_autoinc_store(
- const dict_table_t* table);
-
-/** Restore autoinc value when the table is loaded.
-@param[in] table table loaded */
-UNIV_INTERN
-void
-dict_table_autoinc_restore(
- dict_table_t* table);
-
-/********************************************************************//**
-Reads the next autoinc value (== autoinc counter value), 0 if not yet
-initialized.
-@return value for a new row, or 0 */
-UNIV_INTERN
-ib_uint64_t
-dict_table_autoinc_read(
-/*====================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Updates the autoinc counter if the value supplied is greater than the
-current value. */
-UNIV_INTERN
-void
-dict_table_autoinc_update_if_greater(
-/*=================================*/
-
- dict_table_t* table, /*!< in/out: table */
- ib_uint64_t value) /*!< in: value which was assigned to a row */
- MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Release the autoinc lock. */
-UNIV_INTERN
-void
-dict_table_autoinc_unlock(
-/*======================*/
- dict_table_t* table) /*!< in/out: table */
- MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
-/**********************************************************************//**
-Adds system columns to a table object. */
-UNIV_INTERN
-void
-dict_table_add_system_columns(
-/*==========================*/
- dict_table_t* table, /*!< in/out: table */
- mem_heap_t* heap) /*!< in: temporary heap */
- MY_ATTRIBUTE((nonnull));
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Adds a table object to the dictionary cache. */
-UNIV_INTERN
-void
-dict_table_add_to_cache(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- ibool can_be_evicted, /*!< in: TRUE if can be evicted*/
- mem_heap_t* heap) /*!< in: temporary heap */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Removes a table object from the dictionary cache. */
-UNIV_INTERN
-void
-dict_table_remove_from_cache(
-/*=========================*/
- dict_table_t* table) /*!< in, own: table */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Removes a table object from the dictionary cache. */
-UNIV_INTERN
-void
-dict_table_remove_from_cache_low(
-/*=============================*/
- dict_table_t* table, /*!< in, own: table */
- ibool lru_evict); /*!< in: TRUE if table being evicted
- to make room in the table LRU list */
-/**********************************************************************//**
-Renames a table object.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_table_rename_in_cache(
-/*=======================*/
- dict_table_t* table, /*!< in/out: table */
- const char* new_name, /*!< in: new name */
- ibool rename_also_foreigns)
- /*!< in: in ALTER TABLE we want
- to preserve the original table name
- in constraints which reference it */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Removes an index from the dictionary cache. */
-UNIV_INTERN
-void
-dict_index_remove_from_cache(
-/*=========================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index) /*!< in, own: index */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Change the id of a table object in the dictionary cache. This is used in
-DISCARD TABLESPACE. */
-UNIV_INTERN
-void
-dict_table_change_id_in_cache(
-/*==========================*/
- dict_table_t* table, /*!< in/out: table object already in cache */
- table_id_t new_id) /*!< in: new id to set */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Removes a foreign constraint struct from the dictionary cache. */
-UNIV_INTERN
-void
-dict_foreign_remove_from_cache(
-/*===========================*/
- dict_foreign_t* foreign) /*!< in, own: foreign constraint */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Adds a foreign key constraint object to the dictionary cache. May free
-the object if the cache already holds an object with the same identifier.
-At least one of foreign table or referenced table must already be in
-the dictionary cache!
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_foreign_add_to_cache(
-/*======================*/
- dict_foreign_t* foreign,
- /*!< in, own: foreign key constraint */
- const char** col_names,
- /*!< in: column names, or NULL to use
- foreign->foreign_table->col_names */
- bool check_charsets,
- /*!< in: whether to check charset
- compatibility */
- dict_err_ignore_t ignore_err)
- /*!< in: error to be ignored */
- MY_ATTRIBUTE((nonnull(1), warn_unused_result));
-/*********************************************************************//**
-Checks if a table is referenced by foreign keys.
-@return TRUE if table is referenced by a foreign key */
-UNIV_INTERN
-ibool
-dict_table_is_referenced_by_foreign_key(
-/*====================================*/
- const dict_table_t* table) /*!< in: InnoDB table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Replace the index passed in with another equivalent index in the
-foreign key lists of the table.
-@return whether all replacements were found */
-UNIV_INTERN
-bool
-dict_foreign_replace_index(
-/*=======================*/
- dict_table_t* table, /*!< in/out: table */
- const char** col_names,
- /*!< in: column names, or NULL
- to use table->col_names */
- const dict_index_t* index) /*!< in: index to be replaced */
- MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
-/**********************************************************************//**
-Determines whether a string starts with the specified keyword.
-@return TRUE if str starts with keyword */
-UNIV_INTERN
-ibool
-dict_str_starts_with_keyword(
-/*=========================*/
- THD* thd, /*!< in: MySQL thread handle */
- const char* str, /*!< in: string to scan for keyword */
- const char* keyword) /*!< in: keyword to look for */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Scans a table create SQL string and adds to the data dictionary
-the foreign key constraints declared in the string. This function
-should be called after the indexes for a table have been created.
-Each foreign key constraint must be accompanied with indexes in
-both participating tables. The indexes are allowed to contain more
-fields than mentioned in the constraint.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-dict_create_foreign_constraints(
-/*============================*/
- trx_t* trx, /*!< in: transaction */
- const char* sql_string, /*!< in: table create statement where
- foreign keys are declared like:
- FOREIGN KEY (a, b) REFERENCES
- table2(c, d), table2 can be written
- also with the database
- name before it: test.table2; the
- default database is the database of
- parameter name */
- size_t sql_length, /*!< in: length of sql_string */
- const char* name, /*!< in: table full name in the
- normalized form
- database_name/table_name */
- ibool reject_fks) /*!< in: if TRUE, fail with error
- code DB_CANNOT_ADD_CONSTRAINT if
- any foreign keys are found. */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
-@return DB_SUCCESS or DB_CANNOT_DROP_CONSTRAINT if syntax error or the
-constraint id does not match */
-UNIV_INTERN
-dberr_t
-dict_foreign_parse_drop_constraints(
-/*================================*/
- mem_heap_t* heap, /*!< in: heap from which we can
- allocate memory */
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table, /*!< in: table */
- ulint* n, /*!< out: number of constraints
- to drop */
- const char*** constraints_to_drop) /*!< out: id's of the
- constraints to drop */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Returns a table object and increments its open handle count.
-NOTE! This is a high-level function to be used mainly from outside the
-'dict' directory. Inside this directory dict_table_get_low
-is usually the appropriate function.
-@return table, NULL if does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_table_open_on_name(
-/*====================*/
- const char* table_name, /*!< in: table name */
- ibool dict_locked, /*!< in: TRUE=data dictionary locked */
- ibool try_drop, /*!< in: TRUE=try to drop any orphan
- indexes after an aborted online
- index creation */
- dict_err_ignore_t
- ignore_err) /*!< in: error to be ignored when
- loading the table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/*********************************************************************//**
-Tries to find an index whose first fields are the columns in the array,
-in the same order and is not marked for deletion and is not the same
-as types_idx.
-@return matching index, NULL if not found */
-UNIV_INTERN
-dict_index_t*
-dict_foreign_find_index(
-/*====================*/
- const dict_table_t* table, /*!< in: table */
- const char** col_names,
- /*!< in: column names, or NULL
- to use table->col_names */
- const char** columns,/*!< in: array of column names */
- ulint n_cols, /*!< in: number of columns */
- const dict_index_t* types_idx,
- /*!< in: NULL or an index
- whose types the column types
- must match */
- bool check_charsets,
- /*!< in: whether to check
- charsets. only has an effect
- if types_idx != NULL */
- ulint check_null,
- /*!< in: nonzero if none of
- the columns must be declared
- NOT NULL */
- ulint* error, /*!< out: error code */
- ulint* err_col_no,
- /*!< out: column number where
- error happened */
- dict_index_t** err_index)
- /*!< out: index where error
- happened */
- MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
-/**********************************************************************//**
-Returns a column's name.
-@return column name. NOTE: not guaranteed to stay valid if table is
-modified in any way (columns added, etc.). */
-UNIV_INTERN
-const char*
-dict_table_get_col_name(
-/*====================*/
- const dict_table_t* table, /*!< in: table */
- ulint col_nr) /*!< in: column number */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Returns a column's name, looked up by the MySQL table column name.
-@return column name. NOTE: not guaranteed to stay valid if table is
-modified in any way (columns added, etc.). */
-UNIV_INTERN
-const char*
-dict_table_get_col_name_for_mysql(
-/*==============================*/
- const dict_table_t* table, /*!< in: table */
- const char* col_name)/*!< in: MySQL table column name */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Prints the data of a table. */
-UNIV_INTERN
-void
-dict_table_print(
-/*=============*/
- dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Outputs info on foreign keys of a table. */
-UNIV_INTERN
-std::string
-dict_print_info_on_foreign_keys(
-/*============================*/
- ibool create_table_format, /*!< in: if TRUE then print in
- a format suitable to be inserted into
- a CREATE TABLE, otherwise in the format
- of SHOW TABLE STATUS */
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table); /*!< in: table */
-/**********************************************************************//**
-Outputs info on a foreign key of a table in a format suitable for
-CREATE TABLE. */
-UNIV_INTERN
-std::string
-dict_print_info_on_foreign_key_in_create_format(
-/*============================================*/
- trx_t* trx, /*!< in: transaction */
- dict_foreign_t* foreign, /*!< in: foreign key constraint */
- ibool add_newline); /*!< in: whether to add a newline */
-/********************************************************************//**
-Displays the names of the index and the table. */
-UNIV_INTERN
-void
-dict_index_name_print(
-/*==================*/
- FILE* file, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction */
- const dict_index_t* index) /*!< in: index to print */
- MY_ATTRIBUTE((nonnull(1,3)));
-/*********************************************************************//**
-Checks whether the given index qualifies: its first fields must be the
-columns in the array, in the same order, and it must not be marked for
-deletion nor be the same as types_idx.
-@return whether the index qualifies */
-UNIV_INTERN
-bool
-dict_foreign_qualify_index(
-/*====================*/
- const dict_table_t* table, /*!< in: table */
- const char** col_names,
- /*!< in: column names, or NULL
- to use table->col_names */
- const char** columns,/*!< in: array of column names */
- ulint n_cols, /*!< in: number of columns */
- const dict_index_t* index, /*!< in: index to check */
- const dict_index_t* types_idx,
- /*!< in: NULL or an index
- whose types the column types
- must match */
- bool check_charsets,
- /*!< in: whether to check
- charsets. only has an effect
- if types_idx != NULL */
- ulint check_null,
- /*!< in: nonzero if none of
- the columns must be declared
- NOT NULL */
- ulint* error, /*!< out: error code */
- ulint* err_col_no,
- /*!< out: column number where
- error happened */
- dict_index_t** err_index)
- /*!< out: index where error
- happened */
-
- MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Gets the first index on the table (the clustered index).
-@return index, NULL if none exists */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_first_index(
-/*=======================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Gets the last index on the table.
-@return index, NULL if none exists */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_last_index(
-/*=======================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Gets the next index on the table.
-@return index, NULL if none left */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_next_index(
-/*======================*/
- const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#else /* UNIV_DEBUG */
-# define dict_table_get_first_index(table) UT_LIST_GET_FIRST((table)->indexes)
-# define dict_table_get_last_index(table) UT_LIST_GET_LAST((table)->indexes)
-# define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index)
-#endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
-/* Skip corrupted indexes: advances index (an lvalue, modified in place)
-with dict_table_get_next_index() until it is NULL or refers to an index
-for which dict_index_is_corrupted() does not hold.
-NOTE: the argument is evaluated several times, and the expansion is a bare
-while statement that is not wrapped in do { } while (0). */
-#define dict_table_skip_corrupt_index(index) \
- while (index && dict_index_is_corrupted(index)) { \
- index = dict_table_get_next_index(index); \
- }
-
-/* Get the next non-corrupt index: advances index (an lvalue, modified in
-place) to the next index on the table and then skips over any corrupted
-indexes. index is NULL afterwards if no non-corrupt index is left. */
-#define dict_table_next_uncorrupted_index(index) \
-do { \
- index = dict_table_get_next_index(index); \
- dict_table_skip_corrupt_index(index); \
-} while (0)
-
-/********************************************************************//**
-Check whether the index is the clustered index.
-@return nonzero for clustered index, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_clust(
-/*================*/
- const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Check whether the index is unique.
-@return nonzero for unique index, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_unique(
-/*=================*/
- const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Check whether the index is the insert buffer tree.
-@return nonzero for insert buffer, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_ibuf(
-/*===============*/
- const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Check whether the index is a secondary index or the insert buffer tree.
-@return nonzero for secondary index or insert buffer, zero for other
-indexes */
-UNIV_INLINE
-ulint
-dict_index_is_sec_or_ibuf(
-/*======================*/
- const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((warn_unused_result));
-
-/************************************************************************
-Gets all the FTS indexes for the table. NOTE: must not be called for
-tables which do not have an FTS-index. */
-UNIV_INTERN
-ulint
-dict_table_get_all_fts_indexes(
-/*===========================*/
- /* out: number of indexes collected */
- dict_table_t* table, /* in: table */
- ib_vector_t* indexes)/* out: vector for collecting FTS indexes */
- MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Gets the number of user-defined columns in a table in the dictionary
-cache.
-@return number of user-defined (e.g., not ROW_ID) columns of a table */
-UNIV_INLINE
-ulint
-dict_table_get_n_user_cols(
-/*=======================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Gets the number of system columns in a table in the dictionary cache.
-@return number of system (e.g., ROW_ID) columns of a table */
-UNIV_INLINE
-ulint
-dict_table_get_n_sys_cols(
-/*======================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/********************************************************************//**
-Gets the number of all columns (also system) in a table in the dictionary
-cache.
-@return number of columns of a table */
-UNIV_INLINE
-ulint
-dict_table_get_n_cols(
-/*==================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Gets the approximately estimated number of rows in the table.
-@return estimated number of rows */
-UNIV_INLINE
-ib_uint64_t
-dict_table_get_n_rows(
-/*==================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Increment the number of rows in the table by one.
-Notice that this operation is not protected by any latch, the number is
-approximate. */
-UNIV_INLINE
-void
-dict_table_n_rows_inc(
-/*==================*/
- dict_table_t* table) /*!< in/out: table */
- MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Decrement the number of rows in the table by one.
-Notice that this operation is not protected by any latch, the number is
-approximate. */
-UNIV_INLINE
-void
-dict_table_n_rows_dec(
-/*==================*/
- dict_table_t* table) /*!< in/out: table */
- MY_ATTRIBUTE((nonnull));
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Gets the nth column of a table.
-@return pointer to column object */
-UNIV_INLINE
-dict_col_t*
-dict_table_get_nth_col(
-/*===================*/
- const dict_table_t* table, /*!< in: table */
- ulint pos) /*!< in: position of column */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Gets the given system column of a table.
-@return pointer to column object */
-UNIV_INLINE
-dict_col_t*
-dict_table_get_sys_col(
-/*===================*/
- const dict_table_t* table, /*!< in: table */
- ulint sys) /*!< in: DATA_ROW_ID, ... */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#else /* UNIV_DEBUG */
-#define dict_table_get_nth_col(table, pos) \
-((table)->cols + (pos))
-#define dict_table_get_sys_col(table, sys) \
-((table)->cols + (table)->n_cols + (sys) - DATA_N_SYS_COLS)
-#endif /* UNIV_DEBUG */
-/********************************************************************//**
-Gets the given system column number of a table.
-@return column number */
-UNIV_INLINE
-ulint
-dict_table_get_sys_col_no(
-/*======================*/
- const dict_table_t* table, /*!< in: table */
- ulint sys) /*!< in: DATA_ROW_ID, ... */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Returns the minimum data size of an index record.
-@return minimum data size in bytes */
-UNIV_INLINE
-ulint
-dict_index_get_min_size(
-/*====================*/
- const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Check whether the table uses the compact page format.
-@return TRUE if table uses the compact page format */
-UNIV_INLINE
-ibool
-dict_table_is_comp(
-/*===============*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Determine the file format of a table.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_table_get_format(
-/*==================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Determine the file format from a dict_table_t::flags.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_tf_get_format(
-/*===============*/
- ulint flags) /*!< in: dict_table_t::flags */
- MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Set the various values in a dict_table_t::flags pointer. */
-UNIV_INLINE
-void
-dict_tf_set(
-/*========*/
- ulint* flags, /*!< in/out: table flags */
- rec_format_t format, /*!< in: file format */
- ulint zip_ssize, /*!< in: zip shift size */
- bool remote_path, /*!< in: table uses DATA DIRECTORY
- */
- bool page_compressed,/*!< in: table uses page compressed
- pages */
- ulint page_compression_level, /*!< in: table page compression
- level */
- ulint atomic_writes) /*!< in: table atomic
- writes option value*/
- MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Convert a 32 bit integer table flags to the 32 bit integer that is
-written into the tablespace header at the offset FSP_SPACE_FLAGS and is
-also stored in the fil_space_t::flags field. The following chart shows
-the translation of the low order bit. Other bits are the same.
-========================= Low order bit ==========================
- | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC
-dict_table_t::flags | 0 | 1 | 1 | 1
-fil_space_t::flags | 0 | 0 | 1 | 1
-==================================================================
-@return tablespace flags (fil_space_t::flags) */
-UNIV_INLINE
-ulint
-dict_tf_to_fsp_flags(
-/*=================*/
- ulint flags) /*!< in: dict_table_t::flags */
- MY_ATTRIBUTE((const));
-/********************************************************************//**
-Extract the compressed page size from table flags.
-@return compressed page size, or 0 if not compressed */
-UNIV_INLINE
-ulint
-dict_tf_get_zip_size(
-/*=================*/
- ulint flags) /*!< in: flags */
- MY_ATTRIBUTE((const));
-/********************************************************************//**
-Check whether the table uses the compressed compact page format.
-@return compressed page size, or 0 if not compressed */
-UNIV_INLINE
-ulint
-dict_table_zip_size(
-/*================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Obtain exclusive locks on all index trees of the table. This is to prevent
-accessing index trees while InnoDB is updating internal metadata for
-operations such as truncate tables. */
-UNIV_INLINE
-void
-dict_table_x_lock_indexes(
-/*======================*/
- dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Release the exclusive locks on all index trees. */
-UNIV_INLINE
-void
-dict_table_x_unlock_indexes(
-/*========================*/
- dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Checks if a column is in the ordering columns of the clustered index of a
-table. Column prefixes are treated like whole columns.
-@return TRUE if the column, or its prefix, is in the clustered key */
-UNIV_INTERN
-ibool
-dict_table_col_in_clustered_key(
-/*============================*/
- const dict_table_t* table, /*!< in: table */
- ulint n) /*!< in: column number */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Check if the table has an FTS index.
-@return TRUE if table has an FTS index */
-UNIV_INLINE
-ibool
-dict_table_has_fts_index(
-/*=====================*/
- dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Copies types of columns contained in table to tuple and sets all
-fields of the tuple to the SQL NULL value. This function should
-be called right after dtuple_create(). */
-UNIV_INTERN
-void
-dict_table_copy_types(
-/*==================*/
- dtuple_t* tuple, /*!< in/out: data tuple */
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull));
-/********************************************************************
-Wait until all the background threads of the given table have exited, i.e.,
-bg_threads == 0. Note: bg_threads_mutex must be reserved when
-calling this. */
-UNIV_INTERN
-void
-dict_table_wait_for_bg_threads_to_exit(
-/*===================================*/
- dict_table_t* table, /* in: table */
- ulint delay) /* in: time in microseconds to wait between
- checks of bg_threads. */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Looks for an index with the given id. NOTE that we do not reserve
-the dictionary mutex: this function is for emergency purposes like
-printing info of a corrupt database page!
-@return index or NULL if not found from cache */
-UNIV_INTERN
-dict_index_t*
-dict_index_find_on_id_low(
-/*======================*/
- index_id_t id) /*!< in: index id */
- MY_ATTRIBUTE((warn_unused_result));
-/**********************************************************************//**
-Make room in the table cache by evicting an unused table. The unused table
-should not be part of FK relationship and currently not used in any user
-transaction. There is no guarantee that it will remove a table.
-@return number of tables evicted. */
-UNIV_INTERN
-ulint
-dict_make_room_in_cache(
-/*====================*/
- ulint max_tables, /*!< in: max tables allowed in cache */
- ulint pct_check); /*!< in: max percent to check */
-/**********************************************************************//**
-Adds an index to the dictionary cache.
-@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
-UNIV_INTERN
-dberr_t
-dict_index_add_to_cache(
-/*====================*/
- dict_table_t* table, /*!< in: table on which the index is */
- dict_index_t* index, /*!< in, own: index; NOTE! The index memory
- object is freed in this function! */
- ulint page_no,/*!< in: root page number of the index */
- ibool strict) /*!< in: TRUE=refuse to create the index
- if records could be too big to fit in
- a B-tree page */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Removes an index from the dictionary cache. */
-UNIV_INTERN
-void
-dict_index_remove_from_cache(
-/*=========================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index) /*!< in, own: index */
- MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Gets the number of fields in the internal representation of an index,
-including fields added by the dictionary system.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_fields(
-/*====================*/
- const dict_index_t* index) /*!< in: an internal
- representation of index (in
- the dictionary cache) */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Gets the number of fields in the internal representation of an index
-that uniquely determine the position of an index entry in the index, if
-we do not take multiversioning into account: in the B-tree use the value
-returned by dict_index_get_n_unique_in_tree.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_unique(
-/*====================*/
- const dict_index_t* index) /*!< in: an internal representation
- of index (in the dictionary cache) */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Gets the number of fields in the internal representation of an index
-which uniquely determine the position of an index entry in the index, if
-we also take multiversioning into account.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_unique_in_tree(
-/*============================*/
- const dict_index_t* index) /*!< in: an internal representation
- of index (in the dictionary cache) */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Gets the number of user-defined ordering fields in the index. In the internal
-representation we add the row id to the ordering fields to make all indexes
-unique, but this function returns the number of fields the user defined
-in the index as ordering fields.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_ordering_defined_by_user(
-/*======================================*/
- const dict_index_t* index) /*!< in: an internal representation
- of index (in the dictionary cache) */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Gets the nth field of an index.
-@return pointer to field object */
-UNIV_INLINE
-dict_field_t*
-dict_index_get_nth_field(
-/*=====================*/
- const dict_index_t* index, /*!< in: index */
- ulint pos) /*!< in: position of field */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#else /* UNIV_DEBUG */
-# define dict_index_get_nth_field(index, pos) ((index)->fields + (pos))
-#endif /* UNIV_DEBUG */
-/********************************************************************//**
-Gets pointer to the nth column in an index.
-@return column */
-UNIV_INLINE
-const dict_col_t*
-dict_index_get_nth_col(
-/*===================*/
- const dict_index_t* index, /*!< in: index */
- ulint pos) /*!< in: position of the field */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Gets the column number of the nth field in an index.
-@return column number */
-UNIV_INLINE
-ulint
-dict_index_get_nth_col_no(
-/*======================*/
- const dict_index_t* index, /*!< in: index */
- ulint pos) /*!< in: position of the field */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Looks for column n in an index.
-@return position in internal representation of the index;
-ULINT_UNDEFINED if not contained */
-UNIV_INLINE
-ulint
-dict_index_get_nth_col_pos(
-/*=======================*/
- const dict_index_t* index, /*!< in: index */
- ulint n, /*!< in: column number */
- ulint* prefix_col_pos) /*!< out: col num if prefix */
- MY_ATTRIBUTE((nonnull(1), warn_unused_result));
-/********************************************************************//**
-Looks for column n in an index.
-@return position in internal representation of the index;
-ULINT_UNDEFINED if not contained */
-UNIV_INTERN
-ulint
-dict_index_get_nth_col_or_prefix_pos(
-/*=================================*/
- const dict_index_t* index, /*!< in: index */
- ulint n, /*!< in: column number */
- ibool inc_prefix, /*!< in: TRUE=consider
- column prefixes too */
- ulint* prefix_col_pos) /*!< out: col num if prefix */
-
- MY_ATTRIBUTE((nonnull(1), warn_unused_result));
-/********************************************************************//**
-Returns TRUE if the index contains a column or a prefix of that column.
-@return TRUE if contains the column or its prefix */
-UNIV_INTERN
-ibool
-dict_index_contains_col_or_prefix(
-/*==============================*/
- const dict_index_t* index, /*!< in: index */
- ulint n) /*!< in: column number */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Looks for a matching field in an index. The column has to be the same. The
-column in index must be complete, or must contain a prefix longer than the
-column in index2. That is, we must be able to construct the prefix in index2
-from the prefix in index.
-@return position in internal representation of the index;
-ULINT_UNDEFINED if not contained */
-UNIV_INTERN
-ulint
-dict_index_get_nth_field_pos(
-/*=========================*/
- const dict_index_t* index, /*!< in: index from which to search */
- const dict_index_t* index2, /*!< in: index */
- ulint n) /*!< in: field number in index2 */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Looks for column n position in the clustered index.
-@return position in internal representation of the clustered index */
-UNIV_INTERN
-ulint
-dict_table_get_nth_col_pos(
-/*=======================*/
- const dict_table_t* table, /*!< in: table */
- ulint n) /*!< in: column number */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Returns the position of a system column in an index.
-@return position, ULINT_UNDEFINED if not contained */
-UNIV_INLINE
-ulint
-dict_index_get_sys_col_pos(
-/*=======================*/
- const dict_index_t* index, /*!< in: index */
- ulint type) /*!< in: DATA_ROW_ID, ... */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Adds a column to index. */
-UNIV_INTERN
-void
-dict_index_add_col(
-/*===============*/
- dict_index_t* index, /*!< in/out: index */
- const dict_table_t* table, /*!< in: table */
- dict_col_t* col, /*!< in: column */
- ulint prefix_len) /*!< in: column prefix length */
- MY_ATTRIBUTE((nonnull));
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Copies types of fields contained in index to tuple. */
-UNIV_INTERN
-void
-dict_index_copy_types(
-/*==================*/
- dtuple_t* tuple, /*!< in/out: data tuple */
- const dict_index_t* index, /*!< in: index */
- ulint n_fields) /*!< in: number of
- field types to copy */
- MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Gets the field column.
-@return field->col, pointer to the table column */
-UNIV_INLINE
-const dict_col_t*
-dict_field_get_col(
-/*===============*/
- const dict_field_t* field) /*!< in: index field */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Returns an index object if it is found in the dictionary cache.
-Assumes that dict_sys->mutex is already being held.
-@return index, NULL if not found */
-UNIV_INTERN
-dict_index_t*
-dict_index_get_if_in_cache_low(
-/*===========================*/
- index_id_t index_id) /*!< in: index id */
- MY_ATTRIBUTE((warn_unused_result));
-#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/**********************************************************************//**
-Returns an index object if it is found in the dictionary cache.
-@return index, NULL if not found */
-UNIV_INTERN
-dict_index_t*
-dict_index_get_if_in_cache(
-/*=======================*/
- index_id_t index_id) /*!< in: index id */
- MY_ATTRIBUTE((warn_unused_result));
-#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Checks that a tuple has n_fields_cmp value in a sensible range, so that
-no comparison can occur with the page number field in a node pointer.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-dict_index_check_search_tuple(
-/*==========================*/
- const dict_index_t* index, /*!< in: index tree */
- const dtuple_t* tuple) /*!< in: tuple used in a search */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/** Whether and when to allow temporary index names */
-enum check_name {
- /** Require all indexes to be complete. */
- CHECK_ALL_COMPLETE,
- /** Allow aborted online index creation. */
- CHECK_ABORTED_OK,
- /** Allow partial indexes to exist. */
- CHECK_PARTIAL_OK
-};
-/**********************************************************************//**
-Check for duplicate index entries in a table [using the index name] */
-UNIV_INTERN
-void
-dict_table_check_for_dup_indexes(
-/*=============================*/
- const dict_table_t* table, /*!< in: Check for dup indexes
- in this table */
- enum check_name check) /*!< in: whether and when to allow
- temporary index names */
- MY_ATTRIBUTE((nonnull));
-#endif /* UNIV_DEBUG */
-/**********************************************************************//**
-Builds a node pointer out of a physical record and a page number.
-@return own: node pointer */
-UNIV_INTERN
-dtuple_t*
-dict_index_build_node_ptr(
-/*======================*/
- const dict_index_t* index, /*!< in: index */
- const rec_t* rec, /*!< in: record for which to build node
- pointer */
- ulint page_no,/*!< in: page number to put in node
- pointer */
- mem_heap_t* heap, /*!< in: memory heap where pointer
- created */
- ulint level) /*!< in: level of rec in tree:
- 0 means leaf level */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Copies an initial segment of a physical record, long enough to specify an
-index entry uniquely.
-@return pointer to the prefix record */
-UNIV_INTERN
-rec_t*
-dict_index_copy_rec_order_prefix(
-/*=============================*/
- const dict_index_t* index, /*!< in: index */
- const rec_t* rec, /*!< in: record for which to
- copy prefix */
- ulint* n_fields,/*!< out: number of fields copied */
- byte** buf, /*!< in/out: memory buffer for the
- copied prefix, or NULL */
- ulint* buf_size)/*!< in/out: buffer size */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Builds a typed data tuple out of a physical record.
-@return own: data tuple */
-UNIV_INTERN
-dtuple_t*
-dict_index_build_data_tuple(
-/*========================*/
- dict_index_t* index, /*!< in: index */
- rec_t* rec, /*!< in: record for which to build data tuple */
- ulint n_fields,/*!< in: number of data fields */
- mem_heap_t* heap) /*!< in: memory heap where tuple created */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Gets the space id of the root of the index tree.
-@return space id */
-UNIV_INLINE
-ulint
-dict_index_get_space(
-/*=================*/
- const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Sets the space id of the root of the index tree. */
-UNIV_INLINE
-void
-dict_index_set_space(
-/*=================*/
- dict_index_t* index, /*!< in/out: index */
- ulint space) /*!< in: space id */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Gets the page number of the root of the index tree.
-@return page number */
-UNIV_INLINE
-ulint
-dict_index_get_page(
-/*================*/
- const dict_index_t* tree) /*!< in: index */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Gets the read-write lock of the index tree.
-@return read-write lock */
-UNIV_INLINE
-prio_rw_lock_t*
-dict_index_get_lock(
-/*================*/
- dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Returns free space reserved for future updates of records. This is
-relevant only in the case of many consecutive inserts, as updates
-which make the records bigger might fragment the index.
-@return number of free bytes on page, reserved for updates */
-UNIV_INLINE
-ulint
-dict_index_get_space_reserve(void);
-/*==============================*/
-
-/* Online index creation @{ */
-/********************************************************************//**
-Gets the status of online index creation.
-@return the status */
-UNIV_INLINE
-enum online_index_status
-dict_index_get_online_status(
-/*=========================*/
- const dict_index_t* index) /*!< in: secondary index */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Sets the status of online index creation. */
-UNIV_INLINE
-void
-dict_index_set_online_status(
-/*=========================*/
- dict_index_t* index, /*!< in/out: index */
- enum online_index_status status) /*!< in: status */
- MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Determines if a secondary index is being or has been created online,
-or if the table is being rebuilt online, allowing concurrent modifications
-to the table.
-@retval true if the index is being or has been built online, or
-if this is a clustered index and the table is being or has been rebuilt online
-@retval false if the index has been created or the table has been
-rebuilt completely */
-UNIV_INLINE
-bool
-dict_index_is_online_ddl(
-/*=====================*/
- const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Calculates the minimum record length in an index. */
-UNIV_INTERN
-ulint
-dict_index_calc_min_rec_len(
-/*========================*/
- const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Reserves the dictionary system mutex for MySQL. */
-UNIV_INTERN
-void
-dict_mutex_enter_for_mysql_func(const char * file, ulint line);
-/*============================*/
-
-#define dict_mutex_enter_for_mysql() \
- dict_mutex_enter_for_mysql_func(__FILE__, __LINE__)
-
-/********************************************************************//**
-Releases the dictionary system mutex for MySQL. */
-UNIV_INTERN
-void
-dict_mutex_exit_for_mysql(void);
-/*===========================*/
-
-/** Create a dict_table_t's stats latch or delay for lazy creation.
-This function is only called from either single threaded environment
-or from a thread that has not shared the table object with other threads.
-@param[in,out] table table whose stats latch to create
-@param[in] enabled if false then the latch is disabled
-and dict_table_stats_lock()/unlock() become noop on this table. */
-
-void
-dict_table_stats_latch_create(
- dict_table_t* table,
- bool enabled);
-
-/** Destroy a dict_table_t's stats latch.
-This function is only called from either single threaded environment
-or from a thread that has not shared the table object with other threads.
-@param[in,out] table table whose stats latch to destroy */
-
-void
-dict_table_stats_latch_destroy(
- dict_table_t* table);
-
-/**********************************************************************//**
-Lock the appropriate latch to protect a given table's statistics.
-table->id is used to pick the corresponding latch from a global array of
-latches. */
-UNIV_INTERN
-void
-dict_table_stats_lock(
-/*==================*/
- dict_table_t* table, /*!< in: table */
- ulint latch_mode); /*!< in: RW_S_LATCH or RW_X_LATCH */
-/**********************************************************************//**
-Unlock the latch that has been locked by dict_table_stats_lock() */
-UNIV_INTERN
-void
-dict_table_stats_unlock(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- ulint latch_mode); /*!< in: RW_S_LATCH or RW_X_LATCH */
-/********************************************************************//**
-Checks if the database name in two table names is the same.
-@return TRUE if same db name */
-UNIV_INTERN
-ibool
-dict_tables_have_same_db(
-/*=====================*/
- const char* name1, /*!< in: table name in the form
- dbname '/' tablename */
- const char* name2) /*!< in: table name in the form
- dbname '/' tablename */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Removes an index from the cache */
-UNIV_INTERN
-void
-dict_index_remove_from_cache(
-/*=========================*/
- dict_table_t* table, /*!< in/out: table */
- dict_index_t* index) /*!< in, own: index */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Get index by name
-@return index, NULL if does not exist */
-UNIV_INTERN
-dict_index_t*
-dict_table_get_index_on_name(
-/*=========================*/
- dict_table_t* table, /*!< in: table */
- const char* name) /*!< in: name of the index to find */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Looks for an index with the given id given a table instance.
-@return index or NULL */
-UNIV_INTERN
-dict_index_t*
-dict_table_find_index_on_id(
-/*========================*/
- const dict_table_t* table, /*!< in: table instance */
- index_id_t id) /*!< in: index id */
- __attribute__((nonnull, warn_unused_result));
-/**********************************************************************//**
-In case there is more than one index with the same name return the index
-with the min(id).
-@return index, NULL if does not exist */
-UNIV_INTERN
-dict_index_t*
-dict_table_get_index_on_name_and_min_id(
-/*====================================*/
- dict_table_t* table, /*!< in: table */
- const char* name) /*!< in: name of the index to find */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***************************************************************
-Check whether a column exists in an FTS index. */
-UNIV_INLINE
-ulint
-dict_table_is_fts_column(
-/*=====================*/
- /* out: ULINT_UNDEFINED if no match else
- the offset within the vector */
- ib_vector_t* indexes,/* in: vector containing only FTS indexes */
- ulint col_no) /* in: col number to search for */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Move a table to the non LRU end of the LRU list. */
-UNIV_INTERN
-void
-dict_table_move_from_lru_to_non_lru(
-/*================================*/
- dict_table_t* table) /*!< in: table to move from LRU to non-LRU */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Move a table to the LRU list from the non-LRU list. */
-UNIV_INTERN
-void
-dict_table_move_from_non_lru_to_lru(
-/*================================*/
- dict_table_t* table) /*!< in: table to move from non-LRU to LRU */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Move to the most recently used segment of the LRU list. */
-UNIV_INTERN
-void
-dict_move_to_mru(
-/*=============*/
- dict_table_t* table) /*!< in: table to move to MRU */
- MY_ATTRIBUTE((nonnull));
-
-/** Maximum number of columns in a foreign key constraint. Please Note MySQL
-has a much lower limit on the number of columns allowed in a foreign key
-constraint */
-#define MAX_NUM_FK_COLUMNS 500
-
-/* Buffers for storing detailed information about the latest foreign key
-and unique key errors */
-extern FILE* dict_foreign_err_file;
-extern ib_mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */
-
-/** the dictionary system */
-extern dict_sys_t* dict_sys;
-/** the data dictionary rw-latch protecting dict_sys */
-extern rw_lock_t dict_operation_lock;
-
-typedef std::map<table_id_t, ib_uint64_t> autoinc_map_t;
-
-/* Dictionary system struct */
-struct dict_sys_t{
- ib_prio_mutex_t mutex; /*!< mutex protecting the data
- dictionary; protects also the
- disk-based dictionary system tables;
- this mutex serializes CREATE TABLE
- and DROP TABLE, as well as reading
- the dictionary data for a table from
- system tables */
- row_id_t row_id; /*!< the next row id to assign;
- NOTE that at a checkpoint this
- must be written to the dict system
- header and flushed to a file; in
- recovery this must be derived from
- the log records */
- hash_table_t* table_hash; /*!< hash table of the tables, based
- on name */
- hash_table_t* table_id_hash; /*!< hash table of the tables, based
- on id */
- ulint size; /*!< varying space in bytes occupied
- by the data dictionary table and
- index objects */
- dict_table_t* sys_tables; /*!< SYS_TABLES table */
- dict_table_t* sys_columns; /*!< SYS_COLUMNS table */
- dict_table_t* sys_indexes; /*!< SYS_INDEXES table */
- dict_table_t* sys_fields; /*!< SYS_FIELDS table */
-
- /*=============================*/
- UT_LIST_BASE_NODE_T(dict_table_t)
- table_LRU; /*!< List of tables that can be evicted
- from the cache */
- UT_LIST_BASE_NODE_T(dict_table_t)
- table_non_LRU; /*!< List of tables that can't be
- evicted from the cache */
- autoinc_map_t* autoinc_map; /*!< Map to store table id and autoinc
- when table is evicted */
-};
-#endif /* !UNIV_HOTBACKUP */
-
-/** dummy index for ROW_FORMAT=REDUNDANT supremum and infimum records */
-extern dict_index_t* dict_ind_redundant;
-/** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */
-extern dict_index_t* dict_ind_compact;
-
-/**********************************************************************//**
-Inits dict_ind_redundant and dict_ind_compact. */
-UNIV_INTERN
-void
-dict_ind_init(void);
-/*===============*/
-
-/* Auxiliary structs for checking a table definition @{ */
-
-/* This struct is used to specify the name and type that a column must
-have when checking a table's schema. */
-struct dict_col_meta_t {
- const char* name; /* column name */
- ulint mtype; /* required column main type */
- ulint prtype_mask; /* required column precise type mask;
- if this is non-zero then all the
- bits it has set must also be set
- in the column's prtype */
- ulint len; /* required column length */
-};
-
-/* This struct is used for checking whether a given table exists and
-whether it has a predefined schema (number of columns and columns names
-and types) */
-struct dict_table_schema_t {
- const char* table_name; /* the name of the table whose
- structure we are checking */
- ulint n_cols; /* the number of columns the
- table must have */
- dict_col_meta_t* columns; /* metadata for the columns;
- this array has n_cols
- elements */
- ulint n_foreign; /* number of foreign keys this
- table has, pointing to other
- tables (where this table is
- FK child) */
- ulint n_referenced; /* number of foreign keys other
- tables have, pointing to this
- table (where this table is
- parent) */
-};
-/* @} */
-
-/*********************************************************************//**
-Checks whether a table exists and whether it has the given structure.
-The table must have the same number of columns with the same names and
-types. The order of the columns does not matter.
-The caller must own the dictionary mutex.
-dict_table_schema_check() @{
-@return DB_SUCCESS if the table exists and contains the necessary columns */
-UNIV_INTERN
-dberr_t
-dict_table_schema_check(
-/*====================*/
- dict_table_schema_t* req_schema, /*!< in/out: required table
- schema */
- char* errstr, /*!< out: human readable error
- message if != DB_SUCCESS and
- != DB_TABLE_NOT_FOUND is
- returned */
- size_t errstr_sz) /*!< in: errstr size */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/* @} */
-
-/*********************************************************************//**
-Converts a database and table name from filesystem encoding
-(e.g. d@i1b/a@q1b@1Kc, same format as used in dict_table_t::name) in two
-strings in UTF8 encoding (e.g. dцb and aюbØc). The output buffers must be
-at least MAX_DB_UTF8_LEN and MAX_TABLE_UTF8_LEN bytes. */
-UNIV_INTERN
-void
-dict_fs2utf8(
-/*=========*/
- const char* db_and_table, /*!< in: database and table names,
- e.g. d@i1b/a@q1b@1Kc */
- char* db_utf8, /*!< out: database name, e.g. dцb */
- size_t db_utf8_size, /*!< in: dbname_utf8 size */
- char* table_utf8, /*!< out: table name, e.g. aюbØc */
- size_t table_utf8_size)/*!< in: table_utf8 size */
- MY_ATTRIBUTE((nonnull));
-
-/**********************************************************************//**
-Closes the data dictionary module. */
-UNIV_INTERN
-void
-dict_close(void);
-/*============*/
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Check whether the index is corrupted.
-@return nonzero for corrupted index, zero for valid indexes */
-UNIV_INLINE
-ulint
-dict_index_is_corrupted(
-/*====================*/
- const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((warn_unused_result));
-
-#endif /* !UNIV_HOTBACKUP */
-/**********************************************************************//**
-Flags an index and table corrupted both in the data dictionary cache
-and in the system table SYS_INDEXES. */
-UNIV_INTERN
-void
-dict_set_corrupted(
-/*===============*/
- dict_index_t* index, /*!< in/out: index */
- trx_t* trx, /*!< in/out: transaction */
- const char* ctx) /*!< in: context */
- UNIV_COLD;
-
-/**********************************************************************//**
-Flags an index corrupted in the data dictionary cache only. This
-is used mostly to mark a corrupted index when index's own dictionary
-is corrupted, and we force to load such index for repair purpose */
-UNIV_INTERN
-void
-dict_set_corrupted_index_cache_only(
-/*================================*/
- dict_index_t* index, /*!< in/out: index */
- dict_table_t* table); /*!< in/out: table */
-
-/**********************************************************************//**
-Flags a table with specified space_id corrupted in the table dictionary
-cache.
-@return TRUE if successful */
-UNIV_INTERN
-ibool
-dict_set_corrupted_by_space(
-/*========================*/
- ulint space_id); /*!< in: space ID */
-
-/**********************************************************************//**
-Flags a table with specified space_id encrypted in the data dictionary
-cache
-@param[in] space_id Tablespace id */
-UNIV_INTERN
-void
-dict_set_encrypted_by_space(
- ulint space_id);
-
-/********************************************************************//**
-Validate the table flags.
-@return true if valid. */
-UNIV_INLINE
-bool
-dict_tf_is_valid(
-/*=============*/
- ulint flags) /*!< in: table flags */
- MY_ATTRIBUTE((warn_unused_result));
-
-/********************************************************************//**
-Check if the tablespace for the table has been discarded.
-@return true if the tablespace has been discarded. */
-UNIV_INLINE
-bool
-dict_table_is_discarded(
-/*====================*/
- const dict_table_t* table) /*!< in: table to check */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-
-/********************************************************************//**
-Check if it is a temporary table.
-@return true if temporary table flag is set. */
-UNIV_INLINE
-bool
-dict_table_is_temporary(
-/*====================*/
- const dict_table_t* table) /*!< in: table to check */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-This function should be called whenever a page is successfully
-compressed. Updates the compression padding information. */
-UNIV_INTERN
-void
-dict_index_zip_success(
-/*===================*/
- dict_index_t* index) /*!< in/out: index to be updated. */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-This function should be called whenever a page compression attempt
-fails. Updates the compression padding information. */
-UNIV_INTERN
-void
-dict_index_zip_failure(
-/*===================*/
- dict_index_t* index) /*!< in/out: index to be updated. */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Return the optimal page size, for which page will likely compress.
-@return page size beyond which page may not compress*/
-UNIV_INTERN
-ulint
-dict_index_zip_pad_optimal_page_size(
-/*=================================*/
- dict_index_t* index) /*!< in: index for which page size
- is requested */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*************************************************************//**
-Convert table flag to row format string.
-@return row format name */
-UNIV_INTERN
-const char*
-dict_tf_to_row_format_string(
-/*=========================*/
- ulint table_flag); /*!< in: row format setting */
-/*****************************************************************//**
-Get index by first field of the index
-@return index which is having first field matches
-with the field present in field_index position of table */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_index_on_first_col(
-/*==============================*/
- const dict_table_t* table, /*!< in: table */
- ulint col_index); /*!< in: position of column
- in table */
-
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_NONINL
-#include "dict0dict.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic
deleted file mode 100644
index f68d4e176da..00000000000
--- a/storage/xtradb/include/dict0dict.ic
+++ /dev/null
@@ -1,1588 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/dict0dict.ic
-Data dictionary system
-
-Created 1/8/1996 Heikki Tuuri
-***********************************************************************/
-
-#include "data0type.h"
-#ifndef UNIV_HOTBACKUP
-#include "dict0load.h"
-#include "rem0types.h"
-#include "fsp0fsp.h"
-#include "srv0srv.h"
-#include "sync0rw.h" /* RW_S_LATCH */
-
-/*********************************************************************//**
-Gets the minimum number of bytes per character.
-@return minimum multi-byte char size, in bytes */
-UNIV_INLINE
-ulint
-dict_col_get_mbminlen(
-/*==================*/
- const dict_col_t* col) /*!< in: column */
-{
- return(DATA_MBMINLEN(col->mbminmaxlen));
-}
-/*********************************************************************//**
-Gets the maximum number of bytes per character.
-@return maximum multi-byte char size, in bytes */
-UNIV_INLINE
-ulint
-dict_col_get_mbmaxlen(
-/*==================*/
- const dict_col_t* col) /*!< in: column */
-{
- return(DATA_MBMAXLEN(col->mbminmaxlen));
-}
-/*********************************************************************//**
-Sets the minimum and maximum number of bytes per character. */
-UNIV_INLINE
-void
-dict_col_set_mbminmaxlen(
-/*=====================*/
- dict_col_t* col, /*!< in/out: column */
- ulint mbminlen, /*!< in: minimum multi-byte
- character size, in bytes */
- ulint mbmaxlen) /*!< in: minimum multi-byte
- character size, in bytes */
-{
- ut_ad(mbminlen < DATA_MBMAX);
- ut_ad(mbmaxlen < DATA_MBMAX);
- ut_ad(mbminlen <= mbmaxlen);
-
- col->mbminmaxlen = DATA_MBMINMAXLEN(mbminlen, mbmaxlen);
-}
-/*********************************************************************//**
-Gets the column data type. */
-UNIV_INLINE
-void
-dict_col_copy_type(
-/*===============*/
- const dict_col_t* col, /*!< in: column */
- dtype_t* type) /*!< out: data type */
-{
- ut_ad(col != NULL);
- ut_ad(type != NULL);
-
- type->mtype = col->mtype;
- type->prtype = col->prtype;
- type->len = col->len;
- type->mbminmaxlen = col->mbminmaxlen;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Assert that a column and a data type match.
-@return TRUE */
-UNIV_INLINE
-ibool
-dict_col_type_assert_equal(
-/*=======================*/
- const dict_col_t* col, /*!< in: column */
- const dtype_t* type) /*!< in: data type */
-{
- ut_ad(col);
- ut_ad(type);
-
- ut_ad(col->mtype == type->mtype);
- ut_ad(col->prtype == type->prtype);
- //ut_ad(col->len == type->len);
-# ifndef UNIV_HOTBACKUP
- ut_ad(col->mbminmaxlen == type->mbminmaxlen);
-# endif /* !UNIV_HOTBACKUP */
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Returns the minimum size of the column.
-@return minimum size */
-UNIV_INLINE
-ulint
-dict_col_get_min_size(
-/*==================*/
- const dict_col_t* col) /*!< in: column */
-{
- return(dtype_get_min_size_low(col->mtype, col->prtype, col->len,
- col->mbminmaxlen));
-}
-/***********************************************************************//**
-Returns the maximum size of the column.
-@return maximum size */
-UNIV_INLINE
-ulint
-dict_col_get_max_size(
-/*==================*/
- const dict_col_t* col) /*!< in: column */
-{
- return(dtype_get_max_size_low(col->mtype, col->len));
-}
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************//**
-Returns the size of a fixed size column, 0 if not a fixed size column.
-@return fixed size, or 0 */
-UNIV_INLINE
-ulint
-dict_col_get_fixed_size(
-/*====================*/
- const dict_col_t* col, /*!< in: column */
- ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
-{
- return(dtype_get_fixed_size_low(col->mtype, col->prtype, col->len,
- col->mbminmaxlen, comp));
-}
-/***********************************************************************//**
-Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
-For fixed length types it is the fixed length of the type, otherwise 0.
-@return SQL null storage size in ROW_FORMAT=REDUNDANT */
-UNIV_INLINE
-ulint
-dict_col_get_sql_null_size(
-/*=======================*/
- const dict_col_t* col, /*!< in: column */
- ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
-{
- return(dict_col_get_fixed_size(col, comp));
-}
-
-/*********************************************************************//**
-Gets the column number.
-@return col->ind, table column position (starting from 0) */
-UNIV_INLINE
-ulint
-dict_col_get_no(
-/*============*/
- const dict_col_t* col) /*!< in: column */
-{
- ut_ad(col);
-
- return(col->ind);
-}
-
-/*********************************************************************//**
-Gets the column position in the clustered index. */
-UNIV_INLINE
-ulint
-dict_col_get_clust_pos(
-/*===================*/
- const dict_col_t* col, /*!< in: table column */
- const dict_index_t* clust_index) /*!< in: clustered index */
-{
- ulint i;
-
- ut_ad(col);
- ut_ad(clust_index);
- ut_ad(dict_index_is_clust(clust_index));
-
- for (i = 0; i < clust_index->n_def; i++) {
- const dict_field_t* field = &clust_index->fields[i];
-
- if (!field->prefix_len && field->col == col) {
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Gets the first index on the table (the clustered index).
-@return index, NULL if none exists */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_first_index(
-/*=======================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(UT_LIST_GET_FIRST(((dict_table_t*) table)->indexes));
-}
-
-/********************************************************************//**
-Gets the last index on the table.
-@return index, NULL if none exists */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_last_index(
-/*=======================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(UT_LIST_GET_LAST((const_cast<dict_table_t*>(table))
- ->indexes));
-}
-
-/********************************************************************//**
-Gets the next index on the table.
-@return index, NULL if none left */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_next_index(
-/*======================*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(UT_LIST_GET_NEXT(indexes, (dict_index_t*) index));
-}
-#endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************************//**
-Check whether the index is the clustered index.
-@return nonzero for clustered index, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_clust(
-/*================*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->type & DICT_CLUSTERED);
-}
-/********************************************************************//**
-Check whether the index is unique.
-@return nonzero for unique index, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_unique(
-/*=================*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->type & DICT_UNIQUE);
-}
-
-/********************************************************************//**
-Check whether the index is the insert buffer tree.
-@return nonzero for insert buffer, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_ibuf(
-/*===============*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->type & DICT_IBUF);
-}
-
-/********************************************************************//**
-Check whether the index is an universal index tree.
-@return nonzero for universal tree, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_univ(
-/*===============*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->type & DICT_UNIVERSAL);
-}
-
-/********************************************************************//**
-Check whether the index is a secondary index or the insert buffer tree.
-@return nonzero for insert buffer, zero for other indexes */
-UNIV_INLINE
-ulint
-dict_index_is_sec_or_ibuf(
-/*======================*/
- const dict_index_t* index) /*!< in: index */
-{
- ulint type;
-
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- type = index->type;
-
- return(!(type & DICT_CLUSTERED) || (type & DICT_IBUF));
-}
-
-/********************************************************************//**
-Gets the number of user-defined columns in a table in the dictionary
-cache.
-@return number of user-defined (e.g., not ROW_ID) columns of a table */
-UNIV_INLINE
-ulint
-dict_table_get_n_user_cols(
-/*=======================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(table->n_cols - DATA_N_SYS_COLS);
-}
-
-/********************************************************************//**
-Gets the number of system columns in a table in the dictionary cache.
-@return number of system (e.g., ROW_ID) columns of a table */
-UNIV_INLINE
-ulint
-dict_table_get_n_sys_cols(
-/*======================*/
- const dict_table_t* table MY_ATTRIBUTE((unused))) /*!< in: table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- ut_ad(table->cached);
-
- return(DATA_N_SYS_COLS);
-}
-
-/********************************************************************//**
-Gets the number of all columns (also system) in a table in the dictionary
-cache.
-@return number of columns of a table */
-UNIV_INLINE
-ulint
-dict_table_get_n_cols(
-/*==================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(table->n_cols);
-}
-
-/********************************************************************//**
-Gets the approximately estimated number of rows in the table.
-@return estimated number of rows */
-UNIV_INLINE
-ib_uint64_t
-dict_table_get_n_rows(
-/*==================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table->stat_initialized);
-
- return(table->stat_n_rows);
-}
-
-/********************************************************************//**
-Increment the number of rows in the table by one.
-Notice that this operation is not protected by any latch, the number is
-approximate. */
-UNIV_INLINE
-void
-dict_table_n_rows_inc(
-/*==================*/
- dict_table_t* table) /*!< in/out: table */
-{
- if (table->stat_initialized) {
- ib_uint64_t n_rows = table->stat_n_rows;
- if (n_rows < 0xFFFFFFFFFFFFFFFFULL) {
- table->stat_n_rows = n_rows + 1;
- }
- }
-}
-
-/********************************************************************//**
-Decrement the number of rows in the table by one.
-Notice that this operation is not protected by any latch, the number is
-approximate. */
-UNIV_INLINE
-void
-dict_table_n_rows_dec(
-/*==================*/
- dict_table_t* table) /*!< in/out: table */
-{
- if (table->stat_initialized) {
- ib_uint64_t n_rows = table->stat_n_rows;
- if (n_rows > 0) {
- table->stat_n_rows = n_rows - 1;
- }
- }
-}
-
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Gets the nth column of a table.
-@return pointer to column object */
-UNIV_INLINE
-dict_col_t*
-dict_table_get_nth_col(
-/*===================*/
- const dict_table_t* table, /*!< in: table */
- ulint pos) /*!< in: position of column */
-{
- ut_ad(table);
- ut_ad(pos < table->n_def);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return((dict_col_t*) (table->cols) + pos);
-}
-
-/********************************************************************//**
-Gets the given system column of a table.
-@return pointer to column object */
-UNIV_INLINE
-dict_col_t*
-dict_table_get_sys_col(
-/*===================*/
- const dict_table_t* table, /*!< in: table */
- ulint sys) /*!< in: DATA_ROW_ID, ... */
-{
- dict_col_t* col;
-
- ut_ad(table);
- ut_ad(sys < DATA_N_SYS_COLS);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- col = dict_table_get_nth_col(table, table->n_cols
- - DATA_N_SYS_COLS + sys);
- ut_ad(col->mtype == DATA_SYS);
- ut_ad(col->prtype == (sys | DATA_NOT_NULL));
-
- return(col);
-}
-#endif /* UNIV_DEBUG */
-
-/********************************************************************//**
-Gets the given system column number of a table.
-@return column number */
-UNIV_INLINE
-ulint
-dict_table_get_sys_col_no(
-/*======================*/
- const dict_table_t* table, /*!< in: table */
- ulint sys) /*!< in: DATA_ROW_ID, ... */
-{
- ut_ad(table);
- ut_ad(sys < DATA_N_SYS_COLS);
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(table->n_cols - DATA_N_SYS_COLS + sys);
-}
-
-/********************************************************************//**
-Check whether the table uses the compact page format.
-@return TRUE if table uses the compact page format */
-UNIV_INLINE
-ibool
-dict_table_is_comp(
-/*===============*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table);
-
-#if DICT_TF_COMPACT != 1
-#error "DICT_TF_COMPACT must be 1"
-#endif
-
- return(table->flags & DICT_TF_COMPACT);
-}
-
-/************************************************************************
-Check if the table has an FTS index. */
-UNIV_INLINE
-ibool
-dict_table_has_fts_index(
-/*=====================*/
- /* out: TRUE if table has an FTS index */
- dict_table_t* table) /* in: table */
-{
- ut_ad(table);
-
- return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS));
-}
-
-/********************************************************************//**
-Validate the table flags.
-@return true if valid. */
-UNIV_INLINE
-bool
-dict_tf_is_valid(
-/*=============*/
- ulint flags) /*!< in: table flags */
-{
- ulint compact = DICT_TF_GET_COMPACT(flags);
- ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags);
- ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags);
- ulint unused = DICT_TF_GET_UNUSED(flags);
- ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(flags);
- ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags);
- ulint data_dir = DICT_TF_HAS_DATA_DIR(flags);
- ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(flags);
-
- /* Make sure there are no bits that we do not know about. */
- if (unused != 0) {
- fprintf(stderr,
- "InnoDB: Error: table unused flags are " ULINTPF
- " in the data dictionary and are corrupted\n"
- "InnoDB: Error: data dictionary flags are\n"
- "InnoDB: compact " ULINTPF " atomic_blobs " ULINTPF
- "\nInnoDB: unused " ULINTPF " data_dir " ULINTPF
- " zip_ssize " ULINTPF
- "\nInnoDB: page_compression " ULINTPF
- " page_compression_level " ULINTPF
- "\nInnoDB: atomic_writes " ULINTPF "\n",
- unused,
- compact, atomic_blobs, unused, data_dir, zip_ssize,
- page_compression, page_compression_level, atomic_writes
- );
-
- return(false);
-
- } else if (atomic_blobs) {
- /* Barracuda row formats COMPRESSED and DYNAMIC build on
- the page structure introduced for the COMPACT row format
- by allowing keys in secondary indexes to be made from
- data stored off-page in the clustered index. */
-
- if (!compact) {
- fprintf(stderr,
- "InnoDB: Error: table compact flags are "
- ULINTPF
- " in the data dictionary and are corrupted\n"
- "InnoDB: Error: data dictionary flags are\n"
- "InnoDB: compact " ULINTPF
- " atomic_blobs " ULINTPF "\n"
- "InnoDB: unused " ULINTPF
- " data_dir " ULINTPF " zip_ssize " ULINTPF
- "\nInnoDB: page_compression " ULINTPF
- " page_compression_level " ULINTPF
- "\nInnoDB: atomic_writes " ULINTPF "\n",
- compact, compact, atomic_blobs, unused, data_dir, zip_ssize,
- page_compression, page_compression_level, atomic_writes
- );
- return(false);
- }
-
- } else if (zip_ssize) {
-
- /* Antelope does not support COMPRESSED row format. */
- fprintf(stderr,
- "InnoDB: Error: table flags are " ULINTPF
- " in the data dictionary and are corrupted\n"
- "InnoDB: Error: data dictionary flags are\n"
- "InnoDB: compact " ULINTPF " atomic_blobs " ULINTPF
- "\nInnoDB: unused " ULINTPF " data_dir " ULINTPF
- " zip_ssize " ULINTPF
- "\nInnoDB: page_compression " ULINTPF
- " page_compression_level " ULINTPF
- "\nInnoDB: atomic_writes " ULINTPF "\n",
- flags, compact, atomic_blobs, unused, data_dir, zip_ssize,
- page_compression, page_compression_level, atomic_writes
- );
- return(false);
- }
-
- if (zip_ssize) {
-
- /* COMPRESSED row format must have compact and atomic_blobs
- bits set and validate the number is within allowed range. */
-
- if (!compact
- || !atomic_blobs
- || zip_ssize > PAGE_ZIP_SSIZE_MAX) {
-
- fprintf(stderr,
- "InnoDB: Error: table compact flags are "
- ULINTPF
- " in the data dictionary and are corrupted\n"
- "InnoDB: Error: data dictionary flags are\n"
- "InnoDB: compact " ULINTPF
- " atomic_blobs " ULINTPF "\n"
- "InnoDB: unused " ULINTPF
- " data_dir " ULINTPF " zip_ssize " ULINTPF
- "\nInnoDB: page_compression " ULINTPF
- " page_compression_level " ULINTPF
- "\nInnoDB: atomic_writes " ULINTPF "\n",
- flags,
- compact, atomic_blobs, unused, data_dir, zip_ssize,
- page_compression, page_compression_level, atomic_writes
-
- );
- return(false);
- }
- }
-
- if (page_compression || page_compression_level) {
- /* Page compression format must have compact and
- atomic_blobs and page_compression_level requires
- page_compression */
- if (!compact
- || !page_compression
- || !atomic_blobs) {
-
- fprintf(stderr,
- "InnoDB: Error: table flags are " ULINTPF
- " in the data dictionary and are corrupted\n"
- "InnoDB: Error: data dictionary flags are\n"
- "InnoDB: compact " ULINTPF
- " atomic_blobs " ULINTPF "\n"
- "InnoDB: unused " ULINTPF
- " data_dir " ULINTPF " zip_ssize " ULINTPF
- "\nInnoDB: page_compression " ULINTPF
- " page_compression_level " ULINTPF
- "\nInnoDB: atomic_writes " ULINTPF "\n",
- flags, compact, atomic_blobs, unused, data_dir, zip_ssize,
- page_compression, page_compression_level, atomic_writes
- );
- return(false);
- }
- }
-
- if (atomic_writes) {
-
- if(atomic_writes > ATOMIC_WRITES_OFF) {
-
- fprintf(stderr,
- "InnoDB: Error: table flags are " ULINTPF
- " in the data dictionary and are corrupted\n"
- "InnoDB: Error: data dictionary flags are\n"
- "InnoDB: compact " ULINTPF
- " atomic_blobs " ULINTPF "\n"
- "InnoDB: unused " ULINTPF
- " data_dir " ULINTPF " zip_ssize " ULINTPF
- "\nInnoDB: page_compression " ULINTPF
- " page_compression_level " ULINTPF
- "\nInnoDB: atomic_writes " ULINTPF "\n",
- flags, compact, atomic_blobs, unused, data_dir, zip_ssize,
- page_compression, page_compression_level, atomic_writes
- );
- return(false);
- }
- }
-
- /* CREATE TABLE ... DATA DIRECTORY is supported for any row format,
- so the DATA_DIR flag is compatible with all other table flags. */
-
- return(true);
-}
-
-/********************************************************************//**
-Validate a SYS_TABLES TYPE field and return it.
-@return Same as input after validating it as a SYS_TABLES TYPE field.
-If there is an error, return ULINT_UNDEFINED. */
-UNIV_INLINE
-ulint
-dict_sys_tables_type_validate(
-/*==========================*/
- ulint type, /*!< in: SYS_TABLES.TYPE */
- ulint n_cols) /*!< in: SYS_TABLES.N_COLS */
-{
- ulint low_order_bit = DICT_TF_GET_COMPACT(type);
- ulint redundant = !(n_cols & DICT_N_COLS_COMPACT);
- ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(type);
- ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type);
- ulint unused = DICT_TF_GET_UNUSED(type);
- ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(type);
- ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type);
- ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(type);
-
- /* The low order bit of SYS_TABLES.TYPE is always set to 1.
- If the format is UNIV_FORMAT_B or higher, this field is the same
- as dict_table_t::flags. Zero is not allowed here. */
- if (!low_order_bit) {
- return(ULINT_UNDEFINED);
- }
-
- if (redundant) {
- if (zip_ssize || atomic_blobs) {
- return(ULINT_UNDEFINED);
- }
- }
-
- /* Make sure there are no bits that we do not know about. */
- if (unused) {
- return(ULINT_UNDEFINED);
- }
-
- if (atomic_blobs) {
- /* Barracuda row formats COMPRESSED and DYNAMIC build on
- the page structure introduced for the COMPACT row format
- by allowing keys in secondary indexes to be made from
- data stored off-page in the clustered index.
-
- The DICT_N_COLS_COMPACT flag should be in N_COLS,
- but we already know that. */
- } else if (zip_ssize) {
- /* Antelope does not support COMPRESSED format. */
- return(ULINT_UNDEFINED);
- }
-
- if (zip_ssize) {
- /* COMPRESSED row format must have low_order_bit and
- atomic_blobs bits set and the DICT_N_COLS_COMPACT flag
- should be in N_COLS, but we already know about the
- low_order_bit and DICT_N_COLS_COMPACT flags. */
- if (!atomic_blobs) {
- return(ULINT_UNDEFINED);
- }
-
- /* Validate that the number is within allowed range. */
- if (zip_ssize > PAGE_ZIP_SSIZE_MAX) {
- return(ULINT_UNDEFINED);
- }
- }
-
- /* There is nothing to validate for the data_dir field.
- CREATE TABLE ... DATA DIRECTORY is supported for any row
- format, so the DATA_DIR flag is compatible with any other
- table flags. However, it is not used with TEMPORARY tables.*/
-
- if (page_compression || page_compression_level) {
- /* page compressed row format must have low_order_bit and
- atomic_blobs bits set and the DICT_N_COLS_COMPACT flag
- should be in N_COLS, but we already know about the
- low_order_bit and DICT_N_COLS_COMPACT flags. */
-
- if (!atomic_blobs || !page_compression) {
- return(ULINT_UNDEFINED);
- }
- }
-
- /* Validate that the atomic writes number is within allowed range. */
- if (atomic_writes > ATOMIC_WRITES_OFF) {
- return(ULINT_UNDEFINED);
- }
-
- /* Return the validated SYS_TABLES.TYPE. */
- return(type);
-}
-
-/********************************************************************//**
-Determine the file format from dict_table_t::flags
-The low order bit will be zero for REDUNDANT and 1 for COMPACT. For any
-other row_format, file_format is > 0 and DICT_TF_COMPACT will also be set.
-@return file format version */
-UNIV_INLINE
-rec_format_t
-dict_tf_get_rec_format(
-/*===================*/
- ulint flags) /*!< in: dict_table_t::flags */
-{
- ut_a(dict_tf_is_valid(flags));
-
- if (!DICT_TF_GET_COMPACT(flags)) {
- return(REC_FORMAT_REDUNDANT);
- }
-
- if (!DICT_TF_HAS_ATOMIC_BLOBS(flags)) {
- return(REC_FORMAT_COMPACT);
- }
-
- if (DICT_TF_GET_ZIP_SSIZE(flags)) {
- return(REC_FORMAT_COMPRESSED);
- }
-
- return(REC_FORMAT_DYNAMIC);
-}
-
-/********************************************************************//**
-Determine the file format from a dict_table_t::flags.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_tf_get_format(
-/*===============*/
- ulint flags) /*!< in: dict_table_t::flags */
-{
- if (DICT_TF_HAS_ATOMIC_BLOBS(flags)) {
- return(UNIV_FORMAT_B);
- }
-
- return(UNIV_FORMAT_A);
-}
-
-/********************************************************************//**
-Determine the file format of a table.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_table_get_format(
-/*==================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table);
-
- return(dict_tf_get_format(table->flags));
-}
-
-/********************************************************************//**
-Set the file format and zip size in a dict_table_t::flags. If zip size
-is not needed, it should be 0. */
-UNIV_INLINE
-void
-dict_tf_set(
-/*========*/
- ulint* flags, /*!< in/out: table flags */
- rec_format_t format, /*!< in: file format */
- ulint zip_ssize, /*!< in: zip shift size */
- bool use_data_dir, /*!< in: table uses DATA DIRECTORY
- */
- bool page_compressed,/*!< in: table uses page compressed
- pages */
- ulint page_compression_level, /*!< in: table page compression
- level */
- ulint atomic_writes) /*!< in: table atomic writes setup */
-{
- atomic_writes_t awrites = (atomic_writes_t)atomic_writes;
-
- switch (format) {
- case REC_FORMAT_REDUNDANT:
- *flags = 0;
- ut_ad(zip_ssize == 0);
- break;
- case REC_FORMAT_COMPACT:
- *flags = DICT_TF_COMPACT;
- ut_ad(zip_ssize == 0);
- break;
- case REC_FORMAT_COMPRESSED:
- *flags = DICT_TF_COMPACT
- | (1 << DICT_TF_POS_ATOMIC_BLOBS)
- | (zip_ssize << DICT_TF_POS_ZIP_SSIZE);
- break;
- case REC_FORMAT_DYNAMIC:
- *flags = DICT_TF_COMPACT
- | (1 << DICT_TF_POS_ATOMIC_BLOBS);
- ut_ad(zip_ssize == 0);
- break;
- }
-
- if (use_data_dir) {
- *flags |= (1 << DICT_TF_POS_DATA_DIR);
- }
-
- if (page_compressed) {
- *flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS)
- | (1 << DICT_TF_POS_PAGE_COMPRESSION)
- | (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL);
-
- ut_ad(zip_ssize == 0);
- ut_ad(dict_tf_get_page_compression(*flags) == TRUE);
- ut_ad(dict_tf_get_page_compression_level(*flags) == page_compression_level);
- }
-
- *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES);
- ut_a(dict_tf_get_atomic_writes(*flags) == awrites);
-}
-
-/********************************************************************//**
-Convert a 32 bit integer table flags to the 32 bit integer that is
-written into the tablespace header at the offset FSP_SPACE_FLAGS and is
-also stored in the fil_space_t::flags field. The following chart shows
-the translation of the low order bit. Other bits are the same.
-========================= Low order bit ==========================
- | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC
-dict_table_t::flags | 0 | 1 | 1 | 1
-fil_space_t::flags | 0 | 0 | 1 | 1
-==================================================================
-@return tablespace flags (fil_space_t::flags) */
-UNIV_INLINE
-ulint
-dict_tf_to_fsp_flags(
-/*=================*/
- ulint table_flags) /*!< in: dict_table_t::flags */
-{
- ulint fsp_flags;
- ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(
- table_flags);
- ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags);
-
- ut_ad((DICT_TF_GET_PAGE_COMPRESSION(table_flags) == 0)
- == (page_compression_level == 0));
-
- DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure",
- return(ULINT_UNDEFINED););
-
- /* Adjust bit zero. */
- fsp_flags = DICT_TF_HAS_ATOMIC_BLOBS(table_flags) ? 1 : 0;
-
- /* ZIP_SSIZE and ATOMIC_BLOBS are at the same position. */
- fsp_flags |= table_flags
- & (DICT_TF_MASK_ZIP_SSIZE | DICT_TF_MASK_ATOMIC_BLOBS);
-
- fsp_flags |= FSP_FLAGS_PAGE_SSIZE();
-
- if (page_compression_level) {
- fsp_flags |= FSP_FLAGS_MASK_PAGE_COMPRESSION;
- }
-
- ut_a(fsp_flags_is_valid(fsp_flags));
-
- if (DICT_TF_HAS_DATA_DIR(table_flags)) {
- fsp_flags |= 1U << FSP_FLAGS_MEM_DATA_DIR;
- }
-
- fsp_flags |= atomic_writes << FSP_FLAGS_MEM_ATOMIC_WRITES;
- fsp_flags |= page_compression_level << FSP_FLAGS_MEM_COMPRESSION_LEVEL;
-
- return(fsp_flags);
-}
-
-/********************************************************************//**
-Convert a 32 bit integer from SYS_TABLES.TYPE to dict_table_t::flags
-The following chart shows the translation of the low order bit.
-Other bits are the same.
-========================= Low order bit ==========================
- | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC
-SYS_TABLES.TYPE | 1 | 1 | 1
-dict_table_t::flags | 0 | 1 | 1
-==================================================================
-@return ulint containing SYS_TABLES.TYPE */
-UNIV_INLINE
-ulint
-dict_sys_tables_type_to_tf(
-/*=======================*/
- ulint type, /*!< in: SYS_TABLES.TYPE field */
- ulint n_cols) /*!< in: SYS_TABLES.N_COLS field */
-{
- ulint flags;
- ulint redundant = !(n_cols & DICT_N_COLS_COMPACT);
-
- /* Adjust bit zero. */
- flags = redundant ? 0 : 1;
-
- /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION,
- PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. */
- flags |= type & (DICT_TF_MASK_ZIP_SSIZE
- | DICT_TF_MASK_ATOMIC_BLOBS
- | DICT_TF_MASK_DATA_DIR
- | DICT_TF_MASK_PAGE_COMPRESSION
- | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
- | DICT_TF_MASK_ATOMIC_WRITES
-
- );
-
- return(flags);
-}
-
-/********************************************************************//**
-Convert a 32 bit integer table flags to the 32bit integer that is written
-to a SYS_TABLES.TYPE field. The following chart shows the translation of
-the low order bit. Other bits are the same.
-========================= Low order bit ==========================
- | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC
-dict_table_t::flags | 0 | 1 | 1
-SYS_TABLES.TYPE | 1 | 1 | 1
-==================================================================
-@return ulint containing SYS_TABLES.TYPE */
-UNIV_INLINE
-ulint
-dict_tf_to_sys_tables_type(
-/*=======================*/
- ulint flags) /*!< in: dict_table_t::flags */
-{
- ulint type;
-
- ut_a(dict_tf_is_valid(flags));
-
- /* Adjust bit zero. It is always 1 in SYS_TABLES.TYPE */
- type = 1;
-
- /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION,
- PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. */
- type |= flags & (DICT_TF_MASK_ZIP_SSIZE
- | DICT_TF_MASK_ATOMIC_BLOBS
- | DICT_TF_MASK_DATA_DIR
- | DICT_TF_MASK_PAGE_COMPRESSION
- | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
- | DICT_TF_MASK_ATOMIC_WRITES);
-
- return(type);
-}
-
-/********************************************************************//**
-Extract the compressed page size from dict_table_t::flags.
-These flags are in memory, so assert that they are valid.
-@return compressed page size, or 0 if not compressed */
-UNIV_INLINE
-ulint
-dict_tf_get_zip_size(
-/*=================*/
- ulint flags) /*!< in: flags */
-{
- ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags);
- ulint zip_size = (zip_ssize
- ? (UNIV_ZIP_SIZE_MIN >> 1) << zip_ssize
- : 0);
-
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
-
- return(zip_size);
-}
-
-/********************************************************************//**
-Check whether the table uses the compressed compact page format.
-@return compressed page size, or 0 if not compressed */
-UNIV_INLINE
-ulint
-dict_table_zip_size(
-/*================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table);
-
- return(dict_tf_get_zip_size(table->flags));
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Obtain exclusive locks on all index trees of the table. This is to prevent
-accessing index trees while InnoDB is updating internal metadata for
-operations such as truncate tables. */
-UNIV_INLINE
-void
-dict_table_x_lock_indexes(
-/*======================*/
- dict_table_t* table) /*!< in: table */
-{
- dict_index_t* index;
-
- ut_a(table);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- /* Loop through each index of the table and lock them */
- for (index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
- rw_lock_x_lock(dict_index_get_lock(index));
- }
-}
-
-/*********************************************************************//**
-Release the exclusive locks on all index tree. */
-UNIV_INLINE
-void
-dict_table_x_unlock_indexes(
-/*========================*/
- dict_table_t* table) /*!< in: table */
-{
- dict_index_t* index;
-
- ut_a(table);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- for (index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
- rw_lock_x_unlock(dict_index_get_lock(index));
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************************//**
-Gets the number of fields in the internal representation of an index,
-including fields added by the dictionary system.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_fields(
-/*====================*/
- const dict_index_t* index) /*!< in: an internal
- representation of index (in
- the dictionary cache) */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->n_fields);
-}
-
-/********************************************************************//**
-Gets the number of fields in the internal representation of an index
-that uniquely determine the position of an index entry in the index, if
-we do not take multiversioning into account: in the B-tree use the value
-returned by dict_index_get_n_unique_in_tree.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_unique(
-/*====================*/
- const dict_index_t* index) /*!< in: an internal representation
- of index (in the dictionary cache) */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(index->cached);
-
- return(index->n_uniq);
-}
-
-/********************************************************************//**
-Gets the number of fields in the internal representation of an index
-which uniquely determine the position of an index entry in the index, if
-we also take multiversioning into account.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_unique_in_tree(
-/*============================*/
- const dict_index_t* index) /*!< in: an internal representation
- of index (in the dictionary cache) */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(index->cached);
-
- if (dict_index_is_clust(index)) {
-
- return(dict_index_get_n_unique(index));
- }
-
- return(dict_index_get_n_fields(index));
-}
-
-/********************************************************************//**
-Gets the number of user-defined ordering fields in the index. In the internal
-representation of clustered indexes we add the row id to the ordering fields
-to make a clustered index unique, but this function returns the number of
-fields the user defined in the index as ordering fields.
-@return number of fields */
-UNIV_INLINE
-ulint
-dict_index_get_n_ordering_defined_by_user(
-/*======================================*/
- const dict_index_t* index) /*!< in: an internal representation
- of index (in the dictionary cache) */
-{
- return(index->n_user_defined_cols);
-}
-
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Gets the nth field of an index.
-@return pointer to field object */
-UNIV_INLINE
-dict_field_t*
-dict_index_get_nth_field(
-/*=====================*/
- const dict_index_t* index, /*!< in: index */
- ulint pos) /*!< in: position of field */
-{
- ut_ad(index);
- ut_ad(pos < index->n_def);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return((dict_field_t*) (index->fields) + pos);
-}
-#endif /* UNIV_DEBUG */
-
-/********************************************************************//**
-Returns the position of a system column in an index.
-@return position, ULINT_UNDEFINED if not contained */
-UNIV_INLINE
-ulint
-dict_index_get_sys_col_pos(
-/*=======================*/
- const dict_index_t* index, /*!< in: index */
- ulint type) /*!< in: DATA_ROW_ID, ... */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- ut_ad(!dict_index_is_univ(index));
-
- if (dict_index_is_clust(index)) {
-
- return(dict_col_get_clust_pos(
- dict_table_get_sys_col(index->table, type),
- index));
- }
-
- return(dict_index_get_nth_col_pos(
- index, dict_table_get_sys_col_no(index->table, type),
- NULL));
-}
-
-/*********************************************************************//**
-Gets the field column.
-@return field->col, pointer to the table column */
-UNIV_INLINE
-const dict_col_t*
-dict_field_get_col(
-/*===============*/
- const dict_field_t* field) /*!< in: index field */
-{
- ut_ad(field);
-
- return(field->col);
-}
-
-/********************************************************************//**
-Gets pointer to the nth column in an index.
-@return column */
-UNIV_INLINE
-const dict_col_t*
-dict_index_get_nth_col(
-/*===================*/
- const dict_index_t* index, /*!< in: index */
- ulint pos) /*!< in: position of the field */
-{
- return(dict_field_get_col(dict_index_get_nth_field(index, pos)));
-}
-
-/********************************************************************//**
-Gets the column number the nth field in an index.
-@return column number */
-UNIV_INLINE
-ulint
-dict_index_get_nth_col_no(
-/*======================*/
- const dict_index_t* index, /*!< in: index */
- ulint pos) /*!< in: position of the field */
-{
- return(dict_col_get_no(dict_index_get_nth_col(index, pos)));
-}
-
-/********************************************************************//**
-Looks for column n in an index.
-@return position in internal representation of the index;
-ULINT_UNDEFINED if not contained */
-UNIV_INLINE
-ulint
-dict_index_get_nth_col_pos(
-/*=======================*/
- const dict_index_t* index, /*!< in: index */
- ulint n, /*!< in: column number */
- ulint* prefix_col_pos) /*!< out: col num if prefix */
-{
- return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE,
- prefix_col_pos));
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Returns the minimum data size of an index record.
-@return minimum data size in bytes */
-UNIV_INLINE
-ulint
-dict_index_get_min_size(
-/*====================*/
- const dict_index_t* index) /*!< in: index */
-{
- ulint n = dict_index_get_n_fields(index);
- ulint size = 0;
-
- while (n--) {
- size += dict_col_get_min_size(dict_index_get_nth_col(index,
- n));
- }
-
- return(size);
-}
-
-/*********************************************************************//**
-Gets the space id of the root of the index tree.
-@return space id */
-UNIV_INLINE
-ulint
-dict_index_get_space(
-/*=================*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->space);
-}
-
-/*********************************************************************//**
-Sets the space id of the root of the index tree. */
-UNIV_INLINE
-void
-dict_index_set_space(
-/*=================*/
- dict_index_t* index, /*!< in/out: index */
- ulint space) /*!< in: space id */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- index->space = space;
-}
-
-/*********************************************************************//**
-Gets the page number of the root of the index tree.
-@return page number */
-UNIV_INLINE
-ulint
-dict_index_get_page(
-/*================*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->page);
-}
-
-/*********************************************************************//**
-Gets the read-write lock of the index tree.
-@return read-write lock */
-UNIV_INLINE
-prio_rw_lock_t*
-dict_index_get_lock(
-/*================*/
- dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(&(index->lock));
-}
-
-/********************************************************************//**
-Returns free space reserved for future updates of records. This is
-relevant only in the case of many consecutive inserts, as updates
-which make the records bigger might fragment the index.
-@return number of free bytes on page, reserved for updates */
-UNIV_INLINE
-ulint
-dict_index_get_space_reserve(void)
-/*==============================*/
-{
- return(UNIV_PAGE_SIZE / 16);
-}
-
-/********************************************************************//**
-Gets the status of online index creation.
-@return the status */
-UNIV_INLINE
-enum online_index_status
-dict_index_get_online_status(
-/*=========================*/
- const dict_index_t* index) /*!< in: secondary index */
-{
- enum online_index_status status;
-
- status = (enum online_index_status) index->online_status;
-
- /* Without the index->lock protection, the online
- status can change from ONLINE_INDEX_CREATION to
- ONLINE_INDEX_COMPLETE (or ONLINE_INDEX_ABORTED) in
- row_log_apply() once log application is done. So to make
- sure the status is ONLINE_INDEX_CREATION or ONLINE_INDEX_COMPLETE
- you should always do the recheck after acquiring index->lock */
-
-#ifdef UNIV_DEBUG
- switch (status) {
- case ONLINE_INDEX_COMPLETE:
- case ONLINE_INDEX_CREATION:
- case ONLINE_INDEX_ABORTED:
- case ONLINE_INDEX_ABORTED_DROPPED:
- return(status);
- }
- ut_error;
-#endif /* UNIV_DEBUG */
- return(status);
-}
-
-/********************************************************************//**
-Sets the status of online index creation. */
-UNIV_INLINE
-void
-dict_index_set_online_status(
-/*=========================*/
- dict_index_t* index, /*!< in/out: index */
- enum online_index_status status) /*!< in: status */
-{
- ut_ad(!(index->type & DICT_FTS));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-#ifdef UNIV_DEBUG
- switch (dict_index_get_online_status(index)) {
- case ONLINE_INDEX_COMPLETE:
- case ONLINE_INDEX_CREATION:
- break;
- case ONLINE_INDEX_ABORTED:
- ut_ad(status == ONLINE_INDEX_ABORTED_DROPPED);
- break;
- case ONLINE_INDEX_ABORTED_DROPPED:
- ut_error;
- }
-#endif /* UNIV_DEBUG */
-
- index->online_status = status;
- ut_ad(dict_index_get_online_status(index) == status);
-}
-
-/********************************************************************//**
-Determines if a secondary index is being or has been created online,
-or if the table is being rebuilt online, allowing concurrent modifications
-to the table.
-@retval true if the index is being or has been built online, or
-if this is a clustered index and the table is being or has been rebuilt online
-@retval false if the index has been created or the table has been
-rebuilt completely */
-UNIV_INLINE
-bool
-dict_index_is_online_ddl(
-/*=====================*/
- const dict_index_t* index) /*!< in: index */
-{
-#ifdef UNIV_DEBUG
- if (dict_index_is_clust(index)) {
- switch (dict_index_get_online_status(index)) {
- case ONLINE_INDEX_CREATION:
- return(true);
- case ONLINE_INDEX_COMPLETE:
- return(false);
- case ONLINE_INDEX_ABORTED:
- case ONLINE_INDEX_ABORTED_DROPPED:
- break;
- }
- ut_ad(0);
- return(false);
- }
-#endif /* UNIV_DEBUG */
-
- return(UNIV_UNLIKELY(dict_index_get_online_status(index)
- != ONLINE_INDEX_COMPLETE));
-}
-
-/**********************************************************************//**
-Check whether a column exists in an FTS index.
-@return ULINT_UNDEFINED if no match else the offset within the vector */
-UNIV_INLINE
-ulint
-dict_table_is_fts_column(
-/*=====================*/
- ib_vector_t* indexes,/*!< in: vector containing only FTS indexes */
- ulint col_no) /*!< in: col number to search for */
-
-{
- ulint i;
-
- for (i = 0; i < ib_vector_size(indexes); ++i) {
- dict_index_t* index;
-
- index = (dict_index_t*) ib_vector_getp(indexes, i);
-
- if (dict_index_contains_col_or_prefix(index, col_no)) {
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Determine bytes of column prefix to be stored in the undo log. Please
-note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix
-needs to be stored in the undo log.
-@return bytes of column prefix to be stored in the undo log */
-UNIV_INLINE
-ulint
-dict_max_field_len_store_undo(
-/*==========================*/
- dict_table_t* table, /*!< in: table */
- const dict_col_t* col) /*!< in: column which index prefix
- is based on */
-{
- ulint prefix_len = 0;
-
- if (dict_table_get_format(table) >= UNIV_FORMAT_B)
- {
- prefix_len = col->max_prefix
- ? col->max_prefix
- : DICT_MAX_FIELD_LEN_BY_FORMAT(table);
- }
-
- return(prefix_len);
-}
-
-/********************************************************************//**
-Check whether the index is corrupted.
-@return nonzero for corrupted index, zero for valid indexes */
-UNIV_INLINE
-ulint
-dict_index_is_corrupted(
-/*====================*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return((index->type & DICT_CORRUPT)
- || (index->table && index->table->corrupted));
-}
-
-/********************************************************************//**
-Check if the tablespace for the table has been discarded.
-@return true if the tablespace has been discarded. */
-UNIV_INLINE
-bool
-dict_table_is_discarded(
-/*====================*/
- const dict_table_t* table) /*!< in: table to check */
-{
- return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_DISCARDED));
-}
-
-/********************************************************************//**
-Check if it is a temporary table.
-@return true if temporary table flag is set. */
-UNIV_INLINE
-bool
-dict_table_is_temporary(
-/*====================*/
- const dict_table_t* table) /*!< in: table to check */
-{
- return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY));
-}
-
-/**********************************************************************//**
-Get index by first field of the index
-@return index which is having first field matches
-with the field present in field_index position of table */
-UNIV_INLINE
-dict_index_t*
-dict_table_get_index_on_first_col(
-/*==============================*/
- const dict_table_t* table, /*!< in: table */
- ulint col_index) /*!< in: position of column
- in table */
-{
- ut_ad(col_index < table->n_cols);
-
- dict_col_t* column = dict_table_get_nth_col(table, col_index);
-
- for (dict_index_t* index = dict_table_get_first_index(table);
- index != NULL; index = dict_table_get_next_index(index)) {
-
- if (index->fields[0].col == column) {
- return(index);
- }
- }
- ut_error;
- return(0);
-}
-
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/dict0load.h b/storage/xtradb/include/dict0load.h
deleted file mode 100644
index 1a720de5bb6..00000000000
--- a/storage/xtradb/include/dict0load.h
+++ /dev/null
@@ -1,430 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0load.h
-Loads to the memory cache database object definitions
-from dictionary tables
-
-Created 4/24/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0load_h
-#define dict0load_h
-
-#include "univ.i"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "ut0byte.h"
-#include "mem0mem.h"
-#include "btr0types.h"
-
-/** enum that defines all system table IDs. @see SYSTEM_TABLE_NAME[] */
-enum dict_system_id_t {
- SYS_TABLES = 0,
- SYS_INDEXES,
- SYS_COLUMNS,
- SYS_FIELDS,
- SYS_FOREIGN,
- SYS_FOREIGN_COLS,
- SYS_TABLESPACES,
- SYS_DATAFILES,
-
- /* This must be last item. Defines the number of system tables. */
- SYS_NUM_SYSTEM_TABLES
-};
-
-/** Status bit for dict_process_sys_tables_rec_and_mtr_commit() */
-enum dict_table_info_t {
- DICT_TABLE_LOAD_FROM_RECORD = 0,/*!< Directly populate a dict_table_t
- structure with information from
- a SYS_TABLES record */
- DICT_TABLE_LOAD_FROM_CACHE = 1 /*!< Check first whether dict_table_t
- is in the cache, if so, return it */
-};
-
-/** Check type for dict_check_tablespaces_and_store_max_id() */
-enum dict_check_t {
- /** No user tablespaces have been opened
- (no crash recovery, no transactions recovered). */
- DICT_CHECK_NONE_LOADED = 0,
- /** Some user tablespaces may have been opened
- (no crash recovery; recovered table locks for transactions). */
- DICT_CHECK_SOME_LOADED,
- /** All user tablespaces have been opened (crash recovery). */
- DICT_CHECK_ALL_LOADED
-};
-
-/********************************************************************//**
-In a crash recovery we already have all the tablespace objects created.
-This function compares the space id information in the InnoDB data dictionary
-to what we already read with fil_load_single_table_tablespaces().
-
-In a normal startup, we create the tablespace objects for every table in
-InnoDB's data dictionary, if the corresponding .ibd file exists.
-We also scan the biggest space id, and store it to fil_system. */
-UNIV_INTERN
-void
-dict_check_tablespaces_and_store_max_id(
-/*====================================*/
- dict_check_t dict_check); /*!< in: how to check */
-/********************************************************************//**
-Finds the first table name in the given database.
-@return own: table name, NULL if does not exist; the caller must free
-the memory in the string! */
-UNIV_INTERN
-char*
-dict_get_first_table_name_in_db(
-/*============================*/
- const char* name); /*!< in: database name which ends to '/' */
-
-/********************************************************************//**
-Loads a table definition from a SYS_TABLES record to dict_table_t.
-Does not load any columns or indexes.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_load_table_low(
-/*================*/
- const char* name, /*!< in: table name */
- const rec_t* rec, /*!< in: SYS_TABLES record */
- dict_table_t** table); /*!< out,own: table, or NULL */
-/********************************************************************//**
-Loads a table column definition from a SYS_COLUMNS record to
-dict_table_t.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_load_column_low(
-/*=================*/
- dict_table_t* table, /*!< in/out: table, could be NULL
- if we just populate a dict_column_t
- struct with information from
- a SYS_COLUMNS record */
- mem_heap_t* heap, /*!< in/out: memory heap
- for temporary storage */
- dict_col_t* column, /*!< out: dict_column_t to fill,
- or NULL if table != NULL */
- table_id_t* table_id, /*!< out: table id */
- const char** col_name, /*!< out: column name */
- const rec_t* rec); /*!< in: SYS_COLUMNS record */
-/********************************************************************//**
-Loads an index definition from a SYS_INDEXES record to dict_index_t.
-If allocate=TRUE, we will create a dict_index_t structure and fill it
-accordingly. If allocated=FALSE, the dict_index_t will be supplied by
-the caller and filled with information read from the record. @return
-error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_load_index_low(
-/*================*/
- byte* table_id, /*!< in/out: table id (8 bytes),
- an "in" value if allocate=TRUE
- and "out" when allocate=FALSE */
- const char* table_name, /*!< in: table name */
- mem_heap_t* heap, /*!< in/out: temporary memory heap */
- const rec_t* rec, /*!< in: SYS_INDEXES record */
- ibool allocate, /*!< in: TRUE=allocate *index,
- FALSE=fill in a pre-allocated
- *index */
- dict_index_t** index); /*!< out,own: index, or NULL */
-/********************************************************************//**
-Loads an index field definition from a SYS_FIELDS record to
-dict_index_t.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_load_field_low(
-/*================*/
- byte* index_id, /*!< in/out: index id (8 bytes)
- an "in" value if index != NULL
- and "out" if index == NULL */
- dict_index_t* index, /*!< in/out: index, could be NULL
- if we just populate a dict_field_t
- struct with information from
- a SYS_FIELDS record */
- dict_field_t* sys_field, /*!< out: dict_field_t to be
- filled */
- ulint* pos, /*!< out: Field position */
- byte* last_index_id, /*!< in: last index id */
- mem_heap_t* heap, /*!< in/out: memory heap
- for temporary storage */
- const rec_t* rec); /*!< in: SYS_FIELDS record */
-/********************************************************************//**
-Using the table->heap, copy the null-terminated filepath into
-table->data_dir_path and put a null byte before the extension.
-This allows SHOW CREATE TABLE to return the correct DATA DIRECTORY path.
-Make this data directory path only if it has not yet been saved. */
-UNIV_INTERN
-void
-dict_save_data_dir_path(
-/*====================*/
- dict_table_t* table, /*!< in/out: table */
- char* filepath); /*!< in: filepath of tablespace */
-/*****************************************************************//**
-Make sure the data_file_name is saved in dict_table_t if needed. Try to
-read it from the file dictionary first, then from SYS_DATAFILES. */
-UNIV_INTERN
-void
-dict_get_and_save_data_dir_path(
-/*============================*/
- dict_table_t* table, /*!< in/out: table */
- bool dict_mutex_own); /*!< in: true if dict_sys->mutex
- is owned already */
-/********************************************************************//**
-Loads a table definition and also all its index definitions, and also
-the cluster definition if the table is a member in a cluster. Also loads
-all foreign key constraints where the foreign key is in the table or where
-a foreign key references columns in this table.
-@return table, NULL if does not exist; if the table is stored in an
-.ibd file, but the file does not exist, then we set the
-ibd_file_missing flag TRUE in the table object we return */
-UNIV_INTERN
-dict_table_t*
-dict_load_table(
-/*============*/
- const char* name, /*!< in: table name in the
- databasename/tablename format */
- ibool cached, /*!< in: TRUE=add to cache, FALSE=do not */
- dict_err_ignore_t ignore_err);
- /*!< in: error to be ignored when loading
- table and its indexes' definition */
-/***********************************************************************//**
-Loads a table object based on the table id.
-@return table; NULL if table does not exist */
-UNIV_INTERN
-dict_table_t*
-dict_load_table_on_id(
-/*==================*/
- table_id_t table_id, /*!< in: table id */
- dict_err_ignore_t ignore_err); /*!< in: errors to ignore
- when loading the table */
-/********************************************************************//**
-This function is called when the database is booted.
-Loads system table index definitions except for the clustered index which
-is added to the dictionary cache at booting before calling this function. */
-UNIV_INTERN
-void
-dict_load_sys_table(
-/*================*/
- dict_table_t* table); /*!< in: system table */
-/***********************************************************************//**
-Loads foreign key constraints where the table is either the foreign key
-holder or where the table is referenced by a foreign key. Adds these
-constraints to the data dictionary. Note that we know that the dictionary
-cache already contains all constraints where the other relevant table is
-already in the dictionary cache.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_load_foreigns(
-/*===============*/
- const char* table_name, /*!< in: table name */
- const char** col_names, /*!< in: column names, or NULL
- to use table->col_names */
- bool check_recursive,/*!< in: Whether to check
- recursive load of tables
- chained by FK */
- bool check_charsets, /*!< in: whether to check
- charset compatibility */
- dict_err_ignore_t ignore_err) /*!< in: error to be ignored */
- MY_ATTRIBUTE((nonnull(1), warn_unused_result));
-/********************************************************************//**
-Prints to the standard output information on all tables found in the data
-dictionary system table. */
-UNIV_INTERN
-void
-dict_print(void);
-/*============*/
-
-/********************************************************************//**
-This function opens a system table, and return the first record.
-@return first record of the system table */
-UNIV_INTERN
-const rec_t*
-dict_startscan_system(
-/*==================*/
- btr_pcur_t* pcur, /*!< out: persistent cursor to
- the record */
- mtr_t* mtr, /*!< in: the mini-transaction */
- dict_system_id_t system_id); /*!< in: which system table to open */
-/********************************************************************//**
-This function get the next system table record as we scan the table.
-@return the record if found, NULL if end of scan. */
-UNIV_INTERN
-const rec_t*
-dict_getnext_system(
-/*================*/
- btr_pcur_t* pcur, /*!< in/out: persistent cursor
- to the record */
- mtr_t* mtr); /*!< in: the mini-transaction */
-/********************************************************************//**
-This function processes one SYS_TABLES record and populate the dict_table_t
-struct for the table. Extracted out of dict_print() to be used by
-both monitor table output and information schema innodb_sys_tables output.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_tables_rec_and_mtr_commit(
-/*=======================================*/
- mem_heap_t* heap, /*!< in: temporary memory heap */
- const rec_t* rec, /*!< in: SYS_TABLES record */
- dict_table_t** table, /*!< out: dict_table_t to fill */
- dict_table_info_t status, /*!< in: status bit controls
- options such as whether we shall
- look for dict_table_t from cache
- first */
- mtr_t* mtr); /*!< in/out: mini-transaction,
- will be committed */
-/********************************************************************//**
-This function parses a SYS_INDEXES record and populate a dict_index_t
-structure with the information from the record. For detail information
-about SYS_INDEXES fields, please refer to dict_boot() function.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_indexes_rec(
-/*=========================*/
- mem_heap_t* heap, /*!< in/out: heap memory */
- const rec_t* rec, /*!< in: current SYS_INDEXES rec */
- dict_index_t* index, /*!< out: dict_index_t to be
- filled */
- table_id_t* table_id); /*!< out: table id */
-/********************************************************************//**
-This function parses a SYS_COLUMNS record and populate a dict_column_t
-structure with the information from the record.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_columns_rec(
-/*=========================*/
- mem_heap_t* heap, /*!< in/out: heap memory */
- const rec_t* rec, /*!< in: current SYS_COLUMNS rec */
- dict_col_t* column, /*!< out: dict_col_t to be filled */
- table_id_t* table_id, /*!< out: table id */
- const char** col_name); /*!< out: column name */
-/********************************************************************//**
-This function parses a SYS_FIELDS record and populate a dict_field_t
-structure with the information from the record.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_fields_rec(
-/*========================*/
- mem_heap_t* heap, /*!< in/out: heap memory */
- const rec_t* rec, /*!< in: current SYS_FIELDS rec */
- dict_field_t* sys_field, /*!< out: dict_field_t to be
- filled */
- ulint* pos, /*!< out: Field position */
- index_id_t* index_id, /*!< out: current index id */
- index_id_t last_id); /*!< in: previous index id */
-/********************************************************************//**
-This function parses a SYS_FOREIGN record and populate a dict_foreign_t
-structure with the information from the record. For detail information
-about SYS_FOREIGN fields, please refer to dict_load_foreign() function
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_foreign_rec(
-/*=========================*/
- mem_heap_t* heap, /*!< in/out: heap memory */
- const rec_t* rec, /*!< in: current SYS_FOREIGN rec */
- dict_foreign_t* foreign); /*!< out: dict_foreign_t to be
- filled */
-/********************************************************************//**
-This function parses a SYS_FOREIGN_COLS record and extract necessary
-information from the record and return to caller.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_foreign_col_rec(
-/*=============================*/
- mem_heap_t* heap, /*!< in/out: heap memory */
- const rec_t* rec, /*!< in: current SYS_FOREIGN_COLS rec */
- const char** name, /*!< out: foreign key constraint name */
- const char** for_col_name, /*!< out: referencing column name */
- const char** ref_col_name, /*!< out: referenced column name
- in referenced table */
- ulint* pos); /*!< out: column position */
-/********************************************************************//**
-This function parses a SYS_TABLESPACES record, extracts necessary
-information from the record and returns to caller.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_tablespaces(
-/*=========================*/
- mem_heap_t* heap, /*!< in/out: heap memory */
- const rec_t* rec, /*!< in: current SYS_TABLESPACES rec */
- ulint* space, /*!< out: pace id */
- const char** name, /*!< out: tablespace name */
- ulint* flags); /*!< out: tablespace flags */
-/********************************************************************//**
-This function parses a SYS_DATAFILES record, extracts necessary
-information from the record and returns to caller.
-@return error message, or NULL on success */
-UNIV_INTERN
-const char*
-dict_process_sys_datafiles(
-/*=======================*/
- mem_heap_t* heap, /*!< in/out: heap memory */
- const rec_t* rec, /*!< in: current SYS_DATAFILES rec */
- ulint* space, /*!< out: pace id */
- const char** path); /*!< out: datafile path */
-
-/********************************************************************//**
-Get the filepath for a spaceid from SYS_DATAFILES. This function provides
-a temporary heap which is used for the table lookup, but not for the path.
-The caller must free the memory for the path returned. This function can
-return NULL if the space ID is not found in SYS_DATAFILES, then the caller
-will assume that the ibd file is in the normal datadir.
-@return own: A copy of the first datafile found in SYS_DATAFILES.PATH for
-the given space ID. NULL if space ID is zero or not found. */
-UNIV_INTERN
-char*
-dict_get_first_path(
-/*================*/
- ulint space, /*!< in: space id */
- const char* name); /*!< in: tablespace name */
-/********************************************************************//**
-Update the record for space_id in SYS_TABLESPACES to this filepath.
-@return DB_SUCCESS if OK, dberr_t if the insert failed */
-UNIV_INTERN
-dberr_t
-dict_update_filepath(
-/*=================*/
- ulint space_id, /*!< in: space id */
- const char* filepath); /*!< in: filepath */
-/********************************************************************//**
-Insert records into SYS_TABLESPACES and SYS_DATAFILES.
-@return DB_SUCCESS if OK, dberr_t if the insert failed */
-UNIV_INTERN
-dberr_t
-dict_insert_tablespace_and_filepath(
-/*================================*/
- ulint space, /*!< in: space id */
- const char* name, /*!< in: talespace name */
- const char* filepath, /*!< in: filepath */
- ulint fsp_flags); /*!< in: tablespace flags */
-
-#ifndef UNIV_NONINL
-#include "dict0load.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/dict0load.ic b/storage/xtradb/include/dict0load.ic
deleted file mode 100644
index 2c0f1ff38a5..00000000000
--- a/storage/xtradb/include/dict0load.ic
+++ /dev/null
@@ -1,26 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0load.ic
-Loads to the memory cache database object definitions
-from dictionary tables
-
-Created 4/24/1996 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h
deleted file mode 100644
index 2a4422fc18b..00000000000
--- a/storage/xtradb/include/dict0mem.h
+++ /dev/null
@@ -1,1522 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0mem.h
-Data dictionary memory object creation
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0mem_h
-#define dict0mem_h
-
-#include "univ.i"
-
-#ifndef UNIV_INNOCHECKSUM
-
-#include "dict0types.h"
-#include "data0type.h"
-#include "mem0mem.h"
-#include "row0types.h"
-#include "rem0types.h"
-#include "btr0types.h"
-#ifndef UNIV_HOTBACKUP
-# include "lock0types.h"
-# include "que0types.h"
-# include "sync0rw.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "ut0mem.h"
-#include "ut0lst.h"
-#include "ut0rnd.h"
-#include "ut0byte.h"
-#include "hash0hash.h"
-#include "trx0types.h"
-#include "fts0fts.h"
-#include "os0once.h"
-#include "fil0fil.h"
-#include <my_crypt.h>
-#include "fil0crypt.h"
-#include <set>
-#include <algorithm>
-#include <iterator>
-#include <ostream>
-
-/* Forward declaration. */
-struct ib_rbt_t;
-
-/** Type flags of an index: OR'ing of the flags is allowed to define a
-combination of types */
-/* @{ */
-#define DICT_CLUSTERED 1 /*!< clustered index */
-#define DICT_UNIQUE 2 /*!< unique index */
-#define DICT_UNIVERSAL 4 /*!< index which can contain records from any
- other index */
-#define DICT_IBUF 8 /*!< insert buffer tree */
-#define DICT_CORRUPT 16 /*!< bit to store the corrupted flag
- in SYS_INDEXES.TYPE */
-#define DICT_FTS 32 /* FTS index; can't be combined with the
- other flags */
-
-#define DICT_IT_BITS 6 /*!< number of bits used for
- SYS_INDEXES.TYPE */
-/* @} */
-
-#if 0 /* not implemented, retained for history */
-/** Types for a table object */
-#define DICT_TABLE_ORDINARY 1 /*!< ordinary table */
-#define DICT_TABLE_CLUSTER_MEMBER 2
-#define DICT_TABLE_CLUSTER 3 /* this means that the table is
- really a cluster definition */
-#endif
-
-/* Table and tablespace flags are generally not used for the Antelope file
-format except for the low order bit, which is used differently depending on
-where the flags are stored.
-
-==================== Low order flags bit =========================
- | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC
-SYS_TABLES.TYPE | 1 | 1 | 1
-dict_table_t::flags | 0 | 1 | 1
-FSP_SPACE_FLAGS | 0 | 0 | 1
-fil_space_t::flags | 0 | 0 | 1
-
-Before the 5.1 plugin, SYS_TABLES.TYPE was always DICT_TABLE_ORDINARY (1)
-and the tablespace flags field was always 0. In the 5.1 plugin, these fields
-were repurposed to identify compressed and dynamic row formats.
-
-The following types and constants describe the flags found in dict_table_t
-and SYS_TABLES.TYPE. Similar flags found in fil_space_t and FSP_SPACE_FLAGS
-are described in fsp0fsp.h. */
-
-/* @{ */
-/** dict_table_t::flags bit 0 is equal to 0 if the row format = Redundant */
-#define DICT_TF_REDUNDANT 0 /*!< Redundant row format. */
-/** dict_table_t::flags bit 0 is equal to 1 if the row format = Compact */
-#define DICT_TF_COMPACT 1 /*!< Compact row format. */
-
-/** This bitmask is used in SYS_TABLES.N_COLS to set and test whether
-the Compact page format is used, i.e ROW_FORMAT != REDUNDANT */
-#define DICT_N_COLS_COMPACT 0x80000000UL
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/** Width of the COMPACT flag */
-#define DICT_TF_WIDTH_COMPACT 1
-/** Width of the ZIP_SSIZE flag */
-#define DICT_TF_WIDTH_ZIP_SSIZE 4
-/** Width of the ATOMIC_BLOBS flag. The Antelope file formats broke up
-BLOB and TEXT fields, storing the first 768 bytes in the clustered index.
-Brracuda row formats store the whole blob or text field off-page atomically.
-Secondary indexes are created from this external data using row_ext_t
-to cache the BLOB prefixes. */
-#define DICT_TF_WIDTH_ATOMIC_BLOBS 1
-/** If a table is created with the MYSQL option DATA DIRECTORY and
-innodb-file-per-table, an older engine will not be able to find that table.
-This flag prevents older engines from attempting to open the table and
-allows InnoDB to update_create_info() accordingly. */
-#define DICT_TF_WIDTH_DATA_DIR 1
-
-/**
-Width of the page compression flag
-*/
-#define DICT_TF_WIDTH_PAGE_COMPRESSION 1
-#define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4
-
-/**
-Width of the page encryption flag
-*/
-#define DICT_TF_WIDTH_PAGE_ENCRYPTION 1
-#define DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY 8
-
-/**
-Width of atomic writes flag
-DEFAULT=0, ON = 1, OFF = 2
-*/
-#define DICT_TF_WIDTH_ATOMIC_WRITES 2
-
-/** Width of all the currently known table flags */
-#define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \
- + DICT_TF_WIDTH_ZIP_SSIZE \
- + DICT_TF_WIDTH_ATOMIC_BLOBS \
- + DICT_TF_WIDTH_DATA_DIR \
- + DICT_TF_WIDTH_PAGE_COMPRESSION \
- + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \
- + DICT_TF_WIDTH_ATOMIC_WRITES \
- + DICT_TF_WIDTH_PAGE_ENCRYPTION \
- + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)
-
-/** A mask of all the known/used bits in table flags */
-#define DICT_TF_BIT_MASK (~(~0U << DICT_TF_BITS))
-
-/** Zero relative shift position of the COMPACT field */
-#define DICT_TF_POS_COMPACT 0
-/** Zero relative shift position of the ZIP_SSIZE field */
-#define DICT_TF_POS_ZIP_SSIZE (DICT_TF_POS_COMPACT \
- + DICT_TF_WIDTH_COMPACT)
-/** Zero relative shift position of the ATOMIC_BLOBS field */
-#define DICT_TF_POS_ATOMIC_BLOBS (DICT_TF_POS_ZIP_SSIZE \
- + DICT_TF_WIDTH_ZIP_SSIZE)
-/** Zero relative shift position of the DATA_DIR field */
-#define DICT_TF_POS_DATA_DIR (DICT_TF_POS_ATOMIC_BLOBS \
- + DICT_TF_WIDTH_ATOMIC_BLOBS)
-/** Zero relative shift position of the PAGE_COMPRESSION field */
-#define DICT_TF_POS_PAGE_COMPRESSION (DICT_TF_POS_DATA_DIR \
- + DICT_TF_WIDTH_DATA_DIR)
-/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */
-#define DICT_TF_POS_PAGE_COMPRESSION_LEVEL (DICT_TF_POS_PAGE_COMPRESSION \
- + DICT_TF_WIDTH_PAGE_COMPRESSION)
-/** Zero relative shift position of the ATOMIC_WRITES field */
-#define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \
- + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)
-
-/** Zero relative shift position of the PAGE_ENCRYPTION field */
-#define DICT_TF_POS_PAGE_ENCRYPTION (DICT_TF_POS_ATOMIC_WRITES \
- + DICT_TF_WIDTH_ATOMIC_WRITES)
-/** Zero relative shift position of the PAGE_ENCRYPTION_KEY field */
-#define DICT_TF_POS_PAGE_ENCRYPTION_KEY (DICT_TF_POS_PAGE_ENCRYPTION \
- + DICT_TF_WIDTH_PAGE_ENCRYPTION)
-/** Zero relative shift position of the start of the UNUSED bits */
-#define DICT_TF_POS_UNUSED (DICT_TF_POS_PAGE_ENCRYPTION_KEY \
- + DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)
-
-/** Bit mask of the COMPACT field */
-#define DICT_TF_MASK_COMPACT \
- ((~(~0U << DICT_TF_WIDTH_COMPACT)) \
- << DICT_TF_POS_COMPACT)
-/** Bit mask of the ZIP_SSIZE field */
-#define DICT_TF_MASK_ZIP_SSIZE \
- ((~(~0U << DICT_TF_WIDTH_ZIP_SSIZE)) \
- << DICT_TF_POS_ZIP_SSIZE)
-/** Bit mask of the ATOMIC_BLOBS field */
-#define DICT_TF_MASK_ATOMIC_BLOBS \
- ((~(~0U << DICT_TF_WIDTH_ATOMIC_BLOBS)) \
- << DICT_TF_POS_ATOMIC_BLOBS)
-/** Bit mask of the DATA_DIR field */
-#define DICT_TF_MASK_DATA_DIR \
- ((~(~0U << DICT_TF_WIDTH_DATA_DIR)) \
- << DICT_TF_POS_DATA_DIR)
-/** Bit mask of the PAGE_COMPRESSION field */
-#define DICT_TF_MASK_PAGE_COMPRESSION \
- ((~(~0U << DICT_TF_WIDTH_PAGE_COMPRESSION)) \
- << DICT_TF_POS_PAGE_COMPRESSION)
-/** Bit mask of the PAGE_COMPRESSION_LEVEL field */
-#define DICT_TF_MASK_PAGE_COMPRESSION_LEVEL \
- ((~(~0U << DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)) \
- << DICT_TF_POS_PAGE_COMPRESSION_LEVEL)
-/** Bit mask of the ATOMIC_WRITES field */
-#define DICT_TF_MASK_ATOMIC_WRITES \
- ((~(~0U << DICT_TF_WIDTH_ATOMIC_WRITES)) \
- << DICT_TF_POS_ATOMIC_WRITES)
-/** Bit mask of the PAGE_ENCRYPTION field */
-#define DICT_TF_MASK_PAGE_ENCRYPTION \
- ((~(~0U << DICT_TF_WIDTH_PAGE_ENCRYPTION)) \
- << DICT_TF_POS_PAGE_ENCRYPTION)
-/** Bit mask of the PAGE_ENCRYPTION_KEY field */
-#define DICT_TF_MASK_PAGE_ENCRYPTION_KEY \
- ((~(~0U << DICT_TF_WIDTH_PAGE_ENCRYPTION_KEY)) \
- << DICT_TF_POS_PAGE_ENCRYPTION_KEY)
-
-/** Return the value of the COMPACT field */
-#define DICT_TF_GET_COMPACT(flags) \
- ((flags & DICT_TF_MASK_COMPACT) \
- >> DICT_TF_POS_COMPACT)
-/** Return the value of the ZIP_SSIZE field */
-#define DICT_TF_GET_ZIP_SSIZE(flags) \
- ((flags & DICT_TF_MASK_ZIP_SSIZE) \
- >> DICT_TF_POS_ZIP_SSIZE)
-/** Return the value of the ATOMIC_BLOBS field */
-#define DICT_TF_HAS_ATOMIC_BLOBS(flags) \
- ((flags & DICT_TF_MASK_ATOMIC_BLOBS) \
- >> DICT_TF_POS_ATOMIC_BLOBS)
-/** Return the value of the DATA_DIR field */
-#define DICT_TF_HAS_DATA_DIR(flags) \
- ((flags & DICT_TF_MASK_DATA_DIR) \
- >> DICT_TF_POS_DATA_DIR)
-
-/** Return the contents of the PAGE_ENCRYPTION field */
-#define DICT_TF_GET_PAGE_ENCRYPTION(flags) \
- ((flags & DICT_TF_MASK_PAGE_ENCRYPTION) \
- >> DICT_TF_POS_PAGE_ENCRYPTION)
-/** Return the contents of the PAGE_ENCRYPTION KEY field */
-#define DICT_TF_GET_PAGE_ENCRYPTION_KEY(flags) \
- ((flags & DICT_TF_MASK_PAGE_ENCRYPTION_KEY) \
- >> DICT_TF_POS_PAGE_ENCRYPTION_KEY)
-
-
-/** Return the contents of the UNUSED bits */
-#define DICT_TF_GET_UNUSED(flags) \
- (flags >> DICT_TF_POS_UNUSED)
-
-/** Return the value of the PAGE_COMPRESSION field */
-#define DICT_TF_GET_PAGE_COMPRESSION(flags) \
- ((flags & DICT_TF_MASK_PAGE_COMPRESSION) \
- >> DICT_TF_POS_PAGE_COMPRESSION)
-/** Return the value of the PAGE_COMPRESSION_LEVEL field */
-#define DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags) \
- ((flags & DICT_TF_MASK_PAGE_COMPRESSION_LEVEL) \
- >> DICT_TF_POS_PAGE_COMPRESSION_LEVEL)
-/** Return the value of the ATOMIC_WRITES field */
-#define DICT_TF_GET_ATOMIC_WRITES(flags) \
- ((flags & DICT_TF_MASK_ATOMIC_WRITES) \
- >> DICT_TF_POS_ATOMIC_WRITES)
-/* @} */
-
-#ifndef UNIV_INNOCHECKSUM
-
-/** @brief Table Flags set number 2.
-
-These flags will be stored in SYS_TABLES.MIX_LEN. All unused flags
-will be written as 0. The column may contain garbage for tables
-created with old versions of InnoDB that only implemented
-ROW_FORMAT=REDUNDANT. InnoDB engines do not check these flags
-for unknown bits in order to protect backward incompatibility. */
-/* @{ */
-/** Total number of bits in table->flags2. */
-#define DICT_TF2_BITS 7
-#define DICT_TF2_BIT_MASK ~(~0U << DICT_TF2_BITS)
-
-/** TEMPORARY; TRUE for tables from CREATE TEMPORARY TABLE. */
-#define DICT_TF2_TEMPORARY 1
-/** The table has an internal defined DOC ID column */
-#define DICT_TF2_FTS_HAS_DOC_ID 2
-/** The table has an FTS index */
-#define DICT_TF2_FTS 4
-/** Need to add Doc ID column for FTS index build.
-This is a transient bit for index build */
-#define DICT_TF2_FTS_ADD_DOC_ID 8
-/** This bit is used during table creation to indicate that it will
-use its own tablespace instead of the system tablespace. */
-#define DICT_TF2_USE_TABLESPACE 16
-
-/** Set when we discard/detach the tablespace */
-#define DICT_TF2_DISCARDED 32
-
-/** This bit is set if all aux table names (both common tables and
-index tables) of a FTS table are in HEX format. */
-#define DICT_TF2_FTS_AUX_HEX_NAME 64
-/* @} */
-
-#define DICT_TF2_FLAG_SET(table, flag) \
- (table->flags2 |= (flag))
-
-#define DICT_TF2_FLAG_IS_SET(table, flag) \
- (table->flags2 & (flag))
-
-#define DICT_TF2_FLAG_UNSET(table, flag) \
- (table->flags2 &= ~(flag))
-
-/** Tables could be chained together with Foreign key constraint. When
-first load the parent table, we would load all of its descedents.
-This could result in rescursive calls and out of stack error eventually.
-DICT_FK_MAX_RECURSIVE_LOAD defines the maximum number of recursive loads,
-when exceeded, the child table will not be loaded. It will be loaded when
-the foreign constraint check needs to be run. */
-#define DICT_FK_MAX_RECURSIVE_LOAD 20
-
-/** Similarly, when tables are chained together with foreign key constraints
-that have a cascading delete/update clause, a delete from the parent table
-could result in recursive cascading calls. This defines the maximum number
-of such cascading deletes/updates allowed. When exceeded, the delete from
-the parent table will fail, and the user has to drop the excessive foreign
-constraints before proceeding. */
-#define FK_MAX_CASCADE_DEL 255
-
-/**********************************************************************//**
-Creates a table memory object.
-@return own: table object */
-UNIV_INTERN
-dict_table_t*
-dict_mem_table_create(
-/*==================*/
- const char* name, /*!< in: table name */
- ulint space, /*!< in: space where the clustered index
- of the table is placed */
- ulint n_cols, /*!< in: number of columns */
- ulint flags, /*!< in: table flags */
- ulint flags2); /*!< in: table flags2 */
-/**********************************************************************//**
-Determines if a table belongs to a system database
-@return true if the table belongs to a system database */
-UNIV_INTERN
-bool
-dict_mem_table_is_system(
-/*==================*/
- char *name); /*!< in: table name */
-/****************************************************************//**
-Free a table memory object. */
-UNIV_INTERN
-void
-dict_mem_table_free(
-/*================*/
- dict_table_t* table); /*!< in: table */
-/**********************************************************************//**
-Adds a column definition to a table. */
-UNIV_INTERN
-void
-dict_mem_table_add_col(
-/*===================*/
- dict_table_t* table, /*!< in: table */
- mem_heap_t* heap, /*!< in: temporary memory heap, or NULL */
- const char* name, /*!< in: column name, or NULL */
- ulint mtype, /*!< in: main datatype */
- ulint prtype, /*!< in: precise type */
- ulint len) /*!< in: precision */
- MY_ATTRIBUTE((nonnull(1)));
-/**********************************************************************//**
-Renames a column of a table in the data dictionary cache. */
-UNIV_INTERN
-void
-dict_mem_table_col_rename(
-/*======================*/
- dict_table_t* table, /*!< in/out: table */
- unsigned nth_col,/*!< in: column index */
- const char* from, /*!< in: old column name */
- const char* to) /*!< in: new column name */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-This function populates a dict_col_t memory structure with
-supplied information. */
-UNIV_INTERN
-void
-dict_mem_fill_column_struct(
-/*========================*/
- dict_col_t* column, /*!< out: column struct to be
- filled */
- ulint col_pos, /*!< in: column position */
- ulint mtype, /*!< in: main data type */
- ulint prtype, /*!< in: precise type */
- ulint col_len); /*!< in: column length */
-/**********************************************************************//**
-This function populates a dict_index_t index memory structure with
-supplied information. */
-UNIV_INLINE
-void
-dict_mem_fill_index_struct(
-/*=======================*/
- dict_index_t* index, /*!< out: index to be filled */
- mem_heap_t* heap, /*!< in: memory heap */
- const char* table_name, /*!< in: table name */
- const char* index_name, /*!< in: index name */
- ulint space, /*!< in: space where the index tree is
- placed, ignored if the index is of
- the clustered type */
- ulint type, /*!< in: DICT_UNIQUE,
- DICT_CLUSTERED, ... ORed */
- ulint n_fields); /*!< in: number of fields */
-/**********************************************************************//**
-Creates an index memory object.
-@return own: index object */
-UNIV_INTERN
-dict_index_t*
-dict_mem_index_create(
-/*==================*/
- const char* table_name, /*!< in: table name */
- const char* index_name, /*!< in: index name */
- ulint space, /*!< in: space where the index tree is
- placed, ignored if the index is of
- the clustered type */
- ulint type, /*!< in: DICT_UNIQUE,
- DICT_CLUSTERED, ... ORed */
- ulint n_fields); /*!< in: number of fields */
-/**********************************************************************//**
-Adds a field definition to an index. NOTE: does not take a copy
-of the column name if the field is a column. The memory occupied
-by the column name may be released only after publishing the index. */
-UNIV_INTERN
-void
-dict_mem_index_add_field(
-/*=====================*/
- dict_index_t* index, /*!< in: index */
- const char* name, /*!< in: column name */
- ulint prefix_len); /*!< in: 0 or the column prefix length
- in a MySQL index like
- INDEX (textcol(25)) */
-/**********************************************************************//**
-Frees an index memory object. */
-UNIV_INTERN
-void
-dict_mem_index_free(
-/*================*/
- dict_index_t* index); /*!< in: index */
-/**********************************************************************//**
-Creates and initializes a foreign constraint memory object.
-@return own: foreign constraint struct */
-UNIV_INTERN
-dict_foreign_t*
-dict_mem_foreign_create(void);
-/*=========================*/
-
-/**********************************************************************//**
-Sets the foreign_table_name_lookup pointer based on the value of
-lower_case_table_names. If that is 0 or 1, foreign_table_name_lookup
-will point to foreign_table_name. If 2, then another string is
-allocated from the heap and set to lower case. */
-UNIV_INTERN
-void
-dict_mem_foreign_table_name_lookup_set(
-/*===================================*/
- dict_foreign_t* foreign, /*!< in/out: foreign struct */
- ibool do_alloc); /*!< in: is an alloc needed */
-
-/**********************************************************************//**
-Sets the referenced_table_name_lookup pointer based on the value of
-lower_case_table_names. If that is 0 or 1, referenced_table_name_lookup
-will point to referenced_table_name. If 2, then another string is
-allocated from the heap and set to lower case. */
-UNIV_INTERN
-void
-dict_mem_referenced_table_name_lookup_set(
-/*======================================*/
- dict_foreign_t* foreign, /*!< in/out: foreign struct */
- ibool do_alloc); /*!< in: is an alloc needed */
-
-/** Create a temporary tablename like "#sql-ibtid-inc" where
- tid = the Table ID
- inc = a randomly initialized number that is incremented for each file
-The table ID is a 64 bit integer, can use up to 20 digits, and is
-initialized at bootstrap. The second number is 32 bits, can use up to 10
-digits, and is initialized at startup to a randomly distributed number.
-It is hoped that the combination of these two numbers will provide a
-reasonably unique temporary file name.
-@param[in] heap A memory heap
-@param[in] dbtab Table name in the form database/table name
-@param[in] id Table id
-@return A unique temporary tablename suitable for InnoDB use */
-UNIV_INTERN
-char*
-dict_mem_create_temporary_tablename(
- mem_heap_t* heap,
- const char* dbtab,
- table_id_t id);
-
-/** Initialize dict memory variables */
-
-void
-dict_mem_init(void);
-
-/** Data structure for a column in a table */
-struct dict_col_t{
- /*----------------------*/
- /** The following are copied from dtype_t,
- so that all bit-fields can be packed tightly. */
- /* @{ */
- unsigned prtype:32; /*!< precise type; MySQL data
- type, charset code, flags to
- indicate nullability,
- signedness, whether this is a
- binary string, whether this is
- a true VARCHAR where MySQL
- uses 2 bytes to store the length */
- unsigned mtype:8; /*!< main data type */
-
- /* the remaining fields do not affect alphabetical ordering: */
-
- unsigned len:16; /*!< length; for MySQL data this
- is field->pack_length(),
- except that for a >= 5.0.3
- type true VARCHAR this is the
- maximum byte length of the
- string data (in addition to
- the string, MySQL uses 1 or 2
- bytes to store the string length) */
-
- unsigned mbminmaxlen:5; /*!< minimum and maximum length of a
- character, in bytes;
- DATA_MBMINMAXLEN(mbminlen,mbmaxlen);
- mbminlen=DATA_MBMINLEN(mbminmaxlen);
- mbmaxlen=DATA_MBMAXLEN(mbminmaxlen) */
- /*----------------------*/
- /* End of definitions copied from dtype_t */
- /* @} */
-
- unsigned ind:10; /*!< table column position
- (starting from 0) */
- unsigned ord_part:1; /*!< nonzero if this column
- appears in the ordering fields
- of an index */
- unsigned max_prefix:12; /*!< maximum index prefix length on
- this column. Our current max limit is
- 3072 for Barracuda table */
-};
-
-/** @brief DICT_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and
-is the maximum indexed column length (or indexed prefix length) in
-ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT. Also, in any format,
-any fixed-length field that is longer than this will be encoded as
-a variable-length field.
-
-It is set to 3*256, so that one can create a column prefix index on
-256 characters of a TEXT or VARCHAR column also in the UTF-8
-charset. In that charset, a character may take at most 3 bytes. This
-constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
-files would be at risk! */
-#define DICT_ANTELOPE_MAX_INDEX_COL_LEN REC_ANTELOPE_MAX_INDEX_COL_LEN
-
-/** Find out maximum indexed column length by its table format.
-For ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT, the maximum
-field length is REC_ANTELOPE_MAX_INDEX_COL_LEN - 1 (767). For
-Barracuda row formats COMPRESSED and DYNAMIC, the length could
-be REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes.
-DICT_MAX_FIELD_LEN_BY_FORMAT takes the table object;
-DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG selects between the same two limits
-from the table flags instead. */
-#define DICT_MAX_FIELD_LEN_BY_FORMAT(table) \
- ((dict_table_get_format(table) < UNIV_FORMAT_B) \
- ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \
- : REC_VERSION_56_MAX_INDEX_COL_LEN)
-
-#define DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags) \
- ((DICT_TF_HAS_ATOMIC_BLOBS(flags) < UNIV_FORMAT_B) \
- ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \
- : REC_VERSION_56_MAX_INDEX_COL_LEN) /* NOTE(review): compares the DICT_TF_HAS_ATOMIC_BLOBS() result with UNIV_FORMAT_B; presumably relies on that macro yielding 0 or 1 - confirm */
-
-/** Defines the maximum fixed length column size */
-#define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN
-#ifdef WITH_WSREP
-#define WSREP_MAX_SUPPORTED_KEY_LENGTH 3500
-#endif /* WITH_WSREP */
-
-/** Data structure for a field in an index: a pointer to the table column,
-the column name, and the optional prefix length and fixed length of the
-field */
-struct dict_field_t{
- dict_col_t* col; /*!< pointer to the table column */
- const char* name; /*!< name of the column */
- unsigned prefix_len:12; /*!< 0 or the length of the column
- prefix in bytes in a MySQL prefix
- index, e.g., INDEX (textcol(25));
- must be smaller than
- DICT_MAX_FIELD_LEN_BY_FORMAT;
- NOTE that in the UTF-8 charset, MySQL
- sets this to (mbmaxlen * the prefix len)
- in UTF-8 chars */
- unsigned fixed_len:10; /*!< 0 or the fixed length of the
- column if smaller than
- DICT_ANTELOPE_MAX_INDEX_COL_LEN */
-};
-
-/**********************************************************************//**
-PADDING HEURISTIC BASED ON LINEAR INCREASE OF PADDING TO AVOID
-COMPRESSION FAILURES
-(Note: this is relevant only for compressed indexes)
-GOAL: Avoid compression failures by maintaining information about the
-compressibility of data. If data is not very compressible then leave
-some extra space 'padding' in the uncompressed page making it more
-likely that compression of less than fully packed uncompressed page will
-succeed.
-
-This padding heuristic works by increasing the pad linearly until the
-desired failure rate is reached. A "round" is a fixed number of
-compression operations.
-After each round, the compression failure rate for that round is
-computed. If the failure rate is too high, then padding is incremented
-by a fixed value, otherwise it's left intact.
-If the compression failure is lower than the desired rate for a fixed
-number of consecutive rounds, then the padding is decreased by a fixed
-value. This is done to prevent overshooting the padding value,
-and to accommodate the possible change in data compressibility. */
-
-/** Number of zip ops in one round. */
-#define ZIP_PAD_ROUND_LEN (128)
-
-/** Number of successful rounds after which the padding is decreased */
-#define ZIP_PAD_SUCCESSFUL_ROUND_LIMIT (5)
-
-/** Amount by which padding is increased. */
-#define ZIP_PAD_INCR (128)
-
-/** Percentage of compression failures that are allowed in a single
-round */
-extern ulong zip_failure_threshold_pct;
-
-/** Maximum percentage of a page that can be allowed as a pad to avoid
-compression failures */
-extern ulong zip_pad_max;
-
-/** Data structure to hold information about how much space in
-an uncompressed page should be left as padding to avoid compression
-failures. This estimate is based on a self-adapting heuristic. */
-struct zip_pad_info_t {
- os_fast_mutex_t*
- mutex; /*!< mutex protecting the info */
- ulint pad; /*!< number of bytes used as pad */
- ulint success;/*!< successful compression ops during
- current round */
- ulint failure;/*!< failed compression ops during
- current round */
- ulint n_rounds;/*!< number of currently successful
- rounds (presumably the consecutive
- successful rounds that are compared with
- ZIP_PAD_SUCCESSFUL_ROUND_LIMIT - confirm) */
- volatile os_once::state_t
- mutex_created;
- /*!< Creation state of mutex member */
-};
-
-/** Number of samples of data size kept when page compression fails for
-a certain index.*/
-#define STAT_DEFRAG_DATA_SIZE_N_SAMPLE 10
-
-/** Data structure for an index. Most fields will be
-initialized to 0, NULL or FALSE in dict_mem_index_create(). */
-struct dict_index_t{
- index_id_t id; /*!< id of the index */
- prio_rw_lock_t* search_latch; /*!< latch protecting the AHI partition
- corresponding to this index */
- hash_table_t* search_table; /*!< hash table protected by
- search_latch */
- mem_heap_t* heap; /*!< memory heap */
- const char* name; /*!< index name */
- const char* table_name;/*!< table name */
- dict_table_t* table; /*!< back pointer to table */
-#ifndef UNIV_HOTBACKUP
- unsigned space:32;
- /*!< space where the index tree is placed */
- unsigned page:32;/*!< index tree root page number */
-#endif /* !UNIV_HOTBACKUP */
- unsigned type:DICT_IT_BITS;
- /*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
- DICT_UNIVERSAL, DICT_IBUF, DICT_CORRUPT) */
-#define MAX_KEY_LENGTH_BITS 12
- unsigned trx_id_offset:MAX_KEY_LENGTH_BITS;
- /*!< position of the trx id column
- in a clustered index record, if the fields
- before it are known to be of a fixed size,
- 0 otherwise */
-#if (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
-# error (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
-#endif
- unsigned n_user_defined_cols:10;
- /*!< number of columns the user defined to
- be in the index: in the internal
- representation we add more columns */
- unsigned n_uniq:10;/*!< number of fields from the beginning
- which are enough to determine an index
- entry uniquely */
- unsigned n_def:10;/*!< number of fields defined so far */
- unsigned n_fields:10;/*!< number of fields in the index */
- unsigned n_nullable:10;/*!< number of nullable fields */
- unsigned cached:1;/*!< TRUE if the index object is in the
- dictionary cache */
- unsigned to_be_dropped:1;
- /*!< TRUE if the index is to be dropped;
- protected by dict_operation_lock */
- unsigned online_status:2;
- /*!< enum online_index_status.
- Transitions from ONLINE_INDEX_COMPLETE (to
- ONLINE_INDEX_CREATION) are protected
- by dict_operation_lock and
- dict_sys->mutex. Other changes are
- protected by index->lock. */
- dict_field_t* fields; /*!< array of field descriptions */
-#ifndef UNIV_HOTBACKUP
- UT_LIST_NODE_T(dict_index_t)
- indexes;/*!< list of indexes of the table */
- btr_search_t* search_info;
- /*!< info used in optimistic searches */
- row_log_t* online_log;
- /*!< the log of modifications
- during online index creation;
- valid when online_status is
- ONLINE_INDEX_CREATION */
- /*----------------------*/
- /** Statistics for query optimization */
- /* @{ */
- ib_uint64_t* stat_n_diff_key_vals;
- /*!< approximate number of different
- key values for this index, for each
- n-column prefix where 1 <= n <=
- dict_get_n_unique(index) (the array is
- indexed from 0 to n_uniq-1); we
- periodically calculate new
- estimates */
- ib_uint64_t* stat_n_sample_sizes;
- /*!< number of pages that were sampled
- to calculate each of stat_n_diff_key_vals[],
- e.g. stat_n_sample_sizes[3] pages were sampled
- to get the number stat_n_diff_key_vals[3]. */
- ib_uint64_t* stat_n_non_null_key_vals;
- /*!< approximate number of non-null key values
- for this index, for each column where
- 1 <= n <= dict_get_n_unique(index) (the array
- is indexed from 0 to n_uniq-1); This
- is used when innodb_stats_method is
- "nulls_ignored". */
- ulint stat_index_size;
- /*!< approximate index size in
- database pages */
- ulint stat_n_leaf_pages;
- /*!< approximate number of leaf pages in the
- index tree */
- bool stats_error_printed;
- /*!< has persistent statistics error printed
- for this index ? */
- /* @} */
- /** Statistics for defragmentation, these numbers are estimations and
- could be very inaccurate at certain times, e.g. right after restart,
- during defragmentation, etc. */
- /* @{ */
- ulint stat_defrag_modified_counter;
- ulint stat_defrag_n_pages_freed;
- /*!< number of pages freed by defragmentation. */
- ulint stat_defrag_n_page_split;
- /*!< number of page splits since last full index
- defragmentation. */
- ulint stat_defrag_data_size_sample[STAT_DEFRAG_DATA_SIZE_N_SAMPLE];
- /*!< data size when compression failure happened
- the most recent 10 times. */
- ulint stat_defrag_sample_next_slot;
- /*!< in which slot the next sample should be
- saved. */
- /* @} */
- prio_rw_lock_t lock; /*!< read-write lock protecting the
- upper levels of the index tree */
- trx_id_t trx_id; /*!< id of the transaction that created this
- index, or 0 if the index existed
- when InnoDB was started up */
- zip_pad_info_t zip_pad;/*!< Information about state of
- compression failures and successes */
-#endif /* !UNIV_HOTBACKUP */
-#ifdef UNIV_BLOB_DEBUG
- ib_mutex_t blobs_mutex;
- /*!< mutex protecting blobs */
- ib_rbt_t* blobs; /*!< map of (page_no,heap_no,field_no)
- to first_blob_page_no; protected by
- blobs_mutex; @see btr_blob_dbg_t */
-#endif /* UNIV_BLOB_DEBUG */
-
- bool is_readable() const; /*!< NOTE(review): defined elsewhere; presumably reports whether the index can currently be read - confirm */
-
-#ifdef UNIV_DEBUG
- ulint magic_n;/*!< magic number */
-/** Value of dict_index_t::magic_n */
-# define DICT_INDEX_MAGIC_N 76789786
-#endif
-};
-
-/** The status of online index creation */
-enum online_index_status {
- /** the index is complete and ready for access */
- ONLINE_INDEX_COMPLETE = 0,
- /** the index is being created, online
- (allowing concurrent modifications) */
- ONLINE_INDEX_CREATION,
- /** secondary index creation was aborted and the index
- should be dropped as soon as index->table->n_ref_count reaches 0,
- or online table rebuild was aborted and the clustered index
- of the original table should soon be restored to
- ONLINE_INDEX_COMPLETE */
- ONLINE_INDEX_ABORTED,
- /** the online index creation was aborted, the index was
- dropped from the data dictionary and the tablespace, and it
- should be dropped from the data dictionary cache as soon as
- index->table->n_ref_count reaches 0. */
- ONLINE_INDEX_ABORTED_DROPPED
-};
-
-/** Data structure for a foreign key constraint; an example:
-FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D). Most fields will be
-initialized to 0, NULL or FALSE in dict_mem_foreign_create(). */
-struct dict_foreign_t{
- mem_heap_t* heap; /*!< this object is allocated from
- this memory heap */
- char* id; /*!< id of the constraint as a
- null-terminated string */
- unsigned n_fields:10; /*!< number of indexes' first fields
- for which the foreign key
- constraint is defined: we allow the
- indexes to contain more fields than
- mentioned in the constraint, as long
- as the first fields are as mentioned */
- unsigned type:6; /*!< 0 or a bitwise OR of the
- DICT_FOREIGN_ON_DELETE_... and
- DICT_FOREIGN_ON_UPDATE_... flags */
- char* foreign_table_name;/*!< foreign table name */
- char* foreign_table_name_lookup;
- /*!< foreign table name used for dict lookup */
- dict_table_t* foreign_table; /*!< table where the foreign key is */
- const char** foreign_col_names;/*!< names of the columns in the
- foreign key */
- char* referenced_table_name;/*!< referenced table name */
- char* referenced_table_name_lookup;
- /*!< referenced table name for dict lookup*/
- dict_table_t* referenced_table;/*!< table where the referenced key
- is */
- const char** referenced_col_names;/*!< names of the referenced
- columns in the referenced table */
- dict_index_t* foreign_index; /*!< foreign index; we require that
- both tables contain explicitly defined
- indexes for the constraint: InnoDB
- does not generate new indexes
- implicitly */
- dict_index_t* referenced_index;/*!< referenced index */
-};
-
-std::ostream&
-operator<< (std::ostream& out, const dict_foreign_t& foreign);
-
-struct dict_foreign_print { /* function object that writes each foreign key it is called with to a stream */
-
- dict_foreign_print(std::ostream& out)
- : m_out(out)
- {}
-
- void operator()(const dict_foreign_t* foreign) { /* streams *foreign via operator<< */
- m_out << *foreign;
- }
-private:
- std::ostream& m_out; /* output stream, held by reference */
-};
-
-/** Compare two dict_foreign_t objects using their ids. Used in the ordering
-of dict_table_t::foreign_set and dict_table_t::referenced_set. It returns
-true if the first argument is considered to go before the second in the
-strict weak ordering it defines, and false otherwise. */
-struct dict_foreign_compare {
-
- bool operator()(
- const dict_foreign_t* lhs,
- const dict_foreign_t* rhs) const
- {
- return(ut_strcmp(lhs->id, rhs->id) < 0);
- }
-};
-
-/** A function object (predicate) to find a foreign key with the given
-index as the referenced index. operator() returns true if the foreign
-key's referenced_index is the given index, false otherwise. */
-struct dict_foreign_with_index {
-
- dict_foreign_with_index(const dict_index_t* index)
- : m_index(index)
- {}
-
- bool operator()(const dict_foreign_t* foreign) const
- {
- return(foreign->referenced_index == m_index);
- }
-
- const dict_index_t* m_index;
-};
-
-/** A function object to check if the foreign constraint is between different
-tables. Returns true if the foreign_table and referenced_table pointers of
-the foreign key constraint differ, false otherwise. */
-struct dict_foreign_different_tables {
-
- bool operator()(const dict_foreign_t* foreign) const
- {
- return(foreign->foreign_table != foreign->referenced_table);
- }
-};
-
-/** A function object to check if the foreign key constraint has the same
-name as given. The full constraint id is compared with the given id using
-innobase_strcasecmp(); if that does not match, the part of the constraint
-id that follows the first '/' (i.e. the id without the database name) is
-compared the same way. Return true if either matches, false otherwise. */
-struct dict_foreign_matches_id {
-
- dict_foreign_matches_id(const char* id)
- : m_id(id)
- {}
-
- bool operator()(const dict_foreign_t* foreign) const
- {
- if (0 == innobase_strcasecmp(foreign->id, m_id)) {
- return(true);
- }
- if (const char* pos = strchr(foreign->id, '/')) {
- if (0 == innobase_strcasecmp(m_id, pos + 1)) {
- return(true);
- }
- }
- return(false);
- }
-
- const char* m_id;
-};
-
-typedef std::set<dict_foreign_t*, dict_foreign_compare> dict_foreign_set;
-
-std::ostream&
-operator<< (std::ostream& out, const dict_foreign_set& fk_set);
-
-/** Function object to check if a foreign key object is there
-in the given foreign key set or not. It returns true if the
-foreign key is not found, false otherwise */
-struct dict_foreign_not_exists {
- dict_foreign_not_exists(const dict_foreign_set& obj_)
- : m_foreigns(obj_)
- {}
-
- /* Return true if the given foreign key is not found */
- bool operator()(dict_foreign_t* const & foreign) const {
- return(m_foreigns.find(foreign) == m_foreigns.end());
- }
-private:
- const dict_foreign_set& m_foreigns;
-};
-
-/** Validate the search order in the foreign key set.
-@param[in] fk_set the foreign key set to be validated
-@return true if search order is fine in the set, false otherwise. */
-bool
-dict_foreign_set_validate(
- const dict_foreign_set& fk_set);
-
-/** Validate the search order in the foreign key sets of the table
-(foreign_set and referenced_set).
-@param[in] table table whose foreign key sets are to be validated
-@return true if foreign key sets are fine, false otherwise. */
-bool
-dict_foreign_set_validate(
- const dict_table_t& table);
-
-/*********************************************************************//**
-Frees a foreign key struct by freeing the memory heap (foreign->heap)
-that the struct is allocated from. */
-inline
-void
-dict_foreign_free(
-/*==============*/
- dict_foreign_t* foreign) /*!< in, own: foreign key struct */
-{
- mem_heap_free(foreign->heap);
-}
-
-/** The destructor will free all the foreign key constraints in the set
-by calling dict_foreign_free() on each of the foreign key constraints.
-This is used to free the allocated memory when a local set goes out
-of scope. */
-struct dict_foreign_set_free {
-
- dict_foreign_set_free(const dict_foreign_set& foreign_set)
- : m_foreign_set(foreign_set)
- {}
-
- ~dict_foreign_set_free()
- {
- std::for_each(m_foreign_set.begin(),
- m_foreign_set.end(),
- dict_foreign_free);
- }
-
- const dict_foreign_set& m_foreign_set;
-};
-
-/** The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that
-a foreign key constraint is enforced, therefore RESTRICT just means no flag */
-/* @{ */
-#define DICT_FOREIGN_ON_DELETE_CASCADE 1 /*!< ON DELETE CASCADE */
-#define DICT_FOREIGN_ON_DELETE_SET_NULL 2 /*!< ON DELETE SET NULL */
-#define DICT_FOREIGN_ON_UPDATE_CASCADE 4 /*!< ON UPDATE CASCADE */
-#define DICT_FOREIGN_ON_UPDATE_SET_NULL 8 /*!< ON UPDATE SET NULL */
-#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16 /*!< ON DELETE NO ACTION */
-#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32 /*!< ON UPDATE NO ACTION */
-/* @} */
-
-/* This flag is for synchronizing SQL DDL and memcached DML.
-If table->memcached_sync_count == DICT_TABLE_IN_DDL, there is DDL running
-on the table, and DML from memcached will be blocked. */
-#define DICT_TABLE_IN_DDL -1
-
-/** These are used when MySQL FRM and InnoDB data dictionary are
-in inconsistent state. */
-typedef enum {
- DICT_FRM_CONSISTENT = 0, /*!< Consistent state */
- DICT_FRM_NO_PK = 1, /*!< MySQL has no primary key
- but InnoDB dictionary has
- non-generated one. */
- DICT_NO_PK_FRM_HAS = 2, /*!< MySQL has primary key but
- InnoDB dictionary has not. */
- DICT_FRM_INCONSISTENT_KEYS = 3 /*!< Key count mismatch */
-} dict_frm_t;
-
-/** Data structure for a database table. Most fields will be
-initialized to 0, NULL or FALSE in dict_mem_table_create(). */
-struct dict_table_t{
-
-
- table_id_t id; /*!< id of the table */
- mem_heap_t* heap; /*!< memory heap */
- char* name; /*!< table name */
- const char* dir_path_of_temp_table;/*!< NULL or the directory path
- where a TEMPORARY table that was explicitly
- created by a user should be placed if
- innodb_file_per_table is defined in my.cnf;
- in Unix this is usually /tmp/..., in Windows
- temp\... */
- char* data_dir_path; /*!< NULL or the directory path
- specified by DATA DIRECTORY */
- unsigned space:32;
- /*!< space where the clustered index of the
- table is placed */
- unsigned flags:DICT_TF_BITS; /*!< DICT_TF_... */
- unsigned flags2:DICT_TF2_BITS; /*!< DICT_TF2_... */
- unsigned file_unreadable:1;
- /*!< true if this is in a single-table
- tablespace and the .ibd file is missing
- or page decryption failed and page is
- corrupted; then we must return in
- ha_innodb.cc an error if the
- user tries to query such table */
- unsigned cached:1;/*!< TRUE if the table object has been added
- to the dictionary cache */
- unsigned to_be_dropped:1;
- /*!< TRUE if the table is to be dropped, but
- not yet actually dropped (could in the bk
- drop list); It is turned on at the beginning
- of row_drop_table_for_mysql() and turned off
- just before we start to update system tables
- for the drop. It is protected by
- dict_operation_lock */
- unsigned n_def:10;/*!< number of columns defined so far */
- unsigned n_cols:10;/*!< number of columns */
- unsigned can_be_evicted:1;
- /*!< TRUE if it's not an InnoDB system table
- or a table that has no FK relationships */
- unsigned corrupted:1;
- /*!< TRUE if table is corrupted */
- unsigned drop_aborted:1;
- /*!< TRUE if some indexes should be dropped
- after ONLINE_INDEX_ABORTED
- or ONLINE_INDEX_ABORTED_DROPPED */
- dict_col_t* cols; /*!< array of column descriptions */
- const char* col_names;
- /*!< Column names packed in a character string
- "name1\0name2\0...nameN\0". Until
- the string contains n_cols, it will be
- allocated from a temporary heap. The final
- string will be allocated from table->heap. */
- bool is_system_db;
- /*!< True if the table belongs to a system
- database (mysql, information_schema or
- performance_schema) */
- dict_frm_t dict_frm_mismatch;
- /*!< !DICT_FRM_CONSISTENT==0 if data
- dictionary information and
- MySQL FRM information mismatch. */
-#ifndef UNIV_HOTBACKUP
- hash_node_t name_hash; /*!< hash chain node */
- hash_node_t id_hash; /*!< hash chain node */
- UT_LIST_BASE_NODE_T(dict_index_t)
- indexes; /*!< list of indexes of the table */
-
- dict_foreign_set foreign_set;
- /*!< set of foreign key constraints
- in the table; these refer to columns
- in other tables */
-
- dict_foreign_set referenced_set;
- /*!< list of foreign key constraints
- which refer to this table */
-
- UT_LIST_NODE_T(dict_table_t)
- table_LRU; /*!< node of the LRU list of tables */
- unsigned fk_max_recusive_level:8;
- /*!< maximum recursive level we support when
- loading tables chained together with FK
- constraints. If exceeds this level, we will
- stop loading child table into memory along with
- its parent table */
- ulint n_foreign_key_checks_running;
- /*!< count of how many foreign key check
- operations are currently being performed
- on the table: we cannot drop the table while
- there are foreign key checks running on
- it! */
- trx_id_t def_trx_id;
- /*!< transaction id that last touched
- the table definition, either when
- loading the definition or CREATE
- TABLE, or ALTER TABLE (prepare,
- commit, and rollback phases) */
- trx_id_t query_cache_inv_trx_id;
- /*!< transactions whose trx id is
- smaller than this number are not
- allowed to store to the MySQL query
- cache or retrieve from it; when a trx
- with undo logs commits, it sets this
- to the value of the trx id counter for
- the tables it had an IX lock on */
-#ifdef UNIV_DEBUG
- /*----------------------*/
- ibool does_not_fit_in_memory;
- /*!< this field is used to specify in
- simulations tables which are so big
- that disk should be accessed: disk
- access is simulated by putting the
- thread to sleep for a while; NOTE that
- this flag is not stored to the data
- dictionary on disk, and the database
- will forget about value TRUE if it has
- to reload the table definition from
- disk */
-#endif /* UNIV_DEBUG */
- /*----------------------*/
- unsigned big_rows:1;
- /*!< flag: TRUE if the maximum length of
- a single row exceeds BIG_ROW_SIZE;
- initialized in dict_table_add_to_cache() */
- /** Statistics for query optimization */
- /* @{ */
-
- volatile os_once::state_t stats_latch_created;
- /*!< Creation state of 'stats_latch'. */
-
- rw_lock_t* stats_latch; /*!< this latch protects:
- dict_table_t::stat_initialized
- dict_table_t::stat_n_rows (*)
- dict_table_t::stat_clustered_index_size
- dict_table_t::stat_sum_of_other_index_sizes
- dict_table_t::stat_modified_counter (*)
- dict_table_t::indexes*::stat_n_diff_key_vals[]
- dict_table_t::indexes*::stat_index_size
- dict_table_t::indexes*::stat_n_leaf_pages
- (*) those are not always protected for
- performance reasons. */
- unsigned stat_initialized:1; /*!< TRUE if statistics have
- been calculated the first time
- after database startup or table creation */
-#define DICT_TABLE_IN_USED -1
- lint memcached_sync_count;
- /*!< count of how many handles are opened
- to this table from memcached; DDL on the
- table is NOT allowed until this count
- goes to zero. If it's -1, means there's DDL
- on the table, DML from memcached will be
- blocked. */
- ib_time_t stats_last_recalc;
- /*!< Timestamp of last recalc of the stats */
- ib_uint32_t stat_persistent;
- /*!< The two bits below are set in the
- ::stat_persistent member and have the following
- meaning:
- 1. _ON=0, _OFF=0, no explicit persistent stats
- setting for this table, the value of the global
- srv_stats_persistent is used to determine
- whether the table has persistent stats enabled
- or not
- 2. _ON=0, _OFF=1, persistent stats are
- explicitly disabled for this table, regardless
- of the value of the global srv_stats_persistent
- 3. _ON=1, _OFF=0, persistent stats are
- explicitly enabled for this table, regardless
- of the value of the global srv_stats_persistent
- 4. _ON=1, _OFF=1, not allowed, we assert if
- this ever happens. */
-#define DICT_STATS_PERSISTENT_ON (1 << 1)
-#define DICT_STATS_PERSISTENT_OFF (1 << 2)
- ib_uint32_t stats_auto_recalc;
- /*!< The two bits below are set in the
- ::stats_auto_recalc member and have
- the following meaning:
- 1. _ON=0, _OFF=0, no explicit auto recalc
- setting for this table, the value of the global
- srv_stats_persistent_auto_recalc is used to
- determine whether the table has auto recalc
- enabled or not
- 2. _ON=0, _OFF=1, auto recalc is explicitly
- disabled for this table, regardless of the
- value of the global
- srv_stats_persistent_auto_recalc
- 3. _ON=1, _OFF=0, auto recalc is explicitly
- enabled for this table, regardless of the
- value of the global
- srv_stats_persistent_auto_recalc
- 4. _ON=1, _OFF=1, not allowed, we assert if
- this ever happens. */
-#define DICT_STATS_AUTO_RECALC_ON (1 << 1)
-#define DICT_STATS_AUTO_RECALC_OFF (1 << 2)
- ulint stats_sample_pages;
- /*!< the number of pages to sample for this
- table during persistent stats estimation;
- if this is 0, then the value of the global
- srv_stats_persistent_sample_pages will be
- used instead. */
- ib_uint64_t stat_n_rows;
- /*!< approximate number of rows in the table;
- we periodically calculate new estimates */
- ulint stat_clustered_index_size;
- /*!< approximate clustered index size in
- database pages */
- ulint stat_sum_of_other_index_sizes;
- /*!< other indexes in database pages */
- ib_uint64_t stat_modified_counter;
- /*!< when a row is inserted, updated,
- or deleted,
- we add 1 to this number; we calculate new
- estimates for the stat_... values for the
- table and the indexes when about 1 / 16 of
- table has been modified;
- also when the estimate operation is
- called for MySQL SHOW TABLE STATUS; the
- counter is reset to zero at statistics
- calculation; this counter is not protected by
- any latch, because this is only used for
- heuristics */
-
-#define BG_STAT_IN_PROGRESS ((byte)(1 << 0))
- /*!< BG_STAT_IN_PROGRESS is set in
- stats_bg_flag when the background
- stats code is working on this table. The DROP
- TABLE code waits for this to be cleared
- before proceeding. */
-#define BG_STAT_SHOULD_QUIT ((byte)(1 << 1))
- /*!< BG_STAT_SHOULD_QUIT is set in
- stats_bg_flag when DROP TABLE starts
- waiting on BG_STAT_IN_PROGRESS to be cleared,
- the background stats thread will detect this
- and will eventually quit sooner */
-#define BG_SCRUB_IN_PROGRESS ((byte)(1 << 2))
- /*!< BG_SCRUB_IN_PROGRESS is set in
- stats_bg_flag when the background
- scrub code is working on this table. The DROP
- TABLE code waits for this to be cleared
- before proceeding. */
-
-#define BG_IN_PROGRESS (BG_STAT_IN_PROGRESS | BG_SCRUB_IN_PROGRESS)
-
- byte stats_bg_flag;
- /*!< see BG_STAT_* above.
- Writes are covered by dict_sys->mutex.
- Dirty reads are possible. */
- bool stats_error_printed;
- /*!< Has persistent stats error beein
- already printed for this table ? */
- /* @} */
- /*----------------------*/
- /**!< The following fields are used by the
- AUTOINC code. The actual collection of
- tables locked during AUTOINC read/write is
- kept in trx_t. In order to quickly determine
- whether a transaction has locked the AUTOINC
- lock we keep a pointer to the transaction
- here in the autoinc_trx variable. This is to
- avoid acquiring the lock_sys_t::mutex and
- scanning the vector in trx_t.
-
- When an AUTOINC lock has to wait, the
- corresponding lock instance is created on
- the trx lock heap rather than use the
- pre-allocated instance in autoinc_lock below.*/
- /* @{ */
- lock_t* autoinc_lock;
- /*!< a buffer for an AUTOINC lock
- for this table: we allocate the memory here
- so that individual transactions can get it
- and release it without a need to allocate
- space from the lock heap of the trx:
- otherwise the lock heap would grow rapidly
- if we do a large insert from a select */
- ib_mutex_t* autoinc_mutex;
- /*!< mutex protecting the autoincrement
- counter */
-
- /** Creation state of autoinc_mutex member */
- volatile os_once::state_t
- autoinc_mutex_created;
-
- ib_uint64_t autoinc;/*!< autoinc counter value to give to the
- next inserted row */
- ulong n_waiting_or_granted_auto_inc_locks;
- /*!< This counter is used to track the number
- of granted and pending autoinc locks on this
- table. This value is set after acquiring the
- lock_sys_t::mutex but we peek the contents to
- determine whether other transactions have
- acquired the AUTOINC lock or not. Of course
- only one transaction can be granted the
- lock but there can be multiple waiters. */
- const trx_t* autoinc_trx;
- /*!< The transaction that currently holds the
- the AUTOINC lock on this table.
- Protected by lock_sys->mutex. */
- fts_t* fts; /* FTS specific state variables */
- /* @} */
- /*----------------------*/
-
- ib_quiesce_t quiesce;/*!< Quiescing states, protected by the
- dict_index_t::lock. ie. we can only change
- the state if we acquire all the latches
- (dict_index_t::lock) in X mode of this table's
- indexes. */
-
- /*----------------------*/
- ulint n_rec_locks;
- /*!< Count of the number of record locks on
- this table. We use this to determine whether
- we can evict the table from the dictionary
- cache. It is protected by lock_sys->mutex. */
- ulint n_ref_count;
- /*!< count of how many handles are opened
- to this table; dropping of the table is
- NOT allowed until this count gets to zero;
- MySQL does NOT itself check the number of
- open handles at drop */
- UT_LIST_BASE_NODE_T(lock_t)
- locks; /*!< list of locks on the table; protected
- by lock_sys->mutex */
-
-#endif /* !UNIV_HOTBACKUP */
-
- /* Returns true if this is a single-table tablespace
- and the .ibd file is missing or page decryption failed
- and/or page is corrupted.
- @return true if table is readable
- @retval false if table is not readable */
- inline bool is_readable() const
- {
- return(UNIV_LIKELY(!file_unreadable));
- }
-
-#ifdef UNIV_DEBUG
- ulint magic_n;/*!< magic number */
-/** Value of dict_table_t::magic_n */
-# define DICT_TABLE_MAGIC_N 76333786
-#endif /* UNIV_DEBUG */
-};
-
-/* Returns true if this is a single-table tablespace
-and the .ibd file is missing or page decryption failed
-and/or page is corrupted.
-@return true if table is readable
-@retval false if table is not readable */
-inline bool dict_index_t::is_readable() const
-{
- return(UNIV_LIKELY(!table->file_unreadable));
-}
-
-/** A function object to add the foreign key constraint to the referenced set
-of the referenced table, if it exists in the dictionary cache. */
-struct dict_foreign_add_to_referenced_table {
- void operator()(dict_foreign_t* foreign) const
- {
- if (dict_table_t* table = foreign->referenced_table) {
- std::pair<dict_foreign_set::iterator, bool> ret
- = table->referenced_set.insert(foreign);
- ut_a(ret.second);
- }
- }
-};
-
-/** Destroy the autoinc latch of the given table.
-This function is only called from either single threaded environment
-or from a thread that has not shared the table object with other threads.
-@param[in,out] table table whose stats latch to destroy */
-inline
-void
-dict_table_autoinc_destroy(
- dict_table_t* table)
-{
- if (table->autoinc_mutex_created == os_once::DONE
- && table->autoinc_mutex != NULL) {
- mutex_free(table->autoinc_mutex);
- delete table->autoinc_mutex;
- }
-}
-
-/** Allocate and init the autoinc latch of a given table.
-This function must not be called concurrently on the same table object.
-@param[in,out] table_void table whose autoinc latch to create */
-void
-dict_table_autoinc_alloc(
- void* table_void);
-
-/** Allocate and init the zip_pad_mutex of a given index.
-This function must not be called concurrently on the same index object.
-@param[in,out] index_void index whose zip_pad_mutex to create */
-void
-dict_index_zip_pad_alloc(
- void* index_void);
-
-/** Request for lazy creation of the autoinc latch of a given table.
-This function is only called from either single threaded environment
-or from a thread that has not shared the table object with other threads.
-@param[in,out] table table whose autoinc latch is to be created. */
-inline
-void
-dict_table_autoinc_create_lazy(
- dict_table_t* table)
-{
-#ifdef HAVE_ATOMIC_BUILTINS
- table->autoinc_mutex = NULL;
- table->autoinc_mutex_created = os_once::NEVER_DONE;
-#else /* HAVE_ATOMIC_BUILTINS */
- dict_table_autoinc_alloc(table);
- table->autoinc_mutex_created = os_once::DONE;
-#endif /* HAVE_ATOMIC_BUILTINS */
-}
-
-/** Request a lazy creation of dict_index_t::zip_pad::mutex.
-This function is only called from either single threaded environment
-or from a thread that has not shared the table object with other threads.
-@param[in,out] index index whose zip_pad mutex is to be created */
-inline
-void
-dict_index_zip_pad_mutex_create_lazy(
- dict_index_t* index)
-{
-#ifdef HAVE_ATOMIC_BUILTINS
- index->zip_pad.mutex = NULL;
- index->zip_pad.mutex_created = os_once::NEVER_DONE;
-#else /* HAVE_ATOMIC_BUILTINS */
- dict_index_zip_pad_alloc(index);
- index->zip_pad.mutex_created = os_once::DONE;
-#endif /* HAVE_ATOMIC_BUILTINS */
-}
-
-/** Destroy the zip_pad_mutex of the given index.
-This function is only called from either single threaded environment
-or from a thread that has not shared the table object with other threads.
-@param[in,out] table table whose stats latch to destroy */
-inline
-void
-dict_index_zip_pad_mutex_destroy(
- dict_index_t* index)
-{
- if (index->zip_pad.mutex_created == os_once::DONE
- && index->zip_pad.mutex != NULL) {
- os_fast_mutex_free(index->zip_pad.mutex);
- delete index->zip_pad.mutex;
- }
-}
-
-/** Release the zip_pad_mutex of a given index.
-@param[in,out] index index whose zip_pad_mutex is to be released */
-inline
-void
-dict_index_zip_pad_unlock(
- dict_index_t* index)
-{
- os_fast_mutex_unlock(index->zip_pad.mutex);
-}
-
-#ifdef UNIV_DEBUG
-/** Check if the current thread owns the autoinc_mutex of a given table.
-@param[in] table the autoinc_mutex belongs to this table
-@return true, if the current thread owns the autoinc_mutex, false otherwise.*/
-inline
-bool
-dict_table_autoinc_own(
- const dict_table_t* table)
-{
- return(mutex_own(table->autoinc_mutex));
-}
-#endif /* UNIV_DEBUG */
-
-#ifndef UNIV_NONINL
-#include "dict0mem.ic"
-#endif
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-#endif
diff --git a/storage/xtradb/include/dict0mem.ic b/storage/xtradb/include/dict0mem.ic
deleted file mode 100644
index 38d51f61789..00000000000
--- a/storage/xtradb/include/dict0mem.ic
+++ /dev/null
@@ -1,74 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/dict0mem.ic
-Data dictionary memory object creation
-
-Created 1/8/1996 Heikki Tuuri
-***********************************************************************/
-
-#include "data0type.h"
-#include "dict0mem.h"
-#include "fil0fil.h"
-
-/**********************************************************************//**
-This function poplulates a dict_index_t index memory structure with
-supplied information. */
-UNIV_INLINE
-void
-dict_mem_fill_index_struct(
-/*=======================*/
- dict_index_t* index, /*!< out: index to be filled */
- mem_heap_t* heap, /*!< in: memory heap */
- const char* table_name, /*!< in: table name */
- const char* index_name, /*!< in: index name */
- ulint space, /*!< in: space where the index tree is
- placed, ignored if the index is of
- the clustered type */
- ulint type, /*!< in: DICT_UNIQUE,
- DICT_CLUSTERED, ... ORed */
- ulint n_fields) /*!< in: number of fields */
-{
-
- if (heap) {
- index->heap = heap;
- index->name = mem_heap_strdup(heap, index_name);
- index->fields = (dict_field_t*) mem_heap_alloc(
- heap, 1 + n_fields * sizeof(dict_field_t));
- } else {
- index->name = index_name;
- index->heap = NULL;
- index->fields = NULL;
- }
-
- /* Assign a ulint to a 4-bit-mapped field.
- Only the low-order 4 bits are assigned. */
- index->type = type;
-#ifndef UNIV_HOTBACKUP
- index->space = (unsigned int) space;
- index->page = FIL_NULL;
-#endif /* !UNIV_HOTBACKUP */
- index->table_name = table_name;
- index->n_fields = (unsigned int) n_fields;
- /* The '1 +' above prevents allocation
- of an empty mem block */
-#ifdef UNIV_DEBUG
- index->magic_n = DICT_INDEX_MAGIC_N;
-#endif /* UNIV_DEBUG */
-}
diff --git a/storage/xtradb/include/dict0pagecompress.h b/storage/xtradb/include/dict0pagecompress.h
deleted file mode 100644
index 6503c86ffa2..00000000000
--- a/storage/xtradb/include/dict0pagecompress.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/dict0pagecompress.h
-Helper functions for extracting/storing page compression information
-to dictionary.
-
-Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
-***********************************************************************/
-
-#ifndef dict0pagecompress_h
-#define dict0pagecompress_h
-
-/********************************************************************//**
-Extract the page compression level from table flags.
-@return page compression level, or 0 if not compressed */
-UNIV_INLINE
-ulint
-dict_tf_get_page_compression_level(
-/*===============================*/
- ulint flags) /*!< in: flags */
- __attribute__((const));
-/********************************************************************//**
-Extract the page compression flag from table flags
-@return page compression flag, or false if not compressed */
-UNIV_INLINE
-ibool
-dict_tf_get_page_compression(
-/*==========================*/
- ulint flags) /*!< in: flags */
- __attribute__((const));
-
-/********************************************************************//**
-Check whether the table uses the page compressed page format.
-@return page compression level, or 0 if not compressed */
-UNIV_INLINE
-ulint
-dict_table_page_compression_level(
-/*==============================*/
- const dict_table_t* table) /*!< in: table */
- __attribute__((const));
-
-/********************************************************************//**
-Extract the atomic writes flag from table flags.
-@return true if atomic writes are used, false if not used */
-UNIV_INLINE
-atomic_writes_t
-dict_tf_get_atomic_writes(
-/*======================*/
- ulint flags) /*!< in: flags */
- __attribute__((const));
-
-/********************************************************************//**
-Check whether the table uses the atomic writes.
-@return true if atomic writes is used, false if not */
-UNIV_INLINE
-atomic_writes_t
-dict_table_get_atomic_writes(
-/*=========================*/
- const dict_table_t* table); /*!< in: table */
-
-
-#ifndef UNIV_NONINL
-#include "dict0pagecompress.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/dict0pagecompress.ic b/storage/xtradb/include/dict0pagecompress.ic
deleted file mode 100644
index 13c2b46c51c..00000000000
--- a/storage/xtradb/include/dict0pagecompress.ic
+++ /dev/null
@@ -1,105 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/dict0pagecompress.ic
-Inline implementation for helper functions for extracting/storing
-page compression and atomic writes information to dictionary.
-
-Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
-***********************************************************************/
-
-/********************************************************************//**
-Extract the page compression level from dict_table_t::flags.
-These flags are in memory, so assert that they are valid.
-@return page compression level, or 0 if not compressed */
-UNIV_INLINE
-ulint
-dict_tf_get_page_compression_level(
-/*===============================*/
- ulint flags) /*!< in: flags */
-{
- ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags);
-
- ut_ad(page_compression_level <= 9);
-
- return(page_compression_level);
-}
-
-/********************************************************************//**
-Check whether the table uses the page compression page format.
-@return page compression level, or 0 if not compressed */
-UNIV_INLINE
-ulint
-dict_table_page_compression_level(
-/*==============================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table);
- ut_ad(dict_tf_get_page_compression(table->flags));
-
- return(dict_tf_get_page_compression_level(table->flags));
-}
-
-/********************************************************************//**
-Check whether the table uses the page compression page format.
-@return true if page compressed, false if not */
-UNIV_INLINE
-ibool
-dict_tf_get_page_compression(
-/*=========================*/
- ulint flags) /*!< in: flags */
-{
- return(DICT_TF_GET_PAGE_COMPRESSION(flags));
-}
-
-/********************************************************************//**
-Check whether the table uses the page compression page format.
-@return true if page compressed, false if not */
-UNIV_INLINE
-ibool
-dict_table_is_page_compressed(
-/*==========================*/
- const dict_table_t* table) /*!< in: table */
-{
- return (dict_tf_get_page_compression(table->flags));
-}
-
-/********************************************************************//**
-Extract the atomic writes flag from table flags.
-@return enumerated value of atomic writes */
-UNIV_INLINE
-atomic_writes_t
-dict_tf_get_atomic_writes(
-/*======================*/
- ulint flags) /*!< in: flags */
-{
- return((atomic_writes_t)DICT_TF_GET_ATOMIC_WRITES(flags));
-}
-
-/********************************************************************//**
-Check whether the table uses the atomic writes.
-@return enumerated value of atomic writes */
-UNIV_INLINE
-atomic_writes_t
-dict_table_get_atomic_writes(
-/*=========================*/
- const dict_table_t* table) /*!< in: table */
-{
- return ((atomic_writes_t)dict_tf_get_atomic_writes(table->flags));
-}
diff --git a/storage/xtradb/include/dict0priv.h b/storage/xtradb/include/dict0priv.h
deleted file mode 100644
index e034662aba0..00000000000
--- a/storage/xtradb/include/dict0priv.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0priv.h
-Data dictionary private functions
-
-Created Fri 2 Jul 2010 13:30:38 EST - Sunny Bains
-*******************************************************/
-
-#ifndef dict0priv_h
-#define dict0priv_h
-
-/**********************************************************************//**
-Gets a table; loads it to the dictionary cache if necessary. A low-level
-function. Note: Not to be called from outside dict0*c functions.
-@return table, NULL if not found */
-UNIV_INLINE
-dict_table_t*
-dict_table_get_low(
-/*===============*/
- const char* table_name); /*!< in: table name */
-
-/**********************************************************************//**
-Checks if a table is in the dictionary cache.
-@return table, NULL if not found */
-UNIV_INLINE
-dict_table_t*
-dict_table_check_if_in_cache_low(
-/*=============================*/
- const char* table_name); /*!< in: table name */
-
-/**********************************************************************//**
-Returns a table object based on table id.
-@return table, NULL if does not exist */
-UNIV_INLINE
-dict_table_t*
-dict_table_open_on_id_low(
-/*=====================*/
- table_id_t table_id, /*!< in: table id */
- dict_err_ignore_t ignore_err, /*!< in: errors to ignore
- when loading the table */
- ibool open_only_if_in_cache);
-
-#ifndef UNIV_NONINL
-#include "dict0priv.ic"
-#endif
-
-#endif /* dict0priv.h */
diff --git a/storage/xtradb/include/dict0priv.ic b/storage/xtradb/include/dict0priv.ic
deleted file mode 100644
index 983218af78a..00000000000
--- a/storage/xtradb/include/dict0priv.ic
+++ /dev/null
@@ -1,126 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/dict0priv.ic
-Data dictionary system private include file
-
-Created Wed 13 Oct 2010 16:10:14 EST Sunny Bains
-***********************************************************************/
-
-#include "dict0dict.h"
-#include "dict0load.h"
-#include "dict0priv.h"
-#ifndef UNIV_HOTBACKUP
-
-/**********************************************************************//**
-Gets a table; loads it to the dictionary cache if necessary. A low-level
-function.
-@return table, NULL if not found */
-UNIV_INLINE
-dict_table_t*
-dict_table_get_low(
-/*===============*/
- const char* table_name) /*!< in: table name */
-{
- dict_table_t* table;
-
- ut_ad(table_name);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- table = dict_table_check_if_in_cache_low(table_name);
-
- if (table && table->corrupted) {
- fprintf(stderr, "InnoDB: table");
- ut_print_name(stderr, NULL, TRUE, table->name);
- if (srv_load_corrupted) {
- fputs(" is corrupted, but"
- " innodb_force_load_corrupted is set\n", stderr);
- } else {
- fputs(" is corrupted\n", stderr);
- return(NULL);
- }
- }
-
- if (table == NULL) {
- table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE);
- }
-
- ut_ad(!table || table->cached);
-
- return(table);
-}
-
-/**********************************************************************//**
-Returns a table object based on table id.
-@return table, NULL if does not exist */
-UNIV_INLINE
-dict_table_t*
-dict_table_open_on_id_low(
-/*======================*/
- table_id_t table_id, /*!< in: table id */
- dict_err_ignore_t ignore_err, /*!< in: errors to ignore
- when loading the table */
- ibool open_only_if_in_cache)
-{
- dict_table_t* table;
- ulint fold;
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- /* Look for the table name in the hash table */
- fold = ut_fold_ull(table_id);
-
- HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold,
- dict_table_t*, table, ut_ad(table->cached),
- table->id == table_id);
- if (table == NULL && !open_only_if_in_cache) {
- table = dict_load_table_on_id(table_id, ignore_err);
- }
-
- ut_ad(!table || table->cached);
-
- /* TODO: should get the type information from MySQL */
-
- return(table);
-}
-
-/**********************************************************************//**
-Checks if a table is in the dictionary cache.
-@return table, NULL if not found */
-UNIV_INLINE
-dict_table_t*
-dict_table_check_if_in_cache_low(
-/*=============================*/
- const char* table_name) /*!< in: table name */
-{
- dict_table_t* table;
- ulint table_fold;
-
- ut_ad(table_name);
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- /* Look for the table name in the hash table */
- table_fold = ut_fold_string(table_name);
-
- HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold,
- dict_table_t*, table, ut_ad(table->cached),
- !strcmp(table->name, table_name));
- return(table);
-}
-#endif /*! UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/dict0stats.h b/storage/xtradb/include/dict0stats.h
deleted file mode 100644
index 72501bf9429..00000000000
--- a/storage/xtradb/include/dict0stats.h
+++ /dev/null
@@ -1,235 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2009, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0stats.h
-Code used for calculating and manipulating table statistics.
-
-Created Jan 06, 2010 Vasil Dimov
-*******************************************************/
-
-#ifndef dict0stats_h
-#define dict0stats_h
-
-#include "univ.i"
-
-#include "db0err.h"
-#include "dict0types.h"
-#include "trx0types.h"
-
-enum dict_stats_upd_option_t {
- DICT_STATS_RECALC_PERSISTENT,/* (re) calculate the
- statistics using a precise and slow
- algo and save them to the persistent
- storage, if the persistent storage is
- not present then emit a warning and
- fall back to transient stats */
- DICT_STATS_RECALC_TRANSIENT,/* (re) calculate the statistics
- using an imprecise quick algo
- without saving the results
- persistently */
- DICT_STATS_EMPTY_TABLE, /* Write all zeros (or 1 where it makes sense)
- into a table and its indexes' statistics
- members. The resulting stats correspond to an
- empty table. If the table is using persistent
- statistics, then they are saved on disk. */
- DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY /* fetch the stats
- from the persistent storage if the in-memory
- structures have not been initialized yet,
- otherwise do nothing */
-};
-
-/*********************************************************************//**
-Calculates new estimates for table and index statistics. This function
-is relatively quick and is used to calculate transient statistics that
-are not saved on disk.
-This was the only way to calculate statistics before the
-Persistent Statistics feature was introduced. */
-UNIV_INTERN
-void
-dict_stats_update_transient(
-/*========================*/
- dict_table_t* table); /*!< in/out: table */
-
-/*********************************************************************//**
-Set the persistent statistics flag for a given table. This is set only
-in the in-memory table object and is not saved on disk. It will be read
-from the .frm file upon first open from MySQL after a server restart. */
-UNIV_INLINE
-void
-dict_stats_set_persistent(
-/*======================*/
- dict_table_t* table, /*!< in/out: table */
- ibool ps_on, /*!< in: persistent stats explicitly enabled */
- ibool ps_off) /*!< in: persistent stats explicitly disabled */
- MY_ATTRIBUTE((nonnull));
-
-/*********************************************************************//**
-Check whether persistent statistics is enabled for a given table.
-@return TRUE if enabled, FALSE otherwise */
-UNIV_INLINE
-ibool
-dict_stats_is_persistent_enabled(
-/*=============================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/*********************************************************************//**
-Set the auto recalc flag for a given table (only honored for a persistent
-stats enabled table). The flag is set only in the in-memory table object
-and is not saved in InnoDB files. It will be read from the .frm file upon
-first open from MySQL after a server restart. */
-UNIV_INLINE
-void
-dict_stats_auto_recalc_set(
-/*=======================*/
- dict_table_t* table, /*!< in/out: table */
- ibool auto_recalc_on, /*!< in: explicitly enabled */
- ibool auto_recalc_off); /*!< in: explicitly disabled */
-
-/*********************************************************************//**
-Check whether auto recalc is enabled for a given table.
-@return TRUE if enabled, FALSE otherwise */
-UNIV_INLINE
-ibool
-dict_stats_auto_recalc_is_enabled(
-/*==============================*/
- const dict_table_t* table); /*!< in: table */
-
-/*********************************************************************//**
-Initialize table's stats for the first time when opening a table. */
-UNIV_INLINE
-void
-dict_stats_init(
-/*============*/
- dict_table_t* table); /*!< in/out: table */
-
-/*********************************************************************//**
-Deinitialize table's stats after the last close of the table. This is
-used to detect "FLUSH TABLE" and refresh the stats upon next open. */
-UNIV_INLINE
-void
-dict_stats_deinit(
-/*==============*/
- dict_table_t* table) /*!< in/out: table */
- MY_ATTRIBUTE((nonnull));
-
-/*********************************************************************//**
-Calculates new estimates for table and index statistics. The statistics
-are used in query optimization.
-@return DB_* error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-dict_stats_update(
-/*==============*/
- dict_table_t* table, /*!< in/out: table */
- dict_stats_upd_option_t stats_upd_option);
- /*!< in: whether to (re) calc
- the stats or to fetch them from
- the persistent storage */
-
-/*********************************************************************//**
-Removes the information for a particular index's stats from the persistent
-storage if it exists and if there is data stored for this index.
-This function creates its own trx and commits it.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_stats_drop_index(
-/*==================*/
- const char* tname, /*!< in: table name */
- const char* iname, /*!< in: index name */
- char* errstr, /*!< out: error message if != DB_SUCCESS
- is returned */
- ulint errstr_sz);/*!< in: size of the errstr buffer */
-
-/*********************************************************************//**
-Removes the statistics for a table and all of its indexes from the
-persistent storage if it exists and if there is data stored for the table.
-This function creates its own transaction and commits it.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_stats_drop_table(
-/*==================*/
- const char* table_name, /*!< in: table name */
- char* errstr, /*!< out: error message
- if != DB_SUCCESS is returned */
- ulint errstr_sz); /*!< in: size of errstr buffer */
-
-/*********************************************************************//**
-Fetches or calculates new estimates for index statistics. */
-UNIV_INTERN
-void
-dict_stats_update_for_index(
-/*========================*/
- dict_index_t* index) /*!< in/out: index */
- MY_ATTRIBUTE((nonnull));
-
-/*********************************************************************//**
-Renames a table in InnoDB persistent stats storage.
-This function creates its own transaction and commits it.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_stats_rename_table(
-/*====================*/
- const char* old_name, /*!< in: old table name */
- const char* new_name, /*!< in: new table name */
- char* errstr, /*!< out: error string if != DB_SUCCESS
- is returned */
- size_t errstr_sz); /*!< in: errstr size */
-
-/*********************************************************************//**
-Save defragmentation result.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_stats_save_defrag_summary(
- dict_index_t* index); /*!< in: index */
-
-/*********************************************************************//**
-Save defragmentation stats for a given index.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-dict_stats_save_defrag_stats(
- dict_index_t* index); /*!< in: index */
-
-/**********************************************************************//**
-Clear defragmentation summary. */
-UNIV_INTERN
-void
-dict_stats_empty_defrag_summary(
-/*==================*/
- dict_index_t* index); /*!< in: index to clear defragmentation stats */
-
-/**********************************************************************//**
-Clear defragmentation related index stats. */
-UNIV_INTERN
-void
-dict_stats_empty_defrag_stats(
-/*==================*/
- dict_index_t* index); /*!< in: index to clear defragmentation stats */
-
-
-#ifndef UNIV_NONINL
-#include "dict0stats.ic"
-#endif
-
-#endif /* dict0stats_h */
diff --git a/storage/xtradb/include/dict0stats.ic b/storage/xtradb/include/dict0stats.ic
deleted file mode 100644
index ec9a9065470..00000000000
--- a/storage/xtradb/include/dict0stats.ic
+++ /dev/null
@@ -1,236 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0stats.ic
-Code used for calculating and manipulating table statistics.
-
-Created Jan 23, 2012 Vasil Dimov
-*******************************************************/
-
-#include "univ.i"
-#include "dict0dict.h" /* dict_table_stats_lock() */
-#include "dict0types.h" /* dict_table_t */
-#include "srv0srv.h" /* srv_stats_persistent, srv_stats_auto_recalc */
-
-/*********************************************************************//**
-Set the persistent statistics flag for a given table. This is set only
-in the in-memory table object and is not saved on disk. It will be read
-from the .frm file upon first open from MySQL after a server restart. */
-UNIV_INLINE
-void
-dict_stats_set_persistent(
-/*======================*/
- dict_table_t* table, /*!< in/out: table */
- ibool ps_on, /*!< in: persistent stats explicitly enabled */
- ibool ps_off) /*!< in: persistent stats explicitly disabled */
-{
- /* Not allowed to have both flags set, but a CREATE or ALTER
- statement that contains "STATS_PERSISTENT=0 STATS_PERSISTENT=1" would
- end up having both set. In this case we clear the OFF flag. */
- if (ps_on && ps_off) {
- ps_off = FALSE;
- }
-
- ib_uint32_t stat_persistent = 0;
-
- if (ps_on) {
- stat_persistent |= DICT_STATS_PERSISTENT_ON;
- }
-
- if (ps_off) {
- stat_persistent |= DICT_STATS_PERSISTENT_OFF;
- }
-
- /* we rely on this assignment to be atomic */
- table->stat_persistent = stat_persistent;
-}
-
-/*********************************************************************//**
-Check whether persistent statistics is enabled for a given table.
-@return TRUE if enabled, FALSE otherwise */
-UNIV_INLINE
-ibool
-dict_stats_is_persistent_enabled(
-/*=============================*/
- const dict_table_t* table) /*!< in: table */
-{
- /* Because of the nature of this check (non-locking) it is possible
- that a table becomes:
- * PS-disabled immediately after this function has returned TRUE or
- * PS-enabled immediately after this function has returned FALSE.
- This means that it is possible that we do:
- + dict_stats_update(DICT_STATS_RECALC_PERSISTENT) on a table that has
- just been PS-disabled or
- + dict_stats_update(DICT_STATS_RECALC_TRANSIENT) on a table that has
- just been PS-enabled.
- This is acceptable. Avoiding this would mean that we would have to
- protect the ::stat_persistent with dict_table_stats_lock() like the
- other ::stat_ members which would be too big performance penalty,
- especially when this function is called from
- row_update_statistics_if_needed(). */
-
- /* we rely on this read to be atomic */
- ib_uint32_t stat_persistent = table->stat_persistent;
-
- if (stat_persistent & DICT_STATS_PERSISTENT_ON) {
- ut_ad(!(stat_persistent & DICT_STATS_PERSISTENT_OFF));
- return(TRUE);
- } else if (stat_persistent & DICT_STATS_PERSISTENT_OFF) {
- return(FALSE);
- } else {
- return(srv_stats_persistent);
- }
-}
-
-/*********************************************************************//**
-Set the auto recalc flag for a given table (only honored for a persistent
-stats enabled table). The flag is set only in the in-memory table object
-and is not saved in InnoDB files. It will be read from the .frm file upon
-first open from MySQL after a server restart. */
-UNIV_INLINE
-void
-dict_stats_auto_recalc_set(
-/*=======================*/
- dict_table_t* table, /*!< in/out: table */
- ibool auto_recalc_on, /*!< in: explicitly enabled */
- ibool auto_recalc_off) /*!< in: explicitly disabled */
-{
- ut_ad(!auto_recalc_on || !auto_recalc_off);
-
- ib_uint32_t stats_auto_recalc = 0;
-
- if (auto_recalc_on) {
- stats_auto_recalc |= DICT_STATS_AUTO_RECALC_ON;
- }
-
- if (auto_recalc_off) {
- stats_auto_recalc |= DICT_STATS_AUTO_RECALC_OFF;
- }
-
- /* we rely on this assignment to be atomic */
- table->stats_auto_recalc = stats_auto_recalc;
-}
-
-/*********************************************************************//**
-Check whether auto recalc is enabled for a given table.
-@return TRUE if enabled, FALSE otherwise */
-UNIV_INLINE
-ibool
-dict_stats_auto_recalc_is_enabled(
-/*==============================*/
- const dict_table_t* table) /*!< in: table */
-{
- /* we rely on this read to be atomic */
- ib_uint32_t stats_auto_recalc = table->stats_auto_recalc;
-
- if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_ON) {
- ut_ad(!(stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF));
- return(TRUE);
- } else if (stats_auto_recalc & DICT_STATS_AUTO_RECALC_OFF) {
- return(FALSE);
- } else {
- return(srv_stats_auto_recalc);
- }
-}
-
-/*********************************************************************//**
-Initialize table's stats for the first time when opening a table. */
-UNIV_INLINE
-void
-dict_stats_init(
-/*============*/
- dict_table_t* table) /*!< in/out: table */
-{
- ut_ad(!mutex_own(&dict_sys->mutex));
-
- if (table->stat_initialized) {
- return;
- }
-
- dict_stats_upd_option_t opt;
-
- if (dict_stats_is_persistent_enabled(table)) {
- opt = DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY;
- } else {
- opt = DICT_STATS_RECALC_TRANSIENT;
- }
-
- dict_stats_update(table, opt);
-}
-
-/*********************************************************************//**
-Deinitialize table's stats after the last close of the table. This is
-used to detect "FLUSH TABLE" and refresh the stats upon next open. */
-UNIV_INLINE
-void
-dict_stats_deinit(
-/*==============*/
- dict_table_t* table) /*!< in/out: table */
-{
- ut_ad(mutex_own(&dict_sys->mutex));
-
- ut_a(table->n_ref_count == 0);
-
- dict_table_stats_lock(table, RW_X_LATCH);
-
- if (!table->stat_initialized) {
- dict_table_stats_unlock(table, RW_X_LATCH);
- return;
- }
-
- table->stat_initialized = FALSE;
-
-#ifdef UNIV_DEBUG_VALGRIND
- UNIV_MEM_INVALID(&table->stat_n_rows,
- sizeof(table->stat_n_rows));
- UNIV_MEM_INVALID(&table->stat_clustered_index_size,
- sizeof(table->stat_clustered_index_size));
- UNIV_MEM_INVALID(&table->stat_sum_of_other_index_sizes,
- sizeof(table->stat_sum_of_other_index_sizes));
- UNIV_MEM_INVALID(&table->stat_modified_counter,
- sizeof(table->stat_modified_counter));
-
- dict_index_t* index;
-
- for (index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
-
- ulint n_uniq = dict_index_get_n_unique(index);
-
- UNIV_MEM_INVALID(
- index->stat_n_diff_key_vals,
- n_uniq * sizeof(index->stat_n_diff_key_vals[0]));
- UNIV_MEM_INVALID(
- index->stat_n_sample_sizes,
- n_uniq * sizeof(index->stat_n_sample_sizes[0]));
- UNIV_MEM_INVALID(
- index->stat_n_non_null_key_vals,
- n_uniq * sizeof(index->stat_n_non_null_key_vals[0]));
- UNIV_MEM_INVALID(
- &index->stat_index_size,
- sizeof(index->stat_index_size));
- UNIV_MEM_INVALID(
- &index->stat_n_leaf_pages,
- sizeof(index->stat_n_leaf_pages));
- }
-#endif /* UNIV_DEBUG_VALGRIND */
-
- dict_table_stats_unlock(table, RW_X_LATCH);
-}
diff --git a/storage/xtradb/include/dict0stats_bg.h b/storage/xtradb/include/dict0stats_bg.h
deleted file mode 100644
index 8f3385eb22b..00000000000
--- a/storage/xtradb/include/dict0stats_bg.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0stats_bg.h
-Code used for background table and index stats gathering.
-
-Created Apr 26, 2012 Vasil Dimov
-*******************************************************/
-
-#ifndef dict0stats_bg_h
-#define dict0stats_bg_h
-
-#include "univ.i"
-
-#include "dict0types.h" /* dict_table_t, table_id_t */
-#include "os0sync.h" /* os_event_t */
-#include "os0thread.h" /* DECLARE_THREAD */
-
-/** Event to wake up dict_stats_thread on dict_stats_recalc_pool_add()
-or shutdown. Not protected by any mutex. */
-extern os_event_t dict_stats_event;
-
-/*****************************************************************//**
-Add a table to the recalc pool, which is processed by the
-background stats gathering thread. Only the table id is added to the
-list, so the table can be closed after being enqueued and it will be
-opened when needed. If the table does not exist later (has been DROPped),
-then it will be removed from the pool and skipped. */
-UNIV_INTERN
-void
-dict_stats_recalc_pool_add(
-/*=======================*/
- const dict_table_t* table); /*!< in: table to add */
-
-/*****************************************************************//**
-Delete a given table from the auto recalc pool.
-dict_stats_recalc_pool_del() */
-UNIV_INTERN
-void
-dict_stats_recalc_pool_del(
-/*=======================*/
- const dict_table_t* table); /*!< in: table to remove */
-
-/*****************************************************************//**
-Add an index in a table to the defrag pool, which is processed by the
-background stats gathering thread. Only the table id and index id are
-added to the list, so the table can be closed after being enqueued and
-it will be opened when needed. If the table or index does not exist later
-(has been DROPped), then it will be removed from the pool and skipped. */
-UNIV_INTERN
-void
-dict_stats_defrag_pool_add(
-/*=======================*/
- const dict_index_t* index); /*!< in: table to add */
-
-/*****************************************************************//**
-Delete a given index from the auto defrag pool. */
-UNIV_INTERN
-void
-dict_stats_defrag_pool_del(
-/*=======================*/
- const dict_table_t* table, /*!<in: if given, remove
- all entries for the table */
- const dict_index_t* index); /*!< in: index to remove */
-
-/** Yield the data dictionary latch when waiting
-for the background thread to stop accessing a table.
-@param trx transaction holding the data dictionary locks */
-#define DICT_STATS_BG_YIELD(trx) do { \
- row_mysql_unlock_data_dictionary(trx); \
- os_thread_sleep(250000); \
- row_mysql_lock_data_dictionary(trx); \
-} while (0)
-
-/*****************************************************************//**
-Request the background collection of statistics to stop for a table.
-@retval true when no background process is active
-@retval false when it is not safe to modify the table definition */
-UNIV_INLINE
-bool
-dict_stats_stop_bg(
-/*===============*/
- dict_table_t* table) /*!< in/out: table */
- MY_ATTRIBUTE((warn_unused_result));
-
-/*****************************************************************//**
-Wait until background stats thread has stopped using the specified table.
-The caller must have locked the data dictionary using
-row_mysql_lock_data_dictionary() and this function may unlock it temporarily
-and restore the lock before it exits.
-The background stats thread is guaranteed not to start using the specified
-table after this function returns and before the caller unlocks the data
-dictionary because it sets the BG_STAT_IN_PROGRESS bit in table->stats_bg_flag
-under dict_sys->mutex. */
-UNIV_INTERN
-void
-dict_stats_wait_bg_to_stop_using_table(
-/*===================================*/
- dict_table_t* table, /*!< in/out: table */
- trx_t* trx); /*!< in/out: transaction to use for
- unlocking/locking the data dict */
-/*****************************************************************//**
-Initialize global variables needed for the operation of dict_stats_thread().
-Must be called before dict_stats_thread() is started. */
-UNIV_INTERN
-void
-dict_stats_thread_init();
-/*====================*/
-
-/*****************************************************************//**
-Free resources allocated by dict_stats_thread_init(), must be called
-after dict_stats_thread() has exited. */
-UNIV_INTERN
-void
-dict_stats_thread_deinit();
-/*======================*/
-
-/*****************************************************************//**
-This is the thread for background stats gathering. It pops tables, from
-the auto recalc list and proceeds them, eventually recalculating their
-statistics.
-@return this function does not return, it calls os_thread_exit() */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(dict_stats_thread)(
-/*==============================*/
- void* arg); /*!< in: a dummy parameter
- required by os_thread_create */
-
-/** Shut down the dict_stats_thread. */
-void
-dict_stats_shutdown();
-
-# ifndef UNIV_NONINL
-# include "dict0stats_bg.ic"
-# endif
-
-#endif /* dict0stats_bg_h */
diff --git a/storage/xtradb/include/dict0stats_bg.ic b/storage/xtradb/include/dict0stats_bg.ic
deleted file mode 100644
index 87e3225de58..00000000000
--- a/storage/xtradb/include/dict0stats_bg.ic
+++ /dev/null
@@ -1,45 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2012, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0stats_bg.ic
-Code used for background table and index stats gathering.
-
-Created Feb 8, 2013 Marko Makela
-*******************************************************/
-
-/*****************************************************************//**
-Request the background collection of statistics to stop for a table.
-@retval true when no background process is active
-@retval false when it is not safe to modify the table definition */
-UNIV_INLINE
-bool
-dict_stats_stop_bg(
-/*===============*/
- dict_table_t* table) /*!< in/out: table */
-{
- ut_ad(!srv_read_only_mode);
- ut_ad(mutex_own(&dict_sys->mutex));
-
- if (!(table->stats_bg_flag & BG_STAT_IN_PROGRESS)) {
- return(true);
- }
-
- table->stats_bg_flag |= BG_STAT_SHOULD_QUIT;
- return(false);
-}
diff --git a/storage/xtradb/include/dict0types.h b/storage/xtradb/include/dict0types.h
deleted file mode 100644
index 909fdf9cf3d..00000000000
--- a/storage/xtradb/include/dict0types.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, SkySQL Ab. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dict0types.h
-Data dictionary global types
-
-Created 1/8/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dict0types_h
-#define dict0types_h
-
-struct dict_sys_t;
-struct dict_col_t;
-struct dict_field_t;
-struct dict_index_t;
-struct dict_table_t;
-struct dict_foreign_t;
-
-struct ind_node_t;
-struct tab_node_t;
-
-/* Space id and page no where the dictionary header resides */
-#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
-#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
-
-/* The ibuf table and indexes's ID are assigned as the number
-DICT_IBUF_ID_MIN plus the space id */
-#define DICT_IBUF_ID_MIN 0xFFFFFFFF00000000ULL
-
-typedef ib_id_t table_id_t;
-typedef ib_id_t index_id_t;
-
-/** Error to ignore when we load table dictionary into memory. However,
-the table and index will be marked as "corrupted", and caller will
-be responsible to deal with corrupted table or index.
-Note: please define the IGNORE_ERR_* as bits, so their value can
-be or-ed together */
-enum dict_err_ignore_t {
- DICT_ERR_IGNORE_NONE = 0, /*!< no error to ignore */
- DICT_ERR_IGNORE_INDEX_ROOT = 1, /*!< ignore error if index root
- page is FIL_NULL or incorrect value */
- DICT_ERR_IGNORE_CORRUPT = 2, /*!< skip corrupted indexes */
- DICT_ERR_IGNORE_FK_NOKEY = 4, /*!< ignore error if any foreign
- key is missing */
- DICT_ERR_IGNORE_RECOVER_LOCK = 8,
- /*!< Used when recovering table locks
- for resurrected transactions.
- Silently load a missing
- tablespace, and do not load
- incomplete index definitions. */
- DICT_ERR_IGNORE_ALL = 0xFFFF /*!< ignore all errors */
-};
-
-/** Quiescing states for flushing tables to disk. */
-enum ib_quiesce_t {
- QUIESCE_NONE,
- QUIESCE_START, /*!< Initialise, prepare to start */
- QUIESCE_COMPLETE /*!< All done */
-};
-
-/** Prefix for tmp tables, adopted from sql/table.h */
-#define tmp_file_prefix "#sql"
-#define tmp_file_prefix_length 4
-#define TEMP_FILE_PREFIX_INNODB "#sql-ib"
-
-#define TEMP_TABLE_PREFIX "#sql"
-#define TEMP_TABLE_PATH_PREFIX "/" TEMP_TABLE_PREFIX
-
-
-/** Enum values for atomic_writes table option */
-typedef enum {
- ATOMIC_WRITES_DEFAULT = 0,
- ATOMIC_WRITES_ON = 1,
- ATOMIC_WRITES_OFF = 2
-} atomic_writes_t;
-
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-/** Flag to control insert buffer debugging. */
-extern uint ibuf_debug;
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
-#endif
diff --git a/storage/xtradb/include/dyn0dyn.h b/storage/xtradb/include/dyn0dyn.h
deleted file mode 100644
index 20963a1472b..00000000000
--- a/storage/xtradb/include/dyn0dyn.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dyn0dyn.h
-The dynamically allocated array
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef dyn0dyn_h
-#define dyn0dyn_h
-
-#include "univ.i"
-#include "ut0lst.h"
-#include "mem0mem.h"
-
-/** A block in a dynamically allocated array */
-struct dyn_block_t;
-/** Dynamically allocated array */
-typedef dyn_block_t dyn_array_t;
-
-/** This is the initial 'payload' size of a dynamic array;
-this must be > MLOG_BUF_MARGIN + 30! */
-#define DYN_ARRAY_DATA_SIZE 512
-
-/*********************************************************************//**
-Initializes a dynamic array.
-@return initialized dyn array */
-UNIV_INLINE
-dyn_array_t*
-dyn_array_create(
-/*=============*/
- dyn_array_t* arr); /*!< in/out memory buffer of
- size sizeof(dyn_array_t) */
-/************************************************************//**
-Frees a dynamic array. */
-UNIV_INLINE
-void
-dyn_array_free(
-/*===========*/
- dyn_array_t* arr) /*!< in,own: dyn array */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to a buffer in it.
-After copying the elements, the caller must close the buffer using
-dyn_array_close.
-@return pointer to the buffer */
-UNIV_INLINE
-byte*
-dyn_array_open(
-/*===========*/
- dyn_array_t* arr, /*!< in: dynamic array */
- ulint size) /*!< in: size in bytes of the buffer; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
- MY_ATTRIBUTE((warn_unused_result));
-/*********************************************************************//**
-Closes the buffer returned by dyn_array_open. */
-UNIV_INLINE
-void
-dyn_array_close(
-/*============*/
- dyn_array_t* arr, /*!< in: dynamic array */
- const byte* ptr); /*!< in: end of used space */
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to
-the added element. The caller must copy the element to
-the pointer returned.
-@return pointer to the element */
-UNIV_INLINE
-void*
-dyn_array_push(
-/*===========*/
- dyn_array_t* arr, /*!< in/out: dynamic array */
- ulint size) /*!< in: size in bytes of the element */
- MY_ATTRIBUTE((warn_unused_result));
-/************************************************************//**
-Returns pointer to an element in dyn array.
-@return pointer to element */
-UNIV_INLINE
-void*
-dyn_array_get_element(
-/*==================*/
- const dyn_array_t* arr, /*!< in: dyn array */
- ulint pos) /*!< in: position of element
- in bytes from array start */
- MY_ATTRIBUTE((warn_unused_result));
-/************************************************************//**
-Returns the size of stored data in a dyn array.
-@return data size in bytes */
-UNIV_INLINE
-ulint
-dyn_array_get_data_size(
-/*====================*/
- const dyn_array_t* arr) /*!< in: dyn array */
- MY_ATTRIBUTE((warn_unused_result));
-/************************************************************//**
-Gets the first block in a dyn array.
-@param arr dyn array
-@return first block */
-#define dyn_array_get_first_block(arr) (arr)
-/************************************************************//**
-Gets the last block in a dyn array.
-@param arr dyn array
-@return last block */
-#define dyn_array_get_last_block(arr) \
- ((arr)->heap ? UT_LIST_GET_LAST((arr)->base) : (arr))
-/********************************************************************//**
-Gets the next block in a dyn array.
-@param arr dyn array
-@param block dyn array block
-@return pointer to next, NULL if end of list */
-#define dyn_array_get_next_block(arr, block) \
- ((arr)->heap ? UT_LIST_GET_NEXT(list, block) : NULL)
-/********************************************************************//**
-Gets the previous block in a dyn array.
-@param arr dyn array
-@param block dyn array block
-@return pointer to previous, NULL if end of list */
-#define dyn_array_get_prev_block(arr, block) \
- ((arr)->heap ? UT_LIST_GET_PREV(list, block) : NULL)
-/********************************************************************//**
-Gets the number of used bytes in a dyn array block.
-@return number of bytes used */
-UNIV_INLINE
-ulint
-dyn_block_get_used(
-/*===============*/
- const dyn_block_t* block) /*!< in: dyn array block */
- MY_ATTRIBUTE((warn_unused_result));
-/********************************************************************//**
-Gets pointer to the start of data in a dyn array block.
-@return pointer to data */
-UNIV_INLINE
-byte*
-dyn_block_get_data(
-/*===============*/
- const dyn_block_t* block) /*!< in: dyn array block */
- MY_ATTRIBUTE((warn_unused_result));
-/********************************************************//**
-Pushes n bytes to a dyn array. */
-UNIV_INLINE
-void
-dyn_push_string(
-/*============*/
- dyn_array_t* arr, /*!< in/out: dyn array */
- const byte* str, /*!< in: string to write */
- ulint len) /*!< in: string length */
- MY_ATTRIBUTE((nonnull));
-
-/*#################################################################*/
-
-/** @brief A block in a dynamically allocated array.
-NOTE! Do not access the fields of the struct directly: the definition
-appears here only for the compiler to know its size! */
-struct dyn_block_t{
- mem_heap_t* heap; /*!< in the first block this is != NULL
- if dynamic allocation has been needed */
- ulint used; /*!< number of data bytes used in this block;
- DYN_BLOCK_FULL_FLAG is set when the block
- becomes full */
- byte data[DYN_ARRAY_DATA_SIZE];
- /*!< storage for array elements */
- UT_LIST_BASE_NODE_T(dyn_block_t) base;
- /*!< linear list of dyn blocks: this node is
- used only in the first block */
- UT_LIST_NODE_T(dyn_block_t) list;
- /*!< linear list node: used in all blocks */
-#ifdef UNIV_DEBUG
- ulint buf_end;/*!< only in the debug version: if dyn
- array is opened, this is the buffer
- end offset, else this is 0 */
- ulint magic_n;/*!< magic number (DYN_BLOCK_MAGIC_N) */
-#endif
-};
-
-
-#ifndef UNIV_NONINL
-#include "dyn0dyn.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/dyn0dyn.ic b/storage/xtradb/include/dyn0dyn.ic
deleted file mode 100644
index 6e97649245e..00000000000
--- a/storage/xtradb/include/dyn0dyn.ic
+++ /dev/null
@@ -1,298 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/dyn0dyn.ic
-The dynamically allocated array
-
-Created 2/5/1996 Heikki Tuuri
-*******************************************************/
-
-/** Value of dyn_block_t::magic_n */
-#define DYN_BLOCK_MAGIC_N 375767
-/** Flag for dyn_block_t::used that indicates a full block */
-#define DYN_BLOCK_FULL_FLAG 0x1000000UL
-
-/************************************************************//**
-Adds a new block to a dyn array.
-@return created block */
-UNIV_INTERN
-dyn_block_t*
-dyn_array_add_block(
-/*================*/
- dyn_array_t* arr) /*!< in/out: dyn array */
- MY_ATTRIBUTE((warn_unused_result));
-
-/********************************************************************//**
-Gets the number of used bytes in a dyn array block.
-@return number of bytes used */
-UNIV_INLINE
-ulint
-dyn_block_get_used(
-/*===============*/
- const dyn_block_t* block) /*!< in: dyn array block */
-{
- return((block->used) & ~DYN_BLOCK_FULL_FLAG);
-}
-
-/********************************************************************//**
-Gets pointer to the start of data in a dyn array block.
-@return pointer to data */
-UNIV_INLINE
-byte*
-dyn_block_get_data(
-/*===============*/
- const dyn_block_t* block) /*!< in: dyn array block */
-{
- ut_ad(block);
-
- return(const_cast<byte*>(block->data));
-}
-
-/*********************************************************************//**
-Initializes a dynamic array.
-@return initialized dyn array */
-UNIV_INLINE
-dyn_array_t*
-dyn_array_create(
-/*=============*/
- dyn_array_t* arr) /*!< in/out: memory buffer of
- size sizeof(dyn_array_t) */
-{
-#if DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG
-# error "DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG"
-#endif
-
- arr->heap = NULL;
- arr->used = 0;
-
- ut_d(arr->buf_end = 0);
- ut_d(arr->magic_n = DYN_BLOCK_MAGIC_N);
-
- return(arr);
-}
-
-/************************************************************//**
-Frees a dynamic array. */
-UNIV_INLINE
-void
-dyn_array_free(
-/*===========*/
- dyn_array_t* arr) /*!< in: dyn array */
-{
- if (arr->heap != NULL) {
- mem_heap_free(arr->heap);
- }
-
- ut_d(arr->magic_n = 0);
-}
-
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to the added element.
-The caller must copy the element to the pointer returned.
-@return pointer to the element */
-UNIV_INLINE
-void*
-dyn_array_push(
-/*===========*/
- dyn_array_t* arr, /*!< in/out: dynamic array */
- ulint size) /*!< in: size in bytes of the element */
-{
- dyn_block_t* block;
- ulint used;
-
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
- ut_ad(size <= DYN_ARRAY_DATA_SIZE);
- ut_ad(size);
-
- block = arr;
-
- if (block->used + size > DYN_ARRAY_DATA_SIZE) {
- /* Get the last array block */
-
- block = dyn_array_get_last_block(arr);
-
- if (block->used + size > DYN_ARRAY_DATA_SIZE) {
- block = dyn_array_add_block(arr);
- }
- }
-
- used = block->used;
-
- block->used = used + size;
- ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
-
- return(block->data + used);
-}
-
-/*********************************************************************//**
-Makes room on top of a dyn array and returns a pointer to a buffer in it.
-After copying the elements, the caller must close the buffer using
-dyn_array_close.
-@return pointer to the buffer */
-UNIV_INLINE
-byte*
-dyn_array_open(
-/*===========*/
- dyn_array_t* arr, /*!< in: dynamic array */
- ulint size) /*!< in: size in bytes of the buffer; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
-{
- dyn_block_t* block;
-
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
- ut_ad(size <= DYN_ARRAY_DATA_SIZE);
- ut_ad(size);
-
- block = arr;
-
- if (block->used + size > DYN_ARRAY_DATA_SIZE) {
- /* Get the last array block */
-
- block = dyn_array_get_last_block(arr);
-
- if (block->used + size > DYN_ARRAY_DATA_SIZE) {
- block = dyn_array_add_block(arr);
- ut_a(size <= DYN_ARRAY_DATA_SIZE);
- }
- }
-
- ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
- ut_ad(arr->buf_end == 0);
- ut_d(arr->buf_end = block->used + size);
-
- return(block->data + block->used);
-}
-
-/*********************************************************************//**
-Closes the buffer returned by dyn_array_open. */
-UNIV_INLINE
-void
-dyn_array_close(
-/*============*/
- dyn_array_t* arr, /*!< in/out: dynamic array */
- const byte* ptr) /*!< in: end of used space */
-{
- dyn_block_t* block;
-
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
- block = dyn_array_get_last_block(arr);
-
- ut_ad(arr->buf_end + block->data >= ptr);
-
- block->used = ptr - block->data;
-
- ut_ad(block->used <= DYN_ARRAY_DATA_SIZE);
-
- ut_d(arr->buf_end = 0);
-}
-
-/************************************************************//**
-Returns pointer to an element in dyn array.
-@return pointer to element */
-UNIV_INLINE
-void*
-dyn_array_get_element(
-/*==================*/
- const dyn_array_t* arr, /*!< in: dyn array */
- ulint pos) /*!< in: position of element
- in bytes from array start */
-{
- const dyn_block_t* block;
-
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
- /* Get the first array block */
- block = dyn_array_get_first_block(arr);
-
- if (arr->heap != NULL) {
- for (;;) {
- ulint used = dyn_block_get_used(block);
-
- if (pos < used) {
- break;
- }
-
- pos -= used;
- block = UT_LIST_GET_NEXT(list, block);
- ut_ad(block);
- }
- }
-
- ut_ad(block);
- ut_ad(dyn_block_get_used(block) >= pos);
-
- return(const_cast<byte*>(block->data) + pos);
-}
-
-/************************************************************//**
-Returns the size of stored data in a dyn array.
-@return data size in bytes */
-UNIV_INLINE
-ulint
-dyn_array_get_data_size(
-/*====================*/
- const dyn_array_t* arr) /*!< in: dyn array */
-{
- const dyn_block_t* block;
- ulint sum = 0;
-
- ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N);
-
- if (arr->heap == NULL) {
-
- return(arr->used);
- }
-
- /* Get the first array block */
- block = dyn_array_get_first_block(arr);
-
- while (block != NULL) {
- sum += dyn_block_get_used(block);
- block = dyn_array_get_next_block(arr, block);
- }
-
- return(sum);
-}
-
-/********************************************************//**
-Pushes n bytes to a dyn array. */
-UNIV_INLINE
-void
-dyn_push_string(
-/*============*/
- dyn_array_t* arr, /*!< in/out: dyn array */
- const byte* str, /*!< in: string to write */
- ulint len) /*!< in: string length */
-{
- ulint n_copied;
-
- while (len > 0) {
- if (len > DYN_ARRAY_DATA_SIZE) {
- n_copied = DYN_ARRAY_DATA_SIZE;
- } else {
- n_copied = len;
- }
-
- memcpy(dyn_array_push(arr, n_copied), str, n_copied);
-
- str += n_copied;
- len -= n_copied;
- }
-}
diff --git a/storage/xtradb/include/eval0eval.h b/storage/xtradb/include/eval0eval.h
deleted file mode 100644
index e3b1e6c16b6..00000000000
--- a/storage/xtradb/include/eval0eval.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/eval0eval.h
-SQL evaluator: evaluates simple data structures, like expressions, in
-a query graph
-
-Created 12/29/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef eval0eval_h
-#define eval0eval_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "pars0sym.h"
-#include "pars0pars.h"
-
-/*****************************************************************//**
-Free the buffer from global dynamic memory for a value of a que_node,
-if it has been allocated by eval_node_alloc_val_buf(). The freeing for
-pushed column values is done in sel_col_prefetch_buf_free. */
-UNIV_INTERN
-void
-eval_node_free_val_buf(
-/*===================*/
- que_node_t* node); /*!< in: query graph node */
-/*****************************************************************//**
-Evaluates a symbol table symbol. */
-UNIV_INLINE
-void
-eval_sym(
-/*=====*/
- sym_node_t* sym_node); /*!< in: symbol table node */
-/*****************************************************************//**
-Evaluates an expression. */
-UNIV_INLINE
-void
-eval_exp(
-/*=====*/
- que_node_t* exp_node); /*!< in: expression */
-/*****************************************************************//**
-Sets an integer value as the value of an expression node. */
-UNIV_INLINE
-void
-eval_node_set_int_val(
-/*==================*/
- que_node_t* node, /*!< in: expression node */
- lint val); /*!< in: value to set */
-/*****************************************************************//**
-Gets an integer value from an expression node.
-@return integer value */
-UNIV_INLINE
-lint
-eval_node_get_int_val(
-/*==================*/
- que_node_t* node); /*!< in: expression node */
-/*****************************************************************//**
-Copies a binary string value as the value of a query graph node. Allocates a
-new buffer if necessary. */
-UNIV_INLINE
-void
-eval_node_copy_and_alloc_val(
-/*=========================*/
- que_node_t* node, /*!< in: query graph node */
- const byte* str, /*!< in: binary string */
- ulint len); /*!< in: string length or UNIV_SQL_NULL */
-/*****************************************************************//**
-Copies a query node value to another node. */
-UNIV_INLINE
-void
-eval_node_copy_val(
-/*===============*/
- que_node_t* node1, /*!< in: node to copy to */
- que_node_t* node2); /*!< in: node to copy from */
-/*****************************************************************//**
-Gets an ibool value from a query node.
-@return ibool value */
-UNIV_INLINE
-ibool
-eval_node_get_ibool_val(
-/*====================*/
- que_node_t* node); /*!< in: query graph node */
-/*****************************************************************//**
-Evaluates a comparison node.
-@return the result of the comparison */
-UNIV_INTERN
-ibool
-eval_cmp(
-/*=====*/
- func_node_t* cmp_node); /*!< in: comparison node */
-
-
-#ifndef UNIV_NONINL
-#include "eval0eval.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/eval0eval.ic b/storage/xtradb/include/eval0eval.ic
deleted file mode 100644
index e4b1dd08017..00000000000
--- a/storage/xtradb/include/eval0eval.ic
+++ /dev/null
@@ -1,255 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/eval0eval.ic
-SQL evaluator: evaluates simple data structures, like expressions, in
-a query graph
-
-Created 12/29/1997 Heikki Tuuri
-*******************************************************/
-
-#include "que0que.h"
-#include "rem0cmp.h"
-#include "pars0grm.h"
-
-/*****************************************************************//**
-Evaluates a function node. */
-UNIV_INTERN
-void
-eval_func(
-/*======*/
- func_node_t* func_node); /*!< in: function node */
-/*****************************************************************//**
-Allocate a buffer from global dynamic memory for a value of a que_node.
-NOTE that this memory must be explicitly freed when the query graph is
-freed. If the node already has allocated buffer, that buffer is freed
-here. NOTE that this is the only function where dynamic memory should be
-allocated for a query node val field.
-@return pointer to allocated buffer */
-UNIV_INTERN
-byte*
-eval_node_alloc_val_buf(
-/*====================*/
- que_node_t* node, /*!< in: query graph node; sets the val field
- data field to point to the new buffer, and
- len field equal to size */
- ulint size); /*!< in: buffer size */
-
-
-/*****************************************************************//**
-Makes sure that the value field of a node has a data buffer of at least
-size bytes. The length of the value field is always set to size. The
-current data pointer is kept unless it is NULL or the value buffer size
-of the node is smaller than size; in those cases a buffer is obtained
-from eval_node_alloc_val_buf().
-@return pointer to buffer */
-UNIV_INLINE
-byte*
-eval_node_ensure_val_buf(
-/*=====================*/
- que_node_t* node, /*!< in: query graph node; the len of its val
- field is set to size, and its buffer is
- replaced if it is missing or too small */
- ulint size) /*!< in: buffer size */
-{
- dfield_t* dfield;
- byte* data;
-
- dfield = que_node_get_val(node);
- dfield_set_len(dfield, size);
-
- data = static_cast<byte*>(dfield_get_data(dfield));
-
- if (!data || que_node_get_val_buf_size(node) < size) {
-
- data = eval_node_alloc_val_buf(node, size);
- }
-
- return(data);
-}
-
-/*****************************************************************//**
-Evaluates a symbol table symbol. If the symbol has an indirection, the
-data of the value field of the indirection is copied to the value field of
-the symbol with dfield_copy_data(); otherwise nothing is done. */
-UNIV_INLINE
-void
-eval_sym(
-/*=====*/
- sym_node_t* sym_node) /*!< in: symbol table node */
-{
-
- ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
-
- if (sym_node->indirection) {
- /* The symbol table node is an alias for a variable or a
- column */
-
- dfield_copy_data(que_node_get_val(sym_node),
- que_node_get_val(sym_node->indirection));
- }
-}
-
-/*****************************************************************//**
-Evaluates an expression. A node of type QUE_NODE_SYMBOL is handed to
-eval_sym(); every other node is cast to func_node_t and handed to
-eval_func(). */
-UNIV_INLINE
-void
-eval_exp(
-/*=====*/
- que_node_t* exp_node) /*!< in: expression */
-{
- if (que_node_get_type(exp_node) == QUE_NODE_SYMBOL) {
-
- eval_sym((sym_node_t*) exp_node);
-
- return;
- }
-
- eval_func(static_cast<func_node_t*>(exp_node));
-}
-
-/*****************************************************************//**
-Sets an integer value as the value of an expression node. If the value
-field has no data buffer yet, a 4-byte buffer is allocated with
-eval_node_alloc_val_buf(). The value is cast to ulint and stored with
-mach_write_to_4(); the length of the value field is expected to be 4. */
-UNIV_INLINE
-void
-eval_node_set_int_val(
-/*==================*/
- que_node_t* node, /*!< in: expression node */
- lint val) /*!< in: value to set */
-{
- dfield_t* dfield;
- byte* data;
-
- dfield = que_node_get_val(node);
-
- data = static_cast<byte*>(dfield_get_data(dfield));
-
- if (data == NULL) {
- data = eval_node_alloc_val_buf(node, 4);
- }
-
- ut_ad(dfield_get_len(dfield) == 4);
-
- mach_write_to_4(data, (ulint) val);
-}
-
-/*****************************************************************//**
-Gets an integer non-SQL null value from an expression node. The length of
-the value field is expected to be 4. The value read with
-mach_read_from_4() is cast to int before it is returned as lint.
-@return integer value */
-UNIV_INLINE
-lint
-eval_node_get_int_val(
-/*==================*/
- que_node_t* node) /*!< in: expression node */
-{
- const byte* ptr;
- dfield_t* dfield;
-
- dfield = que_node_get_val(node);
- ptr = static_cast<byte*>(dfield_get_data(dfield));
-
- ut_ad(dfield_get_len(dfield) == 4);
-
- return((int) mach_read_from_4(ptr));
-}
-
-/*****************************************************************//**
-Gets an ibool value from a query node. The value is read with
-mach_read_from_1() from the data of the value field of the node; the data
-pointer is expected to be non-NULL.
-@return ibool value */
-UNIV_INLINE
-ibool
-eval_node_get_ibool_val(
-/*====================*/
- que_node_t* node) /*!< in: query graph node */
-{
- dfield_t* dfield;
- byte* data;
-
- dfield = que_node_get_val(node);
-
- data = static_cast<byte*>(dfield_get_data(dfield));
-
- ut_ad(data != NULL);
-
- return(mach_read_from_1(data));
-}
-
-/*****************************************************************//**
-Sets an ibool value as the value of a function node. If the value field
-has no data buffer yet, a 1-byte buffer is allocated with
-eval_node_alloc_val_buf(). The value is stored with mach_write_to_1();
-the length of the value field is expected to be 1. */
-UNIV_INLINE
-void
-eval_node_set_ibool_val(
-/*====================*/
- func_node_t* func_node, /*!< in: function node */
- ibool val) /*!< in: value to set */
-{
- dfield_t* dfield;
- byte* data;
-
- dfield = que_node_get_val(func_node);
-
- data = static_cast<byte*>(dfield_get_data(dfield));
-
- if (data == NULL) {
- /* Allocate 1 byte to hold the value */
-
- data = eval_node_alloc_val_buf(func_node, 1);
- }
-
- ut_ad(dfield_get_len(dfield) == 1);
-
- mach_write_to_1(data, val);
-}
-
-/*****************************************************************//**
-Copies a binary string value as the value of a query graph node. Allocates a
-new buffer if necessary (see eval_node_ensure_val_buf()). When len is
-UNIV_SQL_NULL, only the length of the value field is set to len and
-nothing is copied. */
-UNIV_INLINE
-void
-eval_node_copy_and_alloc_val(
-/*=========================*/
- que_node_t* node, /*!< in: query graph node */
- const byte* str, /*!< in: binary string */
- ulint len) /*!< in: string length or UNIV_SQL_NULL */
-{
- byte* data;
-
- if (len == UNIV_SQL_NULL) {
- dfield_set_len(que_node_get_val(node), len);
-
- return;
- }
-
- data = eval_node_ensure_val_buf(node, len);
-
- ut_memcpy(data, str, len);
-}
-
-/*****************************************************************//**
-Copies a query node value to another node. The data pointer and the length
-of the value field of node2 are passed to eval_node_copy_and_alloc_val(),
-which stores the copy in node1. */
-UNIV_INLINE
-void
-eval_node_copy_val(
-/*===============*/
- que_node_t* node1, /*!< in: node to copy to */
- que_node_t* node2) /*!< in: node to copy from */
-{
- dfield_t* dfield2; /* value field of the source node */
-
- dfield2 = que_node_get_val(node2);
-
- eval_node_copy_and_alloc_val(
- node1,
- static_cast<byte*>(dfield_get_data(dfield2)),
- dfield_get_len(dfield2));
-}
diff --git a/storage/xtradb/include/eval0proc.h b/storage/xtradb/include/eval0proc.h
deleted file mode 100644
index 7755fb10343..00000000000
--- a/storage/xtradb/include/eval0proc.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1998, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/eval0proc.h
-Executes SQL stored procedures and their control structures
-
-Created 1/20/1998 Heikki Tuuri
-*******************************************************/
-
-#ifndef eval0proc_h
-#define eval0proc_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "pars0sym.h"
-#include "pars0pars.h"
-
-/**********************************************************************//**
-Performs an execution step of a procedure node.
-@return query thread to run next or NULL */
-UNIV_INLINE
-que_thr_t*
-proc_step(
-/*======*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of an if-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-if_step(
-/*====*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of a while-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-while_step(
-/*=======*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of a for-loop node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-for_step(
-/*=====*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of an assignment statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-assign_step(
-/*========*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of a procedure call node.
-@return query thread to run next or NULL */
-UNIV_INLINE
-que_thr_t*
-proc_eval_step(
-/*===========*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of an exit statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-exit_step(
-/*======*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of a return-statement node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-return_step(
-/*========*/
- que_thr_t* thr); /*!< in: query thread */
-
-
-#ifndef UNIV_NONINL
-#include "eval0proc.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/eval0proc.ic b/storage/xtradb/include/eval0proc.ic
deleted file mode 100644
index 81418bae2c9..00000000000
--- a/storage/xtradb/include/eval0proc.ic
+++ /dev/null
@@ -1,88 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1998, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/eval0proc.ic
-Executes SQL stored procedures and their control structures
-
-Created 1/20/1998 Heikki Tuuri
-*******************************************************/
-
-#include "pars0pars.h"
-#include "que0que.h"
-#include "eval0eval.h"
-
-/**********************************************************************//**
-Performs an execution step of a procedure node. When control arrives from
-the parent node, execution continues at the first statement of the
-statement list. In every other case, and also when the statement list is
-empty, control is passed back to the parent node.
-@return query thread to run next; this implementation always returns thr */
-UNIV_INLINE
-que_thr_t*
-proc_step(
-/*======*/
- que_thr_t* thr) /*!< in: query thread */
-{
- proc_node_t* node;
-
- ut_ad(thr);
-
- node = static_cast<proc_node_t*>(thr->run_node);
- ut_ad(que_node_get_type(node) == QUE_NODE_PROC);
-
- if (thr->prev_node == que_node_get_parent(node)) {
- /* Start execution from the first statement in the statement
- list */
-
- thr->run_node = node->stat_list;
- } else {
- /* Control did not come from the parent. The previous node is
- expected to have no next node, so no statement is left to run */
- ut_ad(que_node_get_next(thr->prev_node) == NULL);
-
- thr->run_node = NULL;
- }
-
- if (thr->run_node == NULL) { /* nothing to run: go back to the parent */
- thr->run_node = que_node_get_parent(node);
- }
-
- return(thr);
-}
-
-/**********************************************************************//**
-Performs an execution step of a procedure call node: the function node is
-evaluated with eval_exp() and control is then passed to its parent node.
-@return query thread to run next; this implementation always returns thr */
-UNIV_INLINE
-que_thr_t*
-proc_eval_step(
-/*===========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- func_node_t* node;
-
- ut_ad(thr);
-
- node = static_cast<func_node_t*>(thr->run_node);
- ut_ad(que_node_get_type(node) == QUE_NODE_FUNC);
-
- /* Evaluate the procedure */
-
- eval_exp(node);
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
diff --git a/storage/xtradb/include/fil0crypt.h b/storage/xtradb/include/fil0crypt.h
deleted file mode 100644
index 228dfb895fe..00000000000
--- a/storage/xtradb/include/fil0crypt.h
+++ /dev/null
@@ -1,511 +0,0 @@
-/*****************************************************************************
-Copyright (C) 2013, 2015, Google Inc. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/fil0crypt.h
-The low-level file system encryption support functions
-
-Created 04/01/2015 Jan Lindström
-*******************************************************/
-
-#ifndef fil0crypt_h
-#define fil0crypt_h
-
-#include "os0sync.h"
-
-/**
-* Magic pattern in start of crypt data on page 0
-*/
-#define MAGIC_SZ 6
-
-static const unsigned char CRYPT_MAGIC[MAGIC_SZ] = {
- 's', 0xE, 0xC, 'R', 'E', 't' };
-
-/* This key will be used if nothing else is given */
-#define FIL_DEFAULT_ENCRYPTION_KEY ENCRYPTION_KEY_SYSTEM_DATA
-
-extern os_event_t fil_crypt_threads_event;
-
-/**
- * CRYPT_SCHEME_UNENCRYPTED
- *
- * Used as intermediate state when converting a space from unencrypted
- * to encrypted
- */
-/**
- * CRYPT_SCHEME_1
- *
- * xxx is AES_CTR or AES_CBC (or another block cypher with the same key and iv lengths)
- * L = AES_ECB(KEY, IV)
- * CRYPT(PAGE) = xxx(KEY=L, IV=C, PAGE)
- */
-
-#define CRYPT_SCHEME_1 1
-#define CRYPT_SCHEME_1_IV_LEN 16
-#define CRYPT_SCHEME_UNENCRYPTED 0
-
-/* Cached key for a given key_version: the key version, the key length and
-the key bytes (the value called L in the description of CRYPT_SCHEME_1) */
-struct key_struct
-{
- uint key_version; /*!< Version of the key */
- uint key_length; /*!< Length of the key stored in key[]
- (presumably in bytes -- TODO confirm) */
- unsigned char key[MY_AES_MAX_KEY_LENGTH]; /*!< Cached key
- (that is L in CRYPT_SCHEME_1) */
-};
-
-/** is encryption enabled */
-extern ulong srv_encrypt_tables;
-
-#ifdef UNIV_PFS_MUTEX
-extern mysql_pfs_key_t fil_crypt_data_mutex_key;
-#endif
-
-/** Mutex helper for crypt_data->scheme
-@param[in,out] scheme encryption scheme
-@param[in] exit should we exit or enter mutex ? */
-void
-crypt_data_scheme_locker(
- st_encryption_scheme* scheme,
- int exit);
-
-struct fil_space_rotate_state_t
-{ /* progress of rotating one space, plus a nested scrubbing status */
- time_t start_time; /*!< time when rotation started */
- ulint active_threads; /*!< active threads in space */
- ulint next_offset; /*!< next "free" offset */
- ulint max_offset; /*!< max offset needing to be rotated */
- uint min_key_version_found; /*!< min key version found but not
- rotated */
- lsn_t end_lsn; /*!< max lsn created when rotating this
- space */
- bool starting; /*!< initial write of IV */
- bool flushing; /*!< space is being flushed at end of rotate */
- struct {
- bool is_active; /*!< is scrubbing active in this space */
- time_t last_scrub_completed; /*!< when the last scrub was
- completed */
- } scrubbing;
-};
-
-struct fil_space_crypt_t : st_encryption_scheme
-{
- public:
- /** Constructor.
- NOTE(review): this comment used to state that the members are not
- initialized, but the initializer list and the body below do set
- min_key_version, page0_offset, encryption, key_found, rotate_state,
- key_id, iv, mutex, locker and type -- confirm which is intended.
- The object is expected to be placed in a buffer that
- has been zero-initialized.
- The scheme becomes CRYPT_SCHEME_UNENCRYPTED when new_encryption is
- FIL_ENCRYPTION_OFF, or when it is FIL_ENCRYPTION_DEFAULT while
- srv_encrypt_tables is not set; otherwise it becomes CRYPT_SCHEME_1
- and min_key_version is taken from key_get_latest_version().
- @param[in] new_type requested scheme (overwritten, see below)
- @param[in] new_min_key_version initial value of min_key_version
- @param[in] new_key_id value stored in key_id
- @param[in] new_encryption value stored in encryption */
- fil_space_crypt_t(
- uint new_type,
- uint new_min_key_version,
- uint new_key_id,
- fil_encryption_t new_encryption)
- : st_encryption_scheme(),
- min_key_version(new_min_key_version),
- page0_offset(0),
- encryption(new_encryption),
- key_found(0),
- rotate_state()
- {
- key_id = new_key_id;
- my_random_bytes(iv, sizeof(iv));
- mutex_create(fil_crypt_data_mutex_key,
- &mutex, SYNC_NO_ORDER_CHECK);
- locker = crypt_data_scheme_locker;
- type = new_type; /* NOTE(review): always overwritten by the if/else below */
-
- if (new_encryption == FIL_ENCRYPTION_OFF ||
- (!srv_encrypt_tables &&
- new_encryption == FIL_ENCRYPTION_DEFAULT)) {
- type = CRYPT_SCHEME_UNENCRYPTED;
- } else {
- type = CRYPT_SCHEME_1;
- min_key_version = key_get_latest_version();
- }
-
- key_found = min_key_version;
- }
-
- /** Destructor: frees the mutex created in the constructor */
- ~fil_space_crypt_t()
- {
- mutex_free(&mutex);
- }
-
- /** Get latest key version from encryption plugin
- @retval key_version or
- @retval ENCRYPTION_KEY_VERSION_INVALID if used key_id
- is not found from encryption plugin. */
- uint key_get_latest_version(void);
-
- /** Returns true if key was found from encryption plugin
- and false if not, that is, whether key_found differs from
- ENCRYPTION_KEY_VERSION_INVALID. */
- bool is_key_found() const {
- return key_found != ENCRYPTION_KEY_VERSION_INVALID;
- }
-
- /** Returns true if tablespace should be encrypted: the mode is
- FIL_ENCRYPTION_ON, or it is FIL_ENCRYPTION_DEFAULT while
- srv_encrypt_tables is set. */
- bool should_encrypt() const {
- return ((encryption == FIL_ENCRYPTION_ON) ||
- (srv_encrypt_tables &&
- encryption == FIL_ENCRYPTION_DEFAULT));
- }
-
- /** Return true if the encryption mode is anything other than
- FIL_ENCRYPTION_OFF. Note that this is also true for
- FIL_ENCRYPTION_DEFAULT, whatever the value of srv_encrypt_tables. */
- bool is_encrypted() const {
- return (encryption != FIL_ENCRYPTION_OFF);
- }
-
- /** Return true if default tablespace encryption is used. */
- bool is_default_encryption() const {
- return (encryption == FIL_ENCRYPTION_DEFAULT);
- }
-
- /** Return true if the encryption mode is FIL_ENCRYPTION_OFF. */
- bool not_encrypted() const {
- return (encryption == FIL_ENCRYPTION_OFF);
- }
-
- /** Write crypt data to a page (0)
- @param[in,out] page0 Page 0 where to write
- @param[in,out] mtr Minitransaction */
- void write_page0(byte* page0, mtr_t* mtr);
-
- uint min_key_version; // min key version for this space
- ulint page0_offset; // byte offset on page 0 for crypt data
- fil_encryption_t encryption; // Encryption setup
-
- ib_mutex_t mutex; // mutex protecting the variables that follow
-
- /** Return code from encryption_key_get_latest_version.
- If ENCRYPTION_KEY_VERSION_INVALID encryption plugin
- could not find the key and there is no need to call
- key_get_latest_version again as keys are read only
- at startup. */
- uint key_found;
-
- fil_space_rotate_state_t rotate_state;
-};
-
-/** Status info about encryption of one tablespace */
-struct fil_space_crypt_status_t {
- ulint space; /*!< tablespace id */
- ulint scheme; /*!< encryption scheme */
- uint min_key_version; /*!< min key version */
- uint current_key_version;/*!< current key version */
- uint keyserver_requests;/*!< number of key requests to key server */
- ulint key_id; /*!< current key_id */
- bool rotating; /*!< is key rotation ongoing */
- bool flushing; /*!< is flush at end of rotation ongoing */
- ulint rotate_next_page_number; /*!< next page if key rotating */
- ulint rotate_max_page_number; /*!< max page if key rotating */
-};
-
-/** Statistics about encryption key rotation.
-NOTE(review): the fields carry no documentation in this header; the notes
-below are inferred from the field names only and need to be confirmed. */
-struct fil_crypt_stat_t {
- ulint pages_read_from_cache; /*!< presumably pages found in cache -- confirm */
- ulint pages_read_from_disk; /*!< presumably pages read from disk -- confirm */
- ulint pages_modified; /*!< presumably pages modified by rotation -- confirm */
- ulint pages_flushed; /*!< presumably pages flushed -- confirm */
- ulint estimated_iops; /*!< presumably an I/O rate estimate -- confirm */
-};
-
-/** Status info about scrubbing of one tablespace */
-struct fil_space_scrub_status_t {
- ulint space; /*!< tablespace id */
- bool compressed; /*!< is space compressed */
- time_t last_scrub_completed; /*!< when the last scrub was completed */
- bool scrubbing; /*!< is scrubbing ongoing */
- time_t current_scrub_started; /*!< when the current scrubbing started */
- ulint current_scrub_active_threads; /*!< current scrub active threads */
- ulint current_scrub_page_number; /*!< current scrub page no */
- ulint current_scrub_max_page_number; /*!< current scrub max page no */
-};
-
-/*********************************************************************
-Init space crypt */
-UNIV_INTERN
-void
-fil_space_crypt_init();
-
-/*********************************************************************
-Cleanup space crypt */
-UNIV_INTERN
-void
-fil_space_crypt_cleanup();
-
-/******************************************************************
-Create a fil_space_crypt_t object
-@param[in] encrypt_mode FIL_ENCRYPTION_DEFAULT or
- FIL_ENCRYPTION_ON or
- FIL_ENCRYPTION_OFF
-
-@param[in] key_id Encryption key id
-@return crypt object */
-UNIV_INTERN
-fil_space_crypt_t*
-fil_space_create_crypt_data(
- fil_encryption_t encrypt_mode,
- uint key_id)
- MY_ATTRIBUTE((warn_unused_result));
-
-/******************************************************************
-Merge fil_space_crypt_t object
-@param[in,out] dst Destination crypt data
-@param[in] src Source crypt data */
-UNIV_INTERN
-void
-fil_space_merge_crypt_data(
- fil_space_crypt_t* dst,
- const fil_space_crypt_t* src);
-
-/******************************************************************
-Read crypt data from a page (0)
-@param[in] space space_id
-@param[in] page Page 0
-@param[in] offset Offset to crypt data
-@return crypt data from page 0 or NULL. */
-UNIV_INTERN
-fil_space_crypt_t*
-fil_space_read_crypt_data(
- ulint space,
- const byte* page,
- ulint offset)
- MY_ATTRIBUTE((warn_unused_result));
-
-/******************************************************************
-Free a crypt data object
-@param[in,out] crypt_data crypt data to be freed */
-UNIV_INTERN
-void
-fil_space_destroy_crypt_data(
- fil_space_crypt_t **crypt_data);
-
-/******************************************************************
-Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry
-@param[in] ptr Log entry start
-@param[in] end_ptr Log entry end
-@param[in] block buffer block
-@param[out] err DB_SUCCESS or DB_DECRYPTION_FAILED
-@return position on log buffer */
-UNIV_INTERN
-byte*
-fil_parse_write_crypt_data(
- byte* ptr,
- const byte* end_ptr,
- const buf_block_t* block,
- dberr_t* err)
- MY_ATTRIBUTE((warn_unused_result));
-
-/******************************************************************
-Encrypt a buffer
-@param[in,out] crypt_data Crypt data
-@param[in] space space_id
-@param[in] offset Page offset
-@param[in] lsn Log sequence number
-@param[in] src_frame Page to encrypt
-@param[in] zip_size Compressed size or 0
-@param[in,out] dst_frame Output buffer
-@return encrypted buffer or NULL */
-UNIV_INTERN
-byte*
-fil_encrypt_buf(
- fil_space_crypt_t* crypt_data,
- ulint space,
- ulint offset,
- lsn_t lsn,
- const byte* src_frame,
- ulint zip_size,
- byte* dst_frame)
- MY_ATTRIBUTE((warn_unused_result));
-
-/******************************************************************
-Encrypt a page
-
-@param[in] space Tablespace
-@param[in] offset Page offset
-@param[in] lsn Log sequence number
-@param[in] src_frame Page to encrypt
-@param[in,out] dst_frame Output buffer
-@return encrypted buffer or NULL */
-UNIV_INTERN
-byte*
-fil_space_encrypt(
- const fil_space_t* space,
- ulint offset,
- lsn_t lsn,
- byte* src_frame,
- byte* dst_frame)
- MY_ATTRIBUTE((warn_unused_result));
-
-/******************************************************************
-Decrypt a page
-@param[in,out] crypt_data crypt_data
-@param[in] tmp_frame Temporary buffer
-@param[in] page_size Page size
-@param[in,out] src_frame Page to decrypt
-@param[out] err DB_SUCCESS or error
-@return true if page decrypted, false if not.*/
-UNIV_INTERN
-bool
-fil_space_decrypt(
- fil_space_crypt_t* crypt_data,
- byte* tmp_frame,
- ulint page_size,
- byte* src_frame,
- dberr_t* err);
-
-/******************************************************************
-Decrypt a page
-@param[in] space Tablespace
-@param[in] tmp_frame Temporary buffer used for decrypting
-@param[in] page_size Page size
-@param[in,out] src_frame Page to decrypt
-@param[out] decrypted true if page was decrypted
-@return decrypted page, or original not encrypted page if decryption is
-not needed.*/
-UNIV_INTERN
-byte*
-fil_space_decrypt(
- const fil_space_t* space,
- byte* tmp_frame,
- byte* src_frame,
- bool* decrypted)
- MY_ATTRIBUTE((warn_unused_result));
-
-/******************************************************************
-Calculate post encryption checksum
-@param[in] zip_size zip_size or 0
-@param[in] dst_frame Block where checksum is calculated
-@return page checksum or BUF_NO_CHECKSUM_MAGIC
-not needed. */
-UNIV_INTERN
-ulint
-fil_crypt_calculate_checksum(
- ulint zip_size,
- const byte* dst_frame)
- MY_ATTRIBUTE((warn_unused_result));
-
-/*********************************************************************
-Verify that post encryption checksum match calculated checksum.
-This function should be called only if tablespace contains crypt_data
-metadata (this is strong indication that tablespace is encrypted).
-Function also verifies that traditional checksum does not match
-calculated checksum as if it does page could be valid unencrypted,
-encrypted, or corrupted.
-
-@param[in] page Page to verify
-@param[in] zip_size zip size
-@param[in] space Tablespace
-@param[in] pageno Page no
-@return true if page is encrypted AND OK, false otherwise */
-UNIV_INTERN
-bool
-fil_space_verify_crypt_checksum(
- byte* page,
- ulint zip_size,
- const fil_space_t* space,
- ulint pageno)
- MY_ATTRIBUTE((warn_unused_result));
-
-/*********************************************************************
-Adjust thread count for key rotation
-@param[in] new_cnt Number of threads to be used */
-UNIV_INTERN
-void
-fil_crypt_set_thread_cnt(
- uint new_cnt);
-
-/*********************************************************************
-Adjust max key age
-@param[in] val New max key age */
-UNIV_INTERN
-void
-fil_crypt_set_rotate_key_age(
- uint val);
-
-/*********************************************************************
-Adjust rotation iops
-@param[in] val New max rotation iops */
-UNIV_INTERN
-void
-fil_crypt_set_rotation_iops(
- uint val);
-
-/*********************************************************************
-Adjust encrypt tables
-@param[in] val New setting for innodb-encrypt-tables */
-UNIV_INTERN
-void
-fil_crypt_set_encrypt_tables(
- uint val);
-
-/*********************************************************************
-Init threads for key rotation */
-UNIV_INTERN
-void
-fil_crypt_threads_init();
-
-/*********************************************************************
-Clean up key rotation threads resources */
-UNIV_INTERN
-void
-fil_crypt_threads_cleanup();
-
-/*********************************************************************
-Wait for crypt threads to stop accessing space
-@param[in] space Tablespace */
-UNIV_INTERN
-void
-fil_space_crypt_close_tablespace(
- const fil_space_t* space);
-
-/*********************************************************************
-Get crypt status for a space (used by information_schema)
-@param[in] space Tablespace
-@param[out] status Crypt status
-No value is returned (the function is declared void); see status. */
-UNIV_INTERN
-void
-fil_space_crypt_get_status(
- const fil_space_t* space,
- struct fil_space_crypt_status_t* status);
-
-/*********************************************************************
-Return crypt statistics
-@param[out] stat Crypt statistics */
-UNIV_INTERN
-void
-fil_crypt_total_stat(
- fil_crypt_stat_t *stat);
-
-/*********************************************************************
-Get scrub status for a space (used by information_schema)
-
-@param[in] space Tablespace
-@param[out] status Scrub status
-No value is returned (the function is declared void); see status. */
-UNIV_INTERN
-void
-fil_space_get_scrub_status(
- const fil_space_t* space,
- struct fil_space_scrub_status_t* status);
-
-#ifndef UNIV_NONINL
-#include "fil0crypt.ic"
-#endif
-
-#endif /* fil0crypt_h */
diff --git a/storage/xtradb/include/fil0crypt.ic b/storage/xtradb/include/fil0crypt.ic
deleted file mode 100644
index cb9ba083466..00000000000
--- a/storage/xtradb/include/fil0crypt.ic
+++ /dev/null
@@ -1,36 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2015, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/fil0crypt.ic
-The low-level file system encryption support functions
-
-Created 04/01/2015 Jan Lindström
-*******************************************************/
-
-/*******************************************************************//**
-Find out whether the page is encrypted, i.e. its 4-byte key version is nonzero
-@return true if the page is encrypted, false if not */
-UNIV_INLINE
-bool
-fil_page_is_encrypted(
-/*==================*/
- const byte *buf) /*!< in: page */
-{
- return(mach_read_from_4(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) != 0);
-}
diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h
deleted file mode 100644
index 6eab5db6883..00000000000
--- a/storage/xtradb/include/fil0fil.h
+++ /dev/null
@@ -1,1540 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/fil0fil.h
-The low-level file system
-
-Created 10/25/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef fil0fil_h
-#define fil0fil_h
-#include "univ.i"
-
-#ifndef UNIV_INNOCHECKSUM
-
-#include "dict0types.h"
-#include "ut0byte.h"
-#include "os0file.h"
-#include "hash0hash.h"
-#ifndef UNIV_HOTBACKUP
-#include "sync0rw.h"
-#include "ibuf0types.h"
-#include "log0log.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "trx0types.h"
-
-#include <list>
-
-// Forward declaration
-struct trx_t;
-
-typedef std::list<const char*> space_name_list_t;
-
-/** When mysqld is run, the default directory "." is the mysqld datadir,
-but in the MySQL Embedded Server Library and mysqlbackup it is not the default
-directory, and we must set the base file path explicitly */
-extern const char* fil_path_to_mysql_datadir;
-
-/** Initial size of a single-table tablespace in pages */
-#define FIL_IBD_FILE_INITIAL_SIZE 4
-
-/** 'null' (undefined) page offset in the context of file spaces */
-#define FIL_NULL ULINT32_UNDEFINED
-
-/* Space address data type; this is intended to be used when
-addresses accurate to a byte are stored in file pages. If the page part
-of the address is FIL_NULL, the address is considered undefined. */
-
-typedef byte fil_faddr_t; /*!< 'type' definition in C: an address
- stored in a file page is a string of bytes */
-#define FIL_ADDR_PAGE 0 /* first in address is the page offset */
-#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/
-
-#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
-
-/** File space address */
-struct fil_addr_t{
- ulint page; /*!< page number within a space */
- ulint boffset; /*!< byte offset within the page */
-};
-
-/** The null file address */
-extern fil_addr_t fil_addr_null;
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/** The byte offsets on a file page for various variables @{ */
-#define FIL_PAGE_SPACE_OR_CHKSUM 0 /*!< in < MySQL-4.0.14 space id the
- page belongs to (== 0) but in later
- versions the 'new' checksum of the
- page */
-#define FIL_PAGE_OFFSET 4 /*!< page offset inside space */
-#define FIL_PAGE_PREV 8 /*!< if there is a 'natural'
- predecessor of the page, its
- offset. Otherwise FIL_NULL.
- This field is not set on BLOB
- pages, which are stored as a
- singly-linked list. See also
- FIL_PAGE_NEXT. */
-#define FIL_PAGE_NEXT 12 /*!< if there is a 'natural' successor
- of the page, its offset.
- Otherwise FIL_NULL.
- B-tree index pages
- (FIL_PAGE_TYPE contains FIL_PAGE_INDEX)
- on the same PAGE_LEVEL are maintained
- as a doubly linked list via
- FIL_PAGE_PREV and FIL_PAGE_NEXT
- in the collation order of the
- smallest user record on each page. */
-#define FIL_PAGE_LSN 16 /*!< lsn of the end of the newest
- modification log record to the page */
-#define FIL_PAGE_TYPE 24 /*!< file page type: FIL_PAGE_INDEX,...,
- 2 bytes.
-
- The contents of this field can only
- be trusted in the following case:
- if the page is an uncompressed
- B-tree index page, then it is
- guaranteed that the value is
- FIL_PAGE_INDEX.
- The opposite does not hold.
-
- In tablespaces created by
- MySQL/InnoDB 5.1.7 or later, the
- contents of this field is valid
- for all uncompressed pages. */
-#define FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION 26 /*!< for the first page
- in a system tablespace data file
- (ibdata*, not *.ibd): the file has
- been flushed to disk at least up
- to this lsn
- for other pages: a 32-bit key version
- used to encrypt the page + 32-bit checksum
- or 64 bits of zero if no encryption
- */
-#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this
- contains the space id of the page */
-#define FIL_PAGE_SPACE_ID FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
-
-#define FIL_PAGE_DATA 38 /*!< start of the data on the page */
-/* Following are used when page compression is used */
-
-#define FIL_PAGE_COMPRESSED_SIZE 2 /*!< Number of bytes used to store
- actual payload data size on
- compressed pages. */
-#define FIL_PAGE_COMPRESSION_METHOD_SIZE 2
- /*!< Number of bytes used to store
- actual compression method. */
-/* @} */
-/** File page trailer @{ */
-#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used
- to store the page checksum, the
- last 4 bytes should be identical
- to the last 4 bytes of FIL_PAGE_LSN */
-#define FIL_PAGE_DATA_END 8 /*!< size of the page trailer */
-/* @} */
-
-/** File page types (values of FIL_PAGE_TYPE) @{ */
-#define FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED 37401 /*!< Page is compressed and
- then encrypted */
-#define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< Page compressed page */
-#define FIL_PAGE_INDEX 17855 /*!< B-tree node */
-#define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */
-#define FIL_PAGE_INODE 3 /*!< Index node */
-#define FIL_PAGE_IBUF_FREE_LIST 4 /*!< Insert buffer free list */
-/* File page types introduced in MySQL/InnoDB 5.1.7 */
-#define FIL_PAGE_TYPE_ALLOCATED 0 /*!< Freshly allocated page */
-#define FIL_PAGE_IBUF_BITMAP 5 /*!< Insert buffer bitmap */
-#define FIL_PAGE_TYPE_SYS 6 /*!< System page */
-#define FIL_PAGE_TYPE_TRX_SYS 7 /*!< Transaction system data */
-#define FIL_PAGE_TYPE_FSP_HDR 8 /*!< File space header */
-#define FIL_PAGE_TYPE_XDES 9 /*!< Extent descriptor page */
-#define FIL_PAGE_TYPE_BLOB 10 /*!< Uncompressed BLOB page */
-#define FIL_PAGE_TYPE_ZBLOB 11 /*!< First compressed BLOB page */
-#define FIL_PAGE_TYPE_ZBLOB2 12 /*!< Subsequent compressed BLOB page */
-#define FIL_PAGE_TYPE_COMPRESSED 13 /*!< Compressed page */
-#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_COMPRESSED
- /*!< Last page type */
-/* @} */
-
-#ifndef UNIV_INNOCHECKSUM
-
-/** Space types @{ */
-#define FIL_TABLESPACE 501 /*!< tablespace */
-#define FIL_LOG 502 /*!< redo log */
-/* @} */
-
-/** Structure containing encryption specification */
-struct fil_space_crypt_t;
-
-/** Enum values for encryption table option */
-enum fil_encryption_t {
- /** Encrypted if innodb_encrypt_tables=ON (srv_encrypt_tables) */
- FIL_ENCRYPTION_DEFAULT,
- /** Encrypted */
- FIL_ENCRYPTION_ON,
- /** Not encrypted */
- FIL_ENCRYPTION_OFF
-};
-
-/** The number of fsyncs done to the log */
-extern ulint fil_n_log_flushes;
-
-/** Number of pending redo log flushes */
-extern ulint fil_n_pending_log_flushes;
-/** Number of pending tablespace flushes */
-extern ulint fil_n_pending_tablespace_flushes;
-
-/** Number of files currently open */
-extern ulint fil_n_file_opened;
-
-struct fsp_open_info {
- ibool success; /*!< Has the tablespace been opened? */
- const char* check_msg; /*!< fil_check_first_page() message */
- ibool valid; /*!< Is the tablespace valid? */
- pfs_os_file_t file; /*!< File handle */
- char* filepath; /*!< File path to open */
- ulint id; /*!< Space ID */
- ulint flags; /*!< Tablespace flags */
- ulint encryption_error; /*!< if an encryption error occurs */
- fil_space_crypt_t* crypt_data; /*!< crypt data */
- dict_table_t* table; /*!< table */
-};
-
-struct fil_space_t;
-
-/** File node of a tablespace or the log data space */
-struct fil_node_t {
- fil_space_t* space; /*!< backpointer to the space where this node
- belongs */
- char* name; /*!< path to the file */
- ibool open; /*!< TRUE if file open */
- pfs_os_file_t handle; /*!< OS handle to the file, if file open */
- os_event_t sync_event;/*!< Condition event to group and
- serialize calls to fsync;
- os_event_set() and os_event_reset()
- are protected by fil_system_t::mutex */
- ibool is_raw_disk;/*!< TRUE if the 'file' is actually a raw
- device or a raw disk partition */
- ulint size; /*!< size of the file in database pages, 0 if
- not known yet; the possible last incomplete
- megabyte may be ignored if space == 0 */
- ulint n_pending;
- /*!< count of pending i/o's on this file;
- closing of the file is not allowed if
- this is > 0 */
- ulint n_pending_flushes;
- /*!< count of pending flushes on this file;
- closing of the file is not allowed if
- this is > 0 */
- ibool being_extended;
- /*!< TRUE if the node is currently
- being extended. */
- ib_int64_t modification_counter;/*!< when we write to the file we
- increment this by one */
- ib_int64_t flush_counter;/*!< up to what
- modification_counter value we have
- flushed the modifications to disk */
- ulint file_block_size;/*!< file system block size */
- UT_LIST_NODE_T(fil_node_t) chain;
- /*!< link field for the file chain */
- UT_LIST_NODE_T(fil_node_t) LRU;
- /*!< link field for the LRU list */
- ulint magic_n;/*!< FIL_NODE_MAGIC_N */
-};
-
-/** Value of fil_node_t::magic_n */
-#define FIL_NODE_MAGIC_N 89389
-
-/** Tablespace or log data space: let us call them by a common name space */
-struct fil_space_t {
- char* name; /*!< space name = the path to the first file in
- it */
- ulint id; /*!< space id */
- ib_int64_t tablespace_version;
- /*!< in DISCARD/IMPORT this timestamp
- is used to check if we should ignore
- an insert buffer merge request for a
- page because it actually was for the
- previous incarnation of the space */
- ibool stop_ios;/*!< TRUE if we want to rename the
- .ibd file of tablespace and want to
- stop temporarily posting of new i/o
- requests on the file */
- bool stop_new_ops;
- /*!< we set this true when we start
- deleting a single-table tablespace.
- When this is set following new ops
- are not allowed:
- * read IO request
- * ibuf merge
- * file flush
- Note that we can still possibly have
- new write operations because we don't
- check this flag when doing flush
- batches. */
- ulint purpose;/*!< FIL_TABLESPACE, FIL_LOG, or
- FIL_ARCH_LOG */
- UT_LIST_BASE_NODE_T(fil_node_t) chain;
- /*!< base node for the file chain */
- ulint size; /*!< space size in pages; 0 if a single-table
- tablespace whose size we do not know yet;
- last incomplete megabytes in data files may be
- ignored if space == 0 */
- ulint recv_size;
- /*!< recovered tablespace size in pages;
- 0 if no size change was read from the redo log,
- or if the size change was implemented */
- ulint flags; /*!< FSP_SPACE_FLAGS and FSP_FLAGS_MEM_ flags;
- see fsp0fsp.h,
- fsp_flags_is_valid(),
- fsp_flags_get_zip_size() */
- ulint n_reserved_extents;
- /*!< number of reserved free extents for
- ongoing operations like B-tree page split */
- ulint n_pending_flushes; /*!< this is positive when flushing
- the tablespace to disk; dropping of the
- tablespace is forbidden if this is positive */
- /** Number of pending buffer pool operations accessing the tablespace
- without holding a table lock or dict_operation_lock S-latch
- that would prevent the table (and tablespace) from being
- dropped. An example is change buffer merge.
- The tablespace cannot be dropped while this is nonzero,
- or while fil_node_t::n_pending is nonzero.
- Protected by fil_system->mutex. */
- ulint n_pending_ops;
- /** Number of pending block read or write operations
- (when a write is imminent or a read has recently completed).
- The tablespace object cannot be freed while this is nonzero,
- but it can be detached from fil_system.
- Note that fil_node_t::n_pending tracks actual pending I/O requests.
- Protected by fil_system->mutex. */
- ulint n_pending_ios;
- hash_node_t hash; /*!< hash chain node */
- hash_node_t name_hash;/*!< hash chain the name_hash table */
-#ifndef UNIV_HOTBACKUP
- prio_rw_lock_t latch; /*!< latch protecting the file space storage
- allocation */
-#endif /* !UNIV_HOTBACKUP */
-
- UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
- /*!< list of spaces with at least one unflushed
- file we have written to */
- bool is_in_unflushed_spaces;
- /*!< true if this space is currently in
- unflushed_spaces */
- /** True if srv_pass_corrupt_table=true and tablespace contains
- corrupted page. */
- bool is_corrupt;
- /*!< true if tablespace corrupted */
- bool printed_compression_failure;
- /*!< true if we have already printed
- compression failure */
- fil_space_crypt_t* crypt_data;
- /*!< tablespace crypt data or NULL */
- ulint file_block_size;
- /*!< file system block size */
-
- UT_LIST_NODE_T(fil_space_t) space_list;
- /*!< list of all spaces */
-
- /*!< Protected by fil_system */
- UT_LIST_NODE_T(fil_space_t) rotation_list;
- /*!< list of spaces needing
- key rotation */
-
- bool is_in_rotation_list;
- /*!< true if this space is
- currently in key rotation list */
-
- ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
-
- /** @return whether the tablespace is about to be dropped or truncated */
- bool is_stopping() const
- {
- return stop_new_ops;
- }
-};
-
-/** Value of fil_space_t::magic_n */
-#define FIL_SPACE_MAGIC_N 89472
-
-/** The tablespace memory cache; also the totality of logs (the log
-data space) is stored here; below we talk about tablespaces, but also
-the ib_logfiles form a 'space' and it is handled here */
-struct fil_system_t {
-#ifndef UNIV_HOTBACKUP
- ib_mutex_t mutex; /*!< The mutex protecting the cache */
-#endif /* !UNIV_HOTBACKUP */
- hash_table_t* spaces; /*!< The hash table of spaces in the
- system; they are hashed on the space
- id */
- hash_table_t* name_hash; /*!< hash table based on the space
- name */
- UT_LIST_BASE_NODE_T(fil_node_t) LRU;
- /*!< base node for the LRU list of the
- most recently used open files with no
- pending i/o's; if we start an i/o on
- the file, we first remove it from this
- list, and return it to the start of
- the list when the i/o ends;
- log files and the system tablespace are
- not put to this list: they are opened
- after the startup, and kept open until
- shutdown */
- UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
- /*!< base node for the list of those
- tablespaces whose files contain
- unflushed writes; those spaces have
- at least one file node where
- modification_counter > flush_counter */
- ulint n_open; /*!< number of files currently open */
- ulint max_n_open; /*!< n_open is not allowed to exceed
- this */
- ib_int64_t modification_counter;/*!< when we write to a file we
- increment this by one */
- ulint max_assigned_id;/*!< maximum space id in the existing
- tables, or assigned during the time
- mysqld has been up; at an InnoDB
- startup we scan the data dictionary
- and set here the maximum of the
- space id's of the tables there */
- ib_int64_t tablespace_version;
- /*!< a counter which is incremented for
- every space object memory creation;
- every space mem object gets a
- 'timestamp' from this; in DISCARD/
- IMPORT this is used to check if we
- should ignore an insert buffer merge
- request */
- UT_LIST_BASE_NODE_T(fil_space_t) space_list;
- /*!< list of all file spaces */
-
- UT_LIST_BASE_NODE_T(fil_space_t) rotation_list;
- /*!< list of all file spaces needing
- key rotation.*/
-
- ibool space_id_reuse_warned;
- /*!< TRUE if fil_space_create()
- has issued a warning about
- potential space_id reuse */
-};
-
-/** The tablespace memory cache. This variable is NULL before the module is
-initialized. */
-extern fil_system_t* fil_system;
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Returns the version number of a tablespace, -1 if not found.
-@return version number, -1 if the tablespace does not exist in the
-memory cache */
-UNIV_INTERN
-ib_int64_t
-fil_space_get_version(
-/*==================*/
- ulint id); /*!< in: space id */
-/*******************************************************************//**
-Returns the latch of a file space.
-@return latch protecting storage allocation */
-UNIV_INTERN
-prio_rw_lock_t*
-fil_space_get_latch(
-/*================*/
- ulint id, /*!< in: space id */
- ulint* zip_size);/*!< out: compressed page size, or
- 0 for uncompressed tablespaces */
-/*******************************************************************//**
-Returns the type of a file space.
-@return FIL_TABLESPACE or FIL_LOG */
-UNIV_INTERN
-ulint
-fil_space_get_type(
-/*===============*/
- ulint id); /*!< in: space id */
-#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Appends a new file to the chain of files of a space. File must be closed.
-@return pointer to the file name, or NULL on error */
-UNIV_INTERN
-char*
-fil_node_create(
-/*============*/
- const char* name, /*!< in: file name (file must be closed) */
- ulint size, /*!< in: file size in database blocks, rounded
- downwards to an integer */
- ulint id, /*!< in: space id where to append */
- ibool is_raw) /*!< in: TRUE if a raw device or
- a raw disk partition */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-#ifdef UNIV_LOG_ARCHIVE
-/****************************************************************//**
-Drops files from the start of a file space, so that its size is cut by
-the amount given. */
-UNIV_INTERN
-void
-fil_space_truncate_start(
-/*=====================*/
- ulint id, /*!< in: space id */
- ulint trunc_len); /*!< in: truncate by this much; it is an error
- if this does not equal to the combined size of
- some initial files in the space */
-/****************************************************************//**
-Checks whether the file space contains a node with the given name. */
-UNIV_INTERN
-ibool
-fil_space_contains_node(
-/*====================*/
- ulint id, /*!< in: space id */
- char* node_name); /*!< in: node name */
-#endif /* UNIV_LOG_ARCHIVE */
-/*******************************************************************//**
-Creates a space memory object and puts it to the 'fil system' hash table.
-If there is an error, prints an error message to the .err log.
-@param[in] name Space name
-@param[in] id Space id
-@param[in] flags Tablespace flags
-@param[in] purpose FIL_TABLESPACE or FIL_LOG if log
-@param[in] crypt_data Encryption information
-@param[in] create_table True if this is create table
-@param[in] mode Encryption mode
-@return TRUE if success */
-UNIV_INTERN
-bool
-fil_space_create(
- const char* name,
- ulint id,
- ulint flags,
- ulint purpose,
- fil_space_crypt_t* crypt_data,
- bool create_table,
- fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT);
-
-/*******************************************************************//**
-Assigns a new space id for a new single-table tablespace. This works simply by
-incrementing the global counter. If 4 billion id's is not enough, we may need
-to recycle id's.
-@return TRUE if assigned, FALSE if not */
-UNIV_INTERN
-ibool
-fil_assign_new_space_id(
-/*====================*/
- ulint* space_id); /*!< in/out: space id */
-/*******************************************************************//**
-Returns the path from the first fil_node_t found for the space ID sent.
-The caller is responsible for freeing the memory allocated here for the
-value returned.
-@return a copy of fil_node_t::name, NULL if space is zero or not found. */
-UNIV_INTERN
-char*
-fil_space_get_first_path(
-/*=====================*/
- ulint id); /*!< in: space id */
-/** Set the recovered size of a tablespace in pages.
-@param id tablespace ID
-@param size recovered size in pages */
-UNIV_INTERN
-void
-fil_space_set_recv_size(ulint id, ulint size);
-/*******************************************************************//**
-Returns the size of the space in pages. The tablespace must be cached in the
-memory cache.
-@return space size, 0 if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_size(
-/*===============*/
- ulint id); /*!< in: space id */
-/*******************************************************************//**
-Returns the flags of the space. The tablespace must be cached
-in the memory cache.
-@return flags, ULINT_UNDEFINED if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_flags(
-/*================*/
- ulint id); /*!< in: space id */
-/*******************************************************************//**
-Returns the compressed page size of the space, or 0 if the space
-is not compressed. The tablespace must be cached in the memory cache.
-@return compressed page size, ULINT_UNDEFINED if space not found */
-UNIV_INTERN
-ulint
-fil_space_get_zip_size(
-/*===================*/
- ulint id); /*!< in: space id */
-/*******************************************************************//**
-Checks if the pair space, page_no refers to an existing page in a tablespace
-file space. The tablespace must be cached in the memory cache.
-@return TRUE if the address is meaningful */
-UNIV_INTERN
-ibool
-fil_check_adress_in_tablespace(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint page_no);/*!< in: page number */
-/****************************************************************//**
-Initializes the tablespace memory cache. */
-UNIV_INTERN
-void
-fil_init(
-/*=====*/
- ulint hash_size, /*!< in: hash table size */
- ulint max_n_open); /*!< in: max number of open files */
-/*******************************************************************//**
-Closes the tablespace memory cache (the counterpart of fil_init()). */
-UNIV_INTERN
-void
-fil_close(void);
-/*===========*/
-/*******************************************************************//**
-Opens all log files and system tablespace data files. They stay open until the
-database server shutdown. This should be called at a server startup after the
-space objects for the log and the system tablespace have been created. The
-purpose of this operation is to make sure we never run out of file descriptors
-if we need to read from the insert buffer or to write to the log. */
-UNIV_INTERN
-void
-fil_open_log_and_system_tablespace_files(void);
-/*==========================================*/
-/*******************************************************************//**
-Closes all open files. There must not be any pending i/o's or not flushed
-modifications in the files. */
-UNIV_INTERN
-void
-fil_close_all_files(void);
-/*=====================*/
-/*******************************************************************//**
-Closes the redo log files. There must not be any pending i/o's or not
-flushed modifications in the files. */
-UNIV_INTERN
-void
-fil_close_log_files(
-/*================*/
- bool free); /*!< in: whether to free the memory object */
-/*******************************************************************//**
-Sets the max tablespace id counter if the given number is bigger than the
-previous value. */
-UNIV_INTERN
-void
-fil_set_max_space_id_if_bigger(
-/*===========================*/
- ulint max_id);/*!< in: maximum known id */
-
-#ifndef UNIV_HOTBACKUP
-
-/** Write the flushed LSN to the page header of the first page in the
-system tablespace.
-@param[in] lsn flushed LSN
-@return DB_SUCCESS or error number */
-dberr_t
-fil_write_flushed_lsn(
- lsn_t lsn)
- MY_ATTRIBUTE((warn_unused_result));
-
-/** Acquire a tablespace when it could be dropped concurrently.
-Used by background threads that do not necessarily hold proper locks
-for concurrency control.
-@param[in] id tablespace ID
-@param[in] silent whether to silently ignore missing tablespaces
-@return the tablespace
-@retval NULL if missing or being deleted or truncated */
-UNIV_INTERN
-fil_space_t*
-fil_space_acquire_low(ulint id, bool silent)
- MY_ATTRIBUTE((warn_unused_result));
-
-/** Acquire a tablespace when it could be dropped concurrently.
-Used by background threads that do not necessarily hold proper locks
-for concurrency control.
-Forwards to fil_space_acquire_low() with silent=false, so a missing
-tablespace is not silently ignored.
-@param[in] id tablespace ID
-@return the tablespace
-@retval NULL if missing or being deleted or truncated */
-inline
-fil_space_t*
-fil_space_acquire(ulint id)
-{
- return(fil_space_acquire_low(id, false));
-}
-
-/** Acquire a tablespace that may not exist.
-Used by background threads that do not necessarily hold proper locks
-for concurrency control.
-@param[in] id tablespace ID
-@return the tablespace
-@retval NULL if missing or being deleted */
-inline
-fil_space_t*
-fil_space_acquire_silent(ulint id)
-{
- return(fil_space_acquire_low(id, true));
-}
-
-/** Release a tablespace acquired with fil_space_acquire().
-@param[in,out] space tablespace to release */
-UNIV_INTERN
-void
-fil_space_release(fil_space_t* space);
-
-/** Acquire a tablespace for reading or writing a block,
-when it could be dropped concurrently.
-@param[in] id tablespace ID
-@return the tablespace
-@retval NULL if missing */
-UNIV_INTERN
-fil_space_t*
-fil_space_acquire_for_io(ulint id);
-
-/** Release a tablespace acquired with fil_space_acquire_for_io().
-@param[in,out] space tablespace to release */
-UNIV_INTERN
-void
-fil_space_release_for_io(fil_space_t* space);
-
-/** Return the next fil_space_t.
-Once started, the caller must keep calling this until it returns NULL.
-fil_space_acquire() and fil_space_release() are invoked here which
-blocks a concurrent operation from dropping the tablespace.
-@param[in,out] prev_space Pointer to the previous fil_space_t.
-If NULL, use the first fil_space_t on fil_system->space_list.
-@return pointer to the next fil_space_t.
-@retval NULL if this was the last */
-UNIV_INTERN
-fil_space_t*
-fil_space_next(
- fil_space_t* prev_space)
- MY_ATTRIBUTE((warn_unused_result));
-
-/** Return the next fil_space_t from key rotation list.
-Once started, the caller must keep calling this until it returns NULL.
-fil_space_acquire() and fil_space_release() are invoked here which
-blocks a concurrent operation from dropping the tablespace.
-@param[in,out] prev_space Pointer to the previous fil_space_t.
-If NULL, use the first fil_space_t on fil_system->rotation_list.
-@return pointer to the next fil_space_t.
-@retval NULL if this was the last*/
-UNIV_INTERN
-fil_space_t*
-fil_space_keyrotate_next(
- fil_space_t* prev_space)
- MY_ATTRIBUTE((warn_unused_result));
-
-/** Wrapper with reference-counting for a fil_space_t. */
-class FilSpace
-{
-public:
- /** Default constructor: Use this when reference counting
- is done outside this wrapper. */
- FilSpace() : m_space(NULL) {}
-
- /** Constructor: Look up the tablespace and increment the
- reference count if found.
- @param[in] space_id tablespace ID
- @param[in] silent whether not to print any errors */
- explicit FilSpace(ulint space_id, bool silent = false)
- : m_space(fil_space_acquire_low(space_id, silent)) {}
-
- /** Assignment operator: This assumes that fil_space_acquire()
- has already been done for the fil_space_t. The caller must
- assign NULL if it calls fil_space_release().
- @param[in] space tablespace to assign */
- class FilSpace& operator=(fil_space_t* space)
- {
- /* fil_space_acquire() must have been invoked. */
- ut_ad(space == NULL || space->n_pending_ops > 0);
- m_space = space;
- return(*this);
- }
-
- /** Destructor - Decrement the reference count if a fil_space_t
- is still assigned. */
- ~FilSpace()
- {
- if (m_space != NULL) {
- fil_space_release(m_space);
- }
- }
-
- /** Implicit type conversion
- @return the wrapped object */
- operator const fil_space_t*() const
- {
- return(m_space);
- }
-
- /** Explicit type conversion
- @return the wrapped object */
- const fil_space_t* operator()() const
- {
- return(m_space);
- }
-
-private:
- /** The wrapped pointer */
- fil_space_t* m_space;
-};
-
-/** Reads the flushed lsn, arch no, space_id and tablespace flag fields from
-the first page of a first data file at database startup.
-@param[in] data_file open data file
-@param[in] one_read_already true if first datafile is already
- read
-@param[out] flags FSP_SPACE_FLAGS
-@param[out] space_id tablespace ID
-@param[out] flushed_lsn flushed lsn value
-@param[out] crypt_data encryption crypt data
-@retval NULL on success, or if innodb_force_recovery is set
-@return pointer to an error message string */
-UNIV_INTERN
-const char*
-fil_read_first_page(
- pfs_os_file_t data_file,
- ibool one_read_already,
- ulint* flags,
- ulint* space_id,
- lsn_t* flushed_lsn,
- fil_space_crypt_t** crypt_data)
- MY_ATTRIBUTE((warn_unused_result));
-
-#endif /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Parses the body of a log record written about an .ibd file operation. That is,
-the log record part after the standard (type, space id, page no) header of the
-log record.
-
-If desired, also replays the delete or rename operation if the .ibd file
-exists and the space id in it matches. Replays the create operation if a file
-at that path does not exist yet. If the database directory for the file to be
-created does not exist, then we create the directory, too.
-
-Note that mysqlbackup --apply-log sets fil_path_to_mysql_datadir to point to
-the datadir that we should use in replaying the file operations.
-@return end of log record, or NULL if the record was not completely
-contained between ptr and end_ptr */
-UNIV_INTERN
-byte*
-fil_op_log_parse_or_replay(
-/*=======================*/
- byte* ptr, /*!< in: buffer containing the log record body,
- or an initial segment of it, if the record does
- not fit completely between ptr and end_ptr */
- byte* end_ptr, /*!< in: buffer end */
- ulint type, /*!< in: the type of this log record */
- ulint space_id, /*!< in: the space id of the tablespace in
- question, or 0 if the log record should
- only be parsed but not replayed */
- ulint log_flags); /*!< in: redo log flags
- (stored in the page number parameter) */
-/*******************************************************************//**
-Deletes a single-table tablespace. The tablespace must be cached in the
-memory cache.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-dberr_t
-fil_delete_tablespace(
-/*==================*/
- ulint id, /*!< in: space id */
- buf_remove_t buf_remove); /*!< in: specify the action to take
- on the tables pages in the buffer
- pool */
-/*******************************************************************//**
-Closes a single-table tablespace. The tablespace must be cached in the
-memory cache. Free all pages used by the tablespace.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-dberr_t
-fil_close_tablespace(
-/*=================*/
- trx_t* trx, /*!< in/out: Transaction covering the close */
- ulint id); /*!< in: space id */
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Discards a single-table tablespace. The tablespace must be cached in the
-memory cache. Discarding is like deleting a tablespace, but
-
- 1. We do not drop the table from the data dictionary;
-
- 2. We remove all insert buffer entries for the tablespace immediately;
- in DROP TABLE they are only removed gradually in the background;
-
- 3. When the user does IMPORT TABLESPACE, the tablespace will have the
- same id as it originally had.
-
- 4. Free all the pages in use by the tablespace if rename=TRUE.
-@return DB_SUCCESS or error */
-UNIV_INTERN
-dberr_t
-fil_discard_tablespace(
-/*===================*/
- ulint id) /*!< in: space id */
- MY_ATTRIBUTE((warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
-
-/** Test if a tablespace file can be renamed to a new filepath by checking
-that the old filepath exists and the new filepath does not exist.
-@param[in] space_id tablespace id
-@param[in] old_path old filepath
-@param[in] new_path new filepath
-@param[in] is_discarded whether the tablespace is discarded
-@return innodb error code */
-dberr_t
-fil_rename_tablespace_check(
- ulint space_id,
- const char* old_path,
- const char* new_path,
- bool is_discarded);
-
-/*******************************************************************//**
-Renames a single-table tablespace. The tablespace must be cached in the
-tablespace memory cache.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_rename_tablespace(
-/*==================*/
- const char* old_name_in, /*!< in: old table name in the
- standard databasename/tablename
- format of InnoDB, or NULL if we
- do the rename based on the space
- id only */
- ulint id, /*!< in: space id */
- const char* new_name, /*!< in: new table name in the
- standard databasename/tablename
- format of InnoDB */
- const char* new_path); /*!< in: new full datafile path
- if the tablespace is remotely
- located, or NULL if it is located
- in the normal data directory. */
-
-/*******************************************************************//**
-Allocates a file name for a single-table tablespace. The string must be freed
-by caller with mem_free().
-@return own: file name */
-UNIV_INTERN
-char*
-fil_make_ibd_name(
-/*==============*/
- const char* name, /*!< in: table name or a dir path */
- bool is_full_path); /*!< in: TRUE if it is a dir path */
-/*******************************************************************//**
-Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link).
-The string must be freed by caller with mem_free().
-@return own: file name */
-UNIV_INTERN
-char*
-fil_make_isl_name(
-/*==============*/
- const char* name); /*!< in: table name */
-/*******************************************************************//**
-Creates a new InnoDB Symbolic Link (ISL) file. It is always created
-under the 'datadir' of MySQL. The datadir is the directory of a
-running mysqld program. We can refer to it by simply using the path '.'.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fil_create_link_file(
-/*=================*/
- const char* tablename, /*!< in: tablename */
- const char* filepath); /*!< in: pathname of tablespace */
-/*******************************************************************//**
-Deletes an InnoDB Symbolic Link (ISL) file. */
-UNIV_INTERN
-void
-fil_delete_link_file(
-/*==================*/
- const char* tablename); /*!< in: name of table */
-/*******************************************************************//**
-Reads an InnoDB Symbolic Link (ISL) file.
-It is always created under the 'datadir' of MySQL. The name is of the
-form {databasename}/{tablename}, and the ISL file is expected to be in a
-'{databasename}' directory called '{tablename}.isl'. The caller must free
-the memory of the null-terminated path returned if it is not null.
-@return own: filepath found in link file, NULL if not found. */
-UNIV_INTERN
-char*
-fil_read_link_file(
-/*===============*/
- const char* name); /*!< in: tablespace name */
-
-#include "fil0crypt.h"
-
-/*******************************************************************//**
-Creates a new single-table tablespace to a database directory of MySQL.
-Database directories are under the 'datadir' of MySQL. The datadir is the
-directory of a running mysqld program. We can refer to it by simply the
-path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
-dir of the mysqld server.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fil_create_new_single_table_tablespace(
-/*===================================*/
- ulint space_id, /*!< in: space id */
- const char* tablename, /*!< in: the table name in the usual
- databasename/tablename format
- of InnoDB */
- const char* dir_path, /*!< in: NULL or a dir path */
- ulint flags, /*!< in: tablespace flags */
- ulint flags2, /*!< in: table flags2 */
- ulint size, /*!< in: the initial size of the
- tablespace file in pages,
- must be >= FIL_IBD_FILE_INITIAL_SIZE */
- fil_encryption_t mode, /*!< in: encryption mode */
- ulint key_id) /*!< in: encryption key_id */
- MY_ATTRIBUTE((nonnull(2), warn_unused_result));
-#ifndef UNIV_HOTBACKUP
-/** Try to adjust FSP_SPACE_FLAGS if they differ from the expectations.
-(Typically when upgrading from MariaDB 10.1.0..10.1.20.)
-@param[in] space_id tablespace ID
-@param[in] flags desired tablespace flags */
-UNIV_INTERN
-void
-fsp_flags_try_adjust(ulint space_id, ulint flags);
-
-/********************************************************************//**
-Tries to open a single-table tablespace and optionally checks the space id is
-right in it. If it does not succeed, prints an error message to the .err log. This
-function is used to open a tablespace when we start up mysqld, and also in
-IMPORT TABLESPACE.
-NOTE that we assume this operation is used either at the database startup
-or under the protection of the dictionary mutex, so that two users cannot
-race here. This operation does not leave the file associated with the
-tablespace open, but closes it after we have looked at the space id in it.
-
-If the validate boolean is set, we read the first page of the file and
-check that the space id in the file is what we expect. We assume that
-this function runs much faster if no check is made, since accessing the
-file inode probably is much faster (the OS caches them) than accessing
-the first page of the file. This boolean may be initially FALSE, but if
-a remote tablespace is found it will be changed to true.
-
-If the fix_dict boolean is set, then it is safe to use an internal SQL
-statement to update the dictionary tables if they are incorrect.
-
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fil_open_single_table_tablespace(
-/*=============================*/
- bool validate, /*!< in: Do we validate tablespace? */
- bool fix_dict, /*!< in: Can we fix the dictionary? */
- ulint id, /*!< in: space id */
- ulint flags, /*!< in: expected FSP_SPACE_FLAGS */
- const char* tablename, /*!< in: table name in the
- databasename/tablename format */
- const char* filepath) /*!< in: tablespace filepath */
- __attribute__((nonnull(5), warn_unused_result));
-
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-At the server startup, if we need crash recovery, scans the database
-directories under the MySQL datadir, looking for .ibd files. Those files are
-single-table tablespaces. We need to know the space id in each of them so that
-we know into which file we should look to check the contents of a page stored
-in the doublewrite buffer, also to know where to apply log records where the
-space id is != 0.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-dberr_t
-fil_load_single_table_tablespaces(ibool (*pred)(const char*, const char*)=0);
-/*===================================*/
-/*******************************************************************//**
-Returns TRUE if a single-table tablespace does not exist in the memory cache,
-or is being deleted there.
-@return TRUE if does not exist or is being deleted */
-UNIV_INTERN
-ibool
-fil_tablespace_deleted_or_being_deleted_in_mem(
-/*===========================================*/
- ulint id, /*!< in: space id */
- ib_int64_t version);/*!< in: tablespace_version should be this; if
- you pass -1 as the value of this, then this
- parameter is ignored */
-/*******************************************************************//**
-Returns TRUE if a single-table tablespace exists in the memory cache.
-@return TRUE if exists */
-UNIV_INTERN
-ibool
-fil_tablespace_exists_in_mem(
-/*=========================*/
- ulint id); /*!< in: space id */
-#ifndef UNIV_HOTBACKUP
-/** Check if a matching tablespace exists in the InnoDB tablespace memory
-cache. Note that if we have not done a crash recovery at the database startup,
-there may be many tablespaces which are not yet in the memory cache.
-@return whether a matching tablespace exists in the memory cache */
-UNIV_INTERN
-bool
-fil_space_for_table_exists_in_mem(
-/*==============================*/
- ulint id, /*!< in: space id */
- const char* name, /*!< in: table name in the standard
- 'databasename/tablename' format */
- bool print_error_if_does_not_exist,
- /*!< in: print detailed error
- information to the .err log if a
- matching tablespace is not found from
- memory */
- bool remove_from_data_dict_if_does_not_exist,
- /*!< in: remove from the data dictionary
- if tablespace does not exist */
- bool adjust_space, /*!< in: whether to adjust space id
- when find table space mismatch */
- mem_heap_t* heap, /*!< in: heap memory */
- table_id_t table_id, /*!< in: table id */
- ulint table_flags); /*!< in: table flags */
-#else /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Extends all tablespaces to the size stored in the space header. During the
-mysqlbackup --apply-log phase we extended the spaces on-demand so that log
-records could be applied, but that may have left spaces still too small
-compared to the size stored in the space header. */
-UNIV_INTERN
-void
-fil_extend_tablespaces_to_stored_len(void);
-/*======================================*/
-#endif /* !UNIV_HOTBACKUP */
-/**********************************************************************//**
-Tries to extend a data file so that it would accommodate the number of pages
-given. The tablespace must be cached in the memory cache. If the space is big
-enough already, does nothing.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fil_extend_space_to_desired_size(
-/*=============================*/
- ulint* actual_size, /*!< out: size of the space after extension;
- if we ran out of disk space this may be lower
- than the desired size */
- ulint space_id, /*!< in: space id */
- ulint size_after_extend);/*!< in: desired size in pages after the
- extension; if the current space size is bigger
- than this already, the function does nothing */
-/*******************************************************************//**
-Tries to reserve free extents in a file space.
-@return TRUE on success */
-UNIV_INTERN
-ibool
-fil_space_reserve_free_extents(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint n_free_now, /*!< in: number of free extents now */
- ulint n_to_reserve); /*!< in: how many one wants to reserve */
-/*******************************************************************//**
-Releases free extents in a file space. */
-UNIV_INTERN
-void
-fil_space_release_free_extents(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint n_reserved); /*!< in: how many one reserved */
-/*******************************************************************//**
-Gets the number of reserved extents. If the database is silent, this number
-should be zero. */
-UNIV_INTERN
-ulint
-fil_space_get_n_reserved_extents(
-/*=============================*/
- ulint id); /*!< in: space id */
-/********************************************************************//**
-Reads or writes data. This operation is asynchronous (aio).
-@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
-i/o on a tablespace which does not exist */
-UNIV_INTERN
-dberr_t
-_fil_io(
-/*===*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE,
- ORed to OS_FILE_LOG, if a log i/o
- and ORed to OS_AIO_SIMULATED_WAKE_LATER
- if simulated aio and we want to post a
- batch of i/os; NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- bool sync, /*!< in: true if synchronous aio is desired */
- ulint space_id, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint block_offset, /*!< in: offset in number of blocks */
- ulint byte_offset, /*!< in: remainder of offset in bytes; in
- aio this must be divisible by the OS block
- size */
- ulint len, /*!< in: how many bytes to read or write; this
- must not cross a file boundary; in aio this
- must be a block size multiple */
- void* buf, /*!< in/out: buffer where to store read data
- or from where to write; in aio this must be
- appropriately aligned */
- void* message, /*!< in: message for aio handler if non-sync
- aio used, else ignored */
- ulint* write_size, /*!< in/out: Actual write size initialized
- after first successful trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
- trx_t* trx) /*!< in: trx */
-
- __attribute__((nonnull(8)));
-
-#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, write_size) \
- _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, write_size, NULL)
-
-/** Determine the block size of the data file.
-@param[in] space tablespace
-@param[in] offset page number
-@return block size */
-UNIV_INTERN
-ulint
-fil_space_get_block_size(const fil_space_t* space, unsigned offset);
-
-/**********************************************************************//**
-Waits for an aio operation to complete. This function is used to write the
-handler for completed requests. The aio array of pending requests is divided
-into segments (see os0file.cc for more info). The thread specifies which
-segment it wants to wait for. */
-UNIV_INTERN
-void
-fil_aio_wait(
-/*=========*/
- ulint segment); /*!< in: the number of the segment in the aio
- array to wait for */
-/**********************************************************************//**
-Flushes to disk possible writes cached by the OS. If the space does not exist
-or is being dropped, does not do anything. */
-UNIV_INTERN
-void
-fil_flush(
-/*======*/
- ulint space_id); /*!< in: file space id (this can be a group of
- log files or a tablespace of the database) */
-/** Flush a tablespace.
-@param[in,out] space tablespace to flush */
-UNIV_INTERN
-void
-fil_flush(fil_space_t* space);
-
-/** Flush to disk the writes in file spaces of the given type
-possibly cached by the OS.
-@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG */
-UNIV_INTERN
-void
-fil_flush_file_spaces(ulint purpose);
-/******************************************************************//**
-Checks the consistency of the tablespace cache.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fil_validate(void);
-/*==============*/
-/********************************************************************//**
-Returns TRUE if file address is undefined.
-@return TRUE if undefined */
-UNIV_INTERN
-ibool
-fil_addr_is_null(
-/*=============*/
- fil_addr_t addr); /*!< in: address */
-/********************************************************************//**
-Get the predecessor of a file page.
-@return FIL_PAGE_PREV */
-UNIV_INTERN
-ulint
-fil_page_get_prev(
-/*==============*/
- const byte* page); /*!< in: file page */
-/********************************************************************//**
-Get the successor of a file page.
-@return FIL_PAGE_NEXT */
-UNIV_INTERN
-ulint
-fil_page_get_next(
-/*==============*/
- const byte* page); /*!< in: file page */
-/*********************************************************************//**
-Sets the file page type. */
-UNIV_INTERN
-void
-fil_page_set_type(
-/*==============*/
- byte* page, /*!< in/out: file page */
- ulint type); /*!< in: type */
-/*********************************************************************//**
-Gets the file page type.
-@return type; NOTE that if the type has not been written to page, the
-return value is not defined */
-UNIV_INTERN
-ulint
-fil_page_get_type(
-/*==============*/
- const byte* page); /*!< in: file page */
-
-/*******************************************************************//**
-Returns TRUE if a single-table tablespace is being deleted.
-@return TRUE if being deleted */
-UNIV_INTERN
-ibool
-fil_tablespace_is_being_deleted(
-/*============================*/
- ulint id); /*!< in: space id */
-
-/********************************************************************//**
-Delete the tablespace file and any related files like .cfg.
-This should not be called for temporary tables. */
-UNIV_INTERN
-void
-fil_delete_file(
-/*============*/
- const char* path); /*!< in: filepath of the ibd tablespace */
-
-/** Callback functor with pure virtual init(), operator()() and get_space_id() hooks; fil_tablespace_iterate() takes one by reference as its callback argument. */
-struct PageCallback {
-
- /**
- Default constructor: value-initializes m_zip_size, m_page_size and m_filepath; m_file is not named in the initializer list */
- PageCallback()
- :
- m_zip_size(),
- m_page_size(),
- m_filepath() UNIV_NOTHROW {}
-
- virtual ~PageCallback() UNIV_NOTHROW {}
-
- /**
- Called for page 0 in the tablespace file at the start.
- @param file_size - size of the file in bytes
- @param block - contents of the first page in the tablespace file
- @retval DB_SUCCESS or error code.*/
- virtual dberr_t init(
- os_offset_t file_size,
- const buf_block_t* block) UNIV_NOTHROW = 0;
-
- /**
- Called for every page in the tablespace. If the page was not
- updated then its state must be set to BUF_PAGE_NOT_USED. For
- compressed tables the page descriptor memory will be at offset:
- block->frame + UNIV_PAGE_SIZE;
- @param offset - physical offset within the file
- @param block - block read from file, note it is not from the buffer pool
- @retval DB_SUCCESS or error code. */
- virtual dberr_t operator()(
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW = 0;
-
- /**
- Record the name of the physical file and the file handle that is used
- to open it for the file that is being iterated over.
- @param filename - the physical name of the tablespace file; only the pointer is stored, the string is not copied
- @param file - OS file handle */
- void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW
- {
- m_file = file;
- m_filepath = filename;
- }
-
- /**
- @return the space id of the tablespace */
- virtual ulint get_space_id() const UNIV_NOTHROW = 0;
-
- /** The compressed page size
- @return the compressed page size (m_zip_size) */
- ulint get_zip_size() const
- {
- return(m_zip_size);
- }
-
- /**
- Set the tablespace compressed table size.
- @return DB_SUCCESS if it is valid or DB_CORRUPTION if not */
- dberr_t set_zip_size(const buf_frame_t* page) UNIV_NOTHROW;
-
- /** The tablespace page size
- @return the tablespace page size (m_page_size) */
- ulint get_page_size() const
- {
- return(m_page_size);
- }
-
- /** Compressed table page size */
- ulint m_zip_size;
-
- /** The tablespace page size. */
- ulint m_page_size;
-
- /** File handle to the tablespace; assigned by set_file() */
- pfs_os_file_t m_file;
-
- /** Physical file path; assigned by set_file() */
- const char* m_filepath;
-
-protected:
- // Disable copying: the copy constructor and copy assignment are only declared here, with protected access
- PageCallback(const PageCallback&);
- PageCallback& operator=(const PageCallback&);
-};
-
-/********************************************************************//**
-Iterate over all the pages in the tablespace.
-@param table - the table definition in the server
-@param n_io_buffers - number of blocks to read and write together
-@param callback - functor that will do the page updates
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fil_tablespace_iterate(
-/*===================*/
- dict_table_t* table,
- ulint n_io_buffers,
- PageCallback& callback)
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/*******************************************************************//**
-Checks if a single-table tablespace for a given table name exists in the
-tablespace memory cache.
-@return space id, ULINT_UNDEFINED if not found */
-UNIV_INTERN
-ulint
-fil_get_space_id_for_table(
-/*=======================*/
- const char* name); /*!< in: table name in the standard
- 'databasename/tablename' format */
-
-/**
-Iterate over all the spaces in the space list and fetch the
-tablespace names. It will return a copy of the name that must be
-freed by the caller using: delete[].
-@return DB_SUCCESS if all OK. */
-UNIV_INTERN
-dberr_t
-fil_get_space_names(
-/*================*/
- space_name_list_t& space_name_list)
- /*!< in/out: Vector for collecting the names. */
- MY_ATTRIBUTE((warn_unused_result));
-
-/** Generate redo log for swapping two .ibd files
-@param[in] old_table old table
-@param[in] new_table new table
-@param[in] tmp_name temporary table name
-@param[in,out] mtr mini-transaction
-@return innodb error code */
-UNIV_INTERN
-dberr_t
-fil_mtr_rename_log(
- const dict_table_t* old_table,
- const dict_table_t* new_table,
- const char* tmp_name,
- mtr_t* mtr)
- MY_ATTRIBUTE((nonnull));
-
-/*******************************************************************//**
-Finds the given page_no of the given space id from the double write buffer,
-and copies it to the corresponding .ibd file.
-@return true if copy was successful, or false. */
-bool
-fil_user_tablespace_restore_page(
-/*==============================*/
- fsp_open_info* fsp, /* in: contains space id and .ibd
- file information */
- ulint page_no); /* in: page_no to obtain from double
- write buffer */
-
-/*******************************************************************//**
-Returns a pointer to the fil_space_t that is in the memory cache
-associated with a space id.
-@return fil_space_t pointer, NULL if space not found */
-fil_space_t*
-fil_space_get(
-/*==========*/
- ulint id); /*!< in: space id */
-#endif /* !UNIV_INNOCHECKSUM */
-
-/*************************************************************************
-Return local hash table information. */
-
-ulint
-fil_system_hash_cells(void);
-/*========================*/
-
-ulint
-fil_system_hash_nodes(void);
-/*========================*/
-
-/*************************************************************************
-functions to access is_corrupt flag of fil_space_t*/
-
-void
-fil_space_set_corrupt(
-/*==================*/
- ulint space_id);
-
-/** Acquire the fil_system mutex. */
-#define fil_system_enter() mutex_enter(&fil_system->mutex)
-/** Release the fil_system mutex. */
-#define fil_system_exit() mutex_exit(&fil_system->mutex)
-
-#ifndef UNIV_INNOCHECKSUM
-/*******************************************************************//**
-Returns the table space by a given id, NULL if not found. */
-fil_space_t*
-fil_space_found_by_id(
-/*==================*/
- ulint id); /*!< in: space id */
-
-/*******************************************************************//**
-Returns the table space by a given id, NULL if not found. */
-fil_space_t*
-fil_space_get_by_id(
-/*================*/
- ulint id); /*!< in: space id */
-
-#endif /* UNIV_INNOCHECKSUM */
-
-/****************************************************************//**
-Does error handling when a file operation fails.
-@return TRUE if we should retry the operation */
-ibool
-os_file_handle_error_no_exit(
-/*=========================*/
- const char* name, /*!< in: name of a file or NULL */
- const char* operation, /*!< in: operation */
- ibool on_error_silent,/*!< in: if TRUE then don't print
- any message to the log. */
- const char* file, /*!< in: file name */
- const ulint line); /*!< in: line */
-
-/*******************************************************************//**
-Return page type name */
-UNIV_INLINE
-const char*
-fil_get_page_type_name(
-/*===================*/
- ulint page_type); /*!< in: FIL_PAGE_TYPE */
-
-#ifndef UNIV_NONINL
-#include "fil0fil.ic"
-#endif
-
-#endif /* fil0fil_h */
diff --git a/storage/xtradb/include/fil0fil.ic b/storage/xtradb/include/fil0fil.ic
deleted file mode 100644
index 6c2504c9f8c..00000000000
--- a/storage/xtradb/include/fil0fil.ic
+++ /dev/null
@@ -1,148 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2015, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/fil0fil.ic
-The low-level file system support functions
-
-Created 31/03/2015 Jan Lindström
-*******************************************************/
-
-#ifndef fil0fil_ic
-#define fil0fil_ic
-
-/*******************************************************************//**
-Return a constant, human-readable name for a FIL_PAGE_TYPE value; a value matched by no case label yields "PAGE TYPE CORRUPTED" */
-UNIV_INLINE
-const char*
-fil_get_page_type_name(
-/*===================*/
- ulint page_type) /*!< in: FIL_PAGE_TYPE */
-{
- switch(page_type) {
- case FIL_PAGE_PAGE_COMPRESSED:
- return "PAGE_COMPRESSED";
- case FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED:
- return "PAGE_COMPRESSED_ENCRYPTED";
- case FIL_PAGE_INDEX:
- return "INDEX";
- case FIL_PAGE_UNDO_LOG:
- return "UNDO LOG";
- case FIL_PAGE_INODE:
- return "INODE";
- case FIL_PAGE_IBUF_FREE_LIST:
- return "IBUF_FREE_LIST";
- case FIL_PAGE_TYPE_ALLOCATED:
- return "ALLOCATED";
- case FIL_PAGE_IBUF_BITMAP:
- return "IBUF_BITMAP";
- case FIL_PAGE_TYPE_SYS:
- return "SYS";
- case FIL_PAGE_TYPE_TRX_SYS:
- return "TRX_SYS";
- case FIL_PAGE_TYPE_FSP_HDR:
- return "FSP_HDR";
- case FIL_PAGE_TYPE_XDES:
- return "XDES";
- case FIL_PAGE_TYPE_BLOB:
- return "BLOB";
- case FIL_PAGE_TYPE_ZBLOB:
- return "ZBLOB";
- case FIL_PAGE_TYPE_ZBLOB2:
- return "ZBLOB2";
- case FIL_PAGE_TYPE_COMPRESSED:
- return "ORACLE PAGE COMPRESSED";
- }
-
- return "PAGE TYPE CORRUPTED"; /* reached only when no case label above matched */
-
-}
-
-/****************************************************************//**
-Get the block size recorded in a fil node (its file_block_size field).
-@return node->file_block_size */
-UNIV_INLINE
-ulint
-fil_node_get_block_size(
-/*====================*/
- fil_node_t* node) /*!< in: Node where to get block
- size; dereferenced without a NULL check */
-{
- return (node->file_block_size);
-}
-
-/****************************************************************//**
-Validate the page type field. The check is compiled only under UNIV_DEBUG: a type that is none of the listed values is dumped to stderr and ut_error is invoked.
-@return true if the type is one of the listed values, or always when UNIV_DEBUG is not defined; false only on the path that follows ut_error */
-UNIV_INLINE
-bool
-fil_page_type_validate(
- const byte* page) /*!< in: page */
-{
-#ifdef UNIV_DEBUG
- ulint page_type = mach_read_from_2(page + FIL_PAGE_TYPE);
-
- /* Validate page type: the branch below is taken only when page_type equals none of these values */
- if (!((page_type == FIL_PAGE_PAGE_COMPRESSED ||
- page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED ||
- page_type == FIL_PAGE_INDEX ||
- page_type == FIL_PAGE_UNDO_LOG ||
- page_type == FIL_PAGE_INODE ||
- page_type == FIL_PAGE_IBUF_FREE_LIST ||
- page_type == FIL_PAGE_TYPE_ALLOCATED ||
- page_type == FIL_PAGE_IBUF_BITMAP ||
- page_type == FIL_PAGE_TYPE_SYS ||
- page_type == FIL_PAGE_TYPE_TRX_SYS ||
- page_type == FIL_PAGE_TYPE_FSP_HDR ||
- page_type == FIL_PAGE_TYPE_XDES ||
- page_type == FIL_PAGE_TYPE_BLOB ||
- page_type == FIL_PAGE_TYPE_ZBLOB ||
- page_type == FIL_PAGE_TYPE_ZBLOB2 ||
- page_type == FIL_PAGE_TYPE_COMPRESSED))) {
-
- ulint key_version = mach_read_from_4(page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION);
- bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED); /* always false here: this type passes the test above */
- bool page_compressed_encrypted = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED); /* always false here, for the same reason */
- ulint space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- ulint offset = mach_read_from_4(page + FIL_PAGE_OFFSET);
- ib_uint64_t lsn = mach_read_from_8(page + FIL_PAGE_LSN);
- ulint compressed_len = mach_read_from_2(page + FIL_PAGE_DATA);
- fil_system_enter(); /* the lookup is done while holding the fil_system mutex */
- fil_space_t* rspace = fil_space_get_by_id(space);
- fil_system_exit(); /* NOTE(review): rspace->name is read below after the mutex is released and without a NULL check, although fil_space_get_by_id() is documented as returning NULL if the id is not found - confirm */
-
- /* Dump out the page info */
- fprintf(stderr, "InnoDB: Page " ULINTPF ":" ULINTPF
- " name %s page_type " ULINTPF " page_type_name %s\n"
- "InnoDB: key_version " ULINTPF
- " page_compressed %d page_compressed_encrypted %d lsn "
- LSN_PF " compressed_len " ULINTPF "\n",
- space, offset, rspace->name, page_type,
- fil_get_page_type_name(page_type),
- key_version,
- page_compressed, page_compressed_encrypted,
- lsn, compressed_len);
- ut_error; /* NOTE(review): presumably aborts, which would make the return below unreachable - confirm */
- return false;
- }
-
-#endif /* UNIV_DEBUG */
- return true;
-}
-
-#endif /* fil0fil_ic */
diff --git a/storage/xtradb/include/fil0pagecompress.h b/storage/xtradb/include/fil0pagecompress.h
deleted file mode 100644
index 03e16699ce3..00000000000
--- a/storage/xtradb/include/fil0pagecompress.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2013, 2017 MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-#ifndef fil0pagecompress_h
-#define fil0pagecompress_h
-
-#include "fsp0fsp.h"
-#include "fsp0pagecompress.h"
-
-/******************************************************************//**
-@file include/fil0pagecompress.h
-Helper functions for extracting/storing page compression and
-atomic writes information to table space.
-
-Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
-***********************************************************************/
-
-/*******************************************************************//**
-Find out whether the page is an index page or not
-@return true if page type index page, false if not */
-UNIV_INLINE
-ibool
-fil_page_is_index_page(
-/*===================*/
- byte *buf); /*!< in: page */
-
-/****************************************************************//**
-Get the name of the compression algorithm used for page
-compression.
-@return compression algorithm name or "UNKNOWN" if not known*/
-UNIV_INLINE
-const char*
-fil_get_compression_alg_name(
-/*=========================*/
- ulint comp_alg); /*!<in: compression algorithm number */
-
-/****************************************************************//**
-For page compressed pages compress the page before actual write
-operation.
-@return compressed page to be written*/
-UNIV_INTERN
-byte*
-fil_compress_page(
-/*==============*/
- fil_space_t* space, /*!< in,out: tablespace (NULL during IMPORT) */
- byte* buf, /*!< in: buffer from which to write; in aio
- this must be appropriately aligned */
- byte* out_buf, /*!< out: compressed buffer */
- ulint len, /*!< in: length of input buffer.*/
- ulint level, /* in: compression level */
- ulint block_size, /*!< in: block size */
- bool encrypted, /*!< in: is page also encrypted */
- ulint* out_len); /*!< out: actual length of compressed
- page */
-
-/****************************************************************//**
-For page compressed pages decompress the page after actual read
-operation. */
-UNIV_INTERN
-void
-fil_decompress_page(
-/*================*/
- byte* page_buf, /*!< in: preallocated buffer or NULL */
- byte* buf, /*!< out: buffer from which to read; in aio
- this must be appropriately aligned */
- ulong len, /*!< in: length of output buffer.*/
- ulint* write_size, /*!< in/out: Actual payload size of
- the compressed data. */
- bool return_error=false);
- /*!< in: true if only an error should
- be produced when decompression fails.
- By default this parameter is false. */
-
-/****************************************************************//**
-Get space id from fil node
-@return space id*/
-UNIV_INTERN
-ulint
-fil_node_get_space_id(
-/*==================*/
- fil_node_t* node); /*!< in: Node where to get space id*/
-
-/****************************************************************//**
-Get block size from fil node
-@return block size*/
-UNIV_INLINE
-ulint
-fil_node_get_block_size(
- fil_node_t* node); /*!< in: Node where to get block
- size */
-/*******************************************************************//**
-Find out whether the page is page compressed
-@return true if page is page compressed*/
-UNIV_INLINE
-ibool
-fil_page_is_compressed(
-/*===================*/
- byte* buf); /*!< in: page */
-
-/*******************************************************************//**
-Find out whether the page is page compressed and encrypted
-@return true if page is page compressed and encrypted*/
-UNIV_INLINE
-ibool
-fil_page_is_compressed_encrypted(
-/*=============================*/
- byte* buf); /*!< in: page */
-
-/*******************************************************************//**
-Find out whether the page is page compressed with lzo method
-@return true if page is page compressed with lzo method*/
-UNIV_INLINE
-ibool
-fil_page_is_lzo_compressed(
-/*=======================*/
- byte* buf); /*!< in: page */
-#endif
diff --git a/storage/xtradb/include/fsp0fsp.h b/storage/xtradb/include/fsp0fsp.h
deleted file mode 100644
index 715572199ab..00000000000
--- a/storage/xtradb/include/fsp0fsp.h
+++ /dev/null
@@ -1,1068 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/fsp0fsp.h
-File space management
-
-Created 12/18/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef fsp0fsp_h
-#define fsp0fsp_h
-
-#include "univ.i"
-
-#ifndef UNIV_INNOCHECKSUM
-
-#include "mtr0mtr.h"
-#include "fut0lst.h"
-#include "ut0byte.h"
-#include "page0types.h"
-#include "fsp0types.h"
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/* @defgroup fsp_flags InnoDB Tablespace Flag Constants @{ */
-
-/** Width of the POST_ANTELOPE flag */
-#define FSP_FLAGS_WIDTH_POST_ANTELOPE 1
-/** Number of flag bits used to indicate the tablespace zip page size */
-#define FSP_FLAGS_WIDTH_ZIP_SSIZE 4
-/** Width of the ATOMIC_BLOBS flag. The ability to break up a long
-column into an in-record prefix and an externally stored part is available
-to the two Barracuda row formats COMPRESSED and DYNAMIC. */
-#define FSP_FLAGS_WIDTH_ATOMIC_BLOBS 1
-/** Number of flag bits used to indicate the tablespace page size */
-#define FSP_FLAGS_WIDTH_PAGE_SSIZE 4
-/** Number of reserved bits (bits 10..15 in the MariaDB 10.1.21 layout below) */
-#define FSP_FLAGS_WIDTH_RESERVED 6
-/** Number of flag bits used to indicate the page compression */
-#define FSP_FLAGS_WIDTH_PAGE_COMPRESSION 1
-
-/** Width of all the currently known persistent tablespace flags (17 bits) */
-#define FSP_FLAGS_WIDTH (FSP_FLAGS_WIDTH_POST_ANTELOPE \
- + FSP_FLAGS_WIDTH_ZIP_SSIZE \
- + FSP_FLAGS_WIDTH_ATOMIC_BLOBS \
- + FSP_FLAGS_WIDTH_PAGE_SSIZE \
- + FSP_FLAGS_WIDTH_RESERVED \
- + FSP_FLAGS_WIDTH_PAGE_COMPRESSION)
-
-/** A mask of all the known/used bits in FSP_SPACE_FLAGS (the low FSP_FLAGS_WIDTH bits) */
-#define FSP_FLAGS_MASK (~(~0U << FSP_FLAGS_WIDTH))
-
-/* FSP_SPACE_FLAGS bit positions and names in MySQL 5.6/MariaDB 10.0 or
-older, in MariaDB 10.1 up to and including 10.1.20, and in MariaDB 10.1.21
-or newer.
-MySQL 5.6 MariaDB 10.1.x MariaDB 10.1.21
-====================================================================
-Below flags in same offset
-====================================================================
-0: POST_ANTELOPE 0:POST_ANTELOPE 0: POST_ANTELOPE
-1..4: ZIP_SSIZE(0..5) 1..4:ZIP_SSIZE(0..5) 1..4: ZIP_SSIZE(0..5)
-(NOTE: bit 4 is always 0)
-5: ATOMIC_BLOBS 5:ATOMIC_BLOBS 5: ATOMIC_BLOBS
-=====================================================================
-Below note the order difference:
-=====================================================================
-6..9: PAGE_SSIZE(3..7) 6: COMPRESSION 6..9: PAGE_SSIZE(3..7)
-10: DATA_DIR 7..10: COMP_LEVEL(0..9) 10: RESERVED (5.6 DATA_DIR)
-=====================================================================
-The flags below were in incorrect position in MariaDB 10.1,
-or have been introduced in MySQL 5.7 or 8.0:
-=====================================================================
-11: UNUSED 11..12:ATOMIC_WRITES 11: RESERVED (5.7 SHARED)
- 12: RESERVED (5.7 TEMPORARY)
- 13..15:PAGE_SSIZE(3..7) 13: RESERVED (5.7 ENCRYPTION)
- 14: RESERVED (8.0 SDI)
- 15: RESERVED
- 16: PAGE_SSIZE_msb(0) 16: COMPRESSION
- 17: DATA_DIR 17: UNUSED
- 18: UNUSED
-=====================================================================
-The flags below only exist in fil_space_t::flags, not in FSP_SPACE_FLAGS:
-=====================================================================
- 25: DATA_DIR
- 26..27: ATOMIC_WRITES
- 28..31: COMPRESSION_LEVEL
-*/
-
-/** A mask of the memory-only flags in fil_space_t::flags (bit 25 and above) */
-#define FSP_FLAGS_MEM_MASK (~0U << FSP_FLAGS_MEM_DATA_DIR)
-
-/** Zero relative shift position of the DATA_DIR flag */
-#define FSP_FLAGS_MEM_DATA_DIR 25
-/** Zero relative shift position of the ATOMIC_WRITES field */
-#define FSP_FLAGS_MEM_ATOMIC_WRITES 26
-/** Zero relative shift position of the COMPRESSION_LEVEL field */
-#define FSP_FLAGS_MEM_COMPRESSION_LEVEL 28
-
-/** Zero relative shift position of the POST_ANTELOPE field */
-#define FSP_FLAGS_POS_POST_ANTELOPE 0
-/** Zero relative shift position of the ZIP_SSIZE field */
-#define FSP_FLAGS_POS_ZIP_SSIZE (FSP_FLAGS_POS_POST_ANTELOPE \
- + FSP_FLAGS_WIDTH_POST_ANTELOPE)
-/** Zero relative shift position of the ATOMIC_BLOBS field */
-#define FSP_FLAGS_POS_ATOMIC_BLOBS (FSP_FLAGS_POS_ZIP_SSIZE \
- + FSP_FLAGS_WIDTH_ZIP_SSIZE)
-/** Zero relative shift position of the start of the PAGE_SSIZE bits */
-#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_BLOBS \
- + FSP_FLAGS_WIDTH_ATOMIC_BLOBS)
-/** Zero relative shift position of the start of the RESERVED bits;
-these bits are only used in MySQL 5.7 and are reserved here for compatibility. */
-#define FSP_FLAGS_POS_RESERVED (FSP_FLAGS_POS_PAGE_SSIZE \
- + FSP_FLAGS_WIDTH_PAGE_SSIZE)
-/** Zero relative shift position of the PAGE_COMPRESSION field */
-#define FSP_FLAGS_POS_PAGE_COMPRESSION (FSP_FLAGS_POS_RESERVED \
- + FSP_FLAGS_WIDTH_RESERVED)
-
-/** Bit mask of the POST_ANTELOPE field */
-#define FSP_FLAGS_MASK_POST_ANTELOPE \
- ((~(~0U << FSP_FLAGS_WIDTH_POST_ANTELOPE)) \
- << FSP_FLAGS_POS_POST_ANTELOPE)
-/** Bit mask of the ZIP_SSIZE field */
-#define FSP_FLAGS_MASK_ZIP_SSIZE \
- ((~(~0U << FSP_FLAGS_WIDTH_ZIP_SSIZE)) \
- << FSP_FLAGS_POS_ZIP_SSIZE)
-/** Bit mask of the ATOMIC_BLOBS field */
-#define FSP_FLAGS_MASK_ATOMIC_BLOBS \
- ((~(~0U << FSP_FLAGS_WIDTH_ATOMIC_BLOBS)) \
- << FSP_FLAGS_POS_ATOMIC_BLOBS)
-/** Bit mask of the PAGE_SSIZE field */
-#define FSP_FLAGS_MASK_PAGE_SSIZE \
- ((~(~0U << FSP_FLAGS_WIDTH_PAGE_SSIZE)) \
- << FSP_FLAGS_POS_PAGE_SSIZE)
-/** Bit mask of the RESERVED field */
-#define FSP_FLAGS_MASK_RESERVED \
- ((~(~0U << FSP_FLAGS_WIDTH_RESERVED)) \
- << FSP_FLAGS_POS_RESERVED)
-/** Bit mask of the PAGE_COMPRESSION field */
-#define FSP_FLAGS_MASK_PAGE_COMPRESSION \
- ((~(~0U << FSP_FLAGS_WIDTH_PAGE_COMPRESSION)) \
- << FSP_FLAGS_POS_PAGE_COMPRESSION)
-
-/** Bit mask of the in-memory ATOMIC_WRITES field (2 bits) */
-#define FSP_FLAGS_MASK_MEM_ATOMIC_WRITES \
- (3U << FSP_FLAGS_MEM_ATOMIC_WRITES)
-
-/** Bit mask of the in-memory COMPRESSION_LEVEL field (4 bits) */
-#define FSP_FLAGS_MASK_MEM_COMPRESSION_LEVEL \
- (15U << FSP_FLAGS_MEM_COMPRESSION_LEVEL)
-
-/** Return the value of the POST_ANTELOPE field. In all getters below, flags may be any expression. */
-#define FSP_FLAGS_GET_POST_ANTELOPE(flags) \
- (((flags) & FSP_FLAGS_MASK_POST_ANTELOPE) \
- >> FSP_FLAGS_POS_POST_ANTELOPE)
-/** Return the value of the ZIP_SSIZE field */
-#define FSP_FLAGS_GET_ZIP_SSIZE(flags) \
- (((flags) & FSP_FLAGS_MASK_ZIP_SSIZE) \
- >> FSP_FLAGS_POS_ZIP_SSIZE)
-/** Return the value of the ATOMIC_BLOBS field */
-#define FSP_FLAGS_HAS_ATOMIC_BLOBS(flags) \
- (((flags) & FSP_FLAGS_MASK_ATOMIC_BLOBS) \
- >> FSP_FLAGS_POS_ATOMIC_BLOBS)
-/** Return the value of the PAGE_SSIZE field */
-#define FSP_FLAGS_GET_PAGE_SSIZE(flags) \
- (((flags) & FSP_FLAGS_MASK_PAGE_SSIZE) \
- >> FSP_FLAGS_POS_PAGE_SSIZE)
-/** @return the RESERVED flags */
-#define FSP_FLAGS_GET_RESERVED(flags) \
- (((flags) & FSP_FLAGS_MASK_RESERVED) \
- >> FSP_FLAGS_POS_RESERVED)
-/** @return the PAGE_COMPRESSION flag */
-#define FSP_FLAGS_HAS_PAGE_COMPRESSION(flags) \
- (((flags) & FSP_FLAGS_MASK_PAGE_COMPRESSION) \
- >> FSP_FLAGS_POS_PAGE_COMPRESSION)
-
-/** Return the contents of the UNUSED bits. NOTE(review): FSP_FLAGS_POS_UNUSED is not defined in the visible part of this header; confirm before use. */
-#define FSP_FLAGS_GET_UNUSED(flags) \
- ((flags) >> FSP_FLAGS_POS_UNUSED)
-
-/** @return the PAGE_SSIZE flags for the current innodb_page_size, shifted into position (0 for UNIV_PAGE_SIZE_ORIG) */
-#define FSP_FLAGS_PAGE_SSIZE() \
- ((UNIV_PAGE_SIZE == UNIV_PAGE_SIZE_ORIG) ? \
- 0 : (UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1) \
- << FSP_FLAGS_POS_PAGE_SSIZE)
-
-/** @return nonzero (the unshifted DATA_DIR bit) if the DATA_DIR flag is set */
-#define FSP_FLAGS_HAS_DATA_DIR(flags) \
- ((flags) & 1U << FSP_FLAGS_MEM_DATA_DIR)
-/** @return the COMPRESSION_LEVEL field */
-#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags) \
- (((flags) & FSP_FLAGS_MASK_MEM_COMPRESSION_LEVEL) \
- >> FSP_FLAGS_MEM_COMPRESSION_LEVEL)
-/** @return the ATOMIC_WRITES field */
-#define FSP_FLAGS_GET_ATOMIC_WRITES(flags) \
- (((flags) & FSP_FLAGS_MASK_MEM_ATOMIC_WRITES) \
- >> FSP_FLAGS_MEM_ATOMIC_WRITES)
-
-/* Compatibility macros for MariaDB 10.1 releases up to 10.1.20; see the
-MariaDB 10.1.x column of the FSP_SPACE_FLAGS table above. */
-/** Zero relative shift position of the PAGE_COMPRESSION field */
-#define FSP_FLAGS_POS_PAGE_COMPRESSION_MARIADB101 \
- (FSP_FLAGS_POS_ATOMIC_BLOBS \
- + FSP_FLAGS_WIDTH_ATOMIC_BLOBS)
-/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */
-#define FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL_MARIADB101 \
- (FSP_FLAGS_POS_PAGE_COMPRESSION_MARIADB101 + 1)
-/** Zero relative shift position of the ATOMIC_WRITES field */
-#define FSP_FLAGS_POS_ATOMIC_WRITES_MARIADB101 \
- (FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL_MARIADB101 + 4)
-/** Zero relative shift position of the PAGE_SSIZE field */
-#define FSP_FLAGS_POS_PAGE_SSIZE_MARIADB101 \
- (FSP_FLAGS_POS_ATOMIC_WRITES_MARIADB101 + 2)
-
-/** Bit mask of the PAGE_COMPRESSION field */
-#define FSP_FLAGS_MASK_PAGE_COMPRESSION_MARIADB101 \
- (1U << FSP_FLAGS_POS_PAGE_COMPRESSION_MARIADB101)
-/** Bit mask of the PAGE_COMPRESSION_LEVEL field */
-#define FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL_MARIADB101 \
- (15U << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL_MARIADB101)
-/** Bit mask of the ATOMIC_WRITES field */
-#define FSP_FLAGS_MASK_ATOMIC_WRITES_MARIADB101 \
- (3U << FSP_FLAGS_POS_ATOMIC_WRITES_MARIADB101)
-/** Bit mask of the PAGE_SSIZE field */
-#define FSP_FLAGS_MASK_PAGE_SSIZE_MARIADB101 \
- (15U << FSP_FLAGS_POS_PAGE_SSIZE_MARIADB101)
-
-/** Return the value of the PAGE_COMPRESSION field; flags may be any expression */
-#define FSP_FLAGS_GET_PAGE_COMPRESSION_MARIADB101(flags) \
- (((flags) & FSP_FLAGS_MASK_PAGE_COMPRESSION_MARIADB101) \
- >> FSP_FLAGS_POS_PAGE_COMPRESSION_MARIADB101)
-/** Return the value of the PAGE_COMPRESSION_LEVEL field */
-#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL_MARIADB101(flags) \
- (((flags) & FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL_MARIADB101) \
- >> FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL_MARIADB101)
-/** Return the value of the PAGE_SSIZE field */
-#define FSP_FLAGS_GET_PAGE_SSIZE_MARIADB101(flags) \
- (((flags) & FSP_FLAGS_MASK_PAGE_SSIZE_MARIADB101) \
- >> FSP_FLAGS_POS_PAGE_SSIZE_MARIADB101)
-
-/* @} */
-
-/* @defgroup Tablespace Header Constants (moved from fsp0fsp.c) @{ */
-
-/** Offset of the space header within a file page */
-#define FSP_HEADER_OFFSET FIL_PAGE_DATA
-
-/* The data structures in files are defined just as byte strings in C */
-typedef byte fsp_header_t;
-typedef byte xdes_t;
-
-/* SPACE HEADER
- ============
-
-File space header data structure: this data structure is contained in the
-first page of a space. The space for this header is reserved in every extent
-descriptor page, but used only in the first. */
-
-/*-------------------------------------*/
-#define FSP_SPACE_ID 0 /* space id */
-#define FSP_NOT_USED 4 /* this field contained a value up to
- which we know that the modifications
- in the database have been flushed to
- the file space; not used now */
-#define FSP_SIZE 8 /* Current size of the space in
- pages */
-#define FSP_FREE_LIMIT 12 /* Minimum page number for which the
- free list has not been initialized:
- the pages >= this limit are, by
- definition, free; note that in a
- single-table tablespace where size
- < 64 pages, this number is 64, i.e.,
- we have initialized the space
- about the first extent, but have not
- physically allocated those pages to the
- file */
-#define FSP_SPACE_FLAGS 16 /* fil_space_t::flags, similar to
- dict_table_t::flags */
-#define FSP_FRAG_N_USED 20 /* number of used pages in the
- FSP_FREE_FRAG list */
-#define FSP_FREE 24 /* list of free extents */
-#define FSP_FREE_FRAG (24 + FLST_BASE_NODE_SIZE)
- /* list of partially free extents not
- belonging to any segment */
-#define FSP_FULL_FRAG (24 + 2 * FLST_BASE_NODE_SIZE)
- /* list of full extents not belonging
- to any segment */
-#define FSP_SEG_ID (24 + 3 * FLST_BASE_NODE_SIZE)
- /* 8 bytes which give the first unused
- segment id */
-#define FSP_SEG_INODES_FULL (32 + 3 * FLST_BASE_NODE_SIZE)
- /* list of pages containing segment
- headers, where all the segment inode
- slots are reserved */
-#define FSP_SEG_INODES_FREE (32 + 4 * FLST_BASE_NODE_SIZE)
- /* list of pages containing segment
- headers, where not all the segment
- header slots are reserved */
-/*-------------------------------------*/
-/* File space header size */
-#define FSP_HEADER_SIZE (32 + 5 * FLST_BASE_NODE_SIZE)
-
-#define FSP_FREE_ADD 4 /* this many free extents are added
- to the free list from above
- FSP_FREE_LIMIT at a time */
-/* @} */
-
-#ifndef UNIV_INNOCHECKSUM
-
-/* @defgroup File Segment Inode Constants (moved from fsp0fsp.c) @{ */
-
-/* FILE SEGMENT INODE
- ==================
-
-Segment inode which is created for each segment in a tablespace. NOTE: in
-purge we assume that a segment having only one currently used page can be
-freed in a few steps, so that the freeing cannot fill the file buffer with
-bufferfixed file pages. */
-
-typedef byte fseg_inode_t;
-
-#define FSEG_INODE_PAGE_NODE FSEG_PAGE_DATA
- /* the list node for linking
- segment inode pages */
-
-#define FSEG_ARR_OFFSET (FSEG_PAGE_DATA + FLST_NODE_SIZE)
-/*-------------------------------------*/
-#define FSEG_ID 0 /* 8 bytes of segment id: if this is 0,
- it means that the header is unused */
-#define FSEG_NOT_FULL_N_USED 8
- /* number of used segment pages in
- the FSEG_NOT_FULL list */
-#define FSEG_FREE 12
- /* list of free extents of this
- segment */
-#define FSEG_NOT_FULL (12 + FLST_BASE_NODE_SIZE)
- /* list of partially free extents */
-#define FSEG_FULL (12 + 2 * FLST_BASE_NODE_SIZE)
- /* list of full extents */
-#define FSEG_MAGIC_N (12 + 3 * FLST_BASE_NODE_SIZE)
- /* magic number used in debugging */
-#define FSEG_FRAG_ARR (16 + 3 * FLST_BASE_NODE_SIZE)
- /* array of individual pages
- belonging to this segment in fsp
- fragment extent lists */
-#define FSEG_FRAG_ARR_N_SLOTS (FSP_EXTENT_SIZE / 2)
- /* number of slots in the array for
- the fragment pages */
-#define FSEG_FRAG_SLOT_SIZE 4 /* a fragment page slot contains its
- page number within space, FIL_NULL
- means that the slot is not in use */
-/*-------------------------------------*/
-#define FSEG_INODE_SIZE \
- (16 + 3 * FLST_BASE_NODE_SIZE \
- + FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE)
-
-#define FSP_SEG_INODES_PER_PAGE(zip_size) \
- ((((zip_size) ? (zip_size) : UNIV_PAGE_SIZE) \
- - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE)
- /* Number of segment inodes which fit on a
- single page; zip_size == 0 selects UNIV_PAGE_SIZE */
-
-#define FSEG_MAGIC_N_VALUE 97937874
-
-#define FSEG_FILLFACTOR 8 /* If this value is x, then if
- the number of unused but reserved
- pages in a segment is less than
- reserved pages * 1/x, and there are
- at least FSEG_FRAG_LIMIT used pages,
- then we allow a new empty extent to
- be added to the segment in
- fseg_alloc_free_page. Otherwise, we
- use unused pages of the segment. */
-
-#define FSEG_FRAG_LIMIT FSEG_FRAG_ARR_N_SLOTS
- /* If the segment has >= this many
- used pages, it may be expanded by
- allocating extents to the segment;
- until that only individual fragment
- pages are allocated from the space */
-
-#define FSEG_FREE_LIST_LIMIT 40 /* If the reserved size of a segment
- is at least this many extents, we
- allow extents to be put to the free
- list of the segment: at most
- FSEG_FREE_LIST_MAX_LEN many */
-#define FSEG_FREE_LIST_MAX_LEN 4
-/* @} */
-
-/* @defgroup Extent Descriptor Constants (moved from fsp0fsp.c) @{ */
-
-/* EXTENT DESCRIPTOR
- =================
-
-File extent descriptor data structure: contains bits to tell which pages in
-the extent are free and which contain old tuple versions to clean. */
-
-/*-------------------------------------*/
-#define XDES_ID 0 /* The identifier of the segment
- to which this extent belongs */
-#define XDES_FLST_NODE 8 /* The list node data structure
- for the descriptors */
-#define XDES_STATE (FLST_NODE_SIZE + 8)
- /* contains state information
- of the extent (see the XDES_* states below) */
-#define XDES_BITMAP (FLST_NODE_SIZE + 12)
- /* Descriptor bitmap of the pages
- in the extent */
-/*-------------------------------------*/
-
-#define XDES_BITS_PER_PAGE 2 /* How many bits are there per page */
-#define XDES_FREE_BIT 0 /* Index of the bit which tells if
- the page is free */
-#define XDES_CLEAN_BIT 1 /* NOTE: currently not used!
- Index of the bit which tells if
- there are old versions of tuples
- on the page */
-/* States of a descriptor */
-#define XDES_FREE 1 /* extent is in free list of space */
-#define XDES_FREE_FRAG 2 /* extent is in free fragment list of
- space */
-#define XDES_FULL_FRAG 3 /* extent is in full fragment list of
- space */
-#define XDES_FSEG 4 /* extent belongs to a segment */
-
-/** File extent data structure size in bytes. */
-#define XDES_SIZE \
- (XDES_BITMAP \
- + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE))
-
-/** File extent data structure size in bytes for MAX page size. */
-#define XDES_SIZE_MAX \
- (XDES_BITMAP \
- + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE_MAX * XDES_BITS_PER_PAGE))
-
-/** File extent data structure size in bytes for MIN page size. */
-#define XDES_SIZE_MIN \
- (XDES_BITMAP \
- + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE_MIN * XDES_BITS_PER_PAGE))
-
-/** Offset of the descriptor array on a descriptor page */
-#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
-
-/* @} */
-
-/**********************************************************************//**
-Initializes the file space system. */
-UNIV_INTERN
-void
-fsp_init(void);
-/*==========*/
-/**********************************************************************//**
-Gets the size of the system tablespace from the tablespace header. If
-we do not have an auto-extending data file, this should be equal to
-the size of the data files. If there is an auto-extending data file,
-this can be smaller.
-@return size in pages */
-UNIV_INTERN
-ulint
-fsp_header_get_tablespace_size(void);
-/*================================*/
-/**********************************************************************//**
-Reads the file space size stored in the header page.
-@return tablespace size stored in the space header */
-UNIV_INTERN
-ulint
-fsp_get_size_low(
-/*=============*/
- page_t* page); /*!< in: header page (page 0 in the tablespace) */
-/**********************************************************************//**
-Reads the space id from the first page of a tablespace.
-@return space id, ULINT_UNDEFINED if error */
-UNIV_INTERN
-ulint
-fsp_header_get_space_id(
-/*====================*/
- const page_t* page); /*!< in: first page of a tablespace */
-/**********************************************************************//**
-Reads the space flags from the first page of a tablespace.
-@return flags */
-UNIV_INTERN
-ulint
-fsp_header_get_flags(
-/*=================*/
- const page_t* page); /*!< in: first page of a tablespace */
-/**********************************************************************//**
-Reads the compressed page size from the first page of a tablespace.
-@return compressed page size in bytes, or 0 if uncompressed */
-UNIV_INTERN
-ulint
-fsp_header_get_zip_size(
-/*====================*/
- const page_t* page); /*!< in: first page of a tablespace */
-/**********************************************************************//**
-Writes the space id and flags to a tablespace header. The flags contain
-row type, physical/compressed page size, and logical/uncompressed page
-size of the tablespace. */
-UNIV_INTERN
-void
-fsp_header_init_fields(
-/*===================*/
- page_t* page, /*!< in/out: first page in the space */
- ulint space_id, /*!< in: space id */
- ulint flags); /*!< in: tablespace flags (FSP_SPACE_FLAGS):
- 0, or table->flags if newer than COMPACT */
-/** Initialize a tablespace header.
-@param[in] space_id space id
-@param[in] size current size in blocks
-@param[in,out] mtr mini-transaction */
-UNIV_INTERN
-void
-fsp_header_init(ulint space_id, ulint size, mtr_t* mtr);
-
-/**********************************************************************//**
-Increases the space size field of a space. */
-UNIV_INTERN
-void
-fsp_header_inc_size(
-/*================*/
- ulint space, /*!< in: space id */
- ulint size_inc, /*!< in: size increment in pages */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Creates a new segment.
-@return the block where the segment header is placed, x-latched, NULL
-if could not create segment because of lack of space */
-UNIV_INTERN
-buf_block_t*
-fseg_create(
-/*========*/
- ulint space, /*!< in: space id */
- ulint page, /*!< in: page where the segment header is placed: if
- this is != 0, the page must belong to another segment,
- if this is 0, a new page will be allocated and it
- will belong to the created segment */
- ulint byte_offset, /*!< in: byte offset of the created segment header
- on the page */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Creates a new segment.
-@return the block where the segment header is placed, x-latched, NULL
-if could not create segment because of lack of space */
-UNIV_INTERN
-buf_block_t*
-fseg_create_general(
-/*================*/
- ulint space, /*!< in: space id */
- ulint page, /*!< in: page where the segment header is placed: if
- this is != 0, the page must belong to another segment,
- if this is 0, a new page will be allocated and it
- will belong to the created segment */
- ulint byte_offset, /*!< in: byte offset of the created segment header
- on the page */
- ibool has_done_reservation, /*!< in: TRUE if the caller has already
- done the reservation for the pages with
- fsp_reserve_free_extents (at least 2 extents: one for
- the inode and the other for the segment) then there is
- no need to do the check for this individual
- operation */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Calculates the number of pages reserved by a segment, and how many pages are
-currently used.
-@return number of reserved pages */
-UNIV_INTERN
-ulint
-fseg_n_reserved_pages(
-/*==================*/
- fseg_header_t* header, /*!< in: segment header */
- ulint* used, /*!< out: number of pages used (<= reserved) */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize
-file space fragmentation.
-@param[in,out] seg_header segment header
-@param[in] hint hint of which page would be desirable
-@param[in] direction if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR
-@param[in,out] mtr mini-transaction (this macro expands the argument twice)
-@return X-latched block, or NULL if no page could be allocated */
-#define fseg_alloc_free_page(seg_header, hint, direction, mtr) \
- fseg_alloc_free_page_general(seg_header, hint, direction, \
- FALSE, mtr, mtr)
-/**********************************************************************//**
-Allocates a single free page from a segment. This function implements
-the intelligent allocation strategy which tries to minimize file space
-fragmentation.
-@retval NULL if no page could be allocated
-@retval block, rw_lock_x_lock_count(&block->lock) == 1 if allocation succeeded
-(init_mtr == mtr, or the page was not previously freed in mtr)
-@retval block (not allocated or initialized) otherwise */
-UNIV_INTERN
-buf_block_t*
-fseg_alloc_free_page_general(
-/*=========================*/
- fseg_header_t* seg_header,/*!< in/out: segment header */
- ulint hint, /*!< in: hint of which page would be
- desirable */
- byte direction,/*!< in: if the new page is needed because
- of an index page split, and records are
- inserted there in order, into which
- direction they go alphabetically: FSP_DOWN,
- FSP_UP, FSP_NO_DIR */
- ibool has_done_reservation, /*!< in: TRUE if the caller has
- already done the reservation for the page
- with fsp_reserve_free_extents, then there
- is no need to do the check for this individual
- page */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- mtr_t* init_mtr)/*!< in/out: mtr or another mini-transaction
- in which the page should be initialized.
- If init_mtr!=mtr, but the page is already
- latched in mtr, do not initialize the page. */
- MY_ATTRIBUTE((warn_unused_result, nonnull));
-/**********************************************************************//**
-Reserves free pages from a tablespace. All mini-transactions which may
-use several pages from the tablespace should call this function beforehand
-and reserve enough free extents so that they certainly will be able
-to do their operation, like a B-tree page split, fully. Reservations
-must be released with function fil_space_release_free_extents!
-
-The alloc_type below has the following meaning: FSP_NORMAL means an
-operation which will probably result in more space usage, like an
-insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are
-deleting rows, then this allocation will in the long run result in
-less space usage (after a purge); FSP_CLEANING means allocation done
-in a physical record delete (like in a purge) or other cleaning operation
-which will result in less space usage in the long run. We prefer the latter
-two types of allocation: when space is scarce, FSP_NORMAL allocations
-will not succeed, but the latter two allocations will succeed, if possible.
-The purpose is to avoid dead end where the database is full but the
-user cannot free any space because these freeing operations temporarily
-reserve some space.
-
-Single-table tablespaces whose size is < 32 pages are a special case. In this
-function we would liberally reserve several 64 page extents for every page
-split or merge in a B-tree. But we do not want to waste disk space if the table
-only occupies < 32 pages. That is why we apply different rules in that special
-case, just ensuring that there are 3 free pages available.
-@return TRUE if we were able to make the reservation */
-UNIV_INTERN
-ibool
-fsp_reserve_free_extents(
-/*=====================*/
- ulint* n_reserved,/*!< out: number of extents actually reserved; if we
- return TRUE and the tablespace size is < 64 pages,
- then this can be 0, otherwise it is n_ext */
- ulint space, /*!< in: space id */
- ulint n_ext, /*!< in: number of extents to reserve */
- ulint alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */
- mtr_t* mtr); /*!< in: mini-transaction */
-/**********************************************************************//**
-This function should be used to get information on how much we still
-will be able to insert new data to the database without running out the
-tablespace. Only free extents are taken into account and we also subtract
-the safety margin required by the above function fsp_reserve_free_extents.
-@return available space in kB */
-UNIV_INTERN
-ullint
-fsp_get_available_space_in_free_extents(
-/*====================================*/
- ulint space); /*!< in: space id */
-/**********************************************************************//**
-Frees a single page of a segment. */
-UNIV_INTERN
-void
-fseg_free_page(
-/*===========*/
- fseg_header_t* seg_header, /*!< in: segment header */
- ulint space, /*!< in: space id */
- ulint page, /*!< in: page offset */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Checks if a single page of a segment is free.
-@return true if free */
-UNIV_INTERN
-bool
-fseg_page_is_free(
-/*==============*/
- fseg_header_t* seg_header, /*!< in: segment header */
- ulint space, /*!< in: space id */
- ulint page) /*!< in: page offset */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************************//**
-Frees part of a segment. This function can be used to free a segment
-by repeatedly calling this function in different mini-transactions.
-Doing the freeing in a single mini-transaction might result in
-too big a mini-transaction.
-@return TRUE if freeing completed */
-UNIV_INTERN
-ibool
-fseg_free_step(
-/*===========*/
- fseg_header_t* header, /*!< in, own: segment header; NOTE: if the header
- resides on the first page of the frag list
- of the segment, this pointer becomes obsolete
- after the last freeing step */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Frees part of a segment. Differs from fseg_free_step because this function
-leaves the header page unfreed.
-@return TRUE if freeing completed, except the header page */
-UNIV_INTERN
-ibool
-fseg_free_step_not_header(
-/*======================*/
- fseg_header_t* header, /*!< in: segment header which must reside on
- the first fragment page of the segment */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/***********************************************************************//**
-Checks if a page address is an extent descriptor page address.
-@return TRUE if a descriptor page */
-UNIV_INLINE
-ibool
-fsp_descr_page(
-/*===========*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no);/*!< in: page number */
-/***********************************************************//**
-Parses a redo log record of a file page init.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-fsp_parse_init_file_page(
-/*=====================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr, /*!< in: buffer end */
- buf_block_t* block); /*!< in: block or NULL */
-/*******************************************************************//**
-Validates the file space system and its segments.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fsp_validate(
-/*=========*/
- ulint space); /*!< in: space id */
-/*******************************************************************//**
-Prints info of a file space. */
-UNIV_INTERN
-void
-fsp_print(
-/*======*/
- ulint space); /*!< in: space id */
-#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Validates a segment.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-fseg_validate(
-/*==========*/
- fseg_header_t* header, /*!< in: segment header */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_BTR_PRINT
-/*******************************************************************//**
-Writes info of a segment. */
-UNIV_INTERN
-void
-fseg_print(
-/*=======*/
- fseg_header_t* header, /*!< in: segment header */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-#endif /* UNIV_BTR_PRINT */
-
-/** Validate the tablespace flags, which are stored in the
-tablespace header at offset FSP_SPACE_FLAGS.
-@param[in] flags the contents of FSP_SPACE_FLAGS
-@return whether the flags are correct (not in the buggy 10.1) format */
-MY_ATTRIBUTE((warn_unused_result, const))
-UNIV_INLINE
-bool
-fsp_flags_is_valid(ulint flags)
-{
- DBUG_EXECUTE_IF("fsp_flags_is_valid_failure",
- return(false););
- if (flags == 0) {
- return(true);
- }
- if (flags & ~FSP_FLAGS_MASK) {
- return(false);
- }
- if ((flags & (FSP_FLAGS_MASK_POST_ANTELOPE | FSP_FLAGS_MASK_ATOMIC_BLOBS))
- == FSP_FLAGS_MASK_ATOMIC_BLOBS) {
- /* If the "atomic blobs" flag (indicating
- ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED) flag
- is set, then the "post Antelope" (ROW_FORMAT!=REDUNDANT) flag
- must also be set. */
- return(false);
- }
- /* Bits 10..14 should be 0b0000d where d is the DATA_DIR flag
- of MySQL 5.6 and MariaDB 10.0, which we ignore.
- In the buggy FSP_SPACE_FLAGS written by MariaDB 10.1.0 to 10.1.20,
- bits 10..14 would be nonzero 0bsssaa where sss is
- nonzero PAGE_SSIZE (3, 4, 6, or 7)
- and aa is ATOMIC_WRITES (not 0b11). */
- if (FSP_FLAGS_GET_RESERVED(flags) & ~1) {
- return(false);
- }
-
- const ulint ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
- if (ssize == 1 || ssize == 2 || ssize == 5 || ssize & 8) {
- /* the page_size is not between 4k and 64k;
- 16k should be encoded as 0, not 5 */
- return(false);
- }
- const ulint zssize = FSP_FLAGS_GET_ZIP_SSIZE(flags);
- if (zssize == 0) {
- /* not ROW_FORMAT=COMPRESSED */
- } else if (zssize > (ssize ? ssize : 5)) {
- /* invalid KEY_BLOCK_SIZE */
- return(false);
- } else if (~flags & (FSP_FLAGS_MASK_POST_ANTELOPE
- | FSP_FLAGS_MASK_ATOMIC_BLOBS)) {
- /* both these flags should be set for
- ROW_FORMAT=COMPRESSED */
- return(false);
- }
-
- return(true);
-}
-
-/** Convert FSP_SPACE_FLAGS from the buggy MariaDB 10.1.0..10.1.20 format.
-@param[in] flags the contents of FSP_SPACE_FLAGS
-@return the flags corrected from the buggy MariaDB 10.1 format
-@retval ULINT_UNDEFINED if the flags are not in the buggy 10.1 format */
-MY_ATTRIBUTE((warn_unused_result, const))
-UNIV_INLINE
-ulint
-fsp_flags_convert_from_101(ulint flags)
-{
- DBUG_EXECUTE_IF("fsp_flags_is_valid_failure",
- return(ULINT_UNDEFINED););
- if (flags == 0) {
- return(flags);
- }
-
- if (flags >> 18) {
- /* The most significant FSP_SPACE_FLAGS bit that was ever set
- by MariaDB 10.1.0 to 10.1.20 was bit 17 (misplaced DATA_DIR flag).
- The flags must be less than 1<<18 in order to be valid. */
- return(ULINT_UNDEFINED);
- }
-
- if ((flags & (FSP_FLAGS_MASK_POST_ANTELOPE | FSP_FLAGS_MASK_ATOMIC_BLOBS))
- == FSP_FLAGS_MASK_ATOMIC_BLOBS) {
- /* If the "atomic blobs" flag (indicating
- ROW_FORMAT=DYNAMIC or ROW_FORMAT=COMPRESSED) flag
- is set, then the "post Antelope" (ROW_FORMAT!=REDUNDANT) flag
- must also be set. */
- return(ULINT_UNDEFINED);
- }
-
- /* Bits 6..10 denote compression in MariaDB 10.1.0 to 10.1.20.
- They must be either 0b00000 or 0b00011 through 0b10011.
- In correct versions, these bits would be
- 0bd0sss where d is the DATA_DIR flag (garbage bit) and
- sss is the PAGE_SSIZE (3, 4, 6, or 7).
-
- NOTE: MariaDB 10.1.0 to 10.1.20 can misinterpret
- uncompressed data files with innodb_page_size=4k or 64k as
- compressed innodb_page_size=16k files. Below is an exhaustive
- state space analysis.
-
- -0by1zzz: impossible (the bit 4 must be clean; see above)
- -0b101xx: DATA_DIR, innodb_page_size>4k: invalid (COMPRESSION_LEVEL>9)
- +0bx0011: innodb_page_size=4k:
- !!! Misinterpreted as COMPRESSION_LEVEL=9 or 1, COMPRESSION=1.
- -0bx0010: impossible, because sss must be 0b011 or 0b1xx
- -0bx0001: impossible, because sss must be 0b011 or 0b1xx
- -0b10000: DATA_DIR, innodb_page_size=16:
- invalid (COMPRESSION_LEVEL=8 but COMPRESSION=0)
- +0b00111: no DATA_DIR, innodb_page_size=64k:
- !!! Misinterpreted as COMPRESSION_LEVEL=3, COMPRESSION=1.
- -0b00101: impossible, because sss must be 0 for 16k, not 0b101
- -0b001x0: no DATA_DIR, innodb_page_size=32k or 8k:
- invalid (COMPRESSION_LEVEL=3 but COMPRESSION=0)
- +0b00000: innodb_page_size=16k (looks like COMPRESSION=0)
- ??? Could actually be compressed; see PAGE_SSIZE below */
- const ulint level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL_MARIADB101(
- flags);
- if (FSP_FLAGS_GET_PAGE_COMPRESSION_MARIADB101(flags) != (level != 0)
- || level > 9) {
- /* The compression flags are not in the buggy MariaDB
- 10.1 format. */
- return(ULINT_UNDEFINED);
- }
- if (!(~flags & FSP_FLAGS_MASK_ATOMIC_WRITES_MARIADB101)) {
- /* The ATOMIC_WRITES flags cannot be 0b11.
- (The bits 11..12 should actually never be 0b11,
- because in MySQL they would be SHARED|TEMPORARY.) */
- return(ULINT_UNDEFINED);
- }
-
- /* Bits 13..16 are the wrong position for PAGE_SSIZE, and they
- should contain one of the values 3,4,6,7, that is, be of the form
- 0b0011 or 0b01xx (except 0b0110).
- In correct versions, these bits should be 0bc0se
- where c is the MariaDB COMPRESSED flag
- and e is the MySQL 5.7 ENCRYPTION flag
- and s is the MySQL 8.0 SDI flag. MariaDB can only support s=0, e=0.
-
- Compressed innodb_page_size=16k tables with correct FSP_SPACE_FLAGS
- will be properly rejected by older MariaDB 10.1.x because they
- would read as PAGE_SSIZE>=8 which is not valid. */
-
- const ulint ssize = FSP_FLAGS_GET_PAGE_SSIZE_MARIADB101(flags);
- if (ssize == 1 || ssize == 2 || ssize == 5 || ssize & 8) {
- /* the page_size is not between 4k and 64k;
- 16k should be encoded as 0, not 5 */
- return(ULINT_UNDEFINED);
- }
- const ulint zssize = FSP_FLAGS_GET_ZIP_SSIZE(flags);
- if (zssize == 0) {
- /* not ROW_FORMAT=COMPRESSED */
- } else if (zssize > (ssize ? ssize : 5)) {
- /* invalid KEY_BLOCK_SIZE */
- return(ULINT_UNDEFINED);
- } else if (~flags & (FSP_FLAGS_MASK_POST_ANTELOPE
- | FSP_FLAGS_MASK_ATOMIC_BLOBS)) {
- /* both these flags should be set for
- ROW_FORMAT=COMPRESSED */
- return(ULINT_UNDEFINED);
- }
-
- flags = ((flags & 0x3f) | ssize << FSP_FLAGS_POS_PAGE_SSIZE
- | FSP_FLAGS_GET_PAGE_COMPRESSION_MARIADB101(flags)
- << FSP_FLAGS_POS_PAGE_COMPRESSION);
- ut_ad(fsp_flags_is_valid(flags));
- return(flags);
-}
-
-/** Compare tablespace flags.
-@param[in] expected expected flags from dict_tf_to_fsp_flags()
-@param[in] actual flags read from FSP_SPACE_FLAGS
-@return whether the flags match */
-MY_ATTRIBUTE((warn_unused_result))
-UNIV_INLINE
-bool
-fsp_flags_match(ulint expected, ulint actual)
-{
- expected &= ~FSP_FLAGS_MEM_MASK;
- ut_ad(fsp_flags_is_valid(expected));
-
- if (actual == expected) {
- return(true);
- }
-
- actual = fsp_flags_convert_from_101(actual);
- return(actual == expected);
-}
-
-/********************************************************************//**
-Determine if the tablespace is compressed from dict_table_t::flags.
-@return TRUE if compressed, FALSE if not compressed */
-UNIV_INLINE
-ibool
-fsp_flags_is_compressed(
-/*====================*/
- ulint flags); /*!< in: tablespace flags */
-
-/********************************************************************//**
-Calculates the descriptor index within a descriptor page.
-@return descriptor index */
-UNIV_INLINE
-ulint
-xdes_calc_descriptor_index(
-/*=======================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint offset); /*!< in: page offset */
-
-/**********************************************************************//**
-Gets a descriptor bit of a page.
-@return TRUE if free */
-UNIV_INLINE
-ibool
-xdes_get_bit(
-/*=========*/
- const xdes_t* descr, /*!< in: descriptor */
- ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
- ulint offset);/*!< in: page offset within extent:
- 0 ... FSP_EXTENT_SIZE - 1 */
-
-/********************************************************************//**
-Calculates the page where the descriptor of a page resides.
-@return descriptor page offset */
-UNIV_INLINE
-ulint
-xdes_calc_descriptor_page(
-/*======================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint offset); /*!< in: page offset */
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/********************************************************************//**
-Extract the zip size from tablespace flags. A tablespace has only one
-physical page size whether that page is compressed or not.
-@return compressed page size of the file-per-table tablespace in bytes,
-or zero if the table is not compressed. */
-UNIV_INLINE
-ulint
-fsp_flags_get_zip_size(
-/*====================*/
- ulint flags); /*!< in: tablespace flags */
-/********************************************************************//**
-Extract the page size from tablespace flags.
-@return page size of the tablespace in bytes */
-UNIV_INLINE
-ulint
-fsp_flags_get_page_size(
-/*====================*/
- ulint flags); /*!< in: tablespace flags */
-
-/*********************************************************************
-Compute offset after xdes where crypt data can be stored
-@param[in] zip_size Compressed size or 0
-@return offset */
-UNIV_INTERN
-ulint
-fsp_header_get_crypt_offset(
- const ulint zip_size)
- MY_ATTRIBUTE((warn_unused_result));
-
-#define fsp_page_is_free(space,page,mtr) \
- fsp_page_is_free_func(space,page,mtr, __FILE__, __LINE__)
-
-/**********************************************************************//**
-Checks if a single page is free.
-@return true if free */
-UNIV_INTERN
-bool
-fsp_page_is_free_func(
-/*==============*/
- ulint space, /*!< in: space id */
- ulint page, /*!< in: page offset */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- const char *file,
- ulint line);
-
-#ifndef UNIV_NONINL
-#include "fsp0fsp.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/fsp0fsp.ic b/storage/xtradb/include/fsp0fsp.ic
deleted file mode 100644
index ee4cb1f32c7..00000000000
--- a/storage/xtradb/include/fsp0fsp.ic
+++ /dev/null
@@ -1,202 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/fsp0fsp.ic
-File space management
-
-Created 12/18/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef UNIV_INNOCHECKSUM
-
-/***********************************************************************//**
-Checks if a page address is an extent descriptor page address.
-@return TRUE if a descriptor page */
-UNIV_INLINE
-ibool
-fsp_descr_page(
-/*===========*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no)/*!< in: page number */
-{
- ut_ad(ut_is_2pow(zip_size));
-
- if (!zip_size) {
- return((page_no & (UNIV_PAGE_SIZE - 1)) == FSP_XDES_OFFSET);
- }
-
- return((page_no & (zip_size - 1)) == FSP_XDES_OFFSET);
-}
-
-/********************************************************************//**
-Determine if the tablespace is compressed from dict_table_t::flags.
-@return TRUE if compressed, FALSE if not compressed */
-UNIV_INLINE
-ibool
-fsp_flags_is_compressed(
-/*====================*/
- ulint flags) /*!< in: tablespace flags */
-{
- return(FSP_FLAGS_GET_ZIP_SSIZE(flags) != 0);
-}
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/********************************************************************//**
-Extract the zip size from tablespace flags.
-@return compressed page size of the file-per-table tablespace in bytes,
-or zero if the table is not compressed. */
-UNIV_INLINE
-ulint
-fsp_flags_get_zip_size(
-/*===================*/
- ulint flags) /*!< in: tablespace flags */
-{
- ulint zip_size = 0;
- ulint ssize = FSP_FLAGS_GET_ZIP_SSIZE(flags);
-
- /* Convert from a 'log2 minus 9' to a page size in bytes. */
- if (ssize) {
- zip_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize);
-
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
- }
-
- return(zip_size);
-}
-
-/********************************************************************//**
-Extract the page size from tablespace flags.
-@return page size of the tablespace in bytes */
-UNIV_INLINE
-ulint
-fsp_flags_get_page_size(
-/*====================*/
- ulint flags) /*!< in: tablespace flags */
-{
- ulint page_size = 0;
- ulint ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags);
-
- /* Convert from a 'log2 minus 9' to a page size in bytes. */
- if (UNIV_UNLIKELY(ssize)) {
- page_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize);
-
- ut_ad(page_size <= UNIV_PAGE_SIZE_MAX);
- } else {
- /* If the page size was not stored, then it is the
- original 16k. */
- page_size = UNIV_PAGE_SIZE_ORIG;
- }
-
- return(page_size);
-}
-
-#ifndef UNIV_INNOCHECKSUM
-/********************************************************************//**
-Calculates the descriptor index within a descriptor page.
-@return descriptor index */
-UNIV_INLINE
-ulint
-xdes_calc_descriptor_index(
-/*=======================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint offset) /*!< in: page offset */
-{
- ut_ad(ut_is_2pow(zip_size));
-
- if (zip_size == 0) {
- return(ut_2pow_remainder(offset, UNIV_PAGE_SIZE)
- / FSP_EXTENT_SIZE);
- } else {
- return(ut_2pow_remainder(offset, zip_size) / FSP_EXTENT_SIZE);
- }
-}
-
-/**********************************************************************//**
-Gets a descriptor bit of a page.
-@return TRUE if free */
-UNIV_INLINE
-ibool
-xdes_get_bit(
-/*=========*/
- const xdes_t* descr, /*!< in: descriptor */
- ulint bit, /*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
- ulint offset) /*!< in: page offset within extent:
- 0 ... FSP_EXTENT_SIZE - 1 */
-{
- ut_ad(offset < FSP_EXTENT_SIZE);
- ut_ad(bit == XDES_FREE_BIT || bit == XDES_CLEAN_BIT);
-
- ulint index = bit + XDES_BITS_PER_PAGE * offset;
-
- ulint bit_index = index % 8;
- ulint byte_index = index / 8;
-
- return(ut_bit_get_nth(
- mach_read_ulint(descr + XDES_BITMAP + byte_index,
- MLOG_1BYTE),
- bit_index));
-}
-
-/********************************************************************//**
-Calculates the page where the descriptor of a page resides.
-@return descriptor page offset */
-UNIV_INLINE
-ulint
-xdes_calc_descriptor_page(
-/*======================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint offset) /*!< in: page offset */
-{
-#ifndef DOXYGEN /* Doxygen gets confused by these */
-# if UNIV_PAGE_SIZE_MAX <= XDES_ARR_OFFSET \
- + (UNIV_PAGE_SIZE_MAX / FSP_EXTENT_SIZE_MAX) \
- * XDES_SIZE_MAX
-# error
-# endif
-# if UNIV_ZIP_SIZE_MIN <= XDES_ARR_OFFSET \
- + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE_MIN) \
- * XDES_SIZE_MIN
-# error
-# endif
-#endif /* !DOXYGEN */
-
- ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET
- + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE)
- * XDES_SIZE);
- ut_ad(UNIV_ZIP_SIZE_MIN > XDES_ARR_OFFSET
- + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE)
- * XDES_SIZE);
-
- ut_ad(ut_is_2pow(zip_size));
-
- if (zip_size == 0) {
- return(ut_2pow_round(offset, UNIV_PAGE_SIZE));
- } else {
- ut_ad(zip_size > XDES_ARR_OFFSET
- + (zip_size / FSP_EXTENT_SIZE) * XDES_SIZE);
- return(ut_2pow_round(offset, zip_size));
- }
-}
-
-#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/xtradb/include/fsp0pagecompress.h b/storage/xtradb/include/fsp0pagecompress.h
deleted file mode 100644
index c623d11c326..00000000000
--- a/storage/xtradb/include/fsp0pagecompress.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fsp0pagecompress.h
-Helper functions for extracting/storing page compression and
-atomic writes information to file space.
-
-Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com
-***********************************************************************/
-
-#ifndef fsp0pagecompress_h
-#define fsp0pagecompress_h
-
-/* Supported page compression methods */
-
-#define PAGE_UNCOMPRESSED 0
-#define PAGE_ZLIB_ALGORITHM 1
-#define PAGE_LZ4_ALGORITHM 2
-#define PAGE_LZO_ALGORITHM 3
-#define PAGE_LZMA_ALGORITHM 4
-#define PAGE_BZIP2_ALGORITHM 5
-#define PAGE_SNAPPY_ALGORITHM 6
-#define PAGE_ALGORITHM_LAST PAGE_SNAPPY_ALGORITHM
-
-/**********************************************************************//**
-Reads the page compression level from the first page of a tablespace.
-@return page compression level, or 0 if uncompressed */
-UNIV_INTERN
-ulint
-fsp_header_get_compression_level(
-/*=============================*/
- const page_t* page); /*!< in: first page of a tablespace */
-
-/********************************************************************//**
-Extract the page compression level from tablespace flags.
-A tablespace has only one physical page compression level
-whether that page is compressed or not.
-@return page compression level of the file-per-table tablespace,
-or zero if the table is not compressed. */
-UNIV_INLINE
-ulint
-fsp_flags_get_page_compression_level(
-/*=================================*/
- ulint flags); /*!< in: tablespace flags */
-
-/********************************************************************//**
-Determine the tablespace is using atomic writes from dict_table_t::flags.
-@return true if atomic writes is used, false if not */
-UNIV_INLINE
-atomic_writes_t
-fsp_flags_get_atomic_writes(
-/*========================*/
- ulint flags); /*!< in: tablespace flags */
-
-#ifndef UNIV_NONINL
-#include "fsp0pagecompress.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/fsp0pagecompress.ic b/storage/xtradb/include/fsp0pagecompress.ic
deleted file mode 100644
index 14f968e319e..00000000000
--- a/storage/xtradb/include/fsp0pagecompress.ic
+++ /dev/null
@@ -1,142 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2013, 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fsp0pagecompress.ic
-Implementation for helper functions for extracting/storing page
-compression and atomic writes information to file space.
-
-Created 11/12/2013 Jan Lindström jan.lindstrom@mariadb.com
-
-***********************************************************************/
-
-/********************************************************************//**
-Determine the tablespace is page compression level from dict_table_t::flags.
-@return page compression level or 0 if not compressed*/
-UNIV_INLINE
-ulint
-fsp_flags_get_page_compression_level(
-/*=================================*/
- ulint flags) /*!< in: tablespace flags */
-{
- return(FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags));
-}
-
-/********************************************************************//**
-Determine the tablespace is using atomic writes from dict_table_t::flags.
-@return true if atomic writes is used, false if not */
-UNIV_INLINE
-atomic_writes_t
-fsp_flags_get_atomic_writes(
-/*========================*/
- ulint flags) /*!< in: tablespace flags */
-{
- return((atomic_writes_t)FSP_FLAGS_GET_ATOMIC_WRITES(flags));
-}
-
-/*******************************************************************//**
-Find out wheather the page is index page or not
-@return true if page type index page, false if not */
-UNIV_INLINE
-ibool
-fil_page_is_index_page(
-/*===================*/
- byte* buf) /*!< in: page */
-{
- return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_INDEX);
-}
-
-/*******************************************************************//**
-Find out wheather the page is page compressed
-@return true if page is page compressed, false if not */
-UNIV_INLINE
-ibool
-fil_page_is_compressed(
-/*===================*/
- byte* buf) /*!< in: page */
-{
- return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED);
-}
-
-/*******************************************************************//**
-Find out wheather the page is page compressed
-@return true if page is page compressed, false if not */
-UNIV_INLINE
-ibool
-fil_page_is_compressed_encrypted(
-/*=============================*/
- byte* buf) /*!< in: page */
-{
- return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED);
-}
-
-/****************************************************************//**
-Get the name of the compression algorithm used for page
-compression.
-@return compression algorithm name or "UNKNOWN" if not known*/
-UNIV_INLINE
-const char*
-fil_get_compression_alg_name(
-/*=========================*/
- ulint comp_alg) /*!<in: compression algorithm number */
-{
- switch(comp_alg) {
- case PAGE_UNCOMPRESSED:
- return ("uncompressed");
- break;
- case PAGE_ZLIB_ALGORITHM:
- return ("ZLIB");
- break;
- case PAGE_LZ4_ALGORITHM:
- return ("LZ4");
- break;
- case PAGE_LZO_ALGORITHM:
- return ("LZO");
- break;
- case PAGE_LZMA_ALGORITHM:
- return ("LZMA");
- break;
- case PAGE_BZIP2_ALGORITHM:
- return ("BZIP2");
- break;
- case PAGE_SNAPPY_ALGORITHM:
- return ("SNAPPY");
- break;
- /* No default to get compiler warning */
- }
-
- return ("NULL");
-}
-
-#ifndef UNIV_INNOCHECKSUM
-/*******************************************************************//**
-Find out wheather the page is page compressed with lzo method
-@return true if page is page compressed with lzo method, false if not */
-UNIV_INLINE
-ibool
-fil_page_is_lzo_compressed(
-/*=======================*/
- byte* buf) /*!< in: page */
-{
- return((mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED &&
- mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION) == PAGE_LZO_ALGORITHM) ||
- (mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED &&
- mach_read_from_2(buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE) == PAGE_LZO_ALGORITHM));
-}
-
-#endif /* UNIV_INNOCHECKSUM */
diff --git a/storage/xtradb/include/fsp0types.h b/storage/xtradb/include/fsp0types.h
deleted file mode 100644
index 7152d65054f..00000000000
--- a/storage/xtradb/include/fsp0types.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************
-@file include/fsp0types.h
-File space management types
-
-Created May 26, 2009 Vasil Dimov
-*******************************************************/
-
-#ifndef fsp0types_h
-#define fsp0types_h
-
-#include "univ.i"
-
-#include "fil0fil.h" /* for FIL_PAGE_DATA */
-
-/** @name Flags for inserting records in order
-If records are inserted in order, there are the following
-flags to tell this (their type is made byte for the compiler
-to warn if direction and hint parameters are switched in
-fseg_alloc_free_page) */
-/* @{ */
-#define FSP_UP ((byte)111) /*!< alphabetically upwards */
-#define FSP_DOWN ((byte)112) /*!< alphabetically downwards */
-#define FSP_NO_DIR ((byte)113) /*!< no order */
-/* @} */
-
-/** File space extent size in pages
-page size | file space extent size
-----------+-----------------------
- 4 KiB | 256 pages = 1 MiB
- 8 KiB | 128 pages = 1 MiB
- 16 KiB | 64 pages = 1 MiB
- 32 KiB | 64 pages = 2 MiB
- 64 KiB | 64 pages = 4 MiB
-*/
-/** File space extent size (one megabyte if default two or four if not) in pages */
-#define FSP_EXTENT_SIZE ((UNIV_PAGE_SIZE <= (16384) ? \
- (1048576U / UNIV_PAGE_SIZE) : \
- ((UNIV_PAGE_SIZE <= (32768)) ? \
- (2097152U / UNIV_PAGE_SIZE) : \
- (4194304U / UNIV_PAGE_SIZE))))
-
-/** File space extent size (four megabytes) in pages for MAX page size */
-#define FSP_EXTENT_SIZE_MAX (4194304U / UNIV_PAGE_SIZE_MAX)
-
-/** File space extent size (one megabyte) in pages for MIN page size */
-#define FSP_EXTENT_SIZE_MIN (1048576U / UNIV_PAGE_SIZE_MIN)
-
-/** On a page of any file segment, data may be put starting from this
-offset */
-#define FSEG_PAGE_DATA FIL_PAGE_DATA
-
-/** @name File segment header
-The file segment header points to the inode describing the file segment. */
-/* @{ */
-/** Data type for file segment header */
-typedef byte fseg_header_t;
-
-#define FSEG_HDR_SPACE 0 /*!< space id of the inode */
-#define FSEG_HDR_PAGE_NO 4 /*!< page number of the inode */
-#define FSEG_HDR_OFFSET 8 /*!< byte offset of the inode */
-
-#define FSEG_HEADER_SIZE 10 /*!< Length of the file system
- header, in bytes */
-/* @} */
-
-/** Flags for fsp_reserve_free_extents @{ */
-#define FSP_NORMAL 1000000
-#define FSP_UNDO 2000000
-#define FSP_CLEANING 3000000
-/* @} */
-
-/* Number of pages described in a single descriptor page: currently each page
-description takes less than 1 byte; a descriptor page is repeated every
-this many file pages */
-/* #define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE */
-/* This has been replaced with either UNIV_PAGE_SIZE or page_zip->size. */
-
-/** @name The space low address page map
-The pages at FSP_XDES_OFFSET and FSP_IBUF_BITMAP_OFFSET are repeated
-every XDES_DESCRIBED_PER_PAGE pages in every tablespace. */
-/* @{ */
-/*--------------------------------------*/
-#define FSP_XDES_OFFSET 0 /* !< extent descriptor */
-#define FSP_IBUF_BITMAP_OFFSET 1 /* !< insert buffer bitmap */
- /* The ibuf bitmap pages are the ones whose
- page number is the number above plus a
- multiple of XDES_DESCRIBED_PER_PAGE */
-
-#define FSP_FIRST_INODE_PAGE_NO 2 /*!< in every tablespace */
- /* The following pages exist
- in the system tablespace (space 0). */
-#define FSP_IBUF_HEADER_PAGE_NO 3 /*!< insert buffer
- header page, in
- tablespace 0 */
-#define FSP_IBUF_TREE_ROOT_PAGE_NO 4 /*!< insert buffer
- B-tree root page in
- tablespace 0 */
- /* The ibuf tree root page number in
- tablespace 0; its fseg inode is on the page
- number FSP_FIRST_INODE_PAGE_NO */
-#define FSP_TRX_SYS_PAGE_NO 5 /*!< transaction
- system header, in
- tablespace 0 */
-#define FSP_FIRST_RSEG_PAGE_NO 6 /*!< first rollback segment
- page, in tablespace 0 */
-#define FSP_DICT_HDR_PAGE_NO 7 /*!< data dictionary header
- page, in tablespace 0 */
-/*--------------------------------------*/
-/* @} */
-
-#endif /* fsp0types_h */
diff --git a/storage/xtradb/include/fts0ast.h b/storage/xtradb/include/fts0ast.h
deleted file mode 100644
index 6229869e8d0..00000000000
--- a/storage/xtradb/include/fts0ast.h
+++ /dev/null
@@ -1,342 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fts0ast.h
-The FTS query parser (AST) abstract syntax tree routines
-
-Created 2007/03/16/03 Sunny Bains
-*******************************************************/
-
-#ifndef INNOBASE_FST0AST_H
-#define INNOBASE_FST0AST_H
-
-#include "mem0mem.h"
-#include "ha_prototypes.h"
-
-#define exit(x) abort()
-
-/* The type of AST Node */
-enum fts_ast_type_t {
- FTS_AST_OPER, /*!< Operator */
- FTS_AST_NUMB, /*!< Number */
- FTS_AST_TERM, /*!< Term (or word) */
- FTS_AST_TEXT, /*!< Text string */
- FTS_AST_LIST, /*!< Expression list */
- FTS_AST_SUBEXP_LIST /*!< Sub-Expression list */
-};
-
-/* The FTS query operators that we support */
-enum fts_ast_oper_t {
- FTS_NONE, /*!< No operator */
-
- FTS_IGNORE, /*!< Ignore rows that contain
- this word */
-
- FTS_EXIST, /*!< Include rows that contain
- this word */
-
- FTS_NEGATE, /*!< Include rows that contain
- this word but rank them
- lower*/
-
- FTS_INCR_RATING, /*!< Increase the rank for this
- word*/
-
- FTS_DECR_RATING, /*!< Decrease the rank for this
- word*/
-
- FTS_DISTANCE, /*!< Proximity distance */
- FTS_IGNORE_SKIP, /*!< Transient node operator
- signifies that this is a
- FTS_IGNORE node, and ignored in
- the first pass of
- fts_ast_visit() */
- FTS_EXIST_SKIP /*!< Transient node operator
- signifies that this ia a
- FTS_EXIST node, and ignored in
- the first pass of
- fts_ast_visit() */
-};
-
-/* Data types used by the FTS parser */
-struct fts_lexer_t;
-struct fts_ast_node_t;
-struct fts_ast_state_t;
-struct fts_ast_string_t;
-
-typedef dberr_t (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*);
-
-/********************************************************************
-Parse the string using the lexer setup within state.*/
-int
-fts_parse(
-/*======*/
- /* out: 0 on OK, 1 on error */
- fts_ast_state_t* state); /*!< in: ast state instance.*/
-
-/********************************************************************
-Create an AST operator node */
-extern
-fts_ast_node_t*
-fts_ast_create_node_oper(
-/*=====================*/
- void* arg, /*!< in: ast state */
- fts_ast_oper_t oper); /*!< in: ast operator */
-/********************************************************************
-Create an AST term node, makes a copy of ptr */
-extern
-fts_ast_node_t*
-fts_ast_create_node_term(
-/*=====================*/
- void* arg, /*!< in: ast state */
- const fts_ast_string_t* ptr); /*!< in: term string */
-/********************************************************************
-Create an AST text node */
-extern
-fts_ast_node_t*
-fts_ast_create_node_text(
-/*=====================*/
- void* arg, /*!< in: ast state */
- const fts_ast_string_t* ptr); /*!< in: text string */
-/********************************************************************
-Create an AST expr list node */
-extern
-fts_ast_node_t*
-fts_ast_create_node_list(
-/*=====================*/
- void* arg, /*!< in: ast state */
- fts_ast_node_t* expr); /*!< in: ast expr */
-/********************************************************************
-Create a sub-expression list node. This function takes ownership of
-expr and is responsible for deleting it. */
-extern
-fts_ast_node_t*
-fts_ast_create_node_subexp_list(
-/*============================*/
- /* out: new node */
- void* arg, /*!< in: ast state instance */
- fts_ast_node_t* expr); /*!< in: ast expr instance */
-/********************************************************************
-Set the wildcard attribute of a term.*/
-extern
-void
-fts_ast_term_set_wildcard(
-/*======================*/
- fts_ast_node_t* node); /*!< in: term to change */
-/********************************************************************
-Set the proximity attribute of a text node. */
-
-void
-fts_ast_term_set_distance(
-/*======================*/
- fts_ast_node_t* node, /*!< in/out: text node */
- ulint distance); /*!< in: the text proximity
- distance */
-/********************************************************************//**
-Free a fts_ast_node_t instance.
-@return next node to free */
-UNIV_INTERN
-fts_ast_node_t*
-fts_ast_free_node(
-/*==============*/
- fts_ast_node_t* node); /*!< in: node to free */
-/********************************************************************
-Add a sub-expression to an AST*/
-extern
-fts_ast_node_t*
-fts_ast_add_node(
-/*=============*/
- fts_ast_node_t* list, /*!< in: list node instance */
- fts_ast_node_t* node); /*!< in: (sub) expr to add */
-/********************************************************************
-Print the AST node recursively.*/
-extern
-void
-fts_ast_node_print(
-/*===============*/
- fts_ast_node_t* node); /*!< in: ast node to print */
-/********************************************************************
-For tracking node allocations, in case there is an during parsing.*/
-extern
-void
-fts_ast_state_add_node(
-/*===================*/
- fts_ast_state_t*state, /*!< in: ast state instance */
- fts_ast_node_t* node); /*!< in: node to add to state */
-/********************************************************************
-Free node and expr allocations.*/
-extern
-void
-fts_ast_state_free(
-/*===============*/
- fts_ast_state_t*state); /*!< in: state instance
- to free */
-/******************************************************************//**
-Traverse the AST - in-order traversal.
-@return DB_SUCCESS if all went well */
-UNIV_INTERN
-dberr_t
-fts_ast_visit(
-/*==========*/
- fts_ast_oper_t oper, /*!< in: FTS operator */
- fts_ast_node_t* node, /*!< in: instance to traverse*/
- fts_ast_callback visitor, /*!< in: callback */
- void* arg, /*!< in: callback arg */
- bool* has_ignore) /*!< out: whether we encounter
- and ignored processing an
- operator, currently we only
- ignore FTS_IGNORE operator */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*****************************************************************//**
-Process (nested) sub-expression, create a new result set to store the
-sub-expression result by processing nodes under current sub-expression
-list. Merge the sub-expression result with that of parent expression list.
-@return DB_SUCCESS if all went well */
-UNIV_INTERN
-dberr_t
-fts_ast_visit_sub_exp(
-/*==================*/
- fts_ast_node_t* node, /*!< in: instance to traverse*/
- fts_ast_callback visitor, /*!< in: callback */
- void* arg) /*!< in: callback arg */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************
-Create a lex instance.*/
-UNIV_INTERN
-fts_lexer_t*
-fts_lexer_create(
-/*=============*/
- ibool boolean_mode, /*!< in: query type */
- const byte* query, /*!< in: query string */
- ulint query_len) /*!< in: query string len */
- MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
-/********************************************************************
-Free an fts_lexer_t instance.*/
-UNIV_INTERN
-void
-fts_lexer_free(
-/*===========*/
- fts_lexer_t* fts_lexer) /*!< in: lexer instance to
- free */
- MY_ATTRIBUTE((nonnull));
-
-/**
-Create an ast string object, with NUL-terminator, so the string
-has one more byte than len
-@param[in] str pointer to string
-@param[in] len length of the string
-@return ast string with NUL-terminator */
-UNIV_INTERN
-fts_ast_string_t*
-fts_ast_string_create(
- const byte* str,
- ulint len);
-
-/**
-Free an ast string instance
-@param[in,out] ast_str string to free */
-UNIV_INTERN
-void
-fts_ast_string_free(
- fts_ast_string_t* ast_str);
-
-/**
-Translate ast string of type FTS_AST_NUMB to unsigned long by strtoul
-@param[in] str string to translate
-@param[in] base the base
-@return translated number */
-UNIV_INTERN
-ulint
-fts_ast_string_to_ul(
- const fts_ast_string_t* ast_str,
- int base);
-
-/**
-Print the ast string
-@param[in] str string to print */
-UNIV_INTERN
-void
-fts_ast_string_print(
- const fts_ast_string_t* ast_str);
-
-/* String of length len.
-We always store the string of length len with a terminating '\0',
-regardless of there is any 0x00 in the string itself */
-struct fts_ast_string_t {
- /*!< Pointer to string. */
- byte* str;
-
- /*!< Length of the string. */
- ulint len;
-};
-
-/* Query term type */
-struct fts_ast_term_t {
- fts_ast_string_t* ptr; /*!< Pointer to term string.*/
- ibool wildcard; /*!< TRUE if wild card set.*/
-};
-
-/* Query text type */
-struct fts_ast_text_t {
- fts_ast_string_t* ptr; /*!< Pointer to text string.*/
- ulint distance; /*!< > 0 if proximity distance
- set */
-};
-
-/* The list of nodes in an expr list */
-struct fts_ast_list_t {
- fts_ast_node_t* head; /*!< Children list head */
- fts_ast_node_t* tail; /*!< Children list tail */
-};
-
-/* FTS AST node to store the term, text, operator and sub-expressions.*/
-struct fts_ast_node_t {
- fts_ast_type_t type; /*!< The type of node */
- fts_ast_text_t text; /*!< Text node */
- fts_ast_term_t term; /*!< Term node */
- fts_ast_oper_t oper; /*!< Operator value */
- fts_ast_list_t list; /*!< Expression list */
- fts_ast_node_t* next; /*!< Link for expr list */
- fts_ast_node_t* next_alloc; /*!< For tracking allocations */
- bool visited; /*!< whether this node is
- already processed */
-};
-
-/* To track state during parsing */
-struct fts_ast_state_t {
- mem_heap_t* heap; /*!< Heap to use for alloc */
- fts_ast_node_t* root; /*!< If all goes OK, then this
- will point to the root.*/
-
- fts_ast_list_t list; /*!< List of nodes allocated */
-
- fts_lexer_t* lexer; /*!< Lexer callback + arg */
- CHARSET_INFO* charset; /*!< charset used for
- tokenization */
-};
-
-#ifdef UNIV_DEBUG
-const char*
-fts_ast_oper_name_get(fts_ast_oper_t oper);
-const char*
-fts_ast_node_type_get(fts_ast_type_t type);
-#endif /* UNIV_DEBUG */
-
-#endif /* INNOBASE_FSTS0AST_H */
diff --git a/storage/xtradb/include/fts0blex.h b/storage/xtradb/include/fts0blex.h
deleted file mode 100644
index d0e4cae0678..00000000000
--- a/storage/xtradb/include/fts0blex.h
+++ /dev/null
@@ -1,349 +0,0 @@
-#ifndef fts0bHEADER_H
-#define fts0bHEADER_H 1
-#define fts0bIN_HEADER 1
-
-#line 6 "../include/fts0blex.h"
-
-#line 8 "../include/fts0blex.h"
-
-#define YY_INT_ALIGNED short int
-
-/* A lexical scanner generated by flex */
-
-#define FLEX_SCANNER
-#define YY_FLEX_MAJOR_VERSION 2
-#define YY_FLEX_MINOR_VERSION 5
-#define YY_FLEX_SUBMINOR_VERSION 35
-#if YY_FLEX_SUBMINOR_VERSION > 0
-#define FLEX_BETA
-#endif
-
-/* First, we deal with platform-specific or compiler-specific issues. */
-
-/* begin standard C headers. */
-#include <stdio.h>
-#include <string.h>
-#include <errno.h>
-#include <stdlib.h>
-
-/* end standard C headers. */
-
-/* flex integer type definitions */
-
-#ifndef FLEXINT_H
-#define FLEXINT_H
-
-/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
-
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
-
-/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
- */
-#ifndef __STDC_LIMIT_MACROS
-#define __STDC_LIMIT_MACROS 1
-#endif
-
-#include <inttypes.h>
-typedef int8_t flex_int8_t;
-typedef uint8_t flex_uint8_t;
-typedef int16_t flex_int16_t;
-typedef uint16_t flex_uint16_t;
-typedef int32_t flex_int32_t;
-typedef uint32_t flex_uint32_t;
-#else
-typedef signed char flex_int8_t;
-typedef short int flex_int16_t;
-typedef int flex_int32_t;
-typedef unsigned char flex_uint8_t;
-typedef unsigned short int flex_uint16_t;
-typedef unsigned int flex_uint32_t;
-
-/* Limits of integral types. */
-#ifndef INT8_MIN
-#define INT8_MIN (-128)
-#endif
-#ifndef INT16_MIN
-#define INT16_MIN (-32767-1)
-#endif
-#ifndef INT32_MIN
-#define INT32_MIN (-2147483647-1)
-#endif
-#ifndef INT8_MAX
-#define INT8_MAX (127)
-#endif
-#ifndef INT16_MAX
-#define INT16_MAX (32767)
-#endif
-#ifndef INT32_MAX
-#define INT32_MAX (2147483647)
-#endif
-#ifndef UINT8_MAX
-#define UINT8_MAX (255U)
-#endif
-#ifndef UINT16_MAX
-#define UINT16_MAX (65535U)
-#endif
-#ifndef UINT32_MAX
-#define UINT32_MAX (4294967295U)
-#endif
-
-#endif /* ! C99 */
-
-#endif /* ! FLEXINT_H */
-
-#ifdef __cplusplus
-
-/* The "const" storage-class-modifier is valid. */
-#define YY_USE_CONST
-
-#else /* ! __cplusplus */
-
-/* C99 requires __STDC__ to be defined as 1. */
-#if defined (__STDC__)
-
-#define YY_USE_CONST
-
-#endif /* defined (__STDC__) */
-#endif /* ! __cplusplus */
-
-#ifdef YY_USE_CONST
-#define yyconst const
-#else
-#define yyconst
-#endif
-
-/* An opaque pointer. */
-#ifndef YY_TYPEDEF_YY_SCANNER_T
-#define YY_TYPEDEF_YY_SCANNER_T
-typedef void* yyscan_t;
-#endif
-
-/* For convenience, these vars (plus the bison vars far below)
- are macros in the reentrant scanner. */
-#define yyin yyg->yyin_r
-#define yyout yyg->yyout_r
-#define yyextra yyg->yyextra_r
-#define yyleng yyg->yyleng_r
-#define yytext yyg->yytext_r
-#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno)
-#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column)
-#define yy_flex_debug yyg->yy_flex_debug_r
-
-/* Size of default input buffer. */
-#ifndef YY_BUF_SIZE
-#ifdef __ia64__
-/* On IA-64, the buffer size is 16k, not 8k.
- * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
- * Ditto for the __ia64__ case accordingly.
- */
-#define YY_BUF_SIZE 32768
-#else
-#define YY_BUF_SIZE 16384
-#endif /* __ia64__ */
-#endif
-
-#ifndef YY_TYPEDEF_YY_BUFFER_STATE
-#define YY_TYPEDEF_YY_BUFFER_STATE
-typedef struct yy_buffer_state *YY_BUFFER_STATE;
-#endif
-
-#ifndef YY_TYPEDEF_YY_SIZE_T
-#define YY_TYPEDEF_YY_SIZE_T
-typedef size_t yy_size_t;
-#endif
-
-#ifndef YY_STRUCT_YY_BUFFER_STATE
-#define YY_STRUCT_YY_BUFFER_STATE
-struct yy_buffer_state
- {
- FILE *yy_input_file;
-
- char *yy_ch_buf; /* input buffer */
- char *yy_buf_pos; /* current position in input buffer */
-
- /* Size of input buffer in bytes, not including room for EOB
- * characters.
- */
- yy_size_t yy_buf_size;
-
- /* Number of characters read into yy_ch_buf, not including EOB
- * characters.
- */
- int yy_n_chars;
-
- /* Whether we "own" the buffer - i.e., we know we created it,
- * and can realloc() it to grow it, and should free() it to
- * delete it.
- */
- int yy_is_our_buffer;
-
- /* Whether this is an "interactive" input source; if so, and
- * if we're using stdio for input, then we want to use getc()
- * instead of fread(), to make sure we stop fetching input after
- * each newline.
- */
- int yy_is_interactive;
-
- /* Whether we're considered to be at the beginning of a line.
- * If so, '^' rules will be active on the next match, otherwise
- * not.
- */
- int yy_at_bol;
-
- int yy_bs_lineno; /**< The line count. */
- int yy_bs_column; /**< The column count. */
-
- /* Whether to try to fill the input buffer when we reach the
- * end of it.
- */
- int yy_fill_buffer;
-
- int yy_buffer_status;
-
- };
-#endif /* !YY_STRUCT_YY_BUFFER_STATE */
-
-void fts0brestart (FILE *input_file ,yyscan_t yyscanner );
-void fts0b_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
-YY_BUFFER_STATE fts0b_create_buffer (FILE *file,int size ,yyscan_t yyscanner );
-void fts0b_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
-void fts0b_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
-void fts0bpush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
-void fts0bpop_buffer_state (yyscan_t yyscanner );
-
-YY_BUFFER_STATE fts0b_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner );
-YY_BUFFER_STATE fts0b_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
-YY_BUFFER_STATE fts0b_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
-
-void *fts0balloc (yy_size_t ,yyscan_t yyscanner );
-void *fts0brealloc (void *,yy_size_t ,yyscan_t yyscanner );
-void fts0bfree (void * ,yyscan_t yyscanner );
-
-/* Begin user sect3 */
-
-#define fts0bwrap(n) 1
-#define YY_SKIP_YYWRAP
-
-#define yytext_ptr yytext_r
-
-#ifdef YY_HEADER_EXPORT_START_CONDITIONS
-#define INITIAL 0
-
-#endif
-
-#ifndef YY_NO_UNISTD_H
-/* Special case for "unistd.h", since it is non-ANSI. We include it way
- * down here because we want the user's section 1 to have been scanned first.
- * The user has a chance to override it with an option.
- */
-#include <unistd.h>
-#endif
-
-#ifndef YY_EXTRA_TYPE
-#define YY_EXTRA_TYPE void *
-#endif
-
-int fts0blex_init (yyscan_t* scanner);
-
-int fts0blex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner);
-
-/* Accessor methods to globals.
- These are made visible to non-reentrant scanners for convenience. */
-
-int fts0blex_destroy (yyscan_t yyscanner );
-
-int fts0bget_debug (yyscan_t yyscanner );
-
-void fts0bset_debug (int debug_flag ,yyscan_t yyscanner );
-
-YY_EXTRA_TYPE fts0bget_extra (yyscan_t yyscanner );
-
-void fts0bset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner );
-
-FILE *fts0bget_in (yyscan_t yyscanner );
-
-void fts0bset_in (FILE * in_str ,yyscan_t yyscanner );
-
-FILE *fts0bget_out (yyscan_t yyscanner );
-
-void fts0bset_out (FILE * out_str ,yyscan_t yyscanner );
-
-int fts0bget_leng (yyscan_t yyscanner );
-
-char *fts0bget_text (yyscan_t yyscanner );
-
-int fts0bget_lineno (yyscan_t yyscanner );
-
-void fts0bset_lineno (int line_number ,yyscan_t yyscanner );
-
-/* Macros after this point can all be overridden by user definitions in
- * section 1.
- */
-
-#ifndef YY_SKIP_YYWRAP
-#ifdef __cplusplus
-extern "C" int fts0bwrap (yyscan_t yyscanner );
-#else
-extern int fts0bwrap (yyscan_t yyscanner );
-#endif
-#endif
-
-#ifndef yytext_ptr
-static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner);
-#endif
-
-#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner);
-#endif
-
-#ifndef YY_NO_INPUT
-
-#endif
-
-/* Amount of stuff to slurp up with each read. */
-#ifndef YY_READ_BUF_SIZE
-#ifdef __ia64__
-/* On IA-64, the buffer size is 16k, not 8k */
-#define YY_READ_BUF_SIZE 16384
-#else
-#define YY_READ_BUF_SIZE 8192
-#endif /* __ia64__ */
-#endif
-
-/* Number of entries by which start-condition stack grows. */
-#ifndef YY_START_STACK_INCR
-#define YY_START_STACK_INCR 25
-#endif
-
-/* Default declaration of generated scanner - a define so the user can
- * easily add parameters.
- */
-#ifndef YY_DECL
-#define YY_DECL_IS_OURS 1
-
-extern int fts0blex (yyscan_t yyscanner);
-
-#define YY_DECL int fts0blex (yyscan_t yyscanner)
-#endif /* !YY_DECL */
-
-/* yy_get_previous_state - get the state just before the EOB char was reached */
-
-#undef YY_NEW_FILE
-#undef YY_FLUSH_BUFFER
-#undef yy_set_bol
-#undef yy_new_buffer
-#undef yy_set_interactive
-#undef YY_DO_BEFORE_ACTION
-
-#ifdef YY_DECL_IS_OURS
-#undef YY_DECL_IS_OURS
-#undef YY_DECL
-#endif
-
-#line 73 "fts0blex.l"
-
-
-#line 348 "../include/fts0blex.h"
-#undef fts0bIN_HEADER
-#endif /* fts0bHEADER_H */
diff --git a/storage/xtradb/include/fts0fts.h b/storage/xtradb/include/fts0fts.h
deleted file mode 100644
index 7aa7055640c..00000000000
--- a/storage/xtradb/include/fts0fts.h
+++ /dev/null
@@ -1,1064 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, MariaDB Corporation. All Rights reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fts0fts.h
-Full text search header file
-
-Created 2011/09/02 Sunny Bains
-***********************************************************************/
-
-#ifndef fts0fts_h
-#define fts0fts_h
-
-#include "univ.i"
-
-#include "data0type.h"
-#include "data0types.h"
-#include "dict0types.h"
-#include "hash0hash.h"
-#include "mem0mem.h"
-#include "rem0types.h"
-#include "row0types.h"
-#include "trx0types.h"
-#include "ut0vec.h"
-#include "ut0rbt.h"
-#include "ut0wqueue.h"
-#include "que0types.h"
-#include "ft_global.h"
-
-/** "NULL" value of a document id. */
-#define FTS_NULL_DOC_ID 0
-
-/** FTS hidden column that is used to map to and from the row */
-#define FTS_DOC_ID_COL_NAME "FTS_DOC_ID"
-
-/** The name of the index created by FTS */
-#define FTS_DOC_ID_INDEX_NAME "FTS_DOC_ID_INDEX"
-
-#define FTS_DOC_ID_INDEX_NAME_LEN 16
-
-/** Doc ID is a 8 byte value */
-#define FTS_DOC_ID_LEN 8
-
-/** The number of fields to sort when we build FT index with
-FIC. Three fields are sort: (word, doc_id, position) */
-#define FTS_NUM_FIELDS_SORT 3
-
-/** Maximum number of rows in a table, smaller than which, we will
-optimize using a 4 byte Doc ID for FIC merge sort to reduce sort size */
-#define MAX_DOC_ID_OPT_VAL 1073741824
-
-/** Document id type. */
-typedef ib_uint64_t doc_id_t;
-
-/** doc_id_t printf format */
-#define FTS_DOC_ID_FORMAT IB_ID_FMT
-
-/** Convert document id to the InnoDB (BIG ENDIAN) storage format. */
-#define fts_write_doc_id(d, s) mach_write_to_8(d, s)
-
-/** Read a document id to internal format. */
-#define fts_read_doc_id(s) mach_read_from_8(s)
-
-/** Bind the doc id to a variable */
-#define fts_bind_doc_id(i, n, v) pars_info_bind_int8_literal(i, n, v)
-
-/** Defines for FTS query mode, they have the same values as
-those defined in mysql file ft_global.h */
-#define FTS_NL 0
-#define FTS_BOOL 1
-#define FTS_SORTED 2
-#define FTS_EXPAND 4
-#define FTS_PROXIMITY 8
-#define FTS_PHRASE 16
-#define FTS_OPT_RANKING 32
-
-#define FTS_INDEX_TABLE_IND_NAME "FTS_INDEX_TABLE_IND"
-
-/** Threshold where our optimize thread automatically kicks in */
-#define FTS_OPTIMIZE_THRESHOLD 10000000
-
-/** Threshold to avoid exhausting of doc ids. Consecutive doc id difference
-should not exceed FTS_DOC_ID_MAX_STEP */
-#define FTS_DOC_ID_MAX_STEP 65535
-
-/** Variable specifying the FTS parallel sort degree */
-extern ulong fts_sort_pll_degree;
-
-/** Variable specifying the number of word to optimize for each optimize table
-call */
-extern ulong fts_num_word_optimize;
-
-/** Variable specifying whether we do additional FTS diagnostic printout
-in the log */
-extern char fts_enable_diag_print;
-
-/** FTS rank type, which will be between 0 .. 1 inclusive */
-typedef float fts_rank_t;
-
-/** Type of a row during a transaction. FTS_NOTHING means the row can be
-forgotten from the FTS system's POV, FTS_INVALID is an internal value used
-to mark invalid states.
-
-NOTE: Do not change the order or value of these, fts_trx_row_get_new_state
-depends on them being exactly as they are. */
-enum fts_row_state {
- FTS_INSERT = 0,
- FTS_MODIFY,
- FTS_DELETE,
- FTS_NOTHING,
- FTS_INVALID
-};
-
-/** The FTS table types. */
-enum fts_table_type_t {
- FTS_INDEX_TABLE, /*!< FTS auxiliary table that is
- specific to a particular FTS index
- on a table */
-
- FTS_COMMON_TABLE /*!< FTS auxiliary table that is common
- for all FTS index on a table */
-};
-
-struct fts_doc_t;
-struct fts_cache_t;
-struct fts_token_t;
-struct fts_doc_ids_t;
-struct fts_index_cache_t;
-
-
-/** Initialize the "fts_table" for internal query into FTS auxiliary
-tables */
-#define FTS_INIT_FTS_TABLE(fts_table, m_suffix, m_type, m_table)\
-do { \
- (fts_table)->suffix = m_suffix; \
- (fts_table)->type = m_type; \
- (fts_table)->table_id = m_table->id; \
- (fts_table)->parent = m_table->name; \
- (fts_table)->table = m_table; \
-} while (0);
-
-#define FTS_INIT_INDEX_TABLE(fts_table, m_suffix, m_type, m_index)\
-do { \
- (fts_table)->suffix = m_suffix; \
- (fts_table)->type = m_type; \
- (fts_table)->table_id = m_index->table->id; \
- (fts_table)->parent = m_index->table->name; \
- (fts_table)->table = m_index->table; \
- (fts_table)->index_id = m_index->id; \
-} while (0);
-
-/** Information about changes in a single transaction affecting
-the FTS system. */
-struct fts_trx_t {
- trx_t* trx; /*!< InnoDB transaction */
-
- ib_vector_t* savepoints; /*!< Active savepoints, must have at
- least one element, the implied
- savepoint */
- ib_vector_t* last_stmt; /*!< last_stmt */
-
- mem_heap_t* heap; /*!< heap */
-};
-
-/** Information required for transaction savepoint handling. */
-struct fts_savepoint_t {
- char* name; /*!< First entry is always NULL, the
- default instance. Otherwise the name
- of the savepoint */
-
- ib_rbt_t* tables; /*!< Modified FTS tables */
-};
-
-/** Information about changed rows in a transaction for a single table. */
-struct fts_trx_table_t {
- dict_table_t* table; /*!< table */
-
- fts_trx_t* fts_trx; /*!< link to parent */
-
- ib_rbt_t* rows; /*!< rows changed; indexed by doc-id,
- cells are fts_trx_row_t* */
-
- fts_doc_ids_t* added_doc_ids; /*!< list of added doc ids (NULL until
- the first addition) */
-
- /*!< for adding doc ids */
- que_t* docs_added_graph;
-};
-
-/** Information about one changed row in a transaction. */
-struct fts_trx_row_t {
- doc_id_t doc_id; /*!< Id of the ins/upd/del document */
-
- fts_row_state state; /*!< state of the row */
-
- ib_vector_t* fts_indexes; /*!< The indexes that are affected */
-};
-
-/** List of document ids that were added during a transaction. This
-list is passed on to a background 'Add' thread and OPTIMIZE, so it
-needs its own memory heap. */
-struct fts_doc_ids_t {
- ib_vector_t* doc_ids; /*!< document ids (each element is
- of type doc_id_t). */
-
- ib_alloc_t* self_heap; /*!< Allocator used to create an
- instance of this type and the
- doc_ids vector */
-};
-
-// FIXME: Get rid of this if possible.
-/** Since MySQL's character set support for Unicode is woefully inadequate
-(it supports basic operations like isalpha etc. only for 8-bit characters),
-we have to implement our own. We use UTF-16 without surrogate processing
-as our in-memory format. This typedef is a single such character. */
-typedef unsigned short ib_uc_t;
-
-/** An UTF-16 ro UTF-8 string. */
-struct fts_string_t {
- byte* f_str; /*!< string, not necessary terminated in
- any way */
- ulint f_len; /*!< Length of the string in bytes */
- ulint f_n_char; /*!< Number of characters */
-};
-
-/** Query ranked doc ids. */
-struct fts_ranking_t {
- doc_id_t doc_id; /*!< Document id */
-
- fts_rank_t rank; /*!< Rank is between 0 .. 1 */
-
- byte* words; /*!< this contains the words
- that were queried
- and found in this document */
- ulint words_len; /*!< words len */
-};
-
-/** Query result. */
-struct fts_result_t {
- ib_rbt_node_t* current; /*!< Current element */
-
- ib_rbt_t* rankings_by_id; /*!< RB tree of type fts_ranking_t
- indexed by doc id */
- ib_rbt_t* rankings_by_rank;/*!< RB tree of type fts_ranking_t
- indexed by rank */
-};
-
-/** This is used to generate the FTS auxiliary table name, we need the
-table id and the index id to generate the column specific FTS auxiliary
-table name. */
-struct fts_table_t {
- const char* parent; /*!< Parent table name, this is
- required only for the database
- name */
-
- fts_table_type_t
- type; /*!< The auxiliary table type */
-
- table_id_t table_id; /*!< The table id */
-
- index_id_t index_id; /*!< The index id */
-
- const char* suffix; /*!< The suffix of the fts auxiliary
- table name, can be NULL, not used
- everywhere (yet) */
- const dict_table_t*
- table; /*!< Parent table */
- CHARSET_INFO* charset; /*!< charset info if it is for FTS
- index auxiliary table */
-};
-
-enum fts_status {
- BG_THREAD_STOP = 1, /*!< TRUE if the FTS background thread
- has finished reading the ADDED table,
- meaning more items can be added to
- the table. */
-
- BG_THREAD_READY = 2, /*!< TRUE if the FTS background thread
- is ready */
-
- ADD_THREAD_STARTED = 4, /*!< TRUE if the FTS add thread
- has started */
-
- ADDED_TABLE_SYNCED = 8, /*!< TRUE if the ADDED table record is
- sync-ed after crash recovery */
-
- TABLE_DICT_LOCKED = 16 /*!< Set if the table has
- dict_sys->mutex */
-};
-
-typedef enum fts_status fts_status_t;
-
-/** The state of the FTS sub system. */
-struct fts_t {
- /*!< mutex protecting bg_threads* and
- fts_add_wq. */
- ib_mutex_t bg_threads_mutex;
-
- ulint bg_threads; /*!< number of background threads
- accessing this table */
-
- /*!< TRUE if background threads running
- should stop themselves */
- ulint fts_status; /*!< Status bit regarding fts
- running state */
-
- ib_wqueue_t* add_wq; /*!< Work queue for scheduling jobs
- for the FTS 'Add' thread, or NULL
- if the thread has not yet been
- created. Each work item is a
- fts_trx_doc_ids_t*. */
-
- fts_cache_t* cache; /*!< FTS memory buffer for this table,
- or NULL if the table has no FTS
- index. */
-
- ulint doc_col; /*!< FTS doc id hidden column number
- in the CLUSTERED index. */
-
- ib_vector_t* indexes; /*!< Vector of FTS indexes, this is
- mainly for caching purposes. */
- mem_heap_t* fts_heap; /*!< heap for fts_t allocation */
-};
-
-struct fts_stopword_t;
-
-/** status bits for fts_stopword_t status field. */
-#define STOPWORD_NOT_INIT 0x1
-#define STOPWORD_OFF 0x2
-#define STOPWORD_FROM_DEFAULT 0x4
-#define STOPWORD_USER_TABLE 0x8
-
-extern const char* fts_default_stopword[];
-
-/** Variable specifying the maximum FTS cache size for each table */
-extern ulong fts_max_cache_size;
-
-/** Variable specifying the total memory allocated for FTS cache */
-extern ulong fts_max_total_cache_size;
-
-/** Variable specifying the FTS result cache limit for each query */
-extern ulong fts_result_cache_limit;
-
-/** Variable specifying the maximum FTS max token size */
-extern ulong fts_max_token_size;
-
-/** Variable specifying the minimum FTS max token size */
-extern ulong fts_min_token_size;
-
-/** Whether the total memory used for FTS cache is exhausted, and we will
-need a sync to free some memory */
-extern bool fts_need_sync;
-
-/** Maximum possible Fulltext word length in bytes (assuming mbmaxlen=4) */
-#define FTS_MAX_WORD_LEN (HA_FT_MAXCHARLEN * 4)
-
-/** Maximum possible Fulltext word length (in characters) */
-#define FTS_MAX_WORD_LEN_IN_CHAR HA_FT_MAXCHARLEN
-
-/** Variable specifying the table that has Fulltext index to display its
-content through information schema table */
-extern char* fts_internal_tbl_name;
-extern char* fts_internal_tbl_name2;
-
-#define fts_que_graph_free(graph) \
-do { \
- mutex_enter(&dict_sys->mutex); \
- que_graph_free(graph); \
- mutex_exit(&dict_sys->mutex); \
-} while (0)
-
-/******************************************************************//**
-Create a FTS cache. */
-UNIV_INTERN
-fts_cache_t*
-fts_cache_create(
-/*=============*/
- dict_table_t* table); /*!< table owns the FTS cache */
-
-/******************************************************************//**
-Create a FTS index cache.
-@return Index Cache */
-UNIV_INTERN
-fts_index_cache_t*
-fts_cache_index_cache_create(
-/*=========================*/
- dict_table_t* table, /*!< in: table with FTS index */
- dict_index_t* index); /*!< in: FTS index */
-
-/******************************************************************//**
-Get the next available document id. This function creates a new
-transaction to generate the document id.
-@return DB_SUCCESS if OK */
-UNIV_INTERN
-dberr_t
-fts_get_next_doc_id(
-/*================*/
- const dict_table_t* table, /*!< in: table */
- doc_id_t* doc_id) /*!< out: new document id */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Update the next and last Doc ID in the CONFIG table to be the input
-"doc_id" value (+ 1). We would do so after each FTS index build or
-table truncate */
-UNIV_INTERN
-void
-fts_update_next_doc_id(
-/*===================*/
- trx_t* trx, /*!< in/out: transaction */
- const dict_table_t* table, /*!< in: table */
- const char* table_name, /*!< in: table name, or NULL */
- doc_id_t doc_id) /*!< in: DOC ID to set */
- MY_ATTRIBUTE((nonnull(2)));
-
-/******************************************************************//**
-Create a new document id.
-@return DB_SUCCESS if all went well else error */
-UNIV_INTERN
-dberr_t
-fts_create_doc_id(
-/*==============*/
- dict_table_t* table, /*!< in: row is of this
- table. */
- dtuple_t* row, /*!< in/out: add doc id
- value to this row. This is the
- current row that is being
- inserted. */
- mem_heap_t* heap) /*!< in: heap */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Create a new fts_doc_ids_t.
-@return new fts_doc_ids_t. */
-UNIV_INTERN
-fts_doc_ids_t*
-fts_doc_ids_create(void);
-/*=====================*/
-
-/******************************************************************//**
-Free a fts_doc_ids_t. */
-UNIV_INTERN
-void
-fts_doc_ids_free(
-/*=============*/
- fts_doc_ids_t* doc_ids); /*!< in: doc_ids to free */
-
-/******************************************************************//**
-Notify the FTS system about an operation on an FTS-indexed table. */
-UNIV_INTERN
-void
-fts_trx_add_op(
-/*===========*/
- trx_t* trx, /*!< in: InnoDB transaction */
- dict_table_t* table, /*!< in: table */
- doc_id_t doc_id, /*!< in: doc id */
- fts_row_state state, /*!< in: state of the row */
- ib_vector_t* fts_indexes) /*!< in: FTS indexes affected
- (NULL=all) */
- MY_ATTRIBUTE((nonnull(1,2)));
-
-/******************************************************************//**
-Free an FTS trx. */
-UNIV_INTERN
-void
-fts_trx_free(
-/*=========*/
- fts_trx_t* fts_trx); /*!< in, own: FTS trx */
-
-/******************************************************************//**
-Creates the common ancillary tables needed for supporting an FTS index
-on the given table. row_mysql_lock_data_dictionary must have been
-called before this.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_create_common_tables(
-/*=====================*/
- trx_t* trx, /*!< in: transaction handle */
- const dict_table_t*
- table, /*!< in: table with one FTS
- index */
- const char* name, /*!< in: table name */
- bool skip_doc_id_index) /*!< in: Skip index on doc id */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Wrapper function of fts_create_index_tables_low(), create auxiliary
-tables for an FTS index
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_create_index_tables(
-/*====================*/
- trx_t* trx, /*!< in: transaction handle */
- const dict_index_t* index) /*!< in: the FTS index
- instance */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Creates the column specific ancillary tables needed for supporting an
-FTS index on the given table. row_mysql_lock_data_dictionary must have
-been called before this.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_create_index_tables_low(
-/*========================*/
- trx_t* trx, /*!< in: transaction handle */
- const dict_index_t*
- index, /*!< in: the FTS index
- instance */
- const char* table_name, /*!< in: the table name */
- table_id_t table_id) /*!< in: the table id */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Add the FTS document id hidden column. */
-UNIV_INTERN
-void
-fts_add_doc_id_column(
-/*==================*/
- dict_table_t* table, /*!< in/out: Table with FTS index */
- mem_heap_t* heap) /*!< in: temporary memory heap, or NULL */
- MY_ATTRIBUTE((nonnull(1)));
-
-/*********************************************************************//**
-Drops the ancillary tables needed for supporting an FTS index on the
-given table. row_mysql_lock_data_dictionary must have been called before
-this.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_drop_tables(
-/*============*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table) /*!< in: table has the FTS
- index */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-The given transaction is about to be committed; do whatever is necessary
-from the FTS system's POV.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_commit(
-/*=======*/
- trx_t* trx) /*!< in: transaction */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/*******************************************************************//**
-FTS Query entry point.
-@return DB_SUCCESS if successful otherwise error code */
-UNIV_INTERN
-dberr_t
-fts_query(
-/*======*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: FTS index to search */
- uint flags, /*!< in: FTS search mode */
- const byte* query, /*!< in: FTS query */
- ulint query_len, /*!< in: FTS query string len
- in bytes */
- fts_result_t** result) /*!< out: query result, to be
- freed by the caller.*/
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/******************************************************************//**
-Retrieve the FTS Relevance Ranking result for doc with doc_id
-@return the relevance ranking value. */
-UNIV_INTERN
-float
-fts_retrieve_ranking(
-/*=================*/
- fts_result_t* result, /*!< in: FTS result structure */
- doc_id_t doc_id); /*!< in: the interested document
- doc_id */
-
-/******************************************************************//**
-Sort the FTS query result returned by fts_query() on fts_ranking_t::rank. */
-UNIV_INTERN
-void
-fts_query_sort_result_on_rank(
-/*==========================*/
- fts_result_t* result); /*!< out: result instance
- to sort.*/
-
-/******************************************************************//**
-FTS Query free result, returned by fts_query(). */
-UNIV_INTERN
-void
-fts_query_free_result(
-/*==================*/
- fts_result_t* result); /*!< in: result instance
- to free.*/
-
-/******************************************************************//**
-Extract the doc id from the FTS hidden column. */
-UNIV_INTERN
-doc_id_t
-fts_get_doc_id_from_row(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- dtuple_t* row); /*!< in: row whose FTS doc id we
- want to extract.*/
-
-/******************************************************************//**
-Extract the doc id from the FTS hidden column. */
-UNIV_INTERN
-doc_id_t
-fts_get_doc_id_from_rec(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- const rec_t* rec, /*!< in: rec */
- mem_heap_t* heap); /*!< in: heap */
-
-/******************************************************************//**
-Update the query graph with a new document id.
-@return Doc ID used */
-UNIV_INTERN
-doc_id_t
-fts_update_doc_id(
-/*==============*/
- dict_table_t* table, /*!< in: table */
- upd_field_t* ufield, /*!< out: update node */
- doc_id_t* next_doc_id); /*!< out: buffer for writing */
-
-/******************************************************************//**
-FTS initialize. */
-UNIV_INTERN
-void
-fts_startup(void);
-/*==============*/
-
-/******************************************************************//**
-Signal FTS threads to initiate shutdown. */
-UNIV_INTERN
-void
-fts_start_shutdown(
-/*===============*/
- dict_table_t* table, /*!< in: table with FTS
- indexes */
- fts_t* fts); /*!< in: fts instance to
- shutdown */
-
-/******************************************************************//**
-Wait for FTS threads to shutdown. */
-UNIV_INTERN
-void
-fts_shutdown(
-/*=========*/
- dict_table_t* table, /*!< in: table with FTS
- indexes */
- fts_t* fts); /*!< in: fts instance to
- shutdown */
-
-/******************************************************************//**
-Create an instance of fts_t.
-@return instance of fts_t */
-UNIV_INTERN
-fts_t*
-fts_create(
-/*=======*/
- dict_table_t* table); /*!< out: table with FTS
- indexes */
-
-/**********************************************************************//**
-Free the FTS resources. */
-UNIV_INTERN
-void
-fts_free(
-/*=====*/
- dict_table_t* table); /*!< in/out: table with
- FTS indexes */
-
-/*********************************************************************//**
-Run OPTIMIZE on the given table.
-@return DB_SUCCESS if all OK */
-UNIV_INTERN
-dberr_t
-fts_optimize_table(
-/*===============*/
- dict_table_t* table) /*!< in: table to optimize */
- MY_ATTRIBUTE((nonnull));
-
-/**********************************************************************//**
-Startup the optimize thread and create the work queue. */
-UNIV_INTERN
-void
-fts_optimize_init(void);
-/*====================*/
-
-/**********************************************************************//**
-Check whether the work queue is initialized.
-@return TRUE if optimize queue is initialized. */
-UNIV_INTERN
-ibool
-fts_optimize_is_init(void);
-/*======================*/
-
-/****************************************************************//**
-Drops index ancillary tables for a FTS index
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_drop_index_tables(
-/*==================*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index) /*!< in: Index to drop */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/******************************************************************//**
-Remove the table from the OPTIMIZER's list. We do wait for
-acknowledgement from the consumer of the message. */
-UNIV_INTERN
-void
-fts_optimize_remove_table(
-/*======================*/
- dict_table_t* table); /*!< in: table to remove */
-
-/** Send a request to sync the FTS cache for the table.
-@param[in] table table to sync */
-UNIV_INTERN
-void
-fts_optimize_request_sync_table(
- dict_table_t* table);
-
-/**********************************************************************//**
-Signal the optimize thread to prepare for shutdown. */
-UNIV_INTERN
-void
-fts_optimize_start_shutdown(void);
-/*==============================*/
-
-/**********************************************************************//**
-Inform optimize to clean up. */
-UNIV_INTERN
-void
-fts_optimize_end(void);
-/*===================*/
-
-/**********************************************************************//**
-Take a FTS savepoint. */
-UNIV_INTERN
-void
-fts_savepoint_take(
-/*===============*/
- trx_t* trx, /*!< in: transaction */
- fts_trx_t* fts_trx, /*!< in: fts transaction */
- const char* name) /*!< in: savepoint name */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Refresh last statement savepoint. */
-UNIV_INTERN
-void
-fts_savepoint_laststmt_refresh(
-/*===========================*/
- trx_t* trx) /*!< in: transaction */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Release the savepoint data identified by name. */
-UNIV_INTERN
-void
-fts_savepoint_release(
-/*==================*/
- trx_t* trx, /*!< in: transaction */
- const char* name); /*!< in: savepoint name */
-
-/**********************************************************************//**
-Free the FTS cache. */
-UNIV_INTERN
-void
-fts_cache_destroy(
-/*==============*/
- fts_cache_t* cache); /*!< in: cache*/
-
-/** Clear cache.
-@param[in,out] cache fts cache */
-UNIV_INTERN
-void
-fts_cache_clear(
- fts_cache_t* cache);
-
-/*********************************************************************//**
-Initialize things in cache. */
-UNIV_INTERN
-void
-fts_cache_init(
-/*===========*/
- fts_cache_t* cache); /*!< in: cache */
-
-/*********************************************************************//**
-Rollback to and including savepoint identified by name. */
-UNIV_INTERN
-void
-fts_savepoint_rollback(
-/*===================*/
- trx_t* trx, /*!< in: transaction */
- const char* name); /*!< in: savepoint name */
-
-/*********************************************************************//**
-Rollback to the last statement savepoint. */
-UNIV_INTERN
-void
-fts_savepoint_rollback_last_stmt(
-/*=============================*/
- trx_t* trx); /*!< in: transaction */
-
-/***********************************************************************//**
-Drop all orphaned FTS auxiliary tables, those that don't have a parent
-table or FTS index defined on them. */
-UNIV_INTERN
-void
-fts_drop_orphaned_tables(void);
-/*==========================*/
-
-/* Get parent table name if it's a fts aux table
-@param[in] aux_table_name aux table name
-@param[in] aux_table_len aux table length
-@return parent table name, or NULL */
-char*
-fts_get_parent_table_name(
- const char* aux_table_name,
- ulint aux_table_len);
-
-/******************************************************************//**
-Since we do a horizontal split on the index table, we need to drop
-all the split tables.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_drop_index_split_tables(
-/*========================*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index) /*!< in: fts instance */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/** Run SYNC on the table, i.e., write out data from the cache to the
-FTS auxiliary INDEX table and clear the cache at the end.
-@param[in,out] table fts table
-@param[in] unlock_cache whether to unlock the cache when writing a node
-@param[in] wait whether to wait for an existing sync to finish
-@param[in] has_dict whether the caller has the dict operation lock
-@return DB_SUCCESS on success, error code on failure. */
-UNIV_INTERN
-dberr_t
-fts_sync_table(
- dict_table_t* table,
- bool unlock_cache,
- bool wait,
- bool has_dict);
-
-/****************************************************************//**
-Free the query graph but check whether dict_sys->mutex is already
-held */
-UNIV_INTERN
-void
-fts_que_graph_free_check_lock(
-/*==========================*/
- fts_table_t* fts_table, /*!< in: FTS table */
- const fts_index_cache_t*index_cache, /*!< in: FTS index cache */
- que_t* graph); /*!< in: query graph */
-
-/****************************************************************//**
-Get the charset of an FTS index. */
-UNIV_INTERN
-CHARSET_INFO*
-fts_index_get_charset(
-/*==================*/
- dict_index_t* index); /*!< in: FTS index */
-
-/*********************************************************************//**
-Get the initial Doc ID by consulting the CONFIG table
-@return initial Doc ID */
-UNIV_INTERN
-doc_id_t
-fts_init_doc_id(
-/*============*/
- const dict_table_t* table); /*!< in: table */
-
-/******************************************************************//**
-Compare two character strings according to their charset. */
-extern
-int
-innobase_fts_text_cmp(
-/*==================*/
- const void* cs, /*!< in: Character set */
- const void* p1, /*!< in: key */
- const void* p2); /*!< in: node */
-
-/******************************************************************//**
-Makes all characters in a string lower case. */
-extern
-size_t
-innobase_fts_casedn_str(
-/*====================*/
- CHARSET_INFO* cs, /*!< in: Character set */
- char* src, /*!< in: string to put in
- lower case */
- size_t src_len, /*!< in: input string length */
- char* dst, /*!< out: buffer for result
- string */
- size_t dst_len); /*!< in: buffer size */
-
-
-/******************************************************************//**
-Compare two character strings according to their charset. */
-extern
-int
-innobase_fts_text_cmp_prefix(
-/*=========================*/
- const void* cs, /*!< in: Character set */
- const void* p1, /*!< in: key */
- const void* p2); /*!< in: node */
-
-/*************************************************************//**
-Get the next token from the given string and store it in *token. */
-extern
-ulint
-innobase_mysql_fts_get_token(
-/*=========================*/
- CHARSET_INFO* charset, /*!< in: Character set */
- const byte* start, /*!< in: start of text */
- const byte* end, /*!< in: one character past
- end of text */
- fts_string_t* token, /*!< out: token's text */
- ulint* offset); /*!< out: offset to token,
- measured as characters from
- 'start' */
-
-/*********************************************************************//**
-Fetch COUNT(*) from specified table.
-@return the number of rows in the table */
-UNIV_INTERN
-ulint
-fts_get_rows_count(
-/*===============*/
- fts_table_t* fts_table); /*!< in: fts table to read */
-
-/*************************************************************//**
-Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
-@return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
-UNIV_INTERN
-doc_id_t
-fts_get_max_doc_id(
-/*===============*/
- dict_table_t* table); /*!< in: user table */
-
-/******************************************************************//**
-Check whether user supplied stopword table exists and is of
-the right format.
-@return the stopword column charset if qualifies */
-UNIV_INTERN
-CHARSET_INFO*
-fts_valid_stopword_table(
-/*=====================*/
- const char* stopword_table_name); /*!< in: Stopword table
- name */
-/****************************************************************//**
-This function loads specified stopword into FTS cache
-@return TRUE if success */
-UNIV_INTERN
-ibool
-fts_load_stopword(
-/*==============*/
- const dict_table_t*
- table, /*!< in: Table with FTS */
- trx_t* trx, /*!< in: Transaction */
- const char* global_stopword_table, /*!< in: Global stopword table
- name */
- const char* session_stopword_table, /*!< in: Session stopword table
- name */
- ibool stopword_is_on, /*!< in: Whether stopword
- option is turned on/off */
- ibool reload); /*!< in: Whether it is during
- reload of FTS table */
-
-/****************************************************************//**
-Create the vector of fts_get_doc_t instances.
-@return vector of fts_get_doc_t instances */
-UNIV_INTERN
-ib_vector_t*
-fts_get_docs_create(
-/*================*/
- fts_cache_t* cache); /*!< in: fts cache */
-
-/****************************************************************//**
-Read the rows from the FTS index
-@return DB_SUCCESS if OK */
-UNIV_INTERN
-dberr_t
-fts_table_fetch_doc_ids(
-/*====================*/
- trx_t* trx, /*!< in: transaction */
- fts_table_t* fts_table, /*!< in: aux table */
- fts_doc_ids_t* doc_ids); /*!< in: For collecting
- doc ids */
-/****************************************************************//**
-This function brings FTS index in sync when FTS index is first
-used. There may be documents that were not yet synced to the auxiliary
-tables before the last abnormal server shutdown; we need to bring
-such documents into the FTS cache before any further operations
-@return TRUE if all OK */
-UNIV_INTERN
-ibool
-fts_init_index(
-/*===========*/
- dict_table_t* table, /*!< in: Table with FTS */
- ibool has_cache_lock); /*!< in: Whether we already
- have cache lock */
-/*******************************************************************//**
-Add a newly created index to the FTS cache */
-UNIV_INTERN
-void
-fts_add_index(
-/*==========*/
- dict_index_t* index, /*!< FTS index to be added */
- dict_table_t* table); /*!< table */
-
-/*******************************************************************//**
-Drop auxiliary tables related to an FTS index
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-dberr_t
-fts_drop_index(
-/*===========*/
- dict_table_t* table, /*!< in: Table where indexes are dropped */
- dict_index_t* index, /*!< in: Index to be dropped */
- trx_t* trx) /*!< in: Transaction for the drop */
- MY_ATTRIBUTE((nonnull));
-
-/****************************************************************//**
-Rename auxiliary tables for all FTS indexes of a table
-@return DB_SUCCESS or error code */
-
-dberr_t
-fts_rename_aux_tables(
-/*==================*/
- dict_table_t* table, /*!< in: user Table */
- const char* new_name, /*!< in: new table name */
- trx_t* trx); /*!< in: transaction */
-
-/*******************************************************************//**
-Check that the indexes in fts->indexes are also present in the index cache
-and the table->indexes list
-@return TRUE if all indexes match */
-UNIV_INTERN
-ibool
-fts_check_cached_index(
-/*===================*/
- dict_table_t* table); /*!< in: Table where indexes are dropped */
-#endif /*!< fts0fts.h */
-
diff --git a/storage/xtradb/include/fts0opt.h b/storage/xtradb/include/fts0opt.h
deleted file mode 100644
index 92eaf8270d2..00000000000
--- a/storage/xtradb/include/fts0opt.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2001, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fts0opt.h
-Full Text Search optimize thread
-
-Created 2011-02-15 Jimmy Yang
-***********************************************************************/
-#ifndef INNODB_FTS0OPT_H
-#define INNODB_FTS0OPT_H
-
-/********************************************************************
-Callback function to fetch the rows in an FTS INDEX record. */
-UNIV_INTERN
-ibool
-fts_optimize_index_fetch_node(
-/*==========================*/
- /* out: always returns non-NULL */
- void* row, /* in: sel_node_t* */
- void* user_arg); /* in: pointer to ib_vector_t */
-#endif
diff --git a/storage/xtradb/include/fts0pars.h b/storage/xtradb/include/fts0pars.h
deleted file mode 100644
index 8108e811599..00000000000
--- a/storage/xtradb/include/fts0pars.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/* A Bison parser, made by GNU Bison 2.5. */
-
-/* Bison interface for Yacc-like parsers in C
-
- Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
-
-/* As a special exception, you may create a larger work that contains
- part or all of the Bison parser skeleton and distribute that work
- under terms of your choice, so long as that work isn't itself a
- parser generator using the skeleton or a modified version thereof
- as a parser skeleton. Alternatively, if you modify or redistribute
- the parser skeleton itself, you may (at your option) remove this
- special exception, which will cause the skeleton and the resulting
- Bison output files to be licensed under the GNU General Public
- License without this special exception.
-
- This special exception was added by the Free Software Foundation in
- version 2.2 of Bison. */
-
-
-/* Tokens. */
-#ifndef YYTOKENTYPE
-# define YYTOKENTYPE
- /* Put the tokens into the symbol table, so that GDB and other debuggers
- know about them. */
- enum yytokentype {
- FTS_OPER = 258,
- FTS_TEXT = 259,
- FTS_TERM = 260,
- FTS_NUMB = 261
- };
-#endif
-
-
-
-#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
-typedef union YYSTYPE
-{
-
-/* Line 2068 of yacc.c */
-#line 61 "fts0pars.y"
-
- int oper;
- fts_ast_string_t* token;
- fts_ast_node_t* node;
-
-
-
-/* Line 2068 of yacc.c */
-#line 64 "fts0pars.hh"
-} YYSTYPE;
-# define YYSTYPE_IS_TRIVIAL 1
-# define yystype YYSTYPE /* obsolescent; will be withdrawn */
-# define YYSTYPE_IS_DECLARED 1
-#endif
-
-
-
-
diff --git a/storage/xtradb/include/fts0priv.h b/storage/xtradb/include/fts0priv.h
deleted file mode 100644
index 2d4e9d88fd1..00000000000
--- a/storage/xtradb/include/fts0priv.h
+++ /dev/null
@@ -1,653 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fts0priv.h
-Full text search internal header file
-
-Created 2011/09/02 Sunny Bains
-***********************************************************************/
-
-#ifndef INNOBASE_FTS0PRIV_H
-#define INNOBASE_FTS0PRIV_H
-
-#include "dict0dict.h"
-#include "pars0pars.h"
-#include "que0que.h"
-#include "que0types.h"
-#include "fts0types.h"
-
-/** The various states of the FTS sub system pertaining to a table with
-FTS indexes defined on it. */
-enum fts_table_state_enum {
- /* FTS_TABLE_STATE_RUNNING must be 0 since we insert
- a hard coded '0' at create time
- to the config table */
-
- FTS_TABLE_STATE_RUNNING = 0, /*!< Auxiliary tables created OK */
-
- FTS_TABLE_STATE_OPTIMIZING, /*!< This is a substate of RUNNING */
-
- FTS_TABLE_STATE_DELETED /*!< All aux tables to be dropped when
- it's safe to do so */
-};
-
-typedef enum fts_table_state_enum fts_table_state_t;
-
-/** The default time to wait for the background thread (in microseconds). */
-#define FTS_MAX_BACKGROUND_THREAD_WAIT 10000
-
-/** Maximum number of iterations to wait before we complain */
-#define FTS_BACKGROUND_THREAD_WAIT_COUNT 1000
-
-/** The maximum length of a config table parameter name in bytes */
-#define FTS_MAX_CONFIG_NAME_LEN 64
-
-/** The maximum length of the config table's value column in bytes */
-#define FTS_MAX_CONFIG_VALUE_LEN 1024
-
-/** Approx. upper limit of ilist length in bytes. */
-#define FTS_ILIST_MAX_SIZE (64 * 1024)
-
-/** FTS config table name parameters */
-
-/** The number of seconds after which an OPTIMIZE run will stop */
-#define FTS_OPTIMIZE_LIMIT_IN_SECS "optimize_checkpoint_limit"
-
-/** The next doc id */
-#define FTS_SYNCED_DOC_ID "synced_doc_id"
-
-/** The last word that was OPTIMIZED */
-#define FTS_LAST_OPTIMIZED_WORD "last_optimized_word"
-
-/** Total number of documents that have been deleted. The next_doc_id
-minus this count gives us the total number of documents. */
-#define FTS_TOTAL_DELETED_COUNT "deleted_doc_count"
-
-/** Total number of words parsed from all documents */
-#define FTS_TOTAL_WORD_COUNT "total_word_count"
-
-/** Start of optimize of an FTS index */
-#define FTS_OPTIMIZE_START_TIME "optimize_start_time"
-
-/** End of optimize for an FTS index */
-#define FTS_OPTIMIZE_END_TIME "optimize_end_time"
-
-/** User specified stopword table name */
-#define FTS_STOPWORD_TABLE_NAME "stopword_table_name"
-
-/** Whether to use (turn on/off) stopword */
-#define FTS_USE_STOPWORD "use_stopword"
-
-/** State of the FTS system for this table. It can be one of
- RUNNING, OPTIMIZING, DELETED. */
-#define FTS_TABLE_STATE "table_state"
-
-/** The minimum length of an FTS auxiliary table name's id component
-e.g., For an auxiliary table name
-
- FTS_<TABLE_ID>_SUFFIX
-
-This constant is for the minimum length required to store the <TABLE_ID>
-component.
-*/
-#define FTS_AUX_MIN_TABLE_ID_LENGTH 48
-
-/** Maximum length of an integer stored in the config table value column. */
-#define FTS_MAX_INT_LEN 32
-
-/******************************************************************//**
-Parse an SQL string. %s is replaced with the table's id.
-@return query graph */
-UNIV_INTERN
-que_t*
-fts_parse_sql(
-/*==========*/
- fts_table_t* fts_table, /*!< in: FTS aux table */
- pars_info_t* info, /*!< in: info struct, or NULL */
- const char* sql) /*!< in: SQL string to evaluate */
- MY_ATTRIBUTE((nonnull(3), malloc, warn_unused_result));
-/******************************************************************//**
-Evaluate a parsed SQL statement
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_eval_sql(
-/*=========*/
- trx_t* trx, /*!< in: transaction */
- que_t* graph) /*!< in: Parsed statement */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Construct the name of an ancillary FTS table for the given table.
-@return own: table name, must be freed with mem_free() */
-UNIV_INTERN
-char*
-fts_get_table_name(
-/*===============*/
- const fts_table_t*
- fts_table) /*!< in: FTS aux table info */
- MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
-/******************************************************************//**
-Construct the column specification part of the SQL string for selecting the
-indexed FTS columns for the given table. Adds the necessary bound
-ids to the given 'info' and returns the SQL string. Examples:
-
-One indexed column named "text":
-
- "$sel0",
- info/ids: sel0 -> "text"
-
-Two indexed columns named "subject" and "content":
-
- "$sel0, $sel1",
- info/ids: sel0 -> "subject", sel1 -> "content",
-@return heap-allocated column specification string */
-UNIV_INTERN
-const char*
-fts_get_select_columns_str(
-/*=======================*/
- dict_index_t* index, /*!< in: FTS index */
- pars_info_t* info, /*!< in/out: parser info */
- mem_heap_t* heap) /*!< in: memory heap */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/** define for fts_doc_fetch_by_doc_id() "option" value, defines whether
-we want to get Doc whose ID is equal to or greater or smaller than supplied
-ID */
-#define FTS_FETCH_DOC_BY_ID_EQUAL 1
-#define FTS_FETCH_DOC_BY_ID_LARGE 2
-#define FTS_FETCH_DOC_BY_ID_SMALL 3
-
-/*************************************************************//**
-Fetch document (= a single row's indexed text) with the given
-document id.
-@return: DB_SUCCESS if fetch is successful, else error */
-UNIV_INTERN
-dberr_t
-fts_doc_fetch_by_doc_id(
-/*====================*/
- fts_get_doc_t* get_doc, /*!< in: state */
- doc_id_t doc_id, /*!< in: id of document to fetch */
- dict_index_t* index_to_use, /*!< in: caller supplied FTS index,
- or NULL */
- ulint option, /*!< in: search option, if it is
- greater than doc_id or equal */
- fts_sql_callback
- callback, /*!< in: callback to read
- records */
- void* arg) /*!< in: callback arg */
- MY_ATTRIBUTE((nonnull(6)));
-
-/*******************************************************************//**
-Callback function for fetch that stores the text of an FTS document,
-converting each column to UTF-16.
-@return always FALSE */
-UNIV_INTERN
-ibool
-fts_query_expansion_fetch_doc(
-/*==========================*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: fts_doc_t* */
- MY_ATTRIBUTE((nonnull));
-/********************************************************************
-Write out a single word's data as new entry/entries in the INDEX table.
-@return DB_SUCCESS if all OK. */
-UNIV_INTERN
-dberr_t
-fts_write_node(
-/*===========*/
- trx_t* trx, /*!< in: transaction */
- que_t** graph, /*!< in: query graph */
- fts_table_t* fts_table, /*!< in: the FTS aux index */
- fts_string_t* word, /*!< in: word in UTF-8 */
- fts_node_t* node) /*!< in: node columns */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Tokenize a document. */
-UNIV_INTERN
-void
-fts_tokenize_document(
-/*==================*/
- fts_doc_t* doc, /*!< in/out: document to
- tokenize */
- fts_doc_t* result) /*!< out: if provided, save
- result tokens here */
- MY_ATTRIBUTE((nonnull(1)));
-
-/*******************************************************************//**
-Continue to tokenize a document. */
-UNIV_INTERN
-void
-fts_tokenize_document_next(
-/*=======================*/
- fts_doc_t* doc, /*!< in/out: document to
- tokenize */
- ulint add_pos, /*!< in: add this position to all
- tokens from this tokenization */
- fts_doc_t* result) /*!< out: if provided, save
- result tokens here */
- MY_ATTRIBUTE((nonnull(1)));
-/******************************************************************//**
-Initialize a document. */
-UNIV_INTERN
-void
-fts_doc_init(
-/*=========*/
- fts_doc_t* doc) /*!< in: doc to initialize */
- MY_ATTRIBUTE((nonnull));
-
-/******************************************************************//**
-Do a binary search for a doc id in the array
-@return +ve index if found -ve index where it should be
- inserted if not found */
-UNIV_INTERN
-int
-fts_bsearch(
-/*========*/
- fts_update_t* array, /*!< in: array to search */
- int lower, /*!< in: lower bound of array */
- int upper, /*!< in: upper bound of array */
- doc_id_t doc_id) /*!< in: doc id to lookup */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Free document. */
-UNIV_INTERN
-void
-fts_doc_free(
-/*=========*/
- fts_doc_t* doc) /*!< in: document */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Free fts_optimizer_word_t instance.*/
-UNIV_INTERN
-void
-fts_word_free(
-/*==========*/
- fts_word_t* word) /*!< in: instance to free.*/
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Read the rows from the FTS index
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_index_fetch_nodes(
-/*==================*/
- trx_t* trx, /*!< in: transaction */
- que_t** graph, /*!< in: prepared statement */
- fts_table_t* fts_table, /*!< in: FTS aux table */
- const fts_string_t*
- word, /*!< in: the word to fetch */
- fts_fetch_t* fetch) /*!< in: fetch callback.*/
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Create a fts_optimizer_word_t instance.
-@return new instance */
-UNIV_INTERN
-fts_word_t*
-fts_word_init(
-/*==========*/
- fts_word_t* word, /*!< in: word to initialize */
- byte* utf8, /*!< in: UTF-8 string */
- ulint len) /*!< in: length of string in bytes */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Compare two fts_trx_table_t instances, we actually compare the
-table id's here.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_trx_table_cmp(
-/*==============*/
- const void* v1, /*!< in: id1 */
- const void* v2) /*!< in: id2 */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Compare a table id with a trx_table_t table id.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_trx_table_id_cmp(
-/*=================*/
- const void* p1, /*!< in: id1 */
- const void* p2) /*!< in: id2 */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Commit a transaction.
-@return DB_SUCCESS if all OK */
-UNIV_INTERN
-dberr_t
-fts_sql_commit(
-/*===========*/
- trx_t* trx) /*!< in: transaction */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Rollback a transaction.
-@return DB_SUCCESS if all OK */
-UNIV_INTERN
-dberr_t
-fts_sql_rollback(
-/*=============*/
- trx_t* trx) /*!< in: transaction */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Parse an SQL string. %s is replaced with the table's id. Don't acquire
-the dict mutex
-@return query graph */
-UNIV_INTERN
-que_t*
-fts_parse_sql_no_dict_lock(
-/*=======================*/
- fts_table_t* fts_table, /*!< in: table with FTS index */
- pars_info_t* info, /*!< in: parser info */
- const char* sql) /*!< in: SQL string to evaluate */
- MY_ATTRIBUTE((nonnull(3), malloc, warn_unused_result));
-/******************************************************************//**
-Get value from config table. The caller must ensure that enough
-space is allocated for value to hold the column contents
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_get_value(
-/*=================*/
- trx_t* trx, /* transaction */
- fts_table_t* fts_table, /*!< in: the indexed FTS table */
- const char* name, /*!< in: get config value for
- this parameter name */
- fts_string_t* value) /*!< out: value read from
- config table */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Get value specific to an FTS index from the config table. The caller
-must ensure that enough space is allocated for value to hold the
-column contents.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_get_index_value(
-/*=======================*/
- trx_t* trx, /*!< transaction */
- dict_index_t* index, /*!< in: index */
- const char* param, /*!< in: get config value for
- this parameter name */
- fts_string_t* value) /*!< out: value read from
- config table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Set the value in the config table for name.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_set_value(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- fts_table_t* fts_table, /*!< in: the indexed FTS table */
- const char* name, /*!< in: set config value for
- this parameter name */
- const fts_string_t*
- value) /*!< in: value to set */
- MY_ATTRIBUTE((nonnull));
-/****************************************************************//**
-Set an ulint value in the config table.
-@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
-dberr_t
-fts_config_set_ulint(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- fts_table_t* fts_table, /*!< in: the indexed FTS table */
- const char* name, /*!< in: param name */
- ulint int_value) /*!< in: value */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Set the value specific to an FTS index in the config table.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_set_index_value(
-/*=======================*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: index */
- const char* param, /*!< in: set config value for
- this parameter name */
- fts_string_t* value) /*!< in: value to set (TODO confirm
- it is never written to) */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Increment the value in the config table for column name.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_increment_value(
-/*=======================*/
- trx_t* trx, /*!< transaction */
- fts_table_t* fts_table, /*!< in: the indexed FTS table */
- const char* name, /*!< in: increment config value
- for this parameter name */
- ulint delta) /*!< in: increment by this much */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Increment the per index value in the config table for column name.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_increment_index_value(
-/*=============================*/
- trx_t* trx, /*!< transaction */
- dict_index_t* index, /*!< in: FTS index */
- const char* name, /*!< in: increment config value
- for this parameter name */
- ulint delta) /*!< in: increment by this much */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Get an ulint value from the config table.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_get_index_ulint(
-/*=======================*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: FTS index */
- const char* name, /*!< in: param name */
- ulint* int_value) /*!< out: value */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Set an ulint value in the config table.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_set_index_ulint(
-/*=======================*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: FTS index */
- const char* name, /*!< in: param name */
- ulint int_value) /*!< in: value */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Get an ulint value from the config table.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_config_get_ulint(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- fts_table_t* fts_table, /*!< in: the indexed FTS table */
- const char* name, /*!< in: param name */
- ulint* int_value) /*!< out: value */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Search cache for word.
-@return the word node vector if found else NULL */
-UNIV_INTERN
-const ib_vector_t*
-fts_cache_find_word(
-/*================*/
- const fts_index_cache_t*
- index_cache, /*!< in: cache to search */
- const fts_string_t*
- text) /*!< in: word to search for */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Check cache for deleted doc id.
-@return TRUE if deleted */
-UNIV_INTERN
-ibool
-fts_cache_is_deleted_doc_id(
-/*========================*/
- const fts_cache_t*
- cache, /*!< in: cache to search */
- doc_id_t doc_id) /*!< in: doc id to search for */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Append deleted doc ids to vector and sort the vector. */
-UNIV_INTERN
-void
-fts_cache_append_deleted_doc_ids(
-/*=============================*/
- const fts_cache_t*
- cache, /*!< in: cache to use */
- ib_vector_t* vector); /*!< in: append to this vector */
-/******************************************************************//**
-Wait for the background thread to start. We poll to detect change
-of state, which is acceptable, since the wait should happen only
-once during startup.
-@return true if the thread started else FALSE (i.e timed out) */
-UNIV_INTERN
-ibool
-fts_wait_for_background_thread_to_start(
-/*====================================*/
- dict_table_t* table, /*!< in: table to which the thread
- is attached */
- ulint max_wait); /*!< in: time in microseconds, if set
- to 0 then it disables timeout
- checking */
-#ifdef FTS_DOC_STATS_DEBUG
-/******************************************************************//**
-Get the total number of words in the FTS for a particular FTS index.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-fts_get_total_word_count(
-/*=====================*/
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: for this index */
- ulint* total) /*!< out: total words */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif
-/******************************************************************//**
-Search the index specific cache for a particular FTS index.
-@return the index specific cache else NULL */
-UNIV_INTERN
-fts_index_cache_t*
-fts_find_index_cache(
-/*================*/
- const fts_cache_t*
- cache, /*!< in: cache to search */
- const dict_index_t*
- index) /*!< in: index to search for */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Write the table id to the given buffer (including final NUL). Buffer must be
-at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long.
-@return number of bytes written */
-UNIV_INLINE
-int
-fts_write_object_id(
-/*================*/
- ib_id_t id, /*!< in: a table/index id */
- char* str, /*!< out: buffer to write the id to */
- bool hex_format MY_ATTRIBUTE((unused)))
- /*!< in: true for fixed hex format,
- false for old ambiguous format */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Read the table id from the string generated by fts_write_object_id().
-@return TRUE if parse successful */
-UNIV_INLINE
-ibool
-fts_read_object_id(
-/*===============*/
- ib_id_t* id, /*!< out: a table id */
- const char* str) /*!< in: buffer to read from */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Get the table id.
-@return number of bytes written */
-UNIV_INTERN
-int
-fts_get_table_id(
-/*=============*/
- const fts_table_t*
- fts_table, /*!< in: FTS Auxiliary table */
- char* table_id) /*!< out: table id, must be at least
- FTS_AUX_MIN_TABLE_ID_LENGTH bytes
- long */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Add the table to the OPTIMIZER's list. */
-UNIV_INTERN
-void
-fts_optimize_add_table(
-/*===================*/
- dict_table_t* table) /*!< in: table to add */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Optimize a table. */
-UNIV_INTERN
-void
-fts_optimize_do_table(
-/*==================*/
- dict_table_t* table) /*!< in: table to optimize */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Construct the prefix name of an FTS table.
-@return own: table name, must be freed with mem_free() */
-UNIV_INTERN
-char*
-fts_get_table_name_prefix(
-/*======================*/
- const fts_table_t*
- fts_table) /*!< in: Auxiliary table type */
- MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
-/******************************************************************//**
-Add node positions. */
-UNIV_INTERN
-void
-fts_cache_node_add_positions(
-/*=========================*/
- fts_cache_t* cache, /*!< in: cache */
- fts_node_t* node, /*!< in: word node */
- doc_id_t doc_id, /*!< in: doc id */
- ib_vector_t* positions) /*!< in: fts_token_t::positions */
- MY_ATTRIBUTE((nonnull(2,4)));
-
-/******************************************************************//**
-Create the config table name for retrieving index specific value.
-@return index config parameter name */
-UNIV_INTERN
-char*
-fts_config_create_index_param_name(
-/*===============================*/
- const char* param, /*!< in: base name of param */
- const dict_index_t* index) /*!< in: index for config */
- MY_ATTRIBUTE((nonnull, malloc, warn_unused_result));
-
-#ifndef UNIV_NONINL
-#include "fts0priv.ic"
-#endif
-
-#endif /* INNOBASE_FTS0PRIV_H */
diff --git a/storage/xtradb/include/fts0priv.ic b/storage/xtradb/include/fts0priv.ic
deleted file mode 100644
index 88f2d67c7b8..00000000000
--- a/storage/xtradb/include/fts0priv.ic
+++ /dev/null
@@ -1,130 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fts0priv.ic
-Full text search internal header file
-
-Created 2011/11/12 Sunny Bains
-***********************************************************************/
-
-/******************************************************************//**
-Write the table id to the given buffer (including final NUL). Buffer must be
-at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long; all writes are capped to it.
-@return number of bytes written, not counting the final NUL */
-UNIV_INLINE
-int
-fts_write_object_id(
-/*================*/
-	ib_id_t		id,	/* in: a table/index id */
-	char*		str,	/* out: buffer to write the id to */
-	bool		hex_format MY_ATTRIBUTE((unused)))
-				/* in: true for fixed hex format,
-				false for old ambiguous format */
-{
-	/* Capacity promised by the caller; it bounds every write below. */
-	const size_t	buf_size = FTS_AUX_MIN_TABLE_ID_LENGTH;
-#ifdef _WIN32
-	DBUG_EXECUTE_IF("innodb_test_wrong_non_windows_fts_aux_table_name",
-			return(ut_snprintf(str, buf_size, UINT64PFx, id)););
-
-	/* Use this to construct old(5.6.14 and 5.7.3) windows
-	ambiguous aux table names */
-	DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
-			return(ut_snprintf(str, buf_size, "%016llu", id)););
-
-#else /* _WIN32 */
-
-	/* Use this to construct old(5.6.14 and 5.7.3) windows
-	ambiguous aux table names */
-	DBUG_EXECUTE_IF("innodb_test_wrong_windows_fts_aux_table_name",
-			return(ut_snprintf(str, buf_size, "%016" PRIu64, id)););
-
-	DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
-			return(ut_snprintf(str, buf_size, UINT64PFx, id)););
-
-#endif /* _WIN32 */
-
-	/* As above, but this is only for those tables failing to rename. */
-	if (!hex_format) {
-#ifdef _WIN32
-		/* Old ambiguous form: zero-padded decimal. */
-		return(ut_snprintf(str, buf_size, "%016llu", id));
-#else /* _WIN32 */
-		return(ut_snprintf(str, buf_size, "%016" PRIu64, id));
-#endif /* _WIN32 */
-	}
-
-	return(ut_snprintf(str, buf_size, UINT64PFx, id));
-}
-
-/******************************************************************//**
-Parse an id out of a string produced by fts_write_object_id().
-@return TRUE if exactly one id was parsed */
-UNIV_INLINE
-ibool
-fts_read_object_id(
-/*===============*/
-	ib_id_t*	id,	/* out: an id */
-	const char*	str)	/* in: buffer to read from */
-{
-	/* NOTE: whether the table is flagged HEX_NAME is deliberately ignored
-	here; the user of the id checks if it is HEX or DEC and acts on that. */
-	const int	n_parsed = sscanf(str, UINT64PFx, id);
-	return(n_parsed == 1);
-}
-
-/******************************************************************//**
-Order two fts_trx_table_t instances by the id of the table each refers to.
-@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_trx_table_cmp(
-/*==============*/
-	const void*	p1,	/*!< in: id1 */
-	const void*	p2)	/*!< in: id2 */
-{
-	const dict_table_t*	lhs = (*(const fts_trx_table_t**) p1)->table;
-	const dict_table_t*	rhs = (*(const fts_trx_table_t**) p2)->table;
-
-	if (lhs->id > rhs->id) {
-		return(1);
-	}
-
-	return((lhs->id == rhs->id) ? 0 : -1);
-}
-
-/******************************************************************//**
-Order a bare table id against the table id held by a fts_trx_table_t.
-@return < 0 if n1 < n2, 0 if n1 == n2,> 0 if n1 > n2 */
-UNIV_INLINE
-int
-fts_trx_table_id_cmp(
-/*=================*/
-	const void*	p1,	/*!< in: id1 */
-	const void*	p2)	/*!< in: id2 */
-{
-	const ullint		key = *(const ullint*) p1;
-	const dict_table_t*	rhs = (*(const fts_trx_table_t**) p2)->table;
-
-	if (key > rhs->id) {
-		return(1);
-	}
-
-	return((key == rhs->id) ? 0 : -1);
-}
diff --git a/storage/xtradb/include/fts0tlex.h b/storage/xtradb/include/fts0tlex.h
deleted file mode 100644
index f91533803e8..00000000000
--- a/storage/xtradb/include/fts0tlex.h
+++ /dev/null
@@ -1,349 +0,0 @@
-#ifndef fts0tHEADER_H
-#define fts0tHEADER_H 1
-#define fts0tIN_HEADER 1
-
-#line 6 "../include/fts0tlex.h"
-
-#line 8 "../include/fts0tlex.h"
-
-#define YY_INT_ALIGNED short int
-
-/* A lexical scanner generated by flex */
-
-#define FLEX_SCANNER
-#define YY_FLEX_MAJOR_VERSION 2
-#define YY_FLEX_MINOR_VERSION 5
-#define YY_FLEX_SUBMINOR_VERSION 35
-#if YY_FLEX_SUBMINOR_VERSION > 0
-#define FLEX_BETA
-#endif
-
-/* First, we deal with platform-specific or compiler-specific issues. */
-
-/* begin standard C headers. */
-#include <stdio.h>
-#include <string.h>
-#include <errno.h>
-#include <stdlib.h>
-
-/* end standard C headers. */
-
-/* flex integer type definitions */
-
-#ifndef FLEXINT_H
-#define FLEXINT_H
-
-/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
-
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
-
-/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
- */
-#ifndef __STDC_LIMIT_MACROS
-#define __STDC_LIMIT_MACROS 1
-#endif
-
-#include <inttypes.h>
-typedef int8_t flex_int8_t;
-typedef uint8_t flex_uint8_t;
-typedef int16_t flex_int16_t;
-typedef uint16_t flex_uint16_t;
-typedef int32_t flex_int32_t;
-typedef uint32_t flex_uint32_t;
-#else
-typedef signed char flex_int8_t;
-typedef short int flex_int16_t;
-typedef int flex_int32_t;
-typedef unsigned char flex_uint8_t;
-typedef unsigned short int flex_uint16_t;
-typedef unsigned int flex_uint32_t;
-
-/* Limits of integral types. */
-#ifndef INT8_MIN
-#define INT8_MIN (-128)
-#endif
-#ifndef INT16_MIN
-#define INT16_MIN (-32767-1)
-#endif
-#ifndef INT32_MIN
-#define INT32_MIN (-2147483647-1)
-#endif
-#ifndef INT8_MAX
-#define INT8_MAX (127)
-#endif
-#ifndef INT16_MAX
-#define INT16_MAX (32767)
-#endif
-#ifndef INT32_MAX
-#define INT32_MAX (2147483647)
-#endif
-#ifndef UINT8_MAX
-#define UINT8_MAX (255U)
-#endif
-#ifndef UINT16_MAX
-#define UINT16_MAX (65535U)
-#endif
-#ifndef UINT32_MAX
-#define UINT32_MAX (4294967295U)
-#endif
-
-#endif /* ! C99 */
-
-#endif /* ! FLEXINT_H */
-
-#ifdef __cplusplus
-
-/* The "const" storage-class-modifier is valid. */
-#define YY_USE_CONST
-
-#else /* ! __cplusplus */
-
-/* C99 requires __STDC__ to be defined as 1. */
-#if defined (__STDC__)
-
-#define YY_USE_CONST
-
-#endif /* defined (__STDC__) */
-#endif /* ! __cplusplus */
-
-#ifdef YY_USE_CONST
-#define yyconst const
-#else
-#define yyconst
-#endif
-
-/* An opaque pointer. */
-#ifndef YY_TYPEDEF_YY_SCANNER_T
-#define YY_TYPEDEF_YY_SCANNER_T
-typedef void* yyscan_t;
-#endif
-
-/* For convenience, these vars (plus the bison vars far below)
- are macros in the reentrant scanner. */
-#define yyin yyg->yyin_r
-#define yyout yyg->yyout_r
-#define yyextra yyg->yyextra_r
-#define yyleng yyg->yyleng_r
-#define yytext yyg->yytext_r
-#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno)
-#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column)
-#define yy_flex_debug yyg->yy_flex_debug_r
-
-/* Size of default input buffer. */
-#ifndef YY_BUF_SIZE
-#ifdef __ia64__
-/* On IA-64, the buffer size is 16k, not 8k.
- * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
- * Ditto for the __ia64__ case accordingly.
- */
-#define YY_BUF_SIZE 32768
-#else
-#define YY_BUF_SIZE 16384
-#endif /* __ia64__ */
-#endif
-
-#ifndef YY_TYPEDEF_YY_BUFFER_STATE
-#define YY_TYPEDEF_YY_BUFFER_STATE
-typedef struct yy_buffer_state *YY_BUFFER_STATE;
-#endif
-
-#ifndef YY_TYPEDEF_YY_SIZE_T
-#define YY_TYPEDEF_YY_SIZE_T
-typedef size_t yy_size_t;
-#endif
-
-#ifndef YY_STRUCT_YY_BUFFER_STATE
-#define YY_STRUCT_YY_BUFFER_STATE
-struct yy_buffer_state
- {
- FILE *yy_input_file;
-
- char *yy_ch_buf; /* input buffer */
- char *yy_buf_pos; /* current position in input buffer */
-
- /* Size of input buffer in bytes, not including room for EOB
- * characters.
- */
- yy_size_t yy_buf_size;
-
- /* Number of characters read into yy_ch_buf, not including EOB
- * characters.
- */
- int yy_n_chars;
-
- /* Whether we "own" the buffer - i.e., we know we created it,
- * and can realloc() it to grow it, and should free() it to
- * delete it.
- */
- int yy_is_our_buffer;
-
- /* Whether this is an "interactive" input source; if so, and
- * if we're using stdio for input, then we want to use getc()
- * instead of fread(), to make sure we stop fetching input after
- * each newline.
- */
- int yy_is_interactive;
-
- /* Whether we're considered to be at the beginning of a line.
- * If so, '^' rules will be active on the next match, otherwise
- * not.
- */
- int yy_at_bol;
-
- int yy_bs_lineno; /**< The line count. */
- int yy_bs_column; /**< The column count. */
-
- /* Whether to try to fill the input buffer when we reach the
- * end of it.
- */
- int yy_fill_buffer;
-
- int yy_buffer_status;
-
- };
-#endif /* !YY_STRUCT_YY_BUFFER_STATE */
-
-void fts0trestart (FILE *input_file ,yyscan_t yyscanner );
-void fts0t_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
-YY_BUFFER_STATE fts0t_create_buffer (FILE *file,int size ,yyscan_t yyscanner );
-void fts0t_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
-void fts0t_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner );
-void fts0tpush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner );
-void fts0tpop_buffer_state (yyscan_t yyscanner );
-
-YY_BUFFER_STATE fts0t_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner );
-YY_BUFFER_STATE fts0t_scan_string (yyconst char *yy_str ,yyscan_t yyscanner );
-YY_BUFFER_STATE fts0t_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner );
-
-void *fts0talloc (yy_size_t ,yyscan_t yyscanner );
-void *fts0trealloc (void *,yy_size_t ,yyscan_t yyscanner );
-void fts0tfree (void * ,yyscan_t yyscanner );
-
-/* Begin user sect3 */
-
-#define fts0twrap(n) 1
-#define YY_SKIP_YYWRAP
-
-#define yytext_ptr yytext_r
-
-#ifdef YY_HEADER_EXPORT_START_CONDITIONS
-#define INITIAL 0
-
-#endif
-
-#ifndef YY_NO_UNISTD_H
-/* Special case for "unistd.h", since it is non-ANSI. We include it way
- * down here because we want the user's section 1 to have been scanned first.
- * The user has a chance to override it with an option.
- */
-#include <unistd.h>
-#endif
-
-#ifndef YY_EXTRA_TYPE
-#define YY_EXTRA_TYPE void *
-#endif
-
-int fts0tlex_init (yyscan_t* scanner);
-
-int fts0tlex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner);
-
-/* Accessor methods to globals.
- These are made visible to non-reentrant scanners for convenience. */
-
-int fts0tlex_destroy (yyscan_t yyscanner );
-
-int fts0tget_debug (yyscan_t yyscanner );
-
-void fts0tset_debug (int debug_flag ,yyscan_t yyscanner );
-
-YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner );
-
-void fts0tset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner );
-
-FILE *fts0tget_in (yyscan_t yyscanner );
-
-void fts0tset_in (FILE * in_str ,yyscan_t yyscanner );
-
-FILE *fts0tget_out (yyscan_t yyscanner );
-
-void fts0tset_out (FILE * out_str ,yyscan_t yyscanner );
-
-int fts0tget_leng (yyscan_t yyscanner );
-
-char *fts0tget_text (yyscan_t yyscanner );
-
-int fts0tget_lineno (yyscan_t yyscanner );
-
-void fts0tset_lineno (int line_number ,yyscan_t yyscanner );
-
-/* Macros after this point can all be overridden by user definitions in
- * section 1.
- */
-
-#ifndef YY_SKIP_YYWRAP
-#ifdef __cplusplus
-extern "C" int fts0twrap (yyscan_t yyscanner );
-#else
-extern int fts0twrap (yyscan_t yyscanner );
-#endif
-#endif
-
-#ifndef yytext_ptr
-static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner);
-#endif
-
-#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner);
-#endif
-
-#ifndef YY_NO_INPUT
-
-#endif
-
-/* Amount of stuff to slurp up with each read. */
-#ifndef YY_READ_BUF_SIZE
-#ifdef __ia64__
-/* On IA-64, the buffer size is 16k, not 8k */
-#define YY_READ_BUF_SIZE 16384
-#else
-#define YY_READ_BUF_SIZE 8192
-#endif /* __ia64__ */
-#endif
-
-/* Number of entries by which start-condition stack grows. */
-#ifndef YY_START_STACK_INCR
-#define YY_START_STACK_INCR 25
-#endif
-
-/* Default declaration of generated scanner - a define so the user can
- * easily add parameters.
- */
-#ifndef YY_DECL
-#define YY_DECL_IS_OURS 1
-
-extern int fts0tlex (yyscan_t yyscanner);
-
-#define YY_DECL int fts0tlex (yyscan_t yyscanner)
-#endif /* !YY_DECL */
-
-/* yy_get_previous_state - get the state just before the EOB char was reached */
-
-#undef YY_NEW_FILE
-#undef YY_FLUSH_BUFFER
-#undef yy_set_bol
-#undef yy_new_buffer
-#undef yy_set_interactive
-#undef YY_DO_BEFORE_ACTION
-
-#ifdef YY_DECL_IS_OURS
-#undef YY_DECL_IS_OURS
-#undef YY_DECL
-#endif
-
-#line 68 "fts0tlex.l"
-
-
-#line 348 "../include/fts0tlex.h"
-#undef fts0tIN_HEADER
-#endif /* fts0tHEADER_H */
diff --git a/storage/xtradb/include/fts0types.h b/storage/xtradb/include/fts0types.h
deleted file mode 100644
index 0dad75d8f1b..00000000000
--- a/storage/xtradb/include/fts0types.h
+++ /dev/null
@@ -1,480 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fts0types.h
-Full text search types file
-
-Created 2007-03-27 Sunny Bains
-*******************************************************/
-
-#ifndef INNOBASE_FTS0TYPES_H
-#define INNOBASE_FTS0TYPES_H
-
-#include "que0types.h"
-#include "ut0byte.h"
-#include "fut0fut.h"
-#include "ut0rbt.h"
-#include "fts0fts.h"
-
-/** Types used within FTS. */
-struct fts_que_t;
-struct fts_node_t;
-struct fts_utf8_str_t;
-
-/** Callbacks used within FTS. */
-typedef pars_user_func_cb_t fts_sql_callback;
-typedef void (*fts_filter)(void*, fts_node_t*, void*, ulint len);
-
-/** Statistics relevant to a particular document, used during retrieval. */
-struct fts_doc_stats_t {
- doc_id_t doc_id; /*!< Document id */
- ulint word_count; /*!< Total words in the document */
-};
-
-/** Its main purpose is to store the SQL prepared statements that
-are required to retrieve a document from the database. */
-struct fts_get_doc_t {
- fts_index_cache_t*
- index_cache; /*!< The index cache instance */
-
- /*!< Parsed sql statement */
- que_t* get_document_graph;
- fts_cache_t* cache; /*!< The parent cache */
-};
-
-/** Since we can have multiple FTS indexes on a table, we keep a
-per index cache of words etc. */
-struct fts_index_cache_t {
- dict_index_t* index; /*!< The FTS index instance */
-
- ib_rbt_t* words; /*!< Nodes; indexed by fts_string_t*,
- cells are fts_tokenizer_word_t*.*/
-
- ib_vector_t* doc_stats; /*!< Array of the fts_doc_stats_t
- contained in the memory buffer.
- Must be in sorted order (ascending).
- The ideal choice is an rb tree but
- the rb tree imposes a space overhead
- that we can do without */
-
- que_t** ins_graph; /*!< Insert query graphs */
-
- que_t** sel_graph; /*!< Select query graphs */
- CHARSET_INFO* charset; /*!< charset */
-};
-
-/** For supporting the tracking of updates on multiple FTS indexes we need
-to track which FTS indexes need to be updated. For INSERT and DELETE we
-update all fts indexes. */
-struct fts_update_t {
- doc_id_t doc_id; /*!< The doc id affected */
-
- ib_vector_t* fts_indexes; /*!< The FTS indexes that need to be
- updated. A NULL value means all
- indexes need to be updated. This
- vector is not allocated on the heap
- and so must be freed explicitly,
- when we are done with it */
-};
-
-/** Stop word control information. */
-struct fts_stopword_t {
- ulint status; /*!< Status of the stopword tree */
- ib_alloc_t* heap; /*!< The memory allocator to use */
- ib_rbt_t* cached_stopword;/*!< This stores all active stopwords */
- CHARSET_INFO* charset; /*!< charset for stopword */
-};
-
-/** The SYNC state of the cache. There is one instance of this struct
-associated with each ADD thread. */
-struct fts_sync_t {
- trx_t* trx; /*!< The transaction used for SYNCing
- the cache to disk */
- dict_table_t* table; /*!< Table with FTS index(es) */
- ulint max_cache_size; /*!< Max size in bytes of the cache */
- ibool cache_full; /*!< flag, when true it indicates that
- we need to sync the cache to disk */
- ulint lower_index; /*!< the start index of the doc id
- vector from where to start adding
- documents to the FTS cache */
- ulint upper_index; /*!< max index of the doc id vector to
- add to the FTS cache */
- ibool interrupted; /*!< TRUE if SYNC was interrupted */
- doc_id_t min_doc_id; /*!< The smallest doc id added to the
- cache. It should equal to
- doc_ids[lower_index] */
- doc_id_t max_doc_id; /*!< The doc id at which the cache was
- noted as being full, we use this to
- set the upper_limit field */
- ib_time_t start_time; /*!< SYNC start time */
- bool in_progress; /*!< flag whether sync is in progress.*/
- bool unlock_cache; /*!< flag whether unlock cache when
- write fts node */
- os_event_t event; /*!< sync finish event;
- only os_event_set() and os_event_wait()
- are used */
-};
-
-/** The cache for the FTS system. It is a memory-based inverted index
-that new entries are added to, until it grows over the configured maximum
-size, at which time its contents are written to the INDEX table. */
-struct fts_cache_t {
- rw_lock_t lock; /*!< lock protecting all access to the
- memory buffer. FIXME: this needs to
- be our new upgrade-capable rw-lock */
-
- rw_lock_t init_lock; /*!< lock used for the cache
- initialization, it has different
- SYNC level as above cache lock */
-
- ib_mutex_t optimize_lock; /*!< Lock for OPTIMIZE */
-
- ib_mutex_t deleted_lock; /*!< Lock covering deleted_doc_ids */
-
- ib_mutex_t doc_id_lock; /*!< Lock covering Doc ID */
-
- ib_vector_t* deleted_doc_ids;/*!< Array of deleted doc ids, each
- element is of type fts_update_t */
-
- ib_vector_t* indexes; /*!< We store the stats and inverted
- index for the individual FTS indexes
- in this vector. Each element is
- an instance of fts_index_cache_t */
-
- ib_vector_t* get_docs; /*!< information required to read
- the document from the table. Each
- element is of type fts_doc_t */
-
- ulint total_size; /*!< total size consumed by the ilist
- field of all nodes. SYNC is run
- whenever this gets too big */
- fts_sync_t* sync; /*!< sync structure to sync data to
- disk */
- ib_alloc_t* sync_heap; /*!< The heap allocator, for indexes
- and deleted_doc_ids, ie. transient
- objects, they are recreated after
- a SYNC is completed */
-
- ib_alloc_t* self_heap; /*!< This heap is the heap out of
- which an instance of the cache itself
- was created. Objects created using
- this heap will last for the lifetime
- of the cache */
-
- doc_id_t next_doc_id; /*!< Next doc id */
-
- doc_id_t synced_doc_id; /*!< Doc ID sync-ed to CONFIG table */
-
- doc_id_t first_doc_id; /*!< first doc id since this table
- was opened */
-
- ulint deleted; /*!< Number of doc ids deleted since
- last optimized. This variable is
- covered by deleted_lock */
-
- ulint added; /*!< Number of doc ids added since last
- optimized. This variable is covered by
- the deleted lock */
-
- fts_stopword_t stopword_info; /*!< Cached stopwords for the FTS */
- mem_heap_t* cache_heap; /*!< Cache Heap */
-};
-
-/** Columns of the FTS auxiliary INDEX table */
-struct fts_node_t {
- doc_id_t first_doc_id; /*!< First document id in ilist. */
-
- doc_id_t last_doc_id; /*!< Last document id in ilist. */
-
- byte* ilist; /*!< Binary list of documents & word
- positions the token appears in.
- TODO: For now, these are simply
- ut_malloc'd, but if testing shows
- that they waste memory unacceptably, a
- special memory allocator will have
- to be written */
-
- ulint doc_count; /*!< Number of doc ids in ilist */
-
- ulint ilist_size; /*!< Used size of ilist in bytes. */
-
- ulint ilist_size_alloc;
- /*!< Allocated size of ilist in
- bytes */
- bool synced; /*!< flag whether the node is synced */
-};
-
-/** A tokenizer word. Contains information about one word. */
-struct fts_tokenizer_word_t {
- fts_string_t text; /*!< Token text. */
-
- ib_vector_t* nodes; /*!< Word node ilists, each element is
- of type fts_node_t */
-};
-
-/** Word text plus its array of nodes as on disk in FTS index */
-struct fts_word_t {
- fts_string_t text; /*!< Word value in UTF-8 */
- ib_vector_t* nodes; /*!< Nodes read from disk */
-
- ib_alloc_t* heap_alloc; /*!< For handling all allocations */
-};
-
-/** Callback for reading and filtering nodes that are read from FTS index */
-struct fts_fetch_t {
- void* read_arg; /*!< Arg for the sql_callback */
-
- fts_sql_callback
- read_record; /*!< Callback for reading index
- record */
- ulint total_memory; /*!< Total memory used */
-};
-
-/** For horizontally splitting an FTS auxiliary index */
-struct fts_index_selector_t {
- ulint value; /*!< Character value at which
- to split */
-
- const char* suffix; /*!< FTS aux index suffix */
-};
-
-/** This type represents a single document. */
-struct fts_doc_t {
- fts_string_t text; /*!< document text */
-
- ibool found; /*!< TRUE if the document was found
- successfully in the database */
-
- ib_rbt_t* tokens; /*!< This is filled when the document
- is tokenized. Tokens; indexed by
- fts_string_t*, cells are of type
- fts_token_t* */
-
- ib_alloc_t* self_heap; /*!< An instance of this type is
- allocated from this heap along
- with any objects that have the
- same lifespan, most notably
- the vector of token positions */
- CHARSET_INFO* charset; /*!< Document's charset info */
-};
-
-/** A token and its positions within a document. */
-struct fts_token_t {
- fts_string_t text; /*!< token text */
-
- ib_vector_t* positions; /*!< an array of the positions the
- token is found in; each item is
- actually an ulint. */
-};
-
-/** It's defined in fts/fts0fts.c */
-extern const fts_index_selector_t fts_index_selector[];
-
-/******************************************************************//**
-Compare two UTF-8 strings. */
-UNIV_INLINE
-int
-fts_utf8_string_cmp(
-/*================*/
- /*!< out:
- < 0 if n1 < n2,
- 0 if n1 == n2,
- > 0 if n1 > n2 */
- const void* p1, /*!< in: key */
- const void* p2); /*!< in: node */
-
-/******************************************************************//**
-Compare two UTF-8 strings, and return match (0) if
-passed in "key" value equals or is the prefix of the "node" value. */
-UNIV_INLINE
-int
-fts_utf8_string_cmp_prefix(
-/*=======================*/
- /*!< out:
- < 0 if n1 < n2,
- 0 if n1 == n2,
- > 0 if n1 > n2 */
- const void* p1, /*!< in: key */
- const void* p2); /*!< in: node */
-
-/******************************************************************//**
-Compare two fts_trx_row_t instances doc_ids. */
-UNIV_INLINE
-int
-fts_trx_row_doc_id_cmp(
-/*===================*/
- /*!< out:
- < 0 if n1 < n2,
- 0 if n1 == n2,
- > 0 if n1 > n2 */
- const void* p1, /*!< in: id1 */
- const void* p2); /*!< in: id2 */
-
-/******************************************************************//**
-Compare two fts_ranking_t instances doc_ids. */
-UNIV_INLINE
-int
-fts_ranking_doc_id_cmp(
-/*===================*/
- /*!< out:
- < 0 if n1 < n2,
- 0 if n1 == n2,
- > 0 if n1 > n2 */
- const void* p1, /*!< in: id1 */
- const void* p2); /*!< in: id2 */
-
-/******************************************************************//**
-Compare two fts_update_t instances doc_ids. */
-UNIV_INLINE
-int
-fts_update_doc_id_cmp(
-/*==================*/
- /*!< out:
- < 0 if n1 < n2,
- 0 if n1 == n2,
- > 0 if n1 > n2 */
- const void* p1, /*!< in: id1 */
- const void* p2); /*!< in: id2 */
-
-/******************************************************************//**
-Decode and return the integer that was encoded using our VLC scheme.*/
-UNIV_INLINE
-ulint
-fts_decode_vlc(
-/*===========*/
- /*!< out: value decoded */
- byte** ptr); /*!< in: ptr to decode from, this ptr is
- incremented by the number of bytes decoded */
-
-/******************************************************************//**
-Duplicate a UTF-8 string. */
-UNIV_INLINE
-void
-fts_utf8_string_dup(
-/*================*/
- /*!< no return value; the
- result is written to dst
- (the inline definition is
- in fts0types.ic) */
- fts_string_t* dst, /*!< out: dup to here */
- const fts_string_t* src, /*!< in: src string */
- mem_heap_t* heap); /*!< in: heap to use */
-
-/******************************************************************//**
-Return length of val if it were encoded using our VLC scheme. */
-UNIV_INLINE
-ulint
-fts_get_encoded_len(
-/*================*/
- /*!< out: length of value
- encoded, in bytes */
- ulint val); /*!< in: value to encode */
-
-/******************************************************************//**
-Encode an integer using our VLC scheme and return the length in bytes. */
-UNIV_INLINE
-ulint
-fts_encode_int(
-/*===========*/
- /*!< out: length of value
- encoded, in bytes */
- ulint val, /*!< in: value to encode */
- byte* buf); /*!< in: buffer, must have
- enough space */
-
-/******************************************************************//**
-Decode a UTF-8 character.
-
-http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf:
-
- Scalar Value 1st Byte 2nd Byte 3rd Byte 4th Byte
-00000000 0xxxxxxx 0xxxxxxx
-00000yyy yyxxxxxx 110yyyyy 10xxxxxx
-zzzzyyyy yyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx
-000uuuzz zzzzyyyy yyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx
-
-This function decodes UTF-8 sequences up to 6 bytes (31 bits).
-
-On error *ptr will point to the first byte that was not correctly
-decoded. This will hopefully help in resyncing the input. */
-UNIV_INLINE
-ulint
-fts_utf8_decode(
-/*============*/
- /*!< out: UTF8_ERROR if *ptr
- did not point to a valid
- UTF-8 sequence, or the
- Unicode code point. */
- const byte** ptr); /*!< in/out: pointer to
- UTF-8 string. The
- pointer is advanced to
- the start of the next
- character. */
-
-/******************************************************************//**
-Lowercase a UTF-8 string. */
-UNIV_INLINE
-void
-fts_utf8_tolower(
-/*=============*/
- fts_string_t* str); /*!< in: string */
-
-/******************************************************************//**
-Get the selected FTS aux INDEX suffix. */
-UNIV_INLINE
-const char*
-fts_get_suffix(
-/*===========*/
- ulint selected); /*!< in: selected index */
-
-/********************************************************************
-Get the number of index selectors. */
-UNIV_INLINE
-ulint
-fts_get_n_selectors(void);
-/*=====================*/
-
-/******************************************************************//**
-Select the FTS auxiliary index for the given string.
-@return the index to use for the string */
-UNIV_INLINE
-ulint
-fts_select_index(
-/*=============*/
- const CHARSET_INFO* cs, /*!< Charset */
- const byte* str, /*!< in: word string */
- ulint len); /*!< in: string length */
-
-/********************************************************************
-Select the next FTS auxiliary index for the given character.
-@return the next index to use for character */
-UNIV_INLINE
-ulint
-fts_select_next_index(
-/*==================*/
- const CHARSET_INFO* cs, /*!< Charset */
- const byte* str, /*!< in: string */
- ulint len); /*!< in: string length */
-
-#ifndef UNIV_NONINL
-#include "fts0types.ic"
-#include "fts0vlc.ic"
-#endif
-
-#endif /* INNOBASE_FTS0TYPES_H */
diff --git a/storage/xtradb/include/fts0types.ic b/storage/xtradb/include/fts0types.ic
deleted file mode 100644
index f0dfd023a70..00000000000
--- a/storage/xtradb/include/fts0types.ic
+++ /dev/null
@@ -1,388 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fts0types.ic
-Full text search types.
-
-Created 2007-03-27 Sunny Bains
-*******************************************************/
-
-#ifndef INNOBASE_FTS0TYPES_IC
-#define INNOBASE_FTS0TYPES_IC
-
-#include <ctype.h>
-
-#include "rem0cmp.h"
-#include "ha_prototypes.h"
-
-extern const ulint UTF8_ERROR;
-
-/* Determine if a UTF-8 continuation byte is valid. */
-#define fts_utf8_is_valid(b) (((b) & 0xC0) == 0x80)
-
-/******************************************************************//**
-Duplicate a UTF-8 string: copy src->f_len bytes into a buffer taken from
-heap and append a terminating NUL. No value is returned. */
-UNIV_INLINE
-void
-fts_utf8_string_dup(
-/*================*/
- fts_string_t* dst, /*!< out: dup to here */
- const fts_string_t* src, /*!< in: src string */
- mem_heap_t* heap) /*!< in: heap to use */
-{
- dst->f_str = (byte*)mem_heap_alloc(heap, src->f_len + 1);
- memcpy(dst->f_str, src->f_str, src->f_len);
-
- dst->f_len = src->f_len;
- dst->f_str[src->f_len] = 0;
- dst->f_n_char = src->f_n_char;
-}
-
-/******************************************************************//**
-Compare two fts_trx_row_t instances by doc_id. Returns -1, 0 or 1.
-@return < 0 if p1 < p2, 0 if p1 == p2, > 0 if p1 > p2 (by doc_id) */
-UNIV_INLINE
-int
-fts_trx_row_doc_id_cmp(
-/*===================*/
- const void* p1, /*!< in: id1 */
- const void* p2) /*!< in: id2 */
-{
- const fts_trx_row_t* tr1 = (const fts_trx_row_t*) p1;
- const fts_trx_row_t* tr2 = (const fts_trx_row_t*) p2;
- /* Do not cast the difference to int: truncation can lose the sign. */
- return((tr1->doc_id > tr2->doc_id) - (tr1->doc_id < tr2->doc_id));
-}
-
-/******************************************************************//**
-Compare two fts_ranking_t instances by doc_id. Returns -1, 0 or 1.
-@return < 0 if p1 < p2, 0 if p1 == p2, > 0 if p1 > p2 (by doc_id) */
-UNIV_INLINE
-int
-fts_ranking_doc_id_cmp(
-/*===================*/
- const void* p1, /*!< in: id1 */
- const void* p2) /*!< in: id2 */
-{
- const fts_ranking_t* rk1 = (const fts_ranking_t*) p1;
- const fts_ranking_t* rk2 = (const fts_ranking_t*) p2;
- /* Do not cast the difference to int: truncation can lose the sign. */
- return((rk1->doc_id > rk2->doc_id) - (rk1->doc_id < rk2->doc_id));
-}
-
-/******************************************************************//**
-Compare two fts_update_t instances by doc_id. Returns -1, 0 or 1.
-@return < 0 if p1 < p2, 0 if p1 == p2, > 0 if p1 > p2 (by doc_id) */
-UNIV_INLINE
-int
-fts_update_doc_id_cmp(
-/*==================*/
- const void* p1, /*!< in: id1 */
- const void* p2) /*!< in: id2 */
-{
- const fts_update_t* up1 = (const fts_update_t*) p1;
- const fts_update_t* up2 = (const fts_update_t*) p2;
- /* Do not cast the difference to int: truncation can lose the sign. */
- return((up1->doc_id > up2->doc_id) - (up1->doc_id < up2->doc_id));
-}
-
-
-/******************************************************************//**
-Lowercase a UTF-8 string by passing str->f_str to innobase_casedn_str(). */
-UNIV_INLINE
-void
-fts_utf8_tolower(
-/*=============*/
- fts_string_t* str) /*!< in/out: string */
-{
- innobase_casedn_str((char*) str->f_str);
-}
-
-/******************************************************************//**
-Compare two fts_string_t values using cmp_data_data_slow_varchar().
-@return < 0 if p1 < p2, 0 if p1 == p2, > 0 if p1 > p2 */
-UNIV_INLINE
-int
-fts_utf8_string_cmp(
-/*================*/
- const void* p1, /*!< in: key */
- const void* p2) /*!< in: node */
-{
- const fts_string_t* s1 = (const fts_string_t*) p1;
- const fts_string_t* s2 = (const fts_string_t*) p2;
-
- return(cmp_data_data_slow_varchar(
- s1->f_str, s1->f_len, s2->f_str, s2->f_len));
-}
-
-/******************************************************************//**
-Compare two UTF-8 strings over their common length; the "key" matches (0)
-when it equals or is a prefix of the "node" value.
-@return the non-zero prefix comparison, else 1 if key is longer, else 0 */
-UNIV_INLINE
-int
-fts_utf8_string_cmp_prefix(
-/*=======================*/
- const void* p1, /*!< in: key */
- const void* p2) /*!< in: node */
-{
- int result;
- ulint len;
-
- const fts_string_t* s1 = (const fts_string_t*) p1;
- const fts_string_t* s2 = (const fts_string_t*) p2;
-
- len = ut_min(s1->f_len, s2->f_len);
-
- result = cmp_data_data_slow_varchar(s1->f_str, len, s2->f_str, len);
-
- if (result) {
- return(result);
- }
- /* Common prefix is equal: a longer key cannot be a prefix of node. */
- if (s1->f_len > s2->f_len) {
- return(1);
- }
-
- return(0);
-}
-
-/******************************************************************//**
-Decode a UTF-8 character.
-
-http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf:
-
- Scalar Value 1st Byte 2nd Byte 3rd Byte 4th Byte
-00000000 0xxxxxxx 0xxxxxxx
-00000yyy yyxxxxxx 110yyyyy 10xxxxxx
-zzzzyyyy yyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx
-000uuuzz zzzzyyyy yyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx
-
-This function decodes UTF-8 sequences up to 6 bytes (31 bits).
-NOTE(review): no length is passed; presumably a terminator stops the reads.
-On error *ptr will point to the first byte that was not correctly
-decoded. This will hopefully help in resyncing the input.
-@return UTF8_ERROR if *ptr did not point to a valid
-UTF-8 sequence, or the Unicode code point. */
-UNIV_INLINE
-ulint
-fts_utf8_decode(
-/*============*/
- const byte** ptr) /*!< in/out: pointer to
- UTF-8 string. The
- pointer is advanced to
- the start of the next
- character. */
-{
- const byte* p = *ptr;
- ulint ch = *p++;
-#ifdef UNIV_DEBUG
- ulint min_ch;
-#endif /* UNIV_DEBUG */
-
- if (UNIV_LIKELY(ch < 0x80)) {
- /* 0xxxxxxx */
- } else if (UNIV_UNLIKELY(ch < 0xC0)) {
- /* A continuation byte cannot start a code. */
- goto err_exit;
- } else if (ch < 0xE0) {
- /* 110yyyyy 10xxxxxx */
- ch &= 0x1F;
- ut_d(min_ch = 0x80);
- goto get1;
- } else if (ch < 0xF0) {
- /* 1110zzzz 10yyyyyy 10xxxxxx */
- ch &= 0x0F;
- ut_d(min_ch = 0x800);
- goto get2;
- } else if (ch < 0xF8) {
- /* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */
- ch &= 0x07;
- ut_d(min_ch = 0x10000);
- goto get3;
- } else if (ch < 0xFC) {
- /* 111110tt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */
- ch &= 0x03;
- ut_d(min_ch = 0x200000);
- goto get4;
- } else if (ch < 0xFE) {
- /* 1111110s 10tttttt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */
- ut_d(min_ch = 0x4000000);
- if (!fts_utf8_is_valid(*p)) {
- goto err_exit;
- }
- ch <<= 6;
- ch |= (*p++) & 0x3F;
-get4:
- if (!fts_utf8_is_valid(*p)) {
- goto err_exit;
- }
- ch <<= 6;
- ch |= (*p++) & 0x3F;
-get3:
- if (!fts_utf8_is_valid(*p)) {
- goto err_exit;
- }
- ch <<= 6;
- ch |= (*p++) & 0x3F;
-get2:
- if (!fts_utf8_is_valid(*p)) {
- goto err_exit;
- }
- ch <<= 6;
- ch |= (*p++) & 0x3F;
-get1:
- if (!fts_utf8_is_valid(*p)) {
- goto err_exit;
- }
- ch <<= 6;
- ch |= (*p++) & 0x3F;
- /* The 6-byte lead byte is not masked above, so its high bits
- survive the shifts when ulint is wider than 32 bits; they
- are dropped here. */
- ch &= 0xFFFFFFFF;
-
- /* The code positions U+D800 to U+DFFF (UTF-16 surrogate pairs)
- and U+FFFE and U+FFFF cannot occur in valid UTF-8. Overlong
- forms (ch < min_ch) are rejected only in UNIV_DEBUG builds. */
- if ( (ch >= 0xD800 && ch <= 0xDFFF)
-#ifdef UNIV_DEBUG
- || ch < min_ch
-#endif /* UNIV_DEBUG */
- || ch == 0xFFFE || ch == 0xFFFF) {
-
- ch = UTF8_ERROR;
- }
- } else {
-err_exit:
- ch = UTF8_ERROR;
- }
-
- *ptr = p;
-
- return(ch);
-}
-
-/******************************************************************//**
-Get the first character's code position for FTS index partition */
-extern
-ulint
-innobase_strnxfrm(
-/*==============*/
- const CHARSET_INFO* cs, /*!< in: Character set */
- const uchar* p2, /*!< in: string */
- const ulint len2); /*!< in: string length */
-
-/******************************************************************//**
-Select the FTS auxiliary index for the value innobase_strnxfrm() yields.
-@return the index to use for the string */
-UNIV_INLINE
-ulint
-fts_select_index(
-/*=============*/
- const CHARSET_INFO* cs, /*!< in: Charset */
- const byte* str, /*!< in: string */
- ulint len) /*!< in: string length */
-{
- ulint selected = 0;
- ulint value = innobase_strnxfrm(cs, str, len);
- /* fts_index_selector[] is scanned up to its zero-value entry. */
- while (fts_index_selector[selected].value != 0) {
-
- if (fts_index_selector[selected].value == value) {
-
- return(selected);
-
- } else if (fts_index_selector[selected].value > value) {
-
- return(selected > 0 ? selected - 1 : 0);
- }
-
- ++selected;
- }
- /* value is above every selector value: use the last selector. */
- ut_ad(selected > 1);
-
- return(selected - 1);
-}
-
-/******************************************************************//**
-Select the next FTS auxiliary index for innobase_strnxfrm()'s value.
-@return the next index to use for character */
-UNIV_INLINE
-ulint
-fts_select_next_index(
-/*==================*/
- const CHARSET_INFO* cs, /*!< in: Charset */
- const byte* str, /*!< in: string */
- ulint len) /*!< in: string length */
-{
- ulint selected = 0;
- ulint value = innobase_strnxfrm(cs, str, len);
- /* fts_index_selector[] is scanned up to its zero-value entry. */
- while (fts_index_selector[selected].value != 0) {
-
- if (fts_index_selector[selected].value == value) {
-
- return(selected + 1);
-
- } else if (fts_index_selector[selected].value > value) {
-
- return(selected);
- }
-
- ++selected;
- }
- /* No selector value is >= value: return the zero entry's index. */
- ut_ad(selected > 0);
-
- return((ulint) selected);
-}
-
-/******************************************************************//**
-Return the selected FTS aux index suffix; selected is not range-checked. */
-UNIV_INLINE
-const char*
-fts_get_suffix(
-/*===========*/
- ulint selected) /*!< in: selected index */
-{
- return(fts_index_selector[selected].suffix);
-}
-
-/******************************************************************//**
-Get the number of index selectors by counting fts_index_selector[] entries.
-@return The number of selectors */
-UNIV_INLINE
-ulint
-fts_get_n_selectors(void)
-/*=====================*/
-{
- ulint i = 0;
-
- // FIXME: This is a hack: every call rescans up to the zero-value entry
- while (fts_index_selector[i].value != 0) {
- ++i;
- }
-
- return(i);
-}
-
-#endif /* INNOBASE_FTS0TYPES_IC */
diff --git a/storage/xtradb/include/fts0vlc.ic b/storage/xtradb/include/fts0vlc.ic
deleted file mode 100644
index e79bcf59347..00000000000
--- a/storage/xtradb/include/fts0vlc.ic
+++ /dev/null
@@ -1,142 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fts0vlc.ic
-Full text variable length integer encoding/decoding.
-
-Created 2007-03-27 Sunny Bains
-*******************************************************/
-
-#ifndef INNOBASE_FTS0VLC_IC
-#define INNOBASE_FTS0VLC_IC
-
-#include "fts0types.h"
-
-/******************************************************************//**
-Return length of val if it were encoded using our VLC scheme.
-FIXME: We will need to be able encode 8 bytes value
-@return length of value encoded, in bytes */
-UNIV_INLINE
-ulint
-fts_get_encoded_len(
-/*================*/
- ulint val) /* in: value to encode */
-{
- if (val <= 127) {
- return(1);
- } else if (val <= 16383) {
- return(2);
- } else if (val <= 2097151) {
- return(3);
- } else if (val <= 268435455) {
- return(4);
- } else {
- /* Possibly we should care that on 64-bit machines ulint can
- contain values that we can't encode in 5 bytes, but
- fts_encode_int doesn't handle them either so it doesn't much
- matter. */
-
- return(5);
- }
-}
-
-/******************************************************************//**
-Encode an integer using our VLC scheme and return the length in bytes.
-@return length of value encoded, in bytes */
-UNIV_INLINE
-ulint
-fts_encode_int(
-/*===========*/
- ulint val, /* in: value to encode */
- byte* buf) /* in: buffer, must have enough space */
-{
- ulint len;
-
- if (val <= 127) {
- *buf = (byte) val;
-
- len = 1;
- } else if (val <= 16383) {
- *buf++ = (byte)(val >> 7);
- *buf = (byte)(val & 0x7F);
-
- len = 2;
- } else if (val <= 2097151) {
- *buf++ = (byte)(val >> 14);
- *buf++ = (byte)((val >> 7) & 0x7F);
- *buf = (byte)(val & 0x7F);
-
- len = 3;
- } else if (val <= 268435455) {
- *buf++ = (byte)(val >> 21);
- *buf++ = (byte)((val >> 14) & 0x7F);
- *buf++ = (byte)((val >> 7) & 0x7F);
- *buf = (byte)(val & 0x7F);
-
- len = 4;
- } else {
- /* Best to keep the limitations of the 32/64 bit versions
- identical, at least for the time being. */
- ut_ad(val <= 4294967295u);
-
- *buf++ = (byte)(val >> 28);
- *buf++ = (byte)((val >> 21) & 0x7F);
- *buf++ = (byte)((val >> 14) & 0x7F);
- *buf++ = (byte)((val >> 7) & 0x7F);
- *buf = (byte)(val & 0x7F);
-
- len = 5;
- }
-
- /* High-bit on means "last byte in the encoded integer". */
- *buf |= 0x80;
-
- return(len);
-}
-
-/******************************************************************//**
-Decode and return the integer that was encoded using our VLC scheme.
-@return value decoded */
-UNIV_INLINE
-ulint
-fts_decode_vlc(
-/*===========*/
- byte** ptr) /* in: ptr to decode from, this ptr is
- incremented by the number of bytes decoded */
-{
- ulint val = 0;
-
- for (;;) {
- byte b = **ptr;
-
- ++*ptr;
- val |= (b & 0x7F);
-
- /* High-bit on means "last byte in the encoded integer". */
- if (b & 0x80) {
- break;
- } else {
- val <<= 7;
- }
- }
-
- return(val);
-}
-
-#endif
diff --git a/storage/xtradb/include/fut0fut.h b/storage/xtradb/include/fut0fut.h
deleted file mode 100644
index 851cdb44cdf..00000000000
--- a/storage/xtradb/include/fut0fut.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fut0fut.h
-File-based utilities
-
-Created 12/13/1995 Heikki Tuuri
-***********************************************************************/
-
-
-#ifndef fut0fut_h
-#define fut0fut_h
-
-#include "univ.i"
-
-#include "fil0fil.h"
-#include "mtr0mtr.h"
-
-/********************************************************************//**
-Gets a pointer to a file address and latches the page.
-@return pointer to a byte in a frame; the file page in the frame is
-bufferfixed and latched */
-UNIV_INLINE
-byte*
-fut_get_ptr(
-/*========*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fil_addr_t addr, /*!< in: file address */
- ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
- mtr_t* mtr); /*!< in: mtr handle */
-
-#ifndef UNIV_NONINL
-#include "fut0fut.ic"
-#endif
-
-#endif
-
diff --git a/storage/xtradb/include/fut0fut.ic b/storage/xtradb/include/fut0fut.ic
deleted file mode 100644
index 15c964df6c7..00000000000
--- a/storage/xtradb/include/fut0fut.ic
+++ /dev/null
@@ -1,60 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fut0fut.ic
-File-based utilities
-
-Created 12/13/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "srv0srv.h"
-#include "sync0rw.h"
-#include "buf0buf.h"
-
-/********************************************************************//**
-Gets a pointer to a file address and latches the page.
-@return pointer to a byte in a frame; the file page in the frame is
-bufferfixed and latched */
-UNIV_INLINE
-byte*
-fut_get_ptr(
-/*========*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- fil_addr_t addr, /*!< in: file address */
- ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH */
- mtr_t* mtr) /*!< in: mtr handle */
-{
- buf_block_t* block;
- byte* ptr;
-
- ut_ad(addr.boffset < UNIV_PAGE_SIZE);
- ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
-
- block = buf_page_get(space, zip_size, addr.page, rw_latch, mtr);
-
- SRV_CORRUPT_TABLE_CHECK(block, return(0););
-
- ptr = buf_block_get_frame(block) + addr.boffset;
-
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
- return(ptr);
-}
diff --git a/storage/xtradb/include/fut0lst.h b/storage/xtradb/include/fut0lst.h
deleted file mode 100644
index 8554cc60cdd..00000000000
--- a/storage/xtradb/include/fut0lst.h
+++ /dev/null
@@ -1,192 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fut0lst.h
-File-based list utilities
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#ifndef fut0lst_h
-#define fut0lst_h
-
-#include "univ.i"
-
-#include "fil0fil.h"
-#include "mtr0mtr.h"
-
-
-/* The C 'types' of base node and list node: these should be used to
-write self-documenting code. Of course, the sizeof macro cannot be
-applied to these types! */
-
-typedef byte flst_base_node_t;
-typedef byte flst_node_t;
-
-/* The physical size of a list base node in bytes */
-#define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE)
-
-/* The physical size of a list node in bytes */
-#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE)
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Initializes a list base node. */
-UNIV_INLINE
-void
-flst_init(
-/*======*/
- flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Adds a node as the last node in a list. */
-UNIV_INTERN
-void
-flst_add_last(
-/*==========*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node, /*!< in: node to add */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Adds a node as the first node in a list. */
-UNIV_INTERN
-void
-flst_add_first(
-/*===========*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node, /*!< in: node to add */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Inserts a node after another in a list. */
-UNIV_INTERN
-void
-flst_insert_after(
-/*==============*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node1, /*!< in: node to insert after */
- flst_node_t* node2, /*!< in: node to add */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Inserts a node before another in a list. */
-UNIV_INTERN
-void
-flst_insert_before(
-/*===============*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node2, /*!< in: node to insert */
- flst_node_t* node3, /*!< in: node to insert before */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Removes a node. */
-UNIV_INTERN
-void
-flst_remove(
-/*========*/
- flst_base_node_t* base, /*!< in: pointer to base node of list */
- flst_node_t* node2, /*!< in: node to remove */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Gets list length.
-@return length */
-UNIV_INLINE
-ulint
-flst_get_len(
-/*=========*/
- const flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Gets list first node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_first(
-/*===========*/
- const flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Gets list last node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_last(
-/*==========*/
- const flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Gets list next node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_next_addr(
-/*===============*/
- const flst_node_t* node, /*!< in: pointer to node */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Gets list prev node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_prev_addr(
-/*===============*/
- const flst_node_t* node, /*!< in: pointer to node */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Writes a file address. */
-UNIV_INLINE
-void
-flst_write_addr(
-/*============*/
- fil_faddr_t* faddr, /*!< in: pointer to file faddress */
- fil_addr_t addr, /*!< in: file address */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Reads a file address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_read_addr(
-/*===========*/
- const fil_faddr_t* faddr, /*!< in: pointer to file faddress */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************************//**
-Validates a file-based list.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-flst_validate(
-/*==========*/
- const flst_base_node_t* base, /*!< in: pointer to base node of list */
- mtr_t* mtr1); /*!< in: mtr */
-/********************************************************************//**
-Prints info of a file-based list. */
-UNIV_INTERN
-void
-flst_print(
-/*=======*/
- const flst_base_node_t* base, /*!< in: pointer to base node of list */
- mtr_t* mtr); /*!< in: mtr */
-
-
-#ifndef UNIV_NONINL
-#include "fut0lst.ic"
-#endif
-
-#endif /* !UNIV_HOTBACKUP */
-
-#endif
diff --git a/storage/xtradb/include/fut0lst.ic b/storage/xtradb/include/fut0lst.ic
deleted file mode 100644
index d18cf21378f..00000000000
--- a/storage/xtradb/include/fut0lst.ic
+++ /dev/null
@@ -1,167 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/fut0lst.ic
-File-based list utilities
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "fut0fut.h"
-#include "mtr0log.h"
-#include "buf0buf.h"
-
-/* We define the field offsets of a node for the list */
-#define FLST_PREV 0 /* 6-byte address of the previous list element;
- the page part of address is FIL_NULL, if no
- previous element */
-#define FLST_NEXT FIL_ADDR_SIZE /* 6-byte address of the next
- list element; the page part of address
- is FIL_NULL, if no next element */
-
-/* We define the field offsets of a base node for the list */
-#define FLST_LEN 0 /* 32-bit list length field */
-#define FLST_FIRST 4 /* 6-byte address of the first element
- of the list; undefined if empty list */
-#define FLST_LAST (4 + FIL_ADDR_SIZE) /* 6-byte address of the
- last element of the list; undefined
- if empty list */
-
-/********************************************************************//**
-Writes a file address. */
-UNIV_INLINE
-void
-flst_write_addr(
-/*============*/
- fil_faddr_t* faddr, /*!< in: pointer to file faddress */
- fil_addr_t addr, /*!< in: file address */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ut_ad(faddr && mtr);
- ut_ad(mtr_memo_contains_page(mtr, faddr, MTR_MEMO_PAGE_X_FIX));
- ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
- ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
-
- mlog_write_ulint(faddr + FIL_ADDR_PAGE, addr.page, MLOG_4BYTES, mtr);
- mlog_write_ulint(faddr + FIL_ADDR_BYTE, addr.boffset,
- MLOG_2BYTES, mtr);
-}
-
-/********************************************************************//**
-Reads a file address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_read_addr(
-/*===========*/
- const fil_faddr_t* faddr, /*!< in: pointer to file faddress */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- fil_addr_t addr;
-
- ut_ad(faddr && mtr);
-
- addr.page = mtr_read_ulint(faddr + FIL_ADDR_PAGE, MLOG_4BYTES, mtr);
- addr.boffset = mtr_read_ulint(faddr + FIL_ADDR_BYTE, MLOG_2BYTES,
- mtr);
- ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
- ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
- return(addr);
-}
-
-/********************************************************************//**
-Initializes a list base node. */
-UNIV_INLINE
-void
-flst_init(
-/*======*/
- flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX));
-
- mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr);
- flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr);
- flst_write_addr(base + FLST_LAST, fil_addr_null, mtr);
-}
-
-/********************************************************************//**
-Gets list length.
-@return length */
-UNIV_INLINE
-ulint
-flst_get_len(
-/*=========*/
- const flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- return(mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr));
-}
-
-/********************************************************************//**
-Gets list first node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_first(
-/*===========*/
- const flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- return(flst_read_addr(base + FLST_FIRST, mtr));
-}
-
-/********************************************************************//**
-Gets list last node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_last(
-/*==========*/
- const flst_base_node_t* base, /*!< in: pointer to base node */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- return(flst_read_addr(base + FLST_LAST, mtr));
-}
-
-/********************************************************************//**
-Gets list next node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_next_addr(
-/*===============*/
- const flst_node_t* node, /*!< in: pointer to node */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- return(flst_read_addr(node + FLST_NEXT, mtr));
-}
-
-/********************************************************************//**
-Gets list prev node address.
-@return file address */
-UNIV_INLINE
-fil_addr_t
-flst_get_prev_addr(
-/*===============*/
- const flst_node_t* node, /*!< in: pointer to node */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- return(flst_read_addr(node + FLST_PREV, mtr));
-}
diff --git a/storage/xtradb/include/ha0ha.h b/storage/xtradb/include/ha0ha.h
deleted file mode 100644
index 58eb581e76a..00000000000
--- a/storage/xtradb/include/ha0ha.h
+++ /dev/null
@@ -1,265 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/ha0ha.h
-The hash table with external chains
-
-Created 8/18/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef ha0ha_h
-#define ha0ha_h
-
-#include "univ.i"
-
-#include "hash0hash.h"
-#include "page0types.h"
-#include "buf0types.h"
-#include "rem0types.h"
-
-/*************************************************************//**
-Looks for an element in a hash table.
-@return pointer to the data of the first hash table node in chain
-having the fold number, NULL if not found */
-UNIV_INLINE
-const rec_t*
-ha_search_and_get_data(
-/*===================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: folded value of the searched data */
-/*********************************************************//**
-Looks for an element when we know the pointer to the data and updates
-the pointer to data if found.
-@return TRUE if found */
-UNIV_INTERN
-ibool
-ha_search_and_update_if_found_func(
-/*===============================*/
- hash_table_t* table, /*!< in/out: hash table */
- ulint fold, /*!< in: folded value of the searched data */
- const rec_t* data, /*!< in: pointer to the data */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- buf_block_t* new_block,/*!< in: block containing new_data */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- const rec_t* new_data);/*!< in: new pointer to the data */
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-/** Looks for an element when we know the pointer to the data and
-updates the pointer to data if found.
-@param table in/out: hash table
-@param fold in: folded value of the searched data
-@param data in: pointer to the data
-@param new_block in: block containing new_data
-@param new_data in: new pointer to the data */
-# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
- ha_search_and_update_if_found_func(table,fold,data,new_block,new_data)
-#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-/** Looks for an element when we know the pointer to the data and
-updates the pointer to data if found.
-@param table in/out: hash table
-@param fold in: folded value of the searched data
-@param data in: pointer to the data
-@param new_block ignored: block containing new_data
-@param new_data in: new pointer to the data */
-# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \
- ha_search_and_update_if_found_func(table,fold,data,new_data)
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-/*************************************************************//**
-Creates a hash table with at least n array cells. The actual number
-of cells is chosen to be a prime number slightly bigger than n.
-@return own: created table */
-UNIV_INTERN
-hash_table_t*
-ha_create_func(
-/*===========*/
- ulint n, /*!< in: number of array cells */
-#ifdef UNIV_SYNC_DEBUG
- ulint mutex_level, /*!< in: level of the mutexes in the latching
- order: this is used in the debug version */
-#endif /* UNIV_SYNC_DEBUG */
- ulint n_mutexes, /*!< in: number of mutexes to protect the
- hash table: must be a power of 2, or 0 */
- ulint type); /*!< in: type of datastructure for which
- the memory heap is going to be used e.g.:
- MEM_HEAP_FOR_BTR_SEARCH or
- MEM_HEAP_FOR_PAGE_HASH */
-#ifdef UNIV_SYNC_DEBUG
-/** Creates a hash table.
-@return own: created table
-@param n_c in: number of array cells. The actual number of cells is
-chosen to be a slightly bigger prime number.
-@param level in: level of the mutexes in the latching order
-@param n_m in: number of mutexes to protect the hash table;
- must be a power of 2, or 0 */
-# define ib_create(n_c,n_m,type,level) ha_create_func(n_c,level,n_m,type)
-#else /* UNIV_SYNC_DEBUG */
-/** Creates a hash table.
-@return own: created table
-@param n_c in: number of array cells. The actual number of cells is
-chosen to be a slightly bigger prime number.
-@param level in: level of the mutexes in the latching order
-@param n_m in: number of mutexes to protect the hash table;
- must be a power of 2, or 0 */
-# define ib_create(n_c,n_m,type,level) ha_create_func(n_c,n_m,type)
-#endif /* UNIV_SYNC_DEBUG */
-
-/*************************************************************//**
-Empties a hash table and frees the memory heaps. */
-UNIV_INTERN
-void
-ha_clear(
-/*=====*/
- hash_table_t* table); /*!< in, own: hash table */
-
-/*************************************************************//**
-Inserts an entry into a hash table. If an entry with the same fold number
-is found, its node is updated to point to the new data, and no new node
-is inserted.
-@return TRUE if succeed, FALSE if no more memory could be allocated */
-UNIV_INTERN
-ibool
-ha_insert_for_fold_func(
-/*====================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold, /*!< in: folded value of data; if a node with
- the same fold value already exists, it is
- updated to point to the same data, and no new
- node is created! */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- buf_block_t* block, /*!< in: buffer block containing the data */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- const rec_t* data); /*!< in: data, must not be NULL */
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-/**
-Inserts an entry into a hash table. If an entry with the same fold number
-is found, its node is updated to point to the new data, and no new node
-is inserted.
-@return TRUE if succeed, FALSE if no more memory could be allocated
-@param t in: hash table
-@param f in: folded value of data
-@param b in: buffer block containing the data
-@param d in: data, must not be NULL */
-# define ha_insert_for_fold(t,f,b,d) do { \
- ha_insert_for_fold_func(t,f,b,d); \
- MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED); \
-} while(0)
-#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-/**
-Inserts an entry into a hash table. If an entry with the same fold number
-is found, its node is updated to point to the new data, and no new node
-is inserted.
-@return TRUE if succeed, FALSE if no more memory could be allocated
-@param t in: hash table
-@param f in: folded value of data
-@param b ignored: buffer block containing the data
-@param d in: data, must not be NULL */
-# define ha_insert_for_fold(t,f,b,d) do { \
- ha_insert_for_fold_func(t,f,d); \
- MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED); \
-} while (0)
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-
-/*********************************************************//**
-Looks for an element when we know the pointer to the data and deletes
-it from the hash table if found.
-@return TRUE if found */
-UNIV_INLINE
-ibool
-ha_search_and_delete_if_found(
-/*==========================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold, /*!< in: folded value of the searched data */
- const rec_t* data); /*!< in: pointer to the data */
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************//**
-Removes from the chain determined by fold all nodes whose data pointer
-points to the page given. */
-UNIV_INTERN
-void
-ha_remove_all_nodes_to_page(
-/*========================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold, /*!< in: fold value */
- const page_t* page); /*!< in: buffer page */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-/*************************************************************//**
-Validates a given range of the cells in hash table.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-ha_validate(
-/*========*/
- hash_table_t* table, /*!< in: hash table */
- ulint start_index, /*!< in: start index */
- ulint end_index); /*!< in: end index */
-#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
-/*************************************************************//**
-Prints info of a hash table. */
-UNIV_INTERN
-void
-ha_print_info(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- hash_table_t* table); /*!< in: hash table */
-#endif /* !UNIV_HOTBACKUP */
-
-/** The hash table external chain node */
-struct ha_node_t {
- ha_node_t* next; /*!< next chain node or NULL if none */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- buf_block_t* block; /*!< buffer block containing the data, or NULL */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- const rec_t* data; /*!< pointer to the data */
- ulint fold; /*!< fold value for the data */
-};
-
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Assert that the synchronization object in a hash operation involving
-possible change in the hash table is held.
-Note that in case of mutexes we assert that mutex is owned while in case
-of rw-locks we assert that it is held in exclusive mode. */
-UNIV_INLINE
-void
-hash_assert_can_modify(
-/*===================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold value */
-/********************************************************************//**
-Assert that the synchronization object in a hash search operation is held.
-Note that in case of mutexes we assert that mutex is owned while in case
-of rw-locks we assert that it is held either in x-mode or s-mode. */
-UNIV_INLINE
-void
-hash_assert_can_search(
-/*===================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold value */
-#else /* UNIV_DEBUG */
-#define hash_assert_can_modify(t, f)
-#define hash_assert_can_search(t, f)
-#endif /* UNIV_DEBUG */
-
-
-#ifndef UNIV_NONINL
-#include "ha0ha.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/ha0ha.ic b/storage/xtradb/include/ha0ha.ic
deleted file mode 100644
index 9d0e396e200..00000000000
--- a/storage/xtradb/include/ha0ha.ic
+++ /dev/null
@@ -1,246 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/ha0ha.ic
-The hash table with external chains
-
-Created 8/18/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "ut0rnd.h"
-#include "mem0mem.h"
-#include "btr0types.h"
-
-/***********************************************************//**
-Deletes a hash node. */
-UNIV_INTERN
-void
-ha_delete_hash_node(
-/*================*/
- hash_table_t* table, /*!< in: hash table */
- ha_node_t* del_node); /*!< in: node to be deleted */
-
-/******************************************************************//**
-Gets a hash node data.
-@return pointer to the data */
-UNIV_INLINE
-const rec_t*
-ha_node_get_data(
-/*=============*/
- const ha_node_t* node) /*!< in: hash chain node */
-{
- return(node->data);
-}
-
-/******************************************************************//**
-Sets hash node data. */
-UNIV_INLINE
-void
-ha_node_set_data_func(
-/*==================*/
- ha_node_t* node, /*!< in: hash chain node */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- buf_block_t* block, /*!< in: buffer block containing the data */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- const rec_t* data) /*!< in: pointer to the data */
-{
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
- node->block = block;
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- node->data = data;
-}
-
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-/** Sets hash node data.
-@param n in: hash chain node
-@param b in: buffer block containing the data
-@param d in: pointer to the data */
-# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,b,d)
-#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-/** Sets hash node data.
-@param n in: hash chain node
-@param b in: buffer block containing the data
-@param d in: pointer to the data */
-# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,d)
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
-
-/******************************************************************//**
-Gets the next node in a hash chain.
-@return next node, NULL if none */
-UNIV_INLINE
-ha_node_t*
-ha_chain_get_next(
-/*==============*/
- ha_node_t* node) /*!< in: hash chain node */
-{
- return(node->next);
-}
-
-/******************************************************************//**
-Gets the first node in a hash chain.
-@return first node, NULL if none */
-UNIV_INLINE
-ha_node_t*
-ha_chain_get_first(
-/*===============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold value determining the chain */
-{
- return((ha_node_t*)
- hash_get_nth_cell(table, hash_calc_hash(fold, table))->node);
-}
-
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-Assert that the synchronization object in a hash operation involving
-possible change in the hash table is held.
-Note that in case of mutexes we assert that mutex is owned while in case
-of rw-locks we assert that it is held in exclusive mode. */
-UNIV_INLINE
-void
-hash_assert_can_modify(
-/*===================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold value */
-{
- if (table->type == HASH_TABLE_SYNC_MUTEX) {
- ut_ad(mutex_own(hash_get_mutex(table, fold)));
- } else if (table->type == HASH_TABLE_SYNC_RW_LOCK) {
-# ifdef UNIV_SYNC_DEBUG
- prio_rw_lock_t* lock = hash_get_lock(table, fold);
- ut_ad(rw_lock_own(lock, RW_LOCK_EX));
-# endif
- } else {
- ut_ad(table->type == HASH_TABLE_SYNC_NONE);
- }
-}
-
-/********************************************************************//**
-Assert that the synchronization object in a hash search operation is held.
-Note that in case of mutexes we assert that mutex is owned while in case
-of rw-locks we assert that it is held either in x-mode or s-mode. */
-UNIV_INLINE
-void
-hash_assert_can_search(
-/*===================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold value */
-{
- if (table->type == HASH_TABLE_SYNC_MUTEX) {
- ut_ad(mutex_own(hash_get_mutex(table, fold)));
- } else if (table->type == HASH_TABLE_SYNC_RW_LOCK) {
-# ifdef UNIV_SYNC_DEBUG
- prio_rw_lock_t* lock = hash_get_lock(table, fold);
- ut_ad(rw_lock_own(lock, RW_LOCK_EX)
- || rw_lock_own(lock, RW_LOCK_SHARED));
-# endif
- } else {
- ut_ad(table->type == HASH_TABLE_SYNC_NONE);
- }
-}
-#endif /* UNIV_DEBUG */
-
-/*************************************************************//**
-Looks for an element in a hash table.
-@return pointer to the data of the first hash table node in chain
-having the fold number, NULL if not found */
-UNIV_INLINE
-const rec_t*
-ha_search_and_get_data(
-/*===================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: folded value of the searched data */
-{
- ha_node_t* node;
-
- hash_assert_can_search(table, fold);
- ut_ad(btr_search_enabled);
-
- node = ha_chain_get_first(table, fold);
-
- while (node) {
- if (node->fold == fold) {
-
- return(node->data);
- }
-
- node = ha_chain_get_next(node);
- }
-
- return(NULL);
-}
-
-/*********************************************************//**
-Looks for an element when we know the pointer to the data.
-@return pointer to the hash table node, NULL if not found in the table */
-UNIV_INLINE
-ha_node_t*
-ha_search_with_data(
-/*================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold, /*!< in: folded value of the searched data */
- const rec_t* data) /*!< in: pointer to the data */
-{
- ha_node_t* node;
-
- hash_assert_can_search(table, fold);
-
- ut_ad(btr_search_enabled);
-
- node = ha_chain_get_first(table, fold);
-
- while (node) {
- if (node->data == data) {
-
- return(node);
- }
-
- node = ha_chain_get_next(node);
- }
-
- return(NULL);
-}
-
-/*********************************************************//**
-Looks for an element when we know the pointer to the data, and deletes
-it from the hash table, if found.
-@return TRUE if found */
-UNIV_INLINE
-ibool
-ha_search_and_delete_if_found(
-/*==========================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold, /*!< in: folded value of the searched data */
- const rec_t* data) /*!< in: pointer to the data */
-{
- ha_node_t* node;
-
- hash_assert_can_modify(table, fold);
- ut_ad(btr_search_enabled);
-
- node = ha_search_with_data(table, fold, data);
-
- if (node) {
- ha_delete_hash_node(table, node);
-
- return(TRUE);
- }
-
- return(FALSE);
-}
diff --git a/storage/xtradb/include/ha0storage.h b/storage/xtradb/include/ha0storage.h
deleted file mode 100644
index 0073930b502..00000000000
--- a/storage/xtradb/include/ha0storage.h
+++ /dev/null
@@ -1,140 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/ha0storage.h
-Hash storage.
-Provides a data structure that stores chunks of data in
-its own storage, avoiding duplicates.
-
-Created September 22, 2007 Vasil Dimov
-*******************************************************/
-
-#ifndef ha0storage_h
-#define ha0storage_h
-
-#include "univ.i"
-
-/** This value is used by default by ha_storage_create(). More memory
-is allocated later when/if it is needed. */
-#define HA_STORAGE_DEFAULT_HEAP_BYTES 1024
-
-/** This value is used by default by ha_storage_create(). It is a
-constant per ha_storage's lifetime. */
-#define HA_STORAGE_DEFAULT_HASH_CELLS 4096
-
-/** Hash storage */
-struct ha_storage_t;
-
-/*******************************************************************//**
-Creates a hash storage. If any of the parameters is 0, then a default
-value is used.
-@return own: hash storage */
-UNIV_INLINE
-ha_storage_t*
-ha_storage_create(
-/*==============*/
- ulint initial_heap_bytes, /*!< in: initial heap's size */
- ulint initial_hash_cells); /*!< in: initial number of cells
- in the hash table */
-
-/*******************************************************************//**
-Copies data into the storage and returns a pointer to the copy. If the
-same data chunk is already present, then pointer to it is returned.
-Data chunks are considered to be equal if len1 == len2 and
-memcmp(data1, data2, len1) == 0. If "data" is not present (and thus
-data_len bytes need to be allocated) and the size of storage is going to
-become more than "memlim" then "data" is not added and NULL is returned.
-To disable this behavior "memlim" can be set to 0, which stands for
-"no limit".
-@return pointer to the copy */
-UNIV_INTERN
-const void*
-ha_storage_put_memlim(
-/*==================*/
- ha_storage_t* storage, /*!< in/out: hash storage */
- const void* data, /*!< in: data to store */
- ulint data_len, /*!< in: data length */
- ulint memlim); /*!< in: memory limit to obey */
-
-/*******************************************************************//**
-Same as ha_storage_put_memlim() but without memory limit.
-@param storage in/out: hash storage
-@param data in: data to store
-@param data_len in: data length
-@return pointer to the copy of the data */
-#define ha_storage_put(storage, data, data_len) \
- ha_storage_put_memlim((storage), (data), (data_len), 0)
-
-/*******************************************************************//**
-Copies string into the storage and returns a pointer to the copy. If the
-same string is already present, then pointer to it is returned.
-Strings are considered to be equal if strcmp(str1, str2) == 0.
-@param storage in/out: hash storage
-@param str in: string to put
-@return pointer to the copy of the string */
-#define ha_storage_put_str(storage, str) \
- ((const char*) ha_storage_put((storage), (str), strlen(str) + 1))
-
-/*******************************************************************//**
-Copies string into the storage and returns a pointer to the copy obeying
-a memory limit.
-If the same string is already present, then pointer to it is returned.
-Strings are considered to be equal if strcmp(str1, str2) == 0.
-@param storage in/out: hash storage
-@param str in: string to put
-@param memlim in: memory limit to obey
-@return pointer to the copy of the string */
-#define ha_storage_put_str_memlim(storage, str, memlim) \
- ((const char*) ha_storage_put_memlim((storage), (str), \
- strlen(str) + 1, (memlim)))
-
-/*******************************************************************//**
-Empties a hash storage, freeing memory occupied by data chunks.
-This invalidates any pointers previously returned by ha_storage_put().
-The hash storage is not invalidated itself and can be used again. */
-UNIV_INLINE
-void
-ha_storage_empty(
-/*=============*/
- ha_storage_t** storage); /*!< in/out: hash storage */
-
-/*******************************************************************//**
-Frees a hash storage and everything it contains, it cannot be used after
-this call.
-This invalidates any pointers previously returned by ha_storage_put(). */
-UNIV_INLINE
-void
-ha_storage_free(
-/*============*/
- ha_storage_t* storage); /*!< in, own: hash storage */
-
-/*******************************************************************//**
-Gets the size of the memory used by a storage.
-@return bytes used */
-UNIV_INLINE
-ulint
-ha_storage_get_size(
-/*================*/
- const ha_storage_t* storage); /*!< in: hash storage */
-
-#ifndef UNIV_NONINL
-#include "ha0storage.ic"
-#endif
-
-#endif /* ha0storage_h */
diff --git a/storage/xtradb/include/ha0storage.ic b/storage/xtradb/include/ha0storage.ic
deleted file mode 100644
index 7150ca045ec..00000000000
--- a/storage/xtradb/include/ha0storage.ic
+++ /dev/null
@@ -1,146 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/ha0storage.ic
-Hash storage.
-Provides a data structure that stores chunks of data in
-its own storage, avoiding duplicates.
-
-Created September 24, 2007 Vasil Dimov
-*******************************************************/
-
-#include "univ.i"
-#include "ha0storage.h"
-#include "hash0hash.h"
-#include "mem0mem.h"
-
-/** Hash storage for strings */
-struct ha_storage_t {
- mem_heap_t* heap; /*!< memory heap from which memory is
- allocated */
- hash_table_t* hash; /*!< hash table used to avoid
- duplicates */
-};
-
-/** Objects of this type are stored in ha_storage_t */
-struct ha_storage_node_t {
- ulint data_len;/*!< length of the data */
- const void* data; /*!< pointer to data */
- ha_storage_node_t* next; /*!< next node in hash chain */
-};
-
-/*******************************************************************//**
-Creates a hash storage. If any of the parameters is 0, then a default
-value is used.
-@return own: hash storage */
-UNIV_INLINE
-ha_storage_t*
-ha_storage_create(
-/*==============*/
- ulint initial_heap_bytes, /*!< in: initial heap's size */
- ulint initial_hash_cells) /*!< in: initial number of cells
- in the hash table */
-{
- ha_storage_t* storage;
- mem_heap_t* heap;
-
- if (initial_heap_bytes == 0) {
-
- initial_heap_bytes = HA_STORAGE_DEFAULT_HEAP_BYTES;
- }
-
- if (initial_hash_cells == 0) {
-
- initial_hash_cells = HA_STORAGE_DEFAULT_HASH_CELLS;
- }
-
- /* we put "storage" within "storage->heap" */
-
- heap = mem_heap_create(sizeof(ha_storage_t)
- + initial_heap_bytes);
-
- storage = (ha_storage_t*) mem_heap_alloc(heap,
- sizeof(ha_storage_t));
-
- storage->heap = heap;
- storage->hash = hash_create(initial_hash_cells);
-
- return(storage);
-}
-
-/*******************************************************************//**
-Empties a hash storage, freeing memory occupied by data chunks.
-This invalidates any pointers previously returned by ha_storage_put().
-The hash storage is not invalidated itself and can be used again. */
-UNIV_INLINE
-void
-ha_storage_empty(
-/*=============*/
- ha_storage_t** storage) /*!< in/out: hash storage */
-{
- ha_storage_t temp_storage;
-
- temp_storage.heap = (*storage)->heap;
- temp_storage.hash = (*storage)->hash;
-
- hash_table_clear(temp_storage.hash);
- mem_heap_empty(temp_storage.heap);
-
- *storage = (ha_storage_t*) mem_heap_alloc(temp_storage.heap,
- sizeof(ha_storage_t));
-
- (*storage)->heap = temp_storage.heap;
- (*storage)->hash = temp_storage.hash;
-}
-
-/*******************************************************************//**
-Frees a hash storage and everything it contains, it cannot be used after
-this call.
-This invalidates any pointers previously returned by ha_storage_put(). */
-UNIV_INLINE
-void
-ha_storage_free(
-/*============*/
- ha_storage_t* storage) /*!< in, own: hash storage */
-{
- /* order is important because the pointer storage->hash is
- within the heap */
- hash_table_free(storage->hash);
- mem_heap_free(storage->heap);
-}
-
-/*******************************************************************//**
-Gets the size of the memory used by a storage.
-@return bytes used */
-UNIV_INLINE
-ulint
-ha_storage_get_size(
-/*================*/
- const ha_storage_t* storage) /*!< in: hash storage */
-{
- ulint ret;
-
- ret = mem_heap_get_size(storage->heap);
-
- /* this assumes hash->heap and hash->heaps are NULL */
- ret += sizeof(hash_table_t);
- ret += sizeof(hash_cell_t) * hash_get_n_cells(storage->hash);
-
- return(ret);
-}
diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h
deleted file mode 100644
index b053be9e61d..00000000000
--- a/storage/xtradb/include/ha_prototypes.h
+++ /dev/null
@@ -1,692 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ha_prototypes.h
-Prototypes for global functions in ha_innodb.cc that are called by
-InnoDB C code
-
-Created 5/11/2006 Osku Salerma
-************************************************************************/
-
-#ifndef HA_INNODB_PROTOTYPES_H
-#define HA_INNODB_PROTOTYPES_H
-
-#include "my_dbug.h"
-#include "my_compare.h"
-#include "my_sys.h"
-#include "m_string.h"
-#include "my_base.h"
-#include "dur_prop.h"
-
-#ifndef UNIV_INNOCHECKSUM
-#include "mysqld_error.h"
-#include "debug_sync.h"
-#include "trx0types.h"
-#endif
-
-#include "m_ctype.h" /* CHARSET_INFO */
-
-// Forward declarations
-class Field;
-struct fts_string_t;
-
-/*********************************************************************//**
-Wrapper around MySQL's copy_and_convert function.
-@return number of bytes copied to 'to' */
-UNIV_INTERN
-ulint
-innobase_convert_string(
-/*====================*/
- void* to, /*!< out: converted string */
- ulint to_length, /*!< in: number of bytes reserved
- for the converted string */
- CHARSET_INFO* to_cs, /*!< in: character set to convert to */
- const void* from, /*!< in: string to convert */
- ulint from_length, /*!< in: number of bytes to convert */
- CHARSET_INFO* from_cs, /*!< in: character set to convert
- from */
- uint* errors); /*!< out: number of errors encountered
- during the conversion */
-
-/*******************************************************************//**
-Formats the raw data in "data" (in InnoDB on-disk format) that is of
-type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes
-the result to "buf". The result is converted to "system_charset_info".
-Not more than "buf_size" bytes are written to "buf".
-The result is always NUL-terminated (provided buf_size > 0) and the
-number of bytes that were written to "buf" is returned (including the
-terminating NUL).
-@return number of bytes that were written */
-UNIV_INTERN
-ulint
-innobase_raw_format(
-/*================*/
- const char* data, /*!< in: raw data */
- ulint data_len, /*!< in: raw data length
- in bytes */
- ulint charset_coll, /*!< in: charset collation */
- char* buf, /*!< out: output buffer */
- ulint buf_size); /*!< in: output buffer size
- in bytes */
-
-#ifndef UNIV_INNOCHECKSUM
-
-/*****************************************************************//**
-Invalidates the MySQL query cache for the table. */
-UNIV_INTERN
-void
-innobase_invalidate_query_cache(
-/*============================*/
- trx_t* trx, /*!< in: transaction which
- modifies the table */
- const char* full_name, /*!< in: concatenation of
- database name, null char NUL,
- table name, null char NUL;
- NOTE that in Windows this is
- always in LOWER CASE! */
- ulint full_name_len); /*!< in: full name length where
- also the null chars count */
-
-#endif /* #ifndef UNIV_INNOCHECKSUM */
-
-/*****************************************************************//**
-Convert a table or index name to the MySQL system_charset_info (UTF-8)
-and quote it if needed.
-@return pointer to the end of buf */
-UNIV_INTERN
-char*
-innobase_convert_name(
-/*==================*/
- char* buf, /*!< out: buffer for converted identifier */
- ulint buflen, /*!< in: length of buf, in bytes */
- const char* id, /*!< in: identifier to convert */
- ulint idlen, /*!< in: length of id, in bytes */
- THD* thd, /*!< in: MySQL connection thread, or NULL */
- ibool table_id);/*!< in: TRUE=id is a table or database name;
- FALSE=id is an index name */
-
-/******************************************************************//**
-Returns true if the thread is the replication thread on the slave
-server. Used in srv_conc_enter_innodb() to determine if the thread
-should be allowed to enter InnoDB - the replication thread is treated
-differently than other threads. Also used in
-srv_conc_force_exit_innodb().
-@return true if thd is the replication thread */
-UNIV_INTERN
-ibool
-thd_is_replication_slave_thread(
-/*============================*/
- THD* thd); /*!< in: thread handle */
-
-/******************************************************************//**
-Gets information on the durability property requested by thread.
-Used when writing either a prepare or commit record to the log
-buffer.
-@return the durability property. */
-UNIV_INTERN
-enum durability_properties
-thd_requested_durability(
-/*=====================*/
- const THD* thd) /*!< in: thread handle */
- MY_ATTRIBUTE((warn_unused_result));
-
-/******************************************************************//**
-Returns true if the transaction this thread is processing has edited
-non-transactional tables. Used by the deadlock detector when deciding
-which transaction to rollback in case of a deadlock - we try to avoid
-rolling back transactions that have edited non-transactional tables.
-@return true if non-transactional tables have been edited */
-UNIV_INTERN
-ibool
-thd_has_edited_nontrans_tables(
-/*===========================*/
- THD* thd); /*!< in: thread handle */
-
-/**
-Get high resolution timestamp for the current query start time.
-
-@return timestamp with microsecond precision
-*/
-unsigned long long thd_query_start_micro(const MYSQL_THD thd);
-
-/*************************************************************//**
-Prints info of a THD object (== user session thread) to the given file. */
-UNIV_INTERN
-void
-innobase_mysql_print_thd(
-/*=====================*/
- FILE* f, /*!< in: output stream */
- THD* thd, /*!< in: pointer to a MySQL THD object */
- uint max_query_len); /*!< in: max query length to print, or 0 to
- use the default max length */
-
-/*****************************************************************//**
-Log code calls this whenever log has been written and/or flushed up
-to a new position. We use this to notify upper layer of a new commit
-checkpoint when necessary.*/
-UNIV_INTERN
-void
-innobase_mysql_log_notify(
-/*===============*/
- ib_uint64_t write_lsn, /*!< in: LSN written to log file */
- ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */
-
-/*************************************************************//**
-InnoDB uses this function to compare two data fields for which the data type
-is such that we must use MySQL code to compare them.
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */
-UNIV_INTERN
-int
-innobase_mysql_cmp(
-/*===============*/
- int mysql_type, /*!< in: MySQL type */
- uint charset_number, /*!< in: number of the charset */
- const unsigned char* a, /*!< in: data field */
- unsigned int a_length, /*!< in: data field length,
- not UNIV_SQL_NULL */
- const unsigned char* b, /*!< in: data field */
- unsigned int b_length) /*!< in: data field length,
- not UNIV_SQL_NULL */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**************************************************************//**
-Converts a MySQL type to an InnoDB type. Note that this function returns
-the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
-VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'.
-@return DATA_BINARY, DATA_VARCHAR, ... */
-UNIV_INTERN
-ulint
-get_innobase_type_from_mysql_type(
-/*==============================*/
- ulint* unsigned_flag, /*!< out: DATA_UNSIGNED if an
- 'unsigned type';
- at least ENUM and SET,
- and unsigned integer
- types are 'unsigned types' */
- const void* field) /*!< in: MySQL Field */
- MY_ATTRIBUTE((nonnull));
-
-/******************************************************************//**
-Get the variable length bounds of the given character set. */
-UNIV_INTERN
-void
-innobase_get_cset_width(
-/*====================*/
- ulint cset, /*!< in: MySQL charset-collation code */
- ulint* mbminlen, /*!< out: minimum length of a char (in bytes) */
- ulint* mbmaxlen); /*!< out: maximum length of a char (in bytes) */
-
-/******************************************************************//**
-Compares NUL-terminated UTF-8 strings case insensitively.
-@return 0 if a=b, <0 if a<b, >0 if a>b */
-UNIV_INTERN
-int
-innobase_strcasecmp(
-/*================*/
- const char* a, /*!< in: first string to compare */
- const char* b); /*!< in: second string to compare */
-
-/******************************************************************//**
-Compares NUL-terminated UTF-8 strings case insensitively. The
-second string contains wildcards.
-@return 0 if a match is found, 1 if not */
-UNIV_INTERN
-int
-innobase_wildcasecmp(
-/*=================*/
- const char* a, /*!< in: string to compare */
- const char* b); /*!< in: wildcard string to compare */
-
-/******************************************************************//**
-Strip dir name from a full path name and return only its file name.
-@return file name or "null" if no file name */
-UNIV_INTERN
-const char*
-innobase_basename(
-/*==============*/
- const char* path_name); /*!< in: full path name */
-
-/******************************************************************//**
-Returns true if the thread is executing a SELECT statement.
-@return true if thd is executing SELECT */
-UNIV_INTERN
-ibool
-thd_is_select(
-/*==========*/
- const THD* thd); /*!< in: thread handle */
-
-/******************************************************************//**
-Converts an identifier to a table name. */
-UNIV_INTERN
-void
-innobase_convert_from_table_id(
-/*===========================*/
- struct charset_info_st* cs, /*!< in: the 'from' character set */
- char* to, /*!< out: converted identifier */
- const char* from, /*!< in: identifier to convert */
- ulint len); /*!< in: length of 'to', in bytes; should
- be at least 5 * strlen(from) + 1 */
-/******************************************************************//**
-Converts an identifier to UTF-8. */
-UNIV_INTERN
-void
-innobase_convert_from_id(
-/*=====================*/
- struct charset_info_st* cs, /*!< in: the 'from' character set */
- char* to, /*!< out: converted identifier */
- const char* from, /*!< in: identifier to convert */
- ulint len); /*!< in: length of 'to', in bytes;
- should be at least 3 * strlen(from) + 1 */
-/******************************************************************//**
-Makes all characters in a NUL-terminated UTF-8 string lower case. */
-UNIV_INTERN
-void
-innobase_casedn_str(
-/*================*/
- char* a); /*!< in/out: string to put in lower case */
-
-#ifdef WITH_WSREP
-UNIV_INTERN
-int
-wsrep_innobase_kill_one_trx(void * const thd_ptr,
- const trx_t * const bf_trx,
- trx_t *victim_trx,
- ibool signal);
-int wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
- unsigned char* str, unsigned int str_length,
- unsigned int buf_length);
-#endif /* WITH_WSREP */
-/**********************************************************************//**
-Determines the connection character set.
-@return connection character set */
-UNIV_INTERN
-struct charset_info_st*
-innobase_get_charset(
-/*=================*/
- THD* thd); /*!< in: MySQL thread handle */
-/**********************************************************************//**
-Determines the current SQL statement.
-@return SQL statement string */
-UNIV_INTERN
-const char*
-innobase_get_stmt(
-/*==============*/
- THD* thd, /*!< in: MySQL thread handle */
- size_t* length) /*!< out: length of the SQL statement */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-This function is used to find the storage length in bytes of the first n
-characters for prefix indexes using a multibyte character set. The function
-finds charset information and returns length of prefix_len characters in the
-index field in bytes.
-@return number of bytes occupied by the first n characters */
-UNIV_INTERN
-ulint
-innobase_get_at_most_n_mbchars(
-/*===========================*/
- ulint charset_id, /*!< in: character set id */
- ulint prefix_len, /*!< in: prefix length in bytes of the index
- (this has to be divided by mbmaxlen to get the
- number of CHARACTERS n in the prefix) */
- ulint data_len, /*!< in: length of the string in bytes */
- const char* str); /*!< in: character string */
-
-/*************************************************************//**
-InnoDB index push-down condition check
-@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */
-UNIV_INTERN
-enum icp_result
-innobase_index_cond(
-/*================*/
- void* file) /*!< in/out: pointer to ha_innobase */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Returns true if the thread supports XA,
-global value of innodb_supports_xa if thd is NULL.
-@return true if thd supports XA */
-UNIV_INTERN
-ibool
-thd_supports_xa(
-/*============*/
- THD* thd); /*!< in: thread handle, or NULL to query
- the global innodb_supports_xa */
-
-/** Get status of innodb_tmpdir.
-@param[in] thd thread handle, or NULL to query
- the global innodb_tmpdir.
-@retval NULL if innodb_tmpdir="" */
-UNIV_INTERN
-const char*
-thd_innodb_tmpdir(
- THD* thd);
-
-/******************************************************************//**
-Check the status of fake changes mode (innodb_fake_changes)
-@return true if fake change mode is enabled. */
-UNIV_INTERN
-ibool
-thd_fake_changes(
-/*=============*/
- THD* thd); /*!< in: thread handle, or NULL to query
- the global innodb_fake_changes */
-
-/******************************************************************//**
-Returns the lock wait timeout for the current connection.
-@return the lock wait timeout, in seconds */
-UNIV_INTERN
-ulong
-thd_lock_wait_timeout(
-/*==================*/
- THD* thd); /*!< in: thread handle, or NULL to query
- the global innodb_lock_wait_timeout */
-/******************************************************************//**
-Add up the time waited for the lock for the current query. */
-UNIV_INTERN
-void
-thd_set_lock_wait_time(
-/*===================*/
- THD* thd, /*!< in/out: thread handle */
- ulint value); /*!< in: time waited for the lock */
-
-/**********************************************************************//**
-Get the current setting of the table_cache_size global parameter. We do
-a dirty read because for one there is no synchronization object and
-secondly there is little harm in doing so even if we get a torn read.
-@return value of table_cache_size */
-UNIV_INTERN
-ulint
-innobase_get_table_cache_size(void);
-/*===============================*/
-
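-/******************************************************************//**
-NOTE(review): this declaration carried no description. Judging by its name
-it presumably returns the flush_log_at_trx_commit setting that applies to
-the given thread handle -- confirm against the definition.
- */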
-ulong
-thd_flush_log_at_trx_commit(
-/*================================*/
- void* thd);
-
-/**********************************************************************//**
-Get the current setting of the lower_case_table_names global parameter from
-mysqld.cc. We do a dirty read because for one there is no synchronization
-object and secondly there is little harm in doing so even if we get a torn
-read.
-@return value of lower_case_table_names */
-UNIV_INTERN
-ulint
-innobase_get_lower_case_table_names(void);
-/*=====================================*/
-
-/*****************************************************************//**
-Frees a possible InnoDB trx object associated with the current THD.
-@return 0 or error number */
-UNIV_INTERN
-int
-innobase_close_thd(
-/*===============*/
- THD* thd); /*!< in: MySQL thread handle for
- which to close the connection */
-/*************************************************************//**
-Get the next token from the given string and store it in *token. */
-UNIV_INTERN
-ulint
-innobase_mysql_fts_get_token(
-/*=========================*/
- CHARSET_INFO* charset, /*!< in: Character set */
- const byte* start, /*!< in: start of text */
- const byte* end, /*!< in: one character past end of
- text */
- fts_string_t* token, /*!< out: token's text */
- ulint* offset); /*!< out: offset to token,
- measured as characters from
- 'start' */
-
-/******************************************************************//**
-compare two character string case insensitively according to their charset. */
-UNIV_INTERN
-int
-innobase_fts_text_case_cmp(
-/*=======================*/
- const void* cs, /*!< in: Character set */
- const void* p1, /*!< in: key */
- const void* p2); /*!< in: node */
-
-/****************************************************************//**
-Get FTS field charset info from the field's prtype
-@return charset info */
-UNIV_INTERN
-CHARSET_INFO*
-innobase_get_fts_charset(
-/*=====================*/
- int mysql_type, /*!< in: MySQL type */
- uint charset_number);/*!< in: number of the charset */
-/******************************************************************//**
-Returns true if transaction should be flagged as read-only.
-@return true if the thd is marked as read-only */
-UNIV_INTERN
-ibool
-thd_trx_is_read_only(
-/*=================*/
- THD* thd); /*!< in/out: thread handle */
-
-/******************************************************************//**
-Check if the transaction is an auto-commit transaction. TRUE also
-implies that it is a SELECT (read-only) transaction.
-@return true if the transaction is an auto commit read-only transaction. */
-UNIV_INTERN
-ibool
-thd_trx_is_auto_commit(
-/*===================*/
- THD* thd); /*!< in: thread handle, or NULL */
-
-/*****************************************************************//**
-A wrapper function of innobase_convert_name(), convert a table or
-index name to the MySQL system_charset_info (UTF-8) and quote it if needed.
-The function is declared void: nothing is returned to the caller. */
-UNIV_INTERN
-void
-innobase_format_name(
-/*==================*/
- char* buf, /*!< out: buffer for converted
- identifier */
- ulint buflen, /*!< in: length of buf, in bytes */
- const char* name, /*!< in: index or table name
- to format */
- ibool is_index_name) /*!< in: index name */
- MY_ATTRIBUTE((nonnull));
-
-/** Corresponds to Sql_condition:enum_warning_level. */
-enum ib_log_level_t {
- IB_LOG_LEVEL_INFO,
- IB_LOG_LEVEL_WARN,
- IB_LOG_LEVEL_ERROR,
- IB_LOG_LEVEL_FATAL
-};
-
-/******************************************************************//**
-Use this when the args are first converted to a formatted string and then
-passed to the format string from errmsg-utf8.txt. The error message format
-must be: "Some string ... %s".
-
-Push a warning message to the client, it is a wrapper around:
-
-void push_warning_printf(
- THD *thd, Sql_condition::enum_warning_level level,
- uint code, const char *format, ...);
-*/
-UNIV_INTERN
-void
-ib_errf(
-/*====*/
- THD* thd, /*!< in/out: session */
- ib_log_level_t level, /*!< in: warning level */
- ib_uint32_t code, /*!< MySQL error code */
- const char* format, /*!< printf format */
- ...) /*!< Args */
- MY_ATTRIBUTE((format(printf, 4, 5)));
-
-/******************************************************************//**
-Use this when the args are passed to the format string from
-errmsg-utf8.txt directly as is.
-
-Push a warning message to the client, it is a wrapper around:
-
-void push_warning_printf(
- THD *thd, Sql_condition::enum_warning_level level,
- uint code, const char *format, ...);
-*/
-UNIV_INTERN
-void
-ib_senderrf(
-/*========*/
- THD* thd, /*!< in/out: session */
- ib_log_level_t level, /*!< in: warning level */
- ib_uint32_t code, /*!< MySQL error code */
- ...); /*!< Args */
-
-/******************************************************************//**
-Write a message to the MySQL log, prefixed with "InnoDB: ".
-Wrapper around sql_print_information() */
-UNIV_INTERN
-void
-ib_logf(
-/*====*/
- ib_log_level_t level, /*!< in: warning level */
- const char* format, /*!< printf format */
- ...) /*!< Args */
- MY_ATTRIBUTE((format(printf, 2, 3)));
-
-/******************************************************************//**
-Returns the NUL terminated value of glob_hostname.
-@return pointer to glob_hostname. */
-UNIV_INTERN
-const char*
-server_get_hostname();
-/*=================*/
-
-/******************************************************************//**
-Get the error message format string.
-@return the format string or 0 if not found. */
-UNIV_INTERN
-const char*
-innobase_get_err_msg(
-/*=================*/
- int error_code); /*!< in: MySQL error code */
-
-/*********************************************************************//**
-Compute the next autoinc value.
-
-For MySQL replication the autoincrement values can be partitioned among
-the nodes. The offset is the start or origin of the autoincrement value
-for a particular node. For n nodes the increment will be n and the offset
-will be in the interval [1, n]. The formula tries to allocate the next
-value for a particular node.
-
-Note: This function is also called with increment set to the number of
-values we want to reserve for multi-value inserts e.g.,
-
- INSERT INTO T VALUES(), (), ();
-
-innobase_next_autoinc() will be called with increment set to 3 where
-autoinc_lock_mode != TRADITIONAL because we want to reserve 3 values for
-the multi-value INSERT above.
-@return the next value */
-UNIV_INTERN
-ulonglong
-innobase_next_autoinc(
-/*==================*/
- ulonglong current, /*!< in: Current value */
- ulonglong need, /*!< in: count of values needed */
- ulonglong step, /*!< in: AUTOINC increment step */
- ulonglong offset, /*!< in: AUTOINC offset */
- ulonglong max_value) /*!< in: max value for type */
- MY_ATTRIBUTE((pure, warn_unused_result));
-
-/********************************************************************//**
-Get the upper limit of the MySQL integral and floating-point type.
-@return maximum allowed value for the field */
-UNIV_INTERN
-ulonglong
-innobase_get_int_col_max_value(
-/*===========================*/
- const Field* field) /*!< in: MySQL field */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-
-/**********************************************************************
-Check if the length of the identifier exceeds the maximum allowed.
-The input to this function is an identifier in charset my_charset_filename.
-return true when length of identifier is too long. */
-UNIV_INTERN
-my_bool
-innobase_check_identifier_length(
-/*=============================*/
- const char* id); /* in: identifier to check. it must belong
- to charset my_charset_filename */
-
-/**********************************************************************
-Converts an identifier from my_charset_filename to UTF-8 charset. */
-uint
-innobase_convert_to_system_charset(
-/*===============================*/
- char* to, /* out: converted identifier */
- const char* from, /* in: identifier to convert */
- ulint len, /* in: length of 'to', in bytes */
- uint* errors); /* out: error return */
-
-/**********************************************************************
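-Converts an identifier to the my_charset_filename charset (presumably from
-the UTF-8 system charset, i.e. the reverse of
-innobase_convert_to_system_charset() -- confirm against the definition). */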
-uint
-innobase_convert_to_filename_charset(
-/*=================================*/
- char* to, /* out: converted identifier */
- const char* from, /* in: identifier to convert */
- ulint len); /* in: length of 'to', in bytes */
-
-/********************************************************************//**
-Helper function to push warnings from InnoDB internals to SQL-layer. */
-UNIV_INTERN
-void
-ib_push_warning(
- trx_t* trx, /*!< in: trx */
- ulint error, /*!< in: error code to push as warning */
- const char *format,/*!< in: warning message */
- ...);
-/********************************************************************//**
-Helper function to push warnings from InnoDB internals to SQL-layer. */
-UNIV_INTERN
-void
-ib_push_warning(
- void* ithd, /*!< in: thd */
- ulint error, /*!< in: error code to push as warning */
- const char *format,/*!< in: warning message */
- ...);
-
-/*****************************************************************//**
-Normalizes a table name string. A normalized name consists of the
-database name catenated to '/' and table name. An example:
-test/mytable. On Windows normalization puts both the database name and the
-table name always to lower case if "set_lower_case" is set to TRUE. */
-void
-normalize_table_name_low(
-/*=====================*/
- char* norm_name, /*!< out: normalized name as a
- null-terminated string */
- const char* name, /*!< in: table name string */
- ibool set_lower_case); /*!< in: TRUE if we want to set
- name to lower case */
-#endif /* HA_INNODB_PROTOTYPES_H */
diff --git a/storage/xtradb/include/handler0alter.h b/storage/xtradb/include/handler0alter.h
deleted file mode 100644
index 3dd6c99eb6d..00000000000
--- a/storage/xtradb/include/handler0alter.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/handler0alter.h
-Smart ALTER TABLE
-*******************************************************/
-
-/*************************************************************//**
-Copies an InnoDB record to table->record[0]. */
-UNIV_INTERN
-void
-innobase_rec_to_mysql(
-/*==================*/
- struct TABLE* table, /*!< in/out: MySQL table */
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: index */
- const ulint* offsets)/*!< in: rec_get_offsets(
- rec, index, ...) */
- MY_ATTRIBUTE((nonnull));
-
-/*************************************************************//**
-Copies an InnoDB index entry to table->record[0]. */
-UNIV_INTERN
-void
-innobase_fields_to_mysql(
-/*=====================*/
- struct TABLE* table, /*!< in/out: MySQL table */
- const dict_index_t* index, /*!< in: InnoDB index */
- const dfield_t* fields) /*!< in: InnoDB index fields */
- MY_ATTRIBUTE((nonnull));
-
-/*************************************************************//**
-Copies an InnoDB row to table->record[0]. */
-UNIV_INTERN
-void
-innobase_row_to_mysql(
-/*==================*/
- struct TABLE* table, /*!< in/out: MySQL table */
- const dict_table_t* itab, /*!< in: InnoDB table */
- const dtuple_t* row) /*!< in: InnoDB row */
- MY_ATTRIBUTE((nonnull));
-
-/*************************************************************//**
-Resets table->record[0]. */
-UNIV_INTERN
-void
-innobase_rec_reset(
-/*===============*/
- struct TABLE* table) /*!< in/out: MySQL table */
- MY_ATTRIBUTE((nonnull));
-
-/** Generate the next autoinc based on a snapshot of the session
-auto_increment_increment and auto_increment_offset variables. */
-struct ib_sequence_t {
-
- /**
- Constructor.
- @param thd - the session
- @param start_value - the lower bound
- @param max_value - the upper bound (inclusive) */
- ib_sequence_t(THD* thd, ulonglong start_value, ulonglong max_value);
-
- /**
- Postfix increment
- @return the value to insert */
- ulonglong operator++(int) UNIV_NOTHROW;
-
- /** Check if the autoinc "sequence" is exhausted.
- @return true if the sequence is exhausted */
- bool eof() const UNIV_NOTHROW
- {
- return(m_eof);
- }
-
- /**
- Read m_next_value without advancing the sequence. A debug assertion
- requires the value to be greater than 0.
- @return the next value in the sequence */
- ulonglong last() const UNIV_NOTHROW
- {
- ut_ad(m_next_value > 0);
-
- return(m_next_value);
- }
-
- /** Maximum column value if adding an AUTOINC column else 0. Once
- we reach the end of the sequence it will be set to ~0.
- NOTE(review): this member is const, so it cannot be reassigned once
- constructed; the "set to ~0" remark may describe another member --
- confirm against the implementation. */
- const ulonglong m_max_value;
-
- /** Value of auto_increment_increment */
- ulong m_increment;
-
- /** Value of auto_increment_offset */
- ulong m_offset;
-
- /** Next value in the sequence */
- ulonglong m_next_value;
-
- /** true if no more values left in the sequence */
- bool m_eof;
-};
diff --git a/storage/xtradb/include/hash0hash.h b/storage/xtradb/include/hash0hash.h
deleted file mode 100644
index 68d3c6ace4e..00000000000
--- a/storage/xtradb/include/hash0hash.h
+++ /dev/null
@@ -1,603 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/hash0hash.h
-The simple hash table utility
-
-Created 5/20/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef hash0hash_h
-#define hash0hash_h
-
-#include "univ.i"
-#include "mem0mem.h"
-#ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
-# include "sync0rw.h"
-#endif /* !UNIV_HOTBACKUP */
-
-struct hash_table_t;
-struct hash_cell_t;
-
-typedef void* hash_node_t;
-
-/* Fix Bug #13859: symbol collision between imap/mysql */
-#define hash_create hash0_create
-
-/* Different types of hash_table based on the synchronization
-method used for it. */
-enum hash_table_sync_t {
- HASH_TABLE_SYNC_NONE = 0, /*!< Don't use any internal
- synchronization objects for
- this hash_table. */
- HASH_TABLE_SYNC_MUTEX, /*!< Use mutexes to control
- access to this hash_table. */
- HASH_TABLE_SYNC_RW_LOCK /*!< Use rw_locks to control
- access to this hash_table. */
-};
-
-/*************************************************************//**
-Creates a hash table with >= n array cells. The actual number
-of cells is chosen to be a prime number slightly bigger than n.
-@return own: created table */
-UNIV_INTERN
-hash_table_t*
-hash_create(
-/*========*/
- ulint n); /*!< in: number of array cells */
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Creates a sync object array to protect a hash table.
-::sync_obj can be mutexes or rw_locks depending on the type of
-hash table. */
-UNIV_INTERN
-void
-hash_create_sync_obj_func(
-/*======================*/
- hash_table_t* table, /*!< in: hash table */
- enum hash_table_sync_t type, /*!< in: HASH_TABLE_SYNC_MUTEX
- or HASH_TABLE_SYNC_RW_LOCK */
-#ifdef UNIV_SYNC_DEBUG
- ulint sync_level,/*!< in: latching order level
- of the mutexes: used in the
- debug version */
-#endif /* UNIV_SYNC_DEBUG */
- ulint n_sync_obj);/*!< in: number of sync objects,
- must be a power of 2 */
-#ifdef UNIV_SYNC_DEBUG
-# define hash_create_sync_obj(t, s, n, level) \
- hash_create_sync_obj_func(t, s, level, n)
-#else /* UNIV_SYNC_DEBUG */
-# define hash_create_sync_obj(t, s, n, level) \
- hash_create_sync_obj_func(t, s, n)
-#endif /* UNIV_SYNC_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************//**
-Frees a hash table. */
-UNIV_INTERN
-void
-hash_table_free(
-/*============*/
- hash_table_t* table); /*!< in, own: hash table */
-/**************************************************************//**
-Calculates the hash value from a folded value.
-@return hashed value */
-UNIV_INLINE
-ulint
-hash_calc_hash(
-/*===========*/
- ulint fold, /*!< in: folded value */
- hash_table_t* table); /*!< in: hash table */
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Assert that the mutex for the table is held */
-# define HASH_ASSERT_OWN(TABLE, FOLD) \
- ut_ad((TABLE)->type != HASH_TABLE_SYNC_MUTEX \
- || (mutex_own(hash_get_mutex((TABLE), FOLD))));
-#else /* !UNIV_HOTBACKUP */
-# define HASH_ASSERT_OWN(TABLE, FOLD)
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Inserts a struct to a hash table.
-TYPE is the struct type, NAME the name of its next-in-chain field, TABLE the
-hash table, FOLD the fold value that selects the cell and DATA the struct to
-insert. DATA's NAME field is set to NULL and DATA is appended at the tail of
-the cell's chain, which means walking the whole chain first. */
-
-#define HASH_INSERT(TYPE, NAME, TABLE, FOLD, DATA)\
-do {\
- hash_cell_t* cell3333;\
- TYPE* struct3333;\
-\
- HASH_ASSERT_OWN(TABLE, FOLD)\
-\
- (DATA)->NAME = NULL;\
-\
- cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
-\
- if (cell3333->node == NULL) {\
- cell3333->node = DATA;\
- } else {\
- struct3333 = (TYPE*) cell3333->node;\
-\
- while (struct3333->NAME != NULL) {\
-\
- struct3333 = (TYPE*) struct3333->NAME;\
- }\
-\
- struct3333->NAME = DATA;\
- }\
-} while (0)
-
-#ifdef WITH_WSREP
-/*******************************************************************//**
-Inserts a struct to the head of a hash chain.
-TYPE is the struct type, NAME the name of its next-in-chain field, TABLE the
-hash table, FOLD the fold value that selects the cell and DATA the struct to
-insert. DATA is parenthesized on every use so that any pointer expression
-can be passed, as with HASH_INSERT. */
-
-#define HASH_PREPEND(TYPE, NAME, TABLE, FOLD, DATA)\
-do {\
-	hash_cell_t*	cell3333;\
-	TYPE*		struct3333;\
-\
-	HASH_ASSERT_OWN(TABLE, FOLD)\
-\
-	cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
-\
-	/* The previous head follows DATA; for an empty cell that head\
-	is NULL, so DATA ends up as a one-element chain. */\
-	struct3333 = (TYPE*) cell3333->node;\
-\
-	(DATA)->NAME = struct3333;\
-\
-	cell3333->node = (DATA);\
-} while (0)
-#endif /*WITH_WSREP */
-#ifdef UNIV_HASH_DEBUG
-# define HASH_ASSERT_VALID(DATA) ut_a((void*) (DATA) != (void*) -1)
-# define HASH_INVALIDATE(DATA, NAME) *(void**) (&DATA->NAME) = (void*) -1
-#else
-# define HASH_ASSERT_VALID(DATA) do {} while (0)
-# define HASH_INVALIDATE(DATA, NAME) do {} while (0)
-#endif
-
-/*******************************************************************//**
-Deletes a struct from a hash table.
-Unlinks DATA from the chain of the cell selected by FOLD, either at the
-chain head or by walking the chain to DATA's predecessor. DATA is expected
-to be in that chain: walking off the end of the chain trips ut_a(). With
-UNIV_HASH_DEBUG, DATA's NAME field is then overwritten with (void*) -1. */
-
-#define HASH_DELETE(TYPE, NAME, TABLE, FOLD, DATA)\
-do {\
- hash_cell_t* cell3333;\
- TYPE* struct3333;\
-\
- HASH_ASSERT_OWN(TABLE, FOLD)\
-\
- cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\
-\
- if (cell3333->node == DATA) {\
- HASH_ASSERT_VALID(DATA->NAME);\
- cell3333->node = DATA->NAME;\
- } else {\
- struct3333 = (TYPE*) cell3333->node;\
-\
- while (struct3333->NAME != DATA) {\
-\
- struct3333 = (TYPE*) struct3333->NAME;\
- ut_a(struct3333);\
- }\
-\
- struct3333->NAME = DATA->NAME;\
- }\
- HASH_INVALIDATE(DATA, NAME);\
-} while (0)
-
-/*******************************************************************//**
-Gets the first struct in a hash chain, NULL if none. */
-
-#define HASH_GET_FIRST(TABLE, HASH_VAL)\
- (hash_get_nth_cell(TABLE, HASH_VAL)->node)
-
-/*******************************************************************//**
-Gets the next struct in a hash chain, NULL if none. */
-
-#define HASH_GET_NEXT(NAME, DATA) ((DATA)->NAME)
-
-/********************************************************************//**
-Looks for a struct in a hash table.
-Walks the chain of the cell selected by FOLD and leaves DATA pointing at the
-first node for which TEST holds, or NULL if there is none. ASSERTION is
-evaluated for every node visited, before TEST. TYPE is the pointer type
-DATA is cast to; NAME is the name of the next-in-chain field.
-NOTE: unlike HASH_INSERT and HASH_DELETE this expands to a bare { } block,
-not do { } while (0), so a trailing semicolon adds an empty statement. */
-#define HASH_SEARCH(NAME, TABLE, FOLD, TYPE, DATA, ASSERTION, TEST)\
-{\
-\
- HASH_ASSERT_OWN(TABLE, FOLD)\
-\
- (DATA) = (TYPE) HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\
- HASH_ASSERT_VALID(DATA);\
-\
- while ((DATA) != NULL) {\
- ASSERTION;\
- if (TEST) {\
- break;\
- } else {\
- HASH_ASSERT_VALID(HASH_GET_NEXT(NAME, DATA));\
- (DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA);\
- }\
- }\
-}
-
-/********************************************************************//**
-Looks for an item in all hash buckets.
-Cells are visited from index n_cells - 1 down to 0; within a cell the chain
-is walked from its head. DATA is left pointing at the first node for which
-TEST holds, or NULL when no node in the table matches. ASSERTION is
-evaluated for every node visited, before TEST. */
-#define HASH_SEARCH_ALL(NAME, TABLE, TYPE, DATA, ASSERTION, TEST) \
-do { \
- ulint i3333; \
- \
- for (i3333 = (TABLE)->n_cells; i3333--; ) { \
- (DATA) = (TYPE) HASH_GET_FIRST(TABLE, i3333); \
- \
- while ((DATA) != NULL) { \
- HASH_ASSERT_VALID(DATA); \
- ASSERTION; \
- \
- if (TEST) { \
- break; \
- } \
- \
- (DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA); \
- } \
- \
- if ((DATA) != NULL) { \
- break; \
- } \
- } \
-} while (0)
-
-/************************************************************//**
-Gets the nth cell in a hash table.
-@return pointer to cell */
-UNIV_INLINE
-hash_cell_t*
-hash_get_nth_cell(
-/*==============*/
- hash_table_t* table, /*!< in: hash table */
- ulint n); /*!< in: cell index */
-
-/*************************************************************//**
-Clears a hash table so that all the cells become empty. */
-UNIV_INLINE
-void
-hash_table_clear(
-/*=============*/
- hash_table_t* table); /*!< in/out: hash table */
-
-/*************************************************************//**
-Returns the number of cells in a hash table.
-@return number of cells */
-UNIV_INLINE
-ulint
-hash_get_n_cells(
-/*=============*/
- hash_table_t* table); /*!< in: table */
-/*******************************************************************//**
-Deletes a struct which is stored in the heap of the hash table, and compacts
-the heap. The fold value must be stored in the struct NODE in a field named
-'fold'. The node on top of the heap is copied over NODE by struct
-assignment and the chain link that pointed at the top node is redirected
-to NODE, after which sizeof(TYPE) bytes are freed from the top of the heap. */
-
-#define HASH_DELETE_AND_COMPACT(TYPE, NAME, TABLE, NODE)\
-do {\
- TYPE* node111;\
- TYPE* top_node111;\
- hash_cell_t* cell111;\
- ulint fold111;\
-\
- fold111 = (NODE)->fold;\
-\
- HASH_DELETE(TYPE, NAME, TABLE, fold111, NODE);\
-\
- top_node111 = (TYPE*) mem_heap_get_top(\
- hash_get_heap(TABLE, fold111),\
- sizeof(TYPE));\
-\
- /* If the node to remove is not the top node in the heap, compact the\
- heap of nodes by moving the top node in the place of NODE. */\
-\
- if (NODE != top_node111) {\
-\
- /* Copy the top node in place of NODE */\
-\
- *(NODE) = *top_node111;\
-\
- cell111 = hash_get_nth_cell(TABLE,\
- hash_calc_hash(top_node111->fold, TABLE));\
-\
- /* Look for the pointer to the top node, to update it */\
-\
- if (cell111->node == top_node111) {\
- /* The top node is the first in the chain */\
-\
- cell111->node = NODE;\
- } else {\
- /* We have to look for the predecessor of the top\
- node */\
- node111 = static_cast<TYPE*>(cell111->node);\
-\
- while (top_node111 != HASH_GET_NEXT(NAME, node111)) {\
-\
- node111 = static_cast<TYPE*>(\
- HASH_GET_NEXT(NAME, node111));\
- }\
-\
- /* Now we have the predecessor node */\
-\
- node111->NAME = NODE;\
- }\
- }\
-\
- /* Free the space occupied by the top node */\
-\
- mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\
-} while (0)
-
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Move all hash table entries from OLD_TABLE to NEW_TABLE.
-NODE_TYPE is the node struct type, PTR_NAME the name of its next-in-chain
-field, and FOLD_FUNC(node) must yield the fold value of a node. Every node
-is re-inserted with HASH_INSERT, which resets its PTR_NAME field; the cells
-of OLD_TABLE themselves are not modified by this macro. */
-
-#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \
-do {\
-	ulint		i2222;\
-	ulint		cell_count2222;\
-\
-	cell_count2222 = hash_get_n_cells(OLD_TABLE);\
-\
-	for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
-		/* hash_cell_t::node is a void*, which C++ does not\
-		convert to NODE_TYPE* implicitly: cast explicitly. */\
-		NODE_TYPE*	node2222 = static_cast<NODE_TYPE*>(\
-			HASH_GET_FIRST((OLD_TABLE), i2222));\
-\
-		while (node2222) {\
-			/* Remember the successor first: HASH_INSERT\
-			overwrites the PTR_NAME field of the node. */\
-			NODE_TYPE*	next2222 = static_cast<NODE_TYPE*>(\
-				node2222->PTR_NAME);\
-			ulint		fold2222 = FOLD_FUNC(node2222);\
-\
-			HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\
-				fold2222, node2222);\
-\
-			node2222 = next2222;\
-		}\
-	}\
-} while (0)
-
-/************************************************************//**
-Gets the sync object index for a fold value in a hash table.
-@return index */
-UNIV_INLINE
-ulint
-hash_get_sync_obj_index(
-/*====================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-Gets the nth heap in a hash table.
-@return mem heap */
-UNIV_INLINE
-mem_heap_t*
-hash_get_nth_heap(
-/*==============*/
- hash_table_t* table, /*!< in: hash table */
- ulint i); /*!< in: index of the heap */
-/************************************************************//**
-Gets the heap for a fold value in a hash table.
-@return mem heap */
-UNIV_INLINE
-mem_heap_t*
-hash_get_heap(
-/*==========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-Gets the nth mutex in a hash table.
-@return mutex */
-UNIV_INLINE
-ib_prio_mutex_t*
-hash_get_nth_mutex(
-/*===============*/
- hash_table_t* table, /*!< in: hash table */
- ulint i); /*!< in: index of the mutex */
-/************************************************************//**
-Gets the nth rw_lock in a hash table.
-@return rw_lock */
-UNIV_INLINE
-prio_rw_lock_t*
-hash_get_nth_lock(
-/*==============*/
- hash_table_t* table, /*!< in: hash table */
- ulint i); /*!< in: index of the rw_lock */
-/************************************************************//**
-Gets the mutex for a fold value in a hash table.
-@return mutex */
-UNIV_INLINE
-ib_prio_mutex_t*
-hash_get_mutex(
-/*===========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-Gets the rw_lock for a fold value in a hash table.
-@return rw_lock */
-UNIV_INLINE
-prio_rw_lock_t*
-hash_get_lock(
-/*==========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-Reserves the mutex for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_mutex_enter(
-/*=============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-Releases the mutex for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_mutex_exit(
-/*============*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-Reserves all the mutexes of a hash table, in an ascending order. */
-UNIV_INTERN
-void
-hash_mutex_enter_all(
-/*=================*/
- hash_table_t* table); /*!< in: hash table */
-/************************************************************//**
-Releases all the mutexes of a hash table. */
-UNIV_INTERN
-void
-hash_mutex_exit_all(
-/*================*/
- hash_table_t* table); /*!< in: hash table */
-/************************************************************//**
-Releases all but the passed in mutex of a hash table. */
-UNIV_INTERN
-void
-hash_mutex_exit_all_but(
-/*====================*/
- hash_table_t* table, /*!< in: hash table */
- ib_prio_mutex_t* keep_mutex); /*!< in: mutex to keep */
-/************************************************************//**
-s-lock a lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_lock_s(
-/*========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-x-lock a lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_lock_x(
-/*========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-unlock an s-lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_unlock_s(
-/*==========*/
-
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-unlock x-lock for a fold value in a hash table. */
-UNIV_INTERN
-void
-hash_unlock_x(
-/*==========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold); /*!< in: fold */
-/************************************************************//**
-Reserves all the locks of a hash table, in an ascending order. */
-UNIV_INTERN
-void
-hash_lock_x_all(
-/*============*/
- hash_table_t* table); /*!< in: hash table */
-/************************************************************//**
-Releases all the locks of a hash table, in an ascending order. */
-UNIV_INTERN
-void
-hash_unlock_x_all(
-/*==============*/
- hash_table_t* table); /*!< in: hash table */
-/************************************************************//**
-Releases all but passed in lock of a hash table, */
-UNIV_INTERN
-void
-hash_unlock_x_all_but(
-/*==================*/
- hash_table_t* table, /*!< in: hash table */
- prio_rw_lock_t* keep_lock); /*!< in: lock to keep */
-
-#else /* !UNIV_HOTBACKUP */
-# define hash_get_heap(table, fold) ((table)->heap)
-# define hash_mutex_enter(table, fold) ((void) 0)
-# define hash_mutex_exit(table, fold) ((void) 0)
-# define hash_mutex_enter_all(table) ((void) 0)
-# define hash_mutex_exit_all(table) ((void) 0)
-# define hash_mutex_exit_all_but(t, m) ((void) 0)
-# define hash_lock_s(t, f) ((void) 0)
-# define hash_lock_x(t, f) ((void) 0)
-# define hash_unlock_s(t, f) ((void) 0)
-# define hash_unlock_x(t, f) ((void) 0)
-# define hash_lock_x_all(t) ((void) 0)
-# define hash_unlock_x_all(t) ((void) 0)
-# define hash_unlock_x_all_but(t, l) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-struct hash_cell_t{
- void* node; /*!< hash chain node, NULL if none */
-};
-
-/* The hash table structure */
-struct hash_table_t {
- enum hash_table_sync_t type; /*!< type of hash_table. */
-#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
- ibool adaptive;/* TRUE if this is the hash
- table of the adaptive hash
- index */
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- ulint n_cells;/* number of cells in the hash table */
- hash_cell_t* array; /*!< pointer to cell array */
-#ifndef UNIV_HOTBACKUP
- ulint n_sync_obj;/* if sync_obj != NULL, then
- the number of mutexes or the
- number of rw_locks, depending
- on the type.
- Must be a power of 2 */
- union {
- ib_prio_mutex_t* mutexes;
- /* NULL, or an array of mutexes
- used to protect segments of the
- hash table */
- prio_rw_lock_t* rw_locks;/* NULL, or an array of rw_locks
- used to protect segments of the
- hash table */
- } sync_obj;
-
- mem_heap_t** heaps; /*!< if this is non-NULL, hash
- chain nodes for external chaining
- can be allocated from these memory
- heaps; there are then n_mutexes
- many of these heaps
- NOTE(review): this struct has no
- n_mutexes field; presumably
- n_sync_obj is meant -- confirm */
-#endif /* !UNIV_HOTBACKUP */
- mem_heap_t* heap; /*!< single memory heap; hash_get_heap()
- resolves to it in UNIV_HOTBACKUP builds */
-#ifdef UNIV_DEBUG
- ulint magic_n; /*!< compared against HASH_TABLE_MAGIC_N
- by debug assertions */
-# define HASH_TABLE_MAGIC_N 76561114
-#endif /* UNIV_DEBUG */
-};
-
-#ifndef UNIV_NONINL
-#include "hash0hash.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/hash0hash.ic b/storage/xtradb/include/hash0hash.ic
deleted file mode 100644
index e4822538e19..00000000000
--- a/storage/xtradb/include/hash0hash.ic
+++ /dev/null
@@ -1,225 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/hash0hash.ic
-The simple hash table utility
-
-Created 5/20/1997 Heikki Tuuri
-*******************************************************/
-
-#include "ut0rnd.h"
-
-/************************************************************//**
-Gets the nth cell in a hash table.
-@return pointer to cell */
-UNIV_INLINE
-hash_cell_t*
-hash_get_nth_cell(
-/*==============*/
- hash_table_t* table, /*!< in: hash table */
- ulint n) /*!< in: cell index */
-{
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- ut_ad(n < table->n_cells);
-
- return(table->array + n);
-}
-
-/*************************************************************//**
-Clears a hash table so that all the cells become empty. */
-UNIV_INLINE
-void
-hash_table_clear(
-/*=============*/
- hash_table_t* table) /*!< in/out: hash table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- memset(table->array, 0x0,
- table->n_cells * sizeof(*table->array));
-}
-
-/*************************************************************//**
-Returns the number of cells in a hash table.
-@return number of cells */
-UNIV_INLINE
-ulint
-hash_get_n_cells(
-/*=============*/
- hash_table_t* table) /*!< in: table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- return(table->n_cells);
-}
-
-/**************************************************************//**
-Calculates the hash value from a folded value.
-@return hashed value */
-UNIV_INLINE
-ulint
-hash_calc_hash(
-/*===========*/
- ulint fold, /*!< in: folded value */
- hash_table_t* table) /*!< in: hash table */
-{
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- return(ut_hash_ulint(fold, table->n_cells));
-}
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Gets the sync object index for a fold value in a hash table.
-@return index */
-UNIV_INLINE
-ulint
-hash_get_sync_obj_index(
-/*====================*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- ut_ad(table->type != HASH_TABLE_SYNC_NONE);
- ut_ad(ut_is_2pow(table->n_sync_obj));
- return(ut_2pow_remainder(hash_calc_hash(fold, table),
- table->n_sync_obj));
-}
-
-/************************************************************//**
-Gets the nth heap in a hash table.
-@return mem heap */
-UNIV_INLINE
-mem_heap_t*
-hash_get_nth_heap(
-/*==============*/
- hash_table_t* table, /*!< in: hash table */
- ulint i) /*!< in: index of the heap */
-{
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- ut_ad(table->type != HASH_TABLE_SYNC_NONE);
- ut_ad(i < table->n_sync_obj);
-
- return(table->heaps[i]);
-}
-
-/************************************************************//**
-Gets the heap for a fold value in a hash table.
-@return mem heap */
-UNIV_INLINE
-mem_heap_t*
-hash_get_heap(
-/*==========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
- ulint i;
-
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
-
- if (table->heap) {
- return(table->heap);
- }
-
- i = hash_get_sync_obj_index(table, fold);
-
- return(hash_get_nth_heap(table, i));
-}
-
-/************************************************************//**
-Gets the nth mutex in a hash table.
-@return mutex */
-UNIV_INLINE
-ib_prio_mutex_t*
-hash_get_nth_mutex(
-/*===============*/
- hash_table_t* table, /*!< in: hash table */
- ulint i) /*!< in: index of the mutex */
-{
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- ut_ad(table->type == HASH_TABLE_SYNC_MUTEX);
- ut_ad(i < table->n_sync_obj);
-
- return(table->sync_obj.mutexes + i);
-}
-
-/************************************************************//**
-Gets the mutex for a fold value in a hash table.
-@return mutex */
-UNIV_INLINE
-ib_prio_mutex_t*
-hash_get_mutex(
-/*===========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
- ulint i;
-
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
-
- i = hash_get_sync_obj_index(table, fold);
-
- return(hash_get_nth_mutex(table, i));
-}
-
-/************************************************************//**
-Gets the nth rw_lock in a hash table.
-@return rw_lock */
-UNIV_INLINE
-prio_rw_lock_t*
-hash_get_nth_lock(
-/*==============*/
- hash_table_t* table, /*!< in: hash table */
- ulint i) /*!< in: index of the rw_lock */
-{
- ut_ad(table);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
- ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- ut_ad(i < table->n_sync_obj);
-
- return(table->sync_obj.rw_locks + i);
-}
-
-/************************************************************//**
-Gets the rw_lock for a fold value in a hash table.
-@return rw_lock */
-UNIV_INLINE
-prio_rw_lock_t*
-hash_get_lock(
-/*==========*/
- hash_table_t* table, /*!< in: hash table */
- ulint fold) /*!< in: fold */
-{
- ulint i;
-
- ut_ad(table);
- ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK);
- ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
-
- i = hash_get_sync_obj_index(table, fold);
-
- return(hash_get_nth_lock(table, i));
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/ibuf0ibuf.h b/storage/xtradb/include/ibuf0ibuf.h
deleted file mode 100644
index 0b325b68a84..00000000000
--- a/storage/xtradb/include/ibuf0ibuf.h
+++ /dev/null
@@ -1,493 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/ibuf0ibuf.h
-Insert buffer
-
-Created 7/19/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef ibuf0ibuf_h
-#define ibuf0ibuf_h
-
-#include "univ.i"
-
-#include "mtr0mtr.h"
-#include "dict0mem.h"
-#include "fsp0fsp.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "ibuf0types.h"
-
-/** Default value for maximum on-disk size of change buffer in terms
-of percentage of the buffer pool. */
-#define CHANGE_BUFFER_DEFAULT_SIZE (25)
-
-/* Possible operations buffered in the insert/whatever buffer. See
-ibuf_insert(). DO NOT CHANGE THE VALUES OF THESE, THEY ARE STORED ON DISK. */
-typedef enum {
- IBUF_OP_INSERT = 0,
- IBUF_OP_DELETE_MARK = 1,
- IBUF_OP_DELETE = 2,
-
- /* Number of different operation types. */
- IBUF_OP_COUNT = 3
-} ibuf_op_t;
-
-/** Combinations of operations that can be buffered. Because the enum
-values are used for indexing innobase_change_buffering_values[], they
-should start at 0 and there should not be any gaps. */
-typedef enum {
- IBUF_USE_NONE = 0,
- IBUF_USE_INSERT, /* insert */
- IBUF_USE_DELETE_MARK, /* delete */
- IBUF_USE_INSERT_DELETE_MARK, /* insert+delete */
- IBUF_USE_DELETE, /* delete+purge */
- IBUF_USE_ALL, /* insert+delete+purge */
-
- IBUF_USE_COUNT /* number of entries in ibuf_use_t */
-} ibuf_use_t;
-
-/** Operations that can currently be buffered. */
-extern ibuf_use_t ibuf_use;
-
-/** The insert buffer control structure */
-extern ibuf_t* ibuf;
-
-/* The purpose of the insert buffer is to reduce random disk access.
-When we wish to insert a record into a non-unique secondary index and
-the B-tree leaf page where the record belongs to is not in the buffer
-pool, we insert the record into the insert buffer B-tree, indexed by
-(space_id, page_no). When the page is eventually read into the buffer
-pool, we look up the insert buffer B-tree for any modifications to the
-page, and apply these upon the completion of the read operation. This
-is called the insert buffer merge. */
-
-/* The insert buffer merge must always succeed. To guarantee this,
-the insert buffer subsystem keeps track of the free space in pages for
-which it can buffer operations. Two bits per page in the insert
-buffer bitmap indicate the available space in coarse increments. The
-free bits in the insert buffer bitmap must never exceed the free space
-on a page. It is safe to decrement or reset the bits in the bitmap in
-a mini-transaction that is committed before the mini-transaction that
-affects the free space. It is unsafe to increment the bits in a
-separately committed mini-transaction, because in crash recovery, the
-free bits could momentarily be set too high. */
-
-/******************************************************************//**
-Creates the insert buffer data structure at a database startup.
-@return DB_SUCCESS or failure */
-UNIV_INTERN
-dberr_t
-ibuf_init_at_db_start(void);
-/*=======================*/
-/*********************************************************************//**
-Updates the max_size value for ibuf. */
-UNIV_INTERN
-void
-ibuf_max_size_update(
-/*=================*/
- ulint new_val); /*!< in: new value in terms of
- percentage of the buffer pool size */
-/*********************************************************************//**
-Reads the biggest tablespace id from the high end of the insert buffer
-tree and updates the counter in fil_system. */
-UNIV_INTERN
-void
-ibuf_update_max_tablespace_id(void);
-/*===============================*/
-/***************************************************************//**
-Starts an insert buffer mini-transaction. */
-UNIV_INLINE
-void
-ibuf_mtr_start(
-/*===========*/
- mtr_t* mtr) /*!< out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-/***************************************************************//**
-Commits an insert buffer mini-transaction. */
-UNIV_INLINE
-void
-ibuf_mtr_commit(
-/*============*/
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Initializes an ibuf bitmap page. */
-UNIV_INTERN
-void
-ibuf_bitmap_page_init(
-/*==================*/
- buf_block_t* block, /*!< in: bitmap page */
- mtr_t* mtr); /*!< in: mtr */
-/************************************************************************//**
-Resets the free bits of the page in the ibuf bitmap. This is done in a
-separate mini-transaction, hence this operation does not restrict
-further work to only ibuf bitmap operations, which would result if the
-latch to the bitmap page were kept. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to decrement or reset the bits in the bitmap in a mini-transaction
-that is committed before the mini-transaction that affects the free
-space. */
-UNIV_INTERN
-void
-ibuf_reset_free_bits(
-/*=================*/
- buf_block_t* block); /*!< in: index page; free bits are set to 0
- if the index is a non-clustered
- non-unique, and page level is 0 */
-/************************************************************************//**
-Updates the free bits of an uncompressed page in the ibuf bitmap if
-there is not enough free space on the page any more. This is done in a
-separate mini-transaction, hence this operation does not restrict
-further work to only ibuf bitmap operations, which would result if the
-latch to the bitmap page were kept. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is
-unsafe to increment the bits in a separately committed
-mini-transaction, because in crash recovery, the free bits could
-momentarily be set too high. It is only safe to use this function for
-decrementing the free bits. Should more free space become available,
-we must not update the free bits here, because that would break crash
-recovery. */
-UNIV_INLINE
-void
-ibuf_update_free_bits_if_full(
-/*==========================*/
- buf_block_t* block, /*!< in: index page to which we have added new
- records; the free bits are updated if the
- index is non-clustered and non-unique and
- the page level is 0, and the page becomes
- fuller */
- ulint max_ins_size,/*!< in: value of maximum insert size with
- reorganize before the latest operation
- performed to the page */
- ulint increase);/*!< in: upper limit for the additional space
- used in the latest operation, if known, or
- ULINT_UNDEFINED */
-/**********************************************************************//**
-Updates the free bits for an uncompressed page to reflect the present
-state. Does this in the mtr given, which means that the latching
-order rules virtually prevent any further operations for this OS
-thread until mtr is committed. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to set the free bits in the same mini-transaction that updated the
-page. */
-UNIV_INTERN
-void
-ibuf_update_free_bits_low(
-/*======================*/
- const buf_block_t* block, /*!< in: index page */
- ulint max_ins_size, /*!< in: value of
- maximum insert size
- with reorganize before
- the latest operation
- performed to the page */
- mtr_t* mtr); /*!< in/out: mtr */
-/**********************************************************************//**
-Updates the free bits for a compressed page to reflect the present
-state. Does this in the mtr given, which means that the latching
-order rules virtually prevent any further operations for this OS
-thread until mtr is committed. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is safe
-to set the free bits in the same mini-transaction that updated the
-page. */
-UNIV_INTERN
-void
-ibuf_update_free_bits_zip(
-/*======================*/
- buf_block_t* block, /*!< in/out: index page */
- mtr_t* mtr); /*!< in/out: mtr */
-/**********************************************************************//**
-Updates the free bits for the two pages to reflect the present state.
-Does this in the mtr given, which means that the latching order rules
-virtually prevent any further operations until mtr is committed.
-NOTE: The free bits in the insert buffer bitmap must never exceed the
-free space on a page. It is safe to set the free bits in the same
-mini-transaction that updated the pages. */
-UNIV_INTERN
-void
-ibuf_update_free_bits_for_two_pages_low(
-/*====================================*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- buf_block_t* block1, /*!< in: index page */
- buf_block_t* block2, /*!< in: index page */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-A basic partial test if an insert to the insert buffer could be possible and
-recommended. */
-UNIV_INLINE
-ibool
-ibuf_should_try(
-/*============*/
- dict_index_t* index, /*!< in: index where to insert */
- ulint ignore_sec_unique); /*!< in: if != 0, we should
- ignore UNIQUE constraint on
- a secondary index when we
- decide */
-/******************************************************************//**
-Returns TRUE if the current OS thread is performing an insert buffer
-routine.
-
-For instance, a read-ahead of non-ibuf pages is forbidden by threads
-that are executing an insert buffer routine.
-@return TRUE if inside an insert buffer routine */
-UNIV_INLINE
-ibool
-ibuf_inside(
-/*========*/
- const mtr_t* mtr) /*!< in: mini-transaction */
- MY_ATTRIBUTE((nonnull, pure));
-/***********************************************************************//**
-Checks if a page address is an ibuf bitmap page (level 3 page) address.
-@return TRUE if a bitmap page */
-UNIV_INLINE
-ibool
-ibuf_bitmap_page(
-/*=============*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no);/*!< in: page number */
-/***********************************************************************//**
-Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
-Must not be called when recv_no_ibuf_operations==TRUE.
-@return TRUE if level 2 or level 3 page */
-UNIV_INTERN
-ibool
-ibuf_page_low(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no,/*!< in: page number */
-#ifdef UNIV_DEBUG
- ibool x_latch,/*!< in: FALSE if relaxed check
- (avoid latching the bitmap page) */
-#endif /* UNIV_DEBUG */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line where called */
- mtr_t* mtr) /*!< in: mtr which will contain an
- x-latch to the bitmap page if the page
- is not one of the fixed address ibuf
- pages, or NULL, in which case a new
- transaction is created. */
- MY_ATTRIBUTE((warn_unused_result));
-#ifdef UNIV_DEBUG
-/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of
-pages. Must not be called when recv_no_ibuf_operations==TRUE.
-@param space tablespace identifier
-@param zip_size compressed page size in bytes, or 0
-@param page_no page number
-@param mtr mini-transaction or NULL
-@return TRUE if level 2 or level 3 page */
-# define ibuf_page(space, zip_size, page_no, mtr) \
- ibuf_page_low(space, zip_size, page_no, TRUE, __FILE__, __LINE__, mtr)
-#else /* !UNIV_DEBUG */
-/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of
-pages. Must not be called when recv_no_ibuf_operations==TRUE.
-@param space tablespace identifier
-@param zip_size compressed page size in bytes, or 0
-@param page_no page number
-@param mtr mini-transaction or NULL
-@return TRUE if level 2 or level 3 page */
-# define ibuf_page(space, zip_size, page_no, mtr) \
- ibuf_page_low(space, zip_size, page_no, __FILE__, __LINE__, mtr)
-#endif /* UNIV_DEBUG */
-/***********************************************************************//**
-Frees excess pages from the ibuf free list. This function is called when an OS
-thread calls fsp services to allocate a new file segment, or a new page to a
-file segment, and the thread did not own the fsp latch before this call. */
-UNIV_INTERN
-void
-ibuf_free_excess_pages(void);
-/*========================*/
-/*********************************************************************//**
-Buffer an operation in the insert/delete buffer, instead of doing it
-directly to the disk page, if this is possible. Does not do it if the index
-is clustered or unique.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-ibuf_insert(
-/*========*/
- ibuf_op_t op, /*!< in: operation type */
- const dtuple_t* entry, /*!< in: index entry to insert */
- dict_index_t* index, /*!< in: index where to insert */
- ulint space, /*!< in: space id where to insert */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no,/*!< in: page number where to insert */
- que_thr_t* thr); /*!< in: query thread */
-/*********************************************************************//**
-When an index page is read from a disk to the buffer pool, this function
-applies any buffered operations to the page and deletes the entries from the
-insert buffer. If the page is not read, but created in the buffer pool, this
-function deletes its buffered entries from the insert buffer; there can
-exist entries for such a page if the page belonged to an index which
-subsequently was dropped. */
-UNIV_INTERN
-void
-ibuf_merge_or_delete_for_page(
-/*==========================*/
- buf_block_t* block, /*!< in: if page has been read from
- disk, pointer to the page x-latched,
- else NULL */
- ulint space, /*!< in: space id of the index page */
- ulint page_no,/*!< in: page number of the index page */
- ulint zip_size,/*!< in: compressed page size in bytes,
- or 0 */
- ibool update_ibuf_bitmap);/*!< in: normally this is set
- to TRUE, but if we have deleted or are
- deleting the tablespace, then we
- naturally do not want to update a
- non-existent bitmap page */
-/*********************************************************************//**
-Deletes all entries in the insert buffer for a given space id. This is used
-in DISCARD TABLESPACE and IMPORT TABLESPACE.
-NOTE: this does not update the page free bitmaps in the space. The space will
-become CORRUPT when you call this function! */
-UNIV_INTERN
-void
-ibuf_delete_for_discarded_space(
-/*============================*/
- ulint space); /*!< in: space id */
-/** Contract the change buffer by reading pages to the buffer pool.
-@param[in] full If true, do a full contraction based
-on PCT_IO(100). If false, the size of contract batch is determined
-based on the current size of the change buffer.
-@return a lower limit for the combined size in bytes of entries which
-will be merged from ibuf trees to the pages read, 0 if ibuf is
-empty */
-UNIV_INTERN
-ulint
-ibuf_merge_in_background(
- bool full); /*!< in: TRUE if the caller wants to
- do a full contract based on PCT_IO(100).
- If FALSE then the size of contract
- batch is determined based on the
- current size of the ibuf tree. */
-
-/** Contracts insert buffer trees by reading pages referring to space_id
-to the buffer pool.
-@return number of pages merged. */
-UNIV_INTERN
-ulint
-ibuf_merge_space(
-/*=============*/
- ulint space); /*!< in: space id */
-
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Parses a redo log record of an ibuf bitmap page init.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-ibuf_parse_bitmap_init(
-/*===================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in: block or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_IBUF_COUNT_DEBUG
-/******************************************************************//**
-Gets the ibuf count for a given page.
-@return number of entries in the insert buffer currently buffered for
-this page */
-UNIV_INTERN
-ulint
-ibuf_count_get(
-/*===========*/
- ulint space, /*!< in: space id */
- ulint page_no);/*!< in: page number */
-#endif
-/******************************************************************//**
-Looks if the insert buffer is empty.
-@return true if empty */
-UNIV_INTERN
-bool
-ibuf_is_empty(void);
-/*===============*/
-/******************************************************************//**
-Prints info of ibuf. */
-UNIV_INTERN
-void
-ibuf_print(
-/*=======*/
- FILE* file); /*!< in: file where to print */
-/******************************************************************//**
-Read the first two bytes from a record's fourth field (counter field in new
-records; something else in older records).
-@return "counter" field, or ULINT_UNDEFINED if for some reason it can't be read */
-UNIV_INTERN
-ulint
-ibuf_rec_get_counter(
-/*=================*/
- const rec_t* rec); /*!< in: ibuf record */
-/******************************************************************//**
-Closes insert buffer and frees the data structures. */
-UNIV_INTERN
-void
-ibuf_close(void);
-/*============*/
-/******************************************************************//**
-Function to pass ibuf status variables */
-UNIV_INTERN
-void
-ibuf_export_ibuf_status(
-/*====================*/
- ulint* size,
- ulint* free_list,
- ulint* segment_size,
- ulint* merges,
- ulint* merged_inserts,
- ulint* merged_delete_marks,
- ulint* merged_deletes,
- ulint* discarded_inserts,
- ulint* discarded_delete_marks,
- ulint* discarded_deletes);
-
-/******************************************************************//**
-Checks the insert buffer bitmaps on IMPORT TABLESPACE.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-ibuf_check_bitmap_on_import(
-/*========================*/
- const trx_t* trx, /*!< in: transaction */
- ulint space_id) /*!< in: tablespace identifier */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
-#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
-
-#endif /* !UNIV_HOTBACKUP */
-
-/* The ibuf header page currently contains only the file segment header
-for the file segment from which the pages for the ibuf tree are allocated */
-#define IBUF_HEADER PAGE_DATA
-#define IBUF_TREE_SEG_HEADER 0 /* fseg header for ibuf tree */
-
-/* The insert buffer tree itself is always located in space 0. */
-#define IBUF_SPACE_ID 0
-
-#ifndef UNIV_NONINL
-#include "ibuf0ibuf.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/ibuf0ibuf.ic b/storage/xtradb/include/ibuf0ibuf.ic
deleted file mode 100644
index a5df9f7b6b4..00000000000
--- a/storage/xtradb/include/ibuf0ibuf.ic
+++ /dev/null
@@ -1,368 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/ibuf0ibuf.ic
-Insert buffer
-
-Created 7/19/1997 Heikki Tuuri
-*******************************************************/
-
-#include "page0page.h"
-#include "page0zip.h"
-#ifndef UNIV_HOTBACKUP
-#include "buf0lru.h"
-
-/** An index page must contain at least UNIV_PAGE_SIZE /
-IBUF_PAGE_SIZE_PER_FREE_SPACE bytes of free space for ibuf to try to
-buffer inserts to this page. If there is this much free space, the
-corresponding bits are set in the ibuf bitmap. */
-#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32
-
-/***************************************************************//**
-Starts an insert buffer mini-transaction. */
-UNIV_INLINE
-void
-ibuf_mtr_start(
-/*===========*/
- mtr_t* mtr) /*!< out: mini-transaction */
-{
- mtr_start(mtr);
- mtr->inside_ibuf = TRUE;
-}
-/***************************************************************//**
-Commits an insert buffer mini-transaction. */
-UNIV_INLINE
-void
-ibuf_mtr_commit(
-/*============*/
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ut_ad(mtr->inside_ibuf);
- ut_d(mtr->inside_ibuf = FALSE);
- mtr_commit(mtr);
-}
-
-/** Insert buffer struct */
-struct ibuf_t{
- ulint size; /*!< current size of the ibuf index
- tree, in pages */
- ulint max_size; /*!< recommended maximum size of the
- ibuf index tree, in pages */
- ulint seg_size; /*!< allocated pages of the file
- segment containing ibuf header and
- tree */
- bool empty; /*!< Protected by the page
- latch of the root page of the
- insert buffer tree
- (FSP_IBUF_TREE_ROOT_PAGE_NO). true
- if and only if the insert
- buffer tree is empty. */
- ulint free_list_len; /*!< length of the free list */
- ulint height; /*!< tree height */
- dict_index_t* index; /*!< insert buffer index */
-
- ulint n_merges; /*!< number of pages merged */
- ulint n_merged_ops[IBUF_OP_COUNT];
- /*!< number of operations of each type
- merged to index pages */
- ulint n_discarded_ops[IBUF_OP_COUNT];
- /*!< number of operations of each type
- discarded without merging due to the
- tablespace being deleted or the
- index being dropped */
-};
-
-/************************************************************************//**
-Sets the free bit of the page in the ibuf bitmap. This is done in a separate
-mini-transaction, hence this operation does not restrict further work to only
-ibuf bitmap operations, which would result if the latch to the bitmap page
-were kept. */
-UNIV_INTERN
-void
-ibuf_set_free_bits_func(
-/*====================*/
- buf_block_t* block, /*!< in: index page of a non-clustered index;
- free bit is reset if page level is 0 */
-#ifdef UNIV_IBUF_DEBUG
- ulint max_val,/*!< in: ULINT_UNDEFINED or a maximum
- value which the bits must have before
- setting; this is for debugging */
-#endif /* UNIV_IBUF_DEBUG */
- ulint val); /*!< in: value to set: < 4 */
-#ifdef UNIV_IBUF_DEBUG
-# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,max,v)
-#else /* UNIV_IBUF_DEBUG */
-# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,v)
-#endif /* UNIV_IBUF_DEBUG */
-
-/**********************************************************************//**
-A basic partial test if an insert to the insert buffer could be possible and
-recommended. */
-UNIV_INLINE
-ibool
-ibuf_should_try(
-/*============*/
- dict_index_t* index, /*!< in: index where to insert */
- ulint ignore_sec_unique) /*!< in: if != 0, we should
- ignore UNIQUE constraint on
- a secondary index when we
- decide */
-{
- return(ibuf_use != IBUF_USE_NONE
- && ibuf->max_size != 0
- && !dict_index_is_clust(index)
- && index->table->quiesce == QUIESCE_NONE
- && (ignore_sec_unique || !dict_index_is_unique(index))
- && srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE);
-}
-
-/******************************************************************//**
-Returns TRUE if the current OS thread is performing an insert buffer
-routine.
-
-For instance, a read-ahead of non-ibuf pages is forbidden by threads
-that are executing an insert buffer routine.
-@return TRUE if inside an insert buffer routine */
-UNIV_INLINE
-ibool
-ibuf_inside(
-/*========*/
- const mtr_t* mtr) /*!< in: mini-transaction */
-{
- return(mtr->inside_ibuf);
-}
-
-/***********************************************************************//**
-Checks if a page address is an ibuf bitmap page address.
-@return TRUE if a bitmap page */
-UNIV_INLINE
-ibool
-ibuf_bitmap_page(
-/*=============*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint page_no)/*!< in: page number */
-{
- ut_ad(ut_is_2pow(zip_size));
-
- if (!zip_size) {
- return((page_no & (UNIV_PAGE_SIZE - 1))
- == FSP_IBUF_BITMAP_OFFSET);
- }
-
- return((page_no & (zip_size - 1)) == FSP_IBUF_BITMAP_OFFSET);
-}
-
-/*********************************************************************//**
-Translates the free space on a page to a value in the ibuf bitmap.
-@return value for ibuf bitmap bits */
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free_bits(
-/*===========================*/
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint max_ins_size) /*!< in: maximum insert size after reorganize
- for the page */
-{
- ulint n;
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
-
- if (zip_size) {
- n = max_ins_size
- / (zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
- } else {
- n = max_ins_size
- / (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
- }
-
- if (n == 3) {
- n = 2;
- }
-
- if (n > 3) {
- n = 3;
- }
-
- return(n);
-}
-
-/*********************************************************************//**
-Translates the ibuf free bits to the free space on a page in bytes.
-@return maximum insert size after reorganize for the page */
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free_from_bits(
-/*================================*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint bits) /*!< in: value for ibuf bitmap bits */
-{
- ut_ad(bits < 4);
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE);
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
-
- if (zip_size) {
- if (bits == 3) {
- return(4 * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
- }
-
- return(bits * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE);
- }
-
- if (bits == 3) {
- return(4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE);
- }
-
- return(bits * (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE));
-}
-
-/*********************************************************************//**
-Translates the free space on a compressed page to a value in the ibuf bitmap.
-@return value for ibuf bitmap bits */
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free_zip(
-/*==========================*/
- ulint zip_size,
- /*!< in: compressed page size in bytes */
- const buf_block_t* block) /*!< in: buffer block */
-{
- ulint max_ins_size;
- const page_zip_des_t* page_zip;
- lint zip_max_ins;
-
- ut_ad(zip_size == buf_block_get_zip_size(block));
- ut_ad(zip_size);
-
- /* Consider the maximum insert size on the uncompressed page
- without reorganizing the page. We must not assume anything
- about the compression ratio. If zip_max_ins > max_ins_size and
- there is 1/4 garbage on the page, recompression after the
- reorganize could fail, in theory. So, let us guarantee that
- merging a buffered insert to a compressed page will always
- succeed without reorganizing or recompressing the page, just
- by using the page modification log. */
- max_ins_size = page_get_max_insert_size(
- buf_block_get_frame(block), 1);
-
- page_zip = buf_block_get_page_zip(block);
- zip_max_ins = page_zip_max_ins_size(page_zip,
- FALSE/* not clustered */);
-
- if (zip_max_ins < 0) {
- return(0);
- } else if (max_ins_size > (ulint) zip_max_ins) {
- max_ins_size = (ulint) zip_max_ins;
- }
-
- return(ibuf_index_page_calc_free_bits(zip_size, max_ins_size));
-}
-
-/*********************************************************************//**
-Translates the free space on a page to a value in the ibuf bitmap.
-@return value for ibuf bitmap bits */
-UNIV_INLINE
-ulint
-ibuf_index_page_calc_free(
-/*======================*/
- ulint zip_size,/*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- const buf_block_t* block) /*!< in: buffer block */
-{
- ut_ad(zip_size == buf_block_get_zip_size(block));
-
- if (!zip_size) {
- ulint max_ins_size;
-
- max_ins_size = page_get_max_insert_size_after_reorganize(
- buf_block_get_frame(block), 1);
-
- return(ibuf_index_page_calc_free_bits(0, max_ins_size));
- } else {
- return(ibuf_index_page_calc_free_zip(zip_size, block));
- }
-}
-
-/************************************************************************//**
-Updates the free bits of an uncompressed page in the ibuf bitmap if
-there is not enough free space on the page any more. This is done in a
-separate mini-transaction, hence this operation does not restrict
-further work to only ibuf bitmap operations, which would result if the
-latch to the bitmap page were kept. NOTE: The free bits in the insert
-buffer bitmap must never exceed the free space on a page. It is
-unsafe to increment the bits in a separately committed
-mini-transaction, because in crash recovery, the free bits could
-momentarily be set too high. It is only safe to use this function for
-decrementing the free bits. Should more free space become available,
-we must not update the free bits here, because that would break crash
-recovery. */
-UNIV_INLINE
-void
-ibuf_update_free_bits_if_full(
-/*==========================*/
- buf_block_t* block, /*!< in: index page to which we have added new
- records; the free bits are updated if the
- index is non-clustered and non-unique and
- the page level is 0, and the page becomes
- fuller */
- ulint max_ins_size,/*!< in: value of maximum insert size with
- reorganize before the latest operation
- performed to the page */
- ulint increase)/*!< in: upper limit for the additional space
- used in the latest operation, if known, or
- ULINT_UNDEFINED */
-{
- ulint before;
- ulint after;
-
- ut_ad(!buf_block_get_page_zip(block));
-
- before = ibuf_index_page_calc_free_bits(0, max_ins_size);
-
- if (max_ins_size >= increase) {
-#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX
-# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX"
-#endif
- after = ibuf_index_page_calc_free_bits(0, max_ins_size
- - increase);
-#ifdef UNIV_IBUF_DEBUG
- ut_a(after <= ibuf_index_page_calc_free(0, block));
-#endif
- } else {
- after = ibuf_index_page_calc_free(0, block);
- }
-
- if (after == 0) {
- /* We move the page to the front of the buffer pool LRU list:
- the purpose of this is to prevent those pages to which we
- cannot make inserts using the insert buffer from slipping
- out of the buffer pool */
-
- buf_page_make_young(&block->page);
- }
-
- if (before > after) {
- ibuf_set_free_bits(block, after, before);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/ibuf0types.h b/storage/xtradb/include/ibuf0types.h
deleted file mode 100644
index 3fdbf078b0b..00000000000
--- a/storage/xtradb/include/ibuf0types.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/ibuf0types.h
-Insert buffer global types
-
-Created 7/29/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef ibuf0types_h
-#define ibuf0types_h
-
-struct ibuf_t;
-
-#endif
diff --git a/storage/xtradb/include/lock0iter.h b/storage/xtradb/include/lock0iter.h
deleted file mode 100644
index 0054850b526..00000000000
--- a/storage/xtradb/include/lock0iter.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/lock0iter.h
-Lock queue iterator type and function prototypes.
-
-Created July 16, 2007 Vasil Dimov
-*******************************************************/
-
-#ifndef lock0iter_h
-#define lock0iter_h
-
-#include "univ.i"
-#include "lock0types.h"
-
-struct lock_queue_iterator_t {
- const lock_t* current_lock;
- /* In case this is a record lock queue (not table lock queue)
- then bit_no is the record number within the heap in which the
- record is stored. */
- ulint bit_no;
-};
-
-/*******************************************************************//**
-Initialize lock queue iterator so that it starts to iterate from
-"lock". bit_no specifies the record number within the heap where the
-record is stored. It can be undefined (ULINT_UNDEFINED) in two cases:
-1. If the lock is a table lock, thus we have a table lock queue;
-2. If the lock is a record lock and it is a wait lock. In this case
- bit_no is calculated in this function by using
- lock_rec_find_set_bit(). There is exactly one bit set in the bitmap
- of a wait lock. */
-UNIV_INTERN
-void
-lock_queue_iterator_reset(
-/*======================*/
- lock_queue_iterator_t* iter, /*!< out: iterator */
- const lock_t* lock, /*!< in: lock to start from */
- ulint bit_no);/*!< in: record number in the
- heap */
-
-/*******************************************************************//**
-Gets the previous lock in the lock queue, returns NULL if there are no
-more locks (i.e. the current lock is the first one). The iterator is
-moved back (if non-NULL is returned).
-@return previous lock or NULL */
-
-const lock_t*
-lock_queue_iterator_get_prev(
-/*=========================*/
- lock_queue_iterator_t* iter); /*!< in/out: iterator */
-
-#endif /* lock0iter_h */
diff --git a/storage/xtradb/include/lock0lock.h b/storage/xtradb/include/lock0lock.h
deleted file mode 100644
index 923c463aa22..00000000000
--- a/storage/xtradb/include/lock0lock.h
+++ /dev/null
@@ -1,1036 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/lock0lock.h
-The transaction lock system
-
-Created 5/7/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef lock0lock_h
-#define lock0lock_h
-
-#include "univ.i"
-#include "buf0types.h"
-#include "trx0types.h"
-#include "mtr0types.h"
-#include "rem0types.h"
-#include "dict0types.h"
-#include "que0types.h"
-#include "lock0types.h"
-#include "read0types.h"
-#include "hash0hash.h"
-#include "srv0srv.h"
-#include "ut0vec.h"
-
-#include <string>
-
-#ifdef UNIV_DEBUG
-extern ibool lock_print_waits;
-#endif /* UNIV_DEBUG */
-
-/** Alternatives for innodb_lock_schedule_algorithm, which can be changed by
- setting innodb_lock_schedule_algorithm. */
-enum innodb_lock_schedule_algorithm_t {
- INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS, /*!< First Come First Served */
- INNODB_LOCK_SCHEDULE_ALGORITHM_VATS /*!< Variance-Aware-Transaction-Scheduling */
-};
-
-extern ulong innodb_lock_schedule_algorithm;
-
-extern ulint srv_n_lock_deadlock_count;
-
-/*********************************************************************//**
-Gets the size of a lock struct.
-@return size in bytes */
-UNIV_INTERN
-ulint
-lock_get_size(void);
-/*===============*/
-/*********************************************************************//**
-Creates the lock system at database start. */
-UNIV_INTERN
-void
-lock_sys_create(
-/*============*/
- ulint n_cells); /*!< in: number of slots in lock hash table */
-/*********************************************************************//**
-Closes the lock system at database shutdown. */
-UNIV_INTERN
-void
-lock_sys_close(void);
-/*================*/
-/*********************************************************************//**
-Gets the heap_no of the smallest user record on a page.
-@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
-UNIV_INLINE
-ulint
-lock_get_min_heap_no(
-/*=================*/
- const buf_block_t* block); /*!< in: buffer block */
-/*************************************************************//**
-Updates the lock table when we have reorganized a page. NOTE: we copy
-also the locks set on the infimum of the page; the infimum may carry
-locks if an update of a record is occurring on the page, and its locks
-were temporarily stored on the infimum. */
-UNIV_INTERN
-void
-lock_move_reorganize_page(
-/*======================*/
- const buf_block_t* block, /*!< in: old index page, now
- reorganized */
- const buf_block_t* oblock);/*!< in: copy of the old, not
- reorganized page */
-/*************************************************************//**
-Moves the explicit locks on user records to another page if a record
-list end is moved to another page. */
-UNIV_INTERN
-void
-lock_move_rec_list_end(
-/*===================*/
- const buf_block_t* new_block, /*!< in: index page to move to */
- const buf_block_t* block, /*!< in: index page */
- const rec_t* rec); /*!< in: record on page: this
- is the first record moved */
-/*************************************************************//**
-Moves the explicit locks on user records to another page if a record
-list start is moved to another page. */
-UNIV_INTERN
-void
-lock_move_rec_list_start(
-/*=====================*/
- const buf_block_t* new_block, /*!< in: index page to move to */
- const buf_block_t* block, /*!< in: index page */
- const rec_t* rec, /*!< in: record on page:
- this is the first
- record NOT copied */
- const rec_t* old_end); /*!< in: old
- previous-to-last
- record on new_page
- before the records
- were copied */
-/*************************************************************//**
-Updates the lock table when a page is split to the right. */
-UNIV_INTERN
-void
-lock_update_split_right(
-/*====================*/
- const buf_block_t* right_block, /*!< in: right page */
- const buf_block_t* left_block); /*!< in: left page */
-/*************************************************************//**
-Updates the lock table when a page is merged to the right. */
-UNIV_INTERN
-void
-lock_update_merge_right(
-/*====================*/
- const buf_block_t* right_block, /*!< in: right page to
- which merged */
- const rec_t* orig_succ, /*!< in: original
- successor of infimum
- on the right page
- before merge */
- const buf_block_t* left_block); /*!< in: merged index
- page which will be
- discarded */
-/*************************************************************//**
-Updates the lock table when the root page is copied to another in
-btr_root_raise_and_insert. Note that we leave lock structs on the
-root page, even though they do not make sense on other than leaf
-pages: the reason is that in a pessimistic update the infimum record
-of the root page will act as a dummy carrier of the locks of the record
-to be updated. */
-UNIV_INTERN
-void
-lock_update_root_raise(
-/*===================*/
- const buf_block_t* block, /*!< in: index page to which copied */
- const buf_block_t* root); /*!< in: root page */
-/*************************************************************//**
-Updates the lock table when a page is copied to another and the original page
-is removed from the chain of leaf pages, except if page is the root! */
-UNIV_INTERN
-void
-lock_update_copy_and_discard(
-/*=========================*/
- const buf_block_t* new_block, /*!< in: index page to
- which copied */
- const buf_block_t* block); /*!< in: index page;
- NOT the root! */
-/*************************************************************//**
-Updates the lock table when a page is split to the left. */
-UNIV_INTERN
-void
-lock_update_split_left(
-/*===================*/
- const buf_block_t* right_block, /*!< in: right page */
- const buf_block_t* left_block); /*!< in: left page */
-/*************************************************************//**
-Updates the lock table when a page is merged to the left. */
-UNIV_INTERN
-void
-lock_update_merge_left(
-/*===================*/
- const buf_block_t* left_block, /*!< in: left page to
- which merged */
- const rec_t* orig_pred, /*!< in: original predecessor
- of supremum on the left page
- before merge */
- const buf_block_t* right_block); /*!< in: merged index page
- which will be discarded */
-/*************************************************************//**
-Updates the lock table when a page is split and merged to
-two pages. */
-UNIV_INTERN
-void
-lock_update_split_and_merge(
- const buf_block_t* left_block, /*!< in: left page to which merged */
- const rec_t* orig_pred, /*!< in: original predecessor of
- supremum on the left page before merge*/
- const buf_block_t* right_block);/*!< in: right page from which merged */
-/*************************************************************//**
-Resets the original locks on heir and replaces them with gap type locks
-inherited from rec. */
-UNIV_INTERN
-void
-lock_rec_reset_and_inherit_gap_locks(
-/*=================================*/
- const buf_block_t* heir_block, /*!< in: block containing the
- record which inherits */
- const buf_block_t* block, /*!< in: block containing the
- record from which inherited;
- does NOT reset the locks on
- this record */
- ulint heir_heap_no, /*!< in: heap_no of the
- inheriting record */
- ulint heap_no); /*!< in: heap_no of the
- donating record */
-/*************************************************************//**
-Updates the lock table when a page is discarded. */
-UNIV_INTERN
-void
-lock_update_discard(
-/*================*/
- const buf_block_t* heir_block, /*!< in: index page
- which will inherit the locks */
- ulint heir_heap_no, /*!< in: heap_no of the record
- which will inherit the locks */
- const buf_block_t* block); /*!< in: index page
- which will be discarded */
-/*************************************************************//**
-Updates the lock table when a new user record is inserted. */
-UNIV_INTERN
-void
-lock_update_insert(
-/*===============*/
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec); /*!< in: the inserted record */
-/*************************************************************//**
-Updates the lock table when a record is removed. */
-UNIV_INTERN
-void
-lock_update_delete(
-/*===============*/
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec); /*!< in: the record to be removed */
-/*********************************************************************//**
-Stores on the page infimum record the explicit locks of another record.
-This function is used to store the lock state of a record when it is
-updated and the size of the record changes in the update. In such an
-update the record is moved, perhaps to another page. The infimum record
-acts as a dummy carrier record, taking care of lock releases while the
-actual record is being moved. */
-UNIV_INTERN
-void
-lock_rec_store_on_page_infimum(
-/*===========================*/
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec); /*!< in: record whose lock state
- is stored on the infimum
- record of the same page; lock
- bits are reset on the
- record */
-/*********************************************************************//**
-Restores the state of explicit lock requests on a single record, where the
-state was stored on the infimum of the page. */
-UNIV_INTERN
-void
-lock_rec_restore_from_page_infimum(
-/*===============================*/
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec, /*!< in: record whose lock state
- is restored */
- const buf_block_t* donator);/*!< in: page (rec is not
- necessarily on this page)
- whose infimum stored the lock
- state; lock bits are reset on
- the infimum */
-/*********************************************************************//**
-Determines if there are explicit record locks on a page.
-@return an explicit record lock on the page, or NULL if there are none */
-UNIV_INTERN
-lock_t*
-lock_rec_expl_exist_on_page(
-/*========================*/
- ulint space, /*!< in: space id */
- ulint page_no)/*!< in: page number */
- MY_ATTRIBUTE((warn_unused_result));
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate insert of
-a record. If they do, first tests if the query thread should anyway
-be suspended for some reason; if not, then puts the transaction and
-the query thread to the lock wait state and inserts a waiting request
-for a gap x-lock to the lock queue.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_rec_insert_check_and_lock(
-/*===========================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
- set, does nothing */
- const rec_t* rec, /*!< in: record after which to insert */
- buf_block_t* block, /*!< in/out: buffer block of rec */
- dict_index_t* index, /*!< in: index */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- ibool* inherit)/*!< out: set to TRUE if the new
- inserted record may need to inherit
- LOCK_GAP type locks from the successor
- record */
- MY_ATTRIBUTE((nonnull(2,3,4,6,7), warn_unused_result));
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate modify (update,
-delete mark, or delete unmark) of a clustered index record. If they do,
-first tests if the query thread should anyway be suspended for some
-reason; if not, then puts the transaction and the query thread to the
-lock wait state and inserts a waiting request for a record x-lock to the
-lock queue.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_clust_rec_modify_check_and_lock(
-/*=================================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: record which should be
- modified */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- que_thr_t* thr) /*!< in: query thread */
- MY_ATTRIBUTE((warn_unused_result, nonnull));
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate modify
-(delete mark or delete unmark) of a secondary index record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_sec_rec_modify_check_and_lock(
-/*===============================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- buf_block_t* block, /*!< in/out: buffer block of rec */
- const rec_t* rec, /*!< in: record which should be
- modified; NOTE: as this is a secondary
- index, we always have to modify the
- clustered index record first: see the
- comment below */
- dict_index_t* index, /*!< in: secondary index */
- que_thr_t* thr, /*!< in: query thread
- (can be NULL if BTR_NO_LOCKING_FLAG) */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((warn_unused_result, nonnull(2,3,4,6)));
-/*********************************************************************//**
-Like lock_clust_rec_read_check_and_lock(), but reads a
-secondary index record.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
-or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_sec_rec_read_check_and_lock(
-/*=============================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: user record or page
- supremum record which should
- be read or passed over by a
- read cursor */
- dict_index_t* index, /*!< in: secondary index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- enum lock_mode mode, /*!< in: mode of the lock which
- the read cursor should set on
- records: LOCK_S or LOCK_X; the
- latter is possible in
- SELECT FOR UPDATE */
- ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr); /*!< in: query thread */
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
-or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_clust_rec_read_check_and_lock(
-/*===============================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: user record or page
- supremum record which should
- be read or passed over by a
- read cursor */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- enum lock_mode mode, /*!< in: mode of the lock which
- the read cursor should set on
- records: LOCK_S or LOCK_X; the
- latter is possible in
- SELECT FOR UPDATE */
- ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr); /*!< in: query thread */
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record. This is an alternative version of
-lock_clust_rec_read_check_and_lock() that does not require the parameter
-"offsets".
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_clust_rec_read_check_and_lock_alt(
-/*===================================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: user record or page
- supremum record which should
- be read or passed over by a
- read cursor */
- dict_index_t* index, /*!< in: clustered index */
- enum lock_mode mode, /*!< in: mode of the lock which
- the read cursor should set on
- records: LOCK_S or LOCK_X; the
- latter is possible in
- SELECT FOR UPDATE */
- ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr) /*!< in: query thread */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Checks that a record is seen in a consistent read.
-@return true if sees, or false if an earlier version of the record
-should be retrieved */
-UNIV_INTERN
-bool
-lock_clust_rec_cons_read_sees(
-/*==========================*/
- const rec_t* rec, /*!< in: user record which should be read or
- passed over by a read cursor */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- read_view_t* view); /*!< in: consistent read view */
-/*********************************************************************//**
-Checks that a non-clustered index record is seen in a consistent read.
-
-NOTE that a non-clustered index page contains so little information on
-its modifications that also in the case false, the present version of
-rec may be the right, but we must check this from the clustered index
-record.
-
-@return true if certainly sees, or false if an earlier version of the
-clustered index record might be needed */
-UNIV_INTERN
-bool
-lock_sec_rec_cons_read_sees(
-/*========================*/
- const rec_t* rec, /*!< in: user record which
- should be read or passed over
- by a read cursor */
- const read_view_t* view) /*!< in: consistent read view */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Locks the specified database table in the mode given. If the lock cannot
-be granted immediately, the query thread is put to wait.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_table(
-/*=======*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- dict_table_t* table, /*!< in/out: database table
- in dictionary cache */
- enum lock_mode mode, /*!< in: lock mode */
- que_thr_t* thr) /*!< in: query thread */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Creates a table IX lock object for a resurrected transaction. */
-UNIV_INTERN
-void
-lock_table_ix_resurrect(
-/*====================*/
- dict_table_t* table, /*!< in/out: table */
- trx_t* trx); /*!< in/out: transaction */
-/*************************************************************//**
-Removes a granted record lock of a transaction from the queue and grants
-locks to other transactions waiting in the queue if they now are entitled
-to a lock. */
-UNIV_INTERN
-void
-lock_rec_unlock(
-/*============*/
- trx_t* trx, /*!< in/out: transaction that has
- set a record lock */
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec, /*!< in: record */
- enum lock_mode lock_mode);/*!< in: LOCK_S or LOCK_X */
-/*********************************************************************//**
-Releases a transaction's locks, and releases possible other transactions
-waiting because of these locks. Change the state of the transaction to
-TRX_STATE_COMMITTED_IN_MEMORY. */
-UNIV_INTERN
-void
-lock_trx_release_locks(
-/*===================*/
- trx_t* trx); /*!< in/out: transaction */
-
-/*********************************************************************//**
-Cancels a waiting lock request and releases possible other transactions
-waiting behind it. */
-UNIV_INTERN
-void
-lock_cancel_waiting_and_release(
-/*============================*/
- lock_t* lock); /*!< in/out: waiting lock request */
-
-/*********************************************************************//**
-Removes locks on a table to be dropped or truncated.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock, that is going to be removed, is allowed to be a wait lock. */
-UNIV_INTERN
-void
-lock_remove_all_on_table(
-/*=====================*/
- dict_table_t* table, /*!< in: table to be dropped
- or truncated */
- ibool remove_also_table_sx_locks);/*!< in: also removes
- table S and X locks */
-
-/*********************************************************************//**
-Calculates the fold value of a page file address: used in inserting or
-searching for a lock in the hash table.
-@return folded value */
-UNIV_INLINE
-ulint
-lock_rec_fold(
-/*==========*/
- ulint space, /*!< in: space */
- ulint page_no)/*!< in: page number */
- MY_ATTRIBUTE((const));
-/*********************************************************************//**
-Calculates the hash value of a page file address: used in inserting or
-searching for a lock in the hash table.
-@return hashed value */
-UNIV_INLINE
-ulint
-lock_rec_hash(
-/*==========*/
- ulint space, /*!< in: space */
- ulint page_no);/*!< in: page number */
-
-/**********************************************************************//**
-Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
-if none found.
-@return bit index == heap number of the record, or ULINT_UNDEFINED if
-none found */
-UNIV_INTERN
-ulint
-lock_rec_find_set_bit(
-/*==================*/
- const lock_t* lock); /*!< in: record lock with at least one
- bit set */
-
-/*********************************************************************//**
-Gets the source table of an ALTER TABLE transaction. The table must be
-covered by an IX or IS table lock.
-@return the source table of transaction, if it is covered by an IX or
-IS table lock; dest if there is no source table, and NULL if the
-transaction is locking more than two tables or an inconsistency is
-found */
-UNIV_INTERN
-dict_table_t*
-lock_get_src_table(
-/*===============*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* dest, /*!< in: destination of ALTER TABLE */
- enum lock_mode* mode); /*!< out: lock mode of the source table */
-/*********************************************************************//**
-Determine if the given table is exclusively "owned" by the given
-transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
-on the table.
-@return TRUE if table is only locked by trx, with LOCK_IX, and
-possibly LOCK_AUTO_INC */
-UNIV_INTERN
-ibool
-lock_is_table_exclusive(
-/*====================*/
- const dict_table_t* table, /*!< in: table */
- const trx_t* trx) /*!< in: transaction */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Checks if a lock request lock1 has to wait for request lock2.
-@return TRUE if lock1 has to wait for lock2 to be removed */
-UNIV_INTERN
-ibool
-lock_has_to_wait(
-/*=============*/
- const lock_t* lock1, /*!< in: waiting lock */
- const lock_t* lock2); /*!< in: another lock; NOTE that it is
- assumed that this has a lock bit set
- on the same record as in lock1 if the
- locks are record locks */
-/*********************************************************************//**
-Reports that a transaction id is not sensible, i.e., it is in the future. */
-UNIV_INTERN
-void
-lock_report_trx_id_insanity(
-/*========================*/
- trx_id_t trx_id, /*!< in: trx id */
- const rec_t* rec, /*!< in: user record */
- dict_index_t* index, /*!< in: index */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
- trx_id_t max_trx_id) /*!< in: trx_sys_get_max_trx_id() */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Prints info of a table lock. */
-UNIV_INTERN
-void
-lock_table_print(
-/*=============*/
- FILE* file, /*!< in: file where to print */
- const lock_t* lock); /*!< in: table type lock */
-/*********************************************************************//**
-Prints info of a record lock. */
-UNIV_INTERN
-void
-lock_rec_print(
-/*===========*/
- FILE* file, /*!< in: file where to print */
- const lock_t* lock); /*!< in: record type lock */
-/*********************************************************************//**
-Prints info of locks for all transactions.
-@return FALSE if not able to obtain lock mutex and exits without
-printing info */
-UNIV_INTERN
-ibool
-lock_print_info_summary(
-/*====================*/
- FILE* file, /*!< in: file where to print */
- ibool nowait) /*!< in: whether to wait for the lock mutex */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Prints info of locks for each transaction. This function assumes that the
-caller holds the lock mutex and more importantly it will release the lock
-mutex on behalf of the caller. (This should be fixed in the future). */
-UNIV_INTERN
-void
-lock_print_info_all_transactions(
-/*=============================*/
- FILE* file); /*!< in: file where to print */
-/*********************************************************************//**
-Returns the approximate number of record locks (bits set in the bitmap) for
-this transaction. Since delete-marked records may be removed, the
-record count will not be precise.
-The caller must be holding lock_sys->mutex. */
-UNIV_INTERN
-ulint
-lock_number_of_rows_locked(
-/*=======================*/
- const trx_lock_t* trx_lock) /*!< in: transaction locks */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/*******************************************************************//**
-Gets the type of a lock. Non-inline version for using outside of the
-lock module.
-@return LOCK_TABLE or LOCK_REC */
-UNIV_INTERN
-ulint
-lock_get_type(
-/*==========*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-Gets the trx of the lock. Non-inline version for using outside of the
-lock module.
-@return trx_t* */
-UNIV_INTERN
-trx_t*
-lock_get_trx(
-/*=========*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-Gets the id of the transaction owning a lock.
-@return transaction id */
-UNIV_INTERN
-trx_id_t
-lock_get_trx_id(
-/*============*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-Gets the mode of a lock in a human readable string.
-The string should not be free()'d or modified.
-@return lock mode */
-UNIV_INTERN
-const char*
-lock_get_mode_str(
-/*==============*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-Gets the type of a lock in a human readable string.
-The string should not be free()'d or modified.
-@return lock type */
-UNIV_INTERN
-const char*
-lock_get_type_str(
-/*==============*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-Gets the id of the table on which the lock is.
-@return id of the table */
-UNIV_INTERN
-table_id_t
-lock_get_table_id(
-/*==============*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-Gets the name of the table on which the lock is.
-The string should not be free()'d or modified.
-@return name of the table */
-UNIV_INTERN
-const char*
-lock_get_table_name(
-/*================*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-For a record lock, gets the index on which the lock is.
-@return index */
-UNIV_INTERN
-const dict_index_t*
-lock_rec_get_index(
-/*===============*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-For a record lock, gets the name of the index on which the lock is.
-The string should not be free()'d or modified.
-@return name of the index */
-UNIV_INTERN
-const char*
-lock_rec_get_index_name(
-/*====================*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-For a record lock, gets the tablespace number on which the lock is.
-@return tablespace number */
-UNIV_INTERN
-ulint
-lock_rec_get_space_id(
-/*==================*/
- const lock_t* lock); /*!< in: lock */
-
-/*******************************************************************//**
-For a record lock, gets the page number on which the lock is.
-@return page number */
-UNIV_INTERN
-ulint
-lock_rec_get_page_no(
-/*=================*/
- const lock_t* lock); /*!< in: lock */
-/*******************************************************************//**
-Check if there are any locks (table or rec) against table.
-@return TRUE if locks exist */
-UNIV_INTERN
-ibool
-lock_table_has_locks(
-/*=================*/
- const dict_table_t* table); /*!< in: check if there are any locks
- held on records in this table or on the
- table itself */
-
-/*********************************************************************//**
-A thread which wakes up threads whose lock wait may have lasted too long.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(lock_wait_timeout_thread)(
-/*=====================================*/
- void* arg); /*!< in: a dummy parameter required by
- os_thread_create */
-
-/********************************************************************//**
-Releases a user OS thread waiting for a lock to be released, if the
-thread is already suspended. */
-UNIV_INTERN
-void
-lock_wait_release_thread_if_suspended(
-/*==================================*/
- que_thr_t* thr); /*!< in: query thread associated with the
- user OS thread */
-
-/***************************************************************//**
-Puts a user OS thread to wait for a lock to be released. If an error
-occurs during the wait trx->error_state associated with thr is
-!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
-are possible errors. DB_DEADLOCK is returned if selective deadlock
-resolution chose this transaction as a victim. */
-UNIV_INTERN
-void
-lock_wait_suspend_thread(
-/*=====================*/
- que_thr_t* thr); /*!< in: query thread associated with the
- user OS thread */
-/*********************************************************************//**
-Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
-function should be called at the end of an SQL statement, by the
-connection thread that owns the transaction (trx->mysql_thd). */
-UNIV_INTERN
-void
-lock_unlock_table_autoinc(
-/*======================*/
- trx_t* trx); /*!< in/out: transaction */
-/*********************************************************************//**
-Check whether the transaction has already been rolled back because it
-was selected as a deadlock victim, or if it has to wait then cancel
-the wait lock.
-@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-lock_trx_handle_wait(
-/*=================*/
- trx_t* trx) /*!< in/out: trx lock state */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Get the number of locks on a table.
-@return number of locks */
-UNIV_INTERN
-ulint
-lock_table_get_n_locks(
-/*===================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull));
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Checks that a transaction id is sensible, i.e., not in the future.
-@return true if ok */
-UNIV_INTERN
-bool
-lock_check_trx_id_sanity(
-/*=====================*/
- trx_id_t trx_id, /*!< in: trx id */
- const rec_t* rec, /*!< in: user record */
- dict_index_t* index, /*!< in: index */
- const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Check if the transaction holds any locks on the sys tables
-or its records.
-@return the strongest lock found on any sys table or 0 for none */
-UNIV_INTERN
-const lock_t*
-lock_trx_has_sys_table_locks(
-/*=========================*/
- const trx_t* trx) /*!< in: transaction to check */
- MY_ATTRIBUTE((warn_unused_result));
-
-/*******************************************************************//**
-Check if the transaction holds an exclusive lock on a record.
-@return whether the locks are held */
-UNIV_INTERN
-bool
-lock_trx_has_rec_x_lock(
-/*====================*/
- const trx_t* trx, /*!< in: transaction to check */
- const dict_table_t* table, /*!< in: table to check */
- const buf_block_t* block, /*!< in: buffer block of the record */
- ulint heap_no)/*!< in: record heap number */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* UNIV_DEBUG */
-
-/** Lock modes and types */
-/* @{ */
-#define LOCK_MODE_MASK 0xFUL /*!< mask used to extract mode from the
- type_mode field in a lock */
-/** Lock types */
-/* @{ */
-#define LOCK_TABLE 16 /*!< table lock */
-#define LOCK_REC 32 /*!< record lock */
-#define LOCK_TYPE_MASK 0xF0UL /*!< mask used to extract lock type from the
- type_mode field in a lock */
-#if LOCK_MODE_MASK & LOCK_TYPE_MASK
-# error "LOCK_MODE_MASK & LOCK_TYPE_MASK"
-#endif
-
-#define LOCK_WAIT 256 /*!< Waiting lock flag; when set, it
- means that the lock has not yet been
- granted, it is just waiting for its
- turn in the wait queue */
-/* Precise modes */
-#define LOCK_ORDINARY 0 /*!< this flag denotes an ordinary
- next-key lock in contrast to LOCK_GAP
- or LOCK_REC_NOT_GAP */
-#define LOCK_GAP 512 /*!< when this bit is set, it means that the
- lock holds only on the gap before the record;
- for instance, an x-lock on the gap does not
- give permission to modify the record on which
- the bit is set; locks of this type are created
- when records are removed from the index chain
- of records */
-#define LOCK_REC_NOT_GAP 1024 /*!< this bit means that the lock is only on
- the index record and does NOT block inserts
- to the gap before the index record; this is
- used in the case when we retrieve a record
- with a unique key, and is also used in
- locking plain SELECTs (not part of UPDATE
- or DELETE) when the user has set the READ
- COMMITTED isolation level */
-#define LOCK_INSERT_INTENTION 2048 /*!< this bit is set when we place a waiting
- gap type record lock request in order to let
- an insert of an index record to wait until
- there are no conflicting locks by other
- transactions on the gap; note that this flag
- remains set when the waiting lock is granted,
- or if the lock is inherited to a neighboring
- record */
-
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK
-# error
-#endif
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_TYPE_MASK
-# error
-#endif
-/* @} */
-
-/** Lock operation struct */
-struct lock_op_t{
- dict_table_t* table; /*!< table to be locked */
- enum lock_mode mode; /*!< lock mode */
-};
-
-/** The lock system struct */
-struct lock_sys_t{
- ib_mutex_t mutex; /*!< Mutex protecting the
- locks */
- hash_table_t* rec_hash; /*!< hash table of the record
- locks */
- ulint rec_num; /*!< NOTE(review): undocumented here; presumably a record lock count - confirm */
- ib_mutex_t wait_mutex; /*!< Mutex protecting
- waiting_threads and last_slot */
- srv_slot_t* waiting_threads; /*!< Array of user threads
- suspended while waiting for
- locks within InnoDB, protected
- by the lock_sys->wait_mutex;
- os_event_set() and
- os_event_reset() on
- waiting_threads[]->event
- are protected by
- trx_t::mutex */
- srv_slot_t* last_slot; /*!< highest slot ever used
- in the waiting_threads array,
- protected by
- lock_sys->wait_mutex */
- ibool rollback_complete;
- /*!< TRUE if rollback of all
- recovered transactions is
- complete. Protected by
- lock_sys->mutex */
-
- ulint n_lock_max_wait_time; /*!< Max wait time */
-
- os_event_t timeout_event; /*!< An event waited for by
- lock_wait_timeout_thread.
- Not protected by a mutex,
- but the waits are timed.
- Signaled on shutdown only. */
-
- bool timeout_thread_active; /*!< True if the timeout thread
- is running */
-};
-
-/** The lock system */
-extern lock_sys_t* lock_sys;
-
-/** Try to acquire lock_sys->mutex without waiting (see mutex_enter_nowait()). */
-#define lock_mutex_enter_nowait() mutex_enter_nowait(&lock_sys->mutex)
-
-/** Test if lock_sys->mutex is owned. */
-#define lock_mutex_own() mutex_own(&lock_sys->mutex)
-
-/** Acquire the lock_sys->mutex. */
-#define lock_mutex_enter() do { \
- mutex_enter(&lock_sys->mutex); \
-} while (0)
-
-/** Release the lock_sys->mutex. */
-#define lock_mutex_exit() do { \
- mutex_exit(&lock_sys->mutex); \
-} while (0)
-
-/** Test if lock_sys->wait_mutex is owned. */
-#define lock_wait_mutex_own() mutex_own(&lock_sys->wait_mutex)
-
-/** Acquire the lock_sys->wait_mutex. */
-#define lock_wait_mutex_enter() do { \
- mutex_enter(&lock_sys->wait_mutex); \
-} while (0)
-
-/** Release the lock_sys->wait_mutex. */
-#define lock_wait_mutex_exit() do { \
- mutex_exit(&lock_sys->wait_mutex); \
-} while (0)
-
-/*******************************************************************//**
-Get lock mode and table/index name
-@return string containing lock info */
-std::string
-lock_get_info(
- const lock_t*);
-
-#ifndef UNIV_NONINL
-#include "lock0lock.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/lock0lock.ic b/storage/xtradb/include/lock0lock.ic
deleted file mode 100644
index 736936954cb..00000000000
--- a/storage/xtradb/include/lock0lock.ic
+++ /dev/null
@@ -1,92 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/lock0lock.ic
-The transaction lock system
-
-Created 5/7/1996 Heikki Tuuri
-*******************************************************/
-
-#include "sync0sync.h"
-#include "srv0srv.h"
-#include "dict0dict.h"
-#include "row0row.h"
-#include "trx0sys.h"
-#include "trx0trx.h"
-#include "buf0buf.h"
-#include "page0page.h"
-#include "page0cur.h"
-#include "row0vers.h"
-#include "que0que.h"
-#include "btr0cur.h"
-#include "read0read.h"
-#include "log0recv.h"
-
-/*********************************************************************//**
-Calculates the fold value of a page address (space id, page number) with
-ut_fold_ulint_pair(); used in inserting or searching for a lock in the hash table.
-@return folded value */
-UNIV_INLINE
-ulint
-lock_rec_fold(
-/*==========*/
- ulint space, /*!< in: space id of the page */
- ulint page_no)/*!< in: page number */
-{
- return(ut_fold_ulint_pair(space, page_no));
-}
-
-/*********************************************************************//**
-Calculates the hash value of a page address in lock_sys->rec_hash: the fold
-from lock_rec_fold(space, page_no) is passed to hash_calc_hash().
-@return hashed value */
-UNIV_INLINE
-ulint
-lock_rec_hash(
-/*==========*/
- ulint space, /*!< in: space id of the page */
- ulint page_no)/*!< in: page number */
-{
- return(hash_calc_hash(lock_rec_fold(space, page_no),
- lock_sys->rec_hash));
-}
-
-/*********************************************************************//**
-Gets the heap_no of the smallest user record on a page: the record after the infimum.
-@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
-UNIV_INLINE
-ulint
-lock_get_min_heap_no(
-/*=================*/
- const buf_block_t* block) /*!< in: buffer block; its frame is read */
-{
- const page_t* page = block->frame;
-
- if (page_is_comp(page)) { /* next record after PAGE_NEW_INFIMUM */
- return(rec_get_heap_no_new(
- page
- + rec_get_next_offs(page + PAGE_NEW_INFIMUM,
- TRUE)));
- } else { /* next record after PAGE_OLD_INFIMUM */
- return(rec_get_heap_no_old(
- page
- + rec_get_next_offs(page + PAGE_OLD_INFIMUM,
- FALSE)));
- }
-}
diff --git a/storage/xtradb/include/lock0priv.h b/storage/xtradb/include/lock0priv.h
deleted file mode 100644
index 7a74cbdc2e3..00000000000
--- a/storage/xtradb/include/lock0priv.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, MariaDB Corporation
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/lock0priv.h
-Lock module internal structures and methods.
-
-Created July 12, 2007 Vasil Dimov
-*******************************************************/
-
-#ifndef lock0priv_h
-#define lock0priv_h
-
-#ifndef LOCK_MODULE_IMPLEMENTATION
-/* If you need to access members of the structures defined in this
-file, please write appropriate functions that retrieve them and put
-those functions in lock/ */
-#error Do not include lock0priv.h outside of the lock/ module
-#endif
-
-#include "univ.i"
-#include "dict0types.h"
-#include "hash0hash.h"
-#include "trx0types.h"
-#include "ut0lst.h"
-
-/** A table lock */
-struct lock_table_t {
- dict_table_t* table; /*!< database table in dictionary
- cache */
- UT_LIST_NODE_T(lock_t)
- locks; /*!< list of locks on the same
- table */
-};
-
-/** Record lock for a page */
-struct lock_rec_t {
- ulint space; /*!< space id */
- ulint page_no; /*!< page number */
- ulint n_bits; /*!< number of bits in the lock
- bitmap; NOTE: the lock bitmap is
- placed immediately after the
- lock struct */
-};
-
-/** Lock struct; protected by lock_sys->mutex */
-struct lock_t {
- trx_t* trx; /*!< transaction owning the
- lock */
- UT_LIST_NODE_T(lock_t)
- trx_locks; /*!< list of the locks of the
- transaction */
- ulint type_mode; /*!< lock type, mode, LOCK_GAP or
- LOCK_REC_NOT_GAP,
- LOCK_INSERT_INTENTION,
- wait flag, ORed */
- hash_node_t hash; /*!< hash chain node for a record
- lock */
- dict_index_t* index; /*!< index for a record lock */
-
- /* Statistics: when the lock was requested, and how long
- the request had to wait before it was granted */
- time_t requested_time; /*!< Lock request time */
- ulint wait_time; /*!< Time waited for this lock, or 0 */
-
- union {
- lock_table_t tab_lock;/*!< table lock */
- lock_rec_t rec_lock;/*!< record lock */
- } un_member; /*!< lock details */
-};
-
-/*********************************************************************//**
-Gets the type of a lock.
-@return LOCK_TABLE or LOCK_REC */
-UNIV_INLINE
-ulint
-lock_get_type_low(
-/*==============*/
- const lock_t* lock); /*!< in: lock */
-
-/*********************************************************************//**
-Gets the previous record lock set on a record.
-@return previous lock on the same record, NULL if none exists */
-UNIV_INTERN
-const lock_t*
-lock_rec_get_prev(
-/*==============*/
- const lock_t* in_lock,/*!< in: record lock */
- ulint heap_no);/*!< in: heap number of the record */
-
-/*********************************************************************//**
-Checks if some transaction has an implicit x-lock on a record in a clustered
-index.
-@return transaction id of the transaction which has the x-lock, or 0 */
-UNIV_INLINE
-trx_id_t
-lock_clust_rec_some_has_impl(
-/*=========================*/
- const rec_t* rec, /*!< in: user record */
- const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-#ifndef UNIV_NONINL
-#include "lock0priv.ic"
-#endif
-
-#endif /* lock0priv_h */
diff --git a/storage/xtradb/include/lock0priv.ic b/storage/xtradb/include/lock0priv.ic
deleted file mode 100644
index 6b70dc33d3c..00000000000
--- a/storage/xtradb/include/lock0priv.ic
+++ /dev/null
@@ -1,67 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/lock0priv.ic
-Lock module internal inline methods.
-
-Created July 16, 2007 Vasil Dimov
-*******************************************************/
-
-/* This file contains only methods which are used in
-lock/lock0* files, other than lock/lock0lock.cc.
-I.e. lock/lock0lock.cc contains more internal inline
-methods but they are used only in that file. */
-
-#ifndef LOCK_MODULE_IMPLEMENTATION
-#error Do not include lock0priv.ic outside of the lock/ module
-#endif
-
-/*********************************************************************//**
-Extracts the lock type bits from the type_mode field of a lock.
-@return lock->type_mode masked with LOCK_TYPE_MASK, i.e. LOCK_TABLE
-or LOCK_REC */
-UNIV_INLINE
-ulint
-lock_get_type_low(
-/*==============*/
- const lock_t* lock) /*!< in: lock; asserted non-NULL with ut_ad() */
-{
- ulint lock_type;
-
- ut_ad(lock);
-
- /* Keep only the bits selected by LOCK_TYPE_MASK. */
- lock_type = lock->type_mode & LOCK_TYPE_MASK;
-
- return(lock_type);
-}
-
-/*********************************************************************//**
-Checks if some transaction has an implicit x-lock on a record in a clustered
-index. The answer is the transaction id read from the record with
-row_get_rec_trx_id().
-@return transaction id of the transaction which has the x-lock, or 0 */
-UNIV_INLINE
-trx_id_t
-lock_clust_rec_some_has_impl(
-/*=========================*/
- const rec_t* rec, /*!< in: user record */
- const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- trx_id_t impl_trx_id;
-
- /* Debug-only sanity checks on the arguments. */
- ut_ad(dict_index_is_clust(index));
- ut_ad(page_rec_is_user_rec(rec));
-
- impl_trx_id = row_get_rec_trx_id(rec, index, offsets);
-
- return(impl_trx_id);
-}
-
-/* vim: set filetype=c: */
diff --git a/storage/xtradb/include/lock0types.h b/storage/xtradb/include/lock0types.h
deleted file mode 100644
index cf32e72f864..00000000000
--- a/storage/xtradb/include/lock0types.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/lock0types.h
-The transaction lock system global types
-
-Created 5/7/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef lock0types_h
-#define lock0types_h
-
-#define lock_t ib_lock_t
-struct lock_t;
-struct lock_sys_t;
-
-/* Basic lock modes. Enumerators without an explicit initializer take
-consecutive values, continuing from LOCK_IS = 0. */
-enum lock_mode {
- LOCK_IS = 0, /* intention shared */
- LOCK_IX, /* intention exclusive (value 1) */
- LOCK_S, /* shared (value 2) */
- LOCK_X, /* exclusive (value 3) */
- LOCK_AUTO_INC, /* locks the auto-inc counter of a table
- in an exclusive mode (value 4) */
- LOCK_NONE, /* this is used elsewhere to note consistent read
- (value 5) */
- LOCK_NUM = LOCK_NONE, /* number of lock modes; has the same value
- as LOCK_NONE, so it counts the five modes
- LOCK_IS .. LOCK_AUTO_INC */
- LOCK_NONE_UNSET = 255 /* NOTE(review): not documented here; presumably
- a "mode not set" sentinel -- confirm at the
- use sites */
-};
-
-
-#endif
diff --git a/storage/xtradb/include/log0crypt.h b/storage/xtradb/include/log0crypt.h
deleted file mode 100644
index 6b164e90d6e..00000000000
--- a/storage/xtradb/include/log0crypt.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2013, 2015, Google Inc. All Rights Reserved.
-Copyright (C) 2014, 2016, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-/**************************************************//**
-@file include/log0crypt.h
-Innodb log encrypt/decrypt
-
-Created 11/25/2013 Minli Zhu
-Modified Jan Lindström jan.lindstrom@mariadb.com
-*******************************************************/
-#ifndef log0crypt_h
-#define log0crypt_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "my_crypt.h"
-
-typedef int Crypt_result;
-
-/* If true, enable redo log encryption. */
-extern my_bool srv_encrypt_log;
-
-/***********************************************************************
-Set next checkpoint's key version to latest one, and generate new key */
-UNIV_INTERN
-void
-log_crypt_set_ver_and_key(
-/*======================*/
- ib_uint64_t next_checkpoint_no);/*!< in: next checkpoint no */
-
-
-/*********************************************************************//**
-Writes the crypto (version, msg and iv) info, which has been used for
-log blocks with lsn <= this checkpoint's lsn, to a log header's
-checkpoint buf. */
-UNIV_INTERN
-void
-log_crypt_write_checkpoint_buf(
-/*===========================*/
- byte* buf); /*!< in/out: checkpoint buffer */
-
-/*********************************************************************//**
-Read the crypto (version, msg and iv) info, which has been used for
-log blocks with lsn <= this checkpoint's lsn, from a log header's
-checkpoint buf. */
-UNIV_INTERN
-bool
-log_crypt_read_checkpoint_buf(
-/*===========================*/
- const byte* buf); /*!< in: checkpoint buffer */
-
-/********************************************************
-Encrypts one or more log blocks before they are flushed to disk */
-UNIV_INTERN
-void
-log_encrypt_before_write(
-/*=====================*/
- ib_uint64_t next_checkpoint_no, /*!< in: next checkpoint no
- (NOTE(review): previously described as
- "log group to be flushed", which does not
- match this parameter; confirm against the
- definition) */
- byte* block, /*!< in/out: pointer to a log block */
- const ulint size); /*!< in: size of log blocks */
-
-/********************************************************
-Decrypt a specified log segment after it is read from a log file to a buffer.
-*/
-UNIV_INTERN
-void
-log_decrypt_after_read(
-/*===================*/
- byte* frame, /*!< in/out: log segment */
- const ulint size); /*!< in: log segment size */
-
-/* Error codes for crypt info. This is the type of the out-parameter of
-log_crypt_block_maybe_encrypted() and of the argument of
-log_crypt_print_error(). */
-typedef enum {
- LOG_UNENCRYPTED = 0,
- LOG_CRYPT_KEY_NOT_FOUND = 1,
- LOG_DECRYPT_MAYBE_FAILED = 2
-} log_crypt_err_t;
-
-/********************************************************
-Checks whether the checkpoint information is encrypted. The check is
-based on whether the log group has crypt info and, if it does, on
-whether the key version in that crypt info differs from the
-unencrypted key version. There is no reliable way to distinguish an
-encrypted log block from a corrupted log block, but if log block
-corruption is found, this function is used to find out whether the
-log block may be encrypted while the encryption key, key management
-plugin or encryption algorithm does not match.
-@return TRUE, if log block may be encrypted */
-UNIV_INTERN
-ibool
-log_crypt_block_maybe_encrypted(
-/*============================*/
- const byte* log_block, /*!< in: log block */
- log_crypt_err_t* err_info); /*!< out: error info */
-
-/********************************************************
-Print crypt error message to error log */
-UNIV_INTERN
-void
-log_crypt_print_error(
-/*==================*/
- log_crypt_err_t err_info); /*!< in: error info (passed by value) */
-
-/*********************************************************************//**
-Print checkpoint no from log block and all encryption keys from
-checkpoints if they are present. Used for problem analysis. */
-void
-log_crypt_print_checkpoint_keys(
-/*============================*/
- const byte* log_block);
-
-#endif // log0crypt.h
diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h
deleted file mode 100644
index a55c1ea818c..00000000000
--- a/storage/xtradb/include/log0log.h
+++ /dev/null
@@ -1,1077 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved.
-Copyright (c) 2009, Google Inc.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/log0log.h
-Database log
-
-Created 12/9/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef log0log_h
-#define log0log_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "ut0lst.h"
-#ifndef UNIV_HOTBACKUP
-#include "sync0sync.h"
-#include "sync0rw.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "log0crypt.h"
-
-/* Type used for all log sequence number storage and arithmetics */
-typedef ib_uint64_t lsn_t;
-#define LSN_MAX IB_UINT64_MAX
-
-#define LSN_PF UINT64PF
-
-/** Redo log buffer */
-struct log_t;
-/** Redo log group */
-struct log_group_t;
-
-#ifdef UNIV_DEBUG
-/** Flag: write to log file? */
-extern ibool log_do_write;
-/** Flag: enable debug output when writing to the log? */
-extern ibool log_debug_writes;
-#else /* UNIV_DEBUG */
-/** Write to log */
-# define log_do_write TRUE
-#endif /* UNIV_DEBUG */
-
-/** Magic value to use instead of log checksums when they are disabled */
-#define LOG_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
-
-typedef ulint (*log_checksum_func_t)(const byte* log_block);
-
-/** Pointer to the log checksum calculation function. Protected with
-log_sys->mutex. */
-extern log_checksum_func_t log_checksum_algorithm_ptr;
-
-/** Wait modes for log_write_up_to @{ */
-#define LOG_NO_WAIT 91
-#define LOG_WAIT_ONE_GROUP 92
-#define LOG_WAIT_ALL_GROUPS 93
-/* @} */
-/** Maximum number of log groups in log_group_t::checkpoint_buf */
-#define LOG_MAX_N_GROUPS 32
-
-#define IB_ARCHIVED_LOGS_PREFIX "ib_log_archive_"
-#define IB_ARCHIVED_LOGS_PREFIX_LEN (sizeof(IB_ARCHIVED_LOGS_PREFIX) - 1)
-#define IB_ARCHIVED_LOGS_SERIAL_LEN 20
-
-/*******************************************************************//**
-Calculates where in log files we find a specified lsn.
-@return log file number */
-UNIV_INTERN
-ulint
-log_calc_where_lsn_is(
-/*==================*/
- ib_int64_t* log_file_offset, /*!< out: offset in that file
- (including the header) */
- ib_uint64_t first_header_lsn, /*!< in: first log file start
- lsn */
- ib_uint64_t lsn, /*!< in: lsn whose position to
- determine */
- ulint n_log_files, /*!< in: total number of log
- files */
- ib_int64_t log_file_size); /*!< in: log file size
- (including the header) */
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Writes to the log the string given. The log must be released with
-log_release.
-@return end lsn of the log record, zero if did not succeed */
-UNIV_INLINE
-lsn_t
-log_reserve_and_write_fast(
-/*=======================*/
- const void* str, /*!< in: string */
- ulint len, /*!< in: string length */
- lsn_t* start_lsn);/*!< out: start lsn of the log record */
-/***********************************************************************//**
-Releases the log mutex. */
-UNIV_INLINE
-void
-log_release(void);
-/*=============*/
-/***********************************************************************//**
-Checks if there is need for a log buffer flush or a new checkpoint, and does
-this if yes. Any database operation should call this when it has modified
-more than about 4 pages. NOTE that this function may only be called when the
-OS thread owns no synchronization objects except the dictionary mutex. */
-UNIV_INLINE
-void
-log_free_check(void);
-/*================*/
-/**************************************************************************//**
-Locks the log mutex and opens the log for log_write_low. The log must be closed
-with log_close and released with log_release.
-@return start lsn of the log record */
-UNIV_INLINE
-lsn_t
-log_reserve_and_open(
-/*=================*/
- ulint len); /*!< in: length of data to be catenated */
-/************************************************************//**
-Opens the log for log_write_low. The log must be closed with log_close.
-@return start lsn of the log record */
-UNIV_INTERN
-lsn_t
-log_open(
-/*=====*/
- ulint len); /*!< in: length of data to be catenated */
-/************************************************************//**
-Writes to the log the string given. It is assumed that the caller holds the
-log mutex. */
-UNIV_INTERN
-void
-log_write_low(
-/*==========*/
- byte* str, /*!< in: string */
- ulint str_len); /*!< in: string length */
-/************************************************************//**
-Closes the log.
-@return lsn */
-UNIV_INTERN
-lsn_t
-log_close(void);
-/*===========*/
-/************************************************************//**
-Gets the current lsn.
-@return current lsn */
-UNIV_INLINE
-lsn_t
-log_get_lsn(void);
-/*=============*/
-/************************************************************//**
-Gets the current lsn.
-@return current lsn */
-UNIV_INLINE
-lsn_t
-log_get_lsn_nowait(void);
-/*=============*/
-/************************************************************//**
-Gets the last lsn that is fully flushed to disk.
-@return last flushed lsn */
-UNIV_INLINE
-ib_uint64_t
-log_get_flush_lsn(void);
-/*=============*/
-/****************************************************************
-Gets the log group capacity. It is OK to read the value without
-holding log_sys->mutex because it is constant.
-@return log group capacity */
-UNIV_INLINE
-lsn_t
-log_get_capacity(void);
-/*==================*/
-/****************************************************************
-Get log_sys::max_modified_age_async. It is OK to read the value without
-holding log_sys::mutex because it is constant.
-@return max_modified_age_async */
-UNIV_INLINE
-lsn_t
-log_get_max_modified_age_async(void);
-/*================================*/
-/******************************************************//**
-Initializes the log. */
-UNIV_INTERN
-void
-log_init(void);
-/*==========*/
-/******************************************************************//**
-Inits a log group to the log system. */
-UNIV_INTERN
-void
-log_group_init(
-/*===========*/
- ulint id, /*!< in: group id */
- ulint n_files, /*!< in: number of log files */
- lsn_t file_size, /*!< in: log file size in bytes */
- ulint space_id, /*!< in: space id of the file space
- which contains the log files of this
- group */
- ulint archive_space_id); /*!< in: space id of the file space
- which contains some archived log
- files for this group; currently, only
- for the first log group this is
- used */
-/******************************************************//**
-Completes an i/o to a log file. */
-UNIV_INTERN
-void
-log_io_complete(
-/*============*/
- log_group_t* group); /*!< in: log group */
-/******************************************************//**
-This function is called, e.g., when a transaction wants to commit. It checks
-that the log has been written to the log file up to the last log entry written
-by the transaction. If there is a flush running, it waits and checks if the
-flush flushed enough. If not, starts a new flush. */
-UNIV_INTERN
-void
-log_write_up_to(
-/*============*/
- lsn_t lsn, /*!< in: log sequence number up to which
- the log should be written, LSN_MAX if not specified */
- ulint wait, /*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
- or LOG_WAIT_ALL_GROUPS */
- ibool flush_to_disk);
- /*!< in: TRUE if we want the written log
- also to be flushed to disk */
-/****************************************************************//**
-Does a synchronous flush of the log buffer to disk. */
-UNIV_INTERN
-void
-log_buffer_flush_to_disk(void);
-/*==========================*/
-/****************************************************************//**
-This function writes the log buffer to the log file and if 'flush'
-is set it forces a flush of the log file as well. This is meant to be
-called from background master thread only as it does not wait for
-the write (+ possible flush) to finish. */
-UNIV_INTERN
-void
-log_buffer_sync_in_background(
-/*==========================*/
- ibool flush); /*!< in: flush the logs to disk */
-/******************************************************//**
-Makes a checkpoint. Note that this function does not flush dirty
-blocks from the buffer pool: it only checks what the lsn of the oldest
-modification in the pool is, and writes information about the lsn in
-log files. Use log_make_checkpoint_at to flush also the pool.
-@return TRUE if success, FALSE if a checkpoint write was already running */
-UNIV_INTERN
-ibool
-log_checkpoint(
-/*===========*/
- ibool sync, /*!< in: TRUE if synchronous operation is
- desired */
- ibool write_always, /*!< in: the function normally checks if
- the new checkpoint would have a greater
- lsn than the previous one: if not, then no
- physical write is done; by setting this
- parameter TRUE, a physical write will always be
- made to log files */
- ibool safe_to_ignore);/*!< in: TRUE if checkpoint can be ignored in
- the case checkpoints are disabled */
-
-/****************************************************************//**
-Makes a checkpoint at a given lsn or later. */
-UNIV_INTERN
-void
-log_make_checkpoint_at(
-/*===================*/
- lsn_t lsn, /*!< in: make a checkpoint at this or a
- later lsn, if LSN_MAX, makes
- a checkpoint at the latest lsn */
- ibool write_always); /*!< in: the function normally checks if
- the new checkpoint would have a
- greater lsn than the previous one: if
- not, then no physical write is done;
- by setting this parameter TRUE, a
- physical write will always be made to
- log files */
-/****************************************************************//**
-Disable checkpoints. This is used when doing a volume snapshot
-to ensure that we don't get a checkpoint between snapshotting two
-different volumes */
-UNIV_INTERN
-ibool log_disable_checkpoint();
-
-/****************************************************************//**
-Enable checkpoints that were disabled with log_disable_checkpoint() */
-UNIV_INTERN
-void log_enable_checkpoint();
-
-/****************************************************************//**
-Makes a checkpoint at the latest lsn and writes it to first page of each
-data file in the database, so that we know that the file spaces contain
-all modifications up to that lsn. This can only be called at database
-shutdown. This function also writes all log in log files to the log archive. */
-UNIV_INTERN
-void
-logs_empty_and_mark_files_at_shutdown(void);
-/*=======================================*/
-/******************************************************//**
-Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
-UNIV_INTERN
-void
-log_group_read_checkpoint_info(
-/*===========================*/
- log_group_t* group, /*!< in: log group */
- ulint field); /*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
-/*******************************************************************//**
-Gets info from a checkpoint about a log group. */
-UNIV_INTERN
-void
-log_checkpoint_get_nth_group_info(
-/*==============================*/
- const byte* buf, /*!< in: buffer containing checkpoint info */
- ulint n, /*!< in: nth slot */
- lsn_t* file_no);/*!< out: archived file number */
-/******************************************************//**
-Writes checkpoint info to groups. */
-UNIV_INTERN
-void
-log_groups_write_checkpoint_info(void);
-/*==================================*/
-/********************************************************************//**
-Starts an archiving operation.
-@return TRUE if succeed, FALSE if an archiving operation was already running */
-UNIV_INTERN
-ibool
-log_archive_do(
-/*===========*/
- ibool sync, /*!< in: TRUE if synchronous operation is desired */
- ulint* n_bytes);/*!< out: archive log buffer size, 0 if nothing to
- archive */
-/****************************************************************//**
-Starts again archiving which has been stopped.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_start(void);
-/*===================*/
-/****************************************************************//**
-Stop archiving the log so that a gap may occur in the archived log files.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_noarchivelog(void);
-/*==========================*/
-/****************************************************************//**
-Start archiving the log so that a gap may occur in the archived log files.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_archivelog(void);
-/*========================*/
-/******************************************************//**
-Generates an archived log file name. */
-UNIV_INTERN
-void
-log_archived_file_name_gen(
-/*=======================*/
- char* buf, /*!< in: buffer where to write */
- ulint buf_len,/*!< in: buffer length */
- ulint id, /*!< in: group id */
- lsn_t file_no);/*!< in: file number */
-
-UNIV_INTERN
-void
-log_archived_get_offset(
-/*====================*/
- log_group_t* group, /*!< in: log group */
- lsn_t file_no, /*!< in: archive log file number */
- lsn_t archived_lsn, /*!< in: last archived LSN */
- lsn_t* offset); /*!< out: offset within archived file */
-#else /* !UNIV_HOTBACKUP */
-/******************************************************//**
-Writes info to a buffer of a log group when log files are created in
-backup restoration. */
-UNIV_INTERN
-void
-log_reset_first_header_and_checkpoint(
-/*==================================*/
- byte* hdr_buf,/*!< in: buffer which will be written to the
- start of the first log file */
- ib_uint64_t start); /*!< in: lsn of the start of the first log file;
- we pretend that there is a checkpoint at
- start + LOG_BLOCK_HDR_SIZE */
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************************//**
-Checks that there is enough free space in the log to start a new query step.
-Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
-function may only be called if the calling thread owns no synchronization
-objects! */
-UNIV_INTERN
-void
-log_check_margins(void);
-/*===================*/
-#ifndef UNIV_HOTBACKUP
-/******************************************************//**
-Reads a specified log segment to a buffer. */
-UNIV_INTERN
-void
-log_group_read_log_seg(
-/*===================*/
- ulint type, /*!< in: LOG_ARCHIVE or LOG_RECOVER */
- byte* buf, /*!< in: buffer where to read */
- log_group_t* group, /*!< in: log group */
- lsn_t start_lsn, /*!< in: read area start */
- lsn_t end_lsn, /*!< in: read area end */
- ibool release_mutex); /*!< in: whether the log_sys->mutex
- should be released before the read */
-/******************************************************//**
-Writes a buffer to a log file group. */
-UNIV_INTERN
-void
-log_group_write_buf(
-/*================*/
- log_group_t* group, /*!< in: log group */
- byte* buf, /*!< in: buffer */
- ulint len, /*!< in: buffer len; must be divisible
- by OS_FILE_LOG_BLOCK_SIZE */
- lsn_t start_lsn, /*!< in: start lsn of the buffer; must
- be divisible by
- OS_FILE_LOG_BLOCK_SIZE */
- ulint new_data_offset);/*!< in: start offset of new data in
- buf: this parameter is used to decide
- if we have to write a new log file
- header */
-/********************************************************//**
-Sets the field values in group to correspond to a given lsn. For this function
-to work, the values must already be correctly initialized to correspond to
-some lsn, for instance, a checkpoint lsn. */
-UNIV_INTERN
-void
-log_group_set_fields(
-/*=================*/
- log_group_t* group, /*!< in/out: group */
- lsn_t lsn); /*!< in: lsn for which the values should be
- set */
-/******************************************************//**
-Calculates the data capacity of a log group, when the log file headers are not
-included.
-@return capacity in bytes */
-UNIV_INTERN
-lsn_t
-log_group_get_capacity(
-/*===================*/
- const log_group_t* group); /*!< in: log group */
-#endif /* !UNIV_HOTBACKUP */
-/************************************************************//**
-Gets a log block flush bit.
-@return TRUE if this block was the first to be written in a log flush */
-UNIV_INLINE
-ibool
-log_block_get_flush_bit(
-/*====================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Gets a log block number stored in the header.
-@return log block number stored in the block header */
-UNIV_INLINE
-ulint
-log_block_get_hdr_no(
-/*=================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Gets a log block data length.
-@return log block data length measured as a byte offset from the block start */
-UNIV_INLINE
-ulint
-log_block_get_data_len(
-/*===================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Sets the log block data length. */
-UNIV_INLINE
-void
-log_block_set_data_len(
-/*===================*/
- byte* log_block, /*!< in/out: log block */
- ulint len); /*!< in: data length */
-/************************************************************//**
-Calculates the checksum for a log block.
-@return checksum */
-UNIV_INLINE
-ulint
-log_block_calc_checksum(
-/*====================*/
- const byte* block); /*!< in: log block */
-/************************************************************//**
-Gets a log block checksum field value.
-@return checksum */
-UNIV_INLINE
-ulint
-log_block_get_checksum(
-/*===================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Sets a log block checksum field value. */
-UNIV_INLINE
-void
-log_block_set_checksum(
-/*===================*/
- byte* log_block, /*!< in/out: log block */
- ulint checksum); /*!< in: checksum */
-/************************************************************//**
-Gets a log block first mtr log record group offset.
-@return first mtr log record group byte offset from the block start, 0
-if none */
-UNIV_INLINE
-ulint
-log_block_get_first_rec_group(
-/*==========================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Sets the log block first mtr log record group offset. */
-UNIV_INLINE
-void
-log_block_set_first_rec_group(
-/*==========================*/
- byte* log_block, /*!< in/out: log block */
- ulint offset); /*!< in: offset, 0 if none */
-/************************************************************//**
-Gets a log block checkpoint number field (4 lowest bytes).
-@return checkpoint no (4 lowest bytes) */
-UNIV_INLINE
-ulint
-log_block_get_checkpoint_no(
-/*========================*/
- const byte* log_block); /*!< in: log block */
-/************************************************************//**
-Initializes a log block in the log buffer. */
-UNIV_INLINE
-void
-log_block_init(
-/*===========*/
- byte* log_block, /*!< in: pointer to the log buffer */
- lsn_t lsn); /*!< in: lsn within the log block */
-/************************************************************//**
-Initializes a log block in the log buffer in the old, < 3.23.52 format, where
-there was no checksum yet. */
-UNIV_INLINE
-void
-log_block_init_in_old_format(
-/*=========================*/
- byte* log_block, /*!< in: pointer to the log buffer */
- lsn_t lsn); /*!< in: lsn within the log block */
-/************************************************************//**
-Converts a lsn to a log block number.
-@return log block number, it is > 0 and <= 1G */
-UNIV_INLINE
-ulint
-log_block_convert_lsn_to_no(
-/*========================*/
- lsn_t lsn); /*!< in: lsn of a byte within the block */
-/******************************************************//**
-Prints info of the log. */
-UNIV_INTERN
-void
-log_print(
-/*======*/
- FILE* file); /*!< in: file where to print */
-/******************************************************//**
-Peeks the current lsn.
-@return TRUE if success, FALSE if could not get the log system mutex */
-UNIV_INTERN
-ibool
-log_peek_lsn(
-/*=========*/
- lsn_t* lsn); /*!< out: if returns TRUE, current lsn is here */
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
-void
-log_refresh_stats(void);
-/*===================*/
-/********************************************************//**
-Closes all log groups. */
-UNIV_INTERN
-void
-log_group_close_all(void);
-/*=====================*/
-/********************************************************//**
-Shutdown the log system but do not release all the memory. */
-UNIV_INTERN
-void
-log_shutdown(void);
-/*==============*/
-/********************************************************//**
-Free the log system data structures. */
-UNIV_INTERN
-void
-log_mem_free(void);
-/*==============*/
-
-/****************************************************************//**
-Safely reads the log_sys->tracked_lsn value. The writer counterpart function
-is log_set_tracked_lsn() in log0online.c.
-
-@return log_sys->tracked_lsn value. */
-UNIV_INLINE
-lsn_t
-log_get_tracked_lsn(void);
-/*=====================*/
-
-extern log_t* log_sys;
-
-/* Values used as flags */
-#define LOG_FLUSH 7652559
-#define LOG_CHECKPOINT 78656949
-#ifdef UNIV_LOG_ARCHIVE
-# define LOG_ARCHIVE 11122331
-#endif /* UNIV_LOG_ARCHIVE */
-#define LOG_RECOVER 98887331
-
-/* The counting of lsn's starts from this value: this must be non-zero */
-#define LOG_START_LSN ((lsn_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
-
-#define LOG_BUFFER_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE)
-#define LOG_ARCHIVE_BUF_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE / 4)
-
-/* Offsets of a log block header */
-#define LOG_BLOCK_HDR_NO 0 /* block number which must be > 0 and
- is allowed to wrap around at 2G; the
- highest bit is set to 1 if this is the
- first log block in a log flush write
- segment */
-#define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000UL
- /* mask used to get the highest bit in
- the preceding field */
-#define LOG_BLOCK_HDR_DATA_LEN 4 /* number of bytes of log written to
- this block */
-#define LOG_BLOCK_FIRST_REC_GROUP 6 /* offset of the first start of an
- mtr log record group in this log block,
- 0 if none; if the value is the same
- as LOG_BLOCK_HDR_DATA_LEN, it means
- that the first rec group has not yet
- been catenated to this log block, but
- if it will, it will start at this
- offset; an archive recovery can
- start parsing the log records starting
- from this offset in this log block,
- if value not 0 */
-#define LOG_BLOCK_CHECKPOINT_NO 8 /* 4 lower bytes of the value of
- log_sys->next_checkpoint_no when the
- log block was last written to: if the
- block has not yet been written full,
- this value is only updated before a
- log buffer flush */
-#define LOG_BLOCK_HDR_SIZE 12 /* size of the log block header in
- bytes */
-
-/* Offsets of a log block trailer from the end of the block */
-#define LOG_BLOCK_CHECKSUM 4 /* 4 byte checksum of the log block
- contents; in InnoDB versions
- < 3.23.52 this did not contain the
- checksum but the same value as
- .._HDR_NO */
-#define LOG_BLOCK_TRL_SIZE 4 /* trailer size in bytes */
-
-/* Offsets for a checkpoint field */
-#define LOG_CHECKPOINT_NO 0
-#define LOG_CHECKPOINT_LSN 8
-#define LOG_CHECKPOINT_OFFSET_LOW32 16
-#define LOG_CHECKPOINT_LOG_BUF_SIZE 20
-#define LOG_CHECKPOINT_ARCHIVED_LSN 24
-#define LOG_CHECKPOINT_GROUP_ARRAY 32
-
-/* For each value smaller than LOG_MAX_N_GROUPS the following 8 bytes: */
-
-#define LOG_CHECKPOINT_ARCHIVED_FILE_NO 0
-#define LOG_CHECKPOINT_ARCHIVED_OFFSET 4
-
-#define LOG_CHECKPOINT_ARRAY_END (LOG_CHECKPOINT_GROUP_ARRAY\
- + LOG_MAX_N_GROUPS * 8)
-#define LOG_CHECKPOINT_CHECKSUM_1 LOG_CHECKPOINT_ARRAY_END
-#define LOG_CHECKPOINT_CHECKSUM_2 (4 + LOG_CHECKPOINT_ARRAY_END)
-#if 0
-#define LOG_CHECKPOINT_FSP_FREE_LIMIT (8 + LOG_CHECKPOINT_ARRAY_END)
- /*!< Not used (0);
- This used to contain the
- current fsp free limit in
- tablespace 0, in units of one
- megabyte.
-
- This information might have been used
- since mysqlbackup version 0.35 but
- before 1.41 to decide if unused ends of
- non-auto-extending data files
- in space 0 can be truncated.
-
- This information was made obsolete
- by mysqlbackup --compress. */
-#define LOG_CHECKPOINT_FSP_MAGIC_N (12 + LOG_CHECKPOINT_ARRAY_END)
- /*!< Not used (0);
- This magic number tells if the
- checkpoint contains the above field:
- the field was added to
- InnoDB-3.23.50 and
- removed from MySQL 5.6 */
-#define LOG_CHECKPOINT_FSP_MAGIC_N_VAL 1441231243
- /*!< if LOG_CHECKPOINT_FSP_MAGIC_N
- contains this value, then
- LOG_CHECKPOINT_FSP_FREE_LIMIT
- is valid */
-#endif
-#define LOG_CHECKPOINT_OFFSET_HIGH32 (16 + LOG_CHECKPOINT_ARRAY_END)
-#define LOG_CRYPT_VER (20 + LOG_CHECKPOINT_ARRAY_END)
-
-#define LOG_CRYPT_MAX_ENTRIES (5)
-#define LOG_CRYPT_ENTRY_SIZE (4 + 4 + 2 * MY_AES_BLOCK_SIZE)
-#define LOG_CRYPT_SIZE (1 + 1 + \
- (LOG_CRYPT_MAX_ENTRIES * \
- LOG_CRYPT_ENTRY_SIZE))
-
-#define LOG_CHECKPOINT_SIZE (20 + LOG_CHECKPOINT_ARRAY_END + \
- LOG_CRYPT_SIZE)
-
-/* Offsets of a log file header */
-#define LOG_GROUP_ID 0 /* log group number */
-#define LOG_FILE_START_LSN 4 /* lsn of the start of data in this
- log file */
-#define LOG_FILE_NO 12 /* 4-byte archived log file number;
- this field is only defined in an
- archived log file */
-#define LOG_FILE_WAS_CREATED_BY_HOT_BACKUP 16
- /* a 32-byte field which contains
- the string 'ibbackup' and the
- creation time if the log file was
- created by mysqlbackup --restore;
- when mysqld is first time started
- on the restored database, it can
- print helpful info for the user */
-#define LOG_FILE_OS_FILE_LOG_BLOCK_SIZE 64
- /* extend to record log_block_size
- of XtraDB. 0 means default 512 */
-#define LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE
- /* this 4-byte field is TRUE when
- the writing of an archived log file
- has been completed; this field is
- only defined in an archived log file */
-#define LOG_FILE_END_LSN (OS_FILE_LOG_BLOCK_SIZE + 4)
- /* lsn where the archived log file
- at least extends: actually the
- archived log file may extend to a
- later lsn, as long as it is within the
- same log block as this lsn; this field
- is defined only when an archived log
- file has been completely written */
-#define LOG_CHECKPOINT_1 OS_FILE_LOG_BLOCK_SIZE
- /* first checkpoint field in the log
- header; we write alternately to the
- checkpoint fields when we make new
- checkpoints; this field is only defined
- in the first log file of a log group */
-#define LOG_CHECKPOINT_2 (3 * OS_FILE_LOG_BLOCK_SIZE)
- /* second checkpoint field in the log
- header */
-#define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE)
-
-#define LOG_GROUP_OK 301
-#define LOG_GROUP_CORRUPTED 302
-
-/** Log group consists of a number of log files, each of the same size; a log
-group is implemented as a space in the sense of the module fil0fil. */
-struct log_group_t{
- /* The following fields are protected by log_sys->mutex */
- ulint id; /*!< log group id */
- ulint n_files; /*!< number of files in the group */
- lsn_t file_size; /*!< individual log file size in bytes,
- including the log file header */
- ulint space_id; /*!< file space which implements the log
- group */
- ulint state; /*!< LOG_GROUP_OK or
- LOG_GROUP_CORRUPTED */
- lsn_t lsn; /*!< lsn used to fix coordinates within
- the log group */
- lsn_t lsn_offset; /*!< the offset of the above lsn */
- ulint n_pending_writes;/*!< number of currently pending flush
- writes for this log group */
- byte** file_header_bufs_ptr;/*!< unaligned buffers */
- byte** file_header_bufs;/*!< buffers for each file
- header in the group */
-#ifdef UNIV_LOG_ARCHIVE
- /*-----------------------------*/
- byte** archive_file_header_bufs_ptr;/*!< unaligned buffers */
- byte** archive_file_header_bufs;/*!< buffers for each file
- header in the group */
- ulint archive_space_id;/*!< file space which
- implements the log group
- archive */
- lsn_t archived_file_no;/*!< file number corresponding to
- log_sys->archived_lsn */
- lsn_t archived_offset;/*!< file offset corresponding to
- log_sys->archived_lsn, 0 if we have
- not yet written to the archive file
- number archived_file_no */
- lsn_t next_archived_file_no;/*!< during an archive write,
- until the write is completed, we
- store the next value for
- archived_file_no here: the write
- completion function then sets the new
- value to ..._file_no */
- lsn_t next_archived_offset; /*!< like the preceding field */
-#endif /* UNIV_LOG_ARCHIVE */
- /*-----------------------------*/
- lsn_t scanned_lsn; /*!< used only in recovery: recovery scan
- succeeded up to this lsn in this log
- group */
- byte* checkpoint_buf_ptr;/*!< unaligned checkpoint header */
- byte* checkpoint_buf; /*!< checkpoint header is written from
- this buffer to the group */
- UT_LIST_NODE_T(log_group_t)
- log_groups; /*!< list of log groups */
-};
-
-/** Redo log buffer */
-struct log_t{
- byte pad[CACHE_LINE_SIZE]; /*!< padding to prevent other memory
- update hotspots from residing on the
- same memory cache line */
- lsn_t lsn; /*!< log sequence number */
- ulint buf_free; /*!< first free offset within the log
- buffer */
-#ifndef UNIV_HOTBACKUP
- ib_prio_mutex_t mutex; /*!< mutex protecting the log */
-
- ib_mutex_t log_flush_order_mutex;/*!< mutex to serialize access to
- the flush list when we are putting
- dirty blocks in the list. The idea
- behind this mutex is to be able
- to release log_sys->mutex during
- mtr_commit and still ensure that
- insertions in the flush_list happen
- in the LSN order. */
-#endif /* !UNIV_HOTBACKUP */
- byte* buf_ptr; /* unaligned log buffer */
- byte* buf; /*!< log buffer */
- ulint buf_size; /*!< log buffer size in bytes */
- ulint max_buf_free; /*!< recommended maximum value of
- buf_free, after which the buffer is
- flushed */
- #ifdef UNIV_LOG_DEBUG
- ulint old_buf_free; /*!< value of buf free when log was
- last time opened; only in the debug
- version */
- ib_uint64_t old_lsn; /*!< value of lsn when log was
- last time opened; only in the
- debug version */
-#endif /* UNIV_LOG_DEBUG */
- ibool check_flush_or_checkpoint;
- /*!< this is set to TRUE when there may
- be need to flush the log buffer, or
- preflush buffer pool pages, or make
- a checkpoint; this MUST be TRUE when
- lsn - last_checkpoint_lsn >
- max_checkpoint_age; this flag is
- peeked at by log_free_check(), which
- does not reserve the log mutex */
- UT_LIST_BASE_NODE_T(log_group_t)
- log_groups; /*!< log groups */
-
-#ifndef UNIV_HOTBACKUP
- /** The fields involved in the log buffer flush @{ */
-
- ulint buf_next_to_write;/*!< first offset in the log buffer
- where the byte content may not exist
- written to file, e.g., the start
- offset of a log record catenated
- later; this is advanced when a flush
- operation is completed to all the log
- groups */
- volatile bool is_extending; /*!< this is set to true during extend
- the log buffer size */
- lsn_t written_to_some_lsn;
- /*!< first log sequence number not yet
- written to any log group; for this to
- be advanced, it is enough that the
- write i/o has been completed for any
- one log group */
- lsn_t written_to_all_lsn;
- /*!< first log sequence number not yet
- written to some log group; for this to
- be advanced, it is enough that the
- write i/o has been completed for all
- log groups.
- Note that since InnoDB currently
- has only one log group therefore
- this value is redundant. Also it
- is possible that this value
- falls behind the
- flushed_to_disk_lsn transiently.
- It is appropriate to use either
- flushed_to_disk_lsn or
- write_lsn which are always
- up-to-date and accurate. */
- lsn_t write_lsn; /*!< end lsn for the current running
- write */
- ulint write_end_offset;/*!< the data in buffer has
- been written up to this offset
- when the current write ends:
- this field will then be copied
- to buf_next_to_write */
- lsn_t current_flush_lsn;/*!< end lsn for the current running
- write + flush operation */
- lsn_t flushed_to_disk_lsn;
- /*!< how far we have written the log
- AND flushed to disk */
- ulint n_pending_writes;/*!< number of currently
- pending flushes or writes */
- /* NOTE on the 'flush' in names of the fields below: starting from
- 4.0.14, we separate the write of the log file and the actual fsync()
- or other method to flush it to disk. The names below should really
- be 'flush_or_write'! */
- os_event_t no_flush_event; /*!< this event is in the reset state
- when a flush or a write is running;
- os_event_set() and os_event_reset()
- are protected by log_sys_t::mutex */
- ibool one_flushed; /*!< during a flush, this is
- first FALSE and becomes TRUE
- when one log group has been
- written or flushed */
- os_event_t one_flushed_event;/*!< this event is reset when the
- flush or write has not yet completed
- for any log group; e.g., this means
- that a transaction has been committed
- when this is set;
- os_event_set() and os_event_reset()
- are protected by log_sys_t::mutex */
- ulint n_log_ios; /*!< number of log i/os initiated thus
- far */
- ulint n_log_ios_old; /*!< number of log i/o's at the
- previous printout */
- time_t last_printout_time;/*!< when log_print was last time
- called */
- /* @} */
-
- /** Fields involved in checkpoints @{ */
- lsn_t log_group_capacity; /*!< capacity of the log group; if
- the checkpoint age exceeds this, it is
- a serious error because it is possible
- we will then overwrite log and spoil
- crash recovery */
- lsn_t max_modified_age_async;
- /*!< when this recommended
- value for lsn -
- buf_pool_get_oldest_modification()
- is exceeded, we start an
- asynchronous preflush of pool pages */
- lsn_t max_modified_age_sync;
- /*!< when this recommended
- value for lsn -
- buf_pool_get_oldest_modification()
- is exceeded, we start a
- synchronous preflush of pool pages */
- lsn_t max_checkpoint_age_async;
- /*!< when this checkpoint age
- is exceeded we start an
- asynchronous writing of a new
- checkpoint */
- lsn_t max_checkpoint_age;
- /*!< this is the maximum allowed value
- for lsn - last_checkpoint_lsn when a
- new query step is started */
- ib_uint64_t next_checkpoint_no;
- /*!< next checkpoint number */
- lsn_t last_checkpoint_lsn;
- /*!< latest checkpoint lsn */
- lsn_t next_checkpoint_lsn;
- /*!< next checkpoint lsn */
- ulint n_pending_checkpoint_writes;
- /*!< number of currently pending
- checkpoint writes */
- rw_lock_t checkpoint_lock;/*!< this latch is x-locked when a
- checkpoint write is running; a thread
- should wait for this without owning
- the log mutex */
-#endif /* !UNIV_HOTBACKUP */
- byte* checkpoint_buf_ptr;/* unaligned checkpoint header */
- byte* checkpoint_buf; /*!< checkpoint header is read to this
- buffer */
- /* @} */
-#ifdef UNIV_LOG_ARCHIVE
- /** Fields involved in archiving @{ */
- ulint archiving_state;/*!< LOG_ARCH_ON, LOG_ARCH_STOPPING
- LOG_ARCH_STOPPED, LOG_ARCH_OFF */
- lsn_t archived_lsn; /*!< archiving has advanced to this
- lsn */
- lsn_t max_archived_lsn_age_async;
- /*!< recommended maximum age of
- archived_lsn, before we start
- asynchronous copying to the archive */
- lsn_t max_archived_lsn_age;
- /*!< maximum allowed age for
- archived_lsn */
- lsn_t next_archived_lsn;/*!< during an archive write,
- until the write is completed, we
- store the next value for
- archived_lsn here: the write
- completion function then sets the new
- value to archived_lsn */
- ulint archiving_phase;/*!< LOG_ARCHIVE_READ or
- LOG_ARCHIVE_WRITE */
- ulint n_pending_archive_ios;
- /*!< number of currently pending reads
- or writes in archiving */
- rw_lock_t archive_lock; /*!< this latch is x-locked when an
- archive write is running; a thread
- should wait for this without owning
- the log mutex */
- ulint archive_buf_size;/*!< size of archive_buf */
- byte* archive_buf_ptr;/*!< unaligned archived_buf */
- byte* archive_buf; /*!< log segment is written to the
- archive from this buffer */
- os_event_t archiving_on; /*!< if archiving has been stopped;
- os_event_set() and os_event_reset()
- are protected by log_sys_t::mutex */
- /* @} */
-#endif /* UNIV_LOG_ARCHIVE */
- lsn_t tracked_lsn; /*!< log tracking has advanced to this
- lsn. Field accessed atomically where
- 64-bit atomic ops are supported,
- protected by the log sys mutex
- otherwise. */
-};
-
-/** Test if flush order mutex is owned. */
-#define log_flush_order_mutex_own() \
- mutex_own(&log_sys->log_flush_order_mutex)
-
-/** Acquire the flush order mutex. */
-#define log_flush_order_mutex_enter() do { \
- mutex_enter(&log_sys->log_flush_order_mutex); \
-} while (0)
-/** Release the flush order mutex. */
-# define log_flush_order_mutex_exit() do { \
- mutex_exit(&log_sys->log_flush_order_mutex); \
-} while (0)
-
-#ifdef UNIV_LOG_ARCHIVE
-/** Archiving state @{ */
-#define LOG_ARCH_ON 71
-#define LOG_ARCH_STOPPING 72
-#define LOG_ARCH_STOPPING2 73
-#define LOG_ARCH_STOPPED 74
-#define LOG_ARCH_OFF 75
-/* @} */
-#endif /* UNIV_LOG_ARCHIVE */
-
-/* log scrubbing speed, in bytes/sec */
-extern ulonglong innodb_scrub_log_speed;
-
-#ifndef UNIV_NONINL
-#include "log0log.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/log0log.ic b/storage/xtradb/include/log0log.ic
deleted file mode 100644
index 70458fa546b..00000000000
--- a/storage/xtradb/include/log0log.ic
+++ /dev/null
@@ -1,567 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2010, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/log0log.ic
-Database log
-
-Created 12/9/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0file.h"
-#include "mach0data.h"
-#include "mtr0mtr.h"
-#include "srv0mon.h"
-#include "srv0srv.h"
-#include "ut0crc32.h"
-
-#ifdef UNIV_LOG_DEBUG
-/******************************************************//**
-Checks by parsing that the catenated log segment for a single mtr is
-consistent. */
-UNIV_INTERN
-ibool
-log_check_log_recs(
-/*===============*/
- const byte* buf, /*!< in: pointer to the start of
- the log segment in the
- log_sys->buf log buffer */
- ulint len, /*!< in: segment length in bytes */
- ib_uint64_t buf_start_lsn); /*!< in: buffer start lsn */
-#endif /* UNIV_LOG_DEBUG */
-
-/************************************************************//**
-Gets a log block flush bit.
-@return TRUE if this block was the first to be written in a log flush */
-UNIV_INLINE
-ibool
-log_block_get_flush_bit(
-/*====================*/
- const byte* log_block) /*!< in: log block */
-{
- if (LOG_BLOCK_FLUSH_BIT_MASK
- & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/************************************************************//**
-Sets the log block flush bit. */
-UNIV_INLINE
-void
-log_block_set_flush_bit(
-/*====================*/
- byte* log_block, /*!< in/out: log block */
- ibool val) /*!< in: value to set */
-{
- ulint field;
-
- field = mach_read_from_4(log_block + LOG_BLOCK_HDR_NO);
-
- if (val) {
- field = field | LOG_BLOCK_FLUSH_BIT_MASK;
- } else {
- field = field & ~LOG_BLOCK_FLUSH_BIT_MASK;
- }
-
- mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, field);
-}
-
-/************************************************************//**
-Gets a log block number stored in the header.
-@return log block number stored in the block header */
-UNIV_INLINE
-ulint
-log_block_get_hdr_no(
-/*=================*/
- const byte* log_block) /*!< in: log block */
-{
- return(~LOG_BLOCK_FLUSH_BIT_MASK
- & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO));
-}
-
-/************************************************************//**
-Sets the log block number stored in the header; NOTE that this must be set
-before the flush bit! */
-UNIV_INLINE
-void
-log_block_set_hdr_no(
-/*=================*/
- byte* log_block, /*!< in/out: log block */
- ulint n) /*!< in: log block number: must be > 0 and
- < LOG_BLOCK_FLUSH_BIT_MASK */
-{
- ut_ad(n > 0);
- ut_ad(n < LOG_BLOCK_FLUSH_BIT_MASK);
-
- mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, n);
-}
-
-/************************************************************//**
-Gets a log block data length.
-@return log block data length measured as a byte offset from the block start */
-UNIV_INLINE
-ulint
-log_block_get_data_len(
-/*===================*/
- const byte* log_block) /*!< in: log block */
-{
- return(mach_read_from_2(log_block + LOG_BLOCK_HDR_DATA_LEN));
-}
-
-/************************************************************//**
-Sets the log block data length. */
-UNIV_INLINE
-void
-log_block_set_data_len(
-/*===================*/
- byte* log_block, /*!< in/out: log block */
- ulint len) /*!< in: data length */
-{
- mach_write_to_2(log_block + LOG_BLOCK_HDR_DATA_LEN, len);
-}
-
-/************************************************************//**
-Gets a log block first mtr log record group offset.
-@return first mtr log record group byte offset from the block start, 0
-if none */
-UNIV_INLINE
-ulint
-log_block_get_first_rec_group(
-/*==========================*/
- const byte* log_block) /*!< in: log block */
-{
- return(mach_read_from_2(log_block + LOG_BLOCK_FIRST_REC_GROUP));
-}
-
-/************************************************************//**
-Sets the log block first mtr log record group offset. */
-UNIV_INLINE
-void
-log_block_set_first_rec_group(
-/*==========================*/
- byte* log_block, /*!< in/out: log block */
- ulint offset) /*!< in: offset, 0 if none */
-{
- mach_write_to_2(log_block + LOG_BLOCK_FIRST_REC_GROUP, offset);
-}
-
-/************************************************************//**
-Gets a log block checkpoint number field (4 lowest bytes).
-@return checkpoint no (4 lowest bytes) */
-UNIV_INLINE
-ulint
-log_block_get_checkpoint_no(
-/*========================*/
- const byte* log_block) /*!< in: log block */
-{
- return(mach_read_from_4(log_block + LOG_BLOCK_CHECKPOINT_NO));
-}
-
-/************************************************************//**
-Sets a log block checkpoint number field (4 lowest bytes). */
-UNIV_INLINE
-void
-log_block_set_checkpoint_no(
-/*========================*/
- byte* log_block, /*!< in/out: log block */
- ib_uint64_t no) /*!< in: checkpoint no */
-{
- mach_write_to_4(log_block + LOG_BLOCK_CHECKPOINT_NO, (ulint) no);
-}
-
-/************************************************************//**
-Converts a lsn to a log block number.
-@return log block number, it is > 0 and <= 1G */
-UNIV_INLINE
-ulint
-log_block_convert_lsn_to_no(
-/*========================*/
- lsn_t lsn) /*!< in: lsn of a byte within the block */
-{
- return(((ulint) (lsn / OS_FILE_LOG_BLOCK_SIZE) & 0x3FFFFFFFUL) + 1);
-}
-
-/************************************************************//**
-Calculates the checksum for a log block using the current algorithm.
-@return checksum */
-UNIV_INLINE
-ulint
-log_block_calc_checksum(
-/*====================*/
- const byte* block) /*!< in: log block */
-{
- return(log_checksum_algorithm_ptr(block));
-}
-/************************************************************//**
-Calculates the checksum for a log block using the default InnoDB algorithm.
-@return checksum */
-UNIV_INLINE
-ulint
-log_block_calc_checksum_innodb(
-/*===========================*/
- const byte* block) /*!< in: log block */
-{
- ulint sum;
- ulint sh;
- ulint i;
-
- sum = 1;
- sh = 0;
-
- for (i = 0; i < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; i++) {
- ulint b = (ulint) block[i];
- sum &= 0x7FFFFFFFUL;
- sum += b;
- sum += b << sh;
- sh++;
- if (sh > 24) {
- sh = 0;
- }
- }
-
- return(sum);
-}
-
-/************************************************************//**
-Calculates the checksum for a log block using the CRC32 algorithm.
-@return checksum */
-UNIV_INLINE
-ulint
-log_block_calc_checksum_crc32(
-/*==========================*/
- const byte* block) /*!< in: log block */
-{
- return(ut_crc32(block, OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE));
-}
-
-/************************************************************//**
-Calculates the checksum for a log block using the "no-op" algorithm.
-@return checksum */
-UNIV_INLINE
-ulint
-log_block_calc_checksum_none(
-/*=========================*/
- const byte* block) /*!< in: log block */
-{
- return(LOG_NO_CHECKSUM_MAGIC);
-}
-
-/************************************************************//**
-Gets a log block checksum field value.
-@return checksum */
-UNIV_INLINE
-ulint
-log_block_get_checksum(
-/*===================*/
- const byte* log_block) /*!< in: log block */
-{
- return(mach_read_from_4(log_block + OS_FILE_LOG_BLOCK_SIZE
- - LOG_BLOCK_CHECKSUM));
-}
-
-/************************************************************//**
-Sets a log block checksum field value. */
-UNIV_INLINE
-void
-log_block_set_checksum(
-/*===================*/
- byte* log_block, /*!< in/out: log block */
- ulint checksum) /*!< in: checksum */
-{
- mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE
- - LOG_BLOCK_CHECKSUM,
- checksum);
-}
-
-/************************************************************//**
-Initializes a log block in the log buffer. */
-UNIV_INLINE
-void
-log_block_init(
-/*===========*/
- byte* log_block, /*!< in: pointer to the log buffer */
- lsn_t lsn) /*!< in: lsn within the log block */
-{
- ulint no;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- no = log_block_convert_lsn_to_no(lsn);
-
- log_block_set_hdr_no(log_block, no);
-
- log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
- log_block_set_first_rec_group(log_block, 0);
-}
-
-/************************************************************//**
-Initializes a log block in the log buffer in the old format, where there
-was no checksum yet. */
-UNIV_INLINE
-void
-log_block_init_in_old_format(
-/*=========================*/
- byte* log_block, /*!< in: pointer to the log buffer */
- lsn_t lsn) /*!< in: lsn within the log block */
-{
- ulint no;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- no = log_block_convert_lsn_to_no(lsn);
-
- log_block_set_hdr_no(log_block, no);
- mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE
- - LOG_BLOCK_CHECKSUM, no);
- log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE);
- log_block_set_first_rec_group(log_block, 0);
-}
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Writes to the log the string given. The log must be released with
-log_release.
-@return end lsn of the log record, zero if did not succeed */
-UNIV_INLINE
-lsn_t
-log_reserve_and_write_fast(
-/*=======================*/
- const void* str, /*!< in: string */
- ulint len, /*!< in: string length */
- lsn_t* start_lsn)/*!< out: start lsn of the log record */
-{
- ulint data_len;
-#ifdef UNIV_LOG_LSN_DEBUG
- /* length of the LSN pseudo-record */
- ulint lsn_len;
-#endif /* UNIV_LOG_LSN_DEBUG */
-
- mutex_enter(&log_sys->mutex);
-#ifdef UNIV_LOG_LSN_DEBUG
- lsn_len = 1
- + mach_get_compressed_size(log_sys->lsn >> 32)
- + mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL);
-#endif /* UNIV_LOG_LSN_DEBUG */
-
- data_len = len
-#ifdef UNIV_LOG_LSN_DEBUG
- + lsn_len
-#endif /* UNIV_LOG_LSN_DEBUG */
- + log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE;
-
- if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
-
- /* The string does not fit within the current log block or the
- log block would become full. Do not release the log mutex,
- because it has to be reacquired immediately for the "slow" write
- procedure via log_write_low(). */
-
- return(0);
- }
-
- *start_lsn = log_sys->lsn;
-
-#ifdef UNIV_LOG_LSN_DEBUG
- {
- /* Write the LSN pseudo-record. */
- byte* b = &log_sys->buf[log_sys->buf_free];
- *b++ = MLOG_LSN | (MLOG_SINGLE_REC_FLAG & *(const byte*) str);
- /* Write the LSN in two parts,
- as a pseudo page number and space id. */
- b += mach_write_compressed(b, log_sys->lsn >> 32);
- b += mach_write_compressed(b, log_sys->lsn & 0xFFFFFFFFUL);
- ut_a(b - lsn_len == &log_sys->buf[log_sys->buf_free]);
-
- memcpy(b, str, len);
- len += lsn_len;
- }
-#else /* UNIV_LOG_LSN_DEBUG */
- memcpy(log_sys->buf + log_sys->buf_free, str, len);
-#endif /* UNIV_LOG_LSN_DEBUG */
-
- log_block_set_data_len((byte*) ut_align_down(log_sys->buf
- + log_sys->buf_free,
- OS_FILE_LOG_BLOCK_SIZE),
- data_len);
-#ifdef UNIV_LOG_DEBUG
- log_sys->old_buf_free = log_sys->buf_free;
- log_sys->old_lsn = log_sys->lsn;
-#endif
- log_sys->buf_free += len;
-
- ut_ad(log_sys->buf_free <= log_sys->buf_size);
-
- log_sys->lsn += len;
-
- MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- log_sys->lsn - log_sys->last_checkpoint_lsn);
-
-#ifdef UNIV_LOG_DEBUG
- log_check_log_recs(log_sys->buf + log_sys->old_buf_free,
- log_sys->buf_free - log_sys->old_buf_free,
- log_sys->old_lsn);
-#endif
- return(log_sys->lsn);
-}
-
-/**************************************************************************//**
-Locks the log mutex and opens the log for log_write_low. The log must be closed
-with log_close and released with log_release.
-@return start lsn of the log record */
-UNIV_INLINE
-ib_uint64_t
-log_reserve_and_open(
-/*=================*/
- ulint len) /*!< in: length of data to be catenated */
-{
- mutex_enter(&(log_sys->mutex));
-
- return log_open(len);
-}
-
-/***********************************************************************//**
-Releases the log mutex. */
-UNIV_INLINE
-void
-log_release(void)
-/*=============*/
-{
- mutex_exit(&(log_sys->mutex));
-}
-
-/************************************************************//**
-Gets the current lsn.
-@return current lsn */
-UNIV_INLINE
-lsn_t
-log_get_lsn(void)
-/*=============*/
-{
- lsn_t lsn;
-
- mutex_enter(&(log_sys->mutex));
-
- lsn = log_sys->lsn;
-
- mutex_exit(&(log_sys->mutex));
-
- return(lsn);
-}
-
-/************************************************************//**
-Gets the last lsn that is fully flushed to disk.
-@return last flushed lsn */
-UNIV_INLINE
-ib_uint64_t
-log_get_flush_lsn(void)
-/*=============*/
-{
- ib_uint64_t lsn;
-
- mutex_enter(&(log_sys->mutex));
-
- lsn = log_sys->flushed_to_disk_lsn;
-
- mutex_exit(&(log_sys->mutex));
-
- return(lsn);
-}
-
-/************************************************************//**
-Gets the current lsn with a trylock
-@return current lsn or 0 if false*/
-UNIV_INLINE
-lsn_t
-log_get_lsn_nowait(void)
-/*=============*/
-{
- lsn_t lsn=0;
-
- if (!mutex_enter_nowait(&(log_sys->mutex))) {
-
- lsn = log_sys->lsn;
-
- mutex_exit(&(log_sys->mutex));
- }
-
- return(lsn);
-}
-
-/****************************************************************
-Gets the log group capacity. It is OK to read the value without
-holding log_sys->mutex because it is constant.
-@return log group capacity */
-UNIV_INLINE
-lsn_t
-log_get_capacity(void)
-/*==================*/
-{
- return(log_sys->log_group_capacity);
-}
-
-/****************************************************************
-Get log_sys::max_modified_age_async. It is OK to read the value without
-holding log_sys::mutex because it is constant.
-@return max_modified_age_async */
-UNIV_INLINE
-lsn_t
-log_get_max_modified_age_async(void)
-/*================================*/
-{
- return(log_sys->max_modified_age_async);
-}
-
-/***********************************************************************//**
-Checks if there is need for a log buffer flush or a new checkpoint, and does
-this if yes. Any database operation should call this when it has modified
-more than about 4 pages. NOTE that this function may only be called when the
-OS thread owns no synchronization objects except the dictionary mutex. */
-UNIV_INLINE
-void
-log_free_check(void)
-/*================*/
-{
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_except_dict());
-#endif /* UNIV_SYNC_DEBUG */
-
- if (log_sys->check_flush_or_checkpoint) {
-
- log_check_margins();
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/****************************************************************//**
-Safely reads the log_sys->tracked_lsn value. The writer counterpart function
-is log_set_tracked_lsn() in log0online.c.
-
-@return log_sys->tracked_lsn value. */
-UNIV_INLINE
-lsn_t
-log_get_tracked_lsn(void)
-/*=====================*/
-{
- os_rmb;
- return log_sys->tracked_lsn;
-}
diff --git a/storage/xtradb/include/log0online.h b/storage/xtradb/include/log0online.h
deleted file mode 100644
index 5c3e7d07fd9..00000000000
--- a/storage/xtradb/include/log0online.h
+++ /dev/null
@@ -1,187 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011-2012, Percona Inc. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
-Street, Fifth Floor, Boston, MA 02110-1301, USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/log0online.h
-Online database log parsing for changed page tracking
-*******************************************************/
-
-#ifndef log0online_h
-#define log0online_h
-
-#include "univ.i"
-#include "os0file.h"
-#include "log0log.h"
-
-/** Single bitmap file information */
-typedef struct log_online_bitmap_file_struct log_online_bitmap_file_t;
-
-/** A set of bitmap files containing some LSN range */
-typedef struct log_online_bitmap_file_range_struct
-log_online_bitmap_file_range_t;
-
-/** An iterator over changed page info */
-typedef struct log_bitmap_iterator_struct log_bitmap_iterator_t;
-
-/** Initialize the constant part of the log tracking subsystem */
-UNIV_INTERN
-void
-log_online_init(void);
-
-/** Initialize the dynamic part of the log tracking subsystem */
-UNIV_INTERN
-void
-log_online_read_init(void);
-
-/** Shut down the dynamic part of the log tracking subsystem */
-UNIV_INTERN
-void
-log_online_read_shutdown(void);
-
-/** Shut down the constant part of the log tracking subsystem */
-UNIV_INTERN
-void
-log_online_shutdown(void);
-
-/*********************************************************************//**
-Reads and parses the redo log up to last checkpoint LSN to build the changed
-page bitmap which is then written to disk.
-
-@return TRUE if log tracking succeeded, FALSE if bitmap write I/O error */
-UNIV_INTERN
-ibool
-log_online_follow_redo_log(void);
-/*=============================*/
-
-/************************************************************//**
-Delete all the bitmap files for data less than the specified LSN.
-If called with lsn == 0 (i.e. set by RESET request) or
-IB_ULONGLONG_MAX, restart the bitmap file sequence, otherwise
-continue it.
-
-@return FALSE to indicate success, TRUE for failure. */
-UNIV_INTERN
-ibool
-log_online_purge_changed_page_bitmaps(
-/*==================================*/
- lsn_t lsn); /*!<in: LSN to purge files up to */
-
-#define LOG_BITMAP_ITERATOR_START_LSN(i) \
- ((i).start_lsn)
-#define LOG_BITMAP_ITERATOR_END_LSN(i) \
- ((i).end_lsn)
-#define LOG_BITMAP_ITERATOR_SPACE_ID(i) \
- ((i).space_id)
-#define LOG_BITMAP_ITERATOR_PAGE_NUM(i) \
- ((i).first_page_id + (i).bit_offset)
-#define LOG_BITMAP_ITERATOR_PAGE_CHANGED(i) \
- ((i).changed)
-
-/*********************************************************************//**
-Initializes log bitmap iterator. The minimum LSN is used for finding the
-correct starting file with records and it there may be records returned by
-the iterator that have LSN less than start_lsn.
-
-@return TRUE if the iterator is initialized OK, FALSE otherwise. */
-UNIV_INTERN
-ibool
-log_online_bitmap_iterator_init(
-/*============================*/
- log_bitmap_iterator_t *i, /*!<in/out: iterator */
- lsn_t min_lsn, /*!<in: start LSN for the
- iterator */
- lsn_t max_lsn); /*!<in: end LSN for the
- iterator */
-
-/*********************************************************************//**
-Releases log bitmap iterator. */
-UNIV_INTERN
-void
-log_online_bitmap_iterator_release(
-/*===============================*/
- log_bitmap_iterator_t *i); /*!<in/out: iterator */
-
-/*********************************************************************//**
-Iterates through bits of saved bitmap blocks.
-Sequentially reads blocks from bitmap file(s) and interates through
-their bits. Ignores blocks with wrong checksum.
-@return TRUE if iteration is successful, FALSE if all bits are iterated. */
-UNIV_INTERN
-ibool
-log_online_bitmap_iterator_next(
-/*============================*/
- log_bitmap_iterator_t *i); /*!<in/out: iterator */
-
-/** Struct for single bitmap file information */
-struct log_online_bitmap_file_struct {
- char name[FN_REFLEN]; /*!< Name with full path */
- pfs_os_file_t file; /*!< Handle to opened file */
- ib_uint64_t size; /*!< Size of the file */
- os_offset_t offset; /*!< Offset of the next read,
- or count of already-read bytes
- */
-};
-
-/** Struct for a set of bitmap files containing some LSN range */
-struct log_online_bitmap_file_range_struct {
- size_t count; /*!< Number of files */
- /*!< Dynamically-allocated array of info about individual files */
- struct files_t {
- char name[FN_REFLEN]; /*!< Name of a file */
- lsn_t start_lsn; /*!< Starting LSN of data in
- this file */
- ulong seq_num; /*!< Sequence number of this
- file */
- } *files;
-};
-
-/** Struct for an iterator through all bits of changed pages bitmap blocks */
-struct log_bitmap_iterator_struct
-{
- lsn_t max_lsn; /*!< End LSN of the
- range */
- ibool failed; /*!< Has the iteration
- stopped prematurely */
- log_online_bitmap_file_range_t in_files; /*!< The bitmap files
- for this iterator */
- size_t in_i; /*!< Currently read
- file index in in_files
- */
- log_online_bitmap_file_t in; /*!< Currently read
- file */
- ib_uint32_t bit_offset; /*!< bit offset inside
- the current bitmap
- block */
- lsn_t start_lsn; /*!< Start LSN of the
- current bitmap block */
- lsn_t end_lsn; /*!< End LSN of the
- current bitmap block */
- ib_uint32_t space_id; /*!< Current block
- space id */
- ib_uint32_t first_page_id; /*!< Id of the first
- page in the current
- block */
- ibool last_page_in_run;/*!< "Last page in
- run" flag value for the
- current block */
- ibool changed; /*!< true if current
- page was changed */
- byte* page; /*!< Bitmap block */
-};
-
-#endif
diff --git a/storage/xtradb/include/log0recv.h b/storage/xtradb/include/log0recv.h
deleted file mode 100644
index 73d53d2ddab..00000000000
--- a/storage/xtradb/include/log0recv.h
+++ /dev/null
@@ -1,517 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/log0recv.h
-Recovery
-
-Created 9/20/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef log0recv_h
-#define log0recv_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "buf0types.h"
-#include "hash0hash.h"
-#include "log0log.h"
-#include <list>
-
-/******************************************************//**
-Checks the 4-byte checksum to the trailer checksum field of a log
-block. We also accept a log block in the old format before
-InnoDB-3.23.52 where the checksum field contains the log block number.
-@return TRUE if ok, or if the log block may be in the format of InnoDB
-version predating 3.23.52 */
-UNIV_INTERN
-ibool
-log_block_checksum_is_ok_or_old_format(
-/*===================================*/
- const byte* block, /*!< in: pointer to a log block */
- bool print_err); /*!< in print error ? */
-
-/*******************************************************//**
-Calculates the new value for lsn when more data is added to the log. */
-UNIV_INTERN
-ib_uint64_t
-recv_calc_lsn_on_data_add(
-/*======================*/
- lsn_t lsn, /*!< in: old lsn */
- ib_uint64_t len); /*!< in: this many bytes of data is
- added, log block headers not included */
-
-#ifdef UNIV_HOTBACKUP
-extern ibool recv_replay_file_ops;
-
-/*******************************************************************//**
-Reads the checkpoint info needed in hot backup.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-recv_read_checkpoint_info_for_backup(
-/*=================================*/
- const byte* hdr, /*!< in: buffer containing the log group
- header */
- lsn_t* lsn, /*!< out: checkpoint lsn */
- lsn_t* offset, /*!< out: checkpoint offset in the log group */
- lsn_t* cp_no, /*!< out: checkpoint number */
- lsn_t* first_header_lsn)
- /*!< out: lsn of of the start of the
- first log file */
- MY_ATTRIBUTE((nonnull));
-/*******************************************************************//**
-Scans the log segment and n_bytes_scanned is set to the length of valid
-log scanned. */
-UNIV_INTERN
-void
-recv_scan_log_seg_for_backup(
-/*=========================*/
- byte* buf, /*!< in: buffer containing log data */
- ulint buf_len, /*!< in: data length in that buffer */
- lsn_t* scanned_lsn, /*!< in/out: lsn of buffer start,
- we return scanned lsn */
- ulint* scanned_checkpoint_no,
- /*!< in/out: 4 lowest bytes of the
- highest scanned checkpoint number so
- far */
- ulint* n_bytes_scanned);/*!< out: how much we were able to
- scan, smaller than buf_len if log
- data ended here */
-#endif /* UNIV_HOTBACKUP */
-/*******************************************************************//**
-Returns TRUE if recovery is currently running.
-@return recv_recovery_on */
-UNIV_INLINE
-ibool
-recv_recovery_is_on(void);
-/*=====================*/
-/************************************************************************//**
-Applies the hashed log records to the page, if the page lsn is less than the
-lsn of a log record. This can be called when a buffer page has just been
-read in, or also for a page already in the buffer pool. */
-UNIV_INTERN
-void
-recv_recover_page_func(
-/*===================*/
-#ifndef UNIV_HOTBACKUP
- ibool just_read_in,
- /*!< in: TRUE if the i/o handler calls
- this for a freshly read page */
-#endif /* !UNIV_HOTBACKUP */
- buf_block_t* block); /*!< in/out: buffer block */
-#ifndef UNIV_HOTBACKUP
-/** Wrapper for recv_recover_page_func().
-Applies the hashed log records to the page, if the page lsn is less than the
-lsn of a log record. This can be called when a buffer page has just been
-read in, or also for a page already in the buffer pool.
-@param jri in: TRUE if just read in (the i/o handler calls this for
-a freshly read page)
-@param block in/out: the buffer block
-*/
-# define recv_recover_page(jri, block) recv_recover_page_func(jri, block)
-#else /* !UNIV_HOTBACKUP */
-/** Wrapper for recv_recover_page_func().
-Applies the hashed log records to the page, if the page lsn is less than the
-lsn of a log record. This can be called when a buffer page has just been
-read in, or also for a page already in the buffer pool.
-@param jri in: TRUE if just read in (the i/o handler calls this for
-a freshly read page)
-@param block in/out: the buffer block
-*/
-# define recv_recover_page(jri, block) recv_recover_page_func(block)
-#endif /* !UNIV_HOTBACKUP */
-
-/** Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it.
-@param[in] type LOG_CHECKPOINT or LOG_ARCHIVE
-@param[in] limit_lsn recover up to this lsn if possible
-@param[in] flushed_lsn flushed lsn from first data file
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-recv_recovery_from_checkpoint_start_func(
-#ifdef UNIV_LOG_ARCHIVE
- ulint type,
- lsn_t limit_lsn,
-#endif /* UNIV_LOG_ARCHIVE */
- lsn_t flushed_lsn)
- MY_ATTRIBUTE((warn_unused_result));
-
-#ifdef UNIV_LOG_ARCHIVE
-/** Wrapper for recv_recovery_from_checkpoint_start_func().
-Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it.
-@param type in: LOG_CHECKPOINT or LOG_ARCHIVE
-@param lim in: recover up to this log sequence number if possible
-@param lsn in: flushed log sequence number from first data file
-@return error code or DB_SUCCESS */
-# define recv_recovery_from_checkpoint_start(type,lim,lsn) \
- recv_recovery_from_checkpoint_start_func(type,lim,lsn)
-#else /* UNIV_LOG_ARCHIVE */
-/** Wrapper for recv_recovery_from_checkpoint_start_func().
-Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it.
-@param type ignored: LOG_CHECKPOINT or LOG_ARCHIVE
-@param lim ignored: recover up to this log sequence number if possible
-@param lsn in: flushed log sequence number from first data file
-@return error code or DB_SUCCESS */
-# define recv_recovery_from_checkpoint_start(type,lim,lsn) \
- recv_recovery_from_checkpoint_start_func(lsn)
-#endif /* UNIV_LOG_ARCHIVE */
-
-/********************************************************//**
-Completes recovery from a checkpoint. */
-UNIV_INTERN
-void
-recv_recovery_from_checkpoint_finish(void);
-/*======================================*/
-/********************************************************//**
-Initiates the rollback of active transactions. */
-UNIV_INTERN
-void
-recv_recovery_rollback_active(void);
-/*===============================*/
-
-/*******************************************************************//**
-Tries to parse a single log record and returns its length.
-@return length of the record, or 0 if the record was not complete */
-UNIV_INTERN
-ulint
-recv_parse_log_rec(
-/*===============*/
- byte* ptr, /*!< in: pointer to a buffer */
- byte* end_ptr,/*!< in: pointer to the buffer end */
- byte* type, /*!< out: type */
- ulint* space, /*!< out: space id */
- ulint* page_no,/*!< out: page number */
- byte** body); /*!< out: log record body start */
-
-/*******************************************************//**
-Scans log from a buffer and stores new log data to the parsing buffer.
-Parses and hashes the log records if new data found. Unless
-UNIV_HOTBACKUP is defined, this function will apply log records
-automatically when the hash table becomes full.
-@return TRUE if limit_lsn has been reached, or not able to scan any
-more in this log group */
-UNIV_INTERN
-ibool
-recv_scan_log_recs(
-/*===============*/
- ulint available_memory,/*!< in: we let the hash table of recs
- to grow to this size, at the maximum */
- ibool store_to_hash, /*!< in: TRUE if the records should be
- stored to the hash table; this is set
- to FALSE if just debug checking is
- needed */
- const byte* buf, /*!< in: buffer containing a log
- segment or garbage */
- ulint len, /*!< in: buffer length */
- lsn_t start_lsn, /*!< in: buffer start lsn */
- lsn_t* contiguous_lsn, /*!< in/out: it is known that all log
- groups contain contiguous log data up
- to this lsn */
- lsn_t* group_scanned_lsn);/*!< out: scanning succeeded up to
- this lsn */
-/******************************************************//**
-Resets the logs. The contents of log files will be lost! */
-UNIV_INTERN
-void
-recv_reset_logs(
-/*============*/
-#ifdef UNIV_LOG_ARCHIVE
- ulint arch_log_no, /*!< in: next archived log file number */
- ibool new_logs_created,/*!< in: TRUE if resetting logs
- is done at the log creation;
- FALSE if it is done after
- archive recovery */
-#endif /* UNIV_LOG_ARCHIVE */
- lsn_t lsn); /*!< in: reset to this lsn
- rounded up to be divisible by
- OS_FILE_LOG_BLOCK_SIZE, after
- which we add
- LOG_BLOCK_HDR_SIZE */
-#ifdef UNIV_HOTBACKUP
-/******************************************************//**
-Creates new log files after a backup has been restored. */
-UNIV_INTERN
-void
-recv_reset_log_files_for_backup(
-/*============================*/
- const char* log_dir, /*!< in: log file directory path */
- ulint n_log_files, /*!< in: number of log files */
- lsn_t log_file_size, /*!< in: log file size */
- lsn_t lsn); /*!< in: new start lsn, must be
- divisible by OS_FILE_LOG_BLOCK_SIZE */
-#endif /* UNIV_HOTBACKUP */
-/********************************************************//**
-Creates the recovery system. */
-UNIV_INTERN
-void
-recv_sys_create(void);
-/*=================*/
-/**********************************************************//**
-Release recovery system mutexes. */
-UNIV_INTERN
-void
-recv_sys_close(void);
-/*================*/
-/********************************************************//**
-Frees the recovery system memory. */
-UNIV_INTERN
-void
-recv_sys_mem_free(void);
-/*===================*/
-/********************************************************//**
-Inits the recovery system for a recovery operation. */
-UNIV_INTERN
-void
-recv_sys_init(
-/*==========*/
- ulint available_memory); /*!< in: available memory in bytes */
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Reset the state of the recovery system variables. */
-UNIV_INTERN
-void
-recv_sys_var_init(void);
-/*===================*/
-#endif /* !UNIV_HOTBACKUP */
-/** Apply the hash table of stored log records to persistent data pages.
-@param[in] last_batch whether the change buffer merge will be
- performed as part of the operation */
-UNIV_INTERN
-void
-recv_apply_hashed_log_recs(bool last_batch);
-#ifdef UNIV_HOTBACKUP
-/*******************************************************************//**
-Applies log records in the hash table to a backup. */
-UNIV_INTERN
-void
-recv_apply_log_recs_for_backup(void);
-/*================================*/
-#endif
-
-/** Block of log record data */
-struct recv_data_t{
- recv_data_t* next; /*!< pointer to the next block or NULL */
- /*!< the log record data is stored physically
- immediately after this struct, max amount
- RECV_DATA_BLOCK_SIZE bytes of it */
-};
-
-/** Stored log record struct */
-struct recv_t{
- byte type; /*!< log record type */
- ulint len; /*!< log record body length in bytes */
- recv_data_t* data; /*!< chain of blocks containing the log record
- body */
- lsn_t start_lsn;/*!< start lsn of the log segment written by
- the mtr which generated this log record: NOTE
- that this is not necessarily the start lsn of
- this log record */
- lsn_t end_lsn;/*!< end lsn of the log segment written by
- the mtr which generated this log record: NOTE
- that this is not necessarily the end lsn of
- this log record */
- UT_LIST_NODE_T(recv_t)
- rec_list;/*!< list of log records for this page */
-};
-
-/** States of recv_addr_t */
-enum recv_addr_state {
- /** not yet processed */
- RECV_NOT_PROCESSED,
- /** page is being read */
- RECV_BEING_READ,
- /** log records are being applied on the page */
- RECV_BEING_PROCESSED,
- /** log records have been applied on the page, or they have
- been discarded because the tablespace does not exist */
- RECV_PROCESSED
-};
-
-/** Hashed page file address struct */
-struct recv_addr_t{
- enum recv_addr_state state;
- /*!< recovery state of the page */
- unsigned space:32;/*!< space id */
- unsigned page_no:32;/*!< page number */
- UT_LIST_BASE_NODE_T(recv_t)
- rec_list;/*!< list of log records for this page */
- hash_node_t addr_hash;/*!< hash node in the hash bucket chain */
-};
-
-struct recv_dblwr_t {
- void add(byte* page);
-
- byte* find_page(ulint space_id, ulint page_no);
-
- std::list<byte *> pages; /* Pages from double write buffer */
-
- void operator() () {
- pages.clear();
- }
-};
-
-/** Recovery system data structure */
-struct recv_sys_t{
-#ifndef UNIV_HOTBACKUP
- ib_mutex_t mutex; /*!< mutex protecting the fields apply_log_recs,
- n_addrs, and the state field in each recv_addr
- struct */
- ib_mutex_t writer_mutex;/*!< mutex coordinating
- flushing between recv_writer_thread and
- the recovery thread. */
-#endif /* !UNIV_HOTBACKUP */
- ibool apply_log_recs;
- /*!< this is TRUE when log rec application to
- pages is allowed; this flag tells the
- i/o-handler if it should do log record
- application */
- ibool apply_batch_on;
- /*!< this is TRUE when a log rec application
- batch is running */
- lsn_t lsn; /*!< log sequence number */
- ulint last_log_buf_size;
- /*!< size of the log buffer when the database
- last time wrote to the log */
- byte* last_block;
- /*!< possible incomplete last recovered log
- block */
- byte* last_block_buf_start;
- /*!< the nonaligned start address of the
- preceding buffer */
- byte* buf; /*!< buffer for parsing log records */
- ulint len; /*!< amount of data in buf */
- lsn_t parse_start_lsn;
- /*!< this is the lsn from which we were able to
- start parsing log records and adding them to
- the hash table; zero if a suitable
- start point not found yet */
- lsn_t scanned_lsn;
- /*!< the log data has been scanned up to this
- lsn */
- ulint scanned_checkpoint_no;
- /*!< the log data has been scanned up to this
- checkpoint number (lowest 4 bytes) */
- ulint recovered_offset;
- /*!< start offset of non-parsed log records in
- buf */
- lsn_t recovered_lsn;
- /*!< the log records have been parsed up to
- this lsn */
- lsn_t limit_lsn;/*!< recovery should be made at most
- up to this lsn */
- ibool found_corrupt_log;
- /*!< this is set to TRUE if we during log
- scan find a corrupt log block, or a corrupt
- log record, or there is a log parsing
- buffer overflow */
- /** the time when progress was last reported */
- ib_time_t progress_time;
-#ifdef UNIV_LOG_ARCHIVE
- log_group_t* archive_group;
- /*!< in archive recovery: the log group whose
- archive is read */
-#endif /* !UNIV_LOG_ARCHIVE */
- mem_heap_t* heap; /*!< memory heap of log records and file
- addresses*/
- hash_table_t* addr_hash;/*!< hash table of file addresses of pages */
- ulint n_addrs;/*!< number of not processed hashed file
- addresses in the hash table */
-
- recv_dblwr_t dblwr;
-
- /** Determine whether redo log recovery progress should be reported.
- @param[in] time the current time
- @return whether progress should be reported
- (the last report was at least 15 seconds ago) */
- bool report(ib_time_t time)
- {
- if (time - progress_time < 15) {
- return false;
- }
-
- progress_time = time;
- return true;
- }
-};
-
-/** The recovery system */
-extern recv_sys_t* recv_sys;
-
-/** TRUE when applying redo log records during crash recovery; FALSE
-otherwise. Note that this is FALSE while a background thread is
-rolling back incomplete transactions. */
-extern ibool recv_recovery_on;
-/** If the following is TRUE, the buffer pool file pages must be invalidated
-after recovery and no ibuf operations are allowed; this becomes TRUE if
-the log record hash table becomes too full, and log records must be merged
-to file pages already before the recovery is finished: in this case no
-ibuf operations are allowed, as they could modify the pages read in the
-buffer pool before the pages have been recovered to the up-to-date state.
-
-TRUE means that recovery is running and no operations on the log files
-are allowed yet: the variable name is misleading. */
-extern ibool recv_no_ibuf_operations;
-/** TRUE when recv_init_crash_recovery() has been called. */
-extern ibool recv_needed_recovery;
-#ifdef UNIV_DEBUG
-/** TRUE if writing to the redo log (mtr_commit) is forbidden.
-Protected by log_sys->mutex. */
-extern ibool recv_no_log_write;
-#endif /* UNIV_DEBUG */
-
-/** TRUE if buf_page_is_corrupted() should check if the log sequence
-number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
-recv_recovery_from_checkpoint_start_func(). */
-extern ibool recv_lsn_checks_on;
-#ifdef UNIV_HOTBACKUP
-/** TRUE when the redo log is being backed up */
-extern ibool recv_is_making_a_backup;
-#endif /* UNIV_HOTBACKUP */
-/** Maximum page number encountered in the redo log */
-extern ulint recv_max_parsed_page_no;
-
-/** Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
-times! */
-#define RECV_PARSING_BUF_SIZE (2 * 1024 * 1024)
-
-/** Size of block reads when the log groups are scanned forward to do a
-roll-forward */
-#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
-
-/** This many frames must be left free in the buffer pool when we scan
-the log and store the scanned log records in the buffer pool: we will
-use these free frames to read in pages when we start applying the
-log records to the database. */
-extern ulint recv_n_pool_free_frames;
-
-#ifndef UNIV_NONINL
-#include "log0recv.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/log0recv.ic b/storage/xtradb/include/log0recv.ic
deleted file mode 100644
index b29272f4672..00000000000
--- a/storage/xtradb/include/log0recv.ic
+++ /dev/null
@@ -1,37 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/log0recv.ic
-Recovery
-
-Created 9/20/1997 Heikki Tuuri
-*******************************************************/
-
-#include "univ.i"
-
-/*******************************************************************//**
-Returns TRUE if recovery is currently running.
-@return recv_recovery_on */
-UNIV_INLINE
-ibool
-recv_recovery_is_on(void)
-/*=====================*/
-{
- return(recv_recovery_on);
-}
diff --git a/storage/xtradb/include/mach0data.h b/storage/xtradb/include/mach0data.h
deleted file mode 100644
index 2e16634a6c2..00000000000
--- a/storage/xtradb/include/mach0data.h
+++ /dev/null
@@ -1,418 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/mach0data.h
-Utilities for converting data from the database file
-to the machine format.
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#ifndef mach0data_h
-#define mach0data_h
-
-#ifndef UNIV_INNOCHECKSUM
-
-#include "univ.i"
-#include "ut0byte.h"
-
-/* The data and all fields are always stored in a database file
-in the same format: ascii, big-endian, ... .
-All data in the files MUST be accessed using the functions in this
-module. */
-
-/*******************************************************//**
-The following function is used to store data in one byte. */
-UNIV_INLINE
-void
-mach_write_to_1(
-/*============*/
- byte* b, /*!< in: pointer to byte where to store */
- ulint n); /*!< in: ulint integer to be stored, >= 0, < 256 */
-/********************************************************//**
-The following function is used to fetch data from one byte.
-@return ulint integer, >= 0, < 256 */
-UNIV_INLINE
-ulint
-mach_read_from_1(
-/*=============*/
- const byte* b) /*!< in: pointer to byte */
- MY_ATTRIBUTE((warn_unused_result));
-/*******************************************************//**
-The following function is used to store data in two consecutive
-bytes. We store the most significant byte to the lower address. */
-UNIV_INLINE
-void
-mach_write_to_2(
-/*============*/
- byte* b, /*!< in: pointer to two bytes where to store */
- ulint n); /*!< in: ulint integer to be stored, >= 0, < 64k */
-/********************************************************//**
-The following function is used to fetch data from two consecutive
-bytes. The most significant byte is at the lowest address.
-@return ulint integer, >= 0, < 64k */
-UNIV_INLINE
-ulint
-mach_read_from_2(
-/*=============*/
- const byte* b) /*!< in: pointer to two bytes */
- MY_ATTRIBUTE((nonnull, pure));
-
-/********************************************************//**
-The following function is used to convert a 16-bit data item
-to the canonical format, for fast bytewise equality test
-against memory.
-@return 16-bit integer in canonical format */
-UNIV_INLINE
-uint16
-mach_encode_2(
-/*==========*/
- ulint n) /*!< in: integer in machine-dependent format */
- MY_ATTRIBUTE((const));
-/********************************************************//**
-The following function is used to convert a 16-bit data item
-from the canonical format, for fast bytewise equality test
-against memory.
-@return integer in machine-dependent format */
-UNIV_INLINE
-ulint
-mach_decode_2(
-/*==========*/
- uint16 n) /*!< in: 16-bit integer in canonical format */
- MY_ATTRIBUTE((const));
-/*******************************************************//**
-The following function is used to store data in 3 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_3(
-/*============*/
- byte* b, /*!< in: pointer to 3 bytes where to store */
- ulint n); /*!< in: ulint integer to be stored */
-/********************************************************//**
-The following function is used to fetch data from 3 consecutive
-bytes. The most significant byte is at the lowest address.
-@return ulint integer */
-UNIV_INLINE
-ulint
-mach_read_from_3(
-/*=============*/
- const byte* b) /*!< in: pointer to 3 bytes */
- MY_ATTRIBUTE((warn_unused_result));
-/*******************************************************//**
-The following function is used to store data in four consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_4(
-/*============*/
- byte* b, /*!< in: pointer to four bytes where to store */
- ulint n); /*!< in: ulint integer to be stored */
-/********************************************************//**
-The following function is used to fetch data from 4 consecutive
-bytes. The most significant byte is at the lowest address.
-@return ulint integer */
-UNIV_INLINE
-ulint
-mach_read_from_4(
-/*=============*/
- const byte* b) /*!< in: pointer to four bytes */
- MY_ATTRIBUTE((warn_unused_result));
-/*********************************************************//**
-Writes a ulint in a compressed form (1..5 bytes).
-@return stored size in bytes */
-UNIV_INLINE
-ulint
-mach_write_compressed(
-/*==================*/
- byte* b, /*!< in: pointer to memory where to store */
- ulint n); /*!< in: ulint integer to be stored */
-/*********************************************************//**
-Returns the size of an ulint when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_get_compressed_size(
-/*=====================*/
- ulint n) /*!< in: ulint integer to be stored */
- MY_ATTRIBUTE((const));
-/*********************************************************//**
-Reads a ulint in a compressed form.
-@return read integer */
-UNIV_INLINE
-ulint
-mach_read_compressed(
-/*=================*/
- const byte* b) /*!< in: pointer to memory from where to read */
- MY_ATTRIBUTE((warn_unused_result));
-/*******************************************************//**
-The following function is used to store data in 6 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_6(
-/*============*/
- byte* b, /*!< in: pointer to 6 bytes where to store */
- ib_uint64_t id); /*!< in: 48-bit integer */
-/********************************************************//**
-The following function is used to fetch data from 6 consecutive
-bytes. The most significant byte is at the lowest address.
-@return 48-bit integer */
-UNIV_INLINE
-ib_uint64_t
-mach_read_from_6(
-/*=============*/
- const byte* b) /*!< in: pointer to 6 bytes */
- MY_ATTRIBUTE((warn_unused_result));
-/*******************************************************//**
-The following function is used to store data in 7 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_7(
-/*============*/
- byte* b, /*!< in: pointer to 7 bytes where to store */
- ib_uint64_t n); /*!< in: 56-bit integer */
-/********************************************************//**
-The following function is used to fetch data from 7 consecutive
-bytes. The most significant byte is at the lowest address.
-@return 56-bit integer */
-UNIV_INLINE
-ib_uint64_t
-mach_read_from_7(
-/*=============*/
- const byte* b) /*!< in: pointer to 7 bytes */
- MY_ATTRIBUTE((warn_unused_result));
-/*******************************************************//**
-The following function is used to store data in 8 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_8(
-/*============*/
- void* b, /*!< in: pointer to 8 bytes where to store */
- ib_uint64_t n); /*!< in: 64-bit integer to be stored */
-/********************************************************//**
-The following function is used to fetch data from 8 consecutive
-bytes. The most significant byte is at the lowest address.
-@return 64-bit integer */
-UNIV_INLINE
-ib_uint64_t
-mach_read_from_8(
-/*=============*/
- const byte* b) /*!< in: pointer to 8 bytes */
- MY_ATTRIBUTE((nonnull, pure));
-/*********************************************************//**
-Writes a 64-bit integer in a compressed form (5..9 bytes).
-@return size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_write_compressed(
-/*======================*/
- byte* b, /*!< in: pointer to memory where to store */
- ib_uint64_t n); /*!< in: 64-bit integer to be stored */
-/*********************************************************//**
-Returns the size of a 64-bit integer when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_get_compressed_size(
-/*=========================*/
- ib_uint64_t n); /*!< in: 64-bit integer to be stored */
-/*********************************************************//**
-Reads a 64-bit integer in a compressed form.
-@return the value read */
-UNIV_INLINE
-ib_uint64_t
-mach_ull_read_compressed(
-/*=====================*/
- const byte* b) /*!< in: pointer to memory from where to read */
- MY_ATTRIBUTE((warn_unused_result));
-/*********************************************************//**
-Writes a 64-bit integer in a compressed form (1..11 bytes).
-@return size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_write_much_compressed(
-/*===========================*/
- byte* b, /*!< in: pointer to memory where to store */
- ib_uint64_t n); /*!< in: 64-bit integer to be stored */
-/*********************************************************//**
-Returns the size of a 64-bit integer when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_get_much_compressed_size(
-/*==============================*/
- ib_uint64_t n) /*!< in: 64-bit integer to be stored */
- MY_ATTRIBUTE((const));
-/*********************************************************//**
-Reads a 64-bit integer in a compressed form.
-@return the value read */
-UNIV_INLINE
-ib_uint64_t
-mach_ull_read_much_compressed(
-/*==========================*/
- const byte* b) /*!< in: pointer to memory from where to read */
- MY_ATTRIBUTE((warn_unused_result));
-/*********************************************************//**
-Reads a ulint in a compressed form if the log record fully contains it.
-@return pointer to end of the stored field, NULL if not complete */
-UNIV_INTERN
-byte*
-mach_parse_compressed(
-/*==================*/
- byte* ptr, /*!< in: pointer to buffer from where to read */
- byte* end_ptr,/*!< in: pointer to end of the buffer */
- ulint* val); /*!< out: read value */
-/*********************************************************//**
-Reads a 64-bit integer in a compressed form
-if the log record fully contains it.
-@return pointer to end of the stored field, NULL if not complete */
-UNIV_INLINE
-byte*
-mach_ull_parse_compressed(
-/*======================*/
- byte* ptr, /*!< in: pointer to buffer from where to read */
- byte* end_ptr,/*!< in: pointer to end of the buffer */
- ib_uint64_t* val); /*!< out: read value */
-#ifndef UNIV_HOTBACKUP
-/*********************************************************//**
-Reads a double. It is stored in a little-endian format.
-@return double read */
-UNIV_INLINE
-double
-mach_double_read(
-/*=============*/
- const byte* b) /*!< in: pointer to memory from where to read */
- MY_ATTRIBUTE((nonnull, pure));
-/*********************************************************//**
-Writes a double. It is stored in a little-endian format. */
-UNIV_INLINE
-void
-mach_double_write(
-/*==============*/
- byte* b, /*!< in: pointer to memory where to write */
- double d); /*!< in: double */
-/*********************************************************//**
-Reads a float. It is stored in a little-endian format.
-@return float read */
-UNIV_INLINE
-float
-mach_float_read(
-/*============*/
- const byte* b) /*!< in: pointer to memory from where to read */
- MY_ATTRIBUTE((nonnull, pure));
-/*********************************************************//**
-Writes a float. It is stored in a little-endian format. */
-UNIV_INLINE
-void
-mach_float_write(
-/*=============*/
- byte* b, /*!< in: pointer to memory where to write */
- float d); /*!< in: float */
-/*********************************************************//**
-Reads a ulint stored in the little-endian format.
-@return unsigned long int */
-UNIV_INLINE
-ulint
-mach_read_from_n_little_endian(
-/*===========================*/
- const byte* buf, /*!< in: from where to read */
- ulint buf_size) /*!< in: from how many bytes to read */
- MY_ATTRIBUTE((nonnull, pure));
-/*********************************************************//**
-Writes a ulint in the little-endian format. */
-UNIV_INLINE
-void
-mach_write_to_n_little_endian(
-/*==========================*/
- byte* dest, /*!< in: where to write */
- ulint dest_size, /*!< in: into how many bytes to write */
- ulint n); /*!< in: unsigned long int to write */
-/*********************************************************//**
-Reads a ulint stored in the little-endian format.
-@return unsigned long int */
-UNIV_INLINE
-ulint
-mach_read_from_2_little_endian(
-/*===========================*/
- const byte* buf) /*!< in: from where to read */
- MY_ATTRIBUTE((nonnull, pure));
-/*********************************************************//**
-Writes a ulint in the little-endian format. */
-UNIV_INLINE
-void
-mach_write_to_2_little_endian(
-/*==========================*/
- byte* dest, /*!< in: where to write */
- ulint n); /*!< in: unsigned long int to write */
-/*********************************************************//**
-Convert integral type from storage byte order (big endian) to
-host byte order.
-@return integer value */
-UNIV_INLINE
-ib_uint64_t
-mach_read_int_type(
-/*===============*/
- const byte* src, /*!< in: where to read from */
- ulint len, /*!< in: length of src */
- ibool unsigned_type); /*!< in: signed or unsigned flag */
-/***********************************************************//**
-Convert integral type from host byte order to (big-endian) storage
-byte order. */
-UNIV_INLINE
-void
-mach_write_int_type(
-/*================*/
- byte* dest, /*!< in: where to write*/
- const byte* src, /*!< in: where to read from */
- ulint len, /*!< in: length of src */
- bool usign); /*!< in: signed or unsigned flag */
-
-/*************************************************************
-Convert a ulonglong integer from host byte order to (big-endian)
-storage byte order. */
-UNIV_INLINE
-void
-mach_write_ulonglong(
-/*=================*/
- byte* dest, /*!< in: where to write */
- ulonglong src, /*!< in: where to read from */
- ulint len, /*!< in: length of dest */
- bool usign); /*!< in: signed or unsigned flag */
-
-/********************************************************//**
-Reads 1 - 4 bytes from a file page buffered in the buffer pool.
-@return value read */
-UNIV_INLINE
-ulint
-mach_read_ulint(
-/*============*/
- const byte* ptr, /*!< in: pointer from where to read */
- ulint type); /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
-
-#endif /* !UNIV_HOTBACKUP */
-#endif /* !UNIV_INNOCHECKSUM */
-
-#ifndef UNIV_NONINL
-#include "mach0data.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/mach0data.ic b/storage/xtradb/include/mach0data.ic
deleted file mode 100644
index 3b1cf9c0378..00000000000
--- a/storage/xtradb/include/mach0data.ic
+++ /dev/null
@@ -1,869 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/mach0data.ic
-Utilities for converting data from the database file
-to the machine format.
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#ifndef UNIV_INNOCHECKSUM
-
-#include "ut0mem.h"
-
-/*******************************************************//**
-The following function is used to store data in one byte. */
-UNIV_INLINE
-void
-mach_write_to_1(
-/*============*/
- byte* b, /*!< in: pointer to byte where to store */
- ulint n) /*!< in: ulint integer to be stored, >= 0, < 256 */
-{
- ut_ad(b);
- ut_ad((n & ~0xFFUL) == 0);
-
- b[0] = (byte) n;
-}
-
-/********************************************************//**
-The following function is used to fetch data from one byte.
-@return ulint integer, >= 0, < 256 */
-UNIV_INLINE
-ulint
-mach_read_from_1(
-/*=============*/
- const byte* b) /*!< in: pointer to byte */
-{
- return((ulint)(b[0]));
-}
-
-/*******************************************************//**
-The following function is used to store data in two consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_2(
-/*============*/
- byte* b, /*!< in: pointer to two bytes where to store */
- ulint n) /*!< in: ulint integer to be stored */
-{
- ut_ad(b);
- ut_ad((n & ~0xFFFFUL) == 0);
-
- b[0] = (byte)(n >> 8);
- b[1] = (byte)(n);
-}
-
-/********************************************************//**
-The following function is used to convert a 16-bit data item
-to the canonical format, for fast bytewise equality test
-against memory.
-@return 16-bit integer in canonical format */
-UNIV_INLINE
-uint16
-mach_encode_2(
-/*==========*/
- ulint n) /*!< in: integer in machine-dependent format */
-{
- uint16 ret;
- ut_ad(2 == sizeof ret);
- mach_write_to_2((byte*) &ret, n);
- return(ret);
-}
-/********************************************************//**
-The following function is used to convert a 16-bit data item
-from the canonical format, for fast bytewise equality test
-against memory.
-@return integer in machine-dependent format */
-UNIV_INLINE
-ulint
-mach_decode_2(
-/*==========*/
- uint16 n) /*!< in: 16-bit integer in canonical format */
-{
- ut_ad(2 == sizeof n);
- return(mach_read_from_2((const byte*) &n));
-}
-
-/*******************************************************//**
-The following function is used to store data in 3 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_3(
-/*============*/
- byte* b, /*!< in: pointer to 3 bytes where to store */
- ulint n) /*!< in: ulint integer to be stored */
-{
- ut_ad(b);
- ut_ad((n & ~0xFFFFFFUL) == 0);
-
- b[0] = (byte)(n >> 16);
- b[1] = (byte)(n >> 8);
- b[2] = (byte)(n);
-}
-
-/********************************************************//**
-The following function is used to fetch data from 3 consecutive
-bytes. The most significant byte is at the lowest address.
-@return ulint integer */
-UNIV_INLINE
-ulint
-mach_read_from_3(
-/*=============*/
- const byte* b) /*!< in: pointer to 3 bytes */
-{
- return( ((ulint)(b[0]) << 16)
- | ((ulint)(b[1]) << 8)
- | (ulint)(b[2])
- );
-}
-
-/*******************************************************//**
-The following function is used to store data in four consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_4(
-/*============*/
- byte* b, /*!< in: pointer to four bytes where to store */
- ulint n) /*!< in: ulint integer to be stored */
-{
- ut_ad(b);
-
- b[0] = (byte)(n >> 24);
- b[1] = (byte)(n >> 16);
- b[2] = (byte)(n >> 8);
- b[3] = (byte) n;
-}
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/********************************************************//**
-The following function is used to fetch data from 2 consecutive
-bytes. The most significant byte is at the lowest address.
-@return ulint integer */
-UNIV_INLINE
-ulint
-mach_read_from_2(
-/*=============*/
- const byte* b) /*!< in: pointer to 2 bytes */
-{
- return(((ulint)(b[0]) << 8) | (ulint)(b[1]));
-}
-
-/********************************************************//**
-The following function is used to fetch data from 4 consecutive
-bytes. The most significant byte is at the lowest address.
-@return ulint integer */
-UNIV_INLINE
-ulint
-mach_read_from_4(
-/*=============*/
- const byte* b) /*!< in: pointer to four bytes */
-{
- return( ((ulint)(b[0]) << 24)
- | ((ulint)(b[1]) << 16)
- | ((ulint)(b[2]) << 8)
- | (ulint)(b[3])
- );
-}
-
-#ifndef UNIV_INNOCHECKSUM
-
-/*********************************************************//**
-Writes a ulint in a compressed form where the first byte codes the
-length of the stored ulint. We look at the most significant bits of
-the byte. If the most significant bit is zero, it means 1-byte storage,
-else if the 2nd bit is 0, it means 2-byte storage, else if 3rd is 0,
-it means 3-byte storage, else if 4th is 0, it means 4-byte storage,
-else the storage is 5-byte.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_write_compressed(
-/*==================*/
- byte* b, /*!< in: pointer to memory where to store */
- ulint n) /*!< in: ulint integer (< 2^32) to be stored */
-{
- ut_ad(b);
-
- if (n < 0x80UL) {
- mach_write_to_1(b, n);
- return(1);
- } else if (n < 0x4000UL) {
- mach_write_to_2(b, n | 0x8000UL);
- return(2);
- } else if (n < 0x200000UL) {
- mach_write_to_3(b, n | 0xC00000UL);
- return(3);
- } else if (n < 0x10000000UL) {
- mach_write_to_4(b, n | 0xE0000000UL);
- return(4);
- } else {
- mach_write_to_1(b, 0xF0UL);
- mach_write_to_4(b + 1, n);
- return(5);
- }
-}
-
-/*********************************************************//**
-Returns the size of a ulint when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_get_compressed_size(
-/*=====================*/
- ulint n) /*!< in: ulint integer (< 2^32) to be stored */
-{
- if (n < 0x80UL) {
- return(1);
- } else if (n < 0x4000UL) {
- return(2);
- } else if (n < 0x200000UL) {
- return(3);
- } else if (n < 0x10000000UL) {
- return(4);
- } else {
- return(5);
- }
-}
-
-/*********************************************************//**
-Reads a ulint in a compressed form.
-@return read integer (< 2^32) */
-UNIV_INLINE
-ulint
-mach_read_compressed(
-/*=================*/
- const byte* b) /*!< in: pointer to memory from where to read */
-{
- ulint flag;
-
- flag = mach_read_from_1(b);
-
- if (flag < 0x80UL) {
- return(flag);
- } else if (flag < 0xC0UL) {
- return(mach_read_from_2(b) & 0x7FFFUL);
- } else if (flag < 0xE0UL) {
- return(mach_read_from_3(b) & 0x3FFFFFUL);
- } else if (flag < 0xF0UL) {
- return(mach_read_from_4(b) & 0x1FFFFFFFUL);
- } else {
- ut_ad(flag == 0xF0UL);
- return(mach_read_from_4(b + 1));
- }
-}
-
-/*******************************************************//**
-The following function is used to store data in 8 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_8(
-/*============*/
- void* b, /*!< in: pointer to 8 bytes where to store */
- ib_uint64_t n) /*!< in: 64-bit integer to be stored */
-{
- ut_ad(b);
-
- mach_write_to_4(static_cast<byte*>(b), (ulint) (n >> 32));
- mach_write_to_4(static_cast<byte*>(b) + 4, (ulint) n);
-}
-
-/********************************************************//**
-The following function is used to fetch data from 8 consecutive
-bytes. The most significant byte is at the lowest address.
-@return 64-bit integer */
-UNIV_INLINE
-ib_uint64_t
-mach_read_from_8(
-/*=============*/
- const byte* b) /*!< in: pointer to 8 bytes */
-{
- ib_uint64_t ull;
-
- ull = ((ib_uint64_t) mach_read_from_4(b)) << 32;
- ull |= (ib_uint64_t) mach_read_from_4(b + 4);
-
- return(ull);
-}
-
-/*******************************************************//**
-The following function is used to store data in 7 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_7(
-/*============*/
- byte* b, /*!< in: pointer to 7 bytes where to store */
- ib_uint64_t n) /*!< in: 56-bit integer */
-{
- ut_ad(b);
-
- mach_write_to_3(b, (ulint) (n >> 32));
- mach_write_to_4(b + 3, (ulint) n);
-}
-
-/********************************************************//**
-The following function is used to fetch data from 7 consecutive
-bytes. The most significant byte is at the lowest address.
-@return 56-bit integer */
-UNIV_INLINE
-ib_uint64_t
-mach_read_from_7(
-/*=============*/
- const byte* b) /*!< in: pointer to 7 bytes */
-{
- return(ut_ull_create(mach_read_from_3(b), mach_read_from_4(b + 3)));
-}
-
-/*******************************************************//**
-The following function is used to store data in 6 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_to_6(
-/*============*/
- byte* b, /*!< in: pointer to 6 bytes where to store */
- ib_uint64_t n) /*!< in: 48-bit integer */
-{
- ut_ad(b);
-
- mach_write_to_2(b, (ulint) (n >> 32));
- mach_write_to_4(b + 2, (ulint) n);
-}
-
-/********************************************************//**
-The following function is used to fetch data from 6 consecutive
-bytes. The most significant byte is at the lowest address.
-@return 48-bit integer */
-UNIV_INLINE
-ib_uint64_t
-mach_read_from_6(
-/*=============*/
- const byte* b) /*!< in: pointer to 6 bytes */
-{
- return(ut_ull_create(mach_read_from_2(b), mach_read_from_4(b + 2)));
-}
-
-/*********************************************************//**
-Writes a 64-bit integer in a compressed form (5..9 bytes).
-@return size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_write_compressed(
-/*======================*/
- byte* b, /*!< in: pointer to memory where to store */
- ib_uint64_t n) /*!< in: 64-bit integer to be stored */
-{
- ulint size;
-
- ut_ad(b);
-
- size = mach_write_compressed(b, (ulint) (n >> 32));
- mach_write_to_4(b + size, (ulint) n);
-
- return(size + 4);
-}
-
-/*********************************************************//**
-Returns the size of a 64-bit integer when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_get_compressed_size(
-/*=========================*/
- ib_uint64_t n) /*!< in: 64-bit integer to be stored */
-{
- return(4 + mach_get_compressed_size((ulint) (n >> 32)));
-}
-
-/*********************************************************//**
-Reads a 64-bit integer in a compressed form.
-@return the value read */
-UNIV_INLINE
-ib_uint64_t
-mach_ull_read_compressed(
-/*=====================*/
- const byte* b) /*!< in: pointer to memory from where to read */
-{
- ib_uint64_t n;
- ulint size;
-
- n = (ib_uint64_t) mach_read_compressed(b);
-
- size = mach_get_compressed_size((ulint) n);
-
- n <<= 32;
- n |= (ib_uint64_t) mach_read_from_4(b + size);
-
- return(n);
-}
-
-/*********************************************************//**
-Writes a 64-bit integer in a compressed form (1..11 bytes).
-@return size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_write_much_compressed(
-/*===========================*/
- byte* b, /*!< in: pointer to memory where to store */
- ib_uint64_t n) /*!< in: 64-bit integer to be stored */
-{
- ulint size;
-
- ut_ad(b);
-
- if (!(n >> 32)) {
- return(mach_write_compressed(b, (ulint) n));
- }
-
- *b = (byte)0xFF;
- size = 1 + mach_write_compressed(b + 1, (ulint) (n >> 32));
-
- size += mach_write_compressed(b + size, (ulint) n & 0xFFFFFFFF);
-
- return(size);
-}
-
-/*********************************************************//**
-Returns the size of a 64-bit integer when written in the compressed form.
-@return compressed size in bytes */
-UNIV_INLINE
-ulint
-mach_ull_get_much_compressed_size(
-/*==============================*/
- ib_uint64_t n) /*!< in: 64-bit integer to be stored */
-{
- if (!(n >> 32)) {
- return(mach_get_compressed_size((ulint) n));
- }
-
- return(1 + mach_get_compressed_size((ulint) (n >> 32))
- + mach_get_compressed_size((ulint) n & ULINT32_MASK));
-}
-
-/*********************************************************//**
-Reads a 64-bit integer in a compressed form.
-@return the value read */
-UNIV_INLINE
-ib_uint64_t
-mach_ull_read_much_compressed(
-/*==========================*/
- const byte* b) /*!< in: pointer to memory from where to read */
-{
- ib_uint64_t n;
- ulint size;
-
- if (*b != (byte)0xFF) {
- n = 0;
- size = 0;
- } else {
- n = (ib_uint64_t) mach_read_compressed(b + 1);
-
- size = 1 + mach_get_compressed_size((ulint) n);
- n <<= 32;
- }
-
- n |= mach_read_compressed(b + size);
-
- return(n);
-}
-
-/*********************************************************//**
-Reads a 64-bit integer in a compressed form
-if the log record fully contains it.
-@return pointer to end of the stored field, NULL if not complete */
-UNIV_INLINE
-byte*
-mach_ull_parse_compressed(
-/*======================*/
- byte* ptr, /* in: pointer to buffer from where to read */
- byte* end_ptr,/* in: pointer to end of the buffer */
- ib_uint64_t* val) /* out: read value */
-{
- ulint size;
-
- ut_ad(ptr);
- ut_ad(end_ptr);
- ut_ad(val);
-
- if (end_ptr < ptr + 5) {
-
- return(NULL);
- }
-
- *val = mach_read_compressed(ptr);
-
- size = mach_get_compressed_size((ulint) *val);
-
- ptr += size;
-
- if (end_ptr < ptr + 4) {
-
- return(NULL);
- }
-
- *val <<= 32;
- *val |= mach_read_from_4(ptr);
-
- return(ptr + 4);
-}
-#ifndef UNIV_HOTBACKUP
-/*********************************************************//**
-Reads a double. It is stored in a little-endian format.
-@return double read */
-UNIV_INLINE
-double
-mach_double_read(
-/*=============*/
- const byte* b) /*!< in: pointer to memory from where to read */
-{
- double d;
- ulint i;
- byte* ptr;
-
- ptr = (byte*) &d;
-
- for (i = 0; i < sizeof(double); i++) {
-#ifdef WORDS_BIGENDIAN
- ptr[sizeof(double) - i - 1] = b[i];
-#else
- ptr[i] = b[i];
-#endif
- }
-
- return(d);
-}
-
-/*********************************************************//**
-Writes a double. It is stored in a little-endian format. */
-UNIV_INLINE
-void
-mach_double_write(
-/*==============*/
- byte* b, /*!< in: pointer to memory where to write */
- double d) /*!< in: double */
-{
- ulint i;
- byte* ptr;
-
- ptr = (byte*) &d;
-
- for (i = 0; i < sizeof(double); i++) {
-#ifdef WORDS_BIGENDIAN
- b[i] = ptr[sizeof(double) - i - 1];
-#else
- b[i] = ptr[i];
-#endif
- }
-}
-
-/*********************************************************//**
-Reads a float. It is stored in a little-endian format.
-@return float read */
-UNIV_INLINE
-float
-mach_float_read(
-/*============*/
- const byte* b) /*!< in: pointer to memory from where to read */
-{
- float d;
- ulint i;
- byte* ptr;
-
- ptr = (byte*) &d;
-
- for (i = 0; i < sizeof(float); i++) {
-#ifdef WORDS_BIGENDIAN
- ptr[sizeof(float) - i - 1] = b[i];
-#else
- ptr[i] = b[i];
-#endif
- }
-
- return(d);
-}
-
-/*********************************************************//**
-Writes a float. It is stored in a little-endian format. */
-UNIV_INLINE
-void
-mach_float_write(
-/*=============*/
- byte* b, /*!< in: pointer to memory where to write */
- float d) /*!< in: float */
-{
- ulint i;
- byte* ptr;
-
- ptr = (byte*) &d;
-
- for (i = 0; i < sizeof(float); i++) {
-#ifdef WORDS_BIGENDIAN
- b[i] = ptr[sizeof(float) - i - 1];
-#else
- b[i] = ptr[i];
-#endif
- }
-}
-
-/*********************************************************//**
-Reads a ulint stored in the little-endian format: buf[0] is the least
-significant byte and buf[buf_size - 1] the most significant one.
-@return unsigned long int */
-UNIV_INLINE
-ulint
-mach_read_from_n_little_endian(
-/*===========================*/
-	const byte*	buf,		/*!< in: from where to read */
-	ulint		buf_size)	/*!< in: from how many bytes to read;
-					must be 1..sizeof(ulint) */
-{
-	ulint		n = 0;
-	const byte*	ptr;
-
-	ut_ad(buf_size > 0);
-	/* Same bound as in mach_write_to_n_little_endian(): bytes beyond
-	sizeof(ulint) would be shifted out of n and silently lost. */
-	ut_ad(buf_size <= sizeof(ulint));
-
-	/* Walk from the most significant (last) byte down to buf[0]. */
-	ptr = buf + buf_size;
-
-	do {
-		ptr--;
-
-		n = (n << 8) | (ulint)(*ptr);
-	} while (ptr != buf);
-
-	return(n);
-}
-
-/*********************************************************//**
-Writes a ulint in the little-endian format: the least significant
-byte goes to dest[0]. */
-UNIV_INLINE
-void
-mach_write_to_n_little_endian(
-/*==========================*/
-	byte*	dest,		/*!< in: where to write */
-	ulint	dest_size,	/*!< in: into how many bytes to write */
-	ulint	n)		/*!< in: unsigned long int to write */
-{
-	byte* const	stop = dest + dest_size;
-
-	ut_ad(dest_size > 0);
-	ut_ad(dest_size <= sizeof(ulint));
-
-	do {
-		*dest++ = (byte)(n & 0xFF);
-		n >>= 8;
-	} while (dest != stop);
-
-	/* Nothing may be left over: the value had to fit in dest_size
-	bytes. */
-	ut_ad(n == 0);
-}
-
-/*********************************************************//**
-Reads a 2-byte ulint stored in the little-endian format
-(buf[0] is the low byte, buf[1] the high byte).
-@return unsigned long int */
-UNIV_INLINE
-ulint
-mach_read_from_2_little_endian(
-/*===========================*/
-	const byte*	buf)	/*!< in: from where to read */
-{
-	const ulint	low = (ulint)(buf[0]);
-	const ulint	high = (ulint)(buf[1]);
-
-	return(low | (high << 8));
-}
-
-/*********************************************************//**
-Writes a ulint into 2 bytes in the little-endian format
-(low byte first). */
-UNIV_INLINE
-void
-mach_write_to_2_little_endian(
-/*==========================*/
-	byte*	dest,	/*!< in: where to write */
-	ulint	n)	/*!< in: unsigned long int to write;
-			must be below 256 * 256 */
-{
-	ut_ad(n < 256 * 256);
-
-	dest[0] = (byte)(n & 0xFFUL);
-	dest[1] = (byte)((n >> 8) & 0xFFUL);
-}
-
-/*********************************************************//**
-Convert integral type from storage byte order (big endian) to
-host byte order. For a signed type the top bit of the leading stored
-byte is inverted back (mach_write_int_type() inverts it on write) and
-the result is sign-extended to 64 bits.
-@return integer value */
-UNIV_INLINE
-ib_uint64_t
-mach_read_int_type(
-/*===============*/
-	const byte*	src,		/*!< in: where to read from */
-	ulint		len,		/*!< in: length of src */
-	ibool		unsigned_type)	/*!< in: signed or unsigned flag */
-{
-	/* XXX this can be optimized on big-endian machines */
-
-	ullint	value;
-
-	if (unsigned_type) {
-		/* Unsigned: the leading byte is taken as it is. */
-		value = src[0];
-	} else if (src[0] & 0x80) {
-		/* Signed with the stored top bit set: clear that bit
-		and leave all higher bytes zero. */
-		value = src[0] ^ 0x80;
-	} else {
-		/* Signed with the stored top bit clear: set that bit
-		and fill all higher bytes with ones. */
-		value = 0xFFFFFFFFFFFFFF00ULL | (src[0] ^ 0x80);
-	}
-
-	/* Append the remaining bytes, most significant first. */
-	for (const byte* p = src + 1; p < src + len; p++) {
-		value = (value << 8) | *p;
-	}
-
-	return(value);
-}
-/*********************************************************//**
-Swap byte ordering: copies len bytes from 'from' to 'dest' in reverse
-order, so that dest[len - 1] receives from[0] and dest[0] receives
-from[len - 1]. len must be in the range 1..8 (checked only by the
-debug assertions below). */
-UNIV_INLINE
-void
-mach_swap_byte_order(
-/*=================*/
- byte* dest, /*!< out: where to write */
- const byte* from, /*!< in: where to read from */
- ulint len) /*!< in: number of bytes to copy, 1..8 */
-{
- ut_ad(len > 0);
- ut_ad(len <= 8);
-
- /* Start one past the last output byte and fill backwards. */
- dest += len;
-
- /* Unrolled copy: control enters at the label matching len and falls
- through the remaining cases, copying exactly len bytes. len == 8
- gives (len & 0x7) == 0, and case 0 heads the chain with 8 copies. */
- switch (len & 0x7) {
- case 0: *--dest = *from++; /* fall through */
- case 7: *--dest = *from++; /* fall through */
- case 6: *--dest = *from++; /* fall through */
- case 5: *--dest = *from++; /* fall through */
- case 4: *--dest = *from++; /* fall through */
- case 3: *--dest = *from++; /* fall through */
- case 2: *--dest = *from++; /* fall through */
- case 1: *--dest = *from;
- }
-}
-
-/*************************************************************
-Convert integral type from host byte order to (big-endian) storage
-byte order. For a signed value the most significant bit of the first
-stored byte is additionally inverted. */
-UNIV_INLINE
-void
-mach_write_int_type(
-/*================*/
- byte* dest, /*!< in: where to write */
- const byte* src, /*!< in: where to read from */
- ulint len, /*!< in: length of src */
- bool usign) /*!< in: signed or unsigned flag; the top bit is
- inverted only when this is false */
-{
-#ifdef WORDS_BIGENDIAN
- /* Host order already matches the storage order: plain copy. */
- memcpy(dest, src, len);
-#else
- /* Otherwise the bytes are reversed while copying. */
- mach_swap_byte_order(dest, src, len);
-#endif /* WORDS_BIGENDIAN */
-
- if (!usign) {
- /* Flip the top bit of the first (most significant) stored byte. */
- *dest ^= 0x80;
- }
-}
-
-/*************************************************************
-Convert a ulonglong integer from host byte order to (big-endian)
-storage byte order. The len least significant bytes of src are written,
-most significant first; for a signed value the top bit of the first
-written byte is then inverted. */
-UNIV_INLINE
-void
-mach_write_ulonglong(
-/*=================*/
-	byte*		dest,	/*!< in: where to write */
-	ulonglong	src,	/*!< in: value to write */
-	ulint		len,	/*!< in: length of dest, in bytes;
-				must be 1..sizeof(ulonglong) */
-	bool		usign)	/*!< in: signed or unsigned flag; the top
-				bit is inverted only when this is false */
-{
-	const byte*	ptr = reinterpret_cast<const byte*>(&src);
-
-	/* len == 0 is never valid: the sign handling below touches
-	dest[0], and mach_swap_byte_order() requires len > 0. */
-	ut_ad(len > 0);
-	ut_ad(len <= sizeof(ulonglong));
-
-#ifdef WORDS_BIGENDIAN
-	/* The low-order bytes are the last len bytes of src. */
-	memcpy(dest, ptr + (sizeof(src) - len), len);
-#else
-	/* The low-order bytes are the first len bytes of src; reverse
-	them into storage order. */
-	mach_swap_byte_order(dest, ptr, len);
-#endif /* WORDS_BIGENDIAN */
-
-	if (!usign) {
-		*dest ^= 0x80;
-	}
-}
-
-/********************************************************//**
-Reads 1 - 4 bytes from a file page buffered in the buffer pool.
-Dispatches to mach_read_from_1/2/4 according to the requested width.
-@return value read */
-UNIV_INLINE
-ulint
-mach_read_ulint(
-/*============*/
-	const byte*	ptr,	/*!< in: pointer from where to read */
-	ulint		type)	/*!< in: 1,2 or 4 bytes */
-{
-	if (type == 1) {
-		return(mach_read_from_1(ptr));
-	}
-
-	if (type == 2) {
-		return(mach_read_from_2(ptr));
-	}
-
-	if (type == 4) {
-		return(mach_read_from_4(ptr));
-	}
-
-	/* Any other width is not supported. */
-	ut_error;
-
-	return(0);
-}
-
-#endif /* !UNIV_HOTBACKUP */
-#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/xtradb/include/mem0dbg.h b/storage/xtradb/include/mem0dbg.h
deleted file mode 100644
index cc339b82910..00000000000
--- a/storage/xtradb/include/mem0dbg.h
+++ /dev/null
@@ -1,150 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mem0dbg.h
-The memory management: the debug code. This is not a compilation module,
-but is included in mem0mem.* !
-
-Created 6/9/1994 Heikki Tuuri
-*******************************************************/
-
-/* In the debug version each allocated field is surrounded with
-check fields whose sizes are given below */
-
-#ifdef UNIV_MEM_DEBUG
-# ifndef UNIV_HOTBACKUP
-/* The mutex which protects in the debug version the hash table
-containing the list of live memory heaps, and also the global
-variables in mem0dbg.cc. */
-extern ib_mutex_t mem_hash_mutex;
-# endif /* !UNIV_HOTBACKUP */
-
-#define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\
- UNIV_MEM_ALIGNMENT)
-#define MEM_FIELD_TRAILER_SIZE sizeof(ulint)
-#else
-#define MEM_FIELD_HEADER_SIZE 0
-#endif
-
-
-/* Space needed when allocating for a user a field of
-length N. The space is allocated only in multiples of
-UNIV_MEM_ALIGNMENT. In the debug version there are also
-check fields at the both ends of the field. */
-#ifdef UNIV_MEM_DEBUG
-#define MEM_SPACE_NEEDED(N) ut_calc_align((N) + MEM_FIELD_HEADER_SIZE\
- + MEM_FIELD_TRAILER_SIZE, UNIV_MEM_ALIGNMENT)
-#else
-#define MEM_SPACE_NEEDED(N) ut_calc_align((N), UNIV_MEM_ALIGNMENT)
-#endif
-
-#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
-/***************************************************************//**
-Checks a memory heap for consistency and prints the contents if requested.
-Outputs the sum of sizes of buffers given to the user (only in
-the debug version), the physical size of the heap and the number of
-blocks in the heap. In case of error returns 0 as sizes and number
-of blocks. */
-UNIV_INTERN
-void
-mem_heap_validate_or_print(
-/*=======================*/
- mem_heap_t* heap, /*!< in: memory heap */
- byte* top, /*!< in: calculate and validate only until
- this top pointer in the heap is reached,
- if this pointer is NULL, ignored */
- ibool print, /*!< in: if TRUE, prints the contents
- of the heap; works only in
- the debug version */
- ibool* error, /*!< out: TRUE if error */
- ulint* us_size,/*!< out: allocated memory
- (for the user) in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored; in the
- non-debug version this is always -1 */
- ulint* ph_size,/*!< out: physical size of the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
- ulint* n_blocks); /*!< out: number of blocks in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
-/**************************************************************//**
-Validates the contents of a memory heap.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_heap_validate(
-/*==============*/
- mem_heap_t* heap); /*!< in: memory heap */
-#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */
-#ifdef UNIV_DEBUG
-/**************************************************************//**
-Checks that an object is a memory heap (or a block of it)
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_heap_check(
-/*===========*/
- mem_heap_t* heap); /*!< in: memory heap */
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_MEM_DEBUG
-/*****************************************************************//**
-TRUE if no memory is currently allocated.
-@return TRUE if no heaps exist */
-UNIV_INTERN
-ibool
-mem_all_freed(void);
-/*===============*/
-/*****************************************************************//**
-Validates the dynamic memory
-@return TRUE if error */
-UNIV_INTERN
-ibool
-mem_validate_no_assert(void);
-/*=========================*/
-/************************************************************//**
-Validates the dynamic memory
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_validate(void);
-/*===============*/
-#endif /* UNIV_MEM_DEBUG */
-/************************************************************//**
-Tries to find neighboring memory allocation blocks and dumps to stderr
-the neighborhood of a given pointer. */
-UNIV_INTERN
-void
-mem_analyze_corruption(
-/*===================*/
- void* ptr); /*!< in: pointer to place of possible corruption */
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers. Can only be used in the debug version. */
-UNIV_INTERN
-void
-mem_print_info(void);
-/*================*/
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers since the last ..._print_info or ..._print_new_info. */
-UNIV_INTERN
-void
-mem_print_new_info(void);
-/*====================*/
diff --git a/storage/xtradb/include/mem0dbg.ic b/storage/xtradb/include/mem0dbg.ic
deleted file mode 100644
index ec60ed35337..00000000000
--- a/storage/xtradb/include/mem0dbg.ic
+++ /dev/null
@@ -1,109 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/mem0dbg.ic
-The memory management: the debug code. This is not an independent
-compilation module but is included in mem0mem.*.
-
-Created 6/8/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifdef UNIV_MEM_DEBUG
-extern ulint mem_current_allocated_memory;
-
-/******************************************************************//**
-Initializes an allocated memory field in the debug version. */
-UNIV_INTERN
-void
-mem_field_init(
-/*===========*/
- byte* buf, /*!< in: memory field */
- ulint n); /*!< in: how many bytes the user requested */
-/******************************************************************//**
-Erases an allocated memory field in the debug version. */
-UNIV_INTERN
-void
-mem_field_erase(
-/*============*/
- byte* buf, /*!< in: memory field */
- ulint n); /*!< in: how many bytes the user requested */
-/***************************************************************//**
-Initializes a buffer to a random combination of hex BA and BE.
-Used to initialize allocated memory. */
-UNIV_INTERN
-void
-mem_init_buf(
-/*=========*/
- byte* buf, /*!< in: pointer to buffer */
- ulint n); /*!< in: length of buffer */
-/***************************************************************//**
-Initializes a buffer to a random combination of hex DE and AD.
-Used to erase freed memory. */
-UNIV_INTERN
-void
-mem_erase_buf(
-/*==========*/
- byte* buf, /*!< in: pointer to buffer */
- ulint n); /*!< in: length of buffer */
-/***************************************************************//**
-Inserts a created memory heap to the hash table of
-current allocated memory heaps.
-Initializes the hash table when first called. */
-UNIV_INTERN
-void
-mem_hash_insert(
-/*============*/
- mem_heap_t* heap, /*!< in: the created heap */
- const char* file_name, /*!< in: file name of creation */
- ulint line); /*!< in: line where created */
-/***************************************************************//**
-Removes a memory heap (which is going to be freed by the caller)
-from the list of live memory heaps. Returns the size of the heap
-in terms of how much memory in bytes was allocated for the user of
-the heap (not the total space occupied by the heap).
-Also validates the heap.
-NOTE: This function does not free the storage occupied by the
-heap itself, only the node in the list of heaps. */
-UNIV_INTERN
-void
-mem_hash_remove(
-/*============*/
- mem_heap_t* heap, /*!< in: the heap to be freed */
- const char* file_name, /*!< in: file name of freeing */
- ulint line); /*!< in: line where freed */
-
-
-void
-mem_field_header_set_len(byte* field, ulint len);
-
-ulint
-mem_field_header_get_len(byte* field);
-
-void
-mem_field_header_set_check(byte* field, ulint check);
-
-ulint
-mem_field_header_get_check(byte* field);
-
-void
-mem_field_trailer_set_check(byte* field, ulint check);
-
-ulint
-mem_field_trailer_get_check(byte* field);
-#endif /* UNIV_MEM_DEBUG */
diff --git a/storage/xtradb/include/mem0mem.h b/storage/xtradb/include/mem0mem.h
deleted file mode 100644
index de9b8b29fd9..00000000000
--- a/storage/xtradb/include/mem0mem.h
+++ /dev/null
@@ -1,425 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mem0mem.h
-The memory management
-
-Created 6/9/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef mem0mem_h
-#define mem0mem_h
-
-#include "univ.i"
-#include "ut0mem.h"
-#include "ut0byte.h"
-#include "ut0rnd.h"
-#ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
-#endif /* UNIV_HOTBACKUP */
-#include "ut0lst.h"
-#include "mach0data.h"
-
-/* -------------------- MEMORY HEAPS ----------------------------- */
-
-/* A block of a memory heap consists of the info structure
-followed by an area of memory */
-typedef struct mem_block_info_t mem_block_t;
-
-/* A memory heap is a nonempty linear list of memory blocks */
-typedef mem_block_t mem_heap_t;
-
-/* Types of allocation for memory heaps: DYNAMIC means allocation from the
-dynamic memory pool of the C compiler, BUFFER means allocation from the
-buffer pool; the latter method is used for very big heaps */
-
-#define MEM_HEAP_DYNAMIC 0 /* the most common type */
-#define MEM_HEAP_BUFFER 1
-#define MEM_HEAP_BTR_SEARCH 2 /* this flag can optionally be
- ORed to MEM_HEAP_BUFFER, in which
- case heap->free_block is used in
- some cases for memory allocations,
- and if it's NULL, the memory
- allocation functions can return
- NULL. */
-
-/* Different types of heaps, according to which data structure uses them */
-#define MEM_HEAP_FOR_BTR_SEARCH (MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER)
-#define MEM_HEAP_FOR_PAGE_HASH (MEM_HEAP_DYNAMIC)
-#define MEM_HEAP_FOR_RECV_SYS (MEM_HEAP_BUFFER)
-#define MEM_HEAP_FOR_LOCK_HEAP (MEM_HEAP_BUFFER)
-
-/* The following start size is used for the first block in the memory heap if
-the size is not specified, i.e., 0 is given as the parameter in the call of
-create. The standard size is the maximum (payload) size of the blocks used for
-allocations of small buffers. */
-
-#define MEM_BLOCK_START_SIZE 64
-#define MEM_BLOCK_STANDARD_SIZE \
- (UNIV_PAGE_SIZE >= 16384 ? 8000 : MEM_MAX_ALLOC_IN_BUF)
-
-/* If a memory heap is allowed to grow into the buffer pool, the following
-is the maximum size for a single allocated buffer: */
-#define MEM_MAX_ALLOC_IN_BUF (UNIV_PAGE_SIZE - 200)
-
-/******************************************************************//**
-Initializes the memory system. */
-UNIV_INTERN
-void
-mem_init(
-/*=====*/
- ulint size); /*!< in: common pool size in bytes */
-/******************************************************************//**
-Closes the memory system. */
-UNIV_INTERN
-void
-mem_close(void);
-/*===========*/
-
-#ifdef UNIV_DEBUG
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
-
-# define mem_heap_create(N) mem_heap_create_func( \
- (N), __FILE__, __LINE__, MEM_HEAP_DYNAMIC)
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
-
-# define mem_heap_create_typed(N, T) mem_heap_create_func( \
- (N), __FILE__, __LINE__, (T))
-
-#else /* UNIV_DEBUG */
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
-
-# define mem_heap_create(N) mem_heap_create_func( \
- (N), MEM_HEAP_DYNAMIC)
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap creation. */
-
-# define mem_heap_create_typed(N, T) mem_heap_create_func( \
- (N), (T))
-
-#endif /* UNIV_DEBUG */
-/**************************************************************//**
-Use this macro instead of the corresponding function! Macro for memory
-heap freeing. */
-
-#define mem_heap_free(heap) mem_heap_free_func(\
- (heap), __FILE__, __LINE__)
-/*****************************************************************//**
-NOTE: Use the corresponding macros instead of this function. Creates a
-memory heap. For debugging purposes, takes also the file name and line as
-arguments.
-@return own: memory heap, NULL if did not succeed (only possible for
-MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INLINE
-mem_heap_t*
-mem_heap_create_func(
-/*=================*/
- ulint n, /*!< in: desired start block size,
- this means that a single user buffer
- of size n will fit in the block,
- 0 creates a default size block */
-#ifdef UNIV_DEBUG
- const char* file_name, /*!< in: file name where created */
- ulint line, /*!< in: line where created */
-#endif /* UNIV_DEBUG */
- ulint type); /*!< in: heap type */
-/*****************************************************************//**
-NOTE: Use the corresponding macro instead of this function. Frees the space
-occupied by a memory heap. In the debug version erases the heap memory
-blocks. */
-UNIV_INLINE
-void
-mem_heap_free_func(
-/*===============*/
- mem_heap_t* heap, /*!< in, own: heap to be freed */
- const char* file_name, /*!< in: file name where freed */
- ulint line); /*!< in: line where freed */
-/***************************************************************//**
-Allocates and zero-fills n bytes of memory from a memory heap.
-@return allocated, zero-filled storage */
-UNIV_INLINE
-void*
-mem_heap_zalloc(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n); /*!< in: number of bytes; if the heap is allowed
- to grow into the buffer pool, this must be
- <= MEM_MAX_ALLOC_IN_BUF */
-/***************************************************************//**
-Allocates n bytes of memory from a memory heap.
-@return allocated storage, NULL if did not succeed (only possible for
-MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INLINE
-void*
-mem_heap_alloc(
-/*===========*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n); /*!< in: number of bytes; if the heap is allowed
- to grow into the buffer pool, this must be
- <= MEM_MAX_ALLOC_IN_BUF */
-/*****************************************************************//**
-Returns a pointer to the heap top.
-@return pointer to the heap top */
-UNIV_INLINE
-byte*
-mem_heap_get_heap_top(
-/*==================*/
- mem_heap_t* heap); /*!< in: memory heap */
-/*****************************************************************//**
-Frees the space in a memory heap exceeding the pointer given. The
-pointer must have been acquired from mem_heap_get_heap_top. The first
-memory block of the heap is not freed. */
-UNIV_INLINE
-void
-mem_heap_free_heap_top(
-/*===================*/
- mem_heap_t* heap, /*!< in: heap from which to free */
- byte* old_top);/*!< in: pointer to old top of heap */
-/*****************************************************************//**
-Empties a memory heap. The first memory block of the heap is not freed. */
-UNIV_INLINE
-void
-mem_heap_empty(
-/*===========*/
- mem_heap_t* heap); /*!< in: heap to empty */
-/*****************************************************************//**
-Returns a pointer to the topmost element in a memory heap.
-The size of the element must be given.
-@return pointer to the topmost element */
-UNIV_INLINE
-void*
-mem_heap_get_top(
-/*=============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n); /*!< in: size of the topmost element */
-/*****************************************************************//**
-Frees the topmost element in a memory heap.
-The size of the element must be given. */
-UNIV_INLINE
-void
-mem_heap_free_top(
-/*==============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n); /*!< in: size of the topmost element */
-/*****************************************************************//**
-Returns the space in bytes occupied by a memory heap. */
-UNIV_INLINE
-ulint
-mem_heap_get_size(
-/*==============*/
- mem_heap_t* heap); /*!< in: heap */
-/**************************************************************//**
-Use this macro instead of the corresponding function!
-Macro for memory buffer allocation */
-
-#define mem_zalloc(N) memset(mem_alloc(N), 0, (N))
-
-#ifdef UNIV_DEBUG
-#define mem_alloc(N) mem_alloc_func((N), __FILE__, __LINE__, NULL)
-#define mem_alloc2(N,S) mem_alloc_func((N), __FILE__, __LINE__, (S))
-#else /* UNIV_DEBUG */
-#define mem_alloc(N) mem_alloc_func((N), NULL)
-#define mem_alloc2(N,S) mem_alloc_func((N), (S))
-#endif /* UNIV_DEBUG */
-
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free.
-@return own: free storage */
-UNIV_INLINE
-void*
-mem_alloc_func(
-/*===========*/
- ulint n, /*!< in: requested size in bytes */
-#ifdef UNIV_DEBUG
- const char* file_name, /*!< in: file name where created */
- ulint line, /*!< in: line where created */
-#endif /* UNIV_DEBUG */
- ulint* size); /*!< out: allocated size in bytes,
- or NULL */
-
-/**************************************************************//**
-Use this macro instead of the corresponding function!
-Macro for memory buffer freeing */
-
-#define mem_free(PTR) mem_free_func((PTR), __FILE__, __LINE__)
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function.
-Frees a single buffer of storage from
-the dynamic memory of C compiler. Similar to free of C.
-The mem_free() macro supplies __FILE__ and __LINE__ of the freeing
-call as the last two arguments. */
-UNIV_INLINE
-void
-mem_free_func(
-/*==========*/
- void* ptr, /*!< in, own: buffer to be freed */
- const char* file_name, /*!< in: file name where freed */
- ulint line); /*!< in: line where freed */
-
-/**********************************************************************//**
-Duplicates a NUL-terminated string.
-@return own: a copy of the string, must be deallocated with mem_free */
-UNIV_INLINE
-char*
-mem_strdup(
-/*=======*/
- const char* str); /*!< in: string to be copied */
-/**********************************************************************//**
-Makes a NUL-terminated copy of a nonterminated string.
-@return own: a copy of the string, must be deallocated with mem_free */
-UNIV_INLINE
-char*
-mem_strdupl(
-/*========*/
- const char* str, /*!< in: string to be copied */
- ulint len); /*!< in: length of str, in bytes */
-
-/**********************************************************************//**
-Duplicates a NUL-terminated string, allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INTERN
-char*
-mem_heap_strdup(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* str); /*!< in: string to be copied */
-/**********************************************************************//**
-Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INLINE
-char*
-mem_heap_strdupl(
-/*=============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* str, /*!< in: string to be copied */
- ulint len); /*!< in: length of str, in bytes */
-
-/**********************************************************************//**
-Concatenate two strings and return the result, using a memory heap.
-@return own: the result */
-UNIV_INTERN
-char*
-mem_heap_strcat(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* s1, /*!< in: string 1 */
- const char* s2); /*!< in: string 2 */
-
-/**********************************************************************//**
-Duplicate a block of data, allocated from a memory heap.
-@return own: a copy of the data */
-UNIV_INTERN
-void*
-mem_heap_dup(
-/*=========*/
- mem_heap_t* heap, /*!< in: memory heap where copy is allocated */
- const void* data, /*!< in: data to be copied */
- ulint len); /*!< in: length of data, in bytes */
-
-/****************************************************************//**
-A simple sprintf replacement that dynamically allocates the space for the
-formatted string from the given heap. This supports a very limited set of
-the printf syntax: types 's' and 'u' and length modifier 'l' (which is
-required for the 'u' type).
-@return heap-allocated formatted string */
-UNIV_INTERN
-char*
-mem_heap_printf(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap */
- const char* format, /*!< in: format string */
- ...) MY_ATTRIBUTE ((format (printf, 2, 3)));
-
-#ifdef MEM_PERIODIC_CHECK
-/******************************************************************//**
-Goes through the list of all allocated mem blocks, checks their magic
-numbers, and reports possible corruption. */
-UNIV_INTERN
-void
-mem_validate_all_blocks(void);
-/*=========================*/
-#endif
-
-/*#######################################################################*/
-
-/** The info structure stored at the beginning of a heap block */
-struct mem_block_info_t {
- ulint magic_n;/* magic number for debugging */
-#ifdef UNIV_DEBUG
- char file_name[8];/* file name where the mem heap was created */
- ulint line; /*!< line number where the mem heap was created */
-#endif /* UNIV_DEBUG */
- UT_LIST_BASE_NODE_T(mem_block_t) base; /* In the first block of
- the list this is the base node of the list of blocks;
- in subsequent blocks this is undefined */
- UT_LIST_NODE_T(mem_block_t) list; /* This contains pointers to next
- and prev in the list. The first block allocated
- to the heap is also the first block in this list,
- though it also contains the base node of the list. */
- ulint len; /*!< physical length of this block in bytes */
- ulint total_size; /*!< physical length in bytes of all blocks
- in the heap. This is defined only in the base
- node and is set to ULINT_UNDEFINED in others. */
- ulint type; /*!< type of heap: MEM_HEAP_DYNAMIC, or
- MEM_HEAP_BUFFER possibly ORed to MEM_HEAP_BTR_SEARCH */
- ulint free; /*!< offset in bytes of the first free position for
- user data in the block */
- ulint start; /*!< the value of the struct field 'free' at the
- creation of the block */
-#ifndef UNIV_HOTBACKUP
- void* free_block;
- /* if the MEM_HEAP_BTR_SEARCH bit is set in type,
- and this is the heap root, this can contain an
- allocated buffer frame, which can be appended as a
- free block to the heap, if we need more space;
- otherwise, this is NULL */
- void* buf_block;
- /* if this block has been allocated from the buffer
- pool, this contains the buf_block_t handle;
- otherwise, this is NULL */
-#endif /* !UNIV_HOTBACKUP */
-#ifdef MEM_PERIODIC_CHECK
- UT_LIST_NODE_T(mem_block_t) mem_block_list;
- /* List of all mem blocks allocated; protected
- by the mem_comm_pool mutex */
-#endif
-};
-
-#define MEM_BLOCK_MAGIC_N 764741555
-#define MEM_FREED_BLOCK_MAGIC_N 547711122
-
-/* Header size for a memory heap block */
-#define MEM_BLOCK_HEADER_SIZE ut_calc_align(sizeof(mem_block_info_t),\
- UNIV_MEM_ALIGNMENT)
-#include "mem0dbg.h"
-
-#ifndef UNIV_NONINL
-#include "mem0mem.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/mem0mem.ic b/storage/xtradb/include/mem0mem.ic
deleted file mode 100644
index 63e68150b61..00000000000
--- a/storage/xtradb/include/mem0mem.ic
+++ /dev/null
@@ -1,649 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/mem0mem.ic
-The memory management
-
-Created 6/8/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "mem0dbg.ic"
-#ifndef UNIV_HOTBACKUP
-# include "mem0pool.h"
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-# define mem_heap_create_block(heap, n, type, file_name, line) \
- mem_heap_create_block_func(heap, n, file_name, line, type)
-# define mem_heap_create_at(N, file_name, line) \
- mem_heap_create_func(N, file_name, line, MEM_HEAP_DYNAMIC)
-#else /* UNIV_DEBUG */
-# define mem_heap_create_block(heap, n, type, file_name, line) \
- mem_heap_create_block_func(heap, n, type)
-# define mem_heap_create_at(N, file_name, line) \
- mem_heap_create_func(N, MEM_HEAP_DYNAMIC)
-#endif /* UNIV_DEBUG */
-/***************************************************************//**
-Creates a memory heap block where data can be allocated.
-@return own: memory heap block, NULL if did not succeed (only possible
-for MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INTERN
-mem_block_t*
-mem_heap_create_block_func(
-/*=======================*/
- mem_heap_t* heap, /*!< in: memory heap or NULL if first block
- should be created */
- ulint n, /*!< in: number of bytes needed for user data */
-#ifdef UNIV_DEBUG
- const char* file_name,/*!< in: file name where created */
- ulint line, /*!< in: line where created */
-#endif /* UNIV_DEBUG */
- ulint type); /*!< in: type of heap: MEM_HEAP_DYNAMIC or
- MEM_HEAP_BUFFER */
-/******************************************************************//**
-Frees a block from a memory heap. */
-UNIV_INTERN
-void
-mem_heap_block_free(
-/*================*/
- mem_heap_t* heap, /*!< in: heap */
- mem_block_t* block); /*!< in: block to free */
-#ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Frees the free_block field from a memory heap. */
-UNIV_INTERN
-void
-mem_heap_free_block_free(
-/*=====================*/
- mem_heap_t* heap); /*!< in: heap */
-#endif /* !UNIV_HOTBACKUP */
-/***************************************************************//**
-Adds a new block to a memory heap.
-@return created block, NULL if did not succeed (only possible for
-MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INTERN
-mem_block_t*
-mem_heap_add_block(
-/*===============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n); /*!< in: number of bytes user needs */
-
-UNIV_INLINE
-void
-mem_block_set_len(mem_block_t* block, ulint len)
-{
- ut_ad(len > 0);
-
- block->len = len;
-}
-
-UNIV_INLINE
-ulint
-mem_block_get_len(mem_block_t* block)
-{
- return(block->len);
-}
-
-UNIV_INLINE
-void
-mem_block_set_type(mem_block_t* block, ulint type)
-{
- ut_ad((type == MEM_HEAP_DYNAMIC) || (type == MEM_HEAP_BUFFER)
- || (type == MEM_HEAP_BUFFER + MEM_HEAP_BTR_SEARCH));
-
- block->type = type;
-}
-
-UNIV_INLINE
-ulint
-mem_block_get_type(mem_block_t* block)
-{
- return(block->type);
-}
-
-UNIV_INLINE
-void
-mem_block_set_free(mem_block_t* block, ulint free)
-{
- ut_ad(free > 0);
- ut_ad(free <= mem_block_get_len(block));
-
- block->free = free;
-}
-
-UNIV_INLINE
-ulint
-mem_block_get_free(mem_block_t* block)
-{
- return(block->free);
-}
-
-UNIV_INLINE
-void
-mem_block_set_start(mem_block_t* block, ulint start)
-{
- ut_ad(start > 0);
-
- block->start = start;
-}
-
-UNIV_INLINE
-ulint
-mem_block_get_start(mem_block_t* block)
-{
- return(block->start);
-}
-
-/***************************************************************//**
-Allocates and zero-fills n bytes of memory from a memory heap.
-@return allocated, zero-filled storage */
-UNIV_INLINE
-void*
-mem_heap_zalloc(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n) /*!< in: number of bytes; if the heap is allowed
- to grow into the buffer pool, this must be
- <= MEM_MAX_ALLOC_IN_BUF */
-{
- ut_ad(heap);
- ut_ad(!(heap->type & MEM_HEAP_BTR_SEARCH));
- return(memset(mem_heap_alloc(heap, n), 0, n));
-}
-
-/***************************************************************//**
-Allocates n bytes of memory from a memory heap.
-@return allocated storage, NULL if did not succeed (only possible for
-MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INLINE
-void*
-mem_heap_alloc(
-/*===========*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n) /*!< in: number of bytes; if the heap is allowed
- to grow into the buffer pool, this must be
- <= MEM_MAX_ALLOC_IN_BUF */
-{
- mem_block_t* block;
- void* buf;
- ulint free;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
- ut_ad(!(block->type & MEM_HEAP_BUFFER) || (n <= MEM_MAX_ALLOC_IN_BUF));
-
- /* Check if there is enough space in block. If not, create a new
- block to the heap */
-
- if (mem_block_get_len(block)
- < mem_block_get_free(block) + MEM_SPACE_NEEDED(n)) {
-
- block = mem_heap_add_block(heap, n);
-
- if (block == NULL) {
-
- return(NULL);
- }
- }
-
- free = mem_block_get_free(block);
-
- buf = (byte*) block + free;
-
- mem_block_set_free(block, free + MEM_SPACE_NEEDED(n));
-
-#ifdef UNIV_MEM_DEBUG
- UNIV_MEM_ALLOC(buf,
- n + MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE);
-
- /* In the debug version write debugging info to the field */
- mem_field_init((byte*) buf, n);
-
- /* Advance buf to point at the storage which will be given to the
- caller */
- buf = (byte*) buf + MEM_FIELD_HEADER_SIZE;
-
-#endif
- UNIV_MEM_ALLOC(buf, n);
- return(buf);
-}
-
-/*****************************************************************//**
-Returns a pointer to the heap top.
-@return pointer to the heap top */
-UNIV_INLINE
-byte*
-mem_heap_get_heap_top(
-/*==================*/
- mem_heap_t* heap) /*!< in: memory heap */
-{
- mem_block_t* block;
- byte* buf;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
- buf = (byte*) block + mem_block_get_free(block);
-
- return(buf);
-}
-
-/*****************************************************************//**
-Frees the space in a memory heap exceeding the pointer given. The
-pointer must have been acquired from mem_heap_get_heap_top. The first
-memory block of the heap is not freed. */
-UNIV_INLINE
-void
-mem_heap_free_heap_top(
-/*===================*/
- mem_heap_t* heap, /*!< in: heap from which to free */
- byte* old_top)/*!< in: pointer to old top of heap */
-{
- mem_block_t* block;
- mem_block_t* prev_block;
-#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
- ibool error;
- ulint total_size;
- ulint size;
-
- ut_ad(mem_heap_check(heap));
-
- /* Validate the heap and get its total allocated size */
- mem_heap_validate_or_print(heap, NULL, FALSE, &error, &total_size,
- NULL, NULL);
- ut_a(!error);
-
- /* Get the size below top pointer */
- mem_heap_validate_or_print(heap, old_top, FALSE, &error, &size, NULL,
- NULL);
- ut_a(!error);
-
-#endif
-
- block = UT_LIST_GET_LAST(heap->base);
-
- while (block != NULL) {
- if (((byte*) block + mem_block_get_free(block) >= old_top)
- && ((byte*) block <= old_top)) {
- /* Found the right block */
-
- break;
- }
-
- /* Store prev_block value before freeing the current block
- (the current block will be erased in freeing) */
-
- prev_block = UT_LIST_GET_PREV(list, block);
-
- mem_heap_block_free(heap, block);
-
- block = prev_block;
- }
-
- ut_ad(block);
-
- /* Set the free field of block */
- mem_block_set_free(block, old_top - (byte*) block);
-
- ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
- UNIV_MEM_ASSERT_W(old_top, (byte*) block + block->len - old_top);
-#if defined UNIV_MEM_DEBUG
- /* In the debug version erase block from top up */
- mem_erase_buf(old_top, (byte*) block + block->len - old_top);
-
- /* Update allocated memory count */
- mutex_enter(&mem_hash_mutex);
- mem_current_allocated_memory -= (total_size - size);
- mutex_exit(&mem_hash_mutex);
-#endif /* UNIV_MEM_DEBUG */
- UNIV_MEM_ALLOC(old_top, (byte*) block + block->len - old_top);
-
- /* If free == start, we may free the block if it is not the first
- one */
-
- if ((heap != block) && (mem_block_get_free(block)
- == mem_block_get_start(block))) {
- mem_heap_block_free(heap, block);
- }
-}
-
-/*****************************************************************//**
-Empties a memory heap. The first memory block of the heap is not freed. */
-UNIV_INLINE
-void
-mem_heap_empty(
-/*===========*/
- mem_heap_t* heap) /*!< in: heap to empty */
-{
- mem_heap_free_heap_top(heap, (byte*) heap + mem_block_get_start(heap));
-#ifndef UNIV_HOTBACKUP
- if (heap->free_block) {
- mem_heap_free_block_free(heap);
- }
-#endif /* !UNIV_HOTBACKUP */
-}
-
-/*****************************************************************//**
-Returns a pointer to the topmost element in a memory heap. The size of the
-element must be given.
-@return pointer to the topmost element */
-UNIV_INLINE
-void*
-mem_heap_get_top(
-/*=============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n) /*!< in: size of the topmost element */
-{
- mem_block_t* block;
- byte* buf;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
- buf = (byte*) block + mem_block_get_free(block) - MEM_SPACE_NEEDED(n);
-
-#ifdef UNIV_MEM_DEBUG
- ut_ad(mem_block_get_start(block) <= (ulint) (buf - (byte*) block));
-
- /* In the debug version, advance buf to point at the storage which
- was given to the caller in the allocation*/
-
- buf += MEM_FIELD_HEADER_SIZE;
-
- /* Check that the field lengths agree */
- ut_ad(n == mem_field_header_get_len(buf));
-#endif
-
- return((void*) buf);
-}
-
-/*****************************************************************//**
-Frees the topmost element in a memory heap. The size of the element must be
-given. */
-UNIV_INLINE
-void
-mem_heap_free_top(
-/*==============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n) /*!< in: size of the topmost element */
-{
- mem_block_t* block;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
- /* Subtract the free field of block */
- mem_block_set_free(block, mem_block_get_free(block)
- - MEM_SPACE_NEEDED(n));
- UNIV_MEM_ASSERT_W((byte*) block + mem_block_get_free(block), n);
-#ifdef UNIV_MEM_DEBUG
-
- ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
-
- /* In the debug version check the consistency, and erase field */
- mem_field_erase((byte*) block + mem_block_get_free(block), n);
-#endif
-
- /* If free == start, we may free the block if it is not the first
- one */
-
- if ((heap != block) && (mem_block_get_free(block)
- == mem_block_get_start(block))) {
- mem_heap_block_free(heap, block);
- } else {
- /* Avoid a bogus UNIV_MEM_ASSERT_W() warning in a
- subsequent invocation of mem_heap_free_top().
- Originally, this was UNIV_MEM_FREE(), to catch writes
- to freed memory. */
- UNIV_MEM_ALLOC((byte*) block + mem_block_get_free(block), n);
- }
-}
-
-/*****************************************************************//**
-NOTE: Use the corresponding macros instead of this function. Creates a
-memory heap. For debugging purposes, takes also the file name and line as
-argument.
-@return own: memory heap, NULL if did not succeed (only possible for
-MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INLINE
-mem_heap_t*
-mem_heap_create_func(
-/*=================*/
- ulint n, /*!< in: desired start block size,
- this means that a single user buffer
- of size n will fit in the block,
- 0 creates a default size block */
-#ifdef UNIV_DEBUG
- const char* file_name, /*!< in: file name where created */
- ulint line, /*!< in: line where created */
-#endif /* UNIV_DEBUG */
- ulint type) /*!< in: heap type */
-{
- mem_block_t* block;
-
- if (!n) {
- n = MEM_BLOCK_START_SIZE;
- }
-
- block = mem_heap_create_block(NULL, n, type, file_name, line);
-
- if (block == NULL) {
-
- return(NULL);
- }
-
- UT_LIST_INIT(block->base);
-
- /* Add the created block itself as the first block in the list */
- UT_LIST_ADD_FIRST(list, block->base, block);
-
-#ifdef UNIV_MEM_DEBUG
-
- mem_hash_insert(block, file_name, line);
-
-#endif
-
- return(block);
-}
-
-/*****************************************************************//**
-NOTE: Use the corresponding macro instead of this function. Frees the space
-occupied by a memory heap. In the debug version erases the heap memory
-blocks. */
-UNIV_INLINE
-void
-mem_heap_free_func(
-/*===============*/
- mem_heap_t* heap, /*!< in, own: heap to be freed */
- const char* file_name MY_ATTRIBUTE((unused)),
- /*!< in: file name where freed */
- ulint line MY_ATTRIBUTE((unused)))
-{
- mem_block_t* block;
- mem_block_t* prev_block;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
-#ifdef UNIV_MEM_DEBUG
-
- /* In the debug version remove the heap from the hash table of heaps
- and check its consistency */
-
- mem_hash_remove(heap, file_name, line);
-
-#endif
-#ifndef UNIV_HOTBACKUP
- if (heap->free_block) {
- mem_heap_free_block_free(heap);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- while (block != NULL) {
- /* Store the contents of info before freeing current block
- (it is erased in freeing) */
-
- prev_block = UT_LIST_GET_PREV(list, block);
-
- mem_heap_block_free(heap, block);
-
- block = prev_block;
- }
-}
-
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function.
-Allocates a single buffer of memory from the dynamic memory of
-the C compiler. Is like malloc of C. The buffer must be freed
-with mem_free.
-@return own: free storage */
-UNIV_INLINE
-void*
-mem_alloc_func(
-/*===========*/
- ulint n, /*!< in: desired number of bytes */
-#ifdef UNIV_DEBUG
- const char* file_name, /*!< in: file name where created */
- ulint line, /*!< in: line where created */
-#endif /* UNIV_DEBUG */
- ulint* size) /*!< out: allocated size in bytes,
- or NULL */
-{
- mem_heap_t* heap;
- void* buf;
-
- heap = mem_heap_create_at(n, file_name, line);
-
- /* Note that as we created the first block in the heap big enough
- for the buffer requested by the caller, the buffer will be in the
- first block and thus we can calculate the pointer to the heap from
- the pointer to the buffer when we free the memory buffer. */
-
- if (size) {
- /* Adjust the allocation to the actual size of the
- memory block. */
- ulint m = mem_block_get_len(heap)
- - mem_block_get_free(heap);
-#ifdef UNIV_MEM_DEBUG
- m -= MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE;
-#endif /* UNIV_MEM_DEBUG */
- ut_ad(m >= n);
- n = m;
- *size = m;
- }
-
- buf = mem_heap_alloc(heap, n);
-
- ut_a((byte*) heap == (byte*) buf - MEM_BLOCK_HEADER_SIZE
- - MEM_FIELD_HEADER_SIZE);
- return(buf);
-}
-
-/***************************************************************//**
-NOTE: Use the corresponding macro instead of this function. Frees a single
-buffer of storage from the dynamic memory of the C compiler. Similar to the
-free of C. */
-UNIV_INLINE
-void
-mem_free_func(
-/*==========*/
- void* ptr, /*!< in, own: buffer to be freed */
- const char* file_name, /*!< in: file name where created */
- ulint line) /*!< in: line where created */
-{
- mem_heap_t* heap;
-
- heap = (mem_heap_t*)((byte*) ptr - MEM_BLOCK_HEADER_SIZE
- - MEM_FIELD_HEADER_SIZE);
- mem_heap_free_func(heap, file_name, line);
-}
-
-/*****************************************************************//**
-Returns the space in bytes occupied by a memory heap. */
-UNIV_INLINE
-ulint
-mem_heap_get_size(
-/*==============*/
- mem_heap_t* heap) /*!< in: heap */
-{
- ulint size = 0;
-
- ut_ad(mem_heap_check(heap));
-
- size = heap->total_size;
-
-#ifndef UNIV_HOTBACKUP
- if (heap->free_block) {
- size += UNIV_PAGE_SIZE;
- }
-#endif /* !UNIV_HOTBACKUP */
-
- return(size);
-}
-
-/**********************************************************************//**
-Duplicates a NUL-terminated string.
-@return own: a copy of the string, must be deallocated with mem_free */
-UNIV_INLINE
-char*
-mem_strdup(
-/*=======*/
- const char* str) /*!< in: string to be copied */
-{
- ulint len = strlen(str) + 1;
- return((char*) memcpy(mem_alloc(len), str, len));
-}
-
-/**********************************************************************//**
-Makes a NUL-terminated copy of a nonterminated string.
-@return own: a copy of the string, must be deallocated with mem_free */
-UNIV_INLINE
-char*
-mem_strdupl(
-/*========*/
- const char* str, /*!< in: string to be copied */
- ulint len) /*!< in: length of str, in bytes */
-{
- char* s = (char*) mem_alloc(len + 1);
- s[len] = 0;
- return((char*) memcpy(s, str, len));
-}
-
-/**********************************************************************//**
-Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INLINE
-char*
-mem_heap_strdupl(
-/*=============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* str, /*!< in: string to be copied */
- ulint len) /*!< in: length of str, in bytes */
-{
- char* s = (char*) mem_heap_alloc(heap, len + 1);
- s[len] = 0;
- return((char*) memcpy(s, str, len));
-}
diff --git a/storage/xtradb/include/mem0pool.h b/storage/xtradb/include/mem0pool.h
deleted file mode 100644
index a65ba50fdf9..00000000000
--- a/storage/xtradb/include/mem0pool.h
+++ /dev/null
@@ -1,121 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mem0pool.h
-The lowest-level memory management
-
-Created 6/9/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef mem0pool_h
-#define mem0pool_h
-
-#include "univ.i"
-#include "os0file.h"
-#include "ut0lst.h"
-
-/** Memory pool */
-struct mem_pool_t;
-
-/** The common memory pool */
-extern mem_pool_t* mem_comm_pool;
-
-/** Memory area header */
-struct mem_area_t{
- ulint size_and_free; /*!< memory area size is obtained by
- anding with ~MEM_AREA_FREE; area in
- a free list if ANDing with
- MEM_AREA_FREE results in nonzero */
- UT_LIST_NODE_T(mem_area_t)
- free_list; /*!< free list node */
-};
-
-/** Each memory area takes this many extra bytes for control information */
-#define MEM_AREA_EXTRA_SIZE (ut_calc_align(sizeof(struct mem_area_t),\
- UNIV_MEM_ALIGNMENT))
-
-/********************************************************************//**
-Creates a memory pool.
-@return memory pool */
-UNIV_INTERN
-mem_pool_t*
-mem_pool_create(
-/*============*/
- ulint size); /*!< in: pool size in bytes */
-/********************************************************************//**
-Frees a memory pool. */
-UNIV_INTERN
-void
-mem_pool_free(
-/*==========*/
- mem_pool_t* pool); /*!< in, own: memory pool */
-/********************************************************************//**
-Allocates memory from a pool. NOTE: This low-level function should only be
-used in mem0mem.*!
-@return own: allocated memory buffer */
-UNIV_INTERN
-void*
-mem_area_alloc(
-/*===========*/
- ulint* psize, /*!< in: requested size in bytes; for optimum
- space usage, the size should be a power of 2
- minus MEM_AREA_EXTRA_SIZE;
- out: allocated size in bytes (greater than
- or equal to the requested size) */
- mem_pool_t* pool); /*!< in: memory pool */
-/********************************************************************//**
-Frees memory to a pool. */
-UNIV_INTERN
-void
-mem_area_free(
-/*==========*/
- void* ptr, /*!< in, own: pointer to allocated memory
- buffer */
- mem_pool_t* pool); /*!< in: memory pool */
-/********************************************************************//**
-Returns the amount of reserved memory.
-@return reserved mmeory in bytes */
-UNIV_INTERN
-ulint
-mem_pool_get_reserved(
-/*==================*/
- mem_pool_t* pool); /*!< in: memory pool */
-/********************************************************************//**
-Validates a memory pool.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_pool_validate(
-/*==============*/
- mem_pool_t* pool); /*!< in: memory pool */
-/********************************************************************//**
-Prints info of a memory pool. */
-UNIV_INTERN
-void
-mem_pool_print_info(
-/*================*/
- FILE* outfile,/*!< in: output file to write to */
- mem_pool_t* pool); /*!< in: memory pool */
-
-
-#ifndef UNIV_NONINL
-#include "mem0pool.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/mem0pool.ic b/storage/xtradb/include/mem0pool.ic
deleted file mode 100644
index f4bafb8ba63..00000000000
--- a/storage/xtradb/include/mem0pool.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/mem0pool.ic
-The lowest-level memory management
-
-Created 6/8/1994 Heikki Tuuri
-*************************************************************************/
diff --git a/storage/xtradb/include/mtr0log.h b/storage/xtradb/include/mtr0log.h
deleted file mode 100644
index 18a345d050f..00000000000
--- a/storage/xtradb/include/mtr0log.h
+++ /dev/null
@@ -1,251 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mtr0log.h
-Mini-transaction logging routines
-
-Created 12/7/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef mtr0log_h
-#define mtr0log_h
-
-#include "univ.i"
-#include "mtr0mtr.h"
-#include "dict0types.h"
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log
-record to the mini-transaction log if mtr is not NULL. */
-UNIV_INTERN
-void
-mlog_write_ulint(
-/*=============*/
- byte* ptr, /*!< in: pointer where to write */
- ulint val, /*!< in: value to write */
- byte type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Writes 8 bytes to a file page. Writes the corresponding log
-record to the mini-transaction log, only if mtr is not NULL */
-UNIV_INTERN
-void
-mlog_write_ull(
-/*===========*/
- byte* ptr, /*!< in: pointer where to write */
- ib_uint64_t val, /*!< in: value to write */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Writes a string to a file page buffered in the buffer pool. Writes the
-corresponding log record to the mini-transaction log. */
-UNIV_INTERN
-void
-mlog_write_string(
-/*==============*/
- byte* ptr, /*!< in: pointer where to write */
- const byte* str, /*!< in: string to write */
- ulint len, /*!< in: string length */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Logs a write of a string to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
-UNIV_INTERN
-void
-mlog_log_string(
-/*============*/
- byte* ptr, /*!< in: pointer written to */
- ulint len, /*!< in: string length */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Writes initial part of a log record consisting of one-byte item
-type and four-byte space and page numbers. */
-UNIV_INTERN
-void
-mlog_write_initial_log_record(
-/*==========================*/
- const byte* ptr, /*!< in: pointer to (inside) a buffer
- frame holding the file page where
- modification is made */
- byte type, /*!< in: log item type: MLOG_1BYTE, ... */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Writes a log record about an .ibd file create/delete/rename.
-@return new value of log_ptr */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_for_file_op(
-/*======================================*/
- ulint type, /*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
- MLOG_FILE_RENAME */
- ulint space_id,/*!< in: space id, if applicable */
- ulint page_no,/*!< in: page number (not relevant currently) */
- byte* log_ptr,/*!< in: pointer to mtr log which has been opened */
- mtr_t* mtr); /*!< in: mtr */
-/********************************************************//**
-Catenates 1 - 4 bytes to the mtr log. */
-UNIV_INLINE
-void
-mlog_catenate_ulint(
-/*================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint val, /*!< in: value to write */
- ulint type); /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
-/********************************************************//**
-Catenates n bytes to the mtr log. */
-UNIV_INTERN
-void
-mlog_catenate_string(
-/*=================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* str, /*!< in: string to write */
- ulint len); /*!< in: string length */
-/********************************************************//**
-Catenates a compressed ulint to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_ulint_compressed(
-/*===========================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint val); /*!< in: value to write */
-/********************************************************//**
-Catenates a compressed 64-bit integer to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_ull_compressed(
-/*=========================*/
- mtr_t* mtr, /*!< in: mtr */
- ib_uint64_t val); /*!< in: value to write */
-/********************************************************//**
-Opens a buffer to mlog. It must be closed with mlog_close.
-@return buffer, NULL if log mode MTR_LOG_NONE */
-UNIV_INLINE
-byte*
-mlog_open(
-/*======*/
- mtr_t* mtr, /*!< in: mtr */
- ulint size); /*!< in: buffer size in bytes; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
-/********************************************************//**
-Closes a buffer opened to mlog. */
-UNIV_INLINE
-void
-mlog_close(
-/*=======*/
- mtr_t* mtr, /*!< in: mtr */
- byte* ptr); /*!< in: buffer space from ptr up was not used */
-/********************************************************//**
-Writes the initial part of a log record (3..11 bytes).
-If the implementation of this function is changed, all
-size parameters to mlog_open() should be adjusted accordingly!
-@return new value of log_ptr */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_fast(
-/*===============================*/
- const byte* ptr, /*!< in: pointer to (inside) a buffer
- frame holding the file page where
- modification is made */
- byte type, /*!< in: log item type: MLOG_1BYTE, ... */
- byte* log_ptr,/*!< in: pointer to mtr log which has
- been opened */
- mtr_t* mtr); /*!< in: mtr */
-#else /* !UNIV_HOTBACKUP */
-# define mlog_write_initial_log_record(ptr,type,mtr) ((void) 0)
-# define mlog_write_initial_log_record_fast(ptr,type,log_ptr,mtr) ((byte*) 0)
-#endif /* !UNIV_HOTBACKUP */
-/********************************************************//**
-Parses an initial log record written by mlog_write_initial_log_record.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
-byte*
-mlog_parse_initial_log_record(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* type, /*!< out: log record type: MLOG_1BYTE, ... */
- ulint* space, /*!< out: space id */
- ulint* page_no);/*!< out: page number */
-/********************************************************//**
-Parses a log record written by mlog_write_ulint or mlog_write_ull.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
-byte*
-mlog_parse_nbytes(
-/*==============*/
- ulint type, /*!< in: log record type: MLOG_1BYTE, ... */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* page, /*!< in: page where to apply the log record, or NULL */
- void* page_zip);/*!< in/out: compressed page, or NULL */
-/********************************************************//**
-Parses a log record written by mlog_write_string.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
-byte*
-mlog_parse_string(
-/*==============*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* page, /*!< in: page where to apply the log record, or NULL */
- void* page_zip);/*!< in/out: compressed page, or NULL */
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Opens a buffer for mlog, writes the initial log record and,
-if needed, the field lengths of an index. Reserves space
-for further log entries. The log entry must be closed with
-mtr_close().
-@return buffer, NULL if log mode MTR_LOG_NONE */
-UNIV_INTERN
-byte*
-mlog_open_and_write_index(
-/*======================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* rec, /*!< in: index record or page */
- const dict_index_t* index, /*!< in: record descriptor */
- byte type, /*!< in: log item type */
- ulint size); /*!< in: requested buffer size in bytes
- (if 0, calls mlog_close() and
- returns NULL) */
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************//**
-Parses a log record written by mlog_open_and_write_index.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
-byte*
-mlog_parse_index(
-/*=============*/
- byte* ptr, /*!< in: buffer */
- const byte* end_ptr,/*!< in: buffer end */
- ibool comp, /*!< in: TRUE=compact record format */
- dict_index_t** index); /*!< out, own: dummy index */
-
-#ifndef UNIV_HOTBACKUP
-/* Insert, update, and maybe other functions may use this value to define an
-extra mlog buffer size for variable size data */
-#define MLOG_BUF_MARGIN 256
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_NONINL
-#include "mtr0log.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/mtr0log.ic b/storage/xtradb/include/mtr0log.ic
deleted file mode 100644
index d508d30fafe..00000000000
--- a/storage/xtradb/include/mtr0log.ic
+++ /dev/null
@@ -1,277 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mtr0log.ic
-Mini-transaction logging routines
-
-Created 12/7/1995 Heikki Tuuri
-*******************************************************/
-
-#include "mach0data.h"
-#include "ut0lst.h"
-#include "buf0buf.h"
-#include "buf0dblwr.h"
-#include "fsp0types.h"
-#include "btr0types.h"
-#include "trx0sys.h"
-
-/********************************************************//**
-Opens a buffer to mlog. It must be closed with mlog_close.
-@return buffer, NULL if log mode MTR_LOG_NONE */
-UNIV_INLINE
-byte*
-mlog_open(
-/*======*/
- mtr_t* mtr, /*!< in: mtr */
- ulint size) /*!< in: buffer size in bytes; MUST be
- smaller than DYN_ARRAY_DATA_SIZE! */
-{
- dyn_array_t* mlog;
-
- mtr->modifications = TRUE;
-
- if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
-
- return(NULL);
- }
-
- mlog = &(mtr->log);
-
- return(dyn_array_open(mlog, size));
-}
-
-/********************************************************//**
-Closes a buffer opened to mlog. */
-UNIV_INLINE
-void
-mlog_close(
-/*=======*/
- mtr_t* mtr, /*!< in: mtr */
- byte* ptr) /*!< in: buffer space from ptr up was not used */
-{
- dyn_array_t* mlog;
-
- ut_ad(mtr_get_log_mode(mtr) != MTR_LOG_NONE);
-
- mlog = &(mtr->log);
-
- dyn_array_close(mlog, ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Catenates 1 - 4 bytes to the mtr log. The value is not compressed. */
-UNIV_INLINE
-void
-mlog_catenate_ulint(
-/*================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint val, /*!< in: value to write */
- ulint type) /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
-{
- dyn_array_t* mlog;
- byte* ptr;
-
- if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
-
- return;
- }
-
- mlog = &(mtr->log);
-
-#if MLOG_1BYTE != 1
-# error "MLOG_1BYTE != 1"
-#endif
-#if MLOG_2BYTES != 2
-# error "MLOG_2BYTES != 2"
-#endif
-#if MLOG_4BYTES != 4
-# error "MLOG_4BYTES != 4"
-#endif
-#if MLOG_8BYTES != 8
-# error "MLOG_8BYTES != 8"
-#endif
- ptr = (byte*) dyn_array_push(mlog, type);
-
- if (type == MLOG_4BYTES) {
- mach_write_to_4(ptr, val);
- } else if (type == MLOG_2BYTES) {
- mach_write_to_2(ptr, val);
- } else {
- ut_ad(type == MLOG_1BYTE);
- mach_write_to_1(ptr, val);
- }
-}
-
-/********************************************************//**
-Catenates a compressed ulint to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_ulint_compressed(
-/*===========================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint val) /*!< in: value to write */
-{
- byte* log_ptr;
-
- log_ptr = mlog_open(mtr, 10);
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr += mach_write_compressed(log_ptr, val);
-
- mlog_close(mtr, log_ptr);
-}
-
-/********************************************************//**
-Catenates a compressed 64-bit integer to mlog. */
-UNIV_INLINE
-void
-mlog_catenate_ull_compressed(
-/*=========================*/
- mtr_t* mtr, /*!< in: mtr */
- ib_uint64_t val) /*!< in: value to write */
-{
- byte* log_ptr;
-
- log_ptr = mlog_open(mtr, 15);
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr += mach_ull_write_compressed(log_ptr, val);
-
- mlog_close(mtr, log_ptr);
-}
-
-/********************************************************//**
-Writes the initial part of a log record (3..11 bytes).
-If the implementation of this function is changed, all
-size parameters to mlog_open() should be adjusted accordingly!
-@return new value of log_ptr */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_fast(
-/*===============================*/
- const byte* ptr, /*!< in: pointer to (inside) a buffer
- frame holding the file page where
- modification is made */
- byte type, /*!< in: log item type: MLOG_1BYTE, ... */
- byte* log_ptr,/*!< in: pointer to mtr log which has
- been opened */
- mtr_t* mtr) /*!< in: mtr */
-{
-#ifdef UNIV_DEBUG
- buf_block_t* block;
-#endif
- const byte* page;
- ulint space;
- ulint offset;
-
- ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
- ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type));
- ut_ad(ptr && log_ptr);
-
- page = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE);
- space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
- offset = mach_read_from_4(page + FIL_PAGE_OFFSET);
-
- /* check whether the page is in the doublewrite buffer;
- the doublewrite buffer is located in pages
- FSP_EXTENT_SIZE, ..., 3 * FSP_EXTENT_SIZE - 1 in the
- system tablespace */
- if (space == TRX_SYS_SPACE
- && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) {
- if (buf_dblwr_being_created) {
- /* Do nothing: we only come to this branch in an
- InnoDB database creation. We do not redo log
- anything for the doublewrite buffer pages. */
- return(log_ptr);
- } else {
- fprintf(stderr,
- "Error: trying to redo log a record of type "
- "%d on page %lu of space %lu in the "
- "doublewrite buffer, continuing anyway.\n"
- "Please post a bug report to "
- "bugs.mysql.com.\n",
- type, offset, space);
- ut_ad(0);
- }
- }
-
- mach_write_to_1(log_ptr, type);
- log_ptr++;
- log_ptr += mach_write_compressed(log_ptr, space);
- log_ptr += mach_write_compressed(log_ptr, offset);
-
- mtr->n_log_recs++;
-
-#ifdef UNIV_LOG_DEBUG
- fprintf(stderr,
- "Adding to mtr log record type %lu space %lu page no %lu\n",
- (ulong) type, space, offset);
-#endif
-
-#ifdef UNIV_DEBUG
- /* We now assume that all x-latched pages have been modified! */
- block = (buf_block_t*) buf_block_align(ptr);
-
- if (!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)) {
-
- mtr_memo_push(mtr, block, MTR_MEMO_MODIFY);
- }
-#endif
- return(log_ptr);
-}
-
-/********************************************************//**
-Writes a log record about an .ibd file create/delete/rename.
-@return new value of log_ptr */
-UNIV_INLINE
-byte*
-mlog_write_initial_log_record_for_file_op(
-/*======================================*/
- ulint type, /*!< in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or
- MLOG_FILE_RENAME */
- ulint space_id,/*!< in: space id, if applicable */
- ulint page_no,/*!< in: page number (not relevant currently) */
- byte* log_ptr,/*!< in: pointer to mtr log which has been opened */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(log_ptr);
-
- mach_write_to_1(log_ptr, type);
- log_ptr++;
-
- /* We write dummy space id and page number */
- log_ptr += mach_write_compressed(log_ptr, space_id);
- log_ptr += mach_write_compressed(log_ptr, page_no);
-
- mtr->n_log_recs++;
-
- return(log_ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/mtr0mtr.h b/storage/xtradb/include/mtr0mtr.h
deleted file mode 100644
index ef6cd61719d..00000000000
--- a/storage/xtradb/include/mtr0mtr.h
+++ /dev/null
@@ -1,453 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2012, Facebook Inc.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mtr0mtr.h
-Mini-transaction buffer
-
-Created 11/26/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef mtr0mtr_h
-#define mtr0mtr_h
-
-#include "univ.i"
-#include "mem0mem.h"
-#include "dyn0dyn.h"
-#include "buf0types.h"
-#include "sync0rw.h"
-#include "ut0byte.h"
-#include "mtr0types.h"
-#include "page0types.h"
-#include "trx0types.h"
-
-/* Logging modes for a mini-transaction */
-#define MTR_LOG_ALL 21 /* default mode: log all operations
- modifying disk-based data */
-#define MTR_LOG_NONE 22 /* log no operations */
-#define MTR_LOG_NO_REDO 23 /* Don't generate REDO */
-/*#define MTR_LOG_SPACE 23 */ /* log only operations modifying
- file space page allocation data
- (operations in fsp0fsp.* ) */
-#define MTR_LOG_SHORT_INSERTS 24 /* inserts are logged in a shorter
- form */
-
-/* Types for the mlock objects to store in the mtr memo; NOTE that the
-first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
-#define MTR_MEMO_PAGE_S_FIX RW_S_LATCH
-#define MTR_MEMO_PAGE_X_FIX RW_X_LATCH
-#define MTR_MEMO_BUF_FIX RW_NO_LATCH
-#ifdef UNIV_DEBUG
-# define MTR_MEMO_MODIFY 54
-#endif /* UNIV_DEBUG */
-#define MTR_MEMO_S_LOCK 55
-#define MTR_MEMO_X_LOCK 56
-
-/** @name Log item types
-The log items are declared 'byte' so that the compiler can warn if val
-and type parameters are switched in a call to mlog_write_ulint. NOTE!
-For 1 - 8 bytes, the flag value must give the length also! @{ */
-#define MLOG_SINGLE_REC_FLAG 128 /*!< if the mtr contains only
- one log record for one page,
- i.e., write_initial_log_record
- has been called only once,
- this flag is ORed to the type
- of that first log record */
-#define MLOG_1BYTE (1) /*!< one byte is written */
-#define MLOG_2BYTES (2) /*!< 2 bytes ... */
-#define MLOG_4BYTES (4) /*!< 4 bytes ... */
-#define MLOG_8BYTES (8) /*!< 8 bytes ... */
-#define MLOG_REC_INSERT ((byte)9) /*!< record insert */
-#define MLOG_REC_CLUST_DELETE_MARK ((byte)10) /*!< mark clustered index record
- deleted */
-#define MLOG_REC_SEC_DELETE_MARK ((byte)11) /*!< mark secondary index record
- deleted */
-#define MLOG_REC_UPDATE_IN_PLACE ((byte)13) /*!< update of a record,
- preserves record field sizes */
-#define MLOG_REC_DELETE ((byte)14) /*!< delete a record from a
- page */
-#define MLOG_LIST_END_DELETE ((byte)15) /*!< delete record list end on
- index page */
-#define MLOG_LIST_START_DELETE ((byte)16) /*!< delete record list start on
- index page */
-#define MLOG_LIST_END_COPY_CREATED ((byte)17) /*!< copy record list end to a
- new created index page */
-#define MLOG_PAGE_REORGANIZE ((byte)18) /*!< reorganize an
- index page in
- ROW_FORMAT=REDUNDANT */
-#define MLOG_PAGE_CREATE ((byte)19) /*!< create an index page */
-#define MLOG_UNDO_INSERT ((byte)20) /*!< insert entry in an undo
- log */
-#define MLOG_UNDO_ERASE_END ((byte)21) /*!< erase an undo log
- page end */
-#define MLOG_UNDO_INIT ((byte)22) /*!< initialize a page in an
- undo log */
-#define MLOG_UNDO_HDR_DISCARD ((byte)23) /*!< discard an update undo log
- header */
-#define MLOG_UNDO_HDR_REUSE ((byte)24) /*!< reuse an insert undo log
- header */
-#define MLOG_UNDO_HDR_CREATE ((byte)25) /*!< create an undo
- log header */
-#define MLOG_REC_MIN_MARK ((byte)26) /*!< mark an index
- record as the
- predefined minimum
- record */
-#define MLOG_IBUF_BITMAP_INIT ((byte)27) /*!< initialize an
- ibuf bitmap page */
-/*#define MLOG_FULL_PAGE ((byte)28) full contents of a page */
-#ifdef UNIV_LOG_LSN_DEBUG
-# define MLOG_LSN ((byte)28) /* current LSN */
-#endif
-#define MLOG_INIT_FILE_PAGE ((byte)29) /*!< this means that a
- file page is taken
- into use and the prior
- contents of the page
- should be ignored: in
- recovery we must not
- trust the lsn values
- stored to the file
- page */
-#define MLOG_WRITE_STRING ((byte)30) /*!< write a string to
- a page */
-#define MLOG_MULTI_REC_END ((byte)31) /*!< if a single mtr writes
- several log records,
- this log record ends the
- sequence of these records */
-#define MLOG_DUMMY_RECORD ((byte)32) /*!< dummy log record used to
- pad a log block full */
-#define MLOG_FILE_CREATE ((byte)33) /*!< log record about an .ibd
- file creation */
-#define MLOG_FILE_RENAME ((byte)34) /*!< log record about an .ibd
- file rename */
-#define MLOG_FILE_DELETE ((byte)35) /*!< log record about an .ibd
- file deletion */
-#define MLOG_COMP_REC_MIN_MARK ((byte)36) /*!< mark a compact
- index record as the
- predefined minimum
- record */
-#define MLOG_COMP_PAGE_CREATE ((byte)37) /*!< create a compact
- index page */
-#define MLOG_COMP_REC_INSERT ((byte)38) /*!< compact record insert */
-#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39)
- /*!< mark compact
- clustered index record
- deleted */
-#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/*!< mark compact
- secondary index record
- deleted; this log
- record type is
- redundant, as
- MLOG_REC_SEC_DELETE_MARK
- is independent of the
- record format. */
-#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/*!< update of a
- compact record,
- preserves record field
- sizes */
-#define MLOG_COMP_REC_DELETE ((byte)42) /*!< delete a compact record
- from a page */
-#define MLOG_COMP_LIST_END_DELETE ((byte)43) /*!< delete compact record list
- end on index page */
-#define MLOG_COMP_LIST_START_DELETE ((byte)44) /*!< delete compact record list
- start on index page */
-#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45)
- /*!< copy compact
- record list end to a
- new created index
- page */
-#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /*!< reorganize an index page */
-#define MLOG_FILE_CREATE2 ((byte)47) /*!< log record about creating
- an .ibd file, with format */
-#define MLOG_ZIP_WRITE_NODE_PTR ((byte)48) /*!< write the node pointer of
- a record on a compressed
- non-leaf B-tree page */
-#define MLOG_ZIP_WRITE_BLOB_PTR ((byte)49) /*!< write the BLOB pointer
- of an externally stored column
- on a compressed page */
-#define MLOG_ZIP_WRITE_HEADER ((byte)50) /*!< write to compressed page
- header */
-#define MLOG_ZIP_PAGE_COMPRESS ((byte)51) /*!< compress an index page */
-#define MLOG_ZIP_PAGE_COMPRESS_NO_DATA ((byte)52)/*!< compress an index page
- without logging it's image */
-#define MLOG_ZIP_PAGE_REORGANIZE ((byte)53) /*!< reorganize a compressed
- page */
-#define MLOG_BIGGEST_TYPE ((byte)53) /*!< biggest value (used in
- assertions) */
-
-#define MLOG_FILE_WRITE_CRYPT_DATA ((byte)100) /*!< log record for
- writing/updating crypt data of
- a tablespace */
-
-#define EXTRA_CHECK_MLOG_NUMBER(x) \
- ((x) == MLOG_FILE_WRITE_CRYPT_DATA)
-
-/* @} */
-
-/** @name Flags for MLOG_FILE operations
-(stored in the page number parameter, called log_flags in the
-functions). The page number parameter was originally written as 0. @{ */
-#define MLOG_FILE_FLAG_TEMP 1 /*!< identifies TEMPORARY TABLE in
- MLOG_FILE_CREATE, MLOG_FILE_CREATE2 */
-/* @} */
-
-/* included here because it needs MLOG_LSN defined */
-#include "log0log.h"
-
-/***************************************************************//**
-Starts a mini-transaction. */
-UNIV_INLINE
-void
-mtr_start_trx(
-/*======*/
- mtr_t* mtr, /*!< out: mini-transaction */
- trx_t* trx) /*!< in: transaction */
- __attribute__((nonnull (1)));
-/***************************************************************//**
-Starts a mini-transaction. */
-UNIV_INLINE
-void
-mtr_start(
-/*======*/
- mtr_t* mtr) /*!< out: mini-transaction */
-{
- mtr_start_trx(mtr, NULL);
-}
- MY_ATTRIBUTE((nonnull))
-/***************************************************************//**
-Commits a mini-transaction. */
-UNIV_INTERN
-void
-mtr_commit(
-/*=======*/
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************//**
-Sets and returns a savepoint in mtr.
-@return savepoint */
-UNIV_INLINE
-ulint
-mtr_set_savepoint(
-/*==============*/
- mtr_t* mtr); /*!< in: mtr */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Releases the (index tree) s-latch stored in an mtr memo after a
-savepoint. */
-UNIV_INLINE
-void
-mtr_release_s_latch_at_savepoint(
-/*=============================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint savepoint, /*!< in: savepoint */
- prio_rw_lock_t* lock); /*!< in: latch to release */
-#else /* !UNIV_HOTBACKUP */
-# define mtr_release_s_latch_at_savepoint(mtr,savepoint,lock) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************//**
-Releases a buf_page stored in an mtr memo after a
-savepoint. */
-UNIV_INTERN
-void
-mtr_release_buf_page_at_savepoint(
-/*=============================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint savepoint, /*!< in: savepoint */
- buf_block_t* block); /*!< in: block to release */
-
-/***************************************************************//**
-Gets the logging mode of a mini-transaction.
-@return logging mode: MTR_LOG_NONE, ... */
-UNIV_INLINE
-ulint
-mtr_get_log_mode(
-/*=============*/
- mtr_t* mtr); /*!< in: mtr */
-/***************************************************************//**
-Changes the logging mode of a mini-transaction.
-@return old mode */
-UNIV_INLINE
-ulint
-mtr_set_log_mode(
-/*=============*/
- mtr_t* mtr, /*!< in: mtr */
- ulint mode); /*!< in: logging mode: MTR_LOG_NONE, ... */
-/********************************************************//**
-Reads 1 - 4 bytes from a file page buffered in the buffer pool.
-@return value read */
-UNIV_INTERN
-ulint
-mtr_read_ulint(
-/*===========*/
- const byte* ptr, /*!< in: pointer from where to read */
- ulint type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-This macro locks an rw-lock in s-mode. */
-#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), __FILE__, __LINE__,\
- (MTR))
-/*********************************************************************//**
-This macro locks an rw-lock in x-mode. */
-#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), __FILE__, __LINE__,\
- (MTR))
-/*********************************************************************//**
-NOTE! Use the macro above!
-Locks a lock in s-mode. */
-UNIV_INLINE
-void
-mtr_s_lock_func(
-/*============*/
- prio_rw_lock_t* lock, /*!< in: rw-lock */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line number */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************************//**
-NOTE! Use the macro above!
-Locks a lock in x-mode. */
-UNIV_INLINE
-void
-mtr_x_lock_func(
-/*============*/
- prio_rw_lock_t* lock, /*!< in: rw-lock */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line number */
- mtr_t* mtr); /*!< in: mtr */
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************//**
-Releases an object in the memo stack.
-@return true if released */
-UNIV_INTERN
-bool
-mtr_memo_release(
-/*=============*/
- mtr_t* mtr, /*!< in/out: mini-transaction */
- void* object, /*!< in: object */
- ulint type) /*!< in: object type: MTR_MEMO_S_LOCK, ... */
- MY_ATTRIBUTE((nonnull));
-#ifdef UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Checks if memo contains the given item.
-@return TRUE if contains */
-UNIV_INLINE
-bool
-mtr_memo_contains(
-/*==============*/
- mtr_t* mtr, /*!< in: mtr */
- const void* object, /*!< in: object to search */
- ulint type) /*!< in: type of object */
- MY_ATTRIBUTE((warn_unused_result));
-
-/**********************************************************//**
-Checks if memo contains the given page.
-@return TRUE if contains */
-UNIV_INTERN
-ibool
-mtr_memo_contains_page(
-/*===================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* ptr, /*!< in: pointer to buffer frame */
- ulint type); /*!< in: type of object */
-/*********************************************************//**
-Prints info of an mtr handle. */
-UNIV_INTERN
-void
-mtr_print(
-/*======*/
- mtr_t* mtr); /*!< in: mtr */
-# else /* !UNIV_HOTBACKUP */
-# define mtr_memo_contains(mtr, object, type) TRUE
-# define mtr_memo_contains_page(mtr, ptr, type) TRUE
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_DEBUG */
-/*######################################################################*/
-
-#define MTR_BUF_MEMO_SIZE 200 /* number of slots in memo */
-
-/***************************************************************//**
-Returns the log object of a mini-transaction buffer.
-@return log */
-UNIV_INLINE
-dyn_array_t*
-mtr_get_log(
-/*========*/
- mtr_t* mtr); /*!< in: mini-transaction */
-/***************************************************//**
-Pushes an object to an mtr memo stack. */
-UNIV_INLINE
-void
-mtr_memo_push(
-/*==========*/
- mtr_t* mtr, /*!< in: mtr */
- void* object, /*!< in: object */
- ulint type); /*!< in: object type: MTR_MEMO_S_LOCK, ... */
-
-/** Mini-transaction memo stack slot. */
-struct mtr_memo_slot_t{
- ulint type; /*!< type of the stored object (MTR_MEMO_S_LOCK, ...) */
- void* object; /*!< pointer to the object */
-};
-
-/* Mini-transaction handle and buffer */
-struct mtr_t{
-#ifdef UNIV_DEBUG
- ulint state; /*!< MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */
-#endif
- dyn_array_t memo; /*!< memo stack for locks etc. */
- dyn_array_t log; /*!< mini-transaction log */
- unsigned inside_ibuf:1;
- /*!< TRUE if inside ibuf changes */
- unsigned modifications:1;
- /*!< TRUE if the mini-transaction
- modified buffer pool pages */
- unsigned made_dirty:1;
- /*!< TRUE if mtr has made at least
- one buffer pool page dirty */
- ulint n_log_recs;
- /* count of how many page initial log records
- have been written to the mtr log */
- ulint n_freed_pages;
- /* number of pages that have been freed in
- this mini-transaction */
- ulint log_mode; /* specifies which operations should be
- logged; default value MTR_LOG_ALL */
- lsn_t start_lsn;/* start lsn of the possible log entry for
- this mtr */
- lsn_t end_lsn;/* end lsn of the possible log entry for
- this mtr */
-#ifdef UNIV_DEBUG
- ulint magic_n;
-#endif /* UNIV_DEBUG */
- trx_t* trx; /*!< transaction */
-};
-
-#ifdef UNIV_DEBUG
-# define MTR_MAGIC_N 54551
-#endif /* UNIV_DEBUG */
-
-#define MTR_ACTIVE 12231
-#define MTR_COMMITTING 56456
-#define MTR_COMMITTED 34676
-
-#ifndef UNIV_NONINL
-#include "mtr0mtr.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/mtr0mtr.ic b/storage/xtradb/include/mtr0mtr.ic
deleted file mode 100644
index a6d9df09925..00000000000
--- a/storage/xtradb/include/mtr0mtr.ic
+++ /dev/null
@@ -1,298 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mtr0mtr.ic
-Mini-transaction buffer
-
-Created 11/26/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef UNIV_HOTBACKUP
-# include "sync0sync.h"
-# include "sync0rw.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "mach0data.h"
-
-/***************************************************//**
-Checks if a mini-transaction is dirtying a clean page.
-@return TRUE if the mtr is dirtying a clean page. */
-UNIV_INTERN
-ibool
-mtr_block_dirtied(
-/*==============*/
- const buf_block_t* block) /*!< in: block being x-fixed */
- MY_ATTRIBUTE((nonnull,warn_unused_result));
-
-/***************************************************************//**
-Starts a mini-transaction. */
-UNIV_INLINE
-void
-mtr_start_trx(
-/*======*/
- mtr_t* mtr, /*!< out: mini-transaction */
- trx_t* trx) /*!< in: transaction */
-{
- UNIV_MEM_INVALID(mtr, sizeof *mtr);
-
- dyn_array_create(&(mtr->memo));
- dyn_array_create(&(mtr->log));
-
- mtr->log_mode = MTR_LOG_ALL;
- mtr->inside_ibuf = FALSE;
- mtr->modifications = FALSE;
- mtr->made_dirty = FALSE;
- mtr->n_log_recs = 0;
- mtr->n_freed_pages = 0;
- mtr->trx = trx;
-
- ut_d(mtr->state = MTR_ACTIVE);
- ut_d(mtr->magic_n = MTR_MAGIC_N);
-}
-
-/***************************************************//**
-Pushes an object to an mtr memo stack. */
-UNIV_INLINE
-void
-mtr_memo_push(
-/*==========*/
- mtr_t* mtr, /*!< in: mtr */
- void* object, /*!< in: object */
- ulint type) /*!< in: object type: MTR_MEMO_S_LOCK, ... */
-{
- dyn_array_t* memo;
- mtr_memo_slot_t* slot;
-
- ut_ad(object);
- ut_ad(type >= MTR_MEMO_PAGE_S_FIX);
- ut_ad(type <= MTR_MEMO_X_LOCK);
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- /* If this mtr has x-fixed a clean page then we set
- the made_dirty flag. This tells us if we need to
- grab log_flush_order_mutex at mtr_commit so that we
- can insert the dirtied page to the flush list. */
- if (type == MTR_MEMO_PAGE_X_FIX && !mtr->made_dirty) {
- mtr->made_dirty =
- mtr_block_dirtied((const buf_block_t*) object);
- }
-
- memo = &(mtr->memo);
-
- slot = (mtr_memo_slot_t*) dyn_array_push(memo, sizeof *slot);
-
- slot->object = object;
- slot->type = type;
-}
-
-/**********************************************************//**
-Sets and returns a savepoint in mtr.
-@return savepoint */
-UNIV_INLINE
-ulint
-mtr_set_savepoint(
-/*==============*/
- mtr_t* mtr) /*!< in: mtr */
-{
- dyn_array_t* memo;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- memo = &(mtr->memo);
-
- return(dyn_array_get_data_size(memo));
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Releases the (index tree) s-latch stored in an mtr memo after a
-savepoint. */
-UNIV_INLINE
-void
-mtr_release_s_latch_at_savepoint(
-/*=============================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint savepoint, /*!< in: savepoint */
- prio_rw_lock_t* lock) /*!< in: latch to release */
-{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- memo = &(mtr->memo);
-
- ut_ad(dyn_array_get_data_size(memo) > savepoint);
-
- slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, savepoint);
-
- ut_ad(slot->object == lock);
- ut_ad(slot->type == MTR_MEMO_S_LOCK);
-
- rw_lock_s_unlock(lock);
-
- slot->object = NULL;
-}
-
-# ifdef UNIV_DEBUG
-/**********************************************************//**
-Checks if memo contains the given item.
-@return TRUE if contains */
-UNIV_INLINE
-bool
-mtr_memo_contains(
-/*==============*/
- mtr_t* mtr, /*!< in: mtr */
- const void* object, /*!< in: object to search */
- ulint type) /*!< in: type of object */
-{
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE || mtr->state == MTR_COMMITTING);
-
- for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo);
- block;
- block = dyn_array_get_prev_block(&mtr->memo, block)) {
- const mtr_memo_slot_t* start
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block));
- mtr_memo_slot_t* slot
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block)
- + dyn_block_get_used(block));
-
- ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t)));
-
- while (slot-- != start) {
- if (object == slot->object && type == slot->type) {
- return(true);
- }
- }
- }
-
- return(false);
-}
-# endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************************//**
-Returns the log object of a mini-transaction buffer.
-@return log */
-UNIV_INLINE
-dyn_array_t*
-mtr_get_log(
-/*========*/
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
-
- return(&(mtr->log));
-}
-
-/***************************************************************//**
-Gets the logging mode of a mini-transaction.
-@return logging mode: MTR_LOG_NONE, ... */
-UNIV_INLINE
-ulint
-mtr_get_log_mode(
-/*=============*/
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(mtr);
- ut_ad(mtr->log_mode >= MTR_LOG_ALL);
- ut_ad(mtr->log_mode <= MTR_LOG_SHORT_INSERTS);
-
- return(mtr->log_mode);
-}
-
-/***************************************************************//**
-Changes the logging mode of a mini-transaction.
-@return old mode */
-UNIV_INLINE
-ulint
-mtr_set_log_mode(
-/*=============*/
- mtr_t* mtr, /*!< in: mtr */
- ulint mode) /*!< in: logging mode: MTR_LOG_NONE, ... */
-{
- ulint old_mode;
-
- ut_ad(mtr);
- ut_ad(mode >= MTR_LOG_ALL);
- ut_ad(mode <= MTR_LOG_SHORT_INSERTS);
-
- old_mode = mtr->log_mode;
-
- if ((mode == MTR_LOG_SHORT_INSERTS) && (old_mode == MTR_LOG_NONE)) {
- /* Do nothing */
- } else {
- mtr->log_mode = mode;
- }
-
- ut_ad(old_mode >= MTR_LOG_ALL);
- ut_ad(old_mode <= MTR_LOG_SHORT_INSERTS);
-
- return(old_mode);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Locks a lock in s-mode. */
-UNIV_INLINE
-void
-mtr_s_lock_func(
-/*============*/
- prio_rw_lock_t* lock, /*!< in: rw-lock */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line number */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(mtr);
- ut_ad(lock);
-
- rw_lock_s_lock_inline(lock, 0, file, line);
-
- mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK);
-}
-
-/*********************************************************************//**
-Locks a lock in x-mode. */
-UNIV_INLINE
-void
-mtr_x_lock_func(
-/*============*/
- prio_rw_lock_t* lock, /*!< in: rw-lock */
- const char* file, /*!< in: file name */
- ulint line, /*!< in: line number */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(mtr);
- ut_ad(lock);
-
- rw_lock_x_lock_inline(lock, 0, file, line);
-
- mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/mtr0types.h b/storage/xtradb/include/mtr0types.h
deleted file mode 100644
index 43368c0b726..00000000000
--- a/storage/xtradb/include/mtr0types.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mtr0types.h
-Mini-transaction buffer global types
-
-Created 11/26/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef mtr0types_h
-#define mtr0types_h
-
-struct mtr_t;
-
-#endif
diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h
deleted file mode 100644
index b17e09cf0fa..00000000000
--- a/storage/xtradb/include/os0file.h
+++ /dev/null
@@ -1,1565 +0,0 @@
-/***********************************************************************
-
-Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-***********************************************************************/
-
-/**************************************************//**
-@file include/os0file.h
-The interface to the operating system file io
-
-Created 10/21/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef os0file_h
-#define os0file_h
-
-#include "univ.i"
-#include "trx0types.h"
-
-#ifndef __WIN__
-#include <dirent.h>
-#include <sys/stat.h>
-#include <time.h>
-#endif
-
-/** File node of a tablespace or the log data space */
-struct fil_node_t;
-
-extern ibool os_has_said_disk_full;
-/** Flag: enable debug printout for asynchronous i/o */
-extern ibool os_aio_print_debug;
-
-#ifdef __WIN__
-
-/** We define always WIN_ASYNC_IO, and check at run-time whether
- the OS actually supports it: Win 95 does not, NT does. */
-#define WIN_ASYNC_IO
-
-/** Use unbuffered I/O */
-#define UNIV_NON_BUFFERED_IO
-
-#endif
-
-/** File offset in bytes */
-typedef ib_uint64_t os_offset_t;
-#ifdef _WIN32
-# define SRV_PATH_SEPARATOR '\\'
-/** File handle */
-typedef HANDLE os_file_t;
-/** Convert a C file descriptor to a native file handle
-@param fd file descriptor
-@return native file handle */
-# define OS_FILE_FROM_FD(fd) reinterpret_cast<HANDLE>(_get_osfhandle(fd))
-#else
-# define SRV_PATH_SEPARATOR '/'
-/** File handle */
-typedef int os_file_t;
-/** Convert a C file descriptor to a native file handle
-@param fd file descriptor
-@return native file handle */
-# define OS_FILE_FROM_FD(fd) fd
-#endif
-
-/** File descriptor with optional PERFORMANCE_SCHEMA instrumentation */
-struct pfs_os_file_t
-{
- /** Default constructor */
- pfs_os_file_t() : m_file(
-#ifdef _WIN32
- INVALID_HANDLE_VALUE
-#else
- -1
-#endif
- )
-#ifdef UNIV_PFS_IO
- , m_psi(NULL)
-#endif
- {}
-
- /** The wrapped file handle */
- os_file_t m_file;
-#ifdef UNIV_PFS_IO
- /** PERFORMANCE_SCHEMA descriptor */
- struct PSI_file *m_psi;
-#endif
- /** Implicit type conversion.
- @return the wrapped file handle */
- operator os_file_t() const { return m_file; }
- /** Assignment operator.
- @param[in] file file handle to be assigned */
- void operator=(os_file_t file) { m_file = file; }
-};
-
-/** Umask for creating files */
-extern ulint os_innodb_umask;
-
-/** The next value should be smaller or equal to the smallest sector size used
-on any disk. A log block is required to be a portion of disk which is written
-so that if the start and the end of a block get written to disk, then the
-whole block gets written. This should be true even in most cases of a crash:
-if this fails for a log block, then it is equivalent to a media failure in the
-log. */
-
-#define OS_FILE_LOG_BLOCK_SIZE srv_log_block_size
-
-/** Options for os_file_create_func @{ */
-enum os_file_create_t {
- OS_FILE_OPEN = 51, /*!< to open an existing file (if
- doesn't exist, error) */
- OS_FILE_CREATE, /*!< to create new file (if
- exists, error) */
- OS_FILE_OVERWRITE, /*!< to create a new file, if exists
- the overwrite old file */
- OS_FILE_OPEN_RAW, /*!< to open a raw device or disk
- partition */
- OS_FILE_CREATE_PATH, /*!< to create the directories */
- OS_FILE_OPEN_RETRY, /*!< open with retry */
-
- /** Flags that can be combined with the above values. Please ensure
- that the above values stay below 128. */
-
- OS_FILE_ON_ERROR_NO_EXIT = 128, /*!< do not exit on unknown errors */
- OS_FILE_ON_ERROR_SILENT = 256 /*!< don't print diagnostic messages to
- the log unless it is a fatal error,
- this flag is only used if
- ON_ERROR_NO_EXIT is set */
-};
-
-/** Options for os_file_advise_func @{ */
-enum os_file_advise_t {
- OS_FILE_ADVISE_NORMAL = 1, /*!< no advice on access pattern
- (default) */
- OS_FILE_ADVISE_RANDOM = 2, /*!< access in random order */
- OS_FILE_ADVISE_SEQUENTIAL = 4, /*!< access the specified data
- sequentially (with lower offsets read
- before higher ones) */
- OS_FILE_ADVISE_WILLNEED = 8, /*!< specified data will be accessed
- in the near future */
- OS_FILE_ADVISE_DONTNEED = 16, /*!< specified data will not be
- accessed in the near future */
- OS_FILE_ADVISE_NOREUSE = 32 /*!< access only once */
-};
-
-#define OS_FILE_READ_ONLY 333
-#define OS_FILE_READ_WRITE 444
-#define OS_FILE_READ_ALLOW_DELETE 555 /* for mysqlbackup */
-#define OS_FILE_READ_WRITE_CACHED 666 /* OS_FILE_READ_WRITE but never
- O_DIRECT. Only for
- os_file_create_simple_no_error_handling
- currently. */
-
-/* Options for file_create */
-#define OS_FILE_AIO 61
-#define OS_FILE_NORMAL 62
-/* @} */
-
-/** Types for file create @{ */
-#define OS_DATA_FILE 100
-#define OS_LOG_FILE 101
-/* @} */
-
-/** Error codes from os_file_get_last_error @{ */
-#define OS_FILE_NAME_TOO_LONG 36
-#define OS_FILE_NOT_FOUND 71
-#define OS_FILE_DISK_FULL 72
-#define OS_FILE_ALREADY_EXISTS 73
-#define OS_FILE_PATH_ERROR 74
-#define OS_FILE_AIO_RESOURCES_RESERVED 75 /* wait for OS aio resources
- to become available again */
-#define OS_FILE_SHARING_VIOLATION 76
-#define OS_FILE_ERROR_NOT_SPECIFIED 77
-#define OS_FILE_INSUFFICIENT_RESOURCE 78
-#define OS_FILE_AIO_INTERRUPTED 79
-#define OS_FILE_OPERATION_ABORTED 80
-#define OS_FILE_ACCESS_VIOLATION 81
-#define OS_FILE_OPERATION_NOT_SUPPORTED 125
-#define OS_FILE_ERROR_MAX 200
-/* @} */
-
-/** Types for aio operations @{ */
-#define OS_FILE_READ 10
-#define OS_FILE_WRITE 11
-
-#define OS_FILE_LOG 256 /* This can be ORed to type */
-/* @} */
-
-#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /*!< Win NT does not allow more
- than 64 */
-
-/** Modes for aio operations @{ */
-#define OS_AIO_NORMAL 21 /*!< Normal asynchronous i/o not for ibuf
- pages or ibuf bitmap pages */
-#define OS_AIO_IBUF 22 /*!< Asynchronous i/o for ibuf pages or ibuf
- bitmap pages */
-#define OS_AIO_LOG 23 /*!< Asynchronous i/o for the log */
-#define OS_AIO_SYNC 24 /*!< Asynchronous i/o where the calling thread
- will itself wait for the i/o to complete,
- doing also the job of the i/o-handler thread;
- can be used for any pages, ibuf or non-ibuf.
- This is used to save CPU time, as we can do
- with fewer thread switches. Plain synchronous
- i/o is not as good, because it must serialize
- the file seek and read or write, causing a
- bottleneck for parallelism. */
-
-#define OS_AIO_SIMULATED_WAKE_LATER 512 /*!< This can be ORed to mode
- in the call of os_aio(...),
- if the caller wants to post several i/o
- requests in a batch, and only after that
- wake the i/o-handler thread; this has
- effect only in simulated aio */
-/* @} */
-
-#define OS_WIN31 1 /*!< Microsoft Windows 3.x */
-#define OS_WIN95 2 /*!< Microsoft Windows 95 */
-#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */
-#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */
-#define OS_WINXP 5 /*!< Microsoft Windows XP
- or Windows Server 2003 */
-#define OS_WINVISTA 6 /*!< Microsoft Windows Vista
- or Windows Server 2008 */
-#define OS_WIN7 7 /*!< Microsoft Windows 7
- or Windows Server 2008 R2 */
-
-
-extern ulint os_n_file_reads;
-extern ulint os_n_file_writes;
-extern ulint os_n_fsyncs;
-
-#define OS_MIN_LOG_BLOCK_SIZE 512
-
-extern ulint srv_log_block_size;
-
-#ifdef UNIV_PFS_IO
-/* Keys to register InnoDB I/O with performance schema */
-extern mysql_pfs_key_t innodb_file_data_key;
-extern mysql_pfs_key_t innodb_file_log_key;
-extern mysql_pfs_key_t innodb_file_temp_key;
-extern mysql_pfs_key_t innodb_file_bmp_key;
-
-/* Following four macros are instumentations to register
-various file I/O operations with performance schema.
-1) register_pfs_file_open_begin() and register_pfs_file_open_end() are
-used to register file creation, opening, closing and renaming.
-2) register_pfs_file_rename_begin() and register_pfs_file_rename_end()
-are used to register file renaming
-2) register_pfs_file_io_begin() and register_pfs_file_io_end() are
-used to register actual file read, write and flush
-3) register_pfs_file_close_begin() and register_pfs_file_close_end()
-are used to register file deletion operations*/
-# define register_pfs_file_open_begin(state, locker, key, op, name, \
- src_file, src_line) \
-do { \
- locker = PSI_FILE_CALL(get_thread_file_name_locker)( \
- state, key, op, name, &locker); \
- if (locker != NULL) { \
- PSI_FILE_CALL(start_file_open_wait)( \
- locker, src_file, src_line); \
- } \
-} while (0)
-
-# define register_pfs_file_open_end(locker, file, result) \
-do { \
- if (locker != NULL) { \
- file.m_psi = PSI_FILE_CALL( \
- end_file_open_wait)( \
- locker, result); \
- } \
-} while (0)
-
-# define register_pfs_file_rename_begin(state, locker, key, op, name, \
- src_file, src_line) \
- register_pfs_file_open_begin(state, locker, key, op, name, \
- src_file, src_line) \
-
-# define register_pfs_file_rename_end(locker, result) \
-do { \
- if (locker != NULL) { \
- PSI_FILE_CALL(end_file_open_wait)(locker, result); \
- } \
-} while (0)
-
-# define register_pfs_file_close_begin(state, locker, key, op, name, \
- src_file, src_line) \
-do { \
- locker = PSI_FILE_CALL(get_thread_file_name_locker)( \
- state, key, op, name, &locker); \
- if (UNIV_LIKELY(locker != NULL)) { \
- PSI_FILE_CALL(start_file_close_wait)( \
- locker, src_file, src_line); \
- } \
-} while (0)
-
-# define register_pfs_file_close_end(locker, result) \
-do { \
- if (UNIV_LIKELY(locker != NULL)) { \
- PSI_FILE_CALL(end_file_close_wait)( \
- locker, result); \
- } \
-} while (0)
-
-# define register_pfs_file_io_begin(state, locker, file, count, op, \
- src_file, src_line) \
-do { \
- locker = PSI_FILE_CALL(get_thread_file_stream_locker)( \
- state, file.m_psi, op); \
- if (locker != NULL) { \
- PSI_FILE_CALL(start_file_wait)( \
- locker, count, src_file, src_line); \
- } \
-} while (0)
-
-# define register_pfs_file_io_end(locker, count) \
-do { \
- if (locker != NULL) { \
- PSI_FILE_CALL(end_file_wait)(locker, count); \
- } \
-} while (0)
-#endif /* UNIV_PFS_IO */
-
-/* Following macros/functions are file I/O APIs that would be performance
-schema instrumented if "UNIV_PFS_IO" is defined. They would point to
-wrapper functions with performance schema instrumentation in such case.
-
-os_file_create
-os_file_create_simple
-os_file_create_simple_no_error_handling
-os_file_close
-os_file_close_no_error_handling
-os_file_rename
-os_aio
-os_file_read
-os_file_read_no_error_handling
-os_file_read_no_error_handling_int_fd
-os_file_write
-os_file_write_int_fd
-os_file_set_eof_at
-os_file_allocate
-
-The wrapper functions have the prefix of "innodb_". */
-
-#ifdef UNIV_PFS_IO
-# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \
- pfs_os_file_create_func(key, name, create, purpose, type, \
- success, atomic_writes, __FILE__, __LINE__)
-
-# define os_file_create_simple(key, name, create, access, success) \
- pfs_os_file_create_simple_func(key, name, create, access, \
- success, __FILE__, __LINE__)
-
-# define os_file_create_simple_no_error_handling( \
- key, name, create_mode, access, success, atomic_writes) \
- pfs_os_file_create_simple_no_error_handling_func( \
- key, name, create_mode, access, success, atomic_writes, __FILE__, __LINE__)
-
-# define os_file_close(file) \
- pfs_os_file_close_func(file, __FILE__, __LINE__)
-
-# define os_file_close_no_error_handling(file) \
- pfs_os_file_close_no_error_handling_func(file, __FILE__, __LINE__)
-
-# define os_aio(type, is_log, mode, name, file, buf, offset, \
- n, page_size, message1, message2, space_id, \
- trx, write_size) \
- pfs_os_aio_func(type, is_log, mode, name, file, buf, offset, \
- n, page_size, message1, message2, space_id, trx, write_size, \
- __FILE__, __LINE__)
-
-# define os_file_read(file, buf, offset, n) \
- pfs_os_file_read_func(file, buf, offset, n, NULL, \
- __FILE__, __LINE__)
-
-# define os_file_read_trx(file, buf, offset, n, trx) \
- pfs_os_file_read_func(file, buf, offset, n, trx, \
- __FILE__, __LINE__)
-
-# define os_file_read_no_error_handling(file, buf, offset, n) \
- pfs_os_file_read_no_error_handling_func(file, buf, offset, n, \
- __FILE__, __LINE__)
-
-# define os_file_read_no_error_handling_int_fd( \
- file, buf, offset, n) \
- pfs_os_file_read_no_error_handling_int_fd_func( \
- file, buf, offset, n, __FILE__, __LINE__)
-
-# define os_file_write(name, file, buf, offset, n) \
- pfs_os_file_write_func(name, file, buf, offset, \
- n, __FILE__, __LINE__)
-
-# define os_file_write_int_fd(name, file, buf, offset, n) \
- pfs_os_file_write_int_fd_func(name, file, buf, offset, \
- n, __FILE__, __LINE__)
-
-# define os_file_flush(file) \
- pfs_os_file_flush_func(file, __FILE__, __LINE__)
-
-# define os_file_rename(key, oldpath, newpath) \
- pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
-
-# define os_file_delete(key, name) \
- pfs_os_file_delete_func(key, name, __FILE__, __LINE__)
-
-# define os_file_delete_if_exists(key, name) \
- pfs_os_file_delete_if_exists_func(key, name, __FILE__, __LINE__)
-
-# define os_file_set_eof_at(file, new_len) \
- pfs_os_file_set_eof_at_func(file, new_len, __FILE__, __LINE__)
-
-# ifdef HAVE_POSIX_FALLOCATE
-# define os_file_allocate(file, offset, len) \
- pfs_os_file_allocate_func(file, offset, len, __FILE__, __LINE__)
-# endif
-
-#else /* UNIV_PFS_IO */
-
-/* If UNIV_PFS_IO is not defined, these I/O APIs point
-to original un-instrumented file I/O APIs */
-# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \
- os_file_create_func(name, create, purpose, type, success, atomic_writes)
-
-# define os_file_create_simple(key, name, create_mode, access, success) \
- os_file_create_simple_func(name, create_mode, access, success)
-
-# define os_file_create_simple_no_error_handling( \
- key, name, create_mode, access, success, atomic_writes) \
- os_file_create_simple_no_error_handling_func( \
- name, create_mode, access, success, atomic_writes)
-
-# define os_file_close(file) \
- os_file_close_func(file)
-
-# define os_file_close_no_error_handling(file) \
- os_file_close_no_error_handling_func(file)
-
-# define os_aio(type, is_log, mode, name, file, buf, offset, n, page_size, message1, \
- message2, space_id, trx, write_size) \
- os_aio_func(type, is_log, mode, name, file, buf, offset, n, \
- page_size, message1, message2, space_id, trx, write_size)
-
-# define os_file_read(file, buf, offset, n) \
- os_file_read_func(file, buf, offset, n, NULL)
-
-# define os_file_read_trx(file, buf, offset, n, trx) \
- os_file_read_func(file, buf, offset, n, trx)
-
-# define os_file_read_no_error_handling(file, buf, offset, n) \
- os_file_read_no_error_handling_func(file, buf, offset, n)
-# define os_file_read_no_error_handling_int_fd( \
- file, buf, offset, n) \
- os_file_read_no_error_handling_func(file, buf, offset, n)
-
-# define os_file_write_int_fd(name, file, buf, offset, n) \
- os_file_write_func(name, file, buf, offset, n)
-# define os_file_write(name, file, buf, offset, n) \
- os_file_write_func(name, file, buf, offset, n)
-
-
-# define os_file_flush(file) os_file_flush_func(file)
-
-# define os_file_rename(key, oldpath, newpath) \
- os_file_rename_func(oldpath, newpath)
-
-# define os_file_delete(key, name) os_file_delete_func(name)
-
-# define os_file_delete_if_exists(key, name) \
- os_file_delete_if_exists_func(name)
-
-# define os_file_set_eof_at(file, new_len) \
- os_file_set_eof_at_func(file, new_len)
-
-#endif /* UNIV_PFS_IO */
-
-/* File types for directory entry data type */
-
-enum os_file_type_t {
- OS_FILE_TYPE_UNKNOWN = 0,
- OS_FILE_TYPE_FILE, /* regular file
- (or a character/block device) */
- OS_FILE_TYPE_DIR, /* directory */
- OS_FILE_TYPE_LINK /* symbolic link */
-};
-
-/* Maximum path string length in bytes when referring to tables with in the
-'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers
-of this size from the thread stack; that is why this should not be made much
-bigger than 4000 bytes */
-#define OS_FILE_MAX_PATH 4000
-
-/** Struct used in fetching information of a file in a directory */
-struct os_file_stat_t {
- char name[OS_FILE_MAX_PATH]; /*!< path to a file */
- os_file_type_t type; /*!< file type */
- ib_int64_t size; /*!< file size */
- time_t ctime; /*!< creation time */
- time_t mtime; /*!< modification time */
- time_t atime; /*!< access time */
- bool rw_perm; /*!< true if can be opened
- in read-write mode. Only valid
- if type == OS_FILE_TYPE_FILE */
-};
-
-#ifdef __WIN__
-typedef HANDLE os_file_dir_t; /*!< directory stream */
-#else
-typedef DIR* os_file_dir_t; /*!< directory stream */
-#endif
-
-#ifdef __WIN__
-/***********************************************************************//**
-Gets the operating system version. Currently works only on Windows.
-@return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000, OS_WINXP, OS_WINVISTA,
-OS_WIN7. */
-UNIV_INTERN
-ulint
-os_get_os_version(void);
-/*===================*/
-#endif /* __WIN__ */
-#ifndef UNIV_HOTBACKUP
-
-
-/** Create a temporary file. This function is like tmpfile(3), but
-the temporary file is created in the given parameter path. If the path
-is null then it will create the file in the mysql server configuration
-parameter (--tmpdir).
-@param[in] path location for creating temporary file
-@return temporary file handle, or NULL on error */
-UNIV_INTERN
-FILE*
-os_file_create_tmpfile(
- const char* path);
-
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************************//**
-The os_file_opendir() function opens a directory stream corresponding to the
-directory named by the dirname argument. The directory stream is positioned
-at the first entry. In both Unix and Windows we automatically skip the '.'
-and '..' items at the start of the directory listing.
-@return directory stream, NULL if error */
-UNIV_INTERN
-os_file_dir_t
-os_file_opendir(
-/*============*/
- const char* dirname, /*!< in: directory name; it must not
- contain a trailing '\' or '/' */
- ibool error_is_fatal);/*!< in: TRUE if we should treat an
- error as a fatal error; if we try to
- open symlinks then we do not wish a
- fatal error if it happens not to be
- a directory */
-/***********************************************************************//**
-Closes a directory stream.
-@return 0 if success, -1 if failure */
-UNIV_INTERN
-int
-os_file_closedir(
-/*=============*/
- os_file_dir_t dir); /*!< in: directory stream */
-/***********************************************************************//**
-This function returns information of the next file in the directory. We jump
-over the '.' and '..' entries in the directory.
-@return 0 if ok, -1 if error, 1 if at the end of the directory */
-UNIV_INTERN
-int
-os_file_readdir_next_file(
-/*======================*/
- const char* dirname,/*!< in: directory name or path */
- os_file_dir_t dir, /*!< in: directory stream */
- os_file_stat_t* info); /*!< in/out: buffer where the info is returned */
-/*****************************************************************//**
-This function attempts to create a directory named pathname. The new directory
-gets default permissions. On Unix, the permissions are (0770 & ~umask). If the
-directory exists already, nothing is done and the call succeeds, unless the
-fail_if_exists arguments is true.
-@return TRUE if call succeeds, FALSE on error */
-UNIV_INTERN
-ibool
-os_file_create_directory(
-/*=====================*/
- const char* pathname, /*!< in: directory name as
- null-terminated string */
- ibool fail_if_exists);/*!< in: if TRUE, pre-existing directory
- is treated as an error. */
-/****************************************************************//**
-NOTE! Use the corresponding macro os_file_create_simple(), not directly
-this function!
-A simple function to open or create a file.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-os_file_t
-os_file_create_simple_func(
-/*=======================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY or
- OS_FILE_READ_WRITE */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes);/*!< in: atomic writes table option
- value */
-/****************************************************************//**
-NOTE! Use the corresponding macro
-os_file_create_simple_no_error_handling(), not directly this function!
-A simple function to open or create a file.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-pfs_os_file_t
-os_file_create_simple_no_error_handling_func(
-/*=========================================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY,
- OS_FILE_READ_WRITE,
- OS_FILE_READ_ALLOW_DELETE (used by a backup
- program reading the file), or
- OS_FILE_READ_WRITE_CACHED (disable O_DIRECT
- if it would be enabled otherwise) */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes)/*!< in: atomic writes table option
- value */
- __attribute__((nonnull, warn_unused_result));
-/****************************************************************//**
-Tries to disable OS caching on an opened file descriptor.
-@return true if operation is success and false otherwise */
-UNIV_INTERN
-bool
-os_file_set_nocache(
-/*================*/
- os_file_t fd, /*!< in: file descriptor to alter */
- const char* file_name, /*!< in: file name, used in the
- diagnostic message */
- const char* operation_name);/*!< in: "open" or "create"; used in the
- diagnostic message */
-/****************************************************************//**
-NOTE! Use the corresponding macro os_file_create(), not directly
-this function!
-Opens an existing file or creates a new.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-pfs_os_file_t
-os_file_create_func(
-/*================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
- non-buffered i/o is desired,
- OS_FILE_NORMAL, if any normal file;
- NOTE that it also depends on type, os_aio_..
- and srv_.. variables whether we really use
- async i/o or unbuffered i/o: look in the
- function source code for the exact rules */
- ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes) /*!< in: atomic writes table option
- value */
- __attribute__((nonnull, warn_unused_result));
-/***********************************************************************//**
-Deletes a file. The file has to be closed before calling this.
-@return TRUE if success */
-UNIV_INTERN
-bool
-os_file_delete_func(
-/*================*/
- const char* name); /*!< in: file path as a null-terminated
- string */
-
-/***********************************************************************//**
-Deletes a file if it exists. The file has to be closed before calling this.
-@return TRUE if success */
-UNIV_INTERN
-bool
-os_file_delete_if_exists_func(
-/*==========================*/
- const char* name); /*!< in: file path as a null-terminated
- string */
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_rename(), not directly
-this function!
-Renames a file (can also move it to another directory). It is safest that the
-file is closed before calling this function.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_rename_func(
-/*================*/
- const char* oldpath, /*!< in: old file path as a
- null-terminated string */
- const char* newpath); /*!< in: new file path */
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_close(), not directly this
-function!
-Closes a file handle. In case of error, error number can be retrieved with
-os_file_get_last_error.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_close_func(
-/*===============*/
- os_file_t file); /*!< in, own: handle to a file */
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_close(), not directly this
-function!
-Closes a file handle. In case of error, error number can be retrieved with
-os_file_get_last_error.
-@return TRUE if success */
-UNIV_INTERN
-bool
-os_file_close_no_error_handling_func(
-/*===============*/
- os_file_t file); /*!< in, own: handle to a file */
-
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_set_eof_at(), not
-directly this function!
-Truncates a file at the specified position.
-@return TRUE if success */
-UNIV_INTERN
-bool
-os_file_set_eof_at_func(
- os_file_t file, /*!< in: handle to a file */
- ib_uint64_t new_len);/*!< in: new file length */
-
-#ifdef HAVE_POSIX_FALLOCATE
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_allocate(), not
-directly this function!
-Ensures that disk space is allocated for the file.
-@return TRUE if success */
-UNIV_INTERN
-bool
-os_file_allocate_func(
- os_file_t file, /*!< in, own: handle to a file */
- os_offset_t offset, /*!< in: file region offset */
- os_offset_t len); /*!< in: file region length */
-#endif
-
-#ifdef UNIV_PFS_IO
-/****************************************************************//**
-NOTE! Please use the corresponding macro os_file_create_simple(),
-not directly this function!
-A performance schema instrumented wrapper function for
-os_file_create_simple() which opens or creates a file.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INLINE
-pfs_os_file_t
-pfs_os_file_create_simple_func(
-/*===========================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY or
- OS_FILE_READ_WRITE */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes,/*!< in: atomic writes table option
- value */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/****************************************************************//**
-NOTE! Please use the corresponding macro
-os_file_create_simple_no_error_handling(), not directly this function!
-A performance schema instrumented wrapper function for
-os_file_create_simple_no_error_handling(). Add instrumentation to
-monitor file creation/open.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INLINE
-pfs_os_file_t
-pfs_os_file_create_simple_no_error_handling_func(
-/*=============================================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode, /*!< in: file create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY,
- OS_FILE_READ_WRITE, or
- OS_FILE_READ_ALLOW_DELETE; the last option is
- used by a backup program reading the file */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes,/*!< in: atomic writes table option
- value*/
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/****************************************************************//**
-NOTE! Please use the corresponding macro os_file_create(), not directly
-this function!
-A performance schema wrapper function for os_file_create().
-Add instrumentation to monitor file creation/open.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INLINE
-pfs_os_file_t
-pfs_os_file_create_func(
-/*====================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: file create mode */
- ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
- non-buffered i/o is desired,
- OS_FILE_NORMAL, if any normal file;
- NOTE that it also depends on type, os_aio_..
- and srv_.. variables whether we really use
- async i/o or unbuffered i/o: look in the
- function source code for the exact rules */
- ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes,/*!< in: atomic writes table option
- value */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_close(), not directly
-this function!
-A performance schema instrumented wrapper function for os_file_close().
-@return TRUE if success */
-UNIV_INLINE
-ibool
-pfs_os_file_close_func(
-/*===================*/
- pfs_os_file_t file, /*!< in, own: handle to a file */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_close_no_error_handling(),
-not directly this function!
-A performance schema instrumented wrapper function for
-os_file_close_no_error_handling().
-@return TRUE if success */
-UNIV_INLINE
-bool
-pfs_os_file_close_no_error_handling_func(
-/*===================*/
- pfs_os_file_t file, /*!< in, own: handle to a file */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_file_read(), not directly
-this function!
-This is the performance schema instrumented wrapper function for
-os_file_read() which requests a synchronous read operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INLINE
-ibool
-pfs_os_file_read_func(
-/*==================*/
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n, /*!< in: number of bytes to read */
- trx_t* trx, /*!< in: trx */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_file_read_no_error_handling(),
-not directly this function!
-This is the performance schema instrumented wrapper function for
-os_file_read_no_error_handling_func() which requests a synchronous
-read operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INLINE
-ibool
-pfs_os_file_read_no_error_handling_func(
-/*====================================*/
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n, /*!< in: number of bytes to read */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_aio(), not directly this
-function!
-Performance schema wrapper function of os_aio() which requests
-an asynchronous i/o operation.
-@return TRUE if request was queued successfully, FALSE if fail */
-UNIV_INLINE
-ibool
-pfs_os_aio_func(
-/*============*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
- ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
- ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read or from which
- to write */
- os_offset_t offset, /*!< in: file offset where to read or write */
- ulint n, /*!< in: number of bytes to read or write */
- ulint page_size,/*!< in: page size in bytes */
- fil_node_t* message1,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- void* message2,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- ulint space_id,
- trx_t* trx,
- ulint* write_size,/*!< in/out: Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_file_write(), not directly
-this function!
-This is the performance schema instrumented wrapper function for
-os_file_write() which requests a synchronous write operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INLINE
-ibool
-pfs_os_file_write_func(
-/*===================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- pfs_os_file_t file, /*!< in: handle to a file */
- const void* buf, /*!< in: buffer from which to write */
- os_offset_t offset, /*!< in: file offset where to write */
- ulint n, /*!< in: number of bytes to write */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_flush(), not directly
-this function!
-This is the performance schema instrumented wrapper function for
-os_file_flush() which flushes the write buffers of a given file to the disk.
-Flushes the write buffers of a given file to the disk.
-@return TRUE if success */
-UNIV_INLINE
-ibool
-pfs_os_file_flush_func(
-/*===================*/
- pfs_os_file_t file, /*!< in, own: handle to a file */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_rename(), not directly
-this function!
-This is the performance schema instrumented wrapper function for
-os_file_rename()
-@return TRUE if success */
-UNIV_INLINE
-ibool
-pfs_os_file_rename_func(
-/*====================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* oldpath,/*!< in: old file path as a null-terminated
- string */
- const char* newpath,/*!< in: new file path */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_delete(), not directly
-this function!
-This is the performance schema instrumented wrapper function for
-os_file_delete()
-@return TRUE if success */
-UNIV_INLINE
-bool
-pfs_os_file_delete_func(
-/*====================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: old file path as a null-terminated
- string */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_delete_if_exists(), not
-directly this function!
-This is the performance schema instrumented wrapper function for
-os_file_delete_if_exists()
-@return TRUE if success */
-UNIV_INLINE
-bool
-pfs_os_file_delete_if_exists_func(
-/*==============================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: old file path as a null-terminated
- string */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_set_eof_at(), not
-directly this function!
-This is the performance schema instrumented wrapper function for
-os_file_set_eof_at()
-@return TRUE if success */
-UNIV_INLINE
-bool
-pfs_os_file_set_eof_at_func(
- pfs_os_file_t file, /*!< in: handle to a file */
- ib_uint64_t new_len,/*!< in: new file length */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-
-#ifdef HAVE_POSIX_FALLOCATE
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_allocate(), not
-directly this function!
-Ensures that disk space is allocated for the file.
-@return TRUE if success */
-UNIV_INLINE
-bool
-pfs_os_file_allocate_func(
- pfs_os_file_t file, /*!< in, own: handle to a file */
- os_offset_t offset, /*!< in: file region offset */
- os_offset_t len, /*!< in: file region length */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line);/*!< in: line where the func invoked */
-#endif
-
-#endif /* UNIV_PFS_IO */
-
-/***********************************************************************//**
-Checks if the file is marked as invalid.
-@return TRUE if invalid */
-UNIV_INTERN
-bool
-os_file_is_invalid(
- pfs_os_file_t file); /*!< in, own: handle to a file */
-
-/***********************************************************************//**
-Marks the file as invalid. */
-UNIV_INTERN
-void
-os_file_mark_invalid(
- pfs_os_file_t* file); /*!< out: pointer to a handle to a file */
-
-/***********************************************************************//**
-Announces an intention to access file data in a specific pattern in the
-future.
-@return TRUE if success */
-UNIV_INTERN
-bool
-os_file_advise(
- pfs_os_file_t file, /*!< in, own: handle to a file */
- os_offset_t offset, /*!< in: file region offset */
- os_offset_t len, /*!< in: file region length */
- ulint advice);/*!< in: advice for access pattern */
-
-/***********************************************************************//**
-Gets a file size.
-@return file size, or (os_offset_t) -1 on failure */
-UNIV_INTERN
-os_offset_t
-os_file_get_size(
-/*=============*/
- pfs_os_file_t file) /*!< in: handle to a file */
- MY_ATTRIBUTE((warn_unused_result));
-/** Set the size of a newly created file.
-@param[in] name file name
-@param[in] file file handle
-@param[in] size desired file size
-@param[in] sparse whether to create a sparse file (no preallocating)
-@return whether the operation succeeded */
-UNIV_INTERN
-bool
-os_file_set_size(
- const char* name,
- pfs_os_file_t file,
- os_offset_t size,
- bool is_sparse = false)
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***********************************************************************//**
-Truncates a file at its current position.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_set_eof(
-/*============*/
- FILE* file); /*!< in: file to be truncated */
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_flush(), not directly this function!
-Flushes the write buffers of a given file to the disk.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_flush_func(
-/*===============*/
- os_file_t file); /*!< in, own: handle to a file */
-/***********************************************************************//**
-Retrieves the last error number if an error occurs in a file io function.
-The number should be retrieved before any other OS calls (because they may
-overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned.
-@return error number, or OS error number + 100 */
-UNIV_INTERN
-ulint
-os_file_get_last_error(
-/*===================*/
- bool report_all_errors); /*!< in: TRUE if we want an error message
- printed of all errors */
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_file_read(), not directly this function!
-Requests a synchronous read operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-os_file_read_func(
-/*==============*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n, /*!< in: number of bytes to read */
- trx_t* trx); /*!< in: trx */
-/*******************************************************************//**
-Rewind file to its start, read at most size - 1 bytes from it to str, and
-NUL-terminate str. All errors are silently ignored. This function is
-mostly meant to be used with temporary files. */
-UNIV_INTERN
-void
-os_file_read_string(
-/*================*/
- FILE* file, /*!< in: file to read from */
- char* str, /*!< in: buffer where to read */
- ulint size); /*!< in: size of buffer */
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_file_read_no_error_handling(),
-not directly this function!
-Requests a synchronous positioned read operation. This function does not do
-any error handling. In case of error it returns FALSE.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-os_file_read_no_error_handling_func(
-/*================================*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n); /*!< in: number of bytes to read */
-
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_file_write(), not directly this
-function!
-Requests a synchronous write operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-os_file_write_func(
-/*===============*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- os_file_t file, /*!< in: handle to a file */
- const void* buf, /*!< in: buffer from which to write */
- os_offset_t offset, /*!< in: file offset where to write */
- ulint n); /*!< in: number of bytes to write */
-
-/*******************************************************************//**
-Check the existence and type of the given file.
-@return TRUE if call succeeded */
-UNIV_INTERN
-ibool
-os_file_status(
-/*===========*/
- const char* path, /*!< in: pathname of the file */
- ibool* exists, /*!< out: TRUE if file exists */
- os_file_type_t* type); /*!< out: type of the file (if it exists) */
-/****************************************************************//**
-The function os_file_dirname returns a directory component of a
-null-terminated pathname string. In the usual case, dirname returns
-the string up to, but not including, the final '/', and basename
-is the component following the final '/'. Trailing '/' characters
-are not counted as part of the pathname.
-
-If path does not contain a slash, dirname returns the string ".".
-
-Concatenating the string returned by dirname, a "/", and the basename
-yields a complete pathname.
-
-The return value is a copy of the directory component of the pathname.
-The copy is allocated from heap. It is the caller responsibility
-to free it after it is no longer needed.
-
-The following list of examples (taken from SUSv2) shows the strings
-returned by dirname and basename for different paths:
-
- path dirname basename
- "/usr/lib" "/usr" "lib"
- "/usr/" "/" "usr"
- "usr" "." "usr"
- "/" "/" "/"
- "." "." "."
- ".." "." ".."
-
-@return own: directory component of the pathname */
-UNIV_INTERN
-char*
-os_file_dirname(
-/*============*/
- const char* path); /*!< in: pathname */
-/****************************************************************//**
-This function returns a new path name after replacing the basename
-in an old path with a new basename. The old_path is a full path
-name including the extension. The tablename is in the normal
-form "databasename/tablename". The new base name is found after
-the forward slash. Both input strings are null terminated.
-
-This function allocates memory to be returned. It is the callers
-responsibility to free the return value after it is no longer needed.
-
-@return own: new full pathname */
-UNIV_INTERN
-char*
-os_file_make_new_pathname(
-/*======================*/
- const char* old_path, /*!< in: pathname */
- const char* new_name); /*!< in: new file name */
-/****************************************************************//**
-This function returns a remote path name by combining a data directory
-path provided in a DATA DIRECTORY clause with the tablename which is
-in the form 'database/tablename'. It strips the file basename (which
-is the tablename) found after the last directory in the path provided.
-The full filepath created will include the database name as a directory
-under the path provided. The filename is the tablename with the '.ibd'
-extension. All input and output strings are null-terminated.
-
-This function allocates memory to be returned. It is the callers
-responsibility to free the return value after it is no longer needed.
-
-@return own: A full pathname; data_dir_path/databasename/tablename.ibd */
-UNIV_INTERN
-char*
-os_file_make_remote_pathname(
-/*=========================*/
- const char* data_dir_path, /*!< in: pathname */
- const char* tablename, /*!< in: tablename */
- const char* extention); /*!< in: file extention; ibd,cfg*/
-/****************************************************************//**
-This function reduces a null-terminated full remote path name into
-the path that is sent by MySQL for DATA DIRECTORY clause. It replaces
-the 'databasename/tablename.ibd' found at the end of the path with just
-'tablename'.
-
-Since the result is always smaller than the path sent in, no new memory
-is allocated. The caller should allocate memory for the path sent in.
-This function manipulates that path in place.
-
-If the path format is not as expected, just return. The result is used
-to inform a SHOW CREATE TABLE command. */
-UNIV_INTERN
-void
-os_file_make_data_dir_path(
-/*========================*/
- char* data_dir_path); /*!< in/out: full path/data_dir_path */
-/****************************************************************//**
-Creates all missing subdirectories along the given path.
-@return TRUE if call succeeded FALSE otherwise */
-UNIV_INTERN
-ibool
-os_file_create_subdirs_if_needed(
-/*=============================*/
- const char* path); /*!< in: path name */
-/***********************************************************************
-Initializes the asynchronous io system. Creates one array each for ibuf
-and log i/o. Also creates one array each for read and write where each
-array is divided logically into n_read_segs and n_write_segs
-respectively. The caller must create an i/o handler thread for each
-segment in these arrays. This function also creates the sync array.
-No i/o handler thread needs to be created for that */
-UNIV_INTERN
-ibool
-os_aio_init(
-/*========*/
- ulint n_per_seg, /*<! in: maximum number of pending aio
- operations allowed per segment */
- ulint n_read_segs, /*<! in: number of reader threads */
- ulint n_write_segs, /*<! in: number of writer threads */
- ulint n_slots_sync); /*<! in: number of slots in the sync aio
- array */
-/***********************************************************************
-Frees the asynchronous io system. */
-UNIV_INTERN
-void
-os_aio_free(void);
-/*=============*/
-
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_aio(), not directly this function!
-Requests an asynchronous i/o operation.
-@return TRUE if request was queued successfully, FALSE if fail */
-UNIV_INTERN
-ibool
-os_aio_func(
-/*========*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
- ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
- ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
- to OS_AIO_SIMULATED_WAKE_LATER: the
- last flag advises this function not to wake
- i/o-handler threads, but the caller will
- do the waking explicitly later, in this
- way the caller can post several requests in
- a batch; NOTE that the batch must not be
- so big that it exhausts the slots in aio
- arrays! NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read or from which
- to write */
- os_offset_t offset, /*!< in: file offset where to read or write */
- ulint n, /*!< in: number of bytes to read or write */
- ulint page_size, /*!< in: page size in bytes */
- fil_node_t* message1,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- void* message2,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- ulint space_id,
- trx_t* trx,
- ulint* write_size);/*!< in/out: Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
-/************************************************************************//**
-Wakes up all async i/o threads so that they know to exit themselves in
-shutdown. */
-UNIV_INTERN
-void
-os_aio_wake_all_threads_at_shutdown(void);
-/*=====================================*/
-/************************************************************************//**
-Waits until there are no pending writes in os_aio_write_array. There can
-be other, synchronous, pending writes. */
-UNIV_INTERN
-void
-os_aio_wait_until_no_pending_writes(void);
-/*=====================================*/
-/**********************************************************************//**
-Wakes up simulated aio i/o-handler threads if they have something to do. */
-UNIV_INTERN
-void
-os_aio_simulated_wake_handler_threads(void);
-/*=======================================*/
-#ifdef _WIN32
-/**********************************************************************//**
-This function can be called if one wants to post a batch of reads and
-prefers an i/o-handler thread to handle them all at once later. You must
-call os_aio_simulated_wake_handler_threads later to ensure the threads
-are not left sleeping! */
-UNIV_INTERN
-void
-os_aio_simulated_put_read_threads_to_sleep();
-#else /* _WIN32 */
-# define os_aio_simulated_put_read_threads_to_sleep()
-#endif /* _WIN32 */
-
-#ifdef WIN_ASYNC_IO
-/**********************************************************************//**
-This function is only used in Windows asynchronous i/o.
-Waits for an aio operation to complete. This function is used to wait the
-for completed requests. The aio array of pending requests is divided
-into segments. The thread specifies which segment or slot it wants to wait
-for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing!
-@return TRUE if the aio operation succeeded */
-UNIV_INTERN
-ibool
-os_aio_windows_handle(
-/*==================*/
- ulint segment, /*!< in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads; if
- this is ULINT_UNDEFINED, then it means that
- sync aio is used, and this parameter is
- ignored */
- ulint pos, /*!< this parameter is used only in sync aio:
- wait for the aio slot at this position */
- fil_node_t**message1, /*!< out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2,
- ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */
- ulint* space_id);
-
-#endif
-
-/**********************************************************************//**
-Does simulated aio. This function should be called by an i/o-handler
-thread.
-@return TRUE if the aio operation succeeded */
-UNIV_INTERN
-ibool
-os_aio_simulated_handle(
-/*====================*/
- ulint segment, /*!< in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads */
- fil_node_t**message1, /*!< out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2,
- ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */
- ulint* space_id);
-/**********************************************************************//**
-Validates the consistency of the aio system.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-os_aio_validate(void);
-/*=================*/
-/**********************************************************************//**
-Prints info of the aio arrays. */
-UNIV_INTERN
-void
-os_aio_print(
-/*=========*/
- FILE* file); /*!< in: file where to print */
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
-void
-os_aio_refresh_stats(void);
-/*======================*/
-
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Checks that all slots in the system have been freed, that is, there are
-no pending io operations. */
-UNIV_INTERN
-ibool
-os_aio_all_slots_free(void);
-/*=======================*/
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-This function returns information about the specified file
-@return DB_SUCCESS if all OK */
-UNIV_INTERN
-dberr_t
-os_file_get_status(
-/*===============*/
- const char* path, /*!< in: pathname of the file */
- os_file_stat_t* stat_info, /*!< information of a file in a
- directory */
- bool check_rw_perm); /*!< in: for testing whether the
- file can be opened in RW mode */
-
-#if !defined(UNIV_HOTBACKUP)
-/** Create a temporary file in the location specified by the parameter
-path. If the path is null, then it will be created in tmpdir.
-@param[in] path location for creating temporary file
-@return temporary file descriptor, or < 0 on error */
-UNIV_INTERN
-int
-innobase_mysql_tmpfile(
- const char* path);
-#endif /* !UNIV_HOTBACKUP */
-
-
-#if defined(LINUX_NATIVE_AIO)
-/**************************************************************************
-This function is only used in Linux native asynchronous i/o.
-Waits for an aio operation to complete. This function is used to wait the
-for completed requests. The aio array of pending requests is divided
-into segments. The thread specifies which segment or slot it wants to wait
-for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing!
-@return TRUE if the IO was successful */
-UNIV_INTERN
-ibool
-os_aio_linux_handle(
-/*================*/
- ulint global_seg, /*!< in: segment number in the aio array
- to wait for; segment 0 is the ibuf
- i/o thread, segment 1 is log i/o thread,
- then follow the non-ibuf read threads,
- and the last are the non-ibuf write
- threads. */
- fil_node_t**message1, /*!< out: the messages passed with the */
- void** message2, /*!< aio request; note that in case the
- aio operation failed, these output
- parameters are valid and can be used to
- restart the operation. */
- ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */
- ulint* space_id);
-#endif /* LINUX_NATIVE_AIO */
-
-/****************************************************************//**
-Does error handling when a file operation fails.
-@return TRUE if we should retry the operation */
-ibool
-os_file_handle_error_no_exit(
-/*=========================*/
- const char* name, /*!< in: name of a file or NULL */
- const char* operation, /*!< in: operation */
- ibool on_error_silent);/*!< in: if TRUE then don't print
- any message to the log. */
-
-
-/***********************************************************************//**
-Try to get number of bytes per sector from file system.
-@return file block size */
-UNIV_INTERN
-ulint
-os_file_get_block_size(
-/*===================*/
- os_file_t file, /*!< in: handle to a file */
- const char* name); /*!< in: file name */
-
-#ifndef UNIV_NONINL
-#include "os0file.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/os0file.ic b/storage/xtradb/include/os0file.ic
deleted file mode 100644
index 72ac9d9dd6a..00000000000
--- a/storage/xtradb/include/os0file.ic
+++ /dev/null
@@ -1,629 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2010, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0file.ic
-The interface to the operating system file io
-
-Created 2/20/2010 Jimmy Yang
-*******************************************************/
-
-#include "univ.i"
-
-#ifdef UNIV_PFS_IO
-/****************************************************************//**
-NOTE! Please use the corresponding macro os_file_create_simple(),
-not directly this function!
-A performance schema instrumented wrapper function for
-os_file_create_simple() which opens or creates a file.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INLINE
-pfs_os_file_t
-pfs_os_file_create_simple_func(
-/*===========================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY or
- OS_FILE_READ_WRITE */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes,/*!< in: atomic writes table option
- value */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
-{
- pfs_os_file_t file;
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
-
- /* register a file open or creation depending on "create_mode" */
- register_pfs_file_open_begin(&state, locker, key,
- ((create_mode == OS_FILE_CREATE)
- ? PSI_FILE_CREATE
- : PSI_FILE_OPEN),
- name, src_file, src_line);
-
- file = os_file_create_simple_func(name, create_mode,
- access_type, success, atomic_writes);
-
- /* Register psi value for the file */
- register_pfs_file_open_end(locker, file,
- (*success == TRUE ? success : 0));
-
- return(file);
-}
-
-/****************************************************************//**
-NOTE! Please use the corresponding macro
-os_file_create_simple_no_error_handling(), not directly this function!
-A performance schema instrumented wrapper function for
-os_file_create_simple_no_error_handling(). Add instrumentation to
-monitor file creation/open.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INLINE
-pfs_os_file_t
-pfs_os_file_create_simple_no_error_handling_func(
-/*=============================================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode, /*!< in: file create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY,
- OS_FILE_READ_WRITE, or
- OS_FILE_READ_ALLOW_DELETE; the last option is
- used by a backup program reading the file */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes,/*!< in: atomic writes table option
- value */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
-{
- pfs_os_file_t file;
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
-
- /* register a file open or creation depending on "create_mode" */
- register_pfs_file_open_begin(&state, locker, key,
- ((create_mode == OS_FILE_CREATE)
- ? PSI_FILE_CREATE
- : PSI_FILE_OPEN),
- name, src_file, src_line);
-
- file = os_file_create_simple_no_error_handling_func(
- name, create_mode, access_type, success, atomic_writes);
-
- register_pfs_file_open_end(locker, file,
- (*success == TRUE ? success : 0));
-
- return(file);
-}
-
-/****************************************************************//**
-NOTE! Please use the corresponding macro os_file_create(), not directly
-this function!
-A performance schema wrapper function for os_file_create().
-Add instrumentation to monitor file creation/open.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INLINE
-pfs_os_file_t
-pfs_os_file_create_func(
-/*====================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: file create mode */
- ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
- non-buffered i/o is desired,
- OS_FILE_NORMAL, if any normal file;
- NOTE that it also depends on type, os_aio_..
- and srv_.. variables whether we really use
- async i/o or unbuffered i/o: look in the
- function source code for the exact rules */
- ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes, /*!< in: atomic writes table option
- value */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
-{
- pfs_os_file_t file;
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
-
- /* register a file open or creation depending on "create_mode" */
- register_pfs_file_open_begin(&state, locker, key,
- ((create_mode == OS_FILE_CREATE)
- ? PSI_FILE_CREATE
- : PSI_FILE_OPEN),
- name, src_file, src_line);
-
- file = os_file_create_func(name, create_mode, purpose, type,
- success, atomic_writes);
-
- register_pfs_file_open_end(locker, file,
- (*success == TRUE ? success : 0));
-
- return(file);
-}
-
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_close(), not directly
-this function!
-A performance schema instrumented wrapper function for os_file_close().
-@return TRUE if success */
-UNIV_INLINE
-ibool
-pfs_os_file_close_func(
-/*===================*/
- pfs_os_file_t file, /*!< in, own: handle to a file */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
-{
- ibool result;
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
-
- /* register the file close */
- register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CLOSE,
- src_file, src_line);
-
- result = os_file_close_func(file);
-
- register_pfs_file_io_end(locker, 0);
-
- return(result);
-}
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_close_no_error_handling(),
-not directly this function!
-A performance schema instrumented wrapper function for
-os_file_close_no_error_handling().
-@return TRUE if success */
-UNIV_INLINE
-bool
-pfs_os_file_close_no_error_handling_func(
-/*===================*/
- pfs_os_file_t file, /*!< in, own: handle to a file */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
-{
- bool result;
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
-
- /* register the file close */
- register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CLOSE,
- src_file, src_line);
-
- result = os_file_close_no_error_handling_func(file);
-
- register_pfs_file_io_end(locker, 0);
-
- return(result);
-}
-
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_aio(), not directly this
-function!
-Performance schema instrumented wrapper function of os_aio() which
-requests an asynchronous i/o operation.
-@return TRUE if request was queued successfully, FALSE if fail */
-UNIV_INLINE
-ibool
-pfs_os_aio_func(
-/*============*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
- ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
- ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read or from which
- to write */
- os_offset_t offset, /*!< in: file offset where to read or write */
- ulint n, /*!< in: number of bytes to read or write */
- ulint page_size, /*!< in: page size in bytes */
- fil_node_t* message1,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- void* message2,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- ulint space_id,
- trx_t* trx,
- ulint* write_size,/*!< in/out: Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
-{
- ibool result;
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
-
- /* Register the read or write I/O depending on "type" */
- register_pfs_file_io_begin(&state, locker, file, n,
- (type == OS_FILE_WRITE)
- ? PSI_FILE_WRITE
- : PSI_FILE_READ,
- src_file, src_line);
-
- result = os_aio_func(type, is_log, mode, name, file, buf, offset,
- n, page_size, message1, message2, space_id, trx,
- write_size);
-
- register_pfs_file_io_end(locker, n);
-
- return(result);
-}
-
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_file_read(), not directly
-this function!
-This is the performance schema instrumented wrapper function for
-os_file_read() which requests a synchronous read operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INLINE
-ibool
-pfs_os_file_read_func(
-/*==================*/
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n, /*!< in: number of bytes to read */
- trx_t* trx,
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
-{
- ibool result;
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
-
- register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
- src_file, src_line);
-
- result = os_file_read_func(file, buf, offset, n, trx);
-
- register_pfs_file_io_end(locker, n);
-
- return(result);
-}
-
-/*******************************************************************//**
-NOTE! Please use the corresponding macro
-os_file_read_no_error_handling(), not directly this function!
-This is the performance schema instrumented wrapper function for
-os_file_read_no_error_handling() which requests a synchronous
-positioned read operation. This function does not do any error
-handling. In case of error it returns FALSE.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INLINE
-ibool
-pfs_os_file_read_no_error_handling_func(
-/*====================================*/
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n, /*!< in: number of bytes to read */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
-{
- ibool result;
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
-
- register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
- src_file, src_line);
-
- result = os_file_read_no_error_handling_func(file, buf, offset, n);
-
- register_pfs_file_io_end(locker, n);
-
- return(result);
-}
-
-/** NOTE! Please use the corresponding macro
-os_file_read_no_error_handling_int_fd(), not directly this function!
-This is the performance schema instrumented wrapper function for
-os_file_read_no_error_handling_int_fd_func() which requests a
-synchronous read operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INLINE
-ibool
-pfs_os_file_read_no_error_handling_int_fd_func(
- int file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n, /*!< in: number of bytes to read */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
-{
- PSI_file_locker_state state;
- struct PSI_file_locker* locker;
-
- locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)(
- &state, file, PSI_FILE_READ);
- if (locker != NULL) {
- PSI_FILE_CALL(start_file_wait)(
- locker, n,
- __FILE__, __LINE__);
- }
- ibool result = os_file_read_no_error_handling_func(
- OS_FILE_FROM_FD(file), buf, offset, n);
-
- if (locker != NULL) {
- PSI_FILE_CALL(end_file_wait)(locker, n);
- }
-
- return(result);
-}
-
-/*******************************************************************//**
-NOTE! Please use the corresponding macro os_file_write(), not directly
-this function!
-This is the performance schema instrumented wrapper function for
-os_file_write() which requests a synchronous write operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INLINE
-ibool
-pfs_os_file_write_func(
-/*===================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- pfs_os_file_t file, /*!< in: handle to a file */
- const void* buf, /*!< in: buffer from which to write */
- os_offset_t offset, /*!< in: file offset where to write */
- ulint n, /*!< in: number of bytes to write */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
-{
- ibool result;
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
-
- register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_WRITE,
- src_file, src_line);
-
- result = os_file_write_func(name, file, buf, offset, n);
-
- register_pfs_file_io_end(locker, n);
-
- return(result);
-}
-
-/** NOTE! Please use the corresponding macro os_file_write(), not
-directly this function!
-This is the performance schema instrumented wrapper function for
-os_file_write() which requests a synchronous write operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INLINE
-ibool
-pfs_os_file_write_int_fd_func(
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- int file, /*!< in: handle to a file */
- const void* buf, /*!< in: buffer from which to write */
- os_offset_t offset, /*!< in: file offset where to write */
- ulint n, /*!< in: number of bytes to write */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
-{
- PSI_file_locker_state state;
- struct PSI_file_locker* locker = NULL;
-
- locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)(
- &state, file, PSI_FILE_WRITE);
- if (locker != NULL) {
- PSI_FILE_CALL(start_file_wait)(
- locker, n,
- __FILE__, __LINE__);
- }
- ibool result = os_file_write_func(
- name, OS_FILE_FROM_FD(file), buf, offset, n);
-
- if (locker != NULL) {
- PSI_FILE_CALL(end_file_wait)(locker, n);
- }
-
- return(result);
-}
-
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_flush(), not directly
-this function!
-This is the performance schema instrumented wrapper function for
-os_file_flush() which flushes the write buffers of a given file to the disk.
-@return TRUE if success */
-UNIV_INLINE
-ibool
-pfs_os_file_flush_func(
-/*===================*/
- pfs_os_file_t file, /*!< in, own: handle to a file */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
-{
- ibool result;
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
-
- register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
- src_file, src_line);
- result = os_file_flush_func(file);
-
- register_pfs_file_io_end(locker, 0);
-
- return(result);
-}
-
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_rename(), not directly
-this function!
-This is the performance schema instrumented wrapper function for
-os_file_rename()
-@return TRUE if success */
-UNIV_INLINE
-ibool
-pfs_os_file_rename_func(
-/*====================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* oldpath,/*!< in: old file path as a null-terminated
- string */
- const char* newpath,/*!< in: new file path */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
-{
- ibool result;
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
-
- register_pfs_file_rename_begin(&state, locker, key, PSI_FILE_RENAME, newpath,
- src_file, src_line);
-
- result = os_file_rename_func(oldpath, newpath);
-
- register_pfs_file_rename_end(locker, 0);
-
- return(result);
-}
-
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_delete(), not directly
-this function!
-This is the performance schema instrumented wrapper function for
-os_file_delete()
-@return TRUE if success */
-UNIV_INLINE
-bool
-pfs_os_file_delete_func(
-/*====================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: file path as a null-terminated
- string */
- const char* src_file, /*!< in: file name where func invoked */
- ulint src_line) /*!< in: line where the func invoked */
-{
- bool result;
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
-
- register_pfs_file_close_begin(&state, locker, key, PSI_FILE_DELETE,
- name, src_file, src_line);
-
- result = os_file_delete_func(name);
-
- register_pfs_file_close_end(locker, 0);
-
- return(result);
-}
-
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_delete_if_exists(), not
-directly this function!
-This is the performance schema instrumented wrapper function for
-os_file_delete_if_exists()
-@return TRUE if success */
-UNIV_INLINE
-bool
-pfs_os_file_delete_if_exists_func(
-/*==============================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema Key */
- const char* name, /*!< in: file path as a null-terminated
- string */
- const char* src_file, /*!< in: file name where func invoked */
- ulint src_line) /*!< in: line where the func invoked */
-{
- bool result;
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
-
- register_pfs_file_close_begin(&state, locker, key, PSI_FILE_DELETE,
- name, src_file, src_line);
-
- result = os_file_delete_if_exists_func(name);
-
- register_pfs_file_close_end(locker, 0);
-
- return(result);
-}
-
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_set_eof_at(), not
-directly this function!
-This is the performance schema instrumented wrapper function for
-os_file_set_eof_at()
-@return TRUE if success */
-UNIV_INLINE
-bool
-pfs_os_file_set_eof_at_func(
- pfs_os_file_t file, /*!< in: handle to a file */
- ib_uint64_t new_len,/*!< in: new file length */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
-{
- bool result;
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
-
- register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CHSIZE,
- src_file, src_line);
- result = os_file_set_eof_at_func(file, new_len);
-
- register_pfs_file_io_end(locker, 0);
-
- return(result);
-}
-
-#ifdef HAVE_POSIX_FALLOCATE
-/***********************************************************************//**
-NOTE! Please use the corresponding macro os_file_allocate(), not
-directly this function!
-Ensures that disk space is allocated for the file.
-@return TRUE if success */
-UNIV_INLINE
-bool
-pfs_os_file_allocate_func(
- pfs_os_file_t file, /*!< in, own: handle to a file */
- os_offset_t offset, /*!< in: file region offset */
- os_offset_t len, /*!< in: file region length */
- const char* src_file,/*!< in: file name where func invoked */
- ulint src_line)/*!< in: line where the func invoked */
-{
- bool result;
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
-
- register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CHSIZE,
- src_file, src_line);
- result = os_file_allocate_func(file, offset, len);
-
- register_pfs_file_io_end(locker, 0);
-
- return(result);
-}
-#endif
-
-#endif /* UNIV_PFS_IO */
diff --git a/storage/xtradb/include/os0once.h b/storage/xtradb/include/os0once.h
deleted file mode 100644
index a8bbaf1d2d4..00000000000
--- a/storage/xtradb/include/os0once.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2014, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0once.h
-A class that aids executing a given function exactly once in a multi-threaded
-environment.
-
-Created Feb 20, 2014 Vasil Dimov
-*******************************************************/
-
-#ifndef os0once_h
-#define os0once_h
-
-#include "univ.i"
-
-#include "os0sync.h"
-#include "ut0ut.h"
-
-/** Execute a given function exactly once in a multi-threaded environment
-or wait for the function to be executed by another thread.
-
-Example usage:
-First the user must create a control variable of type os_once::state_t and
-assign it os_once::NEVER_DONE.
-Then the user must pass this variable, together with a function to be
-executed to os_once::do_or_wait_for_done().
-
-Multiple threads can call os_once::do_or_wait_for_done() simultaneously with
-the same (os_once::state_t) control variable. The provided function will be
-called exactly once and when os_once::do_or_wait_for_done() returns then this
-function has completed execution, by this or another thread. In other words
-os_once::do_or_wait_for_done() will either execute the provided function or
-will wait for its execution to complete if it is already called by another
-thread or will do nothing if the function has already completed its execution
-earlier.
-
-This mimics pthread_once(3), but unfortunatelly pthread_once(3) does not
-support passing arguments to the init_routine() function. We should use
-std::call_once() when we start compiling with C++11 enabled. */
-class os_once {
-public:
- /** Control variables' state type */
- typedef ib_uint32_t state_t;
-
- /** Not yet executed. */
- static const state_t NEVER_DONE = 0;
-
- /** Currently being executed by this or another thread. */
- static const state_t IN_PROGRESS = 1;
-
- /** Finished execution. */
- static const state_t DONE = 2;
-
-#ifdef HAVE_ATOMIC_BUILTINS
- /** Call a given function or wait its execution to complete if it is
- already called by another thread.
- @param[in,out] state control variable
- @param[in] do_func function to call
- @param[in,out] do_func_arg an argument to pass to do_func(). */
- static
- void
- do_or_wait_for_done(
- volatile state_t* state,
- void (*do_func)(void*),
- void* do_func_arg)
- {
- /* Avoid calling os_compare_and_swap_uint32() in the most
- common case. */
- if (*state == DONE) {
- return;
- }
-
- if (os_compare_and_swap_uint32(state,
- NEVER_DONE, IN_PROGRESS)) {
- /* We are the first. Call the function. */
-
- do_func(do_func_arg);
-
- const bool swapped = os_compare_and_swap_uint32(
- state, IN_PROGRESS, DONE);
-
- ut_a(swapped);
- } else {
- /* The state is not NEVER_DONE, so either it is
- IN_PROGRESS (somebody is calling the function right
- now or DONE (it has already been called and completed).
- Wait for it to become DONE. */
- for (;;) {
- const state_t s = *state;
-
- switch (s) {
- case DONE:
- return;
- case IN_PROGRESS:
- break;
- case NEVER_DONE:
- /* fall through */
- default:
- ut_error;
- }
-
- UT_RELAX_CPU();
- }
- }
- }
-#endif /* HAVE_ATOMIC_BUILTINS */
-};
-
-#endif /* os0once_h */
diff --git a/storage/xtradb/include/os0proc.h b/storage/xtradb/include/os0proc.h
deleted file mode 100644
index 613e3bd6947..00000000000
--- a/storage/xtradb/include/os0proc.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0proc.h
-The interface to the operating system
-process control primitives
-
-Created 9/30/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef os0proc_h
-#define os0proc_h
-
-#include "univ.i"
-
-#ifdef UNIV_LINUX
-#include <sys/ipc.h>
-#include <sys/shm.h>
-#endif
-
-typedef void* os_process_t;
-typedef unsigned long int os_process_id_t;
-
-extern ibool os_use_large_pages;
-/* Large page size. This may be a boot-time option on some platforms */
-extern ulint os_large_page_size;
-
-/****************************************************************//**
-Converts the current process id to a number. It is not guaranteed that the
-number is unique. In Linux returns the 'process number' of the current
-thread. That number is the same as one sees in 'top', for example. In Linux
-the thread id is not the same as one sees in 'top'.
-@return process id as a number */
-UNIV_INTERN
-ulint
-os_proc_get_number(void);
-/*====================*/
-/****************************************************************//**
-Allocates large pages memory.
-@return allocated memory */
-UNIV_INTERN
-void*
-os_mem_alloc_large(
-/*===============*/
- ulint* n); /*!< in/out: number of bytes */
-/****************************************************************//**
-Frees large pages memory. */
-UNIV_INTERN
-void
-os_mem_free_large(
-/*==============*/
- void *ptr, /*!< in: pointer returned by
- os_mem_alloc_large() */
- ulint size); /*!< in: size returned by
- os_mem_alloc_large() */
-
-#ifndef UNIV_NONINL
-#include "os0proc.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/os0proc.ic b/storage/xtradb/include/os0proc.ic
deleted file mode 100644
index 506f4f8ce0c..00000000000
--- a/storage/xtradb/include/os0proc.ic
+++ /dev/null
@@ -1,27 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0proc.ic
-The interface to the operating system
-process control primitives
-
-Created 9/30/1995 Heikki Tuuri
-*******************************************************/
-
-
diff --git a/storage/xtradb/include/os0stacktrace.h b/storage/xtradb/include/os0stacktrace.h
deleted file mode 100644
index e79347c6189..00000000000
--- a/storage/xtradb/include/os0stacktrace.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2013 SkySQL Ab. All Rights Reserved.
-
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-#ifndef os0stacktrace_h
-#define os0stacktrace_h
-
-#if defined (__linux__) && HAVE_BACKTRACE && HAVE_BACKTRACE_SYMBOLS
-#if HAVE_EXECINFO_H
-#include <execinfo.h>
-#endif
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-/***************************************************************//**
-Prints stacktrace for this thread.
-*/
-void
-os_stacktrace_print(
-/*================*/
- int sig_num, /*!< in: signal number */
- siginfo_t* info, /*!< in: signal information */
- void* ucontext);/*!< in: signal context */
-
-#endif /* defined (__linux__) && HAVE_BACKTRACE && HAVE_BACKTRACE_SYMBOLS */
-#endif /* os0stacktrace.h */
diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h
deleted file mode 100644
index ce03f6a2124..00000000000
--- a/storage/xtradb/include/os0sync.h
+++ /dev/null
@@ -1,999 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-Copyright (c) 2017, MariaDB Corporation.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0sync.h
-The interface to the operating system
-synchronization primitives.
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef os0sync_h
-#define os0sync_h
-
-#include "univ.i"
-#include "ut0lst.h"
-#include "sync0types.h"
-
-#ifdef CPU_LEVEL1_DCACHE_LINESIZE
-# define CACHE_LINE_SIZE CPU_LEVEL1_DCACHE_LINESIZE
-#else
-# error CPU_LEVEL1_DCACHE_LINESIZE is undefined
-#endif /* CPU_LEVEL1_DCACHE_LINESIZE */
-
-#ifdef HAVE_WINDOWS_ATOMICS
-typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
- on LONG variable */
-#elif defined(HAVE_ATOMIC_BUILTINS) && !defined(HAVE_ATOMIC_BUILTINS_BYTE)
-typedef ulint lock_word_t;
-#else
-
-#define IB_LOCK_WORD_IS_BYTE
-
-typedef byte lock_word_t;
-
-#endif /* HAVE_WINDOWS_ATOMICS */
-
-#ifdef __WIN__
-/** Native event (slow)*/
-typedef HANDLE os_native_event_t;
-/** Native mutex */
-typedef CRITICAL_SECTION fast_mutex_t;
-/** Native condition variable. */
-typedef CONDITION_VARIABLE os_cond_t;
-#else
-/** Native mutex */
-typedef pthread_mutex_t fast_mutex_t;
-/** Native condition variable */
-typedef pthread_cond_t os_cond_t;
-#endif
-
-/** Structure that includes Performance Schema Probe pfs_psi
-in the os_fast_mutex structure if UNIV_PFS_MUTEX is defined */
-struct os_fast_mutex_t {
- fast_mutex_t mutex; /*!< os_fast_mutex */
-#ifdef UNIV_PFS_MUTEX
- struct PSI_mutex* pfs_psi;/*!< The performance schema
- instrumentation hook */
-#endif
-};
-
-/** Operating system event handle */
-typedef struct os_event* os_event_t;
-
-/** An asynchronous signal sent between threads */
-struct os_event {
-#ifdef __WIN__
- HANDLE handle; /*!< kernel event object, slow,
- used on older Windows */
-#endif
- os_fast_mutex_t os_mutex; /*!< this mutex protects the next
- fields */
-private:
- /** Masks for the event signal count and set flag in the count_and_set
- field: count_mask selects the low 63 bits, set_mask the top bit */
- static const ib_uint64_t count_mask = 0x7fffffffffffffffULL;
- static const ib_uint64_t set_mask = 0x8000000000000000ULL;
-
- /** The MSB is set whenever the event is in the signaled state,
- i.e. a thread does not stop if it tries to wait for this event. Lower
- bits are incremented each time the event becomes signaled. */
- ib_uint64_t count_and_set;
-public:
- os_cond_t cond_var; /*!< condition variable is used in
- waiting for the event */
-
- /** Initialise count_and_set field: signal count 1, set flag clear */
- void init_count_and_set(void)
- {
- /* We return this value in os_event_reset(), which can then be
- passed to os_event_wait_low(). The value of zero
- is reserved in os_event_wait_low() for the case when the
- caller does not want to pass any signal_count value. To
- distinguish between the two cases we initialize signal_count
- to 1 here. */
- count_and_set = 1;
- }
-
- /** Mark this event as set; the signal count bits are left unchanged */
- void set(void)
- {
- count_and_set |= set_mask;
- }
-
- /** Unmark this event as set; the signal count bits are left
- unchanged */
- void reset(void)
- {
- count_and_set &= count_mask;
- }
-
- /** Return true if this event is set, i.e. the set_mask bit of
- count_and_set is non-zero */
- bool is_set(void) const
- {
- return count_and_set & set_mask;
- }
-
- /** Bump signal count for this event. The assertion checks that the
- count is still below count_mask, so that the increment cannot carry
- into the set flag bit. */
- void inc_signal_count(void)
- {
- ut_ad(static_cast<ib_uint64_t>(signal_count()) < count_mask);
- count_and_set++;
- }
-
- /** Return how many times this event has been signalled, i.e. the
- value of count_and_set with the set flag masked out */
- ib_int64_t signal_count(void) const
- {
- return (count_and_set & count_mask);
- }
-};
-
-/** Denotes an infinite delay for os_event_wait_time() */
-#define OS_SYNC_INFINITE_TIME ULINT_UNDEFINED
-
-/** Return value of os_event_wait_time() when the time is exceeded */
-#define OS_SYNC_TIME_EXCEEDED 1
-
-/** Operating system mutex handle */
-typedef struct os_mutex_t* os_ib_mutex_t;
-
-// All the os_*_count variables are accessed atomically
-
-/** This is incremented by 1 in os_thread_create and decremented by 1 in
-os_thread_exit */
-extern ulint os_thread_count;
-
-extern ulint os_event_count;
-extern ulint os_mutex_count;
-extern ulint os_fast_mutex_count;
-
-/*********************************************************//**
-Initializes global event and OS 'slow' mutex lists. */
-UNIV_INTERN
-void
-os_sync_init(void);
-/*==============*/
-
-/** Create an event semaphore, i.e., a semaphore which may just have two
-states: signaled and nonsignaled. The created event is manual reset: it must be
-reset explicitly by calling os_event_reset().
-@param[in,out] event memory block where to create the event */
-UNIV_INTERN
-void
-os_event_create(os_event_t event);
-
-/*********************************************************//**
-Creates an event semaphore, i.e., a semaphore which may just have two states:
-signaled and nonsignaled. The created event is manual reset: it must be reset
-explicitly by calling os_event_reset().
-@return the event handle */
-UNIV_INTERN
-os_event_t
-os_event_create(void);
-/*==================*/
-/**********************************************************//**
-Sets an event semaphore to the signaled state: lets waiting threads
-proceed. */
-UNIV_INTERN
-void
-os_event_set(
-/*=========*/
- os_event_t event); /*!< in: event to set */
-/**********************************************************//**
-Resets an event semaphore to the nonsignaled state. Threads that wait for
-the event after this call will stop until the event is set again.
-The return value should be passed to os_event_wait_low() if it is desired
-that this thread should not wait in case of an intervening call to
-os_event_set() between this os_event_reset() and the
-os_event_wait_low() call. See comments for os_event_wait_low(). */
-UNIV_INTERN
-ib_int64_t
-os_event_reset(
-/*===========*/
- os_event_t event); /*!< in: event to reset */
-/**********************************************************//**
-Frees an event object. */
-UNIV_INTERN
-void
-os_event_free(
-/*==========*/
- os_event_t event, /*!< in: event to free */
- bool free_memory = true);
- /*!< in: if true, deallocate the event memory
- block too */
-
-/**********************************************************//**
-Waits for an event object until it is in the signaled state.
-
-Typically, if the event has been signalled after the os_event_reset()
-we'll return immediately because event->is_set == TRUE.
-There are, however, situations (e.g.: sync_array code) where we may
-lose this information. For example:
-
-thread A calls os_event_reset()
-thread B calls os_event_set() [event->is_set == TRUE]
-thread C calls os_event_reset() [event->is_set == FALSE]
-thread A calls os_event_wait() [infinite wait!]
-thread C calls os_event_wait() [infinite wait!]
-
-Where such a scenario is possible, to avoid infinite wait, the
-value returned by os_event_reset() should be passed in as
-reset_sig_count. */
-UNIV_INTERN
-void
-os_event_wait_low(
-/*==============*/
- os_event_t event, /*!< in: event to wait */
- ib_int64_t reset_sig_count);/*!< in: zero or the value
- returned by previous call of
- os_event_reset(). */
-
-#define os_event_wait(event) os_event_wait_low(event, 0)
-#define os_event_wait_time(event, t) os_event_wait_time_low(event, t, 0)
-
-/**********************************************************//**
-Waits for an event object until it is in the signaled state or
-a timeout is exceeded.
-@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
-UNIV_INTERN
-ulint
-os_event_wait_time_low(
-/*===================*/
- os_event_t event, /*!< in: event to wait */
- ulint time_in_usec, /*!< in: timeout in
- microseconds, or
- OS_SYNC_INFINITE_TIME */
- ib_int64_t reset_sig_count); /*!< in: zero or the value
- returned by previous call of
- os_event_reset(). */
-/*********************************************************//**
-Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (ib_mutex_t) should be used where possible.
-@return the mutex handle */
-UNIV_INTERN
-os_ib_mutex_t
-os_mutex_create(void);
-/*=================*/
-/**********************************************************//**
-Acquires ownership of a mutex semaphore. */
-UNIV_INTERN
-void
-os_mutex_enter(
-/*===========*/
- os_ib_mutex_t mutex); /*!< in: mutex to acquire */
-/**********************************************************//**
-Releases ownership of a mutex. */
-UNIV_INTERN
-void
-os_mutex_exit(
-/*==========*/
- os_ib_mutex_t mutex); /*!< in: mutex to release */
-/**********************************************************//**
-Frees a mutex object. */
-UNIV_INTERN
-void
-os_mutex_free(
-/*==========*/
- os_ib_mutex_t mutex); /*!< in: mutex to free */
-/**********************************************************//**
-Tries to acquire ownership of a fast mutex without waiting for it.
-On Windows this is implemented with TryEnterCriticalSection().
-@return 0 if success, != 0 if was reserved by another thread */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock(
-/*==================*/
- os_fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */
-
-/**********************************************************************
-Following os_fast_ mutex APIs would be performance schema instrumented:
-
-os_fast_mutex_init
-os_fast_mutex_lock
-os_fast_mutex_unlock
-os_fast_mutex_free
-
-These mutex APIs will point to corresponding wrapper functions that contain
-the performance schema instrumentation.
-
-NOTE! The following macro should be used in mutex operation, not the
-corresponding function. */
-
-#ifdef UNIV_PFS_MUTEX
-# define os_fast_mutex_init(K, M) \
- pfs_os_fast_mutex_init(K, M)
-
-# define os_fast_mutex_lock(M) \
- pfs_os_fast_mutex_lock(M, __FILE__, __LINE__)
-
-# define os_fast_mutex_unlock(M) pfs_os_fast_mutex_unlock(M)
-
-# define os_fast_mutex_free(M) pfs_os_fast_mutex_free(M)
-
-/*********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_init(), not directly
-this function!
-A wrapper function for os_fast_mutex_init_func(). Initializes an operating
-system fast mutex semaphore. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_init(
-/*===================*/
- PSI_mutex_key key, /*!< in: Performance Schema
- key */
- os_fast_mutex_t* fast_mutex); /*!< out: fast mutex */
-/**********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_free(), not directly
-this function!
-Wrapper function for os_fast_mutex_free_func(). Also destroys the performance
-schema probes when freeing the mutex */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_free(
-/*===================*/
- os_fast_mutex_t* fast_mutex); /*!< in/out: mutex to free */
-/**********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_lock, not directly
-this function!
-Wrapper function of os_fast_mutex_lock. Acquires ownership of a fast mutex. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_lock(
-/*===================*/
- os_fast_mutex_t* fast_mutex, /*!< in/out: mutex to acquire */
- const char* file_name, /*!< in: file name where
- locked */
- ulint line); /*!< in: line where locked */
-/**********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_unlock, not directly
-this function!
-Wrapper function of os_fast_mutex_unlock. Releases ownership of a fast mutex. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_unlock(
-/*=====================*/
- os_fast_mutex_t* fast_mutex); /*!< in/out: mutex to release */
-
-#else /* UNIV_PFS_MUTEX */
-
-# define os_fast_mutex_init(K, M) \
- os_fast_mutex_init_func(&((os_fast_mutex_t*)(M))->mutex)
-
-# define os_fast_mutex_lock(M) \
- os_fast_mutex_lock_func(&((os_fast_mutex_t*)(M))->mutex)
-
-# define os_fast_mutex_unlock(M) \
- os_fast_mutex_unlock_func(&((os_fast_mutex_t*)(M))->mutex)
-
-# define os_fast_mutex_free(M) \
- os_fast_mutex_free_func(&((os_fast_mutex_t*)(M))->mutex)
-#endif /* UNIV_PFS_MUTEX */
-
-/**********************************************************//**
-Acquires ownership of a fast mutex. Implies a full memory barrier even on
-platforms such as PowerPC where this is not normally required.
-@return 0 if success, != 0 if was reserved by another thread */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock_full_barrier(
-/*==================*/
- os_fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */
-/**********************************************************//**
-Releases ownership of a fast mutex. */
-UNIV_INTERN
-void
-os_fast_mutex_unlock_func(
-/*======================*/
- fast_mutex_t* fast_mutex); /*!< in: mutex to release */
-/**********************************************************//**
-Releases ownership of a fast mutex. Implies a full memory barrier even on
-platforms such as PowerPC where this is not normally required. */
-UNIV_INTERN
-void
-os_fast_mutex_unlock_full_barrier(
-/*=================*/
- os_fast_mutex_t* fast_mutex); /*!< in: mutex to release */
-/*********************************************************//**
-Initializes an operating system fast mutex semaphore. */
-UNIV_INTERN
-void
-os_fast_mutex_init_func(
-/*====================*/
- fast_mutex_t* fast_mutex); /*!< in: fast mutex */
-/**********************************************************//**
-Acquires ownership of a fast mutex. */
-UNIV_INTERN
-void
-os_fast_mutex_lock_func(
-/*====================*/
- fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */
-/**********************************************************//**
-Frees a mutex object. */
-UNIV_INTERN
-void
-os_fast_mutex_free_func(
-/*====================*/
- fast_mutex_t* fast_mutex); /*!< in: mutex to free */
-
-/**********************************************************//**
-Atomic compare-and-swap and increment for InnoDB. */
-
-#if defined(HAVE_IB_GCC_ATOMIC_BUILTINS)
-
-# define HAVE_ATOMIC_BUILTINS
-
-# ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE
-# define HAVE_ATOMIC_BUILTINS_BYTE
-# endif
-
-# ifdef HAVE_IB_GCC_ATOMIC_BUILTINS_64
-# define HAVE_ATOMIC_BUILTINS_64
-# endif
-
-/**********************************************************//**
-Returns true if swapped, ptr is pointer to target, old_val is value to
-compare to, new_val is the value to swap in. */
-
-# define os_compare_and_swap(ptr, old_val, new_val) \
- __sync_bool_compare_and_swap(ptr, old_val, new_val)
-
-# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
- os_compare_and_swap(ptr, old_val, new_val)
-
-# define os_compare_and_swap_lint(ptr, old_val, new_val) \
- os_compare_and_swap(ptr, old_val, new_val)
-
-# define os_compare_and_swap_uint32(ptr, old_val, new_val) \
- os_compare_and_swap(ptr, old_val, new_val)
-
-# ifdef HAVE_IB_ATOMIC_PTHREAD_T_GCC
-# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
- os_compare_and_swap(ptr, old_val, new_val)
-# define INNODB_RW_LOCKS_USE_ATOMICS
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes and rw_locks use GCC atomic builtins"
-# else /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes use GCC atomic builtins, rw_locks do not"
-# endif /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */
-
-/**********************************************************//**
-Returns the resulting value, ptr is pointer to target, amount is the
-amount of increment. */
-
-# define os_atomic_increment(ptr, amount) \
- __sync_add_and_fetch(ptr, amount)
-
-# define os_atomic_increment_lint(ptr, amount) \
- os_atomic_increment(ptr, amount)
-
-# define os_atomic_increment_uint32(ptr, amount ) \
- os_atomic_increment(ptr, amount)
-
-# define os_atomic_increment_ulint(ptr, amount) \
- os_atomic_increment(ptr, amount)
-
-# define os_atomic_increment_uint64(ptr, amount) \
- os_atomic_increment(ptr, amount)
-
-/* Returns the resulting value, ptr is pointer to target, amount is the
-amount to decrement. */
-
-# define os_atomic_decrement(ptr, amount) \
- __sync_sub_and_fetch(ptr, amount)
-
-# define os_atomic_decrement_uint32(ptr, amount) \
- os_atomic_decrement(ptr, amount)
-
-# define os_atomic_decrement_lint(ptr, amount) \
- os_atomic_decrement(ptr, amount)
-
-# define os_atomic_decrement_ulint(ptr, amount) \
- os_atomic_decrement(ptr, amount)
-
-# define os_atomic_decrement_uint64(ptr, amount) \
- os_atomic_decrement(ptr, amount)
-
-# if defined(HAVE_ATOMIC_BUILTINS)
-
-/** Do an atomic test and set.
-@param[in,out] ptr Memory location to set to non-zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_test_and_set(volatile lock_word_t* ptr)
-{
- return(__sync_lock_test_and_set(ptr, 1));
-}
-
-/** Do an atomic release by calling __sync_lock_release() on the lock word.
-This variant returns nothing, unlike the Solaris and Windows variants of
-os_atomic_clear() in this header, which return the previous value.
-@param[in,out] ptr Memory location to write to */
-inline
-void
-os_atomic_clear(volatile lock_word_t* ptr)
-{
- __sync_lock_release(ptr);
-}
-
-# elif defined(HAVE_IB_GCC_ATOMIC_TEST_AND_SET)
-
-/** Do an atomic test-and-set.
-@param[in,out] ptr Memory location to set to non-zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_test_and_set(volatile lock_word_t* ptr)
-{
- return(__atomic_test_and_set(ptr, __ATOMIC_ACQUIRE));
-}
-
-/** Do an atomic clear.
-@param[in,out] ptr Memory location to set to zero */
-inline
-void
-os_atomic_clear(volatile lock_word_t* ptr)
-{
- __atomic_clear(ptr, __ATOMIC_RELEASE);
-}
-
-# else
-
-# error "Unsupported platform"
-
-# endif /* HAVE_IB_GCC_ATOMIC_TEST_AND_SET */
-
-#if defined(__powerpc__) || defined(__aarch64__)
-/*
- os_atomic_test_and_set_byte_release() should imply a release barrier before
- setting, and a full barrier after. But __sync_lock_test_and_set() is only
- documented as an acquire barrier. So on PowerPC and AArch64 we need to add
- the full barrier explicitly. */
-# define os_atomic_test_and_set_byte_release(ptr, new_val) \
- do { __sync_lock_release(ptr); \
- __sync_synchronize(); } while (0)
-#else
-/*
- On x86, __sync_lock_test_and_set() happens to be full barrier, due to
- LOCK prefix.
-*/
-# define os_atomic_test_and_set_byte_release(ptr, new_val) \
- __sync_lock_test_and_set(ptr, (byte) new_val)
-#endif
-/*
- os_atomic_test_and_set_byte_acquire() is a full memory barrier on x86. But
- in general, just an acquire barrier should be sufficient. */
-# define os_atomic_test_and_set_byte_acquire(ptr, new_val) \
- __sync_lock_test_and_set(ptr, (byte) new_val)
-
-#elif defined(HAVE_IB_SOLARIS_ATOMICS)
-
-# define HAVE_ATOMIC_BUILTINS
-# define HAVE_ATOMIC_BUILTINS_BYTE
-# define HAVE_ATOMIC_BUILTINS_64
-
-/* If not compiling with GCC or GCC doesn't support the atomic
-intrinsics and running on Solaris >= 10 use Solaris atomics */
-
-# include <atomic.h>
-
-/**********************************************************//**
-Returns true if swapped, ptr is pointer to target, old_val is value to
-compare to, new_val is the value to swap in. */
-
-# define os_compare_and_swap_uint32(ptr, old_val, new_val) \
- (atomic_cas_32(ptr, old_val, new_val) == old_val)
-
-# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
- (atomic_cas_ulong(ptr, old_val, new_val) == old_val)
-
-# define os_compare_and_swap_lint(ptr, old_val, new_val) \
- ((lint) atomic_cas_ulong((ulong_t*) ptr, old_val, new_val) == old_val)
-
-# ifdef HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS
-# if SIZEOF_PTHREAD_T == 4
-# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
- ((pthread_t) atomic_cas_32(ptr, old_val, new_val) == old_val)
-# elif SIZEOF_PTHREAD_T == 8
-# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
- ((pthread_t) atomic_cas_64(ptr, old_val, new_val) == old_val)
-# else
-# error "SIZEOF_PTHREAD_T != 4 or 8"
-# endif /* SIZEOF_PTHREAD_T CHECK */
-# define INNODB_RW_LOCKS_USE_ATOMICS
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes and rw_locks use Solaris atomic functions"
-# else /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes use Solaris atomic functions, rw_locks do not"
-# endif /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */
-
-/**********************************************************//**
-Returns the resulting value, ptr is pointer to target, amount is the
-amount of increment. */
-
-# define os_atomic_increment_uint32(ptr, amount) \
- atomic_add_32_nv(ptr, amount)
-
-# define os_atomic_increment_ulint(ptr, amount) \
- atomic_add_long_nv(ptr, amount)
-
-# define os_atomic_increment_lint(ptr, amount) \
- os_atomic_increment_ulint((ulong_t*) ptr, amount)
-
-# define os_atomic_increment_uint64(ptr, amount) \
- atomic_add_64_nv((uint64_t *) ptr, amount)
-
-/* Returns the resulting value, ptr is pointer to target, amount is the
-amount to decrement. */
-
-# define os_atomic_decrement_uint32(ptr, amount) \
- os_atomic_increment_uint32(ptr, -(amount))
-
-# define os_atomic_decrement_lint(ptr, amount) \
- os_atomic_increment_ulint((ulong_t*) ptr, -(amount))
-
-# define os_atomic_decrement_ulint(ptr, amount) \
- os_atomic_increment_ulint(ptr, -(amount))
-
-# define os_atomic_decrement_uint64(ptr, amount) \
- os_atomic_increment_uint64(ptr, -(amount))
-
-# ifdef IB_LOCK_WORD_IS_BYTE
-
-/** Do an atomic xchg and set to non-zero.
-@param[in,out] ptr Memory location to set to non-zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_test_and_set(volatile lock_word_t* ptr)
-{
- return(atomic_swap_uchar(ptr, 1));
-}
-
-/** Do an atomic xchg and set to zero.
-@param[in,out] ptr Memory location to set to zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_clear(volatile lock_word_t* ptr)
-{
- return(atomic_swap_uchar(ptr, 0));
-}
-
-# else
-
-/** Do an atomic xchg and set to non-zero.
-@param[in,out] ptr Memory location to set to non-zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_test_and_set(volatile lock_word_t* ptr)
-{
- return(atomic_swap_ulong(ptr, 1));
-}
-
-/** Do an atomic xchg and set to zero.
-@param[in,out] ptr Memory location to set to zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_clear(volatile lock_word_t* ptr)
-{
- return(atomic_swap_ulong(ptr, 0));
-}
-
-# endif /* IB_LOCK_WORD_IS_BYTE */
-
-# define os_atomic_test_and_set_byte_acquire(ptr, new_val) \
- atomic_swap_uchar(ptr, new_val)
-
-# define os_atomic_test_and_set_byte_release(ptr, new_val) \
- atomic_swap_uchar(ptr, new_val)
-
-#elif defined(HAVE_WINDOWS_ATOMICS)
-
-# define HAVE_ATOMIC_BUILTINS
-# define HAVE_ATOMIC_BUILTINS_BYTE
-# define HAVE_ATOMIC_BUILTINS_64
-
-/**********************************************************//**
-Atomic compare and exchange of signed integers (both 32 and 64 bit).
-@return value found before the exchange.
-If it is not equal to old_value the exchange did not happen. */
-UNIV_INLINE
-lint
-win_cmp_and_xchg_lint(
-/*==================*/
- volatile lint* ptr, /*!< in/out: source/destination */
- lint new_val, /*!< in: exchange value */
- lint old_val); /*!< in: value to compare to */
-
-/**********************************************************//**
-Atomic addition of signed integers.
-@return Initial value of the variable pointed to by ptr */
-UNIV_INLINE
-lint
-win_xchg_and_add(
-/*=============*/
- volatile lint* ptr, /*!< in/out: address of destination */
- lint val); /*!< in: number to be added */
-
-/**********************************************************//**
-Atomic compare and exchange of unsigned integers.
-@return value found before the exchange.
-If it is not equal to old_value the exchange did not happen. */
-UNIV_INLINE
-ulint
-win_cmp_and_xchg_ulint(
-/*===================*/
- volatile ulint* ptr, /*!< in/out: source/destination */
- ulint new_val, /*!< in: exchange value */
- ulint old_val); /*!< in: value to compare to */
-
-/**********************************************************//**
-Atomic compare and exchange of 32 bit unsigned integers.
-@return value found before the exchange.
-If it is not equal to old_value the exchange did not happen. */
-UNIV_INLINE
-DWORD
-win_cmp_and_xchg_dword(
-/*===================*/
- volatile DWORD* ptr, /*!< in/out: source/destination */
- DWORD new_val, /*!< in: exchange value */
- DWORD old_val); /*!< in: value to compare to */
-
-/**********************************************************//**
-Returns true if swapped, ptr is pointer to target, old_val is value to
-compare to, new_val is the value to swap in. */
-
-# define os_compare_and_swap_uint32(ptr, old_val, new_val) \
- (InterlockedCompareExchange(reinterpret_cast<volatile long*>(ptr), \
- new_val, old_val) == old_val)
-
-# define os_compare_and_swap_ulint(ptr, old_val, new_val) \
- (win_cmp_and_xchg_ulint(ptr, new_val, old_val) == old_val)
-
-# define os_compare_and_swap_lint(ptr, old_val, new_val) \
- (win_cmp_and_xchg_lint(ptr, new_val, old_val) == old_val)
-
-/* windows thread objects can always be passed to windows atomic functions */
-# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \
- (win_cmp_and_xchg_dword(ptr, new_val, old_val) == old_val)
-
-# define INNODB_RW_LOCKS_USE_ATOMICS
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes and rw_locks use Windows interlocked functions"
-
-/**********************************************************//**
-Returns the resulting value, ptr is pointer to target, amount is the
-amount of increment. */
-
-# define os_atomic_increment_lint(ptr, amount) \
- (win_xchg_and_add(ptr, amount) + amount)
-
-/* InterlockedExchangeAdd() returns the value held before the addition, so
-the amount is added once more to yield the resulting value, as the other
-os_atomic_increment_*() macros in this section do. The sum is computed as
-DWORD so that it wraps at 32 bits. */
-# define os_atomic_increment_uint32(ptr, amount) \
-	((ulint) ((DWORD) InterlockedExchangeAdd( \
-		(long*) (ptr), (long) (amount)) + (DWORD) (amount)))
-
-# define os_atomic_increment_ulint(ptr, amount) \
- ((ulint) (win_xchg_and_add((lint*) ptr, (lint) amount) + amount))
-
-# define os_atomic_increment_uint64(ptr, amount) \
- ((ib_uint64_t) (InterlockedExchangeAdd64( \
- (ib_int64_t*) ptr, \
- (ib_int64_t) amount) + amount))
-
-/**********************************************************//**
-Returns the resulting value, ptr is pointer to target, amount is the
-amount to decrement. There is no atomic subtract function on Windows */
-
-/* The negated amount is added atomically. InterlockedExchangeAdd() returns
-the value held before that, so the amount is subtracted once more to yield
-the resulting value. The argument is parenthesized so that an expression
-such as a + b is negated as a whole, and the arithmetic is done as DWORD so
-that it wraps at 32 bits. */
-# define os_atomic_decrement_uint32(ptr, amount) \
-	((ulint) ((DWORD) InterlockedExchangeAdd( \
-		(long*) (ptr), \
-		(long) (0 - (DWORD) (amount))) - (DWORD) (amount)))
-
-# define os_atomic_decrement_lint(ptr, amount) \
- (win_xchg_and_add(ptr, -(lint) amount) - amount)
-
-# define os_atomic_decrement_ulint(ptr, amount) \
- ((ulint) (win_xchg_and_add((lint*) ptr, -(lint) amount) - amount))
-
-# define os_atomic_decrement_uint64(ptr, amount) \
- ((ib_uint64_t) (InterlockedExchangeAdd64( \
- (ib_int64_t*) ptr, \
- -(ib_int64_t) amount) - amount))
-
-/** Do an atomic test and set.
-InterlockedExchange() operates on LONG, and the LONG will be clobbered
-@param[in,out] ptr Memory location to set to non-zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_test_and_set(volatile lock_word_t* ptr)
-{
- return(InterlockedExchange(ptr, 1));
-}
-
-/** Do an atomic release.
-InterlockedExchange() operates on LONG, and the LONG will be clobbered
-@param[in,out] ptr Memory location to set to zero
-@return the previous value */
-inline
-lock_word_t
-os_atomic_clear(volatile lock_word_t* ptr)
-{
- return(InterlockedExchange(ptr, 0));
-}
-
-# define os_atomic_lock_release_byte(ptr) \
- (void) InterlockedExchange(ptr, 0)
-
-#else
-# define IB_ATOMICS_STARTUP_MSG \
- "Mutexes and rw_locks use InnoDB's own implementation"
-#endif
-#ifdef HAVE_ATOMIC_BUILTINS
-#define os_atomic_inc_ulint(m,v,d) os_atomic_increment_ulint(v, d)
-#define os_atomic_dec_ulint(m,v,d) os_atomic_decrement_ulint(v, d)
-#else
-#define os_atomic_inc_ulint(m,v,d) os_atomic_inc_ulint_func(m, v, d)
-#define os_atomic_dec_ulint(m,v,d) os_atomic_dec_ulint_func(m, v, d)
-#endif /* HAVE_ATOMIC_BUILTINS */
-
-/**********************************************************//**
-Following macros are used to update specified counter atomically
-if HAVE_ATOMIC_BUILTINS defined. Otherwise, use mutex passed in
-for synchronization */
-#ifdef HAVE_ATOMIC_BUILTINS
-#define os_increment_counter_by_amount(mutex, counter, amount) \
- (void) os_atomic_increment_ulint(&counter, amount)
-
-#define os_decrement_counter_by_amount(mutex, counter, amount) \
- (void) os_atomic_increment_ulint(&counter, (-((lint) amount)))
-#else
-#define os_increment_counter_by_amount(mutex, counter, amount) \
- do { \
- mutex_enter(&(mutex)); \
- (counter) += (amount); \
- mutex_exit(&(mutex)); \
- } while (0)
-
-/* Mutex-protected fallback used when HAVE_ATOMIC_BUILTINS is not defined.
-The underflow check is made while the mutex is held, so that it tests the
-same counter value that the subtraction then modifies. */
-#define os_decrement_counter_by_amount(mutex, counter, amount) \
-	do { \
-		mutex_enter(&(mutex)); \
-		ut_a((counter) >= (amount)); \
-		(counter) -= (amount); \
-		mutex_exit(&(mutex)); \
-	} while (0)
-#endif /* HAVE_ATOMIC_BUILTINS */
-
-#define os_inc_counter(mutex, counter) \
- os_increment_counter_by_amount(mutex, counter, 1)
-
-/* Decrement the counter by one. There is deliberately no semicolon after
-while (0): the caller supplies it, which keeps the macro usable as the
-unbraced body of an if statement that is followed by else. */
-#define os_dec_counter(mutex, counter) \
-	do { \
-		os_decrement_counter_by_amount(mutex, counter, 1);\
-	} while (0)
-
-/** barrier definitions for memory ordering */
-#if defined(HAVE_IB_GCC_ATOMIC_THREAD_FENCE)
-# define HAVE_MEMORY_BARRIER
-# define os_rmb __atomic_thread_fence(__ATOMIC_ACQUIRE)
-# define os_wmb __atomic_thread_fence(__ATOMIC_RELEASE)
-# define os_mb __atomic_thread_fence(__ATOMIC_SEQ_CST)
-
-# define IB_MEMORY_BARRIER_STARTUP_MSG \
- "GCC builtin __atomic_thread_fence() is used for memory barrier"
-
-#elif defined(HAVE_IB_GCC_SYNC_SYNCHRONISE)
-# define HAVE_MEMORY_BARRIER
-# define os_rmb __sync_synchronize()
-# define os_wmb __sync_synchronize()
-# define os_mb __sync_synchronize()
-# define IB_MEMORY_BARRIER_STARTUP_MSG \
- "GCC builtin __sync_synchronize() is used for memory barrier"
-
-#elif defined(HAVE_IB_MACHINE_BARRIER_SOLARIS)
-# define HAVE_MEMORY_BARRIER
-# include <mbarrier.h>
-# define os_rmb __machine_r_barrier()
-# define os_wmb __machine_w_barrier()
-# define os_mb __machine_rw_barrier()
-# define IB_MEMORY_BARRIER_STARTUP_MSG \
- "Solaris memory ordering functions are used for memory barrier"
-
-#elif defined(HAVE_WINDOWS_MM_FENCE)
-# define HAVE_MEMORY_BARRIER
-# include <intrin.h>
-# define os_rmb _mm_lfence()
-# define os_wmb _mm_sfence()
-# define os_mb _mm_mfence()
-# define IB_MEMORY_BARRIER_STARTUP_MSG \
- "_mm_lfence() and _mm_sfence() are used for memory barrier"
-
-#else
-# define os_rmb do { } while(0)
-# define os_wmb do { } while(0)
-# define os_mb do { } while(0)
-# define IB_MEMORY_BARRIER_STARTUP_MSG \
- "Memory barrier is not used"
-#endif
-
-
-/** Simple counter aligned to CACHE_LINE_SIZE
-@tparam Type the integer type of the counter
-@tparam atomic whether to use atomic memory access */
-template <typename Type = ulint, bool atomic = false>
-struct MY_ALIGNED(CACHE_LINE_SIZE) simple_counter
-{
-	/** Increment the counter
-	@return the value of the counter after incrementing */
-	Type inc() { return add(1); }
-	/** Decrement the counter
-	@return the value of the counter after decrementing */
-	Type dec() { return sub(1); }
-
-	/** Add to the counter
-	@param[in]	i	amount to be added
-	@return the value of the counter after adding */
-	Type add(Type i)
-	{
-		compile_time_assert(!atomic || sizeof(Type) == sizeof(ulint));
-		if (atomic) {
-			/* GCC would perform a type check in this code
-			also in case the template is instantiated with
-			simple_counter<Type=not_ulint, atomic=false>.
-			On Solaris, os_atomic_increment_ulint() maps
-			to atomic_add_long_nv(), which expects the
-			parameter to be correctly typed. */
-			return os_atomic_increment_ulint(
-				reinterpret_cast<ulint*>(&m_counter), i);
-		} else {
-			return m_counter += i;
-		}
-	}
-	/** Subtract from the counter
-	@param[in]	i	amount to be subtracted
-	@return the value of the counter after subtracting */
-	Type sub(Type i)
-	{
-		compile_time_assert(!atomic || sizeof(Type) == sizeof(ulint));
-		if (atomic) {
-			/* The same cast as in add() is needed here: on
-			Solaris os_atomic_decrement_ulint() is defined in
-			terms of os_atomic_increment_ulint(), and this
-			branch is type checked for every instantiation. */
-			return os_atomic_decrement_ulint(
-				reinterpret_cast<ulint*>(&m_counter), i);
-		} else {
-			return m_counter -= i;
-		}
-	}
-
-	/** @return the value of the counter (non-atomic access)! */
-	operator Type() const { return m_counter; }
-
-private:
-	/** The counter */
-	Type	m_counter;
-};
-
-#ifndef UNIV_NONINL
-#include "os0sync.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/os0sync.ic b/storage/xtradb/include/os0sync.ic
deleted file mode 100644
index 5f4b0d24089..00000000000
--- a/storage/xtradb/include/os0sync.ic
+++ /dev/null
@@ -1,265 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0sync.ic
-The interface to the operating system synchronization primitives.
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-
-#ifdef __WIN__
-#include <winbase.h>
-#endif
-
-/**********************************************************//**
-Tries to take ownership of a fast mutex.
-@return 0 if the mutex was acquired, != 0 if it was reserved by another
-thread */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock(
-/*==================*/
-	os_fast_mutex_t*	fast_mutex)	/*!< in: mutex to acquire */
-{
-#ifdef __WIN__
-	/* The result of TryEnterCriticalSection() is negated so that
-	0 means success, matching the non-Windows branch. */
-	return(!TryEnterCriticalSection(&fast_mutex->mutex));
-#else
-	/* The MySQL my_pthread.h redefines pthread_mutex_trylock so
-	that it returns 0 on success. Natively, HP-UX-10.20 follows the
-	old Posix 1003.4a Draft 4 and returns 1 on success (MySQL remaps
-	that to 0), whereas Linux, FreeBSD, Solaris, AIX, Tru64 Unix and
-	HP-UX-11.0 already return 0 on success. */
-	return((ulint) pthread_mutex_trylock(&fast_mutex->mutex));
-#endif
-}
-
-#ifdef UNIV_PFS_MUTEX
-/*********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_init(), not directly
-this function!
-A wrapper function for os_fast_mutex_init_func(). Initializes an operating
-system fast mutex semaphore.
-When HAVE_PSI_MUTEX_INTERFACE is defined, the mutex is first registered with
-the Performance Schema under the given key and the returned handle is stored
-in fast_mutex->pfs_psi; otherwise pfs_psi is set to NULL. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_init(
-/*===================*/
- PSI_mutex_key key, /*!< in: Performance Schema
- key */
- os_fast_mutex_t* fast_mutex) /*!< out: fast mutex */
-{
-#ifdef HAVE_PSI_MUTEX_INTERFACE
- fast_mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, &fast_mutex->mutex);
-#else
- /* No instrumentation interface: leave the mutex uninstrumented. */
- fast_mutex->pfs_psi = NULL;
-#endif
-
- os_fast_mutex_init_func(&fast_mutex->mutex);
-}
-/******************************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_free(), not directly
-this function!
-Wrapper function for os_fast_mutex_free_func(). Also destroys the performance
-schema probes when freeing the mutex */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_free(
-/*===================*/
- os_fast_mutex_t* fast_mutex) /*!< in/out: mutex */
-{
-#ifdef HAVE_PSI_MUTEX_INTERFACE
- /* Only an instrumented mutex has a probe to destroy. */
- if (fast_mutex->pfs_psi != NULL)
- PSI_MUTEX_CALL(destroy_mutex)(fast_mutex->pfs_psi);
-#endif
- /* Clear the handle in every configuration before freeing the mutex. */
- fast_mutex->pfs_psi = NULL;
-
- os_fast_mutex_free_func(&fast_mutex->mutex);
-}
-/**********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_lock, not directly
-this function!
-Performance Schema instrumented wrapper of os_fast_mutex_lock_func.
-Acquires ownership of a fast mutex; for an instrumented mutex the wait
-is reported through start_mutex_wait/end_mutex_wait around the lock call. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_lock(
-/*===================*/
-	os_fast_mutex_t*	fast_mutex,	/*!< in/out: mutex to acquire */
-	const char*		file_name,	/*!< in: file name where
-						locked */
-	ulint			line)		/*!< in: line where locked */
-{
-#ifdef HAVE_PSI_MUTEX_INTERFACE
-	PSI_mutex_locker*	locker = NULL;
-	PSI_mutex_locker_state	state;
-
-	/* Open a wait event only for an instrumented mutex. */
-	if (fast_mutex->pfs_psi != NULL) {
-		locker = PSI_MUTEX_CALL(start_mutex_wait)(
-			&state, fast_mutex->pfs_psi,
-			PSI_MUTEX_LOCK, file_name,
-			static_cast<uint>(line));
-	}
-#endif
-
-	os_fast_mutex_lock_func(&fast_mutex->mutex);
-
-#ifdef HAVE_PSI_MUTEX_INTERFACE
-	/* Close the wait event if one was opened above. */
-	if (locker != NULL) {
-		PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
-	}
-#endif
-}
-/**********************************************************//**
-NOTE! Please use the corresponding macro os_fast_mutex_unlock, not directly
-this function!
-Wrapper function of os_fast_mutex_unlock_func. Releases ownership of a
-fast mutex.
-For an instrumented mutex (pfs_psi != NULL) the Performance Schema is
-notified of the unlock before the mutex itself is released. */
-UNIV_INLINE
-void
-pfs_os_fast_mutex_unlock(
-/*=====================*/
- os_fast_mutex_t* fast_mutex) /*!< in/out: mutex to release */
-{
-#ifdef HAVE_PSI_MUTEX_INTERFACE
- /* Skip the notification for a mutex that was never instrumented. */
- if (fast_mutex->pfs_psi != NULL)
- PSI_MUTEX_CALL(unlock_mutex)(fast_mutex->pfs_psi);
-#endif
-
- os_fast_mutex_unlock_func(&fast_mutex->mutex);
-}
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef HAVE_WINDOWS_ATOMICS
-
-/* Use inline functions to make 64 and 32 bit versions of windows atomic
-functions so that typecasts are evaluated at compile time. Take advantage
-that lint is either __int64 or long int and windows atomic functions work
-on __int64 and LONG */
-
-/**********************************************************//**
-Atomic compare and exchange of signed integers (lint).
-Uses InterlockedCompareExchange64() when _WIN64 is defined and
-InterlockedCompareExchange() otherwise.
-@return value found before the exchange.
-If it is not equal to old_val the exchange did not happen. */
-UNIV_INLINE
-lint
-win_cmp_and_xchg_lint(
-/*==================*/
- volatile lint* ptr, /*!< in/out: source/destination */
- lint new_val, /*!< in: exchange value */
- lint old_val) /*!< in: value to compare to */
-{
-# ifdef _WIN64
- return(InterlockedCompareExchange64(ptr, new_val, old_val));
-# else
- return(InterlockedCompareExchange(ptr, new_val, old_val));
-# endif
-}
-
-/**********************************************************//**
-Atomic addition of signed integers.
-Uses InterlockedExchangeAdd64() when _WIN64 is defined and
-InterlockedExchangeAdd() otherwise.
-@return Initial value of the variable pointed to by ptr */
-UNIV_INLINE
-lint
-win_xchg_and_add(
-/*=============*/
- volatile lint* ptr, /*!< in/out: address of destination */
- lint val) /*!< in: number to be added */
-{
-#ifdef _WIN64
- return(InterlockedExchangeAdd64(ptr, val));
-#else
- return(InterlockedExchangeAdd(ptr, val));
-#endif
-}
-
-/**********************************************************//**
-Atomic compare and exchange of unsigned integers (ulint), implemented by
-forwarding to win_cmp_and_xchg_lint() with the operands cast to lint.
-@return value found before the exchange.
-If it is not equal to old_val the exchange did not happen. */
-UNIV_INLINE
-ulint
-win_cmp_and_xchg_ulint(
-/*===================*/
-	volatile ulint*	ptr,		/*!< in/out: source/destination */
-	ulint		new_val,	/*!< in: exchange value */
-	ulint		old_val)	/*!< in: value to compare to */
-{
-	volatile lint*	signed_ptr = reinterpret_cast<volatile lint*>(ptr);
-
-	const lint	found = win_cmp_and_xchg_lint(
-		signed_ptr,
-		static_cast<lint>(new_val),
-		static_cast<lint>(old_val));
-
-	return(static_cast<ulint>(found));
-}
-
-/**********************************************************//**
-Atomic compare and exchange of 32-bit unsigned integers (DWORD), performed
-through InterlockedCompareExchange() on the same storage viewed as LONG.
-@return value found before the exchange.
-If it is not equal to old_val the exchange did not happen. */
-UNIV_INLINE
-DWORD
-win_cmp_and_xchg_dword(
-/*===================*/
-	volatile DWORD*	ptr,		/*!< in/out: source/destination */
-	DWORD		new_val,	/*!< in: exchange value */
-	DWORD		old_val)	/*!< in: value to compare to */
-{
-	ut_ad(sizeof(DWORD) == sizeof(LONG));	/* We assume this. */
-
-	volatile LONG*	dest = reinterpret_cast<volatile LONG*>(ptr);
-
-	return(InterlockedCompareExchange(
-		       dest,
-		       static_cast<LONG>(new_val),
-		       static_cast<LONG>(old_val)));
-}
-
-#endif /* HAVE_WINDOWS_ATOMICS */
-
-/**********************************************************//**
-Tries to take ownership of a fast mutex. Implies a full memory barrier even
-on platforms such as PowerPC where this is not normally required.
-@return 0 if the mutex was acquired, != 0 if it was reserved by another
-thread */
-UNIV_INLINE
-ulint
-os_fast_mutex_trylock_full_barrier(
-/*===============================*/
-	os_fast_mutex_t*	fast_mutex)	/*!< in: mutex to acquire */
-{
-#ifdef __WIN__
-	/* Map the result of TryEnterCriticalSection() to 0 on success
-	and 1 on failure. */
-	return(TryEnterCriticalSection(&fast_mutex->mutex) ? 0 : 1);
-#else
-	/* The MySQL my_pthread.h redefines pthread_mutex_trylock so
-	that it returns 0 on success. Natively, HP-UX-10.20 follows the
-	old Posix 1003.4a Draft 4 and returns 1 on success (MySQL remaps
-	that to 0), whereas Linux, FreeBSD, Solaris, AIX, Tru64 Unix and
-	HP-UX-11.0 already return 0 on success. */
-
-# ifdef __powerpc__
-	/* Explicit barrier, issued before the lock attempt. */
-	os_mb;
-# endif
-	return((ulint) pthread_mutex_trylock(&fast_mutex->mutex));
-#endif
-}
diff --git a/storage/xtradb/include/os0thread.h b/storage/xtradb/include/os0thread.h
deleted file mode 100644
index 7865358b0f7..00000000000
--- a/storage/xtradb/include/os0thread.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0thread.h
-The interface to the operating system
-process and thread control primitives
-
-Created 9/8/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef os0thread_h
-#define os0thread_h
-
-#include "univ.i"
-
-#ifdef UNIV_LINUX
-#include <sys/types.h>
-#endif
-
-/* Maximum number of threads which can be created in the program;
-this is also the size of the wait slot array for MySQL threads which
-can wait inside InnoDB */
-
-#define OS_THREAD_MAX_N srv_max_n_threads
-
-/* Possible fixed priorities for threads */
-#define OS_THREAD_PRIORITY_NONE 100
-#define OS_THREAD_PRIORITY_BACKGROUND 1
-#define OS_THREAD_PRIORITY_NORMAL 2
-#define OS_THREAD_PRIORITY_ABOVE_NORMAL 3
-
-#ifdef __WIN__
-typedef void* os_thread_t;
-typedef DWORD os_thread_id_t; /*!< In Windows the thread id
- is an unsigned long int */
-typedef os_thread_id_t os_tid_t;
-extern "C" {
-typedef LPTHREAD_START_ROUTINE os_thread_func_t;
-}
-
-/** Macro for specifying a Windows thread start function. */
-#define DECLARE_THREAD(func) WINAPI func
-
-/** Required to get around a build error on Windows. Even though our functions
-are defined/declared as WINAPI f(LPVOID a); the compiler complains that they
-are defined as: os_thread_ret_t (__cdecl*)(void*). Because our functions
-don't access the arguments and don't return any value, we should be safe. */
-#define os_thread_create(f,a,i) \
- os_thread_create_func(reinterpret_cast<os_thread_func_t>(f), a, i)
-
-#else
-
-typedef pthread_t os_thread_t;
-typedef os_thread_t os_thread_id_t; /*!< In Unix we use the thread
- handle itself as the id of
- the thread */
-#ifdef UNIV_LINUX
-typedef pid_t os_tid_t; /*!< An alias for pid_t on
- Linux, where setpriority()
- accepts thread id of this type
- and not pthread_t */
-#else
-typedef os_thread_id_t os_tid_t;
-#endif
-
-extern "C" { typedef void* (*os_thread_func_t)(void*); }
-
-/** Macro for specifying a POSIX thread start function. */
-#define DECLARE_THREAD(func) func
-#define os_thread_create(f,a,i) os_thread_create_func(f, a, i)
-
-#endif /* __WIN__ */
-
-/* Define a function pointer type to use in a typecast */
-typedef void* (*os_posix_f_t) (void*);
-
-#ifdef HAVE_PSI_INTERFACE
-/* Define for performance schema registration key */
-typedef unsigned int mysql_pfs_key_t;
-#endif
-
-/***************************************************************//**
-Compares two thread ids for equality.
-@return TRUE if equal */
-UNIV_INTERN
-ibool
-os_thread_eq(
-/*=========*/
- os_thread_id_t a, /*!< in: OS thread or thread id */
- os_thread_id_t b); /*!< in: OS thread or thread id */
-/****************************************************************//**
-Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is
-unique for the thread though!
-@return thread identifier as a number */
-UNIV_INTERN
-ulint
-os_thread_pf(
-/*=========*/
- os_thread_id_t a); /*!< in: OS thread identifier */
-/****************************************************************//**
-Creates a new thread of execution. The execution starts from
-the function given. The start function takes a void* parameter
-and has the platform-specific type os_thread_func_t.
-NOTE: We count the number of threads in os_thread_exit(). A created
-thread should always use that to exit and not use return() to exit.
-@return handle to the thread */
-UNIV_INTERN
-os_thread_t
-os_thread_create_func(
-/*==================*/
- os_thread_func_t func, /*!< in: pointer to function
- from which to start */
- void* arg, /*!< in: argument to start
- function */
- os_thread_id_t* thread_id); /*!< out: id of the created
- thread, or NULL */
-
-/** Waits until the specified thread completes and joins it.
-Its return value is ignored.
-@param[in,out] thread thread to join */
-UNIV_INTERN
-void
-os_thread_join(
- os_thread_t thread);
-
-/*****************************************************************//**
-Exits the current thread. */
-UNIV_INTERN
-void
-os_thread_exit(
-/*===========*/
- void* exit_value, /*!< in: exit value; in Windows this void*
- is cast as a DWORD */
- bool detach = true) /*!< in: if true, the thread will be detached
- right before exiting. If false, another thread
- is responsible for joining this thread. */
- UNIV_COLD MY_ATTRIBUTE((noreturn));
-/*****************************************************************//**
-Returns the thread identifier of current thread.
-@return current thread identifier */
-UNIV_INTERN
-os_thread_id_t
-os_thread_get_curr_id(void);
-/*========================*/
-/*****************************************************************//**
-Returns the system-specific thread identifier of current thread. On Linux,
-returns tid. On other systems currently returns os_thread_get_curr_id().
-
-@return current thread identifier */
-UNIV_INTERN
-os_tid_t
-os_thread_get_tid(void);
-/*=====================*/
-/*****************************************************************//**
-Advises the os to give up remainder of the thread's time slice. */
-UNIV_INTERN
-void
-os_thread_yield(void);
-/*=================*/
-/*****************************************************************//**
-The thread sleeps at least the time given in microseconds. */
-UNIV_INTERN
-void
-os_thread_sleep(
-/*============*/
- ulint tm); /*!< in: time in microseconds */
-/*****************************************************************//**
-Set relative scheduling priority for a given thread on Linux. Currently a
-no-op on other systems.
-
-@return An actual thread priority after the update */
-UNIV_INTERN
-ulint
-os_thread_set_priority(
-/*===================*/
- os_tid_t thread_id, /*!< in: thread id */
- ulint relative_priority); /*!< in: system-specific
- priority value */
-
-/*****************************************************************//**
-Get priority for a given thread on Linux. Currently a
-no-op on other systems.
-
-@return An actual thread priority */
-UNIV_INTERN
-ulint
-os_thread_get_priority(
-/*===================*/
- os_tid_t thread_id); /*!< in: thread id */
-
-#ifndef UNIV_NONINL
-#include "os0thread.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/os0thread.ic b/storage/xtradb/include/os0thread.ic
deleted file mode 100644
index 0622d22f2dc..00000000000
--- a/storage/xtradb/include/os0thread.ic
+++ /dev/null
@@ -1,25 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/os0thread.ic
-The interface to the operating system
-process and thread control primitives
-
-Created 9/8/1995 Heikki Tuuri
-*******************************************************/
diff --git a/storage/xtradb/include/page0cur.h b/storage/xtradb/include/page0cur.h
deleted file mode 100644
index f04667ff29c..00000000000
--- a/storage/xtradb/include/page0cur.h
+++ /dev/null
@@ -1,387 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/page0cur.h
-The page cursor
-
-Created 10/4/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef page0cur_h
-#define page0cur_h
-
-#include "univ.i"
-
-#include "buf0types.h"
-#include "page0page.h"
-#include "rem0rec.h"
-#include "data0data.h"
-#include "mtr0mtr.h"
-
-
-#define PAGE_CUR_ADAPT
-
-/* Page cursor search modes; the values must be in this order! */
-
-#define PAGE_CUR_UNSUPP 0
-#define PAGE_CUR_G 1
-#define PAGE_CUR_GE 2
-#define PAGE_CUR_L 3
-#define PAGE_CUR_LE 4
-/*#define PAGE_CUR_LE_OR_EXTENDS 5*/ /* This is a search mode used in
- "column LIKE 'abc%' ORDER BY column DESC";
- we have to find strings which are <= 'abc' or
- which extend it */
-#ifdef UNIV_SEARCH_DEBUG
-# define PAGE_CUR_DBG 6 /* As PAGE_CUR_LE, but skips search shortcut */
-#endif /* UNIV_SEARCH_DEBUG */
-
-#ifdef UNIV_DEBUG
-/*********************************************************//**
-Gets pointer to the page frame where the cursor is positioned.
-@return page */
-UNIV_INLINE
-page_t*
-page_cur_get_page(
-/*==============*/
- page_cur_t* cur); /*!< in: page cursor */
-/*********************************************************//**
-Gets pointer to the buffer block where the cursor is positioned.
-@return buffer block */
-UNIV_INLINE
-buf_block_t*
-page_cur_get_block(
-/*===============*/
- page_cur_t* cur); /*!< in: page cursor */
-/*********************************************************//**
-Gets the compressed page descriptor of the block the cursor is positioned on.
-@return compressed page descriptor */
-UNIV_INLINE
-page_zip_des_t*
-page_cur_get_page_zip(
-/*==================*/
- page_cur_t* cur); /*!< in: page cursor */
-/*********************************************************//**
-Gets the record where the cursor is positioned.
-@return record */
-UNIV_INLINE
-rec_t*
-page_cur_get_rec(
-/*=============*/
- page_cur_t* cur); /*!< in: page cursor */
-#else /* UNIV_DEBUG */
-# define page_cur_get_page(cur) page_align((cur)->rec)
-# define page_cur_get_block(cur) (cur)->block
-# define page_cur_get_page_zip(cur) buf_block_get_page_zip((cur)->block)
-# define page_cur_get_rec(cur) (cur)->rec
-#endif /* UNIV_DEBUG */
-/*********************************************************//**
-Sets the cursor object to point before the first user record
-on the page. */
-UNIV_INLINE
-void
-page_cur_set_before_first(
-/*======================*/
- const buf_block_t* block, /*!< in: index page */
- page_cur_t* cur); /*!< in: cursor */
-/*********************************************************//**
-Sets the cursor object to point after the last user record on
-the page. */
-UNIV_INLINE
-void
-page_cur_set_after_last(
-/*====================*/
- const buf_block_t* block, /*!< in: index page */
- page_cur_t* cur); /*!< in: cursor */
-/*********************************************************//**
-Returns TRUE if the cursor is before first user record on page.
-@return TRUE if at start */
-UNIV_INLINE
-ibool
-page_cur_is_before_first(
-/*=====================*/
- const page_cur_t* cur); /*!< in: cursor */
-/*********************************************************//**
-Returns TRUE if the cursor is after last user record.
-@return TRUE if at end */
-UNIV_INLINE
-ibool
-page_cur_is_after_last(
-/*===================*/
- const page_cur_t* cur); /*!< in: cursor */
-/**********************************************************//**
-Positions the cursor on the given record. */
-UNIV_INLINE
-void
-page_cur_position(
-/*==============*/
- const rec_t* rec, /*!< in: record on a page */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- page_cur_t* cur); /*!< out: page cursor */
-/**********************************************************//**
-Invalidates a page cursor by setting the record pointer NULL. */
-UNIV_INLINE
-void
-page_cur_invalidate(
-/*================*/
- page_cur_t* cur); /*!< out: page cursor */
-/**********************************************************//**
-Moves the cursor to the next record on page. */
-UNIV_INLINE
-void
-page_cur_move_to_next(
-/*==================*/
- page_cur_t* cur); /*!< in/out: cursor; must not be after last */
-/**********************************************************//**
-Moves the cursor to the previous record on page. */
-UNIV_INLINE
-void
-page_cur_move_to_prev(
-/*==================*/
- page_cur_t* cur); /*!< in/out: cursor; not before first */
-#ifndef UNIV_HOTBACKUP
-/***********************************************************//**
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same logical position, but the physical position may change if it is
-pointing to a compressed page that was reorganized.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INLINE
-rec_t*
-page_cur_tuple_insert(
-/*==================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- const dtuple_t* tuple, /*!< in: pointer to a data tuple */
- dict_index_t* index, /*!< in: record descriptor */
- ulint** offsets,/*!< out: offsets on *rec */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
- ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
- MY_ATTRIBUTE((nonnull(1,2,3,4,5), warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************//**
-Inserts a record next to page cursor. Returns pointer to inserted record if
-succeed, i.e., enough space available, NULL otherwise. The cursor stays at
-the same logical position, but the physical position may change if it is
-pointing to a compressed page that was reorganized.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INLINE
-rec_t*
-page_cur_rec_insert(
-/*================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- const rec_t* rec, /*!< in: record to insert */
- dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr); /*!< in: mini-transaction handle, or NULL */
-/***********************************************************//**
-Inserts a record next to page cursor on an uncompressed page.
-Returns pointer to inserted record if succeed, i.e., enough
-space available, NULL otherwise. The cursor stays at the same position.
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INTERN
-rec_t*
-page_cur_insert_rec_low(
-/*====================*/
- rec_t* current_rec,/*!< in: pointer to current record after
- which the new record is inserted */
- dict_index_t* index, /*!< in: record descriptor */
- const rec_t* rec, /*!< in: pointer to a physical record */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
- MY_ATTRIBUTE((nonnull(1,2,3,4), warn_unused_result));
-/***********************************************************//**
-Inserts a record next to page cursor on a compressed and uncompressed
-page. Returns pointer to inserted record if succeed, i.e.,
-enough space available, NULL otherwise.
-The cursor stays at the same position.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INTERN
-rec_t*
-page_cur_insert_rec_zip(
-/*====================*/
- page_cur_t* cursor, /*!< in/out: page cursor */
- dict_index_t* index, /*!< in: record descriptor */
- const rec_t* rec, /*!< in: pointer to a physical record */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
- MY_ATTRIBUTE((nonnull(1,2,3,4), warn_unused_result));
-/*************************************************************//**
-Copies records from page to a newly created page, from a given record onward,
-including that record. Infimum and supremum records are not copied.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit(). */
-UNIV_INTERN
-void
-page_copy_rec_list_end_to_created_page(
-/*===================================*/
- page_t* new_page, /*!< in/out: index page to copy to */
- rec_t* rec, /*!< in: first record to copy */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr */
-/***********************************************************//**
-Deletes a record at the page cursor. The cursor is moved to the
-next record after the deleted one. */
-UNIV_INTERN
-void
-page_cur_delete_rec(
-/*================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- const dict_index_t* index, /*!< in: record descriptor */
- const ulint* offsets,/*!< in: rec_get_offsets(
- cursor->rec, index) */
- mtr_t* mtr); /*!< in: mini-transaction handle */
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Searches the right position for a page cursor.
-@return number of matched fields on the left */
-UNIV_INLINE
-ulint
-page_cur_search(
-/*============*/
- const buf_block_t* block, /*!< in: buffer block */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* tuple, /*!< in: data tuple */
- ulint mode, /*!< in: PAGE_CUR_L,
- PAGE_CUR_LE, PAGE_CUR_G, or
- PAGE_CUR_GE */
- page_cur_t* cursor);/*!< out: page cursor */
-/****************************************************************//**
-Searches the right position for a page cursor. */
-UNIV_INTERN
-void
-page_cur_search_with_match(
-/*=======================*/
- const buf_block_t* block, /*!< in: buffer block */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* tuple, /*!< in: data tuple */
- ulint mode, /*!< in: PAGE_CUR_L,
- PAGE_CUR_LE, PAGE_CUR_G, or
- PAGE_CUR_GE */
- ulint* iup_matched_fields,
- /*!< in/out: already matched
- fields in upper limit record */
- ulint* iup_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
- ulint* ilow_matched_fields,
- /*!< in/out: already matched
- fields in lower limit record */
- ulint* ilow_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
- page_cur_t* cursor);/*!< out: page cursor */
-/***********************************************************//**
-Positions a page cursor on a randomly chosen user record on a page. If there
-are no user records, sets the cursor on the infimum record. */
-UNIV_INTERN
-void
-page_cur_open_on_rnd_user_rec(
-/*==========================*/
- buf_block_t* block, /*!< in: page */
- page_cur_t* cursor);/*!< out: page cursor */
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************//**
-Parses a log record of a record insert on a page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_cur_parse_insert_rec(
-/*======================*/
- ibool is_short,/*!< in: TRUE if short inserts */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in: page or NULL */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/**********************************************************//**
-Parses a log record of copying a record list end to a new created page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_parse_copy_rec_list_to_created_page(
-/*=====================================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in: page or NULL */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/***********************************************************//**
-Parses log record of a record delete on a page.
-@return pointer to record end or NULL */
-UNIV_INTERN
-byte*
-page_cur_parse_delete_rec(
-/*======================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in: page or NULL */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/*******************************************************//**
-Removes the record from a leaf page. This function does not log
-any changes. It is used by the IMPORT tablespace functions.
-@return true if success, i.e., the page did not become too empty */
-UNIV_INTERN
-bool
-page_delete_rec(
-/*============*/
- const dict_index_t* index, /*!< in: The index that the record
- belongs to */
- page_cur_t* pcur, /*!< in/out: page cursor on record
- to delete */
- page_zip_des_t* page_zip,/*!< in: compressed page descriptor */
- const ulint* offsets);/*!< in: offsets for record */
-
-/** Index page cursor: a position on an index page, held as a pointer to
-a record together with the buffer block that contains that record. */
-
-struct page_cur_t{
- byte* rec; /*!< pointer to a record on page */
- buf_block_t* block; /*!< pointer to the block containing rec */
-};
-
-#ifndef UNIV_NONINL
-#include "page0cur.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/page0cur.ic b/storage/xtradb/include/page0cur.ic
deleted file mode 100644
index 6e068d9f739..00000000000
--- a/storage/xtradb/include/page0cur.ic
+++ /dev/null
@@ -1,328 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/page0cur.ic
-The page cursor
-
-Created 10/4/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "page0page.h"
-#include "buf0types.h"
-
-#ifdef UNIV_DEBUG
-# include "rem0cmp.h"
-
-/*********************************************************//**
-Gets pointer to the page frame where the cursor is positioned. Compiled only under UNIV_DEBUG.
-@return page_align(cur->rec), i.e. the start of the page containing the cursor record */
-UNIV_INLINE
-page_t*
-page_cur_get_page(
-/*==============*/
- page_cur_t* cur) /*!< in: page cursor; checked by ut_ad(cur) */
-{
- ut_ad(cur);
-
- if (cur->rec) {
- ut_ad(page_align(cur->rec) == cur->block->frame); /* rec must lie inside the frame of cur->block */
- }
-
- return(page_align(cur->rec)); /* derived from rec; cur->block is used only by the assertion */
-}
-
-/*********************************************************//**
-Gets pointer to the buffer block where the cursor is positioned. Compiled only under UNIV_DEBUG.
-@return buffer block stored in the cursor (cur->block) */
-UNIV_INLINE
-buf_block_t*
-page_cur_get_block(
-/*===============*/
- page_cur_t* cur) /*!< in: page cursor; checked by ut_ad(cur) */
-{
- ut_ad(cur);
-
- if (cur->rec) {
- ut_ad(page_align(cur->rec) == cur->block->frame); /* rec must lie inside the frame of cur->block */
- }
-
- return(cur->block);
-}
-
-/*********************************************************//**
-Gets the compressed page descriptor of the block where the cursor is positioned. Compiled only under UNIV_DEBUG.
-@return result of buf_block_get_page_zip() applied to page_cur_get_block(cur) */
-UNIV_INLINE
-page_zip_des_t*
-page_cur_get_page_zip(
-/*==================*/
- page_cur_t* cur) /*!< in: page cursor */
-{
- return(buf_block_get_page_zip(page_cur_get_block(cur))); /* page_cur_get_block() runs the cursor assertions */
-}
-
-/*********************************************************//**
-Gets the record where the cursor is positioned. Compiled only under UNIV_DEBUG.
-@return record pointer stored in the cursor (cur->rec), which may be NULL */
-UNIV_INLINE
-rec_t*
-page_cur_get_rec(
-/*=============*/
- page_cur_t* cur) /*!< in: page cursor; checked by ut_ad(cur) */
-{
- ut_ad(cur);
-
- if (cur->rec) {
- ut_ad(page_align(cur->rec) == cur->block->frame); /* rec must lie inside the frame of cur->block */
- }
-
- return(cur->rec);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************//**
-Sets the cursor object to point before the first user record
-on the page, i.e. on the record given by page_get_infimum_rec(). */
-UNIV_INLINE
-void
-page_cur_set_before_first(
-/*======================*/
- const buf_block_t* block, /*!< in: index page */
- page_cur_t* cur) /*!< out: cursor; both block and rec are overwritten */
-{
- cur->block = (buf_block_t*) block; /* the cast drops the const qualifier of the parameter */
- cur->rec = page_get_infimum_rec(buf_block_get_frame(cur->block));
-}
-
-/*********************************************************//**
-Sets the cursor object to point after the last user record on
-the page, i.e. on the record given by page_get_supremum_rec(). */
-UNIV_INLINE
-void
-page_cur_set_after_last(
-/*====================*/
- const buf_block_t* block, /*!< in: index page */
- page_cur_t* cur) /*!< out: cursor; both block and rec are overwritten */
-{
- cur->block = (buf_block_t*) block; /* the cast drops the const qualifier of the parameter */
- cur->rec = page_get_supremum_rec(buf_block_get_frame(cur->block));
-}
-
-/*********************************************************//**
-Returns TRUE if the cursor is before first user record on page, i.e. on the infimum record.
-@return value of page_rec_is_infimum(cur->rec) */
-UNIV_INLINE
-ibool
-page_cur_is_before_first(
-/*=====================*/
- const page_cur_t* cur) /*!< in: cursor; rec is dereferenced by the check, so it must be set */
-{
- ut_ad(cur);
- ut_ad(page_align(cur->rec) == cur->block->frame); /* rec must lie inside the frame of cur->block */
- return(page_rec_is_infimum(cur->rec));
-}
-
-/*********************************************************//**
-Returns TRUE if the cursor is after last user record, i.e. on the supremum record.
-@return value of page_rec_is_supremum(cur->rec) */
-UNIV_INLINE
-ibool
-page_cur_is_after_last(
-/*===================*/
- const page_cur_t* cur) /*!< in: cursor; rec is dereferenced by the check, so it must be set */
-{
- ut_ad(cur);
- ut_ad(page_align(cur->rec) == cur->block->frame); /* rec must lie inside the frame of cur->block */
- return(page_rec_is_supremum(cur->rec));
-}
-
-/**********************************************************//**
-Positions the cursor on the given record by storing rec and block in the cursor. */
-UNIV_INLINE
-void
-page_cur_position(
-/*==============*/
- const rec_t* rec, /*!< in: record on a page */
- const buf_block_t* block, /*!< in: buffer block containing
- the record; rec must lie in block->frame */
- page_cur_t* cur) /*!< out: page cursor */
-{
- ut_ad(rec && block && cur);
- ut_ad(page_align(rec) == block->frame);
-
- cur->rec = (rec_t*) rec; /* the cast drops the const qualifier of the parameter */
- cur->block = (buf_block_t*) block; /* the cast drops the const qualifier of the parameter */
-}
-
-/**********************************************************//**
-Invalidates a page cursor by setting both the record pointer and the block pointer NULL. */
-UNIV_INLINE
-void
-page_cur_invalidate(
-/*================*/
- page_cur_t* cur) /*!< out: page cursor; checked by ut_ad(cur) */
-{
- ut_ad(cur);
-
- cur->rec = NULL;
- cur->block = NULL;
-}
-
-/**********************************************************//**
-Moves the cursor to the next record on page, as given by page_rec_get_next(). */
-UNIV_INLINE
-void
-page_cur_move_to_next(
-/*==================*/
- page_cur_t* cur) /*!< in/out: cursor; must not be after last */
-{
- ut_ad(!page_cur_is_after_last(cur)); /* precondition is only checked by ut_ad() */
-
- cur->rec = page_rec_get_next(cur->rec); /* only rec changes; cur->block is left as is */
-}
-
-/**********************************************************//**
-Moves the cursor to the previous record on page, as given by page_rec_get_prev(). */
-UNIV_INLINE
-void
-page_cur_move_to_prev(
-/*==================*/
- page_cur_t* cur) /*!< in/out: page cursor, not before first */
-{
- ut_ad(!page_cur_is_before_first(cur)); /* precondition is only checked by ut_ad() */
-
- cur->rec = page_rec_get_prev(cur->rec); /* only rec changes; cur->block is left as is */
-}
-
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Searches the right position for a page cursor; thin wrapper around page_cur_search_with_match().
-@return number of matched fields on the left (the low_matched_fields result) */
-UNIV_INLINE
-ulint
-page_cur_search(
-/*============*/
- const buf_block_t* block, /*!< in: buffer block */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* tuple, /*!< in: data tuple */
- ulint mode, /*!< in: PAGE_CUR_L,
- PAGE_CUR_LE, PAGE_CUR_G, or
- PAGE_CUR_GE */
- page_cur_t* cursor) /*!< out: page cursor */
-{
- ulint low_matched_fields = 0;
- ulint low_matched_bytes = 0;
- ulint up_matched_fields = 0;
- ulint up_matched_bytes = 0;
-
- ut_ad(dtuple_check_typed(tuple));
-
- page_cur_search_with_match(block, index, tuple, mode,
- &up_matched_fields,
- &up_matched_bytes,
- &low_matched_fields,
- &low_matched_bytes,
- cursor);
- return(low_matched_fields); /* the other three match counters are discarded */
-}
-
-/***********************************************************//**
-Converts a data tuple to a physical record and inserts it next to page cursor.
-Returns pointer to inserted record on success, i.e., enough space available, NULL otherwise. The cursor stays at
-the same logical position, but the physical position may change if it is
-pointing to a compressed page that was reorganized.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return pointer to record on success, NULL otherwise */
-UNIV_INLINE
-rec_t*
-page_cur_tuple_insert(
-/*==================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- const dtuple_t* tuple, /*!< in: pointer to a data tuple */
- dict_index_t* index, /*!< in: record descriptor */
- ulint** offsets,/*!< in/out: the current value is passed to rec_get_offsets() and replaced by its result */
- mem_heap_t** heap, /*!< in/out: pointer to memory heap pointer; if the pointed-to heap is NULL, one is created here */
- ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
-{
- ulint size
- = rec_get_converted_size(index, tuple, n_ext);
- rec_t* rec;
-
- if (!*heap) { /* no heap supplied by the caller: create one */
- *heap = mem_heap_create(size
- + (4 + REC_OFFS_HEADER_SIZE
- + dtuple_get_n_fields(tuple))
- * sizeof **offsets); /* presumably leaves room for the offsets array as well - TODO confirm */
- }
-
- rec = rec_convert_dtuple_to_rec((byte*) mem_heap_alloc(*heap, size),
- index, tuple, n_ext); /* the converted record is built in heap memory */
- *offsets = rec_get_offsets(
- rec, index, *offsets, ULINT_UNDEFINED, heap);
-
- if (buf_block_get_page_zip(cursor->block)) { /* block has a compressed page descriptor */
- rec = page_cur_insert_rec_zip(
- cursor, index, rec, *offsets, mtr);
- } else {
- rec = page_cur_insert_rec_low(cursor->rec,
- index, rec, *offsets, mtr);
- }
-
- ut_ad(!rec || !cmp_dtuple_rec(tuple, rec, *offsets)); /* on success cmp_dtuple_rec() must return 0 */
- return(rec);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Inserts a physical record next to page cursor. Returns pointer to inserted record on
-success, i.e., enough space available, NULL otherwise. The cursor stays at
-the same logical position, but the physical position may change if it is
-pointing to a compressed page that was reorganized.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return pointer to record on success, NULL otherwise */
-UNIV_INLINE
-rec_t*
-page_cur_rec_insert(
-/*================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- const rec_t* rec, /*!< in: record to insert */
- dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
-{
- if (buf_block_get_page_zip(cursor->block)) { /* block has a compressed page descriptor */
- return(page_cur_insert_rec_zip(
- cursor, index, rec, offsets, mtr));
- } else {
- return(page_cur_insert_rec_low(cursor->rec,
- index, rec, offsets, mtr)); /* this path takes the current record, not the cursor */
- }
-}
diff --git a/storage/xtradb/include/page0page.h b/storage/xtradb/include/page0page.h
deleted file mode 100644
index eefa0fa4c5b..00000000000
--- a/storage/xtradb/include/page0page.h
+++ /dev/null
@@ -1,1140 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/page0page.h
-Index page routines
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef page0page_h
-#define page0page_h
-
-#include "univ.i"
-
-#include "buf0types.h"
-
-#ifndef UNIV_INNOCHECKSUM
-
-#include "page0types.h"
-#include "fil0fil.h"
-#include "buf0buf.h"
-#include "data0data.h"
-#include "dict0dict.h"
-#include "rem0rec.h"
-#include "fsp0fsp.h"
-#include "mtr0mtr.h"
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE
-#endif
-
-/* PAGE HEADER
- ===========
-
-Index page header starts at the first offset left free by the FIL-module */
-
-typedef byte page_header_t;
-
-#define PAGE_HEADER FSEG_PAGE_DATA /* index page header starts at this
- offset */
-/*-----------------------------*/
-#define PAGE_N_DIR_SLOTS 0 /* number of slots in page directory */
-#define PAGE_HEAP_TOP 2 /* pointer to record heap top */
-#define PAGE_N_HEAP 4 /* number of records in the heap,
- bit 15=flag: new-style compact page format */
-#define PAGE_FREE 6 /* pointer to start of page free record list */
-#define PAGE_GARBAGE 8 /* number of bytes in deleted records */
-#define PAGE_LAST_INSERT 10 /* pointer to the last inserted record, or
- NULL if this info has been reset by a delete,
- for example */
-#define PAGE_DIRECTION 12 /* last insert direction: PAGE_LEFT, ... */
-#define PAGE_N_DIRECTION 14 /* number of consecutive inserts to the same
- direction */
-#define PAGE_N_RECS 16 /* number of user records on the page */
-#define PAGE_MAX_TRX_ID 18 /* highest id of a trx which may have modified
- a record on the page; trx_id_t; defined only
- in secondary indexes and in the insert buffer
- tree */
-#define PAGE_HEADER_PRIV_END 26 /* end of private data structure of the page
- header which are set in a page create */
-/*----*/
-#define PAGE_LEVEL 26 /* level of the node in an index tree; the
- leaf level is the level 0. This field should
- not be written to after page creation. */
-#define PAGE_INDEX_ID 28 /* index id where the page belongs.
- This field should not be written to after
- page creation. */
-#define PAGE_BTR_SEG_LEAF 36 /* file segment header for the leaf pages in
- a B-tree: defined only on the root page of a
- B-tree, but not in the root of an ibuf tree */
-#define PAGE_BTR_IBUF_FREE_LIST PAGE_BTR_SEG_LEAF
-#define PAGE_BTR_IBUF_FREE_LIST_NODE PAGE_BTR_SEG_LEAF
- /* in the place of PAGE_BTR_SEG_LEAF and _TOP
- there is a free list base node if the page is
- the root page of an ibuf tree, and at the same
- place is the free list node if the page is in
- a free list */
-#define PAGE_BTR_SEG_TOP (36 + FSEG_HEADER_SIZE)
- /* file segment header for the non-leaf pages
- in a B-tree: defined only on the root page of
- a B-tree, but not in the root of an ibuf
- tree */
-/*----*/
-#define PAGE_DATA (PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE)
- /* start of data on the page */
-
-#define PAGE_OLD_INFIMUM (PAGE_DATA + 1 + REC_N_OLD_EXTRA_BYTES)
- /* offset of the page infimum record on an
- old-style page */
-#define PAGE_OLD_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_OLD_EXTRA_BYTES + 8)
- /* offset of the page supremum record on an
- old-style page */
-#define PAGE_OLD_SUPREMUM_END (PAGE_OLD_SUPREMUM + 9)
- /* offset of the page supremum record end on
- an old-style page */
-#define PAGE_NEW_INFIMUM (PAGE_DATA + REC_N_NEW_EXTRA_BYTES)
- /* offset of the page infimum record on a
- new-style compact page */
-#define PAGE_NEW_SUPREMUM (PAGE_DATA + 2 * REC_N_NEW_EXTRA_BYTES + 8)
- /* offset of the page supremum record on a
- new-style compact page */
-#define PAGE_NEW_SUPREMUM_END (PAGE_NEW_SUPREMUM + 8)
- /* offset of the page supremum record end on
- a new-style compact page */
-/*-----------------------------*/
-
-/* Heap numbers */
-#define PAGE_HEAP_NO_INFIMUM 0 /* page infimum */
-#define PAGE_HEAP_NO_SUPREMUM 1 /* page supremum */
-#define PAGE_HEAP_NO_USER_LOW 2 /* first user record in
- creation (insertion) order,
- not necessarily collation order;
- this record may have been deleted */
-
-/* Directions of cursor movement */
-#define PAGE_LEFT 1
-#define PAGE_RIGHT 2
-#define PAGE_SAME_REC 3
-#define PAGE_SAME_PAGE 4
-#define PAGE_NO_DIRECTION 5
-
-/* PAGE DIRECTORY
- ==============
-*/
-
-typedef byte page_dir_slot_t;
-typedef page_dir_slot_t page_dir_t;
-
-/* Offset of the directory start down from the page end. We call the
-slot with the highest file address directory start, as it points to
-the first record in the list of records. */
-#define PAGE_DIR FIL_PAGE_DATA_END
-
-/* We define a slot in the page directory as two bytes */
-#define PAGE_DIR_SLOT_SIZE 2
-
-/* The offset of the physically lower end of the directory, counted from
-page end, when the page is empty */
-#define PAGE_EMPTY_DIR_START (PAGE_DIR + 2 * PAGE_DIR_SLOT_SIZE)
-
-/* The maximum and minimum number of records owned by a directory slot. The
-number may drop below the minimum in the first and the last slot in the
-directory. */
-#define PAGE_DIR_SLOT_MAX_N_OWNED 8
-#define PAGE_DIR_SLOT_MIN_N_OWNED 4
-
-/************************************************************//**
-Gets the start of a page.
-@return start of the page */
-UNIV_INLINE
-page_t*
-page_align(
-/*=======*/
- const void* ptr) /*!< in: pointer to page frame */
- MY_ATTRIBUTE((const));
-/************************************************************//**
-Gets the offset within a page.
-@return offset from the start of the page */
-UNIV_INLINE
-ulint
-page_offset(
-/*========*/
- const void* ptr) /*!< in: pointer to page frame */
- MY_ATTRIBUTE((const));
-/*************************************************************//**
-Returns the max trx id field value. */
-UNIV_INLINE
-trx_id_t
-page_get_max_trx_id(
-/*================*/
- const page_t* page); /*!< in: page */
-/*************************************************************//**
-Sets the max trx id field value. */
-UNIV_INTERN
-void
-page_set_max_trx_id(
-/*================*/
- buf_block_t* block, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr); /*!< in/out: mini-transaction, or NULL */
-/*************************************************************//**
-Sets the max trx id field value if trx_id is bigger than the previous
-value. */
-UNIV_INLINE
-void
-page_update_max_trx_id(
-/*===================*/
- buf_block_t* block, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/*************************************************************//**
-Reads the given header field. */
-UNIV_INLINE
-ulint
-page_header_get_field(
-/*==================*/
- const page_t* page, /*!< in: page */
- ulint field); /*!< in: PAGE_N_DIR_SLOTS, ... */
-/*************************************************************//**
-Sets the given header field. */
-UNIV_INLINE
-void
-page_header_set_field(
-/*==================*/
- page_t* page, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- ulint field, /*!< in: PAGE_N_DIR_SLOTS, ... */
- ulint val); /*!< in: value */
-/*************************************************************//**
-Returns the offset stored in the given header field.
-@return offset from the start of the page, or 0 */
-UNIV_INLINE
-ulint
-page_header_get_offs(
-/*=================*/
- const page_t* page, /*!< in: page */
- ulint field); /*!< in: PAGE_FREE, ... */
-
-/*************************************************************//**
-Returns the pointer stored in the given header field, or NULL. Note: the expansion uses page and field unparenthesized and evaluates each of them more than once. */
-#define page_header_get_ptr(page, field) \
- (page_header_get_offs(page, field) \
- ? page + page_header_get_offs(page, field) : NULL)
-/*************************************************************//**
-Sets the pointer stored in the given header field. */
-UNIV_INLINE
-void
-page_header_set_ptr(
-/*================*/
- page_t* page, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- ulint field, /*!< in: PAGE_FREE, ... */
- const byte* ptr); /*!< in: pointer or NULL */
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Resets the last insert info field in the page header. Writes to mlog
-about this operation. */
-UNIV_INLINE
-void
-page_header_reset_last_insert(
-/*==========================*/
- page_t* page, /*!< in: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- mtr_t* mtr); /*!< in: mtr */
-#endif /* !UNIV_HOTBACKUP */
-/************************************************************//**
-Gets the offset of the first record on the page.
-@return offset of the first record in record list, relative from page */
-UNIV_INLINE
-ulint
-page_get_infimum_offset(
-/*====================*/
- const page_t* page); /*!< in: page which must have record(s) */
-/************************************************************//**
-Gets the offset of the last record on the page.
-@return offset of the last record in record list, relative from page */
-UNIV_INLINE
-ulint
-page_get_supremum_offset(
-/*=====================*/
- const page_t* page); /*!< in: page which must have record(s) */
-#define page_get_infimum_rec(page) ((page) + page_get_infimum_offset(page))
-#define page_get_supremum_rec(page) ((page) + page_get_supremum_offset(page))
-
-/************************************************************//**
-Returns the nth record of the record list.
-This is the inverse function of page_rec_get_n_recs_before().
-@return nth record */
-UNIV_INTERN
-const rec_t*
-page_rec_get_nth_const(
-/*===================*/
- const page_t* page, /*!< in: page */
- ulint nth) /*!< in: nth record */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/************************************************************//**
-Returns the nth record of the record list.
-This is the inverse function of page_rec_get_n_recs_before().
-@return nth record */
-UNIV_INLINE
-rec_t*
-page_rec_get_nth(
-/*=============*/
- page_t* page, /*!< in: page */
- ulint nth) /*!< in: nth record */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Returns the middle record of the records on the page. If there is an
-even number of records in the list, returns the first record of the
-upper half-list.
-@return middle record */
-UNIV_INLINE
-rec_t*
-page_get_middle_rec(
-/*================*/
- page_t* page) /*!< in: page */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*************************************************************//**
-Compares a data tuple to a physical record. Differs from the function
-cmp_dtuple_rec_with_match in the way that the record must reside on an
-index page, and also page infimum and supremum records can be given in
-the parameter rec. These are considered as the negative infinity and
-the positive infinity in the alphabetical order.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared */
-UNIV_INLINE
-int
-page_cmp_dtuple_rec_with_match(
-/*===========================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record on a page; may also
- be page infimum or supremum, in which case
- matched-parameter values below are not
- affected */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when function returns
- contains the value for current comparison */
- ulint* matched_bytes); /*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when function returns contains the
- value for current comparison */
-#endif /* !UNIV_HOTBACKUP */
-/*************************************************************//**
-Gets the page number.
-@return page number */
-UNIV_INLINE
-ulint
-page_get_page_no(
-/*=============*/
- const page_t* page); /*!< in: page */
-/*************************************************************//**
-Gets the tablespace identifier.
-@return space id */
-UNIV_INLINE
-ulint
-page_get_space_id(
-/*==============*/
- const page_t* page); /*!< in: page */
-/*************************************************************//**
-Gets the number of user records on page (the infimum and supremum records
-are not user records).
-@return number of user records */
-UNIV_INLINE
-ulint
-page_get_n_recs(
-/*============*/
- const page_t* page); /*!< in: index page */
-/***************************************************************//**
-Returns the number of records before the given record in chain.
-The number includes infimum and supremum records.
-This is the inverse function of page_rec_get_nth().
-@return number of records */
-UNIV_INTERN
-ulint
-page_rec_get_n_recs_before(
-/*=======================*/
- const rec_t* rec); /*!< in: the physical record */
-/*************************************************************//**
-Gets the number of records in the heap.
-@return number of records in the heap */
-UNIV_INLINE
-ulint
-page_dir_get_n_heap(
-/*================*/
- const page_t* page); /*!< in: index page */
-/*************************************************************//**
-Sets the number of records in the heap. */
-UNIV_INLINE
-void
-page_dir_set_n_heap(
-/*================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL.
- Note that the size of the dense page directory
- in the compressed page trailer is
- n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */
- ulint n_heap);/*!< in: number of records */
-/*************************************************************//**
-Gets the number of dir slots in directory.
-@return number of slots */
-UNIV_INLINE
-ulint
-page_dir_get_n_slots(
-/*=================*/
- const page_t* page); /*!< in: index page */
-/*************************************************************//**
-Sets the number of dir slots in directory. */
-UNIV_INLINE
-void
-page_dir_set_n_slots(
-/*=================*/
- page_t* page, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- ulint n_slots);/*!< in: number of slots */
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Gets pointer to nth directory slot.
-@return pointer to dir slot */
-UNIV_INLINE
-page_dir_slot_t*
-page_dir_get_nth_slot(
-/*==================*/
- const page_t* page, /*!< in: index page */
- ulint n); /*!< in: position */
-#else /* UNIV_DEBUG */
-# define page_dir_get_nth_slot(page, n) \
- ((page) + UNIV_PAGE_SIZE - PAGE_DIR \
- - (n + 1) * PAGE_DIR_SLOT_SIZE)
-#endif /* UNIV_DEBUG */
-/**************************************************************//**
-Used to check the consistency of a record on a page.
-@return TRUE if succeed */
-UNIV_INLINE
-ibool
-page_rec_check(
-/*===========*/
- const rec_t* rec); /*!< in: record */
-/***************************************************************//**
-Gets the record pointed to by a directory slot.
-@return pointer to record */
-UNIV_INLINE
-const rec_t*
-page_dir_slot_get_rec(
-/*==================*/
- const page_dir_slot_t* slot); /*!< in: directory slot */
-/***************************************************************//**
-This is used to set the record offset in a directory slot. */
-UNIV_INLINE
-void
-page_dir_slot_set_rec(
-/*==================*/
- page_dir_slot_t* slot, /*!< in: directory slot */
- rec_t* rec); /*!< in: record on the page */
-/***************************************************************//**
-Gets the number of records owned by a directory slot.
-@return number of records */
-UNIV_INLINE
-ulint
-page_dir_slot_get_n_owned(
-/*======================*/
- const page_dir_slot_t* slot); /*!< in: page directory slot */
-/***************************************************************//**
-This is used to set the owned records field of a directory slot. */
-UNIV_INLINE
-void
-page_dir_slot_set_n_owned(
-/*======================*/
- page_dir_slot_t*slot, /*!< in/out: directory slot */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint n); /*!< in: number of records owned by the slot */
-/************************************************************//**
-Calculates the space reserved for directory slots of a given
-number of records. The exact value is a fraction number
-n * PAGE_DIR_SLOT_SIZE / PAGE_DIR_SLOT_MIN_N_OWNED, and it is
-rounded upwards to an integer. */
-UNIV_INLINE
-ulint
-page_dir_calc_reserved_space(
-/*=========================*/
- ulint n_recs); /*!< in: number of records */
-/***************************************************************//**
-Looks for the directory slot which owns the given record.
-@return the directory slot number */
-UNIV_INTERN
-ulint
-page_dir_find_owner_slot(
-/*=====================*/
- const rec_t* rec); /*!< in: the physical record */
-/************************************************************//**
-Determine whether the page is in new-style compact format.
-@return nonzero if the page is in compact format, zero if it is in
-old-style format */
-UNIV_INLINE
-ulint
-page_is_comp(
-/*=========*/
- const page_t* page); /*!< in: index page */
-/************************************************************//**
-TRUE if the record is on a page in compact format.
-@return nonzero if in compact format */
-UNIV_INLINE
-ulint
-page_rec_is_comp(
-/*=============*/
- const rec_t* rec); /*!< in: record */
-/***************************************************************//**
-Returns the heap number of a record.
-@return heap number */
-UNIV_INLINE
-ulint
-page_rec_get_heap_no(
-/*=================*/
- const rec_t* rec); /*!< in: the physical record */
-/************************************************************//**
-Determine whether the page is a B-tree leaf.
-@return true if the page is a B-tree leaf (PAGE_LEVEL = 0) */
-UNIV_INLINE
-bool
-page_is_leaf(
-/*=========*/
- const page_t* page) /*!< in: page */
- MY_ATTRIBUTE((warn_unused_result));
-/************************************************************//**
-Determine whether the page is empty.
-@return true if the page is empty (PAGE_N_RECS = 0) */
-UNIV_INLINE
-bool
-page_is_empty(
-/*==========*/
- const page_t* page) /*!< in: page */
- MY_ATTRIBUTE((nonnull, pure));
-/************************************************************//**
-Determine whether the page contains garbage.
-@return true if the page contains garbage (PAGE_GARBAGE is not 0) */
-UNIV_INLINE
-bool
-page_has_garbage(
-/*=============*/
- const page_t* page) /*!< in: page */
- MY_ATTRIBUTE((nonnull, pure));
-/************************************************************//**
-Gets the pointer to the next record on the page.
-@return pointer to next record */
-UNIV_INLINE
-const rec_t*
-page_rec_get_next_low(
-/*==================*/
- const rec_t* rec, /*!< in: pointer to record */
- ulint comp); /*!< in: nonzero=compact page layout */
-/************************************************************//**
-Gets the pointer to the next record on the page.
-@return pointer to next record */
-UNIV_INLINE
-rec_t*
-page_rec_get_next(
-/*==============*/
- const rec_t* rec); /*!< in: pointer to record */
-/************************************************************//**
-Gets the pointer to the next record on the page.
-@return pointer to next record */
-UNIV_INLINE
-const rec_t*
-page_rec_get_next_const(
-/*====================*/
- const rec_t* rec); /*!< in: pointer to record */
-/************************************************************//**
-Gets the pointer to the next non delete-marked record on the page.
-If all subsequent records are delete-marked, then this function
-will return the supremum record.
-@return pointer to next non delete-marked record or pointer to supremum */
-UNIV_INLINE
-const rec_t*
-page_rec_get_next_non_del_marked(
-/*=============================*/
- const rec_t* rec); /*!< in: pointer to record */
-/************************************************************//**
-Sets the pointer to the next record on the page. */
-UNIV_INLINE
-void
-page_rec_set_next(
-/*==============*/
- rec_t* rec, /*!< in: pointer to record,
- must not be page supremum */
- const rec_t* next); /*!< in: pointer to next record,
- must not be page infimum */
-/************************************************************//**
-Gets the pointer to the previous record.
-@return pointer to previous record */
-UNIV_INLINE
-const rec_t*
-page_rec_get_prev_const(
-/*====================*/
- const rec_t* rec); /*!< in: pointer to record, must not be page
- infimum */
-/************************************************************//**
-Gets the pointer to the previous record.
-@return pointer to previous record */
-UNIV_INLINE
-rec_t*
-page_rec_get_prev(
-/*==============*/
- rec_t* rec); /*!< in: pointer to record,
- must not be page infimum */
-/************************************************************//**
-TRUE if the record is a user record on the page.
-@return TRUE if a user record */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec_low(
-/*=====================*/
- ulint offset) /*!< in: record offset on page */
- MY_ATTRIBUTE((const));
-/************************************************************//**
-TRUE if the record is the supremum record on a page.
-@return TRUE if the supremum record */
-UNIV_INLINE
-ibool
-page_rec_is_supremum_low(
-/*=====================*/
- ulint offset) /*!< in: record offset on page */
- MY_ATTRIBUTE((const));
-/************************************************************//**
-TRUE if the record is the infimum record on a page.
-@return TRUE if the infimum record */
-UNIV_INLINE
-ibool
-page_rec_is_infimum_low(
-/*====================*/
- ulint offset) /*!< in: record offset on page */
- MY_ATTRIBUTE((const));
-
-/************************************************************//**
-TRUE if the record is a user record on the page.
-@return TRUE if a user record */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec(
-/*=================*/
- const rec_t* rec) /*!< in: record */
- MY_ATTRIBUTE((const));
-/************************************************************//**
-TRUE if the record is the supremum record on a page.
-@return TRUE if the supremum record */
-UNIV_INLINE
-ibool
-page_rec_is_supremum(
-/*=================*/
- const rec_t* rec) /*!< in: record */
- MY_ATTRIBUTE((const));
-
-/************************************************************//**
-TRUE if the record is the infimum record on a page.
-@return TRUE if the infimum record */
-UNIV_INLINE
-ibool
-page_rec_is_infimum(
-/*================*/
- const rec_t* rec) /*!< in: record */
- MY_ATTRIBUTE((const));
-/***************************************************************//**
-Looks for the record which owns the given record.
-@return the owner record */
-UNIV_INLINE
-rec_t*
-page_rec_find_owner_rec(
-/*====================*/
- rec_t* rec); /*!< in: the physical record */
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Write a 32-bit field in a data dictionary record. */
-UNIV_INLINE
-void
-page_rec_write_field(
-/*=================*/
- rec_t* rec, /*!< in/out: record to update */
- ulint i, /*!< in: index of the field to update */
- ulint val, /*!< in: value to write */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
-/************************************************************//**
-Returns the maximum combined size of records which can be inserted on top
-of record heap.
-@return maximum combined size for inserted records */
-UNIV_INLINE
-ulint
-page_get_max_insert_size(
-/*=====================*/
- const page_t* page, /*!< in: index page */
- ulint n_recs);/*!< in: number of records */
-/************************************************************//**
-Returns the maximum combined size of records which can be inserted on top
-of record heap if page is first reorganized.
-@return maximum combined size for inserted records */
-UNIV_INLINE
-ulint
-page_get_max_insert_size_after_reorganize(
-/*======================================*/
- const page_t* page, /*!< in: index page */
- ulint n_recs);/*!< in: number of records */
-/*************************************************************//**
-Calculates free space if a page is emptied.
-@return free space */
-UNIV_INLINE
-ulint
-page_get_free_space_of_empty(
-/*=========================*/
- ulint comp) /*!< in: nonzero=compact page format */
- MY_ATTRIBUTE((const));
-/**********************************************************//**
-Returns the base extra size of a physical record. This is the
-size of the fixed header, independent of the record size.
-@return REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
-UNIV_INLINE
-ulint
-page_rec_get_base_extra_size(
-/*=========================*/
- const rec_t* rec); /*!< in: physical record */
-/************************************************************//**
-Returns the sum of the sizes of the records in the record list
-excluding the infimum and supremum records.
-@return data in bytes */
-UNIV_INLINE
-ulint
-page_get_data_size(
-/*===============*/
- const page_t* page); /*!< in: index page */
-/************************************************************//**
-Allocates a block of memory from the head of the free list
-of an index page. */
-UNIV_INLINE
-void
-page_mem_alloc_free(
-/*================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page with enough
- space available for inserting the record,
- or NULL */
- rec_t* next_rec,/*!< in: pointer to the new head of the
- free record list */
- ulint need); /*!< in: number of bytes allocated */
-/************************************************************//**
-Allocates a block of memory from the heap of an index page.
-@return pointer to start of allocated buffer, or NULL if allocation fails */
-UNIV_INTERN
-byte*
-page_mem_alloc_heap(
-/*================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page with enough
- space available for inserting the record,
- or NULL */
- ulint need, /*!< in: total number of bytes needed */
- ulint* heap_no);/*!< out: this contains the heap number
- of the allocated record
- if allocation succeeds */
-/************************************************************//**
-Puts a record to free list. */
-UNIV_INLINE
-void
-page_mem_free(
-/*==========*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page,
- or NULL */
- rec_t* rec, /*!< in: pointer to the (origin of)
- record */
- const dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets);/*!< in: array returned by
- rec_get_offsets() */
-/**********************************************************//**
-Create an uncompressed B-tree index page.
-@return pointer to the page */
-UNIV_INTERN
-page_t*
-page_create(
-/*========*/
- buf_block_t* block, /*!< in: a buffer block where the
- page is created */
- mtr_t* mtr, /*!< in: mini-transaction handle */
- ulint comp); /*!< in: nonzero=compact page format */
-/**********************************************************//**
-Create a compressed B-tree index page.
-@return pointer to the page */
-UNIV_INTERN
-page_t*
-page_create_zip(
-/*============*/
- buf_block_t* block, /*!< in/out: a buffer frame where the
- page is created */
- dict_index_t* index, /*!< in: the index of the page */
- ulint level, /*!< in: the B-tree level of the page */
- trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************//**
-Empty a previously created B-tree index page. */
-UNIV_INTERN
-void
-page_create_empty(
-/*==============*/
- buf_block_t* block, /*!< in/out: B-tree block */
- dict_index_t* index, /*!< in: the index of the page */
- mtr_t* mtr) /*!< in/out: mini-transaction */
- MY_ATTRIBUTE((nonnull(1,2)));
-/*************************************************************//**
-Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page or compress the page.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit(). */
-UNIV_INTERN
-void
-page_copy_rec_list_end_no_locks(
-/*============================*/
- buf_block_t* new_block, /*!< in: index page to copy to */
- buf_block_t* block, /*!< in: index page of rec */
- rec_t* rec, /*!< in: record on page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr */
-/*************************************************************//**
-Copies records from page to new_page, from the given record onward,
-including that record. Infimum and supremum records are not copied.
-The records are copied to the start of the record list on new_page.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return pointer to the original successor of the infimum record on
-new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
-rec_t*
-page_copy_rec_list_end(
-/*===================*/
- buf_block_t* new_block, /*!< in/out: index page to copy to */
- buf_block_t* block, /*!< in: index page containing rec */
- rec_t* rec, /*!< in: record on page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr */
-/*************************************************************//**
-Copies records from page to new_page, up to the given record, NOT
-including that record. Infimum and supremum records are not copied.
-The records are copied to the end of the record list on new_page.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return pointer to the original predecessor of the supremum record on
-new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
-rec_t*
-page_copy_rec_list_start(
-/*=====================*/
- buf_block_t* new_block, /*!< in/out: index page to copy to */
- buf_block_t* block, /*!< in: index page containing rec */
- rec_t* rec, /*!< in: record on page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr */
-/*************************************************************//**
-Deletes records from a page from a given record onward, including that record.
-The infimum and supremum records are not deleted. */
-UNIV_INTERN
-void
-page_delete_rec_list_end(
-/*=====================*/
- rec_t* rec, /*!< in: pointer to record on page */
- buf_block_t* block, /*!< in: buffer block of the page */
- dict_index_t* index, /*!< in: record descriptor */
- ulint n_recs, /*!< in: number of records to delete,
- or ULINT_UNDEFINED if not known */
- ulint size, /*!< in: the sum of the sizes of the
- records in the end of the chain to
- delete, or ULINT_UNDEFINED if not known */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull));
-/*************************************************************//**
-Deletes records from page, up to the given record, NOT including
-that record. Infimum and supremum records are not deleted. */
-UNIV_INTERN
-void
-page_delete_rec_list_start(
-/*=======================*/
- rec_t* rec, /*!< in: record on page */
- buf_block_t* block, /*!< in: buffer block of the page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull));
-/*************************************************************//**
-Moves record list end to another page. Moved records include
-split_rec.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return TRUE on success; FALSE on compression failure (new_block will
-be decompressed) */
-UNIV_INTERN
-ibool
-page_move_rec_list_end(
-/*===================*/
- buf_block_t* new_block, /*!< in/out: index page where to move */
- buf_block_t* block, /*!< in: index page from where to move */
- rec_t* split_rec, /*!< in: first record to move */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr */
-/*************************************************************//**
-Moves record list start to another page. Moved records do not include
-split_rec.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return TRUE on success; FALSE on compression failure */
-UNIV_INTERN
-ibool
-page_move_rec_list_start(
-/*=====================*/
- buf_block_t* new_block, /*!< in/out: index page where to move */
- buf_block_t* block, /*!< in/out: page containing split_rec */
- rec_t* split_rec, /*!< in: first record not to move */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull(1, 2, 4, 5)));
-/****************************************************************//**
-Splits a directory slot which owns too many records. */
-UNIV_INTERN
-void
-page_dir_split_slot(
-/*================*/
- page_t* page, /*!< in: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be written, or NULL */
- ulint slot_no);/*!< in: the directory slot */
-/*************************************************************//**
-Tries to balance the given directory slot with too few records
-with the upper neighbor, so that there are at least the minimum number
-of records owned by the slot; this may result in the merging of
-two slots. */
-UNIV_INTERN
-void
-page_dir_balance_slot(
-/*==================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint slot_no);/*!< in: the directory slot */
-/**********************************************************//**
-Parses a log record of a record list end or start deletion.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_parse_delete_rec_list(
-/*=======================*/
- byte type, /*!< in: MLOG_LIST_END_DELETE,
- MLOG_LIST_START_DELETE,
- MLOG_COMP_LIST_END_DELETE or
- MLOG_COMP_LIST_START_DELETE */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in/out: buffer block or NULL */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/***********************************************************//**
-Parses a redo log record of creating a page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_parse_create(
-/*==============*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- ulint comp, /*!< in: nonzero=compact page format */
- buf_block_t* block, /*!< in: block or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Prints record contents including the data relevant only in
-the index page context. */
-UNIV_INTERN
-void
-page_rec_print(
-/*===========*/
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: record descriptor */
-# ifdef UNIV_BTR_PRINT
-/***************************************************************//**
-This is used to print the contents of the directory for
-debugging purposes. */
-UNIV_INTERN
-void
-page_dir_print(
-/*===========*/
- page_t* page, /*!< in: index page */
- ulint pr_n); /*!< in: print n first and n last entries */
-/***************************************************************//**
-This is used to print the contents of the page record list for
-debugging purposes. */
-UNIV_INTERN
-void
-page_print_list(
-/*============*/
- buf_block_t* block, /*!< in: index page */
- dict_index_t* index, /*!< in: dictionary index of the page */
- ulint pr_n); /*!< in: print n first and n last entries */
-/***************************************************************//**
-Prints the info in a page header. */
-UNIV_INTERN
-void
-page_header_print(
-/*==============*/
- const page_t* page); /*!< in: index page */
-/***************************************************************//**
-This is used to print the contents of the page for
-debugging purposes. */
-UNIV_INTERN
-void
-page_print(
-/*=======*/
- buf_block_t* block, /*!< in: index page */
- dict_index_t* index, /*!< in: dictionary index of the page */
- ulint dn, /*!< in: print dn first and last entries
- in directory */
- ulint rn); /*!< in: print rn first and last records
- in the record list */
-# endif /* UNIV_BTR_PRINT */
-#endif /* !UNIV_HOTBACKUP */
-/***************************************************************//**
-The following is used to validate a record on a page. This function
-differs from rec_validate as it can also check the n_owned field and
-the heap_no field.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_rec_validate(
-/*==============*/
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/***************************************************************//**
-Checks that the first directory slot points to the infimum record and
-the last to the supremum. This function is intended to track if the
-bug fixed in 4.0.14 has caused corruption to users' databases. */
-UNIV_INTERN
-void
-page_check_dir(
-/*===========*/
- const page_t* page); /*!< in: index page */
-/***************************************************************//**
-This function checks the consistency of an index page when we do not
-know the index. This is also resilient so that this should never crash
-even if the page is total garbage.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_simple_validate_old(
-/*=====================*/
- const page_t* page); /*!< in: index page in ROW_FORMAT=REDUNDANT */
-/***************************************************************//**
-This function checks the consistency of an index page when we do not
-know the index. This is also resilient so that this should never crash
-even if the page is total garbage.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_simple_validate_new(
-/*=====================*/
- const page_t* page); /*!< in: index page in ROW_FORMAT!=REDUNDANT */
-/***************************************************************//**
-This function checks the consistency of an index page.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_validate(
-/*==========*/
- const page_t* page, /*!< in: index page */
- dict_index_t* index); /*!< in: data dictionary index containing
- the page record type definition */
-/***************************************************************//**
-Looks in the page record list for a record with the given heap number.
-@return record, NULL if not found */
-
-const rec_t*
-page_find_rec_with_heap_no(
-/*=======================*/
- const page_t* page, /*!< in: index page */
- ulint heap_no);/*!< in: heap number */
-/** Get the last non-delete-marked record on a page.
-@param[in] page index tree leaf page
-@return the last record, not delete-marked
-@retval infimum record if all records are delete-marked */
-
-const rec_t*
-page_find_rec_max_not_deleted(
- const page_t* page);
-
-#endif /* #ifndef UNIV_INNOCHECKSUM */
-
-/** Issue a warning when the checksum that is stored in the page is valid,
-but different than the global setting innodb_checksum_algorithm.
-@param[in] curr_algo current checksum algorithm
-@param[in] page_checksum page valid checksum
-@param[in] space_id tablespace id
-@param[in] page_no page number */
-void
-page_warn_strict_checksum(
- srv_checksum_algorithm_t curr_algo,
- srv_checksum_algorithm_t page_checksum,
- ulint space_id,
- ulint page_no);
-
-#ifndef UNIV_INNOCHECKSUM
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE UNIV_INLINE_ORIGINAL
-#endif
-
-#ifndef UNIV_NONINL
-#include "page0page.ic"
-#endif
-
-#endif /* #ifndef UNIV_INNOCHECKSUM */
-
-#endif
diff --git a/storage/xtradb/include/page0page.ic b/storage/xtradb/include/page0page.ic
deleted file mode 100644
index 364536b86f8..00000000000
--- a/storage/xtradb/include/page0page.ic
+++ /dev/null
@@ -1,1184 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/page0page.ic
-Index page routines
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#include "mach0data.h"
-#ifdef UNIV_DEBUG
-# include "log0recv.h"
-#endif /* UNIV_DEBUG */
-#ifndef UNIV_HOTBACKUP
-# include "rem0cmp.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "mtr0log.h"
-#include "page0zip.h"
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE
-#endif
-
-extern my_bool srv_immediate_scrub_data_uncompressed;
-
-/************************************************************//**
-Gets the start of the page that contains the given address. The result
-is obtained by passing ptr to ut_align_down() with UNIV_PAGE_SIZE as the
-alignment.
-@return start of the page */
-UNIV_INLINE
-page_t*
-page_align(
-/*=======*/
- const void* ptr) /*!< in: pointer into a page frame */
-{
- return((page_t*) ut_align_down(ptr, UNIV_PAGE_SIZE));
-}
-/************************************************************//**
-Gets the offset of the given address within its page. The result is
-obtained by passing ptr to ut_align_offset() with UNIV_PAGE_SIZE as the
-alignment.
-@return offset from the start of the page */
-UNIV_INLINE
-ulint
-page_offset(
-/*========*/
- const void* ptr) /*!< in: pointer into a page frame */
-{
- return(ut_align_offset(ptr, UNIV_PAGE_SIZE));
-}
-/*************************************************************//**
-Returns the max trx id field value, read as an 8-byte value from
-PAGE_HEADER + PAGE_MAX_TRX_ID.
-@return value of the PAGE_MAX_TRX_ID field */
-UNIV_INLINE
-trx_id_t
-page_get_max_trx_id(
-/*================*/
- const page_t* page) /*!< in: page; must not be NULL
- (debug assertion) */
-{
- ut_ad(page);
-
- return(mach_read_from_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID));
-}
-
-/*************************************************************//**
-Sets the max trx id field value if trx_id is bigger than the previous
-value. The current value is read with page_get_max_trx_id() and the
-write is delegated to page_set_max_trx_id(); nothing is written when
-the stored value is already greater than or equal to trx_id.
-Debug builds assert that the memo of mtr contains the block with
-MTR_MEMO_PAGE_X_FIX and that the page is a leaf page. */
-UNIV_INLINE
-void
-page_update_max_trx_id(
-/*===================*/
- buf_block_t* block, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ut_ad(block);
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- /* During crash recovery, this function may be called on
- something else than a leaf page of a secondary index or the
- insert buffer index tree (dict_index_is_sec_or_ibuf() returns
- TRUE for the dummy indexes constructed during redo log
- application). In that case, PAGE_MAX_TRX_ID is unused,
- and trx_id is usually zero. */
- ut_ad(trx_id || recv_recovery_is_on()); /* a zero trx_id is accepted
- only while recovery is on */
- ut_ad(page_is_leaf(buf_block_get_frame(block)));
-
- if (page_get_max_trx_id(buf_block_get_frame(block)) < trx_id) {
-
- page_set_max_trx_id(block, page_zip, trx_id, mtr);
- }
-}
-
-/*************************************************************//**
-Reads the given header field as a 2-byte value from
-PAGE_HEADER + field.
-@return value of the header field */
-UNIV_INLINE
-ulint
-page_header_get_field(
-/*==================*/
- const page_t* page, /*!< in: page */
- ulint field) /*!< in: PAGE_LEVEL, ...; must not exceed
- PAGE_INDEX_ID (debug assertion) */
-{
- ut_ad(page);
- ut_ad(field <= PAGE_INDEX_ID);
-
- return(mach_read_from_2(page + PAGE_HEADER + field));
-}
-
-/*************************************************************//**
-Sets the given header field. The value is written as 2 bytes at
-PAGE_HEADER + field; when page_zip is not NULL, the same 2 bytes are
-also passed to page_zip_write_header(), with NULL as its last argument.
-Debug builds assert that field <= PAGE_N_RECS and that val (for
-PAGE_N_HEAP: val with bit 0x8000 masked off) is below UNIV_PAGE_SIZE. */
-UNIV_INLINE
-void
-page_header_set_field(
-/*==================*/
- page_t* page, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- ulint field, /*!< in: PAGE_N_DIR_SLOTS, ... */
- ulint val) /*!< in: value */
-{
- ut_ad(page);
- ut_ad(field <= PAGE_N_RECS);
- ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE);
- ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE); /* bit
- 0x8000 of PAGE_N_HEAP is the flag tested by page_is_comp(), so
- it is excluded from the range check */
-
- mach_write_to_2(page + PAGE_HEADER + field, val);
- if (page_zip) {
- page_zip_write_header(page_zip,
- page + PAGE_HEADER + field, 2, NULL);
- }
-}
-
-/*************************************************************//**
-Returns the offset stored in the given header field, as read by
-page_header_get_field(). Debug builds accept only PAGE_FREE,
-PAGE_LAST_INSERT and PAGE_HEAP_TOP as field, and assert that the
-value of PAGE_HEAP_TOP is nonzero.
-@return offset from the start of the page, or 0 */
-UNIV_INLINE
-ulint
-page_header_get_offs(
-/*=================*/
- const page_t* page, /*!< in: page */
- ulint field) /*!< in: PAGE_FREE, ... */
-{
- ulint offs;
-
- ut_ad((field == PAGE_FREE)
- || (field == PAGE_LAST_INSERT)
- || (field == PAGE_HEAP_TOP));
-
- offs = page_header_get_field(page, field);
-
- ut_ad((field != PAGE_HEAP_TOP) || offs);
-
- return(offs);
-}
-
-/*************************************************************//**
-Sets the pointer stored in the given header field. The pointer is
-stored as its offset from the start of the page (ptr - page), or as 0
-when ptr is NULL; the write itself is done by page_header_set_field().
-Debug builds accept only PAGE_FREE, PAGE_LAST_INSERT and PAGE_HEAP_TOP
-as field, and assert that the offset stored in PAGE_HEAP_TOP is
-nonzero. */
-UNIV_INLINE
-void
-page_header_set_ptr(
-/*================*/
- page_t* page, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- ulint field, /*!< in: PAGE_FREE, ... */
- const byte* ptr) /*!< in: pointer or NULL */
-{
- ulint offs;
-
- ut_ad(page);
- ut_ad((field == PAGE_FREE)
- || (field == PAGE_LAST_INSERT)
- || (field == PAGE_HEAP_TOP));
-
- if (ptr == NULL) {
- offs = 0;
- } else {
- offs = ptr - page;
- }
-
- ut_ad((field != PAGE_HEAP_TOP) || offs);
-
- page_header_set_field(page, page_zip, field, offs);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Resets the last insert info field in the page header by writing 0 to
-PAGE_HEADER + PAGE_LAST_INSERT. Writes to mlog about this operation:
-for a compressed page the 2 bytes are written directly and then passed
-to page_zip_write_header() together with mtr; otherwise the write is
-done through mlog_write_ulint() with MLOG_2BYTES. */
-UNIV_INLINE
-void
-page_header_reset_last_insert(
-/*==========================*/
- page_t* page, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(page && mtr);
-
- if (page_zip) {
- mach_write_to_2(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0);
- page_zip_write_header(page_zip,
- page + (PAGE_HEADER + PAGE_LAST_INSERT),
- 2, mtr);
- } else {
- mlog_write_ulint(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0,
- MLOG_2BYTES, mtr);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/************************************************************//**
-Determine whether the page is in new-style compact format.
-The result is the PAGE_N_HEAP header field masked with 0x8000, so a
-nonzero return value is 0x8000 rather than 1.
-@return nonzero if the page is in compact format, zero if it is in
-old-style format */
-UNIV_INLINE
-ulint
-page_is_comp(
-/*=========*/
- const page_t* page) /*!< in: index page */
-{
- return(page_header_get_field(page, PAGE_N_HEAP) & 0x8000);
-}
-
-/************************************************************//**
-Determine whether the record is on a page in compact format.
-The page is located with page_align(rec) and tested with
-page_is_comp(), whose result is returned unchanged.
-@return nonzero if in compact format */
-UNIV_INLINE
-ulint
-page_rec_is_comp(
-/*=============*/
- const rec_t* rec) /*!< in: record */
-{
- return(page_is_comp(page_align(rec)));
-}
-
-/***************************************************************//**
-Returns the heap number of a record. The value is read with
-rec_get_heap_no_new() when page_rec_is_comp(rec) is nonzero, and with
-rec_get_heap_no_old() otherwise.
-@return heap number */
-UNIV_INLINE
-ulint
-page_rec_get_heap_no(
-/*=================*/
- const rec_t* rec) /*!< in: the physical record */
-{
- if (page_rec_is_comp(rec)) {
- return(rec_get_heap_no_new(rec));
- } else {
- return(rec_get_heap_no_old(rec));
- }
-}
-
-/************************************************************//**
-Determine whether the page is a B-tree leaf.
-The PAGE_LEVEL field is loaded directly as a uint16 instead of going
-through page_header_get_field(); because the value is only compared
-against zero, its byte order does not affect the result.
-@return true if the page is a B-tree leaf (PAGE_LEVEL = 0) */
-UNIV_INLINE
-bool
-page_is_leaf(
-/*=========*/
- const page_t* page) /*!< in: page */
-{
- return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_LEVEL)));
-}
-
-/************************************************************//**
-Determine whether the page is empty.
-The PAGE_N_RECS field is loaded directly as a uint16 instead of going
-through page_header_get_field(); because the value is only compared
-against zero, its byte order does not affect the result.
-@return true if the page is empty (PAGE_N_RECS = 0) */
-UNIV_INLINE
-bool
-page_is_empty(
-/*==========*/
- const page_t* page) /*!< in: page */
-{
- return(!*(const uint16*) (page + (PAGE_HEADER + PAGE_N_RECS)));
-}
-
-/************************************************************//**
-Determine whether the page contains garbage.
-The PAGE_GARBAGE field is loaded directly as a uint16 instead of going
-through page_header_get_field(); because the value is only compared
-against zero, its byte order does not affect the result.
-@return true if the page contains garbage (PAGE_GARBAGE is not 0) */
-UNIV_INLINE
-bool
-page_has_garbage(
-/*=============*/
- const page_t* page) /*!< in: page */
-{
- return(!!*(const uint16*) (page + (PAGE_HEADER + PAGE_GARBAGE)));
-}
-
-/************************************************************//**
-Gets the offset of the infimum record, the first record in the record
-list of the page: PAGE_NEW_INFIMUM if page_is_comp(page) is nonzero,
-PAGE_OLD_INFIMUM otherwise. Debug builds assert that page is not NULL
-and that it points to the start of a page (page_offset(page) == 0).
-@return offset of the first record in record list, relative from page */
-UNIV_INLINE
-ulint
-page_get_infimum_offset(
-/*====================*/
- const page_t* page) /*!< in: page which must have record(s) */
-{
- ut_ad(page);
- ut_ad(!page_offset(page));
-
- if (page_is_comp(page)) {
- return(PAGE_NEW_INFIMUM);
- } else {
- return(PAGE_OLD_INFIMUM);
- }
-}
-
-/************************************************************//**
-Gets the offset of the supremum record, the last record in the record
-list of the page: PAGE_NEW_SUPREMUM if page_is_comp(page) is nonzero,
-PAGE_OLD_SUPREMUM otherwise. Debug builds assert that page is not NULL
-and that it points to the start of a page (page_offset(page) == 0).
-@return offset of the last record in record list, relative from page */
-UNIV_INLINE
-ulint
-page_get_supremum_offset(
-/*=====================*/
- const page_t* page) /*!< in: page which must have record(s) */
-{
- ut_ad(page);
- ut_ad(!page_offset(page));
-
- if (page_is_comp(page)) {
- return(PAGE_NEW_SUPREMUM);
- } else {
- return(PAGE_OLD_SUPREMUM);
- }
-}
-
-/************************************************************//**
-TRUE if the record is a user record on the page, that is, if its
-offset is none of PAGE_NEW_INFIMUM, PAGE_NEW_SUPREMUM, PAGE_OLD_INFIMUM
-and PAGE_OLD_SUPREMUM. Only the offset is available here, so it is
-compared against the constants of both page formats.
-The #if/#error directives are compile-time checks on the relative
-order of these constants; the first one guarantees that
-PAGE_NEW_INFIMUM is not above PAGE_OLD_INFIMUM, so it can serve as the
-lower bound in the debug assertion for either format.
-@return TRUE if a user record */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec_low(
-/*=====================*/
- ulint offset) /*!< in: record offset on page */
-{
- ut_ad(offset >= PAGE_NEW_INFIMUM);
-#if PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM
-# error "PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM"
-#endif
-#if PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM
-# error "PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM"
-#endif
-#if PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM
-# error "PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM"
-#endif
-#if PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM
-# error "PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM"
-#endif
-#if PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END
-# error "PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END"
-#endif
-#if PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END
-# error "PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END"
-#endif
- ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
-
- return(offset != PAGE_NEW_SUPREMUM
- && offset != PAGE_NEW_INFIMUM
- && offset != PAGE_OLD_INFIMUM
- && offset != PAGE_OLD_SUPREMUM);
-}
-
-/************************************************************//**
-TRUE if the record is the supremum record on a page, that is, if its
-offset equals PAGE_NEW_SUPREMUM or PAGE_OLD_SUPREMUM. Debug builds
-assert that the offset lies between PAGE_NEW_INFIMUM and
-UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START, inclusive.
-@return TRUE if the supremum record */
-UNIV_INLINE
-ibool
-page_rec_is_supremum_low(
-/*=====================*/
- ulint offset) /*!< in: record offset on page */
-{
- ut_ad(offset >= PAGE_NEW_INFIMUM);
- ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
-
- return(offset == PAGE_NEW_SUPREMUM
- || offset == PAGE_OLD_SUPREMUM);
-}
-
-/************************************************************//**
-TRUE if the record is the infimum record on a page, that is, if its
-offset equals PAGE_NEW_INFIMUM or PAGE_OLD_INFIMUM. Debug builds
-assert that the offset lies between PAGE_NEW_INFIMUM and
-UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START, inclusive.
-@return TRUE if the infimum record */
-UNIV_INLINE
-ibool
-page_rec_is_infimum_low(
-/*====================*/
- ulint offset) /*!< in: record offset on page */
-{
- ut_ad(offset >= PAGE_NEW_INFIMUM);
- ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
-
- return(offset == PAGE_NEW_INFIMUM || offset == PAGE_OLD_INFIMUM);
-}
-
-/************************************************************//**
-TRUE if the record is a user record on the page. The decision is made
-by page_rec_is_user_rec_low() on the offset of rec within its page;
-debug builds first assert page_rec_check(rec).
-@return TRUE if a user record */
-UNIV_INLINE
-ibool
-page_rec_is_user_rec(
-/*=================*/
- const rec_t* rec) /*!< in: record */
-{
- ut_ad(page_rec_check(rec));
-
- return(page_rec_is_user_rec_low(page_offset(rec)));
-}
-
-/************************************************************//**
-TRUE if the record is the supremum record on a page.
-@return TRUE if the supremum record */
-UNIV_INLINE
-ibool
-page_rec_is_supremum(
-/*=================*/
- const rec_t* rec) /*!< in: record */
-{
- ut_ad(page_rec_check(rec));
-
- return(page_rec_is_supremum_low(page_offset(rec)));
-}
-
-/************************************************************//**
-TRUE if the record is the infimum record on a page.
-@return TRUE if the infimum record */
-UNIV_INLINE
-ibool
-page_rec_is_infimum(
-/*================*/
- const rec_t* rec) /*!< in: record */
-{
- ut_ad(page_rec_check(rec));
-
- return(page_rec_is_infimum_low(page_offset(rec)));
-}
-
-/************************************************************//**
-Returns the nth record of the record list.
-This is the inverse function of page_rec_get_n_recs_before().
-@return nth record */
-UNIV_INLINE
-rec_t*
-page_rec_get_nth(
-/*=============*/
- page_t* page, /*!< in: page */
- ulint nth) /*!< in: nth record */
-{
- return((rec_t*) page_rec_get_nth_const(page, nth));
-}
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Returns the middle record of the records on the page. If there is an
-even number of records in the list, returns the first record of the
-upper half-list.
-@return middle record */
-UNIV_INLINE
-rec_t*
-page_get_middle_rec(
-/*================*/
- page_t* page) /*!< in: page */
-{
- ulint middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
-
- return(page_rec_get_nth(page, middle));
-}
-
-/*************************************************************//**
-Compares a data tuple to a physical record. Differs from the function
-cmp_dtuple_rec_with_match in the way that the record must reside on an
-index page, and also page infimum and supremum records can be given in
-the parameter rec. These are considered as the negative infinity and
-the positive infinity in the alphabetical order.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared */
-UNIV_INLINE
-int
-page_cmp_dtuple_rec_with_match(
-/*===========================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record on a page; may also
- be page infimum or supremum, in which case
- matched-parameter values below are not
- affected */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when function returns
- contains the value for current comparison */
- ulint* matched_bytes) /*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when function returns contains the
- value for current comparison */
-{
- ulint rec_offset;
-
- ut_ad(dtuple_check_typed(dtuple));
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec));
-
- rec_offset = page_offset(rec);
-
- if (rec_offset == PAGE_NEW_INFIMUM
- || rec_offset == PAGE_OLD_INFIMUM) {
-
- return(1);
-
- } else if (rec_offset == PAGE_NEW_SUPREMUM
- || rec_offset == PAGE_OLD_SUPREMUM) {
-
- return(-1);
- }
-
- return(cmp_dtuple_rec_with_match(dtuple, rec, offsets,
- matched_fields,
- matched_bytes));
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************//**
-Gets the page number.
-@return page number */
-UNIV_INLINE
-ulint
-page_get_page_no(
-/*=============*/
- const page_t* page) /*!< in: page */
-{
- ut_ad(page == page_align((page_t*) page));
- return(mach_read_from_4(page + FIL_PAGE_OFFSET));
-}
-
-/*************************************************************//**
-Gets the tablespace identifier.
-@return space id */
-UNIV_INLINE
-ulint
-page_get_space_id(
-/*==============*/
- const page_t* page) /*!< in: page */
-{
- ut_ad(page == page_align((page_t*) page));
- return(mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
-}
-
-/*************************************************************//**
-Gets the number of user records on page (infimum and supremum records
-are not user records).
-@return number of user records */
-UNIV_INLINE
-ulint
-page_get_n_recs(
-/*============*/
- const page_t* page) /*!< in: index page */
-{
- return(page_header_get_field(page, PAGE_N_RECS));
-}
-
-/*************************************************************//**
-Gets the number of dir slots in directory.
-@return number of slots */
-UNIV_INLINE
-ulint
-page_dir_get_n_slots(
-/*=================*/
- const page_t* page) /*!< in: index page */
-{
- return(page_header_get_field(page, PAGE_N_DIR_SLOTS));
-}
-/*************************************************************//**
-Sets the number of dir slots in directory. */
-UNIV_INLINE
-void
-page_dir_set_n_slots(
-/*=================*/
- page_t* page, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- ulint n_slots)/*!< in: number of slots */
-{
- page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots);
-}
-
-/*************************************************************//**
-Gets the number of records in the heap.
-@return number of user records */
-UNIV_INLINE
-ulint
-page_dir_get_n_heap(
-/*================*/
- const page_t* page) /*!< in: index page */
-{
- return(page_header_get_field(page, PAGE_N_HEAP) & 0x7fff);
-}
-
-/*************************************************************//**
-Sets the number of records in the heap. */
-UNIV_INLINE
-void
-page_dir_set_n_heap(
-/*================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL.
- Note that the size of the dense page directory
- in the compressed page trailer is
- n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */
- ulint n_heap) /*!< in: number of records */
-{
- ut_ad(n_heap < 0x8000);
- ut_ad(!page_zip || n_heap
- == (page_header_get_field(page, PAGE_N_HEAP) & 0x7fff) + 1);
-
- page_header_set_field(page, page_zip, PAGE_N_HEAP, n_heap
- | (0x8000
- & page_header_get_field(page, PAGE_N_HEAP)));
-}
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Gets pointer to nth directory slot.
-@return pointer to dir slot */
-UNIV_INLINE
-page_dir_slot_t*
-page_dir_get_nth_slot(
-/*==================*/
- const page_t* page, /*!< in: index page */
- ulint n) /*!< in: position */
-{
- ut_ad(page_dir_get_n_slots(page) > n);
-
- return((page_dir_slot_t*)
- page + UNIV_PAGE_SIZE - PAGE_DIR
- - (n + 1) * PAGE_DIR_SLOT_SIZE);
-}
-#endif /* UNIV_DEBUG */
-
-/**************************************************************//**
-Used to check the consistency of a record on a page.
-@return TRUE if succeed */
-UNIV_INLINE
-ibool
-page_rec_check(
-/*===========*/
- const rec_t* rec) /*!< in: record */
-{
- const page_t* page = page_align(rec);
-
- ut_a(rec);
-
- ut_a(page_offset(rec) <= page_header_get_field(page, PAGE_HEAP_TOP));
- ut_a(page_offset(rec) >= PAGE_DATA);
-
- return(TRUE);
-}
-
-/***************************************************************//**
-Gets the record pointed to by a directory slot.
-@return pointer to record */
-UNIV_INLINE
-const rec_t*
-page_dir_slot_get_rec(
-/*==================*/
- const page_dir_slot_t* slot) /*!< in: directory slot */
-{
- return(page_align(slot) + mach_read_from_2(slot));
-}
-
-/***************************************************************//**
-This is used to set the record offset in a directory slot. */
-UNIV_INLINE
-void
-page_dir_slot_set_rec(
-/*==================*/
- page_dir_slot_t* slot, /*!< in: directory slot */
- rec_t* rec) /*!< in: record on the page */
-{
- ut_ad(page_rec_check(rec));
-
- mach_write_to_2(slot, page_offset(rec));
-}
-
-/***************************************************************//**
-Gets the number of records owned by a directory slot.
-@return number of records */
-UNIV_INLINE
-ulint
-page_dir_slot_get_n_owned(
-/*======================*/
- const page_dir_slot_t* slot) /*!< in: page directory slot */
-{
- const rec_t* rec = page_dir_slot_get_rec(slot);
- if (page_rec_is_comp(slot)) {
- return(rec_get_n_owned_new(rec));
- } else {
- return(rec_get_n_owned_old(rec));
- }
-}
-
-/***************************************************************//**
-This is used to set the owned records field of a directory slot. */
-UNIV_INLINE
-void
-page_dir_slot_set_n_owned(
-/*======================*/
- page_dir_slot_t*slot, /*!< in/out: directory slot */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint n) /*!< in: number of records owned by the slot */
-{
- rec_t* rec = (rec_t*) page_dir_slot_get_rec(slot);
- if (page_rec_is_comp(slot)) {
- rec_set_n_owned_new(rec, page_zip, n);
- } else {
- ut_ad(!page_zip);
- rec_set_n_owned_old(rec, n);
- }
-}
-
-/************************************************************//**
-Calculates the space reserved for directory slots of a given number of
-records. The exact value is a fraction number n * PAGE_DIR_SLOT_SIZE /
-PAGE_DIR_SLOT_MIN_N_OWNED, and it is rounded upwards to an integer. */
-UNIV_INLINE
-ulint
-page_dir_calc_reserved_space(
-/*=========================*/
- ulint n_recs) /*!< in: number of records */
-{
- return((PAGE_DIR_SLOT_SIZE * n_recs + PAGE_DIR_SLOT_MIN_N_OWNED - 1)
- / PAGE_DIR_SLOT_MIN_N_OWNED);
-}
-
-/************************************************************//**
-Gets the pointer to the next record on the page.
-@return pointer to next record */
-UNIV_INLINE
-const rec_t*
-page_rec_get_next_low(
-/*==================*/
- const rec_t* rec, /*!< in: pointer to record */
- ulint comp) /*!< in: nonzero=compact page layout */
-{
- ulint offs;
- const page_t* page;
-
- ut_ad(page_rec_check(rec));
-
- page = page_align(rec);
-
- offs = rec_get_next_offs(rec, comp);
-
- if (offs >= UNIV_PAGE_SIZE) {
- fprintf(stderr,
- "InnoDB: Next record offset is nonsensical %lu"
- " in record at offset %lu\n"
- "InnoDB: rec address %p, space id %lu, page %lu\n",
- (ulong) offs, (ulong) page_offset(rec),
- (void*) rec,
- (ulong) page_get_space_id(page),
- (ulong) page_get_page_no(page));
- buf_page_print(page, 0, 0);
-
- ut_error;
- } else if (offs == 0) {
-
- return(NULL);
- }
-
- return(page + offs);
-}
-
-/************************************************************//**
-Gets the pointer to the next record on the page.
-@return pointer to next record */
-UNIV_INLINE
-rec_t*
-page_rec_get_next(
-/*==============*/
- const rec_t* rec) /*!< in: pointer to record */
-{
- return((rec_t*) page_rec_get_next_low(rec, page_rec_is_comp(rec)));
-}
-
-/************************************************************//**
-Gets the pointer to the next record on the page.
-@return pointer to next record */
-UNIV_INLINE
-const rec_t*
-page_rec_get_next_const(
-/*====================*/
- const rec_t* rec) /*!< in: pointer to record */
-{
- return(page_rec_get_next_low(rec, page_rec_is_comp(rec)));
-}
-
-/************************************************************//**
-Gets the pointer to the next non delete-marked record on the page.
-If all subsequent records are delete-marked, then this function
-will return the supremum record.
-@return pointer to next non delete-marked record or pointer to supremum */
-UNIV_INLINE
-const rec_t*
-page_rec_get_next_non_del_marked(
-/*=============================*/
- const rec_t* rec) /*!< in: pointer to record */
-{
- const rec_t* r;
- ulint page_is_compact = page_rec_is_comp(rec);
-
- for (r = page_rec_get_next_const(rec);
- !page_rec_is_supremum(r)
- && rec_get_deleted_flag(r, page_is_compact);
- r = page_rec_get_next_const(r)) {
- /* noop */
- }
-
- return(r);
-}
-
-/************************************************************//**
-Sets the pointer to the next record on the page. */
-UNIV_INLINE
-void
-page_rec_set_next(
-/*==============*/
- rec_t* rec, /*!< in: pointer to record,
- must not be page supremum */
- const rec_t* next) /*!< in: pointer to next record,
- must not be page infimum */
-{
- ulint offs;
-
- ut_ad(page_rec_check(rec));
- ut_ad(!page_rec_is_supremum(rec));
- ut_ad(rec != next);
-
- ut_ad(!next || !page_rec_is_infimum(next));
- ut_ad(!next || page_align(rec) == page_align(next));
-
- offs = next != NULL ? page_offset(next) : 0;
-
- if (page_rec_is_comp(rec)) {
- rec_set_next_offs_new(rec, offs);
- } else {
- rec_set_next_offs_old(rec, offs);
- }
-}
-
-/************************************************************//**
-Gets the pointer to the previous record.
-@return pointer to previous record */
-UNIV_INLINE
-const rec_t*
-page_rec_get_prev_const(
-/*====================*/
- const rec_t* rec) /*!< in: pointer to record, must not be page
- infimum */
-{
- const page_dir_slot_t* slot;
- ulint slot_no;
- const rec_t* rec2;
- const rec_t* prev_rec = NULL;
- const page_t* page;
-
- ut_ad(page_rec_check(rec));
-
- page = page_align(rec);
-
- ut_ad(!page_rec_is_infimum(rec));
-
- slot_no = page_dir_find_owner_slot(rec);
-
- ut_a(slot_no != 0);
-
- slot = page_dir_get_nth_slot(page, slot_no - 1);
-
- rec2 = page_dir_slot_get_rec(slot);
-
- if (page_is_comp(page)) {
- while (rec != rec2) {
- prev_rec = rec2;
- rec2 = page_rec_get_next_low(rec2, TRUE);
- }
- } else {
- while (rec != rec2) {
- prev_rec = rec2;
- rec2 = page_rec_get_next_low(rec2, FALSE);
- }
- }
-
- ut_a(prev_rec);
-
- return(prev_rec);
-}
-
-/************************************************************//**
-Gets the pointer to the previous record.
-@return pointer to previous record */
-UNIV_INLINE
-rec_t*
-page_rec_get_prev(
-/*==============*/
- rec_t* rec) /*!< in: pointer to record, must not be page
- infimum */
-{
- return((rec_t*) page_rec_get_prev_const(rec));
-}
-
-/***************************************************************//**
-Looks for the record which owns the given record.
-@return the owner record */
-UNIV_INLINE
-rec_t*
-page_rec_find_owner_rec(
-/*====================*/
- rec_t* rec) /*!< in: the physical record */
-{
- ut_ad(page_rec_check(rec));
-
- if (page_rec_is_comp(rec)) {
- while (rec_get_n_owned_new(rec) == 0) {
- rec = page_rec_get_next(rec);
- }
- } else {
- while (rec_get_n_owned_old(rec) == 0) {
- rec = page_rec_get_next(rec);
- }
- }
-
- return(rec);
-}
-
-/**********************************************************//**
-Returns the base extra size of a physical record. This is the
-size of the fixed header, independent of the record size.
-@return REC_N_NEW_EXTRA_BYTES or REC_N_OLD_EXTRA_BYTES */
-UNIV_INLINE
-ulint
-page_rec_get_base_extra_size(
-/*=========================*/
- const rec_t* rec) /*!< in: physical record */
-{
-#if REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES
-# error "REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES"
-#endif
- return(REC_N_NEW_EXTRA_BYTES + (ulint) !page_rec_is_comp(rec));
-}
-
-/************************************************************//**
-Returns the sum of the sizes of the records in the record list, excluding
-the infimum and supremum records.
-@return data in bytes */
-UNIV_INLINE
-ulint
-page_get_data_size(
-/*===============*/
- const page_t* page) /*!< in: index page */
-{
- ulint ret;
-
- ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP)
- - (page_is_comp(page)
- ? PAGE_NEW_SUPREMUM_END
- : PAGE_OLD_SUPREMUM_END)
- - page_header_get_field(page, PAGE_GARBAGE));
-
- ut_ad(ret < UNIV_PAGE_SIZE);
-
- return(ret);
-}
-
-
-/************************************************************//**
-Allocates a block of memory from the free list of an index page. */
-UNIV_INLINE
-void
-page_mem_alloc_free(
-/*================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page with enough
- space available for inserting the record,
- or NULL */
- rec_t* next_rec,/*!< in: pointer to the new head of the
- free record list */
- ulint need) /*!< in: number of bytes allocated */
-{
- ulint garbage;
-
-#ifdef UNIV_DEBUG
- const rec_t* old_rec = page_header_get_ptr(page, PAGE_FREE);
- ulint next_offs;
-
- ut_ad(old_rec);
- next_offs = rec_get_next_offs(old_rec, page_is_comp(page));
- ut_ad(next_rec == (next_offs ? page + next_offs : NULL));
-#endif
-
- page_header_set_ptr(page, page_zip, PAGE_FREE, next_rec);
-
- garbage = page_header_get_field(page, PAGE_GARBAGE);
- ut_ad(garbage >= need);
-
- page_header_set_field(page, page_zip, PAGE_GARBAGE, garbage - need);
-}
-
-/*************************************************************//**
-Calculates free space if a page is emptied.
-@return free space */
-UNIV_INLINE
-ulint
-page_get_free_space_of_empty(
-/*=========================*/
- ulint comp) /*!< in: nonzero=compact page layout */
-{
- if (comp) {
- return((ulint)(UNIV_PAGE_SIZE
- - PAGE_NEW_SUPREMUM_END
- - PAGE_DIR
- - 2 * PAGE_DIR_SLOT_SIZE));
- }
-
- return((ulint)(UNIV_PAGE_SIZE
- - PAGE_OLD_SUPREMUM_END
- - PAGE_DIR
- - 2 * PAGE_DIR_SLOT_SIZE));
-}
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Write a 32-bit field in a data dictionary record. */
-UNIV_INLINE
-void
-page_rec_write_field(
-/*=================*/
- rec_t* rec, /*!< in/out: record to update */
- ulint i, /*!< in: index of the field to update */
- ulint val, /*!< in: value to write */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- byte* data;
- ulint len;
-
- data = rec_get_nth_field_old(rec, i, &len);
-
- ut_ad(len == 4);
-
- mlog_write_ulint(data, val, MLOG_4BYTES, mtr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/************************************************************//**
-Each user record on a page, and also the deleted user records in the heap
-takes its size plus the fraction of the dir cell size /
-PAGE_DIR_SLOT_MIN_N_OWNED bytes for it. If the sum of these exceeds the
-value of page_get_free_space_of_empty, the insert is impossible, otherwise
-it is allowed. This function returns the maximum combined size of records
-which can be inserted on top of the record heap.
-@return maximum combined size for inserted records */
-UNIV_INLINE
-ulint
-page_get_max_insert_size(
-/*=====================*/
- const page_t* page, /*!< in: index page */
- ulint n_recs) /*!< in: number of records */
-{
- ulint occupied;
- ulint free_space;
-
- if (page_is_comp(page)) {
- occupied = page_header_get_field(page, PAGE_HEAP_TOP)
- - PAGE_NEW_SUPREMUM_END
- + page_dir_calc_reserved_space(
- n_recs + page_dir_get_n_heap(page) - 2);
-
- free_space = page_get_free_space_of_empty(TRUE);
- } else {
- occupied = page_header_get_field(page, PAGE_HEAP_TOP)
- - PAGE_OLD_SUPREMUM_END
- + page_dir_calc_reserved_space(
- n_recs + page_dir_get_n_heap(page) - 2);
-
- free_space = page_get_free_space_of_empty(FALSE);
- }
-
- /* Above the 'n_recs +' part reserves directory space for the new
- inserted records; the '- 2' excludes page infimum and supremum
- records */
-
- if (occupied > free_space) {
-
- return(0);
- }
-
- return(free_space - occupied);
-}
-
-/************************************************************//**
-Returns the maximum combined size of records which can be inserted on top
-of the record heap if a page is first reorganized.
-@return maximum combined size for inserted records */
-UNIV_INLINE
-ulint
-page_get_max_insert_size_after_reorganize(
-/*======================================*/
- const page_t* page, /*!< in: index page */
- ulint n_recs) /*!< in: number of records */
-{
- ulint occupied;
- ulint free_space;
-
- occupied = page_get_data_size(page)
- + page_dir_calc_reserved_space(n_recs + page_get_n_recs(page));
-
- free_space = page_get_free_space_of_empty(page_is_comp(page));
-
- if (occupied > free_space) {
-
- return(0);
- }
-
- return(free_space - occupied);
-}
-
-/************************************************************//**
-Puts a record to free list. */
-UNIV_INLINE
-void
-page_mem_free(
-/*==========*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip, /*!< in/out: compressed page,
- or NULL */
- rec_t* rec, /*!< in: pointer to the
- (origin of) record */
- const dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets) /*!< in: array returned by
- rec_get_offsets() */
-{
- rec_t* free;
- ulint garbage;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- free = page_header_get_ptr(page, PAGE_FREE);
-
- bool scrub = srv_immediate_scrub_data_uncompressed;
- if (scrub) {
- /* scrub record */
- uint size = rec_offs_data_size(offsets);
- memset(rec, 0, size);
- }
-
- page_rec_set_next(rec, free);
- page_header_set_ptr(page, page_zip, PAGE_FREE, rec);
-
- garbage = page_header_get_field(page, PAGE_GARBAGE);
-
- page_header_set_field(page, page_zip, PAGE_GARBAGE,
- garbage + rec_offs_size(offsets));
-
- if (page_zip) {
- page_zip_dir_delete(page_zip, rec, index, offsets, free);
- } else {
- page_header_set_field(page, page_zip, PAGE_N_RECS,
- page_get_n_recs(page) - 1);
- }
-}
-
-#ifdef UNIV_MATERIALIZE
-#undef UNIV_INLINE
-#define UNIV_INLINE UNIV_INLINE_ORIGINAL
-#endif
diff --git a/storage/xtradb/include/page0types.h b/storage/xtradb/include/page0types.h
deleted file mode 100644
index 3b53de6cc2b..00000000000
--- a/storage/xtradb/include/page0types.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/page0types.h
-Index page routines
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#ifndef page0types_h
-#define page0types_h
-
-using namespace std;
-
-#include <map>
-
-#include "univ.i"
-#include "dict0types.h"
-#include "mtr0types.h"
-
-/** Eliminates a name collision on HP-UX */
-#define page_t ib_page_t
-/** Type of the index page */
-typedef byte page_t;
-/** Index page cursor */
-struct page_cur_t;
-
-/** Compressed index page */
-typedef byte page_zip_t;
-
-/* The following definitions would better belong to page0zip.h,
-but we cannot include page0zip.h from rem0rec.ic, because
-page0*.h includes rem0rec.h and may include rem0rec.ic. */
-
-/** Number of bits needed for representing different compressed page sizes */
-#define PAGE_ZIP_SSIZE_BITS 3
-
-/** Maximum compressed page shift size */
-#define PAGE_ZIP_SSIZE_MAX \
- (UNIV_ZIP_SIZE_SHIFT_MAX - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
-
-/* Make sure there are enough bits available to store the maximum zip
-ssize, which is the number of shifts from 512. */
-#if PAGE_ZIP_SSIZE_MAX >= (1 << PAGE_ZIP_SSIZE_BITS)
-# error "PAGE_ZIP_SSIZE_MAX >= (1 << PAGE_ZIP_SSIZE_BITS)"
-#endif
-
-/** Compressed page descriptor */
-struct page_zip_des_t
-{
- page_zip_t* data; /*!< compressed page data */
-
-#ifdef UNIV_DEBUG
- unsigned m_start:16; /*!< start offset of modification log */
- bool m_external; /*!< Allocated externally, not from the
- buffer pool */
-#endif /* UNIV_DEBUG */
- unsigned m_end:16; /*!< end offset of modification log */
- unsigned m_nonempty:1; /*!< TRUE if the modification log
- is not empty */
- unsigned n_blobs:12; /*!< number of externally stored
- columns on the page; the maximum
- is 744 on a 16 KiB page */
- unsigned ssize:PAGE_ZIP_SSIZE_BITS;
- /*!< 0 or compressed page shift size;
- the size in bytes is
- (UNIV_ZIP_SIZE_MIN >> 1) << ssize. */
-};
-
-/** Compression statistics for a given page size */
-struct page_zip_stat_t {
- /** Number of page compressions */
- ulint compressed;
- /** Number of successful page compressions */
- ulint compressed_ok;
- /** Number of page decompressions */
- ulint decompressed;
- /** Duration of page compressions in microseconds */
- ib_uint64_t compressed_usec;
- /** Duration of page decompressions in microseconds */
- ib_uint64_t decompressed_usec;
- page_zip_stat_t() :
- /* Initialize members to 0 so that when we do
- stlmap[key].compressed++ and element with "key" does not
- exist it gets inserted with zeroed members. */
- compressed(0),
- compressed_ok(0),
- decompressed(0),
- compressed_usec(0),
- decompressed_usec(0)
- { }
-};
-
-#ifndef UNIV_INNOCHECKSUM
-
-/** Compression statistics types */
-typedef map<index_id_t, page_zip_stat_t> page_zip_stat_per_index_t;
-
-/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
-extern page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
-/** Statistics on compression, indexed by dict_index_t::id */
-extern page_zip_stat_per_index_t page_zip_stat_per_index;
-extern ib_mutex_t page_zip_stat_per_index_mutex;
-#ifdef HAVE_PSI_INTERFACE
-extern mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
-#endif /* HAVE_PSI_INTERFACE */
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/**********************************************************************//**
-Write the "deleted" flag of a record on a compressed page. The flag must
-already have been written on the uncompressed page. */
-UNIV_INTERN
-void
-page_zip_rec_set_deleted(
-/*=====================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- const byte* rec, /*!< in: record on the uncompressed page */
- ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */
- MY_ATTRIBUTE((nonnull));
-
-/**********************************************************************//**
-Write the "owned" flag of a record on a compressed page. The n_owned field
-must already have been written on the uncompressed page. */
-UNIV_INTERN
-void
-page_zip_rec_set_owned(
-/*===================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- const byte* rec, /*!< in: record on the uncompressed page */
- ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
- MY_ATTRIBUTE((nonnull));
-
-/**********************************************************************//**
-Shift the dense page directory when a record is deleted. */
-UNIV_INTERN
-void
-page_zip_dir_delete(
-/*================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- byte* rec, /*!< in: deleted record */
- dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets,/*!< in: rec_get_offsets(rec) */
- const byte* free) /*!< in: previous start of the free list */
- MY_ATTRIBUTE((nonnull(1,2,3,4)));
-
-/**********************************************************************//**
-Add a slot to the dense page directory. */
-UNIV_INTERN
-void
-page_zip_dir_add_slot(
-/*==================*/
- page_zip_des_t* page_zip, /*!< in/out: compressed page */
- ulint is_clustered) /*!< in: nonzero for clustered index,
- zero for others */
- MY_ATTRIBUTE((nonnull));
-#endif
diff --git a/storage/xtradb/include/page0zip.h b/storage/xtradb/include/page0zip.h
deleted file mode 100644
index adafaa6d8b6..00000000000
--- a/storage/xtradb/include/page0zip.h
+++ /dev/null
@@ -1,554 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2012, Facebook Inc.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/page0zip.h
-Compressed page interface
-
-Created June 2005 by Marko Makela
-*******************************************************/
-
-#ifndef page0zip_h
-#define page0zip_h
-
-#ifdef UNIV_MATERIALIZE
-# undef UNIV_INLINE
-# define UNIV_INLINE
-#endif
-
-#ifndef UNIV_INNOCHECKSUM
-#include "page0types.h"
-#include "mtr0types.h"
-#include "dict0types.h"
-#include "srv0srv.h"
-#include "trx0types.h"
-#include "mem0mem.h"
-#else
-#include "univ.i"
-#endif /* !UNIV_INNOCHECKSUM */
-#include "buf0types.h"
-
-/* Compression level to be used by zlib. Settable by user. */
-extern uint page_zip_level;
-
-/* Default compression level. */
-#define DEFAULT_COMPRESSION_LEVEL 6
-
-/* Whether or not to log compressed page images to avoid possible
-compression algorithm changes in zlib. */
-extern my_bool page_zip_log_pages;
-
-#ifndef UNIV_INNOCHECKSUM
-/**********************************************************************//**
-Determine the size of a compressed page in bytes.
-@return size in bytes */
-UNIV_INLINE
-ulint
-page_zip_get_size(
-/*==============*/
- const page_zip_des_t* page_zip) /*!< in: compressed page */
- MY_ATTRIBUTE((nonnull, pure));
-/**********************************************************************//**
-Set the size of a compressed page in bytes. */
-UNIV_INLINE
-void
-page_zip_set_size(
-/*==============*/
- page_zip_des_t* page_zip, /*!< in/out: compressed page */
- ulint size); /*!< in: size in bytes */
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Determine if a record is so big that it needs to be stored externally.
-@return FALSE if the entire record can be stored locally on the page */
-UNIV_INLINE
-ibool
-page_zip_rec_needs_ext(
-/*===================*/
- ulint rec_size, /*!< in: length of the record in bytes */
- ulint comp, /*!< in: nonzero=compact format */
- ulint n_fields, /*!< in: number of fields in the record;
- ignored if zip_size == 0 */
- ulint zip_size) /*!< in: compressed page size in bytes, or 0 */
- MY_ATTRIBUTE((const));
-
-/**********************************************************************//**
-Determine the guaranteed free space on an empty page.
-@return minimum payload size on the page */
-UNIV_INTERN
-ulint
-page_zip_empty_size(
-/*================*/
- ulint n_fields, /*!< in: number of columns in the index */
- ulint zip_size) /*!< in: compressed page size in bytes */
- MY_ATTRIBUTE((const));
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Initialize a compressed page descriptor. */
-UNIV_INLINE
-void
-page_zip_des_init(
-/*==============*/
- page_zip_des_t* page_zip); /*!< in/out: compressed page
- descriptor */
-
-/**********************************************************************//**
-Configure the zlib allocator to use the given memory heap. */
-UNIV_INTERN
-void
-page_zip_set_alloc(
-/*===============*/
- void* stream, /*!< in/out: zlib stream */
- mem_heap_t* heap); /*!< in: memory heap to use */
-
-/**********************************************************************//**
-Compress a page.
-@return TRUE on success, FALSE on failure; page_zip will be left
-intact on failure. */
-UNIV_INTERN
-ibool
-page_zip_compress(
-/*==============*/
- page_zip_des_t* page_zip,/*!< in: size; out: data, n_blobs,
- m_start, m_end, m_nonempty */
- const page_t* page, /*!< in: uncompressed page */
- dict_index_t* index, /*!< in: index of the B-tree node */
- ulint level, /*!< in: compression level */
- mtr_t* mtr) /*!< in: mini-transaction, or NULL */
- MY_ATTRIBUTE((warn_unused_result));
-
-/**********************************************************************//**
-Decompress a page. This function should tolerate errors on the compressed
-page. Instead of letting assertions fail, it will return FALSE if an
-inconsistency is detected.
-@return TRUE on success, FALSE on failure */
-UNIV_INTERN
-ibool
-page_zip_decompress(
-/*================*/
- page_zip_des_t* page_zip,/*!< in: data, ssize;
- out: m_start, m_end, m_nonempty, n_blobs */
- page_t* page, /*!< out: uncompressed page, may be trashed */
- ibool all) /*!< in: TRUE=decompress the whole page;
- FALSE=verify but do not copy some
- page header fields that should not change
- after page creation */
- MY_ATTRIBUTE((nonnull(1,2)));
-
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Validate a compressed page descriptor.
-@return TRUE if ok */
-UNIV_INLINE
-ibool
-page_zip_simple_validate(
-/*=====================*/
- const page_zip_des_t* page_zip); /*!< in: compressed page
- descriptor */
-#endif /* UNIV_DEBUG */
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-#ifdef UNIV_ZIP_DEBUG
-/**********************************************************************//**
-Check that the compressed and decompressed pages match.
-@return TRUE if valid, FALSE if not */
-UNIV_INTERN
-ibool
-page_zip_validate_low(
-/*==================*/
- const page_zip_des_t* page_zip,/*!< in: compressed page */
- const page_t* page, /*!< in: uncompressed page */
- const dict_index_t* index, /*!< in: index of the page, if known */
- ibool sloppy) /*!< in: FALSE=strict,
- TRUE=ignore the MIN_REC_FLAG */
- MY_ATTRIBUTE((nonnull(1,2)));
-/**********************************************************************//**
-Check that the compressed and decompressed pages match. */
-UNIV_INTERN
-ibool
-page_zip_validate(
-/*==============*/
- const page_zip_des_t* page_zip,/*!< in: compressed page */
- const page_t* page, /*!< in: uncompressed page */
- const dict_index_t* index) /*!< in: index of the page, if known */
- MY_ATTRIBUTE((nonnull(1,2)));
-#endif /* UNIV_ZIP_DEBUG */
-
-#ifndef UNIV_INNOCHECKSUM
-/**********************************************************************//**
-Determine how big a record can be inserted without recompressing the page.
-@return a positive number indicating the maximum size of a record
-whose insertion is guaranteed to succeed, or zero or negative */
-UNIV_INLINE
-lint
-page_zip_max_ins_size(
-/*==================*/
- const page_zip_des_t* page_zip,/*!< in: compressed page */
- ibool is_clust)/*!< in: TRUE if clustered index */
- MY_ATTRIBUTE((nonnull, pure));
-
-/**********************************************************************//**
-Determine if enough space is available in the modification log.
-@return TRUE if page_zip_write_rec() will succeed */
-UNIV_INLINE
-ibool
-page_zip_available(
-/*===============*/
- const page_zip_des_t* page_zip,/*!< in: compressed page */
- ibool is_clust,/*!< in: TRUE if clustered index */
- ulint length, /*!< in: combined size of the record */
- ulint create) /*!< in: nonzero=add the record to
- the heap */
- MY_ATTRIBUTE((nonnull, pure));
-
-/**********************************************************************//**
-Write data to the uncompressed header portion of a page. The data must
-already have been written to the uncompressed page. */
-UNIV_INLINE
-void
-page_zip_write_header(
-/*==================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- const byte* str, /*!< in: address on the uncompressed page */
- ulint length, /*!< in: length of the data */
- mtr_t* mtr) /*!< in: mini-transaction, or NULL */
- MY_ATTRIBUTE((nonnull(1,2)));
-
-/**********************************************************************//**
-Write an entire record on the compressed page. The data must already
-have been written to the uncompressed page. */
-UNIV_INTERN
-void
-page_zip_write_rec(
-/*===============*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- const byte* rec, /*!< in: record being written */
- dict_index_t* index, /*!< in: the index the record belongs to */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint create) /*!< in: nonzero=insert, zero=update */
- MY_ATTRIBUTE((nonnull));
-
-/***********************************************************//**
-Parses a log record of writing a BLOB pointer of a record.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_zip_parse_write_blob_ptr(
-/*==========================*/
- byte* ptr, /*!< in: redo log buffer */
- byte* end_ptr,/*!< in: redo log buffer end */
- page_t* page, /*!< in/out: uncompressed page */
- page_zip_des_t* page_zip);/*!< in/out: compressed page */
-
-/**********************************************************************//**
-Write a BLOB pointer of a record on the leaf page of a clustered index.
-The information must already have been updated on the uncompressed page. */
-UNIV_INTERN
-void
-page_zip_write_blob_ptr(
-/*====================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- const byte* rec, /*!< in/out: record whose data is being
- written */
- dict_index_t* index, /*!< in: index of the page */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint n, /*!< in: column index */
- mtr_t* mtr) /*!< in: mini-transaction handle,
- or NULL if no logging is needed */
- MY_ATTRIBUTE((nonnull(1,2,3,4)));
-
-/***********************************************************//**
-Parses a log record of writing the node pointer of a record.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_zip_parse_write_node_ptr(
-/*==========================*/
- byte* ptr, /*!< in: redo log buffer */
- byte* end_ptr,/*!< in: redo log buffer end */
- page_t* page, /*!< in/out: uncompressed page */
- page_zip_des_t* page_zip);/*!< in/out: compressed page */
-
-/**********************************************************************//**
-Write the node pointer of a record on a non-leaf compressed page. */
-UNIV_INTERN
-void
-page_zip_write_node_ptr(
-/*====================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- byte* rec, /*!< in/out: record */
- ulint size, /*!< in: data size of rec */
- ulint ptr, /*!< in: node pointer */
- mtr_t* mtr) /*!< in: mini-transaction, or NULL */
- MY_ATTRIBUTE((nonnull(1,2)));
-
-/**********************************************************************//**
-Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
-UNIV_INTERN
-void
-page_zip_write_trx_id_and_roll_ptr(
-/*===============================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- byte* rec, /*!< in/out: record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint trx_id_col,/*!< in: column number of TRX_ID in rec */
- trx_id_t trx_id, /*!< in: transaction identifier */
- roll_ptr_t roll_ptr)/*!< in: roll_ptr */
- MY_ATTRIBUTE((nonnull));
-
-/**********************************************************************//**
-Write the "deleted" flag of a record on a compressed page. The flag must
-already have been written on the uncompressed page. */
-UNIV_INTERN
-void
-page_zip_rec_set_deleted(
-/*=====================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- const byte* rec, /*!< in: record on the uncompressed page */
- ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */
- MY_ATTRIBUTE((nonnull));
-
-/**********************************************************************//**
-Write the "owned" flag of a record on a compressed page. The n_owned field
-must already have been written on the uncompressed page. */
-UNIV_INTERN
-void
-page_zip_rec_set_owned(
-/*===================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- const byte* rec, /*!< in: record on the uncompressed page */
- ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
- MY_ATTRIBUTE((nonnull));
-
-/**********************************************************************//**
-Insert a record to the dense page directory. */
-UNIV_INTERN
-void
-page_zip_dir_insert(
-/*================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- const byte* prev_rec,/*!< in: record after which to insert */
- const byte* free_rec,/*!< in: record from which rec was
- allocated, or NULL */
- byte* rec); /*!< in: record to insert */
-
-/**********************************************************************//**
-Shift the dense page directory and the array of BLOB pointers
-when a record is deleted. */
-UNIV_INTERN
-void
-page_zip_dir_delete(
-/*================*/
- page_zip_des_t* page_zip, /*!< in/out: compressed page */
- byte* rec, /*!< in: deleted record */
- const dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets, /*!< in: rec_get_offsets(rec) */
- const byte* free) /*!< in: previous start of
- the free list */
- MY_ATTRIBUTE((nonnull(1,2,3,4)));
-
-/**********************************************************************//**
-Add a slot to the dense page directory. */
-UNIV_INTERN
-void
-page_zip_dir_add_slot(
-/*==================*/
- page_zip_des_t* page_zip, /*!< in/out: compressed page */
- ulint is_clustered) /*!< in: nonzero for clustered index,
- zero for others */
- MY_ATTRIBUTE((nonnull));
-
-/***********************************************************//**
-Parses a log record of writing to the header of a page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_zip_parse_write_header(
-/*========================*/
- byte* ptr, /*!< in: redo log buffer */
- byte* end_ptr,/*!< in: redo log buffer end */
- page_t* page, /*!< in/out: uncompressed page */
- page_zip_des_t* page_zip);/*!< in/out: compressed page */
-
-/**********************************************************************//**
-Write data to the uncompressed header portion of a page. The data must
-already have been written to the uncompressed page.
-However, the data portion of the uncompressed page may differ from
-the compressed page when a record is being inserted in
-page_cur_insert_rec_low(). */
-UNIV_INLINE
-void
-page_zip_write_header(
-/*==================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- const byte* str, /*!< in: address on the uncompressed page */
- ulint length, /*!< in: length of the data */
- mtr_t* mtr) /*!< in: mini-transaction, or NULL */
- MY_ATTRIBUTE((nonnull(1,2)));
-
-/**********************************************************************//**
-Reorganize and compress a page. This is a low-level operation for
-compressed pages, to be used when page_zip_compress() fails.
-On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
-The function btr_page_reorganize() should be preferred whenever possible.
-IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
-non-clustered index, the caller must update the insert buffer free
-bits in the same mini-transaction in such a way that the modification
-will be redo-logged.
-@return TRUE on success, FALSE on failure; page_zip will be left
-intact on failure, but page will be overwritten. */
-UNIV_INTERN
-ibool
-page_zip_reorganize(
-/*================*/
- buf_block_t* block, /*!< in/out: page with compressed page;
- on the compressed page, in: size;
- out: data, n_blobs,
- m_start, m_end, m_nonempty */
- dict_index_t* index, /*!< in: index of the B-tree node */
- mtr_t* mtr); /*!< in: mini-transaction */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Copy the records of a page byte for byte. Do not copy the page header
-or trailer, except those B-tree header fields that are directly
-related to the storage of records. Also copy PAGE_MAX_TRX_ID.
-NOTE: The caller must update the lock table and the adaptive hash index. */
-UNIV_INTERN
-void
-page_zip_copy_recs(
-/*===============*/
- page_zip_des_t* page_zip, /*!< out: copy of src_zip
- (n_blobs, m_start, m_end,
- m_nonempty, data[0..size-1]) */
- page_t* page, /*!< out: copy of src */
- const page_zip_des_t* src_zip, /*!< in: compressed page */
- const page_t* src, /*!< in: page */
- dict_index_t* index, /*!< in: index of the B-tree */
- mtr_t* mtr) /*!< in: mini-transaction */
- MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Parses a log record of compressing an index page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_zip_parse_compress(
-/*====================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< out: uncompressed page */
- page_zip_des_t* page_zip)/*!< out: compressed page */
- MY_ATTRIBUTE((warn_unused_result));
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/**********************************************************************//**
-Calculate the compressed page checksum.
-@return page checksum */
-UNIV_INTERN
-ulint
-page_zip_calc_checksum(
-/*===================*/
- const void* data, /*!< in: compressed page */
- ulint size, /*!< in: size of compressed page */
- srv_checksum_algorithm_t algo) /*!< in: algorithm to use */
- MY_ATTRIBUTE((nonnull));
-
-/**********************************************************************//**
-Verify a compressed page's checksum.
-@return TRUE if the stored checksum is valid according to the value of
-innodb_checksum_algorithm */
-UNIV_INTERN
-ibool
-page_zip_verify_checksum(
-/*=====================*/
- const void* data, /*!< in: compressed page */
- ulint size); /*!< in: size of compressed page */
-
-#ifndef UNIV_INNOCHECKSUM
-
-/**********************************************************************//**
-Write a log record of compressing an index page without the data on the page. */
-UNIV_INLINE
-void
-page_zip_compress_write_log_no_data(
-/*================================*/
- ulint level, /*!< in: compression level */
- const page_t* page, /*!< in: page that is compressed */
- dict_index_t* index, /*!< in: index */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-Parses a log record of compressing an index page without the data.
-@return end of log record or NULL */
-UNIV_INLINE
-byte*
-page_zip_parse_compress_no_data(
-/*============================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr, /*!< in: buffer end */
- page_t* page, /*!< in: uncompressed page */
- page_zip_des_t* page_zip, /*!< out: compressed page */
- dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull(1,2)));
-
-/**********************************************************************//**
-Reset the counters used for filling
-INFORMATION_SCHEMA.innodb_cmp_per_index. */
-UNIV_INLINE
-void
-page_zip_reset_stat_per_index();
-/*===========================*/
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-#ifndef UNIV_HOTBACKUP
-/** Check if a pointer to an uncompressed page matches a compressed page.
-When we IMPORT a tablespace the blocks and accompanying frames are allocated
-from outside the buffer pool.
-@param ptr pointer to an uncompressed page frame
-@param page_zip compressed page descriptor
-@return TRUE if ptr and page_zip refer to the same block */
-# define PAGE_ZIP_MATCH(ptr, page_zip) \
- (((page_zip)->m_external \
- && (page_align(ptr) + UNIV_PAGE_SIZE == (page_zip)->data)) \
- || buf_frame_get_page_zip(ptr) == (page_zip))
-#else /* !UNIV_HOTBACKUP */
-/** Check if a pointer to an uncompressed page matches a compressed page.
-@param ptr pointer to an uncompressed page frame
-@param page_zip compressed page descriptor
-@return TRUE if ptr and page_zip refer to the same block */
-# define PAGE_ZIP_MATCH(ptr, page_zip) \
- (page_align(ptr) + UNIV_PAGE_SIZE == (page_zip)->data)
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_MATERIALIZE
-# undef UNIV_INLINE
-# define UNIV_INLINE UNIV_INLINE_ORIGINAL
-#endif
-
-#ifndef UNIV_INNOCHECKSUM
-#ifndef UNIV_NONINL
-# include "page0zip.ic"
-#endif
-#endif /* !UNIV_INNOCHECKSUM */
-
-#endif /* page0zip_h */
diff --git a/storage/xtradb/include/page0zip.ic b/storage/xtradb/include/page0zip.ic
deleted file mode 100644
index 9a583086925..00000000000
--- a/storage/xtradb/include/page0zip.ic
+++ /dev/null
@@ -1,458 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/page0zip.ic
-Compressed page interface
-
-Created June 2005 by Marko Makela
-*******************************************************/
-
-#ifdef UNIV_MATERIALIZE
-# undef UNIV_INLINE
-# define UNIV_INLINE
-#endif
-
-#include "page0zip.h"
-#include "mtr0log.h"
-#include "page0page.h"
-
-/* The format of compressed pages is as follows.
-
-The header and trailer of the uncompressed pages, excluding the page
-directory in the trailer, are copied as is to the header and trailer
-of the compressed page.
-
-At the end of the compressed page, there is a dense page directory
-pointing to every user record contained on the page, including deleted
-records on the free list. The dense directory is indexed in the
-collation order, i.e., in the order in which the record list is
-linked on the uncompressed page. The infimum and supremum records are
-excluded. The two most significant bits of the entries are allocated
-for the delete-mark and an n_owned flag indicating the last record in
-a chain of records pointed to from the sparse page directory on the
-uncompressed page.
-
-The data between PAGE_ZIP_START and the last page directory entry will
-be written in compressed format, starting at offset PAGE_DATA.
-Infimum and supremum records are not stored. We exclude the
-REC_N_NEW_EXTRA_BYTES in every record header. These can be recovered
-from the dense page directory stored at the end of the compressed
-page.
-
-The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and
-roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of
-externally stored columns are stored separately, in ascending order of
-heap_no and column index, starting backwards from the dense page
-directory.
-
-The compressed data stream may be followed by a modification log
-covering the compressed portion of the page, as follows.
-
-MODIFICATION LOG ENTRY FORMAT
-- write record:
- - (heap_no - 1) << 1 (1..2 bytes)
- - extra bytes backwards
- - data bytes
-- clear record:
- - (heap_no - 1) << 1 | 1 (1..2 bytes)
-
-The integer values are stored in a variable-length format:
-- 0xxxxxxx: 0..127
-- 1xxxxxxx xxxxxxxx: 0..32767
-
-The end of the modification log is marked by a 0 byte.
-
-In summary, the compressed page looks like this:
-
-(1) Uncompressed page header (PAGE_DATA bytes)
-(2) Compressed index information
-(3) Compressed page data
-(4) Page modification log (page_zip->m_start..page_zip->m_end)
-(5) Empty zero-filled space
-(6) BLOB pointers (on leaf pages)
- - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column
- - in descending collation order
-(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes,
- - indexed by heap_no
- - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes
- - REC_NODE_PTR_SIZE for non-leaf pages
- - 0 otherwise
-(8) dense page directory, stored backwards
- - n_dense = n_heap - 2
- - existing records in ascending collation order
- - deleted records (free list) in link order
-*/
-
-/** Start offset of the area that will be compressed */
-#define PAGE_ZIP_START PAGE_NEW_SUPREMUM_END
-/** Size of a compressed page directory entry */
-#define PAGE_ZIP_DIR_SLOT_SIZE 2
-/** Mask of record offsets */
-#define PAGE_ZIP_DIR_SLOT_MASK 0x3fff
-/** 'owned' flag */
-#define PAGE_ZIP_DIR_SLOT_OWNED 0x4000
-/** 'deleted' flag */
-#define PAGE_ZIP_DIR_SLOT_DEL 0x8000
-
-/**********************************************************************//**
-Return the size of a compressed page in bytes, computed from the shift
-count stored in page_zip->ssize.
-@return size in bytes, or 0 when page_zip->ssize is zero */
-UNIV_INLINE
-ulint
-page_zip_get_size(
-/*==============*/
-	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
-{
-	if (page_zip->ssize == 0) {
-		return(0);
-	}
-
-	/* ssize == 1 corresponds to UNIV_ZIP_SIZE_MIN; every further
-	increment doubles the size. */
-	const ulint	n_bytes = (UNIV_ZIP_SIZE_MIN >> 1) << page_zip->ssize;
-
-	ut_ad(n_bytes >= UNIV_ZIP_SIZE_MIN);
-	ut_ad(n_bytes <= UNIV_PAGE_SIZE);
-
-	return(n_bytes);
-}
-/**********************************************************************//**
-Set the size of a compressed page in bytes.
-The size is stored as a shift count in page_zip->ssize, such that
-page_zip_get_size() yields (UNIV_ZIP_SIZE_MIN >> 1) << ssize. A size of
-0 is stored as ssize = 0. */
-UNIV_INLINE
-void
-page_zip_set_size(
-/*==============*/
-	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
-	ulint		size)		/*!< in: size in bytes; 0 or a
-					power of 2 */
-{
-	if (size) {
-		int	ssize;
-
-		ut_ad(ut_is_2pow(size));
-
-		/* Find the smallest shift count that reaches size. The
-		base is the same expression that page_zip_get_size() uses
-		(instead of a literal 512), so the setter and the getter
-		cannot drift apart, and the shift is done in ulint
-		rather than int. */
-		for (ssize = 1;
-		     size > ((ulint) (UNIV_ZIP_SIZE_MIN >> 1) << ssize);
-		     ssize++) {
-		}
-
-		page_zip->ssize = ssize;
-	} else {
-		page_zip->ssize = 0;
-	}
-
-	/* The getter must reproduce exactly what was requested. */
-	ut_ad(page_zip_get_size(page_zip) == size);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Decide whether a record of the given size must be stored externally.
-@return FALSE if the entire record can be stored locally on the page */
-UNIV_INLINE
-ibool
-page_zip_rec_needs_ext(
-/*===================*/
-	ulint	rec_size,	/*!< in: length of the record in bytes */
-	ulint	comp,		/*!< in: nonzero=compact format */
-	ulint	n_fields,	/*!< in: number of fields in the record;
-				ignored if zip_size == 0 */
-	ulint	zip_size)	/*!< in: compressed page size in bytes, or 0 */
-{
-	ut_ad(rec_size
-	      > (comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES));
-	ut_ad(ut_is_2pow(zip_size));
-	ut_ad(comp || !zip_size);
-
-#if UNIV_PAGE_SIZE_MAX > REC_MAX_DATA_SIZE
-	if (rec_size >= REC_MAX_DATA_SIZE) {
-		return(TRUE);
-	}
-#endif
-
-	if (!zip_size) {
-		/* Uncompressed page: the record must be smaller than
-		half of the free space of an empty page. */
-		return(rec_size >= page_get_free_space_of_empty(comp) / 2);
-	}
-
-	ut_ad(comp);
-
-	/* On a compressed page, there is a two-byte entry in the dense
-	page directory for every record, but no record header. One
-	record should fit on an empty leaf page. One byte is subtracted
-	for the encoded heap number. */
-	if (rec_size - (REC_N_NEW_EXTRA_BYTES - 2 - 1)
-	    >= page_zip_empty_size(n_fields, zip_size)) {
-		return(TRUE);
-	}
-
-	/* Check also the available space on the uncompressed page. */
-	return(rec_size >= page_get_free_space_of_empty(TRUE) / 2);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Run basic sanity assertions on a compressed page descriptor.
-@return TRUE if ok (a failed check trips a debug assertion instead) */
-UNIV_INLINE
-ibool
-page_zip_simple_validate(
-/*=====================*/
-	const page_zip_des_t*	page_zip)/*!< in: compressed page descriptor */
-{
-	ut_ad(page_zip);
-	ut_ad(page_zip->data);
-	ut_ad(page_zip->ssize <= PAGE_ZIP_SSIZE_MAX);
-
-	const ulint	zip_size = page_zip_get_size(page_zip);
-
-	/* The page must be able to hold the header and at least one
-	dense directory slot. */
-	ut_ad(zip_size > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE);
-
-	/* The modification log bounds must be ordered and lie within
-	the page. */
-	ut_ad(page_zip->m_start <= page_zip->m_end);
-	ut_ad(page_zip->m_end < zip_size);
-
-	ut_ad(page_zip->n_blobs < zip_size / BTR_EXTERN_FIELD_REF_SIZE);
-
-	return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/**********************************************************************//**
-Determine the length of the page trailer.
-The trailer consists of one fixed-size entry per user record (the dense
-directory slot plus the columns stored uncompressed for that page type)
-and of the BLOB pointers.
-@return length of the page trailer, in bytes, not including the
-terminating zero byte of the modification log */
-UNIV_INLINE
-ulint
-page_zip_get_trailer_len(
-/*=====================*/
-	const page_zip_des_t*	page_zip,/*!< in: compressed page */
-	ibool			is_clust)/*!< in: TRUE if clustered index */
-{
-	/* Trailer bytes occupied by each record on this kind of page */
-	ulint	uncompressed_size;
-
-	ut_ad(page_zip_simple_validate(page_zip));
-	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
-
-	if (!page_is_leaf(page_zip->data)) {
-		/* Non-leaf page: directory slot + node pointer */
-		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
-			+ REC_NODE_PTR_SIZE;
-		ut_ad(!page_zip->n_blobs);
-	} else if (is_clust) {
-		/* Clustered index leaf page: directory slot + trx_id
-		+ roll_ptr */
-		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
-			+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
-	} else {
-		/* Other leaf page: directory slot only */
-		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE;
-		ut_ad(!page_zip->n_blobs);
-	}
-
-	/* n_heap - 2 records are counted, matching the
-	"n_dense = n_heap - 2" of the format description. */
-	return((page_dir_get_n_heap(page_zip->data) - 2)
-	       * uncompressed_size
-	       + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE);
-}
-
-/**********************************************************************//**
-Determine how big a record can be inserted without recompressing the page.
-@return a positive number indicating the maximum size of a record
-whose insertion is guaranteed to succeed, or zero or negative */
-UNIV_INLINE
-lint
-page_zip_max_ins_size(
-/*==================*/
-	const page_zip_des_t*	page_zip,/*!< in: compressed page */
-	ibool			is_clust)/*!< in: TRUE if clustered index */
-{
-	/* When a record is created, a pointer may be added to the dense
-	directory, so one extra slot is reserved on top of the current
-	trailer. Space for the columns that will not be compressed is
-	likewise allocated from the page trailer. The BLOB pointers are
-	allocated from there too, but they may as well be counted in the
-	length of the record. */
-	const ulint	reserved
-		= page_zip_get_trailer_len(page_zip, is_clust)
-		+ PAGE_ZIP_DIR_SLOT_SIZE;
-
-	return((lint) page_zip_get_size(page_zip)
-	       - reserved - page_zip->m_end
-	       - (REC_N_NEW_EXTRA_BYTES - 2));
-}
-
-/**********************************************************************//**
-Check whether the modification log has room for a record.
-@return TRUE if enough space is available */
-UNIV_INLINE
-ibool
-page_zip_available(
-/*===============*/
-	const page_zip_des_t*	page_zip,/*!< in: compressed page */
-	ibool			is_clust,/*!< in: TRUE if clustered index */
-	ulint			length,	/*!< in: combined size of the record */
-	ulint			create)	/*!< in: nonzero=add the record to
-					the heap */
-{
-	ut_ad(length > REC_N_NEW_EXTRA_BYTES);
-
-	/* Bytes that would be in use after the write: trailer first. */
-	ulint	needed = page_zip_get_trailer_len(page_zip, is_clust);
-
-	/* Record payload: subtract the fixed extra bytes and add the
-	maximum space needed for identifying the record (encoded
-	heap_no). */
-	needed += length - (REC_N_NEW_EXTRA_BYTES - 2);
-
-	/* Everything up to the current end of the modification log. */
-	needed += page_zip->m_end;
-
-	if (create) {
-		/* When a record is created, a pointer may be added to
-		the dense directory. Space for the columns that will not
-		be compressed is likewise allocated from the page
-		trailer. The BLOB pointers are allocated from there too,
-		but they may as well be counted in the length of the
-		record. */
-		needed += PAGE_ZIP_DIR_SLOT_SIZE;
-	}
-
-	return(needed < page_zip_get_size(page_zip));
-}
-
-/**********************************************************************//**
-Initialize a compressed page descriptor by zero-filling the whole
-structure. */
-UNIV_INLINE
-void
-page_zip_des_init(
-/*==============*/
-	page_zip_des_t*	page_zip)	/*!< in/out: compressed page
-					descriptor */
-{
-	memset(page_zip, 0, sizeof(*page_zip));
-}
-
-/**********************************************************************//**
-Write a log record of writing to the uncompressed header portion of a page. */
-UNIV_INTERN
-void
-page_zip_write_header_log(
-/*======================*/
- const byte* data,/*!< in: data on the uncompressed page */
- ulint length, /*!< in: length of the data */
- mtr_t* mtr); /*!< in: mini-transaction */
-
-/**********************************************************************//**
-Copy bytes from the header of the uncompressed page to the same offset
-of the compressed page. The data must already have been written to the
-uncompressed page.
-However, the data portion of the uncompressed page may differ from
-the compressed page when a record is being inserted in
-page_cur_insert_rec_zip(). */
-UNIV_INLINE
-void
-page_zip_write_header(
-/*==================*/
-	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
-	const byte*	str,	/*!< in: address on the uncompressed page */
-	ulint		length,	/*!< in: length of the data */
-	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
-{
-	ut_ad(PAGE_ZIP_MATCH(str, page_zip));
-	ut_ad(page_zip_simple_validate(page_zip));
-	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
-
-	/* Offset of str within its page; it must lie in the header. */
-	const ulint	hdr_offset = page_offset(str);
-
-	ut_ad(hdr_offset < PAGE_DATA);
-
-	memcpy(page_zip->data + hdr_offset, str, length);
-
-	/* The following would fail in page_cur_insert_rec_zip(). */
-	/* ut_ad(page_zip_validate(page_zip, str - hdr_offset)); */
-
-	if (mtr != NULL) {
-#ifndef UNIV_HOTBACKUP
-		page_zip_write_header_log(str, length, mtr);
-#endif /* !UNIV_HOTBACKUP */
-	}
-}
-
-/**********************************************************************//**
-Write a MLOG_ZIP_PAGE_COMPRESS_NO_DATA log record for an index page: the
-index information followed by one byte holding the compression level,
-without the data on the page. */
-UNIV_INLINE
-void
-page_zip_compress_write_log_no_data(
-/*================================*/
-	ulint		level,	/*!< in: compression level */
-	const page_t*	page,	/*!< in: page that is compressed */
-	dict_index_t*	index,	/*!< in: index */
-	mtr_t*		mtr)	/*!< in: mtr */
-{
-	/* Request room for the 1-byte payload. */
-	byte*	log_ptr = mlog_open_and_write_index(
-		mtr, page, index, MLOG_ZIP_PAGE_COMPRESS_NO_DATA, 1);
-
-	if (log_ptr == NULL) {
-		/* No log buffer was opened; nothing to write. */
-		return;
-	}
-
-	mach_write_to_1(log_ptr, level);
-	mlog_close(mtr, log_ptr + 1);
-}
-
-/**********************************************************************//**
-Parse a log record of compressing an index page without the data, and
-recompress the page if one is given.
-@return end of log record or NULL */
-UNIV_INLINE
-byte*
-page_zip_parse_compress_no_data(
-/*============================*/
-	byte*		ptr,		/*!< in: buffer */
-	byte*		end_ptr,	/*!< in: buffer end */
-	page_t*		page,		/*!< in: uncompressed page */
-	page_zip_des_t*	page_zip,	/*!< out: compressed page */
-	dict_index_t*	index)		/*!< in: index */
-{
-	if (ptr == end_ptr) {
-		/* The 1-byte payload is not in the buffer. */
-		return(NULL);
-	}
-
-	const ulint	level = mach_read_from_1(ptr);
-
-	/* If page compression fails then there must be something wrong
-	because a compress log record is logged only if the compression
-	was successful. Crash in this case. */
-
-	if (page != NULL) {
-		if (!page_zip_compress(page_zip, page, index, level, NULL)) {
-			ut_error;
-		}
-	}
-
-	return(ptr + 1);
-}
-
-/**********************************************************************//**
-Reset the counters used for filling
-INFORMATION_SCHEMA.innodb_cmp_per_index.
-Every entry of page_zip_stat_per_index is erased while
-page_zip_stat_per_index_mutex is held. */
-UNIV_INLINE
-void
-page_zip_reset_stat_per_index()
-/*===========================*/
-{
- /* Hold the mutex for the whole erase operation. */
- mutex_enter(&page_zip_stat_per_index_mutex);
-
- /* Remove all entries, from begin() to end(). */
- page_zip_stat_per_index.erase(
- page_zip_stat_per_index.begin(),
- page_zip_stat_per_index.end());
-
- mutex_exit(&page_zip_stat_per_index_mutex);
-}
-
-#ifdef UNIV_MATERIALIZE
-# undef UNIV_INLINE
-# define UNIV_INLINE UNIV_INLINE_ORIGINAL
-#endif
diff --git a/storage/xtradb/include/pars0grm.h b/storage/xtradb/include/pars0grm.h
deleted file mode 100644
index 8e725fe9545..00000000000
--- a/storage/xtradb/include/pars0grm.h
+++ /dev/null
@@ -1,261 +0,0 @@
-/* A Bison parser, made by GNU Bison 2.3. */
-
-/* Skeleton interface for Bison's Yacc-like parsers in C
-
- Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
- Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1301, USA. */
-
-/* As a special exception, you may create a larger work that contains
- part or all of the Bison parser skeleton and distribute that work
- under terms of your choice, so long as that work isn't itself a
- parser generator using the skeleton or a modified version thereof
- as a parser skeleton. Alternatively, if you modify or redistribute
- the parser skeleton itself, you may (at your option) remove this
- special exception, which will cause the skeleton and the resulting
- Bison output files to be licensed under the GNU General Public
- License without this special exception.
-
- This special exception was added by the Free Software Foundation in
- version 2.2 of Bison. */
-
-/* Tokens. */
-#ifndef YYTOKENTYPE
-# define YYTOKENTYPE
- /* Put the tokens into the symbol table, so that GDB and other debuggers
- know about them. */
- enum yytokentype {
- PARS_INT_LIT = 258,
- PARS_FLOAT_LIT = 259,
- PARS_STR_LIT = 260,
- PARS_FIXBINARY_LIT = 261,
- PARS_BLOB_LIT = 262,
- PARS_NULL_LIT = 263,
- PARS_ID_TOKEN = 264,
- PARS_AND_TOKEN = 265,
- PARS_OR_TOKEN = 266,
- PARS_NOT_TOKEN = 267,
- PARS_GE_TOKEN = 268,
- PARS_LE_TOKEN = 269,
- PARS_NE_TOKEN = 270,
- PARS_PROCEDURE_TOKEN = 271,
- PARS_IN_TOKEN = 272,
- PARS_OUT_TOKEN = 273,
- PARS_BINARY_TOKEN = 274,
- PARS_BLOB_TOKEN = 275,
- PARS_INT_TOKEN = 276,
- PARS_INTEGER_TOKEN = 277,
- PARS_FLOAT_TOKEN = 278,
- PARS_CHAR_TOKEN = 279,
- PARS_IS_TOKEN = 280,
- PARS_BEGIN_TOKEN = 281,
- PARS_END_TOKEN = 282,
- PARS_IF_TOKEN = 283,
- PARS_THEN_TOKEN = 284,
- PARS_ELSE_TOKEN = 285,
- PARS_ELSIF_TOKEN = 286,
- PARS_LOOP_TOKEN = 287,
- PARS_WHILE_TOKEN = 288,
- PARS_RETURN_TOKEN = 289,
- PARS_SELECT_TOKEN = 290,
- PARS_SUM_TOKEN = 291,
- PARS_COUNT_TOKEN = 292,
- PARS_DISTINCT_TOKEN = 293,
- PARS_FROM_TOKEN = 294,
- PARS_WHERE_TOKEN = 295,
- PARS_FOR_TOKEN = 296,
- PARS_DDOT_TOKEN = 297,
- PARS_READ_TOKEN = 298,
- PARS_ORDER_TOKEN = 299,
- PARS_BY_TOKEN = 300,
- PARS_ASC_TOKEN = 301,
- PARS_DESC_TOKEN = 302,
- PARS_INSERT_TOKEN = 303,
- PARS_INTO_TOKEN = 304,
- PARS_VALUES_TOKEN = 305,
- PARS_UPDATE_TOKEN = 306,
- PARS_SET_TOKEN = 307,
- PARS_DELETE_TOKEN = 308,
- PARS_CURRENT_TOKEN = 309,
- PARS_OF_TOKEN = 310,
- PARS_CREATE_TOKEN = 311,
- PARS_TABLE_TOKEN = 312,
- PARS_INDEX_TOKEN = 313,
- PARS_UNIQUE_TOKEN = 314,
- PARS_CLUSTERED_TOKEN = 315,
- PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316,
- PARS_ON_TOKEN = 317,
- PARS_ASSIGN_TOKEN = 318,
- PARS_DECLARE_TOKEN = 319,
- PARS_CURSOR_TOKEN = 320,
- PARS_SQL_TOKEN = 321,
- PARS_OPEN_TOKEN = 322,
- PARS_FETCH_TOKEN = 323,
- PARS_CLOSE_TOKEN = 324,
- PARS_NOTFOUND_TOKEN = 325,
- PARS_TO_CHAR_TOKEN = 326,
- PARS_TO_NUMBER_TOKEN = 327,
- PARS_TO_BINARY_TOKEN = 328,
- PARS_BINARY_TO_NUMBER_TOKEN = 329,
- PARS_SUBSTR_TOKEN = 330,
- PARS_REPLSTR_TOKEN = 331,
- PARS_CONCAT_TOKEN = 332,
- PARS_INSTR_TOKEN = 333,
- PARS_LENGTH_TOKEN = 334,
- PARS_SYSDATE_TOKEN = 335,
- PARS_PRINTF_TOKEN = 336,
- PARS_ASSERT_TOKEN = 337,
- PARS_RND_TOKEN = 338,
- PARS_RND_STR_TOKEN = 339,
- PARS_ROW_PRINTF_TOKEN = 340,
- PARS_COMMIT_TOKEN = 341,
- PARS_ROLLBACK_TOKEN = 342,
- PARS_WORK_TOKEN = 343,
- PARS_UNSIGNED_TOKEN = 344,
- PARS_EXIT_TOKEN = 345,
- PARS_FUNCTION_TOKEN = 346,
- PARS_LOCK_TOKEN = 347,
- PARS_SHARE_TOKEN = 348,
- PARS_MODE_TOKEN = 349,
- PARS_LIKE_TOKEN = 350,
- PARS_LIKE_TOKEN_EXACT = 351,
- PARS_LIKE_TOKEN_PREFIX = 352,
- PARS_LIKE_TOKEN_SUFFIX = 353,
- PARS_LIKE_TOKEN_SUBSTR = 354,
- PARS_TABLE_NAME_TOKEN = 355,
- PARS_COMPACT_TOKEN = 356,
- PARS_BLOCK_SIZE_TOKEN = 357,
- PARS_BIGINT_TOKEN = 358,
- NEG = 359
- };
-#endif
-/* Tokens. */
-#define PARS_INT_LIT 258
-#define PARS_FLOAT_LIT 259
-#define PARS_STR_LIT 260
-#define PARS_FIXBINARY_LIT 261
-#define PARS_BLOB_LIT 262
-#define PARS_NULL_LIT 263
-#define PARS_ID_TOKEN 264
-#define PARS_AND_TOKEN 265
-#define PARS_OR_TOKEN 266
-#define PARS_NOT_TOKEN 267
-#define PARS_GE_TOKEN 268
-#define PARS_LE_TOKEN 269
-#define PARS_NE_TOKEN 270
-#define PARS_PROCEDURE_TOKEN 271
-#define PARS_IN_TOKEN 272
-#define PARS_OUT_TOKEN 273
-#define PARS_BINARY_TOKEN 274
-#define PARS_BLOB_TOKEN 275
-#define PARS_INT_TOKEN 276
-#define PARS_INTEGER_TOKEN 277
-#define PARS_FLOAT_TOKEN 278
-#define PARS_CHAR_TOKEN 279
-#define PARS_IS_TOKEN 280
-#define PARS_BEGIN_TOKEN 281
-#define PARS_END_TOKEN 282
-#define PARS_IF_TOKEN 283
-#define PARS_THEN_TOKEN 284
-#define PARS_ELSE_TOKEN 285
-#define PARS_ELSIF_TOKEN 286
-#define PARS_LOOP_TOKEN 287
-#define PARS_WHILE_TOKEN 288
-#define PARS_RETURN_TOKEN 289
-#define PARS_SELECT_TOKEN 290
-#define PARS_SUM_TOKEN 291
-#define PARS_COUNT_TOKEN 292
-#define PARS_DISTINCT_TOKEN 293
-#define PARS_FROM_TOKEN 294
-#define PARS_WHERE_TOKEN 295
-#define PARS_FOR_TOKEN 296
-#define PARS_DDOT_TOKEN 297
-#define PARS_READ_TOKEN 298
-#define PARS_ORDER_TOKEN 299
-#define PARS_BY_TOKEN 300
-#define PARS_ASC_TOKEN 301
-#define PARS_DESC_TOKEN 302
-#define PARS_INSERT_TOKEN 303
-#define PARS_INTO_TOKEN 304
-#define PARS_VALUES_TOKEN 305
-#define PARS_UPDATE_TOKEN 306
-#define PARS_SET_TOKEN 307
-#define PARS_DELETE_TOKEN 308
-#define PARS_CURRENT_TOKEN 309
-#define PARS_OF_TOKEN 310
-#define PARS_CREATE_TOKEN 311
-#define PARS_TABLE_TOKEN 312
-#define PARS_INDEX_TOKEN 313
-#define PARS_UNIQUE_TOKEN 314
-#define PARS_CLUSTERED_TOKEN 315
-#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316
-#define PARS_ON_TOKEN 317
-#define PARS_ASSIGN_TOKEN 318
-#define PARS_DECLARE_TOKEN 319
-#define PARS_CURSOR_TOKEN 320
-#define PARS_SQL_TOKEN 321
-#define PARS_OPEN_TOKEN 322
-#define PARS_FETCH_TOKEN 323
-#define PARS_CLOSE_TOKEN 324
-#define PARS_NOTFOUND_TOKEN 325
-#define PARS_TO_CHAR_TOKEN 326
-#define PARS_TO_NUMBER_TOKEN 327
-#define PARS_TO_BINARY_TOKEN 328
-#define PARS_BINARY_TO_NUMBER_TOKEN 329
-#define PARS_SUBSTR_TOKEN 330
-#define PARS_REPLSTR_TOKEN 331
-#define PARS_CONCAT_TOKEN 332
-#define PARS_INSTR_TOKEN 333
-#define PARS_LENGTH_TOKEN 334
-#define PARS_SYSDATE_TOKEN 335
-#define PARS_PRINTF_TOKEN 336
-#define PARS_ASSERT_TOKEN 337
-#define PARS_RND_TOKEN 338
-#define PARS_RND_STR_TOKEN 339
-#define PARS_ROW_PRINTF_TOKEN 340
-#define PARS_COMMIT_TOKEN 341
-#define PARS_ROLLBACK_TOKEN 342
-#define PARS_WORK_TOKEN 343
-#define PARS_UNSIGNED_TOKEN 344
-#define PARS_EXIT_TOKEN 345
-#define PARS_FUNCTION_TOKEN 346
-#define PARS_LOCK_TOKEN 347
-#define PARS_SHARE_TOKEN 348
-#define PARS_MODE_TOKEN 349
-#define PARS_LIKE_TOKEN 350
-#define PARS_LIKE_TOKEN_EXACT 351
-#define PARS_LIKE_TOKEN_PREFIX 352
-#define PARS_LIKE_TOKEN_SUFFIX 353
-#define PARS_LIKE_TOKEN_SUBSTR 354
-#define PARS_TABLE_NAME_TOKEN 355
-#define PARS_COMPACT_TOKEN 356
-#define PARS_BLOCK_SIZE_TOKEN 357
-#define PARS_BIGINT_TOKEN 358
-#define NEG 359
-
-
-
-
-#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
-typedef int YYSTYPE;
-# define yystype YYSTYPE /* obsolescent; will be withdrawn */
-# define YYSTYPE_IS_DECLARED 1
-# define YYSTYPE_IS_TRIVIAL 1
-#endif
-
-extern YYSTYPE yylval;
-
diff --git a/storage/xtradb/include/pars0opt.h b/storage/xtradb/include/pars0opt.h
deleted file mode 100644
index 1084d644c90..00000000000
--- a/storage/xtradb/include/pars0opt.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0opt.h
-Simple SQL optimizer
-
-Created 12/21/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef pars0opt_h
-#define pars0opt_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "usr0types.h"
-#include "pars0sym.h"
-#include "dict0types.h"
-#include "row0sel.h"
-
-/*******************************************************************//**
-Optimizes a select. Decides which indexes to tables to use. The tables
-are accessed in the order that they were written to the FROM part in the
-select statement. */
-UNIV_INTERN
-void
-opt_search_plan(
-/*============*/
- sel_node_t* sel_node); /*!< in: parsed select node */
-/*******************************************************************//**
-Looks for occurrences of the columns of the table in the query subgraph and
-adds them to the list of columns if an occurrence of the same column does not
-already exist in the list. If the column is already in the list, puts a value
-indirection to point to the occurrence in the column list, except if the
-column occurrence we are looking at is in the column list, in which case
-nothing is done. */
-UNIV_INTERN
-void
-opt_find_all_cols(
-/*==============*/
- ibool copy_val, /*!< in: if TRUE, new found columns are
- added as columns to copy */
- dict_index_t* index, /*!< in: index to use */
- sym_node_list_t* col_list, /*!< in: base node of a list where
- to add new found columns */
- plan_t* plan, /*!< in: plan or NULL */
- que_node_t* exp); /*!< in: expression or condition */
-/********************************************************************//**
-Prints info of a query plan. */
-UNIV_INTERN
-void
-opt_print_query_plan(
-/*=================*/
- sel_node_t* sel_node); /*!< in: select node */
-
-#ifndef UNIV_NONINL
-#include "pars0opt.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/pars0opt.ic b/storage/xtradb/include/pars0opt.ic
deleted file mode 100644
index 786d911ca3d..00000000000
--- a/storage/xtradb/include/pars0opt.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0opt.ic
-Simple SQL optimizer
-
-Created 12/21/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/xtradb/include/pars0pars.h b/storage/xtradb/include/pars0pars.h
deleted file mode 100644
index 73585c78a6a..00000000000
--- a/storage/xtradb/include/pars0pars.h
+++ /dev/null
@@ -1,826 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0pars.h
-SQL parser
-
-Created 11/19/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef pars0pars_h
-#define pars0pars_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "usr0types.h"
-#include "pars0types.h"
-#include "row0types.h"
-#include "trx0types.h"
-#include "ut0vec.h"
-
-/** Type of the user functions. The first argument is always InnoDB-supplied
-and varies in type, while 'user_arg' is a user-supplied argument. The
-meaning of the return type also varies. See the individual use cases, e.g.
-the FETCH statement, for details on them. */
-typedef ibool (*pars_user_func_cb_t)(void* arg, void* user_arg);
-
-/** If the following is set TRUE, the parser will emit debugging
-information */
-extern int yydebug;
-
-#ifdef UNIV_SQL_DEBUG
-/** If the following is set TRUE, the lexer will print the SQL string
-as it tokenizes it */
-extern ibool pars_print_lexed;
-#endif /* UNIV_SQL_DEBUG */
-
-/* Global variable used while parsing a single procedure or query : the code is
-NOT re-entrant */
-extern sym_tab_t* pars_sym_tab_global;
-
-extern pars_res_word_t pars_to_char_token;
-extern pars_res_word_t pars_to_number_token;
-extern pars_res_word_t pars_to_binary_token;
-extern pars_res_word_t pars_binary_to_number_token;
-extern pars_res_word_t pars_substr_token;
-extern pars_res_word_t pars_replstr_token;
-extern pars_res_word_t pars_concat_token;
-extern pars_res_word_t pars_length_token;
-extern pars_res_word_t pars_instr_token;
-extern pars_res_word_t pars_sysdate_token;
-extern pars_res_word_t pars_printf_token;
-extern pars_res_word_t pars_assert_token;
-extern pars_res_word_t pars_rnd_token;
-extern pars_res_word_t pars_rnd_str_token;
-extern pars_res_word_t pars_count_token;
-extern pars_res_word_t pars_sum_token;
-extern pars_res_word_t pars_distinct_token;
-extern pars_res_word_t pars_binary_token;
-extern pars_res_word_t pars_blob_token;
-extern pars_res_word_t pars_int_token;
-extern pars_res_word_t pars_bigint_token;
-extern pars_res_word_t pars_char_token;
-extern pars_res_word_t pars_float_token;
-extern pars_res_word_t pars_update_token;
-extern pars_res_word_t pars_asc_token;
-extern pars_res_word_t pars_desc_token;
-extern pars_res_word_t pars_open_token;
-extern pars_res_word_t pars_close_token;
-extern pars_res_word_t pars_share_token;
-extern pars_res_word_t pars_unique_token;
-extern pars_res_word_t pars_clustered_token;
-
-extern ulint pars_star_denoter;
-
-/* Procedure parameter types */
-#define PARS_INPUT 0
-#define PARS_OUTPUT 1
-#define PARS_NOT_PARAM 2
-
-int
-yyparse(void);
-
-/*************************************************************//**
-Parses an SQL string returning the query graph.
-@return own: the query graph */
-UNIV_INTERN
-que_t*
-pars_sql(
-/*=====*/
- pars_info_t* info, /*!< in: extra information, or NULL */
- const char* str); /*!< in: SQL string */
-/*************************************************************//**
-Retrieves characters to the lexical analyzer.
-@return number of characters copied or 0 on EOF */
-UNIV_INTERN
-int
-pars_get_lex_chars(
-/*===============*/
- char* buf, /*!< in/out: buffer where to copy */
- int max_size); /*!< in: maximum number of characters which fit
- in the buffer */
-/*************************************************************//**
-Called by yyparse on error. */
-UNIV_INTERN
-void
-yyerror(
-/*====*/
- const char* s); /*!< in: error message string */
-/*********************************************************************//**
-Parses a variable declaration.
-@return own: symbol table node of type SYM_VAR */
-UNIV_INTERN
-sym_node_t*
-pars_variable_declaration(
-/*======================*/
- sym_node_t* node, /*!< in: symbol table node allocated for the
- id of the variable */
- pars_res_word_t* type); /*!< in: pointer to a type token */
-/*********************************************************************//**
-Parses a function expression.
-@return own: function node in a query tree */
-UNIV_INTERN
-func_node_t*
-pars_func(
-/*======*/
- que_node_t* res_word,/*!< in: function name reserved word */
- que_node_t* arg); /*!< in: first argument in the argument list */
-/*************************************************************************
-Rebind a LIKE search string. NOTE: We ignore any '%' characters embedded
-within the search string.
-@return own: function node in a query tree */
-UNIV_INTERN
-int
-pars_like_rebind(
-/*=============*/
- sym_node_t* node, /* in: The search string node.*/
- const byte* ptr, /* in: literal to (re) bind */
- ulint len); /* in: length of literal to (re) bind*/
-/*********************************************************************//**
-Parses an operator expression.
-@return own: function node in a query tree */
-UNIV_INTERN
-func_node_t*
-pars_op(
-/*====*/
- int func, /*!< in: operator token code */
- que_node_t* arg1, /*!< in: first argument */
- que_node_t* arg2); /*!< in: second argument or NULL for an unary
- operator */
-/*********************************************************************//**
-Parses an ORDER BY clause. Order by a single column only is supported.
-@return own: order-by node in a query tree */
-UNIV_INTERN
-order_node_t*
-pars_order_by(
-/*==========*/
- sym_node_t* column, /*!< in: column name */
- pars_res_word_t* asc); /*!< in: &pars_asc_token or pars_desc_token */
-/*********************************************************************//**
-Parses a select list; creates a query graph node for the whole SELECT
-statement.
-@return own: select node in a query tree */
-UNIV_INTERN
-sel_node_t*
-pars_select_list(
-/*=============*/
- que_node_t* select_list, /*!< in: select list */
- sym_node_t* into_list); /*!< in: variables list or NULL */
-/*********************************************************************//**
-Parses a cursor declaration.
-@return sym_node */
-UNIV_INTERN
-que_node_t*
-pars_cursor_declaration(
-/*====================*/
- sym_node_t* sym_node, /*!< in: cursor id node in the symbol
- table */
- sel_node_t* select_node); /*!< in: select node */
-/*********************************************************************//**
-Parses a function declaration.
-@return sym_node */
-UNIV_INTERN
-que_node_t*
-pars_function_declaration(
-/*======================*/
- sym_node_t* sym_node); /*!< in: function id node in the symbol
- table */
-/*********************************************************************//**
-Parses a select statement.
-@return own: select node in a query tree */
-UNIV_INTERN
-sel_node_t*
-pars_select_statement(
-/*==================*/
- sel_node_t* select_node, /*!< in: select node already containing
- the select list */
- sym_node_t* table_list, /*!< in: table list */
- que_node_t* search_cond, /*!< in: search condition or NULL */
- pars_res_word_t* for_update, /*!< in: NULL or &pars_update_token */
- pars_res_word_t* consistent_read,/*!< in: NULL or
- &pars_consistent_token */
- order_node_t* order_by); /*!< in: NULL or an order-by node */
-/*********************************************************************//**
-Parses a column assignment in an update.
-@return column assignment node */
-UNIV_INTERN
-col_assign_node_t*
-pars_column_assignment(
-/*===================*/
- sym_node_t* column, /*!< in: column to assign */
- que_node_t* exp); /*!< in: value to assign */
-/*********************************************************************//**
-Parses a delete or update statement start.
-@return own: update node in a query tree */
-UNIV_INTERN
-upd_node_t*
-pars_update_statement_start(
-/*========================*/
- ibool is_delete, /*!< in: TRUE if delete */
- sym_node_t* table_sym, /*!< in: table name node */
- col_assign_node_t* col_assign_list);/*!< in: column assignment list, NULL
- if delete */
-/*********************************************************************//**
-Parses an update or delete statement.
-@return own: update node in a query tree */
-UNIV_INTERN
-upd_node_t*
-pars_update_statement(
-/*==================*/
- upd_node_t* node, /*!< in: update node */
- sym_node_t* cursor_sym, /*!< in: pointer to a cursor entry in
- the symbol table or NULL */
- que_node_t* search_cond); /*!< in: search condition or NULL */
-/*********************************************************************//**
-Parses an insert statement.
-@return own: update node in a query tree */
-UNIV_INTERN
-ins_node_t*
-pars_insert_statement(
-/*==================*/
- sym_node_t* table_sym, /*!< in: table name node */
- que_node_t* values_list, /*!< in: value expression list or NULL */
- sel_node_t* select); /*!< in: select condition or NULL */
-/*********************************************************************//**
-Parses a procedure parameter declaration.
-@return own: symbol table node of type SYM_VAR */
-UNIV_INTERN
-sym_node_t*
-pars_parameter_declaration(
-/*=======================*/
- sym_node_t* node, /*!< in: symbol table node allocated for the
- id of the parameter */
- ulint param_type,
- /*!< in: PARS_INPUT or PARS_OUTPUT */
- pars_res_word_t* type); /*!< in: pointer to a type token */
-/*********************************************************************//**
-Parses an elsif element.
-@return elsif node */
-UNIV_INTERN
-elsif_node_t*
-pars_elsif_element(
-/*===============*/
- que_node_t* cond, /*!< in: if-condition */
- que_node_t* stat_list); /*!< in: statement list */
-/*********************************************************************//**
-Parses an if-statement.
-@return if-statement node */
-UNIV_INTERN
-if_node_t*
-pars_if_statement(
-/*==============*/
- que_node_t* cond, /*!< in: if-condition */
- que_node_t* stat_list, /*!< in: statement list */
- que_node_t* else_part); /*!< in: else-part statement list */
-/*********************************************************************//**
-Parses a for-loop-statement.
-@return for-statement node */
-UNIV_INTERN
-for_node_t*
-pars_for_statement(
-/*===============*/
- sym_node_t* loop_var, /*!< in: loop variable */
- que_node_t* loop_start_limit,/*!< in: loop start expression */
- que_node_t* loop_end_limit, /*!< in: loop end expression */
- que_node_t* stat_list); /*!< in: statement list */
-/*********************************************************************//**
-Parses a while-statement.
-@return while-statement node */
-UNIV_INTERN
-while_node_t*
-pars_while_statement(
-/*=================*/
- que_node_t* cond, /*!< in: while-condition */
- que_node_t* stat_list); /*!< in: statement list */
-/*********************************************************************//**
-Parses an exit statement.
-@return exit statement node */
-UNIV_INTERN
-exit_node_t*
-pars_exit_statement(void);
-/*=====================*/
-/*********************************************************************//**
-Parses a return-statement.
-@return return-statement node */
-UNIV_INTERN
-return_node_t*
-pars_return_statement(void);
-/*=======================*/
-/*********************************************************************//**
-Parses a procedure call.
-@return function node */
-UNIV_INTERN
-func_node_t*
-pars_procedure_call(
-/*================*/
- que_node_t* res_word,/*!< in: procedure name reserved word */
- que_node_t* args); /*!< in: argument list */
-/*********************************************************************//**
-Parses an assignment statement.
-@return assignment statement node */
-UNIV_INTERN
-assign_node_t*
-pars_assignment_statement(
-/*======================*/
- sym_node_t* var, /*!< in: variable to assign */
- que_node_t* val); /*!< in: value to assign */
-/*********************************************************************//**
-Parses a fetch statement. into_list or user_func (but not both) must be
-non-NULL.
-@return fetch statement node */
-UNIV_INTERN
-fetch_node_t*
-pars_fetch_statement(
-/*=================*/
- sym_node_t* cursor, /*!< in: cursor node */
- sym_node_t* into_list, /*!< in: variables to set, or NULL */
- sym_node_t* user_func); /*!< in: user function name, or NULL */
-/*********************************************************************//**
-Parses an open or close cursor statement.
-@return fetch statement node */
-UNIV_INTERN
-open_node_t*
-pars_open_statement(
-/*================*/
- ulint type, /*!< in: ROW_SEL_OPEN_CURSOR
- or ROW_SEL_CLOSE_CURSOR */
- sym_node_t* cursor); /*!< in: cursor node */
-/*********************************************************************//**
-Parses a row_printf-statement.
-@return row_printf-statement node */
-UNIV_INTERN
-row_printf_node_t*
-pars_row_printf_statement(
-/*======================*/
- sel_node_t* sel_node); /*!< in: select node */
-/*********************************************************************//**
-Parses a commit statement.
-@return own: commit node struct */
-UNIV_INTERN
-commit_node_t*
-pars_commit_statement(void);
-/*=======================*/
-/*********************************************************************//**
-Parses a rollback statement.
-@return own: rollback node struct */
-UNIV_INTERN
-roll_node_t*
-pars_rollback_statement(void);
-/*=========================*/
-/*********************************************************************//**
-Parses a column definition at a table creation.
-@return column sym table node */
-UNIV_INTERN
-sym_node_t*
-pars_column_def(
-/*============*/
- sym_node_t* sym_node, /*!< in: column node in the
- symbol table */
- pars_res_word_t* type, /*!< in: data type */
- sym_node_t* len, /*!< in: length of column, or
- NULL */
- void* is_unsigned, /*!< in: if not NULL, column
- is of type UNSIGNED. */
- void* is_not_null); /*!< in: if not NULL, column
- is of type NOT NULL. */
-/*********************************************************************//**
-Parses a table creation operation.
-@return table create subgraph */
-UNIV_INTERN
-tab_node_t*
-pars_create_table(
-/*==============*/
- sym_node_t* table_sym, /*!< in: table name node in the symbol
- table */
- sym_node_t* column_defs, /*!< in: list of column names */
- sym_node_t* compact, /* in: non-NULL if COMPACT table. */
- sym_node_t* block_size, /* in: block size (can be NULL) */
- void* not_fit_in_memory);
- /*!< in: a non-NULL pointer means that
- this is a table which in simulations
- should be simulated as not fitting
- in memory; thread is put to sleep
- to simulate disk accesses; NOTE that
- this flag is not stored to the data
- dictionary on disk, and the database
- will forget about non-NULL value if
- it has to reload the table definition
- from disk */
-/*********************************************************************//**
-Parses an index creation operation.
-@return index create subgraph */
-UNIV_INTERN
-ind_node_t*
-pars_create_index(
-/*==============*/
- pars_res_word_t* unique_def, /*!< in: not NULL if a unique index */
- pars_res_word_t* clustered_def, /*!< in: not NULL if a clustered index */
- sym_node_t* index_sym, /*!< in: index name node in the symbol
- table */
- sym_node_t* table_sym, /*!< in: table name node in the symbol
- table */
- sym_node_t* column_list); /*!< in: list of column names */
-/*********************************************************************//**
-Parses a procedure definition.
-@return query fork node */
-UNIV_INTERN
-que_fork_t*
-pars_procedure_definition(
-/*======================*/
- sym_node_t* sym_node, /*!< in: procedure id node in the symbol
- table */
- sym_node_t* param_list, /*!< in: parameter declaration list */
- que_node_t* stat_list); /*!< in: statement list */
-
-/*************************************************************//**
-Parses a stored procedure call, when this is not within another stored
-procedure, that is, the client issues a procedure call directly.
-In MySQL/InnoDB, stored InnoDB procedures are invoked via the
-parsed procedure tree, not via InnoDB SQL, so this function is not used.
-@return query graph */
-UNIV_INTERN
-que_fork_t*
-pars_stored_procedure_call(
-/*=======================*/
- sym_node_t* sym_node); /*!< in: stored procedure name */
-/******************************************************************//**
-Completes a query graph by adding query thread and fork nodes
-above it and prepares the graph for running. The fork created is of
-type QUE_FORK_MYSQL_INTERFACE.
-@return query thread node to run */
-UNIV_INTERN
-que_thr_t*
-pars_complete_graph_for_exec(
-/*=========================*/
- que_node_t* node, /*!< in: root node for an incomplete
- query graph, or NULL for dummy graph */
- trx_t* trx, /*!< in: transaction handle */
- mem_heap_t* heap) /*!< in: memory heap from which allocated */
- MY_ATTRIBUTE((nonnull(2,3), warn_unused_result));
-
-/****************************************************************//**
-Create parser info struct.
-@return own: info struct */
-UNIV_INTERN
-pars_info_t*
-pars_info_create(void);
-/*==================*/
-
-/****************************************************************//**
-Free info struct and everything it contains. */
-UNIV_INTERN
-void
-pars_info_free(
-/*===========*/
- pars_info_t* info); /*!< in, own: info struct */
-
-/****************************************************************//**
-Add bound literal. */
-UNIV_INTERN
-void
-pars_info_add_literal(
-/*==================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- const void* address, /*!< in: address */
- ulint length, /*!< in: length of data */
- ulint type, /*!< in: type, e.g. DATA_FIXBINARY */
- ulint prtype); /*!< in: precise type, e.g.
- DATA_UNSIGNED */
-
-/****************************************************************//**
-Equivalent to pars_info_add_literal(info, name, str, strlen(str),
-DATA_VARCHAR, DATA_ENGLISH). */
-UNIV_INTERN
-void
-pars_info_add_str_literal(
-/*======================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- const char* str); /*!< in: string */
-/********************************************************************
-If the literal value already exists then it rebinds otherwise it
-creates a new entry.*/
-UNIV_INTERN
-void
-pars_info_bind_literal(
-/*===================*/
- pars_info_t* info, /* in: info struct */
- const char* name, /* in: name */
- const void* address, /* in: address */
- ulint length, /* in: length of data */
- ulint type, /* in: type, e.g. DATA_FIXBINARY */
- ulint prtype); /* in: precise type, e.g. */
-/********************************************************************
-If the literal value already exists then it rebinds otherwise it
-creates a new entry.*/
-UNIV_INTERN
-void
-pars_info_bind_varchar_literal(
-/*===========================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- const byte* str, /*!< in: string */
- ulint str_len); /*!< in: string length */
-/****************************************************************//**
-Equivalent to:
-
-char buf[4];
-mach_write_to_4(buf, val);
-pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
-
-except that the buffer is dynamically allocated from the info struct's
-heap. */
-UNIV_INTERN
-void
-pars_info_bind_int4_literal(
-/*=======================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- const ib_uint32_t* val); /*!< in: value */
-/********************************************************************
-If the literal value already exists then it rebinds otherwise it
-creates a new entry. */
-UNIV_INTERN
-void
-pars_info_bind_int8_literal(
-/*=======================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- const ib_uint64_t* val); /*!< in: value */
-/****************************************************************//**
-Add user function. */
-UNIV_INTERN
-void
-pars_info_bind_function(
-/*===================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: function name */
- pars_user_func_cb_t func, /*!< in: function address */
- void* arg); /*!< in: user-supplied argument */
-/****************************************************************//**
-Add bound id. */
-UNIV_INTERN
-void
-pars_info_bind_id(
-/*=============*/
- pars_info_t* info, /*!< in: info struct */
- ibool copy_name,/* in: make a copy of name if TRUE */
- const char* name, /*!< in: name */
- const char* id); /*!< in: id */
-/****************************************************************//**
-Equivalent to:
-
-char buf[4];
-mach_write_to_4(buf, val);
-pars_info_add_literal(info, name, buf, 4, DATA_INT, 0);
-
-except that the buffer is dynamically allocated from the info struct's
-heap. */
-UNIV_INTERN
-void
-pars_info_add_int4_literal(
-/*=======================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- lint val); /*!< in: value */
-
-/****************************************************************//**
-Equivalent to:
-
-char buf[8];
-mach_write_to_8(buf, val);
-pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0);
-
-except that the buffer is dynamically allocated from the info struct's
-heap. */
-UNIV_INTERN
-void
-pars_info_add_ull_literal(
-/*======================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- ib_uint64_t val); /*!< in: value */
-
-/****************************************************************//**
-If the literal value already exists then it rebinds otherwise it
-creates a new entry. */
-UNIV_INTERN
-void
-pars_info_bind_ull_literal(
-/*=======================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- const ib_uint64_t* val) /*!< in: value */
- MY_ATTRIBUTE((nonnull));
-
-/****************************************************************//**
-Add bound id. */
-UNIV_INTERN
-void
-pars_info_add_id(
-/*=============*/
- pars_info_t* info, /*!< in: info struct */
- const char* name, /*!< in: name */
- const char* id); /*!< in: id */
-
-/****************************************************************//**
-Get bound literal with the given name.
-@return bound literal, or NULL if not found */
-UNIV_INTERN
-pars_bound_lit_t*
-pars_info_get_bound_lit(
-/*====================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name); /*!< in: bound literal name to find */
-
-/****************************************************************//**
-Get bound id with the given name.
-@return bound id, or NULL if not found */
-UNIV_INTERN
-pars_bound_id_t*
-pars_info_get_bound_id(
-/*===================*/
- pars_info_t* info, /*!< in: info struct */
- const char* name); /*!< in: bound id name to find */
-
-/******************************************************************//**
-Release any resources used by the lexer. */
-UNIV_INTERN
-void
-pars_lexer_close(void);
-/*==================*/
-
-/** Extra information supplied for pars_sql(): user functions, bound literals and bound ids. */
-struct pars_info_t {
- mem_heap_t* heap; /*!< our own memory heap */
-
- ib_vector_t* funcs; /*!< user functions, or NULL
- (pars_user_func_t*) */
- ib_vector_t* bound_lits; /*!< bound literals, or NULL
- (pars_bound_lit_t*) */
- ib_vector_t* bound_ids; /*!< bound ids, or NULL
- (pars_bound_id_t*) */
-
- ibool graph_owns_us; /*!< if TRUE (which is the default),
- que_graph_free() will free us */
-};
-
-/** User-supplied function and argument (see pars_info_t::funcs). */
-struct pars_user_func_t {
- const char* name; /*!< function name */
- pars_user_func_cb_t func; /*!< function address */
- void* arg; /*!< user-supplied argument */
-};
-
-/** Bound literal (see pars_info_t::bound_lits). */
-struct pars_bound_lit_t {
- const char* name; /*!< name */
- const void* address; /*!< address of the data */
- ulint length; /*!< length of data */
- ulint type; /*!< type, e.g. DATA_FIXBINARY */
- ulint prtype; /*!< precise type, e.g. DATA_UNSIGNED */
- sym_node_t* node; /*!< symbol node */
-};
-
-/** Bound identifier (see pars_info_t::bound_ids). */
-struct pars_bound_id_t {
- const char* name; /*!< name */
- const char* id; /*!< identifier */
-};
-
-/** Denotes a reserved word in a parsing tree; holds only its token code. */
-struct pars_res_word_t{
- int code; /*!< the token code for the reserved word from
- pars0grm.h */
-};
-
-/** A predefined function or operator node in a parsing tree; this construct
-is also used for some non-functions like the assignment ':=' */
-struct func_node_t{
- que_common_t common; /*!< type: QUE_NODE_FUNC */
- int func; /*!< token code of the function name */
- ulint fclass; /*!< class of the function (see PARS_FUNC_*) */
- que_node_t* args; /*!< argument(s) of the function */
- UT_LIST_NODE_T(func_node_t) cond_list;
- /*!< list of comparison conditions; defined
- only for comparison operator nodes except,
- presently, for OPT_SCROLL_TYPE ones */
- UT_LIST_NODE_T(func_node_t) func_node_list;
- /*!< list of function nodes in a parsed
- query graph */
-};
-
-/** An order-by node in a select: the column to order by and the direction. */
-struct order_node_t{
- que_common_t common; /*!< type: QUE_NODE_ORDER */
- sym_node_t* column; /*!< order-by column */
- ibool asc; /*!< TRUE if ascending, FALSE if descending */
-};
-
-/** Procedure definition node: name symbol, parameters, statements and symbol table. */
-struct proc_node_t{
- que_common_t common; /*!< type: QUE_NODE_PROC */
- sym_node_t* proc_id; /*!< procedure name symbol in the symbol
- table of this same procedure */
- sym_node_t* param_list; /*!< input and output parameters */
- que_node_t* stat_list; /*!< statement list */
- sym_tab_t* sym_tab; /*!< symbol table of this procedure */
-};
-
-/** elsif-element node (see if_node_t::elsif_list) */
-struct elsif_node_t{
- que_common_t common; /*!< type: QUE_NODE_ELSIF */
- que_node_t* cond; /*!< if condition */
- que_node_t* stat_list; /*!< statement list */
-};
-
-/** if-statement node, with its else part and elsif elements */
-struct if_node_t{
- que_common_t common; /*!< type: QUE_NODE_IF */
- que_node_t* cond; /*!< if condition */
- que_node_t* stat_list; /*!< statement list */
- que_node_t* else_part; /*!< else-part statement list */
- elsif_node_t* elsif_list; /*!< elsif element list (elsif_node_t) */
-};
-
-/** while-statement node: loop condition plus statement list */
-struct while_node_t{
- que_common_t common; /*!< type: QUE_NODE_WHILE */
- que_node_t* cond; /*!< while condition */
- que_node_t* stat_list; /*!< statement list */
-};
-
-/** for-loop-statement node: loop variable, its start and end limits, and the statement list */
-struct for_node_t{
- que_common_t common; /*!< type: QUE_NODE_FOR */
- sym_node_t* loop_var; /*!< loop variable: this is the
- dereferenced symbol from the
- variable declarations, not the
- symbol occurrence in the for loop
- definition */
- que_node_t* loop_start_limit;/*!< initial value of loop variable */
- que_node_t* loop_end_limit; /*!< end value of loop variable */
- lint loop_end_value; /*!< evaluated value for the end value:
- it is calculated only when the loop
- is entered, and will not change within
- the loop */
- que_node_t* stat_list; /*!< statement list */
-};
-
-/** exit statement node; holds only the common node header */
-struct exit_node_t{
- que_common_t common; /*!< type: QUE_NODE_EXIT */
-};
-
-/** return-statement node; holds only the common node header */
-struct return_node_t{
- que_common_t common; /*!< type: QUE_NODE_RETURN */
-};
-
-/** Assignment statement node: assigns 'val' to the variable 'var' */
-struct assign_node_t{
- que_common_t common; /*!< type: QUE_NODE_ASSIGNMENT */
- sym_node_t* var; /*!< variable to set */
- que_node_t* val; /*!< value to assign */
-};
-
-/** Column assignment node: assigns 'val' to the column 'col' */
-struct col_assign_node_t{
- que_common_t common; /*!< type: QUE_NODE_COL_ASSIGN */
- sym_node_t* col; /*!< column to set */
- que_node_t* val; /*!< value to assign */
-};
-
-/** Classes of functions (cf. func_node_t::fclass, "class of the function") */
-/* @{ */
-#define PARS_FUNC_ARITH 1 /*!< +, -, *, / */
-#define PARS_FUNC_LOGICAL 2 /*!< AND, OR, NOT */
-#define PARS_FUNC_CMP 3 /*!< comparison operators */
-#define PARS_FUNC_PREDEFINED 4 /*!< TO_NUMBER, SUBSTR, ... */
-#define PARS_FUNC_AGGREGATE 5 /*!< COUNT, DISTINCT, SUM */
-#define PARS_FUNC_OTHER 6 /*!< these are not real functions,
- e.g., := */
-/* @} */
-
-#ifndef UNIV_NONINL
-#include "pars0pars.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/pars0pars.ic b/storage/xtradb/include/pars0pars.ic
deleted file mode 100644
index 4c88337a265..00000000000
--- a/storage/xtradb/include/pars0pars.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0pars.ic
-SQL parser
-
-Created 11/19/1996 Heikki Tuuri
-*******************************************************/
diff --git a/storage/xtradb/include/pars0sym.h b/storage/xtradb/include/pars0sym.h
deleted file mode 100644
index bcf73639228..00000000000
--- a/storage/xtradb/include/pars0sym.h
+++ /dev/null
@@ -1,258 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0sym.h
-SQL parser symbol table
-
-Created 12/15/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef pars0sym_h
-#define pars0sym_h
-
-#include "univ.i"
-#include "que0types.h"
-#include "usr0types.h"
-#include "dict0types.h"
-#include "pars0types.h"
-#include "row0types.h"
-
-/******************************************************************//**
-Creates a symbol table for a single stored procedure or query.
-@return own: symbol table */
-UNIV_INTERN
-sym_tab_t*
-sym_tab_create(
-/*===========*/
- mem_heap_t* heap); /*!< in: memory heap where to create */
-/******************************************************************//**
-Frees the memory allocated dynamically AFTER parsing phase for variables
-etc. in the symbol table. Does not free the mem heap where the table was
-originally created. Frees also SQL explicit cursor definitions. */
-UNIV_INTERN
-void
-sym_tab_free_private(
-/*=================*/
- sym_tab_t* sym_tab); /*!< in, own: symbol table */
-/******************************************************************//**
-Adds an integer literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_int_lit(
-/*================*/
- sym_tab_t* sym_tab, /*!< in: symbol table */
- ulint val); /*!< in: integer value */
-/******************************************************************//**
-Adds a string literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_str_lit(
-/*================*/
- sym_tab_t* sym_tab, /*!< in: symbol table */
- const byte* str, /*!< in: string with no quotes around
- it */
- ulint len); /*!< in: string length */
-/******************************************************************//**
-Add a bound literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_bound_lit(
-/*==================*/
- sym_tab_t* sym_tab, /*!< in: symbol table */
- const char* name, /*!< in: name of bound literal */
- ulint* lit_type); /*!< out: type of literal (PARS_*_LIT) */
-/**********************************************************************
-Rebind literal to a node in the symbol table. */
-
-sym_node_t*
-sym_tab_rebind_lit(
-/*===============*/
- /* out: symbol table node */
- sym_node_t* node, /* in: node that is bound to literal*/
- const void* address, /* in: pointer to data */
- ulint length); /* in: length of data */
-/******************************************************************//**
-Adds an SQL null literal to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_null_lit(
-/*=================*/
- sym_tab_t* sym_tab); /*!< in: symbol table */
-/******************************************************************//**
-Adds an identifier to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_id(
-/*===========*/
- sym_tab_t* sym_tab, /*!< in: symbol table */
- byte* name, /*!< in: identifier name */
- ulint len); /*!< in: identifier length */
-
-/******************************************************************//**
-Add a bound identifier to a symbol table.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_bound_id(
-/*===========*/
- sym_tab_t* sym_tab, /*!< in: symbol table */
- const char* name); /*!< in: name of bound id */
-
-/** Position in sym_node_t::field_nos of the field number in the clustered index */
-#define SYM_CLUST_FIELD_NO 0
-/** Position in sym_node_t::field_nos of the field number in the non-clustered index to use first */
-#define SYM_SEC_FIELD_NO 1
-
-/** Types of a symbol table node (see sym_node_t::token_type) */
-enum sym_tab_entry {
- SYM_UNSET, /*!< Unset entry. */
- SYM_VAR = 91, /*!< declared parameter or local
- variable of a procedure */
- SYM_IMPLICIT_VAR, /*!< storage for an intermediate result
- of a calculation */
- SYM_LIT, /*!< literal */
- SYM_TABLE_REF_COUNTED, /*!< database table name, ref counted. Must
- be closed explicitly. */
- SYM_TABLE, /*!< database table name */
- SYM_COLUMN, /*!< database column name */
- SYM_CURSOR, /*!< named cursor */
- SYM_PROCEDURE_NAME, /*!< stored procedure name */
- SYM_INDEX, /*!< database index name */
- SYM_FUNCTION /*!< user function name */
-};
-
-/** Symbol table node */
-struct sym_node_t{
- que_common_t common; /*!< node type:
- QUE_NODE_SYMBOL */
- /* NOTE: if the data field in 'common.val' is not NULL and the symbol
- table node is not for a temporary column, the memory for the value has
- been allocated from dynamic memory and it should be freed when the
- symbol table is discarded */
-
- /* 'alias' and 'indirection' are almost the same, but not quite.
- 'alias' always points to the primary instance of the variable, while
- 'indirection' does the same only if we should use the primary
- instance's values for the node's data. This is usually the case, but
- when initializing a cursor (e.g., "DECLARE CURSOR c IS SELECT * FROM
- t WHERE id = x;"), we copy the values from the primary instance to
- the cursor's instance so that they are fixed for the duration of the
- cursor, and set 'indirection' to NULL. If we did not, the value of
- 'x' could change between fetches and things would break horribly.
-
- TODO: It would be cleaner to make 'indirection' a boolean field and
- always use 'alias' to refer to the primary node. */
-
- sym_node_t* indirection; /*!< pointer to
- another symbol table
- node which contains
- the value for this
- node, NULL otherwise */
- sym_node_t* alias; /*!< pointer to
- another symbol table
- node for which this
- node is an alias,
- NULL otherwise */
- UT_LIST_NODE_T(sym_node_t) col_var_list; /*!< list of table
- columns or a list of
- input variables for an
- explicit cursor */
- ibool copy_val; /*!< TRUE if a column
- and its value should
- be copied to dynamic
- memory when fetched */
- ulint field_nos[2]; /*!< if a column, in
- the position
- SYM_CLUST_FIELD_NO is
- the field number in the
- clustered index; in
- the position
- SYM_SEC_FIELD_NO
- the field number in the
- non-clustered index to
- use first; if not found
- from the index, then
- ULINT_UNDEFINED */
- ibool resolved; /*!< TRUE if the
- meaning of a variable
- or a column has been
- resolved; for literals
- this is always TRUE */
- enum sym_tab_entry token_type; /*!< type of the
- parsed token */
- const char* name; /*!< name of an id */
- ulint name_len; /*!< id name length */
- dict_table_t* table; /*!< table definition
- if a table id or a
- column id */
- ulint col_no; /*!< column number if a
- column */
- sel_buf_t* prefetch_buf; /*!< NULL, or a buffer
- for cached column
- values for prefetched
- rows */
- sel_node_t* cursor_def; /*!< cursor definition
- select node if a
- named cursor */
- ulint param_type; /*!< PARS_INPUT,
- PARS_OUTPUT, or
- PARS_NOT_PARAM if not a
- procedure parameter */
- sym_tab_t* sym_table; /*!< back pointer to
- the symbol table */
- UT_LIST_NODE_T(sym_node_t) sym_list; /*!< list of symbol
- nodes */
- sym_node_t* like_node; /*!< LIKE operator node */
-};
-
-/** Symbol table for a single stored procedure or query (see sym_tab_create()) */
-struct sym_tab_t{
- que_t* query_graph;
- /*!< query graph generated by the
- parser */
- const char* sql_string;
- /*!< SQL string to parse */
- size_t string_len;
- /*!< SQL string length */
- int next_char_pos;
- /*!< position of the next character in
- sql_string to give to the lexical
- analyzer */
- pars_info_t* info; /*!< extra information, or NULL */
- sym_node_list_t sym_list;
- /*!< list of symbol nodes in the symbol
- table */
- UT_LIST_BASE_NODE_T(func_node_t)
- func_node_list;
- /*!< list of function nodes in the
- parsed query graph */
- mem_heap_t* heap; /*!< memory heap from which we can
- allocate space */
-};
-
-#ifndef UNIV_NONINL
-#include "pars0sym.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/pars0sym.ic b/storage/xtradb/include/pars0sym.ic
deleted file mode 100644
index 266c1a6310d..00000000000
--- a/storage/xtradb/include/pars0sym.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0sym.ic
-SQL parser symbol table
-
-Created 12/15/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/xtradb/include/pars0types.h b/storage/xtradb/include/pars0types.h
deleted file mode 100644
index 47f4b432d20..00000000000
--- a/storage/xtradb/include/pars0types.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1998, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/pars0types.h
-SQL parser global types
-
-Created 1/11/1998 Heikki Tuuri
-*******************************************************/
-
-#ifndef pars0types_h
-#define pars0types_h
-
-struct pars_info_t;
-struct pars_user_func_t;
-struct pars_bound_lit_t;
-struct pars_bound_id_t;
-struct sym_node_t;
-struct sym_tab_t;
-struct pars_res_word_t;
-struct func_node_t;
-struct order_node_t;
-struct proc_node_t;
-struct elsif_node_t;
-struct if_node_t;
-struct while_node_t;
-struct for_node_t;
-struct exit_node_t;
-struct return_node_t;
-struct assign_node_t;
-struct col_assign_node_t;
-
-typedef UT_LIST_BASE_NODE_T(sym_node_t) sym_node_list_t;
-
-#endif
diff --git a/storage/xtradb/include/que0que.h b/storage/xtradb/include/que0que.h
deleted file mode 100644
index e5b2a1ba3fc..00000000000
--- a/storage/xtradb/include/que0que.h
+++ /dev/null
@@ -1,531 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/que0que.h
-Query graph
-
-Created 5/27/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef que0que_h
-#define que0que_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "btr0sea.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "srv0srv.h"
-#include "usr0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "pars0types.h"
-
-/* If the following flag is set TRUE, the module will print trace info
-of SQL execution in the UNIV_SQL_DEBUG version */
-extern ibool que_trace_on;
-
-/** Mutex protecting the query threads. */
-extern ib_mutex_t que_thr_mutex;
-
-/***********************************************************************//**
-Creates a query graph fork node.
-@return own: fork node */
-UNIV_INTERN
-que_fork_t*
-que_fork_create(
-/*============*/
- que_t* graph, /*!< in: graph, if NULL then this
- fork node is assumed to be the
- graph root */
- que_node_t* parent, /*!< in: parent node */
- ulint fork_type, /*!< in: fork type */
- mem_heap_t* heap); /*!< in: memory heap where created */
-/***********************************************************************//**
-Gets the first thr in a fork. */
-UNIV_INLINE
-que_thr_t*
-que_fork_get_first_thr(
-/*===================*/
- que_fork_t* fork); /*!< in: query fork */
-/***********************************************************************//**
-Gets the child node of the first thr in a fork. */
-UNIV_INLINE
-que_node_t*
-que_fork_get_child(
-/*===============*/
- que_fork_t* fork); /*!< in: query fork */
-/***********************************************************************//**
-Sets the parent of a graph node. */
-UNIV_INLINE
-void
-que_node_set_parent(
-/*================*/
- que_node_t* node, /*!< in: graph node */
- que_node_t* parent);/*!< in: parent */
-/***********************************************************************//**
-Creates a query graph thread node.
-@return own: query thread node */
-UNIV_INTERN
-que_thr_t*
-que_thr_create(
-/*===========*/
- que_fork_t* parent, /*!< in: parent node, i.e., a fork node */
- mem_heap_t* heap); /*!< in: memory heap where created */
-/**********************************************************************//**
-Frees a query graph, but not the heap where it was created. Does not free
-explicit cursor declarations, they are freed in que_graph_free. */
-UNIV_INTERN
-void
-que_graph_free_recursive(
-/*=====================*/
- que_node_t* node); /*!< in: query graph node */
-/**********************************************************************//**
-Frees a query graph. */
-UNIV_INTERN
-void
-que_graph_free(
-/*===========*/
- que_t* graph); /*!< in: query graph; we assume that the memory
- heap where this graph was created is private
- to this graph: if not, then use
- que_graph_free_recursive and free the heap
- afterwards! */
-/**********************************************************************//**
-Stops a query thread if graph or trx is in a state requiring it. The
-conditions are tested in the order (1) graph, (2) trx. The lock_sys_t::mutex
-has to be reserved.
-@return TRUE if stopped */
-UNIV_INTERN
-ibool
-que_thr_stop(
-/*=========*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Moves a thread from another state to the QUE_THR_RUNNING state. Increments
-the n_active_thrs counters of the query graph and transaction. */
-UNIV_INTERN
-void
-que_thr_move_to_run_state_for_mysql(
-/*================================*/
- que_thr_t* thr, /*!< in: a query thread */
- trx_t* trx); /*!< in: transaction */
-/**********************************************************************//**
-A patch for MySQL used to 'stop' a dummy query thread used in MySQL
-select, when there is no error or lock wait. */
-UNIV_INTERN
-void
-que_thr_stop_for_mysql_no_error(
-/*============================*/
- que_thr_t* thr, /*!< in: query thread */
- trx_t* trx); /*!< in: transaction */
-/**********************************************************************//**
-A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
-query thread is stopped and made inactive, except in the case where
-it was put to the lock wait state in lock0lock.cc, but the lock has already
-been granted or the transaction chosen as a victim in deadlock resolution. */
-UNIV_INTERN
-void
-que_thr_stop_for_mysql(
-/*===================*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Run a query thread. Handles lock waits. */
-UNIV_INTERN
-void
-que_run_threads(
-/*============*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Moves a suspended query thread to the QUE_THR_RUNNING state and releases
-a worker thread to execute it. This function should be used to end
-the wait state of a query thread waiting for a lock or a stored procedure
-completion.
-@return query thread instance of thread to wakeup or NULL */
-UNIV_INTERN
-que_thr_t*
-que_thr_end_lock_wait(
-/*==================*/
- trx_t* trx); /*!< in: transaction in the
- QUE_THR_LOCK_WAIT state */
-/**********************************************************************//**
-Starts execution of a command in a query fork. Picks a query thread which
-is not in the QUE_THR_RUNNING state and moves it to that state. If none
-can be chosen, a situation which may arise in parallelized fetches, NULL
-is returned.
-@return a query thread of the graph moved to QUE_THR_RUNNING state, or
-NULL; the query thread should be executed by que_run_threads by the
-caller */
-UNIV_INTERN
-que_thr_t*
-que_fork_start_command(
-/*===================*/
- que_fork_t* fork); /*!< in: a query fork */
-/***********************************************************************//**
-Gets the trx of a query thread. */
-UNIV_INLINE
-trx_t*
-thr_get_trx(
-/*========*/
- que_thr_t* thr); /*!< in: query thread */
-/*******************************************************************//**
-Determines if this thread is rolling back an incomplete transaction
-in crash recovery.
-@return TRUE if thr is rolling back an incomplete transaction in crash
-recovery */
-UNIV_INLINE
-ibool
-thr_is_recv(
-/*========*/
- const que_thr_t* thr); /*!< in: query thread */
-/***********************************************************************//**
-Gets the type of a graph node. */
-UNIV_INLINE
-ulint
-que_node_get_type(
-/*==============*/
- que_node_t* node); /*!< in: graph node */
-/***********************************************************************//**
-Gets pointer to the value data type field of a graph node. */
-UNIV_INLINE
-dtype_t*
-que_node_get_data_type(
-/*===================*/
- que_node_t* node); /*!< in: graph node */
-/***********************************************************************//**
-Gets pointer to the value dfield of a graph node. */
-UNIV_INLINE
-dfield_t*
-que_node_get_val(
-/*=============*/
- que_node_t* node); /*!< in: graph node */
-/***********************************************************************//**
-Gets the value buffer size of a graph node.
-@return val buffer size, not defined if val.data == NULL in node */
-UNIV_INLINE
-ulint
-que_node_get_val_buf_size(
-/*======================*/
- que_node_t* node); /*!< in: graph node */
-/***********************************************************************//**
-Sets the value buffer size of a graph node. */
-UNIV_INLINE
-void
-que_node_set_val_buf_size(
-/*======================*/
- que_node_t* node, /*!< in: graph node */
- ulint size); /*!< in: size */
-/*********************************************************************//**
-Gets the next list node in a list of query graph nodes. */
-UNIV_INLINE
-que_node_t*
-que_node_get_next(
-/*==============*/
- que_node_t* node); /*!< in: node in a list */
-/*********************************************************************//**
-Gets the parent node of a query graph node.
-@return parent node or NULL */
-UNIV_INLINE
-que_node_t*
-que_node_get_parent(
-/*================*/
- que_node_t* node); /*!< in: node */
-/****************************************************************//**
-Get the first containing loop node (e.g. while_node_t or for_node_t) for the
-given node, or NULL if the node is not within a loop.
-@return containing loop node, or NULL. */
-UNIV_INTERN
-que_node_t*
-que_node_get_containing_loop_node(
-/*==============================*/
- que_node_t* node); /*!< in: node */
-/*********************************************************************//**
-Catenates a query graph node to a list of them, possible empty list.
-@return one-way list of nodes */
-UNIV_INLINE
-que_node_t*
-que_node_list_add_last(
-/*===================*/
- que_node_t* node_list, /*!< in: node list, or NULL */
- que_node_t* node); /*!< in: node */
-/*************************************************************************
-Get the last node from the list.*/
-UNIV_INLINE
-que_node_t*
-que_node_list_get_last(
-/*===================*/
- /* out: node last node from list.*/
- que_node_t* node_list); /* in: node list, or NULL */
-/*********************************************************************//**
-Gets a query graph node list length.
-@return length, for NULL list 0 */
-UNIV_INLINE
-ulint
-que_node_list_get_len(
-/*==================*/
- que_node_t* node_list); /*!< in: node list, or NULL */
-/**********************************************************************//**
-Checks if graph, trx, or session is in a state where the query thread should
-be stopped.
-@return TRUE if should be stopped; NOTE that if the peek is made
-without reserving the trx_t::mutex, then another peek with the mutex
-reserved is necessary before deciding the actual stopping */
-UNIV_INLINE
-ibool
-que_thr_peek_stop(
-/*==============*/
- que_thr_t* thr); /*!< in: query thread */
-/***********************************************************************//**
-Returns TRUE if the query graph is for a SELECT statement.
-@return TRUE if a select */
-UNIV_INLINE
-ibool
-que_graph_is_select(
-/*================*/
- que_t* graph); /*!< in: graph */
-/**********************************************************************//**
-Prints info of an SQL query graph node. */
-UNIV_INTERN
-void
-que_node_print_info(
-/*================*/
- que_node_t* node); /*!< in: query graph node */
-/*********************************************************************//**
-Evaluate the given SQL
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-que_eval_sql(
-/*=========*/
- pars_info_t* info, /*!< in: info struct, or NULL */
- const char* sql, /*!< in: SQL string */
- ibool reserve_dict_mutex,
- /*!< in: if TRUE, acquire/release
- dict_sys->mutex around call to pars_sql. */
- trx_t* trx); /*!< in: trx */
-
-/**********************************************************************//**
-Round robin scheduler.
-@return a query thread of the graph moved to QUE_THR_RUNNING state, or
-NULL; the query thread should be executed by que_run_threads by the
-caller */
-UNIV_INTERN
-que_thr_t*
-que_fork_scheduler_round_robin(
-/*===========================*/
- que_fork_t* fork, /*!< in: a query fork */
- que_thr_t* thr); /*!< in: current pos */
-
-/*********************************************************************//**
-Initialise the query sub-system. */
-UNIV_INTERN
-void
-que_init(void);
-/*==========*/
-
-/*********************************************************************//**
-Close the query sub-system. */
-UNIV_INTERN
-void
-que_close(void);
-/*===========*/
-
-/* Query graph query thread node: the fields are protected by the
-trx_t::mutex with the exceptions named below */
-
-struct que_thr_t{
- que_common_t common; /*!< type: QUE_NODE_THR */
- ulint magic_n; /*!< magic number to catch memory
- corruption */
- que_node_t* child; /*!< graph child node */
- que_t* graph; /*!< graph where this node belongs */
- ulint state; /*!< state of the query thread */
- ibool is_active; /*!< TRUE if the thread has been set
- to the run state in
- que_thr_move_to_run_state, but not
- deactivated in
- que_thr_dec_reference_count */
- /*------------------------------*/
- /* The following fields are private to the OS thread executing the
- query thread, and are not protected by any mutex: */
-
- que_node_t* run_node; /*!< pointer to the node where the
- subgraph down from this node is
- currently executed */
- que_node_t* prev_node; /*!< pointer to the node from which
- the control came */
- ulint resource; /*!< resource usage of the query thread
- thus far */
- ulint lock_state; /*!< lock state of thread (table or
- row) */
- struct srv_slot_t*
- slot; /* The thread slot in the wait
- array in srv_sys_t */
- /*------------------------------*/
- /* The following fields are links for the various lists that
- this type can be on. */
- UT_LIST_NODE_T(que_thr_t)
- thrs; /*!< list of thread nodes of the fork
- node */
- UT_LIST_NODE_T(que_thr_t)
- trx_thrs; /*!< lists of threads in wait list of
- the trx */
- UT_LIST_NODE_T(que_thr_t)
- queue; /*!< list of runnable thread nodes in
- the server task queue */
- ulint fk_cascade_depth; /*!< maximum cascading call depth
- supported for foreign key constraint
- related delete/updates */
-};
-
-#define QUE_THR_MAGIC_N 8476583
-#define QUE_THR_MAGIC_FREED 123461526
-
-/* Query graph fork node: its fields are protected by the query thread mutex */
-struct que_fork_t{
- que_common_t common; /*!< type: QUE_NODE_FORK */
- que_t* graph; /*!< query graph of this node */
- ulint fork_type; /*!< fork type */
- ulint n_active_thrs; /*!< if this is the root of a graph, the
- number query threads that have been
- started in que_thr_move_to_run_state
- but for which que_thr_dec_refer_count
- has not yet been called */
- trx_t* trx; /*!< transaction: this is set only in
- the root node */
- ulint state; /*!< state of the fork node */
- que_thr_t* caller; /*!< pointer to a possible calling query
- thread */
- UT_LIST_BASE_NODE_T(que_thr_t)
- thrs; /*!< list of query threads */
- /*------------------------------*/
- /* The fields in this section are defined only in the root node */
- sym_tab_t* sym_tab; /*!< symbol table of the query,
- generated by the parser, or NULL
- if the graph was created 'by hand' */
- pars_info_t* info; /*!< info struct, or NULL */
- /* The following cur_... fields are relevant only in a select graph */
-
- ulint cur_end; /*!< QUE_CUR_NOT_DEFINED, QUE_CUR_START,
- QUE_CUR_END */
- ulint cur_pos; /*!< if there are n rows in the result
- set, values 0 and n + 1 mean before
- first row, or after last row, depending
- on cur_end; values 1...n mean a row
- index */
- ibool cur_on_row; /*!< TRUE if cursor is on a row, i.e.,
- it is not before the first row or
- after the last row */
- sel_node_t* last_sel_node; /*!< last executed select node, or NULL
- if none */
- UT_LIST_NODE_T(que_fork_t)
- graphs; /*!< list of query graphs of a session
- or a stored procedure */
- /*------------------------------*/
- mem_heap_t* heap; /*!< memory heap where the fork was
- created */
-
-};
-
-/* Query fork (or graph) types */
-#define QUE_FORK_SELECT_NON_SCROLL 1 /* forward-only cursor */
-#define QUE_FORK_SELECT_SCROLL 2 /* scrollable cursor */
-#define QUE_FORK_INSERT 3
-#define QUE_FORK_UPDATE 4
-#define QUE_FORK_ROLLBACK 5
- /* This is really the undo graph used in rollback,
- no signal-sending roll_node in this graph */
-#define QUE_FORK_PURGE 6
-#define QUE_FORK_EXECUTE 7
-#define QUE_FORK_PROCEDURE 8
-#define QUE_FORK_PROCEDURE_CALL 9
-#define QUE_FORK_MYSQL_INTERFACE 10
-#define QUE_FORK_RECOVERY 11
-
-/* Query fork (or graph) states */
-#define QUE_FORK_ACTIVE 1
-#define QUE_FORK_COMMAND_WAIT 2
-#define QUE_FORK_INVALID 3
-#define QUE_FORK_BEING_FREED 4
-
-/* Flag which is ORed to control structure statement node types */
-#define QUE_NODE_CONTROL_STAT 1024
-
-/* Query graph node types */
-#define QUE_NODE_LOCK 1
-#define QUE_NODE_INSERT 2
-#define QUE_NODE_UPDATE 4
-#define QUE_NODE_CURSOR 5
-#define QUE_NODE_SELECT 6
-#define QUE_NODE_AGGREGATE 7
-#define QUE_NODE_FORK 8
-#define QUE_NODE_THR 9
-#define QUE_NODE_UNDO 10
-#define QUE_NODE_COMMIT 11
-#define QUE_NODE_ROLLBACK 12
-#define QUE_NODE_PURGE 13
-#define QUE_NODE_CREATE_TABLE 14
-#define QUE_NODE_CREATE_INDEX 15
-#define QUE_NODE_SYMBOL 16
-#define QUE_NODE_RES_WORD 17
-#define QUE_NODE_FUNC 18
-#define QUE_NODE_ORDER 19
-#define QUE_NODE_PROC (20 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_IF (21 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_WHILE (22 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_ASSIGNMENT 23
-#define QUE_NODE_FETCH 24
-#define QUE_NODE_OPEN 25
-#define QUE_NODE_COL_ASSIGNMENT 26
-#define QUE_NODE_FOR (27 + QUE_NODE_CONTROL_STAT)
-#define QUE_NODE_RETURN 28
-#define QUE_NODE_ROW_PRINTF 29
-#define QUE_NODE_ELSIF 30
-#define QUE_NODE_CALL 31
-#define QUE_NODE_EXIT 32
-
-/* Query thread states */
-#define QUE_THR_RUNNING 1
-#define QUE_THR_PROCEDURE_WAIT 2
-#define QUE_THR_COMPLETED 3 /* in selects this means that the
- thread is at the end of its result set
- (or start, in case of a scroll cursor);
- in other statements, this means the
- thread has done its task */
-#define QUE_THR_COMMAND_WAIT 4
-#define QUE_THR_LOCK_WAIT 5
-#define QUE_THR_SUSPENDED 7
-#define QUE_THR_ERROR 8
-
-/* Query thread lock states */
-#define QUE_THR_LOCK_NOLOCK 0
-#define QUE_THR_LOCK_ROW 1
-#define QUE_THR_LOCK_TABLE 2
-
-/* From where the cursor position is counted */
-#define QUE_CUR_NOT_DEFINED 1
-#define QUE_CUR_START 2
-#define QUE_CUR_END 3
-
-#ifndef UNIV_NONINL
-#include "que0que.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/que0que.ic b/storage/xtradb/include/que0que.ic
deleted file mode 100644
index eff5a86d958..00000000000
--- a/storage/xtradb/include/que0que.ic
+++ /dev/null
@@ -1,309 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2010, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/que0que.ic
-Query graph
-
-Created 5/27/1996 Heikki Tuuri
-*******************************************************/
-
-#include "usr0sess.h"
-
-/***********************************************************************//**
-Gets the trx of a query thread. */
-UNIV_INLINE
-trx_t*
-thr_get_trx(
-/*========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- ut_ad(thr);
-
- return(thr->graph->trx);
-}
-
-/*******************************************************************//**
-Determines if this thread is rolling back an incomplete transaction
-in crash recovery.
-@return TRUE if thr is rolling back an incomplete transaction in crash
-recovery */
-UNIV_INLINE
-ibool
-thr_is_recv(
-/*========*/
- const que_thr_t* thr) /*!< in: query thread */
-{
- return(trx_is_recv(thr->graph->trx));
-}
-
-/***********************************************************************//**
-Gets the first thr in a fork. */
-UNIV_INLINE
-que_thr_t*
-que_fork_get_first_thr(
-/*===================*/
- que_fork_t* fork) /*!< in: query fork */
-{
- return(UT_LIST_GET_FIRST(fork->thrs));
-}
-
-/***********************************************************************//**
-Gets the child node of the first thr in a fork. */
-UNIV_INLINE
-que_node_t*
-que_fork_get_child(
-/*===============*/
- que_fork_t* fork) /*!< in: query fork */
-{
- que_thr_t* thr;
-
- thr = UT_LIST_GET_FIRST(fork->thrs);
-
- return(thr->child);
-}
-
-/***********************************************************************//**
-Gets the type of a graph node. */
-UNIV_INLINE
-ulint
-que_node_get_type(
-/*==============*/
- que_node_t* node) /*!< in: graph node */
-{
- ut_ad(node);
-
- return(((que_common_t*) node)->type);
-}
-
-/***********************************************************************//**
-Gets pointer to the value dfield of a graph node. */
-UNIV_INLINE
-dfield_t*
-que_node_get_val(
-/*=============*/
- que_node_t* node) /*!< in: graph node */
-{
- ut_ad(node);
-
- return(&(((que_common_t*) node)->val));
-}
-
-/***********************************************************************//**
-Gets the value buffer size of a graph node.
-@return val buffer size, not defined if val.data == NULL in node */
-UNIV_INLINE
-ulint
-que_node_get_val_buf_size(
-/*======================*/
- que_node_t* node) /*!< in: graph node */
-{
- ut_ad(node);
-
- return(((que_common_t*) node)->val_buf_size);
-}
-
-/***********************************************************************//**
-Sets the value buffer size of a graph node. */
-UNIV_INLINE
-void
-que_node_set_val_buf_size(
-/*======================*/
- que_node_t* node, /*!< in: graph node */
- ulint size) /*!< in: size */
-{
- ut_ad(node);
-
- ((que_common_t*) node)->val_buf_size = size;
-}
-
-/***********************************************************************//**
-Sets the parent of a graph node. */
-UNIV_INLINE
-void
-que_node_set_parent(
-/*================*/
- que_node_t* node, /*!< in: graph node */
- que_node_t* parent) /*!< in: parent */
-{
- ut_ad(node);
-
- ((que_common_t*) node)->parent = parent;
-}
-
-/***********************************************************************//**
-Gets pointer to the value data type field of a graph node. */
-UNIV_INLINE
-dtype_t*
-que_node_get_data_type(
-/*===================*/
- que_node_t* node) /*!< in: graph node */
-{
- ut_ad(node);
-
- return(dfield_get_type(&((que_common_t*) node)->val));
-}
-
-/*********************************************************************//**
-Catenates a query graph node to a list of them, possible empty list.
-@return one-way list of nodes */
-UNIV_INLINE
-que_node_t*
-que_node_list_add_last(
-/*===================*/
- que_node_t* node_list, /*!< in: node list, or NULL */
- que_node_t* node) /*!< in: node */
-{
- que_common_t* cnode;
- que_common_t* cnode2;
-
- cnode = (que_common_t*) node;
-
- cnode->brother = NULL;
-
- if (node_list == NULL) {
-
- return(node);
- }
-
- cnode2 = (que_common_t*) node_list;
-
- while (cnode2->brother != NULL) {
- cnode2 = (que_common_t*) cnode2->brother;
- }
-
- cnode2->brother = node;
-
- return(node_list);
-}
-
-/*************************************************************************
-Removes a query graph node from the list.*/
-UNIV_INLINE
-que_node_t*
-que_node_list_get_last(
-/*===================*/
- /* out: last node in list.*/
- que_node_t* node_list) /* in: node list */
-{
- que_common_t* node;
-
- ut_a(node_list != NULL);
-
- node = (que_common_t*) node_list;
-
- /* We need the last element */
- while (node->brother != NULL) {
- node = (que_common_t*) node->brother;
- }
-
- return(node);
-}
-/*********************************************************************//**
-Gets the next list node in a list of query graph nodes.
-@return next node in a list of nodes */
-UNIV_INLINE
-que_node_t*
-que_node_get_next(
-/*==============*/
- que_node_t* node) /*!< in: node in a list */
-{
- return(((que_common_t*) node)->brother);
-}
-
-/*********************************************************************//**
-Gets a query graph node list length.
-@return length, for NULL list 0 */
-UNIV_INLINE
-ulint
-que_node_list_get_len(
-/*==================*/
- que_node_t* node_list) /*!< in: node list, or NULL */
-{
- const que_common_t* cnode;
- ulint len;
-
- cnode = (const que_common_t*) node_list;
- len = 0;
-
- while (cnode != NULL) {
- len++;
- cnode = (const que_common_t*) cnode->brother;
- }
-
- return(len);
-}
-
-/*********************************************************************//**
-Gets the parent node of a query graph node.
-@return parent node or NULL */
-UNIV_INLINE
-que_node_t*
-que_node_get_parent(
-/*================*/
- que_node_t* node) /*!< in: node */
-{
- return(((que_common_t*) node)->parent);
-}
-
-/**********************************************************************//**
-Checks if graph, trx, or session is in a state where the query thread should
-be stopped.
-@return TRUE if should be stopped; NOTE that if the peek is made
-without reserving the trx mutex, then another peek with the mutex
-reserved is necessary before deciding the actual stopping */
-UNIV_INLINE
-ibool
-que_thr_peek_stop(
-/*==============*/
- que_thr_t* thr) /*!< in: query thread */
-{
- trx_t* trx;
- que_t* graph;
-
- graph = thr->graph;
- trx = graph->trx;
-
- if (graph->state != QUE_FORK_ACTIVE
- || trx->lock.que_state == TRX_QUE_LOCK_WAIT
- || (trx->lock.que_state != TRX_QUE_ROLLING_BACK
- && trx->lock.que_state != TRX_QUE_RUNNING)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/***********************************************************************//**
-Returns TRUE if the query graph is for a SELECT statement.
-@return TRUE if a select */
-UNIV_INLINE
-ibool
-que_graph_is_select(
-/*================*/
- que_t* graph) /*!< in: graph */
-{
- if (graph->fork_type == QUE_FORK_SELECT_SCROLL
- || graph->fork_type == QUE_FORK_SELECT_NON_SCROLL) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
diff --git a/storage/xtradb/include/que0types.h b/storage/xtradb/include/que0types.h
deleted file mode 100644
index 0f11cad301a..00000000000
--- a/storage/xtradb/include/que0types.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/que0types.h
-Query graph global types
-
-Created 5/27/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef que0types_h
-#define que0types_h
-
-#include "data0data.h"
-#include "dict0types.h"
-
-/* Pseudotype for all graph nodes */
-typedef void que_node_t;
-
-/* Query graph root is a fork node */
-typedef struct que_fork_t que_t;
-
-struct que_thr_t;
-
-/* Common struct at the beginning of each query graph node; the name of this
-substruct must be 'common' */
-
-struct que_common_t{
- ulint type; /*!< query node type */
- que_node_t* parent; /*!< back pointer to parent node, or NULL */
- que_node_t* brother;/* pointer to a possible brother node */
- dfield_t val; /*!< evaluated value for an expression */
- ulint val_buf_size;
- /* buffer size for the evaluated value data,
- if the buffer has been allocated dynamically:
- if this field is != 0, and the node is a
- symbol node or a function node, then we
- have to free the data field in val
- explicitly */
-};
-
-#endif
diff --git a/storage/xtradb/include/read0i_s.h b/storage/xtradb/include/read0i_s.h
deleted file mode 100644
index 11b63affe09..00000000000
--- a/storage/xtradb/include/read0i_s.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2010-2012, Percona Inc. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-#ifndef read0i_s_h
-#define read0i_s_h
-
-#include <trx0types.h>
-
-struct i_s_xtradb_read_view_struct {
- undo_no_t undo_no;/*!< 0 or if type is
- VIEW_HIGH_GRANULARITY
- transaction undo_no when this high-granularity
- consistent read view was created */
- trx_id_t low_limit_no;
- /*!< The view does not need to see the undo
- logs for transactions whose transaction number
- is strictly smaller (<) than this value: they
- can be removed in purge if not needed by other
- views */
- trx_id_t low_limit_id;
- /*!< The read should not see any transaction
- with trx id >= this value. In other words,
- this is the "high water mark". */
- trx_id_t up_limit_id;
- /*!< The read should see all trx ids which
- are strictly smaller (<) than this value.
- In other words,
- this is the "low water mark". */
-};
-
-typedef struct i_s_xtradb_read_view_struct i_s_xtradb_read_view_t;
-
-UNIV_INTERN
-i_s_xtradb_read_view_t*
-read_fill_i_s_xtradb_read_view(i_s_xtradb_read_view_t *rv);
-
-
-#endif /* read0i_s_h */
diff --git a/storage/xtradb/include/read0read.h b/storage/xtradb/include/read0read.h
deleted file mode 100644
index 2d6885884f7..00000000000
--- a/storage/xtradb/include/read0read.h
+++ /dev/null
@@ -1,232 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/read0read.h
-Cursor read
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef read0read_h
-#define read0read_h
-
-#include "univ.i"
-
-
-#include "ut0byte.h"
-#include "ut0lst.h"
-#include "btr0types.h"
-#include "trx0trx.h"
-#include "trx0sys.h"
-#include "read0types.h"
-
-/*********************************************************************//**
-Opens a read view where exactly the transactions serialized before this
-point in time are seen in the view.
-@return own: read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_open_now(
-/*===============*/
- trx_id_t cr_trx_id, /*!< in: trx_id of creating
- transaction, or 0 used in purge */
- read_view_t*& view); /*!< in,out: pre-allocated view array or
- NULL if a new one needs to be created */
-
-/*********************************************************************//**
-Clones a read view object. This function will allocate space for two read
-views contiguously, one identical in size and content as @param view (starting
-at returned pointer) and another view immediately following the trx_ids array.
-The second view will have space for an extra trx_id_t element.
-@return read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_clone(
-/*============*/
- const read_view_t* view, /*!< in: view to clone */
- read_view_t*& prebuilt_clone);/*!< in,out: prebuilt view or
- NULL */
-/*********************************************************************//**
-Insert the view in the proper order into the trx_sys->view_list. The
-read view list is ordered by read_view_t::low_limit_no in descending order. */
-UNIV_INTERN
-void
-read_view_add(
-/*==========*/
- read_view_t* view); /*!< in: view to add to */
-/*********************************************************************//**
-Makes a copy of the oldest existing read view, or opens a new. The view
-must be closed with ..._close.
-@return own: read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_purge_open(
-/*=================*/
- read_view_t*& clone_view, /*!< in,out: pre-allocated view that
- will be used to clone the oldest view if
- exists */
- read_view_t*& view); /*!< in,out: pre-allocated view array or
- NULL if a new one needs to be created */
-/*********************************************************************//**
-Remove a read view from the trx_sys->view_list. */
-UNIV_INLINE
-void
-read_view_remove(
-/*=============*/
- read_view_t* view, /*!< in: read view, can be 0 */
- bool own_mutex); /*!< in: true if caller owns the
- trx_sys_t::mutex */
-/*********************************************************************//**
-Frees memory allocated by a read view. */
-UNIV_INTERN
-void
-read_view_free(
-/*===========*/
- read_view_t*& view); /*< in,out: read view */
-/*********************************************************************//**
-Closes a consistent read view for MySQL. This function is called at an SQL
-statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
-UNIV_INTERN
-void
-read_view_close_for_mysql(
-/*======================*/
- trx_t* trx); /*!< in: trx which has a read view */
-/*********************************************************************//**
-Checks if a read view sees the specified transaction.
-@return true if sees */
-UNIV_INLINE
-bool
-read_view_sees_trx_id(
-/*==================*/
- const read_view_t* view, /*!< in: read view */
- trx_id_t trx_id) /*!< in: trx id */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Prints a read view to file. */
-UNIV_INTERN
-void
-read_view_print(
-/*============*/
- FILE* file, /*!< in: file to print to */
- const read_view_t* view); /*!< in: read view */
-/*********************************************************************//**
-Create a consistent cursor view for mysql to be used in cursors. In this
-consistent read view modifications done by the creating transaction or future
-transactions are not visible. */
-UNIV_INTERN
-cursor_view_t*
-read_cursor_view_create_for_mysql(
-/*==============================*/
- trx_t* cr_trx);/*!< in: trx where cursor view is created */
-/*********************************************************************//**
-Close a given consistent cursor view for mysql and restore global read view
-back to a transaction read view. */
-UNIV_INTERN
-void
-read_cursor_view_close_for_mysql(
-/*=============================*/
- trx_t* trx, /*!< in: trx */
- cursor_view_t* curview); /*!< in: cursor view to be closed */
-/*********************************************************************//**
-This function sets a given consistent cursor view to a transaction
-read view if given consistent cursor view is not NULL. Otherwise, function
-restores a global read view to a transaction read view. */
-UNIV_INTERN
-void
-read_cursor_set_for_mysql(
-/*======================*/
- trx_t* trx, /*!< in: transaction where cursor is set */
- cursor_view_t* curview);/*!< in: consistent cursor view to be set */
-
-/** Read view lists the trx ids of those transactions for which a consistent
-read should not see the modifications to the database. */
-
-struct read_view_t{
- ulint type; /*!< VIEW_NORMAL, VIEW_HIGH_GRANULARITY */
- undo_no_t undo_no;/*!< 0 or if type is
- VIEW_HIGH_GRANULARITY
- transaction undo_no when this high-granularity
- consistent read view was created */
- trx_id_t low_limit_no;
- /*!< The view does not need to see the undo
- logs for transactions whose transaction number
- is strictly smaller (<) than this value: they
- can be removed in purge if not needed by other
- views */
- trx_id_t low_limit_id;
- /*!< The read should not see any transaction
- with trx id >= this value. In other words,
- this is the "high water mark". */
- trx_id_t up_limit_id;
- /*!< The read should see all trx ids which
- are strictly smaller (<) than this value.
- In other words,
- this is the "low water mark". */
- ulint n_descr;
- /*!< Number of cells in the trx_ids array */
- ulint max_descr;
- /*!< Maximum number of cells in the trx_ids
- array */
- trx_id_t* descriptors;
- /*!< Additional trx ids which the read should
- not see: typically, these are the read-write
- active transactions at the time when the read
- is serialized, except the reading transaction
- itself; the trx ids in this array are in a
- ascending order. These trx_ids should be
- between the "low" and "high" water marks,
- that is, up_limit_id and low_limit_id. */
- trx_id_t creator_trx_id;
- /*!< trx id of creating transaction, or
- 0 used in purge */
- UT_LIST_NODE_T(read_view_t) view_list;
- /*!< List of read views in trx_sys */
-};
-
-/** Read view types @{ */
-#define VIEW_NORMAL 1 /*!< Normal consistent read view
- where transaction does not see changes
- made by active transactions except
- creating transaction. */
-#define VIEW_HIGH_GRANULARITY 2 /*!< High-granularity read view where
- transaction does not see changes
- made by active transactions and own
- changes after a point in time when this
- read view was created. */
-/* @} */
-
-/** Implement InnoDB framework to support consistent read views in
-cursors. This struct holds both heap where consistent read view
-is allocated and pointer to a read view. */
-
-struct cursor_view_t{
- mem_heap_t* heap;
- /*!< Memory heap for the cursor view */
- read_view_t* read_view;
- /*!< Consistent read view of the cursor*/
- ulint n_mysql_tables_in_use;
- /*!< number of Innobase tables used in the
- processing of this cursor */
-};
-
-#ifndef UNIV_NONINL
-#include "read0read.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/read0read.ic b/storage/xtradb/include/read0read.ic
deleted file mode 100644
index 66bef8866c9..00000000000
--- a/storage/xtradb/include/read0read.ic
+++ /dev/null
@@ -1,131 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/read0read.ic
-Cursor read
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#include "trx0sys.h"
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Validates a read view object. */
-static
-bool
-read_view_validate(
-/*===============*/
- const read_view_t* view) /*!< in: view to validate */
-{
- ut_ad(mutex_own(&trx_sys->mutex));
- ut_ad(view->max_descr >= view->n_descr);
- ut_ad(view->descriptors == NULL || view->max_descr > 0);
-
- /* Check that the view->descriptors array is in ascending order. */
- for (ulint i = 1; i < view->n_descr; ++i) {
-
- ut_a(view->descriptors[i] > view->descriptors[i - 1]);
- }
-
- return(true);
-}
-
-/** Functor to validate the view list. */
-struct ViewCheck {
-
- ViewCheck() : m_prev_view(0) { }
-
- void operator()(const read_view_t* view)
- {
- ut_a(m_prev_view == NULL
- || m_prev_view->low_limit_no >= view->low_limit_no);
-
- m_prev_view = view;
- }
-
- const read_view_t* m_prev_view;
-};
-
-/*********************************************************************//**
-Validates a read view list. */
-static
-bool
-read_view_list_validate(void)
-/*=========================*/
-{
- ut_ad(mutex_own(&trx_sys->mutex));
-
- ut_list_map(trx_sys->view_list, &read_view_t::view_list, ViewCheck());
-
- return(true);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Checks if a read view sees the specified transaction.
-@return true if sees */
-UNIV_INLINE
-bool
-read_view_sees_trx_id(
-/*==================*/
- const read_view_t* view, /*!< in: read view */
- trx_id_t trx_id) /*!< in: trx id */
-{
- if (trx_id < view->up_limit_id) {
-
- return(true);
- } else if (trx_id >= view->low_limit_id) {
-
- return(false);
- }
-
- /* Do a binary search over this view's descriptors array */
-
- return(trx_find_descriptor(view->descriptors, view->n_descr,
- trx_id) == NULL);
-}
-
-/*********************************************************************//**
-Remove a read view from the trx_sys->view_list. */
-UNIV_INLINE
-void
-read_view_remove(
-/*=============*/
- read_view_t* view, /*!< in: read view, can be 0 */
- bool own_mutex) /*!< in: true if caller owns the
- trx_sys_t::mutex */
-{
- if (view != 0) {
- if (!own_mutex) {
- mutex_enter(&trx_sys->mutex);
- }
-
- ut_ad(read_view_validate(view));
-
- UT_LIST_REMOVE(view_list, trx_sys->view_list, view);
-
- ut_ad(read_view_list_validate());
-
- if (!own_mutex) {
- mutex_exit(&trx_sys->mutex);
- }
- }
-}
-
diff --git a/storage/xtradb/include/read0types.h b/storage/xtradb/include/read0types.h
deleted file mode 100644
index 969f4ebb637..00000000000
--- a/storage/xtradb/include/read0types.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/read0types.h
-Cursor read
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef read0types_h
-#define read0types_h
-
-struct read_view_t;
-struct cursor_view_t;
-
-#endif
diff --git a/storage/xtradb/include/rem0cmp.h b/storage/xtradb/include/rem0cmp.h
deleted file mode 100644
index 65116229fdc..00000000000
--- a/storage/xtradb/include/rem0cmp.h
+++ /dev/null
@@ -1,301 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/rem0cmp.h
-Comparison services for records
-
-Created 7/1/1994 Heikki Tuuri
-************************************************************************/
-
-#ifndef rem0cmp_h
-#define rem0cmp_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "data0type.h"
-#include "dict0dict.h"
-#include "rem0rec.h"
-
-/*************************************************************//**
-Returns TRUE if two columns are equal for comparison purposes.
-@return TRUE if the columns are considered equal in comparisons */
-UNIV_INTERN
-ibool
-cmp_cols_are_equal(
-/*===============*/
- const dict_col_t* col1, /*!< in: column 1 */
- const dict_col_t* col2, /*!< in: column 2 */
- ibool check_charsets);
- /*!< in: whether to check charsets */
-/*************************************************************//**
-This function is used to compare two data fields for which we know the
-data type.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INLINE
-int
-cmp_data_data(
-/*==========*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- const byte* data1, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
- const byte* data2, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len2); /*!< in: data field length or UNIV_SQL_NULL */
-/*************************************************************//**
-This function is used to compare two data fields for which we know the
-data type.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow(
-/*===============*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- const byte* data1, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
- const byte* data2, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len2); /*!< in: data field length or UNIV_SQL_NULL */
-
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type to be VARCHAR.
-@return 1, 0, -1, if lhs is greater, equal, less than rhs, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow_varchar(
-/*=======================*/
- const byte* lhs, /* in: data field (== a pointer to a memory
- buffer) */
- ulint lhs_len,/* in: data field length or UNIV_SQL_NULL */
- const byte* rhs, /* in: data field (== a pointer to a memory
- buffer) */
- ulint rhs_len);/* in: data field length or UNIV_SQL_NULL */
-/*****************************************************************
-This function is used to compare two varchar/char fields. The comparison
-is for the LIKE operator.
-@return 1, 0, -1, if lhs is greater, equal, less than rhs, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow_like_prefix(
-/*===========================*/
- const byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- const byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2); /* in: data field length or UNIV_SQL_NULL */
-/*****************************************************************
-This function is used to compare two varchar/char fields. The comparison
-is for the LIKE operator.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow_like_suffix(
-/*===========================*/
- const byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- const byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2); /* in: data field length or UNIV_SQL_NULL */
-/*****************************************************************
-This function is used to compare two varchar/char fields. The comparison
-is for the LIKE operator.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow_like_substr(
-/*===========================*/
- const byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- const byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2); /* in: data field length or UNIV_SQL_NULL */
-/*************************************************************//**
-This function is used to compare two dfields where at least the first
-has its data type field set.
-@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
-respectively */
-UNIV_INLINE
-int
-cmp_dfield_dfield(
-/*==============*/
- const dfield_t* dfield1,/*!< in: data field; must have type field set */
- const dfield_t* dfield2);/*!< in: data field */
-/*************************************************************//**
-This function is used to compare a data tuple to a physical record.
-Only dtuple->n_fields_cmp first fields are taken into account for
-the data tuple! If we denote by n = n_fields_cmp, then rec must
-have either m >= n fields, or it must differ from dtuple in some of
-the m fields rec has. If rec has an externally stored field we do not
-compare it but return with value 0 if such a comparison should be
-made.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared, or until
-the first externally stored field in rec */
-UNIV_INTERN
-int
-cmp_dtuple_rec_with_match_low(
-/*==========================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record which differs from
- dtuple in some of the common fields, or which
- has an equal number or more fields than
- dtuple */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n_cmp, /*!< in: number of fields to compare */
- ulint* matched_fields,
- /*!< in/out: number of already completely
- matched fields; when function returns,
- contains the value for current comparison */
- ulint* matched_bytes)
- /*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when function returns, contains the
- value for current comparison */
- MY_ATTRIBUTE((nonnull));
-#define cmp_dtuple_rec_with_match(tuple,rec,offsets,fields,bytes) \
- cmp_dtuple_rec_with_match_low( \
- tuple,rec,offsets,dtuple_get_n_fields_cmp(tuple),fields,bytes)
-/**************************************************************//**
-Compares a data tuple to a physical record.
-@see cmp_dtuple_rec_with_match
-@return 1, 0, -1, if dtuple is greater, equal, less than rec, respectively */
-UNIV_INTERN
-int
-cmp_dtuple_rec(
-/*===========*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/**************************************************************//**
-Checks if a dtuple is a prefix of a record. The last field in dtuple
-is allowed to be a prefix of the corresponding field in the record.
-@return TRUE if prefix */
-UNIV_INTERN
-ibool
-cmp_dtuple_is_prefix_of_rec(
-/*========================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/*************************************************************//**
-Compare two physical records that contain the same number of columns,
-none of which are stored externally.
-@retval 1 if rec1 (including non-ordering columns) is greater than rec2
-@retval -1 if rec1 (including non-ordering columns) is less than rec2
-@retval 0 if rec1 is a duplicate of rec2 */
-UNIV_INTERN
-int
-cmp_rec_rec_simple(
-/*===============*/
- const rec_t* rec1, /*!< in: physical record */
- const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, ...) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, ...) */
- const dict_index_t* index, /*!< in: data dictionary index */
- struct TABLE* table) /*!< in: MySQL table, for reporting
- duplicate key value if applicable,
- or NULL */
- MY_ATTRIBUTE((nonnull(1,2,3,4), warn_unused_result));
-/*************************************************************//**
-This function is used to compare two physical records. Only the common
-first fields are compared, and if an externally stored field is
-encountered, then 0 is returned.
-@return 1, 0, -1 if rec1 is greater, equal, less, respectively */
-UNIV_INTERN
-int
-cmp_rec_rec_with_match(
-/*===================*/
- const rec_t* rec1, /*!< in: physical record */
- const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
- dict_index_t* index, /*!< in: data dictionary index */
- ibool nulls_unequal,
- /* in: TRUE if this is for index statistics
- cardinality estimation, and innodb_stats_method
- is "nulls_unequal" or "nulls_ignored" */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when the function returns,
- contains the value the for current
- comparison */
- ulint* matched_bytes);/*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when the function returns, contains
- the value for the current comparison */
-/*************************************************************//**
-This function is used to compare two physical records. Only the common
-first fields are compared.
-@return 1, 0 , -1 if rec1 is greater, equal, less, respectively, than
-rec2; only the common first fields are compared */
-UNIV_INLINE
-int
-cmp_rec_rec(
-/*========*/
- const rec_t* rec1, /*!< in: physical record */
- const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
- dict_index_t* index); /*!< in: data dictionary index */
-
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INTERN
-int
-cmp_dfield_dfield_like_prefix(
-/*==========================*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2);/* in: data field */
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INLINE
-int
-cmp_dfield_dfield_like_substr(
-/*==========================*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2);/* in: data field */
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INLINE
-int
-cmp_dfield_dfield_like_suffix(
-/*==========================*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2);/* in: data field */
-
-#ifndef UNIV_NONINL
-#include "rem0cmp.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/rem0cmp.ic b/storage/xtradb/include/rem0cmp.ic
deleted file mode 100644
index 67a2dcacba1..00000000000
--- a/storage/xtradb/include/rem0cmp.ic
+++ /dev/null
@@ -1,186 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/rem0cmp.ic
-Comparison services for records
-
-Created 7/1/1994 Heikki Tuuri
-************************************************************************/
-
-/*************************************************************//**
-This function is used to compare two data fields for which we know the
-data type.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INLINE
-int
-cmp_data_data(
-/*==========*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- const byte* data1, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
- const byte* data2, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /*!< in: data field length or UNIV_SQL_NULL */
-{
- return(cmp_data_data_slow(mtype, prtype, data1, len1, data2, len2));
-}
-
-/*****************************************************************
-This function is used to compare two (CHAR) data fields for the LIKE
-operator. */
-UNIV_INLINE
-int
-cmp_data_data_like_prefix(
-/*======================*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /* in: data field length or UNIV_SQL_NULL */
-{
- return(cmp_data_data_slow_like_prefix(data1, len1, data2, len2));
-}
-/*****************************************************************
-This function is used to compare two (CHAR) data fields for the LIKE
-operator. */
-UNIV_INLINE
-int
-cmp_data_data_like_suffix(
-/*======================*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /* in: data field length or UNIV_SQL_NULL */
-{
- return(cmp_data_data_slow_like_suffix(data1, len1, data2, len2));
-}
-/*****************************************************************
-This function is used to compare two (CHAR) data fields for the LIKE
-operator. */
-UNIV_INLINE
-int
-cmp_data_data_like_substr(
-/*======================*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- byte* data1, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- byte* data2, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /* in: data field length or UNIV_SQL_NULL */
-{
- return(cmp_data_data_slow_like_substr(data1, len1, data2, len2));
-}
-/*************************************************************//**
-This function is used to compare two dfields where at least the first
-has its data type field set.
-@return 1, 0, -1, if dfield1 is greater, equal, less than dfield2,
-respectively */
-UNIV_INLINE
-int
-cmp_dfield_dfield(
-/*==============*/
- const dfield_t* dfield1,/*!< in: data field; must have type field set */
- const dfield_t* dfield2)/*!< in: data field */
-{
- const dtype_t* type;
-
- ut_ad(dfield_check_typed(dfield1));
-
- type = dfield_get_type(dfield1);
-
- return(cmp_data_data(type->mtype, type->prtype,
- (const byte*) dfield_get_data(dfield1),
- dfield_get_len(dfield1),
- (const byte*) dfield_get_data(dfield2),
- dfield_get_len(dfield2)));
-}
-
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INLINE
-int
-cmp_dfield_dfield_like_suffix(
-/*==========================*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2)/* in: data field */
-{
- ut_ad(dfield_check_typed(dfield1));
-
- return(cmp_data_data_like_suffix(
- (byte*) dfield_get_data(dfield1),
- dfield_get_len(dfield1),
- (byte*) dfield_get_data(dfield2),
- dfield_get_len(dfield2)));
-}
-
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INLINE
-int
-cmp_dfield_dfield_like_substr(
-/*==========================*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2)/* in: data field */
-{
- ut_ad(dfield_check_typed(dfield1));
-
- return(cmp_data_data_like_substr(
- (byte*) dfield_get_data(dfield1),
- dfield_get_len(dfield1),
- (byte*) dfield_get_data(dfield2),
- dfield_get_len(dfield2)));
-}
-/*************************************************************//**
-This function is used to compare two physical records. Only the common
-first fields are compared.
-@return 1, 0 , -1 if rec1 is greater, equal, less, respectively, than
-rec2; only the common first fields are compared */
-UNIV_INLINE
-int
-cmp_rec_rec(
-/*========*/
- const rec_t* rec1, /*!< in: physical record */
- const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
- dict_index_t* index) /*!< in: data dictionary index */
-{
- ulint match_f = 0;
- ulint match_b = 0;
-
- return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
- FALSE, &match_f, &match_b));
-}
diff --git a/storage/xtradb/include/rem0rec.h b/storage/xtradb/include/rem0rec.h
deleted file mode 100644
index 9baf0ab380a..00000000000
--- a/storage/xtradb/include/rem0rec.h
+++ /dev/null
@@ -1,996 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/rem0rec.h
-Record manager
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef rem0rec_h
-#define rem0rec_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "rem0types.h"
-#include "mtr0types.h"
-#include "page0types.h"
-
-/* Info bit denoting the predefined minimum record: this bit is set
-if and only if the record is the first user record on a non-leaf
-B-tree page that is the leftmost page on its level
-(PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). */
-#define REC_INFO_MIN_REC_FLAG 0x10UL
-/* The deleted flag in info bits */
-#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the
- record has been delete marked */
-
-/* Number of extra bytes in an old-style record,
-in addition to the data and the offsets */
-#define REC_N_OLD_EXTRA_BYTES 6
-/* Number of extra bytes in a new-style record,
-in addition to the data and the offsets */
-#define REC_N_NEW_EXTRA_BYTES 5
-
-/* Record status values */
-#define REC_STATUS_ORDINARY 0
-#define REC_STATUS_NODE_PTR 1
-#define REC_STATUS_INFIMUM 2
-#define REC_STATUS_SUPREMUM 3
-
-/* The following four constants are needed in page0zip.cc in order to
-efficiently compress and decompress pages. */
-
-/* The offset of heap_no in a compact record */
-#define REC_NEW_HEAP_NO 4
-/* The shift of heap_no in a compact record.
-The status is stored in the low-order bits. */
-#define REC_HEAP_NO_SHIFT 3
-
-/* Length of a B-tree node pointer, in bytes */
-#define REC_NODE_PTR_SIZE 4
-
-/** SQL null flag in a 1-byte offset of ROW_FORMAT=REDUNDANT records */
-#define REC_1BYTE_SQL_NULL_MASK 0x80UL
-/** SQL null flag in a 2-byte offset of ROW_FORMAT=REDUNDANT records */
-#define REC_2BYTE_SQL_NULL_MASK 0x8000UL
-
-/** In a 2-byte offset of ROW_FORMAT=REDUNDANT records, the second most
-significant bit denotes that the tail of a field is stored off-page. */
-#define REC_2BYTE_EXTERN_MASK 0x4000UL
-
-#ifdef UNIV_DEBUG
-/* Length of the rec_get_offsets() header */
-# define REC_OFFS_HEADER_SIZE 4
-#else /* UNIV_DEBUG */
-/* Length of the rec_get_offsets() header */
-# define REC_OFFS_HEADER_SIZE 2
-#endif /* UNIV_DEBUG */
-
-/* Number of elements that should be initially allocated for the
-offsets[] array, first passed to rec_get_offsets() */
-#define REC_OFFS_NORMAL_SIZE 100
-#define REC_OFFS_SMALL_SIZE 10
-
-/******************************************************//**
-The following function is used to get the pointer of the next chained record
-on the same page.
-@return pointer to the next chained record, or NULL if none */
-UNIV_INLINE
-const rec_t*
-rec_get_next_ptr_const(
-/*===================*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-The following function is used to get the pointer of the next chained record
-on the same page.
-@return pointer to the next chained record, or NULL if none */
-UNIV_INLINE
-rec_t*
-rec_get_next_ptr(
-/*=============*/
- rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-The following function is used to get the offset of the
-next chained record on the same page.
-@return the page offset of the next chained record, or 0 if none */
-UNIV_INLINE
-ulint
-rec_get_next_offs(
-/*==============*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-The following function is used to set the next record offset field
-of an old-style record. */
-UNIV_INLINE
-void
-rec_set_next_offs_old(
-/*==================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint next) /*!< in: offset of the next record */
- MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-The following function is used to set the next record offset field
-of a new-style record. */
-UNIV_INLINE
-void
-rec_set_next_offs_new(
-/*==================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- ulint next) /*!< in: offset of the next record */
- MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-The following function is used to get the number of fields
-in an old-style record.
-@return number of data fields */
-UNIV_INLINE
-ulint
-rec_get_n_fields_old(
-/*=================*/
- const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-The following function is used to get the number of fields
-in a record.
-@return number of data fields */
-UNIV_INLINE
-ulint
-rec_get_n_fields(
-/*=============*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index) /*!< in: record descriptor */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-The following function is used to get the number of records owned by the
-previous directory record.
-@return number of owned records */
-UNIV_INLINE
-ulint
-rec_get_n_owned_old(
-/*================*/
- const rec_t* rec) /*!< in: old-style physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-The following function is used to set the number of owned records. */
-UNIV_INLINE
-void
-rec_set_n_owned_old(
-/*================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint n_owned) /*!< in: the number of owned */
- MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-The following function is used to get the number of records owned by the
-previous directory record.
-@return number of owned records */
-UNIV_INLINE
-ulint
-rec_get_n_owned_new(
-/*================*/
- const rec_t* rec) /*!< in: new-style physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-The following function is used to set the number of owned records. */
-UNIV_INLINE
-void
-rec_set_n_owned_new(
-/*================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint n_owned)/*!< in: the number of owned */
- MY_ATTRIBUTE((nonnull(1)));
-/******************************************************//**
-The following function is used to retrieve the info bits of
-a record.
-@return info bits */
-UNIV_INLINE
-ulint
-rec_get_info_bits(
-/*==============*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-The following function is used to set the info bits of a record. */
-UNIV_INLINE
-void
-rec_set_info_bits_old(
-/*==================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint bits) /*!< in: info bits */
- MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-The following function is used to set the info bits of a record. */
-UNIV_INLINE
-void
-rec_set_info_bits_new(
-/*==================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- ulint bits) /*!< in: info bits */
- MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-The following function retrieves the status bits of a new-style record.
-@return status bits */
-UNIV_INLINE
-ulint
-rec_get_status(
-/*===========*/
- const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-
-/******************************************************//**
-The following function is used to set the status bits of a new-style record. */
-UNIV_INLINE
-void
-rec_set_status(
-/*===========*/
- rec_t* rec, /*!< in/out: physical record */
- ulint bits) /*!< in: info bits */
- MY_ATTRIBUTE((nonnull));
-
-/******************************************************//**
-The following function is used to retrieve the info and status
-bits of a record. (Only compact records have status bits.)
-@return info bits */
-UNIV_INLINE
-ulint
-rec_get_info_and_status_bits(
-/*=========================*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-The following function is used to set the info and status
-bits of a record. (Only compact records have status bits.) */
-UNIV_INLINE
-void
-rec_set_info_and_status_bits(
-/*=========================*/
- rec_t* rec, /*!< in/out: compact physical record */
- ulint bits) /*!< in: info bits */
- MY_ATTRIBUTE((nonnull));
-
-/******************************************************//**
-The following function tells if record is delete marked.
-@return nonzero if delete marked */
-UNIV_INLINE
-ulint
-rec_get_deleted_flag(
-/*=================*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-The following function is used to set the deleted bit. */
-UNIV_INLINE
-void
-rec_set_deleted_flag_old(
-/*=====================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint flag) /*!< in: nonzero if delete marked */
- MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-The following function is used to set the deleted bit. */
-UNIV_INLINE
-void
-rec_set_deleted_flag_new(
-/*=====================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint flag) /*!< in: nonzero if delete marked */
- MY_ATTRIBUTE((nonnull(1)));
-/******************************************************//**
-The following function tells if a new-style record is a node pointer.
-@return TRUE if node pointer */
-UNIV_INLINE
-ibool
-rec_get_node_ptr_flag(
-/*==================*/
- const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-The following function is used to get the order number
-of an old-style record in the heap of the index page.
-@return heap order number */
-UNIV_INLINE
-ulint
-rec_get_heap_no_old(
-/*================*/
- const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-The following function is used to set the heap number
-field in an old-style record. */
-UNIV_INLINE
-void
-rec_set_heap_no_old(
-/*================*/
- rec_t* rec, /*!< in: physical record */
- ulint heap_no)/*!< in: the heap number */
- MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-The following function is used to get the order number
-of a new-style record in the heap of the index page.
-@return heap order number */
-UNIV_INLINE
-ulint
-rec_get_heap_no_new(
-/*================*/
- const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-The following function is used to set the heap number
-field in a new-style record. */
-UNIV_INLINE
-void
-rec_set_heap_no_new(
-/*================*/
- rec_t* rec, /*!< in/out: physical record */
- ulint heap_no)/*!< in: the heap number */
- MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-The following function is used to test whether the data offsets
-in the record are stored in one-byte or two-byte format.
-@return TRUE if 1-byte form */
-UNIV_INLINE
-ibool
-rec_get_1byte_offs_flag(
-/*====================*/
- const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-
-/******************************************************//**
-The following function is used to set the 1-byte offsets flag. */
-UNIV_INLINE
-void
-rec_set_1byte_offs_flag(
-/*====================*/
- rec_t* rec, /*!< in: physical record */
- ibool flag) /*!< in: TRUE if 1byte form */
- MY_ATTRIBUTE((nonnull));
-
-/******************************************************//**
-Returns the offset of nth field end if the record is stored in the 1-byte
-offsets form. If the field is SQL null, the flag is ORed in the returned
-value.
-@return offset of the start of the field, SQL null flag ORed */
-UNIV_INLINE
-ulint
-rec_1_get_field_end_info(
-/*=====================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-
-/******************************************************//**
-Returns the offset of nth field end if the record is stored in the 2-byte
-offsets form. If the field is SQL null, the flag is ORed in the returned
-value.
-@return offset of the start of the field, SQL null flag and extern
-storage flag ORed */
-UNIV_INLINE
-ulint
-rec_2_get_field_end_info(
-/*=====================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-
-/******************************************************//**
-Returns nonzero if the field is stored off-page.
-@retval 0 if the field is stored in-page
-@retval REC_2BYTE_EXTERN_MASK if the field is stored externally */
-UNIV_INLINE
-ulint
-rec_2_is_field_extern(
-/*==================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-
-/******************************************************//**
-Determine how many of the first n columns in a compact
-physical record are stored externally.
-@return number of externally stored columns */
-UNIV_INTERN
-ulint
-rec_get_n_extern_new(
-/*=================*/
- const rec_t* rec, /*!< in: compact physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint n) /*!< in: number of columns to scan */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/******************************************************//**
-The following function determines the offsets to each field
-in the record. It can reuse a previously allocated array.
-@return the new offsets */
-UNIV_INTERN
-ulint*
-rec_get_offsets_func(
-/*=================*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets,/*!< in/out: array consisting of
- offsets[0] allocated elements,
- or an array from rec_get_offsets(),
- or NULL */
- ulint n_fields,/*!< in: maximum number of
- initialized fields
- (ULINT_UNDEFINED if all fields) */
-#ifdef UNIV_DEBUG
- const char* file, /*!< in: file name where called */
- ulint line, /*!< in: line number where called */
-#endif /* UNIV_DEBUG */
- mem_heap_t** heap) /*!< in/out: memory heap */
-#ifdef UNIV_DEBUG
- MY_ATTRIBUTE((nonnull(1,2,5,7),warn_unused_result));
-#else /* UNIV_DEBUG */
- MY_ATTRIBUTE((nonnull(1,2,5),warn_unused_result));
-#endif /* UNIV_DEBUG */
-
-#ifdef UNIV_DEBUG
-# define rec_get_offsets(rec,index,offsets,n,heap) \
- rec_get_offsets_func(rec,index,offsets,n,__FILE__,__LINE__,heap)
-#else /* UNIV_DEBUG */
-# define rec_get_offsets(rec, index, offsets, n, heap) \
- rec_get_offsets_func(rec, index, offsets, n, heap)
-#endif /* UNIV_DEBUG */
-
-/******************************************************//**
-The following function determines the offsets to each field
-in the record. It can reuse a previously allocated array. */
-UNIV_INTERN
-void
-rec_get_offsets_reverse(
-/*====================*/
- const byte* extra, /*!< in: the extra bytes of a
- compact record in reverse order,
- excluding the fixed-size
- REC_N_NEW_EXTRA_BYTES */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint node_ptr,/*!< in: nonzero=node pointer,
- 0=leaf node */
- ulint* offsets)/*!< in/out: array consisting of
- offsets[0] allocated elements */
- MY_ATTRIBUTE((nonnull));
-#ifdef UNIV_DEBUG
-/************************************************************//**
-Validates offsets returned by rec_get_offsets().
-@return TRUE if valid */
-UNIV_INLINE
-ibool
-rec_offs_validate(
-/*==============*/
- const rec_t* rec, /*!< in: record or NULL */
- const dict_index_t* index, /*!< in: record descriptor or NULL */
- const ulint* offsets)/*!< in: array returned by
- rec_get_offsets() */
- MY_ATTRIBUTE((nonnull(3), warn_unused_result));
-/************************************************************//**
-Updates debug data in offsets, in order to avoid bogus
-rec_offs_validate() failures. */
-UNIV_INLINE
-void
-rec_offs_make_valid(
-/*================*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in: array returned by
- rec_get_offsets() */
- MY_ATTRIBUTE((nonnull));
-#else
-# define rec_offs_make_valid(rec, index, offsets) ((void) 0)
-#endif /* UNIV_DEBUG */
-
-/************************************************************//**
-The following function is used to get the offset to the nth
-data field in an old-style record.
-@return offset to the field */
-UNIV_INTERN
-ulint
-rec_get_nth_field_offs_old(
-/*=======================*/
- const rec_t* rec, /*!< in: record */
- ulint n, /*!< in: index of the field */
- ulint* len) /*!< out: length of the field; UNIV_SQL_NULL
- if SQL null */
- MY_ATTRIBUTE((nonnull));
-#define rec_get_nth_field_old(rec, n, len) \
-((rec) + rec_get_nth_field_offs_old(rec, n, len))
-/************************************************************//**
-Gets the physical size of an old-style field.
-Also an SQL null may have a field of size > 0,
-if the data type is of a fixed size.
-@return field size in bytes */
-UNIV_INLINE
-ulint
-rec_get_nth_field_size(
-/*===================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: index of the field */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/************************************************************//**
-The following function is used to get an offset to the nth
-data field in a record.
-@return offset from the origin of rec */
-UNIV_INLINE
-ulint
-rec_get_nth_field_offs(
-/*===================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n, /*!< in: index of the field */
- ulint* len) /*!< out: length of the field; UNIV_SQL_NULL
- if SQL null */
- MY_ATTRIBUTE((nonnull));
-#define rec_get_nth_field(rec, offsets, n, len) \
-((rec) + rec_get_nth_field_offs(offsets, n, len))
-/******************************************************//**
-Determine if the offsets are for a record in the new
-compact format.
-@return nonzero if compact format */
-UNIV_INLINE
-ulint
-rec_offs_comp(
-/*==========*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-Determine if the offsets are for a record containing
-externally stored columns.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_any_extern(
-/*================*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-Determine if the offsets are for a record containing null BLOB pointers.
-@return first field containing a null BLOB pointer, or NULL if none found */
-UNIV_INLINE
-const byte*
-rec_offs_any_null_extern(
-/*=====================*/
- const rec_t* rec, /*!< in: record */
- const ulint* offsets) /*!< in: rec_get_offsets(rec) */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-Returns nonzero if the extern bit is set in nth field of rec.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_nth_extern(
-/*================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-Returns nonzero if the SQL NULL bit is set in nth field of rec.
-@return nonzero if SQL NULL */
-UNIV_INLINE
-ulint
-rec_offs_nth_sql_null(
-/*==================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/******************************************************//**
-Gets the physical size of a field.
-@return length of field */
-UNIV_INLINE
-ulint
-rec_offs_nth_size(
-/*==============*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-
-/******************************************************//**
-Returns the number of extern bits set in a record.
-@return number of externally stored fields */
-UNIV_INLINE
-ulint
-rec_offs_n_extern(
-/*==============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/***********************************************************//**
-This is used to modify the value of an already existing field in a record.
-The previous value must have exactly the same size as the new value. If len
-is UNIV_SQL_NULL then the field is treated as an SQL null.
-For records in ROW_FORMAT=COMPACT (new-style records), len must not be
-UNIV_SQL_NULL unless the field already is SQL null. */
-UNIV_INLINE
-void
-rec_set_nth_field(
-/*==============*/
- rec_t* rec, /*!< in: record */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n, /*!< in: index number of the field */
- const void* data, /*!< in: pointer to the data if not SQL null */
- ulint len) /*!< in: length of the data or UNIV_SQL_NULL.
- If not SQL null, must have the same
- length as the previous value.
- If SQL null, previous value must be
- SQL null. */
- MY_ATTRIBUTE((nonnull(1,2)));
-/**********************************************************//**
-The following function returns the data size of an old-style physical
-record, that is the sum of field lengths. SQL null fields
-are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes.
-@return size */
-UNIV_INLINE
-ulint
-rec_get_data_size_old(
-/*==================*/
- const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/**********************************************************//**
-The following function returns the number of allocated elements
-for an array of offsets.
-@return number of elements */
-UNIV_INLINE
-ulint
-rec_offs_get_n_alloc(
-/*=================*/
- const ulint* offsets)/*!< in: array for rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/**********************************************************//**
-The following function sets the number of allocated elements
-for an array of offsets. */
-UNIV_INLINE
-void
-rec_offs_set_n_alloc(
-/*=================*/
- ulint* offsets, /*!< out: array for rec_get_offsets(),
- must be allocated */
- ulint n_alloc) /*!< in: number of elements */
- MY_ATTRIBUTE((nonnull));
-#define rec_offs_init(offsets) \
- rec_offs_set_n_alloc(offsets, (sizeof offsets) / sizeof *offsets)
-/**********************************************************//**
-The following function returns the number of fields in a record.
-@return number of fields */
-UNIV_INLINE
-ulint
-rec_offs_n_fields(
-/*==============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/**********************************************************//**
-The following function returns the data size of a physical
-record, that is the sum of field lengths. SQL null fields
-are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes.
-@return size */
-UNIV_INLINE
-ulint
-rec_offs_data_size(
-/*===============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/**********************************************************//**
-Returns the total size of record minus data size of record.
-The value returned by the function is the distance from record
-start to record origin in bytes.
-@return size */
-UNIV_INLINE
-ulint
-rec_offs_extra_size(
-/*================*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/**********************************************************//**
-Returns the total size of a physical record.
-@return size */
-UNIV_INLINE
-ulint
-rec_offs_size(
-/*==========*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-#ifdef UNIV_DEBUG
-/**********************************************************//**
-Returns a pointer to the start of the record.
-@return pointer to start */
-UNIV_INLINE
-byte*
-rec_get_start(
-/*==========*/
- const rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-/**********************************************************//**
-Returns a pointer to the end of the record.
-@return pointer to end */
-UNIV_INLINE
-byte*
-rec_get_end(
-/*========*/
- const rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-#else /* UNIV_DEBUG */
-# define rec_get_start(rec, offsets) ((rec) - rec_offs_extra_size(offsets))
-# define rec_get_end(rec, offsets) ((rec) + rec_offs_data_size(offsets))
-#endif /* UNIV_DEBUG */
-/***************************************************************//**
-Copies a physical record to a buffer.
-@return pointer to the origin of the copy */
-UNIV_INLINE
-rec_t*
-rec_copy(
-/*=====*/
- void* buf, /*!< in: buffer */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Determines the size of a data tuple prefix in a temporary file.
-@return total size */
-UNIV_INTERN
-ulint
-rec_get_converted_size_temp(
-/*========================*/
- const dict_index_t* index, /*!< in: record descriptor */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields,/*!< in: number of data fields */
- ulint* extra) /*!< out: extra size */
- MY_ATTRIBUTE((warn_unused_result, nonnull));
-
-/******************************************************//**
-Determine the offset to each field in temporary file.
-@see rec_convert_dtuple_to_temp() */
-UNIV_INTERN
-void
-rec_init_offsets_temp(
-/*==================*/
- const rec_t* rec, /*!< in: temporary file record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
- MY_ATTRIBUTE((nonnull));
-
-/*********************************************************//**
-Builds a temporary file record out of a data tuple.
-@see rec_init_offsets_temp() */
-UNIV_INTERN
-void
-rec_convert_dtuple_to_temp(
-/*=======================*/
- rec_t* rec, /*!< out: record */
- const dict_index_t* index, /*!< in: record descriptor */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields) /*!< in: number of fields */
- MY_ATTRIBUTE((nonnull));
-
-/**************************************************************//**
-Copies the first n fields of a physical record to a new physical record in
-a buffer.
-@return own: copied record */
-UNIV_INTERN
-rec_t*
-rec_copy_prefix_to_buf(
-/*===================*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint n_fields, /*!< in: number of fields
- to copy */
- byte** buf, /*!< in/out: memory buffer
- for the copied prefix,
- or NULL */
- ulint* buf_size) /*!< in/out: buffer size */
- MY_ATTRIBUTE((nonnull));
-/************************************************************//**
-Folds a prefix of a physical record to a ulint.
-@return the folded value */
-UNIV_INLINE
-ulint
-rec_fold(
-/*=====*/
- const rec_t* rec, /*!< in: the physical record */
- const ulint* offsets, /*!< in: array returned by
- rec_get_offsets() */
- ulint n_fields, /*!< in: number of complete
- fields to fold */
- ulint n_bytes, /*!< in: number of bytes to fold
- in an incomplete last field */
- index_id_t tree_id) /*!< in: index tree id */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************//**
-Builds a physical record out of a data tuple and
-stores it into the given buffer.
-@return pointer to the origin of physical record */
-UNIV_INTERN
-rec_t*
-rec_convert_dtuple_to_rec(
-/*======================*/
- byte* buf, /*!< in: start address of the
- physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext) /*!< in: number of
- externally stored columns */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/**********************************************************//**
-Returns the extra size of an old-style physical record if we know its
-data size and number of fields.
-@return extra size */
-UNIV_INLINE
-ulint
-rec_get_converted_extra_size(
-/*=========================*/
- ulint data_size, /*!< in: data size */
- ulint n_fields, /*!< in: number of fields */
- ulint n_ext) /*!< in: number of externally stored columns */
- MY_ATTRIBUTE((const));
-/**********************************************************//**
-Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
-@return total size */
-UNIV_INTERN
-ulint
-rec_get_converted_size_comp_prefix(
-/*===============================*/
- const dict_index_t* index, /*!< in: record descriptor */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields,/*!< in: number of data fields */
- ulint* extra) /*!< out: extra size */
- MY_ATTRIBUTE((warn_unused_result, nonnull(1,2)));
-/**********************************************************//**
-Determines the size of a data tuple in ROW_FORMAT=COMPACT.
-@return total size */
-UNIV_INTERN
-ulint
-rec_get_converted_size_comp(
-/*========================*/
- const dict_index_t* index, /*!< in: record descriptor;
- dict_table_is_comp() is
- assumed to hold, even if
- it does not */
- ulint status, /*!< in: status bits of the record */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields,/*!< in: number of data fields */
- ulint* extra) /*!< out: extra size */
- MY_ATTRIBUTE((nonnull(1,3)));
-/**********************************************************//**
-The following function returns the size of a data tuple when converted to
-a physical record.
-@return size */
-UNIV_INLINE
-ulint
-rec_get_converted_size(
-/*===================*/
- dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext) /*!< in: number of externally stored columns */
- MY_ATTRIBUTE((warn_unused_result, nonnull));
-#ifndef UNIV_HOTBACKUP
-/**************************************************************//**
-Copies the first n fields of a physical record to a data tuple.
-The fields are copied to the memory heap. */
-UNIV_INTERN
-void
-rec_copy_prefix_to_dtuple(
-/*======================*/
- dtuple_t* tuple, /*!< out: data tuple */
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint n_fields, /*!< in: number of fields
- to copy */
- mem_heap_t* heap) /*!< in: memory heap */
- MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
-/***************************************************************//**
-Validates the consistency of a physical record.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-rec_validate(
-/*=========*/
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull));
-/***************************************************************//**
-Prints an old-style physical record. */
-UNIV_INTERN
-void
-rec_print_old(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((nonnull));
-#ifndef UNIV_HOTBACKUP
-/***************************************************************//**
-Prints a physical record in ROW_FORMAT=COMPACT. Ignores the
-record header. */
-UNIV_INTERN
-void
-rec_print_comp(
-/*===========*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull));
-/***************************************************************//**
-Prints a physical record. */
-UNIV_INTERN
-void
-rec_print_new(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((nonnull));
-/***************************************************************//**
-Prints a physical record. */
-UNIV_INTERN
-void
-rec_print(
-/*======*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index) /*!< in: record descriptor */
- MY_ATTRIBUTE((nonnull));
-
-# ifdef UNIV_DEBUG
-/************************************************************//**
-Reads the DB_TRX_ID of a clustered index record.
-@return the value of DB_TRX_ID */
-UNIV_INTERN
-trx_id_t
-rec_get_trx_id(
-/*===========*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index) /*!< in: clustered index */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-# endif /* UNIV_DEBUG */
-#endif /* UNIV_HOTBACKUP */
-
-/* Maximum lengths for the data in a physical record if the offsets
-are given in one byte (resp. two byte) format. */
-#define REC_1BYTE_OFFS_LIMIT 0x7FUL
-#define REC_2BYTE_OFFS_LIMIT 0x7FFFUL
-
-/* The data size of record must be smaller than this because we reserve
-two upmost bits in a two byte offset for special purposes */
-#define REC_MAX_DATA_SIZE (16384)
-
-#ifdef WITH_WSREP
-int wsrep_rec_get_foreign_key(
- byte *buf, /* out: extracted key */
- ulint *buf_len, /* in/out: length of buf */
- const rec_t* rec, /* in: physical record */
- dict_index_t* index_for, /* in: index for foreign table */
- dict_index_t* index_ref, /* in: index for referenced table */
- ibool new_protocol); /* in: protocol > 1 */
-#endif /* WITH_WSREP */
-#ifndef UNIV_NONINL
-#include "rem0rec.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/rem0rec.ic b/storage/xtradb/include/rem0rec.ic
deleted file mode 100644
index 5811a77a48b..00000000000
--- a/storage/xtradb/include/rem0rec.ic
+++ /dev/null
@@ -1,1719 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/rem0rec.ic
-Record manager
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "mach0data.h"
-#include "ut0byte.h"
-#include "dict0dict.h"
-#include "btr0types.h"
-
-/* Compact flag ORed to the extra size returned by rec_get_offsets() */
-#define REC_OFFS_COMPACT ((ulint) 1 << 31)
-/* SQL NULL flag in offsets returned by rec_get_offsets() */
-#define REC_OFFS_SQL_NULL ((ulint) 1 << 31)
-/* External flag in offsets returned by rec_get_offsets() */
-#define REC_OFFS_EXTERNAL ((ulint) 1 << 30)
-/* Mask for offsets returned by rec_get_offsets() */
-#define REC_OFFS_MASK (REC_OFFS_EXTERNAL - 1)
-
-/* Offsets of the bit-fields in an old-style record. NOTE! In the table the
-most significant bytes and bits are written below less significant.
-
- (1) byte offset (2) bit usage within byte
- downward from
- origin -> 1 8 bits pointer to next record
- 2 8 bits pointer to next record
- 3 1 bit short flag
- 7 bits number of fields
- 4 3 bits number of fields
- 5 bits heap number
- 5 8 bits heap number
- 6 4 bits n_owned
- 4 bits info bits
-*/
-
-/* Offsets of the bit-fields in a new-style record. NOTE! In the table the
-most significant bytes and bits are written below less significant.
-
- (1) byte offset (2) bit usage within byte
- downward from
- origin -> 1 8 bits relative offset of next record
- 2 8 bits relative offset of next record
- the relative offset is an unsigned 16-bit
- integer:
- (offset_of_next_record
- - offset_of_this_record) mod 64Ki,
- where mod is the modulo as a non-negative
- number;
- we can calculate the offset of the next
- record with the formula:
- relative_offset + offset_of_this_record
- mod UNIV_PAGE_SIZE
- 3 3 bits status:
- 000=conventional record
- 001=node pointer record (inside B-tree)
- 010=infimum record
- 011=supremum record
- 1xx=reserved
- 5 bits heap number
- 4 8 bits heap number
- 5 4 bits n_owned
- 4 bits info bits
-*/
-
-/* We list the byte offsets from the origin of the record, the mask,
-and the shift needed to obtain each bit-field of the record. */
-
-#define REC_NEXT 2
-#define REC_NEXT_MASK 0xFFFFUL
-#define REC_NEXT_SHIFT 0
-
-#define REC_OLD_SHORT 3 /* This is single byte bit-field */
-#define REC_OLD_SHORT_MASK 0x1UL
-#define REC_OLD_SHORT_SHIFT 0
-
-#define REC_OLD_N_FIELDS 4
-#define REC_OLD_N_FIELDS_MASK 0x7FEUL
-#define REC_OLD_N_FIELDS_SHIFT 1
-
-#define REC_NEW_STATUS 3 /* This is single byte bit-field */
-#define REC_NEW_STATUS_MASK 0x7UL
-#define REC_NEW_STATUS_SHIFT 0
-
-#define REC_OLD_HEAP_NO 5
-#define REC_HEAP_NO_MASK 0xFFF8UL
-#if 0 /* defined in rem0rec.h for use of page0zip.cc */
-#define REC_NEW_HEAP_NO 4
-#define REC_HEAP_NO_SHIFT 3
-#endif
-
-#define REC_OLD_N_OWNED 6 /* This is single byte bit-field */
-#define REC_NEW_N_OWNED 5 /* This is single byte bit-field */
-#define REC_N_OWNED_MASK 0xFUL
-#define REC_N_OWNED_SHIFT 0
-
-#define REC_OLD_INFO_BITS 6 /* This is single byte bit-field */
-#define REC_NEW_INFO_BITS 5 /* This is single byte bit-field */
-#define REC_INFO_BITS_MASK 0xF0UL
-#define REC_INFO_BITS_SHIFT 0
-
-#if REC_OLD_SHORT_MASK << (8 * (REC_OLD_SHORT - 3)) \
- ^ REC_OLD_N_FIELDS_MASK << (8 * (REC_OLD_N_FIELDS - 4)) \
- ^ REC_HEAP_NO_MASK << (8 * (REC_OLD_HEAP_NO - 4)) \
- ^ REC_N_OWNED_MASK << (8 * (REC_OLD_N_OWNED - 3)) \
- ^ REC_INFO_BITS_MASK << (8 * (REC_OLD_INFO_BITS - 3)) \
- ^ 0xFFFFFFFFUL
-# error "sum of old-style masks != 0xFFFFFFFFUL"
-#endif
-#if REC_NEW_STATUS_MASK << (8 * (REC_NEW_STATUS - 3)) \
- ^ REC_HEAP_NO_MASK << (8 * (REC_NEW_HEAP_NO - 4)) \
- ^ REC_N_OWNED_MASK << (8 * (REC_NEW_N_OWNED - 3)) \
- ^ REC_INFO_BITS_MASK << (8 * (REC_NEW_INFO_BITS - 3)) \
- ^ 0xFFFFFFUL
-# error "sum of new-style masks != 0xFFFFFFUL"
-#endif
-
-/***********************************************************//**
-Sets the value of the ith field SQL null bit of an old-style record. */
-UNIV_INTERN
-void
-rec_set_nth_field_null_bit(
-/*=======================*/
- rec_t* rec, /*!< in: record */
- ulint i, /*!< in: ith field */
- ibool val); /*!< in: value to set */
-/***********************************************************//**
-Sets an old-style record field to SQL null.
-The physical size of the field is not changed. */
-UNIV_INTERN
-void
-rec_set_nth_field_sql_null(
-/*=======================*/
- rec_t* rec, /*!< in: record */
- ulint n); /*!< in: index of the field */
-
-/******************************************************//**
-Gets a bit field from within 1 byte. */
-UNIV_INLINE
-ulint
-rec_get_bit_field_1(
-/*================*/
- const rec_t* rec, /*!< in: pointer to record origin */
- ulint offs, /*!< in: offset from the origin down */
- ulint mask, /*!< in: mask used to filter bits */
- ulint shift) /*!< in: shift right applied after masking */
-{
- ut_ad(rec);
-
- return((mach_read_from_1(rec - offs) & mask) >> shift);
-}
-
-/******************************************************//**
-Sets a bit field within 1 byte. */
-UNIV_INLINE
-void
-rec_set_bit_field_1(
-/*================*/
- rec_t* rec, /*!< in: pointer to record origin */
- ulint val, /*!< in: value to set */
- ulint offs, /*!< in: offset from the origin down */
- ulint mask, /*!< in: mask used to filter bits */
- ulint shift) /*!< in: shift right applied after masking */
-{
- ut_ad(rec);
- ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
- ut_ad(mask);
- ut_ad(mask <= 0xFFUL);
- ut_ad(((mask >> shift) << shift) == mask);
- ut_ad(((val << shift) & mask) == (val << shift));
-
- mach_write_to_1(rec - offs,
- (mach_read_from_1(rec - offs) & ~mask)
- | (val << shift));
-}
-
-/******************************************************//**
-Gets a bit field from within 2 bytes. */
-UNIV_INLINE
-ulint
-rec_get_bit_field_2(
-/*================*/
- const rec_t* rec, /*!< in: pointer to record origin */
- ulint offs, /*!< in: offset from the origin down */
- ulint mask, /*!< in: mask used to filter bits */
- ulint shift) /*!< in: shift right applied after masking */
-{
- ut_ad(rec);
-
- return((mach_read_from_2(rec - offs) & mask) >> shift);
-}
-
-/******************************************************//**
-Sets a bit field within 2 bytes. */
-UNIV_INLINE
-void
-rec_set_bit_field_2(
-/*================*/
- rec_t* rec, /*!< in: pointer to record origin */
- ulint val, /*!< in: value to set */
- ulint offs, /*!< in: offset from the origin down */
- ulint mask, /*!< in: mask used to filter bits */
- ulint shift) /*!< in: shift right applied after masking */
-{
- ut_ad(rec);
- ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
- ut_ad(mask > 0xFFUL);
- ut_ad(mask <= 0xFFFFUL);
- ut_ad((mask >> shift) & 1);
- ut_ad(0 == ((mask >> shift) & ((mask >> shift) + 1)));
- ut_ad(((mask >> shift) << shift) == mask);
- ut_ad(((val << shift) & mask) == (val << shift));
-
- mach_write_to_2(rec - offs,
- (mach_read_from_2(rec - offs) & ~mask)
- | (val << shift));
-}
-
-/******************************************************//**
-The following function is used to get the pointer of the next chained record
-on the same page.
-@return pointer to the next chained record, or NULL if none */
-UNIV_INLINE
-const rec_t*
-rec_get_next_ptr_const(
-/*===================*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- ulint field_value;
-
- ut_ad(REC_NEXT_MASK == 0xFFFFUL);
- ut_ad(REC_NEXT_SHIFT == 0);
-
- field_value = mach_read_from_2(rec - REC_NEXT);
-
- if (field_value == 0) {
-
- return(NULL);
- }
-
- if (comp) {
-#if UNIV_PAGE_SIZE_MAX <= 32768
- /* Note that for 64 KiB pages, field_value can 'wrap around'
- and the debug assertion is not valid */
-
- /* In the following assertion, field_value is interpreted
- as signed 16-bit integer in 2's complement arithmetics.
- If all platforms defined int16_t in the standard headers,
- the expression could be written simpler as
- (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
- */
- ut_ad((field_value >= 32768
- ? field_value - 65536
- : field_value)
- + ut_align_offset(rec, UNIV_PAGE_SIZE)
- < UNIV_PAGE_SIZE);
-#endif
- /* There must be at least REC_N_NEW_EXTRA_BYTES + 1
- between each record. */
- ut_ad((field_value > REC_N_NEW_EXTRA_BYTES
- && field_value < 32768)
- || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
-
- return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
- + ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
- } else {
- ut_ad(field_value < UNIV_PAGE_SIZE);
-
- return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
- + field_value);
- }
-}
-
-/******************************************************//**
-The following function is used to get the pointer of the next chained record
-on the same page.
-@return pointer to the next chained record, or NULL if none */
-UNIV_INLINE
-rec_t*
-rec_get_next_ptr(
-/*=============*/
- rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- return(const_cast<rec_t*>(rec_get_next_ptr_const(rec, comp)));
-}
-
-/******************************************************//**
-The following function is used to get the offset of the next chained record
-on the same page.
-@return the page offset of the next chained record, or 0 if none */
-UNIV_INLINE
-ulint
-rec_get_next_offs(
-/*==============*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- ulint field_value;
-#if REC_NEXT_MASK != 0xFFFFUL
-# error "REC_NEXT_MASK != 0xFFFFUL"
-#endif
-#if REC_NEXT_SHIFT
-# error "REC_NEXT_SHIFT != 0"
-#endif
-
- field_value = mach_read_from_2(rec - REC_NEXT);
-
- if (comp) {
-#if UNIV_PAGE_SIZE_MAX <= 32768
- /* Note that for 64 KiB pages, field_value can 'wrap around'
- and the debug assertion is not valid */
-
- /* In the following assertion, field_value is interpreted
- as signed 16-bit integer in 2's complement arithmetics.
- If all platforms defined int16_t in the standard headers,
- the expression could be written simpler as
- (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
- */
- ut_ad((field_value >= 32768
- ? field_value - 65536
- : field_value)
- + ut_align_offset(rec, UNIV_PAGE_SIZE)
- < UNIV_PAGE_SIZE);
-#endif
- if (field_value == 0) {
-
- return(0);
- }
-
- /* There must be at least REC_N_NEW_EXTRA_BYTES + 1
- between each record. */
- ut_ad((field_value > REC_N_NEW_EXTRA_BYTES
- && field_value < 32768)
- || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
-
- return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
- } else {
- ut_ad(field_value < UNIV_PAGE_SIZE);
-
- return(field_value);
- }
-}
-
-/******************************************************//**
-The following function is used to set the next record offset field
-of an old-style record. */
-UNIV_INLINE
-void
-rec_set_next_offs_old(
-/*==================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint next) /*!< in: offset of the next record */
-{
- ut_ad(rec);
- ut_ad(UNIV_PAGE_SIZE > next);
-#if REC_NEXT_MASK != 0xFFFFUL
-# error "REC_NEXT_MASK != 0xFFFFUL"
-#endif
-#if REC_NEXT_SHIFT
-# error "REC_NEXT_SHIFT != 0"
-#endif
-
- mach_write_to_2(rec - REC_NEXT, next);
-}
-
-/******************************************************//**
-The following function is used to set the next record offset field
-of a new-style record. */
-UNIV_INLINE
-void
-rec_set_next_offs_new(
-/*==================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- ulint next) /*!< in: offset of the next record */
-{
- ulint field_value;
-
- ut_ad(rec);
- ut_ad(UNIV_PAGE_SIZE > next);
-
- if (!next) {
- field_value = 0;
- } else {
- /* The following two statements calculate
- next - offset_of_rec mod 64Ki, where mod is the modulo
- as a non-negative number */
-
- field_value = (ulint)
- ((lint) next
- - (lint) ut_align_offset(rec, UNIV_PAGE_SIZE));
- field_value &= REC_NEXT_MASK;
- }
-
- mach_write_to_2(rec - REC_NEXT, field_value);
-}
-
-/******************************************************//**
-The following function is used to get the number of fields
-in an old-style record.
-@return number of data fields */
-UNIV_INLINE
-ulint
-rec_get_n_fields_old(
-/*=================*/
- const rec_t* rec) /*!< in: physical record */
-{
- ulint ret;
-
- ut_ad(rec);
-
- ret = rec_get_bit_field_2(rec, REC_OLD_N_FIELDS,
- REC_OLD_N_FIELDS_MASK,
- REC_OLD_N_FIELDS_SHIFT);
- ut_ad(ret <= REC_MAX_N_FIELDS);
- ut_ad(ret > 0);
-
- return(ret);
-}
-
-/******************************************************//**
-The following function is used to set the number of fields
-in an old-style record. */
-UNIV_INLINE
-void
-rec_set_n_fields_old(
-/*=================*/
- rec_t* rec, /*!< in: physical record */
- ulint n_fields) /*!< in: the number of fields */
-{
- ut_ad(rec);
- ut_ad(n_fields <= REC_MAX_N_FIELDS);
- ut_ad(n_fields > 0);
-
- rec_set_bit_field_2(rec, n_fields, REC_OLD_N_FIELDS,
- REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT);
-}
-
-/******************************************************//**
-The following function retrieves the status bits of a new-style record.
-@return status bits */
-UNIV_INLINE
-ulint
-rec_get_status(
-/*===========*/
- const rec_t* rec) /*!< in: physical record */
-{
- ulint ret;
-
- ut_ad(rec);
-
- ret = rec_get_bit_field_1(rec, REC_NEW_STATUS,
- REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
- ut_ad((ret & ~REC_NEW_STATUS_MASK) == 0);
-
- return(ret);
-}
-
-/******************************************************//**
-The following function is used to get the number of fields
-in a record.
-@return number of data fields */
-UNIV_INLINE
-ulint
-rec_get_n_fields(
-/*=============*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index) /*!< in: record descriptor */
-{
- ut_ad(rec);
- ut_ad(index);
-
- if (!dict_table_is_comp(index->table)) {
- return(rec_get_n_fields_old(rec));
- }
-
- switch (rec_get_status(rec)) {
- case REC_STATUS_ORDINARY:
- return(dict_index_get_n_fields(index));
- case REC_STATUS_NODE_PTR:
- return(dict_index_get_n_unique_in_tree(index) + 1);
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- return(1);
- default:
- ut_error;
- return(ULINT_UNDEFINED);
- }
-}
-
-/******************************************************//**
-The following function is used to get the number of records owned by the
-previous directory record.
-@return number of owned records */
-UNIV_INLINE
-ulint
-rec_get_n_owned_old(
-/*================*/
- const rec_t* rec) /*!< in: old-style physical record */
-{
- return(rec_get_bit_field_1(rec, REC_OLD_N_OWNED,
- REC_N_OWNED_MASK, REC_N_OWNED_SHIFT));
-}
-
-/******************************************************//**
-The following function is used to set the number of owned records. */
-UNIV_INLINE
-void
-rec_set_n_owned_old(
-/*================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint n_owned) /*!< in: the number of owned */
-{
- rec_set_bit_field_1(rec, n_owned, REC_OLD_N_OWNED,
- REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
-}
-
-/******************************************************//**
-The following function is used to get the number of records owned by the
-previous directory record.
-@return number of owned records */
-UNIV_INLINE
-ulint
-rec_get_n_owned_new(
-/*================*/
- const rec_t* rec) /*!< in: new-style physical record */
-{
- return(rec_get_bit_field_1(rec, REC_NEW_N_OWNED,
- REC_N_OWNED_MASK, REC_N_OWNED_SHIFT));
-}
-
-/******************************************************//**
-The following function is used to set the number of owned records. */
-UNIV_INLINE
-void
-rec_set_n_owned_new(
-/*================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint n_owned)/*!< in: the number of owned */
-{
- rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED,
- REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
- if (page_zip && rec_get_status(rec) != REC_STATUS_SUPREMUM) {
- page_zip_rec_set_owned(page_zip, rec, n_owned);
- }
-}
-
-/******************************************************//**
-The following function is used to retrieve the info bits of a record.
-@return info bits */
-UNIV_INLINE
-ulint
-rec_get_info_bits(
-/*==============*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- return(rec_get_bit_field_1(
- rec, comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
- REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT));
-}
-
-/******************************************************//**
-The following function is used to set the info bits of a record. */
-UNIV_INLINE
-void
-rec_set_info_bits_old(
-/*==================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint bits) /*!< in: info bits */
-{
- rec_set_bit_field_1(rec, bits, REC_OLD_INFO_BITS,
- REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
-}
-/******************************************************//**
-The following function is used to set the info bits of a record. */
-UNIV_INLINE
-void
-rec_set_info_bits_new(
-/*==================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- ulint bits) /*!< in: info bits */
-{
- rec_set_bit_field_1(rec, bits, REC_NEW_INFO_BITS,
- REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
-}
-
-/******************************************************//**
-The following function is used to set the status bits of a new-style record. */
-UNIV_INLINE
-void
-rec_set_status(
-/*===========*/
- rec_t* rec, /*!< in/out: physical record */
- ulint bits) /*!< in: info bits */
-{
- rec_set_bit_field_1(rec, bits, REC_NEW_STATUS,
- REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
-}
-
-/******************************************************//**
-The following function is used to retrieve the info and status
-bits of a record. (Only compact records have status bits.)
-@return info bits */
-UNIV_INLINE
-ulint
-rec_get_info_and_status_bits(
-/*=========================*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- ulint bits;
-#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
-& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
-# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
-#endif
- if (comp) {
- bits = rec_get_info_bits(rec, TRUE) | rec_get_status(rec);
- } else {
- bits = rec_get_info_bits(rec, FALSE);
- ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
- }
- return(bits);
-}
-/******************************************************//**
-The following function is used to set the info and status
-bits of a record. (Only compact records have status bits.) */
-UNIV_INLINE
-void
-rec_set_info_and_status_bits(
-/*=========================*/
- rec_t* rec, /*!< in/out: physical record */
- ulint bits) /*!< in: info bits */
-{
-#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
-& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
-# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
-#endif
- rec_set_status(rec, bits & REC_NEW_STATUS_MASK);
- rec_set_info_bits_new(rec, bits & ~REC_NEW_STATUS_MASK);
-}
-
-/******************************************************//**
-The following function tells if record is delete marked.
-@return nonzero if delete marked */
-UNIV_INLINE
-ulint
-rec_get_deleted_flag(
-/*=================*/
- const rec_t* rec, /*!< in: physical record */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- if (comp) {
- return(rec_get_bit_field_1(rec, REC_NEW_INFO_BITS,
- REC_INFO_DELETED_FLAG,
- REC_INFO_BITS_SHIFT));
- } else {
- return(rec_get_bit_field_1(rec, REC_OLD_INFO_BITS,
- REC_INFO_DELETED_FLAG,
- REC_INFO_BITS_SHIFT));
- }
-}
-
-/******************************************************//**
-The following function is used to set the deleted bit. */
-UNIV_INLINE
-void
-rec_set_deleted_flag_old(
-/*=====================*/
- rec_t* rec, /*!< in: old-style physical record */
- ulint flag) /*!< in: nonzero if delete marked */
-{
- ulint val;
-
- val = rec_get_info_bits(rec, FALSE);
-
- if (flag) {
- val |= REC_INFO_DELETED_FLAG;
- } else {
- val &= ~REC_INFO_DELETED_FLAG;
- }
-
- rec_set_info_bits_old(rec, val);
-}
-
-/******************************************************//**
-The following function is used to set the deleted bit. */
-UNIV_INLINE
-void
-rec_set_deleted_flag_new(
-/*=====================*/
- rec_t* rec, /*!< in/out: new-style physical record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint flag) /*!< in: nonzero if delete marked */
-{
- ulint val;
-
- val = rec_get_info_bits(rec, TRUE);
-
- if (flag) {
- val |= REC_INFO_DELETED_FLAG;
- } else {
- val &= ~REC_INFO_DELETED_FLAG;
- }
-
- rec_set_info_bits_new(rec, val);
-
- if (page_zip) {
- page_zip_rec_set_deleted(page_zip, rec, flag);
- }
-}
-
-/******************************************************//**
-The following function tells if a new-style record is a node pointer.
-@return TRUE if node pointer */
-UNIV_INLINE
-ibool
-rec_get_node_ptr_flag(
-/*==================*/
- const rec_t* rec) /*!< in: physical record */
-{
- return(REC_STATUS_NODE_PTR == rec_get_status(rec));
-}
-
-/******************************************************//**
-The following function is used to get the order number
-of an old-style record in the heap of the index page.
-@return heap order number */
-UNIV_INLINE
-ulint
-rec_get_heap_no_old(
-/*================*/
- const rec_t* rec) /*!< in: physical record */
-{
- return(rec_get_bit_field_2(rec, REC_OLD_HEAP_NO,
- REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT));
-}
-
-/******************************************************//**
-The following function is used to set the heap number
-field in an old-style record. */
-UNIV_INLINE
-void
-rec_set_heap_no_old(
-/*================*/
- rec_t* rec, /*!< in: physical record */
- ulint heap_no)/*!< in: the heap number */
-{
- rec_set_bit_field_2(rec, heap_no, REC_OLD_HEAP_NO,
- REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
-}
-
-/******************************************************//**
-The following function is used to get the order number
-of a new-style record in the heap of the index page.
-@return heap order number */
-UNIV_INLINE
-ulint
-rec_get_heap_no_new(
-/*================*/
- const rec_t* rec) /*!< in: physical record */
-{
- return(rec_get_bit_field_2(rec, REC_NEW_HEAP_NO,
- REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT));
-}
-
-/******************************************************//**
-The following function is used to set the heap number
-field in a new-style record. */
-UNIV_INLINE
-void
-rec_set_heap_no_new(
-/*================*/
- rec_t* rec, /*!< in/out: physical record */
- ulint heap_no)/*!< in: the heap number */
-{
- rec_set_bit_field_2(rec, heap_no, REC_NEW_HEAP_NO,
- REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
-}
-
-/******************************************************//**
-The following function is used to test whether the data offsets in the record
-are stored in one-byte or two-byte format.
-@return TRUE if 1-byte form */
-UNIV_INLINE
-ibool
-rec_get_1byte_offs_flag(
-/*====================*/
- const rec_t* rec) /*!< in: physical record */
-{
-#if TRUE != 1
-#error "TRUE != 1"
-#endif
-
- return(rec_get_bit_field_1(rec, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
- REC_OLD_SHORT_SHIFT));
-}
-
-/******************************************************//**
-The following function is used to set the 1-byte offsets flag. */
-UNIV_INLINE
-void
-rec_set_1byte_offs_flag(
-/*====================*/
- rec_t* rec, /*!< in: physical record */
- ibool flag) /*!< in: TRUE if 1byte form */
-{
-#if TRUE != 1
-#error "TRUE != 1"
-#endif
- ut_ad(flag <= TRUE);
-
- rec_set_bit_field_1(rec, flag, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
- REC_OLD_SHORT_SHIFT);
-}
-
-/******************************************************//**
-Returns the offset of nth field end if the record is stored in the 1-byte
-offsets form. If the field is SQL null, the flag is ORed in the returned
-value.
-@return offset of the start of the field, SQL null flag ORed */
-UNIV_INLINE
-ulint
-rec_1_get_field_end_info(
-/*=====================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
-{
- ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields_old(rec));
-
- return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1)));
-}
-
-/******************************************************//**
-Returns the offset of nth field end if the record is stored in the 2-byte
-offsets form. If the field is SQL null, the flag is ORed in the returned
-value.
-@return offset of the start of the field, SQL null flag and extern
-storage flag ORed */
-UNIV_INLINE
-ulint
-rec_2_get_field_end_info(
-/*=====================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
-{
- ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields_old(rec));
-
- return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2)));
-}
-
-/******************************************************//**
-Returns nonzero if the field is stored off-page.
-@retval 0 if the field is stored in-page
-@retval REC_2BYTE_EXTERN_MASK if the field is stored externally */
-UNIV_INLINE
-ulint
-rec_2_is_field_extern(
-/*==================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
-{
- return(rec_2_get_field_end_info(rec, n) & REC_2BYTE_EXTERN_MASK);
-}
-
-/* Get the base address of offsets. The extra_size is stored at
-this position, and following positions hold the end offsets of
-the fields. */
-#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE)
-
-/**********************************************************//**
-The following function returns the number of allocated elements
-for an array of offsets.
-@return number of elements */
-UNIV_INLINE
-ulint
-rec_offs_get_n_alloc(
-/*=================*/
- const ulint* offsets)/*!< in: array for rec_get_offsets() */
-{
- ulint n_alloc;
- ut_ad(offsets);
- n_alloc = offsets[0];
- ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
- UNIV_MEM_ASSERT_W(offsets, n_alloc * sizeof *offsets);
- return(n_alloc);
-}
-
-/**********************************************************//**
-The following function sets the number of allocated elements
-for an array of offsets. */
-UNIV_INLINE
-void
-rec_offs_set_n_alloc(
-/*=================*/
- ulint* offsets, /*!< out: array for rec_get_offsets(),
- must be allocated */
- ulint n_alloc) /*!< in: number of elements */
-{
- ut_ad(offsets);
- ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
- UNIV_MEM_ASSERT_AND_ALLOC(offsets, n_alloc * sizeof *offsets);
- offsets[0] = n_alloc;
-}
-
-/**********************************************************//**
-The following function returns the number of fields in a record.
-@return number of fields */
-UNIV_INLINE
-ulint
-rec_offs_n_fields(
-/*==============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint n_fields;
- ut_ad(offsets);
- n_fields = offsets[1];
- ut_ad(n_fields > 0);
- ut_ad(n_fields <= REC_MAX_N_FIELDS);
- ut_ad(n_fields + REC_OFFS_HEADER_SIZE
- <= rec_offs_get_n_alloc(offsets));
- return(n_fields);
-}
-
-/************************************************************//**
-Validates offsets returned by rec_get_offsets().
-@return TRUE if valid */
-UNIV_INLINE
-ibool
-rec_offs_validate(
-/*==============*/
- const rec_t* rec, /*!< in: record or NULL */
- const dict_index_t* index, /*!< in: record descriptor or NULL */
- const ulint* offsets)/*!< in: array returned by
- rec_get_offsets() */
-{
- ulint i = rec_offs_n_fields(offsets);
- ulint last = ULINT_MAX;
- ulint comp = *rec_offs_base(offsets) & REC_OFFS_COMPACT;
-
- if (rec) {
- ut_ad((ulint) rec == offsets[2]);
- if (!comp) {
- ut_a(rec_get_n_fields_old(rec) >= i);
- }
- }
- if (index) {
- ulint max_n_fields;
- ut_ad((ulint) index == offsets[3]);
- max_n_fields = ut_max(
- dict_index_get_n_fields(index),
- dict_index_get_n_unique_in_tree(index) + 1);
- if (comp && rec) {
- switch (rec_get_status(rec)) {
- case REC_STATUS_ORDINARY:
- break;
- case REC_STATUS_NODE_PTR:
- max_n_fields = dict_index_get_n_unique_in_tree(
- index) + 1;
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- max_n_fields = 1;
- break;
- default:
- ut_error;
- }
- }
- /* index->n_def == 0 for dummy indexes if !comp */
- ut_a(!comp || index->n_def);
- ut_a(!index->n_def || i <= max_n_fields);
- }
- while (i--) {
- ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK;
- ut_a(curr <= last);
- last = curr;
- }
- return(TRUE);
-}
-#ifdef UNIV_DEBUG
-/************************************************************//**
-Updates debug data in offsets, in order to avoid bogus
-rec_offs_validate() failures. */
-UNIV_INLINE
-void
-rec_offs_make_valid(
-/*================*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in: array returned by
- rec_get_offsets() */
-{
- ut_ad(rec);
- ut_ad(index);
- ut_ad(offsets);
- ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets));
- offsets[2] = (ulint) rec;
- offsets[3] = (ulint) index;
-}
-#endif /* UNIV_DEBUG */
-
-/************************************************************//**
-The following function is used to get an offset to the nth
-data field in a record.
-@return offset from the origin of rec */
-UNIV_INLINE
-ulint
-rec_get_nth_field_offs(
-/*===================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n, /*!< in: index of the field */
- ulint* len) /*!< out: length of the field; UNIV_SQL_NULL
- if SQL null */
-{
- ulint offs;
- ulint length;
- ut_ad(n < rec_offs_n_fields(offsets));
- ut_ad(len);
-
- if (n == 0) {
- offs = 0;
- } else {
- offs = rec_offs_base(offsets)[n] & REC_OFFS_MASK;
- }
-
- length = rec_offs_base(offsets)[1 + n];
-
- if (length & REC_OFFS_SQL_NULL) {
- length = UNIV_SQL_NULL;
- } else {
- length &= REC_OFFS_MASK;
- length -= offs;
- }
-
- *len = length;
- return(offs);
-}
-
-/******************************************************//**
-Determine if the offsets are for a record in the new
-compact format.
-@return nonzero if compact format */
-UNIV_INLINE
-ulint
-rec_offs_comp(
-/*==========*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- return(*rec_offs_base(offsets) & REC_OFFS_COMPACT);
-}
-
-/******************************************************//**
-Determine if the offsets are for a record containing
-externally stored columns.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_any_extern(
-/*================*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- return(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL);
-}
-
-/******************************************************//**
-Determine if the offsets are for a record containing null BLOB pointers.
-@return first field containing a null BLOB pointer, or NULL if none found */
-UNIV_INLINE
-const byte*
-rec_offs_any_null_extern(
-/*=====================*/
- const rec_t* rec, /*!< in: record */
- const ulint* offsets) /*!< in: rec_get_offsets(rec) */
-{
- ulint i;
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- if (!rec_offs_any_extern(offsets)) {
- return(NULL);
- }
-
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- if (rec_offs_nth_extern(offsets, i)) {
- ulint len;
- const byte* field
- = rec_get_nth_field(rec, offsets, i, &len);
-
- ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
- if (!memcmp(field + len
- - BTR_EXTERN_FIELD_REF_SIZE,
- field_ref_zero,
- BTR_EXTERN_FIELD_REF_SIZE)) {
- return(field);
- }
- }
- }
-
- return(NULL);
-}
-
-/******************************************************//**
-Returns nonzero if the extern bit is set in nth field of rec.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_nth_extern(
-/*================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- ut_ad(n < rec_offs_n_fields(offsets));
- return(rec_offs_base(offsets)[1 + n] & REC_OFFS_EXTERNAL);
-}
-
-/******************************************************//**
-Returns nonzero if the SQL NULL bit is set in nth field of rec.
-@return nonzero if SQL NULL */
-UNIV_INLINE
-ulint
-rec_offs_nth_sql_null(
-/*==================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- ut_ad(n < rec_offs_n_fields(offsets));
- return(rec_offs_base(offsets)[1 + n] & REC_OFFS_SQL_NULL);
-}
-
-/******************************************************//**
-Gets the physical size of a field.
-@return length of field */
-UNIV_INLINE
-ulint
-rec_offs_nth_size(
-/*==============*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- ut_ad(n < rec_offs_n_fields(offsets));
- if (!n) {
- return(rec_offs_base(offsets)[1 + n] & REC_OFFS_MASK);
- }
- return((rec_offs_base(offsets)[1 + n] - rec_offs_base(offsets)[n])
- & REC_OFFS_MASK);
-}
-
-/******************************************************//**
-Returns the number of extern bits set in a record.
-@return number of externally stored fields */
-UNIV_INLINE
-ulint
-rec_offs_n_extern(
-/*==============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint n = 0;
-
- if (rec_offs_any_extern(offsets)) {
- ulint i;
-
- for (i = rec_offs_n_fields(offsets); i--; ) {
- if (rec_offs_nth_extern(offsets, i)) {
- n++;
- }
- }
- }
-
- return(n);
-}
-
-/******************************************************//**
-Returns the offset of n - 1th field end if the record is stored in the 1-byte
-offsets form. If the field is SQL null, the flag is ORed in the returned
-value. This function and the 2-byte counterpart are defined here because the
-C-compiler was not able to sum negative and positive constant offsets, and
-warned of constant arithmetic overflow within the compiler.
-@return offset of the start of the PREVIOUS field, SQL null flag ORed */
-UNIV_INLINE
-ulint
-rec_1_get_prev_field_end_info(
-/*==========================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
-{
- ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields_old(rec));
-
- return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n)));
-}
-
-/******************************************************//**
-Returns the offset of n - 1th field end if the record is stored in the 2-byte
-offsets form. If the field is SQL null, the flag is ORed in the returned
-value.
-@return offset of the start of the PREVIOUS field, SQL null flag ORed */
-UNIV_INLINE
-ulint
-rec_2_get_prev_field_end_info(
-/*==========================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
-{
- ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields_old(rec));
-
- return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n)));
-}
-
-/******************************************************//**
-Sets the field end info for the nth field if the record is stored in the
-1-byte format. */
-UNIV_INLINE
-void
-rec_1_set_field_end_info(
-/*=====================*/
- rec_t* rec, /*!< in: record */
- ulint n, /*!< in: field index */
- ulint info) /*!< in: value to set */
-{
- ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields_old(rec));
-
- mach_write_to_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1), info);
-}
-
-/******************************************************//**
-Sets the field end info for the nth field if the record is stored in the
-2-byte format. */
-UNIV_INLINE
-void
-rec_2_set_field_end_info(
-/*=====================*/
- rec_t* rec, /*!< in: record */
- ulint n, /*!< in: field index */
- ulint info) /*!< in: value to set */
-{
- ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields_old(rec));
-
- mach_write_to_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2), info);
-}
-
-/******************************************************//**
-Returns the offset of nth field start if the record is stored in the 1-byte
-offsets form.
-@return offset of the start of the field */
-UNIV_INLINE
-ulint
-rec_1_get_field_start_offs(
-/*=======================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
-{
- ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields_old(rec));
-
- if (n == 0) {
-
- return(0);
- }
-
- return(rec_1_get_prev_field_end_info(rec, n)
- & ~REC_1BYTE_SQL_NULL_MASK);
-}
-
-/******************************************************//**
-Returns the offset of nth field start if the record is stored in the 2-byte
-offsets form.
-@return offset of the start of the field */
-UNIV_INLINE
-ulint
-rec_2_get_field_start_offs(
-/*=======================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
-{
- ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields_old(rec));
-
- if (n == 0) {
-
- return(0);
- }
-
- return(rec_2_get_prev_field_end_info(rec, n)
- & ~(REC_2BYTE_SQL_NULL_MASK | REC_2BYTE_EXTERN_MASK));
-}
-
-/******************************************************//**
-The following function is used to read the offset of the start of a data field
-in the record. The start of an SQL null field is the end offset of the
-previous non-null field, or 0, if none exists. If n is the number of the last
-field + 1, then the end offset of the last field is returned.
-@return offset of the start of the field */
-UNIV_INLINE
-ulint
-rec_get_field_start_offs(
-/*=====================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: field index */
-{
- ut_ad(rec);
- ut_ad(n <= rec_get_n_fields_old(rec));
-
- if (n == 0) {
-
- return(0);
- }
-
- if (rec_get_1byte_offs_flag(rec)) {
-
- return(rec_1_get_field_start_offs(rec, n));
- }
-
- return(rec_2_get_field_start_offs(rec, n));
-}
-
-/************************************************************//**
-Gets the physical size of an old-style field.
-Also an SQL null may have a field of size > 0,
-if the data type is of a fixed size.
-@return field size in bytes */
-UNIV_INLINE
-ulint
-rec_get_nth_field_size(
-/*===================*/
- const rec_t* rec, /*!< in: record */
- ulint n) /*!< in: index of the field */
-{
- ulint os;
- ulint next_os;
-
- os = rec_get_field_start_offs(rec, n);
- next_os = rec_get_field_start_offs(rec, n + 1);
-
- ut_ad(next_os - os < UNIV_PAGE_SIZE);
-
- return(next_os - os);
-}
-
-/***********************************************************//**
-This is used to modify the value of an already existing field in a record.
-The previous value must have exactly the same size as the new value. If len
-is UNIV_SQL_NULL then the field is treated as an SQL null.
-For records in ROW_FORMAT=COMPACT (new-style records), len must not be
-UNIV_SQL_NULL unless the field already is SQL null. */
-UNIV_INLINE
-void
-rec_set_nth_field(
-/*==============*/
- rec_t* rec, /*!< in: record */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n, /*!< in: index number of the field */
- const void* data, /*!< in: pointer to the data
- if not SQL null */
- ulint len) /*!< in: length of the data or UNIV_SQL_NULL */
-{
- byte* data2;
- ulint len2;
-
- ut_ad(rec);
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- if (len == UNIV_SQL_NULL) {
- if (!rec_offs_nth_sql_null(offsets, n)) {
- ut_a(!rec_offs_comp(offsets));
- rec_set_nth_field_sql_null(rec, n);
- }
-
- return;
- }
-
- data2 = rec_get_nth_field(rec, offsets, n, &len2);
- if (len2 == UNIV_SQL_NULL) {
- ut_ad(!rec_offs_comp(offsets));
- rec_set_nth_field_null_bit(rec, n, FALSE);
- ut_ad(len == rec_get_nth_field_size(rec, n));
- } else {
- ut_ad(len2 == len);
- }
-
- ut_memcpy(data2, data, len);
-}
-
-/**********************************************************//**
-The following function returns the data size of an old-style physical
-record, that is the sum of field lengths. SQL null fields
-are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes.
-@return size */
-UNIV_INLINE
-ulint
-rec_get_data_size_old(
-/*==================*/
- const rec_t* rec) /*!< in: physical record */
-{
- ut_ad(rec);
-
- return(rec_get_field_start_offs(rec, rec_get_n_fields_old(rec)));
-}
-
-/**********************************************************//**
-The following function sets the number of fields in offsets. */
-UNIV_INLINE
-void
-rec_offs_set_n_fields(
-/*==================*/
- ulint* offsets, /*!< in/out: array returned by
- rec_get_offsets() */
- ulint n_fields) /*!< in: number of fields */
-{
- ut_ad(offsets);
- ut_ad(n_fields > 0);
- ut_ad(n_fields <= REC_MAX_N_FIELDS);
- ut_ad(n_fields + REC_OFFS_HEADER_SIZE
- <= rec_offs_get_n_alloc(offsets));
- offsets[1] = n_fields;
-}
-
-/**********************************************************//**
-The following function returns the data size of a physical
-record, that is the sum of field lengths. SQL null fields
-are counted as length 0 fields. The value returned by the function
-is the distance from record origin to record end in bytes.
-@return size */
-UNIV_INLINE
-ulint
-rec_offs_data_size(
-/*===============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint size;
-
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- size = rec_offs_base(offsets)[rec_offs_n_fields(offsets)]
- & REC_OFFS_MASK;
- ut_ad(size < UNIV_PAGE_SIZE);
- return(size);
-}
-
-/**********************************************************//**
-Returns the total size of record minus data size of record. The value
-returned by the function is the distance from record start to record origin
-in bytes.
-@return size */
-UNIV_INLINE
-ulint
-rec_offs_extra_size(
-/*================*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint size;
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- size = *rec_offs_base(offsets) & ~(REC_OFFS_COMPACT | REC_OFFS_EXTERNAL);
- ut_ad(size < UNIV_PAGE_SIZE);
- return(size);
-}
-
-/**********************************************************//**
-Returns the total size of a physical record.
-@return size */
-UNIV_INLINE
-ulint
-rec_offs_size(
-/*==========*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- return(rec_offs_data_size(offsets) + rec_offs_extra_size(offsets));
-}
-
-#ifdef UNIV_DEBUG
-/**********************************************************//**
-Returns a pointer to the end of the record.
-@return pointer to end */
-UNIV_INLINE
-byte*
-rec_get_end(
-/*========*/
- const rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- return(const_cast<rec_t*>(rec + rec_offs_data_size(offsets)));
-}
-
-/**********************************************************//**
-Returns a pointer to the start of the record.
-@return pointer to start */
-UNIV_INLINE
-byte*
-rec_get_start(
-/*==========*/
- const rec_t* rec, /*!< in: pointer to record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- return(const_cast<rec_t*>(rec - rec_offs_extra_size(offsets)));
-}
-#endif /* UNIV_DEBUG */
-
-/***************************************************************//**
-Copies a physical record to a buffer.
-@return pointer to the origin of the copy */
-UNIV_INLINE
-rec_t*
-rec_copy(
-/*=====*/
- void* buf, /*!< in: buffer */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint extra_len;
- ulint data_len;
-
- ut_ad(rec != NULL);
- ut_ad(buf != NULL);
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(rec_validate(rec, offsets));
-
- extra_len = rec_offs_extra_size(offsets);
- data_len = rec_offs_data_size(offsets);
-
- ut_memcpy(buf, rec - extra_len, extra_len + data_len);
-
- return((byte*) buf + extra_len);
-}
-
-/**********************************************************//**
-Returns the extra size of an old-style physical record if we know its
-data size and number of fields.
-@return extra size */
-UNIV_INLINE
-ulint
-rec_get_converted_extra_size(
-/*=========================*/
- ulint data_size, /*!< in: data size */
- ulint n_fields, /*!< in: number of fields */
- ulint n_ext) /*!< in: number of externally stored columns */
-{
- if (!n_ext && data_size <= REC_1BYTE_OFFS_LIMIT) {
-
- return(REC_N_OLD_EXTRA_BYTES + n_fields);
- }
-
- return(REC_N_OLD_EXTRA_BYTES + 2 * n_fields);
-}
-
-/**********************************************************//**
-The following function returns the size of a data tuple when converted to
-a physical record.
-@return size */
-UNIV_INLINE
-ulint
-rec_get_converted_size(
-/*===================*/
- dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext) /*!< in: number of externally stored columns */
-{
- ulint data_size;
- ulint extra_size;
-
- ut_ad(index);
- ut_ad(dtuple);
- ut_ad(dtuple_check_typed(dtuple));
-
- ut_ad(dict_index_is_univ(index)
- || dtuple_get_n_fields(dtuple)
- == (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
- == REC_STATUS_NODE_PTR)
- ? dict_index_get_n_unique_in_tree(index) + 1
- : dict_index_get_n_fields(index)));
-
- if (dict_table_is_comp(index->table)) {
- return(rec_get_converted_size_comp(index,
- dtuple_get_info_bits(dtuple)
- & REC_NEW_STATUS_MASK,
- dtuple->fields,
- dtuple->n_fields, NULL));
- }
-
- data_size = dtuple_get_data_size(dtuple, 0);
-
- extra_size = rec_get_converted_extra_size(
- data_size, dtuple_get_n_fields(dtuple), n_ext);
-
-#if 0
- /* This code is inactive since it may be the wrong place to add
- in the size of node pointers used in parent pages AND it is not
- currently needed since ha_innobase::max_supported_key_length()
- ensures that the key size limit for each page size is well below
- the actual limit ((free space on page / 4) - record overhead).
- But those limits will need to be raised when InnoDB can
- support multiple page sizes. At that time, we will need
- to consider the node pointer on these universal btrees. */
-
- if (dict_index_is_univ(index)) {
- /* This is for the insert buffer B-tree.
- All fields in the leaf tuple ascend to the
- parent node plus the child page pointer. */
-
- /* ibuf cannot contain externally stored fields */
- ut_ad(n_ext == 0);
-
- /* Add the data pointer and recompute extra_size
- based on one more field. */
- data_size += REC_NODE_PTR_SIZE;
- extra_size = rec_get_converted_extra_size(
- data_size,
- dtuple_get_n_fields(dtuple) + 1,
- 0);
-
- /* Be sure dtuple->n_fields has this node ptr
- accounted for. This function should correspond to
- what rec_convert_dtuple_to_rec() needs in storage.
- In optimistic insert or update-not-in-place, we will
- have to ensure that if the record is converted to a
- node pointer, it will not become too large.*/
- }
-#endif
-
- return(data_size + extra_size);
-}
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Folds a prefix of a physical record to a ulint. Folds only existing fields,
-that is, checks that we do not run out of the record.
-@return the folded value */
-UNIV_INLINE
-ulint
-rec_fold(
-/*=====*/
- const rec_t* rec, /*!< in: the physical record */
- const ulint* offsets, /*!< in: array returned by
- rec_get_offsets() */
- ulint n_fields, /*!< in: number of complete
- fields to fold */
- ulint n_bytes, /*!< in: number of bytes to fold
- in an incomplete last field */
- index_id_t tree_id) /*!< in: index tree id */
-{
- ulint i;
- const byte* data;
- ulint len;
- ulint fold;
- ulint n_fields_rec;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(rec_validate(rec, offsets));
- ut_ad(n_fields + n_bytes > 0);
-
- n_fields_rec = rec_offs_n_fields(offsets);
- ut_ad(n_fields <= n_fields_rec);
- ut_ad(n_fields < n_fields_rec || n_bytes == 0);
-
- if (n_fields > n_fields_rec) {
- n_fields = n_fields_rec;
- }
-
- if (n_fields == n_fields_rec) {
- n_bytes = 0;
- }
-
- fold = ut_fold_ull(tree_id);
-
- for (i = 0; i < n_fields; i++) {
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len != UNIV_SQL_NULL) {
- fold = ut_fold_ulint_pair(fold,
- ut_fold_binary(data, len));
- }
- }
-
- if (n_bytes > 0) {
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len != UNIV_SQL_NULL) {
- if (len > n_bytes) {
- len = n_bytes;
- }
-
- fold = ut_fold_ulint_pair(fold,
- ut_fold_binary(data, len));
- }
- }
-
- return(fold);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/rem0types.h b/storage/xtradb/include/rem0types.h
deleted file mode 100644
index 5da96066f88..00000000000
--- a/storage/xtradb/include/rem0types.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/rem0types.h
-Record manager global types
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifndef rem0types_h
-#define rem0types_h
-
-/* We define the physical record simply as an array of bytes */
-typedef byte rec_t;
-
-/* Maximum values for various fields (for non-blob tuples) */
-#define REC_MAX_N_FIELDS (1024 - 1)
-#define REC_MAX_HEAP_NO (2 * 8192 - 1)
-#define REC_MAX_N_OWNED (16 - 1)
-
-/* Maximum number of user defined fields/columns. The reserved columns
-are the ones InnoDB adds internally: DB_ROW_ID, DB_TRX_ID, DB_ROLL_PTR.
-We need "* 2" because mlog_parse_index() creates a dummy table object
-possibly, with some of the system columns in it, and then adds the 3
-system columns (again) using dict_table_add_system_columns(). The problem
-is that mlog_parse_index() cannot recognize the system columns by
-just having n_fields, n_uniq and the lengths of the columns. */
-#define REC_MAX_N_USER_FIELDS (REC_MAX_N_FIELDS - DATA_N_SYS_COLS * 2)
-
-/* REC_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
-indexed field length (or indexed prefix length) for indexes on tables of
-ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT format.
-Before we support UTF-8 encodings with mbmaxlen = 4, a UTF-8 character
-may take at most 3 bytes. So the limit was set to 3*256, so that one
-can create a column prefix index on 256 characters of a TEXT or VARCHAR
-column also in the UTF-8 charset.
-This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
-files would be at risk! */
-#define REC_ANTELOPE_MAX_INDEX_COL_LEN 768
-
-/** Maximum indexed field length for table format UNIV_FORMAT_B and
-beyond.
-This (3072) is the maximum index row length allowed, so we cannot create index
-prefix column longer than that. */
-#define REC_VERSION_56_MAX_INDEX_COL_LEN 3072
-
-/** Innodb row types are a subset of the MySQL global enum row_type.
-They are made into their own enum so that switch statements can account
-for each of them. */
-enum rec_format_enum {
- REC_FORMAT_REDUNDANT = 0, /*!< REDUNDANT row format */
- REC_FORMAT_COMPACT = 1, /*!< COMPACT row format */
- REC_FORMAT_COMPRESSED = 2, /*!< COMPRESSED row format */
- REC_FORMAT_DYNAMIC = 3 /*!< DYNAMIC row format */
-};
-typedef enum rec_format_enum rec_format_t;
-
-/** Compressed field header size in bytes */
-#define ZIP_COLUMN_HEADER_LENGTH 2
-
-#endif
diff --git a/storage/xtradb/include/row0ext.h b/storage/xtradb/include/row0ext.h
deleted file mode 100644
index a098e2f9b29..00000000000
--- a/storage/xtradb/include/row0ext.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0ext.h
-Caching of externally stored column prefixes
-
-Created September 2006 Marko Makela
-*******************************************************/
-
-#ifndef row0ext_h
-#define row0ext_h
-
-#include "univ.i"
-#include "row0types.h"
-#include "data0types.h"
-#include "mem0mem.h"
-#include "dict0types.h"
-
-/********************************************************************//**
-Creates a cache of column prefixes of externally stored columns.
-@return own: column prefix cache */
-UNIV_INTERN
-row_ext_t*
-row_ext_create(
-/*===========*/
- ulint n_ext, /*!< in: number of externally stored columns */
- const ulint* ext, /*!< in: col_no's of externally stored columns
- in the InnoDB table object, as reported by
- dict_col_get_no(); NOT relative to the records
- in the clustered index */
- ulint flags, /*!< in: table->flags */
- const dtuple_t* tuple, /*!< in: data tuple containing the field
- references of the externally stored
- columns; must be indexed by col_no;
- the clustered index record must be
- covered by a lock or a page latch
- to prevent deletion (rollback or purge). */
- mem_heap_t* heap); /*!< in: heap where created */
-
-/********************************************************************//**
-Looks up a column prefix of an externally stored column.
-@return column prefix, or NULL if the column is not stored externally,
-or pointer to field_ref_zero if the BLOB pointer is unset */
-UNIV_INLINE
-const byte*
-row_ext_lookup_ith(
-/*===============*/
- const row_ext_t* ext, /*!< in/out: column prefix cache */
- ulint i, /*!< in: index of ext->ext[] */
- ulint* len); /*!< out: length of prefix, in bytes,
- at most the length determined by
- DICT_MAX_FIELD_LEN_BY_FORMAT() */
-/********************************************************************//**
-Looks up a column prefix of an externally stored column.
-@return column prefix, or NULL if the column is not stored externally,
-or pointer to field_ref_zero if the BLOB pointer is unset */
-UNIV_INLINE
-const byte*
-row_ext_lookup(
-/*===========*/
- const row_ext_t* ext, /*!< in: column prefix cache */
- ulint col, /*!< in: column number in the InnoDB
- table object, as reported by
- dict_col_get_no(); NOT relative to the
- records in the clustered index */
- ulint* len); /*!< out: length of prefix, in bytes,
- at most the length determined by
- DICT_MAX_FIELD_LEN_BY_FORMAT() */
-
-/** Prefixes of externally stored columns */
-struct row_ext_t{
- ulint n_ext; /*!< number of externally stored columns */
- const ulint* ext; /*!< col_no's of externally stored columns */
- byte* buf; /*!< backing store of the column prefix cache */
- ulint max_len;/*!< maximum prefix length, it could be
- REC_ANTELOPE_MAX_INDEX_COL_LEN or
- REC_VERSION_56_MAX_INDEX_COL_LEN depending
- on row format */
- ulint len[1]; /*!< prefix lengths; 0 if not cached */
-};
-
-#ifndef UNIV_NONINL
-#include "row0ext.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/row0ext.ic b/storage/xtradb/include/row0ext.ic
deleted file mode 100644
index 39e150d91d5..00000000000
--- a/storage/xtradb/include/row0ext.ic
+++ /dev/null
@@ -1,87 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0ext.ic
-Caching of externally stored column prefixes
-
-Created September 2006 Marko Makela
-*******************************************************/
-
-#include "rem0types.h"
-#include "btr0types.h"
-
-/********************************************************************//**
-Looks up a column prefix of an externally stored column.
-@return column prefix, or NULL if the column is not stored externally,
-or pointer to field_ref_zero if the BLOB pointer is unset */
-UNIV_INLINE
-const byte*
-row_ext_lookup_ith(
-/*===============*/
- const row_ext_t* ext, /*!< in/out: column prefix cache */
- ulint i, /*!< in: index of ext->ext[] */
- ulint* len) /*!< out: length of prefix, in bytes,
- at most ext->max_len */
-{
- ut_ad(ext);
- ut_ad(len);
- ut_ad(i < ext->n_ext);
-
- *len = ext->len[i];
-
- ut_ad(*len <= ext->max_len);
- ut_ad(ext->max_len > 0);
-
- if (*len == 0) {
- /* The BLOB could not be fetched to the cache. */
- return(field_ref_zero);
- } else {
- return(ext->buf + i * ext->max_len);
- }
-}
-
-/********************************************************************//**
-Looks up a column prefix of an externally stored column.
-@return column prefix, or NULL if the column is not stored externally,
-or pointer to field_ref_zero if the BLOB pointer is unset */
-UNIV_INLINE
-const byte*
-row_ext_lookup(
-/*===========*/
- const row_ext_t* ext, /*!< in: column prefix cache */
- ulint col, /*!< in: column number in the InnoDB
- table object, as reported by
- dict_col_get_no(); NOT relative to the
- records in the clustered index */
- ulint* len) /*!< out: length of prefix, in bytes,
- at most ext->max_len */
-{
- ulint i;
-
- ut_ad(ext);
- ut_ad(len);
-
- for (i = 0; i < ext->n_ext; i++) {
- if (col == ext->ext[i]) {
- return(row_ext_lookup_ith(ext, i, len));
- }
- }
-
- return(NULL);
-}
diff --git a/storage/xtradb/include/row0ftsort.h b/storage/xtradb/include/row0ftsort.h
deleted file mode 100644
index 7c9ed23645c..00000000000
--- a/storage/xtradb/include/row0ftsort.h
+++ /dev/null
@@ -1,285 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0ftsort.h
-Create Full Text Index with (parallel) merge sort
-
-Created 10/13/2010 Jimmy Yang
-*******************************************************/
-
-#ifndef row0ftsort_h
-#define row0ftsort_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "row0mysql.h"
-#include "fts0fts.h"
-#include "fts0types.h"
-#include "fts0priv.h"
-#include "row0merge.h"
-
-/** This structure defines the information the scan thread will fetch
-and put to the linked list for parallel tokenization/sort threads
-to process */
-typedef struct fts_doc_item fts_doc_item_t;
-
-/** One document (its Doc ID and the field holding its text) in a doc list */
-struct fts_doc_item {
- dfield_t* field; /*!< field that contains the document string */
- doc_id_t doc_id; /*!< document ID */
- UT_LIST_NODE_T(fts_doc_item_t) doc_list;
- /*!< list node linking this item into a list of doc items */
-};
-
-/** This defines the list type that scan thread would feed the parallel
-tokenization threads and sort threads. */
-typedef UT_LIST_BASE_NODE_T(fts_doc_item_t) fts_doc_list_t;
-
-#define FTS_NUM_AUX_INDEX 6
-#define FTS_PLL_MERGE 1
-
-/** Sort information passed to each individual parallel sort thread */
-struct fts_psort_t;
-
-/** Common info passed to each parallel sort thread (see fts_psort_t::psort_common) */
-struct fts_psort_common_t {
- row_merge_dup_t* dup; /*!< descriptor of FTS index */
- dict_table_t* new_table; /*!< source table */
- trx_t* trx; /*!< transaction */
- fts_psort_t* all_info; /*!< all parallel sort info */
- os_event_t sort_event; /*!< sort event */
- os_event_t merge_event; /*!< merge event */
- ibool opt_doc_id_size;/*!< whether to use a 4-byte
- instead of an 8-byte integer to
- store Doc ID during sort, when
- the Doc ID is not big enough
- to need an 8-byte value */
- fil_space_crypt_t* crypt_data; /*!< crypt data or NULL */
-};
-
-struct fts_psort_t {
- ulint psort_id; /*!< Parallel sort ID */
- row_merge_buf_t* merge_buf[FTS_NUM_AUX_INDEX];
- /*!< sort buffers (FTS_NUM_AUX_INDEX of them) */
- merge_file_t* merge_file[FTS_NUM_AUX_INDEX];
- /*!< sort files (FTS_NUM_AUX_INDEX of them) */
- row_merge_block_t* merge_block[FTS_NUM_AUX_INDEX];
- /*!< buffer to write to file */
- row_merge_block_t* block_alloc[FTS_NUM_AUX_INDEX];
- /*!< allocated buffer; presumably backs merge_block (TODO confirm) */
- row_merge_block_t* crypt_block[FTS_NUM_AUX_INDEX];
- /*!< buffer to crypt data */
- row_merge_block_t* crypt_alloc[FTS_NUM_AUX_INDEX];
- /*!< allocated buffer; presumably backs crypt_block (TODO confirm) */
- ulint child_status; /*!< child thread status */
- ulint state; /*!< parent thread state */
- fts_doc_list_t fts_doc_list; /*!< doc list to process */
- fts_psort_common_t* psort_common; /*!< ptr to the info common to all psort threads */
- os_thread_t thread_hdl; /*!< thread handler */
- dberr_t error; /*!< db error during psort */
- ulint memory_used; /*!< memory used by fts_doc_list */
- ib_mutex_t mutex; /*!< mutex for fts_doc_list */
-};
-
-/** Stores the state of a string tokenization operation */
-struct fts_tokenize_ctx {
- ulint processed_len; /*!< processed string length */
- ulint init_pos; /*!< doc start position */
- ulint buf_used; /*!< the sort buffer (ID) in use when
- tokenization stops, which could
- be due to the sort buffer being full */
- ulint rows_added[FTS_NUM_AUX_INDEX];
- /*!< number of rows added for
- each FTS index partition */
- ib_rbt_t* cached_stopword;/*!< in: stopword list */
- dfield_t sort_field[FTS_NUM_FIELDS_SORT];
- /*!< in: sort field */
-};
-
-typedef struct fts_tokenize_ctx fts_tokenize_ctx_t;
-
-/** Stores the information needed for the insertion phase of FTS
-parallel sort. */
-struct fts_psort_insert {
- trx_t* trx; /*!< Transaction used for insertion */
- que_t** ins_graph; /*!< insert graph */
- fts_table_t fts_table; /*!< auxiliary table */
- CHARSET_INFO* charset; /*!< charset info */
- mem_heap_t* heap; /*!< memory heap */
- ibool opt_doc_id_size;/*!< Whether to use a smaller (4-byte)
- integer for Doc ID */
-};
-
-typedef struct fts_psort_insert fts_psort_insert_t;
-
-
-/** status bit used for communication between parent and child thread */
-#define FTS_PARENT_COMPLETE 1
-#define FTS_PARENT_EXITING 2
-#define FTS_CHILD_COMPLETE 1
-#define FTS_CHILD_EXITING 2
-
-/** Print a timestamp and then str, verbatim (not as a printf format), to stderr */
-#define FTSORT_PRINT
-
-#ifdef FTSORT_PRINT
-#define DEBUG_FTS_SORT_PRINT(str) \
- do { \
- ut_print_timestamp(stderr); \
- fputs((str), stderr); \
- } while (0)
-#else
-#define DEBUG_FTS_SORT_PRINT(str)
-#endif /* FTSORT_PRINT */
-
-/*************************************************************//**
-Create a temporary "fts sort index" used to merge sort the
-tokenized doc string. The index has three "fields":
-
-1) Tokenized word,
-2) Doc ID
-3) Word's position in original 'doc'.
-
-@return dict_index_t structure for the fts sort index */
-UNIV_INTERN
-dict_index_t*
-row_merge_create_fts_sort_index(
-/*============================*/
- dict_index_t* index, /*!< in: Original FTS index
- based on which this sort index
- is created */
- const dict_table_t* table, /*!< in: table that FTS index
- is being created on */
- ibool* opt_doc_id_size);
- /*!< out: whether to use 4 bytes
- instead of 8 bytes integer to
- store Doc ID during sort */
-
-/********************************************************************//**
-Initialize FTS parallel sort structures.
-@return TRUE if all successful */
-UNIV_INTERN
-ibool
-row_fts_psort_info_init(
-/*====================*/
- trx_t* trx, /*!< in: transaction */
- row_merge_dup_t* dup, /*!< in,own: descriptor of
- FTS index being created */
- const dict_table_t* new_table,/*!< in: table where indexes are
- created */
- ibool opt_doc_id_size,
- /*!< in: whether to use 4 bytes
- instead of 8 bytes integer to
- store Doc ID during sort */
- fts_psort_t** psort, /*!< out: parallel sort info to be
- instantiated */
- fts_psort_t** merge) /*!< out: parallel merge info
- to be instantiated */
- MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Clean up and deallocate FTS parallel sort structures, and close
-temporary merge sort files */
-UNIV_INTERN
-void
-row_fts_psort_info_destroy(
-/*=======================*/
- fts_psort_t* psort_info, /*!< parallel sort info */
- fts_psort_t* merge_info); /*!< parallel merge info */
-/********************************************************************//**
-Free up merge buffers when merge sort is done */
-UNIV_INTERN
-void
-row_fts_free_pll_merge_buf(
-/*=======================*/
- fts_psort_t* psort_info); /*!< in: parallel sort info */
-
-/*********************************************************************//**
-Function performs parallel tokenization of the incoming doc strings.
-@return OS_THREAD_DUMMY_RETURN */
-UNIV_INTERN
-os_thread_ret_t
-fts_parallel_tokenization(
-/*======================*/
- void* arg); /*!< in: psort_info for the thread */
-/*********************************************************************//**
-Start the parallel tokenization and parallel merge sort */
-UNIV_INTERN
-void
-row_fts_start_psort(
-/*================*/
- fts_psort_t* psort_info); /*!< in: parallel sort info */
-/*********************************************************************//**
-Function performs the merge and insertion of the sorted records.
-@return OS_THREAD_DUMMY_RETURN */
-UNIV_INTERN
-os_thread_ret_t
-fts_parallel_merge(
-/*===============*/
- void* arg); /*!< in: parallel merge info */
-/*********************************************************************//**
-Kick off the parallel merge and insert thread */
-UNIV_INTERN
-void
-row_fts_start_parallel_merge(
-/*=========================*/
- fts_psort_t* merge_info); /*!< in: parallel sort info */
-/********************************************************************//**
-Read sorted FTS data files and insert data tuples to auxiliary tables.
-This function is declared void and returns no value. */
-UNIV_INTERN
-void
-row_fts_insert_tuple(
-/*=================*/
- fts_psort_insert_t*
- ins_ctx, /*!< in: insert context */
- fts_tokenizer_word_t* word, /*!< in: last processed
- tokenized word */
- ib_vector_t* positions, /*!< in: word position */
- doc_id_t* in_doc_id, /*!< in: last item doc id */
- dtuple_t* dtuple); /*!< in: entry to insert */
-/********************************************************************//**
-Propagate a newly added record up one level in the selection tree
-@return parent where this value propagated to */
-UNIV_INTERN
-int
-row_merge_fts_sel_propagate(
-/*========================*/
- int propogated, /*!< in: tree node propagated */
- int* sel_tree, /*!< in: selection tree */
- ulint level, /*!< in: selection tree level */
- const mrec_t** mrec, /*!< in: sort record */
- ulint** offsets, /*!< in: record offsets */
- dict_index_t* index); /*!< in: FTS index */
-/********************************************************************//**
-Read sorted file containing index data tuples and insert these data
-tuples to the index
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-dberr_t
-row_fts_merge_insert(
-/*=================*/
- dict_index_t* index, /*!< in: index */
- dict_table_t* table, /*!< in: new table */
- fts_psort_t* psort_info, /*!< parallel sort info */
- ulint id) /*!< in: which auxiliary table's data
- to insert to */
- MY_ATTRIBUTE((nonnull));
-#endif /* row0ftsort_h */
diff --git a/storage/xtradb/include/row0import.h b/storage/xtradb/include/row0import.h
deleted file mode 100644
index a821c230a3b..00000000000
--- a/storage/xtradb/include/row0import.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0import.h
-Header file for import tablespace functions.
-
-Created 2012-02-08 by Sunny Bains
-*******************************************************/
-
-#ifndef row0import_h
-#define row0import_h
-
-#include "univ.i"
-#include "db0err.h"
-#include "dict0types.h"
-
-// Forward declarations
-struct trx_t;
-struct dict_table_t;
-struct row_prebuilt_t;
-
-/*****************************************************************//**
-Imports a tablespace. The space id in the .ibd file must match the space id
-of the table in the data dictionary.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_import_for_mysql(
-/*=================*/
- dict_table_t* table, /*!< in/out: table */
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct
- in MySQL */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/*****************************************************************//**
-Update the DICT_TF2_DISCARDED flag in SYS_TABLES.
-@return DB_SUCCESS or error code. */
-UNIV_INTERN
-dberr_t
-row_import_update_discarded_flag(
-/*=============================*/
- trx_t* trx, /*!< in/out: transaction that
- covers the update */
- table_id_t table_id, /*!< in: Table for which we want
- to set the root table->flags2 */
- bool discarded, /*!< in: set MIX_LEN column bit
- to discarded, if true */
- bool dict_locked) /*!< in: Set to true if the
- caller already owns the
- dict_sys_t:: mutex. */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/*****************************************************************//**
-Update the (space, root page) of a table's indexes from the values
-in the data dictionary.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-row_import_update_index_root(
-/*=========================*/
- trx_t* trx, /*!< in/out: transaction that
- covers the update */
- const dict_table_t* table, /*!< in: Table for which we want
- to set the root page_no */
- bool reset, /*!< in: if true then set to
- FIL_NULL */
- bool dict_locked) /*!< in: Set to true if the
- caller already owns the
- dict_sys_t:: mutex. */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifndef UNIV_NONINL
-#include "row0import.ic"
-#endif
-
-#endif /* row0import_h */
diff --git a/storage/xtradb/include/row0import.ic b/storage/xtradb/include/row0import.ic
deleted file mode 100644
index c5bbab49f6f..00000000000
--- a/storage/xtradb/include/row0import.ic
+++ /dev/null
@@ -1,25 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0import.ic
-
-Import tablespace inline functions.
-
-Created 2012-02-08 Sunny Bains
-*******************************************************/
diff --git a/storage/xtradb/include/row0ins.h b/storage/xtradb/include/row0ins.h
deleted file mode 100644
index 71ee39070ef..00000000000
--- a/storage/xtradb/include/row0ins.h
+++ /dev/null
@@ -1,240 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0ins.h
-Insert into a table
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0ins_h
-#define row0ins_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "que0types.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "row0types.h"
-
-/***************************************************************//**
-Checks if foreign key constraint fails for an index entry. Sets shared locks
-which lock either the success or the failure of the constraint. NOTE that
-the caller must have a shared latch on dict_foreign_key_check_lock.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_NO_REFERENCED_ROW, or
-DB_ROW_IS_REFERENCED */
-UNIV_INTERN
-dberr_t
-row_ins_check_foreign_constraint(
-/*=============================*/
- ibool check_ref,/*!< in: TRUE If we want to check that
- the referenced table is ok, FALSE if we
- want to check the foreign key table */
- dict_foreign_t* foreign,/*!< in: foreign constraint; NOTE that the
- tables mentioned in it must be in the
- dictionary cache if they exist at all */
- dict_table_t* table, /*!< in: if check_ref is TRUE, then the foreign
- table, else the referenced table */
- dtuple_t* entry, /*!< in: index entry for index */
- que_thr_t* thr) /*!< in: query thread */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Creates an insert node struct.
-@return own: insert node struct */
-UNIV_INTERN
-ins_node_t*
-ins_node_create(
-/*============*/
- ulint ins_type, /*!< in: INS_VALUES, ... */
- dict_table_t* table, /*!< in: table where to insert */
- mem_heap_t* heap); /*!< in: mem heap where created */
-/*********************************************************************//**
-Sets a new row to insert for an INS_DIRECT node. This function is only used
-if we have constructed the row separately, which is a rare case; this
-function is quite slow. */
-UNIV_INTERN
-void
-ins_node_set_new_row(
-/*=================*/
- ins_node_t* node, /*!< in: insert node */
- dtuple_t* row); /*!< in: new row (or first row) for the node */
-/***************************************************************//**
-Tries to insert an entry into a clustered index, ignoring foreign key
-constraints. If a record with the same unique key is found, the other
-record is necessarily marked deleted by a committed transaction, or a
-unique key violation error occurs. The delete marked record is then
-updated to an existing record, and we must write an undo log record on
-the delete marked record.
-@retval DB_SUCCESS on success
-@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
-@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
-@return error code */
-UNIV_INTERN
-dberr_t
-row_ins_clust_index_entry_low(
-/*==========================*/
- ulint flags, /*!< in: undo logging and locking flags */
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether we wish optimistic or
- pessimistic descent down the index tree */
- dict_index_t* index, /*!< in: clustered index */
- ulint n_uniq, /*!< in: 0 or index->n_uniq */
- dtuple_t* entry, /*!< in/out: index entry to insert */
- ulint n_ext, /*!< in: number of externally stored columns */
- que_thr_t* thr) /*!< in: query thread or NULL */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***************************************************************//**
-Tries to insert an entry into a secondary index. If a record with exactly the
-same fields is found, the other record is necessarily marked deleted.
-It is then unmarked. Otherwise, the entry is just inserted to the index.
-@retval DB_SUCCESS on success
-@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
-@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
-@return error code */
-UNIV_INTERN
-dberr_t
-row_ins_sec_index_entry_low(
-/*========================*/
- ulint flags, /*!< in: undo logging and locking flags */
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether we wish optimistic or
- pessimistic descent down the index tree */
- dict_index_t* index, /*!< in: secondary index */
- mem_heap_t* offsets_heap,
- /*!< in/out: memory heap that can be emptied */
- mem_heap_t* heap, /*!< in/out: memory heap */
- dtuple_t* entry, /*!< in/out: index entry to insert */
- trx_id_t trx_id, /*!< in: PAGE_MAX_TRX_ID during
- row_log_table_apply(), or 0 */
- que_thr_t* thr) /*!< in: query thread */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***************************************************************//**
-Tries to insert the externally stored fields (off-page columns)
-of a clustered index entry.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-UNIV_INTERN
-dberr_t
-row_ins_index_entry_big_rec_func(
-/*=============================*/
- const dtuple_t* entry, /*!< in/out: index entry to insert */
- const big_rec_t* big_rec,/*!< in: externally stored fields */
- ulint* offsets,/*!< in/out: rec offsets */
- mem_heap_t** heap, /*!< in/out: memory heap */
- dict_index_t* index, /*!< in: index */
- const char* file, /*!< in: file name of caller */
-#ifndef DBUG_OFF
- const void* thd, /*!< in: connection, or NULL */
-#endif /* DBUG_OFF */
- ulint line) /*!< in: line number of caller */
- MY_ATTRIBUTE((nonnull(1,2,3,4,5,6), warn_unused_result));
-#ifdef DBUG_OFF
-# define row_ins_index_entry_big_rec(e,big,ofs,heap,index,thd,file,line) \
- row_ins_index_entry_big_rec_func(e,big,ofs,heap,index,file,line)
-#else /* DBUG_OFF */
-# define row_ins_index_entry_big_rec(e,big,ofs,heap,index,thd,file,line) \
- row_ins_index_entry_big_rec_func(e,big,ofs,heap,index,file,thd,line)
-#endif /* DBUG_OFF */
-/***************************************************************//**
-Inserts an entry into a clustered index. Tries first optimistic,
-then pessimistic descent down the tree. If the entry matches enough
-to a delete marked record, performs the insert by updating or delete
-unmarking the delete marked record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
-UNIV_INTERN
-dberr_t
-row_ins_clust_index_entry(
-/*======================*/
- dict_index_t* index, /*!< in: clustered index */
- dtuple_t* entry, /*!< in/out: index entry to insert */
- que_thr_t* thr, /*!< in: query thread */
- ulint n_ext) /*!< in: number of externally stored columns */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***************************************************************//**
-Inserts an entry into a secondary index. Tries first optimistic,
-then pessimistic descent down the tree. If the entry matches enough
-to a delete marked record, performs the insert by updating or delete
-unmarking the delete marked record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
-UNIV_INTERN
-dberr_t
-row_ins_sec_index_entry(
-/*====================*/
- dict_index_t* index, /*!< in: secondary index */
- dtuple_t* entry, /*!< in/out: index entry to insert */
- que_thr_t* thr) /*!< in: query thread */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***********************************************************//**
-Inserts a row to a table. This is a high-level function used in
-SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_ins_step(
-/*=========*/
- que_thr_t* thr); /*!< in: query thread */
-
-/* Insert node structure */
-
-struct ins_node_t{
- que_common_t common; /*!< node type: QUE_NODE_INSERT */
- ulint ins_type;/*!< INS_VALUES, INS_SEARCHED, or INS_DIRECT */
- dtuple_t* row; /*!< row to insert */
- dict_table_t* table; /*!< table where to insert */
- sel_node_t* select; /*!< select in searched insert */
- que_node_t* values_list;/*!< list of expressions to evaluate and
- insert in an INS_VALUES insert */
- ulint state; /*!< node execution state: one of INS_NODE_SET_IX_LOCK, INS_NODE_ALLOC_ROW_ID, INS_NODE_INSERT_ENTRIES */
- dict_index_t* index; /*!< NULL, or the next index where the index
- entry should be inserted */
- dtuple_t* entry; /*!< NULL, or entry to insert in the index;
- after a successful insert of the entry,
- this should be reset to NULL */
- UT_LIST_BASE_NODE_T(dtuple_t)
- entry_list;/*!< list of entries, one for each index */
- byte* row_id_buf;/*!< buffer for the row id sys field in row */
- trx_id_t trx_id; /*!< trx id of the last trx which executed the
- node */
- byte* trx_id_buf;/*!< buffer for the trx id sys field in row */
- mem_heap_t* entry_sys_heap;
- /*!< memory heap used as auxiliary storage;
- entry_list and sys fields are stored here;
- if this is NULL, entry list should be created
- and buffers for sys fields in row allocated */
- ulint magic_n; /*!< NOTE(review): presumably INS_NODE_MAGIC_N; confirm */
-};
-
-#define INS_NODE_MAGIC_N 15849075
-
-/* Insert node types */
-#define INS_SEARCHED 0 /* INSERT INTO ... SELECT ... */
-#define INS_VALUES 1 /* INSERT INTO ... VALUES ... */
-#define INS_DIRECT 2 /* this is for internal use in dict0crea:
- insert the row directly */
-
-/* Node execution states */
-#define INS_NODE_SET_IX_LOCK 1 /* we should set an IX lock on table */
-#define INS_NODE_ALLOC_ROW_ID 2 /* row id should be allocated */
-#define INS_NODE_INSERT_ENTRIES 3 /* index entries should be built and
- inserted */
-
-#ifndef UNIV_NONINL
-#include "row0ins.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/row0ins.ic b/storage/xtradb/include/row0ins.ic
deleted file mode 100644
index 9c191d869a2..00000000000
--- a/storage/xtradb/include/row0ins.ic
+++ /dev/null
@@ -1,26 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0ins.ic
-Insert into a table
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-
diff --git a/storage/xtradb/include/row0log.h b/storage/xtradb/include/row0log.h
deleted file mode 100644
index 5ff148ff045..00000000000
--- a/storage/xtradb/include/row0log.h
+++ /dev/null
@@ -1,244 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0log.h
-Modification log for online index creation and online table rebuild
-
-Created 2011-05-26 Marko Makela
-*******************************************************/
-
-#ifndef row0log_h
-#define row0log_h
-
-#include "univ.i"
-#include "mtr0types.h"
-#include "row0types.h"
-#include "rem0types.h"
-#include "data0types.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-
-extern ulint onlineddl_rowlog_rows;
-extern ulint onlineddl_rowlog_pct_used;
-extern ulint onlineddl_pct_progress;
-
-/******************************************************//**
-Allocate the row log for an index and flag the index
-for online creation.
-@retval true if success, false if not */
-UNIV_INTERN
-bool
-row_log_allocate(
-/*=============*/
- dict_index_t* index, /*!< in/out: index */
- dict_table_t* table, /*!< in/out: new table being rebuilt,
- or NULL when creating a secondary index */
- bool same_pk,/*!< in: whether the definition of the
- PRIMARY KEY has remained the same */
- const dtuple_t* add_cols,
- /*!< in: default values of
- added columns, or NULL */
- const ulint* col_map,/*!< in: mapping of old column
- numbers to new ones, or NULL if !table */
- const char* path) /*!< in: where to create temporary file */
- MY_ATTRIBUTE((nonnull(1), warn_unused_result));
-
-/******************************************************//**
-Free the row log for an index that was being created online. */
-UNIV_INTERN
-void
-row_log_free(
-/*=========*/
- row_log_t*& log) /*!< in,own: row log */
- MY_ATTRIBUTE((nonnull));
-
-/******************************************************//**
-Free the row log for an index on which online creation was aborted. */
-UNIV_INLINE
-void
-row_log_abort_sec(
-/*==============*/
- dict_index_t* index) /*!< in/out: index (x-latched) */
- MY_ATTRIBUTE((nonnull));
-
-/******************************************************//**
-Try to log an operation to a secondary index that is
-(or was) being created.
-@retval true if the operation was logged or can be ignored
-@retval false if online index creation is not taking place */
-UNIV_INLINE
-bool
-row_log_online_op_try(
-/*==================*/
- dict_index_t* index, /*!< in/out: index, S or X latched */
- const dtuple_t* tuple, /*!< in: index tuple */
- trx_id_t trx_id) /*!< in: transaction ID for insert,
- or 0 for delete */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************//**
-Logs an operation to a secondary index that is (or was) being created. */
-UNIV_INTERN
-void
-row_log_online_op(
-/*==============*/
- dict_index_t* index, /*!< in/out: index, S or X latched */
- const dtuple_t* tuple, /*!< in: index tuple */
- trx_id_t trx_id) /*!< in: transaction ID for insert,
- or 0 for delete */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
-
-/******************************************************//**
-Gets the error status of the online index rebuild log.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-row_log_table_get_error(
-/*====================*/
- const dict_index_t* index) /*!< in: clustered index of a table
- that is being rebuilt online */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/******************************************************//**
-Logs a delete operation to a table that is being rebuilt.
-This will be merged in row_log_table_apply_delete(). */
-UNIV_INTERN
-void
-row_log_table_delete(
-/*=================*/
- const rec_t* rec, /*!< in: clustered index leaf page record,
- page X-latched */
- dict_index_t* index, /*!< in/out: clustered index, S-latched
- or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
- const byte* sys) /*!< in: DB_TRX_ID,DB_ROLL_PTR that should
- be logged, or NULL to use those in rec */
- UNIV_COLD MY_ATTRIBUTE((nonnull(1,2,3)));
-
-/******************************************************//**
-Logs an update operation to a table that is being rebuilt.
-This will be merged in row_log_table_apply_update(). */
-UNIV_INTERN
-void
-row_log_table_update(
-/*=================*/
- const rec_t* rec, /*!< in: clustered index leaf page record,
- page X-latched */
- dict_index_t* index, /*!< in/out: clustered index, S-latched
- or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
- const dtuple_t* old_pk) /*!< in: row_log_table_get_pk()
- before the update */
- UNIV_COLD MY_ATTRIBUTE((nonnull(1,2,3)));
-
-/******************************************************//**
-Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR
-of a table that is being rebuilt.
-@return tuple of PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in the rebuilt table,
-or NULL if the PRIMARY KEY definition does not change */
-UNIV_INTERN
-const dtuple_t*
-row_log_table_get_pk(
-/*=================*/
- const rec_t* rec, /*!< in: clustered index leaf page record,
- page X-latched */
- dict_index_t* index, /*!< in/out: clustered index, S-latched
- or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index),
- or NULL */
- byte* sys, /*!< out: DB_TRX_ID,DB_ROLL_PTR for
- row_log_table_delete(), or NULL */
- mem_heap_t** heap) /*!< in/out: memory heap where allocated */
- UNIV_COLD MY_ATTRIBUTE((nonnull(1,2,5), warn_unused_result));
-
-/******************************************************//**
-Logs an insert to a table that is being rebuilt.
-This will be merged in row_log_table_apply_insert(). */
-UNIV_INTERN
-void
-row_log_table_insert(
-/*=================*/
- const rec_t* rec, /*!< in: clustered index leaf page record,
- page X-latched */
- dict_index_t* index, /*!< in/out: clustered index, S-latched
- or X-latched */
- const ulint* offsets)/*!< in: rec_get_offsets(rec,index) */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-Notes that a BLOB is being freed during online ALTER TABLE. */
-UNIV_INTERN
-void
-row_log_table_blob_free(
-/*====================*/
- dict_index_t* index, /*!< in/out: clustered index, X-latched */
- ulint page_no)/*!< in: starting page number of the BLOB */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-Notes that a BLOB is being allocated during online ALTER TABLE. */
-UNIV_INTERN
-void
-row_log_table_blob_alloc(
-/*=====================*/
- dict_index_t* index, /*!< in/out: clustered index, X-latched */
- ulint page_no)/*!< in: starting page number of the BLOB */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-Apply the row_log_table log to a table upon completing rebuild.
-@return DB_SUCCESS, or error code on failure */
-UNIV_INTERN
-dberr_t
-row_log_table_apply(
-/*================*/
- que_thr_t* thr, /*!< in: query graph */
- dict_table_t* old_table,
- /*!< in: old table */
- struct TABLE* table) /*!< in/out: MySQL table
- (for reporting duplicates) */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/******************************************************//**
-Get the latest transaction ID that has invoked row_log_online_op()
-during online creation.
-@return latest transaction ID, or 0 if nothing was logged */
-UNIV_INTERN
-trx_id_t
-row_log_get_max_trx(
-/*================*/
- dict_index_t* index) /*!< in: index, must be locked */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/******************************************************//**
-Merge the row log to the index upon completing index creation.
-@return DB_SUCCESS, or error code on failure */
-UNIV_INTERN
-dberr_t
-row_log_apply(
-/*==========*/
- trx_t* trx, /*!< in: transaction (for checking if
- the operation was interrupted) */
- dict_index_t* index, /*!< in/out: secondary index */
- struct TABLE* table) /*!< in/out: MySQL table
- (for reporting duplicates) */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-#ifndef UNIV_NONINL
-#include "row0log.ic"
-#endif
-
-#endif /* row0log.h */
diff --git a/storage/xtradb/include/row0log.ic b/storage/xtradb/include/row0log.ic
deleted file mode 100644
index b0f37dbd8e7..00000000000
--- a/storage/xtradb/include/row0log.ic
+++ /dev/null
@@ -1,84 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0log.ic
-Modification log for online index creation and online table rebuild
-
-Created 2012-10-18 Marko Makela
-*******************************************************/
-
-#include "dict0dict.h"
-
-/******************************************************//**
-Free the row log for an index on which online creation was aborted. */
-UNIV_INLINE
-void
-row_log_abort_sec(
-/*===============*/
- dict_index_t* index) /*!< in/out: index (x-latched) */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_ad(!dict_index_is_clust(index));
- dict_index_set_online_status(index, ONLINE_INDEX_ABORTED);
- row_log_free(index->online_log);
-}
-
-/******************************************************//**
-Try to log an operation to a secondary index that is
-(or was) being created.
-@retval true if the operation was logged or can be ignored
-@retval false if online index creation is not taking place */
-UNIV_INLINE
-bool
-row_log_online_op_try(
-/*==================*/
- dict_index_t* index, /*!< in/out: index, S or X latched */
- const dtuple_t* tuple, /*!< in: index tuple */
- trx_id_t trx_id) /*!< in: transaction ID for insert,
- or 0 for delete */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
- || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- switch (dict_index_get_online_status(index)) {
- case ONLINE_INDEX_COMPLETE:
- /* This is a normal index. Do not log anything.
- The caller must perform the operation on the
- index tree directly. */
- return(false);
- case ONLINE_INDEX_CREATION:
- /* The index is being created online. Log the
- operation. */
- row_log_online_op(index, tuple, trx_id);
- break;
- case ONLINE_INDEX_ABORTED:
- case ONLINE_INDEX_ABORTED_DROPPED:
- /* The index was created online, but the operation was
- aborted. Do not log the operation and tell the caller
- to skip the operation. */
- break;
- }
-
- return(true);
-}
diff --git a/storage/xtradb/include/row0merge.h b/storage/xtradb/include/row0merge.h
deleted file mode 100644
index 04d4010ad48..00000000000
--- a/storage/xtradb/include/row0merge.h
+++ /dev/null
@@ -1,469 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2016, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0merge.h
-Index build routines using a merge sort
-
-Created 13/06/2005 Jan Lindstrom
-*******************************************************/
-
-#ifndef row0merge_h
-#define row0merge_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "mtr0mtr.h"
-#include "rem0types.h"
-#include "rem0rec.h"
-#include "read0types.h"
-#include "btr0types.h"
-#include "row0mysql.h"
-#include "lock0types.h"
-#include "srv0srv.h"
-
-/* Reserve free space from every block for key_version */
-#define ROW_MERGE_RESERVE_SIZE 4
-
-/* Cluster index read task is mandatory */
-#define COST_READ_CLUSTERED_INDEX 1.0
-
-/* Basic fixed cost to build all type of index */
-#define COST_BUILD_INDEX_STATIC 0.5
-/* Dynamic cost to build all type of index, dynamic cost will be re-distributed based on page count ratio of each index */
-#define COST_BUILD_INDEX_DYNAMIC 0.5
-
-/* Sum of below two must be 1.0 */
-#define PCT_COST_MERGESORT_INDEX 0.4
-#define PCT_COST_INSERT_INDEX 0.6
-
-// Forward declaration
-struct ib_sequence_t;
-
-/** @brief Block size for I/O operations in merge sort.
-
-The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty()
-rounded to a power of 2.
-
-When not creating a PRIMARY KEY that contains column prefixes, this
-can be set as small as UNIV_PAGE_SIZE / 2. */
-typedef byte row_merge_block_t;
-
-/** @brief Secondary buffer for I/O operations of merge records.
-
-This buffer is used for writing or reading a record that spans two
-row_merge_block_t. Thus, it must be able to hold one merge record,
-whose maximum size is the same as the minimum size of
-row_merge_block_t. */
-typedef byte mrec_buf_t[UNIV_PAGE_SIZE_MAX];
-
-/** @brief Merge record in row_merge_block_t.
-
-The format is the same as a record in ROW_FORMAT=COMPACT with the
-exception that the REC_N_NEW_EXTRA_BYTES are omitted. */
-typedef byte mrec_t;
-
-/** Merge record in row_merge_buf_t */
-struct mtuple_t {
- dfield_t* fields; /*!< data fields */
-};
-
-/** Buffer for sorting in main memory. */
-struct row_merge_buf_t {
- mem_heap_t* heap; /*!< memory heap where allocated */
- dict_index_t* index; /*!< the index the tuples belong to */
- ulint total_size; /*!< total amount of data bytes */
- ulint n_tuples; /*!< number of data tuples */
- ulint max_tuples; /*!< maximum number of data tuples */
- mtuple_t* tuples; /*!< array of data tuples */
- mtuple_t* tmp_tuples; /*!< temporary copy of tuples,
- for sorting */
-};
-
-/** Information about temporary files used in merge sort */
-struct merge_file_t {
- int fd; /*!< file descriptor */
- ulint offset; /*!< file offset (end of file) */
- ib_uint64_t n_rec; /*!< number of records in the file */
-};
-
-/** Index field definition */
-struct index_field_t {
- ulint col_no; /*!< column offset */
- ulint prefix_len; /*!< column prefix length, or 0
- if indexing the whole column */
- const char* col_name; /*!< column name or NULL */
-};
-
-/** Definition of an index being created */
-struct index_def_t {
- const char* name; /*!< index name */
- ulint ind_type; /*!< 0, DICT_UNIQUE,
- or DICT_CLUSTERED */
- ulint key_number; /*!< MySQL key number,
- or ULINT_UNDEFINED if none */
- ulint n_fields; /*!< number of fields in index */
- index_field_t* fields; /*!< field definitions */
-};
-
-/** Structure for reporting duplicate records. */
-struct row_merge_dup_t {
- dict_index_t* index; /*!< index being sorted */
- struct TABLE* table; /*!< MySQL table object */
- const ulint* col_map;/*!< mapping of column numbers
- in table to the rebuilt table
- (index->table), or NULL if not
- rebuilding table */
- ulint n_dup; /*!< number of duplicates */
-};
-
-/*************************************************************//**
-Report a duplicate key. */
-UNIV_INTERN
-void
-row_merge_dup_report(
-/*=================*/
- row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */
- const dfield_t* entry) /*!< in: duplicate index entry */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Sets an exclusive lock on a table, for the duration of creating indexes.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_merge_lock_table(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table, /*!< in: table to lock */
- enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Drop indexes that were created before an error occurred.
-The data dictionary must have been locked exclusively by the caller,
-because the transaction will not be committed. */
-UNIV_INTERN
-void
-row_merge_drop_indexes_dict(
-/*========================*/
- trx_t* trx, /*!< in/out: dictionary transaction */
- table_id_t table_id)/*!< in: table identifier */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Drop those indexes which were created before an error occurred.
-The data dictionary must have been locked exclusively by the caller,
-because the transaction will not be committed. */
-UNIV_INTERN
-void
-row_merge_drop_indexes(
-/*===================*/
- trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table, /*!< in/out: table containing the indexes */
- ibool locked) /*!< in: TRUE=table locked,
- FALSE=may need to do a lazy drop */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Drop all partially created indexes during crash recovery. */
-UNIV_INTERN
-void
-row_merge_drop_temp_indexes(void);
-/*=============================*/
-
-/** Create temporary merge files in the given paramater path, and if
-UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
-@param[in] path location for creating temporary merge files.
-@return File descriptor */
-UNIV_INTERN
-int
-row_merge_file_create_low(
- const char* path)
- MY_ATTRIBUTE((warn_unused_result));
-/*********************************************************************//**
-Destroy a merge file. And de-register the file from Performance Schema
-if UNIV_PFS_IO is defined. */
-UNIV_INTERN
-void
-row_merge_file_destroy_low(
-/*=======================*/
- int fd); /*!< in: merge file descriptor */
-
-/*********************************************************************//**
-Provide a new pathname for a table that is being renamed if it belongs to
-a file-per-table tablespace. The caller is responsible for freeing the
-memory allocated for the return value.
-@return new pathname of tablespace file, or NULL if space = 0 */
-UNIV_INTERN
-char*
-row_make_new_pathname(
-/*==================*/
- dict_table_t* table, /*!< in: table to be renamed */
- const char* new_name); /*!< in: new name */
-/*********************************************************************//**
-Rename the tables in the data dictionary. The data dictionary must
-have been locked exclusively by the caller, because the transaction
-will not be committed.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_merge_rename_tables_dict(
-/*=========================*/
- dict_table_t* old_table, /*!< in/out: old table, renamed to
- tmp_name */
- dict_table_t* new_table, /*!< in/out: new table, renamed to
- old_table->name */
- const char* tmp_name, /*!< in: new name for old_table */
- trx_t* trx) /*!< in/out: dictionary transaction */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/*********************************************************************//**
-Rename an index in the dictionary that was created. The data
-dictionary must have been locked exclusively by the caller, because
-the transaction will not be committed.
-@return DB_SUCCESS if all OK */
-UNIV_INTERN
-dberr_t
-row_merge_rename_index_to_add(
-/*==========================*/
- trx_t* trx, /*!< in/out: transaction */
- table_id_t table_id, /*!< in: table identifier */
- index_id_t index_id) /*!< in: index identifier */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Rename an index in the dictionary that is to be dropped. The data
-dictionary must have been locked exclusively by the caller, because
-the transaction will not be committed.
-@return DB_SUCCESS if all OK */
-UNIV_INTERN
-dberr_t
-row_merge_rename_index_to_drop(
-/*===========================*/
- trx_t* trx, /*!< in/out: transaction */
- table_id_t table_id, /*!< in: table identifier */
- index_id_t index_id) /*!< in: index identifier */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Create the index and load in to the dictionary.
-@return index, or NULL on error */
-UNIV_INTERN
-dict_index_t*
-row_merge_create_index(
-/*===================*/
- trx_t* trx, /*!< in/out: trx (sets error_state) */
- dict_table_t* table, /*!< in: the index is on this table */
- const index_def_t* index_def,
- /*!< in: the index definition */
- const char** col_names);
- /*! in: column names if columns are
- renamed or NULL */
-/*********************************************************************//**
-Check if a transaction can use an index.
-@return TRUE if index can be used by the transaction else FALSE */
-UNIV_INTERN
-ibool
-row_merge_is_index_usable(
-/*======================*/
- const trx_t* trx, /*!< in: transaction */
- const dict_index_t* index); /*!< in: index to check */
-/*********************************************************************//**
-Drop a table. The caller must have ensured that the background stats
-thread is not processing the table. This can be done by calling
-dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
-before calling this function.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-row_merge_drop_table(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table) /*!< in: table instance to drop */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Build indexes on a table by reading a clustered index,
-creating a temporary file containing index entries, merge sorting
-these index entries and inserting sorted index entries to indexes.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-row_merge_build_indexes(
-/*====================*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* old_table, /*!< in: table where rows are
- read from */
- dict_table_t* new_table, /*!< in: table where indexes are
- created; identical to old_table
- unless creating a PRIMARY KEY */
- bool online, /*!< in: true if creating indexes
- online */
- dict_index_t** indexes, /*!< in: indexes to be created */
- const ulint* key_numbers, /*!< in: MySQL key numbers */
- ulint n_indexes, /*!< in: size of indexes[] */
- struct TABLE* table, /*!< in/out: MySQL table, for
- reporting erroneous key value
- if applicable */
- const dtuple_t* add_cols, /*!< in: default values of
- added columns, or NULL */
- const ulint* col_map, /*!< in: mapping of old column
- numbers to new ones, or NULL
- if old_table == new_table */
- ulint add_autoinc, /*!< in: number of added
- AUTO_INCREMENT column, or
- ULINT_UNDEFINED if none is added */
- ib_sequence_t& sequence) /*!< in/out: autoinc sequence */
- MY_ATTRIBUTE((nonnull(1,2,3,5,6,8), warn_unused_result));
-/********************************************************************//**
-Write a buffer to a block. */
-UNIV_INTERN
-void
-row_merge_buf_write(
-/*================*/
- const row_merge_buf_t* buf, /*!< in: sorted buffer */
- const merge_file_t* of, /*!< in: output file */
- row_merge_block_t* block) /*!< out: buffer for writing to file */
- MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Sort a buffer. */
-UNIV_INTERN
-void
-row_merge_buf_sort(
-/*===============*/
- row_merge_buf_t* buf, /*!< in/out: sort buffer */
- row_merge_dup_t* dup) /*!< in/out: reporter of duplicates
- (NULL if non-unique index) */
- MY_ATTRIBUTE((nonnull(1)));
-/********************************************************************//**
-Write a merge block to the file system.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-row_merge_write(
-/*============*/
- int fd, /*!< in: file descriptor */
- ulint offset, /*!< in: offset where to write,
- in number of row_merge_block_t elements */
- const void* buf, /*!< in: data */
- fil_space_crypt_t* crypt_data, /*!< in: table crypt data */
- void* crypt_buf, /*!< in: crypt buf or NULL */
- ulint space); /*!< in: space id */
-
-/********************************************************************//**
-Empty a sort buffer.
-@return sort buffer */
-UNIV_INTERN
-row_merge_buf_t*
-row_merge_buf_empty(
-/*================*/
- row_merge_buf_t* buf) /*!< in,own: sort buffer */
- MY_ATTRIBUTE((warn_unused_result, nonnull));
-
-/** Create a merge file in the given location.
-@param[out] merge_file merge file structure
-@param[in] path location for creating temporary file
-@return file descriptor, or -1 on failure */
-UNIV_INTERN
-int
-row_merge_file_create(
- merge_file_t* merge_file,
- const char* path);
-
-/*********************************************************************//**
-Merge disk files.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-row_merge_sort(
-/*===========*/
- trx_t* trx, /*!< in: transaction */
- const row_merge_dup_t* dup, /*!< in: descriptor of
- index being created */
- merge_file_t* file, /*!< in/out: file containing
- index entries */
- row_merge_block_t* block, /*!< in/out: 3 buffers */
- int* tmpfd, /*!< in/out: temporary file handle */
- const bool update_progress, /*!< in: update progress status variable or not */
- const float pct_progress, /*!< in: total progress percent until now */
- const float pct_cost, /*!< in: current progress percent */
- fil_space_crypt_t* crypt_data,/*!< in: table crypt data */
- row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */
- ulint space) /*!< in: space id */
- __attribute__((nonnull(1,2,3,4,5)));
-/*********************************************************************//**
-Allocate a sort buffer.
-@return own: sort buffer */
-UNIV_INTERN
-row_merge_buf_t*
-row_merge_buf_create(
-/*=================*/
- dict_index_t* index) /*!< in: secondary index */
- MY_ATTRIBUTE((warn_unused_result, nonnull, malloc));
-/*********************************************************************//**
-Deallocate a sort buffer. */
-UNIV_INTERN
-void
-row_merge_buf_free(
-/*===============*/
- row_merge_buf_t* buf) /*!< in,own: sort buffer to be freed */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Destroy a merge file. */
-UNIV_INTERN
-void
-row_merge_file_destroy(
-/*===================*/
- merge_file_t* merge_file) /*!< in/out: merge file structure */
- MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Read a merge block from the file system.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-row_merge_read(
-/*===========*/
- int fd, /*!< in: file descriptor */
- ulint offset, /*!< in: offset where to read
- in number of row_merge_block_t
- elements */
- row_merge_block_t* buf, /*!< out: data */
- fil_space_crypt_t* crypt_data,/*!< in: table crypt data */
- row_merge_block_t* crypt_buf, /*!< in: crypt buf or NULL */
- ulint space); /*!< in: space id */
-
-/********************************************************************//**
-Read a merge record.
-@return pointer to next record, or NULL on I/O error or end of list */
-UNIV_INTERN
-const byte*
-row_merge_read_rec(
-/*===============*/
- row_merge_block_t* block, /*!< in/out: file buffer */
- mrec_buf_t* buf, /*!< in/out: secondary buffer */
- const byte* b, /*!< in: pointer to record */
- const dict_index_t* index, /*!< in: index of the record */
- int fd, /*!< in: file descriptor */
- ulint* foffs, /*!< in/out: file offset */
- const mrec_t** mrec, /*!< out: pointer to merge record,
- or NULL on end of list
- (non-NULL on I/O error) */
- ulint* offsets,/*!< out: offsets of mrec */
- fil_space_crypt_t* crypt_data,/*!< in: table crypt data */
- row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */
- ulint space) /*!< in: space id */
- __attribute__((nonnull(1,2,3,4,6,7,8), warn_unused_result));
-#endif /* row0merge.h */
diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h
deleted file mode 100644
index a8503a5cfda..00000000000
--- a/storage/xtradb/include/row0mysql.h
+++ /dev/null
@@ -1,932 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2000, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0mysql.h
-Interface between Innobase row operations and MySQL.
-Contains also create table and other data dictionary operations.
-
-Created 9/17/2000 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0mysql_h
-#define row0mysql_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "que0types.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "row0types.h"
-#include "btr0pcur.h"
-#include "trx0types.h"
-#include "fil0crypt.h"
-
-// Forward declaration
-struct SysIndexCallback;
-
-extern ibool row_rollback_on_timeout;
-
-struct row_prebuilt_t;
-
-/*******************************************************************//**
-Frees the blob heap in prebuilt when no longer needed. */
-UNIV_INTERN
-void
-row_mysql_prebuilt_free_blob_heap(
-/*==============================*/
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct of a
- ha_innobase:: table handle */
-
-/*******************************************************************//**
-Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
-format.
-@return pointer to the data, we skip the 1 or 2 bytes at the start
-that are used to store the len */
-UNIV_INTERN
-byte*
-row_mysql_store_true_var_len(
-/*=========================*/
- byte* dest, /*!< in: where to store */
- ulint len, /*!< in: length, must fit in two bytes */
- ulint lenlen);/*!< in: storage length of len: either 1 or 2 bytes */
-/*******************************************************************//**
-Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and
-returns a pointer to the data.
-@return pointer to the data, we skip the 1 or 2 bytes at the start
-that are used to store the len */
-UNIV_INTERN
-const byte*
-row_mysql_read_true_varchar(
-/*========================*/
- ulint* len, /*!< out: variable-length field length */
- const byte* field, /*!< in: field in the MySQL format */
- ulint lenlen);/*!< in: storage length of len: either 1
- or 2 bytes */
-/*******************************************************************//**
-Stores a reference to a BLOB in the MySQL format. */
-UNIV_INTERN
-void
-row_mysql_store_blob_ref(
-/*=====================*/
- byte* dest, /*!< in: where to store */
- ulint col_len,/*!< in: dest buffer size: determines into
- how many bytes the BLOB length is stored,
- the space for the length may vary from 1
- to 4 bytes */
- const void* data, /*!< in: BLOB data; if the value to store
- is SQL NULL this should be NULL pointer */
- ulint len); /*!< in: BLOB length; if the value to store
- is SQL NULL this should be 0; remember
- also to set the NULL bit in the MySQL record
- header! */
-/*******************************************************************//**
-Reads a reference to a BLOB in the MySQL format.
-@return pointer to BLOB data */
-UNIV_INTERN
-const byte*
-row_mysql_read_blob_ref(
-/*====================*/
- ulint* len, /*!< out: BLOB length */
- const byte* ref, /*!< in: BLOB reference in the
- MySQL format */
- ulint col_len); /*!< in: BLOB reference length
- (not BLOB length) */
-/**************************************************************//**
-Pad a column with spaces. */
-UNIV_INTERN
-void
-row_mysql_pad_col(
-/*==============*/
- ulint mbminlen, /*!< in: minimum size of a character,
- in bytes */
- byte* pad, /*!< out: padded buffer */
- ulint len); /*!< in: number of bytes to pad */
-
-/**************************************************************//**
-Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
-The counterpart of this function is row_sel_field_store_in_mysql_format() in
-row0sel.cc.
-@return up to which byte we used buf in the conversion */
-UNIV_INTERN
-byte*
-row_mysql_store_col_in_innobase_format(
-/*===================================*/
- dfield_t* dfield, /*!< in/out: dfield where dtype
- information must be already set when
- this function is called! */
- byte* buf, /*!< in/out: buffer for a converted
- integer value; this must be at least
- col_len long then! NOTE that dfield
- may also get a pointer to 'buf',
- therefore do not discard this as long
- as dfield is used! */
- ibool row_format_col, /*!< TRUE if the mysql_data is from
- a MySQL row, FALSE if from a MySQL
- key value;
- in MySQL, a true VARCHAR storage
- format differs in a row and in a
- key value: in a key value the length
- is always stored in 2 bytes! */
- const byte* mysql_data, /*!< in: MySQL column value, not
- SQL NULL; NOTE that dfield may also
- get a pointer to mysql_data,
- therefore do not discard this as long
- as dfield is used! */
- ulint col_len, /*!< in: MySQL column length; NOTE that
- this is the storage length of the
- column in the MySQL format row, not
- necessarily the length of the actual
- payload data; if the column is a true
- VARCHAR then this is irrelevant */
- ulint comp); /*!< in: nonzero=compact format */
-/****************************************************************//**
-Handles user errors and lock waits detected by the database engine.
-@return true if it was a lock wait and we should continue running the
-query thread */
-UNIV_INTERN
-bool
-row_mysql_handle_errors(
-/*====================*/
- dberr_t* new_err,/*!< out: possible new error encountered in
- rollback, or the old error which was
- during the function entry */
- trx_t* trx, /*!< in: transaction */
- que_thr_t* thr, /*!< in: query thread, or NULL */
- trx_savept_t* savept) /*!< in: savepoint, or NULL */
- MY_ATTRIBUTE((nonnull(1,2)));
-/********************************************************************//**
-Create a prebuilt struct for a MySQL table handle.
-@return own: a prebuilt struct */
-UNIV_INTERN
-row_prebuilt_t*
-row_create_prebuilt(
-/*================*/
- dict_table_t* table, /*!< in: Innobase table handle */
- ulint mysql_row_len); /*!< in: length in bytes of a row in
- the MySQL format */
-/********************************************************************//**
-Free a prebuilt struct for a MySQL table handle. */
-UNIV_INTERN
-void
-row_prebuilt_free(
-/*==============*/
- row_prebuilt_t* prebuilt, /*!< in, own: prebuilt struct */
- ibool dict_locked); /*!< in: TRUE=data dictionary locked */
-/*********************************************************************//**
-Updates the transaction pointers in query graphs stored in the prebuilt
-struct. */
-UNIV_INTERN
-void
-row_update_prebuilt_trx(
-/*====================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct
- in MySQL handle */
- trx_t* trx); /*!< in: transaction handle */
-/*********************************************************************//**
-Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
-AUTO_INC lock gives exclusive access to the auto-inc counter of the
-table. The lock is reserved only for the duration of an SQL statement.
-It is not compatible with another AUTO_INC or exclusive lock on the
-table.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_lock_table_autoinc_for_mysql(
-/*=============================*/
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in the MySQL
- table handle */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Sets a table lock on the table mentioned in prebuilt.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_lock_table_for_mysql(
-/*=====================*/
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct in the MySQL
- table handle */
- dict_table_t* table, /*!< in: table to lock, or NULL
- if prebuilt->table should be
- locked as
- prebuilt->select_lock_type */
- ulint mode) /*!< in: lock mode of table
- (ignored if table==NULL) */
- MY_ATTRIBUTE((nonnull(1)));
-/*********************************************************************//**
-Does an insert for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_insert_for_mysql(
-/*=================*/
- byte* mysql_rec, /*!< in: row in the MySQL format */
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
- handle */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Builds a dummy query graph used in selects. */
-UNIV_INTERN
-void
-row_prebuild_sel_graph(
-/*===================*/
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
- handle */
-/*********************************************************************//**
-Gets pointer to a prebuilt update vector used in updates. If the update
-graph has not yet been built in the prebuilt struct, then this function
-first builds it.
-@return prebuilt update vector */
-UNIV_INTERN
-upd_t*
-row_get_prebuilt_update_vector(
-/*===========================*/
- row_prebuilt_t* prebuilt); /*!< in: prebuilt struct in MySQL
- handle */
-/*********************************************************************//**
-Checks if a table is such that we automatically created a clustered
-index on it (on row id).
-@return TRUE if the clustered index was generated automatically */
-UNIV_INTERN
-ibool
-row_table_got_default_clust_index(
-/*==============================*/
- const dict_table_t* table); /*!< in: table */
-/*********************************************************************//**
-Does an update or delete of a row for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_update_for_mysql(
-/*=================*/
- byte* mysql_rec, /*!< in: the row to be updated, in
- the MySQL format */
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL
- handle */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-This can only be used when srv_locks_unsafe_for_binlog is TRUE or this
-session is using a READ COMMITTED or READ UNCOMMITTED isolation level.
-Before calling this function row_search_for_mysql() must have
-initialized prebuilt->new_rec_locks to store the information which new
-record locks really were set. This function removes a newly set
-clustered index record lock under prebuilt->pcur or
-prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that
-releases the latest clustered index record lock we set. */
-UNIV_INTERN
-void
-row_unlock_for_mysql(
-/*=================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL
- handle */
- ibool has_latches_on_recs)/*!< in: TRUE if called
- so that we have the latches on
- the records under pcur and
- clust_pcur, and we do not need
- to reposition the cursors. */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Checks if a table name contains the string "/#sql" which denotes temporary
-tables in MySQL.
-@return true if temporary table */
-UNIV_INTERN
-bool
-row_is_mysql_tmp_table_name(
-/*========================*/
- const char* name) MY_ATTRIBUTE((warn_unused_result));
- /*!< in: table name in the form
- 'database/tablename' */
-
-/*********************************************************************//**
-Creates an query graph node of 'update' type to be used in the MySQL
-interface.
-@return own: update node */
-UNIV_INTERN
-upd_node_t*
-row_create_update_node_for_mysql(
-/*=============================*/
- dict_table_t* table, /*!< in: table to update */
- mem_heap_t* heap); /*!< in: mem heap from which allocated */
-/**********************************************************************//**
-Does a cascaded delete or set null in a foreign key operation.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_update_cascade_for_mysql(
-/*=========================*/
- que_thr_t* thr, /*!< in: query thread */
- upd_node_t* node, /*!< in: update node used in the cascade
- or set null operation */
- dict_table_t* table) /*!< in: table where we do the operation */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Locks the data dictionary exclusively for performing a table create or other
-data dictionary modification operation. */
-UNIV_INTERN
-void
-row_mysql_lock_data_dictionary_func(
-/*================================*/
- trx_t* trx, /*!< in/out: transaction */
- const char* file, /*!< in: file name */
- ulint line); /*!< in: line number */
-#define row_mysql_lock_data_dictionary(trx) \
- row_mysql_lock_data_dictionary_func(trx, __FILE__, __LINE__)
-/*********************************************************************//**
-Unlocks the data dictionary exclusive lock. */
-UNIV_INTERN
-void
-row_mysql_unlock_data_dictionary(
-/*=============================*/
- trx_t* trx); /*!< in/out: transaction */
-/*********************************************************************//**
-Locks the data dictionary in shared mode from modifications, for performing
-foreign key check, rollback, or other operation invisible to MySQL. */
-UNIV_INTERN
-void
-row_mysql_freeze_data_dictionary_func(
-/*==================================*/
- trx_t* trx, /*!< in/out: transaction */
- const char* file, /*!< in: file name */
- ulint line); /*!< in: line number */
-#define row_mysql_freeze_data_dictionary(trx) \
- row_mysql_freeze_data_dictionary_func(trx, __FILE__, __LINE__)
-/*********************************************************************//**
-Unlocks the data dictionary shared lock. */
-UNIV_INTERN
-void
-row_mysql_unfreeze_data_dictionary(
-/*===============================*/
- trx_t* trx); /*!< in/out: transaction */
-/*********************************************************************//**
-Creates a table for MySQL. If the name of the table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also start the printing of monitor
-output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate(). On failure the transaction will
-be rolled back.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_create_table_for_mysql(
-/*=======================*/
- dict_table_t* table, /*!< in, own: table definition
- (will be freed, or on DB_SUCCESS
- added to the data dictionary cache) */
- trx_t* trx, /*!< in/out: transaction */
- bool commit, /*!< in: if true, commit the transaction */
- fil_encryption_t mode, /*!< in: encryption mode */
- ulint key_id) /*!< in: encryption key_id */
- __attribute__((nonnull, warn_unused_result));
-/*********************************************************************//**
-Does an index creation operation for MySQL. TODO: currently failure
-to create an index results in dropping the whole table! This is no problem
-currently as all indexes must be created at the same time as the table.
-@return error number or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_create_index_for_mysql(
-/*=======================*/
- dict_index_t* index, /*!< in, own: index definition
- (will be freed) */
- trx_t* trx, /*!< in: transaction handle */
- const ulint* field_lengths) /*!< in: if not NULL, must contain
- dict_index_get_n_fields(index)
- actual field lengths for the
- index columns, which are
- then checked for not being too
- large. */
- MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
-/*********************************************************************//**
-Scans a table create SQL string and adds to the data dictionary
-the foreign key constraints declared in the string. This function
-should be called after the indexes for a table have been created.
-Each foreign key constraint must be accompanied with indexes in
-bot participating tables. The indexes are allowed to contain more
-fields than mentioned in the constraint.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_table_add_foreign_constraints(
-/*==============================*/
- trx_t* trx, /*!< in: transaction */
- const char* sql_string, /*!< in: table create statement where
- foreign keys are declared like:
- FOREIGN KEY (a, b) REFERENCES table2(c, d),
- table2 can be written also with the
- database name before it: test.table2 */
- size_t sql_length, /*!< in: length of sql_string */
- const char* name, /*!< in: table full name in the
- normalized form
- database_name/table_name */
- ibool reject_fks) /*!< in: if TRUE, fail with error
- code DB_CANNOT_ADD_CONSTRAINT if
- any foreign keys are found. */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-The master thread in srv0srv.cc calls this regularly to drop tables which
-we must drop in background after queries to them have ended. Such lazy
-dropping of tables is needed in ALTER TABLE on Unix.
-@return how many tables dropped + remaining tables in list */
-UNIV_INTERN
-ulint
-row_drop_tables_for_mysql_in_background(void);
-/*=========================================*/
-/*********************************************************************//**
-Get the background drop list length. NOTE: the caller must own the kernel
-mutex!
-@return how many tables in list */
-UNIV_INTERN
-ulint
-row_get_background_drop_list_len_low(void);
-/*======================================*/
-/*********************************************************************//**
-Sets an exclusive lock on a table.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_mysql_lock_table(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table, /*!< in: table to lock */
- enum lock_mode mode, /*!< in: LOCK_X or LOCK_S */
- const char* op_info) /*!< in: string for trx->op_info */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/*********************************************************************//**
-Truncates a table for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_truncate_table_for_mysql(
-/*=========================*/
- dict_table_t* table, /*!< in: table handle */
- trx_t* trx) /*!< in: transaction handle */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Drops a table for MySQL. If the name of the dropped table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also stop the printing of monitor
-output by the master thread. If the data dictionary was not already locked
-by the transaction, the transaction will be committed. Otherwise, the
-data dictionary will remain locked.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_drop_table_for_mysql(
-/*=====================*/
- const char* name, /*!< in: table name */
- trx_t* trx, /*!< in: dictionary transaction handle */
- bool drop_db,/*!< in: true=dropping whole database */
- ibool create_failed,/*!<in: TRUE=create table failed
- because e.g. foreign key column
- type mismatch. */
- bool nonatomic = true)
- /*!< in: whether it is permitted
- to release and reacquire dict_operation_lock */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Drop all temporary tables during crash recovery. */
-UNIV_INTERN
-void
-row_mysql_drop_temp_tables(void);
-/*============================*/
-
-/*********************************************************************//**
-Discards the tablespace of a table which stored in an .ibd file. Discarding
-means that this function deletes the .ibd file and assigns a new table id for
-the table. Also the flag table->ibd_file_missing is set TRUE.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_discard_tablespace_for_mysql(
-/*=============================*/
- const char* name, /*!< in: table name */
- trx_t* trx) /*!< in: transaction handle */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*****************************************************************//**
-Imports a tablespace. The space id in the .ibd file must match the space id
-of the table in the data dictionary.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_import_tablespace_for_mysql(
-/*============================*/
- dict_table_t* table, /*!< in/out: table */
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Drops a database for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_drop_database_for_mysql(
-/*========================*/
- const char* name, /*!< in: database name which ends to '/' */
- trx_t* trx) /*!< in: transaction handle */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
-Renames a table for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_rename_table_for_mysql(
-/*=======================*/
- const char* old_name, /*!< in: old table name */
- const char* new_name, /*!< in: new table name */
- trx_t* trx, /*!< in/out: transaction */
- bool commit) /*!< in: whether to commit trx */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Checks that the index contains entries in an ascending order, unique
-constraint is not broken, and calculates the number of index entries
-in the read view of the current transaction.
-@return true if ok */
-UNIV_INTERN
-bool
-row_check_index_for_mysql(
-/*======================*/
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct
- in MySQL handle */
- const dict_index_t* index, /*!< in: index */
- ulint* n_rows) /*!< out: number of entries
- seen in the consistent read */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Determines if a table is a magic monitor table.
-@return true if monitor table */
-UNIV_INTERN
-bool
-row_is_magic_monitor_table(
-/*=======================*/
- const char* table_name) /*!< in: name of the table, in the
- form database/table_name */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Initialize this module */
-UNIV_INTERN
-void
-row_mysql_init(void);
-/*================*/
-
-/*********************************************************************//**
-Close this module */
-UNIV_INTERN
-void
-row_mysql_close(void);
-/*=================*/
-
-/*********************************************************************//**
-Reassigns the table identifier of a table.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_mysql_table_id_reassign(
-/*========================*/
- dict_table_t* table, /*!< in/out: table */
- trx_t* trx, /*!< in/out: transaction */
- table_id_t* new_id) /*!< out: new table id */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/* A struct describing a place for an individual column in the MySQL
-row format which is presented to the table handler in ha_innobase.
-This template struct is used to speed up row transformations between
-Innobase and MySQL. */
-
-struct mysql_row_templ_t {
- ulint col_no; /*!< column number of the column */
- ulint rec_field_no; /*!< field number of the column in an
- Innobase record in the current index;
- not defined if template_type is
- ROW_MYSQL_WHOLE_ROW */
- bool rec_field_is_prefix; /* is this field in a prefix index? */
- ulint rec_prefix_field_no; /* record field, even if just a
- prefix; same as rec_field_no when not a
- prefix, otherwise rec_field_no is
- ULINT_UNDEFINED but this is the true
- field number*/
- ulint clust_rec_field_no; /*!< field number of the column in an
- Innobase record in the clustered index;
- not defined if template_type is
- ROW_MYSQL_WHOLE_ROW */
- ulint icp_rec_field_no; /*!< field number of the column in an
- Innobase record in the current index;
- not defined unless
- index condition pushdown is used */
- ulint mysql_col_offset; /*!< offset of the column in the MySQL
- row format */
- ulint mysql_col_len; /*!< length of the column in the MySQL
- row format */
- ulint mysql_null_byte_offset; /*!< MySQL NULL bit byte offset in a
- MySQL record */
- ulint mysql_null_bit_mask; /*!< bit mask to get the NULL bit,
- zero if column cannot be NULL */
- ulint type; /*!< column type in Innobase mtype
- numbers DATA_CHAR... */
- ulint mysql_type; /*!< MySQL type code; this is always
- < 256 */
- ulint mysql_length_bytes; /*!< if mysql_type
- == DATA_MYSQL_TRUE_VARCHAR, this tells
- whether we should use 1 or 2 bytes to
- store the MySQL true VARCHAR data
- length at the start of row in the MySQL
- format (NOTE that the MySQL key value
- format always uses 2 bytes for the data
- len) */
- ulint charset; /*!< MySQL charset-collation code
- of the column, or zero */
- ulint mbminlen; /*!< minimum length of a char, in bytes,
- or zero if not a char type */
- ulint mbmaxlen; /*!< maximum length of a char, in bytes,
- or zero if not a char type */
- ulint is_unsigned; /*!< if a column type is an integer
- type and this field is != 0, then
- it is an unsigned integer type */
-};
-
-#define MYSQL_FETCH_CACHE_SIZE 8
-/* After fetching this many rows, we start caching them in fetch_cache */
-#define MYSQL_FETCH_CACHE_THRESHOLD 4
-
-#define ROW_PREBUILT_ALLOCATED 78540783
-#define ROW_PREBUILT_FREED 26423527
-
-/** A struct for (sometimes lazily) prebuilt structures in an Innobase table
-
-handle used within MySQL; these are used to save CPU time. */
-
-struct row_prebuilt_t {
- ulint magic_n; /*!< this magic number is set to
- ROW_PREBUILT_ALLOCATED when created,
- or ROW_PREBUILT_FREED when the
- struct has been freed */
- dict_table_t* table; /*!< Innobase table handle */
- dict_index_t* index; /*!< current index for a search, if
- any */
- trx_t* trx; /*!< current transaction handle */
- unsigned sql_stat_start:1;/*!< TRUE when we start processing of
- an SQL statement: we may have to set
- an intention lock on the table,
- create a consistent read view etc. */
- unsigned mysql_has_locked:1;/*!< this is set TRUE when MySQL
- calls external_lock on this handle
- with a lock flag, and set FALSE when
- with the F_UNLOCK flag */
- unsigned clust_index_was_generated:1;
- /*!< if the user did not define a
- primary key in MySQL, then Innobase
- automatically generated a clustered
- index where the ordering column is
- the row id: in this case this flag
- is set to TRUE */
- unsigned index_usable:1; /*!< caches the value of
- row_merge_is_index_usable(trx,index) */
- unsigned read_just_key:1;/*!< set to 1 when MySQL calls
- ha_innobase::extra with the
- argument HA_EXTRA_KEYREAD; it is enough
- to read just columns defined in
- the index (i.e., no read of the
- clustered index record necessary) */
- unsigned used_in_HANDLER:1;/*!< TRUE if we have been using this
- handle in a MySQL HANDLER low level
- index cursor command: then we must
- store the pcur position even in a
- unique search from a clustered index,
- because HANDLER allows NEXT and PREV
- in such a situation */
- unsigned template_type:2;/*!< ROW_MYSQL_WHOLE_ROW,
- ROW_MYSQL_REC_FIELDS,
- ROW_MYSQL_DUMMY_TEMPLATE, or
- ROW_MYSQL_NO_TEMPLATE */
- unsigned n_template:10; /*!< number of elements in the
- template */
- unsigned null_bitmap_len:10;/*!< number of bytes in the SQL NULL
- bitmap at the start of a row in the
- MySQL format */
- unsigned need_to_access_clustered:1; /*!< if we are fetching
- columns through a secondary index
- and at least one column is not in
- the secondary index, then this is
- set to TRUE; note that sometimes this
- is set but we later optimize out the
- clustered index lookup */
- unsigned templ_contains_blob:1;/*!< TRUE if the template contains
- a column with DATA_BLOB ==
- get_innobase_type_from_mysql_type();
- not to be confused with InnoDB
- externally stored columns
- (VARCHAR can be off-page too) */
- mysql_row_templ_t* mysql_template;/*!< template used to transform
- rows fast between MySQL and Innobase
- formats; memory for this template
- is not allocated from 'heap' */
- mem_heap_t* heap; /*!< memory heap from which
- these auxiliary structures are
- allocated when needed */
- ins_node_t* ins_node; /*!< Innobase SQL insert node
- used to perform inserts
- to the table */
- byte* ins_upd_rec_buff;/*!< buffer for storing data converted
- to the Innobase format from the MySQL
- format */
- const byte* default_rec; /*!< the default values of all columns
- (a "default row") in MySQL format */
- ulint hint_need_to_fetch_extra_cols;
- /*!< normally this is set to 0; if this
- is set to ROW_RETRIEVE_PRIMARY_KEY,
- then we should at least retrieve all
- columns in the primary key; if this
- is set to ROW_RETRIEVE_ALL_COLS, then
- we must retrieve all columns in the
- key (if read_just_key == 1), or all
- columns in the table */
- upd_node_t* upd_node; /*!< Innobase SQL update node used
- to perform updates and deletes */
- trx_id_t trx_id; /*!< The table->def_trx_id when
- ins_graph was built */
- que_fork_t* ins_graph; /*!< Innobase SQL query graph used
- in inserts. Will be rebuilt on
- trx_id or n_indexes mismatch. */
- que_fork_t* upd_graph; /*!< Innobase SQL query graph used
- in updates or deletes */
- btr_pcur_t pcur; /*!< persistent cursor used in selects
- and updates */
- btr_pcur_t clust_pcur; /*!< persistent cursor used in
- some selects and updates */
- que_fork_t* sel_graph; /*!< dummy query graph used in
- selects */
- dtuple_t* search_tuple; /*!< prebuilt dtuple used in selects */
- byte row_id[DATA_ROW_ID_LEN];
- /*!< if the clustered index was
- generated, the row id of the
- last row fetched is stored
- here */
- doc_id_t fts_doc_id; /* if the table has an FTS index on
- it then we fetch the doc_id.
- FTS-FIXME: Currently we fetch it always
- but in the future we must only fetch
- it when FTS columns are being
- updated */
- dtuple_t* clust_ref; /*!< prebuilt dtuple used in
- sel/upd/del */
- ulint select_lock_type;/*!< LOCK_NONE, LOCK_S, or LOCK_X */
- ulint stored_select_lock_type;/*!< this field is used to
- remember the original select_lock_type
- that was decided in ha_innodb.cc,
- ::store_lock(), ::external_lock(),
- etc. */
- ulint row_read_type; /*!< ROW_READ_WITH_LOCKS if row locks
- should be the obtained for records
- under an UPDATE or DELETE cursor.
- If innodb_locks_unsafe_for_binlog
- is TRUE, this can be set to
- ROW_READ_TRY_SEMI_CONSISTENT, so that
- if the row under an UPDATE or DELETE
- cursor was locked by another
- transaction, InnoDB will resort
- to reading the last committed value
- ('semi-consistent read'). Then,
- this field will be set to
- ROW_READ_DID_SEMI_CONSISTENT to
- indicate that. If the row does not
- match the WHERE condition, MySQL will
- invoke handler::unlock_row() to
- clear the flag back to
- ROW_READ_TRY_SEMI_CONSISTENT and
- to simply skip the row. If
- the row matches, the next call to
- row_search_for_mysql() will lock
- the row.
- This eliminates lock waits in some
- cases; note that this breaks
- serializability. */
- ulint new_rec_locks; /*!< normally 0; if
- srv_locks_unsafe_for_binlog is
- TRUE or session is using READ
- COMMITTED or READ UNCOMMITTED
- isolation level, set in
- row_search_for_mysql() if we set a new
- record lock on the secondary
- or clustered index; this is
- used in row_unlock_for_mysql()
- when releasing the lock under
- the cursor if we determine
- after retrieving the row that
- it does not need to be locked
- ('mini-rollback') */
- ulint mysql_prefix_len;/*!< byte offset of the end of
- the last requested column */
- ulint mysql_row_len; /*!< length in bytes of a row in the
- MySQL format */
- ulint n_rows_fetched; /*!< number of rows fetched after
- positioning the current cursor */
- ulint fetch_direction;/*!< ROW_SEL_NEXT or ROW_SEL_PREV */
- byte* fetch_cache[MYSQL_FETCH_CACHE_SIZE];
- /*!< a cache for fetched rows if we
- fetch many rows from the same cursor:
- it saves CPU time to fetch them in a
- batch; we reserve mysql_row_len
- bytes for each such row; these
- pointers point 4 bytes past the
- allocated mem buf start, because
- there is a 4 byte magic number at the
- start and at the end */
- ibool keep_other_fields_on_keyread; /*!< when using fetch
- cache with HA_EXTRA_KEYREAD, don't
- overwrite other fields in mysql row
- row buffer.*/
- ulint fetch_cache_first;/*!< position of the first not yet
- fetched row in fetch_cache */
- ulint n_fetch_cached; /*!< number of not yet fetched rows
- in fetch_cache */
- mem_heap_t* blob_heap; /*!< in SELECTS BLOB fields are copied
- to this heap */
- mem_heap_t* old_vers_heap; /*!< memory heap where a previous
- version is built in consistent read */
- bool in_fts_query; /*!< Whether we are in a FTS query */
- /*----------------------*/
- ulonglong autoinc_last_value;
- /*!< last value of AUTO-INC interval */
- ulonglong autoinc_increment;/*!< The increment step of the auto
- increment column. Value must be
- greater than or equal to 1. Required to
- calculate the next value */
- ulonglong autoinc_offset; /*!< The offset passed to
- get_auto_increment() by MySQL. Required
- to calculate the next value */
- dberr_t autoinc_error; /*!< The actual error code encountered
- while trying to init or read the
- autoinc value from the table. We
- store it here so that we can return
- it to MySQL */
- /*----------------------*/
- void* idx_cond; /*!< In ICP, pointer to a ha_innobase,
- passed to innobase_index_cond().
- NULL if index condition pushdown is
- not used. */
- ulint idx_cond_n_cols;/*!< Number of fields in idx_cond_cols.
- 0 if and only if idx_cond == NULL. */
- /*----------------------*/
- ulint magic_n2; /*!< this should be the same as
- magic_n */
- /*----------------------*/
- unsigned innodb_api:1; /*!< whether this is a InnoDB API
- query */
- const rec_t* innodb_api_rec; /*!< InnoDB API search result */
- byte* srch_key_val1; /*!< buffer used in converting
- search key values from MySQL format
- to InnoDB format.*/
- byte* srch_key_val2; /*!< buffer used in converting
- search key values from MySQL format
- to InnoDB format.*/
- uint srch_key_val_len; /*!< Size of search key */
-
-};
-
-/** Callback for row_mysql_sys_index_iterate() */
-struct SysIndexCallback {
- virtual ~SysIndexCallback() { }
-
- /** Callback method
- @param mtr - current mini transaction
- @param pcur - persistent cursor. */
- virtual void operator()(mtr_t* mtr, btr_pcur_t* pcur) throw() = 0;
-};
-
-#define ROW_PREBUILT_FETCH_MAGIC_N 465765687
-
-#define ROW_MYSQL_WHOLE_ROW 0
-#define ROW_MYSQL_REC_FIELDS 1
-#define ROW_MYSQL_NO_TEMPLATE 2
-#define ROW_MYSQL_DUMMY_TEMPLATE 3 /* dummy template used in
- row_scan_and_check_index */
-
-/* Values for hint_need_to_fetch_extra_cols */
-#define ROW_RETRIEVE_PRIMARY_KEY 1
-#define ROW_RETRIEVE_ALL_COLS 2
-
-/* Values for row_read_type */
-#define ROW_READ_WITH_LOCKS 0
-#define ROW_READ_TRY_SEMI_CONSISTENT 1
-#define ROW_READ_DID_SEMI_CONSISTENT 2
-
-#ifndef UNIV_NONINL
-#include "row0mysql.ic"
-#endif
-
-#endif /* row0mysql.h */
diff --git a/storage/xtradb/include/row0mysql.ic b/storage/xtradb/include/row0mysql.ic
deleted file mode 100644
index 2eb60898c46..00000000000
--- a/storage/xtradb/include/row0mysql.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2001, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0mysql.ic
-MySQL interface for Innobase
-
-Created 1/23/2001 Heikki Tuuri
-*******************************************************/
diff --git a/storage/xtradb/include/row0purge.h b/storage/xtradb/include/row0purge.h
deleted file mode 100644
index 5df899bc399..00000000000
--- a/storage/xtradb/include/row0purge.h
+++ /dev/null
@@ -1,138 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0purge.h
-Purge obsolete records
-
-Created 3/14/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0purge_h
-#define row0purge_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "btr0types.h"
-#include "btr0pcur.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "row0purge.h"
-#include "ut0vec.h"
-
-/********************************************************************//**
-Creates a purge node to a query graph.
-@return own: purge node */
-UNIV_INTERN
-purge_node_t*
-row_purge_node_create(
-/*==================*/
- que_thr_t* parent, /*!< in: parent node, i.e., a
- thr node */
- mem_heap_t* heap) /*!< in: memory heap where created */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***********************************************************//**
-Determines if it is possible to remove a secondary index entry.
-Removal is possible if the secondary index entry does not refer to any
-not delete marked version of a clustered index record where DB_TRX_ID
-is newer than the purge view.
-
-NOTE: This function should only be called by the purge thread, only
-while holding a latch on the leaf page of the secondary index entry
-(or keeping the buffer pool watch on the page). It is possible that
-this function first returns true and then false, if a user transaction
-inserts a record that the secondary index entry would refer to.
-However, in that case, the user transaction would also re-insert the
-secondary index entry after purge has removed it and released the leaf
-page latch.
-@return true if the secondary index record can be purged */
-UNIV_INTERN
-bool
-row_purge_poss_sec(
-/*===============*/
- purge_node_t* node, /*!< in/out: row purge node */
- dict_index_t* index, /*!< in: secondary index */
- const dtuple_t* entry) /*!< in: secondary index entry */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***************************************************************
-Does the purge operation for a single undo log record. This is a high-level
-function used in an SQL execution graph.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_purge_step(
-/*===========*/
- que_thr_t* thr) /*!< in: query thread */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/* Purge node structure */
-
-struct purge_node_t{
- que_common_t common; /*!< node type: QUE_NODE_PURGE */
- /*----------------------*/
- /* Local storage for this graph node */
- roll_ptr_t roll_ptr;/* roll pointer to undo log record */
- ib_vector_t* undo_recs;/*!< Undo recs to purge */
-
- undo_no_t undo_no;/* undo number of the record */
-
- ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC,
- ... */
- dict_table_t* table; /*!< table where purge is done */
-
- ulint cmpl_info;/* compiler analysis info of an update */
-
- upd_t* update; /*!< update vector for a clustered index
- record */
- dtuple_t* ref; /*!< NULL, or row reference to the next row to
- handle */
- dtuple_t* row; /*!< NULL, or a copy (also fields copied to
- heap) of the indexed fields of the row to
- handle */
- dict_index_t* index; /*!< NULL, or the next index whose record should
- be handled */
- mem_heap_t* heap; /*!< memory heap used as auxiliary storage for
- row; this must be emptied after a successful
- purge of a row */
- ibool found_clust;/* TRUE if the clustered index record
- determined by ref was found in the clustered
- index, and we were able to position pcur on
- it */
- btr_pcur_t pcur; /*!< persistent cursor used in searching the
- clustered index record */
- ibool done; /* Debug flag */
-
-#ifdef UNIV_DEBUG
- /***********************************************************//**
- Validate the persisent cursor. The purge node has two references
- to the clustered index record - one via the ref member, and the
- other via the persistent cursor. These two references must match
- each other if the found_clust flag is set.
- @return true if the persistent cursor is consistent with
- the ref member.*/
- bool validate_pcur();
-#endif
-};
-
-#ifndef UNIV_NONINL
-#include "row0purge.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/row0purge.ic b/storage/xtradb/include/row0purge.ic
deleted file mode 100644
index 700106d1048..00000000000
--- a/storage/xtradb/include/row0purge.ic
+++ /dev/null
@@ -1,25 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-
-/**************************************************//**
-@file include/row0purge.ic
-Purge obsolete records
-
-Created 3/14/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/xtradb/include/row0quiesce.h b/storage/xtradb/include/row0quiesce.h
deleted file mode 100644
index 35d8184d33c..00000000000
--- a/storage/xtradb/include/row0quiesce.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0quiesce.h
-
-Header file for tablespace quiesce functions.
-
-Created 2012-02-08 by Sunny Bains
-*******************************************************/
-
-#ifndef row0quiesce_h
-#define row0quiesce_h
-
-#include "univ.i"
-#include "dict0types.h"
-
-struct trx_t;
-
-/** The version number of the export meta-data text file. */
-#define IB_EXPORT_CFG_VERSION_V1 0x1UL
-
-/*********************************************************************//**
-Quiesce the tablespace that the table resides in. */
-UNIV_INTERN
-void
-row_quiesce_table_start(
-/*====================*/
- dict_table_t* table, /*!< in: quiesce this table */
- trx_t* trx) /*!< in/out: transaction/session */
- MY_ATTRIBUTE((nonnull));
-
-/*********************************************************************//**
-Set a table's quiesce state.
-@return DB_SUCCESS or error code. */
-UNIV_INTERN
-dberr_t
-row_quiesce_set_state(
-/*==================*/
- dict_table_t* table, /*!< in: quiesce this table */
- ib_quiesce_t state, /*!< in: quiesce state to set */
- trx_t* trx) /*!< in/out: transaction */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/*********************************************************************//**
-Cleanup after table quiesce. */
-UNIV_INTERN
-void
-row_quiesce_table_complete(
-/*=======================*/
- dict_table_t* table, /*!< in: quiesce this table */
- trx_t* trx) /*!< in/out: transaction/session */
- MY_ATTRIBUTE((nonnull));
-
-#ifndef UNIV_NONINL
-#include "row0quiesce.ic"
-#endif
-
-#endif /* row0quiesce_h */
diff --git a/storage/xtradb/include/row0quiesce.ic b/storage/xtradb/include/row0quiesce.ic
deleted file mode 100644
index f570a6aed05..00000000000
--- a/storage/xtradb/include/row0quiesce.ic
+++ /dev/null
@@ -1,26 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0quiesce.ic
-
-Quiesce a tablespace.
-
-Created 2012-02-08 Sunny Bains
-*******************************************************/
-
diff --git a/storage/xtradb/include/row0row.h b/storage/xtradb/include/row0row.h
deleted file mode 100644
index b04068c5a5d..00000000000
--- a/storage/xtradb/include/row0row.h
+++ /dev/null
@@ -1,343 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0row.h
-General row routines
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0row_h
-#define row0row_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "mtr0mtr.h"
-#include "rem0types.h"
-#include "read0types.h"
-#include "row0types.h"
-#include "btr0types.h"
-
-/*********************************************************************//**
-Gets the offset of the DB_TRX_ID field, in bytes relative to the origin of
-a clustered index record.
-@return offset of DATA_TRX_ID */
-UNIV_INLINE
-ulint
-row_get_trx_id_offset(
-/*==================*/
- const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: record offsets */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Reads the trx id field from a clustered index record.
-@return value of the field */
-UNIV_INLINE
-trx_id_t
-row_get_rec_trx_id(
-/*===============*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Reads the roll pointer field from a clustered index record.
-@return value of the field */
-UNIV_INLINE
-roll_ptr_t
-row_get_rec_roll_ptr(
-/*=================*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*****************************************************************//**
-When an insert or purge to a table is performed, this function builds
-the entry to be inserted into or purged from an index on the table.
-@return index entry which should be inserted or purged
-@retval NULL if the externally stored columns in the clustered index record
-are unavailable and ext != NULL, or row is missing some needed columns. */
-UNIV_INTERN
-dtuple_t*
-row_build_index_entry_low(
-/*======================*/
- const dtuple_t* row, /*!< in: row which should be
- inserted or purged */
- const row_ext_t* ext, /*!< in: externally stored column
- prefixes, or NULL */
- dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap) /*!< in: memory heap from which
- the memory for the index entry
- is allocated */
- MY_ATTRIBUTE((warn_unused_result, nonnull(1,3,4)));
-/*****************************************************************//**
-When an insert or purge to a table is performed, this function builds
-the entry to be inserted into or purged from an index on the table.
-@return index entry which should be inserted or purged, or NULL if the
-externally stored columns in the clustered index record are
-unavailable and ext != NULL */
-UNIV_INLINE
-dtuple_t*
-row_build_index_entry(
-/*==================*/
- const dtuple_t* row, /*!< in: row which should be
- inserted or purged */
- const row_ext_t* ext, /*!< in: externally stored column
- prefixes, or NULL */
- dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap) /*!< in: memory heap from which
- the memory for the index entry
- is allocated */
- MY_ATTRIBUTE((warn_unused_result, nonnull(1,3,4)));
-/*******************************************************************//**
-An inverse function to row_build_index_entry. Builds a row from a
-record in a clustered index.
-@return own: row built; see the NOTE below! */
-UNIV_INTERN
-dtuple_t*
-row_build(
-/*======*/
- ulint type, /*!< in: ROW_COPY_POINTERS or
- ROW_COPY_DATA; the latter
- copies also the data fields to
- heap while the first only
- places pointers to data fields
- on the index page, and thus is
- more efficient */
- const dict_index_t* index, /*!< in: clustered index */
- const rec_t* rec, /*!< in: record in the clustered
- index; NOTE: in the case
- ROW_COPY_POINTERS the data
- fields in the row will point
- directly into this record,
- therefore, the buffer page of
- this record must be at least
- s-latched and the latch held
- as long as the row dtuple is used! */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index)
- or NULL, in which case this function
- will invoke rec_get_offsets() */
- const dict_table_t* col_table,
- /*!< in: table, to check which
- externally stored columns
- occur in the ordering columns
- of an index, or NULL if
- index->table should be
- consulted instead; the user
- columns in this table should be
- the same columns as in index->table */
- const dtuple_t* add_cols,
- /*!< in: default values of
- added columns, or NULL */
- const ulint* col_map,/*!< in: mapping of old column
- numbers to new ones, or NULL */
- row_ext_t** ext, /*!< out, own: cache of
- externally stored column
- prefixes, or NULL */
- mem_heap_t* heap) /*!< in: memory heap from which
- the memory needed is allocated */
- MY_ATTRIBUTE((nonnull(2,3,9)));
-/*******************************************************************//**
-Converts an index record to a typed data tuple.
-@return index entry built; does not set info_bits, and the data fields
-in the entry will point directly to rec */
-UNIV_INTERN
-dtuple_t*
-row_rec_to_index_entry_low(
-/*=======================*/
- const rec_t* rec, /*!< in: record in the index */
- const dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint* n_ext, /*!< out: number of externally
- stored columns */
- mem_heap_t* heap) /*!< in: memory heap from which
- the memory needed is allocated */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Converts an index record to a typed data tuple. NOTE that externally
-stored (often big) fields are NOT copied to heap.
-@return own: index entry built */
-UNIV_INTERN
-dtuple_t*
-row_rec_to_index_entry(
-/*===================*/
- const rec_t* rec, /*!< in: record in the index */
- const dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in/out: rec_get_offsets(rec) */
- ulint* n_ext, /*!< out: number of externally
- stored columns */
- mem_heap_t* heap) /*!< in: memory heap from which
- the memory needed is allocated */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Builds from a secondary index record a row reference with which we can
-search the clustered index record.
-@return own: row reference built; see the NOTE below! */
-UNIV_INTERN
-dtuple_t*
-row_build_row_ref(
-/*==============*/
- ulint type, /*!< in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
- the former copies also the data fields to
- heap, whereas the latter only places pointers
- to data fields on the index page */
- dict_index_t* index, /*!< in: secondary index */
- const rec_t* rec, /*!< in: record in the index;
- NOTE: in the case ROW_COPY_POINTERS
- the data fields in the row will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the row reference is used! */
- mem_heap_t* heap) /*!< in: memory heap from which the memory
- needed is allocated */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-UNIV_INTERN
-void
-row_build_row_ref_in_tuple(
-/*=======================*/
- dtuple_t* ref, /*!< in/out: row reference built;
- see the NOTE below! */
- const rec_t* rec, /*!< in: record in the index;
- NOTE: the data fields in ref
- will point directly into this
- record, therefore, the buffer
- page of this record must be at
- least s-latched and the latch
- held as long as the row
- reference is used! */
- const dict_index_t* index, /*!< in: secondary index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index)
- or NULL */
- trx_t* trx) /*!< in: transaction or NULL */
- MY_ATTRIBUTE((nonnull(1,2,3)));
-/*******************************************************************//**
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-UNIV_INLINE
-void
-row_build_row_ref_fast(
-/*===================*/
- dtuple_t* ref, /*!< in/out: typed data tuple where the
- reference is built */
- const ulint* map, /*!< in: array of field numbers in rec
- telling how ref should be built from
- the fields of rec */
- const rec_t* rec, /*!< in: record in the index; must be
- preserved while ref is used, as we do
- not copy field values to heap */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
-/***************************************************************//**
-Searches the clustered index record for a row, if we have the row
-reference.
-@return TRUE if found */
-UNIV_INTERN
-ibool
-row_search_on_row_ref(
-/*==================*/
- btr_pcur_t* pcur, /*!< out: persistent cursor, which must
- be closed by the caller */
- ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
- const dict_table_t* table, /*!< in: table */
- const dtuple_t* ref, /*!< in: row reference */
- mtr_t* mtr) /*!< in/out: mtr */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Fetches the clustered index record for a secondary index record. The latches
-on the secondary index record are preserved.
-@return record or NULL, if no record found */
-UNIV_INTERN
-rec_t*
-row_get_clust_rec(
-/*==============*/
- ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
- const rec_t* rec, /*!< in: record in a secondary index */
- dict_index_t* index, /*!< in: secondary index */
- dict_index_t** clust_index,/*!< out: clustered index */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/** Result of row_search_index_entry */
-enum row_search_result {
- ROW_FOUND = 0, /*!< the record was found */
- ROW_NOT_FOUND, /*!< record not found */
- ROW_BUFFERED, /*!< one of BTR_INSERT, BTR_DELETE, or
- BTR_DELETE_MARK was specified, the
- secondary index leaf page was not in
- the buffer pool, and the operation was
- enqueued in the insert/delete buffer */
- ROW_NOT_DELETED_REF /*!< BTR_DELETE was specified, and
- row_purge_poss_sec() failed */
-};
-
-/***************************************************************//**
-Searches an index record.
-@return whether the record was found or buffered */
-UNIV_INTERN
-enum row_search_result
-row_search_index_entry(
-/*===================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* entry, /*!< in: index entry */
- ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
- btr_pcur_t* pcur, /*!< in/out: persistent cursor, which must
- be closed by the caller */
- mtr_t* mtr) /*!< in: mtr */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-#define ROW_COPY_DATA 1
-#define ROW_COPY_POINTERS 2
-
-/* The allowed latching order of index records is the following:
-(1) a secondary index record ->
-(2) the clustered index record ->
-(3) rollback segment data for the clustered index record. */
-
-/*******************************************************************//**
-Formats the raw data in "data" (in InnoDB on-disk format) using
-"dict_field" and writes the result to "buf".
-Not more than "buf_size" bytes are written to "buf".
-The result is always NUL-terminated (provided buf_size is positive) and the
-number of bytes that were written to "buf" is returned (including the
-terminating NUL).
-@return number of bytes that were written */
-UNIV_INTERN
-ulint
-row_raw_format(
-/*===========*/
- const char* data, /*!< in: raw data */
- ulint data_len, /*!< in: raw data length
- in bytes */
- const dict_field_t* dict_field, /*!< in: index field */
- char* buf, /*!< out: output buffer */
- ulint buf_size) /*!< in: output buffer size
- in bytes */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-#ifndef UNIV_NONINL
-#include "row0row.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/row0row.ic b/storage/xtradb/include/row0row.ic
deleted file mode 100644
index ac62422be1f..00000000000
--- a/storage/xtradb/include/row0row.ic
+++ /dev/null
@@ -1,174 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0row.ic
-General row routines
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#include "dict0dict.h"
-#include "rem0rec.h"
-#include "trx0undo.h"
-
-/*********************************************************************//**
-Gets the offset of the DB_TRX_ID field, in bytes relative to the origin of
-a clustered index record.
-@return offset of the DATA_TRX_ID field, looked up in the offsets array */
-UNIV_INLINE
-ulint
-row_get_trx_id_offset(
-/*==================*/
- const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: record offsets, validated against index */
-{
- ulint pos;
- ulint offset;
- ulint len;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(rec_offs_validate(NULL, index, offsets));
-
- pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID); /* field number */
-
- offset = rec_get_nth_field_offs(offsets, pos, &len); /* also sets len */
-
- ut_ad(len == DATA_TRX_ID_LEN);
-
- return(offset);
-}
-
-/*********************************************************************//**
-Reads the trx id field from a clustered index record.
-@return value of the field, as read by trx_read_trx_id() */
-UNIV_INLINE
-trx_id_t
-row_get_rec_trx_id(
-/*===============*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- ulint offset;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- offset = index->trx_id_offset; /* used directly when nonzero */
-
- if (!offset) { /* zero: look the offset up via offsets instead */
- offset = row_get_trx_id_offset(index, offsets);
- }
-
- return(trx_read_trx_id(rec + offset));
-}
-
-/*********************************************************************//**
-Reads the roll pointer field from a clustered index record.
-@return value of the field, read DATA_TRX_ID_LEN bytes past the trx id offset */
-UNIV_INLINE
-roll_ptr_t
-row_get_rec_roll_ptr(
-/*=================*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- ulint offset;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- offset = index->trx_id_offset; /* offset of the trx id field, if nonzero */
-
- if (!offset) { /* zero: look the offset up via offsets instead */
- offset = row_get_trx_id_offset(index, offsets);
- }
-
- return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
-}
-
-/*****************************************************************//**
-When an insert or purge to a table is performed, this function builds
-the entry to be inserted into or purged from an index on the table.
-Wrapper of row_build_index_entry_low() with dtuple_check_typed() checks.
-@return index entry which should be inserted or purged, or NULL if the
-externally stored columns in the clustered index record are unavailable */
-UNIV_INLINE
-dtuple_t*
-row_build_index_entry(
-/*==================*/
- const dtuple_t* row, /*!< in: row which should be
- inserted or purged */
- const row_ext_t* ext, /*!< in: externally stored column
- prefixes, or NULL */
- dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap) /*!< in: memory heap from which
- the memory for the index entry
- is allocated */
-{
- dtuple_t* entry;
-
- ut_ad(dtuple_check_typed(row));
- entry = row_build_index_entry_low(row, ext, index, heap);
- ut_ad(!entry || dtuple_check_typed(entry)); /* entry may be NULL */
- return(entry);
-}
-
-/*******************************************************************//**
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. Fields mapped to ULINT_UNDEFINED are skipped. */
-UNIV_INLINE
-void
-row_build_row_ref_fast(
-/*===================*/
- dtuple_t* ref, /*!< in/out: typed data tuple where the
- reference is built */
- const ulint* map, /*!< in: array of field numbers in rec
- telling how ref should be built from
- the fields of rec */
- const rec_t* rec, /*!< in: record in the index; must be
- preserved while ref is used, as we do
- not copy field values to heap */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- dfield_t* dfield;
- const byte* field;
- ulint len;
- ulint ref_len;
- ulint field_no;
- ulint i;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(!rec_offs_any_extern(offsets));
- ref_len = dtuple_get_n_fields(ref); /* map is read for indexes 0..ref_len-1 */
-
- for (i = 0; i < ref_len; i++) {
- dfield = dtuple_get_nth_field(ref, i);
-
- field_no = *(map + i);
-
- if (field_no != ULINT_UNDEFINED) {
- /* point the ref field at the data inside rec; nothing is copied here */
- field = rec_get_nth_field(rec, offsets,
- field_no, &len);
- dfield_set_data(dfield, field, len);
- }
- }
-}
diff --git a/storage/xtradb/include/row0sel.h b/storage/xtradb/include/row0sel.h
deleted file mode 100644
index fd5bc755a22..00000000000
--- a/storage/xtradb/include/row0sel.h
+++ /dev/null
@@ -1,409 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0sel.h
-Select
-
-Created 12/19/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0sel_h
-#define row0sel_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "que0types.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "row0types.h"
-#include "que0types.h"
-#include "pars0sym.h"
-#include "btr0pcur.h"
-#include "read0read.h"
-#include "row0mysql.h"
-
-/*********************************************************************//**
-Creates a select node struct.
-@return own: select node struct */
-UNIV_INTERN
-sel_node_t*
-sel_node_create(
-/*============*/
- mem_heap_t* heap); /*!< in: memory heap where created */
-/*********************************************************************//**
-Frees the memory private to a select node when a query graph is freed,
-does not free the heap where the node was originally created. */
-UNIV_INTERN
-void
-sel_node_free_private(
-/*==================*/
- sel_node_t* node); /*!< in: select node struct */
-/*********************************************************************//**
-Frees a prefetch buffer for a column, including the dynamically allocated
-memory for data stored there. */
-UNIV_INTERN
-void
-sel_col_prefetch_buf_free(
-/*======================*/
- sel_buf_t* prefetch_buf); /*!< in, own: prefetch buffer */
-/*********************************************************************//**
-Gets the plan node for the nth table in a join.
-@return plan node */
-UNIV_INLINE
-plan_t*
-sel_node_get_nth_plan(
-/*==================*/
- sel_node_t* node, /*!< in: select node */
- ulint i); /*!< in: get ith plan node */
-/**********************************************************************//**
-Performs a select step. This is a high-level function used in SQL execution
-graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_sel_step(
-/*=========*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs an execution step of an open or close cursor statement node.
-@return query thread to run next or NULL */
-UNIV_INLINE
-que_thr_t*
-open_step(
-/*======*/
- que_thr_t* thr); /*!< in: query thread */
-/**********************************************************************//**
-Performs a fetch for a cursor.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-fetch_step(
-/*=======*/
- que_thr_t* thr); /*!< in: query thread */
-/****************************************************************//**
-Sample callback function for fetch that prints each row.
-@return always returns non-NULL */
-UNIV_INTERN
-void*
-row_fetch_print(
-/*============*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg); /*!< in: not used */
-/***********************************************************//**
-Prints a row in a select result.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_printf_step(
-/*============*/
- que_thr_t* thr); /*!< in: query thread */
-/****************************************************************//**
-Converts a key value stored in MySQL format to an Innobase dtuple. The last
-field of the key value may be just a prefix of a fixed length field: hence
-the parameter key_len. But currently we do not allow search keys where the
-last field is only a prefix of the full key field len and print a warning if
-such appears. */
-UNIV_INTERN
-void
-row_sel_convert_mysql_key_to_innobase(
-/*==================================*/
- dtuple_t* tuple, /*!< in/out: tuple where to build;
- NOTE: we assume that the type info
- in the tuple is already according
- to index! */
- byte* buf, /*!< in: buffer to use in field
- conversions; NOTE that dtuple->data
- may end up pointing inside buf so
- do not discard that buffer while
- the tuple is being used. See
- row_mysql_store_col_in_innobase_format()
- in the case of DATA_INT */
- ulint buf_len, /*!< in: buffer length */
- dict_index_t* index, /*!< in: index of the key value */
- const byte* key_ptr, /*!< in: MySQL key value */
- ulint key_len, /*!< in: MySQL key value length */
- trx_t* trx); /*!< in: transaction */
-/********************************************************************//**
-Searches for rows in the database. This is used in the interface to
-MySQL. This function opens a cursor, and also implements fetch next
-and fetch prev. NOTE that if we do a search with a full key value
-from a unique index (ROW_SEL_EXACT), then we will not store the cursor
-position and fetch next or fetch prev must not be tried to the cursor!
-@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
-DB_LOCK_TABLE_FULL, or DB_TOO_BIG_RECORD */
-UNIV_INTERN
-dberr_t
-row_search_for_mysql(
-/*=================*/
- byte* buf, /*!< in/out: buffer for the fetched
- row in the MySQL format */
- ulint mode, /*!< in: search mode PAGE_CUR_L, ... */
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct for the
- table handle; this contains the info
- of search_tuple, index; if search
- tuple contains 0 fields then we
- position the cursor at the start or
- the end of the index, depending on
- 'mode' */
- ulint match_mode, /*!< in: 0 or ROW_SEL_EXACT or
- ROW_SEL_EXACT_PREFIX */
- ulint direction) /*!< in: 0 or ROW_SEL_NEXT or
- ROW_SEL_PREV; NOTE: if this is != 0,
- then prebuilt must have a pcur
- with stored position! In opening of a
- cursor 'direction' should be 0. */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Checks if MySQL at the moment is allowed for this table to retrieve a
-consistent read result, or store it to the query cache.
-@return TRUE if storing or retrieving from the query cache is permitted */
-UNIV_INTERN
-ibool
-row_search_check_if_query_cache_permitted(
-/*======================================*/
- trx_t* trx, /*!< in: transaction object */
- const char* norm_name); /*!< in: concatenation of database name,
- '/' char, table name */
-/*******************************************************************//**
-Read the max AUTOINC value from an index.
-@return DB_SUCCESS if all OK else error code */
-UNIV_INTERN
-dberr_t
-row_search_max_autoinc(
-/*===================*/
- dict_index_t* index, /*!< in: index to search */
- const char* col_name, /*!< in: autoinc column name */
- ib_uint64_t* value) /*!< out: AUTOINC value read */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/** A structure for caching column values for prefetched rows */
-struct sel_buf_t{
- byte* data; /*!< data, or NULL; if not NULL, this field
- has allocated memory which must be explicitly
- freed; can be != NULL even when len is
- UNIV_SQL_NULL */
- ulint len; /*!< data length or UNIV_SQL_NULL */
- ulint val_buf_size;
- /*!< size of memory buffer allocated for data:
- this can be more than len; this is defined
- when data != NULL */
-};
-
-/** Query plan */
-struct plan_t{
- dict_table_t* table; /*!< table struct in the dictionary
- cache */
- dict_index_t* index; /*!< table index used in the search */
- btr_pcur_t pcur; /*!< persistent cursor used to search
- the index */
- ibool asc; /*!< TRUE if cursor traveling upwards */
- ibool pcur_is_open; /*!< TRUE if pcur has been positioned
- and we can try to fetch new rows */
- ibool cursor_at_end; /*!< TRUE if the cursor is open but
- we know that there are no more
- qualifying rows left to retrieve from
- the index tree; NOTE though, that
- there may still be unprocessed rows in
- the prefetch stack; always FALSE when
- pcur_is_open is FALSE */
- ibool stored_cursor_rec_processed;
- /*!< TRUE if the pcur position has been
- stored and the record it is positioned
- on has already been processed */
- que_node_t** tuple_exps; /*!< array of expressions
- which are used to calculate
- the field values in the search
- tuple: there is one expression
- for each field in the search
- tuple */
- dtuple_t* tuple; /*!< search tuple */
- ulint mode; /*!< search mode: PAGE_CUR_G, ... */
- ulint n_exact_match; /*!< number of first fields in
- the search tuple which must be
- exactly matched */
- ibool unique_search; /*!< TRUE if we are searching an
- index record with a unique key */
- ulint n_rows_fetched; /*!< number of rows fetched using pcur
- after it was opened */
- ulint n_rows_prefetched;/*!< number of prefetched rows cached
- for fetch: fetching several rows in
- the same mtr saves CPU time */
- ulint first_prefetched;/*!< index of the first cached row in
- select buffer arrays for each column */
- ibool no_prefetch; /*!< no prefetch for this table */
- sym_node_list_t columns; /*!< symbol table nodes for the columns
- to retrieve from the table */
- UT_LIST_BASE_NODE_T(func_node_t)
- end_conds; /*!< conditions which determine the
- fetch limit of the index segment we
- have to look at: when one of these
- fails, the result set has been
- exhausted for the cursor in this
- index; these conditions are normalized
- so that in a comparison the column
- for this table is the first argument */
- UT_LIST_BASE_NODE_T(func_node_t)
- other_conds; /*!< the rest of search conditions we can
- test at this table in a join */
- ibool must_get_clust; /*!< TRUE if index is a non-clustered
- index and we must also fetch the
- clustered index record; this is the
- case if the non-clustered record does
- not contain all the needed columns, or
- if this is a single-table explicit
- cursor, or a searched update or
- delete */
- ulint* clust_map; /*!< map telling how clust_ref is built
- from the fields of a non-clustered
- record */
- dtuple_t* clust_ref; /*!< the reference to the clustered
- index entry is built here if index is
- a non-clustered index */
- btr_pcur_t clust_pcur; /*!< if index is non-clustered, we use
- this pcur to search the clustered
- index */
- mem_heap_t* old_vers_heap; /*!< memory heap used in building an old
- version of a row, or NULL */
-};
-
-/** Select node states */
-enum sel_node_state {
- SEL_NODE_CLOSED, /*!< it is a declared cursor which is not
- currently open */
- SEL_NODE_OPEN, /*!< intention locks not yet set on tables */
- SEL_NODE_FETCH, /*!< intention locks have been set */
- SEL_NODE_NO_MORE_ROWS /*!< cursor has reached the result set end */
-};
-
-/** Select statement node */
-struct sel_node_t{
- que_common_t common; /*!< node type: QUE_NODE_SELECT */
- enum sel_node_state
- state; /*!< node state */
- que_node_t* select_list; /*!< select list */
- sym_node_t* into_list; /*!< variables list or NULL */
- sym_node_t* table_list; /*!< table list */
- ibool asc; /*!< TRUE if the rows should be fetched
- in an ascending order */
- ibool set_x_locks; /*!< TRUE if the cursor is for update or
- delete, which means that a row x-lock
- should be placed on the cursor row */
- ulint row_lock_mode; /*!< LOCK_X or LOCK_S */
- ulint n_tables; /*!< number of tables */
- ulint fetch_table; /*!< number of the next table to access
- in the join */
- plan_t* plans; /*!< array of n_tables many plan nodes
- containing the search plan and the
- search data structures */
- que_node_t* search_cond; /*!< search condition */
- read_view_t* read_view; /*!< if the query is a non-locking
- consistent read, its read view is
- placed here, otherwise NULL */
- ibool consistent_read;/*!< TRUE if the select is a consistent,
- non-locking read */
- order_node_t* order_by; /*!< order by column definition, or
- NULL */
- ibool is_aggregate; /*!< TRUE if the select list consists of
- aggregate functions */
- ibool aggregate_already_fetched;
- /*!< TRUE if the aggregate row has
- already been fetched for the current
- cursor */
- ibool can_get_updated;/*!< this is TRUE if the select
- is in a single-table explicit
- cursor which can get updated
- within the stored procedure,
- or in a searched update or
- delete; NOTE that to determine
- whether an explicit cursor
- can get updated, the parser
- checks whether the stored procedure
- contains positioned
- update or delete statements */
- sym_node_t* explicit_cursor;/*!< not NULL if an explicit cursor */
- UT_LIST_BASE_NODE_T(sym_node_t)
- copy_variables; /*!< variables whose values we have to
- copy when an explicit cursor is opened,
- so that they do not change between
- fetches */
-};
-
-/** Fetch statement node */
-struct fetch_node_t{
- que_common_t common; /*!< type: QUE_NODE_FETCH */
- sel_node_t* cursor_def; /*!< cursor definition */
- sym_node_t* into_list; /*!< variables to set */
-
- pars_user_func_t*
- func; /*!< User callback function or NULL.
- The first argument to the function
- is a sel_node_t*, containing the
- results of the SELECT operation for
- one row. If the function returns
- NULL, it is not interested in
- further rows and the cursor is
- modified so (cursor % NOTFOUND) is
- true. If it returns not-NULL,
- continue normally. See
- row_fetch_print() for an example
- (and a useful debugging tool). */
-};
-
-/** Open or close cursor operation type */
-enum open_node_op {
- ROW_SEL_OPEN_CURSOR, /*!< open cursor */
- ROW_SEL_CLOSE_CURSOR /*!< close cursor */
-};
-
-/** Open or close cursor statement node */
-struct open_node_t{
- que_common_t common; /*!< type: QUE_NODE_OPEN */
- enum open_node_op
- op_type; /*!< operation type: open or
- close cursor */
- sel_node_t* cursor_def; /*!< cursor definition */
-};
-
-/** Row printf statement node */
-struct row_printf_node_t{
- que_common_t common; /*!< type: QUE_NODE_ROW_PRINTF */
- sel_node_t* sel_node; /*!< select */
-};
-
-/** Search direction for the MySQL interface */
-enum row_sel_direction {
- ROW_SEL_NEXT = 1, /*!< ascending direction */
- ROW_SEL_PREV = 2 /*!< descending direction */
-};
-
-/** Match mode for the MySQL interface */
-enum row_sel_match_mode {
- ROW_SEL_EXACT = 1, /*!< search using a complete key value */
- ROW_SEL_EXACT_PREFIX /*!< search using a key prefix which
- must match rows: the prefix may
- contain an incomplete field (the last
- field in prefix may be just a prefix
- of a fixed length column) */
-};
-
-#ifndef UNIV_NONINL
-#include "row0sel.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/row0sel.ic b/storage/xtradb/include/row0sel.ic
deleted file mode 100644
index d83a3448832..00000000000
--- a/storage/xtradb/include/row0sel.ic
+++ /dev/null
@@ -1,105 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0sel.ic
-Select
-
-Created 12/19/1997 Heikki Tuuri
-*******************************************************/
-
-#include "que0que.h"
-
-/*********************************************************************//**
-Gets the plan node for the nth table in a join.
-@return plan node */
-UNIV_INLINE
-plan_t*
-sel_node_get_nth_plan(
-/*==================*/
- sel_node_t* node, /*!< in: select node */
- ulint i) /*!< in: get ith plan node */
-{
- ut_ad(i < node->n_tables);
-
- return(node->plans + i);
-}
-
-/*********************************************************************//**
-Resets the cursor defined by sel_node to the SEL_NODE_OPEN state, which means
-that it will start fetching from the start of the result set again, regardless
-of where it was before, and it will set intention locks on the tables. */
-UNIV_INLINE
-void
-sel_node_reset_cursor(
-/*==================*/
- sel_node_t* node) /*!< in: select node */
-{
- node->state = SEL_NODE_OPEN;
-}
-
-/**********************************************************************//**
-Performs an execution step of an open or close cursor statement node.
-@return query thread to run next or NULL */
-UNIV_INLINE
-que_thr_t*
-open_step(
-/*======*/
- que_thr_t* thr) /*!< in: query thread */
-{
- sel_node_t* sel_node;
- open_node_t* node;
- ulint err;
-
- ut_ad(thr);
-
- node = (open_node_t*) thr->run_node;
- ut_ad(que_node_get_type(node) == QUE_NODE_OPEN);
-
- sel_node = node->cursor_def;
-
- err = DB_SUCCESS;
-
- if (node->op_type == ROW_SEL_OPEN_CURSOR) {
-
- /* if (sel_node->state == SEL_NODE_CLOSED) { */
-
- sel_node_reset_cursor(sel_node);
- /* } else {
- err = DB_ERROR;
- } */
- } else {
- if (sel_node->state != SEL_NODE_CLOSED) {
-
- sel_node->state = SEL_NODE_CLOSED;
- } else {
- err = DB_ERROR;
- }
- }
-
- if (err != DB_SUCCESS) {
- /* SQL error detected */
- fprintf(stderr, "SQL error %lu\n", (ulong) err);
-
- ut_error;
- }
-
- thr->run_node = que_node_get_parent(node);
-
- return(thr);
-}
diff --git a/storage/xtradb/include/row0types.h b/storage/xtradb/include/row0types.h
deleted file mode 100644
index 52c89cb01fa..00000000000
--- a/storage/xtradb/include/row0types.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0types.h
-Row operation global types
-
-Created 12/27/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0types_h
-#define row0types_h
-
-struct plan_t;
-
-struct upd_t;
-struct upd_field_t;
-struct upd_node_t;
-struct del_node_t;
-struct ins_node_t;
-struct sel_node_t;
-struct open_node_t;
-struct fetch_node_t;
-
-struct row_printf_node_t;
-struct sel_buf_t;
-
-struct undo_node_t;
-
-struct purge_node_t;
-
-struct row_ext_t;
-
-/** Buffer for logging modifications during online index creation */
-struct row_log_t;
-
-/* MySQL data types */
-struct TABLE;
-
-#endif
diff --git a/storage/xtradb/include/row0uins.h b/storage/xtradb/include/row0uins.h
deleted file mode 100644
index 89e334e5433..00000000000
--- a/storage/xtradb/include/row0uins.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0uins.h
-Fresh insert undo
-
-Created 2/25/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0uins_h
-#define row0uins_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "mtr0mtr.h"
-
-/***********************************************************//**
-Undoes a fresh insert of a row to a table. A fresh insert means that
-the same clustered index unique key did not have any record, even delete
-marked, at the time of the insert. InnoDB is eager in a rollback:
-if it figures out that an index record will be removed in the purge
-anyway, it will remove it in the rollback.
-@return DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_undo_ins(
-/*=========*/
- undo_node_t* node) /*!< in: row undo node */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifndef UNIV_NONINL
-#include "row0uins.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/row0uins.ic b/storage/xtradb/include/row0uins.ic
deleted file mode 100644
index 54da2e49874..00000000000
--- a/storage/xtradb/include/row0uins.ic
+++ /dev/null
@@ -1,25 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0uins.ic
-Fresh insert undo
-
-Created 2/25/1997 Heikki Tuuri
-*******************************************************/
-
diff --git a/storage/xtradb/include/row0umod.h b/storage/xtradb/include/row0umod.h
deleted file mode 100644
index 4f1d8e1f66c..00000000000
--- a/storage/xtradb/include/row0umod.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0umod.h
-Undo modify of a row
-
-Created 2/27/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0umod_h
-#define row0umod_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-#include "mtr0mtr.h"
-
-/***********************************************************//**
-Undoes a modify operation on a row of a table.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-row_undo_mod(
-/*=========*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr) /*!< in: query thread */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-#ifndef UNIV_NONINL
-#include "row0umod.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/row0umod.ic b/storage/xtradb/include/row0umod.ic
deleted file mode 100644
index 00a8cd86e01..00000000000
--- a/storage/xtradb/include/row0umod.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0umod.ic
-Undo modify of a row
-
-Created 2/27/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/xtradb/include/row0undo.h b/storage/xtradb/include/row0undo.h
deleted file mode 100644
index 5dddfb4eae1..00000000000
--- a/storage/xtradb/include/row0undo.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0undo.h
-Row undo
-
-Created 1/8/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0undo_h
-#define row0undo_h
-
-#include "univ.i"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-#include "btr0types.h"
-#include "btr0pcur.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "row0types.h"
-
-/********************************************************************//**
-Creates a row undo node to a query graph.
-@return own: undo node */
-UNIV_INTERN
-undo_node_t*
-row_undo_node_create(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- que_thr_t* parent, /*!< in: parent node, i.e., a thr node */
- mem_heap_t* heap); /*!< in: memory heap where created */
-/***********************************************************//**
-Looks for the clustered index record when node has the row reference.
-The pcur in node is used in the search. If found, stores the row to node,
-and stores the position of pcur, and detaches it. The pcur must be closed
-by the caller in any case.
-@return TRUE if found; NOTE the node->pcur must be closed by the
-caller, regardless of the return value */
-UNIV_INTERN
-ibool
-row_undo_search_clust_to_pcur(
-/*==========================*/
- undo_node_t* node); /*!< in: row undo node */
-/***********************************************************//**
-Undoes a row operation in a table. This is a high-level function used
-in SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_undo_step(
-/*==========*/
- que_thr_t* thr); /*!< in: query thread */
-
-/* A single query thread will try to perform the undo for all successive
-versions of a clustered index record, if the transaction has modified it
-several times during the execution which is rolled back. It may happen
-that the task is transferred to another query thread, if the other thread
-is assigned to handle an undo log record in the chain of different versions
-of the record, and the other thread happens to get the x-latch to the
-clustered index record at the right time.
- If a query thread notices that the clustered index record it is looking
-for is missing, or the roll ptr field in the record does not point to the
-undo log record the thread was assigned to handle, then it gives up the undo
-task for that undo log record, and fetches the next. This situation can occur
-just in the case where the transaction modified the same record several times
-and another thread is currently doing the undo for successive versions of
-that index record. */
-
-/** Execution state of an undo node */
-enum undo_exec {
- UNDO_NODE_FETCH_NEXT = 1, /*!< we should fetch the next
- undo log record */
- UNDO_NODE_INSERT, /*!< undo a fresh insert of a
- row to a table */
- UNDO_NODE_MODIFY /*!< undo a modify operation
- (DELETE or UPDATE) on a row
- of a table */
-};
-
-/** Undo node structure */
-struct undo_node_t{
- que_common_t common; /*!< node type: QUE_NODE_UNDO */
- enum undo_exec state; /*!< node execution state */
- trx_t* trx; /*!< trx for which undo is done */
- roll_ptr_t roll_ptr;/*!< roll pointer to undo log record */
- trx_undo_rec_t* undo_rec;/*!< undo log record */
- undo_no_t undo_no;/*!< undo number of the record */
- ulint rec_type;/*!< undo log record type: TRX_UNDO_INSERT_REC,
- ... */
- trx_id_t new_trx_id; /*!< trx id to restore to clustered index
- record */
- btr_pcur_t pcur; /*!< persistent cursor used in searching the
- clustered index record */
- dict_table_t* table; /*!< table where undo is done */
- ulint cmpl_info;/*!< compiler analysis of an update */
- upd_t* update; /*!< update vector for a clustered index
- record */
- dtuple_t* ref; /*!< row reference to the next row to handle */
- dtuple_t* row; /*!< a copy (also fields copied to heap) of the
- row to handle */
- row_ext_t* ext; /*!< NULL, or prefixes of the externally
- stored columns of the row */
- dtuple_t* undo_row;/*!< NULL, or the row after undo */
- row_ext_t* undo_ext;/*!< NULL, or prefixes of the externally
- stored columns of undo_row */
- dict_index_t* index; /*!< the next index whose record should be
- handled */
- mem_heap_t* heap; /*!< memory heap used as auxiliary storage for
- row; this must be emptied after undo is tried
- on a row */
-};
-
-
-#ifndef UNIV_NONINL
-#include "row0undo.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/row0undo.ic b/storage/xtradb/include/row0undo.ic
deleted file mode 100644
index b97ffca590e..00000000000
--- a/storage/xtradb/include/row0undo.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0undo.ic
-Row undo
-
-Created 1/8/1997 Heikki Tuuri
-*******************************************************/
diff --git a/storage/xtradb/include/row0upd.h b/storage/xtradb/include/row0upd.h
deleted file mode 100644
index 4312fcf7339..00000000000
--- a/storage/xtradb/include/row0upd.h
+++ /dev/null
@@ -1,539 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0upd.h
-Update of a row
-
-Created 12/27/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0upd_h
-#define row0upd_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "row0types.h"
-#include "btr0types.h"
-#include "dict0types.h"
-#include "trx0types.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "btr0pcur.h"
-# include "que0types.h"
-# include "pars0types.h"
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Creates an update vector object.
-@return own: update vector object */
-UNIV_INLINE
-upd_t*
-upd_create(
-/*=======*/
- ulint n, /*!< in: number of fields */
- mem_heap_t* heap); /*!< in: heap from which memory allocated */
-/*********************************************************************//**
-Returns the number of fields in the update vector == number of columns
-to be updated by an update vector.
-@return number of fields */
-UNIV_INLINE
-ulint
-upd_get_n_fields(
-/*=============*/
- const upd_t* update); /*!< in: update vector */
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Returns the nth field of an update vector.
-@return update vector field */
-UNIV_INLINE
-upd_field_t*
-upd_get_nth_field(
-/*==============*/
- const upd_t* update, /*!< in: update vector */
- ulint n); /*!< in: field position in update vector */
-#else
-# define upd_get_nth_field(update, n) ((update)->fields + (n))
-#endif
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Sets an index field number to be updated by an update vector field. */
-UNIV_INLINE
-void
-upd_field_set_field_no(
-/*===================*/
- upd_field_t* upd_field, /*!< in: update vector field */
- ulint field_no, /*!< in: field number in a clustered
- index */
- dict_index_t* index, /*!< in: index */
- trx_t* trx); /*!< in: transaction */
-/*********************************************************************//**
-Returns a field of an update vector by field_no.
-@return update vector field, or NULL */
-UNIV_INLINE
-const upd_field_t*
-upd_get_field_by_field_no(
-/*======================*/
- const upd_t* update, /*!< in: update vector */
- ulint no) /*!< in: field_no */
- MY_ATTRIBUTE((nonnull, pure));
-/*********************************************************************//**
-Writes into the redo log the values of trx id and roll ptr and enough info
-to determine their positions within a clustered index record.
-@return new pointer to mlog */
-UNIV_INTERN
-byte*
-row_upd_write_sys_vals_to_log(
-/*==========================*/
- dict_index_t* index, /*!< in: clustered index */
- trx_id_t trx_id, /*!< in: transaction id */
- roll_ptr_t roll_ptr,/*!< in: roll ptr of the undo log record */
- byte* log_ptr,/*!< pointer to a buffer of size > 20 opened
- in mlog */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************************//**
-Updates the trx id and roll ptr field in a clustered index record when
-a row is updated or marked deleted. */
-UNIV_INLINE
-void
-row_upd_rec_sys_fields(
-/*===================*/
- rec_t* rec, /*!< in/out: record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- const trx_t* trx, /*!< in: transaction */
- roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record,
- can be 0 during IMPORT */
-/*********************************************************************//**
-Sets the trx id or roll ptr field of a clustered index entry. */
-UNIV_INTERN
-void
-row_upd_index_entry_sys_field(
-/*==========================*/
- dtuple_t* entry, /*!< in/out: index entry, where the memory
- buffers for sys fields are already allocated:
- the function just copies the new values to
- them */
- dict_index_t* index, /*!< in: clustered index */
- ulint type, /*!< in: DATA_TRX_ID or DATA_ROLL_PTR */
- ib_uint64_t val); /*!< in: value to write */
-/*********************************************************************//**
-Creates an update node for a query graph.
-@return own: update node */
-UNIV_INTERN
-upd_node_t*
-upd_node_create(
-/*============*/
- mem_heap_t* heap); /*!< in: mem heap where created */
-/***********************************************************//**
-Writes to the redo log the new values of the fields occurring in the index. */
-UNIV_INTERN
-void
-row_upd_index_write_log(
-/*====================*/
- const upd_t* update, /*!< in: update vector */
- byte* log_ptr,/*!< in: pointer to mlog buffer: must
- contain at least MLOG_BUF_MARGIN bytes
- of free space; the buffer is closed
- within this function */
- mtr_t* mtr); /*!< in: mtr into whose log to write */
-/***********************************************************//**
-Returns TRUE if row update changes size of some field in index or if some
-field to be updated is stored externally in rec or update.
-@return TRUE if the update changes the size of some field in index or
-the field is external in rec or update */
-UNIV_INTERN
-ibool
-row_upd_changes_field_size_or_external(
-/*===================================*/
- dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- const upd_t* update);/*!< in: update vector */
-/***********************************************************//**
-Returns true if row update contains disowned external fields.
-@return true if the update contains disowned external fields. */
-UNIV_INTERN
-bool
-row_upd_changes_disowned_external(
-/*==============================*/
- const upd_t* update) /*!< in: update vector */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the
-record given. No field size changes are allowed. This function is
-usually invoked on a clustered index. The only use case for a
-secondary index is row_ins_sec_index_entry_by_modify() or its
-counterpart in ibuf_insert_to_index_page(). */
-UNIV_INTERN
-void
-row_upd_rec_in_place(
-/*=================*/
- rec_t* rec, /*!< in/out: record where replaced */
- dict_index_t* index, /*!< in: the index the record belongs to */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- const upd_t* update, /*!< in: update vector */
- page_zip_des_t* page_zip);/*!< in: compressed page with enough space
- available, or NULL */
-#ifndef UNIV_HOTBACKUP
-/***************************************************************//**
-Builds an update vector from those fields which in a secondary index entry
-differ from a record that has the equal ordering fields. NOTE: we compare
-the fields as binary strings!
-@return own: update vector of differing fields */
-UNIV_INTERN
-upd_t*
-row_upd_build_sec_rec_difference_binary(
-/*====================================*/
- const rec_t* rec, /*!< in: secondary index record */
- dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- const dtuple_t* entry, /*!< in: entry to insert */
- mem_heap_t* heap) /*!< in: memory heap from which allocated */
- MY_ATTRIBUTE((warn_unused_result, nonnull));
-/***************************************************************//**
-Builds an update vector from those fields, excluding the roll ptr and
-trx id fields, which in an index entry differ from a record that has
-the equal ordering fields. NOTE: we compare the fields as binary strings!
-@return own: update vector of differing fields, excluding roll ptr and
-trx id */
-UNIV_INTERN
-const upd_t*
-row_upd_build_difference_binary(
-/*============================*/
- dict_index_t* index, /*!< in: clustered index */
- const dtuple_t* entry, /*!< in: entry to insert */
- const rec_t* rec, /*!< in: clustered index record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index), or NULL */
- bool no_sys, /*!< in: skip the system columns
- DB_TRX_ID and DB_ROLL_PTR */
- trx_t* trx, /*!< in: transaction (for diagnostics),
- or NULL */
- mem_heap_t* heap) /*!< in: memory heap from which allocated */
- MY_ATTRIBUTE((nonnull(1,2,3,7), warn_unused_result));
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the index entry
-given. */
-UNIV_INTERN
-void
-row_upd_index_replace_new_col_vals_index_pos(
-/*=========================================*/
- dtuple_t* entry, /*!< in/out: index entry where replaced;
- the clustered index record must be
- covered by a lock or a page latch to
- prevent deletion (rollback or purge) */
- dict_index_t* index, /*!< in: index; NOTE that this may also be a
- non-clustered index */
- const upd_t* update, /*!< in: an update vector built for the index so
- that the field number in an upd_field is the
- index position */
- ibool order_only,
- /*!< in: if TRUE, limit the replacement to
- ordering fields of index; note that this
- does not work for non-clustered indexes. */
- mem_heap_t* heap); /*!< in: memory heap for allocating and
- copying the new values */
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the index entry
-given. */
-UNIV_INTERN
-void
-row_upd_index_replace_new_col_vals(
-/*===============================*/
- dtuple_t* entry, /*!< in/out: index entry where replaced;
- the clustered index record must be
- covered by a lock or a page latch to
- prevent deletion (rollback or purge) */
- dict_index_t* index, /*!< in: index; NOTE that this may also be a
- non-clustered index */
- const upd_t* update, /*!< in: an update vector built for the
- CLUSTERED index so that the field number in
- an upd_field is the clustered index position */
- mem_heap_t* heap) /*!< in: memory heap for allocating and
- copying the new values */
- MY_ATTRIBUTE((nonnull));
-/***********************************************************//**
-Replaces the new column values stored in the update vector. */
-UNIV_INTERN
-void
-row_upd_replace(
-/*============*/
- dtuple_t* row, /*!< in/out: row where replaced,
- indexed by col_no;
- the clustered index record must be
- covered by a lock or a page latch to
- prevent deletion (rollback or purge) */
- row_ext_t** ext, /*!< out, own: NULL, or externally
- stored column prefixes */
- const dict_index_t* index, /*!< in: clustered index */
- const upd_t* update, /*!< in: an update vector built for the
- clustered index */
- mem_heap_t* heap); /*!< in: memory heap */
-/***********************************************************//**
-Checks if an update vector changes an ordering field of an index record.
-
-This function is fast if the update vector is short or the number of ordering
-fields in the index is small. Otherwise, this can be quadratic.
-NOTE: we compare the fields as binary strings!
-@return TRUE if update vector changes an ordering field in the index record */
-UNIV_INTERN
-ibool
-row_upd_changes_ord_field_binary_func(
-/*==================================*/
- dict_index_t* index, /*!< in: index of the record */
- const upd_t* update, /*!< in: update vector for the row; NOTE: the
- field numbers in this MUST be clustered index
- positions! */
-#ifdef UNIV_DEBUG
- const que_thr_t*thr, /*!< in: query thread */
-#endif /* UNIV_DEBUG */
- const dtuple_t* row, /*!< in: old value of row, or NULL if the
- row and the data values in update are not
- known when this function is called, e.g., at
- compile time */
- const row_ext_t*ext) /*!< NULL, or prefixes of the externally
- stored columns in the old row */
- MY_ATTRIBUTE((warn_unused_result));
-#ifdef UNIV_DEBUG
-# define row_upd_changes_ord_field_binary(index,update,thr,row,ext) \
- row_upd_changes_ord_field_binary_func(index,update,thr,row,ext)
-#else /* UNIV_DEBUG */
-# define row_upd_changes_ord_field_binary(index,update,thr,row,ext) \
- row_upd_changes_ord_field_binary_func(index,update,row,ext)
-#endif /* UNIV_DEBUG */
-/***********************************************************//**
-Checks if an FTS indexed column is affected by an UPDATE.
-@return offset within fts_t::indexes if FTS indexed column updated else
-ULINT_UNDEFINED */
-UNIV_INTERN
-ulint
-row_upd_changes_fts_column(
-/*=======================*/
- dict_table_t* table, /*!< in: table */
- upd_field_t* upd_field); /*!< in: field to check */
-/***********************************************************//**
-Checks if an FTS Doc ID column is affected by an UPDATE.
-@return whether Doc ID column is affected */
-UNIV_INTERN
-bool
-row_upd_changes_doc_id(
-/*===================*/
- dict_table_t* table, /*!< in: table */
- upd_field_t* upd_field) /*!< in: field to check */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***********************************************************//**
-Checks if an update vector changes an ordering field of an index record.
-This function is fast if the update vector is short or the number of ordering
-fields in the index is small. Otherwise, this can be quadratic.
-NOTE: we compare the fields as binary strings!
-@return TRUE if update vector may change an ordering field in an index
-record */
-UNIV_INTERN
-ibool
-row_upd_changes_some_index_ord_field_binary(
-/*========================================*/
- const dict_table_t* table, /*!< in: table */
- const upd_t* update);/*!< in: update vector for the row */
-/***********************************************************//**
-Updates a row in a table. This is a high-level function used
-in SQL execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_upd_step(
-/*=========*/
- que_thr_t* thr); /*!< in: query thread */
-#endif /* !UNIV_HOTBACKUP */
-/*********************************************************************//**
-Parses the log data of system field values.
-@return log data end or NULL */
-UNIV_INTERN
-byte*
-row_upd_parse_sys_vals(
-/*===================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- ulint* pos, /*!< out: TRX_ID position in record */
- trx_id_t* trx_id, /*!< out: trx id */
- roll_ptr_t* roll_ptr);/*!< out: roll ptr */
-/*********************************************************************//**
-Updates the trx id and roll ptr field in a clustered index record in database
-recovery. */
-UNIV_INTERN
-void
-row_upd_rec_sys_fields_in_recovery(
-/*===============================*/
- rec_t* rec, /*!< in/out: record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint pos, /*!< in: TRX_ID position in rec */
- trx_id_t trx_id, /*!< in: transaction id */
- roll_ptr_t roll_ptr);/*!< in: roll ptr of the undo log record */
-/*********************************************************************//**
-Parses the log data written by row_upd_index_write_log.
-@return log data end or NULL */
-UNIV_INTERN
-byte*
-row_upd_index_parse(
-/*================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- mem_heap_t* heap, /*!< in: memory heap where update vector is
- built */
- upd_t** update_out);/*!< out: update vector */
-
-
-/* Update vector field */
-struct upd_field_t{
- unsigned field_no:16; /*!< field number in an index, usually
- the clustered index, but in updating
- a secondary index record in btr0cur.cc
- this is the position in the secondary
- index */
-#ifndef UNIV_HOTBACKUP
- unsigned orig_len:16; /*!< original length of the locally
- stored part of an externally stored
- column, or 0 */
- que_node_t* exp; /*!< expression for calculating a new
- value: it refers to column values and
- constants in the symbol table of the
- query graph */
-#endif /* !UNIV_HOTBACKUP */
- dfield_t new_val; /*!< new value for the column */
-};
-
-/* Update vector structure */
-struct upd_t{
- ulint info_bits; /*!< new value of info bits to record;
- default is 0 */
- ulint n_fields; /*!< number of update fields */
- upd_field_t* fields; /*!< array of update fields */
-};
-
-#ifndef UNIV_HOTBACKUP
-/* Update node structure which also implements the delete operation
-of a row */
-
-struct upd_node_t{
- que_common_t common; /*!< node type: QUE_NODE_UPDATE */
- ibool is_delete;/* TRUE if delete, FALSE if update */
- ibool searched_update;
- /* TRUE if searched update, FALSE if
- positioned */
- ibool in_mysql_interface;
- /* TRUE if the update node was created
- for the MySQL interface */
- dict_foreign_t* foreign;/* NULL or pointer to a foreign key
- constraint if this update node is used in
- doing an ON DELETE or ON UPDATE operation */
- upd_node_t* cascade_node;/* NULL or an update node template which
- is used to implement ON DELETE/UPDATE CASCADE
- or ... SET NULL for foreign keys */
- mem_heap_t* cascade_heap;/* NULL or a mem heap where the cascade
- node is created */
- sel_node_t* select; /*!< query graph subtree implementing a base
- table cursor: the rows returned will be
- updated */
- btr_pcur_t* pcur; /*!< persistent cursor placed on the clustered
- index record which should be updated or
- deleted; the cursor is stored in the graph
- of 'select' field above, except in the case
- of the MySQL interface */
- dict_table_t* table; /*!< table where updated */
- upd_t* update; /*!< update vector for the row */
- ulint update_n_fields;
- /* when this struct is used to implement
- a cascade operation for foreign keys, we store
- here the size of the buffer allocated for use
- as the update vector */
- sym_node_list_t columns;/* symbol table nodes for the columns
- to retrieve from the table */
- ibool has_clust_rec_x_lock;
- /* TRUE if the select which retrieves the
- records to update already sets an x-lock on
- the clustered record; note that it must always
- set at least an s-lock */
- ulint cmpl_info;/* information extracted during query
- compilation; speeds up execution:
- UPD_NODE_NO_ORD_CHANGE and
- UPD_NODE_NO_SIZE_CHANGE, ORed */
- /*----------------------*/
- /* Local storage for this graph node */
- ulint state; /*!< node execution state */
- dict_index_t* index; /*!< NULL, or the next index whose record should
- be updated */
- dtuple_t* row; /*!< NULL, or a copy (also fields copied to
- heap) of the row to update; this must be reset
- to NULL after a successful update */
- row_ext_t* ext; /*!< NULL, or prefixes of the externally
- stored columns in the old row */
- dtuple_t* upd_row;/* NULL, or a copy of the updated row */
- row_ext_t* upd_ext;/* NULL, or prefixes of the externally
- stored columns in upd_row */
- mem_heap_t* heap; /*!< memory heap used as auxiliary storage;
- this must be emptied after a successful
- update */
- /*----------------------*/
- sym_node_t* table_sym;/* table node in symbol table */
- que_node_t* col_assign_list;
- /* column assignment list */
- ulint magic_n;
-};
-
-#define UPD_NODE_MAGIC_N 1579975
-
-/* Node execution states */
-#define UPD_NODE_SET_IX_LOCK 1 /* execution came to the node from
- a node above and if the field
- has_clust_rec_x_lock is FALSE, we
- should set an intention x-lock on
- the table */
-#define UPD_NODE_UPDATE_CLUSTERED 2 /* clustered index record should be
- updated */
-#define UPD_NODE_INSERT_CLUSTERED 3 /* clustered index record should be
- inserted, old record is already delete
- marked */
-#define UPD_NODE_INSERT_BLOB 4 /* clustered index record should be
- inserted, old record is already
- delete-marked; non-updated BLOBs
- should be inherited by the new record
- and disowned by the old record */
-#define UPD_NODE_UPDATE_ALL_SEC 5 /* an ordering field of the clustered
- index record was changed, or this is
- a delete operation: should update
- all the secondary index records */
-#define UPD_NODE_UPDATE_SOME_SEC 6 /* secondary index entries should be
- looked at and updated if an ordering
- field changed */
-
-/* Compilation info flags: these must fit within 3 bits; see trx0rec.h */
-#define UPD_NODE_NO_ORD_CHANGE 1 /* no secondary index record will be
- changed in the update and no ordering
- field of the clustered index */
-#define UPD_NODE_NO_SIZE_CHANGE 2 /* no record field size will be
- changed in the update */
-
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_NONINL
-#include "row0upd.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/row0upd.ic b/storage/xtradb/include/row0upd.ic
deleted file mode 100644
index 618a77fa4bf..00000000000
--- a/storage/xtradb/include/row0upd.ic
+++ /dev/null
@@ -1,188 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0upd.ic
-Update of a row
-
-Created 12/27/1996 Heikki Tuuri
-*******************************************************/
-
-#include "mtr0log.h"
-#ifndef UNIV_HOTBACKUP
-# include "trx0trx.h"
-# include "trx0undo.h"
-# include "row0row.h"
-# include "lock0lock.h"
-#endif /* !UNIV_HOTBACKUP */
-#include "page0zip.h"
-
-/*********************************************************************//**
-Creates an update vector object.
-@return own: update vector object */
-UNIV_INLINE
-upd_t*
-upd_create(
-/*=======*/
- ulint n, /*!< in: number of fields */
- mem_heap_t* heap) /*!< in: heap from which memory allocated */
-{
- upd_t* update;
-
- update = (upd_t*) mem_heap_zalloc(heap, sizeof(upd_t));
-
- update->n_fields = n;
- update->fields = (upd_field_t*)
- mem_heap_zalloc(heap, sizeof(upd_field_t) * n);
-
- return(update);
-}
-
-/*********************************************************************//**
-Returns the number of fields in the update vector == number of columns
-to be updated by an update vector.
-@return number of fields */
-UNIV_INLINE
-ulint
-upd_get_n_fields(
-/*=============*/
- const upd_t* update) /*!< in: update vector */
-{
- ut_ad(update);
-
- return(update->n_fields);
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Returns the nth field of an update vector.
-@return update vector field */
-UNIV_INLINE
-upd_field_t*
-upd_get_nth_field(
-/*==============*/
- const upd_t* update, /*!< in: update vector */
- ulint n) /*!< in: field position in update vector */
-{
- ut_ad(update);
- ut_ad(n < update->n_fields);
-
- return((upd_field_t*) update->fields + n);
-}
-#endif /* UNIV_DEBUG */
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Sets an index field number to be updated by an update vector field. */
-UNIV_INLINE
-void
-upd_field_set_field_no(
-/*===================*/
- upd_field_t* upd_field, /*!< in: update vector field */
- ulint field_no, /*!< in: field number in a clustered
- index */
- dict_index_t* index, /*!< in: index */
- trx_t* trx) /*!< in: transaction */
-{
- upd_field->field_no = field_no;
- upd_field->orig_len = 0;
-
- if (field_no >= dict_index_get_n_fields(index)) {
- fprintf(stderr,
- "InnoDB: Error: trying to access field %lu in ",
- (ulong) field_no);
- dict_index_name_print(stderr, trx, index);
- fprintf(stderr, "\n"
- "InnoDB: but index only has %lu fields\n",
- (ulong) dict_index_get_n_fields(index));
- ut_ad(0);
- }
-
- dict_col_copy_type(dict_index_get_nth_col(index, field_no),
- dfield_get_type(&upd_field->new_val));
-}
-
-/*********************************************************************//**
-Returns a field of an update vector by field_no.
-@return update vector field, or NULL */
-UNIV_INLINE
-const upd_field_t*
-upd_get_field_by_field_no(
-/*======================*/
- const upd_t* update, /*!< in: update vector */
- ulint no) /*!< in: field_no */
-{
- ulint i;
- for (i = 0; i < upd_get_n_fields(update); i++) {
- const upd_field_t* uf = upd_get_nth_field(update, i);
-
- if (uf->field_no == no) {
-
- return(uf);
- }
- }
-
- return(NULL);
-}
-
-/*********************************************************************//**
-Updates the trx id and roll ptr field in a clustered index record when
-a row is updated or marked deleted. */
-UNIV_INLINE
-void
-row_upd_rec_sys_fields(
-/*===================*/
- rec_t* rec, /*!< in/out: record */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be updated, or NULL */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- const trx_t* trx, /*!< in: transaction */
- roll_ptr_t roll_ptr)/*!< in: roll ptr of the undo log record,
- can be 0 during IMPORT */
-{
- ut_ad(dict_index_is_clust(index));
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (page_zip) {
- ulint pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
- page_zip_write_trx_id_and_roll_ptr(page_zip, rec, offsets,
- pos, trx->id, roll_ptr);
- } else {
- ulint offset = index->trx_id_offset;
-
- if (!offset) {
- offset = row_get_trx_id_offset(index, offsets);
- }
-
-#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
-# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
-#endif
- /* During IMPORT the trx id in the record can be in the
- future, if the .ibd file is being imported from another
- instance. During IMPORT roll_ptr will be 0. */
- ut_ad(roll_ptr == 0
- || lock_check_trx_id_sanity(
- trx_read_trx_id(rec + offset),
- rec, index, offsets));
-
- trx_write_trx_id(rec + offset, trx->id);
- trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/row0vers.h b/storage/xtradb/include/row0vers.h
deleted file mode 100644
index 7b850215701..00000000000
--- a/storage/xtradb/include/row0vers.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0vers.h
-Row versions
-
-Created 2/6/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef row0vers_h
-#define row0vers_h
-
-#include "univ.i"
-#include "data0data.h"
-#include "dict0types.h"
-#include "trx0types.h"
-#include "que0types.h"
-#include "rem0types.h"
-#include "mtr0mtr.h"
-#include "read0types.h"
-
-/*****************************************************************//**
-Finds out if an active transaction has inserted or modified a secondary
-index record.
-@return 0 if committed, else the active transaction id;
-NOTE that this function can return false positives but never false
-negatives. The caller must confirm all positive results by calling
-trx_is_active() while holding lock_sys->mutex. */
-UNIV_INTERN
-trx_id_t
-row_vers_impl_x_locked(
-/*===================*/
- const rec_t* rec, /*!< in: record in a secondary index */
- dict_index_t* index, /*!< in: the secondary index */
- const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
-/*****************************************************************//**
-Finds out if we must preserve a delete marked earlier version of a clustered
-index record, because it is >= the purge view.
-@return TRUE if earlier version should be preserved */
-UNIV_INTERN
-ibool
-row_vers_must_preserve_del_marked(
-/*==============================*/
- trx_id_t trx_id, /*!< in: transaction id in the version */
- mtr_t* mtr); /*!< in: mtr holding the latch on the
- clustered index record; it will also
- hold the latch on purge_view */
-/*****************************************************************//**
-Finds out if a version of the record, where the version >= the current
-purge view, should have ientry as its secondary index entry. We check
-if there is any not delete marked version of the record where the trx
-id >= purge view, and the secondary index entry == ientry; exactly in
-this case we return TRUE.
-@return TRUE if earlier version should have */
-UNIV_INTERN
-ibool
-row_vers_old_has_index_entry(
-/*=========================*/
- ibool also_curr,/*!< in: TRUE if also rec is included in the
- versions to search; otherwise only versions
- prior to it are searched */
- const rec_t* rec, /*!< in: record in the clustered index; the
- caller must have a latch on the page */
- mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will
- also hold the latch on purge_view */
- dict_index_t* index, /*!< in: the secondary index */
- const dtuple_t* ientry);/*!< in: the secondary index entry */
-/*****************************************************************//**
-Constructs the version of a clustered index record which a consistent
-read should see. We assume that the trx id stored in rec is such that
-the consistent read should not see rec in its present version.
-@return DB_SUCCESS or DB_MISSING_HISTORY */
-UNIV_INTERN
-dberr_t
-row_vers_build_for_consistent_read(
-/*===============================*/
- const rec_t* rec, /*!< in: record in a clustered index; the
- caller must have a latch on the page; this
- latch locks the top of the stack of versions
- of this records */
- mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will
- also hold the latch on purge_view */
- dict_index_t* index, /*!< in: the clustered index */
- ulint** offsets,/*!< in/out: offsets returned by
- rec_get_offsets(rec, index) */
- read_view_t* view, /*!< in: the consistent read view */
- mem_heap_t** offset_heap,/*!< in/out: memory heap from which
- the offsets are allocated */
- mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
- *old_vers is allocated; memory for possible
- intermediate versions is allocated and freed
- locally within the function */
- rec_t** old_vers)/*!< out, own: old version, or NULL
- if the history is missing or the record
- does not exist in the view, that is,
- it was freshly inserted afterwards */
- MY_ATTRIBUTE((nonnull(1,2,3,4,5,6,7)));
-
-/*****************************************************************//**
-Constructs the last committed version of a clustered index record,
-which should be seen by a semi-consistent read. */
-UNIV_INTERN
-void
-row_vers_build_for_semi_consistent_read(
-/*====================================*/
- const rec_t* rec, /*!< in: record in a clustered index; the
- caller must have a latch on the page; this
- latch locks the top of the stack of versions
- of this records */
- mtr_t* mtr, /*!< in: mtr holding the latch on rec */
- dict_index_t* index, /*!< in: the clustered index */
- ulint** offsets,/*!< in/out: offsets returned by
- rec_get_offsets(rec, index) */
- mem_heap_t** offset_heap,/*!< in/out: memory heap from which
- the offsets are allocated */
- mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
- *old_vers is allocated; memory for possible
- intermediate versions is allocated and freed
- locally within the function */
- const rec_t** old_vers)/*!< out: rec, old version, or NULL if the
- record does not exist in the view, that is,
- it was freshly inserted afterwards */
- MY_ATTRIBUTE((nonnull(1,2,3,4,5)));
-
-
-#ifndef UNIV_NONINL
-#include "row0vers.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/row0vers.ic b/storage/xtradb/include/row0vers.ic
deleted file mode 100644
index ef43a55bf70..00000000000
--- a/storage/xtradb/include/row0vers.ic
+++ /dev/null
@@ -1,30 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/row0vers.ic
-Row versions
-
-Created 2/6/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0row.h"
-#include "dict0dict.h"
-#include "read0read.h"
-#include "page0page.h"
-#include "log0recv.h"
diff --git a/storage/xtradb/include/srv0conc.h b/storage/xtradb/include/srv0conc.h
deleted file mode 100644
index cf61ef5528d..00000000000
--- a/storage/xtradb/include/srv0conc.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file srv/srv0conc.h
-
-InnoDB concurrency manager header file
-
-Created 2011/04/18 Sunny Bains
-*******************************************************/
-
-#ifndef srv_conc_h
-#define srv_conc_h
-
-/** We are prepared for a situation that we have this many threads waiting for
-a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
-value. */
-
-extern ulint srv_max_n_threads;
-
-/** The following controls how many threads we let inside InnoDB concurrently:
-threads waiting for locks are not counted into the number because otherwise
-we could get a deadlock. Value of 0 will disable the concurrency check. */
-
-extern ulong srv_thread_concurrency;
-
-/*********************************************************************//**
-Initialise the concurrency management data structures */
-void
-srv_conc_init(void);
-/*===============*/
-
-/*********************************************************************//**
-Free the concurrency management data structures */
-void
-srv_conc_free(void);
-/*===============*/
-
-/*********************************************************************//**
-Puts an OS thread to wait if there are too many concurrent threads
-(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
-UNIV_INTERN
-void
-srv_conc_enter_innodb(
-/*==================*/
- trx_t* trx); /*!< in: transaction object associated
- with the thread */
-
-/*********************************************************************//**
-This lets a thread enter InnoDB regardless of the number of threads inside
-InnoDB. This must be called when a thread ends a lock wait. */
-UNIV_INTERN
-void
-srv_conc_force_enter_innodb(
-/*========================*/
- trx_t* trx); /*!< in: transaction object associated with
- the thread */
-
-/*********************************************************************//**
-This must be called when a thread exits InnoDB in a lock wait or at the
-end of an SQL statement. */
-UNIV_INTERN
-void
-srv_conc_force_exit_innodb(
-/*=======================*/
- trx_t* trx); /*!< in: transaction object associated with
- the thread */
-
-/*********************************************************************//**
-Get the count of threads waiting inside InnoDB. */
-UNIV_INTERN
-ulint
-srv_conc_get_waiting_threads(void);
-/*==============================*/
-
-/*********************************************************************//**
-Get the count of threads active inside InnoDB. */
-UNIV_INTERN
-ulint
-srv_conc_get_active_threads(void);
-/*==============================*/
-
-#endif /* srv_conc_h */
diff --git a/storage/xtradb/include/srv0mon.h b/storage/xtradb/include/srv0mon.h
deleted file mode 100644
index 63fd449ee18..00000000000
--- a/storage/xtradb/include/srv0mon.h
+++ /dev/null
@@ -1,961 +0,0 @@
-/***********************************************************************
-
-Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-***********************************************************************/
-
-/**************************************************//**
-@file include/srv0mon.h
-Server monitor counter related defines
-
-Created 12/15/2009 Jimmy Yang
-*******************************************************/
-
-#ifndef srv0mon_h
-#define srv0mon_h
-
-#include "univ.i"
-#ifndef UNIV_HOTBACKUP
-
-
-/** Possible status values for "mon_status" in "struct monitor_value" */
-enum monitor_running_status {
- MONITOR_STARTED = 1, /*!< Monitor has been turned on */
- MONITOR_STOPPED = 2 /*!< Monitor has been turned off */
-};
-
-typedef enum monitor_running_status monitor_running_t;
-
-/** Monitor counter value type */
-typedef ib_int64_t mon_type_t;
-
-/** Two monitor structures are defined in this file. One is
-"monitor_value_t" which contains dynamic counter values for each
-counter. The other is "monitor_info_t", which contains
-static information (counter name, desc etc.) for each counter.
-In addition, an enum datatype "monitor_id_t" is also defined,
-it identifies each monitor with an internally used symbol, whose
-integer value indexes into above two structure for its dynamic
-and static information.
-Developer who intend to add new counters would require to
-fill in counter information as described in "monitor_info_t" and
-create the internal counter ID in "monitor_id_t". */
-
-/** Structure containing the actual values of a monitor counter. */
-struct monitor_value_t {
- ib_time_t mon_start_time; /*!< Start time of monitoring */
- ib_time_t mon_stop_time; /*!< Stop time of monitoring */
- ib_time_t mon_reset_time; /*!< Time counter resetted */
- mon_type_t mon_value; /*!< Current counter Value */
- mon_type_t mon_max_value; /*!< Current Max value */
- mon_type_t mon_min_value; /*!< Current Min value */
- mon_type_t mon_value_reset;/*!< value at last reset */
- mon_type_t mon_max_value_start; /*!< Max value since start */
- mon_type_t mon_min_value_start; /*!< Min value since start */
- mon_type_t mon_start_value;/*!< Value at the start time */
- mon_type_t mon_last_value; /*!< Last set of values */
- monitor_running_t mon_status; /* whether monitor still running */
-};
-
-/** Follwoing defines are possible values for "monitor_type" field in
-"struct monitor_info" */
-enum monitor_type_t {
- MONITOR_NONE = 0, /*!< No monitoring */
- MONITOR_MODULE = 1, /*!< This is a monitor module type,
- not a counter */
- MONITOR_EXISTING = 2, /*!< The monitor carries information from
- an existing system status variable */
- MONITOR_NO_AVERAGE = 4, /*!< Set this status if we don't want to
- calculate the average value for the counter */
- MONITOR_DISPLAY_CURRENT = 8, /*!< Display current value of the
- counter, rather than incremental value
- over the period. Mostly for counters
- displaying current resource usage */
- MONITOR_GROUP_MODULE = 16, /*!< Monitor can be turned on/off
- only as a module, but not individually */
- MONITOR_DEFAULT_ON = 32,/*!< Monitor will be turned on by default at
- server start up */
- MONITOR_SET_OWNER = 64, /*!< Owner of "monitor set", a set of
- monitor counters */
- MONITOR_SET_MEMBER = 128,/*!< Being part of a "monitor set" */
- MONITOR_HIDDEN = 256 /*!< Do not display this monitor in the
- metrics table */
-};
-
-/** Counter minimum value is initialized to be max value of
- mon_type_t (ib_int64_t) */
-#define MIN_RESERVED ((mon_type_t) (IB_UINT64_MAX >> 1))
-#define MAX_RESERVED (~MIN_RESERVED)
-
-/** This enumeration defines internal monitor identifier used internally
-to identify each particular counter. Its value indexes into two arrays,
-one is the "innodb_counter_value" array which records actual monitor
-counter values, the other is "innodb_counter_info" array which describes
-each counter's basic information (name, desc etc.). A couple of
-naming rules here:
-1) If the monitor defines a module, it starts with MONITOR_MODULE
-2) If the monitor uses exisitng counters from "status variable", its ID
-name shall start with MONITOR_OVLD
-
-Please refer to "innodb_counter_info" in srv/srv0mon.cc for detail
-information for each monitor counter */
-
-enum monitor_id_t {
- /* This is to identify the default value set by the metrics
- control global variables */
- MONITOR_DEFAULT_START = 0,
-
- /* Start of Metadata counter */
- MONITOR_MODULE_METADATA,
- MONITOR_TABLE_OPEN,
- MONITOR_TABLE_CLOSE,
- MONITOR_TABLE_REFERENCE,
- MONITOR_OVLD_META_MEM_POOL,
-
- /* Lock manager related counters */
- MONITOR_MODULE_LOCK,
- MONITOR_DEADLOCK,
- MONITOR_TIMEOUT,
- MONITOR_LOCKREC_WAIT,
- MONITOR_TABLELOCK_WAIT,
- MONITOR_NUM_RECLOCK_REQ,
- MONITOR_RECLOCK_CREATED,
- MONITOR_RECLOCK_REMOVED,
- MONITOR_NUM_RECLOCK,
- MONITOR_TABLELOCK_CREATED,
- MONITOR_TABLELOCK_REMOVED,
- MONITOR_NUM_TABLELOCK,
- MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT,
- MONITOR_OVLD_LOCK_WAIT_TIME,
- MONITOR_OVLD_LOCK_MAX_WAIT_TIME,
- MONITOR_OVLD_ROW_LOCK_WAIT,
- MONITOR_OVLD_LOCK_AVG_WAIT_TIME,
-
- /* Buffer and I/O realted counters. */
- MONITOR_MODULE_BUFFER,
- MONITOR_OVLD_BUFFER_POOL_SIZE,
- MONITOR_OVLD_BUF_POOL_READS,
- MONITOR_OVLD_BUF_POOL_READ_REQUESTS,
- MONITOR_OVLD_BUF_POOL_WRITE_REQUEST,
- MONITOR_OVLD_BUF_POOL_WAIT_FREE,
- MONITOR_OVLD_BUF_POOL_READ_AHEAD,
- MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED,
- MONITOR_OVLD_BUF_POOL_PAGE_TOTAL,
- MONITOR_OVLD_BUF_POOL_PAGE_MISC,
- MONITOR_OVLD_BUF_POOL_PAGES_DATA,
- MONITOR_OVLD_BUF_POOL_BYTES_DATA,
- MONITOR_OVLD_BUF_POOL_PAGES_DIRTY,
- MONITOR_OVLD_BUF_POOL_BYTES_DIRTY,
- MONITOR_OVLD_BUF_POOL_PAGES_FREE,
- MONITOR_OVLD_PAGE_CREATED,
- MONITOR_OVLD_PAGES_WRITTEN,
- MONITOR_OVLD_INDEX_PAGES_WRITTEN,
- MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN,
- MONITOR_OVLD_PAGES_READ,
- MONITOR_OVLD_PAGES0_READ,
- MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS,
- MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS_AVOIDED,
- MONITOR_OVLD_BYTE_READ,
- MONITOR_OVLD_BYTE_WRITTEN,
- MONITOR_FLUSH_BATCH_SCANNED,
- MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
- MONITOR_FLUSH_BATCH_SCANNED_PER_CALL,
- MONITOR_FLUSH_HP_RESCAN,
- MONITOR_FLUSH_BATCH_TOTAL_PAGE,
- MONITOR_FLUSH_BATCH_COUNT,
- MONITOR_FLUSH_BATCH_PAGES,
- MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
- MONITOR_FLUSH_NEIGHBOR_COUNT,
- MONITOR_FLUSH_NEIGHBOR_PAGES,
- MONITOR_FLUSH_N_TO_FLUSH_REQUESTED,
- MONITOR_FLUSH_AVG_PAGE_RATE,
- MONITOR_FLUSH_LSN_AVG_RATE,
- MONITOR_FLUSH_PCT_FOR_DIRTY,
- MONITOR_FLUSH_PCT_FOR_LSN,
- MONITOR_FLUSH_SYNC_WAITS,
- MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
- MONITOR_FLUSH_ADAPTIVE_COUNT,
- MONITOR_FLUSH_ADAPTIVE_PAGES,
- MONITOR_FLUSH_SYNC_TOTAL_PAGE,
- MONITOR_FLUSH_SYNC_COUNT,
- MONITOR_FLUSH_SYNC_PAGES,
- MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
- MONITOR_FLUSH_BACKGROUND_COUNT,
- MONITOR_FLUSH_BACKGROUND_PAGES,
- MONITOR_LRU_BATCH_SCANNED,
- MONITOR_LRU_BATCH_SCANNED_NUM_CALL,
- MONITOR_LRU_BATCH_SCANNED_PER_CALL,
- MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE,
- MONITOR_LRU_BATCH_FLUSH_COUNT,
- MONITOR_LRU_BATCH_FLUSH_PAGES,
- MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE,
- MONITOR_LRU_BATCH_EVICT_COUNT,
- MONITOR_LRU_BATCH_EVICT_PAGES,
- MONITOR_LRU_SINGLE_FLUSH_SCANNED,
- MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL,
- MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL,
- MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT,
- MONITOR_LRU_GET_FREE_SEARCH,
- MONITOR_LRU_SEARCH_SCANNED,
- MONITOR_LRU_SEARCH_SCANNED_NUM_CALL,
- MONITOR_LRU_SEARCH_SCANNED_PER_CALL,
- MONITOR_LRU_UNZIP_SEARCH_SCANNED,
- MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL,
- MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL,
-
- /* Buffer Page I/O specific counters. */
- MONITOR_MODULE_BUF_PAGE,
- MONITOR_INDEX_LEAF_PAGE_READ,
- MONITOR_INDEX_NON_LEAF_PAGE_READ,
- MONITOR_INDEX_IBUF_LEAF_PAGE_READ,
- MONITOR_INDEX_IBUF_NON_LEAF_PAGE_READ,
- MONITOR_UNDO_LOG_PAGE_READ,
- MONITOR_INODE_PAGE_READ,
- MONITOR_IBUF_FREELIST_PAGE_READ,
- MONITOR_IBUF_BITMAP_PAGE_READ,
- MONITOR_SYSTEM_PAGE_READ,
- MONITOR_TRX_SYSTEM_PAGE_READ,
- MONITOR_FSP_HDR_PAGE_READ,
- MONITOR_XDES_PAGE_READ,
- MONITOR_BLOB_PAGE_READ,
- MONITOR_ZBLOB_PAGE_READ,
- MONITOR_ZBLOB2_PAGE_READ,
- MONITOR_OTHER_PAGE_READ,
- MONITOR_INDEX_LEAF_PAGE_WRITTEN,
- MONITOR_INDEX_NON_LEAF_PAGE_WRITTEN,
- MONITOR_INDEX_IBUF_LEAF_PAGE_WRITTEN,
- MONITOR_INDEX_IBUF_NON_LEAF_PAGE_WRITTEN,
- MONITOR_UNDO_LOG_PAGE_WRITTEN,
- MONITOR_INODE_PAGE_WRITTEN,
- MONITOR_IBUF_FREELIST_PAGE_WRITTEN,
- MONITOR_IBUF_BITMAP_PAGE_WRITTEN,
- MONITOR_SYSTEM_PAGE_WRITTEN,
- MONITOR_TRX_SYSTEM_PAGE_WRITTEN,
- MONITOR_FSP_HDR_PAGE_WRITTEN,
- MONITOR_XDES_PAGE_WRITTEN,
- MONITOR_BLOB_PAGE_WRITTEN,
- MONITOR_ZBLOB_PAGE_WRITTEN,
- MONITOR_ZBLOB2_PAGE_WRITTEN,
- MONITOR_OTHER_PAGE_WRITTEN,
-
- /* OS level counters (I/O) */
- MONITOR_MODULE_OS,
- MONITOR_OVLD_OS_FILE_READ,
- MONITOR_OVLD_OS_FILE_WRITE,
- MONITOR_OVLD_OS_FSYNC,
- MONITOR_OS_PENDING_READS,
- MONITOR_OS_PENDING_WRITES,
- MONITOR_OVLD_OS_LOG_WRITTEN,
- MONITOR_OVLD_OS_LOG_FSYNC,
- MONITOR_OVLD_OS_LOG_PENDING_FSYNC,
- MONITOR_OVLD_OS_LOG_PENDING_WRITES,
-
- /* Transaction related counters */
- MONITOR_MODULE_TRX,
- MONITOR_TRX_RW_COMMIT,
- MONITOR_TRX_RO_COMMIT,
- MONITOR_TRX_NL_RO_COMMIT,
- MONITOR_TRX_COMMIT_UNDO,
- MONITOR_TRX_ROLLBACK,
- MONITOR_TRX_ROLLBACK_SAVEPOINT,
- MONITOR_TRX_ROLLBACK_ACTIVE,
- MONITOR_TRX_ACTIVE,
- MONITOR_RSEG_HISTORY_LEN,
- MONITOR_NUM_UNDO_SLOT_USED,
- MONITOR_NUM_UNDO_SLOT_CACHED,
- MONITOR_RSEG_CUR_SIZE,
-
- /* Purge related counters */
- MONITOR_MODULE_PURGE,
- MONITOR_N_DEL_ROW_PURGE,
- MONITOR_N_UPD_EXIST_EXTERN,
- MONITOR_PURGE_INVOKED,
- MONITOR_PURGE_N_PAGE_HANDLED,
- MONITOR_DML_PURGE_DELAY,
- MONITOR_PURGE_STOP_COUNT,
- MONITOR_PURGE_RESUME_COUNT,
-
- /* Recovery related counters */
- MONITOR_MODULE_RECOVERY,
- MONITOR_NUM_CHECKPOINT,
- MONITOR_OVLD_LSN_FLUSHDISK,
- MONITOR_OVLD_LSN_CHECKPOINT,
- MONITOR_OVLD_LSN_CURRENT,
- MONITOR_LSN_CHECKPOINT_AGE,
- MONITOR_OVLD_BUF_OLDEST_LSN,
- MONITOR_OVLD_MAX_AGE_ASYNC,
- MONITOR_OVLD_MAX_AGE_SYNC,
- MONITOR_PENDING_LOG_WRITE,
- MONITOR_PENDING_CHECKPOINT_WRITE,
- MONITOR_LOG_IO,
- MONITOR_OVLD_LOG_WAITS,
- MONITOR_OVLD_LOG_WRITE_REQUEST,
- MONITOR_OVLD_LOG_WRITES,
-
- /* Page Manager related counters */
- MONITOR_MODULE_PAGE,
- MONITOR_PAGE_COMPRESS,
- MONITOR_PAGE_DECOMPRESS,
- MONITOR_PAD_INCREMENTS,
- MONITOR_PAD_DECREMENTS,
-
- /* New monitor variables for page compression */
- MONITOR_OVLD_PAGE_COMPRESS_SAVED,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384,
- MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768,
- MONITOR_OVLD_PAGES_PAGE_COMPRESSED,
- MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP,
- MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED,
- MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED,
- MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR,
-
- /* New monitor variables for page encryption */
- MONITOR_OVLD_PAGES_ENCRYPTED,
- MONITOR_OVLD_PAGES_DECRYPTED,
-
- /* Index related counters */
- MONITOR_MODULE_INDEX,
- MONITOR_INDEX_SPLIT,
- MONITOR_INDEX_MERGE_ATTEMPTS,
- MONITOR_INDEX_MERGE_SUCCESSFUL,
- MONITOR_INDEX_REORG_ATTEMPTS,
- MONITOR_INDEX_REORG_SUCCESSFUL,
- MONITOR_INDEX_DISCARD,
-
- /* Adaptive Hash Index related counters */
- MONITOR_MODULE_ADAPTIVE_HASH,
- MONITOR_OVLD_ADAPTIVE_HASH_SEARCH,
- MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE,
- MONITOR_ADAPTIVE_HASH_PAGE_ADDED,
- MONITOR_ADAPTIVE_HASH_PAGE_REMOVED,
- MONITOR_ADAPTIVE_HASH_ROW_ADDED,
- MONITOR_ADAPTIVE_HASH_ROW_REMOVED,
- MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND,
- MONITOR_ADAPTIVE_HASH_ROW_UPDATED,
-
- /* Tablespace related counters */
- MONITOR_MODULE_FIL_SYSTEM,
- MONITOR_OVLD_N_FILE_OPENED,
-
- /* InnoDB Change Buffer related counters */
- MONITOR_MODULE_IBUF_SYSTEM,
- MONITOR_OVLD_IBUF_MERGE_INSERT,
- MONITOR_OVLD_IBUF_MERGE_DELETE,
- MONITOR_OVLD_IBUF_MERGE_PURGE,
- MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT,
- MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE,
- MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE,
- MONITOR_OVLD_IBUF_MERGES,
- MONITOR_OVLD_IBUF_SIZE,
-
- /* Counters for server operations */
- MONITOR_MODULE_SERVER,
- MONITOR_MASTER_THREAD_SLEEP,
- MONITOR_OVLD_SERVER_ACTIVITY,
- MONITOR_MASTER_ACTIVE_LOOPS,
- MONITOR_MASTER_IDLE_LOOPS,
- MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
- MONITOR_SRV_IBUF_MERGE_MICROSECOND,
- MONITOR_SRV_LOG_FLUSH_MICROSECOND,
- MONITOR_SRV_MEM_VALIDATE_MICROSECOND,
- MONITOR_SRV_PURGE_MICROSECOND,
- MONITOR_SRV_DICT_LRU_MICROSECOND,
- MONITOR_SRV_DICT_LRU_EVICT_COUNT_ACTIVE,
- MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE,
- MONITOR_SRV_CHECKPOINT_MICROSECOND,
- MONITOR_OVLD_SRV_DBLWR_WRITES,
- MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN,
- MONITOR_OVLD_SRV_PAGE_SIZE,
- MONITOR_OVLD_RWLOCK_S_SPIN_WAITS,
- MONITOR_OVLD_RWLOCK_X_SPIN_WAITS,
- MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS,
- MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS,
- MONITOR_OVLD_RWLOCK_S_OS_WAITS,
- MONITOR_OVLD_RWLOCK_X_OS_WAITS,
-
- /* Data DML related counters */
- MONITOR_MODULE_DML_STATS,
- MONITOR_OLVD_ROW_READ,
- MONITOR_OLVD_ROW_INSERTED,
- MONITOR_OLVD_ROW_DELETED,
- MONITOR_OLVD_ROW_UPDTATED,
- MONITOR_OLVD_SYSTEM_ROW_READ,
- MONITOR_OLVD_SYSTEM_ROW_INSERTED,
- MONITOR_OLVD_SYSTEM_ROW_DELETED,
- MONITOR_OLVD_SYSTEM_ROW_UPDATED,
-
- /* Data DDL related counters */
- MONITOR_MODULE_DDL_STATS,
- MONITOR_BACKGROUND_DROP_INDEX,
- MONITOR_BACKGROUND_DROP_TABLE,
- MONITOR_ONLINE_CREATE_INDEX,
- MONITOR_PENDING_ALTER_TABLE,
-
- MONITOR_MODULE_ICP,
- MONITOR_ICP_ATTEMPTS,
- MONITOR_ICP_NO_MATCH,
- MONITOR_ICP_OUT_OF_RANGE,
- MONITOR_ICP_MATCH,
-
- /* This is used only for control system to turn
- on/off and reset all monitor counters */
- MONITOR_ALL_COUNTER,
-
- /* This must be the last member */
- NUM_MONITOR
-};
-
-/** This informs the monitor control system to turn
-on/off and reset monitor counters through wild card match */
-#define MONITOR_WILDCARD_MATCH (NUM_MONITOR + 1)
-
-/** Cannot find monitor counter with a specified name */
-#define MONITOR_NO_MATCH (NUM_MONITOR + 2)
-
-/** struct monitor_info describes the basic/static information
-about each monitor counter. */
-struct monitor_info_t {
- const char* monitor_name; /*!< Monitor name */
- const char* monitor_module; /*!< Sub Module the monitor
- belongs to */
- const char* monitor_desc; /*!< Brief desc of monitor counter */
- monitor_type_t monitor_type; /*!< Type of Monitor Info */
- monitor_id_t monitor_related_id;/*!< Monitor ID of counter that
- related to this monitor. This is
- set when the monitor belongs to
- a "monitor set" */
- monitor_id_t monitor_id; /*!< Monitor ID as defined in enum
- monitor_id_t */
-};
-
-/** Following are the "set_option" values allowed for
-srv_mon_process_existing_counter() and srv_mon_process_existing_counter()
-functions. To turn on/off/reset the monitor counters. */
-enum mon_option_t {
- MONITOR_TURN_ON = 1, /*!< Turn on the counter */
- MONITOR_TURN_OFF, /*!< Turn off the counter */
- MONITOR_RESET_VALUE, /*!< Reset current values */
- MONITOR_RESET_ALL_VALUE, /*!< Reset all values */
- MONITOR_GET_VALUE /*!< Option for
- srv_mon_process_existing_counter()
- function */
-};
-
-/** Number of bit in a ulint datatype */
-#define NUM_BITS_ULINT (sizeof(ulint) * CHAR_BIT)
-
-/** This "monitor_set_tbl" is a bitmap records whether a particular monitor
-counter has been turned on or off */
-extern ulint monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT - 1) /
- NUM_BITS_ULINT];
-
-/** Macros to turn on/off the control bit in monitor_set_tbl for a monitor
-counter option. */
-#define MONITOR_ON(monitor) \
- (monitor_set_tbl[monitor / NUM_BITS_ULINT] |= \
- ((ulint)1 << (monitor % NUM_BITS_ULINT)))
-
-#define MONITOR_OFF(monitor) \
- (monitor_set_tbl[monitor / NUM_BITS_ULINT] &= \
- ~((ulint)1 << (monitor % NUM_BITS_ULINT)))
-
-/** Check whether the requested monitor is turned on/off */
-#define MONITOR_IS_ON(monitor) \
- (monitor_set_tbl[monitor / NUM_BITS_ULINT] & \
- ((ulint)1 << (monitor % NUM_BITS_ULINT)))
-
-/** The actual monitor counter array that records each monintor counter
-value */
-extern monitor_value_t innodb_counter_value[NUM_MONITOR];
-
-/** Following are macro defines for basic montior counter manipulations.
-Please note we do not provide any synchronization for these monitor
-operations due to performance consideration. Most counters can
-be placed under existing mutex protections in respective code
-module. */
-
-/** Macros to access various fields of a monitor counters */
-#define MONITOR_FIELD(monitor, field) \
- (innodb_counter_value[monitor].field)
-
-#define MONITOR_VALUE(monitor) \
- MONITOR_FIELD(monitor, mon_value)
-
-#define MONITOR_MAX_VALUE(monitor) \
- MONITOR_FIELD(monitor, mon_max_value)
-
-#define MONITOR_MIN_VALUE(monitor) \
- MONITOR_FIELD(monitor, mon_min_value)
-
-#define MONITOR_VALUE_RESET(monitor) \
- MONITOR_FIELD(monitor, mon_value_reset)
-
-#define MONITOR_MAX_VALUE_START(monitor) \
- MONITOR_FIELD(monitor, mon_max_value_start)
-
-#define MONITOR_MIN_VALUE_START(monitor) \
- MONITOR_FIELD(monitor, mon_min_value_start)
-
-#define MONITOR_LAST_VALUE(monitor) \
- MONITOR_FIELD(monitor, mon_last_value)
-
-#define MONITOR_START_VALUE(monitor) \
- MONITOR_FIELD(monitor, mon_start_value)
-
-#define MONITOR_VALUE_SINCE_START(monitor) \
- (MONITOR_VALUE(monitor) + MONITOR_VALUE_RESET(monitor))
-
-#define MONITOR_STATUS(monitor) \
- MONITOR_FIELD(monitor, mon_status)
-
-#define MONITOR_SET_START(monitor) \
- do { \
- MONITOR_STATUS(monitor) = MONITOR_STARTED; \
- MONITOR_FIELD((monitor), mon_start_time) = time(NULL); \
- } while (0)
-
-#define MONITOR_SET_OFF(monitor) \
- do { \
- MONITOR_STATUS(monitor) = MONITOR_STOPPED; \
- MONITOR_FIELD((monitor), mon_stop_time) = time(NULL); \
- } while (0)
-
-#define MONITOR_INIT_ZERO_VALUE 0
-
-/** Max and min values are initialized when we first turn on the monitor
-counter, and set the MONITOR_STATUS. */
-#define MONITOR_MAX_MIN_NOT_INIT(monitor) \
- (MONITOR_STATUS(monitor) == MONITOR_INIT_ZERO_VALUE \
- && MONITOR_MIN_VALUE(monitor) == MONITOR_INIT_ZERO_VALUE \
- && MONITOR_MAX_VALUE(monitor) == MONITOR_INIT_ZERO_VALUE)
-
-#define MONITOR_INIT(monitor) \
- if (MONITOR_MAX_MIN_NOT_INIT(monitor)) { \
- MONITOR_MIN_VALUE(monitor) = MIN_RESERVED; \
- MONITOR_MIN_VALUE_START(monitor) = MIN_RESERVED; \
- MONITOR_MAX_VALUE(monitor) = MAX_RESERVED; \
- MONITOR_MAX_VALUE_START(monitor) = MAX_RESERVED; \
- }
-
-/** Macros to increment/decrement the counters. The normal
-monitor counter operation expects appropriate synchronization
-already exists. No additional mutex is necessary when operating
-on the counters */
-#define MONITOR_INC(monitor) \
- if (MONITOR_IS_ON(monitor)) { \
- MONITOR_VALUE(monitor)++; \
- if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
- MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\
- } \
- }
-
-/** Increment a monitor counter under mutex protection.
-Use MONITOR_INC if appropriate mutex protection already exists.
-@param mutex mutex to acquire and release
-@param monitor monitor to be incremented by 1
-@param enabled whether the monitor is enabled */
-#define MONITOR_MUTEX_INC_LOW(mutex, monitor, enabled) \
- ut_ad(!mutex_own(mutex)); \
- if (enabled) { \
- mutex_enter(mutex); \
- if (++MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
- MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor); \
- } \
- mutex_exit(mutex); \
- }
-/** Increment a monitor counter under mutex protection.
-Use MONITOR_INC if appropriate mutex protection already exists.
-@param mutex mutex to acquire and release
-@param monitor monitor to be incremented by 1 */
-#define MONITOR_MUTEX_INC(mutex, monitor) \
- MONITOR_MUTEX_INC_LOW(mutex, monitor, MONITOR_IS_ON(monitor))
-/** Decrement a monitor counter under mutex protection.
-Use MONITOR_DEC if appropriate mutex protection already exists.
-@param mutex mutex to acquire and release
-@param monitor monitor to be decremented by 1
-@param enabled whether the monitor is enabled */
-#define MONITOR_MUTEX_DEC_LOW(mutex, monitor, enabled) \
- ut_ad(!mutex_own(mutex)); \
- if (MONITOR_IS_ON(monitor)) { \
- mutex_enter(mutex); \
- if (--MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \
- MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor); \
- } \
- mutex_exit(mutex); \
- }
-/** Decrement a monitor counter under mutex protection.
-Use MONITOR_DEC if appropriate mutex protection already exists.
-@param mutex mutex to acquire and release
-@param monitor monitor to be decremented by 1 */
-#define MONITOR_MUTEX_DEC(mutex, monitor) \
- MONITOR_MUTEX_DEC_LOW(mutex, monitor, MONITOR_IS_ON(monitor))
-
-#if defined HAVE_ATOMIC_BUILTINS_64
-/** Atomically increment a monitor counter.
-Use MONITOR_INC if appropriate mutex protection exists.
-@param monitor monitor to be incremented by 1
-@param enabled whether the monitor is enabled */
-# define MONITOR_ATOMIC_INC_LOW(monitor, enabled) \
- if (enabled) { \
- ib_uint64_t value; \
- value = os_atomic_increment_uint64( \
- (ib_uint64_t*) &MONITOR_VALUE(monitor), 1); \
- /* Note: This is not 100% accurate because of the \
- inherent race, we ignore it due to performance. */ \
- if (value > (ib_uint64_t) MONITOR_MAX_VALUE(monitor)) { \
- MONITOR_MAX_VALUE(monitor) = value; \
- } \
- }
-
-/** Atomically decrement a monitor counter.
-Use MONITOR_DEC if appropriate mutex protection exists.
-@param monitor monitor to be decremented by 1
-@param enabled whether the monitor is enabled */
-# define MONITOR_ATOMIC_DEC_LOW(monitor, enabled) \
- if (enabled) { \
- ib_uint64_t value; \
- value = os_atomic_decrement_uint64( \
- (ib_uint64_t*) &MONITOR_VALUE(monitor), 1); \
- /* Note: This is not 100% accurate because of the \
- inherent race, we ignore it due to performance. */ \
- if (value < (ib_uint64_t) MONITOR_MIN_VALUE(monitor)) { \
- MONITOR_MIN_VALUE(monitor) = value; \
- } \
- }
-# define srv_mon_create() ((void) 0)
-# define srv_mon_free() ((void) 0)
-#else /* HAVE_ATOMIC_BUILTINS_64 */
-/** Mutex protecting atomic operations on platforms that lack
-built-in operations for atomic memory access */
-extern ib_mutex_t monitor_mutex;
-/****************************************************************//**
-Initialize the monitor subsystem. */
-UNIV_INTERN
-void
-srv_mon_create(void);
-/*================*/
-/****************************************************************//**
-Close the monitor subsystem. */
-UNIV_INTERN
-void
-srv_mon_free(void);
-/*==============*/
-
-/** Atomically increment a monitor counter.
-Use MONITOR_INC if appropriate mutex protection exists.
-@param monitor monitor to be incremented by 1
-@param enabled whether the monitor is enabled */
-# define MONITOR_ATOMIC_INC_LOW(monitor, enabled) \
- MONITOR_MUTEX_INC_LOW(&monitor_mutex, monitor, enabled)
-/** Atomically decrement a monitor counter.
-Use MONITOR_DEC if appropriate mutex protection exists.
-@param monitor monitor to be decremented by 1
-@param enabled whether the monitor is enabled */
-# define MONITOR_ATOMIC_DEC_LOW(monitor, enabled) \
- MONITOR_MUTEX_DEC_LOW(&monitor_mutex, monitor, enabled)
-#endif /* HAVE_ATOMIC_BUILTINS_64 */
-
-/** Atomically increment a monitor counter if it is enabled.
-Use MONITOR_INC if appropriate mutex protection exists.
-@param monitor monitor to be incremented by 1 */
-#define MONITOR_ATOMIC_INC(monitor) \
- MONITOR_ATOMIC_INC_LOW(monitor, MONITOR_IS_ON(monitor))
-/** Atomically decrement a monitor counter if it is enabled.
-Use MONITOR_DEC if appropriate mutex protection exists.
-@param monitor monitor to be decremented by 1 */
-#define MONITOR_ATOMIC_DEC(monitor) \
- MONITOR_ATOMIC_DEC_LOW(monitor, MONITOR_IS_ON(monitor))
-
-#define MONITOR_DEC(monitor) \
- if (MONITOR_IS_ON(monitor)) { \
- MONITOR_VALUE(monitor)--; \
- if (MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \
- MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor);\
- } \
- }
-
-#ifdef UNIV_DEBUG_VALGRIND
-# define MONITOR_CHECK_DEFINED(value) do { \
- mon_type_t m = value; \
- UNIV_MEM_ASSERT_RW(&m, sizeof m); \
-} while (0)
-#else /* UNIV_DEBUG_VALGRIND */
-# define MONITOR_CHECK_DEFINED(value) (void) 0
-#endif /* UNIV_DEBUG_VALGRIND */
-
-#define MONITOR_INC_VALUE(monitor, value) \
- MONITOR_CHECK_DEFINED(value); \
- if (MONITOR_IS_ON(monitor)) { \
- MONITOR_VALUE(monitor) += (mon_type_t) (value); \
- if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
- MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\
- } \
- }
-
-#define MONITOR_DEC_VALUE(monitor, value) \
- MONITOR_CHECK_DEFINED(value); \
- if (MONITOR_IS_ON(monitor)) { \
- ut_ad(MONITOR_VALUE(monitor) >= (mon_type_t) (value); \
- MONITOR_VALUE(monitor) -= (mon_type_t) (value); \
- if (MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \
- MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor);\
- } \
- }
-
-/* Increment/decrement counter without check the monitor on/off bit, which
-could already be checked as a module group */
-#define MONITOR_INC_NOCHECK(monitor) \
- do { \
- MONITOR_VALUE(monitor)++; \
- if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \
- MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\
- } \
- } while (0) \
-
-#define MONITOR_DEC_NOCHECK(monitor) \
- do { \
- MONITOR_VALUE(monitor)--; \
- if (MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \
- MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor);\
- } \
- } while (0)
-
-/** Directly set a monitor counter's value, then widen the recorded maximum
-and minimum so that both still bracket the new value. The counter is touched
-only if MONITOR_IS_ON(monitor); MONITOR_CHECK_DEFINED(value) is evaluated
-either way.
-Wrapped in do { } while (0) so that the expansion is a single statement:
-previously an unbraced "if (cond) MONITOR_SET(m, v);" guarded only the
-definedness check, not the assignment.
-@param monitor	monitor to be set
-@param value	new counter value; may be evaluated more than once */
-#define	MONITOR_SET(monitor, value) \
-	do { \
-		MONITOR_CHECK_DEFINED(value); \
-		if (MONITOR_IS_ON(monitor)) { \
-			MONITOR_VALUE(monitor) = (mon_type_t) (value); \
-			if (MONITOR_VALUE(monitor) \
-			    > MONITOR_MAX_VALUE(monitor)) { \
-				MONITOR_MAX_VALUE(monitor) = \
-					MONITOR_VALUE(monitor); \
-			} \
-			if (MONITOR_VALUE(monitor) \
-			    < MONITOR_MIN_VALUE(monitor)) { \
-				MONITOR_MIN_VALUE(monitor) = \
-					MONITOR_VALUE(monitor); \
-			} \
-		} \
-	} while (0)
-
-/** Add the time elapsed since "value" to the monitor counter, and store
-the current ut_time_us(NULL) reading back into "value". Both happen only
-if MONITOR_IS_ON(monitor); MONITOR_CHECK_DEFINED(value) is evaluated either
-way.
-NOTE(review): the unit is presumably microseconds, going by the macro name
-and ut_time_us(); the earlier comment said seconds. Confirm against
-ut_time_us().
-Wrapped in do { } while (0) so that the expansion is a single statement
-that is safe in an unbraced if/else.
-@param monitor	monitor to update for the time difference
-@param value	the start time; must be a modifiable lvalue because it is
-		overwritten with the new time reading */
-#define	MONITOR_INC_TIME_IN_MICRO_SECS(monitor, value) \
-	do { \
-		MONITOR_CHECK_DEFINED(value); \
-		if (MONITOR_IS_ON(monitor)) { \
-			ullint	old_time = (value); \
-			(value) = ut_time_us(NULL); \
-			MONITOR_VALUE(monitor) += \
-				(mon_type_t) ((value) - old_time); \
-		} \
-	} while (0)
-
-/** Update three counters in one call. Only the main counter 'monitor' is
-checked with MONITOR_IS_ON() to decide whether any of the three is updated;
-the on/off state of the other two is not consulted.
-MONITOR_CHECK_DEFINED(value) is evaluated either way.
-Wrapped in do { } while (0) so that the expansion is a single statement:
-previously an unbraced "if (cond) MONITOR_INC_VALUE_CUMULATIVE(...);"
-guarded only the definedness check, not the counter updates.
-@param monitor		the main monitor counter to update. It accounts for
-			the accumulative value for the counter.
-@param monitor_n_calls	counter that counts number of times this macro is
-			called
-@param monitor_per_call	counter that records the current and max value of
-			each incremental value
-@param value		incremental value to record this time; may be
-			evaluated more than once */
-#define	MONITOR_INC_VALUE_CUMULATIVE( \
-		monitor, monitor_n_calls, monitor_per_call, value) \
-	do { \
-		MONITOR_CHECK_DEFINED(value); \
-		if (MONITOR_IS_ON(monitor)) { \
-			MONITOR_VALUE(monitor_n_calls)++; \
-			MONITOR_VALUE(monitor_per_call) = \
-				(mon_type_t) (value); \
-			if (MONITOR_VALUE(monitor_per_call) \
-			    > MONITOR_MAX_VALUE(monitor_per_call)) { \
-				MONITOR_MAX_VALUE(monitor_per_call) = \
-					(mon_type_t) (value); \
-			} \
-			MONITOR_VALUE(monitor) += (mon_type_t) (value); \
-			if (MONITOR_VALUE(monitor) \
-			    > MONITOR_MAX_VALUE(monitor)) { \
-				MONITOR_MAX_VALUE(monitor) = \
-					MONITOR_VALUE(monitor); \
-			} \
-		} \
-	} while (0)
-
-/** Directly set a monitor counter's value and update only the recorded
-maximum; intended for values that are monotonically increasing, for which
-the minimum never needs updating. The counter is touched only if
-MONITOR_IS_ON(monitor); MONITOR_CHECK_DEFINED(value) is evaluated either way.
-Wrapped in do { } while (0) so that the expansion is a single statement
-that is safe in an unbraced if/else.
-@param monitor	monitor to be set
-@param value	new counter value; may be evaluated more than once */
-#define	MONITOR_SET_UPD_MAX_ONLY(monitor, value) \
-	do { \
-		MONITOR_CHECK_DEFINED(value); \
-		if (MONITOR_IS_ON(monitor)) { \
-			MONITOR_VALUE(monitor) = (mon_type_t) (value); \
-			if (MONITOR_VALUE(monitor) \
-			    > MONITOR_MAX_VALUE(monitor)) { \
-				MONITOR_MAX_VALUE(monitor) = \
-					MONITOR_VALUE(monitor); \
-			} \
-		} \
-	} while (0)
-
-/** Directly set a monitor counter's value without recording max/min
-values. Intended for values such as the log sequence number, which are
-monotonically increasing. The counter is touched only if
-MONITOR_IS_ON(monitor); MONITOR_CHECK_DEFINED(value) is evaluated either way.
-Wrapped in do { } while (0) so that the expansion is a single statement
-that is safe in an unbraced if/else.
-@param monitor	monitor to be set
-@param value	new counter value; may be evaluated more than once */
-#define	MONITOR_SET_SIMPLE(monitor, value) \
-	do { \
-		MONITOR_CHECK_DEFINED(value); \
-		if (MONITOR_IS_ON(monitor)) { \
-			MONITOR_VALUE(monitor) = (mon_type_t) (value); \
-		} \
-	} while (0)
-
-/** Reset every stored value of a monitor counter. The current value, the
-reset value, the last value and the start/stop/reset times are set to
-MONITOR_INIT_ZERO_VALUE; the max/min values (both since reset and since
-start) are set back to MAX_RESERVED and MIN_RESERVED respectively.
-The macro itself does not check whether the counter is on;
-srv_mon_reset_all() performs that check and only invokes the macro when
-the counter is turned off.
-@param monitor	monitor to be reset */
-#define MONITOR_RESET_ALL(monitor) \
- do { \
- MONITOR_VALUE(monitor) = MONITOR_INIT_ZERO_VALUE; \
- MONITOR_MAX_VALUE(monitor) = MAX_RESERVED; \
- MONITOR_MIN_VALUE(monitor) = MIN_RESERVED; \
- MONITOR_VALUE_RESET(monitor) = MONITOR_INIT_ZERO_VALUE; \
- MONITOR_MAX_VALUE_START(monitor) = MAX_RESERVED; \
- MONITOR_MIN_VALUE_START(monitor) = MIN_RESERVED; \
- MONITOR_LAST_VALUE(monitor) = MONITOR_INIT_ZERO_VALUE; \
- MONITOR_FIELD(monitor, mon_start_time) = \
- MONITOR_INIT_ZERO_VALUE; \
- MONITOR_FIELD(monitor, mon_stop_time) = \
- MONITOR_INIT_ZERO_VALUE; \
- MONITOR_FIELD(monitor, mon_reset_time) = \
- MONITOR_INIT_ZERO_VALUE; \
- } while (0)
-
-/** The following macros define the operations needed to fetch and
-consolidate information from existing system status variables. */
-
-/** Save the passed-in value, less MONITOR_VALUE_RESET(monitor), to
-MONITOR_START_VALUE(monitor). MONITOR_IS_ON(monitor) is not consulted.
-@param monitor	monitor whose start value is saved
-@param value	current value of the underlying status variable */
-#define MONITOR_SAVE_START(monitor, value) do { \
- MONITOR_CHECK_DEFINED(value); \
- (MONITOR_START_VALUE(monitor) = \
- (mon_type_t) (value) - MONITOR_VALUE_RESET(monitor)); \
- } while (0)
-
-/** Copy the monitor's current value to MONITOR_LAST_VALUE(monitor) and add
-the same current value to MONITOR_START_VALUE(monitor). The macro takes no
-value argument; it works on MONITOR_VALUE(monitor) only.
-@param monitor	monitor whose last value is saved */
-#define MONITOR_SAVE_LAST(monitor) \
- do { \
- MONITOR_LAST_VALUE(monitor) = MONITOR_VALUE(monitor); \
- MONITOR_START_VALUE(monitor) += MONITOR_VALUE(monitor); \
- } while (0)
-
-/** Set the monitor value, through MONITOR_SET_UPD_MAX_ONLY, to
-value - MONITOR_VALUE_RESET(monitor) - mon_start_value + mon_last_value,
-i.e. the difference of value and mon_start_value, compensated by
-mon_last_value if an accumulated value is required.
-@param monitor	monitor to be set
-@param value	current value of the underlying status variable */
-#define MONITOR_SET_DIFF(monitor, value) \
- MONITOR_SET_UPD_MAX_ONLY(monitor, ((value) \
- - MONITOR_VALUE_RESET(monitor) \
- - MONITOR_FIELD(monitor, mon_start_value) \
- + MONITOR_FIELD(monitor, mon_last_value)))
-
-/****************************************************************//**
-Get a monitor's monitor_info_t by its monitor id (the index into the
-innodb_counter_info array).
-@return pointer to the corresponding monitor_info_t, or NULL if no such
-monitor */
-UNIV_INTERN
-monitor_info_t*
-srv_mon_get_info(
-/*=============*/
- monitor_id_t monitor_id); /*!< in: id index into the
- innodb_counter_info array */
-/****************************************************************//**
-Get a monitor's name by its monitor id (the index into the
-innodb_counter_info array).
-@return the corresponding monitor name, or NULL if no such
-monitor */
-UNIV_INTERN
-const char*
-srv_mon_get_name(
-/*=============*/
- monitor_id_t monitor_id); /*!< in: id index into the
- innodb_counter_info array */
-
-/****************************************************************//**
-Turn on/off/reset monitor counters in a module. If module_id
-is NUM_MONITOR then turn on all monitor counters.
-No value is returned (the function is declared void). */
-UNIV_INTERN
-void
-srv_mon_set_module_control(
-/*=======================*/
- monitor_id_t module_id, /*!< in: Module ID as in
- monitor_counter_id. If it is
- set to NUM_MONITOR, this means
- we shall turn on all the counters */
- mon_option_t set_option); /*!< in: Turn on/off reset the
- counter */
-/****************************************************************//**
-This function consolidates some existing server counters used
-by "system status variables". These existing system variables have no
-mechanism to start/stop and reset the counters, so we simulate these
-controls by remembering the corresponding counter values when the
-corresponding monitors are turned on/off/reset, and do the appropriate
-arithmetic to derive the actual value. */
-UNIV_INTERN
-void
-srv_mon_process_existing_counter(
-/*=============================*/
- monitor_id_t monitor_id, /*!< in: the monitor's ID as in
- monitor_counter_id */
- mon_option_t set_option); /*!< in: Turn on/off reset the
- counter */
-/*************************************************************//**
-Calculate the maximum counter value since the start of the monitor
-counter. As a side effect the result is stored in
-MONITOR_MAX_VALUE_START(monitor); the definition is in srv0mon.ic.
-@return max counter value since start. */
-UNIV_INLINE
-mon_type_t
-srv_mon_calc_max_since_start(
-/*=========================*/
- monitor_id_t monitor); /*!< in: monitor id */
-/*************************************************************//**
-Calculate the minimum counter value since the start of the monitor
-counter. As a side effect the result is stored in
-MONITOR_MIN_VALUE_START(monitor); the definition is in srv0mon.ic.
-@return min counter value since start. */
-UNIV_INLINE
-mon_type_t
-srv_mon_calc_min_since_start(
-/*=========================*/
- monitor_id_t monitor); /*!< in: monitor id*/
-/*************************************************************//**
-Reset a monitor, create a new base line with the current monitor
-value. This baseline is recorded by MONITOR_VALUE_RESET(monitor) */
-UNIV_INTERN
-void
-srv_mon_reset(
-/*==========*/
- monitor_id_t monitor); /*!< in: monitor id*/
-/*************************************************************//**
-Reset all values of a monitor counter. If the monitor is still on,
-nothing is reset and a message is written to stderr instead; the
-definition is in srv0mon.ic. */
-UNIV_INLINE
-void
-srv_mon_reset_all(
-/*==============*/
- monitor_id_t monitor); /*!< in: monitor id*/
-/*************************************************************//**
-Turn on monitor counters that are marked as default ON. */
-UNIV_INTERN
-void
-srv_mon_default_on(void);
-/*====================*/
-
-#ifndef UNIV_NONINL
-#include "srv0mon.ic"
-#endif
-#else /* !UNIV_HOTBACKUP */
-/* In this branch (UNIV_HOTBACKUP, going by the #else/#endif comments)
-MONITOR_INC and MONITOR_DEC expand to no-op expressions. */
-# define MONITOR_INC(x) ((void) 0)
-# define MONITOR_DEC(x) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-#endif
diff --git a/storage/xtradb/include/srv0mon.ic b/storage/xtradb/include/srv0mon.ic
deleted file mode 100644
index 225390c6b6f..00000000000
--- a/storage/xtradb/include/srv0mon.ic
+++ /dev/null
@@ -1,113 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/srv0mon.ic
-Server monitoring system
-
-Created 1/20/2010 Jimmy Yang
-************************************************************************/
-
-/*************************************************************//**
-Work out the largest value a monitor counter has reached since it was
-started. The result is also kept in MONITOR_MAX_VALUE_START(monitor).
-@return max counter value since start. */
-UNIV_INLINE
-mon_type_t
-srv_mon_calc_max_since_start(
-/*=========================*/
-	monitor_id_t	monitor)	/*!< in: monitor id */
-{
-	if (MONITOR_MAX_VALUE_START(monitor) != MAX_RESERVED) {
-
-		/* A maximum since start is already on record. Replace
-		it only when a maximum since the last reset exists and,
-		with the reset value added back, exceeds that record. */
-		if (MONITOR_MAX_VALUE(monitor) != MAX_RESERVED
-		    && (MONITOR_MAX_VALUE(monitor)
-			+ MONITOR_VALUE_RESET(monitor)
-			> MONITOR_MAX_VALUE_START(monitor))) {
-
-			MONITOR_MAX_VALUE_START(monitor) =
-				MONITOR_MAX_VALUE(monitor)
-				+ MONITOR_VALUE_RESET(monitor);
-		}
-	} else {
-		/* Nothing has been recorded since start yet, so the
-		maximum since start is simply MONITOR_MAX_VALUE. */
-		MONITOR_MAX_VALUE_START(monitor) =
-			MONITOR_MAX_VALUE(monitor);
-	}
-
-	return(MONITOR_MAX_VALUE_START(monitor));
-}
-
-/*************************************************************//**
-Work out the smallest value a monitor counter has reached since it was
-started. The result is also kept in MONITOR_MIN_VALUE_START(monitor).
-@return min counter value since start. */
-UNIV_INLINE
-mon_type_t
-srv_mon_calc_min_since_start(
-/*=========================*/
-	monitor_id_t	monitor)	/*!< in: monitor id */
-{
-	if (MONITOR_MIN_VALUE_START(monitor) != MIN_RESERVED) {
-
-		/* A minimum since start is already on record. Replace
-		it only when a minimum since the last reset exists and,
-		with the reset value added back, is below that record. */
-		if (MONITOR_MIN_VALUE(monitor) != MIN_RESERVED
-		    && (MONITOR_MIN_VALUE(monitor)
-			+ MONITOR_VALUE_RESET(monitor)
-			< MONITOR_MIN_VALUE_START(monitor))) {
-
-			MONITOR_MIN_VALUE_START(monitor) =
-				MONITOR_MIN_VALUE(monitor)
-				+ MONITOR_VALUE_RESET(monitor);
-		}
-	} else {
-		/* Nothing has been recorded since start yet, so the
-		minimum since start is simply MONITOR_MIN_VALUE. */
-		MONITOR_MIN_VALUE_START(monitor) =
-			MONITOR_MIN_VALUE(monitor);
-	}
-
-	return(MONITOR_MIN_VALUE_START(monitor));
-}
-
-/*************************************************************//**
-Reset every stored value of a monitor counter, provided that the
-counter is turned off; otherwise report the refusal on stderr. */
-UNIV_INLINE
-void
-srv_mon_reset_all(
-/*==============*/
-	monitor_id_t	monitor)	/*!< in: monitor id */
-{
-	if (!(MONITOR_IS_ON(monitor))) {
-		MONITOR_RESET_ALL(monitor);
-		return;
-	}
-
-	/* The counter values are not reset while the monitor is on. */
-	fprintf(stderr, "InnoDB: Cannot reset all values for "
-		"monitor counter %s while it is on. Please "
-		"turn it off and retry. \n",
-		srv_mon_get_name(monitor));
-}
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
deleted file mode 100644
index 4e98ce0f1cb..00000000000
--- a/storage/xtradb/include/srv0srv.h
+++ /dev/null
@@ -1,1351 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved.
-Copyright (c) 2008, 2009, Google Inc.
-Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/srv0srv.h
-The server main program
-
-Created 10/10/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef srv0srv_h
-#define srv0srv_h
-
-#include "univ.i"
-#ifndef UNIV_HOTBACKUP
-#include "log0log.h"
-#include "sync0sync.h"
-#include "os0sync.h"
-#include "que0types.h"
-#include "trx0types.h"
-#include "srv0conc.h"
-#include "buf0checksum.h"
-#include "ut0counter.h"
-
-/** Global counters used inside InnoDB. Members are either
-ib_counter_t<ulint, 64> (ulint_ctr_64_t) or simple_counter<> instantiations
-(the *_ctr_1_t aliases); both templates presumably come from ut0counter.h,
-which is not shown here. */
-struct srv_stats_t {
- typedef ib_counter_t<ulint, 64> ulint_ctr_64_t;
- typedef simple_counter<lsn_t> lsn_ctr_1_t;
- typedef simple_counter<ulint> ulint_ctr_1_t;
- typedef simple_counter<ib_int64_t> ib_int64_ctr_1_t;
-
- /** Count the amount of data written in total (in bytes) */
- ulint_ctr_1_t data_written;
-
- /** Number of the log write requests done */
- ulint_ctr_1_t log_write_requests;
-
- /** Number of physical writes to the log performed */
- ulint_ctr_1_t log_writes;
-
- /** Amount of data written to the log files in bytes */
- lsn_ctr_1_t os_log_written;
-
- /** Number of writes being done to the log files.
- Protected by log_sys->write_mutex. */
- ulint_ctr_1_t os_log_pending_writes;
-
- /** We increase this counter, when we don't have enough
- space in the log buffer and have to flush it */
- ulint_ctr_1_t log_waits;
-
- /** Count the number of times the doublewrite buffer was flushed */
- ulint_ctr_1_t dblwr_writes;
-
- /** Store the number of pages that have been flushed to the
- doublewrite buffer */
- ulint_ctr_1_t dblwr_pages_written;
-
- /** Store the number of write requests issued */
- ulint_ctr_1_t buf_pool_write_requests;
-
- /** Store the number of times when we had to wait for a free page
- in the buffer pool. It happens when the buffer pool is full and we
- need to make a flush, in order to be able to read or create a page. */
- ulint_ctr_1_t buf_pool_wait_free;
-
- /** Count the number of pages that were written from buffer
- pool to the disk */
- ulint_ctr_1_t buf_pool_flushed;
-
- /** Number of buffer pool reads that led to the reading of
- a disk page */
- ulint_ctr_1_t buf_pool_reads;
-
- /** Number of bytes saved by page compression */
- ulint_ctr_64_t page_compression_saved;
- /** Number of 512Byte TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect512;
- /** Number of 1K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect1024;
- /** Number of 2K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect2048;
- /** Number of 4K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect4096;
- /** Number of 8K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect8192;
- /** Number of 16K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect16384;
- /** Number of 32K TRIM by page compression */
- ulint_ctr_64_t page_compression_trim_sect32768;
- /** Number of index pages written */
- ulint_ctr_64_t index_pages_written;
- /** Number of non index pages written */
- ulint_ctr_64_t non_index_pages_written;
- /** Number of pages compressed with page compression */
- ulint_ctr_64_t pages_page_compressed;
- /** Number of TRIM operations induced by page compression */
- ulint_ctr_64_t page_compressed_trim_op;
- /** Number of TRIM operations saved by using actual write size knowledge */
- ulint_ctr_64_t page_compressed_trim_op_saved;
- /** Number of pages decompressed with page compression */
- ulint_ctr_64_t pages_page_decompressed;
- /** Number of page compression errors */
- ulint_ctr_64_t pages_page_compression_error;
- /** Number of pages encrypted */
- ulint_ctr_64_t pages_encrypted;
- /** Number of pages decrypted */
- ulint_ctr_64_t pages_decrypted;
-
- /** Amount of data read in total (in bytes) */
- ulint_ctr_1_t data_read;
-
- /** Wait time of database locks */
- ib_int64_ctr_1_t n_lock_wait_time;
-
- /** Number of database lock waits */
- ulint_ctr_1_t n_lock_wait_count;
-
- /** Number of threads currently waiting on database locks */
- simple_counter<ulint, true> n_lock_wait_current_count;
-
- /** Number of rows read. */
- ulint_ctr_64_t n_rows_read;
-
- /** Number of rows updated */
- ulint_ctr_64_t n_rows_updated;
-
- /** Number of rows deleted */
- ulint_ctr_64_t n_rows_deleted;
-
- /** Number of rows inserted */
- ulint_ctr_64_t n_rows_inserted;
-
- /** Number of system rows read. */
- ulint_ctr_64_t n_system_rows_read;
-
- /** Number of system rows updated */
- ulint_ctr_64_t n_system_rows_updated;
-
- /** Number of system rows deleted */
- ulint_ctr_64_t n_system_rows_deleted;
-
- /** Number of system rows inserted */
- ulint_ctr_64_t n_system_rows_inserted;
-
- /** Number of times secondary index lookup triggered cluster lookup */
- ulint_ctr_64_t n_sec_rec_cluster_reads;
-
- /** Number of times prefix optimization avoided triggering cluster lookup */
- ulint_ctr_64_t n_sec_rec_cluster_reads_avoided;
-
- /** Number of lock deadlocks */
- ulint_ctr_1_t lock_deadlock_count;
-
- /** Number of lock waits that lasted up to the maximum wait time,
- i.e. the number of lock wait timeouts */
- ulint_ctr_1_t n_lock_max_wait_time;
-
- /** Number of times page 0 is read from tablespace */
- ulint_ctr_64_t page0_read;
-
- /** Number of encryption_get_latest_key_version calls */
- ulint_ctr_64_t n_key_requests;
-
- /** Number of spaces in keyrotation list */
- ulint_ctr_64_t key_rotation_list_length;
-};
-
-extern const char* srv_main_thread_op_info;
-
-/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
-extern const char srv_mysql50_table_name_prefix[10];
-
-/** Event to signal srv_monitor_thread. Not protected by a mutex.
-Set after setting srv_print_innodb_monitor. */
-extern os_event_t srv_monitor_event;
-
-/** Event to signal the shutdown of srv_error_monitor_thread.
-Not protected by a mutex. */
-extern os_event_t srv_error_event;
-
-/** Event for waking up buf_dump_thread. Not protected by a mutex.
-Set on shutdown or by buf_dump_start() or buf_load_start(). */
-extern os_event_t srv_buf_dump_event;
-
-/** The buffer pool dump/load file name */
-#define SRV_BUF_DUMP_FILENAME_DEFAULT "ib_buffer_pool"
-extern char* srv_buf_dump_filename;
-
-/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
-and/or load it during startup. */
-extern char srv_buffer_pool_dump_at_shutdown;
-extern char srv_buffer_pool_load_at_startup;
-
-/* Whether to disable file system cache if it is defined */
-extern char srv_disable_sort_file_cache;
-
-/* This event is set on checkpoint completion to wake the redo log parser
-thread */
-extern os_event_t srv_checkpoint_completed_event;
-
-/* This event is set on the online redo log following thread after a successful
-log tracking iteration */
-extern os_event_t srv_redo_log_tracked_event;
-
-/** Whether the redo log tracker thread has been started. Does not take into
-account whether the tracking is currently enabled (see srv_track_changed_pages
-for that) */
-extern bool srv_redo_log_thread_started;
-
-/* If the last data file is auto-extended, we add this many pages to it
-at a time. (1024 * 1024) / UNIV_PAGE_SIZE is the number of pages per
-megabyte, so srv_auto_extend_increment is presumably expressed in
-megabytes -- confirm against its definition. */
-#define SRV_AUTO_EXTEND_INCREMENT \
- (srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE))
-
-/* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
-extern ib_mutex_t srv_monitor_file_mutex;
-
-/* prototypes for new functions added to ha_innodb.cc */
-ibool innobase_get_slow_log();
-
-/* Temporary file for innodb monitor output */
-extern FILE* srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile. Only created if !srv_read_only_mode.
-This mutex has a very high rank; threads reserving it should not
-be holding any InnoDB latches. */
-extern ib_mutex_t srv_dict_tmpfile_mutex;
-/* Temporary file for output from the data dictionary */
-extern FILE* srv_dict_tmpfile;
-/* Mutex for locking srv_misc_tmpfile. Only created if !srv_read_only_mode.
-This mutex has a very low rank; threads reserving it should not
-acquire any further latches or sleep before releasing this one. */
-extern ib_mutex_t srv_misc_tmpfile_mutex;
-/* Temporary file for miscellaneous diagnostic output */
-extern FILE* srv_misc_tmpfile;
-
-/* Server parameters which are read from the initfile */
-
-extern char* srv_data_home;
-
-#ifdef UNIV_LOG_ARCHIVE
-extern char* srv_arch_dir;
-#endif /* UNIV_LOG_ARCHIVE */
-
-/** Set if InnoDB must operate in read-only mode. We don't do any
-recovery and open all tables in RO mode instead of RW mode. We don't
-sync the max trx id to disk either. */
-extern my_bool srv_read_only_mode;
-/** Set if InnoDB operates in read-only mode or innodb-force-recovery
-is greater than SRV_FORCE_NO_TRX_UNDO. */
-extern my_bool high_level_read_only;
-/** store to its own file each table created by a user; data
-dictionary tables are in the system tablespace 0 */
-extern my_bool srv_file_per_table;
-/** Sleep delay for threads waiting to enter InnoDB. In micro-seconds. */
-extern ulong srv_thread_sleep_delay;
-#if defined(HAVE_ATOMIC_BUILTINS)
-/** Maximum sleep delay (in micro-seconds), value of 0 disables it.*/
-extern ulong srv_adaptive_max_sleep_delay;
-#endif /* HAVE_ATOMIC_BUILTINS */
-
-/** The file format to use on new *.ibd files. */
-extern ulint srv_file_format;
-/** Whether to check file format during startup. A value of
-UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
-set it to the highest format we support. */
-extern ulint srv_max_file_format_at_startup;
-/** Place locks to records only i.e. do not use next-key locking except
-on duplicate key checking and foreign key checking */
-extern ibool srv_locks_unsafe_for_binlog;
-
-/** Sort buffer size in index creation */
-extern ulong srv_sort_buf_size;
-/** Maximum modification log file size for online index creation */
-extern unsigned long long srv_online_max_size;
-
-/* If this flag is TRUE, then we will use the native aio of the
-OS (provided we compiled Innobase with it in), otherwise we will
-use simulated aio we build below with threads.
-Currently we support native aio on windows and linux */
-extern my_bool srv_use_native_aio;
-extern my_bool srv_numa_interleave;
-#endif /* !UNIV_HOTBACKUP */
-
-/* Use trim operation */
-extern my_bool srv_use_trim;
-
-/* Use posix fallocate */
-extern my_bool srv_use_posix_fallocate;
-
-/* Use atomic writes i.e disable doublewrite buffer */
-extern my_bool srv_use_atomic_writes;
-
-/* Compression algorithm*/
-extern ulong innodb_compression_algorithm;
-
-/* Number of flush threads */
-#define MTFLUSH_MAX_WORKER 64
-#define MTFLUSH_DEFAULT_WORKER 8
-
-/* Number of threads used for multi-threaded flush */
-extern long srv_mtflush_threads;
-
-/* If this flag is TRUE, then we will use multi threaded flush. */
-extern my_bool srv_use_mtflush;
-
-/** Server undo tablespaces directory, can be absolute path. */
-extern char* srv_undo_dir;
-
-/** Number of undo tablespaces to use. */
-extern ulong srv_undo_tablespaces;
-
-/** The number of UNDO tablespaces that are open and ready to use. */
-extern ulint srv_undo_tablespaces_open;
-
-/* The number of undo segments to use */
-extern ulong srv_undo_logs;
-
-extern ulint srv_n_data_files;
-extern char** srv_data_file_names;
-extern ulint* srv_data_file_sizes;
-extern ulint* srv_data_file_is_raw_partition;
-
-/** Whether the redo log tracking is currently enabled. Note that it is
-possible for the log tracker thread to be running and the tracking to be
-disabled */
-extern my_bool srv_track_changed_pages;
-extern ulonglong srv_max_bitmap_file_size;
-
-extern
-ulonglong srv_max_changed_pages;
-
-extern uint srv_n_fil_crypt_threads;
-extern uint srv_n_fil_crypt_threads_started;
-
-extern ibool srv_auto_extend_last_data_file;
-extern ulint srv_last_file_size_max;
-extern char* srv_log_group_home_dir;
-#ifndef UNIV_HOTBACKUP
-extern ulong srv_auto_extend_increment;
-
-extern ibool srv_created_new_raw;
-
-/* Optimize prefix index queries to skip cluster index lookup when possible */
-/* Enables or disables this prefix optimization. Disabled by default. */
-extern my_bool srv_prefix_index_cluster_optimization;
-
-/** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
-#define SRV_N_LOG_FILES_MAX 100
-extern ulong srv_n_log_files;
-extern ib_uint64_t srv_log_file_size;
-extern ib_uint64_t srv_log_file_size_requested;
-extern ulint srv_log_buffer_size;
-extern uint srv_flush_log_at_timeout;
-extern char srv_use_global_flush_log_at_trx_commit;
-extern char srv_adaptive_flushing;
-
-#ifdef WITH_INNODB_DISALLOW_WRITES
-/* When this event is reset we do not allow any file writes to take place. */
-extern os_event_t srv_allow_writes_event;
-#endif /* WITH_INNODB_DISALLOW_WRITES */
-/* If this flag is TRUE, then we will load the indexes' (and tables') metadata
-even if they are marked as "corrupted". Mostly it is for DBA to process
-corrupted index and table */
-extern my_bool srv_load_corrupted;
-
-extern ulong srv_show_locks_held;
-extern ulong srv_show_verbose_locks;
-
-/* The sort order table of the MySQL latin1_swedish_ci character set
-collation */
-extern const byte* srv_latin1_ordering;
-#ifndef UNIV_HOTBACKUP
-extern my_bool srv_use_sys_malloc;
-#else
-extern ibool srv_use_sys_malloc;
-#endif /* UNIV_HOTBACKUP */
-extern ulint srv_buf_pool_size; /*!< requested size in bytes */
-extern ulint srv_buf_pool_instances; /*!< requested number of buffer pool instances */
-extern ulong srv_n_page_hash_locks; /*!< number of locks to
- protect buf_pool->page_hash */
-extern ulong srv_LRU_scan_depth; /*!< Scan depth for LRU
- flush batch */
-extern ulong srv_flush_neighbors; /*!< whether or not to flush
- neighbors of a block */
-extern ulint srv_buf_pool_old_size; /*!< previously requested size */
-extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */
-extern ulong srv_buf_pool_dump_pct; /*!< dump that many % of each buffer
- pool during BP dump */
-extern ulint srv_mem_pool_size;
-extern ulint srv_lock_table_size;
-
-extern ulong srv_foreground_preflush;/*!< Query thread preflush algorithm */
-
-extern ulint srv_cleaner_max_lru_time;/*!< the maximum time limit for a
- single LRU tail flush iteration by the
- page cleaner thread */
-
-extern ulint srv_cleaner_max_flush_time;/*!< the maximum time limit for a
- single flush list flush iteration by
- the page cleaner thread */
-
-extern ulint srv_cleaner_flush_chunk_size;
- /*!< page cleaner flush list flush
- batches are further divided into this
- chunk size */
-
-extern ulint srv_cleaner_lru_chunk_size;
- /*!< page cleaner LRU list flush
- batches are further divided into this
- chunk size */
-
-extern ulint srv_cleaner_free_list_lwm;/*!< if free list length is lower
- than this percentage of
- srv_LRU_scan_depth, page cleaner LRU
- flushes will issue flush batches to the
- same instance in a row */
-
-extern my_bool srv_cleaner_eviction_factor;
- /*!< if TRUE, page cleaner heuristics
- use evicted instead of flushed page
- counts for its heuristics */
-
-extern ulong srv_cleaner_lsn_age_factor;
- /*!< page cleaner LSN age factor
- formula option */
-
-extern ulong srv_empty_free_list_algorithm;
- /*!< Empty free list for a query thread
- handling algorithm option */
-
-extern ulint srv_n_file_io_threads;
-extern my_bool srv_random_read_ahead;
-extern ulong srv_read_ahead_threshold;
-extern ulint srv_n_read_io_threads;
-extern ulint srv_n_write_io_threads;
-/* Defragmentation. Originally the Facebook default value was 100, but that is too high */
-#define SRV_DEFRAGMENT_FREQUENCY_DEFAULT 40
-extern my_bool srv_defragment;
-extern uint srv_defragment_n_pages;
-extern uint srv_defragment_stats_accuracy;
-extern uint srv_defragment_fill_factor_n_recs;
-extern double srv_defragment_fill_factor;
-extern uint srv_defragment_frequency;
-extern ulonglong srv_defragment_interval;
-
-extern ulong srv_idle_flush_pct;
-
-/* Number of IO operations per second the server can do */
-extern ulong srv_io_capacity;
-
-/* We use this dummy default value at startup for max_io_capacity.
-The real value is set based on the value of io_capacity. */
-#define SRV_MAX_IO_CAPACITY_DUMMY_DEFAULT (~0UL)
-#define SRV_MAX_IO_CAPACITY_LIMIT (~0UL)
-extern ulong srv_max_io_capacity;
-/* Returns the number of IO operations that is X percent of the
-capacity. PCT_IO(5) -> returns the number of IO operations that
-is 5% of the max where max is srv_io_capacity. */
-#define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) (p) / 100.0)))
-
-/* The "innodb_stats_method" setting, decides how InnoDB is going
-to treat NULL value when collecting statistics. It is not defined
-as enum type because the configure option takes unsigned integer type. */
-extern ulong srv_innodb_stats_method;
-
-#ifdef UNIV_LOG_ARCHIVE
-extern bool srv_log_archive_on;
-extern bool srv_archive_recovery;
-extern ib_uint64_t srv_archive_recovery_limit_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
-extern char* srv_file_flush_method_str;
-extern ulint srv_unix_file_flush_method;
-extern ulint srv_win_file_flush_method;
-
-extern ulint srv_max_n_open_files;
-
-extern double srv_max_dirty_pages_pct;
-extern double srv_max_dirty_pages_pct_lwm;
-
-extern double srv_adaptive_flushing_lwm;
-extern ulong srv_flushing_avg_loops;
-
-extern ulong srv_force_recovery;
-
-extern ulint srv_fast_shutdown; /*!< If this is 1, do not do a
- purge and index buffer merge.
- If this is 2, do not even flush the
- buffer pool to data files at the
- shutdown: we effectively 'crash'
- InnoDB (but lose no committed
- transactions). */
-extern ibool srv_innodb_status;
-
-extern unsigned long long srv_stats_transient_sample_pages;
-extern my_bool srv_stats_persistent;
-extern unsigned long long srv_stats_persistent_sample_pages;
-extern my_bool srv_stats_auto_recalc;
-extern my_bool srv_stats_include_delete_marked;
-extern unsigned long long srv_stats_modified_counter;
-extern my_bool srv_stats_sample_traditional;
-
-extern ibool srv_use_doublewrite_buf;
-extern ulong srv_doublewrite_batch_size;
-
-extern ulong srv_log_arch_expire_sec;
-
-extern double srv_max_buf_pool_modified_pct;
-extern ulong srv_max_purge_lag;
-extern ulong srv_max_purge_lag_delay;
-
-extern ulong srv_replication_delay;
-
-extern my_bool srv_use_stacktrace;
-
-extern ulong srv_pass_corrupt_table;
-
-extern ulong srv_log_checksum_algorithm;
-
-extern bool srv_apply_log_only;
-
-extern bool srv_backup_mode;
-extern bool srv_close_files;
-extern bool srv_xtrabackup;
-
-#define IS_XTRABACKUP() (srv_xtrabackup)
-
-extern my_bool srv_force_primary_key;
-
-/* Helper macro to support srv_pass_corrupt_table checks. If 'cond' is FALSE,
-execute 'code' if srv_pass_corrupt_table is non-zero, or trigger a fatal error
-otherwise. The break statement in 'code' will obviously not work as
-expected. */
-
-#define SRV_CORRUPT_TABLE_CHECK(cond,code) \
- do { \
- if (UNIV_UNLIKELY(!(cond))) { \
- if (srv_pass_corrupt_table) { \
- code \
- } else { \
- ut_error; \
- } \
- } \
- } while(0)
-
-/*-------------------------------------------*/
-
-extern ulint srv_read_views_memory;
-extern ulint srv_descriptors_memory;
-
-extern my_bool srv_print_innodb_monitor;
-extern my_bool srv_print_innodb_lock_monitor;
-extern ibool srv_print_innodb_tablespace_monitor;
-extern ibool srv_print_verbose_log;
-#define DEPRECATED_MSG_INNODB_TABLE_MONITOR \
- "Using innodb_table_monitor is deprecated and it may be removed " \
- "in future releases. Please use the InnoDB INFORMATION_SCHEMA " \
- "tables instead, see " REFMAN "innodb-i_s-tables.html"
-extern ibool srv_print_innodb_table_monitor;
-
-extern bool srv_monitor_active;
-extern bool srv_error_monitor_active;
-
-/* TRUE during the lifetime of the buffer pool dump/load thread */
-extern bool srv_buf_dump_thread_active;
-
-/* TRUE during the lifetime of the stats thread */
-extern bool srv_dict_stats_thread_active;
-
-/* TRUE if enable log scrubbing */
-extern my_bool srv_scrub_log;
-
-extern ulong srv_n_spin_wait_rounds;
-extern ulong srv_n_free_tickets_to_enter;
-extern ulong srv_thread_sleep_delay;
-extern ulong srv_spin_wait_delay;
-extern ibool srv_priority_boost;
-
-extern ulint srv_truncated_status_writes;
-extern ulint srv_available_undo_logs;
-
-extern ulint srv_column_compressed;
-extern ulint srv_column_decompressed;
-
-extern ulint srv_mem_pool_size;
-extern ulint srv_lock_table_size;
-
-#ifdef UNIV_DEBUG
-extern ibool srv_print_thread_releases;
-extern ibool srv_print_lock_waits;
-extern ibool srv_print_buf_io;
-extern ibool srv_print_log_io;
-extern ibool srv_print_latch_waits;
-#else /* UNIV_DEBUG */
-# define srv_print_thread_releases FALSE
-# define srv_print_lock_waits FALSE
-# define srv_print_buf_io FALSE
-# define srv_print_log_io FALSE
-# define srv_print_latch_waits FALSE
-#endif /* UNIV_DEBUG */
-
-#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
-extern my_bool srv_ibuf_disable_background_merge;
-#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
-
-#ifdef UNIV_DEBUG
-extern my_bool srv_purge_view_update_only_debug;
-extern uint srv_sys_space_size_debug;
-#endif /* UNIV_DEBUG */
-
-#define SRV_SEMAPHORE_WAIT_EXTENSION 7200
-extern ulint srv_dml_needed_delay;
-extern long long srv_kill_idle_transaction;
-
-#ifndef HAVE_ATOMIC_BUILTINS
-/** Mutex protecting some server global variables. */
-extern ib_mutex_t server_mutex;
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
-#define SRV_MAX_N_IO_THREADS 130
-
-#define SRV_MAX_N_PURGE_THREADS 32
-
-/* Array of English strings describing the current state of an
-i/o handler thread */
-extern const char* srv_io_thread_op_info[];
-extern const char* srv_io_thread_function[];
-
-/* The tid of the cleaner thread */
-extern os_tid_t srv_cleaner_tid;
-
-/* The tid of the LRU manager thread */
-extern os_tid_t srv_lru_manager_tid;
-
-/* The tids of the purge threads */
-extern os_tid_t srv_purge_tids[];
-
-/* The tids of the I/O threads */
-extern os_tid_t srv_io_tids[];
-
-/* The tid of the master thread */
-extern os_tid_t srv_master_tid;
-
-/* The relative scheduling priority of the cleaner and LRU manager threads */
-extern ulint srv_sched_priority_cleaner;
-
-/* The relative scheduling priority of the purge threads */
-extern ulint srv_sched_priority_purge;
-
-/* The relative scheduling priority of the I/O threads */
-extern ulint srv_sched_priority_io;
-
-/* The relative scheduling priority of the master thread */
-extern ulint srv_sched_priority_master;
-
-/* The relative priority of the purge coordinator and worker threads. */
-extern my_bool srv_purge_thread_priority;
-
-/* The relative priority of the I/O threads. */
-extern my_bool srv_io_thread_priority;
-
-/* The relative priority of the cleaner thread. */
-extern my_bool srv_cleaner_thread_priority;
-
-/* The relative priority of the master thread. */
-extern my_bool srv_master_thread_priority;
-
-/* the number of purge threads to use from the worker pool (currently 0 or 1) */
-extern ulong srv_n_purge_threads;
-
-/* the number of pages to purge in one batch */
-extern ulong srv_purge_batch_size;
-
-/* the number of sync wait arrays */
-extern ulong srv_sync_array_size;
-
-/* print all user-level transactions deadlocks to mysqld stderr */
-extern my_bool srv_print_all_deadlocks;
-
-extern my_bool srv_cmp_per_index_enabled;
-
-/* is encryption enabled */
-extern ulong srv_encrypt_tables;
-
-/** Status variables to be passed to MySQL */
-extern struct export_var_t export_vars;
-
-/** Global counters */
-extern srv_stats_t srv_stats;
-
-/** When TRUE, fake change transcations take S rather than X row locks.
-When FALSE, row locks are not taken at all. */
-extern my_bool srv_fake_changes_locks;
-
-/** Simulate compression failures. */
-extern uint srv_simulate_comp_failures;
-
-/** Fatal semaphore wait threshold = maximum number of seconds
-that semaphore times out in InnoDB */
-#define DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT 600
-extern ulong srv_fatal_semaphore_wait_threshold;
-
-/** Enable semaphore request instrumentation */
-extern my_bool srv_instrument_semaphores;
-
-/** Buffer pool dump status frequence in percentages */
-extern ulong srv_buf_dump_status_frequency;
-
-# ifdef UNIV_PFS_THREAD
-/* Keys to register InnoDB threads with performance schema */
-extern mysql_pfs_key_t buf_page_cleaner_thread_key;
-extern mysql_pfs_key_t buf_lru_manager_thread_key;
-extern mysql_pfs_key_t trx_rollback_clean_thread_key;
-extern mysql_pfs_key_t io_handler_thread_key;
-extern mysql_pfs_key_t srv_lock_timeout_thread_key;
-extern mysql_pfs_key_t srv_error_monitor_thread_key;
-extern mysql_pfs_key_t srv_monitor_thread_key;
-extern mysql_pfs_key_t srv_master_thread_key;
-extern mysql_pfs_key_t srv_purge_thread_key;
-extern mysql_pfs_key_t recv_writer_thread_key;
-extern mysql_pfs_key_t srv_log_tracking_thread_key;
-
-/* This macro register the current thread and its key with performance
-schema */
-# define pfs_register_thread(key) \
-do { \
- struct PSI_thread* psi = PSI_THREAD_CALL(new_thread)(key, NULL, 0);\
- PSI_THREAD_CALL(set_thread)(psi); \
-} while (0)
-
-/* This macro delist the current thread from performance schema */
-# define pfs_delete_thread() \
-do { \
- PSI_THREAD_CALL(delete_current_thread)(); \
-} while (0)
-# endif /* UNIV_PFS_THREAD */
-
-#endif /* !UNIV_HOTBACKUP */
-
-/** Types of raw partitions in innodb_data_file_path */
-enum {
- SRV_NOT_RAW = 0, /*!< Not a raw partition */
- SRV_NEW_RAW, /*!< A 'newraw' partition, only to be
- initialized */
- SRV_OLD_RAW /*!< An initialized raw partition */
-};
-
-/** Alternatives for the file flush option in Unix; see the InnoDB manual
-about what these mean */
-enum {
- SRV_UNIX_FSYNC = 1, /*!< fsync, the default */
- SRV_UNIX_O_DSYNC, /*!< open log files in O_SYNC mode */
- SRV_UNIX_LITTLESYNC, /*!< do not call os_file_flush()
- when writing data files, but do flush
- after writing to log files */
- SRV_UNIX_NOSYNC, /*!< do not flush after writing */
- SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on
- data files. This implies using
- non-buffered IO but still using fsync,
- the reason for which is that some FS
- do not flush meta-data when
- unbuffered IO happens */
- SRV_UNIX_O_DIRECT_NO_FSYNC,
- /*!< do not use fsync() when using
- direct IO i.e.: it can be set to avoid
- the fsync() call that we make when
- using SRV_UNIX_O_DIRECT. However, in
- this case user/DBA should be sure about
- the integrity of the meta-data */
- SRV_UNIX_ALL_O_DIRECT /*!< similar to O_DIRECT, invokes
- os_file_set_nocache() on data and log files.
- This implies using non-buffered IO but still
- using fsync for data but not log files. */
-};
-
-/** Alternatives for file i/o in Windows */
-enum {
- SRV_WIN_IO_NORMAL = 1, /*!< buffered I/O */
- SRV_WIN_IO_UNBUFFERED /*!< unbuffered I/O; this is the default */
-};
-
-/** Alternatives for srv_force_recovery. Non-zero values are intended
-to help the user get a damaged database up so that he can dump intact
-tables and rows with SELECT INTO OUTFILE. The database must not otherwise
-be used with these options! A bigger number below means that all precautions
-of lower numbers are included. */
-enum {
- SRV_FORCE_IGNORE_CORRUPT = 1, /*!< let the server run even if it
- detects a corrupt page */
- SRV_FORCE_NO_BACKGROUND = 2, /*!< prevent the main thread from
- running: if a crash would occur
- in purge, this prevents it */
- SRV_FORCE_NO_TRX_UNDO = 3, /*!< do not run trx rollback after
- recovery */
- SRV_FORCE_NO_IBUF_MERGE = 4, /*!< prevent also ibuf operations:
- if they would cause a crash, better
- not do them */
- SRV_FORCE_NO_UNDO_LOG_SCAN = 5, /*!< do not look at undo logs when
- starting the database: InnoDB will
- treat even incomplete transactions
- as committed */
- SRV_FORCE_NO_LOG_REDO = 6 /*!< do not do the log roll-forward
- in connection with recovery */
-};
-
-/* Alternatives for srv_innodb_stats_method, which could be changed by
-setting innodb_stats_method */
-enum srv_stats_method_name_enum {
- SRV_STATS_NULLS_EQUAL, /* All NULL values are treated as
- equal. This is the default setting
- for innodb_stats_method */
- SRV_STATS_NULLS_UNEQUAL, /* All NULL values are treated as
- NOT equal. */
- SRV_STATS_NULLS_IGNORED /* NULL values are ignored */
-};
-
-typedef enum srv_stats_method_name_enum srv_stats_method_name_t;
-
-#ifndef UNIV_HOTBACKUP
-/** Types of threads existing in the system. */
-enum srv_thread_type {
- SRV_NONE, /*!< None */
- SRV_WORKER, /*!< threads serving parallelized
- queries and queries released from
- lock wait */
- SRV_PURGE, /*!< Purge coordinator thread */
- SRV_MASTER /*!< the master thread, (whose type
- number must be biggest) */
-};
-
-/*********************************************************************//**
-Boots Innobase server. */
-UNIV_INTERN
-void
-srv_boot(void);
-/*==========*/
-/*********************************************************************//**
-Initializes the server. */
-UNIV_INTERN
-void
-srv_init(void);
-/*==========*/
-/*********************************************************************//**
-Frees the data structures created in srv_init(). */
-UNIV_INTERN
-void
-srv_free(void);
-/*==========*/
-/*********************************************************************//**
-Initializes the synchronization primitives, memory system, and the thread
-local storage. */
-UNIV_INTERN
-void
-srv_general_init(void);
-/*==================*/
-/*********************************************************************//**
-Sets the info describing an i/o thread current state. */
-UNIV_INTERN
-void
-srv_set_io_thread_op_info(
-/*======================*/
- ulint i, /*!< in: the 'segment' of the i/o thread */
- const char* str); /*!< in: constant char string describing the
- state */
-/*********************************************************************//**
-Resets the info describing an i/o thread current state. */
-UNIV_INTERN
-void
-srv_reset_io_thread_op_info();
-/*=========================*/
-/*******************************************************************//**
-Tells the purge thread that there has been activity in the database
-and wakes up the purge thread if it is suspended (not sleeping). Note
-that there is a small chance that the purge thread stays suspended
-(we do not protect our operation with the srv_sys_t:mutex, for
-performance reasons). */
-UNIV_INTERN
-void
-srv_wake_purge_thread_if_not_active(void);
-/*=====================================*/
-/*******************************************************************//**
-Tells the Innobase server that there has been activity in the database
-and wakes up the master thread if it is suspended (not sleeping). Used
-in the MySQL interface. Note that there is a small chance that the master
-thread stays suspended (we do not protect our operation with the kernel
-mutex, for performace reasons). */
-UNIV_INTERN
-void
-srv_active_wake_master_thread(void);
-/*===============================*/
-/*******************************************************************//**
-Wakes up the master thread if it is suspended or being suspended. */
-UNIV_INTERN
-void
-srv_wake_master_thread(void);
-/*========================*/
-/******************************************************************//**
-A thread which follows the redo log and outputs the changed page bitmap.
-@return a dummy value */
-extern "C"
-UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(srv_redo_log_follow_thread)(
-/*=======================*/
- void* arg); /*!< in: a dummy parameter required by
- os_thread_create */
-/******************************************************************//**
-Outputs to a file the output of the InnoDB Monitor.
-@return FALSE if not all information printed
-due to failure to obtain necessary mutex */
-UNIV_INTERN
-ibool
-srv_printf_innodb_monitor(
-/*======================*/
- FILE* file, /*!< in: output stream */
- ibool nowait, /*!< in: whether to wait for the
- lock_sys_t::mutex */
- ulint* trx_start, /*!< out: file position of the start of
- the list of active transactions */
- ulint* trx_end); /*!< out: file position of the end of
- the list of active transactions */
-
-/******************************************************************//**
-Function to pass InnoDB status variables to MySQL */
-UNIV_INTERN
-void
-srv_export_innodb_status(void);
-/*==========================*/
-/*************************************************************//**
-Removes old archived transaction log files.
-Both parameters couldn't be provided at the same time.
-@return DB_SUCCESS on success, otherwise DB_ERROR */
-UNIV_INTERN
-dberr_t
-purge_archived_logs(
- time_t before_date, /*!< in: all files modified
- before timestamp should be removed */
- lsn_t before_lsn); /*!< in: files with this lsn in name
- and earler should be removed */
-/*==========================*/
-/*******************************************************************//**
-Get current server activity count. We don't hold srv_sys::mutex while
-reading this value as it is only used in heuristics.
-@return activity count. */
-UNIV_INTERN
-ulint
-srv_get_activity_count(void);
-/*========================*/
-/*******************************************************************//**
-Check if there has been any activity. Considers background change buffer
-merge as regular server activity unless a non-default
-old_ibuf_merge_activity_count value is passed, in which case the merge will be
-treated as keeping server idle.
-@return FALSE if no change in activity counter. */
-UNIV_INTERN
-ibool
-srv_check_activity(
-/*===============*/
- ulint old_activity_count, /*!< old activity count */
- /*!< old change buffer merge
- activity count, or
- ULINT_UNDEFINED */
- ulint old_ibuf_merge_activity_count = ULINT_UNDEFINED);
-/******************************************************************//**
-Increment the server activity counter. */
-UNIV_INTERN
-void
-srv_inc_activity_count(
-/*===================*/
- bool ibuf_merge_activity = false); /*!< whether this activity bump
- is caused by the background
- change buffer merge */
-
-/**********************************************************************//**
-Enqueues a task to server task queue and releases a worker thread, if there
-is a suspended one. */
-UNIV_INTERN
-void
-srv_que_task_enqueue_low(
-/*=====================*/
- que_thr_t* thr); /*!< in: query thread */
-
-/**********************************************************************//**
-Check whether any background thread is active. If so, return the thread
-type.
-@return SRV_NONE if all are are suspended or have exited, thread
-type if any are still active. */
-UNIV_INTERN
-enum srv_thread_type
-srv_get_active_thread_type(void);
-/*============================*/
-
-extern "C" {
-
-/*********************************************************************//**
-A thread which prints the info output by various InnoDB monitors.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(srv_monitor_thread)(
-/*===============================*/
- void* arg); /*!< in: a dummy parameter required by
- os_thread_create */
-
-/*********************************************************************//**
-The master thread controlling the server.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(srv_master_thread)(
-/*==============================*/
- void* arg); /*!< in: a dummy parameter required by
- os_thread_create */
-
-/*************************************************************************
-A thread which prints warnings about semaphore waits which have lasted
-too long. These can be used to track bugs which cause hangs.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(srv_error_monitor_thread)(
-/*=====================================*/
- void* arg); /*!< in: a dummy parameter required by
- os_thread_create */
-
-/*********************************************************************//**
-Purge coordinator thread that schedules the purge tasks.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(srv_purge_coordinator_thread)(
-/*=========================================*/
- void* arg MY_ATTRIBUTE((unused))); /*!< in: a dummy parameter
- required by os_thread_create */
-
-/*********************************************************************//**
-Worker thread that reads tasks from the work queue and executes them.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(srv_worker_thread)(
-/*==============================*/
- void* arg MY_ATTRIBUTE((unused))); /*!< in: a dummy parameter
- required by os_thread_create */
-} /* extern "C" */
-
-/**********************************************************************//**
-Get count of tasks in the queue.
-@return number of tasks in queue */
-UNIV_INTERN
-ulint
-srv_get_task_queue_length(void);
-/*===========================*/
-
-/** Ensure that a given number of threads of the type given are running
-(or are already terminated).
-@param[in] type thread type
-@param[in] n number of threads that have to run */
-void
-srv_release_threads(enum srv_thread_type type, ulint n);
-
-/** Wake up the purge threads. */
-UNIV_INTERN
-void
-srv_purge_wakeup();
-
-/** Check whether given space id is undo tablespace id
-@param[in] space_id space id to check
-@return true if it is undo tablespace else false. */
-bool
-srv_is_undo_tablespace(
- ulint space_id);
-
-/** Status variables to be passed to MySQL */
-struct export_var_t{
- ulint innodb_adaptive_hash_hash_searches;
- ulint innodb_adaptive_hash_non_hash_searches;
- ulint innodb_background_log_sync;
- ulint innodb_data_pending_reads; /*!< Pending reads */
- ulint innodb_data_pending_writes; /*!< Pending writes */
- ulint innodb_data_pending_fsyncs; /*!< Pending fsyncs */
- ulint innodb_data_fsyncs; /*!< Number of fsyncs so far */
- ulint innodb_data_read; /*!< Data bytes read */
- ulint innodb_data_writes; /*!< I/O write requests */
- ulint innodb_data_written; /*!< Data bytes written */
- ulint innodb_data_reads; /*!< I/O read requests */
- char innodb_buffer_pool_dump_status[512];/*!< Buf pool dump status */
- char innodb_buffer_pool_load_status[512];/*!< Buf pool load status */
- ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */
- ulint innodb_buffer_pool_pages_data; /*!< Data pages */
- ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */
- ulint innodb_buffer_pool_pages_dirty; /*!< Dirty data pages */
- ulint innodb_buffer_pool_bytes_dirty; /*!< File bytes modified */
- ulint innodb_buffer_pool_pages_misc; /*!< Miscellanous pages */
- ulint innodb_buffer_pool_pages_free; /*!< Free pages */
-#ifdef UNIV_DEBUG
- ulint innodb_buffer_pool_pages_latched; /*!< Latched pages */
-#endif /* UNIV_DEBUG */
- ulint innodb_buffer_pool_pages_made_not_young;
- ulint innodb_buffer_pool_pages_made_young;
- ulint innodb_buffer_pool_pages_old;
- ulint innodb_buffer_pool_read_requests; /*!< buf_pool->stat.n_page_gets */
- ulint innodb_buffer_pool_reads; /*!< srv_buf_pool_reads */
- ulint innodb_buffer_pool_wait_free; /*!< srv_buf_pool_wait_free */
- ulint innodb_buffer_pool_pages_flushed; /*!< srv_buf_pool_flushed */
- ulint innodb_buffer_pool_pages_LRU_flushed; /*!< buf_lru_flush_page_count */
- ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */
- ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */
- ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */
- ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
- ulint innodb_checkpoint_age;
- ulint innodb_checkpoint_max_age;
- ulint innodb_dblwr_pages_written; /*!< srv_dblwr_pages_written */
- ulint innodb_dblwr_writes; /*!< srv_dblwr_writes */
- ulint innodb_deadlocks;
- ibool innodb_have_atomic_builtins; /*!< HAVE_ATOMIC_BUILTINS */
- ulint innodb_history_list_length;
- ulint innodb_ibuf_size;
- ulint innodb_ibuf_free_list;
- ulint innodb_ibuf_segment_size;
- ulint innodb_ibuf_merges;
- ulint innodb_ibuf_merged_inserts;
- ulint innodb_ibuf_merged_delete_marks;
- ulint innodb_ibuf_merged_deletes;
- ulint innodb_ibuf_discarded_inserts;
- ulint innodb_ibuf_discarded_delete_marks;
- ulint innodb_ibuf_discarded_deletes;
- ulint innodb_log_waits; /*!< srv_log_waits */
- ulint innodb_log_write_requests; /*!< srv_log_write_requests */
- ulint innodb_log_writes; /*!< srv_log_writes */
- lsn_t innodb_os_log_written; /*!< srv_os_log_written */
- lsn_t innodb_lsn_current;
- lsn_t innodb_lsn_flushed;
- lsn_t innodb_lsn_last_checkpoint;
- ulint innodb_master_thread_active_loops;/*!< srv_main_active_loops */
- ulint innodb_master_thread_idle_loops; /*!< srv_main_idle_loops */
- ib_int64_t innodb_max_trx_id;
- ulint innodb_mem_adaptive_hash;
- ulint innodb_mem_dictionary;
- ulint innodb_mem_total;
- ib_int64_t innodb_mutex_os_waits;
- ib_int64_t innodb_mutex_spin_rounds;
- ib_int64_t innodb_mutex_spin_waits;
- ib_int64_t innodb_oldest_view_low_limit_trx_id;
- ulint innodb_os_log_fsyncs; /*!< fil_n_log_flushes */
- ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */
- ulint innodb_os_log_pending_fsyncs; /*!< fil_n_pending_log_flushes */
- ulint innodb_page_size; /*!< UNIV_PAGE_SIZE */
- ulint innodb_pages_created; /*!< buf_pool->stat.n_pages_created */
- ulint innodb_pages_read; /*!< buf_pool->stat.n_pages_read*/
- ulint innodb_page0_read; /*!< srv_stats.page0_read */
- ulint innodb_pages_written; /*!< buf_pool->stat.n_pages_written */
- ib_int64_t innodb_purge_trx_id;
- ib_int64_t innodb_purge_undo_no;
- ulint innodb_row_lock_waits; /*!< srv_n_lock_wait_count */
- ulint innodb_row_lock_current_waits; /*!< srv_n_lock_wait_current_count */
- ib_int64_t innodb_row_lock_time; /*!< srv_n_lock_wait_time
- / 1000 */
- ulint innodb_row_lock_time_avg; /*!< srv_n_lock_wait_time
- / 1000
- / srv_n_lock_wait_count */
- ulint innodb_row_lock_time_max; /*!< srv_n_lock_max_wait_time
- / 1000 */
- ulint innodb_current_row_locks;
- ulint innodb_rows_read; /*!< srv_n_rows_read */
- ulint innodb_rows_inserted; /*!< srv_n_rows_inserted */
- ulint innodb_rows_updated; /*!< srv_n_rows_updated */
- ulint innodb_rows_deleted; /*!< srv_n_rows_deleted */
- ulint innodb_system_rows_read; /*!< srv_n_system_rows_read */
- ulint innodb_system_rows_inserted; /*!< srv_n_system_rows_inserted */
- ulint innodb_system_rows_updated; /*!< srv_n_system_rows_updated */
- ulint innodb_system_rows_deleted; /*!< srv_n_system_rows_deleted*/
- ulint innodb_num_open_files; /*!< fil_n_file_opened */
- ulint innodb_truncated_status_writes; /*!< srv_truncated_status_writes */
- ulint innodb_available_undo_logs; /*!< srv_available_undo_logs */
- ulint innodb_read_views_memory; /*!< srv_read_views_memory */
- ulint innodb_descriptors_memory; /*!< srv_descriptors_memory */
- ib_int64_t innodb_s_lock_os_waits;
- ib_int64_t innodb_s_lock_spin_rounds;
- ib_int64_t innodb_s_lock_spin_waits;
- ib_int64_t innodb_x_lock_os_waits;
- ib_int64_t innodb_x_lock_spin_rounds;
- ib_int64_t innodb_x_lock_spin_waits;
-
- ulint innodb_defragment_compression_failures; /*!< Number of
- defragment re-compression
- failures */
-
- ulint innodb_defragment_failures; /*!< Number of defragment
- failures*/
- ulint innodb_defragment_count; /*!< Number of defragment
- operations*/
-
- ulint innodb_onlineddl_rowlog_rows; /*!< Online alter rows */
- ulint innodb_onlineddl_rowlog_pct_used; /*!< Online alter percentage
- of used row log buffer */
- ulint innodb_onlineddl_pct_progress; /*!< Online alter progress
- */
-
-#ifdef UNIV_DEBUG
- ulint innodb_purge_trx_id_age; /*!< rw_max_trx_id - purged trx_id */
- ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id
- - purged view's min trx_id */
-#endif /* UNIV_DEBUG */
- ulint innodb_column_compressed; /*!< srv_column_compressed */
- ulint innodb_column_decompressed; /*!< srv_column_decompressed */
-
- ib_int64_t innodb_page_compression_saved;/*!< Number of bytes saved
- by page compression */
- ib_int64_t innodb_page_compression_trim_sect512;/*!< Number of 512b TRIM
- by page compression */
- ib_int64_t innodb_page_compression_trim_sect1024;/*!< Number of 1K TRIM
- by page compression */
- ib_int64_t innodb_page_compression_trim_sect2048;/*!< Number of 2K TRIM
- by page compression */
- ib_int64_t innodb_page_compression_trim_sect4096;/*!< Number of 4K byte TRIM
- by page compression */
- ib_int64_t innodb_page_compression_trim_sect8192;/*!< Number of 8K TRIM
- by page compression */
- ib_int64_t innodb_page_compression_trim_sect16384;/*!< Number of 16K TRIM
- by page compression */
- ib_int64_t innodb_page_compression_trim_sect32768;/*!< Number of 32K TRIM
- by page compression */
- ib_int64_t innodb_index_pages_written; /*!< Number of index pages
- written */
- ib_int64_t innodb_non_index_pages_written; /*!< Number of non index pages
- written */
- ib_int64_t innodb_pages_page_compressed;/*!< Number of pages
- compressed by page compression */
- ib_int64_t innodb_page_compressed_trim_op;/*!< Number of TRIM operations
- induced by page compression */
- ib_int64_t innodb_page_compressed_trim_op_saved;/*!< Number of TRIM operations
- saved by page compression */
- ib_int64_t innodb_pages_page_decompressed;/*!< Number of pages
- decompressed by page
- compression */
- ib_int64_t innodb_pages_page_compression_error;/*!< Number of page
- compression errors */
- ib_int64_t innodb_pages_encrypted; /*!< Number of pages
- encrypted */
- ib_int64_t innodb_pages_decrypted; /*!< Number of pages
- decrypted */
-
- ulint innodb_sec_rec_cluster_reads; /*!< srv_sec_rec_cluster_reads */
- ulint innodb_sec_rec_cluster_reads_avoided;/*!< srv_sec_rec_cluster_reads_avoided */
-
- ulint innodb_encryption_rotation_pages_read_from_cache;
- ulint innodb_encryption_rotation_pages_read_from_disk;
- ulint innodb_encryption_rotation_pages_modified;
- ulint innodb_encryption_rotation_pages_flushed;
- ulint innodb_encryption_rotation_estimated_iops;
- ib_int64_t innodb_encryption_key_requests;
- ib_int64_t innodb_key_rotation_list_length;
-
- ulint innodb_scrub_page_reorganizations;
- ulint innodb_scrub_page_splits;
- ulint innodb_scrub_page_split_failures_underflow;
- ulint innodb_scrub_page_split_failures_out_of_filespace;
- ulint innodb_scrub_page_split_failures_missing_index;
- ulint innodb_scrub_page_split_failures_unknown;
-};
-
-/** Thread slot in the thread table. */
-struct srv_slot_t{
- srv_thread_type type; /*!< thread type: user,
- utility etc. */
- ibool in_use; /*!< TRUE if this slot
- is in use */
- ibool suspended; /*!< TRUE if the thread is
- waiting for the event of this
- slot */
- ib_time_t suspend_time; /*!< time when the thread was
- suspended. Initialized by
- lock_wait_table_reserve_slot()
- for lock wait */
- ulong wait_timeout; /*!< wait time that if exceeded
- the thread will be timed out.
- Initialized by
- lock_wait_table_reserve_slot()
- for lock wait */
- os_event_t event; /*!< event used in suspending
- the thread when it has nothing
- to do */
- que_thr_t* thr; /*!< suspended query thread
- (only used for user threads) */
-};
-
-#else /* !UNIV_HOTBACKUP */
-# define srv_use_adaptive_hash_indexes FALSE
-# define srv_use_native_aio FALSE
-# define srv_numa_interleave FALSE
-# define srv_force_recovery 0UL
-# define srv_set_io_thread_op_info(t,info) ((void) 0)
-# define srv_reset_io_thread_op_info() ((void) 0)
-# define srv_is_being_started 0
-# define srv_win_file_flush_method SRV_WIN_IO_UNBUFFERED
-# define srv_unix_file_flush_method SRV_UNIX_O_DSYNC
-# define srv_start_raw_disk_in_use 0
-# define srv_file_per_table 1
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef WITH_WSREP
-UNIV_INTERN
-void
-wsrep_srv_conc_cancel_wait(
-/*==================*/
- trx_t* trx); /*!< in: transaction object associated with the
- thread */
-#endif /* WITH_WSREP */
-
-#ifndef DBUG_OFF
-/** false before InnoDB monitor has been printed at least once, true
-afterwards */
-extern bool srv_debug_monitor_printed;
-#else
-#define srv_debug_monitor_printed false
-#endif
-
-#endif
diff --git a/storage/xtradb/include/srv0srv.ic b/storage/xtradb/include/srv0srv.ic
deleted file mode 100644
index 53405c06f97..00000000000
--- a/storage/xtradb/include/srv0srv.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/srv0srv.ic
-Server main program
-
-Created 10/4/1995 Heikki Tuuri
-*******************************************************/
diff --git a/storage/xtradb/include/srv0start.h b/storage/xtradb/include/srv0start.h
deleted file mode 100644
index b055a9d834f..00000000000
--- a/storage/xtradb/include/srv0start.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/srv0start.h
-Starts the Innobase database server
-
-Created 10/10/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef srv0start_h
-#define srv0start_h
-
-#include "univ.i"
-#include "log0log.h"
-#include "ut0byte.h"
-
-#ifdef __WIN__
-#define SRV_PATH_SEPARATOR '\\'
-#else
-#define SRV_PATH_SEPARATOR '/'
-#endif
-
-/*********************************************************************//**
-Normalizes a directory path for Windows: converts slashes to backslashes. */
-UNIV_INTERN
-void
-srv_normalize_path_for_win(
-/*=======================*/
- char* str); /*!< in/out: null-terminated character string */
-/*********************************************************************//**
-Reads the data files and their sizes from a character string given in
-the .cnf file.
-@return TRUE if ok, FALSE on parse error */
-UNIV_INTERN
-ibool
-srv_parse_data_file_paths_and_sizes(
-/*================================*/
- char* str); /*!< in/out: the data file path string */
-/*********************************************************************//**
-Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
-and srv_parse_log_group_home_dirs(). */
-UNIV_INTERN
-void
-srv_free_paths_and_sizes(void);
-/*==========================*/
-/*********************************************************************//**
-Adds a slash or a backslash to the end of a string if it is missing
-and the string is not empty.
-@return string which has the separator if the string is not empty */
-UNIV_INTERN
-char*
-srv_add_path_separator_if_needed(
-/*=============================*/
- char* str); /*!< in: null-terminated character string */
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Starts Innobase and creates a new database if database files
-are not found and the user wants.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-innobase_start_or_create_for_mysql();
-
-/** Shut down InnoDB. */
-UNIV_INTERN
-void
-innodb_shutdown();
-
-/*************************************************************//**
-Copy the file path component of the physical file to parameter. It will
-copy up to and including the terminating path separator.
-@return number of bytes copied or ULINT_UNDEFINED if destination buffer
- is smaller than the path to be copied. */
-UNIV_INTERN
-ulint
-srv_path_copy(
-/*==========*/
- char* dest, /*!< out: destination buffer */
- ulint dest_len, /*!< in: max bytes to copy */
- const char* basedir, /*!< in: base directory */
- const char* table_name) /*!< in: source table name */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-
-/*****************************************************************//**
-Get the meta-data filename from the table name. */
-UNIV_INTERN
-void
-srv_get_meta_data_filename(
-/*======================*/
- dict_table_t* table, /*!< in: table */
- char* filename, /*!< out: filename */
- ulint max_len) /*!< in: filename max length */
- MY_ATTRIBUTE((nonnull));
-
-/** Log sequence number at shutdown */
-extern lsn_t srv_shutdown_lsn;
-/** Log sequence number immediately after startup */
-extern lsn_t srv_start_lsn;
-
-#ifdef HAVE_DARWIN_THREADS
-/** TRUE if the F_FULLFSYNC option is available */
-extern ibool srv_have_fullfsync;
-#endif
-
-/** TRUE if the server is being started */
-extern ibool srv_is_being_started;
-/** TRUE if the server was successfully started */
-extern ibool srv_was_started;
-/** TRUE if the server is being started, before rolling back any
-incomplete transactions */
-extern ibool srv_startup_is_before_trx_rollback_phase;
-
-/** TRUE if a raw partition is in use */
-extern ibool srv_start_raw_disk_in_use;
-
-/** Undo tablespaces starts with space_id. */
-extern ulint srv_undo_space_id_start;
-
-/** Shutdown state */
-enum srv_shutdown_state {
- SRV_SHUTDOWN_NONE = 0, /*!< Database running normally */
- SRV_SHUTDOWN_CLEANUP, /*!< Cleaning up in
- logs_empty_and_mark_files_at_shutdown() */
- SRV_SHUTDOWN_FLUSH_PHASE,/*!< At this phase the master and the
- purge threads must have completed their
- work. Once we enter this phase the
- page_cleaner can clean up the buffer
- pool and exit */
- SRV_SHUTDOWN_LAST_PHASE,/*!< Last phase after ensuring that
- the buffer pool can be freed: flush
- all file spaces and close all files */
- SRV_SHUTDOWN_EXIT_THREADS/*!< Exit all threads */
-};
-
-/** Whether any undo log records can be generated */
-extern bool srv_undo_sources;
-
-/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
-SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
-extern enum srv_shutdown_state srv_shutdown_state;
-#endif /* !UNIV_HOTBACKUP */
-
-/** Log 'spaces' have id's >= this */
-#define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL
-
-#endif
diff --git a/storage/xtradb/include/sync0arr.h b/storage/xtradb/include/sync0arr.h
deleted file mode 100644
index 9292026ff13..00000000000
--- a/storage/xtradb/include/sync0arr.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0arr.h
-The wait array used in synchronization primitives
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef sync0arr_h
-#define sync0arr_h
-
-#include "univ.i"
-#include "ut0lst.h"
-#include "ut0mem.h"
-#include "os0thread.h"
-
-/** Synchonization cell */
-struct sync_cell_t;
-/** Synchronization wait array */
-struct sync_array_t;
-
-/******************************************************************//**
-Get an instance of the sync wait array and reserve a wait array cell
-in the instance for waiting for an object. The event of the cell is
-reset to nonsignalled state.
-If reserving cell of the instance fails, try to get another new
-instance until we can reserve an empty cell of it.
-@return the instance found, never NULL. */
-UNIV_INLINE
-sync_array_t*
-sync_array_get_and_reserve_cell(
-/*============================*/
- void* object, /*!< in: pointer to the object to wait for */
- ulint type, /*!< in: lock request type */
- const char* file, /*!< in: file where requested */
- ulint line, /*!< in: line where requested */
- ulint* index); /*!< out: index of the reserved cell */
-/******************************************************************//**
-Reserves a wait array cell for waiting for an object.
-The event of the cell is reset to nonsignalled state.
-@return true if free cell is found, otherwise false */
-UNIV_INTERN
-bool
-sync_array_reserve_cell(
-/*====================*/
- sync_array_t* arr, /*!< in: wait array */
- void* object, /*!< in: pointer to the object to wait for */
- ulint type, /*!< in: lock request type */
- const char* file, /*!< in: file where requested */
- ulint line, /*!< in: line where requested */
- ulint* index); /*!< out: index of the reserved cell */
-/******************************************************************//**
-This function should be called when a thread starts to wait on
-a wait array cell. In the debug version this function checks
-if the wait for a semaphore will result in a deadlock, in which
-case prints info and asserts. */
-UNIV_INTERN
-void
-sync_array_wait_event(
-/*==================*/
- sync_array_t* arr, /*!< in: wait array */
- ulint index); /*!< in: index of the reserved cell */
-/******************************************************************//**
-Frees the cell. NOTE! sync_array_wait_event frees the cell
-automatically! */
-UNIV_INTERN
-void
-sync_array_free_cell(
-/*=================*/
- sync_array_t* arr, /*!< in: wait array */
- ulint index); /*!< in: index of the cell in array */
-/**********************************************************************//**
-Note that one of the wait objects was signalled. */
-UNIV_INTERN
-void
-sync_array_object_signalled(void);
-/*=============================*/
-
-/**********************************************************************//**
-If the wakeup algorithm does not work perfectly at semaphore relases,
-this function will do the waking (see the comment in mutex_exit). This
-function should be called about every 1 second in the server. */
-UNIV_INTERN
-void
-sync_arr_wake_threads_if_sema_free(void);
-/*====================================*/
-/**********************************************************************//**
-Prints warnings of long semaphore waits to stderr.
-@return TRUE if fatal semaphore wait threshold was exceeded */
-UNIV_INTERN
-ibool
-sync_array_print_long_waits(
-/*========================*/
- os_thread_id_t* waiter, /*!< out: longest waiting thread */
- const void** sema) /*!< out: longest-waited-for semaphore */
- MY_ATTRIBUTE((nonnull));
-/********************************************************************//**
-Validates the integrity of the wait array. Checks
-that the number of reserved cells equals the count variable. */
-UNIV_INTERN
-void
-sync_array_validate(
-/*================*/
- sync_array_t* arr); /*!< in: sync wait array */
-/**********************************************************************//**
-Prints info of the wait array. */
-UNIV_INTERN
-void
-sync_array_print(
-/*=============*/
- FILE* file); /*!< in: file where to print */
-
-/**********************************************************************//**
-Create the primary system wait array(s), they are protected by an OS mutex */
-UNIV_INTERN
-void
-sync_array_init(
-/*============*/
- ulint n_threads); /*!< in: Number of slots to create */
-/**********************************************************************//**
-Close sync array wait sub-system. */
-UNIV_INTERN
-void
-sync_array_close(void);
-/*==================*/
-
-/**********************************************************************//**
-Get an instance of the sync wait array. */
-UNIV_INTERN
-sync_array_t*
-sync_array_get(void);
-/*================*/
-
-/**********************************************************************//**
-Prints info of the wait array without using any mutexes/semaphores. */
-UNIV_INTERN
-void
-sync_array_print_xtradb(void);
-
-/*****************************************************************//**
-Gets the nth cell in array.
-@return cell */
-UNIV_INTERN
-sync_cell_t*
-sync_array_get_nth_cell(
-/*====================*/
- sync_array_t* arr, /*!< in: sync array */
- ulint n); /*!< in: index */
-
-#ifndef UNIV_NONINL
-#include "sync0arr.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/sync0arr.ic b/storage/xtradb/include/sync0arr.ic
deleted file mode 100644
index 18a46dd0a41..00000000000
--- a/storage/xtradb/include/sync0arr.ic
+++ /dev/null
@@ -1,64 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0arr.ic
-The wait array for synchronization primitives
-
-Inline code
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-/** User configured sync array size */
-extern ulong srv_sync_array_size;
-
-/******************************************************************//**
-Get an instance of the sync wait array and reserve a wait array cell
-in the instance for waiting for an object. The event of the cell is
-reset to nonsignalled state.
-If reserving cell of the instance fails, try to get another new
-instance until we can reserve an empty cell of it.
-@return the instance found, never NULL. */
-UNIV_INLINE
-sync_array_t*
-sync_array_get_and_reserve_cell(
-/*============================*/
- void* object, /*!< in: pointer to the object to wait for */
- ulint type, /*!< in: lock request type */
- const char* file, /*!< in: file where requested */
- ulint line, /*!< in: line where requested */
- ulint* index) /*!< out: index of the reserved cell */
-{
- sync_array_t* sync_arr;
- bool reserved = false;
-
- for (ulint i = 0; i < srv_sync_array_size && !reserved; ++i) {
- sync_arr = sync_array_get();
- reserved = sync_array_reserve_cell(sync_arr, object, type,
- file, line, index);
- }
-
- /* This won't be true every time, for the loop above may execute
- more than srv_sync_array_size times to reserve a cell.
- But an assertion here makes the code more solid. */
- ut_a(reserved);
-
- return sync_arr;
-}
-
diff --git a/storage/xtradb/include/sync0rw.h b/storage/xtradb/include/sync0rw.h
deleted file mode 100644
index 95d38d3be92..00000000000
--- a/storage/xtradb/include/sync0rw.h
+++ /dev/null
@@ -1,1094 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0rw.h
-The read-write lock (for threads, not for database transactions)
-
-Created 9/11/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef sync0rw_h
-#define sync0rw_h
-
-#include "univ.i"
-#ifndef UNIV_HOTBACKUP
-#include "ut0lst.h"
-#include "ut0counter.h"
-#include "sync0sync.h"
-#include "os0sync.h"
-
-/** Enable semaphore request instrumentation */
-extern my_bool srv_instrument_semaphores;
-
-/* The following undef is to prevent a name conflict with a macro
-in MySQL: */
-#undef rw_lock_t
-#endif /* !UNIV_HOTBACKUP */
-
-/** Counters for RW locks. */
-struct rw_lock_stats_t {
- typedef ib_counter_t<ib_int64_t, IB_N_SLOTS> ib_int64_counter_t;
-
- /** number of spin waits on rw-latches,
- resulted during shared (read) locks */
- ib_int64_counter_t rw_s_spin_wait_count;
-
- /** number of spin loop rounds on rw-latches,
- resulted during shared (read) locks */
- ib_int64_counter_t rw_s_spin_round_count;
-
- /** number of OS waits on rw-latches,
- resulted during shared (read) locks */
- ib_int64_counter_t rw_s_os_wait_count;
-
- /** number of unlocks (that unlock shared locks),
- set only when UNIV_SYNC_PERF_STAT is defined */
- ib_int64_counter_t rw_s_exit_count;
-
- /** number of spin waits on rw-latches,
- resulted during exclusive (write) locks */
- ib_int64_counter_t rw_x_spin_wait_count;
-
- /** number of spin loop rounds on rw-latches,
- resulted during exclusive (write) locks */
- ib_int64_counter_t rw_x_spin_round_count;
-
- /** number of OS waits on rw-latches,
- resulted during exclusive (write) locks */
- ib_int64_counter_t rw_x_os_wait_count;
-
- /** number of unlocks (that unlock exclusive locks),
- set only when UNIV_SYNC_PERF_STAT is defined */
- ib_int64_counter_t rw_x_exit_count;
-};
-
-/* Latch types; these are used also in btr0btr.h: keep the numerical values
-smaller than 30 and the order of the numerical values like below! */
-#define RW_S_LATCH 1
-#define RW_X_LATCH 2
-#define RW_NO_LATCH 3
-
-#ifndef UNIV_HOTBACKUP
-/* We decrement lock_word by this amount for each x_lock. It is also the
-start value for the lock_word, meaning that it limits the maximum number
-of concurrent read locks before the rw_lock breaks. The current value of
-0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/
-#define X_LOCK_DECR 0x00100000
-
-struct rw_lock_t;
-struct prio_rw_lock_t;
-#ifdef UNIV_SYNC_DEBUG
-struct rw_lock_debug_t;
-#endif /* UNIV_SYNC_DEBUG */
-
-typedef UT_LIST_BASE_NODE_T(rw_lock_t) rw_lock_list_t;
-
-extern rw_lock_list_t rw_lock_list;
-extern ib_mutex_t rw_lock_list_mutex;
-
-#ifdef UNIV_SYNC_DEBUG
-/* The global mutex which protects debug info lists of all rw-locks.
-To modify the debug info list of an rw-lock, this mutex has to be
-acquired in addition to the mutex protecting the lock. */
-extern os_fast_mutex_t rw_lock_debug_mutex;
-#endif /* UNIV_SYNC_DEBUG */
-
-/** Counters for RW locks. */
-extern rw_lock_stats_t rw_lock_stats;
-
-#ifdef UNIV_PFS_RWLOCK
-/* Following are rwlock keys used to register with MySQL
-performance schema */
-# ifdef UNIV_LOG_ARCHIVE
-extern mysql_pfs_key_t archive_lock_key;
-# endif /* UNIV_LOG_ARCHIVE */
-extern mysql_pfs_key_t btr_search_latch_key;
-extern mysql_pfs_key_t buf_block_lock_key;
-# ifdef UNIV_SYNC_DEBUG
-extern mysql_pfs_key_t buf_block_debug_latch_key;
-# endif /* UNIV_SYNC_DEBUG */
-extern mysql_pfs_key_t dict_operation_lock_key;
-extern mysql_pfs_key_t checkpoint_lock_key;
-extern mysql_pfs_key_t fil_space_latch_key;
-extern mysql_pfs_key_t fts_cache_rw_lock_key;
-extern mysql_pfs_key_t fts_cache_init_rw_lock_key;
-extern mysql_pfs_key_t trx_i_s_cache_lock_key;
-extern mysql_pfs_key_t trx_purge_latch_key;
-extern mysql_pfs_key_t index_tree_rw_lock_key;
-extern mysql_pfs_key_t index_online_log_key;
-extern mysql_pfs_key_t dict_table_stats_key;
-extern mysql_pfs_key_t trx_sys_rw_lock_key;
-extern mysql_pfs_key_t hash_table_rw_lock_key;
-#endif /* UNIV_PFS_RWLOCK */
-
-
-#ifndef UNIV_PFS_RWLOCK
-/******************************************************************//**
-Creates, or rather, initializes an rw-lock object in a specified memory
-location (which must be appropriately aligned). The rw-lock is initialized
-to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
-is necessary only if the memory block containing it is freed.
-if MySQL performance schema is enabled and "UNIV_PFS_RWLOCK" is
-defined, the rwlock are instrumented with performance schema probes. */
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-# define rw_lock_create(K, L, level) \
- rw_lock_create_func((L), (level), #L, __FILE__, __LINE__)
-# else /* UNIV_SYNC_DEBUG */
-# define rw_lock_create(K, L, level) \
- rw_lock_create_func((L), #L, __FILE__, __LINE__)
-# endif/* UNIV_SYNC_DEBUG */
-# else /* UNIV_DEBUG */
-# define rw_lock_create(K, L, level) \
- rw_lock_create_func((L), #L, __FILE__, __LINE__)
-# endif /* UNIV_DEBUG */
-
-/**************************************************************//**
-NOTE! The following macros should be used in rw locking and
-unlocking, not the corresponding function. */
-
-# define rw_lock_s_lock(M) \
- rw_lock_s_lock_func((M), 0, __FILE__, __LINE__)
-
-# define rw_lock_s_lock_inline(M, P, F, L) \
- rw_lock_s_lock_func((M), (P), (F), (L))
-
-# define rw_lock_s_lock_gen(M, P) \
- rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
-
-# define rw_lock_s_lock_gen_nowait(M, P) \
- rw_lock_s_lock_low((M), (P), __FILE__, __LINE__)
-
-# define rw_lock_s_lock_nowait(M, F, L) \
- rw_lock_s_lock_low((M), 0, (F), (L))
-
-# ifdef UNIV_SYNC_DEBUG
-# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(P, L)
-# else
-# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L)
-# endif
-
-
-# define rw_lock_x_lock(M) \
- rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
-
-# define rw_lock_x_lock_inline(M, P, F, L) \
- rw_lock_x_lock_func((M), (P), (F), (L))
-
-# define rw_lock_x_lock_gen(M, P) \
- rw_lock_x_lock_func((M), (P), __FILE__, __LINE__)
-
-# define rw_lock_x_lock_nowait(M) \
- rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__)
-
-# define rw_lock_x_lock_func_nowait_inline(M, F, L) \
- rw_lock_x_lock_func_nowait((M), (F), (L))
-
-# ifdef UNIV_SYNC_DEBUG
-# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(P, L)
-# else
-# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L)
-# endif
-
-# define rw_lock_free(M) rw_lock_free_func(M)
-
-#else /* !UNIV_PFS_RWLOCK */
-
-/* Following macros point to Performance Schema instrumented functions. */
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-# define rw_lock_create(K, L, level) \
- pfs_rw_lock_create_func((K), (L), (level), #L, __FILE__, __LINE__)
-# else /* UNIV_SYNC_DEBUG */
-# define rw_lock_create(K, L, level) \
- pfs_rw_lock_create_func((K), (L), #L, __FILE__, __LINE__)
-# endif/* UNIV_SYNC_DEBUG */
-# else /* UNIV_DEBUG */
-# define rw_lock_create(K, L, level) \
- pfs_rw_lock_create_func((K), (L), #L, __FILE__, __LINE__)
-# endif /* UNIV_DEBUG */
-
-/******************************************************************
-NOTE! The following macros should be used in rw locking and
-unlocking, not the corresponding function. */
-
-# define rw_lock_s_lock(M) \
- pfs_rw_lock_s_lock_func((M), 0, __FILE__, __LINE__)
-
-# define rw_lock_s_lock_inline(M, P, F, L) \
- pfs_rw_lock_s_lock_func((M), (P), (F), (L))
-
-# define rw_lock_s_lock_gen(M, P) \
- pfs_rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
-
-# define rw_lock_s_lock_gen_nowait(M, P) \
- pfs_rw_lock_s_lock_low((M), (P), __FILE__, __LINE__)
-
-# define rw_lock_s_lock_nowait(M, F, L) \
- pfs_rw_lock_s_lock_low((M), 0, (F), (L))
-
-# ifdef UNIV_SYNC_DEBUG
-# define rw_lock_s_unlock_gen(L, P) pfs_rw_lock_s_unlock_func(P, L)
-# else
-# define rw_lock_s_unlock_gen(L, P) pfs_rw_lock_s_unlock_func(L)
-# endif
-
-# define rw_lock_x_lock(M) \
- pfs_rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
-
-# define rw_lock_x_lock_inline(M, P, F, L) \
- pfs_rw_lock_x_lock_func((M), (P), (F), (L))
-
-# define rw_lock_x_lock_gen(M, P) \
- pfs_rw_lock_x_lock_func((M), (P), __FILE__, __LINE__)
-
-# define rw_lock_x_lock_nowait(M) \
- pfs_rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__)
-
-# define rw_lock_x_lock_func_nowait_inline(M, F, L) \
- pfs_rw_lock_x_lock_func_nowait((M), (F), (L))
-
-# ifdef UNIV_SYNC_DEBUG
-# define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(P, L)
-# else
-# define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(L)
-# endif
-
-# define rw_lock_free(M) pfs_rw_lock_free_func(M)
-
-#endif /* UNIV_PFS_RWLOCK */
-
-#define rw_lock_s_unlock(L) rw_lock_s_unlock_gen(L, 0)
-#define rw_lock_x_unlock(L) rw_lock_x_unlock_gen(L, 0)
-
-/******************************************************************//**
-Creates, or rather, initializes an rw-lock object in a specified memory
-location (which must be appropriately aligned). The rw-lock is initialized
-to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
-is necessary only if the memory block containing it is freed. */
-UNIV_INTERN
-void
-rw_lock_create_func(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to memory */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cmutex_name, /*!< in: mutex name */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline); /*!< in: file line where created */
-/******************************************************************//**
-Creates, or rather, initializes a priority rw-lock object in a specified memory
-location (which must be appropriately aligned). The rw-lock is initialized
-to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
-is necessary only if the memory block containing it is freed. */
-UNIV_INTERN
-void
-rw_lock_create_func(
-/*================*/
- prio_rw_lock_t* lock, /*!< in: pointer to memory */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cmutex_name, /*!< in: mutex name */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline); /*!< in: file line where created */
-/******************************************************************//**
-Calling this function is obligatory only if the memory buffer containing
-the rw-lock is freed. Removes an rw-lock object from the global list. The
-rw-lock is checked to be in the non-locked state. */
-UNIV_INTERN
-void
-rw_lock_free_func(
-/*==============*/
- rw_lock_t* lock); /*!< in: rw-lock */
-/******************************************************************//**
-Calling this function is obligatory only if the memory buffer containing
-the priority rw-lock is freed. Removes an rw-lock object from the global list.
-The rw-lock is checked to be in the non-locked state. */
-UNIV_INTERN
-void
-rw_lock_free_func(
-/*==============*/
- prio_rw_lock_t* lock); /*!< in: rw-lock */
-#ifdef UNIV_DEBUG
-/******************************************************************//**
-Checks that the rw-lock has been initialized and that there are no
-simultaneous shared and exclusive locks.
-@return TRUE */
-UNIV_INTERN
-ibool
-rw_lock_validate(
-/*=============*/
- rw_lock_t* lock); /*!< in: rw-lock */
-/******************************************************************//**
-Checks that the priority rw-lock has been initialized and that there are no
-simultaneous shared and exclusive locks.
-@return TRUE */
-UNIV_INTERN
-ibool
-rw_lock_validate(
-/*=============*/
- prio_rw_lock_t* lock); /*!< in: rw-lock */
-#endif /* UNIV_DEBUG */
-/******************************************************************//**
-Low-level function which tries to lock an rw-lock in s-mode. Performs no
-spinning.
-@return TRUE if success */
-UNIV_INLINE
-ibool
-rw_lock_s_lock_low(
-/*===============*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass MY_ATTRIBUTE((unused)),
- /*!< in: pass value; != 0, if the lock will be
- passed to another thread to unlock */
- const char* file_name, /*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function, except if
-you supply the file name and line number. Lock an rw-lock in shared mode
-for the current thread. If the rw-lock is locked in exclusive mode, or
-there is an exclusive lock request waiting, the function spins a preset
-time (controlled by SYNC_SPIN_ROUNDS), waiting for the lock, before
-suspending the thread. */
-UNIV_INLINE
-void
-rw_lock_s_lock_func(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function, except if
-you supply the file name and line number. Lock a priority rw-lock in shared
-mode for the current thread, using the relative thread priority. If the
-rw-lock is locked in exclusive mode, or there is an exclusive lock request
-waiting, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
-waiting for the lock, before suspending the thread. */
-UNIV_INLINE
-void
-rw_lock_s_lock_func(
-/*================*/
- prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in exclusive mode for the current thread if the lock can be
-obtained immediately.
-@return TRUE if success */
-UNIV_INLINE
-ibool
-rw_lock_x_lock_func_nowait(
-/*=======================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-Releases a shared mode lock. */
-UNIV_INLINE
-void
-rw_lock_s_unlock_func(
-/*==================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- rw_lock_t* lock); /*!< in/out: rw-lock */
-
-/******************************************************************//**
-Releases a shared mode priority lock. */
-UNIV_INLINE
-void
-rw_lock_s_unlock_func(
-/*==================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- prio_rw_lock_t* lock); /*!< in/out: rw-lock */
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in exclusive mode for the current thread. If the rw-lock is locked
-in shared or exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
-for the lock, before suspending the thread. If the same thread has an x-lock
-on the rw-lock, locking succeed, with the following exception: if pass != 0,
-only a single x-lock may be taken on the lock. NOTE: If the same thread has
-an s-lock, locking does not succeed! */
-UNIV_INTERN
-void
-rw_lock_x_lock_func(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line, /*!< in: line where requested */
- bool priority_lock = false,
- /*!< in: whether the lock is a priority lock */
- bool high_priority = false);
- /*!< in: whether we are acquiring a priority
- lock with high priority */
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function! Lock a priority
-rw-lock in exclusive mode for the current thread. If the rw-lock is locked
-in shared or exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
-for the lock, before suspending the thread. If the same thread has an x-lock
-on the rw-lock, locking succeeds, with the following exception: if pass != 0,
-only a single x-lock may be taken on the lock. NOTE: If the same thread has
-an s-lock, locking does not succeed! */
-UNIV_INTERN
-void
-rw_lock_x_lock_func(
-/*================*/
- prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-Releases an exclusive mode lock. */
-UNIV_INLINE
-void
-rw_lock_x_unlock_func(
-/*==================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- rw_lock_t* lock); /*!< in/out: rw-lock */
-/******************************************************************//**
-Releases an exclusive mode priority lock. */
-UNIV_INLINE
-void
-rw_lock_x_unlock_func(
-/*==================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- prio_rw_lock_t* lock); /*!< in/out: rw-lock */
-/******************************************************************//**
-This function is used in the insert buffer to move the ownership of an
-x-latch on a buffer frame to the current thread. The x-latch was set by
-the buffer read operation and it protected the buffer frame while the
-read was done. The ownership is moved because we want the current
-thread to be able to acquire a second x-latch which is stored in an mtr.
-This, in turn, is needed to pass the debug checks of index page
-operations. */
-UNIV_INTERN
-void
-rw_lock_x_lock_move_ownership(
-/*==========================*/
- rw_lock_t* lock); /*!< in: lock which was x-locked in the
- buffer read */
-/******************************************************************//**
-Returns the value of writer_count for the lock. Does not reserve the lock
-mutex, so the caller must be sure it is not changed during the call.
-@return value of writer_count */
-UNIV_INLINE
-ulint
-rw_lock_get_x_lock_count(
-/*=====================*/
- const rw_lock_t* lock); /*!< in: rw-lock */
-/******************************************************************//**
-Returns the value of writer_count for the priority lock. Does not reserve the
-lock mutex, so the caller must be sure it is not changed during the call.
-@return value of writer_count */
-UNIV_INLINE
-ulint
-rw_lock_get_x_lock_count(
-/*=====================*/
- const prio_rw_lock_t* lock); /*!< in: rw-lock */
-/********************************************************************//**
-Check if there are threads waiting for the rw-lock.
-@return 1 if waiters, 0 otherwise */
-UNIV_INLINE
-ulint
-rw_lock_get_waiters(
-/*================*/
- const rw_lock_t* lock); /*!< in: rw-lock */
-/********************************************************************//**
-Check if there are threads waiting for the priority rw-lock.
-@return 1 if waiters, 0 otherwise */
-UNIV_INLINE
-ulint
-rw_lock_get_waiters(
-/*================*/
- const prio_rw_lock_t* lock); /*!< in: rw-lock */
-/******************************************************************//**
-Returns the write-status of the lock - this function made more sense
-with the old rw_lock implementation.
-@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
-UNIV_INLINE
-ulint
-rw_lock_get_writer(
-/*===============*/
- const rw_lock_t* lock); /*!< in: rw-lock */
-/******************************************************************//**
-Returns the write-status of the priority lock - this function made more sense
-with the old rw_lock implementation.
-@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
-UNIV_INLINE
-ulint
-rw_lock_get_writer(
-/*===============*/
- const prio_rw_lock_t* lock); /*!< in: rw-lock */
-/******************************************************************//**
-Returns the number of readers.
-@return number of readers */
-UNIV_INLINE
-ulint
-rw_lock_get_reader_count(
-/*=====================*/
- const rw_lock_t* lock); /*!< in: rw-lock */
-/******************************************************************//**
-Returns the number of readers.
-@return number of readers */
-UNIV_INLINE
-ulint
-rw_lock_get_reader_count(
-/*=====================*/
- const prio_rw_lock_t* lock); /*!< in: rw-lock */
-/******************************************************************//**
-Decrements lock_word the specified amount if it is greater than 0.
-This is used by both s_lock and x_lock operations.
-@return TRUE if decr occurs */
-UNIV_INLINE
-ibool
-rw_lock_lock_word_decr(
-/*===================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount); /*!< in: amount to decrement */
-/******************************************************************//**
-Increments lock_word the specified amount and returns new value.
-@return lock->lock_word after increment */
-UNIV_INLINE
-lint
-rw_lock_lock_word_incr(
-/*===================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount); /*!< in: amount to increment */
-/******************************************************************//**
-This function sets the lock->writer_thread and lock->recursive fields.
-For platforms where we are using atomic builtins instead of lock->mutex
-it sets the lock->writer_thread field using atomics to ensure memory
-ordering. Note that it is assumed that the caller of this function
-effectively owns the lock i.e.: nobody else is allowed to modify
-lock->writer_thread at this point in time.
-The protocol is that lock->writer_thread MUST be updated BEFORE the
-lock->recursive flag is set. */
-UNIV_INLINE
-void
-rw_lock_set_writer_id_and_recursion_flag(
-/*=====================================*/
- rw_lock_t* lock, /*!< in/out: lock to work on */
- ibool recursive); /*!< in: TRUE if recursion
- allowed */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Checks if the thread has locked the rw-lock in the specified mode, with
-the pass value == 0. */
-UNIV_INTERN
-ibool
-rw_lock_own(
-/*========*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
- MY_ATTRIBUTE((warn_unused_result));
-/******************************************************************//**
-Checks if the thread has locked the priority rw-lock in the specified mode,
-with the pass value == 0. */
-UNIV_INTERN
-ibool
-rw_lock_own(
-/*========*/
- prio_rw_lock_t* lock, /*!< in: rw-lock */
- ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
- MY_ATTRIBUTE((warn_unused_result));
-#endif /* UNIV_SYNC_DEBUG */
-/******************************************************************//**
-Checks if somebody has locked the rw-lock in the specified mode. */
-UNIV_INTERN
-ibool
-rw_lock_is_locked(
-/*==============*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint lock_type); /*!< in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
-#ifdef UNIV_SYNC_DEBUG
-/***************************************************************//**
-Prints debug info of an rw-lock. */
-UNIV_INTERN
-void
-rw_lock_print(
-/*==========*/
- rw_lock_t* lock); /*!< in: rw-lock */
-/***************************************************************//**
-Prints debug info of currently locked rw-locks. */
-UNIV_INTERN
-void
-rw_lock_list_print_info(
-/*====================*/
- FILE* file); /*!< in: file where to print */
-/***************************************************************//**
-Returns the number of currently locked rw-locks.
-Works only in the debug version.
-@return number of locked rw-locks */
-UNIV_INTERN
-ulint
-rw_lock_n_locked(void);
-/*==================*/
-
-/*#####################################################################*/
-
-/******************************************************************//**
-Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
-because the debug mutex is also acquired in sync0arr while holding the OS
-mutex protecting the sync array, and the ordinary mutex_enter might
-recursively call routines in sync0arr, leading to a deadlock on the OS
-mutex. */
-UNIV_INTERN
-void
-rw_lock_debug_mutex_enter(void);
-/*===========================*/
-/******************************************************************//**
-Releases the debug mutex. */
-UNIV_INTERN
-void
-rw_lock_debug_mutex_exit(void);
-/*==========================*/
-/*********************************************************************//**
-Prints info of a debug struct. */
-UNIV_INTERN
-void
-rw_lock_debug_print(
-/*================*/
- FILE* f, /*!< in: output stream */
- rw_lock_debug_t* info); /*!< in: debug struct */
-#endif /* UNIV_SYNC_DEBUG */
-
-/* NOTE! The structure appears here only for the compiler to know its size.
-Do not use its fields directly! */
-
-/** The structure used in the spin lock implementation of a read-write
-lock. Several threads may have a shared lock simultaneously in this
-lock, but only one writer may have an exclusive lock, in which case no
-shared locks are allowed. To prevent starving of a writer blocked by
-readers, a writer may queue for x-lock by decrementing lock_word: no
-new readers will be let in while the thread waits for readers to
-exit. */
-struct rw_lock_t {
- volatile lint lock_word;
- /*!< Holds the state of the lock. */
- volatile ulint waiters;/*!< 1: there are waiters */
- volatile ibool recursive;/*!< Default value FALSE which means the lock
- is non-recursive. The value is typically set
- to TRUE making normal rw_locks recursive. In
- case of asynchronous IO, when a non-zero
- value of 'pass' is passed then we keep the
- lock non-recursive.
- This flag also tells us about the state of
- writer_thread field. If this flag is set
- then writer_thread MUST contain the thread
- id of the current x-holder or wait-x thread.
- This flag must be reset in x_unlock
- functions before incrementing the lock_word */
- volatile os_thread_id_t writer_thread;
- /*!< Thread id of writer thread. Is only
- guaranteed to have sane and non-stale
- value iff recursive flag is set. */
- struct os_event event; /*!< Used by sync0arr.cc for thread queueing */
- struct os_event wait_ex_event;
- /*!< Event for next-writer to wait on. A thread
- must decrement lock_word before waiting. */
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- ib_mutex_t mutex; /*!< The mutex protecting rw_lock_t */
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-
- UT_LIST_NODE_T(rw_lock_t) list;
- /*!< All allocated rw locks are put into a
- list */
-#ifdef UNIV_SYNC_DEBUG
- UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list;
- /*!< In the debug version: pointer to the debug
- info list of the lock */
- ulint level; /*!< Level in the global latching order. */
-#endif /* UNIV_SYNC_DEBUG */
-#ifdef UNIV_PFS_RWLOCK
- struct PSI_rwlock *pfs_psi;/*!< The instrumentation hook */
-#endif
- ulint count_os_wait; /*!< Count of os_waits. May not be accurate */
- const char* cfile_name;/*!< File name where lock created */
- const char* lock_name;/*!< lock name */
- os_thread_id_t thread_id;/*!< thread id */
- const char* file_name;/*!< File name where the lock was obtained */
- ulint line; /*!< Line where the rw-lock was locked */
- /* last s-lock file/line is not guaranteed to be correct */
- const char* last_s_file_name;/*!< File name where last s-locked */
- const char* last_x_file_name;/*!< File name where last x-locked */
- ibool writer_is_wait_ex;
- /*!< This is TRUE if the writer field is
- RW_LOCK_WAIT_EX; this field is located far
- from the memory update hotspot fields which
- are at the start of this struct, thus we can
- peek this field without causing much memory
- bus traffic */
- unsigned cline:14; /*!< Line where created */
- unsigned last_s_line:14; /*!< Line number where last time s-locked */
- unsigned last_x_line:14; /*!< Line number where last time x-locked */
-#ifdef UNIV_DEBUG
- ulint magic_n; /*!< RW_LOCK_MAGIC_N */
-/** Value of rw_lock_t::magic_n */
-#define RW_LOCK_MAGIC_N 22643
-#endif /* UNIV_DEBUG */
-};
-
-/** The structure implementing a priority rw lock. */
-struct prio_rw_lock_t {
- struct rw_lock_t base_lock; /* The regular rw latch
- provides the lock word etc. for
- the priority rw lock */
- volatile ulint high_priority_s_waiters;
- /* Number of high priority S
- waiters */
- struct os_event high_priority_s_event; /* High priority wait
- array event for S waiters */
- volatile ulint high_priority_x_waiters;
- /* Number of high priority X
- waiters */
- struct os_event high_priority_x_event;
- /* High priority wait array
- event for X waiters */
- volatile ulint high_priority_wait_ex_waiter;
- /* If 1, a waiting next-writer
- exists and is high-priority */
-};
-
-#ifdef UNIV_SYNC_DEBUG
-/** The structure for storing debug info of an rw-lock. All access to this
-structure must be protected by rw_lock_debug_mutex_enter(). */
-struct rw_lock_debug_t {
-
- os_thread_id_t thread_id; /*!< The thread id of the thread which
- locked the rw-lock */
- ulint pass; /*!< Pass value given in the lock operation */
- ulint lock_type; /*!< Type of the lock: RW_LOCK_EX,
- RW_LOCK_SHARED, RW_LOCK_WAIT_EX */
- const char* file_name;/*!< File name where the lock was obtained */
- ulint line; /*!< Line where the rw-lock was locked */
- UT_LIST_NODE_T(rw_lock_debug_t) list;
- /*!< Debug structs are linked in a two-way
- list */
-};
-#endif /* UNIV_SYNC_DEBUG */
-
-/* For performance schema instrumentation, a new set of rwlock
-wrapper functions is created if "UNIV_PFS_RWLOCK" is defined.
-The instrumentation is not planted directly into the original
-functions, so that the underlying functions stay as they
-are. If a user wants to "take out" some rwlock from
-instrumentation even when performance schema (UNIV_PFS_RWLOCK)
-is defined, they can do so by reinstating the APIs that link directly
-to the original underlying functions.
-The instrumented function names have the prefix "pfs_rw_lock_" instead
-of the original prefix "rw_lock_". The following functions
-have been instrumented:
-
-rw_lock_create()
-rw_lock_x_lock()
-rw_lock_x_lock_gen()
-rw_lock_x_lock_nowait()
-rw_lock_x_unlock_gen()
-rw_lock_s_lock()
-rw_lock_s_lock_gen()
-rw_lock_s_lock_nowait()
-rw_lock_s_unlock_gen()
-rw_lock_free()
-*/
-
-#ifdef UNIV_PFS_RWLOCK
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_create_func()
-NOTE! Please use the corresponding macro rw_lock_create(), not
-directly this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_create_func(
-/*====================*/
- PSI_rwlock_key key, /*!< in: key registered with
- performance schema */
- rw_lock_t* lock, /*!< in: rw lock */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cmutex_name, /*!< in: mutex name */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline); /*!< in: file line where created */
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_create_func()
-NOTE! Please use the corresponding macro rw_lock_create(), not
-directly this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_create_func(
-/*====================*/
- PSI_rwlock_key key, /*!< in: key registered with
- performance schema */
- prio_rw_lock_t* lock, /*!< in: rw lock */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cmutex_name, /*!< in: mutex name */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline); /*!< in: file line where created */
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_x_lock_func()
-NOTE! Please use the corresponding macro rw_lock_x_lock(), not
-directly this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_x_lock_func(
-/*====================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_x_lock_func()
-NOTE! Please use the corresponding macro rw_lock_x_lock(), not
-directly this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_x_lock_func(
-/*====================*/
- prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-
-/******************************************************************//**
-Performance schema instrumented wrap function for
-rw_lock_x_lock_func_nowait()
-NOTE! Please use the corresponding macro, not directly this function!
-@return TRUE if success */
-UNIV_INLINE
-ibool
-pfs_rw_lock_x_lock_func_nowait(
-/*===========================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_s_lock_func()
-NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_s_lock_func(
-/*====================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_s_lock_func()
-NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_s_lock_func(
-/*====================*/
- prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_s_lock_func()
-NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly
-this function!
-@return TRUE if success */
-UNIV_INLINE
-ibool
-pfs_rw_lock_s_lock_low(
-/*===================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the
- lock will be passed to another
- thread to unlock */
- const char* file_name, /*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_s_lock_func()
-NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly
-this function!
-@return TRUE if success */
-UNIV_INLINE
-ibool
-pfs_rw_lock_s_lock_low(
-/*===================*/
- prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the
- lock will be passed to another
- thread to unlock */
- const char* file_name, /*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_x_lock_func()
-NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_x_lock_func(
-/*====================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_x_lock_func()
-NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_x_lock_func(
-/*====================*/
- prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_s_unlock_func()
-NOTE! Please use the corresponding macro rw_lock_s_unlock(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_s_unlock_func(
-/*======================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the
- lock may have been passed to another
- thread to unlock */
-#endif
- rw_lock_t* lock); /*!< in/out: rw-lock */
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_s_unlock_func()
-NOTE! Please use the corresponding macro rw_lock_s_unlock(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_s_unlock_func(
-/*======================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the
- lock may have been passed to another
- thread to unlock */
-#endif
- prio_rw_lock_t* lock); /*!< in/out: rw-lock */
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_x_unlock_func()
-NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_x_unlock_func(
-/*======================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the
- lock may have been passed to another
- thread to unlock */
-#endif
- rw_lock_t* lock); /*!< in/out: rw-lock */
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_x_unlock_func()
-NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_x_unlock_func(
-/*======================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the
- lock may have been passed to another
- thread to unlock */
-#endif
- prio_rw_lock_t* lock); /*!< in/out: rw-lock */
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_free_func()
-NOTE! Please use the corresponding macro rw_lock_free(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_free_func(
-/*==================*/
- rw_lock_t* lock); /*!< in: rw-lock */
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_free_func()
-NOTE! Please use the corresponding macro rw_lock_free(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_free_func(
-/*==================*/
- prio_rw_lock_t* lock); /*!< in: rw-lock */
-#endif /* UNIV_PFS_RWLOCK */
-
-
-#ifndef UNIV_NONINL
-#include "sync0rw.ic"
-#endif
-#endif /* !UNIV_HOTBACKUP */
-
-#endif
diff --git a/storage/xtradb/include/sync0rw.ic b/storage/xtradb/include/sync0rw.ic
deleted file mode 100644
index 91d1e1b0cfc..00000000000
--- a/storage/xtradb/include/sync0rw.ic
+++ /dev/null
@@ -1,1275 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0rw.ic
-The read-write lock (for threads)
-
-Created 9/11/1995 Heikki Tuuri
-*******************************************************/
-
-/******************************************************************//**
-Lock a regular or priority rw-lock in shared mode for the current thread. If
-the rw-lock is locked in exclusive mode, or there is an exclusive lock request
-waiting, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
-waiting for the lock before suspending the thread. */
-UNIV_INTERN
-void
-rw_lock_s_lock_spin(
-/*================*/
- void* _lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- bool priority_lock,
- /*!< in: whether the lock is a priority lock */
- bool high_priority,
- /*!< in: whether we are acquiring a priority
- lock with high priority */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line); /*!< in: line where requested */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Inserts the debug information for an rw-lock. */
-UNIV_INTERN
-void
-rw_lock_add_debug_info(
-/*===================*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint pass, /*!< in: pass value */
- ulint lock_type, /*!< in: lock type */
- const char* file_name, /*!< in: file where requested */
- ulint line); /*!< in: line where requested */
-/******************************************************************//**
-Removes a debug information struct for an rw-lock. */
-UNIV_INTERN
-void
-rw_lock_remove_debug_info(
-/*======================*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint pass, /*!< in: pass value */
- ulint lock_type); /*!< in: lock type */
-#endif /* UNIV_SYNC_DEBUG */
-
-/********************************************************************//**
-Check if there are threads waiting for the rw-lock.
-@return 1 if waiters, 0 otherwise */
-UNIV_INLINE
-ulint
-rw_lock_get_waiters(
-/*================*/
- const rw_lock_t* lock) /*!< in: rw-lock */
-{
- return(lock->waiters);
-}
-
-/********************************************************************//**
-Check if there are threads waiting for the priority rw-lock.
-@return 1 if waiters, 0 otherwise */
-UNIV_INLINE
-ulint
-rw_lock_get_waiters(
-/*================*/
- const prio_rw_lock_t* lock) /*!< in: rw-lock */
-{
- return rw_lock_get_waiters(&lock->base_lock)
- || lock->high_priority_s_waiters
- || lock->high_priority_x_waiters;
-}
-
-/********************************************************************//**
-Sets lock->waiters to 1. It is not an error if lock->waiters is already
-1. On platforms where ATOMIC builtins are used this function enforces a
-memory barrier. */
-UNIV_INLINE
-void
-rw_lock_set_waiter_flag(
-/*====================*/
- rw_lock_t* lock) /*!< in/out: rw-lock */
-{
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- (void) os_compare_and_swap_ulint(&lock->waiters, 0, 1);
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
- lock->waiters = 1;
- os_wmb;
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-/********************************************************************//**
-Resets lock->waiters to 0. It is not an error if lock->waiters is already
-0. On platforms where ATOMIC builtins are used this function enforces a
-memory barrier. */
-UNIV_INLINE
-void
-rw_lock_reset_waiter_flag(
-/*======================*/
- rw_lock_t* lock) /*!< in/out: rw-lock */
-{
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- (void) os_compare_and_swap_ulint(&lock->waiters, 1, 0);
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
- lock->waiters = 0;
- os_wmb;
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-/******************************************************************//**
-Returns the write-status of the lock - this function made more sense
-with the old rw_lock implementation.
-@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
-UNIV_INLINE
-ulint
-rw_lock_get_writer(
-/*===============*/
- const rw_lock_t* lock) /*!< in: rw-lock */
-{
- lint lock_word = lock->lock_word;
- if (lock_word > 0) {
- /* return NOT_LOCKED in s-lock state, like the writer
- member of the old lock implementation. */
- return(RW_LOCK_NOT_LOCKED);
- } else if ((lock_word == 0) || (lock_word <= -X_LOCK_DECR)) {
- return(RW_LOCK_EX);
- } else {
- ut_ad(lock_word > -X_LOCK_DECR);
- return(RW_LOCK_WAIT_EX);
- }
-}
-
-/******************************************************************//**
-Returns the write-status of the priority lock - this function made more sense
-with the old rw_lock implementation.
-@return RW_LOCK_NOT_LOCKED, RW_LOCK_EX, RW_LOCK_WAIT_EX */
-UNIV_INLINE
-ulint
-rw_lock_get_writer(
-/*===============*/
- const prio_rw_lock_t* lock) /*!< in: rw-lock */
-{
- return(rw_lock_get_writer(&lock->base_lock));
-}
-
-/******************************************************************//**
-Returns the number of readers.
-@return number of readers */
-UNIV_INLINE
-ulint
-rw_lock_get_reader_count(
-/*=====================*/
- const rw_lock_t* lock) /*!< in: rw-lock */
-{
- lint lock_word = lock->lock_word;
- if (lock_word > 0) {
- /* s-locked, no x-waiters */
- return(X_LOCK_DECR - lock_word);
- } else if (lock_word < 0 && lock_word > -X_LOCK_DECR) {
- /* s-locked, with x-waiters */
- return((ulint)(-lock_word));
- }
- return(0);
-}
-
-/******************************************************************//**
-Returns the number of readers.
-@return number of readers */
-UNIV_INLINE
-ulint
-rw_lock_get_reader_count(
-/*=====================*/
- const prio_rw_lock_t* lock) /*!< in: rw-lock */
-{
- return(rw_lock_get_reader_count(&lock->base_lock));
-}
-
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
-UNIV_INLINE
-ib_mutex_t*
-rw_lock_get_mutex(
-/*==============*/
- rw_lock_t* lock)
-{
- return(&(lock->mutex));
-}
-#endif
-
-/******************************************************************//**
-Returns the value of writer_count for the lock. Does not reserve the lock
-mutex, so the caller must be sure it is not changed during the call.
-@return value of writer_count */
-UNIV_INLINE
-ulint
-rw_lock_get_x_lock_count(
-/*=====================*/
- const rw_lock_t* lock) /*!< in: rw-lock */
-{
- lint lock_copy = lock->lock_word;
- if ((lock_copy != 0) && (lock_copy > -X_LOCK_DECR)) {
- return(0);
- }
- return((lock_copy == 0) ? 1 : (2 - (lock_copy + X_LOCK_DECR)));
-}
-
-/******************************************************************//**
-Returns the value of writer_count for the priority lock. Does not reserve the
-lock mutex, so the caller must be sure it is not changed during the call.
-@return value of writer_count */
-UNIV_INLINE
-ulint
-rw_lock_get_x_lock_count(
-/*=====================*/
- const prio_rw_lock_t* lock) /*!< in: rw-lock */
-{
- return(rw_lock_get_x_lock_count(&lock->base_lock));
-}
-
-/******************************************************************//**
-Two different implementations for decrementing the lock_word of a rw_lock:
-one for systems supporting atomic operations, one for others. This does
-does not support recusive x-locks: they should be handled by the caller and
-need not be atomic since they are performed by the current lock holder.
-Returns true if the decrement was made, false if not.
-@return TRUE if decr occurs */
-UNIV_INLINE
-ibool
-rw_lock_lock_word_decr(
-/*===================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount) /*!< in: amount to decrement */
-{
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- lint local_lock_word;
-
- os_rmb;
- local_lock_word = lock->lock_word;
- while (local_lock_word > 0) {
- if (os_compare_and_swap_lint(&lock->lock_word,
- local_lock_word,
- local_lock_word - amount)) {
- return(TRUE);
- }
- local_lock_word = lock->lock_word;
- }
- return(FALSE);
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
- ibool success = FALSE;
- mutex_enter(&(lock->mutex));
- if (lock->lock_word > 0) {
- lock->lock_word -= amount;
- success = TRUE;
- }
- mutex_exit(&(lock->mutex));
- return(success);
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-/******************************************************************//**
-Increments lock_word the specified amount and returns new value.
-@return lock->lock_word after increment */
-UNIV_INLINE
-lint
-rw_lock_lock_word_incr(
-/*===================*/
- rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount) /*!< in: amount of increment */
-{
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- return(os_atomic_increment_lint(&lock->lock_word, amount));
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
- lint local_lock_word;
-
- mutex_enter(&(lock->mutex));
-
- lock->lock_word += amount;
- local_lock_word = lock->lock_word;
-
- mutex_exit(&(lock->mutex));
-
- return(local_lock_word);
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-/******************************************************************//**
-This function sets the lock->writer_thread and lock->recursive fields.
-For platforms where we are using atomic builtins instead of lock->mutex
-it sets the lock->writer_thread field using atomics to ensure memory
-ordering. Note that it is assumed that the caller of this function
-effectively owns the lock i.e.: nobody else is allowed to modify
-lock->writer_thread at this point in time.
-The protocol is that lock->writer_thread MUST be updated BEFORE the
-lock->recursive flag is set. */
-UNIV_INLINE
-void
-rw_lock_set_writer_id_and_recursion_flag(
-/*=====================================*/
- rw_lock_t* lock, /*!< in/out: lock to work on */
- ibool recursive) /*!< in: TRUE if recursion
- allowed */
-{
- os_thread_id_t curr_thread = os_thread_get_curr_id();
-
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- os_thread_id_t local_thread;
- ibool success;
-
- /* Prevent Valgrind warnings about writer_thread being
- uninitialized. It does not matter if writer_thread is
- uninitialized, because we are comparing writer_thread against
- itself, and the operation should always succeed. */
- UNIV_MEM_VALID(&lock->writer_thread, sizeof lock->writer_thread);
-
- local_thread = lock->writer_thread;
- success = os_compare_and_swap_thread_id(
- &lock->writer_thread, local_thread, curr_thread);
- ut_a(success);
- lock->recursive = recursive;
-
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
-
- mutex_enter(&lock->mutex);
- lock->writer_thread = curr_thread;
- lock->recursive = recursive;
- mutex_exit(&lock->mutex);
-
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-/******************************************************************//**
-Low-level function which tries to lock an rw-lock in s-mode. Performs no
-spinning.
-@return TRUE if success */
-UNIV_INLINE
-ibool
-rw_lock_s_lock_low(
-/*===============*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass MY_ATTRIBUTE((unused)),
- /*!< in: pass value; != 0, if the lock will be
- passed to another thread to unlock */
- const char* file_name, /*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- if (!rw_lock_lock_word_decr(lock, 1)) {
- /* Locking did not succeed */
- return(FALSE);
- }
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line);
-#endif
- /* These debugging values are not set safely: they may be incorrect
- or even refer to a line that is invalid for the file name. */
- lock->last_s_file_name = file_name;
- lock->last_s_line = line;
-
- if (srv_instrument_semaphores) {
- lock->thread_id = os_thread_get_curr_id();
- lock->file_name = file_name;
- lock->line = line;
- }
-
- return(TRUE); /* locking succeeded */
-}
-
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in shared mode for the current thread. If the rw-lock is locked
-in exclusive mode, or there is an exclusive lock request waiting, the
-function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for
-the lock, before suspending the thread. */
-UNIV_INLINE
-void
-rw_lock_s_lock_func(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- /* NOTE: As we do not know the thread ids for threads which have
- s-locked a latch, and s-lockers will be served only after waiting
- x-lock requests have been fulfilled, then if this thread already
- owns an s-lock here, it may end up in a deadlock with another thread
- which requests an x-lock here. Therefore, we will forbid recursive
- s-locking of a latch: the following assert will warn the programmer
- of the possibility of this kind of a deadlock. If we want to implement
- safe recursive s-locking, we should keep in a list the thread ids of
- the threads which have s-locked a latch. This would use some CPU
- time. */
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
- ut_ad(!rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
-
- return; /* Success */
- } else {
- /* Did not succeed, try spin wait */
-
- rw_lock_s_lock_spin(lock, pass, false, false, file_name, line);
-
- return;
- }
-}
-
-/******************************************************************//**
-Return true if waiters of higher priority than the current thread
-exist.
-@true if waiterss of higher priority exist */
-UNIV_INLINE
-bool
-rw_lock_higher_prio_waiters_exist(
-/*==============================*/
- bool priority_lock, /*!< in: whether the lock is a priority lock */
- bool high_priority, /*!< in: whether we are acquiring a priority
- lock with high priority */
- void* lock) /*!< in: rw lock */
-{
- if (high_priority || !priority_lock) {
- ut_ad(!(!priority_lock && high_priority));
- return(false);
- }
-
- ut_ad(priority_lock && !high_priority);
-
- prio_rw_lock_t *prio_rw_lock = (prio_rw_lock_t *) lock;
- return prio_rw_lock->high_priority_wait_ex_waiter > 0
- || prio_rw_lock->high_priority_s_waiters > 0
- || prio_rw_lock->high_priority_x_waiters > 0;
-}
-
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function, except if
-you supply the file name and line number. Lock a priority rw-lock in shared
-mode for the current thread, using the relative thread priority. If the
-rw-lock is locked in exclusive mode, or there is an exclusive lock request
-waiting, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
-waiting for the lock, before suspending the thread. */
-UNIV_INLINE
-void
-rw_lock_s_lock_func(
-/*================*/
- prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
- ut_ad(!rw_lock_own(lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- bool high_priority = srv_current_thread_priority > 0;
-
- /* Do not attempt to acquire a low-priority S latch if there are
- high-priority waiters even if such attempt would be successful. This
- is to prevent a high priority X request from being starved by a
- sequence of overlapping regular priority S requests. */
-
- if (!rw_lock_higher_prio_waiters_exist(true, high_priority, lock)
- && rw_lock_s_lock_low(&lock->base_lock, pass, file_name, line)) {
-
- return; /* Success */
- } else {
- /* Did not succeed, try spin wait */
- rw_lock_s_lock_spin(lock, pass, true, high_priority, file_name,
- line);
-
- return;
- }
-}
-
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in exclusive mode for the current thread if the lock can be
-obtained immediately.
-@return TRUE if success */
-UNIV_INLINE
-ibool
-rw_lock_x_lock_func_nowait(
-/*=======================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- ibool success;
- ibool local_recursive= lock->recursive;
-
-#ifdef INNODB_RW_LOCKS_USE_ATOMICS
- success = os_compare_and_swap_lint(&lock->lock_word, X_LOCK_DECR, 0);
-#else
-
- success = FALSE;
- mutex_enter(&(lock->mutex));
- if (lock->lock_word == X_LOCK_DECR) {
- lock->lock_word = 0;
- success = TRUE;
- }
- mutex_exit(&(lock->mutex));
-
-#endif
- /* Note: recursive must be loaded before writer_thread see
- comment for rw_lock_set_writer_id_and_recursion_flag().
- To achieve this we load it before os_compare_and_swap_lint(),
- which implies full memory barrier in current implementation. */
- if (success) {
- rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
-
- } else if (local_recursive
- && os_thread_eq(lock->writer_thread,
- os_thread_get_curr_id())) {
- /* Relock: this lock_word modification is safe since no other
- threads can modify (lock, unlock, or reserve) lock_word while
- there is an exclusive writer and this is the writer thread. */
- if (lock->lock_word == 0) {
- lock->lock_word = -X_LOCK_DECR;
- } else {
- lock->lock_word--;
- }
-
- /* Watch for too many recursive locks */
- ut_ad(lock->lock_word < 0);
-
- } else {
- /* Failure */
- return(FALSE);
- }
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
-#endif
-
- if (srv_instrument_semaphores) {
- lock->thread_id = os_thread_get_curr_id();
- lock->file_name = file_name;
- lock->line = line;
- }
-
- lock->last_x_file_name = file_name;
- lock->last_x_line = line;
-
- ut_ad(rw_lock_validate(lock));
-
- return(TRUE);
-}
-
-/******************************************************************//**
-Releases a shared mode lock. */
-UNIV_INLINE
-void
-rw_lock_s_unlock_func(
-/*==================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- rw_lock_t* lock) /*!< in/out: rw-lock */
-{
- ut_ad(lock->lock_word > -X_LOCK_DECR);
- ut_ad(lock->lock_word != 0);
- ut_ad(lock->lock_word < X_LOCK_DECR);
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
-#endif
-
- /* Increment lock_word to indicate 1 less reader */
- if (rw_lock_lock_word_incr(lock, 1) == 0) {
-
- /* wait_ex waiter exists. It may not be asleep, but we signal
- anyway. We do not wake other waiters, because they can't
- exist without wait_ex waiter and wait_ex waiter goes first.*/
- os_event_set(&lock->wait_ex_event);
- sync_array_object_signalled();
-
- }
-
- ut_ad(rw_lock_validate(lock));
-
-#ifdef UNIV_SYNC_PERF_STAT
- rw_s_exit_count++;
-#endif
-}
-
-/******************************************************************//**
-Releases a shared mode priority lock. */
-UNIV_INLINE
-void
-rw_lock_s_unlock_func(
-/*==================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- prio_rw_lock_t* lock) /*!< in/out: rw-lock */
-{
- lint lock_word;
-
- ut_ad(lock->base_lock.lock_word > -X_LOCK_DECR);
- ut_ad(lock->base_lock.lock_word != 0);
- ut_ad(lock->base_lock.lock_word < X_LOCK_DECR);
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(&lock->base_lock, pass, RW_LOCK_SHARED);
-#endif
-
- /* Increment lock_word to indicate 1 less reader */
- lock_word = rw_lock_lock_word_incr(&lock->base_lock, 1);
- if (lock_word == 0) {
-
- /* A waiting next-writer exists, either high priority or
- regular, sharing the same wait event. */
- os_event_set(&lock->base_lock.wait_ex_event);
- sync_array_object_signalled();
-
- } else if (lock_word == X_LOCK_DECR) {
-
- /* S-waiters may exist during an S unlock if a high-priority
- thread released it, because low-priority threads are prevented
- from acquiring S lock while high-priority thread holds it. */
- if (lock->base_lock.waiters) {
-
- rw_lock_reset_waiter_flag(&lock->base_lock);
- os_event_set(&lock->base_lock.event);
- sync_array_object_signalled();
- }
- }
-
- ut_ad(rw_lock_validate(lock));
-
-#ifdef UNIV_SYNC_PERF_STAT
- rw_s_exit_count++;
-#endif
-}
-
-/******************************************************************//**
-Prepares an exclusive mode lock release: resets the recursion flag and removes
-the debug information if needed and returns the required lock word increment
-value.
-@return lock word increment value to perform the unlock */
-UNIV_INLINE
-ulint
-rw_lock_x_prepare_unlock(
-/*=====================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- rw_lock_t* lock) /*!< in/out: rw-lock */
-{
- ut_ad(lock->lock_word == 0 || lock->lock_word <= -X_LOCK_DECR);
-
- /* lock->recursive flag also indicates if lock->writer_thread is
- valid or stale. If we are the last of the recursive callers
- then we must unset lock->recursive flag to indicate that the
- lock->writer_thread is now stale.
- Note that since we still hold the x-lock we can safely read the
- lock_word. */
- if (lock->lock_word == 0) {
- /* Last caller in a possible recursive chain. */
- lock->recursive = FALSE;
- }
-
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
-#endif
-
- ulint x_lock_incr;
- if (lock->lock_word == 0) {
- x_lock_incr = X_LOCK_DECR;
- } else if (lock->lock_word == -X_LOCK_DECR) {
- x_lock_incr = X_LOCK_DECR;
- } else {
- ut_ad(lock->lock_word < -X_LOCK_DECR);
- x_lock_incr = 1;
- }
-
- return(x_lock_incr);
-}
-
-/******************************************************************//**
-Releases an exclusive mode lock. */
-UNIV_INLINE
-void
-rw_lock_x_unlock_func(
-/*==================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- rw_lock_t* lock) /*!< in/out: rw-lock */
-{
- ulint x_lock_incr = rw_lock_x_prepare_unlock(
-#ifdef UNIV_SYNC_DEBUG
- pass,
-#endif
- lock);
-
- if (rw_lock_lock_word_incr(lock, x_lock_incr) == X_LOCK_DECR) {
- /* Lock is now free. May have to signal read/write waiters.
- We do not need to signal wait_ex waiters, since they cannot
- exist when there is a writer. */
-
- if (lock->waiters) {
- rw_lock_reset_waiter_flag(lock);
- os_event_set(&lock->event);
- sync_array_object_signalled();
- }
- }
-
- ut_ad(rw_lock_validate(lock));
-
-#ifdef UNIV_SYNC_PERF_STAT
- rw_x_exit_count++;
-#endif
-}
-
-/******************************************************************//**
-Releases an exclusive mode priority lock. */
-UNIV_INLINE
-void
-rw_lock_x_unlock_func(
-/*==================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the lock may have
- been passed to another thread to unlock */
-#endif
- prio_rw_lock_t* lock) /*!< in/out: rw-lock */
-{
- ulint x_lock_incr = rw_lock_x_prepare_unlock(
-#ifdef UNIV_SYNC_DEBUG
- pass,
-#endif
- &lock->base_lock);
-
- ut_ad(lock->high_priority_wait_ex_waiter == 0);
-
- if (rw_lock_lock_word_incr(&lock->base_lock, x_lock_incr)
- == X_LOCK_DECR) {
-
- /* Priority lock is now free. Signal any waiters in this
- order: 1) high priority X waiters; 2) high priority S waiters;
- 3) regular priority waiters.
- We do not need to signal wait_ex waiters, since they cannot
- exist when there is a writer. */
-
- if (lock->high_priority_x_waiters) {
-
- os_event_set(&lock->high_priority_x_event);
- sync_array_object_signalled();
- } else if (lock->high_priority_s_waiters) {
-
- os_event_set(&lock->high_priority_s_event);
- sync_array_object_signalled();
- } else if (lock->base_lock.waiters) {
-
- rw_lock_reset_waiter_flag(&lock->base_lock);
- os_event_set(&lock->base_lock.event);
- sync_array_object_signalled();
- }
- }
-
- ut_ad(rw_lock_validate(lock));
-
-#ifdef UNIV_SYNC_PERF_STAT
- rw_x_exit_count++;
-#endif
-}
-
-#ifdef UNIV_PFS_RWLOCK
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_create_func().
-NOTE! Please use the corresponding macro rw_lock_create(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_create_func(
-/*====================*/
- mysql_pfs_key_t key, /*!< in: key registered with
- performance schema */
- rw_lock_t* lock, /*!< in: pointer to memory */
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
- const char* cmutex_name, /*!< in: mutex name */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline) /*!< in: file line where created */
-{
- /* Initialize the rwlock for performance schema */
- lock->pfs_psi = PSI_RWLOCK_CALL(init_rwlock)(key, lock);
-
- /* The actual function to initialize an rwlock */
- rw_lock_create_func(lock,
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- level,
-# endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
- cmutex_name,
- cfile_name,
- cline);
-}
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_create_func().
-NOTE! Please use the corresponding macro rw_lock_create(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_create_func(
-/*====================*/
- mysql_pfs_key_t key, /*!< in: key registered with
- performance schema */
- prio_rw_lock_t* lock, /*!< in: pointer to memory */
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
- const char* cmutex_name, /*!< in: mutex name */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline) /*!< in: file line where created */
-{
- /* Initialize the rwlock for performance schema */
- lock->base_lock.pfs_psi = PSI_RWLOCK_CALL(init_rwlock)(key, lock);
-
- /* The actual function to initialize an rwlock */
- rw_lock_create_func(lock,
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- level,
-# endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
- cmutex_name,
- cfile_name,
- cline);
-}
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_x_lock_func()
-NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_x_lock_func(
-/*====================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- if (lock->pfs_psi != NULL)
- {
- PSI_rwlock_locker* locker;
- PSI_rwlock_locker_state state;
-
- /* Record the entry of rw x lock request in performance schema */
- locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
- &state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK,
- file_name, static_cast<uint>(line));
-
- rw_lock_x_lock_func(
- lock, pass, file_name, static_cast<uint>(line));
-
- if (locker != NULL) {
- PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, 0);
- }
- }
- else
- {
- rw_lock_x_lock_func(lock, pass, file_name, line);
- }
-}
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_x_lock_func()
-NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_x_lock_func(
-/*====================*/
- prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- if (lock->base_lock.pfs_psi != NULL)
- {
- PSI_rwlock_locker* locker;
- PSI_rwlock_locker_state state;
-
- /* Record the entry of rw x lock request in performance schema */
- locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
- &state, lock->base_lock.pfs_psi, PSI_RWLOCK_WRITELOCK,
- file_name, line);
-
- rw_lock_x_lock_func(lock, pass, file_name, line);
-
- if (locker != NULL) {
- PSI_RWLOCK_CALL(end_rwlock_wrwait)(locker, 0);
- }
- }
- else
- {
- rw_lock_x_lock_func(lock, pass, file_name, line);
- }
-}
-
-/******************************************************************//**
-Performance schema instrumented wrap function for
-rw_lock_x_lock_func_nowait()
-NOTE! Please use the corresponding macro rw_lock_x_lock_func(),
-not directly this function!
-@return TRUE if success */
-UNIV_INLINE
-ibool
-pfs_rw_lock_x_lock_func_nowait(
-/*===========================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- const char* file_name,/*!< in: file name where lock
- requested */
- ulint line) /*!< in: line where requested */
-{
- ibool ret;
-
- if (lock->pfs_psi != NULL)
- {
- PSI_rwlock_locker* locker;
- PSI_rwlock_locker_state state;
-
- /* Record the entry of rw x lock request in performance schema */
- locker = PSI_RWLOCK_CALL(start_rwlock_wrwait)(
- &state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK,
- file_name, static_cast<uint>(line));
-
- ret = rw_lock_x_lock_func_nowait(lock, file_name, line);
-
- if (locker != NULL) {
- PSI_RWLOCK_CALL(end_rwlock_wrwait)(
- locker, static_cast<int>(ret));
- }
- }
- else
- {
- ret = rw_lock_x_lock_func_nowait(lock, file_name, line);
- }
-
- return(ret);
-}
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_free_func()
-NOTE! Please use the corresponding macro rw_lock_free(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_free_func(
-/*==================*/
- rw_lock_t* lock) /*!< in: pointer to rw-lock */
-{
- if (lock->pfs_psi != NULL)
- {
- PSI_RWLOCK_CALL(destroy_rwlock)(lock->pfs_psi);
- lock->pfs_psi = NULL;
- }
-
- rw_lock_free_func(lock);
-}
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_free_func()
-NOTE! Please use the corresponding macro rw_lock_free(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_free_func(
-/*==================*/
- prio_rw_lock_t* lock) /*!< in: pointer to rw-lock */
-{
- if (lock->base_lock.pfs_psi != NULL)
- {
- PSI_RWLOCK_CALL(destroy_rwlock)(lock->base_lock.pfs_psi);
- lock->base_lock.pfs_psi = NULL;
- }
-
- rw_lock_free_func(lock);
-}
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_s_lock_func()
-NOTE! Please use the corresponding macro rw_lock_s_lock(), not
-directly this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_s_lock_func(
-/*====================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the
- lock will be passed to another
- thread to unlock */
- const char* file_name,/*!< in: file name where lock
- requested */
- ulint line) /*!< in: line where requested */
-{
- if (lock->pfs_psi != NULL)
- {
- PSI_rwlock_locker* locker;
- PSI_rwlock_locker_state state;
-
- /* Instrumented to inform we are aquiring a shared rwlock */
- locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
- &state, lock->pfs_psi, PSI_RWLOCK_READLOCK,
- file_name, static_cast<uint>(line));
-
- rw_lock_s_lock_func(lock, pass, file_name, line);
-
- if (locker != NULL) {
- PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
- }
- }
- else
- {
- rw_lock_s_lock_func(lock, pass, file_name, line);
- }
-
- return;
-}
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_s_lock_func()
-NOTE! Please use the corresponding macro rw_lock_s_lock(), not
-directly this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_s_lock_func(
-/*====================*/
- prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the
- lock will be passed to another
- thread to unlock */
- const char* file_name,/*!< in: file name where lock
- requested */
- ulint line) /*!< in: line where requested */
-{
- if (lock->base_lock.pfs_psi != NULL)
- {
- PSI_rwlock_locker* locker;
- PSI_rwlock_locker_state state;
-
- /* Instrumented to inform we are aquiring a shared rwlock */
- locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
- &state, lock->base_lock.pfs_psi, PSI_RWLOCK_READLOCK,
- file_name, line);
-
- rw_lock_s_lock_func(lock, pass, file_name, line);
-
- if (locker != NULL) {
- PSI_RWLOCK_CALL(end_rwlock_rdwait)(locker, 0);
- }
- }
- else
- {
- rw_lock_s_lock_func(lock, pass, file_name, line);
- }
-
- return;
-}
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_s_lock_func()
-NOTE! Please use the corresponding macro rw_lock_s_lock(), not
-directly this function!
-@return TRUE if success */
-UNIV_INLINE
-ibool
-pfs_rw_lock_s_lock_low(
-/*===================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the
- lock will be passed to another
- thread to unlock */
- const char* file_name, /*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- ibool ret;
-
- if (lock->pfs_psi != NULL)
- {
- PSI_rwlock_locker* locker;
- PSI_rwlock_locker_state state;
-
- /* Instrumented to inform we are aquiring a shared rwlock */
- locker = PSI_RWLOCK_CALL(start_rwlock_rdwait)(
- &state, lock->pfs_psi, PSI_RWLOCK_READLOCK,
- file_name, static_cast<uint>(line));
-
- ret = rw_lock_s_lock_low(lock, pass, file_name, line);
-
- if (locker != NULL) {
- PSI_RWLOCK_CALL(end_rwlock_rdwait)(
- locker, static_cast<int>(ret));
- }
- }
- else
- {
- ret = rw_lock_s_lock_low(lock, pass, file_name, line);
- }
-
- return(ret);
-}
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_s_lock_func()
-NOTE! Please use the corresponding macro rw_lock_s_lock(), not
-directly this function!
-@return TRUE if success */
-UNIV_INLINE
-ibool
-pfs_rw_lock_s_lock_low(
-/*===================*/
- prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the
- lock will be passed to another
- thread to unlock */
- const char* file_name, /*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- return(pfs_rw_lock_s_lock_low(&lock->base_lock, pass,
- file_name, line));
-}
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_x_unlock_func()
-NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_x_unlock_func(
-/*======================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the
- lock may have been passed to another
- thread to unlock */
-#endif
- rw_lock_t* lock) /*!< in/out: rw-lock */
-{
- /* Inform performance schema we are unlocking the lock */
- if (lock->pfs_psi != NULL)
- PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
-
- rw_lock_x_unlock_func(
-#ifdef UNIV_SYNC_DEBUG
- pass,
-#endif
- lock);
-}
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_x_unlock_func()
-NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
-this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_x_unlock_func(
-/*======================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the
- lock may have been passed to another
- thread to unlock */
-#endif
- prio_rw_lock_t* lock) /*!< in/out: rw-lock */
-{
- /* Inform performance schema we are unlocking the lock */
- if (lock->base_lock.pfs_psi != NULL)
- PSI_RWLOCK_CALL(unlock_rwlock)(lock->base_lock.pfs_psi);
-
- rw_lock_x_unlock_func(
-#ifdef UNIV_SYNC_DEBUG
- pass,
-#endif
- lock);
-}
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_s_unlock_func()
-NOTE! Please use the corresponding macro pfs_rw_lock_s_unlock(), not
-directly this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_s_unlock_func(
-/*======================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the
- lock may have been passed to another
- thread to unlock */
-#endif
- rw_lock_t* lock) /*!< in/out: rw-lock */
-{
- /* Inform performance schema we are unlocking the lock */
- if (lock->pfs_psi != NULL)
- PSI_RWLOCK_CALL(unlock_rwlock)(lock->pfs_psi);
-
- rw_lock_s_unlock_func(
-#ifdef UNIV_SYNC_DEBUG
- pass,
-#endif
- lock);
-
-}
-
-/******************************************************************//**
-Performance schema instrumented wrap function for rw_lock_s_unlock_func()
-NOTE! Please use the corresponding macro pfs_rw_lock_s_unlock(), not
-directly this function! */
-UNIV_INLINE
-void
-pfs_rw_lock_s_unlock_func(
-/*======================*/
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the
- lock may have been passed to another
- thread to unlock */
-#endif
- prio_rw_lock_t* lock) /*!< in/out: rw-lock */
-{
- /* Inform performance schema we are unlocking the lock */
- if (lock->base_lock.pfs_psi != NULL)
- PSI_RWLOCK_CALL(unlock_rwlock)(lock->base_lock.pfs_psi);
-
- rw_lock_s_unlock_func(
-#ifdef UNIV_SYNC_DEBUG
- pass,
-#endif
- lock);
-
-}
-
-#endif /* UNIV_PFS_RWLOCK */
diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h
deleted file mode 100644
index af445aeb84c..00000000000
--- a/storage/xtradb/include/sync0sync.h
+++ /dev/null
@@ -1,1050 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-Copyright (c) 2012, Facebook Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0sync.h
-Mutex, the basic synchronization primitive
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef sync0sync_h
-#define sync0sync_h
-
-#include "univ.i"
-#include "sync0types.h"
-#include "ut0lst.h"
-#include "ut0mem.h"
-#include "os0thread.h"
-#include "os0sync.h"
-#include "sync0arr.h"
-#include "ut0counter.h"
-
-/** Enable semaphore request instrumentation */
-extern my_bool srv_instrument_semaphores;
-
-#if defined(UNIV_DEBUG) && !defined(UNIV_HOTBACKUP)
-extern "C" my_bool timed_mutexes;
-#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
-
-#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
-
-/* By default, buffer mutexes and rwlocks will be excluded from
-instrumentation due to their large number of instances. */
-# define PFS_SKIP_BUFFER_MUTEX_RWLOCK
-
-/* By default, event->mutex will also be excluded from instrumentation */
-# define PFS_SKIP_EVENT_MUTEX
-
-#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
-
-#ifdef UNIV_PFS_MUTEX
-/* Key defines to register InnoDB mutexes with performance schema */
-extern mysql_pfs_key_t autoinc_mutex_key;
-extern mysql_pfs_key_t buffer_block_mutex_key;
-extern mysql_pfs_key_t buf_pool_zip_mutex_key;
-extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
-extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
-extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
-extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
-extern mysql_pfs_key_t buf_pool_flush_state_mutex_key;
-extern mysql_pfs_key_t cache_last_read_mutex_key;
-extern mysql_pfs_key_t dict_foreign_err_mutex_key;
-extern mysql_pfs_key_t dict_sys_mutex_key;
-extern mysql_pfs_key_t file_format_max_mutex_key;
-extern mysql_pfs_key_t fil_system_mutex_key;
-extern mysql_pfs_key_t flush_list_mutex_key;
-extern mysql_pfs_key_t fts_bg_threads_mutex_key;
-extern mysql_pfs_key_t fts_delete_mutex_key;
-extern mysql_pfs_key_t fts_optimize_mutex_key;
-extern mysql_pfs_key_t fts_doc_id_mutex_key;
-extern mysql_pfs_key_t fts_pll_tokenize_mutex_key;
-extern mysql_pfs_key_t hash_table_mutex_key;
-extern mysql_pfs_key_t ibuf_bitmap_mutex_key;
-extern mysql_pfs_key_t ibuf_mutex_key;
-extern mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
-extern mysql_pfs_key_t log_bmp_sys_mutex_key;
-extern mysql_pfs_key_t log_sys_mutex_key;
-extern mysql_pfs_key_t log_flush_order_mutex_key;
-# ifndef HAVE_ATOMIC_BUILTINS
-extern mysql_pfs_key_t server_mutex_key;
-# endif /* !HAVE_ATOMIC_BUILTINS */
-# ifdef UNIV_MEM_DEBUG
-extern mysql_pfs_key_t mem_hash_mutex_key;
-# endif /* UNIV_MEM_DEBUG */
-extern mysql_pfs_key_t mem_pool_mutex_key;
-extern mysql_pfs_key_t mutex_list_mutex_key;
-extern mysql_pfs_key_t purge_sys_bh_mutex_key;
-extern mysql_pfs_key_t recv_sys_mutex_key;
-extern mysql_pfs_key_t recv_writer_mutex_key;
-extern mysql_pfs_key_t rseg_mutex_key;
-# ifdef UNIV_SYNC_DEBUG
-extern mysql_pfs_key_t rw_lock_debug_mutex_key;
-# endif /* UNIV_SYNC_DEBUG */
-extern mysql_pfs_key_t rw_lock_list_mutex_key;
-extern mysql_pfs_key_t rw_lock_mutex_key;
-extern mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
-extern mysql_pfs_key_t srv_innodb_monitor_mutex_key;
-extern mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
-extern mysql_pfs_key_t srv_threads_mutex_key;
-extern mysql_pfs_key_t srv_monitor_file_mutex_key;
-# ifdef UNIV_SYNC_DEBUG
-extern mysql_pfs_key_t sync_thread_mutex_key;
-# endif /* UNIV_SYNC_DEBUG */
-extern mysql_pfs_key_t buf_dblwr_mutex_key;
-extern mysql_pfs_key_t trx_undo_mutex_key;
-extern mysql_pfs_key_t trx_mutex_key;
-extern mysql_pfs_key_t lock_sys_mutex_key;
-extern mysql_pfs_key_t lock_sys_wait_mutex_key;
-extern mysql_pfs_key_t trx_sys_mutex_key;
-extern mysql_pfs_key_t srv_sys_mutex_key;
-extern mysql_pfs_key_t srv_sys_tasks_mutex_key;
-#ifndef HAVE_ATOMIC_BUILTINS
-extern mysql_pfs_key_t srv_conc_mutex_key;
-#endif /* !HAVE_ATOMIC_BUILTINS */
-#ifndef HAVE_ATOMIC_BUILTINS_64
-extern mysql_pfs_key_t monitor_mutex_key;
-#endif /* !HAVE_ATOMIC_BUILTINS_64 */
-extern mysql_pfs_key_t event_os_mutex_key;
-extern mysql_pfs_key_t ut_list_mutex_key;
-extern mysql_pfs_key_t os_mutex_key;
-extern mysql_pfs_key_t zip_pad_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/******************************************************************//**
-Initializes the synchronization data structures. */
-UNIV_INTERN
-void
-sync_init(void);
-/*===========*/
-/******************************************************************//**
-Frees the resources in synchronization data structures. */
-UNIV_INTERN
-void
-sync_close(void);
-/*===========*/
-
-#undef mutex_free /* Fix for MacOS X */
-
-#ifdef UNIV_PFS_MUTEX
-/**********************************************************************
-Following mutex APIs would be performance schema instrumented
-if "UNIV_PFS_MUTEX" is defined:
-
-mutex_create
-mutex_enter
-mutex_enter_first
-mutex_enter_last
-mutex_exit
-mutex_enter_nowait
-mutex_free
-
-These mutex APIs will point to corresponding wrapper functions that contain
-the performance schema instrumentation if "UNIV_PFS_MUTEX" is defined.
-The instrumented wrapper functions have the prefix of "innodb_".
-
-NOTE! The following macro should be used in mutex operation, not the
-corresponding function. */
-
-/******************************************************************//**
-Creates, or rather, initializes a mutex object to a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed. */
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-# define mutex_create(K, M, level) \
- pfs_mutex_create_func((K), (M), (level), __FILE__, __LINE__, #M)
-# else
-# define mutex_create(K, M, level) \
- pfs_mutex_create_func((K), (M), __FILE__, __LINE__, #M)
-# endif/* UNIV_SYNC_DEBUG */
-# else
-# define mutex_create(K, M, level) \
- pfs_mutex_create_func((K), (M), __FILE__, __LINE__, #M)
-# endif /* UNIV_DEBUG */
-
-# define mutex_enter(M) \
- pfs_mutex_enter_func((M), __FILE__, __LINE__)
-
-# define mutex_enter_nowait(M) \
- pfs_mutex_enter_nowait_func((M), __FILE__, __LINE__)
-
-# define mutex_enter_first(M) \
- pfs_mutex_enter_func((M), __FILE__, __LINE__, IB_HIGH_PRIO)
-
-# define mutex_enter_last(M) \
- pfs_mutex_enter_func((M), __FILE__, __LINE__, IB_LOW_PRIO)
-
-# define mutex_exit(M) pfs_mutex_exit_func(M)
-
-# define mutex_free(M) pfs_mutex_free_func(M)
-
-#else /* UNIV_PFS_MUTEX */
-
-/* If "UNIV_PFS_MUTEX" is not defined, the mutex APIs point to
-original non-instrumented functions */
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-# define mutex_create(K, M, level) \
- mutex_create_func((M), (level), __FILE__, __LINE__, #M)
-# else /* UNIV_SYNC_DEBUG */
-# define mutex_create(K, M, level) \
- mutex_create_func((M), __FILE__, __LINE__, #M)
-# endif /* UNIV_SYNC_DEBUG */
-# else /* UNIV_DEBUG */
-# define mutex_create(K, M, level) \
- mutex_create_func((M), __FILE__, __LINE__, #M)
-# endif /* UNIV_DEBUG */
-
-# define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__)
-
-# define mutex_enter_nowait(M) \
- mutex_enter_nowait_func((M), __FILE__, __LINE__)
-
-# define mutex_enter_first(M) \
- mutex_enter_func((M), __FILE__, __LINE__, IB_HIGH_PRIO)
-
-# define mutex_enter_last(M) \
- mutex_enter_func((M), __FILE__, __LINE__, IB_LOW_PRIO)
-
-# define mutex_exit(M) mutex_exit_func(M)
-
-# define mutex_free(M) mutex_free_func(M)
-
-#endif /* UNIV_PFS_MUTEX */
-
-/******************************************************************//**
-Creates, or rather, initializes a mutex object in a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed. */
-UNIV_INTERN
-void
-mutex_create_func(
-/*==============*/
- ib_mutex_t* mutex, /*!< in: pointer to memory */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline, /*!< in: file line where created */
- const char* cmutex_name); /*!< in: mutex name */
-
-/******************************************************************//**
-Creates, or rather, initializes a priority mutex object in a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed. */
-UNIV_INTERN
-void
-mutex_create_func(
-/*==============*/
- ib_prio_mutex_t* mutex, /*!< in: pointer to memory */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where
- created */
- ulint cline, /*!< in: file line where
- created */
- const char* cmutex_name); /*!< in: mutex name */
-/******************************************************************//**
-NOTE! Use the corresponding macro mutex_free(), not directly this function!
-Calling this function is obligatory only if the memory buffer containing
-the mutex is freed. Removes a mutex object from the mutex list. The mutex
-is checked to be in the reset state. */
-UNIV_INTERN
-void
-mutex_free_func(
-/*============*/
- ib_mutex_t* mutex); /*!< in: mutex */
-/******************************************************************//**
-NOTE! Use the corresponding macro mutex_free(), not directly this function!
-Calling this function is obligatory only if the memory buffer containing
-the mutex is freed. Removes a priority mutex object from the mutex list. The
-mutex is checked to be in the reset state. */
-UNIV_INTERN
-void
-mutex_free_func(
-/*============*/
- ib_prio_mutex_t* mutex); /*!< in: mutex */
-/**************************************************************//**
-NOTE! The following macro should be used in mutex locking, not the
-corresponding function. */
-
-/* NOTE! currently same as mutex_enter! */
-
-#define mutex_enter_fast(M) mutex_enter_func((M), __FILE__, __LINE__)
-/******************************************************************//**
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Locks a mutex for the current thread. If the mutex is reserved
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting
-for the mutex before suspending the thread. */
-UNIV_INLINE
-void
-mutex_enter_func(
-/*=============*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where locked */
- ulint line); /*!< in: line where locked */
-/******************************************************************//**
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Locks a priority mutex for the current thread. If the mutex is
-reserved the function spins a preset time (controlled by SYNC_SPIN_ROUNDS)
-waiting for the mutex before suspending the thread. If the thread is suspended,
-the priority argument value determines the relative order for its wake up. Any
-IB_HIGH_PRIO waiters will be woken up before any IB_LOW_PRIO waiters. In case of
-IB_DEFAULT_PRIO, the relative priority will be set according to
-srv_current_thread_priority. */
-UNIV_INLINE
-void
-mutex_enter_func(
-/*=============*/
- ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where
- locked */
- ulint line, /*!< in: line where locked */
- enum ib_sync_priority priority = IB_DEFAULT_PRIO);
- /*!<in: mutex acquisition
- priority */
-/********************************************************************//**
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Tries to lock the mutex for the current thread. If the lock is not
-acquired immediately, returns with return value 1.
-@return 0 if succeed, 1 if not */
-UNIV_INTERN
-ulint
-mutex_enter_nowait_func(
-/*====================*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line); /*!< in: line where requested */
-/********************************************************************//**
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Tries to lock the mutex for the current thread. If the lock is not
-acquired immediately, returns with return value 1.
-@return 0 if succeed, 1 if not */
-UNIV_INLINE
-ulint
-mutex_enter_nowait_func(
-/*====================*/
- ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line); /*!< in: line where
- requested */
-/******************************************************************//**
-NOTE! Use the corresponding macro mutex_exit(), not directly this function!
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-mutex_exit_func(
-/*============*/
- ib_mutex_t* mutex); /*!< in: pointer to mutex */
-/******************************************************************//**
-NOTE! Use the corresponding macro mutex_exit(), not directly this function!
-Unlocks a priority mutex owned by the current thread. */
-UNIV_INLINE
-void
-mutex_exit_func(
-/*============*/
- ib_prio_mutex_t* mutex); /*!< in: pointer to mutex */
-
-
-#ifdef UNIV_PFS_MUTEX
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_create(), not directly
-this function!
-A wrapper function for mutex_create_func(), registers the mutex
-with peformance schema if "UNIV_PFS_MUTEX" is defined when
-creating the mutex */
-UNIV_INLINE
-void
-pfs_mutex_create_func(
-/*==================*/
- PSI_mutex_key key, /*!< in: Performance Schema key */
- ib_mutex_t* mutex, /*!< in: pointer to memory */
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline, /*!< in: file line where created */
- const char* cmutex_name);
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_create(), not directly
-this function!
-A wrapper function for mutex_create_func(), registers the mutex
-with peformance schema if "UNIV_PFS_MUTEX" is defined when
-creating the performance mutex */
-UNIV_INLINE
-void
-pfs_mutex_create_func(
-/*==================*/
- PSI_mutex_key key, /*!< in: Performance Schema
- key */
- ib_prio_mutex_t* mutex, /*!< in: pointer to memory */
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where
- created */
- ulint cline, /*!< in: file line where
- created */
- const char* cmutex_name);
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_func(). */
-UNIV_INLINE
-void
-pfs_mutex_enter_func(
-/*=================*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where locked */
- ulint line); /*!< in: line where locked */
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_func(). */
-UNIV_INLINE
-void
-pfs_mutex_enter_func(
-/*=================*/
- ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where
- locked */
- ulint line, /*!< in: line where locked */
- enum ib_sync_priority priority = IB_DEFAULT_PRIO);
- /*!<in: mutex acquisition
- priority */
-/********************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_nowait_func.
-@return 0 if succeed, 1 if not */
-UNIV_INLINE
-ulint
-pfs_mutex_enter_nowait_func(
-/*========================*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line); /*!< in: line where requested */
-/********************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_nowait_func.
-@return 0 if succeed, 1 if not */
-UNIV_INLINE
-ulint
-pfs_mutex_enter_nowait_func(
-/*========================*/
- ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line); /*!< in: line where
- requested */
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_exit(), not directly
-this function!
-A wrap function of mutex_exit_func() with peformance schema instrumentation.
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-pfs_mutex_exit_func(
-/*================*/
- ib_mutex_t* mutex); /*!< in: pointer to mutex */
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_exit(), not directly
-this function!
-A wrap function of mutex_exit_func() with peformance schema instrumentation.
-Unlocks a priority mutex owned by the current thread. */
-UNIV_INLINE
-void
-pfs_mutex_exit_func(
-/*================*/
- ib_prio_mutex_t* mutex); /*!< in: pointer to mutex */
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_free(), not directly
-this function!
-Wrapper function for mutex_free_func(). Also destroys the performance
-schema probes when freeing the mutex */
-UNIV_INLINE
-void
-pfs_mutex_free_func(
-/*================*/
- ib_mutex_t* mutex); /*!< in: mutex */
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_free(), not directly
-this function!
-Wrapper function for mutex_free_func(). Also destroys the performance
-schema probes when freeing the priority mutex */
-UNIV_INLINE
-void
-pfs_mutex_free_func(
-/*================*/
- ib_prio_mutex_t* mutex); /*!< in: mutex */
-
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Returns TRUE if no mutex or rw-lock is currently locked.
-Works only in the debug version.
-@return TRUE if no mutexes and rw-locks reserved */
-UNIV_INTERN
-ibool
-sync_all_freed(void);
-/*================*/
-#endif /* UNIV_SYNC_DEBUG */
-/*#####################################################################
-FUNCTION PROTOTYPES FOR DEBUGGING */
-/*******************************************************************//**
-Prints wait info of the sync system. */
-UNIV_INTERN
-void
-sync_print_wait_info(
-/*=================*/
- FILE* file); /*!< in: file where to print */
-/*******************************************************************//**
-Prints info of the sync system. */
-UNIV_INTERN
-void
-sync_print(
-/*=======*/
- FILE* file); /*!< in: file where to print */
-#ifdef UNIV_DEBUG
-/******************************************************************//**
-Checks that the mutex has been initialized.
-@return TRUE */
-UNIV_INTERN
-ibool
-mutex_validate(
-/*===========*/
- const ib_mutex_t* mutex); /*!< in: mutex */
-/******************************************************************//**
-Checks that the current thread owns the mutex. Works only
-in the debug version.
-@return TRUE if owns */
-UNIV_INTERN
-ibool
-mutex_own(
-/*======*/
- const ib_mutex_t* mutex) /*!< in: mutex */
- MY_ATTRIBUTE((warn_unused_result));
-/******************************************************************//**
-Checks that the current thread owns the priority mutex. Works only
-in the debug version.
-@return TRUE if owns */
-UNIV_INTERN
-ibool
-mutex_own(
-/*======*/
- const ib_prio_mutex_t* mutex) /*!< in: priority mutex */
- MY_ATTRIBUTE((warn_unused_result));
-#endif /* UNIV_DEBUG */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Adds a latch and its level in the thread level array. Allocates the memory
-for the array if called first time for this OS thread. Makes the checks
-against other latch levels stored in the array for this thread. */
-UNIV_INTERN
-void
-sync_thread_add_level(
-/*==================*/
- void* latch, /*!< in: pointer to a mutex or an rw-lock */
- ulint level, /*!< in: level in the latching order; if
- SYNC_LEVEL_VARYING, nothing is done */
- ibool relock) /*!< in: TRUE if re-entering an x-lock */
- MY_ATTRIBUTE((nonnull));
-/******************************************************************//**
-Removes a latch from the thread level array if it is found there.
-@return TRUE if found in the array; it is no error if the latch is
-not found, as we presently are not able to determine the level for
-every latch reservation the program does */
-UNIV_INTERN
-ibool
-sync_thread_reset_level(
-/*====================*/
- void* latch); /*!< in: pointer to a mutex or an rw-lock */
-/******************************************************************//**
-Checks if the level array for the current thread contains a
-mutex or rw-latch at the specified level.
-@return a matching latch, or NULL if not found */
-UNIV_INTERN
-void*
-sync_thread_levels_contains(
-/*========================*/
- ulint level); /*!< in: latching order level
- (SYNC_DICT, ...)*/
-/******************************************************************//**
-Checks that the level array for the current thread is empty.
-@return a latch, or NULL if empty except the exceptions specified below */
-UNIV_INTERN
-void*
-sync_thread_levels_nonempty_gen(
-/*============================*/
- ibool dict_mutex_allowed) /*!< in: TRUE if dictionary mutex is
- allowed to be owned by the thread */
- MY_ATTRIBUTE((warn_unused_result));
-/******************************************************************//**
-Checks if the level array for the current thread is empty,
-except for data dictionary latches. */
-#define sync_thread_levels_empty_except_dict() \
- (!sync_thread_levels_nonempty_gen(TRUE))
-/******************************************************************//**
-Checks if the level array for the current thread is empty,
-except for the btr_search_latch.
-@return a latch, or NULL if empty except the exceptions specified below */
-UNIV_INTERN
-void*
-sync_thread_levels_nonempty_trx(
-/*============================*/
- ibool has_search_latch)
- /*!< in: TRUE if and only if the thread
- is supposed to hold btr_search_latch */
- MY_ATTRIBUTE((warn_unused_result));
-
-/******************************************************************//**
-Gets the debug information for a reserved mutex. */
-UNIV_INTERN
-void
-mutex_get_debug_info(
-/*=================*/
- ib_mutex_t* mutex, /*!< in: mutex */
- const char** file_name, /*!< out: file where requested */
- ulint* line, /*!< out: line where requested */
- os_thread_id_t* thread_id); /*!< out: id of the thread which owns
- the mutex */
-/******************************************************************//**
-Counts currently reserved mutexes. Works only in the debug version.
-@return number of reserved mutexes */
-UNIV_INTERN
-ulint
-mutex_n_reserved(void);
-/*==================*/
-#endif /* UNIV_SYNC_DEBUG */
-/******************************************************************//**
-NOT to be used outside this module except in debugging! Gets the value
-of the lock word. */
-UNIV_INLINE
-lock_word_t
-mutex_get_lock_word(
-/*================*/
- const ib_mutex_t* mutex); /*!< in: mutex */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-NOT to be used outside this module except in debugging! Gets the waiters
-field in a mutex.
-@return value to set */
-UNIV_INLINE
-ulint
-mutex_get_waiters(
-/*==============*/
- const ib_mutex_t* mutex); /*!< in: mutex */
-#endif /* UNIV_SYNC_DEBUG */
-
-/*
- LATCHING ORDER WITHIN THE DATABASE
- ==================================
-
-The mutex or latch in the central memory object, for instance, a rollback
-segment object, must be acquired before acquiring the latch or latches to
-the corresponding file data structure. In the latching order below, these
-file page object latches are placed immediately below the corresponding
-central memory object latch or mutex.
-
-Synchronization object Notes
----------------------- -----
-
-Dictionary mutex If we have a pointer to a dictionary
-| object, e.g., a table, it can be
-| accessed without reserving the
-| dictionary mutex. We must have a
-| reservation, a memoryfix, to the
-| appropriate table object in this case,
-| and the table must be explicitly
-| released later.
-V
-Dictionary header
-|
-V
-Secondary index tree latch The tree latch protects also all
-| the B-tree non-leaf pages. These
-V can be read with the page only
-Secondary index non-leaf bufferfixed to save CPU time,
-| no s-latch is needed on the page.
-| Modification of a page requires an
-| x-latch on the page, however. If a
-| thread owns an x-latch to the tree,
-| it is allowed to latch non-leaf pages
-| even after it has acquired the fsp
-| latch.
-V
-Secondary index leaf The latch on the secondary index leaf
-| can be kept while accessing the
-| clustered index, to save CPU time.
-V
-Clustered index tree latch To increase concurrency, the tree
-| latch is usually released when the
-| leaf page latch has been acquired.
-V
-Clustered index non-leaf
-|
-V
-Clustered index leaf
-|
-V
-Transaction system header
-|
-V
-Transaction undo mutex The undo log entry must be written
-| before any index page is modified.
-| Transaction undo mutex is for the undo
-| logs the analogue of the tree latch
-| for a B-tree. If a thread has the
-| trx undo mutex reserved, it is allowed
-| to latch the undo log pages in any
-| order, and also after it has acquired
-| the fsp latch.
-V
-Rollback segment mutex The rollback segment mutex must be
-| reserved, if, e.g., a new page must
-| be added to an undo log. The rollback
-| segment and the undo logs in its
-| history list can be seen as an
-| analogue of a B-tree, and the latches
-| reserved similarly, using a version of
-| lock-coupling. If an undo log must be
-| extended by a page when inserting an
-| undo log record, this corresponds to
-| a pessimistic insert in a B-tree.
-V
-Rollback segment header
-|
-V
-Purge system latch
-|
-V
-Undo log pages If a thread owns the trx undo mutex,
-| or for a log in the history list, the
-| rseg mutex, it is allowed to latch
-| undo log pages in any order, and even
-| after it has acquired the fsp latch.
-| If a thread does not have the
-| appropriate mutex, it is allowed to
-| latch only a single undo log page in
-| a mini-transaction.
-V
-File space management latch If a mini-transaction must allocate
-| several file pages, it can do that,
-| because it keeps the x-latch to the
-| file space management in its memo.
-V
-File system pages
-|
-V
-lock_sys_wait_mutex Mutex protecting lock timeout data
-|
-V
-lock_sys_mutex Mutex protecting lock_sys_t
-|
-V
-trx_sys->mutex Mutex protecting trx_sys_t
-|
-V
-Threads mutex Background thread scheduling mutex
-|
-V
-query_thr_mutex Mutex protecting query threads
-|
-V
-trx_mutex Mutex protecting trx_t fields
-|
-V
-Search system mutex
-|
-V
-Buffer pool mutexes
-|
-V
-Log mutex
-|
-Any other latch
-|
-V
-Memory pool mutex */
-
-/* Latching order levels. If you modify these, you have to also update
-sync_thread_add_level(). */
-
-/* User transaction locks are higher than any of the latch levels below:
-no latches are allowed when a thread goes to wait for a normal table
-or row lock! */
-#define SYNC_USER_TRX_LOCK 9999
-#define SYNC_NO_ORDER_CHECK 3000 /* this can be used to suppress
- latching order checking */
-#define SYNC_LEVEL_VARYING 2000 /* Level is varying. Only used with
- buffer pool page locks, which do not
- have a fixed level, but instead have
- their level set after the page is
- locked; see e.g.
- ibuf_bitmap_get_map_page(). */
-#define SYNC_TRX_I_S_RWLOCK 1910 /* Used for
- trx_i_s_cache_t::rw_lock */
-#define SYNC_TRX_I_S_LAST_READ 1900 /* Used for
- trx_i_s_cache_t::last_read_mutex */
-#define SYNC_FILE_FORMAT_TAG 1200 /* Used to serialize access to the
- file format tag */
-#define SYNC_DICT_OPERATION 1010 /* table create, drop, etc. reserve
- this in X-mode; implicit or backround
- operations purge, rollback, foreign
- key checks reserve this in S-mode */
-#define SYNC_FTS_CACHE 1005 /* FTS cache rwlock */
-#define SYNC_DICT 1000
-#define SYNC_DICT_AUTOINC_MUTEX 999
-#define SYNC_STATS_AUTO_RECALC 997
-#define SYNC_DICT_HEADER 995
-#define SYNC_IBUF_HEADER 914
-#define SYNC_IBUF_PESS_INSERT_MUTEX 912
-/*-------------------------------*/
-#define SYNC_INDEX_TREE 900
-#define SYNC_TREE_NODE_NEW 892
-#define SYNC_TREE_NODE_FROM_HASH 891
-#define SYNC_TREE_NODE 890
-#define SYNC_PURGE_LATCH 800
-#define SYNC_TRX_UNDO 700
-#define SYNC_RSEG 600
-#define SYNC_RSEG_HEADER_NEW 591
-#define SYNC_RSEG_HEADER 590
-#define SYNC_TRX_UNDO_PAGE 570
-#define SYNC_EXTERN_STORAGE 500
-#define SYNC_FSP 400
-#define SYNC_FSP_PAGE 395
-#define SYNC_STATS_DEFRAG 390
-/*------------------------------------- Change buffer headers */
-#define SYNC_IBUF_MUTEX 370 /* ibuf_mutex */
-/*------------------------------------- Change buffer tree */
-#define SYNC_IBUF_INDEX_TREE 360
-#define SYNC_IBUF_TREE_NODE_NEW 359
-#define SYNC_IBUF_TREE_NODE 358
-#define SYNC_IBUF_BITMAP_MUTEX 351
-#define SYNC_IBUF_BITMAP 350
-/*------------------------------------- Change log for online create index */
-#define SYNC_INDEX_ONLINE_LOG 340
-/*------------------------------------- MySQL query cache mutex */
-/*------------------------------------- MySQL binlog mutex */
-/*-------------------------------*/
-#define SYNC_LOCK_WAIT_SYS 300
-#define SYNC_LOCK_SYS 299
-#define SYNC_TRX_SYS 298
-#define SYNC_TRX 297
-#define SYNC_THREADS 295
-#define SYNC_REC_LOCK 294
-#define SYNC_TRX_SYS_HEADER 290
-#define SYNC_PURGE_QUEUE 200
-#define SYNC_LOG_ONLINE 175
-#define SYNC_LOG 170
-#define SYNC_LOG_FLUSH_ORDER 147
-#define SYNC_RECV 168
-#define SYNC_FTS_TOKENIZE 167
-#define SYNC_FTS_CACHE_INIT 166 /* Used for FTS cache initialization */
-#define SYNC_FTS_BG_THREADS 165
-#define SYNC_FTS_OPTIMIZE 164 // FIXME: is this correct number, test
-#define SYNC_WORK_QUEUE 162
-#define SYNC_SEARCH_SYS 160 /* NOTE that if we have a memory
- heap that can be extended to the
- buffer pool, its logical level is
- SYNC_SEARCH_SYS, as memory allocation
- can call routines there! Otherwise
- the level is SYNC_MEM_HASH. */
-#define SYNC_BUF_LRU_LIST 151
-#define SYNC_BUF_PAGE_HASH 149 /* buf_pool->page_hash rw_lock */
-#define SYNC_BUF_BLOCK 146 /* Block mutex */
-#define SYNC_BUF_FREE_LIST 145
-#define SYNC_BUF_ZIP_FREE 144
-#define SYNC_BUF_ZIP_HASH 143
-#define SYNC_BUF_FLUSH_STATE 142
-#define SYNC_BUF_FLUSH_LIST 141 /* Buffer flush list mutex */
-#define SYNC_DOUBLEWRITE 139
-#define SYNC_ANY_LATCH 135
-#define SYNC_MEM_HASH 131
-#define SYNC_MEM_POOL 130
-
-/* Codes used to designate lock operations */
-#define RW_LOCK_NOT_LOCKED 350
-#define RW_LOCK_EX 351
-#define RW_LOCK_EXCLUSIVE 351
-#define RW_LOCK_SHARED 352
-#define RW_LOCK_WAIT_EX 353
-#define SYNC_MUTEX 354
-#define SYNC_PRIO_MUTEX 355
-#define PRIO_RW_LOCK_EX 356
-#define PRIO_RW_LOCK_SHARED 357
-
-/* NOTE! The structure appears here only for the compiler to know its size.
-Do not use its fields directly! The structure used in the spin lock
-implementation of a mutual exclusion semaphore. */
-
-/** InnoDB mutex */
-struct ib_mutex_t {
- struct os_event event; /*!< Used by sync0arr.cc for the wait queue */
- volatile lock_word_t lock_word; /*!< lock_word is the target
- of the atomic test-and-set instruction when
- atomic operations are enabled. */
-
-#if !defined(HAVE_ATOMIC_BUILTINS)
- os_fast_mutex_t
- os_fast_mutex; /*!< We use this OS mutex in place of lock_word
- when atomic operations are not enabled */
-#endif
- ulint waiters; /*!< This ulint is set to 1 if there are (or
- may be) threads waiting in the global wait
- array for this mutex to be released.
- Otherwise, this is 0. */
- UT_LIST_NODE_T(ib_mutex_t) list; /*!< All allocated mutexes are put into
- a list. Pointers to the next and prev. */
-
-#ifdef UNIV_SYNC_DEBUG
- ulint level; /*!< Level in the global latching order */
-#endif /* UNIV_SYNC_DEBUG */
-
- const char* file_name; /*!< File where the mutex was locked */
- ulint line; /*!< Line where the mutex was locked */
- const char* cfile_name; /*!< File name where mutex created */
- ulint cline; /*!< Line where created */
- ulong count_os_wait; /*!< count of os_wait */
- const char* cmutex_name; /*!< mutex name */
- os_thread_id_t thread_id; /*!< The thread id of the thread
- which locked the mutex. */
-
-#ifdef UNIV_DEBUG
-
-/** Value of mutex_t::magic_n */
-# define MUTEX_MAGIC_N 979585UL
- ulint magic_n; /*!< MUTEX_MAGIC_N */
- ulint ib_mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */
-#endif /* UNIV_DEBUG */
-
-#ifdef UNIV_PFS_MUTEX
- struct PSI_mutex* pfs_psi; /*!< The performance schema
- instrumentation hook */
-#endif
-};
-
-/** XtraDB priority mutex */
-struct ib_prio_mutex_t {
- ib_mutex_t base_mutex; /* The regular mutex provides the lock
- word etc. for the priority mutex */
- struct os_event high_priority_event; /* High priority wait array
- event */
- volatile ulint high_priority_waiters; /* Number of threads that asked
- for this mutex to be acquired with high
- priority in the global wait array
- waiting for this mutex to be
- released. */
-};
-
-/** Constant determining how long spin wait is continued before suspending
-the thread. A value 600 rounds on a 1995 100 MHz Pentium seems to correspond
-to 20 microseconds. */
-
-#define SYNC_SPIN_ROUNDS srv_n_spin_wait_rounds
-
-/** The number of iterations in the mutex_spin_wait() spin loop.
-Intended for performance monitoring. */
-extern ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_round_count;
-/** The number of mutex_spin_wait() calls. Intended for
-performance monitoring. */
-extern ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_wait_count;
-/** The number of OS waits in mutex_spin_wait(). Intended for
-performance monitoring. */
-extern ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_os_wait_count;
-
-/** The number of mutex_exit calls. Intended for performance monitoring. */
-extern ib_int64_t mutex_exit_count;
-
-#ifdef UNIV_SYNC_DEBUG
-/** Latching order checks start when this is set TRUE */
-extern ibool sync_order_checks_on;
-#endif /* UNIV_SYNC_DEBUG */
-
-/** This variable is set to TRUE when sync_init is called */
-extern ibool sync_initialized;
-
-/** Global list of database mutexes (not OS mutexes) created. */
-typedef UT_LIST_BASE_NODE_T(ib_mutex_t) ut_list_base_node_t;
-/** Global list of database mutexes (not OS mutexes) created. */
-extern ut_list_base_node_t mutex_list;
-
-/** Mutex protecting the mutex_list variable */
-extern ib_mutex_t mutex_list_mutex;
-
-#ifndef HAVE_ATOMIC_BUILTINS
-/**********************************************************//**
-Function that uses a mutex to decrement a variable atomically */
-UNIV_INLINE
-void
-os_atomic_dec_ulint_func(
-/*=====================*/
- ib_mutex_t* mutex, /*!< in: mutex guarding the
- decrement */
- volatile ulint* var, /*!< in/out: variable to
- decrement */
- ulint delta); /*!< in: delta to decrement */
-/**********************************************************//**
-Function that uses a mutex to increment a variable atomically */
-UNIV_INLINE
-void
-os_atomic_inc_ulint_func(
-/*=====================*/
- ib_mutex_t* mutex, /*!< in: mutex guarding the
- increment */
- volatile ulint* var, /*!< in/out: variable to
- increment */
- ulint delta); /*!< in: delta to increment */
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
-#ifndef UNIV_NONINL
-#include "sync0sync.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/sync0sync.ic b/storage/xtradb/include/sync0sync.ic
deleted file mode 100644
index fb24c0ec244..00000000000
--- a/storage/xtradb/include/sync0sync.ic
+++ /dev/null
@@ -1,665 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0sync.ic
-Mutex, the basic synchronization primitive
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-/******************************************************************//**
-Sets the waiters field in a mutex. */
-UNIV_INTERN
-void
-mutex_set_waiters(
-/*==============*/
- ib_mutex_t* mutex, /*!< in: mutex */
- ulint n); /*!< in: value to set */
-/******************************************************************//**
-Reserves a mutex or a priority mutex for the current thread. If the mutex is
-reserved, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS)
-waiting for the mutex before suspending the thread. */
-UNIV_INTERN
-void
-mutex_spin_wait(
-/*============*/
- void* _mutex, /*!< in: pointer to mutex */
- bool high_priority, /*!< in: whether the mutex is a
- priority mutex with high priority
- specified */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line); /*!< in: line where requested */
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Sets the debug information for a reserved mutex. */
-UNIV_INTERN
-void
-mutex_set_debug_info(
-/*=================*/
- ib_mutex_t* mutex, /*!< in: mutex */
- const char* file_name, /*!< in: file where requested */
- ulint line); /*!< in: line where requested */
-#endif /* UNIV_SYNC_DEBUG */
-/******************************************************************//**
-Releases the threads waiting in the primary wait array for this mutex. */
-UNIV_INTERN
-void
-mutex_signal_object(
-/*================*/
- ib_mutex_t* mutex); /*!< in: mutex */
-
-/******************************************************************//**
-Performs an atomic test-and-set instruction to the lock_word field of a
-mutex.
-@return the previous value of lock_word: 0 or 1 */
-UNIV_INLINE
-lock_word_t
-ib_mutex_test_and_set(
-/*==================*/
- ib_mutex_t* mutex) /*!< in: mutex */
-{
-#if defined(HAVE_ATOMIC_BUILTINS)
- return(os_atomic_test_and_set(&mutex->lock_word));
-#else
- ibool ret;
-
- ret = os_fast_mutex_trylock_full_barrier(&(mutex->os_fast_mutex));
-
- if (ret == 0) {
- /* We check that os_fast_mutex_trylock does not leak
- and allow race conditions */
- ut_a(mutex->lock_word == 0);
-
- mutex->lock_word = 1;
- }
-
- return((byte) ret);
-#endif /* HAVE_ATOMIC_BUILTINS */
-}
-
-/******************************************************************//**
-Performs a reset instruction to the lock_word field of a mutex. This
-instruction also serializes memory operations to the program order. */
-UNIV_INLINE
-void
-mutex_reset_lock_word(
-/*==================*/
- ib_mutex_t* mutex) /*!< in: mutex */
-{
-#if defined(HAVE_ATOMIC_BUILTINS)
- os_atomic_clear(&mutex->lock_word);
-#else
- mutex->lock_word = 0;
-
- os_fast_mutex_unlock(&(mutex->os_fast_mutex));
-#endif /* HAVE_ATOMIC_BUILTINS */
-}
-
-/******************************************************************//**
-Gets the value of the lock word. */
-UNIV_INLINE
-lock_word_t
-mutex_get_lock_word(
-/*================*/
- const ib_mutex_t* mutex) /*!< in: mutex */
-{
- ut_ad(mutex);
-
- return(mutex->lock_word);
-}
-
-/******************************************************************//**
-Gets the waiters field in a mutex.
-@return value to set */
-UNIV_INLINE
-ulint
-mutex_get_waiters(
-/*==============*/
- const ib_mutex_t* mutex) /*!< in: mutex */
-{
- const volatile ulint* ptr; /*!< declared volatile to ensure that
- the value is read from memory */
- ut_ad(mutex);
-
- ptr = &(mutex->waiters);
-
- return(*ptr); /* Here we assume that the read of a single
- word from memory is atomic */
-}
-
-/******************************************************************//**
-NOTE! Use the corresponding macro mutex_exit(), not directly this function!
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-mutex_exit_func(
-/*============*/
- ib_mutex_t* mutex) /*!< in: pointer to mutex */
-{
- ut_ad(mutex_own(mutex));
-
- mutex->thread_id = (os_thread_id_t) ULINT_UNDEFINED;
-
-#ifdef UNIV_SYNC_DEBUG
- sync_thread_reset_level(mutex);
-#endif
- mutex_reset_lock_word(mutex);
-
- /* A problem: we assume that mutex_reset_lock word
- is a memory barrier, that is when we read the waiters
- field next, the read must be serialized in memory
- after the reset. A speculative processor might
- perform the read first, which could leave a waiting
- thread hanging indefinitely.
-
- Our current solution call every second
- sync_arr_wake_threads_if_sema_free()
- to wake up possible hanging threads if
- they are missed in mutex_signal_object. */
-
- /* We add a memory barrier to prevent reading of the
- number of waiters before releasing the lock. */
-
- os_mb;
-
- if (mutex_get_waiters(mutex) != 0) {
-
- mutex_signal_object(mutex);
- }
-
-#ifdef UNIV_SYNC_PERF_STAT
- mutex_exit_count++;
-#endif
-}
-
-/******************************************************************//**
-NOTE! Use the corresponding macro mutex_exit(), not directly this function!
-Unlocks a priority mutex owned by the current thread. */
-UNIV_INLINE
-void
-mutex_exit_func(
-/*============*/
- ib_prio_mutex_t* mutex) /*!< in: pointer to mutex */
-{
- ut_ad(mutex_own(mutex));
-
- mutex->base_mutex.thread_id = (os_thread_id_t) ULINT_UNDEFINED;
-
-#ifdef UNIV_SYNC_DEBUG
- sync_thread_reset_level(&mutex->base_mutex);
-#endif
- mutex_reset_lock_word(&mutex->base_mutex);
-
- /* A problem: we assume that mutex_reset_lock word
- is a memory barrier, that is when we read the waiters
- field next, the read must be serialized in memory
- after the reset. A speculative processor might
- perform the read first, which could leave a waiting
- thread hanging indefinitely.
-
- Our current solution call every second
- sync_arr_wake_threads_if_sema_free()
- to wake up possible hanging threads if
- they are missed in mutex_signal_object. */
-
- /* Wake up any high priority waiters first. */
- if (mutex->high_priority_waiters != 0) {
-
- os_event_set(&mutex->high_priority_event);
- sync_array_object_signalled();
-
- } else if (mutex_get_waiters(&mutex->base_mutex) != 0) {
-
- mutex_signal_object(&mutex->base_mutex);
- }
-
-#ifdef UNIV_SYNC_PERF_STAT
- mutex_exit_count++;
-#endif
-
-}
-
-
-/******************************************************************//**
-Locks a mutex for the current thread. If the mutex is reserved, the function
-spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for the mutex
-before suspending the thread. */
-UNIV_INLINE
-void
-mutex_enter_func(
-/*=============*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where locked */
- ulint line) /*!< in: line where locked */
-{
- ut_ad(mutex_validate(mutex));
-#ifndef WITH_WSREP
- /* this cannot be be granted when BF trx kills a trx in lock wait state */
- ut_ad(!mutex_own(mutex));
-#endif /* WITH_WSREP */
-
- /* Note that we do not peek at the value of lock_word before trying
- the atomic test_and_set; we could peek, and possibly save time. */
-
- if (!ib_mutex_test_and_set(mutex)) {
- mutex->thread_id = os_thread_get_curr_id();
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, file_name, line);
-#endif
- if (srv_instrument_semaphores) {
- mutex->file_name = file_name;
- mutex->line = line;
- }
-
- return; /* Succeeded! */
- }
-
- mutex_spin_wait(mutex, false, file_name, line);
-}
-
-/******************************************************************//**
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Locks a priority mutex for the current thread. If the mutex is
-reserved the function spins a preset time (controlled by SYNC_SPIN_ROUNDS)
-waiting for the mutex before suspending the thread. If the thread is suspended,
-the priority argument value determines the relative order for its wake up. Any
-IB_HIGH_PRIO waiters will be woken up before any IB_LOW_PRIO waiters. In case
-of IB_DEFAULT_PRIO, the relative priority will be set according to
-srv_current_thread_priority. */
-UNIV_INLINE
-void
-mutex_enter_func(
-/*=============*/
- ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where
- locked */
- ulint line, /*!< in: line where locked */
- enum ib_sync_priority priority)
- /*!<in: mutex acquisition
- priority */
-{
- bool high_priority;
-
- ut_ad(mutex_validate(&mutex->base_mutex));
- ut_ad(!mutex_own(mutex));
-
- /* Note that we do not peek at the value of lock_word before trying
- the atomic test_and_set; we could peek, and possibly save time. */
-
- if (!ib_mutex_test_and_set(&mutex->base_mutex)) {
- mutex->base_mutex.thread_id = os_thread_get_curr_id();
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(&mutex->base_mutex, file_name, line);
-#endif
- if(srv_instrument_semaphores) {
- mutex->base_mutex.file_name = file_name;
- mutex->base_mutex.line = line;
- }
-
- return; /* Succeeded! */
- }
-
- if (UNIV_LIKELY(priority == IB_DEFAULT_PRIO)) {
- high_priority = srv_current_thread_priority;
- } else {
- high_priority = (priority == IB_HIGH_PRIO);
- }
- mutex_spin_wait(mutex, high_priority, file_name, line);
-}
-
-/********************************************************************//**
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Tries to lock the mutex for the current thread. If the lock is not
-acquired immediately, returns with return value 1.
-@return 0 if succeed, 1 if not */
-UNIV_INLINE
-ulint
-mutex_enter_nowait_func(
-/*====================*/
- ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line) /*!< in: line where
- requested */
-{
- return mutex_enter_nowait_func(&mutex->base_mutex, file_name, line);
-}
-
-#ifdef UNIV_PFS_MUTEX
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_func(). */
-UNIV_INLINE
-void
-pfs_mutex_enter_func(
-/*=================*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where locked */
- ulint line) /*!< in: line where locked */
-{
- if (mutex->pfs_psi != NULL) {
- PSI_mutex_locker* locker;
- PSI_mutex_locker_state state;
-
- locker = PSI_MUTEX_CALL(start_mutex_wait)(
- &state, mutex->pfs_psi,
- PSI_MUTEX_LOCK, file_name,
- static_cast<uint>(line));
-
- mutex_enter_func(mutex, file_name, line);
-
- if (locker != NULL) {
- PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
- }
- } else {
- mutex_enter_func(mutex, file_name, line);
- }
-}
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_func(). */
-UNIV_INLINE
-void
-pfs_mutex_enter_func(
-/*=================*/
- ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where
- locked */
- ulint line, /*!< in: line where locked */
- enum ib_sync_priority priority) /*!<in: mutex acquisition
- priority */
-{
- if (mutex->base_mutex.pfs_psi != NULL) {
- PSI_mutex_locker* locker;
- PSI_mutex_locker_state state;
-
- locker = PSI_MUTEX_CALL(start_mutex_wait)(
- &state, mutex->base_mutex.pfs_psi,
- PSI_MUTEX_LOCK, file_name, line);
-
- mutex_enter_func(mutex, file_name, line, priority);
-
- if (locker != NULL) {
- PSI_MUTEX_CALL(end_mutex_wait)(locker, 0);
- }
- } else {
- mutex_enter_func(mutex, file_name, line, priority);
- }
-}
-
-/********************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_nowait_func.
-@return 0 if succeed, 1 if not */
-UNIV_INLINE
-ulint
-pfs_mutex_enter_nowait_func(
-/*========================*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line) /*!< in: line where requested */
-{
- ulint ret;
-
- if (mutex->pfs_psi != NULL) {
- PSI_mutex_locker* locker;
- PSI_mutex_locker_state state;
-
- locker = PSI_MUTEX_CALL(start_mutex_wait)(
- &state, mutex->pfs_psi,
- PSI_MUTEX_TRYLOCK, file_name,
- static_cast<uint>(line));
-
- ret = mutex_enter_nowait_func(mutex, file_name, line);
-
- if (locker != NULL) {
- PSI_MUTEX_CALL(end_mutex_wait)(locker, (int) ret);
- }
- } else {
- ret = mutex_enter_nowait_func(mutex, file_name, line);
- }
-
- return(ret);
-}
-
-/********************************************************************//**
-NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
-this function!
-This is a performance schema instrumented wrapper function for
-mutex_enter_nowait_func.
-@return 0 if succeed, 1 if not */
-UNIV_INLINE
-ulint
-pfs_mutex_enter_nowait_func(
-/*========================*/
- ib_prio_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line) /*!< in: line where
- requested */
-{
- return pfs_mutex_enter_nowait_func(&mutex->base_mutex, file_name,
- line);
-}
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_exit(), not directly
-this function!
-A wrap function of mutex_exit_func() with performance schema instrumentation.
-Unlocks a mutex owned by the current thread. */
-UNIV_INLINE
-void
-pfs_mutex_exit_func(
-/*================*/
- ib_mutex_t* mutex) /*!< in: pointer to mutex */
-{
- if (mutex->pfs_psi != NULL) {
- PSI_MUTEX_CALL(unlock_mutex)(mutex->pfs_psi);
- }
-
- mutex_exit_func(mutex);
-}
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_exit(), not directly
-this function!
-A wrap function of mutex_exit_func() with peformance schema instrumentation.
-Unlocks a priority mutex owned by the current thread. */
-UNIV_INLINE
-void
-pfs_mutex_exit_func(
-/*================*/
- ib_prio_mutex_t* mutex) /*!< in: pointer to mutex */
-{
- if (mutex->base_mutex.pfs_psi != NULL) {
- PSI_MUTEX_CALL(unlock_mutex)(mutex->base_mutex.pfs_psi);
- }
-
- mutex_exit_func(mutex);
-}
-
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_create(), not directly
-this function!
-A wrapper function for mutex_create_func(), registers the mutex
-with performance schema if "UNIV_PFS_MUTEX" is defined when
-creating the mutex */
-UNIV_INLINE
-void
-pfs_mutex_create_func(
-/*==================*/
- mysql_pfs_key_t key, /*!< in: Performance Schema key */
- ib_mutex_t* mutex, /*!< in: pointer to memory */
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline, /*!< in: file line where created */
- const char* cmutex_name) /*!< in: mutex name */
-{
- mutex->pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, mutex);
-
- mutex_create_func(mutex,
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- level,
-# endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
- cfile_name,
- cline,
- cmutex_name);
-}
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_create(), not directly
-this function!
-A wrapper function for mutex_create_func(), registers the mutex
-with peformance schema if "UNIV_PFS_MUTEX" is defined when
-creating the performance mutex */
-UNIV_INLINE
-void
-pfs_mutex_create_func(
-/*==================*/
- PSI_mutex_key key, /*!< in: Performance Schema
- key */
- ib_prio_mutex_t* mutex, /*!< in: pointer to memory */
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where
- created */
- ulint cline, /*!< in: file line where
- created */
- const char* cmutex_name)
-{
- mutex->base_mutex.pfs_psi = PSI_MUTEX_CALL(init_mutex)(key, mutex);
-
- mutex_create_func(mutex,
-# ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- level,
-# endif /* UNIV_SYNC_DEBUG */
-# endif /* UNIV_DEBUG */
- cfile_name,
- cline,
- cmutex_name);
-}
-
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_free(), not directly
-this function!
-Wrapper function for mutex_free_func(). Also destroys the performance
-schema probes when freeing the mutex */
-UNIV_INLINE
-void
-pfs_mutex_free_func(
-/*================*/
- ib_mutex_t* mutex) /*!< in: mutex */
-{
- if (mutex->pfs_psi != NULL) {
- PSI_MUTEX_CALL(destroy_mutex)(mutex->pfs_psi);
- mutex->pfs_psi = NULL;
- }
-
- mutex_free_func(mutex);
-}
-
-/******************************************************************//**
-NOTE! Please use the corresponding macro mutex_free(), not directly
-this function!
-Wrapper function for mutex_free_func(). Also destroys the performance
-schema probes when freeing the priority mutex */
-UNIV_INLINE
-void
-pfs_mutex_free_func(
-/*================*/
- ib_prio_mutex_t* mutex) /*!< in: mutex */
-{
- if (mutex->base_mutex.pfs_psi != NULL) {
- PSI_MUTEX_CALL(destroy_mutex)(mutex->base_mutex.pfs_psi);
- mutex->base_mutex.pfs_psi = NULL;
- }
-
- mutex_free_func(mutex);
-}
-
-
-#endif /* UNIV_PFS_MUTEX */
-
-#ifndef HAVE_ATOMIC_BUILTINS
-/**********************************************************//**
-Function that uses a mutex to decrement a variable atomically */
-UNIV_INLINE
-void
-os_atomic_dec_ulint_func(
-/*=====================*/
- ib_mutex_t* mutex, /*!< in: mutex guarding the dec */
- volatile ulint* var, /*!< in/out: variable to decrement */
- ulint delta) /*!< in: delta to decrement */
-{
- mutex_enter(mutex);
-
- /* I don't think we will encounter a situation where
- this check will not be required. */
- ut_ad(*var >= delta);
-
- *var -= delta;
-
- mutex_exit(mutex);
-}
-
-/**********************************************************//**
-Function that uses a mutex to increment a variable atomically */
-UNIV_INLINE
-void
-os_atomic_inc_ulint_func(
-/*=====================*/
- ib_mutex_t* mutex, /*!< in: mutex guarding the increment */
- volatile ulint* var, /*!< in/out: variable to increment */
- ulint delta) /*!< in: delta to increment */
-{
- mutex_enter(mutex);
-
- *var += delta;
-
- mutex_exit(mutex);
-}
-#endif /* !HAVE_ATOMIC_BUILTINS */
diff --git a/storage/xtradb/include/sync0types.h b/storage/xtradb/include/sync0types.h
deleted file mode 100644
index 04baaa0339d..00000000000
--- a/storage/xtradb/include/sync0types.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/sync0types.h
-Global types for sync
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#ifndef sync0types_h
-#define sync0types_h
-
-struct ib_mutex_t;
-
-/* The relative priority of the current thread. If 0, low priority; if 1, high
-priority. */
-extern UNIV_THREAD_LOCAL ulint srv_current_thread_priority;
-
-struct ib_prio_mutex_t;
-
-/** Priority mutex and rwlatch acquisition priorities */
-enum ib_sync_priority {
- IB_DEFAULT_PRIO,
- IB_LOW_PRIO,
- IB_HIGH_PRIO
-};
-
-#endif
diff --git a/storage/xtradb/include/trx0i_s.h b/storage/xtradb/include/trx0i_s.h
deleted file mode 100644
index ac5e00c6834..00000000000
--- a/storage/xtradb/include/trx0i_s.h
+++ /dev/null
@@ -1,315 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0i_s.h
-INFORMATION SCHEMA innodb_trx, innodb_locks and
-innodb_lock_waits tables cache structures and public
-functions.
-
-Created July 17, 2007 Vasil Dimov
-*******************************************************/
-
-#ifndef trx0i_s_h
-#define trx0i_s_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "dict0types.h"
-#include "ut0ut.h"
-
-/** The maximum amount of memory that can be consumed by innodb_trx,
-innodb_locks and innodb_lock_waits information schema tables. */
-#define TRX_I_S_MEM_LIMIT 16777216 /* 16 MiB */
-
-/** The maximum length of a string that can be stored in
-i_s_locks_row_t::lock_data */
-#define TRX_I_S_LOCK_DATA_MAX_LEN 8192
-
-/** The maximum length of a string that can be stored in
-i_s_trx_row_t::trx_query */
-#define TRX_I_S_TRX_QUERY_MAX_LEN 1024
-
-/** The maximum length of a string that can be stored in
-i_s_trx_row_t::trx_operation_state */
-#define TRX_I_S_TRX_OP_STATE_MAX_LEN 64
-
-/** The maximum length of a string that can be stored in
-i_s_trx_row_t::trx_foreign_key_error */
-#define TRX_I_S_TRX_FK_ERROR_MAX_LEN 256
-
-/** The maximum length of a string that can be stored in
-i_s_trx_row_t::trx_isolation_level */
-#define TRX_I_S_TRX_ISOLATION_LEVEL_MAX_LEN 16
-
-/** Safely copy strings in to the INNODB_TRX table's
-string based columns */
-#define TRX_I_S_STRING_COPY(data, field, constraint, tcache) \
-do { \
- if (strlen(data) > constraint) { \
- char buff[constraint + 1]; \
- strncpy(buff, data, constraint); \
- buff[constraint] = '\0'; \
- \
- field = static_cast<const char*>( \
- ha_storage_put_memlim( \
- (tcache)->storage, buff, constraint + 1,\
- MAX_ALLOWED_FOR_STORAGE(tcache))); \
- } else { \
- field = static_cast<const char*>( \
- ha_storage_put_str_memlim( \
- (tcache)->storage, data, \
- MAX_ALLOWED_FOR_STORAGE(tcache))); \
- } \
-} while (0)
-
-/** A row of INFORMATION_SCHEMA.innodb_locks */
-struct i_s_locks_row_t;
-
-/** Objects of trx_i_s_cache_t::locks_hash */
-struct i_s_hash_chain_t;
-
-/** Objects of this type are added to the hash table
-trx_i_s_cache_t::locks_hash */
-struct i_s_hash_chain_t {
- i_s_locks_row_t* value; /*!< row of
- INFORMATION_SCHEMA.innodb_locks*/
- i_s_hash_chain_t* next; /*!< next item in the hash chain */
-};
-
-/** This structure represents INFORMATION_SCHEMA.innodb_locks row */
-struct i_s_locks_row_t {
- trx_id_t lock_trx_id; /*!< transaction identifier */
- const char* lock_mode; /*!< lock mode from
- lock_get_mode_str() */
- const char* lock_type; /*!< lock type from
- lock_get_type_str() */
- const char* lock_table; /*!< table name from
- lock_get_table_name() */
- const char* lock_index; /*!< index name from
- lock_rec_get_index_name() */
- /** Information for record locks. All these are
- ULINT_UNDEFINED for table locks. */
- /* @{ */
- ulint lock_space; /*!< tablespace identifier */
- ulint lock_page; /*!< page number within the_space */
- ulint lock_rec; /*!< heap number of the record
- on the page */
- const char* lock_data; /*!< (some) content of the record */
- /* @} */
-
- /** The following are auxiliary and not included in the table */
- /* @{ */
- table_id_t lock_table_id;
- /*!< table identifier from
- lock_get_table_id */
- i_s_hash_chain_t hash_chain; /*!< hash table chain node for
- trx_i_s_cache_t::locks_hash */
- /* @} */
-};
-
-/** This structure represents INFORMATION_SCHEMA.innodb_trx row */
-struct i_s_trx_row_t {
- trx_id_t trx_id; /*!< transaction identifier */
- const char* trx_state; /*!< transaction state from
- trx_get_que_state_str() */
- ib_time_t trx_started; /*!< trx_t::start_time */
- const i_s_locks_row_t* requested_lock_row;
- /*!< pointer to a row
- in innodb_locks if trx
- is waiting, or NULL */
- ib_time_t trx_wait_started; /*!< trx_t::wait_started */
- ullint trx_weight; /*!< TRX_WEIGHT() */
- ulint trx_mysql_thread_id; /*!< thd_get_thread_id() */
- const char* trx_query; /*!< MySQL statement being
- executed in the transaction */
- struct charset_info_st* trx_query_cs;
- /*!< charset encode the MySQL
- statement */
- const char* trx_operation_state; /*!< trx_t::op_info */
- ulint trx_tables_in_use;/*!< n_mysql_tables_in_use in
- trx_t */
- ulint trx_tables_locked;
- /*!< mysql_n_tables_locked in
- trx_t */
- ulint trx_lock_structs;/*!< list len of trx_locks in
- trx_t */
- ulint trx_lock_memory_bytes;
- /*!< mem_heap_get_size(
- trx->lock_heap) */
- ulint trx_rows_locked;/*!< lock_number_of_rows_locked() */
- ullint trx_rows_modified;/*!< trx_t::undo_no */
- ulint trx_concurrency_tickets;
- /*!< n_tickets_to_enter_innodb in
- trx_t */
- const char* trx_isolation_level;
- /*!< isolation_level in trx_t */
- ibool trx_unique_checks;
- /*!< check_unique_secondary in trx_t*/
- ibool trx_foreign_key_checks;
- /*!< check_foreigns in trx_t */
- const char* trx_foreign_key_error;
- /*!< detailed_error in trx_t */
- ibool trx_has_search_latch;
- /*!< has_search_latch in trx_t */
- ulint trx_search_latch_timeout;
- /*!< search_latch_timeout in trx_t */
- ulint trx_is_read_only;
- /*!< trx_t::read_only */
- ulint trx_is_autocommit_non_locking;
- /*!< trx_is_autocommit_non_locking(trx)
- */
-};
-
-/** This structure represents INFORMATION_SCHEMA.innodb_lock_waits row */
-struct i_s_lock_waits_row_t {
- const i_s_locks_row_t* requested_lock_row; /*!< requested lock */
- const i_s_locks_row_t* blocking_lock_row; /*!< blocking lock */
-};
-
-/** Cache of INFORMATION_SCHEMA table data */
-struct trx_i_s_cache_t;
-
-/** Auxiliary enum used by functions that need to select one of the
-INFORMATION_SCHEMA tables */
-enum i_s_table {
- I_S_INNODB_TRX, /*!< INFORMATION_SCHEMA.innodb_trx */
- I_S_INNODB_LOCKS, /*!< INFORMATION_SCHEMA.innodb_locks */
- I_S_INNODB_LOCK_WAITS /*!< INFORMATION_SCHEMA.innodb_lock_waits */
-};
-
-/** This is the intermediate buffer where data needed to fill the
-INFORMATION SCHEMA tables is fetched and later retrieved by the C++
-code in handler/i_s.cc. */
-extern trx_i_s_cache_t* trx_i_s_cache;
-
-/*******************************************************************//**
-Initialize INFORMATION SCHEMA trx related cache. */
-UNIV_INTERN
-void
-trx_i_s_cache_init(
-/*===============*/
- trx_i_s_cache_t* cache); /*!< out: cache to init */
-/*******************************************************************//**
-Free the INFORMATION SCHEMA trx related cache. */
-UNIV_INTERN
-void
-trx_i_s_cache_free(
-/*===============*/
- trx_i_s_cache_t* cache); /*!< in/out: cache to free */
-
-/*******************************************************************//**
-Issue a shared/read lock on the tables cache. */
-UNIV_INTERN
-void
-trx_i_s_cache_start_read(
-/*=====================*/
- trx_i_s_cache_t* cache); /*!< in: cache */
-
-/*******************************************************************//**
-Release a shared/read lock on the tables cache. */
-UNIV_INTERN
-void
-trx_i_s_cache_end_read(
-/*===================*/
- trx_i_s_cache_t* cache); /*!< in: cache */
-
-/*******************************************************************//**
-Issue an exclusive/write lock on the tables cache. */
-UNIV_INTERN
-void
-trx_i_s_cache_start_write(
-/*======================*/
- trx_i_s_cache_t* cache); /*!< in: cache */
-
-/*******************************************************************//**
-Release an exclusive/write lock on the tables cache. */
-UNIV_INTERN
-void
-trx_i_s_cache_end_write(
-/*====================*/
- trx_i_s_cache_t* cache); /*!< in: cache */
-
-
-/*******************************************************************//**
-Retrieves the number of used rows in the cache for a given
-INFORMATION SCHEMA table.
-@return number of rows */
-UNIV_INTERN
-ulint
-trx_i_s_cache_get_rows_used(
-/*========================*/
- trx_i_s_cache_t* cache, /*!< in: cache */
- enum i_s_table table); /*!< in: which table */
-
-/*******************************************************************//**
-Retrieves the nth row in the cache for a given INFORMATION SCHEMA
-table.
-@return row */
-UNIV_INTERN
-void*
-trx_i_s_cache_get_nth_row(
-/*======================*/
- trx_i_s_cache_t* cache, /*!< in: cache */
- enum i_s_table table, /*!< in: which table */
- ulint n); /*!< in: row number */
-
-/*******************************************************************//**
-Update the transactions cache if it has not been read for some time.
-@return 0 - fetched, 1 - not */
-UNIV_INTERN
-int
-trx_i_s_possibly_fetch_data_into_cache(
-/*===================================*/
- trx_i_s_cache_t* cache); /*!< in/out: cache */
-
-/*******************************************************************//**
-Returns TRUE if the data in the cache is truncated due to the memory
-limit posed by TRX_I_S_MEM_LIMIT.
-@return TRUE if truncated */
-UNIV_INTERN
-ibool
-trx_i_s_cache_is_truncated(
-/*=======================*/
- trx_i_s_cache_t* cache); /*!< in: cache */
-
-/** The maximum length of a resulting lock_id_size in
-trx_i_s_create_lock_id(), not including the terminating NUL.
-":%lu:%lu:%lu" -> 63 chars */
-#define TRX_I_S_LOCK_ID_MAX_LEN (TRX_ID_MAX_LEN + 63)
-
-/*******************************************************************//**
-Crafts a lock id string from a i_s_locks_row_t object. Returns its
-second argument. This function aborts if there is not enough space in
-lock_id. Be sure to provide at least TRX_I_S_LOCK_ID_MAX_LEN + 1 if you
-want to be 100% sure that it will not abort.
-@return resulting lock id */
-UNIV_INTERN
-char*
-trx_i_s_create_lock_id(
-/*===================*/
- const i_s_locks_row_t* row, /*!< in: innodb_locks row */
- char* lock_id,/*!< out: resulting lock_id */
- ulint lock_id_size);/*!< in: size of the lock id
- buffer */
-
-UNIV_INTERN
-void
-trx_i_s_get_lock_sys_memory_usage(ulint *constant, ulint *variable);
-
-#endif /* trx0i_s_h */
diff --git a/storage/xtradb/include/trx0purge.h b/storage/xtradb/include/trx0purge.h
deleted file mode 100644
index 7b9b5dc49cd..00000000000
--- a/storage/xtradb/include/trx0purge.h
+++ /dev/null
@@ -1,226 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0purge.h
-Purge old versions
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0purge_h
-#define trx0purge_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-#include "que0types.h"
-#include "page0page.h"
-#include "usr0sess.h"
-#include "fil0fil.h"
-
-/** The global data structure coordinating a purge */
-extern trx_purge_t* purge_sys;
-
-/** A dummy undo record used as a return value when we have a whole undo log
-which needs no purge */
-extern trx_undo_rec_t trx_purge_dummy_rec;
-
-/********************************************************************//**
-Calculates the file address of an undo log header when we have the file
-address of its history list node.
-@return file address of the log */
-UNIV_INLINE
-fil_addr_t
-trx_purge_get_log_from_hist(
-/*========================*/
- fil_addr_t node_addr); /*!< in: file address of the history
- list node of the log */
-/********************************************************************//**
-Creates the global purge system control structure and inits the history
-mutex. */
-UNIV_INTERN
-void
-trx_purge_sys_create(
-/*=================*/
- ulint n_purge_threads,/*!< in: number of purge threads */
- ib_bh_t* ib_bh); /*!< in/own: UNDO log min binary heap*/
-/********************************************************************//**
-Frees the global purge system control structure. */
-UNIV_INTERN
-void
-trx_purge_sys_close(void);
-/*======================*/
-/************************************************************************
-Adds the update undo log as the first log in the history list. Removes the
-update undo log segment from the rseg slot if it is too big for reuse. */
-UNIV_INTERN
-void
-trx_purge_add_update_undo_to_history(
-/*=================================*/
- trx_t* trx, /*!< in: transaction */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr); /*!< in: mtr */
-/*******************************************************************//**
-This function runs a purge batch.
-@return number of undo log pages handled in the batch */
-UNIV_INTERN
-ulint
-trx_purge(
-/*======*/
- ulint n_purge_threads, /*!< in: number of purge tasks to
- submit to task queue. */
- ulint limit, /*!< in: the maximum number of
- records to purge in one batch */
- bool truncate); /*!< in: truncate history if true */
-/*******************************************************************//**
-Stop purge and wait for it to stop, move to PURGE_STATE_STOP. */
-UNIV_INTERN
-void
-trx_purge_stop(void);
-/*================*/
-/*******************************************************************//**
-Resume purge, move to PURGE_STATE_RUN. */
-UNIV_INTERN
-void
-trx_purge_run(void);
-/*================*/
-
-/** Purge states */
-enum purge_state_t {
- PURGE_STATE_INIT, /*!< Purge instance created */
- PURGE_STATE_RUN, /*!< Purge should be running */
- PURGE_STATE_STOP, /*!< Purge should be stopped */
- PURGE_STATE_EXIT, /*!< Purge has been shutdown */
- PURGE_STATE_DISABLED /*!< Purge was never started */
-};
-
-/*******************************************************************//**
-Get the purge state.
-@return purge state. */
-UNIV_INTERN
-purge_state_t
-trx_purge_state(void);
-/*=================*/
-
-/** This is the purge pointer/iterator. We need both the undo no and the
-transaction no up to which purge has parsed and applied the records. */
-struct purge_iter_t {
- trx_id_t trx_no; /*!< Purge has advanced past all
- transactions whose number is less
- than this */
- undo_no_t undo_no; /*!< Purge has advanced past all records
- whose undo number is less than this */
-};
-
-/** The control structure used in the purge operation */
-struct trx_purge_t{
- sess_t* sess; /*!< System session running the purge
- query */
- trx_t* trx; /*!< System transaction running the
- purge query: this trx is not in the
- trx list of the trx system and it
- never ends */
- prio_rw_lock_t latch; /*!< The latch protecting the purge
- view. A purge operation must acquire an
- x-latch here for the instant at which
- it changes the purge view: an undo
- log operation can prevent this by
- obtaining an s-latch here. It also
- protects state and running */
- os_event_t event; /*!< State signal event;
- os_event_set() and os_event_reset()
- are protected by trx_purge_t::latch
- X-lock */
- ulint n_stop; /*!< Counter to track number stops */
- volatile bool running; /*!< true, if purge is active,
- we check this without the latch too */
- volatile purge_state_t state; /*!< Purge coordinator thread states,
- we check this in several places
- without holding the latch. */
- que_t* query; /*!< The query graph which will do the
- parallelized purge operation */
- read_view_t* view; /*!< The purge will not remove undo logs
- which are >= this view (purge view) */
- read_view_t* prebuilt_clone; /*!< Pre-built view which is used as a
- temporary clone of the oldest view in
- read_view_purge_open() */
- read_view_t* prebuilt_view; /*!< Pre-built view array */
- volatile ulint n_submitted; /*!< Count of total tasks submitted
- to the task queue */
- volatile ulint n_completed; /*!< Count of total tasks completed */
-
- /*------------------------------*/
- /* The following two fields form the 'purge pointer' which advances
- during a purge, and which is used in history list truncation */
-
- purge_iter_t iter; /* Limit up to which we have read and
- parsed the UNDO log records. Not
- necessarily purged from the indexes.
- Note that this can never be less than
- the limit below, we check for this
- invariant in trx0purge.cc */
- purge_iter_t limit; /* The 'purge pointer' which advances
- during a purge, and which is used in
- history list truncation */
-#ifdef UNIV_DEBUG
- purge_iter_t done; /* Indicate 'purge pointer' which have
- purged already accurately. */
-#endif /* UNIV_DEBUG */
- /*-----------------------------*/
- ibool next_stored; /*!< TRUE if the info of the next record
- to purge is stored below: if yes, then
- the transaction number and the undo
- number of the record are stored in
- purge_trx_no and purge_undo_no above */
- trx_rseg_t* rseg; /*!< Rollback segment for the next undo
- record to purge */
- ulint page_no; /*!< Page number for the next undo
- record to purge, page number of the
- log header, if dummy record */
- ulint offset; /*!< Page offset for the next undo
- record to purge, 0 if the dummy
- record */
- ulint hdr_page_no; /*!< Header page of the undo log where
- the next record to purge belongs */
- ulint hdr_offset; /*!< Header byte offset on the page */
- /*-----------------------------*/
- mem_heap_t* heap; /*!< Temporary storage used during a
- purge: can be emptied after purge
- completes */
- /*-----------------------------*/
- ib_bh_t* ib_bh; /*!< Binary min-heap, ordered on
- rseg_queue_t::trx_no. It is protected
- by the bh_mutex */
- ib_mutex_t bh_mutex; /*!< Mutex protecting ib_bh */
-};
-
-/** Info required to purge a record */
-struct trx_purge_rec_t {
- trx_undo_rec_t* undo_rec; /*!< Record to purge */
- roll_ptr_t roll_ptr; /*!< File pointr to UNDO record */
-};
-
-#ifndef UNIV_NONINL
-#include "trx0purge.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/trx0purge.ic b/storage/xtradb/include/trx0purge.ic
deleted file mode 100644
index ca9cc1fb894..00000000000
--- a/storage/xtradb/include/trx0purge.ic
+++ /dev/null
@@ -1,62 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0purge.ic
-Purge old versions
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0undo.h"
-
-/********************************************************************//**
-Calculates the file address of an undo log header when we have the file
-address of its history list node.
-@return file address of the log */
-UNIV_INLINE
-fil_addr_t
-trx_purge_get_log_from_hist(
-/*========================*/
- fil_addr_t node_addr) /*!< in: file address of the history
- list node of the log */
-{
- node_addr.boffset -= TRX_UNDO_HISTORY_NODE;
-
- return(node_addr);
-}
-
-#ifdef UNIV_DEBUG
-/********************************************************************//**
-address of its history list node.
-@return TRUE if purge_sys_t::limit <= purge_sys_t::iter*/
-UNIV_INLINE
-ibool
-trx_purge_check_limit(void)
-/*=======================*/
-{
- ut_ad(purge_sys->limit.trx_no <= purge_sys->iter.trx_no);
-
- if (purge_sys->limit.trx_no == purge_sys->iter.trx_no) {
- ut_ad(purge_sys->limit.undo_no <= purge_sys->iter.undo_no);
- }
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
diff --git a/storage/xtradb/include/trx0rec.h b/storage/xtradb/include/trx0rec.h
deleted file mode 100644
index a6e202d04e4..00000000000
--- a/storage/xtradb/include/trx0rec.h
+++ /dev/null
@@ -1,319 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0rec.h
-Transaction undo log record
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0rec_h
-#define trx0rec_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "row0types.h"
-#include "mtr0mtr.h"
-#include "dict0types.h"
-#include "data0data.h"
-#include "rem0types.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "que0types.h"
-
-/***********************************************************************//**
-Copies the undo record to the heap.
-@return own: copy of undo log record */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_rec_copy(
-/*==============*/
- const trx_undo_rec_t* undo_rec, /*!< in: undo log record */
- mem_heap_t* heap); /*!< in: heap where copied */
-/**********************************************************************//**
-Reads the undo log record type.
-@return record type */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_type(
-/*==================*/
- const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
-/**********************************************************************//**
-Reads from an undo log record the record compiler info.
-@return compiler info */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_cmpl_info(
-/*=======================*/
- const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
-/**********************************************************************//**
-Returns TRUE if an undo log record contains an extern storage field.
-@return TRUE if extern */
-UNIV_INLINE
-ibool
-trx_undo_rec_get_extern_storage(
-/*============================*/
- const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
-/**********************************************************************//**
-Reads the undo log record number.
-@return undo no */
-UNIV_INLINE
-undo_no_t
-trx_undo_rec_get_undo_no(
-/*=====================*/
- const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
-/**********************************************************************//**
-Returns the start of the undo record data area.
-@return offset to the data area */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_offset(
-/*====================*/
- undo_no_t undo_no) /*!< in: undo no read from node */
- MY_ATTRIBUTE((const));
-
-/**********************************************************************//**
-Returns the start of the undo record data area. */
-#define trx_undo_rec_get_ptr(undo_rec, undo_no) \
- ((undo_rec) + trx_undo_rec_get_offset(undo_no))
-
-/**********************************************************************//**
-Reads from an undo log record the general parameters.
-@return remaining part of undo log record after reading these values */
-UNIV_INTERN
-byte*
-trx_undo_rec_get_pars(
-/*==================*/
- trx_undo_rec_t* undo_rec, /*!< in: undo log record */
- ulint* type, /*!< out: undo record type:
- TRX_UNDO_INSERT_REC, ... */
- ulint* cmpl_info, /*!< out: compiler info, relevant only
- for update type records */
- bool* updated_extern, /*!< out: true if we updated an
- externally stored fild */
- undo_no_t* undo_no, /*!< out: undo log record number */
- table_id_t* table_id) /*!< out: table id */
- MY_ATTRIBUTE((nonnull));
-/*******************************************************************//**
-Builds a row reference from an undo log record.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
-byte*
-trx_undo_rec_get_row_ref(
-/*=====================*/
- byte* ptr, /*!< in: remaining part of a copy of an undo log
- record, at the start of the row reference;
- NOTE that this copy of the undo log record must
- be preserved as long as the row reference is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /*!< in: clustered index */
- dtuple_t** ref, /*!< out, own: row reference */
- mem_heap_t* heap); /*!< in: memory heap from which the memory
- needed is allocated */
-/*******************************************************************//**
-Skips a row reference from an undo log record.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
-byte*
-trx_undo_rec_skip_row_ref(
-/*======================*/
- byte* ptr, /*!< in: remaining part in update undo log
- record, at the start of the row reference */
- dict_index_t* index); /*!< in: clustered index */
-/**********************************************************************//**
-Reads from an undo log update record the system field values of the old
-version.
-@return remaining part of undo log record after reading these values */
-UNIV_INTERN
-byte*
-trx_undo_update_rec_get_sys_cols(
-/*=============================*/
- byte* ptr, /*!< in: remaining part of undo
- log record after reading
- general parameters */
- trx_id_t* trx_id, /*!< out: trx id */
- roll_ptr_t* roll_ptr, /*!< out: roll ptr */
- ulint* info_bits); /*!< out: info bits state */
-/*******************************************************************//**
-Builds an update vector based on a remaining part of an undo log record.
-@return remaining part of the record, NULL if an error detected, which
-means that the record is corrupted */
-UNIV_INTERN
-byte*
-trx_undo_update_rec_get_update(
-/*===========================*/
- byte* ptr, /*!< in: remaining part in update undo log
- record, after reading the row reference
- NOTE that this copy of the undo log record must
- be preserved as long as the update vector is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /*!< in: clustered index */
- ulint type, /*!< in: TRX_UNDO_UPD_EXIST_REC,
- TRX_UNDO_UPD_DEL_REC, or
- TRX_UNDO_DEL_MARK_REC; in the last case,
- only trx id and roll ptr fields are added to
- the update vector */
- trx_id_t trx_id, /*!< in: transaction id from this undorecord */
- roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */
- ulint info_bits,/*!< in: info bits from this undo record */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap, /*!< in: memory heap from which the memory
- needed is allocated */
- upd_t** upd); /*!< out, own: update vector */
-/*******************************************************************//**
-Builds a partial row from an update undo log record, for purge.
-It contains the columns which occur as ordering in any index of the table.
-Any missing columns are indicated by col->mtype == DATA_MISSING.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
-byte*
-trx_undo_rec_get_partial_row(
-/*=========================*/
- byte* ptr, /*!< in: remaining part in update undo log
- record of a suitable type, at the start of
- the stored index columns;
- NOTE that this copy of the undo log record must
- be preserved as long as the partial row is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /*!< in: clustered index */
- dtuple_t** row, /*!< out, own: partial row */
- ibool ignore_prefix, /*!< in: flag to indicate if we
- expect blob prefixes in undo. Used
- only in the assertion. */
- mem_heap_t* heap) /*!< in: memory heap from which the memory
- needed is allocated */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/***********************************************************************//**
-Writes information to an undo log about an insert, update, or a delete marking
-of a clustered index record. This information is used in a rollback of the
-transaction and in consistent reads that must look to the history of this
-transaction.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-trx_undo_report_row_operation(
-/*==========================*/
- que_thr_t* thr, /*!< in: query thread */
- dict_index_t* index, /*!< in: clustered index */
- const dtuple_t* clust_entry, /*!< in: in the case of an insert,
- index entry to insert into the
- clustered index, otherwise NULL */
- const upd_t* update, /*!< in: in the case of an update,
- the update vector, otherwise NULL */
- ulint cmpl_info, /*!< in: compiler info on secondary
- index updates */
- const rec_t* rec, /*!< in: case of an update or delete
- marking, the record in the clustered
- index, otherwise NULL */
- const ulint* offsets, /*!< in: rec_get_offsets(rec) */
- roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the
- inserted undo log record,
- 0 if BTR_NO_UNDO_LOG
- flag was specified */
- MY_ATTRIBUTE((nonnull(1,2,8), warn_unused_result));
-/******************************************************************//**
-Copies an undo record to heap. This function can be called if we know that
-the undo log record exists.
-@return own: copy of the record */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_undo_rec_low(
-/*======================*/
- roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
- mem_heap_t* heap) /*!< in: memory heap where copied */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Build a previous version of a clustered index record. The caller must
-hold a latch on the index page of the clustered index record.
-@retval true if previous version was built, or if it was an insert
-or the table has been rebuilt
-@retval false if the previous version is earlier than purge_view,
-which means that it may have been removed */
-UNIV_INTERN
-bool
-trx_undo_prev_version_build(
-/*========================*/
- const rec_t* index_rec,/*!< in: clustered index record in the
- index tree */
- mtr_t* index_mtr,/*!< in: mtr which contains the latch to
- index_rec page and purge_view */
- const rec_t* rec, /*!< in: version of a clustered index record */
- dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mem_heap_t* heap, /*!< in: memory heap from which the memory
- needed is allocated */
- rec_t** old_vers)/*!< out, own: previous version, or NULL if
- rec is the first inserted version, or if
- history data has been deleted */
- MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************//**
-Parses a redo log record of adding an undo log record.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_add_undo_rec(
-/*========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page); /*!< in: page or NULL */
-/***********************************************************//**
-Parses a redo log record of erasing of an undo page end.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_erase_page_end(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-
-#ifndef UNIV_HOTBACKUP
-
-/* Types of an undo log record: these have to be smaller than 16, as the
-compilation info multiplied by 16 is ORed to this value in an undo log
-record */
-
-#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */
-#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked
- record */
-#define TRX_UNDO_UPD_DEL_REC 13 /* update of a delete marked record to
- a not delete marked record; also the
- fields of the record can change */
-#define TRX_UNDO_DEL_MARK_REC 14 /* delete marking of a record; fields
- do not change */
-#define TRX_UNDO_CMPL_INFO_MULT 16 /* compilation info is multiplied by
- this and ORed to the type above */
-#define TRX_UNDO_UPD_EXTERN 128 /* This bit can be ORed to type_cmpl
- to denote that we updated external
- storage fields: used by purge to
- free the external storage */
-
-#ifndef UNIV_NONINL
-#include "trx0rec.ic"
-#endif
-
-#endif /* !UNIV_HOTBACKUP */
-
-#endif /* trx0rec_h */
diff --git a/storage/xtradb/include/trx0rec.ic b/storage/xtradb/include/trx0rec.ic
deleted file mode 100644
index 08704f6b821..00000000000
--- a/storage/xtradb/include/trx0rec.ic
+++ /dev/null
@@ -1,113 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0rec.ic
-Transaction undo log record
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Reads from an undo log record the record type.
-@return record type */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_type(
-/*==================*/
- const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
-{
- return(mach_read_from_1(undo_rec + 2) & (TRX_UNDO_CMPL_INFO_MULT - 1));
-}
-
-/**********************************************************************//**
-Reads from an undo log record the record compiler info.
-@return compiler info */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_cmpl_info(
-/*=======================*/
- const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
-{
- return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT);
-}
-
-/**********************************************************************//**
-Returns TRUE if an undo log record contains an extern storage field.
-@return TRUE if extern */
-UNIV_INLINE
-ibool
-trx_undo_rec_get_extern_storage(
-/*============================*/
- const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
-{
- if (mach_read_from_1(undo_rec + 2) & TRX_UNDO_UPD_EXTERN) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
-Reads the undo log record number.
-@return undo no */
-UNIV_INLINE
-undo_no_t
-trx_undo_rec_get_undo_no(
-/*=====================*/
- const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
-{
- const byte* ptr;
-
- ptr = undo_rec + 3;
-
- return(mach_ull_read_much_compressed(ptr));
-}
-
-/**********************************************************************//**
-Returns the start of the undo record data area.
-@return offset to the data area */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_offset(
-/*====================*/
- undo_no_t undo_no) /*!< in: undo no read from node */
-{
- return(3 + mach_ull_get_much_compressed_size(undo_no));
-}
-
-/***********************************************************************//**
-Copies the undo record to the heap.
-@return own: copy of undo log record */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_rec_copy(
-/*==============*/
- const trx_undo_rec_t* undo_rec, /*!< in: undo log record */
- mem_heap_t* heap) /*!< in: heap where copied */
-{
- ulint len;
-
- len = mach_read_from_2(undo_rec)
- - ut_align_offset(undo_rec, UNIV_PAGE_SIZE);
- ut_ad(len < UNIV_PAGE_SIZE);
- return((trx_undo_rec_t*) mem_heap_dup(heap, undo_rec, len));
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/trx0roll.h b/storage/xtradb/include/trx0roll.h
deleted file mode 100644
index b2e9d8a077f..00000000000
--- a/storage/xtradb/include/trx0roll.h
+++ /dev/null
@@ -1,298 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0roll.h
-Transaction rollback
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0roll_h
-#define trx0roll_h
-
-#include "univ.i"
-#include "btr0types.h"
-#include "trx0trx.h"
-#include "trx0types.h"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-
-extern bool trx_rollback_or_clean_is_active;
-
-/*******************************************************************//**
-Determines if this transaction is rolling back an incomplete transaction
-in crash recovery.
-@return TRUE if trx is an incomplete transaction that is being rolled
-back in crash recovery */
-UNIV_INTERN
-ibool
-trx_is_recv(
-/*========*/
- const trx_t* trx); /*!< in: transaction */
-/*******************************************************************//**
-Returns a transaction savepoint taken at this point in time.
-@return savepoint */
-UNIV_INTERN
-trx_savept_t
-trx_savept_take(
-/*============*/
- trx_t* trx); /*!< in: transaction */
-/*******************************************************************//**
-Frees an undo number array. */
-UNIV_INTERN
-void
-trx_undo_arr_free(
-/*==============*/
- trx_undo_arr_t* arr); /*!< in: undo number array */
-/*******************************************************************//**
-Returns pointer to nth element in an undo number array.
-@return pointer to the nth element */
-UNIV_INLINE
-trx_undo_inf_t*
-trx_undo_arr_get_nth_info(
-/*======================*/
- trx_undo_arr_t* arr, /*!< in: undo number array */
- ulint n); /*!< in: position */
-/********************************************************************//**
-Pops the topmost record when the two undo logs of a transaction are seen
-as a single stack of records ordered by their undo numbers. Inserts the
-undo number of the popped undo record to the array of currently processed
-undo numbers in the transaction. When the query thread finishes processing
-of this undo record, it must be released with trx_undo_rec_release.
-@return undo log record copied to heap, NULL if none left, or if the
-undo number of the top record would be less than the limit */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_roll_pop_top_rec_of_trx(
-/*========================*/
- trx_t* trx, /*!< in: transaction */
- undo_no_t limit, /*!< in: least undo number we need */
- roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */
- mem_heap_t* heap); /*!< in: memory heap where copied */
-/********************************************************************//**
-Reserves an undo log record for a query thread to undo. This should be
-called if the query thread gets the undo log record not using the pop
-function above.
-@return TRUE if succeeded */
-UNIV_INTERN
-ibool
-trx_undo_rec_reserve(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- undo_no_t undo_no);/*!< in: undo number of the record */
-/*******************************************************************//**
-Releases a reserved undo record. */
-UNIV_INTERN
-void
-trx_undo_rec_release(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- undo_no_t undo_no);/*!< in: undo number */
-/*******************************************************************//**
-Rollback or clean up any incomplete transactions which were
-encountered in crash recovery. If the transaction already was
-committed, then we clean up a possible insert undo log. If the
-transaction was not yet committed, then we roll it back. */
-UNIV_INTERN
-void
-trx_rollback_or_clean_recovered(
-/*============================*/
- ibool all); /*!< in: FALSE=roll back dictionary transactions;
- TRUE=roll back all non-PREPARED transactions */
-/*******************************************************************//**
-Rollback or clean up any incomplete transactions which were
-encountered in crash recovery. If the transaction already was
-committed, then we clean up a possible insert undo log. If the
-transaction was not yet committed, then we roll it back.
-Note: this is done in a background thread.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
-/*================================================*/
- void* arg MY_ATTRIBUTE((unused)));
- /*!< in: a dummy parameter required by
- os_thread_create */
-/*********************************************************************//**
-Creates a rollback command node struct.
-@return own: rollback node struct */
-UNIV_INTERN
-roll_node_t*
-roll_node_create(
-/*=============*/
- mem_heap_t* heap); /*!< in: mem heap where created */
-/***********************************************************//**
-Performs an execution step for a rollback command node in a query graph.
-@return query thread to run next, or NULL */
-UNIV_INTERN
-que_thr_t*
-trx_rollback_step(
-/*==============*/
- que_thr_t* thr); /*!< in: query thread */
-/*******************************************************************//**
-Rollback a transaction used in MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-trx_rollback_for_mysql(
-/*===================*/
- trx_t* trx) /*!< in/out: transaction */
- MY_ATTRIBUTE((nonnull));
-/*******************************************************************//**
-Rollback the latest SQL statement for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-trx_rollback_last_sql_stat_for_mysql(
-/*=================================*/
- trx_t* trx) /*!< in/out: transaction */
- MY_ATTRIBUTE((nonnull));
-/*******************************************************************//**
-Rollback a transaction to a given savepoint or do a complete rollback.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-trx_rollback_to_savepoint(
-/*======================*/
- trx_t* trx, /*!< in: transaction handle */
- trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if
- partial rollback requested, or NULL for
- complete rollback */
- MY_ATTRIBUTE((nonnull(1)));
-/*******************************************************************//**
-Rolls a transaction back to a named savepoint. Modifications after the
-savepoint are undone but InnoDB does NOT release the corresponding locks
-which are stored in memory. If a lock is 'implicit', that is, a new inserted
-row holds a lock where the lock information is carried by the trx id stored in
-the row, these locks are naturally released in the rollback. Savepoints which
-were set after this savepoint are deleted.
-@return if no savepoint of the name found then DB_NO_SAVEPOINT,
-otherwise DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-trx_rollback_to_savepoint_for_mysql(
-/*================================*/
- trx_t* trx, /*!< in: transaction handle */
- const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache
- position corresponding to this
- savepoint; MySQL needs this
- information to remove the
- binlog entries of the queries
- executed after the savepoint */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Creates a named savepoint. If the transaction is not yet started, starts it.
-If there is already a savepoint of the same name, this call erases that old
-savepoint and replaces it with a new one. Savepoints are deleted in a transaction
-commit or rollback.
-@return always DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-trx_savepoint_for_mysql(
-/*====================*/
- trx_t* trx, /*!< in: transaction handle */
- const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t binlog_cache_pos) /*!< in: MySQL binlog cache
- position corresponding to this
- connection at the time of the
- savepoint */
- MY_ATTRIBUTE((nonnull));
-/*******************************************************************//**
-Releases a named savepoint. Savepoints which
-were set after this savepoint are deleted.
-@return if no savepoint of the name found then DB_NO_SAVEPOINT,
-otherwise DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-trx_release_savepoint_for_mysql(
-/*============================*/
- trx_t* trx, /*!< in: transaction handle */
- const char* savepoint_name) /*!< in: savepoint name */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Frees savepoint structs starting from savep. */
-UNIV_INTERN
-void
-trx_roll_savepoints_free(
-/*=====================*/
- trx_t* trx, /*!< in: transaction handle */
- trx_named_savept_t* savep); /*!< in: free all savepoints > this one;
- if this is NULL, free all savepoints
- of trx */
-
-/** A cell of trx_undo_arr_t; used during a rollback and a purge */
-struct trx_undo_inf_t{
- ibool in_use; /*!< true if cell is being used */
- trx_id_t trx_no; /*!< transaction number: not defined during
- a rollback */
- undo_no_t undo_no;/*!< undo number of an undo record */
-};
-
-/** During a rollback and a purge, undo numbers of undo records currently being
-processed are stored in this array */
-
-struct trx_undo_arr_t{
- ulint n_cells; /*!< number of cells in the array */
- ulint n_used; /*!< number of cells in use */
- trx_undo_inf_t* infos; /*!< the array of undo infos */
- mem_heap_t* heap; /*!< memory heap from which allocated */
-};
-
-/** Rollback node states */
-enum roll_node_state {
- ROLL_NODE_NONE = 0, /*!< Unknown state */
- ROLL_NODE_SEND, /*!< about to send a rollback signal to
- the transaction */
- ROLL_NODE_WAIT /*!< rollback signal sent to the
- transaction, waiting for completion */
-};
-
-/** Rollback command node in a query graph */
-struct roll_node_t{
- que_common_t common; /*!< node type: QUE_NODE_ROLLBACK */
- enum roll_node_state state; /*!< node execution state */
- ibool partial;/*!< TRUE if we want a partial
- rollback */
- trx_savept_t savept; /*!< savepoint to which to
- roll back, in the case of a
- partial rollback */
- que_thr_t* undo_thr;/*!< undo query graph */
-};
-
-/** A savepoint set with SQL's "SAVEPOINT savepoint_id" command */
-struct trx_named_savept_t{
- char* name; /*!< savepoint name */
- trx_savept_t savept; /*!< the undo number corresponding to
- the savepoint */
- ib_int64_t mysql_binlog_cache_pos;
- /*!< the MySQL binlog cache position
- corresponding to this savepoint, not
- defined if the MySQL binlogging is not
- enabled */
- UT_LIST_NODE_T(trx_named_savept_t)
- trx_savepoints; /*!< the list of savepoints of a
- transaction */
-};
-
-#ifndef UNIV_NONINL
-#include "trx0roll.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/trx0roll.ic b/storage/xtradb/include/trx0roll.ic
deleted file mode 100644
index 178e9bb730a..00000000000
--- a/storage/xtradb/include/trx0roll.ic
+++ /dev/null
@@ -1,40 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0roll.ic
-Transaction rollback
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-/*******************************************************************//**
-Returns pointer to nth element in an undo number array.
-@return pointer to the nth element */
-UNIV_INLINE
-trx_undo_inf_t*
-trx_undo_arr_get_nth_info(
-/*======================*/
- trx_undo_arr_t* arr, /*!< in: undo number array */
- ulint n) /*!< in: position */
-{
- ut_ad(arr);
- ut_ad(n < arr->n_cells);
-
- return(arr->infos + n);
-}
diff --git a/storage/xtradb/include/trx0rseg.h b/storage/xtradb/include/trx0rseg.h
deleted file mode 100644
index e2853df7045..00000000000
--- a/storage/xtradb/include/trx0rseg.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0rseg.h
-Rollback segment
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0rseg_h
-#define trx0rseg_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "trx0sys.h"
-#include "ut0bh.h"
-
-/******************************************************************//**
-Gets a rollback segment header.
-@return rollback segment header, page x-latched */
-UNIV_INLINE
-trx_rsegf_t*
-trx_rsegf_get(
-/*==========*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the header */
- mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Gets a newly created rollback segment header.
-@return rollback segment header, page x-latched */
-UNIV_INLINE
-trx_rsegf_t*
-trx_rsegf_get_new(
-/*==============*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the header */
- mtr_t* mtr); /*!< in: mtr */
-/***************************************************************//**
-Gets the file page number of the nth undo log slot.
-@return page number of the undo log segment */
-UNIV_INLINE
-ulint
-trx_rsegf_get_nth_undo(
-/*===================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- ulint n, /*!< in: index of slot */
- mtr_t* mtr); /*!< in: mtr */
-/***************************************************************//**
-Sets the file page number of the nth undo log slot. */
-UNIV_INLINE
-void
-trx_rsegf_set_nth_undo(
-/*===================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- ulint n, /*!< in: index of slot */
- ulint page_no,/*!< in: page number of the undo log segment */
- mtr_t* mtr); /*!< in: mtr */
-/****************************************************************//**
-Looks for a free slot for an undo log segment.
-@return slot index or ULINT_UNDEFINED if not found */
-UNIV_INLINE
-ulint
-trx_rsegf_undo_find_free(
-/*=====================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Looks for a rollback segment, based on the rollback segment id.
-@return rollback segment */
-UNIV_INLINE
-trx_rseg_t*
-trx_rseg_get_on_id(
-/*===============*/
- ulint id); /*!< in: rollback segment id */
-/****************************************************************//**
-Creates a rollback segment header. This function is called only when
-a new rollback segment is created in the database.
-@return page number of the created segment, FIL_NULL if fail */
-UNIV_INTERN
-ulint
-trx_rseg_header_create(
-/*===================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint max_size, /*!< in: max size in pages */
- ulint rseg_slot_no, /*!< in: rseg id == slot number in trx sys */
- mtr_t* mtr); /*!< in: mtr */
-/*********************************************************************//**
-Creates the memory copies for rollback segments and initializes the
-rseg array in trx_sys at a database startup. */
-UNIV_INTERN
-void
-trx_rseg_array_init(
-/*================*/
- trx_sysf_t* sys_header, /*!< in/out: trx system header */
- ib_bh_t* ib_bh, /*!< in: rseg queue */
- mtr_t* mtr); /*!< in/out: mtr */
-/***************************************************************************
-Frees an instance of the rollback segment in memory. */
-UNIV_INTERN
-void
-trx_rseg_mem_free(
-/*==============*/
- trx_rseg_t* rseg); /*!< in, own: instance to free */
-
-/** Create a rollback segment.
-@param[in] space undo tablespace ID
-@return pointer to new rollback segment
-@retval NULL on failure */
-UNIV_INTERN
-trx_rseg_t*
-trx_rseg_create(ulint space);
-
-/********************************************************************
-Get the number of unique rollback tablespaces in use except space id 0.
-The last space id will be the sentinel value ULINT_UNDEFINED. The array
-will be sorted on space id. Note: space_ids should have space for
-TRX_SYS_N_RSEGS + 1 elements.
-@return number of unique rollback tablespaces in use. */
-UNIV_INTERN
-ulint
-trx_rseg_get_n_undo_tablespaces(
-/*============================*/
- ulint* space_ids); /*!< out: array of space ids of
- UNDO tablespaces */
-/* Number of undo log slots in a rollback segment file copy */
-#define TRX_RSEG_N_SLOTS (UNIV_PAGE_SIZE / 16)
-
-/* Maximum number of transactions supported by a single rollback segment */
-#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2)
-
-/* The rollback segment memory object */
-struct trx_rseg_t{
- /*--------------------------------------------------------*/
- ulint id; /*!< rollback segment id == the index of
- its slot in the trx system file copy */
- ib_prio_mutex_t mutex; /*!< mutex protecting the fields in this
- struct except id, which is constant */
- ulint space; /*!< space where the rollback segment
- header is placed */
- ulint zip_size;/* compressed page size of space
- in bytes, or 0 for uncompressed spaces */
- ulint page_no;/* page number of the rollback segment
- header */
- ulint max_size;/* maximum allowed size in pages */
- ulint curr_size;/* current size in pages */
- /*--------------------------------------------------------*/
- /* Fields for update undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_list;
- /* List of update undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_cached;
- /* List of update undo log segments
- cached for fast reuse */
- /*--------------------------------------------------------*/
- /* Fields for insert undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_list;
- /* List of insert undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_cached;
- /* List of insert undo log segments
- cached for fast reuse */
- /*--------------------------------------------------------*/
- ulint last_page_no; /*!< Page number of the last not yet
- purged log header in the history list;
- FIL_NULL if all list purged */
- ulint last_offset; /*!< Byte offset of the last not yet
- purged log header */
- trx_id_t last_trx_no; /*!< Transaction number of the last not
- yet purged log */
- ibool last_del_marks; /*!< TRUE if the last not yet purged log
- needs purging */
-};
-
-/** For prioritising the rollback segments for purge. */
-struct rseg_queue_t {
- trx_id_t trx_no; /*!< trx_rseg_t::last_trx_no */
- trx_rseg_t* rseg; /*!< Rollback segment */
-};
-
-/* Undo log segment slot in a rollback segment header */
-/*-------------------------------------------------------------*/
-#define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of
- an undo log segment */
-/*-------------------------------------------------------------*/
-/* Slot size */
-#define TRX_RSEG_SLOT_SIZE 4
-
-/* The offset of the rollback segment header on its page */
-#define TRX_RSEG FSEG_PAGE_DATA
-
-/* Transaction rollback segment header */
-/*-------------------------------------------------------------*/
-#define TRX_RSEG_MAX_SIZE 0 /* Maximum allowed size for rollback
- segment in pages */
-#define TRX_RSEG_HISTORY_SIZE 4 /* Number of file pages occupied
- by the logs in the history list */
-#define TRX_RSEG_HISTORY 8 /* The update undo logs for committed
- transactions */
-#define TRX_RSEG_FSEG_HEADER (8 + FLST_BASE_NODE_SIZE)
- /* Header for the file segment where
- this page is placed */
-#define TRX_RSEG_UNDO_SLOTS (8 + FLST_BASE_NODE_SIZE + FSEG_HEADER_SIZE)
- /* Undo log segment slots */
-/*-------------------------------------------------------------*/
-
-#ifndef UNIV_NONINL
-#include "trx0rseg.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/trx0rseg.ic b/storage/xtradb/include/trx0rseg.ic
deleted file mode 100644
index 30743da9b8c..00000000000
--- a/storage/xtradb/include/trx0rseg.ic
+++ /dev/null
@@ -1,167 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0rseg.ic
-Rollback segment
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "srv0srv.h"
-#include "mtr0log.h"
-#include "trx0sys.h"
-
-/******************************************************************//**
-Gets a rollback segment header.
-@return rollback segment header, page x-latched */
-UNIV_INLINE
-trx_rsegf_t*
-trx_rsegf_get(
-/*==========*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the header */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
- trx_rsegf_t* header;
-
- block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_RSEG_HEADER);
-
- header = TRX_RSEG + buf_block_get_frame(block);
-
- return(header);
-}
-
-/******************************************************************//**
-Gets a newly created rollback segment header.
-@return rollback segment header, page x-latched */
-UNIV_INLINE
-trx_rsegf_t*
-trx_rsegf_get_new(
-/*==============*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the header */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
- trx_rsegf_t* header;
-
- block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW);
-
- header = TRX_RSEG + buf_block_get_frame(block);
-
- return(header);
-}
-
-/***************************************************************//**
-Gets the file page number of the nth undo log slot.
-@return page number of the undo log segment */
-UNIV_INLINE
-ulint
-trx_rsegf_get_nth_undo(
-/*===================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- ulint n, /*!< in: index of slot */
- mtr_t* mtr) /*!< in: mtr */
-{
- if (n >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: trying to get slot %lu of rseg\n",
- (ulong) n);
- ut_error;
- }
-
- return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS
- + n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr));
-}
-
-/***************************************************************//**
-Sets the file page number of the nth undo log slot. */
-UNIV_INLINE
-void
-trx_rsegf_set_nth_undo(
-/*===================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- ulint n, /*!< in: index of slot */
- ulint page_no,/*!< in: page number of the undo log segment */
- mtr_t* mtr) /*!< in: mtr */
-{
- if (n >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: trying to set slot %lu of rseg\n",
- (ulong) n);
- ut_error;
- }
-
- mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE,
- page_no, MLOG_4BYTES, mtr);
-}
-
-/****************************************************************//**
-Looks for a free slot for an undo log segment.
-@return slot index or ULINT_UNDEFINED if not found */
-UNIV_INLINE
-ulint
-trx_rsegf_undo_find_free(
-/*=====================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint i;
- ulint page_no;
-
- for (i = 0;
-#ifndef UNIV_DEBUG
- i < TRX_RSEG_N_SLOTS;
-#else
- i < (trx_rseg_n_slots_debug ? trx_rseg_n_slots_debug : TRX_RSEG_N_SLOTS);
-#endif
- i++) {
-
- page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr);
-
- if (page_no == FIL_NULL) {
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/******************************************************************//**
-Looks for a rollback segment, based on the rollback segment id.
-@return rollback segment */
-UNIV_INLINE
-trx_rseg_t*
-trx_rseg_get_on_id(
-/*===============*/
- ulint id) /*!< in: rollback segment id */
-{
- ut_a(id < TRX_SYS_N_RSEGS);
-
- return(trx_sys->rseg_array[id]);
-}
-
diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h
deleted file mode 100644
index 9bfffd09532..00000000000
--- a/storage/xtradb/include/trx0sys.h
+++ /dev/null
@@ -1,756 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0sys.h
-Transaction system
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0sys_h
-#define trx0sys_h
-
-#include "univ.i"
-
-#include "trx0types.h"
-#include "fsp0types.h"
-#include "fil0fil.h"
-#include "buf0buf.h"
-#ifndef UNIV_HOTBACKUP
-#include "mtr0mtr.h"
-#include "ut0byte.h"
-#include "mem0mem.h"
-#include "sync0sync.h"
-#include "ut0lst.h"
-#include "ut0bh.h"
-#include "read0types.h"
-#include "page0types.h"
-#include "ut0bh.h"
-#ifdef WITH_WSREP
-#include "trx0xa.h"
-#endif /* WITH_WSREP */
-
-typedef UT_LIST_BASE_NODE_T(trx_t) trx_list_t;
-
-/** In a MySQL replication slave, in crash recovery we store the master log
-file name and position here. */
-/* @{ */
-/** Master binlog file name */
-extern char trx_sys_mysql_master_log_name[];
-/** Master binlog file position. We have successfully got the updates
-up to this position. -1 means that no crash recovery was needed, or
-there was no master log position info inside InnoDB.*/
-extern ib_int64_t trx_sys_mysql_master_log_pos;
-/* @} */
-
-/** If this MySQL server uses binary logging, after InnoDB has been inited
-and if it has done a crash recovery, we store the binlog file name and position
-here. */
-/* @{ */
-/** Binlog file name */
-extern char trx_sys_mysql_bin_log_name[];
-/** Binlog file position, or -1 if unknown */
-extern ib_int64_t trx_sys_mysql_bin_log_pos;
-/* @} */
-
-/** The transaction system */
-extern trx_sys_t* trx_sys;
-
-/***************************************************************//**
-Checks if a page address is the trx sys header page.
-@return TRUE if trx sys header page */
-UNIV_INLINE
-ibool
-trx_sys_hdr_page(
-/*=============*/
- ulint space, /*!< in: space */
- ulint page_no);/*!< in: page number */
-/*****************************************************************//**
-Creates and initializes the central memory structures for the transaction
-system. This is called when the database is started.
-@return min binary heap of rsegs to purge */
-UNIV_INTERN
-ib_bh_t*
-trx_sys_init_at_db_start(void);
-/*==========================*/
-/*****************************************************************//**
-Creates the trx_sys instance and initializes ib_bh and mutex. */
-UNIV_INTERN
-void
-trx_sys_create(void);
-/*================*/
-/*****************************************************************//**
-Creates and initializes the transaction system at the database creation. */
-UNIV_INTERN
-void
-trx_sys_create_sys_pages(void);
-/*==========================*/
-/****************************************************************//**
-Looks for a free slot for a rollback segment in the trx system file copy.
-@return slot index or ULINT_UNDEFINED if not found */
-UNIV_INTERN
-ulint
-trx_sysf_rseg_find_free(
-/*====================*/
- mtr_t* mtr); /*!< in: mtr */
-/***************************************************************//**
-Gets the pointer in the nth slot of the rseg array.
-@return pointer to rseg object, NULL if slot not in use */
-UNIV_INLINE
-trx_rseg_t*
-trx_sys_get_nth_rseg(
-/*=================*/
- trx_sys_t* sys, /*!< in: trx system */
- ulint n); /*!< in: index of slot */
-/**********************************************************************//**
-Gets a pointer to the transaction system file copy and x-locks its page.
-@return pointer to system file copy, page x-locked */
-UNIV_INLINE
-trx_sysf_t*
-trx_sysf_get(
-/*=========*/
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Gets the space of the nth rollback segment slot in the trx system
-file copy.
-@return space id */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Gets the page number of the nth rollback segment slot in the trx system
-file copy.
-@return page number, FIL_NULL if slot unused */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Sets the space id of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- ulint space, /*!< in: space id */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Sets the page number of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- ulint page_no, /*!< in: page number, FIL_NULL if
- the slot is reset to unused */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Allocates a new transaction id.
-@return new, allocated trx id */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_id(void);
-/*========================*/
-/*****************************************************************//**
-Determines the maximum transaction id.
-@return maximum currently allocated trx id; will be stale after the
-next call to trx_sys_get_new_trx_id() */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_max_trx_id(void);
-/*========================*/
-
-/*************************************************************//**
-Find a slot for a given trx ID in a descriptors array.
-@return: slot pointer */
-UNIV_INLINE
-trx_id_t*
-trx_find_descriptor(
-/*================*/
- const trx_id_t* descriptors, /*!< in: descriptors array */
- ulint n_descr, /*!< in: array size */
- trx_id_t trx_id); /*!< in: trx pointer */
-
-#ifdef UNIV_DEBUG
-/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */
-extern uint trx_rseg_n_slots_debug;
-#endif
-
-/*****************************************************************//**
-Writes a trx id to an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_trx_id(
-/*=============*/
- byte* ptr, /*!< in: pointer to memory where written */
- trx_id_t id); /*!< in: id */
-/*****************************************************************//**
-Reads a trx id from an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_read_...
-@return id */
-UNIV_INLINE
-trx_id_t
-trx_read_trx_id(
-/*============*/
- const byte* ptr); /*!< in: pointer to memory from where to read */
-/****************************************************************//**
-Looks for the trx instance with the given id in the rw trx_list.
-The caller must be holding trx_sys->mutex.
-@return the trx handle or NULL if not found;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
-UNIV_INLINE
-trx_t*
-trx_get_rw_trx_by_id(
-/*=================*/
- trx_id_t trx_id);/*!< in: trx id to search for */
-/****************************************************************//**
-Returns the minimum trx id in rw trx list. This is the smallest id for which
-the trx can possibly be active. (But, you must look at the trx->state to
-find out if the minimum trx id transaction itself is active, or already
-committed.)
-@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
-UNIV_INLINE
-trx_id_t
-trx_rw_min_trx_id(void);
-/*===================*/
-/****************************************************************//**
-Returns pointer to a transaction instance if a rw transaction with the given id
-is active. Caller must hold trx_sys->mutex. If the caller is not holding
-lock_sys->mutex, the transaction may already have been committed.
-@return transaction instance if active, or NULL;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
-UNIV_INLINE
-trx_t*
-trx_rw_get_active_trx_by_id(
-/*========================*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt); /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. Caller must hold
-trx_sys->mutex. If the caller is not holding lock_sys->mutex, the
-transaction may already have been committed.
-@return true if rw transaction it with a given id is active. */
-UNIV_INLINE
-bool
-trx_rw_is_active_low(
-/*=================*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt); /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. If the caller is
-not holding lock_sys->mutex, the transaction may already have been
-committed.
-@return true if rw transaction it with a given id is active. */
-UNIV_INLINE
-bool
-trx_rw_is_active(
-/*=============*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt); /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
-#ifdef UNIV_DEBUG
-/****************************************************************//**
-Checks whether a trx is in one of rw_trx_list or ro_trx_list.
-@return TRUE if is in */
-UNIV_INTERN
-ibool
-trx_in_trx_list(
-/*============*/
- const trx_t* in_trx) /*!< in: transaction */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* UNIV_DEBUG */
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
-/***********************************************************//**
-Assert that a transaction has been recovered.
-@return TRUE */
-UNIV_INLINE
-ibool
-trx_assert_recovered(
-/*=================*/
- trx_id_t trx_id) /*!< in: transaction identifier */
- MY_ATTRIBUTE((warn_unused_result));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-/*****************************************************************//**
-Updates the offset information about the end of the MySQL binlog entry
-which corresponds to the transaction just being committed. In a MySQL
-replication slave updates the latest master binlog position up to which
-replication has proceeded. */
-UNIV_INTERN
-void
-trx_sys_update_mysql_binlog_offset(
-/*===============================*/
- const char* file_name,/*!< in: MySQL log file name */
- ib_int64_t offset, /*!< in: position in that log file */
- ulint field, /*!< in: offset of the MySQL log info field in
- the trx sys header */
-#ifdef WITH_WSREP
- trx_sysf_t* sys_header, /*!< in: trx sys header */
-#endif /* WITH_WSREP */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Prints to stderr the MySQL binlog offset info in the trx system header if
-the magic number shows it valid. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_binlog_offset(void);
-/*===================================*/
-#ifdef WITH_WSREP
-/** Update WSREP checkpoint XID in sys header. */
-void
-trx_sys_update_wsrep_checkpoint(
- const XID* xid, /*!< in: WSREP XID */
- trx_sysf_t* sys_header, /*!< in: sys_header */
- mtr_t* mtr); /*!< in: mtr */
-
-/** Read WSREP checkpoint XID from sys header.
-@return true on success, false on error. */
-bool
-trx_sys_read_wsrep_checkpoint(
- XID* xid); /*!< out: WSREP XID */
-#endif /* WITH_WSREP */
-/*****************************************************************//**
-Prints to stderr the MySQL master log offset info in the trx system header if
-the magic number shows it valid. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_master_log_pos(void);
-/*====================================*/
-/*****************************************************************//**
-Initializes the tablespace tag system. */
-UNIV_INTERN
-void
-trx_sys_file_format_init(void);
-/*==========================*/
-/*****************************************************************//**
-Closes the tablespace tag system. */
-UNIV_INTERN
-void
-trx_sys_file_format_close(void);
-/*===========================*/
-/********************************************************************//**
-Tags the system table space with minimum format id if it has not been
-tagged yet.
-WARNING: This function is only called during the startup and AFTER the
-redo log application during recovery has finished. */
-UNIV_INTERN
-void
-trx_sys_file_format_tag_init(void);
-/*==============================*/
-/*****************************************************************//**
-Shutdown/Close the transaction system. */
-UNIV_INTERN
-void
-trx_sys_close(void);
-/*===============*/
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-UNIV_INTERN
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id); /*!< in: id of the file format */
-/*****************************************************************//**
-Set the file format id unconditionally except if it's already the
-same value.
-@return TRUE if value updated */
-UNIV_INTERN
-ibool
-trx_sys_file_format_max_set(
-/*========================*/
- ulint format_id, /*!< in: file format id */
- const char** name); /*!< out: max file format name or
- NULL if not needed. */
-/*********************************************************************
-Creates the rollback segments
-@return number of rollback segments that are active. */
-UNIV_INTERN
-ulint
-trx_sys_create_rsegs(
-/*=================*/
- ulint n_spaces, /*!< number of tablespaces for UNDO logs */
- ulint n_rsegs); /*!< number of rollback segments to create */
-/*****************************************************************//**
-Get the number of transaction in the system, independent of their state.
-@return count of transactions in trx_sys_t::trx_list */
-UNIV_INLINE
-ulint
-trx_sys_get_n_rw_trx(void);
-/*======================*/
-
-/*********************************************************************
-Check if there are any active (non-prepared) transactions.
-@return total number of active transactions or 0 if none */
-UNIV_INTERN
-ulint
-trx_sys_any_active_transactions(void);
-/*=================================*/
-#else /* !UNIV_HOTBACKUP */
-/*****************************************************************//**
-Prints to stderr the MySQL binlog info in the system header if the
-magic number shows it valid. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_binlog_offset_from_page(
-/*========================================*/
- const byte* page); /*!< in: buffer containing the trx
- system header page, i.e., page number
- TRX_SYS_PAGE_NO in the tablespace */
-/*****************************************************************//**
-Reads the file format id from the first system table space file.
-Even if the call succeeds and returns TRUE, the returned format id
-may be ULINT_UNDEFINED signalling that the format id was not present
-in the data file.
-@return TRUE if call succeeds */
-UNIV_INTERN
-ibool
-trx_sys_read_file_format_id(
-/*========================*/
- const char *pathname, /*!< in: pathname of the first system
- table space file */
- ulint *format_id); /*!< out: file format of the system table
- space */
-/*****************************************************************//**
-Reads the file format id from the given per-table data file.
-@return TRUE if call succeeds */
-UNIV_INTERN
-ibool
-trx_sys_read_pertable_file_format_id(
-/*=================================*/
- const char *pathname, /*!< in: pathname of a per-table
- datafile */
- ulint *format_id); /*!< out: file format of the per-table
- data file */
-#endif /* !UNIV_HOTBACKUP */
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the max format name */
-UNIV_INTERN
-const char*
-trx_sys_file_format_max_get(void);
-/*=============================*/
-/*****************************************************************//**
-Check for the max file format tag stored on disk.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-trx_sys_file_format_max_check(
-/*==========================*/
- ulint max_format_id); /*!< in: the max format id to check */
-/********************************************************************//**
-Update the file format tag in the system tablespace only if the given
-format id is greater than the known max id.
-@return TRUE if format_id was bigger than the known max id */
-UNIV_INTERN
-ibool
-trx_sys_file_format_max_upgrade(
-/*============================*/
- const char** name, /*!< out: max file format name */
- ulint format_id); /*!< in: file format identifier */
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-UNIV_INTERN
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id); /*!< in: id of the file format */
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Validate the trx_sys_t::trx_list. */
-UNIV_INTERN
-ibool
-trx_sys_validate_trx_list(void);
-/*===========================*/
-#endif /* UNIV_DEBUG */
-
-/* The automatically created system rollback segment has this id */
-#define TRX_SYS_SYSTEM_RSEG_ID 0
-
-/* Space id and page no where the trx system file copy resides */
-#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
-#include "fsp0fsp.h"
-#define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO
-
-/* The offset of the transaction system header on the page */
-#define TRX_SYS FSEG_PAGE_DATA
-
-/** Transaction system header */
-/*------------------------------------------------------------- @{ */
-#define TRX_SYS_TRX_ID_STORE 0 /*!< the maximum trx id or trx
- number modulo
- TRX_SYS_TRX_ID_UPDATE_MARGIN
- written to a file page by any
- transaction; the assignment of
- transaction ids continues from
- this number rounded up by
- TRX_SYS_TRX_ID_UPDATE_MARGIN
- plus
- TRX_SYS_TRX_ID_UPDATE_MARGIN
- when the database is
- started */
-#define TRX_SYS_FSEG_HEADER 8 /*!< segment header for the
- tablespace segment the trx
- system is created into */
-#define TRX_SYS_RSEGS (8 + FSEG_HEADER_SIZE)
- /*!< the start of the array of
- rollback segment specification
- slots */
-/*------------------------------------------------------------- @} */
-
-/* Max number of rollback segments: the number of segment specification slots
-in the transaction system array; rollback segment id must fit in one (signed)
-byte, therefore 128; each slot is currently 8 bytes in size. If you want
-to raise the level to 256 then you will need to fix some assertions that
-impose the 7 bit restriction. e.g., mach_write_to_3() */
-#define TRX_SYS_N_RSEGS 128
-/* Originally, InnoDB defined TRX_SYS_N_RSEGS as 256 but created only one
-rollback segment. It initialized some arrays with this number of entries.
-We must remember this limit in order to keep file compatibility. */
-#define TRX_SYS_OLD_N_RSEGS 256
-
-/** Maximum length of MySQL binlog file name, in bytes.
-@see trx_sys_mysql_master_log_name
-@see trx_sys_mysql_bin_log_name */
-#define TRX_SYS_MYSQL_LOG_NAME_LEN 512
-/** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
-#define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344
-
-#if UNIV_PAGE_SIZE_MIN < 4096
-# error "UNIV_PAGE_SIZE_MIN < 4096"
-#endif
-/** The offset of the MySQL replication info in the trx system header;
-this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */
-#define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000)
-
-/** The offset of the MySQL binlog offset info in the trx system header */
-#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000)
-#define TRX_SYS_MYSQL_LOG_MAGIC_N_FLD 0 /*!< magic number which is
- TRX_SYS_MYSQL_LOG_MAGIC_N
- if we have valid data in the
- MySQL binlog info */
-#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH 4 /*!< high 4 bytes of the offset
- within that file */
-#define TRX_SYS_MYSQL_LOG_OFFSET_LOW 8 /*!< low 4 bytes of the offset
- within that file */
-#define TRX_SYS_MYSQL_LOG_NAME 12 /*!< MySQL log file name */
-
-#ifdef WITH_WSREP
-/* The offset to WSREP XID headers */
-#define TRX_SYS_WSREP_XID_INFO (UNIV_PAGE_SIZE - 3500)
-#define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0
-#define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265
-
-/* XID field: formatID, gtrid_len, bqual_len, xid_data */
-#define TRX_SYS_WSREP_XID_LEN (4 + 4 + 4 + XIDDATASIZE)
-#define TRX_SYS_WSREP_XID_FORMAT 4
-#define TRX_SYS_WSREP_XID_GTRID_LEN 8
-#define TRX_SYS_WSREP_XID_BQUAL_LEN 12
-#define TRX_SYS_WSREP_XID_DATA 16
-#endif /* WITH_WSREP*/
-
-/** Doublewrite buffer */
-/* @{ */
-/** The offset of the doublewrite buffer header on the trx system header page */
-#define TRX_SYS_DOUBLEWRITE (UNIV_PAGE_SIZE - 200)
-/*-------------------------------------------------------------*/
-#define TRX_SYS_DOUBLEWRITE_FSEG 0 /*!< fseg header of the fseg
- containing the doublewrite
- buffer */
-#define TRX_SYS_DOUBLEWRITE_MAGIC FSEG_HEADER_SIZE
- /*!< 4-byte magic number which
- shows if we already have
- created the doublewrite
- buffer */
-#define TRX_SYS_DOUBLEWRITE_BLOCK1 (4 + FSEG_HEADER_SIZE)
- /*!< page number of the
- first page in the first
- sequence of 64
- (= FSP_EXTENT_SIZE) consecutive
- pages in the doublewrite
- buffer */
-#define TRX_SYS_DOUBLEWRITE_BLOCK2 (8 + FSEG_HEADER_SIZE)
- /*!< page number of the
- first page in the second
- sequence of 64 consecutive
- pages in the doublewrite
- buffer */
-#define TRX_SYS_DOUBLEWRITE_REPEAT 12 /*!< we repeat
- TRX_SYS_DOUBLEWRITE_MAGIC,
- TRX_SYS_DOUBLEWRITE_BLOCK1,
- TRX_SYS_DOUBLEWRITE_BLOCK2
- so that if the trx sys
- header is half-written
- to disk, we still may
- be able to recover the
- information */
-/** If this is not yet set to TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
-we must reset the doublewrite buffer, because starting from 4.1.x the
-space id of a data page is stored into
-FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
-#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED (24 + FSEG_HEADER_SIZE)
-
-/*-------------------------------------------------------------*/
-/** Contents of TRX_SYS_DOUBLEWRITE_MAGIC */
-#define TRX_SYS_DOUBLEWRITE_MAGIC_N 536853855
-/** Contents of TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED */
-#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N 1783657386
-
-/** Size of the doublewrite block in pages */
-#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE
-/* @} */
-
-/** File format tag */
-/* @{ */
-/** The offset of the file format tag on the trx system header page
-(TRX_SYS_PAGE_NO of TRX_SYS_SPACE) */
-#define TRX_SYS_FILE_FORMAT_TAG (UNIV_PAGE_SIZE - 16)
-
-/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid. The file format
-identifier is added to this constant. */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW 3645922177UL
-/** Contents of TRX_SYS_FILE_FORMAT_TAG+4 when valid */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH 2745987765UL
-/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid. The file format
-identifier is added to this 64-bit constant. */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N \
- ((ib_uint64_t) TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH << 32 \
- | TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW)
-/* @} */
-
-#define TRX_DESCR_ARRAY_INITIAL_SIZE 1000
-
-#ifndef UNIV_HOTBACKUP
-/** The transaction system central memory data structure. */
-struct trx_sys_t{
-
- ib_mutex_t mutex; /*!< mutex protecting most fields in
- this structure except when noted
- otherwise */
- ulint n_prepared_trx; /*!< Number of transactions currently
- in the XA PREPARED state */
- ulint n_prepared_recovered_trx; /*!< Number of transactions
- currently in XA PREPARED state that are
- also recovered. Such transactions cannot
- be added during runtime. They can only
- occur after recovery if mysqld crashed
- while there were XA PREPARED
- transactions. We disable query cache
- if such transactions exist. */
- trx_id_t max_trx_id; /*!< The smallest number not yet
- assigned as a transaction id or
- transaction number */
- char pad1[CACHE_LINE_SIZE]; /*!< Ensure max_trx_id does not share
- cache line with other fields. */
- trx_id_t* descriptors; /*!< Array of trx descriptors */
- ulint descr_n_max; /*!< The current size of the descriptors
- array. */
- char pad2[CACHE_LINE_SIZE]; /*!< Ensure static descriptor fields
- do not share cache lines with
- descr_n_used */
- ulint descr_n_used; /*!< Number of used elements in the
- descriptors array. */
- char pad3[CACHE_LINE_SIZE]; /*!< Ensure descriptors do not share
- cache line with other fields */
-#ifdef UNIV_DEBUG
- trx_id_t rw_max_trx_id; /*!< Max trx id of read-write transactions
- which exist or existed */
-#endif
- trx_list_t rw_trx_list; /*!< List of active and committed in
- memory read-write transactions, sorted
- on trx id, biggest first. Recovered
- transactions are always on this list. */
- char pad4[CACHE_LINE_SIZE]; /*!< Ensure list base nodes do not
- share cache line with other fields */
- trx_list_t ro_trx_list; /*!< List of active and committed in
- memory read-only transactions, sorted
- on trx id, biggest first. NOTE:
- The order for read-only transactions
- is not necessary. We should exploit
- this and increase concurrency during
- add/remove. */
- char pad5[CACHE_LINE_SIZE]; /*!< Ensure list base nodes do not
- share cache line with other fields */
- trx_list_t mysql_trx_list; /*!< List of transactions created
- for MySQL. All transactions on
- ro_trx_list are on mysql_trx_list. The
- rw_trx_list can contain system
- transactions and recovered transactions
- that will not be in the mysql_trx_list.
- There can be active non-locking
- auto-commit read only transactions that
- are on this list but not on ro_trx_list.
- mysql_trx_list may additionally contain
- transactions that have not yet been
- started in InnoDB. */
- char pad6[CACHE_LINE_SIZE]; /*!< Ensure list base nodes do not
- share cache line with other fields */
- trx_list_t trx_serial_list;
- /*!< trx->no ordered List of
- transactions in either TRX_PREPARED or
- TRX_ACTIVE which have already been
- assigned a serialization number */
- char pad7[CACHE_LINE_SIZE]; /*!< Ensure list base nodes do not
- share cache line with other fields */
- trx_rseg_t* const rseg_array[TRX_SYS_N_RSEGS];
- /*!< Pointer array to rollback
- segments; NULL if slot not in use;
- created and destroyed in
- single-threaded mode; not protected
- by any mutex, because it is read-only
- during multi-threaded operation */
- ulint rseg_history_len;/*!< Length of the TRX_RSEG_HISTORY
- list (update undo logs for committed
- transactions), protected by
- rseg->mutex */
- UT_LIST_BASE_NODE_T(read_view_t) view_list;
- /*!< List of read views sorted
- on trx no, biggest first */
-};
-
-/** When a trx id which is zero modulo this number (which must be a power of
-two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
-page is updated */
-#define TRX_SYS_TRX_ID_WRITE_MARGIN 256
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_NONINL
-#include "trx0sys.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/trx0sys.ic b/storage/xtradb/include/trx0sys.ic
deleted file mode 100644
index 6024c1dc94e..00000000000
--- a/storage/xtradb/include/trx0sys.ic
+++ /dev/null
@@ -1,568 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0sys.ic
-Transaction system
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0trx.h"
-#include "data0type.h"
-#ifndef UNIV_HOTBACKUP
-# include "srv0srv.h"
-# include "mtr0log.h"
-
-/* The typedef for rseg slot in the file copy */
-typedef byte trx_sysf_rseg_t;
-
-/* Rollback segment specification slot offsets */
-/*-------------------------------------------------------------*/
-#define TRX_SYS_RSEG_SPACE 0 /* space where the segment
- header is placed; starting with
- MySQL/InnoDB 5.1.7, this is
- UNIV_UNDEFINED if the slot is unused */
-#define TRX_SYS_RSEG_PAGE_NO 4 /* page number where the segment
- header is placed; this is FIL_NULL
- if the slot is unused */
-/*-------------------------------------------------------------*/
-/* Size of a rollback segment specification slot */
-#define TRX_SYS_RSEG_SLOT_SIZE 8
-
-/*****************************************************************//**
-Writes the value of max_trx_id to the file based trx system header. */
-UNIV_INTERN
-void
-trx_sys_flush_max_trx_id(void);
-/*==========================*/
-
-/***************************************************************//**
-Checks if a page address is the trx sys header page.
-@return TRUE if trx sys header page */
-UNIV_INLINE
-ibool
-trx_sys_hdr_page(
-/*=============*/
- ulint space, /*!< in: space */
- ulint page_no)/*!< in: page number */
-{
- if ((space == TRX_SYS_SPACE) && (page_no == TRX_SYS_PAGE_NO)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/***************************************************************//**
-Gets the pointer in the nth slot of the rseg array.
-@return pointer to rseg object, NULL if slot not in use */
-UNIV_INLINE
-trx_rseg_t*
-trx_sys_get_nth_rseg(
-/*=================*/
- trx_sys_t* sys, /*!< in: trx system */
- ulint n) /*!< in: index of slot */
-{
- ut_ad(n < TRX_SYS_N_RSEGS);
-
- return(sys->rseg_array[n]);
-}
-
-/**********************************************************************//**
-Gets a pointer to the transaction system header and x-latches its page.
-@return pointer to system header, page x-latched. */
-UNIV_INLINE
-trx_sysf_t*
-trx_sysf_get(
-/*=========*/
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block;
- trx_sysf_t* header;
-
- ut_ad(mtr);
-
- block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
- RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
-
- header = TRX_SYS + buf_block_get_frame(block);
-
- return(header);
-}
-
-/*****************************************************************//**
-Gets the space of the nth rollback segment slot in the trx system
-file copy.
-@return space id */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys header */
- ulint i, /*!< in: slot index == rseg id */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE, MLOG_4BYTES, mtr));
-}
-
-/*****************************************************************//**
-Gets the page number of the nth rollback segment slot in the trx system
-header.
-@return page number, FIL_NULL if slot unused */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx system header */
- ulint i, /*!< in: slot index == rseg id */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_PAGE_NO, MLOG_4BYTES, mtr));
-}
-
-/*****************************************************************//**
-Sets the space id of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- ulint space, /*!< in: space id */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- mlog_write_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE,
- space,
- MLOG_4BYTES, mtr);
-}
-
-/*****************************************************************//**
-Sets the page number of the nth rollback segment slot in the trx system
-header. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx sys header */
- ulint i, /*!< in: slot index == rseg id */
- ulint page_no, /*!< in: page number, FIL_NULL if the
- slot is reset to unused */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- mlog_write_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_PAGE_NO,
- page_no,
- MLOG_4BYTES, mtr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*****************************************************************//**
-Writes a trx id to an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_trx_id(
-/*=============*/
- byte* ptr, /*!< in: pointer to memory where written */
- trx_id_t id) /*!< in: id */
-{
-#if DATA_TRX_ID_LEN != 6
-# error "DATA_TRX_ID_LEN != 6"
-#endif
- mach_write_to_6(ptr, id);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************//**
-Reads a trx id from an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_read_...
-@return id */
-UNIV_INLINE
-trx_id_t
-trx_read_trx_id(
-/*============*/
- const byte* ptr) /*!< in: pointer to memory from where to read */
-{
-#if DATA_TRX_ID_LEN != 6
-# error "DATA_TRX_ID_LEN != 6"
-#endif
- return(mach_read_from_6(ptr));
-}
-
-/****************************************************************//**
-Looks for the trx handle with the given id in rw_trx_list.
-The caller must be holding trx_sys->mutex.
-@return the trx handle or NULL if not found;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
-UNIV_INLINE
-trx_t*
-trx_get_rw_trx_by_id(
-/*=================*/
- trx_id_t trx_id) /*!< in: trx id to search for */
-{
- trx_t* trx;
- ulint len;
- trx_t* first;
-
- ut_ad(mutex_own(&trx_sys->mutex));
-
- len = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
-
- if (len == 0) {
- return(NULL);
- }
-
- /* Because the list is ordered on trx id in descending order,
- we try to speed things up a bit. */
-
- trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- assert_trx_in_rw_list(trx);
-
- if (trx_id == trx->id) {
- return(trx);
- } else if (len == 1 || trx_id > trx->id) {
- return(NULL);
- }
-
- first = trx;
-
- trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list);
- assert_trx_in_rw_list(trx);
-
- if (trx_id == trx->id) {
- return(trx);
- } else if (len == 2 || trx_id < trx->id) {
- return(NULL);
- }
-
- /* Search the list from the lower end (tail). */
- if (trx_id < (first->id + trx->id) >> 1) {
- for (trx = UT_LIST_GET_PREV(trx_list, trx);
- trx != NULL && trx_id > trx->id;
- trx = UT_LIST_GET_PREV(trx_list, trx)) {
- assert_trx_in_rw_list(trx);
- }
- } else {
- for (trx = UT_LIST_GET_NEXT(trx_list, first);
- trx != NULL && trx_id < trx->id;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
- assert_trx_in_rw_list(trx);
- }
- }
-
- return((trx != NULL && trx->id == trx_id) ? trx : NULL);
-}
-
-/****************************************************************//**
-Returns the minimum trx id in trx list. This is the smallest id for which
-the trx can possibly be active. (But, you must look at the trx->state
-to find out if the minimum trx id transaction itself is active, or already
-committed.). The caller must be holding the trx_sys_t::mutex in shared mode.
-@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
-UNIV_INLINE
-trx_id_t
-trx_rw_min_trx_id_low(void)
-/*=======================*/
-{
- trx_id_t id;
- const trx_t* trx;
-
- ut_ad(mutex_own(&trx_sys->mutex));
-
- trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list);
-
- if (trx == NULL) {
- id = trx_sys->max_trx_id;
- } else {
- assert_trx_in_rw_list(trx);
- id = trx->id;
- }
-
- return(id);
-}
-
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
-/***********************************************************//**
-Assert that a transaction has been recovered.
-@return TRUE */
-UNIV_INLINE
-ibool
-trx_assert_recovered(
-/*=================*/
- trx_id_t trx_id) /*!< in: transaction identifier */
-{
- const trx_t* trx;
-
- mutex_enter(&trx_sys->mutex);
-
- trx = trx_get_rw_trx_by_id(trx_id);
- ut_a(trx->is_recovered);
-
- mutex_exit(&trx_sys->mutex);
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-
-/****************************************************************//**
-Returns the minimum trx id in rw trx list. This is the smallest id for which
-the rw trx can possibly be active. (But, you must look at the trx->state
-to find out if the minimum trx id transaction itself is active, or already
-committed.)
-@return the minimum trx id, or trx_sys->max_trx_id if rw trx list is empty */
-UNIV_INLINE
-trx_id_t
-trx_rw_min_trx_id(void)
-/*===================*/
-{
- trx_id_t id;
-
- mutex_enter(&trx_sys->mutex);
-
- id = trx_rw_min_trx_id_low();
-
- mutex_exit(&trx_sys->mutex);
-
- return(id);
-}
-
-/****************************************************************//**
-Returns pointer to a transaction instance if a rw transaction with the given id
-is active. Caller must hold trx_sys->mutex. If the caller is not holding
-lock_sys->mutex, the transaction may already have been committed.
-@return transaction instance if active, or NULL;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
-UNIV_INLINE
-trx_t*
-trx_rw_get_active_trx_by_id(
-/*========================*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt) /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
-{
- trx_t* trx;
-
- ut_ad(mutex_own(&trx_sys->mutex));
-
- if (trx_id < trx_rw_min_trx_id_low()) {
-
- trx = NULL;
- } else if (trx_id >= trx_sys->max_trx_id) {
-
- /* There must be corruption: we let the caller handle the
- diagnostic prints in this case. */
-
- trx = NULL;
- if (corrupt != NULL) {
- *corrupt = TRUE;
- }
- } else {
- trx = trx_get_rw_trx_by_id(trx_id);
-
- if (trx != NULL
- && trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
-
- trx = NULL;
- }
- }
-
- return(trx);
-}
-
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. Caller must hold
-trx_sys->mutex. If the caller is not holding lock_sys->mutex, the
-transaction may already have been committed.
-@return true if rw transaction it with a given id is active. */
-UNIV_INLINE
-bool
-trx_rw_is_active_low(
-/*=================*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt) /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
-{
- ut_ad(mutex_own(&trx_sys->mutex));
-
- if (UNIV_UNLIKELY(trx_id >= trx_sys->max_trx_id)) {
-
- /* There must be corruption: we let the caller handle the
- diagnostic prints in this case. */
-
- if (corrupt != NULL) {
- *corrupt = TRUE;
- }
-
- return(false);
- }
-
- return(trx_find_descriptor(trx_sys->descriptors, trx_sys->descr_n_used,
- trx_id) != NULL);
-}
-
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. If the caller is
-not holding lock_sys->mutex, the transaction may already have been
-committed.
-@return true if rw transaction it with a given id is active. */
-UNIV_INLINE
-bool
-trx_rw_is_active(
-/*=============*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt) /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
-{
- bool res;
-
- mutex_enter(&trx_sys->mutex);
-
- res = trx_rw_is_active_low(trx_id, corrupt);
-
- mutex_exit(&trx_sys->mutex);
-
- return(res);
-}
-
-/*****************************************************************//**
-Allocates a new transaction id.
-@return new, allocated trx id */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_id(void)
-/*========================*/
-{
-#ifndef WITH_WSREP
- /* wsrep_fake_trx_id violates this assert */
- ut_ad(mutex_own(&trx_sys->mutex));
-#endif /* WITH_WSREP */
-
- /* VERY important: after the database is started, max_trx_id value is
- divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if
- will evaluate to TRUE when this function is first time called,
- and the value for trx id will be written to disk-based header!
- Thus trx id values will not overlap when the database is
- repeatedly started! */
-
- if (!(trx_sys->max_trx_id % (trx_id_t) TRX_SYS_TRX_ID_WRITE_MARGIN)) {
-
- trx_sys_flush_max_trx_id();
- }
-
- return(trx_sys->max_trx_id++);
-}
-
-/*****************************************************************//**
-Determines the maximum transaction id.
-@return maximum currently allocated trx id; will be stale after the
-next call to trx_sys_get_new_trx_id() */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_max_trx_id(void)
-/*========================*/
-{
-#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN
- trx_id_t max_trx_id;
-#endif
-
- ut_ad(!mutex_own(&trx_sys->mutex));
-
-#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN
- /* Avoid torn reads. */
- mutex_enter(&trx_sys->mutex);
- max_trx_id = trx_sys->max_trx_id;
- mutex_exit(&trx_sys->mutex);
- return(max_trx_id);
-#else
- /* Perform a dirty read. Callers should be prepared for stale
- values, and we know that the value fits in a machine word, so
- that it will be read and written atomically. */
- return(trx_sys->max_trx_id);
-#endif
-}
-
-/*****************************************************************//**
-Get the number of transaction in the system, independent of their state.
-@return count of transactions in trx_sys_t::rw_trx_list */
-UNIV_INLINE
-ulint
-trx_sys_get_n_rw_trx(void)
-/*======================*/
-{
- ulint n_trx;
-
- mutex_enter(&trx_sys->mutex);
-
- n_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
-
- mutex_exit(&trx_sys->mutex);
-
- return(n_trx);
-}
-
-
-/*************************************************************//**
-Find a slot for a given trx ID in a descriptors array.
-@return: slot pointer */
-UNIV_INLINE
-trx_id_t*
-trx_find_descriptor(
-/*================*/
- const trx_id_t* descriptors, /*!< in: descriptors array */
- ulint n_descr, /*!< in: array size */
- trx_id_t trx_id) /*!< in: trx id */
-{
- ut_ad(descriptors != trx_sys->descriptors ||
- mutex_own(&trx_sys->mutex));
-
- if (UNIV_UNLIKELY(n_descr == 0)) {
-
- return(NULL);
- }
-
- return((trx_id_t *) bsearch(&trx_id, descriptors, n_descr,
- sizeof(trx_id_t), trx_descr_cmp));
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h
deleted file mode 100644
index 1b490eca2af..00000000000
--- a/storage/xtradb/include/trx0trx.h
+++ /dev/null
@@ -1,1232 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0trx.h
-The transaction
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0trx_h
-#define trx0trx_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "dict0types.h"
-#ifndef UNIV_HOTBACKUP
-#include "lock0types.h"
-#include "log0log.h"
-#include "usr0types.h"
-#include "que0types.h"
-#include "mem0mem.h"
-#include "read0types.h"
-#include "trx0xa.h"
-#include "ut0vec.h"
-#include "fts0fts.h"
-
-/** Dummy session used currently in MySQL interface */
-extern sess_t* trx_dummy_sess;
-
-/********************************************************************//**
-In XtraDB it is impossible for a transaction to own a search latch outside of
-InnoDB code, so there is nothing to release on demand. We keep this function to
-simplify maintenance.*/
-UNIV_INLINE
-void
-trx_search_latch_release_if_reserved(
-/*=================================*/
- trx_t* trx MY_ATTRIBUTE((unused))); /*!< in: transaction */
-/******************************************************************//**
-Set detailed error message for the transaction. */
-UNIV_INTERN
-void
-trx_set_detailed_error(
-/*===================*/
- trx_t* trx, /*!< in: transaction struct */
- const char* msg); /*!< in: detailed error message */
-/*************************************************************//**
-Set detailed error message for the transaction from a file. Note that the
-file is rewinded before reading from it. */
-UNIV_INTERN
-void
-trx_set_detailed_error_from_file(
-/*=============================*/
- trx_t* trx, /*!< in: transaction struct */
- FILE* file); /*!< in: file to read message from */
-/****************************************************************//**
-Retrieves the error_info field from a trx.
-@return the error info */
-UNIV_INLINE
-const dict_index_t*
-trx_get_error_info(
-/*===============*/
- const trx_t* trx); /*!< in: trx object */
-/********************************************************************//**
-Creates a transaction object for MySQL.
-@return own: transaction object */
-UNIV_INTERN
-trx_t*
-trx_allocate_for_mysql(void);
-/*========================*/
-/********************************************************************//**
-Creates a transaction object for background operations by the master thread.
-@return own: transaction object */
-UNIV_INTERN
-trx_t*
-trx_allocate_for_background(void);
-/*=============================*/
-/********************************************************************//**
-Frees a transaction object of a background operation of the master thread. */
-UNIV_INTERN
-void
-trx_free_for_background(
-/*====================*/
- trx_t* trx); /*!< in, own: trx object */
-/********************************************************************//**
-At shutdown, frees a transaction object that is in the PREPARED state. */
-UNIV_INTERN
-void
-trx_free_prepared(
-/*==============*/
- trx_t* trx) /*!< in, own: trx object */
- UNIV_COLD;
-/********************************************************************//**
-Frees a transaction object for MySQL. */
-UNIV_INTERN
-void
-trx_free_for_mysql(
-/*===============*/
- trx_t* trx); /*!< in, own: trx object */
-/****************************************************************//**
-Creates trx objects for transactions and initializes the trx list of
-trx_sys at database start. Rollback segment and undo log lists must
-already exist when this function is called, because the lists of
-transactions to be rolled back or cleaned up are built based on the
-undo log lists. */
-UNIV_INTERN
-void
-trx_lists_init_at_db_start(void);
-/*============================*/
-
-#ifdef UNIV_DEBUG
-#define trx_start_if_not_started_xa(t) \
- { \
- (t)->start_line = __LINE__; \
- (t)->start_file = __FILE__; \
- trx_start_if_not_started_xa_low((t)); \
- }
-#else
-#define trx_start_if_not_started_xa(t) \
- trx_start_if_not_started_xa_low((t))
-#endif /* UNIV_DEBUG */
-
-/*************************************************************//**
-Starts the transaction if it is not yet started. */
-UNIV_INTERN
-void
-trx_start_if_not_started_xa_low(
-/*============================*/
- trx_t* trx); /*!< in: transaction */
-/*************************************************************//**
-Starts the transaction if it is not yet started. */
-UNIV_INTERN
-void
-trx_start_if_not_started_low(
-/*=========================*/
- trx_t* trx); /*!< in: transaction */
-
-#ifdef UNIV_DEBUG
-#define trx_start_if_not_started(t) \
- { \
- (t)->start_line = __LINE__; \
- (t)->start_file = __FILE__; \
- trx_start_if_not_started_low((t)); \
- }
-#else
-#define trx_start_if_not_started(t) \
- trx_start_if_not_started_low((t))
-#endif /* UNIV_DEBUG */
-
-/*************************************************************//**
-Starts the transaction for a DDL operation. */
-UNIV_INTERN
-void
-trx_start_for_ddl_low(
-/*==================*/
- trx_t* trx, /*!< in/out: transaction */
- trx_dict_op_t op) /*!< in: dictionary operation type */
- MY_ATTRIBUTE((nonnull));
-
-#ifdef UNIV_DEBUG
-#define trx_start_for_ddl(t, o) \
- { \
- ut_ad((t)->start_file == 0); \
- (t)->start_line = __LINE__; \
- (t)->start_file = __FILE__; \
- trx_start_for_ddl_low((t), (o)); \
- }
-#else
-#define trx_start_for_ddl(t, o) \
- trx_start_for_ddl_low((t), (o))
-#endif /* UNIV_DEBUG */
-
-/****************************************************************//**
-Commits a transaction. */
-UNIV_INTERN
-void
-trx_commit(
-/*=======*/
- trx_t* trx) /*!< in/out: transaction */
- MY_ATTRIBUTE((nonnull));
-/****************************************************************//**
-Commits a transaction and a mini-transaction. */
-UNIV_INTERN
-void
-trx_commit_low(
-/*===========*/
- trx_t* trx, /*!< in/out: transaction */
- mtr_t* mtr) /*!< in/out: mini-transaction (will be committed),
- or NULL if trx made no modifications */
- MY_ATTRIBUTE((nonnull(1)));
-/****************************************************************//**
-Cleans up a transaction at database startup. The cleanup is needed if
-the transaction already got to the middle of a commit when the database
-crashed, and we cannot roll it back. */
-UNIV_INTERN
-void
-trx_cleanup_at_db_startup(
-/*======================*/
- trx_t* trx); /*!< in: transaction */
-/**********************************************************************//**
-Does the transaction commit for MySQL.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-dberr_t
-trx_commit_for_mysql(
-/*=================*/
- trx_t* trx); /*!< in/out: transaction */
-/**********************************************************************//**
-Does the transaction prepare for MySQL. */
-UNIV_INTERN
-void
-trx_prepare_for_mysql(
-/*==================*/
- trx_t* trx); /*!< in/out: trx handle */
-/**********************************************************************//**
-This function is used to find number of prepared transactions and
-their transaction objects for a recovery.
-@return number of prepared transactions */
-UNIV_INTERN
-int
-trx_recover_for_mysql(
-/*==================*/
- XID* xid_list, /*!< in/out: prepared transactions */
- ulint len); /*!< in: number of slots in xid_list */
-/*******************************************************************//**
-This function is used to find one X/Open XA distributed transaction
-which is in the prepared state
-@return trx or NULL; on match, the trx->xid will be invalidated;
-note that the trx may have been committed, unless the caller is
-holding lock_sys->mutex */
-UNIV_INTERN
-trx_t *
-trx_get_trx_by_xid(
-/*===============*/
- const XID* xid); /*!< in: X/Open XA transaction identifier */
-/**********************************************************************//**
-If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE. */
-UNIV_INTERN
-void
-trx_commit_complete_for_mysql(
-/*==========================*/
- trx_t* trx) /*!< in/out: transaction */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
-Marks the latest SQL statement ended. */
-UNIV_INTERN
-void
-trx_mark_sql_stat_end(
-/*==================*/
- trx_t* trx); /*!< in: trx handle */
-/********************************************************************//**
-Assigns a read view for a consistent read query. All the consistent reads
-within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction.
-@return consistent read view */
-UNIV_INTERN
-read_view_t*
-trx_assign_read_view(
-/*=================*/
- trx_t* trx); /*!< in: active transaction */
-/********************************************************************//**
-Clones the read view from another transaction. All the consistent reads within
-the receiver transaction will get the same read view as the donor transaction
-@return read view clone */
-UNIV_INTERN
-read_view_t*
-trx_clone_read_view(
-/*================*/
- trx_t* trx, /*!< in: receiver transaction */
- trx_t* from_trx) /*!< in: donor transaction */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/****************************************************************//**
-Prepares a transaction for commit/rollback. */
-UNIV_INTERN
-void
-trx_commit_or_rollback_prepare(
-/*===========================*/
- trx_t* trx); /*!< in/out: transaction */
-/*********************************************************************//**
-Creates a commit command node struct.
-@return own: commit node struct */
-UNIV_INTERN
-commit_node_t*
-trx_commit_node_create(
-/*===================*/
- mem_heap_t* heap); /*!< in: mem heap where created */
-/***********************************************************//**
-Performs an execution step for a commit type node in a query graph.
-@return query thread to run next, or NULL */
-UNIV_INTERN
-que_thr_t*
-trx_commit_step(
-/*============*/
- que_thr_t* thr); /*!< in: query thread */
-
-/**********************************************************************//**
-Prints info about a transaction.
-Caller must hold trx_sys->mutex. */
-UNIV_INTERN
-void
-trx_print_low(
-/*==========*/
- FILE* f,
- /*!< in: output stream */
- const trx_t* trx,
- /*!< in: transaction */
- ulint max_query_len,
- /*!< in: max query length to print,
- or 0 to use the default max length */
- ulint n_rec_locks,
- /*!< in: lock_number_of_rows_locked(&trx->lock) */
- ulint n_trx_locks,
- /*!< in: length of trx->lock.trx_locks */
- ulint heap_size)
- /*!< in: mem_heap_get_size(trx->lock.lock_heap) */
- MY_ATTRIBUTE((nonnull));
-
-#ifdef WITH_WSREP
-/**********************************************************************//**
-Prints info about a transaction.
-Transaction information may be retrieved without having trx_sys->mutex acquired
-so it may not be completely accurate. The caller must own lock_sys->mutex
-and the trx must have some locks to make sure that it does not escape
-without locking lock_sys->mutex. */
-UNIV_INTERN
-void
-wsrep_trx_print_locking(
-/*==============*/
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction */
- ulint max_query_len) /*!< in: max query length to print,
- or 0 to use the default max length */
- MY_ATTRIBUTE((nonnull));
-#endif /* WITH_WSREP */
-
-/**********************************************************************//**
-Prints info about a transaction.
-The caller must hold lock_sys->mutex and trx_sys->mutex.
-When possible, use trx_print() instead. */
-UNIV_INTERN
-void
-trx_print_latched(
-/*==============*/
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction */
- ulint max_query_len) /*!< in: max query length to print,
- or 0 to use the default max length */
- MY_ATTRIBUTE((nonnull));
-
-#ifdef WITH_WSREP
-/**********************************************************************//**
-Prints info about a transaction.
-Transaction information may be retrieved without having trx_sys->mutex acquired
-so it may not be completely accurate. The caller must own lock_sys->mutex
-and the trx must have some locks to make sure that it does not escape
-without locking lock_sys->mutex. */
-UNIV_INTERN
-void
-wsrep_trx_print_locking(
-/*==============*/
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction */
- ulint max_query_len) /*!< in: max query length to print,
- or 0 to use the default max length */
- MY_ATTRIBUTE((nonnull));
-#endif /* WITH_WSREP */
-/**********************************************************************//**
-Prints info about a transaction.
-Acquires and releases lock_sys->mutex and trx_sys->mutex. */
-UNIV_INTERN
-void
-trx_print(
-/*======*/
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction */
- ulint max_query_len) /*!< in: max query length to print,
- or 0 to use the default max length */
- MY_ATTRIBUTE((nonnull));
-
-/**********************************************************************//**
-Determine if a transaction is a dictionary operation.
-@return dictionary operation mode */
-UNIV_INLINE
-enum trx_dict_op_t
-trx_get_dict_operation(
-/*===================*/
- const trx_t* trx) /*!< in: transaction */
- MY_ATTRIBUTE((pure));
-/**********************************************************************//**
-Flag a transaction a dictionary operation. */
-UNIV_INLINE
-void
-trx_set_dict_operation(
-/*===================*/
- trx_t* trx, /*!< in/out: transaction */
- enum trx_dict_op_t op); /*!< in: operation, not
- TRX_DICT_OP_NONE */
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Determines if a transaction is in the given state.
-The caller must hold trx_sys->mutex, or it must be the thread
-that is serving a running transaction.
-A running transaction must be in trx_sys->ro_trx_list or trx_sys->rw_trx_list
-unless it is a non-locking autocommit read only transaction, which is only
-in trx_sys->mysql_trx_list.
-@return TRUE if trx->state == state */
-UNIV_INLINE
-ibool
-trx_state_eq(
-/*=========*/
- const trx_t* trx, /*!< in: transaction */
- trx_state_t state, /*!< in: state;
- if state != TRX_STATE_NOT_STARTED
- asserts that
- trx->state != TRX_STATE_NOT_STARTED */
- bool relaxed = false)
- /*!< in: whether to allow
- trx->state == TRX_STATE_NOT_STARTED
- after an error has been reported */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-# ifdef UNIV_DEBUG
-/**********************************************************************//**
-Asserts that a transaction has been started.
-The caller must hold trx_sys->mutex.
-@return TRUE if started */
-UNIV_INTERN
-ibool
-trx_assert_started(
-/*===============*/
- const trx_t* trx) /*!< in: transaction */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-# endif /* UNIV_DEBUG */
-
-/**********************************************************************//**
-Determines if the currently running transaction has been interrupted.
-@return TRUE if interrupted */
-UNIV_INTERN
-ibool
-trx_is_interrupted(
-/*===============*/
- const trx_t* trx); /*!< in: transaction */
-/**********************************************************************//**
-Determines if the currently running transaction is in strict mode.
-@return TRUE if strict */
-UNIV_INTERN
-ibool
-trx_is_strict(
-/*==========*/
- trx_t* trx); /*!< in: transaction */
-#else /* !UNIV_HOTBACKUP */
-#define trx_is_interrupted(trx) FALSE
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Calculates the "weight" of a transaction. The weight of one transaction
-is estimated as the number of altered rows + the number of locked rows.
-@param t transaction
-@return transaction weight */
-#define TRX_WEIGHT(t) ((t)->undo_no + UT_LIST_GET_LEN((t)->lock.trx_locks))
-
-/*******************************************************************//**
-Compares the "weight" (or size) of two transactions. Transactions that
-have edited non-transactional tables are considered heavier than ones
-that have not.
-@return TRUE if weight(a) >= weight(b) */
-UNIV_INTERN
-ibool
-trx_weight_ge(
-/*==========*/
- const trx_t* a, /*!< in: the first transaction to be compared */
- const trx_t* b); /*!< in: the second transaction to be compared */
-
-/* Maximum length of a string that can be returned by
-trx_get_que_state_str(). */
-#define TRX_QUE_STATE_STR_MAX_LEN 12 /* "ROLLING BACK" */
-
-/*******************************************************************//**
-Retrieves transaction's que state in a human readable string. The string
-should not be free()'d or modified.
-@return string in the data segment */
-UNIV_INLINE
-const char*
-trx_get_que_state_str(
-/*==================*/
- const trx_t* trx); /*!< in: transaction */
-
-/****************************************************************//**
-Assign a read-only transaction a rollback-segment, if it is attempting
-to write to a TEMPORARY table. */
-UNIV_INTERN
-void
-trx_assign_rseg(
-/*============*/
- trx_t* trx); /*!< A read-only transaction that
- needs to be assigned a RBS. */
-
-/*************************************************************//**
-Callback function for trx_find_descriptor() to compare trx IDs. */
-UNIV_INTERN
-int
-trx_descr_cmp(
-/*==========*/
- const void *a, /*!< in: pointer to first comparison argument */
- const void *b); /*!< in: pointer to second comparison argument */
-
-/*************************************************************//**
-Release a slot for a given trx in the global descriptors array. */
-UNIV_INTERN
-void
-trx_release_descriptor(
-/*===================*/
- trx_t* trx); /*!< in: trx pointer */
-
-/*******************************************************************//**
-Transactions that aren't started by the MySQL server don't set
-the trx_t::mysql_thd field. For such transactions we set the lock
-wait timeout to 0 instead of the user configured value that comes
-from innodb_lock_wait_timeout via trx_t::mysql_thd.
-@param trx transaction
-@return lock wait timeout in seconds */
-#define trx_lock_wait_timeout_get(trx) \
- ((trx)->mysql_thd != NULL \
- ? thd_lock_wait_timeout((trx)->mysql_thd) \
- : 0)
-
-/*******************************************************************//**
-Determine if the transaction is a non-locking autocommit select
-(implied read-only).
-@param t transaction
-@return true if non-locking autocommit select transaction. */
-#define trx_is_autocommit_non_locking(t) \
-((t)->auto_commit && (t)->will_lock == 0)
-
-/*******************************************************************//**
-Determine if the transaction is a non-locking autocommit select
-with an explicit check for the read-only status.
-@param t transaction
-@return true if non-locking autocommit read-only transaction. */
-#define trx_is_ac_nl_ro(t) \
-((t)->read_only && trx_is_autocommit_non_locking((t)))
-
-/*******************************************************************//**
-Assert that the transaction is in the trx_sys_t::rw_trx_list */
-#define assert_trx_in_rw_list(t) do { \
- ut_ad(!(t)->read_only); \
- assert_trx_in_list(t); \
-} while (0)
-
-/*******************************************************************//**
-Assert that the transaction is either in trx_sys->ro_trx_list or
-trx_sys->rw_trx_list but not both and it cannot be an autocommit
-non-locking select */
-#define assert_trx_in_list(t) do { \
- ut_ad((t)->in_ro_trx_list == (t)->read_only); \
- ut_ad((t)->in_rw_trx_list == !(t)->read_only); \
- ut_ad(!trx_is_autocommit_non_locking((t))); \
- switch ((t)->state) { \
- case TRX_STATE_PREPARED: \
- /* fall through */ \
- case TRX_STATE_ACTIVE: \
- case TRX_STATE_COMMITTED_IN_MEMORY: \
- continue; \
- case TRX_STATE_NOT_STARTED: \
- break; \
- } \
- ut_error; \
-} while (0)
-
-#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Assert that an autocommit non-locking select cannot be in the
-ro_trx_list nor the rw_trx_list and that it is a read-only transaction.
-The tranasction must be in the mysql_trx_list. */
-# define assert_trx_nonlocking_or_in_list(t) \
- do { \
- if (trx_is_autocommit_non_locking(t)) { \
- trx_state_t t_state = (t)->state; \
- ut_ad((t)->read_only); \
- ut_ad(!(t)->is_recovered); \
- ut_ad(!(t)->in_ro_trx_list); \
- ut_ad(!(t)->in_rw_trx_list); \
- ut_ad((t)->in_mysql_trx_list); \
- ut_ad(t_state == TRX_STATE_NOT_STARTED \
- || t_state == TRX_STATE_ACTIVE); \
- } else { \
- assert_trx_in_list(t); \
- } \
- } while (0)
-#else /* UNIV_DEBUG */
-/*******************************************************************//**
-Assert that an autocommit non-locking slect cannot be in the
-ro_trx_list nor the rw_trx_list and that it is a read-only transaction.
-The tranasction must be in the mysql_trx_list. */
-# define assert_trx_nonlocking_or_in_list(trx) ((void)0)
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-Latching protocol for trx_lock_t::que_state. trx_lock_t::que_state
-captures the state of the query thread during the execution of a query.
-This is different from a transaction state. The query state of a transaction
-can be updated asynchronously by other threads. The other threads can be
-system threads, like the timeout monitor thread or user threads executing
-other queries. Another thing to be mindful of is that there is a delay between
-when a query thread is put into LOCK_WAIT state and before it actually starts
-waiting. Between these two events it is possible that the query thread is
-granted the lock it was waiting for, which implies that the state can be changed
-asynchronously.
-
-All these operations take place within the context of locking. Therefore state
-changes within the locking code must acquire both the lock mutex and the
-trx->mutex when changing trx->lock.que_state to TRX_QUE_LOCK_WAIT or
-trx->lock.wait_lock to non-NULL but when the lock wait ends it is sufficient
-to only acquire the trx->mutex.
-To query the state either of the mutexes is sufficient within the locking
-code and no mutex is required when the query thread is no longer waiting. */
-
-/** The locks and state of an active transaction. Protected by
-lock_sys->mutex, trx->mutex or both. */
-struct trx_lock_t {
- ulint n_active_thrs; /*!< number of active query threads */
-
- trx_que_t que_state; /*!< valid when trx->state
- == TRX_STATE_ACTIVE: TRX_QUE_RUNNING,
- TRX_QUE_LOCK_WAIT, ... */
-
- lock_t* wait_lock; /*!< if trx execution state is
- TRX_QUE_LOCK_WAIT, this points to
- the lock request, otherwise this is
- NULL; set to non-NULL when holding
- both trx->mutex and lock_sys->mutex;
- set to NULL when holding
- lock_sys->mutex; readers should
- hold lock_sys->mutex, except when
- they are holding trx->mutex and
- wait_lock==NULL */
- ib_uint64_t deadlock_mark; /*!< A mark field that is initialized
- to and checked against lock_mark_counter
- by lock_deadlock_recursive(). */
- ibool was_chosen_as_deadlock_victim;
- /*!< when the transaction decides to
- wait for a lock, it sets this to FALSE;
- if another transaction chooses this
- transaction as a victim in deadlock
- resolution, it sets this to TRUE.
- Protected by trx->mutex. */
- time_t wait_started; /*!< lock wait started at this time,
- protected only by lock_sys->mutex */
-
- que_thr_t* wait_thr; /*!< query thread belonging to this
- trx that is in QUE_THR_LOCK_WAIT
- state. For threads suspended in a
- lock wait, this is protected by
- lock_sys->mutex. Otherwise, this may
- only be modified by the thread that is
- serving the running transaction. */
-
- mem_heap_t* lock_heap; /*!< memory heap for trx_locks;
- protected by lock_sys->mutex */
-
- UT_LIST_BASE_NODE_T(lock_t)
- trx_locks; /*!< locks requested
- by the transaction;
- insertions are protected by trx->mutex
- and lock_sys->mutex; removals are
- protected by lock_sys->mutex */
-
- ib_vector_t* table_locks; /*!< All table locks requested by this
- transaction, including AUTOINC locks */
-
- ibool cancel; /*!< TRUE if the transaction is being
- rolled back either via deadlock
- detection or due to lock timeout. The
- caller has to acquire the trx_t::mutex
- in order to cancel the locks. In
- lock_trx_table_locks_remove() we
- check for this cancel of a transaction's
- locks and avoid reacquiring the trx
- mutex to prevent recursive deadlocks.
- Protected by both the lock sys mutex
- and the trx_t::mutex. */
-};
-
-#define TRX_MAGIC_N 91118598
-
-/** The transaction handle
-
-Normally, there is a 1:1 relationship between a transaction handle
-(trx) and a session (client connection). One session is associated
-with exactly one user transaction. There are some exceptions to this:
-
-* For DDL operations, a subtransaction is allocated that modifies the
-data dictionary tables. Lock waits and deadlocks are prevented by
-acquiring the dict_operation_lock before starting the subtransaction
-and releasing it after committing the subtransaction.
-
-* The purge system uses a special transaction that is not associated
-with any session.
-
-* If the system crashed or it was quickly shut down while there were
-transactions in the ACTIVE or PREPARED state, these transactions would
-no longer be associated with a session when the server is restarted.
-
-A session may be served by at most one thread at a time. The serving
-thread of a session might change in some MySQL implementations.
-Therefore we do not have os_thread_get_curr_id() assertions in the code.
-
-Normally, only the thread that is currently associated with a running
-transaction may access (read and modify) the trx object, and it may do
-so without holding any mutex. The following are exceptions to this:
-
-* trx_rollback_resurrected() may access resurrected (connectionless)
-transactions while the system is already processing new user
-transactions. The trx_sys->mutex prevents a race condition between it
-and lock_trx_release_locks() [invoked by trx_commit()].
-
-* trx_print_low() may access transactions not associated with the current
-thread. The caller must be holding trx_sys->mutex and lock_sys->mutex.
-
-* When a transaction handle is in the trx_sys->mysql_trx_list or
-trx_sys->trx_list, some of its fields must not be modified without
-holding trx_sys->mutex exclusively.
-
-* The locking code (in particular, lock_deadlock_recursive() and
-lock_rec_convert_impl_to_expl()) will access transactions associated
-to other connections. The locks of transactions are protected by
-lock_sys->mutex and sometimes by trx->mutex. */
-
-typedef enum {
- TRX_SERVER_ABORT = 0,
- TRX_WSREP_ABORT = 1
-} trx_abort_t;
-
-struct trx_t{
- ulint magic_n;
-
- ib_mutex_t mutex; /*!< Mutex protecting the fields
- state and lock
- (except some fields of lock, which
- are protected by lock_sys->mutex) */
-
- /** State of the trx from the point of view of concurrency control
- and the valid state transitions.
-
- Possible states:
-
- TRX_STATE_NOT_STARTED
- TRX_STATE_ACTIVE
- TRX_STATE_PREPARED
- TRX_STATE_COMMITTED_IN_MEMORY (alias below COMMITTED)
-
- Valid state transitions are:
-
- Regular transactions:
- * NOT_STARTED -> ACTIVE -> COMMITTED -> NOT_STARTED
-
- Auto-commit non-locking read-only:
- * NOT_STARTED -> ACTIVE -> NOT_STARTED
-
- XA (2PC):
- * NOT_STARTED -> ACTIVE -> PREPARED -> COMMITTED -> NOT_STARTED
-
- Recovered XA:
- * NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
-
- XA (2PC) (shutdown before ROLLBACK or COMMIT):
- * NOT_STARTED -> PREPARED -> (freed)
-
- Latching and various transaction lists membership rules:
-
- XA (2PC) transactions are always treated as non-autocommit.
-
- Transitions to ACTIVE or NOT_STARTED occur when
- !in_rw_trx_list and !in_ro_trx_list (no trx_sys->mutex needed).
-
- Autocommit non-locking read-only transactions move between states
- without holding any mutex. They are !in_rw_trx_list, !in_ro_trx_list.
-
- When a transaction is NOT_STARTED, it can be in_mysql_trx_list if
- it is a user transaction. It cannot be in ro_trx_list or rw_trx_list.
-
- ACTIVE->PREPARED->COMMITTED is only possible when trx->in_rw_trx_list.
- The transition ACTIVE->PREPARED is protected by trx_sys->mutex.
-
- ACTIVE->COMMITTED is possible when the transaction is in
- ro_trx_list or rw_trx_list.
-
- Transitions to COMMITTED are protected by both lock_sys->mutex
- and trx->mutex.
-
- NOTE: Some of these state change constraints are an overkill,
- currently only required for a consistent view for printing stats.
- This unnecessarily adds a huge cost for the general case.
-
- NOTE: In the future we should add read only transactions to the
- ro_trx_list the first time they try to acquire a lock ie. by default
- we treat all read-only transactions as non-locking. */
- trx_state_t state;
-
- trx_lock_t lock; /*!< Information about the transaction
- locks and state. Protected by
- trx->mutex or lock_sys->mutex
- or both */
- ulint is_recovered; /*!< 0=normal transaction,
- 1=recovered, must be rolled back,
- protected by trx_sys->mutex when
- trx->in_rw_trx_list holds */
-
- /* These fields are not protected by any mutex. */
- const char* op_info; /*!< English text describing the
- current operation, or an empty
- string */
- ulint isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */
- ulint check_foreigns; /*!< normally TRUE, but if the user
- wants to suppress foreign key checks,
- (in table imports, for example) we
- set this FALSE */
- /*------------------------------*/
- /* MySQL has a transaction coordinator to coordinate two phase
- commit between multiple storage engines and the binary log. When
- an engine participates in a transaction, it's responsible for
- registering itself using the trans_register_ha() API. */
- unsigned is_registered:1;/* This flag is set to 1 after the
- transaction has been registered with
- the coordinator using the XA API, and
- is set to 0 after commit or rollback. */
- unsigned active_commit_ordered:1;/* 1 if owns prepare mutex, if
- this is set to 1 then registered should
- also be set to 1. This is used in the
- XA code */
- /*------------------------------*/
- ulint check_unique_secondary;
- /*!< normally TRUE, but if the user
- wants to speed up inserts by
- suppressing unique key checks
- for secondary indexes when we decide
- if we can use the insert buffer for
- them, we set this FALSE */
- ulint support_xa; /*!< normally we do the XA two-phase
- commit steps, but by setting this to
- FALSE, one can save CPU time and about
- 150 bytes in the undo log size as then
- we skip XA steps */
- ulint fake_changes;
- ulint flush_log_later;/* In 2PC, we hold the
- prepare_commit mutex across
- both phases. In that case, we
- defer flush of the logs to disk
- until after we release the
- mutex. */
- ulint must_flush_log_later;/*!< this flag is set to TRUE in
- trx_commit() if flush_log_later was
- TRUE, and there were modifications by
- the transaction; in that case we must
- flush the log in
- trx_commit_complete_for_mysql() */
- ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
- bool has_search_latch;
- /*!< true if this trx has latched any
- search system latch in S-mode */
- ulint search_latch_timeout;
- /*!< If we notice that someone is
- waiting for our S-lock on the search
- latch to be released, we wait in
- row0sel.cc for BTR_SEA_TIMEOUT new
- searches until we try to keep
- the search latch again over
- calls from MySQL; this is intended
- to reduce contention on the search
- latch */
- trx_dict_op_t dict_operation; /**< @see enum trx_dict_op */
-
- /* Fields protected by the srv_conc_mutex. */
- ulint declared_to_be_inside_innodb;
- /*!< this is TRUE if we have declared
- this transaction in
- srv_conc_enter_innodb to be inside the
- InnoDB engine */
- ulint n_tickets_to_enter_innodb;
- /*!< this can be > 0 only when
- declared_to_... is TRUE; when we come
- to srv_conc_innodb_enter, if the value
- here is > 0, we decrement this by 1 */
- ulint dict_operation_lock_mode;
- /*!< 0, RW_S_LATCH, or RW_X_LATCH:
- the latch mode trx currently holds
- on dict_operation_lock. Protected
- by dict_operation_lock. */
-
- trx_id_t no; /*!< transaction serialization number:
- max trx id shortly before the
- transaction is moved to
- COMMITTED_IN_MEMORY state.
- Protected by trx_sys_t::mutex
- when trx->in_rw_trx_list. Initially
- set to TRX_ID_MAX. */
-
- time_t start_time; /*!< time the trx state last time became
- TRX_STATE_ACTIVE */
- ib_uint64_t start_time_micro; /*!< start time of transaction in
- microseconds */
- trx_id_t id; /*!< transaction id */
- XID xid; /*!< X/Open XA transaction
- identification to identify a
- transaction branch */
- lsn_t commit_lsn; /*!< lsn at the time of the commit */
- table_id_t table_id; /*!< Table to drop iff dict_operation
- == TRX_DICT_OP_TABLE, or 0. */
- /*------------------------------*/
- THD* mysql_thd; /*!< MySQL thread handle corresponding
- to this trx, or NULL */
- trx_abort_t abort_type; /*!< Transaction abort type */
-
- const char* mysql_log_file_name;
- /*!< if MySQL binlog is used, this field
- contains a pointer to the latest file
- name; this is NULL if binlog is not
- used */
- ib_int64_t mysql_log_offset;
- /*!< if MySQL binlog is used, this
- field contains the end offset of the
- binlog entry */
- time_t idle_start;
- ib_int64_t last_stmt_start;
- /*------------------------------*/
- ulint n_mysql_tables_in_use; /*!< number of Innobase tables
- used in the processing of the current
- SQL statement in MySQL */
- ulint mysql_n_tables_locked;
- /*!< how many tables the current SQL
- statement uses, except those
- in consistent read */
- /*------------------------------*/
- UT_LIST_NODE_T(trx_t)
- trx_list; /*!< list of transactions;
- protected by trx_sys->mutex.
- The same node is used for both
- trx_sys_t::ro_trx_list and
- trx_sys_t::rw_trx_list */
-#ifdef UNIV_DEBUG
- /** The following two fields are mutually exclusive. */
- /* @{ */
-
- ibool in_ro_trx_list; /*!< TRUE if in trx_sys->ro_trx_list */
- ibool in_rw_trx_list; /*!< TRUE if in trx_sys->rw_trx_list */
- /* @} */
-#endif /* UNIV_DEBUG */
- UT_LIST_NODE_T(trx_t)
- mysql_trx_list; /*!< list of transactions created for
- MySQL; protected by trx_sys->mutex */
-#ifdef UNIV_DEBUG
- ibool in_mysql_trx_list;
- /*!< TRUE if in
- trx_sys->mysql_trx_list */
-#endif /* UNIV_DEBUG */
- UT_LIST_NODE_T(trx_t)
- trx_serial_list;/*!< list node for
- trx_sys->trx_serial_list */
- bool in_trx_serial_list;
- /* Set when transaction is in the
- trx_serial_list */
- /*------------------------------*/
- dberr_t error_state; /*!< 0 if no error, otherwise error
- number; NOTE That ONLY the thread
- doing the transaction is allowed to
- set this field: this is NOT protected
- by any mutex */
- const dict_index_t*error_info; /*!< if the error number indicates a
- duplicate key error, a pointer to
- the problematic index is stored here */
- ulint error_key_num; /*!< if the index creation fails to a
- duplicate key error, a mysql key
- number of that index is stored here */
- sess_t* sess; /*!< session of the trx, NULL if none */
- que_t* graph; /*!< query currently run in the session,
- or NULL if none; NOTE that the query
- belongs to the session, and it can
- survive over a transaction commit, if
- it is a stored procedure with a COMMIT
- WORK statement, for instance */
- read_view_t* global_read_view;
- /*!< consistent read view associated
- to a transaction or NULL */
- read_view_t* read_view; /*!< consistent read view used in the
- transaction or NULL, this read view
- if defined can be normal read view
- associated to a transaction (i.e.
- same as global_read_view) or read view
- associated to a cursor */
- read_view_t* prebuilt_view; /* pre-built view array */
- /*------------------------------*/
- UT_LIST_BASE_NODE_T(trx_named_savept_t)
- trx_savepoints; /*!< savepoints set with SAVEPOINT ...,
- oldest first */
- /*------------------------------*/
- ib_mutex_t undo_mutex; /*!< mutex protecting the fields in this
- section (down to undo_no_arr), EXCEPT
- last_sql_stat_start, which can be
- accessed only when we know that there
- cannot be any activity in the undo
- logs! */
- undo_no_t undo_no; /*!< next undo log record number to
- assign; since the undo log is
- private for a transaction, this
- is a simple ascending sequence
- with no gaps; thus it represents
- the number of modified/inserted
- rows in a transaction */
- trx_savept_t last_sql_stat_start;
- /*!< undo_no when the last sql statement
- was started: in case of an error, trx
- is rolled back down to this undo
- number; see note at undo_mutex! */
- trx_rseg_t* rseg; /*!< rollback segment assigned to the
- transaction, or NULL if not assigned
- yet */
- trx_undo_t* insert_undo; /*!< pointer to the insert undo log, or
- NULL if no inserts performed yet */
- trx_undo_t* update_undo; /*!< pointer to the update undo log, or
- NULL if no update performed yet */
- undo_no_t roll_limit; /*!< least undo number to undo during
- a rollback */
- ulint pages_undone; /*!< number of undo log pages undone
- since the last undo log truncation */
- trx_undo_arr_t* undo_no_arr; /*!< array of undo numbers of undo log
- records which are currently processed
- by a rollback operation */
- /*------------------------------*/
- ulint n_autoinc_rows; /*!< no. of AUTO-INC rows required for
- an SQL statement. This is useful for
- multi-row INSERTs */
- ib_vector_t* autoinc_locks; /* AUTOINC locks held by this
- transaction. Note that these are
- also in the lock list trx_locks. This
- vector needs to be freed explicitly
- when the trx instance is destroyed.
- Protected by lock_sys->mutex. */
- /*------------------------------*/
- ibool read_only; /*!< TRUE if transaction is flagged
- as a READ-ONLY transaction.
- if !auto_commit || will_lock > 0
- then it will added to the list
- trx_sys_t::ro_trx_list. A read only
- transaction will not be assigned an
- UNDO log. Non-locking auto-commit
- read-only transaction will not be on
- either list. */
- ibool auto_commit; /*!< TRUE if it is an autocommit */
- ulint will_lock; /*!< Will acquire some locks. Increment
- each time we determine that a lock will
- be acquired by the MySQL layer. */
- bool ddl; /*!< true if it is a transaction that
- is being started for a DDL operation */
- /*------------------------------*/
- fts_trx_t* fts_trx; /*!< FTS information, or NULL if
- transaction hasn't modified tables
- with FTS indexes (yet). */
- doc_id_t fts_next_doc_id;/* The document id used for updates */
- /*------------------------------*/
- ulint flush_tables; /*!< if "covering" the FLUSH TABLES",
- count of tables being flushed. */
-
- /*------------------------------*/
-#ifdef UNIV_DEBUG
- ulint start_line; /*!< Track where it was started from */
- const char* start_file; /*!< Filename where it was started */
-#endif /* UNIV_DEBUG */
- /*------------------------------*/
- bool api_trx; /*!< trx started by InnoDB API */
- bool api_auto_commit;/*!< automatic commit */
- bool read_write; /*!< if read and write operation */
-
- /*------------------------------*/
- char detailed_error[256]; /*!< detailed error message for last
- error, or empty. */
-#ifdef WITH_WSREP
- os_event_t wsrep_event; /* event waited for in srv_conc_slot */
-#endif /* WITH_WSREP */
- /*------------------------------*/
- ulint io_reads;
- ib_uint64_t io_read;
- ulint io_reads_wait_timer;
- ib_uint64_t lock_que_wait_ustarted;
- ulint lock_que_wait_timer;
- ulint innodb_que_wait_timer;
- ulint distinct_page_access;
-#define DPAH_SIZE 8192
- byte* distinct_page_access_hash;
- ibool take_stats;
-
- /* Lock wait statistics */
- ulint n_rec_lock_waits;
- /*!< Number of record lock waits,
- might not be exactly correct. */
- ulint n_table_lock_waits;
- /*!< Number of table lock waits,
- might not be exactly correct. */
- ulint total_rec_lock_wait_time;
- /*!< Total rec lock wait time up
- to this moment. */
- ulint total_table_lock_wait_time;
- /*!< Total table lock wait time
- up to this moment. */
-};
-
-/* Transaction isolation levels (trx->isolation_level) */
-#define TRX_ISO_READ_UNCOMMITTED 0 /* dirty read: non-locking
- SELECTs are performed so that
- we do not look at a possible
- earlier version of a record;
- thus they are not 'consistent'
- reads under this isolation
- level; otherwise like level
- 2 */
-
-#define TRX_ISO_READ_COMMITTED 1 /* somewhat Oracle-like
- isolation, except that in
- range UPDATE and DELETE we
- must block phantom rows
- with next-key locks;
- SELECT ... FOR UPDATE and ...
- LOCK IN SHARE MODE only lock
- the index records, NOT the
- gaps before them, and thus
- allow free inserting;
- each consistent read reads its
- own snapshot */
-
-#define TRX_ISO_REPEATABLE_READ 2 /* this is the default;
- all consistent reads in the
- same trx read the same
- snapshot;
- full next-key locking used
- in locking reads to block
- insertions into gaps */
-
-#define TRX_ISO_SERIALIZABLE 3 /* all plain SELECTs are
- converted to LOCK IN SHARE
- MODE reads */
-
-/* Treatment of duplicate values (trx->duplicates; for example, in inserts).
-Multiple flags can be combined with bitwise OR. */
-#define TRX_DUP_IGNORE 1 /* duplicate rows are to be updated */
-#define TRX_DUP_REPLACE 2 /* duplicate rows are to be replaced */
-
-
-/* Types of a trx signal */
-#define TRX_SIG_NO_SIGNAL 0
-#define TRX_SIG_TOTAL_ROLLBACK 1
-#define TRX_SIG_ROLLBACK_TO_SAVEPT 2
-#define TRX_SIG_COMMIT 3
-#define TRX_SIG_BREAK_EXECUTION 5
-
-/* Sender types of a signal */
-#define TRX_SIG_SELF 0 /* sent by the session itself, or
- by an error occurring within this
- session */
-#define TRX_SIG_OTHER_SESS 1 /* sent by another session (which
- must hold rights to this) */
-
-/* Flag bits for trx_struct.active_flag */
-#define TRX_ACTIVE_IN_MYSQL (1<<0)
-#define TRX_ACTIVE_COMMIT_ORDERED (1<<1)
-
-/** Commit node states */
-enum commit_node_state {
- COMMIT_NODE_SEND = 1, /*!< about to send a commit signal to
- the transaction */
- COMMIT_NODE_WAIT /*!< commit signal sent to the transaction,
- waiting for completion */
-};
-
-/** Commit command node in a query graph */
-struct commit_node_t{
- que_common_t common; /*!< node type: QUE_NODE_COMMIT */
- enum commit_node_state
- state; /*!< node execution state */
-};
-
-
-/** Test if trx->mutex is owned. */
-#define trx_mutex_own(t) mutex_own(&t->mutex)
-
-/** Acquire the trx->mutex. */
-#define trx_mutex_enter(t) do { \
- mutex_enter(&t->mutex); \
-} while (0)
-
-/** Release the trx->mutex. */
-#define trx_mutex_exit(t) do { \
- mutex_exit(&t->mutex); \
-} while (0)
-
-/** @brief The latch protecting the adaptive search system
-
-This latch protects the
-(1) hash index;
-(2) columns of a record to which we have a pointer in the hash index;
-
-but does NOT protect:
-
-(3) next record offset field in a record;
-(4) next or previous records on the same page.
-
-Bear in mind (3) and (4) when using the hash index.
-*/
-extern prio_rw_lock_t* btr_search_latch_arr;
-
-#ifndef UNIV_NONINL
-#include "trx0trx.ic"
-#endif
-#endif /* !UNIV_HOTBACKUP */
-
-#endif
diff --git a/storage/xtradb/include/trx0trx.ic b/storage/xtradb/include/trx0trx.ic
deleted file mode 100644
index eb7d62d9cad..00000000000
--- a/storage/xtradb/include/trx0trx.ic
+++ /dev/null
@@ -1,184 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0trx.ic
-The transaction
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-/**********************************************************************//**
-Determines if a transaction is in the given state.
-The caller must hold trx_sys->mutex, or it must be the thread
-that is serving a running transaction.
-A running transaction must be in trx_sys->ro_trx_list or trx_sys->rw_trx_list
-unless it is a non-locking autocommit read only transaction, which is only
-in trx_sys->mysql_trx_list.
-@return TRUE if trx->state == state */
-UNIV_INLINE
-ibool
-trx_state_eq(
-/*=========*/
- const trx_t* trx, /*!< in: transaction */
- trx_state_t state, /*!< in: state;
- if state != TRX_STATE_NOT_STARTED
- asserts that
- trx->state != TRX_STATE_NOT_STARTED */
- bool relaxed)
- /*!< in: whether to allow
- trx->state == TRX_STATE_NOT_STARTED
- after an error has been reported */
-{
-#ifdef UNIV_DEBUG
- switch (trx->state) {
- case TRX_STATE_PREPARED:
- ut_ad(!trx_is_autocommit_non_locking(trx));
- return(trx->state == state);
-
- case TRX_STATE_ACTIVE:
- assert_trx_nonlocking_or_in_list(trx);
- return(state == trx->state);
-
- case TRX_STATE_COMMITTED_IN_MEMORY:
- assert_trx_in_list(trx);
- return(state == trx->state);
-
- case TRX_STATE_NOT_STARTED:
- /* This state is not allowed for running transactions. */
- ut_a(state == TRX_STATE_NOT_STARTED
- || (relaxed
- && thd_get_error_number(trx->mysql_thd)));
- ut_ad(!trx->in_rw_trx_list);
- ut_ad(!trx->in_ro_trx_list);
- return(state == trx->state);
- }
- ut_error;
-#endif /* UNIV_DEBUG */
- return(trx->state == state);
-}
-
-/****************************************************************//**
-Retrieves the error_info field from a trx.
-@return the error info */
-UNIV_INLINE
-const dict_index_t*
-trx_get_error_info(
-/*===============*/
- const trx_t* trx) /*!< in: trx object */
-{
- return(trx->error_info);
-}
-
-/*******************************************************************//**
-Retrieves transaction's que state in a human readable string. The string
-should not be free()'d or modified.
-@return string in the data segment */
-UNIV_INLINE
-const char*
-trx_get_que_state_str(
-/*==================*/
- const trx_t* trx) /*!< in: transaction */
-{
- /* be sure to adjust TRX_QUE_STATE_STR_MAX_LEN if you change this */
- switch (trx->lock.que_state) {
- case TRX_QUE_RUNNING:
- return("RUNNING");
- case TRX_QUE_LOCK_WAIT:
- return("LOCK WAIT");
- case TRX_QUE_ROLLING_BACK:
- return("ROLLING BACK");
- case TRX_QUE_COMMITTING:
- return("COMMITTING");
- default:
- return("UNKNOWN");
- }
-}
-
-/**********************************************************************//**
-Determine if a transaction is a dictionary operation.
-@return dictionary operation mode */
-UNIV_INLINE
-enum trx_dict_op_t
-trx_get_dict_operation(
-/*===================*/
- const trx_t* trx) /*!< in: transaction */
-{
- trx_dict_op_t op = static_cast<trx_dict_op_t>(trx->dict_operation);
-
-#ifdef UNIV_DEBUG
- switch (op) {
- case TRX_DICT_OP_NONE:
- case TRX_DICT_OP_TABLE:
- case TRX_DICT_OP_INDEX:
- return(op);
- }
- ut_error;
-#endif /* UNIV_DEBUG */
- return(op);
-}
-/**********************************************************************//**
-Flag a transaction a dictionary operation. */
-UNIV_INLINE
-void
-trx_set_dict_operation(
-/*===================*/
- trx_t* trx, /*!< in/out: transaction */
- enum trx_dict_op_t op) /*!< in: operation, not
- TRX_DICT_OP_NONE */
-{
-#ifdef UNIV_DEBUG
- enum trx_dict_op_t old_op = trx_get_dict_operation(trx);
-
- switch (op) {
- case TRX_DICT_OP_NONE:
- ut_error;
- break;
- case TRX_DICT_OP_TABLE:
- switch (old_op) {
- case TRX_DICT_OP_NONE:
- case TRX_DICT_OP_INDEX:
- case TRX_DICT_OP_TABLE:
- goto ok;
- }
- ut_error;
- break;
- case TRX_DICT_OP_INDEX:
- ut_ad(old_op == TRX_DICT_OP_NONE);
- break;
- }
-ok:
-#endif /* UNIV_DEBUG */
-
- trx->ddl = true;
- trx->dict_operation = op;
-}
-
-/********************************************************************//**
-In XtraDB it is impossible for a transaction to own a search latch outside of
-InnoDB code, so there is nothing to release on demand. We keep this function to
-simplify maintenance.*/
-UNIV_INLINE
-void
-trx_search_latch_release_if_reserved(
-/*=================================*/
- trx_t* trx MY_ATTRIBUTE((unused))) /*!< in: transaction */
-{
- ut_ad(!trx->has_search_latch);
-}
diff --git a/storage/xtradb/include/trx0types.h b/storage/xtradb/include/trx0types.h
deleted file mode 100644
index 7ca95131328..00000000000
--- a/storage/xtradb/include/trx0types.h
+++ /dev/null
@@ -1,147 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0types.h
-Transaction system global type definitions
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0types_h
-#define trx0types_h
-
-#include "ut0byte.h"
-
-/** printf(3) format used for printing DB_TRX_ID and other system fields */
-#define TRX_ID_FMT IB_ID_FMT
-
-/** maximum length that a formatted trx_t::id could take, not including
-the terminating NUL character. */
-#define TRX_ID_MAX_LEN 17
-
-/** Transaction execution states when trx->state == TRX_STATE_ACTIVE */
-enum trx_que_t {
- TRX_QUE_RUNNING, /*!< transaction is running */
- TRX_QUE_LOCK_WAIT, /*!< transaction is waiting for
- a lock */
- TRX_QUE_ROLLING_BACK, /*!< transaction is rolling back */
- TRX_QUE_COMMITTING /*!< transaction is committing */
-};
-
-/** Transaction states (trx_t::state) */
-enum trx_state_t {
- TRX_STATE_NOT_STARTED,
- TRX_STATE_ACTIVE,
- TRX_STATE_PREPARED, /* Support for 2PC/XA */
- TRX_STATE_COMMITTED_IN_MEMORY
-};
-
-/** Type of data dictionary operation */
-enum trx_dict_op_t {
- /** The transaction is not modifying the data dictionary. */
- TRX_DICT_OP_NONE = 0,
- /** The transaction is creating a table or an index, or
- dropping a table. The table must be dropped in crash
- recovery. This and TRX_DICT_OP_NONE are the only possible
- operation modes in crash recovery. */
- TRX_DICT_OP_TABLE = 1,
- /** The transaction is creating or dropping an index in an
- existing table. In crash recovery, the data dictionary
- must be locked, but the table must not be dropped. */
- TRX_DICT_OP_INDEX = 2
-};
-
-/** Memory objects */
-/* @{ */
-/** Transaction */
-struct trx_t;
-/** The locks and state of an active transaction */
-struct trx_lock_t;
-/** Transaction system */
-struct trx_sys_t;
-/** Signal */
-struct trx_sig_t;
-/** Rollback segment */
-struct trx_rseg_t;
-/** Transaction undo log */
-struct trx_undo_t;
-/** Array of undo numbers of undo records being rolled back or purged */
-struct trx_undo_arr_t;
-/** A cell of trx_undo_arr_t */
-struct trx_undo_inf_t;
-/** The control structure used in the purge operation */
-struct trx_purge_t;
-/** Rollback command node in a query graph */
-struct roll_node_t;
-/** Commit command node in a query graph */
-struct commit_node_t;
-/** SAVEPOINT command node in a query graph */
-struct trx_named_savept_t;
-/* @} */
-
-/** Rollback contexts */
-enum trx_rb_ctx {
- RB_NONE = 0, /*!< no rollback */
- RB_NORMAL, /*!< normal rollback */
- RB_RECOVERY_PURGE_REC,
- /*!< rolling back an incomplete transaction,
- in crash recovery, rolling back an
- INSERT that was performed by updating a
- delete-marked record; if the delete-marked record
- no longer exists in an active read view, it will
- be purged */
- RB_RECOVERY /*!< rolling back an incomplete transaction,
- in crash recovery */
-};
-
-/** Row identifier (DB_ROW_ID, DATA_ROW_ID) */
-typedef ib_id_t row_id_t;
-/** Transaction identifier (DB_TRX_ID, DATA_TRX_ID) */
-typedef ib_id_t trx_id_t;
-/** Rollback pointer (DB_ROLL_PTR, DATA_ROLL_PTR) */
-typedef ib_id_t roll_ptr_t;
-/** Undo number */
-typedef ib_id_t undo_no_t;
-
-/** Maximum transaction identifier */
-#define TRX_ID_MAX IB_ID_MAX
-
-/** Transaction savepoint */
-struct trx_savept_t{
- undo_no_t least_undo_no; /*!< least undo number to undo */
-};
-
-/** File objects */
-/* @{ */
-/** Transaction system header */
-typedef byte trx_sysf_t;
-/** Rollback segment header */
-typedef byte trx_rsegf_t;
-/** Undo segment header */
-typedef byte trx_usegf_t;
-/** Undo log header */
-typedef byte trx_ulogf_t;
-/** Undo log page header */
-typedef byte trx_upagef_t;
-
-/** Undo log record */
-typedef byte trx_undo_rec_t;
-/* @} */
-
-#endif
diff --git a/storage/xtradb/include/trx0undo.h b/storage/xtradb/include/trx0undo.h
deleted file mode 100644
index 190308112ba..00000000000
--- a/storage/xtradb/include/trx0undo.h
+++ /dev/null
@@ -1,595 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0undo.h
-Transaction undo log
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef trx0undo_h
-#define trx0undo_h
-
-#include "univ.i"
-#include "trx0types.h"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
-#include "page0types.h"
-#include "trx0xa.h"
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Builds a roll pointer.
-@return roll pointer */
-UNIV_INLINE
-roll_ptr_t
-trx_undo_build_roll_ptr(
-/*====================*/
- ibool is_insert, /*!< in: TRUE if insert undo log */
- ulint rseg_id, /*!< in: rollback segment id */
- ulint page_no, /*!< in: page number */
- ulint offset); /*!< in: offset of the undo entry within page */
-/***********************************************************************//**
-Decodes a roll pointer. */
-UNIV_INLINE
-void
-trx_undo_decode_roll_ptr(
-/*=====================*/
- roll_ptr_t roll_ptr, /*!< in: roll pointer */
- ibool* is_insert, /*!< out: TRUE if insert undo log */
- ulint* rseg_id, /*!< out: rollback segment id */
- ulint* page_no, /*!< out: page number */
- ulint* offset); /*!< out: offset of the undo
- entry within page */
-/***********************************************************************//**
-Returns TRUE if the roll pointer is of the insert type.
-@return TRUE if insert undo log */
-UNIV_INLINE
-ibool
-trx_undo_roll_ptr_is_insert(
-/*========================*/
- roll_ptr_t roll_ptr); /*!< in: roll pointer */
-/***********************************************************************//**
-Returns true if the record is of the insert type.
-@return true if the record was freshly inserted (not updated). */
-UNIV_INLINE
-bool
-trx_undo_trx_id_is_insert(
-/*======================*/
- const byte* trx_id) /*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */
- MY_ATTRIBUTE((nonnull, pure, warn_unused_result));
-#endif /* !UNIV_HOTBACKUP */
-/*****************************************************************//**
-Writes a roll ptr to an index page. In case that the size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_roll_ptr(
-/*===============*/
- byte* ptr, /*!< in: pointer to memory where
- written */
- roll_ptr_t roll_ptr); /*!< in: roll ptr */
-/*****************************************************************//**
-Reads a roll ptr from an index page. In case that the roll ptr size
-changes in some future version, this function should be used instead of
-mach_read_...
-@return roll ptr */
-UNIV_INLINE
-roll_ptr_t
-trx_read_roll_ptr(
-/*==============*/
- const byte* ptr); /*!< in: pointer to memory from where to read */
-#ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Gets an undo log page and x-latches it.
-@return pointer to page x-latched */
-UNIV_INLINE
-page_t*
-trx_undo_page_get(
-/*==============*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Gets an undo log page and s-latches it.
-@return pointer to page s-latched */
-UNIV_INLINE
-page_t*
-trx_undo_page_get_s_latched(
-/*========================*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Returns the previous undo record on the page in the specified log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_prev_rec(
-/*=======================*/
- trx_undo_rec_t* rec, /*!< in: undo log record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset);/*!< in: undo log header offset on page */
-/******************************************************************//**
-Returns the next undo log record on the page in the specified log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_next_rec(
-/*=======================*/
- trx_undo_rec_t* rec, /*!< in: undo log record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset);/*!< in: undo log header offset on page */
-/******************************************************************//**
-Returns the last undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_last_rec(
-/*=======================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset); /*!< in: undo log header offset on page */
-/******************************************************************//**
-Returns the first undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_first_rec(
-/*========================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset);/*!< in: undo log header offset on page */
-/***********************************************************************//**
-Gets the previous record in an undo log.
-@return undo log record, the page latched (S or X, per 'shared'), NULL if none */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_prev_rec(
-/*==================*/
- trx_undo_rec_t* rec, /*!< in: undo record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- bool shared, /*!< in: true=S-latch, false=X-latch */
- mtr_t* mtr); /*!< in: mtr */
-/***********************************************************************//**
-Gets the next record in an undo log.
-@return undo log record, the page s-latched, NULL if none */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_next_rec(
-/*==================*/
- trx_undo_rec_t* rec, /*!< in: undo record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- mtr_t* mtr); /*!< in: mtr */
-/***********************************************************************//**
-Gets the first record in an undo log.
-@return undo log record, the page latched, NULL if none */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_first_rec(
-/*===================*/
- ulint space, /*!< in: undo log header space */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- ulint mode, /*!< in: latching mode: RW_S_LATCH or RW_X_LATCH */
- mtr_t* mtr); /*!< in: mtr */
-/********************************************************************//**
-Tries to add a page to the undo log segment where the undo log is placed.
-@return X-latched block if success, else NULL */
-UNIV_INTERN
-buf_block_t*
-trx_undo_add_page(
-/*==============*/
- trx_t* trx, /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory object */
- mtr_t* mtr) /*!< in: mtr which does not have a latch to any
- undo log page; the caller must have reserved
- the rollback segment mutex */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Frees the last undo log page.
-The caller must hold the rollback segment mutex. */
-UNIV_INTERN
-void
-trx_undo_free_last_page_func(
-/*==========================*/
-#ifdef UNIV_DEBUG
- const trx_t* trx, /*!< in: transaction */
-#endif /* UNIV_DEBUG */
- trx_undo_t* undo, /*!< in/out: undo log memory copy */
- mtr_t* mtr) /*!< in/out: mini-transaction which does not
- have a latch to any undo log page or which
- has allocated the undo log page */
- MY_ATTRIBUTE((nonnull));
-#ifdef UNIV_DEBUG
-# define trx_undo_free_last_page(trx,undo,mtr) \
- trx_undo_free_last_page_func(trx,undo,mtr)
-#else /* UNIV_DEBUG */
-# define trx_undo_free_last_page(trx,undo,mtr) \
- trx_undo_free_last_page_func(undo,mtr)
-#endif /* UNIV_DEBUG */
-
-/***********************************************************************//**
-Truncates an undo log from the end. This function is used during a rollback
-to free space from an undo log. */
-UNIV_INTERN
-void
-trx_undo_truncate_end(
-/*=======================*/
- trx_t* trx, /*!< in: transaction whose undo log it is */
- trx_undo_t* undo, /*!< in/out: undo log */
- undo_no_t limit) /*!< in: all undo records with undo number
- >= this value should be truncated */
- MY_ATTRIBUTE((nonnull));
-
-/***********************************************************************//**
-Truncates an undo log from the start. This function is used during a purge
-operation. */
-UNIV_INTERN
-void
-trx_undo_truncate_start(
-/*====================*/
- trx_rseg_t* rseg, /*!< in: rollback segment */
- ulint space, /*!< in: space id of the log */
- ulint hdr_page_no, /*!< in: header page number */
- ulint hdr_offset, /*!< in: header offset on the page */
- undo_no_t limit); /*!< in: all undo pages with
- undo numbers < this value
- should be truncated; NOTE that
- the function only frees whole
- pages; the header page is not
- freed, but emptied, if all the
- records there are < limit */
-/********************************************************************//**
-Initializes the undo log lists for a rollback segment memory copy.
-This function is only called when the database is started or a new
-rollback segment created.
-@return the combined size of undo log segments in pages */
-UNIV_INTERN
-ulint
-trx_undo_lists_init(
-/*================*/
- trx_rseg_t* rseg); /*!< in: rollback segment memory object */
-/**********************************************************************//**
-Assigns an undo log for a transaction. A new undo log is created or a cached
-undo log reused.
-@return DB_SUCCESS if undo log assign successful, possible error codes
-are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE DB_READ_ONLY
-DB_OUT_OF_MEMORY */
-UNIV_INTERN
-dberr_t
-trx_undo_assign_undo(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- ulint type) /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/******************************************************************//**
-Sets the state of the undo log segment at a transaction finish.
-@return undo log segment header page, x-latched */
-UNIV_INTERN
-page_t*
-trx_undo_set_state_at_finish(
-/*=========================*/
- trx_undo_t* undo, /*!< in: undo log memory copy */
- mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Sets the state of the undo log segment at a transaction prepare.
-@return undo log segment header page, x-latched */
-UNIV_INTERN
-page_t*
-trx_undo_set_state_at_prepare(
-/*==========================*/
- trx_t* trx, /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory copy */
- mtr_t* mtr); /*!< in: mtr */
-
-/**********************************************************************//**
-Adds the update undo log header as the first in the history list, and
-frees the memory object, or puts it to the list of cached update undo log
-segments. */
-UNIV_INTERN
-void
-trx_undo_update_cleanup(
-/*====================*/
- trx_t* trx, /*!< in: trx owning the update undo log */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr); /*!< in: mtr */
-/******************************************************************//**
-Frees or caches an insert undo log after a transaction commit or rollback.
-Knowledge of inserts is not needed after a commit or rollback, therefore
-the data can be discarded. */
-UNIV_INTERN
-void
-trx_undo_insert_cleanup(
-/*====================*/
- trx_t* trx); /*!< in: transaction handle */
-
-/********************************************************************//**
-At shutdown, frees the undo logs of a PREPARED transaction. */
-UNIV_INTERN
-void
-trx_undo_free_prepared(
-/*===================*/
- trx_t* trx) /*!< in/out: PREPARED transaction */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
-#endif /* !UNIV_HOTBACKUP */
-/***********************************************************//**
-Parses the redo log entry of an undo log page initialization.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_page_init(
-/*=====================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/***********************************************************//**
-Parses the redo log entry of an undo log page header create or reuse.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_page_header(
-/*=======================*/
- ulint type, /*!< in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/***********************************************************//**
-Parses the redo log entry of an undo log page header discard.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_discard_latest(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/************************************************************************
-Frees an undo log memory copy. */
-UNIV_INTERN
-void
-trx_undo_mem_free(
-/*==============*/
- trx_undo_t* undo); /* in: the undo object to be freed */
-
-/* Types of an undo log segment */
-#define TRX_UNDO_INSERT 1 /* contains undo entries for inserts */
-#define TRX_UNDO_UPDATE 2 /* contains undo entries for updates
- and delete markings: in short,
- modifies (the name 'UPDATE' is a
- historical relic) */
-/* States of an undo log segment */
-#define TRX_UNDO_ACTIVE 1 /* contains an undo log of an active
- transaction */
-#define TRX_UNDO_CACHED 2 /* cached for quick reuse */
-#define TRX_UNDO_TO_FREE 3 /* insert undo segment can be freed */
-#define TRX_UNDO_TO_PURGE 4 /* update undo segment will not be
- reused: it can be freed in purge when
- all undo data in it is removed */
-#define TRX_UNDO_PREPARED 5 /* contains an undo log of a
- prepared transaction */
-
-#ifndef UNIV_HOTBACKUP
-/** Transaction undo log memory object; this is protected by the undo_mutex
-in the corresponding transaction object */
-
-struct trx_undo_t{
- /*-----------------------------*/
- ulint id; /*!< undo log slot number within the
- rollback segment */
- ulint type; /*!< TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- ulint state; /*!< state of the corresponding undo log
- segment */
- ibool del_marks; /*!< relevant only in an update undo
- log: this is TRUE if the transaction may
- have delete marked records, because of
- a delete of a row or an update of an
- indexed field; purge is then
- necessary; also TRUE if the transaction
- has updated an externally stored
- field */
- trx_id_t trx_id; /*!< id of the trx assigned to the undo
- log */
- XID xid; /*!< X/Open XA transaction
- identification */
- ibool dict_operation; /*!< TRUE if a dict operation trx */
- table_id_t table_id; /*!< if a dict operation, then the table
- id */
- trx_rseg_t* rseg; /*!< rseg where the undo log belongs */
- /*-----------------------------*/
- ulint space; /*!< space id where the undo log
- is placed */
- ulint zip_size; /*!< compressed page size of space
- in bytes, or 0 for uncompressed */
- ulint hdr_page_no; /*!< page number of the header page in
- the undo log */
- ulint hdr_offset; /*!< header offset of the undo log on
- the page */
- ulint last_page_no; /*!< page number of the last page in the
- undo log; this may differ from
- top_page_no during a rollback */
- ulint size; /*!< current size in pages */
- /*-----------------------------*/
- ulint empty; /*!< TRUE if the stack of undo log
- records is currently empty */
- ulint top_page_no; /*!< page number where the latest undo
- log record was catenated; during
- rollback the page from which the latest
- undo record was chosen */
- ulint top_offset; /*!< offset of the latest undo record,
- i.e., the topmost element in the undo
- log if we think of it as a stack */
- undo_no_t top_undo_no; /*!< undo number of the latest record */
- buf_block_t* guess_block; /*!< guess for the buffer block where
- the top page might reside */
- /*-----------------------------*/
- UT_LIST_NODE_T(trx_undo_t) undo_list;
- /*!< undo log objects in the rollback
- segment are chained into lists */
-};
-#endif /* !UNIV_HOTBACKUP */
-
-/** The offset of the undo log page header on pages of the undo log */
-#define TRX_UNDO_PAGE_HDR FSEG_PAGE_DATA
-/*-------------------------------------------------------------*/
-/** Transaction undo log page header offsets */
-/* @{ */
-#define TRX_UNDO_PAGE_TYPE 0 /*!< TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
-#define TRX_UNDO_PAGE_START 2 /*!< Byte offset where the undo log
- records for the LATEST transaction
- start on this page (remember that
- in an update undo log, the first page
- can contain several undo logs) */
-#define TRX_UNDO_PAGE_FREE 4 /*!< On each page of the undo log this
- field contains the byte offset of the
- first free byte on the page */
-#define TRX_UNDO_PAGE_NODE 6 /*!< The file list node in the chain
- of undo log pages */
-/*-------------------------------------------------------------*/
-#define TRX_UNDO_PAGE_HDR_SIZE (6 + FLST_NODE_SIZE)
- /*!< Size of the transaction undo
- log page header, in bytes */
-/* @} */
-
-/** An update undo segment with just one page can be reused if it has
-at most this many bytes used; we must leave space at least for one new undo
-log header on the page */
-
-#define TRX_UNDO_PAGE_REUSE_LIMIT (3 * UNIV_PAGE_SIZE / 4)
-
-/* An update undo log segment may contain several undo logs on its first page
-if the undo logs took so little space that the segment could be cached and
-reused. All the undo log headers are then on the first page, and the last one
-owns the undo log records on subsequent pages if the segment is bigger than
-one page. If an undo log is stored in a segment, then on the first page it is
-allowed to have zero undo records, but if the segment extends to several
-pages, then all the rest of the pages must contain at least one undo log
-record. */
-
-/** The offset of the undo log segment header on the first page of the undo
-log segment */
-
-#define TRX_UNDO_SEG_HDR (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE)
-/** Undo log segment header */
-/* @{ */
-/*-------------------------------------------------------------*/
-#define TRX_UNDO_STATE 0 /*!< TRX_UNDO_ACTIVE, ... */
-#define TRX_UNDO_LAST_LOG 2 /*!< Offset of the last undo log header
- on the segment header page, 0 if
- none */
-#define TRX_UNDO_FSEG_HEADER 4 /*!< Header for the file segment which
- the undo log segment occupies */
-#define TRX_UNDO_PAGE_LIST (4 + FSEG_HEADER_SIZE)
- /*!< Base node for the list of pages in
- the undo log segment; defined only on
- the undo log segment's first page */
-/*-------------------------------------------------------------*/
-/** Size of the undo log segment header */
-#define TRX_UNDO_SEG_HDR_SIZE (4 + FSEG_HEADER_SIZE + FLST_BASE_NODE_SIZE)
-/* @} */
-
-
-/** The undo log header. There can be several undo log headers on the first
-page of an update undo log segment. */
-/* @{ */
-/*-------------------------------------------------------------*/
-#define TRX_UNDO_TRX_ID 0 /*!< Transaction id */
-#define TRX_UNDO_TRX_NO 8 /*!< Transaction number of the
- transaction; defined only if the log
- is in a history list */
-#define TRX_UNDO_DEL_MARKS 16 /*!< Defined only in an update undo
- log: TRUE if the transaction may have
- done delete markings of records, and
- thus purge is necessary */
-#define TRX_UNDO_LOG_START 18 /*!< Offset of the first undo log record
- of this log on the header page; purge
- may remove undo log records from the
- log start, and therefore this is not
- necessarily the same as this log
- header end offset */
-#define TRX_UNDO_XID_EXISTS 20 /*!< TRUE if undo log header includes
- X/Open XA transaction identification
- XID */
-#define TRX_UNDO_DICT_TRANS 21 /*!< TRUE if the transaction is a table
- create, index create, or drop
- transaction: in recovery
- the transaction cannot be rolled back
- in the usual way: a 'rollback' rather
- means dropping the created or dropped
- table, if it still exists */
-#define TRX_UNDO_TABLE_ID 22 /*!< Id of the table if the preceding
- field is TRUE */
-#define TRX_UNDO_NEXT_LOG 30 /*!< Offset of the next undo log header
- on this page, 0 if none */
-#define TRX_UNDO_PREV_LOG 32 /*!< Offset of the previous undo log
- header on this page, 0 if none */
-#define TRX_UNDO_HISTORY_NODE 34 /*!< If the log is put to the history
- list, the file list node is here */
-/*-------------------------------------------------------------*/
-/** Size of the undo log header without XID information */
-#define TRX_UNDO_LOG_OLD_HDR_SIZE (34 + FLST_NODE_SIZE)
-
-/* Note: the writing of the undo log old header is coded by a log record
-MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE. The appending of an XID to the
-header is logged separately. In this sense, the XID is not really a member
-of the undo log header. TODO: do not append the XID to the log header if XA
-is not needed by the user. The XID wastes about 150 bytes of space in every
-undo log. In the history list we may have millions of undo logs, which means
-quite a large overhead. */
-
-/** X/Open XA Transaction Identification (XID) */
-/* @{ */
-/** xid_t::formatID */
-#define TRX_UNDO_XA_FORMAT (TRX_UNDO_LOG_OLD_HDR_SIZE)
-/** xid_t::gtrid_length */
-#define TRX_UNDO_XA_TRID_LEN (TRX_UNDO_XA_FORMAT + 4)
-/** xid_t::bqual_length */
-#define TRX_UNDO_XA_BQUAL_LEN (TRX_UNDO_XA_TRID_LEN + 4)
-/** Distributed transaction identifier data */
-#define TRX_UNDO_XA_XID (TRX_UNDO_XA_BQUAL_LEN + 4)
-/*--------------------------------------------------------------*/
-#define TRX_UNDO_LOG_XA_HDR_SIZE (TRX_UNDO_XA_XID + XIDDATASIZE)
- /*!< Total size of the undo log header
- with the XA XID */
-/* @} */
-
-#ifndef UNIV_NONINL
-#include "trx0undo.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/trx0undo.ic b/storage/xtradb/include/trx0undo.ic
deleted file mode 100644
index 577759d6c3d..00000000000
--- a/storage/xtradb/include/trx0undo.ic
+++ /dev/null
@@ -1,363 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0undo.ic
-Transaction undo log
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "data0type.h"
-#include "page0page.h"
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Builds a roll pointer.
-@return roll pointer */
-UNIV_INLINE
-roll_ptr_t
-trx_undo_build_roll_ptr(
-/*====================*/
- ibool is_insert, /*!< in: TRUE if insert undo log */
- ulint rseg_id, /*!< in: rollback segment id */
- ulint page_no, /*!< in: page number */
- ulint offset) /*!< in: offset of the undo entry within page */
-{
- roll_ptr_t roll_ptr;
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
- ut_ad(is_insert == 0 || is_insert == 1);
- ut_ad(rseg_id < TRX_SYS_N_RSEGS);
- ut_ad(offset < 65536);
-
- roll_ptr = (roll_ptr_t) is_insert << 55
- | (roll_ptr_t) rseg_id << 48
- | (roll_ptr_t) page_no << 16
- | offset;
- return(roll_ptr);
-}
-
-/***********************************************************************//**
-Decodes a roll pointer. */
-UNIV_INLINE
-void
-trx_undo_decode_roll_ptr(
-/*=====================*/
- roll_ptr_t roll_ptr, /*!< in: roll pointer */
- ibool* is_insert, /*!< out: TRUE if insert undo log */
- ulint* rseg_id, /*!< out: rollback segment id */
- ulint* page_no, /*!< out: page number */
- ulint* offset) /*!< out: offset of the undo
- entry within page */
-{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
- ut_ad(roll_ptr < (1ULL << 56));
- *offset = (ulint) roll_ptr & 0xFFFF;
- roll_ptr >>= 16;
- *page_no = (ulint) roll_ptr & 0xFFFFFFFF;
- roll_ptr >>= 32;
- *rseg_id = (ulint) roll_ptr & 0x7F;
- roll_ptr >>= 7;
- *is_insert = (ibool) roll_ptr; /* TRUE==1 */
-}
-
-/***********************************************************************//**
-Returns TRUE if the roll pointer is of the insert type.
-@return TRUE if insert undo log */
-UNIV_INLINE
-ibool
-trx_undo_roll_ptr_is_insert(
-/*========================*/
- roll_ptr_t roll_ptr) /*!< in: roll pointer */
-{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
- ut_ad(roll_ptr < (1ULL << 56));
- return((ibool) (roll_ptr >> 55));
-}
-
-/***********************************************************************//**
-Returns true if the record is of the insert type.
-@return true if the record was freshly inserted (not updated). */
-UNIV_INLINE
-bool
-trx_undo_trx_id_is_insert(
-/*======================*/
- const byte* trx_id) /*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */
-{
-#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
-# error
-#endif
- return(static_cast<bool>(trx_id[DATA_TRX_ID_LEN] >> 7));
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*****************************************************************//**
-Writes a roll ptr to an index page. In case that the size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_roll_ptr(
-/*===============*/
- byte* ptr, /*!< in: pointer to memory where
- written */
- roll_ptr_t roll_ptr) /*!< in: roll ptr */
-{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
- mach_write_to_7(ptr, roll_ptr);
-}
-
-/*****************************************************************//**
-Reads a roll ptr from an index page. In case that the roll ptr size
-changes in some future version, this function should be used instead of
-mach_read_...
-@return roll ptr */
-UNIV_INLINE
-roll_ptr_t
-trx_read_roll_ptr(
-/*==============*/
- const byte* ptr) /*!< in: pointer to memory from where to read */
-{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
- return(mach_read_from_7(ptr));
-}
-
-#ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Gets an undo log page and x-latches it.
-@return pointer to page x-latched */
-UNIV_INLINE
-page_t*
-trx_undo_page_get(
-/*==============*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block = buf_page_get(space, zip_size, page_no,
- RW_X_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
-
- return(buf_block_get_frame(block));
-}
-
-/******************************************************************//**
-Gets an undo log page and s-latches it.
-@return pointer to page s-latched */
-UNIV_INLINE
-page_t*
-trx_undo_page_get_s_latched(
-/*========================*/
- ulint space, /*!< in: space where placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block = buf_page_get(space, zip_size, page_no,
- RW_S_LATCH, mtr);
- buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
-
- return(buf_block_get_frame(block));
-}
-
-/******************************************************************//**
-Returns the start offset of the undo log records of the specified undo
-log on the page.
-@return start offset */
-UNIV_INLINE
-ulint
-trx_undo_page_get_start(
-/*====================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- ulint start;
-
- if (page_no == page_get_page_no(undo_page)) {
-
- start = mach_read_from_2(offset + undo_page
- + TRX_UNDO_LOG_START);
- } else {
- start = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE;
- }
-
- return(start);
-}
-
-/******************************************************************//**
-Returns the end offset of the undo log records of the specified undo
-log on the page.
-@return end offset */
-UNIV_INLINE
-ulint
-trx_undo_page_get_end(
-/*==================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- trx_ulogf_t* log_hdr;
- ulint end;
-
- if (page_no == page_get_page_no(undo_page)) {
-
- log_hdr = undo_page + offset;
-
- end = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG);
-
- if (end == 0) {
- end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- }
- } else {
- end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- }
-
- return(end);
-}
-
-/******************************************************************//**
-Returns the previous undo record on the page in the specified log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_prev_rec(
-/*=======================*/
- trx_undo_rec_t* rec, /*!< in: undo log record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- page_t* undo_page;
- ulint start;
-
- undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
-
- start = trx_undo_page_get_start(undo_page, page_no, offset);
-
- if (start + undo_page == rec) {
-
- return(NULL);
- }
-
- return(undo_page + mach_read_from_2(rec - 2));
-}
-
-/******************************************************************//**
-Returns the next undo log record on the page in the specified log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_next_rec(
-/*=======================*/
- trx_undo_rec_t* rec, /*!< in: undo log record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- page_t* undo_page;
- ulint end;
- ulint next;
-
- undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
-
- end = trx_undo_page_get_end(undo_page, page_no, offset);
-
- next = mach_read_from_2(rec);
-
- if (next == end) {
-
- return(NULL);
- }
-
- return(undo_page + next);
-}
-
-/******************************************************************//**
-Returns the last undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_last_rec(
-/*=======================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- ulint start;
- ulint end;
-
- start = trx_undo_page_get_start(undo_page, page_no, offset);
- end = trx_undo_page_get_end(undo_page, page_no, offset);
-
- if (start == end) {
-
- return(NULL);
- }
-
- return(undo_page + mach_read_from_2(undo_page + end - 2));
-}
-
-/******************************************************************//**
-Returns the first undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_first_rec(
-/*========================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- ulint start;
- ulint end;
-
- start = trx_undo_page_get_start(undo_page, page_no, offset);
- end = trx_undo_page_get_end(undo_page, page_no, offset);
-
- if (start == end) {
-
- return(NULL);
- }
-
- return(undo_page + start);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/trx0xa.h b/storage/xtradb/include/trx0xa.h
deleted file mode 100644
index 4d5adc68dcd..00000000000
--- a/storage/xtradb/include/trx0xa.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*
- * Start of xa.h header
- *
- * Define a symbol to prevent multiple inclusions of this header file
- */
-#ifndef XA_H
-#define XA_H
-
-#include "handler.h"
-
-/*
- * Transaction branch identification: XID and NULLXID:
- */
-#ifndef XIDDATASIZE
-
-/** Sizes of transaction identifier */
-#define XIDDATASIZE 128 /*!< maximum size of a transaction
- identifier, in bytes */
-#define MAXGTRIDSIZE 64 /*!< maximum size in bytes of gtrid */
-#define MAXBQUALSIZE 64 /*!< maximum size in bytes of bqual */
-
-#endif
-/** X/Open XA distributed transaction status codes */
-/* @{ */
-#define XA_OK 0 /*!< normal execution */
-#define XAER_ASYNC -2 /*!< asynchronous operation already
- outstanding */
-#define XAER_RMERR -3 /*!< a resource manager error
- occurred in the transaction
- branch */
-#define XAER_NOTA -4 /*!< the XID is not valid */
-#define XAER_INVAL -5 /*!< invalid arguments were given */
-#define XAER_PROTO -6 /*!< routine invoked in an improper
- context */
-#define XAER_RMFAIL -7 /*!< resource manager unavailable */
-#define XAER_DUPID -8 /*!< the XID already exists */
-#define XAER_OUTSIDE -9 /*!< resource manager doing
- work outside transaction */
-/* @} */
-#endif /* ifndef XA_H */
-/*
- * End of xa.h header
- */
diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i
deleted file mode 100644
index 23c8c0a659d..00000000000
--- a/storage/xtradb/include/univ.i
+++ /dev/null
@@ -1,706 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/***********************************************************************//**
-@file include/univ.i
-Version control for database, common definitions, and include files
-
-Created 1/20/1994 Heikki Tuuri
-****************************************************************************/
-
-#ifndef univ_i
-#define univ_i
-
-#ifdef UNIV_HOTBACKUP
-#include "hb_univ.i"
-#endif /* UNIV_HOTBACKUP */
-
-/* aux macros to convert M into "123" (string) if M is defined like
-#define M 123 */
-#define _IB_TO_STR(s) #s
-#define IB_TO_STR(s) _IB_TO_STR(s)
-
-#define INNODB_VERSION_MAJOR 5
-#define INNODB_VERSION_MINOR 6
-#define INNODB_VERSION_BUGFIX 36
-
-#ifndef PERCONA_INNODB_VERSION
-#define PERCONA_INNODB_VERSION 82.0
-#endif
-
-/* Enable UNIV_LOG_ARCHIVE in XtraDB */
-#define UNIV_LOG_ARCHIVE 1
-
-/* The following is the InnoDB version as shown in
-SELECT plugin_version FROM information_schema.plugins;
-calculated in make_version_string() in sql/sql_show.cc like this:
-"version >> 8" . "version & 0xff"
-because the version is shown with only one dot, we skip the last
-component, i.e. we show M.N.P as M.N */
-#define INNODB_VERSION_SHORT \
- (INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
-
-#define INNODB_VERSION_STR \
- IB_TO_STR(INNODB_VERSION_MAJOR) "." \
- IB_TO_STR(INNODB_VERSION_MINOR) "." \
- IB_TO_STR(INNODB_VERSION_BUGFIX) "-" \
- IB_TO_STR(PERCONA_INNODB_VERSION)
-
-#define REFMAN "http://dev.mysql.com/doc/refman/" \
- IB_TO_STR(INNODB_VERSION_MAJOR) "." \
- IB_TO_STR(INNODB_VERSION_MINOR) "/en/"
-
-#ifdef MYSQL_DYNAMIC_PLUGIN
-/* In the dynamic plugin, redefine some externally visible symbols
-in order not to conflict with the symbols of a builtin InnoDB. */
-
-/* Rename all C++ classes that contain virtual functions, because we
-have not figured out how to apply the visibility=hidden attribute to
-the virtual method table (vtable) in GCC 3. */
-# define ha_innobase ha_innodb
-#endif /* MYSQL_DYNAMIC_PLUGIN */
-
-#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__)
-# undef __WIN__
-# define __WIN__
-
-# include <windows.h>
-
-# ifdef _NT_
-# define __NT__
-# endif
-
-#else
-/* The defines used with MySQL */
-
-/* Include two header files from MySQL to make the Unix flavor used
-in compiling more Posix-compatible. These headers also define __WIN__
-if we are compiling on Windows. */
-
-#ifndef UNIV_HOTBACKUP
-# include <my_global.h>
-# include <my_pthread.h>
-#endif /* UNIV_HOTBACKUP */
-
-/* Include <sys/stat.h> to get S_I... macros defined for os0file.cc */
-# include <sys/stat.h>
-# if !defined(__WIN__)
-# include <sys/mman.h> /* mmap() for os0proc.cc */
-# endif
-
-/* Include the header file generated by GNU autoconf */
-# ifndef __WIN__
-# ifndef UNIV_HOTBACKUP
-# include "config.h"
-# endif /* UNIV_HOTBACKUP */
-# endif
-
-# ifdef HAVE_SCHED_H
-# include <sched.h>
-# endif
-
-# ifdef HAVE_MALLOC_H
-# include <malloc.h>
-# endif
-
-/* We only try to do explicit inlining of functions with gcc and
-Sun Studio */
-
-# ifdef HAVE_PREAD
-# define HAVE_PWRITE
-# endif
-
-#endif /* #if (defined(WIN32) || ... */
-
-#ifndef __WIN__
-#define __STDC_FORMAT_MACROS /* Enable C99 printf format macros */
-#include <inttypes.h>
-#endif /* !__WIN__ */
-
-/* Following defines are to enable performance schema
-instrumentation in each of four InnoDB modules if
-HAVE_PSI_INTERFACE is defined. */
-#if defined HAVE_PSI_INTERFACE && !defined UNIV_HOTBACKUP
-# define UNIV_PFS_MUTEX
-# define UNIV_PFS_RWLOCK
-
-# define UNIV_PFS_IO
-# define UNIV_PFS_THREAD
-
-/* There are mutexes/rwlocks that we want to exclude from
-instrumentation even if their corresponding performance schema
-define is set. And this PFS_NOT_INSTRUMENTED is used
-as the key value to identify those objects that would
-be excluded from instrumentation. */
-# define PFS_NOT_INSTRUMENTED ULINT32_UNDEFINED
-
-# define PFS_IS_INSTRUMENTED(key) ((key) != PFS_NOT_INSTRUMENTED)
-
-#endif /* HAVE_PSI_INTERFACE */
-
-#ifdef __WIN__
-# define YY_NO_UNISTD_H 1
-#endif /* __WIN__ */
-
-/* DEBUG VERSION CONTROL
- ===================== */
-
-/* When this macro is defined then additional test functions will be
-compiled. These functions live at the end of each relevant source file
-and have "test_" prefix. These functions are not called from anywhere in
-the code, they can be called from gdb after
-innobase_start_or_create_for_mysql() has executed using the call
-command. Not tested on Windows. */
-/*
-#define UNIV_COMPILE_TEST_FUNCS
-*/
-
-#if defined HAVE_valgrind && defined HAVE_VALGRIND
-# define UNIV_DEBUG_VALGRIND
-#endif
-#if 0
-#define UNIV_DEBUG_VALGRIND /* Enable extra
- Valgrind instrumentation */
-#define UNIV_DEBUG_PRINT /* Enable the compilation of
- some debug print functions */
-#define UNIV_AHI_DEBUG /* Enable adaptive hash index
- debugging without UNIV_DEBUG */
-#define UNIV_BUF_DEBUG /* Enable buffer pool
- debugging without UNIV_DEBUG */
-#define UNIV_BLOB_LIGHT_DEBUG /* Enable off-page column
- debugging without UNIV_DEBUG */
-#define UNIV_DEBUG /* Enable ut_ad() assertions
- and disable UNIV_INLINE */
-#define UNIV_DEBUG_LOCK_VALIDATE /* Enable
- ut_ad(lock_rec_validate_page())
- assertions. */
-#define UNIV_DEBUG_FILE_ACCESSES /* Enable freed block access
- debugging without UNIV_DEBUG */
-#define UNIV_LRU_DEBUG /* debug the buffer pool LRU */
-#define UNIV_HASH_DEBUG /* debug HASH_ macros */
-#define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */
-#define UNIV_LOG_LSN_DEBUG /* write LSN to the redo log;
-this will break redo log file compatibility, but it may be useful when
-debugging redo log application problems. */
-#define UNIV_MEM_DEBUG /* detect memory leaks etc */
-#define UNIV_IBUF_DEBUG /* debug the insert buffer */
-#define UNIV_BLOB_DEBUG /* track BLOB ownership;
-assumes that no BLOBs survive server restart */
-#define UNIV_IBUF_COUNT_DEBUG /* debug the insert buffer;
-this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES,
-and the insert buffer must be empty when the database is started */
-#define UNIV_PERF_DEBUG /* debug flag that enables
- light weight performance
- related stuff. */
-#define UNIV_SYNC_DEBUG /* debug mutex and latch
-operations (very slow); also UNIV_DEBUG must be defined */
-#define UNIV_SEARCH_DEBUG /* debug B-tree comparisons */
-#define UNIV_SYNC_PERF_STAT /* operation counts for
- rw-locks and mutexes */
-#define UNIV_SEARCH_PERF_STAT /* statistics for the
- adaptive hash index */
-#define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output
- in sync0sync.cc */
-#define UNIV_BTR_PRINT /* enable functions for
- printing B-trees */
-#define UNIV_ZIP_DEBUG /* extensive consistency checks
- for compressed pages */
-#define UNIV_ZIP_COPY /* call page_zip_copy_recs()
- more often */
-#define UNIV_AIO_DEBUG /* prints info about
- submitted and reaped AIO
- requests to the log. */
-#define UNIV_STATS_DEBUG /* prints various stats
- related debug info from
- dict0stats.cc */
-#define FTS_INTERNAL_DIAG_PRINT /* FTS internal debugging
- info output */
-#endif
-
-#define UNIV_BTR_DEBUG /* check B-tree links */
-#define UNIV_LIGHT_MEM_DEBUG /* light memory debugging */
-
-/*
-#define UNIV_SQL_DEBUG
-#define UNIV_LOG_DEBUG
-*/
- /* the above option prevents forcing of log to disk
- at a buffer page write: it should be tested with this
- option off; also some ibuf tests are suppressed */
-
-/* Linkage specifier for non-static InnoDB symbols (variables and functions)
-that are only referenced from within InnoDB, not from MySQL. We disable the
-GCC visibility directive on all Sun operating systems because there is no
-easy way to get it to work. See http://bugs.mysql.com/bug.php?id=52263. */
-#define MY_ATTRIBUTE __attribute__
-#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(sun) || defined(__INTEL_COMPILER)
-# define UNIV_INTERN MY_ATTRIBUTE((visibility ("hidden")))
-#else
-# define UNIV_INTERN
-#endif
-#if defined(INNODB_COMPILER_HINTS) \
- && defined __GNUC__ \
- && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 3)
-/** Starting with GCC 4.3, the "cold" attribute is used to inform the
-compiler that a function is unlikely to be executed. The function is
-optimized for size rather than speed and on many targets it is placed
-into a special subsection of the text section so all cold functions
-appear close together, improving code locality of non-cold parts of the
-program. The paths leading to calls of cold functions within code are
-marked as unlikely by the branch prediction mechanism. In short: optimize
-a rarely invoked function for size instead of for speed. */
-# define UNIV_COLD MY_ATTRIBUTE((cold))
-#else
-# define UNIV_COLD /* empty */
-#endif
-
-#ifdef UNIV_LINUX
-# define UNIV_THREAD_LOCAL __thread
-#else
-/* FIXME: the TLS variables are silently broken on other platforms for now */
-# define UNIV_THREAD_LOCAL
-#endif
-
-#ifndef UNIV_MUST_NOT_INLINE
-/* Definition for inline version */
-
-#define UNIV_INLINE static inline
-
-#else /* !UNIV_MUST_NOT_INLINE */
-/* If we want to compile a noninlined version we use the following macro
-definitions: */
-
-#define UNIV_NONINL
-#define UNIV_INLINE UNIV_INTERN
-
-#endif /* !UNIV_MUST_NOT_INLINE */
-
-#define UNIV_WORD_SIZE SIZEOF_SIZE_T
-
-/** The following alignment is used in memory allocations in memory heap
-management to ensure correct alignment for doubles etc. */
-#define UNIV_MEM_ALIGNMENT 8
-
-/*
- DATABASE VERSION CONTROL
- ========================
-*/
-
-/** There are currently two InnoDB file formats which are used to group
-features with similar restrictions and dependencies. Using an enum allows
-switch statements to give a compiler warning when a new one is introduced. */
-enum innodb_file_formats_enum {
- /** Antelope File Format: InnoDB/MySQL up to 5.1.
- This format includes REDUNDANT and COMPACT row formats */
- UNIV_FORMAT_A = 0,
-
- /** Barracuda File Format: Introduced in InnoDB plugin for 5.1:
- This format includes COMPRESSED and DYNAMIC row formats. It
- includes the ability to create secondary indexes from data that
- is not on the clustered index page and the ability to store more
- data off the clustered index page. */
- UNIV_FORMAT_B = 1
-};
-
-typedef enum innodb_file_formats_enum innodb_file_formats_t;
-
-/** Minimum supported file format */
-#define UNIV_FORMAT_MIN UNIV_FORMAT_A
-
-/** Maximum supported file format */
-#define UNIV_FORMAT_MAX UNIV_FORMAT_B
-
-/** The 2-logarithm of UNIV_PAGE_SIZE: */
-#define UNIV_PAGE_SIZE_SHIFT srv_page_size_shift
-
-#ifdef HAVE_LZO
-#define IF_LZO(A,B) A
-#else
-#define IF_LZO(A,B) B
-#endif
-
-#ifdef HAVE_LZ4
-#define IF_LZ4(A,B) A
-#else
-#define IF_LZ4(A,B) B
-#endif
-
-#ifdef HAVE_LZMA
-#define IF_LZMA(A,B) A
-#else
-#define IF_LZMA(A,B) B
-#endif
-
-#ifdef HAVE_BZIP2
-#define IF_BZIP2(A,B) A
-#else
-#define IF_BZIP2(A,B) B
-#endif
-
-#ifdef HAVE_SNAPPY
-#define IF_SNAPPY(A,B) A
-#else
-#define IF_SNAPPY(A,B) B
-#endif
-
-/** The universal page size of the database */
-#define UNIV_PAGE_SIZE ((ulint) srv_page_size)
-
-/** log2 of smallest compressed page size (1<<10 == 1024 bytes)
-Note: This must never change! */
-#define UNIV_ZIP_SIZE_SHIFT_MIN 10
-
-/** log2 of largest compressed page size (1<<14 == 16384 bytes).
-A compressed page directory entry reserves 14 bits for the start offset
-and 2 bits for flags. This limits the uncompressed page size to 16k.
-*/
-#define UNIV_ZIP_SIZE_SHIFT_MAX 14
-
-/* Define the Min, Max, Default page sizes. */
-/** Minimum Page Size Shift (power of 2) */
-#define UNIV_PAGE_SIZE_SHIFT_MIN 12
-/** log2 of largest page size (1<<16 == 65536 bytes). */
-/** Maximum Page Size Shift (power of 2) */
-#define UNIV_PAGE_SIZE_SHIFT_MAX 16
-/** log2 of default page size (1<<14 == 16384 bytes). */
-/** Default Page Size Shift (power of 2) */
-#define UNIV_PAGE_SIZE_SHIFT_DEF 14
-/** Original 16k InnoDB Page Size Shift, in case the default changes */
-#define UNIV_PAGE_SIZE_SHIFT_ORIG 14
-
-/** Minimum page size InnoDB currently supports. */
-#define UNIV_PAGE_SIZE_MIN (1 << UNIV_PAGE_SIZE_SHIFT_MIN)
-/** Maximum page size InnoDB currently supports. */
-#define UNIV_PAGE_SIZE_MAX (1 << UNIV_PAGE_SIZE_SHIFT_MAX)
-/** Default page size for InnoDB tablespaces. */
-#define UNIV_PAGE_SIZE_DEF (1 << UNIV_PAGE_SIZE_SHIFT_DEF)
-/** Original 16k page size for InnoDB tablespaces. */
-#define UNIV_PAGE_SIZE_ORIG (1 << UNIV_PAGE_SIZE_SHIFT_ORIG)
-
-/** Smallest compressed page size */
-#define UNIV_ZIP_SIZE_MIN (1 << UNIV_ZIP_SIZE_SHIFT_MIN)
-
-/** Largest compressed page size */
-#define UNIV_ZIP_SIZE_MAX (1 << UNIV_ZIP_SIZE_SHIFT_MAX)
-
-/** Number of supported page sizes (The convention 'ssize' is used
-for 'log2 minus 9' or the number of shifts starting with 512.)
-This number varies depending on UNIV_PAGE_SIZE. */
-#define UNIV_PAGE_SSIZE_MAX \
- (UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
-
-/** Maximum number of parallel threads in a parallelized operation */
-#define UNIV_MAX_PARALLELISM 32
-
-/** This is the "mbmaxlen" for my_charset_filename (defined in
-strings/ctype-utf8.c), which is used to encode File and Database names. */
-#define FILENAME_CHARSET_MAXNAMLEN 5
-
-/** The maximum length of an encoded table name in bytes. The max
-table and database names are NAME_CHAR_LEN (64) characters. After the
-encoding, the max length would be NAME_CHAR_LEN (64) *
-FILENAME_CHARSET_MAXNAMLEN (5) = 320 bytes. The number does not include a
-terminating '\0'. InnoDB can handle longer names internally. */
-#define MAX_TABLE_NAME_LEN 320
-
-/** The maximum length of a database name. Like MAX_TABLE_NAME_LEN this is
-the MySQL's NAME_LEN, see check_and_convert_db_name(). */
-#define MAX_DATABASE_NAME_LEN MAX_TABLE_NAME_LEN
-
-/** MAX_FULL_NAME_LEN defines the full name path including the
-database name and table name. In addition, 14 bytes is added for:
- 2 for surrounding quotes around table name
- 1 for the separating dot (.)
- 9 for the #mysql50# prefix */
-#define MAX_FULL_NAME_LEN \
- (MAX_TABLE_NAME_LEN + MAX_DATABASE_NAME_LEN + 14)
-
-/** The maximum length in bytes that a database name can occupy when stored in
-UTF8, including the terminating '\0', see dict_fs2utf8(). You must include
-mysql_com.h if you are to use this macro. */
-#define MAX_DB_UTF8_LEN (NAME_LEN + 1)
-
-/** The maximum length in bytes that a table name can occupy when stored in
-UTF8, including the terminating '\0', see dict_fs2utf8(). You must include
-mysql_com.h if you are to use this macro. */
-#define MAX_TABLE_UTF8_LEN (NAME_LEN + sizeof(srv_mysql50_table_name_prefix))
-
-/*
- UNIVERSAL TYPE DEFINITIONS
- ==========================
-*/
-
-/* Note that inside MySQL 'byte' is defined as char on Linux! */
-#define byte unsigned char
-
-/* Another basic type we use is unsigned long integer which should be equal to
-the word size of the machine, that is on a 32-bit platform 32 bits, and on a
-64-bit platform 64 bits. We also give the printf format for the type as a
-macro ULINTPF. */
-
-
-#ifdef _WIN32
-/* Use the integer types and formatting strings defined in Visual Studio. */
-# define UINT32PF "%u"
-# define INT64PF "%lld"
-# define UINT64PF "%llu"
-# define UINT64PFx "%016llx"
-typedef __int64 ib_int64_t;
-typedef unsigned __int64 ib_uint64_t;
-typedef unsigned __int32 ib_uint32_t;
-#else
-/* Use the integer types and formatting strings defined in the C99 standard. */
-# define UINT32PF "%" PRIu32
-# define INT64PF "%" PRId64
-# define UINT64PF "%" PRIu64
-# define UINT64PFx "%016" PRIx64
-typedef int64_t ib_int64_t;
-typedef uint64_t ib_uint64_t;
-typedef uint32_t ib_uint32_t;
-#endif
-
-#define IB_ID_FMT UINT64PF
-
-/* Type used for all log sequence number storage and arithmetics */
-typedef ib_uint64_t lsn_t;
-
-#ifdef _WIN64
-typedef unsigned __int64 ulint;
-typedef __int64 lint;
-# define ULINTPF UINT64PF
-#define MYSQL_SYSVAR_ULINT MYSQL_SYSVAR_ULONGLONG
-#else
-typedef unsigned long int ulint;
-typedef long int lint;
-# define ULINTPF "%lu"
-#define MYSQL_SYSVAR_ULINT MYSQL_SYSVAR_ULONG
-#endif /* _WIN64 */
-
-#ifndef UNIV_HOTBACKUP
-typedef unsigned long long int ullint;
-#endif /* UNIV_HOTBACKUP */
-
-#ifndef __WIN__
-#if SIZEOF_LONG != SIZEOF_VOIDP
-#error "Error: InnoDB's ulint must be of the same size as void*"
-#endif
-#endif
-
-/** The 'undefined' value for a ulint */
-#define ULINT_UNDEFINED ((ulint)(-1))
-
-#define ULONG_UNDEFINED ((ulong)(-1))
-
-/** The 'undefined' value for a ib_uint64_t */
-#define UINT64_UNDEFINED ((ib_uint64_t)(-1))
-
-/** The bitmask of 32-bit unsigned integer */
-#define ULINT32_MASK 0xFFFFFFFF
-/** The undefined 32-bit unsigned integer */
-#define ULINT32_UNDEFINED ULINT32_MASK
-
-/** Maximum value for a ulint */
-#define ULINT_MAX ((ulint)(-2))
-
-/** Maximum value for ib_uint64_t */
-#define IB_UINT64_MAX ((ib_uint64_t) (~0ULL))
-
-/** The generic InnoDB system object identifier data type */
-typedef ib_uint64_t ib_id_t;
-#define IB_ID_MAX IB_UINT64_MAX
-
-/** The 'undefined' value for a ullint */
-#define ULLINT_UNDEFINED ((ullint)(-1))
-
-/** This 'ibool' type is used within Innobase. Remember that different included
-headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
-#define ibool ulint
-
-#ifndef TRUE
-
-#define TRUE 1
-#define FALSE 0
-
-#endif
-
-#define UNIV_NOTHROW
-
-/** The following number as the length of a logical field means that the field
-has the SQL NULL as its value. NOTE that because we assume that the length
-of a field is a 32-bit integer when we store it, for example, to an undo log
-on disk, this number must also fit in 32 bits, even on 64-bit
-computers! */
-
-#define UNIV_SQL_NULL ULINT32_UNDEFINED
-
-/** Lengths which are not UNIV_SQL_NULL, but bigger than the following
-number indicate that a field contains a reference to an externally
-stored part of the field in the tablespace. The length field then
-contains the sum of the following flag and the locally stored len. */
-
-#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE_DEF)
-
-#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER)
-#define HAVE_GCC_GT_2
-/* Tell the compiler that variable/function is unused. */
-# define UNIV_UNUSED MY_ATTRIBUTE ((unused))
-#else
-# define UNIV_UNUSED
-#endif /* CHECK FOR GCC VER_GT_2 */
-
-/* Some macros to improve branch prediction and reduce cache misses */
-#if defined(INNODB_COMPILER_HINTS) && defined(HAVE_GCC_GT_2)
-/* Tell the compiler that 'expr' probably evaluates to 'constant'. */
-# define UNIV_EXPECT(expr,constant) __builtin_expect(expr, constant)
-/* Tell the compiler that a pointer is likely to be NULL */
-# define UNIV_LIKELY_NULL(ptr) __builtin_expect((ulint) ptr, 0)
-/* Minimize cache-miss latency by moving data at addr into a cache before
-it is read. */
-# define UNIV_PREFETCH_R(addr) __builtin_prefetch(addr, 0, 3)
-/* Minimize cache-miss latency by moving data at addr into a cache before
-it is read or written. */
-# define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3)
-
-/* Sun Studio includes sun_prefetch.h as of version 5.9 */
-#elif (defined(__SUNPRO_C) && __SUNPRO_C >= 0x590) \
- || (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x590)
-
-# include <sun_prefetch.h>
-
-#if __SUNPRO_C >= 0x550
-# undef UNIV_INTERN
-# define UNIV_INTERN __hidden
-#endif /* __SUNPRO_C >= 0x550 */
-
-# define UNIV_EXPECT(expr,value) (expr)
-# define UNIV_LIKELY_NULL(expr) (expr)
-
-# if defined(INNODB_COMPILER_HINTS)
-//# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many((void*) addr)
-# define UNIV_PREFETCH_R(addr) ((void) 0)
-# define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr)
-# else
-# define UNIV_PREFETCH_R(addr) ((void) 0)
-# define UNIV_PREFETCH_RW(addr) ((void) 0)
-# endif /* INNODB_COMPILER_HINTS */
-
-#else
-/* Dummy versions of the macros */
-# define UNIV_EXPECT(expr,value) (expr)
-# define UNIV_LIKELY_NULL(expr) (expr)
-# define UNIV_PREFETCH_R(addr) ((void) 0)
-# define UNIV_PREFETCH_RW(addr) ((void) 0)
-#endif
-
-/* Tell the compiler that cond is likely to hold */
-#define UNIV_LIKELY(cond) UNIV_EXPECT(cond, TRUE)
-/* Tell the compiler that cond is unlikely to hold */
-#define UNIV_UNLIKELY(cond) UNIV_EXPECT(cond, FALSE)
-
-/* Compile-time constant of the given array's size. */
-#define UT_ARR_SIZE(a) (sizeof(a) / sizeof((a)[0]))
-
-/* The return type from a thread's start function differs between Unix and
-Windows, so define a typedef for it and a macro to use at the end of such
-functions. */
-
-#ifdef __WIN__
-#define usleep(a) Sleep((a)/1000)
-typedef DWORD os_thread_ret_t;
-#define OS_THREAD_DUMMY_RETURN return(0)
-#else
-typedef void* os_thread_ret_t;
-#define OS_THREAD_DUMMY_RETURN return(NULL)
-#endif
-
-#include <stdio.h>
-#include "ut0dbg.h"
-#include "ut0ut.h"
-#include "db0err.h"
-#ifdef UNIV_DEBUG_VALGRIND
-# include <valgrind/memcheck.h>
-# define UNIV_MEM_VALID(addr, size) VALGRIND_MAKE_MEM_DEFINED(addr, size)
-# define UNIV_MEM_INVALID(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
-# define UNIV_MEM_FREE(addr, size) VALGRIND_MAKE_MEM_NOACCESS(addr, size)
-# define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
-# define UNIV_MEM_DESC(addr, size) VALGRIND_CREATE_BLOCK(addr, size, #addr)
-# define UNIV_MEM_UNDESC(b) VALGRIND_DISCARD(b)
-# define UNIV_MEM_ASSERT_RW_LOW(addr, size, should_abort) do { \
- const void* _p = (const void*) (ulint) \
- VALGRIND_CHECK_MEM_IS_DEFINED(addr, size); \
- if (UNIV_LIKELY_NULL(_p)) { \
- fprintf(stderr, "%s:%d: %p[%u] undefined at %ld\n", \
- __FILE__, __LINE__, \
- (const void*) (addr), (unsigned) (size), (long) \
- (((const char*) _p) - ((const char*) (addr)))); \
- if (should_abort) { \
- ut_error; \
- } \
- } \
-} while (0)
-# define UNIV_MEM_ASSERT_RW(addr, size) \
- UNIV_MEM_ASSERT_RW_LOW(addr, size, false)
-# define UNIV_MEM_ASSERT_RW_ABORT(addr, size) \
- UNIV_MEM_ASSERT_RW_LOW(addr, size, true)
-# define UNIV_MEM_ASSERT_W(addr, size) do { \
- const void* _p = (const void*) (ulint) \
- VALGRIND_CHECK_MEM_IS_ADDRESSABLE(addr, size); \
- if (UNIV_LIKELY_NULL(_p)) \
- fprintf(stderr, "%s:%d: %p[%u] unwritable at %ld\n", \
- __FILE__, __LINE__, \
- (const void*) (addr), (unsigned) (size), (long) \
- (((const char*) _p) - ((const char*) (addr)))); \
- } while (0)
-# define UNIV_MEM_TRASH(addr, c, size) do { \
- ut_d(memset(addr, c, size)); \
- UNIV_MEM_INVALID(addr, size); \
- } while (0)
-#else
-# define UNIV_MEM_VALID(addr, size) do {} while(0)
-# define UNIV_MEM_INVALID(addr, size) do {} while(0)
-# define UNIV_MEM_FREE(addr, size) do {} while(0)
-# define UNIV_MEM_ALLOC(addr, size) do {} while(0)
-# define UNIV_MEM_DESC(addr, size) do {} while(0)
-# define UNIV_MEM_UNDESC(b) do {} while(0)
-# define UNIV_MEM_ASSERT_RW_LOW(addr, size, should_abort) do {} while(0)
-# define UNIV_MEM_ASSERT_RW(addr, size) do {} while(0)
-# define UNIV_MEM_ASSERT_RW_ABORT(addr, size) do {} while(0)
-# define UNIV_MEM_ASSERT_W(addr, size) do {} while(0)
-# define UNIV_MEM_TRASH(addr, c, size) do {} while(0)
-#endif
-#define UNIV_MEM_ASSERT_AND_FREE(addr, size) do { \
- UNIV_MEM_ASSERT_W(addr, size); \
- UNIV_MEM_FREE(addr, size); \
-} while (0)
-#define UNIV_MEM_ASSERT_AND_ALLOC(addr, size) do { \
- UNIV_MEM_ASSERT_W(addr, size); \
- UNIV_MEM_ALLOC(addr, size); \
-} while (0)
-
-extern ulong srv_page_size_shift;
-extern ulong srv_page_size;
-
-#endif
diff --git a/storage/xtradb/include/usr0sess.h b/storage/xtradb/include/usr0sess.h
deleted file mode 100644
index b5c80b97b43..00000000000
--- a/storage/xtradb/include/usr0sess.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/usr0sess.h
-Sessions
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef usr0sess_h
-#define usr0sess_h
-
-#include "univ.i"
-#include "ut0byte.h"
-#include "trx0types.h"
-#include "srv0srv.h"
-#include "trx0types.h"
-#include "usr0types.h"
-#include "que0types.h"
-#include "data0data.h"
-#include "rem0rec.h"
-
-/*********************************************************************//**
-Opens a session.
-@return own: session object */
-UNIV_INTERN
-sess_t*
-sess_open(void);
-/*============*/
-/*********************************************************************//**
-Closes a session, freeing the memory occupied by it. */
-UNIV_INTERN
-void
-sess_close(
-/*=======*/
- sess_t* sess); /* in, own: session object */
-
-/* The session handle. This data structure is only used by purge and is
-not really necessary. We should get rid of it. */
-struct sess_t{
- ulint state; /*!< state of the session */
- trx_t* trx; /*!< transaction object permanently
- assigned for the session: the
- transaction instance designated by the
- trx id changes, but the memory
- structure is preserved */
- UT_LIST_BASE_NODE_T(que_t)
- graphs; /*!< query graphs belonging to this
- session */
-};
-
-/* Session states */
-#define SESS_ACTIVE 1
-#define SESS_ERROR 2 /* session contains an error message
- which has not yet been communicated
- to the client */
-#ifndef UNIV_NONINL
-#include "usr0sess.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/usr0sess.ic b/storage/xtradb/include/usr0sess.ic
deleted file mode 100644
index 284e59537fe..00000000000
--- a/storage/xtradb/include/usr0sess.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/usr0sess.ic
-Sessions
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
diff --git a/storage/xtradb/include/usr0types.h b/storage/xtradb/include/usr0types.h
deleted file mode 100644
index 6ba937cacc8..00000000000
--- a/storage/xtradb/include/usr0types.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/usr0types.h
-Users and sessions global types
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef usr0types_h
-#define usr0types_h
-
-struct sess_t;
-
-#endif
diff --git a/storage/xtradb/include/ut0bh.h b/storage/xtradb/include/ut0bh.h
deleted file mode 100644
index 1085736c7ab..00000000000
--- a/storage/xtradb/include/ut0bh.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/***************************************************************************//**
-
-Copyright (c) 2011, 2013, Oracle Corpn. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0bh.h
-Binary min-heap interface.
-
-Created 2010-05-28 by Sunny Bains
-*******************************************************/
-
-#ifndef INNOBASE_UT0BH_H
-#define INNOBASE_UT0BH_H
-
-#include "univ.i"
-
-/** Comparison function for objects in the binary heap. */
-typedef int (*ib_bh_cmp_t)(const void* p1, const void* p2);
-
-struct ib_bh_t;
-
-/**********************************************************************//**
-Get the number of elements in the binary heap.
-@return number of elements */
-UNIV_INLINE
-ulint
-ib_bh_size(
-/*=======*/
- const ib_bh_t* ib_bh); /*!< in: instance */
-
-/**********************************************************************//**
-Test if binary heap is empty.
-@return TRUE if empty. */
-UNIV_INLINE
-ibool
-ib_bh_is_empty(
-/*===========*/
- const ib_bh_t* ib_bh); /*!< in: instance */
-
-/**********************************************************************//**
-Test if binary heap is full.
-@return TRUE if full. */
-UNIV_INLINE
-ibool
-ib_bh_is_full(
-/*===========*/
- const ib_bh_t* ib_bh); /*!< in: instance */
-
-/**********************************************************************//**
-Get a pointer to the element.
-@return pointer to element */
-UNIV_INLINE
-void*
-ib_bh_get(
-/*=======*/
- ib_bh_t* ib_bh, /*!< in: instance */
- ulint i); /*!< in: index */
-
-/**********************************************************************//**
-Copy an element to the binary heap.
-@return pointer to copied element */
-UNIV_INLINE
-void*
-ib_bh_set(
-/*======*/
- ib_bh_t* ib_bh, /*!< in/out: instance */
- ulint i, /*!< in: index */
- const void* elem); /*!< in: element to add */
-
-/**********************************************************************//**
-Return the first element from the binary heap.
-@return pointer to first element or NULL if empty. */
-UNIV_INLINE
-void*
-ib_bh_first(
-/*========*/
- ib_bh_t* ib_bh); /*!< in: instance */
-
-/**********************************************************************//**
-Return the last element from the binary heap.
-@return pointer to last element or NULL if empty. */
-UNIV_INLINE
-void*
-ib_bh_last(
-/*========*/
- ib_bh_t* ib_bh); /*!< in/out: instance */
-
-/**********************************************************************//**
-Create a binary heap.
-@return a new binary heap */
-UNIV_INTERN
-ib_bh_t*
-ib_bh_create(
-/*=========*/
- ib_bh_cmp_t compare, /*!< in: comparator */
- ulint sizeof_elem, /*!< in: size of one element */
- ulint max_elems); /*!< in: max elements allowed */
-
-/**********************************************************************//**
-Free a binary heap.
-@return a new binary heap */
-UNIV_INTERN
-void
-ib_bh_free(
-/*=======*/
- ib_bh_t* ib_bh); /*!< in,own: instance */
-
-/**********************************************************************//**
-Add an element to the binary heap. Note: The element is copied.
-@return pointer to added element or NULL if full. */
-UNIV_INTERN
-void*
-ib_bh_push(
-/*=======*/
- ib_bh_t* ib_bh, /*!< in/out: instance */
- const void* elem); /*!< in: element to add */
-
-/**********************************************************************//**
-Remove the first element from the binary heap. */
-UNIV_INTERN
-void
-ib_bh_pop(
-/*======*/
- ib_bh_t* ib_bh); /*!< in/out: instance */
-
-/** Binary heap data structure */
-struct ib_bh_t {
- ulint max_elems; /*!< max elements allowed */
- ulint n_elems; /*!< current size */
- ulint sizeof_elem; /*!< sizeof element */
- ib_bh_cmp_t compare; /*!< comparator */
-};
-
-#ifndef UNIV_NONINL
-#include "ut0bh.ic"
-#endif
-
-#endif /* INNOBASE_UT0BH_H */
diff --git a/storage/xtradb/include/ut0bh.ic b/storage/xtradb/include/ut0bh.ic
deleted file mode 100644
index b11de5b8b3e..00000000000
--- a/storage/xtradb/include/ut0bh.ic
+++ /dev/null
@@ -1,125 +0,0 @@
-/***************************************************************************//**
-
-Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0bh.ic
-Binary min-heap implementation.
-
-Created 2011-01-15 by Sunny Bains
-*******************************************************/
-
-#include "ut0bh.h"
-#include "ut0mem.h" /* For ut_memcpy() */
-
-/**********************************************************************//**
-Get the number of elements in the binary heap.
-@return number of elements */
-UNIV_INLINE
-ulint
-ib_bh_size(
-/*=======*/
- const ib_bh_t* ib_bh) /*!< in: instance */
-{
- return(ib_bh->n_elems);
-}
-
-/**********************************************************************//**
-Test if binary heap is empty.
-@return TRUE if empty. */
-UNIV_INLINE
-ibool
-ib_bh_is_empty(
-/*===========*/
- const ib_bh_t* ib_bh) /*!< in: instance */
-{
- return(ib_bh_size(ib_bh) == 0);
-}
-
-/**********************************************************************//**
-Test if binary heap is full.
-@return TRUE if full. */
-UNIV_INLINE
-ibool
-ib_bh_is_full(
-/*===========*/
- const ib_bh_t* ib_bh) /*!< in: instance */
-{
- return(ib_bh_size(ib_bh) >= ib_bh->max_elems);
-}
-
-/**********************************************************************//**
-Get a pointer to the element.
-@return pointer to element */
-UNIV_INLINE
-void*
-ib_bh_get(
-/*=======*/
- ib_bh_t* ib_bh, /*!< in: instance */
- ulint i) /*!< in: index */
-{
- byte* ptr = (byte*) (ib_bh + 1);
-
- ut_a(i < ib_bh_size(ib_bh));
-
- return(ptr + (ib_bh->sizeof_elem * i));
-}
-
-/**********************************************************************//**
-Copy an element to the binary heap.
-@return pointer to copied element */
-UNIV_INLINE
-void*
-ib_bh_set(
-/*======*/
- ib_bh_t* ib_bh, /*!< in/out: instance */
- ulint i, /*!< in: index */
- const void* elem) /*!< in: element to add */
-{
- void* ptr = ib_bh_get(ib_bh, i);
-
- ut_memcpy(ptr, elem, ib_bh->sizeof_elem);
-
- return(ptr);
-}
-
-/**********************************************************************//**
-Return the first element from the binary heap.
-@return pointer to first element or NULL if empty. */
-UNIV_INLINE
-void*
-ib_bh_first(
-/*========*/
- ib_bh_t* ib_bh) /*!< in: instance */
-{
- return(ib_bh_is_empty(ib_bh) ? NULL : ib_bh_get(ib_bh, 0));
-}
-
-/**********************************************************************//**
-Return the last element from the binary heap.
-@return pointer to last element or NULL if empty. */
-UNIV_INLINE
-void*
-ib_bh_last(
-/*========*/
- ib_bh_t* ib_bh) /*!< in/out: instance */
-{
- return(ib_bh_is_empty(ib_bh)
- ? NULL
- : ib_bh_get(ib_bh, ib_bh_size(ib_bh) - 1));
-}
-
diff --git a/storage/xtradb/include/ut0byte.h b/storage/xtradb/include/ut0byte.h
deleted file mode 100644
index 4893ab9f9af..00000000000
--- a/storage/xtradb/include/ut0byte.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0byte.h
-Utilities for byte operations
-
-Created 1/20/1994 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0byte_h
-#define ut0byte_h
-
-
-
-#include "univ.i"
-
-/*******************************************************//**
-Creates a 64-bit integer out of two 32-bit integers.
-@return created integer */
-UNIV_INLINE
-ib_uint64_t
-ut_ull_create(
-/*==========*/
- ulint high, /*!< in: high-order 32 bits */
- ulint low) /*!< in: low-order 32 bits */
- MY_ATTRIBUTE((const));
-
-/********************************************************//**
-Rounds a 64-bit integer downward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-ib_uint64_t
-ut_uint64_align_down(
-/*=================*/
- ib_uint64_t n, /*!< in: number to be rounded */
- ulint align_no); /*!< in: align by this number
- which must be a power of 2 */
-/********************************************************//**
-Rounds ib_uint64_t upward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-ib_uint64_t
-ut_uint64_align_up(
-/*===============*/
- ib_uint64_t n, /*!< in: number to be rounded */
- ulint align_no); /*!< in: align by this number
- which must be a power of 2 */
-/*********************************************************//**
-The following function rounds up a pointer to the nearest aligned address.
-@return aligned pointer */
-UNIV_INLINE
-void*
-ut_align(
-/*=====*/
- const void* ptr, /*!< in: pointer */
- ulint align_no); /*!< in: align by this number */
-/*********************************************************//**
-The following function rounds down a pointer to the nearest
-aligned address.
-@return aligned pointer */
-UNIV_INLINE
-void*
-ut_align_down(
-/*==========*/
- const void* ptr, /*!< in: pointer */
- ulint align_no) /*!< in: align by this number */
- MY_ATTRIBUTE((const));
-/*********************************************************//**
-The following function computes the offset of a pointer from the nearest
-aligned address.
-@return distance from aligned pointer */
-UNIV_INLINE
-ulint
-ut_align_offset(
-/*============*/
- const void* ptr, /*!< in: pointer */
- ulint align_no) /*!< in: align by this number */
- MY_ATTRIBUTE((const));
-/*****************************************************************//**
-Gets the nth bit of a ulint.
-@return TRUE if nth bit is 1; 0th bit is defined to be the least significant */
-UNIV_INLINE
-ibool
-ut_bit_get_nth(
-/*===========*/
- ulint a, /*!< in: ulint */
- ulint n); /*!< in: nth bit requested */
-/*****************************************************************//**
-Sets the nth bit of a ulint.
-@return the ulint with the bit set as requested */
-UNIV_INLINE
-ulint
-ut_bit_set_nth(
-/*===========*/
- ulint a, /*!< in: ulint */
- ulint n, /*!< in: nth bit requested */
- ibool val); /*!< in: value for the bit to set */
-
-#ifndef UNIV_NONINL
-#include "ut0byte.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/ut0byte.ic b/storage/xtradb/include/ut0byte.ic
deleted file mode 100644
index 1a7af5ae33d..00000000000
--- a/storage/xtradb/include/ut0byte.ic
+++ /dev/null
@@ -1,173 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************************//**
-@file include/ut0byte.ic
-Utilities for byte operations
-
-Created 5/30/1994 Heikki Tuuri
-*******************************************************************/
-
-/*******************************************************//**
-Creates a 64-bit integer out of two 32-bit integers.
-@return created integer */
-UNIV_INLINE
-ib_uint64_t
-ut_ull_create(
-/*==========*/
- ulint high, /*!< in: high-order 32 bits */
- ulint low) /*!< in: low-order 32 bits */
-{
- ut_ad(high <= ULINT32_MASK);
- ut_ad(low <= ULINT32_MASK);
- return(((ib_uint64_t) high) << 32 | low);
-}
-
-/********************************************************//**
-Rounds a 64-bit integer downward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-ib_uint64_t
-ut_uint64_align_down(
-/*=================*/
- ib_uint64_t n, /*!< in: number to be rounded */
- ulint align_no) /*!< in: align by this number
- which must be a power of 2 */
-{
- ut_ad(align_no > 0);
- ut_ad(ut_is_2pow(align_no));
-
- return(n & ~((ib_uint64_t) align_no - 1));
-}
-
-/********************************************************//**
-Rounds ib_uint64_t upward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-ib_uint64_t
-ut_uint64_align_up(
-/*===============*/
- ib_uint64_t n, /*!< in: number to be rounded */
- ulint align_no) /*!< in: align by this number
- which must be a power of 2 */
-{
- ib_uint64_t align_1 = (ib_uint64_t) align_no - 1;
-
- ut_ad(align_no > 0);
- ut_ad(ut_is_2pow(align_no));
-
- return((n + align_1) & ~align_1);
-}
-
-/*********************************************************//**
-The following function rounds up a pointer to the nearest aligned address.
-@return aligned pointer */
-UNIV_INLINE
-void*
-ut_align(
-/*=====*/
- const void* ptr, /*!< in: pointer */
- ulint align_no) /*!< in: align by this number */
-{
- ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
- ut_ad(ptr);
-
- ut_ad(sizeof(void*) == sizeof(ulint));
-
- return((void*)((((ulint) ptr) + align_no - 1) & ~(align_no - 1)));
-}
-
-/*********************************************************//**
-The following function rounds down a pointer to the nearest
-aligned address.
-@return aligned pointer */
-UNIV_INLINE
-void*
-ut_align_down(
-/*==========*/
- const void* ptr, /*!< in: pointer */
- ulint align_no) /*!< in: align by this number */
-{
- ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
- ut_ad(ptr);
-
- ut_ad(sizeof(void*) == sizeof(ulint));
-
- return((void*)(((ulint) ptr) & ~(align_no - 1)));
-}
-
-/*********************************************************//**
-The following function computes the offset of a pointer from the nearest
-aligned address.
-@return distance from aligned pointer */
-UNIV_INLINE
-ulint
-ut_align_offset(
-/*============*/
- const void* ptr, /*!< in: pointer */
- ulint align_no) /*!< in: align by this number */
-{
- ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
- ut_ad(ptr);
-
- ut_ad(sizeof(void*) == sizeof(ulint));
-
- return(((ulint) ptr) & (align_no - 1));
-}
-
-/*****************************************************************//**
-Gets the nth bit of a ulint.
-@return TRUE if nth bit is 1; 0th bit is defined to be the least significant */
-UNIV_INLINE
-ibool
-ut_bit_get_nth(
-/*===========*/
- ulint a, /*!< in: ulint */
- ulint n) /*!< in: nth bit requested */
-{
- ut_ad(n < 8 * sizeof(ulint));
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
- return(1 & (a >> n));
-}
-
-/*****************************************************************//**
-Sets the nth bit of a ulint.
-@return the ulint with the bit set as requested */
-UNIV_INLINE
-ulint
-ut_bit_set_nth(
-/*===========*/
- ulint a, /*!< in: ulint */
- ulint n, /*!< in: nth bit requested */
- ibool val) /*!< in: value for the bit to set */
-{
- ut_ad(n < 8 * sizeof(ulint));
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
- if (val) {
- return(((ulint) 1 << n) | a);
- } else {
- return(~((ulint) 1 << n) & a);
- }
-}
diff --git a/storage/xtradb/include/ut0counter.h b/storage/xtradb/include/ut0counter.h
deleted file mode 100644
index 4f736428a17..00000000000
--- a/storage/xtradb/include/ut0counter.h
+++ /dev/null
@@ -1,159 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/ut0counter.h
-
-Counter utility class
-
-Created 2012/04/12 by Sunny Bains
-*******************************************************/
-
-#ifndef UT0COUNTER_H
-#define UT0COUNTER_H
-
-#include "univ.i"
-#include <string.h>
-#include "os0thread.h"
-
-/** CPU cache line size */
-#ifndef UNIV_HOTBACKUP
-# ifdef CPU_LEVEL1_DCACHE_LINESIZE
-# define CACHE_LINE_SIZE CPU_LEVEL1_DCACHE_LINESIZE
-# else
-# error CPU_LEVEL1_DCACHE_LINESIZE is undefined
-# endif /* CPU_LEVEL1_DCACHE_LINESIZE */
-#else
-# define CACHE_LINE_SIZE 64
-#endif /* UNIV_HOTBACKUP */
-
-/** Default number of slots to use in ib_counter_t */
-#define IB_N_SLOTS 64
-
-/** Get the offset into the counter array. */
-template <typename Type, int N>
-struct generic_indexer_t {
- /** @return offset within m_counter */
- size_t offset(size_t index) const UNIV_NOTHROW {
- return(((index % N) + 1) * (CACHE_LINE_SIZE / sizeof(Type)));
- }
-};
-
-#ifdef HAVE_SCHED_GETCPU
-#include <utmpx.h>
-/** Use the cpu id to index into the counter array. If it fails then
-use the thread id. */
-template <typename Type, int N>
-struct get_sched_indexer_t : public generic_indexer_t<Type, N> {
- /* @return result from sched_getcpu(), the thread id if it fails. */
- size_t get_rnd_index() const UNIV_NOTHROW {
-
- size_t cpu = sched_getcpu();
- if (cpu == -1) {
- cpu = (lint) os_thread_get_curr_id();
- }
-
- return(cpu);
- }
-};
-#endif /* HAVE_SCHED_GETCPU */
-
-/** Use the thread id to index into the counter array. */
-template <typename Type, int N>
-struct thread_id_indexer_t : public generic_indexer_t<Type, N> {
- /* @return a random number, currently we use the thread id. Where
- thread id is represented as a pointer, it may not work as
- effectively. */
- size_t get_rnd_index() const UNIV_NOTHROW {
- return((lint) os_thread_get_curr_id());
- }
-
- /** @return a random offset to the array */
- size_t get_rnd_offset() const UNIV_NOTHROW
- {
- return(generic_indexer_t<Type, N>::offset(get_rnd_index()));
- }
-};
-
-/** Class for using fuzzy counters. The counter is not protected by any
-mutex and the results are not guaranteed to be 100% accurate but close
-enough. Creates an array of counters and separates each element by the
-CACHE_LINE_SIZE bytes */
-template <
- typename Type,
- int N = IB_N_SLOTS,
- template<typename, int> class Indexer = thread_id_indexer_t>
-struct MY_ALIGNED(CACHE_LINE_SIZE) ib_counter_t
-{
-#ifdef UNIV_DEBUG
- ~ib_counter_t()
- {
- size_t n = (CACHE_LINE_SIZE / sizeof(Type));
-
- /* Check that we aren't writing outside our defined bounds. */
- for (size_t i = 0; i < UT_ARR_SIZE(m_counter); i += n) {
- for (size_t j = 1; j < n - 1; ++j) {
- ut_ad(m_counter[i + j] == 0);
- }
- }
- }
-#endif /* UNIV_DEBUG */
-
- /** Increment the counter by 1. */
- void inc() UNIV_NOTHROW { add(1); }
-
- /** Increment the counter by 1.
- @param[in] index a reasonably thread-unique identifier */
- void inc(size_t index) UNIV_NOTHROW { add(index, 1); }
-
- /** Add to the counter.
- @param[in] n amount to be added */
- void add(Type n) UNIV_NOTHROW { add(m_policy.get_rnd_offset(), n); }
-
- /** Add to the counter.
- @param[in] index a reasonably thread-unique identifier
- @param[in] n amount to be added */
- void add(size_t index, Type n) UNIV_NOTHROW {
- size_t i = m_policy.offset(index);
-
- ut_ad(i < UT_ARR_SIZE(m_counter));
-
- m_counter[i] += n;
- }
-
- /* @return total value - not 100% accurate, since it is not atomic. */
- operator Type() const UNIV_NOTHROW {
- Type total = 0;
-
- for (size_t i = 0; i < N; ++i) {
- total += m_counter[m_policy.offset(i)];
- }
-
- return(total);
- }
-
-private:
- /** Indexer into the array */
- Indexer<Type, N>m_policy;
-
- /** Slot 0 is unused. */
- Type m_counter[(N + 1) * (CACHE_LINE_SIZE / sizeof(Type))];
-};
-
-#endif /* UT0COUNTER_H */
diff --git a/storage/xtradb/include/ut0crc32.h b/storage/xtradb/include/ut0crc32.h
deleted file mode 100644
index d6dd376d9af..00000000000
--- a/storage/xtradb/include/ut0crc32.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/ut0crc32.h
-CRC32 implementation
-
-Created Aug 10, 2011 Vasil Dimov
-*******************************************************/
-
-#ifndef ut0crc32_h
-#define ut0crc32_h
-
-#include "univ.i"
-
-/********************************************************************//**
-Initializes the data structures used by ut_crc32(). Does not do any
-allocations, would not hurt if called twice, but would be pointless. */
-UNIV_INTERN
-void
-ut_crc32_init();
-/*===========*/
-
-/********************************************************************//**
-Calculates CRC32.
-@param ptr - data over which to calculate CRC32.
-@param len - data length in bytes.
-@return CRC32 (CRC-32C, using the GF(2) primitive polynomial 0x11EDC6F41,
-or 0x1EDC6F41 without the high-order bit) */
-typedef ib_uint32_t (*ib_ut_crc32_t)(const byte* ptr, ulint len);
-
-extern ib_ut_crc32_t ut_crc32;
-
-extern const char *ut_crc32_implementation;
-
-#endif /* ut0crc32_h */
diff --git a/storage/xtradb/include/ut0dbg.h b/storage/xtradb/include/ut0dbg.h
deleted file mode 100644
index 3f5baef0a3c..00000000000
--- a/storage/xtradb/include/ut0dbg.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*****************************************************************//**
-@file include/ut0dbg.h
-Debug utilities for Innobase
-
-Created 1/30/1994 Heikki Tuuri
-**********************************************************************/
-
-#ifndef ut0dbg_h
-#define ut0dbg_h
-
-#ifdef UNIV_INNOCHECKSUM
-#define ut_a assert
-#define ut_ad assert
-#define ut_error assert(0)
-#else /* !UNIV_INNOCHECKSUM */
-
-#include "univ.i"
-#include <stdlib.h>
-#include "os0thread.h"
-
-#if defined(__GNUC__) && (__GNUC__ > 2)
-/** Test if an assertion fails.
-@param EXPR assertion expression
-@return nonzero if the assertion fails (EXPR is zero), zero if EXPR holds */
-# define UT_DBG_FAIL(EXPR) UNIV_UNLIKELY(!((ulint)(EXPR)))
-#else
-/** This is used to eliminate compiler warnings */
-extern ulint ut_dbg_zero;
-/** Test if an assertion fails.
-@param EXPR assertion expression
-@return nonzero if the assertion fails (EXPR is zero), zero if EXPR holds */
-# define UT_DBG_FAIL(EXPR) !((ulint)(EXPR) + ut_dbg_zero)
-#endif
-
-/*************************************************************//**
-Report a failed assertion. */
-UNIV_INTERN
-void
-ut_dbg_assertion_failed(
-/*====================*/
- const char* expr, /*!< in: the failed assertion */
- const char* file, /*!< in: source file containing the assertion */
- ulint line) /*!< in: line number of the assertion */
- UNIV_COLD MY_ATTRIBUTE((nonnull(2)));
-
-/** Abort the execution. */
-# define UT_DBG_PANIC abort()
-
-/** Abort execution if EXPR does not evaluate to nonzero.
-@param EXPR assertion expression that should hold */
-#define ut_a(EXPR) do { \
- if (UT_DBG_FAIL(EXPR)) { \
- ut_dbg_assertion_failed(#EXPR, \
- __FILE__, (ulint) __LINE__); \
- UT_DBG_PANIC; \
- } \
-} while (0)
-
-/** Abort execution. */
-#define ut_error do { \
- ut_dbg_assertion_failed(0, __FILE__, (ulint) __LINE__); \
- UT_DBG_PANIC; \
-} while (0)
-
-#ifdef UNIV_DEBUG
-/** Debug assertion. Does nothing unless UNIV_DEBUG is defined. */
-#define ut_ad(EXPR) ut_a(EXPR)
-/** Debug statement. Does nothing unless UNIV_DEBUG is defined. */
-#define ut_d(EXPR) do {EXPR;} while (0)
-#else
-/** Debug assertion. Does nothing unless UNIV_DEBUG is defined. */
-#define ut_ad(EXPR)
-/** Debug statement. Does nothing unless UNIV_DEBUG is defined. */
-#define ut_d(EXPR)
-#endif
-
-/** Silence warnings about an unused variable by doing a null assignment.
-@param A the unused variable */
-#define UT_NOT_USED(A) A = A
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-
-/** structure used for recording usage statistics */
-struct speedo_t {
- struct rusage ru; /*!< getrusage() result */
- struct timeval tv; /*!< gettimeofday() result */
-};
-
-/*******************************************************************//**
-Resets a speedo (records the current time in it). */
-UNIV_INTERN
-void
-speedo_reset(
-/*=========*/
- speedo_t* speedo); /*!< out: speedo */
-
-/*******************************************************************//**
-Shows the time elapsed and usage statistics since the last reset of a
-speedo. */
-UNIV_INTERN
-void
-speedo_show(
-/*========*/
- const speedo_t* speedo); /*!< in: speedo */
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-#endif
diff --git a/storage/xtradb/include/ut0list.h b/storage/xtradb/include/ut0list.h
deleted file mode 100644
index 796a272db59..00000000000
--- a/storage/xtradb/include/ut0list.h
+++ /dev/null
@@ -1,189 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ut0list.h
-A double-linked list
-
-Created 4/26/2006 Osku Salerma
-************************************************************************/
-
-/*******************************************************************//**
-A double-linked list. This differs from the one in ut0lst.h in that in this
-one, each list node contains a pointer to the data, whereas the one in
-ut0lst.h uses a strategy where the list pointers are embedded in the data
-items themselves.
-
-Use this one when you need to store arbitrary data in the list where you
-can't embed the list pointers in the data, if a data item needs to be
-stored in multiple lists, etc.
-
-Note about the memory management: ib_list_t is a fixed-size struct whose
-allocation/deallocation is done through ib_list_create/ib_list_free, but the
-memory for the list nodes is allocated through a user-given memory heap,
-which can either be the same for all nodes or vary per node. Most users will
-probably want to create a memory heap to store the item-specific data, and
-pass in this same heap to the list node creation functions, thus
-automatically freeing the list node when the item's heap is freed.
-
-************************************************************************/
-
-#ifndef IB_LIST_H
-#define IB_LIST_H
-
-#include "mem0mem.h"
-
-struct ib_list_t;
-struct ib_list_node_t;
-
-/****************************************************************//**
-Create a new list using mem_alloc. Lists created with this function must be
-freed with ib_list_free.
-@return list */
-UNIV_INTERN
-ib_list_t*
-ib_list_create(void);
-/*=================*/
-
-
-/****************************************************************//**
-Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for
-lists created with this function.
-@return list */
-UNIV_INTERN
-ib_list_t*
-ib_list_create_heap(
-/*================*/
- mem_heap_t* heap); /*!< in: memory heap to use */
-
-/****************************************************************//**
-Free a list. */
-UNIV_INTERN
-void
-ib_list_free(
-/*=========*/
- ib_list_t* list); /*!< in: list */
-
-/****************************************************************//**
-Add the data to the start of the list.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_first(
-/*==============*/
- ib_list_t* list, /*!< in: list */
- void* data, /*!< in: data */
- mem_heap_t* heap); /*!< in: memory heap to use */
-
-/****************************************************************//**
-Add the data to the end of the list.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_last(
-/*=============*/
- ib_list_t* list, /*!< in: list */
- void* data, /*!< in: data */
- mem_heap_t* heap); /*!< in: memory heap to use */
-
-/****************************************************************//**
-Add the data after the indicated node.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_after(
-/*==============*/
- ib_list_t* list, /*!< in: list */
- ib_list_node_t* prev_node, /*!< in: node preceding new node (can
- be NULL) */
- void* data, /*!< in: data */
- mem_heap_t* heap); /*!< in: memory heap to use */
-
-/****************************************************************//**
-Remove the node from the list. */
-UNIV_INTERN
-void
-ib_list_remove(
-/*===========*/
- ib_list_t* list, /*!< in: list */
- ib_list_node_t* node); /*!< in: node to remove */
-
-/****************************************************************//**
-Get the first node in the list.
-@return first node, or NULL */
-UNIV_INLINE
-ib_list_node_t*
-ib_list_get_first(
-/*==============*/
- ib_list_t* list); /*!< in: list */
-
-/****************************************************************//**
-Get the last node in the list.
-@return last node, or NULL */
-UNIV_INLINE
-ib_list_node_t*
-ib_list_get_last(
-/*=============*/
- ib_list_t* list); /*!< in: list */
-
-/********************************************************************
-Check if list is empty. */
-UNIV_INLINE
-ibool
-ib_list_is_empty(
-/*=============*/
- /* out: TRUE if empty else FALSE */
- const ib_list_t* list); /* in: list */
-
-/****************************************************************//**
-Get number of items on list.
-@return number of items on list */
-UNIV_INLINE
-ulint
-ib_list_len(
-/*========*/
- const ib_list_t* list); /*!< in: list */
-
-/* List. */
-struct ib_list_t {
- ib_list_node_t* first; /*!< first node */
- ib_list_node_t* last; /*!< last node */
- ibool is_heap_list; /*!< TRUE if this list was
- allocated through a heap */
-};
-
-/* A list node. */
-struct ib_list_node_t {
- ib_list_node_t* prev; /*!< previous node */
- ib_list_node_t* next; /*!< next node */
- void* data; /*!< user data */
-};
-
-/* Quite often, the only additional piece of data you need is the per-item
-memory heap, so we have this generic struct available to use in those
-cases. */
-struct ib_list_helper_t {
- mem_heap_t* heap; /*!< memory heap */
- void* data; /*!< user data */
-};
-
-#ifndef UNIV_NONINL
-#include "ut0list.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/ut0list.ic b/storage/xtradb/include/ut0list.ic
deleted file mode 100644
index 7a7f53adb2f..00000000000
--- a/storage/xtradb/include/ut0list.ic
+++ /dev/null
@@ -1,80 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ut0list.ic
-A double-linked list
-
-Created 4/26/2006 Osku Salerma
-************************************************************************/
-
-/****************************************************************//**
-Get the first node in the list.
-@return first node, or NULL */
-UNIV_INLINE
-ib_list_node_t*
-ib_list_get_first(
-/*==============*/
- ib_list_t* list) /*!< in: list */
-{
- return(list->first);
-}
-
-/****************************************************************//**
-Get the last node in the list.
-@return last node, or NULL */
-UNIV_INLINE
-ib_list_node_t*
-ib_list_get_last(
-/*=============*/
- ib_list_t* list) /*!< in: list */
-{
- return(list->last);
-}
-
-/********************************************************************
-Check if list is empty. */
-UNIV_INLINE
-ibool
-ib_list_is_empty(
-/*=============*/
- /* out: TRUE if empty else FALSE */
- const ib_list_t* list) /* in: list */
-{
- return(!(list->first || list->last));
-}
-
-/****************************************************************//**
-Get number of items on list by walking it from the first node.
-@return number of items on list */
-UNIV_INLINE
-ulint
-ib_list_len(
-/*========*/
- const ib_list_t* list) /*!< in: list */
-{
- ulint len = 0;
- ib_list_node_t* node = list->first;
-
- while(node) {
- len++;
- node = node->next;
- }
-
- return (len);
-}
diff --git a/storage/xtradb/include/ut0lst.h b/storage/xtradb/include/ut0lst.h
deleted file mode 100644
index b53e7ade4c1..00000000000
--- a/storage/xtradb/include/ut0lst.h
+++ /dev/null
@@ -1,408 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0lst.h
-List utilities
-
-Created 9/10/1995 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0lst_h
-#define ut0lst_h
-
-#include "univ.i"
-
-/*******************************************************************//**
-Return offset of F in POD T.
-@param T - POD pointer
-@param F - Field in T */
-#define IB_OFFSETOF(T, F) \
- (reinterpret_cast<byte*>(&(T)->F) - reinterpret_cast<byte*>(T))
-
-/* This module implements the two-way linear list which should be used
-if a list is used in the database. Note that a single struct may belong
-to two or more lists, provided that the list are given different names.
-An example of the usage of the lists can be found in fil0fil.cc. */
-
-/*******************************************************************//**
-This macro expands to the unnamed type definition of a struct which acts
-as the two-way list base node. The base node contains pointers
-to both ends of the list and a count of nodes in the list (excluding
-the base node from the count).
-@param TYPE the name of the list node data type */
-template <typename TYPE>
-struct ut_list_base {
- typedef TYPE elem_type;
-
- ulint count; /*!< count of nodes in list */
- TYPE* start; /*!< pointer to list start, NULL if empty */
- TYPE* end; /*!< pointer to list end, NULL if empty */
-};
-
-#define UT_LIST_BASE_NODE_T(TYPE) ut_list_base<TYPE>
-
-/*******************************************************************//**
-This macro expands to the unnamed type definition of a struct which
-should be embedded in the nodes of the list, the node type must be a struct.
-This struct contains the pointers to next and previous nodes in the list.
-The name of the field in the node struct should be the name given
-to the list.
-@param TYPE the list node type name */
-/* Example:
-struct LRU_node_t {
- UT_LIST_NODE_T(LRU_node_t) LRU_list;
- ...
-}
-The example implements an LRU list of name LRU_list. Its nodes are of type
-LRU_node_t. */
-
-template <typename TYPE>
-struct ut_list_node {
- TYPE* prev; /*!< pointer to the previous node,
- NULL if start of list */
- TYPE* next; /*!< pointer to next node, NULL if end of list */
-};
-
-#define UT_LIST_NODE_T(TYPE) ut_list_node<TYPE>
-
-/*******************************************************************//**
-Get the list node at offset.
-@param elem - list element
-@param offset - offset within element.
-@return reference to list node. */
-template <typename Type>
-ut_list_node<Type>&
-ut_elem_get_node(Type& elem, size_t offset)
-{
- ut_a(offset < sizeof(elem));
-
- return(*reinterpret_cast<ut_list_node<Type>*>(
- reinterpret_cast<byte*>(&elem) + offset));
-}
-
-/*******************************************************************//**
-Initializes the base node of a two-way list.
-@param BASE the list base node
-*/
-#define UT_LIST_INIT(BASE)\
-{\
- (BASE).count = 0;\
- (BASE).start = NULL;\
- (BASE).end = NULL;\
-}\
-
-/*******************************************************************//**
-Adds the node as the first element in a two-way linked list.
-@param list the base node (not a pointer to it)
-@param elem the element to add
-@param offset offset of list node in elem. */
-template <typename List, typename Type>
-void
-ut_list_prepend(
- List& list,
- Type& elem,
- size_t offset)
-{
- ut_list_node<Type>& elem_node = ut_elem_get_node(elem, offset);
-
- elem_node.prev = 0;
- elem_node.next = list.start;
-
- if (list.start != 0) {
- ut_list_node<Type>& base_node =
- ut_elem_get_node(*list.start, offset);
-
- ut_ad(list.start != &elem);
-
- base_node.prev = &elem;
- }
-
- list.start = &elem;
-
- if (list.end == 0) {
- list.end = &elem;
- }
-
- ++list.count;
-}
-
-/*******************************************************************//**
-Adds the node as the first element in a two-way linked list.
-@param NAME list name
-@param LIST the base node (not a pointer to it)
-@param ELEM the element to add */
-#define UT_LIST_ADD_FIRST(NAME, LIST, ELEM) \
- ut_list_prepend(LIST, *ELEM, IB_OFFSETOF(ELEM, NAME))
-
-/*******************************************************************//**
-Adds the node as the last element in a two-way linked list.
-@param list list
-@param elem the element to add
-@param offset offset of list node in elem */
-template <typename List, typename Type>
-void
-ut_list_append(
- List& list,
- Type& elem,
- size_t offset)
-{
- ut_list_node<Type>& elem_node = ut_elem_get_node(elem, offset);
-
- elem_node.next = 0;
- elem_node.prev = list.end;
-
- if (list.end != 0) {
- ut_list_node<Type>& base_node =
- ut_elem_get_node(*list.end, offset);
-
- ut_ad(list.end != &elem);
-
- base_node.next = &elem;
- }
-
- list.end = &elem;
-
- if (list.start == 0) {
- list.start = &elem;
- }
-
- ++list.count;
-}
-
-/*******************************************************************//**
-Adds the node as the last element in a two-way linked list.
-@param NAME list name
-@param LIST list
-@param ELEM the element to add */
-#define UT_LIST_ADD_LAST(NAME, LIST, ELEM)\
- ut_list_append(LIST, *ELEM, IB_OFFSETOF(ELEM, NAME))
-
-/*******************************************************************//**
-Inserts a ELEM2 after ELEM1 in a list.
-@param list the base node
-@param elem1 node after which ELEM2 is inserted
-@param elem2 node being inserted after elem1
-@param offset offset of list node in elem1 and elem2 */
-template <typename List, typename Type>
-void
-ut_list_insert(
- List& list,
- Type& elem1,
- Type& elem2,
- size_t offset)
-{
- ut_ad(&elem1 != &elem2);
-
- ut_list_node<Type>& elem1_node = ut_elem_get_node(elem1, offset);
- ut_list_node<Type>& elem2_node = ut_elem_get_node(elem2, offset);
-
- elem2_node.prev = &elem1;
- elem2_node.next = elem1_node.next;
-
- if (elem1_node.next != NULL) {
- ut_list_node<Type>& next_node =
- ut_elem_get_node(*elem1_node.next, offset);
-
- next_node.prev = &elem2;
- }
-
- elem1_node.next = &elem2;
-
- if (list.end == &elem1) {
- list.end = &elem2;
- }
-
- ++list.count;
-}
-
-/*******************************************************************//**
-Inserts a ELEM2 after ELEM1 in a list.
-@param NAME list name
-@param LIST the base node
-@param ELEM1 node after which ELEM2 is inserted
-@param ELEM2 node being inserted after ELEM1 */
-#define UT_LIST_INSERT_AFTER(NAME, LIST, ELEM1, ELEM2)\
- ut_list_insert(LIST, *ELEM1, *ELEM2, IB_OFFSETOF(ELEM1, NAME))
-
-#ifdef UNIV_LIST_DEBUG
-/** Invalidate the pointers in a list node by setting both to (Type*) -1.
-A type named Type must be in scope where the macro is expanded.
-@param N the list node (not a pointer to it) that was removed */
-# define UT_LIST_REMOVE_CLEAR(N) \
- (N).next = (Type*) -1; \
- (N).prev = (N).next
-#else
-/** Invalidate the pointers in a list node.
-This variant expands to nothing (UNIV_LIST_DEBUG is not defined).
-@param N the list node (not a pointer to it) that was removed */
-# define UT_LIST_REMOVE_CLEAR(N)
-#endif /* UNIV_LIST_DEBUG */
-
-/*******************************************************************//**
-Removes a node from a two-way linked list.
-@param list the base node (not a pointer to it)
-@param elem node to be removed from the list
-@param offset offset of list node within elem */
-template <typename List, typename Type>
-void
-ut_list_remove(
- List& list,
- Type& elem,
- size_t offset)
-{
- ut_list_node<Type>& elem_node = ut_elem_get_node(elem, offset);
-
- ut_a(list.count > 0);
-
- if (elem_node.next != NULL) {
- ut_list_node<Type>& next_node =
- ut_elem_get_node(*elem_node.next, offset);
-
- next_node.prev = elem_node.prev;
- } else {
- list.end = elem_node.prev;
- }
-
- if (elem_node.prev != NULL) {
- ut_list_node<Type>& prev_node =
- ut_elem_get_node(*elem_node.prev, offset);
-
- prev_node.next = elem_node.next;
- } else {
- list.start = elem_node.next;
- }
-
- UT_LIST_REMOVE_CLEAR(elem_node);
-
- --list.count;
-}
-
-/*******************************************************************//**
-Removes a node from a two-way linked list.
-@param NAME list name
-@param LIST the base node (not a pointer to it)
-@param ELEM node to be removed from the list */
-#define UT_LIST_REMOVE(NAME, LIST, ELEM) \
- ut_list_remove(LIST, *ELEM, IB_OFFSETOF(ELEM, NAME))
-
-/********************************************************************//**
-Gets the next node in a two-way list.
-@param NAME list name
-@param N pointer to a node
-@return the successor of N in NAME, or NULL */
-#define UT_LIST_GET_NEXT(NAME, N)\
- (((N)->NAME).next)
-
-/********************************************************************//**
-Gets the previous node in a two-way list.
-@param NAME list name
-@param N pointer to a node
-@return the predecessor of N in NAME, or NULL */
-#define UT_LIST_GET_PREV(NAME, N)\
- (((N)->NAME).prev)
-
-/********************************************************************//**
-Alternative macro to get the number of nodes in a two-way list, i.e.,
-its length.
-@param BASE the base node (not a pointer to it).
-@return the number of nodes in the list */
-#define UT_LIST_GET_LEN(BASE)\
- (BASE).count
-
-/********************************************************************//**
-Gets the first node in a two-way list.
-@param BASE the base node (not a pointer to it)
-@return first node, or NULL if the list is empty */
-#define UT_LIST_GET_FIRST(BASE)\
- (BASE).start
-
-/********************************************************************//**
-Gets the last node in a two-way list.
-@param BASE the base node (not a pointer to it)
-@return last node, or NULL if the list is empty */
-#define UT_LIST_GET_LAST(BASE)\
- (BASE).end
-
-struct NullValidate { void operator()(const void* elem) { } };
-
-/********************************************************************//**
-Iterate over all the elements and call the functor for each element.
-@param list base node (not a pointer to it)
-@param functor Functor that is called for each element in the list
-@param node pointer to member node within list element */
-template <typename List, class Functor>
-void
-ut_list_map(
- List& list,
- ut_list_node<typename List::elem_type>
- List::elem_type::*node,
- Functor functor)
-{
- ulint count = 0;
-
- for (typename List::elem_type* elem = list.start;
- elem != 0;
- elem = (elem->*node).next, ++count) {
-
- functor(elem);
- }
-
- ut_a(count == list.count);
-}
-
-/********************************************************************//**
-Checks the consistency of a two-way list.
-@param list base node (not a pointer to it)
-@param functor Functor that is called for each element in the list
-@param node pointer to member node within list element */
-template <typename List, class Functor>
-void
-ut_list_validate(
- List& list,
- ut_list_node<typename List::elem_type>
- List::elem_type::*node,
- Functor functor = NullValidate())
-{
- ut_list_map(list, node, functor);
-
- ulint count = 0;
-
- for (typename List::elem_type* elem = list.end;
- elem != 0;
- elem = (elem->*node).prev, ++count) {
-
- functor(elem);
- }
-
- ut_a(count == list.count);
-}
-
-/********************************************************************//**
-Checks the consistency of a two-way list.
-@param NAME the name of the list
-@param TYPE node type
-@param LIST base node (not a pointer to it)
-@param FUNCTOR called for each list element */
-#define UT_LIST_VALIDATE(NAME, TYPE, LIST, FUNCTOR) \
- ut_list_validate(LIST, &TYPE::NAME, FUNCTOR)
-
-#define UT_LIST_CHECK(NAME, TYPE, LIST) \
- ut_list_validate(LIST, &TYPE::NAME, NullValidate())
-
-#endif /* ut0lst.h */
diff --git a/storage/xtradb/include/ut0mem.h b/storage/xtradb/include/ut0mem.h
deleted file mode 100644
index 81470358f2f..00000000000
--- a/storage/xtradb/include/ut0mem.h
+++ /dev/null
@@ -1,261 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ut0mem.h
-Memory primitives
-
-Created 5/30/1994 Heikki Tuuri
-************************************************************************/
-
-#ifndef ut0mem_h
-#define ut0mem_h
-
-#include "univ.i"
-#include <string.h>
-#ifndef UNIV_HOTBACKUP
-# include "os0sync.h"
-
-/** The total amount of memory currently allocated from the operating
-system with os_mem_alloc_large() or malloc(). Does not count malloc()
-if srv_use_sys_malloc is set. Protected by ut_list_mutex. */
-extern ulint ut_total_allocated_memory;
-
-/** Mutex protecting ut_total_allocated_memory and ut_mem_block_list */
-extern os_fast_mutex_t ut_list_mutex;
-#endif /* !UNIV_HOTBACKUP */
-
-/** Wrapper for memcpy(3). Copy memory area when the source and
-target are not overlapping.
-* @param dest in: copy to
-* @param sour in: copy from
-* @param n in: number of bytes to copy
-* @return dest */
-UNIV_INLINE
-void*
-ut_memcpy(void* dest, const void* sour, ulint n);
-
-/** Wrapper for memmove(3). Copy memory area when the source and
-target are overlapping.
-* @param dest in: copy to
-* @param sour in: copy from
-* @param n in: number of bytes to copy
-* @return dest */
-UNIV_INLINE
-void*
-ut_memmove(void* dest, const void* sour, ulint n);
-
-/** Wrapper for memcmp(3). Compare memory areas.
-* @param str1 in: first memory block to compare
-* @param str2 in: second memory block to compare
-* @param n in: number of bytes to compare
-* @return negative, 0, or positive if str1 is smaller, equal,
- or greater than str2, respectively. */
-UNIV_INLINE
-int
-ut_memcmp(const void* str1, const void* str2, ulint n);
-
-/**********************************************************************//**
-Initializes the mem block list at database startup. */
-UNIV_INTERN
-void
-ut_mem_init(void);
-/*=============*/
-
-/**********************************************************************//**
-Allocates memory.
-@return own: allocated memory */
-UNIV_INTERN
-void*
-ut_malloc_low(
-/*==========*/
- ulint n, /*!< in: number of bytes to allocate */
- ibool assert_on_error) /*!< in: if TRUE, we crash mysqld if
- the memory cannot be allocated */
- MY_ATTRIBUTE((malloc));
-/**********************************************************************//**
-Allocates memory via ut_malloc_low() with assert_on_error set to TRUE. */
-#define ut_malloc(n) ut_malloc_low(n, TRUE)
-/**********************************************************************//**
-Frees a memory block allocated with ut_malloc. Freeing a NULL pointer is
-a nop. */
-UNIV_INTERN
-void
-ut_free(
-/*====*/
- void* ptr); /*!< in, own: memory block, can be NULL */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Implements realloc. This is needed by /pars/lexyy.cc. Otherwise, you should not
-use this function because the allocation functions in mem0mem.h are the
-recommended ones in InnoDB.
-
-man realloc in Linux, 2004:
-
- realloc() changes the size of the memory block pointed to
- by ptr to size bytes. The contents will be unchanged to
- the minimum of the old and new sizes; newly allocated
- memory will be uninitialized. If ptr is NULL, the call is
- equivalent to malloc(size); if size is equal to zero, the
- call is equivalent to free(ptr). Unless ptr is NULL, it
- must have been returned by an earlier call to malloc(),
- calloc() or realloc().
-
-RETURN VALUE
- realloc() returns a pointer to the newly allocated memory,
- which is suitably aligned for any kind of variable and may
- be different from ptr, or NULL if the request fails. If
- size was equal to 0, either NULL or a pointer suitable to
- be passed to free() is returned. If realloc() fails the
- original block is left untouched - it is not freed or
- moved.
-@return own: pointer to new mem block or NULL */
-UNIV_INTERN
-void*
-ut_realloc(
-/*=======*/
- void* ptr, /*!< in: pointer to old block or NULL */
- ulint size); /*!< in: desired size */
-/**********************************************************************//**
-Frees in shutdown all allocated memory not freed yet. */
-UNIV_INTERN
-void
-ut_free_all_mem(void);
-/*=================*/
-#endif /* !UNIV_HOTBACKUP */
-
-/** Wrapper for strcpy(3). Copy a NUL-terminated string.
-* @param dest in: copy to
-* @param sour in: copy from
-* @return dest */
-UNIV_INLINE
-char*
-ut_strcpy(char* dest, const char* sour);
-
-/** Wrapper for strlen(3). Determine the length of a NUL-terminated string.
-* @param str in: string
-* @return length of the string in bytes, excluding the terminating NUL */
-UNIV_INLINE
-ulint
-ut_strlen(const char* str);
-
-/** Wrapper for strcmp(3). Compare NUL-terminated strings.
-* @param str1 in: first string to compare
-* @param str2 in: second string to compare
-* @return negative, 0, or positive if str1 is smaller, equal,
- or greater than str2, respectively. */
-UNIV_INLINE
-int
-ut_strcmp(const char* str1, const char* str2);
-
-/**********************************************************************//**
-Copies up to size - 1 characters from the NUL-terminated string src to
-dst, NUL-terminating the result. Returns strlen(src), so truncation
-occurred if the return value >= size.
-@return strlen(src) */
-UNIV_INTERN
-ulint
-ut_strlcpy(
-/*=======*/
- char* dst, /*!< out: destination buffer */
- const char* src, /*!< in: source buffer */
- ulint size); /*!< in: size of destination buffer */
-
-/**********************************************************************//**
-Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last
-(size - 1) bytes of src, not the first.
-@return strlen(src) */
-UNIV_INTERN
-ulint
-ut_strlcpy_rev(
-/*===========*/
- char* dst, /*!< out: destination buffer */
- const char* src, /*!< in: source buffer */
- ulint size); /*!< in: size of destination buffer */
-
-/**********************************************************************//**
-Return the number of times s2 occurs in s1. Overlapping instances of s2
-are only counted once.
-@return the number of times s2 occurs in s1 */
-UNIV_INTERN
-ulint
-ut_strcount(
-/*========*/
- const char* s1, /*!< in: string to search in */
- const char* s2); /*!< in: string to search for */
-
-/**********************************************************************//**
-Replace every occurrence of s1 in str with s2. Overlapping instances of s1
-are only replaced once.
-@return own: modified string, must be freed with mem_free() */
-UNIV_INTERN
-char*
-ut_strreplace(
-/*==========*/
- const char* str, /*!< in: string to operate on */
- const char* s1, /*!< in: string to replace */
- const char* s2); /*!< in: string to replace s1 with */
-
-/********************************************************************
-Concatenate 3 strings.*/
-
-char*
-ut_str3cat(
-/*=======*/
- /* out, own: concatenated string, must be
- freed with mem_free() */
- const char* s1, /* in: string 1 */
- const char* s2, /* in: string 2 */
- const char* s3); /* in: string 3 */
-
-/**********************************************************************//**
-Converts a raw binary data to a NUL-terminated hex string. The output is
-truncated if there is not enough space in "hex", make sure "hex_size" is at
-least (2 * raw_size + 1) if you do not want this to happen. Returns the
-actual number of characters written to "hex" (including the NUL).
-@return number of chars written */
-UNIV_INLINE
-ulint
-ut_raw_to_hex(
-/*==========*/
- const void* raw, /*!< in: raw data */
- ulint raw_size, /*!< in: "raw" length in bytes */
- char* hex, /*!< out: hex string */
- ulint hex_size); /*!< in: "hex" size in bytes */
-
-/*******************************************************************//**
-Adds single quotes to the start and end of string and escapes any quotes
-by doubling them. Returns the number of bytes that were written to "buf"
-(including the terminating NUL). If buf_size is too small then the
-trailing bytes from "str" are discarded.
-@return number of bytes that were written */
-UNIV_INLINE
-ulint
-ut_str_sql_format(
-/*==============*/
- const char* str, /*!< in: string */
- ulint str_len, /*!< in: string length in bytes */
- char* buf, /*!< out: output buffer */
- ulint buf_size); /*!< in: output buffer size
- in bytes */
-
-#ifndef UNIV_NONINL
-#include "ut0mem.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/ut0mem.ic b/storage/xtradb/include/ut0mem.ic
deleted file mode 100644
index 5c9071d52cc..00000000000
--- a/storage/xtradb/include/ut0mem.ic
+++ /dev/null
@@ -1,317 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ut0mem.ic
-Memory primitives
-
-Created 5/30/1994 Heikki Tuuri
-************************************************************************/
-
-#include "ut0byte.h"
-#include "mach0data.h"
-
-/** Wrapper for memcpy(3). Copy a memory area; the source and the
-target must not overlap.
-* @param dest out: buffer to copy to
-* @param sour in: buffer to copy from
-* @param n in: number of bytes to copy
-* @return dest, as returned by memcpy(3) */
-UNIV_INLINE
-void*
-ut_memcpy(void* dest, const void* sour, ulint n)
-{
- return(memcpy(dest, sour, n));
-}
-
-/** Wrapper for memmove(3). Copy a memory area; the source and the
-target may overlap.
-* @param dest out: buffer to copy to
-* @param sour in: buffer to copy from
-* @param n in: number of bytes to copy
-* @return dest, as returned by memmove(3) */
-UNIV_INLINE
-void*
-ut_memmove(void* dest, const void* sour, ulint n)
-{
- return(memmove(dest, sour, n));
-}
-
-/** Wrapper for memcmp(3). Compare the first n bytes of two memory areas.
-* @param str1 in: first memory block to compare
-* @param str2 in: second memory block to compare
-* @param n in: number of bytes to compare
-* @return negative, 0, or positive if str1 is smaller, equal,
- or greater than str2, respectively (the memcmp(3) result). */
-UNIV_INLINE
-int
-ut_memcmp(const void* str1, const void* str2, ulint n)
-{
- return(memcmp(str1, str2, n));
-}
-
-/** Wrapper for strcpy(3). Copy a NUL-terminated string, NUL included.
-* @param dest out: buffer to copy to
-* @param sour in: NUL-terminated string to copy from
-* @return dest, as returned by strcpy(3) */
-UNIV_INLINE
-char*
-ut_strcpy(char* dest, const char* sour)
-{
- return(strcpy(dest, sour));
-}
-
-/** Wrapper for strlen(3). Determine the length of a NUL-terminated string.
-* @param str in: NUL-terminated string
-* @return length of the string in bytes, excluding the terminating NUL */
-UNIV_INLINE
-ulint
-ut_strlen(const char* str)
-{
- return(strlen(str));
-}
-
-/** Wrapper for strcmp(3). Compare NUL-terminated strings.
-* @param str1 in: first string to compare
-* @param str2 in: second string to compare
-* @return negative, 0, or positive if str1 is smaller, equal,
- or greater than str2, respectively (the strcmp(3) result). */
-UNIV_INLINE
-int
-ut_strcmp(const char* str1, const char* str2)
-{
- return(strcmp(str1, str2));
-}
-
-/**********************************************************************//**
-Converts raw binary data to a NUL-terminated hex string. The output is
-truncated if there is not enough space in "hex", make sure "hex_size" is at
-least (2 * raw_size + 1) if you do not want this to happen. Returns the
-actual number of characters written to "hex" (including the NUL).
-@return number of chars written */
-UNIV_INLINE
-ulint
-ut_raw_to_hex(
-/*==========*/
- const void* raw, /*!< in: raw data */
- ulint raw_size, /*!< in: "raw" length in bytes */
- char* hex, /*!< out: hex string */
- ulint hex_size) /*!< in: "hex" size in bytes */
-{
-/* MK_UINT16(a, b): a uint16 whose bytes in memory are a followed by b. */
-#ifdef WORDS_BIGENDIAN
-
-#define MK_UINT16(a, b) (((uint16) (a)) << 8 | (uint16) (b))
-
-#define UINT16_GET_A(u) ((unsigned char) ((u) >> 8))
-#define UINT16_GET_B(u) ((unsigned char) ((u) & 0xFF))
-
-#else /* WORDS_BIGENDIAN */
-
-#define MK_UINT16(a, b) (((uint16) (b)) << 8 | (uint16) (a))
-
-#define UINT16_GET_A(u) ((unsigned char) ((u) & 0xFF))
-#define UINT16_GET_B(u) ((unsigned char) ((u) >> 8))
-
-#endif /* WORDS_BIGENDIAN */
-/* The 16 table entries whose first hex digit is a. */
-#define MK_ALL_UINT16_WITH_A(a) \
- MK_UINT16(a, '0'), \
- MK_UINT16(a, '1'), \
- MK_UINT16(a, '2'), \
- MK_UINT16(a, '3'), \
- MK_UINT16(a, '4'), \
- MK_UINT16(a, '5'), \
- MK_UINT16(a, '6'), \
- MK_UINT16(a, '7'), \
- MK_UINT16(a, '8'), \
- MK_UINT16(a, '9'), \
- MK_UINT16(a, 'A'), \
- MK_UINT16(a, 'B'), \
- MK_UINT16(a, 'C'), \
- MK_UINT16(a, 'D'), \
- MK_UINT16(a, 'E'), \
- MK_UINT16(a, 'F')
- /* hex_map[b] holds the two uppercase hex digits of the byte value b. */
- static const uint16 hex_map[256] = {
- MK_ALL_UINT16_WITH_A('0'),
- MK_ALL_UINT16_WITH_A('1'),
- MK_ALL_UINT16_WITH_A('2'),
- MK_ALL_UINT16_WITH_A('3'),
- MK_ALL_UINT16_WITH_A('4'),
- MK_ALL_UINT16_WITH_A('5'),
- MK_ALL_UINT16_WITH_A('6'),
- MK_ALL_UINT16_WITH_A('7'),
- MK_ALL_UINT16_WITH_A('8'),
- MK_ALL_UINT16_WITH_A('9'),
- MK_ALL_UINT16_WITH_A('A'),
- MK_ALL_UINT16_WITH_A('B'),
- MK_ALL_UINT16_WITH_A('C'),
- MK_ALL_UINT16_WITH_A('D'),
- MK_ALL_UINT16_WITH_A('E'),
- MK_ALL_UINT16_WITH_A('F')
- };
- const unsigned char* rawc;
- ulint read_bytes;
- ulint write_bytes;
- ulint i;
-
- rawc = (const unsigned char*) raw;
- /* Without any room there is nothing to write, not even the NUL. */
- if (hex_size == 0) {
-
- return(0);
- }
- /* If all digits plus the NUL do not fit, convert only what fits. */
- if (hex_size <= 2 * raw_size) {
-
- read_bytes = hex_size / 2;
- write_bytes = hex_size;
- } else {
-
- read_bytes = raw_size;
- write_bytes = 2 * raw_size + 1;
- }
-/* Run ASSIGN once per input byte, advancing hex by 2 and rawc by 1. */
-#define LOOP_READ_BYTES(ASSIGN) \
- for (i = 0; i < read_bytes; i++) { \
- ASSIGN; \
- hex += 2; \
- rawc++; \
- }
- /* Presumably tests that hex is 2-byte aligned (one uint16 store per byte). */
- if (ut_align_offset(hex, 2) == 0) {
-
- LOOP_READ_BYTES(
- *(uint16*) hex = hex_map[*rawc]
- );
- } else {
- /* Otherwise store the two digits one char at a time. */
- LOOP_READ_BYTES(
- *hex = UINT16_GET_A(hex_map[*rawc]);
- *(hex + 1) = UINT16_GET_B(hex_map[*rawc])
- );
- }
- /* Truncated output with an even hex_size fills the buffer with digits; */
- if (hex_size <= 2 * raw_size && hex_size % 2 == 0) {
- /* step back so that the NUL overwrites the last digit. */
- hex--;
- }
-
- *hex = '\0';
-
- return(write_bytes);
-}
-
-/*******************************************************************//**
-Adds single quotes to the start and end of string, doubles any single quote
-or backslash and writes a NUL byte as \0. Returns the number of bytes that
-were written to "buf" (including the terminating NUL). If buf_size is too
-small then the trailing bytes from "str" are discarded.
-@return number of bytes that were written */
-UNIV_INLINE
-ulint
-ut_str_sql_format(
-/*==============*/
- const char* str, /*!< in: string */
- ulint str_len, /*!< in: string length in bytes */
- char* buf, /*!< out: output buffer */
- ulint buf_size) /*!< in: output buffer size
- in bytes */
-{
- ulint str_i;
- ulint buf_i;
-
- buf_i = 0;
- /* Buffers of fewer than 4 bytes are handled separately here. */
- switch (buf_size) {
- case 3:
- /* Exactly enough room for '' and the NUL: only for an empty str. */
- if (str_len == 0) {
-
- buf[buf_i] = '\'';
- buf_i++;
- buf[buf_i] = '\'';
- buf_i++;
- }
- /* FALLTHROUGH */
- case 2:
- case 1:
- /* Write only the terminating NUL (after '' if written above). */
- buf[buf_i] = '\0';
- buf_i++;
- /* FALLTHROUGH */
- case 0:
- /* For buf_size 0 nothing has been written and 0 is returned. */
- return(buf_i);
- }
-
- /* buf_size >= 4 */
-
- buf[0] = '\'';
- buf_i = 1;
-
- for (str_i = 0; str_i < str_len; str_i++) {
-
- char ch;
- /* Only room left for the closing quote and the NUL. */
- if (buf_size - buf_i == 2) {
-
- break;
- }
-
- ch = str[str_i];
-
- switch (ch) {
- case '\0':
- /* Needs 2 bytes for \0 plus 2 for the closing quote and NUL. */
- if (buf_size - buf_i < 4) {
-
- goto func_exit;
- }
- buf[buf_i] = '\\';
- buf_i++;
- buf[buf_i] = '0';
- buf_i++;
- break;
- case '\'':
- case '\\':
- /* Written twice (here and in the default case): needs 2 + 2 bytes. */
- if (buf_size - buf_i < 4) {
-
- goto func_exit;
- }
- buf[buf_i] = ch;
- buf_i++;
- /* FALLTHROUGH */
- default:
-
- buf[buf_i] = ch;
- buf_i++;
- }
- }
-
-func_exit:
- /* Close the quote and terminate; the checks above keep 2 bytes free. */
- buf[buf_i] = '\'';
- buf_i++;
- buf[buf_i] = '\0';
- buf_i++;
-
- return(buf_i);
-}
diff --git a/storage/xtradb/include/ut0rbt.h b/storage/xtradb/include/ut0rbt.h
deleted file mode 100644
index 5c25104b5d7..00000000000
--- a/storage/xtradb/include/ut0rbt.h
+++ /dev/null
@@ -1,346 +0,0 @@
-/***************************************************************************//**
-
-Copyright (c) 2007, 2010, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-/******************************************************************//**
-@file include/ut0rbt.h
-Various utilities
-
-Created 2007-03-20 Sunny Bains
-*******************************************************/
-
-#ifndef INNOBASE_UT0RBT_H
-#define INNOBASE_UT0RBT_H
-
-#if !defined(IB_RBT_TESTING)
-#include "univ.i"
-#include "ut0mem.h"
-#else /* IB_RBT_TESTING: stand in for the InnoDB definitions with libc ones */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-
-#define ut_malloc malloc
-#define ut_free free
-#define ulint unsigned long
-#define ut_a(c) assert(c)
-#define ut_error assert(0)
-#define ibool unsigned int
-#define TRUE 1
-#define FALSE 0
-#endif /* !IB_RBT_TESTING */
-
-struct ib_rbt_node_t;
-typedef void (*ib_rbt_print_node)(const ib_rbt_node_t* node);
-typedef int (*ib_rbt_compare)(const void* p1, const void* p2);
-typedef int (*ib_rbt_arg_compare)(const void*, const void* p1, const void* p2);
-
-/** Red black tree color types, stored in ib_rbt_node_t::color */
-enum ib_rbt_color_t {
- IB_RBT_RED,
- IB_RBT_BLACK
-};
-
-/** Red black tree node */
-struct ib_rbt_node_t {
- ib_rbt_color_t color; /* color of this node */
-
- ib_rbt_node_t* left; /* points to left child */
- ib_rbt_node_t* right; /* points to right child */
- ib_rbt_node_t* parent; /* points to parent node */
-
- char value[1]; /* First byte of the data value (see SIZEOF_NODE) */
-};
-
-/** Red black tree instance. */
-struct ib_rbt_t {
- ib_rbt_node_t* nil; /* Black colored node that is
- used as a sentinel. This is
- pre-allocated too.*/
-
- ib_rbt_node_t* root; /* Root of the tree, this is
- pre-allocated and the first
- data node is the left child.*/
-
- ulint n_nodes; /* Total number of data nodes (see rbt_size()) */
-
- ib_rbt_compare compare; /* Fn. to use for comparison */
- ib_rbt_arg_compare
- compare_with_arg; /* Fn. to use for comparison
- with argument */
- ulint sizeof_value; /* Size of a data value in bytes (see SIZEOF_NODE()) */
- void* cmp_arg; /* Compare func argument */
-};
-
-/** The result of searching for a key in the tree (see rbt_search()); useful
-for a speedy lookup and insert if the key doesn't exist (rbt_add_node()). */
-struct ib_rbt_bound_t {
- const ib_rbt_node_t*
- last; /* Last node visited */
-
- int result; /* Result of comparing with
- the last non-nil node that
- was visited */
-};
-
-/* Size in elements (t is an rb tree instance) */
-#define rbt_size(t) (t->n_nodes)
-/* NOTE(review): t and n are expanded unparenthesized in the macros below. */
-/* Check whether the rb tree is empty (t is an rb tree instance) */
-#define rbt_empty(t) (rbt_size(t) == 0)
-
-/* Get data value (t is the data type, n is an rb tree node instance) */
-#define rbt_value(t, n) ((t*) &n->value[0])
-
-/* Compare a key with the node value (t is tree, k is key, n is node)*/
-#define rbt_compare(t, k, n) (t->compare(k, n->value))
-/* The - 1 below accounts for value[1], already counted in the struct size. */
-/* Node size. FIXME: name might clash, but currently it does not, so for easier
- maintenance do not rename it for now. */
-#define SIZEOF_NODE(t) ((sizeof(ib_rbt_node_t) + t->sizeof_value) - 1)
-
-/**********************************************************************//**
-Free an instance of a red black tree */
-UNIV_INTERN
-void
-rbt_free(
-/*=====*/
- ib_rbt_t* tree); /*!< in: rb tree to free */
-/**********************************************************************//**
-Create an instance of a red black tree
-@return rb tree instance */
-UNIV_INTERN
-ib_rbt_t*
-rbt_create(
-/*=======*/
- size_t sizeof_value, /*!< in: size in bytes */
- ib_rbt_compare compare); /*!< in: comparator */
-/**********************************************************************//**
-Create an instance of a red black tree, whose comparison function takes
-an argument
-@return rb tree instance */
-UNIV_INTERN
-ib_rbt_t*
-rbt_create_arg_cmp(
-/*===============*/
- size_t sizeof_value, /*!< in: size in bytes */
- ib_rbt_arg_compare
- compare, /*!< in: comparator */
- void* cmp_arg); /*!< in: compare fn arg */
-/**********************************************************************//**
-Delete a node from the red black tree, identified by key */
-UNIV_INTERN
-ibool
-rbt_delete(
-/*=======*/
- /* out: TRUE on success */
- ib_rbt_t* tree, /* in: rb tree */
- const void* key); /* in: key to delete */
-/**********************************************************************//**
-Remove a node from the red black tree, NOTE: This function will not delete
-the node instance, THAT IS THE CALLERS RESPONSIBILITY.
-@return the removed node, without the const qualifier */
-UNIV_INTERN
-ib_rbt_node_t*
-rbt_remove_node(
-/*============*/
- ib_rbt_t* tree, /*!< in: rb tree */
- const ib_rbt_node_t*
- node); /*!< in: node to delete, this
- is a fudge and declared const
- because the caller has access
- only to const nodes.*/
-/**********************************************************************//**
-Return a node from the red black tree, identified by
-key, NULL if not found
-@return node if found else return NULL */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_lookup(
-/*=======*/
- const ib_rbt_t* tree, /*!< in: rb tree to search */
- const void* key); /*!< in: key to lookup */
-/**********************************************************************//**
-Add data to the red black tree, identified by key (no dups yet!)
-@return inserted node */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_insert(
-/*=======*/
- ib_rbt_t* tree, /*!< in: rb tree */
- const void* key, /*!< in: key for ordering */
- const void* value); /*!< in: data that will be
- copied to the node.*/
-/**********************************************************************//**
-Add a new node to the tree, useful for data that is pre-sorted.
-@return appended node */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_add_node(
-/*=========*/
- ib_rbt_t* tree, /*!< in: rb tree */
- ib_rbt_bound_t* parent, /*!< in: parent */
- const void* value); /*!< in: this value is copied
- to the node */
-/****************************************************************//**
-Add a new caller-provided node to tree at the specified position.
-The node must have its key fields initialized correctly.
-@return added node */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_add_preallocated_node(
-/*======================*/
- ib_rbt_t* tree, /*!< in: rb tree */
- ib_rbt_bound_t* parent, /*!< in: parent */
- ib_rbt_node_t* node); /*!< in: node */
-/**********************************************************************//**
-Return the left most data node in the tree
-@return left most node */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_first(
-/*======*/
- const ib_rbt_t* tree); /*!< in: rb tree */
-/**********************************************************************//**
-Return the right most data node in the tree
-@return right most node */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_last(
-/*=====*/
- const ib_rbt_t* tree); /*!< in: rb tree */
-/**********************************************************************//**
-Return the next node from current.
-@return successor node to current that is passed in. */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_next(
-/*=====*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const ib_rbt_node_t* /* in: current node */
- current);
-/**********************************************************************//**
-Return the prev node from current.
-@return predecessor node to current that is passed in */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_prev(
-/*=====*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const ib_rbt_node_t* /* in: current node */
- current);
-/**********************************************************************//**
-Find the node that has the lowest key that is >= key.
-@return node that satisfies the lower bound constraint or NULL */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_lower_bound(
-/*============*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const void* key); /*!< in: key to search */
-/**********************************************************************//**
-Find the node that has the greatest key that is <= key.
-@return node that satisfies the upper bound constraint or NULL */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_upper_bound(
-/*============*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const void* key); /*!< in: key to search */
-/**********************************************************************//**
-Search for the key, a node will be returned in parent.last, whether it
-was found or not. If not found then parent.last will contain the
-parent node for the possibly new key otherwise the matching node.
-@return result of last comparison */
-UNIV_INTERN
-int
-rbt_search(
-/*=======*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- ib_rbt_bound_t* parent, /*!< in: search bounds */
- const void* key); /*!< in: key to search */
-/**********************************************************************//**
-Search for the key, a node will be returned in parent.last, whether it
-was found or not. If not found then parent.last will contain the
-parent node for the possibly new key otherwise the matching node.
-@return result of last comparison */
-UNIV_INTERN
-int
-rbt_search_cmp(
-/*===========*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- ib_rbt_bound_t* parent, /*!< in: search bounds */
- const void* key, /*!< in: key to search */
- ib_rbt_compare compare, /*!< in: comparator */
- ib_rbt_arg_compare
- arg_compare); /*!< in: fn to compare items
- with argument */
-/**********************************************************************//**
-Clear the tree, deletes (and free's) all the nodes. */
-UNIV_INTERN
-void
-rbt_clear(
-/*======*/
- ib_rbt_t* tree); /*!< in: rb tree */
-/****************************************************************//**
-Clear the tree without deleting and freeing its nodes. */
-UNIV_INTERN
-void
-rbt_reset(
-/*======*/
- ib_rbt_t* tree); /*!< in: rb tree */
-/**********************************************************************//**
-Merge the nodes from src into dst. Return the number of nodes merged.
-@return no. of recs merged */
-UNIV_INTERN
-ulint
-rbt_merge_uniq(
-/*===========*/
- ib_rbt_t* dst, /*!< in: dst rb tree */
- const ib_rbt_t* src); /*!< in: src rb tree */
-/**********************************************************************//**
-Merge the nodes from src into dst. Return the number of nodes merged.
-Delete the nodes from src after copying node to dst. As a side effect
-the duplicates will be left untouched in the src, since we don't support
-duplicates (yet). NOTE: src and dst must be similar, the function doesn't
-check for this condition (yet).
-@return no. of recs merged */
-UNIV_INTERN
-ulint
-rbt_merge_uniq_destructive(
-/*=======================*/
- ib_rbt_t* dst, /*!< in: dst rb tree */
- ib_rbt_t* src); /*!< in: src rb tree */
-/**********************************************************************//**
-Verify the integrity of the RB tree. For debugging. 0 failure else height
-of tree (in count of black nodes).
-@return TRUE if OK FALSE if tree invalid. */
-UNIV_INTERN
-ibool
-rbt_validate(
-/*=========*/
- const ib_rbt_t* tree); /*!< in: tree to validate */
-/**********************************************************************//**
-Iterate over the tree in depth first order. */
-UNIV_INTERN
-void
-rbt_print(
-/*======*/
- const ib_rbt_t* tree, /*!< in: tree to traverse */
- ib_rbt_print_node print); /*!< in: print function */
-
-#endif /* INNOBASE_UT0RBT_H */
diff --git a/storage/xtradb/include/ut0rnd.h b/storage/xtradb/include/ut0rnd.h
deleted file mode 100644
index 6ed3ee3b2e5..00000000000
--- a/storage/xtradb/include/ut0rnd.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0rnd.h
-Random numbers and hashing
-
-Created 1/20/1994 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0rnd_h
-#define ut0rnd_h
-
-#include "univ.i"
-
-#ifndef UNIV_INNOCHECKSUM
-
-#include "ut0byte.h"
-
-/** The 'character code' for end of field or string (used
-in folding records) */
-#define UT_END_OF_FIELD 257
-
-/********************************************************//**
-This is used to set the random number seed. */
-UNIV_INLINE
-void
-ut_rnd_set_seed(
-/*============*/
- ulint seed); /*!< in: seed */
-/********************************************************//**
-The following function generates a series of 'random' ulint integers.
-@return the next 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_gen_next_ulint(
-/*==================*/
- ulint rnd); /*!< in: the previous random number value */
-/*********************************************************//**
-The following function generates 'random' ulint integers which
-enumerate the value space (let there be N of them) of ulint integers
-in a pseudo-random fashion. Note that the same integer is repeated
-always after N calls to the generator.
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_gen_ulint(void);
-/*==================*/
-/********************************************************//**
-Generates a random integer from a given interval.
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_interval(
-/*============*/
- ulint low, /*!< in: low limit; can generate also this value */
- ulint high); /*!< in: high limit; can generate also this value */
-/*********************************************************//**
-Generates a random boolean (ibool) value.
-@return the random value */
-UNIV_INLINE
-ibool
-ut_rnd_gen_ibool(void);
-/*=================*/
-/*******************************************************//**
-The following function generates a hash value for a ulint integer
-to a hash table of size table_size, which should be a prime or some
-random number to work reliably.
-@return hash value */
-UNIV_INLINE
-ulint
-ut_hash_ulint(
-/*==========*/
- ulint key, /*!< in: value to be hashed */
- ulint table_size); /*!< in: hash table size */
-/*************************************************************//**
-Folds a 64-bit integer.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_ull(
-/*========*/
- ib_uint64_t d) /*!< in: 64-bit integer */
- MY_ATTRIBUTE((const));
-/*************************************************************//**
-Folds a character string ending in the null character.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_string(
-/*===========*/
- const char* str) /*!< in: null-terminated string */
- MY_ATTRIBUTE((pure));
-/***********************************************************//**
-Looks for a prime number slightly greater than the given argument.
-The prime is chosen so that it is not near any power of 2.
-@return prime */
-UNIV_INTERN
-ulint
-ut_find_prime(
-/*==========*/
- ulint n) /*!< in: positive number > 100 */
- MY_ATTRIBUTE((const));
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/*************************************************************//**
-Folds a pair of ulints.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_ulint_pair(
-/*===============*/
- ulint n1, /*!< in: ulint */
- ulint n2) /*!< in: ulint */
- MY_ATTRIBUTE((const));
-/*************************************************************//**
-Folds a binary string.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_binary(
-/*===========*/
- const byte* str, /*!< in: string of bytes */
- ulint len) /*!< in: length */
- MY_ATTRIBUTE((pure));
-
-
-#ifndef UNIV_NONINL
-#include "ut0rnd.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/ut0rnd.ic b/storage/xtradb/include/ut0rnd.ic
deleted file mode 100644
index 987dfac03c1..00000000000
--- a/storage/xtradb/include/ut0rnd.ic
+++ /dev/null
@@ -1,262 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************************//**
-@file include/ut0rnd.ic
-Random numbers and hashing
-
-Created 5/30/1994 Heikki Tuuri
-*******************************************************************/
-
-#define UT_HASH_RANDOM_MASK 1463735687
-#define UT_HASH_RANDOM_MASK2 1653893711
-
-#ifndef UNIV_INNOCHECKSUM
-
-#define UT_RND1 151117737
-#define UT_RND2 119785373
-#define UT_RND3 85689495
-#define UT_RND4 76595339
-#define UT_SUM_RND2 98781234
-#define UT_SUM_RND3 126792457
-#define UT_SUM_RND4 63498502
-#define UT_XOR_RND1 187678878
-#define UT_XOR_RND2 143537923
-
-/** Seed value of ut_rnd_gen_ulint() */
-extern ulint ut_rnd_ulint_counter;
-
-/********************************************************//**
-This is used to set the random number seed. */
-UNIV_INLINE
-void
-ut_rnd_set_seed(
-/*============*/
- ulint seed) /*!< in: seed */
-{
- ut_rnd_ulint_counter = seed;
-}
-
-/********************************************************//**
-The following function generates a series of 'random' ulint integers.
-@return the next 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_gen_next_ulint(
-/*==================*/
- ulint rnd) /*!< in: the previous random number value */
-{
- ulint n_bits;
-
- n_bits = 8 * sizeof(ulint);
-
- rnd = UT_RND2 * rnd + UT_SUM_RND3;
- rnd = UT_XOR_RND1 ^ rnd;
- rnd = (rnd << 20) + (rnd >> (n_bits - 20));
- rnd = UT_RND3 * rnd + UT_SUM_RND4;
- rnd = UT_XOR_RND2 ^ rnd;
- rnd = (rnd << 20) + (rnd >> (n_bits - 20));
- rnd = UT_RND1 * rnd + UT_SUM_RND2;
-
- return(rnd);
-}
-
-/********************************************************//**
-The following function generates 'random' ulint integers which
-enumerate the value space of ulint integers in a pseudo random
-fashion. Note that the same integer is repeated always after
-2 to power 32 calls to the generator (if ulint is 32-bit).
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_gen_ulint(void)
-/*==================*/
-{
- ulint rnd;
-
- ut_rnd_ulint_counter = UT_RND1 * ut_rnd_ulint_counter + UT_RND2;
-
- rnd = ut_rnd_gen_next_ulint(ut_rnd_ulint_counter);
-
- return(rnd);
-}
-
-/********************************************************//**
-Generates a random integer from a given interval.
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_interval(
-/*============*/
- ulint low, /*!< in: low limit; can generate also this value */
- ulint high) /*!< in: high limit; can generate also this value */
-{
- ulint rnd;
-
- ut_ad(high >= low);
-
- if (low == high) {
-
- return(low);
- }
-
- rnd = ut_rnd_gen_ulint();
-
- return(low + (rnd % (high - low)));
-}
-
-/*********************************************************//**
-Generates a random iboolean value.
-@return the random value */
-UNIV_INLINE
-ibool
-ut_rnd_gen_ibool(void)
-/*=================*/
-{
- ulint x;
-
- x = ut_rnd_gen_ulint();
-
- if (((x >> 20) + (x >> 15)) & 1) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*******************************************************//**
-The following function generates a hash value for a ulint integer
-to a hash table of size table_size, which should be a prime
-or some random number for the hash table to work reliably.
-@return hash value */
-UNIV_INLINE
-ulint
-ut_hash_ulint(
-/*==========*/
- ulint key, /*!< in: value to be hashed */
- ulint table_size) /*!< in: hash table size */
-{
- ut_ad(table_size);
- key = key ^ UT_HASH_RANDOM_MASK2;
-
- return(key % table_size);
-}
-
-/*************************************************************//**
-Folds a 64-bit integer.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_ull(
-/*========*/
- ib_uint64_t d) /*!< in: 64-bit integer */
-{
- return(ut_fold_ulint_pair((ulint) d & ULINT32_MASK,
- (ulint) (d >> 32)));
-}
-
-/*************************************************************//**
-Folds a character string ending in the null character.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_string(
-/*===========*/
- const char* str) /*!< in: null-terminated string */
-{
- ulint fold = 0;
-
- ut_ad(str);
-
- while (*str != '\0') {
- fold = ut_fold_ulint_pair(fold, (ulint)(*str));
- str++;
- }
-
- return(fold);
-}
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/*************************************************************//**
-Folds a pair of ulints.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_ulint_pair(
-/*===============*/
- ulint n1, /*!< in: ulint */
- ulint n2) /*!< in: ulint */
-{
- return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1)
- ^ UT_HASH_RANDOM_MASK) + n2);
-}
-
-/*************************************************************//**
-Folds a binary string.
-@return folded value */
-UNIV_INLINE
-ulint
-ut_fold_binary(
-/*===========*/
- const byte* str, /*!< in: string of bytes */
- ulint len) /*!< in: length */
-{
- ulint fold = 0;
- const byte* str_end = str + (len & 0xFFFFFFF8);
-
- ut_ad(str || !len);
-
- while (str < str_end) {
- fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
- fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
- fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
- fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
- fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
- fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
- fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
- fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
- }
-
- switch (len & 0x7) {
- case 7:
- fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
- /* fall through */
- case 6:
- fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
- /* fall through */
- case 5:
- fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
- /* fall through */
- case 4:
- fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
- /* fall through */
- case 3:
- fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
- /* fall through */
- case 2:
- fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
- /* fall through */
- case 1:
- fold = ut_fold_ulint_pair(fold, (ulint)(*str++));
- }
-
- return(fold);
-}
diff --git a/storage/xtradb/include/ut0sort.h b/storage/xtradb/include/ut0sort.h
deleted file mode 100644
index 75648b5c317..00000000000
--- a/storage/xtradb/include/ut0sort.h
+++ /dev/null
@@ -1,106 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0sort.h
-Sort utility
-
-Created 11/9/1995 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0sort_h
-#define ut0sort_h
-
-#include "univ.i"
-
-/* This module gives a macro definition of the body of
-a standard sort function for an array of elements of any
-type. The comparison function is given as a parameter to
-the macro. The sort algorithm is mergesort, which has an
-O(n log n) worst case.
-*/
-
-/*******************************************************************//**
-This macro expands to the body of a standard sort function.
-The sort function uses mergesort and must be defined separately
-for each type of array.
-Also the comparison function has to be defined individually
-for each array cell type. SORT_FUN is the sort function name.
-The function takes the array to be sorted (ARR),
-the array of auxiliary space (AUX_ARR) of same size,
-and the low (LOW), inclusive, and high (HIGH), noninclusive,
-limits for the sort interval as arguments.
-CMP_FUN is the comparison function name. It takes as arguments
-two elements from the array and returns 1 if the first is bigger,
-0 if they are equal, and -1 if the second is bigger. */
-
-#define UT_SORT_FUNCTION_BODY(SORT_FUN, ARR, AUX_ARR, LOW, HIGH, CMP_FUN)\
-{\
- ulint ut_sort_mid77;\
- ulint ut_sort_i77;\
- ulint ut_sort_low77;\
- ulint ut_sort_high77;\
-\
- ut_ad((LOW) < (HIGH));\
- ut_ad(ARR);\
- ut_ad(AUX_ARR);\
-\
- if ((LOW) == (HIGH) - 1) {\
- return;\
- } else if ((LOW) == (HIGH) - 2) {\
- if (CMP_FUN((ARR)[LOW], (ARR)[(HIGH) - 1]) > 0) {\
- (AUX_ARR)[LOW] = (ARR)[LOW];\
- (ARR)[LOW] = (ARR)[(HIGH) - 1];\
- (ARR)[(HIGH) - 1] = (AUX_ARR)[LOW];\
- }\
- return;\
- }\
-\
- ut_sort_mid77 = ((LOW) + (HIGH)) / 2;\
-\
- SORT_FUN((ARR), (AUX_ARR), (LOW), ut_sort_mid77);\
- SORT_FUN((ARR), (AUX_ARR), ut_sort_mid77, (HIGH));\
-\
- ut_sort_low77 = (LOW);\
- ut_sort_high77 = ut_sort_mid77;\
-\
- for (ut_sort_i77 = (LOW); ut_sort_i77 < (HIGH); ut_sort_i77++) {\
-\
- if (ut_sort_low77 >= ut_sort_mid77) {\
- (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\
- ut_sort_high77++;\
- } else if (ut_sort_high77 >= (HIGH)) {\
- (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\
- ut_sort_low77++;\
- } else if (CMP_FUN((ARR)[ut_sort_low77],\
- (ARR)[ut_sort_high77]) > 0) {\
- (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\
- ut_sort_high77++;\
- } else {\
- (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\
- ut_sort_low77++;\
- }\
- }\
-\
- memcpy((void*) ((ARR) + (LOW)), (AUX_ARR) + (LOW),\
- ((HIGH) - (LOW)) * sizeof *(ARR));\
-}\
-
-
-#endif
-
diff --git a/storage/xtradb/include/ut0timer.h b/storage/xtradb/include/ut0timer.h
deleted file mode 100644
index f361ae79bf5..00000000000
--- a/storage/xtradb/include/ut0timer.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved.
-Copyright (c) 2014, SkySQL Ab. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/ut0timer.h
-Timer routines
-
-Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com
-modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6
-*************************************************************************/
-#ifndef ut0timer_h
-#define ut0timer_h
-
-#include "univ.i"
-#include "data0type.h"
-#include <my_rdtsc.h>
-
-/* Current timer stats */
-extern struct my_timer_unit_info ut_timer;
-
-/**************************************************************//**
-Function pointer to point selected timer function.
-@return timer current value */
-extern ulonglong (*ut_timer_now)(void);
-
-/**************************************************************//**
-Sets up the data required for use of my_timer_* functions.
-Selects the best timer by high frequency and tight resolution.
-Points ut_timer_now to the selected timer function.
-Initializes the ut_timer struct to contain the info for the selected timer. */
-UNIV_INTERN
-void ut_init_timer(void);
-
-/**************************************************************//**
-Return time passed since time then, automatically adjusted
-for the estimated timer overhead.
-@return time passed since "then" */
-UNIV_INLINE
-ulonglong
-ut_timer_since(
-/*===========*/
- ulonglong then); /*!< in: time where to calculate */
-/**************************************************************//**
-Get time passed since "then", and update then to now
-@return time passed since "then" */
-UNIV_INLINE
-ulonglong
-ut_timer_since_and_update(
-/*======================*/
- ulonglong *then); /*!< in: time where to calculate */
-/**************************************************************//**
-Convert native timer units in a ulonglong into seconds in a double
-@return time in seconds */
-UNIV_INLINE
-double
-ut_timer_to_seconds(
-/*=================*/
- ulonglong when); /*!< in: time where to calculate */
-/**************************************************************//**
-Convert native timer units in a ulonglong into milliseconds in a double
-@return time in milliseconds */
-UNIV_INLINE
-double
-ut_timer_to_milliseconds(
-/*=====================*/
- ulonglong when); /*!< in: time where to calculate */
-/**************************************************************//**
-Convert native timer units in a ulonglong into microseconds in a double
-@return time in microseconds */
-UNIV_INLINE
-double
-ut_timer_to_microseconds(
-/*=====================*/
- ulonglong when); /*!< in: time where to calculate */
-/**************************************************************//**
-Convert microseconds in a ulonglong to native timer units in a ulonglong
-@return time in native timer units */
-UNIV_INLINE
-ulonglong
-ut_microseconds_to_timer(
-/*=====================*/
- ulonglong when); /*!< in: time where to calculate */
-
-#ifndef UNIV_NONINL
-#include "ut0timer.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/ut0timer.ic b/storage/xtradb/include/ut0timer.ic
deleted file mode 100644
index 62e17a10fb1..00000000000
--- a/storage/xtradb/include/ut0timer.ic
+++ /dev/null
@@ -1,113 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved.
-Copyright (c) 2014, SkySQL Ab. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file include/ut0timer.ic
-Timer routines
-
-Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com
-modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6
-*************************************************************************/
-
-/**************************************************************//**
-Return time passed since time then, automatically adjusted
-for the estimated timer overhead.
-@return time passed since "then" */
-UNIV_INLINE
-ulonglong
-ut_timer_since(
-/*===========*/
- ulonglong then) /*!< in: time where to calculate */
-{
- return (ut_timer_now() - then) - ut_timer.overhead;
-}
-
-/**************************************************************//**
-Get time passed since "then", and update then to now
-@return time passed since "then" */
-UNIV_INLINE
-ulonglong
-ut_timer_since_and_update(
-/*======================*/
- ulonglong *then) /*!< in: time where to calculate */
-{
- ulonglong now = ut_timer_now();
- ulonglong ret = (now - (*then)) - ut_timer.overhead;
- *then = now;
- return ret;
-}
-
-/**************************************************************//**
-Convert native timer units in a ulonglong into seconds in a double
-@return time in seconds */
-UNIV_INLINE
-double
-ut_timer_to_seconds(
-/*=================*/
- ulonglong when) /*!< in: time where to calculate */
-{
- double ret = (double)(when);
- ret /= (double)(ut_timer.frequency);
- return ret;
-}
-
-/**************************************************************//**
-Convert native timer units in a ulonglong into milliseconds in a double
-@return time in milliseconds */
-UNIV_INLINE
-double
-ut_timer_to_milliseconds(
-/*=====================*/
- ulonglong when) /*!< in: time where to calculate */
-{
- double ret = (double)(when);
- ret *= 1000.0;
- ret /= (double)(ut_timer.frequency);
- return ret;
-}
-
-/**************************************************************//**
-Convert native timer units in a ulonglong into microseconds in a double
-@return time in microseconds */
-UNIV_INLINE
-double
-ut_timer_to_microseconds(
-/*=====================*/
- ulonglong when) /*!< in: time where to calculate */
-{
- double ret = (double)(when);
- ret *= 1000000.0;
- ret /= (double)(ut_timer.frequency);
- return ret;
-}
-
-/**************************************************************//**
-Convert microseconds in a ulonglong to native timer units in a ulonglong
-@return time in native timer units */
-UNIV_INLINE
-ulonglong
-ut_microseconds_to_timer(
-/*=====================*/
- ulonglong when) /*!< in: time where to calculate */
-{
- double ret = when;
- ret *= (double)(ut_timer.frequency);
- ret /= 1000000.0;
- return (ulonglong)ret;
-}
diff --git a/storage/xtradb/include/ut0ut.h b/storage/xtradb/include/ut0ut.h
deleted file mode 100644
index ca4ce0d4ef9..00000000000
--- a/storage/xtradb/include/ut0ut.h
+++ /dev/null
@@ -1,526 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0ut.h
-Various utilities
-
-Created 1/20/1994 Heikki Tuuri
-***********************************************************************/
-
-#ifndef ut0ut_h
-#define ut0ut_h
-
-#include "univ.i"
-
-#ifndef UNIV_INNOCHECKSUM
-
-#include "db0err.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
-#endif /* UNIV_HOTBACKUP */
-
-#include <time.h>
-#ifndef MYSQL_SERVER
-#include <ctype.h>
-#endif
-
-#include <stdarg.h> /* for va_list */
-
-#include <string>
-
-/** Index name prefix in fast index creation */
-#define TEMP_INDEX_PREFIX '\377'
-/** Index name prefix in fast index creation, as a string constant */
-#define TEMP_INDEX_PREFIX_STR "\377"
-
-/** Time stamp */
-typedef time_t ib_time_t;
-
-/* In order to call a piece of code, when a function returns or when the
-scope ends, use this utility class. It will invoke the given function
-object in its destructor. */
-template<typename F>
-struct ut_when_dtor {
- ut_when_dtor(F& p) : f(p) {}
- ~ut_when_dtor() {
- f();
- }
-private:
- F& f;
-};
-
-#ifndef UNIV_HOTBACKUP
-# if defined(HAVE_PAUSE_INSTRUCTION)
- /* According to the gcc info page, asm volatile means that the
- instruction has important side-effects and must not be removed.
- Also asm volatile may trigger a memory barrier (spilling all registers
- to memory). */
-# ifdef __SUNPRO_CC
-# define UT_RELAX_CPU() asm ("pause" )
-# else
-# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
-# endif /* __SUNPRO_CC */
-
-# elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
-# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
-# elif defined(HAVE_WINDOWS_ATOMICS)
- /* In the Win32 API, the x86 PAUSE instruction is executed by calling
- the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
- independent way by using YieldProcessor. */
-# define UT_RELAX_CPU() YieldProcessor()
-# elif defined(__powerpc__) && defined __GLIBC__
-#include <sys/platform/ppc.h>
-# define UT_RELAX_CPU() __ppc_get_timebase()
-# else
-# define UT_RELAX_CPU() ((void)0) /* avoid warning for an empty statement */
-# endif
-
-#if defined (__GNUC__)
-# define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory")
-#elif defined (_MSC_VER)
-# define UT_COMPILER_BARRIER() _ReadWriteBarrier()
-#else
-# define UT_COMPILER_BARRIER()
-#endif
-
-# if defined(HAVE_HMT_PRIORITY_INSTRUCTION)
-#include <sys/platform/ppc.h>
-# define UT_LOW_PRIORITY_CPU() __ppc_set_ppr_low()
-# define UT_RESUME_PRIORITY_CPU() __ppc_set_ppr_med()
-# else
-# define UT_LOW_PRIORITY_CPU() ((void)0)
-# define UT_RESUME_PRIORITY_CPU() ((void)0)
-# endif
-
-/*********************************************************************//**
-Delays execution for at most max_wait_us microseconds or returns earlier
-if cond becomes true.
-@param cond in: condition to wait for; evaluated every 2 ms
-@param max_wait_us in: maximum delay to wait, in microseconds */
-#define UT_WAIT_FOR(cond, max_wait_us) \
-do { \
- ullint start_us; \
- start_us = ut_time_us(NULL); \
- while (!(cond) \
- && ut_time_us(NULL) - start_us < (max_wait_us)) {\
- \
- os_thread_sleep(2000 /* 2 ms */); \
- } \
-} while (0)
-#endif /* !UNIV_HOTBACKUP */
-
-template <class T> T ut_min(T a, T b) { return(a < b ? a : b); }
-template <class T> T ut_max(T a, T b) { return(a > b ? a : b); }
-
-/******************************************************//**
-Calculates the minimum of two ulints.
-@return minimum */
-UNIV_INLINE
-ulint
-ut_min(
-/*===*/
- ulint n1, /*!< in: first number */
- ulint n2); /*!< in: second number */
-/******************************************************//**
-Calculates the maximum of two ulints.
-@return maximum */
-UNIV_INLINE
-ulint
-ut_max(
-/*===*/
- ulint n1, /*!< in: first number */
- ulint n2); /*!< in: second number */
-/****************************************************************//**
-Calculates minimum of two ulint-pairs. */
-UNIV_INLINE
-void
-ut_pair_min(
-/*========*/
- ulint* a, /*!< out: more significant part of minimum */
- ulint* b, /*!< out: less significant part of minimum */
- ulint a1, /*!< in: more significant part of first pair */
- ulint b1, /*!< in: less significant part of first pair */
- ulint a2, /*!< in: more significant part of second pair */
- ulint b2); /*!< in: less significant part of second pair */
-/******************************************************//**
-Compares two ulints.
-@return 1 if a > b, 0 if a == b, -1 if a < b */
-UNIV_INLINE
-int
-ut_ulint_cmp(
-/*=========*/
- ulint a, /*!< in: ulint */
- ulint b); /*!< in: ulint */
-/*******************************************************//**
-Compares two pairs of ulints.
-@return -1 if a < b, 0 if a == b, 1 if a > b */
-UNIV_INLINE
-int
-ut_pair_cmp(
-/*========*/
- ulint a1, /*!< in: more significant part of first pair */
- ulint a2, /*!< in: less significant part of first pair */
- ulint b1, /*!< in: more significant part of second pair */
- ulint b2); /*!< in: less significant part of second pair */
-/*************************************************************//**
-Determines if a number is zero or a power of two.
-@param n in: number
-@return nonzero if n is zero or a power of two; zero otherwise */
-#define ut_is_2pow(n) UNIV_LIKELY(!((n) & ((n) - 1)))
-/*************************************************************//**
-Calculates fast the remainder of n/m when m is a power of two.
-@param n in: numerator
-@param m in: denominator, must be a power of two
-@return the remainder of n/m */
-#define ut_2pow_remainder(n, m) ((n) & ((m) - 1))
-/*************************************************************//**
-Calculates the biggest multiple of m that is not bigger than n
-when m is a power of two. In other words, rounds n down to m * k.
-@param n in: number to round down
-@param m in: alignment, must be a power of two
-@return n rounded down to the biggest possible integer multiple of m */
-#define ut_2pow_round(n, m) ((n) & ~((m) - 1))
-/** Align a number down to a multiple of a power of two.
-@param n in: number to round down
-@param m in: alignment, must be a power of two
-@return n rounded down to the biggest possible integer multiple of m */
-#define ut_calc_align_down(n, m) ut_2pow_round(n, m)
-/********************************************************//**
-Calculates the smallest multiple of m that is not smaller than n
-when m is a power of two. In other words, rounds n up to m * k.
-@param n in: number to round up
-@param m in: alignment, must be a power of two
-@return n rounded up to the smallest possible integer multiple of m */
-#define ut_calc_align(n, m) (((n) + ((m) - 1)) & ~((m) - 1))
-/*************************************************************//**
-Calculates fast the 2-logarithm of a number, rounded upward to an
-integer.
-@return logarithm in the base 2, rounded upward */
-UNIV_INLINE
-ulint
-ut_2_log(
-/*=====*/
- ulint n); /*!< in: number */
-/*************************************************************//**
-Calculates 2 to power n.
-@return 2 to power n */
-UNIV_INLINE
-ulint
-ut_2_exp(
-/*=====*/
- ulint n); /*!< in: number */
-/*************************************************************//**
-Calculates fast the number rounded up to the nearest power of 2.
-@return first power of 2 which is >= n */
-UNIV_INTERN
-ulint
-ut_2_power_up(
-/*==========*/
- ulint n) /*!< in: number != 0 */
- MY_ATTRIBUTE((const));
-
-/** Determine how many bytes (groups of 8 bits) are needed to
-store the given number of bits.
-@param b in: bits
-@return number of bytes (octets) needed to represent b */
-#define UT_BITS_IN_BYTES(b) (((b) + 7) / 8)
-
-/**********************************************************//**
-Returns system time. We do not specify the format of the time returned:
-the only way to manipulate it is to use the function ut_difftime.
-@return system time */
-UNIV_INTERN
-ib_time_t
-ut_time(void);
-/*=========*/
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Returns system time.
-Upon successful completion, the value 0 is returned; otherwise the
-value -1 is returned and the global variable errno is set to indicate the
-error.
-@return 0 on success, -1 otherwise */
-UNIV_INTERN
-int
-ut_usectime(
-/*========*/
- ulint* sec, /*!< out: seconds since the Epoch */
- ulint* ms); /*!< out: microseconds since the Epoch+*sec */
-
-/**********************************************************//**
-Returns the number of microseconds since epoch. Similar to
-time(3), the return value is also stored in *tloc, provided
-that tloc is non-NULL.
-@return us since epoch */
-UNIV_INTERN
-ullint
-ut_time_us(
-/*=======*/
- ullint* tloc); /*!< out: us since epoch, if non-NULL */
-/**********************************************************//**
-Returns the number of milliseconds since some epoch. The
-value may wrap around. It should only be used for heuristic
-purposes.
-@return ms since epoch */
-UNIV_INTERN
-ulint
-ut_time_ms(void);
-/*============*/
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************//**
-Returns the number of milliseconds since some epoch. The
-value may wrap around. It should only be used for heuristic
-purposes.
-@return ms since epoch */
-UNIV_INTERN
-ulint
-ut_time_ms(void);
-/*============*/
-
-/**********************************************************//**
-Returns the difference of two times in seconds.
-@return time2 - time1 expressed in seconds */
-UNIV_INTERN
-double
-ut_difftime(
-/*========*/
- ib_time_t time2, /*!< in: time */
- ib_time_t time1); /*!< in: time */
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/**********************************************************//**
-Prints a timestamp to a file. */
-UNIV_INTERN
-void
-ut_print_timestamp(
-/*===============*/
- FILE* file) /*!< in: file where to print */
- UNIV_COLD MY_ATTRIBUTE((nonnull));
-
-#ifndef UNIV_INNOCHECKSUM
-
-/**********************************************************//**
-Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
-UNIV_INTERN
-void
-ut_sprintf_timestamp(
-/*=================*/
- char* buf); /*!< in: buffer where to sprintf */
-#ifdef UNIV_HOTBACKUP
-/**********************************************************//**
-Sprintfs a timestamp to a buffer with no spaces and with ':' characters
-replaced by '_'. */
-UNIV_INTERN
-void
-ut_sprintf_timestamp_without_extra_chars(
-/*=====================================*/
- char* buf); /*!< in: buffer where to sprintf */
-/**********************************************************//**
-Returns current year, month, day. */
-UNIV_INTERN
-void
-ut_get_year_month_day(
-/*==================*/
- ulint* year, /*!< out: current year */
- ulint* month, /*!< out: month */
- ulint* day); /*!< out: day */
-#else /* UNIV_HOTBACKUP */
-/*************************************************************//**
-Runs an idle loop on CPU. The argument gives the desired delay
-in microseconds on 100 MHz Pentium + Visual C++.
-The function returns no value. */
-UNIV_INTERN
-void
-ut_delay(
-/*=====*/
- ulint delay); /*!< in: delay in microseconds on 100 MHz Pentium */
-#endif /* UNIV_HOTBACKUP */
-/*************************************************************//**
-Prints the contents of a memory buffer in hex and ascii. */
-UNIV_INTERN
-void
-ut_print_buf(
-/*=========*/
- FILE* file, /*!< in: file where to print */
- const void* buf, /*!< in: memory buffer */
- ulint len); /*!< in: length of the buffer */
-
-/**********************************************************************//**
-Outputs a NUL-terminated file name, quoted with apostrophes. */
-UNIV_INTERN
-void
-ut_print_filename(
-/*==============*/
- FILE* f, /*!< in: output stream */
- const char* name); /*!< in: name to print */
-
-#ifndef UNIV_HOTBACKUP
-/* Forward declaration of transaction handle */
-struct trx_t;
-
-/**********************************************************************//**
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-UNIV_INTERN
-void
-ut_print_name(
-/*==========*/
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name); /*!< in: name to print */
-
-/**********************************************************************//**
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-UNIV_INTERN
-void
-ut_print_namel(
-/*===========*/
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction (NULL=no quotes) */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name, /*!< in: name to print */
- ulint namelen);/*!< in: length of name */
-/**********************************************************************//**
-Returns a name as a std::string, quoted as an SQL identifier.
-If the name contains a slash '/', the result will be
-two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-UNIV_INTERN
-std::string
-ut_get_name(
-/*=========*/
- const trx_t* trx, /*!< in: transaction (NULL=no quotes) */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name); /*!< in: name to print */
-/**********************************************************************//**
-Formats a table or index name, quoted as an SQL identifier. If the name
-contains a slash '/', the result will contain two identifiers separated by
-a period (.), as in SQL database_name.identifier.
-@return pointer to 'formatted' */
-UNIV_INTERN
-char*
-ut_format_name(
-/*===========*/
- const char* name, /*!< in: table or index name, must be
- '\0'-terminated */
- ibool is_table, /*!< in: if TRUE then 'name' is a table
- name */
- char* formatted, /*!< out: formatted result, will be
- '\0'-terminated */
- ulint formatted_size);/*!< in: no more than this number of
- bytes will be written to 'formatted' */
-
-/**********************************************************************//**
-Catenate files. */
-UNIV_INTERN
-void
-ut_copy_file(
-/*=========*/
- FILE* dest, /*!< in: output file */
- FILE* src); /*!< in: input file to be appended to output */
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef __WIN__
-/**********************************************************************//**
-A substitute for vsnprintf(3), formatted output conversion into
-a limited buffer. Note: this function DOES NOT return the number of
-characters that would have been printed if the buffer was unlimited because
-VC's _vsnprintf() returns -1 in this case and we would need to call
-_vscprintf() in addition to estimate that but we would need another copy
-of "ap" for that and VC does not provide va_copy(). */
-UNIV_INTERN
-void
-ut_vsnprintf(
-/*=========*/
- char* str, /*!< out: string */
- size_t size, /*!< in: str size */
- const char* fmt, /*!< in: format */
- va_list ap); /*!< in: format values */
-
-/**********************************************************************//**
-A substitute for snprintf(3), formatted output conversion into
-a limited buffer.
-@return number of characters that would have been printed if the size
-were unlimited, not including the terminating '\0'. */
-UNIV_INTERN
-int
-ut_snprintf(
-/*========*/
- char* str, /*!< out: string */
- size_t size, /*!< in: str size */
- const char* fmt, /*!< in: format */
- ...); /*!< in: format values */
-#else
-/**********************************************************************//**
-A wrapper for vsnprintf(3), formatted output conversion into
-a limited buffer. Note: this function DOES NOT return the number of
-characters that would have been printed if the buffer was unlimited because
-VC's _vsnprintf() returns -1 in this case and we would need to call
-_vscprintf() in addition to estimate that but we would need another copy
-of "ap" for that and VC does not provide va_copy(). */
-# define ut_vsnprintf(buf, size, fmt, ap) \
- ((void) vsnprintf(buf, size, fmt, ap))
-/**********************************************************************//**
-A wrapper for snprintf(3), formatted output conversion into
-a limited buffer. */
-# define ut_snprintf snprintf
-#endif /* __WIN__ */
-
-/*************************************************************//**
-Convert an error number to a human readable text message. The
-returned string is static and should not be freed or modified.
-@return string, describing the error */
-UNIV_INTERN
-const char*
-ut_strerr(
-/*======*/
- dberr_t num); /*!< in: error number */
-
-/****************************************************************
-Sort function for ulint arrays. */
-UNIV_INTERN
-void
-ut_ulint_sort(
-/*==========*/
- ulint* arr, /*!< in/out: array to sort */
- ulint* aux_arr, /*!< in/out: aux array to use in sort */
- ulint low, /*!< in: lower bound */
- ulint high) /*!< in: upper bound */
- MY_ATTRIBUTE((nonnull));
-
-#ifndef UNIV_NONINL
-#include "ut0ut.ic"
-#endif
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-#endif
-
diff --git a/storage/xtradb/include/ut0ut.ic b/storage/xtradb/include/ut0ut.ic
deleted file mode 100644
index 4e0f76e1957..00000000000
--- a/storage/xtradb/include/ut0ut.ic
+++ /dev/null
@@ -1,162 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************************//**
-@file include/ut0ut.ic
-Various utilities
-
-Created 5/30/1994 Heikki Tuuri
-*******************************************************************/
-
-/******************************************************//**
-Returns the smaller of two ulint values.
-@return n1 when n1 <= n2, otherwise n2 */
-UNIV_INLINE
-ulint
-ut_min(
-/*===*/
-	ulint	n1,	/*!< in: first value */
-	ulint	n2)	/*!< in: second value */
-{
-	if (n2 < n1) {
-		return(n2);
-	}
-
-	return(n1);
-}
-
-/******************************************************//**
-Returns the larger of two ulint values.
-@return n2 when n1 <= n2, otherwise n1 */
-UNIV_INLINE
-ulint
-ut_max(
-/*===*/
-	ulint	n1,	/*!< in: first value */
-	ulint	n2)	/*!< in: second value */
-{
-	if (n2 < n1) {
-		return(n1);
-	}
-
-	return(n2);
-}
-
-/****************************************************************//**
-Stores the smaller of two ulint pairs in (*a, *b). The pairs are ordered
-by their more significant part first; the less significant part only
-decides when the more significant parts are equal. */
-UNIV_INLINE
-void
-ut_pair_min(
-/*========*/
-	ulint*	a,	/*!< out: more significant part of minimum */
-	ulint*	b,	/*!< out: less significant part of minimum */
-	ulint	a1,	/*!< in: more significant part of first pair */
-	ulint	b1,	/*!< in: less significant part of first pair */
-	ulint	a2,	/*!< in: more significant part of second pair */
-	ulint	b2)	/*!< in: less significant part of second pair */
-{
-	if (a1 < a2) {
-		/* The first pair is smaller as a whole. */
-		*a = a1;
-		*b = b1;
-		return;
-	}
-
-	if (a2 < a1) {
-		/* The second pair is smaller as a whole. */
-		*a = a2;
-		*b = b2;
-		return;
-	}
-
-	/* Equal more significant parts: the less significant parts decide. */
-	*a = a1;
-	*b = ut_min(b1, b2);
-}
-
-/******************************************************//**
-Three-way comparison of two ulints.
-@return -1 if a < b, 0 if a == b, 1 if a > b */
-UNIV_INLINE
-int
-ut_ulint_cmp(
-/*=========*/
-	ulint	a,	/*!< in: left operand */
-	ulint	b)	/*!< in: right operand */
-{
-	if (a == b) {
-		return(0);
-	}
-
-	return((a < b) ? -1 : 1);
-}
-
-/*******************************************************//**
-Three-way comparison of two ulint pairs (a1, a2) and (b1, b2). The
-more significant parts are compared first; the less significant parts
-are consulted only when the more significant parts are equal.
-@return -1 if a < b, 0 if a == b, 1 if a > b */
-UNIV_INLINE
-int
-ut_pair_cmp(
-/*========*/
-	ulint	a1,	/*!< in: more significant part of first pair */
-	ulint	a2,	/*!< in: less significant part of first pair */
-	ulint	b1,	/*!< in: more significant part of second pair */
-	ulint	b2)	/*!< in: less significant part of second pair */
-{
-	if (a1 != b1) {
-		return((a1 > b1) ? 1 : -1);
-	}
-
-	if (a2 != b2) {
-		return((a2 > b2) ? 1 : -1);
-	}
-
-	return(0);
-}
-
-/*************************************************************//**
-Calculates the 2-logarithm of a number, rounded upward to an integer,
-by repeatedly halving n - 1. Note that the result is never smaller
-than 1: for n == 1 this function returns 1.
-@return logarithm in the base 2, rounded upward */
-UNIV_INLINE
-ulint
-ut_2_log(
-/*=====*/
-	ulint	n)	/*!< in: number != 0 */
-{
-	ulint	n_bits = 1;
-
-	ut_ad(n > 0);
-
-	/* Count how many times (n - 1) / 2 can be halved before it
-	reaches zero; every nonzero step adds one to the result. */
-	for (n = (n - 1) / 2; n != 0; n /= 2) {
-		n_bits++;
-	}
-
-	return(n_bits);
-}
-
-/*************************************************************//**
-Calculates 2 to power n by shifting a ulint 1 left by n bits. The
-shift count is not checked here.
-@return 2 to power n */
-UNIV_INLINE
-ulint
-ut_2_exp(
-/*=====*/
-	ulint	n)	/*!< in: exponent, used as the shift count */
-{
-	const ulint	one = 1;
-
-	return(one << n);
-}
diff --git a/storage/xtradb/include/ut0vec.h b/storage/xtradb/include/ut0vec.h
deleted file mode 100644
index 432fb348a09..00000000000
--- a/storage/xtradb/include/ut0vec.h
+++ /dev/null
@@ -1,337 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ut0vec.h
-A vector of pointers to data items
-
-Created 4/6/2006 Osku Salerma
-************************************************************************/
-
-#ifndef IB_VECTOR_H
-#define IB_VECTOR_H
-
-#include "univ.i"
-#include "mem0mem.h"
-
-struct ib_alloc_t;
-struct ib_vector_t;
-
-typedef void* (*ib_mem_alloc_t)(
- /* Allocation callback, stored in
- ib_alloc_t::mem_malloc.
- out: Pointer to allocated memory */
- ib_alloc_t* allocator, /* in: Pointer to allocator instance */
- ulint size); /* in: Number of bytes to allocate */
-
-typedef void (*ib_mem_free_t)( /* Release callback, stored in ib_alloc_t::mem_release */
- ib_alloc_t* allocator, /* in: Pointer to allocator instance */
- void* ptr); /* in: Memory to free */
-
-typedef void* (*ib_mem_resize_t)(
- /* Resize callback, stored in
- ib_alloc_t::mem_resize.
- out: Pointer to resized memory */
- ib_alloc_t* allocator, /* in: Pointer to allocator */
- void* ptr, /* in: Memory to resize */
- ulint old_size, /* in: Old memory size in bytes */
- ulint new_size); /* in: New size in bytes */
-
-typedef int (*ib_compare_t)(const void*, const void*); /* Comparator that ib_vector_sort() hands to qsort() */
-
-/* An automatically resizing vector datatype with the following properties:
-
- -All memory allocation is done through an allocator, which is responsible for
-freeing it when done with the vector.
-*/
-
-/* Shorthand for vectors whose elements are of type void*: fetch the
-n'th element and read it as a void* value. */
-#define ib_vector_getp(v, n) (*(void**) ib_vector_get(v, n))
-#define ib_vector_getp_const(v, n) (*(void**) ib_vector_get_const(v, n))
-
-/* The allocator of vector v. The argument is parenthesized so that any
-expression yielding an ib_vector_t pointer can be passed, not only a
-plain identifier. */
-#define ib_vector_allocator(v) ((v)->allocator)
-
-/********************************************************************
-Create a new vector with the given initial size. */
-UNIV_INTERN
-ib_vector_t*
-ib_vector_create(
-/*=============*/
- /* out: vector */
- ib_alloc_t* alloc, /* in: Allocator */
- /* in: size of the data item */
- ulint sizeof_value,
- ulint size); /* in: initial size */
-
-/********************************************************************
-Destroy the vector. Make sure the vector owns the allocator, e.g.,
-the heap in the heap allocator. */
-UNIV_INLINE
-void
-ib_vector_free(
-/*===========*/
- ib_vector_t* vec); /* in/out: vector */
-
-/********************************************************************
-Push a new element to the vector, increasing its size if necessary,
-if elem is not NULL then elem is copied to the vector.*/
-UNIV_INLINE
-void*
-ib_vector_push(
-/*===========*/
- /* out: pointer to the "new" element */
- ib_vector_t* vec, /* in/out: vector */
- const void* elem); /* in: data element */
-
-/********************************************************************
-Pop the last element from the vector.*/
-UNIV_INLINE
-void*
-ib_vector_pop(
-/*==========*/
- /* out: pointer to the popped (last) element */
- ib_vector_t* vec); /* in/out: vector */
-
-/*******************************************************************//**
-Remove an element from the vector
-@return pointer to the "removed" element */
-UNIV_INLINE
-void*
-ib_vector_remove(
-/*=============*/
- ib_vector_t* vec, /*!< in: vector */
- const void* elem); /*!< in: value to remove */
-
-/********************************************************************
-Get the number of elements in the vector. */
-UNIV_INLINE
-ulint
-ib_vector_size(
-/*===========*/
- /* out: number of elements in vector */
- const ib_vector_t* vec); /* in: vector */
-
-/********************************************************************
-Increase the size of the vector. */
-UNIV_INTERN
-void
-ib_vector_resize(
-/*=============*/
- /* out: void */
- ib_vector_t* vec); /* in/out: vector */
-
-/********************************************************************
-Test whether a vector is empty or not.
-@return TRUE if empty */
-UNIV_INLINE
-ibool
-ib_vector_is_empty(
-/*===============*/
- const ib_vector_t* vec); /*!< in: vector */
-
-/****************************************************************//**
-Get the n'th element.
-@return n'th element */
-UNIV_INLINE
-void*
-ib_vector_get(
-/*==========*/
- ib_vector_t* vec, /*!< in: vector */
- ulint n); /*!< in: element index to get */
-
-/********************************************************************
-Const version of the get n'th element.
-@return n'th element */
-UNIV_INLINE
-const void*
-ib_vector_get_const(
-/*================*/
- const ib_vector_t* vec, /* in: vector */
- ulint n); /* in: element index to get */
-/****************************************************************//**
-Get last element. The vector must not be empty.
-@return last element */
-UNIV_INLINE
-void*
-ib_vector_get_last(
-/*===============*/
- ib_vector_t* vec); /*!< in: vector */
-/****************************************************************//**
-Set the n'th element. */
-UNIV_INLINE
-void
-ib_vector_set(
-/*==========*/
- ib_vector_t* vec, /*!< in/out: vector */
- ulint n, /*!< in: element index to set */
- void* elem); /*!< in: data element */
-
-/********************************************************************
-Reset the vector size to 0 elements. */
-UNIV_INLINE
-void
-ib_vector_reset(
-/*============*/
- ib_vector_t* vec); /* in/out: vector */
-
-/********************************************************************
-Get the last element of the vector. */
-UNIV_INLINE
-void*
-ib_vector_last(
-/*===========*/
- /* out: pointer to last element */
- ib_vector_t* vec); /* in/out: vector */
-
-/********************************************************************
-Get the last element of the vector. */
-UNIV_INLINE
-const void*
-ib_vector_last_const(
-/*=================*/
- /* out: pointer to last element */
- const ib_vector_t* vec); /* in: vector */
-
-/********************************************************************
-Sort the vector elements. */
-UNIV_INLINE
-void
-ib_vector_sort(
-/*===========*/
- ib_vector_t* vec, /* in/out: vector */
- ib_compare_t compare); /* in: the comparator to use for sort */
-
-/********************************************************************
-The default ib_vector_t heap free. Does nothing. */
-UNIV_INLINE
-void
-ib_heap_free(
-/*=========*/
- ib_alloc_t* allocator, /* in: allocator */
- void* ptr); /* in: memory to free (ignored) */
-
-/********************************************************************
-The default ib_vector_t heap malloc. Uses mem_heap_alloc(). */
-UNIV_INLINE
-void*
-ib_heap_malloc(
-/*===========*/
- /* out: pointer to allocated memory */
- ib_alloc_t* allocator, /* in: allocator */
- ulint size); /* in: size in bytes */
-
-/********************************************************************
-The default ib_vector_t heap resize. Since we can't resize the heap
-we have to copy the elements from the old ptr to the new ptr.
-Uses mem_heap_alloc(). */
-UNIV_INLINE
-void*
-ib_heap_resize(
-/*===========*/
- /* out: pointer to reallocated
- memory */
- ib_alloc_t* allocator, /* in: allocator */
- void* old_ptr, /* in: pointer to memory */
- ulint old_size, /* in: old size in bytes */
- ulint new_size); /* in: new size in bytes */
-
-/********************************************************************
-Create a heap allocator that uses the passed in heap. */
-UNIV_INLINE
-ib_alloc_t*
-ib_heap_allocator_create(
-/*=====================*/
- /* out: heap allocator instance */
- mem_heap_t* heap); /* in: heap to use */
-
-/********************************************************************
-Free a heap allocator. */
-UNIV_INLINE
-void
-ib_heap_allocator_free(
-/*===================*/
- ib_alloc_t* ib_ut_alloc); /* in: alloc instance to free */
-
-/********************************************************************
-Wrapper for ut_free(). */
-UNIV_INLINE
-void
-ib_ut_free(
-/*=======*/
- ib_alloc_t* allocator, /* in: allocator */
- void* ptr); /* in: memory to free */
-
-/********************************************************************
-Wrapper for ut_malloc(). */
-UNIV_INLINE
-void*
-ib_ut_malloc(
-/*=========*/
- /* out: pointer to allocated memory */
- ib_alloc_t* allocator, /* in: allocator */
- ulint size); /* in: size in bytes */
-
-/********************************************************************
-Wrapper for ut_realloc(). */
-UNIV_INLINE
-void*
-ib_ut_resize(
-/*=========*/
- /* out: pointer to reallocated
- memory */
- ib_alloc_t* allocator, /* in: allocator */
- void* old_ptr, /* in: pointer to memory */
- ulint old_size, /* in: old size in bytes */
- ulint new_size); /* in: new size in bytes */
-
-/********************************************************************
-Create a ut allocator, i.e. one built on the ut_malloc() wrappers. */
-UNIV_INLINE
-ib_alloc_t*
-ib_ut_allocator_create(void);
-/*=========================*/
-
-/********************************************************************
-Free a ut allocator. */
-UNIV_INLINE
-void
-ib_ut_allocator_free(
-/*=================*/
- ib_alloc_t* ib_ut_alloc); /* in: alloc instance to free */
-
-/* Allocator used by ib_vector_t: three callbacks plus an optional
-context pointer. */
-struct ib_alloc_t {
- ib_mem_alloc_t mem_malloc; /* For allocating memory */
- ib_mem_free_t mem_release; /* For freeing memory */
- ib_mem_resize_t mem_resize; /* For resizing memory */
- void* arg; /* Currently if not NULL then it
- points to the heap instance; the
- heap callbacks cast it to
- mem_heap_t* */
-};
-
-/* An automatically resizing vector; see the comment near the top of this
-file. Elements are stored by value, sizeof_value bytes each. */
-struct ib_vector_t {
- ib_alloc_t* allocator; /* Allocator, because one size
- doesn't fit all */
- void* data; /* data elements, element i starts at
- byte offset i * sizeof_value */
- ulint used; /* number of elements currently used */
- ulint total; /* number of elements allocated */
- /* Size of a data item, in bytes */
- ulint sizeof_value;
-};
-
-#ifndef UNIV_NONINL
-#include "ut0vec.ic"
-#endif
-
-#endif /* IB_VECTOR_H */
diff --git a/storage/xtradb/include/ut0vec.ic b/storage/xtradb/include/ut0vec.ic
deleted file mode 100644
index f41a85e1d1d..00000000000
--- a/storage/xtradb/include/ut0vec.ic
+++ /dev/null
@@ -1,425 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ut0vec.ic
-A vector of pointers to data items
-
-Created 4/6/2006 Osku Salerma
-************************************************************************/
-
-/* Byte offset of element i from the start of the data array of vector v.
-The vector is taken from the macro argument (not from whatever variable
-happens to be called "vec" at the point of use), and both arguments are
-parenthesized so that arbitrary expressions expand correctly. */
-#define IB_VEC_OFFSET(v, i) ((v)->sizeof_value * (i))
-
-/********************************************************************
-Allocation callback of the heap allocator: takes the memory from the
-heap stored in allocator->arg by calling mem_heap_alloc(). */
-UNIV_INLINE
-void*
-ib_heap_malloc(
-/*===========*/
-	ib_alloc_t*	allocator,	/* in: allocator */
-	ulint		size)		/* in: size in bytes */
-{
-	return(mem_heap_alloc((mem_heap_t*) allocator->arg, size));
-}
-
-/********************************************************************
-The default ib_vector_t heap free. Does nothing; both arguments are
-ignored. */
-UNIV_INLINE
-void
-ib_heap_free(
-/*=========*/
- ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */
- void* ptr UNIV_UNUSED) /* in: memory to free (ignored) */
-{
- /* We can't free individual elements. */
-}
-
-/********************************************************************
-The default ib_vector_t heap resize. Since we can't resize memory that
-was taken from a heap, a new block of new_size bytes is allocated with
-mem_heap_alloc() and the old contents are copied into it. The old block
-is not released here.
-
-Only min(old_size, new_size) bytes are copied, so that a request for a
-smaller block cannot write past the end of the new allocation. */
-UNIV_INLINE
-void*
-ib_heap_resize(
-/*===========*/
-					/* out: pointer to the new
-					memory */
-	ib_alloc_t*	allocator,	/* in: allocator */
-	void*		old_ptr,	/* in: pointer to memory */
-	ulint		old_size,	/* in: old size in bytes */
-	ulint		new_size)	/* in: new size in bytes */
-{
-	mem_heap_t*	heap = (mem_heap_t*) allocator->arg;
-	void*		new_ptr = mem_heap_alloc(heap, new_size);
-	ulint		n_copy = (old_size < new_size) ? old_size : new_size;
-
-	memcpy(new_ptr, old_ptr, n_copy);
-
-	return(new_ptr);
-}
-
-/********************************************************************
-Create a heap allocator on top of the given heap. The allocator object
-itself is allocated from that same heap, and its callbacks are the
-ib_heap_*() functions. */
-UNIV_INLINE
-ib_alloc_t*
-ib_heap_allocator_create(
-/*=====================*/
-					/* out: heap allocator instance */
-	mem_heap_t*	heap)		/* in: heap to use */
-{
-	ib_alloc_t*	alloc;
-
-	alloc = (ib_alloc_t*) mem_heap_alloc(heap, sizeof(*alloc));
-
-	alloc->mem_malloc = ib_heap_malloc;
-	alloc->mem_resize = ib_heap_resize;
-	alloc->mem_release = ib_heap_free;
-	alloc->arg = heap;
-
-	return(alloc);
-}
-
-/********************************************************************
-Free a heap allocator by freeing the heap that its arg field points to. */
-UNIV_INLINE
-void
-ib_heap_allocator_free(
-/*===================*/
-	ib_alloc_t*	ib_ut_alloc)	/* in: alloc instance to free */
-{
-	mem_heap_t*	heap = (mem_heap_t*) ib_ut_alloc->arg;
-
-	mem_heap_free(heap);
-}
-
-/********************************************************************
-Allocation callback of the ut allocator: forwards to ut_malloc(). The
-allocator argument is not used. */
-UNIV_INLINE
-void*
-ib_ut_malloc(
-/*=========*/
-					/* out: pointer to allocated memory */
-	ib_alloc_t*	allocator UNIV_UNUSED,	/* in: allocator */
-	ulint		size)		/* in: size in bytes */
-{
-	void*	mem = ut_malloc(size);
-
-	return(mem);
-}
-
-/********************************************************************
-Wrapper around ut_free(). The allocator argument is not used. */
-UNIV_INLINE
-void
-ib_ut_free(
-/*=======*/
- ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */
- void* ptr) /* in: memory to free */
-{
- ut_free(ptr);
-}
-
-/********************************************************************
-Resize callback of the ut allocator: forwards to ut_realloc(). Neither
-the allocator nor the old size is used. */
-UNIV_INLINE
-void*
-ib_ut_resize(
-/*=========*/
-					/* out: pointer to reallocated
-					memory */
-	ib_alloc_t*	allocator UNIV_UNUSED,	/* in: allocator */
-	void*		old_ptr,	/* in: pointer to memory */
-	ulint		old_size UNIV_UNUSED,/* in: old size in bytes */
-	ulint		new_size)	/* in: new size in bytes */
-{
-	void*	resized = ut_realloc(old_ptr, new_size);
-
-	return(resized);
-}
-
-/********************************************************************
-Create a ut allocator. The allocator object is obtained from
-ut_malloc(), its callbacks are the ib_ut_*() wrappers and its arg field
-is NULL. */
-UNIV_INLINE
-ib_alloc_t*
-ib_ut_allocator_create(void)
-/*========================*/
-{
-	ib_alloc_t*	alloc;
-
-	alloc = (ib_alloc_t*) ut_malloc(sizeof(*alloc));
-
-	alloc->mem_malloc = ib_ut_malloc;
-	alloc->mem_resize = ib_ut_resize;
-	alloc->mem_release = ib_ut_free;
-	alloc->arg = NULL;
-
-	return(alloc);
-}
-
-/********************************************************************
-Free a ut allocator. Only the allocator object itself is released, with
-ut_free(). */
-UNIV_INLINE
-void
-ib_ut_allocator_free(
-/*=================*/
- ib_alloc_t* ib_ut_alloc) /* in: alloc instance to free */
-{
- ut_free(ib_ut_alloc);
-}
-
-/********************************************************************
-Get number of elements in vector, i.e. the count of used slots
-(vec->used), not the allocated capacity. */
-UNIV_INLINE
-ulint
-ib_vector_size(
-/*===========*/
- /* out: number of elements in vector */
- const ib_vector_t* vec) /* in: vector */
-{
- return(vec->used);
-}
-
-/****************************************************************//**
-Get the n'th element. The index must be smaller than the number of
-used elements; this is checked with ut_a().
-@return pointer to the n'th element inside the vector's data array */
-UNIV_INLINE
-void*
-ib_vector_get(
-/*==========*/
-	ib_vector_t*	vec,	/*!< in: vector */
-	ulint		n)	/*!< in: element index to get */
-{
-	byte*	base;
-
-	ut_a(n < vec->used);
-
-	base = (byte*) vec->data;
-
-	return(base + IB_VEC_OFFSET(vec, n));
-}
-
-/********************************************************************
-Const version of the get n'th element. The index must be smaller than
-the number of used elements; this is checked with ut_a().
-@return pointer to the n'th element inside the vector's data array */
-UNIV_INLINE
-const void*
-ib_vector_get_const(
-/*================*/
-	const ib_vector_t*	vec,	/* in: vector */
-	ulint			n)	/* in: element index to get */
-{
-	const byte*	base;
-
-	ut_a(n < vec->used);
-
-	base = (const byte*) vec->data;
-
-	return(base + IB_VEC_OFFSET(vec, n));
-}
-/****************************************************************//**
-Get last element. The vector must not be empty; this is checked with
-ut_a().
-@return pointer to the last used element */
-UNIV_INLINE
-void*
-ib_vector_get_last(
-/*===============*/
-	ib_vector_t*	vec)	/*!< in: vector */
-{
-	ulint	last_idx;
-
-	ut_a(vec->used > 0);
-
-	last_idx = vec->used - 1;
-
-	return(ib_vector_get(vec, last_idx));
-}
-
-/****************************************************************//**
-Set the n'th element by copying vec->sizeof_value bytes from elem over
-the existing slot. The index must refer to a used element; this is
-checked with ut_a(). */
-UNIV_INLINE
-void
-ib_vector_set(
-/*==========*/
-	ib_vector_t*	vec,	/*!< in/out: vector */
-	ulint		n,	/*!< in: element index to set */
-	void*		elem)	/*!< in: data element */
-{
-	byte*	dest;
-
-	ut_a(n < vec->used);
-
-	dest = (byte*) vec->data;
-	dest += IB_VEC_OFFSET(vec, n);
-
-	memcpy(dest, elem, vec->sizeof_value);
-}
-
-/********************************************************************
-Reset the vector size to 0 elements. Only the used-element count is
-cleared here; the data array and the capacity are left untouched. */
-UNIV_INLINE
-void
-ib_vector_reset(
-/*============*/
- /* out: void */
- ib_vector_t* vec) /* in/out: vector */
-{
- vec->used = 0;
-}
-
-/********************************************************************
-Get the last element of the vector. The vector must not be empty; this
-is checked with ut_a(). */
-UNIV_INLINE
-void*
-ib_vector_last(
-/*===========*/
-				/* out: pointer to last element */
-	ib_vector_t*	vec)	/* in: vector */
-{
-	const ulint	n_elems = ib_vector_size(vec);
-
-	ut_a(n_elems > 0);
-
-	return(ib_vector_get(vec, n_elems - 1));
-}
-
-/********************************************************************
-Const version of getting the last element of the vector. The vector
-must not be empty; this is checked with ut_a(). */
-UNIV_INLINE
-const void*
-ib_vector_last_const(
-/*=================*/
-					/* out: pointer to last element */
-	const ib_vector_t*	vec)	/* in: vector */
-{
-	const ulint	n_elems = ib_vector_size(vec);
-
-	ut_a(n_elems > 0);
-
-	return(ib_vector_get_const(vec, n_elems - 1));
-}
-
-/****************************************************************//**
-Remove the last element from the vector. The vector must not be empty;
-this is checked with ut_a(). The returned pointer refers to the slot
-inside the data array that held the element; the slot lies past the
-used part of the vector once this function returns.
-@return last vector element */
-UNIV_INLINE
-void*
-ib_vector_pop(
-/*==========*/
-				/* out: pointer to element */
-	ib_vector_t*	vec)	/* in/out: vector */
-{
-	void*	popped;
-
-	ut_a(vec->used > 0);
-
-	popped = ib_vector_get(vec, vec->used - 1);
-	vec->used -= 1;
-
-	return(popped);
-}
-
-/********************************************************************
-Append an element to the vector. When all allocated slots are in use
-the vector is first enlarged with ib_vector_resize(). If elem != NULL,
-vec->sizeof_value bytes are copied from it into the new slot; otherwise
-the slot contents are left as they are (zero filled in UNIV_DEBUG
-builds). */
-UNIV_INLINE
-void*
-ib_vector_push(
-/*===========*/
-				/* out: pointer to the "new" element */
-	ib_vector_t*	vec,	/* in/out: vector */
-	const void*	elem)	/* in: element to add (can be NULL) */
-{
-	byte*	slot;
-
-	if (!(vec->used < vec->total)) {
-		ib_vector_resize(vec);
-	}
-
-	slot = (byte*) vec->data;
-	slot += IB_VEC_OFFSET(vec, vec->used);
-
-#ifdef UNIV_DEBUG
-	memset(slot, 0, vec->sizeof_value);
-#endif
-
-	if (elem != NULL) {
-		memcpy(slot, elem, vec->sizeof_value);
-	}
-
-	vec->used++;
-
-	return(slot);
-}
-
-/*******************************************************************//**
-Remove the first element whose leading pointer-sized value equals elem.
-Each slot is read as a void* for the comparison. When the match is the
-last element it is simply popped; otherwise the elements behind it are
-moved one slot toward the front, so the returned pointer then refers to
-the slot that now holds the following element.
-@return pointer to the slot of the "removed" element, NULL if no element
-matched */
-UNIV_INLINE
-void*
-ib_vector_remove(
-/*=============*/
-	ib_vector_t*	vec,	/*!< in/out: vector */
-	const void*	elem)	/*!< in: value to remove */
-{
-	ulint	pos;
-
-	for (pos = 0; pos < vec->used; pos++) {
-		void*	slot = ib_vector_get(vec, pos);
-
-		if (*(void**) slot != elem) {
-			continue;
-		}
-
-		if (pos + 1 == vec->used) {
-			/* Nothing behind the match needs to move. */
-			return(ib_vector_pop(vec));
-		}
-
-		/* Close the gap; the ranges overlap, hence memmove(). */
-		memmove(slot, ib_vector_get(vec, pos + 1),
-			vec->sizeof_value * (vec->used - pos - 1));
-		--vec->used;
-
-		return(slot);
-	}
-
-	return(NULL);
-}
-
-/********************************************************************
-Sort the vector elements in place with qsort(): the vec->used elements
-of vec->sizeof_value bytes each are ordered by the given comparator. */
-UNIV_INLINE
-void
-ib_vector_sort(
-/*===========*/
- /* out: void */
- ib_vector_t* vec, /* in/out: vector */
- ib_compare_t compare)/* in: the comparator to use for sort */
-{
- qsort(vec->data, vec->used, vec->sizeof_value, compare);
-}
-
-/********************************************************************
-Destroy the vector. Make sure the vector owns the allocator, e.g.,
-the heap in the heap allocator.
-
-Two kinds of allocators are supported. A non-NULL arg field marks the
-heap allocator; freeing its heap is all that is done for it. Otherwise
-the allocator is a ut allocator, and the elements, the vector instance
-and the allocator are released separately. */
-UNIV_INLINE
-void
-ib_vector_free(
-/*===========*/
-	ib_vector_t*	vec)	/* in, own: vector */
-{
-	ib_alloc_t*	allocator = vec->allocator;
-
-	/* Only the heap allocator uses the arg field. */
-	if (allocator->arg != NULL) {
-		mem_heap_free((mem_heap_t*) allocator->arg);
-		return;
-	}
-
-	allocator->mem_release(allocator, vec->data);
-	allocator->mem_release(allocator, vec);
-
-	ib_ut_allocator_free(allocator);
-}
-
-/********************************************************************
-Test whether a vector has no used elements.
-@return TRUE if empty */
-UNIV_INLINE
-ibool
-ib_vector_is_empty(
-/*===============*/
-	const ib_vector_t*	vec)	/*!< in: vector */
-{
-	const ulint	n_elems = ib_vector_size(vec);
-
-	return(n_elems == 0);
-}
diff --git a/storage/xtradb/include/ut0wqueue.h b/storage/xtradb/include/ut0wqueue.h
deleted file mode 100644
index d69363afe7b..00000000000
--- a/storage/xtradb/include/ut0wqueue.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file include/ut0wqueue.h
-A work queue
-
-Created 4/26/2006 Osku Salerma
-************************************************************************/
-
-/*******************************************************************//**
-A Work queue. Threads can add work items to the queue and other threads can
-wait for work items to be available and take them off the queue for
-processing.
-************************************************************************/
-
-#ifndef IB_WORK_QUEUE_H
-#define IB_WORK_QUEUE_H
-
-#include "ut0list.h"
-#include "mem0mem.h"
-#include "os0sync.h"
-#include "sync0types.h"
-
-struct ib_wqueue_t;
-
-/****************************************************************//**
-Create a new work queue.
-@return work queue */
-UNIV_INTERN
-ib_wqueue_t*
-ib_wqueue_create(void);
-/*===================*/
-
-/****************************************************************//**
-Free a work queue. */
-UNIV_INTERN
-void
-ib_wqueue_free(
-/*===========*/
- ib_wqueue_t* wq); /*!< in: work queue */
-
-/****************************************************************//**
-Add a work item to the queue. */
-UNIV_INTERN
-void
-ib_wqueue_add(
-/*==========*/
- ib_wqueue_t* wq, /*!< in: work queue */
- void* item, /*!< in: work item */
- mem_heap_t* heap); /*!< in: memory heap to use for allocating the
- list node */
-
-/********************************************************************
-Check if queue is empty. */
-
-ibool
-ib_wqueue_is_empty(
-/*===============*/
- /* out: TRUE if queue empty
- else FALSE */
- const ib_wqueue_t* wq); /* in: work queue */
-
-/****************************************************************//**
-Wait for a work item to appear in the queue.
-@return work item */
-UNIV_INTERN
-void*
-ib_wqueue_wait(
-/*===========*/
- ib_wqueue_t* wq); /*!< in: work queue */
-
-/********************************************************************
-Wait up to the specified time for a work item to appear in the queue. */
-
-void*
-ib_wqueue_timedwait(
-/*================*/
- /* out: work item or NULL on timeout */
- ib_wqueue_t* wq, /*!< in: work queue */
- ib_time_t wait_in_usecs); /*!< in: wait time in microseconds */
-
-/********************************************************************
-Return first item on work queue or NULL if queue is empty.
-@return work item or NULL */
-void*
-ib_wqueue_nowait(
-/*=============*/
- ib_wqueue_t* wq); /*!< in: work queue */
-
-
-/********************************************************************
-Get number of items on queue.
-@return number of items on queue */
-ulint
-ib_wqueue_len(
-/*==========*/
- ib_wqueue_t* wq); /*!< in: work queue */
-
-/* Work queue. */
-struct ib_wqueue_t {
- ib_mutex_t mutex; /*!< mutex protecting everything */
- ib_list_t* items; /*!< work item list */
- os_event_t event; /*!< event we use to signal additions to list;
- os_event_set() and os_event_reset() are
- protected by ib_wqueue_t::mutex */
-};
-
-#endif
diff --git a/storage/xtradb/lock/lock0iter.cc b/storage/xtradb/lock/lock0iter.cc
deleted file mode 100644
index b424d2fc757..00000000000
--- a/storage/xtradb/lock/lock0iter.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file lock/lock0iter.cc
-Lock queue iterator. Can iterate over table and record
-lock queues.
-
-Created July 16, 2007 Vasil Dimov
-*******************************************************/
-
-#define LOCK_MODULE_IMPLEMENTATION
-
-#include "univ.i"
-#include "lock0iter.h"
-#include "lock0lock.h"
-#include "lock0priv.h"
-#include "ut0dbg.h"
-#include "ut0lst.h"
-
-/*******************************************************************//**
-Initialize lock queue iterator so that it starts to iterate from
-"lock". bit_no specifies the record number within the heap where the
-record is stored. It may be passed as ULINT_UNDEFINED in two cases:
-1. If the lock is a table lock, thus we have a table lock queue;
-2. If the lock is a record lock and it is a wait lock. In this case
- bit_no is calculated in this function by using
- lock_rec_find_set_bit(). There is exactly one bit set in the bitmap
- of a wait lock. */
-UNIV_INTERN
-void
-lock_queue_iterator_reset(
-/*======================*/
- lock_queue_iterator_t* iter, /*!< out: iterator */
- const lock_t* lock, /*!< in: lock to start from */
- ulint bit_no) /*!< in: record number in the
- heap */
-{
- ut_ad(lock_mutex_own());
-
- iter->current_lock = lock;
-
- if (bit_no != ULINT_UNDEFINED) {
-
- iter->bit_no = bit_no;
- } else {
-
- switch (lock_get_type_low(lock)) {
- case LOCK_TABLE:
- iter->bit_no = ULINT_UNDEFINED; /* case 1: table lock queue */
- break;
- case LOCK_REC:
- iter->bit_no = lock_rec_find_set_bit(lock);
- ut_a(iter->bit_no != ULINT_UNDEFINED); /* a set bit must be found */
- break;
- default:
- ut_error;
- }
- }
-}
-
-/*******************************************************************//**
-Gets the previous lock in the lock queue, returns NULL if there are no
-more locks (i.e. the current lock is the first one). The iterator is
-moved back only if a non-NULL lock is returned; otherwise it is unchanged.
-@return previous lock or NULL */
-UNIV_INTERN
-const lock_t*
-lock_queue_iterator_get_prev(
-/*=========================*/
- lock_queue_iterator_t* iter) /*!< in/out: iterator */
-{
- const lock_t* prev_lock;
-
- ut_ad(lock_mutex_own());
-
- switch (lock_get_type_low(iter->current_lock)) {
- case LOCK_REC:
- prev_lock = lock_rec_get_prev(
- iter->current_lock, iter->bit_no);
- break;
- case LOCK_TABLE:
- prev_lock = UT_LIST_GET_PREV(
- un_member.tab_lock.locks, iter->current_lock);
- break;
- default:
- ut_error;
- }
-
- if (prev_lock != NULL) {
-
- iter->current_lock = prev_lock;
- }
-
- return(prev_lock);
-}
diff --git a/storage/xtradb/lock/lock0lock.cc b/storage/xtradb/lock/lock0lock.cc
deleted file mode 100644
index 71612f66fcd..00000000000
--- a/storage/xtradb/lock/lock0lock.cc
+++ /dev/null
@@ -1,8338 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2017, MariaDB Corporation
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file lock/lock0lock.cc
-The transaction lock system
-
-Created 5/7/1996 Heikki Tuuri
-*******************************************************/
-
-#define LOCK_MODULE_IMPLEMENTATION
-
-#include "lock0lock.h"
-#include "lock0priv.h"
-
-#ifdef UNIV_NONINL
-#include "lock0lock.ic"
-#include "lock0priv.ic"
-#endif
-
-#include "ha_prototypes.h"
-#include "usr0sess.h"
-#include "trx0purge.h"
-#include "dict0mem.h"
-#include "dict0boot.h"
-#include "trx0sys.h"
-#include "pars0pars.h" /* pars_complete_graph_for_exec() */
-#include "que0que.h" /* que_node_get_parent() */
-#include "row0mysql.h" /* row_mysql_handle_errors() */
-#include "row0sel.h" /* sel_node_create(), sel_node_t */
-#include "row0types.h" /* sel_node_t */
-#include "srv0mon.h"
-#include "ut0vec.h"
-#include "btr0btr.h"
-#include "dict0boot.h"
-#include <set>
-#include "mysql/plugin.h"
-
-#include <mysql/service_wsrep.h>
-
-#include <string>
-#include <sstream>
-
-/* Restricts the length of search we will do in the waits-for
-graph of transactions */
-#define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000
-
-/* Restricts the search depth we will do in the waits-for graph of
-transactions */
-#define LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK 200
-
-/* When releasing transaction locks, this specifies how often we release
-the lock mutex for a moment to give also others access to it */
-
-#define LOCK_RELEASE_INTERVAL 1000
-
-/* Safety margin when creating a new record lock: this many extra records
-can be inserted to the page without need to create a lock with a bigger
-bitmap */
-
-#define LOCK_PAGE_BITMAP_MARGIN 64
-
-/** Lock scheduling algorithm */
-ulong innodb_lock_schedule_algorithm = INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS;
-
-/* An explicit record lock affects both the record and the gap before it.
-An implicit x-lock does not affect the gap, it only locks the index
-record from read or update.
-
-If a transaction has modified or inserted an index record, then
-it owns an implicit x-lock on the record. On a secondary index record,
-a transaction has an implicit x-lock also if it has modified the
-clustered index record, the max trx id of the page where the secondary
-index record resides is >= trx id of the transaction (or database recovery
-is running), and there are no explicit non-gap lock requests on the
-secondary index record.
-
-This complicated definition for a secondary index comes from the
-implementation: we want to be able to determine if a secondary index
-record has an implicit x-lock, just by looking at the present clustered
-index record, not at the historical versions of the record. The
-complicated definition can be explained to the user so that there is
-nondeterminism in the access path when a query is answered: we may,
-or may not, access the clustered index record and thus may, or may not,
-bump into an x-lock set there.
-
-Different transactions can have conflicting locks set on the gap at the
-same time. The locks on the gap are purely inhibitive: an insert cannot
-be made, or a select cursor may have to wait if a different transaction
-has a conflicting lock on the gap. An x-lock on the gap does not give
-the right to insert into the gap.
-
-An explicit lock can be placed on a user record or the supremum record of
-a page. The locks on the supremum record are always thought to be of the gap
-type, though the gap bit is not set. When we perform an update of a record
-where the size of the record changes, we may temporarily store its explicit
-locks on the infimum record of the page, though the infimum otherwise never
-carries locks.
-
-A waiting record lock can also be of the gap type. A waiting lock request
-can be granted when there is no conflicting mode lock request by another
-transaction ahead of it in the explicit lock queue.
-
-In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP.
-It only locks the record it is placed on, not the gap before the record.
-This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation
-level.
-
--------------------------------------------------------------------------
-RULE 1: If there is an implicit x-lock on a record, and there are non-gap
--------
-lock requests waiting in the queue, then the transaction holding the implicit
-x-lock also has an explicit non-gap record x-lock. Therefore, as locks are
-released, we can grant locks to waiting lock requests purely by looking at
-the explicit lock requests in the queue.
-
-RULE 3: Different transactions cannot have conflicting granted non-gap locks
--------
-on a record at the same time. However, they can have conflicting granted gap
-locks.
-RULE 4: If there is a waiting lock request in a queue, no lock request,
--------
-gap or not, can be inserted ahead of it in the queue. In record deletes
-and page splits new gap type locks can be created by the database manager
-for a transaction, and without rule 4, the waits-for graph of transactions
-might become cyclic without the database noticing it, as the deadlock check
-is only performed when a transaction itself requests a lock!
--------------------------------------------------------------------------
-
-An insert is allowed to a gap if there are no explicit lock requests by
-other transactions on the next record. It does not matter if these lock
-requests are granted or waiting, gap bit set or not, with the exception
-that a gap type request set by another transaction to wait for
-its turn to do an insert is ignored. On the other hand, an
-implicit x-lock by another transaction does not prevent an insert, which
-allows for more concurrency when using an Oracle-style sequence number
-generator for the primary key with many transactions doing inserts
-concurrently.
-
-A modify of a record is allowed if the transaction has an x-lock on the
-record, or if other transactions do not have any non-gap lock requests on the
-record.
-
-A read of a single user record with a cursor is allowed if the transaction
-has a non-gap explicit, or an implicit lock on the record, or if the other
-transactions have no x-lock requests on the record. At a page supremum a
-read is always allowed.
-
-In summary, an implicit lock is seen as a granted x-lock only on the
-record, not on the gap. An explicit lock with no gap bit set is a lock
-both on the record and the gap. If the gap bit is set, the lock is only
-on the gap. Different transactions cannot own conflicting locks on the
-record at the same time, but they may own conflicting locks on the gap.
-Granted locks on a record give an access right to the record, but gap type
-locks just inhibit operations.
-
-NOTE: Finding out if some transaction has an implicit x-lock on a secondary
-index record can be cumbersome. We may have to look at previous versions of
-the corresponding clustered index record to find out if a delete marked
-secondary index record was delete marked by an active transaction, not by
-a committed one.
-
-FACT A: If a transaction has inserted a row, it can delete it any time
-without need to wait for locks.
-
-PROOF: The transaction has an implicit x-lock on every index record inserted
-for the row, and can thus modify each record without the need to wait. Q.E.D.
-
-FACT B: If a transaction has read some result set with a cursor, it can read
-it again, and retrieves the same result set, if it has not modified the
-result set in the meantime. Hence, there is no phantom problem. If the
-biggest record, in the alphabetical order, touched by the cursor is removed,
-a lock wait may occur, otherwise not.
-
-PROOF: When a read cursor proceeds, it sets an s-lock on each user record
-it passes, and a gap type s-lock on each page supremum. The cursor must
-wait until it has these locks granted. Then no other transaction can
-have a granted x-lock on any of the user records, and therefore cannot
-modify the user records. Neither can any other transaction insert into
-the gaps which were passed over by the cursor. Page splits and merges,
-and removal of obsolete versions of records do not affect this, because
-when a user record or a page supremum is removed, the next record inherits
-its locks as gap type locks, and therefore blocks inserts to the same gap.
-Also, if a page supremum is inserted, it inherits its locks from the successor
-record. When the cursor is positioned again at the start of the result set,
-the records it will touch on its course are either records it touched
-during the last pass or new inserted page supremums. It can immediately
-access all these records, and when it arrives at the biggest record, it
-notices that the result set is complete. If the biggest record was removed,
-lock wait can occur because the next record only inherits a gap type lock,
-and a wait may be needed. Q.E.D. */
-
-/* If an index record should be changed or a new one inserted, we must check
-the lock on the record or the next. When a read cursor starts reading,
-we will set a record level s-lock on each record it passes, except on the
-initial record on which the cursor is positioned before we start to fetch
-records. Our index tree search has the convention that the B-tree
-cursor is positioned BEFORE the first possibly matching record in
-the search. Optimizations are possible here: if the record is searched
-on an equality condition to a unique key, we could actually set a special
-lock on the record, a lock which would not prevent any insert before
-this record. In the next key locking an x-lock set on a record also
-prevents inserts just before that record.
- There are special infimum and supremum records on each page.
-A supremum record can be locked by a read cursor. This record cannot be
-updated but the lock prevents insert of a user record to the end of
-the page.
- Next key locks will prevent the phantom problem where new rows
-could appear to SELECT result sets after the select operation has been
-performed. Prevention of phantoms ensures the serializability of
-transactions.
- What should we check if an insert of a new record is wanted?
-Only the lock on the next record on the same page, because also the
-supremum record can carry a lock. An s-lock prevents insertion, but
-what about an x-lock? If it was set by a searched update, then there
-is implicitly an s-lock, too, and the insert should be prevented.
-What if our transaction owns an x-lock to the next record, but there is
-a waiting s-lock request on the next record? If this s-lock was placed
-by a read cursor moving in the ascending order in the index, we cannot
-do the insert immediately, because when we finally commit our transaction,
-the read cursor should see also the new inserted record. So we should
-move the read cursor backward from the next record for it to pass over
-the new inserted record. This move backward may be too cumbersome to
-implement. If we in this situation just enqueue a second x-lock request
-for our transaction on the next record, then the deadlock mechanism
-notices a deadlock between our transaction and the s-lock request
-transaction. This seems to be an ok solution.
- We could have the convention that granted explicit record locks,
-lock the corresponding records from changing, and also lock the gaps
-before them from inserting. A waiting explicit lock request locks the gap
-before from inserting. Implicit record x-locks, which we derive from the
-transaction id in the clustered index record, only lock the record itself
-from modification, not the gap before it from inserting.
- How should we store update locks? If the search is done by a unique
-key, we could just modify the record trx id. Otherwise, we could put a record
-x-lock on the record. If the update changes ordering fields of the
-clustered index record, the inserted new record needs no record lock in
-lock table, the trx id is enough. The same holds for a secondary index
-record. Searched delete is similar to update.
-
-PROBLEM:
-What about waiting lock requests? If a transaction is waiting to make an
-update to a record which another modified, how does the other transaction
-know to send the end-lock-wait signal to the waiting transaction? If we have
-the convention that a transaction may wait for just one lock at a time, how
-do we preserve it if lock wait ends?
-
-PROBLEM:
-Checking the trx id label of a secondary index record. In the case of a
-modification, not an insert, is this necessary? A secondary index record
-is modified only by setting or resetting its deleted flag. A secondary index
-record contains fields to uniquely determine the corresponding clustered
-index record. A secondary index record is therefore only modified if we
-also modify the clustered index record, and the trx id checking is done
-on the clustered index record, before we come to modify the secondary index
-record. So, in the case of delete marking or unmarking a secondary index
-record, we do not have to care about trx ids, only the locks in the lock
-table must be checked. In the case of a select from a secondary index, the
-trx id is relevant, and in this case we may have to search the clustered
-index record.
-
-PROBLEM: How to update record locks when page is split or merged, or
---------------------------------------------------------------------
-a record is deleted or updated?
-If the size of fields in a record changes, we perform the update by
-a delete followed by an insert. How can we retain the locks set or
-waiting on the record? Because a record lock is indexed in the bitmap
-by the heap number of the record, when we remove the record from the
-record list, it is possible still to keep the lock bits. If the page
-is reorganized, we could make a table of old and new heap numbers,
-and permute the bitmaps in the locks accordingly. We can add to the
-table a row telling where the updated record ended. If the update does
-not require a reorganization of the page, we can simply move the lock
-bits for the updated record to the position determined by its new heap
-number (we may have to allocate a new lock, if we run out of the bitmap
-in the old one).
- A more complicated case is the one where the reinsertion of the
-updated record is done pessimistically, because the structure of the
-tree may change.
-
-PROBLEM: If a supremum record is removed in a page merge, or a record
----------------------------------------------------------------------
-removed in a purge, what to do to the waiting lock requests? In a split to
-the right, we just move the lock requests to the new supremum. If a record
-is removed, we could move the waiting lock request to its inheritor, the
-next record in the index. But, the next record may already have lock
-requests on its own queue. A new deadlock check should be made then. Maybe
-it is easier just to release the waiting transactions. They can then enqueue
-new lock requests on appropriate records.
-
-PROBLEM: When a record is inserted, what locks should it inherit from the
--------------------------------------------------------------------------
-upper neighbor? An insert of a new supremum record in a page split is
-always possible, but an insert of a new user record requires that the upper
-neighbor does not have any lock requests by other transactions, granted or
-waiting, in its lock queue. Solution: We can copy the locks as gap type
-locks, so that also the waiting locks are transformed to granted gap type
-locks on the inserted record. */
-
-#define LOCK_STACK_SIZE OS_THREAD_MAX_N
-
-/* LOCK COMPATIBILITY MATRIX
- * IS IX S X AI
- * IS + + + - +
- * IX + + - - +
- * S + - + - -
- * X - - - - -
- * AI + + - - -
- *
- * Note that for rows, InnoDB only acquires S or X locks.
- * For tables, InnoDB normally acquires IS or IX locks.
- * S or X table locks are only acquired for LOCK TABLES.
- * Auto-increment (AI) locks are needed because of
- * statement-level MySQL binlog.
- * See also lock_mode_compatible().
- */
-static const byte lock_compatibility_matrix[5][5] = {
- /** IS IX S X AI */
- /* IS */ { TRUE, TRUE, TRUE, FALSE, TRUE},
- /* IX */ { TRUE, TRUE, FALSE, FALSE, TRUE},
- /* S */ { TRUE, FALSE, TRUE, FALSE, FALSE},
- /* X */ { FALSE, FALSE, FALSE, FALSE, FALSE},
- /* AI */ { TRUE, TRUE, FALSE, FALSE, FALSE}
-};
-
-/* STRONGER-OR-EQUAL RELATION (mode1=row, mode2=column)
- * IS IX S X AI
- * IS + - - - -
- * IX + + - - -
- * S + - + - -
- * X + + + + +
- * AI - - - - +
- * See lock_mode_stronger_or_eq().
- */
-static const byte lock_strength_matrix[5][5] = {
- /** IS IX S X AI */
- /* IS */ { TRUE, FALSE, FALSE, FALSE, FALSE},
- /* IX */ { TRUE, TRUE, FALSE, FALSE, FALSE},
- /* S */ { TRUE, FALSE, TRUE, FALSE, FALSE},
- /* X */ { TRUE, TRUE, TRUE, TRUE, TRUE},
- /* AI */ { FALSE, FALSE, FALSE, FALSE, TRUE}
-};
-
-/** Deadlock check context. */
-struct lock_deadlock_ctx_t {
- const trx_t* start; /*!< Joining transaction that is
- requesting a lock in an incompatible
- mode */
-
- const lock_t* wait_lock; /*!< Lock that trx wants */
-
- ib_uint64_t mark_start; /*!< Value of lock_mark_count at
- the start of the deadlock check. */
-
- ulint depth; /*!< Stack depth */
-
- ulint cost; /*!< Calculation steps thus far */
-
- ibool too_deep; /*!< TRUE if search was too deep and
- was aborted */
-};
-
-/** DFS visited node information used during deadlock checking. */
-struct lock_stack_t {
- const lock_t* lock; /*!< Current lock */
- const lock_t* wait_lock; /*!< Waiting for lock */
- ulint heap_no; /*!< heap number if rec lock */
-};
-
-/*********************************************************************//**
-Checks if a waiting record lock request still has to wait in a queue.
-@return lock that is causing the wait */
-static
-const lock_t*
-lock_rec_has_to_wait_in_queue(
-/*==========================*/
- const lock_t* wait_lock); /*!< in: waiting record lock */
-
-/*************************************************************//**
-Grants a lock to a waiting lock request and releases the waiting transaction.
-The caller must hold lock_sys->mutex. */
-static
-void
-lock_grant(
-/*=======*/
- lock_t* lock, /*!< in/out: waiting lock request */
- bool owns_trx_mutex); /*!< in: whether lock->trx->mutex is owned */
-
-extern "C" void thd_report_wait_for(MYSQL_THD thd, MYSQL_THD other_thd);
-extern "C" int thd_need_wait_for(const MYSQL_THD thd);
-extern "C"
-int thd_need_ordering_with(const MYSQL_THD thd, const MYSQL_THD other_thd);
-
-extern "C"
-int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
-
-/** Stack to use during DFS search. Currently only a single stack is required
-because there is no parallel deadlock check. This stack is protected by
-the lock_sys_t::mutex. */
-static lock_stack_t* lock_stack;
-
-#ifdef UNIV_DEBUG
-/** The count of the types of locks. */
-static const ulint lock_types = UT_ARR_SIZE(lock_compatibility_matrix);
-#endif /* UNIV_DEBUG */
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t lock_sys_mutex_key;
-/* Key to register mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t lock_sys_wait_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/* Buffer to collect THDs to report waits for. */
-struct thd_wait_reports {
- struct thd_wait_reports *next; /*!< List link */
- ulint used; /*!< How many elements in waitees[] */
- trx_t *waitees[64]; /*!< Trxs for thd_report_wait_for() */
-};
-
-
-#ifdef UNIV_DEBUG
-UNIV_INTERN ibool lock_print_waits = FALSE;
-
-/*********************************************************************//**
-Validates the lock system.
-@return TRUE if ok */
-static
-bool
-lock_validate();
-/*============*/
-
-/*********************************************************************//**
-Validates the record lock queues on a page.
-@return TRUE if ok */
-static
-ibool
-lock_rec_validate_page(
-/*===================*/
- const buf_block_t* block) /*!< in: buffer block */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* UNIV_DEBUG */
-
-/* The lock system */
-UNIV_INTERN lock_sys_t* lock_sys = NULL;
-
-/** We store info on the latest deadlock error to this buffer. InnoDB
-Monitor will then fetch it and print */
-UNIV_INTERN ibool lock_deadlock_found = FALSE;
-/** Only created if !srv_read_only_mode */
-static FILE* lock_latest_err_file;
-
-/********************************************************************//**
-Checks if a joining lock request results in a deadlock. If a deadlock is
-found this function will resolve the deadlock by choosing a victim transaction
-and rolling it back. It will attempt to resolve all deadlocks. The returned
-transaction id will be the joining transaction id or 0 if some other
-transaction was chosen as a victim and rolled back or no deadlock found.
-
-@return id of transaction chosen as victim or 0 */
-static
-trx_id_t
-lock_deadlock_check_and_resolve(
-/*===========================*/
- const lock_t* lock, /*!< in: lock the transaction is requesting */
- const trx_t* trx); /*!< in: transaction */
-
-/*********************************************************************//**
-Gets the nth bit of a record lock.
-@return TRUE if bit i is set; FALSE if clear or i >= n_bits (e.g. ULINT_UNDEFINED) */
-UNIV_INLINE
-ibool
-lock_rec_get_nth_bit(
-/*=================*/
- const lock_t* lock, /*!< in: record lock */
- ulint i) /*!< in: index of the bit */
-{
- const byte* b;
-
- ut_ad(lock);
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- if (i >= lock->un_member.rec_lock.n_bits) {
-
- return(FALSE);
- }
-
- b = ((const byte*) &lock[1]) + (i / 8); /* bitmap bytes start right after *lock */
-
- return(1 & *b >> (i % 8));
-}
-
-/*********************************************************************//**
-Reports to stderr that a transaction id is insensible, i.e., in the future. */
-UNIV_INTERN
-void
-lock_report_trx_id_insanity(
-/*========================*/
- trx_id_t trx_id, /*!< in: trx id */
- const rec_t* rec, /*!< in: user record */
- dict_index_t* index, /*!< in: index */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
- trx_id_t max_trx_id) /*!< in: trx_sys_get_max_trx_id() */
-{
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: transaction id associated with record\n",
- stderr);
- rec_print_new(stderr, rec, offsets);
- fputs("InnoDB: in ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fprintf(stderr, "\n"
- "InnoDB: is " TRX_ID_FMT " which is higher than the"
- " global trx id counter " TRX_ID_FMT "!\n"
- "InnoDB: The table is corrupt. You have to do"
- " dump + drop + reimport.\n",
- trx_id, max_trx_id);
-}
-
-/*********************************************************************//**
-Checks that a transaction id is sensible, i.e., below the max trx id.
-@return true if ok; false after reporting via lock_report_trx_id_insanity() */
-#ifdef UNIV_DEBUG
-UNIV_INTERN
-#else
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-#endif
-bool
-lock_check_trx_id_sanity(
-/*=====================*/
- trx_id_t trx_id, /*!< in: trx id */
- const rec_t* rec, /*!< in: user record */
- dict_index_t* index, /*!< in: index */
- const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
-{
- bool is_ok;
- trx_id_t max_trx_id;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- max_trx_id = trx_sys_get_max_trx_id();
- is_ok = trx_id < max_trx_id;
-
- if (UNIV_UNLIKELY(!is_ok)) {
- lock_report_trx_id_insanity(trx_id,
- rec, index, offsets, max_trx_id);
- }
-
- return(is_ok);
-}
-
-/*********************************************************************//**
-Checks that a clustered index record is seen in a consistent read.
-@return true if sees, or false if an earlier version of the record
-should be retrieved */
-UNIV_INTERN
-bool
-lock_clust_rec_cons_read_sees(
-/*==========================*/
- const rec_t* rec, /*!< in: user record which should be read or
- passed over by a read cursor */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- read_view_t* view) /*!< in: consistent read view */
-{
- trx_id_t trx_id;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(page_rec_is_user_rec(rec));
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- /* NOTE that we call this function while holding the search
- system latch. */
-
- trx_id = row_get_rec_trx_id(rec, index, offsets);
-
- return(read_view_sees_trx_id(view, trx_id));
-}
-
-/*********************************************************************//**
-Checks that a non-clustered index record is seen in a consistent read.
-
-NOTE that a non-clustered index page contains so little information on
-its modifications that also in the case false, the present version of
-rec may be the right, but we must check this from the clustered index
-record.
-
-@return true if certainly sees, or false if an earlier version of the
-clustered index record might be needed */
-UNIV_INTERN
-bool
-lock_sec_rec_cons_read_sees(
-/*========================*/
- const rec_t* rec, /*!< in: user record which
- should be read or passed over
- by a read cursor */
- const read_view_t* view) /*!< in: consistent read view */
-{
- trx_id_t max_trx_id;
-
- ut_ad(page_rec_is_user_rec(rec));
-
- /* NOTE that we might call this function while holding the search
- system latch. */
-
- if (recv_recovery_is_on()) {
-
- return(false);
- }
-
- max_trx_id = page_get_max_trx_id(page_align(rec));
- ut_ad(max_trx_id);
-
- return(max_trx_id < view->up_limit_id);
-}
-
-/*********************************************************************//**
-Creates the lock system at database start. */
-UNIV_INTERN
-void
-lock_sys_create(
-/*============*/
- ulint n_cells) /*!< in: number of slots in lock hash table */
-{
- ulint lock_sys_sz;
-
- lock_sys_sz = sizeof(*lock_sys)
- + OS_THREAD_MAX_N * sizeof(srv_slot_t);
-
- lock_sys = static_cast<lock_sys_t*>(mem_zalloc(lock_sys_sz));
-
- lock_stack = static_cast<lock_stack_t*>(
- mem_zalloc(sizeof(*lock_stack) * LOCK_STACK_SIZE));
-
- void* ptr = &lock_sys[1];
-
- lock_sys->waiting_threads = static_cast<srv_slot_t*>(ptr);
-
- lock_sys->last_slot = lock_sys->waiting_threads;
-
- mutex_create(lock_sys_mutex_key, &lock_sys->mutex, SYNC_LOCK_SYS);
-
- mutex_create(lock_sys_wait_mutex_key,
- &lock_sys->wait_mutex, SYNC_LOCK_WAIT_SYS);
-
- lock_sys->timeout_event = os_event_create();
-
- lock_sys->rec_hash = hash_create(n_cells);
- lock_sys->rec_num = 0;
-
- if (!srv_read_only_mode) {
- lock_latest_err_file = os_file_create_tmpfile(NULL);
- ut_a(lock_latest_err_file);
- }
-}
-
-/*********************************************************************//**
-Closes the lock system at database shutdown. */
-UNIV_INTERN
-void
-lock_sys_close(void)
-/*================*/
-{
- if (lock_latest_err_file != NULL) {
- fclose(lock_latest_err_file);
- lock_latest_err_file = NULL;
- }
-
- hash_table_free(lock_sys->rec_hash);
-
- mutex_free(&lock_sys->mutex);
- mutex_free(&lock_sys->wait_mutex);
-
- os_event_free(lock_sys->timeout_event);
-
- for (srv_slot_t* slot = lock_sys->waiting_threads;
- slot < lock_sys->waiting_threads + OS_THREAD_MAX_N; slot++) {
-
- ut_ad(!slot->in_use);
- ut_ad(!slot->thr);
- if (slot->event != NULL)
- os_event_free(slot->event);
- }
-
- mem_free(lock_stack);
- mem_free(lock_sys);
-
- lock_sys = NULL;
- lock_stack = NULL;
-}
-
-/*********************************************************************//**
-Gets the size of a lock struct.
-@return size in bytes */
-UNIV_INTERN
-ulint
-lock_get_size(void)
-/*===============*/
-{
- return((ulint) sizeof(lock_t));
-}
-
-/*********************************************************************//**
-Gets the mode of a lock.
-@return mode */
-UNIV_INLINE
-enum lock_mode
-lock_get_mode(
-/*==========*/
- const lock_t* lock) /*!< in: lock */
-{
- ut_ad(lock);
-
- return(static_cast<enum lock_mode>(lock->type_mode & LOCK_MODE_MASK));
-}
-
-/*********************************************************************//**
-Gets the wait flag of a lock.
-@return LOCK_WAIT if waiting, 0 if not */
-UNIV_INLINE
-ulint
-lock_get_wait(
-/*==========*/
- const lock_t* lock) /*!< in: lock */
-{
- ut_ad(lock);
-
- return(lock->type_mode & LOCK_WAIT);
-}
-
-/*********************************************************************//**
-Gets the source table of an ALTER TABLE transaction. The table must be
-covered by an IX or IS table lock.
-@return the source table of transaction, if it is covered by an IX or
-IS table lock; dest if there is no source table, and NULL if the
-transaction is locking more than two tables or an inconsistency is
-found */
-UNIV_INTERN
-dict_table_t*
-lock_get_src_table(
-/*===============*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* dest, /*!< in: destination of ALTER TABLE */
- enum lock_mode* mode) /*!< out: lock mode of the source table */
-{
- dict_table_t* src;
- lock_t* lock;
-
- ut_ad(!lock_mutex_own());
-
- src = NULL;
- *mode = LOCK_NONE;
-
- /* The trx mutex protects the trx_locks for our purposes.
- Other transactions could want to convert one of our implicit
- record locks to an explicit one. For that, they would need our
- trx mutex. Waiting locks can be removed while only holding
- lock_sys->mutex, but this is a running transaction and cannot
- thus be holding any waiting locks. */
- trx_mutex_enter(trx);
-
- for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
- lock != NULL;
- lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
- lock_table_t* tab_lock;
- enum lock_mode lock_mode;
- if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
- /* We are only interested in table locks. */
- continue;
- }
- tab_lock = &lock->un_member.tab_lock;
- if (dest == tab_lock->table) {
- /* We are not interested in the destination table. */
- continue;
- } else if (!src) {
- /* This presumably is the source table. */
- src = tab_lock->table;
- if (UT_LIST_GET_LEN(src->locks) != 1
- || UT_LIST_GET_FIRST(src->locks) != lock) {
- /* We only support the case when
- there is only one lock on this table. */
- src = NULL;
- goto func_exit;
- }
- } else if (src != tab_lock->table) {
- /* The transaction is locking more than
- two tables (src and dest): abort */
- src = NULL;
- goto func_exit;
- }
-
- /* Check that the source table is locked by
- LOCK_IX or LOCK_IS. */
- lock_mode = lock_get_mode(lock);
- if (lock_mode == LOCK_IX || lock_mode == LOCK_IS) {
- if (*mode != LOCK_NONE && *mode != lock_mode) {
- /* There are multiple locks on src. */
- src = NULL;
- goto func_exit;
- }
- *mode = lock_mode;
- }
- }
-
- if (!src) {
- /* No source table lock found: flag the situation to caller */
- src = dest;
- }
-
-func_exit:
- trx_mutex_exit(trx);
- return(src);
-}
-
-/*********************************************************************//**
-Determine if the given table is exclusively "owned" by the given
-transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
-on the table.
-@return TRUE if table is only locked by trx, with LOCK_IX, and
-possibly LOCK_AUTO_INC */
-UNIV_INTERN
-ibool
-lock_is_table_exclusive(
-/*====================*/
- const dict_table_t* table, /*!< in: table */
- const trx_t* trx) /*!< in: transaction */
-{
- const lock_t* lock;
- ibool ok = FALSE;
-
- ut_ad(table);
- ut_ad(trx);
-
- lock_mutex_enter();
-
- for (lock = UT_LIST_GET_FIRST(table->locks);
- lock != NULL;
- lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) {
- if (lock->trx != trx) {
- /* A lock on the table is held
- by some other transaction. */
- goto not_ok;
- }
-
- if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
- /* We are interested in table locks only. */
- continue;
- }
-
- switch (lock_get_mode(lock)) {
- case LOCK_IX:
- ok = TRUE;
- break;
- case LOCK_AUTO_INC:
- /* It is allowed for trx to hold an
- auto_increment lock. */
- break;
- default:
-not_ok:
- /* Other table locks than LOCK_IX are not allowed. */
- ok = FALSE;
- goto func_exit;
- }
- }
-
-func_exit:
- lock_mutex_exit();
-
- return(ok);
-}
-
-/*********************************************************************//**
-Sets the wait flag of a lock and the back pointer in trx to lock. */
-UNIV_INLINE
-void
-lock_set_lock_and_trx_wait(
-/*=======================*/
- lock_t* lock, /*!< in: lock */
- trx_t* trx) /*!< in/out: trx */
-{
- ut_ad(lock);
- ut_ad(lock->trx == trx);
- ut_ad(trx->lock.wait_lock == NULL);
- ut_ad(lock_mutex_own());
- ut_ad(trx_mutex_own(trx));
-
- trx->lock.wait_lock = lock;
- lock->type_mode |= LOCK_WAIT;
-}
-
-/**********************************************************************//**
-The back pointer to a waiting lock request in the transaction is set to NULL
-and the wait bit in lock type_mode is reset. */
-UNIV_INLINE
-void
-lock_reset_lock_and_trx_wait(
-/*=========================*/
- lock_t* lock) /*!< in/out: record lock */
-{
- ut_ad(lock_get_wait(lock));
- ut_ad(lock_mutex_own());
-
- if (lock->trx->lock.wait_lock &&
- lock->trx->lock.wait_lock != lock) {
- const char* stmt=NULL;
- const char* stmt2=NULL;
- size_t stmt_len;
- trx_id_t trx_id = 0;
- stmt = lock->trx->mysql_thd
- ? innobase_get_stmt(lock->trx->mysql_thd, &stmt_len)
- : NULL;
-
- if (lock->trx->lock.wait_lock &&
- lock->trx->lock.wait_lock->trx) {
- trx_id = lock->trx->lock.wait_lock->trx->id;
- stmt2 = lock->trx->lock.wait_lock->trx->mysql_thd
- ? innobase_get_stmt(
- lock->trx->lock.wait_lock
- ->trx->mysql_thd, &stmt_len)
- : NULL;
- }
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Trx id " TRX_ID_FMT
- " is waiting a lock in statement %s"
- " for this trx id " TRX_ID_FMT
- " and statement %s wait_lock %p",
- lock->trx->id,
- stmt ? stmt : "NULL",
- trx_id,
- stmt2 ? stmt2 : "NULL",
- lock->trx->lock.wait_lock);
- ut_ad(lock->trx->lock.wait_lock == lock);
- }
-
- lock->trx->lock.wait_lock = NULL;
- lock->type_mode &= ~LOCK_WAIT;
-}
-
-/*********************************************************************//**
-Gets the gap flag of a record lock.
-@return LOCK_GAP or 0 */
-UNIV_INLINE
-ulint
-lock_rec_get_gap(
-/*=============*/
- const lock_t* lock) /*!< in: record lock */
-{
- ut_ad(lock);
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- return(lock->type_mode & LOCK_GAP);
-}
-
-/*********************************************************************//**
-Gets the LOCK_REC_NOT_GAP flag of a record lock.
-@return LOCK_REC_NOT_GAP or 0 */
-UNIV_INLINE
-ulint
-lock_rec_get_rec_not_gap(
-/*=====================*/
- const lock_t* lock) /*!< in: record lock */
-{
- ut_ad(lock);
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- return(lock->type_mode & LOCK_REC_NOT_GAP);
-}
-
-/*********************************************************************//**
-Gets the waiting insert flag of a record lock.
-@return LOCK_INSERT_INTENTION or 0 */
-UNIV_INLINE
-ulint
-lock_rec_get_insert_intention(
-/*==========================*/
- const lock_t* lock) /*!< in: record lock */
-{
- ut_ad(lock);
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- return(lock->type_mode & LOCK_INSERT_INTENTION);
-}
-
-/*********************************************************************//**
-Calculates if lock mode 1 is stronger or equal to lock mode 2.
-@return nonzero if mode1 stronger or equal to mode2 */
-UNIV_INLINE
-ulint
-lock_mode_stronger_or_eq(
-/*=====================*/
- enum lock_mode mode1, /*!< in: lock mode */
- enum lock_mode mode2) /*!< in: lock mode */
-{
- ut_ad((ulint) mode1 < lock_types);
- ut_ad((ulint) mode2 < lock_types);
-
- return(lock_strength_matrix[mode1][mode2]);
-}
-
-/*********************************************************************//**
-Calculates if lock mode 1 is compatible with lock mode 2.
-@return nonzero if mode1 compatible with mode2 */
-UNIV_INLINE
-ulint
-lock_mode_compatible(
-/*=================*/
- enum lock_mode mode1, /*!< in: lock mode */
- enum lock_mode mode2) /*!< in: lock mode */
-{
- ut_ad((ulint) mode1 < lock_types);
- ut_ad((ulint) mode2 < lock_types);
-
- return(lock_compatibility_matrix[mode1][mode2]);
-}
-
-/*********************************************************************//**
-Checks if a lock request for a new lock has to wait for request lock2.
-@return TRUE if new lock has to wait for lock2 to be removed */
-UNIV_INLINE
-ibool
-lock_rec_has_to_wait(
-/*=================*/
-#ifdef WITH_WSREP
- ibool for_locking, /*!< is caller locking or releasing */
-#endif /* WITH_WSREP */
- const trx_t* trx, /*!< in: trx of new lock */
- ulint type_mode,/*!< in: precise mode of the new lock
- to set: LOCK_S or LOCK_X, possibly
- ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
- LOCK_INSERT_INTENTION */
- const lock_t* lock2, /*!< in: another record lock; NOTE that
- it is assumed that this has a lock bit
- set on the same record as in the new
- lock we are setting */
- ibool lock_is_on_supremum) /*!< in: TRUE if we are setting the
- lock on the 'supremum' record of an
- index page: we know then that the lock
- request is really for a 'gap' type lock */
-{
- ut_ad(trx && lock2);
- ut_ad(lock_get_type_low(lock2) == LOCK_REC);
-
- if (trx != lock2->trx
- && !lock_mode_compatible(static_cast<enum lock_mode>(
- LOCK_MODE_MASK & type_mode),
- lock_get_mode(lock2))) {
-
- /* We have somewhat complex rules when gap type record locks
- cause waits */
-
- if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
- && !(type_mode & LOCK_INSERT_INTENTION)) {
-
- /* Gap type locks without LOCK_INSERT_INTENTION flag
- do not need to wait for anything. This is because
- different users can have conflicting lock types
- on gaps. */
-
- return(FALSE);
- }
-
- if (!(type_mode & LOCK_INSERT_INTENTION)
- && lock_rec_get_gap(lock2)) {
-
- /* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP
- does not need to wait for a gap type lock */
-
- return(FALSE);
- }
-
- if ((type_mode & LOCK_GAP)
- && lock_rec_get_rec_not_gap(lock2)) {
-
- /* Lock on gap does not need to wait for
- a LOCK_REC_NOT_GAP type lock */
-
- return(FALSE);
- }
-
- if (lock_rec_get_insert_intention(lock2)) {
-
- /* No lock request needs to wait for an insert
- intention lock to be removed. This is ok since our
- rules allow conflicting locks on gaps. This eliminates
- a spurious deadlock caused by a next-key lock waiting
- for an insert intention lock; when the insert
- intention lock was granted, the insert deadlocked on
- the waiting next-key lock.
-
- Also, insert intention locks do not disturb each
- other. */
-
- return(FALSE);
- }
-
- if ((type_mode & LOCK_GAP || lock_rec_get_gap(lock2)) &&
- !thd_need_ordering_with(trx->mysql_thd,
- lock2->trx->mysql_thd)) {
- /* If the upper server layer has already decided on the
- commit order between the transaction requesting the
- lock and the transaction owning the lock, we do not
- need to wait for gap locks. Such ordeering by the upper
- server layer happens in parallel replication, where the
- commit order is fixed to match the original order on the
- master.
-
- Such gap locks are mainly needed to get serialisability
- between transactions so that they will be binlogged in
- the correct order so that statement-based replication
- will give the correct results. Since the right order
- was already determined on the master, we do not need
- to enforce it again here.
-
- Skipping the locks is not essential for correctness,
- since in case of deadlock we will just kill the later
- transaction and retry it. But it can save some
- unnecessary rollbacks and retries. */
-
- return (FALSE);
- }
-
-#ifdef WITH_WSREP
- /* if BF thread is locking and has conflict with another BF
- thread, we need to look at trx ordering and lock types */
- if (wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
- wsrep_thd_is_BF(lock2->trx->mysql_thd, TRUE)) {
-
- if (wsrep_debug) {
- fprintf(stderr,
- "BF-BF lock conflict, locking: %lu\n",
- for_locking);
- lock_rec_print(stderr, lock2);
- }
-
- if (wsrep_trx_order_before(trx->mysql_thd,
- lock2->trx->mysql_thd) &&
- (type_mode & LOCK_MODE_MASK) == LOCK_X &&
- (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X)
- {
- if (for_locking || wsrep_debug) {
- /* exclusive lock conflicts are not
- accepted */
- fprintf(stderr,
- "BF-BF X lock conflict,"
- "mode: %lu supremum: %lu\n",
- type_mode, lock_is_on_supremum);
- fprintf(stderr,
- "conflicts states: my %d locked %d\n",
- wsrep_thd_conflict_state(trx->mysql_thd, FALSE),
- wsrep_thd_conflict_state(lock2->trx->mysql_thd, FALSE) );
- lock_rec_print(stderr, lock2);
- if (for_locking) return FALSE;
- //abort();
- }
- } else {
- /* if lock2->index->n_uniq <=
- lock2->index->n_user_defined_cols
- operation is on uniq index
- */
- if (wsrep_debug) fprintf(stderr,
- "BF conflict, modes: %lu %lu, "
- "idx: %s-%s n_uniq %u n_user %u\n",
- type_mode, lock2->type_mode,
- lock2->index->name,
- lock2->index->table_name,
- lock2->index->n_uniq,
- lock2->index->n_user_defined_cols);
- return FALSE;
- }
- }
-#endif /* WITH_WSREP */
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Checks if a lock request lock1 has to wait for request lock2.
-@return TRUE if lock1 has to wait for lock2 to be removed */
-UNIV_INTERN
-ibool
-lock_has_to_wait(
-/*=============*/
- const lock_t* lock1, /*!< in: waiting lock */
- const lock_t* lock2) /*!< in: another lock; NOTE that it is
- assumed that this has a lock bit set
- on the same record as in lock1 if the
- locks are record locks */
-{
- ut_ad(lock1 && lock2);
-
- if (lock1->trx != lock2->trx
- && !lock_mode_compatible(lock_get_mode(lock1),
- lock_get_mode(lock2))) {
- if (lock_get_type_low(lock1) == LOCK_REC) {
- ut_ad(lock_get_type_low(lock2) == LOCK_REC);
-
- /* If this lock request is for a supremum record
- then the second bit on the lock bitmap is set */
-
-#ifdef WITH_WSREP
- return(lock_rec_has_to_wait(FALSE, lock1->trx,
-#else
- return(lock_rec_has_to_wait(lock1->trx,
-#endif /* WITH_WSREP */
- lock1->type_mode, lock2,
- lock_rec_get_nth_bit(
- lock1, 1)));
- }
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*============== RECORD LOCK BASIC FUNCTIONS ============================*/
-
-/*********************************************************************//**
-Gets the number of bits in a record lock bitmap.
-@return number of bits */
-UNIV_INLINE
-ulint
-lock_rec_get_n_bits(
-/*================*/
- const lock_t* lock) /*!< in: record lock */
-{
- return(lock->un_member.rec_lock.n_bits);
-}
-
-/**********************************************************************//**
-Sets the nth bit of a record lock to TRUE. */
-UNIV_INLINE
-void
-lock_rec_set_nth_bit(
-/*=================*/
- lock_t* lock, /*!< in: record lock */
- ulint i) /*!< in: index of the bit */
-{
- ulint byte_index;
- ulint bit_index;
-
- ut_ad(lock);
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
- ut_ad(i < lock->un_member.rec_lock.n_bits);
-
- byte_index = i / 8;
- bit_index = i % 8;
-
- ((byte*) &lock[1])[byte_index] |= 1 << bit_index;
-}
-
-/**********************************************************************//**
-Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
-if none found.
-@return bit index == heap number of the record, or ULINT_UNDEFINED if
-none found */
-UNIV_INTERN
-ulint
-lock_rec_find_set_bit(
-/*==================*/
- const lock_t* lock) /*!< in: record lock with at least one bit set */
-{
- ulint i;
-
- for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
-
- if (lock_rec_get_nth_bit(lock, i)) {
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/**********************************************************************//**
-Resets the nth bit of a record lock. */
-UNIV_INLINE
-void
-lock_rec_reset_nth_bit(
-/*===================*/
- lock_t* lock, /*!< in: record lock */
- ulint i) /*!< in: index of the bit which must be set to TRUE
- when this function is called */
-{
- ulint byte_index;
- ulint bit_index;
-
- ut_ad(lock);
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
- ut_ad(i < lock->un_member.rec_lock.n_bits);
-
- byte_index = i / 8;
- bit_index = i % 8;
-
- ((byte*) &lock[1])[byte_index] &= ~(1 << bit_index);
-}
-
-/*********************************************************************//**
-Gets the first or next record lock on a page.
-@return next lock, NULL if none exists */
-UNIV_INLINE
-const lock_t*
-lock_rec_get_next_on_page_const(
-/*============================*/
- const lock_t* lock) /*!< in: a record lock */
-{
- ulint space;
- ulint page_no;
-
- ut_ad(lock_mutex_own());
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- space = lock->un_member.rec_lock.space;
- page_no = lock->un_member.rec_lock.page_no;
-
- for (;;) {
- lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock));
-
- if (!lock) {
-
- break;
- }
-
- if ((lock->un_member.rec_lock.space == space)
- && (lock->un_member.rec_lock.page_no == page_no)) {
-
- break;
- }
- }
-
- return(lock);
-}
-
-/*********************************************************************//**
-Gets the first or next record lock on a page.
-@return next lock, NULL if none exists */
-UNIV_INLINE
-lock_t*
-lock_rec_get_next_on_page(
-/*======================*/
- lock_t* lock) /*!< in: a record lock */
-{
- return((lock_t*) lock_rec_get_next_on_page_const(lock));
-}
-
-/*********************************************************************//**
-Gets the first record lock on a page, where the page is identified by its
-file address.
-@return first lock, NULL if none exists */
-UNIV_INLINE
-lock_t*
-lock_rec_get_first_on_page_addr(
-/*============================*/
- ulint space, /*!< in: space */
- ulint page_no)/*!< in: page number */
-{
- lock_t* lock;
-
- ut_ad(lock_mutex_own());
-
- for (lock = static_cast<lock_t*>(
- HASH_GET_FIRST(lock_sys->rec_hash,
- lock_rec_hash(space, page_no)));
- lock != NULL;
- lock = static_cast<lock_t*>(HASH_GET_NEXT(hash, lock))) {
-
- if (lock->un_member.rec_lock.space == space
- && lock->un_member.rec_lock.page_no == page_no) {
-
- break;
- }
- }
-
- return(lock);
-}
-
-/*********************************************************************//**
-Determines if there are explicit record locks on a page.
-@return an explicit record lock on the page, or NULL if there are none */
-UNIV_INTERN
-lock_t*
-lock_rec_expl_exist_on_page(
-/*========================*/
- ulint space, /*!< in: space id */
- ulint page_no)/*!< in: page number */
-{
- lock_t* lock;
-
- lock_mutex_enter();
- lock = lock_rec_get_first_on_page_addr(space, page_no);
- lock_mutex_exit();
-
- return(lock);
-}
-
-/*********************************************************************//**
-Gets the first record lock on a page, where the page is identified by a
-pointer to it.
-@return first lock, NULL if none exists */
-UNIV_INLINE
-lock_t*
-lock_rec_get_first_on_page(
-/*=======================*/
- const buf_block_t* block) /*!< in: buffer block */
-{
- ulint hash;
- lock_t* lock;
- ulint space = buf_block_get_space(block);
- ulint page_no = buf_block_get_page_no(block);
-
- ut_ad(lock_mutex_own());
-
- hash = buf_block_get_lock_hash_val(block);
-
- for (lock = static_cast<lock_t*>(
- HASH_GET_FIRST( lock_sys->rec_hash, hash));
- lock != NULL;
- lock = static_cast<lock_t*>(HASH_GET_NEXT(hash, lock))) {
-
- if ((lock->un_member.rec_lock.space == space)
- && (lock->un_member.rec_lock.page_no == page_no)) {
-
- break;
- }
- }
-
- return(lock);
-}
-
-/*********************************************************************//**
-Gets the next explicit lock request on a record.
-@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
-UNIV_INLINE
-lock_t*
-lock_rec_get_next(
-/*==============*/
- ulint heap_no,/*!< in: heap number of the record */
- lock_t* lock) /*!< in: lock */
-{
- ut_ad(lock_mutex_own());
-
- do {
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
- lock = lock_rec_get_next_on_page(lock);
- } while (lock && !lock_rec_get_nth_bit(lock, heap_no));
-
- return(lock);
-}
-
-/*********************************************************************//**
-Gets the next explicit lock request on a record.
-@return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
-UNIV_INLINE
-const lock_t*
-lock_rec_get_next_const(
-/*====================*/
- ulint heap_no,/*!< in: heap number of the record */
- const lock_t* lock) /*!< in: lock */
-{
- return(lock_rec_get_next(heap_no, (lock_t*) lock));
-}
-
-/*********************************************************************//**
-Gets the first explicit lock request on a record.
-@return first lock, NULL if none exists */
-UNIV_INLINE
-lock_t*
-lock_rec_get_first(
-/*===============*/
- const buf_block_t* block, /*!< in: block containing the record */
- ulint heap_no)/*!< in: heap number of the record */
-{
- lock_t* lock;
-
- ut_ad(lock_mutex_own());
-
- for (lock = lock_rec_get_first_on_page(block); lock;
- lock = lock_rec_get_next_on_page(lock)) {
- if (lock_rec_get_nth_bit(lock, heap_no)) {
- break;
- }
- }
-
- return(lock);
-}
-
-/*********************************************************************//**
-Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
-pointer in the transaction! This function is used in lock object creation
-and resetting. */
-static
-void
-lock_rec_bitmap_reset(
-/*==================*/
- lock_t* lock) /*!< in: record lock */
-{
- ulint n_bytes;
-
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- /* Reset to zero the bitmap which resides immediately after the lock
- struct */
-
- n_bytes = lock_rec_get_n_bits(lock) / 8;
-
- ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);
-
- memset(&lock[1], 0, n_bytes);
-}
-
-/*********************************************************************//**
-Copies a record lock to heap.
-@return copy of lock */
-static
-lock_t*
-lock_rec_copy(
-/*==========*/
- const lock_t* lock, /*!< in: record lock */
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ulint size;
-
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;
-
- return(static_cast<lock_t*>(mem_heap_dup(heap, lock, size)));
-}
-
-/*********************************************************************//**
-Gets the previous record lock set on a record.
-@return previous lock on the same record, NULL if none exists */
-UNIV_INTERN
-const lock_t*
-lock_rec_get_prev(
-/*==============*/
- const lock_t* in_lock,/*!< in: record lock */
- ulint heap_no)/*!< in: heap number of the record */
-{
- lock_t* lock;
- ulint space;
- ulint page_no;
- lock_t* found_lock = NULL;
-
- ut_ad(lock_mutex_own());
- ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
-
- space = in_lock->un_member.rec_lock.space;
- page_no = in_lock->un_member.rec_lock.page_no;
-
- for (lock = lock_rec_get_first_on_page_addr(space, page_no);
- /* No op */;
- lock = lock_rec_get_next_on_page(lock)) {
-
- ut_ad(lock);
-
- if (lock == in_lock) {
-
- return(found_lock);
- }
-
- if (lock_rec_get_nth_bit(lock, heap_no)) {
-
- found_lock = lock;
- }
- }
-}
-
-/*============= FUNCTIONS FOR ANALYZING TABLE LOCK QUEUE ================*/
-
-/*********************************************************************//**
-Checks if a transaction has the specified table lock, or stronger. This
-function should only be called by the thread that owns the transaction.
-@return lock or NULL */
-UNIV_INLINE
-const lock_t*
-lock_table_has(
-/*===========*/
- const trx_t* trx, /*!< in: transaction */
- const dict_table_t* table, /*!< in: table */
- enum lock_mode mode) /*!< in: lock mode */
-{
- lint i;
-
- if (ib_vector_is_empty(trx->lock.table_locks)) {
- return(NULL);
- }
-
- /* Look for stronger locks the same trx already has on the table */
-
- for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) {
- const lock_t* lock;
- enum lock_mode lock_mode;
-
- lock = *static_cast<const lock_t**>(
- ib_vector_get(trx->lock.table_locks, i));
-
- if (lock == NULL) {
- continue;
- }
-
- lock_mode = lock_get_mode(lock);
-
- ut_ad(trx == lock->trx);
- ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
- ut_ad(lock->un_member.tab_lock.table != NULL);
-
- if (table == lock->un_member.tab_lock.table
- && lock_mode_stronger_or_eq(lock_mode, mode)) {
-
- ut_ad(!lock_get_wait(lock));
-
- return(lock);
- }
- }
-
- return(NULL);
-}
-
-/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/
-
-/*********************************************************************//**
-Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
-to precise_mode.
-@return lock or NULL */
-UNIV_INLINE
-lock_t*
-lock_rec_has_expl(
-/*==============*/
- ulint precise_mode,/*!< in: LOCK_S or LOCK_X
- possibly ORed to LOCK_GAP or
- LOCK_REC_NOT_GAP, for a
- supremum record we regard this
- always a gap type request */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of the record */
- trx_id_t trx_id) /*!< in: transaction id */
-{
- lock_t* lock;
-
- ut_ad(lock_mutex_own());
- ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
- || (precise_mode & LOCK_MODE_MASK) == LOCK_X);
- ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
-
- for (lock = lock_rec_get_first(block, heap_no);
- lock != NULL;
- lock = lock_rec_get_next(heap_no, lock)) {
-
- if (lock->trx->id == trx_id
- && !lock_rec_get_insert_intention(lock)
- && lock_mode_stronger_or_eq(
- lock_get_mode(lock),
- static_cast<enum lock_mode>(
- precise_mode & LOCK_MODE_MASK))
- && !lock_get_wait(lock)
- && (!lock_rec_get_rec_not_gap(lock)
- || (precise_mode & LOCK_REC_NOT_GAP)
- || heap_no == PAGE_HEAP_NO_SUPREMUM)
- && (!lock_rec_get_gap(lock)
- || (precise_mode & LOCK_GAP)
- || heap_no == PAGE_HEAP_NO_SUPREMUM)) {
-
- return(lock);
- }
- }
-
- return(NULL);
-}
-
-#ifdef WITH_WSREP
-static
-void
-lock_rec_discard(lock_t* in_lock);
-#endif
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Checks if some other transaction has a lock request in the queue.
-@return lock or NULL */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-const lock_t*
-lock_rec_other_has_expl_req(
-/*========================*/
- enum lock_mode mode, /*!< in: LOCK_S or LOCK_X */
- ulint gap, /*!< in: LOCK_GAP if also gap
- locks are taken into account,
- or 0 if not */
- ulint wait, /*!< in: LOCK_WAIT if also
- waiting locks are taken into
- account, or 0 if not */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of the record */
- trx_id_t trx_id) /*!< in: transaction */
-{
- const lock_t* lock;
-
- ut_ad(lock_mutex_own());
- ut_ad(mode == LOCK_X || mode == LOCK_S);
- ut_ad(gap == 0 || gap == LOCK_GAP);
- ut_ad(wait == 0 || wait == LOCK_WAIT);
-
- for (lock = lock_rec_get_first(block, heap_no);
- lock != NULL;
- lock = lock_rec_get_next_const(heap_no, lock)) {
-
- if (lock->trx->id != trx_id
- && (gap
- || !(lock_rec_get_gap(lock)
- || heap_no == PAGE_HEAP_NO_SUPREMUM))
- && (wait || !lock_get_wait(lock))
- && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {
-
- return(lock);
- }
- }
-
- return(NULL);
-}
-#endif /* UNIV_DEBUG */
-
-#ifdef WITH_WSREP
-static
-void
-wsrep_kill_victim( /* wsrep conflict resolution for a lock request of trx
-that conflicts with lock. Does nothing when wsrep is off for trx. When trx
-is a BF transaction (per wsrep_thd_is_BF()) and the owner of lock either is
-not BF, or is BF and wsrep_trx_order_before(trx, owner) holds, the owner is
-aborted through wsrep_innobase_kill_one_trx() -- unless the owner is itself
-in a lock wait (TRX_QUE_LOCK_WAIT), in which case nothing is done here.
-The caller must hold the lock mutex and the trx mutex of lock->trx (both
-are asserted below). */
- const trx_t * const trx,
- const lock_t *lock)
-{
- ut_ad(lock_mutex_own());
- ut_ad(trx_mutex_own(lock->trx));
-
- /* quit for native mysql */
- if (!wsrep_on(trx->mysql_thd)) return;
-
- my_bool bf_this = wsrep_thd_is_BF(trx->mysql_thd, FALSE);
- my_bool bf_other = wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE);
-
- if ((bf_this && !bf_other) ||
- (bf_this && bf_other && wsrep_trx_order_before(
- trx->mysql_thd, lock->trx->mysql_thd))) {
-
- if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
- if (wsrep_debug) {
- fprintf(stderr, "WSREP: BF victim waiting\n");
- }
- /* cannot release lock, until our lock
- is in the queue*/
- } else if (lock->trx != trx) {
- if (wsrep_log_conflicts) {
- if (bf_this) { /* always true here: both
- alternatives of the enclosing condition require
- bf_this */
- fputs("\n*** Priority TRANSACTION:\n",
- stderr);
- } else {
- fputs("\n*** Victim TRANSACTION:\n",
- stderr);
- }
-
- wsrep_trx_print_locking(stderr, trx, 3000);
-
- if (bf_other) {
- fputs("\n*** Priority TRANSACTION:\n",
- stderr);
- } else {
- fputs("\n*** Victim TRANSACTION:\n",
- stderr);
- }
- wsrep_trx_print_locking(stderr, lock->trx, 3000);
-
- fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n",
- stderr);
-
- if (lock_get_type(lock) == LOCK_REC) {
- lock_rec_print(stderr, lock);
- } else {
- lock_table_print(stderr, lock);
- }
- }
-
- lock->trx->abort_type = TRX_WSREP_ABORT; /* set only for
- the duration of the kill call; reset right after it */
- wsrep_innobase_kill_one_trx(trx->mysql_thd,
- (const trx_t*) trx, lock->trx, TRUE);
- lock->trx->abort_type = TRX_SERVER_ABORT;
- }
- }
-}
-#endif
-/*********************************************************************//**
-Checks if some other transaction has a conflicting explicit lock request
-in the queue, so that we have to wait. The record lock queue of heap_no is
-scanned from its first lock, and the first lock for which
-lock_rec_has_to_wait() reports a conflict is returned. In WITH_WSREP builds,
-when wsrep is on for trx, wsrep_kill_victim() is invoked on that lock (under
-the trx mutex of its owner) before it is returned.
-The caller must hold the lock mutex.
-@return lock or NULL */
-static
-const lock_t*
-lock_rec_other_has_conflicting(
-/*===========================*/
- enum lock_mode mode, /*!< in: LOCK_S or LOCK_X,
- possibly ORed to LOCK_GAP or
- LOCK_REC_NOT_GAP,
- LOCK_INSERT_INTENTION */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of the record */
- const trx_t* trx) /*!< in: our transaction */
-{
- const lock_t* lock;
- ibool is_supremum;
-
- ut_ad(lock_mutex_own());
-
- is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM);
-
- for (lock = lock_rec_get_first(block, heap_no);
- lock != NULL;
- lock = lock_rec_get_next_const(heap_no, lock)) {
-
-#ifdef WITH_WSREP
- if (lock_rec_has_to_wait(TRUE, trx, mode, lock, is_supremum)) {
- if (wsrep_on(trx->mysql_thd)) {
- trx_mutex_enter(lock->trx);
- wsrep_kill_victim(trx, lock);
- trx_mutex_exit(lock->trx);
- }
-#else
- if (lock_rec_has_to_wait(trx, mode, lock, is_supremum)) {
-#endif /* WITH_WSREP */
-
- return(lock); /* shared by both preprocessor branches: the
- closing brace below ends whichever "if" was compiled */
- }
- }
-
- return(NULL);
-}
-
-/*********************************************************************//**
-Walks the record locks of a page, starting from the given lock, looking for
-a lock struct that can be reused for a new request: it must belong to the
-same transaction, carry exactly the requested type_mode, and have a bitmap
-with more than heap_no bits. Reusing such a struct saves allocating a new
-one; the caller only has to set the bit.
-@return the reusable lock, or NULL if none was found */
-UNIV_INLINE
-lock_t*
-lock_rec_find_similar_on_page(
-/*==========================*/
-	ulint type_mode, /*!< in: lock type_mode field */
-	ulint heap_no, /*!< in: heap number of the record */
-	lock_t* lock, /*!< in: lock_rec_get_first_on_page() */
-	const trx_t* trx) /*!< in: transaction */
-{
-	ut_ad(lock_mutex_own());
-
-	while (lock != NULL) {
-		const bool reusable = lock->trx == trx
-			&& lock->type_mode == type_mode
-			&& lock_rec_get_n_bits(lock) > heap_no;
-
-		if (reusable) {
-			return(lock);
-		}
-
-		lock = lock_rec_get_next_on_page(lock);
-	}
-
-	return(NULL);
-}
-
-/*********************************************************************//**
-Determines whether some transaction may hold an implicit x-lock on a user
-record of a secondary index.
-@return id of the transaction that row_vers_impl_x_locked() reports as
-holding the x-lock, or 0 when no transaction can hold it (or when the page
-looks corrupt);
-NOTE that this function can return false positives but never false
-negatives. The caller must confirm all positive results by calling
-trx_is_active(). */
-static
-trx_id_t
-lock_sec_rec_some_has_impl(
-/*=======================*/
-	const rec_t* rec, /*!< in: user record */
-	dict_index_t* index, /*!< in: secondary index */
-	const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
-	const page_t* page = page_align(rec);
-
-	ut_ad(!lock_mutex_own());
-	ut_ad(!mutex_own(&trx_sys->mutex));
-	ut_ad(!dict_index_is_clust(index));
-	ut_ad(page_rec_is_user_rec(rec));
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	const trx_id_t page_max_trx_id = page_get_max_trx_id(page);
-
-	/* Some transaction may have an implicit x-lock on the record only
-	if the max trx id for the page >= min trx id for the trx list, or
-	database recovery is running. We do not write the changes of a page
-	max trx id to the log, and therefore during recovery, this value
-	for a page may be incorrect. */
-
-	if (page_max_trx_id < trx_rw_min_trx_id()
-	    && !recv_recovery_is_on()) {
-
-		return(0);
-	}
-
-	if (!lock_check_trx_id_sanity(page_max_trx_id, rec, index, offsets)) {
-
-		buf_page_print(page, 0, 0);
-
-		/* The page is corrupt: try to avoid a crash by returning 0 */
-		return(0);
-	}
-
-	/* In this case it is possible that some transaction has an implicit
-	x-lock. We have to look in the clustered index. */
-
-	return(row_vers_impl_x_locked(rec, index, offsets));
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Checks if some transaction, other than given trx_id, has an explicit
-lock on the given rec, in the given precise_mode.
-The function acquires the lock mutex and, for the duration of the
-descriptor lookup and copy, trx_sys->mutex; both are released before
-returning. If trx_id is not found among trx_sys->descriptors, NULL is
-returned without examining any locks.
-@return the transaction, whose id is not equal to trx_id, that has an
-explicit lock on the given rec, in the given precise_mode or NULL.*/
-static
-trx_t*
-lock_rec_other_trx_holds_expl(
-/*==========================*/
- ulint precise_mode, /*!< in: LOCK_S or LOCK_X
- possibly ORed to LOCK_GAP or
- LOCK_REC_NOT_GAP. */
- trx_id_t trx_id, /*!< in: trx holding implicit
- lock on rec */
- const rec_t* rec, /*!< in: user record */
- const buf_block_t* block) /*!< in: buffer block
- containing the record */
-{
- trx_t* holds = NULL;
-
- lock_mutex_enter();
- mutex_enter(&trx_sys->mutex);
-
- trx_id_t* impl_trx_desc = trx_find_descriptor(trx_sys->descriptors,
- trx_sys->descr_n_used,
- trx_id);
- if (impl_trx_desc) {
- ut_ad(trx_id == *impl_trx_desc);
- ulint heap_no = page_rec_get_heap_no(rec);
- ulint rw_trx_count = trx_sys->descr_n_used;
- trx_id_t* rw_trx_snapshot = static_cast<trx_id_t *>
- (ut_malloc(sizeof(trx_id_t) * rw_trx_count));
- memcpy(rw_trx_snapshot, trx_sys->descriptors,
- sizeof(trx_id_t) * rw_trx_count); /* private copy of the
- descriptor array, so that trx_sys->mutex can be released
- before the lock scan below */
-
- mutex_exit(&trx_sys->mutex);
-
- for (ulint i = 0; i < rw_trx_count; i++) {
-
- lock_t* expl_lock = lock_rec_has_expl(precise_mode,
- block, heap_no,
- rw_trx_snapshot[i]);
- if (expl_lock && expl_lock->trx->id != trx_id) {
- /* An explicit lock is held by trx other than
- the trx holding the implicit lock. */
- holds = expl_lock->trx;
- break;
- }
- }
-
- ut_free(rw_trx_snapshot);
-
- } else {
- mutex_exit(&trx_sys->mutex);
- }
-
- lock_mutex_exit();
-
- return(holds);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Returns the approximate number of record locks held by a transaction,
-counted as the number of bits set in the bitmaps of all its record lock
-structs. Since delete-marked records may be removed, the record count
-will not be precise.
-The caller must be holding lock_sys->mutex.
-@return number of bits set in the record lock bitmaps */
-UNIV_INTERN
-ulint
-lock_number_of_rows_locked(
-/*=======================*/
-	const trx_lock_t* trx_lock) /*!< in: transaction locks */
-{
-	ulint n_locked = 0;
-
-	ut_ad(lock_mutex_own());
-
-	for (const lock_t* cur = UT_LIST_GET_FIRST(trx_lock->trx_locks);
-	     cur != NULL;
-	     cur = UT_LIST_GET_NEXT(trx_locks, cur)) {
-
-		/* Only record locks carry a bitmap; skip everything else. */
-		if (lock_get_type_low(cur) != LOCK_REC) {
-			continue;
-		}
-
-		const ulint bitmap_size = lock_rec_get_n_bits(cur);
-
-		for (ulint i = 0; i < bitmap_size; i++) {
-			if (lock_rec_get_nth_bit(cur, i)) {
-				++n_locked;
-			}
-		}
-	}
-
-	return(n_locked);
-}
-
-/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/
-
-#ifdef WITH_WSREP
-static
-void
-wsrep_print_wait_locks(
-/*===================*/
-	lock_t* c_lock) /* in: conflicting lock to print */
-{
-	/* Debug aid: when wsrep_debug is set and the owner of c_lock is
-	not waiting for c_lock itself, print c_lock and then the lock its
-	owner is actually waiting for. Prints nothing otherwise. */
-	if (!wsrep_debug) {
-		return;
-	}
-
-	const lock_t* wait_lock = c_lock->trx->lock.wait_lock;
-
-	if (wait_lock == c_lock) {
-		return;
-	}
-
-	fprintf(stderr, "WSREP: c_lock != wait lock\n");
-
-	if (lock_get_type_low(c_lock) & LOCK_TABLE) {
-		lock_table_print(stderr, c_lock);
-	} else {
-		lock_rec_print(stderr, c_lock);
-	}
-
-	/* The owner may not be waiting for any lock at all: a NULL
-	wait_lock also differs from c_lock, and must not be passed to
-	lock_get_type_low(), which dereferences it. */
-	if (wait_lock == NULL) {
-		return;
-	}
-
-	if (lock_get_type_low(wait_lock) & LOCK_TABLE) {
-		lock_table_print(stderr, wait_lock);
-	} else {
-		lock_rec_print(stderr, wait_lock);
-	}
-}
-#endif /* WITH_WSREP */
-
-/*********************************************************************//**
-Decides whether lock1 takes precedence over lock2. The rules are applied
-in this order:
-1. a NULL lock1 never has higher priority;
-2. any non-NULL lock1 beats a NULL lock2;
-3. if lock1 is not a waiting lock, it has higher priority;
-4. otherwise, if lock2 is not a waiting lock, lock1 has lower priority;
-5. both are waiting: lock1 wins if its transaction started no later than
-   the transaction of lock2 (start_time_micro).
-@returns true if lock1 has higher priority, false otherwise. */
-bool
-has_higher_priority(
-	lock_t *lock1,
-	lock_t *lock2)
-{
-	/* NULL has the lowest priority. */
-	if (lock1 == NULL || lock2 == NULL) {
-		return lock1 != NULL;
-	}
-
-	/* A granted lock1 always wins. */
-	if (!lock_get_wait(lock1)) {
-		return true;
-	}
-
-	/* lock1 is waiting; a granted lock2 wins over it. */
-	if (!lock_get_wait(lock2)) {
-		return false;
-	}
-
-	/* Both are waiting: compare the ages of the transactions. */
-	return lock1->trx->start_time_micro <= lock2->trx->start_time_micro;
-}
-
-/*********************************************************************//**
-Insert a lock to the hash list according to the mode (whether it is a wait
-lock) and the age of the transaction that it is associated with.
-If the lock is not a wait lock, insert it to the head of the hash list.
-Otherwise, insert it to the middle of the wait locks according to the age of
-the transaction.
-A waiting lock that ends up at the head of the list, or that
-lock_rec_has_to_wait_in_queue() reports as not having to wait after the
-insertion, is granted with lock_grant(); in the latter case it is also moved
-to the head of the list.
-@return DB_SUCCESS_LOCKED_REC if in_lock was granted here, DB_SUCCESS
-otherwise */
-static
-dberr_t
-lock_rec_insert_by_trx_age(
- lock_t *in_lock) /*!< in: lock to be inserted */{
- ulint space;
- ulint page_no;
- ulint rec_fold;
- lock_t* node;
- lock_t* next;
- hash_cell_t* cell;
-
- space = in_lock->un_member.rec_lock.space;
- page_no = in_lock->un_member.rec_lock.page_no;
- rec_fold = lock_rec_fold(space, page_no);
- cell = hash_get_nth_cell(lock_sys->rec_hash,
- hash_calc_hash(rec_fold, lock_sys->rec_hash));
-
- node = (lock_t *) cell->node;
- // If in_lock is not a wait lock, we insert it to the head of the list.
- if (node == NULL || !lock_get_wait(in_lock) || has_higher_priority(in_lock, node)) {
- cell->node = in_lock;
- in_lock->hash = node;
- if (lock_get_wait(in_lock)) {
- lock_grant(in_lock, true);
- return DB_SUCCESS_LOCKED_REC;
- }
- return DB_SUCCESS;
- }
- while (node != NULL && has_higher_priority((lock_t *) node->hash,
- in_lock)) { /* advance while the successor outranks in_lock;
- has_higher_priority() returns false for a NULL successor, so
- node stays non-NULL */
- node = (lock_t *) node->hash;
- }
- next = (lock_t *) node->hash;
- node->hash = in_lock; /* splice in_lock in right after node */
- in_lock->hash = next;
-
- if (lock_get_wait(in_lock) && !lock_rec_has_to_wait_in_queue(in_lock)) {
- lock_grant(in_lock, true);
- if (cell->node != in_lock) {
- // Move it to the front of the queue
- node->hash = in_lock->hash;
- next = (lock_t *) cell->node;
- cell->node = in_lock;
- in_lock->hash = next;
- }
- return DB_SUCCESS_LOCKED_REC;
- }
-
- return DB_SUCCESS;
-}
-
-/*********************************************************************//**
-Debug check of the ordering of the record lock hash chain that in_lock
-hashes to: once a waiting lock has been seen while walking the chain from
-its head, no granted (non-waiting) lock may follow. A violation trips
-ut_ad(). A NULL in_lock is accepted and nothing is checked.
-@return true always */
-static
-bool
-lock_queue_validate(
-	const lock_t *in_lock) /*!< in: lock whose hash list is to be validated */
-{
-	if (in_lock == NULL) {
-		return true;
-	}
-
-	const ulint fold = lock_rec_fold(
-		in_lock->un_member.rec_lock.space,
-		in_lock->un_member.rec_lock.page_no);
-
-	hash_cell_t* cell = hash_get_nth_cell(
-		lock_sys->rec_hash,
-		hash_calc_hash(fold, lock_sys->rec_hash));
-
-	bool seen_waiter = false;
-
-	for (lock_t* cur = (lock_t *) cell->node;
-	     cur != NULL;
-	     cur = (lock_t *) cur->hash) {
-
-		if (lock_get_wait(cur)) {
-			seen_waiter = true;
-		} else {
-			/* A granted lock must not come after a waiting one. */
-			ut_ad(!seen_waiter);
-		}
-	}
-
-	return true;
-}
-
-/*********************************************************************//**
-Links a record lock in as the first element of the hash chain selected by
-rec_fold. Nothing is done if in_lock is NULL or is already the first
-element of that chain. */
-static
-void
-lock_rec_insert_to_head(
-	lock_t *in_lock, /*!< in: lock to be insert */
-	ulint rec_fold) /*!< in: rec_fold of the page */
-{
-	if (in_lock == NULL) {
-		return;
-	}
-
-	hash_cell_t* cell = hash_get_nth_cell(
-		lock_sys->rec_hash,
-		hash_calc_hash(rec_fold, lock_sys->rec_hash));
-
-	lock_t* old_head = (lock_t *) cell->node;
-
-	if (old_head == in_lock) {
-		/* Already at the head: relinking would make the lock
-		point to itself. */
-		return;
-	}
-
-	in_lock->hash = old_head;
-	cell->node = in_lock;
-}
-
-/*********************************************************************//**
-Creates a new record lock and inserts it to the lock queue. Does NOT check
-for deadlocks or lock compatibility!
-The lock struct and its bitmap are allocated from trx->lock.lock_heap, the
-bit of heap_no is set, and the lock is linked into lock_sys->rec_hash and
-appended to trx->lock.trx_locks.
-In WITH_WSREP builds, when c_lock is given and trx is a BF transaction, the
-new lock is linked into the hash chain after c_lock instead; if the owner of
-c_lock is itself in a lock wait, that wait is cancelled and the function
-returns early (see the comments in the body).
-@return created lock */
-static
-lock_t*
-lock_rec_create(
-/*============*/
-#ifdef WITH_WSREP
- lock_t* const c_lock, /* in: conflicting lock, or NULL */
- que_thr_t* thr, /* in: query thread; asserted non-NULL only on the
- path where the owner of c_lock is in a lock wait */
-#endif
- ulint type_mode,/*!< in: lock mode and wait
- flag, type is ignored and
- replaced by LOCK_REC */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of the record */
- dict_index_t* index, /*!< in: index of record */
- trx_t* trx, /*!< in/out: transaction */
- ibool caller_owns_trx_mutex)
- /*!< in: TRUE if caller owns
- trx mutex */
-{
- lock_t* lock;
- ulint page_no;
- ulint space;
- ulint rec_fold;
- ulint n_bits;
- ulint n_bytes;
- bool wait_lock;
- const page_t* page;
-
- ut_ad(lock_mutex_own());
- ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
- ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
-
- /* Non-locking autocommit read-only transactions should not set
- any locks. */
- assert_trx_in_list(trx);
-
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
- page = block->frame;
-
- btr_assert_not_corrupted(block, index);
-
- /* If rec is the supremum record, then we reset the gap and
- LOCK_REC_NOT_GAP bits, as all locks on the supremum are
- automatically of the gap type */
-
- if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
- ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
-
- type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
- }
-
- wait_lock = type_mode & LOCK_WAIT;
-
- /* Make lock bitmap bigger by a safety margin */
- n_bits = page_dir_get_n_heap(page) + LOCK_PAGE_BITMAP_MARGIN;
- n_bytes = 1 + n_bits / 8;
-
- lock = static_cast<lock_t*>(
- mem_heap_alloc(trx->lock.lock_heap, sizeof(lock_t) + n_bytes));
-
- lock->trx = trx;
-
- lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC;
- lock->index = index;
-
- lock->un_member.rec_lock.space = space;
- lock->un_member.rec_lock.page_no = page_no;
- lock->un_member.rec_lock.n_bits = n_bytes * 8; /* the bitmap size
- recorded is the allocated size in bits, which is at least n_bits */
- rec_fold = lock_rec_fold(space, page_no);
-
- /* Reset to zero the bitmap which resides immediately after the
- lock struct */
-
- lock_rec_bitmap_reset(lock);
-
- /* Set the bit corresponding to rec */
- lock_rec_set_nth_bit(lock, heap_no);
-
- lock->requested_time = ut_time();
- lock->wait_time = 0;
-
- index->table->n_rec_locks++;
-
- ut_ad(index->table->n_ref_count > 0 || !index->table->can_be_evicted);
-
-#ifdef WITH_WSREP
- if (c_lock &&
- wsrep_on(trx->mysql_thd) &&
- wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
- lock_t *hash = (lock_t *)c_lock->hash;
- lock_t *prev = NULL;
-
- while (hash &&
- wsrep_thd_is_BF(((lock_t *)hash)->trx->mysql_thd, TRUE) &&
- wsrep_trx_order_before(
- ((lock_t *)hash)->trx->mysql_thd,
- trx->mysql_thd)) { /* skip the locks following c_lock
- whose owners are BF and satisfy wsrep_trx_order_before()
- relative to trx; the new lock is linked in after them */
- prev = hash;
- hash = (lock_t *)hash->hash;
- }
- lock->hash = hash;
- if (prev) {
- prev->hash = lock;
- } else {
- c_lock->hash = lock;
- }
- /*
- * delayed conflict resolution '...kill_one_trx' was not called,
- * if victim was waiting for some other lock
- */
- trx_mutex_enter(c_lock->trx);
- if (c_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
-
- c_lock->trx->lock.was_chosen_as_deadlock_victim = TRUE;
-
- if (wsrep_debug) {
- wsrep_print_wait_locks(c_lock);
- }
-
- trx->lock.que_state = TRX_QUE_LOCK_WAIT;
- lock_set_lock_and_trx_wait(lock, trx);
- UT_LIST_ADD_LAST(trx_locks, trx->lock.trx_locks, lock);
-
- ut_ad(thr != NULL);
- trx->lock.wait_thr = thr;
- thr->state = QUE_THR_LOCK_WAIT;
-
- /* have to release trx mutex for the duration of
- victim lock release. This will eventually call
- lock_grant, which wants to grant trx mutex again
- */
- if (caller_owns_trx_mutex) {
- trx_mutex_exit(trx);
- }
- lock_cancel_waiting_and_release(
- c_lock->trx->lock.wait_lock);
-
- if (caller_owns_trx_mutex) {
- trx_mutex_enter(trx);
- }
-
- /* trx might not wait for c_lock, but some other lock
- does not matter if wait_lock was released above
- */
- if (c_lock->trx->lock.wait_lock == c_lock) {
- lock_reset_lock_and_trx_wait(lock);
- }
-
- trx_mutex_exit(c_lock->trx);
-
- if (wsrep_debug) {
- fprintf(
- stderr,
- "WSREP: c_lock canceled %llu\n",
- (ulonglong) c_lock->trx->id);
- }
-
- /* have to bail out here to avoid lock_set_lock...
- Note that this early return also skips the
- lock_sys->rec_num increment and the MONITOR_INC
- calls at the end of the function. */
- return(lock);
- }
- trx_mutex_exit(c_lock->trx);
- } else if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
- && !thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
- if (wait_lock) {
- HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock);
- } else {
- lock_rec_insert_to_head(lock, rec_fold);
- }
- } else {
- HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock);
- }
-#else
- if (innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
- && !thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
- if (wait_lock) {
- HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock);
- } else {
- lock_rec_insert_to_head(lock, rec_fold);
- }
- } else {
- HASH_INSERT(lock_t, hash, lock_sys->rec_hash, rec_fold, lock);
- }
-#endif /* WITH_WSREP */
-
- lock_sys->rec_num++;
-
- if (!caller_owns_trx_mutex) {
- trx_mutex_enter(trx);
- }
- ut_ad(trx_mutex_own(trx));
-
- if (type_mode & LOCK_WAIT) {
- lock_set_lock_and_trx_wait(lock, trx);
- }
-
- UT_LIST_ADD_LAST(trx_locks, trx->lock.trx_locks, lock);
-
- if (!caller_owns_trx_mutex) {
- trx_mutex_exit(trx);
- }
-
- MONITOR_INC(MONITOR_RECLOCK_CREATED);
- MONITOR_INC(MONITOR_NUM_RECLOCK);
- return(lock);
-}
-
-/*********************************************************************//**
-Enqueues a waiting request for a lock which cannot be granted immediately.
-Checks for deadlocks.
-The caller must hold the lock mutex and the mutex of the transaction of thr
-(both are asserted); the trx mutex is released around the deadlock check and
-re-acquired afterwards.
-When the VATS lock scheduling algorithm is active and the thread is not a
-replication slave thread, the new lock is finally removed from the hash and
-re-inserted with lock_rec_insert_by_trx_age(); any result of that call other
-than DB_SUCCESS is returned to the caller instead of the wait status.
-@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
-DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
-there was a deadlock, but another transaction was chosen as a victim,
-and we got the lock immediately: no need to wait then */
-static
-dberr_t
-lock_rec_enqueue_waiting(
-/*=====================*/
-#ifdef WITH_WSREP
- lock_t* c_lock, /* conflicting lock */
-#endif
- ulint type_mode,/*!< in: lock mode this
- transaction is requesting:
- LOCK_S or LOCK_X, possibly
- ORed with LOCK_GAP or
- LOCK_REC_NOT_GAP, ORed with
- LOCK_INSERT_INTENTION if this
- waiting lock request is set
- when performing an insert of
- an index record */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of the record */
- dict_index_t* index, /*!< in: index of record */
- que_thr_t* thr) /*!< in: query thread */
-{
- trx_t* trx;
- lock_t* lock;
- trx_id_t victim_trx_id;
- ulint sec;
- ulint ms;
- ulint space;
- ulint page_no;
- dberr_t err;
-
-
- ut_ad(lock_mutex_own());
- ut_ad(!srv_read_only_mode);
- ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
-
- trx = thr_get_trx(thr);
-
- ut_ad(trx_mutex_own(trx));
-
- /* Test if there already is some other reason to suspend thread:
- we do not enqueue a lock request if the query thread should be
- stopped anyway */
-
- if (que_thr_stop(thr)) {
- ut_error;
-
- return(DB_QUE_THR_SUSPENDED);
- }
-
- switch (trx_get_dict_operation(trx)) {
- case TRX_DICT_OP_NONE:
- break;
- case TRX_DICT_OP_TABLE:
- case TRX_DICT_OP_INDEX:
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: a record lock wait happens"
- " in a dictionary operation!\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fputs(".\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n",
- stderr);
- ut_ad(0);
- }
-
- /* Enqueue the lock request that will wait to be granted, note that
- we already own the trx mutex. */
- lock = lock_rec_create(
-#ifdef WITH_WSREP
- c_lock, thr,
-#endif /* WITH_WSREP */
- type_mode | LOCK_WAIT, block, heap_no, index, trx, TRUE);
-
- /* Release the mutex to obey the latching order.
- This is safe, because lock_deadlock_check_and_resolve()
- is invoked when a lock wait is enqueued for the currently
- running transaction. Because trx is a running transaction
- (it is not currently suspended because of a lock wait),
- its state can only be changed by this thread, which is
- currently associated with the transaction. */
-
- trx_mutex_exit(trx);
-
- victim_trx_id = lock_deadlock_check_and_resolve(lock, trx);
-
- trx_mutex_enter(trx);
-
- if (victim_trx_id != 0) {
-
- ut_ad(victim_trx_id == trx->id);
-
- lock_reset_lock_and_trx_wait(lock);
- lock_rec_reset_nth_bit(lock, heap_no);
-
- return(DB_DEADLOCK);
-
- } else if (trx->lock.wait_lock == NULL) {
-
- /* If there was a deadlock but we chose another
- transaction as a victim, it is possible that we
- already have the lock now granted! */
-
- err = DB_SUCCESS_LOCKED_REC;
- } else {
- trx->lock.que_state = TRX_QUE_LOCK_WAIT;
-
- trx->lock.was_chosen_as_deadlock_victim = FALSE;
- trx->lock.wait_started = ut_time();
-
- if (UNIV_UNLIKELY(trx->take_stats)) {
- ut_usectime(&sec, &ms);
- trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms;
- }
-
- ut_a(que_thr_stop(thr));
-
-#ifdef UNIV_DEBUG
- if (lock_print_waits) {
- fprintf(stderr, "Lock wait for trx " TRX_ID_FMT " in index ",
- trx->id);
- ut_print_name(stderr, trx, FALSE, index->name);
- }
-#endif /* UNIV_DEBUG */
-
- MONITOR_INC(MONITOR_LOCKREC_WAIT);
-
- trx->n_rec_lock_waits++;
-
- err = DB_LOCK_WAIT;
- }
-
- // Move it only when it does not cause a deadlock. (err is never DB_DEADLOCK here: the deadlock branch above returns early.)
- if (err != DB_DEADLOCK
- && innodb_lock_schedule_algorithm
- == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS
- && !thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
- HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
- lock_rec_fold(space, page_no), lock);
- dberr_t res = lock_rec_insert_by_trx_age(lock);
- if (res != DB_SUCCESS) {
- return res;
- }
- }
-
- return err;
-}
-
-/*********************************************************************//**
-Adds a record lock request in the record queue. The request is normally
-added as the last in the queue, but if there are no waiting lock requests
-on the record, and the request to be added is not a waiting request, we
-can reuse a suitable record lock object already existing on the same page,
-just setting the appropriate bit in its bitmap. This is a low-level function
-which does NOT check for deadlocks or lock compatibility!
-In WITH_WSREP builds a waiting request on the record does not prevent the
-reuse when trx is a BF transaction (per wsrep_thd_is_BF()).
-@return lock where the bit was set */
-static
-lock_t*
-lock_rec_add_to_queue(
-/*==================*/
- ulint type_mode,/*!< in: lock mode, wait, gap
- etc. flags; type is ignored
- and replaced by LOCK_REC */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of the record */
- dict_index_t* index, /*!< in: index of record */
- trx_t* trx, /*!< in/out: transaction */
- ibool caller_owns_trx_mutex)
- /*!< in: TRUE if caller owns the
- transaction mutex */
-{
- lock_t* lock;
- lock_t* first_lock;
-
- ut_ad(lock_mutex_own());
- ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
- ut_ad(dict_index_is_clust(index)
- || dict_index_get_online_status(index) != ONLINE_INDEX_CREATION);
-#ifdef UNIV_DEBUG
- switch (type_mode & LOCK_MODE_MASK) {
- case LOCK_X:
- case LOCK_S:
- break;
- default:
- ut_error;
- }
-
- if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) {
- enum lock_mode mode = (type_mode & LOCK_MODE_MASK) == LOCK_S
- ? LOCK_X
- : LOCK_S;
- const lock_t* other_lock
- = lock_rec_other_has_expl_req(mode, 0, LOCK_WAIT,
- block, heap_no, trx->id);
-#ifdef WITH_WSREP
- /* this can potentially assert with wsrep */
- if (wsrep_thd_is_wsrep(trx->mysql_thd)) {
- if (wsrep_debug && other_lock) {
- fprintf(stderr,
- "WSREP: InnoDB assert ignored\n");
- }
- } else {
- ut_a(!other_lock);
- }
-#else
- ut_a(!other_lock);
-#endif /* WITH_WSREP */
- }
-#endif /* UNIV_DEBUG */
-
- type_mode |= LOCK_REC;
-
- /* If rec is the supremum record, then we can reset the gap bit, as
- all locks on the supremum are automatically of the gap type, and we
- try to avoid unnecessary memory consumption of a new record lock
- struct for a gap type lock */
-
- if (UNIV_UNLIKELY(heap_no == PAGE_HEAP_NO_SUPREMUM)) {
- ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
-
- /* There should never be LOCK_REC_NOT_GAP on a supremum
- record, but let us play safe */
-
- type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP);
- }
-
- /* Look for a waiting lock request on the same record or on a gap */
-
- for (first_lock = lock = lock_rec_get_first_on_page(block);
- lock != NULL;
- lock = lock_rec_get_next_on_page(lock)) {
-
- if (lock_get_wait(lock)
- && lock_rec_get_nth_bit(lock, heap_no)) {
-#ifdef WITH_WSREP
- if (wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
- if (wsrep_debug) {
- fprintf(stderr,
- "BF skipping wait: "
- TRX_ID_FMT "\n",
- trx->id);
- lock_rec_print(stderr, lock);
- }
- } else
-#endif
- goto somebody_waits; /* with WITH_WSREP this is the
- body of the dangling "else" above */
- }
- }
-
- if (UNIV_LIKELY(!(type_mode & LOCK_WAIT))) {
-
- /* Look for a similar record lock on the same page:
- if one is found and there are no waiting lock requests,
- we can just set the bit */
-
- lock = lock_rec_find_similar_on_page(
- type_mode, heap_no, first_lock, trx);
-
- if (lock) {
-
- lock_rec_set_nth_bit(lock, heap_no);
-
- return(lock);
- }
- }
-
-somebody_waits:
-#ifdef WITH_WSREP
- return(lock_rec_create(NULL, NULL,
- type_mode, block, heap_no, index, trx,
- caller_owns_trx_mutex));
-#else
- return(lock_rec_create(
- type_mode, block, heap_no, index, trx,
- caller_owns_trx_mutex));
-#endif /* WITH_WSREP */
-}
-
-/** Record locking request status, as returned by lock_rec_lock_fast() */
-enum lock_rec_req_status {
- /** Failed to acquire a lock on the fast path; lock_rec_lock()
- then falls back to lock_rec_lock_slow() */
- LOCK_REC_FAIL,
- /** Succeeded in acquiring a lock (implicit or already acquired);
- lock_rec_lock() maps this to DB_SUCCESS */
- LOCK_REC_SUCCESS,
- /** Explicitly created a new lock; lock_rec_lock() maps this to
- DB_SUCCESS_LOCKED_REC */
- LOCK_REC_SUCCESS_CREATED
-};
-
-/*********************************************************************//**
-This is a fast routine for locking a record in the most common cases:
-there are no explicit locks on the page, or there is just one lock, owned
-by this transaction, and of the right type_mode. This is a low-level function
-which does NOT look at implicit locks! Checks lock compatibility within
-explicit locks. This function sets a normal next-key lock, or in the case of
-a page supremum record, a gap type lock.
-The caller must hold the lock mutex (asserted). The trx mutex is taken here
-only when the page already has a record lock.
-@return whether the locking succeeded: LOCK_REC_FAIL when the page has
-locks that do not match the single-own-lock case, LOCK_REC_SUCCESS_CREATED
-when the page had no locks or a new bit was set, LOCK_REC_SUCCESS otherwise */
-UNIV_INLINE
-enum lock_rec_req_status
-lock_rec_lock_fast(
-/*===============*/
- ibool impl, /*!< in: if TRUE, no lock is set
- if no wait is necessary: we
- assume that the caller will
- set an implicit lock */
- ulint mode, /*!< in: lock mode: LOCK_X or
- LOCK_S possibly ORed to either
- LOCK_GAP or LOCK_REC_NOT_GAP */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of record */
- dict_index_t* index, /*!< in: index of record */
- que_thr_t* thr) /*!< in: query thread */
-{
- lock_t* lock;
- trx_t* trx;
- enum lock_rec_req_status status = LOCK_REC_SUCCESS;
-
- ut_ad(lock_mutex_own());
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
- ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
- || (LOCK_MODE_MASK & mode) == LOCK_X);
- ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
- || mode - (LOCK_MODE_MASK & mode) == 0
- || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
- ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
-
- DBUG_EXECUTE_IF("innodb_report_deadlock", return(LOCK_REC_FAIL););
-
- lock = lock_rec_get_first_on_page(block);
-
- trx = thr_get_trx(thr);
-
- if (lock == NULL) {
- if (!impl) {
- /* Note that we don't own the trx mutex. */
-#ifdef WITH_WSREP
- lock = lock_rec_create(NULL, thr,
- mode, block, heap_no, index, trx, FALSE);
-#else
- lock = lock_rec_create(
- mode, block, heap_no, index, trx, FALSE);
-#endif
-
- }
- status = LOCK_REC_SUCCESS_CREATED; /* reported even when impl
- is TRUE and no explicit lock struct was created above */
- } else {
- trx_mutex_enter(trx);
-
- if (lock_rec_get_next_on_page(lock)
- || lock->trx != trx
- || lock->type_mode != (mode | LOCK_REC)
- || lock_rec_get_n_bits(lock) <= heap_no) {
-
- status = LOCK_REC_FAIL;
- } else if (!impl) {
- /* If the nth bit of the record lock is already set
- then we do not set a new lock bit, otherwise we do
- set */
- if (!lock_rec_get_nth_bit(lock, heap_no)) {
- lock_rec_set_nth_bit(lock, heap_no);
- status = LOCK_REC_SUCCESS_CREATED;
- }
- }
-
- trx_mutex_exit(trx);
- }
-
- return(status);
-}
-
-/*********************************************************************//**
-This is the general, and slower, routine for locking a record. This is a
-low-level function which does NOT look at implicit locks! Checks lock
-compatibility within explicit locks. This function sets a normal next-key
-lock, or in the case of a page supremum record, a gap type lock.
-The caller must hold the lock mutex (asserted); the trx mutex is acquired
-and released here. Three cases are handled in order: the transaction already
-has a strong enough explicit lock (nothing to do), another transaction has a
-conflicting request (a waiting request is enqueued), or the lock can be
-granted (added to the queue unless impl is TRUE).
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
-or DB_QUE_THR_SUSPENDED */
-static
-dberr_t
-lock_rec_lock_slow(
-/*===============*/
- ibool impl, /*!< in: if TRUE, no lock is set
- if no wait is necessary: we
- assume that the caller will
- set an implicit lock */
- ulint mode, /*!< in: lock mode: LOCK_X or
- LOCK_S possibly ORed to either
- LOCK_GAP or LOCK_REC_NOT_GAP */
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no,/*!< in: heap number of record */
- dict_index_t* index, /*!< in: index of record */
- que_thr_t* thr) /*!< in: query thread */
-{
- trx_t* trx;
-#ifdef WITH_WSREP
- lock_t* c_lock(NULL);
-#endif
- dberr_t err = DB_SUCCESS;
-
- ut_ad(lock_mutex_own());
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
- ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
- || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
- ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
- || (LOCK_MODE_MASK & mode) == LOCK_X);
- ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
- || mode - (LOCK_MODE_MASK & mode) == 0
- || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
- ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
-
- DBUG_EXECUTE_IF("innodb_report_deadlock", return(DB_DEADLOCK););
-
- trx = thr_get_trx(thr);
- trx_mutex_enter(trx);
-
- if (lock_rec_has_expl(mode, block, heap_no, trx->id)) {
-
- /* The trx already has a strong enough lock on rec: do
- nothing */
-
-#ifdef WITH_WSREP
- } else if ((c_lock = (lock_t *)lock_rec_other_has_conflicting(
- static_cast<enum lock_mode>(mode),
- block, heap_no, trx))) {
-#else
- } else if (lock_rec_other_has_conflicting(
- static_cast<enum lock_mode>(mode),
- block, heap_no, trx)) {
-#endif /* WITH_WSREP */
-
- /* If another transaction has a non-gap conflicting
- request in the queue, as this transaction does not
- have a lock strong enough already granted on the
- record, we have to wait. */
-
-#ifdef WITH_WSREP
- /* c_lock is NULL here if jump to enqueue_waiting happened
- but it's ok because lock is not NULL in that case and c_lock
- is not used.
- NOTE(review): this function as written has no enqueue_waiting
- label, and this branch is entered only when c_lock was assigned
- a non-NULL value; the remark above looks stale -- confirm. */
- err = lock_rec_enqueue_waiting(c_lock,
- mode, block, heap_no, index, thr);
-#else
- err = lock_rec_enqueue_waiting(
- mode, block, heap_no, index, thr);
-#endif /* WITH_WSREP */
-
- } else if (!impl) {
- /* Set the requested lock on the record, note that
- we already own the transaction mutex. */
-
- lock_rec_add_to_queue(
- LOCK_REC | mode, block, heap_no, index, trx, TRUE);
-
- err = DB_SUCCESS_LOCKED_REC;
- }
-
- trx_mutex_exit(trx);
-
- return(err);
-}
-
-/*********************************************************************//**
-Entry point for taking an explicit record lock in the requested mode. The
-cheap path, lock_rec_lock_fast(), is attempted first; only when it reports
-LOCK_REC_FAIL is the general routine lock_rec_lock_slow() used, which may
-enqueue a waiting lock request. This is a low-level function which does NOT
-look at implicit locks! Checks lock compatibility within explicit locks.
-This function sets a normal next-key lock, or in the case of a page
-supremum record, a gap type lock.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
-or DB_QUE_THR_SUSPENDED */
-static
-dberr_t
-lock_rec_lock(
-/*==========*/
-	ibool impl, /*!< in: if TRUE, no lock is set
-	if no wait is necessary: we
-	assume that the caller will
-	set an implicit lock */
-	ulint mode, /*!< in: lock mode: LOCK_X or
-	LOCK_S possibly ORed to either
-	LOCK_GAP or LOCK_REC_NOT_GAP */
-	const buf_block_t* block, /*!< in: buffer block containing
-	the record */
-	ulint heap_no,/*!< in: heap number of record */
-	dict_index_t* index, /*!< in: index of record */
-	que_thr_t* thr) /*!< in: query thread */
-{
-	ut_ad(lock_mutex_own());
-	ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
-	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
-	ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
-	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
-	ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
-	      || (LOCK_MODE_MASK & mode) == LOCK_X);
-	ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
-	      || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
-	      || mode - (LOCK_MODE_MASK & mode) == 0);
-
-	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
-
-	/* Try the simplified and faster subroutine that covers the most
-	common cases. */
-	const enum lock_rec_req_status fast_status = lock_rec_lock_fast(
-		impl, mode, block, heap_no, index, thr);
-
-	if (fast_status == LOCK_REC_SUCCESS) {
-		return(DB_SUCCESS);
-	}
-
-	if (fast_status == LOCK_REC_SUCCESS_CREATED) {
-		return(DB_SUCCESS_LOCKED_REC);
-	}
-
-	if (fast_status == LOCK_REC_FAIL) {
-		/* The fast path could not decide: use the general routine. */
-		return(lock_rec_lock_slow(impl, mode, block,
-					  heap_no, index, thr));
-	}
-
-	/* Not reached with a valid status value. */
-	ut_error;
-	return(DB_ERROR);
-}
-
-/*********************************************************************//**
-Checks if a waiting record lock request still has to wait in a queue.
-The locks of the same page are visited starting from the first one and
-stopping when wait_lock itself is reached. A visited lock blocks wait_lock
-when its bitmap covers and has set the bit of wait_lock's record, and
-lock_has_to_wait(wait_lock, lock) holds. With WITH_WSREP, such a lock is
-skipped when both wsrep_thd_is_BF() checks in the loop succeed.
-@return lock that is causing the wait, or NULL if no such lock was found */
-static
-const lock_t*
-lock_rec_has_to_wait_in_queue(
-/*==========================*/
- const lock_t* wait_lock) /*!< in: waiting record lock */
-{
- const lock_t* lock;
- ulint space;
- ulint page_no;
- ulint heap_no;
- ulint bit_mask;
- ulint bit_offset;
-
- ut_ad(lock_mutex_own());
- ut_ad(lock_get_wait(wait_lock));
- ut_ad(lock_get_type_low(wait_lock) == LOCK_REC);
-
- space = wait_lock->un_member.rec_lock.space;
- page_no = wait_lock->un_member.rec_lock.page_no;
- heap_no = lock_rec_find_set_bit(wait_lock);
-
- bit_offset = heap_no / 8; /* index of the bitmap byte that holds the bit */
- bit_mask = static_cast<ulint>(1 << (heap_no % 8)); /* mask of the bit inside that byte */
-
- for (lock = lock_rec_get_first_on_page_addr(space, page_no);
- lock != wait_lock;
- lock = lock_rec_get_next_on_page_const(lock)) {
-
- const byte* p = (const byte*) &lock[1]; /* bytes directly after the lock struct, read below as its bitmap */
-
- if (heap_no < lock_rec_get_n_bits(lock)
- && (p[bit_offset] & bit_mask)
- && lock_has_to_wait(wait_lock, lock)) {
-#ifdef WITH_WSREP
- if (wsrep_thd_is_BF(wait_lock->trx->mysql_thd, FALSE) &&
- wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE)) {
- /* don't wait for another BF lock */
- continue;
- }
-#endif
- return(lock);
- }
- }
-
- return(NULL);
-}
-
-/*************************************************************//**
-Grants a lock to a waiting lock request and releases the waiting transaction.
-The caller must own the lock mutex (checked with ut_ad()). lock->trx->mutex
-is entered and exited by this function unless owns_trx_mutex is true, in
-which case the function does not touch that mutex.
-Besides clearing the wait state, the function registers a granted
-LOCK_AUTO_INC lock in table->autoinc_trx and trx->autoinc_locks, ends the
-lock wait of the query thread if the trx is in TRX_QUE_LOCK_WAIT, and adds
-the elapsed wait time to the per-trx and per-lock wait counters. */
-static
-void
-lock_grant(
-/*=======*/
- lock_t* lock, /*!< in/out: waiting lock request */
- bool owns_trx_mutex) /*!< in: whether lock->trx->mutex is owned */
-{
- ut_ad(lock_mutex_own());
-
- lock_reset_lock_and_trx_wait(lock); /* done before lock->trx->mutex is entered below */
-
- if (!owns_trx_mutex) {
- trx_mutex_enter(lock->trx);
- }
-
- if (lock_get_mode(lock) == LOCK_AUTO_INC) {
- dict_table_t* table = lock->un_member.tab_lock.table;
-
- if (UNIV_UNLIKELY(table->autoinc_trx == lock->trx)) {
- fprintf(stderr,
- "InnoDB: Error: trx already had"
- " an AUTO-INC lock!\n"); /* only reported; execution continues */
- } else {
- table->autoinc_trx = lock->trx;
-
- ib_vector_push(lock->trx->autoinc_locks, &lock); /* the address of the local pointer is passed, not the lock struct */
- }
- }
-
-#ifdef UNIV_DEBUG
- if (lock_print_waits) {
- fprintf(stderr, "Lock wait for trx " TRX_ID_FMT " ends\n",
- lock->trx->id);
- }
-#endif /* UNIV_DEBUG */
-
- /* If we are resolving a deadlock by choosing another transaction
- as a victim, then our original transaction may not be in the
- TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
- for it */
-
- if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
- que_thr_t* thr;
-
- thr = que_thr_end_lock_wait(lock->trx);
-
- if (thr != NULL) {
- lock_wait_release_thread_if_suspended(thr);
- }
- }
-
- /* Cumulate total lock wait time for statistics. This is done
- regardless of whether the trx was in TRX_QUE_LOCK_WAIT above;
- table locks and record locks are accumulated separately. */
- if (lock_get_type_low(lock) & LOCK_TABLE) {
- lock->trx->total_table_lock_wait_time +=
- (ulint)difftime(ut_time(), lock->trx->lock.wait_started);
- } else {
- lock->trx->total_rec_lock_wait_time +=
- (ulint)difftime(ut_time(), lock->trx->lock.wait_started);
- }
-
- lock->wait_time = (ulint)difftime(ut_time(), lock->requested_time); /* measured from lock->requested_time, not from trx wait_started */
-
- if (!owns_trx_mutex) {
- trx_mutex_exit(lock->trx);
- }
-}
-
-/*************************************************************//**
-Cancels a waiting record lock request and releases the waiting transaction
-that requested it. NOTE: does NOT check if waiting lock requests behind this
-one can now be granted!
-The lock struct itself is not removed from any list here: only its single
-set bitmap bit and its wait state are cleared. The function enters and
-exits lock->trx->mutex itself; presumably the caller must not already hold
-it (TODO confirm against callers). */
-static
-void
-lock_rec_cancel(
-/*============*/
- lock_t* lock) /*!< in: waiting record lock request */
-{
- que_thr_t* thr;
-
- ut_ad(lock_mutex_own());
- ut_ad(lock_get_type_low(lock) == LOCK_REC);
-
- /* Reset the bit (there can be only one set bit) in the lock bitmap */
- lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
-
- /* Reset the wait flag and the back pointer to lock in trx */
-
- lock_reset_lock_and_trx_wait(lock);
-
- /* The following function releases the trx from lock wait */
-
- trx_mutex_enter(lock->trx);
-
- thr = que_thr_end_lock_wait(lock->trx);
-
- if (thr != NULL) {
- lock_wait_release_thread_if_suspended(thr);
- }
-
- trx_mutex_exit(lock->trx);
-}
-
-static /* Grants every waiting record lock on page (space, page_no) that
-lock_rec_has_to_wait_in_queue() reports as no longer blocked, and moves
-each lock granted here to the head of the hash chain of the page's cell in
-lock_sys->rec_hash. The chain is walked once, from the first lock of the
-page to the end of the chain. */
-void
-lock_grant_and_move_on_page(
- ulint space, /*!< in: value matched against rec_lock.space */
- ulint page_no) /*!< in: value matched against rec_lock.page_no */
-{
- lock_t* lock;
- lock_t* next;
- lock_t* previous;
- ulint rec_fold = lock_rec_fold(space, page_no);
-
- previous = (lock_t *) hash_get_nth_cell(lock_sys->rec_hash,
- hash_calc_hash(rec_fold, lock_sys->rec_hash))->node; /* head of the hash chain for this fold */
- if (previous == NULL) {
- return; /* empty chain: nothing to grant */
- }
- if (previous->un_member.rec_lock.space == space &&
- previous->un_member.rec_lock.page_no == page_no) {
- lock = previous; /* the chain head itself belongs to this page */
- }
- else {
- next = (lock_t *) previous->hash;
- while (next &&
- (next->un_member.rec_lock.space != space ||
- next->un_member.rec_lock.page_no != page_no)) {
- previous = next;
- next = (lock_t *) previous->hash;
- }
- lock = (lock_t *) previous->hash; /* first lock of this page, or NULL if the chain has none */
- }
-
- ut_ad(previous->hash == lock || previous == lock);
- /* Grant locks if there are no conflicting locks ahead.
- Move granted locks to the head of the list. */
- for (;lock != NULL;) {
- /* If the lock is a wait lock on this page, and it does not need to wait. */
- if ((lock->un_member.rec_lock.space == space)
- && (lock->un_member.rec_lock.page_no == page_no)
- && lock_get_wait(lock)
- && !lock_rec_has_to_wait_in_queue(lock)) {
-
- lock_grant(lock, false);
-
- if (previous != NULL) {
- /* Move the lock to the head of the list. This branch
- is always taken: previous is only ever assigned
- non-NULL locks after the early return above. When
- lock is the chain head (previous == lock) the unlink
- below is a self-assignment. NOTE(review): that case
- relies on lock_rec_insert_to_head() leaving a lock
- that is already the head untouched - confirm. */
- HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
- lock_rec_insert_to_head(lock, rec_fold);
- } else {
- /* Already at the head of the list. (Not reached,
- see the note on the condition above.) */
- previous = lock;
- }
- /* Move on to the next lock: after the unlink above this
- is the lock that used to follow the one just moved. */
- lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
- } else {
- previous = lock;
- lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
- }
- }
-}
-
-/*************************************************************//**
-Removes a record lock request, waiting or granted, from the queue and
-grants locks to other transactions in the queue if they now are entitled
-to a lock. NOTE: all record locks contained in in_lock are removed.
-in_lock is unlinked from lock_sys->rec_hash and from its trx's trx_locks
-list, and the table and lock_sys counters are decremented. Waiters are then
-granted in place when innodb_lock_schedule_algorithm is
-INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS or the owning thread is a replication
-slave thread; otherwise lock_grant_and_move_on_page() is used. */
-static
-void
-lock_rec_dequeue_from_page(
-/*=======================*/
- lock_t* in_lock) /*!< in: record lock object: all
- record locks which are contained in
- this lock object are removed;
- transactions waiting behind will
- get their lock requests granted,
- if they are now qualified to it */
-{
- ulint space;
- ulint page_no;
- lock_t* lock;
- trx_lock_t* trx_lock;
-
- ut_ad(lock_mutex_own());
- ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
- /* We may or may not be holding in_lock->trx->mutex here. */
-
- trx_lock = &in_lock->trx->lock;
-
- space = in_lock->un_member.rec_lock.space;
- page_no = in_lock->un_member.rec_lock.page_no;
-
- in_lock->index->table->n_rec_locks--;
-
- HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
- lock_rec_fold(space, page_no), in_lock);
- lock_sys->rec_num--;
-
- UT_LIST_REMOVE(trx_locks, trx_lock->trx_locks, in_lock);
-
- MONITOR_INC(MONITOR_RECLOCK_REMOVED);
- MONITOR_DEC(MONITOR_NUM_RECLOCK);
-
- if (innodb_lock_schedule_algorithm
- == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS ||
- thd_is_replication_slave_thread(in_lock->trx->mysql_thd)) {
- /* Check if waiting locks in the queue can now be granted: grant
- locks if there are no conflicting locks ahead. Every lock on
- the page is visited; the loop below has no early exit. */
-
- for (lock = lock_rec_get_first_on_page_addr(space, page_no);
- lock != NULL;
- lock = lock_rec_get_next_on_page(lock)) {
-
- if (lock_get_wait(lock)
- && !lock_rec_has_to_wait_in_queue(lock)) {
-
- /* Grant the lock */
- ut_ad(lock->trx != in_lock->trx);
- lock_grant(lock, false); /* false: lock_grant() enters lock->trx->mutex itself */
- }
- }
- } else {
- lock_grant_and_move_on_page(space, page_no);
- }
-}
-
-/*************************************************************//**
-Removes a record lock request, waiting or granted, from the queue.
-The lock is unlinked from lock_sys->rec_hash and from its trx's trx_locks
-list and the counters are updated. No waiting lock is granted and the
-lock's wait state is not touched by this function. */
-static
-void
-lock_rec_discard(
-/*=============*/
- lock_t* in_lock) /*!< in: record lock object: all
- record locks which are contained
- in this lock object are removed */
-{
- ulint space;
- ulint page_no;
- trx_lock_t* trx_lock;
-
- ut_ad(lock_mutex_own());
- ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
-
- trx_lock = &in_lock->trx->lock;
-
- space = in_lock->un_member.rec_lock.space;
- page_no = in_lock->un_member.rec_lock.page_no;
-
- in_lock->index->table->n_rec_locks--;
-
- HASH_DELETE(lock_t, hash, lock_sys->rec_hash,
- lock_rec_fold(space, page_no), in_lock); /* (space, page_no) selects the hash cell */
- lock_sys->rec_num--;
-
- UT_LIST_REMOVE(trx_locks, trx_lock->trx_locks, in_lock);
-
- MONITOR_INC(MONITOR_RECLOCK_REMOVED);
- MONITOR_DEC(MONITOR_NUM_RECLOCK);
-}
-
-/*************************************************************//**
-Removes record lock objects set on an index page which is discarded. This
-function does not move locks, or check for waiting locks, therefore the
-lock bitmaps must already be reset when this function is called.
-Each lock found for the page is passed to lock_rec_discard(). Debug
-assertions check that every lock has an empty bitmap and is not waiting. */
-static
-void
-lock_rec_free_all_from_discard_page(
-/*================================*/
- const buf_block_t* block) /*!< in: page to be discarded */
-{
- ulint space;
- ulint page_no;
- lock_t* lock;
- lock_t* next_lock;
-
- ut_ad(lock_mutex_own());
-
- space = buf_block_get_space(block);
- page_no = buf_block_get_page_no(block);
-
- lock = lock_rec_get_first_on_page_addr(space, page_no);
-
- while (lock != NULL) {
- ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
- ut_ad(!lock_get_wait(lock));
-
- next_lock = lock_rec_get_next_on_page(lock); /* fetched before lock is unlinked from the hash chain */
-
- lock_rec_discard(lock);
-
- lock = next_lock;
- }
-}
-
-/*============= RECORD LOCK MOVING AND INHERITING ===================*/
-
-/*************************************************************//**
-Resets the lock bits for a single record. Releases transactions waiting for
-lock requests here.
-Waiting locks on the record are handed to lock_rec_cancel(); for all other
-locks only the bit of heap_no is cleared. The lock structs themselves stay
-in the queue. */
-static
-void
-lock_rec_reset_and_release_wait(
-/*============================*/
- const buf_block_t* block, /*!< in: buffer block containing
- the record */
- ulint heap_no)/*!< in: heap number of record */
-{
- lock_t* lock;
-
- ut_ad(lock_mutex_own());
-
- for (lock = lock_rec_get_first(block, heap_no);
- lock != NULL;
- lock = lock_rec_get_next(heap_no, lock)) {
-
- if (lock_get_wait(lock)) {
- lock_rec_cancel(lock); /* clears the bit and ends the trx's lock wait */
- } else {
- lock_rec_reset_nth_bit(lock, heap_no);
- }
- }
-}
-
-/*************************************************************//**
-Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type)
-of another record as gap type locks, but does not reset the lock bits of
-the other record. Also waiting lock requests on rec are inherited as
-GRANTED gap locks.
-The inherited lock is added with type LOCK_REC | LOCK_GAP | the mode of the
-donating lock, so a LOCK_WAIT flag of the donor is not carried over. */
-static
-void
-lock_rec_inherit_to_gap(
-/*====================*/
- const buf_block_t* heir_block, /*!< in: block containing the
- record which inherits */
- const buf_block_t* block, /*!< in: block containing the
- record from which inherited;
- does NOT reset the locks on
- this record */
- ulint heir_heap_no, /*!< in: heap_no of the
- inheriting record */
- ulint heap_no) /*!< in: heap_no of the
- donating record */
-{
- lock_t* lock;
-
- ut_ad(lock_mutex_own());
-
- /* If srv_locks_unsafe_for_binlog is TRUE or session is using
- READ COMMITTED isolation level, we do not want locks set
- by an UPDATE or a DELETE to be inherited as gap type locks. But we
- DO want S-locks/X-locks(taken for replace) set by a consistency
- constraint to be inherited also then.
- In the condition below the mode that is NOT inherited in that
- case is LOCK_S when lock->trx->duplicates is set and LOCK_X
- otherwise. */
-
- for (lock = lock_rec_get_first(block, heap_no);
- lock != NULL;
- lock = lock_rec_get_next(heap_no, lock)) {
-
- if (!lock_rec_get_insert_intention(lock)
- && !((srv_locks_unsafe_for_binlog
- || lock->trx->isolation_level
- <= TRX_ISO_READ_COMMITTED)
- && lock_get_mode(lock) ==
- (lock->trx->duplicates ? LOCK_S : LOCK_X))) {
-
- lock_rec_add_to_queue(
- LOCK_REC | LOCK_GAP | lock_get_mode(lock),
- heir_block, heir_heap_no, lock->index,
- lock->trx, FALSE);
- }
- }
-}
-
-/*************************************************************//**
-Makes a record to inherit the gap locks (except LOCK_INSERT_INTENTION type)
-of another record as gap type locks, but does not reset the lock bits of the
-other record. Also waiting lock requests are inherited as GRANTED gap locks.
-A lock is inherited when it is not an insert intention lock and either the
-donating record is the page supremum or the lock is not of the
-LOCK_REC_NOT_GAP kind. Unlike the other static helpers around it, this
-function enters and exits the lock mutex itself. */
-static
-void
-lock_rec_inherit_to_gap_if_gap_lock(
-/*================================*/
- const buf_block_t* block, /*!< in: buffer block */
- ulint heir_heap_no, /*!< in: heap_no of
- record which inherits */
- ulint heap_no) /*!< in: heap_no of record
- from which inherited;
- does NOT reset the locks
- on this record */
-{
- lock_t* lock;
-
- lock_mutex_enter();
-
- for (lock = lock_rec_get_first(block, heap_no);
- lock != NULL;
- lock = lock_rec_get_next(heap_no, lock)) {
-
- if (!lock_rec_get_insert_intention(lock)
- && (heap_no == PAGE_HEAP_NO_SUPREMUM
- || !lock_rec_get_rec_not_gap(lock))) {
-
- lock_rec_add_to_queue(
- LOCK_REC | LOCK_GAP | lock_get_mode(lock),
- block, heir_heap_no, lock->index,
- lock->trx, FALSE); /* heir and donor are on the same block */
- }
- }
-
- lock_mutex_exit();
-}
-
-/*************************************************************//**
-Moves the locks of a record to another record and resets the lock bits of
-the donating record.
-Each lock is re-added on the receiver with the donor's complete type_mode,
-which is read before the wait state is reset, so a LOCK_WAIT flag is
-carried over to the new request. Debug assertions check that the receiver
-has no lock requests before the move and the donator none after it. */
-static
-void
-lock_rec_move(
-/*==========*/
- const buf_block_t* receiver, /*!< in: buffer block containing
- the receiving record */
- const buf_block_t* donator, /*!< in: buffer block containing
- the donating record */
- ulint receiver_heap_no,/*!< in: heap_no of the record
- which gets the locks; there
- must be no lock requests
- on it! */
- ulint donator_heap_no)/*!< in: heap_no of the record
- which gives the locks */
-{
- lock_t* lock;
-
- ut_ad(lock_mutex_own());
-
- ut_ad(lock_rec_get_first(receiver, receiver_heap_no) == NULL);
-
- for (lock = lock_rec_get_first(donator, donator_heap_no);
- lock != NULL;
- lock = lock_rec_get_next(donator_heap_no, lock)) {
-
- const ulint type_mode = lock->type_mode; /* saved before the wait state is reset below */
-
- lock_rec_reset_nth_bit(lock, donator_heap_no);
-
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
- lock_reset_lock_and_trx_wait(lock);
- }
-
- /* Note that we FIRST reset the bit, and then set the lock:
- the function works also if donator == receiver */
-
- lock_rec_add_to_queue(
- type_mode, receiver, receiver_heap_no,
- lock->index, lock->trx, FALSE);
- }
-
- ut_ad(lock_rec_get_first(donator, donator_heap_no) == NULL);
-}
-
-/*************************************************************//**
-Updates the lock table when we have reorganized a page. NOTE: we copy
-also the locks set on the infimum of the page; the infimum may carry
-locks if an update of a record is occurring on the page, and its locks
-were temporarily stored on the infimum.
-The function works in two passes under the lock mutex. First every lock
-of the page is copied to a temporary memory heap and the bitmap (and wait
-state) of the original is reset. Then, for each copy, both pages are
-walked record by record in parallel and every bit set in the copy is
-re-added with lock_rec_add_to_queue() under the record's heap number on
-the reorganized page. Returns immediately if the page has no locks. */
-UNIV_INTERN
-void
-lock_move_reorganize_page(
-/*======================*/
- const buf_block_t* block, /*!< in: old index page, now
- reorganized */
- const buf_block_t* oblock) /*!< in: copy of the old, not
- reorganized page */
-{
- lock_t* lock;
- UT_LIST_BASE_NODE_T(lock_t) old_locks;
- mem_heap_t* heap = NULL;
- ulint comp;
-
- lock_mutex_enter();
-
- lock = lock_rec_get_first_on_page(block);
-
- if (lock == NULL) {
- lock_mutex_exit();
-
- return;
- }
-
- heap = mem_heap_create(256);
-
- /* Copy first all the locks on the page to heap and reset the
- bitmaps in the original locks; chain the copies of the locks
- using the trx_locks field in them. */
-
- UT_LIST_INIT(old_locks);
-
- do {
- /* Make a copy of the lock */
- lock_t* old_lock = lock_rec_copy(lock, heap);
-
- UT_LIST_ADD_LAST(trx_locks, old_locks, old_lock);
-
- /* Reset bitmap of lock */
- lock_rec_bitmap_reset(lock);
-
- if (lock_get_wait(lock)) {
-
- lock_reset_lock_and_trx_wait(lock);
- }
-
- lock = lock_rec_get_next_on_page(lock);
- } while (lock != NULL);
-
- comp = page_is_comp(block->frame);
- ut_ad(comp == page_is_comp(oblock->frame));
-
- for (lock = UT_LIST_GET_FIRST(old_locks); lock;
- lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
- /* NOTE: we copy also the locks set on the infimum and
- supremum of the page; the infimum may carry locks if an
- update of a record is occurring on the page, and its locks
- were temporarily stored on the infimum */
- page_cur_t cur1;
- page_cur_t cur2;
-
- page_cur_set_before_first(block, &cur1); /* cur1 walks the reorganized page */
- page_cur_set_before_first(oblock, &cur2); /* cur2 walks the copy of the old page */
-
- /* Set locks according to old locks */
- for (;;) {
- ulint old_heap_no;
- ulint new_heap_no;
-
- ut_ad(comp || !memcmp(page_cur_get_rec(&cur1),
- page_cur_get_rec(&cur2),
- rec_get_data_size_old(
- page_cur_get_rec(
- &cur2))));
- if (UNIV_LIKELY(comp)) {
- old_heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur2));
- new_heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur1));
- } else {
- old_heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur2));
- new_heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur1));
- }
-
- if (lock_rec_get_nth_bit(lock, old_heap_no)) {
-
- /* Clear the bit in old_lock. The call is wrapped
- in ut_d(), so it is presumably compiled only in
- debug builds, where the check after this loop
- needs it (TODO confirm ut_d semantics). */
- ut_d(lock_rec_reset_nth_bit(lock,
- old_heap_no));
-
- /* NOTE that the old lock bitmap could be too
- small for the new heap number! */
-
- lock_rec_add_to_queue(
- lock->type_mode, block, new_heap_no,
- lock->index, lock->trx, FALSE);
-
- /* if (new_heap_no == PAGE_HEAP_NO_SUPREMUM
- && lock_get_wait(lock)) {
- fprintf(stderr,
- "---\n--\n!!!Lock reorg: supr type %lu\n",
- lock->type_mode);
- } */
- }
-
- if (UNIV_UNLIKELY
- (new_heap_no == PAGE_HEAP_NO_SUPREMUM)) {
-
- ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM);
- break; /* the supremum has been processed: end of both pages */
- }
-
- page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
- }
-
-#ifdef UNIV_DEBUG
- {
- ulint i = lock_rec_find_set_bit(lock);
-
- /* Check that all locks were moved. */
- if (UNIV_UNLIKELY(i != ULINT_UNDEFINED)) {
- fprintf(stderr,
- "lock_move_reorganize_page():"
- " %lu not moved in %p\n",
- (ulong) i, (void*) lock);
- ut_error;
- }
- }
-#endif /* UNIV_DEBUG */
- }
-
- lock_mutex_exit();
-
- mem_heap_free(heap); /* frees the lock copies; done after the lock mutex is released */
-
-#ifdef UNIV_DEBUG_LOCK_VALIDATE
- ut_ad(lock_rec_validate_page(block));
-#endif
-}
-
-/*************************************************************//**
-Moves the explicit locks on user records to another page if a record
-list end is moved to another page.
-For every lock on block, the records from rec to the end of the page are
-walked in parallel with the records of new_block starting at its first
-record after the infimum. Each bit set for a walked record is cleared on
-the old lock and re-added on new_block with the lock's original type_mode.
-The function enters and exits the lock mutex itself. */
-UNIV_INTERN
-void
-lock_move_rec_list_end(
-/*===================*/
- const buf_block_t* new_block, /*!< in: index page to move to */
- const buf_block_t* block, /*!< in: index page */
- const rec_t* rec) /*!< in: record on page: this
- is the first record moved */
-{
- lock_t* lock;
- const ulint comp = page_rec_is_comp(rec);
-
- lock_mutex_enter();
-
- /* Note: when we move locks from record to record, waiting locks
- and possible granted gap type locks behind them are enqueued in
- the original order, because new elements are inserted to a hash
- table to the end of the hash chain, and lock_rec_add_to_queue
- does not reuse locks if there are waiters in the queue. */
-
- for (lock = lock_rec_get_first_on_page(block); lock;
- lock = lock_rec_get_next_on_page(lock)) {
- page_cur_t cur1;
- page_cur_t cur2;
- const ulint type_mode = lock->type_mode; /* saved before any wait state is reset below */
-
- page_cur_position(rec, block, &cur1);
-
- if (page_cur_is_before_first(&cur1)) {
- page_cur_move_to_next(&cur1); /* skip the infimum if rec is the infimum */
- }
-
- page_cur_set_before_first(new_block, &cur2);
- page_cur_move_to_next(&cur2);
-
- /* Copy lock requests on user records to new page and
- reset the lock bits on the old */
-
- while (!page_cur_is_after_last(&cur1)) {
- ulint heap_no;
-
- if (comp) {
- heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur1));
- } else {
- heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur1));
- ut_ad(!memcmp(page_cur_get_rec(&cur1),
- page_cur_get_rec(&cur2),
- rec_get_data_size_old(
- page_cur_get_rec(&cur2))));
- }
-
- if (lock_rec_get_nth_bit(lock, heap_no)) {
- lock_rec_reset_nth_bit(lock, heap_no);
-
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
- lock_reset_lock_and_trx_wait(lock);
- }
-
- if (comp) {
- heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur2));
- } else {
- heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur2));
- } /* heap_no now refers to the record on new_block */
-
- lock_rec_add_to_queue(
- type_mode, new_block, heap_no,
- lock->index, lock->trx, FALSE);
- }
-
- page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
- }
- }
-
- lock_mutex_exit();
-
-#ifdef UNIV_DEBUG_LOCK_VALIDATE
- ut_ad(lock_rec_validate_page(block));
- ut_ad(lock_rec_validate_page(new_block));
-#endif
-}
-
-/*************************************************************//**
-Moves the explicit locks on user records to another page if a record
-list start is moved to another page.
-For every lock on block, the records from the first one after the infimum
-up to (not including) rec are walked in parallel with the records of
-new_block that follow old_end. Each bit set for a walked record is cleared
-on the old lock and re-added on new_block with the lock's original
-type_mode. The function enters and exits the lock mutex itself. Under
-UNIV_DEBUG_LOCK_VALIDATE only block is validated here, not new_block. */
-UNIV_INTERN
-void
-lock_move_rec_list_start(
-/*=====================*/
- const buf_block_t* new_block, /*!< in: index page to
- move to */
- const buf_block_t* block, /*!< in: index page */
- const rec_t* rec, /*!< in: record on page:
- this is the first
- record NOT copied */
- const rec_t* old_end) /*!< in: old
- previous-to-last
- record on new_page
- before the records
- were copied */
-{
- lock_t* lock;
- const ulint comp = page_rec_is_comp(rec);
-
- ut_ad(block->frame == page_align(rec));
- ut_ad(new_block->frame == page_align(old_end));
-
- lock_mutex_enter();
-
- for (lock = lock_rec_get_first_on_page(block); lock;
- lock = lock_rec_get_next_on_page(lock)) {
- page_cur_t cur1;
- page_cur_t cur2;
- const ulint type_mode = lock->type_mode; /* saved before any wait state is reset below */
-
- page_cur_set_before_first(block, &cur1);
- page_cur_move_to_next(&cur1);
-
- page_cur_position(old_end, new_block, &cur2);
- page_cur_move_to_next(&cur2);
-
- /* Copy lock requests on user records to new page and
- reset the lock bits on the old */
-
- while (page_cur_get_rec(&cur1) != rec) {
- ulint heap_no;
-
- if (comp) {
- heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur1));
- } else {
- heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur1));
- ut_ad(!memcmp(page_cur_get_rec(&cur1),
- page_cur_get_rec(&cur2),
- rec_get_data_size_old(
- page_cur_get_rec(
- &cur2))));
- }
-
- if (lock_rec_get_nth_bit(lock, heap_no)) {
- lock_rec_reset_nth_bit(lock, heap_no);
-
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
- lock_reset_lock_and_trx_wait(lock);
- }
-
- if (comp) {
- heap_no = rec_get_heap_no_new(
- page_cur_get_rec(&cur2));
- } else {
- heap_no = rec_get_heap_no_old(
- page_cur_get_rec(&cur2));
- } /* heap_no now refers to the record on new_block */
-
- lock_rec_add_to_queue(
- type_mode, new_block, heap_no,
- lock->index, lock->trx, FALSE);
- }
-
- page_cur_move_to_next(&cur1);
- page_cur_move_to_next(&cur2);
- }
-
-#ifdef UNIV_DEBUG
- if (page_rec_is_supremum(rec)) { /* all user records were walked, so no user-record bit may remain set */
- ulint i;
-
- for (i = PAGE_HEAP_NO_USER_LOW;
- i < lock_rec_get_n_bits(lock); i++) {
- if (UNIV_UNLIKELY
- (lock_rec_get_nth_bit(lock, i))) {
-
- fprintf(stderr,
- "lock_move_rec_list_start():"
- " %lu not moved in %p\n",
- (ulong) i, (void*) lock);
- ut_error;
- }
- }
- }
-#endif /* UNIV_DEBUG */
- }
-
- lock_mutex_exit();
-
-#ifdef UNIV_DEBUG_LOCK_VALIDATE
- ut_ad(lock_rec_validate_page(block));
-#endif
-}
-
-/*************************************************************//**
-Updates the lock table when a page is split to the right.
-Performs two steps under the lock mutex: the supremum locks of the left
-page are moved to the supremum of the right page, then the supremum of the
-left page inherits, as gap locks, the locks of the record identified by
-lock_get_min_heap_no(right_block). */
-UNIV_INTERN
-void
-lock_update_split_right(
-/*====================*/
- const buf_block_t* right_block, /*!< in: right page */
- const buf_block_t* left_block) /*!< in: left page */
-{
- ulint heap_no = lock_get_min_heap_no(right_block); /* computed before the lock mutex is entered */
-
- lock_mutex_enter();
-
- /* Move the locks on the supremum of the left page to the supremum
- of the right page */
-
- lock_rec_move(right_block, left_block,
- PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
-
- /* Inherit the locks to the supremum of left page from the successor
- of the infimum on right page */
-
- lock_rec_inherit_to_gap(left_block, right_block,
- PAGE_HEAP_NO_SUPREMUM, heap_no);
-
- lock_mutex_exit();
-}
-
-/*************************************************************//**
-Updates the lock table when a page is merged to the right.
-Under the lock mutex: orig_succ on the right page inherits the supremum
-locks of the left page as gap locks, the supremum locks of the left page
-are reset (releasing waiters), and all lock structs of the left page are
-then discarded. */
-UNIV_INTERN
-void
-lock_update_merge_right(
-/*====================*/
- const buf_block_t* right_block, /*!< in: right page to
- which merged */
- const rec_t* orig_succ, /*!< in: original
- successor of infimum
- on the right page
- before merge */
- const buf_block_t* left_block) /*!< in: merged index
- page which will be
- discarded */
-{
- lock_mutex_enter();
-
- /* Inherit the locks from the supremum of the left page to the
- original successor of infimum on the right page, to which the left
- page was merged */
-
- lock_rec_inherit_to_gap(right_block, left_block,
- page_rec_get_heap_no(orig_succ),
- PAGE_HEAP_NO_SUPREMUM);
-
- /* Reset the locks on the supremum of the left page, releasing
- waiting transactions */
-
- lock_rec_reset_and_release_wait(left_block,
- PAGE_HEAP_NO_SUPREMUM);
-
- lock_rec_free_all_from_discard_page(left_block); /* requires all bitmaps of the page to be empty by now */
-
- lock_mutex_exit();
-}
-
-/*************************************************************//**
-Updates the lock table when the root page is copied to another in
-btr_root_raise_and_insert. Note that we leave lock structs on the
-root page, even though they do not make sense on other than leaf
-pages: the reason is that in a pessimistic update the infimum record
-of the root page will act as a dummy carrier of the locks of the record
-to be updated.
-Only the locks on the supremum are moved here; the function enters and
-exits the lock mutex itself. */
-UNIV_INTERN
-void
-lock_update_root_raise(
-/*===================*/
- const buf_block_t* block, /*!< in: index page to which copied */
- const buf_block_t* root) /*!< in: root page */
-{
- lock_mutex_enter();
-
- /* Move the locks on the supremum of the root to the supremum
- of block */
-
- lock_rec_move(block, root,
- PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
- lock_mutex_exit();
-}
-
-/*************************************************************//**
-Updates the lock table when a page is copied to another and the original page
-is removed from the chain of leaf pages, except if page is the root!
-Under the lock mutex the supremum locks of block are moved to the supremum
-of new_block, and then every lock struct of block is discarded. */
-UNIV_INTERN
-void
-lock_update_copy_and_discard(
-/*=========================*/
- const buf_block_t* new_block, /*!< in: index page to
- which copied */
- const buf_block_t* block) /*!< in: index page;
- NOT the root! */
-{
- lock_mutex_enter();
-
- /* Move the locks on the supremum of the old page to the supremum
- of new_page */
-
- lock_rec_move(new_block, block,
- PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
- lock_rec_free_all_from_discard_page(block); /* requires all bitmaps of the page to be empty by now */
-
- lock_mutex_exit();
-}
-
-/*************************************************************//**
-Updates the lock table when a page is split to the left.
-Under the lock mutex, the supremum of the left page inherits, as gap
-locks, the locks of the record identified by
-lock_get_min_heap_no(right_block). No locks are moved or reset. */
-UNIV_INTERN
-void
-lock_update_split_left(
-/*===================*/
- const buf_block_t* right_block, /*!< in: right page */
- const buf_block_t* left_block) /*!< in: left page */
-{
- ulint heap_no = lock_get_min_heap_no(right_block); /* computed before the lock mutex is entered */
-
- lock_mutex_enter();
-
- /* Inherit the locks to the supremum of the left page from the
- successor of the infimum on the right page */
-
- lock_rec_inherit_to_gap(left_block, right_block,
- PAGE_HEAP_NO_SUPREMUM, heap_no);
-
- lock_mutex_exit();
-}
-
-/*************************************************************//**
-Updates the lock table when a page is merged to the left.
-Under the lock mutex: if orig_pred is followed by a user record (i.e.
-records were actually moved), that record inherits the supremum locks of
-the left page as gap locks and the supremum locks are reset. Then the
-supremum locks of the right page are moved to the supremum of the left
-page and all lock structs of the right page are discarded. */
-UNIV_INTERN
-void
-lock_update_merge_left(
-/*===================*/
- const buf_block_t* left_block, /*!< in: left page to
- which merged */
- const rec_t* orig_pred, /*!< in: original predecessor
- of supremum on the left page
- before merge */
- const buf_block_t* right_block) /*!< in: merged index page
- which will be discarded */
-{
- const rec_t* left_next_rec;
-
- ut_ad(left_block->frame == page_align(orig_pred));
-
- lock_mutex_enter();
-
- left_next_rec = page_rec_get_next_const(orig_pred);
-
- if (!page_rec_is_supremum(left_next_rec)) {
-
- /* Inherit the locks on the supremum of the left page to the
- first record which was moved from the right page */
-
- lock_rec_inherit_to_gap(left_block, left_block,
- page_rec_get_heap_no(left_next_rec),
- PAGE_HEAP_NO_SUPREMUM);
-
- /* Reset the locks on the supremum of the left page,
- releasing waiting transactions */
-
- lock_rec_reset_and_release_wait(left_block,
- PAGE_HEAP_NO_SUPREMUM);
- }
-
- /* Move the locks from the supremum of right page to the supremum
- of the left page */
-
- lock_rec_move(left_block, right_block,
- PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
-
- lock_rec_free_all_from_discard_page(right_block); /* requires all bitmaps of the page to be empty by now */
-
- lock_mutex_exit();
-}
-
-/*************************************************************//**
-Updates the lock table when a page is split and merged to
-two pages.
-Under the lock mutex: the record following orig_pred on the left page
-inherits the supremum locks of the left page as gap locks, those supremum
-locks are reset, and the left supremum then inherits the locks of the
-record identified by lock_get_min_heap_no(right_block). Unlike
-lock_update_merge_left(), the inheritance is done without checking whether
-the record following orig_pred is the supremum, and the right page's locks
-are neither moved nor discarded. */
-UNIV_INTERN
-void
-lock_update_split_and_merge(
- const buf_block_t* left_block, /*!< in: left page to which merged */
- const rec_t* orig_pred, /*!< in: original predecessor of
- supremum on the left page before merge*/
- const buf_block_t* right_block) /*!< in: right page from which merged */
-{
- const rec_t* left_next_rec;
-
- ut_a(left_block && right_block);
- ut_a(orig_pred);
-
- lock_mutex_enter();
-
- left_next_rec = page_rec_get_next_const(orig_pred);
-
- /* Inherit the locks on the supremum of the left page to the
- first record which was moved from the right page */
- lock_rec_inherit_to_gap(
- left_block, left_block,
- page_rec_get_heap_no(left_next_rec),
- PAGE_HEAP_NO_SUPREMUM);
-
- /* Reset the locks on the supremum of the left page,
- releasing waiting transactions */
- lock_rec_reset_and_release_wait(left_block,
- PAGE_HEAP_NO_SUPREMUM);
-
- /* Inherit the locks to the supremum of the left page from the
- successor of the infimum on the right page */
- lock_rec_inherit_to_gap(left_block, right_block,
- PAGE_HEAP_NO_SUPREMUM,
- lock_get_min_heap_no(right_block));
-
- lock_mutex_exit();
-}
-
-/*************************************************************//**
-Resets the original locks on heir and replaces them with gap type locks
-inherited from rec.
-Both steps run under the lock mutex, which this function enters and exits
-itself: first the heir's lock bits are reset (releasing waiters), then the
-donor's locks are inherited to the heir as gap locks. */
-UNIV_INTERN
-void
-lock_rec_reset_and_inherit_gap_locks(
-/*=================================*/
- const buf_block_t* heir_block, /*!< in: block containing the
- record which inherits */
- const buf_block_t* block, /*!< in: block containing the
- record from which inherited;
- does NOT reset the locks on
- this record */
- ulint heir_heap_no, /*!< in: heap_no of the
- inheriting record */
- ulint heap_no) /*!< in: heap_no of the
- donating record */
-{
- lock_mutex_enter();
-
- lock_rec_reset_and_release_wait(heir_block, heir_heap_no);
-
- lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
-
- lock_mutex_exit();
-}
-
-/*************************************************************//**
-Updates the lock table when a page is discarded.
-If the page has no locks the function returns at once. Otherwise the
-record list of the page is followed from the infimum through the supremum
-(inclusive); for each record its locks are inherited to the heir record as
-gap locks and then reset, releasing waiters. Finally all lock structs of
-the page are discarded. The two branches differ only in the record format
-accessors used (compact vs. old-style page). */
-UNIV_INTERN
-void
-lock_update_discard(
-/*================*/
- const buf_block_t* heir_block, /*!< in: index page
- which will inherit the locks */
- ulint heir_heap_no, /*!< in: heap_no of the record
- which will inherit the locks */
- const buf_block_t* block) /*!< in: index page
- which will be discarded */
-{
- const page_t* page = block->frame;
- const rec_t* rec;
- ulint heap_no;
-
- lock_mutex_enter();
-
- if (!lock_rec_get_first_on_page(block)) {
- /* No locks exist on page, nothing to do */
-
- lock_mutex_exit();
-
- return;
- }
-
- /* Inherit all the locks on the page to the record and reset all
- the locks on the page */
-
- if (page_is_comp(page)) {
- rec = page + PAGE_NEW_INFIMUM;
-
- do {
- heap_no = rec_get_heap_no_new(rec);
-
- lock_rec_inherit_to_gap(heir_block, block,
- heir_heap_no, heap_no);
-
- lock_rec_reset_and_release_wait(block, heap_no);
-
- rec = page + rec_get_next_offs(rec, TRUE);
- } while (heap_no != PAGE_HEAP_NO_SUPREMUM); /* the supremum is processed before the loop ends */
- } else {
- rec = page + PAGE_OLD_INFIMUM;
-
- do {
- heap_no = rec_get_heap_no_old(rec);
-
- lock_rec_inherit_to_gap(heir_block, block,
- heir_heap_no, heap_no);
-
- lock_rec_reset_and_release_wait(block, heap_no);
-
- rec = page + rec_get_next_offs(rec, FALSE);
- } while (heap_no != PAGE_HEAP_NO_SUPREMUM); /* the supremum is processed before the loop ends */
- }
-
- lock_rec_free_all_from_discard_page(block);
-
- lock_mutex_exit();
-}
-
-/*************************************************************//**
-Updates the lock table when a new user record is inserted: the inserted
-record receives locks from the record that follows it on the page, via
-lock_rec_inherit_to_gap_if_gap_lock(). */
-UNIV_INTERN
-void
-lock_update_insert(
-/*===============*/
-	const buf_block_t*	block,	/*!< in: buffer block containing rec */
-	const rec_t*		rec)	/*!< in: the inserted record */
-{
-	ut_ad(block->frame == page_align(rec));
-
-	/* The successor of rec is the donator, rec itself the receiver. */
-	const ulint	comp = page_rec_is_comp(rec) ? TRUE : FALSE;
-	const rec_t*	successor = page_rec_get_next_low(rec, comp);
-	ulint		receiver_heap_no;
-	ulint		donator_heap_no;
-
-	if (comp) {
-		receiver_heap_no = rec_get_heap_no_new(rec);
-		donator_heap_no = rec_get_heap_no_new(successor);
-	} else {
-		receiver_heap_no = rec_get_heap_no_old(rec);
-		donator_heap_no = rec_get_heap_no_old(successor);
-	}
-
-	lock_rec_inherit_to_gap_if_gap_lock(
-		block, receiver_heap_no, donator_heap_no);
-}
-
-/*************************************************************//**
-Updates the lock table when a record is removed: the record that follows
-it on the page inherits its locks in gap mode, after which the lock bits
-of the removed record are reset and waiting transactions released. */
-UNIV_INTERN
-void
-lock_update_delete(
-/*===============*/
-	const buf_block_t*	block,	/*!< in: buffer block containing rec */
-	const rec_t*		rec)	/*!< in: the record to be removed */
-{
-	const page_t*	page = block->frame;
-
-	ut_ad(page == page_align(rec));
-
-	/* Resolve both heap numbers before entering the lock mutex. */
-	const ulint	comp = page_is_comp(page) ? TRUE : FALSE;
-	const rec_t*	successor = page + rec_get_next_offs(rec, comp);
-	ulint		heap_no;
-	ulint		next_heap_no;
-
-	if (comp) {
-		heap_no = rec_get_heap_no_new(rec);
-		next_heap_no = rec_get_heap_no_new(successor);
-	} else {
-		heap_no = rec_get_heap_no_old(rec);
-		next_heap_no = rec_get_heap_no_old(successor);
-	}
-
-	lock_mutex_enter();
-
-	/* The successor takes over the locks of rec, as gap locks ... */
-	lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no);
-
-	/* ... and only then are the locks on rec itself cleared. */
-	lock_rec_reset_and_release_wait(block, heap_no);
-
-	lock_mutex_exit();
-}
-
-/*********************************************************************//**
-Parks the explicit locks of a record on the infimum record of the same
-page. This is used when an update changes the size of a record, so that
-the record has to be moved, perhaps to another page: while the record is
-in transit the infimum serves as a dummy carrier for its lock state and
-takes care of lock releases. */
-UNIV_INTERN
-void
-lock_rec_store_on_page_infimum(
-/*===========================*/
-	const buf_block_t*	block,	/*!< in: buffer block containing rec */
-	const rec_t*		rec)	/*!< in: record whose lock state
-					is stored on the infimum
-					record of the same page; lock
-					bits are reset on the
-					record */
-{
-	const ulint	heap_no = page_rec_get_heap_no(rec);
-
-	ut_ad(block->frame == page_align(rec));
-
-	lock_mutex_enter();
-	/* Receiver: the infimum; donator: rec. Same block for both. */
-	lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no);
-	lock_mutex_exit();
-}
-
-/*********************************************************************//**
-Moves the explicit lock state that was parked on the infimum record of
-the donator page back onto a single record. */
-UNIV_INTERN
-void
-lock_rec_restore_from_page_infimum(
-/*===============================*/
-	const buf_block_t*	block,	/*!< in: buffer block containing rec */
-	const rec_t*		rec,	/*!< in: record whose lock state
-					is restored */
-	const buf_block_t*	donator)/*!< in: page (rec is not
-					necessarily on this page)
-					whose infimum stored the lock
-					state; lock bits are reset on
-					the infimum */
-{
-	const ulint	heap_no = page_rec_get_heap_no(rec);
-
-	lock_mutex_enter();
-	/* Receiver: rec on block; donator: the infimum of the donator. */
-	lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM);
-	lock_mutex_exit();
-}
-
-/*=========== DEADLOCK CHECKING ======================================*/
-
-/*********************************************************************//**
-Rewinds, with rewind(3), the file that stores the latest detected
-deadlock and writes a timestamp to it. If all deadlocks are also printed
-to stderr, a heading message surrounded by timestamps is written there. */
-UNIV_INLINE
-void
-lock_deadlock_start_print()
-/*=======================*/
-{
-	ut_ad(lock_mutex_own());
-	ut_ad(!srv_read_only_mode);
-
-	rewind(lock_latest_err_file);
-	ut_print_timestamp(lock_latest_err_file);
-
-	if (!srv_print_all_deadlocks) {
-		return;
-	}
-
-	ut_print_timestamp(stderr);
-	fprintf(stderr, "InnoDB: transactions deadlock detected, "
-		"dumping detailed information.\n");
-	ut_print_timestamp(stderr);
-}
-
-/*********************************************************************//**
-Writes a message to the deadlock file and, when printing of all
-deadlocks is enabled, to stderr as well. Does nothing in read-only
-mode. */
-UNIV_INLINE
-void
-lock_deadlock_fputs(
-/*================*/
-	const char*	msg)	/*!< in: message to print */
-{
-	if (srv_read_only_mode) {
-		return;
-	}
-
-	fputs(msg, lock_latest_err_file);
-
-	if (srv_print_all_deadlocks) {
-		fputs(msg, stderr);
-	}
-}
-
-/*********************************************************************//**
-Prints a transaction to the deadlock file and, when printing of all
-deadlocks is enabled, to stderr as well. The lock counts and the lock
-heap size are read before trx_sys->mutex is acquired; the printing
-itself is done while holding that mutex. */
-UNIV_INLINE
-void
-lock_deadlock_trx_print(
-/*====================*/
-	const trx_t*	trx,		/*!< in: transaction */
-	ulint		max_query_len)	/*!< in: max query length to print,
-					or 0 to use the default max length */
-{
-	ut_ad(lock_mutex_own());
-	ut_ad(!srv_read_only_mode);
-
-	const ulint	rec_lock_count
-		= lock_number_of_rows_locked(&trx->lock);
-	const ulint	trx_lock_count
-		= UT_LIST_GET_LEN(trx->lock.trx_locks);
-	const ulint	lock_heap_bytes
-		= mem_heap_get_size(trx->lock.lock_heap);
-
-	mutex_enter(&trx_sys->mutex);
-
-	trx_print_low(lock_latest_err_file, trx, max_query_len,
-		      rec_lock_count, trx_lock_count, lock_heap_bytes);
-
-	if (srv_print_all_deadlocks) {
-		trx_print_low(stderr, trx, max_query_len,
-			      rec_lock_count, trx_lock_count,
-			      lock_heap_bytes);
-	}
-
-	mutex_exit(&trx_sys->mutex);
-}
-
-/*********************************************************************//**
-Prints a lock to the deadlock file and, when printing of all deadlocks
-is enabled, to stderr as well. Record locks go through lock_rec_print(),
-all other locks through lock_table_print(). */
-UNIV_INLINE
-void
-lock_deadlock_lock_print(
-/*=====================*/
-	const lock_t*	lock)	/*!< in: record or table type lock */
-{
-	ut_ad(lock_mutex_own());
-	ut_ad(!srv_read_only_mode);
-
-	const bool	is_rec_lock = (lock_get_type_low(lock) == LOCK_REC);
-
-	/* The deadlock file is always written first. */
-	if (is_rec_lock) {
-		lock_rec_print(lock_latest_err_file, lock);
-	} else {
-		lock_table_print(lock_latest_err_file, lock);
-	}
-
-	if (!srv_print_all_deadlocks) {
-		return;
-	}
-
-	if (is_rec_lock) {
-		lock_rec_print(stderr, lock);
-	} else {
-		lock_table_print(stderr, lock);
-	}
-}
-
-/** Used in deadlock tracking. Protected by lock_sys->mutex. */
-static ib_uint64_t	lock_mark_counter = 0;
-
-/** Check if the search is too deep: true when the depth or the cost
-recorded in the deadlock context exceeds its limit. The argument is
-parenthesized so that any pointer-valued expression may be passed, not
-only a plain identifier. */
-#define lock_deadlock_too_deep(c)				\
-	((c)->depth > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK		\
-	 || (c)->cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK)
-
-/********************************************************************//**
-Advances in the lock queue to the next lock whose owning transaction has
-a deadlock_mark not greater than ctx->mark_start, that is, whose
-sub-tree has not been searched yet. Record locks are followed through
-lock_rec_get_next_const() for the given heap_no, table locks through the
-table's lock list.
-@return next lock or NULL if at end of queue */
-static
-const lock_t*
-lock_get_next_lock(
-/*===============*/
-	const lock_deadlock_ctx_t*
-			ctx,	/*!< in: deadlock context */
-	const lock_t*	lock,	/*!< in: lock in the queue */
-	ulint		heap_no)/*!< in: heap no if rec lock else
-				ULINT_UNDEFINED */
-{
-	ut_ad(lock_mutex_own());
-
-	for (;;) {
-		if (lock_get_type_low(lock) == LOCK_REC) {
-			ut_ad(heap_no != ULINT_UNDEFINED);
-			lock = lock_rec_get_next_const(heap_no, lock);
-		} else {
-			ut_ad(heap_no == ULINT_UNDEFINED);
-			ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
-
-			lock = UT_LIST_GET_NEXT(
-				un_member.tab_lock.locks, lock);
-		}
-
-		/* Stop at the end of the queue or at the first lock whose
-		owner has not been marked as searched. */
-		if (lock == NULL
-		    || lock->trx->lock.deadlock_mark <= ctx->mark_start) {
-			break;
-		}
-	}
-
-	ut_ad(lock == NULL
-	      || lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock));
-
-	return(lock);
-}
-
-/********************************************************************//**
-Finds the lock from which the search of a queue starts. For a record
-wait_lock this is the first lock on the same page that has the bit of
-the waited-for heap number set; for a table wait_lock it is the first
-lock in the lock list of the table. What the search is after is an edge
-from the transaction owning wait_lock to a transaction that has a lock
-ahead of it in the queue.
-@return first lock, never NULL */
-static
-const lock_t*
-lock_get_first_lock(
-/*================*/
-	const lock_deadlock_ctx_t*
-			ctx,	/*!< in: deadlock context */
-	ulint*		heap_no)/*!< out: heap no if rec lock,
-				else ULINT_UNDEFINED */
-{
-	ut_ad(lock_mutex_own());
-
-	const lock_t*	waiting = ctx->wait_lock;
-	const lock_t*	lock;
-
-	if (lock_get_type_low(waiting) != LOCK_REC) {
-		*heap_no = ULINT_UNDEFINED;
-		ut_ad(lock_get_type_low(waiting) == LOCK_TABLE);
-
-		dict_table_t*	table = waiting->un_member.tab_lock.table;
-
-		lock = UT_LIST_GET_FIRST(table->locks);
-	} else {
-		*heap_no = lock_rec_find_set_bit(waiting);
-		ut_ad(*heap_no != ULINT_UNDEFINED);
-
-		lock = lock_rec_get_first_on_page_addr(
-			waiting->un_member.rec_lock.space,
-			waiting->un_member.rec_lock.page_no);
-
-		/* The first lock on the page need not cover our record;
-		if it does not, move on to the first one that does. */
-		if (!lock_rec_get_nth_bit(lock, *heap_no)) {
-			lock = lock_rec_get_next_const(*heap_no, lock);
-		}
-	}
-
-	ut_a(lock != NULL);
-	ut_a(lock != ctx->wait_lock ||
-	     innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_VATS);
-	ut_ad(lock_get_type_low(lock) == lock_get_type_low(ctx->wait_lock));
-
-	return(lock);
-}
-
-/********************************************************************//**
-Reports a detected deadlock: prints transaction (1), the owner of
-ctx->wait_lock, with the lock it waits for, then transaction (2), the
-owner of the lock that closes the cycle, with that lock and, if it is
-still waiting, the lock the joining transaction waits for. */
-static
-void
-lock_deadlock_notify(
-/*=================*/
-	const lock_deadlock_ctx_t*	ctx,	/*!< in: deadlock context */
-	const lock_t*			lock)	/*!< in: lock causing
-						deadlock */
-{
-	ut_ad(lock_mutex_own());
-	ut_ad(!srv_read_only_mode);
-
-	const lock_t*	waited = ctx->wait_lock;
-
-	lock_deadlock_start_print();
-
-	/* Transaction (1) and what it is waiting for. */
-	lock_deadlock_fputs("\n*** (1) TRANSACTION:\n");
-	lock_deadlock_trx_print(waited->trx, 3000);
-
-	lock_deadlock_fputs("*** (1) WAITING FOR THIS LOCK TO BE GRANTED:\n");
-	lock_deadlock_lock_print(waited);
-
-	/* Transaction (2) and what it holds. */
-	lock_deadlock_fputs("*** (2) TRANSACTION:\n");
-	lock_deadlock_trx_print(lock->trx, 3000);
-
-	lock_deadlock_fputs("*** (2) HOLDS THE LOCK(S):\n");
-	lock_deadlock_lock_print(lock);
-
-	/* The joining transaction may already have been granted its lock
-	when some other waiting transaction was rolled back; in that case
-	there is no wait to report for it. */
-	const lock_t*	joining_wait = ctx->start->lock.wait_lock;
-
-	if (joining_wait != 0) {
-		lock_deadlock_fputs(
-			"*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");
-
-		lock_deadlock_lock_print(joining_wait);
-	}
-
-#ifdef UNIV_DEBUG
-	if (lock_print_waits) {
-		fputs("Deadlock detected\n", stderr);
-	}
-#endif /* UNIV_DEBUG */
-}
-
-/********************************************************************//**
-Selects the victim transaction that should be rolled back. The choice is
-between the joining transaction (ctx->start) and the owner of
-ctx->wait_lock: the joining transaction is chosen when
-trx_weight_ge(owner, joining) holds, otherwise the owner. With
-WITH_WSREP the choice is flipped when wsrep_thd_is_BF() is true for the
-thread of the transaction that would otherwise be the victim.
-@return victim transaction */
-static
-const trx_t*
-lock_deadlock_select_victim(
-/*========================*/
-	const lock_deadlock_ctx_t*	ctx)	/*!< in: deadlock context */
-{
-	ut_ad(lock_mutex_own());
-	ut_ad(ctx->start->lock.wait_lock != 0);
-	ut_ad(ctx->wait_lock->trx != ctx->start);
-
-	const trx_t*	joining = ctx->start;
-	const trx_t*	owner = ctx->wait_lock->trx;
-
-	if (trx_weight_ge(owner, joining)) {
-		/* The joining transaction is 'smaller',
-		it is the default victim. */
-#ifdef WITH_WSREP
-		if (wsrep_thd_is_BF(joining->mysql_thd, TRUE)) {
-			return(owner);
-		}
-#endif /* WITH_WSREP */
-		return(joining);
-	}
-
-#ifdef WITH_WSREP
-	if (wsrep_thd_is_BF(owner->mysql_thd, TRUE)) {
-		return(joining);
-	}
-#endif /* WITH_WSREP */
-	return(owner);
-}
-
-/********************************************************************//**
-Pops the most recently saved deadlock search state: decrements
-ctx->depth and hands out the lock_stack slot at the new depth.
-@return stack slot instance that was on top of the stack. */
-static
-const lock_stack_t*
-lock_deadlock_pop(
-/*==============*/
-	lock_deadlock_ctx_t*	ctx)	/*!< in/out: context */
-{
-	ut_ad(lock_mutex_own());
-
-	ut_ad(ctx->depth > 0);
-
-	ctx->depth -= 1;
-
-	return(&lock_stack[ctx->depth]);
-}
-
-/********************************************************************//**
-Saves the current deadlock search state (lock, heap number and
-ctx->wait_lock) in the next free lock_stack slot and increments
-ctx->depth.
-@return slot that was used in the stack, or NULL if the stack already
-holds LOCK_STACK_SIZE entries */
-static
-lock_stack_t*
-lock_deadlock_push(
-/*===============*/
-	lock_deadlock_ctx_t*	ctx,	/*!< in/out: context */
-	const lock_t*		lock,	/*!< in: current lock */
-	ulint			heap_no)	/*!< in: heap number */
-{
-	ut_ad(lock_mutex_own());
-
-	if (LOCK_STACK_SIZE <= ctx->depth) {
-		/* No free slot left. */
-		return(NULL);
-	}
-
-	lock_stack_t*	slot = &lock_stack[ctx->depth];
-
-	ctx->depth++;
-
-	slot->lock = lock;
-	slot->heap_no = heap_no;
-	slot->wait_lock = ctx->wait_lock;
-
-	return(slot);
-}
-
-/********************************************************************//**
-Looks iteratively for a deadlock. Note: the joining transaction may
-have been granted its lock by the deadlock checks.
-
-The search walks the locks in the queue of ctx->wait_lock. When a
-conflicting lock is owned by a transaction that is itself in
-TRX_QUE_LOCK_WAIT, the current position is pushed on lock_stack and the
-walk continues in the queue of that transaction's wait_lock; when a queue
-is exhausted, the previous position is popped and the walk resumes there.
-A transaction whose queue has been walked up to its own wait_lock gets a
-fresh deadlock_mark and is skipped from then on.
-
-A non-zero id is returned when a conflicting lock owned by ctx->start is
-met (a cycle; the victim comes from lock_deadlock_select_victim()), when
-the depth/cost limits or the stack size are exceeded, or when a further
-waitee buffer cannot be allocated; in the latter cases ctx->too_deep is
-set.
-@return 0 if no deadlock else the victim transaction id.*/
-static
-trx_id_t
-lock_deadlock_search(
-/*=================*/
- lock_deadlock_ctx_t* ctx, /*!< in/out: deadlock context */
- struct thd_wait_reports*waitee_ptr) /*!< in/out: list of waitees,
- or NULL if no waits are to be
- collected */
-{
- const lock_t* lock;
- ulint heap_no;
-
- ut_ad(lock_mutex_own());
- ut_ad(!trx_mutex_own(ctx->start));
-
- ut_ad(ctx->start != NULL);
- ut_ad(ctx->wait_lock != NULL);
- assert_trx_in_list(ctx->wait_lock->trx);
- ut_ad(ctx->mark_start <= lock_mark_counter);
-
- /* Look at the locks ahead of wait_lock in the lock queue. */
- lock = lock_get_first_lock(ctx, &heap_no);
-
- for (;;) {
-
- /* We should never visit the same sub-tree more than once. */
- ut_ad(lock == NULL
- || lock->trx->lock.deadlock_mark <= ctx->mark_start);
-
- while (ctx->depth > 0 && lock == NULL) {
- const lock_stack_t* stack;
-
- /* The current queue is exhausted: restore the previous
- search state and continue after the lock saved there. */
-
- stack = lock_deadlock_pop(ctx);
-
- lock = stack->lock;
- heap_no = stack->heap_no;
- ctx->wait_lock = stack->wait_lock;
-
- lock = lock_get_next_lock(ctx, lock, heap_no);
- }
-
- if (lock == NULL) {
- break; /* nothing left on the stack either: search done */
- } else if (lock == ctx->wait_lock) {
-
- /* We reached the waiting lock itself, so
- we can mark this subtree as searched */
- ut_ad(lock->trx->lock.deadlock_mark <= ctx->mark_start);
-
- lock->trx->lock.deadlock_mark = ++lock_mark_counter;
-
- /* We are not prepared for an overflow. This 64-bit
- counter should never wrap around. At 10^9 increments
- per second, it would take 10^3 years of uptime. */
-
- ut_ad(lock_mark_counter > 0);
-
- lock = NULL; /* triggers the pop at the top of the loop */
-
- } else if (!lock_has_to_wait(ctx->wait_lock, lock)) {
-
- /* No conflict, next lock */
- lock = lock_get_next_lock(ctx, lock, heap_no);
-
- } else if (lock->trx == ctx->start) {
-
- /* Found a cycle: a lock of the joining transaction
- blocks the current wait_lock. */
-
- lock_deadlock_notify(ctx, lock);
-
- return(lock_deadlock_select_victim(ctx)->id);
-
- } else if (lock_deadlock_too_deep(ctx)) {
-
- /* Search too deep to continue.
- NOTE(review): with WITH_WSREP and a BF joining thread the
- id returned below is not ctx->start->id, whereas
- lock_deadlock_check_and_resolve() asserts
- victim_trx_id == trx->id whenever too_deep is set;
- confirm that this combination cannot occur. */
-
- ctx->too_deep = TRUE;
-
-#ifdef WITH_WSREP
- if (wsrep_thd_is_BF(ctx->start->mysql_thd, TRUE)) {
- return(ctx->wait_lock->trx->id);
- }
- else
-#endif /* WITH_WSREP */
- /* Select the joining transaction as the victim. */
- return(ctx->start->id);
-
- } else {
- /* We do not need to report autoinc locks to the upper
- layer. These locks are released before commit, so they
- can not cause deadlocks with binlog-fixed commit
- order. */
- if (waitee_ptr &&
- (lock_get_type_low(lock) != LOCK_TABLE ||
- lock_get_mode(lock) != LOCK_AUTO_INC)) {
- if (waitee_ptr->used ==
- sizeof(waitee_ptr->waitees) /
- sizeof(waitee_ptr->waitees[0])) { /* buffer full: chain a new one */
- waitee_ptr->next =
- (struct thd_wait_reports *)
- mem_alloc(sizeof(*waitee_ptr));
- waitee_ptr = waitee_ptr->next;
- if (!waitee_ptr) {
- ctx->too_deep = TRUE; /* allocation failed: give up the search */
- return(ctx->start->id);
- }
- waitee_ptr->next = NULL;
- waitee_ptr->used = 0;
- }
- waitee_ptr->waitees[waitee_ptr->used++] = lock->trx;
- }
-
- if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
-
- /* Another trx ahead has requested a lock in an
- incompatible mode, and is itself waiting for a lock. */
-
- ++ctx->cost;
-
- /* Save current search state. */
- if (!lock_deadlock_push(ctx, lock, heap_no)) {
-
- /* Unable to save current search state, stack
- size not big enough. */
-
- ctx->too_deep = TRUE;
-
-#ifdef WITH_WSREP
- if (wsrep_thd_is_BF(ctx->start->mysql_thd, TRUE))
- return(lock->trx->id);
- else
-#endif /* WITH_WSREP */
-
- return(ctx->start->id);
- }
-
- /* Descend: continue in the queue of the lock that the
- owner of the conflicting lock is waiting for. */
- ctx->wait_lock = lock->trx->lock.wait_lock;
- lock = lock_get_first_lock(ctx, &heap_no);
-
- if (lock->trx->lock.deadlock_mark > ctx->mark_start) {
- lock = lock_get_next_lock(ctx, lock, heap_no); /* skip an already searched owner */
- }
-
- } else {
- lock = lock_get_next_lock(ctx, lock, heap_no);
- }
- }
- }
-
- ut_a(lock == NULL && ctx->depth == 0);
-
- /* No deadlock found. */
- return(0);
-}
-
-/********************************************************************//**
-Prints the transaction that is rolled back because the deadlock search
-exceeded the max step count or the max depth, together with the lock it
-was waiting for. */
-static
-void
-lock_deadlock_joining_trx_print(
-/*============================*/
-	const trx_t*	trx,	/*!< in: transaction rolled back */
-	const lock_t*	lock)	/*!< in: lock trx wants */
-{
-	ut_ad(lock_mutex_own());
-	ut_ad(!srv_read_only_mode);
-
-	lock_deadlock_start_print();
-
-	/* Heading: the current trx is the victim of the aborted search. */
-	lock_deadlock_fputs(
-		"TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
-		" WAITS-FOR GRAPH, WE WILL ROLL BACK"
-		" FOLLOWING TRANSACTION \n\n"
-		"*** TRANSACTION:\n");
-	lock_deadlock_trx_print(trx, 3000);
-
-	lock_deadlock_fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n");
-	lock_deadlock_lock_print(lock);
-}
-
-/********************************************************************//**
-Rolls back the transaction selected as the victim, which here is always
-the owner of ctx->wait_lock: flags it as deadlock victim and cancels its
-waiting lock request, both while holding the victim's trx mutex. */
-static
-void
-lock_deadlock_trx_rollback(
-/*=======================*/
-	lock_deadlock_ctx_t*	ctx)	/*!< in: deadlock context */
-{
-	ut_ad(lock_mutex_own());
-
-	trx_t*	victim = ctx->wait_lock->trx;
-
-	lock_deadlock_fputs("*** WE ROLL BACK TRANSACTION (1)\n");
-
-	trx_mutex_enter(victim);
-
-	victim->lock.was_chosen_as_deadlock_victim = TRUE;
-	lock_cancel_waiting_and_release(victim->lock.wait_lock);
-
-	trx_mutex_exit(victim);
-}
-
-/********************************************************************//**
-Reports to the upper layer, through thd_report_wait_for(), every
-transaction collected in the chain of waitee buffers, except the one
-selected as deadlock victim. Every buffer of the chain other than the
-first one (which is owned by the caller) is freed along the way. */
-static
-void
-lock_report_waiters_to_mysql(
-/*=======================*/
-	struct thd_wait_reports*	waitee_buf_ptr,	/*!< in: set of trxs */
-	THD*				mysql_thd,	/*!< in: THD */
-	trx_id_t			victim_trx_id)	/*!< in: Trx selected
-							as deadlock victim, if
-							any */
-{
-	struct thd_wait_reports*	next;
-
-	for (struct thd_wait_reports* buf = waitee_buf_ptr;
-	     buf != NULL;
-	     buf = next) {
-
-		for (ulint i = 0; i < buf->used; ++i) {
-			trx_t*	w_trx = buf->waitees[i];
-
-			/* There is no need to report waits to a trx already
-			selected as a victim. */
-			if (w_trx->id == victim_trx_id) {
-				continue;
-			}
-
-			/* If thd_report_wait_for() decides to kill the
-			transaction, then we will get a call back into
-			innobase_kill_query. abort_type is switched to
-			TRX_REPLICATION_ABORT for the duration of the call,
-			presumably so that the callback does not try to
-			recursively take lock_sys->mutex -- verify against
-			innobase_kill_query. */
-			w_trx->abort_type = TRX_REPLICATION_ABORT;
-			thd_report_wait_for(mysql_thd, w_trx->mysql_thd);
-			w_trx->abort_type = TRX_SERVER_ABORT;
-		}
-
-		/* Read the link before the buffer may be freed. */
-		next = buf->next;
-
-		if (buf != waitee_buf_ptr) {
-			mem_free(buf);
-		}
-	}
-}
-
-
-/********************************************************************//**
-Checks if a joining lock request results in a deadlock. If a deadlock is
-found this function will resolve the deadlock by choosing a victim transaction
-and rolling it back. It will attempt to resolve all deadlocks. The returned
-transaction id will be the joining transaction id or 0 if some other
-transaction was chosen as a victim and rolled back or no deadlock found.
-
-The search is repeated for as long as lock_deadlock_search() picks a
-victim other than the joining transaction; each such victim is rolled
-back through lock_deadlock_trx_rollback(). If thd_need_wait_for() holds
-for the thread of the joining transaction, the owners of conflicting
-locks met during a search are collected in a thd_wait_reports buffer and
-handed to lock_report_waiters_to_mysql() after every search round.
-
-@return id of transaction chosen as victim or 0 */
-static
-trx_id_t
-lock_deadlock_check_and_resolve(
-/*============================*/
- const lock_t* lock, /*!< in: lock the transaction is requesting */
- const trx_t* trx) /*!< in: transaction */
-{
- trx_id_t victim_trx_id;
- struct thd_wait_reports waitee_buf; /* first buffer of the waitee chain */
- struct thd_wait_reports*waitee_buf_ptr; /* &waitee_buf, or NULL if no reporting */
- THD* start_mysql_thd;
-
- ut_ad(trx != NULL);
- ut_ad(lock != NULL);
- ut_ad(lock_mutex_own());
- assert_trx_in_list(trx);
-
- start_mysql_thd = trx->mysql_thd;
- if (start_mysql_thd && thd_need_wait_for(start_mysql_thd)) {
- waitee_buf_ptr = &waitee_buf;
- } else {
- waitee_buf_ptr = NULL;
- }
-
- /* Try and resolve as many deadlocks as possible. */
- do {
- lock_deadlock_ctx_t ctx;
-
- /* Reset the context. Every round starts again from the
- joining transaction and its requested lock. */
- ctx.cost = 0;
- ctx.depth = 0;
- ctx.start = trx;
- ctx.too_deep = FALSE;
- ctx.wait_lock = lock;
- ctx.mark_start = lock_mark_counter;
-
- if (waitee_buf_ptr) {
- waitee_buf_ptr->next = NULL;
- waitee_buf_ptr->used = 0;
- }
-
- victim_trx_id = lock_deadlock_search(&ctx, waitee_buf_ptr);
-
- /* Report waits to upper layer, as needed. */
- if (waitee_buf_ptr) {
- lock_report_waiters_to_mysql(waitee_buf_ptr,
- start_mysql_thd,
- victim_trx_id);
- }
-
- /* Search too deep, we rollback the joining transaction. */
- if (ctx.too_deep) {
-
- ut_a(trx == ctx.start);
- ut_a(victim_trx_id == trx->id);
-
-#ifdef WITH_WSREP
- if (!wsrep_thd_is_BF(ctx.start->mysql_thd, TRUE))
- {
-#endif /* WITH_WSREP */
- if (!srv_read_only_mode) {
- lock_deadlock_joining_trx_print(trx, lock);
- }
-#ifdef WITH_WSREP
- } else {
- /* BF processor */;
- }
-#endif /* WITH_WSREP */
-
- } else if (victim_trx_id != 0 && victim_trx_id != trx->id) {
-
- /* Some other transaction was selected: roll it back and
- search again. */
- ut_ad(victim_trx_id == ctx.wait_lock->trx->id);
- lock_deadlock_trx_rollback(&ctx);
-
- lock_deadlock_found = TRUE;
-
- MONITOR_INC(MONITOR_DEADLOCK);
- srv_stats.lock_deadlock_count.inc();
- }
- } while (victim_trx_id != 0 && victim_trx_id != trx->id);
-
- /* If the joining transaction was selected as the victim. */
- if (victim_trx_id != 0) {
- ut_a(victim_trx_id == trx->id);
-
- MONITOR_INC(MONITOR_DEADLOCK);
- srv_stats.lock_deadlock_count.inc();
-
- lock_deadlock_fputs("*** WE ROLL BACK TRANSACTION (2)\n");
-
- lock_deadlock_found = TRUE;
- }
-
- return(victim_trx_id);
-}
-
-/*========================= TABLE LOCKS ==============================*/
-
-/*********************************************************************//**
-Creates a table lock object and adds it as the last in the lock queue
-of the table. Does NOT check for deadlocks or lock compatibility.
-
-A granted AUTOINC request (type_mode exactly LOCK_AUTO_INC) reuses
-table->autoinc_lock and is recorded in trx->autoinc_locks; every other
-request is allocated from the lock heap of the transaction. With
-WITH_WSREP, for a wsrep thread the new lock is inserted right after
-c_lock instead of at the end of the queue when c_lock is given and the
-thread is BF, and a c_lock owner that is itself in a lock wait is marked
-as deadlock victim and has its wait cancelled.
-@return own: new lock object */
-UNIV_INLINE
-lock_t*
-lock_table_create(
-/*==============*/
-#ifdef WITH_WSREP
- lock_t* c_lock, /*!< in: conflicting lock */
-#endif
- dict_table_t* table, /*!< in/out: database table
- in dictionary cache */
- ulint type_mode,/*!< in: lock mode possibly ORed with
- LOCK_WAIT */
- trx_t* trx) /*!< in: trx */
-{
- lock_t* lock;
-
- ut_ad(table && trx);
- ut_ad(lock_mutex_own());
- ut_ad(trx_mutex_own(trx));
-
- /* Non-locking autocommit read-only transactions should not set
- any locks. */
- assert_trx_in_list(trx);
-
- if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) {
- ++table->n_waiting_or_granted_auto_inc_locks; /* counts waiting requests too */
- }
-
- /* For AUTOINC locking we reuse the lock instance only if
- there is no wait involved else we allocate the waiting lock
- from the transaction lock heap. */
- if (type_mode == LOCK_AUTO_INC) {
-
- lock = table->autoinc_lock;
-
- table->autoinc_trx = trx;
-
- ib_vector_push(trx->autoinc_locks, &lock);
- } else {
- lock = static_cast<lock_t*>(
- mem_heap_alloc(trx->lock.lock_heap, sizeof(*lock)));
- }
-
- lock->type_mode = type_mode | LOCK_TABLE;
- lock->trx = trx;
- lock->requested_time = ut_time();
- lock->wait_time = 0;
-
- lock->un_member.tab_lock.table = table;
-
- ut_ad(table->n_ref_count > 0 || !table->can_be_evicted);
-
- UT_LIST_ADD_LAST(trx_locks, trx->lock.trx_locks, lock);
-
-#ifdef WITH_WSREP
- if (wsrep_thd_is_wsrep(trx->mysql_thd)) {
- if (c_lock && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
- UT_LIST_INSERT_AFTER(
- un_member.tab_lock.locks, table->locks, c_lock, lock);
- } else {
- UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock);
- }
-
- if (c_lock) {
- trx_mutex_enter(c_lock->trx);
- }
-
- if (c_lock && c_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
-
- c_lock->trx->lock.was_chosen_as_deadlock_victim = TRUE;
-
- if (wsrep_debug) {
- wsrep_print_wait_locks(c_lock);
- wsrep_print_wait_locks(c_lock->trx->lock.wait_lock);
- }
-
- /* We have to release the trx mutex for the duration of
- the victim lock release. This will eventually call
- lock_grant, which wants to take the trx mutex again.
- */
- /* caller has trx_mutex, have to release for lock cancel */
- trx_mutex_exit(trx);
- lock_cancel_waiting_and_release(c_lock->trx->lock.wait_lock);
- trx_mutex_enter(trx);
-
- /* The victim might not have been waiting for c_lock but
- for some other lock; that does not matter if wait_lock was
- released above.
- NOTE(review): the call below resets the newly created
- lock, not c_lock -- confirm that this is intended.
- */
- if (c_lock->trx->lock.wait_lock == c_lock) {
- lock_reset_lock_and_trx_wait(lock);
- }
-
- if (wsrep_debug) {
- fprintf(stderr, "WSREP: c_lock canceled %llu\n",
- (ulonglong) c_lock->trx->id);
- }
- }
- if (c_lock) {
- trx_mutex_exit(c_lock->trx);
- }
- } else {
-#endif /* WITH_WSREP */
- UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock);
-#ifdef WITH_WSREP
- }
-#endif /* WITH_WSREP */
-
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
-
- lock_set_lock_and_trx_wait(lock, trx);
- }
-
- ib_vector_push(lock->trx->lock.table_locks, &lock);
-
- MONITOR_INC(MONITOR_TABLELOCK_CREATED);
- MONITOR_INC(MONITOR_NUM_TABLELOCK);
-
- return(lock);
-}
-
-/*************************************************************//**
-Pops the last AUTOINC lock request from the transaction's autoinc_locks
-vector and, after it, every gap (NULL entry) that thereby ends up on top
-of the stack. */
-UNIV_INLINE
-void
-lock_table_pop_autoinc_locks(
-/*=========================*/
-	trx_t*	trx)	/*!< in/out: transaction that owns the AUTOINC locks */
-{
-	ut_ad(lock_mutex_own());
-	ut_ad(!ib_vector_is_empty(trx->autoinc_locks));
-
-	for (;;) {
-		ib_vector_pop(trx->autoinc_locks);
-
-		if (ib_vector_is_empty(trx->autoinc_locks)) {
-			return;
-		}
-
-		/* A non-NULL entry on top ends the clean-up; NULL entries
-		are gaps and are popped as well. */
-		if (*(lock_t**) ib_vector_get_last(trx->autoinc_locks)
-		    != NULL) {
-			return;
-		}
-	}
-}
-
-/*************************************************************//**
-Removes an AUTOINC lock request from the transaction's autoinc_locks
-vector. If the lock is the last entry it is popped (together with any
-gaps below it); otherwise its entry inside the vector is overwritten
-with NULL. It is a fatal error if the lock is not in the vector. */
-UNIV_INLINE
-void
-lock_table_remove_autoinc_lock(
-/*===========================*/
-	lock_t*	lock,	/*!< in: table lock */
-	trx_t*	trx)	/*!< in/out: transaction that owns the lock */
-{
-	lock_t*	autoinc_lock;
-	lint	i = ib_vector_size(trx->autoinc_locks) - 1;
-
-	ut_ad(lock_mutex_own());
-	ut_ad(lock_get_mode(lock) == LOCK_AUTO_INC);
-	ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
-	ut_ad(!ib_vector_is_empty(trx->autoinc_locks));
-
-	/* With stored functions and procedures the user may drop
-	a table within the same "statement", so the lock to remove
-	is not necessarily the one on top of the stack. */
-
-	autoinc_lock = *static_cast<lock_t**>(
-		ib_vector_get(trx->autoinc_locks, i));
-
-	if (autoinc_lock == lock) {
-		/* The default fast case: the lock is on top. */
-		lock_table_pop_autoinc_locks(trx);
-		return;
-	}
-
-	/* The last element should never be NULL */
-	ut_a(autoinc_lock != NULL);
-
-	/* Look for the lock below the top and turn its slot into a gap. */
-	for (--i; i >= 0; --i) {
-		autoinc_lock = *static_cast<lock_t**>(
-			ib_vector_get(trx->autoinc_locks, i));
-
-		if (UNIV_LIKELY(autoinc_lock == lock)) {
-			void*	null_var = NULL;
-			ib_vector_set(trx->autoinc_locks, i, &null_var);
-			return;
-		}
-	}
-
-	/* Must find the autoinc lock. */
-	ut_error;
-}
-
-/*************************************************************//**
-Removes a table lock request from the lock queue of the table and from
-the list of locks of its transaction. For an AUTOINC lock the AUTOINC
-bookkeeping of the table and of the transaction is updated as well. This
-is a low-level function which does NOT check if waiting requests can now
-be granted. */
-UNIV_INLINE
-void
-lock_table_remove_low(
-/*==================*/
-	lock_t*	lock)	/*!< in/out: table lock */
-{
-	ut_ad(lock_mutex_own());
-
-	trx_t*		trx = lock->trx;
-	dict_table_t*	table = lock->un_member.tab_lock.table;
-	const bool	is_autoinc = (lock_get_mode(lock) == LOCK_AUTO_INC);
-
-	if (is_autoinc) {
-
-		/* The table's AUTOINC lock can get transferred to
-		another transaction before we get here, so only clear
-		the owner if it is still us. */
-		if (table->autoinc_trx == trx) {
-			table->autoinc_trx = NULL;
-		}
-
-		/* Only granted locks are stored in the
-		trx->autoinc_locks vector (see lock_table_create()
-		and lock_grant()), so the vector may be empty. The
-		locks must be freed in the reverse order from the one
-		in which they were acquired, to avoid traversing the
-		vector unnecessarily. */
-		const bool	granted = !lock_get_wait(lock);
-
-		if (granted && !ib_vector_is_empty(trx->autoinc_locks)) {
-
-			lock_table_remove_autoinc_lock(lock, trx);
-		}
-
-		ut_a(table->n_waiting_or_granted_auto_inc_locks > 0);
-		table->n_waiting_or_granted_auto_inc_locks--;
-	}
-
-	UT_LIST_REMOVE(trx_locks, trx->lock.trx_locks, lock);
-	UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock);
-
-	MONITOR_INC(MONITOR_TABLELOCK_REMOVED);
-	MONITOR_DEC(MONITOR_NUM_TABLELOCK);
-}
-
-/*********************************************************************//**
-Enqueues a waiting request for a table lock which cannot be granted
-immediately. Checks for deadlocks.
-
-The waiting lock is created with lock_table_create() and then
-lock_deadlock_check_and_resolve() is run with the trx mutex released.
-If this transaction is chosen as the victim, the lock is removed again
-and DB_DEADLOCK is returned. With WITH_WSREP, DB_DEADLOCK is also
-returned, before any lock is created, if the transaction is already
-flagged as a deadlock victim.
-@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
-DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
-transaction was chosen as a victim, and we got the lock immediately:
-no need to wait then */
-static
-dberr_t
-lock_table_enqueue_waiting(
-/*=======================*/
-#ifdef WITH_WSREP
- lock_t* c_lock, /*!< in: conflicting lock */
-#endif
- ulint mode, /*!< in: lock mode this transaction is
- requesting */
- dict_table_t* table, /*!< in/out: table */
- que_thr_t* thr) /*!< in: query thread */
-{
- trx_t* trx;
- lock_t* lock;
- trx_id_t victim_trx_id;
- ulint sec;
- ulint ms;
-
- ut_ad(lock_mutex_own());
- ut_ad(!srv_read_only_mode);
-
- trx = thr_get_trx(thr);
- ut_ad(trx_mutex_own(trx));
-
- /* Test if there already is some other reason to suspend thread:
- we do not enqueue a lock request if the query thread should be
- stopped anyway */
-
- if (que_thr_stop(thr)) {
- ut_error; /* treated as a fatal error */
-
- return(DB_QUE_THR_SUSPENDED);
- }
-
- switch (trx_get_dict_operation(trx)) {
- case TRX_DICT_OP_NONE:
- break;
- case TRX_DICT_OP_TABLE:
- case TRX_DICT_OP_INDEX:
- /* Only reported (and asserted in debug builds); the
- request is enqueued regardless. */
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: a table lock wait happens"
- " in a dictionary operation!\n"
- "InnoDB: Table name ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(".\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n",
- stderr);
- ut_ad(0);
- }
-
- /* Enqueue the lock request that will wait to be granted */
-
-#ifdef WITH_WSREP
- if (trx->lock.was_chosen_as_deadlock_victim) {
- return(DB_DEADLOCK);
- }
- lock = lock_table_create(c_lock, table, mode | LOCK_WAIT, trx);
-#else
- lock = lock_table_create(table, mode | LOCK_WAIT, trx);
-#endif /* WITH_WSREP */
-
- /* Release the mutex to obey the latching order.
- This is safe, because lock_deadlock_check_and_resolve()
- is invoked when a lock wait is enqueued for the currently
- running transaction. Because trx is a running transaction
- (it is not currently suspended because of a lock wait),
- its state can only be changed by this thread, which is
- currently associated with the transaction. */
-
- trx_mutex_exit(trx);
-
- victim_trx_id = lock_deadlock_check_and_resolve(lock, trx);
-
- trx_mutex_enter(trx);
-
- if (victim_trx_id != 0) {
- ut_ad(victim_trx_id == trx->id);
-
- /* The order here is important, we don't want to
- lose the state of the lock before calling remove. */
- lock_table_remove_low(lock);
- lock_reset_lock_and_trx_wait(lock);
-
- return(DB_DEADLOCK);
- } else if (trx->lock.wait_lock == NULL) {
- /* Deadlock resolution chose another transaction as a victim,
- and we accidentally got our lock granted! */
-
- return(DB_SUCCESS);
- }
-
- /* No deadlock, and the lock is still waiting: put the
- transaction into the lock wait state. */
- trx->lock.que_state = TRX_QUE_LOCK_WAIT;
-
- trx->lock.wait_started = ut_time();
- trx->lock.was_chosen_as_deadlock_victim = FALSE;
- trx->n_table_lock_waits++;
-
- if (UNIV_UNLIKELY(trx->take_stats)) {
- ut_usectime(&sec, &ms);
- trx->lock_que_wait_ustarted = (ib_uint64_t)sec * 1000000 + ms;
- }
-
- ut_a(que_thr_stop(thr));
-
- MONITOR_INC(MONITOR_TABLELOCK_WAIT);
-
- return(DB_LOCK_WAIT);
-}
-
-/*********************************************************************//**
-Checks if other transactions have an incompatible mode lock request in
-the lock queue. The table's lock list is walked from its last element
-towards the first, and the first conflicting request found is returned.
-The caller must hold the lock mutex.
-In WITH_WSREP builds, when wsrep_thd_is_wsrep() is true for the thread of
-trx, wsrep_kill_victim() is called on the conflicting lock, under the
-mutex of the transaction that owns that lock, before it is returned.
-@return lock or NULL */
-UNIV_INLINE
-const lock_t*
-lock_table_other_has_incompatible(
-/*==============================*/
- const trx_t* trx, /*!< in: transaction, or NULL if all
- transactions should be included */
- ulint wait, /*!< in: LOCK_WAIT if also
- waiting locks are taken into
- account, or 0 if not */
- const dict_table_t* table, /*!< in: table */
- enum lock_mode mode) /*!< in: lock mode */
-{
- const lock_t* lock;
-
- ut_ad(lock_mutex_own());
-
- for (lock = UT_LIST_GET_LAST(table->locks);
- lock != NULL;
- lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock)) {
-
- if (lock->trx != trx
- && !lock_mode_compatible(lock_get_mode(lock), mode)
- && (wait || !lock_get_wait(lock))) {
-
-#ifdef WITH_WSREP
- /* trx may be NULL (see the parameter description).
- In that case there is no requesting thread to inspect,
- so skip the wsrep handling instead of dereferencing
- a null pointer. */
- if (trx != NULL
- && wsrep_thd_is_wsrep(trx->mysql_thd)) {
- if (wsrep_debug) {
- fprintf(stderr, "WSREP: trx "
- TRX_ID_FMT
- " table lock abort\n",
- trx->id);
- }
- trx_mutex_enter(lock->trx);
- wsrep_kill_victim((trx_t *)trx, (lock_t *)lock);
- trx_mutex_exit(lock->trx);
- }
-#endif
-
- return(lock);
- }
- }
-
- return(NULL);
-}
-
-/*********************************************************************//**
-Locks the specified database table in the mode given. If the lock cannot
-be granted immediately, the query thread is put to wait.
-DB_SUCCESS is returned without taking any mutex when flags has
-BTR_NO_LOCKING_FLAG set, or when lock_table_has() reports that the
-transaction already holds an equal or stronger lock. For a transaction
-with fake_changes set, a LOCK_IX request is downgraded to LOCK_IS.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_table(
-/*=======*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
- does nothing */
- dict_table_t* table, /*!< in/out: database table
- in dictionary cache */
- enum lock_mode mode, /*!< in: lock mode */
- que_thr_t* thr) /*!< in: query thread */
-{
-#ifdef WITH_WSREP
- lock_t *c_lock = NULL;
-#endif
- trx_t* trx;
- dberr_t err;
- const lock_t* wait_for;
-
- ut_ad(table != NULL);
- ut_ad(thr != NULL);
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- ut_a(flags == 0);
-
- trx = thr_get_trx(thr);
-
- if (UNIV_UNLIKELY(trx->fake_changes && mode == LOCK_IX)) {
- mode = LOCK_IS;
- }
-
- /* Look for equal or stronger locks the same trx already
- has on the table. No need to acquire the lock mutex here
- because only this transaction can add/access table locks
- to/from trx_t::table_locks. */
-
- if (lock_table_has(trx, table, mode)) {
-
- return(DB_SUCCESS);
- }
-
- lock_mutex_enter();
-
- DBUG_EXECUTE_IF("fatal-semaphore-timeout",
- { os_thread_sleep(3600000000); });
-
- /* We have to check if the new lock is compatible with any locks
- other transactions have in the table lock queue. The call is
- the same with and without WITH_WSREP, so no conditional
- compilation is needed here. */
-
- wait_for = lock_table_other_has_incompatible(
- trx, LOCK_WAIT, table, mode);
-
- trx_mutex_enter(trx);
-
- /* Another trx has a request on the table in an incompatible
- mode: this trx may have to wait */
-
- if (wait_for != NULL) {
-#ifdef WITH_WSREP
- err = lock_table_enqueue_waiting((ib_lock_t*)wait_for, mode | flags, table, thr);
-#else
- err = lock_table_enqueue_waiting(mode | flags, table, thr);
-#endif
- } else {
-#ifdef WITH_WSREP
- lock_table_create(c_lock, table, mode | flags, trx);
-#else
- lock_table_create(table, mode | flags, trx);
-#endif
-
- ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
-
- err = DB_SUCCESS;
- }
-
- /* The lock mutex is released first, then the trx mutex. */
- lock_mutex_exit();
-
- trx_mutex_exit(trx);
-
- return(err);
-}
-
-/*********************************************************************//**
-Gives a resurrected (recovered) transaction a table-level IX lock on the
-table, unless lock_table_has() reports that it already holds one. */
-UNIV_INTERN
-void
-lock_table_ix_resurrect(
-/*====================*/
- dict_table_t* table, /*!< in/out: table */
- trx_t* trx) /*!< in/out: transaction */
-{
- ut_ad(trx->is_recovered);
-
- if (!lock_table_has(trx, table, LOCK_IX)) {
-
- lock_mutex_enter();
-
- /* Debug builds verify that no other transaction has a
- conflicting request, granted or waiting, in the table
- lock queue. */
- ut_ad(!lock_table_other_has_incompatible(
- trx, LOCK_WAIT, table, LOCK_IX));
-
- trx_mutex_enter(trx);
-
-#ifdef WITH_WSREP
- lock_table_create(NULL, table, LOCK_IX, trx);
-#else
- lock_table_create(table, LOCK_IX, trx);
-#endif
-
- /* Release in the order of acquisition: lock mutex first,
- then the trx mutex. */
- lock_mutex_exit();
- trx_mutex_exit(trx);
- }
-}
-
-/*********************************************************************//**
-Tells whether a waiting table lock request is still blocked by some
-request that precedes it in the lock queue of its table. The caller must
-hold the lock mutex.
-@return TRUE if still has to wait */
-static
-ibool
-lock_table_has_to_wait_in_queue(
-/*============================*/
- const lock_t* wait_lock) /*!< in: waiting table lock */
-{
- ut_ad(lock_mutex_own());
- ut_ad(lock_get_wait(wait_lock));
-
- const dict_table_t* tab = wait_lock->un_member.tab_lock.table;
- const lock_t* ahead = UT_LIST_GET_FIRST(tab->locks);
-
- /* Only the requests queued in front of wait_lock are examined. */
- while (ahead != wait_lock) {
-
- if (lock_has_to_wait(wait_lock, ahead)) {
-
- return(TRUE);
- }
-
- ahead = UT_LIST_GET_NEXT(un_member.tab_lock.locks, ahead);
- }
-
- return(FALSE);
-}
-
-/*************************************************************//**
-Removes a table lock request, waiting or granted, from the queue and grants
-locks to other transactions in the queue, if they now are entitled to a
-lock. Only the requests queued behind in_lock are re-examined. The caller
-must hold the lock mutex. */
-static
-void
-lock_table_dequeue(
-/*===============*/
- lock_t* in_lock)/*!< in/out: table lock object; transactions waiting
- behind will get their lock requests granted, if
- they are now qualified to it */
-{
- lock_t* lock;
-
- ut_ad(lock_mutex_own());
- ut_a(lock_get_type_low(in_lock) == LOCK_TABLE);
-
- lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock); /* read the
- successor before in_lock is removed from the queue below */
-
- lock_table_remove_low(in_lock);
-
- /* Check if waiting locks in the queue can now be granted: grant
- locks if there are no conflicting locks ahead. */
-
- for (/* No op */;
- lock != NULL;
- lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
-
- if (lock_get_wait(lock)
- && !lock_table_has_to_wait_in_queue(lock)) {
-
- /* Grant the lock */
- ut_ad(in_lock->trx != lock->trx);
- lock_grant(lock, false);
- }
- }
-}
-
-/*=========================== LOCK RELEASE ==============================*/
-static /* Walks the record lock hash chain from first_lock (when it is found
-in the chain) and grants every waiting lock on the same space, page and
-heap_no for which lock_rec_has_to_wait_in_queue() returns false. Each
-granted lock is unlinked from its position and passed to
-lock_rec_insert_to_head(). */
-void
-lock_grant_and_move_on_rec(
- lock_t* first_lock, /*!< in: lock at which the scan starts; its
- space and page_no select the locks considered */
- ulint heap_no) /*!< in: heap number of the record */
-{
- lock_t* lock;
- lock_t* previous;
- ulint space;
- ulint page_no;
- ulint rec_fold;
-
- space = first_lock->un_member.rec_lock.space;
- page_no = first_lock->un_member.rec_lock.page_no;
- rec_fold = lock_rec_fold(space, page_no);
-
- previous = (lock_t *) hash_get_nth_cell(lock_sys->rec_hash,
- hash_calc_hash(rec_fold, lock_sys->rec_hash))->node; /* first lock
- in the hash cell that rec_fold maps to */
- if (previous == NULL) {
- return;
- }
- if (previous == first_lock) {
- lock = previous;
- } else {
- while (previous->hash &&
- previous->hash != first_lock) {
- previous = (lock_t *) previous->hash;
- }
- lock = (lock_t *) previous->hash; /* first_lock, or NULL when the
- end of the chain was reached without finding it */
- }
- /* Grant locks if there are no conflicting locks ahead.
- Move granted locks to the head of the list. */
- for (;lock != NULL;) {
-
- /* If the lock is a wait lock on this page, and it does not need to wait. */
- if (lock->un_member.rec_lock.space == space
- && lock->un_member.rec_lock.page_no == page_no
- && lock_rec_get_nth_bit(lock, heap_no)
- && lock_get_wait(lock)
- && !lock_rec_has_to_wait_in_queue(lock)) {
-
- lock_grant(lock, false);
-
- if (previous != NULL) { /* NOTE(review): previous appears to be
- never NULL here (a NULL chain head returns early and
- every later assignment stores a non-NULL pointer), so
- the else branch below looks unreachable -- confirm */
- /* Move the lock to the head of the list. */
- HASH_GET_NEXT(hash, previous) = HASH_GET_NEXT(hash, lock);
- lock_rec_insert_to_head(lock, rec_fold);
- } else {
- /* Already at the head of the list. */
- previous = lock;
- }
- /* Move on to the next lock. */
- lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, previous));
- } else {
- previous = lock;
- lock = static_cast<lock_t *>(HASH_GET_NEXT(hash, lock));
- }
- }
-}
-
-/*************************************************************//**
-Removes a granted record lock of a transaction from the queue and grants
-locks to other transactions waiting in the queue if they now are entitled
-to a lock.
-If the transaction has no lock of the given mode on the record, nothing is
-released: both mutexes are dropped and an error naming the current
-statement is logged. */
-UNIV_INTERN
-void
-lock_rec_unlock(
-/*============*/
- trx_t* trx, /*!< in/out: transaction that has
- set a record lock */
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec, /*!< in: record */
- enum lock_mode lock_mode)/*!< in: LOCK_S or LOCK_X */
-{
- lock_t* first_lock;
- lock_t* lock;
- ulint heap_no;
- const char* stmt;
- size_t stmt_len;
-
- ut_ad(trx);
- ut_ad(rec);
- ut_ad(block->frame == page_align(rec));
- ut_ad(!trx->lock.wait_lock);
- ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
-
- heap_no = page_rec_get_heap_no(rec);
-
- lock_mutex_enter();
- trx_mutex_enter(trx);
-
- first_lock = lock_rec_get_first(block, heap_no);
-
- /* Find the first lock with the same lock_mode and transaction
- on the record. */
-
- for (lock = first_lock; lock != NULL;
- lock = lock_rec_get_next(heap_no, lock)) {
- if (lock->trx == trx && lock_get_mode(lock) == lock_mode) {
- goto released;
- }
- }
-
- lock_mutex_exit();
- trx_mutex_exit(trx);
-
- stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "unlock row could not find a %u mode lock on the record;"
- " statement=%.*s",
- lock_mode,
- (int) stmt_len, stmt);
-
- return;
-
-released:
- ut_a(!lock_get_wait(lock));
- lock_rec_reset_nth_bit(lock, heap_no); /* only the bit for this record
- is cleared in the lock found above */
-
- if (innodb_lock_schedule_algorithm
- == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS ||
- thd_is_replication_slave_thread(lock->trx->mysql_thd)) {
-
- /* Check if we can now grant waiting lock requests */
-
- for (lock = first_lock; lock != NULL;
- lock = lock_rec_get_next(heap_no, lock)) {
- if (lock_get_wait(lock)
- && !lock_rec_has_to_wait_in_queue(lock)) {
-
- /* Grant the lock */
- ut_ad(trx != lock->trx);
- lock_grant(lock, false);
- }
- }
- } else {
- lock_grant_and_move_on_rec(first_lock, heap_no); /* other
- scheduling algorithms: grant and re-link the granted locks */
- }
-
- lock_mutex_exit();
- trx_mutex_exit(trx);
-}
-
-/*********************************************************************//**
-Releases transaction locks, and releases possible other transactions waiting
-because of these locks.
-Locks are dequeued one at a time from the tail of trx->lock.trx_locks until
-the list is empty. The caller must hold the lock mutex and must not hold
-the trx mutex. The lock mutex is briefly released and re-acquired each time
-the counter reaches LOCK_RELEASE_INTERVAL. */
-static
-void
-lock_release(
-/*=========*/
- trx_t* trx) /*!< in/out: transaction */
-{
- lock_t* lock;
- ulint count = 0;
- trx_id_t max_trx_id;
-
- ut_ad(lock_mutex_own());
- ut_ad(!trx_mutex_own(trx));
-
- max_trx_id = trx_sys_get_max_trx_id();
-
- for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
- lock != NULL;
- lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) {
-
- if (lock_get_type_low(lock) == LOCK_REC) {
-
-#ifdef UNIV_DEBUG
- /* Check if the transaction locked a record
- in a system table in X mode. It should have set
- the dict_op code correctly if it did. */
- if (lock->index->table->id < DICT_HDR_FIRST_ID
- && lock_get_mode(lock) == LOCK_X) {
-
- ut_ad(lock_get_mode(lock) != LOCK_IX);
- ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
- }
-#endif /* UNIV_DEBUG */
-
- lock_rec_dequeue_from_page(lock);
- } else {
- dict_table_t* table;
-
- table = lock->un_member.tab_lock.table;
-#ifdef UNIV_DEBUG
- ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
-
- /* Check if the transaction locked a system table
- in X or IX mode. It should have set the dict_op code
- correctly if it did. */
- if (table->id < DICT_HDR_FIRST_ID
- && (lock_get_mode(lock) == LOCK_X
- || lock_get_mode(lock) == LOCK_IX)) {
-
- ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
- }
-#endif /* UNIV_DEBUG */
-
- if (lock_get_mode(lock) != LOCK_IS
- && trx->undo_no != 0) {
-
- /* The trx may have modified the table. We
- block the use of the MySQL query cache for
- all currently active transactions. */
-
- table->query_cache_inv_trx_id = max_trx_id;
- }
-
- lock_table_dequeue(lock);
- }
-
- if (count == LOCK_RELEASE_INTERVAL) {
- /* Release the mutex for a while, so that we
- do not monopolize it */
-
- lock_mutex_exit();
-
- lock_mutex_enter();
-
- count = 0;
- }
-
- ++count;
- }
-
- /* We don't remove the locks one by one from the vector for
- efficiency reasons. We simply reset it because we would have
- released all the locks anyway. */
-
- ib_vector_reset(trx->lock.table_locks);
-
- ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
- ut_a(ib_vector_is_empty(trx->autoinc_locks));
- ut_a(ib_vector_is_empty(trx->lock.table_locks));
-
- mem_heap_empty(trx->lock.lock_heap);
-}
-
-/* True if a lock mode is S or X. The argument appears twice in the
-expansion, so it must not have side effects. */
-#define IS_LOCK_S_OR_X(lock) \
- (lock_get_mode(lock) == LOCK_S \
- || lock_get_mode(lock) == LOCK_X)
-
-/*********************************************************************//**
-Removes table locks of the transaction on a table to be dropped.
-More precisely, the entry for lock_to_remove in the table_locks vector of
-the owning transaction is overwritten with NULL; the vector itself is not
-shrunk. The lock must be present in the vector, otherwise ut_error is
-raised. The caller must hold the lock mutex. The trx mutex is acquired
-here unless trx->lock.cancel is set, in which case debug builds assert
-that it is already owned. */
-static
-void
-lock_trx_table_locks_remove(
-/*========================*/
- const lock_t* lock_to_remove) /*!< in: lock to remove */
-{
- lint i;
- trx_t* trx = lock_to_remove->trx;
-
- ut_ad(lock_mutex_own());
-
- /* It is safe to read this because we are holding the lock mutex */
- if (!trx->lock.cancel) {
- trx_mutex_enter(trx);
- } else {
- ut_ad(trx_mutex_own(trx));
- }
-
- for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) {
- const lock_t* lock;
-
- lock = *static_cast<lock_t**>(
- ib_vector_get(trx->lock.table_locks, i));
-
- if (lock == NULL) {
- continue; /* slot already cleared earlier */
- }
-
- ut_a(trx == lock->trx);
- ut_a(lock_get_type_low(lock) & LOCK_TABLE);
- ut_a(lock->un_member.tab_lock.table != NULL);
-
- if (lock == lock_to_remove) {
- void* null_var = NULL;
- ib_vector_set(trx->lock.table_locks, i, &null_var);
-
- if (!trx->lock.cancel) {
- trx_mutex_exit(trx);
- }
-
- return;
- }
- }
-
- if (!trx->lock.cancel) {
- trx_mutex_exit(trx);
- }
-
- /* Lock must exist in the vector. */
- ut_error;
-}
-
-/*********************************************************************//**
-Removes locks of a transaction on a table to be dropped.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock that is going to be removed is allowed to be a wait lock.
-The transaction's lock list is walked from its last element to its first.
-The caller must hold the lock mutex. */
-static
-void
-lock_remove_all_on_table_for_trx(
-/*=============================*/
- dict_table_t* table, /*!< in: table to be dropped */
- trx_t* trx, /*!< in: a transaction */
- ibool remove_also_table_sx_locks)/*!< in: also removes
- table S and X locks */
-{
- lock_t* lock;
- lock_t* prev_lock;
-
- ut_ad(lock_mutex_own());
-
- for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
- lock != NULL;
- lock = prev_lock) {
-
- prev_lock = UT_LIST_GET_PREV(trx_locks, lock); /* saved before
- lock may be removed from the list below */
-
- if (lock_get_type_low(lock) == LOCK_REC
- && lock->index->table == table) {
- ut_a(!lock_get_wait(lock));
-
- lock_rec_discard(lock);
- } else if (lock_get_type_low(lock) & LOCK_TABLE
- && lock->un_member.tab_lock.table == table
- && (remove_also_table_sx_locks
- || !IS_LOCK_S_OR_X(lock))) {
-
- ut_a(!lock_get_wait(lock));
-
- lock_trx_table_locks_remove(lock);
- lock_table_remove_low(lock);
- }
- }
-}
-
-/*******************************************************************//**
-Remove any explicit record locks held by recovering transactions on
-the table. Table locks that such transactions hold on the table are
-removed as well. Only transactions in trx_sys->rw_trx_list with
-is_recovered set are processed. The caller must hold the lock mutex;
-trx_sys->mutex is held for the duration of the scan.
-@return number of recovered transactions examined */
-static
-ulint
-lock_remove_recovered_trx_record_locks(
-/*===================================*/
- dict_table_t* table) /*!< in: check if there are any locks
- held on records in this table or on the
- table itself */
-{
- trx_t* trx;
- ulint n_recovered_trx = 0;
-
- ut_a(table != NULL);
- ut_ad(lock_mutex_own());
-
- mutex_enter(&trx_sys->mutex);
-
- for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
- lock_t* lock;
- lock_t* next_lock;
-
- assert_trx_in_rw_list(trx);
-
- if (!trx->is_recovered) {
- continue;
- }
-
- /* Because we are holding the lock_sys->mutex,
- implicit locks cannot be converted to explicit ones
- while we are scanning the explicit locks. */
-
- for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
- lock != NULL;
- lock = next_lock) {
-
- ut_a(lock->trx == trx);
-
- /* Recovered transactions can't wait on a lock. */
-
- ut_a(!lock_get_wait(lock));
-
- next_lock = UT_LIST_GET_NEXT(trx_locks, lock); /* saved
- before lock may be removed below */
-
- switch (lock_get_type_low(lock)) {
- default:
- ut_error;
- case LOCK_TABLE:
- if (lock->un_member.tab_lock.table == table) {
- lock_trx_table_locks_remove(lock);
- lock_table_remove_low(lock);
- }
- break;
- case LOCK_REC:
- if (lock->index->table == table) {
- lock_rec_discard(lock);
- }
- }
- }
-
- ++n_recovered_trx;
- }
-
- mutex_exit(&trx_sys->mutex);
-
- return(n_recovered_trx);
-}
-
-/*********************************************************************//**
-Removes locks on a table to be dropped or truncated.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock, that is going to be removed, is allowed to be a wait lock.
-The lock mutex is acquired and released inside this function. */
-UNIV_INTERN
-void
-lock_remove_all_on_table(
-/*=====================*/
- dict_table_t* table, /*!< in: table to be dropped
- or truncated */
- ibool remove_also_table_sx_locks)/*!< in: also removes
- table S and X locks */
-{
- lock_t* lock;
-
- lock_mutex_enter();
-
- for (lock = UT_LIST_GET_FIRST(table->locks);
- lock != NULL;
- /* No op */) {
-
- lock_t* prev_lock;
-
- prev_lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock); /* kept
- so the walk can resume if lock is removed by the call below */
-
- /* If we should remove all locks (remove_also_table_sx_locks
- is TRUE), or if the lock is not table-level S or X lock,
- then check we are not going to remove a wait lock. */
- if (remove_also_table_sx_locks
- || !(lock_get_type(lock) == LOCK_TABLE
- && IS_LOCK_S_OR_X(lock))) {
-
- ut_a(!lock_get_wait(lock));
- }
-
- lock_remove_all_on_table_for_trx(
- table, lock->trx, remove_also_table_sx_locks);
-
- if (prev_lock == NULL) {
- if (lock == UT_LIST_GET_FIRST(table->locks)) {
- /* lock was not removed, pick its successor */
- lock = UT_LIST_GET_NEXT(
- un_member.tab_lock.locks, lock);
- } else {
- /* lock was removed, pick the first one */
- lock = UT_LIST_GET_FIRST(table->locks);
- }
- } else if (UT_LIST_GET_NEXT(un_member.tab_lock.locks,
- prev_lock) != lock) {
- /* If lock was removed by
- lock_remove_all_on_table_for_trx() then pick the
- successor of prev_lock ... */
- lock = UT_LIST_GET_NEXT(
- un_member.tab_lock.locks, prev_lock);
- } else {
- /* ... otherwise pick the successor of lock. */
- lock = UT_LIST_GET_NEXT(
- un_member.tab_lock.locks, lock);
- }
- }
-
- /* Note: Recovered transactions don't have table level IX or IS locks
- but can have implicit record locks that have been converted to explicit
- record locks. Such record locks cannot be freed by traversing the
- transaction lock list in dict_table_t (as above).
- Once the scan below reports that no recovered transaction was found,
- rollback_complete is set and later calls skip the scan. */
-
- if (!lock_sys->rollback_complete
- && lock_remove_recovered_trx_record_locks(table) == 0) {
-
- lock_sys->rollback_complete = TRUE;
- }
-
- lock_mutex_exit();
-}
-
-/*===================== VALIDATION AND DEBUGGING ====================*/
-
-/*********************************************************************//**
-Prints info of a table lock: the table name, the id of the owning
-transaction, the lock mode, whether the request is waiting, and the hold
-and wait times. The caller must hold the lock mutex. */
-UNIV_INTERN
-void
-lock_table_print(
-/*=============*/
- FILE* file, /*!< in: file where to print */
- const lock_t* lock) /*!< in: table type lock */
-{
- ut_ad(lock_mutex_own());
- ut_a(lock_get_type_low(lock) == LOCK_TABLE);
-
- fputs("TABLE LOCK table ", file);
- ut_print_name(file, lock->trx, TRUE,
- lock->un_member.tab_lock.table->name);
- fprintf(file, " trx id " TRX_ID_FMT, lock->trx->id);
-
- if (lock_get_mode(lock) == LOCK_S) {
- fputs(" lock mode S", file);
- } else if (lock_get_mode(lock) == LOCK_X) {
- fputs(" lock mode X", file);
- } else if (lock_get_mode(lock) == LOCK_IS) {
- fputs(" lock mode IS", file);
- } else if (lock_get_mode(lock) == LOCK_IX) {
- fputs(" lock mode IX", file);
- } else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
- fputs(" lock mode AUTO-INC", file);
- } else {
- fprintf(file, " unknown lock mode %lu",
- (ulong) lock_get_mode(lock));
- }
-
- if (lock_get_wait(lock)) {
- fputs(" waiting", file);
- }
-
- /* %lu expects unsigned long; cast explicitly, as the other
- prints in this file do, so that the arguments match the format
- on platforms where ulint is wider than unsigned long. */
- fprintf(file, " lock hold time %lu wait time before grant %lu ",
- (ulong) difftime(ut_time(), lock->requested_time),
- (ulong) lock->wait_time);
-
- putc('\n', file);
-}
-
-/*********************************************************************//**
-Prints info of a record lock: tablespace id, page number, number of bits,
-index name, table lock counts, owning transaction id, lock mode and flags,
-and hold and wait times. When srv_show_verbose_locks is set, every heap
-number whose bit is set in the lock is listed too, together with the
-record contents if buf_page_try_get() returns the page. The caller must
-hold the lock mutex. */
-UNIV_INTERN
-void
-lock_rec_print(
-/*===========*/
- FILE* file, /*!< in: file where to print */
- const lock_t* lock) /*!< in: record type lock */
-{
- const buf_block_t* block;
- ulint space;
- ulint page_no;
- ulint i;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(lock_mutex_own());
- ut_a(lock_get_type_low(lock) == LOCK_REC);
-
- space = lock->un_member.rec_lock.space;
- page_no = lock->un_member.rec_lock.page_no;
-
- fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu ",
- (ulong) space, (ulong) page_no,
- (ulong) lock_rec_get_n_bits(lock));
-
- dict_index_name_print(file, lock->trx, lock->index);
-
- /* Print number of table locks. The counts are cast because %lu
- expects unsigned long, like the other prints in this function. */
- fprintf(file, " trx table locks %lu total table locks %lu ",
- (ulong) ib_vector_size(lock->trx->lock.table_locks),
- (ulong) UT_LIST_GET_LEN(lock->index->table->locks));
-
- fprintf(file, " trx id " TRX_ID_FMT, lock->trx->id);
-
- if (lock_get_mode(lock) == LOCK_S) {
- fputs(" lock mode S", file);
- } else if (lock_get_mode(lock) == LOCK_X) {
- fputs(" lock_mode X", file);
- } else {
- ut_error;
- }
-
- if (lock_rec_get_gap(lock)) {
- fputs(" locks gap before rec", file);
- }
-
- if (lock_rec_get_rec_not_gap(lock)) {
- fputs(" locks rec but not gap", file);
- }
-
- if (lock_rec_get_insert_intention(lock)) {
- fputs(" insert intention", file);
- }
-
- if (lock_get_wait(lock)) {
- fputs(" waiting", file);
- }
-
- mtr_start(&mtr);
-
- fprintf(file, " lock hold time %lu wait time before grant %lu ",
- (ulong) difftime(ut_time(), lock->requested_time),
- (ulong) lock->wait_time);
-
- putc('\n', file);
-
- if ( srv_show_verbose_locks ) {
- block = buf_page_try_get(space, page_no, &mtr);
-
- for (i = 0; i < lock_rec_get_n_bits(lock); ++i) {
-
- if (!lock_rec_get_nth_bit(lock, i)) {
- continue;
- }
-
- fprintf(file, "Record lock, heap no %lu", (ulong) i);
-
- if (block) {
- const rec_t* rec;
-
- rec = page_find_rec_with_heap_no(
- buf_block_get_frame(block), i);
-
- offsets = rec_get_offsets(
- rec, lock->index, offsets,
- ULINT_UNDEFINED, &heap);
-
- putc(' ', file);
- rec_print_new(file, rec, offsets);
- }
-
- putc('\n', file);
- }
- }
-
- mtr_commit(&mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-#ifdef UNIV_DEBUG
-/* Print the number of lock structs from lock_print_info_summary() only
-in non-production builds for performance reasons, see
-http://bugs.mysql.com/36942
-When defined, this macro enables lock_get_n_rec_locks() and the line of
-output that reports its result. */
-#define PRINT_NUM_OF_LOCK_STRUCTS
-#endif /* UNIV_DEBUG */
-
-#ifdef PRINT_NUM_OF_LOCK_STRUCTS
-/*********************************************************************//**
-Counts the lock structs chained in every cell of the record lock hash
-table. The caller must hold the lock mutex.
-@return number of record locks */
-static
-ulint
-lock_get_n_rec_locks(void)
-/*======================*/
-{
- ulint total = 0;
-
- ut_ad(lock_mutex_own());
-
- for (ulint cell = 0;
- cell < hash_get_n_cells(lock_sys->rec_hash);
- cell++) {
-
- const lock_t* node = static_cast<const lock_t*>(
- HASH_GET_FIRST(lock_sys->rec_hash, cell));
-
- /* Follow the chain of this cell to its end. */
- while (node != 0) {
- total++;
-
- node = static_cast<const lock_t*>(
- HASH_GET_NEXT(hash, node));
- }
- }
-
- return(total);
-}
-#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
-
-/*********************************************************************//**
-Prints the summary part of the lock information: the latest detected
-deadlock (if one was found), the transaction id counter, the purge
-position and state, and the history list length. When TRUE is returned
-the lock mutex is still held by the caller.
-@return FALSE if not able to obtain lock mutex
-and exits without printing info */
-UNIV_INTERN
-ibool
-lock_print_info_summary(
-/*====================*/
- FILE* file, /*!< in: file where to print */
- ibool nowait) /*!< in: whether to wait for the lock mutex */
-{
- if (nowait) {
- /* Do not block: give up at once if the mutex cannot
- be obtained. */
- if (lock_mutex_enter_nowait()) {
- fputs("FAIL TO OBTAIN LOCK MUTEX, "
- "SKIP LOCK INFO PRINTING\n", file);
- return(FALSE);
- }
- } else {
- lock_mutex_enter();
- }
-
- if (lock_deadlock_found) {
- fputs("------------------------\n"
- "LATEST DETECTED DEADLOCK\n"
- "------------------------\n", file);
-
- if (!srv_read_only_mode) {
- ut_copy_file(file, lock_latest_err_file);
- }
- }
-
- fputs("------------\n"
- "TRANSACTIONS\n"
- "------------\n", file);
-
- fprintf(file, "Trx id counter " TRX_ID_FMT "\n",
- trx_sys_get_max_trx_id());
-
- fprintf(file,
- "Purge done for trx's n:o < " TRX_ID_FMT
- " undo n:o < " TRX_ID_FMT " state: ",
- purge_sys->iter.trx_no,
- purge_sys->iter.undo_no);
-
- /* The purge state is read without its latch: taking the latch
- here would violate the latching order, and the value is only
- used for display. */
-
- switch (purge_sys->state){
- case PURGE_STATE_INIT:
- /* Should never be in this state while the system is running. */
- ut_error;
-
- case PURGE_STATE_EXIT:
- fputs("exited", file);
- break;
-
- case PURGE_STATE_DISABLED:
- fputs("disabled", file);
- break;
-
- case PURGE_STATE_RUN:
- fputs("running", file);
- /* Check if it is waiting for more data to arrive. */
- if (!purge_sys->running) {
- fputs(" but idle", file);
- }
- break;
-
- case PURGE_STATE_STOP:
- fputs("stopped", file);
- break;
- }
-
- fputs("\n", file);
-
- fprintf(file,
- "History list length %lu\n",
- (ulong) trx_sys->rseg_history_len);
-
-#ifdef PRINT_NUM_OF_LOCK_STRUCTS
- fprintf(file,
- "Total number of lock structs in row lock hash table %lu\n",
- (ulong) lock_get_n_rec_locks());
-#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
- return(TRUE);
-}
-
-/*********************************************************************//**
-Prints info of locks for each transaction. This function assumes that the
-caller holds the lock mutex and more importantly it will release the lock
-mutex on behalf of the caller. (This should be fixed in the future).
-Not-started transactions of the MySQL transaction list are printed first,
-then the read-write list and after it the read-only list. Both mutexes
-are temporarily released while a page is being read for a record lock, so
-the position is tracked by the counters nth_trx and nth_lock and
-re-established from the list heads after every step. */
-UNIV_INTERN
-void
-lock_print_info_all_transactions(
-/*=============================*/
- FILE* file) /*!< in: file where to print */
-{
- const lock_t* lock;
- ibool load_page_first = TRUE;
- ulint nth_trx = 0;
- ulint nth_lock = 0;
- ulint i;
- mtr_t mtr;
- const trx_t* trx;
- trx_list_t* trx_list = &trx_sys->rw_trx_list;
-
- fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
-
- ut_ad(lock_mutex_own());
-
- mutex_enter(&trx_sys->mutex);
-
- /* First print info on non-active transactions */
-
- /* NOTE: information of auto-commit non-locking read-only
- transactions will be omitted here. The information will be
- available from INFORMATION_SCHEMA.INNODB_TRX. */
-
- for (trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(mysql_trx_list, trx)) {
-
- ut_ad(trx->in_mysql_trx_list);
-
- /* See state transitions and locking rules in trx0trx.h */
-
- if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
- fputs("---", file);
- trx_print_latched(file, trx, 600);
- }
- }
-
-loop:
- /* Since we temporarily release lock_sys->mutex and
- trx_sys->mutex when reading a database page in below,
- variable trx may be obsolete now and we must loop
- through the trx list to get probably the same trx,
- or some other trx. */
-
- for (trx = UT_LIST_GET_FIRST(*trx_list), i = 0;
- trx && (i < nth_trx);
- trx = UT_LIST_GET_NEXT(trx_list, trx), i++) {
-
- assert_trx_in_list(trx);
- ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
- }
-
- ut_ad(trx == NULL
- || trx->read_only == (trx_list == &trx_sys->ro_trx_list));
-
- if (trx == NULL) {
- /* Check the read-only transaction list next. */
- if (trx_list == &trx_sys->rw_trx_list) {
- trx_list = &trx_sys->ro_trx_list;
- nth_trx = 0;
- nth_lock = 0;
- goto loop;
- }
-
- lock_mutex_exit();
- mutex_exit(&trx_sys->mutex);
-
- ut_ad(lock_validate());
-
- return;
- }
-
- assert_trx_in_list(trx);
-
- if (nth_lock == 0) {
- fputs("---", file);
-
- trx_print_latched(file, trx, 600);
-
- if (trx->read_view) {
- fprintf(file,
- "Trx read view will not see trx with"
- " id >= " TRX_ID_FMT
- ", sees < " TRX_ID_FMT "\n",
- trx->read_view->low_limit_id,
- trx->read_view->up_limit_id);
- }
-
- /* Total trx lock waits and times. The counters are cast
- because %lu expects unsigned long, as is done for the other
- %lu arguments in this function. */
- fprintf(file, "Trx #rec lock waits %lu #table lock waits %lu\n",
- (ulong) trx->n_rec_lock_waits,
- (ulong) trx->n_table_lock_waits);
- fprintf(file, "Trx total rec lock wait time %lu SEC\n",
- (ulong) trx->total_rec_lock_wait_time);
- fprintf(file, "Trx total table lock wait time %lu SEC\n",
- (ulong) trx->total_table_lock_wait_time);
-
- if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
-
- fprintf(file,
- "------- TRX HAS BEEN WAITING %lu SEC"
- " FOR THIS LOCK TO BE GRANTED:\n",
- (ulong) difftime(ut_time(),
- trx->lock.wait_started));
-
- if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) {
- lock_rec_print(file, trx->lock.wait_lock);
- } else {
- lock_table_print(file, trx->lock.wait_lock);
- }
-
- fputs("------------------\n", file);
- }
- }
-
- if (!srv_print_innodb_lock_monitor || !srv_show_locks_held) {
- nth_trx++;
- goto loop;
- }
-
- i = 0;
-
- /* Look at the note about the trx loop above why we loop here:
- lock may be an obsolete pointer now. */
-
- lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
-
- while (lock && (i < nth_lock)) {
- lock = UT_LIST_GET_NEXT(trx_locks, lock);
- i++;
- }
-
- if (lock == NULL) {
- nth_trx++;
- nth_lock = 0;
-
- goto loop;
- }
-
- if (lock_get_type_low(lock) == LOCK_REC) {
- if (load_page_first) {
- ulint space_id = lock->un_member.rec_lock.space;
- /* Check whether the tablespace exists. Only
- when the space is valid, try to get the page. */
- fil_space_t* space = fil_space_acquire(space_id);
- ulint page_no = lock->un_member.rec_lock.page_no;
-
- if (!space) {
-
- /* It is a single table tablespace and
- the .ibd file is missing (TRUNCATE
- TABLE probably stole the locks): just
- print the lock without attempting to
- load the page in the buffer pool. */
-
- fprintf(file, "RECORD LOCKS on"
- " non-existing space: " ULINTPF "\n",
- space_id);
- goto print_rec;
- }
-
- const ulint zip_size = fsp_flags_get_zip_size(space->flags);
-
- lock_mutex_exit();
- mutex_exit(&trx_sys->mutex);
-
- if (srv_show_verbose_locks) {
-
- DEBUG_SYNC_C("innodb_monitor_before_lock_page_read");
-
- if (space) {
- mtr_start(&mtr);
-
- buf_page_get_gen(space_id, zip_size,
- page_no, RW_NO_LATCH,
- NULL,
- BUF_GET_POSSIBLY_FREED,
- __FILE__, __LINE__,
- &mtr);
-
- mtr_commit(&mtr);
-
- }
- }
-
- fil_space_release(space);
-
- /* The next pass over this lock prints it instead of
- loading the page again. */
- load_page_first = FALSE;
-
- lock_mutex_enter();
-
- mutex_enter(&trx_sys->mutex);
-
- goto loop;
- }
-
-print_rec:
- lock_rec_print(file, lock);
- } else {
- ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
-
- lock_table_print(file, lock);
- }
-
- load_page_first = TRUE;
-
- nth_lock++;
-
- if (nth_lock >= srv_show_locks_held) {
- fputs("TOO MANY LOCKS PRINTED FOR THIS TRX:"
- " SUPPRESSING FURTHER PRINTS\n",
- file);
-
- nth_trx++;
- nth_lock = 0;
- }
-
- goto loop;
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Find the lock in the trx_t::trx_lock_t::table_locks vector.
-Every non-NULL entry of the vector is also checked for consistency. The
-trx mutex is held while the vector is scanned.
-@return TRUE if found */
-static
-ibool
-lock_trx_table_locks_find(
-/*======================*/
- trx_t* trx, /*!< in: trx to validate */
- const lock_t* find_lock) /*!< in: lock to find */
-{
- ibool seen = FALSE;
-
- trx_mutex_enter(trx);
-
- for (lint pos = ib_vector_size(trx->lock.table_locks) - 1;
- pos >= 0;
- --pos) {
-
- const lock_t* entry = *static_cast<const lock_t**>(
- ib_vector_get(trx->lock.table_locks, pos));
-
- if (entry != NULL) {
-
- if (entry == find_lock) {
- /* The lock may occur only once. */
- ut_a(!seen);
- seen = TRUE;
- }
-
- ut_a(trx == entry->trx);
- ut_a(lock_get_type_low(entry) & LOCK_TABLE);
- ut_a(entry->un_member.tab_lock.table != NULL);
- }
- }
-
- trx_mutex_exit(trx);
-
- return(seen);
-}
-
-/*********************************************************************//**
-Checks every lock in the lock queue of a table. The caller must own the
-lock mutex and trx_sys->mutex (both are asserted). A granted lock must
-not coexist with an incompatible lock of another transaction, a waiting
-lock must still have a reason to wait, and each lock must be registered
-in the table_locks vector of its transaction. Violations fail an
-assertion.
-@return TRUE if ok */
-static
-ibool
-lock_table_queue_validate(
-/*======================*/
-	const dict_table_t*	table)	/*!< in: table */
-{
-	ut_ad(lock_mutex_own());
-	ut_ad(mutex_own(&trx_sys->mutex));
-
-	const lock_t*	cur = UT_LIST_GET_FIRST(table->locks);
-
-	while (cur != NULL) {
-
-		/* cur->trx->state cannot change from or to NOT_STARTED
-		while we are holding the trx_sys->mutex. It may change
-		from ACTIVE to PREPARED, but it may not change to
-		COMMITTED, because we are holding the lock_sys->mutex. */
-		ut_ad(trx_assert_started(cur->trx));
-
-		if (lock_get_wait(cur)) {
-
-			ut_a(lock_table_has_to_wait_in_queue(cur));
-		} else {
-
-			ut_a(!lock_table_other_has_incompatible(
-				     cur->trx, 0, table,
-				     lock_get_mode(cur)));
-		}
-
-		ut_a(lock_trx_table_locks_find(cur->trx, cur));
-
-		cur = UT_LIST_GET_NEXT(un_member.tab_lock.locks, cur);
-	}
-
-	return(TRUE);
-}
-
-/*********************************************************************//**
-Validates the lock queue on a single record.
-If locked_lock_trx_sys is FALSE, the lock mutex and trx_sys->mutex are
-acquired here and released again before returning. Violations are
-reported through assertions; the function itself only ever returns TRUE.
-For a record that is not a user record only the per-lock checks of the
-first loop are made and the function then jumps to func_exit.
-@return TRUE if ok */
-static
-ibool
-lock_rec_queue_validate(
-/*====================*/
- ibool locked_lock_trx_sys,
- /*!< in: if the caller holds
- both the lock mutex and
- trx_sys_t->lock. */
- const buf_block_t* block, /*!< in: buffer block containing rec */
- const rec_t* rec, /*!< in: record to look at */
- const dict_index_t* index, /*!< in: index, or NULL if not known */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- const lock_t* lock;
- ulint heap_no;
-
- ut_a(rec);
- ut_a(block->frame == page_align(rec));
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
- ut_ad(lock_mutex_own() == locked_lock_trx_sys);
- ut_ad(!index || dict_index_is_clust(index)
- || !dict_index_is_online_ddl(index));
-
- heap_no = page_rec_get_heap_no(rec);
-
- if (!locked_lock_trx_sys) {
- lock_mutex_enter();
- mutex_enter(&trx_sys->mutex);
- }
-
- if (!page_rec_is_user_rec(rec)) {
-
- for (lock = lock_rec_get_first(block, heap_no);
- lock != NULL;
- lock = lock_rec_get_next_const(heap_no, lock)) {
-
- ut_a(trx_in_trx_list(lock->trx));
-
- if (lock_get_wait(lock)) {
- ut_a(lock_rec_has_to_wait_in_queue(lock));
- }
-
- if (index) {
- ut_a(lock->index == index);
- }
- }
-
- goto func_exit;
- }
-
- if (!index); /* empty statement: with no index known, the
- clustered-index check below is skipped */
- else if (dict_index_is_clust(index)) {
- trx_id_t trx_id;
- trx_id_t* trx_desc;
-
- /* Unlike the non-debug code, this invariant can only succeed
- if the check and assertion are covered by the lock mutex. */
-
- trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
- trx_desc = trx_find_descriptor(trx_sys->descriptors,
- trx_sys->descr_n_used,
- trx_id);
-
- ut_ad(lock_mutex_own());
- /* trx_id cannot be committed until lock_mutex_exit()
- because lock_trx_release_locks() acquires lock_sys->mutex */
-
- if (trx_desc != NULL
- && lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT,
- block, heap_no, trx_id)) {
-
- ut_ad(trx_id == *trx_desc);
- ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
- block, heap_no, trx_id));
- }
- }
-
- for (lock = lock_rec_get_first(block, heap_no);
- lock != NULL;
- lock = lock_rec_get_next_const(heap_no, lock)) {
-
- ut_a(trx_in_trx_list(lock->trx));
-
- if (index) {
- ut_a(lock->index == index);
- }
-
- if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
-
- enum lock_mode mode; /* mode passed to the
- lock_rec_other_has_expl_req() probe below:
- LOCK_X for an S lock, LOCK_S otherwise */
-
-
- if (lock_get_mode(lock) == LOCK_S) {
- mode = LOCK_X;
- } else {
- mode = LOCK_S;
- }
-
- const lock_t* other_lock
- = lock_rec_other_has_expl_req(
- mode, 0, 0, block, heap_no,
- lock->trx->id);
-#ifdef WITH_WSREP
- ut_a(!other_lock
- || wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE)
- || wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE));
-
-#else
- ut_a(!other_lock);
-#endif /* WITH_WSREP */
-
- } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)
- && innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS) {
- // If using VATS, it's possible that a wait lock is inserted to a place in the list
- // such that it does not need to wait.
- ut_a(lock_rec_has_to_wait_in_queue(lock));
- }
- }
-
- ut_ad(innodb_lock_schedule_algorithm == INNODB_LOCK_SCHEDULE_ALGORITHM_FCFS ||
- lock_queue_validate(lock)); /* NOTE(review): the loop above has no
- early exit, so lock is NULL here; confirm that
- lock_queue_validate() accepts NULL */
-
-func_exit:
- if (!locked_lock_trx_sys) {
- lock_mutex_exit();
- mutex_exit(&trx_sys->mutex);
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Validates the record lock queues on a page.
-The caller must not own the lock mutex (asserted); the lock mutex and
-trx_sys->mutex are acquired here and released before returning.
-The scan restarts from the first lock on the page (label loop) after
-every validated record; nth_lock and nth_bit record how far the earlier
-passes got, so each pass resumes at the next bit of the same lock or at
-the next lock. Violations are reported through assertions.
-@return TRUE if ok */
-static
-ibool
-lock_rec_validate_page(
-/*===================*/
- const buf_block_t* block) /*!< in: buffer block */
-{
- const lock_t* lock;
- const rec_t* rec;
- ulint nth_lock = 0; /* index of the lock on the page being checked */
- ulint nth_bit = 0; /* first bit of that lock not yet checked */
- ulint i;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(!lock_mutex_own());
-
- lock_mutex_enter();
- mutex_enter(&trx_sys->mutex);
-loop:
- lock = lock_rec_get_first_on_page_addr(buf_block_get_space(block),
- buf_block_get_page_no(block));
-
- if (!lock) {
- goto function_exit;
- }
-
-#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
- ut_a(!block->page.file_page_was_freed);
-#endif
-
- for (i = 0; i < nth_lock; i++) { /* skip the locks already done */
-
- lock = lock_rec_get_next_on_page_const(lock);
-
- if (!lock) {
- goto function_exit;
- }
- }
-
- ut_a(trx_in_trx_list(lock->trx));
-
-# ifdef UNIV_SYNC_DEBUG
- /* Only validate the record queues when this thread is not
- holding a space->latch. Deadlocks are possible due to
- latching order violation when UNIV_DEBUG is defined while
- UNIV_SYNC_DEBUG is not. */
- if (!sync_thread_levels_contains(SYNC_FSP))
-# endif /* UNIV_SYNC_DEBUG */
- for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
-
- if (i == 1 || lock_rec_get_nth_bit(lock, i)) { /* NOTE(review):
- heap number 1 is validated even when its bit
- is not set; presumably the supremum record,
- confirm */
-
- rec = page_find_rec_with_heap_no(block->frame, i);
- ut_a(rec);
- offsets = rec_get_offsets(rec, lock->index, offsets,
- ULINT_UNDEFINED, &heap);
-#if 0
- fprintf(stderr,
- "Validating %u %u\n",
- block->page.space, block->page.offset);
-#endif
- /* If this thread is holding the file space
- latch (fil_space_t::latch), the following
- check WILL break the latching order and may
- cause a deadlock of threads. */
-
- lock_rec_queue_validate(
- TRUE, block, rec, lock->index, offsets);
-
- nth_bit = i + 1;
-
- goto loop;
- }
- }
-
- nth_bit = 0; /* this lock is done: move on to the next one */
- nth_lock++;
-
- goto loop;
-
-function_exit:
- lock_mutex_exit();
- mutex_exit(&trx_sys->mutex);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(TRUE);
-}
-
-/*********************************************************************//**
-Runs lock_table_queue_validate() on the table of every table lock held
-by the transactions of one trx_sys list. The caller must own the lock
-mutex and trx_sys->mutex, and trx_list must be either the rw or the ro
-transaction list of trx_sys (all asserted).
-@return TRUE if ok */
-static
-ibool
-lock_validate_table_locks(
-/*======================*/
-	const trx_list_t*	trx_list)	/*!< in: trx list */
-{
-	ut_ad(lock_mutex_own());
-	ut_ad(mutex_own(&trx_sys->mutex));
-
-	const bool	is_ro_list = (trx_list == &trx_sys->ro_trx_list);
-
-	ut_ad(trx_list == &trx_sys->rw_trx_list || is_ro_list);
-
-	const trx_t*	trx = UT_LIST_GET_FIRST(*trx_list);
-
-	while (trx != NULL) {
-
-		assert_trx_in_list(trx);
-		ut_ad(trx->read_only == is_ro_list);
-
-		const lock_t*	held = UT_LIST_GET_FIRST(trx->lock.trx_locks);
-
-		while (held != NULL) {
-
-			if (lock_get_type_low(held) & LOCK_TABLE) {
-
-				lock_table_queue_validate(
-					held->un_member.tab_lock.table);
-			}
-
-			held = UT_LIST_GET_NEXT(trx_locks, held);
-		}
-
-		trx = UT_LIST_GET_NEXT(trx_list, trx);
-	}
-
-	return(TRUE);
-}
-
-/*********************************************************************//**
-Walks one bucket of lock_sys->rec_hash and returns the first record lock
-whose (space, page_no), combined with ut_ull_create(), is greater than
-*limit. On a hit *limit is advanced to that combined value plus one.
-Every lock visited is asserted to be a record lock whose transaction is
-in a trx list. The caller must own the lock mutex and trx_sys->mutex.
-@return lock at limit or NULL if no more locks in the hash bucket */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-const lock_t*
-lock_rec_validate(
-/*==============*/
-	ulint		start,	/*!< in: lock_sys->rec_hash
-				bucket */
-	ib_uint64_t*	limit)	/*!< in/out: upper limit of
-				(space, page_no) */
-{
-	ut_ad(lock_mutex_own());
-	ut_ad(mutex_own(&trx_sys->mutex));
-
-	const lock_t*	cur = static_cast<const lock_t*>(
-		HASH_GET_FIRST(lock_sys->rec_hash, start));
-
-	while (cur != NULL) {
-
-		ut_a(trx_in_trx_list(cur->trx));
-		ut_a(lock_get_type(cur) == LOCK_REC);
-
-		const ib_uint64_t	page_key = ut_ull_create(
-			cur->un_member.rec_lock.space,
-			cur->un_member.rec_lock.page_no);
-
-		if (page_key > *limit) {
-			*limit = page_key + 1;
-			return(cur);
-		}
-
-		cur = static_cast<const lock_t*>(HASH_GET_NEXT(hash, cur));
-	}
-
-	return(0);
-}
-
-/*********************************************************************//**
-Validate a record lock's block.
-Acquires a reference to the tablespace; if that fails nothing is done.
-Otherwise the page is fetched X-latched inside a mini-transaction,
-lock_rec_validate_page() is run on it (inside ut_ad()), the
-mini-transaction is committed and the tablespace reference released. */
-static
-void
-lock_rec_block_validate(
-/*====================*/
- ulint space_id, /*!< in: tablespace id of the page */
- ulint page_no) /*!< in: page number within the tablespace */
-{
- /* The lock and the block that it is referring to may be freed at
- this point. We pass BUF_GET_POSSIBLY_FREED to skip a debug check.
- If the lock exists in lock_rec_validate_page() we assert
- !block->page.file_page_was_freed. */
-
- buf_block_t* block;
- mtr_t mtr;
-
- /* Make sure that the tablespace is not deleted while we are
- trying to access the page. */
- if (fil_space_t* space = fil_space_acquire(space_id)) {
-
- mtr_start(&mtr);
- block = buf_page_get_gen(
- space_id, fsp_flags_get_zip_size(space->flags),
- page_no, RW_X_LATCH, NULL,
- BUF_GET_POSSIBLY_FREED,
- __FILE__, __LINE__, &mtr); /* NOTE(review): block is used
- below without a NULL check; confirm that this call
- cannot return NULL in this mode */
-
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
- ut_ad(lock_rec_validate_page(block));
- mtr_commit(&mtr);
-
- fil_space_release(space);
- }
-}
-
-/*********************************************************************//**
-Validates the lock system.
-Works in two phases. While holding the lock mutex and trx_sys->mutex,
-the table locks of the rw and ro transaction lists are validated and
-the (space, page_no) address of every page that has record locks is
-collected into a set. After both mutexes have been released, each
-collected page is validated with lock_rec_block_validate().
-@return TRUE if ok */
-static
-bool
-lock_validate()
-/*===========*/
-{
- typedef std::pair<ulint, ulint> page_addr_t; /* (space, page_no) */
- typedef std::set<page_addr_t> page_addr_set;
- page_addr_set pages;
-
- lock_mutex_enter();
- mutex_enter(&trx_sys->mutex);
-
- ut_a(lock_validate_table_locks(&trx_sys->rw_trx_list));
- ut_a(lock_validate_table_locks(&trx_sys->ro_trx_list));
-
- /* Iterate over all the record locks and validate the locks. We
- don't want to hog the lock_sys_t::mutex and the trx_sys_t::mutex.
- Release both mutexes during the validation check. */
-
- for (ulint i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
- const lock_t* lock;
- ib_uint64_t limit = 0; /* advanced by lock_rec_validate() so that
- each call returns a lock beyond the previous one */
-
- while ((lock = lock_rec_validate(i, &limit)) != 0) {
-
- ulint space = lock->un_member.rec_lock.space;
- ulint page_no = lock->un_member.rec_lock.page_no;
-
- pages.insert(std::make_pair(space, page_no));
- }
- }
-
- mutex_exit(&trx_sys->mutex);
- lock_mutex_exit();
-
- for (page_addr_set::const_iterator it = pages.begin();
- it != pages.end();
- ++it) {
- lock_rec_block_validate((*it).first, (*it).second);
- }
-
- return(true);
-}
-#endif /* UNIV_DEBUG */
-/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
-
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate insert of
-a record. If they do, first tests if the query thread should anyway
-be suspended for some reason; if not, then puts the transaction and
-the query thread to the lock wait state and inserts a waiting request
-for a gap x-lock to the lock queue.
-Returns DB_SUCCESS without taking any lock when BTR_NO_LOCKING_FLAG is
-set in flags or when the transaction has fake_changes set. The locks
-examined are those on the record that follows rec on the page.
-DB_SUCCESS_LOCKED_REC from the enqueue step is reported as DB_SUCCESS.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_rec_insert_check_and_lock(
-/*===========================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
- set, does nothing */
- const rec_t* rec, /*!< in: record after which to insert */
- buf_block_t* block, /*!< in/out: buffer block of rec */
- dict_index_t* index, /*!< in: index */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- ibool* inherit)/*!< out: set to TRUE if the new
- inserted record maybe should inherit
- LOCK_GAP type locks from the successor
- record */
-{
- const rec_t* next_rec;
- trx_t* trx;
- lock_t* lock;
- dberr_t err;
- ulint next_rec_heap_no;
- ibool inherit_in = *inherit; /* NOTE(review): *inherit is read
- here although the parameter is documented as
- out only; the value decides below whether the
- page max trx id is updated */
-#ifdef WITH_WSREP
- lock_t* c_lock=NULL;
-#endif
-
- ut_ad(block->frame == page_align(rec));
- ut_ad(!dict_index_is_online_ddl(index)
- || dict_index_is_clust(index)
- || (flags & BTR_CREATE_FLAG));
- ut_ad((flags & BTR_NO_LOCKING_FLAG) || thr);
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- trx = thr_get_trx(thr);
-
- if (UNIV_UNLIKELY(trx->fake_changes)) {
- return(DB_SUCCESS);
- }
-
- next_rec = page_rec_get_next_const(rec);
- next_rec_heap_no = page_rec_get_heap_no(next_rec);
-
- lock_mutex_enter();
- /* Because this code is invoked for a running transaction by
- the thread that is serving the transaction, it is not necessary
- to hold trx->mutex here. */
-
- /* When inserting a record into an index, the table must be at
- least IX-locked. When we are building an index, we would pass
- BTR_NO_LOCKING_FLAG and skip the locking altogether. */
- ut_ad(lock_table_has(trx, index->table, LOCK_IX));
-
- lock = lock_rec_get_first(block, next_rec_heap_no);
-
- if (UNIV_LIKELY(lock == NULL)) {
- /* We optimize CPU time usage in the simplest case */
-
- lock_mutex_exit();
-
- if (inherit_in && !dict_index_is_clust(index)) {
- /* Update the page max trx id field */
- page_update_max_trx_id(block,
- buf_block_get_page_zip(block),
- trx->id, mtr);
- }
-
- *inherit = FALSE;
-
- return(DB_SUCCESS);
- }
-
- *inherit = TRUE;
-
- /* If another transaction has an explicit lock request which locks
- the gap, waiting or granted, on the successor, the insert has to wait.
-
- An exception is the case where the lock by the another transaction
- is a gap type lock which it placed to wait for its turn to insert. We
- do not consider that kind of a lock conflicting with our insert. This
- eliminates an unnecessary deadlock which resulted when 2 transactions
- had to wait for their insert. Both had waiting gap type lock requests
- on the successor, which produced an unnecessary deadlock. */
-
-#ifdef WITH_WSREP
- if ((c_lock = (ib_lock_t*)lock_rec_other_has_conflicting(
- static_cast<enum lock_mode>(
- LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION),
- block, next_rec_heap_no, trx))) {
-#else
- if (lock_rec_other_has_conflicting(
- static_cast<enum lock_mode>(
- LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION),
- block, next_rec_heap_no, trx)) {
-#endif /* WITH_WSREP */
-
- /* Note that we may get DB_SUCCESS also here! */
- trx_mutex_enter(trx);
-
-#ifdef WITH_WSREP
- err = lock_rec_enqueue_waiting(c_lock,
- LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
- block, next_rec_heap_no, index, thr);
-#else
- err = lock_rec_enqueue_waiting(
- LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
- block, next_rec_heap_no, index, thr);
-#endif /* WITH_WSREP */
-
- trx_mutex_exit(trx);
- } else {
- err = DB_SUCCESS;
- }
-
- lock_mutex_exit();
-
- switch (err) {
- case DB_SUCCESS_LOCKED_REC:
- err = DB_SUCCESS;
- /* fall through */
- case DB_SUCCESS:
- if (!inherit_in || dict_index_is_clust(index)) {
- break;
- }
- /* Update the page max trx id field */
- page_update_max_trx_id(block,
- buf_block_get_page_zip(block),
- trx->id, mtr); /* falls through to the break below */
- default:
- /* We only care about the two return values. */
- break;
- }
-
-#ifdef UNIV_DEBUG
- {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- rec_offs_init(offsets_);
-
- offsets = rec_get_offsets(next_rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
-
- ut_ad(lock_rec_queue_validate(
- FALSE, block, next_rec, index, offsets));
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-#endif /* UNIV_DEBUG */
-
- return(err);
-}
-
-/*********************************************************************//**
-If a transaction has an implicit x-lock on a record, but no explicit x-lock
-set on the record, sets one for it.
-The caller must not own the lock mutex (asserted). The id of the
-candidate transaction is obtained without the lock mutex; if it is
-nonzero, the lock mutex is taken and an explicit
-LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP lock is added on behalf of that
-transaction, provided its id is found by trx_find_descriptor() and it
-does not already have such an explicit lock. */
-static
-void
-lock_rec_convert_impl_to_expl(
-/*==========================*/
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: user record on page */
- dict_index_t* index, /*!< in: index of record */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- trx_id_t trx_id;
-
- ut_ad(!lock_mutex_own());
- ut_ad(page_rec_is_user_rec(rec));
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
-
- if (dict_index_is_clust(index)) {
- trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
- /* The clustered index record was last modified by
- this transaction. The transaction may have been
- committed a long time ago. */
- } else {
- ut_ad(!dict_index_is_online_ddl(index));
- trx_id = lock_sec_rec_some_has_impl(rec, index, offsets);
- /* The transaction can be committed before the
- trx_is_active(trx_id, NULL) check below, because we are not
- holding lock_mutex. */
-
- ut_ad(!lock_rec_other_trx_holds_expl(LOCK_S | LOCK_REC_NOT_GAP,
- trx_id, rec, block));
- }
-
- if (trx_id != 0) {
- trx_id_t* impl_trx_desc;
- ulint heap_no = page_rec_get_heap_no(rec);
-
- lock_mutex_enter();
-
- /* If the transaction is still active and has no
- explicit x-lock set on the record, set one for it */
-
- mutex_enter(&trx_sys->mutex);
- impl_trx_desc = trx_find_descriptor(trx_sys->descriptors,
- trx_sys->descr_n_used,
- trx_id);
- mutex_exit(&trx_sys->mutex);
-
- /* trx_id cannot be committed until lock_mutex_exit()
- because lock_trx_release_locks() acquires lock_sys->mutex */
-
- if (impl_trx_desc != NULL
- && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block,
- heap_no, trx_id)) {
- ulint type_mode = (LOCK_REC | LOCK_X
- | LOCK_REC_NOT_GAP);
-
- mutex_enter(&trx_sys->mutex);
- trx_t* impl_trx = trx_rw_get_active_trx_by_id(trx_id,
- NULL);
- mutex_exit(&trx_sys->mutex);
- ut_ad(impl_trx != NULL); /* NOTE(review): debug-only check;
- impl_trx is passed on below without a
- release-build NULL test */
-
- lock_rec_add_to_queue(
- type_mode, block, heap_no, index,
- impl_trx, FALSE);
- }
-
- lock_mutex_exit();
- }
-}
-
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate modify (update,
-delete mark, or delete unmark) of a clustered index record. If they do,
-first tests if the query thread should anyway be suspended for some
-reason; if not, then puts the transaction and the query thread to the
-lock wait state and inserts a waiting request for a record x-lock to the
-lock queue.
-Returns DB_SUCCESS without locking when BTR_NO_LOCKING_FLAG is set in
-flags or when the transaction has fake_changes set. Otherwise any
-implicit lock on the record is first converted to an explicit one, and
-a LOCK_X | LOCK_REC_NOT_GAP record lock is then requested under the lock
-mutex. DB_SUCCESS_LOCKED_REC is reported to the caller as DB_SUCCESS.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_clust_rec_modify_check_and_lock(
-/*=================================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: record which should be
- modified */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- que_thr_t* thr) /*!< in: query thread */
-{
- dberr_t err;
- ulint heap_no;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(dict_index_is_clust(index));
- ut_ad(block->frame == page_align(rec));
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) {
- return(DB_SUCCESS);
- }
-
- heap_no = rec_offs_comp(offsets)
- ? rec_get_heap_no_new(rec)
- : rec_get_heap_no_old(rec);
-
- /* If a transaction has no explicit x-lock set on the record, set one
- for it */
-
- lock_rec_convert_impl_to_expl(block, rec, index, offsets);
-
- lock_mutex_enter();
- trx_t* trx = thr_get_trx(thr); /* read only by the ut_ad() below */
-
- ut_ad(lock_table_has(trx, index->table, LOCK_IX));
-
- err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
- block, heap_no, index, thr);
-
- MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
-
- lock_mutex_exit();
-
- ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
-
- if (UNIV_UNLIKELY(err == DB_SUCCESS_LOCKED_REC)) {
- err = DB_SUCCESS;
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate modify (delete
-mark or delete unmark) of a secondary index record.
-Returns DB_SUCCESS without locking when BTR_NO_LOCKING_FLAG is set in
-flags or when the transaction has fake_changes set. Otherwise a
-LOCK_X | LOCK_REC_NOT_GAP record lock is requested under the lock mutex.
-On DB_SUCCESS or DB_SUCCESS_LOCKED_REC the page max trx id is updated
-with the id of the transaction and DB_SUCCESS is returned.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_sec_rec_modify_check_and_lock(
-/*===============================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- buf_block_t* block, /*!< in/out: buffer block of rec */
- const rec_t* rec, /*!< in: record which should be
- modified; NOTE: as this is a secondary
- index, we always have to modify the
- clustered index record first: see the
- comment below */
- dict_index_t* index, /*!< in: secondary index */
- que_thr_t* thr, /*!< in: query thread
- (can be NULL if BTR_NO_LOCKING_FLAG) */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- dberr_t err;
- ulint heap_no;
-
- ut_ad(!dict_index_is_clust(index));
- ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG));
- ut_ad(block->frame == page_align(rec));
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) {
- return(DB_SUCCESS);
- }
-
- heap_no = page_rec_get_heap_no(rec);
-
- /* Another transaction cannot have an implicit lock on the record,
- because when we come here, we already have modified the clustered
- index record, and this would not have been possible if another active
- transaction had modified this secondary index record. */
-
- trx_t* trx = thr_get_trx(thr); /* read only by the ut_ad() below */
- lock_mutex_enter();
-
- ut_ad(lock_table_has(trx, index->table, LOCK_IX));
-
- err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
- block, heap_no, index, thr);
-
- MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
-
- lock_mutex_exit();
-
-#ifdef UNIV_DEBUG
- {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- rec_offs_init(offsets_);
-
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
-
- ut_ad(lock_rec_queue_validate(
- FALSE, block, rec, index, offsets));
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-#endif /* UNIV_DEBUG */
-
- if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) {
- /* Update the page max trx id field */
- /* It might not be necessary to do this if
- err == DB_SUCCESS (no new lock created),
- but it should not cost too much performance. */
- page_update_max_trx_id(block,
- buf_block_get_page_zip(block),
- thr_get_trx(thr)->id, mtr);
- err = DB_SUCCESS;
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Like lock_clust_rec_read_check_and_lock(), but reads a
-secondary index record.
-Returns DB_SUCCESS without locking when BTR_NO_LOCKING_FLAG is set in
-flags. For a transaction with fake_changes set, it returns DB_SUCCESS
-unless srv_fake_changes_locks is set, in which case a requested LOCK_X
-is downgraded to LOCK_S. Implicit-to-explicit lock conversion is
-attempted only for a non-supremum record, and only when the page max
-trx id is not below trx_rw_min_trx_id() or recovery is on.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
-or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_sec_rec_read_check_and_lock(
-/*=============================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: user record or page
- supremum record which should
- be read or passed over by a
- read cursor */
- dict_index_t* index, /*!< in: secondary index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- enum lock_mode mode, /*!< in: mode of the lock which
- the read cursor should set on
- records: LOCK_S or LOCK_X; the
- latter is possible in
- SELECT FOR UPDATE */
- ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr) /*!< in: query thread */
-{
- dberr_t err;
- ulint heap_no;
-
- ut_ad(!dict_index_is_clust(index));
- ut_ad(!dict_index_is_online_ddl(index));
- ut_ad(block->frame == page_align(rec));
- ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mode == LOCK_X || mode == LOCK_S);
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- if (UNIV_UNLIKELY((thr && thr_get_trx(thr)->fake_changes))) {
- if (!srv_fake_changes_locks) {
- return(DB_SUCCESS);
- }
- if (mode == LOCK_X) {
- mode = LOCK_S;
- }
- }
-
- heap_no = page_rec_get_heap_no(rec);
-
- /* Some transaction may have an implicit x-lock on the record only
- if the max trx id for the page >= min trx id for the trx list or a
- database recovery is running. */
-
- if ((page_get_max_trx_id(block->frame) >= trx_rw_min_trx_id()
- || recv_recovery_is_on())
- && !page_rec_is_supremum(rec)) {
-
- lock_rec_convert_impl_to_expl(block, rec, index, offsets);
- }
-
- trx_t* trx = thr_get_trx(thr); /* NOTE(review): thr is tested
- for NULL in the fake_changes check above but is
- passed to thr_get_trx() unconditionally here;
- confirm whether thr can be NULL at this point */
- lock_mutex_enter();
-
- ut_ad(mode != LOCK_X
- || lock_table_has(trx, index->table, LOCK_IX));
- ut_ad(mode != LOCK_S
- || lock_table_has(trx, index->table, LOCK_IS));
-
- err = lock_rec_lock(FALSE, mode | gap_mode,
- block, heap_no, index, thr);
-
- MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
-
- lock_mutex_exit();
-
- ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
-
- return(err);
-}
-
-/*********************************************************************//**
-Checks if locks of other transactions prevent an immediate read, or passing
-over by a read cursor, of a clustered index record. If they do, first tests
-if the query thread should anyway be suspended for some reason; if not, then
-puts the transaction and the query thread to the lock wait state and inserts a
-waiting request for a record lock to the lock queue. Sets the requested mode
-lock on the record.
-Returns DB_SUCCESS without locking when BTR_NO_LOCKING_FLAG is set in
-flags. For a transaction with fake_changes set, it returns DB_SUCCESS
-unless srv_fake_changes_locks is set, in which case a requested LOCK_X
-is downgraded to LOCK_S. Implicit-to-explicit lock conversion is skipped
-when the heap number of rec is PAGE_HEAP_NO_SUPREMUM.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
-or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_clust_rec_read_check_and_lock(
-/*===============================*/
- ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
- bit is set, does nothing */
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: user record or page
- supremum record which should
- be read or passed over by a
- read cursor */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- enum lock_mode mode, /*!< in: mode of the lock which
- the read cursor should set on
- records: LOCK_S or LOCK_X; the
- latter is possible in
- SELECT FOR UPDATE */
- ulint gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
- LOCK_REC_NOT_GAP */
- que_thr_t* thr) /*!< in: query thread */
-{
- dberr_t err;
- ulint heap_no;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(block->frame == page_align(rec));
- ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
- ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
- || gap_mode == LOCK_REC_NOT_GAP);
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (flags & BTR_NO_LOCKING_FLAG) {
-
- return(DB_SUCCESS);
- }
-
- if (UNIV_UNLIKELY((thr && thr_get_trx(thr)->fake_changes))) {
- if (!srv_fake_changes_locks) {
- return(DB_SUCCESS);
- }
- if (mode == LOCK_X) {
- mode = LOCK_S;
- }
- }
-
- heap_no = page_rec_get_heap_no(rec);
-
- if (UNIV_LIKELY(heap_no != PAGE_HEAP_NO_SUPREMUM)) {
-
- lock_rec_convert_impl_to_expl(block, rec, index, offsets);
- }
-
- lock_mutex_enter();
- trx_t* trx = thr_get_trx(thr); /* NOTE(review): thr is tested
- for NULL in the fake_changes check above but is
- passed to thr_get_trx() unconditionally here;
- confirm whether thr can be NULL at this point */
-
- ut_ad(mode != LOCK_X
- || lock_table_has(trx, index->table, LOCK_IX));
- ut_ad(mode != LOCK_S
- || lock_table_has(trx, index->table, LOCK_IS));
-
- err = lock_rec_lock(FALSE, mode | gap_mode,
- block, heap_no, index, thr);
-
- MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
-
- lock_mutex_exit();
-
- ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
-
- return(err);
-}
-/*********************************************************************//**
-Variant of lock_clust_rec_read_check_and_lock() for callers that do not
-have the record offsets at hand. The offsets are computed here with
-rec_get_offsets(), the request is forwarded with every other argument
-unchanged, and the heap handed to rec_get_offsets() is freed if it was
-set. DB_SUCCESS_LOCKED_REC from the forwarded call is reported as
-DB_SUCCESS.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
-UNIV_INTERN
-dberr_t
-lock_clust_rec_read_check_and_lock_alt(
-/*===================================*/
-	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
-					bit is set, does nothing */
-	const buf_block_t*	block,	/*!< in: buffer block of rec */
-	const rec_t*		rec,	/*!< in: user record or page
-					supremum record which should
-					be read or passed over by a
-					read cursor */
-	dict_index_t*		index,	/*!< in: clustered index */
-	enum lock_mode		mode,	/*!< in: mode of the lock which
-					the read cursor should set on
-					records: LOCK_S or LOCK_X; the
-					latter is possible in
-					SELECT FOR UPDATE */
-	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
-					LOCK_REC_NOT_GAP */
-	que_thr_t*		thr)	/*!< in: query thread */
-{
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	mem_heap_t*	heap = NULL;
-
-	rec_offs_init(offsets_);
-
-	ulint*	rec_offsets = rec_get_offsets(rec, index, offsets_,
-					      ULINT_UNDEFINED, &heap);
-
-	const dberr_t	status = lock_clust_rec_read_check_and_lock(
-		flags, block, rec, index, rec_offsets, mode, gap_mode, thr);
-
-	if (heap) {
-		mem_heap_free(heap);
-	}
-
-	if (UNIV_UNLIKELY(status == DB_SUCCESS_LOCKED_REC)) {
-		return(DB_SUCCESS);
-	}
-
-	return(status);
-}
-
-/*******************************************************************//**
-Releases the lock stored in the last slot of a transaction's vector of
-AUTOINC locks. The caller must own the lock mutex and the vector must
-not be empty (both asserted). */
-UNIV_INLINE
-void
-lock_release_autoinc_last_lock(
-/*===========================*/
-	ib_vector_t*	autoinc_locks)	/*!< in/out: vector of AUTOINC locks */
-{
-	ut_ad(lock_mutex_own());
-	ut_a(!ib_vector_is_empty(autoinc_locks));
-
-	/* The lock to be released must be the last lock acquired. */
-	lock_t*	newest = *static_cast<lock_t**>(
-		ib_vector_get(autoinc_locks,
-			      ib_vector_size(autoinc_locks) - 1));
-
-	/* Should have only AUTOINC locks in the vector. */
-	ut_a(lock_get_mode(newest) == LOCK_AUTO_INC);
-	ut_a(lock_get_type(newest) == LOCK_TABLE);
-
-	ut_a(newest->un_member.tab_lock.table != NULL);
-
-	/* This will remove the lock from the trx autoinc_locks too. */
-	lock_table_dequeue(newest);
-
-	/* Remove from the table vector too. */
-	lock_trx_table_locks_remove(newest);
-}
-
-/*******************************************************************//**
-Tells whether the autoinc_locks vector of a transaction has any entry.
-The vector itself must exist (asserted).
-@return TRUE if the transaction holds any AUTOINC locks. */
-static
-ibool
-lock_trx_holds_autoinc_locks(
-/*=========================*/
-	const trx_t*	trx)	/*!< in: transaction */
-{
-	ut_a(trx->autoinc_locks != NULL);
-
-	if (ib_vector_is_empty(trx->autoinc_locks)) {
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-
-/*******************************************************************//**
-Releases every lock in the autoinc_locks vector of a transaction, always
-taking the last entry first, until the vector is empty. The caller must
-own the lock mutex and the vector must exist (both asserted). */
-static
-void
-lock_release_autoinc_locks(
-/*=======================*/
-	trx_t*	trx)	/*!< in/out: transaction */
-{
-	ut_ad(lock_mutex_own());
-	/* If this is invoked for a running transaction by the thread
-	that is serving the transaction, then it is not necessary to
-	hold trx->mutex here. */
-
-	ut_a(trx->autoinc_locks != NULL);
-
-	/* Releasing in reverse order avoids searching the vector for
-	the element to delete at the lower level. See
-	(lock_table_remove_low()) for details. */
-	for (;;) {
-
-		if (ib_vector_is_empty(trx->autoinc_locks)) {
-			break;
-		}
-
-		/* lock_table_remove_low() will also remove the lock from
-		the transaction's autoinc_locks vector. */
-		lock_release_autoinc_last_lock(trx->autoinc_locks);
-	}
-
-	/* Should release all locks. */
-	ut_a(ib_vector_is_empty(trx->autoinc_locks));
-}
-
-/*******************************************************************//**
-Non-inline wrapper around lock_get_type_low(), for use outside of the
-lock module.
-@return LOCK_TABLE or LOCK_REC */
-UNIV_INTERN
-ulint
-lock_get_type(
-/*==========*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	const ulint	type = lock_get_type_low(lock);
-
-	return(type);
-}
-
-/*******************************************************************//**
-Returns the trx member of a lock. Non-inline accessor for use outside
-of the lock module.
-@return trx_t* */
-UNIV_INTERN
-trx_t*
-lock_get_trx(
-/*=========*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	trx_t*	owner = lock->trx;
-
-	return(owner);
-}
-
-/*******************************************************************//**
-Returns the id of the transaction that owns a lock.
-@return transaction id */
-UNIV_INTERN
-trx_id_t
-lock_get_trx_id(
-/*============*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	const trx_t*	owner = lock->trx;
-
-	return(owner->id);
-}
-
-/*******************************************************************//**
-Maps the mode of a lock to a human readable, statically allocated string.
-Record locks that carry the gap flag get a ",GAP" suffix. The caller must
-neither free() nor modify the result.
-@return lock mode */
-UNIV_INTERN
-const char*
-lock_get_mode_str(
-/*==============*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	/* Only a record lock can be a gap lock. */
-	const bool	gap = lock_get_type_low(lock) == LOCK_REC
-		&& lock_rec_get_gap(lock);
-
-	switch (lock_get_mode(lock)) {
-	case LOCK_S:
-		return(gap ? "S,GAP" : "S");
-	case LOCK_X:
-		return(gap ? "X,GAP" : "X");
-	case LOCK_IS:
-		return(gap ? "IS,GAP" : "IS");
-	case LOCK_IX:
-		return(gap ? "IX,GAP" : "IX");
-	case LOCK_AUTO_INC:
-		return("AUTO_INC");
-	default:
-		return("UNKNOWN");
-	}
-}
-
-/*******************************************************************//**
-Maps the type of a lock to a human readable, statically allocated string.
-The caller must neither free() nor modify the result.
-@return lock type */
-UNIV_INTERN
-const char*
-lock_get_type_str(
-/*==============*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	const ulint	type = lock_get_type_low(lock);
-
-	if (type == LOCK_REC) {
-		return("RECORD");
-	}
-
-	if (type == LOCK_TABLE) {
-		return("TABLE");
-	}
-
-	return("UNKNOWN");
-}
-
-/*******************************************************************//**
-Returns the table a lock refers to: the table of the locked index for a
-record lock, the locked table itself for a table lock. Any other lock
-type is a fatal error.
-@return table */
-UNIV_INLINE
-dict_table_t*
-lock_get_table(
-/*===========*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	const ulint	type = lock_get_type_low(lock);
-
-	if (type == LOCK_REC) {
-		ut_ad(dict_index_is_clust(lock->index)
-		      || !dict_index_is_online_ddl(lock->index));
-		return(lock->index->table);
-	}
-
-	if (type == LOCK_TABLE) {
-		return(lock->un_member.tab_lock.table);
-	}
-
-	ut_error;
-	return(NULL);
-}
-
-/*******************************************************************//**
-Returns the id of the table a lock refers to.
-@return id of the table */
-UNIV_INTERN
-table_id_t
-lock_get_table_id(
-/*==============*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	return(lock_get_table(lock)->id);
-}
-
-/*******************************************************************//**
-Returns the name of the table a lock refers to. The string belongs to
-the table object; the caller must neither free() nor modify it.
-@return name of the table */
-UNIV_INTERN
-const char*
-lock_get_table_name(
-/*================*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	return(lock_get_table(lock)->name);
-}
-
-/*******************************************************************//**
-Returns the index a record lock is set on. Must only be called for
-record locks (asserted).
-@return index */
-UNIV_INTERN
-const dict_index_t*
-lock_rec_get_index(
-/*===============*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	ut_a(lock_get_type_low(lock) == LOCK_REC);
-
-	const dict_index_t*	index = lock->index;
-
-	ut_ad(dict_index_is_clust(index)
-	      || !dict_index_is_online_ddl(index));
-
-	return(index);
-}
-
-/*******************************************************************//**
-Returns the name of the index a record lock is set on. Must only be
-called for record locks (asserted). The caller must neither free() nor
-modify the result.
-@return name of the index */
-UNIV_INTERN
-const char*
-lock_rec_get_index_name(
-/*====================*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	ut_a(lock_get_type_low(lock) == LOCK_REC);
-
-	const dict_index_t*	index = lock->index;
-
-	ut_ad(dict_index_is_clust(index)
-	      || !dict_index_is_online_ddl(index));
-
-	return(index->name);
-}
-
-/*******************************************************************//**
-Returns the tablespace number stored in a record lock. Must only be
-called for record locks (asserted).
-@return tablespace number */
-UNIV_INTERN
-ulint
-lock_rec_get_space_id(
-/*==================*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	ut_a(lock_get_type_low(lock) == LOCK_REC);
-
-	const ulint	space = lock->un_member.rec_lock.space;
-
-	return(space);
-}
-
-/*******************************************************************//**
-Returns the page number stored in a record lock. Must only be called
-for record locks (asserted).
-@return page number */
-UNIV_INTERN
-ulint
-lock_rec_get_page_no(
-/*=================*/
-	const lock_t*	lock)	/*!< in: lock */
-{
-	ut_a(lock_get_type_low(lock) == LOCK_REC);
-
-	const ulint	page_no = lock->un_member.rec_lock.page_no;
-
-	return(page_no);
-}
-
-/*********************************************************************//**
-Cancels a waiting lock request and releases possible other transactions
-waiting behind it.
-The caller must hold both the lock mutex and the mutex of lock->trx
-(debug-asserted below). lock->trx->lock.cancel is TRUE for the duration
-of the call and is reset to FALSE before returning. */
-UNIV_INTERN
-void
-lock_cancel_waiting_and_release(
-/*============================*/
- lock_t* lock) /*!< in/out: waiting lock request */
-{
- que_thr_t* thr;
-
- ut_ad(lock_mutex_own());
- ut_ad(trx_mutex_own(lock->trx));
-
- lock->trx->lock.cancel = TRUE; /* cleared again at the end of
- this function */
-
- if (lock_get_type_low(lock) == LOCK_REC) {
-
- lock_rec_dequeue_from_page(lock);
- } else {
- ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
-
- if (lock->trx->autoinc_locks != NULL) {
- /* Release the transaction's AUTOINC locks. This is done
- before the waiting table lock itself is dequeued. */
- lock_release_autoinc_locks(lock->trx);
- }
-
- lock_table_dequeue(lock);
- }
-
- /* Reset the wait flag and the back pointer to lock in trx. */
-
- lock_reset_lock_and_trx_wait(lock);
-
- /* The following function releases the trx from lock wait. The
- returned query thread may be NULL, in which case there is nothing
- to wake up. */
-
- thr = que_thr_end_lock_wait(lock->trx);
-
- if (thr != NULL) {
- lock_wait_release_thread_if_suspended(thr);
- }
-
- lock->trx->lock.cancel = FALSE;
-}
-
-/*********************************************************************//**
-Releases any AUTO_INC locks that a trx may have reserved. Meant to be
-called at the end of an SQL statement, by the connection thread that
-owns the transaction (trx->mysql_thd). */
-UNIV_INTERN
-void
-lock_unlock_table_autoinc(
-/*======================*/
-	trx_t*	trx)	/*!< in/out: transaction */
-{
-	ut_ad(!lock_mutex_own());
-	ut_ad(!trx_mutex_own(trx));
-	ut_ad(!trx->lock.wait_lock);
-	/* Allowed for NOT_STARTED, ACTIVE and PREPARED transactions,
-	but not for COMMITTED ones. */
-	ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)
-	      || !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
-
-	/* The caller is the thread serving this running transaction,
-	so trx->mutex does not have to be held here. */
-
-	if (!lock_trx_holds_autoinc_locks(trx)) {
-		/* Nothing to release: skip the lock mutex entirely. */
-		return;
-	}
-
-	lock_mutex_enter();
-	lock_release_autoinc_locks(trx);
-	lock_mutex_exit();
-}
-
-/*********************************************************************//**
-Releases a transaction's locks, and releases possible other transactions
-waiting because of these locks. Change the state of the transaction to
-TRX_STATE_COMMITTED_IN_MEMORY.
-The transaction is expected to be PREPARED or ACTIVE on entry (the
-latter is debug-asserted). Mutexes are taken in the order lock mutex,
-trx_sys->mutex, trx mutex; lock_release() then runs with only the lock
-mutex still held. */
-UNIV_INTERN
-void
-lock_trx_release_locks(
-/*===================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- assert_trx_in_list(trx);
-
- if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
- mutex_enter(&trx_sys->mutex);
- ut_a(trx_sys->n_prepared_trx > 0);
- trx_sys->n_prepared_trx--; /* one PREPARED trx fewer */
- if (trx->is_recovered) {
- ut_a(trx_sys->n_prepared_recovered_trx > 0);
- trx_sys->n_prepared_recovered_trx--;
- }
- mutex_exit(&trx_sys->mutex);
- } else {
- ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
- }
-
- /* The transition of trx->state to TRX_STATE_COMMITTED_IN_MEMORY
- is protected by both the lock_sys->mutex and the trx->mutex.
- We also lock trx_sys->mutex, because state transition to
- TRX_STATE_COMMITTED_IN_MEMORY must be atomic with removing trx
- from the descriptors array. */
- lock_mutex_enter();
- mutex_enter(&trx_sys->mutex);
- trx_mutex_enter(trx);
-
- /* The following assignment makes the transaction committed in memory
- and makes its changes to data visible to other transactions.
- NOTE that there is a small discrepancy from the strict formal
- visibility rules here: a human user of the database can see
- modifications made by another transaction T even before the necessary
- log segment has been flushed to the disk. If the database happens to
- crash before the flush, the user has seen modifications from T which
- will never be a committed transaction. However, any transaction T2
- which sees the modifications of the committing transaction T, and
- which also itself makes modifications to the database, will get an lsn
- larger than the committing transaction T. In the case where the log
- flush fails, and T never gets committed, also T2 will never get
- committed. */
-
- /*--------------------------------------*/
- trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
- /* The following also removes trx from trx_serial_list */
- trx_release_descriptor(trx);
- /*--------------------------------------*/
-
- /* If the background thread trx_rollback_or_clean_recovered()
- is still active then there is a chance that the rollback
- thread may see this trx as COMMITTED_IN_MEMORY and goes ahead
- to clean it up calling trx_cleanup_at_db_startup(). This can
- happen in the case we are committing a trx here that is left
- in PREPARED state during the crash. Note that commit of the
- rollback of a PREPARED trx happens in the recovery thread
- while the rollback of other transactions happen in the
- background thread. To avoid this race we unconditionally unset
- the is_recovered flag. */
-
- trx->is_recovered = FALSE;
-
- trx_mutex_exit(trx);
-
- mutex_exit(&trx_sys->mutex);
-
- lock_release(trx); /* still under the lock mutex, which is
- released right after */
-
- lock_mutex_exit();
-}
-
-/*********************************************************************//**
-Resolves the lock-wait state of a transaction: reports a deadlock if the
-transaction was picked as a deadlock victim, otherwise cancels a still
-pending wait lock, otherwise reports success.
-@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-lock_trx_handle_wait(
-/*=================*/
-	trx_t*	trx)	/*!< in/out: trx lock state */
-{
-	dberr_t	err = DB_SUCCESS;
-
-	lock_mutex_enter();
-	trx_mutex_enter(trx);
-
-	if (trx->lock.was_chosen_as_deadlock_victim) {
-		err = DB_DEADLOCK;
-	} else if (trx->lock.wait_lock == NULL) {
-		/* The lock was probably granted before we got here. */
-		err = DB_SUCCESS;
-	} else {
-		lock_cancel_waiting_and_release(trx->lock.wait_lock);
-		err = DB_LOCK_WAIT;
-	}
-
-	lock_mutex_exit();
-	trx_mutex_exit(trx);
-
-	return(err);
-}
-
-/*********************************************************************//**
-Returns the length of the table's lock list, sampled under the lock
-mutex.
-@return number of locks */
-UNIV_INTERN
-ulint
-lock_table_get_n_locks(
-/*===================*/
-	const dict_table_t*	table)	/*!< in: table */
-{
-	lock_mutex_enter();
-
-	const ulint	n_locks = UT_LIST_GET_LEN(table->locks);
-
-	lock_mutex_exit();
-
-	return(n_locks);
-}
-
-#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Walks every lock of every transaction on the given trx list and returns
-the first one that refers to the table, either as a table lock or as a
-record lock on one of its indexes. Debug-only exhaustive check; the
-caller must hold the lock mutex and trx_sys->mutex.
-@return lock if found */
-static
-const lock_t*
-lock_table_locks_lookup(
-/*====================*/
-	const dict_table_t*	table,		/*!< in: check if there are
-						any locks held on records in
-						this table or on the table
-						itself */
-	const trx_list_t*	trx_list)	/*!< in: trx list to check */
-{
-	ut_a(table != NULL);
-	ut_ad(lock_mutex_own());
-	ut_ad(mutex_own(&trx_sys->mutex));
-
-	const bool	is_ro_list = (trx_list == &trx_sys->ro_trx_list);
-
-	ut_ad(trx_list == &trx_sys->rw_trx_list || is_ro_list);
-
-	for (trx_t* trx = UT_LIST_GET_FIRST(*trx_list);
-	     trx != NULL;
-	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
-		assert_trx_in_list(trx);
-		ut_ad(trx->read_only == is_ro_list);
-
-		for (const lock_t* lock
-			     = UT_LIST_GET_FIRST(trx->lock.trx_locks);
-		     lock != NULL;
-		     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
-
-			ut_a(lock->trx == trx);
-
-			if (lock_get_type_low(lock) != LOCK_REC) {
-				/* Table lock: compare the table directly. */
-				if (lock->un_member.tab_lock.table == table) {
-					return(lock);
-				}
-
-				continue;
-			}
-
-			/* Record lock: compare the table of its index. */
-			ut_ad(!dict_index_is_online_ddl(lock->index)
-			      || dict_index_is_clust(lock->index));
-
-			if (lock->index->table == table) {
-				return(lock);
-			}
-		}
-	}
-
-	return(NULL);
-}
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-Tells whether any table lock or record lock exists against a table,
-judged by the table's lock list and its record lock counter. Debug
-builds cross-check a negative answer against every transaction.
-@return TRUE if table has either table or record locks. */
-UNIV_INTERN
-ibool
-lock_table_has_locks(
-/*=================*/
-	const dict_table_t*	table)	/*!< in: check if there are any locks
-					held on records in this table or on the
-					table itself */
-{
-	ibool	has_locks = FALSE;
-
-	lock_mutex_enter();
-
-	if (UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0) {
-		has_locks = TRUE;
-	}
-
-#ifdef UNIV_DEBUG
-	if (!has_locks) {
-		/* Neither trx list may still hold a lock on the table. */
-		mutex_enter(&trx_sys->mutex);
-
-		ut_ad(!lock_table_locks_lookup(table, &trx_sys->rw_trx_list));
-		ut_ad(!lock_table_locks_lookup(table, &trx_sys->ro_trx_list));
-
-		mutex_exit(&trx_sys->mutex);
-	}
-#endif /* UNIV_DEBUG */
-
-	lock_mutex_exit();
-
-	return(has_locks);
-}
-
-#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Check if the transaction holds any locks on the sys tables
-or its records.
-The lock mutex is acquired and released inside this function.
-NOTE(review): only the entries of trx->lock.table_locks are examined
-here; record locks are not inspected directly - confirm that this is
-what "or its records" is meant to cover.
-@return the strongest lock found on any sys table or 0 for none */
-UNIV_INTERN
-const lock_t*
-lock_trx_has_sys_table_locks(
-/*=========================*/
- const trx_t* trx) /*!< in: transaction to check */
-{
- lint i;
- const lock_t* strongest_lock = 0;
- lock_mode strongest = LOCK_NONE;
-
- lock_mutex_enter();
-
- /* Find a valid mode. Note: ib_vector_size() can be 0. The vector
- is walked from its last element towards the first, stopping at the
- first non-NULL entry that is a lock on a sys table. */
- for (i = ib_vector_size(trx->lock.table_locks) - 1; i >= 0; --i) {
- const lock_t* lock;
-
- lock = *static_cast<const lock_t**>(
- ib_vector_get(trx->lock.table_locks, i));
-
- if (lock != NULL
- && dict_is_sys_table(lock->un_member.tab_lock.table->id)) {
-
- strongest = lock_get_mode(lock);
- ut_ad(strongest != LOCK_NONE);
- strongest_lock = lock;
- break;
- }
- }
-
- if (strongest == LOCK_NONE) { /* no sys table lock was found */
- lock_mutex_exit();
- return(NULL);
- }
-
- for (/* No op */; i >= 0; --i) { /* resumes at the index where the
- first loop stopped */
- const lock_t* lock;
-
- lock = *static_cast<const lock_t**>(
- ib_vector_get(trx->lock.table_locks, i));
-
- if (lock == NULL) {
- continue;
- }
-
- ut_ad(trx == lock->trx);
- ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
- ut_ad(lock->un_member.tab_lock.table != NULL);
-
- lock_mode mode = lock_get_mode(lock);
-
- if (dict_is_sys_table(lock->un_member.tab_lock.table->id)
- && lock_mode_stronger_or_eq(mode, strongest)) {
-
- strongest = mode; /* ties are replaced too, so among
- equally strong locks the one at the
- lowest index is returned */
- strongest_lock = lock;
- }
- }
-
- lock_mutex_exit();
-
- return(strongest_lock);
-}
-
-/*******************************************************************//**
-Asserts that the transaction holds the expected intention lock on the
-table and, when srv_fake_changes_locks is set, the expected explicit
-not-gap record lock. A fake-changes transaction is expected to hold
-IS / S, any other transaction IX / X. A missing lock trips ut_a(), so
-the function only ever returns true.
-@return whether the locks are held */
-UNIV_INTERN
-bool
-lock_trx_has_rec_x_lock(
-/*====================*/
-	const trx_t*		trx,	/*!< in: transaction to check */
-	const dict_table_t*	table,	/*!< in: table to check */
-	const buf_block_t*	block,	/*!< in: buffer block of the record */
-	ulint			heap_no)/*!< in: record heap number */
-{
-	ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM);
-
-	const bool	fake = UNIV_UNLIKELY(trx->fake_changes);
-
-	const enum lock_mode	intention_lock = fake ? LOCK_IS : LOCK_IX;
-	const enum lock_mode	rec_lock = fake ? LOCK_S : LOCK_X;
-
-	lock_mutex_enter();
-
-	ut_a(lock_table_has(trx, table, intention_lock));
-
-	if (UNIV_LIKELY(srv_fake_changes_locks)) {
-
-		ut_a(lock_rec_has_expl(rec_lock | LOCK_REC_NOT_GAP,
-				       block, heap_no, trx->id));
-	}
-
-	lock_mutex_exit();
-
-	return(true);
-}
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-Get lock mode and table/index name.
-For a record lock the result is
-"mode <M>index <I>table <T> n_uniq<N> n_user<U>", exactly as before.
-For a table lock only the mode and the table name are reported, because
-a table lock keeps its table in un_member.tab_lock and the other
-accessors in this file only read lock->index after confirming that the
-lock is a record lock.
-@return string containing lock info */
-std::string
-lock_get_info(
-	const lock_t*	lock)	/*!< in: lock */
-{
-	/* A local stream replaces the former SSTR macro, which stayed
-	defined after the function and cast a temporary stream. */
-	std::ostringstream	info;
-
-	info << "mode " << lock_get_mode_str(lock);
-
-	const ulint	type = lock_get_type_low(lock);
-
-	if (type == LOCK_REC) {
-		const dict_index_t*	index = lock->index;
-
-		info << "index " << index->name
-		     << "table " << index->table_name
-		     << std::dec
-		     << " n_uniq" << index->n_uniq
-		     << " n_user" << index->n_user_defined_cols;
-	} else if (type == LOCK_TABLE) {
-		/* No index to describe: report the locked table only. */
-		info << "table " << lock->un_member.tab_lock.table->name;
-	}
-
-	return(info.str());
-}
diff --git a/storage/xtradb/lock/lock0wait.cc b/storage/xtradb/lock/lock0wait.cc
deleted file mode 100644
index a447027e336..00000000000
--- a/storage/xtradb/lock/lock0wait.cc
+++ /dev/null
@@ -1,576 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file lock/lock0wait.cc
-The transaction lock system
-
-Created 25/5/2010 Sunny Bains
-*******************************************************/
-
-#define LOCK_MODULE_IMPLEMENTATION
-
-#include "srv0mon.h"
-#include "que0que.h"
-#include "lock0lock.h"
-#include "row0mysql.h"
-#include "srv0start.h"
-#include "ha_prototypes.h"
-#include "lock0priv.h"
-
-#include <mysql/service_wsrep.h>
-
-/*********************************************************************//**
-Dumps every slot of the lock_sys_t::waiting_threads array to stderr,
-one line per slot. The caller must hold the lock wait mutex. */
-static
-void
-lock_wait_table_print(void)
-/*=======================*/
-{
-	ut_ad(lock_wait_mutex_own());
-
-	for (ulint i = 0; i < OS_THREAD_MAX_N; ++i) {
-		const srv_slot_t*	slot = &lock_sys->waiting_threads[i];
-
-		fprintf(stderr,
-			"Slot %lu: thread type %lu,"
-			" in use %lu, susp %lu, timeout %lu, time %lu\n",
-			(ulong) i,
-			(ulong) slot->type,
-			(ulong) slot->in_use,
-			(ulong) slot->suspended,
-			slot->wait_timeout,
-			(ulong) difftime(ut_time(), slot->suspend_time));
-	}
-}
-
-/*********************************************************************//**
-Release a slot in the lock_sys_t::waiting_threads. Adjust the array last pointer
-if there are empty slots towards the end of the table.
-The lock wait mutex is held for the whole function; the lock mutex is
-additionally held only while the slot fields are cleared. */
-static
-void
-lock_wait_table_release_slot(
-/*=========================*/
- srv_slot_t* slot) /*!< in: slot to release */
-{
-#ifdef UNIV_DEBUG
- srv_slot_t* upper = lock_sys->waiting_threads + OS_THREAD_MAX_N;
-#endif /* UNIV_DEBUG */
-
- lock_wait_mutex_enter();
-
- ut_ad(slot->in_use);
- ut_ad(slot->thr != NULL);
- ut_ad(slot->thr->slot != NULL);
- ut_ad(slot->thr->slot == slot);
-
- /* Must be within the array boundaries. */
- ut_ad(slot >= lock_sys->waiting_threads);
- ut_ad(slot < upper);
-
- /* Note: When we reserve the slot we use the trx_t::mutex to update
- the slot values to change the state to reserved. Here we are using the
- lock mutex to change the state of the slot to free. This is by design,
- because when we query the slot state we always hold both the lock and
- trx_t::mutex. To reduce contention on the lock mutex when reserving the
- slot we avoid acquiring the lock mutex. */
-
- lock_mutex_enter();
-
- slot->thr->slot = NULL; /* unlink the query thread first, while
- slot->thr is still valid */
- slot->thr = NULL;
- slot->in_use = FALSE;
-
- lock_mutex_exit();
-
- /* Scan backwards and adjust the last free slot pointer.
- NOTE(review): the scan starts at last_slot itself, and the
- assertions below allow last_slot to equal waiting_threads +
- OS_THREAD_MAX_N; in that case the first !slot->in_use test would
- read one element past the array - confirm whether that state is
- reachable here. */
- for (slot = lock_sys->last_slot;
- slot > lock_sys->waiting_threads && !slot->in_use;
- --slot) {
- /* No op */
- }
-
- /* Either the array is empty or the last scanned slot is in use. */
- ut_ad(slot->in_use || slot == lock_sys->waiting_threads);
-
- lock_sys->last_slot = slot + 1;
-
- /* The last slot is either outside of the array boundary or it's
- on an empty slot. */
- ut_ad(lock_sys->last_slot == upper || !lock_sys->last_slot->in_use);
-
- ut_ad(lock_sys->last_slot >= lock_sys->waiting_threads);
- ut_ad(lock_sys->last_slot <= upper);
-
- lock_wait_mutex_exit();
-}
-
-/*********************************************************************//**
-Claims the first free slot of lock_sys_t::waiting_threads for the given
-query thread and marks it as suspended. The caller must hold the lock
-wait mutex and the mutex of the thread's transaction. If no slot is
-free, the waiting threads are printed and the server is brought down
-with ut_error.
-@return reserved slot */
-static
-srv_slot_t*
-lock_wait_table_reserve_slot(
-/*=========================*/
-	que_thr_t*	thr,		/*!< in: query thread associated
-					with the user OS thread */
-	ulong		wait_timeout)	/*!< in: lock wait timeout value */
-{
-	ut_ad(lock_wait_mutex_own());
-	ut_ad(trx_mutex_own(thr_get_trx(thr)));
-
-	srv_slot_t* const	end
-		= lock_sys->waiting_threads + OS_THREAD_MAX_N;
-
-	for (srv_slot_t* slot = lock_sys->waiting_threads;
-	     slot != end;
-	     ++slot) {
-
-		if (slot->in_use) {
-			continue;
-		}
-
-		slot->in_use = TRUE;
-		slot->thr = thr;
-		slot->thr->slot = slot;
-
-		/* The event is created the first time a slot is used
-		and kept for later reservations. */
-		if (slot->event == NULL) {
-			slot->event = os_event_create();
-			ut_a(slot->event);
-		}
-
-		os_event_reset(slot->event);
-		slot->suspended = TRUE;
-		slot->suspend_time = ut_time();
-		slot->wait_timeout = wait_timeout;
-
-		/* Grow the used range when its boundary slot is taken. */
-		if (slot == lock_sys->last_slot) {
-			++lock_sys->last_slot;
-		}
-
-		ut_ad(lock_sys->last_slot <= end);
-
-		return(slot);
-	}
-
-	ut_print_timestamp(stderr);
-
-	fprintf(stderr,
-		" InnoDB: There appear to be %lu user"
-		" threads currently waiting\n"
-		"InnoDB: inside InnoDB, which is the"
-		" upper limit. Cannot continue operation.\n"
-		"InnoDB: As a last thing, we print"
-		" a list of waiting threads.\n", (ulong) OS_THREAD_MAX_N);
-
-	lock_wait_table_print();
-
-	ut_error;
-	return(NULL);
-}
-
-#ifdef WITH_WSREP
-/*********************************************************************//**
-Tells whether a lock wait timeout concerns a wsrep BF (priority) thread.
-As a side effect, a positive answer logs a warning, switches on the
-InnoDB monitor and lock monitor output and signals the monitor event.
-@return FALSE for a regular lock timeout */
-static ibool
-wsrep_is_BF_lock_timeout(
-/*====================*/
-	trx_t*	trx)	/* in: trx to check for lock priority */
-{
-	if (!wsrep_on(trx->mysql_thd)
-	    || !wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
-		return FALSE;
-	}
-
-	fprintf(stderr, "WSREP: BF lock wait long\n");
-	srv_print_innodb_monitor = TRUE;
-	srv_print_innodb_lock_monitor = TRUE;
-	os_event_set(srv_monitor_event);
-
-	return TRUE;
-}
-#endif /* WITH_WSREP */
-
-/***************************************************************//**
-Puts a user OS thread to wait for a lock to be released. If an error
-occurs during the wait trx->error_state associated with thr is
-!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
-are possible errors. DB_DEADLOCK is returned if selective deadlock
-resolution chose this transaction as a victim. DB_INTERRUPTED is set
-if trx_is_interrupted() reports true after the wait.
-For row lock waits the wait statistics are updated as well; the
-measured duration counts as 0 unless both the start and the finish
-timestamp could be read. */
-UNIV_INTERN
-void
-lock_wait_suspend_thread(
-/*=====================*/
-	que_thr_t*	thr)	/*!< in: query thread associated with the
-				user OS thread */
-{
-	srv_slot_t*	slot;
-	double		wait_time;
-	trx_t*		trx;
-	ulint		had_dict_lock;
-	ibool		was_declared_inside_innodb;
-	ib_int64_t	start_time = 0;
-	ib_int64_t	finish_time;
-	ulint		sec;
-	ulint		ms;
-	ulong		lock_wait_timeout;
-
-	trx = thr_get_trx(thr);
-
-	if (trx->mysql_thd != 0) {
-		DEBUG_SYNC_C("lock_wait_suspend_thread_enter");
-	}
-
-	/* InnoDB system transactions (such as the purge, and
-	incomplete transactions that are being rolled back after crash
-	recovery) will use the global value of
-	innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */
-	lock_wait_timeout = trx_lock_wait_timeout_get(trx);
-
-	lock_wait_mutex_enter();
-
-	trx_mutex_enter(trx);
-
-	trx->error_state = DB_SUCCESS;
-
-	if (thr->state == QUE_THR_RUNNING) {
-
-		ut_ad(thr->is_active);
-
-		/* The lock has already been released or this transaction
-		was chosen as a deadlock victim: no need to suspend */
-
-		if (trx->lock.was_chosen_as_deadlock_victim) {
-
-			trx->error_state = DB_DEADLOCK;
-			trx->lock.was_chosen_as_deadlock_victim = FALSE;
-		}
-
-		lock_wait_mutex_exit();
-		trx_mutex_exit(trx);
-		return;
-	}
-
-	ut_ad(!thr->is_active);
-
-	slot = lock_wait_table_reserve_slot(thr, lock_wait_timeout);
-
-	lock_wait_mutex_exit();
-	trx_mutex_exit(trx);
-
-	if (thr->lock_state == QUE_THR_LOCK_ROW) {
-		srv_stats.n_lock_wait_count.inc();
-		srv_stats.n_lock_wait_current_count.inc();
-
-		/* -1 marks a timestamp that could not be read. */
-		if (ut_usectime(&sec, &ms) == -1) {
-			start_time = -1;
-		} else {
-			start_time = (ib_int64_t) sec * 1000000 + ms;
-		}
-	}
-
-	ulint	lock_type = ULINT_UNDEFINED;
-
-	/* The wait_lock can be cleared by another thread when the
-	lock is released. But the wait can only be initiated by the
-	current thread which owns the transaction. Only acquire the
-	mutex if the wait_lock is still active. */
-	if (const lock_t* wait_lock = trx->lock.wait_lock) {
-		lock_mutex_enter();
-		/* Read it again now that the lock mutex is held. */
-		wait_lock = trx->lock.wait_lock;
-		if (wait_lock) {
-			lock_type = lock_get_type_low(wait_lock);
-		}
-		lock_mutex_exit();
-	}
-
-	had_dict_lock = trx->dict_operation_lock_mode;
-
-	switch (had_dict_lock) {
-	case 0:
-		break;
-	case RW_S_LATCH:
-		/* Release foreign key check latch */
-		row_mysql_unfreeze_data_dictionary(trx);
-
-		DEBUG_SYNC_C("lock_wait_release_s_latch_before_sleep");
-		break;
-	default:
-		/* There should never be a lock wait when the
-		dictionary latch is reserved in X mode. Dictionary
-		transactions should only acquire locks on dictionary
-		tables, not other tables. All access to dictionary
-		tables should be covered by dictionary
-		transactions. */
-		ut_error;
-	}
-
-	ut_a(trx->dict_operation_lock_mode == 0);
-
-	/* Suspend this thread and wait for the event. */
-
-	was_declared_inside_innodb = trx->declared_to_be_inside_innodb;
-
-	if (was_declared_inside_innodb) {
-		/* We must declare this OS thread to exit InnoDB, since a
-		possible other thread holding a lock which this thread waits
-		for must be allowed to enter, sooner or later */
-
-		srv_conc_force_exit_innodb(trx);
-	}
-
-	/* Unknown is also treated like a record lock */
-	if (lock_type == ULINT_UNDEFINED || lock_type == LOCK_REC) {
-		thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_LOCK);
-	} else {
-		ut_ad(lock_type == LOCK_TABLE);
-		thd_wait_begin(trx->mysql_thd, THD_WAIT_TABLE_LOCK);
-	}
-
-	os_event_wait(slot->event);
-
-	thd_wait_end(trx->mysql_thd);
-
-	if (was_declared_inside_innodb) {
-
-		/* Return back inside InnoDB */
-
-		srv_conc_force_enter_innodb(trx);
-	}
-
-	/* After resuming, reacquire the data dictionary latch if
-	necessary. */
-
-	if (had_dict_lock) {
-
-		row_mysql_freeze_data_dictionary(trx);
-	}
-
-	wait_time = ut_difftime(ut_time(), slot->suspend_time);
-
-	/* Release the slot for others to use */
-
-	lock_wait_table_release_slot(slot);
-
-	if (thr->lock_state == QUE_THR_LOCK_ROW) {
-		ulint	diff_time;
-
-		if (ut_usectime(&sec, &ms) == -1) {
-			finish_time = -1;
-		} else {
-			finish_time = (ib_int64_t) sec * 1000000 + ms;
-		}
-
-		/* Only trust the difference if we successfully
-		retrieved the start and finish times. See Bug#36819.
-		With a failed start timestamp (-1) and a valid finish
-		timestamp the plain subtraction would yield a huge bogus
-		duration, which must not reach the statistics or the
-		per-thread lock wait time. */
-		const bool	have_times = start_time != -1
-			&& finish_time != -1;
-
-		diff_time = (have_times && finish_time > start_time) ?
-			    (ulint) (finish_time - start_time) : 0;
-
-		srv_stats.n_lock_wait_current_count.dec();
-		srv_stats.n_lock_wait_time.add(diff_time);
-
-		if (diff_time > lock_sys->n_lock_max_wait_time
-		    && have_times) {
-
-			lock_sys->n_lock_max_wait_time = diff_time;
-		}
-
-		/* Record the lock wait time for this thread */
-		thd_set_lock_wait_time(trx->mysql_thd, diff_time);
-
-	}
-
-	if (lock_wait_timeout < 100000000
-	    && wait_time > (double) lock_wait_timeout) {
-#ifdef WITH_WSREP
-		if (!wsrep_on(trx->mysql_thd) ||
-		    (!wsrep_is_BF_lock_timeout(trx) &&
-		     trx->error_state != DB_DEADLOCK)) {
-#endif /* WITH_WSREP */
-
-			trx->error_state = DB_LOCK_WAIT_TIMEOUT;
-
-#ifdef WITH_WSREP
-		}
-#endif /* WITH_WSREP */
-		MONITOR_INC(MONITOR_TIMEOUT);
-	}
-
-	if (trx_is_interrupted(trx)) {
-
-		trx->error_state = DB_INTERRUPTED;
-	}
-}
-
-/********************************************************************//**
-Wakes up the user OS thread behind a query thread, provided that thread
-currently occupies a wait slot. If its transaction was picked as a
-deadlock victim, the error state is set to DB_DEADLOCK before the
-wake-up. The caller must hold the lock mutex and the trx mutex. */
-UNIV_INTERN
-void
-lock_wait_release_thread_if_suspended(
-/*==================================*/
-	que_thr_t*	thr)	/*!< in: query thread associated with the
-				user OS thread */
-{
-	ut_ad(lock_mutex_own());
-	ut_ad(trx_mutex_own(thr_get_trx(thr)));
-
-	/* We own both the lock mutex and the trx_t::mutex but not the
-	lock wait mutex. This is OK because other threads will see the state
-	of this slot as being in use and no other thread can change the state
-	of the slot to free unless that thread also owns the lock mutex. */
-
-	srv_slot_t*	slot = thr->slot;
-
-	if (slot == NULL || !slot->in_use || slot->thr != thr) {
-		/* The thread is not suspended in a slot: nothing to do. */
-		return;
-	}
-
-	trx_t*	trx = thr_get_trx(thr);
-
-	if (trx->lock.was_chosen_as_deadlock_victim) {
-
-		trx->error_state = DB_DEADLOCK;
-		trx->lock.was_chosen_as_deadlock_victim = FALSE;
-	}
-
-	os_event_set(slot->event);
-}
-
-/*********************************************************************//**
-Checks one wait slot: if the waiting transaction was interrupted or its
-lock wait has timed out, the queued lock request is cancelled and
-possible other waiters behind it are released. The caller must hold the
-lock wait mutex. */
-static
-void
-lock_wait_check_and_cancel(
-/*=======================*/
-	const srv_slot_t*	slot)	/*!< in: slot reserved by a user
-					thread when the wait started */
-{
-	ut_ad(lock_wait_mutex_own());
-	ut_ad(slot->in_use);
-	ut_ad(slot->suspended);
-
-	const double	wait_time = ut_difftime(ut_time(),
-						slot->suspend_time);
-
-	/* A negative wait time means a wrap-around in the system time
-	counter and is handled like an exceeded timeout. */
-	const bool	timed_out = slot->wait_timeout < 100000000
-		&& (wait_time > (double) slot->wait_timeout
-		    || wait_time < 0);
-
-	trx_t*	trx = thr_get_trx(slot->thr);
-
-	if (!trx_is_interrupted(trx) && !timed_out) {
-		return;
-	}
-
-	/* Cancel the lock request queued by the transaction and release
-	possible other transactions waiting behind; it is possible that
-	the lock has already been granted: in that case do nothing */
-
-	lock_mutex_enter();
-
-	trx_mutex_enter(trx);
-
-	if (trx->lock.wait_lock) {
-
-		ut_a(trx->lock.que_state == TRX_QUE_LOCK_WAIT);
-#ifdef WITH_WSREP
-		if (!wsrep_is_BF_lock_timeout(trx)) {
-#endif /* WITH_WSREP */
-			lock_cancel_waiting_and_release(trx->lock.wait_lock);
-#ifdef WITH_WSREP
-		}
-#endif /* WITH_WSREP */
-	}
-
-	lock_mutex_exit();
-
-	trx_mutex_exit(trx);
-}
-
-/*********************************************************************//**
-A thread which wakes up threads whose lock wait may have lasted too long.
-Each round waits on lock_sys->timeout_event with a timeout, then walks
-the slots below lock_sys->last_slot under the lock wait mutex and calls
-lock_wait_check_and_cancel() for every slot that is in use. The loop
-ends once srv_shutdown_state reaches SRV_SHUTDOWN_CLEANUP, after which
-lock_sys->timeout_thread_active is cleared.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(lock_wait_timeout_thread)(void*)
-{
- ib_int64_t sig_count = 0;
- os_event_t event = lock_sys->timeout_event;
-
- ut_ad(!srv_read_only_mode);
-
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(srv_lock_timeout_thread_key);
-#endif /* UNIV_PFS_THREAD */
-
- do {
- srv_slot_t* slot;
-
- /* When someone is waiting for a lock, we wake up every second
- and check if a timeout has passed for a lock wait */
-
- os_event_wait_time_low(event, 1000000, sig_count);
- sig_count = os_event_reset(event);
-
- if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
- break; /* shutting down: skip the slot scan */
- }
-
- lock_wait_mutex_enter();
-
- /* Check all slots for user threads that are waiting
- on locks, and if they have exceeded the time limit. */
-
- for (slot = lock_sys->waiting_threads;
- slot < lock_sys->last_slot;
- ++slot) {
-
- /* We are doing a read without the lock mutex
- and/or the trx mutex. This is OK because a slot
- can't be freed or reserved without the lock wait
- mutex. */
-
- if (slot->in_use) {
- lock_wait_check_and_cancel(slot);
- }
- }
-
- sig_count = os_event_reset(event); /* reset once more, still
- under the lock wait mutex;
- the returned count feeds
- the next wait */
-
- lock_wait_mutex_exit();
-
- } while (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP);
-
- lock_sys->timeout_thread_active = false;
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
diff --git a/storage/xtradb/log/log0crypt.cc b/storage/xtradb/log/log0crypt.cc
deleted file mode 100644
index f6c1416d81a..00000000000
--- a/storage/xtradb/log/log0crypt.cc
+++ /dev/null
@@ -1,638 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2013, 2015, Google Inc. All Rights Reserved.
-Copyright (C) 2014, 2016, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-/**************************************************//**
-@file log0crypt.cc
-Innodb log encrypt/decrypt
-
-Created 11/25/2013 Minli Zhu Google
-Modified Jan Lindström jan.lindstrom@mariadb.com
-*******************************************************/
-#include "m_string.h"
-#include "log0crypt.h"
-#include <mysql/service_my_crypt.h>
-
-#include "log0log.h"
-#include "srv0start.h" // for srv_start_lsn
-#include "log0recv.h" // for recv_sys
-
-#include "ha_prototypes.h" // IB_LOG_
-
-/* Used for debugging */
-// #define DEBUG_CRYPT 1
-#define UNENCRYPTED_KEY_VER 0
-
-/* If true, enable redo log encryption. */
-extern my_bool srv_encrypt_log;
-
-
-#include <algorithm> // std::sort
-#include <deque>
-
-/* If true, enable redo log encryption. */
-UNIV_INTERN my_bool srv_encrypt_log = FALSE;
-/*
- Sub system type for InnoDB redo log crypto.
- Set and used to validate crypto msg.
-*/
-static const byte redo_log_purpose_byte = 0x02;
-
-#define LOG_DEFAULT_ENCRYPTION_KEY 1
-
-/*
- Store this many keys into each checkpoint info
-*/
-static const size_t kMaxSavedKeys = LOG_CRYPT_MAX_ENTRIES;
-
-/* One redo log crypt entry: the key material associated with a checkpoint
-number. Entries are kept in the crypt_info container below. */
-struct crypt_info_t {
- ib_uint64_t checkpoint_no; /*!< checkpoint no; add_crypt_info()
- keeps only the low 32 bits */
- uint key_version; /*!< mysqld key version;
- UNENCRYPTED_KEY_VER (0) means
- no encryption */
- /* Message that init_crypt_key() encrypts with the server key to
- produce crypt_key; stored in the checkpoint buffer. */
- byte crypt_msg[MY_AES_BLOCK_SIZE];
- /* Key handed to encryption_crypt() for the log block payload;
- derived by init_crypt_key(), not written to the checkpoint buffer. */
- byte crypt_key[MY_AES_BLOCK_SIZE];
- /* Nonce stored in the checkpoint buffer; log_blocks_crypt() copies
- its first 3 bytes into the AES counter block. */
- byte crypt_nonce[MY_AES_BLOCK_SIZE];
-};
-
-/* All known crypt entries. get_crypt_info() and add_crypt_info() assert
-that log_sys->mutex is owned while they search or modify this container. */
-static std::deque<crypt_info_t> crypt_info;
-
-/*********************************************************************//**
-Compute the start lsn of a log block. The upper 32 bits are taken from
-the given lsn; the lower part is (log_block_no - 1), masked to 30 bits and
-shifted left by 9 bits.
-@return a log block's start lsn */
-static inline
-lsn_t
-log_block_get_start_lsn(
-/*====================*/
-	lsn_t	lsn,		/*!< in: checkpoint lsn */
-	ulint	log_block_no)	/*!< in: log block number */
-{
-	const lsn_t	high_part = lsn & (lsn_t)0xffffffff00000000ULL;
-	const lsn_t	block_offset =
-		((log_block_no - 1) & (lsn_t)0x3fffffff) << 9;
-
-	return high_part | block_offset;
-}
-
-/*********************************************************************//**
-Look up the crypt info for a checkpoint number. Only the low 32 bits of
-checkpoint_no take part in the comparison. When no entry matches but at
-least one entry exists, the first entry is returned as a fallback.
-The caller must own log_sys->mutex.
-@return a crypt info or NULL if not present. */
-static
-const crypt_info_t*
-get_crypt_info(
-/*===========*/
-	ib_uint64_t	checkpoint_no)
-{
-	/* so that no one is modifying array while we search */
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	const size_t		n_entries = crypt_info.size();
-
-	/* a log block only stores 4-bytes of checkpoint no */
-	const ib_uint64_t	wanted = checkpoint_no & 0xFFFFFFFF;
-
-	for (size_t pos = 0; pos < n_entries; ++pos) {
-		if (crypt_info[pos].checkpoint_no == wanted) {
-			return &crypt_info[pos];
-		}
-	}
-
-	/* No exact match: fall back to the first stored entry, if any. */
-	if (n_entries == 0) {
-		return NULL;
-	}
-
-	return &crypt_info[0];
-}
-
-/*********************************************************************//**
-Get crypt info for a log block, using the checkpoint number that is
-stored in the block.
-@return a crypt info or NULL if not present. */
-static
-const crypt_info_t*
-get_crypt_info(
-/*===========*/
-	const byte*	log_block)
-{
-	const ib_uint64_t	block_checkpoint_no =
-		log_block_get_checkpoint_no(log_block);
-
-	/* Delegate to the lookup by checkpoint number. */
-	return get_crypt_info(block_checkpoint_no);
-}
-
-/*********************************************************************//**
-Print to stderr the checkpoint number of a log block followed by the
-checkpoint number and key version of every stored crypt info. Prints
-nothing when no crypt info is stored. Used for problem analysis. */
-void
-log_crypt_print_checkpoint_keys(
-/*============================*/
-	const byte*	log_block)
-{
-	const ib_uint64_t	block_checkpoint_no =
-		log_block_get_checkpoint_no(log_block);
-
-	if (crypt_info.empty()) {
-		return;
-	}
-
-	fprintf(stderr,
-		"InnoDB: redo log checkpoint: " UINT64PF " [ chk key ]: ",
-		block_checkpoint_no);
-
-	for (size_t pos = 0; pos < crypt_info.size(); ++pos) {
-		const crypt_info_t&	entry = crypt_info[pos];
-
-		fprintf(stderr, "[ " UINT64PF " %u ] ",
-			entry.checkpoint_no,
-			entry.key_version);
-	}
-
-	fprintf(stderr, "\n");
-}
-
-/*********************************************************************//**
-Call AES CTR to encrypt/decrypt log blocks.
-Processes the input one OS_FILE_LOG_BLOCK_SIZE block at a time. A block is
-copied to dst_block unchanged when no crypt info applies to it, when the
-key version is UNENCRYPTED_KEY_VER, or when decryption is requested and the
-block checksum already verifies. Otherwise the block header is copied as is
-and the remainder of the block is passed through encryption_crypt().
-@return MY_AES_OK; encryption_crypt() failures are caught by ut_a() */
-static
-Crypt_result
-log_blocks_crypt(
-/*=============*/
- const byte* block, /*!< in: blocks before encrypt/decrypt*/
- ulint size, /*!< in: size of block */
- byte* dst_block, /*!< out: blocks after encrypt/decrypt */
- int what, /*!< in: encrypt or decrypt*/
- const crypt_info_t* crypt_info) /*!< in: crypt info or NULL */
-{
- byte *log_block = (byte*)block;
- Crypt_result rc = MY_AES_OK;
- uint dst_len;
- byte aes_ctr_counter[MY_AES_BLOCK_SIZE];
- byte is_encrypt= what == ENCRYPTION_FLAG_ENCRYPT;
- /* Reference lsn for the block start lsn computation: the current
- log lsn when encrypting, srv_start_lsn when decrypting. */
- lsn_t lsn = is_encrypt ? log_sys->lsn : srv_start_lsn;
-
- /* Everything after the block header is encrypted/decrypted. */
- const uint src_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE;
- for (ulint i = 0; i < size ; i += OS_FILE_LOG_BLOCK_SIZE) {
- ulint log_block_no = log_block_get_hdr_no(log_block);
- lsn_t log_block_start_lsn = log_block_get_start_lsn(
- lsn, log_block_no);
-
- /* With a NULL crypt_info argument the entry is looked up per
- block from the checkpoint number stored in the block. */
- const crypt_info_t* info = crypt_info == NULL ? get_crypt_info(log_block) :
- crypt_info;
-#ifdef DEBUG_CRYPT
- fprintf(stderr,
- "%s %lu chkpt: %lu key: %u lsn: %lu\n",
- is_encrypt ? "crypt" : "decrypt",
- log_block_no,
- log_block_get_checkpoint_no(log_block),
- info ? info->key_version : 0,
- log_block_start_lsn);
-#endif
- /* If no key is found from checkpoint assume the log_block
- to be unencrypted. If checkpoint contains the encryption key
- compare log_block current checksum, if checksum matches,
- block can't be encrypted. */
- if (info == NULL ||
- info->key_version == UNENCRYPTED_KEY_VER ||
- (log_block_checksum_is_ok_or_old_format(log_block, false) &&
- what == ENCRYPTION_FLAG_DECRYPT)) {
- memcpy(dst_block, log_block, OS_FILE_LOG_BLOCK_SIZE);
- goto next;
- }
-
- ut_ad(what == ENCRYPTION_FLAG_DECRYPT ? !log_block_checksum_is_ok_or_old_format(log_block, false) :
- log_block_checksum_is_ok_or_old_format(log_block, false));
-
- // Assume log block header is not encrypted
- memcpy(dst_block, log_block, LOG_BLOCK_HDR_SIZE);
-
- // aes_ctr_counter = nonce(3-byte) + start lsn to a log block
- // (8-byte) + lbn (4-byte) + abn
- // (1-byte, only 5 bits are used). "+" means concatenate.
- bzero(aes_ctr_counter, MY_AES_BLOCK_SIZE);
- memcpy(aes_ctr_counter, info->crypt_nonce, 3);
- mach_write_to_8(aes_ctr_counter + 3, log_block_start_lsn);
- mach_write_to_4(aes_ctr_counter + 11, log_block_no);
- bzero(aes_ctr_counter + 15, 1);
-
- /* NOTE: this declaration shadows the function-level rc, so the
- value returned at the end of the function is always the initial
- MY_AES_OK; a failure here is reported through ut_a() below. */
- int rc;
- rc = encryption_crypt(log_block + LOG_BLOCK_HDR_SIZE, src_len,
- dst_block + LOG_BLOCK_HDR_SIZE, &dst_len,
- (unsigned char*)(info->crypt_key), 16,
- aes_ctr_counter, MY_AES_BLOCK_SIZE,
- what | ENCRYPTION_FLAG_NOPAD,
- LOG_DEFAULT_ENCRYPTION_KEY,
- info->key_version);
-
- ut_a(rc == MY_AES_OK);
- ut_a(dst_len == src_len);
-next:
- /* Advance both the source and the destination by one block. */
- log_block += OS_FILE_LOG_BLOCK_SIZE;
- dst_block += OS_FILE_LOG_BLOCK_SIZE;
- }
-
- return rc;
-}
-
-/*********************************************************************//**
-Generate crypt key from crypt msg. For UNENCRYPTED_KEY_VER the key, msg
-and nonce are simply zeroed. Otherwise the server key for
-info->key_version is fetched and used to AES-ECB encrypt info->crypt_msg
-into info->crypt_key.
-@return true if successful, false if not. */
-static
-bool
-init_crypt_key(
-/*===========*/
-	crypt_info_t*	info)	/*< in/out: crypt info */
-{
-	if (info->key_version == UNENCRYPTED_KEY_VER) {
-		memset(info->crypt_key, 0, sizeof(info->crypt_key));
-		memset(info->crypt_msg, 0, sizeof(info->crypt_msg));
-		memset(info->crypt_nonce, 0, sizeof(info->crypt_nonce));
-		return true;
-	}
-
-	byte	mysqld_key[MY_AES_MAX_KEY_LENGTH] = {0};
-	uint	keylen = sizeof(mysqld_key);
-
-	const uint	key_err = encryption_key_get(
-		LOG_DEFAULT_ENCRYPTION_KEY, info->key_version,
-		mysqld_key, &keylen);
-
-	if (key_err) {
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Redo log crypto: getting mysqld crypto key "
-			"from key version failed err = %u. Reason could be that requested"
-			" key_version %u is not found or required encryption "
-			" key management is not found.", key_err, info->key_version);
-		return false;
-	}
-
-	/* NOTE(review): the key length passed below is the size of the
-	zero-initialized buffer, not the keylen reported by
-	encryption_key_get(). Presumably intentional for compatibility with
-	existing logs - confirm before changing. */
-	uint		derived_len;
-	const int	aes_err = my_aes_crypt(
-		MY_AES_ECB, ENCRYPTION_FLAG_NOPAD|ENCRYPTION_FLAG_ENCRYPT,
-		info->crypt_msg, sizeof(info->crypt_msg),	//src, srclen
-		info->crypt_key, &derived_len,			//dst, &dstlen
-		(unsigned char*)&mysqld_key, sizeof(mysqld_key),
-		NULL, 0);
-
-	if (aes_err == MY_AES_OK && derived_len == MY_AES_BLOCK_SIZE) {
-		return true;
-	}
-
-	fprintf(stderr,
-		"\nInnodb redo log crypto: getting redo log crypto key "
-		"failed err = %d len = %u.\n", aes_err, derived_len);
-	return false;
-}
-
-/*********************************************************************//**
-Ordering predicate for std::sort that places entries with larger
-checkpoint numbers first.
-@return true if first checkpoint is larger than second one */
-static
-bool
-mysort(const crypt_info_t& i,
-       const crypt_info_t& j)
-{
-	return j.checkpoint_no < i.checkpoint_no;
-}
-
-/*********************************************************************//**
-Add crypt info to set if it is not already present. A new entry gets its
-key derived with init_crypt_key(), has its checkpoint number truncated to
-32 bits, and the set is re-sorted so that larger checkpoint numbers come
-first. The caller must own log_sys->mutex.
-@return true if successful, false if not. */
-static
-bool
-add_crypt_info(
-/*===========*/
-	crypt_info_t*	info,		/*!< in: crypt info */
-	bool		checkpoint_read)/*!< in: do we read checkpoint */
-{
-	/* so that no one is searching array while we modify it */
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	const crypt_info_t*	existing = get_crypt_info(info->checkpoint_no);
-
-	/* If one crypt info is found then we add a new one only if we
-	are reading checkpoint from the log. New checkpoints will always
-	use the first created crypt info. */
-	if (existing != NULL) {
-		const bool	same_checkpoint =
-			existing->checkpoint_no == info->checkpoint_no;
-
-		if (same_checkpoint || !checkpoint_read) {
-			// already present...
-			return true;
-		}
-	}
-
-	if (!init_crypt_key(info)) {
-		return false;
-	}
-
-	crypt_info.push_back(*info);
-
-	/* a log block only stores 4-bytes of checkpoint no */
-	crypt_info.back().checkpoint_no &= 0xFFFFFFFF;
-
-	// keep keys sorted, assuming that last added key will be used most
-	std::sort(crypt_info.begin(), crypt_info.end(), mysort);
-
-	return true;
-}
-
-/*********************************************************************//**
-Encrypt log blocks. Forwards to log_blocks_crypt() with
-ENCRYPTION_FLAG_ENCRYPT and no explicit crypt info.
-@return the result of log_blocks_crypt() */
-UNIV_INTERN
-Crypt_result
-log_blocks_encrypt(
-/*===============*/
-	const byte*	block,		/*!< in: blocks before encryption */
-	const ulint	size,		/*!< in: size of blocks, must be multiple of a log block */
-	byte*		dst_block)	/*!< out: blocks after encryption */
-{
-	const Crypt_result	result = log_blocks_crypt(
-		block, size, dst_block, ENCRYPTION_FLAG_ENCRYPT, NULL);
-
-	return result;
-}
-
-/*********************************************************************//**
-Set next checkpoint's key version to latest one, and generate current
-key. Key version 0 means no encryption. For an encrypted key version a
-random crypt msg and a random nonce are generated; the resulting crypt
-info is handed to add_crypt_info(). */
-UNIV_INTERN
-void
-log_crypt_set_ver_and_key(
-/*======================*/
-	ib_uint64_t	next_checkpoint_no)
-{
-	crypt_info_t	info;
-
-	info.checkpoint_no = next_checkpoint_no;
-	info.key_version = srv_encrypt_log
-		? encryption_key_get_latest_version(LOG_DEFAULT_ENCRYPTION_KEY)
-		: UNENCRYPTED_KEY_VER;
-
-	if (info.key_version != UNENCRYPTED_KEY_VER) {
-		if (my_random_bytes(info.crypt_msg, MY_AES_BLOCK_SIZE) != MY_AES_OK) {
-			ib_logf(IB_LOG_LEVEL_ERROR,
-				"Redo log crypto: generate "
-				"%u-byte random number as crypto msg failed.",
-				MY_AES_BLOCK_SIZE);
-			ut_error;
-		}
-
-		if (my_random_bytes(info.crypt_nonce, MY_AES_BLOCK_SIZE) != MY_AES_OK) {
-			ib_logf(IB_LOG_LEVEL_ERROR,
-				"Redo log crypto: generate "
-				"%u-byte random number as AES_CTR nonce failed.",
-				MY_AES_BLOCK_SIZE);
-			ut_error;
-		}
-	} else {
-		/* Unencrypted: no msg or nonce is needed. */
-		memset(info.crypt_msg, 0, sizeof(info.crypt_msg));
-		memset(info.crypt_nonce, 0, sizeof(info.crypt_nonce));
-	}
-
-	/* The return value is not checked here, as in the original code. */
-	add_crypt_info(&info, false);
-}
-
-/********************************************************
-Encrypt one or more log block before it is flushed to disk.
-The blocks are encrypted into a temporary buffer and copied back over
-the input on success. Nothing is changed when no crypt info applies, when
-the key version is UNENCRYPTED_KEY_VER, when srv_encrypt_log is off, or
-when size is 0. An allocation or encryption failure is fatal. */
-UNIV_INTERN
-void
-log_encrypt_before_write(
-/*=====================*/
-	ib_uint64_t	next_checkpoint_no,	/*!< in: checkpoint number used
-						to look up the crypt info */
-	byte*		block,			/*!< in/out: pointer to a log block */
-	const ulint	size)			/*!< in: size of log blocks */
-{
-	ut_ad(size % OS_FILE_LOG_BLOCK_SIZE == 0);
-
-	const crypt_info_t*	info = get_crypt_info(next_checkpoint_no);
-	if (info == NULL) {
-		return;
-	}
-
-	/* If the key is not encrypted or user has requested not to
-	encrypt, do not change log block. */
-	if (info->key_version == UNENCRYPTED_KEY_VER || !srv_encrypt_log) {
-		return;
-	}
-
-	/* Nothing to encrypt; also avoids malloc(0), which may
-	legitimately return NULL. */
-	if (size == 0) {
-		return;
-	}
-
-	byte*	dst_frame = (byte*)malloc(size);
-
-	/* log_blocks_crypt() writes through dst_frame unconditionally, so
-	fail in a defined way instead of dereferencing NULL. */
-	ut_a(dst_frame != NULL);
-
-	//encrypt log blocks content
-	Crypt_result	result = log_blocks_crypt(
-		block, size, dst_frame, ENCRYPTION_FLAG_ENCRYPT, NULL);
-
-	if (result == MY_AES_OK) {
-		/* The block header is copied unencrypted. */
-		ut_ad(block[0] == dst_frame[0]);
-		memcpy(block, dst_frame, size);
-	}
-	free(dst_frame);
-
-	if (unlikely(result != MY_AES_OK)) {
-		ut_error;
-	}
-}
-
-/********************************************************
-Decrypt a specified log segment after they are read from a log file to a buffer.
-The segment is decrypted into a temporary buffer and copied back over the
-input on success. Nothing is done when size is 0. An allocation or
-decryption failure is fatal.
-*/
-void
-log_decrypt_after_read(
-/*===================*/
-	byte*		frame,	/*!< in/out: log segment */
-	const ulint	size)	/*!< in: log segment size */
-{
-	ut_ad(size % OS_FILE_LOG_BLOCK_SIZE == 0);
-
-	/* Nothing to decrypt; also avoids malloc(0), which may
-	legitimately return NULL. */
-	if (size == 0) {
-		return;
-	}
-
-	byte*	dst_frame = (byte*)malloc(size);
-
-	/* log_blocks_crypt() writes through dst_frame unconditionally, so
-	fail in a defined way instead of dereferencing NULL. */
-	ut_a(dst_frame != NULL);
-
-	// decrypt log blocks content
-	Crypt_result	result = log_blocks_crypt(
-		frame, size, dst_frame, ENCRYPTION_FLAG_DECRYPT, NULL);
-
-	if (result == MY_AES_OK) {
-		memcpy(frame, dst_frame, size);
-	}
-	free(dst_frame);
-
-	if (unlikely(result != MY_AES_OK)) {
-		ut_error;
-	}
-}
-
-/*********************************************************************//**
-Writes the crypto (version, msg and iv) info, which has been used for
-log blocks with lsn <= this checkpoint's lsn, to a log header's
-checkpoint buf.
-Layout written at buf + LOG_CRYPT_VER: one byte redo_log_purpose_byte, one
-byte entry count, then per entry (LOG_CRYPT_ENTRY_SIZE bytes apart)
-4 bytes checkpoint no, 4 bytes key version, MY_AES_BLOCK_SIZE bytes crypt
-msg and MY_AES_BLOCK_SIZE bytes nonce. When no stored entry is encrypted
-the LOG_CRYPT_SIZE bytes at buf + LOG_CRYPT_VER are zeroed instead.
-As a side effect the stored entries are sorted and trimmed to at most
-kMaxSavedKeys. */
-UNIV_INTERN
-void
-log_crypt_write_checkpoint_buf(
-/*===========================*/
- byte* buf) /*!< in/out: checkpoint buffer */
-{
- /* Remember the start of the buffer for the size check at the end. */
- byte *save = buf;
-
- // Only write kMaxSavedKeys (sort keys to remove oldest)
- std::sort(crypt_info.begin(), crypt_info.end(), mysort);
- while (crypt_info.size() > kMaxSavedKeys) {
- crypt_info.pop_back();
- }
-
- /* Check whether any of the remaining entries uses encryption. */
- bool encrypted = false;
- for (size_t i = 0; i < crypt_info.size(); i++) {
- const crypt_info_t & it = crypt_info[i];
- if (it.key_version != UNENCRYPTED_KEY_VER) {
- encrypted = true;
- break;
- }
- }
-
- if (encrypted == false) {
- // if no encryption is in use then zero out
- // crypt data for upward/downward compatibility
- memset(buf + LOG_CRYPT_VER, 0, LOG_CRYPT_SIZE);
- return;
- }
-
- /* Only used by the DEBUG_CRYPT output below. */
- ib_uint64_t checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
- buf += LOG_CRYPT_VER;
-
- /* Two byte header: purpose byte and number of entries. */
- mach_write_to_1(buf + 0, redo_log_purpose_byte);
- mach_write_to_1(buf + 1, crypt_info.size());
- buf += 2;
- for (size_t i = 0; i < crypt_info.size(); i++) {
- struct crypt_info_t* it = &crypt_info[i];
- mach_write_to_4(buf + 0, it->checkpoint_no);
- mach_write_to_4(buf + 4, it->key_version);
- memcpy(buf + 8, it->crypt_msg, MY_AES_BLOCK_SIZE);
- memcpy(buf + 24, it->crypt_nonce, MY_AES_BLOCK_SIZE);
- buf += LOG_CRYPT_ENTRY_SIZE;
- }
-
-#ifdef DEBUG_CRYPT
- fprintf(stderr, "write chk: %lu [ chk key ]: ", checkpoint_no);
- for (size_t i = 0; i < crypt_info.size(); i++) {
- struct crypt_info_t* it = &crypt_info[i];
- fprintf(stderr, "[ %lu %u ] ",
- it->checkpoint_no,
- it->key_version);
- }
- fprintf(stderr, "\n");
-#else
- (void)checkpoint_no; // unused variable
-#endif
- /* Everything written must fit into one log block. */
- ut_a((ulint)(buf - save) <= OS_FILE_LOG_BLOCK_SIZE);
-}
-
-/*********************************************************************//**
-Read the crypto (version, msg and iv) info, which has been used for
-log blocks with lsn <= this checkpoint's lsn, from a log header's
-checkpoint buf.
-The data at buf + LOG_CRYPT_VER is only parsed when it starts with
-redo_log_purpose_byte; otherwise the buffer is treated as carrying no crypt
-info. The entry count stored in the buffer is validated against
-kMaxSavedKeys, the maximum the writer ever stores, so that a damaged
-checkpoint cannot make this function read beyond the crypt area.
-@return true if there was nothing to read or all entries were added,
-false if the entry count is invalid or add_crypt_info() failed */
-UNIV_INTERN
-bool
-log_crypt_read_checkpoint_buf(
-/*===========================*/
-	const byte*	buf) {	/*!< in: checkpoint buffer */
-
-	buf += LOG_CRYPT_VER;
-
-	byte	scheme = buf[0];
-	if (scheme != redo_log_purpose_byte) {
-		return true;
-	}
-	buf++;
-	size_t	n = buf[0];
-	buf++;
-
-	if (n > kMaxSavedKeys) {
-		/* More entries than can ever have been written. */
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Redo log crypto: checkpoint contains %u crypt "
-			"entries, but at most %u are supported.",
-			(uint) n, (uint) kMaxSavedKeys);
-		return false;
-	}
-
-	for (size_t i = 0; i < n; i++) {
-		struct crypt_info_t	info;
-		info.checkpoint_no = mach_read_from_4(buf + 0);
-		info.key_version = mach_read_from_4(buf + 4);
-		memcpy(info.crypt_msg, buf + 8, MY_AES_BLOCK_SIZE);
-		memcpy(info.crypt_nonce, buf + 24, MY_AES_BLOCK_SIZE);
-
-		/* add_crypt_info() derives info.crypt_key from the msg. */
-		if (!add_crypt_info(&info, true)) {
-			return false;
-		}
-		buf += LOG_CRYPT_ENTRY_SIZE;
-	}
-
-#ifdef DEBUG_CRYPT
-	fprintf(stderr, "read [ chk key ]: ");
-	for (size_t i = 0; i < crypt_info.size(); i++) {
-		struct crypt_info_t*	it = &crypt_info[i];
-		fprintf(stderr, "[ %lu %u ] ",
-			it->checkpoint_no,
-			it->key_version);
-	}
-	fprintf(stderr, "\n");
-#endif
-	return true;
-}
-
-/********************************************************
-Check whether a log block may be encrypted. The check is based on
-whether a crypt info exists for the block and whether its key version
-differs from the unencrypted key version. There is no reliable way to
-distinguish an encrypted log block from a corrupted one, but when log
-block corruption is found this function is used to find out whether the
-block is maybe encrypted while the encryption key, key management plugin
-or encryption algorithm does not match.
-err_info is set to LOG_UNENCRYPTED when the block cannot be encrypted, to
-LOG_CRYPT_KEY_NOT_FOUND when fetching the key for the block's key version
-fails, and to LOG_DECRYPT_MAYBE_FAILED otherwise.
-@return TRUE, if log block may be encrypted */
-UNIV_INTERN
-ibool
-log_crypt_block_maybe_encrypted(
-/*============================*/
-	const byte*		log_block,	/*!< in: log block */
-	log_crypt_err_t*	err_info)	/*!< out: error info */
-{
-	ibool			maybe_encrypted = FALSE;
-	const crypt_info_t*	info;
-
-	*err_info = LOG_UNENCRYPTED;
-	info = get_crypt_info(log_block);
-
-	if (info &&
-	    info->key_version != UNENCRYPTED_KEY_VER) {
-		/* Use the same buffer size as init_crypt_key() so that a
-		key longer than one AES block is not rejected here and
-		misreported as missing. */
-		byte	mysqld_key[MY_AES_MAX_KEY_LENGTH] = {0};
-		uint	keylen = sizeof(mysqld_key);
-
-		/* Log block contains crypt info and based on key
-		version block could be encrypted. */
-		*err_info = LOG_DECRYPT_MAYBE_FAILED;
-		maybe_encrypted = TRUE;
-
-		if (encryption_key_get(LOG_DEFAULT_ENCRYPTION_KEY,
-				       info->key_version, mysqld_key, &keylen)) {
-			*err_info = LOG_CRYPT_KEY_NOT_FOUND;
-		}
-	}
-
-	return (maybe_encrypted);
-}
-
-/********************************************************
-Print crypt error message to error log. Only
-LOG_CRYPT_KEY_NOT_FOUND and LOG_DECRYPT_MAYBE_FAILED are expected; any
-other value triggers ut_error. */
-UNIV_INTERN
-void
-log_crypt_print_error(
-/*==================*/
-	log_crypt_err_t	err_info)	/*!< in: error info */
-{
-	if (err_info == LOG_CRYPT_KEY_NOT_FOUND) {
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Redo log crypto: getting mysqld crypto key "
-			"from key version failed. Reason could be that "
-			"requested key version is not found or required "
-			"encryption key management plugin is not found.");
-	} else if (err_info == LOG_DECRYPT_MAYBE_FAILED) {
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Redo log crypto: failed to decrypt log block. "
-			"Reason could be that requested key version is "
-			"not found, required encryption key management "
-			"plugin is not found or configured encryption "
-			"algorithm and/or method does not match.");
-	} else {
-		ut_error; /* Real bug */
-	}
-}
diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc
deleted file mode 100644
index 833f3240369..00000000000
--- a/storage/xtradb/log/log0log.cc
+++ /dev/null
@@ -1,4141 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2009, Google Inc.
-Copyright (c) 2014, 2017, MariaDB Corporation.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file log/log0log.cc
-Database log
-
-Created 12/9/1995 Heikki Tuuri
-*******************************************************/
-
-#include "config.h"
-#ifdef HAVE_ALLOCA_H
-#include "alloca.h"
-#elif defined(HAVE_MALLOC_H)
-#include "malloc.h"
-#endif
-
-/* Used for debugging */
-// #define DEBUG_CRYPT 1
-
-#include "log0log.h"
-
-#ifdef UNIV_NONINL
-#include "log0log.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-#if MYSQL_VERSION_ID < 100200
-# include <my_systemd.h> /* sd_notifyf() */
-#endif
-
-#include "mem0mem.h"
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "lock0lock.h"
-#include "log0recv.h"
-#include "fil0fil.h"
-#include "dict0boot.h"
-#include "dict0stats_bg.h"
-#include "dict0stats_bg.h"
-#include "btr0defragment.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "trx0sys.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "srv0mon.h"
-
-/*
-General philosophy of InnoDB redo-logs:
-
-1) Every change to a contents of a data page must be done
-through mtr, which in mtr_commit() writes log records
-to the InnoDB redo log.
-
-2) Normally these changes are performed using a mlog_write_ulint()
-or similar function.
-
-3) In some page level operations only a code number of a
-c-function and its parameters are written to the log to
-reduce the size of the log.
-
- 3a) You should not add parameters to these kinds of functions
- (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse()).
-
- 3b) You should not add functionality that either changes how
- such a function works compared with the old version or depends
- on data outside of the page. These kinds of functions should
- implement a self-contained page transformation, and they should
- stay unchanged unless you have a very essential reason to change
- the log semantics or format.
-
-*/
-
-/* Global log system variable */
-UNIV_INTERN log_t* log_sys = NULL;
-
-/** Pointer to the log checksum calculation function */
-UNIV_INTERN log_checksum_func_t log_checksum_algorithm_ptr =
- log_block_calc_checksum_innodb;
-
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(log_scrub_thread)(void*);
-
-/* Next log block number to do dummy record filling if no log records written
-for a while */
-static ulint next_lbn_to_pad = 0;
-
-#ifdef UNIV_PFS_RWLOCK
-UNIV_INTERN mysql_pfs_key_t checkpoint_lock_key;
-# ifdef UNIV_LOG_ARCHIVE
-UNIV_INTERN mysql_pfs_key_t archive_lock_key;
-# endif
-#endif /* UNIV_PFS_RWLOCK */
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t log_sys_mutex_key;
-UNIV_INTERN mysql_pfs_key_t log_flush_order_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef UNIV_DEBUG
-UNIV_INTERN ibool log_do_write = TRUE;
-#endif /* UNIV_DEBUG */
-
-/* These control how often we print warnings if the last checkpoint is too
-old */
-UNIV_INTERN ibool log_has_printed_chkp_warning = FALSE;
-UNIV_INTERN time_t log_last_warning_time;
-
-#ifdef UNIV_LOG_ARCHIVE
-/* Pointer to this variable is used as the i/o-message when we do i/o to an
-archive */
-UNIV_INTERN byte log_archive_io;
-#endif /* UNIV_LOG_ARCHIVE */
-
-UNIV_INTERN ulint log_disable_checkpoint_active= 0;
-
-/* A margin for free space in the log buffer before a log entry is catenated */
-#define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE)
-
-/* Margins for free space in the log buffer after a log entry is catenated */
-#define LOG_BUF_FLUSH_RATIO 2
-#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)
-
-/* Margin for the free space in the smallest log group, before a new query
-step which modifies the database, is started */
-
-#define LOG_CHECKPOINT_FREE_PER_THREAD (4 * UNIV_PAGE_SIZE)
-#define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE)
-
-/* This parameter controls asynchronous making of a new checkpoint; the value
-should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
-
-#define LOG_POOL_CHECKPOINT_RATIO_ASYNC 32
-
-/* This parameter controls synchronous preflushing of modified buffer pages */
-#define LOG_POOL_PREFLUSH_RATIO_SYNC 16
-
-/* The same ratio for asynchronous preflushing; this value should be less than
-the previous */
-#define LOG_POOL_PREFLUSH_RATIO_ASYNC 8
-
-/* Extra margin, in addition to one log file, used in archiving */
-#define LOG_ARCHIVE_EXTRA_MARGIN (4 * UNIV_PAGE_SIZE)
-
-/* This parameter controls asynchronous writing to the archive */
-#define LOG_ARCHIVE_RATIO_ASYNC 16
-
-/* Codes used in unlocking flush latches */
-#define LOG_UNLOCK_NONE_FLUSHED_LOCK 1
-#define LOG_UNLOCK_FLUSH_LOCK 2
-
-/* States of an archiving operation */
-#define LOG_ARCHIVE_READ 1
-#define LOG_ARCHIVE_WRITE 2
-
-/** Event to wake up the log scrub thread */
-static os_event_t log_scrub_event;
-
-static bool log_scrub_thread_active;
-
-/******************************************************//**
-Completes a checkpoint write i/o to a log file. */
-static
-void
-log_io_complete_checkpoint(void);
-/*============================*/
-#ifdef UNIV_LOG_ARCHIVE
-/******************************************************//**
-Completes an archiving i/o. */
-static
-void
-log_io_complete_archive(void);
-/*=========================*/
-#endif /* UNIV_LOG_ARCHIVE */
-
-/****************************************************************//**
-Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
-exists. The caller must own log_sys->mutex.
-@return LSN of oldest modification */
-static
-lsn_t
-log_buf_pool_get_oldest_modification(void)
-/*======================================*/
-{
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	const lsn_t	oldest_lsn = buf_pool_get_oldest_modification();
-
-	/* A zero lsn means that there is no modified block. */
-	if (oldest_lsn) {
-		return(oldest_lsn);
-	}
-
-	return(log_sys->lsn);
-}
-
-/****************************************************************//**
-Variant of log_buf_pool_get_oldest_modification() that is based on
-buf_pool_get_oldest_modification_peek() and does not assert ownership of
-log_sys->mutex. Returns log_sys->lsn if the peek reports no modified block.
-@return LSN of oldest modification */
-static
-lsn_t
-log_buf_pool_get_oldest_modification_peek(void)
-/*===========================================*/
-{
-	const lsn_t	peeked_lsn = buf_pool_get_oldest_modification_peek();
-
-	/* A zero lsn means that there is no modified block. */
-	if (peeked_lsn) {
-		return(peeked_lsn);
-	}
-
-	return(log_sys->lsn);
-}
-
-/****************************************************************//**
-Checks if the log groups have a big enough margin of free space in
-so that a new log entry can be written without overwriting log data
-that is not read by the changed page bitmap thread. Always reports enough
-space when srv_track_changed_pages is off. The caller must own
-log_sys->mutex when tracking is on.
-@return TRUE if there is not enough free space. */
-static
-ibool
-log_check_tracking_margin(
-	ulint	lsn_advance)	/*!< in: an upper limit on how much log data we
-				plan to write. If zero, the margin will be
-				checked for the already-written log. */
-{
-	if (!srv_track_changed_pages) {
-		return FALSE;
-	}
-
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	const lsn_t	tracked_up_to = log_get_tracked_lsn();
-	const lsn_t	untracked_age = log_sys->lsn - tracked_up_to;
-
-	/* The overwrite would happen when log_sys->log_group_capacity is
-	exceeded, but we use max_checkpoint_age for an extra safety margin. */
-	return untracked_age + lsn_advance > log_sys->max_checkpoint_age;
-}
-
-/** Extends the log buffer.
-Acquires log_sys->mutex itself, waits for a concurrent extension and for
-pending writes to drain, then replaces the buffer with a newly allocated one
-and carries over the contents of the last, partially filled log block.
-srv_log_buffer_size is set to len / UNIV_PAGE_SIZE + 1. Returns early,
-without extending, if another thread already made the buffer large enough.
-@param[in] len requested minimum size in bytes */
-static
-void
-log_buffer_extend(
- ulint len)
-{
- ulint move_start;
- ulint move_end;
- /* Stack scratch space for one log block. */
- byte* tmp_buf = reinterpret_cast<byte *>(alloca(OS_FILE_LOG_BLOCK_SIZE));
-
- mutex_enter(&(log_sys->mutex));
-
- while (log_sys->is_extending) {
- /* Another thread is trying to extend already.
- Needs to wait for. */
- mutex_exit(&(log_sys->mutex));
-
- log_buffer_flush_to_disk();
-
- mutex_enter(&(log_sys->mutex));
-
- if (srv_log_buffer_size > len / UNIV_PAGE_SIZE) {
- /* Already extended enough by the others */
- mutex_exit(&(log_sys->mutex));
- return;
- }
- }
-
- /* From here on this thread is the one doing the extension. */
- log_sys->is_extending = true;
-
- /* Flush until no write is pending and buf_free and buf_next_to_write
- lie in the same log block. */
- while (log_sys->n_pending_writes != 0
- || ut_calc_align_down(log_sys->buf_free,
- OS_FILE_LOG_BLOCK_SIZE)
- != ut_calc_align_down(log_sys->buf_next_to_write,
- OS_FILE_LOG_BLOCK_SIZE)) {
- /* Buffer might have >1 blocks to write still. */
- mutex_exit(&(log_sys->mutex));
-
- log_buffer_flush_to_disk();
-
- mutex_enter(&(log_sys->mutex));
- }
-
- /* [move_start, move_end) is the used part of the last log block. */
- move_start = ut_calc_align_down(
- log_sys->buf_free,
- OS_FILE_LOG_BLOCK_SIZE);
- move_end = log_sys->buf_free;
-
- /* store the last log block in buffer */
- ut_memcpy(tmp_buf, log_sys->buf + move_start,
- move_end - move_start);
-
- /* The saved block will become the first block of the new buffer,
- so rebase both offsets. */
- log_sys->buf_free -= move_start;
- log_sys->buf_next_to_write -= move_start;
-
- /* reallocate log buffer */
- srv_log_buffer_size = len / UNIV_PAGE_SIZE + 1;
- mem_free(log_sys->buf_ptr);
- log_sys->buf_ptr = static_cast<byte*>(
- mem_zalloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE));
- log_sys->buf = static_cast<byte*>(
- ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
- log_sys->buf_size = LOG_BUFFER_SIZE;
- log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
- - LOG_BUF_FLUSH_MARGIN;
-
- /* restore the last log block */
- ut_memcpy(log_sys->buf, tmp_buf, move_end - move_start);
-
- ut_ad(log_sys->is_extending);
- log_sys->is_extending = false;
-
- mutex_exit(&(log_sys->mutex));
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "innodb_log_buffer_size was extended to %lu.",
- LOG_BUFFER_SIZE);
-}
-
-/************************************************************//**
-Opens the log for log_write_low. The log must be closed with log_close.
-The caller must hold log_sys->mutex on entry and still holds it on return.
-The mutex is released temporarily whenever the function has to wait: while
-the log buffer is extended, while the buffer is flushed to make room, while
-an archive batch is written, and while waiting for log tracking to catch
-up.
-@return start lsn of the log record */
-UNIV_INTERN
-lsn_t
-log_open(
-/*=====*/
-	ulint	len)	/*!< in: length of data to be catenated */
-{
-	log_t*	log			= log_sys;
-	ulint	len_upper_limit;
-#ifdef UNIV_LOG_ARCHIVE
-	lsn_t	archived_lsn_age;
-	ulint	dummy;
-#endif /* UNIV_LOG_ARCHIVE */
-	ulint	count			= 0;
-	ulint	tcount			= 0;
-
-	if (len >= log->buf_size / 2) {
-		DBUG_EXECUTE_IF("ib_log_buffer_is_short_crash",
-				DBUG_SUICIDE(););
-
-		/* log_buffer is too small. try to extend instead of crash. */
-		ib_logf(IB_LOG_LEVEL_WARN,
-			"The transaction log size is too large"
-			" for innodb_log_buffer_size (%lu >= %lu / 2). "
-			"Trying to extend it.",
-			len, LOG_BUFFER_SIZE);
-
-		/* log_buffer_extend() acquires the log mutex itself, so it
-		must not be called while we are holding it. */
-		mutex_exit(&(log->mutex));
-
-		log_buffer_extend((len + 1) * 2);
-
-		mutex_enter(&(log->mutex));
-	}
-loop:
-	ut_ad(!recv_no_log_write);
-
-	if (log->is_extending) {
-
-		mutex_exit(&(log->mutex));
-
-		/* Log buffer size is extending. Writing up to the next block
-		should wait for the extending finished. */
-
-		os_thread_sleep(100000);
-
-		ut_ad(++count < 50);
-
-		/* Every pass through the loop expects the mutex to be
-		held, as in the other retry branches below. */
-		mutex_enter(&(log->mutex));
-
-		goto loop;
-	}
-
-	/* Calculate an upper limit for the space the string may take in the
-	log buffer */
-
-	len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4;
-
-	if (log->buf_free + len_upper_limit > log->buf_size) {
-
-		mutex_exit(&(log->mutex));
-
-		/* Not enough free space, do a synchronous flush of the log
-		buffer */
-
-		log_buffer_flush_to_disk();
-
-		srv_stats.log_waits.inc();
-
-		ut_ad(++count < 50);
-
-		mutex_enter(&(log->mutex));
-
-		goto loop;
-	}
-
-#ifdef UNIV_LOG_ARCHIVE
-	if (log->archiving_state != LOG_ARCH_OFF) {
-
-		archived_lsn_age = log->lsn - log->archived_lsn;
-		if (archived_lsn_age + len_upper_limit
-		    > log->max_archived_lsn_age) {
-			/* Not enough free archived space in log groups: do a
-			synchronous archive write batch: */
-
-			mutex_exit(&(log->mutex));
-
-			ut_ad(len_upper_limit <= log->max_archived_lsn_age);
-
-			log_archive_do(TRUE, &dummy);
-
-			ut_ad(++count < 50);
-
-			mutex_enter(&(log->mutex));
-
-			goto loop;
-		}
-	}
-#endif /* UNIV_LOG_ARCHIVE */
-
-	if (log_check_tracking_margin(len_upper_limit) &&
-	    (++tcount + count < 50)) {
-
-		/* This log write would violate the untracked LSN free space
-		margin.  Limit this to 50 retries as there might be situations
-		where we have no choice but to proceed anyway, i.e. if the log
-		is about to be overflown, log tracking or not. */
-		mutex_exit(&(log->mutex));
-
-		os_thread_sleep(10000);
-
-		mutex_enter(&(log->mutex));
-
-		goto loop;
-	}
-
-#ifdef UNIV_LOG_DEBUG
-	log->old_buf_free = log->buf_free;
-	log->old_lsn = log->lsn;
-#endif
-	return(log->lsn);
-}
-
-/************************************************************//**
-Appends the given byte string to the log buffer, splitting it over as many
-log blocks as needed. The caller must hold the log mutex. */
-UNIV_INTERN
-void
-log_write_low(
-/*==========*/
-	byte*	str,		/*!< in: string */
-	ulint	str_len)	/*!< in: string length */
-{
-	log_t*	log = log_sys;
-
-	ut_ad(mutex_own(&(log->mutex)));
-
-	do {
-		ut_ad(!recv_no_log_write);
-
-		/* Offset within a block at which the trailer begins */
-		const ulint	payload_end
-			= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
-		const ulint	pos_in_block
-			= log->buf_free % OS_FILE_LOG_BLOCK_SIZE;
-
-		ulint	data_len = pos_in_block + str_len;
-		ulint	chunk;
-
-		if (data_len > payload_end) {
-			/* Only a part fits: fill the block up to its
-			trailer */
-			data_len = payload_end;
-
-			chunk = OS_FILE_LOG_BLOCK_SIZE
-				- pos_in_block
-				- LOG_BLOCK_TRL_SIZE;
-		} else {
-			/* The rest of the string fits in this block */
-			chunk = str_len;
-		}
-
-		ut_memcpy(log->buf + log->buf_free, str, chunk);
-
-		str += chunk;
-		str_len -= chunk;
-
-		byte*	block = static_cast<byte*>(
-			ut_align_down(
-				log->buf + log->buf_free,
-				OS_FILE_LOG_BLOCK_SIZE));
-
-		log_block_set_data_len(block, data_len);
-
-		/* Number of bytes by which lsn and buf_free advance */
-		ulint	advance = chunk;
-
-		if (data_len == payload_end) {
-			/* The block is now full: finalize it, skip over
-			its trailer and the header of the next block, and
-			initialize that next block */
-			log_block_set_data_len(block, OS_FILE_LOG_BLOCK_SIZE);
-			log_block_set_checkpoint_no(
-				block, log_sys->next_checkpoint_no);
-
-			advance += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
-
-			log->lsn += advance;
-
-			log_block_init(block + OS_FILE_LOG_BLOCK_SIZE,
-				       log->lsn);
-		} else {
-			log->lsn += advance;
-		}
-
-		log->buf_free += advance;
-
-		ut_ad(log->buf_free <= log->buf_size);
-
-	} while (str_len > 0);
-
-	srv_stats.log_write_requests.inc();
-}
-
-/************************************************************//**
-Closes the log after log records have been appended to the log buffer.
-The caller must own the log mutex (asserted below); this function does not
-release it. In addition to returning the current lsn it:
-(1) sets the first record group offset of the log block that contains
-    log->buf_free to that block's data length, if the offset is still 0;
-(2) sets log->check_flush_or_checkpoint when the buffer is filled beyond
-    log->max_buf_free, or when the checkpoint age or the age of the oldest
-    modification exceeds the limits tested at the end of the function;
-(3) switches srv_track_changed_pages off when the untracked log age has
-    reached the log group capacity;
-(4) prints an error to stderr when the checkpoint age has reached the log
-    group capacity, unless such an error was printed within the last
-    15 seconds.
-@return lsn */
-UNIV_INTERN
-lsn_t
-log_close(void)
-/*===========*/
-{
- byte* log_block;
- ulint first_rec_group;
- lsn_t oldest_lsn;
- lsn_t lsn;
- lsn_t tracked_lsn;
- lsn_t tracked_lsn_age;
- log_t* log = log_sys;
- lsn_t checkpoint_age;
-
- ut_ad(mutex_own(&(log->mutex)));
- ut_ad(!recv_no_log_write);
-
- lsn = log->lsn;
-
- log_block = static_cast<byte*>(
- ut_align_down(
- log->buf + log->buf_free, OS_FILE_LOG_BLOCK_SIZE));
-
- first_rec_group = log_block_get_first_rec_group(log_block);
-
- if (first_rec_group == 0) {
- /* We initialized a new log block which was not written
- full by the current mtr: the next mtr log record group
- will start within this block at the offset data_len */
-
- log_block_set_first_rec_group(
- log_block, log_block_get_data_len(log_block));
- }
-
- if (log->buf_free > log->max_buf_free) {
-
- log->check_flush_or_checkpoint = TRUE;
- }
-
- if (srv_track_changed_pages) {
-
- tracked_lsn = log_get_tracked_lsn();
- tracked_lsn_age = lsn - tracked_lsn;
-
- if (tracked_lsn_age >= log->log_group_capacity) {
-
- fprintf(stderr, "InnoDB: Error: the age of the "
- "oldest untracked record exceeds the log "
- "group capacity!\n");
- fprintf(stderr, "InnoDB: Error: stopping the log "
- "tracking thread at LSN " LSN_PF "\n",
- tracked_lsn);
- srv_track_changed_pages = FALSE;
- }
- }
-
- checkpoint_age = lsn - log->last_checkpoint_lsn;
-
- if (checkpoint_age >= log->log_group_capacity) {
- /* TODO: split btr_store_big_rec_extern_fields() into small
- steps so that we can release all latches in the middle, and
- call log_free_check() to ensure we never write over log written
- after the latest checkpoint. In principle, we should split all
- big_rec operations, but other operations are smaller. */
-
- if (!log_has_printed_chkp_warning
- || difftime(time(NULL), log_last_warning_time) > 15) {
-
- log_has_printed_chkp_warning = TRUE;
- log_last_warning_time = time(NULL);
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: ERROR: the age of the last"
- " checkpoint is " LSN_PF ",\n"
- "InnoDB: which exceeds the log group"
- " capacity " LSN_PF ".\n"
- "InnoDB: If you are using big"
- " BLOB or TEXT rows, you must set the\n"
- "InnoDB: combined size of log files"
- " at least 10 times bigger than the\n"
- "InnoDB: largest such row.\n",
- checkpoint_age,
- log->log_group_capacity);
- }
- }
-
- if (checkpoint_age <= log->max_modified_age_sync) {
-
- goto function_exit; /* the checkpoint age is within the limit: the
-   oldest modification in the buffer pool is not looked up */
- }
-
- oldest_lsn = buf_pool_get_oldest_modification();
-
- if (!oldest_lsn
- || lsn - oldest_lsn > log->max_modified_age_sync
- || checkpoint_age > log->max_checkpoint_age_async) {
-
- log->check_flush_or_checkpoint = TRUE;
- }
-function_exit:
-
-#ifdef UNIV_LOG_DEBUG
- log_check_log_recs(log->buf + log->old_buf_free,
- log->buf_free - log->old_buf_free, log->old_lsn);
-#endif
-
- return(lsn);
-}
-
-/******************************************************//**
-Fills the rest of the current log block with one-byte MLOG_DUMMY_RECORD
-records, so that the log ends exactly at the start of the payload of the
-next block. Nothing is written when the current block holds no payload yet.
-Used in producing consistent archived log files and scrubbing redo log. */
-static
-void
-log_pad_current_log_block(void)
-/*===========================*/
-{
-	byte	dummy_rec = MLOG_DUMMY_RECORD;
-	lsn_t	lsn;
-
-	/* We retrieve lsn only because otherwise gcc crashed on HP-UX */
-	lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
-
-	/* Payload capacity of a block that contains only its header */
-	const ulint	whole_payload = OS_FILE_LOG_BLOCK_SIZE
-		- LOG_BLOCK_HDR_SIZE
-		- LOG_BLOCK_TRL_SIZE;
-
-	/* Bytes left before the trailer of the current block */
-	ulint	remaining = OS_FILE_LOG_BLOCK_SIZE
-		- (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE)
-		- LOG_BLOCK_TRL_SIZE;
-
-	if (remaining == whole_payload) {
-		/* The block is still empty: there is nothing to pad */
-		remaining = 0;
-	}
-
-	while (remaining > 0) {
-		log_write_low(&dummy_rec, 1);
-		remaining--;
-	}
-
-	lsn = log_sys->lsn;
-
-	log_close();
-	log_release();
-
-	ut_a(lsn % OS_FILE_LOG_BLOCK_SIZE == LOG_BLOCK_HDR_SIZE);
-}
-
-/******************************************************//**
-Returns the number of bytes of log data a log group can hold: the size of
-each file without its LOG_FILE_HDR_SIZE header, times the number of files.
-The caller must hold the log mutex.
-@return capacity in bytes */
-UNIV_INTERN
-lsn_t
-log_group_get_capacity(
-/*===================*/
-	const log_group_t*	group)	/*!< in: log group */
-{
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	return(group->n_files * (group->file_size - LOG_FILE_HDR_SIZE));
-}
-
-/******************************************************//**
-Converts a real offset within a log group (file headers counted) into an
-offset that counts log data only, by subtracting one file header for every
-file started up to that offset. The caller must hold the log mutex.
-@return size offset (<= offset) */
-UNIV_INLINE
-lsn_t
-log_group_calc_size_offset(
-/*=======================*/
-	lsn_t			offset,	/*!< in: real offset within the
-					log group */
-	const log_group_t*	group)	/*!< in: log group */
-{
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	/* Number of file headers located at or before the offset */
-	const lsn_t	n_headers = 1 + offset / group->file_size;
-
-	return(offset - LOG_FILE_HDR_SIZE * n_headers);
-}
-
-/******************************************************//**
-Converts an offset that counts log data only into a real offset within the
-log group, by adding one file header for every file started up to that
-offset. The caller must hold the log mutex.
-@return real offset (>= offset) */
-UNIV_INLINE
-lsn_t
-log_group_calc_real_offset(
-/*=======================*/
-	lsn_t			offset,	/*!< in: size offset within the
-					log group */
-	const log_group_t*	group)	/*!< in: log group */
-{
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	/* Number of file headers that precede the data at the offset */
-	const lsn_t	n_headers = 1
-		+ offset / (group->file_size - LOG_FILE_HDR_SIZE);
-
-	return(offset + LOG_FILE_HDR_SIZE * n_headers);
-}
-
-/******************************************************//**
-Calculates the offset of an lsn within a log group, relative to the
-(group->lsn, group->lsn_offset) pair stored in the group. The group is
-treated as a ring of log_group_get_capacity() data bytes. The caller must
-hold the log mutex.
-@return offset within the log group */
-static
-lsn_t
-log_group_calc_lsn_offset(
-/*======================*/
-	lsn_t			lsn,	/*!< in: lsn */
-	const log_group_t*	group)	/*!< in: log group */
-{
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	const lsn_t	base_lsn = group->lsn;
-	const lsn_t	base_size_offset
-		= log_group_calc_size_offset(group->lsn_offset, group);
-	const lsn_t	capacity = log_group_get_capacity(group);
-
-	/* Distance, in data bytes, from the base position forward to lsn */
-	lsn_t		distance;
-
-	if (lsn < base_lsn) {
-		/* lsn lies behind the base: walk backwards around the ring
-		and express the result as a forward distance */
-		distance = capacity - (base_lsn - lsn) % capacity;
-	} else {
-		distance = lsn - base_lsn;
-	}
-
-	const lsn_t	size_offset
-		= (base_size_offset + distance) % capacity;
-
-	return(log_group_calc_real_offset(size_offset, group));
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_DEBUG
-UNIV_INTERN ibool log_debug_writes = FALSE; /* UNIV_DEBUG builds only: when
-   set, the log write and flush routines in this file print trace messages
-   to stderr */
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-Calculates where in log files we find a specified lsn.
-The log files are treated as a ring: every file holds
-(log_file_size - LOG_FILE_HDR_SIZE) bytes of log data and the data of the
-first file starts at first_header_lsn. An lsn smaller than first_header_lsn
-is first advanced by whole multiples of the combined capacity of all files
-until it is no longer smaller.
-NOTE(review): capacity and add_this_many are signed while both lsn
-parameters are unsigned, so the arithmetic below mixes the two; a
-log_file_size <= LOG_FILE_HDR_SIZE or n_log_files == 0 is not checked
-here - confirm that callers rule these out.
-@return log file number */
-UNIV_INTERN
-ulint
-log_calc_where_lsn_is(
-/*==================*/
- ib_int64_t* log_file_offset, /*!< out: offset in that file
- (including the header) */
- ib_uint64_t first_header_lsn, /*!< in: first log file start
- lsn */
- ib_uint64_t lsn, /*!< in: lsn whose position to
- determine */
- ulint n_log_files, /*!< in: total number of log
- files */
- ib_int64_t log_file_size) /*!< in: log file size
- (including the header) */
-{
- ib_int64_t capacity = log_file_size - LOG_FILE_HDR_SIZE;
- ulint file_no;
- ib_int64_t add_this_many;
-
- if (lsn < first_header_lsn) {
- add_this_many = 1 + (first_header_lsn - lsn)
- / (capacity * (ib_int64_t) n_log_files);
- lsn += add_this_many
- * capacity * (ib_int64_t) n_log_files;
- }
-
- ut_a(lsn >= first_header_lsn);
-
- file_no = ((ulint)((lsn - first_header_lsn) / capacity))
- % n_log_files;
- *log_file_offset = (lsn - first_header_lsn) % capacity;
-
- *log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE; /* skip the
-   header at the start of the file */
-
- return(file_no);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Moves the (lsn, lsn_offset) pair stored in a log group to a given lsn. The
-new offset is derived from the pair currently stored in the group, so the
-group must already be correctly initialized to correspond to some lsn, for
-instance, a checkpoint lsn. */
-UNIV_INTERN
-void
-log_group_set_fields(
-/*=================*/
-	log_group_t*	group,	/*!< in/out: group */
-	lsn_t		lsn)	/*!< in: lsn for which the values should be
-				set */
-{
-	/* Compute the offset before group->lsn is overwritten: the
-	calculation reads the old group->lsn and group->lsn_offset */
-	const lsn_t	new_offset = log_group_calc_lsn_offset(lsn, group);
-
-	group->lsn = lsn;
-	group->lsn_offset = new_offset;
-}
-
-/*****************************************************************//**
-Calculates the recommended highest values for lsn - last_checkpoint_lsn,
-lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age, and
-stores them in log_sys while holding the log mutex. A combined log file
-size that is too small for the configured thread concurrency is reported
-with a fatal-level log message.
-NOTE(review): the only return statement yields true; a FALSE result for a
-too small log group is never produced here - confirm that callers do not
-depend on it.
-@return true */
-static
-ibool
-log_calc_max_ages(void)
-/*===================*/
-{
-	/* Data bytes of all log files together */
-	lsn_t	capacity = ((srv_log_file_size_requested
-			     << srv_page_size_shift)
-			    - LOG_FILE_HDR_SIZE)
-		* srv_n_log_files;
-
-	/* Add extra safety */
-	capacity -= capacity / 10;
-
-	/* For each OS thread we must reserve so much free space in the
-	smallest log group that it can accommodate the log entries produced
-	by single query steps: running out of free log space is a serious
-	system error which requires rebooting the database. */
-
-	const ulint	reserved = LOG_CHECKPOINT_FREE_PER_THREAD
-		* (10 + srv_thread_concurrency)
-		+ LOG_CHECKPOINT_EXTRA_FREE;
-
-	if (reserved >= capacity / 2) {
-		ib_logf(IB_LOG_LEVEL_FATAL,
-			"The combined size of ib_logfiles"
-			" should be bigger than\n"
-			"InnoDB: 200 kB * innodb_thread_concurrency.");
-	}
-
-	lsn_t	margin = capacity - reserved;
-
-	/* Add still some extra safety */
-	margin -= margin / 10;
-
-	mutex_enter(&log_sys->mutex);
-
-	log_sys->log_group_capacity = capacity;
-
-	log_sys->max_modified_age_async
-		= margin - margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
-	log_sys->max_modified_age_sync
-		= margin - margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
-
-	log_sys->max_checkpoint_age_async
-		= margin - margin / LOG_POOL_CHECKPOINT_RATIO_ASYNC;
-	log_sys->max_checkpoint_age = margin;
-
-#ifdef UNIV_LOG_ARCHIVE
-	/* NOTE(review): srv_log_file_size_requested is shifted by
-	srv_page_size_shift above but used unshifted here, which looks like
-	a pages-versus-bytes mismatch - confirm. */
-	lsn_t	archive_margin = capacity
-		- (srv_log_file_size_requested - LOG_FILE_HDR_SIZE)
-		- LOG_ARCHIVE_EXTRA_MARGIN;
-
-	log_sys->max_archived_lsn_age = archive_margin;
-
-	log_sys->max_archived_lsn_age_async
-		= archive_margin - archive_margin / LOG_ARCHIVE_RATIO_ASYNC;
-#endif /* UNIV_LOG_ARCHIVE */
-
-	mutex_exit(&log_sys->mutex);
-
-	return(true);
-}
-
-/******************************************************//**
-Initializes the log.
-Allocates log_sys and creates its mutexes, events and rw-locks, allocates
-the log buffer and the checkpoint buffer (each over-allocated and then
-aligned to OS_FILE_LOG_BLOCK_SIZE), initializes the first log block and
-leaves log_sys->lsn at LOG_START_LSN + LOG_BLOCK_HDR_SIZE. The field
-assignments are made while holding the newly created log mutex. After the
-mutex is released, the log scrub thread is started if srv_scrub_log is set
-and the server is not in read-only mode.
-NOTE(review): log_sys is obtained with mem_alloc() rather than mem_zalloc()
-and not every field is assigned below (e.g. one_flushed, write_end_offset);
-presumably these are set before first use elsewhere - confirm. */
-UNIV_INTERN
-void
-log_init(void)
-/*==========*/
-{
- log_sys = static_cast<log_t*>(mem_alloc(sizeof(log_t)));
-
- mutex_create(log_sys_mutex_key, &log_sys->mutex, SYNC_LOG);
-
- mutex_create(log_flush_order_mutex_key,
- &log_sys->log_flush_order_mutex,
- SYNC_LOG_FLUSH_ORDER);
-
- mutex_enter(&(log_sys->mutex));
-
- /* Start the lsn from one log block from zero: this way every
- log record has a start lsn != zero, a fact which we will use */
-
- log_sys->lsn = LOG_START_LSN;
-
- ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
- ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
-
- log_sys->buf_ptr = static_cast<byte*>(
- mem_zalloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE)); /* one extra
-   block so that the aligned pointer below still has LOG_BUFFER_SIZE
-   bytes behind it */
-
- log_sys->buf = static_cast<byte*>(
- ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
-
- log_sys->buf_size = LOG_BUFFER_SIZE;
- log_sys->is_extending = false;
-
- log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
- - LOG_BUF_FLUSH_MARGIN;
- log_sys->check_flush_or_checkpoint = TRUE;
- UT_LIST_INIT(log_sys->log_groups);
-
- log_sys->n_log_ios = 0;
-
- log_sys->n_log_ios_old = log_sys->n_log_ios;
- log_sys->last_printout_time = time(NULL);
- /*----------------------------*/
-
- log_sys->buf_next_to_write = 0;
-
- log_sys->write_lsn = 0;
- log_sys->current_flush_lsn = 0;
- log_sys->flushed_to_disk_lsn = 0;
-
- log_sys->written_to_some_lsn = log_sys->lsn;
- log_sys->written_to_all_lsn = log_sys->lsn;
-
- log_sys->n_pending_writes = 0;
-
- log_sys->no_flush_event = os_event_create();
-
- os_event_set(log_sys->no_flush_event); /* both flush events start out
-   in the set state */
-
- log_sys->one_flushed_event = os_event_create();
-
- os_event_set(log_sys->one_flushed_event);
-
- /*----------------------------*/
-
- log_sys->next_checkpoint_no = 0;
- log_sys->last_checkpoint_lsn = log_sys->lsn;
- log_sys->next_checkpoint_lsn = log_sys->lsn;
- log_sys->n_pending_checkpoint_writes = 0;
-
-
- rw_lock_create(checkpoint_lock_key, &log_sys->checkpoint_lock,
- SYNC_NO_ORDER_CHECK);
-
- log_sys->checkpoint_buf_ptr = static_cast<byte*>(
- mem_zalloc(2 * OS_FILE_LOG_BLOCK_SIZE));
-
- log_sys->checkpoint_buf = static_cast<byte*>(
- ut_align(log_sys->checkpoint_buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
-
- /*----------------------------*/
-
-#ifdef UNIV_LOG_ARCHIVE
- /* Under MySQL, log archiving is always off */
- log_sys->archiving_state = LOG_ARCH_OFF;
- log_sys->archived_lsn = log_sys->lsn;
- log_sys->next_archived_lsn = 0;
-
- log_sys->n_pending_archive_ios = 0;
-
- rw_lock_create(archive_lock_key, &log_sys->archive_lock,
- SYNC_NO_ORDER_CHECK);
-
- log_sys->archive_buf_ptr = static_cast<byte*>(
- mem_zalloc(LOG_ARCHIVE_BUF_SIZE + OS_FILE_LOG_BLOCK_SIZE));
-
- log_sys->archive_buf = static_cast<byte*>(
- ut_align(log_sys->archive_buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
-
- log_sys->archive_buf_size = LOG_ARCHIVE_BUF_SIZE;
-
- log_sys->archiving_on = os_event_create();
-#endif /* UNIV_LOG_ARCHIVE */
-
- log_sys->tracked_lsn = 0;
-
- /*----------------------------*/
-
- log_block_init(log_sys->buf, log_sys->lsn);
- log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
-
- log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
- log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE; // TODO(minliz): ensure various LOG_START_LSN?
-
- MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- log_sys->lsn - log_sys->last_checkpoint_lsn);
-
- mutex_exit(&(log_sys->mutex));
-
- log_scrub_thread_active = !srv_read_only_mode && srv_scrub_log;
- if (log_scrub_thread_active) {
- log_scrub_event = os_event_create();
- os_thread_create(log_scrub_thread, NULL, NULL);
- }
-
-#ifdef UNIV_LOG_DEBUG
- recv_sys_create();
- recv_sys_init(buf_pool_get_curr_size());
-
- recv_sys->parse_start_lsn = log_sys->lsn;
- recv_sys->scanned_lsn = log_sys->lsn;
- recv_sys->scanned_checkpoint_no = 0;
- recv_sys->recovered_lsn = log_sys->lsn;
- recv_sys->limit_lsn = LSN_MAX;
-#endif
-}
-
-/******************************************************************//**
-Inits a log group to the log system: allocates the group descriptor
-together with its file header buffers and checkpoint buffer (each
-over-allocated and then aligned to OS_FILE_LOG_BLOCK_SIZE), appends the
-group to log_sys->log_groups and recalculates the log age limits. */
-UNIV_INTERN
-void
-log_group_init(
-/*===========*/
-	ulint	id,			/*!< in: group id */
-	ulint	n_files,		/*!< in: number of log files */
-	lsn_t	file_size,		/*!< in: log file size in bytes */
-	ulint	space_id,		/*!< in: space id of the file space
-					which contains the log files of this
-					group */
-	ulint	archive_space_id)	/*!< in: space id of the file space
-					which contains some archived log
-					files for this group; currently, only
-					for the first log group this is
-					used */
-{
-	ulint	i;
-
-	log_group_t*	group;
-
-	group = static_cast<log_group_t*>(mem_alloc(sizeof(log_group_t)));
-
-	group->id = id;
-	group->n_files = n_files;
-	group->file_size = file_size;
-	group->space_id = space_id;
-	group->state = LOG_GROUP_OK;
-	group->lsn = LOG_START_LSN;
-	group->lsn_offset = LOG_FILE_HDR_SIZE;
-	group->n_pending_writes = 0;
-
-	/* Two parallel arrays with one entry per log file: the raw
-	allocations and the block-aligned pointers into them */
-	group->file_header_bufs_ptr = static_cast<byte**>(
-		mem_zalloc(sizeof(byte*) * n_files));
-
-	/* The elements are byte*, so size the array by sizeof(byte*) like
-	the sibling arrays */
-	group->file_header_bufs = static_cast<byte**>(
-		mem_zalloc(sizeof(byte*) * n_files));
-
-#ifdef UNIV_LOG_ARCHIVE
-	group->archive_file_header_bufs_ptr = static_cast<byte**>(
-		mem_zalloc(sizeof(byte*) * n_files));
-
-	group->archive_file_header_bufs = static_cast<byte**>(
-		mem_zalloc(sizeof(byte*) * n_files));
-#endif /* UNIV_LOG_ARCHIVE */
-
-	for (i = 0; i < n_files; i++) {
-		group->file_header_bufs_ptr[i] = static_cast<byte*>(
-			mem_zalloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
-
-		group->file_header_bufs[i] = static_cast<byte*>(
-			ut_align(group->file_header_bufs_ptr[i],
-				 OS_FILE_LOG_BLOCK_SIZE));
-
-#ifdef UNIV_LOG_ARCHIVE
-		group->archive_file_header_bufs_ptr[i] = static_cast<byte*>(
-			mem_zalloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
-
-		group->archive_file_header_bufs[i] = static_cast<byte*>(
-			ut_align(group->archive_file_header_bufs_ptr[i],
-				 OS_FILE_LOG_BLOCK_SIZE));
-#endif /* UNIV_LOG_ARCHIVE */
-	}
-
-#ifdef UNIV_LOG_ARCHIVE
-	group->archive_space_id = archive_space_id;
-
-	group->archived_file_no = LOG_START_LSN;
-	group->archived_offset = 0;
-#endif /* UNIV_LOG_ARCHIVE */
-
-	group->checkpoint_buf_ptr = static_cast<byte*>(
-		mem_zalloc(2 * OS_FILE_LOG_BLOCK_SIZE));
-
-	group->checkpoint_buf = static_cast<byte*>(
-		ut_align(group->checkpoint_buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
-
-	UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group);
-
-	ut_a(log_calc_max_ages());
-}
-
-/******************************************************************//**
-Sets the events that waiters of a log flush block on, as selected by the
-bits of code. The caller must hold the log mutex. */
-UNIV_INLINE
-void
-log_flush_do_unlocks(
-/*=================*/
-	ulint	code)	/*!< in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK
-			and LOG_UNLOCK_NONE_FLUSHED_LOCK */
-{
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	/* The events must be set while the log mutex is owned.
-	Transactions wait for these events, and at the moment an event is
-	set the flush they waited for must have ended. Without the mutex
-	the i/o-thread calling this function could be preempted, a new
-	flush could start in the meantime, and the NEW flush would then
-	erroneously be signalled as completed. Holding the mutex makes the
-	event state change atomically together with
-	log_sys->n_pending_writes etc. */
-
-	const bool	signal_one_flushed
-		= (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) != 0;
-	const bool	signal_no_flush
-		= (code & LOG_UNLOCK_FLUSH_LOCK) != 0;
-
-	if (signal_one_flushed) {
-		os_event_set(log_sys->one_flushed_event);
-	}
-
-	if (signal_no_flush) {
-		os_event_set(log_sys->no_flush_event);
-	}
-}
-
-/******************************************************************//**
-Checks whether the writes to a log group have completed. The first time
-this is found to be the case for a write batch (log_sys->one_flushed still
-unset), written_to_some_lsn is advanced and one_flushed is set. The caller
-must hold the log mutex.
-@return LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */
-UNIV_INLINE
-ulint
-log_group_check_flush_completion(
-/*=============================*/
-	log_group_t*	group)	/*!< in: log group */
-{
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	if (group->n_pending_writes != 0) {
-		/* Writes to this group are still pending */
-		return(0);
-	}
-
-	if (log_sys->one_flushed) {
-		/* Completion of the batch has been recorded already */
-#ifdef UNIV_DEBUG
-		if (log_debug_writes) {
-			fprintf(stderr, "Log flushed to group %lu\n",
-				(ulong) group->id);
-		}
-#endif /* UNIV_DEBUG */
-		return(0);
-	}
-
-#ifdef UNIV_DEBUG
-	if (log_debug_writes) {
-		fprintf(stderr,
-			"Log flushed first to group %lu\n",
-			(ulong) group->id);
-	}
-#endif /* UNIV_DEBUG */
-
-	log_sys->written_to_some_lsn = log_sys->write_lsn;
-	log_sys->one_flushed = TRUE;
-
-	return(LOG_UNLOCK_NONE_FLUSHED_LOCK);
-}
-
-/******************************************************//**
-Checks whether all pending log writes have completed and, if so, advances
-written_to_all_lsn and buf_next_to_write and, once the written part exceeds
-half of max_buf_free, moves the unwritten tail of the log buffer to the
-start of the buffer. The caller must hold the log mutex.
-@return LOG_UNLOCK_FLUSH_LOCK or 0 */
-static
-ulint
-log_sys_check_flush_completion(void)
-/*================================*/
-{
-	ut_ad(mutex_own(&(log_sys->mutex)));
-
-	if (log_sys->n_pending_writes != 0) {
-		/* A write is still in progress */
-		return(0);
-	}
-
-	log_sys->written_to_all_lsn = log_sys->write_lsn;
-	log_sys->buf_next_to_write = log_sys->write_end_offset;
-
-	if (log_sys->write_end_offset > log_sys->max_buf_free / 2) {
-		/* Move the log buffer content to the start of the
-		buffer, in whole log blocks */
-
-		const ulint	shift = ut_calc_align_down(
-			log_sys->write_end_offset,
-			OS_FILE_LOG_BLOCK_SIZE);
-		const ulint	tail_end = ut_calc_align(
-			log_sys->buf_free,
-			OS_FILE_LOG_BLOCK_SIZE);
-
-		ut_memmove(log_sys->buf, log_sys->buf + shift,
-			   tail_end - shift);
-
-		log_sys->buf_next_to_write -= shift;
-		log_sys->buf_free -= shift;
-	}
-
-	return(LOG_UNLOCK_FLUSH_LOCK);
-}
-
-/******************************************************//**
-Completes an i/o to a log file.
-The kind of i/o is encoded in the group argument: with UNIV_LOG_ARCHIVE the
-address of log_archive_io denotes an archive write; otherwise a set lowest
-bit denotes a checkpoint write, and the real group pointer is obtained by
-subtracting 1. For a checkpoint write the log file space is flushed unless
-the configured flush method is O_DSYNC, ALL_O_DIRECT or NOSYNC, and then
-log_io_complete_checkpoint() is called. Any other value ends in ut_error,
-because ordinary log writes are synchronous; the statements after ut_error
-are - assuming ut_error does not return - never executed. */
-UNIV_INTERN
-void
-log_io_complete(
-/*============*/
- log_group_t* group) /*!< in: log group or a dummy pointer */
-{
- ulint unlock;
-
-#ifdef UNIV_LOG_ARCHIVE
- if ((byte*) group == &log_archive_io) {
- /* It was an archive write */
-
- log_io_complete_archive();
-
- return;
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- if ((ulint) group & 0x1UL) {
- /* It was a checkpoint write */
- group = (log_group_t*)((ulint) group - 1); /* strip the tag bit */
-
- if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
- && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
- && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
-
- fil_flush(group->space_id);
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Checkpoint info written to group %lu\n",
- group->id);
- }
-#endif /* UNIV_DEBUG */
- log_io_complete_checkpoint();
-
- return;
- }
-
- ut_error; /*!< We currently use synchronous writing of the
- logs and cannot end up here! */
-
- if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
- && srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
- && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
- && thd_flush_log_at_trx_commit(NULL) != 2) {
-
- fil_flush(group->space_id);
- }
-
- mutex_enter(&(log_sys->mutex));
- ut_ad(!recv_no_log_write);
-
- ut_a(group->n_pending_writes > 0);
- ut_a(log_sys->n_pending_writes > 0);
-
- group->n_pending_writes--;
- log_sys->n_pending_writes--;
- MONITOR_DEC(MONITOR_PENDING_LOG_WRITE);
-
- unlock = log_group_check_flush_completion(group);
- unlock = unlock | log_sys_check_flush_completion();
-
- log_flush_do_unlocks(unlock);
-
- mutex_exit(&(log_sys->mutex));
-}
-
-/******************************************************//**
-Writes a log file header to a log file space. The group id, the start lsn
-and the log block size are stored in the header buffer of the file, the
-"created by hot backup" label field is overwritten with spaces, and the
-first OS_FILE_LOG_BLOCK_SIZE bytes of the buffer are written to the start
-of the nth file unless log_do_write is unset. The caller must hold the log
-mutex. */
-static
-void
-log_group_file_header_flush(
-/*========================*/
-	log_group_t*	group,		/*!< in: log group */
-	ulint		nth_file,	/*!< in: header to the nth file in the
-					log file space */
-	lsn_t		start_lsn)	/*!< in: log file data starts at this
-					lsn */
-{
-	byte*	buf;
-	lsn_t	dest_offset;
-
-	ut_ad(mutex_own(&(log_sys->mutex)));
-	ut_ad(!recv_no_log_write);
-	ut_a(nth_file < group->n_files);
-
-	buf = *(group->file_header_bufs + nth_file);
-
-	mach_write_to_4(buf + LOG_GROUP_ID, group->id);
-	mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
-
-	/* Wipe over possible label of mysqlbackup --restore with four
-	spaces. memset() is used so that exactly 4 bytes are written
-	without reading past the end of a shorter string literal. */
-	memset(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, ' ', 4);
-
-	mach_write_to_4(buf + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE,
-			srv_log_block_size);
-
-	/* Byte offset of the start of the nth file within the log space */
-	dest_offset = nth_file * group->file_size;
-
-#ifdef UNIV_DEBUG
-	if (log_debug_writes) {
-		fprintf(stderr,
-			"Writing log file header to group %lu file %lu\n",
-			(ulong) group->id, (ulong) nth_file);
-	}
-#endif /* UNIV_DEBUG */
-	if (log_do_write) {
-		log_sys->n_log_ios++;
-
-		MONITOR_INC(MONITOR_LOG_IO);
-
-		srv_stats.os_log_pending_writes.inc();
-
-		fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0,
-		       (ulint) (dest_offset / UNIV_PAGE_SIZE),
-		       (ulint) (dest_offset % UNIV_PAGE_SIZE),
-		       OS_FILE_LOG_BLOCK_SIZE,
-		       buf, group, 0);
-
-		srv_stats.os_log_pending_writes.dec();
-	}
-}
-
-/******************************************************//**
-Computes the checksum of a log block with log_block_calc_checksum() and
-stores it in the block with log_block_set_checksum(). Called before a
-block is written to a log file; the checksum is used in recovery to check
-the consistency of a log block. */
-void
-log_block_store_checksum(
-/*=====================*/
-	byte*	block)	/*!< in/out: pointer to a log block */
-{
-	const ulint	checksum = log_block_calc_checksum(block);
-
-	log_block_set_checksum(block, checksum);
-}
-
-/******************************************************//**
-Writes a buffer to a log file group.
-The buffer is written in pieces so that no single write extends past the
-end of a log file. Before a piece that begins right after a file header
-(offset LOG_FILE_HDR_SIZE within its file) the header of that file is
-written first; for the first piece this is done only if new_data_offset
-is 0, for later pieces always. For every piece the block checksums are
-stored, and - unless log_do_write is unset - the piece is passed to
-log_encrypt_before_write() and then written with fil_io(). The caller must
-hold the log mutex. */
-UNIV_INTERN
-void
-log_group_write_buf(
-/*================*/
- log_group_t* group, /*!< in: log group */
- byte* buf, /*!< in: buffer */
- ulint len, /*!< in: buffer len; must be divisible
- by OS_FILE_LOG_BLOCK_SIZE */
- lsn_t start_lsn, /*!< in: start lsn of the buffer; must
- be divisible by
- OS_FILE_LOG_BLOCK_SIZE */
- ulint new_data_offset)/*!< in: start offset of new data in
- buf: this parameter is used to decide
- if we have to write a new log file
- header */
-{
- ulint write_len;
- ibool write_header;
- lsn_t next_offset;
- ulint i;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
- ut_ad(!recv_no_log_write);
- ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
-
- if (new_data_offset == 0) {
- write_header = TRUE;
- } else {
- write_header = FALSE;
- }
-loop:
- if (len == 0) {
-
- return;
- }
-
- next_offset = log_group_calc_lsn_offset(start_lsn, group);
-
- if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE)
- && write_header) {
- /* We start to write a new log file instance in the group */
-
- ut_a(next_offset / group->file_size <= ULINT_MAX);
-
- log_group_file_header_flush(group, (ulint)
- (next_offset / group->file_size),
- start_lsn);
- srv_stats.os_log_written.add(OS_FILE_LOG_BLOCK_SIZE);
-
- srv_stats.log_writes.inc();
- }
-
- if ((next_offset % group->file_size) + len > group->file_size) {
-
- /* if the above condition holds, then the below expression
- is < len which is ulint, so the typecast is ok */
- write_len = (ulint)
- (group->file_size - (next_offset % group->file_size));
- } else {
- write_len = len;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
-
- fprintf(stderr,
- "Writing log file segment to group %lu"
- " offset " LSN_PF " len %lu\n"
- "start lsn " LSN_PF "\n"
- "First block n:o %lu last block n:o %lu\n",
- (ulong) group->id, next_offset,
- write_len,
- start_lsn,
- (ulong) log_block_get_hdr_no(buf),
- (ulong) log_block_get_hdr_no(
- buf + write_len - OS_FILE_LOG_BLOCK_SIZE));
- ut_a(log_block_get_hdr_no(buf)
- == log_block_convert_lsn_to_no(start_lsn));
-
- for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
-
- ut_a(log_block_get_hdr_no(buf) + i
- == log_block_get_hdr_no(
- buf + i * OS_FILE_LOG_BLOCK_SIZE));
- }
- }
-#endif /* UNIV_DEBUG */
- /* Calculate the checksums for each log block and write them to
- the trailer fields of the log blocks */
-
- for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
- log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
- }
-
- if (log_do_write) {
- log_sys->n_log_ios++;
-
- MONITOR_INC(MONITOR_LOG_IO);
-
- srv_stats.os_log_pending_writes.inc();
-
- ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
-
- log_encrypt_before_write(log_sys->next_checkpoint_no,
- buf, write_len);
-
-#ifdef DEBUG_CRYPT
- fprintf(stderr, "WRITE: block: %lu checkpoint: %lu %.8lx %.8lx\n",
- log_block_get_hdr_no(buf),
- log_block_get_checkpoint_no(buf),
- log_block_calc_checksum(buf),
- log_block_get_checksum(buf));
-#endif
-
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->space_id, 0,
- (ulint) (next_offset / UNIV_PAGE_SIZE),
- (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
- group, 0);
-
- srv_stats.os_log_pending_writes.dec();
-
- srv_stats.os_log_written.add(write_len);
- srv_stats.log_writes.inc();
- }
-
- if (write_len < len) {
- start_lsn += write_len;
- len -= write_len;
- buf += write_len;
-
- write_header = TRUE; /* the remainder starts in the next log file,
-   so its header has to be written */
-
- goto loop;
- }
-}
-
-/******************************************************//**
-This function is called, e.g., when a transaction wants to commit. It checks
-that the log has been written to the log file up to the last log entry written
-by the transaction. If there is a flush running, it waits and checks if the
-flush flushed enough. If not, starts a new flush. */
-UNIV_INTERN
-void
-log_write_up_to(
-/*============*/
- lsn_t lsn, /*!< in: log sequence number up to which
- the log should be written,
- LSN_MAX if not specified */
- ulint wait, /*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
- or LOG_WAIT_ALL_GROUPS */
- ibool flush_to_disk)
- /*!< in: TRUE if we want the written log
- also to be flushed to disk */
-{
- log_group_t* group;
- ulint start_offset;
- ulint end_offset;
- ulint area_start;
- ulint area_end;
-#ifdef UNIV_DEBUG
- ulint loop_count = 0;
-#endif /* UNIV_DEBUG */
- ulint unlock;
- ib_uint64_t write_lsn;
- ib_uint64_t flush_lsn;
-
- ut_ad(!srv_read_only_mode);
-
- if (recv_no_ibuf_operations) {
- /* Recovery is running and no operations on the log files are
- allowed yet (the variable name .._no_ibuf_.. is misleading) */
-
- return;
- }
-
-loop:
- ut_ad(++loop_count < 100);
-
- mutex_enter(&(log_sys->mutex));
- ut_ad(!recv_no_log_write);
-
- if (flush_to_disk
- && log_sys->flushed_to_disk_lsn >= lsn) {
-
- mutex_exit(&(log_sys->mutex));
-
- return;
- }
-
- if (!flush_to_disk
- && (log_sys->written_to_all_lsn >= lsn
- || (log_sys->written_to_some_lsn >= lsn
- && wait != LOG_WAIT_ALL_GROUPS))) {
-
- mutex_exit(&(log_sys->mutex));
-
- return;
- }
-
- if (log_sys->n_pending_writes > 0) {
- /* A write (+ possibly flush to disk) is running */
-
- if (flush_to_disk
- && log_sys->current_flush_lsn >= lsn) {
- /* The write + flush will write enough: wait for it to
- complete */
-
- goto do_waits;
- }
-
- if (!flush_to_disk
- && log_sys->write_lsn >= lsn) {
- /* The write will write enough: wait for it to
- complete */
-
- goto do_waits;
- }
-
- mutex_exit(&(log_sys->mutex));
-
- /* Wait for the write to complete and try to start a new
- write */
-
- os_event_wait(log_sys->no_flush_event);
-
- goto loop;
- }
-
- if (!flush_to_disk
- && log_sys->buf_free == log_sys->buf_next_to_write) {
- /* Nothing to write and no flush to disk requested */
-
- mutex_exit(&(log_sys->mutex));
-
- return;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Writing log from " LSN_PF " up to lsn " LSN_PF "\n",
- log_sys->written_to_all_lsn,
- log_sys->lsn);
- }
-#endif /* UNIV_DEBUG */
- log_sys->n_pending_writes++;
- MONITOR_INC(MONITOR_PENDING_LOG_WRITE);
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
- group->n_pending_writes++; /*!< We assume here that we have only
- one log group! */
-
- os_event_reset(log_sys->no_flush_event);
- os_event_reset(log_sys->one_flushed_event);
-
- start_offset = log_sys->buf_next_to_write;
- end_offset = log_sys->buf_free;
-
- area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
- area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
-
- ut_ad(area_end - area_start > 0);
-
- log_sys->write_lsn = log_sys->lsn;
-
- if (flush_to_disk) {
- log_sys->current_flush_lsn = log_sys->lsn;
- }
-
- log_sys->one_flushed = FALSE;
-
- log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
- log_block_set_checkpoint_no(
- log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
- log_sys->next_checkpoint_no);
-
- /* Copy the last, incompletely written, log block a log block length
- up, so that when the flush operation writes from the log buffer, the
- segment to write will not be changed by writers to the log */
-
- ut_memcpy(log_sys->buf + area_end,
- log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
- OS_FILE_LOG_BLOCK_SIZE);
-
- log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
- log_sys->write_end_offset = log_sys->buf_free;
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- /* Do the write to the log files */
-
- while (group) {
- log_group_write_buf(
- group, log_sys->buf + area_start,
- area_end - area_start,
- ut_uint64_align_down(log_sys->written_to_all_lsn,
- OS_FILE_LOG_BLOCK_SIZE),
- start_offset - area_start);
-
- log_group_set_fields(group, log_sys->write_lsn);
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- mutex_exit(&(log_sys->mutex));
-
- if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC
- || srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
- /* O_DSYNC or ALL_O_DIRECT means the OS did not buffer the log
- file at all: so we have also flushed to disk what we have
- written */
-
- log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
-
- } else if (flush_to_disk) {
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- fil_flush(group->space_id);
- log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
- }
-
- mutex_enter(&(log_sys->mutex));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- ut_a(group->n_pending_writes == 1);
- ut_a(log_sys->n_pending_writes == 1);
-
- group->n_pending_writes--;
- log_sys->n_pending_writes--;
- MONITOR_DEC(MONITOR_PENDING_LOG_WRITE);
-
- unlock = log_group_check_flush_completion(group);
- unlock = unlock | log_sys_check_flush_completion();
-
- log_flush_do_unlocks(unlock);
-
- write_lsn = log_sys->write_lsn;
- flush_lsn = log_sys->flushed_to_disk_lsn;
-
- mutex_exit(&(log_sys->mutex));
-
- innobase_mysql_log_notify(write_lsn, flush_lsn);
-
- return;
-
-do_waits:
- mutex_exit(&(log_sys->mutex));
-
- switch (wait) {
- case LOG_WAIT_ONE_GROUP:
- os_event_wait(log_sys->one_flushed_event);
- break;
- case LOG_WAIT_ALL_GROUPS:
- os_event_wait(log_sys->no_flush_event);
- break;
-#ifdef UNIV_DEBUG
- case LOG_NO_WAIT:
- break;
- default:
- ut_error;
-#endif /* UNIV_DEBUG */
- }
-}
-
-/****************************************************************//**
-Does a synchronous flush of the log buffer to disk: the current lsn is
-read under the log mutex, and the log is then written and flushed up to
-that lsn, waiting for all log groups. */
-UNIV_INTERN
-void
-log_buffer_flush_to_disk(void)
-/*==========================*/
-{
-	lsn_t	target_lsn;
-
-	ut_ad(!srv_read_only_mode);
-
-	/* The mutex only protects the read of the lsn; it is released
-	again before the write is requested. */
-	mutex_enter(&log_sys->mutex);
-	target_lsn = log_sys->lsn;
-	mutex_exit(&log_sys->mutex);
-
-	log_write_up_to(target_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
-}
-
-/****************************************************************//**
-Writes the log buffer to the log file and, if 'flush' is set, also
-forces a flush of the log file. This is meant to be called from the
-background master thread only, as it does not wait for the write
-(+ possible flush) to finish. */
-UNIV_INTERN
-void
-log_buffer_sync_in_background(
-/*==========================*/
-	ibool	flush)	/*!< in: flush the logs to disk */
-{
-	lsn_t	target_lsn;
-
-	/* Take a snapshot of the current lsn under the log mutex */
-	mutex_enter(&log_sys->mutex);
-	target_lsn = log_sys->lsn;
-	mutex_exit(&log_sys->mutex);
-
-	/* LOG_NO_WAIT: the caller does not wait for completion */
-	log_write_up_to(target_lsn, LOG_NO_WAIT, flush);
-}
-
-/********************************************************************
-
-Tries to establish a big enough margin of free space in the log buffer, such
-that a new log entry can be catenated without an immediate need for a flush.
-A write up to the current lsn is requested (without waiting and without a
-flush to disk) when buf_free exceeds max_buf_free and no write is pending. */
-static
-void
-log_flush_margin(void)
-/*==================*/
-{
-	log_t*	log = log_sys;
-	lsn_t	write_up_to = 0;
-
-	mutex_enter(&log->mutex);
-
-	/* If a write is already running, hope that it will provide
-	enough free space and do not request another one. */
-	if (log->buf_free > log->max_buf_free
-	    && !(log->n_pending_writes > 0)) {
-
-		write_up_to = log->lsn;
-	}
-
-	mutex_exit(&log->mutex);
-
-	if (write_up_to != 0) {
-		log_write_up_to(write_up_to, LOG_NO_WAIT, FALSE);
-	}
-}
-
-/****************************************************************//**
-Advances the smallest lsn for which there are unflushed dirty blocks in the
-buffer pool. NOTE: this function may only be called if the calling thread owns
-no synchronization objects!
-
-Two strategies are used. If the page cleaner is not active, or
-srv_foreground_preflush is SRV_FOREGROUND_PREFLUSH_SYNC_PREFLUSH, or
-new_oldest is LSN_MAX, this thread starts a flush list batch itself with
-buf_flush_list() and waits for the batch to end. Otherwise it does not flush:
-it sleeps with a randomized backoff until a flush list batch is in progress,
-waits for that batch to end, and repeats until
-buf_pool_get_oldest_modification() has reached new_oldest or returns 0.
-@return false if there was a flush batch of the same type running,
-which means that we could not start this flush batch; in the backoff case
-the result is always true, as the wait loop only ends once its exit
-condition holds */
-static
-bool
-log_preflush_pool_modified_pages(
-/*=============================*/
- lsn_t new_oldest) /*!< in: try to advance oldest_modified_lsn
- at least to this lsn */
-{
- lsn_t current_oldest;
- ulint i; /* exponent of the upper bound of the backoff sleep */
-
- if (recv_recovery_on) {
- /* If the recovery is running, we must first apply all
- log records to their respective file pages to get the
- right modify lsn values to these pages: otherwise, there
- might be pages on disk which are not yet recovered to the
- current lsn, and even after calling this function, we could
- not know how up-to-date the disk version of the database is,
- and we could not make a new checkpoint on the basis of the
- info on the buffer pool only. */
-
- recv_apply_hashed_log_recs(true);
- }
-
- if (!buf_page_cleaner_is_active
- || (srv_foreground_preflush
- == SRV_FOREGROUND_PREFLUSH_SYNC_PREFLUSH)
- || (new_oldest == LSN_MAX)) {
-
- ulint n_pages;
-
- bool success = buf_flush_list(ULINT_MAX, new_oldest, &n_pages);
-
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
-
- if (!success) {
- MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
- }
-
- MONITOR_INC_VALUE_CUMULATIVE(
- MONITOR_FLUSH_SYNC_TOTAL_PAGE,
- MONITOR_FLUSH_SYNC_COUNT,
- MONITOR_FLUSH_SYNC_PAGES,
- n_pages);
-
- return(success);
- }
-
- ut_ad(srv_foreground_preflush == SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF);
-
- current_oldest = buf_pool_get_oldest_modification();
- i = 0;
-
- while (current_oldest < new_oldest && current_oldest) {
-
- while (!buf_flush_flush_list_in_progress()) {
-
- /* If a flush list flush by the cleaner thread is not
- running, backoff until one is started. */
- os_thread_sleep(ut_rnd_interval(0, 1 << i));
- i++;
- i %= 16; /* keep the exponent in the range 0..15 */
- }
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
-
- current_oldest = buf_pool_get_oldest_modification();
- }
-
- return(current_oldest >= new_oldest || !current_oldest);
-}
-
-/******************************************************//**
-Completes a checkpoint: advances the checkpoint number, makes
-next_checkpoint_lsn the new last_checkpoint_lsn, updates the checkpoint
-age monitor and releases the x-lock on the checkpoint lock. The caller
-must own the log mutex and there must be no pending checkpoint writes. */
-static
-void
-log_complete_checkpoint(void)
-/*=========================*/
-{
-	log_t*	log = log_sys;
-
-	ut_ad(mutex_own(&log->mutex));
-	ut_ad(log->n_pending_checkpoint_writes == 0);
-
-	log->next_checkpoint_no++;
-
-	ut_ad(log->next_checkpoint_lsn >= log->last_checkpoint_lsn);
-	log->last_checkpoint_lsn = log->next_checkpoint_lsn;
-
-	MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
-		    log->lsn - log->last_checkpoint_lsn);
-
-	rw_lock_x_unlock_gen(&log->checkpoint_lock, LOG_CHECKPOINT);
-}
-
-/******************************************************//**
-Completes an asynchronous checkpoint info write i/o to a log file.
-Decrements the count of pending checkpoint writes and, when it reaches
-zero, completes the checkpoint. */
-static
-void
-log_io_complete_checkpoint(void)
-/*============================*/
-{
-	log_t*	log = log_sys;
-
-	mutex_enter(&log->mutex);
-
-	ut_ad(log->n_pending_checkpoint_writes > 0);
-
-	log->n_pending_checkpoint_writes--;
-	MONITOR_DEC(MONITOR_PENDING_CHECKPOINT_WRITE);
-
-	/* The last pending write finishes the checkpoint as a whole */
-	if (!log->n_pending_checkpoint_writes) {
-		log_complete_checkpoint();
-	}
-
-	mutex_exit(&log->mutex);
-
-	if (!srv_track_changed_pages) {
-		return;
-	}
-
-	/* Wake the redo log watching thread to parse the log up to this
-	checkpoint. */
-	os_event_reset(srv_redo_log_tracked_event);
-	os_event_set(srv_checkpoint_completed_event);
-}
-
-/*******************************************************************//**
-Writes info to a checkpoint about a log group: stores the archived file
-number in the nth slot of the checkpoint group array. */
-static
-void
-log_checkpoint_set_nth_group_info(
-/*==============================*/
-	byte*	buf,	/*!< in: buffer for checkpoint info */
-	ulint	n,	/*!< in: nth slot */
-	lsn_t	file_no)/*!< in: archived file number */
-{
-	byte*	slot;
-
-	ut_ad(n < LOG_MAX_N_GROUPS);
-
-	/* The slots of the group array are 8 bytes apart */
-	slot = buf + LOG_CHECKPOINT_GROUP_ARRAY + 8 * n;
-
-	mach_write_to_8(slot + LOG_CHECKPOINT_ARCHIVED_FILE_NO, file_no);
-}
-
-/*******************************************************************//**
-Gets info from a checkpoint about a log group: reads the archived file
-number from the nth slot of the checkpoint group array. */
-UNIV_INTERN
-void
-log_checkpoint_get_nth_group_info(
-/*==============================*/
-	const byte*	buf,	/*!< in: buffer containing checkpoint info */
-	ulint		n,	/*!< in: nth slot */
-	lsn_t*		file_no)/*!< out: archived file number */
-{
-	const byte*	slot;
-
-	ut_ad(n < LOG_MAX_N_GROUPS);
-
-	/* The slots of the group array are 8 bytes apart */
-	slot = buf + LOG_CHECKPOINT_GROUP_ARRAY + 8 * n;
-
-	*file_no = mach_read_from_8(slot + LOG_CHECKPOINT_ARCHIVED_FILE_NO);
-}
-
-/******************************************************//**
-Writes the checkpoint info to a log group header.
-
-The checkpoint buffer of the group is filled with the next checkpoint
-number and lsn, the data written by log_crypt_write_checkpoint_buf(), the
-offset of the checkpoint lsn within the group (split into low and high
-32 bits), the log buffer size, the archived lsn, the archived file number
-of every log group and two checksums. If log_do_write is set, the buffer is
-then written with fil_io() to LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2,
-depending on the parity of next_checkpoint_no. The caller must own the
-log mutex. */
-static
-void
-log_group_checkpoint(
-/*=================*/
- log_group_t* group) /*!< in: log group */
-{
- log_group_t* group2;
-#ifdef UNIV_LOG_ARCHIVE
- ib_uint64_t archived_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
- lsn_t lsn_offset;
- ulint write_offset;
- ulint fold;
- byte* buf;
- ulint i;
-
- ut_ad(!srv_read_only_mode);
- ut_ad(srv_shutdown_state != SRV_SHUTDOWN_LAST_PHASE);
- ut_ad(mutex_own(&(log_sys->mutex)));
- ut_a(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE);
-
- buf = group->checkpoint_buf;
-
-#ifdef UNIV_DEBUG
- lsn_t old_next_checkpoint_lsn
- = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
- ut_ad(old_next_checkpoint_lsn <= log_sys->next_checkpoint_lsn);
-#endif /* UNIV_DEBUG */
- mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
- mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
-
- log_crypt_write_checkpoint_buf(buf);
-
- lsn_offset = log_group_calc_lsn_offset(log_sys->next_checkpoint_lsn,
- group);
- mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32,
- lsn_offset & 0xFFFFFFFFUL);
- mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_HIGH32,
- lsn_offset >> 32);
-
- mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
-
-#ifdef UNIV_LOG_ARCHIVE
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
- archived_lsn = LSN_MAX;
- } else {
- archived_lsn = log_sys->archived_lsn;
- }
-
- mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
-#else /* UNIV_LOG_ARCHIVE */
- mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, LSN_MAX);
-#endif /* UNIV_LOG_ARCHIVE */
-
- for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
- log_checkpoint_set_nth_group_info(buf, i, 0); /* clear every slot first */
- }
-
- group2 = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group2) {
- log_checkpoint_set_nth_group_info(buf, group2->id,
-#ifdef UNIV_LOG_ARCHIVE
- group2->archived_file_no
-#else /* UNIV_LOG_ARCHIVE */
- 0
-#endif /* UNIV_LOG_ARCHIVE */
- );
-
- group2 = UT_LIST_GET_NEXT(log_groups, group2);
- }
-
- fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1); /* checksum over the bytes before CHECKSUM_1 */
- mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
-
- fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
- LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN); /* checksum over the bytes from the LSN field up to CHECKSUM_2 */
- mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
-
- /* We alternate the physical place of the checkpoint info in the first
- log file */
-
- if ((log_sys->next_checkpoint_no & 1) == 0) {
- write_offset = LOG_CHECKPOINT_1;
- } else {
- write_offset = LOG_CHECKPOINT_2;
- }
-
- if (log_do_write) {
- if (log_sys->n_pending_checkpoint_writes == 0) {
-
- rw_lock_x_lock_gen(&(log_sys->checkpoint_lock),
- LOG_CHECKPOINT); /* taken once, when the first pending
- checkpoint write is registered;
- log_complete_checkpoint() releases it */
- }
-
- log_sys->n_pending_checkpoint_writes++;
- MONITOR_INC(MONITOR_PENDING_CHECKPOINT_WRITE);
-
- log_sys->n_log_ios++;
-
- MONITOR_INC(MONITOR_LOG_IO);
-
- /* We send as the last parameter the group machine address
- added with 1, as we want to distinguish between a normal log
- file write and a checkpoint field write */
-
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->space_id, 0,
- write_offset / UNIV_PAGE_SIZE,
- write_offset % UNIV_PAGE_SIZE,
- OS_FILE_LOG_BLOCK_SIZE,
- buf, ((byte*) group + 1), 0);
-
- ut_ad(((ulint) group & 0x1UL) == 0); /* the tagging above relies on the
- lowest address bit of group being 0 */
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_HOTBACKUP
-/******************************************************//**
-Writes info to a buffer of a log group when log files are created in
-backup restoration. Fills in the log file header fields (group id 0, start
-lsn, the "ibbackup" label followed by a timestamp) and a checkpoint record
-at LOG_CHECKPOINT_1 with checkpoint number 0 and lsn
-start + LOG_BLOCK_HDR_SIZE, including both checkpoint checksums. Only the
-buffer is modified here; no file i/o is done in this function. */
-UNIV_INTERN
-void
-log_reset_first_header_and_checkpoint(
-/*==================================*/
- byte* hdr_buf,/*!< in/out: buffer which will be written to the
- start of the first log file */
- ib_uint64_t start) /*!< in: lsn of the start of the first log file;
- we pretend that there is a checkpoint at
- start + LOG_BLOCK_HDR_SIZE */
-{
- ulint fold;
- byte* buf;
- ib_uint64_t lsn;
-
- mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
- mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start);
-
- lsn = start + LOG_BLOCK_HDR_SIZE;
-
- /* Write the label of mysqlbackup --restore */
- strcpy((char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
- "ibbackup ");
- ut_sprintf_timestamp((char*) hdr_buf
- + (LOG_FILE_WAS_CREATED_BY_HOT_BACKUP
- + (sizeof "ibbackup ") - 1)); /* the timestamp starts at the
- terminating NUL of the label */
- buf = hdr_buf + LOG_CHECKPOINT_1;
-
- mach_write_to_8(buf + LOG_CHECKPOINT_NO, 0);
- mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn);
-
- log_crypt_write_checkpoint_buf(buf);
-
- mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_LOW32,
- LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
- mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET_HIGH32, 0);
-
- mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024); /* a fixed value of 2 MiB is recorded */
-
- mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, LSN_MAX);
-
- fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
- mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
-
- fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
- LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
- mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
-
- /* Starting from InnoDB-3.23.50, we should also write info on
- allocated size in the tablespace, but unfortunately we do not
- know it here */
-}
-#endif /* UNIV_HOTBACKUP */
-
-#ifndef UNIV_HOTBACKUP
-/******************************************************//**
-Reads a checkpoint info from a log group header to log_sys->checkpoint_buf.
-One log block (OS_FILE_LOG_BLOCK_SIZE bytes) is read at byte offset 'field'
-of the log group space. The caller must own the log mutex. */
-UNIV_INTERN
-void
-log_group_read_checkpoint_info(
-/*===========================*/
-	log_group_t*	group,	/*!< in: log group */
-	ulint		field)	/*!< in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
-{
-	/* Split the byte offset into a page number and an offset
-	within that page, as fil_io() expects */
-	const ulint	page_no = field / UNIV_PAGE_SIZE;
-	const ulint	page_offset = field % UNIV_PAGE_SIZE;
-
-	ut_ad(mutex_own(&log_sys->mutex));
-
-	log_sys->n_log_ios++;
-	MONITOR_INC(MONITOR_LOG_IO);
-
-	fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0,
-	       page_no, page_offset,
-	       OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, 0);
-}
-
-/******************************************************//**
-Writes checkpoint info to groups: calls log_group_checkpoint() for every
-log group, unless the server runs in read-only mode. The caller must own
-the log mutex. */
-UNIV_INTERN
-void
-log_groups_write_checkpoint_info(void)
-/*==================================*/
-{
-	log_group_t*	group;
-
-	ut_ad(mutex_own(&log_sys->mutex));
-
-	if (srv_read_only_mode) {
-		/* Nothing is written in read-only mode */
-		return;
-	}
-
-	group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
-	while (group) {
-		log_group_checkpoint(group);
-
-		group = UT_LIST_GET_NEXT(log_groups, group);
-	}
-}
-
-/******************************************************//**
-Makes a checkpoint. Note that this function does not flush dirty
-blocks from the buffer pool: it only checks what the lsn of the oldest
-modification in the pool is, and writes information about the lsn in
-log files. Use log_make_checkpoint_at to flush also the pool.
-@return TRUE if success, FALSE if a checkpoint write was already running;
-TRUE is also returned when no checkpoint was written because none was
-needed, or because checkpoints are disabled and safe_to_ignore is set */
-UNIV_INTERN
-ibool
-log_checkpoint(
-/*===========*/
- ibool sync, /*!< in: TRUE if synchronous operation is
- desired */
- ibool write_always, /*!< in: the function normally checks if
- the new checkpoint would have a greater
- lsn than the previous one: if not, then no
- physical write is done; by setting this
- parameter TRUE, a physical write will always be
- made to log files */
- ibool safe_to_ignore) /*!< in: TRUE if checkpoint can be ignored in
- the case checkpoints are disabled */
-{
- lsn_t oldest_lsn;
-
- ut_ad(!srv_read_only_mode);
-
- if (recv_recovery_is_on()) {
- recv_apply_hashed_log_recs(true);
- }
-
- if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC &&
- srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT) {
- fil_flush_file_spaces(FIL_TABLESPACE);
- }
-
- mutex_enter(&(log_sys->mutex));
-
- ut_ad(!recv_no_log_write);
- oldest_lsn = log_buf_pool_get_oldest_modification();
-
- mutex_exit(&(log_sys->mutex));
-
- /* Because log also contains headers and dummy log records,
- if the buffer pool contains no dirty buffers, oldest_lsn
- gets the value log_sys->lsn from the previous function,
- and we must make sure that the log is flushed up to that
- lsn. If there are dirty buffers in the buffer pool, then our
- write-ahead-logging algorithm ensures that the log has been flushed
- up to oldest_lsn. */
-
- log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
-
- mutex_enter(&(log_sys->mutex));
-
- /* Return if this is not a forced checkpoint and either there is no
- need for a checkpoint or if checkpoints are disabled */
- if (!write_always
- && (log_sys->last_checkpoint_lsn >= oldest_lsn ||
- (safe_to_ignore && log_disable_checkpoint_active)))
- {
-
- mutex_exit(&(log_sys->mutex));
-
- return(TRUE);
- }
-
- if (log_disable_checkpoint_active)
- {
- /* Wait until we are allowed to do a checkpoint: the s-lock
- is taken and released again only to block here, and the log
- mutex is released for the duration of the wait */
- mutex_exit(&(log_sys->mutex));
- rw_lock_s_lock(&(log_sys->checkpoint_lock));
- rw_lock_s_unlock(&(log_sys->checkpoint_lock));
- mutex_enter(&(log_sys->mutex));
- }
-
- ut_ad(log_sys->flushed_to_disk_lsn >= oldest_lsn);
-
- if (log_sys->n_pending_checkpoint_writes > 0) {
- /* A checkpoint write is running */
-
- mutex_exit(&(log_sys->mutex));
-
- if (sync) {
- /* Wait for the checkpoint write to complete */
- rw_lock_s_lock(&(log_sys->checkpoint_lock));
- rw_lock_s_unlock(&(log_sys->checkpoint_lock));
- }
-
- return(FALSE);
- }
-
- ut_ad(oldest_lsn >= log_sys->next_checkpoint_lsn);
- log_sys->next_checkpoint_lsn = oldest_lsn;
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr, "Making checkpoint no "
- LSN_PF " at lsn " LSN_PF "\n",
- log_sys->next_checkpoint_no,
- oldest_lsn);
- }
-#endif /* UNIV_DEBUG */
-
- /* generate key version and key used to encrypt future blocks,
- *
- * NOTE: the +1 is as the next_checkpoint_no will be updated once
- * the checkpoint info has been written and THEN blocks will be encrypted
- * with new key
- */
- if (srv_encrypt_log) {
- log_crypt_set_ver_and_key(log_sys->next_checkpoint_no + 1);
- }
-
- log_groups_write_checkpoint_info();
-
- MONITOR_INC(MONITOR_NUM_CHECKPOINT);
-
- mutex_exit(&(log_sys->mutex));
-
- if (sync) {
- /* Wait for the checkpoint write to complete */
- rw_lock_s_lock(&(log_sys->checkpoint_lock));
- rw_lock_s_unlock(&(log_sys->checkpoint_lock));
- }
-
- return(TRUE);
-}
-
-/****************************************************************//**
-Makes a checkpoint at a given lsn or later. First preflushes modified
-buffer pool pages, retrying until the preflush reports success, and then
-requests a synchronous checkpoint, retrying until log_checkpoint()
-returns TRUE. */
-UNIV_INTERN
-void
-log_make_checkpoint_at(
-/*===================*/
-	lsn_t	lsn,		/*!< in: make a checkpoint at this or a
-				later lsn, if LSN_MAX, makes
-				a checkpoint at the latest lsn */
-	ibool	write_always)	/*!< in: the function normally checks if
-				the new checkpoint would have a
-				greater lsn than the previous one: if
-				not, then no physical write is done;
-				by setting this parameter TRUE, a
-				physical write will always be made to
-				log files */
-{
-	/* Preflush pages synchronously: flush as much as we can */
-	for (;;) {
-		if (log_preflush_pool_modified_pages(lsn)) {
-			break;
-		}
-	}
-
-	/* Force a checkpoint */
-	for (;;) {
-		if (log_checkpoint(TRUE, write_always, FALSE)) {
-			break;
-		}
-	}
-}
-
-/****************************************************************//**
-Disable checkpoints. This is used when doing a volume snapshot
-to ensure that we don't get a checkpoint between snapshotting two
-different volumes.
-@return 1 if checkpoints were already disabled (nothing is changed),
-0 if they were disabled by this call; in the latter case
-log_sys->checkpoint_lock is left x-locked until log_enable_checkpoint()
-is called */
-
-UNIV_INTERN
-ibool log_disable_checkpoint()
-{
- mutex_enter(&(log_sys->mutex));
-
- /*
- Wait if a checkpoint write is running.
- This is the same code that is used in log_checkpoint() to ensure
- that two checkpoints are not happening at the same time.
- */
- while (log_sys->n_pending_checkpoint_writes > 0)
- {
- mutex_exit(&(log_sys->mutex));
- rw_lock_s_lock(&(log_sys->checkpoint_lock));
- rw_lock_s_unlock(&(log_sys->checkpoint_lock));
- mutex_enter(&(log_sys->mutex));
- }
- /*
- The following should never be true; it is here just in case of
- wrong usage of this function. (Better safe than sorry).
- */
-
- if (log_disable_checkpoint_active)
- {
- mutex_exit(&(log_sys->mutex));
- return 1; /* Already disabled */
- }
- /*
- Take the checkpoint lock to ensure we will not get any checkpoints
- running
- */
- rw_lock_x_lock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
- log_disable_checkpoint_active= 1;
- mutex_exit(&(log_sys->mutex));
- return 0;
-}
-
-
-/****************************************************************//**
-Enables checkpoints that were disabled with log_disable_checkpoint().
-This is called by MariaDB, and only after an earlier call to
-log_disable_checkpoint().
-
-Note: We can't take the log_sys->mutex here, as a running log_checkpoint()
-which is waiting for log_sys->checkpoint_lock may already have it.
-This is however safe to do without a mutex, as
-log_disable_checkpoint_active is protected by log_sys->checkpoint_lock.
-*/
-
-UNIV_INTERN
-void log_enable_checkpoint()
-{
-	ut_ad(log_disable_checkpoint_active);
-
-	/* Test variable, mostly to protect against wrong usage */
-	if (!log_disable_checkpoint_active) {
-		return;
-	}
-
-	log_disable_checkpoint_active= 0;
-	rw_lock_x_unlock_gen(&log_sys->checkpoint_lock, LOG_CHECKPOINT);
-}
-
-/****************************************************************//**
-Tries to establish a big enough margin of free space in the log groups, such
-that a new log entry can be catenated without an immediate need for a
-checkpoint. NOTE: this function may only be called if the calling thread
-owns no synchronization objects!
-
-Returns at once if log->check_flush_or_checkpoint is FALSE. Otherwise:
-if the modified age (lsn - oldest modification) exceeds
-max_modified_age_sync, pages are preflushed to advance the oldest
-modification by twice the excess; if the checkpoint age
-(lsn - last_checkpoint_lsn) exceeds max_checkpoint_age, a synchronous
-checkpoint is made and the whole check is repeated; if it only exceeds
-max_checkpoint_age_async, an asynchronous checkpoint is made. */
-static
-void
-log_checkpoint_margin(void)
-/*=======================*/
-{
- log_t* log = log_sys;
- lsn_t age;
- lsn_t checkpoint_age;
- ib_uint64_t advance;
- lsn_t oldest_lsn;
- ibool checkpoint_sync;
- ibool do_checkpoint;
- bool success;
-loop:
- checkpoint_sync = FALSE;
- do_checkpoint = FALSE;
- advance = 0;
-
- mutex_enter(&(log->mutex));
- ut_ad(!recv_no_log_write);
-
- if (log->check_flush_or_checkpoint == FALSE) {
- mutex_exit(&(log->mutex));
-
- return;
- }
-
- oldest_lsn = log_buf_pool_get_oldest_modification();
-
- age = log->lsn - oldest_lsn;
-
- if (age > log->max_modified_age_sync) {
-
- /* A flush is urgent: we have to do a synchronous preflush */
- advance = 2 * (age - log->max_modified_age_sync);
- }
-
- checkpoint_age = log->lsn - log->last_checkpoint_lsn;
-
- if (checkpoint_age > log->max_checkpoint_age) {
- /* A checkpoint is urgent: we do it synchronously */
-
- checkpoint_sync = TRUE;
-
- do_checkpoint = TRUE;
-
- } else if (checkpoint_age > log->max_checkpoint_age_async) {
- /* A checkpoint is not urgent: do it asynchronously */
-
- do_checkpoint = TRUE;
-
- log->check_flush_or_checkpoint = FALSE;
- } else {
- log->check_flush_or_checkpoint = FALSE;
- }
-
- mutex_exit(&(log->mutex));
-
- if (advance) {
- lsn_t new_oldest = oldest_lsn + advance;
-
- success = log_preflush_pool_modified_pages(new_oldest);
-
- /* If the flush succeeded, this thread has done its part
- and can proceed. If it did not succeed, there was another
- thread doing a flush at the same time. */
- if (!success) {
- mutex_enter(&(log->mutex));
-
- log->check_flush_or_checkpoint = TRUE; /* re-arm the check, as it may
- have been cleared above */
-
- mutex_exit(&(log->mutex));
- goto loop;
- }
- }
-
- if (do_checkpoint) {
- log_checkpoint(checkpoint_sync, FALSE, FALSE);
-
- if (checkpoint_sync) {
-
- goto loop; /* re-evaluate the ages after the synchronous
- checkpoint */
- }
- }
-}
-
-/******************************************************//**
-Reads a specified log segment to a buffer. Optionally releases the log mutex
-before the I/O.
-
-The range [start_lsn, end_lsn) is read in pieces, each of which ends at the
-latest at the end of the current log file of the group. Every piece is
-passed to log_decrypt_after_read() after it has been read. The caller must
-own the log mutex on entry. If release_mutex is set, the mutex is released
-around each fil_io() call and is not held when the function returns;
-otherwise it is held throughout. */
-UNIV_INTERN
-void
-log_group_read_log_seg(
-/*===================*/
- ulint type, /*!< in: LOG_ARCHIVE or LOG_RECOVER */
- byte* buf, /*!< out: buffer where to read */
- log_group_t* group, /*!< in: log group */
- lsn_t start_lsn, /*!< in: read area start */
- lsn_t end_lsn, /*!< in: read area end */
- ibool release_mutex) /*!< in: whether the log_sys->mutex
- should be released before the read */
-{
- ulint len;
- lsn_t source_offset;
- bool sync;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- sync = (type == LOG_RECOVER); /* only recovery reads are synchronous */
-loop:
- source_offset = log_group_calc_lsn_offset(start_lsn, group);
-
- ut_a(end_lsn - start_lsn <= ULINT_MAX);
- len = (ulint) (end_lsn - start_lsn);
-
- ut_ad(len != 0);
-
- if ((source_offset % group->file_size) + len > group->file_size) {
-
- /* If the above condition is true then len (which is ulint)
- is > the expression below, so the typecast is ok */
- len = (ulint) (group->file_size -
- (source_offset % group->file_size));
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- if (type == LOG_ARCHIVE) {
-
- log_sys->n_pending_archive_ios++;
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- log_sys->n_log_ios++;
-
- MONITOR_INC(MONITOR_LOG_IO);
-
- ut_a(source_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
-
- if (release_mutex) {
- mutex_exit(&(log_sys->mutex));
- }
-
- fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0,
- (ulint) (source_offset / UNIV_PAGE_SIZE),
- (ulint) (source_offset % UNIV_PAGE_SIZE),
- len, buf, (type == LOG_ARCHIVE) ? &log_archive_io : NULL, 0);
-
- if (release_mutex) {
- mutex_enter(&log_sys->mutex); /* the decryption below runs with the
- mutex held */
- }
-
-#ifdef DEBUG_CRYPT
- fprintf(stderr, "BEFORE DECRYPT: block: %lu checkpoint: %lu %.8lx %.8lx offset %lu\n",
- log_block_get_hdr_no(buf),
- log_block_get_checkpoint_no(buf),
- log_block_calc_checksum(buf),
- log_block_get_checksum(buf), source_offset); /* NOTE(review): source_offset is an
- lsn_t printed with %lu - confirm the
- widths match on all platforms */
-#endif
-
- log_decrypt_after_read(buf, len);
-
-#ifdef DEBUG_CRYPT
- fprintf(stderr, "AFTER DECRYPT: block: %lu checkpoint: %lu %.8lx %.8lx\n",
- log_block_get_hdr_no(buf),
- log_block_get_checkpoint_no(buf),
- log_block_calc_checksum(buf),
- log_block_get_checksum(buf));
-#endif
-
- if (release_mutex) {
- mutex_exit(&log_sys->mutex);
- }
-
- start_lsn += len;
- buf += len;
-
- if (recv_sys && recv_sys->report(ut_time())) {
- ib_logf(IB_LOG_LEVEL_INFO, "Read redo log up to LSN=" LSN_PF,
- start_lsn);
- sd_notifyf(0, "STATUS=Read redo log up to LSN=" LSN_PF,
- start_lsn);
- }
-
- if (start_lsn != end_lsn) {
-
- if (release_mutex) {
- mutex_enter(&(log_sys->mutex)); /* the top of the loop expects the
- mutex to be held */
- }
- goto loop;
- }
-}
-
-#ifdef UNIV_LOG_ARCHIVE
-/******************************************************//**
-Generates an archived log file name: srv_arch_dir, a path separator if
-the directory name does not already end with one, IB_ARCHIVED_LOGS_PREFIX
-and the file number zero-padded to IB_ARCHIVED_LOGS_SERIAL_LEN digits.
-The function asserts that buf_len is large enough for the result. */
-UNIV_INTERN
-void
-log_archived_file_name_gen(
-/*=======================*/
-	char*	buf,	/*!< out: buffer where to write */
-	ulint	buf_len,/*!< in: buffer length */
-	ulint	id MY_ATTRIBUTE((unused)),
-			/*!< in: group id;
-			currently we only archive the first group */
-	lsn_t	file_no)/*!< in: file number */
-{
-	ulint	dirnamelen;
-
-	dirnamelen = strlen(srv_arch_dir);
-
-	ut_a(buf_len > dirnamelen +
-	     IB_ARCHIVED_LOGS_SERIAL_LEN +
-	     IB_ARCHIVED_LOGS_PREFIX_LEN + 2);
-
-	strcpy(buf, srv_arch_dir);
-
-	/* Append a path separator unless the directory name already ends
-	with one. An empty directory name has no last character to look
-	at (buf[dirnamelen - 1] would be read before the start of buf), so
-	it is left as is and the file name becomes a relative one. */
-	if (dirnamelen > 0 && buf[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
-		buf[dirnamelen++] = SRV_PATH_SEPARATOR;
-	}
-
-	sprintf(buf + dirnamelen, IB_ARCHIVED_LOGS_PREFIX
-		"%0" IB_TO_STR(IB_ARCHIVED_LOGS_SERIAL_LEN) "llu",
-		(unsigned long long)file_no);
-}
-
-/******************************************************//**
-Gets the offset within an archived log file to continue to write with.
-The offset is 0 if the archived file does not exist or if archived_lsn is
-LSN_MAX; otherwise it is archived_lsn - file_no + LOG_FILE_HDR_SIZE. The
-function asserts that the file status could be queried and that the
-offset plus LOG_FILE_HDR_SIZE does not exceed the file size of the group. */
-UNIV_INTERN
-void
-log_archived_get_offset(
-/*=====================*/
-	log_group_t*	group,		/*!< in: log group */
-	lsn_t		file_no,	/*!< in: archive log file number */
-	lsn_t		archived_lsn,	/*!< in: last archived LSN */
-	lsn_t*		offset)		/*!< out: offset within archived file */
-{
-	char		file_name[OS_FILE_MAX_PATH];
-	ibool		exists;
-	os_file_type_t	type;
-
-	log_archived_file_name_gen(file_name,
-				   sizeof(file_name), group->id, file_no);
-
-	ut_a(os_file_status(file_name, &exists, &type));
-
-	if (!exists) {
-		*offset = 0;
-		return;
-	}
-
-	/* The offset is assigned exactly once; an earlier unconditional
-	store of the same expression was dead, as both branches below
-	overwrite it. */
-	if (archived_lsn != LSN_MAX) {
-		*offset = archived_lsn - file_no + LOG_FILE_HDR_SIZE;
-	} else {
-		/* Archiving was OFF prior startup */
-		*offset = 0;
-	}
-
-	ut_a(group->file_size >= *offset + LOG_FILE_HDR_SIZE);
-}
-
-/******************************************************//**
-Writes a log file header to a log file space. The header buffer of the
-nth archive file is filled with the group id, the start lsn, the file
-number and a cleared "archive completed" flag, and two log blocks are then
-written at the start of that file in the archive space. The caller must
-own the log mutex. */
-static
-void
-log_group_archive_file_header_write(
-/*================================*/
-	log_group_t*	group,	/*!< in: log group */
-	ulint		nth_file,	/*!< in: header to the nth file in the
-				archive log file space */
-	lsn_t		file_no,	/*!< in: archived file number */
-	ib_uint64_t	start_lsn)	/*!< in: log file data starts at this
-				lsn */
-{
-	byte*	hdr;
-	ulint	file_start;
-
-	ut_ad(mutex_own(&log_sys->mutex));
-	ut_a(nth_file < group->n_files);
-
-	hdr = group->archive_file_header_bufs[nth_file];
-
-	mach_write_to_4(hdr + LOG_GROUP_ID, group->id);
-	mach_write_to_8(hdr + LOG_FILE_START_LSN, start_lsn);
-	/* NOTE(review): file_no is an lsn_t, but it is stored with a
-	4-byte write here - confirm that it cannot be truncated. */
-	mach_write_to_4(hdr + LOG_FILE_NO, file_no);
-	mach_write_to_4(hdr + LOG_FILE_ARCH_COMPLETED, FALSE);
-
-	/* Byte offset of the nth file within the archive space */
-	file_start = nth_file * group->file_size;
-
-	log_sys->n_log_ios++;
-	MONITOR_INC(MONITOR_LOG_IO);
-
-	fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->archive_space_id,
-	       0,
-	       file_start / UNIV_PAGE_SIZE,
-	       file_start % UNIV_PAGE_SIZE,
-	       2 * OS_FILE_LOG_BLOCK_SIZE,
-	       hdr, &log_archive_io, 0);
-}
-
-/******************************************************//**
-Writes a log file header to a completed archived log file: sets the
-"archive completed" flag and the end lsn in the header buffer of the nth
-file and writes one log block, starting at the LOG_FILE_ARCH_COMPLETED
-field, to that file in the archive space. The caller must own the log
-mutex. */
-static
-void
-log_group_archive_completed_header_write(
-/*=====================================*/
-	log_group_t*	group,	/*!< in: log group */
-	ulint		nth_file,	/*!< in: header to the nth file in the
-				archive log file space */
-	ib_uint64_t	end_lsn)	/*!< in: end lsn of the file */
-{
-	byte*	hdr;
-	ulint	field_pos;
-
-	ut_ad(mutex_own(&log_sys->mutex));
-	ut_a(nth_file < group->n_files);
-
-	hdr = group->archive_file_header_bufs[nth_file];
-
-	mach_write_to_4(hdr + LOG_FILE_ARCH_COMPLETED, TRUE);
-	mach_write_to_8(hdr + LOG_FILE_END_LSN, end_lsn);
-
-	/* Byte offset of the completed flag of the nth file within the
-	archive space */
-	field_pos = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;
-
-	log_sys->n_log_ios++;
-	MONITOR_INC(MONITOR_LOG_IO);
-
-	fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, group->archive_space_id,
-	       0,
-	       field_pos / UNIV_PAGE_SIZE,
-	       field_pos % UNIV_PAGE_SIZE,
-	       OS_FILE_LOG_BLOCK_SIZE,
-	       hdr + LOG_FILE_ARCH_COMPLETED,
-	       &log_archive_io, 0);
-}
-
-/******************************************************//**
-Does the archive writes for a single log group. */
-static
-void
-log_group_archive(
-/*==============*/
- log_group_t* group) /*!< in: log group */
-{
- pfs_os_file_t file_handle;
- lsn_t start_lsn;
- lsn_t end_lsn;
- char name[OS_FILE_MAX_PATH];
- byte* buf;
- ulint len;
- ibool ret;
- lsn_t next_offset;
- ulint n_files;
- ulint open_mode;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- start_lsn = log_sys->archived_lsn;
-
- ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
-
- end_lsn = log_sys->next_archived_lsn;
-
- ut_a(end_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
-
- buf = log_sys->archive_buf;
-
- n_files = 0;
-
- next_offset = group->archived_offset;
-loop:
- if ((next_offset % group->file_size == 0)
- || (fil_space_get_size(group->archive_space_id) == 0)) {
-
- /* Add the file to the archive file space; create or open the
- file */
-
- if (next_offset % group->file_size == 0) {
- open_mode = OS_FILE_CREATE;
- if (n_files == 0) {
- /* Adjust archived_file_no to match start_lsn
- which is written in file header as well */
- group->archived_file_no = start_lsn;
- }
- } else {
- open_mode = OS_FILE_OPEN;
- }
-
- log_archived_file_name_gen(name, sizeof(name), group->id,
- group->archived_file_no +
- n_files * (group->file_size -
- LOG_FILE_HDR_SIZE));
-
- file_handle = os_file_create(innodb_file_log_key,
- name, open_mode,
- OS_FILE_AIO,
- OS_DATA_FILE, &ret, FALSE);
-
- if (!ret && (open_mode == OS_FILE_CREATE)) {
- file_handle = os_file_create(
- innodb_file_log_key, name, OS_FILE_OPEN,
- OS_FILE_AIO, OS_DATA_FILE, &ret, FALSE);
- }
-
- if (!ret) {
- ib_logf(IB_LOG_LEVEL_FATAL,
- "InnoDB: Cannot create or open"
- " archive log file %s.\n", name);
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr, "Created archive file %s\n", name);
- }
-#endif /* UNIV_DEBUG */
-
- ret = os_file_close(file_handle);
-
- ut_a(ret);
-
- /* Add the archive file as a node to the space */
-
- ut_a(fil_node_create(name, group->file_size / UNIV_PAGE_SIZE,
- group->archive_space_id, FALSE));
-
- if (next_offset % group->file_size == 0) {
- log_group_archive_file_header_write(
- group, n_files,
- group->archived_file_no +
- n_files * (group->file_size - LOG_FILE_HDR_SIZE),
- start_lsn);
-
- next_offset += LOG_FILE_HDR_SIZE;
- }
- }
-
- len = end_lsn - start_lsn;
-
- if (group->file_size < (next_offset % group->file_size) + len) {
-
- len = group->file_size - (next_offset % group->file_size);
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Archiving starting at lsn " LSN_PF ", len %lu"
- " to group %lu\n",
- start_lsn,
- (ulong) len, (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
-
- log_sys->n_pending_archive_ios++;
-
- log_sys->n_log_ios++;
-
- MONITOR_INC(MONITOR_LOG_IO);
-
- //TODO (jonaso): This must be dead code??
- log_encrypt_before_write(log_sys->next_checkpoint_no, buf, len);
-
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, false, group->archive_space_id,
- 0,
- (ulint) (next_offset / UNIV_PAGE_SIZE),
- (ulint) (next_offset % UNIV_PAGE_SIZE),
- ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
- &log_archive_io, 0);
-
- start_lsn += len;
- next_offset += len;
- buf += len;
-
- if (next_offset % group->file_size == 0) {
- n_files++;
- }
-
- if (end_lsn != start_lsn) {
-
- goto loop;
- }
-
- group->next_archived_file_no = group->archived_file_no +
- n_files * (group->file_size - LOG_FILE_HDR_SIZE);
- group->next_archived_offset = next_offset % group->file_size;
-
- ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
-}
-
-/*****************************************************//**
-(Writes to the archive of each log group.) Currently, only the first
-group is archived. */
-static
-void
-log_archive_groups(void)
-/*====================*/
-{
- log_group_t* group;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- log_group_archive(group);
-}
-
-/*****************************************************//**
-Completes the archiving write phase for (each log group), currently,
-the first log group. */
-static
-void
-log_archive_write_complete_groups(void)
-/*===================================*/
-{
- log_group_t* group;
- lsn_t end_offset;
- ulint trunc_files;
- ulint n_files;
- ib_uint64_t start_lsn;
- ib_uint64_t end_lsn;
- ulint i;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- group->archived_file_no = group->next_archived_file_no;
- group->archived_offset = group->next_archived_offset;
-
- /* Truncate from the archive file space all but the last
- file, or if it has been written full, all files */
-
- n_files = (UNIV_PAGE_SIZE
- * fil_space_get_size(group->archive_space_id))
- / group->file_size;
- ut_ad(n_files > 0);
-
- end_offset = group->archived_offset;
-
- if (end_offset % group->file_size == 0) {
-
- trunc_files = n_files;
- } else {
- trunc_files = n_files - 1;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes && trunc_files) {
- fprintf(stderr,
- "Complete file(s) archived to group %lu\n",
- (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
-
- /* Calculate the archive file space start lsn */
- start_lsn = log_sys->next_archived_lsn
- - (end_offset - LOG_FILE_HDR_SIZE + trunc_files
- * (group->file_size - LOG_FILE_HDR_SIZE));
- end_lsn = start_lsn;
-
- for (i = 0; i < trunc_files; i++) {
-
- end_lsn += group->file_size - LOG_FILE_HDR_SIZE;
-
- /* Write a notice to the headers of archived log
- files that the file write has been completed */
-
- log_group_archive_completed_header_write(group, i, end_lsn);
- }
-
- fil_space_truncate_start(group->archive_space_id,
- trunc_files * group->file_size);
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fputs("Archiving writes completed\n", stderr);
- }
-#endif /* UNIV_DEBUG */
-}
-
-/******************************************************//**
-Completes an archiving i/o. */
-static
-void
-log_archive_check_completion_low(void)
-/*==================================*/
-{
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- if (log_sys->n_pending_archive_ios == 0
- && log_sys->archiving_phase == LOG_ARCHIVE_READ) {
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fputs("Archiving read completed\n", stderr);
- }
-#endif /* UNIV_DEBUG */
-
- /* Archive buffer has now been read in: start archive writes */
-
- log_sys->archiving_phase = LOG_ARCHIVE_WRITE;
-
- log_archive_groups();
- }
-
- if (log_sys->n_pending_archive_ios == 0
- && log_sys->archiving_phase == LOG_ARCHIVE_WRITE) {
-
- log_archive_write_complete_groups();
-
- log_sys->archived_lsn = log_sys->next_archived_lsn;
-
- rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
- }
-}
-
-/******************************************************//**
-Completes an archiving i/o. */
-static
-void
-log_io_complete_archive(void)
-/*=========================*/
-{
- log_group_t* group;
-
- mutex_enter(&(log_sys->mutex));
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- mutex_exit(&(log_sys->mutex));
-
- fil_flush(group->archive_space_id);
-
- mutex_enter(&(log_sys->mutex));
-
- ut_ad(log_sys->n_pending_archive_ios > 0);
-
- log_sys->n_pending_archive_ios--;
-
- log_archive_check_completion_low();
-
- mutex_exit(&(log_sys->mutex));
-}
-
-/********************************************************************//**
-Starts an archiving operation.
-@return TRUE if succeed, FALSE if an archiving operation was already running */
-UNIV_INTERN
-ibool
-log_archive_do(
-/*===========*/
- ibool sync, /*!< in: TRUE if synchronous operation is desired */
- ulint* n_bytes)/*!< out: archive log buffer size, 0 if nothing to
- archive */
-{
- ibool calc_new_limit;
- lsn_t start_lsn;
- lsn_t limit_lsn = LSN_MAX;
-
- calc_new_limit = TRUE;
-loop:
- mutex_enter(&(log_sys->mutex));
-
- switch (log_sys->archiving_state) {
- case LOG_ARCH_OFF:
-arch_none:
- mutex_exit(&(log_sys->mutex));
-
- *n_bytes = 0;
-
- return(TRUE);
- case LOG_ARCH_STOPPED:
- case LOG_ARCH_STOPPING2:
- mutex_exit(&(log_sys->mutex));
-
- os_event_wait(log_sys->archiving_on);
-
- goto loop;
- }
-
- start_lsn = log_sys->archived_lsn;
-
- if (calc_new_limit) {
- ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
- limit_lsn = start_lsn + log_sys->archive_buf_size;
-
- *n_bytes = log_sys->archive_buf_size;
-
- if (limit_lsn >= log_sys->lsn) {
-
- limit_lsn = ut_uint64_align_down(
- log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE);
- }
- }
-
- if (log_sys->archived_lsn >= limit_lsn) {
-
- goto arch_none;
- }
-
- if (log_sys->written_to_all_lsn < limit_lsn) {
-
- mutex_exit(&(log_sys->mutex));
-
- log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE);
-
- calc_new_limit = FALSE;
-
- goto loop;
- }
-
- if (log_sys->n_pending_archive_ios > 0) {
- /* An archiving operation is running */
-
- mutex_exit(&(log_sys->mutex));
-
- if (sync) {
- rw_lock_s_lock(&(log_sys->archive_lock));
- rw_lock_s_unlock(&(log_sys->archive_lock));
- }
-
- *n_bytes = log_sys->archive_buf_size;
-
- return(FALSE);
- }
-
- rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
-
- log_sys->archiving_phase = LOG_ARCHIVE_READ;
-
- log_sys->next_archived_lsn = limit_lsn;
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "Archiving from lsn " LSN_PF " to lsn " LSN_PF "\n",
- log_sys->archived_lsn, limit_lsn);
- }
-#endif /* UNIV_DEBUG */
-
- /* Read the log segment to the archive buffer */
-
- log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf,
- UT_LIST_GET_FIRST(log_sys->log_groups),
- start_lsn, limit_lsn, FALSE);
-
- mutex_exit(&(log_sys->mutex));
-
- if (sync) {
- rw_lock_s_lock(&(log_sys->archive_lock));
- rw_lock_s_unlock(&(log_sys->archive_lock));
- }
-
- *n_bytes = log_sys->archive_buf_size;
-
- return(TRUE);
-}
-
-/****************************************************************//**
-Writes the log contents to the archive at least up to the lsn when this
-function was called. */
-static
-void
-log_archive_all(void)
-/*=================*/
-{
- lsn_t present_lsn;
-
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
- mutex_exit(&(log_sys->mutex));
-
- return;
- }
-
- present_lsn = log_sys->lsn;
-
- mutex_exit(&(log_sys->mutex));
-
- log_pad_current_log_block();
-
- for (;;) {
-
- ulint archived_bytes;
-
- mutex_enter(&(log_sys->mutex));
-
- if (present_lsn <= log_sys->archived_lsn) {
-
- mutex_exit(&(log_sys->mutex));
-
- return;
- }
-
- mutex_exit(&(log_sys->mutex));
-
- log_archive_do(TRUE, &archived_bytes);
-
- if (archived_bytes == 0)
- return;
- }
-}
-
-/*****************************************************//**
-Closes the possible open archive log file (for each group) the first group,
-and if it was open, increments the group file count by 2, if desired. */
-static
-void
-log_archive_close_groups(
-/*=====================*/
- ibool increment_file_count) /*!< in: TRUE if we want to increment
- the file count */
-{
- log_group_t* group;
- ulint trunc_len;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
-
- return;
- }
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- trunc_len = UNIV_PAGE_SIZE
- * fil_space_get_size(group->archive_space_id);
- if (trunc_len > 0) {
- ut_a(trunc_len == group->file_size);
-
- /* Write a notice to the headers of archived log
- files that the file write has been completed */
-
- log_group_archive_completed_header_write(
- group, 0, log_sys->archived_lsn);
-
- fil_space_truncate_start(group->archive_space_id,
- trunc_len);
- if (increment_file_count) {
- group->archived_offset = 0;
- }
-
- }
-}
-
-/****************************************************************//**
-Writes the log contents to the archive up to the lsn when this function was
-called, and stops the archiving. When archiving is started again, the archived
-log file numbers start from 2 higher, so that the archiving will not write
-again to the archived log files which exist when this function returns. */
-static
-void
-log_archive_stop(void)
-/*==================*/
-{
- ibool success;
-
- mutex_enter(&(log_sys->mutex));
-
- ut_ad(log_sys->archiving_state == LOG_ARCH_ON);
- log_sys->archiving_state = LOG_ARCH_STOPPING;
-
- mutex_exit(&(log_sys->mutex));
-
- log_archive_all();
-
- mutex_enter(&(log_sys->mutex));
-
- log_sys->archiving_state = LOG_ARCH_STOPPING2;
- os_event_reset(log_sys->archiving_on);
-
- mutex_exit(&(log_sys->mutex));
-
- /* Wait for a possible archiving operation to end */
-
- rw_lock_s_lock(&(log_sys->archive_lock));
- rw_lock_s_unlock(&(log_sys->archive_lock));
-
- mutex_enter(&(log_sys->mutex));
-
- /* Close all archived log files, incrementing the file count by 2,
- if appropriate */
-
- log_archive_close_groups(TRUE);
-
- mutex_exit(&(log_sys->mutex));
-
- /* Make a checkpoint, so that if recovery is needed, the file numbers
- of new archived log files will start from the right value */
-
- success = FALSE;
-
- while (!success) {
- success = log_checkpoint(TRUE, TRUE, FALSE);
- }
-
- mutex_enter(&(log_sys->mutex));
-
- log_sys->archiving_state = LOG_ARCH_STOPPED;
-
- mutex_exit(&(log_sys->mutex));
-}
-
-/****************************************************************//**
-Starts again archiving which has been stopped.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_start(void)
-/*===================*/
-{
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->archiving_state != LOG_ARCH_STOPPED) {
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_ERROR);
- }
-
- log_sys->archiving_state = LOG_ARCH_ON;
-
- os_event_set(log_sys->archiving_on);
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_SUCCESS);
-}
-
-/****************************************************************//**
-Stop archiving the log so that a gap may occur in the archived log files.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_noarchivelog(void)
-/*==========================*/
-{
- ut_ad(!srv_read_only_mode);
-loop:
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->archiving_state == LOG_ARCH_STOPPED
- || log_sys->archiving_state == LOG_ARCH_OFF) {
-
- log_sys->archiving_state = LOG_ARCH_OFF;
-
- os_event_set(log_sys->archiving_on);
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_SUCCESS);
- }
-
- mutex_exit(&(log_sys->mutex));
-
- log_archive_stop();
-
- os_thread_sleep(500000);
-
- goto loop;
-}
-
-/****************************************************************//**
-Start archiving the log so that a gap may occur in the archived log files.
-@return DB_SUCCESS or DB_ERROR */
-UNIV_INTERN
-ulint
-log_archive_archivelog(void)
-/*========================*/
-{
- ut_ad(!srv_read_only_mode);
-
- mutex_enter(&(log_sys->mutex));
-
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
-
- log_sys->archiving_state = LOG_ARCH_ON;
-
- log_sys->archived_lsn
- = ut_uint64_align_down(log_sys->lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- mutex_exit(&(log_sys->mutex));
-
- return(DB_SUCCESS);
- }
-
- mutex_exit(&(log_sys->mutex));
-
- return(DB_ERROR);
-}
-
-/****************************************************************//**
-Tries to establish a big enough margin of free space in the log groups, such
-that a new log entry can be catenated without an immediate need for
-archiving. */
-static
-void
-log_archive_margin(void)
-/*====================*/
-{
- log_t* log = log_sys;
- ulint age;
- ibool sync;
- ulint dummy;
-loop:
- mutex_enter(&(log->mutex));
-
- if (log->archiving_state == LOG_ARCH_OFF) {
- mutex_exit(&(log->mutex));
-
- return;
- }
-
- age = log->lsn - log->archived_lsn;
-
- if (age > log->max_archived_lsn_age) {
-
- /* An archiving is urgent: we have to do synchronous i/o */
-
- sync = TRUE;
-
- } else if (age > log->max_archived_lsn_age_async) {
-
- /* An archiving is not urgent: we do asynchronous i/o */
-
- sync = FALSE;
- } else {
- /* No archiving required yet */
-
- mutex_exit(&(log->mutex));
-
- return;
- }
-
- mutex_exit(&(log->mutex));
-
- log_archive_do(sync, &dummy);
-
- if (sync == TRUE) {
- /* Check again that enough was written to the archive */
-
- goto loop;
- }
-}
-#endif /* UNIV_LOG_ARCHIVE */
-
-/********************************************************************//**
-Checks that there is enough free space in the log to start a new query step.
-Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
-function may only be called if the calling thread owns no synchronization
-objects! */
-UNIV_INTERN
-void
-log_check_margins(void)
-/*===================*/
-{
-loop:
- log_flush_margin();
-
- log_checkpoint_margin();
-
- mutex_enter(&(log_sys->mutex));
- if (log_check_tracking_margin(0)) {
-
- mutex_exit(&(log_sys->mutex));
- os_thread_sleep(10000);
- goto loop;
- }
- mutex_exit(&(log_sys->mutex));
-
-#ifdef UNIV_LOG_ARCHIVE
- log_archive_margin();
-#endif /* UNIV_LOG_ARCHIVE */
-
- mutex_enter(&(log_sys->mutex));
- ut_ad(!recv_no_log_write);
-
- if (log_sys->check_flush_or_checkpoint) {
-
- mutex_exit(&(log_sys->mutex));
-
- goto loop;
- }
-
- mutex_exit(&(log_sys->mutex));
-}
-
-/****************************************************************//**
-Makes a checkpoint at the latest lsn and writes it to first page of each
-data file in the database, so that we know that the file spaces contain
-all modifications up to that lsn. This can only be called at database
-shutdown. This function also writes all log in log files to the log archive. */
-UNIV_INTERN
-void
-logs_empty_and_mark_files_at_shutdown(void)
-/*=======================================*/
-{
- lsn_t lsn;
- lsn_t tracked_lsn;
- ulint count = 0;
- ulint pending_io;
- ibool server_busy;
-
- ib_logf(IB_LOG_LEVEL_INFO, "Starting shutdown...");
-
- /* Enable checkpoints if someone had turned them off */
- if (log_disable_checkpoint_active)
- log_enable_checkpoint();
-
- /* Wait until the master thread and all other operations are idle: our
- algorithm only works if the server is idle at shutdown */
-
- srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
-loop:
- if (!srv_read_only_mode) {
- os_event_set(srv_error_event);
- os_event_set(srv_monitor_event);
- os_event_set(srv_buf_dump_event);
- os_event_set(lock_sys->timeout_event);
- os_event_set(dict_stats_event);
- }
- os_thread_sleep(100000);
-
- count++;
-
- /* Check that there are no longer transactions, except for
- PREPARED ones. We need this wait even for the 'very fast'
- shutdown, because the InnoDB layer may have committed or
- prepared transactions and we don't want to lose them. */
-
- if (ulint total_trx = srv_was_started && !srv_read_only_mode
- && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
- ? trx_sys_any_active_transactions() : 0) {
- if (srv_print_verbose_log && count > 600) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Waiting for %lu active transactions to finish",
- (ulong) total_trx);
-
- count = 0;
- }
-
- goto loop;
- }
-
- /* We need these threads to stop early in shutdown. */
- const char* thread_name;
-
- if (srv_error_monitor_active) {
- thread_name = "srv_error_monitor_thread";
- } else if (srv_monitor_active) {
- thread_name = "srv_monitor_thread";
- } else if (srv_dict_stats_thread_active) {
- thread_name = "dict_stats_thread";
- } else if (lock_sys->timeout_thread_active) {
- thread_name = "lock_wait_timeout_thread";
- } else if (srv_buf_dump_thread_active) {
- thread_name = "buf_dump_thread";
- } else if (btr_defragment_thread_active) {
- thread_name = "btr_defragment_thread";
- } else if (srv_fast_shutdown != 2 && trx_rollback_or_clean_is_active) {
- thread_name = "rollback of recovered transactions";
- } else {
- thread_name = NULL;
- }
-
- if (thread_name) {
- ut_ad(!srv_read_only_mode);
-wait_suspend_loop:
- if (srv_print_verbose_log && count > 600) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Waiting for %s to exit", thread_name);
- count = 0;
- }
- goto loop;
- }
-
- /* Check that the background threads are suspended */
-
- switch (srv_get_active_thread_type()) {
- case SRV_NONE:
- if (!srv_n_fil_crypt_threads_started) {
- srv_shutdown_state = SRV_SHUTDOWN_FLUSH_PHASE;
- break;
- }
- os_event_set(fil_crypt_threads_event);
- thread_name = "fil_crypt_thread";
- goto wait_suspend_loop;
- case SRV_PURGE:
- srv_purge_wakeup();
- thread_name = "purge thread";
- goto wait_suspend_loop;
- case SRV_MASTER:
- thread_name = "master thread";
- goto wait_suspend_loop;
- case SRV_WORKER:
- thread_name = "worker threads";
- goto wait_suspend_loop;
- }
-
- /* At this point only page_cleaner should be active. We wait
- here to let it complete the flushing of the buffer pools
- before proceeding further. */
-
- count = 0;
- while (buf_page_cleaner_is_active || buf_lru_manager_is_active) {
- if (srv_print_verbose_log && count == 0) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Waiting for page_cleaner to "
- "finish flushing of buffer pool");
- }
- ++count;
- os_thread_sleep(100000);
- if (count > 600) {
- count = 0;
- }
- }
-
- if (log_scrub_thread_active) {
- ut_ad(!srv_read_only_mode);
- os_event_set(log_scrub_event);
- }
-
- mutex_enter(&log_sys->mutex);
- server_busy = log_scrub_thread_active
- || log_sys->n_pending_checkpoint_writes
-#ifdef UNIV_LOG_ARCHIVE
- || log_sys->n_pending_archive_ios
-#endif /* UNIV_LOG_ARCHIVE */
- || log_sys->n_pending_writes;
- mutex_exit(&log_sys->mutex);
-
- if (server_busy) {
- if (srv_print_verbose_log && count > 600) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Pending checkpoint_writes: %lu. "
- "Pending log flush writes: %lu",
- (ulong) log_sys->n_pending_checkpoint_writes,
- (ulong) log_sys->n_pending_writes);
- count = 0;
- }
- goto loop;
- }
-
- ut_ad(!log_scrub_thread_active);
-
- pending_io = buf_pool_check_no_pending_io();
-
- if (pending_io) {
- if (srv_print_verbose_log && count > 600) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Waiting for %lu buffer page I/Os to complete",
- (ulong) pending_io);
- count = 0;
- }
-
- goto loop;
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- log_archive_all();
-#endif /* UNIV_LOG_ARCHIVE */
- if (srv_fast_shutdown == 2) {
- if (!srv_read_only_mode) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "MySQL has requested a very fast shutdown "
- "without flushing the InnoDB buffer pool to "
- "data files. At the next mysqld startup "
- "InnoDB will do a crash recovery!");
-
- /* In this fastest shutdown we do not flush the
- buffer pool:
-
- it is essentially a 'crash' of the InnoDB server.
- Make sure that the log is all flushed to disk, so
- that we can recover all committed transactions in
- a crash recovery. We must not write the lsn stamps
- to the data files, since at a startup InnoDB deduces
- from the stamps if the previous shutdown was clean. */
-
- log_buffer_flush_to_disk();
- }
-
- srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
-
- /* Wake the log tracking thread which will then immediatelly
- quit because of srv_shutdown_state value */
- if (srv_redo_log_thread_started) {
- os_event_reset(srv_redo_log_tracked_event);
- os_event_set(srv_checkpoint_completed_event);
- }
-
- fil_close_all_files();
- return;
- }
-
- if (!srv_read_only_mode) {
- log_make_checkpoint_at(LSN_MAX, TRUE);
-
- mutex_enter(&log_sys->mutex);
-
- tracked_lsn = log_get_tracked_lsn();
-
- lsn = log_sys->lsn;
-
- if (lsn != log_sys->last_checkpoint_lsn
- || (srv_track_changed_pages
- && (tracked_lsn != log_sys->last_checkpoint_lsn))
-#ifdef UNIV_LOG_ARCHIVE
- || (srv_log_archive_on
- && lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE)
-#endif /* UNIV_LOG_ARCHIVE */
- ) {
-
- mutex_exit(&log_sys->mutex);
-
- goto loop;
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- log_archive_close_groups(TRUE);
-#endif /* UNIV_LOG_ARCHIVE */
-
- mutex_exit(&log_sys->mutex);
-
- fil_flush_file_spaces(FIL_TABLESPACE);
- fil_flush_file_spaces(FIL_LOG);
-
- /* The call fil_write_flushed_lsn_to_data_files() will
- bypass the buffer pool: therefore it is essential that
- the buffer pool has been completely flushed to disk! */
-
- if (!buf_all_freed()) {
- if (srv_print_verbose_log && count > 600) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Waiting for dirty buffer pages"
- " to be flushed");
- count = 0;
- }
-
- goto loop;
- }
- } else {
- lsn = srv_start_lsn;
- }
-
- srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
-
- /* Signal the log following thread to quit */
- if (srv_redo_log_thread_started) {
- os_event_reset(srv_redo_log_tracked_event);
- os_event_set(srv_checkpoint_completed_event);
- }
-
- /* Make some checks that the server really is quiet */
- srv_thread_type type = srv_get_active_thread_type();
- ut_a(type == SRV_NONE);
-
- bool freed = buf_all_freed();
- ut_a(freed);
-
- ut_a(lsn == log_sys->lsn);
- ut_ad(srv_force_recovery >= SRV_FORCE_NO_LOG_REDO
- || lsn == log_sys->last_checkpoint_lsn);
-
- if (lsn < srv_start_lsn) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Log sequence number at shutdown " LSN_PF " "
- "is lower than at startup " LSN_PF "!",
- lsn, srv_start_lsn);
- }
-
- srv_shutdown_lsn = lsn;
-
- if (!srv_read_only_mode) {
- dberr_t err = fil_write_flushed_lsn(lsn);
-
- if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Failed to write flush lsn to the "
- "system tablespace at shutdown err=%s",
- ut_strerr(err));
- }
- }
-
- fil_close_all_files();
-
- /* Make some checks that the server really is quiet */
- type = srv_get_active_thread_type();
- ut_a(type == SRV_NONE);
-
- freed = buf_all_freed();
- ut_a(freed);
-
- ut_a(lsn == log_sys->lsn);
-}
-
-#ifdef UNIV_LOG_DEBUG
-/******************************************************//**
-Checks by parsing that the catenated log segment for a single mtr is
-consistent. */
-UNIV_INTERN
-ibool
-log_check_log_recs(
-/*===============*/
- const byte* buf, /*!< in: pointer to the start of
- the log segment in the
- log_sys->buf log buffer */
- ulint len, /*!< in: segment length in bytes */
- ib_uint64_t buf_start_lsn) /*!< in: buffer start lsn */
-{
- ib_uint64_t contiguous_lsn;
- ib_uint64_t scanned_lsn;
- const byte* start;
- const byte* end;
- byte* buf1;
- byte* scan_buf;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- if (len == 0) {
-
- return(TRUE);
- }
-
- start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE);
- end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE);
-
- buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE);
- scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE);
-
- ut_memcpy(scan_buf, start, end - start);
-
- recv_scan_log_recs((buf_pool_get_n_pages()
- - (recv_n_pool_free_frames * srv_buf_pool_instances))
- * UNIV_PAGE_SIZE, FALSE, scan_buf, end - start,
- ut_uint64_align_down(buf_start_lsn,
- OS_FILE_LOG_BLOCK_SIZE),
- &contiguous_lsn, &scanned_lsn);
-
- ut_a(scanned_lsn == buf_start_lsn + len);
- ut_a(recv_sys->recovered_lsn == scanned_lsn);
-
- mem_free(buf1);
-
- return(TRUE);
-}
-#endif /* UNIV_LOG_DEBUG */
-
-/******************************************************//**
-Peeks the current lsn.
-@return TRUE if success, FALSE if could not get the log system mutex */
-UNIV_INTERN
-ibool
-log_peek_lsn(
-/*=========*/
- lsn_t* lsn) /*!< out: if returns TRUE, current lsn is here */
-{
- if (0 == mutex_enter_nowait(&(log_sys->mutex))) {
- *lsn = log_sys->lsn;
-
- mutex_exit(&(log_sys->mutex));
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/******************************************************//**
-Prints info of the log. */
-UNIV_INTERN
-void
-log_print(
-/*======*/
- FILE* file) /*!< in: file where to print */
-{
- double time_elapsed;
- time_t current_time;
-
- // mutex_enter(&(log_sys->mutex));
-
- fprintf(file,
- "Log sequence number " LSN_PF "\n"
- "Log flushed up to " LSN_PF "\n"
- "Pages flushed up to " LSN_PF "\n"
- "Last checkpoint at " LSN_PF "\n",
- log_sys->lsn,
- log_sys->flushed_to_disk_lsn,
- log_buf_pool_get_oldest_modification_peek(),
- log_sys->last_checkpoint_lsn);
-
- fprintf(file,
- "Max checkpoint age " LSN_PF "\n"
- "Checkpoint age target " LSN_PF "\n"
- "Modified age " LSN_PF "\n"
- "Checkpoint age " LSN_PF "\n",
- log_sys->max_checkpoint_age,
- log_sys->max_checkpoint_age_async,
- log_sys->lsn -log_buf_pool_get_oldest_modification_peek(),
- log_sys->lsn - log_sys->last_checkpoint_lsn);
-
- current_time = time(NULL);
-
- time_elapsed = difftime(current_time,
- log_sys->last_printout_time);
-
- if (time_elapsed <= 0) {
- time_elapsed = 1;
- }
-
- fprintf(file,
- "%lu pending log writes, %lu pending chkp writes\n"
- "%lu log i/o's done, %.2f log i/o's/second\n",
- (ulong) log_sys->n_pending_writes,
- (ulong) log_sys->n_pending_checkpoint_writes,
- (ulong) log_sys->n_log_ios,
- ((double)(log_sys->n_log_ios - log_sys->n_log_ios_old)
- / time_elapsed));
-
- if (srv_track_changed_pages) {
-
- /* The maximum tracked LSN age is equal to the maximum
- checkpoint age */
- fprintf(file,
- "Log tracking enabled\n"
- "Log tracked up to " LSN_PF "\n"
- "Max tracked LSN age " LSN_PF "\n",
- log_get_tracked_lsn(),
- log_sys->max_checkpoint_age);
- }
-
- log_sys->n_log_ios_old = log_sys->n_log_ios;
- log_sys->last_printout_time = current_time;
-
- //mutex_exit(&(log_sys->mutex));
-}
-
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
-void
-log_refresh_stats(void)
-/*===================*/
-{
- log_sys->n_log_ios_old = log_sys->n_log_ios;
- log_sys->last_printout_time = time(NULL);
-}
-
-/********************************************************//**
-Closes a log group. */
-static
-void
-log_group_close(
-/*===========*/
- log_group_t* group) /* in,own: log group to close */
-{
- ulint i;
-
- for (i = 0; i < group->n_files; i++) {
- mem_free(group->file_header_bufs_ptr[i]);
-#ifdef UNIV_LOG_ARCHIVE
- mem_free(group->archive_file_header_bufs_ptr[i]);
-#endif /* UNIV_LOG_ARCHIVE */
- }
-
- mem_free(group->file_header_bufs_ptr);
- mem_free(group->file_header_bufs);
-
-#ifdef UNIV_LOG_ARCHIVE
- mem_free(group->archive_file_header_bufs_ptr);
- mem_free(group->archive_file_header_bufs);
-#endif /* UNIV_LOG_ARCHIVE */
-
- mem_free(group->checkpoint_buf_ptr);
-
- mem_free(group);
-}
-
-/********************************************************//**
-Closes all log groups. */
-UNIV_INTERN
-void
-log_group_close_all(void)
-/*=====================*/
-{
- log_group_t* group;
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (UT_LIST_GET_LEN(log_sys->log_groups) > 0) {
- log_group_t* prev_group = group;
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- UT_LIST_REMOVE(log_groups, log_sys->log_groups, prev_group);
-
- log_group_close(prev_group);
- }
-}
-
-/********************************************************//**
-Shutdown the log system but do not release all the memory. */
-UNIV_INTERN
-void
-log_shutdown(void)
-/*==============*/
-{
- log_group_close_all();
-
- mem_free(log_sys->buf_ptr);
- log_sys->buf_ptr = NULL;
- log_sys->buf = NULL;
- mem_free(log_sys->checkpoint_buf_ptr);
- log_sys->checkpoint_buf_ptr = NULL;
- log_sys->checkpoint_buf = NULL;
- mem_free(log_sys->archive_buf_ptr);
- log_sys->archive_buf_ptr = NULL;
- log_sys->archive_buf = NULL;
-
- os_event_free(log_sys->no_flush_event);
- os_event_free(log_sys->one_flushed_event);
-
- rw_lock_free(&log_sys->checkpoint_lock);
-
- mutex_free(&log_sys->mutex);
- mutex_free(&log_sys->log_flush_order_mutex);
-
- if (!srv_read_only_mode && srv_scrub_log) {
- os_event_free(log_scrub_event);
- log_scrub_event = NULL;
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- rw_lock_free(&log_sys->archive_lock);
- os_event_free(log_sys->archiving_on);
-#endif /* UNIV_LOG_ARCHIVE */
-
-#ifdef UNIV_LOG_DEBUG
- recv_sys_debug_free();
-#endif
-
- recv_sys_close();
-}
-
-/********************************************************//**
-Free the log system data structures. */
-UNIV_INTERN
-void
-log_mem_free(void)
-/*==============*/
-{
- if (log_sys != NULL) {
- recv_sys_mem_free();
- mem_free(log_sys);
-
- log_sys = NULL;
- }
-}
-
-/*****************************************************************//*
-If no log record has been written for a while, fill current log
-block with dummy records. */
-static
-void
-log_scrub()
-/*=========*/
-{
- ulint cur_lbn = log_block_convert_lsn_to_no(log_sys->lsn);
- if (next_lbn_to_pad == cur_lbn)
- {
- log_pad_current_log_block();
- }
- next_lbn_to_pad = log_block_convert_lsn_to_no(log_sys->lsn);
-}
-
-/* log scrubbing speed, in bytes/sec */
-UNIV_INTERN ulonglong innodb_scrub_log_speed;
-
-/*****************************************************************//**
-This is the main thread for log scrub. It waits for an event and
-when waked up fills current log block with dummy records and
-sleeps again.
-@return this function does not return, it calls os_thread_exit() */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(log_scrub_thread)(void*)
-{
- ut_ad(!srv_read_only_mode);
-
- while (srv_shutdown_state < SRV_SHUTDOWN_FLUSH_PHASE) {
- /* log scrubbing interval in µs. */
- ulonglong interval = 1000*1000*512/innodb_scrub_log_speed;
-
- os_event_wait_time(log_scrub_event, interval);
-
- log_scrub();
-
- os_event_reset(log_scrub_event);
- }
-
- log_scrub_thread_active = false;
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc
deleted file mode 100644
index 27382977e5c..00000000000
--- a/storage/xtradb/log/log0online.cc
+++ /dev/null
@@ -1,1912 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011-2012 Percona Inc. All Rights Reserved.
-Copyright (C) 2016, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
-Street, Fifth Floor, Boston, MA 02110-1301, USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file log/log0online.cc
-Online database log parsing for changed page tracking
-
-*******************************************************/
-
-#include "log0online.h"
-
-#include "my_dbug.h"
-
-#include "log0recv.h"
-#include "mach0data.h"
-#include "mtr0log.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "trx0sys.h"
-#include "ut0rbt.h"
-
-enum { FOLLOW_SCAN_SIZE = 4 * (UNIV_PAGE_SIZE_MAX) };
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register log_bmp_sys->mutex with PFS */
-UNIV_INTERN mysql_pfs_key_t log_bmp_sys_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/** Log parsing and bitmap output data structure */
-struct log_bitmap_struct {
- byte* read_buf_ptr; /*!< Unaligned log read buffer */
- byte* read_buf; /*!< log read buffer */
- byte parse_buf[RECV_PARSING_BUF_SIZE];
- /*!< log parse buffer */
- byte* parse_buf_end; /*!< parse buffer position where the
- next read log data should be copied to.
- If the previous log records were fully
- parsed, it points to the start,
- otherwise points immediatelly past the
- end of the incomplete log record. */
- char bmp_file_home[FN_REFLEN];
- /*!< directory for bitmap files */
- log_online_bitmap_file_t out; /*!< The current bitmap file */
- ulint out_seq_num; /*!< the bitmap file sequence number */
- lsn_t start_lsn; /*!< the LSN of the next unparsed
- record and the start of the next LSN
- interval to be parsed. */
- lsn_t end_lsn; /*!< the end of the LSN interval to be
- parsed, equal to the next checkpoint
- LSN at the time of parse */
- lsn_t next_parse_lsn; /*!< the LSN of the next unparsed
- record in the current parse */
- ib_rbt_t* modified_pages; /*!< the current modified page set,
- organized as the RB-tree with the keys
- of (space, 4KB-block-start-page-id)
- pairs */
- ib_rbt_node_t* page_free_list; /*!< Singly-linked list of freed nodes
- of modified_pages tree for later
- reuse. Nodes are linked through
- ib_rbt_node_t.left as this field has
- both the correct type and the tree does
- not mind its overwrite during
- rbt_next() tree traversal. */
-};
-
-/* The log parsing and bitmap output struct instance */
-static struct log_bitmap_struct* log_bmp_sys;
-
-/* Mutex protecting log_bmp_sys */
-static ib_mutex_t log_bmp_sys_mutex;
-
-/** File name stem for bitmap files. */
-static const char* bmp_file_name_stem = "ib_modified_log_";
-
-/** File name template for bitmap files. The 1st format tag is a directory
-name, the 2nd tag is the stem, the 3rd tag is a file sequence number, the 4th
-tag is the start LSN for the file. */
-static const char* bmp_file_name_template = "%s%s%lu_%llu.xdb";
-
-/* On server startup with empty database srv_start_lsn == 0, in
-which case the first LSN of actual log records will be this. */
-#define MIN_TRACKED_LSN ((LOG_START_LSN) + (LOG_BLOCK_HDR_SIZE))
-
-/* Tests if num bit of bitmap is set */
-#define IS_BIT_SET(bitmap, num) \
- (*((bitmap) + ((num) >> 3)) & (1UL << ((num) & 7UL)))
-
-/** The bitmap file block size in bytes. All writes will be multiples of this.
- */
-enum {
- MODIFIED_PAGE_BLOCK_SIZE = 4096
-};
-
-
-/** Offsets in a file bitmap block */
-enum {
- MODIFIED_PAGE_IS_LAST_BLOCK = 0,/* 1 if last block in the current
- write, 0 otherwise. */
- MODIFIED_PAGE_START_LSN = 4, /* The starting tracked LSN of this and
- other blocks in the same write */
- MODIFIED_PAGE_END_LSN = 12, /* The ending tracked LSN of this and
- other blocks in the same write */
- MODIFIED_PAGE_SPACE_ID = 20, /* The space ID of tracked pages in
- this block */
- MODIFIED_PAGE_1ST_PAGE_ID = 24, /* The page ID of the first tracked
- page in this block */
- MODIFIED_PAGE_BLOCK_UNUSED_1 = 28,/* Unused in order to align the start
- of bitmap at 8 byte boundary */
- MODIFIED_PAGE_BLOCK_BITMAP = 32,/* Start of the bitmap itself */
- MODIFIED_PAGE_BLOCK_UNUSED_2 = MODIFIED_PAGE_BLOCK_SIZE - 8,
- /* Unused in order to align the end of
- bitmap at 8 byte boundary */
- MODIFIED_PAGE_BLOCK_CHECKSUM = MODIFIED_PAGE_BLOCK_SIZE - 4
- /* The checksum of the current block */
-};
-
-/** Length of the bitmap data in a block in bytes */
-enum { MODIFIED_PAGE_BLOCK_BITMAP_LEN
- = MODIFIED_PAGE_BLOCK_UNUSED_2 - MODIFIED_PAGE_BLOCK_BITMAP };
-
-/** Length of the bitmap data in a block in page ids */
-enum { MODIFIED_PAGE_BLOCK_ID_COUNT = MODIFIED_PAGE_BLOCK_BITMAP_LEN * 8 };
-
-/****************************************************************//**
-Provide a comparisson function for the RB-tree tree (space,
-block_start_page) pairs. Actual implementation does not matter as
-long as the ordering is full.
-@return -1 if p1 < p2, 0 if p1 == p2, 1 if p1 > p2
-*/
-static
-int
-log_online_compare_bmp_keys(
-/*========================*/
- const void* p1, /*!<in: 1st key to compare */
- const void* p2) /*!<in: 2nd key to compare */
-{
- const byte *k1 = (const byte *)p1;
- const byte *k2 = (const byte *)p2;
-
- ulint k1_space = mach_read_from_4(k1 + MODIFIED_PAGE_SPACE_ID);
- ulint k2_space = mach_read_from_4(k2 + MODIFIED_PAGE_SPACE_ID);
- if (k1_space == k2_space) {
- ulint k1_start_page
- = mach_read_from_4(k1 + MODIFIED_PAGE_1ST_PAGE_ID);
- ulint k2_start_page
- = mach_read_from_4(k2 + MODIFIED_PAGE_1ST_PAGE_ID);
- return k1_start_page < k2_start_page
- ? -1 : k1_start_page > k2_start_page ? 1 : 0;
- }
- return k1_space < k2_space ? -1 : 1;
-}
-
-/****************************************************************//**
-Set a bit for tracked page in the bitmap. Expand the bitmap tree as
-necessary. */
-static
-void
-log_online_set_page_bit(
-/*====================*/
- ulint space, /*!<in: log record space id */
- ulint page_no)/*!<in: log record page id */
-{
- ut_ad(mutex_own(&log_bmp_sys_mutex));
-
- ut_a(space != ULINT_UNDEFINED);
- ut_a(page_no != ULINT_UNDEFINED);
-
- ulint block_start_page = page_no / MODIFIED_PAGE_BLOCK_ID_COUNT
- * MODIFIED_PAGE_BLOCK_ID_COUNT;
- ulint block_pos = block_start_page ? (page_no % block_start_page / 8)
- : (page_no / 8);
- uint bit_pos = page_no % 8;
-
- byte search_page[MODIFIED_PAGE_BLOCK_SIZE];
- mach_write_to_4(search_page + MODIFIED_PAGE_SPACE_ID, space);
- mach_write_to_4(search_page + MODIFIED_PAGE_1ST_PAGE_ID,
- block_start_page);
-
- byte *page_ptr;
- ib_rbt_bound_t tree_search_pos;
- if (!rbt_search(log_bmp_sys->modified_pages, &tree_search_pos,
- search_page)) {
- page_ptr = rbt_value(byte, tree_search_pos.last);
- }
- else {
- ib_rbt_node_t *new_node;
-
- if (log_bmp_sys->page_free_list) {
- new_node = log_bmp_sys->page_free_list;
- log_bmp_sys->page_free_list = new_node->left;
- }
- else {
- new_node = static_cast<ib_rbt_node_t *>
- (ut_malloc
- (SIZEOF_NODE(log_bmp_sys->modified_pages)));
- }
- memset(new_node, 0, SIZEOF_NODE(log_bmp_sys->modified_pages));
-
- page_ptr = rbt_value(byte, new_node);
- mach_write_to_4(page_ptr + MODIFIED_PAGE_SPACE_ID, space);
- mach_write_to_4(page_ptr + MODIFIED_PAGE_1ST_PAGE_ID,
- block_start_page);
-
- rbt_add_preallocated_node(log_bmp_sys->modified_pages,
- &tree_search_pos, new_node);
- }
- page_ptr[MODIFIED_PAGE_BLOCK_BITMAP + block_pos] |= (1U << bit_pos);
-}
-
-/****************************************************************//**
-Calculate a bitmap block checksum. Algorithm borrowed from
-log_block_calc_checksum.
-@return checksum */
-UNIV_INLINE
-ulint
-log_online_calc_checksum(
-/*=====================*/
- const byte* block) /*!<in: bitmap block */
-{
- ulint sum;
- ulint sh;
- ulint i;
-
- sum = 1;
- sh = 0;
-
- for (i = 0; i < MODIFIED_PAGE_BLOCK_CHECKSUM; i++) {
-
- ulint b = block[i];
- sum &= 0x7FFFFFFFUL;
- sum += b;
- sum += b << sh;
- sh++;
- if (sh > 24) {
- sh = 0;
- }
- }
-
- return sum;
-}
-
-/****************************************************************//**
-Read one bitmap data page and check it for corruption.
-
-@return TRUE if page read OK, FALSE if I/O error */
-static
-ibool
-log_online_read_bitmap_page(
-/*========================*/
- log_online_bitmap_file_t *bitmap_file, /*!<in/out: bitmap
- file */
- byte *page, /*!<out: read page.
- Must be at least
- MODIFIED_PAGE_BLOCK_SIZE
- bytes long */
- ibool *checksum_ok) /*!<out: TRUE if page
- checksum OK */
-{
- ulint checksum;
- ulint actual_checksum;
- ibool success;
-
- ut_a(bitmap_file->size >= MODIFIED_PAGE_BLOCK_SIZE);
- ut_a(bitmap_file->offset
- <= bitmap_file->size - MODIFIED_PAGE_BLOCK_SIZE);
- ut_a(bitmap_file->offset % MODIFIED_PAGE_BLOCK_SIZE == 0);
-
- success = os_file_read(bitmap_file->file, page, bitmap_file->offset,
- MODIFIED_PAGE_BLOCK_SIZE);
-
- if (UNIV_UNLIKELY(!success)) {
-
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
- ib_logf(IB_LOG_LEVEL_WARN,
- "failed reading changed page bitmap file \'%s\'",
- bitmap_file->name);
- return FALSE;
- }
-
- bitmap_file->offset += MODIFIED_PAGE_BLOCK_SIZE;
- ut_ad(bitmap_file->offset <= bitmap_file->size);
-
- checksum = mach_read_from_4(page + MODIFIED_PAGE_BLOCK_CHECKSUM);
- actual_checksum = log_online_calc_checksum(page);
- *checksum_ok = (checksum == actual_checksum);
-
- return TRUE;
-}
-
-/****************************************************************//**
-Get the last tracked fully LSN from the bitmap file by reading
-backwards untile a correct end page is found. Detects incomplete
-writes and corrupted data. Sets the start output position for the
-written bitmap data.
-
-Multiple bitmap files are handled using the following assumptions:
-1) Only the last file might be corrupted. In case where no good data was found
-in the last file, assume that the next to last file is OK. This assumption
-does not limit crash recovery capability in any way.
-2) If the whole of the last file was corrupted, assume that the start LSN in
-its name is correct and use it for (re-)tracking start.
-
-@return the last fully tracked LSN */
-static
-lsn_t
-log_online_read_last_tracked_lsn(void)
-/*==================================*/
-{
- byte page[MODIFIED_PAGE_BLOCK_SIZE];
- ibool is_last_page = FALSE;
- ibool checksum_ok = FALSE;
- lsn_t result;
- os_offset_t read_offset = log_bmp_sys->out.offset;
-
- while ((!checksum_ok || !is_last_page) && read_offset > 0)
- {
- read_offset -= MODIFIED_PAGE_BLOCK_SIZE;
- log_bmp_sys->out.offset = read_offset;
-
- if (!log_online_read_bitmap_page(&log_bmp_sys->out, page,
- &checksum_ok)) {
- checksum_ok = FALSE;
- result = 0;
- break;
- }
-
- if (checksum_ok) {
- is_last_page
- = mach_read_from_4
- (page + MODIFIED_PAGE_IS_LAST_BLOCK);
- } else {
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "corruption detected in \'%s\' at offset "
- UINT64PF,
- log_bmp_sys->out.name, read_offset);
- }
- };
-
- result = (checksum_ok && is_last_page)
- ? mach_read_from_8(page + MODIFIED_PAGE_END_LSN) : 0;
-
- /* Truncate the output file to discard the corrupted bitmap data, if
- any */
- if (!os_file_set_eof_at(log_bmp_sys->out.file,
- log_bmp_sys->out.offset)) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "failed truncating changed page bitmap file \'%s\' to "
- UINT64PF " bytes",
- log_bmp_sys->out.name, log_bmp_sys->out.offset);
- result = 0;
- }
- return result;
-}
-
-/****************************************************************//**
-Safely write the log_sys->tracked_lsn value. Uses atomic operations
-if available, otherwise this field is protected with the log system
-mutex. The reader counterpart function is log_get_tracked_lsn() in
-log0log.c. */
-UNIV_INLINE
-void
-log_set_tracked_lsn(
-/*================*/
- lsn_t tracked_lsn) /*!<in: new value */
-{
- log_sys->tracked_lsn = tracked_lsn;
- os_wmb;
-}
-
-/*********************************************************************//**
-Check if missing, if any, LSN interval can be read and tracked using the
-current LSN value, the LSN value where the tracking stopped, and the log group
-capacity.
-
-@return TRUE if the missing interval can be tracked or if there's no missing
-data. */
-static
-ibool
-log_online_can_track_missing(
-/*=========================*/
- lsn_t last_tracked_lsn, /*!<in: last tracked LSN */
- lsn_t tracking_start_lsn) /*!<in: current LSN */
-{
- /* last_tracked_lsn might be < MIN_TRACKED_LSN in the case of empty
- bitmap file, handle this too. */
- last_tracked_lsn = ut_max(last_tracked_lsn, MIN_TRACKED_LSN);
-
- if (last_tracked_lsn > tracking_start_lsn) {
- ib_logf(IB_LOG_LEVEL_FATAL,
- "last tracked LSN " LSN_PF " is ahead of tracking "
- "start LSN " LSN_PF ". This can be caused by "
- "mismatched bitmap files.",
- last_tracked_lsn, tracking_start_lsn);
- }
-
- return (last_tracked_lsn == tracking_start_lsn)
- || (log_sys->lsn - last_tracked_lsn
- <= log_sys->log_group_capacity);
-}
-
-
-/****************************************************************//**
-Diagnose a gap in tracked LSN range on server startup due to crash or
-very fast shutdown and try to close it by tracking the data
-immediatelly, if possible. */
-static
-void
-log_online_track_missing_on_startup(
-/*================================*/
- lsn_t last_tracked_lsn, /*!<in: last tracked LSN read from the
- bitmap file */
- lsn_t tracking_start_lsn) /*!<in: last checkpoint LSN of the
- current server startup */
-{
- ut_ad(last_tracked_lsn != tracking_start_lsn);
- ut_ad(srv_track_changed_pages);
-
- ib_logf(IB_LOG_LEVEL_WARN, "last tracked LSN in \'%s\' is " LSN_PF
- ", but the last checkpoint LSN is " LSN_PF ". This might be "
- "due to a server crash or a very fast shutdown.",
- log_bmp_sys->out.name, last_tracked_lsn, tracking_start_lsn);
-
- /* See if we can fully recover the missing interval */
- if (log_online_can_track_missing(last_tracked_lsn,
- tracking_start_lsn)) {
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "reading the log to advance the last tracked LSN.");
-
- log_bmp_sys->start_lsn = ut_max(last_tracked_lsn,
- MIN_TRACKED_LSN);
- log_set_tracked_lsn(log_bmp_sys->start_lsn);
- ut_a(log_online_follow_redo_log());
- ut_ad(log_bmp_sys->end_lsn >= tracking_start_lsn);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "continuing tracking changed pages from LSN " LSN_PF,
- log_bmp_sys->end_lsn);
- }
- else {
- ib_logf(IB_LOG_LEVEL_WARN,
- "the age of last tracked LSN exceeds log capacity, "
- "tracking-based incremental backups will work only "
- "from the higher LSN!");
-
- log_bmp_sys->end_lsn = log_bmp_sys->start_lsn
- = tracking_start_lsn;
- log_set_tracked_lsn(log_bmp_sys->start_lsn);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "starting tracking changed pages from LSN " LSN_PF,
- log_bmp_sys->end_lsn);
- }
-}
-
-/*********************************************************************//**
-Format a bitmap output file name to log_bmp_sys->out.name. */
-static
-void
-log_online_make_bitmap_name(
-/*=========================*/
- lsn_t start_lsn) /*!< in: the start LSN name part */
-{
- ut_snprintf(log_bmp_sys->out.name, FN_REFLEN, bmp_file_name_template,
- log_bmp_sys->bmp_file_home, bmp_file_name_stem,
- log_bmp_sys->out_seq_num, start_lsn);
-}
-
-/*********************************************************************//**
-Check if an old file that has the name of a new bitmap file we are about to
-create should be overwritten. */
-static
-ibool
-log_online_should_overwrite(
-/*========================*/
- const char *path) /*!< in: path to file */
-{
- dberr_t err;
- os_file_stat_t file_info;
-
- /* Currently, it's OK to overwrite 0-sized files only */
- err = os_file_get_status(path, &file_info, false);
- return err == DB_SUCCESS && file_info.type == OS_FILE_TYPE_FILE
- && file_info.size == 0LL;
-}
-
-/*********************************************************************//**
-Create a new empty bitmap output file.
-
-@return TRUE if operation succeeded, FALSE if I/O error */
-static
-ibool
-log_online_start_bitmap_file(void)
-/*==============================*/
-{
- ibool success = TRUE;
-
- /* Check for an old file that should be deleted first */
- if (log_online_should_overwrite(log_bmp_sys->out.name)) {
-
- success = static_cast<ibool>(
- os_file_delete_if_exists(innodb_file_bmp_key,
- log_bmp_sys->out.name));
- }
-
- if (UNIV_LIKELY(success)) {
- log_bmp_sys->out.file
- = os_file_create_simple_no_error_handling(
- innodb_file_bmp_key,
- log_bmp_sys->out.name,
- OS_FILE_CREATE,
- OS_FILE_READ_WRITE_CACHED,
- &success, FALSE);
- }
- if (UNIV_UNLIKELY(!success)) {
-
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
- ib_logf(IB_LOG_LEVEL_ERROR,
- "cannot create \'%s\'", log_bmp_sys->out.name);
- return FALSE;
- }
-
- log_bmp_sys->out.offset = 0;
- return TRUE;
-}
-
-/*********************************************************************//**
-Close the current bitmap output file and create the next one.
-
-@return TRUE if operation succeeded, FALSE if I/O error */
-static
-ibool
-log_online_rotate_bitmap_file(
-/*===========================*/
- lsn_t next_file_start_lsn) /*!<in: the start LSN name
- part */
-{
- if (!os_file_is_invalid(log_bmp_sys->out.file)) {
- os_file_close(log_bmp_sys->out.file);
- os_file_mark_invalid(&log_bmp_sys->out.file);
- }
- log_bmp_sys->out_seq_num++;
- log_online_make_bitmap_name(next_file_start_lsn);
- return log_online_start_bitmap_file();
-}
-
-/*********************************************************************//**
-Check the name of a given file if it's a changed page bitmap file and
-return file sequence and start LSN name components if it is. If is not,
-the values of output parameters are undefined.
-
-@return TRUE if a given file is a changed page bitmap file. */
-static
-ibool
-log_online_is_bitmap_file(
-/*======================*/
- const os_file_stat_t* file_info, /*!<in: file to
- check */
- ulong* bitmap_file_seq_num, /*!<out: bitmap file
- sequence number */
- lsn_t* bitmap_file_start_lsn) /*!<out: bitmap file
- start LSN */
-{
- char stem[FN_REFLEN];
-
- ut_ad (strlen(file_info->name) < OS_FILE_MAX_PATH);
-
- return ((file_info->type == OS_FILE_TYPE_FILE
- || file_info->type == OS_FILE_TYPE_LINK)
- && (sscanf(file_info->name, "%[a-z_]%lu_%llu.xdb", stem,
- bitmap_file_seq_num,
- (unsigned long long *)bitmap_file_start_lsn) == 3)
- && (!strcmp(stem, bmp_file_name_stem)));
-}
-
-/** Initialize the constant part of the log tracking subsystem */
-UNIV_INTERN
-void
-log_online_init(void)
-{
- mutex_create(log_bmp_sys_mutex_key, &log_bmp_sys_mutex,
- SYNC_LOG_ONLINE);
-}
-
-/** Initialize the dynamic part of the log tracking subsystem */
-UNIV_INTERN
-void
-log_online_read_init(void)
-{
- ibool success;
- lsn_t tracking_start_lsn
- = ut_max(log_sys->last_checkpoint_lsn, MIN_TRACKED_LSN);
- os_file_dir_t bitmap_dir;
- os_file_stat_t bitmap_dir_file_info;
- lsn_t last_file_start_lsn = MIN_TRACKED_LSN;
- size_t srv_data_home_len;
-
- /* Bitmap data start and end in a bitmap block must be 8-byte
- aligned. */
- compile_time_assert(MODIFIED_PAGE_BLOCK_BITMAP % 8 == 0);
- compile_time_assert(MODIFIED_PAGE_BLOCK_BITMAP_LEN % 8 == 0);
-
- ut_ad(srv_track_changed_pages);
-
- log_bmp_sys = static_cast<log_bitmap_struct *>
- (ut_malloc(sizeof(*log_bmp_sys)));
- log_bmp_sys->read_buf_ptr = static_cast<byte *>
- (ut_malloc(FOLLOW_SCAN_SIZE + OS_FILE_LOG_BLOCK_SIZE));
- log_bmp_sys->read_buf = static_cast<byte *>
- (ut_align(log_bmp_sys->read_buf_ptr, OS_FILE_LOG_BLOCK_SIZE));
-
- /* Initialize bitmap file directory from srv_data_home and add a path
- separator if needed. */
- srv_data_home_len = strlen(srv_data_home);
- ut_a (srv_data_home_len < FN_REFLEN);
- strcpy(log_bmp_sys->bmp_file_home, srv_data_home);
- if (srv_data_home_len
- && log_bmp_sys->bmp_file_home[srv_data_home_len - 1]
- != SRV_PATH_SEPARATOR) {
-
- ut_a (srv_data_home_len < FN_REFLEN - 1);
- log_bmp_sys->bmp_file_home[srv_data_home_len]
- = SRV_PATH_SEPARATOR;
- log_bmp_sys->bmp_file_home[srv_data_home_len + 1] = '\0';
- }
-
- /* Enumerate existing bitmap files to either open the last one to get
- the last tracked LSN either to find that there are none and start
- tracking from scratch. */
- log_bmp_sys->out.name[0] = '\0';
- log_bmp_sys->out_seq_num = 0;
-
- bitmap_dir = os_file_opendir(log_bmp_sys->bmp_file_home, TRUE);
- ut_a(bitmap_dir);
- while (!os_file_readdir_next_file(log_bmp_sys->bmp_file_home,
- bitmap_dir, &bitmap_dir_file_info)) {
-
- ulong file_seq_num;
- lsn_t file_start_lsn;
-
- if (!log_online_is_bitmap_file(&bitmap_dir_file_info,
- &file_seq_num,
- &file_start_lsn)) {
- continue;
- }
-
- if (file_seq_num > log_bmp_sys->out_seq_num
- && bitmap_dir_file_info.size > 0) {
- log_bmp_sys->out_seq_num = file_seq_num;
- last_file_start_lsn = file_start_lsn;
- /* No dir component (log_bmp_sys->bmp_file_home) here,
- because that's the cwd */
- strncpy(log_bmp_sys->out.name,
- bitmap_dir_file_info.name, FN_REFLEN - 1);
- log_bmp_sys->out.name[FN_REFLEN - 1] = '\0';
- }
- }
-
- if (os_file_closedir(bitmap_dir)) {
- os_file_get_last_error(TRUE);
- ib_logf(IB_LOG_LEVEL_FATAL, "cannot close \'%s\'",
- log_bmp_sys->bmp_file_home);
- }
-
- if (!log_bmp_sys->out_seq_num) {
- log_bmp_sys->out_seq_num = 1;
- log_online_make_bitmap_name(0);
- }
-
- log_bmp_sys->modified_pages = rbt_create(MODIFIED_PAGE_BLOCK_SIZE,
- log_online_compare_bmp_keys);
- log_bmp_sys->page_free_list = NULL;
-
- log_bmp_sys->out.file
- = os_file_create_simple_no_error_handling
- (innodb_file_bmp_key, log_bmp_sys->out.name, OS_FILE_OPEN,
- OS_FILE_READ_WRITE_CACHED, &success, FALSE);
-
- if (!success) {
-
- /* New file, tracking from scratch */
- ut_a(log_online_start_bitmap_file());
- }
- else {
-
- /* Read the last tracked LSN from the last file */
- lsn_t last_tracked_lsn;
- lsn_t file_start_lsn;
-
- log_bmp_sys->out.size
- = os_file_get_size(log_bmp_sys->out.file);
- log_bmp_sys->out.offset = log_bmp_sys->out.size;
-
- if (log_bmp_sys->out.offset % MODIFIED_PAGE_BLOCK_SIZE != 0) {
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "truncated block detected in \'%s\' at offset "
- UINT64PF,
- log_bmp_sys->out.name,
- log_bmp_sys->out.offset);
- log_bmp_sys->out.offset -=
- log_bmp_sys->out.offset
- % MODIFIED_PAGE_BLOCK_SIZE;
- }
-
- last_tracked_lsn = log_online_read_last_tracked_lsn();
- /* Do not rotate if we truncated the file to zero length - we
- can just start writing there */
- const bool need_rotate = (last_tracked_lsn != 0);
- if (!last_tracked_lsn) {
-
- last_tracked_lsn = last_file_start_lsn;
- }
-
- /* Start a new file. Choose the LSN value in its name based on
- if we can retrack any missing data. */
- if (log_online_can_track_missing(last_tracked_lsn,
- tracking_start_lsn)) {
- file_start_lsn = last_tracked_lsn;
- } else {
- file_start_lsn = tracking_start_lsn;
- }
- ut_a(!need_rotate
- || log_online_rotate_bitmap_file(file_start_lsn));
-
- if (last_tracked_lsn < tracking_start_lsn) {
-
- log_online_track_missing_on_startup
- (last_tracked_lsn, tracking_start_lsn);
- return;
- }
-
- if (last_tracked_lsn > tracking_start_lsn) {
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "last tracked LSN is " LSN_PF ", but the last "
- "checkpoint LSN is " LSN_PF ". The "
- "tracking-based incremental backups will work "
- "only from the latter LSN!",
- last_tracked_lsn, tracking_start_lsn);
- }
-
- }
-
- ib_logf(IB_LOG_LEVEL_INFO, "starting tracking changed pages from LSN "
- LSN_PF, tracking_start_lsn);
- log_bmp_sys->start_lsn = tracking_start_lsn;
- log_set_tracked_lsn(tracking_start_lsn);
-}
-
-/** Shut down the dynamic part of the log tracking subsystem */
-UNIV_INTERN
-void
-log_online_read_shutdown(void)
-{
- mutex_enter(&log_bmp_sys_mutex);
-
- srv_track_changed_pages = FALSE;
-
- ib_rbt_node_t *free_list_node = log_bmp_sys->page_free_list;
-
- if (!os_file_is_invalid(log_bmp_sys->out.file)) {
- os_file_close(log_bmp_sys->out.file);
- os_file_mark_invalid(&log_bmp_sys->out.file);
- }
-
- rbt_free(log_bmp_sys->modified_pages);
-
- while (free_list_node) {
- ib_rbt_node_t *next = free_list_node->left;
- ut_free(free_list_node);
- free_list_node = next;
- }
-
- ut_free(log_bmp_sys->read_buf_ptr);
- ut_free(log_bmp_sys);
- log_bmp_sys = NULL;
-
- srv_redo_log_thread_started = false;
-
- mutex_exit(&log_bmp_sys_mutex);
-}
-
-/** Shut down the constant part of the log tracking subsystem */
-UNIV_INTERN
-void
-log_online_shutdown(void)
-{
- mutex_free(&log_bmp_sys_mutex);
-}
-
-/*********************************************************************//**
-For the given minilog record type determine if the record has (space; page)
-associated with it.
-@return TRUE if the record has (space; page) in it */
-static
-ibool
-log_online_rec_has_page(
-/*====================*/
- byte type) /*!<in: the minilog record type */
-{
- return type != MLOG_MULTI_REC_END && type != MLOG_DUMMY_RECORD;
-}
-
-/*********************************************************************//**
-Check if a page field for a given log record type actually contains a page
-id. It does not for file operations and MLOG_LSN.
-@return TRUE if page field contains actual page id, FALSE otherwise */
-static
-ibool
-log_online_rec_page_means_page(
-/*===========================*/
- byte type) /*!<in: log record type */
-{
- return log_online_rec_has_page(type)
-#ifdef UNIV_LOG_LSN_DEBUG
- && type != MLOG_LSN
-#endif
- && type != MLOG_FILE_CREATE
- && type != MLOG_FILE_RENAME
- && type != MLOG_FILE_DELETE
- && type != MLOG_FILE_CREATE2;
-}
-
-/*********************************************************************//**
-Parse the log data in the parse buffer for the (space, page) pairs and add
-them to the modified page set as necessary. Removes the fully-parsed records
-from the buffer. If an incomplete record is found, moves it to the end of the
-buffer. */
-static
-void
-log_online_parse_redo_log(void)
-/*===========================*/
-{
- ut_ad(mutex_own(&log_bmp_sys_mutex));
-
- byte *ptr = log_bmp_sys->parse_buf;
- byte *end = log_bmp_sys->parse_buf_end;
- ulint len = 0;
-
- while (ptr != end
- && log_bmp_sys->next_parse_lsn < log_bmp_sys->end_lsn) {
-
- byte type;
- ulint space;
- ulint page_no;
- byte* body;
-
- /* recv_sys is not initialized, so on corrupt log we will
- SIGSEGV. But the log of a live database should not be
- corrupt. */
- len = recv_parse_log_rec(ptr, end, &type, &space, &page_no,
- &body);
- if (len > 0) {
-
- if (log_online_rec_page_means_page(type)) {
-
- ut_a(len >= 3);
- log_online_set_page_bit(space, page_no);
- }
-
- ptr += len;
- ut_ad(ptr <= end);
- log_bmp_sys->next_parse_lsn
- = recv_calc_lsn_on_data_add
- (log_bmp_sys->next_parse_lsn, len);
- }
- else {
-
- /* Incomplete log record. Shift it to the
- beginning of the parse buffer and leave it to be
- completed on the next read. */
- ut_memmove(log_bmp_sys->parse_buf, ptr, end - ptr);
- log_bmp_sys->parse_buf_end
- = log_bmp_sys->parse_buf + (end - ptr);
- ptr = end;
- }
- }
-
- if (len > 0) {
-
- log_bmp_sys->parse_buf_end = log_bmp_sys->parse_buf;
- }
-}
-
-/*********************************************************************//**
-Check the log block checksum.
-@return TRUE if the log block checksum is OK, FALSE otherwise. */
-static
-ibool
-log_online_is_valid_log_seg(
-/*========================*/
- const byte* log_block) /*!< in: read log data */
-{
- ibool checksum_is_ok
- = log_block_checksum_is_ok_or_old_format(log_block, true);
-
- if (!checksum_is_ok) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "log block checksum mismatch: expected " ULINTPF ", "
- "calculated checksum " ULINTPF,
- log_block_get_checksum(log_block),
- log_block_calc_checksum(log_block));
- }
-
- return checksum_is_ok;
-}
-
-/*********************************************************************//**
-Copy new log data to the parse buffer while skipping log block header,
-trailer and already parsed data. */
-static
-void
-log_online_add_to_parse_buf(
-/*========================*/
- const byte* log_block, /*!< in: read log data */
- ulint data_len, /*!< in: length of read log data */
- ulint skip_len) /*!< in: how much of log data to
- skip */
-{
- ut_ad(mutex_own(&log_bmp_sys_mutex));
-
- ulint start_offset = skip_len ? skip_len : LOG_BLOCK_HDR_SIZE;
- ulint end_offset
- = (data_len == OS_FILE_LOG_BLOCK_SIZE)
- ? data_len - LOG_BLOCK_TRL_SIZE
- : data_len;
- ulint actual_data_len = (end_offset >= start_offset)
- ? end_offset - start_offset : 0;
-
- ut_memcpy(log_bmp_sys->parse_buf_end, log_block + start_offset,
- actual_data_len);
-
- log_bmp_sys->parse_buf_end += actual_data_len;
-
- ut_a(log_bmp_sys->parse_buf_end - log_bmp_sys->parse_buf
- <= RECV_PARSING_BUF_SIZE);
-}
-
-/*********************************************************************//**
-Parse the log block: first copies the read log data to the parse buffer while
-skipping log block header, trailer and already parsed data. Then it actually
-parses the log to add to the modified page bitmap. */
-static
-void
-log_online_parse_redo_log_block(
-/*============================*/
- const byte* log_block, /*!< in: read log data */
- ulint skip_already_parsed_len) /*!< in: how many bytes of
- log data should be skipped as
- they were parsed before */
-{
- ut_ad(mutex_own(&log_bmp_sys_mutex));
-
- ulint block_data_len = log_block_get_data_len(log_block);
-
- ut_ad(block_data_len % OS_FILE_LOG_BLOCK_SIZE == 0
- || block_data_len < OS_FILE_LOG_BLOCK_SIZE);
-
- log_online_add_to_parse_buf(log_block, block_data_len,
- skip_already_parsed_len);
- log_online_parse_redo_log();
-}
-
-/*********************************************************************//**
-Read and parse one redo log chunk and updates the modified page bitmap. */
-static
-void
-log_online_follow_log_seg(
-/*======================*/
- log_group_t* group, /*!< in: the log group to use */
- lsn_t block_start_lsn, /*!< in: the LSN to read from */
- lsn_t block_end_lsn) /*!< in: the LSN to read to */
-{
- ut_ad(mutex_own(&log_bmp_sys_mutex));
-
- /* Pointer to the current OS_FILE_LOG_BLOCK-sized chunk of the read log
- data to parse */
- byte* log_block = log_bmp_sys->read_buf;
- byte* log_block_end = log_bmp_sys->read_buf
- + (block_end_lsn - block_start_lsn);
-
- mutex_enter(&log_sys->mutex);
- log_group_read_log_seg(LOG_RECOVER, log_bmp_sys->read_buf,
- group, block_start_lsn, block_end_lsn, TRUE);
- /* log_group_read_log_seg will release the log_sys->mutex for us */
-
- while (log_block < log_block_end
- && log_bmp_sys->next_parse_lsn < log_bmp_sys->end_lsn) {
-
- /* How many bytes of log data should we skip in the current log
- block. Skipping is necessary because we round down the next
- parse LSN thus it is possible to read the already-processed log
- data many times */
- ulint skip_already_parsed_len = 0;
-
- if (!log_online_is_valid_log_seg(log_block)) {
- break;
- }
-
- if ((block_start_lsn <= log_bmp_sys->next_parse_lsn)
- && (block_start_lsn + OS_FILE_LOG_BLOCK_SIZE
- > log_bmp_sys->next_parse_lsn)) {
-
- /* The next parse LSN is inside the current block, skip
- data preceding it. */
- skip_already_parsed_len
- = (ulint)(log_bmp_sys->next_parse_lsn
- - block_start_lsn);
- }
- else {
-
- /* If the next parse LSN is not inside the current
- block, then the only option is that we have processed
- ahead already. */
- ut_a(block_start_lsn > log_bmp_sys->next_parse_lsn);
- }
-
- /* TODO: merge the copying to the parse buf code with
- skip_already_len calculations */
- log_online_parse_redo_log_block(log_block,
- skip_already_parsed_len);
-
- log_block += OS_FILE_LOG_BLOCK_SIZE;
- block_start_lsn += OS_FILE_LOG_BLOCK_SIZE;
- }
-
- return;
-}
-
-/*********************************************************************//**
-Read and parse the redo log in a given group in FOLLOW_SCAN_SIZE-sized
-chunks and updates the modified page bitmap. */
-static
-void
-log_online_follow_log_group(
-/*========================*/
- log_group_t* group, /*!< in: the log group to use */
- lsn_t contiguous_lsn) /*!< in: the LSN of log block start
- containing the log_parse_start_lsn */
-{
- ut_ad(mutex_own(&log_bmp_sys_mutex));
-
- lsn_t block_start_lsn = contiguous_lsn;
- lsn_t block_end_lsn;
-
- log_bmp_sys->next_parse_lsn = log_bmp_sys->start_lsn;
- log_bmp_sys->parse_buf_end = log_bmp_sys->parse_buf;
-
- do {
- block_end_lsn = block_start_lsn + FOLLOW_SCAN_SIZE;
-
- log_online_follow_log_seg(group, block_start_lsn,
- block_end_lsn);
-
- /* Next parse LSN can become higher than the last read LSN
- only in the case when the read LSN falls right on the block
- boundary, in which case next parse lsn is bumped to the actual
- data LSN on the next (not yet read) block. This assert is
- slightly conservative. */
- ut_a(log_bmp_sys->next_parse_lsn
- <= block_end_lsn + LOG_BLOCK_HDR_SIZE
- + LOG_BLOCK_TRL_SIZE);
-
- block_start_lsn = block_end_lsn;
- } while (block_end_lsn < log_bmp_sys->end_lsn);
-
- /* Assert that the last read log record is a full one */
- ut_a(log_bmp_sys->parse_buf_end == log_bmp_sys->parse_buf);
-}
-
-/*********************************************************************//**
-Write, flush one bitmap block to disk and advance the output position if
-successful.
-
-@return TRUE if page written OK, FALSE if I/O error */
-static
-ibool
-log_online_write_bitmap_page(
-/*=========================*/
- const byte *block) /*!< in: block to write */
-{
- ut_ad(mutex_own(&log_bmp_sys_mutex));
-
- /* Simulate a write error */
- DBUG_EXECUTE_IF("bitmap_page_write_error",
- {
- ulint space_id
- = mach_read_from_4(block
- + MODIFIED_PAGE_SPACE_ID);
- if (space_id > 0) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "simulating bitmap write "
- "error in "
- "log_online_write_bitmap_page "
- "for space ID %lu",
- space_id);
- return FALSE;
- }
- });
-
- /* A crash injection site that ensures last checkpoint LSN > last
- tracked LSN, so that LSN tracking for this interval is tested. */
- DBUG_EXECUTE_IF("crash_before_bitmap_write",
- {
- ulint space_id
- = mach_read_from_4(block
- + MODIFIED_PAGE_SPACE_ID);
- if (space_id > 0)
- DBUG_SUICIDE();
- });
-
-
- ibool success = os_file_write(log_bmp_sys->out.name,
- log_bmp_sys->out.file, block,
- log_bmp_sys->out.offset,
- MODIFIED_PAGE_BLOCK_SIZE);
- if (UNIV_UNLIKELY(!success)) {
-
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
- ib_logf(IB_LOG_LEVEL_ERROR, "failed writing changed page "
- "bitmap file \'%s\'", log_bmp_sys->out.name);
- return FALSE;
- }
-
- success = os_file_flush(log_bmp_sys->out.file);
- if (UNIV_UNLIKELY(!success)) {
-
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
- ib_logf(IB_LOG_LEVEL_ERROR, "failed flushing changed page "
- "bitmap file \'%s\'", log_bmp_sys->out.name);
- return FALSE;
- }
-
- os_file_advise(log_bmp_sys->out.file, log_bmp_sys->out.offset,
- MODIFIED_PAGE_BLOCK_SIZE, OS_FILE_ADVISE_DONTNEED);
-
- log_bmp_sys->out.offset += MODIFIED_PAGE_BLOCK_SIZE;
- return TRUE;
-}
-
-/*********************************************************************//**
-Append the current changed page bitmap to the bitmap file. Clears the
-bitmap tree and recycles its nodes to the free list.
-
-@return TRUE if bitmap written OK, FALSE if I/O error*/
-static
-ibool
-log_online_write_bitmap(void)
-/*=========================*/
-{
- ut_ad(mutex_own(&log_bmp_sys_mutex));
-
- if (log_bmp_sys->out.offset >= srv_max_bitmap_file_size) {
- if (!log_online_rotate_bitmap_file(log_bmp_sys->start_lsn)) {
- return FALSE;
- }
- }
-
- ib_rbt_node_t *bmp_tree_node
- = (ib_rbt_node_t *)rbt_first(log_bmp_sys->modified_pages);
- const ib_rbt_node_t * const last_bmp_tree_node
- = rbt_last(log_bmp_sys->modified_pages);
-
- ibool success = TRUE;
-
- while (bmp_tree_node) {
-
- byte *page = rbt_value(byte, bmp_tree_node);
-
- /* In case of a bitmap page write error keep on looping over
- the tree to reclaim its memory through the free list instead of
- returning immediatelly. */
- if (UNIV_LIKELY(success)) {
- if (bmp_tree_node == last_bmp_tree_node) {
- mach_write_to_4(page
- + MODIFIED_PAGE_IS_LAST_BLOCK,
- 1);
- }
-
- mach_write_to_8(page + MODIFIED_PAGE_START_LSN,
- log_bmp_sys->start_lsn);
- mach_write_to_8(page + MODIFIED_PAGE_END_LSN,
- log_bmp_sys->end_lsn);
- mach_write_to_4(page + MODIFIED_PAGE_BLOCK_CHECKSUM,
- log_online_calc_checksum(page));
-
- success = log_online_write_bitmap_page(page);
- }
-
- bmp_tree_node->left = log_bmp_sys->page_free_list;
- log_bmp_sys->page_free_list = bmp_tree_node;
-
- bmp_tree_node = (ib_rbt_node_t*)
- rbt_next(log_bmp_sys->modified_pages, bmp_tree_node);
-
- DBUG_EXECUTE_IF("bitmap_page_2_write_error",
- if (bmp_tree_node)
- {
- DBUG_SET("+d,bitmap_page_write_error");
- DBUG_SET("-d,bitmap_page_2_write_error");
- });
- }
-
- rbt_reset(log_bmp_sys->modified_pages);
- return success;
-}
-
-/*********************************************************************//**
-Read and parse the redo log up to last checkpoint LSN to build the changed
-page bitmap which is then written to disk.
-
-@return TRUE if log tracking succeeded, FALSE if bitmap write I/O error */
-UNIV_INTERN
-ibool
-log_online_follow_redo_log(void)
-/*============================*/
-{
- lsn_t contiguous_start_lsn;
- log_group_t* group;
- ibool result;
-
- ut_ad(!srv_read_only_mode);
-
- if (!srv_track_changed_pages)
- return TRUE;
-
- DEBUG_SYNC_C("log_online_follow_redo_log");
-
- mutex_enter(&log_bmp_sys_mutex);
-
- if (!srv_track_changed_pages) {
- mutex_exit(&log_bmp_sys_mutex);
- return TRUE;
- }
-
- /* Grab the LSN of the last checkpoint, we will parse up to it */
- mutex_enter(&(log_sys->mutex));
- log_bmp_sys->end_lsn = log_sys->last_checkpoint_lsn;
- mutex_exit(&(log_sys->mutex));
-
- if (log_bmp_sys->end_lsn == log_bmp_sys->start_lsn) {
- mutex_exit(&log_bmp_sys_mutex);
- return TRUE;
- }
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
- ut_a(group);
-
- contiguous_start_lsn = ut_uint64_align_down(log_bmp_sys->start_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
-
- while (group) {
- log_online_follow_log_group(group, contiguous_start_lsn);
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- result = log_online_write_bitmap();
- log_bmp_sys->start_lsn = log_bmp_sys->end_lsn;
- log_set_tracked_lsn(log_bmp_sys->start_lsn);
-
- mutex_exit(&log_bmp_sys_mutex);
- return result;
-}
-
-/*********************************************************************//**
-Diagnose a bitmap file range setup failure and free the partially-initialized
-bitmap file range. */
-UNIV_COLD
-static
-void
-log_online_diagnose_inconsistent_dir(
-/*=================================*/
- log_online_bitmap_file_range_t *bitmap_files) /*!<in/out: bitmap file
- range */
-{
- ib_logf(IB_LOG_LEVEL_WARN,
- "InnoDB: Warning: inconsistent bitmap file "
- "directory for a "
- "INFORMATION_SCHEMA.INNODB_CHANGED_PAGES query");
- free(bitmap_files->files);
-}
-
-/*********************************************************************//**
-List the bitmap files in srv_data_home and setup their range that contains the
-specified LSN interval. This range, if non-empty, will start with a file that
-has the greatest LSN equal to or less than the start LSN and will include all
-the files up to the one with the greatest LSN less than the end LSN. Caller
-must free bitmap_files->files when done if bitmap_files set to non-NULL and
-this function returned TRUE. Field bitmap_files->count might be set to a
-larger value than the actual count of the files, and space for the unused array
-slots will be allocated but cleared to zeroes.
-
-@return TRUE if succeeded
-*/
-static
-ibool
-log_online_setup_bitmap_file_range(
-/*===============================*/
- log_online_bitmap_file_range_t *bitmap_files, /*!<in/out: bitmap file
- range */
- lsn_t range_start, /*!<in: start LSN */
- lsn_t range_end) /*!<in: end LSN */
-{
- os_file_dir_t bitmap_dir;
- os_file_stat_t bitmap_dir_file_info;
- ulong first_file_seq_num = ULONG_MAX;
- ulong last_file_seq_num = 0;
- lsn_t first_file_start_lsn = LSN_MAX;
-
- ut_ad(range_end >= range_start);
-
- bitmap_files->count = 0;
- bitmap_files->files = NULL;
-
- /* 1st pass: size the info array */
-
- bitmap_dir = os_file_opendir(srv_data_home, FALSE);
- if (UNIV_UNLIKELY(!bitmap_dir)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "failed to open bitmap directory \'%s\'",
- srv_data_home);
- return FALSE;
- }
-
- while (!os_file_readdir_next_file(srv_data_home, bitmap_dir,
- &bitmap_dir_file_info)) {
-
- ulong file_seq_num;
- lsn_t file_start_lsn;
-
- if (!log_online_is_bitmap_file(&bitmap_dir_file_info,
- &file_seq_num,
- &file_start_lsn)
- || file_start_lsn >= range_end) {
-
- continue;
- }
-
- if (file_seq_num > last_file_seq_num) {
-
- last_file_seq_num = file_seq_num;
- }
-
- if (file_start_lsn >= range_start
- || file_start_lsn == first_file_start_lsn
- || first_file_start_lsn > range_start) {
-
- /* A file that falls into the range */
-
- if (file_start_lsn < first_file_start_lsn) {
-
- first_file_start_lsn = file_start_lsn;
- }
- if (file_seq_num < first_file_seq_num) {
-
- first_file_seq_num = file_seq_num;
- }
- } else if (file_start_lsn > first_file_start_lsn) {
-
- /* A file that has LSN closer to the range start
- but smaller than it, replacing another such file */
- first_file_start_lsn = file_start_lsn;
- first_file_seq_num = file_seq_num;
- }
- }
-
- if (UNIV_UNLIKELY(os_file_closedir(bitmap_dir))) {
-
- os_file_get_last_error(TRUE);
- ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'",
- srv_data_home);
- return FALSE;
- }
-
- if (first_file_seq_num == ULONG_MAX && last_file_seq_num == 0) {
-
- bitmap_files->count = 0;
- return TRUE;
- }
-
- bitmap_files->count = last_file_seq_num - first_file_seq_num + 1;
-
- DEBUG_SYNC_C("setup_bitmap_range_middle");
-
- /* 2nd pass: get the file names in the file_seq_num order */
-
- bitmap_dir = os_file_opendir(srv_data_home, FALSE);
- if (UNIV_UNLIKELY(!bitmap_dir)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "failed to open bitmap directory \'%s\'",
- srv_data_home);
- return FALSE;
- }
-
- bitmap_files->files
- = static_cast<log_online_bitmap_file_range_struct::files_t *>
- (ut_malloc(bitmap_files->count
- * sizeof(bitmap_files->files[0])));
- memset(bitmap_files->files, 0,
- bitmap_files->count * sizeof(bitmap_files->files[0]));
-
- while (!os_file_readdir_next_file(srv_data_home, bitmap_dir,
- &bitmap_dir_file_info)) {
-
- ulong file_seq_num;
- lsn_t file_start_lsn;
- size_t array_pos;
-
- if (!log_online_is_bitmap_file(&bitmap_dir_file_info,
- &file_seq_num,
- &file_start_lsn)
- || file_start_lsn >= range_end
- || file_start_lsn < first_file_start_lsn) {
-
- continue;
- }
-
- array_pos = file_seq_num - first_file_seq_num;
- if (UNIV_UNLIKELY(array_pos >= bitmap_files->count)) {
-
- log_online_diagnose_inconsistent_dir(bitmap_files);
- os_file_closedir(bitmap_dir);
- return FALSE;
- }
-
-
- if (file_seq_num > bitmap_files->files[array_pos].seq_num) {
-
- bitmap_files->files[array_pos].seq_num = file_seq_num;
- strncpy(bitmap_files->files[array_pos].name,
- bitmap_dir_file_info.name, FN_REFLEN);
- bitmap_files->files[array_pos].name[FN_REFLEN - 1]
- = '\0';
- bitmap_files->files[array_pos].start_lsn
- = file_start_lsn;
- }
- }
-
- if (UNIV_UNLIKELY(os_file_closedir(bitmap_dir))) {
-
- os_file_get_last_error(TRUE);
- ib_logf(IB_LOG_LEVEL_ERROR, "cannot close \'%s\'",
- srv_data_home);
- free(bitmap_files->files);
- return FALSE;
- }
-
- if (!bitmap_files->files[0].seq_num
- || bitmap_files->files[0].seq_num != first_file_seq_num) {
-
- log_online_diagnose_inconsistent_dir(bitmap_files);
- return FALSE;
- }
-
- {
- size_t i;
- for (i = 1; i < bitmap_files->count; i++) {
- if (!bitmap_files->files[i].seq_num) {
- break;
- }
- if ((bitmap_files->files[i].seq_num
- <= bitmap_files->files[i - 1].seq_num)
- || (bitmap_files->files[i].start_lsn
- < bitmap_files->files[i - 1].start_lsn)) {
-
- log_online_diagnose_inconsistent_dir(
- bitmap_files);
- return FALSE;
- }
- }
- }
-
- return TRUE;
-}
-
-/****************************************************************//**
-Open a bitmap file for reading.
-
-@return TRUE if opened successfully */
-static
-ibool
-log_online_open_bitmap_file_read_only(
-/*==================================*/
- const char* name, /*!<in: bitmap file
- name without directory,
- which is assumed to be
- srv_data_home */
- log_online_bitmap_file_t* bitmap_file) /*!<out: opened bitmap
- file */
-{
- ibool success = FALSE;
- size_t srv_data_home_len;
-
- ut_ad(name[0] != '\0');
-
- srv_data_home_len = strlen(srv_data_home);
- if (srv_data_home_len
- && srv_data_home[srv_data_home_len-1]
- != SRV_PATH_SEPARATOR) {
- ut_snprintf(bitmap_file->name, FN_REFLEN, "%s%c%s",
- srv_data_home, SRV_PATH_SEPARATOR, name);
- } else {
- ut_snprintf(bitmap_file->name, FN_REFLEN, "%s%s",
- srv_data_home, name);
- }
- bitmap_file->file
- = os_file_create_simple_no_error_handling(innodb_file_bmp_key,
- bitmap_file->name,
- OS_FILE_OPEN,
- OS_FILE_READ_ONLY,
- &success, FALSE);
- if (UNIV_UNLIKELY(!success)) {
-
- /* Here and below assume that bitmap file names do not
- contain apostrophes, thus no need for ut_print_filename(). */
- ib_logf(IB_LOG_LEVEL_WARN,
- "error opening the changed page bitmap \'%s\'",
- bitmap_file->name);
- return FALSE;
- }
-
- bitmap_file->size = os_file_get_size(bitmap_file->file);
- bitmap_file->offset = 0;
-
- os_file_advise(bitmap_file->file, 0, 0, OS_FILE_ADVISE_SEQUENTIAL);
- os_file_advise(bitmap_file->file, 0, 0, OS_FILE_ADVISE_NOREUSE);
-
- return TRUE;
-}
-
-/****************************************************************//**
-Diagnose one or both of the following situations if we read close to
-the end of bitmap file:
-1) Warn if the remainder of the file is less than one page.
-2) Error if we cannot read any more full pages but the last read page
-did not have the last-in-run flag set.
-
-@return FALSE for the error */
-static
-ibool
-log_online_diagnose_bitmap_eof(
-/*===========================*/
- const log_online_bitmap_file_t* bitmap_file, /*!< in: bitmap file */
- ibool last_page_in_run)/*!< in: "last page in
- run" flag value in the
- last read page */
-{
- /* Check if we are too close to EOF to read a full page */
- if ((bitmap_file->size < MODIFIED_PAGE_BLOCK_SIZE)
- || (bitmap_file->offset
- > bitmap_file->size - MODIFIED_PAGE_BLOCK_SIZE)) {
-
- if (UNIV_UNLIKELY(bitmap_file->offset != bitmap_file->size)) {
-
- /* If we are not at EOF and we have less than one page
- to read, it's junk. This error is not fatal in
- itself. */
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "junk at the end of changed page bitmap file "
- "\'%s\'.", bitmap_file->name);
- }
-
- if (UNIV_UNLIKELY(!last_page_in_run)) {
-
- /* We are at EOF but the last read page did not finish
- a run */
- /* It's a "Warning" here because it's not a fatal error
- for the whole server */
- ib_logf(IB_LOG_LEVEL_WARN,
- "changed page bitmap file \'%s\', size "
- UINT64PF " bytes, does not "
- "contain a complete run at the next read "
- "offset " UINT64PF,
- bitmap_file->name, bitmap_file->size,
- bitmap_file->offset);
- return FALSE;
- }
- }
- return TRUE;
-}
-
-/*********************************************************************//**
-Initialize the log bitmap iterator for a given range. The records are
-processed at a bitmap block granularity, i.e. all the records in the same block
-share the same start and end LSN values, the exact LSN of each record is
-unavailable (nor is it defined for blocks that are touched more than once in
-the LSN interval contained in the block). Thus min_lsn and max_lsn should be
-set at block boundaries or bigger, otherwise the records at the 1st and the
-last blocks will not be returned. Also note that there might be returned
-records with LSN < min_lsn, as min_lsn is used to select the correct starting
-file but not block.
-
-@return TRUE if the iterator is initialized OK, FALSE otherwise. */
-UNIV_INTERN
-ibool
-log_online_bitmap_iterator_init(
-/*============================*/
- log_bitmap_iterator_t *i, /*!<in/out: iterator */
- lsn_t min_lsn,/*!< in: start LSN */
- lsn_t max_lsn)/*!< in: end LSN */
-{
- ut_a(i);
-
- i->max_lsn = max_lsn;
-
- if (UNIV_UNLIKELY(min_lsn > max_lsn)) {
-
- /* Empty range */
- i->in_files.count = 0;
- i->in_files.files = NULL;
- os_file_mark_invalid(&i->in.file);
- i->page = NULL;
- i->failed = FALSE;
- return TRUE;
- }
-
- if (!log_online_setup_bitmap_file_range(&i->in_files, min_lsn,
- max_lsn)) {
-
- i->failed = TRUE;
- return FALSE;
- }
-
- i->in_i = 0;
-
- if (i->in_files.count == 0) {
-
- /* Empty range */
- os_file_mark_invalid(&i->in.file);
- i->page = NULL;
- i->failed = FALSE;
- return TRUE;
- }
-
- /* Open the 1st bitmap file */
- if (UNIV_UNLIKELY(!log_online_open_bitmap_file_read_only(
- i->in_files.files[i->in_i].name,
- &i->in))) {
-
- i->in_i = i->in_files.count;
- free(i->in_files.files);
- i->failed = TRUE;
- return FALSE;
- }
-
- i->page = static_cast<byte *>(ut_malloc(MODIFIED_PAGE_BLOCK_SIZE));
- i->bit_offset = MODIFIED_PAGE_BLOCK_BITMAP_LEN;
- i->start_lsn = i->end_lsn = 0;
- i->space_id = 0;
- i->first_page_id = 0;
- i->last_page_in_run = TRUE;
- i->changed = FALSE;
- i->failed = FALSE;
-
- return TRUE;
-}
-
-/*********************************************************************//**
-Releases log bitmap iterator. */
-UNIV_INTERN
-void
-log_online_bitmap_iterator_release(
-/*===============================*/
- log_bitmap_iterator_t *i) /*!<in/out: iterator */
-{
- ut_a(i);
-
- if (!os_file_is_invalid(i->in.file)) {
-
- os_file_close(i->in.file);
- os_file_mark_invalid(&i->in.file);
- }
- if (i->in_files.files) {
-
- ut_free(i->in_files.files);
- }
- if (i->page) {
-
- ut_free(i->page);
- }
- i->failed = TRUE;
-}
-
-/*********************************************************************//**
-Iterates through bits of saved bitmap blocks.
-Sequentially reads blocks from bitmap file(s) and interates through
-their bits. Ignores blocks with wrong checksum.
-@return TRUE if iteration is successful, FALSE if all bits are iterated. */
-UNIV_INTERN
-ibool
-log_online_bitmap_iterator_next(
-/*============================*/
- log_bitmap_iterator_t *i) /*!<in/out: iterator */
-{
- ibool checksum_ok = FALSE;
- ibool success;
-
- ut_a(i);
-
- if (UNIV_UNLIKELY(i->in_files.count == 0)) {
-
- return FALSE;
- }
-
- if (UNIV_LIKELY(i->bit_offset < MODIFIED_PAGE_BLOCK_BITMAP_LEN))
- {
- ++i->bit_offset;
- i->changed =
- IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP,
- i->bit_offset);
- return TRUE;
- }
-
- if (i->end_lsn >= i->max_lsn && i->last_page_in_run)
- return FALSE;
-
- while (!checksum_ok)
- {
- while (i->in.size < MODIFIED_PAGE_BLOCK_SIZE
- || (i->in.offset
- > i->in.size - MODIFIED_PAGE_BLOCK_SIZE)) {
-
- /* Advance file */
- i->in_i++;
- success = os_file_close_no_error_handling(
- i->in.file);
- os_file_mark_invalid(&i->in.file);
- if (UNIV_UNLIKELY(!success)) {
-
- os_file_get_last_error(TRUE);
- i->failed = TRUE;
- return FALSE;
- }
-
- success = log_online_diagnose_bitmap_eof(
- &i->in, i->last_page_in_run);
- if (UNIV_UNLIKELY(!success)) {
-
- i->failed = TRUE;
- return FALSE;
-
- }
-
- if (i->in_i == i->in_files.count) {
-
- return FALSE;
- }
-
- if (UNIV_UNLIKELY(i->in_files.files[i->in_i].seq_num
- == 0)) {
-
- i->failed = TRUE;
- return FALSE;
- }
-
- success = log_online_open_bitmap_file_read_only(
- i->in_files.files[i->in_i].name,
- &i->in);
- if (UNIV_UNLIKELY(!success)) {
-
- i->failed = TRUE;
- return FALSE;
- }
- }
-
- success = log_online_read_bitmap_page(&i->in, i->page,
- &checksum_ok);
- if (UNIV_UNLIKELY(!success)) {
-
- os_file_get_last_error(TRUE);
- ib_logf(IB_LOG_LEVEL_WARN,
- "failed reading changed page bitmap file "
- "\'%s\'", i->in_files.files[i->in_i].name);
- i->failed = TRUE;
- return FALSE;
- }
- }
-
- i->start_lsn = mach_read_from_8(i->page + MODIFIED_PAGE_START_LSN);
- i->end_lsn = mach_read_from_8(i->page + MODIFIED_PAGE_END_LSN);
- i->space_id = mach_read_from_4(i->page + MODIFIED_PAGE_SPACE_ID);
- i->first_page_id = mach_read_from_4(i->page
- + MODIFIED_PAGE_1ST_PAGE_ID);
- i->last_page_in_run = mach_read_from_4(i->page
- + MODIFIED_PAGE_IS_LAST_BLOCK);
- i->bit_offset = 0;
- i->changed = IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP,
- i->bit_offset);
-
- return TRUE;
-}
-
-/************************************************************//**
-Delete all the bitmap files for data less than the specified LSN.
-If called with lsn == 0 (i.e. set by RESET request) or LSN_MAX,
-restart the bitmap file sequence, otherwise continue it.
-
-@return FALSE to indicate success, TRUE for failure. */
-UNIV_INTERN
-ibool
-log_online_purge_changed_page_bitmaps(
-/*==================================*/
- lsn_t lsn) /*!< in: LSN to purge files up to */
-{
- log_online_bitmap_file_range_t bitmap_files;
- size_t i;
- ibool result = FALSE;
-
- if (lsn == 0) {
- lsn = LSN_MAX;
- }
-
- bool log_bmp_sys_inited = false;
- if (srv_redo_log_thread_started) {
- /* User requests might happen with both enabled and disabled
- tracking */
- log_bmp_sys_inited = true;
- mutex_enter(&log_bmp_sys_mutex);
- if (!srv_redo_log_thread_started) {
- log_bmp_sys_inited = false;
- mutex_exit(&log_bmp_sys_mutex);
- }
- }
-
- if (!log_online_setup_bitmap_file_range(&bitmap_files, 0, LSN_MAX)) {
- if (log_bmp_sys_inited) {
- mutex_exit(&log_bmp_sys_mutex);
- }
- return TRUE;
- }
-
- if (srv_redo_log_thread_started && lsn > log_bmp_sys->end_lsn) {
- /* If we have to delete the current output file, close it
- first. */
- os_file_close(log_bmp_sys->out.file);
- os_file_mark_invalid(&log_bmp_sys->out.file);
- }
-
- for (i = 0; i < bitmap_files.count; i++) {
-
- /* We consider the end LSN of the current bitmap, derived from
- the start LSN of the subsequent bitmap file, to determine
- whether to remove the current bitmap. Note that bitmap_files
- does not contain an entry for the bitmap past the given LSN so
- we must check the boundary conditions as well. For example,
- consider 1_0.xdb and 2_10.xdb and querying LSN 5. bitmap_files
- will only contain 1_0.xdb and we must not delete it since it
- represents LSNs 0-9. */
- if ((i + 1 == bitmap_files.count
- || bitmap_files.files[i + 1].seq_num == 0
- || bitmap_files.files[i + 1].start_lsn > lsn)
- && (lsn != LSN_MAX)) {
-
- break;
- }
- if (!os_file_delete_if_exists(innodb_file_bmp_key,
- bitmap_files.files[i].name)) {
-
- os_file_get_last_error(TRUE);
- result = TRUE;
- break;
- }
- }
-
- if (log_bmp_sys_inited) {
- if (lsn > log_bmp_sys->end_lsn) {
- lsn_t new_file_lsn;
- if (lsn == LSN_MAX) {
- /* RESET restarts the sequence */
- log_bmp_sys->out_seq_num = 0;
- new_file_lsn = 0;
- } else {
- new_file_lsn = log_bmp_sys->end_lsn;
- }
- if (!log_online_rotate_bitmap_file(new_file_lsn)) {
- /* If file create failed, stop log tracking */
- srv_track_changed_pages = FALSE;
- }
- }
-
- mutex_exit(&log_bmp_sys_mutex);
- }
-
- free(bitmap_files.files);
- return result;
-}
diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc
deleted file mode 100644
index fb64309cee4..00000000000
--- a/storage/xtradb/log/log0recv.cc
+++ /dev/null
@@ -1,3814 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file log/log0recv.cc
-Recovery
-
-Created 9/20/1997 Heikki Tuuri
-*******************************************************/
-
-// First include (the generated) my_config.h, to get correct platform defines.
-#include "my_config.h"
-#include <stdio.h> // Solaris/x86 header file bug
-
-#include <vector>
-#include <my_systemd.h>
-
-#include "log0recv.h"
-
-#ifdef UNIV_NONINL
-#include "log0recv.ic"
-#endif
-
-#include "log0crypt.h"
-
-#include "config.h"
-#ifdef HAVE_ALLOCA_H
-#include "alloca.h"
-#elif defined(HAVE_MALLOC_H)
-#include "malloc.h"
-#endif
-
-#include "mem0mem.h"
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "page0cur.h"
-#include "page0zip.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "ibuf0ibuf.h"
-#include "trx0undo.h"
-#include "trx0rec.h"
-#include "fil0fil.h"
-#include "fil0crypt.h"
-#ifndef UNIV_HOTBACKUP
-# include "buf0rea.h"
-# include "srv0srv.h"
-# include "srv0start.h"
-# include "trx0roll.h"
-# include "row0merge.h"
-# include "sync0sync.h"
-#else /* !UNIV_HOTBACKUP */
-
-
-/** This is set to FALSE if the backup was originally taken with the
-mysqlbackup --include regexp option: then we do not want to create tables in
-directories which were not included */
-UNIV_INTERN ibool recv_replay_file_ops = TRUE;
-#endif /* !UNIV_HOTBACKUP */
-
-/** Log records are stored in the hash table in chunks at most of this size;
-this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
-#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
-
-/** Read-ahead area in applying log records to file pages */
-#define RECV_READ_AHEAD_AREA 32
-
-/** The recovery system */
-UNIV_INTERN recv_sys_t* recv_sys;
-/** TRUE when applying redo log records during crash recovery; FALSE
-otherwise. Note that this is FALSE while a background thread is
-rolling back incomplete transactions. */
-UNIV_INTERN ibool recv_recovery_on;
-
-#ifndef UNIV_HOTBACKUP
-/** TRUE when recv_init_crash_recovery() has been called. */
-UNIV_INTERN ibool recv_needed_recovery;
-# ifdef UNIV_DEBUG
-/** TRUE if writing to the redo log (mtr_commit) is forbidden.
-Protected by log_sys->mutex. */
-UNIV_INTERN ibool recv_no_log_write = FALSE;
-# endif /* UNIV_DEBUG */
-
-/** TRUE if buf_page_is_corrupted() should check if the log sequence
-number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
-recv_recovery_from_checkpoint_start_func(). */
-UNIV_INTERN ibool recv_lsn_checks_on;
-
-/** There are two conditions under which we scan the logs, the first
-is normal startup and the second is when we do a recovery from an
-archive.
-This flag is set if we are doing a scan from the last checkpoint during
-startup. If we find log entries that were written after the last checkpoint
-we know that the server was not cleanly shutdown. We must then initialize
-the crash recovery environment before attempting to store these entries in
-the log hash table. */
-static ibool recv_log_scan_is_startup_type;
-
-/** If the following is TRUE, the buffer pool file pages must be invalidated
-after recovery and no ibuf operations are allowed; this becomes TRUE if
-the log record hash table becomes too full, and log records must be merged
-to file pages already before the recovery is finished: in this case no
-ibuf operations are allowed, as they could modify the pages read in the
-buffer pool before the pages have been recovered to the up-to-date state.
-
-TRUE means that recovery is running and no operations on the log files
-are allowed yet: the variable name is misleading. */
-UNIV_INTERN ibool recv_no_ibuf_operations;
-/** TRUE when the redo log is being backed up */
-# define recv_is_making_a_backup FALSE
-/** TRUE when recovering from a backed up redo log file */
-# define recv_is_from_backup FALSE
-#else /* !UNIV_HOTBACKUP */
-# define recv_needed_recovery FALSE
-/** TRUE when the redo log is being backed up */
-UNIV_INTERN ibool recv_is_making_a_backup = FALSE;
-/** TRUE when recovering from a backed up redo log file */
-UNIV_INTERN ibool recv_is_from_backup = FALSE;
-# define buf_pool_get_curr_size() (5 * 1024 * 1024)
-#endif /* !UNIV_HOTBACKUP */
-
-/** The type of the previous parsed redo log record */
-static ulint recv_previous_parsed_rec_type;
-/** The offset of the previous parsed redo log record */
-static ulint recv_previous_parsed_rec_offset;
-/** The 'multi' flag of the previous parsed redo log record */
-static ulint recv_previous_parsed_rec_is_multi;
-
-/** Maximum page number encountered in the redo log */
-UNIV_INTERN ulint recv_max_parsed_page_no;
-
-/** This many frames must be left free in the buffer pool when we scan
-the log and store the scanned log records in the buffer pool: we will
-use these free frames to read in pages when we start applying the
-log records to the database.
-This is the default value. If the actual size of the buffer pool is
-larger than 10 MB we'll set this value to 512. */
-UNIV_INTERN ulint recv_n_pool_free_frames;
-
-/** The maximum lsn we see for a page during the recovery process. If this
-is bigger than the lsn we are able to scan up to, that is an indication that
-the recovery failed and the database may be corrupt. */
-UNIV_INTERN lsn_t recv_max_page_lsn;
-
-#ifdef UNIV_PFS_THREAD
-UNIV_INTERN mysql_pfs_key_t trx_rollback_clean_thread_key;
-#endif /* UNIV_PFS_THREAD */
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t recv_sys_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#ifndef UNIV_HOTBACKUP
-# ifdef UNIV_PFS_THREAD
-UNIV_INTERN mysql_pfs_key_t recv_writer_thread_key;
-# endif /* UNIV_PFS_THREAD */
-
-# ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t recv_writer_mutex_key;
-# endif /* UNIV_PFS_MUTEX */
-
-/** Flag indicating if recv_writer thread is active. */
-static volatile bool recv_writer_thread_active;
-UNIV_INTERN os_thread_t recv_writer_thread_handle = 0;
-#endif /* !UNIV_HOTBACKUP */
-
-/* prototypes */
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************//**
-Initialize crash recovery environment. Can be called iff
-recv_needed_recovery == FALSE. */
-static
-void
-recv_init_crash_recovery(void);
-/*===========================*/
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************//**
-Creates the recovery system. */
-UNIV_INTERN
-void
-recv_sys_create(void)
-/*=================*/
-{
- if (recv_sys != NULL) {
-
- return;
- }
-
- recv_sys = static_cast<recv_sys_t*>(mem_zalloc(sizeof(*recv_sys)));
-
- mutex_create(recv_sys_mutex_key, &recv_sys->mutex, SYNC_RECV);
-
-#ifndef UNIV_HOTBACKUP
- mutex_create(recv_writer_mutex_key, &recv_sys->writer_mutex,
- SYNC_LEVEL_VARYING);
-#endif /* !UNIV_HOTBACKUP */
-
- recv_sys->heap = NULL;
- recv_sys->addr_hash = NULL;
-}
-
-/********************************************************//**
-Release recovery system mutexes. */
-UNIV_INTERN
-void
-recv_sys_close(void)
-/*================*/
-{
- if (recv_sys != NULL) {
- if (recv_sys->addr_hash != NULL) {
- hash_table_free(recv_sys->addr_hash);
- }
-
- if (recv_sys->heap != NULL) {
- mem_heap_free(recv_sys->heap);
- }
-
- if (recv_sys->buf != NULL) {
- ut_free(recv_sys->buf);
- }
-
- if (recv_sys->last_block_buf_start != NULL) {
- mem_free(recv_sys->last_block_buf_start);
- }
-
-#ifndef UNIV_HOTBACKUP
- ut_ad(!recv_writer_thread_active);
- mutex_free(&recv_sys->writer_mutex);
-#endif /* !UNIV_HOTBACKUP */
-
- mutex_free(&recv_sys->mutex);
-
- mem_free(recv_sys);
- recv_sys = NULL;
- }
-}
-
-/********************************************************//**
-Frees the memory held by the recovery system: the address hash table,
-the heap, the parsing buffer and the last-block buffer (each only if it
-was allocated), then the recv_sys object itself. The mutexes are not
-freed here. A NULL recv_sys is ignored. */
-UNIV_INTERN
-void
-recv_sys_mem_free(void)
-/*===================*/
-{
-	if (recv_sys == NULL) {
-
-		return;
-	}
-
-	if (recv_sys->addr_hash != NULL) {
-		hash_table_free(recv_sys->addr_hash);
-	}
-
-	if (recv_sys->heap != NULL) {
-		mem_heap_free(recv_sys->heap);
-	}
-
-	if (recv_sys->buf != NULL) {
-		ut_free(recv_sys->buf);
-	}
-
-	if (recv_sys->last_block_buf_start != NULL) {
-		mem_free(recv_sys->last_block_buf_start);
-	}
-
-	mem_free(recv_sys);
-	recv_sys = NULL;
-}
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************
-Reset the state of the recovery system variables to their initial
-values. Each variable is assigned exactly once; the values are the same
-ones the function has always stored. */
-UNIV_INTERN
-void
-recv_sys_var_init(void)
-/*===================*/
-{
-	/* Recovery mode flags. */
-	recv_lsn_checks_on = FALSE;
-
-	recv_recovery_on = FALSE;
-
-	recv_needed_recovery = FALSE;
-
-	recv_log_scan_is_startup_type = FALSE;
-
-	recv_no_ibuf_operations = FALSE;
-
-	/* Bookkeeping about the previously parsed log record. */
-	recv_previous_parsed_rec_type = 999999;
-
-	recv_previous_parsed_rec_offset = 0;
-
-	recv_previous_parsed_rec_is_multi = 0;
-
-	recv_max_parsed_page_no = 0;
-
-	/* Default number of free frames; recv_sys_init() may raise it. */
-	recv_n_pool_free_frames = 256;
-
-	recv_max_page_lsn = 0;
-}
-
-/******************************************************************//**
-recv_writer thread tasked with flushing dirty pages from the buffer
-pools.
-The thread loops while srv_shutdown_state is SRV_SHUTDOWN_NONE. On each
-pass it sleeps, takes recv_sys->writer_mutex, leaves the loop if
-recv_recovery_on has been cleared, and otherwise calls
-buf_flush_LRU_tail(). On the way out it clears
-recv_writer_thread_active and terminates through os_thread_exit().
-@return a dummy parameter */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(recv_writer_thread)(
-/*===============================*/
- void* arg MY_ATTRIBUTE((unused)))
- /*!< in: a dummy parameter required by
- os_thread_create */
-{
- my_thread_init();
- ut_ad(!srv_read_only_mode);
-
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(recv_writer_thread_key);
-#endif /* UNIV_PFS_THREAD */
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "InnoDB: recv_writer thread running, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif /* UNIV_DEBUG_THREAD_CREATION */
-
- while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
-
- os_thread_sleep(100000); /* presumably microseconds, i.e. 100 ms between passes - TODO confirm the unit */
-
- mutex_enter(&recv_sys->writer_mutex);
-
- if (!recv_recovery_on) { /* recovery is over: release the mutex and stop */
- mutex_exit(&recv_sys->writer_mutex);
- break;
- }
-
- /* Flush pages from end of LRU if required.
- The call is made while holding recv_sys->writer_mutex. */
- buf_flush_LRU_tail();
-
- mutex_exit(&recv_sys->writer_mutex);
- }
-
- recv_writer_thread_active = false; /* recv_sys_close() asserts that this flag is clear */
-
- my_thread_end();
- /* We count the number of threads in os_thread_exit().
- A created thread should always use that to exit and not
- use return() to exit. */
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/************************************************************
-Inits the recovery system for a recovery operation: creates the heap,
-the parsing buffer, the address hash table and the last-block buffer,
-and resets the bookkeeping fields of recv_sys. Returns immediately if
-the heap already exists, i.e. the system has been initialized.
-
-recv_sys->mutex is taken only when UNIV_HOTBACKUP is not defined, so it
-is also released only in that configuration. */
-UNIV_INTERN
-void
-recv_sys_init(
-/*==========*/
-	ulint	available_memory)	/*!< in: available memory in bytes */
-{
-	if (recv_sys->heap != NULL) {
-
-		return;
-	}
-
-#ifndef UNIV_HOTBACKUP
-	mutex_enter(&(recv_sys->mutex));
-
-	recv_sys->heap = mem_heap_create_typed(256,
-					       MEM_HEAP_FOR_RECV_SYS);
-#else /* !UNIV_HOTBACKUP */
-	recv_sys->heap = mem_heap_create(256);
-	recv_is_from_backup = TRUE;
-#endif /* !UNIV_HOTBACKUP */
-
-	/* Set appropriate value of recv_n_pool_free_frames. */
-	if (buf_pool_get_curr_size() >= (10 * 1024 * 1024)) {
-		/* Buffer pool of size greater than 10 MB. */
-		recv_n_pool_free_frames = 512;
-	}
-
-	recv_sys->buf = static_cast<byte*>(ut_malloc(RECV_PARSING_BUF_SIZE));
-	recv_sys->len = 0;
-	recv_sys->recovered_offset = 0;
-
-	recv_sys->addr_hash = hash_create(available_memory / 512);
-	recv_sys->n_addrs = 0;
-
-	recv_sys->apply_log_recs = FALSE;
-	recv_sys->apply_batch_on = FALSE;
-
-	/* Allocate two log blocks' worth of memory so that an aligned
-	block-sized window (last_block) fits inside the allocation. */
-	recv_sys->last_block_buf_start = static_cast<byte*>(
-		mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE));
-
-	recv_sys->last_block = static_cast<byte*>(ut_align(
-		recv_sys->last_block_buf_start, OS_FILE_LOG_BLOCK_SIZE));
-
-	recv_sys->found_corrupt_log = FALSE;
-	recv_sys->progress_time = ut_time();
-
-	recv_max_page_lsn = 0;
-
-	/* Call the constructor for recv_sys_t::dblwr member */
-	new (&recv_sys->dblwr) recv_dblwr_t();
-
-#ifndef UNIV_HOTBACKUP
-	/* Pairs with the mutex_enter() above, which is compiled under the
-	same condition. */
-	mutex_exit(&(recv_sys->mutex));
-#endif /* !UNIV_HOTBACKUP */
-}
-
-/** Discards the fully processed address hash table, empties the
-recovery heap and creates a fresh, empty hash table sized from the
-current buffer pool size. The caller must hold recv_sys->mutex and
-recv_sys->n_addrs must be 0. */
-static
-void
-recv_sys_empty_hash()
-{
-	ut_ad(mutex_own(&(recv_sys->mutex)));
-	ut_a(recv_sys->n_addrs == 0);
-
-	/* Throw away the old table and the heap contents. */
-	hash_table_free(recv_sys->addr_hash);
-	mem_heap_empty(recv_sys->heap);
-
-	const ulint	n_cells = buf_pool_get_curr_size() / 512;
-
-	recv_sys->addr_hash = hash_create(n_cells);
-}
-
-#ifndef UNIV_HOTBACKUP
-# ifndef UNIV_LOG_DEBUG
-/********************************************************//**
-Frees the hash table, heap, parsing buffer and last-block buffer of the
-recovery system and resets the corresponding pointers to NULL, all
-while holding recv_sys->mutex. The recv_sys object and its mutexes are
-kept. */
-static
-void
-recv_sys_debug_free(void)
-/*=====================*/
-{
-	mutex_enter(&recv_sys->mutex);
-
-	/* Release each resource and clear its pointer right away. */
-	hash_table_free(recv_sys->addr_hash);
-	recv_sys->addr_hash = NULL;
-
-	mem_heap_free(recv_sys->heap);
-	recv_sys->heap = NULL;
-
-	ut_free(recv_sys->buf);
-	recv_sys->buf = NULL;
-
-	mem_free(recv_sys->last_block_buf_start);
-	recv_sys->last_block_buf_start = NULL;
-
-	mutex_exit(&recv_sys->mutex);
-}
-# endif /* UNIV_LOG_DEBUG */
-
-# ifdef UNIV_LOG_ARCHIVE
-/********************************************************//**
-Truncates possible corrupted or extra records from a log group.
-The range [start_lsn, finish_lsn) is overwritten through
-log_group_write_buf() in chunks of at most RECV_SCAN_SIZE bytes, using
-log_sys->buf as the source. The buffer is zero-filled; only the first
-chunk may begin with a copy of the last, incomplete recovered block
-whose data length has been cut to end at recovered_lsn. Nothing is
-written when start_lsn >= finish_lsn. */
-static
-void
-recv_truncate_group(
-/*================*/
- log_group_t* group, /*!< in: log group */
- lsn_t recovered_lsn, /*!< in: recovery succeeded up to this
- lsn */
- lsn_t limit_lsn, /*!< in: this was the limit for
- recovery */
- lsn_t checkpoint_lsn, /*!< in: recovery was started from this
- checkpoint */
- lsn_t archived_lsn) /*!< in: the log has been archived up to
- this lsn; LSN_MAX is replaced by checkpoint_lsn */
-{
- lsn_t start_lsn;
- lsn_t end_lsn;
- lsn_t finish_lsn1;
- lsn_t finish_lsn2;
- lsn_t finish_lsn;
-
- if (archived_lsn == LSN_MAX) {
- /* Checkpoint was taken in the NOARCHIVELOG mode */
- archived_lsn = checkpoint_lsn;
- }
-
- finish_lsn1 = ut_uint64_align_down(archived_lsn,
- OS_FILE_LOG_BLOCK_SIZE)
- + log_group_get_capacity(group);
-
- finish_lsn2 = ut_uint64_align_up(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE)
- + recv_sys->last_log_buf_size;
-
- if (limit_lsn != LSN_MAX) {
- /* We do not know how far we should erase log records: erase
- as much as possible */
-
- finish_lsn = finish_lsn1;
- } else {
- /* It is enough to erase the length of the log buffer.
- The smaller of the two candidate end points is used. */
- finish_lsn = finish_lsn1 < finish_lsn2
- ? finish_lsn1 : finish_lsn2;
- }
-
- ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
-
- memset(log_sys->buf, 0, RECV_SCAN_SIZE);
-
- start_lsn = ut_uint64_align_down(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
-
- if (start_lsn != recovered_lsn) {
- /* Copy the last incomplete log block to the log buffer and
- edit its data length: */
- lsn_t diff = recovered_lsn - start_lsn;
-
- ut_a(diff <= 0xFFFFUL);
-
- ut_memcpy(log_sys->buf, recv_sys->last_block,
- OS_FILE_LOG_BLOCK_SIZE);
- log_block_set_data_len(log_sys->buf, (ulint) diff);
- }
-
- if (start_lsn >= finish_lsn) {
-
- return;
- }
-
- for (;;) {
- ulint len;
-
- end_lsn = start_lsn + RECV_SCAN_SIZE;
-
- if (end_lsn > finish_lsn) {
-
- end_lsn = finish_lsn;
- }
-
- len = (ulint) (end_lsn - start_lsn);
-
- log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
- if (end_lsn >= finish_lsn) {
-
- return;
- }
-
- memset(log_sys->buf, 0, RECV_SCAN_SIZE); /* every chunk after the first is all zeros */
-
- start_lsn = end_lsn;
- }
-}
-
-/********************************************************//**
-Copies the log segment between group->recovered_lsn and recovered_lsn from the
-most up-to-date log group to group, so that it contains the latest log data.
-The copy starts at group->scanned_lsn aligned down to a log block
-boundary and proceeds in chunks of at most RECV_SCAN_SIZE bytes, staged
-through log_sys->buf. Nothing is done when group->scanned_lsn is
-already at or beyond recovered_lsn. */
-static
-void
-recv_copy_group(
-/*============*/
- log_group_t* up_to_date_group, /*!< in: the most up-to-date log
- group; data is read from it */
- log_group_t* group, /*!< in: copy to this log
- group */
- lsn_t recovered_lsn) /*!< in: recovery succeeded up
- to this lsn */
-{
- lsn_t start_lsn;
- lsn_t end_lsn;
-
- if (group->scanned_lsn >= recovered_lsn) {
-
- return;
- }
-
- ut_a(RECV_SCAN_SIZE <= log_sys->buf_size);
-
- start_lsn = ut_uint64_align_down(group->scanned_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- for (;;) {
- ulint len;
-
- end_lsn = start_lsn + RECV_SCAN_SIZE;
-
- if (end_lsn > recovered_lsn) { /* last chunk: stop at the end of the block containing recovered_lsn */
- end_lsn = ut_uint64_align_up(recovered_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
- }
-
- log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
- up_to_date_group, start_lsn, end_lsn,
- FALSE);
-
- len = (ulint) (end_lsn - start_lsn);
-
- log_group_write_buf(group, log_sys->buf, len, start_lsn, 0);
-
- if (end_lsn >= recovered_lsn) {
-
- return;
- }
-
- start_lsn = end_lsn;
- }
-}
-# endif /* UNIV_LOG_ARCHIVE */
-
-/********************************************************//**
-Copies a log segment from the most up-to-date log group to the other log
-groups, so that they all contain the latest log data. Also writes the info
-about the latest checkpoint to the groups, and inits the fields in the group
-memory structs to up-to-date values.
-
-log_sys->mutex is released while waiting for the checkpoint write to
-complete and is re-acquired before returning; the code below therefore
-expects the caller to hold it on entry. */
-static
-void
-recv_synchronize_groups(
-/*====================*/
-#ifdef UNIV_LOG_ARCHIVE
-	log_group_t*	up_to_date_group	/*!< in: the most up-to-date
-						log group */
-#endif
-	)
-{
-	lsn_t	start_lsn;
-	lsn_t	end_lsn;
-	lsn_t	recovered_lsn;
-
-	recovered_lsn = recv_sys->recovered_lsn;
-
-	/* Read the last recovered log block to the recovery system buffer:
-	the block is always incomplete */
-
-	start_lsn = ut_uint64_align_down(recovered_lsn,
-					 OS_FILE_LOG_BLOCK_SIZE);
-	end_lsn = ut_uint64_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
-
-	ut_a(start_lsn != end_lsn);
-
-	log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
-#ifdef UNIV_LOG_ARCHIVE
-			       up_to_date_group,
-#else /* UNIV_LOG_ARCHIVE */
-			       UT_LIST_GET_FIRST(log_sys->log_groups),
-#endif /* UNIV_LOG_ARCHIVE */
-			       start_lsn, end_lsn, FALSE);
-
-	for (log_group_t* group = UT_LIST_GET_FIRST(log_sys->log_groups);
-	     group;
-	     group = UT_LIST_GET_NEXT(log_groups, group)) {
-#ifdef UNIV_LOG_ARCHIVE
-		if (group != up_to_date_group) {
-
-			/* Copy log data if needed. recv_copy_group()
-			takes the source (most up-to-date) group first
-			and the destination group second. */
-
-			recv_copy_group(up_to_date_group, group,
-					recovered_lsn);
-		}
-#endif /* UNIV_LOG_ARCHIVE */
-		/* Update the fields in the group struct to correspond to
-		recovered_lsn */
-
-		log_group_set_fields(group, recovered_lsn);
-		ut_a(log_sys);
-
-	}
-	/* Copy the checkpoint info to the groups; remember that we have
-	incremented checkpoint_no by one, and the info will not be written
-	over the max checkpoint info, thus making the preservation of max
-	checkpoint info on disk certain */
-
-	log_groups_write_checkpoint_info();
-
-	mutex_exit(&(log_sys->mutex));
-
-	/* Wait for the checkpoint write to complete */
-	rw_lock_s_lock(&(log_sys->checkpoint_lock));
-	rw_lock_s_unlock(&(log_sys->checkpoint_lock));
-
-	mutex_enter(&(log_sys->mutex));
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************************//**
-Checks the consistency of the checkpoint info by verifying its two
-stored checksums. The first covers the bytes before
-LOG_CHECKPOINT_CHECKSUM_1; the second covers the bytes from
-LOG_CHECKPOINT_LSN up to LOG_CHECKPOINT_CHECKSUM_2. Only the low 32 bits
-of each computed fold are compared with the stored 4-byte value.
-@return TRUE if ok */
-ibool
-recv_check_cp_is_consistent(
-/*========================*/
-	const byte*	buf)	/*!< in: buffer containing checkpoint info */
-{
-	const ulint	fold_1 = ut_fold_binary(buf,
-						LOG_CHECKPOINT_CHECKSUM_1);
-	const ulint	stored_1 = mach_read_from_4(
-		buf + LOG_CHECKPOINT_CHECKSUM_1);
-
-	if ((fold_1 & 0xFFFFFFFFUL) != stored_1) {
-
-		return(FALSE);
-	}
-
-	const ulint	fold_2 = ut_fold_binary(
-		buf + LOG_CHECKPOINT_LSN,
-		LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
-	const ulint	stored_2 = mach_read_from_4(
-		buf + LOG_CHECKPOINT_CHECKSUM_2);
-
-	return((fold_2 & 0xFFFFFFFFUL) == stored_2 ? TRUE : FALSE);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Looks for the maximum consistent checkpoint from the log groups.
-For every log group the first log block of the file is read to check
-that the block size recorded in the header equals srv_log_block_size;
-then both checkpoint fields are read and validated with
-recv_check_cp_is_consistent(). The group and field holding the highest
-checkpoint number are reported through the out parameters.
-@return error code or DB_SUCCESS; DB_ERROR on a block size mismatch,
-when log_crypt_read_checkpoint_buf() fails, or when no valid
-checkpoint is found */
-MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-recv_find_max_checkpoint(
-/*=====================*/
- log_group_t** max_group, /*!< out: max group */
- ulint* max_field) /*!< out: LOG_CHECKPOINT_1 or
- LOG_CHECKPOINT_2 */
-{
- log_group_t* group;
- ib_uint64_t max_no;
- ib_uint64_t checkpoint_no;
- ulint field;
- byte* buf;
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- max_no = 0;
- *max_group = NULL;
- *max_field = 0;
-
- buf = log_sys->checkpoint_buf;
-
- while (group) {
-
- ulint log_hdr_log_block_size;
-
- group->state = LOG_GROUP_CORRUPTED; /* stays so unless a consistent checkpoint is found below */
-
- /* Assert that we can reuse log_sys->checkpoint_buf to read the
- part of the header that contains the log block size. */
- ut_ad(LOG_FILE_OS_FILE_LOG_BLOCK_SIZE + 4
- < OS_FILE_LOG_BLOCK_SIZE);
-
- fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0,
- 0, 0, OS_FILE_LOG_BLOCK_SIZE,
- log_sys->checkpoint_buf, NULL, NULL);
- log_hdr_log_block_size
- = mach_read_from_4(log_sys->checkpoint_buf
- + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE);
- if (log_hdr_log_block_size == 0) {
- /* 0 means default value */
- log_hdr_log_block_size = 512;
- }
- if (UNIV_UNLIKELY(log_hdr_log_block_size
- != srv_log_block_size)) {
- fprintf(stderr,
- "InnoDB: Error: The block size of ib_logfile "
- "%lu is not equal to innodb_log_block_size "
- "%lu.\n"
- "InnoDB: Error: Suggestion - Recreate log "
- "files.\n",
- log_hdr_log_block_size, srv_log_block_size);
- return(DB_ERROR);
- }
-
- for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2;
- field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) {
-
- log_group_read_checkpoint_info(group, field);
-
- if (!recv_check_cp_is_consistent(buf)) {
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Checkpoint in group"
- " %lu at %lu invalid, %lu\n",
- (ulong) group->id,
- (ulong) field,
- (ulong) mach_read_from_4(
- buf
- + LOG_CHECKPOINT_CHECKSUM_1));
-
- }
-#endif /* UNIV_DEBUG */
- goto not_consistent;
- }
-
- group->state = LOG_GROUP_OK;
-
- group->lsn = mach_read_from_8(
- buf + LOG_CHECKPOINT_LSN);
- group->lsn_offset = mach_read_from_4(
- buf + LOG_CHECKPOINT_OFFSET_LOW32);
- group->lsn_offset |= ((lsn_t) mach_read_from_4(
- buf + LOG_CHECKPOINT_OFFSET_HIGH32)) << 32;
- checkpoint_no = mach_read_from_8(
- buf + LOG_CHECKPOINT_NO);
-
- if (!log_crypt_read_checkpoint_buf(buf)) {
- return DB_ERROR;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Checkpoint number %lu"
- " found in group %lu\n",
- (ulong) checkpoint_no,
- (ulong) group->id);
- }
-#endif /* UNIV_DEBUG */
-
- if (checkpoint_no >= max_no) { /* >= so that a checkpoint numbered 0 is accepted; on a tie the later one wins */
- *max_group = group;
- *max_field = field;
- max_no = checkpoint_no;
- }
-
-not_consistent:
- ;
- }
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- if (*max_group == NULL) {
-
- fprintf(stderr,
- "InnoDB: No valid checkpoint found.\n"
- "InnoDB: If you are attempting downgrade"
- " from MySQL 5.7.9 or later,\n"
- "InnoDB: please refer to " REFMAN
- "upgrading-downgrading.html\n"
- "InnoDB: If this error appears when you are"
- " creating an InnoDB database,\n"
- "InnoDB: the problem may be that during"
- " an earlier attempt you managed\n"
- "InnoDB: to create the InnoDB data files,"
- " but log file creation failed.\n"
- "InnoDB: If that is the case, please refer to\n"
- "InnoDB: " REFMAN "error-creating-innodb.html\n");
- return(DB_ERROR);
- }
-
- return(DB_SUCCESS);
-}
-#else /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Reads the checkpoint info needed in hot backup.
-Both checkpoint fields of the log group header are validated with
-recv_check_cp_is_consistent(); the valid one with the higher checkpoint
-number is used. If both are valid and carry the same number, the first
-field is used. A valid second field is also accepted when the first one
-is invalid, whatever its checkpoint number (including 0).
-@return TRUE if success, FALSE if neither checkpoint field is valid */
-UNIV_INTERN
-ibool
-recv_read_checkpoint_info_for_backup(
-/*=================================*/
-	const byte*	hdr,	/*!< in: buffer containing the log group
-				header */
-	lsn_t*		lsn,	/*!< out: checkpoint lsn */
-	lsn_t*		offset,	/*!< out: checkpoint offset in the log group */
-	lsn_t*		cp_no,	/*!< out: checkpoint number */
-	lsn_t*		first_header_lsn)
-				/*!< out: lsn of the start of the
-				first log file */
-{
-	ulint		max_cp = 0;	/* 0 means: no valid field yet */
-	ib_uint64_t	max_cp_no = 0;
-	const byte*	cp_buf;
-
-	cp_buf = hdr + LOG_CHECKPOINT_1;
-
-	if (recv_check_cp_is_consistent(cp_buf)) {
-		max_cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
-		max_cp = LOG_CHECKPOINT_1;
-	}
-
-	cp_buf = hdr + LOG_CHECKPOINT_2;
-
-	if (recv_check_cp_is_consistent(cp_buf)) {
-		/* Without the max_cp == 0 test, a valid second field
-		numbered 0 would be ignored when the first is invalid. */
-		if (max_cp == 0
-		    || mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO)
-		    > max_cp_no) {
-			max_cp = LOG_CHECKPOINT_2;
-		}
-	}
-
-	if (max_cp == 0) {
-		return(FALSE);
-	}
-
-	cp_buf = hdr + max_cp;
-
-	*lsn = mach_read_from_8(cp_buf + LOG_CHECKPOINT_LSN);
-	*offset = mach_read_from_4(
-		cp_buf + LOG_CHECKPOINT_OFFSET_LOW32);
-	*offset |= ((lsn_t) mach_read_from_4(
-			    cp_buf + LOG_CHECKPOINT_OFFSET_HIGH32)) << 32;
-
-	*cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
-
-	*first_header_lsn = mach_read_from_8(hdr + LOG_FILE_START_LSN);
-
-	return(TRUE);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/******************************************************//**
-Checks the 4-byte checksum to the trailer checksum field of a log
-block. We also accept a log block in the old format before
-InnoDB-3.23.52 where the checksum field contains the log block number.
-The checks run in this order: (1) accept when the configured algorithm
-is "none" or the checksum computed by log_block_calc_checksum() matches;
-(2) for the STRICT algorithms, log the mismatch (and the algorithm the
-stored value does match, if any) and raise a fatal error; (3) for the
-non-strict algorithms, accept a stored value that equals the
-no-checksum magic, the crc32 checksum or the innodb checksum; (4) accept
-the old format where the stored value equals the block header number.
-When UNIV_LOG_DEBUG is defined the function accepts every block.
-@return TRUE if ok, or if the log block may be in the format of InnoDB
-version predating 3.23.52 */
-UNIV_INTERN
-ibool
-log_block_checksum_is_ok_or_old_format(
-/*===================================*/
- const byte* block, /*!< in: pointer to a log block */
- bool print_err) /*!< in print if error found; only affects
- the final "BROKEN" message written to stderr */
-{
-#ifdef UNIV_LOG_DEBUG
- return(TRUE);
-#endif /* UNIV_LOG_DEBUG */
-
- ulint block_checksum = log_block_get_checksum(block);
-
- if (UNIV_LIKELY(srv_log_checksum_algorithm ==
- SRV_CHECKSUM_ALGORITHM_NONE ||
- log_block_calc_checksum(block) == block_checksum)) {
-
- return(TRUE);
- }
-
- if (srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32 ||
- srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB ||
- srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_NONE) {
-
- const char* algo = NULL;
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "log block checksum mismatch: expected " ULINTPF ", "
- "calculated checksum " ULINTPF,
- block_checksum,
- log_block_calc_checksum(block));
-
- if (block_checksum == LOG_NO_CHECKSUM_MAGIC) {
-
- algo = "none";
- } else if (block_checksum ==
- log_block_calc_checksum_crc32(block)) {
-
- algo = "crc32";
- } else if (block_checksum ==
- log_block_calc_checksum_innodb(block)) {
-
- algo = "innodb";
- }
-
- if (algo) {
-
- const char* current_algo;
-
- current_algo = buf_checksum_algorithm_name(
- (srv_checksum_algorithm_t)
- srv_log_checksum_algorithm);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "current InnoDB log checksum type: %s, "
- "detected log checksum type: %s",
- current_algo,
- algo);
- }
-
- ib_logf(IB_LOG_LEVEL_FATAL,
- "STRICT method was specified for innodb_log_checksum, "
- "so we intentionally assert here.");
- }
-
- ut_ad(srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_CRC32 ||
- srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_INNODB);
-
- if (block_checksum == LOG_NO_CHECKSUM_MAGIC ||
- block_checksum == log_block_calc_checksum_crc32(block) ||
- block_checksum == log_block_calc_checksum_innodb(block)) {
-
- return(TRUE);
- }
-
- if (log_block_get_hdr_no(block) == block_checksum) {
-
- /* We assume the log block is in the format of
- InnoDB version < 3.23.52 and the block is ok */
-#if 0
- fprintf(stderr,
- "InnoDB: Scanned old format < InnoDB-3.23.52"
- " log block number %lu\n",
- log_block_get_hdr_no(block));
-#endif
- return(TRUE);
- }
-
- if (print_err) {
- fprintf(stderr, "BROKEN: block: %lu checkpoint: %lu %.8lx %.8lx\n",
- log_block_get_hdr_no(block),
- log_block_get_checkpoint_no(block),
- log_block_calc_checksum(block),
- log_block_get_checksum(block));
- }
-
- return(FALSE);
-}
-
-#ifdef UNIV_HOTBACKUP
-/*******************************************************************//**
-Scans the log segment and n_bytes_scanned is set to the length of valid
-log scanned.
-The buffer is walked one log block at a time. Scanning stops at the
-first block whose header number does not match the expected one or
-whose checksum check fails, at a block whose checkpoint number lags the
-highest one seen so far by more than 0x80000000, or after a block whose
-data length is smaller than OS_FILE_LOG_BLOCK_SIZE.
-NOTE(review): log_block_checksum_is_ok_or_old_format() is defined in
-this file with two parameters (block, print_err) but is called below
-with one argument - confirm that its declaration supplies a default. */
-UNIV_INTERN
-void
-recv_scan_log_seg_for_backup(
-/*=========================*/
- byte* buf, /*!< in: buffer containing log data */
- ulint buf_len, /*!< in: data length in that buffer */
- lsn_t* scanned_lsn, /*!< in/out: lsn of buffer start,
- we return scanned lsn */
- ulint* scanned_checkpoint_no,
- /*!< in/out: 4 lowest bytes of the
- highest scanned checkpoint number so
- far */
- ulint* n_bytes_scanned)/*!< out: how much we were able to
- scan, smaller than buf_len if log
- data ended here */
-{
- ulint data_len;
- byte* log_block;
- ulint no;
-
- *n_bytes_scanned = 0;
-
- for (log_block = buf; log_block < buf + buf_len;
- log_block += OS_FILE_LOG_BLOCK_SIZE) {
-
- no = log_block_get_hdr_no(log_block);
-
-#if 0
- fprintf(stderr, "Log block header no %lu\n", no);
-#endif
-
- if (no != log_block_convert_lsn_to_no(*scanned_lsn)
- || !log_block_checksum_is_ok_or_old_format(log_block)) {
-#if 0
- fprintf(stderr,
- "Log block n:o %lu, scanned lsn n:o %lu\n",
- no, log_block_convert_lsn_to_no(*scanned_lsn));
-#endif
- /* Garbage or an incompletely written log block */
-
- log_block += OS_FILE_LOG_BLOCK_SIZE; /* only read by the disabled debug print below; the loop exits right after */
-#if 0
- fprintf(stderr,
- "Next log block n:o %lu\n",
- log_block_get_hdr_no(log_block));
-#endif
- break;
- }
-
- if (*scanned_checkpoint_no > 0
- && log_block_get_checkpoint_no(log_block)
- < *scanned_checkpoint_no
- && *scanned_checkpoint_no
- - log_block_get_checkpoint_no(log_block)
- > 0x80000000UL) {
-
- /* Garbage from a log buffer flush which was made
- before the most recent database recovery */
-#if 0
- fprintf(stderr,
- "Scanned cp n:o %lu, block cp n:o %lu\n",
- *scanned_checkpoint_no,
- log_block_get_checkpoint_no(log_block));
-#endif
- break;
- }
-
- data_len = log_block_get_data_len(log_block);
-
- *scanned_checkpoint_no
- = log_block_get_checkpoint_no(log_block);
- *scanned_lsn += data_len;
-
- *n_bytes_scanned += data_len;
-
- if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
- /* Log data ends here */
-
-#if 0
- fprintf(stderr, "Log block data len %lu\n",
- data_len);
-#endif
- break;
- }
- }
-}
-#endif /* UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Tries to parse a single log record body and also applies it to a page if
-specified. File ops are parsed, but not applied in this function.
-The function dispatches on the record type to the matching parse
-routine. For record types that carry index information,
-mlog_parse_index() is called first; the index it returns and that
-index's table are freed before this function returns. An unrecognised
-record type sets recv_sys->found_corrupt_log and yields NULL.
-@return log record end, NULL if not a complete record */
-static
-byte*
-recv_parse_or_apply_log_rec_body(
-/*=============================*/
- byte type, /*!< in: type */
- byte* ptr, /*!< in: pointer to a buffer */
- byte* end_ptr,/*!< in: pointer to the buffer end */
- buf_block_t* block, /*!< in/out: buffer block or NULL; if
- not NULL, then the log record is
- applied to the page, and the log
- record should be complete then */
- mtr_t* mtr, /*!< in: mtr or NULL; should be non-NULL
- if and only if block is non-NULL */
- ulint space_id)
- /*!< in: tablespace id obtained by
- parsing initial log record */
-{
- dict_index_t* index = NULL;
- page_t* page;
- page_zip_des_t* page_zip;
-#ifdef UNIV_DEBUG
- ulint page_type;
-#endif /* UNIV_DEBUG */
-
- ut_ad(!block == !mtr);
-
- if (block) {
- page = block->frame;
- page_zip = buf_block_get_page_zip(block);
- ut_d(page_type = fil_page_get_type(page));
- } else {
- page = NULL;
- page_zip = NULL;
- ut_d(page_type = FIL_PAGE_TYPE_ALLOCATED);
- }
-
- switch (type) {
-#ifdef UNIV_LOG_LSN_DEBUG
- case MLOG_LSN:
- /* The LSN is checked in recv_parse_log_rec(). */
- break;
-#endif /* UNIV_LOG_LSN_DEBUG */
- case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES:
- /* Note that crypt data can be set to empty page */
- ptr = mlog_parse_nbytes(type, ptr, end_ptr, page, page_zip);
- break;
- case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
-
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_REC_INSERT,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr,
- block, index, mtr);
- }
- break;
- case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
-
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_REC_CLUST_DELETE_MARK,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = btr_cur_parse_del_mark_set_clust_rec(
- ptr, end_ptr, page, page_zip, index);
- }
- break;
- case MLOG_COMP_REC_SEC_DELETE_MARK:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
- /* This log record type is obsolete, but we process it for
- backward compatibility with MySQL 5.0.3 and 5.0.4. */
- ut_a(!page || page_is_comp(page));
- ut_a(!page_zip);
- ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index);
- if (!ptr) {
- break;
- }
- /* Fall through */
- case MLOG_REC_SEC_DELETE_MARK:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
- ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr,
- page, page_zip);
- break;
- case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
-
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_REC_UPDATE_IN_PLACE,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page,
- page_zip, index);
- }
- break;
- case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE:
- case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
-
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_LIST_END_DELETE
- || type == MLOG_COMP_LIST_START_DELETE,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = page_parse_delete_rec_list(type, ptr, end_ptr,
- block, index, mtr);
- }
- break;
- case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
-
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_LIST_END_COPY_CREATED,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = page_parse_copy_rec_list_to_created_page(
- ptr, end_ptr, block, index, mtr);
- }
- break;
- case MLOG_PAGE_REORGANIZE:
- case MLOG_COMP_PAGE_REORGANIZE:
- case MLOG_ZIP_PAGE_REORGANIZE:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
-
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type != MLOG_PAGE_REORGANIZE,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = btr_parse_page_reorganize(
- ptr, end_ptr, index,
- type == MLOG_ZIP_PAGE_REORGANIZE,
- block, mtr);
- }
- break;
- case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE:
- /* Allow anything in page_type when creating a page. */
- ut_a(!page_zip);
- ptr = page_parse_create(ptr, end_ptr,
- type == MLOG_COMP_PAGE_CREATE,
- block, mtr);
- break;
- case MLOG_UNDO_INSERT:
- ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
- ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page);
- break;
- case MLOG_UNDO_ERASE_END:
- ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
- ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr);
- break;
- case MLOG_UNDO_INIT:
- /* Allow anything in page_type when creating a page. */
- ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr);
- break;
- case MLOG_UNDO_HDR_DISCARD:
- ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
- ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr);
- break;
- case MLOG_UNDO_HDR_CREATE:
- case MLOG_UNDO_HDR_REUSE:
- ut_ad(!page || page_type == FIL_PAGE_UNDO_LOG);
- ptr = trx_undo_parse_page_header(type, ptr, end_ptr,
- page, mtr);
- break;
- case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
- /* On a compressed page, MLOG_COMP_REC_MIN_MARK
- will be followed by MLOG_COMP_REC_DELETE
- or MLOG_ZIP_WRITE_HEADER(FIL_PAGE_PREV, FIL_NULL)
- in the same mini-transaction. */
- ut_a(type == MLOG_COMP_REC_MIN_MARK || !page_zip);
- ptr = btr_parse_set_min_rec_mark(
- ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK,
- page, mtr);
- break;
- case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
-
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr,
- type == MLOG_COMP_REC_DELETE,
- &index))) {
- ut_a(!page
- || (ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table));
- ptr = page_cur_parse_delete_rec(ptr, end_ptr,
- block, index, mtr);
- }
- break;
- case MLOG_IBUF_BITMAP_INIT:
- /* Allow anything in page_type when creating a page. */
- ptr = ibuf_parse_bitmap_init(ptr, end_ptr, block, mtr);
- break;
- case MLOG_INIT_FILE_PAGE:
- /* Allow anything in page_type when creating a page. */
- ptr = fsp_parse_init_file_page(ptr, end_ptr, block);
- break;
- case MLOG_WRITE_STRING:
- /* Allow setting crypt_data also for empty page */
- ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
- break;
- case MLOG_FILE_RENAME:
- /* Do not rerun file-based log entries if this is
- IO completion from a page read.
- The tablespace id is passed on only while recovery is on;
- otherwise 0 is passed. */
- if (page == NULL) {
- ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type,
- (recv_recovery_is_on()
- ? space_id : 0), 0);
- }
- break;
- case MLOG_FILE_CREATE:
- case MLOG_FILE_DELETE:
- case MLOG_FILE_CREATE2:
- /* Do not rerun file-based log entries if this is
- IO completion from a page read. */
- if (page == NULL) {
- ptr = fil_op_log_parse_or_replay(ptr, end_ptr,
- type, 0, 0);
- }
- break;
- case MLOG_ZIP_WRITE_NODE_PTR:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
- ptr = page_zip_parse_write_node_ptr(ptr, end_ptr,
- page, page_zip);
- break;
- case MLOG_ZIP_WRITE_BLOB_PTR:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
- ptr = page_zip_parse_write_blob_ptr(ptr, end_ptr,
- page, page_zip);
- break;
- case MLOG_ZIP_WRITE_HEADER:
- ut_ad(!page || page_type == FIL_PAGE_INDEX);
- ptr = page_zip_parse_write_header(ptr, end_ptr,
- page, page_zip);
- break;
- case MLOG_ZIP_PAGE_COMPRESS:
- /* Allow anything in page_type when creating a page. */
- ptr = page_zip_parse_compress(ptr, end_ptr,
- page, page_zip);
- break;
- case MLOG_ZIP_PAGE_COMPRESS_NO_DATA:
- if (NULL != (ptr = mlog_parse_index(
- ptr, end_ptr, TRUE, &index))) {
-
- ut_a(!page || ((ibool)!!page_is_comp(page)
- == dict_table_is_comp(index->table)));
- ptr = page_zip_parse_compress_no_data(
- ptr, end_ptr, page, page_zip, index);
- }
- break;
- case MLOG_FILE_WRITE_CRYPT_DATA:
- dberr_t err; /* NOTE(review): not initialized here; assumes fil_parse_write_crypt_data() always sets it - confirm */
- ptr = const_cast<byte*>(fil_parse_write_crypt_data(ptr, end_ptr, block, &err));
-
- if (err != DB_SUCCESS) {
- recv_sys->found_corrupt_log = TRUE;
- }
- break;
- default:
- ptr = NULL;
- recv_sys->found_corrupt_log = TRUE;
- }
-
- if (index) { /* set only by mlog_parse_index(); free it together with its table */
- dict_table_t* table = index->table;
-
- dict_mem_index_free(index);
- dict_mem_table_free(table);
- }
-
- return(ptr);
-}
-
-/*********************************************************************//**
-Folds a page file address (space id, page number) into a single value;
-the result is used when inserting or searching for a log record in the
-hash table.
-@return folded value */
-UNIV_INLINE
-ulint
-recv_fold(
-/*======*/
-	ulint	space,	/*!< in: space */
-	ulint	page_no)/*!< in: page number */
-{
-	const ulint	fold = ut_fold_ulint_pair(space, page_no);
-
-	return(fold);
-}
-
-/*********************************************************************//**
-Maps a page file address (space id, page number) to its hash value in
-recv_sys->addr_hash; used when inserting or searching for a log record
-in the hash table.
-@return hash value computed from the folded address */
-UNIV_INLINE
-ulint
-recv_hash(
-/*======*/
-	ulint	space,	/*!< in: space */
-	ulint	page_no)/*!< in: page number */
-{
-	const ulint	fold = recv_fold(space, page_no);
-
-	return(hash_calc_hash(fold, recv_sys->addr_hash));
-}
-
-/*********************************************************************//**
-Looks up the hashed file address struct of a page by walking the hash
-chain that the page address maps to.
-@return file address struct, NULL if not found from the hash table */
-static
-recv_addr_t*
-recv_get_fil_addr_struct(
-/*=====================*/
-	ulint	space,	/*!< in: space id */
-	ulint	page_no)/*!< in: page number */
-{
-	recv_addr_t*	cell = static_cast<recv_addr_t*>(
-		HASH_GET_FIRST(recv_sys->addr_hash,
-			       recv_hash(space, page_no)));
-
-	/* Advance until the chain ends or the entry matches. */
-	while (cell != 0
-	       && !(cell->space == space && cell->page_no == page_no)) {
-
-		cell = static_cast<recv_addr_t*>(
-			HASH_GET_NEXT(addr_hash, cell));
-	}
-
-	return(cell);
-}
-
-/*******************************************************************//**
-Adds a new log record to the hash table of log records. The record is
-appended to the record list of its page address (the address entry is
-created on first use) and its body is copied into recv_sys->heap as a
-chain of chunks of at most RECV_DATA_BLOCK_SIZE bytes. Records for a
-tablespace that is deleted or being deleted are not stored. */
-static
-void
-recv_add_to_hash_table(
-/*===================*/
-	byte	type,		/*!< in: log record type */
-	ulint	space,		/*!< in: space id */
-	ulint	page_no,	/*!< in: page number */
-	byte*	body,		/*!< in: log record body */
-	byte*	rec_end,	/*!< in: log record end */
-	lsn_t	start_lsn,	/*!< in: start lsn of the mtr */
-	lsn_t	end_lsn)	/*!< in: end lsn of the mtr */
-{
-	if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
-		/* The tablespace does not exist any more: do not store the
-		log record */
-
-		return;
-	}
-
-	recv_t*	rec = static_cast<recv_t*>(
-		mem_heap_alloc(recv_sys->heap, sizeof(recv_t)));
-
-	rec->type = type;
-	rec->len = rec_end - body;
-	rec->start_lsn = start_lsn;
-	rec->end_lsn = end_lsn;
-
-	recv_addr_t*	addr = recv_get_fil_addr_struct(space, page_no);
-
-	if (addr == NULL) {
-		/* First record seen for this page: create its entry. */
-		addr = static_cast<recv_addr_t*>(
-			mem_heap_alloc(recv_sys->heap, sizeof(recv_addr_t)));
-
-		addr->space = space;
-		addr->page_no = page_no;
-		addr->state = RECV_NOT_PROCESSED;
-
-		UT_LIST_INIT(addr->rec_list);
-
-		HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
-			    recv_fold(space, page_no), addr);
-		recv_sys->n_addrs++;
-	}
-
-	UT_LIST_ADD_LAST(rec_list, addr->rec_list, rec);
-
-	/* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
-	recv_sys->heap grows into the buffer pool, and bigger chunks could not
-	be allocated */
-
-	recv_data_t**	link = &(rec->data);
-
-	for (byte* src = body; src < rec_end; ) {
-
-		ulint	chunk = rec_end - src;
-
-		if (chunk > RECV_DATA_BLOCK_SIZE) {
-			chunk = RECV_DATA_BLOCK_SIZE;
-		}
-
-		recv_data_t*	piece = static_cast<recv_data_t*>(
-			mem_heap_alloc(recv_sys->heap,
-				       sizeof(recv_data_t) + chunk));
-
-		*link = piece;
-
-		/* The payload sits directly after the chunk header. */
-		memcpy(piece + 1, src, chunk);
-
-		link = &(piece->next);
-
-		src += chunk;
-	}
-
-	*link = NULL;
-}
-
-/*********************************************************************//**
-Copies the log record body from recv to buf. */
-static
-void
-recv_data_copy_to_buf(
-/*==================*/
- byte* buf, /*!< in: buffer of length at least recv->len */
- recv_t* recv) /*!< in: log record */
-{
- recv_data_t* recv_data;
- ulint part_len;
- ulint len;
-
- len = recv->len;
- recv_data = recv->data;
-
- while (len > 0) {
- if (len > RECV_DATA_BLOCK_SIZE) {
- part_len = RECV_DATA_BLOCK_SIZE;
- } else {
- part_len = len;
- }
-
- ut_memcpy(buf, ((byte*) recv_data) + sizeof(recv_data_t),
- part_len);
- buf += part_len;
- len -= part_len;
-
- recv_data = recv_data->next;
- }
-}
-
-/************************************************************************//**
-Applies the hashed log records to the page, if the page lsn is less than the
-lsn of a log record. This can be called when a buffer page has just been
-read in, or also for a page already in the buffer pool. */
-UNIV_INTERN
-void
-recv_recover_page_func(
-/*===================*/
-#ifndef UNIV_HOTBACKUP
- ibool just_read_in,
- /*!< in: TRUE if the i/o handler calls
- this for a freshly read page */
-#endif /* !UNIV_HOTBACKUP */
- buf_block_t* block) /*!< in/out: buffer block */
-{
- page_t* page;
- page_zip_des_t* page_zip;
- recv_addr_t* recv_addr;
- recv_t* recv;
- byte* buf;
- lsn_t start_lsn;
- lsn_t end_lsn;
- lsn_t page_lsn;
- lsn_t page_newest_lsn;
- ibool modification_to_page;
-#ifndef UNIV_HOTBACKUP
- ibool success;
-#endif /* !UNIV_HOTBACKUP */
- mtr_t mtr;
-
- mutex_enter(&(recv_sys->mutex));
-
- if (recv_sys->apply_log_recs == FALSE) {
-
- /* Log records should not be applied now */
-
- mutex_exit(&(recv_sys->mutex));
-
- return;
- }
-
- recv_addr = recv_get_fil_addr_struct(buf_block_get_space(block),
- buf_block_get_page_no(block));
-
- if ((recv_addr == NULL)
- /* bugfix: http://bugs.mysql.com/bug.php?id=44140 */
- || (recv_addr->state == RECV_BEING_READ && !just_read_in)
- || (recv_addr->state == RECV_BEING_PROCESSED)
- || (recv_addr->state == RECV_PROCESSED)) {
-
- mutex_exit(&(recv_sys->mutex));
-
- return;
- }
-
-#if 0
- fprintf(stderr, "Recovering space %lu, page %lu\n",
- buf_block_get_space(block), buf_block_get_page_no(block));
-#endif
-
- recv_addr->state = RECV_BEING_PROCESSED;
-
- mutex_exit(&(recv_sys->mutex));
-
- mtr_start(&mtr);
- mtr_set_log_mode(&mtr, MTR_LOG_NONE);
-
- page = block->frame;
- page_zip = buf_block_get_page_zip(block);
-
-#ifndef UNIV_HOTBACKUP
- if (just_read_in) {
- /* Move the ownership of the x-latch on the page to
- this OS thread, so that we can acquire a second
- x-latch on it. This is needed for the operations to
- the page to pass the debug checks. */
-
- rw_lock_x_lock_move_ownership(&block->lock);
- }
-
- success = buf_page_get_known_nowait(RW_X_LATCH, block,
- BUF_KEEP_OLD,
- __FILE__, __LINE__,
- &mtr);
- ut_a(success);
-
- buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-#endif /* !UNIV_HOTBACKUP */
-
- /* Read the newest modification lsn from the page */
- page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
-
-#ifndef UNIV_HOTBACKUP
- /* It may be that the page has been modified in the buffer
- pool: read the newest modification lsn there */
-
- page_newest_lsn = buf_page_get_newest_modification(&block->page);
-
- if (page_newest_lsn) {
-
- page_lsn = page_newest_lsn;
- }
-#else /* !UNIV_HOTBACKUP */
- /* In recovery from a backup we do not really use the buffer pool */
- page_newest_lsn = 0;
-#endif /* !UNIV_HOTBACKUP */
-
- modification_to_page = FALSE;
- start_lsn = end_lsn = 0;
-
- recv = UT_LIST_GET_FIRST(recv_addr->rec_list);
-
- while (recv) {
- end_lsn = recv->end_lsn;
-
- if (recv->len > RECV_DATA_BLOCK_SIZE) {
- /* We have to copy the record body to a separate
- buffer */
-
- buf = static_cast<byte*>(mem_alloc(recv->len));
-
- recv_data_copy_to_buf(buf, recv);
- } else {
- buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
- }
-
- if (recv->type == MLOG_INIT_FILE_PAGE) {
- page_lsn = page_newest_lsn;
-
- memset(FIL_PAGE_LSN + page, 0, 8);
- memset(UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM
- + page, 0, 8);
-
- if (page_zip) {
- memset(FIL_PAGE_LSN + page_zip->data, 0, 8);
- }
- }
-
- if (recv->start_lsn >= page_lsn) {
-
- lsn_t end_lsn;
-
- if (!modification_to_page) {
-
- modification_to_page = TRUE;
- start_lsn = recv->start_lsn;
- }
-
- DBUG_PRINT("ib_log",
- ("apply " LSN_PF ": %u len %u "
- "page %u:%u", recv->start_lsn,
- (unsigned) recv->type,
- (unsigned) recv->len,
- (unsigned) recv_addr->space,
- (unsigned) recv_addr->page_no));
-
- recv_parse_or_apply_log_rec_body(recv->type, buf,
- buf + recv->len,
- block, &mtr,
- recv_addr->space);
-
- end_lsn = recv->start_lsn + recv->len;
- mach_write_to_8(FIL_PAGE_LSN + page, end_lsn);
- mach_write_to_8(UNIV_PAGE_SIZE
- - FIL_PAGE_END_LSN_OLD_CHKSUM
- + page, end_lsn);
-
- if (page_zip) {
- mach_write_to_8(FIL_PAGE_LSN
- + page_zip->data, end_lsn);
- }
- }
-
- if (recv->len > RECV_DATA_BLOCK_SIZE) {
- mem_free(buf);
- }
-
- recv = UT_LIST_GET_NEXT(rec_list, recv);
- }
-
-#ifdef UNIV_ZIP_DEBUG
- if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
- page_zip_des_t* page_zip = buf_block_get_page_zip(block);
-
- ut_a(!page_zip
- || page_zip_validate_low(page_zip, page, NULL, FALSE));
- }
-#endif /* UNIV_ZIP_DEBUG */
-
-#ifndef UNIV_HOTBACKUP
- if (modification_to_page) {
- ut_a(block);
-
- log_flush_order_mutex_enter();
- buf_flush_recv_note_modification(block, start_lsn, end_lsn);
- log_flush_order_mutex_exit();
- }
-#endif /* !UNIV_HOTBACKUP */
-
- /* Make sure that committing mtr does not change the modification
- lsn values of page */
-
- mtr.modifications = FALSE;
-
- mtr_commit(&mtr);
-
- ib_time_t time = ut_time();
-
- mutex_enter(&(recv_sys->mutex));
-
- if (recv_max_page_lsn < page_lsn) {
- recv_max_page_lsn = page_lsn;
- }
-
- recv_addr->state = RECV_PROCESSED;
-
- ut_a(recv_sys->n_addrs > 0);
- if (ulint n = --recv_sys->n_addrs) {
- if (recv_sys->report(time)) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "To recover: " ULINTPF " pages from log", n);
- sd_notifyf(0, "STATUS=To recover: " ULINTPF
- " pages from log", n);
- }
- }
-
- mutex_exit(&recv_sys->mutex);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************//**
-Reads in pages which have hashed log records, from an area around a given
-page number.
-@return number of pages found */
-static
-ulint
-recv_read_in_area(
-/*==============*/
- ulint space, /*!< in: space */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no)/*!< in: page number */
-{
- recv_addr_t* recv_addr;
- ulint page_nos[RECV_READ_AHEAD_AREA];
- ulint low_limit;
- ulint n;
-
- low_limit = page_no - (page_no % RECV_READ_AHEAD_AREA);
-
- n = 0;
-
- for (page_no = low_limit; page_no < low_limit + RECV_READ_AHEAD_AREA;
- page_no++) {
- recv_addr = recv_get_fil_addr_struct(space, page_no);
-
- if (recv_addr && !buf_page_peek(space, page_no)) {
-
- mutex_enter(&(recv_sys->mutex));
-
- if (recv_addr->state == RECV_NOT_PROCESSED) {
- recv_addr->state = RECV_BEING_READ;
-
- page_nos[n] = page_no;
-
- n++;
- }
-
- mutex_exit(&(recv_sys->mutex));
- }
- }
-
- buf_read_recv_pages(FALSE, space, zip_size, page_nos, n);
- return(n);
-}
-
-/** Apply the hash table of stored log records to persistent data pages.
-@param[in] last_batch whether the change buffer merge will be
- performed as part of the operation */
-UNIV_INTERN
-void
-recv_apply_hashed_log_recs(bool last_batch)
-{
- for (;;) {
- mutex_enter(&recv_sys->mutex);
-
- if (!recv_sys->apply_batch_on) {
- break;
- }
-
- if (recv_sys->found_corrupt_log) {
- mutex_exit(&recv_sys->mutex);
- return;
- }
-
- mutex_exit(&recv_sys->mutex);
- os_thread_sleep(500000);
- }
-
- ut_ad(!last_batch == mutex_own(&log_sys->mutex));
-
- if (!last_batch) {
- recv_no_ibuf_operations = TRUE;
- }
-
- if (ulint n = recv_sys->n_addrs) {
- const char* msg = last_batch
- ? "Starting final batch to recover "
- : "Starting a batch to recover ";
- ib_logf(IB_LOG_LEVEL_INFO,
- "%s" ULINTPF " pages from redo log", msg, n);
- sd_notifyf(0, "STATUS=%s" ULINTPF " pages from redo log",
- msg, n);
- }
-
- recv_sys->apply_log_recs = TRUE;
- recv_sys->apply_batch_on = TRUE;
-
- for (ulint i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) {
- for (recv_addr_t* recv_addr = static_cast<recv_addr_t*>(
- HASH_GET_FIRST(recv_sys->addr_hash, i));
- recv_addr;
- recv_addr = static_cast<recv_addr_t*>(
- HASH_GET_NEXT(addr_hash, recv_addr))) {
-
- ulint space = recv_addr->space;
- ulint zip_size = fil_space_get_zip_size(space);
- ulint page_no = recv_addr->page_no;
-
- if (recv_addr->state == RECV_NOT_PROCESSED) {
- mutex_exit(&recv_sys->mutex);
-
- if (buf_page_peek(space, page_no)) {
- mtr_t mtr;
- mtr_start(&mtr);
- buf_block_t* block = buf_page_get(
- space, zip_size, page_no,
- RW_X_LATCH, &mtr);
- buf_block_dbg_add_level(
- block, SYNC_NO_ORDER_CHECK);
-
- recv_recover_page(FALSE, block);
- mtr_commit(&mtr);
- } else {
- recv_read_in_area(space, zip_size,
- page_no);
- }
-
- mutex_enter(&recv_sys->mutex);
- }
- }
- }
-
- /* Wait until all the pages have been processed */
-
- while (recv_sys->n_addrs != 0) {
-
- mutex_exit(&(recv_sys->mutex));
-
- if (recv_sys->found_corrupt_log) {
- return;
- }
-
- os_thread_sleep(500000);
-
- mutex_enter(&(recv_sys->mutex));
- }
-
- if (!last_batch) {
- bool success;
-
- /* Flush all the file pages to disk and invalidate them in
- the buffer pool */
-
- ut_d(recv_no_log_write = TRUE);
- mutex_exit(&(recv_sys->mutex));
- mutex_exit(&(log_sys->mutex));
-
- /* Stop the recv_writer thread from issuing any LRU
- flush batches. */
- mutex_enter(&recv_sys->writer_mutex);
-
- /* Wait for any currently run batch to end. */
- buf_flush_wait_LRU_batch_end();
-
- success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
-
- ut_a(success);
-
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
-
- buf_pool_invalidate();
-
- /* Allow batches from recv_writer thread. */
- mutex_exit(&recv_sys->writer_mutex);
-
- mutex_enter(&(log_sys->mutex));
- mutex_enter(&(recv_sys->mutex));
- ut_d(recv_no_log_write = FALSE);
-
- recv_no_ibuf_operations = FALSE;
- }
-
- recv_sys->apply_log_recs = FALSE;
- recv_sys->apply_batch_on = FALSE;
-
- recv_sys_empty_hash();
-
- mutex_exit(&recv_sys->mutex);
-}
-#else /* !UNIV_HOTBACKUP */
-/*******************************************************************//**
-Applies log records in the hash table to a backup. */
-UNIV_INTERN
-void
-recv_apply_log_recs_for_backup(void)
-/*================================*/
-{
- recv_addr_t* recv_addr;
- ulint n_hash_cells;
- buf_block_t* block;
- ulint actual_size;
- ibool success;
- ulint error;
- ulint i;
-
- recv_sys->apply_log_recs = TRUE;
- recv_sys->apply_batch_on = TRUE;
-
- block = back_block1;
-
- n_hash_cells = hash_get_n_cells(recv_sys->addr_hash);
-
- for (i = 0; i < n_hash_cells; i++) {
- /* The address hash table is externally chained */
- recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node;
-
- while (recv_addr != NULL) {
-
- ulint zip_size
- = fil_space_get_zip_size(recv_addr->space);
-
- if (zip_size == ULINT_UNDEFINED) {
-#if 0
- fprintf(stderr,
- "InnoDB: Warning: cannot apply"
- " log record to"
- " tablespace %lu page %lu,\n"
- "InnoDB: because tablespace with"
- " that id does not exist.\n",
- recv_addr->space, recv_addr->page_no);
-#endif
- recv_addr->state = RECV_PROCESSED;
-
- ut_a(recv_sys->n_addrs);
- recv_sys->n_addrs--;
-
- goto skip_this_recv_addr;
- }
-
- /* We simulate a page read made by the buffer pool, to
- make sure the recovery apparatus works ok. We must init
- the block. */
-
- buf_page_init_for_backup_restore(
- recv_addr->space, recv_addr->page_no,
- zip_size, block);
-
- /* Extend the tablespace's last file if the page_no
- does not fall inside its bounds; we assume the last
- file is auto-extending, and mysqlbackup copied the file
- when it still was smaller */
-
- success = fil_extend_space_to_desired_size(
- &actual_size,
- recv_addr->space, recv_addr->page_no + 1);
- if (!success) {
- fprintf(stderr,
- "InnoDB: Fatal error: cannot extend"
- " tablespace %u to hold %u pages\n",
- recv_addr->space, recv_addr->page_no);
-
- exit(1);
- }
-
- /* Read the page from the tablespace file using the
- fil0fil.cc routines */
-
- if (zip_size) {
- error = fil_io(OS_FILE_READ, true,
- recv_addr->space, zip_size,
- recv_addr->page_no, 0, zip_size,
- block->page.zip.data, NULL, 0, 0, false);
- if (error == DB_SUCCESS
- && !buf_zip_decompress(block, TRUE)) {
- exit(1);
- }
- } else {
- error = fil_io(OS_FILE_READ, true,
- recv_addr->space, 0,
- recv_addr->page_no, 0,
- UNIV_PAGE_SIZE,
- block->frame, NULL, 0, 0, false);
- }
-
- if (error != DB_SUCCESS) {
- fprintf(stderr,
- "InnoDB: Fatal error: cannot read"
- " from tablespace"
- " %lu page number %lu\n",
- (ulong) recv_addr->space,
- (ulong) recv_addr->page_no);
-
- exit(1);
- }
-
- /* Apply the log records to this page */
- recv_recover_page(FALSE, block);
-
- /* Write the page back to the tablespace file using the
- fil0fil.cc routines */
-
- buf_flush_init_for_writing(
- block->frame, buf_block_get_page_zip(block),
- mach_read_from_8(block->frame + FIL_PAGE_LSN));
-
- if (zip_size) {
- error = fil_io(OS_FILE_WRITE, true,
- recv_addr->space, zip_size,
- recv_addr->page_no, 0,
- zip_size,
- block->page.zip.data, NULL, 0, 0, false);
- } else {
- error = fil_io(OS_FILE_WRITE, true,
- recv_addr->space, 0,
- recv_addr->page_no, 0,
- UNIV_PAGE_SIZE,
- block->frame, NULL, 0,
- block->latest_modification,
- block->encrypt_later);
- }
-skip_this_recv_addr:
- recv_addr = HASH_GET_NEXT(addr_hash, recv_addr);
- }
- }
- sd_notify(0, "STATUS=InnoDB: Apply batch for backup completed");
-
- recv_sys_empty_hash();
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************//**
-Tries to parse a single log record and returns its length.
-@return length of the record, or 0 if the record was not complete */
-UNIV_INTERN
-ulint
-recv_parse_log_rec(
-/*===============*/
- byte* ptr, /*!< in: pointer to a buffer */
- byte* end_ptr,/*!< in: pointer to the buffer end */
- byte* type, /*!< out: type */
- ulint* space, /*!< out: space id */
- ulint* page_no,/*!< out: page number */
- byte** body) /*!< out: log record body start */
-{
- byte* new_ptr;
-
- *body = NULL;
-
- if (ptr == end_ptr) {
-
- return(0);
- }
-
- if (*ptr == MLOG_MULTI_REC_END) {
-
- *type = *ptr;
-
- return(1);
- }
-
- if (*ptr == MLOG_DUMMY_RECORD) {
- *type = *ptr;
-
- *space = ULINT_UNDEFINED - 1; /* For debugging */
-
- return(1);
- }
-
- new_ptr = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
- page_no);
- *body = new_ptr;
-
- if (UNIV_UNLIKELY(!new_ptr)) {
-
- return(0);
- }
-
-#ifdef UNIV_LOG_LSN_DEBUG
- if (*type == MLOG_LSN) {
- lsn_t lsn = (lsn_t) *space << 32 | *page_no;
-# ifdef UNIV_LOG_DEBUG
- ut_a(lsn == log_sys->old_lsn);
-# else /* UNIV_LOG_DEBUG */
- ut_a(lsn == recv_sys->recovered_lsn);
-# endif /* UNIV_LOG_DEBUG */
- }
-#endif /* UNIV_LOG_LSN_DEBUG */
-
- byte* old_ptr = new_ptr;
- new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
- NULL, NULL, *space);
- if (UNIV_UNLIKELY(new_ptr == NULL)) {
-
- return(0);
- }
-
- if (*page_no == 0 && *type == MLOG_4BYTES
- && mach_read_from_2(old_ptr) == FSP_HEADER_OFFSET + FSP_SIZE) {
- ulint size;
- mach_parse_compressed(old_ptr + 2, end_ptr, &size);
- fil_space_set_recv_size(*space, size);
- }
-
- if (*page_no > recv_max_parsed_page_no) {
- recv_max_parsed_page_no = *page_no;
- }
-
- return(new_ptr - ptr);
-}
-
-/*******************************************************//**
-Calculates the new value for lsn when more data is added to the log. */
-UNIV_INTERN
-lsn_t
-recv_calc_lsn_on_data_add(
-/*======================*/
- lsn_t lsn, /*!< in: old lsn */
- ib_uint64_t len) /*!< in: this many bytes of data is
- added, log block headers not included */
-{
- ulint frag_len;
- ib_uint64_t lsn_len;
-
- frag_len = (lsn % OS_FILE_LOG_BLOCK_SIZE) - LOG_BLOCK_HDR_SIZE;
- ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
- - LOG_BLOCK_TRL_SIZE);
- lsn_len = len;
- lsn_len += (lsn_len + frag_len)
- / (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE
- - LOG_BLOCK_TRL_SIZE)
- * (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
-
- return(lsn + lsn_len);
-}
-
-#ifdef UNIV_LOG_DEBUG
-/*******************************************************//**
-Checks that the parser recognizes incomplete initial segments of a log
-record as incomplete. */
-static
-void
-recv_check_incomplete_log_recs(
-/*===========================*/
- byte* ptr, /*!< in: pointer to a complete log record */
- ulint len) /*!< in: length of the log record */
-{
- ulint i;
- byte type;
- ulint space;
- ulint page_no;
- byte* body;
-
- for (i = 0; i < len; i++) {
- ut_a(0 == recv_parse_log_rec(ptr, ptr + i, &type, &space,
- &page_no, &body));
- }
-}
-#endif /* UNIV_LOG_DEBUG */
-
-/*******************************************************//**
-Prints diagnostic info of corrupt log. */
-static
-void
-recv_report_corrupt_log(
-/*====================*/
- byte* ptr, /*!< in: pointer to corrupt log record */
- byte type, /*!< in: type of the record */
- ulint space, /*!< in: space id, this may also be garbage */
- ulint page_no)/*!< in: page number, this may also be garbage */
-{
- fprintf(stderr,
- "InnoDB: ############### CORRUPT LOG RECORD FOUND\n"
- "InnoDB: Log record type %lu, space id %lu, page number %lu\n"
- "InnoDB: Log parsing proceeded successfully up to " LSN_PF "\n"
- "InnoDB: Previous log record type %lu, is multi %lu\n"
- "InnoDB: Recv offset %lu, prev %lu\n",
- (ulong) type, (ulong) space, (ulong) page_no,
- recv_sys->recovered_lsn,
- (ulong) recv_previous_parsed_rec_type,
- (ulong) recv_previous_parsed_rec_is_multi,
- (ulong) (ptr - recv_sys->buf),
- (ulong) recv_previous_parsed_rec_offset);
-
- if ((ulint)(ptr - recv_sys->buf + 100)
- > recv_previous_parsed_rec_offset
- && (ulint)(ptr - recv_sys->buf + 100
- - recv_previous_parsed_rec_offset)
- < 200000) {
- fputs("InnoDB: Hex dump of corrupt log starting"
- " 100 bytes before the start\n"
- "InnoDB: of the previous log rec,\n"
- "InnoDB: and ending 100 bytes after the start"
- " of the corrupt rec:\n",
- stderr);
-
- ut_print_buf(stderr,
- recv_sys->buf
- + recv_previous_parsed_rec_offset - 100,
- ptr - recv_sys->buf + 200
- - recv_previous_parsed_rec_offset);
- putc('\n', stderr);
- }
-
-#ifndef UNIV_HOTBACKUP
- if (!srv_force_recovery) {
- fputs("InnoDB: Set innodb_force_recovery"
- " to ignore this error.\n", stderr);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
- "InnoDB: is possible that the log scan did not proceed\n"
- "InnoDB: far enough in recovery! Please run CHECK TABLE\n"
- "InnoDB: on your InnoDB tables to check that they are ok!\n"
- "InnoDB: If mysqld crashes after this recovery, look at\n"
- "InnoDB: " REFMAN "forcing-innodb-recovery.html\n"
- "InnoDB: about forcing recovery.\n", stderr);
-
- fflush(stderr);
-}
-
-/*******************************************************//**
-Parses log records from a buffer and stores them to a hash table to wait
-merging to file pages.
-@return currently always returns FALSE */
-static
-ibool
-recv_parse_log_recs(
-/*================*/
- ibool store_to_hash, /*!< in: TRUE if the records should be stored
- to the hash table; this is set to FALSE if just
- debug checking is needed */
- dberr_t* err) /*!< out: DB_SUCCESS if successfull,
- DB_ERROR if parsing fails. */
-{
- byte* ptr;
- byte* end_ptr;
- ulint single_rec;
- ulint len;
- ulint total_len;
- lsn_t new_recovered_lsn;
- lsn_t old_lsn;
- byte type;
- ulint space;
- ulint page_no;
- byte* body;
- ulint n_recs;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
- ut_ad(recv_sys->parse_start_lsn != 0);
-loop:
- ptr = recv_sys->buf + recv_sys->recovered_offset;
-
- end_ptr = recv_sys->buf + recv_sys->len;
-
- if (ptr == end_ptr) {
-
- return(FALSE);
- }
-
- single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
-
- if (single_rec || *ptr == MLOG_DUMMY_RECORD) {
- /* The mtr only modified a single page, or this is a file op */
-
- old_lsn = recv_sys->recovered_lsn;
-
- /* Try to parse a log record, fetching its type, space id,
- page no, and a pointer to the body of the log record */
-
- len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
- &page_no, &body);
-
- if (len == 0 || recv_sys->found_corrupt_log) {
- if (recv_sys->found_corrupt_log) {
-
- recv_report_corrupt_log(ptr,
- type, space, page_no);
- }
-
- return(FALSE);
- }
-
- new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
-
- if (new_recovered_lsn > recv_sys->scanned_lsn) {
- /* The log record filled a log block, and we require
- that also the next log block should have been scanned
- in */
-
- return(FALSE);
- }
-
- recv_previous_parsed_rec_type = (ulint) type;
- recv_previous_parsed_rec_offset = recv_sys->recovered_offset;
- recv_previous_parsed_rec_is_multi = 0;
-
- recv_sys->recovered_offset += len;
- recv_sys->recovered_lsn = new_recovered_lsn;
-
- DBUG_PRINT("ib_log",
- ("scan " LSN_PF ": log rec %u len %u "
- "page %u:%u", old_lsn,
- (unsigned) type, (unsigned) len,
- (unsigned) space, (unsigned) page_no));
-
- if (type == MLOG_DUMMY_RECORD) {
- /* Do nothing */
-
- } else if (!store_to_hash) {
- /* In debug checking, update a replicate page
- according to the log record, and check that it
- becomes identical with the original page */
-#ifdef UNIV_LOG_DEBUG
- recv_check_incomplete_log_recs(ptr, len);
-#endif/* UNIV_LOG_DEBUG */
-
- } else if (type == MLOG_FILE_CREATE
- || type == MLOG_FILE_CREATE2
- || type == MLOG_FILE_RENAME
- || type == MLOG_FILE_DELETE) {
- ut_a(space);
-#ifdef UNIV_HOTBACKUP
- if (recv_replay_file_ops) {
-
- /* In mysqlbackup --apply-log, replay an .ibd
- file operation, if possible; note that
- fil_path_to_mysql_datadir is set in mysqlbackup
- to point to the datadir we should use there */
-
- if (NULL == fil_op_log_parse_or_replay(
- body, end_ptr, type,
- space, page_no)) {
- fprintf(stderr,
- "InnoDB: Error: file op"
- " log record of type %lu"
- " space %lu not complete in\n"
- "InnoDB: the replay phase."
- " Path %s\n",
- (ulint) type, space,
- (char*)(body + 2));
-
- *err = DB_ERROR;
- return(FALSE);
- }
- }
-#endif
- /* In normal mysqld crash recovery we do not try to
- replay file operations */
-#ifdef UNIV_LOG_LSN_DEBUG
- } else if (type == MLOG_LSN) {
- /* Do not add these records to the hash table.
- The page number and space id fields are misused
- for something else. */
-#endif /* UNIV_LOG_LSN_DEBUG */
- } else {
- recv_add_to_hash_table(type, space, page_no, body,
- ptr + len, old_lsn,
- recv_sys->recovered_lsn);
- }
- } else {
- /* Check that all the records associated with the single mtr
- are included within the buffer */
-
- total_len = 0;
- n_recs = 0;
-
- for (;;) {
- len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
- &page_no, &body);
- if (len == 0 || recv_sys->found_corrupt_log) {
-
- if (recv_sys->found_corrupt_log) {
-
- recv_report_corrupt_log(
- ptr, type, space, page_no);
- }
-
- return(FALSE);
- }
-
- recv_previous_parsed_rec_type = (ulint) type;
- recv_previous_parsed_rec_offset
- = recv_sys->recovered_offset + total_len;
- recv_previous_parsed_rec_is_multi = 1;
-
-#ifdef UNIV_LOG_DEBUG
- if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
- recv_check_incomplete_log_recs(ptr, len);
- }
-#endif /* UNIV_LOG_DEBUG */
-
- DBUG_PRINT("ib_log",
- ("scan " LSN_PF ": multi-log rec %u "
- "len %u page %u:%u",
- recv_sys->recovered_lsn,
- (unsigned) type, (unsigned) len,
- (unsigned) space, (unsigned) page_no));
-
- total_len += len;
- n_recs++;
-
- ptr += len;
-
- if (type == MLOG_MULTI_REC_END) {
-
- /* Found the end mark for the records */
-
- break;
- }
- }
-
- new_recovered_lsn = recv_calc_lsn_on_data_add(
- recv_sys->recovered_lsn, total_len);
-
- if (new_recovered_lsn > recv_sys->scanned_lsn) {
- /* The log record filled a log block, and we require
- that also the next log block should have been scanned
- in */
-
- return(FALSE);
- }
-
- /* Add all the records to the hash table */
-
- ptr = recv_sys->buf + recv_sys->recovered_offset;
-
- for (;;) {
- old_lsn = recv_sys->recovered_lsn;
- len = recv_parse_log_rec(ptr, end_ptr, &type, &space,
- &page_no, &body);
- if (recv_sys->found_corrupt_log) {
-
- recv_report_corrupt_log(ptr,
- type, space, page_no);
- }
-
- ut_a(len != 0);
- ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG));
-
- recv_sys->recovered_offset += len;
- recv_sys->recovered_lsn
- = recv_calc_lsn_on_data_add(old_lsn, len);
- if (type == MLOG_MULTI_REC_END) {
-
- /* Found the end mark for the records */
-
- break;
- }
-
- if (store_to_hash
-#ifdef UNIV_LOG_LSN_DEBUG
- && type != MLOG_LSN
-#endif /* UNIV_LOG_LSN_DEBUG */
- ) {
- recv_add_to_hash_table(type, space, page_no,
- body, ptr + len,
- old_lsn,
- new_recovered_lsn);
- }
-
- ptr += len;
- }
- }
-
- goto loop;
-}
-
-/*******************************************************//**
-Adds data from a new log block to the parsing buffer of recv_sys if
-recv_sys->parse_start_lsn is non-zero.
-@return TRUE if more data added */
-static
-ibool
-recv_sys_add_to_parsing_buf(
-/*========================*/
- const byte* log_block, /*!< in: log block */
- lsn_t scanned_lsn) /*!< in: lsn of how far we were able
- to find data in this log block */
-{
- ulint more_len;
- ulint data_len;
- ulint start_offset;
- ulint end_offset;
-
- ut_ad(scanned_lsn >= recv_sys->scanned_lsn);
-
- if (!recv_sys->parse_start_lsn) {
- /* Cannot start parsing yet because no start point for
- it found */
-
- return(FALSE);
- }
-
- data_len = log_block_get_data_len(log_block);
-
- if (recv_sys->parse_start_lsn >= scanned_lsn) {
-
- return(FALSE);
-
- } else if (recv_sys->scanned_lsn >= scanned_lsn) {
-
- return(FALSE);
-
- } else if (recv_sys->parse_start_lsn > recv_sys->scanned_lsn) {
- more_len = (ulint) (scanned_lsn - recv_sys->parse_start_lsn);
- } else {
- more_len = (ulint) (scanned_lsn - recv_sys->scanned_lsn);
- }
-
- if (more_len == 0) {
-
- return(FALSE);
- }
-
- ut_ad(data_len >= more_len);
-
- start_offset = data_len - more_len;
-
- if (start_offset < LOG_BLOCK_HDR_SIZE) {
- start_offset = LOG_BLOCK_HDR_SIZE;
- }
-
- end_offset = data_len;
-
- if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
- end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
- }
-
- ut_ad(start_offset <= end_offset);
-
- if (start_offset < end_offset) {
- ut_memcpy(recv_sys->buf + recv_sys->len,
- log_block + start_offset, end_offset - start_offset);
-
- recv_sys->len += end_offset - start_offset;
-
- ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
- }
-
- return(TRUE);
-}
-
-/*******************************************************//**
-Moves the parsing buffer data left to the buffer start. */
-static
-void
-recv_sys_justify_left_parsing_buf(void)
-/*===================================*/
-{
- ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset,
- recv_sys->len - recv_sys->recovered_offset);
-
- recv_sys->len -= recv_sys->recovered_offset;
-
- recv_sys->recovered_offset = 0;
-}
-
-/*******************************************************//**
-Scans log from a buffer and stores new log data to the parsing buffer.
-Parses and hashes the log records if new data found. Unless
-UNIV_HOTBACKUP is defined, this function will apply log records
-automatically when the hash table becomes full.
-@return TRUE if limit_lsn has been reached, or not able to scan any
-more in this log group */
-UNIV_INTERN
-ibool
-recv_scan_log_recs(
-/*===============*/
- ulint available_memory,/*!< in: we let the hash table of recs
- to grow to this size, at the maximum */
- ibool store_to_hash, /*!< in: TRUE if the records should be
- stored to the hash table; this is set
- to FALSE if just debug checking is
- needed */
- const byte* buf, /*!< in: buffer containing a log
- segment or garbage */
- ulint len, /*!< in: buffer length */
- lsn_t start_lsn, /*!< in: buffer start lsn */
- lsn_t* contiguous_lsn, /*!< in/out: it is known that all log
- groups contain contiguous log data up
- to this lsn */
- lsn_t* group_scanned_lsn,/*!< out: scanning succeeded up to
- this lsn */
- dberr_t* err) /*!< out: error code or DB_SUCCESS */
-{
- const byte* log_block;
- ulint no;
- lsn_t scanned_lsn;
- ibool finished;
- ulint data_len;
- ibool more_data;
- bool maybe_encrypted=false;
-
- ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
- ut_ad(len >= OS_FILE_LOG_BLOCK_SIZE);
- ut_a(store_to_hash <= TRUE);
-
- finished = FALSE;
-
- log_block = buf;
- scanned_lsn = start_lsn;
- more_data = FALSE;
- *err = DB_SUCCESS;
-
- do {
- log_crypt_err_t log_crypt_err;
-
- no = log_block_get_hdr_no(log_block);
- /*
- fprintf(stderr, "Log block header no %lu\n", no);
-
- fprintf(stderr, "Scanned lsn no %lu\n",
- log_block_convert_lsn_to_no(scanned_lsn));
- */
-
- if (no != log_block_convert_lsn_to_no(scanned_lsn)
- || !log_block_checksum_is_ok_or_old_format(log_block, true)) {
-
- if (no == log_block_convert_lsn_to_no(scanned_lsn)
- && !log_block_checksum_is_ok_or_old_format(
- log_block, true)) {
- fprintf(stderr,
- "InnoDB: Log block no %lu at"
- " lsn " LSN_PF " has\n"
- "InnoDB: ok header, but checksum field"
- " contains %lu, should be %lu\n",
- (ulong) no,
- scanned_lsn,
- (ulong) log_block_get_checksum(
- log_block),
- (ulong) log_block_calc_checksum(
- log_block));
- }
-
- maybe_encrypted = log_crypt_block_maybe_encrypted(log_block,
- &log_crypt_err);
-
- /* Garbage or an incompletely written log block */
-
- /* Print checkpoint encryption keys if present */
- log_crypt_print_checkpoint_keys(log_block);
- finished = TRUE;
-
- if (maybe_encrypted) {
- /* Log block maybe encrypted finish processing*/
- log_crypt_print_error(log_crypt_err);
- *err = DB_ERROR;
- return (TRUE);
- }
-
- /* Stop if we encounter a garbage log block */
- if (!srv_force_recovery) {
- fputs("InnoDB: Set innodb_force_recovery"
- " to ignore this error.\n", stderr);
- *err = DB_ERROR;
- return (TRUE);
- }
-
- break;
-
- }
-
- if (log_block_get_flush_bit(log_block)) {
- /* This block was a start of a log flush operation:
- we know that the previous flush operation must have
- been completed for all log groups before this block
- can have been flushed to any of the groups. Therefore,
- we know that log data is contiguous up to scanned_lsn
- in all non-corrupt log groups. */
-
- if (scanned_lsn > *contiguous_lsn) {
- *contiguous_lsn = scanned_lsn;
- }
- }
-
- data_len = log_block_get_data_len(log_block);
-
- if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE))
- && scanned_lsn + data_len > recv_sys->scanned_lsn
- && (recv_sys->scanned_checkpoint_no > 0)
- && (log_block_get_checkpoint_no(log_block)
- < recv_sys->scanned_checkpoint_no)
- && (recv_sys->scanned_checkpoint_no
- - log_block_get_checkpoint_no(log_block)
- > 0x80000000UL)) {
-
- /* Garbage from a log buffer flush which was made
- before the most recent database recovery */
-
- finished = TRUE;
-#ifdef UNIV_LOG_DEBUG
- /* This is not really an error, but currently
- we stop here in the debug version: */
-
- *err = DB_ERROR;
- return (TRUE);
-#endif
- break;
- }
-
- if (!recv_sys->parse_start_lsn
- && (log_block_get_first_rec_group(log_block) > 0)) {
-
- /* We found a point from which to start the parsing
- of log records */
-
- recv_sys->parse_start_lsn = scanned_lsn
- + log_block_get_first_rec_group(log_block);
- recv_sys->scanned_lsn = recv_sys->parse_start_lsn;
- recv_sys->recovered_lsn = recv_sys->parse_start_lsn;
- }
-
- scanned_lsn += data_len;
-
- if (scanned_lsn > recv_sys->scanned_lsn) {
-
- /* We have found more entries. If this scan is
- of startup type, we must initiate crash recovery
- environment before parsing these log records. */
-
-#ifndef UNIV_HOTBACKUP
- if (recv_log_scan_is_startup_type
- && !recv_needed_recovery) {
- if (!srv_read_only_mode) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Starting crash recovery from "
- "checkpoint LSN=" LSN_PF,
- recv_sys->scanned_lsn);
-
- recv_init_crash_recovery();
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "innodb_read_only prevents"
- " crash recovery");
- recv_needed_recovery = TRUE;
- return(TRUE);
- }
- }
-#endif /* !UNIV_HOTBACKUP */
-
- /* We were able to find more log data: add it to the
- parsing buffer if parse_start_lsn is already
- non-zero */
-
- if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
- >= RECV_PARSING_BUF_SIZE) {
- fprintf(stderr,
- "InnoDB: Error: log parsing"
- " buffer overflow."
- " Recovery may have failed!\n");
-
- recv_sys->found_corrupt_log = TRUE;
-
-#ifndef UNIV_HOTBACKUP
- if (!srv_force_recovery) {
- fputs("InnoDB: Set"
- " innodb_force_recovery"
- " to ignore this error.\n",
- stderr);
- *err = DB_ERROR;
- return (TRUE);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- } else if (!recv_sys->found_corrupt_log) {
- more_data = recv_sys_add_to_parsing_buf(
- log_block, scanned_lsn);
- }
-
- recv_sys->scanned_lsn = scanned_lsn;
- recv_sys->scanned_checkpoint_no
- = log_block_get_checkpoint_no(log_block);
- }
-
- if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
- /* Log data for this group ends here */
-
- finished = TRUE;
- break;
- } else {
- log_block += OS_FILE_LOG_BLOCK_SIZE;
- }
- } while (log_block < buf + len && !finished);
-
- *group_scanned_lsn = scanned_lsn;
-
- if (more_data && !recv_sys->found_corrupt_log) {
- /* Try to parse more log records */
-
- recv_parse_log_recs(store_to_hash, err);
-
- if (*err != DB_SUCCESS) {
- return (TRUE);
- }
-
-#ifndef UNIV_HOTBACKUP
- if (store_to_hash
- && mem_heap_get_size(recv_sys->heap) > available_memory) {
-
- /* Hash table of log records has grown too big:
- empty it; FALSE means no ibuf operations
- allowed, as we cannot add new records to the
- log yet: they would be produced by ibuf
- operations */
-
- recv_apply_hashed_log_recs(false);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
- /* Move parsing buffer data to the buffer start */
-
- recv_sys_justify_left_parsing_buf();
- }
- }
-
- return(finished);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************//**
-Scans log from a buffer and stores new log data to the parsing buffer. Parses
-and hashes the log records if new data found. */
-static
-void
-recv_group_scan_log_recs(
-/*=====================*/
- log_group_t* group, /*!< in: log group */
- lsn_t* contiguous_lsn, /*!< in/out: it is known that all log
- groups contain contiguous log data up
- to this lsn */
- lsn_t* group_scanned_lsn,/*!< out: scanning succeeded up to
- this lsn */
- dberr_t* err) /*!< out: error code or DB_SUCCESS */
-{
- ibool finished;
- lsn_t start_lsn;
- lsn_t end_lsn;
-
- finished = FALSE;
- *err = DB_SUCCESS;
-
- start_lsn = *contiguous_lsn;
-
- while (!finished) {
- end_lsn = start_lsn + RECV_SCAN_SIZE;
-
- log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
- group, start_lsn, end_lsn, FALSE);
-
- finished = recv_scan_log_recs(
- (buf_pool_get_n_pages()
- - (recv_n_pool_free_frames * srv_buf_pool_instances))
- * UNIV_PAGE_SIZE,
- TRUE, log_sys->buf, RECV_SCAN_SIZE,
- start_lsn, contiguous_lsn, group_scanned_lsn,
- err);
-
- if (*err != DB_SUCCESS) {
- break;
- }
-
- start_lsn = end_lsn;
- }
-
-#ifdef UNIV_DEBUG
- if (log_debug_writes) {
- fprintf(stderr,
- "InnoDB: Scanned group %lu up to"
- " log sequence number " LSN_PF "\n",
- (ulong) group->id,
- *group_scanned_lsn);
- }
-#endif /* UNIV_DEBUG */
-}
-
-/*******************************************************//**
-Initialize crash recovery environment. Can be called iff
-recv_needed_recovery == FALSE. */
-static
-void
-recv_init_crash_recovery(void)
-/*==========================*/
-{
- ut_ad(!srv_read_only_mode);
- ut_a(!recv_needed_recovery);
-
- recv_needed_recovery = TRUE;
-
- fil_load_single_table_tablespaces();
-
- /* If we are using the doublewrite method, we will
- check if there are half-written pages in data files,
- and restore them from the doublewrite buffer if
- possible */
-
- if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
- buf_dblwr_process();
-
- /* Spawn the background thread to flush dirty pages
- from the buffer pools. */
- recv_writer_thread_active = true;
- recv_writer_thread_handle = os_thread_create(
- recv_writer_thread, 0, 0);
- }
-}
-
-/** Recovers from a checkpoint. When this function returns, the database is able
-to start processing of new user transactions, but the function
-recv_recovery_from_checkpoint_finish should be called later to complete
-the recovery and free the resources used in it.
-@param[in] type LOG_CHECKPOINT or LOG_ARCHIVE
-@param[in] limit_lsn recover up to this lsn if possible
-@param[in] flushed_lsn flushed lsn from first data file
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-recv_recovery_from_checkpoint_start_func(
-#ifdef UNIV_LOG_ARCHIVE
- ulint type,
- lsn_t limit_lsn,
-#endif /* UNIV_LOG_ARCHIVE */
- lsn_t flushed_lsn)
-{
- log_group_t* group;
- log_group_t* max_cp_group;
- ulint max_cp_field;
- lsn_t checkpoint_lsn;
- ib_uint64_t checkpoint_no;
- lsn_t group_scanned_lsn = 0;
- lsn_t contiguous_lsn;
-#ifdef UNIV_LOG_ARCHIVE
- log_group_t* up_to_date_group;
- lsn_t archived_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
- byte* buf;
- byte* log_hdr_buf;
- byte* log_hdr_buf_base = reinterpret_cast<byte *>
- (alloca(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE));
- dberr_t err;
-
- /* Initialize red-black tree for fast insertions into the
- flush_list during recovery process. */
- buf_flush_init_flush_rbt();
-
- ut_when_dtor<recv_dblwr_t> tmp(recv_sys->dblwr);
-
- log_hdr_buf = static_cast<byte *>
- (ut_align(log_hdr_buf_base, OS_FILE_LOG_BLOCK_SIZE));
-
-#ifdef UNIV_LOG_ARCHIVE
- ut_ad(type != LOG_CHECKPOINT || limit_lsn == LSN_MAX);
-/** TRUE when recovering from a checkpoint */
-# define TYPE_CHECKPOINT (type == LOG_CHECKPOINT)
-/** Recover up to this log sequence number */
-# define LIMIT_LSN limit_lsn
-#else /* UNIV_LOG_ARCHIVE */
-/** TRUE when recovering from a checkpoint */
-# define TYPE_CHECKPOINT 1
-/** Recover up to this log sequence number */
-# define LIMIT_LSN LSN_MAX
-#endif /* UNIV_LOG_ARCHIVE */
-
- if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "The user has set SRV_FORCE_NO_LOG_REDO on, "
- "skipping log redo");
-
- return(DB_SUCCESS);
- }
-
- recv_recovery_on = TRUE;
-
- recv_sys->limit_lsn = LIMIT_LSN;
-
- mutex_enter(&(log_sys->mutex));
-
- /* Look for the latest checkpoint from any of the log groups */
-
- err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field);
-
- if (err != DB_SUCCESS) {
-
- mutex_exit(&(log_sys->mutex));
-
- return(err);
- }
-
- log_group_read_checkpoint_info(max_cp_group, max_cp_field);
-
- buf = log_sys->checkpoint_buf;
-
- checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN);
- checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO);
-#ifdef UNIV_LOG_ARCHIVE
- archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN);
-#endif /* UNIV_LOG_ARCHIVE */
-
- /* Read the first log file header to print a note if this is
- a recovery from a restored InnoDB Hot Backup */
-
- fil_io(OS_FILE_READ | OS_FILE_LOG, true, max_cp_group->space_id, 0,
- 0, 0, LOG_FILE_HDR_SIZE,
- log_hdr_buf, max_cp_group, 0);
-
- if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
- (byte*)"ibbackup", (sizeof "ibbackup") - 1)) {
-
- if (srv_read_only_mode) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot restore from mysqlbackup, InnoDB "
- "running in read-only mode!");
-
- return(DB_ERROR);
- }
-
- /* This log file was created by mysqlbackup --restore: print
- a note to the user about it */
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "The log file was created by mysqlbackup --apply-log "
- "at %s. The following crash recovery is part of a "
- "normal restore.",
- log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP);
-
- /* Wipe over the label now */
-
- memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP,
- ' ', 4);
- /* Write to the log file to wipe over the label */
- fil_io(OS_FILE_WRITE | OS_FILE_LOG, true,
- max_cp_group->space_id, 0,
- 0, 0, OS_FILE_LOG_BLOCK_SIZE,
- log_hdr_buf, max_cp_group, 0);
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group) {
- log_checkpoint_get_nth_group_info(buf, group->id,
- &(group->archived_file_no));
-
- log_archived_get_offset(group, group->archived_file_no,
- archived_lsn, &(group->archived_offset));
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- if (TYPE_CHECKPOINT) {
- /* Start reading the log groups from the checkpoint lsn up. The
- variable contiguous_lsn contains an lsn up to which the log is
- known to be contiguously written to all log groups. */
- recv_sys->parse_start_lsn = checkpoint_lsn;
- recv_sys->scanned_lsn = checkpoint_lsn;
- recv_sys->scanned_checkpoint_no = 0;
- recv_sys->recovered_lsn = checkpoint_lsn;
- srv_start_lsn = checkpoint_lsn;
- }
-
- contiguous_lsn = ut_uint64_align_down(recv_sys->scanned_lsn,
- OS_FILE_LOG_BLOCK_SIZE);
-#ifdef UNIV_LOG_ARCHIVE
- if (TYPE_CHECKPOINT) {
- up_to_date_group = max_cp_group;
- } else {
- ulint capacity;
- dberr_t err;
-
- /* Try to recover the remaining part from logs: first from
- the logs of the archived group */
-
- group = recv_sys->archive_group;
- capacity = log_group_get_capacity(group);
-
- if (recv_sys->scanned_lsn > checkpoint_lsn + capacity
- || checkpoint_lsn > recv_sys->scanned_lsn + capacity) {
-
- mutex_exit(&(log_sys->mutex));
-
- /* The group does not contain enough log: probably
- an archived log file was missing or corrupt */
-
- return(DB_ERROR);
- }
-
- recv_group_scan_log_recs(group, &contiguous_lsn,
- &group_scanned_lsn, &err);
-
- if (err != DB_SUCCESS || recv_sys->scanned_lsn < checkpoint_lsn) {
-
- mutex_exit(&(log_sys->mutex));
-
- /* The group did not contain enough log: an archived
- log file was missing or invalid, or the log group
- was corrupt */
-
- return(DB_ERROR);
- }
-
- group->scanned_lsn = group_scanned_lsn;
- up_to_date_group = group;
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size);
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
-#ifdef UNIV_LOG_ARCHIVE
- if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) {
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- /* Set the flag to publish that we are doing startup scan. */
- recv_log_scan_is_startup_type = TYPE_CHECKPOINT;
- while (group) {
-#ifdef UNIV_LOG_ARCHIVE
- lsn_t old_scanned_lsn = recv_sys->scanned_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
- dberr_t err = DB_SUCCESS;
-
- recv_group_scan_log_recs(group, &contiguous_lsn,
- &group_scanned_lsn, &err);
-
- if (err != DB_SUCCESS) {
- return (err);
- }
-
- group->scanned_lsn = group_scanned_lsn;
-
-#ifdef UNIV_LOG_ARCHIVE
- if (old_scanned_lsn < group_scanned_lsn) {
- /* We found a more up-to-date group */
-
- up_to_date_group = group;
- }
-
- if ((type == LOG_ARCHIVE)
- && (group == recv_sys->archive_group)) {
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- /* Done with startup scan. Clear the flag. */
- recv_log_scan_is_startup_type = FALSE;
-
- if (srv_read_only_mode && recv_needed_recovery) {
- return(DB_READ_ONLY);
- }
-
- if (TYPE_CHECKPOINT) {
- /* NOTE: we always do a 'recovery' at startup, but only if
- there is something wrong we will print a message to the
- user about recovery: */
-
- if (checkpoint_lsn != flushed_lsn) {
- if (!recv_needed_recovery) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "The log sequence number "
- LSN_PF
- " in ibdata file do not match"
- " the log sequence number "
- LSN_PF
- " in the ib_logfiles!",
- flushed_lsn,
- checkpoint_lsn);
-
- if (!srv_read_only_mode) {
- recv_init_crash_recovery();
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Can't initiate database "
- "recovery, running "
- "in read-only-mode.");
- return(DB_READ_ONLY);
- }
- }
- }
- }
-
- /* We currently have only one log group */
- if (group_scanned_lsn < checkpoint_lsn
- || group_scanned_lsn < recv_max_page_lsn) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "We scanned the log up to "
- LSN_PF ". A checkpoint was at " LSN_PF
- " and the maximum LSN on a database page was " LSN_PF
- ". It is possible that the database is now corrupt!",
- group_scanned_lsn, checkpoint_lsn, recv_max_page_lsn);
- }
-
- if (recv_sys->recovered_lsn < checkpoint_lsn) {
-
- mutex_exit(&(log_sys->mutex));
-
- if (recv_sys->recovered_lsn >= LIMIT_LSN) {
-
- return(DB_SUCCESS);
- }
-
- /* No harm in trying to do RO access. */
- if (!srv_read_only_mode) {
- return (DB_READ_ONLY);
- }
-
- return(DB_ERROR);
- }
-
- /* Synchronize the uncorrupted log groups to the most up-to-date log
- group; we also copy checkpoint info to groups */
-
- log_sys->next_checkpoint_lsn = checkpoint_lsn;
- log_sys->next_checkpoint_no = checkpoint_no + 1;
- /* here the checkpoint info is written without any redo logging ongoing
- * and next_checkpoint_no is updated directly hence no +1 */
- log_crypt_set_ver_and_key(log_sys->next_checkpoint_no);
-
-#ifdef UNIV_LOG_ARCHIVE
- log_sys->archived_lsn = archived_lsn;
-
- recv_synchronize_groups(up_to_date_group);
-#else /* UNIV_LOG_ARCHIVE */
- recv_synchronize_groups();
-#endif /* UNIV_LOG_ARCHIVE */
-
- if (!recv_needed_recovery) {
- ut_a(checkpoint_lsn == recv_sys->recovered_lsn);
- } else {
- srv_start_lsn = recv_sys->recovered_lsn;
- }
-
- log_sys->lsn = recv_sys->recovered_lsn;
-
- ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE);
-
- log_sys->buf_free = (ulint) log_sys->lsn % OS_FILE_LOG_BLOCK_SIZE;
- log_sys->buf_next_to_write = log_sys->buf_free;
- log_sys->written_to_some_lsn = log_sys->lsn;
- log_sys->written_to_all_lsn = log_sys->lsn;
-
- log_sys->last_checkpoint_lsn = checkpoint_lsn;
-
- MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- log_sys->lsn - log_sys->last_checkpoint_lsn);
-
- log_sys->next_checkpoint_no = checkpoint_no + 1;
- log_crypt_set_ver_and_key(log_sys->next_checkpoint_no);
-
-#ifdef UNIV_LOG_ARCHIVE
- if (archived_lsn == LSN_MAX) {
-
- log_sys->archiving_state = LOG_ARCH_OFF;
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- mutex_enter(&recv_sys->mutex);
-
- recv_sys->apply_log_recs = TRUE;
-
- mutex_exit(&recv_sys->mutex);
-
- mutex_exit(&log_sys->mutex);
-
- recv_lsn_checks_on = TRUE;
-
- /* The database is now ready to start almost normal processing of user
- transactions: transaction rollbacks and the application of the log
- records in the hash table can be run in background. */
-
- return(DB_SUCCESS);
-
-#undef TYPE_CHECKPOINT
-#undef LIMIT_LSN
-}
-
-/********************************************************//**
-Completes recovery from a checkpoint. */
-UNIV_INTERN
-void
-recv_recovery_from_checkpoint_finish(void)
-/*======================================*/
-{
- if (recv_needed_recovery) {
- trx_sys_print_mysql_master_log_pos();
- trx_sys_print_mysql_binlog_offset();
- }
-
- if (recv_sys->found_corrupt_log) {
-
- fprintf(stderr,
- "InnoDB: WARNING: the log file may have been"
- " corrupt and it\n"
- "InnoDB: is possible that the log scan or parsing"
- " did not proceed\n"
- "InnoDB: far enough in recovery. Please run"
- " CHECK TABLE\n"
- "InnoDB: on your InnoDB tables to check that"
- " they are ok!\n"
- "InnoDB: It may be safest to recover your"
- " InnoDB database from\n"
- "InnoDB: a backup!\n");
- }
-
- /* Make sure that the recv_writer thread is done. This is
- required because it grabs various mutexes and we want to
- ensure that when we enable sync_order_checks there is no
- mutex currently held by any thread. */
- mutex_enter(&recv_sys->writer_mutex);
-
- /* Free the resources of the recovery system */
- recv_recovery_on = FALSE;
-
- /* By acquring the mutex we ensure that the recv_writer thread
- won't trigger any more LRU batchtes. Now wait for currently
- in progress batches to finish. */
- buf_flush_wait_LRU_batch_end();
-
- mutex_exit(&recv_sys->writer_mutex);
-
- ulint count = 0;
- while (recv_writer_thread_active) {
- ++count;
- os_thread_sleep(100000);
- if (srv_print_verbose_log && count > 600) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Waiting for recv_writer to "
- "finish flushing of buffer pool");
- count = 0;
- }
- }
-
-#ifdef __WIN__
- if (recv_writer_thread_handle) {
- CloseHandle(recv_writer_thread_handle);
- recv_writer_thread_handle = 0;
- }
-#endif /* __WIN__ */
-
-#ifndef UNIV_LOG_DEBUG
- recv_sys_debug_free();
-#endif
- /* Free up the flush_rbt. */
- buf_flush_free_flush_rbt();
-
- /* Roll back any recovered data dictionary transactions, so
- that the data dictionary tables will be free of any locks.
- The data dictionary latch should guarantee that there is at
- most one data dictionary transaction active at a time. */
- if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
- trx_rollback_or_clean_recovered(FALSE);
- }
-}
-
-/********************************************************//**
-Initiates the rollback of active transactions. */
-UNIV_INTERN
-void
-recv_recovery_rollback_active(void)
-/*===============================*/
-{
-#ifdef UNIV_SYNC_DEBUG
- /* Wait for a while so that created threads have time to suspend
- themselves before we switch the latching order checks on */
- os_thread_sleep(1000000);
-
- ut_ad(!recv_writer_thread_active);
-
- /* Switch latching order checks on in sync0sync.cc */
- sync_order_checks_on = TRUE;
-#endif
- /* We can't start any (DDL) transactions if UNDO logging
- has been disabled, additionally disable ROLLBACK of recovered
- user transactions. */
- if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
- && !srv_read_only_mode) {
-
- /* Drop partially created indexes. */
- row_merge_drop_temp_indexes();
- /* Drop temporary tables. */
- row_mysql_drop_temp_tables();
-
- /* Drop any auxiliary tables that were not dropped when the
- parent table was dropped. This can happen if the parent table
- was dropped but the server crashed before the auxiliary tables
- were dropped. */
- fts_drop_orphaned_tables();
-
- /* Rollback the uncommitted transactions which have no user
- session */
-
- trx_rollback_or_clean_is_active = true;
- os_thread_create(trx_rollback_or_clean_all_recovered, 0, 0);
- }
-}
-
-/******************************************************//**
-Resets the logs. The contents of log files will be lost! */
-UNIV_INTERN
-void
-recv_reset_logs(
-/*============*/
-#ifdef UNIV_LOG_ARCHIVE
- ulint arch_log_no, /*!< in: next archived log file number */
- ibool new_logs_created,/*!< in: TRUE if resetting logs
- is done at the log creation;
- FALSE if it is done after
- archive recovery */
-#endif /* UNIV_LOG_ARCHIVE */
- lsn_t lsn) /*!< in: reset to this lsn
- rounded up to be divisible by
- OS_FILE_LOG_BLOCK_SIZE, after
- which we add
- LOG_BLOCK_HDR_SIZE */
-{
- log_group_t* group;
-
- ut_ad(mutex_own(&(log_sys->mutex)));
-
- log_sys->lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
-
- group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
- while (group) {
- group->lsn = log_sys->lsn;
- group->lsn_offset = LOG_FILE_HDR_SIZE;
-#ifdef UNIV_LOG_ARCHIVE
- group->archived_file_no = arch_log_no;
- group->archived_offset = 0;
-
- if (!new_logs_created) {
- recv_truncate_group(group, group->lsn, group->lsn,
- group->lsn, group->lsn);
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- group = UT_LIST_GET_NEXT(log_groups, group);
- }
-
- log_sys->buf_next_to_write = 0;
- log_sys->written_to_some_lsn = log_sys->lsn;
- log_sys->written_to_all_lsn = log_sys->lsn;
-
- log_sys->next_checkpoint_no = 0;
- log_sys->last_checkpoint_lsn = 0;
-
-#ifdef UNIV_LOG_ARCHIVE
- log_sys->archived_lsn = log_sys->lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
- log_sys->tracked_lsn = log_sys->lsn;
-
- log_block_init(log_sys->buf, log_sys->lsn);
- log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
-
- log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
- log_sys->lsn += LOG_BLOCK_HDR_SIZE;
-
- MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- (log_sys->lsn - log_sys->last_checkpoint_lsn));
-
- mutex_exit(&(log_sys->mutex));
-
- /* Reset the checkpoint fields in logs */
-
- log_make_checkpoint_at(LSN_MAX, TRUE);
-
- mutex_enter(&(log_sys->mutex));
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_HOTBACKUP
-/******************************************************//**
-Creates new log files after a backup has been restored. */
-UNIV_INTERN
-void
-recv_reset_log_files_for_backup(
-/*============================*/
- const char* log_dir, /*!< in: log file directory path */
- ulint n_log_files, /*!< in: number of log files */
- lsn_t log_file_size, /*!< in: log file size */
- lsn_t lsn) /*!< in: new start lsn, must be
- divisible by OS_FILE_LOG_BLOCK_SIZE */
-{
- os_file_t log_file;
- ibool success;
- byte* buf;
- ulint i;
- ulint log_dir_len;
- char name[5000];
- static const char ib_logfile_basename[] = "ib_logfile";
-
- log_dir_len = strlen(log_dir);
- /* full path name of ib_logfile consists of log dir path + basename
- + number. This must fit in the name buffer.
- */
- ut_a(log_dir_len + strlen(ib_logfile_basename) + 11 < sizeof(name));
-
- buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
- memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
-
- for (i = 0; i < n_log_files; i++) {
-
- sprintf(name, "%s%s%lu", log_dir,
- ib_logfile_basename, (ulong) i);
-
- log_file = os_file_create_simple(innodb_file_log_key,
- name, OS_FILE_CREATE,
- OS_FILE_READ_WRITE,
- &success);
- if (!success) {
- fprintf(stderr,
- "InnoDB: Cannot create %s. Check that"
- " the file does not exist yet.\n", name);
-
- exit(1);
- }
-
- fprintf(stderr,
- "Setting log file size to %llu\n",
- log_file_size);
-
- success = os_file_set_size(name, log_file, log_file_size);
-
- if (!success) {
- fprintf(stderr,
- "InnoDB: Cannot set %s size to %llu\n",
- name, log_file_size);
- exit(1);
- }
-
- os_file_flush(log_file);
- os_file_close(log_file);
- }
-
- /* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
-
- log_reset_first_header_and_checkpoint(buf, lsn);
-
- log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn);
- log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
- LOG_BLOCK_HDR_SIZE);
- sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0);
-
- log_file = os_file_create_simple(innodb_file_log_key,
- name, OS_FILE_OPEN,
- OS_FILE_READ_WRITE, &success);
- if (!success) {
- fprintf(stderr, "InnoDB: Cannot open %s.\n", name);
-
- exit(1);
- }
-
- os_file_write(name, log_file, buf, 0,
- LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
- os_file_flush(log_file);
- os_file_close(log_file);
-
- ut_free(buf);
-}
-#endif /* UNIV_HOTBACKUP */
-
-/******************************************************//**
-Checks the 4-byte checksum to the trailer checksum field of a log
-block. We also accept a log block in the old format before
-InnoDB-3.23.52 where the checksum field contains the log block number.
-@return TRUE if ok, or if the log block may be in the format of InnoDB
-version predating 3.23.52 */
-UNIV_INTERN
-ibool
-log_block_checksum_is_ok_or_old_format(
-/*===================================*/
- const byte* block) /*!< in: pointer to a log block */
-{
-#ifdef UNIV_LOG_DEBUG
- return(TRUE);
-#endif /* UNIV_LOG_DEBUG */
-
- ulint block_checksum = log_block_get_checksum(block);
-
- if (UNIV_LIKELY(srv_log_checksum_algorithm ==
- SRV_CHECKSUM_ALGORITHM_NONE ||
- log_block_calc_checksum(block) == block_checksum)) {
-
- return(TRUE);
- }
-
- if (srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32 ||
- srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB ||
- srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_STRICT_NONE) {
-
- const char* algo = NULL;
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "log block checksum mismatch: expected " ULINTPF ", "
- "calculated checksum " ULINTPF,
- block_checksum,
- log_block_calc_checksum(block));
-
- if (block_checksum == LOG_NO_CHECKSUM_MAGIC) {
-
- algo = "none";
- } else if (block_checksum ==
- log_block_calc_checksum_crc32(block)) {
-
- algo = "crc32";
- } else if (block_checksum ==
- log_block_calc_checksum_innodb(block)) {
-
- algo = "innodb";
- }
-
- if (algo) {
-
- const char* current_algo;
-
- current_algo = buf_checksum_algorithm_name(
- (srv_checksum_algorithm_t)
- srv_log_checksum_algorithm);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "current InnoDB log checksum type: %s, "
- "detected log checksum type: %s",
- current_algo,
- algo);
- }
-
- ib_logf(IB_LOG_LEVEL_FATAL,
- "STRICT method was specified for innodb_log_checksum, "
- "so we intentionally assert here.");
- }
-
- ut_ad(srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_CRC32 ||
- srv_log_checksum_algorithm == SRV_CHECKSUM_ALGORITHM_INNODB);
-
- if (block_checksum == LOG_NO_CHECKSUM_MAGIC ||
- block_checksum == log_block_calc_checksum_crc32(block) ||
- block_checksum == log_block_calc_checksum_innodb(block)) {
-
- return(TRUE);
- }
-
- if (log_block_get_hdr_no(block) == block_checksum) {
-
- /* We assume the log block is in the format of
- InnoDB version < 3.23.52 and the block is ok */
-#if 0
- fprintf(stderr,
- "InnoDB: Scanned old format < InnoDB-3.23.52"
- " log block number %lu\n",
- log_block_get_hdr_no(block));
-#endif
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-void recv_dblwr_t::add(byte* page)
-{
- pages.push_back(page);
-}
-
-byte* recv_dblwr_t::find_page(ulint space_id, ulint page_no)
-{
- std::vector<byte*> matches;
- byte* result = 0;
-
- for (std::list<byte*>::iterator i = pages.begin();
- i != pages.end(); ++i) {
-
- if ((page_get_space_id(*i) == space_id)
- && (page_get_page_no(*i) == page_no)) {
- matches.push_back(*i);
- }
- }
-
- if (matches.size() == 1) {
- result = matches[0];
- } else if (matches.size() > 1) {
-
- lsn_t max_lsn = 0;
- lsn_t page_lsn = 0;
-
- for (std::vector<byte*>::iterator i = matches.begin();
- i != matches.end(); ++i) {
-
- page_lsn = mach_read_from_8(*i + FIL_PAGE_LSN);
-
- if (page_lsn > max_lsn) {
- max_lsn = page_lsn;
- result = *i;
- }
- }
- }
-
- return(result);
-}
diff --git a/storage/xtradb/mach/mach0data.cc b/storage/xtradb/mach/mach0data.cc
deleted file mode 100644
index feeedb01609..00000000000
--- a/storage/xtradb/mach/mach0data.cc
+++ /dev/null
@@ -1,125 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file mach/mach0data.cc
-Utilities for converting data from the database file
-to the machine format.
-
-Created 11/28/1995 Heikki Tuuri
-***********************************************************************/
-
-#include "mach0data.h"
-
-#ifdef UNIV_NONINL
-#include "mach0data.ic"
-#endif
-
-/*********************************************************//**
-Reads a ulint in a compressed form if the log record fully contains it.
-@return pointer to end of the stored field, NULL if not complete */
-UNIV_INTERN
-byte*
-mach_parse_compressed(
-/*==================*/
- byte* ptr, /*!< in: pointer to buffer from where to read */
- byte* end_ptr,/*!< in: pointer to end of the buffer */
- ulint* val) /*!< out: read value (< 2^32) */
-{
- ulint flag;
-
- ut_ad(ptr && end_ptr && val);
-
- if (ptr >= end_ptr) {
-
- return(NULL);
- }
-
- flag = mach_read_from_1(ptr);
-
- if (flag < 0x80UL) {
- *val = flag;
- return(ptr + 1);
- }
-
- /* Workaround GCC bug
- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77673:
- the compiler moves mach_read_from_4 right to the beginning of the
- function, causing and out-of-bounds read if we are reading a short
- integer close to the end of buffer. */
-#if defined(__GNUC__) && (__GNUC__ >= 5) && !defined(__clang__)
-#define DEPLOY_FENCE
-#endif
-
-#ifdef DEPLOY_FENCE
- __atomic_thread_fence(__ATOMIC_ACQUIRE);
-#endif
-
- if (flag < 0xC0UL) {
- if (end_ptr < ptr + 2) {
- return(NULL);
- }
-
- *val = mach_read_from_2(ptr) & 0x7FFFUL;
-
- return(ptr + 2);
- }
-
-#ifdef DEPLOY_FENCE
- __atomic_thread_fence(__ATOMIC_ACQUIRE);
-#endif
-
- if (flag < 0xE0UL) {
- if (end_ptr < ptr + 3) {
- return(NULL);
- }
-
- *val = mach_read_from_3(ptr) & 0x3FFFFFUL;
-
- return(ptr + 3);
- }
-
-#ifdef DEPLOY_FENCE
- __atomic_thread_fence(__ATOMIC_ACQUIRE);
-#endif
-
- if (flag < 0xF0UL) {
- if (end_ptr < ptr + 4) {
- return(NULL);
- }
-
- *val = mach_read_from_4(ptr) & 0x1FFFFFFFUL;
-
- return(ptr + 4);
- }
-
-#ifdef DEPLOY_FENCE
- __atomic_thread_fence(__ATOMIC_ACQUIRE);
-#endif
-
-#undef DEPLOY_FENCE
-
- ut_ad(flag == 0xF0UL);
-
- if (end_ptr < ptr + 5) {
- return(NULL);
- }
-
- *val = mach_read_from_4(ptr + 1);
- return(ptr + 5);
-}
diff --git a/storage/xtradb/mem/mem0dbg.cc b/storage/xtradb/mem/mem0dbg.cc
deleted file mode 100644
index a77785a369a..00000000000
--- a/storage/xtradb/mem/mem0dbg.cc
+++ /dev/null
@@ -1,1050 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file mem/mem0dbg.cc
-The memory management: the debug code. This is not a compilation module,
-but is included in mem0mem.* !
-
-Created 6/9/1994 Heikki Tuuri
-*************************************************************************/
-
-#ifdef UNIV_MEM_DEBUG
-# ifndef UNIV_HOTBACKUP
-# include "ha_prototypes.h"
-/* The mutex which protects in the debug version the hash table
-containing the list of live memory heaps, and also the global
-variables below. */
-UNIV_INTERN ib_mutex_t mem_hash_mutex;
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register mem_hash_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t mem_hash_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-# endif /* !UNIV_HOTBACKUP */
-
-/* The following variables contain information about the
-extent of memory allocations. Only used in the debug version.
-Protected by mem_hash_mutex above. */
-
-static ulint mem_n_created_heaps = 0;
-static ulint mem_n_allocations = 0;
-static ulint mem_total_allocated_memory = 0;
-UNIV_INTERN ulint mem_current_allocated_memory = 0;
-static ulint mem_max_allocated_memory = 0;
-# ifndef UNIV_HOTBACKUP
-static ulint mem_last_print_info = 0;
-static ibool mem_hash_initialized = FALSE;
-# endif /* !UNIV_HOTBACKUP */
-
-/* Size of the hash table for memory management tracking */
-#define MEM_HASH_SIZE 997
-
-/* The node of the list containing currently allocated memory heaps */
-
-struct mem_hash_node_t {
- UT_LIST_NODE_T(mem_hash_node_t)
- list; /*!< hash list node */
- mem_heap_t* heap; /*!< memory heap */
- const char* file_name;/* file where heap was created*/
- ulint line; /*!< file line of creation */
- ulint nth_heap;/* this is the nth heap created */
- UT_LIST_NODE_T(mem_hash_node_t)
- all_list;/* list of all created heaps */
-};
-
-typedef UT_LIST_BASE_NODE_T(mem_hash_node_t) mem_hash_cell_t;
-
-/* The hash table of allocated heaps */
-static mem_hash_cell_t mem_hash_table[MEM_HASH_SIZE];
-
-/* The base node of the list of all allocated heaps */
-static mem_hash_cell_t mem_all_list_base;
-
-
-
-UNIV_INLINE
-mem_hash_cell_t*
-mem_hash_get_nth_cell(ulint i);
-
-/* Accessor function for the hash table. Returns a pointer to the
-table cell. */
-UNIV_INLINE
-mem_hash_cell_t*
-mem_hash_get_nth_cell(ulint i)
-{
- ut_a(i < MEM_HASH_SIZE);
-
- return(&(mem_hash_table[i]));
-}
-
-/* Accessor functions for a memory field in the debug version */
-UNIV_INTERN
-void
-mem_field_header_set_len(byte* field, ulint len)
-{
- mach_write_to_4(field - 2 * sizeof(ulint), len);
-}
-
-UNIV_INTERN
-ulint
-mem_field_header_get_len(byte* field)
-{
- return(mach_read_from_4(field - 2 * sizeof(ulint)));
-}
-
-UNIV_INTERN
-void
-mem_field_header_set_check(byte* field, ulint check)
-{
- mach_write_to_4(field - sizeof(ulint), check);
-}
-
-UNIV_INTERN
-ulint
-mem_field_header_get_check(byte* field)
-{
- return(mach_read_from_4(field - sizeof(ulint)));
-}
-
-UNIV_INTERN
-void
-mem_field_trailer_set_check(byte* field, ulint check)
-{
- mach_write_to_4(field + mem_field_header_get_len(field), check);
-}
-
-UNIV_INTERN
-ulint
-mem_field_trailer_get_check(byte* field)
-{
- return(mach_read_from_4(field
- + mem_field_header_get_len(field)));
-}
-#endif /* UNIV_MEM_DEBUG */
-
-#ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Initializes the memory system. */
-UNIV_INTERN
-void
-mem_init(
-/*=====*/
- ulint size) /*!< in: common pool size in bytes */
-{
-#ifdef UNIV_MEM_DEBUG
-
- ulint i;
-
- /* Initialize the hash table */
- ut_a(FALSE == mem_hash_initialized);
-
- mutex_create(mem_hash_mutex_key, &mem_hash_mutex, SYNC_MEM_HASH);
-
- for (i = 0; i < MEM_HASH_SIZE; i++) {
- UT_LIST_INIT(*mem_hash_get_nth_cell(i));
- }
-
- UT_LIST_INIT(mem_all_list_base);
-
- mem_hash_initialized = TRUE;
-#endif
-
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- /* When innodb_use_sys_malloc is set, the
- mem_comm_pool won't be used for any allocations. We
- create a dummy mem_comm_pool, because some statistics
- and debugging code relies on it being initialized. */
- size = 1;
- }
-
- mem_comm_pool = mem_pool_create(size);
-}
-
-/******************************************************************//**
-Closes the memory system. */
-UNIV_INTERN
-void
-mem_close(void)
-/*===========*/
-{
- mem_pool_free(mem_comm_pool);
- mem_comm_pool = NULL;
-#ifdef UNIV_MEM_DEBUG
- mutex_free(&mem_hash_mutex);
- mem_hash_initialized = FALSE;
-#endif /* UNIV_MEM_DEBUG */
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_MEM_DEBUG
-/******************************************************************//**
-Initializes an allocated memory field in the debug version. */
-UNIV_INTERN
-void
-mem_field_init(
-/*===========*/
- byte* buf, /*!< in: memory field */
- ulint n) /*!< in: how many bytes the user requested */
-{
- ulint rnd;
- byte* usr_buf;
-
- usr_buf = buf + MEM_FIELD_HEADER_SIZE;
-
- /* In the debug version write the length field and the
- check fields to the start and the end of the allocated storage.
- The field header consists of a length field and
- a random number field, in this order. The field trailer contains
- the same random number as a check field. */
-
- mem_field_header_set_len(usr_buf, n);
-
- rnd = ut_rnd_gen_ulint();
-
- mem_field_header_set_check(usr_buf, rnd);
- mem_field_trailer_set_check(usr_buf, rnd);
-
- /* Update the memory allocation information */
-
- mutex_enter(&mem_hash_mutex);
-
- mem_total_allocated_memory += n;
- mem_current_allocated_memory += n;
- mem_n_allocations++;
-
- if (mem_current_allocated_memory > mem_max_allocated_memory) {
- mem_max_allocated_memory = mem_current_allocated_memory;
- }
-
- mutex_exit(&mem_hash_mutex);
-
- /* In the debug version set the buffer to a random
- combination of 0xBA and 0xBE */
-
- mem_init_buf(usr_buf, n);
-}
-
-/******************************************************************//**
-Erases an allocated memory field in the debug version. */
-UNIV_INTERN
-void
-mem_field_erase(
-/*============*/
- byte* buf, /*!< in: memory field */
- ulint n MY_ATTRIBUTE((unused)))
- /*!< in: how many bytes the user requested */
-{
- byte* usr_buf;
-
- usr_buf = buf + MEM_FIELD_HEADER_SIZE;
-
- mutex_enter(&mem_hash_mutex);
- mem_current_allocated_memory -= n;
- mutex_exit(&mem_hash_mutex);
-
- /* Check that the field lengths agree */
- ut_ad(n == (ulint) mem_field_header_get_len(usr_buf));
-
- /* In the debug version, set the freed space to a random
- combination of 0xDE and 0xAD */
-
- mem_erase_buf(buf, MEM_SPACE_NEEDED(n));
-}
-
-/***************************************************************//**
-Initializes a buffer to a random combination of hex BA and BE.
-Used to initialize allocated memory. */
-UNIV_INTERN
-void
-mem_init_buf(
-/*=========*/
- byte* buf, /*!< in: pointer to buffer */
- ulint n) /*!< in: length of buffer */
-{
- byte* ptr;
-
- UNIV_MEM_ASSERT_W(buf, n);
-
- for (ptr = buf; ptr < buf + n; ptr++) {
-
- if (ut_rnd_gen_ibool()) {
- *ptr = 0xBA;
- } else {
- *ptr = 0xBE;
- }
- }
-
- UNIV_MEM_INVALID(buf, n);
-}
-
-/***************************************************************//**
-Initializes a buffer to a random combination of hex DE and AD.
-Used to erase freed memory. */
-UNIV_INTERN
-void
-mem_erase_buf(
-/*==========*/
- byte* buf, /*!< in: pointer to buffer */
- ulint n) /*!< in: length of buffer */
-{
- byte* ptr;
-
- UNIV_MEM_ASSERT_W(buf, n);
-
- for (ptr = buf; ptr < buf + n; ptr++) {
- if (ut_rnd_gen_ibool()) {
- *ptr = 0xDE;
- } else {
- *ptr = 0xAD;
- }
- }
-
- UNIV_MEM_FREE(buf, n);
-}
-
-/***************************************************************//**
-Inserts a created memory heap to the hash table of current allocated
-memory heaps. */
-UNIV_INTERN
-void
-mem_hash_insert(
-/*============*/
- mem_heap_t* heap, /*!< in: the created heap */
- const char* file_name, /*!< in: file name of creation */
- ulint line) /*!< in: line where created */
-{
- mem_hash_node_t* new_node;
- ulint cell_no ;
-
- ut_ad(mem_heap_check(heap));
-
- mutex_enter(&mem_hash_mutex);
-
- cell_no = ut_hash_ulint((ulint) heap, MEM_HASH_SIZE);
-
- /* Allocate a new node to the list */
- new_node = static_cast<mem_hash_node_t*>(ut_malloc(sizeof(*new_node)));
-
- new_node->heap = heap;
- new_node->file_name = file_name;
- new_node->line = line;
- new_node->nth_heap = mem_n_created_heaps;
-
- /* Insert into lists */
- UT_LIST_ADD_FIRST(list, *mem_hash_get_nth_cell(cell_no), new_node);
-
- UT_LIST_ADD_LAST(all_list, mem_all_list_base, new_node);
-
- mem_n_created_heaps++;
-
- mutex_exit(&mem_hash_mutex);
-}
-
-/***************************************************************//**
-Removes a memory heap (which is going to be freed by the caller)
-from the list of live memory heaps. Returns the size of the heap
-in terms of how much memory in bytes was allocated for the user of
-the heap (not the total space occupied by the heap).
-Also validates the heap.
-NOTE: This function does not free the storage occupied by the
-heap itself, only the node in the list of heaps. */
-UNIV_INTERN
-void
-mem_hash_remove(
-/*============*/
- mem_heap_t* heap, /*!< in: the heap to be freed */
- const char* file_name, /*!< in: file name of freeing */
- ulint line) /*!< in: line where freed */
-{
- mem_hash_node_t* node;
- ulint cell_no;
- ibool error;
- ulint size;
-
- ut_ad(mem_heap_check(heap));
-
- mutex_enter(&mem_hash_mutex);
-
- cell_no = ut_hash_ulint((ulint) heap, MEM_HASH_SIZE);
-
- /* Look for the heap in the hash table list */
- node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(cell_no));
-
- while (node != NULL) {
- if (node->heap == heap) {
-
- break;
- }
-
- node = UT_LIST_GET_NEXT(list, node);
- }
-
- if (node == NULL) {
- fprintf(stderr,
- "Memory heap or buffer freed in %s line %lu"
- " did not exist.\n",
- innobase_basename(file_name), (ulong) line);
- ut_error;
- }
-
- /* Remove from lists */
- UT_LIST_REMOVE(list, *mem_hash_get_nth_cell(cell_no), node);
-
- UT_LIST_REMOVE(all_list, mem_all_list_base, node);
-
- /* Validate the heap which will be freed */
- mem_heap_validate_or_print(node->heap, NULL, FALSE, &error, &size,
- NULL, NULL);
- if (error) {
- fprintf(stderr,
- "Inconsistency in memory heap or"
- " buffer n:o %lu created\n"
- "in %s line %lu and tried to free in %s line %lu.\n"
- "Hex dump of 400 bytes around memory heap"
- " first block start:\n",
- node->nth_heap,
- innobase_basename(node->file_name), (ulong) node->line,
- innobase_basename(file_name), (ulong) line);
- ut_print_buf(stderr, (byte*) node->heap - 200, 400);
- fputs("\nDump of the mem heap:\n", stderr);
- mem_heap_validate_or_print(node->heap, NULL, TRUE, &error,
- &size, NULL, NULL);
- ut_error;
- }
-
- /* Free the memory occupied by the node struct */
- ut_free(node);
-
- mem_current_allocated_memory -= size;
-
- mutex_exit(&mem_hash_mutex);
-}
-#endif /* UNIV_MEM_DEBUG */
-
-#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
-/***************************************************************//**
-Checks a memory heap for consistency and prints the contents if requested.
-Outputs the sum of sizes of buffers given to the user (only in
-the debug version), the physical size of the heap and the number of
-blocks in the heap. In case of error returns 0 as sizes and number
-of blocks. */
-UNIV_INTERN
-void
-mem_heap_validate_or_print(
-/*=======================*/
- mem_heap_t* heap, /*!< in: memory heap */
- byte* top MY_ATTRIBUTE((unused)),
- /*!< in: calculate and validate only until
- this top pointer in the heap is reached,
- if this pointer is NULL, ignored */
- ibool print, /*!< in: if TRUE, prints the contents
- of the heap; works only in
- the debug version */
- ibool* error, /*!< out: TRUE if error */
- ulint* us_size,/*!< out: allocated memory
- (for the user) in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored; in the
- non-debug version this is always -1 */
- ulint* ph_size,/*!< out: physical size of the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
- ulint* n_blocks) /*!< out: number of blocks in the heap,
- if a NULL pointer is passed as this
- argument, it is ignored */
-{
- mem_block_t* block;
- ulint total_len = 0;
- ulint block_count = 0;
- ulint phys_len = 0;
-#ifdef UNIV_MEM_DEBUG
- ulint len;
- byte* field;
- byte* user_field;
- ulint check_field;
-#endif
-
- /* Pessimistically, we set the parameters to error values */
- if (us_size != NULL) {
- *us_size = 0;
- }
- if (ph_size != NULL) {
- *ph_size = 0;
- }
- if (n_blocks != NULL) {
- *n_blocks = 0;
- }
- *error = TRUE;
-
- block = heap;
-
- if (block->magic_n != MEM_BLOCK_MAGIC_N) {
- return;
- }
-
- if (print) {
- fputs("Memory heap:", stderr);
- }
-
- while (block != NULL) {
- phys_len += mem_block_get_len(block);
-
- if ((block->type == MEM_HEAP_BUFFER)
- && (mem_block_get_len(block) > UNIV_PAGE_SIZE)) {
-
- fprintf(stderr,
- "InnoDB: Error: mem block %p"
- " length %lu > UNIV_PAGE_SIZE\n",
- (void*) block,
- (ulong) mem_block_get_len(block));
- /* error */
-
- return;
- }
-
-#ifdef UNIV_MEM_DEBUG
- /* We can trace the fields of the block only in the debug
- version */
- if (print) {
- fprintf(stderr, " Block %ld:", block_count);
- }
-
- field = (byte*) block + mem_block_get_start(block);
-
- if (top && (field == top)) {
-
- goto completed;
- }
-
- while (field < (byte*) block + mem_block_get_free(block)) {
-
- /* Calculate the pointer to the storage
- which was given to the user */
-
- user_field = field + MEM_FIELD_HEADER_SIZE;
-
- len = mem_field_header_get_len(user_field);
-
- if (print) {
- ut_print_buf(stderr, user_field, len);
- putc('\n', stderr);
- }
-
- total_len += len;
- check_field = mem_field_header_get_check(user_field);
-
- if (check_field
- != mem_field_trailer_get_check(user_field)) {
- /* error */
-
- fprintf(stderr,
- "InnoDB: Error: block %lx mem"
- " field %lx len %lu\n"
- "InnoDB: header check field is"
- " %lx but trailer %lx\n",
- (ulint) block,
- (ulint) field, len, check_field,
- mem_field_trailer_get_check(
- user_field));
-
- return;
- }
-
- /* Move to next field */
- field = field + MEM_SPACE_NEEDED(len);
-
- if (top && (field == top)) {
-
- goto completed;
- }
-
- }
-
- /* At the end check that we have arrived to the first free
- position */
-
- if (field != (byte*) block + mem_block_get_free(block)) {
- /* error */
-
- fprintf(stderr,
- "InnoDB: Error: block %lx end of"
- " mem fields %lx\n"
- "InnoDB: but block free at %lx\n",
- (ulint) block, (ulint) field,
- (ulint)((byte*) block
- + mem_block_get_free(block)));
-
- return;
- }
-
-#endif
-
- block = UT_LIST_GET_NEXT(list, block);
- block_count++;
- }
-#ifdef UNIV_MEM_DEBUG
-completed:
-#endif
- if (us_size != NULL) {
- *us_size = total_len;
- }
- if (ph_size != NULL) {
- *ph_size = phys_len;
- }
- if (n_blocks != NULL) {
- *n_blocks = block_count;
- }
- *error = FALSE;
-}
-
-/**************************************************************//**
-Prints the contents of a memory heap. */
-static
-void
-mem_heap_print(
-/*===========*/
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ibool error;
- ulint us_size;
- ulint phys_size;
- ulint n_blocks;
-
- ut_ad(mem_heap_check(heap));
-
- mem_heap_validate_or_print(heap, NULL, TRUE, &error,
- &us_size, &phys_size, &n_blocks);
- fprintf(stderr,
- "\nheap type: %lu; size: user size %lu;"
- " physical size %lu; blocks %lu.\n",
- (ulong) heap->type, (ulong) us_size,
- (ulong) phys_size, (ulong) n_blocks);
- ut_a(!error);
-}
-
-/**************************************************************//**
-Validates the contents of a memory heap.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_heap_validate(
-/*==============*/
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ibool error;
- ulint us_size;
- ulint phys_size;
- ulint n_blocks;
-
- ut_ad(mem_heap_check(heap));
-
- mem_heap_validate_or_print(heap, NULL, FALSE, &error, &us_size,
- &phys_size, &n_blocks);
- if (error) {
- mem_heap_print(heap);
- }
-
- ut_a(!error);
-
- return(TRUE);
-}
-#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */
-
-#ifdef UNIV_DEBUG
-/**************************************************************//**
-Checks that an object is a memory heap (or a block of it).
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_heap_check(
-/*===========*/
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ut_a(heap->magic_n == MEM_BLOCK_MAGIC_N);
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-#ifdef UNIV_MEM_DEBUG
-/*****************************************************************//**
-TRUE if no memory is currently allocated.
-@return TRUE if no heaps exist */
-UNIV_INTERN
-ibool
-mem_all_freed(void)
-/*===============*/
-{
- mem_hash_node_t* node;
- ulint heap_count = 0;
- ulint i;
-
- mem_validate();
-
- mutex_enter(&mem_hash_mutex);
-
- for (i = 0; i < MEM_HASH_SIZE; i++) {
-
- node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(i));
- while (node != NULL) {
- heap_count++;
- node = UT_LIST_GET_NEXT(list, node);
- }
- }
-
- mutex_exit(&mem_hash_mutex);
-
- if (heap_count == 0) {
-# ifndef UNIV_HOTBACKUP
- ut_a(mem_pool_get_reserved(mem_comm_pool) == 0);
-# endif /* !UNIV_HOTBACKUP */
-
- return(TRUE);
- } else {
- return(FALSE);
- }
-}
-
-/*****************************************************************//**
-Validates the dynamic memory allocation system.
-@return TRUE if error */
-UNIV_INTERN
-ibool
-mem_validate_no_assert(void)
-/*========================*/
-{
- mem_hash_node_t* node;
- ulint n_heaps = 0;
- ulint allocated_mem;
- ulint ph_size;
- ulint total_allocated_mem = 0;
- ibool error = FALSE;
- ulint n_blocks;
- ulint i;
-
-# ifndef UNIV_HOTBACKUP
- mem_pool_validate(mem_comm_pool);
-# endif /* !UNIV_HOTBACKUP */
-
- mutex_enter(&mem_hash_mutex);
-
- for (i = 0; i < MEM_HASH_SIZE; i++) {
-
- node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(i));
-
- while (node != NULL) {
- n_heaps++;
-
- mem_heap_validate_or_print(node->heap, NULL,
- FALSE, &error,
- &allocated_mem,
- &ph_size, &n_blocks);
-
- if (error) {
- fprintf(stderr,
- "\nERROR!!!!!!!!!!!!!!!!!!!"
- "!!!!!!!!!!!!!!!!!!!!!!!\n\n"
- "Inconsistency in memory heap"
- " or buffer created\n"
- "in %s line %lu.\n",
- innobase_basename(node->file_name),
- node->line);
-
- mutex_exit(&mem_hash_mutex);
-
- return(TRUE);
- }
-
- total_allocated_mem += allocated_mem;
- node = UT_LIST_GET_NEXT(list, node);
- }
- }
-
- if ((n_heaps == 0) && (mem_current_allocated_memory != 0)) {
- error = TRUE;
- }
-
- if (mem_total_allocated_memory < mem_current_allocated_memory) {
- error = TRUE;
- }
-
- if (mem_max_allocated_memory > mem_total_allocated_memory) {
- error = TRUE;
- }
-
- if (mem_n_created_heaps < n_heaps) {
- error = TRUE;
- }
-
- mutex_exit(&mem_hash_mutex);
-
- return(error);
-}
-
-/************************************************************//**
-Validates the dynamic memory
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_validate(void)
-/*==============*/
-{
- ut_a(!mem_validate_no_assert());
-
- return(TRUE);
-}
-#endif /* UNIV_MEM_DEBUG */
-
-/************************************************************//**
-Tries to find neigboring memory allocation blocks and dumps to stderr
-the neighborhood of a given pointer. */
-UNIV_INTERN
-void
-mem_analyze_corruption(
-/*===================*/
- void* ptr) /*!< in: pointer to place of possible corruption */
-{
- byte* p;
- ulint i;
- ulint dist;
-
- fputs("InnoDB: Apparent memory corruption: mem dump ", stderr);
- ut_print_buf(stderr, (byte*) ptr - 250, 500);
-
- fputs("\nInnoDB: Scanning backward trying to find"
- " previous allocated mem blocks\n", stderr);
-
- p = (byte*) ptr;
- dist = 0;
-
- for (i = 0; i < 10; i++) {
- for (;;) {
- if (((ulint) p) % 4 == 0) {
-
- if (*((ulint*) p) == MEM_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Mem block at - %lu,"
- " file %s, line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
-
- if (*((ulint*) p) == MEM_FREED_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Freed mem block at - %lu,"
- " file %s, line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
- }
-
- p--;
- dist++;
- }
-
- p--;
- dist++;
- }
-
- fprintf(stderr,
- "InnoDB: Scanning forward trying to find next"
- " allocated mem blocks\n");
-
- p = (byte*) ptr;
- dist = 0;
-
- for (i = 0; i < 10; i++) {
- for (;;) {
- if (((ulint) p) % 4 == 0) {
-
- if (*((ulint*) p) == MEM_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Mem block at + %lu, file %s,"
- " line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
-
- if (*((ulint*) p) == MEM_FREED_BLOCK_MAGIC_N) {
- fprintf(stderr,
- "Freed mem block at + %lu,"
- " file %s, line %lu\n",
- (ulong) dist,
- (p + sizeof(ulint)),
- (ulong)
- (*(ulint*)(p + 8
- + sizeof(ulint))));
-
- break;
- }
- }
-
- p++;
- dist++;
- }
-
- p++;
- dist++;
- }
-}
-
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated
-memory heaps or buffers. Can only be used in the debug version. */
-static
-void
-mem_print_info_low(
-/*===============*/
- ibool print_all) /*!< in: if TRUE, all heaps are printed,
- else only the heaps allocated after the
- previous call of this function */
-{
-#ifdef UNIV_MEM_DEBUG
- mem_hash_node_t* node;
- ulint n_heaps = 0;
- ulint allocated_mem;
- ulint ph_size;
- ulint total_allocated_mem = 0;
- ibool error;
- ulint n_blocks;
-#endif
- FILE* outfile;
-
- /* outfile = fopen("ibdebug", "a"); */
-
- outfile = stdout;
-
- fprintf(outfile, "\n");
- fprintf(outfile,
- "________________________________________________________\n");
- fprintf(outfile, "MEMORY ALLOCATION INFORMATION\n\n");
-
-#ifndef UNIV_MEM_DEBUG
-
- UT_NOT_USED(print_all);
-
- mem_pool_print_info(outfile, mem_comm_pool);
-
- fprintf(outfile,
- "Sorry, non-debug version cannot give more memory info\n");
-
- /* fclose(outfile); */
-
- return;
-#else
- mutex_enter(&mem_hash_mutex);
-
- fprintf(outfile, "LIST OF CREATED HEAPS AND ALLOCATED BUFFERS: \n\n");
-
- if (!print_all) {
- fprintf(outfile, "AFTER THE LAST PRINT INFO\n");
- }
-
- node = UT_LIST_GET_FIRST(mem_all_list_base);
-
- while (node != NULL) {
- n_heaps++;
-
- if (!print_all && node->nth_heap < mem_last_print_info) {
-
- goto next_heap;
- }
-
- mem_heap_validate_or_print(node->heap, NULL,
- FALSE, &error, &allocated_mem,
- &ph_size, &n_blocks);
- total_allocated_mem += allocated_mem;
-
- fprintf(outfile,
- "%lu: file %s line %lu of size %lu phys.size %lu"
- " with %lu blocks, type %lu\n",
- node->nth_heap,
- innobase_basename(node->file_name), node->line,
- allocated_mem, ph_size, n_blocks,
- (node->heap)->type);
-next_heap:
- node = UT_LIST_GET_NEXT(all_list, node);
- }
-
- fprintf(outfile, "\n");
-
- fprintf(outfile, "Current allocated memory : %lu\n",
- mem_current_allocated_memory);
- fprintf(outfile, "Current allocated heaps and buffers : %lu\n",
- n_heaps);
- fprintf(outfile, "Cumulative allocated memory : %lu\n",
- mem_total_allocated_memory);
- fprintf(outfile, "Maximum allocated memory : %lu\n",
- mem_max_allocated_memory);
- fprintf(outfile, "Cumulative created heaps and buffers : %lu\n",
- mem_n_created_heaps);
- fprintf(outfile, "Cumulative number of allocations : %lu\n",
- mem_n_allocations);
-
- mem_last_print_info = mem_n_created_heaps;
-
- mutex_exit(&mem_hash_mutex);
-
- mem_pool_print_info(outfile, mem_comm_pool);
-
- /* mem_validate(); */
-
- /* fclose(outfile); */
-#endif
-}
-
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers. Can only be used in the debug version. */
-UNIV_INTERN
-void
-mem_print_info(void)
-/*================*/
-{
- mem_print_info_low(TRUE);
-}
-
-/*****************************************************************//**
-Prints information of dynamic memory usage and currently allocated memory
-heaps or buffers since the last ..._print_info or..._print_new_info. */
-UNIV_INTERN
-void
-mem_print_new_info(void)
-/*====================*/
-{
- mem_print_info_low(FALSE);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/mem/mem0mem.cc b/storage/xtradb/mem/mem0mem.cc
deleted file mode 100644
index e066aff5b30..00000000000
--- a/storage/xtradb/mem/mem0mem.cc
+++ /dev/null
@@ -1,583 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file mem/mem0mem.cc
-The memory management
-
-Created 6/9/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "mem0mem.h"
-#ifdef UNIV_NONINL
-#include "mem0mem.ic"
-#endif
-
-#include "buf0buf.h"
-#include "srv0srv.h"
-#include "mem0dbg.cc"
-#include <stdarg.h>
-
-/*
- THE MEMORY MANAGEMENT
- =====================
-
-The basic element of the memory management is called a memory
-heap. A memory heap is conceptually a
-stack from which memory can be allocated. The stack may grow infinitely.
-The top element of the stack may be freed, or
-the whole stack can be freed at one time. The advantage of the
-memory heap concept is that we can avoid using the malloc and free
-functions of C which are quite expensive, for example, on the Solaris + GCC
-system (50 MHz Sparc, 1993) the pair takes 3 microseconds,
-on Win NT + 100MHz Pentium, 2.5 microseconds.
-When we use a memory heap,
-we can allocate larger blocks of memory at a time and thus
-reduce overhead. Slightly more efficient the method is when we
-allocate the memory from the index page buffer pool, as we can
-claim a new page fast. This is called buffer allocation.
-When we allocate the memory from the dynamic memory of the
-C environment, that is called dynamic allocation.
-
-The default way of operation of the memory heap is the following.
-First, when the heap is created, an initial block of memory is
-allocated. In dynamic allocation this may be about 50 bytes.
-If more space is needed, additional blocks are allocated
-and they are put into a linked list.
-After the initial block, each allocated block is twice the size of the
-previous, until a threshold is attained, after which the sizes
-of the blocks stay the same. An exception is, of course, the case
-where the caller requests a memory buffer whose size is
-bigger than the threshold. In that case a block big enough must
-be allocated.
-
-The heap is physically arranged so that if the current block
-becomes full, a new block is allocated and always inserted in the
-chain of blocks as the last block.
-
-In the debug version of the memory management, all the allocated
-heaps are kept in a list (which is implemented as a hash table).
-Thus we can notice if the caller tries to free an already freed
-heap. In addition, each buffer given to the caller contains
-start field at the start and a trailer field at the end of the buffer.
-
-The start field has the following content:
-A. sizeof(ulint) bytes of field length (in the standard byte order)
-B. sizeof(ulint) bytes of check field (a random number)
-
-The trailer field contains:
-A. sizeof(ulint) bytes of check field (the same random number as at the start)
-
-Thus we can notice if something has been copied over the
-borders of the buffer, which is illegal.
-The memory in the buffers is initialized to a random byte sequence.
-After freeing, all the blocks in the heap are set to random bytes
-to help us discover errors which result from the use of
-buffers in an already freed heap. */
-
-#ifdef MEM_PERIODIC_CHECK
-
-ibool mem_block_list_inited;
-/* List of all mem blocks allocated; protected by the mem_comm_pool mutex */
-UT_LIST_BASE_NODE_T(mem_block_t) mem_block_list;
-
-#endif
-
-/**********************************************************************//**
-Duplicates a NUL-terminated string, allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INTERN
-char*
-mem_heap_strdup(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* str) /*!< in: string to be copied */
-{
- return(static_cast<char*>(mem_heap_dup(heap, str, strlen(str) + 1)));
-}
-
-/**********************************************************************//**
-Duplicate a block of data, allocated from a memory heap.
-@return own: a copy of the data */
-UNIV_INTERN
-void*
-mem_heap_dup(
-/*=========*/
- mem_heap_t* heap, /*!< in: memory heap where copy is allocated */
- const void* data, /*!< in: data to be copied */
- ulint len) /*!< in: length of data, in bytes */
-{
- return(memcpy(mem_heap_alloc(heap, len), data, len));
-}
-
-/**********************************************************************//**
-Concatenate two strings and return the result, using a memory heap.
-@return own: the result */
-UNIV_INTERN
-char*
-mem_heap_strcat(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* s1, /*!< in: string 1 */
- const char* s2) /*!< in: string 2 */
-{
- char* s;
- ulint s1_len = strlen(s1);
- ulint s2_len = strlen(s2);
-
- s = static_cast<char*>(mem_heap_alloc(heap, s1_len + s2_len + 1));
-
- memcpy(s, s1, s1_len);
- memcpy(s + s1_len, s2, s2_len);
-
- s[s1_len + s2_len] = '\0';
-
- return(s);
-}
-
-
-/****************************************************************//**
-Helper function for mem_heap_printf.
-@return length of formatted string, including terminating NUL */
-static
-ulint
-mem_heap_printf_low(
-/*================*/
- char* buf, /*!< in/out: buffer to store formatted string
- in, or NULL to just calculate length */
- const char* format, /*!< in: format string */
- va_list ap) /*!< in: arguments */
-{
- ulint len = 0;
-
- while (*format) {
-
- /* Does this format specifier have the 'l' length modifier. */
- ibool is_long = FALSE;
-
- /* Length of one parameter. */
- size_t plen;
-
- if (*format++ != '%') {
- /* Non-format character. */
-
- len++;
-
- if (buf) {
- *buf++ = *(format - 1);
- }
-
- continue;
- }
-
- if (*format == 'l') {
- is_long = TRUE;
- format++;
- }
-
- switch (*format++) {
- case 's':
- /* string */
- {
- char* s = va_arg(ap, char*);
-
- /* "%ls" is a non-sensical format specifier. */
- ut_a(!is_long);
-
- plen = strlen(s);
- len += plen;
-
- if (buf) {
- memcpy(buf, s, plen);
- buf += plen;
- }
- }
-
- break;
-
- case 'u':
- /* unsigned int */
- {
- char tmp[32];
- unsigned long val;
-
- /* We only support 'long' values for now. */
- ut_a(is_long);
-
- val = va_arg(ap, unsigned long);
-
- plen = sprintf(tmp, "%lu", val);
- len += plen;
-
- if (buf) {
- memcpy(buf, tmp, plen);
- buf += plen;
- }
- }
-
- break;
-
- case '%':
-
- /* "%l%" is a non-sensical format specifier. */
- ut_a(!is_long);
-
- len++;
-
- if (buf) {
- *buf++ = '%';
- }
-
- break;
-
- default:
- ut_error;
- }
- }
-
- /* For the NUL character. */
- len++;
-
- if (buf) {
- *buf = '\0';
- }
-
- return(len);
-}
-
-/****************************************************************//**
-A simple sprintf replacement that dynamically allocates the space for the
-formatted string from the given heap. This supports a very limited set of
-the printf syntax: types 's' and 'u' and length modifier 'l' (which is
-required for the 'u' type).
-@return heap-allocated formatted string */
-UNIV_INTERN
-char*
-mem_heap_printf(
-/*============*/
- mem_heap_t* heap, /*!< in: memory heap */
- const char* format, /*!< in: format string */
- ...)
-{
- va_list ap;
- char* str;
- ulint len;
-
- /* Calculate length of string */
- len = 0;
- va_start(ap, format);
- len = mem_heap_printf_low(NULL, format, ap);
- va_end(ap);
-
- /* Now create it for real. */
- str = static_cast<char*>(mem_heap_alloc(heap, len));
- va_start(ap, format);
- mem_heap_printf_low(str, format, ap);
- va_end(ap);
-
- return(str);
-}
-
-/***************************************************************//**
-Creates a memory heap block where data can be allocated.
-@return own: memory heap block, NULL if did not succeed (only possible
-for MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INTERN
-mem_block_t*
-mem_heap_create_block_func(
-/*=======================*/
- mem_heap_t* heap, /*!< in: memory heap or NULL if first block
- should be created */
- ulint n, /*!< in: number of bytes needed for user data */
-#ifdef UNIV_DEBUG
- const char* file_name,/*!< in: file name where created */
- ulint line, /*!< in: line where created */
-#endif /* UNIV_DEBUG */
- ulint type) /*!< in: type of heap: MEM_HEAP_DYNAMIC or
- MEM_HEAP_BUFFER */
-{
-#ifndef UNIV_HOTBACKUP
- buf_block_t* buf_block = NULL;
-#endif /* !UNIV_HOTBACKUP */
- mem_block_t* block;
- ulint len;
-
- ut_ad((type == MEM_HEAP_DYNAMIC) || (type == MEM_HEAP_BUFFER)
- || (type == MEM_HEAP_BUFFER + MEM_HEAP_BTR_SEARCH));
-
- if (heap && heap->magic_n != MEM_BLOCK_MAGIC_N) {
- mem_analyze_corruption(heap);
- }
-
- /* In dynamic allocation, calculate the size: block header + data. */
- len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
-
-#ifndef UNIV_HOTBACKUP
- if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) {
-
- ut_ad(type == MEM_HEAP_DYNAMIC || n <= MEM_MAX_ALLOC_IN_BUF);
-
- block = static_cast<mem_block_t*>(
- mem_area_alloc(&len, mem_comm_pool));
- } else {
- len = UNIV_PAGE_SIZE;
-
- if ((type & MEM_HEAP_BTR_SEARCH) && heap) {
- /* We cannot allocate the block from the
- buffer pool, but must get the free block from
- the heap header free block field */
-
- buf_block = static_cast<buf_block_t*>(heap->free_block);
- heap->free_block = NULL;
-
- if (UNIV_UNLIKELY(!buf_block)) {
-
- return(NULL);
- }
- } else {
- buf_block = buf_block_alloc(NULL);
- }
-
- block = (mem_block_t*) buf_block->frame;
- }
-
- if(!block) {
- ib_logf(IB_LOG_LEVEL_FATAL,
- " InnoDB: Unable to allocate memory of size %lu.\n",
- len);
- }
- block->buf_block = buf_block;
- block->free_block = NULL;
-#else /* !UNIV_HOTBACKUP */
- len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n);
- block = ut_malloc(len);
- ut_ad(block);
-#endif /* !UNIV_HOTBACKUP */
-
- block->magic_n = MEM_BLOCK_MAGIC_N;
- ut_d(ut_strlcpy_rev(block->file_name, file_name,
- sizeof(block->file_name)));
- ut_d(block->line = line);
-
-#ifdef MEM_PERIODIC_CHECK
- mutex_enter(&(mem_comm_pool->mutex));
-
- if (!mem_block_list_inited) {
- mem_block_list_inited = TRUE;
- UT_LIST_INIT(mem_block_list);
- }
-
- UT_LIST_ADD_LAST(mem_block_list, mem_block_list, block);
-
- mutex_exit(&(mem_comm_pool->mutex));
-#endif
- mem_block_set_len(block, len);
- mem_block_set_type(block, type);
- mem_block_set_free(block, MEM_BLOCK_HEADER_SIZE);
- mem_block_set_start(block, MEM_BLOCK_HEADER_SIZE);
-
- if (UNIV_UNLIKELY(heap == NULL)) {
- /* This is the first block of the heap. The field
- total_size should be initialized here */
- block->total_size = len;
- } else {
- /* Not the first allocation for the heap. This block's
- total_length field should be set to undefined. */
- ut_d(block->total_size = ULINT_UNDEFINED);
- UNIV_MEM_INVALID(&block->total_size,
- sizeof block->total_size);
-
- heap->total_size += len;
- }
-
- ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len);
-
- return(block);
-}
-
-/***************************************************************//**
-Adds a new block to a memory heap.
-@return created block, NULL if did not succeed (only possible for
-MEM_HEAP_BTR_SEARCH type heaps) */
-UNIV_INTERN
-mem_block_t*
-mem_heap_add_block(
-/*===============*/
- mem_heap_t* heap, /*!< in: memory heap */
- ulint n) /*!< in: number of bytes user needs */
-{
- mem_block_t* block;
- mem_block_t* new_block;
- ulint new_size;
-
- ut_ad(mem_heap_check(heap));
-
- block = UT_LIST_GET_LAST(heap->base);
-
- /* We have to allocate a new block. The size is always at least
- doubled until the standard size is reached. After that the size
- stays the same, except in cases where the caller needs more space. */
-
- new_size = 2 * mem_block_get_len(block);
-
- if (heap->type != MEM_HEAP_DYNAMIC) {
- /* From the buffer pool we allocate buffer frames */
- ut_a(n <= MEM_MAX_ALLOC_IN_BUF);
-
- if (new_size > MEM_MAX_ALLOC_IN_BUF) {
- new_size = MEM_MAX_ALLOC_IN_BUF;
- }
- } else if (new_size > MEM_BLOCK_STANDARD_SIZE) {
-
- new_size = MEM_BLOCK_STANDARD_SIZE;
- }
-
- if (new_size < n) {
- new_size = n;
- }
-
- new_block = mem_heap_create_block(heap, new_size, heap->type,
- heap->file_name, heap->line);
- if (new_block == NULL) {
-
- return(NULL);
- }
-
- /* Add the new block as the last block */
-
- UT_LIST_INSERT_AFTER(list, heap->base, block, new_block);
-
- return(new_block);
-}
-
-/******************************************************************//**
-Frees a block from a memory heap. */
-UNIV_INTERN
-void
-mem_heap_block_free(
-/*================*/
- mem_heap_t* heap, /*!< in: heap */
- mem_block_t* block) /*!< in: block to free */
-{
- ulint type;
- ulint len;
-#ifndef UNIV_HOTBACKUP
- buf_block_t* buf_block;
-
- buf_block = static_cast<buf_block_t*>(block->buf_block);
-#endif /* !UNIV_HOTBACKUP */
-
- if (block->magic_n != MEM_BLOCK_MAGIC_N) {
- mem_analyze_corruption(block);
- }
-
- UT_LIST_REMOVE(list, heap->base, block);
-
-#ifdef MEM_PERIODIC_CHECK
- mutex_enter(&(mem_comm_pool->mutex));
-
- UT_LIST_REMOVE(mem_block_list, mem_block_list, block);
-
- mutex_exit(&(mem_comm_pool->mutex));
-#endif
-
- ut_ad(heap->total_size >= block->len);
- heap->total_size -= block->len;
-
- type = heap->type;
- len = block->len;
- block->magic_n = MEM_FREED_BLOCK_MAGIC_N;
-
-#ifndef UNIV_HOTBACKUP
- if (!srv_use_sys_malloc) {
-#ifdef UNIV_MEM_DEBUG
- /* In the debug version we set the memory to a random
- combination of hex 0xDE and 0xAD. */
-
- mem_erase_buf((byte*) block, len);
-#else /* UNIV_MEM_DEBUG */
- UNIV_MEM_ASSERT_AND_FREE(block, len);
-#endif /* UNIV_MEM_DEBUG */
-
- }
- if (type == MEM_HEAP_DYNAMIC || len < UNIV_PAGE_SIZE / 2) {
-
- ut_ad(!buf_block);
- mem_area_free(block, mem_comm_pool);
- } else {
- ut_ad(type & MEM_HEAP_BUFFER);
-
- buf_block_free(buf_block);
- }
-#else /* !UNIV_HOTBACKUP */
-#ifdef UNIV_MEM_DEBUG
- /* In the debug version we set the memory to a random
- combination of hex 0xDE and 0xAD. */
-
- mem_erase_buf((byte*) block, len);
-#else /* UNIV_MEM_DEBUG */
- UNIV_MEM_ASSERT_AND_FREE(block, len);
-#endif /* UNIV_MEM_DEBUG */
- ut_free(block);
-#endif /* !UNIV_HOTBACKUP */
-}
-
-#ifndef UNIV_HOTBACKUP
-/******************************************************************//**
-Frees the free_block field from a memory heap. */
-UNIV_INTERN
-void
-mem_heap_free_block_free(
-/*=====================*/
- mem_heap_t* heap) /*!< in: heap */
-{
- if (UNIV_LIKELY_NULL(heap->free_block)) {
-
- buf_block_free(static_cast<buf_block_t*>(heap->free_block));
-
- heap->free_block = NULL;
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef MEM_PERIODIC_CHECK
-/******************************************************************//**
-Goes through the list of all allocated mem blocks, checks their magic
-numbers, and reports possible corruption. */
-UNIV_INTERN
-void
-mem_validate_all_blocks(void)
-/*=========================*/
-{
- mem_block_t* block;
-
- mutex_enter(&(mem_comm_pool->mutex));
-
- block = UT_LIST_GET_FIRST(mem_block_list);
-
- while (block) {
- if (block->magic_n != MEM_BLOCK_MAGIC_N) {
- mem_analyze_corruption(block);
- }
-
- block = UT_LIST_GET_NEXT(mem_block_list, block);
- }
-
- mutex_exit(&(mem_comm_pool->mutex));
-}
-#endif
diff --git a/storage/xtradb/mem/mem0pool.cc b/storage/xtradb/mem/mem0pool.cc
deleted file mode 100644
index 42d0417c768..00000000000
--- a/storage/xtradb/mem/mem0pool.cc
+++ /dev/null
@@ -1,728 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file mem/mem0pool.cc
-The lowest-level memory management
-
-Created 5/12/1997 Heikki Tuuri
-*************************************************************************/
-
-#include "mem0pool.h"
-#ifdef UNIV_NONINL
-#include "mem0pool.ic"
-#endif
-
-#include "srv0srv.h"
-#include "sync0sync.h"
-#include "ut0mem.h"
-#include "ut0lst.h"
-#include "ut0byte.h"
-#include "mem0mem.h"
-#include "srv0start.h"
-
-/* We would like to use also the buffer frames to allocate memory. This
-would be desirable, because then the memory consumption of the database
-would be fixed, and we might even lock the buffer pool to the main memory.
-The problem here is that the buffer management routines can themselves call
-memory allocation, while the buffer pool mutex is reserved.
-
-The main components of the memory consumption are:
-
-1. buffer pool,
-2. parsed and optimized SQL statements,
-3. data dictionary cache,
-4. log buffer,
-5. locks for each transaction,
-6. hash table for the adaptive index,
-7. state and buffers for each SQL query currently being executed,
-8. session for each user, and
-9. stack for each OS thread.
-
-Items 1 and 2 are managed by an LRU algorithm. Items 5 and 6 can potentially
-consume very much memory. Items 7 and 8 should consume quite little memory,
-and the OS should take care of item 9, which too should consume little memory.
-
-A solution to the memory management:
-
-1. the buffer pool size is set separately;
-2. log buffer size is set separately;
-3. the common pool size for all the other entries, except 8, is set separately.
-
-Problems: we may waste memory if the common pool is set too big. Another
-problem is the locks, which may take very much space in big transactions.
-Then the shared pool size should be set very big. We can allow locks to take
-space from the buffer pool, but the SQL optimizer is then unaware of the
-usable size of the buffer pool. We could also combine the objects in the
-common pool and the buffers in the buffer pool into a single LRU list and
-manage it uniformly, but this approach does not take into account the parsing
-and other costs unique to SQL statements.
-
-The locks for a transaction can be seen as a part of the state of the
-transaction. Hence, they should be stored in the common pool. We still
-have the problem of a very big update transaction, for example, which
-will set very many x-locks on rows, and the locks will consume a lot
-of memory, say, half of the buffer pool size.
-
-Another problem is what to do if we are not able to malloc a requested
-block of memory from the common pool. Then we can request memory from
-the operating system. If it does not help, a system error results.
-
-Because 5 and 6 may potentially consume very much memory, we let them grow
-into the buffer pool. We may let the locks of a transaction take frames
-from the buffer pool, when the corresponding memory heap block has grown to
-the size of a buffer frame. Similarly for the hash node cells of the locks,
-and for the adaptive index. Thus, for each individual transaction, its locks
-can occupy at most about the size of the buffer frame of memory in the common
-pool, and after that its locks will grow into the buffer pool. */
-
-/** Mask used to extract the free bit from area->size */
-#define MEM_AREA_FREE 1
-
-/** The smallest memory area total size */
-#define MEM_AREA_MIN_SIZE (2 * MEM_AREA_EXTRA_SIZE)
-
-
-/** Data structure for a memory pool. The space is allocated using the buddy
-algorithm, where free list i contains areas of size 2 to power i. */
-struct mem_pool_t{
- byte* buf; /*!< memory pool */
- ulint size; /*!< memory common pool size */
- ulint reserved; /*!< amount of currently allocated
- memory */
- ib_mutex_t mutex; /*!< mutex protecting this struct */
- UT_LIST_BASE_NODE_T(mem_area_t)
- free_list[64]; /*!< lists of free memory areas: an
- area is put to the list whose number
- is the 2-logarithm of the area size */
-};
-
-/** The common memory pool */
-UNIV_INTERN mem_pool_t* mem_comm_pool = NULL;
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register mutex in mem_pool_t with performance schema */
-UNIV_INTERN mysql_pfs_key_t mem_pool_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/* We use this counter to check that the mem pool mutex does not leak;
-this is to track a strange assertion failure reported at
-mysql@lists.mysql.com */
-
-UNIV_INTERN ulint mem_n_threads_inside = 0;
-
-/********************************************************************//**
-Reserves the mem pool mutex if we are not in server shutdown. Use
-this function only in memory free functions, since only memory
-free functions are used during server shutdown. */
-UNIV_INLINE
-void
-mem_pool_mutex_enter(
-/*=================*/
- mem_pool_t* pool) /*!< in: memory pool */
-{
- if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) {
- mutex_enter(&(pool->mutex));
- }
-}
-
-/********************************************************************//**
-Releases the mem pool mutex if we are not in server shutdown. As
-its corresponding mem_pool_mutex_enter() function, use it only
-in memory free functions */
-UNIV_INLINE
-void
-mem_pool_mutex_exit(
-/*================*/
- mem_pool_t* pool) /*!< in: memory pool */
-{
- if (srv_shutdown_state < SRV_SHUTDOWN_EXIT_THREADS) {
- mutex_exit(&(pool->mutex));
- }
-}
-
-/********************************************************************//**
-Returns memory area size.
-@return size */
-UNIV_INLINE
-ulint
-mem_area_get_size(
-/*==============*/
- mem_area_t* area) /*!< in: area */
-{
- return(area->size_and_free & ~MEM_AREA_FREE);
-}
-
-/********************************************************************//**
-Sets memory area size. */
-UNIV_INLINE
-void
-mem_area_set_size(
-/*==============*/
- mem_area_t* area, /*!< in: area */
- ulint size) /*!< in: size */
-{
- area->size_and_free = (area->size_and_free & MEM_AREA_FREE)
- | size;
-}
-
-/********************************************************************//**
-Returns memory area free bit.
-@return TRUE if free */
-UNIV_INLINE
-ibool
-mem_area_get_free(
-/*==============*/
- mem_area_t* area) /*!< in: area */
-{
-#if TRUE != MEM_AREA_FREE
-# error "TRUE != MEM_AREA_FREE"
-#endif
- return(area->size_and_free & MEM_AREA_FREE);
-}
-
-/********************************************************************//**
-Sets memory area free bit. */
-UNIV_INLINE
-void
-mem_area_set_free(
-/*==============*/
- mem_area_t* area, /*!< in: area */
- ibool free) /*!< in: free bit value */
-{
-#if TRUE != MEM_AREA_FREE
-# error "TRUE != MEM_AREA_FREE"
-#endif
- area->size_and_free = (area->size_and_free & ~MEM_AREA_FREE)
- | free;
-}
-
-/********************************************************************//**
-Creates a memory pool.
-@return memory pool */
-UNIV_INTERN
-mem_pool_t*
-mem_pool_create(
-/*============*/
- ulint size) /*!< in: pool size in bytes */
-{
- mem_pool_t* pool;
- mem_area_t* area;
- ulint i;
- ulint used;
-
- pool = static_cast<mem_pool_t*>(ut_malloc(sizeof(mem_pool_t)));
-
- pool->buf = static_cast<byte*>(ut_malloc_low(size, TRUE));
- pool->size = size;
-
- mutex_create(mem_pool_mutex_key, &pool->mutex, SYNC_MEM_POOL);
-
- /* Initialize the free lists */
-
- for (i = 0; i < 64; i++) {
-
- UT_LIST_INIT(pool->free_list[i]);
- }
-
- used = 0;
-
- while (size - used >= MEM_AREA_MIN_SIZE) {
-
- i = ut_2_log(size - used);
-
- if (ut_2_exp(i) > size - used) {
-
- /* ut_2_log rounds upward */
-
- i--;
- }
-
- area = (mem_area_t*)(pool->buf + used);
-
- mem_area_set_size(area, ut_2_exp(i));
- mem_area_set_free(area, TRUE);
- UNIV_MEM_FREE(MEM_AREA_EXTRA_SIZE + (byte*) area,
- ut_2_exp(i) - MEM_AREA_EXTRA_SIZE);
-
- UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
-
- used = used + ut_2_exp(i);
- }
-
- ut_ad(size >= used);
-
- pool->reserved = 0;
-
- return(pool);
-}
-
-/********************************************************************//**
-Frees a memory pool. */
-UNIV_INTERN
-void
-mem_pool_free(
-/*==========*/
- mem_pool_t* pool) /*!< in, own: memory pool */
-{
- mutex_free(&pool->mutex);
- ut_free(pool->buf);
- ut_free(pool);
-}
-
-/********************************************************************//**
-Fills the specified free list.
-@return TRUE if we were able to insert a block to the free list */
-static
-ibool
-mem_pool_fill_free_list(
-/*====================*/
- ulint i, /*!< in: free list index */
- mem_pool_t* pool) /*!< in: memory pool */
-{
- mem_area_t* area;
- mem_area_t* area2;
- ibool ret;
-
- ut_ad(mutex_own(&(pool->mutex)));
-
- if (UNIV_UNLIKELY(i >= 63)) {
- /* We come here when we have run out of space in the
- memory pool: */
-
- return(FALSE);
- }
-
- area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
-
- if (area == NULL) {
- if (UT_LIST_GET_LEN(pool->free_list[i + 1]) > 0) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: mem pool free list %lu"
- " length is %lu\n"
- "InnoDB: though the list is empty!\n",
- (ulong) i + 1,
- (ulong)
- UT_LIST_GET_LEN(pool->free_list[i + 1]));
- }
-
- ret = mem_pool_fill_free_list(i + 1, pool);
-
- if (ret == FALSE) {
-
- return(FALSE);
- }
-
- area = UT_LIST_GET_FIRST(pool->free_list[i + 1]);
- }
-
- if (UNIV_UNLIKELY(UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0)) {
- mem_analyze_corruption(area);
-
- ut_error;
- }
-
- UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area);
-
- area2 = (mem_area_t*)(((byte*) area) + ut_2_exp(i));
- UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE);
-
- mem_area_set_size(area2, ut_2_exp(i));
- mem_area_set_free(area2, TRUE);
-
- UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area2);
-
- mem_area_set_size(area, ut_2_exp(i));
-
- UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
-
- return(TRUE);
-}
-
-/********************************************************************//**
-Allocates memory from a pool. NOTE: This low-level function should only be
-used in mem0mem.*!
-@return own: allocated memory buffer */
-UNIV_INTERN
-void*
-mem_area_alloc(
-/*===========*/
- ulint* psize, /*!< in: requested size in bytes; for optimum
- space usage, the size should be a power of 2
- minus MEM_AREA_EXTRA_SIZE;
- out: allocated size in bytes (greater than
- or equal to the requested size) */
- mem_pool_t* pool) /*!< in: memory pool */
-{
- mem_area_t* area;
- ulint size;
- ulint n;
- ibool ret;
-
- /* If we are using os allocator just make a simple call
- to malloc */
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- return(malloc(*psize));
- }
-
- size = *psize;
- n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE));
-
- mutex_enter(&(pool->mutex));
- mem_n_threads_inside++;
-
- ut_a(mem_n_threads_inside == 1);
-
- area = UT_LIST_GET_FIRST(pool->free_list[n]);
-
- if (area == NULL) {
- ret = mem_pool_fill_free_list(n, pool);
-
- if (ret == FALSE) {
- /* Out of memory in memory pool: we try to allocate
- from the operating system with the regular malloc: */
-
- mem_n_threads_inside--;
- mutex_exit(&(pool->mutex));
-
- return(ut_malloc(size));
- }
-
- area = UT_LIST_GET_FIRST(pool->free_list[n]);
- }
-
- if (!mem_area_get_free(area)) {
- fprintf(stderr,
- "InnoDB: Error: Removing element from mem pool"
- " free list %lu though the\n"
- "InnoDB: element is not marked free!\n",
- (ulong) n);
-
- mem_analyze_corruption(area);
-
- /* Try to analyze a strange assertion failure reported at
- mysql@lists.mysql.com where the free bit IS 1 in the
- hex dump above */
-
- if (mem_area_get_free(area)) {
- fprintf(stderr,
- "InnoDB: Probably a race condition"
- " because now the area is marked free!\n");
- }
-
- ut_error;
- }
-
- if (UT_LIST_GET_LEN(pool->free_list[n]) == 0) {
- fprintf(stderr,
- "InnoDB: Error: Removing element from mem pool"
- " free list %lu\n"
- "InnoDB: though the list length is 0!\n",
- (ulong) n);
- mem_analyze_corruption(area);
-
- ut_error;
- }
-
- ut_ad(mem_area_get_size(area) == ut_2_exp(n));
-
- mem_area_set_free(area, FALSE);
-
- UT_LIST_REMOVE(free_list, pool->free_list[n], area);
-
- pool->reserved += mem_area_get_size(area);
-
- mem_n_threads_inside--;
- mutex_exit(&(pool->mutex));
-
- ut_ad(mem_pool_validate(pool));
-
- *psize = ut_2_exp(n) - MEM_AREA_EXTRA_SIZE;
- UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*) area, *psize);
-
- return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*) area)));
-}
-
-/********************************************************************//**
-Gets the buddy of an area, if it exists in pool.
-@return the buddy, NULL if no buddy in pool */
-UNIV_INLINE
-mem_area_t*
-mem_area_get_buddy(
-/*===============*/
- mem_area_t* area, /*!< in: memory area */
- ulint size, /*!< in: memory area size */
- mem_pool_t* pool) /*!< in: memory pool */
-{
- mem_area_t* buddy;
-
- ut_ad(size != 0);
-
- if (((((byte*) area) - pool->buf) % (2 * size)) == 0) {
-
- /* The buddy is in a higher address */
-
- buddy = (mem_area_t*)(((byte*) area) + size);
-
- if ((((byte*) buddy) - pool->buf) + size > pool->size) {
-
- /* The buddy is not wholly contained in the pool:
- there is no buddy */
-
- buddy = NULL;
- }
- } else {
- /* The buddy is in a lower address; NOTE that area cannot
- be at the pool lower end, because then we would end up to
- the upper branch in this if-clause: the remainder would be
- 0 */
-
- buddy = (mem_area_t*)(((byte*) area) - size);
- }
-
- return(buddy);
-}
-
-/********************************************************************//**
-Frees memory to a pool. */
-UNIV_INTERN
-void
-mem_area_free(
-/*==========*/
- void* ptr, /*!< in, own: pointer to allocated memory
- buffer */
- mem_pool_t* pool) /*!< in: memory pool */
-{
- mem_area_t* area;
- mem_area_t* buddy;
- void* new_ptr;
- ulint size;
- ulint n;
-
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- free(ptr);
-
- return;
- }
-
- /* It may be that the area was really allocated from the OS with
- regular malloc: check if ptr points within our memory pool */
-
- if ((byte*) ptr < pool->buf || (byte*) ptr >= pool->buf + pool->size) {
- ut_free(ptr);
-
- return;
- }
-
- area = (mem_area_t*) (((byte*) ptr) - MEM_AREA_EXTRA_SIZE);
-
- if (mem_area_get_free(area)) {
- fprintf(stderr,
- "InnoDB: Error: Freeing element to mem pool"
- " free list though the\n"
- "InnoDB: element is marked free!\n");
-
- mem_analyze_corruption(area);
- ut_error;
- }
-
- size = mem_area_get_size(area);
- UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE);
-
- if (size == 0) {
- fprintf(stderr,
- "InnoDB: Error: Mem area size is 0. Possibly a"
- " memory overrun of the\n"
- "InnoDB: previous allocated area!\n");
-
- mem_analyze_corruption(area);
- ut_error;
- }
-
-#ifdef UNIV_LIGHT_MEM_DEBUG
- if (((byte*) area) + size < pool->buf + pool->size) {
-
- ulint next_size;
-
- next_size = mem_area_get_size(
- (mem_area_t*)(((byte*) area) + size));
- if (UNIV_UNLIKELY(!next_size || !ut_is_2pow(next_size))) {
- fprintf(stderr,
- "InnoDB: Error: Memory area size %lu,"
- " next area size %lu not a power of 2!\n"
- "InnoDB: Possibly a memory overrun of"
- " the buffer being freed here.\n",
- (ulong) size, (ulong) next_size);
- mem_analyze_corruption(area);
-
- ut_error;
- }
- }
-#endif
- buddy = mem_area_get_buddy(area, size, pool);
-
- n = ut_2_log(size);
-
- mem_pool_mutex_enter(pool);
- mem_n_threads_inside++;
-
- ut_a(mem_n_threads_inside == 1);
-
- if (buddy && mem_area_get_free(buddy)
- && (size == mem_area_get_size(buddy))) {
-
- /* The buddy is in a free list */
-
- if ((byte*) buddy < (byte*) area) {
- new_ptr = ((byte*) buddy) + MEM_AREA_EXTRA_SIZE;
-
- mem_area_set_size(buddy, 2 * size);
- mem_area_set_free(buddy, FALSE);
- } else {
- new_ptr = ptr;
-
- mem_area_set_size(area, 2 * size);
- }
-
- /* Remove the buddy from its free list and merge it to area */
-
- UT_LIST_REMOVE(free_list, pool->free_list[n], buddy);
-
- pool->reserved += ut_2_exp(n);
-
- mem_n_threads_inside--;
- mem_pool_mutex_exit(pool);
-
- mem_area_free(new_ptr, pool);
-
- return;
- } else {
- UT_LIST_ADD_FIRST(free_list, pool->free_list[n], area);
-
- mem_area_set_free(area, TRUE);
-
- ut_ad(pool->reserved >= size);
-
- pool->reserved -= size;
- }
-
- mem_n_threads_inside--;
- mem_pool_mutex_exit(pool);
-
- ut_ad(mem_pool_validate(pool));
-}
-
-/********************************************************************//**
-Validates a memory pool.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-mem_pool_validate(
-/*==============*/
- mem_pool_t* pool) /*!< in: memory pool */
-{
- mem_area_t* area;
- mem_area_t* buddy;
- ulint free;
- ulint i;
-
- mem_pool_mutex_enter(pool);
-
- free = 0;
-
- for (i = 0; i < 64; i++) {
-
- UT_LIST_CHECK(free_list, mem_area_t, pool->free_list[i]);
-
- for (area = UT_LIST_GET_FIRST(pool->free_list[i]);
- area != 0;
- area = UT_LIST_GET_NEXT(free_list, area)) {
-
- ut_a(mem_area_get_free(area));
- ut_a(mem_area_get_size(area) == ut_2_exp(i));
-
- buddy = mem_area_get_buddy(area, ut_2_exp(i), pool);
-
- ut_a(!buddy || !mem_area_get_free(buddy)
- || (ut_2_exp(i) != mem_area_get_size(buddy)));
-
- free += ut_2_exp(i);
- }
- }
-
- ut_a(free + pool->reserved == pool->size);
-
- mem_pool_mutex_exit(pool);
-
- return(TRUE);
-}
-
-/********************************************************************//**
-Prints info of a memory pool. */
-UNIV_INTERN
-void
-mem_pool_print_info(
-/*================*/
- FILE* outfile,/*!< in: output file to write to */
- mem_pool_t* pool) /*!< in: memory pool */
-{
- ulint i;
-
- mem_pool_validate(pool);
-
- fprintf(outfile, "INFO OF A MEMORY POOL\n");
-
- mutex_enter(&(pool->mutex));
-
- for (i = 0; i < 64; i++) {
- if (UT_LIST_GET_LEN(pool->free_list[i]) > 0) {
-
- fprintf(outfile,
- "Free list length %lu for"
- " blocks of size %lu\n",
- (ulong) UT_LIST_GET_LEN(pool->free_list[i]),
- (ulong) ut_2_exp(i));
- }
- }
-
- fprintf(outfile, "Pool size %lu, reserved %lu.\n", (ulong) pool->size,
- (ulong) pool->reserved);
- mutex_exit(&(pool->mutex));
-}
-
-/********************************************************************//**
-Returns the amount of reserved memory.
-@return reserved memory in bytes */
-UNIV_INTERN
-ulint
-mem_pool_get_reserved(
-/*==================*/
- mem_pool_t* pool) /*!< in: memory pool */
-{
- ulint reserved;
-
- mutex_enter(&(pool->mutex));
-
- reserved = pool->reserved;
-
- mutex_exit(&(pool->mutex));
-
- return(reserved);
-}
diff --git a/storage/xtradb/mtr/mtr0log.cc b/storage/xtradb/mtr/mtr0log.cc
deleted file mode 100644
index 82df1df63d4..00000000000
--- a/storage/xtradb/mtr/mtr0log.cc
+++ /dev/null
@@ -1,620 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file mtr/mtr0log.cc
-Mini-transaction log routines
-
-Created 12/7/1995 Heikki Tuuri
-*******************************************************/
-
-#include "mtr0log.h"
-
-#ifdef UNIV_NONINL
-#include "mtr0log.ic"
-#endif
-
-#include "buf0buf.h"
-#include "dict0dict.h"
-#include "log0recv.h"
-#include "page0page.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "dict0boot.h"
-
-/********************************************************//**
-Catenates n bytes to the mtr log. */
-UNIV_INTERN
-void
-mlog_catenate_string(
-/*=================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* str, /*!< in: string to write */
- ulint len) /*!< in: string length */
-{
- dyn_array_t* mlog;
-
- if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) {
-
- return;
- }
-
- mlog = &(mtr->log);
-
- dyn_push_string(mlog, str, len);
-}
-
-/********************************************************//**
-Writes the initial part of a log record consisting of one-byte item
-type and four-byte space and page numbers. Also pushes info
-to the mtr memo that a buffer page has been modified. */
-UNIV_INTERN
-void
-mlog_write_initial_log_record(
-/*==========================*/
- const byte* ptr, /*!< in: pointer to (inside) a buffer
- frame holding the file page where
- modification is made */
- byte type, /*!< in: log item type: MLOG_1BYTE, ... */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- byte* log_ptr;
-
- ut_ad(type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(type));
- ut_ad(type > MLOG_8BYTES);
-
- log_ptr = mlog_open(mtr, 11);
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr);
-
- mlog_close(mtr, log_ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************//**
-Parses an initial log record written by mlog_write_initial_log_record.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
-byte*
-mlog_parse_initial_log_record(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* type, /*!< out: log record type: MLOG_1BYTE, ... */
- ulint* space, /*!< out: space id */
- ulint* page_no)/*!< out: page number */
-{
- if (end_ptr < ptr + 1) {
-
- return(NULL);
- }
-
- *type = (byte)((ulint)*ptr & ~MLOG_SINGLE_REC_FLAG);
- ut_ad(*type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(*type));
-
- ptr++;
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- ptr = mach_parse_compressed(ptr, end_ptr, space);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- ptr = mach_parse_compressed(ptr, end_ptr, page_no);
-
- return(ptr);
-}
-
-/********************************************************//**
-Parses a log record written by mlog_write_ulint or mlog_write_ull.
-@return parsed record end, NULL if not a complete record or a corrupt record */
-UNIV_INTERN
-byte*
-mlog_parse_nbytes(
-/*==============*/
- ulint type, /*!< in: log record type: MLOG_1BYTE, ... */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* page, /*!< in: page where to apply the log record, or NULL */
- void* page_zip)/*!< in/out: compressed page, or NULL */
-{
- ulint offset;
- ulint val;
- ib_uint64_t dval;
-
- ut_a(type <= MLOG_8BYTES);
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- offset = mach_read_from_2(ptr);
- ptr += 2;
-
- ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX ||
- /* scrubbing changes page type from FIL_PAGE_INDEX to
- * FIL_PAGE_TYPE_ALLOCATED (rest of this assertion is below) */
- (type == MLOG_2BYTES && offset == FIL_PAGE_TYPE));
-
- if (offset >= UNIV_PAGE_SIZE) {
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
-
- if (type == MLOG_8BYTES) {
- ptr = mach_ull_parse_compressed(ptr, end_ptr, &dval);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (page) {
- if (page_zip) {
- mach_write_to_8
- (((page_zip_des_t*) page_zip)->data
- + offset, dval);
- }
- mach_write_to_8(page + offset, dval);
- }
-
- return(ptr);
- }
-
- ptr = mach_parse_compressed(ptr, end_ptr, &val);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- switch (type) {
- case MLOG_1BYTE:
- if (UNIV_UNLIKELY(val > 0xFFUL)) {
- goto corrupt;
- }
- if (page) {
- if (page_zip) {
- mach_write_to_1
- (((page_zip_des_t*) page_zip)->data
- + offset, val);
- }
- mach_write_to_1(page + offset, val);
- }
- break;
- case MLOG_2BYTES:
- if (UNIV_UNLIKELY(val > 0xFFFFUL)) {
- goto corrupt;
- }
- if (page) {
- if (page_zip) {
- mach_write_to_2
- (((page_zip_des_t*) page_zip)->data
- + offset, val);
- }
- mach_write_to_2(page + offset, val);
- }
- ut_a(!page || !page_zip ||
- fil_page_get_type(page) != FIL_PAGE_INDEX ||
- /* scrubbing changes page type from FIL_PAGE_INDEX to
- * FIL_PAGE_TYPE_ALLOCATED */
- (type == MLOG_2BYTES &&
- offset == FIL_PAGE_TYPE &&
- val == FIL_PAGE_TYPE_ALLOCATED));
-
- break;
- case MLOG_4BYTES:
- if (page) {
- if (page_zip) {
- mach_write_to_4
- (((page_zip_des_t*) page_zip)->data
- + offset, val);
- }
- mach_write_to_4(page + offset, val);
- }
- break;
- default:
- corrupt:
- recv_sys->found_corrupt_log = TRUE;
- ptr = NULL;
- }
-
- return(ptr);
-}
-
-/********************************************************//**
-Writes 1, 2 or 4 bytes to a file page. Writes the corresponding log
-record to the mini-transaction log if mtr is not NULL. */
-UNIV_INTERN
-void
-mlog_write_ulint(
-/*=============*/
- byte* ptr, /*!< in: pointer where to write */
- ulint val, /*!< in: value to write */
- byte type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- switch (type) {
- case MLOG_1BYTE:
- mach_write_to_1(ptr, val);
- break;
- case MLOG_2BYTES:
- mach_write_to_2(ptr, val);
- break;
- case MLOG_4BYTES:
- mach_write_to_4(ptr, val);
- break;
- default:
- ut_error;
- }
-
- if (mtr != 0) {
- byte* log_ptr = mlog_open(mtr, 11 + 2 + 5);
-
- /* If no logging is requested, we may return now */
-
- if (log_ptr != 0) {
-
- log_ptr = mlog_write_initial_log_record_fast(
- ptr, type, log_ptr, mtr);
-
- mach_write_to_2(log_ptr, page_offset(ptr));
- log_ptr += 2;
-
- log_ptr += mach_write_compressed(log_ptr, val);
-
- mlog_close(mtr, log_ptr);
- }
- }
-}
-
-/********************************************************//**
-Writes 8 bytes to a file page. Writes the corresponding log
-record to the mini-transaction log, only if mtr is not NULL */
-UNIV_INTERN
-void
-mlog_write_ull(
-/*===========*/
- byte* ptr, /*!< in: pointer where to write */
- ib_uint64_t val, /*!< in: value to write */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- mach_write_to_8(ptr, val);
-
- if (mtr != 0) {
- byte* log_ptr = mlog_open(mtr, 11 + 2 + 9);
-
- /* If no logging is requested, we may return now */
- if (log_ptr != 0) {
-
- log_ptr = mlog_write_initial_log_record_fast(
- ptr, MLOG_8BYTES, log_ptr, mtr);
-
- mach_write_to_2(log_ptr, page_offset(ptr));
- log_ptr += 2;
-
- log_ptr += mach_ull_write_compressed(log_ptr, val);
-
- mlog_close(mtr, log_ptr);
- }
- }
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Writes a string to a file page buffered in the buffer pool. Writes the
-corresponding log record to the mini-transaction log. */
-UNIV_INTERN
-void
-mlog_write_string(
-/*==============*/
- byte* ptr, /*!< in: pointer where to write */
- const byte* str, /*!< in: string to write */
- ulint len, /*!< in: string length */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ut_ad(ptr && mtr);
- ut_a(len < UNIV_PAGE_SIZE);
-
- memcpy(ptr, str, len);
-
- mlog_log_string(ptr, len, mtr);
-}
-
-/********************************************************//**
-Logs a write of a string to a file page buffered in the buffer pool.
-Writes the corresponding log record to the mini-transaction log. */
-UNIV_INTERN
-void
-mlog_log_string(
-/*============*/
- byte* ptr, /*!< in: pointer written to */
- ulint len, /*!< in: string length */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- byte* log_ptr;
-
- ut_ad(ptr && mtr);
- ut_ad(len <= UNIV_PAGE_SIZE);
-
- log_ptr = mlog_open(mtr, 30);
-
- /* If no logging is requested, we may return now */
- if (log_ptr == NULL) {
-
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_WRITE_STRING,
- log_ptr, mtr);
- mach_write_to_2(log_ptr, page_offset(ptr));
- log_ptr += 2;
-
- mach_write_to_2(log_ptr, len);
- log_ptr += 2;
-
- mlog_close(mtr, log_ptr);
-
- mlog_catenate_string(mtr, ptr, len);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************//**
-Parses a log record written by mlog_write_string.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
-byte*
-mlog_parse_string(
-/*==============*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- byte* page, /*!< in: page where to apply the log record, or NULL */
- void* page_zip)/*!< in/out: compressed page, or NULL */
-{
- ulint offset;
- ulint len;
-
- ut_a(!page || !page_zip || fil_page_get_type(page) != FIL_PAGE_INDEX);
-
- if (end_ptr < ptr + 4) {
-
- return(NULL);
- }
-
- offset = mach_read_from_2(ptr);
- ptr += 2;
- len = mach_read_from_2(ptr);
- ptr += 2;
-
- if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)
- || UNIV_UNLIKELY(len + offset > UNIV_PAGE_SIZE)) {
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
-
- if (end_ptr < ptr + len) {
-
- return(NULL);
- }
-
- if (page) {
- if (page_zip) {
- memcpy(((page_zip_des_t*) page_zip)->data
- + offset, ptr, len);
- }
- memcpy(page + offset, ptr, len);
- }
-
- return(ptr + len);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************//**
-Opens a buffer for mlog, writes the initial log record and,
-if needed, the field lengths of an index.
-@return buffer, NULL if log mode MTR_LOG_NONE */
-UNIV_INTERN
-byte*
-mlog_open_and_write_index(
-/*======================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* rec, /*!< in: index record or page */
- const dict_index_t* index, /*!< in: record descriptor */
- byte type, /*!< in: log item type */
- ulint size) /*!< in: requested buffer size in bytes
- (if 0, calls mlog_close() and
- returns NULL) */
-{
- byte* log_ptr;
- const byte* log_start;
- const byte* log_end;
-
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-
- if (!page_rec_is_comp(rec)) {
- log_start = log_ptr = mlog_open(mtr, 11 + size);
- if (!log_ptr) {
- return(NULL); /* logging is disabled */
- }
- log_ptr = mlog_write_initial_log_record_fast(rec, type,
- log_ptr, mtr);
- log_end = log_ptr + 11 + size;
- } else {
- ulint i;
- ulint n = dict_index_get_n_fields(index);
- /* total size needed */
- ulint total = 11 + size + (n + 2) * 2;
- ulint alloc = total;
- /* allocate at most DYN_ARRAY_DATA_SIZE at a time */
- if (alloc > DYN_ARRAY_DATA_SIZE) {
- alloc = DYN_ARRAY_DATA_SIZE;
- }
- log_start = log_ptr = mlog_open(mtr, alloc);
- if (!log_ptr) {
- return(NULL); /* logging is disabled */
- }
- log_end = log_ptr + alloc;
- log_ptr = mlog_write_initial_log_record_fast(rec, type,
- log_ptr, mtr);
- mach_write_to_2(log_ptr, n);
- log_ptr += 2;
- mach_write_to_2(log_ptr,
- dict_index_get_n_unique_in_tree(index));
- log_ptr += 2;
- for (i = 0; i < n; i++) {
- dict_field_t* field;
- const dict_col_t* col;
- ulint len;
-
- field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(field);
- len = field->fixed_len;
- ut_ad(len < 0x7fff);
- if (len == 0
- && (col->len > 255 || col->mtype == DATA_BLOB)) {
- /* variable-length field
- with maximum length > 255 */
- len = 0x7fff;
- }
- if (col->prtype & DATA_NOT_NULL) {
- len |= 0x8000;
- }
- if (log_ptr + 2 > log_end) {
- mlog_close(mtr, log_ptr);
- ut_a(total > (ulint) (log_ptr - log_start));
- total -= log_ptr - log_start;
- alloc = total;
- if (alloc > DYN_ARRAY_DATA_SIZE) {
- alloc = DYN_ARRAY_DATA_SIZE;
- }
- log_start = log_ptr = mlog_open(mtr, alloc);
- if (!log_ptr) {
- return(NULL); /* logging is disabled */
- }
- log_end = log_ptr + alloc;
- }
- mach_write_to_2(log_ptr, len);
- log_ptr += 2;
- }
- }
- if (size == 0) {
- mlog_close(mtr, log_ptr);
- log_ptr = NULL;
- } else if (log_ptr + size > log_end) {
- mlog_close(mtr, log_ptr);
- log_ptr = mlog_open(mtr, size);
- }
- return(log_ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************//**
-Parses a log record written by mlog_open_and_write_index.
-@return parsed record end, NULL if not a complete record */
-UNIV_INTERN
-byte*
-mlog_parse_index(
-/*=============*/
- byte* ptr, /*!< in: buffer */
- const byte* end_ptr,/*!< in: buffer end */
- ibool comp, /*!< in: TRUE=compact row format */
- dict_index_t** index) /*!< out, own: dummy index */
-{
- ulint i, n, n_uniq;
- dict_table_t* table;
- dict_index_t* ind;
-
- ut_ad(comp == FALSE || comp == TRUE);
-
- if (comp) {
- if (end_ptr < ptr + 4) {
- return(NULL);
- }
- n = mach_read_from_2(ptr);
- ptr += 2;
- n_uniq = mach_read_from_2(ptr);
- ptr += 2;
- ut_ad(n_uniq <= n);
- if (end_ptr < ptr + n * 2) {
- return(NULL);
- }
- } else {
- n = n_uniq = 1;
- }
- table = dict_mem_table_create("LOG_DUMMY", DICT_HDR_SPACE, n,
- comp ? DICT_TF_COMPACT : 0, 0);
- ind = dict_mem_index_create("LOG_DUMMY", "LOG_DUMMY",
- DICT_HDR_SPACE, 0, n);
- ind->table = table;
- ind->n_uniq = (unsigned int) n_uniq;
- if (n_uniq != n) {
- ut_a(n_uniq + DATA_ROLL_PTR <= n);
- ind->type = DICT_CLUSTERED;
- }
- if (comp) {
- for (i = 0; i < n; i++) {
- ulint len = mach_read_from_2(ptr);
- ptr += 2;
- /* The high-order bit of len is the NOT NULL flag;
- the rest is 0 or 0x7fff for variable-length fields,
- and 1..0x7ffe for fixed-length fields. */
- dict_mem_table_add_col(
- table, NULL, NULL,
- ((len + 1) & 0x7fff) <= 1
- ? DATA_BINARY : DATA_FIXBINARY,
- len & 0x8000 ? DATA_NOT_NULL : 0,
- len & 0x7fff);
-
- dict_index_add_col(ind, table,
- dict_table_get_nth_col(table, i),
- 0);
- }
- dict_table_add_system_columns(table, table->heap);
- if (n_uniq != n) {
- /* Identify DB_TRX_ID and DB_ROLL_PTR in the index. */
- ut_a(DATA_TRX_ID_LEN
- == dict_index_get_nth_col(ind, DATA_TRX_ID - 1
- + n_uniq)->len);
- ut_a(DATA_ROLL_PTR_LEN
- == dict_index_get_nth_col(ind, DATA_ROLL_PTR - 1
- + n_uniq)->len);
- ind->fields[DATA_TRX_ID - 1 + n_uniq].col
- = &table->cols[n + DATA_TRX_ID];
- ind->fields[DATA_ROLL_PTR - 1 + n_uniq].col
- = &table->cols[n + DATA_ROLL_PTR];
- }
- }
- /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
- ind->cached = TRUE;
- *index = ind;
- return(ptr);
-}
diff --git a/storage/xtradb/mtr/mtr0mtr.cc b/storage/xtradb/mtr/mtr0mtr.cc
deleted file mode 100644
index e564b270d00..00000000000
--- a/storage/xtradb/mtr/mtr0mtr.cc
+++ /dev/null
@@ -1,474 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file mtr/mtr0mtr.cc
-Mini-transaction buffer
-
-Created 11/26/1995 Heikki Tuuri
-*******************************************************/
-
-#include "mtr0mtr.h"
-
-#ifdef UNIV_NONINL
-#include "mtr0mtr.ic"
-#endif
-
-#include "buf0buf.h"
-#include "buf0flu.h"
-#include "page0types.h"
-#include "mtr0log.h"
-#include "log0log.h"
-#include "buf0flu.h"
-
-#ifndef UNIV_HOTBACKUP
-# include "log0recv.h"
-
-/***************************************************//**
-Checks if a mini-transaction is dirtying a clean page.
-@return TRUE if the mtr is dirtying a clean page. */
-UNIV_INTERN
-ibool
-mtr_block_dirtied(
-/*==============*/
- const buf_block_t* block) /*!< in: block being x-fixed */
-{
- ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
- ut_ad(block->page.buf_fix_count > 0);
-
- /* It is OK to read oldest_modification because no
- other thread can be performing a write of it and it
- is only during write that the value is reset to 0. */
- return(block->page.oldest_modification == 0);
-}
-
-/*****************************************************************//**
-Releases the item in the slot given. */
-static MY_ATTRIBUTE((nonnull))
-void
-mtr_memo_slot_release_func(
-/*=======================*/
-#ifdef UNIV_DEBUG
- mtr_t* mtr, /*!< in/out: mini-transaction */
-#endif /* UNIV_DEBUG */
- mtr_memo_slot_t* slot) /*!< in: memo slot */
-{
- void* object = slot->object;
- slot->object = NULL;
-
- /* slot release is a local operation for the current mtr.
- We must not be holding the flush_order mutex while
- doing this. */
- ut_ad(!log_flush_order_mutex_own());
-
- switch (slot->type) {
- case MTR_MEMO_PAGE_S_FIX:
- case MTR_MEMO_PAGE_X_FIX:
- case MTR_MEMO_BUF_FIX:
- buf_page_release((buf_block_t*) object, slot->type);
- break;
- case MTR_MEMO_S_LOCK:
- rw_lock_s_unlock((prio_rw_lock_t*) object);
- break;
- case MTR_MEMO_X_LOCK:
- rw_lock_x_unlock((prio_rw_lock_t*) object);
- break;
-#ifdef UNIV_DEBUG
- default:
- ut_ad(slot->type == MTR_MEMO_MODIFY);
- ut_ad(mtr_memo_contains(mtr, object, MTR_MEMO_PAGE_X_FIX));
-#endif /* UNIV_DEBUG */
- }
-}
-
-#ifdef UNIV_DEBUG
-# define mtr_memo_slot_release(mtr, slot) mtr_memo_slot_release_func(mtr, slot)
-#else /* UNIV_DEBUG */
-# define mtr_memo_slot_release(mtr, slot) mtr_memo_slot_release_func(slot)
-#endif /* UNIV_DEBUG */
-
-/**********************************************************//**
-Releases the mlocks and other objects stored in an mtr memo.
-They are released in the order opposite to which they were pushed
-to the memo. */
-static MY_ATTRIBUTE((nonnull))
-void
-mtr_memo_pop_all(
-/*=============*/
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in
- commit */
-
- for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo);
- block;
- block = dyn_array_get_prev_block(&mtr->memo, block)) {
- const mtr_memo_slot_t* start
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block));
- mtr_memo_slot_t* slot
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block)
- + dyn_block_get_used(block));
-
- ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t)));
-
- while (slot-- != start) {
- if (slot->object != NULL) {
- mtr_memo_slot_release(mtr, slot);
- }
- }
- }
-}
-
-/*****************************************************************//**
-Releases the item in the slot given. */
-static
-void
-mtr_memo_slot_note_modification(
-/*============================*/
- mtr_t* mtr, /*!< in: mtr */
- mtr_memo_slot_t* slot) /*!< in: memo slot */
-{
- ut_ad(mtr->modifications);
- ut_ad(!srv_read_only_mode);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
-
- if (slot->object != NULL && slot->type == MTR_MEMO_PAGE_X_FIX) {
- buf_block_t* block = (buf_block_t*) slot->object;
-
- ut_ad(!mtr->made_dirty || log_flush_order_mutex_own());
- buf_flush_note_modification(block, mtr);
- }
-}
-
-/**********************************************************//**
-Add the modified pages to the buffer flush list. They are released
-in the order opposite to which they were pushed to the memo. NOTE! It is
-essential that the x-rw-lock on a modified buffer page is not released
-before buf_page_note_modification is called for that page! Otherwise,
-some thread might race to modify it, and the flush list sort order on
-lsn would be destroyed. */
-static
-void
-mtr_memo_note_modifications(
-/*========================*/
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(!srv_read_only_mode);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in
- commit */
-
- for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo);
- block;
- block = dyn_array_get_prev_block(&mtr->memo, block)) {
- const mtr_memo_slot_t* start
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block));
- mtr_memo_slot_t* slot
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block)
- + dyn_block_get_used(block));
-
- ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t)));
-
- while (slot-- != start) {
- if (slot->object != NULL) {
- mtr_memo_slot_note_modification(mtr, slot);
- }
- }
- }
-}
-
-/************************************************************//**
-Append the dirty pages to the flush list. */
-static
-void
-mtr_add_dirtied_pages_to_flush_list(
-/*================================*/
- mtr_t* mtr) /*!< in/out: mtr */
-{
- ut_ad(!srv_read_only_mode);
-
- /* No need to acquire log_flush_order_mutex if this mtr has
- not dirtied a clean page. log_flush_order_mutex is used to
- ensure ordered insertions in the flush_list. We need to
- insert in the flush_list iff the page in question was clean
- before modifications. */
- if (mtr->made_dirty) {
- log_flush_order_mutex_enter();
- }
-
- /* It is now safe to release the log mutex because the
- flush_order mutex will ensure that we are the first one
- to insert into the flush list. */
- log_release();
-
- if (mtr->modifications) {
- mtr_memo_note_modifications(mtr);
- }
-
- if (mtr->made_dirty) {
- log_flush_order_mutex_exit();
- }
-}
-
-/************************************************************//**
-Writes the contents of a mini-transaction log, if any, to the database log. */
-static
-void
-mtr_log_reserve_and_write(
-/*======================*/
- mtr_t* mtr) /*!< in/out: mtr */
-{
- dyn_array_t* mlog;
- ulint data_size;
- byte* first_data;
-
- ut_ad(!srv_read_only_mode);
-
- mlog = &(mtr->log);
-
- first_data = dyn_block_get_data(mlog);
-
- if (mtr->n_log_recs > 1) {
- mlog_catenate_ulint(mtr, MLOG_MULTI_REC_END, MLOG_1BYTE);
- } else {
- *first_data = (byte)((ulint)*first_data
- | MLOG_SINGLE_REC_FLAG);
- }
-
- if (mlog->heap == NULL) {
- ulint len;
-
- len = mtr->log_mode != MTR_LOG_NO_REDO
- ? dyn_block_get_used(mlog) : 0;
-
- mtr->end_lsn = log_reserve_and_write_fast(
- first_data, len, &mtr->start_lsn);
-
- if (mtr->end_lsn) {
-
- /* Success. We have the log mutex.
- Add pages to flush list and exit */
- mtr_add_dirtied_pages_to_flush_list(mtr);
-
- return;
- }
- } else {
- mutex_enter(&log_sys->mutex);
- }
-
- data_size = dyn_array_get_data_size(mlog);
-
- /* Open the database log for log_write_low */
- mtr->start_lsn = log_open(data_size);
-
- if (mtr->log_mode == MTR_LOG_ALL) {
-
- for (dyn_block_t* block = mlog;
- block != 0;
- block = dyn_array_get_next_block(mlog, block)) {
-
- log_write_low(
- dyn_block_get_data(block),
- dyn_block_get_used(block));
- }
-
- } else {
- ut_ad(mtr->log_mode == MTR_LOG_NONE
- || mtr->log_mode == MTR_LOG_NO_REDO);
- /* Do nothing */
- }
-
- mtr->end_lsn = log_close();
-
- mtr_add_dirtied_pages_to_flush_list(mtr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************************//**
-Commits a mini-transaction. */
-UNIV_INTERN
-void
-mtr_commit(
-/*=======*/
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad(!mtr->inside_ibuf);
- ut_d(mtr->state = MTR_COMMITTING);
-
-#ifndef UNIV_HOTBACKUP
- /* This is a dirty read, for debugging. */
- ut_ad(!recv_no_log_write);
-
- if (mtr->modifications && mtr->n_log_recs) {
- ut_ad(!srv_read_only_mode);
- mtr_log_reserve_and_write(mtr);
- }
-
- mtr_memo_pop_all(mtr);
-#endif /* !UNIV_HOTBACKUP */
-
- dyn_array_free(&(mtr->memo));
- dyn_array_free(&(mtr->log));
-#ifdef UNIV_DEBUG_VALGRIND
- /* Declare everything uninitialized except
- mtr->start_lsn, mtr->end_lsn and mtr->state. */
- {
- lsn_t start_lsn = mtr->start_lsn;
- lsn_t end_lsn = mtr->end_lsn;
- UNIV_MEM_INVALID(mtr, sizeof *mtr);
- mtr->start_lsn = start_lsn;
- mtr->end_lsn = end_lsn;
- }
-#endif /* UNIV_DEBUG_VALGRIND */
- ut_d(mtr->state = MTR_COMMITTED);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***************************************************//**
-Releases an object in the memo stack.
-@return true if released */
-UNIV_INTERN
-bool
-mtr_memo_release(
-/*=============*/
- mtr_t* mtr, /*!< in/out: mini-transaction */
- void* object, /*!< in: object */
- ulint type) /*!< in: object type: MTR_MEMO_S_LOCK, ... */
-{
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
- /* We cannot release a page that has been written to in the
- middle of a mini-transaction. */
- ut_ad(!mtr->modifications || type != MTR_MEMO_PAGE_X_FIX);
-
- for (const dyn_block_t* block = dyn_array_get_last_block(&mtr->memo);
- block;
- block = dyn_array_get_prev_block(&mtr->memo, block)) {
- const mtr_memo_slot_t* start
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block));
- mtr_memo_slot_t* slot
- = reinterpret_cast<mtr_memo_slot_t*>(
- dyn_block_get_data(block)
- + dyn_block_get_used(block));
-
- ut_ad(!(dyn_block_get_used(block) % sizeof(mtr_memo_slot_t)));
-
- while (slot-- != start) {
- if (object == slot->object && type == slot->type) {
- mtr_memo_slot_release(mtr, slot);
- return(true);
- }
- }
- }
-
- return(false);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************//**
-Reads 1 - 4 bytes from a file page buffered in the buffer pool.
-@return value read */
-UNIV_INTERN
-ulint
-mtr_read_ulint(
-/*===========*/
- const byte* ptr, /*!< in: pointer from where to read */
- ulint type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
- mtr_t* mtr MY_ATTRIBUTE((unused)))
- /*!< in: mini-transaction handle */
-{
- ut_ad(mtr->state == MTR_ACTIVE);
- ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX));
-
- return(mach_read_ulint(ptr, type));
-}
-
-#ifdef UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Checks if memo contains the given page.
-@return TRUE if contains */
-UNIV_INTERN
-ibool
-mtr_memo_contains_page(
-/*===================*/
- mtr_t* mtr, /*!< in: mtr */
- const byte* ptr, /*!< in: pointer to buffer frame */
- ulint type) /*!< in: type of object */
-{
- return(mtr_memo_contains(mtr, buf_block_align(ptr), type));
-}
-
-/*********************************************************//**
-Prints info of an mtr handle. */
-UNIV_INTERN
-void
-mtr_print(
-/*======*/
- mtr_t* mtr) /*!< in: mtr */
-{
- fprintf(stderr,
- "Mini-transaction handle: memo size %lu bytes"
- " log size %lu bytes\n",
- (ulong) dyn_array_get_data_size(&(mtr->memo)),
- (ulong) dyn_array_get_data_size(&(mtr->log)));
-}
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_DEBUG */
-
-/**********************************************************//**
-Releases a buf_page stored in an mtr memo after a
-savepoint. */
-UNIV_INTERN
-void
-mtr_release_buf_page_at_savepoint(
-/*=============================*/
- mtr_t* mtr, /*!< in: mtr */
- ulint savepoint, /*!< in: savepoint */
- buf_block_t* block) /*!< in: block to release */
-{
- mtr_memo_slot_t* slot;
- dyn_array_t* memo;
-
- ut_ad(mtr);
- ut_ad(mtr->magic_n == MTR_MAGIC_N);
- ut_ad(mtr->state == MTR_ACTIVE);
-
- memo = &(mtr->memo);
-
- ut_ad(dyn_array_get_data_size(memo) > savepoint);
-
- slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, savepoint);
-
- ut_ad(slot->object == block);
- ut_ad(slot->type == MTR_MEMO_PAGE_S_FIX ||
- slot->type == MTR_MEMO_PAGE_X_FIX ||
- slot->type == MTR_MEMO_BUF_FIX);
-
- buf_page_release((buf_block_t*) slot->object, slot->type);
- slot->object = NULL;
-}
diff --git a/storage/xtradb/mysql-test/storage_engine/alter_tablespace.opt b/storage/xtradb/mysql-test/storage_engine/alter_tablespace.opt
deleted file mode 100644
index cf4b117e1b1..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/alter_tablespace.opt
+++ /dev/null
@@ -1,2 +0,0 @@
---innodb-file-per-table=1
-
diff --git a/storage/xtradb/mysql-test/storage_engine/autoinc_secondary.rdiff b/storage/xtradb/mysql-test/storage_engine/autoinc_secondary.rdiff
deleted file mode 100644
index 00cda7c4435..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/autoinc_secondary.rdiff
+++ /dev/null
@@ -1,30 +0,0 @@
---- suite/storage_engine/autoinc_secondary.result 2012-07-12 04:34:18.153885986 +0400
-+++ suite/storage_engine/autoinc_secondary.reject 2012-07-15 17:47:03.937703666 +0400
-@@ -13,18 +13,15 @@
- 5 a
- DROP TABLE t1;
- CREATE TABLE t1 (a <CHAR_COLUMN>, b <INT_COLUMN> AUTO_INCREMENT, PRIMARY KEY (a,b)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--INSERT INTO t1 (a) VALUES ('a'),('b'),('b'),('c'),('a');
--SELECT LAST_INSERT_ID();
--LAST_INSERT_ID()
--1
--SELECT a,b FROM t1;
--a b
--a 1
--a 2
--b 1
--b 2
--c 1
--DROP TABLE t1;
-+ERROR 42000: Incorrect table definition; there can be only one auto column and it must be defined as a key
-+# ERROR: Statement ended with errno 1075, errname ER_WRONG_AUTO_KEY (expected to succeed)
-+# ------------ UNEXPECTED RESULT ------------
-+# The statement|command finished with ER_WRONG_AUTO_KEY.
-+# Multi-part keys or PK or AUTO_INCREMENT (on a secondary column) or the mix could be unsupported|malfunctioning, or the problem was caused by previous errors.
-+# You can change the engine code, or create an rdiff, or disable the test by adding it to disabled.def.
-+# Further in this test, the message might sometimes be suppressed; a part of the test might be skipped.
-+# Also, this problem may cause a chain effect (more errors of different kinds in the test).
-+# -------------------------------------------
- CREATE TABLE t1 (a <CHAR_COLUMN>, b <INT_COLUMN> AUTO_INCREMENT, PRIMARY KEY (a,b), <CUSTOM_INDEX>(b)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
- INSERT INTO t1 (a) VALUES ('a'),('b'),('b'),('c'),('a');
- SELECT LAST_INSERT_ID();
diff --git a/storage/xtradb/mysql-test/storage_engine/cache_index.rdiff b/storage/xtradb/mysql-test/storage_engine/cache_index.rdiff
deleted file mode 100644
index e04df87aa34..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/cache_index.rdiff
+++ /dev/null
@@ -1,71 +0,0 @@
---- suite/storage_engine/cache_index.result 2012-07-15 00:22:19.822493731 +0400
-+++ suite/storage_engine/cache_index.reject 2012-07-15 17:47:18.321522834 +0400
-@@ -12,31 +12,31 @@
- SET GLOBAL <CACHE_NAME>.key_buffer_size=128*1024;
- CACHE INDEX t1 INDEX (a), t2 IN <CACHE_NAME>;
- Table Op Msg_type Msg_text
--test.t1 assign_to_keycache status OK
--test.t2 assign_to_keycache status OK
-+test.t1 assign_to_keycache note The storage engine for the table doesn't support assign_to_keycache
-+test.t2 assign_to_keycache note The storage engine for the table doesn't support assign_to_keycache
- LOAD INDEX INTO CACHE t1, t2;
- Table Op Msg_type Msg_text
--test.t1 preload_keys status OK
--test.t2 preload_keys status OK
-+test.t1 preload_keys note The storage engine for the table doesn't support preload_keys
-+test.t2 preload_keys note The storage engine for the table doesn't support preload_keys
- INSERT INTO t1 (a,b) VALUES (3,'c'),(4,'d');
- SET GLOBAL <CACHE_NAME>.key_buffer_size=8*1024;
- LOAD INDEX INTO CACHE t1, t2 IGNORE LEAVES;
- Table Op Msg_type Msg_text
--test.t1 preload_keys status OK
--test.t2 preload_keys status OK
-+test.t1 preload_keys note The storage engine for the table doesn't support preload_keys
-+test.t2 preload_keys note The storage engine for the table doesn't support preload_keys
- SET GLOBAL <CACHE_NAME>.key_cache_age_threshold = 100, <CACHE_NAME>.key_cache_block_size = 512, <CACHE_NAME>.key_cache_division_limit = 1, <CACHE_NAME>.key_cache_segments=2;
- INSERT INTO t1 (a,b) VALUES (5,'e'),(6,'f');
- LOAD INDEX INTO CACHE t1;
- Table Op Msg_type Msg_text
--test.t1 preload_keys status OK
-+test.t1 preload_keys note The storage engine for the table doesn't support preload_keys
- SET GLOBAL new_<CACHE_NAME>.key_buffer_size=128*1024;
- CACHE INDEX t1 IN new_<CACHE_NAME>;
- Table Op Msg_type Msg_text
--test.t1 assign_to_keycache status OK
-+test.t1 assign_to_keycache note The storage engine for the table doesn't support assign_to_keycache
- INSERT INTO t1 (a,b) VALUES (7,'g'),(8,'h');
- LOAD INDEX INTO CACHE t1 IGNORE LEAVES;
- Table Op Msg_type Msg_text
--test.t1 preload_keys status OK
-+test.t1 preload_keys note The storage engine for the table doesn't support preload_keys
- INSERT INTO t1 (a,b) VALUES (9,'i');
- DROP TABLE t2;
- DROP TABLE t1;
-@@ -47,11 +47,11 @@
- ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
- CACHE INDEX t1 IN <CACHE_NAME>;
- Table Op Msg_type Msg_text
--test.t1 assign_to_keycache status OK
-+test.t1 assign_to_keycache note The storage engine for the table doesn't support assign_to_keycache
- INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
- LOAD INDEX INTO CACHE t1;
- Table Op Msg_type Msg_text
--test.t1 preload_keys status OK
-+test.t1 preload_keys note The storage engine for the table doesn't support preload_keys
- DROP TABLE t1;
- CREATE TABLE t1 (a <INT_COLUMN>,
- b <CHAR_COLUMN>,
-@@ -59,11 +59,11 @@
- ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
- CACHE INDEX t1 IN <CACHE_NAME>;
- Table Op Msg_type Msg_text
--test.t1 assign_to_keycache status OK
-+test.t1 assign_to_keycache note The storage engine for the table doesn't support assign_to_keycache
- INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
- LOAD INDEX INTO CACHE t1;
- Table Op Msg_type Msg_text
--test.t1 preload_keys status OK
-+test.t1 preload_keys note The storage engine for the table doesn't support preload_keys
- DROP TABLE t1;
- SET GLOBAL <CACHE_NAME>.key_buffer_size=0;
- SET GLOBAL new_<CACHE_NAME>.key_buffer_size=0;
diff --git a/storage/xtradb/mysql-test/storage_engine/checksum_table_live.rdiff b/storage/xtradb/mysql-test/storage_engine/checksum_table_live.rdiff
deleted file mode 100644
index 71c782848a6..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/checksum_table_live.rdiff
+++ /dev/null
@@ -1,13 +0,0 @@
---- suite/storage_engine/checksum_table_live.result 2012-07-12 21:05:44.497062968 +0400
-+++ suite/storage_engine/checksum_table_live.reject 2012-07-15 17:47:28.105399836 +0400
-@@ -11,8 +11,8 @@
- test.t1 4272806499
- CHECKSUM TABLE t1, t2 QUICK;
- Table Checksum
--test.t1 4272806499
--test.t2 0
-+test.t1 NULL
-+test.t2 NULL
- CHECKSUM TABLE t1, t2 EXTENDED;
- Table Checksum
- test.t1 4272806499
diff --git a/storage/xtradb/mysql-test/storage_engine/col_opt_not_null.opt b/storage/xtradb/mysql-test/storage_engine/col_opt_not_null.opt
deleted file mode 100644
index 40445305fc6..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/col_opt_not_null.opt
+++ /dev/null
@@ -1 +0,0 @@
---innodb_log_file_size=100M
diff --git a/storage/xtradb/mysql-test/storage_engine/col_opt_null.opt b/storage/xtradb/mysql-test/storage_engine/col_opt_null.opt
deleted file mode 100644
index 40445305fc6..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/col_opt_null.opt
+++ /dev/null
@@ -1 +0,0 @@
---innodb_log_file_size=100M
diff --git a/storage/xtradb/mysql-test/storage_engine/define_engine.inc b/storage/xtradb/mysql-test/storage_engine/define_engine.inc
deleted file mode 100644
index 77e384d2351..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/define_engine.inc
+++ /dev/null
@@ -1,49 +0,0 @@
-###########################################
-#
-# This is a template of the include file define_engine.inc which
-# should be placed in storage/<engine>/mysql-test/storage_engine folder.
-#
-################################
-#
-# The name of the engine under test must be defined in $ENGINE variable.
-# You can set it either here (uncomment and edit) or in your environment.
-#
-let $ENGINE = InnoDB;
-#
-################################
-#
-# The following three variables define specific options for columns and tables.
-# Normally there should be none needed, but for some engines it can be different.
-# If the engine requires specific column option for all or indexed columns,
-# set them inside the comment, e.g. /*!NOT NULL*/.
-# Do the same for table options if needed, e.g. /*!INSERT_METHOD=LAST*/
-
-let $default_col_opts = /*!*/;
-let $default_col_indexed_opts = /*!*/;
-let $default_tbl_opts = /*!*/;
-
-# INDEX, UNIQUE INDEX, PRIMARY KEY, special index type - choose the fist that the engine allows,
-# or set it to /*!*/ if none is supported
-
-let $default_index = /*!INDEX*/;
-
-# If the engine does not support the following types, replace them with the closest possible
-
-let $default_int_type = INT(11);
-let $default_char_type = CHAR(8);
-
-################################
-
---disable_query_log
---disable_result_log
-
-# Here you can place your custom MTR code which needs to be executed before each test,
-# e.g. creation of an additional schema or table, etc.
-# The cleanup part should be defined in cleanup_engine.inc
-
-CALL mtr.add_suppression("InnoDB: Resizing redo log from .* to .* pages, LSN=.*");
-CALL mtr.add_suppression("InnoDB: Starting to delete and rewrite log files.");
-CALL mtr.add_suppression("InnoDB: New log files created, LSN=.*");
-
---enable_query_log
---enable_result_log
diff --git a/storage/xtradb/mysql-test/storage_engine/disabled.def b/storage/xtradb/mysql-test/storage_engine/disabled.def
deleted file mode 100644
index 2f3793047f4..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/disabled.def
+++ /dev/null
@@ -1,8 +0,0 @@
-autoinc_vars : MySQL:65225 (InnoDB miscalculates auto-increment)
-tbl_opt_ai : MySQL:65901 (AUTO_INCREMENT option on InnoDB table is ignored if added before autoinc column)
-delete_low_prio : InnoDB does not use table-level locking
-insert_high_prio : InnoDB does not use table-level locking
-insert_low_prio : InnoDB does not use table-level locking
-select_high_prio : InnoDB does not use table-level locking
-update_low_prio : InnoDB does not use table-level locking
-
diff --git a/storage/xtradb/mysql-test/storage_engine/fulltext_search.rdiff b/storage/xtradb/mysql-test/storage_engine/fulltext_search.rdiff
deleted file mode 100644
index a68fe83070e..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/fulltext_search.rdiff
+++ /dev/null
@@ -1,49 +0,0 @@
---- suite/storage_engine/fulltext_search.result 2013-11-27 18:50:16.000000000 +0400
-+++ suite/storage_engine/fulltext_search.reject 2014-02-05 15:33:26.000000000 +0400
-@@ -52,15 +52,14 @@
- INSERT INTO t1 (v0,v1,v2) VALUES ('text4','Contributing more...','...is a good idea'),('text5','test','test');
- SELECT v0, MATCH(v1) AGAINST('contributing') AS rating FROM t1 WHERE MATCH(v1) AGAINST ('contributing');
- v0 rating
--text4 1.3705332279205322
-+text4 0.4885590672492981
- SELECT v0 FROM t1 WHERE MATCH(v1,v2) AGAINST ('-test1 +critical +Cook*' IN BOOLEAN MODE);
--v0
--text1
-+ERROR HY000: Can't find FULLTEXT index matching the column list
- SELECT v0 FROM t1 WHERE MATCH(v1,v2) AGAINST ('-patch +critical +Cook*' IN BOOLEAN MODE);
--v0
-+ERROR HY000: Can't find FULLTEXT index matching the column list
- SELECT v0, MATCH(v1) AGAINST('database' WITH QUERY EXPANSION) AS rating FROM t1 WHERE MATCH(v1) AGAINST ('database' WITH QUERY EXPANSION);
- v0 rating
--text1 178.11756896972656
-+text1 151.4530487060547
- DROP TABLE t1;
- CREATE TABLE t1 (v0 VARCHAR(64) <CUSTOM_COL_OPTIONS>,
- v1 VARCHAR(16384) <CUSTOM_COL_OPTIONS>,
-@@ -112,14 +111,15 @@
- ), ('text2','test1','test2');
- SELECT v0 FROM t1 WHERE MATCH(v1,v2) AGAINST ('contributing' IN NATURAL LANGUAGE MODE);
- v0
-+text1
- INSERT INTO t1 (v0,v1,v2) VALUES ('text3','test','test');
- SELECT v0, MATCH(v1,v2) AGAINST('contributing' IN NATURAL LANGUAGE MODE) AS rating FROM t1 WHERE MATCH(v1,v2) AGAINST ('contributing' IN NATURAL LANGUAGE MODE);
- v0 rating
--text1 0.2809644043445587
-+text1 0.45528939366340637
- INSERT INTO t1 (v0,v1,v2) VALUES ('text4','Contributing more...','...is a good idea'),('text5','test','test');
- SELECT v0, MATCH(v1) AGAINST('contributing') AS rating FROM t1 WHERE MATCH(v1) AGAINST ('contributing');
- v0 rating
--text4 1.3705332279205322
-+text4 0.4885590672492981
- SELECT v0 FROM t1 WHERE MATCH(v1,v2) AGAINST ('-test1 +critical +Cook*' IN BOOLEAN MODE);
- v0
- text1
-@@ -127,6 +127,6 @@
- v0
- SELECT v0, MATCH(v1,v2) AGAINST('database' WITH QUERY EXPANSION) AS rating FROM t1 WHERE MATCH(v1,v2) AGAINST ('database' WITH QUERY EXPANSION);
- v0 rating
--text1 190.56150817871094
--text4 1.1758291721343994
-+text1 229.60874938964844
-+text4 0.31671249866485596
- DROP TABLE t1;
diff --git a/storage/xtradb/mysql-test/storage_engine/index_enable_disable.rdiff b/storage/xtradb/mysql-test/storage_engine/index_enable_disable.rdiff
deleted file mode 100644
index f8e812e7edb..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/index_enable_disable.rdiff
+++ /dev/null
@@ -1,33 +0,0 @@
---- suite/storage_engine/index_enable_disable.result 2012-07-15 00:30:05.296641931 +0400
-+++ suite/storage_engine/index_enable_disable.reject 2012-07-15 17:49:12.988081281 +0400
-@@ -11,15 +11,19 @@
- Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
- t1 1 a 1 a # # NULL NULL YES BTREE
- ALTER TABLE t1 DISABLE KEYS;
-+Warnings:
-+Note 1031 Storage engine <STORAGE_ENGINE> of the table `test`.`t1` doesn't have this option
- SHOW INDEX IN t1;
- Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
--t1 1 a 1 a # # NULL NULL YES BTREE disabled
-+t1 1 a 1 a # # NULL NULL YES BTREE
- EXPLAIN SELECT a FROM t1 ORDER BY a;
- id select_type table type possible_keys key key_len ref rows Extra
--1 SIMPLE t1 ALL NULL NULL NULL NULL 19 Using filesort
-+1 SIMPLE t1 index NULL a 5 NULL 19 Using index
- INSERT INTO t1 (a) VALUES
- (11),(12),(13),(14),(15),(16),(17),(18),(19),(20);
- ALTER TABLE t1 ENABLE KEYS;
-+Warnings:
-+Note 1031 Storage engine <STORAGE_ENGINE> of the table `test`.`t1` doesn't have this option
- SHOW INDEX IN t1;
- Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
- t1 1 a 1 a # # NULL NULL YES BTREE
-@@ -32,6 +36,8 @@
- (1),(2),(3),(4),(5),(6),(7),(8),(9),
- (21),(22),(23),(24),(25),(26),(27),(28),(29);
- ALTER TABLE t1 DISABLE KEYS;
-+Warnings:
-+Note 1031 Storage engine <STORAGE_ENGINE> of the table `test`.`t1` doesn't have this option
- INSERT INTO t1 (a) VALUES (29);
- ERROR 23000: Duplicate entry '29' for key 'a'
- # Statement ended with one of expected results (ER_DUP_ENTRY,ER_DUP_KEY).
diff --git a/storage/xtradb/mysql-test/storage_engine/index_type_hash.rdiff b/storage/xtradb/mysql-test/storage_engine/index_type_hash.rdiff
deleted file mode 100644
index 02f9d93588f..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/index_type_hash.rdiff
+++ /dev/null
@@ -1,60 +0,0 @@
---- suite/storage_engine/index_type_hash.result 2012-07-15 01:10:17.919128889 +0400
-+++ suite/storage_engine/index_type_hash.reject 2012-07-15 17:49:26.135915989 +0400
-@@ -4,7 +4,7 @@
- ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
- SHOW KEYS IN t1;
- Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
--t1 1 a 1 a # # NULL NULL # HASH
-+t1 1 a 1 a # # NULL NULL # BTREE
- DROP TABLE t1;
- CREATE TABLE t1 (a <INT_COLUMN>,
- b <CHAR_COLUMN>,
-@@ -12,8 +12,8 @@
- ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
- SHOW KEYS IN t1;
- Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
--t1 1 a_b 1 a # # NULL NULL # HASH a_b index
--t1 1 a_b 2 b # # NULL NULL # HASH a_b index
-+t1 1 a_b 1 a # # NULL NULL # BTREE a_b index
-+t1 1 a_b 2 b # # NULL NULL # BTREE a_b index
- DROP TABLE t1;
- CREATE TABLE t1 (a <INT_COLUMN>,
- b <CHAR_COLUMN>,
-@@ -22,8 +22,8 @@
- ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
- SHOW KEYS IN t1;
- Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
--t1 1 a 1 a # # NULL NULL # HASH
--t1 1 b 1 b # # NULL NULL # HASH
-+t1 1 a 1 a # # NULL NULL # BTREE
-+t1 1 b 1 b # # NULL NULL # BTREE
- DROP TABLE t1;
- CREATE TABLE t1 (a <INT_COLUMN>,
- b <CHAR_COLUMN>,
-@@ -31,7 +31,7 @@
- ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
- SHOW KEYS IN t1;
- Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
--t1 0 a 1 a # # NULL NULL # HASH
-+t1 0 a 1 a # # NULL NULL # BTREE
- INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
- INSERT INTO t1 (a,b) VALUES (1,'c');
- ERROR 23000: Duplicate entry '1' for key 'a'
-@@ -43,7 +43,7 @@
- ALTER TABLE t1 ADD <CUSTOM_INDEX> (a) USING HASH COMMENT 'simple index on a';
- SHOW INDEX FROM t1;
- Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
--t1 1 a 1 a # # NULL NULL # HASH simple index on a
-+t1 1 a 1 a # # NULL NULL # BTREE simple index on a
- ALTER TABLE t1 DROP KEY a;
- DROP TABLE t1;
- CREATE TABLE t1 (a <INT_COLUMN>,
-@@ -52,7 +52,7 @@
- ) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
- SHOW KEYS IN t1;
- Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_part Packed Null Index_type Comment Index_comment
--t1 0 a 1 a # # NULL NULL # HASH
-+t1 0 a 1 a # # NULL NULL # BTREE
- INSERT INTO t1 (a,b) VALUES (1,'a'),(2,'b');
- INSERT INTO t1 (a,b) VALUES (1,'c');
- ERROR 23000: Duplicate entry '1' for key 'a'
diff --git a/storage/xtradb/mysql-test/storage_engine/insert_delayed.rdiff b/storage/xtradb/mysql-test/storage_engine/insert_delayed.rdiff
deleted file mode 100644
index 9e6cddf03f0..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/insert_delayed.rdiff
+++ /dev/null
@@ -1,26 +0,0 @@
---- suite/storage_engine/insert_delayed.result 2013-01-23 01:23:49.461254916 +0400
-+++ suite/storage_engine/insert_delayed.reject 2013-01-23 01:47:05.975698364 +0400
-@@ -5,7 +5,16 @@
- connect con0,localhost,root,,;
- SET lock_wait_timeout = 1;
- INSERT DELAYED INTO t1 (a,b) VALUES (3,'c');
-+ERROR HY000: DELAYED option not supported for table 't1'
-+# ------------ UNEXPECTED RESULT ------------
-+# The statement|command finished with ER_DELAYED_NOT_SUPPORTED.
-+# INSERT DELAYED or the mix could be unsupported|malfunctioning, or the problem was caused by previous errors.
-+# You can change the engine code, or create an rdiff, or disable the test by adding it to disabled.def.
-+# Further in this test, the message might sometimes be suppressed; a part of the test might be skipped.
-+# Also, this problem may cause a chain effect (more errors of different kinds in the test).
-+# -------------------------------------------
- INSERT DELAYED INTO t1 SET a=4, b='d';
-+ERROR HY000: DELAYED option not supported for table 't1'
- INSERT DELAYED INTO t1 (a,b) SELECT 5, 'e';
- ERROR HY000: Lock wait timeout exceeded; try restarting transaction
- disconnect con0;
-@@ -20,6 +29,4 @@
- a b
- 1 f
- 2 b
--3 c
--4 d
- DROP TABLE t1;
diff --git a/storage/xtradb/mysql-test/storage_engine/lock_concurrent.rdiff b/storage/xtradb/mysql-test/storage_engine/lock_concurrent.rdiff
deleted file mode 100644
index 6b7a52046e2..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/lock_concurrent.rdiff
+++ /dev/null
@@ -1,25 +0,0 @@
---- suite/storage_engine/lock_concurrent.result 2012-06-24 23:55:19.539380000 +0400
-+++ suite/storage_engine/lock_concurrent.reject 2012-07-15 17:50:21.279222746 +0400
-@@ -4,6 +4,14 @@
- connect con1,localhost,root,,;
- SET lock_wait_timeout = 1;
- LOCK TABLES t1 READ LOCAL;
-+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
-+# ------------ UNEXPECTED RESULT ------------
-+# The statement|command finished with ER_LOCK_WAIT_TIMEOUT.
-+# LOCK .. WRITE CONCURRENT or the mix could be unsupported|malfunctioning, or the problem was caused by previous errors.
-+# You can change the engine code, or create an rdiff, or disable the test by adding it to disabled.def.
-+# Further in this test, the message might sometimes be suppressed; a part of the test might be skipped.
-+# Also, this problem may cause a chain effect (more errors of different kinds in the test).
-+# -------------------------------------------
- UNLOCK TABLES;
- connection default;
- UNLOCK TABLES;
-@@ -11,6 +19,7 @@
- LOCK TABLES t1 READ LOCAL;
- connection default;
- LOCK TABLES t1 WRITE CONCURRENT, t1 AS t2 READ;
-+ERROR HY000: Lock wait timeout exceeded; try restarting transaction
- UNLOCK TABLES;
- connection con1;
- UNLOCK TABLES;
diff --git a/storage/xtradb/mysql-test/storage_engine/optimize_table.rdiff b/storage/xtradb/mysql-test/storage_engine/optimize_table.rdiff
deleted file mode 100644
index 54d1f600516..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/optimize_table.rdiff
+++ /dev/null
@@ -1,37 +0,0 @@
---- suite/storage_engine/optimize_table.result 2012-07-12 19:13:53.741428591 +0400
-+++ suite/storage_engine/optimize_table.reject 2012-07-15 17:50:30.843102510 +0400
-@@ -5,25 +5,32 @@
- INSERT INTO t1 (a,b) VALUES (3,'c'),(4,'d');
- OPTIMIZE TABLE t1;
- Table Op Msg_type Msg_text
-+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead
- test.t1 optimize status OK
- INSERT INTO t2 (a,b) VALUES (4,'d');
- OPTIMIZE NO_WRITE_TO_BINLOG TABLE t2;
- Table Op Msg_type Msg_text
-+test.t2 optimize note Table does not support optimize, doing recreate + analyze instead
- test.t2 optimize status OK
- INSERT INTO t2 (a,b) VALUES (5,'e');
- INSERT INTO t1 (a,b) VALUES (6,'f');
- OPTIMIZE LOCAL TABLE t1, t2;
- Table Op Msg_type Msg_text
-+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead
- test.t1 optimize status OK
-+test.t2 optimize note Table does not support optimize, doing recreate + analyze instead
- test.t2 optimize status OK
- OPTIMIZE TABLE t1, t2;
- Table Op Msg_type Msg_text
--test.t1 optimize status Table is already up to date
--test.t2 optimize status Table is already up to date
-+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead
-+test.t1 optimize status OK
-+test.t2 optimize note Table does not support optimize, doing recreate + analyze instead
-+test.t2 optimize status OK
- DROP TABLE t1, t2;
- CREATE TABLE t1 (a <INT_COLUMN>, b <CHAR_COLUMN>, <CUSTOM_INDEX> (a)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
- INSERT INTO t1 (a,b) VALUES (1,'a'),(100,'b'),(2,'c'),(3,'d');
- OPTIMIZE TABLE t1;
- Table Op Msg_type Msg_text
-+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead
- test.t1 optimize status OK
- DROP TABLE t1;
diff --git a/storage/xtradb/mysql-test/storage_engine/parts/checksum_table.rdiff b/storage/xtradb/mysql-test/storage_engine/parts/checksum_table.rdiff
deleted file mode 100644
index c8aabb787e9..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/parts/checksum_table.rdiff
+++ /dev/null
@@ -1,13 +0,0 @@
---- suite/storage_engine/parts/checksum_table.result 2013-11-08 22:30:34.000000000 +0400
-+++ suite/storage_engine/parts/checksum_table.reject 2013-11-08 22:32:30.000000000 +0400
-@@ -31,8 +31,8 @@
- test.t1 4272806499
- CHECKSUM TABLE t1, t2 QUICK;
- Table Checksum
--test.t1 4272806499
--test.t2 0
-+test.t1 NULL
-+test.t2 NULL
- CHECKSUM TABLE t1, t2 EXTENDED;
- Table Checksum
- test.t1 4272806499
diff --git a/storage/xtradb/mysql-test/storage_engine/parts/create_table.rdiff b/storage/xtradb/mysql-test/storage_engine/parts/create_table.rdiff
deleted file mode 100644
index 0df91c6fc6e..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/parts/create_table.rdiff
+++ /dev/null
@@ -1,20 +0,0 @@
---- suite/storage_engine/parts/create_table.result 2012-07-12 21:56:38.618667460 +0400
-+++ suite/storage_engine/parts/create_table.reject 2012-07-15 20:06:43.496358345 +0400
-@@ -65,7 +65,7 @@
- 1 SIMPLE t1 abc,def # # # # # # #
- EXPLAIN PARTITIONS SELECT a FROM t1 WHERE a = 100;
- id select_type table partitions type possible_keys key key_len ref rows Extra
--1 SIMPLE NULL NULL # # # # # # #
-+1 SIMPLE t1 def # # # # # # #
- INSERT INTO t1 (a) VALUES (50);
- ERROR HY000: Table has no partition for value 50
- DROP TABLE t1;
-@@ -81,7 +81,7 @@
- 1 SIMPLE t1 abc_abcsp0,def_defsp0 # # # # # # #
- EXPLAIN PARTITIONS SELECT a FROM t1 WHERE a = 100;
- id select_type table partitions type possible_keys key key_len ref rows Extra
--1 SIMPLE NULL NULL # # # # # # #
-+1 SIMPLE t1 def_defsp0 # # # # # # #
- SELECT TABLE_SCHEMA, TABLE_NAME, PARTITION_NAME, SUBPARTITION_NAME, PARTITION_METHOD, SUBPARTITION_METHOD
- FROM INFORMATION_SCHEMA.PARTITIONS WHERE TABLE_NAME = 't1';
- TABLE_SCHEMA TABLE_NAME PARTITION_NAME SUBPARTITION_NAME PARTITION_METHOD SUBPARTITION_METHOD
diff --git a/storage/xtradb/mysql-test/storage_engine/parts/disabled.def b/storage/xtradb/mysql-test/storage_engine/parts/disabled.def
deleted file mode 100644
index 796bdfc751b..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/parts/disabled.def
+++ /dev/null
@@ -1 +0,0 @@
-repair_table : InnoDB of 5.6.10 does not support repair on partitioned tables (fixed by 5.6.14)
diff --git a/storage/xtradb/mysql-test/storage_engine/parts/optimize_table.rdiff b/storage/xtradb/mysql-test/storage_engine/parts/optimize_table.rdiff
deleted file mode 100644
index a35ba5167d9..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/parts/optimize_table.rdiff
+++ /dev/null
@@ -1,58 +0,0 @@
---- suite/storage_engine/parts/optimize_table.result 2013-07-18 22:55:38.000000000 +0400
-+++ suite/storage_engine/parts/optimize_table.reject 2013-08-05 19:45:19.000000000 +0400
-@@ -9,18 +9,22 @@
- INSERT INTO t1 (a,b) VALUES (3,'c'),(4,'d');
- ALTER TABLE t1 OPTIMIZE PARTITION p1;
- Table Op Msg_type Msg_text
-+test.t1 optimize note Table does not support optimize on partitions. All partitions will be rebuilt and analyzed.
- test.t1 optimize status OK
- INSERT INTO t2 (a,b) VALUES (4,'d');
- ALTER TABLE t2 OPTIMIZE PARTITION p0 NO_WRITE_TO_BINLOG;
- Table Op Msg_type Msg_text
-+test.t2 optimize note Table does not support optimize on partitions. All partitions will be rebuilt and analyzed.
- test.t2 optimize status OK
- INSERT INTO t1 (a,b) VALUES (6,'f');
- ALTER TABLE t1 OPTIMIZE PARTITION ALL LOCAL;
- Table Op Msg_type Msg_text
-+test.t1 optimize note Table does not support optimize on partitions. All partitions will be rebuilt and analyzed.
- test.t1 optimize status OK
- INSERT INTO t2 (a,b) VALUES (5,'e');
- ALTER TABLE t2 OPTIMIZE PARTITION p1,p0;
- Table Op Msg_type Msg_text
-+test.t2 optimize note Table does not support optimize on partitions. All partitions will be rebuilt and analyzed.
- test.t2 optimize status OK
- DROP TABLE t1, t2;
- DROP TABLE IF EXISTS t1,t2;
-@@ -30,25 +34,32 @@
- INSERT INTO t1 (a,b) VALUES (3,'c'),(4,'d');
- OPTIMIZE TABLE t1;
- Table Op Msg_type Msg_text
-+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead
- test.t1 optimize status OK
- INSERT INTO t2 (a,b) VALUES (4,'d');
- OPTIMIZE NO_WRITE_TO_BINLOG TABLE t2;
- Table Op Msg_type Msg_text
-+test.t2 optimize note Table does not support optimize, doing recreate + analyze instead
- test.t2 optimize status OK
- INSERT INTO t2 (a,b) VALUES (5,'e');
- INSERT INTO t1 (a,b) VALUES (6,'f');
- OPTIMIZE LOCAL TABLE t1, t2;
- Table Op Msg_type Msg_text
-+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead
- test.t1 optimize status OK
-+test.t2 optimize note Table does not support optimize, doing recreate + analyze instead
- test.t2 optimize status OK
- OPTIMIZE TABLE t1, t2;
- Table Op Msg_type Msg_text
-+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead
- test.t1 optimize status OK
-+test.t2 optimize note Table does not support optimize, doing recreate + analyze instead
- test.t2 optimize status OK
- DROP TABLE t1, t2;
- CREATE TABLE t1 (a <INT_COLUMN>, b <CHAR_COLUMN>, <CUSTOM_INDEX> (a)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS> PARTITION BY HASH(a) PARTITIONS 2;
- INSERT INTO t1 (a,b) VALUES (1,'a'),(100,'b'),(2,'c'),(3,'d');
- OPTIMIZE TABLE t1;
- Table Op Msg_type Msg_text
-+test.t1 optimize note Table does not support optimize, doing recreate + analyze instead
- test.t1 optimize status OK
- DROP TABLE t1;
diff --git a/storage/xtradb/mysql-test/storage_engine/parts/repair_table.rdiff b/storage/xtradb/mysql-test/storage_engine/parts/repair_table.rdiff
deleted file mode 100644
index 35b150e82d1..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/parts/repair_table.rdiff
+++ /dev/null
@@ -1,158 +0,0 @@
---- suite/storage_engine/parts/repair_table.result 2013-07-18 22:55:38.000000000 +0400
-+++ suite/storage_engine/parts/repair_table.reject 2013-08-05 19:54:09.000000000 +0400
-@@ -25,7 +25,7 @@
- INSERT INTO t1 (a,b) VALUES (10,'j');
- ALTER TABLE t1 REPAIR PARTITION p1 QUICK USE_FRM;
- Table Op Msg_type Msg_text
--test.t1 repair status OK
-+test.t1 repair note The storage engine for the table doesn't support repair
- INSERT INTO t2 (a,b) VALUES (12,'l');
- ALTER TABLE t2 REPAIR PARTITION NO_WRITE_TO_BINLOG ALL QUICK EXTENDED USE_FRM;
- Table Op Msg_type Msg_text
-@@ -58,8 +58,8 @@
- INSERT INTO t2 (a,b) VALUES (11,'k');
- REPAIR TABLE t1, t2 QUICK USE_FRM;
- Table Op Msg_type Msg_text
--test.t1 repair status OK
--test.t2 repair status OK
-+test.t1 repair note The storage engine for the table doesn't support repair
-+test.t2 repair note The storage engine for the table doesn't support repair
- INSERT INTO t1 (a,b) VALUES (12,'l');
- INSERT INTO t2 (a,b) VALUES (13,'m');
- REPAIR NO_WRITE_TO_BINLOG TABLE t1, t2 QUICK EXTENDED USE_FRM;
-@@ -101,119 +101,13 @@
- INSERT INTO t1 (a,b) VALUES (10,'j');
- REPAIR TABLE t1 USE_FRM;
- Table Op Msg_type Msg_text
--test.t1 repair status OK
--t1#P#p0.MYD
--t1#P#p0.MYI
--t1#P#p1.MYD
--t1#P#p1.MYI
-+test.t1 repair note The storage engine for the table doesn't support repair
- t1.frm
- t1.par
- INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o');
- # Statement ended with one of expected results (0,144).
- # If you got a difference in error message, just add it to rdiff file
- FLUSH TABLE t1;
--Restoring <DATADIR>/test/t1#P#p0.MYD
--CHECK TABLE t1;
--Table Op Msg_type Msg_text
--test.t1 check error Size of datafile is: 26 Should be: 39
--test.t1 check error Partition p0 returned error
--test.t1 check error Corrupt
--SELECT a,b FROM t1;
--a b
--8 h
--10 j
--7 g
--15 o
--Warnings:
--Error 145 Table './test/t1#P#p0' is marked as crashed and should be repaired
--Error 1194 Table 't1' is marked as crashed and should be repaired
--Error 1034 Number of rows changed from 3 to 2
--# Statement ended with one of expected results (0,ER_NOT_KEYFILE,144).
--# If you got a difference in error message, just add it to rdiff file
--INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o');
--# Statement ended with one of expected results (0,144).
--# If you got a difference in error message, just add it to rdiff file
--FLUSH TABLE t1;
--Restoring <DATADIR>/test/t1#P#p0.MYI
--CHECK TABLE t1;
--Table Op Msg_type Msg_text
--test.t1 check warning Size of datafile is: 39 Should be: 26
--test.t1 check error Record-count is not ok; is 3 Should be: 2
--test.t1 check warning Found 3 key parts. Should be: 2
--test.t1 check error Partition p0 returned error
--test.t1 check error Corrupt
--SELECT a,b FROM t1;
--a b
--8 h
--10 j
--14 n
--7 g
--15 o
--15 o
--Warnings:
--Error 145 Table './test/t1#P#p0' is marked as crashed and should be repaired
--Error 1194 Table 't1' is marked as crashed and should be repaired
--Error 1034 Number of rows changed from 2 to 3
--# Statement ended with one of expected results (0,ER_NOT_KEYFILE,144).
--# If you got a difference in error message, just add it to rdiff file
--INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o');
--# Statement ended with one of expected results (0,144).
--# If you got a difference in error message, just add it to rdiff file
--FLUSH TABLE t1;
--Restoring <DATADIR>/test/t1#P#p1.MYD
--CHECK TABLE t1;
--Table Op Msg_type Msg_text
--test.t1 check error Size of datafile is: 39 Should be: 52
--test.t1 check error Partition p1 returned error
--test.t1 check error Corrupt
--SELECT a,b FROM t1;
--a b
--8 h
--10 j
--14 n
--14 n
--7 g
--15 o
--15 o
--Warnings:
--Error 145 Table './test/t1#P#p1' is marked as crashed and should be repaired
--Error 1194 Table 't1' is marked as crashed and should be repaired
--Error 1034 Number of rows changed from 4 to 3
--# Statement ended with one of expected results (0,ER_NOT_KEYFILE,144).
--# If you got a difference in error message, just add it to rdiff file
--INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o');
--# Statement ended with one of expected results (0,144).
--# If you got a difference in error message, just add it to rdiff file
--FLUSH TABLE t1;
--Restoring <DATADIR>/test/t1#P#p1.MYI
--CHECK TABLE t1;
--Table Op Msg_type Msg_text
--test.t1 check warning Size of datafile is: 52 Should be: 39
--test.t1 check error Record-count is not ok; is 4 Should be: 3
--test.t1 check warning Found 4 key parts. Should be: 3
--test.t1 check error Partition p1 returned error
--test.t1 check error Corrupt
--SELECT a,b FROM t1;
--a b
--8 h
--10 j
--14 n
--14 n
--14 n
--7 g
--15 o
--15 o
--15 o
--Warnings:
--Error 145 Table './test/t1#P#p1' is marked as crashed and should be repaired
--Error 1194 Table 't1' is marked as crashed and should be repaired
--Error 1034 Number of rows changed from 3 to 4
--# Statement ended with one of expected results (0,ER_NOT_KEYFILE,144).
--# If you got a difference in error message, just add it to rdiff file
--INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o');
--# Statement ended with one of expected results (0,144).
--# If you got a difference in error message, just add it to rdiff file
--FLUSH TABLE t1;
- Restoring <DATADIR>/test/t1.par
- CHECK TABLE t1;
- Table Op Msg_type Msg_text
-@@ -223,14 +117,8 @@
- 8 h
- 10 j
- 14 n
--14 n
--14 n
--14 n
- 7 g
- 15 o
--15 o
--15 o
--15 o
- # Statement ended with one of expected results (0,ER_NOT_KEYFILE,144).
- # If you got a difference in error message, just add it to rdiff file
- DROP TABLE t1;
diff --git a/storage/xtradb/mysql-test/storage_engine/parts/suite.opt b/storage/xtradb/mysql-test/storage_engine/parts/suite.opt
deleted file mode 100644
index 66f581b56d0..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/parts/suite.opt
+++ /dev/null
@@ -1,2 +0,0 @@
---innodb
-
diff --git a/storage/xtradb/mysql-test/storage_engine/repair_table.rdiff b/storage/xtradb/mysql-test/storage_engine/repair_table.rdiff
deleted file mode 100644
index be3709c5833..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/repair_table.rdiff
+++ /dev/null
@@ -1,138 +0,0 @@
---- suite/storage_engine/repair_table.result 2013-10-03 20:35:06.000000000 +0400
-+++ suite/storage_engine/repair_table.reject 2013-11-08 22:04:22.000000000 +0400
-@@ -4,56 +4,57 @@
- CREATE TABLE t2 (a <INT_COLUMN>, b <CHAR_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
- REPAIR TABLE t1;
- Table Op Msg_type Msg_text
--test.t1 repair status OK
-+test.t1 repair note The storage engine for the table doesn't support repair
- INSERT INTO t1 (a,b) VALUES (3,'c');
- INSERT INTO t2 (a,b) VALUES (4,'d');
- REPAIR NO_WRITE_TO_BINLOG TABLE t1, t2;
- Table Op Msg_type Msg_text
--test.t1 repair status OK
--test.t2 repair status OK
-+test.t1 repair note The storage engine for the table doesn't support repair
-+test.t2 repair note The storage engine for the table doesn't support repair
- INSERT INTO t2 (a,b) VALUES (5,'e'),(6,'f');
- REPAIR LOCAL TABLE t2;
- Table Op Msg_type Msg_text
--test.t2 repair status OK
-+test.t2 repair note The storage engine for the table doesn't support repair
- INSERT INTO t1 (a,b) VALUES (7,'g'),(8,'h');
- INSERT INTO t2 (a,b) VALUES (9,'i');
- REPAIR LOCAL TABLE t2, t1 EXTENDED;
- Table Op Msg_type Msg_text
--test.t2 repair status OK
--test.t1 repair status OK
-+test.t2 repair note The storage engine for the table doesn't support repair
-+test.t1 repair note The storage engine for the table doesn't support repair
- INSERT INTO t1 (a,b) VALUES (10,'j');
- INSERT INTO t2 (a,b) VALUES (11,'k');
- REPAIR TABLE t1, t2 QUICK USE_FRM;
- Table Op Msg_type Msg_text
--test.t1 repair warning Number of rows changed from 0 to 6
--test.t1 repair status OK
--test.t2 repair warning Number of rows changed from 0 to 5
--test.t2 repair status OK
-+test.t1 repair note The storage engine for the table doesn't support repair
-+test.t2 repair note The storage engine for the table doesn't support repair
- INSERT INTO t1 (a,b) VALUES (12,'l');
- INSERT INTO t2 (a,b) VALUES (13,'m');
- REPAIR NO_WRITE_TO_BINLOG TABLE t1, t2 QUICK EXTENDED USE_FRM;
- Table Op Msg_type Msg_text
--test.t1 repair warning Number of rows changed from 0 to 7
--test.t1 repair status OK
--test.t2 repair warning Number of rows changed from 0 to 6
--test.t2 repair status OK
-+test.t1 repair note The storage engine for the table doesn't support repair
-+test.t2 repair note The storage engine for the table doesn't support repair
- FLUSH TABLE t1;
- INSERT INTO t1 (a,b) VALUES (14,'n');
--ERROR HY000: Incorrect file format 't1'
- # Statement ended with one of expected results (0,130,ER_FAILED_READ_FROM_PAR_FILE,ER_OPEN_AS_READONLY).
- # If you got a difference in error message, just add it to rdiff file
- CHECK TABLE t1;
- Table Op Msg_type Msg_text
--test.t1 check Error Incorrect file format 't1'
--test.t1 check error Corrupt
-+test.t1 check status OK
- SELECT a,b FROM t1;
--ERROR HY000: Incorrect file format 't1'
-+a b
-+1 a
-+2 b
-+3 c
-+7 g
-+8 h
-+10 j
-+12 l
-+14 n
- # Statement ended with one of expected results (0,130,ER_FAILED_READ_FROM_PAR_FILE,ER_OPEN_AS_READONLY).
- # If you got a difference in error message, just add it to rdiff file
- REPAIR TABLE t1;
- Table Op Msg_type Msg_text
--test.t1 repair Error Incorrect file format 't1'
--test.t1 repair error Corrupt
-+test.t1 repair note The storage engine for the table doesn't support repair
- DROP TABLE t1, t2;
- call mtr.add_suppression("Got an error from thread_id=.*");
- call mtr.add_suppression("MySQL thread id .*, query id .* localhost.*root Checking table");
-@@ -62,45 +63,32 @@
- CREATE TABLE t1 (a <INT_COLUMN>, b <CHAR_COLUMN>, <CUSTOM_INDEX> (a)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
- REPAIR TABLE t1;
- Table Op Msg_type Msg_text
--test.t1 repair status OK
-+test.t1 repair note The storage engine for the table doesn't support repair
- INSERT INTO t1 (a,b) VALUES (7,'g'),(8,'h');
- REPAIR TABLE t1 EXTENDED;
- Table Op Msg_type Msg_text
--test.t1 repair status OK
-+test.t1 repair note The storage engine for the table doesn't support repair
- INSERT INTO t1 (a,b) VALUES (10,'j');
- REPAIR TABLE t1 USE_FRM;
- Table Op Msg_type Msg_text
--test.t1 repair warning Number of rows changed from 0 to 3
--test.t1 repair status OK
--t1.MYD
--t1.MYI
-+test.t1 repair note The storage engine for the table doesn't support repair
- t1.frm
-+t1.ibd
- INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o');
- # Statement ended with one of expected results (0,144).
- # If you got a difference in error message, just add it to rdiff file
- FLUSH TABLE t1;
--Restoring <DATADIR>/test/t1.MYD
-+Restoring <DATADIR>/test/t1.ibd
- CHECK TABLE t1;
- Table Op Msg_type Msg_text
--test.t1 check error Size of datafile is: 39 Should be: 65
--test.t1 check error Corrupt
-+test.t1 check status OK
- SELECT a,b FROM t1;
--ERROR HY000: Incorrect key file for table 't1'; try to repair it
--# Statement ended with one of expected results (0,ER_NOT_KEYFILE,144).
--# If you got a difference in error message, just add it to rdiff file
--INSERT INTO t1 (a,b) VALUES (14,'n'),(15,'o');
--ERROR HY000: Table './test/t1' is marked as crashed and last (automatic?) repair failed
--# Statement ended with one of expected results (0,144).
--# If you got a difference in error message, just add it to rdiff file
--FLUSH TABLE t1;
--Restoring <DATADIR>/test/t1.MYI
--CHECK TABLE t1;
--Table Op Msg_type Msg_text
--test.t1 check warning Table is marked as crashed and last repair failed
--test.t1 check error Size of datafile is: 39 Should be: 65
--test.t1 check error Corrupt
--SELECT a,b FROM t1;
--ERROR HY000: Table './test/t1' is marked as crashed and last (automatic?) repair failed
-+a b
-+7 g
-+8 h
-+10 j
-+14 n
-+15 o
- # Statement ended with one of expected results (0,ER_NOT_KEYFILE,144).
- # If you got a difference in error message, just add it to rdiff file
- DROP TABLE t1;
diff --git a/storage/xtradb/mysql-test/storage_engine/suite.opt b/storage/xtradb/mysql-test/storage_engine/suite.opt
deleted file mode 100644
index 8c10cefc626..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/suite.opt
+++ /dev/null
@@ -1 +0,0 @@
---innodb
diff --git a/storage/xtradb/mysql-test/storage_engine/suite.pm b/storage/xtradb/mysql-test/storage_engine/suite.pm
deleted file mode 100644
index e186a532dcc..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/suite.pm
+++ /dev/null
@@ -1,8 +0,0 @@
-package My::Suite::SE::XtraDB;
-
-@ISA = qw(My::Suite);
-
-return "Need XtraDB engine";
-
-bless { };
-
diff --git a/storage/xtradb/mysql-test/storage_engine/tbl_opt_index_dir.rdiff b/storage/xtradb/mysql-test/storage_engine/tbl_opt_index_dir.rdiff
deleted file mode 100644
index e09e50b17ec..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/tbl_opt_index_dir.rdiff
+++ /dev/null
@@ -1,23 +0,0 @@
---- suite/storage_engine/tbl_opt_data_index_dir.result 2013-10-03 20:35:06.000000000 +0400
-+++ suite/storage_engine/tbl_opt_data_index_dir.reject 2013-11-08 22:06:54.000000000 +0400
-@@ -1,10 +1,12 @@
- DROP TABLE IF EXISTS t1;
-+Warnings:
-+Warning 1618 <INDEX DIRECTORY> option ignored
- SHOW CREATE TABLE t1;
- Table Create Table
- t1 CREATE TABLE `t1` (
- `a` int(11) DEFAULT NULL,
- `b` char(8) DEFAULT NULL
--) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 DATA DIRECTORY='<DATA_DIR>' INDEX DIRECTORY='<INDEX_DIR>'
-+) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 DATA DIRECTORY='<DATA_DIR>'
- Warnings:
- Warning 1618 <INDEX DIRECTORY> option ignored
- SHOW CREATE TABLE t1;
-@@ -12,5 +14,5 @@
- t1 CREATE TABLE `t1` (
- `a` int(11) DEFAULT NULL,
- `b` char(8) DEFAULT NULL
--) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 DATA DIRECTORY='<DATA_DIR>' INDEX DIRECTORY='<INDEX_DIR>'
-+) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 DATA DIRECTORY='<DATA_DIR>'
- DROP TABLE t1;
diff --git a/storage/xtradb/mysql-test/storage_engine/tbl_opt_insert_method.rdiff b/storage/xtradb/mysql-test/storage_engine/tbl_opt_insert_method.rdiff
deleted file mode 100644
index 468b82926f0..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/tbl_opt_insert_method.rdiff
+++ /dev/null
@@ -1,11 +0,0 @@
---- suite/storage_engine/tbl_opt_insert_method.result 2012-06-24 23:55:19.539380000 +0400
-+++ suite/storage_engine/tbl_opt_insert_method.reject 2012-07-15 17:51:09.978610512 +0400
-@@ -5,7 +5,7 @@
- t1 CREATE TABLE `t1` (
- `a` int(11) DEFAULT NULL,
- `b` char(8) DEFAULT NULL
--) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 INSERT_METHOD=FIRST
-+) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1
- ALTER TABLE t1 INSERT_METHOD=NO;
- SHOW CREATE TABLE t1;
- Table Create Table
diff --git a/storage/xtradb/mysql-test/storage_engine/tbl_opt_key_block_size.opt b/storage/xtradb/mysql-test/storage_engine/tbl_opt_key_block_size.opt
deleted file mode 100644
index 7cd737b2b87..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/tbl_opt_key_block_size.opt
+++ /dev/null
@@ -1,3 +0,0 @@
---innodb-file-per-table=1
---innodb-file-format=Barracuda
-
diff --git a/storage/xtradb/mysql-test/storage_engine/tbl_opt_row_format.opt b/storage/xtradb/mysql-test/storage_engine/tbl_opt_row_format.opt
deleted file mode 100644
index 7cd737b2b87..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/tbl_opt_row_format.opt
+++ /dev/null
@@ -1,3 +0,0 @@
---innodb-file-per-table=1
---innodb-file-format=Barracuda
-
diff --git a/storage/xtradb/mysql-test/storage_engine/tbl_opt_row_format.rdiff b/storage/xtradb/mysql-test/storage_engine/tbl_opt_row_format.rdiff
deleted file mode 100644
index a6572ffa7f0..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/tbl_opt_row_format.rdiff
+++ /dev/null
@@ -1,10 +0,0 @@
---- suite/storage_engine/tbl_opt_row_format.result 2012-06-24 23:55:19.539380000 +0400
-+++ suite/storage_engine/tbl_opt_row_format.reject 2012-07-15 19:26:02.235049157 +0400
-@@ -1,5 +1,7 @@
- DROP TABLE IF EXISTS t1;
- CREATE TABLE t1 (a <INT_COLUMN>, b <CHAR_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS> ROW_FORMAT=FIXED;
-+Warnings:
-+Warning 1478 <STORAGE_ENGINE>: assuming ROW_FORMAT=COMPACT.
- SHOW CREATE TABLE t1;
- Table Create Table
- t1 CREATE TABLE `t1` (
diff --git a/storage/xtradb/mysql-test/storage_engine/tbl_opt_union.rdiff b/storage/xtradb/mysql-test/storage_engine/tbl_opt_union.rdiff
deleted file mode 100644
index cbdf5818022..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/tbl_opt_union.rdiff
+++ /dev/null
@@ -1,16 +0,0 @@
---- suite/storage_engine/tbl_opt_union.result 2012-06-24 23:55:19.539380000 +0400
-+++ suite/storage_engine/tbl_opt_union.reject 2012-07-15 17:51:31.014346053 +0400
-@@ -4,11 +4,11 @@
- Table Create Table
- t1 CREATE TABLE `t1` (
- `a` int(11) DEFAULT NULL
--) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 UNION=(`child1`)
-+) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1
- ALTER TABLE t1 UNION = (child1,child2);
- SHOW CREATE TABLE t1;
- Table Create Table
- t1 CREATE TABLE `t1` (
- `a` int(11) DEFAULT NULL
--) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1 UNION=(`child1`,`child2`)
-+) ENGINE=<STORAGE_ENGINE> DEFAULT CHARSET=latin1
- DROP TABLE t1, child1, child2;
diff --git a/storage/xtradb/mysql-test/storage_engine/trx/cons_snapshot_serializable.rdiff b/storage/xtradb/mysql-test/storage_engine/trx/cons_snapshot_serializable.rdiff
deleted file mode 100644
index e6149be58dc..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/trx/cons_snapshot_serializable.rdiff
+++ /dev/null
@@ -1,18 +0,0 @@
---- suite/storage_engine/trx/cons_snapshot_serializable.result 2013-11-27 18:46:36.000000000 +0400
-+++ suite/storage_engine/trx/cons_snapshot_serializable.reject 2013-11-28 19:17:02.000000000 +0400
-@@ -5,12 +5,15 @@
- CREATE TABLE t1 (a <INT_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
- SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
- START TRANSACTION WITH CONSISTENT SNAPSHOT;
-+Warnings:
-+Warning 138 InnoDB: WITH CONSISTENT SNAPSHOT was ignored because this phrase can only be used with REPEATABLE READ isolation level.
- connection con2;
- INSERT INTO t1 (a) VALUES (1);
- connection con1;
- # If consistent read works on this isolation level (SERIALIZABLE), the following SELECT should not return the value we inserted (1)
- SELECT a FROM t1;
- a
-+1
- COMMIT;
- connection default;
- disconnect con1;
diff --git a/storage/xtradb/mysql-test/storage_engine/trx/level_read_committed.rdiff b/storage/xtradb/mysql-test/storage_engine/trx/level_read_committed.rdiff
deleted file mode 100644
index cb64d32138b..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/trx/level_read_committed.rdiff
+++ /dev/null
@@ -1,11 +0,0 @@
---- suite/storage_engine/trx/level_read_committed.result 2013-11-28 19:18:48.000000000 +0400
-+++ suite/storage_engine/trx/level_read_committed.reject 2013-11-28 19:18:59.000000000 +0400
-@@ -77,6 +77,8 @@
- CREATE TABLE t1 (a <INT_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
- SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
- START TRANSACTION WITH CONSISTENT SNAPSHOT;
-+Warnings:
-+Warning 138 InnoDB: WITH CONSISTENT SNAPSHOT was ignored because this phrase can only be used with REPEATABLE READ isolation level.
- connection con2;
- INSERT INTO t1 (a) VALUES (1);
- connection con1;
diff --git a/storage/xtradb/mysql-test/storage_engine/trx/level_read_uncommitted.rdiff b/storage/xtradb/mysql-test/storage_engine/trx/level_read_uncommitted.rdiff
deleted file mode 100644
index 6a79abe3ca5..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/trx/level_read_uncommitted.rdiff
+++ /dev/null
@@ -1,11 +0,0 @@
---- suite/storage_engine/trx/level_read_uncommitted.result 2013-11-28 19:18:48.000000000 +0400
-+++ suite/storage_engine/trx/level_read_uncommitted.reject 2013-11-28 19:19:50.000000000 +0400
-@@ -102,6 +102,8 @@
- CREATE TABLE t1 (a <INT_COLUMN>) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
- SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
- START TRANSACTION WITH CONSISTENT SNAPSHOT;
-+Warnings:
-+Warning 138 InnoDB: WITH CONSISTENT SNAPSHOT was ignored because this phrase can only be used with REPEATABLE READ isolation level.
- connection con2;
- INSERT INTO t1 (a) VALUES (1);
- connection con1;
diff --git a/storage/xtradb/mysql-test/storage_engine/trx/suite.opt b/storage/xtradb/mysql-test/storage_engine/trx/suite.opt
deleted file mode 100644
index 64bbe8b554c..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/trx/suite.opt
+++ /dev/null
@@ -1,3 +0,0 @@
---innodb
---innodb-lock-wait-timeout=1
-
diff --git a/storage/xtradb/mysql-test/storage_engine/type_blob.opt b/storage/xtradb/mysql-test/storage_engine/type_blob.opt
deleted file mode 100644
index 40445305fc6..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/type_blob.opt
+++ /dev/null
@@ -1 +0,0 @@
---innodb_log_file_size=100M
diff --git a/storage/xtradb/mysql-test/storage_engine/type_char_indexes.rdiff b/storage/xtradb/mysql-test/storage_engine/type_char_indexes.rdiff
deleted file mode 100644
index 7a388552c57..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/type_char_indexes.rdiff
+++ /dev/null
@@ -1,11 +0,0 @@
---- suite/storage_engine/type_char_indexes.result 2012-07-12 19:27:42.191013570 +0400
-+++ suite/storage_engine/type_char_indexes.reject 2012-07-15 17:51:55.810034331 +0400
-@@ -135,7 +135,7 @@
- r3a
- EXPLAIN SELECT c,c20,v16,v128 FROM t1 WHERE v16 = 'varchar1a' OR v16 = 'varchar3a' ORDER BY v16;
- id select_type table type possible_keys key key_len ref rows Extra
--# # # range # v16 # # # #
-+# # # ALL # NULL # # # #
- SELECT c,c20,v16,v128 FROM t1 WHERE v16 = 'varchar1a' OR v16 = 'varchar3a' ORDER BY v16;
- c c20 v16 v128
- a char1 varchar1a varchar1b
diff --git a/storage/xtradb/mysql-test/storage_engine/type_float_indexes.rdiff b/storage/xtradb/mysql-test/storage_engine/type_float_indexes.rdiff
deleted file mode 100644
index 6ebfd61d876..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/type_float_indexes.rdiff
+++ /dev/null
@@ -1,11 +0,0 @@
---- suite/storage_engine/type_float_indexes.result 2012-07-12 19:37:27.031661128 +0400
-+++ suite/storage_engine/type_float_indexes.reject 2012-07-15 17:52:12.189828410 +0400
-@@ -60,7 +60,7 @@
- ALTER TABLE t1 ADD UNIQUE KEY(d);
- EXPLAIN SELECT d FROM t1 WHERE r > 0 and d > 0 ORDER BY d;
- id select_type table type possible_keys key key_len ref rows Extra
--# # # # # d # # # #
-+# # # # # NULL # # # #
- SELECT d FROM t1 WHERE r > 0 and d > 0 ORDER BY d;
- d
- 1.2345
diff --git a/storage/xtradb/mysql-test/storage_engine/type_spatial_indexes.rdiff b/storage/xtradb/mysql-test/storage_engine/type_spatial_indexes.rdiff
deleted file mode 100644
index 154116b748c..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/type_spatial_indexes.rdiff
+++ /dev/null
@@ -1,712 +0,0 @@
---- suite/storage_engine/type_spatial_indexes.result 2013-08-05 18:08:49.000000000 +0400
-+++ suite/storage_engine/type_spatial_indexes.reject 2013-08-05 18:25:24.000000000 +0400
-@@ -702,699 +702,15 @@
- DROP DATABASE IF EXISTS gis_ogs;
- CREATE DATABASE gis_ogs;
- CREATE TABLE gis_point (fid <INT_COLUMN>, g POINT NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE gis_line (fid <INT_COLUMN>, g LINESTRING NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE gis_polygon (fid <INT_COLUMN>, g POLYGON NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE gis_multi_point (fid <INT_COLUMN>, g MULTIPOINT NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE gis_multi_line (fid <INT_COLUMN>, g MULTILINESTRING NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE gis_multi_polygon (fid <INT_COLUMN>, g MULTIPOLYGON NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE gis_geometrycollection (fid <INT_COLUMN>, g GEOMETRYCOLLECTION NOT NULL, SPATIAL INDEX(g)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE gis_geometry (fid <INT_COLUMN>, g GEOMETRY NOT NULL) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--USE gis_ogs;
--CREATE TABLE lakes (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--shore POLYGON NOT NULL, SPATIAL INDEX s(shore)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE road_segments (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--aliases CHAR(64) <CUSTOM_COL_OPTIONS>,
--num_lanes INT <CUSTOM_COL_OPTIONS>,
--centerline LINESTRING NOT NULL, SPATIAL INDEX c(centerline)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE divided_routes (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--num_lanes INT <CUSTOM_COL_OPTIONS>,
--centerlines MULTILINESTRING NOT NULL, SPATIAL INDEX c(centerlines)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE forests (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--boundary MULTIPOLYGON NOT NULL, SPATIAL INDEX b(boundary)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE bridges (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--position POINT NOT NULL, SPATIAL INDEX p(position)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE streams (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--centerline LINESTRING NOT NULL, SPATIAL INDEX c(centerline)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE buildings (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--position POINT NOT NULL,
--footprint POLYGON NOT NULL, SPATIAL INDEX p(position), SPATIAL INDEX f(footprint)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE ponds (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--type CHAR(64) <CUSTOM_COL_OPTIONS>,
--shores MULTIPOLYGON NOT NULL, SPATIAL INDEX s(shores)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE named_places (fid INT <CUSTOM_COL_OPTIONS>,
--name CHAR(64) <CUSTOM_COL_OPTIONS>,
--boundary POLYGON NOT NULL, SPATIAL INDEX b(boundary)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--CREATE TABLE map_neatlines (fid INT <CUSTOM_COL_OPTIONS>,
--neatline POLYGON NOT NULL, SPATIAL INDEX n(neatline)) ENGINE=<STORAGE_ENGINE> <CUSTOM_TABLE_OPTIONS>;
--USE test;
--SHOW FIELDS FROM gis_point;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g point NO MUL NULL
--SHOW FIELDS FROM gis_line;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g linestring NO MUL NULL
--SHOW FIELDS FROM gis_polygon;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g polygon NO MUL NULL
--SHOW FIELDS FROM gis_multi_point;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g multipoint NO MUL NULL
--SHOW FIELDS FROM gis_multi_line;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g multilinestring NO MUL NULL
--SHOW FIELDS FROM gis_multi_polygon;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g multipolygon NO MUL NULL
--SHOW FIELDS FROM gis_geometrycollection;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g geometrycollection NO MUL NULL
--SHOW FIELDS FROM gis_geometry;
--Field Type Null Key Default Extra
--fid int(11) YES NULL
--g geometry NO NULL
--INSERT INTO gis_point (fid,g) VALUES
--(101, PointFromText('POINT(10 10)')),
--(102, PointFromText('POINT(20 10)')),
--(103, PointFromText('POINT(20 20)')),
--(104, PointFromWKB(AsWKB(PointFromText('POINT(10 20)'))));
--INSERT INTO gis_line (fid,g) VALUES
--(105, LineFromText('LINESTRING(0 0,0 10,10 0)')),
--(106, LineStringFromText('LINESTRING(10 10,20 10,20 20,10 20,10 10)')),
--(107, LineStringFromWKB(AsWKB(LineString(Point(10, 10), Point(40, 10)))));
--INSERT INTO gis_polygon (fid,g) VALUES
--(108, PolygonFromText('POLYGON((10 10,20 10,20 20,10 20,10 10))')),
--(109, PolyFromText('POLYGON((0 0,50 0,50 50,0 50,0 0), (10 10,20 10,20 20,10 20,10 10))')),
--(110, PolyFromWKB(AsWKB(Polygon(LineString(Point(0, 0), Point(30, 0), Point(30, 30), Point(0, 0))))));
--INSERT INTO gis_multi_point (fid,g) VALUES
--(111, MultiPointFromText('MULTIPOINT(0 0,10 10,10 20,20 20)')),
--(112, MPointFromText('MULTIPOINT(1 1,11 11,11 21,21 21)')),
--(113, MPointFromWKB(AsWKB(MultiPoint(Point(3, 6), Point(4, 10)))));
--INSERT INTO gis_multi_line (fid,g) VALUES
--(114, MultiLineStringFromText('MULTILINESTRING((10 48,10 21,10 0),(16 0,16 23,16 48))')),
--(115, MLineFromText('MULTILINESTRING((10 48,10 21,10 0))')),
--(116, MLineFromWKB(AsWKB(MultiLineString(LineString(Point(1, 2), Point(3, 5)), LineString(Point(2, 5), Point(5, 8), Point(21, 7))))));
--INSERT INTO gis_multi_polygon (fid,g) VALUES
--(117, MultiPolygonFromText('MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))')),
--(118, MPolyFromText('MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))')),
--(119, MPolyFromWKB(AsWKB(MultiPolygon(Polygon(LineString(Point(0, 3), Point(3, 3), Point(3, 0), Point(0, 3)))))));
--INSERT INTO gis_geometrycollection (fid,g) VALUES
--(120, GeomCollFromText('GEOMETRYCOLLECTION(POINT(0 0), LINESTRING(0 0,10 10))')),
--(121, GeometryFromWKB(AsWKB(GeometryCollection(Point(44, 6), LineString(Point(3, 6), Point(7, 9)))))),
--(122, GeomFromText('GeometryCollection()')),
--(123, GeomFromText('GeometryCollection EMPTY'));
--INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_point;
--INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_line;
--INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_polygon;
--INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_multi_point;
--INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_multi_line;
--INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_multi_polygon;
--INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_geometrycollection;
--SELECT fid, AsText(g) FROM gis_point;
--fid AsText(g)
--101 POINT(10 10)
--102 POINT(20 10)
--103 POINT(20 20)
--104 POINT(10 20)
--SELECT fid, AsText(g) FROM gis_line;
--fid AsText(g)
--105 LINESTRING(0 0,0 10,10 0)
--106 LINESTRING(10 10,20 10,20 20,10 20,10 10)
--107 LINESTRING(10 10,40 10)
--SELECT fid, AsText(g) FROM gis_polygon;
--fid AsText(g)
--108 POLYGON((10 10,20 10,20 20,10 20,10 10))
--109 POLYGON((0 0,50 0,50 50,0 50,0 0),(10 10,20 10,20 20,10 20,10 10))
--110 POLYGON((0 0,30 0,30 30,0 0))
--SELECT fid, AsText(g) FROM gis_multi_point;
--fid AsText(g)
--111 MULTIPOINT(0 0,10 10,10 20,20 20)
--112 MULTIPOINT(1 1,11 11,11 21,21 21)
--113 MULTIPOINT(3 6,4 10)
--SELECT fid, AsText(g) FROM gis_multi_line;
--fid AsText(g)
--114 MULTILINESTRING((10 48,10 21,10 0),(16 0,16 23,16 48))
--115 MULTILINESTRING((10 48,10 21,10 0))
--116 MULTILINESTRING((1 2,3 5),(2 5,5 8,21 7))
--SELECT fid, AsText(g) FROM gis_multi_polygon;
--fid AsText(g)
--117 MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))
--118 MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))
--119 MULTIPOLYGON(((0 3,3 3,3 0,0 3)))
--SELECT fid, AsText(g) FROM gis_geometrycollection;
--fid AsText(g)
--120 GEOMETRYCOLLECTION(POINT(0 0),LINESTRING(0 0,10 10))
--121 GEOMETRYCOLLECTION(POINT(44 6),LINESTRING(3 6,7 9))
--122 GEOMETRYCOLLECTION EMPTY
--123 GEOMETRYCOLLECTION EMPTY
--SELECT fid, AsText(g) FROM gis_geometry;
--fid AsText(g)
--101 POINT(10 10)
--102 POINT(20 10)
--103 POINT(20 20)
--104 POINT(10 20)
--105 LINESTRING(0 0,0 10,10 0)
--106 LINESTRING(10 10,20 10,20 20,10 20,10 10)
--107 LINESTRING(10 10,40 10)
--108 POLYGON((10 10,20 10,20 20,10 20,10 10))
--109 POLYGON((0 0,50 0,50 50,0 50,0 0),(10 10,20 10,20 20,10 20,10 10))
--110 POLYGON((0 0,30 0,30 30,0 0))
--111 MULTIPOINT(0 0,10 10,10 20,20 20)
--112 MULTIPOINT(1 1,11 11,11 21,21 21)
--113 MULTIPOINT(3 6,4 10)
--114 MULTILINESTRING((10 48,10 21,10 0),(16 0,16 23,16 48))
--115 MULTILINESTRING((10 48,10 21,10 0))
--116 MULTILINESTRING((1 2,3 5),(2 5,5 8,21 7))
--117 MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))
--118 MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))
--119 MULTIPOLYGON(((0 3,3 3,3 0,0 3)))
--120 GEOMETRYCOLLECTION(POINT(0 0),LINESTRING(0 0,10 10))
--121 GEOMETRYCOLLECTION(POINT(44 6),LINESTRING(3 6,7 9))
--122 GEOMETRYCOLLECTION EMPTY
--123 GEOMETRYCOLLECTION EMPTY
--SELECT fid, Dimension(g) FROM gis_geometry;
--fid Dimension(g)
--101 0
--102 0
--103 0
--104 0
--105 1
--106 1
--107 1
--108 2
--109 2
--110 2
--111 0
--112 0
--113 0
--114 1
--115 1
--116 1
--117 2
--118 2
--119 2
--120 1
--121 1
--122 0
--123 0
--SELECT fid, GeometryType(g) FROM gis_geometry;
--fid GeometryType(g)
--101 POINT
--102 POINT
--103 POINT
--104 POINT
--105 LINESTRING
--106 LINESTRING
--107 LINESTRING
--108 POLYGON
--109 POLYGON
--110 POLYGON
--111 MULTIPOINT
--112 MULTIPOINT
--113 MULTIPOINT
--114 MULTILINESTRING
--115 MULTILINESTRING
--116 MULTILINESTRING
--117 MULTIPOLYGON
--118 MULTIPOLYGON
--119 MULTIPOLYGON
--120 GEOMETRYCOLLECTION
--121 GEOMETRYCOLLECTION
--122 GEOMETRYCOLLECTION
--123 GEOMETRYCOLLECTION
--SELECT fid, IsEmpty(g) FROM gis_geometry;
--fid IsEmpty(g)
--101 0
--102 0
--103 0
--104 0
--105 0
--106 0
--107 0
--108 0
--109 0
--110 0
--111 0
--112 0
--113 0
--114 0
--115 0
--116 0
--117 0
--118 0
--119 0
--120 0
--121 0
--122 0
--123 0
--SELECT fid, AsText(Envelope(g)) FROM gis_geometry;
--fid AsText(Envelope(g))
--101 POLYGON((10 10,10 10,10 10,10 10,10 10))
--102 POLYGON((20 10,20 10,20 10,20 10,20 10))
--103 POLYGON((20 20,20 20,20 20,20 20,20 20))
--104 POLYGON((10 20,10 20,10 20,10 20,10 20))
--105 POLYGON((0 0,10 0,10 10,0 10,0 0))
--106 POLYGON((10 10,20 10,20 20,10 20,10 10))
--107 POLYGON((10 10,40 10,40 10,10 10,10 10))
--108 POLYGON((10 10,20 10,20 20,10 20,10 10))
--109 POLYGON((0 0,50 0,50 50,0 50,0 0))
--110 POLYGON((0 0,30 0,30 30,0 30,0 0))
--111 POLYGON((0 0,20 0,20 20,0 20,0 0))
--112 POLYGON((1 1,21 1,21 21,1 21,1 1))
--113 POLYGON((3 6,4 6,4 10,3 10,3 6))
--114 POLYGON((10 0,16 0,16 48,10 48,10 0))
--115 POLYGON((10 0,10 0,10 48,10 48,10 0))
--116 POLYGON((1 2,21 2,21 8,1 8,1 2))
--117 POLYGON((28 0,84 0,84 42,28 42,28 0))
--118 POLYGON((28 0,84 0,84 42,28 42,28 0))
--119 POLYGON((0 0,3 0,3 3,0 3,0 0))
--120 POLYGON((0 0,10 0,10 10,0 10,0 0))
--121 POLYGON((3 6,44 6,44 9,3 9,3 6))
--122 GEOMETRYCOLLECTION EMPTY
--123 GEOMETRYCOLLECTION EMPTY
--SELECT fid, X(g) FROM gis_point;
--fid X(g)
--101 10
--102 20
--103 20
--104 10
--SELECT fid, Y(g) FROM gis_point;
--fid Y(g)
--101 10
--102 10
--103 20
--104 20
--SELECT fid, AsText(StartPoint(g)) FROM gis_line;
--fid AsText(StartPoint(g))
--105 POINT(0 0)
--106 POINT(10 10)
--107 POINT(10 10)
--SELECT fid, AsText(EndPoint(g)) FROM gis_line;
--fid AsText(EndPoint(g))
--105 POINT(10 0)
--106 POINT(10 10)
--107 POINT(40 10)
--SELECT fid, GLength(g) FROM gis_line;
--fid GLength(g)
--105 24.14213562373095
--106 40
--107 30
--SELECT fid, NumPoints(g) FROM gis_line;
--fid NumPoints(g)
--105 3
--106 5
--107 2
--SELECT fid, AsText(PointN(g, 2)) FROM gis_line;
--fid AsText(PointN(g, 2))
--105 POINT(0 10)
--106 POINT(20 10)
--107 POINT(40 10)
--SELECT fid, IsClosed(g) FROM gis_line;
--fid IsClosed(g)
--105 0
--106 1
--107 0
--SELECT fid, AsText(Centroid(g)) FROM gis_polygon;
--fid AsText(Centroid(g))
--108 POINT(15 15)
--109 POINT(25.416666666666668 25.416666666666668)
--110 POINT(20 10)
--SELECT fid, Area(g) FROM gis_polygon;
--fid Area(g)
--108 100
--109 2400
--110 450
--SELECT fid, AsText(ExteriorRing(g)) FROM gis_polygon;
--fid AsText(ExteriorRing(g))
--108 LINESTRING(10 10,20 10,20 20,10 20,10 10)
--109 LINESTRING(0 0,50 0,50 50,0 50,0 0)
--110 LINESTRING(0 0,30 0,30 30,0 0)
--SELECT fid, NumInteriorRings(g) FROM gis_polygon;
--fid NumInteriorRings(g)
--108 0
--109 1
--110 0
--SELECT fid, AsText(InteriorRingN(g, 1)) FROM gis_polygon;
--fid AsText(InteriorRingN(g, 1))
--108 NULL
--109 LINESTRING(10 10,20 10,20 20,10 20,10 10)
--110 NULL
--SELECT fid, IsClosed(g) FROM gis_multi_line;
--fid IsClosed(g)
--114 0
--115 0
--116 0
--SELECT fid, AsText(Centroid(g)) FROM gis_multi_polygon;
--fid AsText(Centroid(g))
--117 POINT(55.58852775304245 17.426536064113982)
--118 POINT(55.58852775304245 17.426536064113982)
--119 POINT(2 2)
--SELECT fid, Area(g) FROM gis_multi_polygon;
--fid Area(g)
--117 1684.5
--118 1684.5
--119 4.5
--SELECT fid, NumGeometries(g) from gis_multi_point;
--fid NumGeometries(g)
--111 4
--112 4
--113 2
--SELECT fid, NumGeometries(g) from gis_multi_line;
--fid NumGeometries(g)
--114 2
--115 1
--116 2
--SELECT fid, NumGeometries(g) from gis_multi_polygon;
--fid NumGeometries(g)
--117 2
--118 2
--119 1
--SELECT fid, NumGeometries(g) from gis_geometrycollection;
--fid NumGeometries(g)
--120 2
--121 2
--122 0
--123 0
--SELECT fid, AsText(GeometryN(g, 2)) from gis_multi_point;
--fid AsText(GeometryN(g, 2))
--111 POINT(10 10)
--112 POINT(11 11)
--113 POINT(4 10)
--SELECT fid, AsText(GeometryN(g, 2)) from gis_multi_line;
--fid AsText(GeometryN(g, 2))
--114 LINESTRING(16 0,16 23,16 48)
--115 NULL
--116 LINESTRING(2 5,5 8,21 7)
--SELECT fid, AsText(GeometryN(g, 2)) from gis_multi_polygon;
--fid AsText(GeometryN(g, 2))
--117 POLYGON((59 18,67 18,67 13,59 13,59 18))
--118 POLYGON((59 18,67 18,67 13,59 13,59 18))
--119 NULL
--SELECT fid, AsText(GeometryN(g, 2)) from gis_geometrycollection;
--fid AsText(GeometryN(g, 2))
--120 LINESTRING(0 0,10 10)
--121 LINESTRING(3 6,7 9)
--122 NULL
--123 NULL
--SELECT fid, AsText(GeometryN(g, 1)) from gis_geometrycollection;
--fid AsText(GeometryN(g, 1))
--120 POINT(0 0)
--121 POINT(44 6)
--122 NULL
--123 NULL
--SELECT g1.fid as first, g2.fid as second,
--Within(g1.g, g2.g) as w, Contains(g1.g, g2.g) as c, Overlaps(g1.g, g2.g) as o,
--Equals(g1.g, g2.g) as e, Disjoint(g1.g, g2.g) as d, Touches(g1.g, g2.g) as t,
--Intersects(g1.g, g2.g) as i, Crosses(g1.g, g2.g) as r
--FROM gis_geometrycollection g1, gis_geometrycollection g2 ORDER BY first, second;
--first second w c o e d t i r
--120 120 1 1 0 1 0 0 1 0
--120 121 0 0 1 0 0 0 1 0
--120 122 NULL NULL NULL NULL NULL NULL NULL NULL
--120 123 NULL NULL NULL NULL NULL NULL NULL NULL
--121 120 0 0 1 0 0 0 1 0
--121 121 1 1 0 1 0 0 1 0
--121 122 NULL NULL NULL NULL NULL NULL NULL NULL
--121 123 NULL NULL NULL NULL NULL NULL NULL NULL
--122 120 NULL NULL NULL NULL NULL NULL NULL NULL
--122 121 NULL NULL NULL NULL NULL NULL NULL NULL
--122 122 NULL NULL NULL NULL NULL NULL NULL NULL
--122 123 NULL NULL NULL NULL NULL NULL NULL NULL
--123 120 NULL NULL NULL NULL NULL NULL NULL NULL
--123 121 NULL NULL NULL NULL NULL NULL NULL NULL
--123 122 NULL NULL NULL NULL NULL NULL NULL NULL
--123 123 NULL NULL NULL NULL NULL NULL NULL NULL
--DROP TABLE gis_point, gis_line, gis_polygon, gis_multi_point, gis_multi_line, gis_multi_polygon, gis_geometrycollection, gis_geometry;
--USE gis_ogs;
--# Lakes
--INSERT INTO lakes (fid,name,shore) VALUES (
--101, 'BLUE LAKE',
--PolyFromText(
--'POLYGON(
-- (52 18,66 23,73 9,48 6,52 18),
-- (59 18,67 18,67 13,59 13,59 18)
-- )',
--101));
--# Road Segments
--INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(102, 'Route 5', NULL, 2,
--LineFromText(
--'LINESTRING( 0 18, 10 21, 16 23, 28 26, 44 31 )' ,101));
--INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(103, 'Route 5', 'Main Street', 4,
--LineFromText(
--'LINESTRING( 44 31, 56 34, 70 38 )' ,101));
--INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(104, 'Route 5', NULL, 2,
--LineFromText(
--'LINESTRING( 70 38, 72 48 )' ,101));
--INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(105, 'Main Street', NULL, 4,
--LineFromText(
--'LINESTRING( 70 38, 84 42 )' ,101));
--INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(106, 'Dirt Road by Green Forest', NULL,
--1,
--LineFromText(
--'LINESTRING( 28 26, 28 0 )',101));
--# DividedRoutes
--INSERT INTO divided_routes (fid,name,num_lanes,centerlines) VALUES(119, 'Route 75', 4,
--MLineFromText(
--'MULTILINESTRING((10 48,10 21,10 0),
-- (16 0,16 23,16 48))', 101));
--# Forests
--INSERT INTO forests (fid,name,boundary) VALUES(109, 'Green Forest',
--MPolyFromText(
--'MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),
-- (52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))',
--101));
--# Bridges
--INSERT INTO bridges (fid,name,position) VALUES(110, 'Cam Bridge', PointFromText(
--'POINT( 44 31 )', 101));
--# Streams
--INSERT INTO streams (fid,name,centerline) VALUES(111, 'Cam Stream',
--LineFromText(
--'LINESTRING( 38 48, 44 41, 41 36, 44 31, 52 18 )', 101));
--INSERT INTO streams (fid,name,centerline) VALUES(112, NULL,
--LineFromText(
--'LINESTRING( 76 0, 78 4, 73 9 )', 101));
--# Buildings
--INSERT INTO buildings (fid,name,position,footprint) VALUES(113, '123 Main Street',
--PointFromText(
--'POINT( 52 30 )', 101),
--PolyFromText(
--'POLYGON( ( 50 31, 54 31, 54 29, 50 29, 50 31) )', 101));
--INSERT INTO buildings (fid,name,position,footprint) VALUES(114, '215 Main Street',
--PointFromText(
--'POINT( 64 33 )', 101),
--PolyFromText(
--'POLYGON( ( 66 34, 62 34, 62 32, 66 32, 66 34) )', 101));
--# Ponds
--INSERT INTO ponds (fid,name,type,shores) VALUES(120, NULL, 'Stock Pond',
--MPolyFromText(
--'MULTIPOLYGON( ( ( 24 44, 22 42, 24 40, 24 44) ),
-- ( ( 26 44, 26 40, 28 42, 26 44) ) )', 101));
--# Named Places
--INSERT INTO named_places (fid,name,boundary) VALUES(117, 'Ashton',
--PolyFromText(
--'POLYGON( ( 62 48, 84 48, 84 30, 56 30, 56 34, 62 48) )', 101));
--INSERT INTO named_places (fid,name,boundary) VALUES(118, 'Goose Island',
--PolyFromText(
--'POLYGON( ( 67 13, 67 18, 59 18, 59 13, 67 13) )', 101));
--# Map Neatlines
--INSERT INTO map_neatlines (fid,neatline) VALUES(115,
--PolyFromText(
--'POLYGON( ( 0 0, 0 48, 84 48, 84 0, 0 0 ) )', 101));
--SELECT Dimension(shore)
--FROM lakes
--WHERE name = 'Blue Lake';
--Dimension(shore)
--2
--SELECT GeometryType(centerlines)
--FROM divided_routes
--WHERE name = 'Route 75';
--GeometryType(centerlines)
--MULTILINESTRING
--SELECT AsText(boundary)
--FROM named_places
--WHERE name = 'Goose Island';
--AsText(boundary)
--POLYGON((67 13,67 18,59 18,59 13,67 13))
--SELECT AsText(PolyFromWKB(AsBinary(boundary),101))
--FROM named_places
--WHERE name = 'Goose Island';
--AsText(PolyFromWKB(AsBinary(boundary),101))
--POLYGON((67 13,67 18,59 18,59 13,67 13))
--SELECT SRID(boundary)
--FROM named_places
--WHERE name = 'Goose Island';
--SRID(boundary)
--101
--SELECT IsEmpty(centerline)
--FROM road_segments
--WHERE name = 'Route 5'
--AND aliases = 'Main Street';
--IsEmpty(centerline)
--0
--SELECT AsText(Envelope(boundary))
--FROM named_places
--WHERE name = 'Goose Island';
--AsText(Envelope(boundary))
--POLYGON((59 13,67 13,67 18,59 18,59 13))
--SELECT X(position)
--FROM bridges
--WHERE name = 'Cam Bridge';
--X(position)
--44
--SELECT Y(position)
--FROM bridges
--WHERE name = 'Cam Bridge';
--Y(position)
--31
--SELECT AsText(StartPoint(centerline))
--FROM road_segments
--WHERE fid = 102;
--AsText(StartPoint(centerline))
--POINT(0 18)
--SELECT AsText(EndPoint(centerline))
--FROM road_segments
--WHERE fid = 102;
--AsText(EndPoint(centerline))
--POINT(44 31)
--SELECT GLength(centerline)
--FROM road_segments
--WHERE fid = 106;
--GLength(centerline)
--26
--SELECT NumPoints(centerline)
--FROM road_segments
--WHERE fid = 102;
--NumPoints(centerline)
--5
--SELECT AsText(PointN(centerline, 1))
--FROM road_segments
--WHERE fid = 102;
--AsText(PointN(centerline, 1))
--POINT(0 18)
--SELECT AsText(Centroid(boundary))
--FROM named_places
--WHERE name = 'Goose Island';
--AsText(Centroid(boundary))
--POINT(63 15.5)
--SELECT Area(boundary)
--FROM named_places
--WHERE name = 'Goose Island';
--Area(boundary)
--40
--SELECT AsText(ExteriorRing(shore))
--FROM lakes
--WHERE name = 'Blue Lake';
--AsText(ExteriorRing(shore))
--LINESTRING(52 18,66 23,73 9,48 6,52 18)
--SELECT NumInteriorRings(shore)
--FROM lakes
--WHERE name = 'Blue Lake';
--NumInteriorRings(shore)
--1
--SELECT AsText(InteriorRingN(shore, 1))
--FROM lakes
--WHERE name = 'Blue Lake';
--AsText(InteriorRingN(shore, 1))
--LINESTRING(59 18,67 18,67 13,59 13,59 18)
--SELECT NumGeometries(centerlines)
--FROM divided_routes
--WHERE name = 'Route 75';
--NumGeometries(centerlines)
--2
--SELECT AsText(GeometryN(centerlines, 2))
--FROM divided_routes
--WHERE name = 'Route 75';
--AsText(GeometryN(centerlines, 2))
--LINESTRING(16 0,16 23,16 48)
--SELECT IsClosed(centerlines)
--FROM divided_routes
--WHERE name = 'Route 75';
--IsClosed(centerlines)
--0
--SELECT GLength(centerlines)
--FROM divided_routes
--WHERE name = 'Route 75';
--GLength(centerlines)
--96
--SELECT AsText(Centroid(shores))
--FROM ponds
--WHERE fid = 120;
--AsText(Centroid(shores))
--POINT(25 42)
--SELECT Area(shores)
--FROM ponds
--WHERE fid = 120;
--Area(shores)
--8
--SELECT ST_Equals(boundary,
--PolyFromText('POLYGON( ( 67 13, 67 18, 59 18, 59 13, 67 13) )',1))
--FROM named_places
--WHERE name = 'Goose Island';
--ST_Equals(boundary,
--PolyFromText('POLYGON( ( 67 13, 67 18, 59 18, 59 13, 67 13) )',1))
--1
--SELECT ST_Disjoint(centerlines, boundary)
--FROM divided_routes, named_places
--WHERE divided_routes.name = 'Route 75'
--AND named_places.name = 'Ashton';
--ST_Disjoint(centerlines, boundary)
--1
--SELECT ST_Touches(centerline, shore)
--FROM streams, lakes
--WHERE streams.name = 'Cam Stream'
--AND lakes.name = 'Blue Lake';
--ST_Touches(centerline, shore)
--1
--SELECT Crosses(road_segments.centerline, divided_routes.centerlines)
--FROM road_segments, divided_routes
--WHERE road_segments.fid = 102
--AND divided_routes.name = 'Route 75';
--Crosses(road_segments.centerline, divided_routes.centerlines)
--1
--SELECT ST_Intersects(road_segments.centerline, divided_routes.centerlines)
--FROM road_segments, divided_routes
--WHERE road_segments.fid = 102
--AND divided_routes.name = 'Route 75';
--ST_Intersects(road_segments.centerline, divided_routes.centerlines)
--1
--SELECT ST_Contains(forests.boundary, named_places.boundary)
--FROM forests, named_places
--WHERE forests.name = 'Green Forest'
--AND named_places.name = 'Ashton';
--ST_Contains(forests.boundary, named_places.boundary)
--0
--SELECT ST_Distance(position, boundary)
--FROM bridges, named_places
--WHERE bridges.name = 'Cam Bridge'
--AND named_places.name = 'Ashton';
--ST_Distance(position, boundary)
--12
--SELECT AsText(ST_Difference(named_places.boundary, forests.boundary))
--FROM named_places, forests
--WHERE named_places.name = 'Ashton'
--AND forests.name = 'Green Forest';
--AsText(ST_Difference(named_places.boundary, forests.boundary))
--POLYGON((56 34,62 48,84 48,84 42,56 34))
--SELECT AsText(ST_Union(shore, boundary))
--FROM lakes, named_places
--WHERE lakes.name = 'Blue Lake'
--AND named_places.name = 'Goose Island';
--AsText(ST_Union(shore, boundary))
--POLYGON((48 6,52 18,66 23,73 9,48 6))
--SELECT AsText(ST_SymDifference(shore, boundary))
--FROM lakes, named_places
--WHERE lakes.name = 'Blue Lake'
--AND named_places.name = 'Ashton';
--AsText(ST_SymDifference(shore, boundary))
--MULTIPOLYGON(((48 6,52 18,66 23,73 9,48 6),(59 13,59 18,67 18,67 13,59 13)),((56 30,56 34,62 48,84 48,84 30,56 30)))
--SELECT count(*)
--FROM buildings, bridges
--WHERE ST_Contains(ST_Buffer(bridges.position, 15.0), buildings.footprint) = 1;
--count(*)
--1
-+ERROR HY000: The storage engine <STORAGE_ENGINE> doesn't support SPATIAL indexes
-+# ERROR: Statement ended with errno 1464, errname ER_TABLE_CANT_HANDLE_SPKEYS (expected to succeed)
-+# ------------ UNEXPECTED RESULT ------------
-+# [ CREATE TABLE gis_point (fid INT(11) /*!*/ /*Custom column options*/, g POINT NOT NULL, SPATIAL INDEX(g)) ENGINE=InnoDB /*!*/ /*Custom table options*/ ]
-+# The statement|command finished with ER_TABLE_CANT_HANDLE_SPKEYS.
-+# Geometry types or spatial indexes or the mix could be unsupported|malfunctioning, or the problem was caused by previous errors.
-+# You can change the engine code, or create an rdiff, or disable the test by adding it to disabled.def.
-+# Further in this test, the message might sometimes be suppressed; a part of the test might be skipped.
-+# Also, this problem may cause a chain effect (more errors of different kinds in the test).
-+# -------------------------------------------
- DROP DATABASE gis_ogs;
- USE test;
diff --git a/storage/xtradb/mysql-test/storage_engine/type_text.opt b/storage/xtradb/mysql-test/storage_engine/type_text.opt
deleted file mode 100644
index 40445305fc6..00000000000
--- a/storage/xtradb/mysql-test/storage_engine/type_text.opt
+++ /dev/null
@@ -1 +0,0 @@
---innodb_log_file_size=100M
diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc
deleted file mode 100644
index 7fbee0312ee..00000000000
--- a/storage/xtradb/os/os0file.cc
+++ /dev/null
@@ -1,6545 +0,0 @@
-/***********************************************************************
-
-Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-***********************************************************************/
-
-/**************************************************//**
-@file os/os0file.cc
-The interface to the operating system file i/o primitives
-
-Created 10/21/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0file.h"
-
-#ifdef UNIV_NONINL
-#include "os0file.ic"
-#endif
-#include "ha_prototypes.h"
-#include "ut0mem.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "fil0fil.h"
-#include "fsp0fsp.h"
-#include "fil0pagecompress.h"
-#include "buf0buf.h"
-#include "btr0types.h"
-#include "trx0trx.h"
-#include "srv0mon.h"
-#include "srv0srv.h"
-#ifdef HAVE_LINUX_UNISTD_H
-#include "unistd.h"
-#endif
-#ifndef UNIV_HOTBACKUP
-# include "os0sync.h"
-# include "os0thread.h"
-#else /* !UNIV_HOTBACKUP */
-# ifdef __WIN__
-/* Add includes for the _stat() call to compile on Windows */
-# include <sys/types.h>
-# include <sys/stat.h>
-# include <errno.h>
-# endif /* __WIN__ */
-#endif /* !UNIV_HOTBACKUP */
-
-#if defined(LINUX_NATIVE_AIO)
-#include <libaio.h>
-#endif
-
-#ifdef _WIN32
-#define IOCP_SHUTDOWN_KEY (ULONG_PTR)-1
-#endif
-
-#if defined(UNIV_LINUX) && defined(HAVE_SYS_IOCTL_H)
-# include <sys/ioctl.h>
-# ifndef DFS_IOCTL_ATOMIC_WRITE_SET
-# define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint)
-# endif
-#endif
-
-#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H)
-#include <sys/statvfs.h>
-#endif
-
-#if defined(UNIV_LINUX) && defined(HAVE_LINUX_FALLOC_H)
-#include <linux/falloc.h>
-#endif
-
-#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
-# include <fcntl.h>
-# include <linux/falloc.h>
-#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
-
-#ifdef HAVE_LZO
-#include "lzo/lzo1x.h"
-#endif
-
-#ifdef HAVE_SNAPPY
-#include "snappy-c.h"
-#endif
-
-/** Insert buffer segment id */
-static const ulint IO_IBUF_SEGMENT = 0;
-
-/** Log segment id */
-static const ulint IO_LOG_SEGMENT = 1;
-
-/* This specifies the file permissions InnoDB uses when it creates files in
-Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to
-my_umask */
-
-#ifndef __WIN__
-/** Umask for creating files */
-UNIV_INTERN ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP;
-# define os_file_invalid (-1)
-#else
-/** Umask for creating files */
-UNIV_INTERN ulint os_innodb_umask = 0;
-# define ECANCELED 125
-# define os_file_invalid INVALID_HANDLE_VALUE
-#endif /* __WIN__ */
-
-#ifndef UNIV_HOTBACKUP
-/* We use these mutexes to protect lseek + file i/o operation, if the
-OS does not provide an atomic pread or pwrite, or similar */
-#define OS_FILE_N_SEEK_MUTEXES 16
-UNIV_INTERN os_ib_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES];
-
-/* In simulated aio, merge at most this many consecutive i/os */
-#define OS_AIO_MERGE_N_CONSECUTIVE 64
-
-#ifdef WITH_INNODB_DISALLOW_WRITES
-#define WAIT_ALLOW_WRITES() if (!IS_XTRABACKUP()) os_event_wait(srv_allow_writes_event)
-#else
-#define WAIT_ALLOW_WRITES() do { } while (0)
-#endif /* WITH_INNODB_DISALLOW_WRITES */
-
-/**********************************************************************
-
-InnoDB AIO Implementation:
-=========================
-
-We support native AIO for windows and linux. For rest of the platforms
-we simulate AIO by special io-threads servicing the IO-requests.
-
-Simulated AIO:
-==============
-
-In platforms where we 'simulate' AIO following is a rough explanation
-of the high level design.
-There are four io-threads (for ibuf, log, read, write).
-All synchronous IO requests are serviced by the calling thread using
-os_file_write/os_file_read. The Asynchronous requests are queued up
-in an array (there are four such arrays) by the calling thread.
-Later these requests are picked up by the io-thread and are serviced
-synchronously.
-
-Windows native AIO:
-==================
-
-If srv_use_native_aio is not set then windows follow the same
-code as simulated AIO. If the flag is set then native AIO interface
-is used. On windows, one of the limitation is that if a file is opened
-for AIO no synchronous IO can be done on it. Therefore we have an
-extra fifth array to queue up synchronous IO requests.
-There are innodb_file_io_threads helper threads. These threads work
-on the four arrays mentioned above in Simulated AIO. No thread is
-required for the sync array.
-If a synchronous IO request is made, it is first queued in the sync
-array. Then the calling thread itself waits on the request, thus
-making the call synchronous.
-If an AIO request is made the calling thread not only queues it in the
-array but also submits the requests. The helper thread then collects
-the completed IO request and calls completion routine on it.
-
-Linux native AIO:
-=================
-
-If we have libaio installed on the system and innodb_use_native_aio
-is set to TRUE we follow the code path of native AIO, otherwise we
-do simulated AIO.
-There are innodb_file_io_threads helper threads. These threads work
-on the four arrays mentioned above in Simulated AIO.
-If a synchronous IO request is made, it is handled by calling
-os_file_write/os_file_read.
-If an AIO request is made the calling thread not only queues it in the
-array but also submits the requests. The helper thread then collects
-the completed IO request and calls completion routine on it.
-
-**********************************************************************/
-
-/** Flag: enable debug printout for asynchronous i/o */
-UNIV_INTERN ibool os_aio_print_debug = FALSE;
-
-#ifdef UNIV_PFS_IO
-/* Keys to register InnoDB I/O with performance schema */
-UNIV_INTERN mysql_pfs_key_t innodb_file_data_key;
-UNIV_INTERN mysql_pfs_key_t innodb_file_log_key;
-UNIV_INTERN mysql_pfs_key_t innodb_file_temp_key;
-UNIV_INTERN mysql_pfs_key_t innodb_file_bmp_key;
-#endif /* UNIV_PFS_IO */
-
-/** The asynchronous i/o array slot structure */
-struct os_aio_slot_t{
-#ifdef WIN_ASYNC_IO
- OVERLAPPED control; /*!< Windows control block for the
- aio request, MUST be first element in the structure*/
- void *arr; /*!< Array this slot belongs to*/
-#endif
-
- ibool is_read; /*!< TRUE if a read operation */
- ulint pos; /*!< index of the slot in the aio
- array */
- ibool reserved; /*!< TRUE if this slot is reserved */
- time_t reservation_time;/*!< time when reserved */
- ulint len; /*!< length of the block to read or
- write */
- byte* buf; /*!< buffer used in i/o */
- ulint type; /*!< OS_FILE_READ or OS_FILE_WRITE */
- ulint is_log; /*!< 1 is OS_FILE_LOG or 0 */
- ulint page_size; /*!< UNIV_PAGE_SIZE or zip_size */
-
- os_offset_t offset; /*!< file offset in bytes */
- pfs_os_file_t file; /*!< file where to read or write */
- const char* name; /*!< file name or path */
- ibool io_already_done;/*!< used only in simulated aio:
- TRUE if the physical i/o already
- made and only the slot message
- needs to be passed to the caller
- of os_aio_simulated_handle */
- ulint space_id;
- fil_node_t* message1; /*!< message which is given by the */
- void* message2; /*!< the requester of an aio operation
- and which can be used to identify
- which pending aio operation was
- completed */
- ulint bitmap;
-
-
- ulint* write_size; /*!< Actual write size initialized
- after fist successfull trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
-
- ulint file_block_size;/*!< file block size */
-
-#ifdef LINUX_NATIVE_AIO
- struct iocb control; /* Linux control block for aio */
- int n_bytes; /* bytes written/read. */
- int ret; /* AIO return code */
-#endif /* WIN_ASYNC_IO */
-};
-
-/** The asynchronous i/o array structure */
-struct os_aio_array_t{
- os_ib_mutex_t mutex; /*!< the mutex protecting the aio array */
- os_event_t not_full;
- /*!< The event which is set to the
- signaled state when there is space in
- the aio outside the ibuf segment;
- os_event_set() and os_event_reset()
- are protected by os_aio_array_t::mutex */
- os_event_t is_empty;
- /*!< The event which is set to the
- signaled state when there are no
- pending i/os in this array;
- os_event_set() and os_event_reset()
- are protected by os_aio_array_t::mutex */
- ulint n_slots;/*!< Total number of slots in the aio
- array. This must be divisible by
- n_threads. */
- ulint n_segments;
- /*!< Number of segments in the aio
- array of pending aio requests. A
- thread can wait separately for any one
- of the segments. */
- ulint cur_seg;/*!< We reserve IO requests in round
- robin fashion to different segments.
- This points to the segment that is to
- be used to service next IO request. */
- ulint n_reserved;
- /*!< Number of reserved slots in the
- aio array outside the ibuf segment */
- os_aio_slot_t* slots; /*!< Pointer to the slots in the array */
-
-#if defined(LINUX_NATIVE_AIO)
- io_context_t* aio_ctx;
- /* completion queue for IO. There is
- one such queue per segment. Each thread
- will work on one ctx exclusively. */
- struct io_event* aio_events;
- /* The array to collect completed IOs.
- There is one such event for each
- possible pending IO. The size of the
- array is equal to n_slots. */
-#endif /* LINUX_NATIV_AIO */
-};
-
-#if defined(LINUX_NATIVE_AIO)
-/** timeout for each io_getevents() call = 500ms. */
-#define OS_AIO_REAP_TIMEOUT (500000000UL)
-
-/** time to sleep, in microseconds if io_setup() returns EAGAIN. */
-#define OS_AIO_IO_SETUP_RETRY_SLEEP (500000UL)
-
-/** number of attempts before giving up on io_setup(). */
-#define OS_AIO_IO_SETUP_RETRY_ATTEMPTS 5
-#endif
-
-/** Array of events used in simulated aio. */
-static os_event_t* os_aio_segment_wait_events;
-
-/** The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
-are NULL when the module has not yet been initialized. @{ */
-static os_aio_array_t* os_aio_read_array = NULL; /*!< Reads */
-static os_aio_array_t* os_aio_write_array = NULL; /*!< Writes */
-static os_aio_array_t* os_aio_ibuf_array = NULL; /*!< Insert buffer */
-static os_aio_array_t* os_aio_log_array = NULL; /*!< Redo log */
-static os_aio_array_t* os_aio_sync_array = NULL; /*!< Synchronous I/O */
-/* @} */
-
-/** Number of asynchronous I/O segments. Set by os_aio_init(). */
-static ulint os_aio_n_segments = ULINT_UNDEFINED;
-
-/** If the following is TRUE, read i/o handler threads try to
-wait until a batch of new read requests have been posted */
-static ibool os_aio_recommend_sleep_for_read_threads = FALSE;
-#endif /* !UNIV_HOTBACKUP */
-
-UNIV_INTERN ulint os_n_file_reads = 0;
-UNIV_INTERN ulint os_bytes_read_since_printout = 0;
-UNIV_INTERN ulint os_n_file_writes = 0;
-UNIV_INTERN ulint os_n_fsyncs = 0;
-UNIV_INTERN ulint os_n_file_reads_old = 0;
-UNIV_INTERN ulint os_n_file_writes_old = 0;
-UNIV_INTERN ulint os_n_fsyncs_old = 0;
-UNIV_INTERN time_t os_last_printout;
-
-UNIV_INTERN ibool os_has_said_disk_full = FALSE;
-
-#if defined(WIN_ASYNC_IO) || defined(LINUX_NATIVE_AIO)
-/** After first fallocate failure we will disable os_file_trim */
-static bool os_fallocate_failed;
-
-/**********************************************************************//**
-Directly manipulate the allocated disk space by deallocating for the file referred to
-by fd for the byte range starting at offset and continuing for len bytes.
-Within the specified range, partial file system blocks are zeroed, and whole
-file system blocks are removed from the file. After a successful call,
-subsequent reads from this range will return zeroes.
-@return true if success, false if error */
-static
-ibool
-os_file_trim(
-/*=========*/
- os_aio_slot_t* slot); /*!< in: slot structure */
-#endif /* WIN_ASYNC_IO || LINUX_NATIVE_AIO */
-
-/****************************************************************//**
-Does error handling when a file operation fails.
-@return TRUE if we should retry the operation */
-ibool
-os_file_handle_error_no_exit(
-/*=========================*/
- const char* name, /*!< in: name of a file or NULL */
- const char* operation, /*!< in: operation */
- ibool on_error_silent,/*!< in: if TRUE then don't print
- any message to the log. */
- const char* file, /*!< in: file name */
- const ulint line); /*!< in: line */
-
-/****************************************************************//**
-Tries to enable the atomic write feature, if available, for the specified file
-handle.
-@return TRUE if success */
-static __attribute__((warn_unused_result))
-ibool
-os_file_set_atomic_writes(
-/*======================*/
- const char* name, /*!< in: name of the file */
- os_file_t file); /*!< in: handle to the file */
-
-#ifdef UNIV_DEBUG
-# ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Validates the consistency the aio system some of the time.
-@return TRUE if ok or the check was skipped */
-UNIV_INTERN
-ibool
-os_aio_validate_skip(void)
-/*======================*/
-{
-/** Try os_aio_validate() every this many times */
-# define OS_AIO_VALIDATE_SKIP 13
-
- /** The os_aio_validate() call skip counter.
- Use a signed type because of the race condition below. */
- static int os_aio_validate_count = OS_AIO_VALIDATE_SKIP;
-
- /* There is a race condition below, but it does not matter,
- because this call is only for heuristic purposes. We want to
- reduce the call frequency of the costly os_aio_validate()
- check in debug builds. */
- if (--os_aio_validate_count > 0) {
- return(TRUE);
- }
-
- os_aio_validate_count = OS_AIO_VALIDATE_SKIP;
- return(os_aio_validate());
-}
-# endif /* !UNIV_HOTBACKUP */
-#endif /* UNIV_DEBUG */
-
-#ifdef _WIN32
-/** IO completion port used by background io threads */
-static HANDLE completion_port;
-/** IO completion port used by background io READ threads */
-static HANDLE read_completion_port;
-/** Thread local storage index for the per-thread event used for synchronous IO */
-static DWORD tls_sync_io = TLS_OUT_OF_INDEXES;
-#endif
-
-#ifdef __WIN__
-/***********************************************************************//**
-Gets the operating system version. Currently works only on Windows.
-@return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000, OS_WINXP, OS_WINVISTA,
-OS_WIN7. */
-UNIV_INTERN
-ulint
-os_get_os_version(void)
-/*===================*/
-{
- OSVERSIONINFO os_info;
-
- os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
-
- ut_a(GetVersionEx(&os_info));
-
- if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) {
- return(OS_WIN31);
- } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) {
- return(OS_WIN95);
- } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) {
- switch (os_info.dwMajorVersion) {
- case 3:
- case 4:
- return(OS_WINNT);
- case 5:
- return (os_info.dwMinorVersion == 0)
- ? OS_WIN2000 : OS_WINXP;
- case 6:
- return (os_info.dwMinorVersion == 0)
- ? OS_WINVISTA : OS_WIN7;
- default:
- return(OS_WIN7);
- }
- } else {
- ut_error;
- return(0);
- }
-}
-#endif /* __WIN__ */
-
-
-#ifdef _WIN32
-/*
-Windows : Handling synchronous IO on files opened asynchronously.
-
-If file is opened for asynchronous IO (FILE_FLAG_OVERLAPPED) and also bound to
-a completion port, then every IO on this file would normally be enqueued to the
-completion port. Sometimes however we would like to do a synchronous IO. This is
-possible if we initialitze have overlapped.hEvent with a valid event and set its
-lowest order bit to 1 (see MSDN ReadFile and WriteFile description for more info)
-
-We'll create this special event once for each thread and store in thread local
-storage.
-*/
-
-
-/***********************************************************************//**
-Initialize tls index.for event handle used for synchronized IO on files that
-might be opened with FILE_FLAG_OVERLAPPED.
-*/
-static void win_init_syncio_event()
-{
- tls_sync_io = TlsAlloc();
- ut_a(tls_sync_io != TLS_OUT_OF_INDEXES);
-}
-
-/***********************************************************************//**
-Retrieve per-thread event for doing synchronous io on asyncronously opened files
-*/
-static HANDLE win_get_syncio_event()
-{
- HANDLE h;
- if(tls_sync_io == TLS_OUT_OF_INDEXES){
- win_init_syncio_event();
- }
-
- h = (HANDLE)TlsGetValue(tls_sync_io);
- if (h)
- return h;
- h = CreateEventA(NULL, FALSE, FALSE, NULL);
- ut_a(h);
- h = (HANDLE)((uintptr_t)h | 1);
- TlsSetValue(tls_sync_io, h);
- return h;
-}
-
-/*
- TLS destructor, inspired by Chromium code
- http://src.chromium.org/svn/trunk/src/base/threading/thread_local_storage_win.cc
-*/
-
-static void win_free_syncio_event()
-{
- HANDLE h = win_get_syncio_event();
- if (h) {
- CloseHandle(h);
- }
-}
-
-static void NTAPI win_tls_thread_exit(PVOID module, DWORD reason, PVOID reserved) {
- if (DLL_THREAD_DETACH == reason || DLL_PROCESS_DETACH == reason)
- win_free_syncio_event();
-}
-
-extern "C" {
-#ifdef _WIN64
-#pragma comment(linker, "/INCLUDE:_tls_used")
-#pragma comment(linker, "/INCLUDE:p_thread_callback_base")
-#pragma const_seg(".CRT$XLB")
-extern const PIMAGE_TLS_CALLBACK p_thread_callback_base;
-const PIMAGE_TLS_CALLBACK p_thread_callback_base = win_tls_thread_exit;
-#pragma data_seg()
-#else
-#pragma comment(linker, "/INCLUDE:__tls_used")
-#pragma comment(linker, "/INCLUDE:_p_thread_callback_base")
-#pragma data_seg(".CRT$XLB")
-PIMAGE_TLS_CALLBACK p_thread_callback_base = win_tls_thread_exit;
-#pragma data_seg()
-#endif
-}
-#endif /*_WIN32 */
-
-/***********************************************************************//**
-For an EINVAL I/O error, prints a diagnostic message if innodb_flush_method
-== ALL_O_DIRECT.
-@return true if the diagnostic message was printed
-@return false if the diagnostic message does not apply */
-static
-bool
-os_diagnose_all_o_direct_einval(
-/*============================*/
- ulint err) /*!< in: C error code */
-{
- if ((err == EINVAL)
- && (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT)) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "The error might be caused by redo log I/O not "
- "satisfying innodb_flush_method=ALL_O_DIRECT "
- "requirements by the underlying file system.");
- if (srv_log_block_size != 512)
- ib_logf(IB_LOG_LEVEL_INFO,
- "This might be caused by an incompatible "
- "non-default innodb_log_block_size value %lu.",
- srv_log_block_size);
- ib_logf(IB_LOG_LEVEL_INFO,
- "Please file a bug at https://bugs.percona.com and "
- "include this error message, my.cnf settings, and "
- "information about the file system where the redo log "
- "resides.");
- ib_logf(IB_LOG_LEVEL_INFO,
- "A possible workaround is to change "
- "innodb_flush_method value to something else "
- "than ALL_O_DIRECT.");
- return(true);
- }
- return(false);
-}
-
-/***********************************************************************//**
-Retrieves the last error number if an error occurs in a file io function.
-The number should be retrieved before any other OS calls (because they may
-overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned.
-@return error number, or OS error number + 100 */
-static
-ulint
-os_file_get_last_error_low(
-/*=======================*/
- bool report_all_errors, /*!< in: TRUE if we want an error
- message printed of all errors */
- bool on_error_silent) /*!< in: TRUE then don't print any
- diagnostic to the log */
-{
-#ifdef __WIN__
-
- ulint err = (ulint) GetLastError();
- if (err == ERROR_SUCCESS) {
- return(0);
- }
-
- if (report_all_errors
- || (!on_error_silent
- && err != ERROR_DISK_FULL
- && err != ERROR_FILE_EXISTS)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Operating system error number %lu"
- " in a file operation.\n", (ulong) err);
-
- if (err == ERROR_PATH_NOT_FOUND) {
- fprintf(stderr,
- "InnoDB: The error means the system"
- " cannot find the path specified.\n");
-
- if (srv_is_being_started) {
- fprintf(stderr,
- "InnoDB: If you are installing InnoDB,"
- " remember that you must create\n"
- "InnoDB: directories yourself, InnoDB"
- " does not create them.\n");
- }
- } else if (err == ERROR_ACCESS_DENIED) {
- fprintf(stderr,
- "InnoDB: The error means mysqld does not have"
- " the access rights to\n"
- "InnoDB: the directory. It may also be"
- " you have created a subdirectory\n"
- "InnoDB: of the same name as a data file.\n");
- } else if (err == ERROR_SHARING_VIOLATION
- || err == ERROR_LOCK_VIOLATION) {
- fprintf(stderr,
- "InnoDB: The error means that another program"
- " is using InnoDB's files.\n"
- "InnoDB: This might be a backup or antivirus"
- " software or another instance\n"
- "InnoDB: of MySQL."
- " Please close it to get rid of this error.\n");
- } else if (err == ERROR_WORKING_SET_QUOTA
- || err == ERROR_NO_SYSTEM_RESOURCES) {
- fprintf(stderr,
- "InnoDB: The error means that there are no"
- " sufficient system resources or quota to"
- " complete the operation.\n");
- } else if (err == ERROR_OPERATION_ABORTED) {
- fprintf(stderr,
- "InnoDB: The error means that the I/O"
- " operation has been aborted\n"
- "InnoDB: because of either a thread exit"
- " or an application request.\n"
- "InnoDB: Retry attempt is made.\n");
- } else if (err == ECANCELED || err == ENOTTY) {
- if (strerror(err) != NULL) {
- fprintf(stderr,
- "InnoDB: Error number %d"
- " means '%s'.\n",
- err, strerror(err));
- }
-
- if(srv_use_atomic_writes) {
- fprintf(stderr,
- "InnoDB: Error trying to enable atomic writes on "
- "non-supported destination!\n");
- }
- } else {
- fprintf(stderr,
- "InnoDB: Some operating system error numbers"
- " are described at\n"
- "InnoDB: "
- REFMAN
- "operating-system-error-codes.html\n");
- }
- }
-
- fflush(stderr);
-
- if (err == ERROR_FILE_NOT_FOUND) {
- return(OS_FILE_NOT_FOUND);
- } else if (err == ERROR_DISK_FULL) {
- return(OS_FILE_DISK_FULL);
- } else if (err == ERROR_FILE_EXISTS) {
- return(OS_FILE_ALREADY_EXISTS);
- } else if (err == ERROR_SHARING_VIOLATION
- || err == ERROR_LOCK_VIOLATION) {
- return(OS_FILE_SHARING_VIOLATION);
- } else if (err == ERROR_WORKING_SET_QUOTA
- || err == ERROR_NO_SYSTEM_RESOURCES) {
- return(OS_FILE_INSUFFICIENT_RESOURCE);
- } else if (err == ERROR_OPERATION_ABORTED) {
- return(OS_FILE_OPERATION_ABORTED);
- } else if (err == ERROR_ACCESS_DENIED) {
- return(OS_FILE_ACCESS_VIOLATION);
- } else if (err == ERROR_BUFFER_OVERFLOW) {
- return(OS_FILE_NAME_TOO_LONG);
- } else {
- return(OS_FILE_ERROR_MAX + err);
- }
-#else
- int err = errno;
- if (err == 0) {
- return(0);
- }
-
- if (report_all_errors
- || (err != ENOSPC && err != EEXIST && !on_error_silent)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Operating system error number %d"
- " in a file operation.\n", err);
-
- if (err == ENOENT) {
- fprintf(stderr,
- "InnoDB: The error means the system"
- " cannot find the path specified.\n");
-
- if (srv_is_being_started) {
- fprintf(stderr,
- "InnoDB: If you are installing InnoDB,"
- " remember that you must create\n"
- "InnoDB: directories yourself, InnoDB"
- " does not create them.\n");
- }
- } else if (err == EACCES) {
- fprintf(stderr,
- "InnoDB: The error means mysqld does not have"
- " the access rights to\n"
- "InnoDB: the directory.\n");
- } else if (err == ECANCELED || err == ENOTTY) {
- if (strerror(err) != NULL) {
- fprintf(stderr,
- "InnoDB: Error number %d"
- " means '%s'.\n",
- err, strerror(err));
- }
-
-
- if(srv_use_atomic_writes) {
- fprintf(stderr,
- "InnoDB: Error trying to enable atomic writes on "
- "non-supported destination!\n");
- }
- } else if (!os_diagnose_all_o_direct_einval(err)) {
- if (strerror(err) != NULL) {
- fprintf(stderr,
- "InnoDB: Error number %d"
- " means '%s'.\n",
- err, strerror(err));
- }
-
-
- fprintf(stderr,
- "InnoDB: Some operating system"
- " error numbers are described at\n"
- "InnoDB: "
- REFMAN
- "operating-system-error-codes.html\n");
- }
- }
-
- fflush(stderr);
-
- switch (err) {
- case ENOSPC:
- return(OS_FILE_DISK_FULL);
- case ENOENT:
- return(OS_FILE_NOT_FOUND);
- case EEXIST:
- return(OS_FILE_ALREADY_EXISTS);
- case ENAMETOOLONG:
- return(OS_FILE_NAME_TOO_LONG);
- case EXDEV:
- case ENOTDIR:
- case EISDIR:
- return(OS_FILE_PATH_ERROR);
- case EAGAIN:
- if (srv_use_native_aio) {
- return(OS_FILE_AIO_RESOURCES_RESERVED);
- }
- break;
- case ECANCELED:
- case ENOTTY:
- return(OS_FILE_OPERATION_NOT_SUPPORTED);
- case EINTR:
- if (srv_use_native_aio) {
- return(OS_FILE_AIO_INTERRUPTED);
- }
- break;
- case EACCES:
- return(OS_FILE_ACCESS_VIOLATION);
- }
- return(OS_FILE_ERROR_MAX + err);
-#endif
-}
-
-/***********************************************************************//**
-Retrieves the last error number if an error occurs in a file io function.
-The number should be retrieved before any other OS calls (because they may
-overwrite the error number). If the number is not known to this program,
-the OS error number + 100 is returned.
-@return error number, or OS error number + 100 */
-UNIV_INTERN
-ulint
-os_file_get_last_error(
-/*===================*/
- bool report_all_errors) /*!< in: TRUE if we want an error
- message printed of all errors */
-{
- return(os_file_get_last_error_low(report_all_errors, false));
-}
-
-/****************************************************************//**
-Does error handling when a file operation fails.
-Conditionally exits (calling exit(3)) based on should_exit value and the
-error type, if should_exit is TRUE then on_error_silent is ignored.
-@return TRUE if we should retry the operation */
-ibool
-os_file_handle_error_cond_exit(
-/*===========================*/
- const char* name, /*!< in: name of a file or NULL */
- const char* operation, /*!< in: operation */
- ibool should_exit, /*!< in: call exit(3) if unknown error
- and this parameter is TRUE */
- ibool on_error_silent,/*!< in: if TRUE then don't print
- any message to the log iff it is
- an unknown non-fatal error */
- const char* file, /*!< in: file name */
- const ulint line) /*!< in: line */
-{
- ulint err;
-
- err = os_file_get_last_error_low(false, on_error_silent);
-
- switch (err) {
- case OS_FILE_DISK_FULL:
- /* We only print a warning about disk full once */
-
- if (os_has_said_disk_full) {
-
- return(FALSE);
- }
-
- /* Disk full error is reported irrespective of the
- on_error_silent setting. */
-
- if (name) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Encountered a problem with"
- " file %s\n", name);
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Disk is full. Try to clean the disk"
- " to free space.\n");
-
- os_has_said_disk_full = TRUE;
-
- fprintf(stderr,
- " InnoDB: at file %s and at line %ld\n", file, line);
-
- fflush(stderr);
-
- ut_error;
- return(FALSE);
-
- case OS_FILE_AIO_RESOURCES_RESERVED:
- case OS_FILE_AIO_INTERRUPTED:
-
- return(TRUE);
-
- case OS_FILE_PATH_ERROR:
- case OS_FILE_ALREADY_EXISTS:
- case OS_FILE_ACCESS_VIOLATION:
-
- return(FALSE);
-
- case OS_FILE_SHARING_VIOLATION:
-
- os_thread_sleep(10000000); /* 10 sec */
- return(TRUE);
-
- case OS_FILE_OPERATION_ABORTED:
- case OS_FILE_INSUFFICIENT_RESOURCE:
-
- os_thread_sleep(100000); /* 100 ms */
- return(TRUE);
-
- default:
-
- /* If it is an operation that can crash on error then it
- is better to ignore on_error_silent and print an error message
- to the log. */
-
- if (should_exit || !on_error_silent) {
- fprintf(stderr,
- " InnoDB: Operation %s to file %s and at line %ld\n",
- operation, file, line);
- }
-
- if (should_exit || !on_error_silent) {
- ib_logf(IB_LOG_LEVEL_ERROR, "File %s: '%s' returned OS "
- "error " ULINTPF ".%s", name ? name : "(unknown)",
- operation, err, should_exit
- ? " Cannot continue operation" : "");
- }
-
- if (should_exit) {
- abort();
- }
- }
-
- return(FALSE);
-}
-
-/****************************************************************//**
-Does error handling when a file operation fails.
-@return TRUE if we should retry the operation */
-static
-ibool
-os_file_handle_error(
-/*=================*/
- const char* name, /*!< in: name of a file or NULL */
- const char* operation, /*!< in: operation */
- const char* file, /*!< in: file name */
- const ulint line) /*!< in: line */
-{
- /* exit in case of unknown error */
- return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE, file, line));
-}
-
-/****************************************************************//**
-Does error handling when a file operation fails.
-@return TRUE if we should retry the operation */
-ibool
-os_file_handle_error_no_exit(
-/*=========================*/
- const char* name, /*!< in: name of a file or NULL */
- const char* operation, /*!< in: operation */
- ibool on_error_silent,/*!< in: if TRUE then don't print
- any message to the log. */
- const char* file, /*!< in: file name */
- const ulint line) /*!< in: line */
-{
- /* don't exit in case of unknown error */
- return(os_file_handle_error_cond_exit(
- name, operation, FALSE, on_error_silent, file, line));
-}
-
-#undef USE_FILE_LOCK
-#define USE_FILE_LOCK
-#if defined(UNIV_HOTBACKUP) || defined(__WIN__)
-/* InnoDB Hot Backup does not lock the data files.
- * On Windows, mandatory locking is used.
- */
-# undef USE_FILE_LOCK
-#endif
-#ifdef USE_FILE_LOCK
-/****************************************************************//**
-Obtain an exclusive lock on a file.
-@return 0 on success */
-static
-int
-os_file_lock(
-/*=========*/
- int fd, /*!< in: file descriptor */
- const char* name) /*!< in: file name */
-{
- struct flock lk;
-
- ut_ad(!srv_read_only_mode);
-
- lk.l_type = F_WRLCK;
- lk.l_whence = SEEK_SET;
- lk.l_start = lk.l_len = 0;
-
- if (fcntl(fd, F_SETLK, &lk) == -1) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unable to lock %s, error: %d", name, errno);
-
- if (errno == EAGAIN || errno == EACCES) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Check that you do not already have "
- "another mysqld process using the "
- "same InnoDB data or log files.");
- }
-
- return(-1);
- }
-
- return(0);
-}
-#endif /* USE_FILE_LOCK */
-
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Creates the seek mutexes used in positioned reads and writes. */
-void
-os_io_init_simple(void)
-/*===================*/
-{
- for (ulint i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
- os_file_seek_mutexes[i] = os_mutex_create();
- }
-#ifdef _WIN32
- win_init_syncio_event();
-#endif
-}
-
-/** Create a temporary file. This function is like tmpfile(3), but
-the temporary file is created in the given parameter path. If the path
-is null then it will create the file in the mysql server configuration
-parameter (--tmpdir).
-@param[in] path location for creating temporary file
-@return temporary file handle, or NULL on error */
-UNIV_INTERN
-FILE*
-os_file_create_tmpfile(
- const char* path)
-{
- WAIT_ALLOW_WRITES();
-
- FILE* file = NULL;
- int fd = innobase_mysql_tmpfile(path);
-
- ut_ad(!srv_read_only_mode);
-
- if (fd >= 0) {
- file = fdopen(fd, "w+b");
- }
-
- if (!file) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: unable to create temporary file;"
- " errno: %d\n", errno);
- if (fd >= 0) {
- close(fd);
- }
- }
-
- return(file);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************************//**
-The os_file_opendir() function opens a directory stream corresponding to the
-directory named by the dirname argument. The directory stream is positioned
-at the first entry. In both Unix and Windows we automatically skip the '.'
-and '..' items at the start of the directory listing.
-@return directory stream, NULL if error */
-UNIV_INTERN
-os_file_dir_t
-os_file_opendir(
-/*============*/
- const char* dirname, /*!< in: directory name; it must not
- contain a trailing '\' or '/' */
- ibool error_is_fatal) /*!< in: TRUE if we should treat an
- error as a fatal error; if we try to
- open symlinks then we do not wish a
- fatal error if it happens not to be
- a directory */
-{
- os_file_dir_t dir;
-#ifdef __WIN__
- LPWIN32_FIND_DATA lpFindFileData;
- char path[OS_FILE_MAX_PATH + 3];
-
- ut_a(strlen(dirname) < OS_FILE_MAX_PATH);
-
- strcpy(path, dirname);
- strcpy(path + strlen(path), "\\*");
-
- /* Note that in Windows opening the 'directory stream' also retrieves
- the first entry in the directory. Since it is '.', that is no problem,
- as we will skip over the '.' and '..' entries anyway. */
-
- lpFindFileData = static_cast<LPWIN32_FIND_DATA>(
- ut_malloc(sizeof(WIN32_FIND_DATA)));
-
- dir = FindFirstFile((LPCTSTR) path, lpFindFileData);
-
- ut_free(lpFindFileData);
-
- if (dir == INVALID_HANDLE_VALUE) {
-
- if (error_is_fatal) {
- os_file_handle_error(dirname, "opendir", __FILE__, __LINE__);
- }
-
- return(NULL);
- }
-
- return(dir);
-#else
- dir = opendir(dirname);
-
- if (dir == NULL && error_is_fatal) {
- os_file_handle_error(dirname, "opendir", __FILE__, __LINE__);
- }
-
- return(dir);
-#endif /* __WIN__ */
-}
-
-/***********************************************************************//**
-Closes a directory stream.
-@return 0 if success, -1 if failure */
-UNIV_INTERN
-int
-os_file_closedir(
-/*=============*/
- os_file_dir_t dir) /*!< in: directory stream */
-{
-#ifdef __WIN__
- BOOL ret;
-
- ret = FindClose(dir);
-
- if (!ret) {
- os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__);
-
- return(-1);
- }
-
- return(0);
-#else
- int ret;
-
- ret = closedir(dir);
-
- if (ret) {
- os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__);
- }
-
- return(ret);
-#endif /* __WIN__ */
-}
-
-/***********************************************************************//**
-This function returns information of the next file in the directory. We jump
-over the '.' and '..' entries in the directory.
-@return 0 if ok, -1 if error, 1 if at the end of the directory */
-UNIV_INTERN
-int
-os_file_readdir_next_file(
-/*======================*/
- const char* dirname,/*!< in: directory name or path */
- os_file_dir_t dir, /*!< in: directory stream */
- os_file_stat_t* info) /*!< in/out: buffer where the info is returned */
-{
-#ifdef __WIN__
- LPWIN32_FIND_DATA lpFindFileData;
- BOOL ret;
-
- lpFindFileData = static_cast<LPWIN32_FIND_DATA>(
- ut_malloc(sizeof(WIN32_FIND_DATA)));
-next_file:
- ret = FindNextFile(dir, lpFindFileData);
-
- if (ret) {
- ut_a(strlen((char*) lpFindFileData->cFileName)
- < OS_FILE_MAX_PATH);
-
- if (strcmp((char*) lpFindFileData->cFileName, ".") == 0
- || strcmp((char*) lpFindFileData->cFileName, "..") == 0) {
-
- goto next_file;
- }
-
- strcpy(info->name, (char*) lpFindFileData->cFileName);
-
- info->size = (ib_int64_t)(lpFindFileData->nFileSizeLow)
- + (((ib_int64_t)(lpFindFileData->nFileSizeHigh))
- << 32);
-
- if (lpFindFileData->dwFileAttributes
- & FILE_ATTRIBUTE_REPARSE_POINT) {
- /* TODO: test Windows symlinks */
- /* TODO: MySQL has apparently its own symlink
- implementation in Windows, dbname.sym can
- redirect a database directory:
- REFMAN "windows-symbolic-links.html" */
- info->type = OS_FILE_TYPE_LINK;
- } else if (lpFindFileData->dwFileAttributes
- & FILE_ATTRIBUTE_DIRECTORY) {
- info->type = OS_FILE_TYPE_DIR;
- } else {
- /* It is probably safest to assume that all other
- file types are normal. Better to check them rather
- than blindly skip them. */
-
- info->type = OS_FILE_TYPE_FILE;
- }
- }
-
- ut_free(lpFindFileData);
-
- if (ret) {
- return(0);
- } else if (GetLastError() == ERROR_NO_MORE_FILES) {
-
- return(1);
- } else {
- os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE, __FILE__, __LINE__);
- return(-1);
- }
-#else
- struct dirent* ent;
- char* full_path;
- int ret;
- struct stat statinfo;
-
-next_file:
-
- ent = readdir(dir);
-
- if (ent == NULL) {
-
- return(1);
- }
- ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH);
-
- if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) {
-
- goto next_file;
- }
-
- strcpy(info->name, ent->d_name);
-
- full_path = static_cast<char*>(
- ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10));
-
- sprintf(full_path, "%s/%s", dirname, ent->d_name);
-
- ret = stat(full_path, &statinfo);
-
- if (ret) {
-
- if (errno == ENOENT) {
- /* readdir() returned a file that does not exist,
- it must have been deleted in the meantime. Do what
- would have happened if the file was deleted before
- readdir() - ignore and go to the next entry.
- If this is the last entry then info->name will still
- contain the name of the deleted file when this
- function returns, but this is not an issue since the
- caller shouldn't be looking at info when end of
- directory is returned. */
-
- ut_free(full_path);
-
- goto next_file;
- }
-
- os_file_handle_error_no_exit(full_path, "stat", FALSE, __FILE__, __LINE__);
-
- ut_free(full_path);
-
- return(-1);
- }
-
- info->size = (ib_int64_t) statinfo.st_size;
-
- if (S_ISDIR(statinfo.st_mode)) {
- info->type = OS_FILE_TYPE_DIR;
- } else if (S_ISLNK(statinfo.st_mode)) {
- info->type = OS_FILE_TYPE_LINK;
- } else if (S_ISREG(statinfo.st_mode)) {
- info->type = OS_FILE_TYPE_FILE;
- } else {
- info->type = OS_FILE_TYPE_UNKNOWN;
- }
-
- ut_free(full_path);
-
- return(0);
-#endif
-}
-
-/*****************************************************************//**
-This function attempts to create a directory named pathname. The new
-directory gets default permissions. On Unix the permissions are
-(0770 & ~umask). If the directory exists already, nothing is done and
-the call succeeds, unless the fail_if_exists arguments is true.
-If another error occurs, such as a permission error, this does not crash,
-but reports the error and returns FALSE.
-@return TRUE if call succeeds, FALSE on error */
-UNIV_INTERN
-ibool
-os_file_create_directory(
-/*=====================*/
- const char* pathname, /*!< in: directory name as
- null-terminated string */
- ibool fail_if_exists) /*!< in: if TRUE, pre-existing directory
- is treated as an error. */
-{
-#ifdef __WIN__
- BOOL rcode;
-
- rcode = CreateDirectory((LPCTSTR) pathname, NULL);
- if (!(rcode != 0
- || (GetLastError() == ERROR_ALREADY_EXISTS
- && !fail_if_exists))) {
-
- os_file_handle_error_no_exit(
- pathname, "CreateDirectory", FALSE, __FILE__, __LINE__);
-
- return(FALSE);
- }
-
- return(TRUE);
-#else
- int rcode;
- WAIT_ALLOW_WRITES();
-
- rcode = mkdir(pathname, 0770);
-
- if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) {
- /* failure */
- os_file_handle_error_no_exit(pathname, "mkdir", FALSE, __FILE__, __LINE__);
-
- return(FALSE);
- }
-
- return (TRUE);
-#endif /* __WIN__ */
-}
-
-/****************************************************************//**
-NOTE! Use the corresponding macro os_file_create_simple(), not directly
-this function!
-A simple function to open or create a file.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-os_file_t
-os_file_create_simple_func(
-/*=======================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY or
- OS_FILE_READ_WRITE */
- ibool* success)/*!< out: TRUE if succeed, FALSE if error */
-{
- os_file_t file;
- ibool retry;
-
- *success = FALSE;
-#ifdef __WIN__
- DWORD access;
- DWORD create_flag;
- DWORD attributes = 0;
-
- ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
- ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
-
- if (create_mode == OS_FILE_OPEN) {
-
- create_flag = OPEN_EXISTING;
-
- } else if (srv_read_only_mode) {
-
- create_flag = OPEN_EXISTING;
-
- } else if (create_mode == OS_FILE_CREATE) {
-
- create_flag = CREATE_NEW;
-
- } else if (create_mode == OS_FILE_CREATE_PATH) {
-
- ut_a(!srv_read_only_mode);
-
- /* Create subdirs along the path if needed */
- *success = os_file_create_subdirs_if_needed(name);
-
- if (!*success) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unable to create subdirectories '%s'",
- name);
-
- return((os_file_t) -1);
- }
-
- create_flag = CREATE_NEW;
- create_mode = OS_FILE_CREATE;
-
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file create mode (%lu) for file '%s'",
- create_mode, name);
-
- return((os_file_t) -1);
- }
-
- if (access_type == OS_FILE_READ_ONLY) {
- access = GENERIC_READ;
- } else if (srv_read_only_mode) {
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "read only mode set. Unable to "
- "open file '%s' in RW mode, trying RO mode", name);
-
- access = GENERIC_READ;
-
- } else if (access_type == OS_FILE_READ_WRITE
- || access_type == OS_FILE_READ_WRITE_CACHED) {
- access = GENERIC_READ | GENERIC_WRITE;
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file access type (%lu) for file '%s'",
- access_type, name);
-
- return((os_file_t) -1);
- }
-
- do {
- /* Use default security attributes and no template file. */
-
- file = CreateFile(
- (LPCTSTR) name, access, FILE_SHARE_READ, NULL,
- create_flag, attributes, NULL);
-
- if (file == INVALID_HANDLE_VALUE) {
-
- *success = FALSE;
-
- retry = os_file_handle_error(
- name, create_mode == OS_FILE_OPEN ?
- "open" : "create", __FILE__, __LINE__);
-
- } else {
- *success = TRUE;
- retry = false;
- }
-
- } while (retry);
-
-#else /* __WIN__ */
- int create_flag;
- if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW)
- WAIT_ALLOW_WRITES();
-
- ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
- ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
-
- if (create_mode == OS_FILE_OPEN) {
-
- if (access_type == OS_FILE_READ_ONLY) {
- create_flag = O_RDONLY;
- } else if (srv_read_only_mode) {
- create_flag = O_RDONLY;
- } else {
- create_flag = O_RDWR;
- }
-
- } else if (srv_read_only_mode) {
-
- create_flag = O_RDONLY;
-
- } else if (create_mode == OS_FILE_CREATE) {
-
- create_flag = O_RDWR | O_CREAT | O_EXCL;
-
- } else if (create_mode == OS_FILE_CREATE_PATH) {
-
- /* Create subdirs along the path if needed */
-
- *success = os_file_create_subdirs_if_needed(name);
-
- if (!*success) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unable to create subdirectories '%s'",
- name);
-
- return((os_file_t) -1);
- }
-
- create_flag = O_RDWR | O_CREAT | O_EXCL;
- create_mode = OS_FILE_CREATE;
- } else {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file create mode (%lu) for file '%s'",
- create_mode, name);
-
- return((os_file_t) -1);
- }
-
- do {
- file = ::open(name, create_flag, os_innodb_umask);
-
- if (file == -1) {
- *success = FALSE;
-
- retry = os_file_handle_error(
- name,
- create_mode == OS_FILE_OPEN
- ? "open" : "create", __FILE__, __LINE__);
- } else {
- *success = TRUE;
- retry = false;
- }
-
- } while (retry);
-
-#ifdef USE_FILE_LOCK
- if (!srv_read_only_mode
- && *success
- && (access_type == OS_FILE_READ_WRITE
- || access_type == OS_FILE_READ_WRITE_CACHED)
- && os_file_lock(file, name)) {
-
- *success = FALSE;
- close(file);
- file = -1;
- }
-#endif /* USE_FILE_LOCK */
-
-#endif /* __WIN__ */
-
- return(file);
-}
-
-/** Disable OS I/O caching on the file if the file type and server
-configuration requires it.
-@param file handle to the file
-@param name name of the file, for diagnostics
-@param mode_str operation on the file, for diagnostics
-@param type OS_LOG_FILE or OS_DATA_FILE
-@param access_type if OS_FILE_READ_WRITE_CACHED, then caching will be disabled
-unconditionally, ignored otherwise */
-static
-void
-os_file_set_nocache_if_needed(os_file_t file, const char* name,
- const char *mode_str, ulint type,
- ulint access_type)
-{
- if (srv_read_only_mode || access_type == OS_FILE_READ_WRITE_CACHED) {
- return;
- }
-
- if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT
- || (type == OS_DATA_FILE
- && (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT
- || (srv_unix_file_flush_method
- == SRV_UNIX_O_DIRECT_NO_FSYNC))))
- /* Do fsync() on log files when setting O_DIRECT fails.
- See log_io_complete() */
- if (!os_file_set_nocache(file, name, mode_str)
- && srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT)
- srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
-}
-
-/****************************************************************//**
-NOTE! Use the corresponding macro
-os_file_create_simple_no_error_handling(), not directly this function!
-A simple function to open or create a file.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-pfs_os_file_t
-os_file_create_simple_no_error_handling_func(
-/*=========================================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint access_type,/*!< in: OS_FILE_READ_ONLY,
- OS_FILE_READ_WRITE,
- OS_FILE_READ_ALLOW_DELETE (used by a backup
- program reading the file), or
- OS_FILE_READ_WRITE_CACHED (disable O_DIRECT
- if it would be enabled otherwise) */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes) /*! in: atomic writes table option
- value */
-{
- pfs_os_file_t file;
- atomic_writes_t awrites = (atomic_writes_t) atomic_writes;
-
- *success = FALSE;
-#ifdef __WIN__
- DWORD access;
- DWORD create_flag;
- DWORD attributes = 0;
- DWORD share_mode = FILE_SHARE_READ;
- ut_a(name);
-
- ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
- ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
-
- if (create_mode == OS_FILE_OPEN) {
- create_flag = OPEN_EXISTING;
- } else if (srv_read_only_mode) {
- create_flag = OPEN_EXISTING;
- } else if (create_mode == OS_FILE_CREATE) {
- create_flag = CREATE_NEW;
- } else {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file create mode (%lu) for file '%s'",
- create_mode, name);
- file = INVALID_HANDLE_VALUE;
- return(file);
- }
-
- if (access_type == OS_FILE_READ_ONLY) {
- access = GENERIC_READ;
- } else if (srv_read_only_mode) {
- access = GENERIC_READ;
- } else if (access_type == OS_FILE_READ_WRITE
- || access_type == OS_FILE_READ_WRITE_CACHED) {
- access = GENERIC_READ | GENERIC_WRITE;
- } else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
-
- ut_a(!srv_read_only_mode);
-
- access = GENERIC_READ;
-
- /*!< A backup program has to give mysqld the maximum
- freedom to do what it likes with the file */
-
- share_mode |= FILE_SHARE_DELETE | FILE_SHARE_WRITE;
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file access type (%lu) for file '%s'",
- access_type, name);
- file = INVALID_HANDLE_VALUE;
- return(file);
- }
-
- if (IS_XTRABACKUP()) {
- share_mode |= FILE_SHARE_DELETE | FILE_SHARE_WRITE;
- }
-
- file = CreateFile((LPCTSTR) name,
- access,
- share_mode,
- NULL, // Security attributes
- create_flag,
- attributes,
- NULL); // No template file
-
- /* If we have proper file handle and atomic writes should be used,
- try to set atomic writes and if that fails when creating a new
- table, produce a error. If atomic writes are used on existing
- file, ignore error and use traditional writes for that file */
- if (file != INVALID_HANDLE_VALUE
- && (awrites == ATOMIC_WRITES_ON ||
- (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
- && !os_file_set_atomic_writes(name, file)) {
- if (create_mode == OS_FILE_CREATE) {
- fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
- CloseHandle(file);
- os_file_delete_if_exists_func(name);
- *success = FALSE;
- file = INVALID_HANDLE_VALUE;
- }
- }
-
- *success = file != INVALID_HANDLE_VALUE;
-#else /* __WIN__ */
- int create_flag;
- const char* mode_str = NULL;
- ut_a(name);
- if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW)
- WAIT_ALLOW_WRITES();
-
- ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT));
- ut_a(!(create_mode & OS_FILE_ON_ERROR_NO_EXIT));
-
- if (create_mode == OS_FILE_OPEN) {
-
- mode_str = "OPEN";
-
- if (access_type == OS_FILE_READ_ONLY) {
-
- create_flag = O_RDONLY;
-
- } else if (srv_read_only_mode) {
-
- create_flag = O_RDONLY;
-
- } else {
-
- ut_a(access_type == OS_FILE_READ_WRITE
- || access_type == OS_FILE_READ_ALLOW_DELETE
- || access_type == OS_FILE_READ_WRITE_CACHED);
-
- create_flag = O_RDWR;
- }
-
- } else if (srv_read_only_mode) {
-
- mode_str = "OPEN";
-
- create_flag = O_RDONLY;
-
- } else if (create_mode == OS_FILE_CREATE) {
-
- mode_str = "CREATE";
-
- create_flag = O_RDWR | O_CREAT | O_EXCL;
-
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file create mode (%lu) for file '%s'",
- create_mode, name);
- file = -1;
- return(file);
- }
-
- file = open(name, create_flag, os_innodb_umask);
-
- *success = file != -1;
-
- /* This function is always called for data files, we should disable
- OS caching (O_DIRECT) here as we do in os_file_create_func(), so
- we open the same file in the same mode, see man page of open(2). */
- if (*success) {
- os_file_set_nocache_if_needed(file, name, mode_str,
- OS_DATA_FILE, access_type);
- }
-
-#ifdef USE_FILE_LOCK
- if (!srv_read_only_mode
- && *success
- && (access_type == OS_FILE_READ_WRITE
- || access_type == OS_FILE_READ_WRITE_CACHED)
- && os_file_lock(file, name)) {
-
- *success = FALSE;
- close(file);
- file = -1;
-
- }
-#endif /* USE_FILE_LOCK */
-
- /* If we have proper file handle and atomic writes should be used,
- try to set atomic writes and if that fails when creating a new
- table, produce a error. If atomic writes are used on existing
- file, ignore error and use traditional writes for that file */
- if (file != -1
- && (awrites == ATOMIC_WRITES_ON ||
- (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
- && !os_file_set_atomic_writes(name, file)) {
- if (create_mode == OS_FILE_CREATE) {
- fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
- close(file);
- os_file_delete_if_exists_func(name);
- *success = FALSE;
- file = -1;
- }
- }
-
-#endif /* __WIN__ */
-
- return(file);
-}
-
-/****************************************************************//**
-Tries to disable OS caching on an opened file descriptor.
-@return TRUE if operation is success and FALSE otherwise */
-UNIV_INTERN
-bool
-os_file_set_nocache(
-/*================*/
- os_file_t fd /*!< in: file descriptor to alter */
- MY_ATTRIBUTE((unused)),
- const char* file_name /*!< in: used in the diagnostic
- message */
- MY_ATTRIBUTE((unused)),
- const char* operation_name MY_ATTRIBUTE((unused)))
- /*!< in: "open" or "create"; used
- in the diagnostic message */
-{
- /* some versions of Solaris may not have DIRECTIO_ON */
-#if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
- if (directio(fd, DIRECTIO_ON) == -1) {
- int errno_save = errno;
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Failed to set DIRECTIO_ON on file %s: %s: %s, "
- "continuing anyway.",
- file_name, operation_name, strerror(errno_save));
- return false;
- }
-#elif defined(O_DIRECT)
- if (fcntl(fd, F_SETFL, O_DIRECT) == -1) {
- int errno_save = errno;
- static bool warning_message_printed = false;
- if (errno_save == EINVAL) {
- if (!warning_message_printed) {
- warning_message_printed = true;
-# ifdef UNIV_LINUX
- ib_logf(IB_LOG_LEVEL_WARN,
- "Failed to set O_DIRECT on file "
- "%s: %s: %s, continuing anyway. "
- "O_DIRECT is known to result "
- "in 'Invalid argument' on Linux on "
- "tmpfs, see MySQL Bug#26662.",
- file_name, operation_name,
- strerror(errno_save));
-# else /* UNIV_LINUX */
- goto short_warning;
-# endif /* UNIV_LINUX */
- }
- } else {
-# ifndef UNIV_LINUX
-short_warning:
-# endif
- ib_logf(IB_LOG_LEVEL_WARN,
- "Failed to set O_DIRECT on file %s: %s: %s, "
- "continuing anyway.",
- file_name, operation_name, strerror(errno_save));
- }
- return false;
- }
-#endif /* defined(UNIV_SOLARIS) && defined(DIRECTIO_ON) */
- return true;
-}
-
-
-/****************************************************************//**
-Tries to enable the atomic write feature, if available, for the specified file
-handle.
-@return TRUE if success */
-static MY_ATTRIBUTE((warn_unused_result))
-ibool
-os_file_set_atomic_writes(
-/*======================*/
- const char* name /*!< in: name of the file */
- MY_ATTRIBUTE((unused)),
- os_file_t file /*!< in: handle to the file */
- MY_ATTRIBUTE((unused)))
-
-{
-#ifdef DFS_IOCTL_ATOMIC_WRITE_SET
- int atomic_option = 1;
-
- if (ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic_option)) {
-
- fprintf(stderr, "InnoDB: Warning:Trying to enable atomic writes on "
- "file %s on non-supported platform!\n", name);
- os_file_handle_error_no_exit(name, "ioctl(DFS_IOCTL_ATOMIC_WRITE_SET)", FALSE, __FILE__, __LINE__);
- return(FALSE);
- }
-
- return(TRUE);
-#else
- fprintf(stderr, "InnoDB: Error: trying to enable atomic writes on "
- "file %s on non-supported platform!\n", name);
- return(FALSE);
-#endif
-}
-
-/****************************************************************//**
-NOTE! Use the corresponding macro os_file_create(), not directly
-this function!
-Opens an existing file or creates a new.
-@return own: handle to the file, not defined if error, error number
-can be retrieved with os_file_get_last_error */
-UNIV_INTERN
-pfs_os_file_t
-os_file_create_func(
-/*================*/
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- ulint create_mode,/*!< in: create mode */
- ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
- non-buffered i/o is desired,
- OS_FILE_NORMAL, if any normal file;
- NOTE that it also depends on type, os_aio_..
- and srv_.. variables whether we really use
- async i/o or unbuffered i/o: look in the
- function source code for the exact rules */
- ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
- ibool* success,/*!< out: TRUE if succeed, FALSE if error */
- ulint atomic_writes) /*! in: atomic writes table option
- value */
-{
- pfs_os_file_t file;
- ibool retry;
- ibool on_error_no_exit;
- ibool on_error_silent;
- atomic_writes_t awrites = (atomic_writes_t) atomic_writes;
-
-#ifdef __WIN__
- DBUG_EXECUTE_IF(
- "ib_create_table_fail_disk_full",
- *success = FALSE;
- SetLastError(ERROR_DISK_FULL);
- file = INVALID_HANDLE_VALUE;
- return(file);
- );
-#else /* __WIN__ */
- DBUG_EXECUTE_IF(
- "ib_create_table_fail_disk_full",
- *success = FALSE;
- errno = ENOSPC;
- file = -1;
- return(file);
- );
-#endif /* __WIN__ */
-
-#ifdef __WIN__
- DWORD create_flag;
- DWORD share_mode = FILE_SHARE_READ;
-
- on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT
- ? TRUE : FALSE;
-
- on_error_silent = create_mode & OS_FILE_ON_ERROR_SILENT
- ? TRUE : FALSE;
-
- create_mode &= ~OS_FILE_ON_ERROR_NO_EXIT;
- create_mode &= ~OS_FILE_ON_ERROR_SILENT;
- if (srv_backup_mode){
- /* Permit others to write, while I'm reading. */
- share_mode |= FILE_SHARE_WRITE;
- }
- if (create_mode == OS_FILE_OPEN_RAW) {
-
- ut_a(!srv_read_only_mode);
-
- create_flag = OPEN_EXISTING;
-
- /* On Windows Physical devices require admin privileges and
- have to have the write-share mode set. See the remarks
- section for the CreateFile() function documentation in MSDN. */
-
- share_mode |= FILE_SHARE_WRITE;
-
- } else if (create_mode == OS_FILE_OPEN
- || create_mode == OS_FILE_OPEN_RETRY) {
-
- create_flag = OPEN_EXISTING;
-
- } else if (srv_read_only_mode) {
-
- create_flag = OPEN_EXISTING;
-
- } else if (create_mode == OS_FILE_CREATE) {
-
- create_flag = CREATE_NEW;
-
- } else if (create_mode == OS_FILE_OVERWRITE) {
-
- create_flag = CREATE_ALWAYS;
-
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file create mode (%lu) for file '%s'",
- create_mode, name);
-
- file = INVALID_HANDLE_VALUE;
- return(file);
- }
-
- DWORD attributes = 0;
-
-#ifdef UNIV_HOTBACKUP
- attributes |= FILE_FLAG_NO_BUFFERING;
-#else
- if (purpose == OS_FILE_AIO) {
-#ifdef WIN_ASYNC_IO
- /* If specified, use asynchronous (overlapped) io and no
- buffering of writes in the OS */
-
- if (srv_use_native_aio) {
- attributes |= FILE_FLAG_OVERLAPPED;
- }
-#endif /* WIN_ASYNC_IO */
-
- } else if (purpose == OS_FILE_NORMAL) {
- /* Use default setting. */
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown purpose flag (%lu) while opening file '%s'",
- purpose, name);
- file = INVALID_HANDLE_VALUE;
- return(file);
- }
-
-#ifdef UNIV_NON_BUFFERED_IO
- // TODO: Create a bug, this looks wrong. The flush log
- // parameter is dynamic.
- if (type == OS_LOG_FILE && thd_flush_log_at_trx_commit(NULL) == 2) {
-
- /* Do not use unbuffered i/o for the log files because
- value 2 denotes that we do not flush the log at every
- commit, but only once per second */
-
- } else if (srv_win_file_flush_method == SRV_WIN_IO_UNBUFFERED) {
-
- attributes |= FILE_FLAG_NO_BUFFERING;
- }
-#endif /* UNIV_NON_BUFFERED_IO */
-
-#endif /* UNIV_HOTBACKUP */
- DWORD access = GENERIC_READ;
-
- if (!srv_read_only_mode) {
- access |= GENERIC_WRITE;
- }
-
- if (type == OS_LOG_FILE) {
- if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
- /* Map O_DSYNC to WRITE_THROUGH */
- attributes |= FILE_FLAG_WRITE_THROUGH;
- } else if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) {
- /* Open log file without buffering */
- attributes |= FILE_FLAG_NO_BUFFERING;
- }
- }
-
- do {
- /* Use default security attributes and no template file. */
- file = CreateFile(
- (LPCTSTR) name, access, share_mode, NULL,
- create_flag, attributes, NULL);
-
- if (file == INVALID_HANDLE_VALUE) {
- const char* operation;
-
- operation = (create_mode == OS_FILE_CREATE
- && !srv_read_only_mode)
- ? "create" : "open";
-
- *success = FALSE;
-
- if (on_error_no_exit) {
- retry = os_file_handle_error_no_exit(
- name, operation, on_error_silent, __FILE__, __LINE__);
- } else {
- retry = os_file_handle_error(name, operation, __FILE__, __LINE__);
- }
- } else {
- *success = TRUE;
- retry = FALSE;
- if (srv_use_native_aio && ((attributes & FILE_FLAG_OVERLAPPED) != 0)) {
- ut_a(CreateIoCompletionPort(file, completion_port, 0, 0));
- }
- }
-
- } while (retry);
-
- /* If we have proper file handle and atomic writes should be used,
- try to set atomic writes and if that fails when creating a new
- table, produce a error. If atomic writes are used on existing
- file, ignore error and use traditional writes for that file */
- if (file != INVALID_HANDLE_VALUE && type == OS_DATA_FILE
- && (awrites == ATOMIC_WRITES_ON ||
- (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
- && !os_file_set_atomic_writes(name, file)) {
- if (create_mode == OS_FILE_CREATE) {
- fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
- CloseHandle(file);
- os_file_delete_if_exists_func(name);
- *success = FALSE;
- file = INVALID_HANDLE_VALUE;
- }
- }
-
-#else /* __WIN__ */
- int create_flag;
- const char* mode_str = NULL;
- if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW)
- WAIT_ALLOW_WRITES();
-
- on_error_no_exit = create_mode & OS_FILE_ON_ERROR_NO_EXIT
- ? TRUE : FALSE;
- on_error_silent = create_mode & OS_FILE_ON_ERROR_SILENT
- ? TRUE : FALSE;
-
- create_mode &= ~OS_FILE_ON_ERROR_NO_EXIT;
- create_mode &= ~OS_FILE_ON_ERROR_SILENT;
-
- if (create_mode == OS_FILE_OPEN
- || create_mode == OS_FILE_OPEN_RAW
- || create_mode == OS_FILE_OPEN_RETRY) {
-
- mode_str = "OPEN";
-
- create_flag = srv_read_only_mode ? O_RDONLY : O_RDWR;
-
- } else if (srv_read_only_mode) {
-
- mode_str = "OPEN";
-
- create_flag = O_RDONLY;
-
- } else if (create_mode == OS_FILE_CREATE) {
-
- mode_str = "CREATE";
- create_flag = O_RDWR | O_CREAT | O_EXCL;
-
- } else if (create_mode == OS_FILE_OVERWRITE) {
-
- mode_str = "OVERWRITE";
- create_flag = O_RDWR | O_CREAT | O_TRUNC;
-
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unknown file create mode (%lu) for file '%s'",
- create_mode, name);
-
- file = -1;
- return(file);
- }
-
- ut_a(type == OS_LOG_FILE || type == OS_DATA_FILE);
- ut_a(purpose == OS_FILE_AIO || purpose == OS_FILE_NORMAL);
-
-#ifdef O_SYNC
- /* We let O_SYNC only affect log files; note that we map O_DSYNC to
- O_SYNC because the datasync options seemed to corrupt files in 2001
- in both Linux and Solaris */
-
- if (!srv_read_only_mode
- && type == OS_LOG_FILE
- && srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
-
- create_flag |= O_SYNC;
- }
-#endif /* O_SYNC */
-
- do {
- file = open(name, create_flag, os_innodb_umask);
-
- if (file == -1) {
- const char* operation;
-
- operation = (create_mode == OS_FILE_CREATE
- && !srv_read_only_mode)
- ? "create" : "open";
-
- *success = FALSE;
-
- if (on_error_no_exit) {
- retry = os_file_handle_error_no_exit(
- name, operation, on_error_silent, __FILE__, __LINE__);
- } else {
- retry = os_file_handle_error(name, operation, __FILE__, __LINE__);
- }
- } else {
- *success = TRUE;
- retry = false;
- }
-
- } while (retry);
-
- /* We disable OS caching (O_DIRECT) only on data files */
-
- if (*success) {
- os_file_set_nocache_if_needed(file, name, mode_str, type, 0);
- }
-
-#ifdef USE_FILE_LOCK
- if (!srv_read_only_mode
- && *success
- && create_mode != OS_FILE_OPEN_RAW
- && os_file_lock(file, name)) {
-
- if (create_mode == OS_FILE_OPEN_RETRY) {
-
- ut_a(!srv_read_only_mode);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Retrying to lock the first data file");
-
- for (int i = 0; i < 100; i++) {
- os_thread_sleep(1000000);
-
- if (!os_file_lock(file, name)) {
- *success = TRUE;
- return(file);
- }
- }
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Unable to open the first data file");
- }
-
- *success = FALSE;
- close(file);
- file = -1;
- }
-#endif /* USE_FILE_LOCK */
-
- /* If we have proper file handle and atomic writes should be used,
- try to set atomic writes and if that fails when creating a new
- table, produce a error. If atomic writes are used on existing
- file, ignore error and use traditional writes for that file */
- if (file != -1 && type == OS_DATA_FILE
- && (awrites == ATOMIC_WRITES_ON ||
- (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT))
- && !os_file_set_atomic_writes(name, file)) {
- if (create_mode == OS_FILE_CREATE) {
- fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n");
- close(file);
- os_file_delete_if_exists_func(name);
- *success = FALSE;
- file = -1;
- }
- }
-
-
-#endif /* __WIN__ */
-
- return(file);
-}
-
-/***********************************************************************//**
-Deletes a file if it exists. The file has to be closed before calling this.
-@return TRUE if success */
-UNIV_INTERN
-bool
-os_file_delete_if_exists_func(
-/*==========================*/
- const char* name) /*!< in: file path as a null-terminated
- string */
-{
-#ifdef __WIN__
- bool ret;
- ulint count = 0;
-loop:
- /* In Windows, deleting an .ibd file may fail if mysqlbackup is copying
- it */
-
- ret = DeleteFile((LPCTSTR) name);
-
- if (ret) {
- return(true);
- }
-
- DWORD lasterr = GetLastError();
- if (lasterr == ERROR_FILE_NOT_FOUND
- || lasterr == ERROR_PATH_NOT_FOUND) {
- /* the file does not exist, this not an error */
-
- return(true);
- }
-
- count++;
-
- if (count > 100 && 0 == (count % 10)) {
- os_file_get_last_error(true); /* print error information */
-
- ib_logf(IB_LOG_LEVEL_WARN, "Delete of file %s failed.", name);
- }
-
- os_thread_sleep(500000); /* sleep for 0.5 second */
-
- if (count > 2000) {
-
- return(false);
- }
-
- goto loop;
-#else
- int ret;
- WAIT_ALLOW_WRITES();
-
- ret = unlink(name);
-
- if (ret != 0 && errno != ENOENT) {
- os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__);
-
- return(false);
- }
-
- return(true);
-#endif /* __WIN__ */
-}
-
-/***********************************************************************//**
-Deletes a file. The file has to be closed before calling this.
-@return TRUE if success */
-UNIV_INTERN
-bool
-os_file_delete_func(
-/*================*/
- const char* name) /*!< in: file path as a null-terminated
- string */
-{
-#ifdef __WIN__
- BOOL ret;
- ulint count = 0;
-loop:
- /* In Windows, deleting an .ibd file may fail if mysqlbackup is copying
- it */
-
- ret = DeleteFile((LPCTSTR) name);
-
- if (ret) {
- return(true);
- }
-
- if (GetLastError() == ERROR_FILE_NOT_FOUND) {
- /* If the file does not exist, we classify this as a 'mild'
- error and return */
-
- return(false);
- }
-
- count++;
-
- if (count > 100 && 0 == (count % 10)) {
- os_file_get_last_error(true); /* print error information */
-
- fprintf(stderr,
- "InnoDB: Warning: cannot delete file %s\n"
- "InnoDB: Are you running mysqlbackup"
- " to back up the file?\n", name);
- }
-
- os_thread_sleep(1000000); /* sleep for a second */
-
- if (count > 2000) {
-
- return(false);
- }
-
- goto loop;
-#else
- int ret;
- WAIT_ALLOW_WRITES();
-
- ret = unlink(name);
-
- if (ret != 0) {
- os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__);
-
- return(false);
- }
-
- return(true);
-#endif
-}
-
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_rename(), not directly this function!
-Renames a file (can also move it to another directory). It is safest that the
-file is closed before calling this function.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_rename_func(
-/*================*/
- const char* oldpath,/*!< in: old file path as a null-terminated
- string */
- const char* newpath)/*!< in: new file path */
-{
-#ifdef UNIV_DEBUG
- os_file_type_t type;
- ibool exists;
-
- /* New path must not exist. */
- ut_ad(os_file_status(newpath, &exists, &type));
- ut_ad(!exists);
-
- /* Old path must exist. */
- ut_ad(os_file_status(oldpath, &exists, &type));
- ut_ad(exists);
-#endif /* UNIV_DEBUG */
-
-#ifdef __WIN__
- BOOL ret;
-
- ret = MoveFileEx((LPCTSTR)oldpath, (LPCTSTR)newpath, MOVEFILE_REPLACE_EXISTING);
-
- if (ret) {
- return(TRUE);
- }
-
- os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__);
-
- return(FALSE);
-#else
- int ret;
- WAIT_ALLOW_WRITES();
-
- ret = rename(oldpath, newpath);
-
- if (ret != 0) {
- os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__);
-
- return(FALSE);
- }
-
- return(TRUE);
-#endif /* __WIN__ */
-}
-
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_close(), not directly this function!
-Closes a file handle. In case of error, error number can be retrieved with
-os_file_get_last_error.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_close_func(
-/*===============*/
- os_file_t file) /*!< in, own: handle to a file */
-{
-#ifdef __WIN__
- BOOL ret;
-
- ret = CloseHandle(file);
-
- if (ret) {
- return(TRUE);
- }
-
- os_file_handle_error(NULL, "close", __FILE__, __LINE__);
-
- return(FALSE);
-#else
- int ret;
-
- ret = close(file);
-
- if (ret == -1) {
- os_file_handle_error(NULL, "close", __FILE__, __LINE__);
-
- return(FALSE);
- }
-
- return(TRUE);
-#endif /* __WIN__ */
-}
-
-/***********************************************************************//**
-Closes a file handle.
-@return TRUE if success */
-UNIV_INTERN
-bool
-os_file_close_no_error_handling_func(
-/*============================*/
- os_file_t file) /*!< in, own: handle to a file */
-{
-#ifdef __WIN__
- BOOL ret;
-
- ret = CloseHandle(file);
-
- if (ret) {
- return(true);
- }
-
- return(false);
-#else
- int ret;
-
- ret = close(file);
-
- if (ret == -1) {
-
- return(false);
- }
-
- return(true);
-#endif /* __WIN__ */
-}
-
#ifdef HAVE_POSIX_FALLOCATE
/***********************************************************************//**
Reserves disk space for a region of the file via posix_fallocate().
@return true if posix_fallocate() returned 0 */
UNIV_INTERN
bool
os_file_allocate_func(
	os_file_t	file,	/*!< in, own: handle to a file */
	os_offset_t	offset,	/*!< in: file region offset */
	os_offset_t	len)	/*!< in: file region length */
{
	const int	err = posix_fallocate(file, offset, len);

	return(err == 0);
}
#endif
-
-/***********************************************************************//**
-Checks if the file is marked as invalid.
-@return TRUE if invalid */
-UNIV_INTERN
-bool
-os_file_is_invalid(
- pfs_os_file_t file) /*!< in, own: handle to a file */
-{
- return(file == os_file_invalid);
-}
-
-/***********************************************************************//**
-Marks the file as invalid. */
-UNIV_INTERN
-void
-os_file_mark_invalid(
- pfs_os_file_t* file) /*!< out: pointer to a handle to a file */
-{
- file->m_file = os_file_invalid;
-}
-
-/***********************************************************************//**
-Announces an intention to access file data in a specific pattern in the
-future.
-@return TRUE if success */
-UNIV_INTERN
-bool
-os_file_advise(
- pfs_os_file_t file, /*!< in, own: handle to a file */
- os_offset_t offset, /*!< in: file region offset */
- os_offset_t len, /*!< in: file region length */
- ulint advice)/*!< in: advice for access pattern */
-{
-#ifdef __WIN__
- return(true);
-#else
-#ifdef UNIV_LINUX
- int native_advice = 0;
- if ((advice & OS_FILE_ADVISE_NORMAL) != 0)
- native_advice |= POSIX_FADV_NORMAL;
- if ((advice & OS_FILE_ADVISE_RANDOM) != 0)
- native_advice |= POSIX_FADV_RANDOM;
- if ((advice & OS_FILE_ADVISE_SEQUENTIAL) != 0)
- native_advice |= POSIX_FADV_SEQUENTIAL;
- if ((advice & OS_FILE_ADVISE_WILLNEED) != 0)
- native_advice |= POSIX_FADV_WILLNEED;
- if ((advice & OS_FILE_ADVISE_DONTNEED) != 0)
- native_advice |= POSIX_FADV_DONTNEED;
- if ((advice & OS_FILE_ADVISE_NOREUSE) != 0)
- native_advice |= POSIX_FADV_NOREUSE;
-
- return(posix_fadvise(file, offset, len, native_advice) == 0);
-#else
- return(true);
-#endif
-#endif /* __WIN__ */
-}
-
-/***********************************************************************//**
-Gets a file size.
-@return file size, or (os_offset_t) -1 on failure */
-UNIV_INTERN
-os_offset_t
-os_file_get_size(
-/*=============*/
- pfs_os_file_t file) /*!< in: handle to a file */
-{
-#ifdef __WIN__
- os_offset_t offset;
- DWORD high;
- DWORD low;
-
- low = GetFileSize(file, &high);
-
- if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) {
- return((os_offset_t) -1);
- }
-
- offset = (os_offset_t) low | ((os_offset_t) high << 32);
-
- return(offset);
-#else
- return((os_offset_t) lseek(file, 0, SEEK_END));
-
-#endif /* __WIN__ */
-}
-
-/** Set the size of a newly created file.
-@param[in] name file name
-@param[in] file file handle
-@param[in] size desired file size
-@param[in] sparse whether to create a sparse file (no preallocating)
-@return whether the operation succeeded */
-UNIV_INTERN
-bool
-os_file_set_size(
- const char* name,
- pfs_os_file_t file,
- os_offset_t size,
- bool is_sparse)
-{
-#ifdef _WIN32
- FILE_END_OF_FILE_INFO feof;
- feof.EndOfFile.QuadPart = size;
- bool success = SetFileInformationByHandle(file,
- FileEndOfFileInfo,
- &feof, sizeof feof);
- if (!success) {
- ib_logf(IB_LOG_LEVEL_ERROR, "os_file_set_size() of file %s"
- " to " INT64PF " bytes failed with %u",
- name, size, GetLastError());
- }
- return(success);
-#else
- if (is_sparse) {
- bool success = !ftruncate(file, size);
- if (!success) {
- ib_logf(IB_LOG_LEVEL_ERROR, "ftruncate of file %s"
- " to " INT64PF " bytes failed with error %d",
- name, size, errno);
- }
- return(success);
- }
-
-# ifdef HAVE_POSIX_FALLOCATE
- if (srv_use_posix_fallocate) {
- int err;
- do {
- err = posix_fallocate(file, 0, size);
- } while (err == EINTR
- && srv_shutdown_state == SRV_SHUTDOWN_NONE);
-
- if (err) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "preallocating " INT64PF " bytes for"
- "file %s failed with error %d",
- size, name, err);
- }
- return(!err);
- }
-# endif
-
- /* Write up to 1 megabyte at a time. */
- ulint buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
- * UNIV_PAGE_SIZE;
- os_offset_t current_size = 0;
-
- byte* buf2 = static_cast<byte*>(calloc(1, buf_size + UNIV_PAGE_SIZE));
-
- if (!buf2) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot allocate " ULINTPF " bytes to extend file\n",
- buf_size + UNIV_PAGE_SIZE);
- return(false);
- }
-
- /* Align the buffer for possible raw i/o */
- byte* buf = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
- bool ret;
-
- do {
- ulint n_bytes;
-
- if (size - current_size < (os_offset_t) buf_size) {
- n_bytes = (ulint) (size - current_size);
- } else {
- n_bytes = buf_size;
- }
-
- ret = os_file_write(name, file, buf, current_size, n_bytes);
-
- if (!ret) {
- break;
- }
-
- current_size += n_bytes;
- } while (current_size < size);
-
- free(buf2);
-
- return(ret && os_file_flush(file));
-#endif
-}
-
-/***********************************************************************//**
-Truncates a file at its current position.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_set_eof(
-/*============*/
- FILE* file) /*!< in: file to be truncated */
-{
-#ifdef __WIN__
- HANDLE h = (HANDLE) _get_osfhandle(fileno(file));
- return(SetEndOfFile(h));
-#else /* __WIN__ */
- WAIT_ALLOW_WRITES();
- return(!ftruncate(fileno(file), ftell(file)));
-#endif /* __WIN__ */
-}
-
-/***********************************************************************//**
-Truncates a file at the specified position.
-@return TRUE if success */
-UNIV_INTERN
-bool
-os_file_set_eof_at_func(
- os_file_t file, /*!< in: handle to a file */
- ib_uint64_t new_len)/*!< in: new file length */
-{
-#ifdef __WIN__
- LARGE_INTEGER li, li2;
- li.QuadPart = new_len;
- return(SetFilePointerEx(file, li, &li2,FILE_BEGIN)
- && SetEndOfFile(file));
-#else
- WAIT_ALLOW_WRITES();
- /* TODO: works only with -D_FILE_OFFSET_BITS=64 ? */
- return(!ftruncate(file, new_len));
-#endif
-}
-
-
-#ifndef __WIN__
-/***********************************************************************//**
-Wrapper to fsync(2) that retries the call on some errors.
-Returns the value 0 if successful; otherwise the value -1 is returned and
-the global variable errno is set to indicate the error.
-@return 0 if success, -1 otherwise */
-
-static
-int
-os_file_fsync(
-/*==========*/
- os_file_t file) /*!< in: handle to a file */
-{
- int ret;
- int failures;
- ibool retry;
-
- failures = 0;
-
- do {
- ret = fsync(file);
-
- os_n_fsyncs++;
-
- if (ret == -1 && errno == ENOLCK) {
-
- if (failures % 100 == 0) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: fsync(): "
- "No locks available; retrying\n");
- }
-
- os_thread_sleep(200000 /* 0.2 sec */);
-
- failures++;
-
- retry = TRUE;
- } else if (ret == -1 && errno == EINTR) {
- /* Handle signal interruptions correctly */
- retry = TRUE;
- } else {
-
- retry = FALSE;
- }
- } while (retry);
-
- return(ret);
-}
-#endif /* !__WIN__ */
-
-/***********************************************************************//**
-NOTE! Use the corresponding macro os_file_flush(), not directly this function!
-Flushes the write buffers of a given file to the disk.
-@return TRUE if success */
-UNIV_INTERN
-ibool
-os_file_flush_func(
-/*===============*/
- os_file_t file) /*!< in, own: handle to a file */
-{
-#ifdef __WIN__
- BOOL ret;
-
- os_n_fsyncs++;
-
- ret = FlushFileBuffers(file);
-
- if (ret) {
- return(TRUE);
- }
-
- /* Since Windows returns ERROR_INVALID_FUNCTION if the 'file' is
- actually a raw device, we choose to ignore that error if we are using
- raw disks */
-
- if (srv_start_raw_disk_in_use && GetLastError()
- == ERROR_INVALID_FUNCTION) {
- return(TRUE);
- }
-
- os_file_handle_error(NULL, "flush", __FILE__, __LINE__);
-
- /* It is a fatal error if a file flush does not succeed, because then
- the database can get corrupt on disk */
- ut_error;
-
- return(FALSE);
-#else
- int ret;
- WAIT_ALLOW_WRITES();
-
-#if defined(HAVE_DARWIN_THREADS)
-# ifndef F_FULLFSYNC
- /* The following definition is from the Mac OS X 10.3 <sys/fcntl.h> */
-# define F_FULLFSYNC 51 /* fsync + ask the drive to flush to the media */
-# elif F_FULLFSYNC != 51
-# error "F_FULLFSYNC != 51: ABI incompatibility with Mac OS X 10.3"
-# endif
- /* Apple has disabled fsync() for internal disk drives in OS X. That
- caused corruption for a user when he tested a power outage. Let us in
- OS X use a nonstandard flush method recommended by an Apple
- engineer. */
-
- if (!srv_have_fullfsync) {
- /* If we are not on an operating system that supports this,
- then fall back to a plain fsync. */
-
- ret = os_file_fsync(file);
- } else {
- ret = fcntl(file, F_FULLFSYNC, NULL);
-
- if (ret) {
- /* If we are not on a file system that supports this,
- then fall back to a plain fsync. */
- ret = os_file_fsync(file);
- }
- }
-#else
- ret = os_file_fsync(file);
-#endif
-
- if (ret == 0) {
- return(TRUE);
- }
-
- /* Since Linux returns EINVAL if the 'file' is actually a raw device,
- we choose to ignore that error if we are using raw disks */
-
- if (srv_start_raw_disk_in_use && errno == EINVAL) {
-
- return(TRUE);
- }
-
- ib_logf(IB_LOG_LEVEL_ERROR, "The OS said file flush did not succeed");
-
- os_file_handle_error(NULL, "flush", __FILE__, __LINE__);
-
- /* It is a fatal error if a file flush does not succeed, because then
- the database can get corrupt on disk */
- ut_error;
-
- return(FALSE);
-#endif
-}
-
-#ifndef __WIN__
-/*******************************************************************//**
-Does a synchronous read operation in Posix.
-@return number of bytes read, -1 if error */
-static MY_ATTRIBUTE((nonnull(2), warn_unused_result))
-ssize_t
-os_file_pread(
-/*==========*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- ulint n, /*!< in: number of bytes to read */
- os_offset_t offset, /*!< in: file offset from where to read */
- trx_t* trx)
-{
- off_t offs;
- ulint sec;
- ulint ms;
- ib_uint64_t start_time;
- ib_uint64_t finish_time;
-
- ut_ad(n);
-
- /* If off_t is > 4 bytes in size, then we assume we can pass a
- 64-bit address */
- offs = (off_t) offset;
-
- if (sizeof(off_t) <= 4) {
- if (offset != (os_offset_t) offs) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "File read at offset > 4 GB");
- }
- }
-
- os_n_file_reads++;
-
- if (UNIV_UNLIKELY(trx && trx->take_stats))
- {
- trx->io_reads++;
- trx->io_read += n;
- ut_usectime(&sec, &ms);
- start_time = (ib_uint64_t)sec * 1000000 + ms;
- } else {
- start_time = 0;
- }
-
- const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_READS);
-#ifdef HAVE_PREAD
- MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_READS, monitor);
-
- ssize_t n_bytes;
-
- /* Handle partial reads and signal interruptions correctly */
- for (n_bytes = 0; n_bytes < (ssize_t) n; ) {
- ssize_t n_read = pread(file, buf, (ssize_t)n - n_bytes, offs);
- if (n_read > 0) {
- n_bytes += n_read;
- offs += n_read;
- buf = (char *)buf + n_read;
- } else if (n_read == -1 && errno == EINTR) {
- continue;
- } else {
- break;
- }
- }
-
- MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor);
-
- if (UNIV_UNLIKELY(start_time != 0))
- {
- ut_usectime(&sec, &ms);
- finish_time = (ib_uint64_t)sec * 1000000 + ms;
- trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
- }
-
- return(n_bytes);
-#else
- {
- off_t ret_offset;
- ssize_t ret;
- ssize_t n_read;
-#ifndef UNIV_HOTBACKUP
- ulint i;
-#endif /* !UNIV_HOTBACKUP */
-
- MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_READS, monitor);
-#ifndef UNIV_HOTBACKUP
- /* Protect the seek / read operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- ret_offset = lseek(file, offs, SEEK_SET);
-
- if (ret_offset < 0) {
- ret = -1;
- } else {
- /* Handle signal interruptions correctly */
- for (ret = 0; ret < (ssize_t) n; ) {
- n_read = read(file, buf, (ssize_t)n);
- if (n_read > 0) {
- ret += n_read;
- } else if (n_read == -1 && errno == EINTR) {
- continue;
- } else {
- break;
- }
- }
- }
-
-#ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-#endif /* !UNIV_HOTBACKUP */
-
- MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor);
-
- if (UNIV_UNLIKELY(start_time != 0)
- {
- ut_usectime(&sec, &ms);
- finish_time = (ib_uint64_t)sec * 1000000 + ms;
- trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
- }
-
- return(ret);
- }
-#endif
-}
-
-/*******************************************************************//**
-Does a synchronous write operation in Posix.
-@return number of bytes written, -1 if error */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-ssize_t
-os_file_pwrite(
-/*===========*/
- os_file_t file, /*!< in: handle to a file */
- const void* buf, /*!< in: buffer from where to write */
- ulint n, /*!< in: number of bytes to write */
- os_offset_t offset) /*!< in: file offset where to write */
-{
- ssize_t ret;
- ssize_t n_written;
- off_t offs;
-
- ut_ad(n);
- ut_ad(!srv_read_only_mode);
-
- /* If off_t is > 4 bytes in size, then we assume we can pass a
- 64-bit address */
- offs = (off_t) offset;
-
- if (sizeof(off_t) <= 4) {
- if (offset != (os_offset_t) offs) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "File write at offset > 4 GB.");
- }
- }
-
- os_n_file_writes++;
-
- const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_WRITES);
-#ifdef HAVE_PWRITE
- MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
-
- /* Handle partial writes and signal interruptions correctly */
- for (ret = 0; ret < (ssize_t) n; ) {
- n_written = pwrite(file, buf, (ssize_t)n - ret, offs);
- DBUG_EXECUTE_IF("xb_simulate_all_o_direct_write_failure",
- n_written = -1;
- errno = EINVAL;);
- if (n_written >= 0) {
- ret += n_written;
- offs += n_written;
- buf = (char *)buf + n_written;
- } else if (n_written == -1 && errno == EINTR) {
- continue;
- } else {
- break;
- }
- }
-
- MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
-
- return(ret);
-#else
- {
- off_t ret_offset;
-# ifndef UNIV_HOTBACKUP
- ulint i;
-# endif /* !UNIV_HOTBACKUP */
-
- MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
-
-# ifndef UNIV_HOTBACKUP
- /* Protect the seek / write operation with a mutex */
- i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES;
-
- os_mutex_enter(os_file_seek_mutexes[i]);
-# endif /* UNIV_HOTBACKUP */
-
- ret_offset = lseek(file, offs, SEEK_SET);
-
- if (ret_offset < 0) {
- ret = -1;
-
- goto func_exit;
- }
-
- /* Handle signal interruptions correctly */
- for (ret = 0; ret < (ssize_t) n; ) {
- n_written = write(file, buf, (ssize_t)n);
- if (n_written > 0) {
- ret += n_written;
- } else if (n_written == -1 && errno == EINTR) {
- continue;
- } else {
- break;
- }
- }
-
-func_exit:
-# ifndef UNIV_HOTBACKUP
- os_mutex_exit(os_file_seek_mutexes[i]);
-# endif /* !UNIV_HOTBACKUP */
-
- MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
- return(ret);
- }
-#endif /* HAVE_PWRITE */
-}
-#endif
-
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_file_read(), not directly this
-function!
-Requests a synchronous positioned read operation.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-os_file_read_func(
-/*==============*/
- os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read */
- os_offset_t offset, /*!< in: file offset where to read */
- ulint n, /*!< in: number of bytes to read */
- trx_t* trx)
-{
-#ifdef __WIN__
- BOOL ret;
- DWORD len;
- ibool retry;
- OVERLAPPED overlapped;
-
-
- /* On 64-bit Windows, ulint is 64 bits. But offset and n should be
- no more than 32 bits. */
- ut_a((n & 0xFFFFFFFFUL) == n);
-
- os_n_file_reads++;
- os_bytes_read_since_printout += n;
- const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_READS);
-
-try_again:
- ut_ad(buf);
- ut_ad(n > 0);
-
- MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_READS, monitor);
-
- memset (&overlapped, 0, sizeof (overlapped));
- overlapped.Offset = (DWORD)(offset & 0xFFFFFFFF);
- overlapped.OffsetHigh = (DWORD)(offset >> 32);
- overlapped.hEvent = win_get_syncio_event();
- ret = ReadFile(file, buf, n, NULL, &overlapped);
- if (ret) {
- ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE);
- }
- else if(GetLastError() == ERROR_IO_PENDING) {
- ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE);
- }
- MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor);
-
- if (ret && len == n) {
- return(TRUE);
- }
-#else /* __WIN__ */
- ibool retry;
- ssize_t ret;
-
- os_bytes_read_since_printout += n;
-
-try_again:
- ret = os_file_pread(file, buf, n, offset, trx);
-
- DBUG_EXECUTE_IF("xb_simulate_all_o_direct_read_failure",
- ret = -1;
- errno = EINVAL;);
-
- if ((ulint) ret == n) {
- return(TRUE);
- } else if (ret == -1) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Error in system call pread(). The operating"
- " system error number is %lu.",(ulint) errno);
- } else {
- /* Partial read occurred */
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Tried to read " ULINTPF " bytes at offset "
- UINT64PF ". Was only able to read %ld.",
- n, offset, (lint) ret);
- }
-#endif /* __WIN__ */
- retry = os_file_handle_error(NULL, "read", __FILE__, __LINE__);
-
- if (retry) {
- goto try_again;
- }
-
- fprintf(stderr,
- "InnoDB: Fatal error: cannot read from file."
- " OS error number %lu.\n",
-#ifdef __WIN__
- (ulong) GetLastError()
-#else
- (ulong) errno
-#endif /* __WIN__ */
- );
- fflush(stderr);
-
- ut_error;
-
- return(FALSE);
-}
-
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_file_read_no_error_handling(),
-not directly this function!
-Requests a synchronous positioned read operation. A failed or partial read
-is reported through os_file_handle_error_no_exit(); if that handler asks
-for a retry the read is attempted again, otherwise FALSE is returned and
-it is up to the caller to deal with the failure.
-@return TRUE if exactly n bytes were read, FALSE if fail */
-UNIV_INTERN
-ibool
-os_file_read_no_error_handling_func(
-/*================================*/
-	os_file_t	file,	/*!< in: handle to a file */
-	void*		buf,	/*!< in: buffer where to read */
-	os_offset_t	offset,	/*!< in: file offset where to read */
-	ulint		n)	/*!< in: number of bytes to read */
-{
-#ifdef __WIN__
-	BOOL		ret;
-	DWORD		len;
-	ibool		retry;
-	OVERLAPPED	overlapped;
-
-	/* On 64-bit Windows, ulint is 64 bits, but the byte count is
-	handed to ReadFile() below, so n must fit in 32 bits. The offset
-	may be wider: it is split into two 32-bit halves. */
-	ut_a((n & 0xFFFFFFFFUL) == n);
-
-	os_n_file_reads++;
-	os_bytes_read_since_printout += n;
-	const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_READS);
-
-try_again:
-	ut_ad(buf);
-	ut_ad(n > 0);
-
-	MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_READS, monitor);
-
-	/* The OVERLAPPED structure is rebuilt from scratch for every
-	attempt; nothing may be stored in it before this memset. */
-	memset (&overlapped, 0, sizeof (overlapped));
-	overlapped.Offset = (DWORD)(offset & 0xFFFFFFFF);
-	overlapped.OffsetHigh = (DWORD)(offset >> 32);
-	overlapped.hEvent = win_get_syncio_event();
-	ret = ReadFile(file, buf, n, NULL, &overlapped);
-	if (ret) {
-		/* Completed immediately: just fetch the byte count. */
-		ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE);
-	}
-	else if(GetLastError() == ERROR_IO_PENDING) {
-		/* Still in flight: block until the read has finished. */
-		ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE);
-	}
-	MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_READS, monitor);
-
-	/* len is only examined when ret is non-zero, i.e. after
-	GetOverlappedResult() has filled it in. */
-	if (ret && len == n) {
-		return(TRUE);
-	}
-#else /* __WIN__ */
-	ibool	retry;
-	ssize_t	ret;
-
-	os_bytes_read_since_printout += n;
-
-try_again:
-	ret = os_file_pread(file, buf, n, offset, NULL);
-
-	if ((ulint) ret == n) {
-		return(TRUE);
-	} else if (ret == -1) {
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Error in system call pread(). The operating"
-			" system error number is %lu.",(ulint) errno);
-	} else {
-		/* Partial read occurred */
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Tried to read " ULINTPF " bytes at offset "
-			UINT64PF ". Was only able to read %ld.",
-			n, offset, (lint) ret);
-	}
-#endif /* __WIN__ */
-	retry = os_file_handle_error_no_exit(NULL, "read", FALSE, __FILE__, __LINE__);
-
-	if (retry) {
-		goto try_again;
-	}
-
-	return(FALSE);
-}
-
-/*******************************************************************//**
-Reads the beginning of a stdio stream into a caller-supplied buffer as a
-C string: the stream is rewound, up to size - 1 bytes are read into str and
-a terminating NUL is stored right after the bytes actually read. Nothing
-is done when size is 0. Read errors are not reported. Mostly meant to be
-used with temporary files. */
-UNIV_INTERN
-void
-os_file_read_string(
-/*================*/
-	FILE*	file,	/*!< in: file to read from */
-	char*	str,	/*!< in: buffer where to read */
-	ulint	size)	/*!< in: size of buffer */
-{
-	if (size > 0) {
-		rewind(file);
-
-		/* Leave room for the terminator in the last byte. */
-		const size_t	n_read = fread(str, 1, size - 1, file);
-
-		str[n_read] = '\0';
-	}
-}
-
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_file_write(), not directly
-this function!
-Requests a synchronous write operation. On failure a diagnostic block is
-printed to stderr, but only the first time: os_has_said_disk_full is set
-afterwards and suppresses further reports. The operating system error code
-of the failed write is captured before any diagnostic output is produced,
-so that the logging calls cannot overwrite it.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-os_file_write_func(
-/*===============*/
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	os_file_t	file,	/*!< in: handle to a file */
-	const void*	buf,	/*!< in: buffer from which to write */
-	os_offset_t	offset,	/*!< in: file offset where to write */
-	ulint		n)	/*!< in: number of bytes to write */
-{
-	ut_ad(!srv_read_only_mode);
-
-#ifdef __WIN__
-	BOOL		ret;
-	DWORD		len;
-	ulint		n_retries	= 0;
-	ulint		err;
-	OVERLAPPED	overlapped;
-	DWORD		saved_error	= 0;
-
-	/* On 64-bit Windows, ulint is 64 bits, but the byte count is
-	handed to WriteFile() below, so n must fit in 32 bits. The offset
-	may be wider: it is split into two 32-bit halves. */
-	ut_a((n & 0xFFFFFFFFUL) == n);
-
-	os_n_file_writes++;
-
-	ut_ad(buf);
-	ut_ad(n > 0);
-	const bool monitor = MONITOR_IS_ON(MONITOR_OS_PENDING_WRITES);
-retry:
-	/* If WriteFile() fails outright, GetOverlappedResult() is never
-	called and len would otherwise be printed uninitialized below. */
-	len = 0;
-
-	MONITOR_ATOMIC_INC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
-
-	memset (&overlapped, 0, sizeof (overlapped));
-	overlapped.Offset = (DWORD)(offset & 0xFFFFFFFF);
-	overlapped.OffsetHigh = (DWORD)(offset >> 32);
-
-	overlapped.hEvent = win_get_syncio_event();
-	ret = WriteFile(file, buf, n, NULL, &overlapped);
-	if (ret) {
-		/* Completed immediately: just fetch the byte count. */
-		ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, FALSE);
-	}
-	else if ( GetLastError() == ERROR_IO_PENDING) {
-		/* Still in flight: block until the write has finished. */
-		ret = GetOverlappedResult(file, &overlapped, (DWORD *)&len, TRUE);
-	}
-
-	MONITOR_ATOMIC_DEC_LOW(MONITOR_OS_PENDING_WRITES, monitor);
-
-	if (ret && len == n) {
-
-		return(TRUE);
-	}
-
-	/* Remember the error of the failed write once, before any other
-	call gets a chance to replace the thread's last-error value. */
-	saved_error = GetLastError();
-
-	/* If some background file system backup tool is running, then, at
-	least in Windows 2000, we may get here a specific error. Let us
-	retry the operation 100 times, with 1 second waits. */
-
-	if (saved_error == ERROR_LOCK_VIOLATION && n_retries < 100) {
-
-		os_thread_sleep(1000000);
-
-		n_retries++;
-
-		goto retry;
-	}
-
-	if (!os_has_said_disk_full) {
-		char *winmsg = NULL;
-
-		err = (ulint) saved_error;
-
-		ut_print_timestamp(stderr);
-
-		fprintf(stderr,
-			" InnoDB: Error: Write to file %s failed"
-			" at offset %llu.\n"
-			"InnoDB: %lu bytes should have been written,"
-			" only %lu were written.\n"
-			"InnoDB: Operating system error number %lu.\n"
-			"InnoDB: Check that your OS and file system"
-			" support files of this size.\n"
-			"InnoDB: Check also that the disk is not full"
-			" or a disk quota exceeded.\n",
-			name, offset,
-			(ulong) n, (ulong) len, (ulong) err);
-
-		/* Ask Windows to prepare a standard message for a
-		GetLastError() */
-
-		FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
-			      FORMAT_MESSAGE_FROM_SYSTEM |
-			      FORMAT_MESSAGE_IGNORE_INSERTS,
-			      NULL, saved_error,
-			      MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
-			      (LPSTR)&winmsg, 0, NULL);
-
-		if (winmsg) {
-			fprintf(stderr,
-				"InnoDB: FormatMessage: Error number %lu means '%s'.\n",
-				(ulong) saved_error, winmsg);
-			LocalFree(winmsg);
-		}
-
-		if (strerror((int) err) != NULL) {
-			fprintf(stderr,
-				"InnoDB: Error number %lu means '%s'.\n",
-				(ulong) err, strerror((int) err));
-		}
-
-		fprintf(stderr,
-			"InnoDB: Some operating system error numbers"
-			" are described at\n"
-			"InnoDB: "
-			REFMAN "operating-system-error-codes.html\n");
-
-		os_has_said_disk_full = TRUE;
-	}
-
-	return(FALSE);
-#else
-	ssize_t	ret;
-	WAIT_ALLOW_WRITES();
-
-	ret = os_file_pwrite(file, buf, n, offset);
-
-	if ((ulint) ret == n) {
-
-		return(TRUE);
-	}
-
-	/* Snapshot errno right after the failed write: the timestamp and
-	logging calls below are free to modify it. */
-	const int	saved_errno = errno;
-
-	if (!os_has_said_disk_full) {
-
-		ut_print_timestamp(stderr);
-
-		if(ret == -1) {
-			ib_logf(IB_LOG_LEVEL_ERROR,
-				"Failure of system call pwrite(). Operating"
-				" system error number is %lu.",
-				(ulint) saved_errno);
-		} else {
-			/* Short write: fewer than n bytes were written. */
-			fprintf(stderr,
-				" InnoDB: Error: Write to file %s failed"
-				" at offset " UINT64PF ".\n"
-				"InnoDB: %lu bytes should have been written,"
-				" only %ld were written.\n"
-				"InnoDB: Operating system error number %lu.\n"
-				"InnoDB: Check that your OS and file system"
-				" support files of this size.\n"
-				"InnoDB: Check also that the disk is not full"
-				" or a disk quota exceeded.\n",
-				name, offset, n, (lint) ret,
-				(ulint) saved_errno);
-		}
-
-		if (strerror(saved_errno) != NULL) {
-			fprintf(stderr,
-				"InnoDB: Error number %d means '%s'.\n",
-				saved_errno, strerror(saved_errno));
-		}
-
-		fprintf(stderr,
-			"InnoDB: Some operating system error numbers"
-			" are described at\n"
-			"InnoDB: "
-			REFMAN "operating-system-error-codes.html\n");
-
-		os_diagnose_all_o_direct_einval(saved_errno);
-
-		os_has_said_disk_full = TRUE;
-	}
-
-	/* Hand the write's own error code back to the caller. */
-	errno = saved_errno;
-
-	return(FALSE);
-#endif
-}
-
-/*******************************************************************//**
-Check the existence and type of the given file. When the stat call fails
-with ENOENT, ENOTDIR or ENAMETOOLONG the file is treated as missing:
-*exists is set to FALSE, *type is left untouched and the call still
-counts as successful. Any other stat failure is reported through
-os_file_handle_error_no_exit() and neither output is written.
-@return TRUE if call succeeded */
-UNIV_INTERN
-ibool
-os_file_status(
-/*===========*/
-	const char*	path,	/*!< in: pathname of the file */
-	ibool*		exists,	/*!< out: TRUE if file exists */
-	os_file_type_t* type)	/*!< out: type of the file (if it exists) */
-{
-#ifdef __WIN__
-	struct _stat64	st;
-
-	if (_stat64(path, &st) != 0) {
-		if (errno == ENOENT || errno == ENOTDIR
-		    || errno == ENAMETOOLONG) {
-			/* file does not exist */
-			*exists = FALSE;
-			return(TRUE);
-		}
-
-		/* file exists, but stat call failed */
-		os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
-
-		return(FALSE);
-	}
-
-	if (_S_IFDIR & st.st_mode) {
-		*type = OS_FILE_TYPE_DIR;
-	} else if (_S_IFREG & st.st_mode) {
-		*type = OS_FILE_TYPE_FILE;
-	} else {
-		*type = OS_FILE_TYPE_UNKNOWN;
-	}
-
-	*exists = TRUE;
-
-	return(TRUE);
-#else
-	struct stat	st;
-
-	if (stat(path, &st) != 0) {
-		if (errno == ENOENT || errno == ENOTDIR
-		    || errno == ENAMETOOLONG) {
-			/* file does not exist */
-			*exists = FALSE;
-			return(TRUE);
-		}
-
-		/* file exists, but stat call failed */
-		os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
-
-		return(FALSE);
-	}
-
-	/* Classify in the order: directory, link, regular file. */
-	if (S_ISDIR(st.st_mode)) {
-		*type = OS_FILE_TYPE_DIR;
-	} else if (S_ISLNK(st.st_mode)) {
-		*type = OS_FILE_TYPE_LINK;
-	} else if (S_ISREG(st.st_mode)) {
-		*type = OS_FILE_TYPE_FILE;
-	} else {
-		*type = OS_FILE_TYPE_UNKNOWN;
-	}
-
-	*exists = TRUE;
-
-	return(TRUE);
-#endif
-}
-
-/*******************************************************************//**
-This function returns information about the specified file: its type,
-its size and its ctime/atime/mtime timestamps. When check_rw_perm is true
-and the path is classified as OS_FILE_TYPE_FILE, the file is additionally
-opened (read-only if srv_read_only_mode is set, read-write otherwise) and
-stat_info->rw_perm records whether that open succeeded; in all other
-cases stat_info->rw_perm is left untouched.
-@return DB_SUCCESS if all OK, DB_NOT_FOUND if the stat call fails with
-ENOENT or ENOTDIR, DB_FAIL if it fails for any other reason */
-UNIV_INTERN
-dberr_t
-os_file_get_status(
-/*===============*/
- const char* path, /*!< in: pathname of the file */
- os_file_stat_t* stat_info, /*!< out: information of a file in a
- directory */
- bool check_rw_perm) /*!< in: for testing whether the
- file can be opened in RW mode */
-{
- int ret;
-
-#ifdef __WIN__
- struct _stat64 statinfo;
-
- ret = _stat64(path, &statinfo);
-
- if (ret && (errno == ENOENT || errno == ENOTDIR)) {
- /* file does not exist */
-
- return(DB_NOT_FOUND);
-
- } else if (ret) {
- /* file exists, but stat call failed */
-
- os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
-
- return(DB_FAIL);
-
- } else if (_S_IFDIR & statinfo.st_mode) {
- stat_info->type = OS_FILE_TYPE_DIR;
- } else if (_S_IFREG & statinfo.st_mode) {
-
- DWORD access = GENERIC_READ;
-
- if (!srv_read_only_mode) {
- access |= GENERIC_WRITE;
- }
-
- stat_info->type = OS_FILE_TYPE_FILE;
-
- /* Check if we can open it with the access computed above:
- read-only in srv_read_only_mode, read-write otherwise. */
-
- if (check_rw_perm) {
- HANDLE fh;
-
- fh = CreateFile(
- (LPCTSTR) path, // File to open
- access,
- FILE_SHARE_READ|FILE_SHARE_WRITE,
- NULL, // Default security
- OPEN_EXISTING, // Existing file only
- FILE_ATTRIBUTE_NORMAL, // Normal file
- NULL); // No attr. template
-
- if (fh == INVALID_HANDLE_VALUE) {
- stat_info->rw_perm = false;
- } else {
- stat_info->rw_perm = true;
- CloseHandle(fh);
- }
- }
- } else {
- stat_info->type = OS_FILE_TYPE_UNKNOWN;
- }
-#else
- struct stat statinfo;
-
- ret = stat(path, &statinfo); /* NOTE(review): stat() follows symbolic
- links, so the S_IFLNK case below is presumably never taken --
- confirm whether lstat() was intended */
-
- if (ret && (errno == ENOENT || errno == ENOTDIR)) {
- /* file does not exist */
-
- return(DB_NOT_FOUND);
-
- } else if (ret) {
- /* file exists, but stat call failed */
-
- os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__);
-
- return(DB_FAIL);
-
- }
-
- switch (statinfo.st_mode & S_IFMT) {
- case S_IFDIR:
- stat_info->type = OS_FILE_TYPE_DIR;
- break;
- case S_IFLNK:
- stat_info->type = OS_FILE_TYPE_LINK;
- break;
- case S_IFBLK:
- /* Handle block device as regular file. */
- case S_IFCHR:
- /* Handle character device as regular file. */
- case S_IFREG:
- stat_info->type = OS_FILE_TYPE_FILE;
- break;
- default:
- stat_info->type = OS_FILE_TYPE_UNKNOWN;
- }
-
-
- if (check_rw_perm && stat_info->type == OS_FILE_TYPE_FILE) {
-
- int fh;
- int access;
-
- access = !srv_read_only_mode ? O_RDWR : O_RDONLY;
-
- fh = ::open(path, access, os_innodb_umask); /* probe only: the
- descriptor is closed again right away */
-
- if (fh == -1) {
- stat_info->rw_perm = false;
- } else {
- stat_info->rw_perm = true;
- close(fh);
- }
- }
-
-#endif /* __WIN__ */
-
- stat_info->ctime = statinfo.st_ctime;
- stat_info->atime = statinfo.st_atime;
- stat_info->mtime = statinfo.st_mtime;
- stat_info->size = statinfo.st_size;
-
- return(DB_SUCCESS);
-}
-
-/* Path name separator character: a backslash when __WIN__ is defined,
-a forward slash otherwise. */
-#ifdef __WIN__
-# define OS_FILE_PATH_SEPARATOR '\\'
-#else
-# define OS_FILE_PATH_SEPARATOR '/'
-#endif
-
-/****************************************************************//**
-Builds the path of a renamed table file. The directory part of old_path
-(everything before its last OS_FILE_PATH_SEPARATOR, or all of old_path if
-it contains none) is kept, and the file name is replaced by the part of
-tablename that follows its last '/' (or all of tablename if it contains
-none) with ".ibd" appended. Both input strings are null terminated.
-
-The result is allocated with mem_alloc(). It is the callers
-responsibility to free the return value after it is no longer needed.
-
-@return own: new full pathname */
-UNIV_INTERN
-char*
-os_file_make_new_pathname(
-/*======================*/
-	const char*	old_path,	/*!< in: pathname */
-	const char*	tablename)	/*!< in: contains new base name */
-{
-	/* New base name: the table name without its "databasename/"
-	prefix. */
-	const char*	name_sep = strrchr(tablename, '/');
-	const char*	new_base = (name_sep != NULL)
-		? name_sep + 1 : tablename;
-
-	/* Length of the directory prefix that is carried over; the old
-	basename.ibd after the last separator is dropped. */
-	const char*	dir_sep = strrchr(old_path, OS_FILE_PATH_SEPARATOR);
-	const ulint	prefix_len = (dir_sep != NULL)
-		? (ulint) (dir_sep - old_path) : strlen(old_path);
-
-	/* sizeof "/.ibd" covers the separator, the extension and the
-	terminating NUL. */
-	const ulint	total_len = prefix_len + strlen(new_base)
-		+ sizeof "/.ibd";
-	char*		result = static_cast<char*>(mem_alloc(total_len));
-
-	memcpy(result, old_path, prefix_len);
-
-	ut_snprintf(result + prefix_len, total_len - prefix_len,
-		    "%c%s.ibd", OS_FILE_PATH_SEPARATOR, new_base);
-
-	return(result);
-}
-
-/****************************************************************//**
-Builds a remote file path from a DATA DIRECTORY path and a table name of
-the form 'database/tablename'. Whatever follows the last
-OS_FILE_PATH_SEPARATOR of data_dir_path is stripped, then a separator,
-the complete tablename (database part included), a '.' and the extension
-are appended. The finished string is passed through
-srv_normalize_path_for_win(). All input and output strings are
-null-terminated.
-
-The result is allocated with mem_alloc(). It is the callers
-responsibility to free the return value after it is no longer needed.
-
-@return own: A full pathname; data_dir_path/databasename/tablename.ibd */
-UNIV_INTERN
-char*
-os_file_make_remote_pathname(
-/*=========================*/
-	const char*	data_dir_path,	/*!< in: pathname */
-	const char*	tablename,	/*!< in: tablename */
-	const char*	extention)	/*!< in: file extention; ibd,cfg */
-{
-	ut_ad(extention && strlen(extention) == 3);
-
-	/* Keep only the directory prefix: the old basename or tablename
-	after the last separator is dropped. */
-	const char*	dir_sep = strrchr(data_dir_path,
-					  OS_FILE_PATH_SEPARATOR);
-	const ulint	prefix_len = (dir_sep != NULL)
-		? (ulint) (dir_sep - data_dir_path)
-		: strlen(data_dir_path);
-
-	/* sizeof "/." covers the separator, the dot and the
-	terminating NUL. */
-	const ulint	total_len = prefix_len + strlen(tablename)
-		+ sizeof "/." + strlen(extention);
-	char*		result = static_cast<char*>(mem_alloc(total_len));
-
-	memcpy(result, data_dir_path, prefix_len);
-
-	ut_snprintf(result + prefix_len, total_len - prefix_len,
-		    "%c%s.%s", OS_FILE_PATH_SEPARATOR, tablename, extention);
-
-	srv_normalize_path_for_win(result);
-
-	return(result);
-}
-
-/****************************************************************//**
-Reduces a null-terminated full remote path name, in place, to the path
-that is sent by MySQL for a DATA DIRECTORY clause: the trailing
-'databasename/tablename.ibd' is replaced with just 'tablename'.
-
-The result is never longer than the input, so no memory is allocated;
-the caller owns the buffer.
-
-The function returns early when the path contains no '.', or fewer than
-two OS_FILE_PATH_SEPARATOR characters before the extension. Note that
-the truncations performed before such an early return (cutting at the
-last '.', and then at the last separator) are not undone. The result is
-used to inform a SHOW CREATE TABLE command. */
-UNIV_INTERN
-void
-os_file_make_data_dir_path(
-/*========================*/
-	char*	data_dir_path)	/*!< in/out: full path/data_dir_path */
-{
-	/* Cut the extension off at its period. */
-	char*	dot = strrchr(data_dir_path, '.');
-
-	if (dot == NULL) {
-		return;
-	}
-	*dot = '\0';
-
-	/* Split off the table name, which follows the last separator. */
-	char*	table_sep = strrchr(data_dir_path, OS_FILE_PATH_SEPARATOR);
-
-	if (table_sep == NULL) {
-		return;
-	}
-	*table_sep = '\0';
-
-	char*	table_name = table_sep + 1;
-
-	/* The database name follows what is now the last separator;
-	the table name is moved on top of it. */
-	char*	db_sep = strrchr(data_dir_path, OS_FILE_PATH_SEPARATOR);
-
-	if (db_sep == NULL) {
-		return;
-	}
-
-	char*		dest = db_sep + 1;
-	const ulint	table_name_len = ut_strlen(table_name);
-
-	ut_memmove(dest, table_name, table_name_len);
-
-	dest[table_name_len] = '\0';
-}
-
-/****************************************************************//**
-Returns a copy of the directory component of a null-terminated pathname:
-the characters that precede the last OS_FILE_PATH_SEPARATOR in path.
-
-If path does not contain a separator, the string "." is returned. If the
-only separator is the first character of path, the string "/" is
-returned. Trailing separators are not stripped before the search, so a
-path that ends in a separator yields everything in front of that final
-separator.
-
-The copy is allocated from heap. It is the caller responsibility
-to free it after it is no longer needed.
-
-Examples (with '/' as the separator):
-
-	path		returned string
-	"/usr/lib"	"/usr"
-	"/usr/"		"/usr"
-	"usr"		"."
-	"/"		"/"
-	"."		"."
-	".."		"."
-
-@return own: directory component of the pathname */
-UNIV_INTERN
-char*
-os_file_dirname(
-/*============*/
-	const char*	path)	/*!< in: pathname */
-{
-	const char*	sep = strrchr(path, OS_FILE_PATH_SEPARATOR);
-
-	if (sep != NULL && sep != path) {
-		/* Non-trivial directory component */
-
-		return(mem_strdupl(path, sep - path));
-	}
-
-	/* Either there is no separator at all, or the only one is the
-	very first character of the path. */
-
-	return(mem_strdup(sep == NULL ? "." : "/"));
-}
-
-/****************************************************************//**
-Creates all missing subdirectories along the given path. The directory
-component of path is obtained with os_file_dirname(); if it does not
-exist, its own parents are created first by recursion and then the
-directory itself is created. Refused with an error message in
-srv_read_only_mode.
-@return TRUE if call succeeded FALSE otherwise */
-UNIV_INTERN
-ibool
-os_file_create_subdirs_if_needed(
-/*=============================*/
-	const char*	path)	/*!< in: path name */
-{
-	if (srv_read_only_mode) {
-
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"read only mode set. Can't create subdirectories '%s'",
-			path);
-
-		return(FALSE);
-	}
-
-	char*	parent = os_file_dirname(path);
-	ibool	ok;
-
-	if (strlen(parent) == 1
-	    && (parent[0] == OS_FILE_PATH_SEPARATOR || parent[0] == '.')) {
-		/* parent is root or cwd, nothing to do */
-		ok = TRUE;
-	} else {
-		os_file_type_t	parent_type;
-		ibool		parent_exists;
-
-		/* Test if parent exists */
-		ok = os_file_status(parent, &parent_exists, &parent_type);
-
-		if (ok && !parent_exists) {
-			/* Create the ancestors first, then the
-			directory itself. */
-			ok = os_file_create_subdirs_if_needed(parent);
-
-			if (ok) {
-				ok = os_file_create_directory(parent, FALSE);
-			}
-		}
-	}
-
-	mem_free(parent);
-
-	return(ok);
-}
-
-#ifndef UNIV_HOTBACKUP
-/****************************************************************//**
-Looks up a slot of an aio array by position. The index is asserted to be
-below array->n_slots.
-@return pointer to slot */
-static
-os_aio_slot_t*
-os_aio_array_get_nth_slot(
-/*======================*/
-	os_aio_array_t*		array,	/*!< in: aio array */
-	ulint			index)	/*!< in: index of the slot */
-{
-	ut_a(index < array->n_slots);
-
-	os_aio_slot_t*	slot = array->slots + index;
-
-	return(slot);
-}
-
-#if defined(LINUX_NATIVE_AIO)
-/******************************************************************//**
-Creates an io_context for native linux AIO by calling io_setup(). When
-io_setup() fails with EAGAIN the call is repeated, sleeping
-OS_AIO_IO_SETUP_RETRY_SLEEP between attempts, up to
-OS_AIO_IO_SETUP_RETRY_ATTEMPTS times. Every failure is reported on stderr
-together with a hint on how to disable native AIO.
-@return TRUE on success. */
-static
-ibool
-os_aio_linux_create_io_ctx(
-/*=======================*/
-	ulint		max_events,	/*!< in: number of events. */
-	io_context_t*	io_ctx)		/*!< out: io_ctx to initialize. */
-{
-	int	rc;
-	ulint	n_attempts = 0;
-
-	for (;;) {
-		memset(io_ctx, 0x0, sizeof(*io_ctx));
-
-		/* Initialize the io_ctx. Tell it how many pending
-		IO requests this context will handle. */
-
-		rc = io_setup(max_events, io_ctx);
-
-		if (rc == 0) {
-#if defined(UNIV_AIO_DEBUG)
-			fprintf(stderr,
-				"InnoDB: Linux native AIO:"
-				" initialized io_ctx for segment\n");
-#endif
-			/* Success. Return now. */
-			return(TRUE);
-		}
-
-		/* Only EAGAIN is worth another attempt. */
-		if (rc != -EAGAIN) {
-			break;
-		}
-
-		if (n_attempts == 0) {
-			/* First time around. */
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				" InnoDB: Warning: io_setup() failed"
-				" with EAGAIN. Will make %d attempts"
-				" before giving up.\n",
-				OS_AIO_IO_SETUP_RETRY_ATTEMPTS);
-		}
-
-		if (!(n_attempts < OS_AIO_IO_SETUP_RETRY_ATTEMPTS)) {
-			/* Have tried enough. */
-			break;
-		}
-
-		++n_attempts;
-		fprintf(stderr,
-			"InnoDB: Warning: io_setup() attempt"
-			" %lu failed.\n",
-			n_attempts);
-		os_thread_sleep(OS_AIO_IO_SETUP_RETRY_SLEEP);
-	}
-
-	/* Report why the context could not be created. */
-	ut_print_timestamp(stderr);
-
-	if (rc == -EAGAIN) {
-		/* Better call it a day. */
-		fprintf(stderr,
-			" InnoDB: Error: io_setup() failed"
-			" with EAGAIN after %d attempts.\n",
-			OS_AIO_IO_SETUP_RETRY_ATTEMPTS);
-	} else if (rc == -ENOSYS) {
-		fprintf(stderr,
-			" InnoDB: Error: Linux Native AIO interface"
-			" is not supported on this platform. Please"
-			" check your OS documentation and install"
-			" appropriate binary of InnoDB.\n");
-	} else {
-		fprintf(stderr,
-			" InnoDB: Error: Linux Native AIO setup"
-			" returned following error[%d]\n", -rc);
-	}
-
-	fprintf(stderr,
-		"InnoDB: You can disable Linux Native AIO by"
-		" setting innodb_use_native_aio = 0 in my.cnf\n");
-	return(FALSE);
-}
-
-/******************************************************************//**
-Checks if the system supports native linux aio. On some kernel
-versions where native aio is supported it won't work on tmpfs. In such
-cases we can't use native aio as it is not possible to mix simulated
-and native aio.
-
-The check creates a throw-away io_context and submits one request
-through it: a page-sized write to a temporary file, or, in
-srv_read_only_mode, a 512-byte read from ib_logfile0 in
-srv_log_group_home_dir. The io_context is released with io_destroy() on
-every path out of this function so that the probe does not leave a
-kernel AIO context behind.
-@return: TRUE if supported, FALSE otherwise. */
-static
-ibool
-os_aio_native_aio_supported(void)
-/*=============================*/
-{
-	int		fd;
-	io_context_t	io_ctx;
-	char		name[1000];
-
-	if (!os_aio_linux_create_io_ctx(1, &io_ctx)) {
-		/* The platform does not support native aio. */
-		return(FALSE);
-	} else if (!srv_read_only_mode) {
-		/* Now check if tmpdir supports native aio ops. */
-		fd = innobase_mysql_tmpfile(NULL);
-
-		if (fd < 0) {
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"Unable to create temp file to check "
-				"native AIO support.");
-
-			io_destroy(io_ctx);
-
-			return(FALSE);
-		}
-	} else {
-
-		srv_normalize_path_for_win(srv_log_group_home_dir);
-
-		ulint	dirnamelen = strlen(srv_log_group_home_dir);
-		ut_a(dirnamelen < (sizeof name) - 10 - sizeof "ib_logfile");
-		memcpy(name, srv_log_group_home_dir, dirnamelen);
-
-		/* Add a path separator if needed. */
-		if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
-			name[dirnamelen++] = SRV_PATH_SEPARATOR;
-		}
-
-		strcpy(name + dirnamelen, "ib_logfile0");
-
-		fd = ::open(name, O_RDONLY);
-
-		if (fd == -1) {
-
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"Unable to open \"%s\" to check "
-				"native AIO read support.", name);
-
-			io_destroy(io_ctx);
-
-			return(FALSE);
-		}
-	}
-
-	struct io_event	io_event;
-
-	memset(&io_event, 0x0, sizeof(io_event));
-
-	/* Allocate twice the page size so that a page-aligned pointer
-	can be carved out of the buffer. */
-	byte*	buf = static_cast<byte*>(ut_malloc(UNIV_PAGE_SIZE * 2));
-	byte*	ptr = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
-
-	struct iocb	iocb;
-
-	/* Suppress valgrind warning. */
-	memset(buf, 0x00, UNIV_PAGE_SIZE * 2);
-	memset(&iocb, 0x0, sizeof(iocb));
-
-	struct iocb*	p_iocb = &iocb;
-
-	if (!srv_read_only_mode) {
-		io_prep_pwrite(p_iocb, fd, ptr, UNIV_PAGE_SIZE, 0);
-	} else {
-		ut_a(UNIV_PAGE_SIZE >= 512);
-		io_prep_pread(p_iocb, fd, ptr, 512, 0);
-	}
-
-	int	err = io_submit(io_ctx, 1, &p_iocb);
-
-	if (err >= 1) {
-		/* Now collect the submitted IO request. */
-		err = io_getevents(io_ctx, 1, 1, &io_event, NULL);
-	}
-
-	/* Tear the context down before the buffer and the descriptor go
-	away, in case the request was submitted but not collected. */
-	io_destroy(io_ctx);
-
-	ut_free(buf);
-	close(fd);
-
-	switch (err) {
-	case 1:
-		return(TRUE);
-
-	case -EINVAL:
-	case -ENOSYS:
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Linux Native AIO not supported. You can either "
-			"move %s to a file system that supports native "
-			"AIO or you can set innodb_use_native_aio to "
-			"FALSE to avoid this message.",
-			srv_read_only_mode ? name : "tmpdir");
-
-		/* fall through. */
-	default:
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"Linux Native AIO check on %s returned error[%d]",
-			srv_read_only_mode ? name : "tmpdir", -err);
-	}
-
-	return(FALSE);
-}
-#endif /* LINUX_NATIVE_AIO */
-
-/******************************************************************//**
-Creates an aio wait array: allocates and zero-fills the array header and
-its n slots, creates the array mutex and the not_full / is_empty events
-(is_empty starts out set) and, when LINUX_NATIVE_AIO is compiled in and
-srv_use_native_aio is set, one io_context per segment plus one io_event
-per slot. If creating an io_context fails, srv_use_native_aio is switched
-off and the array is returned without native aio structures.
-We don't care about freeing memory here because we assume that a
-failure will result in server refusing to start up.
-NOTE(review): the only return statement in this function returns the
-allocated array; the results of ut_malloc() are not checked here, so
-whether NULL can ever be returned depends on ut_malloc() -- confirm.
-@return own: aio array, NULL on failure */
-static
-os_aio_array_t*
-os_aio_array_create(
-/*================*/
- ulint n, /*!< in: maximum number of pending aio
- operations allowed; n must be
- divisible by n_segments */
- ulint n_segments) /*!< in: number of segments in the aio array */
-{
- os_aio_array_t* array;
-#ifdef LINUX_NATIVE_AIO
- struct io_event* io_event = NULL;
-#endif
- ut_a(n > 0);
- ut_a(n_segments > 0);
-
- array = static_cast<os_aio_array_t*>(ut_malloc(sizeof(*array)));
- memset(array, 0x0, sizeof(*array));
-
- array->mutex = os_mutex_create();
- array->not_full = os_event_create();
- array->is_empty = os_event_create();
-
- os_event_set(array->is_empty);
-
- array->n_slots = n;
- array->n_segments = n_segments;
-
- array->slots = static_cast<os_aio_slot_t*>(
- ut_malloc(n * sizeof(*array->slots)));
-
- memset(array->slots, 0x0, n * sizeof(*array->slots));
-
-#if defined(LINUX_NATIVE_AIO)
- array->aio_ctx = NULL;
- array->aio_events = NULL;
-
- /* If we are not using native aio interface then skip this
- part of initialization. */
- if (!srv_use_native_aio) {
- goto skip_native_aio;
- }
-
- /* Initialize the io_context array. One io_context
- per segment in the array. */
-
- array->aio_ctx = static_cast<io_context**>(
- ut_malloc(n_segments * sizeof(*array->aio_ctx)));
-
- for (ulint i = 0; i < n_segments; ++i) {
- if (!os_aio_linux_create_io_ctx(n/n_segments,
- &array->aio_ctx[i])) {
- /* If something bad happened during aio setup
- we disable linux native aio.
- The disadvantage will be a small memory leak
- at shutdown but that's ok compared to a crash
- or a not working server.
- This frequently happens when running the test suite
- with many threads on a system with low fs.aio-max-nr!
- */
-
- fprintf(stderr,
- " InnoDB: Warning: Linux Native AIO disabled "
- "because os_aio_linux_create_io_ctx() "
- "failed. To get rid of this warning you can "
- "try increasing system "
- "fs.aio-max-nr to 1048576 or larger or "
- "setting innodb_use_native_aio = 0 in my.cnf\n");
- srv_use_native_aio = FALSE;
- goto skip_native_aio;
- }
- }
-
- /* Initialize the event array. One event per slot. */
- io_event = static_cast<struct io_event*>(
- ut_malloc(n * sizeof(*io_event)));
-
- memset(io_event, 0x0, sizeof(*io_event) * n);
- array->aio_events = io_event;
-
-skip_native_aio:
-#endif /* LINUX_NATIVE_AIO */
- for (ulint i = 0; i < n; i++) {
- os_aio_slot_t* slot;
-
- slot = os_aio_array_get_nth_slot(array, i);
- slot->pos = i;
- slot->reserved = FALSE;
-#ifdef LINUX_NATIVE_AIO
- memset(&slot->control, 0x0, sizeof(slot->control));
- slot->n_bytes = 0;
- slot->ret = 0;
-#endif /* LINUX_NATIVE_AIO */
- }
-
- return(array);
-}
-
-/************************************************************************//**
-Releases an aio wait array: its mutex, its two events, the native aio
-event and context arrays (only while srv_use_native_aio is set), the slot
-array and finally the array header itself. The caller's pointer is reset
-to 0 afterwards. */
-static
-void
-os_aio_array_free(
-/*==============*/
-	os_aio_array_t*& array)	/*!< in, own: array to free */
-{
-	os_aio_array_t*	arr = array;
-
-	os_mutex_free(arr->mutex);
-	os_event_free(arr->not_full);
-	os_event_free(arr->is_empty);
-
-#if defined(LINUX_NATIVE_AIO)
-	if (srv_use_native_aio) {
-		ut_free(arr->aio_events);
-		ut_free(arr->aio_ctx);
-	}
-#endif /* LINUX_NATIVE_AIO */
-
-	ut_free(arr->slots);
-	ut_free(arr);
-
-	/* Clear the caller's reference only after everything is freed. */
-	array = 0;
-}
-
-/***********************************************************************
-Initializes the asynchronous io system. Creates one array each for ibuf
-and log i/o (both skipped in srv_read_only_mode, as is the write array).
-Also creates one array each for read and write where each
-array is divided logically into n_read_segs and n_write_segs
-respectively. The caller must create an i/o handler thread for each
-segment in these arrays. This function also creates the sync array.
-No i/o handler thread needs to be created for that.
-Unless srv_use_native_aio is set, one wait event per segment is created
-as well.
-@return TRUE on success, FALSE if one of the arrays could not be created */
-UNIV_INTERN
-ibool
-os_aio_init(
-/*========*/
- ulint n_per_seg, /*!< in: maximum number of pending aio
- operations allowed per segment */
- ulint n_read_segs, /*!< in: number of reader threads */
- ulint n_write_segs, /*!< in: number of writer threads */
- ulint n_slots_sync) /*!< in: number of slots in the sync aio
- array */
-{
- os_io_init_simple();
-
-#if defined(LINUX_NATIVE_AIO)
- /* Check if native aio is supported on this system and tmpfs */
- if (srv_use_native_aio && !os_aio_native_aio_supported()) {
-
- ib_logf(IB_LOG_LEVEL_WARN, "Linux Native AIO disabled.");
-
- srv_use_native_aio = FALSE;
- }
-#endif /* LINUX_NATIVE_AIO */
-
- srv_reset_io_thread_op_info();
-
- os_aio_read_array = os_aio_array_create(
- n_read_segs * n_per_seg, n_read_segs);
-
- if (os_aio_read_array == NULL) {
- return(FALSE);
- }
-
- ulint start = (srv_read_only_mode) ? 0 : 2;
- ulint n_segs = n_read_segs + start;
-
- /* 0 is the insert buffer (ibuf) segment and 1 is the log segment
- (see the srv_io_thread_function[] assignments below), so outside
- read-only mode the read threads start at index 2. */
- for (ulint i = start; i < n_segs; ++i) {
- ut_a(i < SRV_MAX_N_IO_THREADS);
- srv_io_thread_function[i] = "read thread";
- }
-
- ulint n_segments = n_read_segs;
-
- if (!srv_read_only_mode) {
-
- os_aio_log_array = os_aio_array_create(n_per_seg, 1);
-
- if (os_aio_log_array == NULL) {
- return(FALSE);
- }
-
- ++n_segments;
-
- srv_io_thread_function[1] = "log thread";
-
- os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
-
- if (os_aio_ibuf_array == NULL) {
- return(FALSE);
- }
-
- ++n_segments;
-
- srv_io_thread_function[0] = "insert buffer thread";
-
- os_aio_write_array = os_aio_array_create(
- n_write_segs * n_per_seg, n_write_segs);
-
- if (os_aio_write_array == NULL) {
- return(FALSE);
- }
-
- n_segments += n_write_segs;
-
- for (ulint i = start + n_read_segs; i < n_segments; ++i) {
- ut_a(i < SRV_MAX_N_IO_THREADS);
- srv_io_thread_function[i] = "write thread";
- }
-
- ut_ad(n_segments >= 4);
- } else {
- ut_ad(n_segments > 0);
- }
-
- os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
-
- if (os_aio_sync_array == NULL) {
- return(FALSE);
- }
-
- os_aio_n_segments = n_segments;
-
- os_aio_validate();
-
- os_last_printout = ut_time();
-
-#ifdef _WIN32
- ut_a(completion_port == 0 && read_completion_port == 0);
- completion_port = CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
- read_completion_port = srv_read_only_mode? completion_port : CreateIoCompletionPort(INVALID_HANDLE_VALUE, NULL, 0, 0);
- ut_a(completion_port && read_completion_port);
-#endif
-
- if (srv_use_native_aio) {
- return(TRUE);
- }
-
- os_aio_segment_wait_events = static_cast<os_event_t*>(
- ut_malloc(n_segments * sizeof *os_aio_segment_wait_events));
-
- for (ulint i = 0; i < n_segments; ++i) {
- os_aio_segment_wait_events[i] = os_event_create();
- }
-
- return(TRUE);
-}
-
-/***********************************************************************
-Tears down the asynchronous i/o system: releases the aio arrays, the
-per-segment wait events of simulated aio and the file seek mutexes, then
-clears the segment count (and, on Windows, the completion port handles). */
-UNIV_INTERN
-void
-os_aio_free(void)
-/*=============*/
-{
-	/* Every array except the read array is released only if it is set. */
-	if (os_aio_ibuf_array != NULL) {
-		os_aio_array_free(os_aio_ibuf_array);
-	}
-
-	if (os_aio_log_array != NULL) {
-		os_aio_array_free(os_aio_log_array);
-	}
-
-	if (os_aio_write_array != NULL) {
-		os_aio_array_free(os_aio_write_array);
-	}
-
-	if (os_aio_sync_array != NULL) {
-		os_aio_array_free(os_aio_sync_array);
-	}
-
-	os_aio_array_free(os_aio_read_array);
-
-	/* The wait events are freed only when native aio is not in use. */
-	if (!srv_use_native_aio) {
-		for (ulint seg = 0; seg < os_aio_n_segments; ++seg) {
-			os_event_free(os_aio_segment_wait_events[seg]);
-		}
-	}
-
-	for (ulint n = 0; n < OS_FILE_N_SEEK_MUTEXES; ++n) {
-		os_mutex_free(os_file_seek_mutexes[n]);
-	}
-
-	ut_free(os_aio_segment_wait_events);
-	os_aio_segment_wait_events = NULL;
-	os_aio_n_segments = 0;
-#ifdef _WIN32
-	completion_port = 0;
-	read_completion_port = 0;
-#endif
-}
-
-#ifdef WIN_ASYNC_IO
-/************************************************************************//**
-Posts a shutdown notification to both I/O completion ports so that the
-Windows async i/o threads waiting on them wake up at shutdown. Nothing is
-posted while completion_port is unset. */
-static
-void
-os_aio_array_wake_win_aio_at_shutdown(
-/*==================================*/
-	os_aio_array_t*	array)	/*!< in: aio array; the body does not
-				reference it */
-{
-	if (!completion_port) {
-		return;
-	}
-
-	PostQueuedCompletionStatus(completion_port, 0, IOCP_SHUTDOWN_KEY, NULL);
-	PostQueuedCompletionStatus(read_completion_port, 0, IOCP_SHUTDOWN_KEY, NULL);
-}
-#endif
-
-/************************************************************************//**
-Wakes up every async i/o thread at shutdown so that each of them can notice
-the shutdown and exit on its own. */
-UNIV_INTERN
-void
-os_aio_wake_all_threads_at_shutdown(void)
-/*=====================================*/
-{
-#ifdef WIN_ASYNC_IO
-	/* Windows native aio: notify the threads once for the read array
-	and once for each of the other arrays that is set. */
-	os_aio_array_wake_win_aio_at_shutdown(os_aio_read_array);
-
-	if (os_aio_write_array != NULL) {
-		os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array);
-	}
-
-	if (os_aio_ibuf_array != NULL) {
-		os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array);
-	}
-
-	if (os_aio_log_array != NULL) {
-		os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array);
-	}
-#elif defined(LINUX_NATIVE_AIO)
-	/* When using native AIO interface the io helper threads
-	wait on io_getevents with a timeout value of 500ms. At
-	each wake up these threads check the server status.
-	No need to do anything to wake them up. */
-#endif /* WIN_ASYNC_IO */
-
-	/* Only simulated aio threads sleep on the segment wait events. */
-	if (!srv_use_native_aio) {
-		for (ulint seg = 0; seg < os_aio_n_segments; ++seg) {
-			os_event_set(os_aio_segment_wait_events[seg]);
-		}
-	}
-}
-
-/************************************************************************//**
-Blocks the caller until the is_empty event of os_aio_write_array is
-signaled, i.e. until that array holds no pending writes. Synchronous writes
-issued elsewhere may still be pending afterwards. */
-UNIV_INTERN
-void
-os_aio_wait_until_no_pending_writes(void)
-/*=====================================*/
-{
-	/* Not expected to be called in read-only mode. */
-	ut_ad(!srv_read_only_mode);
-
-	os_event_wait(os_aio_write_array->is_empty);
-}
-
-/**********************************************************************//**
-Maps a slot of an aio array to its global segment number.
-@return segment number (which is the number used by, for example,
-i/o-handler threads) */
-static
-ulint
-os_aio_get_segment_no_from_slot(
-/*============================*/
-	os_aio_array_t*	array,	/*!< in: aio wait array */
-	os_aio_slot_t*	slot)	/*!< in: slot in this array */
-{
-	if (array == os_aio_ibuf_array) {
-		ut_ad(!srv_read_only_mode);
-
-		return(IO_IBUF_SEGMENT);
-	}
-
-	if (array == os_aio_log_array) {
-		ut_ad(!srv_read_only_mode);
-
-		return(IO_LOG_SEGMENT);
-	}
-
-	if (array == os_aio_read_array) {
-		/* Read segments start at 0 in read-only mode, otherwise
-		after the two leading segments. */
-		const ulint	slots_per_segment
-			= os_aio_read_array->n_slots
-			/ os_aio_read_array->n_segments;
-		const ulint	first_read_segment
-			= srv_read_only_mode ? 0 : 2;
-
-		return(first_read_segment + slot->pos / slots_per_segment);
-	}
-
-	/* Anything else must be the write array; its segments follow
-	the read segments. */
-	ut_ad(!srv_read_only_mode);
-	ut_a(array == os_aio_write_array);
-
-	const ulint	slots_per_segment
-		= os_aio_write_array->n_slots
-		/ os_aio_write_array->n_segments;
-
-	return(os_aio_read_array->n_segments + 2
-	       + slot->pos / slots_per_segment);
-}
-
-/**********************************************************************//**
-Translates a global segment number into the aio array that owns it and the
-segment number local to that array.
-@return local segment number within the aio array */
-static
-ulint
-os_aio_get_array_and_local_segment(
-/*===============================*/
-	os_aio_array_t** array,		/*!< out: aio wait array */
-	ulint		 global_segment)/*!< in: global segment number */
-{
-	ut_a(global_segment < os_aio_n_segments);
-
-	if (srv_read_only_mode) {
-		/* In read-only mode every segment maps to the read array. */
-		*array = os_aio_read_array;
-
-		return(global_segment);
-	}
-
-	if (global_segment == IO_IBUF_SEGMENT) {
-		*array = os_aio_ibuf_array;
-
-		return(0);
-	}
-
-	if (global_segment == IO_LOG_SEGMENT) {
-		*array = os_aio_log_array;
-
-		return(0);
-	}
-
-	/* Read segments come after the two leading segments and the write
-	segments come after the read segments. */
-	const ulint	first_write_segment
-		= os_aio_read_array->n_segments + 2;
-
-	if (global_segment < first_write_segment) {
-		*array = os_aio_read_array;
-
-		return(global_segment - 2);
-	}
-
-	*array = os_aio_write_array;
-
-	return(global_segment - first_write_segment);
-}
-
-/*******************************************************************//**
-Reserves a free slot in the aio array and records the request in it. If
-all slots are reserved, waits until the not_full event becomes signaled and
-tries again; with simulated aio the handler threads are woken up first so
-that they can free slots.
-@return pointer to the reserved slot */
-static
-os_aio_slot_t*
-os_aio_array_reserve_slot(
-/*======================*/
-	ulint		type,	/*!< in: OS_FILE_READ or OS_FILE_WRITE */
-	ulint		is_log,	/*!< in: 1 is OS_FILE_LOG or 0 */
-	os_aio_array_t*	array,	/*!< in: aio array */
-	fil_node_t*	message1,/*!< in: message to be passed along with
-				the aio operation */
-	void*		message2,/*!< in: message to be passed along with
-				the aio operation */
-	pfs_os_file_t	file,	/*!< in: file handle */
-	const char*	name,	/*!< in: name of the file or path as a
-				null-terminated string */
-	void*		buf,	/*!< in: buffer where to read or from which
-				to write */
-	os_offset_t	offset,	/*!< in: file offset */
-	ulint		len,	/*!< in: length of the block to read or write */
-	ulint		page_size, /*!< in: page size in bytes */
-	ulint		space_id, /*!< in: stored in the slot as space_id */
-	ulint*		write_size)/*!< in/out: Actual write size initialized
-				after first successful trim
-				operation for this page and if
-				initialized we do not trim again if
-				actual page size does not decrease.
-				Not referenced by this function. */
-{
-	os_aio_slot_t*	slot = NULL;
-#ifdef WIN_ASYNC_IO
-	OVERLAPPED*	control;
-
-#elif defined(LINUX_NATIVE_AIO)
-
-	struct iocb*	iocb;
-	off_t		aio_offset;
-
-#endif /* WIN_ASYNC_IO */
-	ulint		i;
-	ulint		counter;
-	ulint		slots_per_seg;
-	ulint		local_seg;
-
-#ifdef WIN_ASYNC_IO
-	ut_a((len & 0xFFFFFFFFUL) == len);
-#endif /* WIN_ASYNC_IO */
-
-	/* No need of a mutex. Only reading constant fields */
-	slots_per_seg = array->n_slots / array->n_segments;
-
-	/* We attempt to keep adjacent blocks in the same local
-	segment. This can help in merging IO requests when we are
-	doing simulated AIO */
-	local_seg = (offset >> (UNIV_PAGE_SIZE_SHIFT + 6))
-		% array->n_segments;
-
-loop:
-	os_mutex_enter(array->mutex);
-
-	if (array->n_reserved == array->n_slots) {
-		os_mutex_exit(array->mutex);
-
-		if (!srv_use_native_aio) {
-			/* If the handler threads are suspended, wake them
-			so that we get more slots */
-
-			os_aio_simulated_wake_handler_threads();
-		}
-
-		os_event_wait(array->not_full);
-
-		goto loop;
-	}
-
-	/* We start our search for an available slot from our preferred
-	local segment and do a full scan of the array. We are
-	guaranteed to find a slot in full scan. */
-	for (i = local_seg * slots_per_seg, counter = 0;
-	     counter < array->n_slots;
-	     i++, counter++) {
-
-		i %= array->n_slots;
-
-		slot = os_aio_array_get_nth_slot(array, i);
-
-		if (slot->reserved == FALSE) {
-			goto found;
-		}
-	}
-
-	/* The array was not full, so the scan above MUST have found a
-	free slot. */
-	ut_error;
-
-found:
-	ut_a(slot->reserved == FALSE);
-	array->n_reserved++;
-
-	/* Keep the is_empty and not_full events in step with the count. */
-	if (array->n_reserved == 1) {
-		os_event_reset(array->is_empty);
-	}
-
-	if (array->n_reserved == array->n_slots) {
-		os_event_reset(array->not_full);
-	}
-
-	slot->reserved = TRUE;
-	slot->reservation_time = ut_time();
-	slot->message1 = message1;
-	slot->message2 = message2;
-	slot->file     = file;
-	slot->name     = name;
-	slot->len      = len;
-	slot->type     = type;
-	slot->buf      = static_cast<byte*>(buf);
-	slot->offset   = offset;
-	slot->io_already_done = FALSE;
-	slot->space_id = space_id;
-	slot->is_log   = is_log;
-	slot->page_size = page_size;
-
-	/* file_block_size is refreshed only when a node is supplied;
-	otherwise the slot keeps whatever value it held before. */
-	if (message1) {
-		slot->file_block_size = fil_node_get_block_size(message1);
-	}
-
-#ifdef WIN_ASYNC_IO
-	control = &slot->control;
-	control->Offset = (DWORD) offset & 0xFFFFFFFF;
-	control->OffsetHigh = (DWORD) (offset >> 32);
-	control->hEvent = 0;
-	slot->arr = array;
-
-#elif defined(LINUX_NATIVE_AIO)
-
-	/* If we are not using native AIO skip this part. */
-	if (!srv_use_native_aio) {
-		goto skip_native_aio;
-	}
-
-	/* Check if we are dealing with 64 bit arch.
-	If not then make sure that offset fits in 32 bits. */
-	aio_offset = (off_t) offset;
-
-	ut_a(sizeof(aio_offset) >= sizeof(offset)
-	     || ((os_offset_t) aio_offset) == offset);
-
-	iocb = &slot->control;
-
-	if (type == OS_FILE_READ) {
-		io_prep_pread(iocb, file, buf, len, aio_offset);
-	} else {
-		ut_a(type == OS_FILE_WRITE);
-		io_prep_pwrite(iocb, file, buf, len, aio_offset);
-	}
-
-	/* The slot is recovered from iocb->data when the event is reaped. */
-	iocb->data = (void*) slot;
-	slot->n_bytes = 0;
-	slot->ret = 0;
-
-skip_native_aio:
-#endif /* LINUX_NATIVE_AIO */
-	os_mutex_exit(array->mutex);
-
-	return(slot);
-}
-
-/*******************************************************************//**
-Marks a slot of the aio array as free again and updates the array's
-not_full and is_empty events to match the new reservation count. */
-static
-void
-os_aio_array_free_slot(
-/*===================*/
-	os_aio_array_t*	array,	/*!< in: aio array */
-	os_aio_slot_t*	slot)	/*!< in: pointer to slot */
-{
-	os_mutex_enter(array->mutex);
-
-	ut_ad(slot->reserved);
-	slot->reserved = FALSE;
-
-	--array->n_reserved;
-
-	/* The array was full before this slot was released. */
-	if (array->n_reserved == array->n_slots - 1) {
-		os_event_set(array->not_full);
-	}
-
-	if (array->n_reserved == 0) {
-		os_event_set(array->is_empty);
-	}
-
-#ifdef LINUX_NATIVE_AIO
-
-	if (!srv_use_native_aio) {
-		/* These fields should not be used if we are not
-		using native AIO. */
-		ut_ad(slot->n_bytes == 0);
-		ut_ad(slot->ret == 0);
-	} else {
-		/* Wipe the native request state kept in the slot. */
-		memset(&slot->control, 0x0, sizeof(slot->control));
-		slot->n_bytes = 0;
-		slot->ret = 0;
-	}
-
-#endif
-	os_mutex_exit(array->mutex);
-}
-
-/**********************************************************************//**
-Signals the wait event of one simulated aio segment if at least one slot of
-that segment is reserved, i.e. if its handler thread has work to do. */
-static
-void
-os_aio_simulated_wake_handler_thread(
-/*=================================*/
-	ulint	global_segment)	/*!< in: the number of the segment in the aio
-				arrays */
-{
-	os_aio_array_t*	array;
-
-	ut_ad(!srv_use_native_aio);
-
-	const ulint	local_segment
-		= os_aio_get_array_and_local_segment(&array, global_segment);
-	const ulint	slots_per_segment = array->n_slots / array->n_segments;
-	const ulint	first_slot = local_segment * slots_per_segment;
-	ibool		has_request = FALSE;
-
-	/* Scan the slots of this segment under the array mutex. */
-	os_mutex_enter(array->mutex);
-
-	for (ulint k = 0; k < slots_per_segment; ++k) {
-		const os_aio_slot_t*	slot
-			= os_aio_array_get_nth_slot(array, first_slot + k);
-
-		if (slot->reserved) {
-			has_request = TRUE;
-			break;
-		}
-	}
-
-	os_mutex_exit(array->mutex);
-
-	/* The event is set only after the mutex has been released. */
-	if (has_request) {
-		os_event_set(os_aio_segment_wait_events[global_segment]);
-	}
-}
-
-/**********************************************************************//**
-Goes through all segments and wakes the simulated aio handler thread of each
-one that has a reserved slot. Does nothing when native aio is in use. */
-UNIV_INTERN
-void
-os_aio_simulated_wake_handler_threads(void)
-/*=======================================*/
-{
-	if (!srv_use_native_aio) {
-		/* Withdraw any earlier recommendation to sleep before
-		waking the threads. */
-		os_aio_recommend_sleep_for_read_threads = FALSE;
-
-		for (ulint seg = 0; seg < os_aio_n_segments; ++seg) {
-			os_aio_simulated_wake_handler_thread(seg);
-		}
-	}
-}
-
-#ifdef _WIN32
-/**********************************************************************//**
-Call this before posting a batch of reads that an i/o-handler thread should
-handle all at once later: it sets the sleep recommendation flag and resets
-the wait events of the read segments. The caller must later call
-os_aio_simulated_wake_handler_threads, or the threads are left sleeping! */
-UNIV_INTERN
-void
-os_aio_simulated_put_read_threads_to_sleep()
-{
-	/* The idea of putting background IO threads to sleep is only for
-	Windows when using simulated AIO. Windows XP seems to schedule
-	background threads too eagerly to allow for coalescing during
-	readahead requests. */
-
-	if (srv_use_native_aio) {
-		/* Simulated aio is not in use: nothing to do */
-
-		return;
-	}
-
-	os_aio_recommend_sleep_for_read_threads = TRUE;
-
-	for (ulint seg = 0; seg < os_aio_n_segments; ++seg) {
-		os_aio_array_t*	owner;
-
-		os_aio_get_array_and_local_segment(&owner, seg);
-
-		/* Only segments of the read array are affected. */
-		if (owner != os_aio_read_array) {
-			continue;
-		}
-
-		os_event_reset(os_aio_segment_wait_events[seg]);
-	}
-}
-#endif /* _WIN32 */
-
-#if defined(LINUX_NATIVE_AIO)
-/*******************************************************************//**
-Submits the request held in an already reserved slot to the kernel with
-io_submit(). On failure errno is set to the negated return value.
-@return TRUE on success. */
-static
-ibool
-os_aio_linux_dispatch(
-/*==================*/
-	os_aio_array_t*	array,	/*!< in: io request array. */
-	os_aio_slot_t*	slot)	/*!< in: an already reserved slot. */
-{
-	ut_ad(slot != NULL);
-	ut_ad(array);
-
-	ut_a(slot->reserved);
-
-	/* The iocb lives inside the slot, and there is one io_context
-	per segment: derive the context index from the slot position. */
-	struct iocb*	request = &slot->control;
-	const ulint	ctx_no
-		= (slot->pos * array->n_segments) / array->n_slots;
-
-	const int	n_queued
-		= io_submit(array->aio_ctx[ctx_no], 1, &request);
-
-#if defined(UNIV_AIO_DEBUG)
-	fprintf(stderr,
-		"io_submit[%c] ret[%d]: slot[%p] ctx[%p] seg[%lu]\n",
-		(slot->type == OS_FILE_WRITE) ? 'w' : 'r', n_queued, slot,
-		array->aio_ctx[ctx_no], (ulong) ctx_no);
-#endif
-
-	/* io_submit returns number of successfully
-	queued requests or -errno. */
-	if (UNIV_UNLIKELY(n_queued != 1)) {
-		errno = -n_queued;
-
-		return(FALSE);
-	}
-
-	return(TRUE);
-}
-#endif /* LINUX_NATIVE_AIO */
-
-
-/*******************************************************************//**
-NOTE! Use the corresponding macro os_aio(), not directly this function!
-Requests an asynchronous i/o operation.
-With mode OS_AIO_SYNC the read or write is carried out synchronously in the
-calling thread and its result is returned. For the other modes a slot is
-reserved in the aio array selected by the mode; with native aio the request
-is then submitted right away, with simulated aio the handler thread of the
-slot's segment is woken up unless the caller asked to wake it later. If a
-native submission fails, the slot is freed and os_file_handle_error()
-decides whether the whole request is tried again.
-@return TRUE if request was queued successfully, FALSE if fail */
-UNIV_INTERN
-ibool
-os_aio_func(
-/*========*/
- ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
- ulint is_log, /*!< in: 1 is OS_FILE_LOG or 0 */
- ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
- to OS_AIO_SIMULATED_WAKE_LATER: the
- last flag advises this function not to wake
- i/o-handler threads, but the caller will
- do the waking explicitly later, in this
- way the caller can post several requests in
- a batch; NOTE that the batch must not be
- so big that it exhausts the slots in aio
- arrays! NOTE that a simulated batch
- may introduce hidden chances of deadlocks,
- because i/os are not actually handled until
- all have been posted: use with great
- caution! */
- const char* name, /*!< in: name of the file or path as a
- null-terminated string */
- pfs_os_file_t file, /*!< in: handle to a file */
- void* buf, /*!< in: buffer where to read or from which
- to write */
- os_offset_t offset, /*!< in: file offset where to read or write */
- ulint n, /*!< in: number of bytes to read or write */
- ulint page_size, /*!< in: page size in bytes */
- fil_node_t* message1,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- void* message2,/*!< in: message for the aio handler
- (can be used to identify a completed
- aio operation); ignored if mode is
- OS_AIO_SYNC */
- ulint space_id, /*!< in: passed on to os_aio_array_reserve_slot() */
- trx_t* trx, /*!< in: may be NULL; handed to os_file_read_func() for a synchronous read, and for queued reads its io_reads and io_read counters are increased */
- ulint* write_size)/*!< in/out: Actual write size initialized
- after first successful trim
- operation for this page and if
- initialized we do not trim again if
- actual page size does not decrease. */
-{
- os_aio_array_t* array;
- os_aio_slot_t* slot;
-#ifdef WIN_ASYNC_IO
- void* buffer = NULL;
- DWORD len = (DWORD) n;
- BOOL ret;
-#endif
- ulint wake_later;
- ut_ad(buf);
- ut_ad(n > 0);
- ut_ad(n % OS_MIN_LOG_BLOCK_SIZE == 0);
- ut_ad(offset % OS_MIN_LOG_BLOCK_SIZE == 0);
- ut_ad(os_aio_validate_skip());
-#ifdef WIN_ASYNC_IO
- ut_ad((n & 0xFFFFFFFFUL) == n);
-#endif
-
-
- wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; /* split the wake-later flag off from the mode proper */
- mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER);
-
- DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- mode = OS_AIO_SYNC; os_has_said_disk_full = FALSE;);
-
- if (mode == OS_AIO_SYNC) {
- ibool ret;
- /* This is actually an ordinary synchronous read or write:
- no need to use an i/o-handler thread */
-
- if (type == OS_FILE_READ) {
- ret = os_file_read_func(file, buf, offset, n, trx);
- } else {
- ut_ad(!srv_read_only_mode);
- ut_a(type == OS_FILE_WRITE);
-
- ret = os_file_write(name, file, buf, offset, n);
-
- DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- os_has_said_disk_full = FALSE; ret = 0; errno = 28;);
-
- if (!ret) {
- os_file_handle_error_cond_exit(name, "os_file_write_func", TRUE, FALSE,
- __FILE__, __LINE__);
- }
- }
-
- if (!ret) {
- fprintf(stderr, "FAIL"); /* NOTE(review): bare marker without newline or context; looks like leftover debug output */
- }
-
- return ret;
- }
-
-try_again:
- switch (mode) {
- case OS_AIO_NORMAL:
- if (type == OS_FILE_READ) {
- array = os_aio_read_array;
- } else {
- ut_ad(!srv_read_only_mode);
- array = os_aio_write_array;
- }
- break;
- case OS_AIO_IBUF:
- ut_ad(type == OS_FILE_READ);
- /* Reduce probability of deadlock bugs in connection with ibuf:
- do not let the ibuf i/o handler sleep */
-
- wake_later = FALSE;
-
- if (srv_read_only_mode) {
- array = os_aio_read_array;
- } else {
- array = os_aio_ibuf_array;
- }
- break;
- case OS_AIO_LOG:
- if (srv_read_only_mode) {
- array = os_aio_read_array;
- } else {
- array = os_aio_log_array;
- }
- break;
- case OS_AIO_SYNC: /* not reachable from the entry above, which returns for OS_AIO_SYNC before try_again */
- array = os_aio_sync_array;
-#if defined(LINUX_NATIVE_AIO)
- /* In Linux native AIO we don't use sync IO array. */
- ut_a(!srv_use_native_aio);
-#endif /* LINUX_NATIVE_AIO */
- break;
- default:
- ut_error;
- array = NULL; /* Eliminate compiler warning */
- }
-
- if (trx && type == OS_FILE_READ)
- {
- trx->io_reads++; /* NOTE(review): placed after try_again, so a retried request is counted again */
- trx->io_read += n;
- }
-
- slot = os_aio_array_reserve_slot(type, is_log, array, message1, message2, file,
- name, buf, offset, n, page_size, space_id,
- write_size);
-
- if (type == OS_FILE_READ) {
- if (srv_use_native_aio) {
- os_n_file_reads++;
- os_bytes_read_since_printout += n;
-#ifdef WIN_ASYNC_IO
- ret = ReadFile(file, buf, (DWORD) n, &len,
- &(slot->control));
- if(!ret && GetLastError() != ERROR_IO_PENDING) /* ERROR_IO_PENDING is not treated as a failure */
- goto err_exit;
-
-#elif defined(LINUX_NATIVE_AIO)
- if (!os_aio_linux_dispatch(array, slot)) {
- goto err_exit;
- }
-#endif /* WIN_ASYNC_IO */
- } else {
- if (!wake_later) {
- os_aio_simulated_wake_handler_thread(
- os_aio_get_segment_no_from_slot(
- array, slot));
- }
- }
- } else if (type == OS_FILE_WRITE) {
- ut_ad(!srv_read_only_mode);
- if (srv_use_native_aio) {
- os_n_file_writes++;
-#ifdef WIN_ASYNC_IO
- n = slot->len;
- buffer = buf;
- ret = WriteFile(file, buffer, (DWORD) n, &len,
- &(slot->control));
-
- if(!ret && GetLastError() != ERROR_IO_PENDING)
- goto err_exit;
-#elif defined(LINUX_NATIVE_AIO)
- if (!os_aio_linux_dispatch(array, slot)) {
- goto err_exit;
- }
-#endif /* WIN_ASYNC_IO */
- } else {
- if (!wake_later) {
- os_aio_simulated_wake_handler_thread(
- os_aio_get_segment_no_from_slot(
- array, slot));
- }
- }
- } else {
- ut_error;
- }
-
- /* aio was queued successfully! */
- return(TRUE);
-
-#if defined LINUX_NATIVE_AIO || defined WIN_ASYNC_IO
-err_exit:
-#endif /* LINUX_NATIVE_AIO || WIN_ASYNC_IO */
- os_aio_array_free_slot(array, slot); /* give the slot back before deciding whether to retry */
-
- if (os_file_handle_error(
- name,type == OS_FILE_READ ? "aio read" : "aio write", __FILE__, __LINE__)) {
-
- goto try_again;
- }
-
- return(FALSE);
-}
-
-#ifdef WIN_ASYNC_IO
-/* Segment classification used by os_aio_windows_handle(): a segment number
-below srv_n_read_io_threads counts as a read segment, any other as a write
-segment. The argument and both expansions are parenthesised so that the
-macros stay correct for any argument expression and any surrounding
-operators. */
-#define READ_SEGMENT(x) ((x) < srv_n_read_io_threads)
-#define WRITE_SEGMENT(x) (!READ_SEGMENT(x))
-
-/**********************************************************************//**
-This function is only used in Windows asynchronous i/o.
-Waits for an aio operation to complete. This function is used to wait
-for completed requests. The aio array of pending requests is divided
-into segments. The thread specifies which segment or slot it wants to wait
-for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing!
-The calling thread waits on the read completion port if its segment is a
-read segment and on the general completion port otherwise. A thread of a
-write segment that dequeues a read completion forwards it to the read
-completion port and keeps waiting. The thread exits from inside this
-function when it receives the shutdown key or when srv_shutdown_state is
-SRV_SHUTDOWN_EXIT_THREADS. If the completed request failed and
-os_file_handle_error() asks for a retry, the request is repeated
-synchronously here.
-@return TRUE if the aio operation succeeded */
-UNIV_INTERN
-ibool
-os_aio_windows_handle(
-/*==================*/
- ulint segment, /*!< in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads; if
- this is ULINT_UNDEFINED, then it means that
- sync aio is used, and this parameter is
- ignored */
- ulint pos, /*!< this parameter is used only in sync aio:
- wait for the aio slot at this position */
- fil_node_t**message1, /*!< out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2, /*!< out: second message passed with the aio request */
- ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */
- ulint* space_id) /*!< out: space_id stored in the completed slot */
-{
- ulint orig_seg = segment; /* NOTE(review): not referenced again in this function */
- os_aio_slot_t* slot;
- ibool ret_val;
- BOOL ret;
- DWORD len;
- BOOL retry = FALSE;
- ULONG_PTR key;
- HANDLE port = READ_SEGMENT(segment)? read_completion_port : completion_port;
-
- for(;;) {
- ret = GetQueuedCompletionStatus(port, &len, &key,
- (OVERLAPPED **)&slot, INFINITE);
-
- /* If shutdown key was received, repost the shutdown message and exit */
- if (ret && (key == IOCP_SHUTDOWN_KEY)) {
- PostQueuedCompletionStatus(port, 0, key, NULL);
- os_thread_exit(NULL);
- }
-
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
- os_thread_exit(NULL);
- }
-
- if(WRITE_SEGMENT(segment)&& slot->type == OS_FILE_READ) { /* NOTE(review): slot is dereferenced without checking that a packet was dequeued (ret FALSE with a NULL overlapped pointer); confirm this cannot occur here */
- /*
- Redirect read completions to the dedicated completion port
- and thread. We need to split read and write threads. If we do not
- do that, and just allow all io threads process all IO, it is possible
- to get stuck in a deadlock in buffer pool code,
-
- Currently, the problem is solved this way - "write io" threads
- always get all completion notifications, from both async reads and
- writes. Write completion is handled in the same thread that gets it.
- Read completion is forwarded via PostQueuedCompletionStatus()
- to the second completion port dedicated solely to reads. One of the
- "read io" threads waiting on this port will finally handle the IO.
-
- Forwarding IO completion this way costs a context switch, and this
- seems tolerable since asynchronous reads are by far less frequent.
- */
- ut_a(PostQueuedCompletionStatus(read_completion_port, len, key,
- &slot->control));
- }
- else {
- break;
- }
- }
- *message1 = slot->message1;
- *message2 = slot->message2;
-
- *type = slot->type;
- *space_id = slot->space_id;
-
- if (ret && len == slot->len) { /* success requires the full requested length to be transferred */
-
- ret_val = TRUE;
- } else if (os_file_handle_error(slot->name, "Windows aio", __FILE__, __LINE__)) {
-
- retry = TRUE;
- } else {
-
- ret_val = FALSE;
- }
-
- if (retry) {
-
- ut_a((slot->len & 0xFFFFFFFFUL) == slot->len);
-
- switch (slot->type) { /* repeat the failed request synchronously */
- case OS_FILE_WRITE:
- ret_val = os_file_write(
- slot->name, slot->file, slot->buf,
- slot->offset, slot->len);
- break;
- case OS_FILE_READ:
- ret_val = os_file_read(
- slot->file, slot->buf,
- slot->offset, slot->len);
- break;
- default:
- ut_error;
- }
-
- }
-
- if (slot->type == OS_FILE_WRITE) {
- if (!slot->is_log && srv_use_trim && !os_fallocate_failed) {
- // Deallocate unused blocks from file system
- os_file_trim(slot);
- }
- }
-
- os_aio_array_free_slot((os_aio_array_t *)slot->arr, slot); /* slot->arr was set to the owning array when the slot was reserved */
-
- return(ret_val);
-}
-#endif
-
-#if defined(LINUX_NATIVE_AIO)
-/******************************************************************//**
-This function is only used in Linux native asynchronous i/o. It is called
-from within the io-thread when the slot array holds no completed request,
-in order to reap completion events from the kernel for one segment.
-The thread blocks in io_getevents() with a timeout, so under light load it
-spends most of its time here. Each reaped event is recorded in its slot and
-the slot is flagged as done; error handling for individual requests is left
-to the caller. After every wait that reaped nothing the server status is
-checked, and the function returns if the threads have been told to exit. */
-static
-void
-os_aio_linux_collect(
-/*=================*/
-	os_aio_array_t*	array,	/*!< in/out: slot array. */
-	ulint		segment,	/*!< in: local segment no. */
-	ulint		seg_size)	/*!< in: segment size. */
-{
-	/* sanity checks. */
-	ut_ad(array != NULL);
-	ut_ad(seg_size > 0);
-	ut_ad(segment < array->n_segments);
-
-	/* Slot positions [first_pos, limit_pos) belong to this segment;
-	the same offset selects the segment's part of the event array. */
-	const ulint		first_pos = segment * seg_size;
-	const ulint		limit_pos = first_pos + seg_size;
-	struct io_event*	events = &array->aio_events[first_pos];
-	struct io_context*	io_ctx = array->aio_ctx[segment];
-	int			n_events;
-
-	for (;;) {
-		struct timespec	timeout;
-
-		/* Initialize the events. The timeout value is arbitrary.
-		We probably need to experiment with it a little. */
-		memset(events, 0, sizeof(*events) * seg_size);
-		timeout.tv_sec = 0;
-		timeout.tv_nsec = OS_AIO_REAP_TIMEOUT;
-
-		n_events = io_getevents(io_ctx, 1, seg_size, events, &timeout);
-
-		if (n_events > 0) {
-			for (int ev = 0; ev < n_events; ev++) {
-				struct iocb*	control
-					= (struct iocb*) events[ev].obj;
-				ut_a(control != NULL);
-
-				os_aio_slot_t*	slot
-					= (os_aio_slot_t*) control->data;
-
-				/* Some sanity checks. */
-				ut_a(slot != NULL);
-				ut_a(slot->reserved);
-
-#if defined(UNIV_AIO_DEBUG)
-				fprintf(stderr,
-					"io_getevents[%c]: slot[%p] ctx[%p]"
-					" seg[%lu]\n",
-					(slot->type == OS_FILE_WRITE) ? 'w' : 'r',
-					slot, io_ctx, segment);
-#endif
-
-				/* The slot must lie inside our own segment. */
-				ut_a(slot->pos >= first_pos);
-				ut_a(slot->pos < limit_pos);
-
-				if (slot->type == OS_FILE_WRITE
-				    && !slot->is_log
-				    && srv_use_trim
-				    && !os_fallocate_failed) {
-					// Deallocate unused blocks from file system
-					os_file_trim(slot);
-				}
-
-				/* Mark this request as completed. The error
-				handling will be done in the calling
-				function. */
-				os_mutex_enter(array->mutex);
-				slot->n_bytes = events[ev].res;
-				slot->ret = events[ev].res2;
-				slot->io_already_done = TRUE;
-				os_mutex_exit(array->mutex);
-			}
-
-			return;
-		}
-
-		if (UNIV_UNLIKELY(srv_shutdown_state
-				  == SRV_SHUTDOWN_EXIT_THREADS)) {
-			return;
-		}
-
-		/* This error handling is for any error in collecting the
-		IO requests. The errors, if any, for any particular IO
-		request are simply passed on to the calling routine.
-
-		0 means nothing completed within the timeout, -EAGAIN
-		means not enough resources, and -EINTR is returned only
-		when the call was interrupted with no completed IOs
-		available: all three simply wait again. */
-		if (n_events != 0
-		    && n_events != -EAGAIN
-		    && n_events != -EINTR) {
-			break;
-		}
-	}
-
-	/* All other errors should cause a trap for now. */
-	ut_print_timestamp(stderr);
-	fprintf(stderr,
-		" InnoDB: unexpected ret_code[%d] from io_getevents()!\n",
-		n_events);
-	ut_error;
-}
-
-/**********************************************************************//**
-This function is only used in Linux native asynchronous i/o.
-Waits for an aio operation to complete. This function is used to wait for
-the completed requests. The aio array of pending requests is divided
-into segments. The thread specifies which segment or slot it wants to wait
-for. NOTE: this function will also take care of freeing the aio slot,
-therefore no other thread is allowed to do the freeing!
-The slots of the segment are scanned under the array mutex for a request
-flagged as done; if there is none, os_aio_linux_collect() is called to reap
-more events and the scan is repeated. A request that completed with fewer
-bytes than asked for is resubmitted for the remaining part and the wait
-continues. During shutdown, once no slot of the segment is reserved, the
-function returns TRUE with both messages set to NULL.
-@return TRUE if the IO was successful */
-UNIV_INTERN
-ibool
-os_aio_linux_handle(
-/*================*/
- ulint global_seg, /*!< in: segment number in the aio array
- to wait for; segment 0 is the ibuf
- i/o thread, segment 1 is log i/o thread,
- then follow the non-ibuf read threads,
- and the last are the non-ibuf write
- threads. */
- fil_node_t**message1, /*!< out: the messages passed with the */
- void** message2, /*!< aio request; note that in case the
- aio operation failed, these output
- parameters are valid and can be used to
- restart the operation. */
- ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */
- ulint* space_id) /*!< out: space_id stored in the completed slot */
-{
- ulint segment;
- os_aio_array_t* array;
- os_aio_slot_t* slot;
- ulint n;
- ulint i;
- ibool ret = FALSE;
-
- /* Should never be doing Sync IO here. */
- ut_a(global_seg != ULINT_UNDEFINED);
-
- /* Find the array and the local segment. */
- segment = os_aio_get_array_and_local_segment(&array, global_seg);
- n = array->n_slots / array->n_segments; /* number of slots per segment */
-
- wait_for_event:
- /* Loop until we have found a completed request. */
- for (;;) {
- ibool any_reserved = FALSE;
- os_mutex_enter(array->mutex);
- for (i = 0; i < n; ++i) {
- slot = os_aio_array_get_nth_slot(
- array, i + segment * n);
- if (!slot->reserved) {
- continue;
- } else if (slot->io_already_done) {
- /* Something for us to work on. */
- goto found; /* leaves the loop with array->mutex still held */
- } else {
- any_reserved = TRUE;
- }
- }
-
- os_mutex_exit(array->mutex);
-
- /* There is no completed request.
- If there is no pending request at all,
- and the system is being shut down, exit. */
- if (UNIV_UNLIKELY
- (!any_reserved
- && srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS)) {
- *message1 = NULL;
- *message2 = NULL;
- return(TRUE); /* NOTE: *type and *space_id are left unset on this path */
- }
-
- /* Wait for some request. Note that we return
- from wait iff we have found a request. */
-
- srv_set_io_thread_op_info(global_seg,
- "waiting for completed aio requests");
- os_aio_linux_collect(array, segment, n);
- }
-
-found:
- /* Note that it may be that there are more than one completed
- IO requests. We process them one at a time. We may have a case
- here to improve the performance slightly by dealing with all
- requests in one sweep. */
- srv_set_io_thread_op_info(global_seg,
- "processing completed aio requests");
-
- /* Ensure that we are scribbling only our segment. */
- ut_a(i < n);
-
- ut_ad(slot != NULL);
- ut_ad(slot->reserved);
- ut_ad(slot->io_already_done);
-
- *message1 = slot->message1;
- *message2 = slot->message2;
-
- *type = slot->type;
- *space_id = slot->space_id;
-
- if (slot->ret == 0 && slot->n_bytes == (long) slot->len) {
-
- ret = TRUE;
- } else if ((slot->ret == 0) && (slot->n_bytes > 0)
- && (slot->n_bytes < (long) slot->len)) {
- /* Partial read or write scenario */
- int submit_ret;
- struct iocb* iocb;
- slot->buf = (byte*)slot->buf + slot->n_bytes; /* advance buffer, offset and length past the part already transferred */
- slot->offset = slot->offset + slot->n_bytes;
- slot->len = slot->len - slot->n_bytes;
- /* Resetting the bytes read/written */
- slot->n_bytes = 0;
- slot->io_already_done = FALSE;
- iocb = &(slot->control);
-
- if (slot->type == OS_FILE_READ) {
- io_prep_pread(&slot->control, slot->file,
- slot->buf, slot->len,
- (off_t) slot->offset);
- } else {
- ut_a(slot->type == OS_FILE_WRITE);
- io_prep_pwrite(&slot->control, slot->file,
- slot->buf, slot->len,
- (off_t) slot->offset);
- }
- /* Resubmit an I/O request */
- submit_ret = io_submit(array->aio_ctx[segment], 1, &iocb);
- if (submit_ret < 0 ) {
- /* Aborting in case of submit failure */
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Native Linux AIO interface. io_submit()"
- " call failed when resubmitting a partial"
- " I/O request on the file %s.",
- slot->name);
- } else {
- ret = FALSE;
- os_mutex_exit(array->mutex);
- goto wait_for_event; /* the slot stays reserved; wait for the remainder to complete */
- }
- } else {
- errno = -slot->ret;
-
- if (slot->ret == 0) {
- fprintf(stderr,
- "InnoDB: Number of bytes after aio %d requested %lu\n"
- "InnoDB: from file %s\n",
- slot->n_bytes, slot->len, slot->name); /* NOTE(review): check that %d and %lu match the types of n_bytes and len */
- }
-
- /* os_file_handle_error does tell us if we should retry
- this IO. As it stands now, we don't do this retry when
- reaping requests from a different context than
- the dispatcher. This non-retry logic is the same for
- windows and linux native AIO.
- We should probably look into this to transparently
- re-submit the IO. */
- os_file_handle_error(slot->name, "Linux aio", __FILE__, __LINE__);
-
- ret = FALSE;
- }
-
- os_mutex_exit(array->mutex);
-
- os_aio_array_free_slot(array, slot);
-
- return(ret);
-}
-#endif /* LINUX_NATIVE_AIO */
-
-/**********************************************************************//**
-Does simulated aio. This function should be called by an i/o-handler
-thread.
-@return TRUE if the aio operation succeeded */
-UNIV_INTERN
-ibool
-os_aio_simulated_handle(
-/*====================*/
- ulint global_segment, /*!< in: the number of the segment in the aio
- arrays to wait for; segment 0 is the ibuf
- i/o thread, segment 1 the log i/o thread,
- then follow the non-ibuf read threads, and as
- the last are the non-ibuf write threads */
- fil_node_t**message1, /*!< out: the messages passed with the aio
- request; note that also in the case where
- the aio operation failed, these output
- parameters are valid and can be used to
- restart the operation, for example */
- void** message2,
- ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */
- ulint* space_id)
-{
- os_aio_array_t* array;
- ulint segment;
- os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
- ulint n_consecutive;
- ulint total_len;
- ulint offs;
- os_offset_t lowest_offset;
- ulint biggest_age;
- ulint age;
- byte* combined_buf;
- byte* combined_buf2;
- ibool ret;
- ibool any_reserved;
- ulint n;
- os_aio_slot_t* aio_slot;
-
- /* Fix compiler warning */
- *consecutive_ios = NULL;
-
- segment = os_aio_get_array_and_local_segment(&array, global_segment);
-
-restart:
- /* NOTE! We only access constant fields in os_aio_array. Therefore
- we do not have to acquire the protecting mutex yet */
-
- srv_set_io_thread_op_info(global_segment,
- "looking for i/o requests (a)");
- ut_ad(os_aio_validate_skip());
- ut_ad(segment < array->n_segments);
-
- n = array->n_slots / array->n_segments;
-
- /* Look through n slots after the segment * n'th slot */
-
- if (array == os_aio_read_array
- && os_aio_recommend_sleep_for_read_threads) {
-
- /* Give other threads chance to add several i/os to the array
- at once. */
-
- goto recommended_sleep;
- }
-
- srv_set_io_thread_op_info(global_segment,
- "looking for i/o requests (b)");
-
- /* Check if there is a slot for which the i/o has already been
- done */
- any_reserved = FALSE;
-
- os_mutex_enter(array->mutex);
-
- for (ulint i = 0; i < n; i++) {
- os_aio_slot_t* slot;
-
- slot = os_aio_array_get_nth_slot(array, i + segment * n);
-
- if (!slot->reserved) {
- continue;
- } else if (slot->io_already_done) {
-
- if (os_aio_print_debug) {
- fprintf(stderr,
- "InnoDB: i/o for slot %lu"
- " already done, returning\n",
- (ulong) i);
- }
-
- aio_slot = slot;
- ret = TRUE;
- goto slot_io_done;
- } else {
- any_reserved = TRUE;
- }
- }
-
- /* There is no completed request.
- If there is no pending request at all,
- and the system is being shut down, exit. */
- if (!any_reserved && srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
- os_mutex_exit(array->mutex);
- *message1 = NULL;
- *message2 = NULL;
- return(TRUE);
- }
-
- n_consecutive = 0;
-
- /* If there are at least 2 seconds old requests, then pick the oldest
- one to prevent starvation. If several requests have the same age,
- then pick the one at the lowest offset. */
-
- biggest_age = 0;
- lowest_offset = IB_UINT64_MAX;
-
- for (ulint i = 0; i < n; i++) {
- os_aio_slot_t* slot;
-
- slot = os_aio_array_get_nth_slot(array, i + segment * n);
-
- if (slot->reserved) {
-
- age = (ulint) difftime(
- ut_time(), slot->reservation_time);
-
- if ((age >= 2 && age > biggest_age)
- || (age >= 2 && age == biggest_age
- && slot->offset < lowest_offset)) {
-
- /* Found an i/o request */
- consecutive_ios[0] = slot;
-
- n_consecutive = 1;
-
- biggest_age = age;
- lowest_offset = slot->offset;
- }
- }
- }
-
- if (n_consecutive == 0) {
- /* There were no old requests. Look for an i/o request at the
- lowest offset in the array (we ignore the high 32 bits of the
- offset in these heuristics) */
-
- lowest_offset = IB_UINT64_MAX;
-
- for (ulint i = 0; i < n; i++) {
- os_aio_slot_t* slot;
-
- slot = os_aio_array_get_nth_slot(
- array, i + segment * n);
-
- if (slot->reserved && slot->offset < lowest_offset) {
-
- /* Found an i/o request */
- consecutive_ios[0] = slot;
-
- n_consecutive = 1;
-
- lowest_offset = slot->offset;
- }
- }
- }
-
- if (n_consecutive == 0) {
-
- /* No i/o requested at the moment */
-
- goto wait_for_io;
- }
-
- /* if n_consecutive != 0, then we have assigned
- something valid to consecutive_ios[0] */
- ut_ad(n_consecutive != 0);
- ut_ad(consecutive_ios[0] != NULL);
-
- aio_slot = consecutive_ios[0];
-
- /* Check if there are several consecutive blocks to read or write */
-
-consecutive_loop:
- for (ulint i = 0; i < n; i++) {
- os_aio_slot_t* slot;
-
- slot = os_aio_array_get_nth_slot(array, i + segment * n);
- if (slot->reserved
- && slot != aio_slot
- && slot->offset == aio_slot->offset + aio_slot->len
- && slot->type == aio_slot->type
- && slot->file == aio_slot->file) {
-
- /* Found a consecutive i/o request */
-
- consecutive_ios[n_consecutive] = slot;
- n_consecutive++;
-
- aio_slot = slot;
-
- if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
-
- goto consecutive_loop;
- } else {
- break;
- }
- }
- }
-
- srv_set_io_thread_op_info(global_segment, "consecutive i/o requests");
-
- /* We have now collected n_consecutive i/o requests in the array;
- allocate a single buffer which can hold all data, and perform the
- i/o */
-
- total_len = 0;
- aio_slot = consecutive_ios[0];
-
- for (ulint i = 0; i < n_consecutive; i++) {
- total_len += consecutive_ios[i]->len;
- }
-
- if (n_consecutive == 1) {
- /* We can use the buffer of the i/o request */
- combined_buf = aio_slot->buf;
- combined_buf2 = NULL;
- } else {
- combined_buf2 = static_cast<byte*>(
- ut_malloc(total_len + UNIV_PAGE_SIZE));
-
- ut_a(combined_buf2);
-
- combined_buf = static_cast<byte*>(
- ut_align(combined_buf2, UNIV_PAGE_SIZE));
- }
-
- /* We release the array mutex for the time of the i/o: NOTE that
- this assumes that there is just one i/o-handler thread serving
- a single segment of slots! */
-
- os_mutex_exit(array->mutex);
-
- if (aio_slot->type == OS_FILE_WRITE && n_consecutive > 1) {
- /* Copy the buffers to the combined buffer */
- offs = 0;
-
- for (ulint i = 0; i < n_consecutive; i++) {
-
- ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf,
- consecutive_ios[i]->len);
-
- offs += consecutive_ios[i]->len;
- }
- }
-
- srv_set_io_thread_op_info(global_segment, "doing file i/o");
-
- /* Do the i/o with ordinary, synchronous i/o functions: */
- if (aio_slot->type == OS_FILE_WRITE) {
- ut_ad(!srv_read_only_mode);
- ret = os_file_write(
- aio_slot->name, aio_slot->file, combined_buf,
- aio_slot->offset, total_len);
-
- DBUG_EXECUTE_IF("ib_os_aio_func_io_failure_28",
- os_has_said_disk_full = FALSE; ret = 0; errno = 28;);
-
- if (!ret) {
- os_file_handle_error_cond_exit(aio_slot->name, "os_file_write_func", TRUE, FALSE,
- __FILE__, __LINE__);
- }
-
- } else {
- ret = os_file_read(
- aio_slot->file, combined_buf,
- aio_slot->offset, total_len);
- }
-
- srv_set_io_thread_op_info(global_segment, "file i/o done");
-
- if (aio_slot->type == OS_FILE_READ && n_consecutive > 1) {
- /* Copy the combined buffer to individual buffers */
- offs = 0;
-
- for (ulint i = 0; i < n_consecutive; i++) {
-
- ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs,
- consecutive_ios[i]->len);
- offs += consecutive_ios[i]->len;
- }
- }
-
- if (combined_buf2) {
- ut_free(combined_buf2);
- }
-
- os_mutex_enter(array->mutex);
-
- /* Mark the i/os done in slots */
-
- for (ulint i = 0; i < n_consecutive; i++) {
- consecutive_ios[i]->io_already_done = TRUE;
- }
-
- /* We return the messages for the first slot now, and if there were
- several slots, the messages will be returned with subsequent calls
- of this function */
-
-slot_io_done:
-
- ut_a(aio_slot->reserved);
-
- *message1 = aio_slot->message1;
- *message2 = aio_slot->message2;
-
- *type = aio_slot->type;
- *space_id = aio_slot->space_id;
-
- os_mutex_exit(array->mutex);
-
- os_aio_array_free_slot(array, aio_slot);
-
- return(ret);
-
-wait_for_io:
- srv_set_io_thread_op_info(global_segment, "resetting wait event");
-
- /* We wait here until there again can be i/os in the segment
- of this thread */
-
- os_event_reset(os_aio_segment_wait_events[global_segment]);
-
- os_mutex_exit(array->mutex);
-
-recommended_sleep:
- srv_set_io_thread_op_info(global_segment, "waiting for i/o request");
-
- os_event_wait(os_aio_segment_wait_events[global_segment]);
-
- goto restart;
-}
-
-/**********************************************************************//**
-Validates the consistency of an aio array.
-@return true if ok */
-static
-bool
-os_aio_array_validate(
-/*==================*/
- os_aio_array_t* array) /*!< in: aio wait array */
-{
- ulint i;
- ulint n_reserved = 0;
-
- os_mutex_enter(array->mutex);
-
- ut_a(array->n_slots > 0);
- ut_a(array->n_segments > 0);
-
- for (i = 0; i < array->n_slots; i++) {
- os_aio_slot_t* slot;
-
- slot = os_aio_array_get_nth_slot(array, i);
-
- if (slot->reserved) {
- n_reserved++;
- ut_a(slot->len > 0);
- }
- }
-
- ut_a(array->n_reserved == n_reserved);
-
- os_mutex_exit(array->mutex);
-
- return(true);
-}
-
-/**********************************************************************//**
-Validates the consistency the aio system.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-os_aio_validate(void)
-/*=================*/
-{
- os_aio_array_validate(os_aio_read_array);
-
- if (os_aio_write_array != 0) {
- os_aio_array_validate(os_aio_write_array);
- }
-
- if (os_aio_ibuf_array != 0) {
- os_aio_array_validate(os_aio_ibuf_array);
- }
-
- if (os_aio_log_array != 0) {
- os_aio_array_validate(os_aio_log_array);
- }
-
- if (os_aio_sync_array != 0) {
- os_aio_array_validate(os_aio_sync_array);
- }
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Prints pending IO requests per segment of an aio array.
-We probably don't need per segment statistics but they can help us
-during development phase to see if the IO requests are being
-distributed as expected. */
-static
-void
-os_aio_print_segment_info(
-/*======================*/
- FILE* file, /*!< in: file where to print */
- ulint* n_seg, /*!< in: pending IO array */
- os_aio_array_t* array) /*!< in: array to process */
-{
- ulint i;
-
- ut_ad(array);
- ut_ad(n_seg);
- ut_ad(array->n_segments > 0);
-
- if (array->n_segments == 1) {
- return;
- }
-
- fprintf(file, " [");
- for (i = 0; i < array->n_segments; i++) {
- if (i != 0) {
- fprintf(file, ", ");
- }
-
- fprintf(file, "%lu", n_seg[i]);
- }
- fprintf(file, "] ");
-}
-
-/**********************************************************************//**
-Prints info about the aio array. */
-UNIV_INTERN
-void
-os_aio_print_array(
-/*==============*/
- FILE* file, /*!< in: file where to print */
- os_aio_array_t* array) /*!< in: aio array to print */
-{
- ulint n_reserved = 0;
- ulint n_res_seg[SRV_MAX_N_IO_THREADS];
-
- os_mutex_enter(array->mutex);
-
- ut_a(array->n_slots > 0);
- ut_a(array->n_segments > 0);
-
- memset(n_res_seg, 0x0, sizeof(n_res_seg));
-
- for (ulint i = 0; i < array->n_slots; ++i) {
- os_aio_slot_t* slot;
- ulint seg_no;
-
- slot = os_aio_array_get_nth_slot(array, i);
-
- seg_no = (i * array->n_segments) / array->n_slots;
-
- if (slot->reserved) {
- ++n_reserved;
- ++n_res_seg[seg_no];
-
- ut_a(slot->len > 0);
- }
- }
-
- ut_a(array->n_reserved == n_reserved);
-
- fprintf(file, " %lu", (ulong) n_reserved);
-
- os_aio_print_segment_info(file, n_res_seg, array);
-
- os_mutex_exit(array->mutex);
-}
-
-/**********************************************************************//**
-Prints info of the aio arrays. */
-UNIV_INTERN
-void
-os_aio_print(
-/*=========*/
- FILE* file) /*!< in: file where to print */
-{
- time_t current_time;
- double time_elapsed;
- double avg_bytes_read;
-
- for (ulint i = 0; i < srv_n_file_io_threads; ++i) {
- fprintf(file, "I/O thread %lu state: %s (%s)",
- (ulong) i,
- srv_io_thread_op_info[i],
- srv_io_thread_function[i]);
-
-#ifndef _WIN32
- if (!srv_use_native_aio
- && os_aio_segment_wait_events[i]->is_set()) {
- fprintf(file, " ev set");
- }
-#endif /* _WIN32 */
-
- fprintf(file, "\n");
- }
-
- fputs("Pending normal aio reads:", file);
-
- os_aio_print_array(file, os_aio_read_array);
-
- if (os_aio_write_array != 0) {
- fputs(", aio writes:", file);
- os_aio_print_array(file, os_aio_write_array);
- }
-
- if (os_aio_ibuf_array != 0) {
- fputs(",\n ibuf aio reads:", file);
- os_aio_print_array(file, os_aio_ibuf_array);
- }
-
- if (os_aio_log_array != 0) {
- fputs(", log i/o's:", file);
- os_aio_print_array(file, os_aio_log_array);
- }
-
- if (os_aio_sync_array != 0) {
- fputs(", sync i/o's:", file);
- os_aio_print_array(file, os_aio_sync_array);
- }
-
- putc('\n', file);
- current_time = ut_time();
- time_elapsed = 0.001 + difftime(current_time, os_last_printout);
-
- fprintf(file,
- "Pending flushes (fsync) log: " ULINTPF
- "; buffer pool: " ULINTPF "\n"
- ULINTPF " OS file reads, "
- ULINTPF " OS file writes, "
- ULINTPF " OS fsyncs\n",
- fil_n_pending_log_flushes,
- fil_n_pending_tablespace_flushes,
- os_n_file_reads,
- os_n_file_writes,
- os_n_fsyncs);
-
- const ulint n_reads = ulint(MONITOR_VALUE(MONITOR_OS_PENDING_READS));
- const ulint n_writes = ulint(MONITOR_VALUE(MONITOR_OS_PENDING_WRITES));
-
- if (n_reads != 0 || n_writes != 0) {
- fprintf(file,
- ULINTPF " pending reads, " ULINTPF " pending writes\n",
- n_reads, n_writes);
- }
-
- if (os_n_file_reads == os_n_file_reads_old) {
- avg_bytes_read = 0.0;
- } else {
- avg_bytes_read = (double) os_bytes_read_since_printout
- / (os_n_file_reads - os_n_file_reads_old);
- }
-
- fprintf(file,
- "%.2f reads/s, %lu avg bytes/read,"
- " %.2f writes/s, %.2f fsyncs/s\n",
- (os_n_file_reads - os_n_file_reads_old)
- / time_elapsed,
- (ulong) avg_bytes_read,
- (os_n_file_writes - os_n_file_writes_old)
- / time_elapsed,
- (os_n_fsyncs - os_n_fsyncs_old)
- / time_elapsed);
-
- os_n_file_reads_old = os_n_file_reads;
- os_n_file_writes_old = os_n_file_writes;
- os_n_fsyncs_old = os_n_fsyncs;
- os_bytes_read_since_printout = 0;
-
- os_last_printout = current_time;
-}
-
-/**********************************************************************//**
-Refreshes the statistics used to print per-second averages. */
-UNIV_INTERN
-void
-os_aio_refresh_stats(void)
-/*======================*/
-{
- os_n_file_reads_old = os_n_file_reads;
- os_n_file_writes_old = os_n_file_writes;
- os_n_fsyncs_old = os_n_fsyncs;
- os_bytes_read_since_printout = 0;
-
- os_last_printout = time(NULL);
-}
-
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Checks that all slots in the system have been freed, that is, there are
-no pending io operations.
-@return TRUE if all free */
-UNIV_INTERN
-ibool
-os_aio_all_slots_free(void)
-/*=======================*/
-{
- os_aio_array_t* array;
- ulint n_res = 0;
-
- array = os_aio_read_array;
-
- os_mutex_enter(array->mutex);
-
- n_res += array->n_reserved;
-
- os_mutex_exit(array->mutex);
-
- if (!srv_read_only_mode) {
- ut_a(os_aio_write_array == 0);
-
- array = os_aio_write_array;
-
- os_mutex_enter(array->mutex);
-
- n_res += array->n_reserved;
-
- os_mutex_exit(array->mutex);
-
- ut_a(os_aio_ibuf_array == 0);
-
- array = os_aio_ibuf_array;
-
- os_mutex_enter(array->mutex);
-
- n_res += array->n_reserved;
-
- os_mutex_exit(array->mutex);
- }
-
- ut_a(os_aio_log_array == 0);
-
- array = os_aio_log_array;
-
- os_mutex_enter(array->mutex);
-
- n_res += array->n_reserved;
-
- os_mutex_exit(array->mutex);
-
- array = os_aio_sync_array;
-
- os_mutex_enter(array->mutex);
-
- n_res += array->n_reserved;
-
- os_mutex_exit(array->mutex);
-
- if (n_res == 0) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-#endif /* UNIV_DEBUG */
-
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef _WIN32
-#include <winioctl.h>
-#ifndef FSCTL_FILE_LEVEL_TRIM
-#define FSCTL_FILE_LEVEL_TRIM CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 130, METHOD_BUFFERED, FILE_WRITE_DATA)
-typedef struct _FILE_LEVEL_TRIM_RANGE {
- DWORDLONG Offset;
- DWORDLONG Length;
-} FILE_LEVEL_TRIM_RANGE, *PFILE_LEVEL_TRIM_RANGE;
-
-typedef struct _FILE_LEVEL_TRIM {
- DWORD Key;
- DWORD NumRanges;
- FILE_LEVEL_TRIM_RANGE Ranges[1];
-} FILE_LEVEL_TRIM, *PFILE_LEVEL_TRIM;
-#endif
-#endif
-
-#if defined(WIN_ASYNC_IO) || defined(LINUX_NATIVE_AIO)
-/**********************************************************************//**
-Directly manipulate the allocated disk space by deallocating for the file referred to
-by fd for the byte range starting at offset and continuing for len bytes.
-Within the specified range, partial file system blocks are zeroed, and whole
-file system blocks are removed from the file. After a successful call,
-subsequent reads from this range will return zeroes.
-@return true if success, false if error */
-static
-ibool
-os_file_trim(
-/*=========*/
- os_aio_slot_t* slot) /*!< in: slot structure */
-{
- size_t len = slot->len;
- size_t trim_len = slot->page_size - slot->len;
- os_offset_t off __attribute__((unused)) = slot->offset + len;
- size_t bsize = slot->file_block_size;
-
-#ifdef UNIV_TRIM_DEBUG
- fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu off %lu bz %lu\n",
- slot->write_size ? *slot->write_size : 0, trim_len, len, off, bsize);
-#endif
-
- // Nothing to do if trim length is zero or if actual write
- // size is initialized and it is smaller than current write size.
- // In first write if we trim we set write_size to actual bytes
- // written and rest of the page is trimmed. In following writes
- // there is no need to trim again if write_size only increases
- // because rest of the page is already trimmed. If actual write
- // size decreases we need to trim again.
- if (trim_len == 0 ||
- (slot->write_size &&
- *slot->write_size > 0 &&
- len >= *slot->write_size)) {
-
- if (slot->write_size) {
- if (*slot->write_size > 0 && len >= *slot->write_size) {
- srv_stats.page_compressed_trim_op_saved.inc();
- }
-
- *slot->write_size = len;
- }
-
- return (TRUE);
- }
-
-#ifdef __linux__
-#if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE)
- int ret = fallocate(slot->file,
- FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- off, trim_len);
-
- if (ret) {
- /* After first failure do not try to trim again */
- os_fallocate_failed = true;
- srv_use_trim = FALSE;
- ib_logf(IB_LOG_LEVEL_WARN,
- "fallocate() failed with error %d."
- " start: " UINT64PF " len: " ULINTPF " payload: " ULINTPF "."
- " Disabling fallocate for now.",
- errno, off, ulint(trim_len), ulint(len));
-
- os_file_handle_error_no_exit(slot->name,
- " fallocate(FALLOC_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) ",
- FALSE, __FILE__, __LINE__);
-
- if (slot->write_size) {
- *slot->write_size = 0;
- }
-
- return (FALSE);
- } else {
- if (slot->write_size) {
- *slot->write_size = len;
- }
- }
-#else
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: fallocate not supported on this installation."
- " InnoDB: Disabling fallocate for now.");
- os_fallocate_failed = true;
- srv_use_trim = FALSE;
- if (slot->write_size) {
- *slot->write_size = 0;
- }
-
-#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE ... */
-
-#elif defined(_WIN32)
- FILE_LEVEL_TRIM flt;
- flt.Key = 0;
- flt.NumRanges = 1;
- flt.Ranges[0].Offset = off;
- flt.Ranges[0].Length = trim_len;
-
- OVERLAPPED overlapped = { 0 };
- overlapped.hEvent = win_get_syncio_event();
- BOOL ret = DeviceIoControl(slot->file, FSCTL_FILE_LEVEL_TRIM,
- &flt, sizeof(flt), NULL, NULL, NULL, &overlapped);
- DWORD tmp;
- if (ret) {
- ret = GetOverlappedResult(slot->file, &overlapped, &tmp, FALSE);
- }
- else if (GetLastError() == ERROR_IO_PENDING) {
- ret = GetOverlappedResult(slot->file, &overlapped, &tmp, TRUE);
- }
- if (!ret) {
- DWORD last_error = GetLastError();
- /* After first failure do not try to trim again */
- os_fallocate_failed = true;
- srv_use_trim = FALSE;
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Warning: DeviceIoControl(FSCTL_FILE_LEVEL_TRIM) call failed with error %u%s. Disabling trimming.\n",
- last_error, last_error == ERROR_NOT_SUPPORTED ? "(ERROR_NOT_SUPPORTED)" : "");
-
- if (slot->write_size) {
- *slot->write_size = 0;
- }
- return (FALSE);
- } else {
- if (slot->write_size) {
- *slot->write_size = len;
- }
- }
-#endif
-
- switch(bsize) {
- case 512:
- srv_stats.page_compression_trim_sect512.add((trim_len / bsize));
- break;
- case 1024:
- srv_stats.page_compression_trim_sect1024.add((trim_len / bsize));
- break;
- case 2948:
- srv_stats.page_compression_trim_sect2048.add((trim_len / bsize));
- break;
- case 4096:
- srv_stats.page_compression_trim_sect4096.add((trim_len / bsize));
- break;
- case 8192:
- srv_stats.page_compression_trim_sect8192.add((trim_len / bsize));
- break;
- case 16384:
- srv_stats.page_compression_trim_sect16384.add((trim_len / bsize));
- break;
- case 32768:
- srv_stats.page_compression_trim_sect32768.add((trim_len / bsize));
- break;
- default:
- break;
- }
-
- srv_stats.page_compressed_trim_op.inc();
-
- return (TRUE);
-
-}
-#endif /* WIN_ASYNC_IO || LINUX_NATIVE_AIO */
-
-/***********************************************************************//**
-Try to get number of bytes per sector from file system.
-@return file block size */
-UNIV_INTERN
-ulint
-os_file_get_block_size(
-/*===================*/
- os_file_t file, /*!< in: handle to a file */
- const char* name) /*!< in: file name */
-{
- ulint fblock_size = 512;
-
-#if defined(UNIV_LINUX) && defined(HAVE_SYS_STATVFS_H)
- struct statvfs fstat;
- int err;
-
- err = fstatvfs(file, &fstat);
-
- if (err != 0) {
- fprintf(stderr, "InnoDB: Warning: fstatvfs() failed on file %s\n", name);
- os_file_handle_error_no_exit(name, "fstatvfs()", FALSE, __FILE__, __LINE__);
- } else {
- fblock_size = fstat.f_bsize;
- }
-#endif /* UNIV_LINUX */
-#ifdef __WIN__
- {
- DWORD SectorsPerCluster = 0;
- DWORD BytesPerSector = 0;
- DWORD NumberOfFreeClusters = 0;
- DWORD TotalNumberOfClusters = 0;
-
- /*
- if (GetFreeSpace((LPCTSTR)name, &SectorsPerCluster, &BytesPerSector, &NumberOfFreeClusters, &TotalNumberOfClusters)) {
- fblock_size = BytesPerSector;
- } else {
- fprintf(stderr, "InnoDB: Warning: GetFreeSpace() failed on file %s\n", name);
- os_file_handle_error_no_exit(name, "GetFreeSpace()", FALSE, __FILE__, __LINE__);
- }
- */
- }
-#endif /* __WIN__*/
-
- /* Currently we support file block size up to 4Kb */
- if (fblock_size > 4096 || fblock_size < 512) {
- if (fblock_size < 512) {
- fblock_size = 512;
- } else {
- fblock_size = 4096;
- }
- }
-
- return fblock_size;
-}
diff --git a/storage/xtradb/os/os0proc.cc b/storage/xtradb/os/os0proc.cc
deleted file mode 100644
index ff6d65e4ae6..00000000000
--- a/storage/xtradb/os/os0proc.cc
+++ /dev/null
@@ -1,232 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file os/os0proc.cc
-The interface to the operating system
-process control primitives
-
-Created 9/30/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0proc.h"
-#ifdef UNIV_NONINL
-#include "os0proc.ic"
-#endif
-
-#include "ut0mem.h"
-#include "ut0byte.h"
-
-/* FreeBSD for example has only MAP_ANON, Linux has MAP_ANONYMOUS and
-MAP_ANON but MAP_ANON is marked as deprecated */
-#if defined(MAP_ANONYMOUS)
-#define OS_MAP_ANON MAP_ANONYMOUS
-#elif defined(MAP_ANON)
-#define OS_MAP_ANON MAP_ANON
-#endif
-
-UNIV_INTERN ibool os_use_large_pages;
-/* Large page size. This may be a boot-time option on some platforms */
-UNIV_INTERN ulint os_large_page_size;
-
-/****************************************************************//**
-Converts the current process id to a number. It is not guaranteed that the
-number is unique. In Linux returns the 'process number' of the current
-thread. That number is the same as one sees in 'top', for example. In Linux
-the thread id is not the same as one sees in 'top'.
-@return process id as a number */
-UNIV_INTERN
-ulint
-os_proc_get_number(void)
-/*====================*/
-{
-#ifdef __WIN__
- return((ulint)GetCurrentProcessId());
-#else
- return((ulint) getpid());
-#endif
-}
-
-/****************************************************************//**
-Allocates large pages memory.
-@return allocated memory */
-UNIV_INTERN
-void*
-os_mem_alloc_large(
-/*===============*/
- ulint* n) /*!< in/out: number of bytes */
-{
- void* ptr;
- ulint size;
-#if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
- int shmid;
- struct shmid_ds buf;
-
- if (!os_use_large_pages || !os_large_page_size) {
- goto skip;
- }
-
- /* Align block size to os_large_page_size */
- ut_ad(ut_is_2pow(os_large_page_size));
- size = ut_2pow_round(*n + (os_large_page_size - 1),
- os_large_page_size);
-
- shmid = shmget(IPC_PRIVATE, (size_t) size, SHM_HUGETLB | SHM_R | SHM_W);
- if (shmid < 0) {
- fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to allocate"
- " %lu bytes. errno %d\n", size, errno);
- ptr = NULL;
- } else {
- ptr = shmat(shmid, NULL, 0);
- if (ptr == (void*)-1) {
- fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to"
- " attach shared memory segment, errno %d\n",
- errno);
- ptr = NULL;
- }
-
- /* Remove the shared memory segment so that it will be
- automatically freed after memory is detached or
- process exits */
- shmctl(shmid, IPC_RMID, &buf);
- }
-
- if (ptr) {
- *n = size;
- os_fast_mutex_lock(&ut_list_mutex);
- ut_total_allocated_memory += size;
- os_fast_mutex_unlock(&ut_list_mutex);
- UNIV_MEM_ALLOC(ptr, size);
- return(ptr);
- }
-
- fprintf(stderr, "InnoDB HugeTLB: Warning: Using conventional"
- " memory pool\n");
-skip:
-#endif /* HAVE_LARGE_PAGES && UNIV_LINUX */
-
-#ifdef __WIN__
- SYSTEM_INFO system_info;
- GetSystemInfo(&system_info);
-
- /* Align block size to system page size */
- ut_ad(ut_is_2pow(system_info.dwPageSize));
- /* system_info.dwPageSize is only 32-bit. Casting to ulint is required
- on 64-bit Windows. */
- size = *n = ut_2pow_round(*n + (system_info.dwPageSize - 1),
- (ulint) system_info.dwPageSize);
- ptr = VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE,
- PAGE_READWRITE);
- if (!ptr) {
- fprintf(stderr, "InnoDB: VirtualAlloc(%lu bytes) failed;"
- " Windows error %lu\n",
- (ulong) size, (ulong) GetLastError());
- } else {
- os_fast_mutex_lock(&ut_list_mutex);
- ut_total_allocated_memory += size;
- os_fast_mutex_unlock(&ut_list_mutex);
- UNIV_MEM_ALLOC(ptr, size);
- }
-#elif !defined OS_MAP_ANON
- size = *n;
- ptr = ut_malloc_low(size, TRUE, FALSE);
-#else
-# ifdef HAVE_GETPAGESIZE
- size = getpagesize();
-# else
- size = UNIV_PAGE_SIZE;
-# endif
- /* Align block size to system page size */
- ut_ad(ut_is_2pow(size));
- size = *n = ut_2pow_round(*n + (size - 1), size);
- ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | OS_MAP_ANON, -1, 0);
- if (UNIV_UNLIKELY(ptr == (void*) -1)) {
- fprintf(stderr, "InnoDB: mmap(%lu bytes) failed;"
- " errno %lu\n",
- (ulong) size, (ulong) errno);
- ptr = NULL;
- } else {
- os_fast_mutex_lock(&ut_list_mutex);
- ut_total_allocated_memory += size;
- os_fast_mutex_unlock(&ut_list_mutex);
- UNIV_MEM_ALLOC(ptr, size);
- }
-#endif
- return(ptr);
-}
-
-/****************************************************************//**
-Frees large pages memory. */
-UNIV_INTERN
-void
-os_mem_free_large(
-/*==============*/
- void *ptr, /*!< in: pointer returned by
- os_mem_alloc_large() */
- ulint size) /*!< in: size returned by
- os_mem_alloc_large() */
-{
- os_fast_mutex_lock(&ut_list_mutex);
- ut_a(ut_total_allocated_memory >= size);
- os_fast_mutex_unlock(&ut_list_mutex);
-
-#if defined HAVE_LARGE_PAGES && defined UNIV_LINUX
- if (os_use_large_pages && os_large_page_size && !shmdt(ptr)) {
- os_fast_mutex_lock(&ut_list_mutex);
- ut_a(ut_total_allocated_memory >= size);
- ut_total_allocated_memory -= size;
- os_fast_mutex_unlock(&ut_list_mutex);
- UNIV_MEM_FREE(ptr, size);
- return;
- }
-#endif /* HAVE_LARGE_PAGES && UNIV_LINUX */
-#ifdef __WIN__
- /* When RELEASE memory, the size parameter must be 0.
- Do not use MEM_RELEASE with MEM_DECOMMIT. */
- if (!VirtualFree(ptr, 0, MEM_RELEASE)) {
- fprintf(stderr, "InnoDB: VirtualFree(%p, %lu) failed;"
- " Windows error %lu\n",
- ptr, (ulong) size, (ulong) GetLastError());
- } else {
- os_fast_mutex_lock(&ut_list_mutex);
- ut_a(ut_total_allocated_memory >= size);
- ut_total_allocated_memory -= size;
- os_fast_mutex_unlock(&ut_list_mutex);
- UNIV_MEM_FREE(ptr, size);
- }
-#elif !defined OS_MAP_ANON
- ut_free(ptr);
-#else
-# if defined(UNIV_SOLARIS)
- if (munmap(static_cast<caddr_t>(ptr), size)) {
-# else
- if (munmap(ptr, size)) {
-# endif /* UNIV_SOLARIS */
- fprintf(stderr, "InnoDB: munmap(%p, %lu) failed;"
- " errno %lu\n",
- ptr, (ulong) size, (ulong) errno);
- } else {
- os_fast_mutex_lock(&ut_list_mutex);
- ut_a(ut_total_allocated_memory >= size);
- ut_total_allocated_memory -= size;
- os_fast_mutex_unlock(&ut_list_mutex);
- UNIV_MEM_FREE(ptr, size);
- }
-#endif
-}
diff --git a/storage/xtradb/os/os0stacktrace.cc b/storage/xtradb/os/os0stacktrace.cc
deleted file mode 100644
index c4c428e0db3..00000000000
--- a/storage/xtradb/os/os0stacktrace.cc
+++ /dev/null
@@ -1,131 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2013 SkySQL Ab. All Rights Reserved.
-
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-#include "os0thread.h"
-
-#if defined (__linux__) && HAVE_BACKTRACE && HAVE_BACKTRACE_SYMBOLS
-
-#if HAVE_EXECINFO_H
-#include <execinfo.h>
-#endif
-
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-#ifndef __USE_GNU
-#define __USE_GNU
-#endif
-#ifndef _XOPEN_SOURCE
-#define _XOPEN_SOURCE
-#endif
-
-/* Since kernel version 2.2 the undocumented parameter to the signal handler has been declared
-obsolete in adherence with POSIX.1b. A more correct way to retrieve additional information is
-to use the SA_SIGINFO option when setting the handler */
-#undef USE_SIGCONTEXT
-
-#ifndef USE_SIGCONTEXT
-/* get REG_EIP / REG_RIP from ucontext.h */
-#include <ucontext.h>
-
- #ifndef EIP
- #define EIP 14
- #endif
-
- #if (defined (__x86_64__))
- #ifndef REG_RIP
- #define REG_RIP REG_INDEX(rip) /* seems to be 16 */
- #endif
- #endif
-
-#endif
-
-#define OS_STACKTRACE_MAX_DEPTH 128
-
-/***************************************************************//**
-Prints stacktrace for this thread.
-*/
-void
-os_stacktrace_print(
-/*================*/
- int sig_num,
- siginfo_t* info,
- void* ucontext)
-{
- void* array[OS_STACKTRACE_MAX_DEPTH];
- char** messages;
- int size, i;
- void* caller_address = NULL;
-
- /* Get the address at the time the signal was raised */
-#if defined(__x86_64__)
- ucontext_t* uc = (ucontext_t*) ucontext;
- caller_address = (void*) uc->uc_mcontext.gregs[REG_RIP] ;
-#elif defined(__hppa__)
- ucontext_t* uc = (ucontext_t*) ucontext;
- caller_address = (void*) (uc->uc_mcontext.sc_iaoq[0] & ~0x3UL) ;
-#elif (defined (__ppc__)) || (defined (__powerpc__))
- ucontext_t* uc = (ucontext_t*) ucontext;
- caller_address = (void*) uc->uc_mcontext.regs->nip ;
-#elif defined(__sparc__)
- struct sigcontext* sc = (struct sigcontext*) ucontext;
-#if __WORDSIZE == 64
- caller_address = (void*) sc->sigc_regs.tpc ;
-#else
- caller_address = (void*) sc->si_regs.pc ;
-#endif
-#elif defined(__i386__)
- ucontext_t* uc = (ucontext_t*) ucontext;
- caller_address = (void*) uc->uc_mcontext.gregs[REG_EIP] ;
-#else
- /* Unsupported return */
- return;
-#endif
-
- fprintf(stderr, "InnoDB: signal %d (%s), address is %p from %p\n",
- sig_num, strsignal(sig_num), info->si_addr,
- (void *)caller_address);
-
- size = backtrace(array, OS_STACKTRACE_MAX_DEPTH);
-
- /* overwrite sigaction with caller's address */
- array[1] = caller_address;
-
- messages = backtrace_symbols(array, size);
-
- fprintf(stderr,
- "InnoDB: Stacktrace for Thread %lu \n",
- (ulong) os_thread_pf(os_thread_get_curr_id()));
-
- /* skip first stack frame (points here) */
- for (i = 1; i < size && messages != NULL; ++i)
- {
- fprintf(stderr, "InnoDB: [bt]: (%d) %s\n", i, messages[i]);
- }
-
- free(messages);
-}
-
-#endif /* __linux__ */
diff --git a/storage/xtradb/os/os0sync.cc b/storage/xtradb/os/os0sync.cc
deleted file mode 100644
index df878c88105..00000000000
--- a/storage/xtradb/os/os0sync.cc
+++ /dev/null
@@ -1,635 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file os/os0sync.cc
-The interface to the operating system
-synchronization primitives.
-
-Created 9/6/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0sync.h"
-#ifdef UNIV_NONINL
-#include "os0sync.ic"
-#endif
-
-#ifdef __WIN__
-#include <windows.h>
-#endif
-
-#include "ut0mem.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-
-/* Type definition for an operating system mutex struct */
-struct os_mutex_t{
- os_event_t event; /*!< Used by sync0arr.cc for queing threads */
- void* handle; /*!< OS handle to mutex */
- ulint count; /*!< we use this counter to check
- that the same thread does not
- recursively lock the mutex: we
- do not assume that the OS mutex
- supports recursive locking, though
- NT seems to do that */
-};
-
-// All the os_*_count variables are accessed atomically
-
-/** This is incremented by 1 in os_thread_create and decremented by 1 in
-os_thread_exit. */
-UNIV_INTERN ulint os_thread_count = 0;
-
-UNIV_INTERN ulint os_event_count = 0;
-UNIV_INTERN ulint os_mutex_count = 0;
-UNIV_INTERN ulint os_fast_mutex_count = 0;
-
-/* The number of microsecnds in a second. */
-static const ulint MICROSECS_IN_A_SECOND = 1000000;
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t event_os_mutex_key;
-UNIV_INTERN mysql_pfs_key_t os_mutex_key;
-#endif
-
-/*********************************************************//**
-Initialitze condition variable */
-UNIV_INLINE
-void
-os_cond_init(
-/*=========*/
- os_cond_t* cond) /*!< in: condition variable. */
-{
- ut_a(cond);
-
-#ifdef __WIN__
- InitializeConditionVariable(cond);
-#else
- ut_a(pthread_cond_init(cond, NULL) == 0);
-#endif
-}
-
-/*********************************************************//**
-Do a timed wait on condition variable.
-@return TRUE if timed out, FALSE otherwise */
-UNIV_INLINE
-ibool
-os_cond_wait_timed(
-/*===============*/
- os_cond_t* cond, /*!< in: condition variable. */
- os_fast_mutex_t* fast_mutex, /*!< in: fast mutex */
-#ifndef __WIN__
- const struct timespec* abstime /*!< in: timeout */
-#else
- DWORD time_in_ms /*!< in: timeout in
- milliseconds*/
-#endif /* !__WIN__ */
-)
-{
- fast_mutex_t* mutex = &fast_mutex->mutex;
-#ifdef __WIN__
- BOOL ret;
- DWORD err;
-
-
- ret = SleepConditionVariableCS(cond, mutex, time_in_ms);
-
- if (!ret) {
- err = GetLastError();
- /* From http://msdn.microsoft.com/en-us/library/ms686301%28VS.85%29.aspx,
- "Condition variables are subject to spurious wakeups
- (those not associated with an explicit wake) and stolen wakeups
- (another thread manages to run before the woken thread)."
- Check for both types of timeouts.
- Conditions are checked by the caller.*/
- if ((err == WAIT_TIMEOUT) || (err == ERROR_TIMEOUT)) {
- return(TRUE);
- }
- }
-
- ut_a(ret);
-
- return(FALSE);
-#else
- int ret;
-
- ret = pthread_cond_timedwait(cond, mutex, abstime);
-
- switch (ret) {
- case 0:
- case ETIMEDOUT:
- /* We play it safe by checking for EINTR even though
- according to the POSIX documentation it can't return EINTR. */
- case EINTR:
- break;
-
- default:
- fprintf(stderr, " InnoDB: pthread_cond_timedwait() returned: "
- "%d: abstime={%lu,%lu}\n",
- ret, (ulong) abstime->tv_sec, (ulong) abstime->tv_nsec);
- ut_error;
- }
-
- return(ret == ETIMEDOUT);
-#endif
-}
-/*********************************************************//**
-Wait on condition variable */
-UNIV_INLINE
-void
-os_cond_wait(
-/*=========*/
- os_cond_t* cond, /*!< in: condition variable. */
- os_fast_mutex_t* fast_mutex)/*!< in: fast mutex */
-{
- fast_mutex_t* mutex = &fast_mutex->mutex;
- ut_a(cond);
- ut_a(mutex);
-
-#ifdef __WIN__
- ut_a(SleepConditionVariableCS(cond, mutex, INFINITE));
-#else
- ut_a(pthread_cond_wait(cond, mutex) == 0);
-#endif
-}
-
-/*********************************************************//**
-Wakes all threads waiting for condition variable */
-UNIV_INLINE
-void
-os_cond_broadcast(
-/*==============*/
- os_cond_t* cond) /*!< in: condition variable. */
-{
- ut_a(cond);
-
-#ifdef __WIN__
- WakeAllConditionVariable(cond);
-#else
- ut_a(pthread_cond_broadcast(cond) == 0);
-#endif
-}
-
-/*********************************************************//**
-Destroys condition variable */
-UNIV_INLINE
-void
-os_cond_destroy(
-/*============*/
- os_cond_t* cond) /*!< in: condition variable. */
-{
-#ifdef __WIN__
- /* Do nothing */
-#else
- ut_a(pthread_cond_destroy(cond) == 0);
-#endif
-}
-
-/*********************************************************//**
-Initializes global event and OS 'slow' mutex lists. */
-UNIV_INTERN
-void
-os_sync_init(void)
-/*==============*/
-{
-}
-
-/** Create an event semaphore, i.e., a semaphore which may just have two
-states: signaled and nonsignaled. The created event is manual reset: it must be
-reset explicitly by calling sync_os_reset_event.
-@param[in,out] event memory block where to create the event */
-UNIV_INTERN
-void
-os_event_create(os_event_t event)
-{
-#ifndef PFS_SKIP_EVENT_MUTEX
- os_fast_mutex_init(event_os_mutex_key, &event->os_mutex);
-#else
- os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &event->os_mutex);
-#endif
-
- os_cond_init(&(event->cond_var));
-
- event->init_count_and_set();
-
- os_atomic_increment_ulint(&os_event_count, 1);
-}
-
-/*********************************************************//**
-Creates an event semaphore, i.e., a semaphore which may just have two
-states: signaled and nonsignaled. The created event is manual reset: it
-must be reset explicitly by calling sync_os_reset_event.
-@return the event handle */
-UNIV_INTERN
-os_event_t
-os_event_create(void)
-/*==================*/
-{
- os_event_t event = static_cast<os_event_t>(ut_malloc(sizeof(*event)));
-
- os_event_create(event);
-
- return(event);
-}
-
-/**********************************************************//**
-Sets an event semaphore to the signaled state: lets waiting threads
-proceed. */
-UNIV_INTERN
-void
-os_event_set(
-/*=========*/
- os_event_t event) /*!< in: event to set */
-{
- ut_a(event);
-
- os_fast_mutex_lock(&(event->os_mutex));
-
- if (UNIV_UNLIKELY(event->is_set())) {
- /* Do nothing */
- } else {
- event->set();
- event->inc_signal_count();
- os_cond_broadcast(&(event->cond_var));
- }
-
- os_fast_mutex_unlock(&(event->os_mutex));
-}
-
-/**********************************************************//**
-Resets an event semaphore to the nonsignaled state. Waiting threads will
-stop to wait for the event.
-The return value should be passed to os_even_wait_low() if it is desired
-that this thread should not wait in case of an intervening call to
-os_event_set() between this os_event_reset() and the
-os_event_wait_low() call. See comments for os_event_wait_low().
-@return current signal_count. */
-UNIV_INTERN
-ib_int64_t
-os_event_reset(
-/*===========*/
- os_event_t event) /*!< in: event to reset */
-{
- ib_int64_t ret = 0;
-
- ut_a(event);
-
- os_fast_mutex_lock(&(event->os_mutex));
-
- if (UNIV_UNLIKELY(!event->is_set())) {
- /* Do nothing */
- } else {
- event->reset();
- }
- ret = event->signal_count();
-
- os_fast_mutex_unlock(&(event->os_mutex));
- return(ret);
-}
-
-/**********************************************************//**
-Frees an event object. */
-UNIV_INTERN
-void
-os_event_free(
-/*==========*/
- os_event_t event, /*!< in: event to free */
- bool free_memory)/*!< in: if true, deallocate the event
- memory block too */
-
-{
- ut_a(event);
-
- os_fast_mutex_free(&(event->os_mutex));
-
- os_cond_destroy(&(event->cond_var));
-
- os_atomic_decrement_ulint(&os_event_count, 1);
-
- if (free_memory)
- ut_free(event);
-}
-
-/**********************************************************//**
-Waits for an event object until it is in the signaled state.
-
-Typically, if the event has been signalled after the os_event_reset()
-we'll return immediately because event->is_set == TRUE.
-There are, however, situations (e.g.: sync_array code) where we may
-lose this information. For example:
-
-thread A calls os_event_reset()
-thread B calls os_event_set() [event->is_set == TRUE]
-thread C calls os_event_reset() [event->is_set == FALSE]
-thread A calls os_event_wait() [infinite wait!]
-thread C calls os_event_wait() [infinite wait!]
-
-Where such a scenario is possible, to avoid infinite wait, the
-value returned by os_event_reset() should be passed in as
-reset_sig_count. */
-UNIV_INTERN
-void
-os_event_wait_low(
-/*==============*/
- os_event_t event, /*!< in: event to wait */
- ib_int64_t reset_sig_count)/*!< in: zero or the value
- returned by previous call of
- os_event_reset(). */
-{
-
- os_fast_mutex_lock(&event->os_mutex);
-
- if (!reset_sig_count) {
- reset_sig_count = event->signal_count();
- }
-
- while (!event->is_set() && event->signal_count() == reset_sig_count) {
- os_cond_wait(&(event->cond_var), &(event->os_mutex));
-
- /* Solaris manual said that spurious wakeups may occur: we
- have to check if the event really has been signaled after
- we came here to wait */
- }
-
- os_fast_mutex_unlock(&event->os_mutex);
-}
-
-/**********************************************************//**
-Waits for an event object until it is in the signaled state or
-a timeout is exceeded.
-@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
-UNIV_INTERN
-ulint
-os_event_wait_time_low(
-/*===================*/
- os_event_t event, /*!< in: event to wait */
- ulint time_in_usec, /*!< in: timeout in
- microseconds, or
- OS_SYNC_INFINITE_TIME */
- ib_int64_t reset_sig_count) /*!< in: zero or the value
- returned by previous call of
- os_event_reset(). */
-{
- ibool timed_out = FALSE;
-
-#ifdef __WIN__
- DWORD time_in_ms;
- if (time_in_usec != OS_SYNC_INFINITE_TIME) {
- time_in_ms = static_cast<DWORD>(time_in_usec / 1000);
- } else {
- time_in_ms = INFINITE;
- }
-#else
- struct timespec abstime;
-
- if (time_in_usec != OS_SYNC_INFINITE_TIME) {
- struct timeval tv;
- int ret;
- ulint sec;
- ulint usec;
-
- ret = ut_usectime(&sec, &usec);
- ut_a(ret == 0);
-
- tv.tv_sec = sec;
- tv.tv_usec = usec;
-
- tv.tv_usec += time_in_usec;
-
- if ((ulint) tv.tv_usec >= MICROSECS_IN_A_SECOND) {
- tv.tv_sec += tv.tv_usec / MICROSECS_IN_A_SECOND;
- tv.tv_usec %= MICROSECS_IN_A_SECOND;
- }
-
- abstime.tv_sec = tv.tv_sec;
- abstime.tv_nsec = tv.tv_usec * 1000;
- } else {
- abstime.tv_nsec = 999999999;
- abstime.tv_sec = (time_t) ULINT_MAX;
- }
-
- ut_a(abstime.tv_nsec <= 999999999);
-
-#endif /* __WIN__ */
-
- os_fast_mutex_lock(&event->os_mutex);
-
- if (!reset_sig_count) {
- reset_sig_count = event->signal_count();
- }
-
- do {
- if (event->is_set()
- || event->signal_count() != reset_sig_count) {
-
- break;
- }
-
- timed_out = os_cond_wait_timed(
- &event->cond_var, &event->os_mutex,
-#ifndef __WIN__
- &abstime
-#else
- time_in_ms
-#endif /* !__WIN__ */
- );
-
- } while (!timed_out);
-
- os_fast_mutex_unlock(&event->os_mutex);
-
- return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0);
-}
-
-/*********************************************************//**
-Creates an operating system mutex semaphore. Because these are slow, the
-mutex semaphore of InnoDB itself (ib_mutex_t) should be used where possible.
-@return the mutex handle */
-UNIV_INTERN
-os_ib_mutex_t
-os_mutex_create(void)
-/*=================*/
-{
- os_fast_mutex_t* mutex;
- os_ib_mutex_t mutex_str;
-
- mutex = static_cast<os_fast_mutex_t*>(
- ut_malloc(sizeof(os_fast_mutex_t)));
-
- os_fast_mutex_init(os_mutex_key, mutex);
-
- mutex_str = static_cast<os_ib_mutex_t>(ut_malloc(sizeof *mutex_str));
-
- mutex_str->handle = mutex;
- mutex_str->count = 0;
- mutex_str->event = os_event_create();
-
- os_atomic_increment_ulint(&os_mutex_count, 1);
-
- return(mutex_str);
-}
-
-/**********************************************************//**
-Acquires ownership of a mutex semaphore. */
-UNIV_INTERN
-void
-os_mutex_enter(
-/*===========*/
- os_ib_mutex_t mutex) /*!< in: mutex to acquire */
-{
- os_fast_mutex_lock(static_cast<os_fast_mutex_t*>(mutex->handle));
-
- (mutex->count)++;
-
- ut_a(mutex->count == 1);
-}
-
-/**********************************************************//**
-Releases ownership of a mutex. */
-UNIV_INTERN
-void
-os_mutex_exit(
-/*==========*/
- os_ib_mutex_t mutex) /*!< in: mutex to release */
-{
- ut_a(mutex);
-
- ut_a(mutex->count == 1);
-
- (mutex->count)--;
- os_fast_mutex_unlock(static_cast<os_fast_mutex_t*>(mutex->handle));
-}
-
-/**********************************************************//**
-Frees a mutex object. */
-UNIV_INTERN
-void
-os_mutex_free(
-/*==========*/
- os_ib_mutex_t mutex) /*!< in: mutex to free */
-{
- ut_a(mutex);
-
- os_event_free(mutex->event);
-
- os_atomic_decrement_ulint(&os_mutex_count, 1);
-
- os_fast_mutex_free(static_cast<os_fast_mutex_t*>(mutex->handle));
- ut_free(mutex->handle);
- ut_free(mutex);
-}
-
-/*********************************************************//**
-Initializes an operating system fast mutex semaphore. */
-UNIV_INTERN
-void
-os_fast_mutex_init_func(
-/*====================*/
- fast_mutex_t* fast_mutex) /*!< in: fast mutex */
-{
-#ifdef __WIN__
- ut_a(fast_mutex);
-
- InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#else
- ut_a(0 == pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST));
-#endif
- os_atomic_increment_ulint(&os_fast_mutex_count, 1);
-}
-
-/**********************************************************//**
-Acquires ownership of a fast mutex. */
-UNIV_INTERN
-void
-os_fast_mutex_lock_func(
-/*====================*/
- fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
-{
-#ifdef __WIN__
- EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#else
- pthread_mutex_lock(fast_mutex);
-#endif
-}
-
-/**********************************************************//**
-Releases ownership of a fast mutex. */
-UNIV_INTERN
-void
-os_fast_mutex_unlock_func(
-/*======================*/
- fast_mutex_t* fast_mutex) /*!< in: mutex to release */
-{
-#ifdef __WIN__
- LeaveCriticalSection(fast_mutex);
-#else
- pthread_mutex_unlock(fast_mutex);
-#endif
-}
-
-/**********************************************************//**
-Releases ownership of a fast mutex. Implies a full memory barrier even on
-platforms such as PowerPC where this is not normally required. */
-UNIV_INTERN
-void
-os_fast_mutex_unlock_full_barrier(
-/*=================*/
- os_fast_mutex_t* fast_mutex) /*!< in: mutex to release */
-{
-#ifdef __WIN__
- LeaveCriticalSection(&fast_mutex->mutex);
-#else
- pthread_mutex_unlock(&fast_mutex->mutex);
-#ifdef __powerpc__
- os_mb;
-#endif
-#endif
-}
-
-/**********************************************************//**
-Frees a mutex object. */
-UNIV_INTERN
-void
-os_fast_mutex_free_func(
-/*====================*/
- fast_mutex_t* fast_mutex) /*!< in: mutex to free */
-{
-#ifdef __WIN__
- ut_a(fast_mutex);
-
- DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex);
-#else
- int ret;
-
- ret = pthread_mutex_destroy(fast_mutex);
-
- if (UNIV_UNLIKELY(ret != 0)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: error: return value %lu when calling\n"
- "InnoDB: pthread_mutex_destroy().\n", (ulint) ret);
- fprintf(stderr,
- "InnoDB: Byte contents of the pthread mutex at %p:\n",
- (void*) fast_mutex);
- ut_print_buf(stderr, fast_mutex, sizeof(os_fast_mutex_t));
- putc('\n', stderr);
- }
-#endif
-
- os_atomic_decrement_ulint(&os_fast_mutex_count, 1);
-}
diff --git a/storage/xtradb/os/os0thread.cc b/storage/xtradb/os/os0thread.cc
deleted file mode 100644
index 8baf06b9bb7..00000000000
--- a/storage/xtradb/os/os0thread.cc
+++ /dev/null
@@ -1,355 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file os/os0thread.cc
-The interface to the operating system thread control primitives
-
-Created 9/8/1995 Heikki Tuuri
-*******************************************************/
-
-#include "os0thread.h"
-#ifdef UNIV_NONINL
-#include "os0thread.ic"
-#endif
-
-#ifdef __WIN__
-#include <windows.h>
-#elif UNIV_LINUX
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-#include <sys/types.h>
-#endif
-
-#ifndef UNIV_HOTBACKUP
-#include "srv0srv.h"
-#include "os0sync.h"
-
-/***************************************************************//**
-Compares two thread ids for equality.
-@return TRUE if equal */
-UNIV_INTERN
-ibool
-os_thread_eq(
-/*=========*/
- os_thread_id_t a, /*!< in: OS thread or thread id */
- os_thread_id_t b) /*!< in: OS thread or thread id */
-{
-#ifdef __WIN__
- if (a == b) {
- return(TRUE);
- }
-
- return(FALSE);
-#else
- if (pthread_equal(a, b)) {
- return(TRUE);
- }
-
- return(FALSE);
-#endif
-}
-
-/****************************************************************//**
-Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is
-unique for the thread though!
-@return thread identifier as a number */
-UNIV_INTERN
-ulint
-os_thread_pf(
-/*=========*/
- os_thread_id_t a) /*!< in: OS thread identifier */
-{
-#ifdef UNIV_HPUX10
- /* In HP-UX-10.20 a pthread_t is a struct of 3 fields: field1, field2,
- field3. We do not know if field1 determines the thread uniquely. */
-
- return((ulint)(a.field1));
-#else
- return((ulint) a);
-#endif
-}
-
-/*****************************************************************//**
-Returns the thread identifier of current thread. Currently the thread
-identifier in Unix is the thread handle itself. Note that in HP-UX
-pthread_t is a struct of 3 fields.
-@return current thread identifier */
-UNIV_INTERN
-os_thread_id_t
-os_thread_get_curr_id(void)
-/*=======================*/
-{
-#ifdef __WIN__
- return(GetCurrentThreadId());
-#else
- return(pthread_self());
-#endif
-}
-
-/*****************************************************************//**
-Returns the system-specific thread identifier of current thread. On Linux,
-returns tid. On other systems currently returns os_thread_get_curr_id().
-
-@return current thread identifier */
-UNIV_INTERN
-os_tid_t
-os_thread_get_tid(void)
-/*===================*/
-{
-#ifdef UNIV_LINUX
- return((os_tid_t)syscall(SYS_gettid));
-#else
- return(os_thread_get_curr_id());
-#endif
-}
-
-
-/****************************************************************//**
-Creates a new thread of execution. The execution starts from
-the function given. The start function takes a void* parameter
-and returns an ulint.
-@return handle to the thread */
-UNIV_INTERN
-os_thread_t
-os_thread_create_func(
-/*==================*/
- os_thread_func_t func, /*!< in: pointer to function
- from which to start */
- void* arg, /*!< in: argument to start
- function */
- os_thread_id_t* thread_id) /*!< out: id of the created
- thread, or NULL */
-{
- /* the new thread should look recent changes up here so far. */
- os_wmb;
-
-#ifdef __WIN__
- os_thread_t thread;
- DWORD win_thread_id;
-
- os_atomic_increment_ulint(&os_thread_count, 1);
-
- thread = CreateThread(NULL, /* no security attributes */
- 0, /* default size stack */
- func,
- arg,
- 0, /* thread runs immediately */
- &win_thread_id);
-
- if (thread_id) {
- *thread_id = win_thread_id;
- }
-
- return((os_thread_t)thread);
-#else
- int ret;
- os_thread_t pthread;
- pthread_attr_t attr;
-
-#ifndef UNIV_HPUX10
- pthread_attr_init(&attr);
-#endif
-
-#ifdef UNIV_AIX
- /* We must make sure a thread stack is at least 32 kB, otherwise
- InnoDB might crash; we do not know if the default stack size on
- AIX is always big enough. An empirical test on AIX-4.3 suggested
- the size was 96 kB, though. */
-
- ret = pthread_attr_setstacksize(&attr,
- (size_t)(PTHREAD_STACK_MIN
- + 32 * 1024));
- if (ret) {
- fprintf(stderr,
- "InnoDB: Error: pthread_attr_setstacksize"
- " returned %d\n", ret);
- exit(1);
- }
-#endif
- ulint new_count = os_atomic_increment_ulint(&os_thread_count, 1);
- ut_a(new_count <= OS_THREAD_MAX_N);
-
-#ifdef UNIV_HPUX10
- ret = pthread_create(&pthread, pthread_attr_default, func, arg);
-#else
- ret = pthread_create(&pthread, &attr, func, arg);
-#endif
- ut_a(ret == 0);
-
-#ifndef UNIV_HPUX10
- pthread_attr_destroy(&attr);
-#endif
-
- if (thread_id) {
- *thread_id = pthread;
- }
-
- return(pthread);
-#endif
-}
-
-/** Waits until the specified thread completes and joins it.
-Its return value is ignored.
-@param[in,out] thread thread to join */
-UNIV_INTERN
-void
-os_thread_join(
- os_thread_t thread)
-{
- /* This function is currently only used to workaround glibc bug
- described in http://bugs.mysql.com/bug.php?id=82886
-
- On Windows, no workarounds are necessary, all threads
- are "detached" upon thread exit (handle is closed), so we do
- nothing.
- */
-#ifdef __WIN__
- /* Do nothing. */
-#else
-#ifdef UNIV_DEBUG
- const int ret MY_ATTRIBUTE((unused)) =
-#endif /* UNIV_DEBUG */
- pthread_join(thread, NULL);
-
- /* Waiting on already-quit threads is allowed. */
- ut_ad(ret == 0 || ret == ESRCH);
-#endif /* __WIN__ */
-}
-
-/*****************************************************************//**
-Exits the current thread. */
-UNIV_INTERN
-void
-os_thread_exit(
-/*===========*/
- void* exit_value, /*!< in: exit value; in Windows this void*
- is cast as a DWORD */
- bool detach) /*!< in: if true, the thread will be detached
- right before exiting. If false, another thread
- is responsible for joining this thread. */
-{
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Thread exits, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
-
-#ifdef UNIV_PFS_THREAD
- pfs_delete_thread();
-#endif
-
- os_atomic_decrement_ulint(&os_thread_count, 1);
-
-#ifdef __WIN__
- ExitThread((DWORD) exit_value);
-#else
- if (detach) {
- pthread_detach(pthread_self());
- }
- pthread_exit(exit_value);
-#endif
-}
-
-/*****************************************************************//**
-Advises the os to give up remainder of the thread's time slice. */
-UNIV_INTERN
-void
-os_thread_yield(void)
-/*=================*/
-{
-#if defined(__WIN__)
- SwitchToThread();
-#elif (defined(HAVE_SCHED_YIELD) && defined(HAVE_SCHED_H))
- sched_yield();
-#elif defined(HAVE_PTHREAD_YIELD_ZERO_ARG)
- pthread_yield();
-#elif defined(HAVE_PTHREAD_YIELD_ONE_ARG)
- pthread_yield(0);
-#else
- os_thread_sleep(0);
-#endif
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*****************************************************************//**
-The thread sleeps at least the time given in microseconds. */
-UNIV_INTERN
-void
-os_thread_sleep(
-/*============*/
- ulint tm) /*!< in: time in microseconds */
-{
-#ifdef __WIN__
- Sleep((DWORD) tm / 1000);
-#else
- struct timeval t;
-
- t.tv_sec = tm / 1000000;
- t.tv_usec = tm % 1000000;
-
- select(0, NULL, NULL, NULL, &t);
-#endif
-}
-
-/*****************************************************************//**
-Set relative scheduling priority for a given thread on Linux. Currently a
-no-op on other systems.
-
-@return An actual thread priority after the update */
-UNIV_INTERN
-ulint
-os_thread_set_priority(
-/*===================*/
- os_tid_t thread_id, /*!< in: thread id */
- ulint relative_priority) /*!< in: system-specific
- priority value */
-{
-#ifdef UNIV_LINUX
- lint thread_nice = 19 - relative_priority;
- if (setpriority(PRIO_PROCESS, thread_id, thread_nice) == -1) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Setting thread %lu nice to %ld failed, "
- "current nice %d, errno %d",
- os_thread_pf(thread_id), thread_nice,
- getpriority(PRIO_PROCESS, thread_id), errno);
- }
- return(19 - getpriority(PRIO_PROCESS, thread_id));
-#else
- return(relative_priority);
-#endif
-}
-
-/*****************************************************************//**
-Get priority for a given thread on Linux. Currently a
-no-op on other systems.
-
-@return An actual thread priority */
-UNIV_INTERN
-ulint
-os_thread_get_priority(
-/*===================*/
- os_tid_t thread_id) /*!< in: thread id */
-{
-#ifdef UNIV_LINUX
- return (getpriority(PRIO_PROCESS, thread_id));
-#else
- return (0);
-#endif
-}
diff --git a/storage/xtradb/page/page0cur.cc b/storage/xtradb/page/page0cur.cc
deleted file mode 100644
index 76e4c2aed9b..00000000000
--- a/storage/xtradb/page/page0cur.cc
+++ /dev/null
@@ -1,2180 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2012, Facebook Inc.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file page/page0cur.cc
-The page cursor
-
-Created 10/4/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "page0cur.h"
-#ifdef UNIV_NONINL
-#include "page0cur.ic"
-#endif
-
-#include "page0zip.h"
-#include "btr0btr.h"
-#include "mtr0log.h"
-#include "log0recv.h"
-#include "ut0ut.h"
-#ifndef UNIV_HOTBACKUP
-#include "rem0cmp.h"
-
-#ifdef PAGE_CUR_ADAPT
-# ifdef UNIV_SEARCH_PERF_STAT
-static ulint page_cur_short_succ = 0;
-# endif /* UNIV_SEARCH_PERF_STAT */
-
-/*******************************************************************//**
-Linear congruential pseudo-random number generator. Every call advances
-the generator state by one step and returns the new state:
-X[n+1] = (a * X[n] + c) mod m
-with
-X[0] = ut_time_us(NULL)
-a = 1103515245 (3^5 * 5 * 7 * 129749)
-c = 12345 (3 * 5 * 823)
-m = 18446744073709551616 (2^64)
-The state is seeded from the clock on the first call.
-
-@return number between 0 and 2^64-1 */
-static
-ib_uint64_t
-page_cur_lcg_prng(void)
-/*===================*/
-{
-#define LCG_a 1103515245
-#define LCG_c 12345
- static ib_uint64_t lcg_state = 0;
- static ibool lcg_seeded = FALSE;
-
- if (lcg_seeded == FALSE) {
- lcg_seeded = TRUE;
- lcg_state = (ib_uint64_t) ut_time_us(NULL);
- }
-
- /* The reduction modulo 2^64 is implicit in the 64-bit unsigned
- arithmetic of the state variable. */
- lcg_state *= LCG_a;
- lcg_state += LCG_c;
-
- return(lcg_state);
-}
-
-/****************************************************************//**
-Tries a search shortcut based on the last insert.
-The shortcut succeeds when the tuple is not smaller than the record of
-the last insert (PAGE_LAST_INSERT) and is smaller than the record that
-follows it. In that case the cursor is positioned on the last inserted
-record and the match counters are updated; otherwise the cursor and the
-counters are left untouched.
-@return TRUE on success */
-UNIV_INLINE
-ibool
-page_cur_try_search_shortcut(
-/*=========================*/
- const buf_block_t* block, /*!< in: index page */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* tuple, /*!< in: data tuple */
- ulint* iup_matched_fields,
- /*!< in/out: already matched
- fields in upper limit record */
- ulint* iup_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
- ulint* ilow_matched_fields,
- /*!< in/out: already matched
- fields in lower limit record */
- ulint* ilow_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
- page_cur_t* cursor) /*!< out: page cursor */
-{
- const rec_t* rec;
- const rec_t* next_rec;
- ulint low_match;
- ulint low_bytes;
- ulint up_match;
- ulint up_bytes;
-#ifdef UNIV_SEARCH_DEBUG
- page_cur_t cursor2;
-#endif
- ibool success = FALSE;
- const page_t* page = buf_block_get_frame(block);
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(dtuple_check_typed(tuple));
-
- rec = page_header_get_ptr(page, PAGE_LAST_INSERT);
-
- /* Validate the record before it is handed to rec_get_offsets(),
- so that a debug build reports the bad pointer at its source. */
- ut_ad(rec);
- ut_ad(page_rec_is_user_rec(rec));
-
- offsets = rec_get_offsets(rec, index, offsets,
- dtuple_get_n_fields(tuple), &heap);
-
- /* Start both comparisons from the smaller of the two match
- positions already known for the lower and upper limits. */
- ut_pair_min(&low_match, &low_bytes,
- *ilow_matched_fields, *ilow_matched_bytes,
- *iup_matched_fields, *iup_matched_bytes);
-
- up_match = low_match;
- up_bytes = low_bytes;
-
- /* The tuple must not sort before the last inserted record. */
- if (page_cmp_dtuple_rec_with_match(tuple, rec, offsets,
- &low_match, &low_bytes) < 0) {
- goto exit_func;
- }
-
- next_rec = page_rec_get_next_const(rec);
- offsets = rec_get_offsets(next_rec, index, offsets,
- dtuple_get_n_fields(tuple), &heap);
-
- /* The tuple must sort strictly before the following record. */
- if (page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets,
- &up_match, &up_bytes) >= 0) {
- goto exit_func;
- }
-
- page_cur_position(rec, block, cursor);
-
-#ifdef UNIV_SEARCH_DEBUG
- /* Cross-check the shortcut against the full search. */
- page_cur_search_with_match(block, index, tuple, PAGE_CUR_DBG,
- iup_matched_fields,
- iup_matched_bytes,
- ilow_matched_fields,
- ilow_matched_bytes,
- &cursor2);
- ut_a(cursor2.rec == cursor->rec);
-
- if (!page_rec_is_supremum(next_rec)) {
-
- ut_a(*iup_matched_fields == up_match);
- ut_a(*iup_matched_bytes == up_bytes);
- }
-
- ut_a(*ilow_matched_fields == low_match);
- ut_a(*ilow_matched_bytes == low_bytes);
-#endif
- /* The upper-limit counters are only reported when the upper
- limit is a real record, not the supremum. */
- if (!page_rec_is_supremum(next_rec)) {
-
- *iup_matched_fields = up_match;
- *iup_matched_bytes = up_bytes;
- }
-
- *ilow_matched_fields = low_match;
- *ilow_matched_bytes = low_bytes;
-
-#ifdef UNIV_SEARCH_PERF_STAT
- page_cur_short_succ++;
-#endif
- success = TRUE;
-exit_func:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(success);
-}
-
-#endif
-
-#ifdef PAGE_CUR_LE_OR_EXTENDS
-/****************************************************************//**
-Tells whether the nth field of a record is a character or binary type
-field that extends the nth field of a tuple: neither field is SQL NULL,
-the record field is at least as long as the tuple field, and the two
-compare equal over the length of the tuple field.
-@return TRUE if rec field extends tuple field */
-static
-ibool
-page_cur_rec_field_extends(
-/*=======================*/
- const dtuple_t* tuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: record */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: compare nth field */
-{
- const dfield_t* tuple_field;
- const dtype_t* field_type;
- const byte* rec_data;
- ulint rec_len;
- ulint tuple_len;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- tuple_field = dtuple_get_nth_field(tuple, n);
- field_type = dfield_get_type(tuple_field);
- rec_data = rec_get_nth_field(rec, offsets, n, &rec_len);
-
- /* Only these main types can be extended. */
- switch (field_type->mtype) {
- case DATA_VARCHAR:
- case DATA_CHAR:
- case DATA_FIXBINARY:
- case DATA_BINARY:
- case DATA_BLOB:
- case DATA_VARMYSQL:
- case DATA_MYSQL:
- break;
- default:
- return(FALSE);
- }
-
- tuple_len = dfield_get_len(tuple_field);
-
- if (tuple_len == UNIV_SQL_NULL
- || rec_len == UNIV_SQL_NULL
- || rec_len < tuple_len) {
-
- return(FALSE);
- }
-
- /* Compare only the leading tuple_len bytes of the record field. */
- if (cmp_data_data_slow(field_type->mtype, field_type->prtype,
- dfield_get_data(tuple_field), tuple_len,
- rec_data, tuple_len)) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-
-/****************************************************************//**
-Searches the right position for a page cursor.
-The page directory slots are bisected first; the records between the two
-remaining slot records are then scanned linearly until the lower and upper
-limit records are adjacent. The cursor is placed on the upper limit record
-when mode <= PAGE_CUR_GE and on the lower limit record otherwise, and the
-four matched fields/bytes counters are written back for the final limit
-records. */
-UNIV_INTERN
-void
-page_cur_search_with_match(
-/*=======================*/
- const buf_block_t* block, /*!< in: buffer block */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* tuple, /*!< in: data tuple */
- ulint mode, /*!< in: PAGE_CUR_L,
- PAGE_CUR_LE, PAGE_CUR_G, or
- PAGE_CUR_GE */
- ulint* iup_matched_fields,
- /*!< in/out: already matched
- fields in upper limit record */
- ulint* iup_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
- ulint* ilow_matched_fields,
- /*!< in/out: already matched
- fields in lower limit record */
- ulint* ilow_matched_bytes,
- /*!< in/out: already matched
- bytes in a field not yet
- completely matched */
- page_cur_t* cursor) /*!< out: page cursor */
-{
- ulint up;
- ulint low;
- ulint mid;
- const page_t* page;
- const page_dir_slot_t* slot;
- const rec_t* up_rec;
- const rec_t* low_rec;
- const rec_t* mid_rec;
- ulint up_matched_fields;
- ulint up_matched_bytes;
- ulint low_matched_fields;
- ulint low_matched_bytes;
- ulint cur_matched_fields;
- ulint cur_matched_bytes;
- int cmp;
-#ifdef UNIV_SEARCH_DEBUG
- int dbg_cmp;
- ulint dbg_matched_fields;
- ulint dbg_matched_bytes;
-#endif
-#ifdef UNIV_ZIP_DEBUG
- const page_zip_des_t* page_zip = buf_block_get_page_zip(block);
-#endif /* UNIV_ZIP_DEBUG */
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(block && tuple && iup_matched_fields && iup_matched_bytes
- && ilow_matched_fields && ilow_matched_bytes && cursor);
- ut_ad(dtuple_validate(tuple));
-#ifdef UNIV_DEBUG
- /* The mode assertion below is skipped for the optional modes
- PAGE_CUR_DBG and PAGE_CUR_LE_OR_EXTENDS when they are defined. */
-# ifdef PAGE_CUR_DBG
- if (mode != PAGE_CUR_DBG)
-# endif /* PAGE_CUR_DBG */
-# ifdef PAGE_CUR_LE_OR_EXTENDS
- if (mode != PAGE_CUR_LE_OR_EXTENDS)
-# endif /* PAGE_CUR_LE_OR_EXTENDS */
- ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
- || mode == PAGE_CUR_G || mode == PAGE_CUR_GE);
-#endif /* UNIV_DEBUG */
- page = buf_block_get_frame(block);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
- page_check_dir(page);
-
-#ifdef PAGE_CUR_ADAPT
- /* On a leaf page with a run of more than 3 inserts to the right,
- first try whether the tuple belongs right after the last insert. */
- if (page_is_leaf(page)
- && (mode == PAGE_CUR_LE)
- && (page_header_get_field(page, PAGE_N_DIRECTION) > 3)
- && (page_header_get_ptr(page, PAGE_LAST_INSERT))
- && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) {
-
- if (page_cur_try_search_shortcut(
- block, index, tuple,
- iup_matched_fields, iup_matched_bytes,
- ilow_matched_fields, ilow_matched_bytes,
- cursor)) {
- return;
- }
- }
-# ifdef PAGE_CUR_DBG
- /* PAGE_CUR_DBG behaves like PAGE_CUR_LE from here on; it only
- bypasses the shortcut above. */
- if (mode == PAGE_CUR_DBG) {
- mode = PAGE_CUR_LE;
- }
-# endif
-#endif
-
- /* The following flag does not work for non-latin1 char sets because
- cmp_full_field does not tell how many bytes matched */
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- ut_a(mode != PAGE_CUR_LE_OR_EXTENDS);
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-
- /* If mode PAGE_CUR_G is specified, we are trying to position the
- cursor to answer a query of the form "tuple < X", where tuple is
- the input parameter, and X denotes an arbitrary physical record on
- the page. We want to position the cursor on the first X which
- satisfies the condition. */
-
- up_matched_fields = *iup_matched_fields;
- up_matched_bytes = *iup_matched_bytes;
- low_matched_fields = *ilow_matched_fields;
- low_matched_bytes = *ilow_matched_bytes;
-
- /* Perform binary search. First the search is done through the page
- directory, after that as a linear search in the list of records
- owned by the upper limit directory slot. */
-
- low = 0;
- up = page_dir_get_n_slots(page) - 1;
-
- /* Perform binary search until the lower and upper limit directory
- slots come to the distance 1 of each other */
-
- while (up - low > 1) {
- mid = (low + up) / 2;
- slot = page_dir_get_nth_slot(page, mid);
- mid_rec = page_dir_slot_get_rec(slot);
-
- /* Resume the comparison from the smaller of the match
- positions known for the current lower and upper limits. */
- ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
- low_matched_fields, low_matched_bytes,
- up_matched_fields, up_matched_bytes);
-
- offsets = rec_get_offsets(mid_rec, index, offsets,
- dtuple_get_n_fields_cmp(tuple),
- &heap);
-
- cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
- &cur_matched_fields,
- &cur_matched_bytes);
- if (UNIV_LIKELY(cmp > 0)) {
-low_slot_match:
- low = mid;
- low_matched_fields = cur_matched_fields;
- low_matched_bytes = cur_matched_bytes;
-
- /* Presumably UNIV_EXPECT only adds a branch hint and
- evaluates to cmp, so this branch covers cmp < 0 - TODO
- confirm against the macro definition. */
- } else if (UNIV_EXPECT(cmp, -1)) {
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- if (mode == PAGE_CUR_LE_OR_EXTENDS
- && page_cur_rec_field_extends(
- tuple, mid_rec, offsets,
- cur_matched_fields)) {
-
- goto low_slot_match;
- }
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-up_slot_match:
- up = mid;
- up_matched_fields = cur_matched_fields;
- up_matched_bytes = cur_matched_bytes;
-
- /* Remaining case: the mode decides which limit moves to
- mid. */
- } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- || mode == PAGE_CUR_LE_OR_EXTENDS
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
- ) {
-
- goto low_slot_match;
- } else {
-
- goto up_slot_match;
- }
- }
-
- slot = page_dir_get_nth_slot(page, low);
- low_rec = page_dir_slot_get_rec(slot);
- slot = page_dir_get_nth_slot(page, up);
- up_rec = page_dir_slot_get_rec(slot);
-
- /* Perform linear search until the upper and lower records come to
- distance 1 of each other. */
-
- while (page_rec_get_next_const(low_rec) != up_rec) {
-
- mid_rec = page_rec_get_next_const(low_rec);
-
- ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
- low_matched_fields, low_matched_bytes,
- up_matched_fields, up_matched_bytes);
-
- offsets = rec_get_offsets(mid_rec, index, offsets,
- dtuple_get_n_fields_cmp(tuple),
- &heap);
-
- cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
- &cur_matched_fields,
- &cur_matched_bytes);
- /* Same three-way decision as in the slot search above, applied
- to individual records. */
- if (UNIV_LIKELY(cmp > 0)) {
-low_rec_match:
- low_rec = mid_rec;
- low_matched_fields = cur_matched_fields;
- low_matched_bytes = cur_matched_bytes;
-
- } else if (UNIV_EXPECT(cmp, -1)) {
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- if (mode == PAGE_CUR_LE_OR_EXTENDS
- && page_cur_rec_field_extends(
- tuple, mid_rec, offsets,
- cur_matched_fields)) {
-
- goto low_rec_match;
- }
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
-up_rec_match:
- up_rec = mid_rec;
- up_matched_fields = cur_matched_fields;
- up_matched_bytes = cur_matched_bytes;
- } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
-#ifdef PAGE_CUR_LE_OR_EXTENDS
- || mode == PAGE_CUR_LE_OR_EXTENDS
-#endif /* PAGE_CUR_LE_OR_EXTENDS */
- ) {
-
- goto low_rec_match;
- } else {
-
- goto up_rec_match;
- }
- }
-
-#ifdef UNIV_SEARCH_DEBUG
-
- /* Check that the lower and upper limit records have the
- right alphabetical order compared to tuple. */
- dbg_matched_fields = 0;
- dbg_matched_bytes = 0;
-
- offsets = rec_get_offsets(low_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, offsets,
- &dbg_matched_fields,
- &dbg_matched_bytes);
- if (mode == PAGE_CUR_G) {
- ut_a(dbg_cmp >= 0);
- } else if (mode == PAGE_CUR_GE) {
- ut_a(dbg_cmp == 1);
- } else if (mode == PAGE_CUR_L) {
- ut_a(dbg_cmp == 1);
- } else if (mode == PAGE_CUR_LE) {
- ut_a(dbg_cmp >= 0);
- }
-
- if (!page_rec_is_infimum(low_rec)) {
-
- ut_a(low_matched_fields == dbg_matched_fields);
- ut_a(low_matched_bytes == dbg_matched_bytes);
- }
-
- dbg_matched_fields = 0;
- dbg_matched_bytes = 0;
-
- offsets = rec_get_offsets(up_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, offsets,
- &dbg_matched_fields,
- &dbg_matched_bytes);
- if (mode == PAGE_CUR_G) {
- ut_a(dbg_cmp == -1);
- } else if (mode == PAGE_CUR_GE) {
- ut_a(dbg_cmp <= 0);
- } else if (mode == PAGE_CUR_L) {
- ut_a(dbg_cmp <= 0);
- } else if (mode == PAGE_CUR_LE) {
- ut_a(dbg_cmp == -1);
- }
-
- if (!page_rec_is_supremum(up_rec)) {
-
- ut_a(up_matched_fields == dbg_matched_fields);
- ut_a(up_matched_bytes == dbg_matched_bytes);
- }
-#endif
- /* NOTE(review): this relies on the numeric order of the PAGE_CUR_*
- constants; presumably PAGE_CUR_G and PAGE_CUR_GE are the ones not
- greater than PAGE_CUR_GE - confirm against their definition. */
- if (mode <= PAGE_CUR_GE) {
- page_cur_position(up_rec, block, cursor);
- } else {
- page_cur_position(low_rec, block, cursor);
- }
-
- *iup_matched_fields = up_matched_fields;
- *iup_matched_bytes = up_matched_bytes;
- *ilow_matched_fields = low_matched_fields;
- *ilow_matched_bytes = low_matched_bytes;
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/***********************************************************//**
-Places a page cursor on a pseudo-randomly selected user record of a page.
-When the page holds no user records the cursor is left before the first
-record, i.e., on the infimum record. */
-UNIV_INTERN
-void
-page_cur_open_on_rnd_user_rec(
-/*==========================*/
- buf_block_t* block, /*!< in: page */
- page_cur_t* cursor) /*!< out: page cursor */
-{
- ulint n_user_recs;
- ulint steps;
-
- n_user_recs = page_get_n_recs(buf_block_get_frame(block));
-
- page_cur_set_before_first(block, cursor);
-
- if (UNIV_UNLIKELY(!n_user_recs)) {
-
- return;
- }
-
- /* Advance between 1 and n_user_recs positions from the infimum. */
- steps = 1 + (ulint) (page_cur_lcg_prng() % n_user_recs);
-
- while (steps > 0) {
- page_cur_move_to_next(cursor);
- steps--;
- }
-}
-
-/***********************************************************//**
-Writes the log record of a record insert on a page.
-The inserted record is logged as a difference against the record the
-cursor points to: only the tail of insert_rec starting at the first
-differing byte is copied to the log. The body written here consists of
-the page offset of cursor_rec (omitted in MTR_LOG_SHORT_INSERTS mode),
-twice the logged tail length with the lowest bit used as an "extra info
-follows" flag, optionally the info bits, the record origin offset and
-the mismatch index, and finally the tail bytes themselves. */
-static
-void
-page_cur_insert_rec_write_log(
-/*==========================*/
- rec_t* insert_rec, /*!< in: inserted physical record */
- ulint rec_size, /*!< in: insert_rec size */
- rec_t* cursor_rec, /*!< in: record the
- cursor is pointing to */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- ulint cur_rec_size;
- ulint extra_size;
- ulint cur_extra_size;
- const byte* ins_ptr;
- byte* log_ptr;
- const byte* log_end;
- ulint i;
-
- ut_a(rec_size < UNIV_PAGE_SIZE);
- ut_ad(page_align(insert_rec) == page_align(cursor_rec));
- ut_ad(!page_rec_is_comp(insert_rec)
- == !dict_table_is_comp(index->table));
-
- {
- /* Determine the header (extra) and total sizes of both
- records; the offsets arrays are needed only in this scope. */
- mem_heap_t* heap = NULL;
- ulint cur_offs_[REC_OFFS_NORMAL_SIZE];
- ulint ins_offs_[REC_OFFS_NORMAL_SIZE];
-
- ulint* cur_offs;
- ulint* ins_offs;
-
- rec_offs_init(cur_offs_);
- rec_offs_init(ins_offs_);
-
- cur_offs = rec_get_offsets(cursor_rec, index, cur_offs_,
- ULINT_UNDEFINED, &heap);
- ins_offs = rec_get_offsets(insert_rec, index, ins_offs_,
- ULINT_UNDEFINED, &heap);
-
- extra_size = rec_offs_extra_size(ins_offs);
- cur_extra_size = rec_offs_extra_size(cur_offs);
- ut_ad(rec_size == rec_offs_size(ins_offs));
- cur_rec_size = rec_offs_size(cur_offs);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- /* ins_ptr starts at the first byte of the record header. */
- ins_ptr = insert_rec - extra_size;
-
- /* i counts the leading bytes shared with cursor_rec; it stays 0
- when the two record headers differ in size. */
- i = 0;
-
- if (cur_extra_size == extra_size) {
- ulint min_rec_size = ut_min(cur_rec_size, rec_size);
-
- const byte* cur_ptr = cursor_rec - cur_extra_size;
-
- /* Find out the first byte in insert_rec which differs from
- cursor_rec; skip the bytes in the record info */
-
- do {
- if (*ins_ptr == *cur_ptr) {
- i++;
- ins_ptr++;
- cur_ptr++;
- } else if ((i < extra_size)
- && (i >= extra_size
- - page_rec_get_base_extra_size
- (insert_rec))) {
- /* The mismatch lies in the last
- page_rec_get_base_extra_size() bytes of the
- header: jump to the record origin and continue
- comparing the data bytes. */
- i = extra_size;
- ins_ptr = insert_rec;
- cur_ptr = cursor_rec;
- } else {
- break;
- }
- } while (i < min_rec_size);
- }
-
- if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) {
-
- /* Reserve room for the fixed part of the body: 2 bytes for
- the cursor offset, 1 byte for the info bits and 5 bytes for
- each of the three values written with
- mach_write_compressed(), plus MLOG_BUF_MARGIN. */
- if (page_rec_is_comp(insert_rec)) {
- log_ptr = mlog_open_and_write_index(
- mtr, insert_rec, index, MLOG_COMP_REC_INSERT,
- 2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
- if (UNIV_UNLIKELY(!log_ptr)) {
- /* Logging in mtr is switched off
- during crash recovery: in that case
- mlog_open returns NULL */
- return;
- }
- } else {
- log_ptr = mlog_open(mtr, 11
- + 2 + 5 + 1 + 5 + 5
- + MLOG_BUF_MARGIN);
- if (UNIV_UNLIKELY(!log_ptr)) {
- /* Logging in mtr is switched off
- during crash recovery: in that case
- mlog_open returns NULL */
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_fast(
- insert_rec, MLOG_REC_INSERT, log_ptr, mtr);
- }
-
- log_end = &log_ptr[2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
- /* Write the cursor rec offset as a 2-byte ulint */
- mach_write_to_2(log_ptr, page_offset(cursor_rec));
- log_ptr += 2;
- } else {
- /* Short inserts: no initial log record and no cursor offset
- are written here. */
- log_ptr = mlog_open(mtr, 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
- if (!log_ptr) {
- /* Logging in mtr is switched off during crash
- recovery: in that case mlog_open returns NULL */
- return;
- }
- log_end = &log_ptr[5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
- }
-
- /* The extra info must be logged whenever the info and status bits
- or the sizes of the two records differ. */
- if (page_rec_is_comp(insert_rec)) {
- if (UNIV_UNLIKELY
- (rec_get_info_and_status_bits(insert_rec, TRUE)
- != rec_get_info_and_status_bits(cursor_rec, TRUE))) {
-
- goto need_extra_info;
- }
- } else {
- if (UNIV_UNLIKELY
- (rec_get_info_and_status_bits(insert_rec, FALSE)
- != rec_get_info_and_status_bits(cursor_rec, FALSE))) {
-
- goto need_extra_info;
- }
- }
-
- if (extra_size != cur_extra_size || rec_size != cur_rec_size) {
-need_extra_info:
- /* Write the record end segment length
- and the extra info storage flag */
- log_ptr += mach_write_compressed(log_ptr,
- 2 * (rec_size - i) + 1);
-
- /* Write the info bits */
- mach_write_to_1(log_ptr,
- rec_get_info_and_status_bits(
- insert_rec,
- page_rec_is_comp(insert_rec)));
- log_ptr++;
-
- /* Write the record origin offset */
- log_ptr += mach_write_compressed(log_ptr, extra_size);
-
- /* Write the mismatch index */
- log_ptr += mach_write_compressed(log_ptr, i);
-
- ut_a(i < UNIV_PAGE_SIZE);
- ut_a(extra_size < UNIV_PAGE_SIZE);
- } else {
- /* Write the record end segment length
- and the extra info storage flag */
- log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i));
- }
-
- /* Write to the log the inserted index record end segment which
- differs from the cursor record */
-
- rec_size -= i;
-
- /* Copy the tail into the reserved area when it fits; otherwise
- close the reserved area and append the tail separately. */
- if (log_ptr + rec_size <= log_end) {
- memcpy(log_ptr, ins_ptr, rec_size);
- mlog_close(mtr, log_ptr + rec_size);
- } else {
- mlog_close(mtr, log_ptr);
- ut_a(rec_size < UNIV_PAGE_SIZE);
- mlog_catenate_string(mtr, ins_ptr, rec_size);
- }
-}
-#else /* !UNIV_HOTBACKUP */
-# define page_cur_insert_rec_write_log(ins_rec,size,cur,index,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses a log record of a record insert on a page.
-When block is NULL the record is only parsed and nothing is applied.
-Otherwise the inserted record is rebuilt from the leading bytes it shares
-with the cursor record plus the end segment stored in the log, and is
-inserted after the cursor record. Values read from the log that are out
-of range set recv_sys->found_corrupt_log and make the function return
-NULL.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_cur_parse_insert_rec(
-/*======================*/
- ibool is_short,/*!< in: TRUE if short inserts */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in: page or NULL */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ulint origin_offset;
- ulint end_seg_len;
- ulint mismatch_index;
- page_t* page;
- rec_t* cursor_rec;
- byte buf1[1024];
- byte* buf;
- byte* ptr2 = ptr;
- ulint info_and_status_bits = 0; /* remove warning */
- page_cur_t cursor;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- page = block ? buf_block_get_frame(block) : NULL;
-
- if (is_short) {
- /* Short inserts carry no cursor offset: the record goes
- after the last record preceding the supremum. */
- cursor_rec = page_rec_get_prev(page_get_supremum_rec(page));
- } else {
- ulint offset;
-
- /* Read the cursor rec offset as a 2-byte ulint */
-
- if (UNIV_UNLIKELY(end_ptr < ptr + 2)) {
-
- return(NULL);
- }
-
- offset = mach_read_from_2(ptr);
- ptr += 2;
-
- cursor_rec = page + offset;
-
- if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)) {
-
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
- }
-
- /* end_seg_len holds twice the end segment length; its lowest bit
- tells whether the extra info fields follow. */
- ptr = mach_parse_compressed(ptr, end_ptr, &end_seg_len);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (UNIV_UNLIKELY(end_seg_len >= UNIV_PAGE_SIZE << 1)) {
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
-
- if (end_seg_len & 0x1UL) {
- /* Read the info bits */
-
- if (end_ptr < ptr + 1) {
-
- return(NULL);
- }
-
- info_and_status_bits = mach_read_from_1(ptr);
- ptr++;
-
- ptr = mach_parse_compressed(ptr, end_ptr, &origin_offset);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- /* An out-of-range value can only come from a damaged log:
- report it like the other range checks in this function
- instead of failing an assertion. */
- if (UNIV_UNLIKELY(origin_offset >= UNIV_PAGE_SIZE)) {
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
-
- ptr = mach_parse_compressed(ptr, end_ptr, &mismatch_index);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (UNIV_UNLIKELY(mismatch_index >= UNIV_PAGE_SIZE)) {
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
- }
-
- if (UNIV_UNLIKELY(end_ptr < ptr + (end_seg_len >> 1))) {
-
- return(NULL);
- }
-
- if (!block) {
-
- /* Parse-only call: skip over the end segment. */
- return(ptr + (end_seg_len >> 1));
- }
-
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
- ut_ad(!buf_block_get_page_zip(block) || page_is_comp(page));
-
- /* Read from the log the inserted index record end segment which
- differs from the cursor record */
-
- offsets = rec_get_offsets(cursor_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (!(end_seg_len & 0x1UL)) {
- /* No extra info was logged: take the info bits and the sizes
- from the cursor record. */
- info_and_status_bits = rec_get_info_and_status_bits(
- cursor_rec, page_is_comp(page));
- origin_offset = rec_offs_extra_size(offsets);
- mismatch_index = rec_offs_size(offsets) - (end_seg_len >> 1);
- }
-
- end_seg_len >>= 1;
-
- /* Check mismatch_index before it is used to size the buffer: the
- value derived from the cursor record above is not covered by the
- range checks done while parsing. */
- if (UNIV_UNLIKELY(mismatch_index >= UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "Is short %lu, info_and_status_bits %lu, offset %lu, "
- "o_offset %lu\n"
- "mismatch index %lu, end_seg_len %lu\n"
- "parsed len %lu\n",
- (ulong) is_short, (ulong) info_and_status_bits,
- (ulong) page_offset(cursor_rec),
- (ulong) origin_offset,
- (ulong) mismatch_index, (ulong) end_seg_len,
- (ulong) (ptr - ptr2));
-
- fputs("Dump of 300 bytes of log:\n", stderr);
- ut_print_buf(stderr, ptr2, 300);
- putc('\n', stderr);
-
- buf_page_print(page, 0, 0);
-
- ut_error;
- }
-
- /* Use the stack buffer for small records, the heap otherwise. */
- if (mismatch_index + end_seg_len < sizeof buf1) {
- buf = buf1;
- } else {
- buf = static_cast<byte*>(
- mem_alloc(mismatch_index + end_seg_len));
- }
-
- /* Build the inserted record to buf */
-
- ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index);
- ut_memcpy(buf + mismatch_index, ptr, end_seg_len);
-
- if (page_is_comp(page)) {
- rec_set_info_and_status_bits(buf + origin_offset,
- info_and_status_bits);
- } else {
- rec_set_info_bits_old(buf + origin_offset,
- info_and_status_bits);
- }
-
- page_cur_position(cursor_rec, block, &cursor);
-
- offsets = rec_get_offsets(buf + origin_offset, index, offsets,
- ULINT_UNDEFINED, &heap);
- if (UNIV_UNLIKELY(!page_cur_rec_insert(&cursor,
- buf + origin_offset,
- index, offsets, mtr))) {
- /* The redo log record should only have been written
- after the write was successful. */
- ut_error;
- }
-
- if (buf != buf1) {
-
- mem_free(buf);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- return(ptr + end_seg_len);
-}
-
-/***********************************************************//**
-Inserts a record next to page cursor on an uncompressed page.
-Returns pointer to inserted record if succeed, i.e., enough
-space available, NULL otherwise. The cursor stays at the same position.
-Space is taken from the head of the page free list when that record is
-large enough, otherwise from the page heap. The page header fields
-PAGE_N_RECS, PAGE_DIRECTION, PAGE_N_DIRECTION and PAGE_LAST_INSERT and
-the n_owned count of the owner record are updated, and the insert is
-logged when mtr is not NULL.
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INTERN
-rec_t*
-page_cur_insert_rec_low(
-/*====================*/
- rec_t* current_rec,/*!< in: pointer to current record after
- which the new record is inserted */
- dict_index_t* index, /*!< in: record descriptor */
- const rec_t* rec, /*!< in: pointer to a physical record */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
-{
- byte* insert_buf;
- ulint rec_size;
- page_t* page; /*!< the relevant page */
- rec_t* last_insert; /*!< cursor position at previous
- insert */
- rec_t* free_rec; /*!< a free record that was reused,
- or NULL */
- rec_t* insert_rec; /*!< inserted record */
- ulint heap_no; /*!< heap number of the inserted
- record */
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- page = page_align(current_rec);
- ut_ad(dict_table_is_comp(index->table)
- == (ibool) !!page_is_comp(page));
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
- ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
- == index->id || recv_recovery_is_on()
- || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index)));
-
- ut_ad(!page_rec_is_supremum(current_rec));
-
- /* 1. Get the size of the physical record in the page */
- rec_size = rec_offs_size(offsets);
-
-#ifdef UNIV_DEBUG_VALGRIND
- {
- const void* rec_start
- = rec - rec_offs_extra_size(offsets);
- ulint extra_size
- = rec_offs_extra_size(offsets)
- - (rec_offs_comp(offsets)
- ? REC_N_NEW_EXTRA_BYTES
- : REC_N_OLD_EXTRA_BYTES);
-
- /* All data bytes of the record must be valid. */
- UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
- /* The variable-length header must be valid. */
- UNIV_MEM_ASSERT_RW(rec_start, extra_size);
- }
-#endif /* UNIV_DEBUG_VALGRIND */
-
- /* 2. Try to find suitable space from page memory management */
-
- free_rec = page_header_get_ptr(page, PAGE_FREE);
- if (UNIV_LIKELY_NULL(free_rec)) {
- /* Try to allocate from the head of the free list. */
- ulint foffsets_[REC_OFFS_NORMAL_SIZE];
- ulint* foffsets = foffsets_;
- mem_heap_t* heap = NULL;
-
- rec_offs_init(foffsets_);
-
- foffsets = rec_get_offsets(
- free_rec, index, foffsets, ULINT_UNDEFINED, &heap);
- if (rec_offs_size(foffsets) < rec_size) {
- /* The free record is too small: fall back to the
- page heap. */
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- goto use_heap;
- }
-
- insert_buf = free_rec - rec_offs_extra_size(foffsets);
-
- /* Reuse the heap number of the free record and unlink it
- from the free list. */
- if (page_is_comp(page)) {
- heap_no = rec_get_heap_no_new(free_rec);
- page_mem_alloc_free(page, NULL,
- rec_get_next_ptr(free_rec, TRUE),
- rec_size);
- } else {
- heap_no = rec_get_heap_no_old(free_rec);
- page_mem_alloc_free(page, NULL,
- rec_get_next_ptr(free_rec, FALSE),
- rec_size);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- } else {
-use_heap:
- free_rec = NULL;
- insert_buf = page_mem_alloc_heap(page, NULL,
- rec_size, &heap_no);
-
- if (UNIV_UNLIKELY(insert_buf == NULL)) {
- return(NULL);
- }
- }
-
- /* 3. Create the record */
- insert_rec = rec_copy(insert_buf, rec, offsets);
- rec_offs_make_valid(insert_rec, index, offsets);
-
- /* This is because assertion below is debug assertion */
-#ifdef UNIV_DEBUG
- if (UNIV_UNLIKELY(current_rec == insert_rec)) {
- ulint extra_len, data_len;
- extra_len = rec_offs_extra_size(offsets);
- data_len = rec_offs_data_size(offsets);
-
- /* Cast the arguments to the types the conversion
- specifiers expect: %lu takes an unsigned long and %p a
- pointer to void. */
- fprintf(stderr, "InnoDB: Error: current_rec == insert_rec "
- " extra_len %lu data_len %lu insert_buf %p rec %p\n",
- (ulong) extra_len, (ulong) data_len,
- (const void*) insert_buf, (const void*) rec);
- fprintf(stderr, "InnoDB: Physical record: \n");
- rec_print(stderr, rec, index);
- fprintf(stderr, "InnoDB: Inserted record: \n");
- rec_print(stderr, insert_rec, index);
- fprintf(stderr, "InnoDB: Current record: \n");
- rec_print(stderr, current_rec, index);
- ut_a(current_rec != insert_rec);
- }
-#endif /* UNIV_DEBUG */
-
- /* 4. Insert the record in the linked list of records */
- ut_ad(current_rec != insert_rec);
-
- {
- /* next record after current before the insertion */
- rec_t* next_rec = page_rec_get_next(current_rec);
-#ifdef UNIV_DEBUG
- if (page_is_comp(page)) {
- ut_ad(rec_get_status(current_rec)
- <= REC_STATUS_INFIMUM);
- ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
- ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
- }
-#endif
- page_rec_set_next(insert_rec, next_rec);
- page_rec_set_next(current_rec, insert_rec);
- }
-
- page_header_set_field(page, NULL, PAGE_N_RECS,
- 1 + page_get_n_recs(page));
-
- /* 5. Set the n_owned field in the inserted record to zero,
- and set the heap_no field */
- if (page_is_comp(page)) {
- rec_set_n_owned_new(insert_rec, NULL, 0);
- rec_set_heap_no_new(insert_rec, heap_no);
- } else {
- rec_set_n_owned_old(insert_rec, 0);
- rec_set_heap_no_old(insert_rec, heap_no);
- }
-
- UNIV_MEM_ASSERT_RW(rec_get_start(insert_rec, offsets),
- rec_offs_size(offsets));
- /* 6. Update the last insertion info in page header */
-
- last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
- ut_ad(!last_insert || !page_is_comp(page)
- || rec_get_node_ptr_flag(last_insert)
- == rec_get_node_ptr_flag(insert_rec));
-
- if (UNIV_UNLIKELY(last_insert == NULL)) {
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
-
- } else if ((last_insert == current_rec)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_LEFT)) {
-
- /* Inserted right after the previous insert: extend the
- run of inserts to the right. */
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_RIGHT);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
-
- } else if ((page_rec_get_next(insert_rec) == last_insert)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_RIGHT)) {
-
- /* Inserted right before the previous insert: extend the
- run of inserts to the left. */
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_LEFT);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
- } else {
- page_header_set_field(page, NULL, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
- }
-
- page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, insert_rec);
-
- /* 7. It remains to update the owner record. */
- {
- rec_t* owner_rec = page_rec_find_owner_rec(insert_rec);
- ulint n_owned;
- if (page_is_comp(page)) {
- n_owned = rec_get_n_owned_new(owner_rec);
- rec_set_n_owned_new(owner_rec, NULL, n_owned + 1);
- } else {
- n_owned = rec_get_n_owned_old(owner_rec);
- rec_set_n_owned_old(owner_rec, n_owned + 1);
- }
-
- /* 8. Now we have incremented the n_owned field of the owner
- record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
- we have to split the corresponding directory slot in two. */
-
- if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
- page_dir_split_slot(
- page, NULL,
- page_dir_find_owner_slot(owner_rec));
- }
- }
-
- /* 9. Write log record of the insert */
- if (UNIV_LIKELY(mtr != NULL)) {
- page_cur_insert_rec_write_log(insert_rec, rec_size,
- current_rec, index, mtr);
- }
-
- btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert");
-
- return(insert_rec);
-}
-
-/***********************************************************//**
-Inserts a record next to page cursor on a compressed and uncompressed
-page. Returns pointer to inserted record if succeed, i.e.,
-enough space available, NULL otherwise.
-The cursor stays at the same position.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return pointer to record if succeed, NULL otherwise */
-UNIV_INTERN
-rec_t*
-page_cur_insert_rec_zip(
-/*====================*/
- page_cur_t* cursor, /*!< in/out: page cursor */
- dict_index_t* index, /*!< in: record descriptor */
- const rec_t* rec, /*!< in: pointer to a physical record */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
-{
- byte* insert_buf;
- ulint rec_size;
- page_t* page; /*!< the relevant page */
- rec_t* last_insert; /*!< cursor position at previous
- insert */
- rec_t* free_rec; /*!< a free record that was reused,
- or NULL */
- rec_t* insert_rec; /*!< inserted record */
- ulint heap_no; /*!< heap number of the inserted
- record */
- page_zip_des_t* page_zip;
-
- page_zip = page_cur_get_page_zip(cursor);
- ut_ad(page_zip);
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- page = page_cur_get_page(cursor);
- ut_ad(dict_table_is_comp(index->table));
- ut_ad(page_is_comp(page));
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
- ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
- == index->id || recv_recovery_is_on()
- || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index)));
-
- ut_ad(!page_cur_is_after_last(cursor));
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
- /* 1. Get the size of the physical record in the page */
- rec_size = rec_offs_size(offsets);
-
-#ifdef UNIV_DEBUG_VALGRIND
- {
- const void* rec_start
- = rec - rec_offs_extra_size(offsets);
- ulint extra_size
- = rec_offs_extra_size(offsets)
- - (rec_offs_comp(offsets)
- ? REC_N_NEW_EXTRA_BYTES
- : REC_N_OLD_EXTRA_BYTES);
-
- /* All data bytes of the record must be valid. */
- UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
- /* The variable-length header must be valid. */
- UNIV_MEM_ASSERT_RW(rec_start, extra_size);
- }
-#endif /* UNIV_DEBUG_VALGRIND */
-
- const bool reorg_before_insert = page_has_garbage(page)
- && rec_size > page_get_max_insert_size(page, 1)
- && rec_size <= page_get_max_insert_size_after_reorganize(
- page, 1);
-
- /* 2. Try to find suitable space from page memory management */
- if (!page_zip_available(page_zip, dict_index_is_clust(index),
- rec_size, 1)
- || reorg_before_insert) {
- /* The values can change dynamically. */
- bool log_compressed = page_zip_log_pages;
- ulint level = page_zip_level;
-#ifdef UNIV_DEBUG
- rec_t* cursor_rec = page_cur_get_rec(cursor);
-#endif /* UNIV_DEBUG */
-
- /* If we are not writing compressed page images, we
- must reorganize the page before attempting the
- insert. */
- if (recv_recovery_is_on()) {
- /* Insert into the uncompressed page only.
- The page reorganization or creation that we
- would attempt outside crash recovery would
- have been covered by a previous redo log record. */
- } else if (page_is_empty(page)) {
- ut_ad(page_cur_is_before_first(cursor));
-
- /* This is an empty page. Recreate it to
- get rid of the modification log. */
- page_create_zip(page_cur_get_block(cursor), index,
- page_header_get_field(page, PAGE_LEVEL),
- 0, mtr);
- ut_ad(!page_header_get_ptr(page, PAGE_FREE));
-
- if (page_zip_available(
- page_zip, dict_index_is_clust(index),
- rec_size, 1)) {
- goto use_heap;
- }
-
- /* The cursor should remain on the page infimum. */
- return(NULL);
- } else if (!page_zip->m_nonempty && !page_has_garbage(page)) {
- /* The page has been freshly compressed, so
- reorganizing it will not help. */
- } else if (log_compressed && !reorg_before_insert) {
- /* Insert into uncompressed page only, and
- try page_zip_reorganize() afterwards. */
- } else if (btr_page_reorganize_low(
- recv_recovery_is_on(), level,
- cursor, index, mtr)) {
- ut_ad(!page_header_get_ptr(page, PAGE_FREE));
-
- if (page_zip_available(
- page_zip, dict_index_is_clust(index),
- rec_size, 1)) {
- /* After reorganizing, there is space
- available. */
- goto use_heap;
- }
- } else {
- ut_ad(cursor->rec == cursor_rec);
- return(NULL);
- }
-
- /* Try compressing the whole page afterwards. */
- insert_rec = page_cur_insert_rec_low(
- cursor->rec, index, rec, offsets, NULL);
-
- /* If recovery is on, this implies that the compression
- of the page was successful during runtime. Had that not
- been the case or had the redo logging of compressed
- pages been enabled during runtime then we'd have seen
- a MLOG_ZIP_PAGE_COMPRESS redo record. Therefore, we
- know that we don't need to reorganize the page. We,
- however, do need to recompress the page. That will
- happen when the next redo record is read which must
- be of type MLOG_ZIP_PAGE_COMPRESS_NO_DATA and it must
- contain a valid compression level value.
- This implies that during recovery from this point till
- the next redo is applied the uncompressed and
- compressed versions are not identical and
- page_zip_validate will fail but that is OK because
- we call page_zip_validate only after processing
- all changes to a page under a single mtr during
- recovery. */
- if (insert_rec == NULL) {
- /* Out of space.
- This should never occur during crash recovery,
- because the MLOG_COMP_REC_INSERT should only
- be logged after a successful operation. */
- ut_ad(!recv_recovery_is_on());
- } else if (recv_recovery_is_on()) {
- /* This should be followed by
- MLOG_ZIP_PAGE_COMPRESS_NO_DATA,
- which should succeed. */
- rec_offs_make_valid(insert_rec, index, offsets);
- } else {
- ulint pos = page_rec_get_n_recs_before(insert_rec);
- ut_ad(pos > 0);
-
- if (!log_compressed) {
- if (page_zip_compress(
- page_zip, page, index,
- level, NULL)) {
- page_cur_insert_rec_write_log(
- insert_rec, rec_size,
- cursor->rec, index, mtr);
- page_zip_compress_write_log_no_data(
- level, page, index, mtr);
-
- rec_offs_make_valid(
- insert_rec, index, offsets);
- return(insert_rec);
- }
-
- /* Page compress failed. If this happened on a
- leaf page, put the data size into the sample
- buffer. */
- if (page_is_leaf(page)) {
- ulint occupied = page_get_data_size(page)
- + page_dir_calc_reserved_space(
- page_get_n_recs(page));
- index->stat_defrag_data_size_sample[
- index->stat_defrag_sample_next_slot] =
- occupied;
- index->stat_defrag_sample_next_slot =
- (index->stat_defrag_sample_next_slot
- + 1) % STAT_DEFRAG_DATA_SIZE_N_SAMPLE;
- }
-
- ut_ad(cursor->rec
- == (pos > 1
- ? page_rec_get_nth(
- page, pos - 1)
- : page + PAGE_NEW_INFIMUM));
- } else {
- /* We are writing entire page images
- to the log. Reduce the redo log volume
- by reorganizing the page at the same time. */
- if (page_zip_reorganize(
- cursor->block, index, mtr)) {
- /* The page was reorganized:
- Seek to pos. */
- if (pos > 1) {
- cursor->rec = page_rec_get_nth(
- page, pos - 1);
- } else {
- cursor->rec = page
- + PAGE_NEW_INFIMUM;
- }
-
- insert_rec = page + rec_get_next_offs(
- cursor->rec, TRUE);
- rec_offs_make_valid(
- insert_rec, index, offsets);
- return(insert_rec);
- }
-
- /* Theoretically, we could try one
- last resort of btr_page_reorganize_low()
- followed by page_zip_available(), but
- that would be very unlikely to
- succeed. (If the full reorganized page
- failed to compress, why would it
- succeed to compress the page, plus log
- the insert of this record? */
- }
-
- /* Out of space: restore the page */
- btr_blob_dbg_remove(page, index, "insert_zip_fail");
- if (!page_zip_decompress(page_zip, page, FALSE)) {
- ut_error; /* Memory corrupted? */
- }
- ut_ad(page_validate(page, index));
- btr_blob_dbg_add(page, index, "insert_zip_fail");
- insert_rec = NULL;
- }
-
- return(insert_rec);
- }
-
- free_rec = page_header_get_ptr(page, PAGE_FREE);
- if (UNIV_LIKELY_NULL(free_rec)) {
- /* Try to allocate from the head of the free list. */
- lint extra_size_diff;
- ulint foffsets_[REC_OFFS_NORMAL_SIZE];
- ulint* foffsets = foffsets_;
- mem_heap_t* heap = NULL;
-
- rec_offs_init(foffsets_);
-
- foffsets = rec_get_offsets(free_rec, index, foffsets,
- ULINT_UNDEFINED, &heap);
- if (rec_offs_size(foffsets) < rec_size) {
-too_small:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- goto use_heap;
- }
-
- insert_buf = free_rec - rec_offs_extra_size(foffsets);
-
- /* On compressed pages, do not relocate records from
- the free list. If extra_size would grow, use the heap. */
- extra_size_diff
- = rec_offs_extra_size(offsets)
- - rec_offs_extra_size(foffsets);
-
- if (UNIV_UNLIKELY(extra_size_diff < 0)) {
- /* Add an offset to the extra_size. */
- if (rec_offs_size(foffsets)
- < rec_size - extra_size_diff) {
-
- goto too_small;
- }
-
- insert_buf -= extra_size_diff;
- } else if (UNIV_UNLIKELY(extra_size_diff)) {
- /* Do not allow extra_size to grow */
-
- goto too_small;
- }
-
- heap_no = rec_get_heap_no_new(free_rec);
- page_mem_alloc_free(page, page_zip,
- rec_get_next_ptr(free_rec, TRUE),
- rec_size);
-
- if (!page_is_leaf(page)) {
- /* Zero out the node pointer of free_rec,
- in case it will not be overwritten by
- insert_rec. */
-
- ut_ad(rec_size > REC_NODE_PTR_SIZE);
-
- if (rec_offs_extra_size(foffsets)
- + rec_offs_data_size(foffsets) > rec_size) {
-
- memset(rec_get_end(free_rec, foffsets)
- - REC_NODE_PTR_SIZE, 0,
- REC_NODE_PTR_SIZE);
- }
- } else if (dict_index_is_clust(index)) {
- /* Zero out the DB_TRX_ID and DB_ROLL_PTR
- columns of free_rec, in case it will not be
- overwritten by insert_rec. */
-
- ulint trx_id_col;
- ulint trx_id_offs;
- ulint len;
-
- trx_id_col = dict_index_get_sys_col_pos(index,
- DATA_TRX_ID);
- ut_ad(trx_id_col > 0);
- ut_ad(trx_id_col != ULINT_UNDEFINED);
-
- trx_id_offs = rec_get_nth_field_offs(foffsets,
- trx_id_col, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
-
- if (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN + trx_id_offs
- + rec_offs_extra_size(foffsets) > rec_size) {
- /* We will have to zero out the
- DB_TRX_ID and DB_ROLL_PTR, because
- they will not be fully overwritten by
- insert_rec. */
-
- memset(free_rec + trx_id_offs, 0,
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
- }
-
- ut_ad(free_rec + trx_id_offs + DATA_TRX_ID_LEN
- == rec_get_nth_field(free_rec, foffsets,
- trx_id_col + 1, &len));
- ut_ad(len == DATA_ROLL_PTR_LEN);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- } else {
-use_heap:
- free_rec = NULL;
- insert_buf = page_mem_alloc_heap(page, page_zip,
- rec_size, &heap_no);
-
- if (UNIV_UNLIKELY(insert_buf == NULL)) {
- return(NULL);
- }
-
- page_zip_dir_add_slot(page_zip, dict_index_is_clust(index));
- }
-
- /* 3. Create the record */
- insert_rec = rec_copy(insert_buf, rec, offsets);
- rec_offs_make_valid(insert_rec, index, offsets);
-
- /* 4. Insert the record in the linked list of records */
- ut_ad(cursor->rec != insert_rec);
-
- {
- /* next record after current before the insertion */
- const rec_t* next_rec = page_rec_get_next_low(
- cursor->rec, TRUE);
- ut_ad(rec_get_status(cursor->rec)
- <= REC_STATUS_INFIMUM);
- ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
- ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
-
- page_rec_set_next(insert_rec, next_rec);
- page_rec_set_next(cursor->rec, insert_rec);
- }
-
- page_header_set_field(page, page_zip, PAGE_N_RECS,
- 1 + page_get_n_recs(page));
-
- /* 5. Set the n_owned field in the inserted record to zero,
- and set the heap_no field */
- rec_set_n_owned_new(insert_rec, NULL, 0);
- rec_set_heap_no_new(insert_rec, heap_no);
-
- UNIV_MEM_ASSERT_RW(rec_get_start(insert_rec, offsets),
- rec_offs_size(offsets));
-
- page_zip_dir_insert(page_zip, cursor->rec, free_rec, insert_rec);
-
- /* 6. Update the last insertion info in page header */
-
- last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
- ut_ad(!last_insert
- || rec_get_node_ptr_flag(last_insert)
- == rec_get_node_ptr_flag(insert_rec));
-
- if (UNIV_UNLIKELY(last_insert == NULL)) {
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
-
- } else if ((last_insert == cursor->rec)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_LEFT)) {
-
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_RIGHT);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
-
- } else if ((page_rec_get_next(insert_rec) == last_insert)
- && (page_header_get_field(page, PAGE_DIRECTION)
- != PAGE_RIGHT)) {
-
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_LEFT);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
- page_header_get_field(
- page, PAGE_N_DIRECTION) + 1);
- } else {
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
- }
-
- page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, insert_rec);
-
- /* 7. It remains to update the owner record. */
- {
- rec_t* owner_rec = page_rec_find_owner_rec(insert_rec);
- ulint n_owned;
-
- n_owned = rec_get_n_owned_new(owner_rec);
- rec_set_n_owned_new(owner_rec, page_zip, n_owned + 1);
-
- /* 8. Now we have incremented the n_owned field of the owner
- record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
- we have to split the corresponding directory slot in two. */
-
- if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
- page_dir_split_slot(
- page, page_zip,
- page_dir_find_owner_slot(owner_rec));
- }
- }
-
- page_zip_write_rec(page_zip, insert_rec, index, offsets, 1);
-
- btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert_zip_ok");
-
- /* 9. Write log record of the insert */
- if (UNIV_LIKELY(mtr != NULL)) {
- page_cur_insert_rec_write_log(insert_rec, rec_size,
- cursor->rec, index, mtr);
- }
-
- return(insert_rec);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Writes a log record of copying a record list end to a new created page.
-@return 4-byte field where to write the log data length, or NULL if
-logging is disabled */
-UNIV_INLINE
-byte*
-page_copy_rec_list_to_created_page_write_log(
-/*=========================================*/
- page_t* page, /*!< in: index page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
-{
- byte* log_ptr;
-
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
- log_ptr = mlog_open_and_write_index(mtr, page, index,
- page_is_comp(page)
- ? MLOG_COMP_LIST_END_COPY_CREATED
- : MLOG_LIST_END_COPY_CREATED, 4);
- if (UNIV_LIKELY(log_ptr != NULL)) {
- mlog_close(mtr, log_ptr + 4);
- }
-
- return(log_ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************//**
-Parses a log record of copying a record list end to a new created page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_parse_copy_rec_list_to_created_page(
-/*=====================================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in: page or NULL */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- byte* rec_end;
- ulint log_data_len;
- page_t* page;
- page_zip_des_t* page_zip;
-
- if (ptr + 4 > end_ptr) {
-
- return(NULL);
- }
-
- log_data_len = mach_read_from_4(ptr);
- ptr += 4;
-
- rec_end = ptr + log_data_len;
-
- if (rec_end > end_ptr) {
-
- return(NULL);
- }
-
- if (!block) {
-
- return(rec_end);
- }
-
- while (ptr < rec_end) {
- ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr,
- block, index, mtr);
- }
-
- ut_a(ptr == rec_end);
-
- page = buf_block_get_frame(block);
- page_zip = buf_block_get_page_zip(block);
-
- page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
- page_header_set_field(page, page_zip, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
-
- return(rec_end);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Copies records from page to a newly created page, from a given record onward,
-including that record. Infimum and supremum records are not copied.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if this is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit(). */
-UNIV_INTERN
-void
-page_copy_rec_list_end_to_created_page(
-/*===================================*/
- page_t* new_page, /*!< in/out: index page to copy to */
- rec_t* rec, /*!< in: first record to copy */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_dir_slot_t* slot = 0; /* remove warning */
- byte* heap_top;
- rec_t* insert_rec = 0; /* remove warning */
- rec_t* prev_rec;
- ulint count;
- ulint n_recs;
- ulint slot_index;
- ulint rec_size;
- ulint log_mode;
- byte* log_ptr;
- ulint log_data_len;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW);
- ut_ad(page_align(rec) != new_page);
- ut_ad(page_rec_is_comp(rec) == page_is_comp(new_page));
-
- if (page_rec_is_infimum(rec)) {
-
- rec = page_rec_get_next(rec);
- }
-
- if (page_rec_is_supremum(rec)) {
-
- return;
- }
-
-#ifdef UNIV_DEBUG
- /* To pass the debug tests we have to set these dummy values
- in the debug version */
- page_dir_set_n_slots(new_page, NULL, UNIV_PAGE_SIZE / 2);
- page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP,
- new_page + UNIV_PAGE_SIZE - 1);
-#endif
-
- log_ptr = page_copy_rec_list_to_created_page_write_log(new_page,
- index, mtr);
-
- log_data_len = dyn_array_get_data_size(&(mtr->log));
-
- /* Individual inserts are logged in a shorter form */
-
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS);
-
- prev_rec = page_get_infimum_rec(new_page);
- if (page_is_comp(new_page)) {
- heap_top = new_page + PAGE_NEW_SUPREMUM_END;
- } else {
- heap_top = new_page + PAGE_OLD_SUPREMUM_END;
- }
- count = 0;
- slot_index = 0;
- n_recs = 0;
-
- do {
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- insert_rec = rec_copy(heap_top, rec, offsets);
-
- if (page_is_comp(new_page)) {
- rec_set_next_offs_new(prev_rec,
- page_offset(insert_rec));
-
- rec_set_n_owned_new(insert_rec, NULL, 0);
- rec_set_heap_no_new(insert_rec,
- PAGE_HEAP_NO_USER_LOW + n_recs);
- } else {
- rec_set_next_offs_old(prev_rec,
- page_offset(insert_rec));
-
- rec_set_n_owned_old(insert_rec, 0);
- rec_set_heap_no_old(insert_rec,
- PAGE_HEAP_NO_USER_LOW + n_recs);
- }
-
- count++;
- n_recs++;
-
- if (UNIV_UNLIKELY
- (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2)) {
-
- slot_index++;
-
- slot = page_dir_get_nth_slot(new_page, slot_index);
-
- page_dir_slot_set_rec(slot, insert_rec);
- page_dir_slot_set_n_owned(slot, NULL, count);
-
- count = 0;
- }
-
- rec_size = rec_offs_size(offsets);
-
- ut_ad(heap_top < new_page + UNIV_PAGE_SIZE);
-
- heap_top += rec_size;
-
- rec_offs_make_valid(insert_rec, index, offsets);
- btr_blob_dbg_add_rec(insert_rec, index, offsets, "copy_end");
-
- page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec,
- index, mtr);
- prev_rec = insert_rec;
- rec = page_rec_get_next(rec);
- } while (!page_rec_is_supremum(rec));
-
- if ((slot_index > 0) && (count + 1
- + (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2
- <= PAGE_DIR_SLOT_MAX_N_OWNED)) {
- /* We can merge the two last dir slots. This operation is
- here to make this function imitate exactly the equivalent
- task made using page_cur_insert_rec, which we use in database
- recovery to reproduce the task performed by this function.
- To be able to check the correctness of recovery, it is good
- that it imitates exactly. */
-
- count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2;
-
- page_dir_slot_set_n_owned(slot, NULL, 0);
-
- slot_index--;
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len;
-
- ut_a(log_data_len < 100 * UNIV_PAGE_SIZE);
-
- if (UNIV_LIKELY(log_ptr != NULL)) {
- mach_write_to_4(log_ptr, log_data_len);
- }
-
- if (page_is_comp(new_page)) {
- rec_set_next_offs_new(insert_rec, PAGE_NEW_SUPREMUM);
- } else {
- rec_set_next_offs_old(insert_rec, PAGE_OLD_SUPREMUM);
- }
-
- slot = page_dir_get_nth_slot(new_page, 1 + slot_index);
-
- page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page));
- page_dir_slot_set_n_owned(slot, NULL, count + 1);
-
- page_dir_set_n_slots(new_page, NULL, 2 + slot_index);
- page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP, heap_top);
- page_dir_set_n_heap(new_page, NULL, PAGE_HEAP_NO_USER_LOW + n_recs);
- page_header_set_field(new_page, NULL, PAGE_N_RECS, n_recs);
-
- page_header_set_ptr(new_page, NULL, PAGE_LAST_INSERT, NULL);
- page_header_set_field(new_page, NULL, PAGE_DIRECTION,
- PAGE_NO_DIRECTION);
- page_header_set_field(new_page, NULL, PAGE_N_DIRECTION, 0);
-
- /* Restore the log mode */
-
- mtr_set_log_mode(mtr, log_mode);
-}
-
-/***********************************************************//**
-Writes log record of a record delete on a page. */
-UNIV_INLINE
-void
-page_cur_delete_rec_write_log(
-/*==========================*/
- rec_t* rec, /*!< in: record to be deleted */
- const dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mini-transaction handle */
-{
- byte* log_ptr;
-
- ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));
-
- log_ptr = mlog_open_and_write_index(mtr, rec, index,
- page_rec_is_comp(rec)
- ? MLOG_COMP_REC_DELETE
- : MLOG_REC_DELETE, 2);
-
- if (!log_ptr) {
- /* Logging in mtr is switched off during crash recovery:
- in that case mlog_open returns NULL */
- return;
- }
-
- /* Write the cursor rec offset as a 2-byte ulint */
- mach_write_to_2(log_ptr, page_offset(rec));
-
- mlog_close(mtr, log_ptr + 2);
-}
-#else /* !UNIV_HOTBACKUP */
-# define page_cur_delete_rec_write_log(rec,index,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses log record of a record delete on a page.
-@return pointer to record end or NULL */
-UNIV_INTERN
-byte*
-page_cur_parse_delete_rec(
-/*======================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in: page or NULL */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ulint offset;
- page_cur_t cursor;
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- /* Read the cursor rec offset as a 2-byte ulint */
- offset = mach_read_from_2(ptr);
- ptr += 2;
-
- ut_a(offset <= UNIV_PAGE_SIZE);
-
- if (block) {
- page_t* page = buf_block_get_frame(block);
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_t* rec = page + offset;
- rec_offs_init(offsets_);
-
- page_cur_position(rec, block, &cursor);
- ut_ad(!buf_block_get_page_zip(block) || page_is_comp(page));
-
- page_cur_delete_rec(&cursor, index,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap),
- mtr);
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- return(ptr);
-}
-
-/***********************************************************//**
-Deletes a record at the page cursor. The cursor is moved to the next
-record after the deleted one. */
-UNIV_INTERN
-void
-page_cur_delete_rec(
-/*================*/
- page_cur_t* cursor, /*!< in/out: a page cursor */
- const dict_index_t* index, /*!< in: record descriptor */
- const ulint* offsets,/*!< in: rec_get_offsets(
- cursor->rec, index) */
- mtr_t* mtr) /*!< in: mini-transaction handle
- or NULL */
-{
- page_dir_slot_t* cur_dir_slot;
- page_dir_slot_t* prev_slot;
- page_t* page;
- page_zip_des_t* page_zip;
- rec_t* current_rec;
- rec_t* prev_rec = NULL;
- rec_t* next_rec;
- ulint cur_slot_no;
- ulint cur_n_owned;
- rec_t* rec;
-
- page = page_cur_get_page(cursor);
- page_zip = page_cur_get_page_zip(cursor);
-
- /* page_zip_validate() will fail here when
- btr_cur_pessimistic_delete() invokes btr_set_min_rec_mark().
- Then, both "page_zip" and "page" would have the min-rec-mark
- set on the smallest user record, but "page" would additionally
- have it set on the smallest-but-one record. Because sloppy
- page_zip_validate_low() only ignores min-rec-flag differences
- in the smallest user record, it cannot be used here either. */
-
- current_rec = cursor->rec;
- ut_ad(rec_offs_validate(current_rec, index, offsets));
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
- ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
- == index->id || recv_recovery_is_on()
- || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index)));
-
- /* The record must not be the supremum or infimum record. */
- ut_ad(page_rec_is_user_rec(current_rec));
-
- if (page_get_n_recs(page) == 1 && !recv_recovery_is_on()) {
- /* Empty the page, unless we are applying the redo log
- during crash recovery. During normal operation, the
- page_create_empty() gets logged as one of MLOG_PAGE_CREATE,
- MLOG_COMP_PAGE_CREATE, MLOG_ZIP_PAGE_COMPRESS. */
- ut_ad(page_is_leaf(page));
- /* Usually, this should be the root page,
- and the whole index tree should become empty.
- However, this could also be a call in
- btr_cur_pessimistic_update() to delete the only
- record in the page and to insert another one. */
- page_cur_move_to_next(cursor);
- ut_ad(page_cur_is_after_last(cursor));
- page_create_empty(page_cur_get_block(cursor),
- const_cast<dict_index_t*>(index), mtr);
- return;
- }
-
- /* Save to local variables some data associated with current_rec */
- cur_slot_no = page_dir_find_owner_slot(current_rec);
- ut_ad(cur_slot_no > 0);
- cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no);
- cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot);
-
- /* 0. Write the log record */
- if (mtr != 0) {
- page_cur_delete_rec_write_log(current_rec, index, mtr);
- }
-
- /* 1. Reset the last insert info in the page header and increment
- the modify clock for the frame */
-
- page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
-
- /* The page gets invalid for optimistic searches: increment the
- frame modify clock only if there is an mini-transaction covering
- the change. During IMPORT we allocate local blocks that are not
- part of the buffer pool. */
-
- if (mtr != 0) {
- buf_block_modify_clock_inc(page_cur_get_block(cursor));
- }
-
- /* 2. Find the next and the previous record. Note that the cursor is
- left at the next record. */
-
- ut_ad(cur_slot_no > 0);
- prev_slot = page_dir_get_nth_slot(page, cur_slot_no - 1);
-
- rec = (rec_t*) page_dir_slot_get_rec(prev_slot);
-
- /* rec now points to the record of the previous directory slot. Look
- for the immediate predecessor of current_rec in a loop. */
-
- while(current_rec != rec) {
- prev_rec = rec;
- rec = page_rec_get_next(rec);
- }
-
- page_cur_move_to_next(cursor);
- next_rec = cursor->rec;
-
- /* 3. Remove the record from the linked list of records */
-
- page_rec_set_next(prev_rec, next_rec);
-
- /* 4. If the deleted record is pointed to by a dir slot, update the
- record pointer in slot. In the following if-clause we assume that
- prev_rec is owned by the same slot, i.e., PAGE_DIR_SLOT_MIN_N_OWNED
- >= 2. */
-
-#if PAGE_DIR_SLOT_MIN_N_OWNED < 2
-# error "PAGE_DIR_SLOT_MIN_N_OWNED < 2"
-#endif
- ut_ad(cur_n_owned > 1);
-
- if (current_rec == page_dir_slot_get_rec(cur_dir_slot)) {
- page_dir_slot_set_rec(cur_dir_slot, prev_rec);
- }
-
- /* 5. Update the number of owned records of the slot */
-
- page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1);
-
- /* 6. Free the memory occupied by the record */
- btr_blob_dbg_remove_rec(current_rec, const_cast<dict_index_t*>(index),
- offsets, "delete");
- page_mem_free(page, page_zip, current_rec, index, offsets);
-
- /* 7. Now we have decremented the number of owned records of the slot.
- If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the
- slots. */
-
- if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) {
- page_dir_balance_slot(page, page_zip, cur_slot_no);
- }
-
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-}
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-/*******************************************************************//**
-Print the first n numbers, generated by page_cur_lcg_prng() to make sure
-(visually) that it works properly. */
-void
-test_page_cur_lcg_prng(
-/*===================*/
- int n) /*!< in: print first n numbers */
-{
- int i;
- unsigned long long rnd;
-
- for (i = 0; i < n; i++) {
- rnd = page_cur_lcg_prng();
- printf("%llu\t%%2=%llu %%3=%llu %%5=%llu %%7=%llu %%11=%llu\n",
- rnd,
- rnd % 2,
- rnd % 3,
- rnd % 5,
- rnd % 7,
- rnd % 11);
- }
-}
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/xtradb/page/page0page.cc b/storage/xtradb/page/page0page.cc
deleted file mode 100644
index 3f8e47adafd..00000000000
--- a/storage/xtradb/page/page0page.cc
+++ /dev/null
@@ -1,2872 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2012, Facebook Inc.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file page/page0page.cc
-Index page routines
-
-Created 2/2/1994 Heikki Tuuri
-*******************************************************/
-
-#define THIS_MODULE
-#include "page0page.h"
-#ifdef UNIV_NONINL
-#include "page0page.ic"
-#endif
-#undef THIS_MODULE
-
-#include "ha_prototypes.h"
-#include "buf0checksum.h"
-
-#ifndef UNIV_INNOCHECKSUM
-
-#include "page0cur.h"
-#include "page0zip.h"
-#include "buf0buf.h"
-#include "btr0btr.h"
-#ifndef UNIV_HOTBACKUP
-# include "srv0srv.h"
-# include "lock0lock.h"
-# include "fut0lst.h"
-# include "btr0sea.h"
-#endif /* !UNIV_HOTBACKUP */
-
-/* THE INDEX PAGE
- ==============
-
-The index page consists of a page header which contains the page's
-id and other information. On top of it are the index records
-in a heap linked into a one way linear list according to alphabetic order.
-
-Just below page end is an array of pointers which we call page directory,
-to about every sixth record in the list. The pointers are placed in
-the directory in the alphabetical order of the records pointed to,
-enabling us to make binary search using the array. Each slot n:o I
-in the directory points to a record, where a 4-bit field contains a count
-of those records which are in the linear list between pointer I and
-the pointer I - 1 in the directory, including the record
-pointed to by pointer I and not including the record pointed to by I - 1.
-We say that the record pointed to by slot I, or that slot I, owns
-these records. The count is always kept in the range 4 to 8, with
-the exception that it is 1 for the first slot, and 1--8 for the second slot.
-
-An essentially binary search can be performed in the list of index
-records, like we could do if we had pointer to every record in the
-page directory. The data structure is, however, more efficient when
-we are doing inserts, because most inserts are just pushed on a heap.
-Only every 8th insert requires block move in the directory pointer
-table, which itself is quite small. A record is deleted from the page
-by just taking it off the linear list and updating the number of owned
-records-field of the record which owns it, and updating the page directory,
-if necessary. A special case is the one when the record owns itself.
-Because the overhead of inserts is so small, we may also increase the
-page size from the projected default of 8 kB to 64 kB without too
-much loss of efficiency in inserts. Bigger page becomes actual
-when the disk transfer rate compared to seek and latency time rises.
-On the present system, the page size is set so that the page transfer
-time (3 ms) is 20 % of the disk random access time (15 ms).
-
-When the page is split, merged, or becomes full but contains deleted
-records, we have to reorganize the page.
-
-Assuming a page size of 8 kB, a typical index page of a secondary
-index contains 300 index entries, and the size of the page directory
-is 50 x 4 bytes = 200 bytes. */
-
-/***************************************************************//**
-Looks for the directory slot which owns the given record.
-@return the directory slot number */
-UNIV_INTERN
-ulint
-page_dir_find_owner_slot(
-/*=====================*/
- const rec_t* rec) /*!< in: the physical record */
-{
- const page_t* page;
- register uint16 rec_offs_bytes;
- register const page_dir_slot_t* slot;
- register const page_dir_slot_t* first_slot;
- register const rec_t* r = rec;
-
- ut_ad(page_rec_check(rec));
-
- page = page_align(rec);
- first_slot = page_dir_get_nth_slot(page, 0);
- slot = page_dir_get_nth_slot(page, page_dir_get_n_slots(page) - 1);
-
- if (page_is_comp(page)) {
- while (rec_get_n_owned_new(r) == 0) {
- r = rec_get_next_ptr_const(r, TRUE);
- ut_ad(r >= page + PAGE_NEW_SUPREMUM);
- ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
- }
- } else {
- while (rec_get_n_owned_old(r) == 0) {
- r = rec_get_next_ptr_const(r, FALSE);
- ut_ad(r >= page + PAGE_OLD_SUPREMUM);
- ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR));
- }
- }
-
- rec_offs_bytes = mach_encode_2(r - page);
-
- while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) {
-
- if (UNIV_UNLIKELY(slot == first_slot)) {
- fprintf(stderr,
- "InnoDB: Probable data corruption on"
- " page %lu\n"
- "InnoDB: Original record ",
- (ulong) page_get_page_no(page));
-
- if (page_is_comp(page)) {
- fputs("(compact record)", stderr);
- } else {
- rec_print_old(stderr, rec);
- }
-
- fputs("\n"
- "InnoDB: on that page.\n"
- "InnoDB: Cannot find the dir slot for record ",
- stderr);
- if (page_is_comp(page)) {
- fputs("(compact record)", stderr);
- } else {
- rec_print_old(stderr, page
- + mach_decode_2(rec_offs_bytes));
- }
- fputs("\n"
- "InnoDB: on that page!\n", stderr);
-
- buf_page_print(page, 0, 0);
-
- ut_error;
- }
-
- slot += PAGE_DIR_SLOT_SIZE;
- }
-
- return(((ulint) (first_slot - slot)) / PAGE_DIR_SLOT_SIZE);
-}
-
-/**************************************************************//**
-Used to check the consistency of a directory slot.
-@return TRUE if succeed */
-static
-ibool
-page_dir_slot_check(
-/*================*/
- const page_dir_slot_t* slot) /*!< in: slot */
-{
- const page_t* page;
- ulint n_slots;
- ulint n_owned;
-
- ut_a(slot);
-
- page = page_align(slot);
-
- n_slots = page_dir_get_n_slots(page);
-
- ut_a(slot <= page_dir_get_nth_slot(page, 0));
- ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1));
-
- ut_a(page_rec_check(page_dir_slot_get_rec(slot)));
-
- if (page_is_comp(page)) {
- n_owned = rec_get_n_owned_new(page_dir_slot_get_rec(slot));
- } else {
- n_owned = rec_get_n_owned_old(page_dir_slot_get_rec(slot));
- }
-
- if (slot == page_dir_get_nth_slot(page, 0)) {
- ut_a(n_owned == 1);
- } else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) {
- ut_a(n_owned >= 1);
- ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
- } else {
- ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED);
- ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);
- }
-
- return(TRUE);
-}
-
-/*************************************************************//**
-Sets the max trx id field value. */
-UNIV_INTERN
-void
-page_set_max_trx_id(
-/*================*/
- buf_block_t* block, /*!< in/out: page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in/out: mini-transaction, or NULL */
-{
- page_t* page = buf_block_get_frame(block);
-#ifndef UNIV_HOTBACKUP
- ut_ad(!mtr || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
-#endif /* !UNIV_HOTBACKUP */
-
- /* It is not necessary to write this change to the redo log, as
- during a database recovery we assume that the max trx id of every
- page is the maximum trx id assigned before the crash. */
-
- if (page_zip) {
- mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id);
- page_zip_write_header(page_zip,
- page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
- 8, mtr);
-#ifndef UNIV_HOTBACKUP
- } else if (mtr) {
- mlog_write_ull(page + (PAGE_HEADER + PAGE_MAX_TRX_ID),
- trx_id, mtr);
-#endif /* !UNIV_HOTBACKUP */
- } else {
- mach_write_to_8(page + (PAGE_HEADER + PAGE_MAX_TRX_ID), trx_id);
- }
-}
-
-/************************************************************//**
-Allocates a block of memory from the heap of an index page.
-@return pointer to start of allocated buffer, or NULL if allocation fails */
-UNIV_INTERN
-byte*
-page_mem_alloc_heap(
-/*================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page with enough
- space available for inserting the record,
- or NULL */
- ulint need, /*!< in: total number of bytes needed */
- ulint* heap_no)/*!< out: this contains the heap number
- of the allocated record
- if allocation succeeds */
-{
- byte* block;
- ulint avl_space;
-
- ut_ad(page && heap_no);
-
- avl_space = page_get_max_insert_size(page, 1);
-
- if (avl_space >= need) {
- block = page_header_get_ptr(page, PAGE_HEAP_TOP);
-
- page_header_set_ptr(page, page_zip, PAGE_HEAP_TOP,
- block + need);
- *heap_no = page_dir_get_n_heap(page);
-
- page_dir_set_n_heap(page, page_zip, 1 + *heap_no);
-
- return(block);
- }
-
- return(NULL);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Writes a log record of page creation. */
-UNIV_INLINE
-void
-page_create_write_log(
-/*==================*/
- buf_frame_t* frame, /*!< in: a buffer frame where the page is
- created */
- mtr_t* mtr, /*!< in: mini-transaction handle */
- ibool comp) /*!< in: TRUE=compact page format */
-{
- mlog_write_initial_log_record(frame, comp
- ? MLOG_COMP_PAGE_CREATE
- : MLOG_PAGE_CREATE, mtr);
-}
-#else /* !UNIV_HOTBACKUP */
-# define page_create_write_log(frame,mtr,comp) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses a redo log record of creating a page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_parse_create(
-/*==============*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */
- ulint comp, /*!< in: nonzero=compact page format */
- buf_block_t* block, /*!< in: block or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ut_ad(ptr && end_ptr);
-
- /* The record is empty, except for the record initial part */
-
- if (block) {
- page_create(block, mtr, comp);
- }
-
- return(ptr);
-}
-
-/**********************************************************//**
-The index page creation function.
-@return pointer to the page */
-static
-page_t*
-page_create_low(
-/*============*/
- buf_block_t* block, /*!< in: a buffer block where the
- page is created */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- page_dir_slot_t* slot;
- mem_heap_t* heap;
- dtuple_t* tuple;
- dfield_t* field;
- byte* heap_top;
- rec_t* infimum_rec;
- rec_t* supremum_rec;
- page_t* page;
- dict_index_t* index;
- ulint* offsets;
-
- ut_ad(block);
-#if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA
-# error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA"
-#endif
-#if PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA
-# error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA"
-#endif
-
- /* The infimum and supremum records use a dummy index. */
- if (UNIV_LIKELY(comp)) {
- index = dict_ind_compact;
- } else {
- index = dict_ind_redundant;
- }
-
- /* 1. INCREMENT MODIFY CLOCK */
- buf_block_modify_clock_inc(block);
-
- page = buf_block_get_frame(block);
-
- fil_page_set_type(page, FIL_PAGE_INDEX);
-
- heap = mem_heap_create(200);
-
- /* 3. CREATE THE INFIMUM AND SUPREMUM RECORDS */
-
- /* Create first a data tuple for infimum record */
- tuple = dtuple_create(heap, 1);
- dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM);
- field = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(field, "infimum", 8);
- dtype_set(dfield_get_type(field),
- DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8);
- /* Set the corresponding physical record to its place in the page
- record heap */
-
- heap_top = page + PAGE_DATA;
-
- infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
-
- if (UNIV_LIKELY(comp)) {
- ut_a(infimum_rec == page + PAGE_NEW_INFIMUM);
-
- rec_set_n_owned_new(infimum_rec, NULL, 1);
- rec_set_heap_no_new(infimum_rec, 0);
- } else {
- ut_a(infimum_rec == page + PAGE_OLD_INFIMUM);
-
- rec_set_n_owned_old(infimum_rec, 1);
- rec_set_heap_no_old(infimum_rec, 0);
- }
-
- offsets = rec_get_offsets(infimum_rec, index, NULL,
- ULINT_UNDEFINED, &heap);
-
- heap_top = rec_get_end(infimum_rec, offsets);
-
- /* Create then a tuple for supremum */
-
- tuple = dtuple_create(heap, 1);
- dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM);
- field = dtuple_get_nth_field(tuple, 0);
-
- dfield_set_data(field, "supremum", comp ? 8 : 9);
- dtype_set(dfield_get_type(field),
- DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 8 : 9);
-
- supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0);
-
- if (UNIV_LIKELY(comp)) {
- ut_a(supremum_rec == page + PAGE_NEW_SUPREMUM);
-
- rec_set_n_owned_new(supremum_rec, NULL, 1);
- rec_set_heap_no_new(supremum_rec, 1);
- } else {
- ut_a(supremum_rec == page + PAGE_OLD_SUPREMUM);
-
- rec_set_n_owned_old(supremum_rec, 1);
- rec_set_heap_no_old(supremum_rec, 1);
- }
-
- offsets = rec_get_offsets(supremum_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- heap_top = rec_get_end(supremum_rec, offsets);
-
- ut_ad(heap_top == page
- + (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END));
-
- mem_heap_free(heap);
-
- /* 4. INITIALIZE THE PAGE */
-
- page_header_set_field(page, NULL, PAGE_N_DIR_SLOTS, 2);
- page_header_set_ptr(page, NULL, PAGE_HEAP_TOP, heap_top);
- page_header_set_field(page, NULL, PAGE_N_HEAP, comp
- ? 0x8000 | PAGE_HEAP_NO_USER_LOW
- : PAGE_HEAP_NO_USER_LOW);
- page_header_set_ptr(page, NULL, PAGE_FREE, NULL);
- page_header_set_field(page, NULL, PAGE_GARBAGE, 0);
- page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, NULL);
- page_header_set_field(page, NULL, PAGE_DIRECTION, PAGE_NO_DIRECTION);
- page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
- page_header_set_field(page, NULL, PAGE_N_RECS, 0);
- page_set_max_trx_id(block, NULL, 0, NULL);
- memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START
- - page_offset(heap_top));
-
- /* 5. SET POINTERS IN RECORDS AND DIR SLOTS */
-
- /* Set the slots to point to infimum and supremum. */
-
- slot = page_dir_get_nth_slot(page, 0);
- page_dir_slot_set_rec(slot, infimum_rec);
-
- slot = page_dir_get_nth_slot(page, 1);
- page_dir_slot_set_rec(slot, supremum_rec);
-
- /* Set the next pointers in infimum and supremum */
-
- if (UNIV_LIKELY(comp)) {
- rec_set_next_offs_new(infimum_rec, PAGE_NEW_SUPREMUM);
- rec_set_next_offs_new(supremum_rec, 0);
- } else {
- rec_set_next_offs_old(infimum_rec, PAGE_OLD_SUPREMUM);
- rec_set_next_offs_old(supremum_rec, 0);
- }
-
- return(page);
-}
-
-/**********************************************************//**
-Create an uncompressed B-tree index page.
-@return pointer to the page */
-UNIV_INTERN
-page_t*
-page_create(
-/*========*/
- buf_block_t* block, /*!< in: a buffer block where the
- page is created */
- mtr_t* mtr, /*!< in: mini-transaction handle */
- ulint comp) /*!< in: nonzero=compact page format */
-{
- page_create_write_log(buf_block_get_frame(block), mtr, comp);
- return(page_create_low(block, comp));
-}
-
-/**********************************************************//**
-Create a compressed B-tree index page.
-@return pointer to the page */
-UNIV_INTERN
-page_t*
-page_create_zip(
-/*============*/
- buf_block_t* block, /*!< in/out: a buffer frame where the
- page is created */
- dict_index_t* index, /*!< in: the index of the page */
- ulint level, /*!< in: the B-tree level of the page */
- trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- page_t* page;
- page_zip_des_t* page_zip = buf_block_get_page_zip(block);
-
- ut_ad(block);
- ut_ad(page_zip);
- ut_ad(index);
- ut_ad(dict_table_is_comp(index->table));
-
- page = page_create_low(block, TRUE);
- mach_write_to_2(PAGE_HEADER + PAGE_LEVEL + page, level);
- mach_write_to_8(PAGE_HEADER + PAGE_MAX_TRX_ID + page, max_trx_id);
-
- if (!page_zip_compress(page_zip, page, index,
- page_zip_level, mtr)) {
- /* The compression of a newly created page
- should always succeed. */
- ut_error;
- }
-
- return(page);
-}
-
-/**********************************************************//**
-Empty a previously created B-tree index page. */
-UNIV_INTERN
-void
-page_create_empty(
-/*==============*/
- buf_block_t* block, /*!< in/out: B-tree block */
- dict_index_t* index, /*!< in: the index of the page */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- trx_id_t max_trx_id = 0;
- const page_t* page = buf_block_get_frame(block);
- page_zip_des_t* page_zip= buf_block_get_page_zip(block);
-
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
-
- if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
- max_trx_id = page_get_max_trx_id(page);
- ut_ad(max_trx_id);
- }
-
- if (page_zip) {
- page_create_zip(block, index,
- page_header_get_field(page, PAGE_LEVEL),
- max_trx_id, mtr);
- } else {
- page_create(block, mtr, page_is_comp(page));
-
- if (max_trx_id) {
- page_update_max_trx_id(
- block, page_zip, max_trx_id, mtr);
- }
- }
-}
-
-/*************************************************************//**
-Differs from page_copy_rec_list_end, because this function does not
-touch the lock table and max trx id on page or compress the page.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit(). */
-UNIV_INTERN
-void
-page_copy_rec_list_end_no_locks(
-/*============================*/
- buf_block_t* new_block, /*!< in: index page to copy to */
- buf_block_t* block, /*!< in: index page of rec */
- rec_t* rec, /*!< in: record on page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* new_page = buf_block_get_frame(new_block);
- page_cur_t cur1;
- rec_t* cur2;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- page_cur_position(rec, block, &cur1);
-
- if (page_cur_is_before_first(&cur1)) {
-
- page_cur_move_to_next(&cur1);
- }
-
- btr_assert_not_corrupted(new_block, index);
- ut_a(page_is_comp(new_page) == page_rec_is_comp(rec));
- ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
- (page_is_comp(new_page) ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
-
- cur2 = page_get_infimum_rec(buf_block_get_frame(new_block));
-
- /* Copy records from the original page to the new page */
-
- while (!page_cur_is_after_last(&cur1)) {
- rec_t* cur1_rec = page_cur_get_rec(&cur1);
- rec_t* ins_rec;
- offsets = rec_get_offsets(cur1_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- ins_rec = page_cur_insert_rec_low(cur2, index,
- cur1_rec, offsets, mtr);
- if (UNIV_UNLIKELY(!ins_rec)) {
- /* Track an assertion failure reported on the mailing
- list on June 18th, 2003 */
-
- buf_page_print(new_page, 0,
- BUF_PAGE_PRINT_NO_CRASH);
- buf_page_print(page_align(rec), 0,
- BUF_PAGE_PRINT_NO_CRASH);
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- "InnoDB: rec offset %lu, cur1 offset %lu,"
- " cur2 offset %lu\n",
- (ulong) page_offset(rec),
- (ulong) page_offset(page_cur_get_rec(&cur1)),
- (ulong) page_offset(cur2));
- ut_error;
- }
-
- page_cur_move_to_next(&cur1);
- cur2 = ins_rec;
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Copies records from page to new_page, from a given record onward,
-including that record. Infimum and supremum records are not copied.
-The records are copied to the start of the record list on new_page.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return pointer to the original successor of the infimum record on
-new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
-rec_t*
-page_copy_rec_list_end(
-/*===================*/
- buf_block_t* new_block, /*!< in/out: index page to copy to */
- buf_block_t* block, /*!< in: index page containing rec */
- rec_t* rec, /*!< in: record on page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* new_page = buf_block_get_frame(new_block);
- page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
- page_t* page = page_align(rec);
- rec_t* ret = page_rec_get_next(
- page_get_infimum_rec(new_page));
- ulint log_mode = 0; /* remove warning */
-
-#ifdef UNIV_ZIP_DEBUG
- if (new_page_zip) {
- page_zip_des_t* page_zip = buf_block_get_page_zip(block);
- ut_a(page_zip);
-
- /* Strict page_zip_validate() may fail here.
- Furthermore, btr_compress() may set FIL_PAGE_PREV to
- FIL_NULL on new_page while leaving it intact on
- new_page_zip. So, we cannot validate new_page_zip. */
- ut_a(page_zip_validate_low(page_zip, page, index, TRUE));
- }
-#endif /* UNIV_ZIP_DEBUG */
- ut_ad(buf_block_get_frame(block) == page);
- ut_ad(page_is_leaf(page) == page_is_leaf(new_page));
- ut_ad(page_is_comp(page) == page_is_comp(new_page));
- /* Here, "ret" may be pointing to a user record or the
- predefined supremum record. */
-
- if (new_page_zip) {
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
- }
-
- if (page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW) {
- page_copy_rec_list_end_to_created_page(new_page, rec,
- index, mtr);
- } else {
- page_copy_rec_list_end_no_locks(new_block, block, rec,
- index, mtr);
- }
-
- /* Update PAGE_MAX_TRX_ID on the uncompressed page.
- Modifications will be redo logged and copied to the compressed
- page in page_zip_compress() or page_zip_reorganize() below. */
- if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
- page_update_max_trx_id(new_block, NULL,
- page_get_max_trx_id(page), mtr);
- }
-
- if (new_page_zip) {
- mtr_set_log_mode(mtr, log_mode);
-
- if (!page_zip_compress(new_page_zip, new_page,
- index, page_zip_level, mtr)) {
- /* Before trying to reorganize the page,
- store the number of preceding records on the page. */
- ulint ret_pos
- = page_rec_get_n_recs_before(ret);
- /* Before copying, "ret" was the successor of
- the predefined infimum record. It must still
- have at least one predecessor (the predefined
- infimum record, or a freshly copied record
- that is smaller than "ret"). */
- ut_a(ret_pos > 0);
-
- if (!page_zip_reorganize(new_block, index, mtr)) {
-
- btr_blob_dbg_remove(new_page, index,
- "copy_end_reorg_fail");
- if (!page_zip_decompress(new_page_zip,
- new_page, FALSE)) {
- ut_error;
- }
- ut_ad(page_validate(new_page, index));
- btr_blob_dbg_add(new_page, index,
- "copy_end_reorg_fail");
- return(NULL);
- } else {
- /* The page was reorganized:
- Seek to ret_pos. */
- ret = new_page + PAGE_NEW_INFIMUM;
-
- do {
- ret = rec_get_next_ptr(ret, TRUE);
- } while (--ret_pos);
- }
- }
- }
-
- /* Update the lock table and possible hash index */
-
- lock_move_rec_list_end(new_block, block, rec);
-
- btr_search_move_or_delete_hash_entries(new_block, block, index);
-
- return(ret);
-}
-
-/*************************************************************//**
-Copies records from page to new_page, up to the given record,
-NOT including that record. Infimum and supremum records are not copied.
-The records are copied to the end of the record list on new_page.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return pointer to the original predecessor of the supremum record on
-new_page, or NULL on zip overflow (new_block will be decompressed) */
-UNIV_INTERN
-rec_t*
-page_copy_rec_list_start(
-/*=====================*/
- buf_block_t* new_block, /*!< in/out: index page to copy to */
- buf_block_t* block, /*!< in: index page containing rec */
- rec_t* rec, /*!< in: record on page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* new_page = buf_block_get_frame(new_block);
- page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block);
- page_cur_t cur1;
- rec_t* cur2;
- ulint log_mode = 0 /* remove warning */;
- mem_heap_t* heap = NULL;
- rec_t* ret
- = page_rec_get_prev(page_get_supremum_rec(new_page));
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- /* Here, "ret" may be pointing to a user record or the
- predefined infimum record. */
-
- if (page_rec_is_infimum(rec)) {
-
- return(ret);
- }
-
- if (new_page_zip) {
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
- }
-
- page_cur_set_before_first(block, &cur1);
- page_cur_move_to_next(&cur1);
-
- cur2 = ret;
-
- /* Copy records from the original page to the new page */
-
- while (page_cur_get_rec(&cur1) != rec) {
- rec_t* cur1_rec = page_cur_get_rec(&cur1);
- offsets = rec_get_offsets(cur1_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- cur2 = page_cur_insert_rec_low(cur2, index,
- cur1_rec, offsets, mtr);
- ut_a(cur2);
-
- page_cur_move_to_next(&cur1);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- /* Update PAGE_MAX_TRX_ID on the uncompressed page.
- Modifications will be redo logged and copied to the compressed
- page in page_zip_compress() or page_zip_reorganize() below. */
- if (dict_index_is_sec_or_ibuf(index)
- && page_is_leaf(page_align(rec))) {
- page_update_max_trx_id(new_block, NULL,
- page_get_max_trx_id(page_align(rec)),
- mtr);
- }
-
- if (new_page_zip) {
- mtr_set_log_mode(mtr, log_mode);
-
- DBUG_EXECUTE_IF("page_copy_rec_list_start_compress_fail",
- goto zip_reorganize;);
-
- if (!page_zip_compress(new_page_zip, new_page, index,
- page_zip_level, mtr)) {
-
- ulint ret_pos;
-#ifndef DBUG_OFF
-zip_reorganize:
-#endif /* DBUG_OFF */
- /* Before trying to reorganize the page,
- store the number of preceding records on the page. */
- ret_pos = page_rec_get_n_recs_before(ret);
- /* Before copying, "ret" was the predecessor
- of the predefined supremum record. If it was
- the predefined infimum record, then it would
- still be the infimum, and we would have
- ret_pos == 0. */
-
- if (UNIV_UNLIKELY
- (!page_zip_reorganize(new_block, index, mtr))) {
-
- btr_blob_dbg_remove(new_page, index,
- "copy_start_reorg_fail");
- if (UNIV_UNLIKELY
- (!page_zip_decompress(new_page_zip,
- new_page, FALSE))) {
- ut_error;
- }
- ut_ad(page_validate(new_page, index));
- btr_blob_dbg_add(new_page, index,
- "copy_start_reorg_fail");
- return(NULL);
- }
-
- /* The page was reorganized: Seek to ret_pos. */
- ret = page_rec_get_nth(new_page, ret_pos);
- }
- }
-
- /* Update the lock table and possible hash index */
-
- lock_move_rec_list_start(new_block, block, rec, ret);
-
- btr_search_move_or_delete_hash_entries(new_block, block, index);
-
- return(ret);
-}
-
-/**********************************************************//**
-Writes a log record of a record list end or start deletion. */
-UNIV_INLINE
-void
-page_delete_rec_list_write_log(
-/*===========================*/
- rec_t* rec, /*!< in: record on page */
- dict_index_t* index, /*!< in: record descriptor */
- byte type, /*!< in: operation type:
- MLOG_LIST_END_DELETE, ... */
- mtr_t* mtr) /*!< in: mtr */
-{
- byte* log_ptr;
- ut_ad(type == MLOG_LIST_END_DELETE
- || type == MLOG_LIST_START_DELETE
- || type == MLOG_COMP_LIST_END_DELETE
- || type == MLOG_COMP_LIST_START_DELETE);
-
- log_ptr = mlog_open_and_write_index(mtr, rec, index, type, 2);
- if (log_ptr) {
- /* Write the parameter as a 2-byte ulint */
- mach_write_to_2(log_ptr, page_offset(rec));
- mlog_close(mtr, log_ptr + 2);
- }
-}
-#else /* !UNIV_HOTBACKUP */
-# define page_delete_rec_list_write_log(rec,index,type,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************//**
-Parses a log record of a record list end or start deletion.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_parse_delete_rec_list(
-/*=======================*/
- byte type, /*!< in: MLOG_LIST_END_DELETE,
- MLOG_LIST_START_DELETE,
- MLOG_COMP_LIST_END_DELETE or
- MLOG_COMP_LIST_START_DELETE */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- buf_block_t* block, /*!< in/out: buffer block or NULL */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- page_t* page;
- ulint offset;
-
- ut_ad(type == MLOG_LIST_END_DELETE
- || type == MLOG_LIST_START_DELETE
- || type == MLOG_COMP_LIST_END_DELETE
- || type == MLOG_COMP_LIST_START_DELETE);
-
- /* Read the record offset as a 2-byte ulint */
-
- if (end_ptr < ptr + 2) {
-
- return(NULL);
- }
-
- offset = mach_read_from_2(ptr);
- ptr += 2;
-
- if (!block) {
-
- return(ptr);
- }
-
- page = buf_block_get_frame(block);
-
- ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
-
- if (type == MLOG_LIST_END_DELETE
- || type == MLOG_COMP_LIST_END_DELETE) {
- page_delete_rec_list_end(page + offset, block, index,
- ULINT_UNDEFINED, ULINT_UNDEFINED,
- mtr);
- } else {
- page_delete_rec_list_start(page + offset, block, index, mtr);
- }
-
- return(ptr);
-}
-
-/*************************************************************//**
-Deletes records from a page from a given record onward, including that record.
-The infimum and supremum records are not deleted. */
-UNIV_INTERN
-void
-page_delete_rec_list_end(
-/*=====================*/
- rec_t* rec, /*!< in: pointer to record on page */
- buf_block_t* block, /*!< in: buffer block of the page */
- dict_index_t* index, /*!< in: record descriptor */
- ulint n_recs, /*!< in: number of records to delete,
- or ULINT_UNDEFINED if not known */
- ulint size, /*!< in: the sum of the sizes of the
- records in the end of the chain to
- delete, or ULINT_UNDEFINED if not known */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_dir_slot_t*slot;
- ulint slot_index;
- rec_t* last_rec;
- rec_t* prev_rec;
- ulint n_owned;
- page_zip_des_t* page_zip = buf_block_get_page_zip(block);
- page_t* page = page_align(rec);
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
- ut_ad(!page_zip || page_rec_is_comp(rec));
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
- if (page_rec_is_supremum(rec)) {
- ut_ad(n_recs == 0 || n_recs == ULINT_UNDEFINED);
- /* Nothing to do, there are no records bigger than the
- page supremum. */
- return;
- }
-
- if (recv_recovery_is_on()) {
- /* If we are replaying a redo log record, we must
- replay it exactly. Since MySQL 5.6.11, we should be
- generating a redo log record for page creation if
- the page would become empty. Thus, this branch should
- only be executed when applying redo log that was
- generated by an older version of MySQL. */
- } else if (page_rec_is_infimum(rec)
- || n_recs == page_get_n_recs(page)) {
-delete_all:
- /* We are deleting all records. */
- page_create_empty(block, index, mtr);
- return;
- } else if (page_is_comp(page)) {
- if (page_rec_get_next_low(page + PAGE_NEW_INFIMUM, 1) == rec) {
- /* We are deleting everything from the first
- user record onwards. */
- goto delete_all;
- }
- } else {
- if (page_rec_get_next_low(page + PAGE_OLD_INFIMUM, 0) == rec) {
- /* We are deleting everything from the first
- user record onwards. */
- goto delete_all;
- }
- }
-
- /* Reset the last insert info in the page header and increment
- the modify clock for the frame */
-
- page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
-
- /* The page gets invalid for optimistic searches: increment the
- frame modify clock */
-
- buf_block_modify_clock_inc(block);
-
- page_delete_rec_list_write_log(rec, index, page_is_comp(page)
- ? MLOG_COMP_LIST_END_DELETE
- : MLOG_LIST_END_DELETE, mtr);
-
- if (page_zip) {
- ulint log_mode;
-
- ut_a(page_is_comp(page));
- /* Individual deletes are not logged */
-
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-
- do {
- page_cur_t cur;
- page_cur_position(rec, block, &cur);
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- rec = rec_get_next_ptr(rec, TRUE);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
- page_cur_delete_rec(&cur, index, offsets, mtr);
- } while (page_offset(rec) != PAGE_NEW_SUPREMUM);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- /* Restore log mode */
-
- mtr_set_log_mode(mtr, log_mode);
- return;
- }
-
- prev_rec = page_rec_get_prev(rec);
-
- last_rec = page_rec_get_prev(page_get_supremum_rec(page));
-
- bool scrub = srv_immediate_scrub_data_uncompressed;
- if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED) ||
- scrub) {
- rec_t* rec2 = rec;
- /* Calculate the sum of sizes and the number of records */
- size = 0;
- n_recs = 0;
-
- do {
- ulint s;
- offsets = rec_get_offsets(rec2, index, offsets,
- ULINT_UNDEFINED, &heap);
- s = rec_offs_size(offsets);
- ut_ad(rec2 - page + s - rec_offs_extra_size(offsets)
- < UNIV_PAGE_SIZE);
- ut_ad(size + s < UNIV_PAGE_SIZE);
- size += s;
- n_recs++;
-
- if (scrub) {
- /* scrub record */
- uint recsize = rec_offs_data_size(offsets);
- memset(rec2, 0, recsize);
- }
-
- rec2 = page_rec_get_next(rec2);
- } while (!page_rec_is_supremum(rec2));
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- ut_ad(size < UNIV_PAGE_SIZE);
-
- /* Update the page directory; there is no need to balance the number
- of the records owned by the supremum record, as it is allowed to be
- less than PAGE_DIR_SLOT_MIN_N_OWNED */
-
- if (page_is_comp(page)) {
- rec_t* rec2 = rec;
- ulint count = 0;
-
- while (rec_get_n_owned_new(rec2) == 0) {
- count++;
-
- rec2 = rec_get_next_ptr(rec2, TRUE);
- }
-
- ut_ad(rec_get_n_owned_new(rec2) > count);
-
- n_owned = rec_get_n_owned_new(rec2) - count;
- slot_index = page_dir_find_owner_slot(rec2);
- ut_ad(slot_index > 0);
- slot = page_dir_get_nth_slot(page, slot_index);
- } else {
- rec_t* rec2 = rec;
- ulint count = 0;
-
- while (rec_get_n_owned_old(rec2) == 0) {
- count++;
-
- rec2 = rec_get_next_ptr(rec2, FALSE);
- }
-
- ut_ad(rec_get_n_owned_old(rec2) > count);
-
- n_owned = rec_get_n_owned_old(rec2) - count;
- slot_index = page_dir_find_owner_slot(rec2);
- ut_ad(slot_index > 0);
- slot = page_dir_get_nth_slot(page, slot_index);
- }
-
- page_dir_slot_set_rec(slot, page_get_supremum_rec(page));
- page_dir_slot_set_n_owned(slot, NULL, n_owned);
-
- page_dir_set_n_slots(page, NULL, slot_index + 1);
-
- /* Remove the record chain segment from the record chain */
- page_rec_set_next(prev_rec, page_get_supremum_rec(page));
-
- btr_blob_dbg_op(page, rec, index, "delete_end",
- btr_blob_dbg_remove_rec);
-
- /* Catenate the deleted chain segment to the page free list */
-
- page_rec_set_next(last_rec, page_header_get_ptr(page, PAGE_FREE));
- page_header_set_ptr(page, NULL, PAGE_FREE, rec);
-
- page_header_set_field(page, NULL, PAGE_GARBAGE, size
- + page_header_get_field(page, PAGE_GARBAGE));
-
- page_header_set_field(page, NULL, PAGE_N_RECS,
- (ulint)(page_get_n_recs(page) - n_recs));
-}
-
-/*************************************************************//**
-Deletes records from page, up to the given record, NOT including
-that record. Infimum and supremum records are not deleted.
-
-If rec is the infimum, nothing is done. If rec is the supremum, every
-user record would be deleted, so the page is recreated empty with
-page_create_empty() instead. Otherwise page_delete_rec_list_write_log()
-is called once for the whole operation and the per-record deletes run
-with the mtr log mode set to MTR_LOG_NONE. */
-UNIV_INTERN
-void
-page_delete_rec_list_start(
-/*=======================*/
- rec_t* rec, /*!< in: record on page */
- buf_block_t* block, /*!< in: buffer block of the page */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_cur_t cur1;
- ulint log_mode;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- mem_heap_t* heap = NULL;
- byte type;
-
- rec_offs_init(offsets_);
-
- ut_ad((ibool) !!page_rec_is_comp(rec)
- == dict_table_is_comp(index->table));
-#ifdef UNIV_ZIP_DEBUG
- {
- page_zip_des_t* page_zip= buf_block_get_page_zip(block);
- page_t* page = buf_block_get_frame(block);
-
- /* page_zip_validate() would detect a min_rec_mark mismatch
- in btr_page_split_and_insert()
- between btr_attach_half_pages() and insert_page = ...
- when btr_page_get_split_rec_to_left() holds
- (direction == FSP_DOWN). */
- ut_a(!page_zip
- || page_zip_validate_low(page_zip, page, index, TRUE));
- }
-#endif /* UNIV_ZIP_DEBUG */
-
- if (page_rec_is_infimum(rec)) {
- return; /* nothing precedes the infimum */
- }
-
- if (page_rec_is_supremum(rec)) {
- /* We are deleting all records. */
- page_create_empty(block, index, mtr);
- return;
- }
-
- if (page_rec_is_comp(rec)) {
- type = MLOG_COMP_LIST_START_DELETE;
- } else {
- type = MLOG_LIST_START_DELETE;
- }
-
- page_delete_rec_list_write_log(rec, index, type, mtr);
-
- page_cur_set_before_first(block, &cur1);
- page_cur_move_to_next(&cur1);
-
- /* Individual deletes are not logged */
-
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-
- /* NOTE(review): this loop never advances cur1 itself, so
- page_cur_delete_rec() presumably leaves the cursor on the record
- that followed the deleted one -- confirm against page0cur. */
- while (page_cur_get_rec(&cur1) != rec) {
- offsets = rec_get_offsets(page_cur_get_rec(&cur1), index,
- offsets, ULINT_UNDEFINED, &heap);
- page_cur_delete_rec(&cur1, index, offsets, mtr);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- /* Restore log mode */
-
- mtr_set_log_mode(mtr, log_mode);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Moves record list end to another page. Moved records include
-split_rec.
-
-The move is a copy followed by a delete: page_copy_rec_list_end() is
-called first, and only if it succeeds are the records removed from
-block with page_delete_rec_list_end(). The growth of new_block's record
-count and data size across the copy is passed to the delete as its
-n_recs and size arguments.
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return TRUE on success; FALSE on compression failure (new_block will
-be decompressed) */
-UNIV_INTERN
-ibool
-page_move_rec_list_end(
-/*===================*/
- buf_block_t* new_block, /*!< in/out: index page where to move */
- buf_block_t* block, /*!< in: index page from where to move */
- rec_t* split_rec, /*!< in: first record to move */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* new_page = buf_block_get_frame(new_block);
- ulint old_data_size;
- ulint new_data_size;
- ulint old_n_recs;
- ulint new_n_recs;
-
- old_data_size = page_get_data_size(new_page); /* before the copy */
- old_n_recs = page_get_n_recs(new_page);
-#ifdef UNIV_ZIP_DEBUG
- {
- page_zip_des_t* new_page_zip
- = buf_block_get_page_zip(new_block);
- page_zip_des_t* page_zip
- = buf_block_get_page_zip(block);
- ut_a(!new_page_zip == !page_zip);
- ut_a(!new_page_zip
- || page_zip_validate(new_page_zip, new_page, index));
- ut_a(!page_zip
- || page_zip_validate(page_zip, page_align(split_rec),
- index));
- }
-#endif /* UNIV_ZIP_DEBUG */
-
- if (UNIV_UNLIKELY(!page_copy_rec_list_end(new_block, block,
- split_rec, index, mtr))) {
- return(FALSE); /* nothing is deleted from block on failure */
- }
-
- new_data_size = page_get_data_size(new_page); /* after the copy */
- new_n_recs = page_get_n_recs(new_page);
-
- ut_ad(new_data_size >= old_data_size);
-
- page_delete_rec_list_end(split_rec, block, index,
- new_n_recs - old_n_recs,
- new_data_size - old_data_size, mtr);
-
- return(TRUE);
-}
-
-/*************************************************************//**
-Moves record list start to another page. Moved records do not include
-split_rec.
-
-The records are first copied with page_copy_rec_list_start(); only if
-that succeeds are they deleted from block with
-page_delete_rec_list_start().
-
-IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
-if new_block is a compressed leaf page in a secondary index.
-This has to be done either within the same mini-transaction,
-or by invoking ibuf_reset_free_bits() before mtr_commit().
-
-@return TRUE on success; FALSE on compression failure */
-UNIV_INTERN
-ibool
-page_move_rec_list_start(
-/*=====================*/
- buf_block_t* new_block, /*!< in/out: index page where to move */
- buf_block_t* block, /*!< in/out: page containing split_rec */
- rec_t* split_rec, /*!< in: first record not to move */
- dict_index_t* index, /*!< in: record descriptor */
- mtr_t* mtr) /*!< in: mtr */
-{
- if (UNIV_UNLIKELY(!page_copy_rec_list_start(new_block, block,
- split_rec, index, mtr))) {
- return(FALSE); /* block is left untouched by this function */
- }
-
- page_delete_rec_list_start(split_rec, block, index, mtr);
-
- return(TRUE);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**************************************************************//**
-Deletes one slot (slot_no) from the page directory. This function also
-updates the n_owned fields in the records, so that the first slot after
-the deleted one inherits the records of the deleted slot.
-
-Debug assertions require that slot_no is neither the first slot (0) nor
-the last slot of the directory. */
-UNIV_INLINE
-void
-page_dir_delete_slot(
-/*=================*/
- page_t* page, /*!< in/out: the index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint slot_no)/*!< in: slot to be deleted */
-{
- page_dir_slot_t* slot;
- ulint n_owned;
- ulint i;
- ulint n_slots;
-
- ut_ad(!page_zip || page_is_comp(page));
- ut_ad(slot_no > 0);
- ut_ad(slot_no + 1 < page_dir_get_n_slots(page));
-
- n_slots = page_dir_get_n_slots(page);
-
- /* 1. Reset the n_owned field of the slot to be
- deleted, remembering its old value */
- slot = page_dir_get_nth_slot(page, slot_no);
- n_owned = page_dir_slot_get_n_owned(slot);
- page_dir_slot_set_n_owned(slot, page_zip, 0);
-
- /* 2. Update the n_owned value of the first non-deleted slot */
-
- slot = page_dir_get_nth_slot(page, slot_no + 1);
- page_dir_slot_set_n_owned(slot, page_zip,
- n_owned + page_dir_slot_get_n_owned(slot));
-
- /* 3. Destroy the slot by copying slots: each slot i above slot_no
- has its record pointer copied into slot i - 1 */
- for (i = slot_no + 1; i < n_slots; i++) {
- rec_t* rec = (rec_t*)
- page_dir_slot_get_rec(page_dir_get_nth_slot(page, i));
- page_dir_slot_set_rec(page_dir_get_nth_slot(page, i - 1), rec);
- }
-
- /* 4. Zero out the last slot, which will be removed */
- mach_write_to_2(page_dir_get_nth_slot(page, n_slots - 1), 0);
-
- /* 5. Update the page header */
- page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots - 1);
-}
-
-/**************************************************************//**
-Adds one slot to the directory, directly above slot number start. Does
-not set the record pointer in the added slot or update n_owned values:
-this is the responsibility of the caller.
-
-The slot count in the page header is incremented first, then the slots
-numbered above start are shifted by one slot with memmove(). */
-UNIV_INLINE
-void
-page_dir_add_slot(
-/*==============*/
- page_t* page, /*!< in/out: the index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint start) /*!< in: the slot above which the new slot
- is added */
-{
- page_dir_slot_t* slot;
- ulint n_slots;
-
- n_slots = page_dir_get_n_slots(page);
-
- ut_ad(start < n_slots - 1);
-
- /* Update the page header */
- page_dir_set_n_slots(page, page_zip, n_slots + 1);
-
- /* Move slots up. n_slots still holds the old count here, so slot
- number n_slots is the new last slot. NOTE(review): the source of
- the copy is slot + PAGE_DIR_SLOT_SIZE, i.e. higher-numbered slots
- appear to live at lower addresses -- confirm against
- page_dir_get_nth_slot(). */
- slot = page_dir_get_nth_slot(page, n_slots);
- memmove(slot, slot + PAGE_DIR_SLOT_SIZE,
- (n_slots - 1 - start) * PAGE_DIR_SLOT_SIZE);
-}
-
-/****************************************************************//**
-Splits a directory slot which owns too many records.
-
-A new slot is inserted immediately below slot_no. It takes over the
-first n_owned / 2 records of the original slot, and the original slot
-(which becomes slot_no + 1) keeps the remaining n_owned - n_owned / 2.
-A debug assertion expects the slot to own exactly
-PAGE_DIR_SLOT_MAX_N_OWNED + 1 records on entry. */
-UNIV_INTERN
-void
-page_dir_split_slot(
-/*================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page whose
- uncompressed part will be written, or NULL */
- ulint slot_no)/*!< in: the directory slot */
-{
- rec_t* rec;
- page_dir_slot_t* new_slot;
- page_dir_slot_t* prev_slot;
- page_dir_slot_t* slot;
- ulint i;
- ulint n_owned;
-
- ut_ad(!page_zip || page_is_comp(page));
- ut_ad(slot_no > 0);
-
- slot = page_dir_get_nth_slot(page, slot_no);
-
- n_owned = page_dir_slot_get_n_owned(slot);
- ut_ad(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1);
-
- /* 1. We loop to find a record approximately in the middle of the
- records owned by the slot. The walk starts from the record of the
- previous slot and advances n_owned / 2 records. */
-
- prev_slot = page_dir_get_nth_slot(page, slot_no - 1);
- rec = (rec_t*) page_dir_slot_get_rec(prev_slot);
-
- for (i = 0; i < n_owned / 2; i++) {
- rec = page_rec_get_next(rec);
- }
-
- ut_ad(n_owned / 2 >= PAGE_DIR_SLOT_MIN_N_OWNED);
-
- /* 2. We add one directory slot immediately below the slot to be
- split. */
-
- page_dir_add_slot(page, page_zip, slot_no - 1);
-
- /* The added slot is now number slot_no, and the old slot is
- now number slot_no + 1 */
-
- new_slot = page_dir_get_nth_slot(page, slot_no);
- slot = page_dir_get_nth_slot(page, slot_no + 1);
-
- /* 3. We store the appropriate values to the new slot. */
-
- page_dir_slot_set_rec(new_slot, rec);
- page_dir_slot_set_n_owned(new_slot, page_zip, n_owned / 2);
-
- /* 4. Finally, we update the number of records field of the
- original slot */
-
- page_dir_slot_set_n_owned(slot, page_zip, n_owned - (n_owned / 2));
-}
-
-/*************************************************************//**
-Tries to balance the given directory slot with too few records with the upper
-neighbor, so that there are at least the minimum number of records owned by
-the slot; this may result in the merging of two slots.
-
-Nothing is done for the last directory slot. If the upper neighbor owns
-more than PAGE_DIR_SLOT_MIN_N_OWNED records, one record is transferred
-from it to this slot; otherwise this slot is removed with
-page_dir_delete_slot(), which hands its records to the upper neighbor. */
-UNIV_INTERN
-void
-page_dir_balance_slot(
-/*==================*/
- page_t* page, /*!< in/out: index page */
- page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
- ulint slot_no)/*!< in: the directory slot */
-{
- page_dir_slot_t* slot;
- page_dir_slot_t* up_slot;
- ulint n_owned;
- ulint up_n_owned;
- rec_t* old_rec;
- rec_t* new_rec;
-
- ut_ad(!page_zip || page_is_comp(page));
- ut_ad(slot_no > 0);
-
- slot = page_dir_get_nth_slot(page, slot_no);
-
- /* The last directory slot cannot be balanced with the upper
- neighbor, as there is none. */
-
- if (UNIV_UNLIKELY(slot_no == page_dir_get_n_slots(page) - 1)) {
-
- return;
- }
-
- up_slot = page_dir_get_nth_slot(page, slot_no + 1);
-
- n_owned = page_dir_slot_get_n_owned(slot);
- up_n_owned = page_dir_slot_get_n_owned(up_slot);
-
- ut_ad(n_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1);
-
- /* If the upper slot has the minimum value of n_owned, we will merge
- the two slots, therefore we assert: */
- ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED);
-
- if (up_n_owned > PAGE_DIR_SLOT_MIN_N_OWNED) {
-
- /* In this case we can just transfer one record owned
- by the upper slot to the property of the lower slot.
- The record following this slot's current record becomes
- the slot's new record and carries the increased n_owned;
- the old record's n_owned is cleared. */
- old_rec = (rec_t*) page_dir_slot_get_rec(slot);
-
- if (page_is_comp(page)) {
- new_rec = rec_get_next_ptr(old_rec, TRUE);
-
- rec_set_n_owned_new(old_rec, page_zip, 0);
- rec_set_n_owned_new(new_rec, page_zip, n_owned + 1);
- } else {
- new_rec = rec_get_next_ptr(old_rec, FALSE);
-
- rec_set_n_owned_old(old_rec, 0);
- rec_set_n_owned_old(new_rec, n_owned + 1);
- }
-
- page_dir_slot_set_rec(slot, new_rec);
-
- page_dir_slot_set_n_owned(up_slot, page_zip, up_n_owned -1);
- } else {
- /* In this case we may merge the two slots */
- page_dir_delete_slot(page, page_zip, slot_no);
- }
-}
-
-/************************************************************//**
-Returns the nth record of the record list.
-This is the inverse function of page_rec_get_n_recs_before().
-
-nth == 0 returns the infimum record. Otherwise the directory is scanned
-from slot 0, subtracting each slot's n_owned from nth until a slot owning
-more than the remaining nth is found; the record list is then followed
-from the record of the preceding slot.
-@return nth record */
-UNIV_INTERN
-const rec_t*
-page_rec_get_nth_const(
-/*===================*/
- const page_t* page, /*!< in: page */
- ulint nth) /*!< in: nth record */
-{
- const page_dir_slot_t* slot;
- ulint i;
- ulint n_owned;
- const rec_t* rec;
-
- if (nth == 0) {
- return(page_get_infimum_rec(page));
- }
-
- ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1));
-
- /* The loop has no bound on i: it relies on some slot owning more
- records than the remaining nth. */
- for (i = 0;; i++) {
-
- slot = page_dir_get_nth_slot(page, i);
- n_owned = page_dir_slot_get_n_owned(slot);
-
- if (n_owned > nth) {
- break;
- } else {
- nth -= n_owned;
- }
- }
-
- ut_ad(i > 0);
- slot = page_dir_get_nth_slot(page, i - 1);
- rec = page_dir_slot_get_rec(slot);
-
- /* The do-while loops below advance nth + 1 times from the record of
- slot i - 1. */
- if (page_is_comp(page)) {
- do {
- rec = page_rec_get_next_low(rec, TRUE);
- ut_ad(rec);
- } while (nth--);
- } else {
- do {
- rec = page_rec_get_next_low(rec, FALSE);
- ut_ad(rec);
- } while (nth--);
- }
-
- return(rec);
-}
-
-/***************************************************************//**
-Returns the number of records before the given record in chain.
-The number includes infimum and supremum records.
-
-The record list is first followed forward from rec to the record that
-has a nonzero n_owned, decrementing the counter for every step. The
-directory is then scanned from slot 0 up to the slot pointing to that
-record, adding the n_owned of each slot's record, and finally one is
-subtracted. The counter is signed because it goes negative during the
-first phase.
-@return number of records */
-UNIV_INTERN
-ulint
-page_rec_get_n_recs_before(
-/*=======================*/
- const rec_t* rec) /*!< in: the physical record */
-{
- const page_dir_slot_t* slot;
- const rec_t* slot_rec;
- const page_t* page;
- ulint i;
- lint n = 0;
-
- ut_ad(page_rec_check(rec));
-
- page = page_align(rec);
- if (page_is_comp(page)) {
- while (rec_get_n_owned_new(rec) == 0) {
-
- rec = rec_get_next_ptr_const(rec, TRUE);
- n--;
- }
-
- for (i = 0; ; i++) {
- slot = page_dir_get_nth_slot(page, i);
- slot_rec = page_dir_slot_get_rec(slot);
-
- n += rec_get_n_owned_new(slot_rec);
-
- if (rec == slot_rec) {
-
- break;
- }
- }
- } else {
- /* Same computation with the accessors for the old record format */
- while (rec_get_n_owned_old(rec) == 0) {
-
- rec = rec_get_next_ptr_const(rec, FALSE);
- n--;
- }
-
- for (i = 0; ; i++) {
- slot = page_dir_get_nth_slot(page, i);
- slot_rec = page_dir_slot_get_rec(slot);
-
- n += rec_get_n_owned_old(slot_rec);
-
- if (rec == slot_rec) {
-
- break;
- }
- }
- }
-
- n--;
-
- ut_ad(n >= 0);
- ut_ad((ulong) n < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1));
-
- return((ulint) n);
-}
-
-#ifndef UNIV_HOTBACKUP
-/************************************************************//**
-Prints record contents including the data relevant only in
-the index page context.
-
-The record is printed to stderr with rec_print_new(), followed by its
-n_owned, heap number and next-record offset. page_rec_check() and
-rec_validate() are then called on it. The function asserts (ut_a) that
-the record format of the page matches the format stored in offsets. */
-UNIV_INTERN
-void
-page_rec_print(
-/*===========*/
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: record descriptor */
-{
- ut_a(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
- rec_print_new(stderr, rec, offsets);
- if (page_rec_is_comp(rec)) {
- fprintf(stderr,
- " n_owned: %lu; heap_no: %lu; next rec: %lu\n",
- (ulong) rec_get_n_owned_new(rec),
- (ulong) rec_get_heap_no_new(rec),
- (ulong) rec_get_next_offs(rec, TRUE));
- } else {
- fprintf(stderr,
- " n_owned: %lu; heap_no: %lu; next rec: %lu\n",
- (ulong) rec_get_n_owned_old(rec),
- (ulong) rec_get_heap_no_old(rec),
- (ulong) rec_get_next_offs(rec, FALSE));
- }
-
- page_rec_check(rec); /* return values of both checks are ignored */
- rec_validate(rec, offsets);
-}
-
-# ifdef UNIV_BTR_PRINT
-/***************************************************************//**
-This is used to print the contents of the directory for
-debugging purposes.
-
-Output goes to stderr. Only the first pr_n and the last pr_n slots are
-printed; a " ... " line marks the omitted middle part. The closing
-total is PAGE_HEAP_NO_USER_LOW plus the number of records on the page. */
-UNIV_INTERN
-void
-page_dir_print(
-/*===========*/
- page_t* page, /*!< in: index page */
- ulint pr_n) /*!< in: print n first and n last entries */
-{
- ulint n;
- ulint i;
- page_dir_slot_t* slot;
-
- n = page_dir_get_n_slots(page);
-
- fprintf(stderr, "--------------------------------\n"
- "PAGE DIRECTORY\n"
- "Page address %p\n"
- "Directory stack top at offs: %lu; number of slots: %lu\n",
- page, (ulong) page_offset(page_dir_get_nth_slot(page, n - 1)),
- (ulong) n);
- for (i = 0; i < n; i++) {
- slot = page_dir_get_nth_slot(page, i);
- if ((i == pr_n) && (i < n - pr_n)) {
- fputs(" ... \n", stderr); /* start of the omitted range */
- }
- if ((i < pr_n) || (i >= n - pr_n)) {
- fprintf(stderr,
- "Contents of slot: %lu: n_owned: %lu,"
- " rec offs: %lu\n",
- (ulong) i,
- (ulong) page_dir_slot_get_n_owned(slot),
- (ulong)
- page_offset(page_dir_slot_get_rec(slot)));
- }
- }
- fprintf(stderr, "Total of %lu records\n"
- "--------------------------------\n",
- (ulong) (PAGE_HEAP_NO_USER_LOW + page_get_n_recs(page)));
-}
-
-/***************************************************************//**
-This is used to print the contents of the page record list for
-debugging purposes.
-
-Output goes to stderr via page_rec_print(). The first loop starts at the
-position set by page_cur_set_before_first() and prints records until
-pr_n cursor moves have been made or the cursor is after the last record.
-The second loop walks the rest of the list and prints only the records
-for which count + pr_n >= n_recs. The function asserts (ut_a) that the
-page format matches the table format of index. */
-UNIV_INTERN
-void
-page_print_list(
-/*============*/
- buf_block_t* block, /*!< in: index page */
- dict_index_t* index, /*!< in: dictionary index of the page */
- ulint pr_n) /*!< in: print n first and n last entries */
-{
- page_t* page = block->frame;
- page_cur_t cur;
- ulint count;
- ulint n_recs;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
-
- fprintf(stderr,
- "--------------------------------\n"
- "PAGE RECORD LIST\n"
- "Page address %p\n", page);
-
- n_recs = page_get_n_recs(page);
-
- page_cur_set_before_first(block, &cur);
- count = 0;
- for (;;) {
- offsets = rec_get_offsets(cur.rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- page_rec_print(cur.rec, offsets);
-
- if (count == pr_n) {
- break;
- }
- if (page_cur_is_after_last(&cur)) {
- break;
- }
- page_cur_move_to_next(&cur);
- count++;
- }
-
- if (n_recs > 2 * pr_n) {
- fputs(" ... \n", stderr); /* some records in the middle are skipped */
- }
-
- while (!page_cur_is_after_last(&cur)) {
- page_cur_move_to_next(&cur);
-
- if (count + pr_n >= n_recs) {
- offsets = rec_get_offsets(cur.rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- page_rec_print(cur.rec, offsets);
- }
- count++;
- }
-
- fprintf(stderr,
- "Total of %lu records \n"
- "--------------------------------\n",
- (ulong) (count + 1));
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/***************************************************************//**
-Prints the info in a page header.
-
-Writes to stderr the page address, PAGE_N_RECS, the record format
-(compact or original), PAGE_N_DIR_SLOTS, PAGE_HEAP_TOP, the heap record
-count from page_dir_get_n_heap(), PAGE_FREE, PAGE_GARBAGE,
-PAGE_LAST_INSERT, PAGE_DIRECTION and PAGE_N_DIRECTION. */
-UNIV_INTERN
-void
-page_header_print(
-/*==============*/
- const page_t* page) /*!< in: index page */
-{
- fprintf(stderr,
- "--------------------------------\n"
- "PAGE HEADER INFO\n"
- "Page address %p, n records %lu (%s)\n"
- "n dir slots %lu, heap top %lu\n"
- "Page n heap %lu, free %lu, garbage %lu\n"
- "Page last insert %lu, direction %lu, n direction %lu\n",
- page, (ulong) page_header_get_field(page, PAGE_N_RECS),
- page_is_comp(page) ? "compact format" : "original format",
- (ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS),
- (ulong) page_header_get_field(page, PAGE_HEAP_TOP),
- (ulong) page_dir_get_n_heap(page),
- (ulong) page_header_get_field(page, PAGE_FREE),
- (ulong) page_header_get_field(page, PAGE_GARBAGE),
- (ulong) page_header_get_field(page, PAGE_LAST_INSERT),
- (ulong) page_header_get_field(page, PAGE_DIRECTION),
- (ulong) page_header_get_field(page, PAGE_N_DIRECTION));
-}
-
-/***************************************************************//**
-This is used to print the contents of the page for
-debugging purposes.
-
-Calls, in order, page_header_print(), page_dir_print() with dn and
-page_print_list() with rn. */
-UNIV_INTERN
-void
-page_print(
-/*=======*/
- buf_block_t* block, /*!< in: index page */
- dict_index_t* index, /*!< in: dictionary index of the page */
- ulint dn, /*!< in: print dn first and last entries
- in directory */
- ulint rn) /*!< in: print rn first and last records
- in the record list */
-{
- page_t* page = block->frame;
-
- page_header_print(page);
- page_dir_print(page, dn);
- page_print_list(block, index, rn);
-}
-# endif /* UNIV_BTR_PRINT */
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************************//**
-The following is used to validate a record on a page. This function
-differs from rec_validate as it can also check the n_owned field and
-the heap_no field.
-
-FALSE is returned, after printing a message to stderr, when n_owned
-exceeds PAGE_DIR_SLOT_MAX_N_OWNED or when heap_no is not below the value
-of page_dir_get_n_heap(). The return values of page_rec_check() and
-rec_validate() are not inspected here. The function asserts (ut_a) that
-the page format matches the format stored in offsets.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_rec_validate(
-/*==============*/
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint n_owned;
- ulint heap_no;
- const page_t* page;
-
- page = page_align(rec);
- ut_a(!page_is_comp(page) == !rec_offs_comp(offsets));
-
- page_rec_check(rec);
- rec_validate(rec, offsets);
-
- if (page_rec_is_comp(rec)) {
- n_owned = rec_get_n_owned_new(rec);
- heap_no = rec_get_heap_no_new(rec);
- } else {
- n_owned = rec_get_n_owned_old(rec);
- heap_no = rec_get_heap_no_old(rec);
- }
-
- if (UNIV_UNLIKELY(!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED))) {
- fprintf(stderr,
- "InnoDB: Dir slot of rec %lu, n owned too big %lu\n",
- (ulong) page_offset(rec), (ulong) n_owned);
- return(FALSE);
- }
-
- if (UNIV_UNLIKELY(!(heap_no < page_dir_get_n_heap(page)))) {
- fprintf(stderr,
- "InnoDB: Heap no of rec %lu too big %lu %lu\n",
- (ulong) page_offset(rec), (ulong) heap_no,
- (ulong) page_dir_get_n_heap(page));
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***************************************************************//**
-Checks that the first directory slot points to the infimum record and
-the last to the supremum. This function is intended to track if the
-bug fixed in 4.0.14 has caused corruption to users' databases.
-
-The function only reports: on a mismatch it prints a message to stderr
-and dumps the page with buf_page_print(). It returns nothing and does
-not stop after the first failed check. */
-UNIV_INTERN
-void
-page_check_dir(
-/*===========*/
- const page_t* page) /*!< in: index page */
-{
- ulint n_slots;
- ulint infimum_offs;
- ulint supremum_offs;
-
- n_slots = page_dir_get_n_slots(page);
- infimum_offs = mach_read_from_2(page_dir_get_nth_slot(page, 0));
- supremum_offs = mach_read_from_2(page_dir_get_nth_slot(page,
- n_slots - 1));
-
- if (UNIV_UNLIKELY(!page_rec_is_infimum_low(infimum_offs))) {
-
- fprintf(stderr,
- "InnoDB: Page directory corruption:"
- " infimum not pointed to\n");
- buf_page_print(page, 0, 0);
- }
-
- if (UNIV_UNLIKELY(!page_rec_is_supremum_low(supremum_offs))) {
-
- fprintf(stderr,
- "InnoDB: Page directory corruption:"
- " supremum not pointed to\n");
- buf_page_print(page, 0, 0);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************************//**
-This function checks the consistency of an index page when we do not
-know the index. This is also resilient so that this should never crash
-even if the page is total garbage.
-
-The checks run in this order, and the first failure prints a message to
-stderr and returns FALSE:
-1. the number of directory slots is at most UNIV_PAGE_SIZE / 4;
-2. the record heap top is not above the last directory slot;
-3. the record list, followed from the infimum to the supremum, has
-records below the heap top, n_owned counts that match the number of
-records passed since the previous owner, directory slots that point to
-the owning records, next-record offsets inside the page, and no more
-than UNIV_PAGE_SIZE steps;
-4. the supremum has a nonzero n_owned, all slots were used, and
-PAGE_N_RECS agrees with the number of records walked;
-5. the free list has offsets inside the page and below the heap top,
-and is not circular;
-6. page_dir_get_n_heap() equals the number of records counted on both
-lists.
-The function asserts (ut_a) that the page is not in the compact format.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_simple_validate_old(
-/*=====================*/
- const page_t* page) /*!< in: index page in ROW_FORMAT=REDUNDANT */
-{
- const page_dir_slot_t* slot;
- ulint slot_no;
- ulint n_slots;
- const rec_t* rec;
- const byte* rec_heap_top;
- ulint count;
- ulint own_count;
- ibool ret = FALSE;
-
- ut_a(!page_is_comp(page));
-
- /* Check first that the record heap and the directory do not
- overlap. */
-
- n_slots = page_dir_get_n_slots(page);
-
- if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
- fprintf(stderr,
- "InnoDB: Nonsensical number %lu of page dir slots\n",
- (ulong) n_slots);
-
- goto func_exit;
- }
-
- rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
-
- if (UNIV_UNLIKELY(rec_heap_top
- > page_dir_get_nth_slot(page, n_slots - 1))) {
-
- fprintf(stderr,
- "InnoDB: Record heap and dir overlap on a page,"
- " heap top %lu, dir %lu\n",
- (ulong) page_header_get_field(page, PAGE_HEAP_TOP),
- (ulong)
- page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
-
- goto func_exit;
- }
-
- /* Validate the record list in a loop checking also that it is
- consistent with the page record directory. own_count is the number
- of records seen since the last owner record, including the current
- one. */
-
- count = 0;
- own_count = 1;
- slot_no = 0;
- slot = page_dir_get_nth_slot(page, slot_no);
-
- rec = page_get_infimum_rec(page);
-
- for (;;) {
- if (UNIV_UNLIKELY(rec > rec_heap_top)) {
- fprintf(stderr,
- "InnoDB: Record %lu is above"
- " rec heap top %lu\n",
- (ulong)(rec - page),
- (ulong)(rec_heap_top - page));
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(rec_get_n_owned_old(rec))) {
- /* This is a record pointed to by a dir slot */
- if (UNIV_UNLIKELY(rec_get_n_owned_old(rec)
- != own_count)) {
-
- fprintf(stderr,
- "InnoDB: Wrong owned count %lu, %lu,"
- " rec %lu\n",
- (ulong) rec_get_n_owned_old(rec),
- (ulong) own_count,
- (ulong)(rec - page));
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY
- (page_dir_slot_get_rec(slot) != rec)) {
- fprintf(stderr,
- "InnoDB: Dir slot does not point"
- " to right rec %lu\n",
- (ulong)(rec - page));
-
- goto func_exit;
- }
-
- own_count = 0;
-
- if (!page_rec_is_supremum(rec)) {
- slot_no++;
- slot = page_dir_get_nth_slot(page, slot_no);
- }
- }
-
- if (page_rec_is_supremum(rec)) {
-
- break;
- }
-
- if (UNIV_UNLIKELY
- (rec_get_next_offs(rec, FALSE) < FIL_PAGE_DATA
- || rec_get_next_offs(rec, FALSE) >= UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Next record offset"
- " nonsensical %lu for rec %lu\n",
- (ulong) rec_get_next_offs(rec, FALSE),
- (ulong) (rec - page));
-
- goto func_exit;
- }
-
- count++;
-
- if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Page record list appears"
- " to be circular %lu\n",
- (ulong) count);
- goto func_exit;
- }
-
- rec = page_rec_get_next_const(rec);
- own_count++;
- }
-
- if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
- fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n");
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
- fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
- (ulong) slot_no, (ulong) (n_slots - 1));
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
- + PAGE_HEAP_NO_USER_LOW
- != count + 1)) {
- fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
- (ulong) page_header_get_field(page, PAGE_N_RECS)
- + PAGE_HEAP_NO_USER_LOW,
- (ulong) (count + 1));
-
- goto func_exit;
- }
-
- /* Check then the free list. count is not reset, so it keeps
- accumulating over both lists for the final n_heap comparison. */
- rec = page_header_get_ptr(page, PAGE_FREE);
-
- while (rec != NULL) {
- if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
- || rec >= page + UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Free list record has"
- " a nonsensical offset %lu\n",
- (ulong) (rec - page));
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(rec > rec_heap_top)) {
- fprintf(stderr,
- "InnoDB: Free list record %lu"
- " is above rec heap top %lu\n",
- (ulong) (rec - page),
- (ulong) (rec_heap_top - page));
-
- goto func_exit;
- }
-
- count++;
-
- if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Page free list appears"
- " to be circular %lu\n",
- (ulong) count);
- goto func_exit;
- }
-
- rec = page_rec_get_next_const(rec);
- }
-
- if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
-
- fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
- (ulong) page_dir_get_n_heap(page),
- (ulong) (count + 1));
-
- goto func_exit;
- }
-
- ret = TRUE;
-
-func_exit:
- return(ret);
-}
-
-/***************************************************************//**
-This function checks the consistency of an index page when we do not
-know the index. This is also resilient so that this should never crash
-even if the page is total garbage.
-
-The checks run in this order, and the first failure prints a message to
-stderr and returns FALSE:
-1. the number of directory slots is at most UNIV_PAGE_SIZE / 4;
-2. the record heap top is not above the last directory slot;
-3. the record list, followed from the infimum to the supremum, has
-records below the heap top, n_owned counts that match the number of
-records passed since the previous owner, directory slots that point to
-the owning records, next-record offsets inside the page, and no more
-than UNIV_PAGE_SIZE steps;
-4. the supremum has a nonzero n_owned, all slots were used, and
-PAGE_N_RECS agrees with the number of records walked;
-5. the free list has offsets inside the page and below the heap top,
-and is not circular;
-6. page_dir_get_n_heap() equals the number of records counted on both
-lists.
-The function asserts (ut_a) that the page is in the compact format.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_simple_validate_new(
-/*=====================*/
- const page_t* page) /*!< in: index page in ROW_FORMAT!=REDUNDANT */
-{
- const page_dir_slot_t* slot;
- ulint slot_no;
- ulint n_slots;
- const rec_t* rec;
- const byte* rec_heap_top;
- ulint count;
- ulint own_count;
- ibool ret = FALSE;
-
- ut_a(page_is_comp(page));
-
- /* Check first that the record heap and the directory do not
- overlap. */
-
- n_slots = page_dir_get_n_slots(page);
-
- if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) {
- fprintf(stderr,
- "InnoDB: Nonsensical number %lu"
- " of page dir slots\n", (ulong) n_slots);
-
- goto func_exit;
- }
-
- rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
-
- if (UNIV_UNLIKELY(rec_heap_top
- > page_dir_get_nth_slot(page, n_slots - 1))) {
-
- fprintf(stderr,
- "InnoDB: Record heap and dir overlap on a page,"
- " heap top %lu, dir %lu\n",
- (ulong) page_header_get_field(page, PAGE_HEAP_TOP),
- (ulong)
- page_offset(page_dir_get_nth_slot(page, n_slots - 1)));
-
- goto func_exit;
- }
-
- /* Validate the record list in a loop checking also that it is
- consistent with the page record directory. own_count is the number
- of records seen since the last owner record, including the current
- one. */
-
- count = 0;
- own_count = 1;
- slot_no = 0;
- slot = page_dir_get_nth_slot(page, slot_no);
-
- rec = page_get_infimum_rec(page);
-
- for (;;) {
- if (UNIV_UNLIKELY(rec > rec_heap_top)) {
- fprintf(stderr,
- "InnoDB: Record %lu is above rec"
- " heap top %lu\n",
- (ulong) page_offset(rec),
- (ulong) page_offset(rec_heap_top));
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
- /* This is a record pointed to by a dir slot */
- if (UNIV_UNLIKELY(rec_get_n_owned_new(rec)
- != own_count)) {
-
- fprintf(stderr,
- "InnoDB: Wrong owned count %lu, %lu,"
- " rec %lu\n",
- (ulong) rec_get_n_owned_new(rec),
- (ulong) own_count,
- (ulong) page_offset(rec));
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY
- (page_dir_slot_get_rec(slot) != rec)) {
- fprintf(stderr,
- "InnoDB: Dir slot does not point"
- " to right rec %lu\n",
- (ulong) page_offset(rec));
-
- goto func_exit;
- }
-
- own_count = 0;
-
- if (!page_rec_is_supremum(rec)) {
- slot_no++;
- slot = page_dir_get_nth_slot(page, slot_no);
- }
- }
-
- if (page_rec_is_supremum(rec)) {
-
- break;
- }
-
- if (UNIV_UNLIKELY
- (rec_get_next_offs(rec, TRUE) < FIL_PAGE_DATA
- || rec_get_next_offs(rec, TRUE) >= UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Next record offset nonsensical %lu"
- " for rec %lu\n",
- (ulong) rec_get_next_offs(rec, TRUE),
- (ulong) page_offset(rec));
-
- goto func_exit;
- }
-
- count++;
-
- if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Page record list appears"
- " to be circular %lu\n",
- (ulong) count);
- goto func_exit;
- }
-
- rec = page_rec_get_next_const(rec);
- own_count++;
- }
-
- if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
- fprintf(stderr, "InnoDB: n owned is zero"
- " in a supremum rec\n");
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
- fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
- (ulong) slot_no, (ulong) (n_slots - 1));
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
- + PAGE_HEAP_NO_USER_LOW
- != count + 1)) {
- fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
- (ulong) page_header_get_field(page, PAGE_N_RECS)
- + PAGE_HEAP_NO_USER_LOW,
- (ulong) (count + 1));
-
- goto func_exit;
- }
-
- /* Check then the free list. count is not reset, so it keeps
- accumulating over both lists for the final n_heap comparison. */
- rec = page_header_get_ptr(page, PAGE_FREE);
-
- while (rec != NULL) {
- if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA
- || rec >= page + UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Free list record has"
- " a nonsensical offset %lu\n",
- (ulong) page_offset(rec));
-
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(rec > rec_heap_top)) {
- fprintf(stderr,
- "InnoDB: Free list record %lu"
- " is above rec heap top %lu\n",
- (ulong) page_offset(rec),
- (ulong) page_offset(rec_heap_top));
-
- goto func_exit;
- }
-
- count++;
-
- if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) {
- fprintf(stderr,
- "InnoDB: Page free list appears"
- " to be circular %lu\n",
- (ulong) count);
- goto func_exit;
- }
-
- rec = page_rec_get_next_const(rec);
- }
-
- if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
-
- fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
- (ulong) page_dir_get_n_heap(page),
- (ulong) (count + 1));
-
- goto func_exit;
- }
-
- ret = TRUE;
-
-func_exit:
- return(ret);
-}
-
-/***************************************************************//**
-This function checks the consistency of an index page. It first runs
-page_simple_validate_new() or page_simple_validate_old(), then walks the
-record list and the free list, checking each record, the record order
-(unless UNIV_HOTBACKUP is defined), the directory slots and owned
-counts, that no two records overlap, and the header counters. On
-failure a diagnostic is written to stderr and the page is dumped with
-buf_page_print().
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-page_validate(
-/*==========*/
- const page_t* page, /*!< in: index page */
- dict_index_t* index) /*!< in: data dictionary index containing
- the page record type definition */
-{
- const page_dir_slot_t* slot;
- mem_heap_t* heap;
- byte* buf;
- ulint count;
- ulint own_count;
- ulint rec_own_count;
- ulint slot_no;
- ulint data_size;
- const rec_t* rec;
- const rec_t* old_rec = NULL;
- ulint offs;
- ulint n_slots;
- ibool ret = FALSE;
- ulint i;
- ulint* offsets = NULL;
- ulint* old_offsets = NULL;
-
- /* The checks up to mem_heap_create() jump to func_exit2 on
- failure, because there is no heap to free yet. */
-
- if (UNIV_UNLIKELY((ibool) !!page_is_comp(page)
- != dict_table_is_comp(index->table))) {
- fputs("InnoDB: 'compact format' flag mismatch\n", stderr);
- goto func_exit2;
- }
- if (page_is_comp(page)) {
- if (UNIV_UNLIKELY(!page_simple_validate_new(page))) {
- goto func_exit2;
- }
- } else {
- if (UNIV_UNLIKELY(!page_simple_validate_old(page))) {
- goto func_exit2;
- }
- }
-
- /* On a non-empty leaf page of a secondary index or the change
- buffer, PAGE_MAX_TRX_ID must be nonzero and must not exceed the
- value returned by trx_sys_get_max_trx_id(). */
- if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)
- && !page_is_empty(page)) {
- trx_id_t max_trx_id = page_get_max_trx_id(page);
- trx_id_t sys_max_trx_id = trx_sys_get_max_trx_id();
-
- if (max_trx_id == 0 || max_trx_id > sys_max_trx_id) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "PAGE_MAX_TRX_ID out of bounds: "
- TRX_ID_FMT ", " TRX_ID_FMT,
- max_trx_id, sys_max_trx_id);
- goto func_exit2;
- }
- }
-
- heap = mem_heap_create(UNIV_PAGE_SIZE + 200);
-
- /* The following buffer is used to check that the
- records in the page record heap do not overlap.
- It holds one zero-initialized byte per page byte; a byte is
- set to 1 once a record has been found to occupy that offset. */
-
- buf = static_cast<byte*>(mem_heap_zalloc(heap, UNIV_PAGE_SIZE));
-
- /* Check first that the record heap and the directory do not
- overlap. */
-
- n_slots = page_dir_get_n_slots(page);
-
- if (UNIV_UNLIKELY(!(page_header_get_ptr(page, PAGE_HEAP_TOP)
- <= page_dir_get_nth_slot(page, n_slots - 1)))) {
-
- fprintf(stderr,
- "InnoDB: Record heap and dir overlap"
- " on space %lu page %lu index %s, %p, %p\n",
- (ulong) page_get_space_id(page),
- (ulong) page_get_page_no(page), index->name,
- page_header_get_ptr(page, PAGE_HEAP_TOP),
- page_dir_get_nth_slot(page, n_slots - 1));
-
- goto func_exit;
- }
-
- /* Validate the record list in a loop checking also that
- it is consistent with the directory. */
- count = 0;
- data_size = 0;
- own_count = 1;
- slot_no = 0;
- slot = page_dir_get_nth_slot(page, slot_no);
-
- rec = page_get_infimum_rec(page);
-
- for (;;) {
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (page_is_comp(page) && page_rec_is_user_rec(rec)
- && UNIV_UNLIKELY(rec_get_node_ptr_flag(rec)
- == page_is_leaf(page))) {
- fputs("InnoDB: node_ptr flag mismatch\n", stderr);
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
- goto func_exit;
- }
-
-#ifndef UNIV_HOTBACKUP
- /* Check that the records are in the ascending order.
- The comparison is skipped for the first iterations
- (count < PAGE_HEAP_NO_USER_LOW), where old_rec is still
- NULL or the infimum, and for the supremum. */
- if (UNIV_LIKELY(count >= PAGE_HEAP_NO_USER_LOW)
- && !page_rec_is_supremum(rec)) {
- if (UNIV_UNLIKELY
- (1 != cmp_rec_rec(rec, old_rec,
- offsets, old_offsets, index))) {
- fprintf(stderr,
- "InnoDB: Records in wrong order"
- " on space %lu page %lu index %s\n",
- (ulong) page_get_space_id(page),
- (ulong) page_get_page_no(page),
- index->name);
- fputs("\nInnoDB: previous record ", stderr);
- rec_print_new(stderr, old_rec, old_offsets);
- fputs("\nInnoDB: record ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
-
- goto func_exit;
- }
- }
-#endif /* !UNIV_HOTBACKUP */
-
- if (page_rec_is_user_rec(rec)) {
-
- data_size += rec_offs_size(offsets);
- }
-
- /* Mark the bytes [offs, offs + i) occupied by this record
- in buf, failing if any of them is already marked. */
- offs = page_offset(rec_get_start(rec, offsets));
- i = rec_offs_size(offsets);
- if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) {
- fputs("InnoDB: record offset out of bounds\n", stderr);
- goto func_exit;
- }
-
- while (i--) {
- if (UNIV_UNLIKELY(buf[offs + i])) {
- /* No other record may overlap this */
-
- fputs("InnoDB: Record overlaps another\n",
- stderr);
- goto func_exit;
- }
-
- buf[offs + i] = 1;
- }
-
- if (page_is_comp(page)) {
- rec_own_count = rec_get_n_owned_new(rec);
- } else {
- rec_own_count = rec_get_n_owned_old(rec);
- }
-
- if (UNIV_UNLIKELY(rec_own_count)) {
- /* This is a record pointed to by a dir slot */
- if (UNIV_UNLIKELY(rec_own_count != own_count)) {
- fprintf(stderr,
- "InnoDB: Wrong owned count %lu, %lu\n",
- (ulong) rec_own_count,
- (ulong) own_count);
- goto func_exit;
- }
-
- if (page_dir_slot_get_rec(slot) != rec) {
- fputs("InnoDB: Dir slot does not"
- " point to right rec\n",
- stderr);
- goto func_exit;
- }
-
- page_dir_slot_check(slot);
-
- own_count = 0;
- if (!page_rec_is_supremum(rec)) {
- slot_no++;
- slot = page_dir_get_nth_slot(page, slot_no);
- }
- }
-
- if (page_rec_is_supremum(rec)) {
- break;
- }
-
- count++;
- own_count++;
- old_rec = rec;
- rec = page_rec_get_next_const(rec);
-
- /* set old_offsets to offsets; recycle offsets
- (this local offs shadows the ulint offs declared above) */
- {
- ulint* offs = old_offsets;
- old_offsets = offsets;
- offsets = offs;
- }
- }
-
- /* rec is now the supremum; it must own at least one record.
- Both branches share the error path at n_owned_zero. */
- if (page_is_comp(page)) {
- if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) {
-
- goto n_owned_zero;
- }
- } else if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) {
-n_owned_zero:
- fputs("InnoDB: n owned is zero\n", stderr);
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(slot_no != n_slots - 1)) {
- fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n",
- (ulong) slot_no, (ulong) (n_slots - 1));
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS)
- + PAGE_HEAP_NO_USER_LOW
- != count + 1)) {
- fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
- (ulong) page_header_get_field(page, PAGE_N_RECS)
- + PAGE_HEAP_NO_USER_LOW,
- (ulong) (count + 1));
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(data_size != page_get_data_size(page))) {
- fprintf(stderr,
- "InnoDB: Summed data size %lu, returned by func %lu\n",
- (ulong) data_size, (ulong) page_get_data_size(page));
- goto func_exit;
- }
-
- /* Check then the free list. count keeps accumulating, so that
- it can be compared with the heap size afterwards. */
- rec = page_header_get_ptr(page, PAGE_FREE);
-
- while (rec != NULL) {
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- if (UNIV_UNLIKELY(!page_rec_validate(rec, offsets))) {
-
- goto func_exit;
- }
-
- count++;
- offs = page_offset(rec_get_start(rec, offsets));
- i = rec_offs_size(offsets);
- if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) {
- fputs("InnoDB: record offset out of bounds\n", stderr);
- goto func_exit;
- }
-
- while (i--) {
-
- if (UNIV_UNLIKELY(buf[offs + i])) {
- fputs("InnoDB: Record overlaps another"
- " in free list\n", stderr);
- goto func_exit;
- }
-
- buf[offs + i] = 1;
- }
-
- rec = page_rec_get_next_const(rec);
- }
-
- if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) {
- fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n",
- (ulong) page_dir_get_n_heap(page),
- (ulong) count + 1);
- goto func_exit;
- }
-
- ret = TRUE;
-
-func_exit:
- mem_heap_free(heap);
-
- if (UNIV_UNLIKELY(ret == FALSE)) {
- /* func_exit2 is entered directly by the early checks that
- fail before the heap exists; it deliberately sits after
- mem_heap_free() and inside this failure branch. */
-func_exit2:
- fprintf(stderr,
- "InnoDB: Apparent corruption"
- " in space %lu page %lu index %s\n",
- (ulong) page_get_space_id(page),
- (ulong) page_get_page_no(page),
- index->name);
- buf_page_print(page, 0, 0);
- }
-
- return(ret);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***************************************************************//**
-Walks the page record list from the infimum towards the supremum and
-returns the first record whose heap number equals heap_no.
-@return record, NULL if the supremum is reached without a match */
-UNIV_INTERN
-const rec_t*
-page_find_rec_with_heap_no(
-/*=======================*/
-	const page_t*	page,	/*!< in: index page */
-	ulint		heap_no)/*!< in: heap number */
-{
-	const rec_t*	cur;
-	ulint		cur_heap_no;
-
-	if (!page_is_comp(page)) {
-		/* page_is_comp() is zero: use the *_old accessors */
-		cur = page + PAGE_OLD_INFIMUM;
-
-		while ((cur_heap_no = rec_get_heap_no_old(cur)) != heap_no) {
-			if (cur_heap_no == PAGE_HEAP_NO_SUPREMUM) {
-				/* end of list, nothing matched */
-				return(NULL);
-			}
-
-			cur = page + rec_get_next_offs(cur, FALSE);
-		}
-
-		return(cur);
-	}
-
-	/* page_is_comp() is nonzero: use the *_new accessors */
-	cur = page + PAGE_NEW_INFIMUM;
-
-	while ((cur_heap_no = rec_get_heap_no_new(cur)) != heap_no) {
-		if (cur_heap_no == PAGE_HEAP_NO_SUPREMUM) {
-			/* end of list, nothing matched */
-			return(NULL);
-		}
-
-		cur = page + rec_get_next_offs(cur, TRUE);
-	}
-
-	return(cur);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************//**
-Removes the record from a leaf page, unless removing it would make page
-compression advisable on a non-root page. This function does not log
-any changes. It is used by the IMPORT tablespace functions.
-The cursor is moved to the next record after the deleted one.
-@return true if the record was deleted, i.e., the page did not become
-too empty; false if nothing was deleted */
-UNIV_INTERN
-bool
-page_delete_rec(
-/*============*/
-	const dict_index_t*	index,	/*!< in: The index that the record
-					belongs to */
-	page_cur_t*		pcur,	/*!< in/out: page cursor on record
-					to delete */
-	page_zip_des_t*		page_zip,/*!< in: compressed page descriptor;
-					only read under UNIV_ZIP_DEBUG */
-	const ulint*		offsets)/*!< in: offsets for record */
-{
-	buf_block_t*	block		= pcur->block;
-	page_t*		page		= buf_block_get_frame(block);
-	bool		may_delete	= true;
-
-	ut_ad(page_is_leaf(page));
-
-	if (!rec_offs_any_extern(offsets)) {
-		/* Compression is recommended when the page fill would
-		drop below a predefined minimum, OR the page is the only
-		one on its B-tree level, OR the page would become
-		empty. */
-		if ((page_get_data_size(page) - rec_offs_size(offsets)
-		     < BTR_CUR_PAGE_COMPRESS_LIMIT)
-		    || (mach_read_from_4(page + FIL_PAGE_NEXT) == FIL_NULL
-			&& mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)
-		    || (page_get_n_recs(page) < 2)) {
-
-			/* ...but never for the root page, which is
-			always deleted from in place. */
-			may_delete = (page_get_page_no(page)
-				      == dict_index_get_page(index));
-		}
-	}
-
-	if (!may_delete) {
-
-		return(false);
-	}
-
-#ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
-	page_cur_delete_rec(pcur, index, offsets, 0);
-
-#ifdef UNIV_ZIP_DEBUG
-	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
-	return(true);
-}
-
-/** Get the last non-delete-marked record on a page.
-The record list is scanned from the infimum up to, but not including,
-the supremum.
-@param[in]	page	index tree leaf page
-@return the last record, not delete-marked
-@retval infimum record if all records are delete-marked */
-
-const rec_t*
-page_find_rec_max_not_deleted(
-	const page_t*	page)
-{
-	const rec_t*	cur		= page_get_infimum_rec(page);
-	const rec_t*	last_live	= NULL; // remove warning
-
-	/* Because the page infimum is never delete-marked,
-	last_live will always be assigned to it first. */
-	ut_ad(!rec_get_deleted_flag(cur, page_rec_is_comp(cur)));
-
-	const bool		comp	= page_is_comp(page) != 0;
-	const rec_t* const	supremum = page
-		+ (comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM);
-
-	do {
-		if (!rec_get_deleted_flag(cur, comp)) {
-			last_live = cur;
-		}
-
-		cur = page_rec_get_next_low(cur, comp);
-	} while (cur != supremum);
-
-	return(last_live);
-}
-
-#endif /* #ifndef UNIV_INNOCHECKSUM */
-
-/** Issue a warning when the checksum that is stored in the page is valid,
-but different than the global setting innodb_checksum_algorithm.
-The message goes to stderr via fprintf() when UNIV_INNOCHECKSUM is defined,
-and to ib_logf() at IB_LOG_LEVEL_WARN otherwise.
-@param[in] curr_algo current checksum algorithm; must be one of the
-SRV_CHECKSUM_ALGORITHM_STRICT_* values handled below, any other value
-reaches ut_error
-@param[in] page_checksum page valid checksum
-@param[in] space_id tablespace id
-@param[in] page_no page number */
-void
-page_warn_strict_checksum(
- srv_checksum_algorithm_t curr_algo,
- srv_checksum_algorithm_t page_checksum,
- ulint space_id,
- ulint page_no)
-{
- /* Non-strict counterpart of curr_algo; its name is what the
- message suggests switching to. */
- srv_checksum_algorithm_t curr_algo_nonstrict;
- switch (curr_algo) {
- case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
- curr_algo_nonstrict = SRV_CHECKSUM_ALGORITHM_CRC32;
- break;
- case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
- curr_algo_nonstrict = SRV_CHECKSUM_ALGORITHM_INNODB;
- break;
- case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
- curr_algo_nonstrict = SRV_CHECKSUM_ALGORITHM_NONE;
- break;
- default:
- ut_error;
- }
-
- /* Only the opening of the call differs between the two builds;
- the format string and the arguments below are shared. */
-#ifdef UNIV_INNOCHECKSUM
- fprintf(stderr,
-#else
- ib_logf(IB_LOG_LEVEL_WARN,
-#endif
- "innodb_checksum_algorithm is set to \"%s\""
- " but the page [page id: space=" ULINTPF ","
- " page number=" ULINTPF "] contains a valid checksum \"%s\"."
- " Accepting the page as valid. Change innodb_checksum_algorithm"
- " to \"%s\" to silently accept such pages or rewrite all pages"
- " so that they contain \"%s\" checksum.",
- buf_checksum_algorithm_name(curr_algo),
- space_id, page_no,
- buf_checksum_algorithm_name(page_checksum),
- buf_checksum_algorithm_name(curr_algo_nonstrict),
- buf_checksum_algorithm_name(curr_algo_nonstrict));
-}
diff --git a/storage/xtradb/page/page0zip.cc b/storage/xtradb/page/page0zip.cc
deleted file mode 100644
index 32e76fb44e6..00000000000
--- a/storage/xtradb/page/page0zip.cc
+++ /dev/null
@@ -1,5066 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2014, SkySQL Ab. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file page/page0zip.cc
-Compressed page interface
-
-Created June 2005 by Marko Makela
-*******************************************************/
-
-// First include (the generated) my_config.h, to get correct platform defines.
-#include "my_config.h"
-
-#include <map>
-using namespace std;
-
-#define THIS_MODULE
-#include "page0zip.h"
-#ifdef UNIV_NONINL
-# include "page0zip.ic"
-#endif
-#undef THIS_MODULE
-#include "buf0checksum.h"
-#include "page0page.h"
-#ifndef UNIV_INNOCHECKSUM
-#include "mtr0log.h"
-#include "dict0dict.h"
-#include "btr0cur.h"
-#include "log0recv.h"
-#endif /* !UNIV_INNOCHECKSUM */
-#include "zlib.h"
-#include "fil0fil.h"
-#include "ut0sort.h"
-#include "page0types.h"
-#ifndef UNIV_HOTBACKUP
-#ifndef UNIV_INNOCHECKSUM
-# include "buf0buf.h"
-# include "btr0sea.h"
-# include "dict0boot.h"
-# include "lock0lock.h"
-# include "srv0mon.h"
-# include "srv0srv.h"
-#endif /* !UNIV_INNOCHECKSUM */
-# include "buf0lru.h"
-# include "ut0crc32.h"
-#else /* !UNIV_HOTBACKUP */
-# define lock_move_reorganize_page(block, temp_block) ((void) 0)
-# define buf_LRU_stat_inc_unzip() ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef UNIV_INNOCHECKSUM
-#include "mach0data.h"
-#endif /* UNIV_INNOCHECKSUM */
-
-#ifndef UNIV_HOTBACKUP
-#ifndef UNIV_INNOCHECKSUM
-/** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */
-UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX];
-/** Statistics on compression, indexed by index->id */
-UNIV_INTERN page_zip_stat_per_index_t page_zip_stat_per_index;
-/** Mutex protecting page_zip_stat_per_index */
-UNIV_INTERN ib_mutex_t page_zip_stat_per_index_mutex;
-#ifdef HAVE_PSI_INTERFACE
-/** Performance schema key for page_zip_stat_per_index_mutex
-(presumably; only the name ties the two together here) */
-UNIV_INTERN mysql_pfs_key_t page_zip_stat_per_index_mutex_key;
-#endif /* HAVE_PSI_INTERFACE */
-#endif /* !UNIV_INNOCHECKSUM */
-#endif /* !UNIV_HOTBACKUP */
-
-/** Compression level to be used by zlib. Settable by user.
-Initialized to DEFAULT_COMPRESSION_LEVEL. */
-UNIV_INTERN uint page_zip_level = DEFAULT_COMPRESSION_LEVEL;
-
-/** Whether or not to log compressed page images to avoid possible
-compression algorithm changes in zlib. Initialized to false. */
-UNIV_INTERN my_bool page_zip_log_pages = false;
-
-/* Please refer to ../include/page0zip.ic for a description of the
-compressed page format. */
-
-#ifndef UNIV_INNOCHECKSUM
-
-/* The infimum and supremum records are omitted from the compressed page.
-On compress, we compare that the records are there, and on uncompress we
-restore the records. */
-/** Extra bytes of an infimum record.
-The two bytes of the next-record pointer are not part of this template
-(see the commented-out "?, ?" entry), as they depend on the page contents. */
-static const byte infimum_extra[] = {
- 0x01, /* info_bits=0, n_owned=1 */
- 0x00, 0x02 /* heap_no=0, status=2 */
- /* ?, ? */ /* next=(first user rec, or supremum) */
-};
-/** Data bytes of an infimum record */
-static const byte infimum_data[] = {
- 0x69, 0x6e, 0x66, 0x69,
- 0x6d, 0x75, 0x6d, 0x00 /* "infimum\0" */
-};
-/** Extra bytes and data bytes of a supremum record.
-The leading info_bits/n_owned byte is not part of this template
-(see the commented-out "0x0?" entry), as n_owned varies from 1 to 8. */
-static const byte supremum_extra_data[] = {
- /* 0x0?, */ /* info_bits=0, n_owned=1..8 */
- 0x00, 0x0b, /* heap_no=1, status=3 */
- 0x00, 0x00, /* next=0 */
- 0x73, 0x75, 0x70, 0x72,
- 0x65, 0x6d, 0x75, 0x6d /* "supremum" */
-};
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/** Debug assertion that a block of memory is filled with zero bytes.
-At most sizeof(field_ref_zero) bytes are compared.
-@param b	in: memory block
-@param s	in: size of the memory block, in bytes */
-#define ASSERT_ZERO(b, s)					\
-	ut_ad(!memcmp((b), field_ref_zero,			\
-		      ut_min((s), sizeof field_ref_zero)))
-/** Debug assertion that a BLOB pointer is filled with zero bytes.
-@param b	in: BLOB pointer */
-#define ASSERT_ZERO_BLOB(b)					\
-	ut_ad(!memcmp((b), field_ref_zero, sizeof field_ref_zero))
-
-/* Enable some extra debugging output.  This code can be enabled
-independently of any UNIV_ debugging conditions. */
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
-# include <stdarg.h>
-MY_ATTRIBUTE((format (printf, 1, 2)))
-/**********************************************************************//**
-Report a failure to decompress or compress. A timestamp and the
-" InnoDB: " prefix are written to stderr before the formatted message.
-@return number of characters printed, as returned by vfprintf() for
-the formatted message */
-static
-int
-page_zip_fail_func(
-/*===============*/
-	const char*	fmt,	/*!< in: printf(3) format string */
-	...)			/*!< in: arguments corresponding to fmt */
-{
-	va_list	args;
-	int	n_printed;
-
-	ut_print_timestamp(stderr);
-	fputs(" InnoDB: ", stderr);
-
-	va_start(args, fmt);
-	n_printed = vfprintf(stderr, fmt, args);
-	va_end(args);
-
-	return(n_printed);
-}
-/** Wrapper for page_zip_fail_func()
-@param fmt_args	in: parenthesized printf(3) format string and arguments */
-# define page_zip_fail(fmt_args) page_zip_fail_func fmt_args
-#else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-/** Dummy wrapper for page_zip_fail_func(); expands to nothing
-@param fmt_args	ignored: printf(3) format string and arguments */
-# define page_zip_fail(fmt_args) /* empty */
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-
-#ifndef UNIV_INNOCHECKSUM
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Determine the guaranteed free space on an empty page.
-@return minimum payload size on the page, or 0 if the fixed overhead
-does not fit in zip_size */
-UNIV_INTERN
-ulint
-page_zip_empty_size(
-/*================*/
-	ulint	n_fields,	/*!< in: number of columns in the index */
-	ulint	zip_size)	/*!< in: compressed page size in bytes */
-{
-	/* the page header and the longest uncompressed data
-	needed for one record */
-	const ulint	fixed_overhead = PAGE_DATA
-		+ PAGE_ZIP_DIR_SLOT_SIZE
-		+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN
-		+ 1/* encoded heap_no==2 in page_zip_write_rec() */
-		+ 1/* end of modification log */
-		- REC_N_NEW_EXTRA_BYTES/* omitted bytes */;
-
-	/* the space for page_zip_fields_encode() */
-	const ulint	fields_bound = compressBound(
-		static_cast<uLong>(2 * (n_fields + 1)));
-
-	/* The unsigned difference may wrap; reading it as a signed
-	value lets a shortfall show up as a negative number. */
-	const lint	payload = zip_size - fixed_overhead - fields_bound;
-
-	return(payload > 0 ? (ulint) payload : 0);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************//**
-Gets the number of elements in the dense page directory,
-including deleted records (the free list).
-@return number of elements in the dense page directory */
-UNIV_INLINE
-ulint
-page_zip_dir_elems(
-/*===============*/
-	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
-{
-	const ulint	n_heap = page_dir_get_n_heap(page_zip->data);
-
-	/* The page infimum and supremum are not counted. */
-	return(n_heap - PAGE_HEAP_NO_USER_LOW);
-}
-
-/*************************************************************//**
-Gets the size of the compressed page trailer (the dense page directory),
-including deleted records (the free list).
-@return length of dense page directory, in bytes */
-UNIV_INLINE
-ulint
-page_zip_dir_size(
-/*==============*/
-	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
-{
-	const ulint	n_elems = page_zip_dir_elems(page_zip);
-
-	/* one fixed-size slot per directory element */
-	return(PAGE_ZIP_DIR_SLOT_SIZE * n_elems);
-}
-
-/*************************************************************//**
-Gets an offset to the compressed page trailer (the dense page directory),
-including deleted records (the free list). The directory occupies the
-last n_dense slots of the compressed page.
-@return offset of the dense page directory */
-UNIV_INLINE
-ulint
-page_zip_dir_start_offs(
-/*====================*/
-	const page_zip_des_t*	page_zip,	/*!< in: compressed page */
-	ulint			n_dense)	/*!< in: directory size */
-{
-	const ulint	zip_size	= page_zip_get_size(page_zip);
-	const ulint	dir_bytes	= n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
-
-	ut_ad(dir_bytes < zip_size);
-
-	return(zip_size - dir_bytes);
-}
-
-/*************************************************************//**
-Gets a pointer to the compressed page trailer (the dense page directory),
-including deleted records (the free list).
-Note that the macro expands page_zip twice: once for ->data and once as
-the argument of page_zip_dir_start_offs().
-@param[in] page_zip compressed page
-@param[in] n_dense number of entries in the directory
-@return pointer to the dense page directory */
-#define page_zip_dir_start_low(page_zip, n_dense) \
- ((page_zip)->data + page_zip_dir_start_offs(page_zip, n_dense))
-/*************************************************************//**
-Gets a pointer to the compressed page trailer (the dense page directory),
-including deleted records (the free list).
-The number of entries is taken from page_zip_dir_elems(page_zip).
-@param[in] page_zip compressed page
-@return pointer to the dense page directory */
-#define page_zip_dir_start(page_zip) \
- page_zip_dir_start_low(page_zip, page_zip_dir_elems(page_zip))
-
-/*************************************************************//**
-Gets the size of the compressed page trailer (the dense page directory),
-only including user records (excluding the free list).
-@return length of dense page directory comprising existing records, in bytes */
-UNIV_INLINE
-ulint
-page_zip_dir_user_size(
-/*===================*/
-	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
-{
-	const ulint	n_recs	= page_get_n_recs(page_zip->data);
-	const ulint	n_bytes	= PAGE_ZIP_DIR_SLOT_SIZE * n_recs;
-
-	/* the user part can never exceed the whole directory */
-	ut_ad(n_bytes <= page_zip_dir_size(page_zip));
-
-	return(n_bytes);
-}
-
-/*************************************************************//**
-Find the slot of the given record in the dense page directory by a
-linear scan of the slots in [slot, end).
-@return dense directory slot, or NULL if record not found */
-UNIV_INLINE
-byte*
-page_zip_dir_find_low(
-/*==================*/
-	byte*	slot,	/*!< in: start of records */
-	byte*	end,	/*!< in: end of records */
-	ulint	offset)	/*!< in: offset of user record */
-{
-	byte*	cur = slot;
-
-	ut_ad(slot <= end);
-
-	while (cur < end) {
-		/* compare only the offset bits of the slot */
-		const ulint	slot_offs = mach_read_from_2(cur)
-			& PAGE_ZIP_DIR_SLOT_MASK;
-
-		if (slot_offs == offset) {
-			return(cur);
-		}
-
-		cur += PAGE_ZIP_DIR_SLOT_SIZE;
-	}
-
-	return(NULL);
-}
-
-/*************************************************************//**
-Find the slot of the given non-free record in the dense page directory.
-Only the trailing part of the directory, of page_zip_dir_user_size()
-bytes, is searched.
-@return dense directory slot, or NULL if record not found */
-UNIV_INLINE
-byte*
-page_zip_dir_find(
-/*==============*/
-	page_zip_des_t*	page_zip,		/*!< in: compressed page */
-	ulint		offset)			/*!< in: offset of user record */
-{
-	byte* const	dir_end = page_zip->data + page_zip_get_size(page_zip);
-
-	ut_ad(page_zip_simple_validate(page_zip));
-
-	byte* const	user_begin = dir_end
-		- page_zip_dir_user_size(page_zip);
-
-	return(page_zip_dir_find_low(user_begin, dir_end, offset));
-}
-
-/*************************************************************//**
-Find the slot of the given free record in the dense page directory.
-Only the part of the directory that precedes the user record slots
-is searched.
-@return dense directory slot, or NULL if record not found */
-UNIV_INLINE
-byte*
-page_zip_dir_find_free(
-/*===================*/
-	page_zip_des_t*	page_zip,		/*!< in: compressed page */
-	ulint		offset)			/*!< in: offset of user record */
-{
-	byte* const	dir_end = page_zip->data + page_zip_get_size(page_zip);
-
-	ut_ad(page_zip_simple_validate(page_zip));
-
-	byte* const	free_begin = dir_end - page_zip_dir_size(page_zip);
-	byte* const	free_end = dir_end
-		- page_zip_dir_user_size(page_zip);
-
-	return(page_zip_dir_find_low(free_begin, free_end, offset));
-}
-
-/*************************************************************//**
-Read a given slot in the dense page directory. Slots are numbered
-backwards from the end of the compressed page.
-@return record offset on the uncompressed page, possibly ORed with
-PAGE_ZIP_DIR_SLOT_DEL or PAGE_ZIP_DIR_SLOT_OWNED */
-UNIV_INLINE
-ulint
-page_zip_dir_get(
-/*=============*/
-	const page_zip_des_t*	page_zip,	/*!< in: compressed page */
-	ulint			slot)		/*!< in: slot
-						(0=first user record) */
-{
-	const byte*	dir_end;
-
-	ut_ad(page_zip_simple_validate(page_zip));
-	ut_ad(slot < page_zip_dir_size(page_zip) / PAGE_ZIP_DIR_SLOT_SIZE);
-
-	dir_end = page_zip->data + page_zip_get_size(page_zip);
-
-	return(mach_read_from_2(dir_end
-				- PAGE_ZIP_DIR_SLOT_SIZE * (slot + 1)));
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Write a log record of compressing an index page. The record consists of
-an MLOG_ZIP_PAGE_COMPRESS header, two 2-byte lengths, FIL_PAGE_PREV,
-FIL_PAGE_NEXT, the bytes from FIL_PAGE_TYPE up to page_zip->m_end, and
-the uncompressed trailer of the compressed page. Nothing is written if
-mlog_open() returns NULL. */
-static
-void
-page_zip_compress_write_log(
-/*========================*/
- const page_zip_des_t* page_zip,/*!< in: compressed page */
- const page_t* page, /*!< in: uncompressed page */
- dict_index_t* index, /*!< in: index of the B-tree node */
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- byte* log_ptr;
- ulint trailer_size;
-
- ut_ad(!dict_index_is_ibuf(index));
-
- /* 2 + 2 bytes are for the two 2-byte lengths written below;
- the 11 presumably bounds the initial log record header
- (NOTE(review): confirm against mlog_write_initial_log_record_fast()). */
- log_ptr = mlog_open(mtr, 11 + 2 + 2);
-
- if (!log_ptr) {
-
- return;
- }
-
- /* Read the number of records in the heap, excluding the infimum
- and supremum (the same computation as page_zip_dir_elems(),
- whose count includes the free list). */
- trailer_size = page_dir_get_n_heap(page_zip->data)
- - PAGE_HEAP_NO_USER_LOW;
- /* Multiply by the size of the uncompressed data stored
- per record */
- if (!page_is_leaf(page)) {
- trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
- } else if (dict_index_is_clust(index)) {
- trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE
- + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
- } else {
- trailer_size *= PAGE_ZIP_DIR_SLOT_SIZE;
- }
- /* Add the space occupied by BLOB pointers. */
- trailer_size += page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
- ut_a(page_zip->m_end > PAGE_DATA);
-#if FIL_PAGE_DATA > PAGE_DATA
-# error "FIL_PAGE_DATA > PAGE_DATA"
-#endif
- ut_a(page_zip->m_end + trailer_size <= page_zip_get_size(page_zip));
-
- log_ptr = mlog_write_initial_log_record_fast((page_t*) page,
- MLOG_ZIP_PAGE_COMPRESS,
- log_ptr, mtr);
- /* First length: size of the block starting at FIL_PAGE_TYPE
- that is appended below. */
- mach_write_to_2(log_ptr, page_zip->m_end - FIL_PAGE_TYPE);
- log_ptr += 2;
- /* Second length: size of the uncompressed trailer. */
- mach_write_to_2(log_ptr, trailer_size);
- log_ptr += 2;
- mlog_close(mtr, log_ptr);
-
- /* Write FIL_PAGE_PREV and FIL_PAGE_NEXT */
- mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_PREV, 4);
- mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_NEXT, 4);
- /* Write most of the page header, the compressed stream and
- the modification log. */
- mlog_catenate_string(mtr, page_zip->data + FIL_PAGE_TYPE,
- page_zip->m_end - FIL_PAGE_TYPE);
- /* Write the uncompressed trailer of the compressed page. */
- mlog_catenate_string(mtr, page_zip->data + page_zip_get_size(page_zip)
- - trailer_size, trailer_size);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/******************************************************//**
-Determine how many externally stored columns are contained
-in existing records with smaller heap_no than rec.
-@return number of externally stored columns in those records */
-static
-ulint
-page_zip_get_n_prev_extern(
-/*=======================*/
-	const page_zip_des_t*	page_zip,/*!< in: dense page directory on
-					compressed page */
-	const rec_t*		rec,	/*!< in: compact physical record
-					on a B-tree leaf page */
-	const dict_index_t*	index)	/*!< in: record descriptor */
-{
-	const page_t*	page	= page_align(rec);
-	const ulint	n_recs	= page_get_n_recs(page_zip->data);
-	ulint		n_ext	= 0;
-
-	ut_ad(page_is_leaf(page));
-	ut_ad(page_is_comp(page));
-	ut_ad(dict_table_is_comp(index->table));
-	ut_ad(dict_index_is_clust(index));
-	ut_ad(!dict_index_is_ibuf(index));
-
-	const ulint	heap_no = rec_get_heap_no_new(rec);
-	ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
-
-	/* Number of smaller user heap numbers still to be found; the
-	scan stops as soon as all of them have been seen, and is
-	skipped entirely when there are none. */
-	ulint		remaining = heap_no - PAGE_HEAP_NO_USER_LOW;
-
-	for (ulint slot = 0; remaining != 0 && slot < n_recs; slot++) {
-		const rec_t*	r = page
-			+ (page_zip_dir_get(page_zip, slot)
-			   & PAGE_ZIP_DIR_SLOT_MASK);
-
-		if (rec_get_heap_no_new(r) >= heap_no) {
-			continue;
-		}
-
-		n_ext += rec_get_n_extern_new(r, index, ULINT_UNDEFINED);
-		remaining--;
-	}
-
-	return(n_ext);
-}
-
-/**********************************************************************//**
-Encode the length of a fixed-length column. Values below 126 take one
-byte; larger values take two bytes, the first one carrying the 0x80 flag
-and the high-order bits.
-@return buf + length of encoded val */
-static
-byte*
-page_zip_fixed_field_encode(
-/*========================*/
-	byte*	buf,	/*!< in: pointer to buffer where to write */
-	ulint	val)	/*!< in: value to write */
-{
-	/* The one-byte values 0, 1, 126 and 127 are taken:
-	0 = nullable variable field of at most 255 bytes length;
-	1 = not null variable field of at most 255 bytes length;
-	126 = nullable variable field with maximum length >255;
-	127 = not null variable field with maximum length >255 */
-	ut_ad(val >= 2);
-
-	if (UNIV_UNLIKELY(val >= 126)) {
-		/* two-byte form */
-		buf[0] = (byte) (0x80 | (val >> 8));
-		buf[1] = (byte) val;
-
-		return(buf + 2);
-	}
-
-	/* one-byte form */
-	buf[0] = (byte) val;
-
-	return(buf + 1);
-}
-
-/**********************************************************************//**
-Write the index information for the compressed page.
-@return used size of buf */
-static
-ulint
-page_zip_fields_encode(
-/*===================*/
- ulint n, /*!< in: number of fields to compress */
- dict_index_t* index, /*!< in: index comprising at least n fields */
- ulint trx_id_pos,/*!< in: position of the trx_id column
- in the index, or ULINT_UNDEFINED if
- this is a non-leaf page */
- byte* buf) /*!< out: buffer of (n + 1) * 2 bytes */
-{
- const byte* buf_start = buf;
- ulint i;
- ulint col;
- ulint trx_id_col = 0;
- /* sum of lengths of preceding non-nullable fixed fields, or 0 */
- ulint fixed_sum = 0;
-
- ut_ad(trx_id_pos == ULINT_UNDEFINED || trx_id_pos < n);
-
- for (i = col = 0; i < n; i++) {
- dict_field_t* field = dict_index_get_nth_field(index, i);
- ulint val;
-
- if (dict_field_get_col(field)->prtype & DATA_NOT_NULL) {
- val = 1; /* set the "not nullable" flag */
- } else {
- val = 0; /* nullable field */
- }
-
- if (!field->fixed_len) {
- /* variable-length field */
- const dict_col_t* column
- = dict_field_get_col(field);
-
- if (UNIV_UNLIKELY(column->len > 255)
- || UNIV_UNLIKELY(column->mtype == DATA_BLOB)) {
- val |= 0x7e; /* max > 255 bytes */
- }
-
- if (fixed_sum) {
- /* write out the length of any
- preceding non-nullable fields */
- buf = page_zip_fixed_field_encode(
- buf, fixed_sum << 1 | 1);
- fixed_sum = 0;
- col++;
- }
-
- *buf++ = (byte) val;
- col++;
- } else if (val) {
- /* fixed-length non-nullable field */
-
- if (fixed_sum && UNIV_UNLIKELY
- (fixed_sum + field->fixed_len
- > DICT_MAX_FIXED_COL_LEN)) {
- /* Write out the length of the
- preceding non-nullable fields,
- to avoid exceeding the maximum
- length of a fixed-length column. */
- buf = page_zip_fixed_field_encode(
- buf, fixed_sum << 1 | 1);
- fixed_sum = 0;
- col++;
- }
-
- if (i && UNIV_UNLIKELY(i == trx_id_pos)) {
- if (fixed_sum) {
- /* Write out the length of any
- preceding non-nullable fields,
- and start a new trx_id column. */
- buf = page_zip_fixed_field_encode(
- buf, fixed_sum << 1 | 1);
- col++;
- }
-
- trx_id_col = col;
- fixed_sum = field->fixed_len;
- } else {
- /* add to the sum */
- fixed_sum += field->fixed_len;
- }
- } else {
- /* fixed-length nullable field */
-
- if (fixed_sum) {
- /* write out the length of any
- preceding non-nullable fields */
- buf = page_zip_fixed_field_encode(
- buf, fixed_sum << 1 | 1);
- fixed_sum = 0;
- col++;
- }
-
- buf = page_zip_fixed_field_encode(
- buf, field->fixed_len << 1);
- col++;
- }
- }
-
- if (fixed_sum) {
- /* Write out the lengths of last fixed-length columns. */
- buf = page_zip_fixed_field_encode(buf, fixed_sum << 1 | 1);
- }
-
- if (trx_id_pos != ULINT_UNDEFINED) {
- /* Write out the position of the trx_id column */
- i = trx_id_col;
- } else {
- /* Write out the number of nullable fields */
- i = index->n_nullable;
- }
-
- if (i < 128) {
- *buf++ = (byte) i;
- } else {
- *buf++ = (byte) (0x80 | i >> 8);
- *buf++ = (byte) i;
- }
-
- ut_ad((ulint) (buf - buf_start) <= (n + 2) * 2);
- return((ulint) (buf - buf_start));
-}
-
-/**********************************************************************//**
-Populate the dense page directory from the sparse directory. */
-static
-void
-page_zip_dir_encode(
-/*================*/
- const page_t* page, /*!< in: compact page */
- byte* buf, /*!< in: pointer to dense page directory[-1];
- out: dense directory on compressed page */
- const rec_t** recs) /*!< in: pointer to an array of 0, or NULL;
- out: dense page directory sorted by ascending
- address (and heap_no) */
-{
- const byte* rec;
- ulint status;
- ulint min_mark;
- ulint heap_no;
- ulint i;
- ulint n_heap;
- ulint offs;
-
- min_mark = 0;
-
- if (page_is_leaf(page)) {
- status = REC_STATUS_ORDINARY;
- } else {
- status = REC_STATUS_NODE_PTR;
- if (UNIV_UNLIKELY
- (mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL)) {
- min_mark = REC_INFO_MIN_REC_FLAG;
- }
- }
-
- n_heap = page_dir_get_n_heap(page);
-
- /* Traverse the list of stored records in the collation order,
- starting from the first user record. */
-
- rec = page + PAGE_NEW_INFIMUM;
-
- i = 0;
-
- for (;;) {
- ulint info_bits;
- offs = rec_get_next_offs(rec, TRUE);
- if (UNIV_UNLIKELY(offs == PAGE_NEW_SUPREMUM)) {
- break;
- }
- rec = page + offs;
- heap_no = rec_get_heap_no_new(rec);
- ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
- ut_a(heap_no < n_heap);
- ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR);
- ut_a(offs >= PAGE_ZIP_START);
-#if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1)
-# error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2"
-#endif
-#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_ZIP_SIZE_MAX - 1
-# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_ZIP_SIZE_MAX - 1"
-#endif
- if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) {
- offs |= PAGE_ZIP_DIR_SLOT_OWNED;
- }
-
- info_bits = rec_get_info_bits(rec, TRUE);
- if (info_bits & REC_INFO_DELETED_FLAG) {
- info_bits &= ~REC_INFO_DELETED_FLAG;
- offs |= PAGE_ZIP_DIR_SLOT_DEL;
- }
- ut_a(info_bits == min_mark);
- /* Only the smallest user record can have
- REC_INFO_MIN_REC_FLAG set. */
- min_mark = 0;
-
- mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
-
- if (UNIV_LIKELY_NULL(recs)) {
- /* Ensure that each heap_no occurs at most once. */
- ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
- /* exclude infimum and supremum */
- recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
- }
-
- ut_a(rec_get_status(rec) == status);
- }
-
- offs = page_header_get_field(page, PAGE_FREE);
-
- /* Traverse the free list (of deleted records). */
- while (offs) {
- ut_ad(!(offs & ~PAGE_ZIP_DIR_SLOT_MASK));
- rec = page + offs;
-
- heap_no = rec_get_heap_no_new(rec);
- ut_a(heap_no >= PAGE_HEAP_NO_USER_LOW);
- ut_a(heap_no < n_heap);
-
- ut_a(!rec[-REC_N_NEW_EXTRA_BYTES]); /* info_bits and n_owned */
- ut_a(rec_get_status(rec) == status);
-
- mach_write_to_2(buf - PAGE_ZIP_DIR_SLOT_SIZE * ++i, offs);
-
- if (UNIV_LIKELY_NULL(recs)) {
- /* Ensure that each heap_no occurs at most once. */
- ut_a(!recs[heap_no - PAGE_HEAP_NO_USER_LOW]);
- /* exclude infimum and supremum */
- recs[heap_no - PAGE_HEAP_NO_USER_LOW] = rec;
- }
-
- offs = rec_get_next_offs(rec, TRUE);
- }
-
- /* Ensure that each heap no occurs at least once. */
- ut_a(i + PAGE_HEAP_NO_USER_LOW == n_heap);
-}
-
-extern "C" {
-
-/**********************************************************************//**
-Allocate memory for zlib. */
-static
-void*
-page_zip_zalloc(
-/*============*/
- void* opaque, /*!< in/out: memory heap */
- uInt items, /*!< in: number of items to allocate */
- uInt size) /*!< in: size of an item in bytes */
-{
- return(mem_heap_zalloc(static_cast<mem_heap_t*>(opaque), items * size));
-}
-
-/**********************************************************************//**
-Deallocate memory for zlib. */
-static
-void
-page_zip_free(
-/*==========*/
- void* opaque MY_ATTRIBUTE((unused)), /*!< in: memory heap */
- void* address MY_ATTRIBUTE((unused)))/*!< in: object to free */
-{
-}
-
-} /* extern "C" */
-
-/**********************************************************************//**
-Configure the zlib allocator to use the given memory heap. */
-UNIV_INTERN
-void
-page_zip_set_alloc(
-/*===============*/
- void* stream, /*!< in/out: zlib stream */
- mem_heap_t* heap) /*!< in: memory heap to use */
-{
- z_stream* strm = static_cast<z_stream*>(stream);
-
- strm->zalloc = page_zip_zalloc;
- strm->zfree = page_zip_free;
- strm->opaque = heap;
-}
-
-#if 0 || defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
-/** Symbol for enabling compression and decompression diagnostics */
-# define PAGE_ZIP_COMPRESS_DBG
-#endif
-
-#ifdef PAGE_ZIP_COMPRESS_DBG
-/** Set this variable in a debugger to enable
-excessive logging in page_zip_compress(). */
-UNIV_INTERN ibool page_zip_compress_dbg;
-/** Set this variable in a debugger to enable
-binary logging of the data passed to deflate().
-When this variable is nonzero, it will act
-as a log file name generator. */
-UNIV_INTERN unsigned page_zip_compress_log;
-
-/**********************************************************************//**
-Wrapper for deflate(). Log the operation if page_zip_compress_dbg is set.
-@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
-static
-int
-page_zip_compress_deflate(
-/*======================*/
- FILE* logfile,/*!< in: log file, or NULL */
- z_streamp strm, /*!< in/out: compressed stream for deflate() */
- int flush) /*!< in: deflate() flushing method */
-{
- int status;
- if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
- ut_print_buf(stderr, strm->next_in, strm->avail_in);
- }
- if (UNIV_LIKELY_NULL(logfile)) {
- fwrite(strm->next_in, 1, strm->avail_in, logfile);
- }
- status = deflate(strm, flush);
- if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
- fprintf(stderr, " -> %d\n", status);
- }
- return(status);
-}
-
-/* Redefine deflate(). */
-# undef deflate
-/** Debug wrapper for the zlib compression routine deflate().
-Log the operation if page_zip_compress_dbg is set.
-@param strm in/out: compressed stream
-@param flush in: flushing method
-@return deflate() status: Z_OK, Z_BUF_ERROR, ... */
-# define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush)
-/** Declaration of the logfile parameter */
-# define FILE_LOGFILE FILE* logfile,
-/** The logfile parameter */
-# define LOGFILE logfile,
-#else /* PAGE_ZIP_COMPRESS_DBG */
-/** Empty declaration of the logfile parameter */
-# define FILE_LOGFILE
-/** Missing logfile parameter */
-# define LOGFILE
-#endif /* PAGE_ZIP_COMPRESS_DBG */
-
-/**********************************************************************//**
-Compress the records of a node pointer page.
-@return Z_OK, or a zlib error code */
-static
-int
-page_zip_compress_node_ptrs(
-/*========================*/
- FILE_LOGFILE
- z_stream* c_stream, /*!< in/out: compressed page stream */
- const rec_t** recs, /*!< in: dense page directory
- sorted by address */
- ulint n_dense, /*!< in: size of recs[] */
- dict_index_t* index, /*!< in: the index of the page */
- byte* storage, /*!< in: end of dense page directory */
- mem_heap_t* heap) /*!< in: temporary memory heap */
-{
- int err = Z_OK;
- ulint* offsets = NULL;
-
- do {
- const rec_t* rec = *recs++;
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- /* Only leaf nodes may contain externally stored columns. */
- ut_ad(!rec_offs_any_extern(offsets));
-
- UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
- UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
- rec_offs_extra_size(offsets));
-
- /* Compress the extra bytes. */
- c_stream->avail_in = static_cast<uInt>(
- rec - REC_N_NEW_EXTRA_BYTES - c_stream->next_in);
-
- if (c_stream->avail_in) {
- err = deflate(c_stream, Z_NO_FLUSH);
- if (UNIV_UNLIKELY(err != Z_OK)) {
- break;
- }
- }
- ut_ad(!c_stream->avail_in);
-
- /* Compress the data bytes, except node_ptr. */
- c_stream->next_in = (byte*) rec;
- c_stream->avail_in = static_cast<uInt>(
- rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE);
-
- if (c_stream->avail_in) {
- err = deflate(c_stream, Z_NO_FLUSH);
- if (UNIV_UNLIKELY(err != Z_OK)) {
- break;
- }
- }
-
- ut_ad(!c_stream->avail_in);
-
- memcpy(storage - REC_NODE_PTR_SIZE
- * (rec_get_heap_no_new(rec) - 1),
- c_stream->next_in, REC_NODE_PTR_SIZE);
- c_stream->next_in += REC_NODE_PTR_SIZE;
- } while (--n_dense);
-
- return(err);
-}
-
-/**********************************************************************//**
-Compress the records of a leaf node of a secondary index.
-@return Z_OK, or a zlib error code */
-static
-int
-page_zip_compress_sec(
-/*==================*/
- FILE_LOGFILE
- z_stream* c_stream, /*!< in/out: compressed page stream */
- const rec_t** recs, /*!< in: dense page directory
- sorted by address */
- ulint n_dense) /*!< in: size of recs[] */
-{
- int err = Z_OK;
-
- ut_ad(n_dense > 0);
-
- do {
- const rec_t* rec = *recs++;
-
- /* Compress everything up to this record. */
- c_stream->avail_in = static_cast<uInt>(
- rec - REC_N_NEW_EXTRA_BYTES
- - c_stream->next_in);
-
- if (UNIV_LIKELY(c_stream->avail_in != 0)) {
- UNIV_MEM_ASSERT_RW(c_stream->next_in,
- c_stream->avail_in);
- err = deflate(c_stream, Z_NO_FLUSH);
- if (UNIV_UNLIKELY(err != Z_OK)) {
- break;
- }
- }
-
- ut_ad(!c_stream->avail_in);
- ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
-
- /* Skip the REC_N_NEW_EXTRA_BYTES. */
-
- c_stream->next_in = (byte*) rec;
- } while (--n_dense);
-
- return(err);
-}
-
-/**********************************************************************//**
-Compress a record of a leaf node of a clustered index that contains
-externally stored columns.
-@return Z_OK, or a zlib error code */
-static
-int
-page_zip_compress_clust_ext(
-/*========================*/
- FILE_LOGFILE
- z_stream* c_stream, /*!< in/out: compressed page stream */
- const rec_t* rec, /*!< in: record */
- const ulint* offsets, /*!< in: rec_get_offsets(rec) */
- ulint trx_id_col, /*!< in: position of of DB_TRX_ID */
- byte* deleted, /*!< in: dense directory entry pointing
- to the head of the free list */
- byte* storage, /*!< in: end of dense page directory */
- byte** externs, /*!< in/out: pointer to the next
- available BLOB pointer */
- ulint* n_blobs) /*!< in/out: number of
- externally stored columns */
-{
- int err;
- ulint i;
-
- UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
- UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
- rec_offs_extra_size(offsets));
-
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- ulint len;
- const byte* src;
-
- if (UNIV_UNLIKELY(i == trx_id_col)) {
- ut_ad(!rec_offs_nth_extern(offsets, i));
- /* Store trx_id and roll_ptr
- in uncompressed form. */
- src = rec_get_nth_field(rec, offsets, i, &len);
- ut_ad(src + DATA_TRX_ID_LEN
- == rec_get_nth_field(rec, offsets,
- i + 1, &len));
- ut_ad(len == DATA_ROLL_PTR_LEN);
-
- /* Compress any preceding bytes. */
- c_stream->avail_in = static_cast<uInt>(
- src - c_stream->next_in);
-
- if (c_stream->avail_in) {
- err = deflate(c_stream, Z_NO_FLUSH);
- if (UNIV_UNLIKELY(err != Z_OK)) {
-
- return(err);
- }
- }
-
- ut_ad(!c_stream->avail_in);
- ut_ad(c_stream->next_in == src);
-
- memcpy(storage
- - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
- * (rec_get_heap_no_new(rec) - 1),
- c_stream->next_in,
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-
- c_stream->next_in
- += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
-
- /* Skip also roll_ptr */
- i++;
- } else if (rec_offs_nth_extern(offsets, i)) {
- src = rec_get_nth_field(rec, offsets, i, &len);
- ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
- src += len - BTR_EXTERN_FIELD_REF_SIZE;
-
- c_stream->avail_in = static_cast<uInt>(
- src - c_stream->next_in);
- if (UNIV_LIKELY(c_stream->avail_in != 0)) {
- err = deflate(c_stream, Z_NO_FLUSH);
- if (UNIV_UNLIKELY(err != Z_OK)) {
-
- return(err);
- }
- }
-
- ut_ad(!c_stream->avail_in);
- ut_ad(c_stream->next_in == src);
-
- /* Reserve space for the data at
- the end of the space reserved for
- the compressed data and the page
- modification log. */
-
- if (UNIV_UNLIKELY
- (c_stream->avail_out
- <= BTR_EXTERN_FIELD_REF_SIZE)) {
- /* out of space */
- return(Z_BUF_ERROR);
- }
-
- ut_ad(*externs == c_stream->next_out
- + c_stream->avail_out
- + 1/* end of modif. log */);
-
- c_stream->next_in
- += BTR_EXTERN_FIELD_REF_SIZE;
-
- /* Skip deleted records. */
- if (UNIV_LIKELY_NULL
- (page_zip_dir_find_low(
- storage, deleted,
- page_offset(rec)))) {
- continue;
- }
-
- (*n_blobs)++;
- c_stream->avail_out
- -= BTR_EXTERN_FIELD_REF_SIZE;
- *externs -= BTR_EXTERN_FIELD_REF_SIZE;
-
- /* Copy the BLOB pointer */
- memcpy(*externs, c_stream->next_in
- - BTR_EXTERN_FIELD_REF_SIZE,
- BTR_EXTERN_FIELD_REF_SIZE);
- }
- }
-
- return(Z_OK);
-}
-
-/**********************************************************************//**
-Compress the records of a leaf node of a clustered index.
-@return Z_OK, or a zlib error code */
-static
-int
-page_zip_compress_clust(
-/*====================*/
- FILE_LOGFILE
- z_stream* c_stream, /*!< in/out: compressed page stream */
- const rec_t** recs, /*!< in: dense page directory
- sorted by address */
- ulint n_dense, /*!< in: size of recs[] */
- dict_index_t* index, /*!< in: the index of the page */
- ulint* n_blobs, /*!< in: 0; out: number of
- externally stored columns */
- ulint trx_id_col, /*!< index of the trx_id column */
- byte* deleted, /*!< in: dense directory entry pointing
- to the head of the free list */
- byte* storage, /*!< in: end of dense page directory */
- mem_heap_t* heap) /*!< in: temporary memory heap */
-{
- int err = Z_OK;
- ulint* offsets = NULL;
- /* BTR_EXTERN_FIELD_REF storage */
- byte* externs = storage - n_dense
- * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-
- ut_ad(*n_blobs == 0);
-
- do {
- const rec_t* rec = *recs++;
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- ut_ad(rec_offs_n_fields(offsets)
- == dict_index_get_n_fields(index));
- UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
- UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
- rec_offs_extra_size(offsets));
-
- /* Compress the extra bytes. */
- c_stream->avail_in = static_cast<uInt>(
- rec - REC_N_NEW_EXTRA_BYTES
- - c_stream->next_in);
-
- if (c_stream->avail_in) {
- err = deflate(c_stream, Z_NO_FLUSH);
- if (UNIV_UNLIKELY(err != Z_OK)) {
-
- goto func_exit;
- }
- }
- ut_ad(!c_stream->avail_in);
- ut_ad(c_stream->next_in == rec - REC_N_NEW_EXTRA_BYTES);
-
- /* Compress the data bytes. */
-
- c_stream->next_in = (byte*) rec;
-
- /* Check if there are any externally stored columns.
- For each externally stored column, store the
- BTR_EXTERN_FIELD_REF separately. */
- if (rec_offs_any_extern(offsets)) {
- ut_ad(dict_index_is_clust(index));
-
- err = page_zip_compress_clust_ext(
- LOGFILE
- c_stream, rec, offsets, trx_id_col,
- deleted, storage, &externs, n_blobs);
-
- if (UNIV_UNLIKELY(err != Z_OK)) {
-
- goto func_exit;
- }
- } else {
- ulint len;
- const byte* src;
-
- /* Store trx_id and roll_ptr in uncompressed form. */
- src = rec_get_nth_field(rec, offsets,
- trx_id_col, &len);
- ut_ad(src + DATA_TRX_ID_LEN
- == rec_get_nth_field(rec, offsets,
- trx_id_col + 1, &len));
- ut_ad(len == DATA_ROLL_PTR_LEN);
- UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
- UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
- rec_offs_extra_size(offsets));
-
- /* Compress any preceding bytes. */
- c_stream->avail_in = static_cast<uInt>(
- src - c_stream->next_in);
-
- if (c_stream->avail_in) {
- err = deflate(c_stream, Z_NO_FLUSH);
- if (UNIV_UNLIKELY(err != Z_OK)) {
-
- return(err);
- }
- }
-
- ut_ad(!c_stream->avail_in);
- ut_ad(c_stream->next_in == src);
-
- memcpy(storage
- - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
- * (rec_get_heap_no_new(rec) - 1),
- c_stream->next_in,
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-
- c_stream->next_in
- += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
-
- /* Skip also roll_ptr */
- ut_ad(trx_id_col + 1 < rec_offs_n_fields(offsets));
- }
-
- /* Compress the last bytes of the record. */
- c_stream->avail_in = static_cast<uInt>(
- rec + rec_offs_data_size(offsets) - c_stream->next_in);
-
- if (c_stream->avail_in) {
- err = deflate(c_stream, Z_NO_FLUSH);
- if (UNIV_UNLIKELY(err != Z_OK)) {
-
- goto func_exit;
- }
- }
- ut_ad(!c_stream->avail_in);
- } while (--n_dense);
-
-func_exit:
- return(err);
-}
-
-/**********************************************************************//**
-Compress a page.
-@return TRUE on success, FALSE on failure; page_zip will be left
-intact on failure. */
-UNIV_INTERN
-ibool
-page_zip_compress(
-/*==============*/
- page_zip_des_t* page_zip,/*!< in: size; out: data, n_blobs,
- m_start, m_end, m_nonempty */
- const page_t* page, /*!< in: uncompressed page */
- dict_index_t* index, /*!< in: index of the B-tree node */
- ulint level, /*!< in: compression level */
- mtr_t* mtr) /*!< in: mini-transaction, or NULL */
-{
- z_stream c_stream;
- int err;
- ulint n_fields;/* number of index fields needed */
- byte* fields; /*!< index field information */
- byte* buf; /*!< compressed payload of the page */
- byte* buf_end;/* end of buf */
- ulint n_dense;
- ulint slot_size;/* amount of uncompressed bytes per record */
- const rec_t** recs; /*!< dense page directory, sorted by address */
- mem_heap_t* heap;
- ulint trx_id_col;
- ulint n_blobs = 0;
- byte* storage;/* storage of uncompressed columns */
-#ifndef UNIV_HOTBACKUP
- ullint usec = ut_time_us(NULL);
-#endif /* !UNIV_HOTBACKUP */
-#ifdef PAGE_ZIP_COMPRESS_DBG
- FILE* logfile = NULL;
-#endif
- /* A local copy of srv_cmp_per_index_enabled to avoid reading that
- variable multiple times in this function since it can be changed at
- anytime. */
- my_bool cmp_per_index_enabled = srv_cmp_per_index_enabled;
-
- ut_a(page_is_comp(page));
- ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX);
- ut_ad(page_simple_validate_new((page_t*) page));
- ut_ad(page_zip_simple_validate(page_zip));
- ut_ad(dict_table_is_comp(index->table));
- ut_ad(!dict_index_is_ibuf(index));
-
- UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
-
- /* Check the data that will be omitted. */
- ut_a(!memcmp(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
- infimum_extra, sizeof infimum_extra));
- ut_a(!memcmp(page + PAGE_NEW_INFIMUM,
- infimum_data, sizeof infimum_data));
- ut_a(page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES]
- /* info_bits == 0, n_owned <= max */
- <= PAGE_DIR_SLOT_MAX_N_OWNED);
- ut_a(!memcmp(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
- supremum_extra_data, sizeof supremum_extra_data));
-
- if (page_is_empty(page)) {
- ut_a(rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE)
- == PAGE_NEW_SUPREMUM);
- }
-
- if (page_is_leaf(page)) {
- n_fields = dict_index_get_n_fields(index);
- } else {
- n_fields = dict_index_get_n_unique_in_tree(index);
- }
-
- /* The dense directory excludes the infimum and supremum records. */
- n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
-#ifdef PAGE_ZIP_COMPRESS_DBG
- if (UNIV_UNLIKELY(page_zip_compress_dbg)) {
- fprintf(stderr, "compress %p %p %lu %lu %lu\n",
- (void*) page_zip, (void*) page,
- (ibool) page_is_leaf(page),
- n_fields, n_dense);
- }
- if (UNIV_UNLIKELY(page_zip_compress_log)) {
- /* Create a log file for every compression attempt. */
- char logfilename[9];
- ut_snprintf(logfilename, sizeof logfilename,
- "%08x", page_zip_compress_log++);
- logfile = fopen(logfilename, "wb");
-
- if (logfile) {
- /* Write the uncompressed page to the log. */
- fwrite(page, 1, UNIV_PAGE_SIZE, logfile);
- /* Record the compressed size as zero.
- This will be overwritten at successful exit. */
- putc(0, logfile);
- putc(0, logfile);
- putc(0, logfile);
- putc(0, logfile);
- }
- }
-#endif /* PAGE_ZIP_COMPRESS_DBG */
-#ifndef UNIV_HOTBACKUP
- page_zip_stat[page_zip->ssize - 1].compressed++;
- if (cmp_per_index_enabled) {
- mutex_enter(&page_zip_stat_per_index_mutex);
- page_zip_stat_per_index[index->id].compressed++;
- mutex_exit(&page_zip_stat_per_index_mutex);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
- >= page_zip_get_size(page_zip))) {
-
- goto err_exit;
- }
-
- MONITOR_INC(MONITOR_PAGE_COMPRESS);
-
- /* Simulate a compression failure with a probability determined by
- innodb_simulate_comp_failures, only if the page has 2 or more
- records. */
-
- if (srv_simulate_comp_failures
- && !dict_index_is_ibuf(index)
- && page_get_n_recs(page) >= 2
- && ((ulint)(rand() % 100) < srv_simulate_comp_failures)
- && strcasecmp(index->table_name, "IBUF_DUMMY") != 0) {
-
-#ifdef UNIV_DEBUG
- fprintf(stderr,
- "InnoDB: Simulating a compression failure"
- " for table %s, index %s, page %lu (%s)\n",
- index->table_name,
- index->name,
- page_get_page_no(page),
- page_is_leaf(page) ? "leaf" : "non-leaf");
-
-#endif
-
- goto err_exit;
- }
-
- heap = mem_heap_create(page_zip_get_size(page_zip)
- + n_fields * (2 + sizeof(ulint))
- + REC_OFFS_HEADER_SIZE
- + n_dense * ((sizeof *recs)
- - PAGE_ZIP_DIR_SLOT_SIZE)
- + UNIV_PAGE_SIZE * 4
- + (512 << MAX_MEM_LEVEL));
-
- recs = static_cast<const rec_t**>(
- mem_heap_zalloc(heap, n_dense * sizeof *recs));
-
- fields = static_cast<byte*>(mem_heap_alloc(heap, (n_fields + 1) * 2));
-
- buf = static_cast<byte*>(
- mem_heap_alloc(heap, page_zip_get_size(page_zip) - PAGE_DATA));
-
- buf_end = buf + page_zip_get_size(page_zip) - PAGE_DATA;
-
- /* Compress the data payload. */
- page_zip_set_alloc(&c_stream, heap);
-
- err = deflateInit2(&c_stream, static_cast<int>(level),
- Z_DEFLATED, UNIV_PAGE_SIZE_SHIFT,
- MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
- ut_a(err == Z_OK);
-
- c_stream.next_out = buf;
- /* Subtract the space reserved for uncompressed data. */
- /* Page header and the end marker of the modification log */
- c_stream.avail_out = static_cast<uInt>(buf_end - buf - 1);
-
- /* Dense page directory and uncompressed columns, if any */
- if (page_is_leaf(page)) {
- if (dict_index_is_clust(index)) {
- trx_id_col = dict_index_get_sys_col_pos(
- index, DATA_TRX_ID);
- ut_ad(trx_id_col > 0);
- ut_ad(trx_id_col != ULINT_UNDEFINED);
-
- slot_size = PAGE_ZIP_DIR_SLOT_SIZE
- + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
- } else {
- /* Signal the absence of trx_id
- in page_zip_fields_encode() */
- ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
- == ULINT_UNDEFINED);
- trx_id_col = 0;
- slot_size = PAGE_ZIP_DIR_SLOT_SIZE;
- }
- } else {
- slot_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
- trx_id_col = ULINT_UNDEFINED;
- }
-
- if (UNIV_UNLIKELY(c_stream.avail_out <= n_dense * slot_size
- + 6/* sizeof(zlib header and footer) */)) {
- goto zlib_error;
- }
-
- c_stream.avail_out -= static_cast<uInt>(n_dense * slot_size);
- c_stream.avail_in = static_cast<uInt>(
- page_zip_fields_encode(n_fields, index, trx_id_col, fields));
- c_stream.next_in = fields;
- if (UNIV_LIKELY(!trx_id_col)) {
- trx_id_col = ULINT_UNDEFINED;
- }
-
- UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
- err = deflate(&c_stream, Z_FULL_FLUSH);
- if (err != Z_OK) {
- goto zlib_error;
- }
-
- ut_ad(!c_stream.avail_in);
-
- page_zip_dir_encode(page, buf_end, recs);
-
- c_stream.next_in = (byte*) page + PAGE_ZIP_START;
-
- storage = buf_end - n_dense * PAGE_ZIP_DIR_SLOT_SIZE;
-
- /* Compress the records in heap_no order. */
- if (UNIV_UNLIKELY(!n_dense)) {
- } else if (!page_is_leaf(page)) {
- /* This is a node pointer page. */
- err = page_zip_compress_node_ptrs(LOGFILE
- &c_stream, recs, n_dense,
- index, storage, heap);
- if (UNIV_UNLIKELY(err != Z_OK)) {
- goto zlib_error;
- }
- } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
- /* This is a leaf page in a secondary index. */
- err = page_zip_compress_sec(LOGFILE
- &c_stream, recs, n_dense);
- if (UNIV_UNLIKELY(err != Z_OK)) {
- goto zlib_error;
- }
- } else {
- /* This is a leaf page in a clustered index. */
- err = page_zip_compress_clust(LOGFILE
- &c_stream, recs, n_dense,
- index, &n_blobs, trx_id_col,
- buf_end - PAGE_ZIP_DIR_SLOT_SIZE
- * page_get_n_recs(page),
- storage, heap);
- if (UNIV_UNLIKELY(err != Z_OK)) {
- goto zlib_error;
- }
- }
-
- /* Finish the compression. */
- ut_ad(!c_stream.avail_in);
- /* Compress any trailing garbage, in case the last record was
- allocated from an originally longer space on the free list,
- or the data of the last record from page_zip_compress_sec(). */
- c_stream.avail_in = static_cast<uInt>(
- page_header_get_field(page, PAGE_HEAP_TOP)
- - (c_stream.next_in - page));
- ut_a(c_stream.avail_in <= UNIV_PAGE_SIZE - PAGE_ZIP_START - PAGE_DIR);
-
- UNIV_MEM_ASSERT_RW(c_stream.next_in, c_stream.avail_in);
- err = deflate(&c_stream, Z_FINISH);
-
- if (UNIV_UNLIKELY(err != Z_STREAM_END)) {
-zlib_error:
- deflateEnd(&c_stream);
- mem_heap_free(heap);
-err_exit:
-#ifdef PAGE_ZIP_COMPRESS_DBG
- if (logfile) {
- fclose(logfile);
- }
-#endif /* PAGE_ZIP_COMPRESS_DBG */
-#ifndef UNIV_HOTBACKUP
- if (page_is_leaf(page)) {
- dict_index_zip_failure(index);
- }
-
- ullint time_diff = ut_time_us(NULL) - usec;
- page_zip_stat[page_zip->ssize - 1].compressed_usec
- += time_diff;
- if (cmp_per_index_enabled) {
- mutex_enter(&page_zip_stat_per_index_mutex);
- page_zip_stat_per_index[index->id].compressed_usec
- += time_diff;
- mutex_exit(&page_zip_stat_per_index_mutex);
- }
-#endif /* !UNIV_HOTBACKUP */
- return(FALSE);
- }
-
- err = deflateEnd(&c_stream);
- ut_a(err == Z_OK);
-
- ut_ad(buf + c_stream.total_out == c_stream.next_out);
- ut_ad((ulint) (storage - c_stream.next_out) >= c_stream.avail_out);
-
- /* Valgrind believes that zlib does not initialize some bits
- in the last 7 or 8 bytes of the stream. Make Valgrind happy. */
- UNIV_MEM_VALID(buf, c_stream.total_out);
-
- /* Zero out the area reserved for the modification log.
- Space for the end marker of the modification log is not
- included in avail_out. */
- memset(c_stream.next_out, 0, c_stream.avail_out + 1/* end marker */);
-
-#ifdef UNIV_DEBUG
- page_zip->m_start =
-#endif /* UNIV_DEBUG */
- page_zip->m_end = PAGE_DATA + c_stream.total_out;
- page_zip->m_nonempty = FALSE;
- page_zip->n_blobs = n_blobs;
- /* Copy those header fields that will not be written
- in buf_flush_init_for_writing() */
- memcpy(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
- FIL_PAGE_LSN - FIL_PAGE_PREV);
- memcpy(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2);
- memcpy(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
- PAGE_DATA - FIL_PAGE_DATA);
- /* Copy the rest of the compressed page */
- memcpy(page_zip->data + PAGE_DATA, buf,
- page_zip_get_size(page_zip) - PAGE_DATA);
- mem_heap_free(heap);
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
- if (mtr) {
-#ifndef UNIV_HOTBACKUP
- page_zip_compress_write_log(page_zip, page, index, mtr);
-#endif /* !UNIV_HOTBACKUP */
- }
-
- UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
-
-#ifdef PAGE_ZIP_COMPRESS_DBG
- if (logfile) {
- /* Record the compressed size of the block. */
- byte sz[4];
- mach_write_to_4(sz, c_stream.total_out);
- fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET);
- fwrite(sz, 1, sizeof sz, logfile);
- fclose(logfile);
- }
-#endif /* PAGE_ZIP_COMPRESS_DBG */
-#ifndef UNIV_HOTBACKUP
- ullint time_diff = ut_time_us(NULL) - usec;
- page_zip_stat[page_zip->ssize - 1].compressed_ok++;
- page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff;
- if (cmp_per_index_enabled) {
- mutex_enter(&page_zip_stat_per_index_mutex);
- page_zip_stat_per_index[index->id].compressed_ok++;
- page_zip_stat_per_index[index->id].compressed_usec += time_diff;
- mutex_exit(&page_zip_stat_per_index_mutex);
- }
-
- if (page_is_leaf(page)) {
- dict_index_zip_success(index);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Compare two page directory entries.
-@return positive if rec1 > rec2 */
-UNIV_INLINE
-ibool
-page_zip_dir_cmp(
-/*=============*/
- const rec_t* rec1, /*!< in: rec1 */
- const rec_t* rec2) /*!< in: rec2 */
-{
- return(rec1 > rec2);
-}
-
-/**********************************************************************//**
-Sort the dense page directory by address (heap_no). */
-static
-void
-page_zip_dir_sort(
-/*==============*/
- rec_t** arr, /*!< in/out: dense page directory */
- rec_t** aux_arr,/*!< in/out: work area */
- ulint low, /*!< in: lower bound of the sorting area, inclusive */
- ulint high) /*!< in: upper bound of the sorting area, exclusive */
-{
- UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high,
- page_zip_dir_cmp);
-}
-
-/**********************************************************************//**
-Deallocate the index information initialized by page_zip_fields_decode(). */
-static
-void
-page_zip_fields_free(
-/*=================*/
- dict_index_t* index) /*!< in: dummy index to be freed */
-{
- if (index) {
- dict_table_t* table = index->table;
- dict_index_zip_pad_mutex_destroy(index);
- mem_heap_free(index->heap);
-
- dict_mem_table_free(table);
- }
-}
-
-/**********************************************************************//**
-Read the index information for the compressed page.
-As decoded here, every entry of buf occupies one byte, or two bytes when
-bit 0x80 of its first byte is set. Bit 0 of a field entry is the NOT NULL
-flag. The last entry is not a field: it holds the position of the trx_id
-column (trx_id_col != NULL) or the number of nullable fields
-(trx_id_col == NULL).
-@return own: dummy index describing the page, or NULL on error */
-static
-dict_index_t*
-page_zip_fields_decode(
-/*===================*/
- const byte* buf, /*!< in: index information */
- const byte* end, /*!< in: end of buf */
- ulint* trx_id_col)/*!< in: NULL for non-leaf pages;
- for leaf pages, pointer to where to store
- the position of the trx_id column
- (ULINT_UNDEFINED is stored when the
- encoded position is 0) */
-{
- const byte* b;
- ulint n;
- ulint i;
- ulint val;
- dict_table_t* table;
- dict_index_t* index;
-
- /* Determine the number of fields. This first pass only counts
- the entries; the values are decoded in the second pass below. */
- for (b = buf, n = 0; b < end; n++) {
- if (*b++ & 0x80) {
- b++; /* skip the second byte */
- }
- }
-
- n--; /* the last entry is n_nullable or trx_id, not a field */
-
- if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
-
- page_zip_fail(("page_zip_fields_decode: n = %lu\n",
- (ulong) n));
- return(NULL);
- }
-
- if (UNIV_UNLIKELY(b > end)) { /* a two-byte entry ran past end */
-
- page_zip_fail(("page_zip_fields_decode: %p > %p\n",
- (const void*) b, (const void*) end));
- return(NULL);
- }
-
- table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n,
- DICT_TF_COMPACT, 0);
- index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY",
- DICT_HDR_SPACE, 0, n);
- index->table = table;
- index->n_uniq = n;
- /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
- index->cached = TRUE;
-
- /* Initialize the fields. */
- for (b = buf, i = 0; i < n; i++) {
- ulint mtype;
- ulint len;
-
- val = *b++;
-
- if (UNIV_UNLIKELY(val & 0x80)) {
- /* fixed length > 62 bytes; two-byte entry */
- val = (val & 0x7f) << 8 | *b++;
- len = val >> 1;
- mtype = DATA_FIXBINARY;
- } else if (UNIV_UNLIKELY(val >= 126)) {
- /* variable length with max > 255 bytes */
- len = 0x7fff;
- mtype = DATA_BINARY;
- } else if (val <= 1) {
- /* variable length with max <= 255 bytes */
- len = 0;
- mtype = DATA_BINARY;
- } else {
- /* fixed length of 1 to 62 bytes (val is 2..125 here) */
- len = val >> 1;
- mtype = DATA_FIXBINARY;
- }
-
- dict_mem_table_add_col(table, NULL, NULL, mtype,
- val & 1 ? DATA_NOT_NULL : 0, len);
- dict_index_add_col(index, table,
- dict_table_get_nth_col(table, i), 0);
- }
-
- val = *b++; /* trailing entry: trx_id position or n_nullable */
- if (UNIV_UNLIKELY(val & 0x80)) {
- val = (val & 0x7f) << 8 | *b++;
- }
-
- /* Decode the position of the trx_id column. */
- if (trx_id_col) {
- if (!val) {
- val = ULINT_UNDEFINED;
- } else if (UNIV_UNLIKELY(val >= n)) {
- page_zip_fields_free(index);
- index = NULL;
- } else {
- index->type = DICT_CLUSTERED;
- }
-
- *trx_id_col = val; /* written even when index was freed above */
- } else {
- /* Decode the number of nullable fields. The decoded count
- must not be smaller than index->n_nullable as it stands
- after the columns were added above. */
- if (UNIV_UNLIKELY(index->n_nullable > val)) {
- page_zip_fields_free(index);
- index = NULL;
- } else {
- index->n_nullable = val;
- }
- }
-
- ut_ad(b == end);
-
- return(index);
-}
-
-/**********************************************************************//**
-Populate the sparse page directory from the dense directory.
-The first page_get_n_recs(page) dense entries are copied to recs[], and
-each of them that carries PAGE_ZIP_DIR_SLOT_OWNED also produces one sparse
-directory slot. The remaining entries up to n_dense must carry no flag
-bits and are copied to recs[] only. recs[] is finally sorted with
-page_zip_dir_sort() when n_dense > 1.
-@return TRUE on success, FALSE on failure */
-static
-ibool
-page_zip_dir_decode(
-/*================*/
- const page_zip_des_t* page_zip,/*!< in: dense page directory on
- compressed page */
- page_t* page, /*!< in: compact page with valid header;
- out: trailer and sparse page directory
- filled in */
- rec_t** recs, /*!< out: dense page directory sorted by
- ascending address (and heap_no) */
- rec_t** recs_aux,/*!< in/out: scratch area */
- ulint n_dense)/*!< in: number of user records, and
- size of recs[] and recs_aux[] */
-{
- ulint i;
- ulint n_recs;
- byte* slot;
-
- n_recs = page_get_n_recs(page);
-
- if (UNIV_UNLIKELY(n_recs > n_dense)) {
- page_zip_fail(("page_zip_dir_decode 1: %lu > %lu\n",
- (ulong) n_recs, (ulong) n_dense));
- return(FALSE);
- }
-
- /* Traverse the list of stored records in the sorting order,
- starting from the first user record. */
-
- slot = page + (UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE);
- UNIV_PREFETCH_RW(slot);
-
- /* Zero out the page trailer. */
- memset(slot + PAGE_DIR_SLOT_SIZE, 0, PAGE_DIR);
-
- mach_write_to_2(slot, PAGE_NEW_INFIMUM); /* first sparse slot: infimum */
- slot -= PAGE_DIR_SLOT_SIZE; /* the sparse directory grows downwards */
- UNIV_PREFETCH_RW(slot);
-
- /* Initialize the sparse directory and copy the dense directory. */
- for (i = 0; i < n_recs; i++) {
- ulint offs = page_zip_dir_get(page_zip, i);
-
- if (offs & PAGE_ZIP_DIR_SLOT_OWNED) {
- mach_write_to_2(slot, offs & PAGE_ZIP_DIR_SLOT_MASK);
- slot -= PAGE_DIR_SLOT_SIZE;
- UNIV_PREFETCH_RW(slot);
- }
-
- if (UNIV_UNLIKELY((offs & PAGE_ZIP_DIR_SLOT_MASK)
- < PAGE_ZIP_START + REC_N_NEW_EXTRA_BYTES)) {
- page_zip_fail(("page_zip_dir_decode 2: %u %u %lx\n",
- (unsigned) i, (unsigned) n_recs,
- (ulong) offs));
- return(FALSE);
- }
-
- recs[i] = page + (offs & PAGE_ZIP_DIR_SLOT_MASK);
- }
-
- mach_write_to_2(slot, PAGE_NEW_SUPREMUM); /* last sparse slot: supremum */
- {
- const page_dir_slot_t* last_slot = page_dir_get_nth_slot(
- page, page_dir_get_n_slots(page) - 1);
-
- /* The number of slots written above must agree with the
- slot count that page_dir_get_n_slots() reports. */
- if (UNIV_UNLIKELY(slot != last_slot)) {
- page_zip_fail(("page_zip_dir_decode 3: %p != %p\n",
- (const void*) slot,
- (const void*) last_slot));
- return(FALSE);
- }
- }
-
- /* Copy the rest of the dense directory. These entries must not
- have any bits set outside PAGE_ZIP_DIR_SLOT_MASK. */
- for (; i < n_dense; i++) {
- ulint offs = page_zip_dir_get(page_zip, i);
-
- if (UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
- page_zip_fail(("page_zip_dir_decode 4: %u %u %lx\n",
- (unsigned) i, (unsigned) n_dense,
- (ulong) offs));
- return(FALSE);
- }
-
- recs[i] = page + offs;
- }
-
- if (UNIV_LIKELY(n_dense > 1)) {
- page_zip_dir_sort(recs, recs_aux, 0, n_dense);
- }
- return(TRUE);
-}
-
-/**********************************************************************//**
-Initialize the REC_N_NEW_EXTRA_BYTES of each record.
-The first page_get_n_recs(page) dense directory entries are chained from
-the infimum to the supremum through their next-record offsets, and the
-byte at rec[-REC_N_NEW_EXTRA_BYTES] receives the info bits and n_owned.
-The remaining entries, up to page_dir_get_n_heap(page)
-- PAGE_HEAP_NO_USER_LOW, are chained to each other with that byte zeroed,
-and the chain is terminated with a next offset of 0.
-@return TRUE on success, FALSE on failure */
-static
-ibool
-page_zip_set_extra_bytes(
-/*=====================*/
- const page_zip_des_t* page_zip,/*!< in: compressed page */
- page_t* page, /*!< in/out: uncompressed page */
- ulint info_bits)/*!< in: REC_INFO_MIN_REC_FLAG or 0;
- applied to the first record only */
-{
- ulint n;
- ulint i;
- ulint n_owned = 1;
- ulint offs;
- rec_t* rec;
-
- n = page_get_n_recs(page);
- rec = page + PAGE_NEW_INFIMUM;
-
- for (i = 0; i < n; i++) {
- offs = page_zip_dir_get(page_zip, i);
-
- if (offs & PAGE_ZIP_DIR_SLOT_DEL) {
- info_bits |= REC_INFO_DELETED_FLAG;
- }
- if (UNIV_UNLIKELY(offs & PAGE_ZIP_DIR_SLOT_OWNED)) {
- info_bits |= n_owned; /* n_owned shares the byte with the info bits */
- n_owned = 1;
- } else {
- n_owned++;
- }
- offs &= PAGE_ZIP_DIR_SLOT_MASK;
- if (UNIV_UNLIKELY(offs < PAGE_ZIP_START
- + REC_N_NEW_EXTRA_BYTES)) {
- page_zip_fail(("page_zip_set_extra_bytes 1:"
- " %u %u %lx\n",
- (unsigned) i, (unsigned) n,
- (ulong) offs));
- return(FALSE);
- }
-
- rec_set_next_offs_new(rec, offs); /* link the previous record to this one */
- rec = page + offs;
- rec[-REC_N_NEW_EXTRA_BYTES] = (byte) info_bits;
- info_bits = 0; /* the caller-supplied flag applies to the first record only */
- }
-
- /* Set the next pointer of the last user record. */
- rec_set_next_offs_new(rec, PAGE_NEW_SUPREMUM);
-
- /* Set n_owned of the supremum record. */
- page[PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES] = (byte) n_owned;
-
- /* The dense directory excludes the infimum and supremum records. */
- n = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
-
- if (i >= n) {
- if (UNIV_LIKELY(i == n)) {
- return(TRUE); /* no further dense directory entries */
- }
-
- page_zip_fail(("page_zip_set_extra_bytes 2: %u != %u\n",
- (unsigned) i, (unsigned) n));
- return(FALSE);
- }
-
- offs = page_zip_dir_get(page_zip, i);
-
- /* Set the extra bytes of deleted records on the free list. */
- for (;;) {
- if (UNIV_UNLIKELY(!offs)
- || UNIV_UNLIKELY(offs & ~PAGE_ZIP_DIR_SLOT_MASK)) {
-
- page_zip_fail(("page_zip_set_extra_bytes 3: %lx\n",
- (ulong) offs));
- return(FALSE);
- }
-
- rec = page + offs;
- rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
-
- if (++i == n) {
- break;
- }
-
- offs = page_zip_dir_get(page_zip, i);
- rec_set_next_offs_new(rec, offs);
- }
-
- /* Terminate the free list. */
- rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
- rec_set_next_offs_new(rec, 0);
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Apply the modification log to a record containing externally stored
-columns. Do not copy the fields that are stored separately.
-Log bytes are copied into rec up to each skipped region, and the output
-position then jumps over that region without writing to it. The skipped
-regions are DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN bytes at field trx_id_col
-and the last BTR_EXTERN_FIELD_REF_SIZE bytes of every externally stored
-field.
-@return pointer to modification log, or NULL on failure */
-static
-const byte*
-page_zip_apply_log_ext(
-/*===================*/
- rec_t* rec, /*!< in/out: record */
- const ulint* offsets, /*!< in: rec_get_offsets(rec) */
- ulint trx_id_col, /*!< in: position of DB_TRX_ID */
- const byte* data, /*!< in: modification log */
- const byte* end) /*!< in: end of modification log */
-{
- ulint i;
- ulint len;
- byte* next_out = rec; /* next byte of rec to be filled from the log */
-
- /* Check if there are any externally stored columns.
- For each externally stored column, skip the
- BTR_EXTERN_FIELD_REF. */
-
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- byte* dst;
-
- if (UNIV_UNLIKELY(i == trx_id_col)) {
- /* Skip trx_id and roll_ptr */
- dst = rec_get_nth_field(rec, offsets,
- i, &len);
- if (UNIV_UNLIKELY(dst - next_out >= end - data)
- || UNIV_UNLIKELY
- (len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))
- || rec_offs_nth_extern(offsets, i)) {
- page_zip_fail(("page_zip_apply_log_ext:"
- " trx_id len %lu,"
- " %p - %p >= %p - %p\n",
- (ulong) len,
- (const void*) dst,
- (const void*) next_out,
- (const void*) end,
- (const void*) data));
- return(NULL);
- }
-
- memcpy(next_out, data, dst - next_out); /* bytes preceding DB_TRX_ID */
- data += dst - next_out;
- next_out = dst + (DATA_TRX_ID_LEN
- + DATA_ROLL_PTR_LEN);
- } else if (rec_offs_nth_extern(offsets, i)) {
- dst = rec_get_nth_field(rec, offsets,
- i, &len);
- ut_ad(len
- >= BTR_EXTERN_FIELD_REF_SIZE);
-
- /* From here on, len is the number of bytes from next_out
- up to the start of the field reference of this column. */
- len += dst - next_out
- - BTR_EXTERN_FIELD_REF_SIZE;
-
- if (UNIV_UNLIKELY(data + len >= end)) {
- page_zip_fail(("page_zip_apply_log_ext: "
- "ext %p+%lu >= %p\n",
- (const void*) data,
- (ulong) len,
- (const void*) end));
- return(NULL);
- }
-
- memcpy(next_out, data, len);
- data += len;
- next_out += len
- + BTR_EXTERN_FIELD_REF_SIZE;
- }
- }
-
- /* Copy the last bytes of the record. */
- len = rec_get_end(rec, offsets) - next_out;
- if (UNIV_UNLIKELY(data + len >= end)) {
- page_zip_fail(("page_zip_apply_log_ext: "
- "last %p+%lu >= %p\n",
- (const void*) data,
- (ulong) len,
- (const void*) end));
- return(NULL);
- }
- memcpy(next_out, data, len);
- data += len;
-
- return(data);
-}
-
-/**********************************************************************//**
-Apply the modification log to an uncompressed page.
-Do not copy the fields that are stored separately.
-As decoded here, every log entry starts with a value val stored in one
-byte, or in two bytes when bit 0x80 of the first byte is set. val >> 1
-selects the record recs[(val >> 1) - 1], and bit 0 of val requests that
-the data bytes of that record be zero-filled instead of being read from
-the log. A zero byte terminates the log.
-@return pointer to end of modification log, or NULL on failure */
-static
-const byte*
-page_zip_apply_log(
-/*===============*/
- const byte* data, /*!< in: modification log */
- ulint size, /*!< in: maximum length of the log, in bytes */
- rec_t** recs, /*!< in: dense page directory,
- sorted by address (indexed by
- heap_no - PAGE_HEAP_NO_USER_LOW) */
- ulint n_dense,/*!< in: size of recs[] */
- ulint trx_id_col,/*!< in: column number of trx_id in the index,
- or ULINT_UNDEFINED if none */
- ulint heap_status,
- /*!< in: heap_no and status bits for
- the next record to uncompress */
- dict_index_t* index, /*!< in: index of the page */
- ulint* offsets)/*!< in/out: work area for
- rec_get_offsets_reverse() */
-{
- const byte* const end = data + size;
-
- for (;;) {
- ulint val;
- rec_t* rec;
- ulint len;
- ulint hs;
-
- val = *data++;
- if (UNIV_UNLIKELY(!val)) {
- return(data - 1); /* points at the terminating zero byte */
- }
- if (val & 0x80) {
- val = (val & 0x7f) << 8 | *data++;
- if (UNIV_UNLIKELY(!val)) {
- page_zip_fail(("page_zip_apply_log:"
- " invalid val %x%x\n",
- data[-2], data[-1]));
- return(NULL);
- }
- }
- if (UNIV_UNLIKELY(data >= end)) {
- page_zip_fail(("page_zip_apply_log: %p >= %p\n",
- (const void*) data,
- (const void*) end));
- return(NULL);
- }
- if (UNIV_UNLIKELY((val >> 1) > n_dense)) {
- page_zip_fail(("page_zip_apply_log: %lu>>1 > %lu\n",
- (ulong) val, (ulong) n_dense));
- return(NULL);
- }
-
- /* Determine the heap number and status bits of the record.
- NOTE(review): val >> 1 == 0 (that is, val == 1) passes the
- range check above and would index recs[] at 0 - 1; confirm
- whether the log can ever contain such an entry. */
- rec = recs[(val >> 1) - 1];
-
- hs = ((val >> 1) + 1) << REC_HEAP_NO_SHIFT;
- hs |= heap_status & ((1 << REC_HEAP_NO_SHIFT) - 1);
-
- /* This may either be an old record that is being
- overwritten (updated in place, or allocated from
- the free list), or a new record, with the next
- available_heap_no. */
- if (UNIV_UNLIKELY(hs > heap_status)) {
- page_zip_fail(("page_zip_apply_log: %lu > %lu\n",
- (ulong) hs, (ulong) heap_status));
- return(NULL);
- } else if (hs == heap_status) {
- /* A new record was allocated from the heap. */
- if (UNIV_UNLIKELY(val & 1)) {
- /* Only existing records may be cleared. */
- page_zip_fail(("page_zip_apply_log:"
- " attempting to create"
- " deleted rec %lu\n",
- (ulong) hs));
- return(NULL);
- }
- heap_status += 1 << REC_HEAP_NO_SHIFT;
- }
-
- mach_write_to_2(rec - REC_NEW_HEAP_NO, hs);
-
- if (val & 1) {
- /* Clear the data bytes of the record. */
- mem_heap_t* heap = NULL;
- ulint* offs;
- offs = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- memset(rec, 0, rec_offs_data_size(offs));
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- continue; /* no record image follows this entry */
- }
-
-#if REC_STATUS_NODE_PTR != TRUE
-# error "REC_STATUS_NODE_PTR != TRUE"
-#endif
- rec_get_offsets_reverse(data, index,
- hs & REC_STATUS_NODE_PTR,
- offsets);
- rec_offs_make_valid(rec, index, offsets);
-
- /* Copy the extra bytes (backwards). The log is read
- forwards while the record header is filled towards
- lower addresses. */
- {
- byte* start = rec_get_start(rec, offsets);
- byte* b = rec - REC_N_NEW_EXTRA_BYTES;
- while (b != start) {
- *--b = *data++;
- }
- }
-
- /* Copy the data bytes. */
- if (UNIV_UNLIKELY(rec_offs_any_extern(offsets))) {
- /* Non-leaf nodes should not contain any
- externally stored columns. */
- if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
- page_zip_fail(("page_zip_apply_log: "
- "%lu&REC_STATUS_NODE_PTR\n",
- (ulong) hs));
- return(NULL);
- }
-
- data = page_zip_apply_log_ext(
- rec, offsets, trx_id_col, data, end);
-
- if (UNIV_UNLIKELY(!data)) {
- return(NULL);
- }
- } else if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) {
- len = rec_offs_data_size(offsets)
- - REC_NODE_PTR_SIZE;
- /* Copy the data bytes, except node_ptr. */
- if (UNIV_UNLIKELY(data + len >= end)) {
- page_zip_fail(("page_zip_apply_log: "
- "node_ptr %p+%lu >= %p\n",
- (const void*) data,
- (ulong) len,
- (const void*) end));
- return(NULL);
- }
- memcpy(rec, data, len);
- data += len;
- } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
- len = rec_offs_data_size(offsets);
-
- /* Copy all data bytes of
- a record in a secondary index. */
- if (UNIV_UNLIKELY(data + len >= end)) {
- page_zip_fail(("page_zip_apply_log: "
- "sec %p+%lu >= %p\n",
- (const void*) data,
- (ulong) len,
- (const void*) end));
- return(NULL);
- }
-
- memcpy(rec, data, len);
- data += len;
- } else {
- /* Skip DB_TRX_ID and DB_ROLL_PTR. Here l is the offset
- of field trx_id_col within the record data. */
- ulint l = rec_get_nth_field_offs(offsets,
- trx_id_col, &len);
- byte* b;
-
- if (UNIV_UNLIKELY(data + l >= end)
- || UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN
- + DATA_ROLL_PTR_LEN))) {
- page_zip_fail(("page_zip_apply_log: "
- "trx_id %p+%lu >= %p\n",
- (const void*) data,
- (ulong) l,
- (const void*) end));
- return(NULL);
- }
-
- /* Copy any preceding data bytes. */
- memcpy(rec, data, l);
- data += l;
-
- /* Copy any bytes following DB_TRX_ID, DB_ROLL_PTR. */
- b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
- len = rec_get_end(rec, offsets) - b;
- if (UNIV_UNLIKELY(data + len >= end)) {
- page_zip_fail(("page_zip_apply_log: "
- "clust %p+%lu >= %p\n",
- (const void*) data,
- (ulong) len,
- (const void*) end));
- return(NULL);
- }
- memcpy(b, data, len);
- data += len;
- }
- }
-}
-
-/**********************************************************************//**
-Set the heap_no in a record, and skip the fixed-size record header
-that is not included in the d_stream.
-On success, d_stream->next_out is moved to rec, heap_status is stored in
-the two bytes at rec - REC_NEW_HEAP_NO, and heap_status is advanced by one
-heap number. On failure nothing is modified.
-@return TRUE on success, FALSE if d_stream does not end at rec */
-static
-ibool
-page_zip_decompress_heap_no(
-/*========================*/
- z_stream* d_stream, /*!< in/out: compressed page stream */
- rec_t* rec, /*!< in/out: record */
- ulint& heap_status) /*!< in/out: heap_no and status bits */
-{
- /* The output position must be exactly at the start of the
- fixed-size header of rec. */
- if (d_stream->next_out != rec - REC_N_NEW_EXTRA_BYTES) {
- /* n_dense has grown since the page was last compressed. */
- return(FALSE);
- }
-
- /* Skip the REC_N_NEW_EXTRA_BYTES. */
- d_stream->next_out = rec;
-
- /* Set heap_no and the status bits. */
- mach_write_to_2(rec - REC_NEW_HEAP_NO, heap_status);
- heap_status += 1 << REC_HEAP_NO_SHIFT;
- return(TRUE);
-}
-
-/**********************************************************************//**
-Decompress the records of a node pointer page.
-The steps are: inflate the records one by one while zero-filling their
-node pointers, inflate the remainder of the stream, zero the unused heap
-space, apply the modification log, and finally copy the node pointers
-from the uncompressed storage area into the records.
-inflateEnd() is called on d_stream on every path that leaves the
-decompression phase, whether it succeeded or not.
-@return TRUE on success, FALSE on failure */
-static
-ibool
-page_zip_decompress_node_ptrs(
-/*==========================*/
- page_zip_des_t* page_zip, /*!< in/out: compressed page */
- z_stream* d_stream, /*!< in/out: compressed page stream */
- rec_t** recs, /*!< in: dense page directory
- sorted by address */
- ulint n_dense, /*!< in: size of recs[] */
- dict_index_t* index, /*!< in: the index of the page */
- ulint* offsets, /*!< in/out: temporary offsets */
- mem_heap_t* heap) /*!< in: temporary memory heap */
-{
- ulint heap_status = REC_STATUS_NODE_PTR
- | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
- ulint slot;
- const byte* storage;
-
- /* Subtract the space reserved for uncompressed data:
- one dense directory slot and one node pointer per record. */
- d_stream->avail_in -= static_cast<uInt>(
- n_dense * (PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE));
-
- /* Decompress the records in heap_no order. */
- for (slot = 0; slot < n_dense; slot++) {
- rec_t* rec = recs[slot];
-
- /* Inflate up to the start of the header of this record. */
- d_stream->avail_out = static_cast<uInt>(
- rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);
-
- ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
- - PAGE_ZIP_START - PAGE_DIR);
- switch (inflate(d_stream, Z_SYNC_FLUSH)) {
- case Z_STREAM_END:
- page_zip_decompress_heap_no(
- d_stream, rec, heap_status); /* NOTE(review): result is not checked here */
- goto zlib_done;
- case Z_OK:
- case Z_BUF_ERROR:
- if (!d_stream->avail_out) {
- break; /* the requested output was produced in full */
- }
- /* fall through */
- default:
- page_zip_fail(("page_zip_decompress_node_ptrs:"
- " 1 inflate(Z_SYNC_FLUSH)=%s\n",
- d_stream->msg));
- goto zlib_error;
- }
-
- if (!page_zip_decompress_heap_no(
- d_stream, rec, heap_status)) {
- ut_ad(0);
- }
-
- /* Read the offsets. The status bits are needed here. */
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- /* Non-leaf nodes should not have any externally
- stored columns. */
- ut_ad(!rec_offs_any_extern(offsets));
-
- /* Decompress the data bytes, except node_ptr. */
- d_stream->avail_out =static_cast<uInt>(
- rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE);
-
- switch (inflate(d_stream, Z_SYNC_FLUSH)) {
- case Z_STREAM_END:
- goto zlib_done;
- case Z_OK:
- case Z_BUF_ERROR:
- if (!d_stream->avail_out) {
- break;
- }
- /* fall through */
- default:
- page_zip_fail(("page_zip_decompress_node_ptrs:"
- " 2 inflate(Z_SYNC_FLUSH)=%s\n",
- d_stream->msg));
- goto zlib_error;
- }
-
- /* Clear the node pointer in case the record
- will be deleted and the space will be reallocated
- to a smaller record. */
- memset(d_stream->next_out, 0, REC_NODE_PTR_SIZE);
- d_stream->next_out += REC_NODE_PTR_SIZE;
-
- ut_ad(d_stream->next_out == rec_get_end(rec, offsets));
- }
-
- /* Decompress any trailing garbage, in case the last record was
- allocated from an originally longer space on the free list. */
- d_stream->avail_out = static_cast<uInt>(
- page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
- - page_offset(d_stream->next_out));
- if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
- - PAGE_ZIP_START - PAGE_DIR)) {
-
- page_zip_fail(("page_zip_decompress_node_ptrs:"
- " avail_out = %u\n",
- d_stream->avail_out));
- goto zlib_error;
- }
-
- if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
- page_zip_fail(("page_zip_decompress_node_ptrs:"
- " inflate(Z_FINISH)=%s\n",
- d_stream->msg));
-zlib_error: /* also the target of the goto statements above */
- inflateEnd(d_stream);
- return(FALSE);
- }
-
- /* Note that d_stream->avail_out > 0 may hold here
- if the modification log is nonempty. */
-
-zlib_done:
- if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
- ut_error;
- }
-
- {
- page_t* page = page_align(d_stream->next_out);
-
- /* Clear the unused heap space on the uncompressed page,
- from the end of the decompressed data up to the last
- slot of the page directory. */
- memset(d_stream->next_out, 0,
- page_dir_get_nth_slot(page,
- page_dir_get_n_slots(page) - 1)
- - d_stream->next_out);
- }
-
-#ifdef UNIV_DEBUG
- page_zip->m_start = PAGE_DATA + d_stream->total_in;
-#endif /* UNIV_DEBUG */
-
- /* Apply the modification log. It starts at the first input
- byte that inflate() did not consume. */
- {
- const byte* mod_log_ptr;
- mod_log_ptr = page_zip_apply_log(d_stream->next_in,
- d_stream->avail_in + 1,
- recs, n_dense,
- ULINT_UNDEFINED, heap_status,
- index, offsets);
-
- if (UNIV_UNLIKELY(!mod_log_ptr)) {
- return(FALSE);
- }
- page_zip->m_end = mod_log_ptr - page_zip->data;
- page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
- }
-
- /* The end of the modification log plus the trailer must fit
- strictly within the compressed page size. */
- if (UNIV_UNLIKELY
- (page_zip_get_trailer_len(page_zip,
- dict_index_is_clust(index))
- + page_zip->m_end >= page_zip_get_size(page_zip))) {
- page_zip_fail(("page_zip_decompress_node_ptrs:"
- " %lu + %lu >= %lu, %lu\n",
- (ulong) page_zip_get_trailer_len(
- page_zip, dict_index_is_clust(index)),
- (ulong) page_zip->m_end,
- (ulong) page_zip_get_size(page_zip),
- (ulong) dict_index_is_clust(index)));
- return(FALSE);
- }
-
- /* Restore the uncompressed columns in heap_no order. The node
- pointers are read at descending addresses, starting just below
- page_zip_dir_start_low(page_zip, n_dense). */
- storage = page_zip_dir_start_low(page_zip, n_dense);
-
- for (slot = 0; slot < n_dense; slot++) {
- rec_t* rec = recs[slot];
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- /* Non-leaf nodes should not have any externally
- stored columns. */
- ut_ad(!rec_offs_any_extern(offsets));
- storage -= REC_NODE_PTR_SIZE;
-
- memcpy(rec_get_end(rec, offsets) - REC_NODE_PTR_SIZE,
- storage, REC_NODE_PTR_SIZE);
- }
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Decompress the records of a leaf node of a secondary index.
-The stream is inflated up to the header of each record in turn, the
-heap_no of every record is set, the remainder of the stream is inflated,
-the unused heap space is zeroed, and the modification log is applied.
-inflateEnd() is called on d_stream on every path that leaves the
-decompression phase, whether it succeeded or not.
-@return TRUE on success, FALSE on failure */
-static
-ibool
-page_zip_decompress_sec(
-/*====================*/
- page_zip_des_t* page_zip, /*!< in/out: compressed page */
- z_stream* d_stream, /*!< in/out: compressed page stream */
- rec_t** recs, /*!< in: dense page directory
- sorted by address */
- ulint n_dense, /*!< in: size of recs[] */
- dict_index_t* index, /*!< in: the index of the page */
- ulint* offsets) /*!< in/out: temporary offsets */
-{
- ulint heap_status = REC_STATUS_ORDINARY
- | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
- ulint slot;
-
- ut_a(!dict_index_is_clust(index));
-
- /* Subtract the space reserved for uncompressed data:
- one dense directory slot per record. */
- d_stream->avail_in -= static_cast<uint>( /* NOTE(review): uint here, uInt in the sibling functions */
- n_dense * PAGE_ZIP_DIR_SLOT_SIZE);
-
- for (slot = 0; slot < n_dense; slot++) {
- rec_t* rec = recs[slot];
-
- /* Decompress everything up to this record. */
- d_stream->avail_out = static_cast<uint>(
- rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);
-
- /* inflate() is skipped when the output position is
- already at the header of this record. */
- if (UNIV_LIKELY(d_stream->avail_out)) {
- switch (inflate(d_stream, Z_SYNC_FLUSH)) {
- case Z_STREAM_END:
- page_zip_decompress_heap_no(
- d_stream, rec, heap_status); /* NOTE(review): result is not checked here */
- goto zlib_done;
- case Z_OK:
- case Z_BUF_ERROR:
- if (!d_stream->avail_out) {
- break; /* the requested output was produced in full */
- }
- /* fall through */
- default:
- page_zip_fail(("page_zip_decompress_sec:"
- " inflate(Z_SYNC_FLUSH)=%s\n",
- d_stream->msg));
- goto zlib_error;
- }
- }
-
- if (!page_zip_decompress_heap_no(
- d_stream, rec, heap_status)) {
- ut_ad(0);
- }
- }
-
- /* Decompress the data of the last record and any trailing garbage,
- in case the last record was allocated from an originally longer space
- on the free list. */
- d_stream->avail_out = static_cast<uInt>(
- page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
- - page_offset(d_stream->next_out));
- if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
- - PAGE_ZIP_START - PAGE_DIR)) {
-
- page_zip_fail(("page_zip_decompress_sec:"
- " avail_out = %u\n",
- d_stream->avail_out));
- goto zlib_error;
- }
-
- if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
- page_zip_fail(("page_zip_decompress_sec:"
- " inflate(Z_FINISH)=%s\n",
- d_stream->msg));
-zlib_error: /* also the target of the goto statements above */
- inflateEnd(d_stream);
- return(FALSE);
- }
-
- /* Note that d_stream->avail_out > 0 may hold here
- if the modification log is nonempty. */
-
-zlib_done:
- if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
- ut_error;
- }
-
- {
- page_t* page = page_align(d_stream->next_out);
-
- /* Clear the unused heap space on the uncompressed page,
- from the end of the decompressed data up to the last
- slot of the page directory. */
- memset(d_stream->next_out, 0,
- page_dir_get_nth_slot(page,
- page_dir_get_n_slots(page) - 1)
- - d_stream->next_out);
- }
-
-#ifdef UNIV_DEBUG
- page_zip->m_start = PAGE_DATA + d_stream->total_in;
-#endif /* UNIV_DEBUG */
-
- /* Apply the modification log. It starts at the first input
- byte that inflate() did not consume. */
- {
- const byte* mod_log_ptr;
- mod_log_ptr = page_zip_apply_log(d_stream->next_in,
- d_stream->avail_in + 1,
- recs, n_dense,
- ULINT_UNDEFINED, heap_status,
- index, offsets);
-
- if (UNIV_UNLIKELY(!mod_log_ptr)) {
- return(FALSE);
- }
- page_zip->m_end = mod_log_ptr - page_zip->data;
- page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
- }
-
- /* The end of the modification log plus the trailer must fit
- strictly within the compressed page size. */
- if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, FALSE)
- + page_zip->m_end >= page_zip_get_size(page_zip))) {
-
- page_zip_fail(("page_zip_decompress_sec: %lu + %lu >= %lu\n",
- (ulong) page_zip_get_trailer_len(
- page_zip, FALSE),
- (ulong) page_zip->m_end,
- (ulong) page_zip_get_size(page_zip)));
- return(FALSE);
- }
-
- /* There are no uncompressed columns on leaf pages of
- secondary indexes. */
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Decompress a record of a leaf node of a clustered index that contains
-externally stored columns.
-The stream is inflated up to each region that is not part of it, and that
-region is then zero-filled and skipped: DATA_TRX_ID_LEN
-+ DATA_ROLL_PTR_LEN bytes at field trx_id_col, and the last
-BTR_EXTERN_FIELD_REF_SIZE bytes of every externally stored field. The
-bytes after the last such region are not decompressed by this function.
-@return TRUE on success */
-static
-ibool
-page_zip_decompress_clust_ext(
-/*==========================*/
- z_stream* d_stream, /*!< in/out: compressed page stream */
- rec_t* rec, /*!< in/out: record */
- const ulint* offsets, /*!< in: rec_get_offsets(rec) */
- ulint trx_id_col) /*!< in: position of DB_TRX_ID */
-{
- ulint i;
-
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- ulint len;
- byte* dst;
-
- if (UNIV_UNLIKELY(i == trx_id_col)) {
- /* Skip trx_id and roll_ptr */
- dst = rec_get_nth_field(rec, offsets, i, &len);
- if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
- + DATA_ROLL_PTR_LEN)) {
-
- page_zip_fail(("page_zip_decompress_clust_ext:"
- " len[%lu] = %lu\n",
- (ulong) i, (ulong) len));
- return(FALSE);
- }
-
- if (rec_offs_nth_extern(offsets, i)) {
-
- page_zip_fail(("page_zip_decompress_clust_ext:"
- " DB_TRX_ID at %lu is ext\n",
- (ulong) i));
- return(FALSE);
- }
-
- /* Inflate the bytes that precede DB_TRX_ID. */
- d_stream->avail_out = static_cast<uInt>(
- dst - d_stream->next_out);
-
- switch (inflate(d_stream, Z_SYNC_FLUSH)) {
- case Z_STREAM_END:
- case Z_OK:
- case Z_BUF_ERROR:
- if (!d_stream->avail_out) {
- break; /* the requested output was produced in full */
- }
- /* fall through */
- default:
- page_zip_fail(("page_zip_decompress_clust_ext:"
- " 1 inflate(Z_SYNC_FLUSH)=%s\n",
- d_stream->msg));
- return(FALSE);
- }
-
- ut_ad(d_stream->next_out == dst);
-
- /* Clear DB_TRX_ID and DB_ROLL_PTR in order to
- avoid uninitialized bytes in case the record
- is affected by page_zip_apply_log(). */
- memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-
- d_stream->next_out += DATA_TRX_ID_LEN
- + DATA_ROLL_PTR_LEN;
- } else if (rec_offs_nth_extern(offsets, i)) {
- dst = rec_get_nth_field(rec, offsets, i, &len);
- ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
- dst += len - BTR_EXTERN_FIELD_REF_SIZE; /* start of the field reference */
-
- d_stream->avail_out = static_cast<uInt>(
- dst - d_stream->next_out);
- switch (inflate(d_stream, Z_SYNC_FLUSH)) {
- case Z_STREAM_END:
- case Z_OK:
- case Z_BUF_ERROR:
- if (!d_stream->avail_out) {
- break;
- }
- /* fall through */
- default:
- page_zip_fail(("page_zip_decompress_clust_ext:"
- " 2 inflate(Z_SYNC_FLUSH)=%s\n",
- d_stream->msg));
- return(FALSE);
- }
-
- ut_ad(d_stream->next_out == dst);
-
- /* Clear the BLOB pointer in case
- the record will be deleted and the
- space will not be reused. Note that
- the final initialization of the BLOB
- pointers (copying from "externs"
- or clearing) will have to take place
- only after the page modification log
- has been applied. Otherwise, we
- could end up with an uninitialized
- BLOB pointer when a record is deleted,
- reallocated and deleted. */
- memset(d_stream->next_out, 0,
- BTR_EXTERN_FIELD_REF_SIZE);
- d_stream->next_out
- += BTR_EXTERN_FIELD_REF_SIZE;
- }
- }
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Decompress the records of a leaf node of a clustered index.
-@return TRUE on success, FALSE on failure */
-static
-ibool
-page_zip_decompress_clust(
-/*======================*/
- page_zip_des_t* page_zip, /*!< in/out: compressed page */
- z_stream* d_stream, /*!< in/out: compressed page stream */
- rec_t** recs, /*!< in: dense page directory
- sorted by address */
- ulint n_dense, /*!< in: size of recs[] */
- dict_index_t* index, /*!< in: the index of the page */
- ulint trx_id_col, /*!< index of the trx_id column */
- ulint* offsets, /*!< in/out: temporary offsets */
- mem_heap_t* heap) /*!< in: temporary memory heap */
-{
- int err;
- ulint slot;
- ulint heap_status = REC_STATUS_ORDINARY
- | PAGE_HEAP_NO_USER_LOW << REC_HEAP_NO_SHIFT;
- const byte* storage;
- const byte* externs;
-
- ut_a(dict_index_is_clust(index));
-
- /* Subtract the space reserved for uncompressed data. */
- d_stream->avail_in -= static_cast<uInt>(n_dense)
- * (PAGE_ZIP_DIR_SLOT_SIZE
- + DATA_TRX_ID_LEN
- + DATA_ROLL_PTR_LEN);
-
- /* Decompress the records in heap_no order. */
- for (slot = 0; slot < n_dense; slot++) {
- rec_t* rec = recs[slot];
-
- d_stream->avail_out =static_cast<uInt>(
- rec - REC_N_NEW_EXTRA_BYTES - d_stream->next_out);
-
- ut_ad(d_stream->avail_out < UNIV_PAGE_SIZE
- - PAGE_ZIP_START - PAGE_DIR);
- err = inflate(d_stream, Z_SYNC_FLUSH);
- switch (err) {
- case Z_STREAM_END:
- page_zip_decompress_heap_no(
- d_stream, rec, heap_status);
- goto zlib_done;
- case Z_OK:
- case Z_BUF_ERROR:
- if (UNIV_LIKELY(!d_stream->avail_out)) {
- break;
- }
- /* fall through */
- default:
- page_zip_fail(("page_zip_decompress_clust:"
- " 1 inflate(Z_SYNC_FLUSH)=%s\n",
- d_stream->msg));
- goto zlib_error;
- }
-
- if (!page_zip_decompress_heap_no(
- d_stream, rec, heap_status)) {
- ut_ad(0);
- }
-
- /* Read the offsets. The status bits are needed here. */
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- /* This is a leaf page in a clustered index. */
-
- /* Check if there are any externally stored columns.
- For each externally stored column, restore the
- BTR_EXTERN_FIELD_REF separately. */
-
- if (rec_offs_any_extern(offsets)) {
- if (UNIV_UNLIKELY
- (!page_zip_decompress_clust_ext(
- d_stream, rec, offsets, trx_id_col))) {
-
- goto zlib_error;
- }
- } else {
- /* Skip trx_id and roll_ptr */
- ulint len;
- byte* dst = rec_get_nth_field(rec, offsets,
- trx_id_col, &len);
- if (UNIV_UNLIKELY(len < DATA_TRX_ID_LEN
- + DATA_ROLL_PTR_LEN)) {
-
- page_zip_fail(("page_zip_decompress_clust:"
- " len = %lu\n", (ulong) len));
- goto zlib_error;
- }
-
- d_stream->avail_out = static_cast<uInt>(
- dst - d_stream->next_out);
-
- switch (inflate(d_stream, Z_SYNC_FLUSH)) {
- case Z_STREAM_END:
- case Z_OK:
- case Z_BUF_ERROR:
- if (!d_stream->avail_out) {
- break;
- }
- /* fall through */
- default:
- page_zip_fail(("page_zip_decompress_clust:"
- " 2 inflate(Z_SYNC_FLUSH)=%s\n",
- d_stream->msg));
- goto zlib_error;
- }
-
- ut_ad(d_stream->next_out == dst);
-
- /* Clear DB_TRX_ID and DB_ROLL_PTR in order to
- avoid uninitialized bytes in case the record
- is affected by page_zip_apply_log(). */
- memset(dst, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-
- d_stream->next_out += DATA_TRX_ID_LEN
- + DATA_ROLL_PTR_LEN;
- }
-
- /* Decompress the last bytes of the record. */
- d_stream->avail_out = static_cast<uInt>(
- rec_get_end(rec, offsets) - d_stream->next_out);
-
- switch (inflate(d_stream, Z_SYNC_FLUSH)) {
- case Z_STREAM_END:
- case Z_OK:
- case Z_BUF_ERROR:
- if (!d_stream->avail_out) {
- break;
- }
- /* fall through */
- default:
- page_zip_fail(("page_zip_decompress_clust:"
- " 3 inflate(Z_SYNC_FLUSH)=%s\n",
- d_stream->msg));
- goto zlib_error;
- }
- }
-
- /* Decompress any trailing garbage, in case the last record was
- allocated from an originally longer space on the free list. */
- d_stream->avail_out = static_cast<uInt>(
- page_header_get_field(page_zip->data, PAGE_HEAP_TOP)
- - page_offset(d_stream->next_out));
- if (UNIV_UNLIKELY(d_stream->avail_out > UNIV_PAGE_SIZE
- - PAGE_ZIP_START - PAGE_DIR)) {
-
- page_zip_fail(("page_zip_decompress_clust:"
- " avail_out = %u\n",
- d_stream->avail_out));
- goto zlib_error;
- }
-
- if (UNIV_UNLIKELY(inflate(d_stream, Z_FINISH) != Z_STREAM_END)) {
- page_zip_fail(("page_zip_decompress_clust:"
- " inflate(Z_FINISH)=%s\n",
- d_stream->msg));
-zlib_error:
- inflateEnd(d_stream);
- return(FALSE);
- }
-
- /* Note that d_stream->avail_out > 0 may hold here
- if the modification log is nonempty. */
-
-zlib_done:
- if (UNIV_UNLIKELY(inflateEnd(d_stream) != Z_OK)) {
- ut_error;
- }
-
- {
- page_t* page = page_align(d_stream->next_out);
-
- /* Clear the unused heap space on the uncompressed page. */
- memset(d_stream->next_out, 0,
- page_dir_get_nth_slot(page,
- page_dir_get_n_slots(page) - 1)
- - d_stream->next_out);
- }
-
-#ifdef UNIV_DEBUG
- page_zip->m_start = PAGE_DATA + d_stream->total_in;
-#endif /* UNIV_DEBUG */
-
- /* Apply the modification log. */
- {
- const byte* mod_log_ptr;
- mod_log_ptr = page_zip_apply_log(d_stream->next_in,
- d_stream->avail_in + 1,
- recs, n_dense,
- trx_id_col, heap_status,
- index, offsets);
-
- if (UNIV_UNLIKELY(!mod_log_ptr)) {
- return(FALSE);
- }
- page_zip->m_end = mod_log_ptr - page_zip->data;
- page_zip->m_nonempty = mod_log_ptr != d_stream->next_in;
- }
-
- if (UNIV_UNLIKELY(page_zip_get_trailer_len(page_zip, TRUE)
- + page_zip->m_end >= page_zip_get_size(page_zip))) {
-
- page_zip_fail(("page_zip_decompress_clust: %lu + %lu >= %lu\n",
- (ulong) page_zip_get_trailer_len(
- page_zip, TRUE),
- (ulong) page_zip->m_end,
- (ulong) page_zip_get_size(page_zip)));
- return(FALSE);
- }
-
- storage = page_zip_dir_start_low(page_zip, n_dense);
-
- externs = storage - n_dense
- * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-
- /* Restore the uncompressed columns in heap_no order. */
-
- for (slot = 0; slot < n_dense; slot++) {
- ulint i;
- ulint len;
- byte* dst;
- rec_t* rec = recs[slot];
- ibool exists = !page_zip_dir_find_free(
- page_zip, page_offset(rec));
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- dst = rec_get_nth_field(rec, offsets,
- trx_id_col, &len);
- ut_ad(len >= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
- storage -= DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
- memcpy(dst, storage,
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-
- /* Check if there are any externally stored
- columns in this record. For each externally
- stored column, restore or clear the
- BTR_EXTERN_FIELD_REF. */
- if (!rec_offs_any_extern(offsets)) {
- continue;
- }
-
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- if (!rec_offs_nth_extern(offsets, i)) {
- continue;
- }
- dst = rec_get_nth_field(rec, offsets, i, &len);
-
- if (UNIV_UNLIKELY(len < BTR_EXTERN_FIELD_REF_SIZE)) {
- page_zip_fail(("page_zip_decompress_clust:"
- " %lu < 20\n",
- (ulong) len));
- return(FALSE);
- }
-
- dst += len - BTR_EXTERN_FIELD_REF_SIZE;
-
- if (UNIV_LIKELY(exists)) {
- /* Existing record:
- restore the BLOB pointer */
- externs -= BTR_EXTERN_FIELD_REF_SIZE;
-
- if (UNIV_UNLIKELY
- (externs < page_zip->data
- + page_zip->m_end)) {
- page_zip_fail(("page_zip_"
- "decompress_clust: "
- "%p < %p + %lu\n",
- (const void*) externs,
- (const void*)
- page_zip->data,
- (ulong)
- page_zip->m_end));
- return(FALSE);
- }
-
- memcpy(dst, externs,
- BTR_EXTERN_FIELD_REF_SIZE);
-
- page_zip->n_blobs++;
- } else {
- /* Deleted record:
- clear the BLOB pointer */
- memset(dst, 0,
- BTR_EXTERN_FIELD_REF_SIZE);
- }
- }
- }
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Decompress a page. This function should tolerate errors on the compressed
-page. Instead of letting assertions fail, it will return FALSE if an
-inconsistency is detected.
-
-Steps, in order: copy (or, if !all, partly verify) the page header from
-page_zip->data, decode the dense page directory, rebuild the infimum and
-supremum records, inflate the index description and then the user records,
-and finally bump the decompression counters. The record stream is handled
-by page_zip_decompress_node_ptrs() when !page_is_leaf(page), by
-page_zip_decompress_sec() when page_zip_fields_decode() left trx_id_col at
-ULINT_UNDEFINED, and by page_zip_decompress_clust() otherwise.
-@return TRUE on success, FALSE on failure */
-UNIV_INTERN
-ibool
-page_zip_decompress(
-/*================*/
- page_zip_des_t* page_zip,/*!< in: data, ssize;
- out: m_start, m_end, m_nonempty, n_blobs */
- page_t* page, /*!< out: uncompressed page, may be trashed */
- ibool all) /*!< in: TRUE=decompress the whole page;
- FALSE=verify but do not copy some
- page header fields that should not change
- after page creation */
-{
- z_stream d_stream;
- dict_index_t* index = NULL;
- rec_t** recs; /*!< dense page directory, sorted by address */
- ulint n_dense;/* number of user records on the page */
- ulint trx_id_col = ULINT_UNDEFINED;
- mem_heap_t* heap;
- ulint* offsets;
-#ifndef UNIV_HOTBACKUP
- ullint usec = ut_time_us(NULL);
-#endif /* !UNIV_HOTBACKUP */
-
- ut_ad(page_zip_simple_validate(page_zip));
- UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
- UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
-
- /* The dense directory excludes the infimum and supremum records. */
- n_dense = page_dir_get_n_heap(page_zip->data) - PAGE_HEAP_NO_USER_LOW;
- if (UNIV_UNLIKELY(n_dense * PAGE_ZIP_DIR_SLOT_SIZE
- >= page_zip_get_size(page_zip))) {
- page_zip_fail(("page_zip_decompress 1: %lu %lu\n",
- (ulong) n_dense,
- (ulong) page_zip_get_size(page_zip)));
- return(FALSE);
- }
-
- heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE);
-
- recs = static_cast<rec_t**>(
- mem_heap_alloc(heap, n_dense * (2 * sizeof *recs))); /* room for two arrays of n_dense pointers: recs and recs + n_dense, both handed to page_zip_dir_decode() below */
-
- if (all) {
- /* Copy the page header. */
- memcpy(page, page_zip->data, PAGE_DATA);
- } else {
- /* Check that the bytes that we skip are identical. */
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
- ut_a(!memcmp(FIL_PAGE_TYPE + page,
- FIL_PAGE_TYPE + page_zip->data,
- PAGE_HEADER - FIL_PAGE_TYPE));
- ut_a(!memcmp(PAGE_HEADER + PAGE_LEVEL + page,
- PAGE_HEADER + PAGE_LEVEL + page_zip->data,
- PAGE_DATA - (PAGE_HEADER + PAGE_LEVEL)));
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-
- /* Copy the mutable parts of the page header. */
- memcpy(page, page_zip->data, FIL_PAGE_TYPE);
- memcpy(PAGE_HEADER + page, PAGE_HEADER + page_zip->data,
- PAGE_LEVEL - PAGE_N_DIR_SLOTS);
-
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
- /* Check that the page headers match after copying. */
- ut_a(!memcmp(page, page_zip->data, PAGE_DATA));
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
- }
-
-#ifdef UNIV_ZIP_DEBUG
- /* Clear the uncompressed page, except the header. */
- memset(PAGE_DATA + page, 0x55, UNIV_PAGE_SIZE - PAGE_DATA);
-#endif /* UNIV_ZIP_DEBUG */
- UNIV_MEM_INVALID(PAGE_DATA + page, UNIV_PAGE_SIZE - PAGE_DATA);
-
- /* Copy the page directory. */
- if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs,
- recs + n_dense, n_dense))) {
-zlib_error: /* failure exit used while index is still NULL; also the target of the goto statements below. NOTE(review): no inflateEnd() on this path - presumably the zlib state lives in heap via page_zip_set_alloc(); confirm */
- mem_heap_free(heap);
- return(FALSE);
- }
-
- /* Copy the infimum and supremum records. */
- memcpy(page + (PAGE_NEW_INFIMUM - REC_N_NEW_EXTRA_BYTES),
- infimum_extra, sizeof infimum_extra);
- if (page_is_empty(page)) {
- rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
- PAGE_NEW_SUPREMUM);
- } else {
- rec_set_next_offs_new(page + PAGE_NEW_INFIMUM,
- page_zip_dir_get(page_zip, 0)
- & PAGE_ZIP_DIR_SLOT_MASK);
- }
- memcpy(page + PAGE_NEW_INFIMUM, infimum_data, sizeof infimum_data);
- memcpy(page + (PAGE_NEW_SUPREMUM - REC_N_NEW_EXTRA_BYTES + 1),
- supremum_extra_data, sizeof supremum_extra_data);
-
- page_zip_set_alloc(&d_stream, heap);
-
- d_stream.next_in = page_zip->data + PAGE_DATA;
- /* Subtract the space reserved for
- the page header and the end marker of the modification log. */
- d_stream.avail_in = static_cast<uInt>(
- page_zip_get_size(page_zip) - (PAGE_DATA + 1));
- d_stream.next_out = page + PAGE_ZIP_START;
- d_stream.avail_out = UNIV_PAGE_SIZE - PAGE_ZIP_START;
-
- if (UNIV_UNLIKELY(inflateInit2(&d_stream, UNIV_PAGE_SIZE_SHIFT)
- != Z_OK)) {
- ut_error;
- }
-
- /* Decode the zlib header and the index information. */
- if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
-
- page_zip_fail(("page_zip_decompress:"
- " 1 inflate(Z_BLOCK)=%s\n", d_stream.msg));
- goto zlib_error;
- }
-
- if (UNIV_UNLIKELY(inflate(&d_stream, Z_BLOCK) != Z_OK)) {
-
- page_zip_fail(("page_zip_decompress:"
- " 2 inflate(Z_BLOCK)=%s\n", d_stream.msg));
- goto zlib_error;
- }
-
- index = page_zip_fields_decode(
- page + PAGE_ZIP_START, d_stream.next_out,
- page_is_leaf(page) ? &trx_id_col : NULL);
-
- if (UNIV_UNLIKELY(!index)) {
-
- goto zlib_error;
- }
-
- /* Decompress the user records. */
- page_zip->n_blobs = 0;
- d_stream.next_out = page + PAGE_ZIP_START; /* rewind the output to where the index information was inflated */
-
- {
- /* Pre-allocate the offsets for rec_get_offsets_reverse(). */
- ulint n = 1 + 1/* node ptr */ + REC_OFFS_HEADER_SIZE
- + dict_index_get_n_fields(index);
-
- offsets = static_cast<ulint*>(
- mem_heap_alloc(heap, n * sizeof(ulint)));
-
- *offsets = n;
- }
-
- /* Decompress the records in heap_no order. */
- if (!page_is_leaf(page)) {
- /* This is a node pointer page. */
- ulint info_bits;
-
- if (UNIV_UNLIKELY
- (!page_zip_decompress_node_ptrs(page_zip, &d_stream,
- recs, n_dense, index,
- offsets, heap))) {
- goto err_exit;
- }
-
- info_bits = mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL
- ? REC_INFO_MIN_REC_FLAG : 0;
-
- if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip, page,
- info_bits))) {
- goto err_exit;
- }
- } else if (UNIV_LIKELY(trx_id_col == ULINT_UNDEFINED)) {
- /* This is a leaf page in a secondary index. */
- if (UNIV_UNLIKELY(!page_zip_decompress_sec(page_zip, &d_stream,
- recs, n_dense,
- index, offsets))) {
- goto err_exit;
- }
-
- if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
- page, 0))) {
-err_exit: /* failure exit once index exists; entered by goto from the other two branches as well */
- page_zip_fields_free(index);
- mem_heap_free(heap);
- return(FALSE);
- }
- } else {
- /* This is a leaf page in a clustered index. */
- if (UNIV_UNLIKELY(!page_zip_decompress_clust(page_zip,
- &d_stream, recs,
- n_dense, index,
- trx_id_col,
- offsets, heap))) {
- goto err_exit;
- }
-
- if (UNIV_UNLIKELY(!page_zip_set_extra_bytes(page_zip,
- page, 0))) {
- goto err_exit;
- }
- }
-
- ut_a(page_is_comp(page));
- UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
-
- page_zip_fields_free(index);
- mem_heap_free(heap);
-#ifndef UNIV_HOTBACKUP
- ullint time_diff = ut_time_us(NULL) - usec;
- page_zip_stat[page_zip->ssize - 1].decompressed++;
- page_zip_stat[page_zip->ssize - 1].decompressed_usec += time_diff;
-
- index_id_t index_id = btr_page_get_index_id(page);
-
- if (srv_cmp_per_index_enabled) {
- mutex_enter(&page_zip_stat_per_index_mutex);
- page_zip_stat_per_index[index_id].decompressed++;
- page_zip_stat_per_index[index_id].decompressed_usec += time_diff;
- mutex_exit(&page_zip_stat_per_index_mutex);
- }
-#endif /* !UNIV_HOTBACKUP */
-
- /* Update the stat counter for LRU policy. */
- buf_LRU_stat_inc_unzip();
-
- MONITOR_INC(MONITOR_PAGE_DECOMPRESS);
-
- return(TRUE);
-}
-
-#ifdef UNIV_ZIP_DEBUG
-/**********************************************************************//**
-Print a labelled hexadecimal dump of a memory area to stderr: first the
-name followed by a colon, then one line per 32 bytes, each line starting
-with the offset of its first byte. */
-static
-void
-page_zip_hexdump_func(
-/*==================*/
-	const char*	name,	/*!< in: name of the data structure */
-	const void*	buf,	/*!< in: data */
-	ulint		size)	/*!< in: length of the data, in bytes */
-{
-	const ulint	bytes_per_line = 32;
-	const byte*	cur = static_cast<const byte*>(buf);
-
-	fprintf(stderr, "%s:\n", name);
-
-	for (ulint line_start = 0;
-	     line_start < size;
-	     line_start += bytes_per_line) {
-
-		/* The last line may be shorter than bytes_per_line. */
-		const byte*	line_end = cur
-			+ ut_min(bytes_per_line, size - line_start);
-
-		fprintf(stderr, "%04lx ", (ulong) line_start);
-
-		for (; cur != line_end; cur++) {
-			fprintf(stderr, "%02x", *cur);
-		}
-
-		putc('\n', stderr);
-	}
-}
-
-/** Dump a block of memory on the standard error stream.
-The text of the buf argument is stringified (#buf) and passed to
-page_zip_hexdump_func() as the name to print.
-@param buf in: data
-@param size in: length of the data, in bytes */
-#define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size)
-
-/** Flag: make page_zip_validate() compare page headers only.
-While this is set, page_zip_validate_low() returns TRUE as soon as the
-page header comparison has passed, without decompressing the page. */
-UNIV_INTERN ibool page_zip_validate_header_only = FALSE;
-
-/**********************************************************************//**
-Check that the compressed and decompressed pages match.
-
-The page header fields are compared with memcmp() first. Unless
-page_zip_validate_header_only is set, page_zip is then decompressed into
-a temporary page, and n_blobs, m_start (UNIV_DEBUG only), m_end,
-m_nonempty and the page body are compared with the given page. If the
-bodies differ, the PAGE_FREE list and the record list are walked to report
-the first difference. Whenever the result is FALSE, the descriptor and the
-pages involved are hex-dumped to stderr.
-@return TRUE if valid, FALSE if not */
-UNIV_INTERN
-ibool
-page_zip_validate_low(
-/*==================*/
- const page_zip_des_t* page_zip,/*!< in: compressed page */
- const page_t* page, /*!< in: uncompressed page */
- const dict_index_t* index, /*!< in: index of the page, if known */
- ibool sloppy) /*!< in: FALSE=strict,
- TRUE=ignore the MIN_REC_FLAG */
-{
- page_zip_des_t temp_page_zip;
- byte* temp_page_buf;
- page_t* temp_page;
- ibool valid;
-
- if (memcmp(page_zip->data + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
- FIL_PAGE_LSN - FIL_PAGE_PREV)
- || memcmp(page_zip->data + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE, 2)
- || memcmp(page_zip->data + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
- PAGE_DATA - FIL_PAGE_DATA)) {
- page_zip_fail(("page_zip_validate: page header\n"));
- page_zip_hexdump(page_zip, sizeof *page_zip);
- page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
- page_zip_hexdump(page, UNIV_PAGE_SIZE);
- return(FALSE);
- }
-
- ut_a(page_is_comp(page));
-
- if (page_zip_validate_header_only) {
- return(TRUE);
- }
-
- /* page_zip_decompress() expects the uncompressed page to be
- UNIV_PAGE_SIZE aligned. */
- temp_page_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); /* twice the page size, so that an aligned page fits inside */
- temp_page = static_cast<byte*>(ut_align(temp_page_buf, UNIV_PAGE_SIZE));
-
- UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
- UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
-
- temp_page_zip = *page_zip; /* decompress through a copy; page_zip itself is const */
- valid = page_zip_decompress(&temp_page_zip, temp_page, TRUE);
- if (!valid) {
- fputs("page_zip_validate(): failed to decompress\n", stderr);
- goto func_exit;
- }
- if (page_zip->n_blobs != temp_page_zip.n_blobs) {
- page_zip_fail(("page_zip_validate: n_blobs: %u!=%u\n",
- page_zip->n_blobs, temp_page_zip.n_blobs));
- valid = FALSE;
- }
-#ifdef UNIV_DEBUG
- if (page_zip->m_start != temp_page_zip.m_start) {
- page_zip_fail(("page_zip_validate: m_start: %u!=%u\n",
- page_zip->m_start, temp_page_zip.m_start));
- valid = FALSE;
- }
-#endif /* UNIV_DEBUG */
- if (page_zip->m_end != temp_page_zip.m_end) {
- page_zip_fail(("page_zip_validate: m_end: %u!=%u\n",
- page_zip->m_end, temp_page_zip.m_end));
- valid = FALSE;
- }
- if (page_zip->m_nonempty != temp_page_zip.m_nonempty) {
- page_zip_fail(("page_zip_validate(): m_nonempty: %u!=%u\n",
- page_zip->m_nonempty,
- temp_page_zip.m_nonempty));
- valid = FALSE;
- }
- if (memcmp(page + PAGE_HEADER, temp_page + PAGE_HEADER,
- UNIV_PAGE_SIZE - PAGE_HEADER - FIL_PAGE_DATA_END)) {
-
- /* In crash recovery, the "minimum record" flag may be
- set incorrectly until the mini-transaction is
- committed. Let us tolerate that difference when we
- are performing a sloppy validation. */
-
- ulint* offsets;
- mem_heap_t* heap;
- const rec_t* rec;
- const rec_t* trec;
- byte info_bits_diff;
- ulint offset
- = rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE);
- ut_a(offset >= PAGE_NEW_SUPREMUM);
- offset -= 5/*REC_NEW_INFO_BITS*/;
-
- info_bits_diff = page[offset] ^ temp_page[offset];
-
- if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
- temp_page[offset] = page[offset];
-
- if (!memcmp(page + PAGE_HEADER,
- temp_page + PAGE_HEADER,
- UNIV_PAGE_SIZE - PAGE_HEADER
- - FIL_PAGE_DATA_END)) {
-
- /* Only the minimum record flag
- differed. Let us ignore it. */
- page_zip_fail(("page_zip_validate: "
- "min_rec_flag "
- "(%s"
- "%lu,%lu,0x%02lx)\n",
- sloppy ? "ignored, " : "",
- page_get_space_id(page),
- page_get_page_no(page),
- (ulong) page[offset]));
- valid = sloppy;
- goto func_exit;
- }
- }
-
- /* Compare the pointers in the PAGE_FREE list. */
- rec = page_header_get_ptr(page, PAGE_FREE);
- trec = page_header_get_ptr(temp_page, PAGE_FREE);
-
- while (rec || trec) {
- if (page_offset(rec) != page_offset(trec)) {
- page_zip_fail(("page_zip_validate: "
- "PAGE_FREE list: %u!=%u\n",
- (unsigned) page_offset(rec),
- (unsigned) page_offset(trec)));
- valid = FALSE;
- goto func_exit;
- }
-
- rec = page_rec_get_next_low(rec, TRUE);
- trec = page_rec_get_next_low(trec, TRUE);
- }
-
- /* Compare the records. */
- heap = NULL;
- offsets = NULL;
- rec = page_rec_get_next_low(
- page + PAGE_NEW_INFIMUM, TRUE);
- trec = page_rec_get_next_low(
- temp_page + PAGE_NEW_INFIMUM, TRUE);
-
- do {
- if (page_offset(rec) != page_offset(trec)) {
- page_zip_fail(("page_zip_validate: "
- "record list: 0x%02x!=0x%02x\n",
- (unsigned) page_offset(rec),
- (unsigned) page_offset(trec)));
- valid = FALSE;
- break;
- }
-
- if (index) {
- /* Compare the data. */
- offsets = rec_get_offsets(
- rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (memcmp(rec - rec_offs_extra_size(offsets),
- trec - rec_offs_extra_size(offsets),
- rec_offs_size(offsets))) {
- page_zip_fail(
- ("page_zip_validate: "
- "record content: 0x%02x", /* NOTE(review): no trailing newline here, unlike the other messages in this function */
- (unsigned) page_offset(rec)));
- valid = FALSE;
- break;
- }
- }
-
- rec = page_rec_get_next_low(rec, TRUE);
- trec = page_rec_get_next_low(trec, TRUE);
- } while (rec || trec);
-
- if (heap) {
- mem_heap_free(heap);
- }
- }
-
-func_exit:
- if (!valid) {
- page_zip_hexdump(page_zip, sizeof *page_zip);
- page_zip_hexdump(page_zip->data, page_zip_get_size(page_zip));
- page_zip_hexdump(page, UNIV_PAGE_SIZE);
- page_zip_hexdump(temp_page, UNIV_PAGE_SIZE);
- }
- ut_free(temp_page_buf);
- return(valid);
-}
-
-/**********************************************************************//**
-Check that the compressed and decompressed pages match. This forwards to
-page_zip_validate_low(), passing the value of recv_recovery_is_on() as
-its "sloppy" argument.
-@return TRUE if valid, FALSE if not */
-UNIV_INTERN
-ibool
-page_zip_validate(
-/*==============*/
-	const page_zip_des_t*	page_zip,/*!< in: compressed page */
-	const page_t*		page,	/*!< in: uncompressed page */
-	const dict_index_t*	index)	/*!< in: index of the page, if known */
-{
-	/* Be sloppy exactly while recovery is reported as running. */
-	const ibool	sloppy = recv_recovery_is_on();
-
-	return(page_zip_validate_low(page_zip, page, index, sloppy));
-}
-#endif /* UNIV_ZIP_DEBUG */
-
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Debug check of three page header ranges: each must be byte-identical in
-the compressed page frame and in the uncompressed page. A difference
-trips a debug assertion; the function itself never reports failure.
-@return TRUE */
-static
-ibool
-page_zip_header_cmp(
-/*================*/
-	const page_zip_des_t*	page_zip,/*!< in: compressed page */
-	const byte*		page)	/*!< in: uncompressed page */
-{
-	const byte*	zip_frame = page_zip->data;
-
-	/* From FIL_PAGE_PREV up to, but not including, FIL_PAGE_LSN. */
-	ut_ad(memcmp(zip_frame + FIL_PAGE_PREV, page + FIL_PAGE_PREV,
-		     FIL_PAGE_LSN - FIL_PAGE_PREV) == 0);
-
-	/* The two bytes at FIL_PAGE_TYPE. */
-	ut_ad(memcmp(zip_frame + FIL_PAGE_TYPE, page + FIL_PAGE_TYPE,
-		     2) == 0);
-
-	/* From FIL_PAGE_DATA up to, but not including, PAGE_DATA. */
-	ut_ad(memcmp(zip_frame + FIL_PAGE_DATA, page + FIL_PAGE_DATA,
-		     PAGE_DATA - FIL_PAGE_DATA) == 0);
-
-	return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/**********************************************************************//**
-Write a record on the compressed page that contains externally stored
-columns. The data must already have been written to the uncompressed page.
-
-The data bytes of rec are appended at data, with two exceptions: the
-DB_TRX_ID and DB_ROLL_PTR bytes are copied to the slot of heap_no below
-storage, and the trailing BTR_EXTERN_FIELD_REF_SIZE bytes of every
-externally stored column are copied to the BLOB pointer array instead.
-If create is nonzero, page_zip->n_blobs is increased by the number of
-externally stored columns of rec, and the existing BLOB pointers between
-the end of the array and the insert position are moved towards lower
-addresses with memmove() to make room.
-@return end of modification log */
-static
-byte*
-page_zip_write_rec_ext(
-/*===================*/
- page_zip_des_t* page_zip, /*!< in/out: compressed page */
- const page_t* page, /*!< in: page containing rec */
- const byte* rec, /*!< in: record being written */
- dict_index_t* index, /*!< in: record descriptor */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
- ulint create, /*!< in: nonzero=insert, zero=update */
- ulint trx_id_col, /*!< in: position of DB_TRX_ID */
- ulint heap_no, /*!< in: heap number of rec */
- byte* storage, /*!< in: end of dense page directory */
- byte* data) /*!< in: end of modification log */
-{
- const byte* start = rec;
- ulint i;
- ulint len;
- byte* externs = storage;
- ulint n_ext = rec_offs_n_extern(offsets);
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
- UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
- rec_offs_extra_size(offsets));
-
- externs -= (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
- * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW); /* skip the trx_id,roll_ptr slots of all user records; BLOB pointers are stored below this address */
-
- /* Note that this will not take into account
- the BLOB columns of rec if create==TRUE. */
- ut_ad(data + rec_offs_data_size(offsets)
- - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
- - n_ext * BTR_EXTERN_FIELD_REF_SIZE
- < externs - BTR_EXTERN_FIELD_REF_SIZE * page_zip->n_blobs);
-
- {
- ulint blob_no = page_zip_get_n_prev_extern(
- page_zip, rec, index);
- byte* ext_end = externs - page_zip->n_blobs
- * BTR_EXTERN_FIELD_REF_SIZE;
- ut_ad(blob_no <= page_zip->n_blobs);
- externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
-
- if (create) {
- page_zip->n_blobs += static_cast<unsigned>(n_ext);
- ASSERT_ZERO_BLOB(ext_end - n_ext
- * BTR_EXTERN_FIELD_REF_SIZE);
- memmove(ext_end - n_ext
- * BTR_EXTERN_FIELD_REF_SIZE,
- ext_end,
- externs - ext_end); /* shift the pointers in [ext_end, externs) down by n_ext slots */
- }
-
- ut_a(blob_no + n_ext <= page_zip->n_blobs);
- }
-
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- const byte* src;
-
- if (UNIV_UNLIKELY(i == trx_id_col)) {
- ut_ad(!rec_offs_nth_extern(offsets,
- i));
- ut_ad(!rec_offs_nth_extern(offsets,
- i + 1));
- /* Locate trx_id and roll_ptr. */
- src = rec_get_nth_field(rec, offsets,
- i, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
- ut_ad(src + DATA_TRX_ID_LEN
- == rec_get_nth_field(
- rec, offsets,
- i + 1, &len));
- ut_ad(len == DATA_ROLL_PTR_LEN);
-
- /* Log the preceding fields. */
- ASSERT_ZERO(data, src - start);
- memcpy(data, start, src - start);
- data += src - start;
- start = src + (DATA_TRX_ID_LEN
- + DATA_ROLL_PTR_LEN);
-
- /* Store trx_id and roll_ptr. */
- memcpy(storage - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
- * (heap_no - 1),
- src, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
- i++; /* skip also roll_ptr */
- } else if (rec_offs_nth_extern(offsets, i)) {
- src = rec_get_nth_field(rec, offsets,
- i, &len);
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(len
- >= BTR_EXTERN_FIELD_REF_SIZE);
- src += len - BTR_EXTERN_FIELD_REF_SIZE; /* the BLOB pointer is the tail of the field */
-
- ASSERT_ZERO(data, src - start);
- memcpy(data, start, src - start);
- data += src - start;
- start = src + BTR_EXTERN_FIELD_REF_SIZE;
-
- /* Store the BLOB pointer. */
- externs -= BTR_EXTERN_FIELD_REF_SIZE;
- ut_ad(data < externs);
- memcpy(externs, src, BTR_EXTERN_FIELD_REF_SIZE);
- }
- }
-
- /* Log the last bytes of the record. */
- len = rec_offs_data_size(offsets) - (start - rec);
-
- ASSERT_ZERO(data, len);
- memcpy(data, start, len);
- data += len;
-
- return(data);
-}
-
-/**********************************************************************//**
-Write an entire record on the compressed page. The data must already
-have been written to the uncompressed page.
-
-The delete-mark of rec is copied to its dense directory slot, and an entry
-is appended to the modification log at page_zip->data + page_zip->m_end.
-The entry consists of heap_no - 1 (one byte, or two bytes when
-heap_no - 1 >= 64), the extra bytes of rec that precede the final
-REC_N_NEW_EXTRA_BYTES (written backwards), and the data bytes.
-On a clustered index leaf page, DB_TRX_ID and DB_ROLL_PTR (and, via
-page_zip_write_rec_ext(), BLOB pointers) are stored outside the log; on a
-node pointer page the node pointer is stored outside the log.
-Finally m_end is advanced and m_nonempty is set. */
-UNIV_INTERN
-void
-page_zip_write_rec(
-/*===============*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- const byte* rec, /*!< in: record being written */
- dict_index_t* index, /*!< in: the index the record belongs to */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint create) /*!< in: nonzero=insert, zero=update */
-{
- const page_t* page;
- byte* data;
- byte* storage;
- ulint heap_no;
- byte* slot;
-
- ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
- ut_ad(page_zip_simple_validate(page_zip));
- ut_ad(page_zip_get_size(page_zip)
- > PAGE_DATA + page_zip_dir_size(page_zip));
- ut_ad(rec_offs_comp(offsets));
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- ut_ad(page_zip->m_start >= PAGE_DATA);
-
- page = page_align(rec);
-
- ut_ad(page_zip_header_cmp(page_zip, page));
- ut_ad(page_simple_validate_new((page_t*) page));
-
- UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
- UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
- UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
- rec_offs_extra_size(offsets));
-
- slot = page_zip_dir_find(page_zip, page_offset(rec));
- ut_a(slot);
- /* Copy the delete mark. */
- if (rec_get_deleted_flag(rec, TRUE)) {
- *slot |= PAGE_ZIP_DIR_SLOT_DEL >> 8;
- } else {
- *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
- }
-
- ut_ad(rec_get_start((rec_t*) rec, offsets) >= page + PAGE_ZIP_START);
- ut_ad(rec_get_end((rec_t*) rec, offsets) <= page + UNIV_PAGE_SIZE
- - PAGE_DIR - PAGE_DIR_SLOT_SIZE
- * page_dir_get_n_slots(page));
-
- heap_no = rec_get_heap_no_new(rec);
- ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW); /* not infimum or supremum */
- ut_ad(heap_no < page_dir_get_n_heap(page));
-
- /* Append to the modification log. */
- data = page_zip->data + page_zip->m_end;
- ut_ad(!*data);
-
- /* Identify the record by writing its heap number - 1.
- 0 is reserved to indicate the end of the modification log. */
-
- if (UNIV_UNLIKELY(heap_no - 1 >= 64)) {
- *data++ = (byte) (0x80 | (heap_no - 1) >> 7); /* upper bits of heap_no - 1, flagged with 0x80 */
- ut_ad(!*data);
- }
- *data++ = (byte) ((heap_no - 1) << 1); /* low 7 bits of heap_no - 1, shifted left by one */
- ut_ad(!*data);
-
- {
- const byte* start = rec - rec_offs_extra_size(offsets);
- const byte* b = rec - REC_N_NEW_EXTRA_BYTES;
-
- /* Write the extra bytes backwards, so that
- rec_offs_extra_size() can be easily computed in
- page_zip_apply_log() by invoking
- rec_get_offsets_reverse(). */
-
- while (b != start) {
- *data++ = *--b;
- ut_ad(!*data);
- }
- }
-
- /* Write the data bytes. Store the uncompressed bytes separately. */
- storage = page_zip_dir_start(page_zip);
-
- if (page_is_leaf(page)) {
- ulint len;
-
- if (dict_index_is_clust(index)) {
- ulint trx_id_col;
-
- trx_id_col = dict_index_get_sys_col_pos(index,
- DATA_TRX_ID);
- ut_ad(trx_id_col != ULINT_UNDEFINED);
-
- /* Store separately trx_id, roll_ptr and
- the BTR_EXTERN_FIELD_REF of each BLOB column. */
- if (rec_offs_any_extern(offsets)) {
- data = page_zip_write_rec_ext(
- page_zip, page,
- rec, index, offsets, create,
- trx_id_col, heap_no, storage, data);
- } else {
- /* Locate trx_id and roll_ptr. */
- const byte* src
- = rec_get_nth_field(rec, offsets,
- trx_id_col, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
- ut_ad(src + DATA_TRX_ID_LEN
- == rec_get_nth_field(
- rec, offsets,
- trx_id_col + 1, &len));
- ut_ad(len == DATA_ROLL_PTR_LEN);
-
- /* Log the preceding fields. */
- ASSERT_ZERO(data, src - rec);
- memcpy(data, rec, src - rec);
- data += src - rec;
-
- /* Store trx_id and roll_ptr. */
- memcpy(storage
- - (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
- * (heap_no - 1),
- src,
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-
- src += DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
-
- /* Log the last bytes of the record. */
- len = rec_offs_data_size(offsets)
- - (src - rec);
-
- ASSERT_ZERO(data, len);
- memcpy(data, src, len);
- data += len;
- }
- } else {
- /* Leaf page of a secondary index:
- no externally stored columns */
- ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID)
- == ULINT_UNDEFINED);
- ut_ad(!rec_offs_any_extern(offsets));
-
- /* Log the entire record. */
- len = rec_offs_data_size(offsets);
-
- ASSERT_ZERO(data, len);
- memcpy(data, rec, len);
- data += len;
- }
- } else {
- /* This is a node pointer page. */
- ulint len;
-
- /* Non-leaf nodes should not have any externally
- stored columns. */
- ut_ad(!rec_offs_any_extern(offsets));
-
- /* Copy the data bytes, except node_ptr. */
- len = rec_offs_data_size(offsets) - REC_NODE_PTR_SIZE;
- ut_ad(data + len < storage - REC_NODE_PTR_SIZE
- * (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW));
- ASSERT_ZERO(data, len);
- memcpy(data, rec, len);
- data += len;
-
- /* Copy the node pointer to the uncompressed area. */
- memcpy(storage - REC_NODE_PTR_SIZE
- * (heap_no - 1),
- rec + len,
- REC_NODE_PTR_SIZE);
- }
-
- ut_a(!*data); /* the byte after the new entry must still be the zero end marker */
- ut_ad((ulint) (data - page_zip->data) < page_zip_get_size(page_zip));
- page_zip->m_end = data - page_zip->data;
- page_zip->m_nonempty = TRUE;
-
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page_align(rec), index));
-#endif /* UNIV_ZIP_DEBUG */
-}
-
-/***********************************************************//**
-Parses a log record of writing a BLOB pointer of a record.
-
-The record body is a 2-byte offset into the uncompressed page, a 2-byte
-offset into the compressed page, and BTR_EXTERN_FIELD_REF_SIZE bytes of
-BLOB pointer. When page is given, the pointer is copied to both pages.
-
-The offsets come from the redo log and are therefore validated before
-anything is written: the whole BTR_EXTERN_FIELD_REF_SIZE byte range must
-lie inside the uncompressed page and, when the record is applied, inside
-the compressed page (page_zip_get_size()). A record that fails these
-checks, or that targets a non-leaf page, sets
-recv_sys->found_corrupt_log and is not applied.
-@return end of log record, or NULL if the record is incomplete or corrupt */
-UNIV_INTERN
-byte*
-page_zip_parse_write_blob_ptr(
-/*==========================*/
-	byte*		ptr,	/*!< in: redo log buffer */
-	byte*		end_ptr,/*!< in: redo log buffer end */
-	page_t*		page,	/*!< in/out: uncompressed page */
-	page_zip_des_t*	page_zip)/*!< in/out: compressed page */
-{
-	ulint	offset;
-	ulint	z_offset;
-
-	ut_ad(!page == !page_zip);
-
-	if (UNIV_UNLIKELY
-	    (end_ptr < ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE))) {
-
-		/* The record is not completely in the buffer yet. */
-		return(NULL);
-	}
-
-	offset = mach_read_from_2(ptr);
-	z_offset = mach_read_from_2(ptr + 2);
-
-	/* The end of the written range, not only its first byte,
-	must be within the uncompressed page. */
-	if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
-	    || UNIV_UNLIKELY(offset + BTR_EXTERN_FIELD_REF_SIZE
-			     > UNIV_PAGE_SIZE)
-	    || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
-corrupt:
-		recv_sys->found_corrupt_log = TRUE;
-
-		return(NULL);
-	}
-
-	if (page) {
-		/* The compressed page can be smaller than UNIV_PAGE_SIZE,
-		so z_offset is checked against its actual size here. */
-		if (UNIV_UNLIKELY(!page_zip)
-		    || UNIV_UNLIKELY(!page_is_leaf(page))
-		    || UNIV_UNLIKELY(z_offset + BTR_EXTERN_FIELD_REF_SIZE
-				     > page_zip_get_size(page_zip))) {
-
-			goto corrupt;
-		}
-
-#ifdef UNIV_ZIP_DEBUG
-		ut_a(page_zip_validate(page_zip, page, NULL));
-#endif /* UNIV_ZIP_DEBUG */
-
-		memcpy(page + offset,
-		       ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
-		memcpy(page_zip->data + z_offset,
-		       ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
-
-#ifdef UNIV_ZIP_DEBUG
-		ut_a(page_zip_validate(page_zip, page, NULL));
-#endif /* UNIV_ZIP_DEBUG */
-	}
-
-	return(ptr + (2 + 2 + BTR_EXTERN_FIELD_REF_SIZE));
-}
-
-/**********************************************************************//**
-Copy the BLOB pointer of column n of rec from the uncompressed page to
-the BLOB pointer array of the compressed page. The clustered index leaf
-page must already hold the new pointer in rec. If mtr is given, a
-MLOG_ZIP_WRITE_BLOB_PTR record carrying both page offsets and the pointer
-bytes is written to it (not when UNIV_HOTBACKUP is defined). */
-UNIV_INTERN
-void
-page_zip_write_blob_ptr(
-/*====================*/
-	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
-	const byte*	rec,	/*!< in/out: record whose data is being
-				written */
-	dict_index_t*	index,	/*!< in: index of the page */
-	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
-	ulint		n,	/*!< in: column index */
-	mtr_t*		mtr)	/*!< in: mini-transaction handle,
-				or NULL if no logging is needed */
-{
-	const page_t*	page = page_align(rec);
-	ulint		field_len;
-
-	ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
-	ut_ad(page_simple_validate_new((page_t*) page));
-	ut_ad(page_zip_simple_validate(page_zip));
-	ut_ad(page_zip_get_size(page_zip)
-	      > PAGE_DATA + page_zip_dir_size(page_zip));
-	ut_ad(rec_offs_comp(offsets));
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ut_ad(rec_offs_any_extern(offsets));
-	ut_ad(rec_offs_nth_extern(offsets, n));
-
-	ut_ad(page_zip->m_start >= PAGE_DATA);
-	ut_ad(page_zip_header_cmp(page_zip, page));
-
-	ut_ad(page_is_leaf(page));
-	ut_ad(dict_index_is_clust(index));
-
-	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
-	UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
-	UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
-			   rec_offs_extra_size(offsets));
-
-	/* Position of this pointer in the BLOB pointer array: the
-	pointers of the preceding records plus the external columns
-	of rec before column n. */
-	const ulint	ref_no
-		= page_zip_get_n_prev_extern(page_zip, rec, index)
-		+ rec_get_n_extern_new(rec, index, n);
-	ut_a(ref_no < page_zip->n_blobs);
-
-	/* Each user record occupies a directory slot and a
-	trx_id,roll_ptr slot at the end of the compressed page; the
-	BLOB pointers are stored below those. */
-	const ulint	n_user_recs
-		= page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW;
-
-	byte*	zip_ref = page_zip->data + page_zip_get_size(page_zip)
-		- n_user_recs
-		* (PAGE_ZIP_DIR_SLOT_SIZE
-		   + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)
-		- (ref_no + 1) * BTR_EXTERN_FIELD_REF_SIZE;
-
-	/* The pointer is the tail of the column on the uncompressed
-	page. */
-	const byte*	ref = rec_get_nth_field(rec, offsets, n, &field_len)
-		+ field_len - BTR_EXTERN_FIELD_REF_SIZE;
-
-	memcpy(zip_ref, ref, BTR_EXTERN_FIELD_REF_SIZE);
-
-#ifdef UNIV_ZIP_DEBUG
-	ut_a(page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-
-	if (!mtr) {
-		return;
-	}
-
-#ifndef UNIV_HOTBACKUP
-	byte*	log_ptr = mlog_open(
-		mtr, 11 + 2 + 2 + BTR_EXTERN_FIELD_REF_SIZE);
-
-	if (UNIV_UNLIKELY(!log_ptr)) {
-		return;
-	}
-
-	log_ptr = mlog_write_initial_log_record_fast(
-		(byte*) ref, MLOG_ZIP_WRITE_BLOB_PTR, log_ptr, mtr);
-
-	/* Body: offset on the uncompressed page, offset on the
-	compressed page, pointer bytes. */
-	mach_write_to_2(log_ptr, page_offset(ref));
-	mach_write_to_2(log_ptr + 2, zip_ref - page_zip->data);
-	memcpy(log_ptr + 4, zip_ref, BTR_EXTERN_FIELD_REF_SIZE);
-
-	mlog_close(mtr, log_ptr + 4 + BTR_EXTERN_FIELD_REF_SIZE);
-#endif /* !UNIV_HOTBACKUP */
-}
-
-/***********************************************************//**
-Parses a log record of writing the node pointer of a record.
-
-The record body is a 2-byte offset into the uncompressed page, a 2-byte
-offset into the compressed page, and REC_NODE_PTR_SIZE bytes of node
-pointer. When page is given, the pointer is copied to both pages.
-
-The offsets come from the redo log and are validated before anything is
-written: the whole REC_NODE_PTR_SIZE byte range must lie inside the
-uncompressed page, and the compressed-page offset must address the node
-pointer slot of a user record (heap_no in the range
-[PAGE_HEAP_NO_USER_LOW, page_dir_get_n_heap(page))). A record that fails
-these checks, or that targets a leaf page, sets
-recv_sys->found_corrupt_log and is not applied.
-@return end of log record, or NULL if the record is incomplete or corrupt */
-UNIV_INTERN
-byte*
-page_zip_parse_write_node_ptr(
-/*==========================*/
-	byte*		ptr,	/*!< in: redo log buffer */
-	byte*		end_ptr,/*!< in: redo log buffer end */
-	page_t*		page,	/*!< in/out: uncompressed page */
-	page_zip_des_t*	page_zip)/*!< in/out: compressed page */
-{
-	ulint	offset;
-	ulint	z_offset;
-
-	ut_ad(!page == !page_zip);
-
-	if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) {
-
-		/* The record is not completely in the buffer yet. */
-		return(NULL);
-	}
-
-	offset = mach_read_from_2(ptr);
-	z_offset = mach_read_from_2(ptr + 2);
-
-	/* The end of the written range, not only its first byte,
-	must be within the uncompressed page. */
-	if (UNIV_UNLIKELY(offset < PAGE_ZIP_START)
-	    || UNIV_UNLIKELY(offset + REC_NODE_PTR_SIZE > UNIV_PAGE_SIZE)
-	    || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) {
-corrupt:
-		recv_sys->found_corrupt_log = TRUE;
-
-		return(NULL);
-	}
-
-	if (page) {
-		byte*	storage_end;
-		byte*	field;
-		byte*	storage;
-		ulint	heap_no;
-
-		if (UNIV_UNLIKELY(!page_zip)
-		    || UNIV_UNLIKELY(page_is_leaf(page))) {
-
-			goto corrupt;
-		}
-
-#ifdef UNIV_ZIP_DEBUG
-		ut_a(page_zip_validate(page_zip, page, NULL));
-#endif /* UNIV_ZIP_DEBUG */
-
-		field = page + offset;
-		storage = page_zip->data + z_offset;
-
-		storage_end = page_zip_dir_start(page_zip);
-
-		/* Derive the heap number from the distance between the
-		slot and the start of the dense directory; the slot of
-		heap_no is REC_NODE_PTR_SIZE * (heap_no - 1) bytes below
-		storage_end. */
-		heap_no = 1 + (storage_end - storage) / REC_NODE_PTR_SIZE;
-
-		if (UNIV_UNLIKELY((storage_end - storage) % REC_NODE_PTR_SIZE)
-		    || UNIV_UNLIKELY(heap_no < PAGE_HEAP_NO_USER_LOW)
-		    || UNIV_UNLIKELY(heap_no >= page_dir_get_n_heap(page))) {
-
-			goto corrupt;
-		}
-
-		memcpy(field, ptr + 4, REC_NODE_PTR_SIZE);
-		memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE);
-
-#ifdef UNIV_ZIP_DEBUG
-		ut_a(page_zip_validate(page_zip, page, NULL));
-#endif /* UNIV_ZIP_DEBUG */
-	}
-
-	return(ptr + (2 + 2 + REC_NODE_PTR_SIZE));
-}
-
-/**********************************************************************//**
-Store a new node pointer in the last REC_NODE_PTR_SIZE bytes of a record
-on a non-leaf page, and mirror it to the slot of that record below the
-dense directory of the compressed page. If mtr is given, a
-MLOG_ZIP_WRITE_NODE_PTR record carrying both page offsets and the pointer
-bytes is written to it (not when UNIV_HOTBACKUP is defined). */
-UNIV_INTERN
-void
-page_zip_write_node_ptr(
-/*====================*/
-	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
-	byte*		rec,	/*!< in/out: record */
-	ulint		size,	/*!< in: data size of rec */
-	ulint		ptr,	/*!< in: node pointer */
-	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
-{
-#ifdef UNIV_DEBUG
-	page_t*	page = page_align(rec);
-#endif /* UNIV_DEBUG */
-
-	ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
-	ut_ad(page_simple_validate_new(page));
-	ut_ad(page_zip_simple_validate(page_zip));
-	ut_ad(page_zip_get_size(page_zip)
-	      > PAGE_DATA + page_zip_dir_size(page_zip));
-	ut_ad(page_rec_is_comp(rec));
-
-	ut_ad(page_zip->m_start >= PAGE_DATA);
-	ut_ad(page_zip_header_cmp(page_zip, page));
-
-	ut_ad(!page_is_leaf(page));
-
-	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
-	UNIV_MEM_ASSERT_RW(rec, size);
-
-	/* The node pointer is the tail of the record data. */
-	byte* const	rec_node_ptr = rec + size - REC_NODE_PTR_SIZE;
-
-	/* Its copy on the compressed page is indexed by heap number,
-	counting down from the start of the dense directory. */
-	byte* const	zip_node_ptr = page_zip_dir_start(page_zip)
-		- (rec_get_heap_no_new(rec) - 1) * REC_NODE_PTR_SIZE;
-
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
-	/* Both copies must agree before the update. */
-	ut_a(!memcmp(zip_node_ptr, rec_node_ptr, REC_NODE_PTR_SIZE));
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-#if REC_NODE_PTR_SIZE != 4
-# error "REC_NODE_PTR_SIZE != 4"
-#endif
-	mach_write_to_4(rec_node_ptr, ptr);
-	memcpy(zip_node_ptr, rec_node_ptr, REC_NODE_PTR_SIZE);
-
-	if (!mtr) {
-		return;
-	}
-
-#ifndef UNIV_HOTBACKUP
-	byte*	log_ptr = mlog_open(mtr,
-				    11 + 2 + 2 + REC_NODE_PTR_SIZE);
-
-	if (UNIV_UNLIKELY(!log_ptr)) {
-		return;
-	}
-
-	log_ptr = mlog_write_initial_log_record_fast(
-		rec_node_ptr, MLOG_ZIP_WRITE_NODE_PTR, log_ptr, mtr);
-
-	/* Body: offset on the uncompressed page, offset on the
-	compressed page, pointer bytes. */
-	mach_write_to_2(log_ptr, page_offset(rec_node_ptr));
-	mach_write_to_2(log_ptr + 2, zip_node_ptr - page_zip->data);
-	memcpy(log_ptr + 4, rec_node_ptr, REC_NODE_PTR_SIZE);
-
-	mlog_close(mtr, log_ptr + 4 + REC_NODE_PTR_SIZE);
-#endif /* !UNIV_HOTBACKUP */
-}
-
-/**********************************************************************//**
-Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
-UNIV_INTERN
-void
-page_zip_write_trx_id_and_roll_ptr(
-/*===============================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- byte* rec, /*!< in/out: record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint trx_id_col,/*!< in: column number of TRX_ID in rec */
- trx_id_t trx_id, /*!< in: transaction identifier */
- roll_ptr_t roll_ptr)/*!< in: roll_ptr */
-{
- byte* field;
- byte* storage;
-#ifdef UNIV_DEBUG
- page_t* page = page_align(rec);
-#endif /* UNIV_DEBUG */
- ulint len;
-
- ut_ad(PAGE_ZIP_MATCH(rec, page_zip));
-
- ut_ad(page_simple_validate_new(page));
- ut_ad(page_zip_simple_validate(page_zip));
- ut_ad(page_zip_get_size(page_zip)
- > PAGE_DATA + page_zip_dir_size(page_zip));
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(rec_offs_comp(offsets));
-
- ut_ad(page_zip->m_start >= PAGE_DATA);
- ut_ad(page_zip_header_cmp(page_zip, page));
-
- ut_ad(page_is_leaf(page));
-
- UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
-
- storage = page_zip_dir_start(page_zip)
- - (rec_get_heap_no_new(rec) - 1)
- * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-
-#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
-# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
-#endif
- field = rec_get_nth_field(rec, offsets, trx_id_col, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
- ut_ad(field + DATA_TRX_ID_LEN
- == rec_get_nth_field(rec, offsets, trx_id_col + 1, &len));
- ut_ad(len == DATA_ROLL_PTR_LEN);
-#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG
- ut_a(!memcmp(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
-#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */
-#if DATA_TRX_ID_LEN != 6
-# error "DATA_TRX_ID_LEN != 6"
-#endif
- mach_write_to_6(field, trx_id);
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
- mach_write_to_7(field + DATA_TRX_ID_LEN, roll_ptr);
- memcpy(storage, field, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-
- UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
- UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
- rec_offs_extra_size(offsets));
- UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
-}
-
-/**********************************************************************//**
-Clear an area on the uncompressed and compressed page.
-Do not clear the data payload, as that would grow the modification log. */
-static
-void
-page_zip_clear_rec(
-/*===============*/
- page_zip_des_t* page_zip, /*!< in/out: compressed page */
- byte* rec, /*!< in: record to clear */
- const dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
-{
- ulint heap_no;
- page_t* page = page_align(rec);
- byte* storage;
- byte* field;
- ulint len;
- /* page_zip_validate() would fail here if a record
- containing externally stored columns is being deleted. */
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(!page_zip_dir_find(page_zip, page_offset(rec)));
- ut_ad(page_zip_dir_find_free(page_zip, page_offset(rec)));
- ut_ad(page_zip_header_cmp(page_zip, page));
-
- heap_no = rec_get_heap_no_new(rec);
- ut_ad(heap_no >= PAGE_HEAP_NO_USER_LOW);
-
- UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
- UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
- UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
- rec_offs_extra_size(offsets));
-
- if (!page_is_leaf(page)) {
- /* Clear node_ptr. On the compressed page,
- there is an array of node_ptr immediately before the
- dense page directory, at the very end of the page. */
- storage = page_zip_dir_start(page_zip);
- ut_ad(dict_index_get_n_unique_in_tree(index) ==
- rec_offs_n_fields(offsets) - 1);
- field = rec_get_nth_field(rec, offsets,
- rec_offs_n_fields(offsets) - 1,
- &len);
- ut_ad(len == REC_NODE_PTR_SIZE);
-
- ut_ad(!rec_offs_any_extern(offsets));
- memset(field, 0, REC_NODE_PTR_SIZE);
- memset(storage - (heap_no - 1) * REC_NODE_PTR_SIZE,
- 0, REC_NODE_PTR_SIZE);
- } else if (dict_index_is_clust(index)) {
- /* Clear trx_id and roll_ptr. On the compressed page,
- there is an array of these fields immediately before the
- dense page directory, at the very end of the page. */
- const ulint trx_id_pos
- = dict_col_get_clust_pos(
- dict_table_get_sys_col(
- index->table, DATA_TRX_ID), index);
- storage = page_zip_dir_start(page_zip);
- field = rec_get_nth_field(rec, offsets, trx_id_pos, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
-
- memset(field, 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
- memset(storage - (heap_no - 1)
- * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
- 0, DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-
- if (rec_offs_any_extern(offsets)) {
- ulint i;
-
- for (i = rec_offs_n_fields(offsets); i--; ) {
- /* Clear all BLOB pointers in order to make
- page_zip_validate() pass. */
- if (rec_offs_nth_extern(offsets, i)) {
- field = rec_get_nth_field(
- rec, offsets, i, &len);
- ut_ad(len
- == BTR_EXTERN_FIELD_REF_SIZE);
- memset(field + len
- - BTR_EXTERN_FIELD_REF_SIZE,
- 0, BTR_EXTERN_FIELD_REF_SIZE);
- }
- }
- }
- } else {
- ut_ad(!rec_offs_any_extern(offsets));
- }
-
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
-}
-
-/**********************************************************************//**
-Write the "deleted" flag of a record on a compressed page. The flag must
-already have been written on the uncompressed page. */
-UNIV_INTERN
-void
-page_zip_rec_set_deleted(
-/*=====================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- const byte* rec, /*!< in: record on the uncompressed page */
- ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */
-{
- byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
- ut_a(slot);
- UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
- if (flag) {
- *slot |= (PAGE_ZIP_DIR_SLOT_DEL >> 8);
- } else {
- *slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
- }
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page_align(rec), NULL));
-#endif /* UNIV_ZIP_DEBUG */
-}
-
-/**********************************************************************//**
-Write the "owned" flag of a record on a compressed page. The n_owned field
-must already have been written on the uncompressed page. */
-UNIV_INTERN
-void
-page_zip_rec_set_owned(
-/*===================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- const byte* rec, /*!< in: record on the uncompressed page */
- ulint flag) /*!< in: the owned flag (nonzero=TRUE) */
-{
- byte* slot = page_zip_dir_find(page_zip, page_offset(rec));
- ut_a(slot);
- UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
- if (flag) {
- *slot |= (PAGE_ZIP_DIR_SLOT_OWNED >> 8);
- } else {
- *slot &= ~(PAGE_ZIP_DIR_SLOT_OWNED >> 8);
- }
-}
-
-/**********************************************************************//**
-Insert a record to the dense page directory. */
-UNIV_INTERN
-void
-page_zip_dir_insert(
-/*================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- const byte* prev_rec,/*!< in: record after which to insert */
- const byte* free_rec,/*!< in: record from which rec was
- allocated, or NULL */
- byte* rec) /*!< in: record to insert */
-{
- ulint n_dense;
- byte* slot_rec;
- byte* slot_free;
-
- ut_ad(prev_rec != rec);
- ut_ad(page_rec_get_next((rec_t*) prev_rec) == rec);
- ut_ad(page_zip_simple_validate(page_zip));
-
- UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
-
- if (page_rec_is_infimum(prev_rec)) {
- /* Use the first slot. */
- slot_rec = page_zip->data + page_zip_get_size(page_zip);
- } else {
- byte* end = page_zip->data + page_zip_get_size(page_zip);
- byte* start = end - page_zip_dir_user_size(page_zip);
-
- if (UNIV_LIKELY(!free_rec)) {
- /* PAGE_N_RECS was already incremented
- in page_cur_insert_rec_zip(), but the
- dense directory slot at that position
- contains garbage. Skip it. */
- start += PAGE_ZIP_DIR_SLOT_SIZE;
- }
-
- slot_rec = page_zip_dir_find_low(start, end,
- page_offset(prev_rec));
- ut_a(slot_rec);
- }
-
- /* Read the old n_dense (n_heap may have been incremented). */
- n_dense = page_dir_get_n_heap(page_zip->data)
- - (PAGE_HEAP_NO_USER_LOW + 1);
-
- if (UNIV_LIKELY_NULL(free_rec)) {
- /* The record was allocated from the free list.
- Shift the dense directory only up to that slot.
- Note that in this case, n_dense is actually
- off by one, because page_cur_insert_rec_zip()
- did not increment n_heap. */
- ut_ad(rec_get_heap_no_new(rec) < n_dense + 1
- + PAGE_HEAP_NO_USER_LOW);
- ut_ad(rec >= free_rec);
- slot_free = page_zip_dir_find(page_zip, page_offset(free_rec));
- ut_ad(slot_free);
- slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
- } else {
- /* The record was allocated from the heap.
- Shift the entire dense directory. */
- ut_ad(rec_get_heap_no_new(rec) == n_dense
- + PAGE_HEAP_NO_USER_LOW);
-
- /* Shift to the end of the dense page directory. */
- slot_free = page_zip->data + page_zip_get_size(page_zip)
- - PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
- }
-
- /* Shift the dense directory to allocate place for rec. */
- memmove(slot_free - PAGE_ZIP_DIR_SLOT_SIZE, slot_free,
- slot_rec - slot_free);
-
- /* Write the entry for the inserted record.
- The "owned" and "deleted" flags must be zero. */
- mach_write_to_2(slot_rec - PAGE_ZIP_DIR_SLOT_SIZE, page_offset(rec));
-}
-
-/**********************************************************************//**
-Shift the dense page directory and the array of BLOB pointers
-when a record is deleted. */
-UNIV_INTERN
-void
-page_zip_dir_delete(
-/*================*/
- page_zip_des_t* page_zip, /*!< in/out: compressed page */
- byte* rec, /*!< in: deleted record */
- const dict_index_t* index, /*!< in: index of rec */
- const ulint* offsets, /*!< in: rec_get_offsets(rec) */
- const byte* free) /*!< in: previous start of
- the free list */
-{
- byte* slot_rec;
- byte* slot_free;
- ulint n_ext;
- page_t* page = page_align(rec);
-
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(rec_offs_comp(offsets));
-
- UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
- UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
- UNIV_MEM_ASSERT_RW(rec - rec_offs_extra_size(offsets),
- rec_offs_extra_size(offsets));
-
- slot_rec = page_zip_dir_find(page_zip, page_offset(rec));
-
- ut_a(slot_rec);
-
- /* This could not be done before page_zip_dir_find(). */
- page_header_set_field(page, page_zip, PAGE_N_RECS,
- (ulint)(page_get_n_recs(page) - 1));
-
- if (UNIV_UNLIKELY(!free)) {
- /* Make the last slot the start of the free list. */
- slot_free = page_zip->data + page_zip_get_size(page_zip)
- - PAGE_ZIP_DIR_SLOT_SIZE
- * (page_dir_get_n_heap(page_zip->data)
- - PAGE_HEAP_NO_USER_LOW);
- } else {
- slot_free = page_zip_dir_find_free(page_zip,
- page_offset(free));
- ut_a(slot_free < slot_rec);
- /* Grow the free list by one slot by moving the start. */
- slot_free += PAGE_ZIP_DIR_SLOT_SIZE;
- }
-
- if (UNIV_LIKELY(slot_rec > slot_free)) {
- memmove(slot_free + PAGE_ZIP_DIR_SLOT_SIZE,
- slot_free,
- slot_rec - slot_free);
- }
-
- /* Write the entry for the deleted record.
- The "owned" and "deleted" flags will be cleared. */
- mach_write_to_2(slot_free, page_offset(rec));
-
- if (!page_is_leaf(page) || !dict_index_is_clust(index)) {
- ut_ad(!rec_offs_any_extern(offsets));
- goto skip_blobs;
- }
-
- n_ext = rec_offs_n_extern(offsets);
- if (UNIV_UNLIKELY(n_ext)) {
- /* Shift and zero fill the array of BLOB pointers. */
- ulint blob_no;
- byte* externs;
- byte* ext_end;
-
- blob_no = page_zip_get_n_prev_extern(page_zip, rec, index);
- ut_a(blob_no + n_ext <= page_zip->n_blobs);
-
- externs = page_zip->data + page_zip_get_size(page_zip)
- - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW)
- * (PAGE_ZIP_DIR_SLOT_SIZE
- + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
-
- ext_end = externs - page_zip->n_blobs
- * BTR_EXTERN_FIELD_REF_SIZE;
- externs -= blob_no * BTR_EXTERN_FIELD_REF_SIZE;
-
- page_zip->n_blobs -= static_cast<unsigned>(n_ext);
- /* Shift and zero fill the array. */
- memmove(ext_end + n_ext * BTR_EXTERN_FIELD_REF_SIZE, ext_end,
- (page_zip->n_blobs - blob_no)
- * BTR_EXTERN_FIELD_REF_SIZE);
- memset(ext_end, 0, n_ext * BTR_EXTERN_FIELD_REF_SIZE);
- }
-
-skip_blobs:
- /* The compression algorithm expects info_bits and n_owned
- to be 0 for deleted records. */
- rec[-REC_N_NEW_EXTRA_BYTES] = 0; /* info_bits and n_owned */
-
- page_zip_clear_rec(page_zip, rec, index, offsets);
-}
-
-/**********************************************************************//**
-Add a slot to the dense page directory. */
-UNIV_INTERN
-void
-page_zip_dir_add_slot(
-/*==================*/
- page_zip_des_t* page_zip, /*!< in/out: compressed page */
- ulint is_clustered) /*!< in: nonzero for clustered index,
- zero for others */
-{
- ulint n_dense;
- byte* dir;
- byte* stored;
-
- ut_ad(page_is_comp(page_zip->data));
- UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
-
- /* Read the old n_dense (n_heap has already been incremented). */
- n_dense = page_dir_get_n_heap(page_zip->data)
- - (PAGE_HEAP_NO_USER_LOW + 1);
-
- dir = page_zip->data + page_zip_get_size(page_zip)
- - PAGE_ZIP_DIR_SLOT_SIZE * n_dense;
-
- if (!page_is_leaf(page_zip->data)) {
- ut_ad(!page_zip->n_blobs);
- stored = dir - n_dense * REC_NODE_PTR_SIZE;
- } else if (is_clustered) {
- /* Move the BLOB pointer array backwards to make space for the
- roll_ptr and trx_id columns and the dense directory slot. */
- byte* externs;
-
- stored = dir - n_dense
- * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
- externs = stored
- - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
- ASSERT_ZERO(externs
- - (PAGE_ZIP_DIR_SLOT_SIZE
- + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
- PAGE_ZIP_DIR_SLOT_SIZE
- + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
- memmove(externs - (PAGE_ZIP_DIR_SLOT_SIZE
- + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN),
- externs, stored - externs);
- } else {
- stored = dir
- - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE;
- ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE,
- PAGE_ZIP_DIR_SLOT_SIZE);
- }
-
- /* Move the uncompressed area backwards to make space
- for one directory slot. */
- memmove(stored - PAGE_ZIP_DIR_SLOT_SIZE, stored, dir - stored);
-}
-
-/***********************************************************//**
-Parses a log record of writing to the header of a page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_zip_parse_write_header(
-/*========================*/
- byte* ptr, /*!< in: redo log buffer */
- byte* end_ptr,/*!< in: redo log buffer end */
- page_t* page, /*!< in/out: uncompressed page */
- page_zip_des_t* page_zip)/*!< in/out: compressed page */
-{
- ulint offset;
- ulint len;
-
- ut_ad(ptr && end_ptr);
- ut_ad(!page == !page_zip);
-
- if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) {
-
- return(NULL);
- }
-
- offset = (ulint) *ptr++;
- len = (ulint) *ptr++;
-
- if (UNIV_UNLIKELY(!len) || UNIV_UNLIKELY(offset + len >= PAGE_DATA)) {
-corrupt:
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
-
- if (UNIV_UNLIKELY(end_ptr < ptr + len)) {
-
- return(NULL);
- }
-
- if (page) {
- if (UNIV_UNLIKELY(!page_zip)) {
-
- goto corrupt;
- }
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page, NULL));
-#endif /* UNIV_ZIP_DEBUG */
-
- memcpy(page + offset, ptr, len);
- memcpy(page_zip->data + offset, ptr, len);
-
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page, NULL));
-#endif /* UNIV_ZIP_DEBUG */
- }
-
- return(ptr + len);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Write a log record of writing to the uncompressed header portion of a page. */
-UNIV_INTERN
-void
-page_zip_write_header_log(
-/*======================*/
- const byte* data, /*!< in: data on the uncompressed page */
- ulint length, /*!< in: length of the data */
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- byte* log_ptr = mlog_open(mtr, 11 + 1 + 1);
- ulint offset = page_offset(data);
-
- ut_ad(offset < PAGE_DATA);
- ut_ad(offset + length < PAGE_DATA);
-#if PAGE_DATA > 255
-# error "PAGE_DATA > 255"
-#endif
- ut_ad(length < 256);
-
- /* If no logging is requested, we may return now */
- if (UNIV_UNLIKELY(!log_ptr)) {
-
- return;
- }
-
- log_ptr = mlog_write_initial_log_record_fast(
- (byte*) data, MLOG_ZIP_WRITE_HEADER, log_ptr, mtr);
- *log_ptr++ = (byte) offset;
- *log_ptr++ = (byte) length;
- mlog_close(mtr, log_ptr);
-
- mlog_catenate_string(mtr, data, length);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Reorganize and compress a page. This is a low-level operation for
-compressed pages, to be used when page_zip_compress() fails.
-On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
-The function btr_page_reorganize() should be preferred whenever possible.
-IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
-non-clustered index, the caller must update the insert buffer free
-bits in the same mini-transaction in such a way that the modification
-will be redo-logged.
-@return TRUE on success, FALSE on failure; page_zip will be left
-intact on failure, but page will be overwritten. */
-UNIV_INTERN
-ibool
-page_zip_reorganize(
-/*================*/
- buf_block_t* block, /*!< in/out: page with compressed page;
- on the compressed page, in: size;
- out: data, n_blobs,
- m_start, m_end, m_nonempty */
- dict_index_t* index, /*!< in: index of the B-tree node */
- mtr_t* mtr) /*!< in: mini-transaction */
-{
-#ifndef UNIV_HOTBACKUP
- buf_pool_t* buf_pool = buf_pool_from_block(block);
-#endif /* !UNIV_HOTBACKUP */
- page_zip_des_t* page_zip = buf_block_get_page_zip(block);
- page_t* page = buf_block_get_frame(block);
- buf_block_t* temp_block;
- page_t* temp_page;
- ulint log_mode;
-
- ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
- ut_ad(page_is_comp(page));
- ut_ad(!dict_index_is_ibuf(index));
- /* Note that page_zip_validate(page_zip, page, index) may fail here. */
- UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
- UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
-
- /* Disable logging */
- log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);
-
-#ifndef UNIV_HOTBACKUP
- temp_block = buf_block_alloc(buf_pool);
- btr_search_drop_page_hash_index(block);
- block->check_index_page_at_flush = TRUE;
-#else /* !UNIV_HOTBACKUP */
- ut_ad(block == back_block1);
- temp_block = back_block2;
-#endif /* !UNIV_HOTBACKUP */
- temp_page = temp_block->frame;
-
- /* Copy the old page to temporary space */
- buf_frame_copy(temp_page, page);
-
- btr_blob_dbg_remove(page, index, "zip_reorg");
-
- /* Recreate the page: note that global data on page (possible
- segment headers, next page-field, etc.) is preserved intact */
-
- page_create(block, mtr, TRUE);
-
- /* Copy the records from the temporary space to the recreated page;
- do not copy the lock bits yet */
-
- page_copy_rec_list_end_no_locks(block, temp_block,
- page_get_infimum_rec(temp_page),
- index, mtr);
-
- if (!dict_index_is_clust(index) && page_is_leaf(temp_page)) {
- /* Copy max trx id to recreated page */
- trx_id_t max_trx_id = page_get_max_trx_id(temp_page);
- page_set_max_trx_id(block, NULL, max_trx_id, NULL);
- ut_ad(max_trx_id != 0);
- }
-
- /* Restore logging. */
- mtr_set_log_mode(mtr, log_mode);
-
- if (!page_zip_compress(page_zip, page, index, page_zip_level, mtr)) {
-
-#ifndef UNIV_HOTBACKUP
- buf_block_free(temp_block);
-#endif /* !UNIV_HOTBACKUP */
- return(FALSE);
- }
-
- lock_move_reorganize_page(block, temp_block);
-
-#ifndef UNIV_HOTBACKUP
- buf_block_free(temp_block);
-#endif /* !UNIV_HOTBACKUP */
- return(TRUE);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Copy the records of a page byte for byte. Do not copy the page header
-or trailer, except those B-tree header fields that are directly
-related to the storage of records. Also copy PAGE_MAX_TRX_ID.
-NOTE: The caller must update the lock table and the adaptive hash index. */
-UNIV_INTERN
-void
-page_zip_copy_recs(
-/*===============*/
- page_zip_des_t* page_zip, /*!< out: copy of src_zip
- (n_blobs, m_start, m_end,
- m_nonempty, data[0..size-1]) */
- page_t* page, /*!< out: copy of src */
- const page_zip_des_t* src_zip, /*!< in: compressed page */
- const page_t* src, /*!< in: page */
- dict_index_t* index, /*!< in: index of the B-tree */
- mtr_t* mtr) /*!< in: mini-transaction */
-{
- ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
- ut_ad(mtr_memo_contains_page(mtr, src, MTR_MEMO_PAGE_X_FIX));
- ut_ad(!dict_index_is_ibuf(index));
-#ifdef UNIV_ZIP_DEBUG
- /* The B-tree operations that call this function may set
- FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
- mismatch. A strict page_zip_validate() will be executed later
- during the B-tree operations. */
- ut_a(page_zip_validate_low(src_zip, src, index, TRUE));
-#endif /* UNIV_ZIP_DEBUG */
- ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
- if (UNIV_UNLIKELY(src_zip->n_blobs)) {
- ut_a(page_is_leaf(src));
- ut_a(dict_index_is_clust(index));
- }
-
- /* The PAGE_MAX_TRX_ID must be set on leaf pages of secondary
- indexes. It does not matter on other pages. */
- ut_a(dict_index_is_clust(index) || !page_is_leaf(src)
- || page_get_max_trx_id(src));
-
- UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE);
- UNIV_MEM_ASSERT_W(page_zip->data, page_zip_get_size(page_zip));
- UNIV_MEM_ASSERT_RW(src, UNIV_PAGE_SIZE);
- UNIV_MEM_ASSERT_RW(src_zip->data, page_zip_get_size(page_zip));
-
- /* Copy those B-tree page header fields that are related to
- the records stored in the page. Also copy the field
- PAGE_MAX_TRX_ID. Skip the rest of the page header and
- trailer. On the compressed page, there is no trailer. */
-#if PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END
-# error "PAGE_MAX_TRX_ID + 8 != PAGE_HEADER_PRIV_END"
-#endif
- memcpy(PAGE_HEADER + page, PAGE_HEADER + src,
- PAGE_HEADER_PRIV_END);
- memcpy(PAGE_DATA + page, PAGE_DATA + src,
- UNIV_PAGE_SIZE - PAGE_DATA - FIL_PAGE_DATA_END);
- memcpy(PAGE_HEADER + page_zip->data, PAGE_HEADER + src_zip->data,
- PAGE_HEADER_PRIV_END);
- memcpy(PAGE_DATA + page_zip->data, PAGE_DATA + src_zip->data,
- page_zip_get_size(page_zip) - PAGE_DATA);
-
- /* Copy all fields of src_zip to page_zip, except the pointer
- to the compressed data page. */
- {
- page_zip_t* data = page_zip->data;
- memcpy(page_zip, src_zip, sizeof *page_zip);
- page_zip->data = data;
- }
- ut_ad(page_zip_get_trailer_len(page_zip, dict_index_is_clust(index))
- + page_zip->m_end < page_zip_get_size(page_zip));
-
- if (!page_is_leaf(src)
- && UNIV_UNLIKELY(mach_read_from_4(src + FIL_PAGE_PREV) == FIL_NULL)
- && UNIV_LIKELY(mach_read_from_4(page
- + FIL_PAGE_PREV) != FIL_NULL)) {
- /* Clear the REC_INFO_MIN_REC_FLAG of the first user record. */
- ulint offs = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
- TRUE);
- if (UNIV_LIKELY(offs != PAGE_NEW_SUPREMUM)) {
- rec_t* rec = page + offs;
- ut_a(rec[-REC_N_NEW_EXTRA_BYTES]
- & REC_INFO_MIN_REC_FLAG);
- rec[-REC_N_NEW_EXTRA_BYTES] &= ~ REC_INFO_MIN_REC_FLAG;
- }
- }
-
-#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page, index));
-#endif /* UNIV_ZIP_DEBUG */
- btr_blob_dbg_add(page, index, "page_zip_copy_recs");
-
- page_zip_compress_write_log(page_zip, page, index, mtr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Parses a log record of compressing an index page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-page_zip_parse_compress(
-/*====================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< out: uncompressed page */
- page_zip_des_t* page_zip)/*!< out: compressed page */
-{
- ulint size;
- ulint trailer_size;
-
- ut_ad(!page == !page_zip);
-
- if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) {
-
- return(NULL);
- }
-
- size = mach_read_from_2(ptr);
- ptr += 2;
- trailer_size = mach_read_from_2(ptr);
- ptr += 2;
-
- if (UNIV_UNLIKELY(ptr + 8 + size + trailer_size > end_ptr)) {
-
- return(NULL);
- }
-
- if (page) {
- if (UNIV_UNLIKELY(!page_zip)
- || UNIV_UNLIKELY(page_zip_get_size(page_zip) < size)) {
-corrupt:
- recv_sys->found_corrupt_log = TRUE;
-
- return(NULL);
- }
-
- memcpy(page_zip->data + FIL_PAGE_PREV, ptr, 4);
- memcpy(page_zip->data + FIL_PAGE_NEXT, ptr + 4, 4);
- memcpy(page_zip->data + FIL_PAGE_TYPE, ptr + 8, size);
- memset(page_zip->data + FIL_PAGE_TYPE + size, 0,
- page_zip_get_size(page_zip) - trailer_size
- - (FIL_PAGE_TYPE + size));
- memcpy(page_zip->data + page_zip_get_size(page_zip)
- - trailer_size, ptr + 8 + size, trailer_size);
-
- if (UNIV_UNLIKELY(!page_zip_decompress(page_zip, page,
- TRUE))) {
-
- goto corrupt;
- }
- }
-
- return(ptr + 8 + size + trailer_size);
-}
-#endif /* !UNIV_INNOCHECKSUM */
-
-/**********************************************************************//**
-Calculate the compressed page checksum.
-@return page checksum */
-UNIV_INTERN
-ulint
-page_zip_calc_checksum(
-/*===================*/
- const void* data, /*!< in: compressed page */
- ulint size, /*!< in: size of compressed page */
- srv_checksum_algorithm_t algo) /*!< in: algorithm to use */
-{
- uLong adler;
- ib_uint32_t crc32;
- const Bytef* s = static_cast<const byte*>(data);
-
- /* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN,
- and FIL_PAGE_FILE_FLUSH_LSN from the checksum. */
-
- switch (algo) {
- case SRV_CHECKSUM_ALGORITHM_CRC32:
- case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
-
- ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-
- crc32 = ut_crc32(s + FIL_PAGE_OFFSET,
- FIL_PAGE_LSN - FIL_PAGE_OFFSET)
- ^ ut_crc32(s + FIL_PAGE_TYPE, 2)
- ^ ut_crc32(s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
- size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-
- return((ulint) crc32);
- case SRV_CHECKSUM_ALGORITHM_INNODB:
- case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
- ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-
- adler = adler32(0L, s + FIL_PAGE_OFFSET,
- FIL_PAGE_LSN - FIL_PAGE_OFFSET);
- adler = adler32(adler, s + FIL_PAGE_TYPE, 2);
- adler = adler32(
- adler, s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
- static_cast<uInt>(size)
- - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-
- return((ulint) adler);
- case SRV_CHECKSUM_ALGORITHM_NONE:
- case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
- return(BUF_NO_CHECKSUM_MAGIC);
- /* no default so the compiler will emit a warning if new enum
- is added and not handled here */
- }
-
- ut_error;
- return(0);
-}
-
-/**********************************************************************//**
-Verify a compressed page's checksum.
-@return TRUE if the stored checksum is valid according to the value of
-innodb_checksum_algorithm */
-UNIV_INTERN
-ibool
-page_zip_verify_checksum(
-/*=====================*/
- const void* data, /*!< in: compressed page */
- ulint size) /*!< in: size of compressed page */
-{
- ib_uint32_t stored;
- ib_uint32_t calc;
- ib_uint32_t crc32 = 0 /* silence bogus warning */;
- ib_uint32_t innodb = 0 /* silence bogus warning */;
-
- stored = static_cast<ib_uint32_t>(mach_read_from_4(
- static_cast<const unsigned char*>(data) + FIL_PAGE_SPACE_OR_CHKSUM));
-
- ulint page_no = mach_read_from_4(static_cast<const unsigned char*> (data) + FIL_PAGE_OFFSET);
- ulint space_id = mach_read_from_4(static_cast<const unsigned char*>
- (data) + FIL_PAGE_SPACE_ID);
-
-#if FIL_PAGE_LSN % 8
-#error "FIL_PAGE_LSN must be 64 bit aligned"
-#endif
-
- /* Check if page is empty */
- if (stored == 0
- && *reinterpret_cast<const ib_uint64_t*>(static_cast<const char*>(
- data)
- + FIL_PAGE_LSN) == 0) {
- /* make sure that the page is really empty */
- ulint i;
- for (i = 0; i < size; i++) {
- if (*((const char*) data + i) != 0) {
- return(FALSE);
- }
- }
- /* Empty page */
- return(TRUE);
- }
-
- const srv_checksum_algorithm_t curr_algo =
- static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm);
-
- if (curr_algo == SRV_CHECKSUM_ALGORITHM_NONE) {
- return(TRUE);
- }
-
- calc = static_cast<ib_uint32_t>(page_zip_calc_checksum(
- data, size, curr_algo));
-
- if (stored == calc) {
- return(TRUE);
- }
-
- switch (curr_algo) {
- case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32:
- case SRV_CHECKSUM_ALGORITHM_CRC32:
-
- if (stored == BUF_NO_CHECKSUM_MAGIC) {
- if (curr_algo
- == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) {
- page_warn_strict_checksum(
- curr_algo,
- SRV_CHECKSUM_ALGORITHM_NONE,
- space_id, page_no);
- }
-
- return(TRUE);
- }
-
- innodb = static_cast<ib_uint32_t>(page_zip_calc_checksum(
- data, size, SRV_CHECKSUM_ALGORITHM_INNODB));
-
- if (stored == innodb) {
- if (curr_algo
- == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) {
- page_warn_strict_checksum(
- curr_algo,
- SRV_CHECKSUM_ALGORITHM_INNODB,
- space_id, page_no);
- }
-
- return(TRUE);
- }
-
- break;
- case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB:
- case SRV_CHECKSUM_ALGORITHM_INNODB:
-
- if (stored == BUF_NO_CHECKSUM_MAGIC) {
- if (curr_algo
- == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) {
- page_warn_strict_checksum(
- curr_algo,
- SRV_CHECKSUM_ALGORITHM_NONE,
- space_id, page_no);
- }
-
- return(TRUE);
- }
-
- crc32 = static_cast<ib_uint32_t>(page_zip_calc_checksum(
- data, size, SRV_CHECKSUM_ALGORITHM_CRC32));
-
- if (stored == crc32) {
- if (curr_algo
- == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) {
- page_warn_strict_checksum(
- curr_algo,
- SRV_CHECKSUM_ALGORITHM_CRC32,
- space_id, page_no);
- }
-
- return(TRUE);
- }
-
- break;
- case SRV_CHECKSUM_ALGORITHM_STRICT_NONE:
-
- crc32 = static_cast<ib_uint32_t>(page_zip_calc_checksum(
- data, size, SRV_CHECKSUM_ALGORITHM_CRC32));
-
- if (stored == crc32) {
- page_warn_strict_checksum(
- curr_algo, SRV_CHECKSUM_ALGORITHM_CRC32,
- space_id, page_no);
-
- return(TRUE);
- }
-
- innodb = static_cast<ib_uint32_t>(page_zip_calc_checksum(
- data, size, SRV_CHECKSUM_ALGORITHM_INNODB));
-
- if (stored == innodb) {
- page_warn_strict_checksum(
- curr_algo,
- SRV_CHECKSUM_ALGORITHM_INNODB,
- space_id, page_no);
- return(TRUE);
- }
-
- break;
- case SRV_CHECKSUM_ALGORITHM_NONE:
- ut_error;
- /* no default so the compiler will emit a warning if new enum
- is added and not handled here */
- }
-
- return(FALSE);
-}
diff --git a/storage/xtradb/pars/lexyy.cc b/storage/xtradb/pars/lexyy.cc
deleted file mode 100644
index 62122bb9f6f..00000000000
--- a/storage/xtradb/pars/lexyy.cc
+++ /dev/null
@@ -1,3132 +0,0 @@
-#include "univ.i"
-#line 2 "lexyy.cc"
-
-#line 4 "lexyy.cc"
-
-#define YY_INT_ALIGNED short int
-
-/* A lexical scanner generated by flex */
-
-#define FLEX_SCANNER
-#define YY_FLEX_MAJOR_VERSION 2
-#define YY_FLEX_MINOR_VERSION 5
-#define YY_FLEX_SUBMINOR_VERSION 35
-#if YY_FLEX_SUBMINOR_VERSION > 0
-#define FLEX_BETA
-#endif
-
-/* First, we deal with platform-specific or compiler-specific issues. */
-
-/* begin standard C headers. */
-#include <stdio.h>
-#include <string.h>
-#include <errno.h>
-#include <stdlib.h>
-
-/* end standard C headers. */
-
-/* flex integer type definitions */
-
-#ifndef FLEXINT_H
-#define FLEXINT_H
-
-/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
-
-#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
-
-/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
- * if you want the limit (max/min) macros for int types.
- */
-#ifndef __STDC_LIMIT_MACROS
-#define __STDC_LIMIT_MACROS 1
-#endif
-
-#include <inttypes.h>
-typedef int8_t flex_int8_t;
-typedef uint8_t flex_uint8_t;
-typedef int16_t flex_int16_t;
-typedef uint16_t flex_uint16_t;
-typedef int32_t flex_int32_t;
-typedef uint32_t flex_uint32_t;
-#else
-typedef signed char flex_int8_t;
-typedef short int flex_int16_t;
-typedef int flex_int32_t;
-typedef unsigned char flex_uint8_t;
-typedef unsigned short int flex_uint16_t;
-typedef unsigned int flex_uint32_t;
-
-/* Limits of integral types. */
-#ifndef INT8_MIN
-#define INT8_MIN (-128)
-#endif
-#ifndef INT16_MIN
-#define INT16_MIN (-32767-1)
-#endif
-#ifndef INT32_MIN
-#define INT32_MIN (-2147483647-1)
-#endif
-#ifndef INT8_MAX
-#define INT8_MAX (127)
-#endif
-#ifndef INT16_MAX
-#define INT16_MAX (32767)
-#endif
-#ifndef INT32_MAX
-#define INT32_MAX (2147483647)
-#endif
-#ifndef UINT8_MAX
-#define UINT8_MAX (255U)
-#endif
-#ifndef UINT16_MAX
-#define UINT16_MAX (65535U)
-#endif
-#ifndef UINT32_MAX
-#define UINT32_MAX (4294967295U)
-#endif
-
-#endif /* ! C99 */
-
-#endif /* ! FLEXINT_H */
-
-#ifdef __cplusplus
-
-/* The "const" storage-class-modifier is valid. */
-#define YY_USE_CONST
-
-#else /* ! __cplusplus */
-
-/* C99 requires __STDC__ to be defined as 1. */
-#if defined (__STDC__)
-
-#define YY_USE_CONST
-
-#endif /* defined (__STDC__) */
-#endif /* ! __cplusplus */
-
-#ifdef YY_USE_CONST
-#define yyconst const
-#else
-#define yyconst
-#endif
-
-/* Returned upon end-of-file. */
-#define YY_NULL 0
-
-/* Promotes a possibly negative, possibly signed char to an unsigned
- * integer for use as an array index. If the signed char is negative,
- * we want to instead treat it as an 8-bit unsigned char, hence the
- * double cast.
- */
-#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c)
-
-/* Enter a start condition. This macro really ought to take a parameter,
- * but we do it the disgusting crufty way forced on us by the ()-less
- * definition of BEGIN.
- */
-#define BEGIN (yy_start) = 1 + 2 *
-
-/* Translate the current start state into a value that can be later handed
- * to BEGIN to return to the state. The YYSTATE alias is for lex
- * compatibility.
- */
-#define YY_START (((yy_start) - 1) / 2)
-#define YYSTATE YY_START
-
-/* Action number for EOF rule of a given start state. */
-#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
-
-/* Special action meaning "start processing a new file". */
-#define YY_NEW_FILE yyrestart(yyin )
-
-#define YY_END_OF_BUFFER_CHAR 0
-
-/* Size of default input buffer. */
-#ifndef YY_BUF_SIZE
-#ifdef __ia64__
-/* On IA-64, the buffer size is 16k, not 8k.
- * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
- * Ditto for the __ia64__ case accordingly.
- */
-#define YY_BUF_SIZE 32768
-#else
-#define YY_BUF_SIZE 16384
-#endif /* __ia64__ */
-#endif
-
-/* The state buf must be large enough to hold one state per character in the main buffer.
- */
-#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
-
-#ifndef YY_TYPEDEF_YY_BUFFER_STATE
-#define YY_TYPEDEF_YY_BUFFER_STATE
-typedef struct yy_buffer_state *YY_BUFFER_STATE;
-#endif
-
-#ifndef YY_TYPEDEF_YY_SIZE_T
-#define YY_TYPEDEF_YY_SIZE_T
-typedef size_t yy_size_t;
-#endif
-
-extern yy_size_t yyleng;
-
-extern FILE *yyin, *yyout;
-
-#define EOB_ACT_CONTINUE_SCAN 0
-#define EOB_ACT_END_OF_FILE 1
-#define EOB_ACT_LAST_MATCH 2
-
- #define YY_LESS_LINENO(n)
-
-/* Return all but the first "n" matched characters back to the input stream. */
-#define yyless(n) \
- do \
- { \
- /* Undo effects of setting up yytext. */ \
- int yyless_macro_arg = (n); \
- YY_LESS_LINENO(yyless_macro_arg);\
- *yy_cp = (yy_hold_char); \
- YY_RESTORE_YY_MORE_OFFSET \
- (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
- YY_DO_BEFORE_ACTION; /* set up yytext again */ \
- } \
- while ( 0 )
-
-#define unput(c) yyunput( c, (yytext_ptr) )
-
-#ifndef YY_STRUCT_YY_BUFFER_STATE
-#define YY_STRUCT_YY_BUFFER_STATE
-struct yy_buffer_state
- {
- FILE *yy_input_file;
-
- char *yy_ch_buf; /* input buffer */
- char *yy_buf_pos; /* current position in input buffer */
-
- /* Size of input buffer in bytes, not including room for EOB
- * characters.
- */
- yy_size_t yy_buf_size;
-
- /* Number of characters read into yy_ch_buf, not including EOB
- * characters.
- */
- yy_size_t yy_n_chars;
-
- /* Whether we "own" the buffer - i.e., we know we created it,
- * and can realloc() it to grow it, and should free() it to
- * delete it.
- */
- int yy_is_our_buffer;
-
- /* Whether this is an "interactive" input source; if so, and
- * if we're using stdio for input, then we want to use getc()
- * instead of fread(), to make sure we stop fetching input after
- * each newline.
- */
- int yy_is_interactive;
-
- /* Whether we're considered to be at the beginning of a line.
- * If so, '^' rules will be active on the next match, otherwise
- * not.
- */
- int yy_at_bol;
-
- int yy_bs_lineno; /**< The line count. */
- int yy_bs_column; /**< The column count. */
-
- /* Whether to try to fill the input buffer when we reach the
- * end of it.
- */
- int yy_fill_buffer;
-
- int yy_buffer_status;
-
-#define YY_BUFFER_NEW 0
-#define YY_BUFFER_NORMAL 1
- /* When an EOF's been seen but there's still some text to process
- * then we mark the buffer as YY_EOF_PENDING, to indicate that we
- * shouldn't try reading from the input source any more. We might
- * still have a bunch of tokens to match, though, because of
- * possible backing-up.
- *
- * When we actually see the EOF, we change the status to "new"
- * (via yyrestart()), so that the user can continue scanning by
- * just pointing yyin at a new input file.
- */
-#define YY_BUFFER_EOF_PENDING 2
-
- };
-#endif /* !YY_STRUCT_YY_BUFFER_STATE */
-
-/* Stack of input buffers. */
-static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */
-static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */
-static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */
-
-/* We provide macros for accessing buffer states in case in the
- * future we want to put the buffer states in a more general
- * "scanner state".
- *
- * Returns the top of the stack, or NULL.
- */
-#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \
- ? (yy_buffer_stack)[(yy_buffer_stack_top)] \
- : NULL)
-
-/* Same as previous macro, but useful when we know that the buffer stack is not
- * NULL or when we need an lvalue. For internal use only.
- */
-#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)]
-
-/* yy_hold_char holds the character lost when yytext is formed. */
-static char yy_hold_char;
-static yy_size_t yy_n_chars; /* number of characters read into yy_ch_buf */
-yy_size_t yyleng;
-
-/* Points to current character in buffer. */
-static char *yy_c_buf_p = (char *) 0;
-static int yy_init = 0; /* whether we need to initialize */
-static int yy_start = 0; /* start state number */
-
-/* Flag which is used to allow yywrap()'s to do buffer switches
- * instead of setting up a fresh yyin. A bit of a hack ...
- */
-static int yy_did_buffer_switch_on_eof;
-
-void yyrestart (FILE *input_file );
-MY_ATTRIBUTE((unused)) static void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer );
-static YY_BUFFER_STATE yy_create_buffer (FILE *file,int size );
-void yy_delete_buffer (YY_BUFFER_STATE b );
-void yy_flush_buffer (YY_BUFFER_STATE b );
-void yypush_buffer_state (YY_BUFFER_STATE new_buffer );
-void yypop_buffer_state (void );
-
-static void yyensure_buffer_stack (void );
-static void yy_load_buffer_state (void );
-static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file );
-
-#define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER )
-
-YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size );
-YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str );
-YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,yy_size_t len );
-
-void *yyalloc (yy_size_t );
-void *yyrealloc (void *,yy_size_t );
-void yyfree (void * );
-
-#define yy_new_buffer yy_create_buffer
-
-#define yy_set_interactive(is_interactive) \
- { \
- if ( ! YY_CURRENT_BUFFER ){ \
- yyensure_buffer_stack (); \
- YY_CURRENT_BUFFER_LVALUE = \
- yy_create_buffer(yyin,YY_BUF_SIZE ); \
- } \
- YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
- }
-
-#define yy_set_bol(at_bol) \
- { \
- if ( ! YY_CURRENT_BUFFER ){\
- yyensure_buffer_stack (); \
- YY_CURRENT_BUFFER_LVALUE = \
- yy_create_buffer(yyin,YY_BUF_SIZE ); \
- } \
- YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
- }
-
-#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
-
-/* Begin user sect3 */
-
-#define yywrap(n) 1
-#define YY_SKIP_YYWRAP
-
-typedef unsigned char YY_CHAR;
-
-FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0;
-
-typedef int yy_state_type;
-
-extern int yylineno;
-
-int yylineno = 1;
-
-extern char *yytext;
-#define yytext_ptr yytext
-
-static yy_state_type yy_get_previous_state (void );
-static yy_state_type yy_try_NUL_trans (yy_state_type current_state );
-static int yy_get_next_buffer (void );
-static void yy_fatal_error (yyconst char msg[] );
-
-/* Done after the current pattern has been matched and before the
- * corresponding action - sets up yytext.
- */
-#define YY_DO_BEFORE_ACTION \
- (yytext_ptr) = yy_bp; \
- yyleng = (size_t) (yy_cp - yy_bp); \
- (yy_hold_char) = *yy_cp; \
- *yy_cp = '\0'; \
- (yy_c_buf_p) = yy_cp;
-
-#define YY_NUM_RULES 124
-#define YY_END_OF_BUFFER 125
-/* This struct is not used in this scanner,
- but its presence is necessary. */
-struct yy_trans_info
- {
- flex_int32_t yy_verify;
- flex_int32_t yy_nxt;
- };
-static yyconst flex_int16_t yy_accept[425] =
- { 0,
- 0, 0, 119, 119, 0, 0, 0, 0, 125, 123,
- 122, 122, 8, 123, 114, 5, 103, 109, 112, 110,
- 107, 111, 123, 113, 1, 123, 108, 106, 104, 105,
- 117, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 115, 116, 119, 120, 6, 7, 9, 10, 122, 4,
- 98, 118, 2, 1, 3, 99, 100, 102, 101, 0,
- 96, 0, 96, 96, 96, 96, 96, 44, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 28, 17, 25, 96, 96, 96,
-
- 96, 96, 96, 54, 63, 96, 14, 96, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 96, 119, 120, 120, 121, 6,
- 7, 9, 10, 2, 0, 97, 13, 45, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 96, 96, 96, 27, 96, 96,
- 96, 41, 96, 96, 96, 96, 21, 96, 96, 96,
- 96, 96, 15, 96, 96, 96, 18, 96, 96, 96,
- 96, 96, 82, 96, 96, 96, 51, 96, 12, 96,
- 36, 96, 96, 96, 96, 96, 96, 96, 96, 96,
-
- 96, 96, 0, 97, 96, 96, 96, 96, 20, 96,
- 24, 96, 96, 96, 96, 96, 96, 96, 96, 96,
- 96, 96, 46, 96, 96, 30, 96, 89, 96, 96,
- 39, 96, 96, 96, 96, 96, 48, 96, 94, 91,
- 32, 93, 96, 11, 66, 96, 96, 96, 42, 96,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 29,
- 96, 96, 96, 96, 96, 96, 96, 96, 96, 87,
- 0, 96, 26, 96, 96, 96, 68, 96, 96, 96,
- 96, 37, 96, 96, 96, 96, 96, 96, 96, 31,
- 67, 23, 96, 59, 96, 77, 96, 96, 96, 43,
-
- 96, 96, 96, 96, 96, 96, 96, 96, 92, 96,
- 96, 56, 96, 96, 96, 96, 96, 96, 96, 40,
- 33, 0, 81, 95, 19, 96, 96, 85, 96, 76,
- 55, 96, 65, 96, 52, 96, 96, 96, 47, 96,
- 78, 96, 80, 96, 96, 34, 96, 96, 96, 35,
- 74, 96, 96, 96, 96, 60, 96, 50, 49, 96,
- 96, 96, 57, 53, 64, 96, 96, 96, 22, 96,
- 96, 75, 83, 96, 96, 79, 96, 70, 96, 96,
- 96, 96, 96, 38, 96, 90, 69, 96, 86, 96,
- 96, 96, 88, 96, 96, 61, 96, 16, 96, 72,
-
- 71, 96, 58, 96, 84, 96, 96, 96, 96, 96,
- 96, 96, 96, 96, 96, 73, 96, 96, 96, 96,
- 96, 96, 62, 0
- } ;
-
-static yyconst flex_int32_t yy_ec[256] =
- { 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 1, 4, 5, 6, 7, 1, 8, 9,
- 10, 11, 12, 13, 14, 15, 16, 17, 17, 17,
- 17, 17, 17, 17, 17, 17, 17, 18, 19, 20,
- 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
- 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
- 1, 1, 1, 1, 51, 1, 34, 34, 34, 34,
-
- 34, 34, 34, 34, 34, 34, 34, 52, 34, 34,
- 34, 34, 53, 34, 54, 34, 34, 34, 34, 34,
- 34, 34, 55, 1, 56, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1
- } ;
-
-static yyconst flex_int32_t yy_meta[57] =
- { 0,
- 1, 1, 1, 2, 3, 1, 1, 4, 1, 1,
- 5, 1, 1, 1, 1, 6, 7, 1, 1, 1,
- 8, 1, 1, 6, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
- 9, 9, 9, 9, 1, 1
- } ;
-
-static yyconst flex_int16_t yy_base[438] =
- { 0,
- 0, 0, 293, 287, 284, 281, 272, 256, 254, 1357,
- 55, 57, 1357, 0, 1357, 1357, 1357, 1357, 1357, 1357,
- 1357, 1357, 238, 227, 46, 205, 1357, 43, 1357, 203,
- 1357, 46, 50, 56, 52, 66, 64, 51, 81, 92,
- 91, 94, 96, 111, 113, 116, 130, 134, 53, 143,
- 1357, 1357, 0, 106, 0, 212, 0, 210, 141, 0,
- 1357, 1357, 192, 56, 173, 1357, 1357, 1357, 1357, 168,
- 140, 150, 152, 154, 155, 161, 167, 171, 177, 172,
- 184, 174, 188, 189, 191, 194, 203, 212, 215, 217,
- 219, 221, 226, 228, 231, 240, 233, 235, 246, 251,
-
- 258, 253, 255, 256, 269, 271, 278, 272, 285, 283,
- 287, 289, 296, 305, 298, 315, 319, 321, 322, 326,
- 332, 333, 342, 339, 343, 0, 112, 173, 1357, 0,
- 155, 0, 156, 132, 93, 0, 355, 357, 358, 360,
- 364, 367, 374, 370, 379, 380, 389, 383, 390, 392,
- 395, 408, 411, 409, 415, 418, 425, 427, 429, 436,
- 431, 441, 446, 448, 450, 452, 453, 462, 471, 464,
- 473, 474, 478, 485, 488, 490, 491, 494, 500, 501,
- 504, 506, 507, 517, 518, 519, 520, 521, 522, 523,
- 533, 536, 538, 543, 549, 554, 555, 561, 556, 566,
-
- 567, 576, 60, 0, 573, 578, 580, 582, 583, 593,
- 589, 596, 598, 603, 605, 607, 610, 617, 619, 621,
- 622, 628, 633, 634, 635, 639, 640, 649, 650, 652,
- 653, 655, 659, 664, 668, 669, 665, 671, 674, 678,
- 681, 685, 687, 688, 692, 697, 698, 701, 703, 704,
- 707, 708, 717, 713, 728, 730, 724, 740, 734, 745,
- 746, 750, 751, 756, 757, 760, 761, 762, 771, 773,
- 42, 778, 782, 783, 787, 789, 792, 794, 793, 804,
- 805, 808, 809, 810, 819, 823, 826, 828, 829, 830,
- 835, 840, 844, 846, 847, 856, 857, 858, 859, 860,
-
- 863, 872, 873, 878, 879, 882, 885, 889, 894, 895,
- 896, 898, 905, 910, 908, 912, 914, 915, 926, 930,
- 931, 73, 932, 933, 935, 937, 942, 944, 946, 947,
- 948, 949, 951, 958, 961, 965, 967, 972, 978, 979,
- 981, 984, 983, 985, 994, 988, 999, 1000, 1001, 1004,
- 1013, 1015, 1022, 1016, 1019, 1026, 1032, 1033, 1035, 1036,
- 1038, 1039, 1048, 1049, 1050, 1051, 1053, 1054, 1060, 1063,
- 1065, 1066, 1069, 1070, 1072, 1082, 1084, 1085, 1087, 1096,
- 1097, 1098, 1099, 1101, 1113, 1114, 1115, 1116, 1117, 1118,
- 1119, 1128, 1130, 1131, 1134, 1133, 1135, 1137, 1150, 1151,
-
- 1153, 1155, 1157, 1162, 1160, 1167, 1172, 1173, 1174, 1176,
- 1185, 1190, 1183, 1187, 1189, 1199, 1204, 1206, 1208, 1210,
- 1215, 1220, 1222, 1357, 1269, 1278, 1287, 1290, 1293, 1297,
- 1306, 1315, 1324, 1333, 1340, 1344, 1347
- } ;
-
-static yyconst flex_int16_t yy_def[438] =
- { 0,
- 424, 1, 425, 425, 426, 426, 427, 427, 424, 424,
- 424, 424, 424, 428, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 429, 424, 424, 424, 424,
- 424, 430, 430, 430, 430, 430, 34, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 424, 424, 431, 432, 433, 424, 434, 424, 424, 428,
- 424, 424, 424, 424, 429, 424, 424, 424, 424, 435,
- 430, 436, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
-
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 431, 432, 432, 424, 433,
- 424, 434, 424, 424, 424, 437, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
-
- 430, 430, 424, 437, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 424, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
-
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 424, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
-
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 430, 430, 430, 430, 430, 430, 430,
- 430, 430, 430, 0, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424
- } ;
-
-static yyconst flex_int16_t yy_nxt[1414] =
- { 0,
- 10, 11, 12, 13, 10, 14, 15, 16, 17, 18,
- 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,
- 29, 30, 31, 10, 32, 33, 34, 35, 36, 37,
- 38, 38, 39, 38, 38, 40, 41, 42, 43, 44,
- 38, 45, 46, 47, 48, 49, 50, 38, 38, 38,
- 38, 38, 38, 38, 51, 52, 59, 59, 59, 59,
- 63, 70, 64, 67, 68, 70, 70, 70, 70, 72,
- 63, 70, 64, 72, 72, 72, 72, 123, 75, 72,
- 84, 70, 76, 73, 85, 77, 136, 79, 74, 72,
- 86, 80, 90, 322, 81, 71, 70, 82, 78, 91,
-
- 83, 87, 92, 88, 72, 93, 70, 70, 94, 70,
- 95, 70, 271, 89, 72, 72, 128, 72, 96, 72,
- 98, 129, 424, 97, 99, 104, 70, 424, 70, 101,
- 100, 70, 102, 105, 72, 106, 72, 107, 103, 72,
- 108, 110, 59, 59, 113, 70, 203, 114, 134, 70,
- 111, 112, 109, 72, 118, 70, 115, 72, 70, 133,
- 116, 119, 131, 72, 117, 70, 72, 70, 120, 70,
- 70, 121, 135, 122, 124, 72, 70, 72, 72, 137,
- 138, 125, 70, 128, 72, 140, 70, 70, 129, 70,
- 72, 141, 70, 424, 72, 72, 139, 72, 142, 70,
-
- 72, 144, 150, 70, 70, 143, 70, 72, 134, 70,
- 145, 72, 72, 133, 72, 152, 146, 72, 70, 131,
- 147, 148, 156, 69, 153, 66, 72, 70, 149, 151,
- 70, 154, 70, 155, 70, 72, 70, 62, 72, 158,
- 72, 70, 72, 70, 72, 157, 70, 159, 70, 72,
- 70, 72, 61, 424, 72, 70, 72, 161, 72, 58,
- 160, 70, 162, 72, 163, 164, 70, 165, 70, 72,
- 70, 70, 168, 70, 72, 58, 72, 170, 72, 72,
- 169, 72, 166, 167, 70, 172, 70, 70, 56, 171,
- 174, 56, 72, 70, 72, 72, 173, 54, 70, 175,
-
- 70, 72, 70, 54, 70, 176, 72, 180, 72, 424,
- 72, 70, 72, 70, 183, 177, 424, 178, 424, 72,
- 70, 72, 181, 179, 184, 424, 182, 424, 72, 188,
- 70, 186, 424, 189, 70, 185, 70, 70, 72, 187,
- 190, 70, 72, 424, 72, 72, 193, 70, 70, 72,
- 194, 191, 424, 424, 70, 72, 72, 70, 70, 424,
- 198, 192, 72, 424, 196, 72, 72, 200, 424, 424,
- 70, 201, 70, 70, 197, 70, 195, 199, 72, 70,
- 72, 72, 70, 72, 202, 70, 205, 72, 424, 70,
- 72, 208, 206, 72, 70, 70, 207, 72, 70, 209,
-
- 210, 424, 72, 72, 70, 70, 72, 70, 424, 216,
- 70, 211, 72, 72, 424, 72, 218, 424, 72, 424,
- 424, 212, 213, 70, 70, 214, 70, 217, 215, 424,
- 70, 72, 72, 70, 72, 223, 219, 220, 72, 222,
- 70, 72, 70, 221, 70, 424, 70, 424, 72, 424,
- 72, 70, 72, 226, 72, 230, 70, 227, 224, 72,
- 225, 70, 229, 70, 72, 70, 424, 70, 70, 72,
- 424, 72, 228, 72, 232, 72, 72, 70, 233, 70,
- 234, 236, 231, 424, 424, 72, 70, 72, 70, 70,
- 424, 237, 238, 70, 72, 235, 72, 72, 240, 239,
-
- 70, 72, 242, 70, 424, 70, 70, 243, 72, 70,
- 424, 72, 241, 72, 72, 70, 70, 72, 246, 70,
- 244, 70, 70, 72, 72, 245, 248, 72, 249, 72,
- 72, 247, 70, 70, 70, 70, 70, 70, 70, 250,
- 72, 72, 72, 72, 72, 72, 72, 255, 70, 424,
- 251, 70, 253, 70, 424, 424, 72, 252, 70, 72,
- 424, 72, 256, 258, 70, 257, 72, 424, 254, 70,
- 70, 70, 72, 259, 261, 262, 70, 72, 72, 72,
- 260, 70, 70, 424, 72, 266, 263, 265, 70, 72,
- 72, 70, 424, 70, 264, 70, 72, 70, 70, 72,
-
- 267, 72, 269, 72, 70, 72, 72, 268, 70, 424,
- 270, 70, 72, 70, 272, 273, 72, 274, 70, 72,
- 70, 72, 70, 275, 277, 70, 72, 276, 72, 280,
- 72, 281, 70, 72, 70, 279, 70, 70, 424, 424,
- 72, 278, 72, 70, 72, 72, 286, 284, 70, 70,
- 70, 72, 424, 282, 70, 70, 72, 72, 72, 285,
- 283, 424, 72, 72, 70, 70, 288, 70, 70, 290,
- 70, 287, 72, 72, 70, 72, 72, 424, 72, 70,
- 70, 291, 72, 70, 70, 289, 70, 72, 72, 70,
- 424, 72, 72, 70, 72, 292, 70, 72, 293, 297,
-
- 70, 72, 70, 70, 72, 295, 294, 70, 72, 296,
- 72, 72, 70, 70, 298, 72, 70, 424, 70, 70,
- 72, 72, 70, 70, 72, 299, 72, 72, 70, 302,
- 72, 72, 70, 424, 424, 424, 72, 424, 300, 70,
- 72, 301, 306, 70, 424, 70, 303, 72, 304, 70,
- 305, 72, 307, 72, 308, 70, 424, 72, 309, 424,
- 70, 70, 312, 72, 311, 70, 70, 310, 72, 72,
- 424, 70, 70, 72, 72, 70, 70, 70, 313, 72,
- 72, 314, 424, 72, 72, 72, 70, 317, 70, 319,
- 320, 424, 424, 70, 72, 315, 72, 70, 70, 321,
-
- 316, 72, 70, 318, 70, 72, 72, 70, 70, 70,
- 72, 424, 72, 424, 424, 72, 72, 72, 424, 70,
- 70, 323, 327, 70, 70, 70, 324, 72, 72, 424,
- 329, 72, 72, 72, 70, 325, 328, 331, 70, 326,
- 424, 70, 72, 70, 70, 70, 72, 332, 330, 72,
- 70, 72, 72, 72, 335, 70, 424, 424, 72, 70,
- 333, 70, 70, 72, 334, 336, 337, 72, 424, 72,
- 72, 70, 70, 70, 70, 70, 338, 424, 70, 72,
- 72, 72, 72, 72, 424, 340, 72, 70, 70, 341,
- 339, 424, 343, 70, 70, 72, 72, 70, 424, 344,
-
- 70, 72, 72, 342, 70, 72, 348, 424, 72, 70,
- 70, 70, 72, 70, 424, 346, 345, 72, 72, 72,
- 70, 72, 347, 70, 424, 70, 349, 70, 72, 70,
- 70, 72, 350, 72, 354, 72, 351, 72, 72, 352,
- 356, 70, 353, 358, 355, 70, 70, 70, 70, 72,
- 70, 357, 70, 72, 72, 72, 72, 70, 72, 70,
- 72, 70, 70, 70, 70, 72, 70, 72, 359, 72,
- 72, 72, 72, 70, 72, 424, 70, 424, 424, 361,
- 70, 72, 70, 362, 72, 360, 365, 70, 72, 363,
- 72, 366, 364, 70, 70, 72, 70, 424, 70, 70,
-
- 70, 72, 72, 70, 72, 367, 72, 72, 72, 70,
- 368, 72, 424, 424, 70, 70, 70, 72, 424, 70,
- 369, 370, 72, 72, 72, 424, 374, 72, 70, 371,
- 70, 70, 424, 375, 70, 372, 72, 70, 72, 72,
- 373, 70, 72, 376, 379, 72, 377, 70, 70, 72,
- 70, 70, 424, 70, 70, 72, 72, 378, 72, 72,
- 380, 72, 72, 70, 70, 70, 70, 383, 70, 70,
- 382, 72, 72, 72, 72, 70, 72, 72, 70, 381,
- 70, 70, 424, 72, 70, 70, 72, 70, 72, 72,
- 387, 386, 72, 72, 384, 72, 385, 70, 424, 70,
-
- 70, 424, 70, 424, 389, 72, 388, 72, 72, 390,
- 72, 70, 70, 70, 70, 392, 70, 424, 424, 72,
- 72, 72, 72, 393, 72, 391, 396, 424, 70, 70,
- 70, 70, 70, 70, 70, 394, 72, 72, 72, 72,
- 72, 72, 72, 70, 398, 70, 70, 395, 70, 70,
- 70, 72, 70, 72, 72, 424, 72, 72, 72, 424,
- 72, 399, 403, 397, 404, 70, 70, 400, 70, 401,
- 70, 424, 70, 72, 72, 70, 72, 70, 72, 405,
- 72, 402, 70, 72, 424, 72, 424, 70, 70, 70,
- 72, 70, 406, 424, 407, 72, 72, 72, 70, 72,
-
- 70, 412, 70, 424, 70, 70, 72, 424, 72, 410,
- 72, 408, 72, 72, 70, 409, 424, 413, 414, 70,
- 415, 70, 72, 70, 411, 70, 424, 72, 416, 72,
- 70, 72, 424, 72, 419, 70, 424, 70, 72, 417,
- 418, 424, 424, 72, 420, 72, 424, 424, 421, 424,
- 424, 424, 424, 424, 424, 424, 422, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 423, 53,
- 53, 53, 53, 53, 53, 53, 53, 53, 55, 55,
- 55, 55, 55, 55, 55, 55, 55, 57, 57, 57,
- 57, 57, 57, 57, 57, 57, 60, 424, 60, 65,
-
- 65, 65, 71, 71, 424, 71, 126, 126, 126, 126,
- 424, 126, 126, 126, 126, 127, 127, 127, 127, 127,
- 127, 127, 127, 127, 130, 130, 130, 424, 130, 130,
- 130, 130, 130, 132, 424, 132, 132, 132, 132, 132,
- 132, 132, 136, 424, 424, 424, 424, 424, 136, 72,
- 72, 424, 72, 204, 424, 204, 9, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
-
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424
- } ;
-
-static yyconst flex_int16_t yy_chk[1414] =
- { 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 11, 11, 12, 12,
- 25, 32, 25, 28, 28, 33, 38, 35, 49, 32,
- 64, 34, 64, 33, 38, 35, 49, 49, 33, 34,
- 35, 36, 33, 32, 35, 33, 322, 34, 32, 36,
- 35, 34, 37, 271, 34, 37, 39, 34, 33, 37,
-
- 34, 36, 37, 36, 39, 37, 41, 40, 37, 42,
- 39, 43, 203, 36, 41, 40, 54, 42, 39, 43,
- 40, 54, 127, 39, 40, 43, 44, 127, 45, 41,
- 40, 46, 42, 43, 44, 43, 45, 43, 42, 46,
- 43, 45, 59, 59, 46, 47, 135, 46, 134, 48,
- 45, 45, 44, 47, 47, 71, 46, 48, 50, 133,
- 46, 47, 131, 71, 46, 72, 50, 73, 47, 74,
- 75, 48, 70, 48, 50, 73, 76, 74, 75, 73,
- 74, 50, 77, 128, 76, 75, 78, 80, 128, 82,
- 77, 76, 79, 65, 78, 80, 74, 82, 76, 81,
-
- 79, 79, 82, 83, 84, 77, 85, 81, 63, 86,
- 80, 83, 84, 58, 85, 84, 80, 86, 87, 56,
- 81, 81, 86, 30, 84, 26, 87, 88, 81, 83,
- 89, 84, 90, 85, 91, 88, 92, 24, 89, 88,
- 90, 93, 91, 94, 92, 87, 95, 89, 97, 93,
- 98, 94, 23, 9, 95, 96, 97, 91, 98, 8,
- 90, 99, 92, 96, 93, 94, 100, 96, 102, 99,
- 103, 104, 98, 101, 100, 7, 102, 100, 103, 104,
- 99, 101, 96, 96, 105, 101, 106, 108, 6, 100,
- 103, 5, 105, 107, 106, 108, 102, 4, 110, 106,
-
- 109, 107, 111, 3, 112, 107, 110, 110, 109, 0,
- 111, 113, 112, 115, 111, 108, 0, 109, 0, 113,
- 114, 115, 110, 109, 112, 0, 110, 0, 114, 114,
- 116, 113, 0, 115, 117, 112, 118, 119, 116, 113,
- 116, 120, 117, 0, 118, 119, 118, 121, 122, 120,
- 119, 116, 0, 0, 124, 121, 122, 123, 125, 0,
- 122, 117, 124, 0, 121, 123, 125, 124, 0, 0,
- 137, 124, 138, 139, 121, 140, 120, 123, 137, 141,
- 138, 139, 142, 140, 125, 144, 139, 141, 0, 143,
- 142, 142, 140, 144, 145, 146, 141, 143, 148, 143,
-
- 143, 0, 145, 146, 147, 149, 148, 150, 0, 148,
- 151, 144, 147, 149, 0, 150, 150, 0, 151, 0,
- 0, 145, 146, 152, 154, 147, 153, 149, 147, 0,
- 155, 152, 154, 156, 153, 154, 151, 151, 155, 153,
- 157, 156, 158, 152, 159, 0, 161, 0, 157, 0,
- 158, 160, 159, 157, 161, 161, 162, 157, 155, 160,
- 156, 163, 160, 164, 162, 165, 0, 166, 167, 163,
- 0, 164, 159, 165, 164, 166, 167, 168, 165, 170,
- 166, 167, 163, 0, 0, 168, 169, 170, 171, 172,
- 0, 167, 168, 173, 169, 166, 171, 172, 170, 169,
-
- 174, 173, 172, 175, 0, 176, 177, 173, 174, 178,
- 0, 175, 171, 176, 177, 179, 180, 178, 176, 181,
- 174, 182, 183, 179, 180, 175, 179, 181, 180, 182,
- 183, 178, 184, 185, 186, 187, 188, 189, 190, 181,
- 184, 185, 186, 187, 188, 189, 190, 186, 191, 0,
- 182, 192, 184, 193, 0, 0, 191, 183, 194, 192,
- 0, 193, 188, 192, 195, 190, 194, 0, 185, 196,
- 197, 199, 195, 193, 195, 195, 198, 196, 197, 199,
- 194, 200, 201, 0, 198, 198, 195, 197, 205, 200,
- 201, 202, 0, 206, 196, 207, 205, 208, 209, 202,
-
- 199, 206, 201, 207, 211, 208, 209, 200, 210, 0,
- 202, 212, 211, 213, 205, 206, 210, 207, 214, 212,
- 215, 213, 216, 208, 212, 217, 214, 210, 215, 215,
- 216, 216, 218, 217, 219, 214, 220, 221, 0, 0,
- 218, 213, 219, 222, 220, 221, 221, 219, 223, 224,
- 225, 222, 0, 217, 226, 227, 223, 224, 225, 220,
- 218, 0, 226, 227, 228, 229, 224, 230, 231, 227,
- 232, 222, 228, 229, 233, 230, 231, 0, 232, 234,
- 237, 229, 233, 235, 236, 225, 238, 234, 237, 239,
- 0, 235, 236, 240, 238, 230, 241, 239, 232, 236,
-
- 242, 240, 243, 244, 241, 234, 233, 245, 242, 235,
- 243, 244, 246, 247, 238, 245, 248, 0, 249, 250,
- 246, 247, 251, 252, 248, 243, 249, 250, 254, 248,
- 251, 252, 253, 0, 0, 0, 254, 0, 246, 257,
- 253, 247, 253, 255, 0, 256, 250, 257, 251, 259,
- 252, 255, 254, 256, 255, 258, 0, 259, 256, 0,
- 260, 261, 259, 258, 258, 262, 263, 257, 260, 261,
- 0, 264, 265, 262, 263, 266, 267, 268, 261, 264,
- 265, 262, 0, 266, 267, 268, 269, 265, 270, 267,
- 268, 0, 0, 272, 269, 263, 270, 273, 274, 269,
-
- 264, 272, 275, 266, 276, 273, 274, 277, 279, 278,
- 275, 0, 276, 0, 0, 277, 279, 278, 0, 280,
- 281, 272, 278, 282, 283, 284, 274, 280, 281, 0,
- 280, 282, 283, 284, 285, 275, 279, 283, 286, 276,
- 0, 287, 285, 288, 289, 290, 286, 284, 281, 287,
- 291, 288, 289, 290, 287, 292, 0, 0, 291, 293,
- 285, 294, 295, 292, 286, 288, 289, 293, 0, 294,
- 295, 296, 297, 298, 299, 300, 293, 0, 301, 296,
- 297, 298, 299, 300, 0, 297, 301, 302, 303, 298,
- 295, 0, 301, 304, 305, 302, 303, 306, 0, 302,
-
- 307, 304, 305, 299, 308, 306, 306, 0, 307, 309,
- 310, 311, 308, 312, 0, 304, 303, 309, 310, 311,
- 313, 312, 305, 315, 0, 314, 307, 316, 313, 317,
- 318, 315, 308, 314, 314, 316, 310, 317, 318, 311,
- 316, 319, 313, 318, 315, 320, 321, 323, 324, 319,
- 325, 317, 326, 320, 321, 323, 324, 327, 325, 328,
- 326, 329, 330, 331, 332, 327, 333, 328, 319, 329,
- 330, 331, 332, 334, 333, 0, 335, 0, 0, 326,
- 336, 334, 337, 327, 335, 325, 334, 338, 336, 329,
- 337, 336, 332, 339, 340, 338, 341, 0, 343, 342,
-
- 344, 339, 340, 346, 341, 337, 343, 342, 344, 345,
- 338, 346, 0, 0, 347, 348, 349, 345, 0, 350,
- 340, 342, 347, 348, 349, 0, 348, 350, 351, 344,
- 352, 354, 0, 349, 355, 345, 351, 353, 352, 354,
- 347, 356, 355, 352, 355, 353, 353, 357, 358, 356,
- 359, 360, 0, 361, 362, 357, 358, 354, 359, 360,
- 357, 361, 362, 363, 364, 365, 366, 362, 367, 368,
- 361, 363, 364, 365, 366, 369, 367, 368, 370, 360,
- 371, 372, 0, 369, 373, 374, 370, 375, 371, 372,
- 370, 368, 373, 374, 366, 375, 367, 376, 0, 377,
-
- 378, 0, 379, 0, 374, 376, 371, 377, 378, 375,
- 379, 380, 381, 382, 383, 379, 384, 0, 0, 380,
- 381, 382, 383, 380, 384, 377, 383, 0, 385, 386,
- 387, 388, 389, 390, 391, 381, 385, 386, 387, 388,
- 389, 390, 391, 392, 388, 393, 394, 382, 396, 395,
- 397, 392, 398, 393, 394, 0, 396, 395, 397, 0,
- 398, 390, 395, 385, 397, 399, 400, 391, 401, 392,
- 402, 0, 403, 399, 400, 405, 401, 404, 402, 399,
- 403, 394, 406, 405, 0, 404, 0, 407, 408, 409,
- 406, 410, 402, 0, 404, 407, 408, 409, 413, 410,
-
- 411, 410, 414, 0, 415, 412, 413, 0, 411, 408,
- 414, 406, 415, 412, 416, 407, 0, 411, 412, 417,
- 413, 418, 416, 419, 409, 420, 0, 417, 414, 418,
- 421, 419, 0, 420, 418, 422, 0, 423, 421, 415,
- 417, 0, 0, 422, 419, 423, 0, 0, 420, 0,
- 0, 0, 0, 0, 0, 0, 421, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 422, 425,
- 425, 425, 425, 425, 425, 425, 425, 425, 426, 426,
- 426, 426, 426, 426, 426, 426, 426, 427, 427, 427,
- 427, 427, 427, 427, 427, 427, 428, 0, 428, 429,
-
- 429, 429, 430, 430, 0, 430, 431, 431, 431, 431,
- 0, 431, 431, 431, 431, 432, 432, 432, 432, 432,
- 432, 432, 432, 432, 433, 433, 433, 0, 433, 433,
- 433, 433, 433, 434, 0, 434, 434, 434, 434, 434,
- 434, 434, 435, 0, 0, 0, 0, 0, 435, 436,
- 436, 0, 436, 437, 0, 437, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
-
- 424, 424, 424, 424, 424, 424, 424, 424, 424, 424,
- 424, 424, 424
- } ;
-
-static yy_state_type yy_last_accepting_state;
-static char *yy_last_accepting_cpos;
-
-extern int yy_flex_debug;
-int yy_flex_debug = 0;
-
-/* The intent behind this definition is that it'll catch
- * any uses of REJECT which flex missed.
- */
-#define REJECT reject_used_but_not_detected
-#define yymore() yymore_used_but_not_detected
-#define YY_MORE_ADJ 0
-#define YY_RESTORE_YY_MORE_OFFSET
-char *yytext;
-#line 1 "pars0lex.l"
-/*****************************************************************************
-
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-/******************************************************
-SQL parser lexical analyzer: input file for the GNU Flex lexer generator
-
-The InnoDB parser is frozen because MySQL takes care of SQL parsing.
-Therefore we normally keep the InnoDB parser C files as they are, and do
-not automatically generate them from pars0grm.y and pars0lex.l.
-
-How to make the InnoDB parser and lexer C files:
-
-1. Run ./make_flex.sh to generate lexer files.
-
-2. Run ./make_bison.sh to generate parser files.
-
-These instructions seem to work at least with bison-1.875d and flex-2.5.31 on
-Linux.
-
-Created 12/14/1997 Heikki Tuuri
-*******************************************************/
-#define YY_NO_INPUT 1
-#define YY_NO_UNISTD_H 1
-#line 53 "pars0lex.l"
-#define YYSTYPE que_node_t*
-
-#include "univ.i"
-#include "pars0pars.h"
-#include "pars0grm.h"
-#include "pars0sym.h"
-#include "mem0mem.h"
-#include "os0proc.h"
-
-#define malloc(A) ut_malloc(A)
-#define free(A) ut_free(A)
-#define realloc(P, A) ut_realloc(P, A)
-#define exit(A) ut_error
-
-/* Note: We cast &result to int* from yysize_t* */
-#define YY_INPUT(buf, result, max_size) \
- (result = pars_get_lex_chars(buf, max_size))
-
-/* String buffer for removing quotes */
-static ulint stringbuf_len_alloc = 0; /* Allocated length */
-static ulint stringbuf_len = 0; /* Current length */
-static char* stringbuf; /* Start of buffer */
-/** Appends a string to the buffer. */
-static
-void
-string_append(
-/*==========*/
- const char* str, /*!< in: string to be appended */
- ulint len) /*!< in: length of the string */
-{
- if (stringbuf == NULL) {
- stringbuf = static_cast<char*>(malloc(1));
- stringbuf_len_alloc = 1;
- }
-
- if (stringbuf_len + len > stringbuf_len_alloc) {
- while (stringbuf_len + len > stringbuf_len_alloc) {
- stringbuf_len_alloc <<= 1;
- }
-
- stringbuf = static_cast<char*>(
- realloc(stringbuf, stringbuf_len_alloc));
- }
-
- memcpy(stringbuf + stringbuf_len, str, len);
- stringbuf_len += len;
-}
-
-
-
-
-#line 1006 "lexyy.cc"
-
-#define INITIAL 0
-#define comment 1
-#define quoted 2
-#define id 3
-
-#ifndef YY_NO_UNISTD_H
-/* Special case for "unistd.h", since it is non-ANSI. We include it way
- * down here because we want the user's section 1 to have been scanned first.
- * The user has a chance to override it with an option.
- */
-#include <unistd.h>
-#endif
-
-#ifndef YY_EXTRA_TYPE
-#define YY_EXTRA_TYPE void *
-#endif
-
-static int yy_init_globals (void );
-
-/* Accessor methods to globals.
- These are made visible to non-reentrant scanners for convenience. */
-
-MY_ATTRIBUTE((unused)) static int yylex_destroy (void );
-
-int yyget_debug (void );
-
-void yyset_debug (int debug_flag );
-
-YY_EXTRA_TYPE yyget_extra (void );
-
-void yyset_extra (YY_EXTRA_TYPE user_defined );
-
-FILE *yyget_in (void );
-
-void yyset_in (FILE * in_str );
-
-FILE *yyget_out (void );
-
-void yyset_out (FILE * out_str );
-
-yy_size_t yyget_leng (void );
-
-char *yyget_text (void );
-
-int yyget_lineno (void );
-
-void yyset_lineno (int line_number );
-
-/* Macros after this point can all be overridden by user definitions in
- * section 1.
- */
-
-#ifndef YY_SKIP_YYWRAP
-#ifdef __cplusplus
-extern "C" int yywrap (void );
-#else
-extern int yywrap (void );
-#endif
-#endif
-
-#ifndef yytext_ptr
-static void yy_flex_strncpy (char *,yyconst char *,int );
-#endif
-
-#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * );
-#endif
-
-#ifndef YY_NO_INPUT
-
-#ifdef __cplusplus
-static int yyinput (void );
-#else
-static int input (void );
-#endif
-
-#endif
-
-/* Amount of stuff to slurp up with each read. */
-#ifndef YY_READ_BUF_SIZE
-#ifdef __ia64__
-/* On IA-64, the buffer size is 16k, not 8k */
-#define YY_READ_BUF_SIZE 16384
-#else
-#define YY_READ_BUF_SIZE 8192
-#endif /* __ia64__ */
-#endif
-
-/* Copy whatever the last rule matched to the standard output. */
-#ifndef ECHO
-/* This used to be an fputs(), but since the string might contain NUL's,
- * we now use fwrite().
- */
-#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0)
-#endif
-
-/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
- * is returned in "result".
- */
-#ifndef YY_INPUT
-#define YY_INPUT(buf,result,max_size) \
- if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
- { \
- int c = '*'; \
- size_t n; \
- for ( n = 0; n < max_size && \
- (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
- buf[n] = (char) c; \
- if ( c == '\n' ) \
- buf[n++] = (char) c; \
- if ( c == EOF && ferror( yyin ) ) \
- YY_FATAL_ERROR( "input in flex scanner failed" ); \
- result = n; \
- } \
- else \
- { \
- errno=0; \
- while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \
- { \
- if( errno != EINTR) \
- { \
- YY_FATAL_ERROR( "input in flex scanner failed" ); \
- break; \
- } \
- errno=0; \
- clearerr(yyin); \
- } \
- }\
-\
-
-#endif
-
-/* No semi-colon after return; correct usage is to write "yyterminate();" -
- * we don't want an extra ';' after the "return" because that will cause
- * some compilers to complain about unreachable statements.
- */
-#ifndef yyterminate
-#define yyterminate() return YY_NULL
-#endif
-
-/* Number of entries by which start-condition stack grows. */
-#ifndef YY_START_STACK_INCR
-#define YY_START_STACK_INCR 25
-#endif
-
-/* Report a fatal error. */
-#ifndef YY_FATAL_ERROR
-#define YY_FATAL_ERROR(msg) yy_fatal_error( msg )
-#endif
-
-/* end tables serialization structures and prototypes */
-
-/* Default declaration of generated scanner - a define so the user can
- * easily add parameters.
- */
-#ifndef YY_DECL
-#define YY_DECL_IS_OURS 1
-
-extern int yylex (void);
-
-#define YY_DECL int yylex (void)
-#endif /* !YY_DECL */
-
-/* Code executed at the beginning of each rule, after yytext and yyleng
- * have been set up.
- */
-#ifndef YY_USER_ACTION
-#define YY_USER_ACTION
-#endif
-
-/* Code executed at the end of each rule. */
-#ifndef YY_BREAK
-#define YY_BREAK break;
-#endif
-
-#define YY_RULE_SETUP \
- YY_USER_ACTION
-
-/** The main scanner function which does all the work.
- */
-YY_DECL
-{
- register yy_state_type yy_current_state;
- register char *yy_cp, *yy_bp;
- register int yy_act;
-
-#line 112 "pars0lex.l"
-
-
-#line 1197 "lexyy.cc"
-
- if ( !(yy_init) )
- {
- (yy_init) = 1;
-
-#ifdef YY_USER_INIT
- YY_USER_INIT;
-#endif
-
- if ( ! (yy_start) )
- (yy_start) = 1; /* first start state */
-
- if ( ! yyin )
- yyin = stdin;
-
- if ( ! yyout )
- yyout = stdout;
-
- if ( ! YY_CURRENT_BUFFER ) {
- yyensure_buffer_stack ();
- YY_CURRENT_BUFFER_LVALUE =
- yy_create_buffer(yyin,YY_BUF_SIZE );
- }
-
- yy_load_buffer_state( );
- }
-
- while ( 1 ) /* loops until end-of-file is reached */
- {
- yy_cp = (yy_c_buf_p);
-
- /* Support of yytext. */
- *yy_cp = (yy_hold_char);
-
- /* yy_bp points to the position in yy_ch_buf of the start of
- * the current run.
- */
- yy_bp = yy_cp;
-
- yy_current_state = (yy_start);
-yy_match:
- do
- {
- register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)];
- if ( yy_accept[yy_current_state] )
- {
- (yy_last_accepting_state) = yy_current_state;
- (yy_last_accepting_cpos) = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 425 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- ++yy_cp;
- }
- while ( yy_current_state != 424 );
- yy_cp = (yy_last_accepting_cpos);
- yy_current_state = (yy_last_accepting_state);
-
-yy_find_action:
- yy_act = yy_accept[yy_current_state];
-
- YY_DO_BEFORE_ACTION;
-
-do_action: /* This label is used only to access EOF actions. */
-
- switch ( yy_act )
- { /* beginning of action switch */
- case 0: /* must back up */
- /* undo the effects of YY_DO_BEFORE_ACTION */
- *yy_cp = (yy_hold_char);
- yy_cp = (yy_last_accepting_cpos);
- yy_current_state = (yy_last_accepting_state);
- goto yy_find_action;
-
-case 1:
-YY_RULE_SETUP
-#line 114 "pars0lex.l"
-{
- yylval = sym_tab_add_int_lit(pars_sym_tab_global,
- atoi(yytext));
- return(PARS_INT_LIT);
-}
- YY_BREAK
-case 2:
-YY_RULE_SETUP
-#line 120 "pars0lex.l"
-{
- ut_error; /* not implemented */
-
- return(PARS_FLOAT_LIT);
-}
- YY_BREAK
-case 3:
-YY_RULE_SETUP
-#line 126 "pars0lex.l"
-{
- ulint type;
-
- yylval = sym_tab_add_bound_lit(pars_sym_tab_global,
- yytext + 1, &type);
-
- return((int) type);
-}
- YY_BREAK
-case 4:
-YY_RULE_SETUP
-#line 135 "pars0lex.l"
-{
- yylval = sym_tab_add_bound_id(pars_sym_tab_global,
- yytext + 1);
-
- return(PARS_ID_TOKEN);
-}
- YY_BREAK
-case 5:
-YY_RULE_SETUP
-#line 142 "pars0lex.l"
-{
-/* Quoted character string literals are handled in an explicit
-start state 'quoted'. This state is entered and the buffer for
-the scanned string is emptied upon encountering a starting quote.
-
-In the state 'quoted', only two actions are possible (defined below). */
- BEGIN(quoted);
- stringbuf_len = 0;
-}
- YY_BREAK
-case 6:
-/* rule 6 can match eol */
-YY_RULE_SETUP
-#line 151 "pars0lex.l"
-{
- /* Got a sequence of characters other than "'":
- append to string buffer */
- string_append(yytext, yyleng);
-}
- YY_BREAK
-case 7:
-YY_RULE_SETUP
-#line 156 "pars0lex.l"
-{
- /* Got a sequence of "'" characters:
- append half of them to string buffer,
- as "''" represents a single "'".
- We apply truncating division,
- so that "'''" will result in "'". */
-
- string_append(yytext, yyleng / 2);
-
- /* If we got an odd number of quotes, then the
- last quote we got is the terminating quote.
- At the end of the string, we return to the
- initial start state and report the scanned
- string literal. */
-
- if (yyleng % 2) {
- BEGIN(INITIAL);
- yylval = sym_tab_add_str_lit(
- pars_sym_tab_global,
- (byte*) stringbuf, stringbuf_len);
- return(PARS_STR_LIT);
- }
-}
- YY_BREAK
-case 8:
-YY_RULE_SETUP
-#line 180 "pars0lex.l"
-{
-/* Quoted identifiers are handled in an explicit start state 'id'.
-This state is entered and the buffer for the scanned string is emptied
-upon encountering a starting quote.
-
-In the state 'id', only two actions are possible (defined below). */
- BEGIN(id);
- stringbuf_len = 0;
-}
- YY_BREAK
-case 9:
-/* rule 9 can match eol */
-YY_RULE_SETUP
-#line 189 "pars0lex.l"
-{
- /* Got a sequence of characters other than '"':
- append to string buffer */
- string_append(yytext, yyleng);
-}
- YY_BREAK
-case 10:
-YY_RULE_SETUP
-#line 194 "pars0lex.l"
-{
- /* Got a sequence of '"' characters:
- append half of them to string buffer,
- as '""' represents a single '"'.
- We apply truncating division,
- so that '"""' will result in '"'. */
-
- string_append(yytext, yyleng / 2);
-
- /* If we got an odd number of quotes, then the
- last quote we got is the terminating quote.
- At the end of the string, we return to the
- initial start state and report the scanned
- identifier. */
-
- if (yyleng % 2) {
- BEGIN(INITIAL);
- yylval = sym_tab_add_id(
- pars_sym_tab_global,
- (byte*) stringbuf, stringbuf_len);
-
- return(PARS_ID_TOKEN);
- }
-}
- YY_BREAK
-case 11:
-YY_RULE_SETUP
-#line 219 "pars0lex.l"
-{
- yylval = sym_tab_add_null_lit(pars_sym_tab_global);
-
- return(PARS_NULL_LIT);
-}
- YY_BREAK
-case 12:
-YY_RULE_SETUP
-#line 225 "pars0lex.l"
-{
- /* Implicit cursor name */
- yylval = sym_tab_add_str_lit(pars_sym_tab_global,
- (byte*) yytext, yyleng);
- return(PARS_SQL_TOKEN);
-}
- YY_BREAK
-case 13:
-YY_RULE_SETUP
-#line 232 "pars0lex.l"
-{
- return(PARS_AND_TOKEN);
-}
- YY_BREAK
-case 14:
-YY_RULE_SETUP
-#line 236 "pars0lex.l"
-{
- return(PARS_OR_TOKEN);
-}
- YY_BREAK
-case 15:
-YY_RULE_SETUP
-#line 240 "pars0lex.l"
-{
- return(PARS_NOT_TOKEN);
-}
- YY_BREAK
-case 16:
-YY_RULE_SETUP
-#line 244 "pars0lex.l"
-{
- return(PARS_PROCEDURE_TOKEN);
-}
- YY_BREAK
-case 17:
-YY_RULE_SETUP
-#line 248 "pars0lex.l"
-{
- return(PARS_IN_TOKEN);
-}
- YY_BREAK
-case 18:
-YY_RULE_SETUP
-#line 252 "pars0lex.l"
-{
- return(PARS_OUT_TOKEN);
-}
- YY_BREAK
-case 19:
-YY_RULE_SETUP
-#line 256 "pars0lex.l"
-{
- return(PARS_BINARY_TOKEN);
-}
- YY_BREAK
-case 20:
-YY_RULE_SETUP
-#line 260 "pars0lex.l"
-{
- return(PARS_BLOB_TOKEN);
-}
- YY_BREAK
-case 21:
-YY_RULE_SETUP
-#line 264 "pars0lex.l"
-{
- return(PARS_INT_TOKEN);
-}
- YY_BREAK
-case 22:
-YY_RULE_SETUP
-#line 268 "pars0lex.l"
-{
- return(PARS_INT_TOKEN);
-}
- YY_BREAK
-case 23:
-YY_RULE_SETUP
-#line 272 "pars0lex.l"
-{
- return(PARS_FLOAT_TOKEN);
-}
- YY_BREAK
-case 24:
-YY_RULE_SETUP
-#line 276 "pars0lex.l"
-{
- return(PARS_CHAR_TOKEN);
-}
- YY_BREAK
-case 25:
-YY_RULE_SETUP
-#line 280 "pars0lex.l"
-{
- return(PARS_IS_TOKEN);
-}
- YY_BREAK
-case 26:
-YY_RULE_SETUP
-#line 284 "pars0lex.l"
-{
- return(PARS_BEGIN_TOKEN);
-}
- YY_BREAK
-case 27:
-YY_RULE_SETUP
-#line 288 "pars0lex.l"
-{
- return(PARS_END_TOKEN);
-}
- YY_BREAK
-case 28:
-YY_RULE_SETUP
-#line 292 "pars0lex.l"
-{
- return(PARS_IF_TOKEN);
-}
- YY_BREAK
-case 29:
-YY_RULE_SETUP
-#line 296 "pars0lex.l"
-{
- return(PARS_THEN_TOKEN);
-}
- YY_BREAK
-case 30:
-YY_RULE_SETUP
-#line 300 "pars0lex.l"
-{
- return(PARS_ELSE_TOKEN);
-}
- YY_BREAK
-case 31:
-YY_RULE_SETUP
-#line 304 "pars0lex.l"
-{
- return(PARS_ELSIF_TOKEN);
-}
- YY_BREAK
-case 32:
-YY_RULE_SETUP
-#line 308 "pars0lex.l"
-{
- return(PARS_LOOP_TOKEN);
-}
- YY_BREAK
-case 33:
-YY_RULE_SETUP
-#line 312 "pars0lex.l"
-{
- return(PARS_WHILE_TOKEN);
-}
- YY_BREAK
-case 34:
-YY_RULE_SETUP
-#line 316 "pars0lex.l"
-{
- return(PARS_RETURN_TOKEN);
-}
- YY_BREAK
-case 35:
-YY_RULE_SETUP
-#line 320 "pars0lex.l"
-{
- return(PARS_SELECT_TOKEN);
-}
- YY_BREAK
-case 36:
-YY_RULE_SETUP
-#line 324 "pars0lex.l"
-{
- return(PARS_SUM_TOKEN);
-}
- YY_BREAK
-case 37:
-YY_RULE_SETUP
-#line 328 "pars0lex.l"
-{
- return(PARS_COUNT_TOKEN);
-}
- YY_BREAK
-case 38:
-YY_RULE_SETUP
-#line 332 "pars0lex.l"
-{
- return(PARS_DISTINCT_TOKEN);
-}
- YY_BREAK
-case 39:
-YY_RULE_SETUP
-#line 336 "pars0lex.l"
-{
- return(PARS_FROM_TOKEN);
-}
- YY_BREAK
-case 40:
-YY_RULE_SETUP
-#line 340 "pars0lex.l"
-{
- return(PARS_WHERE_TOKEN);
-}
- YY_BREAK
-case 41:
-YY_RULE_SETUP
-#line 344 "pars0lex.l"
-{
- return(PARS_FOR_TOKEN);
-}
- YY_BREAK
-case 42:
-YY_RULE_SETUP
-#line 348 "pars0lex.l"
-{
- return(PARS_READ_TOKEN);
-}
- YY_BREAK
-case 43:
-YY_RULE_SETUP
-#line 352 "pars0lex.l"
-{
- return(PARS_ORDER_TOKEN);
-}
- YY_BREAK
-case 44:
-YY_RULE_SETUP
-#line 356 "pars0lex.l"
-{
- return(PARS_BY_TOKEN);
-}
- YY_BREAK
-case 45:
-YY_RULE_SETUP
-#line 360 "pars0lex.l"
-{
- return(PARS_ASC_TOKEN);
-}
- YY_BREAK
-case 46:
-YY_RULE_SETUP
-#line 364 "pars0lex.l"
-{
- return(PARS_DESC_TOKEN);
-}
- YY_BREAK
-case 47:
-YY_RULE_SETUP
-#line 368 "pars0lex.l"
-{
- return(PARS_INSERT_TOKEN);
-}
- YY_BREAK
-case 48:
-YY_RULE_SETUP
-#line 372 "pars0lex.l"
-{
- return(PARS_INTO_TOKEN);
-}
- YY_BREAK
-case 49:
-YY_RULE_SETUP
-#line 376 "pars0lex.l"
-{
- return(PARS_VALUES_TOKEN);
-}
- YY_BREAK
-case 50:
-YY_RULE_SETUP
-#line 380 "pars0lex.l"
-{
- return(PARS_UPDATE_TOKEN);
-}
- YY_BREAK
-case 51:
-YY_RULE_SETUP
-#line 384 "pars0lex.l"
-{
- return(PARS_SET_TOKEN);
-}
- YY_BREAK
-case 52:
-YY_RULE_SETUP
-#line 388 "pars0lex.l"
-{
- return(PARS_DELETE_TOKEN);
-}
- YY_BREAK
-case 53:
-YY_RULE_SETUP
-#line 392 "pars0lex.l"
-{
- return(PARS_CURRENT_TOKEN);
-}
- YY_BREAK
-case 54:
-YY_RULE_SETUP
-#line 396 "pars0lex.l"
-{
- return(PARS_OF_TOKEN);
-}
- YY_BREAK
-case 55:
-YY_RULE_SETUP
-#line 400 "pars0lex.l"
-{
- return(PARS_CREATE_TOKEN);
-}
- YY_BREAK
-case 56:
-YY_RULE_SETUP
-#line 404 "pars0lex.l"
-{
- return(PARS_TABLE_TOKEN);
-}
- YY_BREAK
-case 57:
-YY_RULE_SETUP
-#line 408 "pars0lex.l"
-{
- return(PARS_COMPACT_TOKEN);
-}
- YY_BREAK
-case 58:
-YY_RULE_SETUP
-#line 412 "pars0lex.l"
-{
- return(PARS_BLOCK_SIZE_TOKEN);
-}
- YY_BREAK
-case 59:
-YY_RULE_SETUP
-#line 416 "pars0lex.l"
-{
- return(PARS_INDEX_TOKEN);
-}
- YY_BREAK
-case 60:
-YY_RULE_SETUP
-#line 420 "pars0lex.l"
-{
- return(PARS_UNIQUE_TOKEN);
-}
- YY_BREAK
-case 61:
-YY_RULE_SETUP
-#line 424 "pars0lex.l"
-{
- return(PARS_CLUSTERED_TOKEN);
-}
- YY_BREAK
-case 62:
-YY_RULE_SETUP
-#line 428 "pars0lex.l"
-{
- return(PARS_DOES_NOT_FIT_IN_MEM_TOKEN);
-}
- YY_BREAK
-case 63:
-YY_RULE_SETUP
-#line 432 "pars0lex.l"
-{
- return(PARS_ON_TOKEN);
-}
- YY_BREAK
-case 64:
-YY_RULE_SETUP
-#line 436 "pars0lex.l"
-{
- return(PARS_DECLARE_TOKEN);
-}
- YY_BREAK
-case 65:
-YY_RULE_SETUP
-#line 440 "pars0lex.l"
-{
- return(PARS_CURSOR_TOKEN);
-}
- YY_BREAK
-case 66:
-YY_RULE_SETUP
-#line 444 "pars0lex.l"
-{
- return(PARS_OPEN_TOKEN);
-}
- YY_BREAK
-case 67:
-YY_RULE_SETUP
-#line 448 "pars0lex.l"
-{
- return(PARS_FETCH_TOKEN);
-}
- YY_BREAK
-case 68:
-YY_RULE_SETUP
-#line 452 "pars0lex.l"
-{
- return(PARS_CLOSE_TOKEN);
-}
- YY_BREAK
-case 69:
-YY_RULE_SETUP
-#line 456 "pars0lex.l"
-{
- return(PARS_NOTFOUND_TOKEN);
-}
- YY_BREAK
-case 70:
-YY_RULE_SETUP
-#line 460 "pars0lex.l"
-{
- return(PARS_TO_CHAR_TOKEN);
-}
- YY_BREAK
-case 71:
-YY_RULE_SETUP
-#line 464 "pars0lex.l"
-{
- return(PARS_TO_NUMBER_TOKEN);
-}
- YY_BREAK
-case 72:
-YY_RULE_SETUP
-#line 468 "pars0lex.l"
-{
- return(PARS_TO_BINARY_TOKEN);
-}
- YY_BREAK
-case 73:
-YY_RULE_SETUP
-#line 472 "pars0lex.l"
-{
- return(PARS_BINARY_TO_NUMBER_TOKEN);
-}
- YY_BREAK
-case 74:
-YY_RULE_SETUP
-#line 476 "pars0lex.l"
-{
- return(PARS_SUBSTR_TOKEN);
-}
- YY_BREAK
-case 75:
-YY_RULE_SETUP
-#line 480 "pars0lex.l"
-{
- return(PARS_REPLSTR_TOKEN);
-}
- YY_BREAK
-case 76:
-YY_RULE_SETUP
-#line 484 "pars0lex.l"
-{
- return(PARS_CONCAT_TOKEN);
-}
- YY_BREAK
-case 77:
-YY_RULE_SETUP
-#line 488 "pars0lex.l"
-{
- return(PARS_INSTR_TOKEN);
-}
- YY_BREAK
-case 78:
-YY_RULE_SETUP
-#line 492 "pars0lex.l"
-{
- return(PARS_LENGTH_TOKEN);
-}
- YY_BREAK
-case 79:
-YY_RULE_SETUP
-#line 496 "pars0lex.l"
-{
- return(PARS_SYSDATE_TOKEN);
-}
- YY_BREAK
-case 80:
-YY_RULE_SETUP
-#line 500 "pars0lex.l"
-{
- return(PARS_PRINTF_TOKEN);
-}
- YY_BREAK
-case 81:
-YY_RULE_SETUP
-#line 504 "pars0lex.l"
-{
- return(PARS_ASSERT_TOKEN);
-}
- YY_BREAK
-case 82:
-YY_RULE_SETUP
-#line 508 "pars0lex.l"
-{
- return(PARS_RND_TOKEN);
-}
- YY_BREAK
-case 83:
-YY_RULE_SETUP
-#line 512 "pars0lex.l"
-{
- return(PARS_RND_STR_TOKEN);
-}
- YY_BREAK
-case 84:
-YY_RULE_SETUP
-#line 516 "pars0lex.l"
-{
- return(PARS_ROW_PRINTF_TOKEN);
-}
- YY_BREAK
-case 85:
-YY_RULE_SETUP
-#line 520 "pars0lex.l"
-{
- return(PARS_COMMIT_TOKEN);
-}
- YY_BREAK
-case 86:
-YY_RULE_SETUP
-#line 524 "pars0lex.l"
-{
- return(PARS_ROLLBACK_TOKEN);
-}
- YY_BREAK
-case 87:
-YY_RULE_SETUP
-#line 528 "pars0lex.l"
-{
- return(PARS_WORK_TOKEN);
-}
- YY_BREAK
-case 88:
-YY_RULE_SETUP
-#line 532 "pars0lex.l"
-{
- return(PARS_UNSIGNED_TOKEN);
-}
- YY_BREAK
-case 89:
-YY_RULE_SETUP
-#line 536 "pars0lex.l"
-{
- return(PARS_EXIT_TOKEN);
-}
- YY_BREAK
-case 90:
-YY_RULE_SETUP
-#line 540 "pars0lex.l"
-{
- return(PARS_FUNCTION_TOKEN);
-}
- YY_BREAK
-case 91:
-YY_RULE_SETUP
-#line 544 "pars0lex.l"
-{
- return(PARS_LOCK_TOKEN);
-}
- YY_BREAK
-case 92:
-YY_RULE_SETUP
-#line 548 "pars0lex.l"
-{
- return(PARS_SHARE_TOKEN);
-}
- YY_BREAK
-case 93:
-YY_RULE_SETUP
-#line 552 "pars0lex.l"
-{
- return(PARS_MODE_TOKEN);
-}
- YY_BREAK
-case 94:
-YY_RULE_SETUP
-#line 556 "pars0lex.l"
-{
- return(PARS_LIKE_TOKEN);
-}
- YY_BREAK
-case 95:
-YY_RULE_SETUP
-#line 560 "pars0lex.l"
-{
- return(PARS_BIGINT_TOKEN);
-}
- YY_BREAK
-case 96:
-YY_RULE_SETUP
-#line 564 "pars0lex.l"
-{
- yylval = sym_tab_add_id(pars_sym_tab_global,
- (byte*) yytext,
- ut_strlen(yytext));
- return(PARS_ID_TOKEN);
-}
- YY_BREAK
-case 97:
-YY_RULE_SETUP
-#line 571 "pars0lex.l"
-{
- yylval = sym_tab_add_id(pars_sym_tab_global,
- (byte*) yytext,
- ut_strlen(yytext));
- return(PARS_TABLE_NAME_TOKEN);
-}
- YY_BREAK
-case 98:
-YY_RULE_SETUP
-#line 578 "pars0lex.l"
-{
- return(PARS_DDOT_TOKEN);
-}
- YY_BREAK
-case 99:
-YY_RULE_SETUP
-#line 582 "pars0lex.l"
-{
- return(PARS_ASSIGN_TOKEN);
-}
- YY_BREAK
-case 100:
-YY_RULE_SETUP
-#line 586 "pars0lex.l"
-{
- return(PARS_LE_TOKEN);
-}
- YY_BREAK
-case 101:
-YY_RULE_SETUP
-#line 590 "pars0lex.l"
-{
- return(PARS_GE_TOKEN);
-}
- YY_BREAK
-case 102:
-YY_RULE_SETUP
-#line 594 "pars0lex.l"
-{
- return(PARS_NE_TOKEN);
-}
- YY_BREAK
-case 103:
-YY_RULE_SETUP
-#line 598 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 104:
-YY_RULE_SETUP
-#line 603 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 105:
-YY_RULE_SETUP
-#line 608 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 106:
-YY_RULE_SETUP
-#line 613 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 107:
-YY_RULE_SETUP
-#line 618 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 108:
-YY_RULE_SETUP
-#line 623 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 109:
-YY_RULE_SETUP
-#line 628 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 110:
-YY_RULE_SETUP
-#line 633 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 111:
-YY_RULE_SETUP
-#line 638 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 112:
-YY_RULE_SETUP
-#line 643 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 113:
-YY_RULE_SETUP
-#line 648 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 114:
-YY_RULE_SETUP
-#line 653 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 115:
-YY_RULE_SETUP
-#line 658 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 116:
-YY_RULE_SETUP
-#line 663 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 117:
-YY_RULE_SETUP
-#line 668 "pars0lex.l"
-{
-
- return((int)(*yytext));
-}
- YY_BREAK
-case 118:
-YY_RULE_SETUP
-#line 673 "pars0lex.l"
-BEGIN(comment); /* eat up comment */
- YY_BREAK
-case 119:
-/* rule 119 can match eol */
-YY_RULE_SETUP
-#line 675 "pars0lex.l"
-
- YY_BREAK
-case 120:
-/* rule 120 can match eol */
-YY_RULE_SETUP
-#line 676 "pars0lex.l"
-
- YY_BREAK
-case 121:
-YY_RULE_SETUP
-#line 677 "pars0lex.l"
-BEGIN(INITIAL);
- YY_BREAK
-case 122:
-/* rule 122 can match eol */
-YY_RULE_SETUP
-#line 679 "pars0lex.l"
-/* eat up whitespace */
- YY_BREAK
-case 123:
-YY_RULE_SETUP
-#line 682 "pars0lex.l"
-{
- fprintf(stderr,"Unrecognized character: %02x\n",
- *yytext);
-
- ut_error;
-
- return(0);
-}
- YY_BREAK
-case 124:
-YY_RULE_SETUP
-#line 691 "pars0lex.l"
-YY_FATAL_ERROR( "flex scanner jammed" );
- YY_BREAK
-#line 2237 "lexyy.cc"
-case YY_STATE_EOF(INITIAL):
-case YY_STATE_EOF(comment):
-case YY_STATE_EOF(quoted):
-case YY_STATE_EOF(id):
- yyterminate();
-
- case YY_END_OF_BUFFER:
- {
- /* Amount of text matched not including the EOB char. */
- int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1;
-
- /* Undo the effects of YY_DO_BEFORE_ACTION. */
- *yy_cp = (yy_hold_char);
- YY_RESTORE_YY_MORE_OFFSET
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
- {
- /* We're scanning a new file or input source. It's
- * possible that this happened because the user
- * just pointed yyin at a new source and called
- * yylex(). If so, then we have to assure
- * consistency between YY_CURRENT_BUFFER and our
- * globals. Here is the right place to do so, because
- * this is the first action (other than possibly a
- * back-up) that will match for the new input source.
- */
- (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
- YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin;
- YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
- }
-
- /* Note that here we test for yy_c_buf_p "<=" to the position
- * of the first EOB in the buffer, since yy_c_buf_p will
- * already have been incremented past the NUL character
- * (since all states make transitions on EOB to the
- * end-of-buffer state). Contrast this with the test
- * in input().
- */
- if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
- { /* This was really a NUL. */
- yy_state_type yy_next_state;
-
- (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text;
-
- yy_current_state = yy_get_previous_state( );
-
- /* Okay, we're now positioned to make the NUL
- * transition. We couldn't have
- * yy_get_previous_state() go ahead and do it
- * for us because it doesn't know how to deal
- * with the possibility of jamming (and we don't
- * want to build jamming into it because then it
- * will run more slowly).
- */
-
- yy_next_state = yy_try_NUL_trans( yy_current_state );
-
- yy_bp = (yytext_ptr) + YY_MORE_ADJ;
-
- if ( yy_next_state )
- {
- /* Consume the NUL. */
- yy_cp = ++(yy_c_buf_p);
- yy_current_state = yy_next_state;
- goto yy_match;
- }
-
- else
- {
- yy_cp = (yy_last_accepting_cpos);
- yy_current_state = (yy_last_accepting_state);
- goto yy_find_action;
- }
- }
-
- else switch ( yy_get_next_buffer( ) )
- {
- case EOB_ACT_END_OF_FILE:
- {
- (yy_did_buffer_switch_on_eof) = 0;
-
- if ( yywrap( ) )
- {
- /* Note: because we've taken care in
- * yy_get_next_buffer() to have set up
- * yytext, we can now set up
- * yy_c_buf_p so that if some total
- * hoser (like flex itself) wants to
- * call the scanner after we return the
- * YY_NULL, it'll still work - another
- * YY_NULL will get returned.
- */
- (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ;
-
- yy_act = YY_STATE_EOF(YY_START);
- goto do_action;
- }
-
- else
- {
- if ( ! (yy_did_buffer_switch_on_eof) )
- YY_NEW_FILE;
- }
- break;
- }
-
- case EOB_ACT_CONTINUE_SCAN:
- (yy_c_buf_p) =
- (yytext_ptr) + yy_amount_of_matched_text;
-
- yy_current_state = yy_get_previous_state( );
-
- yy_cp = (yy_c_buf_p);
- yy_bp = (yytext_ptr) + YY_MORE_ADJ;
- goto yy_match;
-
- case EOB_ACT_LAST_MATCH:
- (yy_c_buf_p) =
- &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)];
-
- yy_current_state = yy_get_previous_state( );
-
- yy_cp = (yy_c_buf_p);
- yy_bp = (yytext_ptr) + YY_MORE_ADJ;
- goto yy_find_action;
- }
- break;
- }
-
- default:
- YY_FATAL_ERROR(
- "fatal flex scanner internal error--no action found" );
- } /* end of action switch */
- } /* end of scanning one token */
-} /* end of yylex */
-
-/* yy_get_next_buffer - try to read in a new buffer
- *
- * Returns a code representing an action:
- * EOB_ACT_LAST_MATCH -
- * EOB_ACT_CONTINUE_SCAN - continue scanning from current position
- * EOB_ACT_END_OF_FILE - end of file
- */
-static int yy_get_next_buffer (void)
-{
- register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
- register char *source = (yytext_ptr);
- register int number_to_move, i;
- int ret_val;
-
- if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] )
- YY_FATAL_ERROR(
- "fatal flex scanner internal error--end of buffer missed" );
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
- { /* Don't try to fill the buffer, so this is an EOF. */
- if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 )
- {
- /* We matched a single character, the EOB, so
- * treat this as a final EOF.
- */
- return EOB_ACT_END_OF_FILE;
- }
-
- else
- {
- /* We matched some text prior to the EOB, first
- * process it.
- */
- return EOB_ACT_LAST_MATCH;
- }
- }
-
- /* Try to read more data. */
-
- /* First move last chars to start of buffer. */
- number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1;
-
- for ( i = 0; i < number_to_move; ++i )
- *(dest++) = *(source++);
-
- if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
- /* don't do the read, it's not guaranteed to return an EOF,
- * just force an EOF
- */
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0;
-
- else
- {
- int num_to_read = static_cast<int>(
- YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1);
-
- while ( num_to_read <= 0 )
- { /* Not enough room in the buffer - grow it. */
-
- /* just a shorter name for the current buffer */
- YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
-
- int yy_c_buf_p_offset =
- (int) ((yy_c_buf_p) - b->yy_ch_buf);
-
- if ( b->yy_is_our_buffer )
- {
- int new_size = static_cast<int>(b->yy_buf_size * 2);
-
- if ( new_size <= 0 )
- b->yy_buf_size += b->yy_buf_size / 8;
- else
- b->yy_buf_size *= 2;
-
- b->yy_ch_buf = (char *)
- /* Include room in for 2 EOB chars. */
- yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 );
- }
- else
- /* Can't grow it, we don't own it. */
- b->yy_ch_buf = 0;
-
- if ( ! b->yy_ch_buf )
- YY_FATAL_ERROR(
- "fatal error - scanner input buffer overflow" );
-
- (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset];
-
- num_to_read = static_cast<int>(
- YY_CURRENT_BUFFER_LVALUE->yy_buf_size
- - number_to_move - 1);
-
- }
-
- if ( num_to_read > YY_READ_BUF_SIZE )
- num_to_read = YY_READ_BUF_SIZE;
-
- /* Read in more data. */
- YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
- (yy_n_chars), (size_t) num_to_read );
-
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
- }
-
- if ( (yy_n_chars) == 0 )
- {
- if ( number_to_move == YY_MORE_ADJ )
- {
- ret_val = EOB_ACT_END_OF_FILE;
- yyrestart(yyin );
- }
-
- else
- {
- ret_val = EOB_ACT_LAST_MATCH;
- YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
- YY_BUFFER_EOF_PENDING;
- }
- }
-
- else
- ret_val = EOB_ACT_CONTINUE_SCAN;
-
- if ((yy_size_t) ((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
- /* Extend the array by 50%, plus the number we really need. */
- yy_size_t new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size );
- if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
- YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
- }
-
- (yy_n_chars) += number_to_move;
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR;
- YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR;
-
- (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
-
- return ret_val;
-}
-
-/* yy_get_previous_state - get the state just before the EOB char was reached */
-
- yy_state_type yy_get_previous_state (void)
-{
- register yy_state_type yy_current_state;
- register char *yy_cp;
-
- yy_current_state = (yy_start);
-
- for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp )
- {
- register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1);
- if ( yy_accept[yy_current_state] )
- {
- (yy_last_accepting_state) = yy_current_state;
- (yy_last_accepting_cpos) = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 425 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- }
-
- return yy_current_state;
-}
-
-/* yy_try_NUL_trans - try to make a transition on the NUL character
- *
- * synopsis
- * next_state = yy_try_NUL_trans( current_state );
- */
- static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state )
-{
- register int yy_is_jam;
- register char *yy_cp = (yy_c_buf_p);
-
- register YY_CHAR yy_c = 1;
- if ( yy_accept[yy_current_state] )
- {
- (yy_last_accepting_state) = yy_current_state;
- (yy_last_accepting_cpos) = yy_cp;
- }
- while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state )
- {
- yy_current_state = (int) yy_def[yy_current_state];
- if ( yy_current_state >= 425 )
- yy_c = yy_meta[(unsigned int) yy_c];
- }
- yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
- yy_is_jam = (yy_current_state == 424);
-
- return yy_is_jam ? 0 : yy_current_state;
-}
-
-#ifndef YY_NO_INPUT
-#ifdef __cplusplus
- static int yyinput (void)
-#else
- static int input (void)
-#endif
-
-{
- int c;
-
- *(yy_c_buf_p) = (yy_hold_char);
-
- if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
- {
- /* yy_c_buf_p now points to the character we want to return.
- * If this occurs *before* the EOB characters, then it's a
- * valid NUL; if not, then we've hit the end of the buffer.
- */
- if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
- /* This was really a NUL. */
- *(yy_c_buf_p) = '\0';
-
- else
- { /* need more input */
- int offset = (int)((yy_c_buf_p) - (yytext_ptr));
- ++(yy_c_buf_p);
-
- switch ( yy_get_next_buffer( ) )
- {
- case EOB_ACT_LAST_MATCH:
- /* This happens because yy_g_n_b()
- * sees that we've accumulated a
- * token and flags that we need to
- * try matching the token before
- * proceeding. But for input(),
- * there's no matching to consider.
- * So convert the EOB_ACT_LAST_MATCH
- * to EOB_ACT_END_OF_FILE.
- */
-
- /* Reset buffer status. */
- yyrestart(yyin );
-
- /*FALLTHROUGH*/
-
- case EOB_ACT_END_OF_FILE:
- {
- if ( yywrap( ) )
- return EOF;
-
- if ( ! (yy_did_buffer_switch_on_eof) )
- YY_NEW_FILE;
-#ifdef __cplusplus
- return yyinput();
-#else
- return input();
-#endif
- }
-
- case EOB_ACT_CONTINUE_SCAN:
- (yy_c_buf_p) = (yytext_ptr) + offset;
- break;
- }
- }
- }
-
- c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */
- *(yy_c_buf_p) = '\0'; /* preserve yytext */
- (yy_hold_char) = *++(yy_c_buf_p);
-
- return c;
-}
-#endif /* ifndef YY_NO_INPUT */
-
-/** Immediately switch to a different input stream.
- * @param input_file A readable stream.
- *
- * @note This function does not reset the start condition to @c INITIAL .
- */
- void yyrestart (FILE * input_file )
-{
-
- if ( ! YY_CURRENT_BUFFER ){
- yyensure_buffer_stack ();
- YY_CURRENT_BUFFER_LVALUE =
- yy_create_buffer(yyin,YY_BUF_SIZE );
- }
-
- yy_init_buffer(YY_CURRENT_BUFFER,input_file );
- yy_load_buffer_state( );
-}
-
-/** Switch to a different input buffer.
- * @param new_buffer The new input buffer.
- *
- */
- MY_ATTRIBUTE((unused)) static void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer )
-{
-
- /* TODO. We should be able to replace this entire function body
- * with
- * yypop_buffer_state();
- * yypush_buffer_state(new_buffer);
- */
- yyensure_buffer_stack ();
- if ( YY_CURRENT_BUFFER == new_buffer )
- return;
-
- if ( YY_CURRENT_BUFFER )
- {
- /* Flush out information for old buffer. */
- *(yy_c_buf_p) = (yy_hold_char);
- YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
- }
-
- YY_CURRENT_BUFFER_LVALUE = new_buffer;
- yy_load_buffer_state( );
-
- /* We don't actually know whether we did this switch during
- * EOF (yywrap()) processing, but the only time this flag
- * is looked at is after yywrap() is called, so it's safe
- * to go ahead and always set it.
- */
- (yy_did_buffer_switch_on_eof) = 1;
-}
-
-static void yy_load_buffer_state (void)
-{
- (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
- (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
- yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
- (yy_hold_char) = *(yy_c_buf_p);
-}
-
-/** Allocate and initialize an input buffer state.
- * @param file A readable stream.
- * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
- *
- * @return the allocated buffer state.
- */
- static YY_BUFFER_STATE yy_create_buffer (FILE * file, int size )
-{
- YY_BUFFER_STATE b;
-
- b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) );
- if ( ! b )
- YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
-
- b->yy_buf_size = size;
-
- /* yy_ch_buf has to be 2 characters longer than the size given because
- * we need to put in 2 end-of-buffer characters.
- */
- b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 );
- if ( ! b->yy_ch_buf )
- YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
-
- b->yy_is_our_buffer = 1;
-
- yy_init_buffer(b,file );
-
- return b;
-}
-
-/** Destroy the buffer.
- * @param b a buffer created with yy_create_buffer()
- *
- */
- void yy_delete_buffer (YY_BUFFER_STATE b )
-{
-
- if ( ! b )
- return;
-
- if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */
- YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;
-
- if ( b->yy_is_our_buffer )
- yyfree((void *) b->yy_ch_buf );
-
- yyfree((void *) b );
-}
-
-/* Initializes or reinitializes a buffer.
- * This function is sometimes called more than once on the same buffer,
- * such as during a yyrestart() or at EOF.
- */
- static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file )
-
-{
- int oerrno = errno;
-
- yy_flush_buffer(b );
-
- b->yy_input_file = file;
- b->yy_fill_buffer = 1;
-
- /* If b is the current buffer, then yy_init_buffer was _probably_
- * called from yyrestart() or through yy_get_next_buffer.
- * In that case, we don't want to reset the lineno or column.
- */
- if (b != YY_CURRENT_BUFFER){
- b->yy_bs_lineno = 1;
- b->yy_bs_column = 0;
- }
-
- b->yy_is_interactive = 0;
-
- errno = oerrno;
-}
-
-/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
- * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
- *
- */
- void yy_flush_buffer (YY_BUFFER_STATE b )
-{
- if ( ! b )
- return;
-
- b->yy_n_chars = 0;
-
- /* We always need two end-of-buffer characters. The first causes
- * a transition to the end-of-buffer state. The second causes
- * a jam in that state.
- */
- b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
- b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;
-
- b->yy_buf_pos = &b->yy_ch_buf[0];
-
- b->yy_at_bol = 1;
- b->yy_buffer_status = YY_BUFFER_NEW;
-
- if ( b == YY_CURRENT_BUFFER )
- yy_load_buffer_state( );
-}
-
-/** Pushes the new state onto the stack. The new state becomes
- * the current state. This function will allocate the stack
- * if necessary.
- * @param new_buffer The new state.
- *
- */
-void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
-{
- if (new_buffer == NULL)
- return;
-
- yyensure_buffer_stack();
-
- /* This block is copied from yy_switch_to_buffer. */
- if ( YY_CURRENT_BUFFER )
- {
- /* Flush out information for old buffer. */
- *(yy_c_buf_p) = (yy_hold_char);
- YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
- YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
- }
-
- /* Only push if top exists. Otherwise, replace top. */
- if (YY_CURRENT_BUFFER)
- (yy_buffer_stack_top)++;
- YY_CURRENT_BUFFER_LVALUE = new_buffer;
-
- /* copied from yy_switch_to_buffer. */
- yy_load_buffer_state( );
- (yy_did_buffer_switch_on_eof) = 1;
-}
-
-/** Removes and deletes the top of the stack, if present.
- * The next element becomes the new top.
- *
- */
-void yypop_buffer_state (void)
-{
- if (!YY_CURRENT_BUFFER)
- return;
-
- yy_delete_buffer(YY_CURRENT_BUFFER );
- YY_CURRENT_BUFFER_LVALUE = NULL;
- if ((yy_buffer_stack_top) > 0)
- --(yy_buffer_stack_top);
-
- if (YY_CURRENT_BUFFER) {
- yy_load_buffer_state( );
- (yy_did_buffer_switch_on_eof) = 1;
- }
-}
-
-/* Allocates the stack if it does not exist.
- * Guarantees space for at least one push.
- */
-static void yyensure_buffer_stack (void)
-{
- int num_to_alloc;
-
- if (!(yy_buffer_stack)) {
-
- /* First allocation is just for 2 elements, since we don't know if this
- * scanner will even need a stack. We use 2 instead of 1 to avoid an
- * immediate realloc on the next call.
- */
- num_to_alloc = 1;
- (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc
- (num_to_alloc * sizeof(struct yy_buffer_state*)
- );
- if ( ! (yy_buffer_stack) )
- YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
-
- memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));
-
- (yy_buffer_stack_max) = num_to_alloc;
- (yy_buffer_stack_top) = 0;
- return;
- }
-
- if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){
-
- /* Increase the buffer to prepare for a possible push. */
- int grow_size = 8 /* arbitrary grow size */;
-
- num_to_alloc = static_cast<int>(
- (yy_buffer_stack_max) + grow_size);
- (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc
- ((yy_buffer_stack),
- num_to_alloc * sizeof(struct yy_buffer_state*)
- );
- if ( ! (yy_buffer_stack) )
- YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
-
- /* zero only the new slots.*/
- memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*));
- (yy_buffer_stack_max) = num_to_alloc;
- }
-}
-
-#ifndef YY_EXIT_FAILURE
-#define YY_EXIT_FAILURE 2
-#endif
-
-static void yy_fatal_error (yyconst char* msg )
-{
- (void) fprintf( stderr, "%s\n", msg );
- exit( YY_EXIT_FAILURE );
-}
-
-/* Redefine yyless() so it works in section 3 code. */
-
-#undef yyless
-#define yyless(n) \
- do \
- { \
- /* Undo effects of setting up yytext. */ \
- int yyless_macro_arg = (n); \
- YY_LESS_LINENO(yyless_macro_arg);\
- yytext[yyleng] = (yy_hold_char); \
- (yy_c_buf_p) = yytext + yyless_macro_arg; \
- (yy_hold_char) = *(yy_c_buf_p); \
- *(yy_c_buf_p) = '\0'; \
- yyleng = yyless_macro_arg; \
- } \
- while ( 0 )
-
-/* Accessor methods (get/set functions) to struct members. */
-
-/** Get the current line number.
- *
- */
-int yyget_lineno (void)
-{
-
- return yylineno;
-}
-
-/** Get the input stream.
- *
- */
-FILE *yyget_in (void)
-{
- return yyin;
-}
-
-/** Get the output stream.
- *
- */
-FILE *yyget_out (void)
-{
- return yyout;
-}
-
-/** Get the length of the current token.
- *
- */
-yy_size_t yyget_leng (void)
-{
- return yyleng;
-}
-
-/** Get the current token.
- *
- */
-
-char *yyget_text (void)
-{
- return yytext;
-}
-
-/** Set the current line number.
- * @param line_number
- *
- */
-void yyset_lineno (int line_number )
-{
-
- yylineno = line_number;
-}
-
-/** Set the input stream. This does not discard the current
- * input buffer.
- * @param in_str A readable stream.
- *
- * @see yy_switch_to_buffer
- */
-void yyset_in (FILE * in_str )
-{
- yyin = in_str ;
-}
-
-void yyset_out (FILE * out_str )
-{
- yyout = out_str ;
-}
-
-int yyget_debug (void)
-{
- return yy_flex_debug;
-}
-
-void yyset_debug (int bdebug )
-{
- yy_flex_debug = bdebug ;
-}
-
-static int yy_init_globals (void)
-{
- /* Initialization is the same as for the non-reentrant scanner.
- * This function is called from yylex_destroy(), so don't allocate here.
- */
-
- (yy_buffer_stack) = 0;
- (yy_buffer_stack_top) = 0;
- (yy_buffer_stack_max) = 0;
- (yy_c_buf_p) = (char *) 0;
- (yy_init) = 0;
- (yy_start) = 0;
-
-/* Defined in main.c */
-#ifdef YY_STDINIT
- yyin = stdin;
- yyout = stdout;
-#else
- yyin = (FILE *) 0;
- yyout = (FILE *) 0;
-#endif
-
- /* For future reference: Set errno on error, since we are called by
- * yylex_init()
- */
- return 0;
-}
-
-/* yylex_destroy is for both reentrant and non-reentrant scanners. */
-MY_ATTRIBUTE((unused)) static int yylex_destroy (void)
-{
-
- /* Pop the buffer stack, destroying each element. */
- while(YY_CURRENT_BUFFER){
- yy_delete_buffer(YY_CURRENT_BUFFER );
- YY_CURRENT_BUFFER_LVALUE = NULL;
- yypop_buffer_state();
- }
-
- /* Destroy the stack itself. */
- yyfree((yy_buffer_stack) );
- (yy_buffer_stack) = NULL;
-
- /* Reset the globals. This is important in a non-reentrant scanner so the next time
- * yylex() is called, initialization will occur. */
- yy_init_globals( );
-
- return 0;
-}
-
-/*
- * Internal utility routines.
- */
-
-#ifndef yytext_ptr
-static void yy_flex_strncpy (char* s1, yyconst char * s2, int n )
-{
- register int i;
- for ( i = 0; i < n; ++i )
- s1[i] = s2[i];
-}
-#endif
-
-#ifdef YY_NEED_STRLEN
-static int yy_flex_strlen (yyconst char * s )
-{
- register int n;
- for ( n = 0; s[n]; ++n )
- ;
-
- return n;
-}
-#endif
-
-void *yyalloc (yy_size_t size )
-{
- return (void *) malloc( size );
-}
-
-void *yyrealloc (void * ptr, yy_size_t size )
-{
- /* The cast to (char *) in the following accommodates both
- * implementations that use char* generic pointers, and those
- * that use void* generic pointers. It works with the latter
- * because both ANSI C and C++ allow castless assignment from
- * any pointer type to void*, and deal with argument conversions
- * as though doing an assignment.
- */
- return (void *) realloc( (char *) ptr, size );
-}
-
-void yyfree (void * ptr )
-{
- free( (char*) ptr ); /* see yyrealloc() for (char *) cast */
-}
-
-#define YYTABLES_NAME "yytables"
-
-#line 691 "pars0lex.l"
-
-
-
-/**********************************************************************
-Release any resources used by the lexer. */
-UNIV_INTERN
-void
-pars_lexer_close(void)
-/*==================*/
-{
- if (yy_buffer_stack)
- yylex_destroy();
- if (stringbuf)
- free(stringbuf);
- stringbuf = NULL;
- stringbuf_len_alloc = stringbuf_len = 0;
-}
-
diff --git a/storage/xtradb/pars/make_bison.sh b/storage/xtradb/pars/make_bison.sh
deleted file mode 100755
index 2618be102bc..00000000000
--- a/storage/xtradb/pars/make_bison.sh
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/bin/bash
-#
-# Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
-#
-# This program is free software; you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free Software
-# Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-#
-# generate parser files from bison input files.
-
-set -eu
-TMPFILE=pars0grm.tab.c
-OUTFILE=pars0grm.cc
-
-bison -d pars0grm.y
-mv pars0grm.tab.h ../include/pars0grm.h
-
-sed -e '
-s/'"$TMPFILE"'/'"$OUTFILE"'/;
-s/^\(\(YYSTYPE\|int\) yy\(char\|nerrs\)\)/static \1/;
-s/\(\(YYSTYPE\|int\) yy\(lval\|parse\)\)/UNIV_INTERN \1/;
-' < "$TMPFILE" > "$OUTFILE"
-
-rm "$TMPFILE"
diff --git a/storage/xtradb/pars/make_flex.sh b/storage/xtradb/pars/make_flex.sh
deleted file mode 100755
index c3db8aea298..00000000000
--- a/storage/xtradb/pars/make_flex.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-#
-# Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-#
-# This program is free software; you can redistribute it and/or modify it under
-# the terms of the GNU General Public License as published by the Free Software
-# Foundation; version 2 of the License.
-#
-# This program is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along with
-# this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-#
-# generate lexer files from flex input files.
-
-set -eu
-
-TMPFILE=_flex_tmp.cc
-OUTFILE=lexyy.cc
-
-flex -o $TMPFILE pars0lex.l
-
-# AIX needs its includes done in a certain order, so include "univ.i" first
-# to be sure we get it right.
-echo '#include "univ.i"' > $OUTFILE
-
-# flex assigns a pointer to an int in one place without a cast, resulting in
-# a warning on Win64. Add the cast. Also define some symbols as static.
-sed -e '
-s/'"$TMPFILE"'/'"$OUTFILE"'/;
-s/\(int offset = \)\((yy_c_buf_p) - (yytext_ptr)\);/\1(int)(\2);/;
-s/\(void yy\(restart\|_\(delete\|flush\)_buffer\)\)/static \1/;
-s/\(void yy_switch_to_buffer\)/MY_ATTRIBUTE((unused)) static \1/;
-s/\(void yy\(push\|pop\)_buffer_state\)/MY_ATTRIBUTE((unused)) static \1/;
-s/\(YY_BUFFER_STATE yy_create_buffer\)/static \1/;
-s/\(\(int\|void\) yy[gs]et_\)/MY_ATTRIBUTE((unused)) static \1/;
-s/\(void \*\?yy\(\(re\)\?alloc\|free\)\)/static \1/;
-s/\(extern \)\?\(int yy\(leng\|lineno\|_flex_debug\)\)/static \2/;
-s/\(int yylex_destroy\)/MY_ATTRIBUTE((unused)) static \1/;
-s/\(extern \)\?\(int yylex \)/UNIV_INTERN \2/;
-s/^\(\(FILE\|char\) *\* *yyget\)/MY_ATTRIBUTE((unused)) static \1/;
-s/^\(extern \)\?\(\(FILE\|char\) *\* *yy\)/static \2/;
-' < $TMPFILE >> $OUTFILE
-
-rm $TMPFILE
diff --git a/storage/xtradb/pars/pars0grm.cc b/storage/xtradb/pars/pars0grm.cc
deleted file mode 100644
index b360f36e597..00000000000
--- a/storage/xtradb/pars/pars0grm.cc
+++ /dev/null
@@ -1,3034 +0,0 @@
-/* A Bison parser, made by GNU Bison 2.3. */
-
-/* Skeleton implementation for Bison's Yacc-like parsers in C
-
- Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006
- Free Software Foundation, Inc.
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor,
- Boston, MA 02110-1301, USA. */
-
-/* As a special exception, you may create a larger work that contains
- part or all of the Bison parser skeleton and distribute that work
- under terms of your choice, so long as that work isn't itself a
- parser generator using the skeleton or a modified version thereof
- as a parser skeleton. Alternatively, if you modify or redistribute
- the parser skeleton itself, you may (at your option) remove this
- special exception, which will cause the skeleton and the resulting
- Bison output files to be licensed under the GNU General Public
- License without this special exception.
-
- This special exception was added by the Free Software Foundation in
- version 2.2 of Bison. */
-
-/* C LALR(1) parser skeleton written by Richard Stallman, by
- simplifying the original so-called "semantic" parser. */
-
-/* All symbols defined below should begin with yy or YY, to avoid
- infringing on user name space. This should be done even for local
- variables, as they might otherwise be expanded by user macros.
- There are some unavoidable exceptions within include files to
- define necessary library symbols; they are noted "INFRINGES ON
- USER NAME SPACE" below. */
-
-/* Identify Bison output. */
-#define YYBISON 1
-
-/* Bison version. */
-#define YYBISON_VERSION "2.3"
-
-/* Skeleton name. */
-#define YYSKELETON_NAME "yacc.c"
-
-/* Pure parsers. */
-#define YYPURE 0
-
-/* Using locations. */
-#define YYLSP_NEEDED 0
-
-
-
-/* Tokens. */
-#ifndef YYTOKENTYPE
-# define YYTOKENTYPE
- /* Put the tokens into the symbol table, so that GDB and other debuggers
- know about them. */
- enum yytokentype {
- PARS_INT_LIT = 258,
- PARS_FLOAT_LIT = 259,
- PARS_STR_LIT = 260,
- PARS_FIXBINARY_LIT = 261,
- PARS_BLOB_LIT = 262,
- PARS_NULL_LIT = 263,
- PARS_ID_TOKEN = 264,
- PARS_AND_TOKEN = 265,
- PARS_OR_TOKEN = 266,
- PARS_NOT_TOKEN = 267,
- PARS_GE_TOKEN = 268,
- PARS_LE_TOKEN = 269,
- PARS_NE_TOKEN = 270,
- PARS_PROCEDURE_TOKEN = 271,
- PARS_IN_TOKEN = 272,
- PARS_OUT_TOKEN = 273,
- PARS_BINARY_TOKEN = 274,
- PARS_BLOB_TOKEN = 275,
- PARS_INT_TOKEN = 276,
- PARS_INTEGER_TOKEN = 277,
- PARS_FLOAT_TOKEN = 278,
- PARS_CHAR_TOKEN = 279,
- PARS_IS_TOKEN = 280,
- PARS_BEGIN_TOKEN = 281,
- PARS_END_TOKEN = 282,
- PARS_IF_TOKEN = 283,
- PARS_THEN_TOKEN = 284,
- PARS_ELSE_TOKEN = 285,
- PARS_ELSIF_TOKEN = 286,
- PARS_LOOP_TOKEN = 287,
- PARS_WHILE_TOKEN = 288,
- PARS_RETURN_TOKEN = 289,
- PARS_SELECT_TOKEN = 290,
- PARS_SUM_TOKEN = 291,
- PARS_COUNT_TOKEN = 292,
- PARS_DISTINCT_TOKEN = 293,
- PARS_FROM_TOKEN = 294,
- PARS_WHERE_TOKEN = 295,
- PARS_FOR_TOKEN = 296,
- PARS_DDOT_TOKEN = 297,
- PARS_READ_TOKEN = 298,
- PARS_ORDER_TOKEN = 299,
- PARS_BY_TOKEN = 300,
- PARS_ASC_TOKEN = 301,
- PARS_DESC_TOKEN = 302,
- PARS_INSERT_TOKEN = 303,
- PARS_INTO_TOKEN = 304,
- PARS_VALUES_TOKEN = 305,
- PARS_UPDATE_TOKEN = 306,
- PARS_SET_TOKEN = 307,
- PARS_DELETE_TOKEN = 308,
- PARS_CURRENT_TOKEN = 309,
- PARS_OF_TOKEN = 310,
- PARS_CREATE_TOKEN = 311,
- PARS_TABLE_TOKEN = 312,
- PARS_INDEX_TOKEN = 313,
- PARS_UNIQUE_TOKEN = 314,
- PARS_CLUSTERED_TOKEN = 315,
- PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316,
- PARS_ON_TOKEN = 317,
- PARS_ASSIGN_TOKEN = 318,
- PARS_DECLARE_TOKEN = 319,
- PARS_CURSOR_TOKEN = 320,
- PARS_SQL_TOKEN = 321,
- PARS_OPEN_TOKEN = 322,
- PARS_FETCH_TOKEN = 323,
- PARS_CLOSE_TOKEN = 324,
- PARS_NOTFOUND_TOKEN = 325,
- PARS_TO_CHAR_TOKEN = 326,
- PARS_TO_NUMBER_TOKEN = 327,
- PARS_TO_BINARY_TOKEN = 328,
- PARS_BINARY_TO_NUMBER_TOKEN = 329,
- PARS_SUBSTR_TOKEN = 330,
- PARS_REPLSTR_TOKEN = 331,
- PARS_CONCAT_TOKEN = 332,
- PARS_INSTR_TOKEN = 333,
- PARS_LENGTH_TOKEN = 334,
- PARS_SYSDATE_TOKEN = 335,
- PARS_PRINTF_TOKEN = 336,
- PARS_ASSERT_TOKEN = 337,
- PARS_RND_TOKEN = 338,
- PARS_RND_STR_TOKEN = 339,
- PARS_ROW_PRINTF_TOKEN = 340,
- PARS_COMMIT_TOKEN = 341,
- PARS_ROLLBACK_TOKEN = 342,
- PARS_WORK_TOKEN = 343,
- PARS_UNSIGNED_TOKEN = 344,
- PARS_EXIT_TOKEN = 345,
- PARS_FUNCTION_TOKEN = 346,
- PARS_LOCK_TOKEN = 347,
- PARS_SHARE_TOKEN = 348,
- PARS_MODE_TOKEN = 349,
- PARS_LIKE_TOKEN = 350,
- PARS_LIKE_TOKEN_EXACT = 351,
- PARS_LIKE_TOKEN_PREFIX = 352,
- PARS_LIKE_TOKEN_SUFFIX = 353,
- PARS_LIKE_TOKEN_SUBSTR = 354,
- PARS_TABLE_NAME_TOKEN = 355,
- PARS_COMPACT_TOKEN = 356,
- PARS_BLOCK_SIZE_TOKEN = 357,
- PARS_BIGINT_TOKEN = 358,
- NEG = 359
- };
-#endif
-/* Tokens. */
-#define PARS_INT_LIT 258
-#define PARS_FLOAT_LIT 259
-#define PARS_STR_LIT 260
-#define PARS_FIXBINARY_LIT 261
-#define PARS_BLOB_LIT 262
-#define PARS_NULL_LIT 263
-#define PARS_ID_TOKEN 264
-#define PARS_AND_TOKEN 265
-#define PARS_OR_TOKEN 266
-#define PARS_NOT_TOKEN 267
-#define PARS_GE_TOKEN 268
-#define PARS_LE_TOKEN 269
-#define PARS_NE_TOKEN 270
-#define PARS_PROCEDURE_TOKEN 271
-#define PARS_IN_TOKEN 272
-#define PARS_OUT_TOKEN 273
-#define PARS_BINARY_TOKEN 274
-#define PARS_BLOB_TOKEN 275
-#define PARS_INT_TOKEN 276
-#define PARS_INTEGER_TOKEN 277
-#define PARS_FLOAT_TOKEN 278
-#define PARS_CHAR_TOKEN 279
-#define PARS_IS_TOKEN 280
-#define PARS_BEGIN_TOKEN 281
-#define PARS_END_TOKEN 282
-#define PARS_IF_TOKEN 283
-#define PARS_THEN_TOKEN 284
-#define PARS_ELSE_TOKEN 285
-#define PARS_ELSIF_TOKEN 286
-#define PARS_LOOP_TOKEN 287
-#define PARS_WHILE_TOKEN 288
-#define PARS_RETURN_TOKEN 289
-#define PARS_SELECT_TOKEN 290
-#define PARS_SUM_TOKEN 291
-#define PARS_COUNT_TOKEN 292
-#define PARS_DISTINCT_TOKEN 293
-#define PARS_FROM_TOKEN 294
-#define PARS_WHERE_TOKEN 295
-#define PARS_FOR_TOKEN 296
-#define PARS_DDOT_TOKEN 297
-#define PARS_READ_TOKEN 298
-#define PARS_ORDER_TOKEN 299
-#define PARS_BY_TOKEN 300
-#define PARS_ASC_TOKEN 301
-#define PARS_DESC_TOKEN 302
-#define PARS_INSERT_TOKEN 303
-#define PARS_INTO_TOKEN 304
-#define PARS_VALUES_TOKEN 305
-#define PARS_UPDATE_TOKEN 306
-#define PARS_SET_TOKEN 307
-#define PARS_DELETE_TOKEN 308
-#define PARS_CURRENT_TOKEN 309
-#define PARS_OF_TOKEN 310
-#define PARS_CREATE_TOKEN 311
-#define PARS_TABLE_TOKEN 312
-#define PARS_INDEX_TOKEN 313
-#define PARS_UNIQUE_TOKEN 314
-#define PARS_CLUSTERED_TOKEN 315
-#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316
-#define PARS_ON_TOKEN 317
-#define PARS_ASSIGN_TOKEN 318
-#define PARS_DECLARE_TOKEN 319
-#define PARS_CURSOR_TOKEN 320
-#define PARS_SQL_TOKEN 321
-#define PARS_OPEN_TOKEN 322
-#define PARS_FETCH_TOKEN 323
-#define PARS_CLOSE_TOKEN 324
-#define PARS_NOTFOUND_TOKEN 325
-#define PARS_TO_CHAR_TOKEN 326
-#define PARS_TO_NUMBER_TOKEN 327
-#define PARS_TO_BINARY_TOKEN 328
-#define PARS_BINARY_TO_NUMBER_TOKEN 329
-#define PARS_SUBSTR_TOKEN 330
-#define PARS_REPLSTR_TOKEN 331
-#define PARS_CONCAT_TOKEN 332
-#define PARS_INSTR_TOKEN 333
-#define PARS_LENGTH_TOKEN 334
-#define PARS_SYSDATE_TOKEN 335
-#define PARS_PRINTF_TOKEN 336
-#define PARS_ASSERT_TOKEN 337
-#define PARS_RND_TOKEN 338
-#define PARS_RND_STR_TOKEN 339
-#define PARS_ROW_PRINTF_TOKEN 340
-#define PARS_COMMIT_TOKEN 341
-#define PARS_ROLLBACK_TOKEN 342
-#define PARS_WORK_TOKEN 343
-#define PARS_UNSIGNED_TOKEN 344
-#define PARS_EXIT_TOKEN 345
-#define PARS_FUNCTION_TOKEN 346
-#define PARS_LOCK_TOKEN 347
-#define PARS_SHARE_TOKEN 348
-#define PARS_MODE_TOKEN 349
-#define PARS_LIKE_TOKEN 350
-#define PARS_LIKE_TOKEN_EXACT 351
-#define PARS_LIKE_TOKEN_PREFIX 352
-#define PARS_LIKE_TOKEN_SUFFIX 353
-#define PARS_LIKE_TOKEN_SUBSTR 354
-#define PARS_TABLE_NAME_TOKEN 355
-#define PARS_COMPACT_TOKEN 356
-#define PARS_BLOCK_SIZE_TOKEN 357
-#define PARS_BIGINT_TOKEN 358
-#define NEG 359
-
-
-
-
-/* Copy the first part of user declarations. */
-#line 28 "pars0grm.y"
-
-/* The value of the semantic attribute is a pointer to a query tree node
-que_node_t */
-
-#include "univ.i"
-#include <math.h> /* Can't be before univ.i */
-#include "pars0pars.h"
-#include "mem0mem.h"
-#include "que0types.h"
-#include "que0que.h"
-#include "row0sel.h"
-
-#define YYSTYPE que_node_t*
-
-/* #define __STDC__ */
-
-int
-yylex(void);
-
-
-/* Enabling traces. */
-#ifndef YYDEBUG
-# define YYDEBUG 0
-#endif
-
-/* Enabling verbose error messages. */
-#ifdef YYERROR_VERBOSE
-# undef YYERROR_VERBOSE
-# define YYERROR_VERBOSE 1
-#else
-# define YYERROR_VERBOSE 0
-#endif
-
-/* Enabling the token table. */
-#ifndef YYTOKEN_TABLE
-# define YYTOKEN_TABLE 0
-#endif
-
-#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
-typedef int YYSTYPE;
-# define yystype YYSTYPE /* obsolescent; will be withdrawn */
-# define YYSTYPE_IS_DECLARED 1
-# define YYSTYPE_IS_TRIVIAL 1
-#endif
-
-
-
-/* Copy the second part of user declarations. */
-
-
-/* Line 216 of yacc.c. */
-#line 334 "pars0grm.cc"
-
-#ifdef short
-# undef short
-#endif
-
-#ifdef YYTYPE_UINT8
-typedef YYTYPE_UINT8 yytype_uint8;
-#else
-typedef unsigned char yytype_uint8;
-#endif
-
-#ifdef YYTYPE_INT8
-typedef YYTYPE_INT8 yytype_int8;
-#elif (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-typedef signed char yytype_int8;
-#else
-typedef short int yytype_int8;
-#endif
-
-#ifdef YYTYPE_UINT16
-typedef YYTYPE_UINT16 yytype_uint16;
-#else
-typedef unsigned short int yytype_uint16;
-#endif
-
-#ifdef YYTYPE_INT16
-typedef YYTYPE_INT16 yytype_int16;
-#else
-typedef short int yytype_int16;
-#endif
-
-#ifndef YYSIZE_T
-# ifdef __SIZE_TYPE__
-# define YYSIZE_T __SIZE_TYPE__
-# elif defined size_t
-# define YYSIZE_T size_t
-# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-# include <stddef.h> /* INFRINGES ON USER NAME SPACE */
-# define YYSIZE_T size_t
-# else
-# define YYSIZE_T unsigned int
-# endif
-#endif
-
-#define YYSIZE_MAXIMUM ((YYSIZE_T) -1)
-
-#ifndef YY_
-# if defined YYENABLE_NLS && YYENABLE_NLS
-# if ENABLE_NLS
-# include <libintl.h> /* INFRINGES ON USER NAME SPACE */
-# define YY_(msgid) dgettext ("bison-runtime", msgid)
-# endif
-# endif
-# ifndef YY_
-# define YY_(msgid) msgid
-# endif
-#endif
-
-/* Suppress unused-variable warnings by "using" E. */
-#if ! defined lint || defined __GNUC__
-# define YYUSE(e) ((void) (e))
-#else
-# define YYUSE(e) /* empty */
-#endif
-
-/* Identity function, used to suppress warnings about constant conditions. */
-#ifndef lint
-# define YYID(n) (n)
-#else
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static int
-YYID (int i)
-#else
-static int
-YYID (i)
- int i;
-#endif
-{
- return i;
-}
-#endif
-
-#if ! defined yyoverflow || YYERROR_VERBOSE
-
-/* The parser invokes alloca or malloc; define the necessary symbols. */
-
-# ifdef YYSTACK_USE_ALLOCA
-# if YYSTACK_USE_ALLOCA
-# ifdef __GNUC__
-# define YYSTACK_ALLOC __builtin_alloca
-# elif defined __BUILTIN_VA_ARG_INCR
-# include <alloca.h> /* INFRINGES ON USER NAME SPACE */
-# elif defined _AIX
-# define YYSTACK_ALLOC __alloca
-# elif defined _MSC_VER
-# include <malloc.h> /* INFRINGES ON USER NAME SPACE */
-# define alloca _alloca
-# else
-# define YYSTACK_ALLOC alloca
-# if ! defined _ALLOCA_H && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
-# ifndef _STDLIB_H
-# define _STDLIB_H 1
-# endif
-# endif
-# endif
-# endif
-# endif
-
-# ifdef YYSTACK_ALLOC
- /* Pacify GCC's `empty if-body' warning. */
-# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0))
-# ifndef YYSTACK_ALLOC_MAXIMUM
- /* The OS might guarantee only one guard page at the bottom of the stack,
- and a page size can be as small as 4096 bytes. So we cannot safely
- invoke alloca (N) if N exceeds 4096. Use a slightly smaller number
- to allow for a few compiler-allocated temporary stack slots. */
-# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */
-# endif
-# else
-# define YYSTACK_ALLOC YYMALLOC
-# define YYSTACK_FREE YYFREE
-# ifndef YYSTACK_ALLOC_MAXIMUM
-# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM
-# endif
-# if (defined __cplusplus && ! defined _STDLIB_H \
- && ! ((defined YYMALLOC || defined malloc) \
- && (defined YYFREE || defined free)))
-# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
-# ifndef _STDLIB_H
-# define _STDLIB_H 1
-# endif
-# endif
-# ifndef YYMALLOC
-# define YYMALLOC malloc
-# if ! defined malloc && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */
-# endif
-# endif
-# ifndef YYFREE
-# define YYFREE free
-# if ! defined free && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-void free (void*); /* INFRINGES ON USER NAME SPACE */
-# endif
-# endif
-# endif
-#endif /* ! defined yyoverflow || YYERROR_VERBOSE */
-
-
-#if (! defined yyoverflow \
- && (! defined __cplusplus \
- || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL)))
-
-/* A type that is properly aligned for any stack member. */
-union yyalloc
-{
- yytype_int16 yyss;
- YYSTYPE yyvs;
- };
-
-/* The size of the maximum gap between one aligned stack and the next. */
-# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1)
-
-/* The size of an array large to enough to hold all stacks, each with
- N elements. */
-# define YYSTACK_BYTES(N) \
- ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \
- + YYSTACK_GAP_MAXIMUM)
-
-/* Copy COUNT objects from FROM to TO. The source and destination do
- not overlap. */
-# ifndef YYCOPY
-# if defined __GNUC__ && 1 < __GNUC__
-# define YYCOPY(To, From, Count) \
- __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
-# else
-# define YYCOPY(To, From, Count) \
- do \
- { \
- YYSIZE_T yyi; \
- for (yyi = 0; yyi < (Count); yyi++) \
- (To)[yyi] = (From)[yyi]; \
- } \
- while (YYID (0))
-# endif
-# endif
-
-/* Relocate STACK from its old location to the new one. The
- local variables YYSIZE and YYSTACKSIZE give the old and new number of
- elements in the stack, and YYPTR gives the new location of the
- stack. Advance YYPTR to a properly aligned location for the next
- stack. */
-# define YYSTACK_RELOCATE(Stack) \
- do \
- { \
- YYSIZE_T yynewbytes; \
- YYCOPY (&yyptr->Stack, Stack, yysize); \
- Stack = &yyptr->Stack; \
- yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \
- yyptr += yynewbytes / sizeof (*yyptr); \
- } \
- while (YYID (0))
-
-#endif
-
-/* YYFINAL -- State number of the termination state. */
-#define YYFINAL 5
-/* YYLAST -- Last index in YYTABLE. */
-#define YYLAST 816
-
-/* YYNTOKENS -- Number of terminals. */
-#define YYNTOKENS 120
-/* YYNNTS -- Number of nonterminals. */
-#define YYNNTS 73
-/* YYNRULES -- Number of rules. */
-#define YYNRULES 183
-/* YYNRULES -- Number of states. */
-#define YYNSTATES 350
-
-/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */
-#define YYUNDEFTOK 2
-#define YYMAXUTOK 359
-
-#define YYTRANSLATE(YYX) \
- ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK)
-
-/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. */
-static const yytype_uint8 yytranslate[] =
-{
- 0, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 112, 2, 2,
- 114, 115, 109, 108, 117, 107, 2, 110, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 113,
- 105, 104, 106, 116, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 118, 2, 119, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 1, 2, 3, 4,
- 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
- 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
- 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
- 45, 46, 47, 48, 49, 50, 51, 52, 53, 54,
- 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
- 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
- 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
- 85, 86, 87, 88, 89, 90, 91, 92, 93, 94,
- 95, 96, 97, 98, 99, 100, 101, 102, 103, 111
-};
-
-#if YYDEBUG
-/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in
- YYRHS. */
-static const yytype_uint16 yyprhs[] =
-{
- 0, 0, 3, 6, 8, 11, 14, 17, 20, 23,
- 26, 29, 32, 35, 38, 41, 44, 47, 50, 53,
- 56, 59, 62, 65, 68, 71, 73, 76, 78, 83,
- 85, 87, 89, 91, 93, 95, 97, 101, 105, 109,
- 113, 116, 120, 124, 128, 132, 136, 140, 144, 148,
- 152, 156, 159, 163, 167, 169, 171, 173, 175, 177,
- 179, 181, 183, 185, 187, 189, 190, 192, 196, 203,
- 208, 210, 212, 214, 218, 220, 224, 225, 227, 231,
- 232, 234, 238, 240, 245, 251, 256, 257, 259, 263,
- 265, 269, 271, 272, 275, 276, 279, 280, 285, 286,
- 288, 290, 291, 296, 305, 309, 315, 318, 322, 324,
- 328, 333, 338, 341, 344, 348, 351, 354, 357, 361,
- 366, 368, 371, 372, 375, 377, 385, 392, 403, 405,
- 407, 410, 413, 418, 423, 429, 431, 435, 436, 440,
- 441, 443, 444, 447, 448, 450, 451, 453, 454, 458,
- 468, 470, 474, 475, 477, 478, 480, 491, 493, 495,
- 498, 501, 503, 505, 507, 509, 511, 513, 517, 521,
- 522, 524, 528, 532, 533, 535, 538, 545, 550, 552,
- 554, 555, 557, 560
-};
-
-/* YYRHS -- A `-1'-separated list of the rules' RHS. */
-static const yytype_int16 yyrhs[] =
-{
- 121, 0, -1, 192, 113, -1, 127, -1, 128, 113,
- -1, 160, 113, -1, 161, 113, -1, 162, 113, -1,
- 159, 113, -1, 163, 113, -1, 155, 113, -1, 142,
- 113, -1, 144, 113, -1, 154, 113, -1, 152, 113,
- -1, 153, 113, -1, 149, 113, -1, 150, 113, -1,
- 164, 113, -1, 166, 113, -1, 165, 113, -1, 181,
- 113, -1, 182, 113, -1, 175, 113, -1, 179, 113,
- -1, 122, -1, 123, 122, -1, 9, -1, 125, 114,
- 133, 115, -1, 3, -1, 4, -1, 5, -1, 6,
- -1, 7, -1, 8, -1, 66, -1, 124, 108, 124,
- -1, 124, 107, 124, -1, 124, 109, 124, -1, 124,
- 110, 124, -1, 107, 124, -1, 114, 124, 115, -1,
- 124, 104, 124, -1, 124, 95, 5, -1, 124, 105,
- 124, -1, 124, 106, 124, -1, 124, 13, 124, -1,
- 124, 14, 124, -1, 124, 15, 124, -1, 124, 10,
- 124, -1, 124, 11, 124, -1, 12, 124, -1, 9,
- 112, 70, -1, 66, 112, 70, -1, 71, -1, 72,
- -1, 73, -1, 74, -1, 75, -1, 77, -1, 78,
- -1, 79, -1, 80, -1, 83, -1, 84, -1, -1,
- 116, -1, 126, 117, 116, -1, 118, 9, 114, 126,
- 115, 119, -1, 129, 114, 133, 115, -1, 76, -1,
- 81, -1, 82, -1, 9, 114, 115, -1, 180, -1,
- 131, 117, 180, -1, -1, 9, -1, 132, 117, 9,
- -1, -1, 124, -1, 133, 117, 124, -1, 124, -1,
- 37, 114, 109, 115, -1, 37, 114, 38, 9, 115,
- -1, 36, 114, 124, 115, -1, -1, 134, -1, 135,
- 117, 134, -1, 109, -1, 135, 49, 132, -1, 135,
- -1, -1, 40, 124, -1, -1, 41, 51, -1, -1,
- 92, 17, 93, 94, -1, -1, 46, -1, 47, -1,
- -1, 44, 45, 9, 140, -1, 35, 136, 39, 131,
- 137, 138, 139, 141, -1, 48, 49, 180, -1, 143,
- 50, 114, 133, 115, -1, 143, 142, -1, 9, 104,
- 124, -1, 145, -1, 146, 117, 145, -1, 40, 54,
- 55, 9, -1, 51, 180, 52, 146, -1, 148, 137,
- -1, 148, 147, -1, 53, 39, 180, -1, 151, 137,
- -1, 151, 147, -1, 85, 142, -1, 9, 63, 124,
- -1, 31, 124, 29, 123, -1, 156, -1, 157, 156,
- -1, -1, 30, 123, -1, 157, -1, 28, 124, 29,
- 123, 158, 27, 28, -1, 33, 124, 32, 123, 27,
- 32, -1, 41, 9, 17, 124, 42, 124, 32, 123,
- 27, 32, -1, 90, -1, 34, -1, 67, 9, -1,
- 69, 9, -1, 68, 9, 49, 132, -1, 68, 9,
- 49, 130, -1, 9, 183, 169, 170, 171, -1, 167,
- -1, 168, 117, 167, -1, -1, 114, 3, 115, -1,
- -1, 89, -1, -1, 12, 8, -1, -1, 61, -1,
- -1, 101, -1, -1, 102, 104, 3, -1, 56, 57,
- 180, 114, 168, 115, 172, 173, 174, -1, 9, -1,
- 176, 117, 9, -1, -1, 59, -1, -1, 60, -1,
- 56, 177, 178, 58, 9, 62, 180, 114, 176, 115,
- -1, 9, -1, 100, -1, 86, 88, -1, 87, 88,
- -1, 21, -1, 22, -1, 103, -1, 24, -1, 19,
- -1, 20, -1, 9, 17, 183, -1, 9, 18, 183,
- -1, -1, 184, -1, 185, 117, 184, -1, 9, 183,
- 113, -1, -1, 186, -1, 187, 186, -1, 64, 65,
- 9, 25, 142, 113, -1, 64, 91, 9, 113, -1,
- 188, -1, 189, -1, -1, 190, -1, 191, 190, -1,
- 16, 9, 114, 185, 115, 25, 187, 191, 26, 123,
- 27, -1
-};
-
-/* YYRLINE[YYN] -- source line where rule number YYN was defined. */
-static const yytype_uint16 yyrline[] =
-{
- 0, 162, 162, 165, 166, 167, 168, 169, 170, 171,
- 172, 173, 174, 175, 176, 177, 178, 179, 180, 181,
- 182, 183, 184, 185, 186, 190, 191, 196, 197, 199,
- 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
- 210, 211, 212, 213, 215, 216, 217, 218, 219, 220,
- 221, 222, 223, 225, 230, 231, 232, 233, 235, 236,
- 237, 238, 239, 240, 241, 244, 246, 247, 251, 257,
- 262, 263, 264, 268, 272, 273, 278, 279, 280, 285,
- 286, 287, 291, 292, 297, 303, 310, 311, 312, 317,
- 319, 322, 326, 327, 331, 332, 337, 338, 343, 344,
- 345, 349, 350, 357, 372, 377, 380, 388, 394, 395,
- 400, 406, 415, 423, 431, 438, 446, 454, 460, 467,
- 473, 474, 479, 480, 482, 486, 493, 499, 509, 513,
- 517, 524, 531, 535, 543, 552, 553, 558, 559, 564,
- 565, 571, 572, 578, 579, 585, 586, 591, 592, 597,
- 608, 609, 614, 615, 619, 620, 624, 638, 639, 643,
- 648, 653, 654, 655, 656, 657, 658, 662, 667, 675,
- 676, 677, 682, 688, 690, 691, 695, 703, 709, 710,
- 713, 715, 716, 720
-};
-#endif
-
-#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE
-/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
- First, the terminals, then, starting at YYNTOKENS, nonterminals. */
-static const char *const yytname[] =
-{
- "$end", "error", "$undefined", "PARS_INT_LIT", "PARS_FLOAT_LIT",
- "PARS_STR_LIT", "PARS_FIXBINARY_LIT", "PARS_BLOB_LIT", "PARS_NULL_LIT",
- "PARS_ID_TOKEN", "PARS_AND_TOKEN", "PARS_OR_TOKEN", "PARS_NOT_TOKEN",
- "PARS_GE_TOKEN", "PARS_LE_TOKEN", "PARS_NE_TOKEN",
- "PARS_PROCEDURE_TOKEN", "PARS_IN_TOKEN", "PARS_OUT_TOKEN",
- "PARS_BINARY_TOKEN", "PARS_BLOB_TOKEN", "PARS_INT_TOKEN",
- "PARS_INTEGER_TOKEN", "PARS_FLOAT_TOKEN", "PARS_CHAR_TOKEN",
- "PARS_IS_TOKEN", "PARS_BEGIN_TOKEN", "PARS_END_TOKEN", "PARS_IF_TOKEN",
- "PARS_THEN_TOKEN", "PARS_ELSE_TOKEN", "PARS_ELSIF_TOKEN",
- "PARS_LOOP_TOKEN", "PARS_WHILE_TOKEN", "PARS_RETURN_TOKEN",
- "PARS_SELECT_TOKEN", "PARS_SUM_TOKEN", "PARS_COUNT_TOKEN",
- "PARS_DISTINCT_TOKEN", "PARS_FROM_TOKEN", "PARS_WHERE_TOKEN",
- "PARS_FOR_TOKEN", "PARS_DDOT_TOKEN", "PARS_READ_TOKEN",
- "PARS_ORDER_TOKEN", "PARS_BY_TOKEN", "PARS_ASC_TOKEN", "PARS_DESC_TOKEN",
- "PARS_INSERT_TOKEN", "PARS_INTO_TOKEN", "PARS_VALUES_TOKEN",
- "PARS_UPDATE_TOKEN", "PARS_SET_TOKEN", "PARS_DELETE_TOKEN",
- "PARS_CURRENT_TOKEN", "PARS_OF_TOKEN", "PARS_CREATE_TOKEN",
- "PARS_TABLE_TOKEN", "PARS_INDEX_TOKEN", "PARS_UNIQUE_TOKEN",
- "PARS_CLUSTERED_TOKEN", "PARS_DOES_NOT_FIT_IN_MEM_TOKEN",
- "PARS_ON_TOKEN", "PARS_ASSIGN_TOKEN", "PARS_DECLARE_TOKEN",
- "PARS_CURSOR_TOKEN", "PARS_SQL_TOKEN", "PARS_OPEN_TOKEN",
- "PARS_FETCH_TOKEN", "PARS_CLOSE_TOKEN", "PARS_NOTFOUND_TOKEN",
- "PARS_TO_CHAR_TOKEN", "PARS_TO_NUMBER_TOKEN", "PARS_TO_BINARY_TOKEN",
- "PARS_BINARY_TO_NUMBER_TOKEN", "PARS_SUBSTR_TOKEN", "PARS_REPLSTR_TOKEN",
- "PARS_CONCAT_TOKEN", "PARS_INSTR_TOKEN", "PARS_LENGTH_TOKEN",
- "PARS_SYSDATE_TOKEN", "PARS_PRINTF_TOKEN", "PARS_ASSERT_TOKEN",
- "PARS_RND_TOKEN", "PARS_RND_STR_TOKEN", "PARS_ROW_PRINTF_TOKEN",
- "PARS_COMMIT_TOKEN", "PARS_ROLLBACK_TOKEN", "PARS_WORK_TOKEN",
- "PARS_UNSIGNED_TOKEN", "PARS_EXIT_TOKEN", "PARS_FUNCTION_TOKEN",
- "PARS_LOCK_TOKEN", "PARS_SHARE_TOKEN", "PARS_MODE_TOKEN",
- "PARS_LIKE_TOKEN", "PARS_LIKE_TOKEN_EXACT", "PARS_LIKE_TOKEN_PREFIX",
- "PARS_LIKE_TOKEN_SUFFIX", "PARS_LIKE_TOKEN_SUBSTR",
- "PARS_TABLE_NAME_TOKEN", "PARS_COMPACT_TOKEN", "PARS_BLOCK_SIZE_TOKEN",
- "PARS_BIGINT_TOKEN", "'='", "'<'", "'>'", "'-'", "'+'", "'*'", "'/'",
- "NEG", "'%'", "';'", "'('", "')'", "'?'", "','", "'{'", "'}'", "$accept",
- "top_statement", "statement", "statement_list", "exp", "function_name",
- "question_mark_list", "stored_procedure_call",
- "predefined_procedure_call", "predefined_procedure_name",
- "user_function_call", "table_list", "variable_list", "exp_list",
- "select_item", "select_item_list", "select_list", "search_condition",
- "for_update_clause", "lock_shared_clause", "order_direction",
- "order_by_clause", "select_statement", "insert_statement_start",
- "insert_statement", "column_assignment", "column_assignment_list",
- "cursor_positioned", "update_statement_start",
- "update_statement_searched", "update_statement_positioned",
- "delete_statement_start", "delete_statement_searched",
- "delete_statement_positioned", "row_printf_statement",
- "assignment_statement", "elsif_element", "elsif_list", "else_part",
- "if_statement", "while_statement", "for_statement", "exit_statement",
- "return_statement", "open_cursor_statement", "close_cursor_statement",
- "fetch_statement", "column_def", "column_def_list", "opt_column_len",
- "opt_unsigned", "opt_not_null", "not_fit_in_memory", "compact",
- "block_size", "create_table", "column_list", "unique_def",
- "clustered_def", "create_index", "table_name", "commit_statement",
- "rollback_statement", "type_name", "parameter_declaration",
- "parameter_declaration_list", "variable_declaration",
- "variable_declaration_list", "cursor_declaration",
- "function_declaration", "declaration", "declaration_list",
- "procedure_definition", 0
-};
-#endif
-
-# ifdef YYPRINT
-/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to
- token YYLEX-NUM. */
-static const yytype_uint16 yytoknum[] =
-{
- 0, 256, 257, 258, 259, 260, 261, 262, 263, 264,
- 265, 266, 267, 268, 269, 270, 271, 272, 273, 274,
- 275, 276, 277, 278, 279, 280, 281, 282, 283, 284,
- 285, 286, 287, 288, 289, 290, 291, 292, 293, 294,
- 295, 296, 297, 298, 299, 300, 301, 302, 303, 304,
- 305, 306, 307, 308, 309, 310, 311, 312, 313, 314,
- 315, 316, 317, 318, 319, 320, 321, 322, 323, 324,
- 325, 326, 327, 328, 329, 330, 331, 332, 333, 334,
- 335, 336, 337, 338, 339, 340, 341, 342, 343, 344,
- 345, 346, 347, 348, 349, 350, 351, 352, 353, 354,
- 355, 356, 357, 358, 61, 60, 62, 45, 43, 42,
- 47, 359, 37, 59, 40, 41, 63, 44, 123, 125
-};
-# endif
-
-/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */
-static const yytype_uint8 yyr1[] =
-{
- 0, 120, 121, 122, 122, 122, 122, 122, 122, 122,
- 122, 122, 122, 122, 122, 122, 122, 122, 122, 122,
- 122, 122, 122, 122, 122, 123, 123, 124, 124, 124,
- 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
- 124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
- 124, 124, 124, 124, 125, 125, 125, 125, 125, 125,
- 125, 125, 125, 125, 125, 126, 126, 126, 127, 128,
- 129, 129, 129, 130, 131, 131, 132, 132, 132, 133,
- 133, 133, 134, 134, 134, 134, 135, 135, 135, 136,
- 136, 136, 137, 137, 138, 138, 139, 139, 140, 140,
- 140, 141, 141, 142, 143, 144, 144, 145, 146, 146,
- 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
- 157, 157, 158, 158, 158, 159, 160, 161, 162, 163,
- 164, 165, 166, 166, 167, 168, 168, 169, 169, 170,
- 170, 171, 171, 172, 172, 173, 173, 174, 174, 175,
- 176, 176, 177, 177, 178, 178, 179, 180, 180, 181,
- 182, 183, 183, 183, 183, 183, 183, 184, 184, 185,
- 185, 185, 186, 187, 187, 187, 188, 189, 190, 190,
- 191, 191, 191, 192
-};
-
-/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */
-static const yytype_uint8 yyr2[] =
-{
- 0, 2, 2, 1, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 1, 2, 1, 4, 1,
- 1, 1, 1, 1, 1, 1, 3, 3, 3, 3,
- 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 2, 3, 3, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 0, 1, 3, 6, 4,
- 1, 1, 1, 3, 1, 3, 0, 1, 3, 0,
- 1, 3, 1, 4, 5, 4, 0, 1, 3, 1,
- 3, 1, 0, 2, 0, 2, 0, 4, 0, 1,
- 1, 0, 4, 8, 3, 5, 2, 3, 1, 3,
- 4, 4, 2, 2, 3, 2, 2, 2, 3, 4,
- 1, 2, 0, 2, 1, 7, 6, 10, 1, 1,
- 2, 2, 4, 4, 5, 1, 3, 0, 3, 0,
- 1, 0, 2, 0, 1, 0, 1, 0, 3, 9,
- 1, 3, 0, 1, 0, 1, 10, 1, 1, 2,
- 2, 1, 1, 1, 1, 1, 1, 3, 3, 0,
- 1, 3, 3, 0, 1, 2, 6, 4, 1, 1,
- 0, 1, 2, 11
-};
-
-/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state
- STATE-NUM when YYTABLE doesn't specify something else to do. Zero
- means the default is an error. */
-static const yytype_uint8 yydefact[] =
-{
- 0, 0, 0, 0, 0, 1, 2, 169, 0, 170,
- 0, 0, 0, 0, 0, 165, 166, 161, 162, 164,
- 163, 167, 168, 173, 171, 0, 174, 180, 0, 0,
- 175, 178, 179, 181, 0, 172, 0, 0, 0, 182,
- 0, 0, 0, 0, 0, 129, 86, 0, 0, 0,
- 0, 152, 0, 0, 0, 70, 71, 72, 0, 0,
- 0, 128, 0, 25, 0, 3, 0, 0, 0, 0,
- 0, 92, 0, 0, 92, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 177, 0, 29, 30, 31, 32, 33, 34,
- 27, 0, 35, 54, 55, 56, 57, 58, 59, 60,
- 61, 62, 63, 64, 0, 0, 0, 0, 0, 0,
- 0, 89, 82, 87, 91, 0, 0, 0, 157, 158,
- 0, 0, 0, 153, 154, 130, 0, 131, 117, 159,
- 160, 0, 183, 26, 4, 79, 11, 0, 106, 12,
- 0, 112, 113, 16, 17, 115, 116, 14, 15, 13,
- 10, 8, 5, 6, 7, 9, 18, 20, 19, 23,
- 24, 21, 22, 0, 118, 0, 51, 0, 40, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 79, 0, 0, 0, 76, 0,
- 0, 0, 104, 0, 114, 0, 155, 0, 76, 65,
- 80, 0, 79, 0, 93, 176, 52, 53, 41, 49,
- 50, 46, 47, 48, 122, 43, 42, 44, 45, 37,
- 36, 38, 39, 0, 0, 0, 0, 0, 77, 90,
- 88, 92, 74, 0, 0, 108, 111, 0, 0, 77,
- 133, 132, 66, 0, 69, 0, 0, 0, 0, 0,
- 120, 124, 0, 28, 0, 85, 0, 83, 0, 0,
- 0, 94, 0, 0, 0, 0, 135, 0, 0, 0,
- 0, 0, 81, 105, 110, 123, 0, 121, 0, 126,
- 84, 78, 75, 0, 96, 0, 107, 109, 137, 143,
- 0, 0, 73, 68, 67, 0, 125, 95, 0, 101,
- 0, 0, 139, 144, 145, 136, 0, 119, 0, 0,
- 103, 0, 0, 140, 141, 146, 147, 0, 0, 0,
- 0, 138, 0, 134, 0, 149, 150, 0, 97, 98,
- 127, 142, 0, 156, 0, 99, 100, 102, 148, 151
-};
-
-/* YYDEFGOTO[NTERM-NUM]. */
-static const yytype_int16 yydefgoto[] =
-{
- -1, 2, 63, 64, 210, 117, 253, 65, 66, 67,
- 250, 241, 239, 211, 123, 124, 125, 151, 294, 309,
- 347, 320, 68, 69, 70, 245, 246, 152, 71, 72,
- 73, 74, 75, 76, 77, 78, 260, 261, 262, 79,
- 80, 81, 82, 83, 84, 85, 86, 276, 277, 312,
- 324, 333, 314, 326, 335, 87, 337, 134, 207, 88,
- 130, 89, 90, 21, 9, 10, 26, 27, 31, 32,
- 33, 34, 3
-};
-
-/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing
- STATE-NUM. */
-#define YYPACT_NINF -179
-static const yytype_int16 yypact[] =
-{
- 24, 36, 58, -48, -25, -179, -179, 57, 31, -179,
- -74, 14, 14, 50, 57, -179, -179, -179, -179, -179,
- -179, -179, -179, 72, -179, 14, -179, 3, -26, -28,
- -179, -179, -179, -179, 4, -179, 91, 95, 589, -179,
- 80, -6, 43, 285, 285, -179, 19, 99, 69, -5,
- 81, -13, 110, 112, 114, -179, -179, -179, 89, 37,
- 41, -179, 122, -179, 406, -179, 25, 40, 44, -3,
- 46, 116, 49, 51, 116, 52, 53, 54, 55, 56,
- 59, 61, 62, 70, 73, 74, 75, 76, 77, 78,
- 79, 89, -179, 285, -179, -179, -179, -179, -179, -179,
- 82, 285, 83, -179, -179, -179, -179, -179, -179, -179,
- -179, -179, -179, -179, 285, 285, 577, 92, 618, 94,
- 97, -179, 706, -179, -33, 124, 153, -5, -179, -179,
- 141, -5, -5, -179, 136, -179, 148, -179, -179, -179,
- -179, 98, -179, -179, -179, 285, -179, 101, -179, -179,
- 195, -179, -179, -179, -179, -179, -179, -179, -179, -179,
- -179, -179, -179, -179, -179, -179, -179, -179, -179, -179,
- -179, -179, -179, 100, 706, 135, 6, 154, -7, 206,
- 285, 285, 285, 285, 285, 589, 218, 285, 285, 285,
- 285, 285, 285, 285, 285, 589, 285, -27, 216, 173,
- -5, 285, -179, 217, -179, 113, -179, 171, 221, 119,
- 706, -56, 285, 185, 706, -179, -179, -179, -179, 6,
- 6, 27, 27, 706, 345, -179, 27, 27, 27, 35,
- 35, -7, -7, -53, 467, 223, 232, 127, -179, 126,
- -179, -31, -179, 638, 151, -179, 142, 251, 253, 150,
- -179, 126, -179, -46, -179, 285, -45, 256, 589, 285,
- -179, 240, 249, -179, 245, -179, 166, -179, 273, 285,
- -5, 242, 285, 285, 217, 14, -179, -39, 222, 170,
- 167, 179, 706, -179, -179, 589, 679, -179, 268, -179,
- -179, -179, -179, 247, 207, 686, 706, -179, 186, 243,
- 251, -5, -179, -179, -179, 589, -179, -179, 286, 261,
- 589, 303, 219, -179, 224, -179, 193, 589, 226, 272,
- -179, 528, 205, -179, 310, -179, 233, 314, 230, 317,
- 302, -179, 328, -179, 235, -179, -179, -38, -179, 7,
- -179, -179, 334, -179, 331, -179, -179, -179, -179, -179
-};
-
-/* YYPGOTO[NTERM-NUM]. */
-static const yytype_int16 yypgoto[] =
-{
- -179, -179, -63, -178, -41, -179, -179, -179, -179, -179,
- -179, -179, 133, -155, 143, -179, -179, -68, -179, -179,
- -179, -179, -40, -179, -179, 71, -179, 269, -179, -179,
- -179, -179, -179, -179, -179, -179, 85, -179, -179, -179,
- -179, -179, -179, -179, -179, -179, -179, 47, -179, -179,
- -179, -179, -179, -179, -179, -179, -179, -179, -179, -179,
- -117, -179, -179, -12, 330, -179, 321, -179, -179, -179,
- 315, -179, -179
-};
-
-/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If
- positive, shift that token. If negative, reduce the rule which
- number is the opposite. If zero, do what YYDEFACT says.
- If YYTABLE_NINF, syntax error. */
-#define YYTABLE_NINF -1
-static const yytype_uint16 yytable[] =
-{
- 22, 143, 116, 118, 128, 122, 155, 224, 184, 269,
- 202, 236, 25, 28, 204, 205, 198, 234, 138, 182,
- 183, 184, 94, 95, 96, 97, 98, 99, 100, 148,
- 38, 101, 46, 15, 16, 17, 18, 36, 19, 233,
- 1, 13, 184, 14, 132, 4, 133, 147, 11, 12,
- 184, 173, 174, 345, 346, 119, 120, 256, 5, 254,
- 176, 255, 263, 37, 255, 6, 8, 29, 29, 280,
- 283, 281, 255, 178, 179, 23, 299, 343, 300, 344,
- 285, 25, 237, 242, 199, 102, 270, 35, 186, 7,
- 103, 104, 105, 106, 107, 129, 108, 109, 110, 111,
- 40, 186, 112, 113, 41, 91, 93, 92, 126, 214,
- 187, 188, 189, 190, 191, 192, 193, 20, 127, 135,
- 131, 136, 186, 137, 46, 139, 114, 317, 121, 140,
- 186, 141, 321, 115, 190, 191, 192, 193, 144, 219,
- 220, 221, 222, 223, 192, 193, 226, 227, 228, 229,
- 230, 231, 232, 292, 145, 235, 150, 146, 122, 149,
- 243, 143, 153, 200, 154, 157, 158, 159, 160, 161,
- 201, 143, 162, 271, 163, 164, 94, 95, 96, 97,
- 98, 99, 100, 165, 316, 101, 166, 167, 168, 169,
- 170, 171, 172, 203, 175, 177, 206, 208, 94, 95,
- 96, 97, 98, 99, 100, 216, 194, 101, 196, 119,
- 120, 197, 209, 215, 282, 212, 180, 181, 286, 182,
- 183, 184, 143, 225, 217, 238, 244, 247, 214, 248,
- 249, 295, 296, 180, 181, 252, 182, 183, 184, 102,
- 257, 266, 267, 268, 103, 104, 105, 106, 107, 213,
- 108, 109, 110, 111, 143, 273, 112, 113, 143, 274,
- 275, 102, 278, 298, 279, 284, 103, 104, 105, 106,
- 107, 259, 108, 109, 110, 111, 288, 289, 112, 113,
- 114, 290, 291, 293, 301, 302, 303, 115, 94, 95,
- 96, 97, 98, 99, 100, 304, 306, 101, 307, 308,
- 311, 186, 114, 318, 313, 319, 322, 327, 323, 115,
- 187, 188, 189, 190, 191, 192, 193, 329, 186, 328,
- 331, 218, 332, 336, 338, 325, 339, 187, 188, 189,
- 190, 191, 192, 193, 340, 334, 341, 348, 265, 342,
- 349, 251, 240, 156, 24, 297, 287, 315, 30, 39,
- 0, 102, 0, 0, 42, 0, 103, 104, 105, 106,
- 107, 0, 108, 109, 110, 111, 0, 0, 112, 113,
- 0, 0, 0, 43, 0, 258, 259, 0, 44, 45,
- 46, 0, 0, 0, 0, 0, 47, 0, 0, 0,
- 0, 0, 114, 48, 0, 0, 49, 0, 50, 115,
- 0, 51, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 52, 53, 54, 42, 0, 0, 0, 0,
- 0, 55, 0, 0, 0, 0, 56, 57, 0, 0,
- 58, 59, 60, 142, 43, 61, 0, 0, 0, 44,
- 45, 46, 0, 0, 0, 0, 0, 47, 0, 0,
- 0, 0, 0, 0, 48, 0, 0, 49, 0, 50,
- 0, 0, 51, 62, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 52, 53, 54, 42, 0, 0, 0,
- 0, 0, 55, 0, 0, 0, 0, 56, 57, 0,
- 0, 58, 59, 60, 264, 43, 61, 0, 0, 0,
- 44, 45, 46, 0, 0, 0, 0, 0, 47, 0,
- 0, 0, 0, 0, 0, 48, 0, 0, 49, 0,
- 50, 0, 0, 51, 62, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 52, 53, 54, 42, 0, 0,
- 0, 0, 0, 55, 0, 0, 0, 0, 56, 57,
- 0, 0, 58, 59, 60, 330, 43, 61, 0, 0,
- 0, 44, 45, 46, 0, 0, 0, 0, 0, 47,
- 0, 0, 0, 0, 0, 0, 48, 0, 0, 49,
- 0, 50, 0, 0, 51, 62, 0, 180, 181, 0,
- 182, 183, 184, 0, 0, 52, 53, 54, 42, 0,
- 0, 0, 0, 0, 55, 0, 185, 0, 0, 56,
- 57, 0, 0, 58, 59, 60, 0, 43, 61, 0,
- 0, 0, 44, 45, 46, 0, 0, 0, 180, 181,
- 47, 182, 183, 184, 0, 0, 0, 48, 0, 0,
- 49, 0, 50, 0, 0, 51, 62, 0, 180, 181,
- 195, 182, 183, 184, 0, 0, 52, 53, 54, 0,
- 0, 0, 0, 0, 0, 55, 0, 0, 0, 0,
- 56, 57, 186, 0, 58, 59, 60, 0, 0, 61,
- 272, 187, 188, 189, 190, 191, 192, 193, 0, 180,
- 181, 0, 182, 183, 184, 0, 180, 181, 0, 182,
- 183, 184, 0, 0, 0, 0, 0, 62, 305, 0,
- 0, 0, 0, 186, 0, 0, 180, 181, 310, 182,
- 183, 184, 187, 188, 189, 190, 191, 192, 193, 0,
- 0, 0, 0, 186, 0, 0, 0, 0, 0, 0,
- 0, 0, 187, 188, 189, 190, 191, 192, 193, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 186, 0, 0, 0, 0, 0,
- 0, 186, 0, 187, 188, 189, 190, 191, 192, 193,
- 187, 188, 189, 190, 191, 192, 193, 0, 0, 0,
- 0, 186, 0, 0, 0, 0, 0, 0, 0, 0,
- 187, 188, 189, 190, 191, 192, 193
-};
-
-static const yytype_int16 yycheck[] =
-{
- 12, 64, 43, 44, 9, 46, 74, 185, 15, 40,
- 127, 38, 9, 25, 131, 132, 49, 195, 58, 13,
- 14, 15, 3, 4, 5, 6, 7, 8, 9, 69,
- 26, 12, 35, 19, 20, 21, 22, 65, 24, 194,
- 16, 115, 15, 117, 57, 9, 59, 50, 17, 18,
- 15, 91, 93, 46, 47, 36, 37, 212, 0, 115,
- 101, 117, 115, 91, 117, 113, 9, 64, 64, 115,
- 115, 117, 117, 114, 115, 25, 115, 115, 117, 117,
- 258, 9, 109, 200, 117, 66, 117, 113, 95, 114,
- 71, 72, 73, 74, 75, 100, 77, 78, 79, 80,
- 9, 95, 83, 84, 9, 25, 63, 113, 9, 150,
- 104, 105, 106, 107, 108, 109, 110, 103, 49, 9,
- 39, 9, 95, 9, 35, 88, 107, 305, 109, 88,
- 95, 9, 310, 114, 107, 108, 109, 110, 113, 180,
- 181, 182, 183, 184, 109, 110, 187, 188, 189, 190,
- 191, 192, 193, 270, 114, 196, 40, 113, 199, 113,
- 201, 224, 113, 39, 113, 113, 113, 113, 113, 113,
- 17, 234, 113, 241, 113, 113, 3, 4, 5, 6,
- 7, 8, 9, 113, 301, 12, 113, 113, 113, 113,
- 113, 113, 113, 52, 112, 112, 60, 49, 3, 4,
- 5, 6, 7, 8, 9, 70, 114, 12, 114, 36,
- 37, 114, 114, 113, 255, 114, 10, 11, 259, 13,
- 14, 15, 285, 5, 70, 9, 9, 114, 269, 58,
- 9, 272, 273, 10, 11, 116, 13, 14, 15, 66,
- 55, 9, 115, 117, 71, 72, 73, 74, 75, 54,
- 77, 78, 79, 80, 317, 104, 83, 84, 321, 117,
- 9, 66, 9, 275, 114, 9, 71, 72, 73, 74,
- 75, 31, 77, 78, 79, 80, 27, 32, 83, 84,
- 107, 115, 9, 41, 62, 115, 119, 114, 3, 4,
- 5, 6, 7, 8, 9, 116, 28, 12, 51, 92,
- 114, 95, 107, 17, 61, 44, 3, 114, 89, 114,
- 104, 105, 106, 107, 108, 109, 110, 45, 95, 93,
- 115, 115, 12, 9, 94, 101, 9, 104, 105, 106,
- 107, 108, 109, 110, 32, 102, 8, 3, 115, 104,
- 9, 208, 199, 74, 14, 274, 261, 300, 27, 34,
- -1, 66, -1, -1, 9, -1, 71, 72, 73, 74,
- 75, -1, 77, 78, 79, 80, -1, -1, 83, 84,
- -1, -1, -1, 28, -1, 30, 31, -1, 33, 34,
- 35, -1, -1, -1, -1, -1, 41, -1, -1, -1,
- -1, -1, 107, 48, -1, -1, 51, -1, 53, 114,
- -1, 56, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, 67, 68, 69, 9, -1, -1, -1, -1,
- -1, 76, -1, -1, -1, -1, 81, 82, -1, -1,
- 85, 86, 87, 27, 28, 90, -1, -1, -1, 33,
- 34, 35, -1, -1, -1, -1, -1, 41, -1, -1,
- -1, -1, -1, -1, 48, -1, -1, 51, -1, 53,
- -1, -1, 56, 118, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, 67, 68, 69, 9, -1, -1, -1,
- -1, -1, 76, -1, -1, -1, -1, 81, 82, -1,
- -1, 85, 86, 87, 27, 28, 90, -1, -1, -1,
- 33, 34, 35, -1, -1, -1, -1, -1, 41, -1,
- -1, -1, -1, -1, -1, 48, -1, -1, 51, -1,
- 53, -1, -1, 56, 118, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, 67, 68, 69, 9, -1, -1,
- -1, -1, -1, 76, -1, -1, -1, -1, 81, 82,
- -1, -1, 85, 86, 87, 27, 28, 90, -1, -1,
- -1, 33, 34, 35, -1, -1, -1, -1, -1, 41,
- -1, -1, -1, -1, -1, -1, 48, -1, -1, 51,
- -1, 53, -1, -1, 56, 118, -1, 10, 11, -1,
- 13, 14, 15, -1, -1, 67, 68, 69, 9, -1,
- -1, -1, -1, -1, 76, -1, 29, -1, -1, 81,
- 82, -1, -1, 85, 86, 87, -1, 28, 90, -1,
- -1, -1, 33, 34, 35, -1, -1, -1, 10, 11,
- 41, 13, 14, 15, -1, -1, -1, 48, -1, -1,
- 51, -1, 53, -1, -1, 56, 118, -1, 10, 11,
- 32, 13, 14, 15, -1, -1, 67, 68, 69, -1,
- -1, -1, -1, -1, -1, 76, -1, -1, -1, -1,
- 81, 82, 95, -1, 85, 86, 87, -1, -1, 90,
- 42, 104, 105, 106, 107, 108, 109, 110, -1, 10,
- 11, -1, 13, 14, 15, -1, 10, 11, -1, 13,
- 14, 15, -1, -1, -1, -1, -1, 118, 29, -1,
- -1, -1, -1, 95, -1, -1, 10, 11, 32, 13,
- 14, 15, 104, 105, 106, 107, 108, 109, 110, -1,
- -1, -1, -1, 95, -1, -1, -1, -1, -1, -1,
- -1, -1, 104, 105, 106, 107, 108, 109, 110, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, 95, -1, -1, -1, -1, -1,
- -1, 95, -1, 104, 105, 106, 107, 108, 109, 110,
- 104, 105, 106, 107, 108, 109, 110, -1, -1, -1,
- -1, 95, -1, -1, -1, -1, -1, -1, -1, -1,
- 104, 105, 106, 107, 108, 109, 110
-};
-
-/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing
- symbol of state STATE-NUM. */
-static const yytype_uint8 yystos[] =
-{
- 0, 16, 121, 192, 9, 0, 113, 114, 9, 184,
- 185, 17, 18, 115, 117, 19, 20, 21, 22, 24,
- 103, 183, 183, 25, 184, 9, 186, 187, 183, 64,
- 186, 188, 189, 190, 191, 113, 65, 91, 26, 190,
- 9, 9, 9, 28, 33, 34, 35, 41, 48, 51,
- 53, 56, 67, 68, 69, 76, 81, 82, 85, 86,
- 87, 90, 118, 122, 123, 127, 128, 129, 142, 143,
- 144, 148, 149, 150, 151, 152, 153, 154, 155, 159,
- 160, 161, 162, 163, 164, 165, 166, 175, 179, 181,
- 182, 25, 113, 63, 3, 4, 5, 6, 7, 8,
- 9, 12, 66, 71, 72, 73, 74, 75, 77, 78,
- 79, 80, 83, 84, 107, 114, 124, 125, 124, 36,
- 37, 109, 124, 134, 135, 136, 9, 49, 9, 100,
- 180, 39, 57, 59, 177, 9, 9, 9, 142, 88,
- 88, 9, 27, 122, 113, 114, 113, 50, 142, 113,
- 40, 137, 147, 113, 113, 137, 147, 113, 113, 113,
- 113, 113, 113, 113, 113, 113, 113, 113, 113, 113,
- 113, 113, 113, 142, 124, 112, 124, 112, 124, 124,
- 10, 11, 13, 14, 15, 29, 95, 104, 105, 106,
- 107, 108, 109, 110, 114, 32, 114, 114, 49, 117,
- 39, 17, 180, 52, 180, 180, 60, 178, 49, 114,
- 124, 133, 114, 54, 124, 113, 70, 70, 115, 124,
- 124, 124, 124, 124, 123, 5, 124, 124, 124, 124,
- 124, 124, 124, 133, 123, 124, 38, 109, 9, 132,
- 134, 131, 180, 124, 9, 145, 146, 114, 58, 9,
- 130, 132, 116, 126, 115, 117, 133, 55, 30, 31,
- 156, 157, 158, 115, 27, 115, 9, 115, 117, 40,
- 117, 137, 42, 104, 117, 9, 167, 168, 9, 114,
- 115, 117, 124, 115, 9, 123, 124, 156, 27, 32,
- 115, 9, 180, 41, 138, 124, 124, 145, 183, 115,
- 117, 62, 115, 119, 116, 29, 28, 51, 92, 139,
- 32, 114, 169, 61, 172, 167, 180, 123, 17, 44,
- 141, 123, 3, 89, 170, 101, 173, 114, 93, 45,
- 27, 115, 12, 171, 102, 174, 9, 176, 94, 9,
- 32, 8, 104, 115, 117, 46, 47, 140, 3, 9
-};
-
-#define yyerrok (yyerrstatus = 0)
-#define yyclearin (yychar = YYEMPTY)
-#define YYEMPTY (-2)
-#define YYEOF 0
-
-#define YYACCEPT goto yyacceptlab
-#define YYABORT goto yyabortlab
-#define YYERROR goto yyerrorlab
-
-
-/* Like YYERROR except do call yyerror. This remains here temporarily
- to ease the transition to the new meaning of YYERROR, for GCC.
- Once GCC version 2 has supplanted version 1, this can go. */
-
-#define YYFAIL goto yyerrlab
-
-#define YYRECOVERING() (!!yyerrstatus)
-
-#define YYBACKUP(Token, Value) \
-do \
- if (yychar == YYEMPTY && yylen == 1) \
- { \
- yychar = (Token); \
- yylval = (Value); \
- yytoken = YYTRANSLATE (yychar); \
- YYPOPSTACK (1); \
- goto yybackup; \
- } \
- else \
- { \
- yyerror (YY_("syntax error: cannot back up")); \
- YYERROR; \
- } \
-while (YYID (0))
-
-
-#define YYTERROR 1
-#define YYERRCODE 256
-
-
-/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
- If N is 0, then set CURRENT to the empty location which ends
- the previous symbol: RHS[0] (always defined). */
-
-#define YYRHSLOC(Rhs, K) ((Rhs)[K])
-#ifndef YYLLOC_DEFAULT
-# define YYLLOC_DEFAULT(Current, Rhs, N) \
- do \
- if (YYID (N)) \
- { \
- (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
- (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
- (Current).last_line = YYRHSLOC (Rhs, N).last_line; \
- (Current).last_column = YYRHSLOC (Rhs, N).last_column; \
- } \
- else \
- { \
- (Current).first_line = (Current).last_line = \
- YYRHSLOC (Rhs, 0).last_line; \
- (Current).first_column = (Current).last_column = \
- YYRHSLOC (Rhs, 0).last_column; \
- } \
- while (YYID (0))
-#endif
-
-
-/* YY_LOCATION_PRINT -- Print the location on the stream.
- This macro was not mandated originally: define only if we know
- we won't break user code: when these are the locations we know. */
-
-#ifndef YY_LOCATION_PRINT
-# if defined YYLTYPE_IS_TRIVIAL && YYLTYPE_IS_TRIVIAL
-# define YY_LOCATION_PRINT(File, Loc) \
- fprintf (File, "%d.%d-%d.%d", \
- (Loc).first_line, (Loc).first_column, \
- (Loc).last_line, (Loc).last_column)
-# else
-# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
-# endif
-#endif
-
-
-/* YYLEX -- calling `yylex' with the right arguments. */
-
-#ifdef YYLEX_PARAM
-# define YYLEX yylex (YYLEX_PARAM)
-#else
-# define YYLEX yylex ()
-#endif
-
-/* Enable debugging if requested. */
-#if YYDEBUG
-
-# ifndef YYFPRINTF
-# include <stdio.h> /* INFRINGES ON USER NAME SPACE */
-# define YYFPRINTF fprintf
-# endif
-
-# define YYDPRINTF(Args) \
-do { \
- if (yydebug) \
- YYFPRINTF Args; \
-} while (YYID (0))
-
-# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \
-do { \
- if (yydebug) \
- { \
- YYFPRINTF (stderr, "%s ", Title); \
- yy_symbol_print (stderr, \
- Type, Value); \
- YYFPRINTF (stderr, "\n"); \
- } \
-} while (YYID (0))
-
-
-/*--------------------------------.
-| Print this symbol on YYOUTPUT. |
-`--------------------------------*/
-
-/*ARGSUSED*/
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
-#else
-static void
-yy_symbol_value_print (yyoutput, yytype, yyvaluep)
- FILE *yyoutput;
- int yytype;
- YYSTYPE const * const yyvaluep;
-#endif
-{
- if (!yyvaluep)
- return;
-# ifdef YYPRINT
- if (yytype < YYNTOKENS)
- YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
-# else
- YYUSE (yyoutput);
-# endif
- switch (yytype)
- {
- default:
- break;
- }
-}
-
-
-/*--------------------------------.
-| Print this symbol on YYOUTPUT. |
-`--------------------------------*/
-
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep)
-#else
-static void
-yy_symbol_print (yyoutput, yytype, yyvaluep)
- FILE *yyoutput;
- int yytype;
- YYSTYPE const * const yyvaluep;
-#endif
-{
- if (yytype < YYNTOKENS)
- YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
- else
- YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
-
- yy_symbol_value_print (yyoutput, yytype, yyvaluep);
- YYFPRINTF (yyoutput, ")");
-}
-
-/*------------------------------------------------------------------.
-| yy_stack_print -- Print the state stack from its BOTTOM up to its |
-| TOP (included). |
-`------------------------------------------------------------------*/
-
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yy_stack_print (yytype_int16 *bottom, yytype_int16 *top)
-#else
-static void
-yy_stack_print (bottom, top)
- yytype_int16 *bottom;
- yytype_int16 *top;
-#endif
-{
- YYFPRINTF (stderr, "Stack now");
- for (; bottom <= top; ++bottom)
- YYFPRINTF (stderr, " %d", *bottom);
- YYFPRINTF (stderr, "\n");
-}
-
-# define YY_STACK_PRINT(Bottom, Top) \
-do { \
- if (yydebug) \
- yy_stack_print ((Bottom), (Top)); \
-} while (YYID (0))
-
-
-/*------------------------------------------------.
-| Report that the YYRULE is going to be reduced. |
-`------------------------------------------------*/
-
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yy_reduce_print (YYSTYPE *yyvsp, int yyrule)
-#else
-static void
-yy_reduce_print (yyvsp, yyrule)
- YYSTYPE *yyvsp;
- int yyrule;
-#endif
-{
- int yynrhs = yyr2[yyrule];
- int yyi;
- unsigned long int yylno = yyrline[yyrule];
- YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n",
- yyrule - 1, yylno);
- /* The symbols being reduced. */
- for (yyi = 0; yyi < yynrhs; yyi++)
- {
- fprintf (stderr, " $%d = ", yyi + 1);
- yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi],
- &(yyvsp[(yyi + 1) - (yynrhs)])
- );
- fprintf (stderr, "\n");
- }
-}
-
-# define YY_REDUCE_PRINT(Rule) \
-do { \
- if (yydebug) \
- yy_reduce_print (yyvsp, Rule); \
-} while (YYID (0))
-
-/* Nonzero means print parse trace. It is left uninitialized so that
- multiple parsers can coexist. */
-int yydebug;
-#else /* !YYDEBUG */
-# define YYDPRINTF(Args)
-# define YY_SYMBOL_PRINT(Title, Type, Value, Location)
-# define YY_STACK_PRINT(Bottom, Top)
-# define YY_REDUCE_PRINT(Rule)
-#endif /* !YYDEBUG */
-
-
-/* YYINITDEPTH -- initial size of the parser's stacks. */
-#ifndef YYINITDEPTH
-# define YYINITDEPTH 200
-#endif
-
-/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only
- if the built-in stack extension method is used).
-
- Do not make this value too large; the results are undefined if
- YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH)
- evaluated with infinite-precision integer arithmetic. */
-
-#ifndef YYMAXDEPTH
-# define YYMAXDEPTH 10000
-#endif
-
-
-
-#if YYERROR_VERBOSE
-
-# ifndef yystrlen
-# if defined __GLIBC__ && defined _STRING_H
-# define yystrlen strlen
-# else
-/* Return the length of YYSTR. */
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static YYSIZE_T
-yystrlen (const char *yystr)
-#else
-static YYSIZE_T
-yystrlen (yystr)
- const char *yystr;
-#endif
-{
- YYSIZE_T yylen;
- for (yylen = 0; yystr[yylen]; yylen++)
- continue;
- return yylen;
-}
-# endif
-# endif
-
-# ifndef yystpcpy
-# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE
-# define yystpcpy stpcpy
-# else
-/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in
- YYDEST. */
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static char *
-yystpcpy (char *yydest, const char *yysrc)
-#else
-static char *
-yystpcpy (yydest, yysrc)
- char *yydest;
- const char *yysrc;
-#endif
-{
- char *yyd = yydest;
- const char *yys = yysrc;
-
- while ((*yyd++ = *yys++) != '\0')
- continue;
-
- return yyd - 1;
-}
-# endif
-# endif
-
-# ifndef yytnamerr
-/* Copy to YYRES the contents of YYSTR after stripping away unnecessary
- quotes and backslashes, so that it's suitable for yyerror. The
- heuristic is that double-quoting is unnecessary unless the string
- contains an apostrophe, a comma, or backslash (other than
- backslash-backslash). YYSTR is taken from yytname. If YYRES is
- null, do not copy; instead, return the length of what the result
- would have been. */
-static YYSIZE_T
-yytnamerr (char *yyres, const char *yystr)
-{
- if (*yystr == '"')
- {
- YYSIZE_T yyn = 0;
- char const *yyp = yystr;
-
- for (;;)
- switch (*++yyp)
- {
- case '\'':
- case ',':
- goto do_not_strip_quotes;
-
- case '\\':
- if (*++yyp != '\\')
- goto do_not_strip_quotes;
- /* Fall through. */
- default:
- if (yyres)
- yyres[yyn] = *yyp;
- yyn++;
- break;
-
- case '"':
- if (yyres)
- yyres[yyn] = '\0';
- return yyn;
- }
- do_not_strip_quotes: ;
- }
-
- if (! yyres)
- return yystrlen (yystr);
-
- return yystpcpy (yyres, yystr) - yyres;
-}
-# endif
-
-/* Copy into YYRESULT an error message about the unexpected token
- YYCHAR while in state YYSTATE. Return the number of bytes copied,
- including the terminating null byte. If YYRESULT is null, do not
- copy anything; just return the number of bytes that would be
- copied. As a special case, return 0 if an ordinary "syntax error"
- message will do. Return YYSIZE_MAXIMUM if overflow occurs during
- size calculation. */
-static YYSIZE_T
-yysyntax_error (char *yyresult, int yystate, int yychar)
-{
- int yyn = yypact[yystate];
-
- if (! (YYPACT_NINF < yyn && yyn <= YYLAST))
- return 0;
- else
- {
- int yytype = YYTRANSLATE (yychar);
- YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]);
- YYSIZE_T yysize = yysize0;
- YYSIZE_T yysize1;
- int yysize_overflow = 0;
- enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
- char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
- int yyx;
-
-# if 0
- /* This is so xgettext sees the translatable formats that are
- constructed on the fly. */
- YY_("syntax error, unexpected %s");
- YY_("syntax error, unexpected %s, expecting %s");
- YY_("syntax error, unexpected %s, expecting %s or %s");
- YY_("syntax error, unexpected %s, expecting %s or %s or %s");
- YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s");
-# endif
- char *yyfmt;
- char const *yyf;
- static char const yyunexpected[] = "syntax error, unexpected %s";
- static char const yyexpecting[] = ", expecting %s";
- static char const yyor[] = " or %s";
- char yyformat[sizeof yyunexpected
- + sizeof yyexpecting - 1
- + ((YYERROR_VERBOSE_ARGS_MAXIMUM - 2)
- * (sizeof yyor - 1))];
- char const *yyprefix = yyexpecting;
-
- /* Start YYX at -YYN if negative to avoid negative indexes in
- YYCHECK. */
- int yyxbegin = yyn < 0 ? -yyn : 0;
-
- /* Stay within bounds of both yycheck and yytname. */
- int yychecklim = YYLAST - yyn + 1;
- int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS;
- int yycount = 1;
-
- yyarg[0] = yytname[yytype];
- yyfmt = yystpcpy (yyformat, yyunexpected);
-
- for (yyx = yyxbegin; yyx < yyxend; ++yyx)
- if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR)
- {
- if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM)
- {
- yycount = 1;
- yysize = yysize0;
- yyformat[sizeof yyunexpected - 1] = '\0';
- break;
- }
- yyarg[yycount++] = yytname[yyx];
- yysize1 = yysize + yytnamerr (0, yytname[yyx]);
- yysize_overflow |= (yysize1 < yysize);
- yysize = yysize1;
- yyfmt = yystpcpy (yyfmt, yyprefix);
- yyprefix = yyor;
- }
-
- yyf = YY_(yyformat);
- yysize1 = yysize + yystrlen (yyf);
- yysize_overflow |= (yysize1 < yysize);
- yysize = yysize1;
-
- if (yysize_overflow)
- return YYSIZE_MAXIMUM;
-
- if (yyresult)
- {
- /* Avoid sprintf, as that infringes on the user's name space.
- Don't have undefined behavior even if the translation
- produced a string with the wrong number of "%s"s. */
- char *yyp = yyresult;
- int yyi = 0;
- while ((*yyp = *yyf) != '\0')
- {
- if (*yyp == '%' && yyf[1] == 's' && yyi < yycount)
- {
- yyp += yytnamerr (yyp, yyarg[yyi++]);
- yyf += 2;
- }
- else
- {
- yyp++;
- yyf++;
- }
- }
- }
- return yysize;
- }
-}
-#endif /* YYERROR_VERBOSE */
-
-
-/*-----------------------------------------------.
-| Release the memory associated to this symbol. |
-`-----------------------------------------------*/
-
-/*ARGSUSED*/
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-static void
-yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
-#else
-static void
-yydestruct (yymsg, yytype, yyvaluep)
- const char *yymsg;
- int yytype;
- YYSTYPE *yyvaluep;
-#endif
-{
- YYUSE (yyvaluep);
-
- if (!yymsg)
- yymsg = "Deleting";
- YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
-}
-
-
-/* Prevent warnings from -Wmissing-prototypes. */
-
-#ifdef YYPARSE_PARAM
-#if defined __STDC__ || defined __cplusplus
-int yyparse (void *YYPARSE_PARAM);
-#else
-int yyparse ();
-#endif
-#else /* ! YYPARSE_PARAM */
-#if defined __STDC__ || defined __cplusplus
-int yyparse (void);
-#else
-int yyparse ();
-#endif
-#endif /* ! YYPARSE_PARAM */
-
-
-
-/* The look-ahead symbol. */
-int yychar;
-
-/* The semantic value of the look-ahead symbol. */
-YYSTYPE yylval;
-
-/* Number of syntax errors so far. */
-int yynerrs;
-
-
-
-/*----------.
-| yyparse. |
-`----------*/
-
-#ifdef YYPARSE_PARAM
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-int
-yyparse (void *YYPARSE_PARAM)
-#else
-int
-yyparse (YYPARSE_PARAM)
- void *YYPARSE_PARAM;
-#endif
-#else /* ! YYPARSE_PARAM */
-#if (defined __STDC__ || defined __C99__FUNC__ \
- || defined __cplusplus || defined _MSC_VER)
-int
-yyparse (void)
-#else
-int
-yyparse ()
-
-#endif
-#endif
-{
-
- int yystate;
- int yyn;
- int yyresult;
- /* Number of tokens to shift before error messages enabled. */
- int yyerrstatus;
- /* Look-ahead token as an internal (translated) token number. */
- int yytoken = 0;
-#if YYERROR_VERBOSE
- /* Buffer for error messages, and its allocated size. */
- char yymsgbuf[128];
- char *yymsg = yymsgbuf;
- YYSIZE_T yymsg_alloc = sizeof yymsgbuf;
-#endif
-
- /* Three stacks and their tools:
- `yyss': related to states,
- `yyvs': related to semantic values,
- `yyls': related to locations.
-
- Refer to the stacks thru separate pointers, to allow yyoverflow
- to reallocate them elsewhere. */
-
- /* The state stack. */
- yytype_int16 yyssa[YYINITDEPTH];
- yytype_int16 *yyss = yyssa;
- yytype_int16 *yyssp;
-
- /* The semantic value stack. */
- YYSTYPE yyvsa[YYINITDEPTH];
- YYSTYPE *yyvs = yyvsa;
- YYSTYPE *yyvsp;
-
-
-
-#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N))
-
- YYSIZE_T yystacksize = YYINITDEPTH;
-
- /* The variables used to return semantic value and location from the
- action routines. */
- YYSTYPE yyval;
-
-
- /* The number of symbols on the RHS of the reduced rule.
- Keep to zero when no symbol should be popped. */
- int yylen = 0;
-
- YYDPRINTF ((stderr, "Starting parse\n"));
-
- yystate = 0;
- yyerrstatus = 0;
- yynerrs = 0;
- yychar = YYEMPTY; /* Cause a token to be read. */
-
- /* Initialize stack pointers.
- Waste one element of value and location stack
- so that they stay on the same level as the state stack.
- The wasted elements are never initialized. */
-
- yyssp = yyss;
- yyvsp = yyvs;
-
- goto yysetstate;
-
-/*------------------------------------------------------------.
-| yynewstate -- Push a new state, which is found in yystate. |
-`------------------------------------------------------------*/
- yynewstate:
- /* In all cases, when you get here, the value and location stacks
- have just been pushed. So pushing a state here evens the stacks. */
- yyssp++;
-
- yysetstate:
- *yyssp = yystate;
-
- if (yyss + yystacksize - 1 <= yyssp)
- {
- /* Get the current used size of the three stacks, in elements. */
- YYSIZE_T yysize = yyssp - yyss + 1;
-
-#ifdef yyoverflow
- {
- /* Give user a chance to reallocate the stack. Use copies of
- these so that the &'s don't force the real ones into
- memory. */
- YYSTYPE *yyvs1 = yyvs;
- yytype_int16 *yyss1 = yyss;
-
-
- /* Each stack pointer address is followed by the size of the
- data in use in that stack, in bytes. This used to be a
- conditional around just the two extra args, but that might
- be undefined if yyoverflow is a macro. */
- yyoverflow (YY_("memory exhausted"),
- &yyss1, yysize * sizeof (*yyssp),
- &yyvs1, yysize * sizeof (*yyvsp),
-
- &yystacksize);
-
- yyss = yyss1;
- yyvs = yyvs1;
- }
-#else /* no yyoverflow */
-# ifndef YYSTACK_RELOCATE
- goto yyexhaustedlab;
-# else
- /* Extend the stack our own way. */
- if (YYMAXDEPTH <= yystacksize)
- goto yyexhaustedlab;
- yystacksize *= 2;
- if (YYMAXDEPTH < yystacksize)
- yystacksize = YYMAXDEPTH;
-
- {
- yytype_int16 *yyss1 = yyss;
- union yyalloc *yyptr =
- (union yyalloc*) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize));
- if (! yyptr)
- goto yyexhaustedlab;
- YYSTACK_RELOCATE (yyss);
- YYSTACK_RELOCATE (yyvs);
-
-# undef YYSTACK_RELOCATE
- if (yyss1 != yyssa)
- YYSTACK_FREE (yyss1);
- }
-# endif
-#endif /* no yyoverflow */
-
- yyssp = yyss + yysize - 1;
- yyvsp = yyvs + yysize - 1;
-
-
- YYDPRINTF ((stderr, "Stack size increased to %lu\n",
- (unsigned long int) yystacksize));
-
- if (yyss + yystacksize - 1 <= yyssp)
- YYABORT;
- }
-
- YYDPRINTF ((stderr, "Entering state %d\n", yystate));
-
- goto yybackup;
-
-/*-----------.
-| yybackup. |
-`-----------*/
-yybackup:
-
- /* Do appropriate processing given the current state. Read a
- look-ahead token if we need one and don't already have one. */
-
- /* First try to decide what to do without reference to look-ahead token. */
- yyn = yypact[yystate];
- if (yyn == YYPACT_NINF)
- goto yydefault;
-
- /* Not known => get a look-ahead token if don't already have one. */
-
- /* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol. */
- if (yychar == YYEMPTY)
- {
- YYDPRINTF ((stderr, "Reading a token: "));
- yychar = YYLEX;
- }
-
- if (yychar <= YYEOF)
- {
- yychar = yytoken = YYEOF;
- YYDPRINTF ((stderr, "Now at end of input.\n"));
- }
- else
- {
- yytoken = YYTRANSLATE (yychar);
- YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
- }
-
- /* If the proper action on seeing token YYTOKEN is to reduce or to
- detect an error, take that action. */
- yyn += yytoken;
- if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken)
- goto yydefault;
- yyn = yytable[yyn];
- if (yyn <= 0)
- {
- if (yyn == 0 || yyn == YYTABLE_NINF)
- goto yyerrlab;
- yyn = -yyn;
- goto yyreduce;
- }
-
- if (yyn == YYFINAL)
- YYACCEPT;
-
- /* Count tokens shifted since error; after three, turn off error
- status. */
- if (yyerrstatus)
- yyerrstatus--;
-
- /* Shift the look-ahead token. */
- YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
-
- /* Discard the shifted token unless it is eof. */
- if (yychar != YYEOF)
- yychar = YYEMPTY;
-
- yystate = yyn;
- *++yyvsp = yylval;
-
- goto yynewstate;
-
-
-/*-----------------------------------------------------------.
-| yydefault -- do the default action for the current state. |
-`-----------------------------------------------------------*/
-yydefault:
- yyn = yydefact[yystate];
- if (yyn == 0)
- goto yyerrlab;
- goto yyreduce;
-
-
-/*-----------------------------.
-| yyreduce -- Do a reduction. |
-`-----------------------------*/
-yyreduce:
- /* yyn is the number of a rule to reduce with. */
- yylen = yyr2[yyn];
-
- /* If YYLEN is nonzero, implement the default value of the action:
- `$$ = $1'.
-
- Otherwise, the following line sets YYVAL to garbage.
- This behavior is undocumented and Bison
- users should not rely upon it. Assigning to YYVAL
- unconditionally makes the parser a bit smaller, and it avoids a
- GCC warning that YYVAL may be used uninitialized. */
- yyval = yyvsp[1-yylen];
-
-
- YY_REDUCE_PRINT (yyn);
- switch (yyn)
- {
- case 25:
-#line 190 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
- break;
-
- case 26:
-#line 192 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (2)]), (yyvsp[(2) - (2)])); ;}
- break;
-
- case 27:
-#line 196 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
- break;
-
- case 28:
-#line 198 "pars0grm.y"
- { (yyval) = pars_func((yyvsp[(1) - (4)]), (yyvsp[(3) - (4)])); ;}
- break;
-
- case 29:
-#line 199 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
- break;
-
- case 30:
-#line 200 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
- break;
-
- case 31:
-#line 201 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
- break;
-
- case 32:
-#line 202 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
- break;
-
- case 33:
-#line 203 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
- break;
-
- case 34:
-#line 204 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
- break;
-
- case 35:
-#line 205 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]);;}
- break;
-
- case 36:
-#line 206 "pars0grm.y"
- { (yyval) = pars_op('+', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 37:
-#line 207 "pars0grm.y"
- { (yyval) = pars_op('-', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 38:
-#line 208 "pars0grm.y"
- { (yyval) = pars_op('*', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 39:
-#line 209 "pars0grm.y"
- { (yyval) = pars_op('/', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 40:
-#line 210 "pars0grm.y"
- { (yyval) = pars_op('-', (yyvsp[(2) - (2)]), NULL); ;}
- break;
-
- case 41:
-#line 211 "pars0grm.y"
- { (yyval) = (yyvsp[(2) - (3)]); ;}
- break;
-
- case 42:
-#line 212 "pars0grm.y"
- { (yyval) = pars_op('=', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 43:
-#line 214 "pars0grm.y"
- { (yyval) = pars_op(PARS_LIKE_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 44:
-#line 215 "pars0grm.y"
- { (yyval) = pars_op('<', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 45:
-#line 216 "pars0grm.y"
- { (yyval) = pars_op('>', (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 46:
-#line 217 "pars0grm.y"
- { (yyval) = pars_op(PARS_GE_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 47:
-#line 218 "pars0grm.y"
- { (yyval) = pars_op(PARS_LE_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 48:
-#line 219 "pars0grm.y"
- { (yyval) = pars_op(PARS_NE_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 49:
-#line 220 "pars0grm.y"
- { (yyval) = pars_op(PARS_AND_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 50:
-#line 221 "pars0grm.y"
- { (yyval) = pars_op(PARS_OR_TOKEN, (yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 51:
-#line 222 "pars0grm.y"
- { (yyval) = pars_op(PARS_NOT_TOKEN, (yyvsp[(2) - (2)]), NULL); ;}
- break;
-
- case 52:
-#line 224 "pars0grm.y"
- { (yyval) = pars_op(PARS_NOTFOUND_TOKEN, (yyvsp[(1) - (3)]), NULL); ;}
- break;
-
- case 53:
-#line 226 "pars0grm.y"
- { (yyval) = pars_op(PARS_NOTFOUND_TOKEN, (yyvsp[(1) - (3)]), NULL); ;}
- break;
-
- case 54:
-#line 230 "pars0grm.y"
- { (yyval) = &pars_to_char_token; ;}
- break;
-
- case 55:
-#line 231 "pars0grm.y"
- { (yyval) = &pars_to_number_token; ;}
- break;
-
- case 56:
-#line 232 "pars0grm.y"
- { (yyval) = &pars_to_binary_token; ;}
- break;
-
- case 57:
-#line 234 "pars0grm.y"
- { (yyval) = &pars_binary_to_number_token; ;}
- break;
-
- case 58:
-#line 235 "pars0grm.y"
- { (yyval) = &pars_substr_token; ;}
- break;
-
- case 59:
-#line 236 "pars0grm.y"
- { (yyval) = &pars_concat_token; ;}
- break;
-
- case 60:
-#line 237 "pars0grm.y"
- { (yyval) = &pars_instr_token; ;}
- break;
-
- case 61:
-#line 238 "pars0grm.y"
- { (yyval) = &pars_length_token; ;}
- break;
-
- case 62:
-#line 239 "pars0grm.y"
- { (yyval) = &pars_sysdate_token; ;}
- break;
-
- case 63:
-#line 240 "pars0grm.y"
- { (yyval) = &pars_rnd_token; ;}
- break;
-
- case 64:
-#line 241 "pars0grm.y"
- { (yyval) = &pars_rnd_str_token; ;}
- break;
-
- case 68:
-#line 252 "pars0grm.y"
- { (yyval) = pars_stored_procedure_call(
- static_cast<sym_node_t*>((yyvsp[(2) - (6)]))); ;}
- break;
-
- case 69:
-#line 258 "pars0grm.y"
- { (yyval) = pars_procedure_call((yyvsp[(1) - (4)]), (yyvsp[(3) - (4)])); ;}
- break;
-
- case 70:
-#line 262 "pars0grm.y"
- { (yyval) = &pars_replstr_token; ;}
- break;
-
- case 71:
-#line 263 "pars0grm.y"
- { (yyval) = &pars_printf_token; ;}
- break;
-
- case 72:
-#line 264 "pars0grm.y"
- { (yyval) = &pars_assert_token; ;}
- break;
-
- case 73:
-#line 268 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (3)]); ;}
- break;
-
- case 74:
-#line 272 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
- break;
-
- case 75:
-#line 274 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 76:
-#line 278 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 77:
-#line 279 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
- break;
-
- case 78:
-#line 281 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 79:
-#line 285 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 80:
-#line 286 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)]));;}
- break;
-
- case 81:
-#line 287 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 82:
-#line 291 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]); ;}
- break;
-
- case 83:
-#line 293 "pars0grm.y"
- { (yyval) = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
- sym_tab_add_int_lit(
- pars_sym_tab_global, 1))); ;}
- break;
-
- case 84:
-#line 298 "pars0grm.y"
- { (yyval) = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
- pars_func(&pars_distinct_token,
- que_node_list_add_last(
- NULL, (yyvsp[(4) - (5)]))))); ;}
- break;
-
- case 85:
-#line 304 "pars0grm.y"
- { (yyval) = pars_func(&pars_sum_token,
- que_node_list_add_last(NULL,
- (yyvsp[(3) - (4)]))); ;}
- break;
-
- case 86:
-#line 310 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 87:
-#line 311 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
- break;
-
- case 88:
-#line 313 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 89:
-#line 317 "pars0grm.y"
- { (yyval) = pars_select_list(&pars_star_denoter,
- NULL); ;}
- break;
-
- case 90:
-#line 320 "pars0grm.y"
- { (yyval) = pars_select_list(
- (yyvsp[(1) - (3)]), static_cast<sym_node_t*>((yyvsp[(3) - (3)]))); ;}
- break;
-
- case 91:
-#line 322 "pars0grm.y"
- { (yyval) = pars_select_list((yyvsp[(1) - (1)]), NULL); ;}
- break;
-
- case 92:
-#line 326 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 93:
-#line 327 "pars0grm.y"
- { (yyval) = (yyvsp[(2) - (2)]); ;}
- break;
-
- case 94:
-#line 331 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 95:
-#line 333 "pars0grm.y"
- { (yyval) = &pars_update_token; ;}
- break;
-
- case 96:
-#line 337 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 97:
-#line 339 "pars0grm.y"
- { (yyval) = &pars_share_token; ;}
- break;
-
- case 98:
-#line 343 "pars0grm.y"
- { (yyval) = &pars_asc_token; ;}
- break;
-
- case 99:
-#line 344 "pars0grm.y"
- { (yyval) = &pars_asc_token; ;}
- break;
-
- case 100:
-#line 345 "pars0grm.y"
- { (yyval) = &pars_desc_token; ;}
- break;
-
- case 101:
-#line 349 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 102:
-#line 351 "pars0grm.y"
- { (yyval) = pars_order_by(
- static_cast<sym_node_t*>((yyvsp[(3) - (4)])),
- static_cast<pars_res_word_t*>((yyvsp[(4) - (4)]))); ;}
- break;
-
- case 103:
-#line 362 "pars0grm.y"
- { (yyval) = pars_select_statement(
- static_cast<sel_node_t*>((yyvsp[(2) - (8)])),
- static_cast<sym_node_t*>((yyvsp[(4) - (8)])),
- static_cast<que_node_t*>((yyvsp[(5) - (8)])),
- static_cast<pars_res_word_t*>((yyvsp[(6) - (8)])),
- static_cast<pars_res_word_t*>((yyvsp[(7) - (8)])),
- static_cast<order_node_t*>((yyvsp[(8) - (8)]))); ;}
- break;
-
- case 104:
-#line 373 "pars0grm.y"
- { (yyval) = (yyvsp[(3) - (3)]); ;}
- break;
-
- case 105:
-#line 378 "pars0grm.y"
- { (yyval) = pars_insert_statement(
- static_cast<sym_node_t*>((yyvsp[(1) - (5)])), (yyvsp[(4) - (5)]), NULL); ;}
- break;
-
- case 106:
-#line 381 "pars0grm.y"
- { (yyval) = pars_insert_statement(
- static_cast<sym_node_t*>((yyvsp[(1) - (2)])),
- NULL,
- static_cast<sel_node_t*>((yyvsp[(2) - (2)]))); ;}
- break;
-
- case 107:
-#line 388 "pars0grm.y"
- { (yyval) = pars_column_assignment(
- static_cast<sym_node_t*>((yyvsp[(1) - (3)])),
- static_cast<que_node_t*>((yyvsp[(3) - (3)]))); ;}
- break;
-
- case 108:
-#line 394 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
- break;
-
- case 109:
-#line 396 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 110:
-#line 402 "pars0grm.y"
- { (yyval) = (yyvsp[(4) - (4)]); ;}
- break;
-
- case 111:
-#line 408 "pars0grm.y"
- { (yyval) = pars_update_statement_start(
- FALSE,
- static_cast<sym_node_t*>((yyvsp[(2) - (4)])),
- static_cast<col_assign_node_t*>((yyvsp[(4) - (4)]))); ;}
- break;
-
- case 112:
-#line 416 "pars0grm.y"
- { (yyval) = pars_update_statement(
- static_cast<upd_node_t*>((yyvsp[(1) - (2)])),
- NULL,
- static_cast<que_node_t*>((yyvsp[(2) - (2)]))); ;}
- break;
-
- case 113:
-#line 424 "pars0grm.y"
- { (yyval) = pars_update_statement(
- static_cast<upd_node_t*>((yyvsp[(1) - (2)])),
- static_cast<sym_node_t*>((yyvsp[(2) - (2)])),
- NULL); ;}
- break;
-
- case 114:
-#line 432 "pars0grm.y"
- { (yyval) = pars_update_statement_start(
- TRUE,
- static_cast<sym_node_t*>((yyvsp[(3) - (3)])), NULL); ;}
- break;
-
- case 115:
-#line 439 "pars0grm.y"
- { (yyval) = pars_update_statement(
- static_cast<upd_node_t*>((yyvsp[(1) - (2)])),
- NULL,
- static_cast<que_node_t*>((yyvsp[(2) - (2)]))); ;}
- break;
-
- case 116:
-#line 447 "pars0grm.y"
- { (yyval) = pars_update_statement(
- static_cast<upd_node_t*>((yyvsp[(1) - (2)])),
- static_cast<sym_node_t*>((yyvsp[(2) - (2)])),
- NULL); ;}
- break;
-
- case 117:
-#line 455 "pars0grm.y"
- { (yyval) = pars_row_printf_statement(
- static_cast<sel_node_t*>((yyvsp[(2) - (2)]))); ;}
- break;
-
- case 118:
-#line 461 "pars0grm.y"
- { (yyval) = pars_assignment_statement(
- static_cast<sym_node_t*>((yyvsp[(1) - (3)])),
- static_cast<que_node_t*>((yyvsp[(3) - (3)]))); ;}
- break;
-
- case 119:
-#line 469 "pars0grm.y"
- { (yyval) = pars_elsif_element((yyvsp[(2) - (4)]), (yyvsp[(4) - (4)])); ;}
- break;
-
- case 120:
-#line 473 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
- break;
-
- case 121:
-#line 475 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (2)]), (yyvsp[(2) - (2)])); ;}
- break;
-
- case 122:
-#line 479 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 123:
-#line 481 "pars0grm.y"
- { (yyval) = (yyvsp[(2) - (2)]); ;}
- break;
-
- case 124:
-#line 482 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]); ;}
- break;
-
- case 125:
-#line 489 "pars0grm.y"
- { (yyval) = pars_if_statement((yyvsp[(2) - (7)]), (yyvsp[(4) - (7)]), (yyvsp[(5) - (7)])); ;}
- break;
-
- case 126:
-#line 495 "pars0grm.y"
- { (yyval) = pars_while_statement((yyvsp[(2) - (6)]), (yyvsp[(4) - (6)])); ;}
- break;
-
- case 127:
-#line 503 "pars0grm.y"
- { (yyval) = pars_for_statement(
- static_cast<sym_node_t*>((yyvsp[(2) - (10)])),
- (yyvsp[(4) - (10)]), (yyvsp[(6) - (10)]), (yyvsp[(8) - (10)])); ;}
- break;
-
- case 128:
-#line 509 "pars0grm.y"
- { (yyval) = pars_exit_statement(); ;}
- break;
-
- case 129:
-#line 513 "pars0grm.y"
- { (yyval) = pars_return_statement(); ;}
- break;
-
- case 130:
-#line 518 "pars0grm.y"
- { (yyval) = pars_open_statement(
- ROW_SEL_OPEN_CURSOR,
- static_cast<sym_node_t*>((yyvsp[(2) - (2)]))); ;}
- break;
-
- case 131:
-#line 525 "pars0grm.y"
- { (yyval) = pars_open_statement(
- ROW_SEL_CLOSE_CURSOR,
- static_cast<sym_node_t*>((yyvsp[(2) - (2)]))); ;}
- break;
-
- case 132:
-#line 532 "pars0grm.y"
- { (yyval) = pars_fetch_statement(
- static_cast<sym_node_t*>((yyvsp[(2) - (4)])),
- static_cast<sym_node_t*>((yyvsp[(4) - (4)])), NULL); ;}
- break;
-
- case 133:
-#line 536 "pars0grm.y"
- { (yyval) = pars_fetch_statement(
- static_cast<sym_node_t*>((yyvsp[(2) - (4)])),
- NULL,
- static_cast<sym_node_t*>((yyvsp[(4) - (4)]))); ;}
- break;
-
- case 134:
-#line 544 "pars0grm.y"
- { (yyval) = pars_column_def(
- static_cast<sym_node_t*>((yyvsp[(1) - (5)])),
- static_cast<pars_res_word_t*>((yyvsp[(2) - (5)])),
- static_cast<sym_node_t*>((yyvsp[(3) - (5)])),
- (yyvsp[(4) - (5)]), (yyvsp[(5) - (5)])); ;}
- break;
-
- case 135:
-#line 552 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
- break;
-
- case 136:
-#line 554 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 137:
-#line 558 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 138:
-#line 560 "pars0grm.y"
- { (yyval) = (yyvsp[(2) - (3)]); ;}
- break;
-
- case 139:
-#line 564 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 140:
-#line 566 "pars0grm.y"
- { (yyval) = &pars_int_token;
- /* pass any non-NULL pointer */ ;}
- break;
-
- case 141:
-#line 571 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 142:
-#line 573 "pars0grm.y"
- { (yyval) = &pars_int_token;
- /* pass any non-NULL pointer */ ;}
- break;
-
- case 143:
-#line 578 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 144:
-#line 580 "pars0grm.y"
- { (yyval) = &pars_int_token;
- /* pass any non-NULL pointer */ ;}
- break;
-
- case 145:
-#line 585 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 146:
-#line 586 "pars0grm.y"
- { (yyval) = &pars_int_token;
- /* pass any non-NULL pointer */ ;}
- break;
-
- case 147:
-#line 591 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 148:
-#line 593 "pars0grm.y"
- { (yyval) = (yyvsp[(3) - (3)]); ;}
- break;
-
- case 149:
-#line 600 "pars0grm.y"
- { (yyval) = pars_create_table(
- static_cast<sym_node_t*>((yyvsp[(3) - (9)])),
- static_cast<sym_node_t*>((yyvsp[(5) - (9)])),
- static_cast<sym_node_t*>((yyvsp[(8) - (9)])),
- static_cast<sym_node_t*>((yyvsp[(9) - (9)])), (yyvsp[(7) - (9)])); ;}
- break;
-
- case 150:
-#line 608 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
- break;
-
- case 151:
-#line 610 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 152:
-#line 614 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 153:
-#line 615 "pars0grm.y"
- { (yyval) = &pars_unique_token; ;}
- break;
-
- case 154:
-#line 619 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 155:
-#line 620 "pars0grm.y"
- { (yyval) = &pars_clustered_token; ;}
- break;
-
- case 156:
-#line 629 "pars0grm.y"
- { (yyval) = pars_create_index(
- static_cast<pars_res_word_t*>((yyvsp[(2) - (10)])),
- static_cast<pars_res_word_t*>((yyvsp[(3) - (10)])),
- static_cast<sym_node_t*>((yyvsp[(5) - (10)])),
- static_cast<sym_node_t*>((yyvsp[(7) - (10)])),
- static_cast<sym_node_t*>((yyvsp[(9) - (10)]))); ;}
- break;
-
- case 157:
-#line 638 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]); ;}
- break;
-
- case 158:
-#line 639 "pars0grm.y"
- { (yyval) = (yyvsp[(1) - (1)]); ;}
- break;
-
- case 159:
-#line 644 "pars0grm.y"
- { (yyval) = pars_commit_statement(); ;}
- break;
-
- case 160:
-#line 649 "pars0grm.y"
- { (yyval) = pars_rollback_statement(); ;}
- break;
-
- case 161:
-#line 653 "pars0grm.y"
- { (yyval) = &pars_int_token; ;}
- break;
-
- case 162:
-#line 654 "pars0grm.y"
- { (yyval) = &pars_int_token; ;}
- break;
-
- case 163:
-#line 655 "pars0grm.y"
- { (yyval) = &pars_bigint_token; ;}
- break;
-
- case 164:
-#line 656 "pars0grm.y"
- { (yyval) = &pars_char_token; ;}
- break;
-
- case 165:
-#line 657 "pars0grm.y"
- { (yyval) = &pars_binary_token; ;}
- break;
-
- case 166:
-#line 658 "pars0grm.y"
- { (yyval) = &pars_blob_token; ;}
- break;
-
- case 167:
-#line 663 "pars0grm.y"
- { (yyval) = pars_parameter_declaration(
- static_cast<sym_node_t*>((yyvsp[(1) - (3)])),
- PARS_INPUT,
- static_cast<pars_res_word_t*>((yyvsp[(3) - (3)]))); ;}
- break;
-
- case 168:
-#line 668 "pars0grm.y"
- { (yyval) = pars_parameter_declaration(
- static_cast<sym_node_t*>((yyvsp[(1) - (3)])),
- PARS_OUTPUT,
- static_cast<pars_res_word_t*>((yyvsp[(3) - (3)]))); ;}
- break;
-
- case 169:
-#line 675 "pars0grm.y"
- { (yyval) = NULL; ;}
- break;
-
- case 170:
-#line 676 "pars0grm.y"
- { (yyval) = que_node_list_add_last(NULL, (yyvsp[(1) - (1)])); ;}
- break;
-
- case 171:
-#line 678 "pars0grm.y"
- { (yyval) = que_node_list_add_last((yyvsp[(1) - (3)]), (yyvsp[(3) - (3)])); ;}
- break;
-
- case 172:
-#line 683 "pars0grm.y"
- { (yyval) = pars_variable_declaration(
- static_cast<sym_node_t*>((yyvsp[(1) - (3)])),
- static_cast<pars_res_word_t*>((yyvsp[(2) - (3)]))); ;}
- break;
-
- case 176:
-#line 697 "pars0grm.y"
- { (yyval) = pars_cursor_declaration(
- static_cast<sym_node_t*>((yyvsp[(3) - (6)])),
- static_cast<sel_node_t*>((yyvsp[(5) - (6)]))); ;}
- break;
-
- case 177:
-#line 704 "pars0grm.y"
- { (yyval) = pars_function_declaration(
- static_cast<sym_node_t*>((yyvsp[(3) - (4)]))); ;}
- break;
-
- case 183:
-#line 726 "pars0grm.y"
- { (yyval) = pars_procedure_definition(
- static_cast<sym_node_t*>((yyvsp[(2) - (11)])),
- static_cast<sym_node_t*>((yyvsp[(4) - (11)])),
- (yyvsp[(10) - (11)])); ;}
- break;
-
-
-/* Line 1267 of yacc.c. */
-#line 2826 "pars0grm.cc"
- default: break;
- }
- YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc);
-
- YYPOPSTACK (yylen);
- yylen = 0;
- YY_STACK_PRINT (yyss, yyssp);
-
- *++yyvsp = yyval;
-
-
- /* Now `shift' the result of the reduction. Determine what state
- that goes to, based on the state we popped back to and the rule
- number reduced by. */
-
- yyn = yyr1[yyn];
-
- yystate = yypgoto[yyn - YYNTOKENS] + *yyssp;
- if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp)
- yystate = yytable[yystate];
- else
- yystate = yydefgoto[yyn - YYNTOKENS];
-
- goto yynewstate;
-
-
-/*------------------------------------.
-| yyerrlab -- here on detecting error |
-`------------------------------------*/
-yyerrlab:
- /* If not already recovering from an error, report this error. */
- if (!yyerrstatus)
- {
- ++yynerrs;
-#if ! YYERROR_VERBOSE
- yyerror (YY_("syntax error"));
-#else
- {
- YYSIZE_T yysize = yysyntax_error (0, yystate, yychar);
- if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM)
- {
- YYSIZE_T yyalloc = 2 * yysize;
- if (! (yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM))
- yyalloc = YYSTACK_ALLOC_MAXIMUM;
- if (yymsg != yymsgbuf)
- YYSTACK_FREE (yymsg);
- yymsg = (char*) YYSTACK_ALLOC (yyalloc);
- if (yymsg)
- yymsg_alloc = yyalloc;
- else
- {
- yymsg = yymsgbuf;
- yymsg_alloc = sizeof yymsgbuf;
- }
- }
-
- if (0 < yysize && yysize <= yymsg_alloc)
- {
- (void) yysyntax_error (yymsg, yystate, yychar);
- yyerror (yymsg);
- }
- else
- {
- yyerror (YY_("syntax error"));
- if (yysize != 0)
- goto yyexhaustedlab;
- }
- }
-#endif
- }
-
-
-
- if (yyerrstatus == 3)
- {
- /* If just tried and failed to reuse look-ahead token after an
- error, discard it. */
-
- if (yychar <= YYEOF)
- {
- /* Return failure if at end of input. */
- if (yychar == YYEOF)
- YYABORT;
- }
- else
- {
- yydestruct ("Error: discarding",
- yytoken, &yylval);
- yychar = YYEMPTY;
- }
- }
-
- /* Else will try to reuse look-ahead token after shifting the error
- token. */
- goto yyerrlab1;
-
-
-/*---------------------------------------------------.
-| yyerrorlab -- error raised explicitly by YYERROR. |
-`---------------------------------------------------*/
-yyerrorlab:
-
- /* Pacify compilers like GCC when the user code never invokes
- YYERROR and the label yyerrorlab therefore never appears in user
- code. */
- if (/*CONSTCOND*/ 0)
- goto yyerrorlab;
-
- /* Do not reclaim the symbols of the rule which action triggered
- this YYERROR. */
- YYPOPSTACK (yylen);
- yylen = 0;
- YY_STACK_PRINT (yyss, yyssp);
- yystate = *yyssp;
- goto yyerrlab1;
-
-
-/*-------------------------------------------------------------.
-| yyerrlab1 -- common code for both syntax error and YYERROR. |
-`-------------------------------------------------------------*/
-yyerrlab1:
- yyerrstatus = 3; /* Each real token shifted decrements this. */
-
- for (;;)
- {
- yyn = yypact[yystate];
- if (yyn != YYPACT_NINF)
- {
- yyn += YYTERROR;
- if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
- {
- yyn = yytable[yyn];
- if (0 < yyn)
- break;
- }
- }
-
- /* Pop the current state because it cannot handle the error token. */
- if (yyssp == yyss)
- YYABORT;
-
-
- yydestruct ("Error: popping",
- yystos[yystate], yyvsp);
- YYPOPSTACK (1);
- yystate = *yyssp;
- YY_STACK_PRINT (yyss, yyssp);
- }
-
- if (yyn == YYFINAL)
- YYACCEPT;
-
- *++yyvsp = yylval;
-
-
- /* Shift the error token. */
- YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
-
- yystate = yyn;
- goto yynewstate;
-
-
-/*-------------------------------------.
-| yyacceptlab -- YYACCEPT comes here. |
-`-------------------------------------*/
-yyacceptlab:
- yyresult = 0;
- goto yyreturn;
-
-/*-----------------------------------.
-| yyabortlab -- YYABORT comes here. |
-`-----------------------------------*/
-yyabortlab:
- yyresult = 1;
- goto yyreturn;
-
-#ifndef yyoverflow
-/*-------------------------------------------------.
-| yyexhaustedlab -- memory exhaustion comes here. |
-`-------------------------------------------------*/
-yyexhaustedlab:
- yyerror (YY_("memory exhausted"));
- yyresult = 2;
- /* Fall through. */
-#endif
-
-yyreturn:
- if (yychar != YYEOF && yychar != YYEMPTY)
- yydestruct ("Cleanup: discarding lookahead",
- yytoken, &yylval);
- /* Do not reclaim the symbols of the rule which action triggered
- this YYABORT or YYACCEPT. */
- YYPOPSTACK (yylen);
- YY_STACK_PRINT (yyss, yyssp);
- while (yyssp != yyss)
- {
- yydestruct ("Cleanup: popping",
- yystos[*yyssp], yyvsp);
- YYPOPSTACK (1);
- }
-#ifndef yyoverflow
- if (yyss != yyssa)
- YYSTACK_FREE (yyss);
-#endif
-#if YYERROR_VERBOSE
- if (yymsg != yymsgbuf)
- YYSTACK_FREE (yymsg);
-#endif
- /* Make sure YYID is used. */
- return YYID (yyresult);
-}
-
-
-#line 732 "pars0grm.y"
-
-
diff --git a/storage/xtradb/pars/pars0grm.y b/storage/xtradb/pars/pars0grm.y
deleted file mode 100644
index 60913287cc4..00000000000
--- a/storage/xtradb/pars/pars0grm.y
+++ /dev/null
@@ -1,732 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************
-SQL parser: input file for the GNU Bison parser generator
-
-Look from pars0lex.l for instructions how to generate the C files for
-the InnoDB parser.
-
-Created 12/14/1997 Heikki Tuuri
-*******************************************************/
-
-%{
-/* The value of the semantic attribute is a pointer to a query tree node
-que_node_t */
-
-#include "univ.i"
-#include <math.h> /* Can't be before univ.i */
-#include "pars0pars.h"
-#include "mem0mem.h"
-#include "que0types.h"
-#include "que0que.h"
-#include "row0sel.h"
-
-#define YYSTYPE que_node_t*
-
-/* #define __STDC__ */
-
-int
-yylex(void);
-%}
-
-%token PARS_INT_LIT
-%token PARS_FLOAT_LIT
-%token PARS_STR_LIT
-%token PARS_FIXBINARY_LIT
-%token PARS_BLOB_LIT
-%token PARS_NULL_LIT
-%token PARS_ID_TOKEN
-%token PARS_AND_TOKEN
-%token PARS_OR_TOKEN
-%token PARS_NOT_TOKEN
-%token PARS_GE_TOKEN
-%token PARS_LE_TOKEN
-%token PARS_NE_TOKEN
-%token PARS_PROCEDURE_TOKEN
-%token PARS_IN_TOKEN
-%token PARS_OUT_TOKEN
-%token PARS_BINARY_TOKEN
-%token PARS_BLOB_TOKEN
-%token PARS_INT_TOKEN
-%token PARS_INTEGER_TOKEN
-%token PARS_FLOAT_TOKEN
-%token PARS_CHAR_TOKEN
-%token PARS_IS_TOKEN
-%token PARS_BEGIN_TOKEN
-%token PARS_END_TOKEN
-%token PARS_IF_TOKEN
-%token PARS_THEN_TOKEN
-%token PARS_ELSE_TOKEN
-%token PARS_ELSIF_TOKEN
-%token PARS_LOOP_TOKEN
-%token PARS_WHILE_TOKEN
-%token PARS_RETURN_TOKEN
-%token PARS_SELECT_TOKEN
-%token PARS_SUM_TOKEN
-%token PARS_COUNT_TOKEN
-%token PARS_DISTINCT_TOKEN
-%token PARS_FROM_TOKEN
-%token PARS_WHERE_TOKEN
-%token PARS_FOR_TOKEN
-%token PARS_DDOT_TOKEN
-%token PARS_READ_TOKEN
-%token PARS_ORDER_TOKEN
-%token PARS_BY_TOKEN
-%token PARS_ASC_TOKEN
-%token PARS_DESC_TOKEN
-%token PARS_INSERT_TOKEN
-%token PARS_INTO_TOKEN
-%token PARS_VALUES_TOKEN
-%token PARS_UPDATE_TOKEN
-%token PARS_SET_TOKEN
-%token PARS_DELETE_TOKEN
-%token PARS_CURRENT_TOKEN
-%token PARS_OF_TOKEN
-%token PARS_CREATE_TOKEN
-%token PARS_TABLE_TOKEN
-%token PARS_INDEX_TOKEN
-%token PARS_UNIQUE_TOKEN
-%token PARS_CLUSTERED_TOKEN
-%token PARS_DOES_NOT_FIT_IN_MEM_TOKEN
-%token PARS_ON_TOKEN
-%token PARS_ASSIGN_TOKEN
-%token PARS_DECLARE_TOKEN
-%token PARS_CURSOR_TOKEN
-%token PARS_SQL_TOKEN
-%token PARS_OPEN_TOKEN
-%token PARS_FETCH_TOKEN
-%token PARS_CLOSE_TOKEN
-%token PARS_NOTFOUND_TOKEN
-%token PARS_TO_CHAR_TOKEN
-%token PARS_TO_NUMBER_TOKEN
-%token PARS_TO_BINARY_TOKEN
-%token PARS_BINARY_TO_NUMBER_TOKEN
-%token PARS_SUBSTR_TOKEN
-%token PARS_REPLSTR_TOKEN
-%token PARS_CONCAT_TOKEN
-%token PARS_INSTR_TOKEN
-%token PARS_LENGTH_TOKEN
-%token PARS_SYSDATE_TOKEN
-%token PARS_PRINTF_TOKEN
-%token PARS_ASSERT_TOKEN
-%token PARS_RND_TOKEN
-%token PARS_RND_STR_TOKEN
-%token PARS_ROW_PRINTF_TOKEN
-%token PARS_COMMIT_TOKEN
-%token PARS_ROLLBACK_TOKEN
-%token PARS_WORK_TOKEN
-%token PARS_UNSIGNED_TOKEN
-%token PARS_EXIT_TOKEN
-%token PARS_FUNCTION_TOKEN
-%token PARS_LOCK_TOKEN
-%token PARS_SHARE_TOKEN
-%token PARS_MODE_TOKEN
-%token PARS_LIKE_TOKEN
-%token PARS_LIKE_TOKEN_EXACT
-%token PARS_LIKE_TOKEN_PREFIX
-%token PARS_LIKE_TOKEN_SUFFIX
-%token PARS_LIKE_TOKEN_SUBSTR
-%token PARS_TABLE_NAME_TOKEN
-%token PARS_COMPACT_TOKEN
-%token PARS_BLOCK_SIZE_TOKEN
-%token PARS_BIGINT_TOKEN
-
-%left PARS_AND_TOKEN PARS_OR_TOKEN
-%left PARS_NOT_TOKEN
-%left '=' '<' '>' PARS_GE_TOKEN PARS_LE_TOKEN
-%left '-' '+'
-%left '*' '/'
-%left NEG /* negation--unary minus */
-%left '%'
-
-/* Grammar follows */
-%%
-
-top_statement:
- procedure_definition ';'
-
-statement:
- stored_procedure_call
- | predefined_procedure_call ';'
- | while_statement ';'
- | for_statement ';'
- | exit_statement ';'
- | if_statement ';'
- | return_statement ';'
- | assignment_statement ';'
- | select_statement ';'
- | insert_statement ';'
- | row_printf_statement ';'
- | delete_statement_searched ';'
- | delete_statement_positioned ';'
- | update_statement_searched ';'
- | update_statement_positioned ';'
- | open_cursor_statement ';'
- | fetch_statement ';'
- | close_cursor_statement ';'
- | commit_statement ';'
- | rollback_statement ';'
- | create_table ';'
- | create_index ';'
-;
-
-statement_list:
- statement { $$ = que_node_list_add_last(NULL, $1); }
- | statement_list statement
- { $$ = que_node_list_add_last($1, $2); }
-;
-
-exp:
- PARS_ID_TOKEN { $$ = $1;}
- | function_name '(' exp_list ')'
- { $$ = pars_func($1, $3); }
- | PARS_INT_LIT { $$ = $1;}
- | PARS_FLOAT_LIT { $$ = $1;}
- | PARS_STR_LIT { $$ = $1;}
- | PARS_FIXBINARY_LIT { $$ = $1;}
- | PARS_BLOB_LIT { $$ = $1;}
- | PARS_NULL_LIT { $$ = $1;}
- | PARS_SQL_TOKEN { $$ = $1;}
- | exp '+' exp { $$ = pars_op('+', $1, $3); }
- | exp '-' exp { $$ = pars_op('-', $1, $3); }
- | exp '*' exp { $$ = pars_op('*', $1, $3); }
- | exp '/' exp { $$ = pars_op('/', $1, $3); }
- | '-' exp %prec NEG { $$ = pars_op('-', $2, NULL); }
- | '(' exp ')' { $$ = $2; }
- | exp '=' exp { $$ = pars_op('=', $1, $3); }
- | exp PARS_LIKE_TOKEN PARS_STR_LIT
- { $$ = pars_op(PARS_LIKE_TOKEN, $1, $3); }
- | exp '<' exp { $$ = pars_op('<', $1, $3); }
- | exp '>' exp { $$ = pars_op('>', $1, $3); }
- | exp PARS_GE_TOKEN exp { $$ = pars_op(PARS_GE_TOKEN, $1, $3); }
- | exp PARS_LE_TOKEN exp { $$ = pars_op(PARS_LE_TOKEN, $1, $3); }
- | exp PARS_NE_TOKEN exp { $$ = pars_op(PARS_NE_TOKEN, $1, $3); }
- | exp PARS_AND_TOKEN exp{ $$ = pars_op(PARS_AND_TOKEN, $1, $3); }
- | exp PARS_OR_TOKEN exp { $$ = pars_op(PARS_OR_TOKEN, $1, $3); }
- | PARS_NOT_TOKEN exp { $$ = pars_op(PARS_NOT_TOKEN, $2, NULL); }
- | PARS_ID_TOKEN '%' PARS_NOTFOUND_TOKEN
- { $$ = pars_op(PARS_NOTFOUND_TOKEN, $1, NULL); }
- | PARS_SQL_TOKEN '%' PARS_NOTFOUND_TOKEN
- { $$ = pars_op(PARS_NOTFOUND_TOKEN, $1, NULL); }
-;
-
-function_name:
- PARS_TO_CHAR_TOKEN { $$ = &pars_to_char_token; }
- | PARS_TO_NUMBER_TOKEN { $$ = &pars_to_number_token; }
- | PARS_TO_BINARY_TOKEN { $$ = &pars_to_binary_token; }
- | PARS_BINARY_TO_NUMBER_TOKEN
- { $$ = &pars_binary_to_number_token; }
- | PARS_SUBSTR_TOKEN { $$ = &pars_substr_token; }
- | PARS_CONCAT_TOKEN { $$ = &pars_concat_token; }
- | PARS_INSTR_TOKEN { $$ = &pars_instr_token; }
- | PARS_LENGTH_TOKEN { $$ = &pars_length_token; }
- | PARS_SYSDATE_TOKEN { $$ = &pars_sysdate_token; }
- | PARS_RND_TOKEN { $$ = &pars_rnd_token; }
- | PARS_RND_STR_TOKEN { $$ = &pars_rnd_str_token; }
-;
-
-question_mark_list:
- /* Nothing */
- | '?'
- | question_mark_list ',' '?'
-;
-
-stored_procedure_call:
- '{' PARS_ID_TOKEN '(' question_mark_list ')' '}'
- { $$ = pars_stored_procedure_call(
- static_cast<sym_node_t*>($2)); }
-;
-
-predefined_procedure_call:
- predefined_procedure_name '(' exp_list ')'
- { $$ = pars_procedure_call($1, $3); }
-;
-
-predefined_procedure_name:
- PARS_REPLSTR_TOKEN { $$ = &pars_replstr_token; }
- | PARS_PRINTF_TOKEN { $$ = &pars_printf_token; }
- | PARS_ASSERT_TOKEN { $$ = &pars_assert_token; }
-;
-
-user_function_call:
- PARS_ID_TOKEN '(' ')' { $$ = $1; }
-;
-
-table_list:
- table_name { $$ = que_node_list_add_last(NULL, $1); }
- | table_list ',' table_name
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-variable_list:
- /* Nothing */ { $$ = NULL; }
- | PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); }
- | variable_list ',' PARS_ID_TOKEN
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-exp_list:
- /* Nothing */ { $$ = NULL; }
- | exp { $$ = que_node_list_add_last(NULL, $1);}
- | exp_list ',' exp { $$ = que_node_list_add_last($1, $3); }
-;
-
-select_item:
- exp { $$ = $1; }
- | PARS_COUNT_TOKEN '(' '*' ')'
- { $$ = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
- sym_tab_add_int_lit(
- pars_sym_tab_global, 1))); }
- | PARS_COUNT_TOKEN '(' PARS_DISTINCT_TOKEN PARS_ID_TOKEN ')'
- { $$ = pars_func(&pars_count_token,
- que_node_list_add_last(NULL,
- pars_func(&pars_distinct_token,
- que_node_list_add_last(
- NULL, $4)))); }
- | PARS_SUM_TOKEN '(' exp ')'
- { $$ = pars_func(&pars_sum_token,
- que_node_list_add_last(NULL,
- $3)); }
-;
-
-select_item_list:
- /* Nothing */ { $$ = NULL; }
- | select_item { $$ = que_node_list_add_last(NULL, $1); }
- | select_item_list ',' select_item
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-select_list:
- '*' { $$ = pars_select_list(&pars_star_denoter,
- NULL); }
- | select_item_list PARS_INTO_TOKEN variable_list
- { $$ = pars_select_list(
- $1, static_cast<sym_node_t*>($3)); }
- | select_item_list { $$ = pars_select_list($1, NULL); }
-;
-
-search_condition:
- /* Nothing */ { $$ = NULL; }
- | PARS_WHERE_TOKEN exp { $$ = $2; }
-;
-
-for_update_clause:
- /* Nothing */ { $$ = NULL; }
- | PARS_FOR_TOKEN PARS_UPDATE_TOKEN
- { $$ = &pars_update_token; }
-;
-
-lock_shared_clause:
- /* Nothing */ { $$ = NULL; }
- | PARS_LOCK_TOKEN PARS_IN_TOKEN PARS_SHARE_TOKEN PARS_MODE_TOKEN
- { $$ = &pars_share_token; }
-;
-
-order_direction:
- /* Nothing */ { $$ = &pars_asc_token; }
- | PARS_ASC_TOKEN { $$ = &pars_asc_token; }
- | PARS_DESC_TOKEN { $$ = &pars_desc_token; }
-;
-
-order_by_clause:
- /* Nothing */ { $$ = NULL; }
- | PARS_ORDER_TOKEN PARS_BY_TOKEN PARS_ID_TOKEN order_direction
- { $$ = pars_order_by(
- static_cast<sym_node_t*>($3),
- static_cast<pars_res_word_t*>($4)); }
-;
-
-select_statement:
- PARS_SELECT_TOKEN select_list
- PARS_FROM_TOKEN table_list
- search_condition
- for_update_clause
- lock_shared_clause
- order_by_clause { $$ = pars_select_statement(
- static_cast<sel_node_t*>($2),
- static_cast<sym_node_t*>($4),
- static_cast<que_node_t*>($5),
- static_cast<pars_res_word_t*>($6),
- static_cast<pars_res_word_t*>($7),
- static_cast<order_node_t*>($8)); }
-;
-
-insert_statement_start:
- PARS_INSERT_TOKEN PARS_INTO_TOKEN
- table_name { $$ = $3; }
-;
-
-insert_statement:
- insert_statement_start PARS_VALUES_TOKEN '(' exp_list ')'
- { $$ = pars_insert_statement(
- static_cast<sym_node_t*>($1), $4, NULL); }
- | insert_statement_start select_statement
- { $$ = pars_insert_statement(
- static_cast<sym_node_t*>($1),
- NULL,
- static_cast<sel_node_t*>($2)); }
-;
-
-column_assignment:
- PARS_ID_TOKEN '=' exp { $$ = pars_column_assignment(
- static_cast<sym_node_t*>($1),
- static_cast<que_node_t*>($3)); }
-;
-
-column_assignment_list:
- column_assignment { $$ = que_node_list_add_last(NULL, $1); }
- | column_assignment_list ',' column_assignment
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-cursor_positioned:
- PARS_WHERE_TOKEN
- PARS_CURRENT_TOKEN PARS_OF_TOKEN
- PARS_ID_TOKEN { $$ = $4; }
-;
-
-update_statement_start:
- PARS_UPDATE_TOKEN table_name
- PARS_SET_TOKEN
- column_assignment_list { $$ = pars_update_statement_start(
- FALSE,
- static_cast<sym_node_t*>($2),
- static_cast<col_assign_node_t*>($4)); }
-;
-
-update_statement_searched:
- update_statement_start
- search_condition { $$ = pars_update_statement(
- static_cast<upd_node_t*>($1),
- NULL,
- static_cast<que_node_t*>($2)); }
-;
-
-update_statement_positioned:
- update_statement_start
- cursor_positioned { $$ = pars_update_statement(
- static_cast<upd_node_t*>($1),
- static_cast<sym_node_t*>($2),
- NULL); }
-;
-
-delete_statement_start:
- PARS_DELETE_TOKEN PARS_FROM_TOKEN
- table_name { $$ = pars_update_statement_start(
- TRUE,
- static_cast<sym_node_t*>($3), NULL); }
-;
-
-delete_statement_searched:
- delete_statement_start
- search_condition { $$ = pars_update_statement(
- static_cast<upd_node_t*>($1),
- NULL,
- static_cast<que_node_t*>($2)); }
-;
-
-delete_statement_positioned:
- delete_statement_start
- cursor_positioned { $$ = pars_update_statement(
- static_cast<upd_node_t*>($1),
- static_cast<sym_node_t*>($2),
- NULL); }
-;
-
-row_printf_statement:
- PARS_ROW_PRINTF_TOKEN select_statement
- { $$ = pars_row_printf_statement(
- static_cast<sel_node_t*>($2)); }
-;
-
-assignment_statement:
- PARS_ID_TOKEN PARS_ASSIGN_TOKEN exp
- { $$ = pars_assignment_statement(
- static_cast<sym_node_t*>($1),
- static_cast<que_node_t*>($3)); }
-;
-
-elsif_element:
- PARS_ELSIF_TOKEN
- exp PARS_THEN_TOKEN statement_list
- { $$ = pars_elsif_element($2, $4); }
-;
-
-elsif_list:
- elsif_element { $$ = que_node_list_add_last(NULL, $1); }
- | elsif_list elsif_element
- { $$ = que_node_list_add_last($1, $2); }
-;
-
-else_part:
- /* Nothing */ { $$ = NULL; }
- | PARS_ELSE_TOKEN statement_list
- { $$ = $2; }
- | elsif_list { $$ = $1; }
-;
-
-if_statement:
- PARS_IF_TOKEN exp PARS_THEN_TOKEN statement_list
- else_part
- PARS_END_TOKEN PARS_IF_TOKEN
- { $$ = pars_if_statement($2, $4, $5); }
-;
-
-while_statement:
- PARS_WHILE_TOKEN exp PARS_LOOP_TOKEN statement_list
- PARS_END_TOKEN PARS_LOOP_TOKEN
- { $$ = pars_while_statement($2, $4); }
-;
-
-for_statement:
- PARS_FOR_TOKEN PARS_ID_TOKEN PARS_IN_TOKEN
- exp PARS_DDOT_TOKEN exp
- PARS_LOOP_TOKEN statement_list
- PARS_END_TOKEN PARS_LOOP_TOKEN
- { $$ = pars_for_statement(
- static_cast<sym_node_t*>($2),
- $4, $6, $8); }
-;
-
-exit_statement:
- PARS_EXIT_TOKEN { $$ = pars_exit_statement(); }
-;
-
-return_statement:
- PARS_RETURN_TOKEN { $$ = pars_return_statement(); }
-;
-
-open_cursor_statement:
- PARS_OPEN_TOKEN PARS_ID_TOKEN
- { $$ = pars_open_statement(
- ROW_SEL_OPEN_CURSOR,
- static_cast<sym_node_t*>($2)); }
-;
-
-close_cursor_statement:
- PARS_CLOSE_TOKEN PARS_ID_TOKEN
- { $$ = pars_open_statement(
- ROW_SEL_CLOSE_CURSOR,
- static_cast<sym_node_t*>($2)); }
-;
-
-fetch_statement:
- PARS_FETCH_TOKEN PARS_ID_TOKEN PARS_INTO_TOKEN variable_list
- { $$ = pars_fetch_statement(
- static_cast<sym_node_t*>($2),
- static_cast<sym_node_t*>($4), NULL); }
- | PARS_FETCH_TOKEN PARS_ID_TOKEN PARS_INTO_TOKEN user_function_call
- { $$ = pars_fetch_statement(
- static_cast<sym_node_t*>($2),
- NULL,
- static_cast<sym_node_t*>($4)); }
-;
-
-column_def:
- PARS_ID_TOKEN type_name opt_column_len opt_unsigned opt_not_null
- { $$ = pars_column_def(
- static_cast<sym_node_t*>($1),
- static_cast<pars_res_word_t*>($2),
- static_cast<sym_node_t*>($3),
- $4, $5); }
-;
-
-column_def_list:
- column_def { $$ = que_node_list_add_last(NULL, $1); }
- | column_def_list ',' column_def
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-opt_column_len:
- /* Nothing */ { $$ = NULL; }
- | '(' PARS_INT_LIT ')'
- { $$ = $2; }
-;
-
-opt_unsigned:
- /* Nothing */ { $$ = NULL; }
- | PARS_UNSIGNED_TOKEN
- { $$ = &pars_int_token;
- /* pass any non-NULL pointer */ }
-;
-
-opt_not_null:
- /* Nothing */ { $$ = NULL; }
- | PARS_NOT_TOKEN PARS_NULL_LIT
- { $$ = &pars_int_token;
- /* pass any non-NULL pointer */ }
-;
-
-not_fit_in_memory:
- /* Nothing */ { $$ = NULL; }
- | PARS_DOES_NOT_FIT_IN_MEM_TOKEN
- { $$ = &pars_int_token;
- /* pass any non-NULL pointer */ }
-;
-
-compact:
- /* Nothing */ { $$ = NULL; }
- | PARS_COMPACT_TOKEN { $$ = &pars_int_token;
- /* pass any non-NULL pointer */ }
-;
-
-block_size:
- /* Nothing */ { $$ = NULL; }
- | PARS_BLOCK_SIZE_TOKEN '=' PARS_INT_LIT
- { $$ = $3; }
-;
-
-create_table:
- PARS_CREATE_TOKEN PARS_TABLE_TOKEN
- table_name '(' column_def_list ')'
- not_fit_in_memory compact block_size
- { $$ = pars_create_table(
- static_cast<sym_node_t*>($3),
- static_cast<sym_node_t*>($5),
- static_cast<sym_node_t*>($8),
- static_cast<sym_node_t*>($9), $7); }
-;
-
-column_list:
- PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); }
- | column_list ',' PARS_ID_TOKEN
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-unique_def:
- /* Nothing */ { $$ = NULL; }
- | PARS_UNIQUE_TOKEN { $$ = &pars_unique_token; }
-;
-
-clustered_def:
- /* Nothing */ { $$ = NULL; }
- | PARS_CLUSTERED_TOKEN { $$ = &pars_clustered_token; }
-;
-
-create_index:
- PARS_CREATE_TOKEN unique_def
- clustered_def
- PARS_INDEX_TOKEN
- PARS_ID_TOKEN PARS_ON_TOKEN
- table_name
- '(' column_list ')' { $$ = pars_create_index(
- static_cast<pars_res_word_t*>($2),
- static_cast<pars_res_word_t*>($3),
- static_cast<sym_node_t*>($5),
- static_cast<sym_node_t*>($7),
- static_cast<sym_node_t*>($9)); }
-;
-
-table_name:
- PARS_ID_TOKEN { $$ = $1; }
- | PARS_TABLE_NAME_TOKEN { $$ = $1; }
-;
-
-commit_statement:
- PARS_COMMIT_TOKEN PARS_WORK_TOKEN
- { $$ = pars_commit_statement(); }
-;
-
-rollback_statement:
- PARS_ROLLBACK_TOKEN PARS_WORK_TOKEN
- { $$ = pars_rollback_statement(); }
-;
-
-type_name:
- PARS_INT_TOKEN { $$ = &pars_int_token; }
- | PARS_INTEGER_TOKEN { $$ = &pars_int_token; }
- | PARS_BIGINT_TOKEN { $$ = &pars_bigint_token; }
- | PARS_CHAR_TOKEN { $$ = &pars_char_token; }
- | PARS_BINARY_TOKEN { $$ = &pars_binary_token; }
- | PARS_BLOB_TOKEN { $$ = &pars_blob_token; }
-;
-
-parameter_declaration:
- PARS_ID_TOKEN PARS_IN_TOKEN type_name
- { $$ = pars_parameter_declaration(
- static_cast<sym_node_t*>($1),
- PARS_INPUT,
- static_cast<pars_res_word_t*>($3)); }
- | PARS_ID_TOKEN PARS_OUT_TOKEN type_name
- { $$ = pars_parameter_declaration(
- static_cast<sym_node_t*>($1),
- PARS_OUTPUT,
- static_cast<pars_res_word_t*>($3)); }
-;
-
-parameter_declaration_list:
- /* Nothing */ { $$ = NULL; }
- | parameter_declaration { $$ = que_node_list_add_last(NULL, $1); }
- | parameter_declaration_list ',' parameter_declaration
- { $$ = que_node_list_add_last($1, $3); }
-;
-
-variable_declaration:
- PARS_ID_TOKEN type_name ';'
- { $$ = pars_variable_declaration(
- static_cast<sym_node_t*>($1),
- static_cast<pars_res_word_t*>($2)); }
-;
-
-variable_declaration_list:
- /* Nothing */
- | variable_declaration
- | variable_declaration_list variable_declaration
-;
-
-cursor_declaration:
- PARS_DECLARE_TOKEN PARS_CURSOR_TOKEN PARS_ID_TOKEN
- PARS_IS_TOKEN select_statement ';'
- { $$ = pars_cursor_declaration(
- static_cast<sym_node_t*>($3),
- static_cast<sel_node_t*>($5)); }
-;
-
-function_declaration:
- PARS_DECLARE_TOKEN PARS_FUNCTION_TOKEN PARS_ID_TOKEN ';'
- { $$ = pars_function_declaration(
- static_cast<sym_node_t*>($3)); }
-;
-
-declaration:
- cursor_declaration
- | function_declaration
-;
-
-declaration_list:
- /* Nothing */
- | declaration
- | declaration_list declaration
-;
-
-procedure_definition:
- PARS_PROCEDURE_TOKEN PARS_ID_TOKEN '(' parameter_declaration_list ')'
- PARS_IS_TOKEN
- variable_declaration_list
- declaration_list
- PARS_BEGIN_TOKEN
- statement_list
- PARS_END_TOKEN { $$ = pars_procedure_definition(
- static_cast<sym_node_t*>($2),
- static_cast<sym_node_t*>($4),
- $10); }
-;
-
-%%
diff --git a/storage/xtradb/pars/pars0lex.l b/storage/xtradb/pars/pars0lex.l
deleted file mode 100644
index f800410fa3f..00000000000
--- a/storage/xtradb/pars/pars0lex.l
+++ /dev/null
@@ -1,706 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************
-SQL parser lexical analyzer: input file for the GNU Flex lexer generator
-
-The InnoDB parser is frozen because MySQL takes care of SQL parsing.
-Therefore we normally keep the InnoDB parser C files as they are, and do
-not automatically generate them from pars0grm.y and pars0lex.l.
-
-How to make the InnoDB parser and lexer C files:
-
-1. Run ./make_flex.sh to generate lexer files.
-
-2. Run ./make_bison.sh to generate parser files.
-
-These instructions seem to work at least with bison-1.875d and flex-2.5.31 on
-Linux.
-
-Created 12/14/1997 Heikki Tuuri
-*******************************************************/
-
-%option nostdinit
-%option 8bit
-%option warn
-%option pointer
-%option never-interactive
-%option nodefault
-%option noinput
-%option nounput
-%option noyywrap
-%option noyy_scan_buffer
-%option noyy_scan_bytes
-%option noyy_scan_string
-%option nounistd
-
-%{
-#define YYSTYPE que_node_t*
-
-#include "univ.i"
-#include "pars0pars.h"
-#include "pars0grm.h"
-#include "pars0sym.h"
-#include "mem0mem.h"
-#include "os0proc.h"
-
-#define malloc(A) ut_malloc(A)
-#define free(A) ut_free(A)
-#define realloc(P, A) ut_realloc(P, A)
-#define exit(A) ut_error
-
-/* Note: We cast &result to int* from yysize_t* */
-#define YY_INPUT(buf, result, max_size) \
- pars_get_lex_chars(buf, (int*) &result, max_size)
-
-/* String buffer for removing quotes */
-static ulint stringbuf_len_alloc = 0; /* Allocated length */
-static ulint stringbuf_len = 0; /* Current length */
-static char* stringbuf; /* Start of buffer */
-/** Appends a string to the buffer. */
-static
-void
-string_append(
-/*==========*/
- const char* str, /*!< in: string to be appended */
- ulint len) /*!< in: length of the string */
-{
- if (stringbuf == NULL) {
- stringbuf = static_cast<char*>(malloc(1));
- stringbuf_len_alloc = 1;
- }
-
- if (stringbuf_len + len > stringbuf_len_alloc) {
- while (stringbuf_len + len > stringbuf_len_alloc) {
- stringbuf_len_alloc <<= 1;
- }
-
- stringbuf = static_cast<char*>(
- realloc(stringbuf, stringbuf_len_alloc));
- }
-
- memcpy(stringbuf + stringbuf_len, str, len);
- stringbuf_len += len;
-}
-
-%}
-
-DIGIT [0-9]
-ID [a-z_A-Z][a-z_A-Z0-9]*
-TABLE_NAME [a-z_A-Z][@a-z_A-Z0-9]*\/(#sql-|[a-z_A-Z])[a-z_A-Z0-9]*
-BOUND_LIT \:[a-z_A-Z0-9]+
-BOUND_ID \$[a-z_A-Z0-9]+
-
-%x comment
-%x quoted
-%x id
-%%
-
-{DIGIT}+ {
- yylval = sym_tab_add_int_lit(pars_sym_tab_global,
- atoi(yytext));
- return(PARS_INT_LIT);
-}
-
-{DIGIT}+"."{DIGIT}* {
- ut_error; /* not implemented */
-
- return(PARS_FLOAT_LIT);
-}
-
-{BOUND_LIT} {
- ulint type;
-
- yylval = sym_tab_add_bound_lit(pars_sym_tab_global,
- yytext + 1, &type);
-
- return((int) type);
-}
-
-{BOUND_ID} {
- yylval = sym_tab_add_bound_id(pars_sym_tab_global,
- yytext + 1);
-
- return(PARS_ID_TOKEN);
-}
-
-"'" {
-/* Quoted character string literals are handled in an explicit
-start state 'quoted'. This state is entered and the buffer for
-the scanned string is emptied upon encountering a starting quote.
-
-In the state 'quoted', only two actions are possible (defined below). */
- BEGIN(quoted);
- stringbuf_len = 0;
-}
-<quoted>[^\']+ {
- /* Got a sequence of characters other than "'":
- append to string buffer */
- string_append(yytext, yyleng);
-}
-<quoted>"'"+ {
- /* Got a sequence of "'" characters:
- append half of them to string buffer,
- as "''" represents a single "'".
- We apply truncating division,
- so that "'''" will result in "'". */
-
- string_append(yytext, yyleng / 2);
-
- /* If we got an odd number of quotes, then the
- last quote we got is the terminating quote.
- At the end of the string, we return to the
- initial start state and report the scanned
- string literal. */
-
- if (yyleng % 2) {
- BEGIN(INITIAL);
- yylval = sym_tab_add_str_lit(
- pars_sym_tab_global,
- (byte*) stringbuf, stringbuf_len);
- return(PARS_STR_LIT);
- }
-}
-
-\" {
-/* Quoted identifiers are handled in an explicit start state 'id'.
-This state is entered and the buffer for the scanned string is emptied
-upon encountering a starting quote.
-
-In the state 'id', only two actions are possible (defined below). */
- BEGIN(id);
- stringbuf_len = 0;
-}
-<id>[^\"]+ {
- /* Got a sequence of characters other than '"':
- append to string buffer */
- string_append(yytext, yyleng);
-}
-<id>\"+ {
- /* Got a sequence of '"' characters:
- append half of them to string buffer,
- as '""' represents a single '"'.
- We apply truncating division,
- so that '"""' will result in '"'. */
-
- string_append(yytext, yyleng / 2);
-
- /* If we got an odd number of quotes, then the
- last quote we got is the terminating quote.
- At the end of the string, we return to the
- initial start state and report the scanned
- identifier. */
-
- if (yyleng % 2) {
- BEGIN(INITIAL);
- yylval = sym_tab_add_id(
- pars_sym_tab_global,
- (byte*) stringbuf, stringbuf_len);
-
- return(PARS_ID_TOKEN);
- }
-}
-
-"NULL" {
- yylval = sym_tab_add_null_lit(pars_sym_tab_global);
-
- return(PARS_NULL_LIT);
-}
-
-"SQL" {
- /* Implicit cursor name */
- yylval = sym_tab_add_str_lit(pars_sym_tab_global,
- (byte*) yytext, yyleng);
- return(PARS_SQL_TOKEN);
-}
-
-"AND" {
- return(PARS_AND_TOKEN);
-}
-
-"OR" {
- return(PARS_OR_TOKEN);
-}
-
-"NOT" {
- return(PARS_NOT_TOKEN);
-}
-
-"PROCEDURE" {
- return(PARS_PROCEDURE_TOKEN);
-}
-
-"IN" {
- return(PARS_IN_TOKEN);
-}
-
-"OUT" {
- return(PARS_OUT_TOKEN);
-}
-
-"BINARY" {
- return(PARS_BINARY_TOKEN);
-}
-
-"BLOB" {
- return(PARS_BLOB_TOKEN);
-}
-
-"INT" {
- return(PARS_INT_TOKEN);
-}
-
-"INTEGER" {
- return(PARS_INT_TOKEN);
-}
-
-"FLOAT" {
- return(PARS_FLOAT_TOKEN);
-}
-
-"CHAR" {
- return(PARS_CHAR_TOKEN);
-}
-
-"IS" {
- return(PARS_IS_TOKEN);
-}
-
-"BEGIN" {
- return(PARS_BEGIN_TOKEN);
-}
-
-"END" {
- return(PARS_END_TOKEN);
-}
-
-"IF" {
- return(PARS_IF_TOKEN);
-}
-
-"THEN" {
- return(PARS_THEN_TOKEN);
-}
-
-"ELSE" {
- return(PARS_ELSE_TOKEN);
-}
-
-"ELSIF" {
- return(PARS_ELSIF_TOKEN);
-}
-
-"LOOP" {
- return(PARS_LOOP_TOKEN);
-}
-
-"WHILE" {
- return(PARS_WHILE_TOKEN);
-}
-
-"RETURN" {
- return(PARS_RETURN_TOKEN);
-}
-
-"SELECT" {
- return(PARS_SELECT_TOKEN);
-}
-
-"SUM" {
- return(PARS_SUM_TOKEN);
-}
-
-"COUNT" {
- return(PARS_COUNT_TOKEN);
-}
-
-"DISTINCT" {
- return(PARS_DISTINCT_TOKEN);
-}
-
-"FROM" {
- return(PARS_FROM_TOKEN);
-}
-
-"WHERE" {
- return(PARS_WHERE_TOKEN);
-}
-
-"FOR" {
- return(PARS_FOR_TOKEN);
-}
-
-"READ" {
- return(PARS_READ_TOKEN);
-}
-
-"ORDER" {
- return(PARS_ORDER_TOKEN);
-}
-
-"BY" {
- return(PARS_BY_TOKEN);
-}
-
-"ASC" {
- return(PARS_ASC_TOKEN);
-}
-
-"DESC" {
- return(PARS_DESC_TOKEN);
-}
-
-"INSERT" {
- return(PARS_INSERT_TOKEN);
-}
-
-"INTO" {
- return(PARS_INTO_TOKEN);
-}
-
-"VALUES" {
- return(PARS_VALUES_TOKEN);
-}
-
-"UPDATE" {
- return(PARS_UPDATE_TOKEN);
-}
-
-"SET" {
- return(PARS_SET_TOKEN);
-}
-
-"DELETE" {
- return(PARS_DELETE_TOKEN);
-}
-
-"CURRENT" {
- return(PARS_CURRENT_TOKEN);
-}
-
-"OF" {
- return(PARS_OF_TOKEN);
-}
-
-"CREATE" {
- return(PARS_CREATE_TOKEN);
-}
-
-"TABLE" {
- return(PARS_TABLE_TOKEN);
-}
-
-"COMPACT" {
- return(PARS_COMPACT_TOKEN);
-}
-
-"BLOCK_SIZE" {
- return(PARS_BLOCK_SIZE_TOKEN);
-}
-
-"INDEX" {
- return(PARS_INDEX_TOKEN);
-}
-
-"UNIQUE" {
- return(PARS_UNIQUE_TOKEN);
-}
-
-"CLUSTERED" {
- return(PARS_CLUSTERED_TOKEN);
-}
-
-"DOES_NOT_FIT_IN_MEMORY" {
- return(PARS_DOES_NOT_FIT_IN_MEM_TOKEN);
-}
-
-"ON" {
- return(PARS_ON_TOKEN);
-}
-
-"DECLARE" {
- return(PARS_DECLARE_TOKEN);
-}
-
-"CURSOR" {
- return(PARS_CURSOR_TOKEN);
-}
-
-"OPEN" {
- return(PARS_OPEN_TOKEN);
-}
-
-"FETCH" {
- return(PARS_FETCH_TOKEN);
-}
-
-"CLOSE" {
- return(PARS_CLOSE_TOKEN);
-}
-
-"NOTFOUND" {
- return(PARS_NOTFOUND_TOKEN);
-}
-
-"TO_CHAR" {
- return(PARS_TO_CHAR_TOKEN);
-}
-
-"TO_NUMBER" {
- return(PARS_TO_NUMBER_TOKEN);
-}
-
-"TO_BINARY" {
- return(PARS_TO_BINARY_TOKEN);
-}
-
-"BINARY_TO_NUMBER" {
- return(PARS_BINARY_TO_NUMBER_TOKEN);
-}
-
-"SUBSTR" {
- return(PARS_SUBSTR_TOKEN);
-}
-
-"REPLSTR" {
- return(PARS_REPLSTR_TOKEN);
-}
-
-"CONCAT" {
- return(PARS_CONCAT_TOKEN);
-}
-
-"INSTR" {
- return(PARS_INSTR_TOKEN);
-}
-
-"LENGTH" {
- return(PARS_LENGTH_TOKEN);
-}
-
-"SYSDATE" {
- return(PARS_SYSDATE_TOKEN);
-}
-
-"PRINTF" {
- return(PARS_PRINTF_TOKEN);
-}
-
-"ASSERT" {
- return(PARS_ASSERT_TOKEN);
-}
-
-"RND" {
- return(PARS_RND_TOKEN);
-}
-
-"RND_STR" {
- return(PARS_RND_STR_TOKEN);
-}
-
-"ROW_PRINTF" {
- return(PARS_ROW_PRINTF_TOKEN);
-}
-
-"COMMIT" {
- return(PARS_COMMIT_TOKEN);
-}
-
-"ROLLBACK" {
- return(PARS_ROLLBACK_TOKEN);
-}
-
-"WORK" {
- return(PARS_WORK_TOKEN);
-}
-
-"UNSIGNED" {
- return(PARS_UNSIGNED_TOKEN);
-}
-
-"EXIT" {
- return(PARS_EXIT_TOKEN);
-}
-
-"FUNCTION" {
- return(PARS_FUNCTION_TOKEN);
-}
-
-"LOCK" {
- return(PARS_LOCK_TOKEN);
-}
-
-"SHARE" {
- return(PARS_SHARE_TOKEN);
-}
-
-"MODE" {
- return(PARS_MODE_TOKEN);
-}
-
-"LIKE" {
- return(PARS_LIKE_TOKEN);
-}
-
-"BIGINT" {
- return(PARS_BIGINT_TOKEN);
-}
-
-{ID} {
- yylval = sym_tab_add_id(pars_sym_tab_global,
- (byte*) yytext,
- ut_strlen(yytext));
- return(PARS_ID_TOKEN);
-}
-
-{TABLE_NAME} {
- yylval = sym_tab_add_id(pars_sym_tab_global,
- (byte*) yytext,
- ut_strlen(yytext));
- return(PARS_TABLE_NAME_TOKEN);
-}
-
-".." {
- return(PARS_DDOT_TOKEN);
-}
-
-":=" {
- return(PARS_ASSIGN_TOKEN);
-}
-
-"<=" {
- return(PARS_LE_TOKEN);
-}
-
-">=" {
- return(PARS_GE_TOKEN);
-}
-
-"<>" {
- return(PARS_NE_TOKEN);
-}
-
-"(" {
-
- return((int)(*yytext));
-}
-
-"=" {
-
- return((int)(*yytext));
-}
-
-">" {
-
- return((int)(*yytext));
-}
-
-"<" {
-
- return((int)(*yytext));
-}
-
-"," {
-
- return((int)(*yytext));
-}
-
-";" {
-
- return((int)(*yytext));
-}
-
-")" {
-
- return((int)(*yytext));
-}
-
-"+" {
-
- return((int)(*yytext));
-}
-
-"-" {
-
- return((int)(*yytext));
-}
-
-"*" {
-
- return((int)(*yytext));
-}
-
-"/" {
-
- return((int)(*yytext));
-}
-
-"%" {
-
- return((int)(*yytext));
-}
-
-"{" {
-
- return((int)(*yytext));
-}
-
-"}" {
-
- return((int)(*yytext));
-}
-
-"?" {
-
- return((int)(*yytext));
-}
-
-"/*" BEGIN(comment); /* eat up comment */
-
-<comment>[^*]*
-<comment>"*"+[^*/]*
-<comment>"*"+"/" BEGIN(INITIAL);
-
-[ \t\n]+ /* eat up whitespace */
-
-
-. {
- fprintf(stderr,"Unrecognized character: %02x\n",
- *yytext);
-
- ut_error;
-
- return(0);
-}
-
-%%
-
-/**********************************************************************
-Release any resources used by the lexer. */
-UNIV_INTERN
-void
-pars_lexer_close(void)
-/*==================*/
-{
- if (yy_buffer_stack)
- yylex_destroy();
- if (stringbuf)
- free(stringbuf);
- stringbuf = NULL;
- stringbuf_len_alloc = stringbuf_len = 0;
-}
diff --git a/storage/xtradb/pars/pars0opt.cc b/storage/xtradb/pars/pars0opt.cc
deleted file mode 100644
index 5a7e1861d74..00000000000
--- a/storage/xtradb/pars/pars0opt.cc
+++ /dev/null
@@ -1,1261 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file pars/pars0opt.cc
-Simple SQL optimizer
-
-Created 12/21/1997 Heikki Tuuri
-*******************************************************/
-
-#include "pars0opt.h"
-
-#ifdef UNIV_NONINL
-#include "pars0opt.ic"
-#endif
-
-#include "row0sel.h"
-#include "row0ins.h"
-#include "row0upd.h"
-#include "dict0dict.h"
-#include "dict0mem.h"
-#include "que0que.h"
-#include "pars0grm.h"
-#include "pars0pars.h"
-#include "lock0lock.h"
-
-#define OPT_EQUAL 1 /* comparison by = */
-#define OPT_COMPARISON 2 /* comparison by <, >, <=, or >= */
-
-#define OPT_NOT_COND 1
-#define OPT_END_COND 2
-#define OPT_TEST_COND 3
-#define OPT_SCROLL_COND 4
-
-
-/*******************************************************************//**
-Inverts a comparison operator.
-@return the equivalent operator when the order of the arguments is switched */
-static
-int
-opt_invert_cmp_op(
-/*==============*/
- int op) /*!< in: operator */
-{
- if (op == '<') {
- return('>');
- } else if (op == '>') {
- return('<');
- } else if (op == '=') {
- return('=');
- } else if (op == PARS_LE_TOKEN) {
- return(PARS_GE_TOKEN);
- } else if (op == PARS_GE_TOKEN) {
- return(PARS_LE_TOKEN);
- } else {
- /* TODO: LIKE operator */
- ut_error;
- }
-
- return(0);
-}
-
-/*******************************************************************//**
-Checks if the value of an expression can be calculated BEFORE the nth table
-in a join is accessed. If this is the case, it can possibly be used in an
-index search for the nth table.
-@return TRUE if already determined */
-static
-ibool
-opt_check_exp_determined_before(
-/*============================*/
- que_node_t* exp, /*!< in: expression */
- sel_node_t* sel_node, /*!< in: select node */
- ulint nth_table) /*!< in: nth table will be accessed */
-{
- func_node_t* func_node;
- sym_node_t* sym_node;
- dict_table_t* table;
- que_node_t* arg;
- ulint i;
-
- ut_ad(exp && sel_node);
-
- if (que_node_get_type(exp) == QUE_NODE_FUNC) {
- func_node = static_cast<func_node_t*>(exp);
-
- arg = func_node->args;
-
- while (arg) {
- if (!opt_check_exp_determined_before(arg, sel_node,
- nth_table)) {
- return(FALSE);
- }
-
- arg = que_node_get_next(arg);
- }
-
- return(TRUE);
- }
-
- ut_a(que_node_get_type(exp) == QUE_NODE_SYMBOL);
-
- sym_node = static_cast<sym_node_t*>(exp);
-
- if (sym_node->token_type != SYM_COLUMN) {
-
- return(TRUE);
- }
-
- for (i = 0; i < nth_table; i++) {
-
- table = sel_node_get_nth_plan(sel_node, i)->table;
-
- if (sym_node->table == table) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/*******************************************************************//**
-Looks in a comparison condition if a column value is already restricted by
-it BEFORE the nth table is accessed.
-@return expression restricting the value of the column, or NULL if not known */
-static
-que_node_t*
-opt_look_for_col_in_comparison_before(
-/*==================================*/
- ulint cmp_type, /*!< in: OPT_EQUAL, OPT_COMPARISON */
- ulint col_no, /*!< in: column number */
- func_node_t* search_cond, /*!< in: comparison condition */
- sel_node_t* sel_node, /*!< in: select node */
- ulint nth_table, /*!< in: nth table in a join (a query
- from a single table is considered a
- join of 1 table) */
- ulint* op) /*!< out: comparison operator ('=',
- PARS_GE_TOKEN, ... ); this is inverted
- if the column appears on the right
- side */
-{
- sym_node_t* sym_node;
- dict_table_t* table;
- que_node_t* exp;
- que_node_t* arg;
-
- ut_ad(search_cond);
-
- ut_a((search_cond->func == '<')
- || (search_cond->func == '>')
- || (search_cond->func == '=')
- || (search_cond->func == PARS_GE_TOKEN)
- || (search_cond->func == PARS_LE_TOKEN)
- || (search_cond->func == PARS_LIKE_TOKEN_EXACT)
- || (search_cond->func == PARS_LIKE_TOKEN_PREFIX)
- || (search_cond->func == PARS_LIKE_TOKEN_SUFFIX)
- || (search_cond->func == PARS_LIKE_TOKEN_SUBSTR));
-
- table = sel_node_get_nth_plan(sel_node, nth_table)->table;
-
- if ((cmp_type == OPT_EQUAL)
- && (search_cond->func != '=')
- && (search_cond->func != PARS_LIKE_TOKEN_EXACT)
- && (search_cond->func != PARS_LIKE_TOKEN_PREFIX)) {
-
- return(NULL);
-
- } else if ((cmp_type == OPT_COMPARISON)
- && (search_cond->func != '<')
- && (search_cond->func != '>')
- && (search_cond->func != PARS_GE_TOKEN)
- && (search_cond->func != PARS_LE_TOKEN)
- && (search_cond->func != PARS_LIKE_TOKEN_PREFIX)
- && (search_cond->func != PARS_LIKE_TOKEN_SUFFIX)) {
-
- return(NULL);
- }
-
- arg = search_cond->args;
-
- if (que_node_get_type(arg) == QUE_NODE_SYMBOL) {
- sym_node = static_cast<sym_node_t*>(arg);
-
- if ((sym_node->token_type == SYM_COLUMN)
- && (sym_node->table == table)
- && (sym_node->col_no == col_no)) {
-
- /* sym_node contains the desired column id */
-
- /* Check if the expression on the right side of the
- operator is already determined */
-
- exp = que_node_get_next(arg);
-
- if (opt_check_exp_determined_before(exp, sel_node,
- nth_table)) {
- *op = search_cond->func;
-
- return(exp);
- }
- }
- }
-
- exp = search_cond->args;
- arg = que_node_get_next(arg);
-
- if (que_node_get_type(arg) == QUE_NODE_SYMBOL) {
- sym_node = static_cast<sym_node_t*>(arg);
-
- if ((sym_node->token_type == SYM_COLUMN)
- && (sym_node->table == table)
- && (sym_node->col_no == col_no)) {
-
- if (opt_check_exp_determined_before(exp, sel_node,
- nth_table)) {
- *op = opt_invert_cmp_op(search_cond->func);
-
- return(exp);
- }
- }
- }
-
- return(NULL);
-}
-
-/*******************************************************************//**
-Looks in a search condition if a column value is already restricted by the
-search condition BEFORE the nth table is accessed. Takes into account that
-if we will fetch in an ascending order, we cannot utilize an upper limit for
-a column value; in a descending order, respectively, a lower limit.
-@return expression restricting the value of the column, or NULL if not known */
-static
-que_node_t*
-opt_look_for_col_in_cond_before(
-/*============================*/
- ulint cmp_type, /*!< in: OPT_EQUAL, OPT_COMPARISON */
- ulint col_no, /*!< in: column number */
- func_node_t* search_cond, /*!< in: search condition or NULL */
- sel_node_t* sel_node, /*!< in: select node */
- ulint nth_table, /*!< in: nth table in a join (a query
- from a single table is considered a
- join of 1 table) */
- ulint* op) /*!< out: comparison operator ('=',
- PARS_GE_TOKEN, ... ) */
-{
- func_node_t* new_cond;
- que_node_t* exp;
-
- if (search_cond == NULL) {
-
- return(NULL);
- }
-
- ut_a(que_node_get_type(search_cond) == QUE_NODE_FUNC);
- ut_a(search_cond->func != PARS_OR_TOKEN);
- ut_a(search_cond->func != PARS_NOT_TOKEN);
-
- if (search_cond->func == PARS_AND_TOKEN) {
- new_cond = static_cast<func_node_t*>(search_cond->args);
-
- exp = opt_look_for_col_in_cond_before(cmp_type, col_no,
- new_cond, sel_node,
- nth_table, op);
- if (exp) {
-
- return(exp);
- }
-
- new_cond = static_cast<func_node_t*>(
- que_node_get_next(new_cond));
-
- exp = opt_look_for_col_in_cond_before(cmp_type, col_no,
- new_cond, sel_node,
- nth_table, op);
- return(exp);
- }
-
- exp = opt_look_for_col_in_comparison_before(cmp_type, col_no,
- search_cond, sel_node,
- nth_table, op);
- if (exp == NULL) {
-
- return(NULL);
- }
-
- /* If we will fetch in an ascending order, we cannot utilize an upper
- limit for a column value; in a descending order, respectively, a lower
- limit */
-
- if (sel_node->asc && ((*op == '<') || (*op == PARS_LE_TOKEN))) {
-
- return(NULL);
-
- } else if (!sel_node->asc
- && ((*op == '>') || (*op == PARS_GE_TOKEN))) {
-
- return(NULL);
- }
-
- return(exp);
-}
-
-/*******************************************************************//**
-Calculates the goodness for an index according to a select node. The
-goodness is 4 times the number of first fields in index whose values we
-already know exactly in the query. If we have a comparison condition for
-an additional field, 2 point are added. If the index is unique, and we know
-all the unique fields for the index we add 1024 points. For a clustered index
-we add 1 point.
-@return goodness */
-static
-ulint
-opt_calc_index_goodness(
-/*====================*/
- dict_index_t* index, /*!< in: index */
- sel_node_t* sel_node, /*!< in: parsed select node */
- ulint nth_table, /*!< in: nth table in a join */
- que_node_t** index_plan, /*!< in/out: comparison expressions for
- this index */
- ulint* last_op) /*!< out: last comparison operator, if
- goodness > 1 */
-{
- que_node_t* exp;
- ulint goodness;
- ulint n_fields;
- ulint col_no;
- ulint op;
- ulint j;
-
- /* At least for now we don't support using FTS indexes for queries
- done through InnoDB's own SQL parser. */
- if (dict_index_is_online_ddl(index) || (index->type & DICT_FTS)) {
- return(0);
- }
-
- goodness = 0;
-
- /* Note that as higher level node pointers in the B-tree contain
- page addresses as the last field, we must not put more fields in
- the search tuple than dict_index_get_n_unique_in_tree(index); see
- the note in btr_cur_search_to_nth_level. */
-
- n_fields = dict_index_get_n_unique_in_tree(index);
-
- for (j = 0; j < n_fields; j++) {
-
- col_no = dict_index_get_nth_col_no(index, j);
-
- exp = opt_look_for_col_in_cond_before(
- OPT_EQUAL, col_no,
- static_cast<func_node_t*>(sel_node->search_cond),
- sel_node, nth_table, &op);
- if (exp) {
- /* The value for this column is exactly known already
- at this stage of the join */
-
- index_plan[j] = exp;
- *last_op = op;
- goodness += 4;
- } else {
- /* Look for non-equality comparisons */
-
- exp = opt_look_for_col_in_cond_before(
- OPT_COMPARISON, col_no,
- static_cast<func_node_t*>(
- sel_node->search_cond),
- sel_node, nth_table, &op);
- if (exp) {
- index_plan[j] = exp;
- *last_op = op;
- goodness += 2;
- }
-
- break;
- }
- }
-
- if (goodness >= 4 * dict_index_get_n_unique(index)) {
- goodness += 1024;
-
- if (dict_index_is_clust(index)) {
-
- goodness += 1024;
- }
- }
-
- /* We have to test for goodness here, as last_op may not be set */
- if (goodness && dict_index_is_clust(index)) {
-
- goodness++;
- }
-
- return(goodness);
-}
-
-/*******************************************************************//**
-Calculates the number of matched fields based on an index goodness.
-@return number of excatly or partially matched fields */
-UNIV_INLINE
-ulint
-opt_calc_n_fields_from_goodness(
-/*============================*/
- ulint goodness) /*!< in: goodness */
-{
- return(((goodness % 1024) + 2) / 4);
-}
-
-/*******************************************************************//**
-Converts a comparison operator to the corresponding search mode PAGE_CUR_GE,
-...
-@return search mode */
-UNIV_INLINE
-ulint
-opt_op_to_search_mode(
-/*==================*/
- ibool asc, /*!< in: TRUE if the rows should be fetched in an
- ascending order */
- ulint op) /*!< in: operator '=', PARS_GE_TOKEN, ... */
-{
- if (op == '='
- || op == PARS_LIKE_TOKEN_EXACT
- || op == PARS_LIKE_TOKEN_PREFIX
- || op == PARS_LIKE_TOKEN_SUFFIX
- || op == PARS_LIKE_TOKEN_SUBSTR) {
-
- if (asc) {
- return(PAGE_CUR_GE);
- } else {
- return(PAGE_CUR_LE);
- }
- } else if (op == '<') {
- ut_a(!asc);
- return(PAGE_CUR_L);
- } else if (op == '>') {
- ut_a(asc);
- return(PAGE_CUR_G);
- } else if (op == PARS_GE_TOKEN) {
- ut_a(asc);
- return(PAGE_CUR_GE);
- } else if (op == PARS_LE_TOKEN) {
- ut_a(!asc);
- return(PAGE_CUR_LE);
- } else {
- ut_error;
- }
-
- return(0);
-}
-
-/*******************************************************************//**
-Determines if a node is an argument node of a function node.
-@return TRUE if is an argument */
-static
-ibool
-opt_is_arg(
-/*=======*/
- que_node_t* arg_node, /*!< in: possible argument node */
- func_node_t* func_node) /*!< in: function node */
-{
- que_node_t* arg;
-
- arg = func_node->args;
-
- while (arg) {
- if (arg == arg_node) {
-
- return(TRUE);
- }
-
- arg = que_node_get_next(arg);
- }
-
- return(FALSE);
-}
-
-/*******************************************************************//**
-Decides if the fetching of rows should be made in a descending order, and
-also checks that the chosen query plan produces a result which satisfies
-the order-by. */
-static
-void
-opt_check_order_by(
-/*===============*/
- sel_node_t* sel_node) /*!< in: select node; asserts an error
- if the plan does not agree with the
- order-by */
-{
- order_node_t* order_node;
- dict_table_t* order_table;
- ulint order_col_no;
- plan_t* plan;
- ulint i;
-
- if (!sel_node->order_by) {
-
- return;
- }
-
- order_node = sel_node->order_by;
- order_col_no = order_node->column->col_no;
- order_table = order_node->column->table;
-
- /* If there is an order-by clause, the first non-exactly matched field
- in the index used for the last table in the table list should be the
- column defined in the order-by clause, and for all the other tables
- we should get only at most a single row, otherwise we cannot presently
- calculate the order-by, as we have no sort utility */
-
- for (i = 0; i < sel_node->n_tables; i++) {
-
- plan = sel_node_get_nth_plan(sel_node, i);
-
- if (i < sel_node->n_tables - 1) {
- ut_a(dict_index_get_n_unique(plan->index)
- <= plan->n_exact_match);
- } else {
- ut_a(plan->table == order_table);
-
- ut_a((dict_index_get_n_unique(plan->index)
- <= plan->n_exact_match)
- || (dict_index_get_nth_col_no(plan->index,
- plan->n_exact_match)
- == order_col_no));
- }
- }
-}
-
-/*******************************************************************//**
-Optimizes a select. Decides which indexes to tables to use. The tables
-are accessed in the order that they were written to the FROM part in the
-select statement. */
-static
-void
-opt_search_plan_for_table(
-/*======================*/
- sel_node_t* sel_node, /*!< in: parsed select node */
- ulint i, /*!< in: this is the ith table */
- dict_table_t* table) /*!< in: table */
-{
- plan_t* plan;
- dict_index_t* index;
- dict_index_t* best_index;
- ulint n_fields;
- ulint goodness;
- ulint last_op = 75946965; /* Eliminate a Purify
- warning */
- ulint best_goodness;
- ulint best_last_op = 0; /* remove warning */
- que_node_t* index_plan[256];
- que_node_t* best_index_plan[256];
-
- plan = sel_node_get_nth_plan(sel_node, i);
-
- plan->table = table;
- plan->asc = sel_node->asc;
- plan->pcur_is_open = FALSE;
- plan->cursor_at_end = FALSE;
-
- /* Calculate goodness for each index of the table */
-
- index = dict_table_get_first_index(table);
- best_index = index; /* Eliminate compiler warning */
- best_goodness = 0;
-
- /* should be do ... until ? comment by Jani */
- while (index) {
- goodness = opt_calc_index_goodness(index, sel_node, i,
- index_plan, &last_op);
- if (goodness > best_goodness) {
-
- best_index = index;
- best_goodness = goodness;
- n_fields = opt_calc_n_fields_from_goodness(goodness);
-
- ut_memcpy(best_index_plan, index_plan,
- n_fields * sizeof(void*));
- best_last_op = last_op;
- }
-
- dict_table_next_uncorrupted_index(index);
- }
-
- plan->index = best_index;
-
- n_fields = opt_calc_n_fields_from_goodness(best_goodness);
-
- if (n_fields == 0) {
- plan->tuple = NULL;
- plan->n_exact_match = 0;
- } else {
- plan->tuple = dtuple_create(pars_sym_tab_global->heap,
- n_fields);
- dict_index_copy_types(plan->tuple, plan->index, n_fields);
-
- plan->tuple_exps = static_cast<que_node_t**>(
- mem_heap_alloc(
- pars_sym_tab_global->heap,
- n_fields * sizeof(void*)));
-
- ut_memcpy(plan->tuple_exps, best_index_plan,
- n_fields * sizeof(void*));
- if (best_last_op == '='
- || best_last_op == PARS_LIKE_TOKEN_EXACT
- || best_last_op == PARS_LIKE_TOKEN_PREFIX
- || best_last_op == PARS_LIKE_TOKEN_SUFFIX
- || best_last_op == PARS_LIKE_TOKEN_SUBSTR) {
- plan->n_exact_match = n_fields;
- } else {
- plan->n_exact_match = n_fields - 1;
- }
-
- plan->mode = opt_op_to_search_mode(sel_node->asc,
- best_last_op);
- }
-
- if (dict_index_is_clust(best_index)
- && (plan->n_exact_match >= dict_index_get_n_unique(best_index))) {
-
- plan->unique_search = TRUE;
- } else {
- plan->unique_search = FALSE;
- }
-
- plan->old_vers_heap = NULL;
-
- btr_pcur_init(&(plan->pcur));
- btr_pcur_init(&(plan->clust_pcur));
-}
-
-/*******************************************************************//**
-Looks at a comparison condition and decides if it can, and need, be tested for
-a table AFTER the table has been accessed.
-@return OPT_NOT_COND if not for this table, else OPT_END_COND,
-OPT_TEST_COND, or OPT_SCROLL_COND, where the last means that the
-condition need not be tested, except when scroll cursors are used */
-static
-ulint
-opt_classify_comparison(
-/*====================*/
- sel_node_t* sel_node, /*!< in: select node */
- ulint i, /*!< in: ith table in the join */
- func_node_t* cond) /*!< in: comparison condition */
-{
- plan_t* plan;
- ulint n_fields;
- ulint op;
- ulint j;
-
- ut_ad(cond && sel_node);
-
- plan = sel_node_get_nth_plan(sel_node, i);
-
- /* Check if the condition is determined after the ith table has been
- accessed, but not after the i - 1:th */
-
- if (!opt_check_exp_determined_before(cond, sel_node, i + 1)) {
-
- return(OPT_NOT_COND);
- }
-
- if ((i > 0) && opt_check_exp_determined_before(cond, sel_node, i)) {
-
- return(OPT_NOT_COND);
- }
-
- /* If the condition is an exact match condition used in constructing
- the search tuple, it is classified as OPT_END_COND */
-
- if (plan->tuple) {
- n_fields = dtuple_get_n_fields(plan->tuple);
- } else {
- n_fields = 0;
- }
-
- for (j = 0; j < plan->n_exact_match; j++) {
-
- if (opt_is_arg(plan->tuple_exps[j], cond)) {
-
- return(OPT_END_COND);
- }
- }
-
- /* If the condition is an non-exact match condition used in
- constructing the search tuple, it is classified as OPT_SCROLL_COND.
- When the cursor is positioned, and if a non-scroll cursor is used,
- there is no need to test this condition; if a scroll cursor is used
- the testing is necessary when the cursor is reversed. */
-
- if ((n_fields > plan->n_exact_match)
- && opt_is_arg(plan->tuple_exps[n_fields - 1], cond)) {
-
- return(OPT_SCROLL_COND);
- }
-
- /* If the condition is a non-exact match condition on the first field
- in index for which there is no exact match, and it limits the search
- range from the opposite side of the search tuple already BEFORE we
- access the table, it is classified as OPT_END_COND */
-
- if ((dict_index_get_n_fields(plan->index) > plan->n_exact_match)
- && opt_look_for_col_in_comparison_before(
- OPT_COMPARISON,
- dict_index_get_nth_col_no(plan->index,
- plan->n_exact_match),
- cond, sel_node, i, &op)) {
-
- if (sel_node->asc && ((op == '<') || (op == PARS_LE_TOKEN))) {
-
- return(OPT_END_COND);
- }
-
- if (!sel_node->asc && ((op == '>') || (op == PARS_GE_TOKEN))) {
-
- return(OPT_END_COND);
- }
- }
-
- /* Otherwise, cond is classified as OPT_TEST_COND */
-
- return(OPT_TEST_COND);
-}
-
-/*******************************************************************//**
-Recursively looks for test conditions for a table in a join. */
-static
-void
-opt_find_test_conds(
-/*================*/
- sel_node_t* sel_node, /*!< in: select node */
- ulint i, /*!< in: ith table in the join */
- func_node_t* cond) /*!< in: conjunction of search
- conditions or NULL */
-{
- func_node_t* new_cond;
- ulint fclass;
- plan_t* plan;
-
- if (cond == NULL) {
-
- return;
- }
-
- if (cond->func == PARS_AND_TOKEN) {
- new_cond = static_cast<func_node_t*>(cond->args);
-
- opt_find_test_conds(sel_node, i, new_cond);
-
- new_cond = static_cast<func_node_t*>(
- que_node_get_next(new_cond));
-
- opt_find_test_conds(sel_node, i, new_cond);
-
- return;
- }
-
- plan = sel_node_get_nth_plan(sel_node, i);
-
- fclass = opt_classify_comparison(sel_node, i, cond);
-
- if (fclass == OPT_END_COND) {
- UT_LIST_ADD_LAST(cond_list, plan->end_conds, cond);
-
- } else if (fclass == OPT_TEST_COND) {
- UT_LIST_ADD_LAST(cond_list, plan->other_conds, cond);
-
- }
-}
-
-/*******************************************************************//**
-Normalizes a list of comparison conditions so that a column of the table
-appears on the left side of the comparison if possible. This is accomplished
-by switching the arguments of the operator. */
-static
-void
-opt_normalize_cmp_conds(
-/*====================*/
- func_node_t* cond, /*!< in: first in a list of comparison
- conditions, or NULL */
- dict_table_t* table) /*!< in: table */
-{
- que_node_t* arg1;
- que_node_t* arg2;
- sym_node_t* sym_node;
-
- while (cond) {
- arg1 = cond->args;
- arg2 = que_node_get_next(arg1);
-
- if (que_node_get_type(arg2) == QUE_NODE_SYMBOL) {
-
- sym_node = static_cast<sym_node_t*>(arg2);
-
- if ((sym_node->token_type == SYM_COLUMN)
- && (sym_node->table == table)) {
-
- /* Switch the order of the arguments */
-
- cond->args = arg2;
- que_node_list_add_last(NULL, arg2);
- que_node_list_add_last(arg2, arg1);
-
- /* Invert the operator */
- cond->func = opt_invert_cmp_op(cond->func);
- }
- }
-
- cond = UT_LIST_GET_NEXT(cond_list, cond);
- }
-}
-
-/*******************************************************************//**
-Finds out the search condition conjuncts we can, and need, to test as the ith
-table in a join is accessed. The search tuple can eliminate the need to test
-some conjuncts. */
-static
-void
-opt_determine_and_normalize_test_conds(
-/*===================================*/
- sel_node_t* sel_node, /*!< in: select node */
- ulint i) /*!< in: ith table in the join */
-{
- plan_t* plan;
-
- plan = sel_node_get_nth_plan(sel_node, i);
-
- UT_LIST_INIT(plan->end_conds);
- UT_LIST_INIT(plan->other_conds);
-
- /* Recursively go through the conjuncts and classify them */
-
- opt_find_test_conds(
- sel_node,
- i,
- static_cast<func_node_t*>(sel_node->search_cond));
-
- opt_normalize_cmp_conds(UT_LIST_GET_FIRST(plan->end_conds),
- plan->table);
-
- ut_a(UT_LIST_GET_LEN(plan->end_conds) >= plan->n_exact_match);
-}
-
-/*******************************************************************//**
-Looks for occurrences of the columns of the table in the query subgraph and
-adds them to the list of columns if an occurrence of the same column does not
-already exist in the list. If the column is already in the list, puts a value
-indirection to point to the occurrence in the column list, except if the
-column occurrence we are looking at is in the column list, in which case
-nothing is done. */
-UNIV_INTERN
-void
-opt_find_all_cols(
-/*==============*/
- ibool copy_val, /*!< in: if TRUE, new found columns are
- added as columns to copy */
- dict_index_t* index, /*!< in: index of the table to use */
- sym_node_list_t* col_list, /*!< in: base node of a list where
- to add new found columns */
- plan_t* plan, /*!< in: plan or NULL */
- que_node_t* exp) /*!< in: expression or condition or
- NULL */
-{
- func_node_t* func_node;
- que_node_t* arg;
- sym_node_t* sym_node;
- sym_node_t* col_node;
- ulint col_pos;
-
- if (exp == NULL) {
-
- return;
- }
-
- if (que_node_get_type(exp) == QUE_NODE_FUNC) {
- func_node = static_cast<func_node_t*>(exp);
-
- for (arg = func_node->args;
- arg != 0;
- arg = que_node_get_next(arg)) {
-
- opt_find_all_cols(
- copy_val, index, col_list, plan, arg);
- }
-
- return;
- }
-
- ut_a(que_node_get_type(exp) == QUE_NODE_SYMBOL);
-
- sym_node = static_cast<sym_node_t*>(exp);
-
- if (sym_node->token_type != SYM_COLUMN) {
-
- return;
- }
-
- if (sym_node->table != index->table) {
-
- return;
- }
-
- /* Look for an occurrence of the same column in the plan column
- list */
-
- col_node = UT_LIST_GET_FIRST(*col_list);
-
- while (col_node) {
- if (col_node->col_no == sym_node->col_no) {
-
- if (col_node == sym_node) {
- /* sym_node was already in a list: do
- nothing */
-
- return;
- }
-
- /* Put an indirection */
- sym_node->indirection = col_node;
- sym_node->alias = col_node;
-
- return;
- }
-
- col_node = UT_LIST_GET_NEXT(col_var_list, col_node);
- }
-
- /* The same column did not occur in the list: add it */
-
- UT_LIST_ADD_LAST(col_var_list, *col_list, sym_node);
-
- sym_node->copy_val = copy_val;
-
- /* Fill in the field_no fields in sym_node */
-
- sym_node->field_nos[SYM_CLUST_FIELD_NO] = dict_index_get_nth_col_pos(
- dict_table_get_first_index(index->table), sym_node->col_no,
- NULL);
- if (!dict_index_is_clust(index)) {
-
- ut_a(plan);
-
- col_pos = dict_index_get_nth_col_pos(index, sym_node->col_no,
- NULL);
-
- if (col_pos == ULINT_UNDEFINED) {
-
- plan->must_get_clust = TRUE;
- }
-
- sym_node->field_nos[SYM_SEC_FIELD_NO] = col_pos;
- }
-}
-
-/*******************************************************************//**
-Looks for occurrences of the columns of the table in conditions which are
-not yet determined AFTER the join operation has fetched a row in the ith
-table. The values for these column must be copied to dynamic memory for
-later use. */
-static
-void
-opt_find_copy_cols(
-/*===============*/
- sel_node_t* sel_node, /*!< in: select node */
- ulint i, /*!< in: ith table in the join */
- func_node_t* search_cond) /*!< in: search condition or NULL */
-{
- func_node_t* new_cond;
- plan_t* plan;
-
- if (search_cond == NULL) {
-
- return;
- }
-
- ut_ad(que_node_get_type(search_cond) == QUE_NODE_FUNC);
-
- if (search_cond->func == PARS_AND_TOKEN) {
- new_cond = static_cast<func_node_t*>(search_cond->args);
-
- opt_find_copy_cols(sel_node, i, new_cond);
-
- new_cond = static_cast<func_node_t*>(
- que_node_get_next(new_cond));
-
- opt_find_copy_cols(sel_node, i, new_cond);
-
- return;
- }
-
- if (!opt_check_exp_determined_before(search_cond, sel_node, i + 1)) {
-
- /* Any ith table columns occurring in search_cond should be
- copied, as this condition cannot be tested already on the
- fetch from the ith table */
-
- plan = sel_node_get_nth_plan(sel_node, i);
-
- opt_find_all_cols(TRUE, plan->index, &(plan->columns), plan,
- search_cond);
- }
-}
-
-/*******************************************************************//**
-Classifies the table columns according to whether we use the column only while
-holding the latch on the page, or whether we have to copy the column value to
-dynamic memory. Puts the first occurrence of a column to either list in the
-plan node, and puts indirections to later occurrences of the column. */
-static
-void
-opt_classify_cols(
-/*==============*/
- sel_node_t* sel_node, /*!< in: select node */
- ulint i) /*!< in: ith table in the join */
-{
- plan_t* plan;
- que_node_t* exp;
-
- plan = sel_node_get_nth_plan(sel_node, i);
-
- /* The final value of the following field will depend on the
- environment of the select statement: */
-
- plan->must_get_clust = FALSE;
-
- UT_LIST_INIT(plan->columns);
-
- /* All select list columns should be copied: therefore TRUE as the
- first argument */
-
- for (exp = sel_node->select_list;
- exp != 0;
- exp = que_node_get_next(exp)) {
-
- opt_find_all_cols(
- TRUE, plan->index, &(plan->columns), plan, exp);
- }
-
- opt_find_copy_cols(
- sel_node, i, static_cast<func_node_t*>(sel_node->search_cond));
-
- /* All remaining columns in the search condition are temporary
- columns: therefore FALSE */
-
- opt_find_all_cols(
- FALSE, plan->index, &plan->columns, plan,
- static_cast<func_node_t*>(sel_node->search_cond));
-}
-
-/*******************************************************************//**
-Fills in the info in plan which is used in accessing a clustered index
-record. The columns must already be classified for the plan node. */
-static
-void
-opt_clust_access(
-/*=============*/
- sel_node_t* sel_node, /*!< in: select node */
- ulint n) /*!< in: nth table in select */
-{
- plan_t* plan;
- dict_table_t* table;
- dict_index_t* clust_index;
- dict_index_t* index;
- mem_heap_t* heap;
- ulint n_fields;
- ulint pos;
- ulint i;
-
- plan = sel_node_get_nth_plan(sel_node, n);
-
- index = plan->index;
-
- /* The final value of the following field depends on the environment
- of the select statement: */
-
- plan->no_prefetch = FALSE;
-
- if (dict_index_is_clust(index)) {
- plan->clust_map = NULL;
- plan->clust_ref = NULL;
-
- return;
- }
-
- table = index->table;
-
- clust_index = dict_table_get_first_index(table);
-
- n_fields = dict_index_get_n_unique(clust_index);
-
- heap = pars_sym_tab_global->heap;
-
- plan->clust_ref = dtuple_create(heap, n_fields);
-
- dict_index_copy_types(plan->clust_ref, clust_index, n_fields);
-
- plan->clust_map = static_cast<ulint*>(
- mem_heap_alloc(heap, n_fields * sizeof(ulint)));
-
- for (i = 0; i < n_fields; i++) {
- pos = dict_index_get_nth_field_pos(index, clust_index, i);
-
- ut_a(pos != ULINT_UNDEFINED);
-
- /* We optimize here only queries to InnoDB's internal system
- tables, and they should not contain column prefix indexes. */
-
- if (dict_index_get_nth_field(index, pos)->prefix_len != 0
- || dict_index_get_nth_field(clust_index, i)
- ->prefix_len != 0) {
- fprintf(stderr,
- "InnoDB: Error in pars0opt.cc:"
- " table %s has prefix_len != 0\n",
- index->table_name);
- }
-
- *(plan->clust_map + i) = pos;
-
- ut_ad(pos != ULINT_UNDEFINED);
- }
-}
-
-/*******************************************************************//**
-Optimizes a select. Decides which indexes to tables to use. The tables
-are accessed in the order that they were written to the FROM part in the
-select statement. */
-UNIV_INTERN
-void
-opt_search_plan(
-/*============*/
- sel_node_t* sel_node) /*!< in: parsed select node */
-{
- sym_node_t* table_node;
- dict_table_t* table;
- order_node_t* order_by;
- ulint i;
-
- sel_node->plans = static_cast<plan_t*>(
- mem_heap_alloc(
- pars_sym_tab_global->heap,
- sel_node->n_tables * sizeof(plan_t)));
-
- /* Analyze the search condition to find out what we know at each
- join stage about the conditions that the columns of a table should
- satisfy */
-
- table_node = sel_node->table_list;
-
- if (sel_node->order_by == NULL) {
- sel_node->asc = TRUE;
- } else {
- order_by = sel_node->order_by;
-
- sel_node->asc = order_by->asc;
- }
-
- for (i = 0; i < sel_node->n_tables; i++) {
-
- table = table_node->table;
-
- /* Choose index through which to access the table */
-
- opt_search_plan_for_table(sel_node, i, table);
-
- /* Determine the search condition conjuncts we can test at
- this table; normalize the end conditions */
-
- opt_determine_and_normalize_test_conds(sel_node, i);
-
- table_node = static_cast<sym_node_t*>(
- que_node_get_next(table_node));
- }
-
- table_node = sel_node->table_list;
-
- for (i = 0; i < sel_node->n_tables; i++) {
-
- /* Classify the table columns into those we only need to access
- but not copy, and to those we must copy to dynamic memory */
-
- opt_classify_cols(sel_node, i);
-
- /* Calculate possible info for accessing the clustered index
- record */
-
- opt_clust_access(sel_node, i);
-
- table_node = static_cast<sym_node_t*>(
- que_node_get_next(table_node));
- }
-
- /* Check that the plan obeys a possible order-by clause: if not,
- an assertion error occurs */
-
- opt_check_order_by(sel_node);
-
-#ifdef UNIV_SQL_DEBUG
- opt_print_query_plan(sel_node);
-#endif
-}
-
-/********************************************************************//**
-Prints info of a query plan. */
-UNIV_INTERN
-void
-opt_print_query_plan(
-/*=================*/
- sel_node_t* sel_node) /*!< in: select node */
-{
- plan_t* plan;
- ulint n_fields;
- ulint i;
-
- fputs("QUERY PLAN FOR A SELECT NODE\n", stderr);
-
- fputs(sel_node->asc ? "Asc. search; " : "Desc. search; ", stderr);
-
- if (sel_node->set_x_locks) {
- fputs("sets row x-locks; ", stderr);
- ut_a(sel_node->row_lock_mode == LOCK_X);
- ut_a(!sel_node->consistent_read);
- } else if (sel_node->consistent_read) {
- fputs("consistent read; ", stderr);
- } else {
- ut_a(sel_node->row_lock_mode == LOCK_S);
- fputs("sets row s-locks; ", stderr);
- }
-
- putc('\n', stderr);
-
- for (i = 0; i < sel_node->n_tables; i++) {
- plan = sel_node_get_nth_plan(sel_node, i);
-
- if (plan->tuple) {
- n_fields = dtuple_get_n_fields(plan->tuple);
- } else {
- n_fields = 0;
- }
-
- fputs("Table ", stderr);
- dict_index_name_print(stderr, NULL, plan->index);
- fprintf(stderr,"; exact m. %lu, match %lu, end conds %lu\n",
- (unsigned long) plan->n_exact_match,
- (unsigned long) n_fields,
- (unsigned long) UT_LIST_GET_LEN(plan->end_conds));
- }
-}
diff --git a/storage/xtradb/pars/pars0pars.cc b/storage/xtradb/pars/pars0pars.cc
deleted file mode 100644
index ce61d6e1e3b..00000000000
--- a/storage/xtradb/pars/pars0pars.cc
+++ /dev/null
@@ -1,2670 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St,
-Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file pars/pars0pars.c
-SQL parser
-
-Created 11/19/1996 Heikki Tuuri
-*******************************************************/
-
-/* Historical note: Innobase executed its first SQL string (CREATE TABLE)
-on 1/27/1998 */
-
-#include "pars0pars.h"
-
-#ifdef UNIV_NONINL
-#include "pars0pars.ic"
-#endif
-
-#include "row0sel.h"
-#include "row0ins.h"
-#include "row0upd.h"
-#include "dict0dict.h"
-#include "dict0mem.h"
-#include "dict0crea.h"
-#include "que0que.h"
-#include "pars0grm.h"
-#include "pars0opt.h"
-#include "data0data.h"
-#include "data0type.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "lock0lock.h"
-#include "eval0eval.h"
-
-#ifdef UNIV_SQL_DEBUG
-/** If the following is set TRUE, the lexer will print the SQL string
-as it tokenizes it */
-UNIV_INTERN ibool pars_print_lexed = FALSE;
-#endif /* UNIV_SQL_DEBUG */
-
-/* Global variable used while parsing a single procedure or query : the code is
-NOT re-entrant */
-UNIV_INTERN sym_tab_t* pars_sym_tab_global;
-
-/* Global variables used to denote certain reserved words, used in
-constructing the parsing tree */
-
-UNIV_INTERN pars_res_word_t pars_to_char_token = {PARS_TO_CHAR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_to_number_token = {PARS_TO_NUMBER_TOKEN};
-UNIV_INTERN pars_res_word_t pars_to_binary_token = {PARS_TO_BINARY_TOKEN};
-UNIV_INTERN pars_res_word_t pars_binary_to_number_token = {PARS_BINARY_TO_NUMBER_TOKEN};
-UNIV_INTERN pars_res_word_t pars_substr_token = {PARS_SUBSTR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_replstr_token = {PARS_REPLSTR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_concat_token = {PARS_CONCAT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_instr_token = {PARS_INSTR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_length_token = {PARS_LENGTH_TOKEN};
-UNIV_INTERN pars_res_word_t pars_sysdate_token = {PARS_SYSDATE_TOKEN};
-UNIV_INTERN pars_res_word_t pars_printf_token = {PARS_PRINTF_TOKEN};
-UNIV_INTERN pars_res_word_t pars_assert_token = {PARS_ASSERT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_rnd_token = {PARS_RND_TOKEN};
-UNIV_INTERN pars_res_word_t pars_rnd_str_token = {PARS_RND_STR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_count_token = {PARS_COUNT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_sum_token = {PARS_SUM_TOKEN};
-UNIV_INTERN pars_res_word_t pars_distinct_token = {PARS_DISTINCT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_binary_token = {PARS_BINARY_TOKEN};
-UNIV_INTERN pars_res_word_t pars_blob_token = {PARS_BLOB_TOKEN};
-UNIV_INTERN pars_res_word_t pars_int_token = {PARS_INT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_bigint_token = {PARS_BIGINT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_char_token = {PARS_CHAR_TOKEN};
-UNIV_INTERN pars_res_word_t pars_float_token = {PARS_FLOAT_TOKEN};
-UNIV_INTERN pars_res_word_t pars_update_token = {PARS_UPDATE_TOKEN};
-UNIV_INTERN pars_res_word_t pars_asc_token = {PARS_ASC_TOKEN};
-UNIV_INTERN pars_res_word_t pars_desc_token = {PARS_DESC_TOKEN};
-UNIV_INTERN pars_res_word_t pars_open_token = {PARS_OPEN_TOKEN};
-UNIV_INTERN pars_res_word_t pars_close_token = {PARS_CLOSE_TOKEN};
-UNIV_INTERN pars_res_word_t pars_share_token = {PARS_SHARE_TOKEN};
-UNIV_INTERN pars_res_word_t pars_unique_token = {PARS_UNIQUE_TOKEN};
-UNIV_INTERN pars_res_word_t pars_clustered_token = {PARS_CLUSTERED_TOKEN};
-
-/** Global variable used to denote the '*' in SELECT * FROM.. */
-UNIV_INTERN ulint pars_star_denoter = 12345678;
-
-/********************************************************************
-Get user function with the given name.*/
-UNIV_INLINE
-pars_user_func_t*
-pars_info_lookup_user_func(
-/*=======================*/
- /* out: user func, or NULL if not
- found */
- pars_info_t* info, /* in: info struct */
- const char* name) /* in: function name to find*/
-{
- if (info && info->funcs) {
- ulint i;
- ib_vector_t* vec = info->funcs;
-
- for (i = 0; i < ib_vector_size(vec); i++) {
- pars_user_func_t* puf;
-
- puf = static_cast<pars_user_func_t*>(
- ib_vector_get(vec, i));
-
- if (strcmp(puf->name, name) == 0) {
- return(puf);
- }
- }
- }
-
- return(NULL);
-}
-
-/********************************************************************
-Get bound identifier with the given name.*/
-UNIV_INLINE
-pars_bound_id_t*
-pars_info_lookup_bound_id(
-/*======================*/
- /* out: bound literal, or NULL if
- not found */
- pars_info_t* info, /* in: info struct */
- const char* name) /* in: bound literal name to find */
-{
- if (info && info->bound_ids) {
- ulint i;
- ib_vector_t* vec = info->bound_ids;
-
- for (i = 0; i < ib_vector_size(vec); i++) {
- pars_bound_id_t* bid;
-
- bid = static_cast<pars_bound_id_t*>(
- ib_vector_get(vec, i));
-
- if (strcmp(bid->name, name) == 0) {
- return(bid);
- }
- }
- }
-
- return(NULL);
-}
-
-/********************************************************************
-Get bound literal with the given name.*/
-UNIV_INLINE
-pars_bound_lit_t*
-pars_info_lookup_bound_lit(
-/*=======================*/
- /* out: bound literal, or NULL if
- not found */
- pars_info_t* info, /* in: info struct */
- const char* name) /* in: bound literal name to find */
-{
- if (info && info->bound_lits) {
- ulint i;
- ib_vector_t* vec = info->bound_lits;
-
- for (i = 0; i < ib_vector_size(vec); i++) {
- pars_bound_lit_t* pbl;
-
- pbl = static_cast<pars_bound_lit_t*>(
- ib_vector_get(vec, i));
-
- if (strcmp(pbl->name, name) == 0) {
- return(pbl);
- }
- }
- }
-
- return(NULL);
-}
-
-/*********************************************************************//**
-Determines the class of a function code.
-@return function class: PARS_FUNC_ARITH, ... */
-static
-ulint
-pars_func_get_class(
-/*================*/
- int func) /*!< in: function code: '=', PARS_GE_TOKEN, ... */
-{
- switch (func) {
- case '+': case '-': case '*': case '/':
- return(PARS_FUNC_ARITH);
-
- case '=': case '<': case '>':
- case PARS_GE_TOKEN: case PARS_LE_TOKEN: case PARS_NE_TOKEN:
- return(PARS_FUNC_CMP);
-
- case PARS_AND_TOKEN: case PARS_OR_TOKEN: case PARS_NOT_TOKEN:
- return(PARS_FUNC_LOGICAL);
-
- case PARS_COUNT_TOKEN: case PARS_SUM_TOKEN:
- return(PARS_FUNC_AGGREGATE);
-
- case PARS_TO_CHAR_TOKEN:
- case PARS_TO_NUMBER_TOKEN:
- case PARS_TO_BINARY_TOKEN:
- case PARS_BINARY_TO_NUMBER_TOKEN:
- case PARS_SUBSTR_TOKEN:
- case PARS_CONCAT_TOKEN:
- case PARS_LENGTH_TOKEN:
- case PARS_INSTR_TOKEN:
- case PARS_SYSDATE_TOKEN:
- case PARS_NOTFOUND_TOKEN:
- case PARS_PRINTF_TOKEN:
- case PARS_ASSERT_TOKEN:
- case PARS_RND_TOKEN:
- case PARS_RND_STR_TOKEN:
- case PARS_REPLSTR_TOKEN:
- return(PARS_FUNC_PREDEFINED);
-
- default:
- return(PARS_FUNC_OTHER);
- }
-}
-
-/*********************************************************************//**
-Parses an operator or predefined function expression.
-@return own: function node in a query tree */
-static
-func_node_t*
-pars_func_low(
-/*==========*/
- int func, /*!< in: function token code */
- que_node_t* arg) /*!< in: first argument in the argument list */
-{
- func_node_t* node;
-
- node = static_cast<func_node_t*>(
- mem_heap_alloc(pars_sym_tab_global->heap, sizeof(func_node_t)));
-
- node->common.type = QUE_NODE_FUNC;
- dfield_set_data(&(node->common.val), NULL, 0);
- node->common.val_buf_size = 0;
-
- node->func = func;
-
- node->fclass = pars_func_get_class(func);
-
- node->args = arg;
-
- UT_LIST_ADD_LAST(func_node_list, pars_sym_tab_global->func_node_list,
- node);
- return(node);
-}
-
-/*********************************************************************//**
-Parses a function expression.
-@return own: function node in a query tree */
-UNIV_INTERN
-func_node_t*
-pars_func(
-/*======*/
- que_node_t* res_word,/*!< in: function name reserved word */
- que_node_t* arg) /*!< in: first argument in the argument list */
-{
- return(pars_func_low(((pars_res_word_t*) res_word)->code, arg));
-}
-
-/*************************************************************************
-Rebind a LIKE search string. NOTE: We ignore any '%' characters embedded
-within the search string.*/
-
-int
-pars_like_rebind(
-/*=============*/
- /* out, own: function node in a query tree */
- sym_node_t* node, /* in: The search string node.*/
- const byte* ptr, /* in: literal to (re) bind */
- ulint ptr_len)/* in: length of literal to (re) bind*/
-{
- dtype_t* dtype;
- dfield_t* dfield;
- ib_like_t op_check;
- sym_node_t* like_node;
- sym_node_t* str_node = NULL;
- ib_like_t op = IB_LIKE_EXACT;
- int func = PARS_LIKE_TOKEN_EXACT;
-
- /* Is this a STRING% ? */
- if (ptr[ptr_len - 1] == '%') {
- op = IB_LIKE_PREFIX;
- }
-
- /* Is this a '%STRING' or %STRING% ?*/
- if (*ptr == '%') {
- op = (op == IB_LIKE_PREFIX) ? IB_LIKE_SUBSTR : IB_LIKE_SUFFIX;
- }
-
- if (node->like_node == NULL) {
- /* Add the LIKE operator info node to the node list.
- This will be used during the comparison phase to determine
- how to match.*/
- like_node = sym_tab_add_int_lit(node->sym_table, op);
- que_node_list_add_last(NULL, like_node);
- node->like_node = like_node;
- str_node = sym_tab_add_str_lit(node->sym_table, ptr, ptr_len);
- que_node_list_add_last(like_node, str_node);
- } else {
- like_node = node->like_node;
-
- /* Change the value of the string in the existing
- string node of like node */
- str_node = static_cast<sym_node_t*>(
- que_node_list_get_last(like_node));
-
- /* Must find the string node */
- ut_a(str_node);
- ut_a(str_node != like_node);
- ut_a(str_node->token_type == SYM_LIT);
-
- dfield = que_node_get_val(str_node);
- dfield_set_data(dfield, ptr, ptr_len);
- }
-
- dfield = que_node_get_val(like_node);
- dtype = dfield_get_type(dfield);
-
- ut_a(dtype_get_mtype(dtype) == DATA_INT);
- op_check = static_cast<ib_like_t>(
- mach_read_from_4(static_cast<byte*>(dfield_get_data(dfield))));
-
- switch (op_check) {
- case IB_LIKE_PREFIX:
- case IB_LIKE_SUFFIX:
- case IB_LIKE_SUBSTR:
- case IB_LIKE_EXACT:
- break;
-
- default:
- ut_error;
- }
-
- mach_write_to_4(static_cast<byte*>(dfield_get_data(dfield)), op);
-
- dfield = que_node_get_val(node);
-
- /* Adjust the length of the search value so the '%' is not
- visible. Then create and add a search string node to the
- search value node. Searching for %SUFFIX and %SUBSTR% requires
- a full table scan and so we set the search value to ''.
- For PREFIX% we simply remove the trailing '%'.*/
-
- switch (op) {
- case IB_LIKE_EXACT:
- dfield = que_node_get_val(str_node);
- dtype = dfield_get_type(dfield);
-
- ut_a(dtype_get_mtype(dtype) == DATA_VARCHAR);
-
- dfield_set_data(dfield, ptr, ptr_len);
- break;
-
- case IB_LIKE_PREFIX:
- func = PARS_LIKE_TOKEN_PREFIX;
-
- /* Modify the original node */
- dfield_set_len(dfield, ptr_len - 1);
-
- dfield = que_node_get_val(str_node);
- dtype = dfield_get_type(dfield);
-
- ut_a(dtype_get_mtype(dtype) == DATA_VARCHAR);
-
- dfield_set_data(dfield, ptr, ptr_len - 1);
- break;
-
- case IB_LIKE_SUFFIX:
- func = PARS_LIKE_TOKEN_SUFFIX;
-
- /* Modify the original node */
- /* Make it an '' empty string */
- dfield_set_len(dfield, 0);
-
- dfield = que_node_get_val(str_node);
- dtype = dfield_get_type(dfield);
-
- ut_a(dtype_get_mtype(dtype) == DATA_VARCHAR);
-
- dfield_set_data(dfield, ptr + 1, ptr_len - 1);
- break;
-
- case IB_LIKE_SUBSTR:
- func = PARS_LIKE_TOKEN_SUBSTR;
-
- /* Modify the original node */
- /* Make it an '' empty string */
- dfield_set_len(dfield, 0);
-
- dfield = que_node_get_val(str_node);
- dtype = dfield_get_type(dfield);
-
- ut_a(dtype_get_mtype(dtype) == DATA_VARCHAR);
-
- dfield_set_data(dfield, ptr + 1, ptr_len - 2);
- break;
-
- default:
- ut_error;
- }
-
- return(func);
-}
-
-/*************************************************************************
-Parses a LIKE operator expression. */
-static
-int
-pars_like_op(
-/*=========*/
- /* out, own: function node in a query tree */
- que_node_t* arg) /* in: LIKE comparison string.*/
-{
- char* ptr;
- ulint ptr_len;
- int func = PARS_LIKE_TOKEN_EXACT;
- dfield_t* dfield = que_node_get_val(arg);
- dtype_t* dtype = dfield_get_type(dfield);
-
- ut_a(dtype_get_mtype(dtype) == DATA_CHAR
- || dtype_get_mtype(dtype) == DATA_VARCHAR);
-
- ptr = static_cast<char*>(dfield_get_data(dfield));
- ptr_len = strlen(ptr);
-
- if (ptr_len) {
-
- func = pars_like_rebind(
- static_cast<sym_node_t*>(arg), (byte*) ptr, ptr_len);
- }
-
- return(func);
-}
-/*********************************************************************//**
-Parses an operator expression.
-@return own: function node in a query tree */
-UNIV_INTERN
-func_node_t*
-pars_op(
-/*====*/
- int func, /*!< in: operator token code */
- que_node_t* arg1, /*!< in: first argument */
- que_node_t* arg2) /*!< in: second argument or NULL for an unary
- operator */
-{
- que_node_list_add_last(NULL, arg1);
-
- if (arg2) {
- que_node_list_add_last(arg1, arg2);
- }
-
- /* We need to parse the string and determine whether it's a
- PREFIX, SUFFIX or SUBSTRING comparison */
- if (func == PARS_LIKE_TOKEN) {
-
- ut_a(que_node_get_type(arg2) == QUE_NODE_SYMBOL);
-
- func = pars_like_op(arg2);
-
- ut_a(func == PARS_LIKE_TOKEN_EXACT
- || func == PARS_LIKE_TOKEN_PREFIX
- || func == PARS_LIKE_TOKEN_SUFFIX
- || func == PARS_LIKE_TOKEN_SUBSTR);
- }
-
- return(pars_func_low(func, arg1));
-}
-
-/*********************************************************************//**
-Parses an ORDER BY clause. Order by a single column only is supported.
-@return own: order-by node in a query tree */
-UNIV_INTERN
-order_node_t*
-pars_order_by(
-/*==========*/
- sym_node_t* column, /*!< in: column name */
- pars_res_word_t* asc) /*!< in: &pars_asc_token or pars_desc_token */
-{
- order_node_t* node;
-
- node = static_cast<order_node_t*>(
- mem_heap_alloc(
- pars_sym_tab_global->heap, sizeof(order_node_t)));
-
- node->common.type = QUE_NODE_ORDER;
-
- node->column = column;
-
- if (asc == &pars_asc_token) {
- node->asc = TRUE;
- } else {
- ut_a(asc == &pars_desc_token);
- node->asc = FALSE;
- }
-
- return(node);
-}
-
-/*********************************************************************//**
-Determine if a data type is a built-in string data type of the InnoDB
-SQL parser.
-@return TRUE if string data type */
-static
-ibool
-pars_is_string_type(
-/*================*/
- ulint mtype) /*!< in: main data type */
-{
- switch (mtype) {
- case DATA_VARCHAR: case DATA_CHAR:
- case DATA_FIXBINARY: case DATA_BINARY:
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Resolves the data type of a function in an expression. The argument data
-types must already be resolved. */
-static
-void
-pars_resolve_func_data_type(
-/*========================*/
- func_node_t* node) /*!< in: function node */
-{
- que_node_t* arg;
-
- ut_a(que_node_get_type(node) == QUE_NODE_FUNC);
-
- arg = node->args;
-
- switch (node->func) {
- case PARS_SUM_TOKEN:
- case '+': case '-': case '*': case '/':
- /* Inherit the data type from the first argument (which must
- not be the SQL null literal whose type is DATA_ERROR) */
-
- dtype_copy(que_node_get_data_type(node),
- que_node_get_data_type(arg));
-
- ut_a(dtype_get_mtype(que_node_get_data_type(node))
- == DATA_INT);
- break;
-
- case PARS_COUNT_TOKEN:
- ut_a(arg);
- dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
- break;
-
- case PARS_TO_CHAR_TOKEN:
- case PARS_RND_STR_TOKEN:
- ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
- dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
- DATA_ENGLISH, 0);
- break;
-
- case PARS_TO_BINARY_TOKEN:
- if (dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT) {
- dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
- DATA_ENGLISH, 0);
- } else {
- dtype_set(que_node_get_data_type(node), DATA_BINARY,
- 0, 0);
- }
- break;
-
- case PARS_TO_NUMBER_TOKEN:
- case PARS_BINARY_TO_NUMBER_TOKEN:
- case PARS_LENGTH_TOKEN:
- case PARS_INSTR_TOKEN:
- ut_a(pars_is_string_type(que_node_get_data_type(arg)->mtype));
- dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
- break;
-
- case PARS_SYSDATE_TOKEN:
- ut_a(arg == NULL);
- dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
- break;
-
- case PARS_SUBSTR_TOKEN:
- case PARS_CONCAT_TOKEN:
- ut_a(pars_is_string_type(que_node_get_data_type(arg)->mtype));
- dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
- DATA_ENGLISH, 0);
- break;
-
- case '>': case '<': case '=':
- case PARS_GE_TOKEN:
- case PARS_LE_TOKEN:
- case PARS_NE_TOKEN:
- case PARS_AND_TOKEN:
- case PARS_OR_TOKEN:
- case PARS_NOT_TOKEN:
- case PARS_NOTFOUND_TOKEN:
-
- /* We currently have no iboolean type: use integer type */
- dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
- break;
-
- case PARS_RND_TOKEN:
- ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT);
- dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4);
- break;
-
- case PARS_LIKE_TOKEN_EXACT:
- case PARS_LIKE_TOKEN_PREFIX:
- case PARS_LIKE_TOKEN_SUFFIX:
- case PARS_LIKE_TOKEN_SUBSTR:
- dtype_set(que_node_get_data_type(node), DATA_VARCHAR,
- DATA_ENGLISH, 0);
- break;
-
- default:
- ut_error;
- }
-}
-
-/*********************************************************************//**
-Resolves the meaning of variables in an expression and the data types of
-functions. It is an error if some identifier cannot be resolved here. */
-static
-void
-pars_resolve_exp_variables_and_types(
-/*=================================*/
- sel_node_t* select_node, /*!< in: select node or NULL; if
- this is not NULL then the variable
- sym nodes are added to the
- copy_variables list of select_node */
- que_node_t* exp_node) /*!< in: expression */
-{
- func_node_t* func_node;
- que_node_t* arg;
- sym_node_t* sym_node;
- sym_node_t* node;
-
- ut_a(exp_node);
-
- if (que_node_get_type(exp_node) == QUE_NODE_FUNC) {
- func_node = static_cast<func_node_t*>(exp_node);
-
- arg = func_node->args;
-
- while (arg) {
- pars_resolve_exp_variables_and_types(select_node, arg);
-
- arg = que_node_get_next(arg);
- }
-
- pars_resolve_func_data_type(func_node);
-
- return;
- }
-
- ut_a(que_node_get_type(exp_node) == QUE_NODE_SYMBOL);
-
- sym_node = static_cast<sym_node_t*>(exp_node);
-
- if (sym_node->resolved) {
-
- return;
- }
-
- /* Not resolved yet: look in the symbol table for a variable
- or a cursor or a function with the same name */
-
- node = UT_LIST_GET_FIRST(pars_sym_tab_global->sym_list);
-
- while (node) {
- if (node->resolved
- && ((node->token_type == SYM_VAR)
- || (node->token_type == SYM_CURSOR)
- || (node->token_type == SYM_FUNCTION))
- && node->name
- && (sym_node->name_len == node->name_len)
- && (ut_memcmp(sym_node->name, node->name,
- node->name_len) == 0)) {
-
- /* Found a variable or a cursor declared with
- the same name */
-
- break;
- }
-
- node = UT_LIST_GET_NEXT(sym_list, node);
- }
-
- if (!node) {
- fprintf(stderr, "PARSER ERROR: Unresolved identifier %s\n",
- sym_node->name);
- }
-
- ut_a(node);
-
- sym_node->resolved = TRUE;
- sym_node->token_type = SYM_IMPLICIT_VAR;
- sym_node->alias = node;
- sym_node->indirection = node;
-
- if (select_node) {
- UT_LIST_ADD_LAST(col_var_list, select_node->copy_variables,
- sym_node);
- }
-
- dfield_set_type(que_node_get_val(sym_node),
- que_node_get_data_type(node));
-}
-
-/*********************************************************************//**
-Resolves the meaning of variables in an expression list. It is an error if
-some identifier cannot be resolved here. Resolves also the data types of
-functions. */
-static
-void
-pars_resolve_exp_list_variables_and_types(
-/*======================================*/
- sel_node_t* select_node, /*!< in: select node or NULL */
- que_node_t* exp_node) /*!< in: expression list first node, or
- NULL */
-{
- while (exp_node) {
- pars_resolve_exp_variables_and_types(select_node, exp_node);
-
- exp_node = que_node_get_next(exp_node);
- }
-}
-
-/*********************************************************************//**
-Resolves the columns in an expression. */
-static
-void
-pars_resolve_exp_columns(
-/*=====================*/
- sym_node_t* table_node, /*!< in: first node in a table list */
- que_node_t* exp_node) /*!< in: expression */
-{
- func_node_t* func_node;
- que_node_t* arg;
- sym_node_t* sym_node;
- dict_table_t* table;
- sym_node_t* t_node;
- ulint n_cols;
- ulint i;
-
- ut_a(exp_node);
-
- if (que_node_get_type(exp_node) == QUE_NODE_FUNC) {
- func_node = static_cast<func_node_t*>(exp_node);
-
- arg = func_node->args;
-
- while (arg) {
- pars_resolve_exp_columns(table_node, arg);
-
- arg = que_node_get_next(arg);
- }
-
- return;
- }
-
- ut_a(que_node_get_type(exp_node) == QUE_NODE_SYMBOL);
-
- sym_node = static_cast<sym_node_t*>(exp_node);
-
- if (sym_node->resolved) {
-
- return;
- }
-
- /* Not resolved yet: look in the table list for a column with the
- same name */
-
- t_node = table_node;
-
- while (t_node) {
- table = t_node->table;
-
- n_cols = dict_table_get_n_cols(table);
-
- for (i = 0; i < n_cols; i++) {
- const dict_col_t* col
- = dict_table_get_nth_col(table, i);
- const char* col_name
- = dict_table_get_col_name(table, i);
-
- if ((sym_node->name_len == ut_strlen(col_name))
- && (0 == ut_memcmp(sym_node->name, col_name,
- sym_node->name_len))) {
- /* Found */
- sym_node->resolved = TRUE;
- sym_node->token_type = SYM_COLUMN;
- sym_node->table = table;
- sym_node->col_no = i;
- sym_node->prefetch_buf = NULL;
-
- dict_col_copy_type(
- col,
- dfield_get_type(&sym_node
- ->common.val));
-
- return;
- }
- }
-
- t_node = static_cast<sym_node_t*>(que_node_get_next(t_node));
- }
-}
-
-/*********************************************************************//**
-Resolves the meaning of columns in an expression list. */
-static
-void
-pars_resolve_exp_list_columns(
-/*==========================*/
- sym_node_t* table_node, /*!< in: first node in a table list */
- que_node_t* exp_node) /*!< in: expression list first node, or
- NULL */
-{
- while (exp_node) {
- pars_resolve_exp_columns(table_node, exp_node);
-
- exp_node = que_node_get_next(exp_node);
- }
-}
-
-/*********************************************************************//**
-Retrieves the table definition for a table name id. */
-static
-void
-pars_retrieve_table_def(
-/*====================*/
- sym_node_t* sym_node) /*!< in: table node */
-{
- ut_a(sym_node);
- ut_a(que_node_get_type(sym_node) == QUE_NODE_SYMBOL);
-
- /* Open the table only if it is not already opened. */
- if (sym_node->token_type != SYM_TABLE_REF_COUNTED) {
-
- ut_a(sym_node->table == NULL);
-
- sym_node->resolved = TRUE;
- sym_node->token_type = SYM_TABLE_REF_COUNTED;
-
- sym_node->table = dict_table_open_on_name(
- sym_node->name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
-
- ut_a(sym_node->table != NULL);
- }
-}
-
-/*********************************************************************//**
-Retrieves the table definitions for a list of table name ids.
-@return number of tables */
-static
-ulint
-pars_retrieve_table_list_defs(
-/*==========================*/
- sym_node_t* sym_node) /*!< in: first table node in list */
-{
- ulint count = 0;
-
- if (sym_node == NULL) {
-
- return(count);
- }
-
- while (sym_node) {
- pars_retrieve_table_def(sym_node);
-
- count++;
-
- sym_node = static_cast<sym_node_t*>(
- que_node_get_next(sym_node));
- }
-
- return(count);
-}
-
-/*********************************************************************//**
-Adds all columns to the select list if the query is SELECT * FROM ... */
-static
-void
-pars_select_all_columns(
-/*====================*/
- sel_node_t* select_node) /*!< in: select node already containing
- the table list */
-{
- sym_node_t* col_node;
- sym_node_t* table_node;
- dict_table_t* table;
- ulint i;
-
- select_node->select_list = NULL;
-
- table_node = select_node->table_list;
-
- while (table_node) {
- table = table_node->table;
-
- for (i = 0; i < dict_table_get_n_user_cols(table); i++) {
- const char* col_name = dict_table_get_col_name(
- table, i);
-
- col_node = sym_tab_add_id(pars_sym_tab_global,
- (byte*) col_name,
- ut_strlen(col_name));
-
- select_node->select_list = que_node_list_add_last(
- select_node->select_list, col_node);
- }
-
- table_node = static_cast<sym_node_t*>(
- que_node_get_next(table_node));
- }
-}
-
-/*********************************************************************//**
-Parses a select list; creates a query graph node for the whole SELECT
-statement.
-@return own: select node in a query tree */
-UNIV_INTERN
-sel_node_t*
-pars_select_list(
-/*=============*/
- que_node_t* select_list, /*!< in: select list */
- sym_node_t* into_list) /*!< in: variables list or NULL */
-{
- sel_node_t* node;
-
- node = sel_node_create(pars_sym_tab_global->heap);
-
- node->select_list = select_list;
- node->into_list = into_list;
-
- pars_resolve_exp_list_variables_and_types(NULL, into_list);
-
- return(node);
-}
-
-/*********************************************************************//**
-Checks if the query is an aggregate query, in which case the selct list must
-contain only aggregate function items. */
-static
-void
-pars_check_aggregate(
-/*=================*/
- sel_node_t* select_node) /*!< in: select node already containing
- the select list */
-{
- que_node_t* exp_node;
- func_node_t* func_node;
- ulint n_nodes = 0;
- ulint n_aggregate_nodes = 0;
-
- exp_node = select_node->select_list;
-
- while (exp_node) {
-
- n_nodes++;
-
- if (que_node_get_type(exp_node) == QUE_NODE_FUNC) {
-
- func_node = static_cast<func_node_t*>(exp_node);
-
- if (func_node->fclass == PARS_FUNC_AGGREGATE) {
-
- n_aggregate_nodes++;
- }
- }
-
- exp_node = que_node_get_next(exp_node);
- }
-
- if (n_aggregate_nodes > 0) {
- ut_a(n_nodes == n_aggregate_nodes);
-
- select_node->is_aggregate = TRUE;
- } else {
- select_node->is_aggregate = FALSE;
- }
-}
-
-/*********************************************************************//**
-Parses a select statement.
-@return own: select node in a query tree */
-UNIV_INTERN
-sel_node_t*
-pars_select_statement(
-/*==================*/
- sel_node_t* select_node, /*!< in: select node already containing
- the select list */
- sym_node_t* table_list, /*!< in: table list */
- que_node_t* search_cond, /*!< in: search condition or NULL */
- pars_res_word_t* for_update, /*!< in: NULL or &pars_update_token */
- pars_res_word_t* lock_shared, /*!< in: NULL or &pars_share_token */
- order_node_t* order_by) /*!< in: NULL or an order-by node */
-{
- select_node->state = SEL_NODE_OPEN;
-
- select_node->table_list = table_list;
- select_node->n_tables = pars_retrieve_table_list_defs(table_list);
-
- if (select_node->select_list == &pars_star_denoter) {
-
- /* SELECT * FROM ... */
- pars_select_all_columns(select_node);
- }
-
- if (select_node->into_list) {
- ut_a(que_node_list_get_len(select_node->into_list)
- == que_node_list_get_len(select_node->select_list));
- }
-
- UT_LIST_INIT(select_node->copy_variables);
-
- pars_resolve_exp_list_columns(table_list, select_node->select_list);
- pars_resolve_exp_list_variables_and_types(select_node,
- select_node->select_list);
- pars_check_aggregate(select_node);
-
- select_node->search_cond = search_cond;
-
- if (search_cond) {
- pars_resolve_exp_columns(table_list, search_cond);
- pars_resolve_exp_variables_and_types(select_node, search_cond);
- }
-
- if (for_update) {
- ut_a(!lock_shared);
-
- select_node->set_x_locks = TRUE;
- select_node->row_lock_mode = LOCK_X;
-
- select_node->consistent_read = FALSE;
- select_node->read_view = NULL;
- } else if (lock_shared){
- select_node->set_x_locks = FALSE;
- select_node->row_lock_mode = LOCK_S;
-
- select_node->consistent_read = FALSE;
- select_node->read_view = NULL;
- } else {
- select_node->set_x_locks = FALSE;
- select_node->row_lock_mode = LOCK_S;
-
- select_node->consistent_read = TRUE;
- }
-
- select_node->order_by = order_by;
-
- if (order_by) {
- pars_resolve_exp_columns(table_list, order_by->column);
- }
-
- /* The final value of the following fields depend on the environment
- where the select statement appears: */
-
- select_node->can_get_updated = FALSE;
- select_node->explicit_cursor = NULL;
-
- opt_search_plan(select_node);
-
- return(select_node);
-}
-
-/*********************************************************************//**
-Parses a cursor declaration.
-@return sym_node */
-UNIV_INTERN
-que_node_t*
-pars_cursor_declaration(
-/*====================*/
- sym_node_t* sym_node, /*!< in: cursor id node in the symbol
- table */
- sel_node_t* select_node) /*!< in: select node */
-{
- sym_node->resolved = TRUE;
- sym_node->token_type = SYM_CURSOR;
- sym_node->cursor_def = select_node;
-
- select_node->state = SEL_NODE_CLOSED;
- select_node->explicit_cursor = sym_node;
-
- return(sym_node);
-}
-
-/*********************************************************************//**
-Parses a function declaration.
-@return sym_node */
-UNIV_INTERN
-que_node_t*
-pars_function_declaration(
-/*======================*/
- sym_node_t* sym_node) /*!< in: function id node in the symbol
- table */
-{
- sym_node->resolved = TRUE;
- sym_node->token_type = SYM_FUNCTION;
-
- /* Check that the function exists. */
- ut_a(pars_info_lookup_user_func(
- pars_sym_tab_global->info, sym_node->name));
-
- return(sym_node);
-}
-
-/*********************************************************************//**
-Parses a delete or update statement start.
-@return own: update node in a query tree */
-UNIV_INTERN
-upd_node_t*
-pars_update_statement_start(
-/*========================*/
- ibool is_delete, /*!< in: TRUE if delete */
- sym_node_t* table_sym, /*!< in: table name node */
- col_assign_node_t* col_assign_list)/*!< in: column assignment list, NULL
- if delete */
-{
- upd_node_t* node;
-
- node = upd_node_create(pars_sym_tab_global->heap);
-
- node->is_delete = is_delete;
-
- node->table_sym = table_sym;
- node->col_assign_list = col_assign_list;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a column assignment in an update.
-@return column assignment node */
-UNIV_INTERN
-col_assign_node_t*
-pars_column_assignment(
-/*===================*/
- sym_node_t* column, /*!< in: column to assign */
- que_node_t* exp) /*!< in: value to assign */
-{
- col_assign_node_t* node;
-
- node = static_cast<col_assign_node_t*>(
- mem_heap_alloc(pars_sym_tab_global->heap,
- sizeof(col_assign_node_t)));
- node->common.type = QUE_NODE_COL_ASSIGNMENT;
-
- node->col = column;
- node->val = exp;
-
- return(node);
-}
-
-/*********************************************************************//**
-Processes an update node assignment list. */
-static
-void
-pars_process_assign_list(
-/*=====================*/
- upd_node_t* node) /*!< in: update node */
-{
- col_assign_node_t* col_assign_list;
- sym_node_t* table_sym;
- col_assign_node_t* assign_node;
- upd_field_t* upd_field;
- dict_index_t* clust_index;
- sym_node_t* col_sym;
- ulint changes_ord_field;
- ulint changes_field_size;
- ulint n_assigns;
- ulint i;
-
- table_sym = node->table_sym;
- col_assign_list = static_cast<col_assign_node_t*>(
- node->col_assign_list);
- clust_index = dict_table_get_first_index(node->table);
-
- assign_node = col_assign_list;
- n_assigns = 0;
-
- while (assign_node) {
- pars_resolve_exp_columns(table_sym, assign_node->col);
- pars_resolve_exp_columns(table_sym, assign_node->val);
- pars_resolve_exp_variables_and_types(NULL, assign_node->val);
-#if 0
- ut_a(dtype_get_mtype(
- dfield_get_type(que_node_get_val(
- assign_node->col)))
- == dtype_get_mtype(
- dfield_get_type(que_node_get_val(
- assign_node->val))));
-#endif
-
- /* Add to the update node all the columns found in assignment
- values as columns to copy: therefore, TRUE */
-
- opt_find_all_cols(TRUE, clust_index, &(node->columns), NULL,
- assign_node->val);
- n_assigns++;
-
- assign_node = static_cast<col_assign_node_t*>(
- que_node_get_next(assign_node));
- }
-
- node->update = upd_create(n_assigns, pars_sym_tab_global->heap);
-
- assign_node = col_assign_list;
-
- changes_field_size = UPD_NODE_NO_SIZE_CHANGE;
-
- for (i = 0; i < n_assigns; i++) {
- upd_field = upd_get_nth_field(node->update, i);
-
- col_sym = assign_node->col;
-
- upd_field_set_field_no(upd_field, dict_index_get_nth_col_pos(
- clust_index, col_sym->col_no,
- NULL),
- clust_index, NULL);
- upd_field->exp = assign_node->val;
-
- if (!dict_col_get_fixed_size(
- dict_index_get_nth_col(clust_index,
- upd_field->field_no),
- dict_table_is_comp(node->table))) {
- changes_field_size = 0;
- }
-
- assign_node = static_cast<col_assign_node_t*>(
- que_node_get_next(assign_node));
- }
-
- /* Find out if the update can modify an ordering field in any index */
-
- changes_ord_field = UPD_NODE_NO_ORD_CHANGE;
-
- if (row_upd_changes_some_index_ord_field_binary(node->table,
- node->update)) {
- changes_ord_field = 0;
- }
-
- node->cmpl_info = changes_ord_field | changes_field_size;
-}
-
-/*********************************************************************//**
-Parses an update or delete statement.
-@return own: update node in a query tree */
-UNIV_INTERN
-upd_node_t*
-pars_update_statement(
-/*==================*/
- upd_node_t* node, /*!< in: update node */
- sym_node_t* cursor_sym, /*!< in: pointer to a cursor entry in
- the symbol table or NULL */
- que_node_t* search_cond) /*!< in: search condition or NULL */
-{
- sym_node_t* table_sym;
- sel_node_t* sel_node;
- plan_t* plan;
-
- table_sym = node->table_sym;
-
- pars_retrieve_table_def(table_sym);
- node->table = table_sym->table;
-
- UT_LIST_INIT(node->columns);
-
- /* Make the single table node into a list of table nodes of length 1 */
-
- que_node_list_add_last(NULL, table_sym);
-
- if (cursor_sym) {
- pars_resolve_exp_variables_and_types(NULL, cursor_sym);
-
- sel_node = cursor_sym->alias->cursor_def;
-
- node->searched_update = FALSE;
- } else {
- sel_node = pars_select_list(NULL, NULL);
-
- pars_select_statement(sel_node, table_sym, search_cond, NULL,
- &pars_share_token, NULL);
- node->searched_update = TRUE;
- sel_node->common.parent = node;
- }
-
- node->select = sel_node;
-
- ut_a(!node->is_delete || (node->col_assign_list == NULL));
- ut_a(node->is_delete || (node->col_assign_list != NULL));
-
- if (node->is_delete) {
- node->cmpl_info = 0;
- } else {
- pars_process_assign_list(node);
- }
-
- if (node->searched_update) {
- node->has_clust_rec_x_lock = TRUE;
- sel_node->set_x_locks = TRUE;
- sel_node->row_lock_mode = LOCK_X;
- } else {
- node->has_clust_rec_x_lock = sel_node->set_x_locks;
- }
-
- ut_a(sel_node->n_tables == 1);
- ut_a(sel_node->consistent_read == FALSE);
- ut_a(sel_node->order_by == NULL);
- ut_a(sel_node->is_aggregate == FALSE);
-
- sel_node->can_get_updated = TRUE;
-
- node->state = UPD_NODE_UPDATE_CLUSTERED;
-
- plan = sel_node_get_nth_plan(sel_node, 0);
-
- plan->no_prefetch = TRUE;
-
- if (!dict_index_is_clust(plan->index)) {
-
- plan->must_get_clust = TRUE;
-
- node->pcur = &(plan->clust_pcur);
- } else {
- node->pcur = &(plan->pcur);
- }
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses an insert statement.
-@return own: update node in a query tree */
-UNIV_INTERN
-ins_node_t*
-pars_insert_statement(
-/*==================*/
- sym_node_t* table_sym, /*!< in: table name node */
- que_node_t* values_list, /*!< in: value expression list or NULL */
- sel_node_t* select) /*!< in: select condition or NULL */
-{
- ins_node_t* node;
- dtuple_t* row;
- ulint ins_type;
-
- ut_a(values_list || select);
- ut_a(!values_list || !select);
-
- if (values_list) {
- ins_type = INS_VALUES;
- } else {
- ins_type = INS_SEARCHED;
- }
-
- pars_retrieve_table_def(table_sym);
-
- node = ins_node_create(ins_type, table_sym->table,
- pars_sym_tab_global->heap);
-
- row = dtuple_create(pars_sym_tab_global->heap,
- dict_table_get_n_cols(node->table));
-
- dict_table_copy_types(row, table_sym->table);
-
- ins_node_set_new_row(node, row);
-
- node->select = select;
-
- if (select) {
- select->common.parent = node;
-
- ut_a(que_node_list_get_len(select->select_list)
- == dict_table_get_n_user_cols(table_sym->table));
- }
-
- node->values_list = values_list;
-
- if (node->values_list) {
- pars_resolve_exp_list_variables_and_types(NULL, values_list);
-
- ut_a(que_node_list_get_len(values_list)
- == dict_table_get_n_user_cols(table_sym->table));
- }
-
- return(node);
-}
-
-/*********************************************************************//**
-Set the type of a dfield. */
-static
-void
-pars_set_dfield_type(
-/*=================*/
- dfield_t* dfield, /*!< in: dfield */
- pars_res_word_t* type, /*!< in: pointer to a type
- token */
- ulint len, /*!< in: length, or 0 */
- ibool is_unsigned, /*!< in: if TRUE, column is
- UNSIGNED. */
- ibool is_not_null) /*!< in: if TRUE, column is
- NOT NULL. */
-{
- ulint flags = 0;
-
- if (is_not_null) {
- flags |= DATA_NOT_NULL;
- }
-
- if (is_unsigned) {
- flags |= DATA_UNSIGNED;
- }
-
- if (type == &pars_bigint_token) {
- ut_a(len == 0);
-
- dtype_set(dfield_get_type(dfield), DATA_INT, flags, 8);
- } else if (type == &pars_int_token) {
- ut_a(len == 0);
-
- dtype_set(dfield_get_type(dfield), DATA_INT, flags, 4);
-
- } else if (type == &pars_char_token) {
- //ut_a(len == 0);
-
- dtype_set(dfield_get_type(dfield), DATA_VARCHAR,
- DATA_ENGLISH | flags, len);
- } else if (type == &pars_binary_token) {
- ut_a(len != 0);
-
- dtype_set(dfield_get_type(dfield), DATA_FIXBINARY,
- DATA_BINARY_TYPE | flags, len);
- } else if (type == &pars_blob_token) {
- ut_a(len == 0);
-
- dtype_set(dfield_get_type(dfield), DATA_BLOB,
- DATA_BINARY_TYPE | flags, 0);
- } else {
- ut_error;
- }
-}
-
-/*********************************************************************//**
-Parses a variable declaration.
-@return own: symbol table node of type SYM_VAR */
-UNIV_INTERN
-sym_node_t*
-pars_variable_declaration(
-/*======================*/
- sym_node_t* node, /*!< in: symbol table node allocated for the
- id of the variable */
- pars_res_word_t* type) /*!< in: pointer to a type token */
-{
- node->resolved = TRUE;
- node->token_type = SYM_VAR;
-
- node->param_type = PARS_NOT_PARAM;
-
- pars_set_dfield_type(que_node_get_val(node), type, 0, FALSE, FALSE);
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a procedure parameter declaration.
-@return own: symbol table node of type SYM_VAR */
-UNIV_INTERN
-sym_node_t*
-pars_parameter_declaration(
-/*=======================*/
- sym_node_t* node, /*!< in: symbol table node allocated for the
- id of the parameter */
- ulint param_type,
- /*!< in: PARS_INPUT or PARS_OUTPUT */
- pars_res_word_t* type) /*!< in: pointer to a type token */
-{
- ut_a((param_type == PARS_INPUT) || (param_type == PARS_OUTPUT));
-
- pars_variable_declaration(node, type);
-
- node->param_type = param_type;
-
- return(node);
-}
-
-/*********************************************************************//**
-Sets the parent field in a query node list. */
-static
-void
-pars_set_parent_in_list(
-/*====================*/
- que_node_t* node_list, /*!< in: first node in a list */
- que_node_t* parent) /*!< in: parent value to set in all
- nodes of the list */
-{
- que_common_t* common;
-
- common = static_cast<que_common_t*>(node_list);
-
- while (common) {
- common->parent = parent;
-
- common = static_cast<que_common_t*>(que_node_get_next(common));
- }
-}
-
-/*********************************************************************//**
-Parses an elsif element.
-@return elsif node */
-UNIV_INTERN
-elsif_node_t*
-pars_elsif_element(
-/*===============*/
- que_node_t* cond, /*!< in: if-condition */
- que_node_t* stat_list) /*!< in: statement list */
-{
- elsif_node_t* node;
-
- node = static_cast<elsif_node_t*>(
- mem_heap_alloc(
- pars_sym_tab_global->heap, sizeof(elsif_node_t)));
-
- node->common.type = QUE_NODE_ELSIF;
-
- node->cond = cond;
-
- pars_resolve_exp_variables_and_types(NULL, cond);
-
- node->stat_list = stat_list;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses an if-statement.
-@return if-statement node */
-UNIV_INTERN
-if_node_t*
-pars_if_statement(
-/*==============*/
- que_node_t* cond, /*!< in: if-condition */
- que_node_t* stat_list, /*!< in: statement list */
- que_node_t* else_part) /*!< in: else-part statement list
- or elsif element list */
-{
- if_node_t* node;
- elsif_node_t* elsif_node;
-
- node = static_cast<if_node_t*>(
- mem_heap_alloc(
- pars_sym_tab_global->heap, sizeof(if_node_t)));
-
- node->common.type = QUE_NODE_IF;
-
- node->cond = cond;
-
- pars_resolve_exp_variables_and_types(NULL, cond);
-
- node->stat_list = stat_list;
-
- if (else_part && (que_node_get_type(else_part) == QUE_NODE_ELSIF)) {
-
- /* There is a list of elsif conditions */
-
- node->else_part = NULL;
- node->elsif_list = static_cast<elsif_node_t*>(else_part);
-
- elsif_node = static_cast<elsif_node_t*>(else_part);
-
- while (elsif_node) {
- pars_set_parent_in_list(elsif_node->stat_list, node);
-
- elsif_node = static_cast<elsif_node_t*>(
- que_node_get_next(elsif_node));
- }
- } else {
- node->else_part = else_part;
- node->elsif_list = NULL;
-
- pars_set_parent_in_list(else_part, node);
- }
-
- pars_set_parent_in_list(stat_list, node);
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a while-statement.
-@return while-statement node */
-UNIV_INTERN
-while_node_t*
-pars_while_statement(
-/*=================*/
- que_node_t* cond, /*!< in: while-condition */
- que_node_t* stat_list) /*!< in: statement list */
-{
- while_node_t* node;
-
- node = static_cast<while_node_t*>(
- mem_heap_alloc(
- pars_sym_tab_global->heap, sizeof(while_node_t)));
-
- node->common.type = QUE_NODE_WHILE;
-
- node->cond = cond;
-
- pars_resolve_exp_variables_and_types(NULL, cond);
-
- node->stat_list = stat_list;
-
- pars_set_parent_in_list(stat_list, node);
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a for-loop-statement.
-@return for-statement node */
-UNIV_INTERN
-for_node_t*
-pars_for_statement(
-/*===============*/
- sym_node_t* loop_var, /*!< in: loop variable */
- que_node_t* loop_start_limit,/*!< in: loop start expression */
- que_node_t* loop_end_limit, /*!< in: loop end expression */
- que_node_t* stat_list) /*!< in: statement list */
-{
- for_node_t* node;
-
- node = static_cast<for_node_t*>(
- mem_heap_alloc(pars_sym_tab_global->heap, sizeof(for_node_t)));
-
- node->common.type = QUE_NODE_FOR;
-
- pars_resolve_exp_variables_and_types(NULL, loop_var);
- pars_resolve_exp_variables_and_types(NULL, loop_start_limit);
- pars_resolve_exp_variables_and_types(NULL, loop_end_limit);
-
- node->loop_var = loop_var->indirection;
-
- ut_a(loop_var->indirection);
-
- node->loop_start_limit = loop_start_limit;
- node->loop_end_limit = loop_end_limit;
-
- node->stat_list = stat_list;
-
- pars_set_parent_in_list(stat_list, node);
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses an exit statement.
-@return exit statement node */
-UNIV_INTERN
-exit_node_t*
-pars_exit_statement(void)
-/*=====================*/
-{
- exit_node_t* node;
-
- node = static_cast<exit_node_t*>(
- mem_heap_alloc(pars_sym_tab_global->heap, sizeof(exit_node_t)));
- node->common.type = QUE_NODE_EXIT;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a return-statement.
-@return return-statement node */
-UNIV_INTERN
-return_node_t*
-pars_return_statement(void)
-/*=======================*/
-{
- return_node_t* node;
-
- node = static_cast<return_node_t*>(
- mem_heap_alloc(
- pars_sym_tab_global->heap, sizeof(return_node_t)));
- node->common.type = QUE_NODE_RETURN;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses an assignment statement.
-@return assignment statement node */
-UNIV_INTERN
-assign_node_t*
-pars_assignment_statement(
-/*======================*/
- sym_node_t* var, /*!< in: variable to assign */
- que_node_t* val) /*!< in: value to assign */
-{
- assign_node_t* node;
-
- node = static_cast<assign_node_t*>(
- mem_heap_alloc(
- pars_sym_tab_global->heap, sizeof(assign_node_t)));
- node->common.type = QUE_NODE_ASSIGNMENT;
-
- node->var = var;
- node->val = val;
-
- pars_resolve_exp_variables_and_types(NULL, var);
- pars_resolve_exp_variables_and_types(NULL, val);
-
- ut_a(dtype_get_mtype(dfield_get_type(que_node_get_val(var)))
- == dtype_get_mtype(dfield_get_type(que_node_get_val(val))));
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a procedure call.
-@return function node */
-UNIV_INTERN
-func_node_t*
-pars_procedure_call(
-/*================*/
- que_node_t* res_word,/*!< in: procedure name reserved word */
- que_node_t* args) /*!< in: argument list */
-{
- func_node_t* node;
-
- node = pars_func(res_word, args);
-
- pars_resolve_exp_list_variables_and_types(NULL, args);
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a fetch statement. into_list or user_func (but not both) must be
-non-NULL.
-@return fetch statement node */
-UNIV_INTERN
-fetch_node_t*
-pars_fetch_statement(
-/*=================*/
- sym_node_t* cursor, /*!< in: cursor node */
- sym_node_t* into_list, /*!< in: variables to set, or NULL */
- sym_node_t* user_func) /*!< in: user function name, or NULL */
-{
- sym_node_t* cursor_decl;
- fetch_node_t* node;
-
- /* Logical XOR. */
- ut_a(!into_list != !user_func);
-
- node = static_cast<fetch_node_t*>(
- mem_heap_alloc(
- pars_sym_tab_global->heap, sizeof(fetch_node_t)));
-
- node->common.type = QUE_NODE_FETCH;
-
- pars_resolve_exp_variables_and_types(NULL, cursor);
-
- if (into_list) {
- pars_resolve_exp_list_variables_and_types(NULL, into_list);
- node->into_list = into_list;
- node->func = NULL;
- } else {
- pars_resolve_exp_variables_and_types(NULL, user_func);
-
- node->func = pars_info_lookup_user_func(
- pars_sym_tab_global->info, user_func->name);
-
- ut_a(node->func);
-
- node->into_list = NULL;
- }
-
- cursor_decl = cursor->alias;
-
- ut_a(cursor_decl->token_type == SYM_CURSOR);
-
- node->cursor_def = cursor_decl->cursor_def;
-
- if (into_list) {
- ut_a(que_node_list_get_len(into_list)
- == que_node_list_get_len(node->cursor_def->select_list));
- }
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses an open or close cursor statement.
-@return fetch statement node */
-UNIV_INTERN
-open_node_t*
-pars_open_statement(
-/*================*/
- ulint type, /*!< in: ROW_SEL_OPEN_CURSOR
- or ROW_SEL_CLOSE_CURSOR */
- sym_node_t* cursor) /*!< in: cursor node */
-{
- sym_node_t* cursor_decl;
- open_node_t* node;
-
- node = static_cast<open_node_t*>(
- mem_heap_alloc(
- pars_sym_tab_global->heap, sizeof(open_node_t)));
-
- node->common.type = QUE_NODE_OPEN;
-
- pars_resolve_exp_variables_and_types(NULL, cursor);
-
- cursor_decl = cursor->alias;
-
- ut_a(cursor_decl->token_type == SYM_CURSOR);
-
- node->op_type = static_cast<open_node_op>(type);
- node->cursor_def = cursor_decl->cursor_def;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a row_printf-statement.
-@return row_printf-statement node */
-UNIV_INTERN
-row_printf_node_t*
-pars_row_printf_statement(
-/*======================*/
- sel_node_t* sel_node) /*!< in: select node */
-{
- row_printf_node_t* node;
-
- node = static_cast<row_printf_node_t*>(
- mem_heap_alloc(
- pars_sym_tab_global->heap, sizeof(row_printf_node_t)));
- node->common.type = QUE_NODE_ROW_PRINTF;
-
- node->sel_node = sel_node;
-
- sel_node->common.parent = node;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a commit statement.
-@return own: commit node struct */
-UNIV_INTERN
-commit_node_t*
-pars_commit_statement(void)
-/*=======================*/
-{
- return(trx_commit_node_create(pars_sym_tab_global->heap));
-}
-
-/*********************************************************************//**
-Parses a rollback statement.
-@return own: rollback node struct */
-UNIV_INTERN
-roll_node_t*
-pars_rollback_statement(void)
-/*=========================*/
-{
- return(roll_node_create(pars_sym_tab_global->heap));
-}
-
-/*********************************************************************//**
-Parses a column definition at a table creation.
-@return column sym table node */
-UNIV_INTERN
-sym_node_t*
-pars_column_def(
-/*============*/
- sym_node_t* sym_node, /*!< in: column node in the
- symbol table */
- pars_res_word_t* type, /*!< in: data type */
- sym_node_t* len, /*!< in: length of column, or
- NULL */
- void* is_unsigned, /*!< in: if not NULL, column
- is of type UNSIGNED. */
- void* is_not_null) /*!< in: if not NULL, column
- is of type NOT NULL. */
-{
- ulint len2;
-
- if (len) {
- len2 = eval_node_get_int_val(len);
- } else {
- len2 = 0;
- }
-
- pars_set_dfield_type(que_node_get_val(sym_node), type, len2,
- is_unsigned != NULL, is_not_null != NULL);
-
- return(sym_node);
-}
-
-/*********************************************************************//**
-Parses a table creation operation.
-@return table create subgraph */
-UNIV_INTERN
-tab_node_t*
-pars_create_table(
-/*==============*/
- sym_node_t* table_sym, /*!< in: table name node in the symbol
- table */
- sym_node_t* column_defs, /*!< in: list of column names */
- sym_node_t* compact, /* in: non-NULL if COMPACT table. */
- sym_node_t* block_size, /* in: block size (can be NULL) */
- void* not_fit_in_memory MY_ATTRIBUTE((unused)))
- /*!< in: a non-NULL pointer means that
- this is a table which in simulations
- should be simulated as not fitting
- in memory; thread is put to sleep
- to simulate disk accesses; NOTE that
- this flag is not stored to the data
- dictionary on disk, and the database
- will forget about non-NULL value if
- it has to reload the table definition
- from disk */
-{
- dict_table_t* table;
- sym_node_t* column;
- tab_node_t* node;
- const dtype_t* dtype;
- ulint n_cols;
- ulint flags = 0;
- ulint flags2 = 0;
-
- if (compact != NULL) {
-
- /* System tables currently only use the REDUNDANT row
- format therefore the check for srv_file_per_table should be
- safe for now. */
-
- flags |= DICT_TF_COMPACT;
-
- /* FIXME: Ideally this should be part of the SQL syntax
- or use some other mechanism. We want to reduce dependency
- on global variables. There is an inherent race here but
- that has always existed around this variable. */
- if (srv_file_per_table) {
- flags2 |= DICT_TF2_USE_TABLESPACE;
- }
- }
-
- if (block_size != NULL) {
- ulint size;
- dfield_t* dfield;
-
- dfield = que_node_get_val(block_size);
-
- ut_a(dfield_get_len(dfield) == 4);
- size = mach_read_from_4(static_cast<byte*>(
- dfield_get_data(dfield)));
-
-
- switch (size) {
- case 0:
- break;
-
- case 1: case 2: case 4: case 8: case 16:
- flags |= DICT_TF_COMPACT;
- /* FTS-FIXME: needs the zip changes */
- /* flags |= size << DICT_TF_COMPRESSED_SHIFT; */
- break;
-
- default:
- ut_error;
- }
- }
-
- /* Set the flags2 when create table or alter tables */
- flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
- DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
- flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;);
-
-
- n_cols = que_node_list_get_len(column_defs);
-
- table = dict_mem_table_create(
- table_sym->name, 0, n_cols, flags, flags2);
-
-#ifdef UNIV_DEBUG
- if (not_fit_in_memory != NULL) {
- table->does_not_fit_in_memory = TRUE;
- }
-#endif /* UNIV_DEBUG */
- column = column_defs;
-
- while (column) {
- dtype = dfield_get_type(que_node_get_val(column));
-
- dict_mem_table_add_col(table, table->heap,
- column->name, dtype->mtype,
- dtype->prtype, dtype->len);
- column->resolved = TRUE;
- column->token_type = SYM_COLUMN;
-
- column = static_cast<sym_node_t*>(que_node_get_next(column));
- }
-
- node = tab_create_graph_create(table, pars_sym_tab_global->heap, true,
- FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
-
- table_sym->resolved = TRUE;
- table_sym->token_type = SYM_TABLE;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses an index creation operation.
-@return index create subgraph */
-UNIV_INTERN
-ind_node_t*
-pars_create_index(
-/*==============*/
- pars_res_word_t* unique_def, /*!< in: not NULL if a unique index */
- pars_res_word_t* clustered_def, /*!< in: not NULL if a clustered index */
- sym_node_t* index_sym, /*!< in: index name node in the symbol
- table */
- sym_node_t* table_sym, /*!< in: table name node in the symbol
- table */
- sym_node_t* column_list) /*!< in: list of column names */
-{
- dict_index_t* index;
- sym_node_t* column;
- ind_node_t* node;
- ulint n_fields;
- ulint ind_type;
-
- n_fields = que_node_list_get_len(column_list);
-
- ind_type = 0;
-
- if (unique_def) {
- ind_type = ind_type | DICT_UNIQUE;
- }
-
- if (clustered_def) {
- ind_type = ind_type | DICT_CLUSTERED;
- }
-
- index = dict_mem_index_create(table_sym->name, index_sym->name, 0,
- ind_type, n_fields);
- column = column_list;
-
- while (column) {
- dict_mem_index_add_field(index, column->name, 0);
-
- column->resolved = TRUE;
- column->token_type = SYM_COLUMN;
-
- column = static_cast<sym_node_t*>(que_node_get_next(column));
- }
-
- node = ind_create_graph_create(index, pars_sym_tab_global->heap, true);
-
- table_sym->resolved = TRUE;
- table_sym->token_type = SYM_TABLE;
-
- index_sym->resolved = TRUE;
- index_sym->token_type = SYM_TABLE;
-
- return(node);
-}
-
-/*********************************************************************//**
-Parses a procedure definition.
-@return query fork node */
-UNIV_INTERN
-que_fork_t*
-pars_procedure_definition(
-/*======================*/
- sym_node_t* sym_node, /*!< in: procedure id node in the symbol
- table */
- sym_node_t* param_list, /*!< in: parameter declaration list */
- que_node_t* stat_list) /*!< in: statement list */
-{
- proc_node_t* node;
- que_fork_t* fork;
- que_thr_t* thr;
- mem_heap_t* heap;
-
- heap = pars_sym_tab_global->heap;
-
- fork = que_fork_create(NULL, NULL, QUE_FORK_PROCEDURE, heap);
- fork->trx = NULL;
-
- thr = que_thr_create(fork, heap);
-
- node = static_cast<proc_node_t*>(
- mem_heap_alloc(heap, sizeof(proc_node_t)));
-
- node->common.type = QUE_NODE_PROC;
- node->common.parent = thr;
-
- sym_node->token_type = SYM_PROCEDURE_NAME;
- sym_node->resolved = TRUE;
-
- node->proc_id = sym_node;
- node->param_list = param_list;
- node->stat_list = stat_list;
-
- pars_set_parent_in_list(stat_list, node);
-
- node->sym_tab = pars_sym_tab_global;
-
- thr->child = node;
-
- pars_sym_tab_global->query_graph = fork;
-
- return(fork);
-}
-
-/*************************************************************//**
-Parses a stored procedure call, when this is not within another stored
-procedure, that is, the client issues a procedure call directly.
-In MySQL/InnoDB, stored InnoDB procedures are invoked via the
-parsed procedure tree, not via InnoDB SQL, so this function is not used.
-@return query graph */
-UNIV_INTERN
-que_fork_t*
-pars_stored_procedure_call(
-/*=======================*/
- sym_node_t* sym_node MY_ATTRIBUTE((unused)))
- /*!< in: stored procedure name */
-{
- ut_error;
- return(NULL);
-}
-
-/*************************************************************//**
-Retrieves characters to the lexical analyzer. */
-UNIV_INTERN
-int
-pars_get_lex_chars(
-/*===============*/
- char* buf, /*!< in/out: buffer where to copy */
- int max_size) /*!< in: maximum number of characters which fit
- in the buffer */
-{
- int len;
-
- len = static_cast<int>(
- pars_sym_tab_global->string_len
- - pars_sym_tab_global->next_char_pos);
- if (len == 0) {
-#ifdef YYDEBUG
- /* fputs("SQL string ends\n", stderr); */
-#endif
- return(0);
- }
-
- if (len > max_size) {
- len = max_size;
- }
-
-#ifdef UNIV_SQL_DEBUG
- if (pars_print_lexed) {
-
- if (len >= 5) {
- len = 5;
- }
-
- fwrite(pars_sym_tab_global->sql_string
- + pars_sym_tab_global->next_char_pos,
- 1, len, stderr);
- }
-#endif /* UNIV_SQL_DEBUG */
-
- ut_memcpy(buf, pars_sym_tab_global->sql_string
- + pars_sym_tab_global->next_char_pos, len);
-
- pars_sym_tab_global->next_char_pos += len;
-
- return(len);
-}
-
-/*************************************************************//**
-Called by yyparse on error. */
-UNIV_INTERN
-void
-yyerror(
-/*====*/
- const char* s MY_ATTRIBUTE((unused)))
- /*!< in: error message string */
-{
- ut_ad(s);
-
- fputs("PARSER ERROR: Syntax error in SQL string\n", stderr);
-
- ut_error;
-}
-
-/*************************************************************//**
-Parses an SQL string returning the query graph.
-@return own: the query graph */
-UNIV_INTERN
-que_t*
-pars_sql(
-/*=====*/
- pars_info_t* info, /*!< in: extra information, or NULL */
- const char* str) /*!< in: SQL string */
-{
- sym_node_t* sym_node;
- mem_heap_t* heap;
- que_t* graph;
-
- ut_ad(str);
-
- heap = mem_heap_create(16000);
-
- /* Currently, the parser is not reentrant: */
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- pars_sym_tab_global = sym_tab_create(heap);
-
- pars_sym_tab_global->string_len = strlen(str);
- pars_sym_tab_global->sql_string = static_cast<char*>(
- mem_heap_dup(heap, str, pars_sym_tab_global->string_len + 1));
- pars_sym_tab_global->next_char_pos = 0;
- pars_sym_tab_global->info = info;
-
- yyparse();
-
- sym_node = UT_LIST_GET_FIRST(pars_sym_tab_global->sym_list);
-
- while (sym_node) {
- ut_a(sym_node->resolved);
-
- sym_node = UT_LIST_GET_NEXT(sym_list, sym_node);
- }
-
- graph = pars_sym_tab_global->query_graph;
-
- graph->sym_tab = pars_sym_tab_global;
- graph->info = info;
-
- pars_sym_tab_global = NULL;
-
- /* fprintf(stderr, "SQL graph size %lu\n", mem_heap_get_size(heap)); */
-
- return(graph);
-}
-
-/******************************************************************//**
-Wraps a (possibly NULL) root node in a new fork of type
-QUE_FORK_MYSQL_INTERFACE and a new query thread, both allocated from
-the given heap, so that the graph can be run. Also resets trx->graph
-to NULL.
-@return query thread node to run */
-UNIV_INTERN
-que_thr_t*
-pars_complete_graph_for_exec(
-/*=========================*/
-	que_node_t*	node,	/*!< in: root node for an incomplete
-				query graph, or NULL for dummy graph */
-	trx_t*		trx,	/*!< in: transaction handle */
-	mem_heap_t*	heap)	/*!< in: memory heap from which allocated */
-{
-	/* The fork is the graph root: no enclosing graph, no parent. */
-	que_fork_t*	root_fork = que_fork_create(
-		NULL, NULL, QUE_FORK_MYSQL_INTERFACE, heap);
-
-	root_fork->trx = trx;
-
-	que_thr_t*	run_thr = que_thr_create(root_fork, heap);
-
-	run_thr->child = node;
-
-	if (node != NULL) {
-		que_node_set_parent(node, run_thr);
-	}
-
-	trx->graph = NULL;
-
-	return(run_thr);
-}
-
-/****************************************************************//**
-Creates an empty parser info struct. The struct lives in its own heap
-(created with mem_heap_create(512)), which it records in info->heap;
-no functions, literals or ids are bound yet and graph_owns_us is TRUE.
-@return own: info struct */
-UNIV_INTERN
-pars_info_t*
-pars_info_create(void)
-/*==================*/
-{
-	mem_heap_t*	own_heap = mem_heap_create(512);
-
-	pars_info_t*	new_info = static_cast<pars_info_t*>(
-		mem_heap_alloc(own_heap, sizeof(pars_info_t)));
-
-	new_info->heap = own_heap;
-	new_info->graph_owns_us = TRUE;
-
-	/* The binding vectors are created lazily on first use. */
-	new_info->bound_ids = NULL;
-	new_info->bound_lits = NULL;
-	new_info->funcs = NULL;
-
-	return(new_info);
-}
-
-/****************************************************************//**
-Frees the info struct by freeing info->heap. pars_info_create()
-allocates the struct itself from that heap, so the struct goes away
-with it. */
-UNIV_INTERN
-void
-pars_info_free(
-/*===========*/
-	pars_info_t*	info)	/*!< in, own: info struct */
-{
-	mem_heap_t*	own_heap = info->heap;
-
-	mem_heap_free(own_heap);
-}
-
-/****************************************************************//**
-Adds a bound literal to the info struct. The name and the data are NOT
-copied: only the pointers are stored, so both must stay valid for as
-long as the binding is used. In debug builds it is asserted that no
-literal with the same name is bound yet.
-
-The entry is built directly in the slot returned by ib_vector_push()
-(the same pattern pars_info_bind_function() and pars_info_bind_id()
-use), and its node pointer is cleared. The pars_info_bind_*() rebind
-paths hand pbl->node to sym_tab_rebind_lit(); without the explicit
-NULL a rebind issued before the statement was parsed would pass an
-indeterminate pointer. */
-UNIV_INTERN
-void
-pars_info_add_literal(
-/*==================*/
-	pars_info_t*	info,		/*!< in: info struct */
-	const char*	name,		/*!< in: name */
-	const void*	address,	/*!< in: address */
-	ulint		length,		/*!< in: length of data */
-	ulint		type,		/*!< in: type, e.g. DATA_FIXBINARY */
-	ulint		prtype)		/*!< in: precise type, e.g.
-					DATA_UNSIGNED */
-{
-	pars_bound_lit_t*	pbl;
-
-	ut_ad(!pars_info_get_bound_lit(info, name));
-
-	/* The vector of bound literals is created on first use. */
-	if (!info->bound_lits) {
-		ib_alloc_t*	heap_alloc;
-
-		heap_alloc = ib_heap_allocator_create(info->heap);
-
-		info->bound_lits = ib_vector_create(
-			heap_alloc, sizeof(*pbl), 8);
-	}
-
-	/* Create a "new" element and fill it in place */
-	pbl = static_cast<pars_bound_lit_t*>(
-		ib_vector_push(info->bound_lits, NULL));
-
-	pbl->name = name;
-
-	pbl->address = address;
-	pbl->length = length;
-	pbl->type = type;
-	pbl->prtype = prtype;
-
-	/* Not attached to a symbol table node until the statement is
-	parsed; see sym_tab_add_bound_lit(). */
-	pbl->node = NULL;
-}
-
-/****************************************************************//**
-Binds a NUL-terminated string as a literal. Same as calling
-pars_info_add_literal(info, name, str, strlen(str),
-DATA_VARCHAR, DATA_ENGLISH). */
-UNIV_INTERN
-void
-pars_info_add_str_literal(
-/*======================*/
-	pars_info_t*	info,	/*!< in: info struct */
-	const char*	name,	/*!< in: name */
-	const char*	str)	/*!< in: string */
-{
-	const ulint	n_bytes = strlen(str);
-
-	pars_info_add_literal(
-		info, name, str, n_bytes, DATA_VARCHAR, DATA_ENGLISH);
-}
-
-/********************************************************************
-Binds a literal by name: a new entry is added if the name is not bound
-yet, otherwise the existing entry and its symbol table node are pointed
-at the new data. Note that on a rebind only address and length are
-updated; type and prtype are ignored. */
-UNIV_INTERN
-void
-pars_info_bind_literal(
-/*===================*/
-	pars_info_t*	info,		/* in: info struct */
-	const char*	name,		/* in: name */
-	const void*	address,	/* in: address */
-	ulint		length,		/* in: length of data */
-	ulint		type,		/* in: type, e.g. DATA_FIXBINARY */
-	ulint		prtype)		/* in: precise type, e.g.
-					DATA_UNSIGNED */
-{
-	pars_bound_lit_t*	bound = pars_info_lookup_bound_lit(info, name);
-
-	if (bound == NULL) {
-		/* First binding of this name. */
-		pars_info_add_literal(
-			info, name, address, length, type, prtype);
-
-		return;
-	}
-
-	bound->address = address;
-	bound->length = length;
-
-	sym_tab_rebind_lit(bound->node, address, length);
-}
-
-/********************************************************************
-Binds a string given as pointer plus length: a new DATA_VARCHAR /
-DATA_ENGLISH literal is added if the name is not bound yet, otherwise
-the existing entry and its symbol table node are pointed at the new
-data. */
-UNIV_INTERN
-void
-pars_info_bind_varchar_literal(
-/*===========================*/
-	pars_info_t*	info,	/*!< in: info struct */
-	const char*	name,	/*!< in: name */
-	const byte*	str,	/*!< in: string */
-	ulint		str_len)	/*!< in: string length */
-{
-	pars_bound_lit_t*	bound = pars_info_lookup_bound_lit(info, name);
-
-	if (bound == NULL) {
-		/* First binding of this name. */
-		pars_info_add_literal(
-			info, name, str, str_len, DATA_VARCHAR, DATA_ENGLISH);
-
-		return;
-	}
-
-	bound->address = str;
-	bound->length = str_len;
-
-	sym_tab_rebind_lit(bound->node, str, str_len);
-}
-
-/****************************************************************//**
-Binds a 4-byte integer literal. The value is serialized with
-mach_write_to_4() into a 4-byte buffer taken from the info struct's
-heap, and that buffer is bound as a DATA_INT literal with prtype 0. */
-UNIV_INTERN
-void
-pars_info_add_int4_literal(
-/*=======================*/
-	pars_info_t*	info,	/*!< in: info struct */
-	const char*	name,	/*!< in: name */
-	lint		val)	/*!< in: value */
-{
-	void*	raw = mem_heap_alloc(info->heap, 4);
-	byte*	stored = static_cast<byte*>(raw);
-
-	mach_write_to_4(stored, val);
-
-	pars_info_add_literal(info, name, stored, 4, DATA_INT, 0);
-}
-
-/********************************************************************
-Binds the 4 bytes that val points to as a DATA_INT literal: a new
-entry is added if the name is not bound yet, otherwise the existing
-entry and its symbol table node are pointed at val. The value is not
-copied or converted here; only the pointer is stored. */
-UNIV_INTERN
-void
-pars_info_bind_int4_literal(
-/*========================*/
-	pars_info_t*		info,	/* in: info struct */
-	const char*		name,	/* in: name */
-	const ib_uint32_t*	val)	/* in: value */
-{
-	pars_bound_lit_t*	bound = pars_info_lookup_bound_lit(info, name);
-
-	if (bound == NULL) {
-		/* First binding of this name. */
-		pars_info_add_literal(info, name, val, 4, DATA_INT, 0);
-
-		return;
-	}
-
-	bound->address = val;
-	bound->length = sizeof(*val);
-
-	sym_tab_rebind_lit(bound->node, val, sizeof(*val));
-}
-
-/********************************************************************
-Binds the sizeof(ib_uint64_t) bytes that val points to as a DATA_INT
-literal: a new entry is added if the name is not bound yet, otherwise
-the existing entry and its symbol table node are pointed at val. The
-value is not copied or converted here; only the pointer is stored. */
-UNIV_INTERN
-void
-pars_info_bind_int8_literal(
-/*========================*/
-	pars_info_t*		info,	/* in: info struct */
-	const char*		name,	/* in: name */
-	const ib_uint64_t*	val)	/* in: value */
-{
-	pars_bound_lit_t*	bound = pars_info_lookup_bound_lit(info, name);
-
-	if (bound == NULL) {
-		/* First binding of this name. */
-		pars_info_add_literal(
-			info, name, val, sizeof(*val), DATA_INT, 0);
-
-		return;
-	}
-
-	bound->address = val;
-	bound->length = sizeof(*val);
-
-	sym_tab_rebind_lit(bound->node, val, sizeof(*val));
-}
-
-/****************************************************************//**
-Binds an 8-byte literal. The value is serialized with
-mach_write_to_8() into an 8-byte buffer taken from the info struct's
-heap, and that buffer is bound as a DATA_FIXBINARY literal with
-prtype 0. */
-UNIV_INTERN
-void
-pars_info_add_ull_literal(
-/*======================*/
-	pars_info_t*	info,	/*!< in: info struct */
-	const char*	name,	/*!< in: name */
-	ib_uint64_t	val)	/*!< in: value */
-{
-	void*	raw = mem_heap_alloc(info->heap, 8);
-	byte*	stored = static_cast<byte*>(raw);
-
-	mach_write_to_8(stored, val);
-
-	pars_info_add_literal(info, name, stored, 8, DATA_FIXBINARY, 0);
-}
-
-/****************************************************************//**
-Binds the sizeof(ib_uint64_t) bytes that val points to as a
-DATA_FIXBINARY literal: a new entry is added if the name is not bound
-yet, otherwise the existing entry and its symbol table node are
-pointed at val. Only the pointer is stored. */
-UNIV_INTERN
-void
-pars_info_bind_ull_literal(
-/*=======================*/
-	pars_info_t*		info,	/*!< in: info struct */
-	const char*		name,	/*!< in: name */
-	const ib_uint64_t*	val)	/*!< in: value */
-{
-	pars_bound_lit_t*	bound = pars_info_lookup_bound_lit(info, name);
-
-	if (bound == NULL) {
-		/* First binding of this name. */
-		pars_info_add_literal(
-			info, name, val, sizeof(*val), DATA_FIXBINARY, 0);
-
-		return;
-	}
-
-	bound->address = val;
-	bound->length = sizeof(*val);
-
-	sym_tab_rebind_lit(bound->node, val, sizeof(*val));
-}
-
-/****************************************************************//**
-Binds a user function by name. If the name is already bound, its
-callback and argument are replaced; otherwise a new entry is appended
-to info->funcs (the vector is created on first use). The name pointer
-is stored as is, not copied. */
-UNIV_INTERN
-void
-pars_info_bind_function(
-/*====================*/
-	pars_info_t*		info,	/*!< in: info struct */
-	const char*		name,	/*!< in: function name */
-	pars_user_func_cb_t	func,	/*!< in: function address */
-	void*			arg)	/*!< in: user-supplied argument */
-{
-	pars_user_func_t*	entry = pars_info_lookup_user_func(info, name);
-
-	if (entry == NULL) {
-
-		if (info->funcs == NULL) {
-			ib_alloc_t*	vec_alloc = ib_heap_allocator_create(
-				info->heap);
-
-			info->funcs = ib_vector_create(
-				vec_alloc, sizeof(pars_user_func_t), 8);
-		}
-
-		/* Pushing NULL reserves a slot that is filled in place. */
-		void*	slot = ib_vector_push(info->funcs, NULL);
-
-		entry = static_cast<pars_user_func_t*>(slot);
-		entry->name = name;
-	}
-
-	entry->func = func;
-	entry->arg = arg;
-}
-
-/********************************************************************
-Binds an identifier by name. If the name is already bound only its id
-is replaced; otherwise a new entry is appended to info->bound_ids (the
-vector is created on first use). The name is duplicated into the info
-heap only when a new entry is created and copy_name is TRUE; the id
-pointer is always stored as is. */
-UNIV_INTERN
-void
-pars_info_bind_id(
-/*==============*/
-	pars_info_t*	info,		/*!< in: info struct */
-	ibool		copy_name,	/* in: copy name if TRUE */
-	const char*	name,		/*!< in: name */
-	const char*	id)		/*!< in: id */
-{
-	pars_bound_id_t*	entry = pars_info_lookup_bound_id(info, name);
-
-	if (entry == NULL) {
-
-		if (info->bound_ids == NULL) {
-			ib_alloc_t*	vec_alloc = ib_heap_allocator_create(
-				info->heap);
-
-			info->bound_ids = ib_vector_create(
-				vec_alloc, sizeof(pars_bound_id_t), 8);
-		}
-
-		/* Pushing NULL reserves a slot that is filled in place. */
-		void*	slot = ib_vector_push(info->bound_ids, NULL);
-
-		entry = static_cast<pars_bound_id_t*>(slot);
-
-		if (copy_name) {
-			entry->name = mem_heap_strdup(info->heap, name);
-		} else {
-			entry->name = name;
-		}
-	}
-
-	entry->id = id;
-}
-
-/********************************************************************
-Looks up a bound identifier by name; a plain forwarder to
-pars_info_lookup_bound_id(). */
-
-pars_bound_id_t*
-pars_info_get_bound_id(
-/*===================*/
-					/* out: bound id, or NULL if not
-					found */
-	pars_info_t*	info,		/* in: info struct */
-	const char*	name)		/* in: bound id name to find */
-{
-	pars_bound_id_t*	found = pars_info_lookup_bound_id(info, name);
-
-	return(found);
-}
-
-/****************************************************************//**
-Looks up a bound literal by name; a plain forwarder to
-pars_info_lookup_bound_lit().
-@return bound literal, or NULL if not found */
-UNIV_INTERN
-pars_bound_lit_t*
-pars_info_get_bound_lit(
-/*====================*/
-	pars_info_t*	info,	/*!< in: info struct */
-	const char*	name)	/*!< in: bound literal name to find */
-{
-	pars_bound_lit_t*	found = pars_info_lookup_bound_lit(info, name);
-
-	return(found);
-}
diff --git a/storage/xtradb/pars/pars0sym.cc b/storage/xtradb/pars/pars0sym.cc
deleted file mode 100644
index b01a69cb33a..00000000000
--- a/storage/xtradb/pars/pars0sym.cc
+++ /dev/null
@@ -1,440 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file pars/pars0sym.cc
-SQL parser symbol table
-
-Created 12/15/1997 Heikki Tuuri
-*******************************************************/
-
-#include "pars0sym.h"
-
-#ifdef UNIV_NONINL
-#include "pars0sym.ic"
-#endif
-
-#include "mem0mem.h"
-#include "data0type.h"
-#include "data0data.h"
-#include "pars0grm.h"
-#include "pars0pars.h"
-#include "que0que.h"
-#include "eval0eval.h"
-#include "row0sel.h"
-
-/******************************************************************//**
-Allocates a symbol table from the given heap, records the heap in it
-and initializes its symbol and function node lists as empty. No other
-field is set here.
-@return own: symbol table */
-UNIV_INTERN
-sym_tab_t*
-sym_tab_create(
-/*===========*/
-	mem_heap_t*	heap)	/*!< in: memory heap where to create */
-{
-	void*		raw = mem_heap_alloc(heap, sizeof(sym_tab_t));
-	sym_tab_t*	new_tab = static_cast<sym_tab_t*>(raw);
-
-	new_tab->heap = heap;
-
-	UT_LIST_INIT(new_tab->sym_list);
-	UT_LIST_INIT(new_tab->func_node_list);
-
-	return(new_tab);
-}
-
-
-/******************************************************************//**
-Releases what the nodes of a symbol table acquired after parsing:
-closes reference-counted tables, frees value buffers and column
-prefetch buffers, and frees explicit cursor definitions. The heap the
-table was created in is not freed. The caller must own dict_sys->mutex
-(checked in debug builds). */
-UNIV_INTERN
-void
-sym_tab_free_private(
-/*=================*/
-	sym_tab_t*	sym_tab)	/*!< in, own: symbol table */
-{
-	ut_ad(mutex_own(&dict_sys->mutex));
-
-	sym_node_t*	cur_sym = UT_LIST_GET_FIRST(sym_tab->sym_list);
-
-	while (cur_sym != NULL) {
-
-		/* Close the tables opened in pars_retrieve_table_def(). */
-
-		if (cur_sym->token_type == SYM_TABLE_REF_COUNTED) {
-
-			dict_table_close(cur_sym->table, TRUE, FALSE);
-
-			/* Return the node to the unresolved state. */
-			cur_sym->table = NULL;
-			cur_sym->resolved = FALSE;
-			cur_sym->token_type = SYM_UNSET;
-		}
-
-		eval_node_free_val_buf(cur_sym);
-
-		if (cur_sym->prefetch_buf) {
-			sel_col_prefetch_buf_free(cur_sym->prefetch_buf);
-		}
-
-		if (cur_sym->cursor_def) {
-			que_graph_free_recursive(cur_sym->cursor_def);
-		}
-
-		cur_sym = UT_LIST_GET_NEXT(sym_list, cur_sym);
-	}
-
-	func_node_t*	cur_func = UT_LIST_GET_FIRST(sym_tab->func_node_list);
-
-	while (cur_func != NULL) {
-
-		eval_node_free_val_buf(cur_func);
-
-		cur_func = UT_LIST_GET_NEXT(func_node_list, cur_func);
-	}
-}
-
-/******************************************************************//**
-Adds an integer literal to a symbol table. The value is stored as a
-4-byte DATA_INT written with mach_write_to_4() into a buffer taken from
-the symbol table heap; the node is created already resolved, with
-token type SYM_LIT, and appended to sym_tab->sym_list.
-
-The node is allocated zero-filled (as sym_tab_add_id() does), so that
-fields not assigned below (e.g. common.brother, common.parent, name)
-do not hold indeterminate values.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_int_lit(
-/*================*/
-	sym_tab_t*	sym_tab,	/*!< in: symbol table */
-	ulint		val)		/*!< in: integer value */
-{
-	sym_node_t*	node;
-	byte*		data;
-
-	node = static_cast<sym_node_t*>(
-		mem_heap_zalloc(sym_tab->heap, sizeof(*node)));
-
-	node->common.type = QUE_NODE_SYMBOL;
-
-	node->table = NULL;
-	node->resolved = TRUE;
-	node->token_type = SYM_LIT;
-
-	node->indirection = NULL;
-
-	dtype_set(dfield_get_type(&node->common.val), DATA_INT, 0, 4);
-
-	data = static_cast<byte*>(mem_heap_alloc(sym_tab->heap, 4));
-	mach_write_to_4(data, val);
-
-	dfield_set_data(&(node->common.val), data, 4);
-
-	/* No value buffer, prefetch buffer or cursor is attached yet. */
-	node->common.val_buf_size = 0;
-	node->prefetch_buf = NULL;
-	node->cursor_def = NULL;
-
-	UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
-	node->like_node = NULL;
-
-	node->sym_table = sym_tab;
-
-	return(node);
-}
-
-/******************************************************************//**
-Adds a string literal to a symbol table. The len bytes at str are
-duplicated into the symbol table heap (a zero-length string is stored
-with a NULL data pointer) and typed DATA_VARCHAR / DATA_ENGLISH; the
-node is created already resolved, with token type SYM_LIT, and
-appended to sym_tab->sym_list.
-
-The node is allocated zero-filled (as sym_tab_add_id() does), so that
-fields not assigned below (e.g. common.brother, common.parent, name)
-do not hold indeterminate values.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_str_lit(
-/*================*/
-	sym_tab_t*	sym_tab,	/*!< in: symbol table */
-	const byte*	str,		/*!< in: string with no quotes around
-					it */
-	ulint		len)		/*!< in: string length */
-{
-	sym_node_t*	node;
-	byte*		data;
-
-	node = static_cast<sym_node_t*>(
-		mem_heap_zalloc(sym_tab->heap, sizeof(*node)));
-
-	node->common.type = QUE_NODE_SYMBOL;
-
-	node->table = NULL;
-	node->resolved = TRUE;
-	node->token_type = SYM_LIT;
-
-	node->indirection = NULL;
-
-	dtype_set(dfield_get_type(&node->common.val),
-		  DATA_VARCHAR, DATA_ENGLISH, 0);
-
-	/* Nothing is copied for an empty string. */
-	data = (len) ? static_cast<byte*>(mem_heap_dup(sym_tab->heap, str, len))
-		: NULL;
-
-	dfield_set_data(&(node->common.val), data, len);
-
-	/* No value buffer, prefetch buffer or cursor is attached yet. */
-	node->common.val_buf_size = 0;
-	node->prefetch_buf = NULL;
-	node->cursor_def = NULL;
-
-	UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
-	node->like_node = NULL;
-
-	node->sym_table = sym_tab;
-
-	return(node);
-}
-
-/******************************************************************//**
-Adds a bound literal to a symbol table. The literal is looked up by
-name in sym_tab->info and must exist (ut_a). The new node points
-directly at the bound data (blit->address, blit->length): nothing is
-copied. The bound literal records the node in blit->node so that it
-can be rebound later with sym_tab_rebind_lit().
-
-The type length passed to dtype_set() is blit->length for
-DATA_FIXBINARY, DATA_CHAR and DATA_INT, and 0 for DATA_BLOB and
-DATA_VARCHAR. Any other main type is a fatal error.
-
-The node is allocated zero-filled (as sym_tab_add_id() does), so that
-fields not assigned below (e.g. name, name_len) do not hold
-indeterminate values.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_bound_lit(
-/*==================*/
-	sym_tab_t*	sym_tab,	/*!< in: symbol table */
-	const char*	name,		/*!< in: name of bound literal */
-	ulint*		lit_type)	/*!< out: type of literal (PARS_*_LIT) */
-{
-	sym_node_t*		node;
-	pars_bound_lit_t*	blit;
-	ulint			len = 0;
-
-	blit = pars_info_get_bound_lit(sym_tab->info, name);
-	ut_a(blit);
-
-	node = static_cast<sym_node_t*>(
-		mem_heap_zalloc(sym_tab->heap, sizeof(*node)));
-
-	node->common.type = QUE_NODE_SYMBOL;
-	node->common.brother = node->common.parent = NULL;
-
-	node->table = NULL;
-	node->resolved = TRUE;
-	node->token_type = SYM_LIT;
-
-	node->indirection = NULL;
-
-	switch (blit->type) {
-	case DATA_FIXBINARY:
-		len = blit->length;
-		*lit_type = PARS_FIXBINARY_LIT;
-		break;
-
-	case DATA_BLOB:
-		*lit_type = PARS_BLOB_LIT;
-		break;
-
-	case DATA_VARCHAR:
-		*lit_type = PARS_STR_LIT;
-		break;
-
-	case DATA_CHAR:
-		ut_a(blit->length > 0);
-
-		len = blit->length;
-		*lit_type = PARS_STR_LIT;
-		break;
-
-	case DATA_INT:
-		ut_a(blit->length > 0);
-		ut_a(blit->length <= 8);
-
-		len = blit->length;
-		*lit_type = PARS_INT_LIT;
-		break;
-
-	default:
-		ut_error;
-	}
-
-	dtype_set(dfield_get_type(&node->common.val),
-		  blit->type, blit->prtype, len);
-
-	dfield_set_data(&(node->common.val), blit->address, blit->length);
-
-	/* No value buffer, prefetch buffer or cursor is attached yet. */
-	node->common.val_buf_size = 0;
-	node->prefetch_buf = NULL;
-	node->cursor_def = NULL;
-
-	UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
-	/* Remember the node so that the literal can be rebound. */
-	blit->node = node;
-	node->like_node = NULL;
-	node->sym_table = sym_tab;
-
-	return(node);
-}
-
-/**********************************************************************
-Points an existing literal node at new data. The node must have token
-type SYM_LIT (ut_a). If the node has a like_node attached, its type
-must be DATA_CHAR or DATA_VARCHAR and pars_like_rebind() is called for
-it. The value buffer size is reset to 0, and any prefetch buffer or
-cursor definition hanging off the node is freed and cleared. */
-
-sym_node_t*
-sym_tab_rebind_lit(
-/*===============*/
-					/* out: symbol table node */
-	sym_node_t*	node,		/* in: node that is bound to literal*/
-	const void*	address,	/* in: pointer to data */
-	ulint		length)		/* in: length of data */
-{
-	ut_a(node->token_type == SYM_LIT);
-
-	dfield_set_data(&node->common.val, address, length);
-
-	if (node->like_node != NULL) {
-
-		dtype_t*	val_type = dfield_get_type(
-			que_node_get_val(node));
-		const ulint	mtype = dtype_get_mtype(val_type);
-
-		ut_a(mtype == DATA_CHAR || mtype == DATA_VARCHAR);
-
-		/* Don't force [FALSE] creation of sub-nodes (for LIKE) */
-		pars_like_rebind(
-			node, static_cast<const byte*>(address), length);
-	}
-
-	/* FIXME: What's this ? */
-	node->common.val_buf_size = 0;
-
-	if (node->prefetch_buf != NULL) {
-		sel_col_prefetch_buf_free(node->prefetch_buf);
-		node->prefetch_buf = NULL;
-	}
-
-	if (node->cursor_def != NULL) {
-		que_graph_free_recursive(node->cursor_def);
-		node->cursor_def = NULL;
-	}
-
-	return(node);
-}
-
-/******************************************************************//**
-Adds an SQL null literal to a symbol table. The node's value is set to
-SQL NULL with main type DATA_ERROR; the node is created already
-resolved, with token type SYM_LIT, and appended to sym_tab->sym_list.
-
-The node is allocated zero-filled (as sym_tab_add_id() does), so that
-fields not assigned below (e.g. common.brother, common.parent, name)
-do not hold indeterminate values.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_null_lit(
-/*=================*/
-	sym_tab_t*	sym_tab)	/*!< in: symbol table */
-{
-	sym_node_t*	node;
-
-	node = static_cast<sym_node_t*>(
-		mem_heap_zalloc(sym_tab->heap, sizeof(*node)));
-
-	node->common.type = QUE_NODE_SYMBOL;
-
-	node->table = NULL;
-	node->resolved = TRUE;
-	node->token_type = SYM_LIT;
-
-	node->indirection = NULL;
-
-	dfield_get_type(&node->common.val)->mtype = DATA_ERROR;
-
-	dfield_set_null(&node->common.val);
-
-	/* No value buffer, prefetch buffer or cursor is attached yet. */
-	node->common.val_buf_size = 0;
-	node->prefetch_buf = NULL;
-	node->cursor_def = NULL;
-
-	UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
-	node->like_node = NULL;
-
-	node->sym_table = sym_tab;
-
-	return(node);
-}
-
-/******************************************************************//**
-Adds an identifier to a symbol table. The node is allocated
-zero-filled from the symbol table heap, gets a heap copy of the first
-len bytes of name, has its value set to SQL NULL and is appended to
-sym_tab->sym_list.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_id(
-/*===========*/
-	sym_tab_t*	sym_tab,	/*!< in: symbol table */
-	byte*		name,		/*!< in: identifier name */
-	ulint		len)		/*!< in: identifier length */
-{
-	mem_heap_t*	tab_heap = sym_tab->heap;
-
-	sym_node_t*	id_node = static_cast<sym_node_t*>(
-		mem_heap_zalloc(tab_heap, sizeof(sym_node_t)));
-
-	id_node->common.type = QUE_NODE_SYMBOL;
-
-	id_node->name_len = len;
-	id_node->name = mem_heap_strdupl(
-		tab_heap, reinterpret_cast<char*>(name), len);
-
-	UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, id_node);
-
-	dfield_set_null(&id_node->common.val);
-
-	id_node->sym_table = sym_tab;
-
-	return(id_node);
-}
-
-/******************************************************************//**
-Adds a bound identifier to a symbol table. The identifier is looked up
-by name in sym_tab->info and must exist (ut_a). The node gets a heap
-copy of the bound id string as its name, is left unresolved with token
-type SYM_UNSET, has its value set to SQL NULL and is appended to
-sym_tab->sym_list.
-
-The node is allocated zero-filled (as sym_tab_add_id() does), so that
-fields not assigned below (e.g. common.brother, common.parent) do not
-hold indeterminate values.
-@return symbol table node */
-UNIV_INTERN
-sym_node_t*
-sym_tab_add_bound_id(
-/*=================*/
-	sym_tab_t*	sym_tab,	/*!< in: symbol table */
-	const char*	name)		/*!< in: name of bound id */
-{
-	sym_node_t*		node;
-	pars_bound_id_t*	bid;
-
-	bid = pars_info_get_bound_id(sym_tab->info, name);
-	ut_a(bid);
-
-	node = static_cast<sym_node_t*>(
-		mem_heap_zalloc(sym_tab->heap, sizeof(*node)));
-
-	node->common.type = QUE_NODE_SYMBOL;
-
-	node->table = NULL;
-	node->resolved = FALSE;
-	node->token_type = SYM_UNSET;
-	node->indirection = NULL;
-
-	/* The node is named after the bound id, not the placeholder. */
-	node->name = mem_heap_strdup(sym_tab->heap, bid->id);
-	node->name_len = strlen(node->name);
-
-	UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node);
-
-	dfield_set_null(&node->common.val);
-
-	/* No value buffer, prefetch buffer or cursor is attached yet. */
-	node->common.val_buf_size = 0;
-	node->prefetch_buf = NULL;
-	node->cursor_def = NULL;
-
-	node->like_node = NULL;
-
-	node->sym_table = sym_tab;
-
-	return(node);
-}
diff --git a/storage/xtradb/que/que0que.cc b/storage/xtradb/que/que0que.cc
deleted file mode 100644
index e2dc0239e13..00000000000
--- a/storage/xtradb/que/que0que.cc
+++ /dev/null
@@ -1,1308 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file que/que0que.cc
-Query graph
-
-Created 5/27/1996 Heikki Tuuri
-*******************************************************/
-
-#include "que0que.h"
-
-#ifdef UNIV_NONINL
-#include "que0que.ic"
-#endif
-
-#include "usr0sess.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "row0undo.h"
-#include "row0ins.h"
-#include "row0upd.h"
-#include "row0sel.h"
-#include "row0purge.h"
-#include "dict0crea.h"
-#include "log0log.h"
-#include "eval0proc.h"
-#include "lock0lock.h"
-#include "eval0eval.h"
-#include "pars0types.h"
-
-#define QUE_MAX_LOOPS_WITHOUT_CHECK 16 /* NOTE(review): not referenced in the visible part of this file; presumably a loop-count threshold -- confirm at its use site */
-
-#ifdef UNIV_DEBUG
-/* If the following flag is set TRUE, the module will print trace info
-of SQL execution in the UNIV_SQL_DEBUG version.
-NOTE(review): the flag itself is compiled under UNIV_DEBUG, while the
-text above names UNIV_SQL_DEBUG -- confirm which macro guards the
-tracing code. */
-UNIV_INTERN ibool que_trace_on = FALSE;
-#endif /* UNIV_DEBUG */
-
-/* Short introduction to query graphs
- ==================================
-
-A query graph consists of nodes linked to each other in various ways. The
-execution starts at que_run_threads() which takes a que_thr_t parameter.
-que_thr_t contains two fields that control query graph execution: run_node
-and prev_node. run_node is the next node to execute and prev_node is the
-last node executed.
-
-Each node has a pointer to a 'next' statement, i.e., its brother, and a
-pointer to its parent node. The next pointer is NULL in the last statement
-of a block.
-
-Loop nodes contain a link to the first statement of the enclosed statement
-list. While the loop runs, que_thr_step() checks if execution to the loop
-node came from its parent or from one of the statement nodes in the loop. If
-it came from the parent of the loop node it starts executing the first
-statement node in the loop. If it came from one of the statement nodes in
-the loop, then it checks if the statement node has another statement node
-following it, and runs it if so.
-
-To signify loop ending, the loop statements (see e.g. while_step()) set
-que_thr_t->run_node to the loop node's parent node. This is noticed on the
-next call of que_thr_step() and execution proceeds to the node pointed to by
-the loop node's 'next' pointer.
-
-For example, the code:
-
-X := 1;
-WHILE X < 5 LOOP
- X := X + 1;
- X := X + 1;
-END LOOP;
-X := 5
-
-will result in the following node hierarchy, with the X-axis indicating
-'next' links and the Y-axis indicating parent/child links:
-
-A - W - A
- |
- |
- A - A
-
-A = assign_node_t, W = while_node_t. */
-
-/* How is a stored procedure containing COMMIT or ROLLBACK commands
-executed?
-
-The commit or rollback can be seen as a subprocedure call.
-
-When the transaction starts to handle a rollback or commit, it builds
-a query graph which, when executed, will roll back or commit the
-incomplete transaction. The transaction is moved to the
-TRX_QUE_ROLLING_BACK or TRX_QUE_COMMITTING state.
-If specified, the SQL cursors opened by the transaction are closed.
-When the execution of the graph completes, it is like returning
-from a subprocedure: the query thread which requested the operation
-starts running again. */
-
-/**********************************************************************//**
-Moves a thread from another state to the QUE_THR_RUNNING state. Increments
-the n_active_thrs counters of the query graph and transaction.
-***NOTE***: This is the only function in which such a transition is allowed
-to happen! */
-static
-void
-que_thr_move_to_run_state(
-/*======================*/
- que_thr_t* thr); /*!< in: a query thread */
-
-/***********************************************************************//**
-Creates a query graph fork node, zero-filled, in the given heap. The
-fork starts in state QUE_FORK_COMMAND_WAIT. When no graph is given,
-the fork is its own graph, i.e. the graph root.
-@return own: fork node */
-UNIV_INTERN
-que_fork_t*
-que_fork_create(
-/*============*/
-	que_t*		graph,		/*!< in: graph, if NULL then this
-					fork node is assumed to be the
-					graph root */
-	que_node_t*	parent,		/*!< in: parent node */
-	ulint		fork_type,	/*!< in: fork type */
-	mem_heap_t*	heap)		/*!< in: memory heap where created */
-{
-	ut_ad(heap);
-
-	que_fork_t*	new_fork = static_cast<que_fork_t*>(
-		mem_heap_zalloc(heap, sizeof(que_fork_t)));
-
-	new_fork->common.type = QUE_NODE_FORK;
-	new_fork->common.parent = parent;
-
-	new_fork->heap = heap;
-	new_fork->fork_type = fork_type;
-	new_fork->state = QUE_FORK_COMMAND_WAIT;
-
-	if (graph != NULL) {
-		new_fork->graph = graph;
-	} else {
-		/* A fork without an enclosing graph is the graph root. */
-		new_fork->graph = new_fork;
-	}
-
-	return(new_fork);
-}
-
-/***********************************************************************//**
-Creates a query thread node, zero-filled, in the given heap and
-appends it to the thread list of the parent fork. The thread belongs
-to the parent's graph and starts in state QUE_THR_COMMAND_WAIT with
-lock state QUE_THR_LOCK_NOLOCK.
-@return own: query thread node */
-UNIV_INTERN
-que_thr_t*
-que_thr_create(
-/*===========*/
-	que_fork_t*	parent,	/*!< in: parent node, i.e., a fork node */
-	mem_heap_t*	heap)	/*!< in: memory heap where created */
-{
-	ut_ad(parent && heap);
-
-	que_thr_t*	new_thr = static_cast<que_thr_t*>(
-		mem_heap_zalloc(heap, sizeof(que_thr_t)));
-
-	new_thr->common.type = QUE_NODE_THR;
-	new_thr->common.parent = parent;
-
-	new_thr->graph = parent->graph;
-
-	/* The magic number is checked when the graph is freed. */
-	new_thr->magic_n = QUE_THR_MAGIC_N;
-
-	new_thr->state = QUE_THR_COMMAND_WAIT;
-	new_thr->lock_state = QUE_THR_LOCK_NOLOCK;
-
-	UT_LIST_ADD_LAST(thrs, parent->thrs, new_thr);
-
-	return(new_thr);
-}
-
-/**********************************************************************//**
-Ends the lock wait of the query thread recorded in trx->lock.wait_thr:
-the thread is moved to the running state, the transaction's que_state
-becomes TRX_QUE_RUNNING and wait_thr is cleared. When trx->take_stats
-is set, the time elapsed since trx->lock_que_wait_ustarted is added to
-trx->lock_que_wait_timer. The caller must own the lock mutex and the
-trx mutex (checked in debug builds).
-@return the query thread if it was not active before the call (the
-caller needs to release it), otherwise NULL */
-UNIV_INTERN
-que_thr_t*
-que_thr_end_lock_wait(
-/*==================*/
-	trx_t*	trx)	/*!< in: transaction with que_state in
-			QUE_THR_LOCK_WAIT */
-{
-	ut_ad(lock_mutex_own());
-	ut_ad(trx_mutex_own(trx));
-
-	que_thr_t*	waiter = trx->lock.wait_thr;
-
-	ut_ad(waiter != NULL);
-
-	ut_ad(trx->lock.que_state == TRX_QUE_LOCK_WAIT);
-	/* In MySQL this is the only possible state here */
-	ut_a(waiter->state == QUE_THR_LOCK_WAIT);
-
-	const ibool	was_active = waiter->is_active;
-
-	que_thr_move_to_run_state(waiter);
-
-	if (UNIV_UNLIKELY(trx->take_stats)) {
-		ulint		sec;
-		ulint		usec;
-
-		ut_usectime(&sec, &usec);
-
-		const ib_uint64_t	now
-			= (ib_uint64_t)sec * 1000000 + usec;
-
-		trx->lock_que_wait_timer
-			+= (ulint)(now - trx->lock_que_wait_ustarted);
-	}
-
-	trx->lock.que_state = TRX_QUE_RUNNING;
-
-	trx->lock.wait_thr = NULL;
-
-	/* In MySQL we let the OS thread (not just the query thread) to wait
-	for the lock to be released: */
-
-	if (was_active) {
-		return(NULL);
-	}
-
-	return(waiter);
-}
-
-/**********************************************************************//**
-Prepares a query thread for running a command from the start: the
-thread itself becomes the next node to run, its parent is recorded as
-the previously run node, and the thread is moved to the running
-state. */
-UNIV_INLINE
-void
-que_thr_init_command(
-/*=================*/
-	que_thr_t*	thr)	/*!< in: query thread */
-{
-	thr->prev_node = thr->common.parent;
-	thr->run_node = thr;
-
-	que_thr_move_to_run_state(thr);
-}
-
-/**********************************************************************//**
-Round robin scheduler: picks the first thread of the fork when thr is
-NULL, otherwise the thread following thr, and starts it. The picked
-thread must be in state QUE_THR_COMMAND_WAIT or QUE_THR_COMPLETED and
-must not be active; any other state is a fatal error. The whole
-operation runs under the mutex of fork->trx.
-@return a query thread of the graph moved to QUE_THR_RUNNING state, or
-NULL; the query thread should be executed by que_run_threads by the
-caller */
-UNIV_INTERN
-que_thr_t*
-que_fork_scheduler_round_robin(
-/*===========================*/
-	que_fork_t*	fork,	/*!< in: a query fork */
-	que_thr_t*	thr)	/*!< in: current pos */
-{
-	trx_mutex_enter(fork->trx);
-
-	/* If no current, start first available. */
-	que_thr_t*	picked = (thr == NULL)
-		? UT_LIST_GET_FIRST(fork->thrs)
-		: UT_LIST_GET_NEXT(thrs, thr);
-
-	if (picked != NULL) {
-
-		fork->state = QUE_FORK_ACTIVE;
-
-		fork->last_sel_node = NULL;
-
-		if (picked->state == QUE_THR_COMMAND_WAIT
-		    || picked->state == QUE_THR_COMPLETED) {
-
-			ut_a(!picked->is_active);
-			que_thr_init_command(picked);
-		} else {
-			/* Suspended, lock-waiting or any other state is
-			not expected here. */
-			ut_error;
-		}
-	}
-
-	trx_mutex_exit(fork->trx);
-
-	return(picked);
-}
-
-/**********************************************************************//**
-Starts execution of a command in a query fork and marks the fork
-active. In a single pass over the fork's threads the first thread in
-state QUE_THR_COMMAND_WAIT is initialized and returned at once. Failing
-that, the first QUE_THR_SUSPENDED thread is resumed, and failing that
-the first QUE_THR_COMPLETED thread is re-initialized. Meeting a thread
-in state QUE_THR_LOCK_WAIT, or finding no candidate at all, is a fatal
-error.
-@return a query thread of the graph moved to QUE_THR_RUNNING state;
-the query thread should be executed by que_run_threads by the
-caller */
-UNIV_INTERN
-que_thr_t*
-que_fork_start_command(
-/*===================*/
-	que_fork_t*	fork)	/*!< in: a query fork */
-{
-	que_thr_t*	first_suspended = NULL;
-	que_thr_t*	first_completed = NULL;
-
-	fork->state = QUE_FORK_ACTIVE;
-
-	fork->last_sel_node = NULL;
-
-	/* Usually there is just one thread, but in a parallelized select,
-	which necessarily is non-scrollable, there may be several to choose
-	from. A single pass notes which threads are ready to run. */
-
-	for (que_thr_t* cand = UT_LIST_GET_FIRST(fork->thrs);
-	     cand != NULL;
-	     cand = UT_LIST_GET_NEXT(thrs, cand)) {
-
-		if (cand->state == QUE_THR_COMMAND_WAIT) {
-
-			/* We have to send the initial message to query
-			thread to start it */
-
-			que_thr_init_command(cand);
-
-			return(cand);
-
-		} else if (cand->state == QUE_THR_SUSPENDED) {
-
-			/* Execution was suspended: no initial message is
-			needed because execution can continue from where
-			it was left */
-
-			if (first_suspended == NULL) {
-				first_suspended = cand;
-			}
-
-		} else if (cand->state == QUE_THR_COMPLETED) {
-
-			if (first_completed == NULL) {
-				first_completed = cand;
-			}
-
-		} else if (cand->state == QUE_THR_LOCK_WAIT) {
-
-			ut_error;
-		}
-	}
-
-	if (first_suspended != NULL) {
-
-		que_thr_move_to_run_state(first_suspended);
-
-		return(first_suspended);
-	}
-
-	if (first_completed != NULL) {
-
-		que_thr_init_command(first_completed);
-
-		return(first_completed);
-	}
-
-	/* No thread could be chosen. */
-	ut_error;
-
-	return(NULL);
-}
-
-/**********************************************************************//**
-Walks a statement list through the nodes' next links and calls
-que_graph_free_recursive() on each statement. */
-static
-void
-que_graph_free_stat_list(
-/*=====================*/
-	que_node_t*	node)	/*!< in: first query graph node in the list */
-{
-	for (que_node_t* stat = node;
-	     stat != NULL;
-	     stat = que_node_get_next(stat)) {
-
-		que_graph_free_recursive(stat);
-	}
-}
-
-/**********************************************************************//**
-Frees a query graph, but not the heap where it was created. Does not free
-explicit cursor declarations, they are freed in que_graph_free. */
-UNIV_INTERN
-void
-que_graph_free_recursive(
-/*=====================*/
- que_node_t* node) /*!< in: query graph node */
-{
- que_fork_t* fork;
- que_thr_t* thr;
- undo_node_t* undo;
- sel_node_t* sel;
- ins_node_t* ins;
- upd_node_t* upd;
- tab_node_t* cre_tab;
- ind_node_t* cre_ind;
- purge_node_t* purge;
-
- if (node == NULL) {
-
- return;
- }
-
- switch (que_node_get_type(node)) {
-
- case QUE_NODE_FORK:
- fork = static_cast<que_fork_t*>(node);
-
- thr = UT_LIST_GET_FIRST(fork->thrs);
-
- while (thr) {
- que_graph_free_recursive(thr);
-
- thr = UT_LIST_GET_NEXT(thrs, thr);
- }
-
- break;
- case QUE_NODE_THR:
-
- thr = static_cast<que_thr_t*>(node);
-
- if (thr->magic_n != QUE_THR_MAGIC_N) {
- fprintf(stderr,
- "que_thr struct appears corrupt;"
- " magic n %lu\n",
- (unsigned long) thr->magic_n);
- mem_analyze_corruption(thr);
- ut_error;
- }
-
- thr->magic_n = QUE_THR_MAGIC_FREED;
-
- que_graph_free_recursive(thr->child);
-
- break;
- case QUE_NODE_UNDO:
-
- undo = static_cast<undo_node_t*>(node);
-
- mem_heap_free(undo->heap);
-
- break;
- case QUE_NODE_SELECT:
-
- sel = static_cast<sel_node_t*>(node);
-
- sel_node_free_private(sel);
-
- break;
- case QUE_NODE_INSERT:
-
- ins = static_cast<ins_node_t*>(node);
-
- que_graph_free_recursive(ins->select);
-
- mem_heap_free(ins->entry_sys_heap);
-
- break;
- case QUE_NODE_PURGE:
- purge = static_cast<purge_node_t*>(node);
-
- mem_heap_free(purge->heap);
-
- break;
-
- case QUE_NODE_UPDATE:
-
- upd = static_cast<upd_node_t*>(node);
-
- if (upd->in_mysql_interface) {
-
- btr_pcur_free_for_mysql(upd->pcur);
- }
-
- que_graph_free_recursive(upd->cascade_node);
-
- if (upd->cascade_heap) {
- mem_heap_free(upd->cascade_heap);
- }
-
- que_graph_free_recursive(upd->select);
-
- mem_heap_free(upd->heap);
-
- break;
- case QUE_NODE_CREATE_TABLE:
- cre_tab = static_cast<tab_node_t*>(node);
-
- que_graph_free_recursive(cre_tab->tab_def);
- que_graph_free_recursive(cre_tab->col_def);
- que_graph_free_recursive(cre_tab->commit_node);
-
- mem_heap_free(cre_tab->heap);
-
- break;
- case QUE_NODE_CREATE_INDEX:
- cre_ind = static_cast<ind_node_t*>(node);
-
- que_graph_free_recursive(cre_ind->ind_def);
- que_graph_free_recursive(cre_ind->field_def);
- que_graph_free_recursive(cre_ind->commit_node);
-
- mem_heap_free(cre_ind->heap);
-
- break;
- case QUE_NODE_PROC:
- que_graph_free_stat_list(((proc_node_t*) node)->stat_list);
-
- break;
- case QUE_NODE_IF:
- que_graph_free_stat_list(((if_node_t*) node)->stat_list);
- que_graph_free_stat_list(((if_node_t*) node)->else_part);
- que_graph_free_stat_list(((if_node_t*) node)->elsif_list);
-
- break;
- case QUE_NODE_ELSIF:
- que_graph_free_stat_list(((elsif_node_t*) node)->stat_list);
-
- break;
- case QUE_NODE_WHILE:
- que_graph_free_stat_list(((while_node_t*) node)->stat_list);
-
- break;
- case QUE_NODE_FOR:
- que_graph_free_stat_list(((for_node_t*) node)->stat_list);
-
- break;
-
- case QUE_NODE_ASSIGNMENT:
- case QUE_NODE_EXIT:
- case QUE_NODE_RETURN:
- case QUE_NODE_COMMIT:
- case QUE_NODE_ROLLBACK:
- case QUE_NODE_LOCK:
- case QUE_NODE_FUNC:
- case QUE_NODE_ORDER:
- case QUE_NODE_ROW_PRINTF:
- case QUE_NODE_OPEN:
- case QUE_NODE_FETCH:
- /* No need to do anything */
-
- break;
- default:
- fprintf(stderr,
- "que_node struct appears corrupt; type %lu\n",
- (unsigned long) que_node_get_type(node));
- mem_analyze_corruption(node);
- ut_error;
- }
-}
-
-/**********************************************************************//**
-Frees a query graph. */
-UNIV_INTERN
-void
-que_graph_free(
-/*===========*/
- que_t* graph) /*!< in: query graph; we assume that the memory
- heap where this graph was created is private
- to this graph: if not, then use
- que_graph_free_recursive and free the heap
- afterwards! */
-{
- ut_ad(graph);
-
- if (graph->sym_tab) {
- /* The following call frees dynamic memory allocated
- for variables etc. during execution. Frees also explicit
- cursor definitions. */
-
- sym_tab_free_private(graph->sym_tab);
- }
-
- if (graph->info && graph->info->graph_owns_us) {
- pars_info_free(graph->info);
- }
-
- que_graph_free_recursive(graph);
-
- mem_heap_free(graph->heap);
-}
-
-/****************************************************************//**
-Performs an execution step on a thr node.
-@return query thread to run next, or NULL if none */
-static
-que_thr_t*
-que_thr_node_step(
-/*==============*/
- que_thr_t* thr) /*!< in: query thread where run_node must
- be the thread node itself */
-{
- ut_ad(thr->run_node == thr);
-
- if (thr->prev_node == thr->common.parent) {
- /* If control to the node came from above, it is just passed
- on */
-
- thr->run_node = thr->child;
-
- return(thr);
- }
-
- trx_mutex_enter(thr_get_trx(thr));
-
- if (que_thr_peek_stop(thr)) {
-
- trx_mutex_exit(thr_get_trx(thr));
-
- return(thr);
- }
-
- /* Thread execution completed */
-
- thr->state = QUE_THR_COMPLETED;
-
- trx_mutex_exit(thr_get_trx(thr));
-
- return(NULL);
-}
-
-/**********************************************************************//**
-Moves a thread from another state to the QUE_THR_RUNNING state. Increments
-the n_active_thrs counters of the query graph and transaction if thr was
-not active.
-***NOTE***: This and ..._mysql are the only functions in which such a
-transition is allowed to happen! */
-static
-void
-que_thr_move_to_run_state(
-/*======================*/
- que_thr_t* thr) /*!< in: an query thread */
-{
- ut_ad(thr->state != QUE_THR_RUNNING);
-
- if (!thr->is_active) {
- trx_t* trx;
-
- trx = thr_get_trx(thr);
-
- thr->graph->n_active_thrs++;
-
- trx->lock.n_active_thrs++;
-
- thr->is_active = TRUE;
- }
-
- thr->state = QUE_THR_RUNNING;
-}
-
-/**********************************************************************//**
-Stops a query thread if graph or trx is in a state requiring it. The
-conditions are tested in the order (1) graph, (2) trx.
-@return TRUE if stopped */
-UNIV_INTERN
-ibool
-que_thr_stop(
-/*=========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- que_t* graph;
- trx_t* trx = thr_get_trx(thr);
-
- graph = thr->graph;
-
- ut_ad(trx_mutex_own(trx));
-
- if (graph->state == QUE_FORK_COMMAND_WAIT) {
-
- thr->state = QUE_THR_SUSPENDED;
-
- } else if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
-
- trx->lock.wait_thr = thr;
- thr->state = QUE_THR_LOCK_WAIT;
-
- } else if (trx->error_state != DB_SUCCESS
- && trx->error_state != DB_LOCK_WAIT) {
-
- /* Error handling built for the MySQL interface */
- thr->state = QUE_THR_COMPLETED;
-
- } else if (graph->fork_type == QUE_FORK_ROLLBACK) {
-
- thr->state = QUE_THR_SUSPENDED;
- } else {
- ut_ad(graph->state == QUE_FORK_ACTIVE);
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Decrements the query thread reference counts in the query graph and the
-transaction.
-*** NOTE ***:
-This and que_thr_stop_for_mysql are the only functions where the reference
-count can be decremented and this function may only be called from inside
-que_run_threads! These restrictions exist to make the rollback code easier
-to maintain. */
-static
-void
-que_thr_dec_refer_count(
-/*====================*/
- que_thr_t* thr, /*!< in: query thread */
- que_thr_t** next_thr) /*!< in/out: next query thread to run;
- if the value which is passed in is
- a pointer to a NULL pointer, then the
- calling function can start running
- a new query thread */
-{
- trx_t* trx;
- que_fork_t* fork;
-
- trx = thr_get_trx(thr);
-
- ut_a(thr->is_active);
- ut_ad(trx_mutex_own(trx));
-
- if (thr->state == QUE_THR_RUNNING) {
-
- if (!que_thr_stop(thr)) {
-
- ut_a(next_thr != NULL && *next_thr == NULL);
-
- /* The reason for the thr suspension or wait was
- already canceled before we came here: continue
- running the thread.
-
- This is also possible because in trx_commit_step() we
- assume a single query thread. We set the query thread
- state to QUE_THR_RUNNING. */
-
- /* fprintf(stderr,
- "Wait already ended: trx: %p\n", trx); */
-
- /* Normally srv_suspend_mysql_thread resets
- the state to DB_SUCCESS before waiting, but
- in this case we have to do it here,
- otherwise nobody does it. */
-
- trx->error_state = DB_SUCCESS;
-
- *next_thr = thr;
-
- return;
- }
- }
-
- fork = static_cast<que_fork_t*>(thr->common.parent);
-
- --trx->lock.n_active_thrs;
-
- --fork->n_active_thrs;
-
- thr->is_active = FALSE;
-}
-
-/**********************************************************************//**
-A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
-query thread is stopped and made inactive, except in the case where
-it was put to the lock wait state in lock0lock.cc, but the lock has already
-been granted or the transaction chosen as a victim in deadlock resolution. */
-UNIV_INTERN
-void
-que_thr_stop_for_mysql(
-/*===================*/
- que_thr_t* thr) /*!< in: query thread */
-{
- trx_t* trx;
-
- trx = thr_get_trx(thr);
-
- /* Can't be the purge transaction. */
- ut_a(trx->id != 0);
-
- trx_mutex_enter(trx);
-
- if (thr->state == QUE_THR_RUNNING) {
-
- if (trx->error_state != DB_SUCCESS
- && trx->error_state != DB_LOCK_WAIT) {
-
- /* Error handling built for the MySQL interface */
- thr->state = QUE_THR_COMPLETED;
- } else {
- /* It must have been a lock wait but the lock was
- already released, or this transaction was chosen
- as a victim in selective deadlock resolution */
-
- trx_mutex_exit(trx);
-
- return;
- }
- }
-
- ut_ad(thr->is_active == TRUE);
- ut_ad(trx->lock.n_active_thrs == 1);
- ut_ad(thr->graph->n_active_thrs == 1);
-
- thr->is_active = FALSE;
- thr->graph->n_active_thrs--;
-
- trx->lock.n_active_thrs--;
-
- trx_mutex_exit(trx);
-}
-
-/**********************************************************************//**
-Moves a thread from another state to the QUE_THR_RUNNING state. Increments
-the n_active_thrs counters of the query graph and transaction if thr was
-not active. */
-UNIV_INTERN
-void
-que_thr_move_to_run_state_for_mysql(
-/*================================*/
- que_thr_t* thr, /*!< in: an query thread */
- trx_t* trx) /*!< in: transaction */
-{
- if (thr->magic_n != QUE_THR_MAGIC_N) {
- fprintf(stderr,
- "que_thr struct appears corrupt; magic n %lu\n",
- (unsigned long) thr->magic_n);
-
- mem_analyze_corruption(thr);
-
- ut_error;
- }
-
- if (!thr->is_active) {
-
- thr->graph->n_active_thrs++;
-
- trx->lock.n_active_thrs++;
-
- thr->is_active = TRUE;
- }
-
- thr->state = QUE_THR_RUNNING;
-}
-
-/**********************************************************************//**
-A patch for MySQL used to 'stop' a dummy query thread used in MySQL
-select, when there is no error or lock wait. */
-UNIV_INTERN
-void
-que_thr_stop_for_mysql_no_error(
-/*============================*/
- que_thr_t* thr, /*!< in: query thread */
- trx_t* trx) /*!< in: transaction */
-{
- ut_ad(thr->state == QUE_THR_RUNNING);
- ut_ad(thr_get_trx(thr)->id != 0);
- ut_ad(thr->is_active == TRUE);
- ut_ad(trx->lock.n_active_thrs == 1);
- ut_ad(thr->graph->n_active_thrs == 1);
-
- if (thr->magic_n != QUE_THR_MAGIC_N) {
- fprintf(stderr,
- "que_thr struct appears corrupt; magic n %lu\n",
- (unsigned long) thr->magic_n);
-
- mem_analyze_corruption(thr);
-
- ut_error;
- }
-
- thr->state = QUE_THR_COMPLETED;
-
- thr->is_active = FALSE;
- thr->graph->n_active_thrs--;
-
- trx->lock.n_active_thrs--;
-}
-
-/****************************************************************//**
-Get the first containing loop node (e.g. while_node_t or for_node_t) for the
-given node, or NULL if the node is not within a loop.
-@return containing loop node, or NULL. */
-UNIV_INTERN
-que_node_t*
-que_node_get_containing_loop_node(
-/*==============================*/
- que_node_t* node) /*!< in: node */
-{
- ut_ad(node);
-
- for (;;) {
- ulint type;
-
- node = que_node_get_parent(node);
-
- if (!node) {
- break;
- }
-
- type = que_node_get_type(node);
-
- if ((type == QUE_NODE_FOR) || (type == QUE_NODE_WHILE)) {
- break;
- }
- }
-
- return(node);
-}
-
-/**********************************************************************//**
-Prints info of an SQL query graph node. */
-UNIV_INTERN
-void
-que_node_print_info(
-/*================*/
- que_node_t* node) /*!< in: query graph node */
-{
- ulint type;
- const char* str;
-
- type = que_node_get_type(node);
-
- if (type == QUE_NODE_SELECT) {
- str = "SELECT";
- } else if (type == QUE_NODE_INSERT) {
- str = "INSERT";
- } else if (type == QUE_NODE_UPDATE) {
- str = "UPDATE";
- } else if (type == QUE_NODE_WHILE) {
- str = "WHILE";
- } else if (type == QUE_NODE_ASSIGNMENT) {
- str = "ASSIGNMENT";
- } else if (type == QUE_NODE_IF) {
- str = "IF";
- } else if (type == QUE_NODE_FETCH) {
- str = "FETCH";
- } else if (type == QUE_NODE_OPEN) {
- str = "OPEN";
- } else if (type == QUE_NODE_PROC) {
- str = "STORED PROCEDURE";
- } else if (type == QUE_NODE_FUNC) {
- str = "FUNCTION";
- } else if (type == QUE_NODE_LOCK) {
- str = "LOCK";
- } else if (type == QUE_NODE_THR) {
- str = "QUERY THREAD";
- } else if (type == QUE_NODE_COMMIT) {
- str = "COMMIT";
- } else if (type == QUE_NODE_UNDO) {
- str = "UNDO ROW";
- } else if (type == QUE_NODE_PURGE) {
- str = "PURGE ROW";
- } else if (type == QUE_NODE_ROLLBACK) {
- str = "ROLLBACK";
- } else if (type == QUE_NODE_CREATE_TABLE) {
- str = "CREATE TABLE";
- } else if (type == QUE_NODE_CREATE_INDEX) {
- str = "CREATE INDEX";
- } else if (type == QUE_NODE_FOR) {
- str = "FOR LOOP";
- } else if (type == QUE_NODE_RETURN) {
- str = "RETURN";
- } else if (type == QUE_NODE_EXIT) {
- str = "EXIT";
- } else {
- str = "UNKNOWN NODE TYPE";
- }
-
- fprintf(stderr, "Node type %lu: %s, address %p\n",
- (ulong) type, str, (void*) node);
-}
-
-/**********************************************************************//**
-Performs an execution step on a query thread.
-@return query thread to run next: it may differ from the input
-parameter if, e.g., a subprocedure call is made */
-UNIV_INLINE
-que_thr_t*
-que_thr_step(
-/*=========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- que_node_t* node;
- que_thr_t* old_thr;
- trx_t* trx;
- ulint type;
-
- trx = thr_get_trx(thr);
-
- ut_ad(thr->state == QUE_THR_RUNNING);
- ut_a(trx->error_state == DB_SUCCESS);
-
- thr->resource++;
-
- node = thr->run_node;
- type = que_node_get_type(node);
-
- old_thr = thr;
-
-#ifdef UNIV_DEBUG
- if (que_trace_on) {
- fputs("To execute: ", stderr);
- que_node_print_info(node);
- }
-#endif
- if (type & QUE_NODE_CONTROL_STAT) {
- if ((thr->prev_node != que_node_get_parent(node))
- && que_node_get_next(thr->prev_node)) {
-
- /* The control statements, like WHILE, always pass the
- control to the next child statement if there is any
- child left */
-
- thr->run_node = que_node_get_next(thr->prev_node);
-
- } else if (type == QUE_NODE_IF) {
- if_step(thr);
- } else if (type == QUE_NODE_FOR) {
- for_step(thr);
- } else if (type == QUE_NODE_PROC) {
-
- /* We can access trx->undo_no without reserving
- trx->undo_mutex, because there cannot be active query
- threads doing updating or inserting at the moment! */
-
- if (thr->prev_node == que_node_get_parent(node)) {
- trx->last_sql_stat_start.least_undo_no
- = trx->undo_no;
- }
-
- proc_step(thr);
- } else if (type == QUE_NODE_WHILE) {
- while_step(thr);
- } else {
- ut_error;
- }
- } else if (type == QUE_NODE_ASSIGNMENT) {
- assign_step(thr);
- } else if (type == QUE_NODE_SELECT) {
- thr = row_sel_step(thr);
- } else if (type == QUE_NODE_INSERT) {
- thr = row_ins_step(thr);
- } else if (type == QUE_NODE_UPDATE) {
- thr = row_upd_step(thr);
- } else if (type == QUE_NODE_FETCH) {
- thr = fetch_step(thr);
- } else if (type == QUE_NODE_OPEN) {
- thr = open_step(thr);
- } else if (type == QUE_NODE_FUNC) {
- proc_eval_step(thr);
-
- } else if (type == QUE_NODE_LOCK) {
-
- ut_error;
- } else if (type == QUE_NODE_THR) {
- thr = que_thr_node_step(thr);
- } else if (type == QUE_NODE_COMMIT) {
- thr = trx_commit_step(thr);
- } else if (type == QUE_NODE_UNDO) {
- thr = row_undo_step(thr);
- } else if (type == QUE_NODE_PURGE) {
- thr = row_purge_step(thr);
- } else if (type == QUE_NODE_RETURN) {
- thr = return_step(thr);
- } else if (type == QUE_NODE_EXIT) {
- thr = exit_step(thr);
- } else if (type == QUE_NODE_ROLLBACK) {
- thr = trx_rollback_step(thr);
- } else if (type == QUE_NODE_CREATE_TABLE) {
- thr = dict_create_table_step(thr);
- } else if (type == QUE_NODE_CREATE_INDEX) {
- thr = dict_create_index_step(thr);
- } else if (type == QUE_NODE_ROW_PRINTF) {
- thr = row_printf_step(thr);
- } else {
- ut_error;
- }
-
- if (type == QUE_NODE_EXIT) {
- old_thr->prev_node = que_node_get_containing_loop_node(node);
- } else {
- old_thr->prev_node = node;
- }
-
- if (thr) {
- ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS);
- }
-
- return(thr);
-}
-
-/**********************************************************************//**
-Run a query thread until it finishes or encounters e.g. a lock wait. */
-static
-void
-que_run_threads_low(
-/*================*/
- que_thr_t* thr) /*!< in: query thread */
-{
- trx_t* trx;
- que_thr_t* next_thr;
-
- ut_ad(thr->state == QUE_THR_RUNNING);
- ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS);
- ut_ad(!trx_mutex_own(thr_get_trx(thr)));
-
- /* cumul_resource counts how much resources the OS thread (NOT the
- query thread) has spent in this function */
-
- trx = thr_get_trx(thr);
-
- do {
- /* Check that there is enough space in the log to accommodate
- possible log entries by this query step; if the operation can
- touch more than about 4 pages, checks must be made also within
- the query step! */
-
- log_free_check();
-
- /* Perform the actual query step: note that the query thread
- may change if, e.g., a subprocedure call is made */
-
- /*-------------------------*/
- next_thr = que_thr_step(thr);
- /*-------------------------*/
-
- trx_mutex_enter(trx);
-
- ut_a(next_thr == NULL || trx->error_state == DB_SUCCESS);
-
- if (next_thr != thr) {
- ut_a(next_thr == NULL);
-
- /* This can change next_thr to a non-NULL value
- if there was a lock wait that already completed. */
-
- que_thr_dec_refer_count(thr, &next_thr);
-
- if (next_thr != NULL) {
-
- thr = next_thr;
- }
- }
-
- ut_ad(trx == thr_get_trx(thr));
-
- trx_mutex_exit(trx);
-
- } while (next_thr != NULL);
-}
-
-/**********************************************************************//**
-Run a query thread. Handles lock waits. */
-UNIV_INTERN
-void
-que_run_threads(
-/*============*/
- que_thr_t* thr) /*!< in: query thread */
-{
- ut_ad(!trx_mutex_own(thr_get_trx(thr)));
-
-loop:
- ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS);
-
- que_run_threads_low(thr);
-
- switch (thr->state) {
-
- case QUE_THR_RUNNING:
- /* There probably was a lock wait, but it already ended
- before we came here: continue running thr */
-
- goto loop;
-
- case QUE_THR_LOCK_WAIT:
- lock_wait_suspend_thread(thr);
-
- trx_mutex_enter(thr_get_trx(thr));
-
- ut_a(thr_get_trx(thr)->id != 0);
-
- if (thr_get_trx(thr)->error_state != DB_SUCCESS) {
- /* thr was chosen as a deadlock victim or there was
- a lock wait timeout */
-
- que_thr_dec_refer_count(thr, NULL);
- trx_mutex_exit(thr_get_trx(thr));
- break;
- }
-
- trx_mutex_exit(thr_get_trx(thr));
- goto loop;
-
- case QUE_THR_COMPLETED:
- case QUE_THR_COMMAND_WAIT:
- /* Do nothing */
- break;
-
- default:
- ut_error;
- }
-}
-
-/*********************************************************************//**
-Evaluate the given SQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-que_eval_sql(
-/*=========*/
- pars_info_t* info, /*!< in: info struct, or NULL */
- const char* sql, /*!< in: SQL string */
- ibool reserve_dict_mutex,
- /*!< in: if TRUE, acquire/release
- dict_sys->mutex around call to pars_sql. */
- trx_t* trx) /*!< in: trx */
-{
- que_thr_t* thr;
- que_t* graph;
-
- ut_a(trx->error_state == DB_SUCCESS);
-
- if (UNIV_UNLIKELY(trx->fake_changes)) {
- /* fake_changes should not access to system tables */
- fprintf(stderr, "InnoDB: ERROR: innodb_fake_changes tried to access to system tables.\n");
- return(DB_ERROR);
- }
-
- if (reserve_dict_mutex) {
- mutex_enter(&dict_sys->mutex);
- }
-
- graph = pars_sql(info, sql);
-
- if (reserve_dict_mutex) {
- mutex_exit(&dict_sys->mutex);
- }
-
- ut_a(graph);
-
- graph->trx = trx;
- trx->graph = NULL;
-
- graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
-
- ut_a(thr = que_fork_start_command(graph));
-
- que_run_threads(thr);
-
- if (reserve_dict_mutex) {
- mutex_enter(&dict_sys->mutex);
- }
-
- que_graph_free(graph);
-
- if (reserve_dict_mutex) {
- mutex_exit(&dict_sys->mutex);
- }
-
- return(trx->error_state);
-}
-
-/*********************************************************************//**
-Initialise the query sub-system. */
-UNIV_INTERN
-void
-que_init(void)
-/*==========*/
-{
- /* No op */
-}
-
-/*********************************************************************//**
-Close the query sub-system. */
-UNIV_INTERN
-void
-que_close(void)
-/*===========*/
-{
- /* No op */
-}
diff --git a/storage/xtradb/read/read0read.cc b/storage/xtradb/read/read0read.cc
deleted file mode 100644
index c350e24dbb0..00000000000
--- a/storage/xtradb/read/read0read.cc
+++ /dev/null
@@ -1,691 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file read/read0read.cc
-Cursor read
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#include "read0read.h"
-#include "read0i_s.h"
-
-#ifdef UNIV_NONINL
-#include "read0read.ic"
-#endif
-
-#include "srv0srv.h"
-#include "trx0sys.h"
-
-/*
--------------------------------------------------------------------------------
-FACT A: Cursor read view on a secondary index sees only committed versions
--------
-of the records in the secondary index or those versions of rows created
-by transaction which created a cursor before cursor was created even
-if transaction which created the cursor has changed that clustered index page.
-
-PROOF: We must show that read goes always to the clustered index record
-to see that record is visible in the cursor read view. Consider e.g.
-following table and SQL-clauses:
-
-create table t1(a int not null, b int, primary key(a), index(b));
-insert into t1 values (1,1),(2,2);
-commit;
-
-Now consider that we have a cursor for a query
-
-select b from t1 where b >= 1;
-
-This query will use secondary key on the table t1. Now after the first fetch
-on this cursor if we do a update:
-
-update t1 set b = 5 where b = 2;
-
-Now second fetch of the cursor should not see record (2,5) instead it should
-see record (2,2).
-
-We also should show that if we have delete t1 where b = 5; we still
-can see record (2,2).
-
-When we access a secondary key record maximum transaction id is fetched
-from this record and this trx_id is compared to up_limit_id in the view.
-If trx_id in the record is greater or equal than up_limit_id in the view
-cluster record is accessed. Because trx_id of the creating
-transaction is stored when this view was created to the list of
-trx_ids not seen by this read view previous version of the
-record is requested to be built. This is build using clustered record.
-If the secondary key record is delete-marked, its corresponding
-clustered record can be already be purged only if records
-trx_id < low_limit_no. Purge can't remove any record deleted by a
-transaction which was active when cursor was created. But, we still
-may have a deleted secondary key record but no clustered record. But,
-this is not a problem because this case is handled in
-row_sel_get_clust_rec() function which is called
-whenever we note that this read view does not see trx_id in the
-record. Thus, we see correct version. Q. E. D.
-
--------------------------------------------------------------------------------
-FACT B: Cursor read view on a clustered index sees only committed versions
--------
-of the records in the clustered index or those versions of rows created
-by transaction which created a cursor before cursor was created even
-if transaction which created the cursor has changed that clustered index page.
-
-PROOF: Consider e.g.following table and SQL-clauses:
-
-create table t1(a int not null, b int, primary key(a));
-insert into t1 values (1),(2);
-commit;
-
-Now consider that we have a cursor for a query
-
-select a from t1 where a >= 1;
-
-This query will use clustered key on the table t1. Now after the first fetch
-on this cursor if we do a update:
-
-update t1 set a = 5 where a = 2;
-
-Now second fetch of the cursor should not see record (5) instead it should
-see record (2).
-
-We also should show that if we have execute delete t1 where a = 5; after
-the cursor is opened we still can see record (2).
-
-When accessing clustered record we always check if this read view sees
-trx_id stored to clustered record. By default we don't see any changes
-if record trx_id >= low_limit_id i.e. change was made transaction
-which started after transaction which created the cursor. If row
-was changed by the future transaction a previous version of the
-clustered record is created. Thus we see only committed version in
-this case. We see all changes made by committed transactions i.e.
-record trx_id < up_limit_id. In this case we don't need to do anything,
-we already see correct version of the record. We don't see any changes
-made by active transaction except creating transaction. We have stored
-trx_id of creating transaction to list of trx_ids when this view was
-created. Thus we can easily see if this record was changed by the
-creating transaction. Because we already have clustered record we can
-access roll_ptr. Using this roll_ptr we can fetch undo record.
-We can now check that undo_no of the undo record is less than undo_no of the
-trancaction which created a view when cursor was created. We see this
-clustered record only in case when record undo_no is less than undo_no
-in the view. If this is not true we build based on undo_rec previous
-version of the record. This record is found because purge can't remove
-records accessed by active transaction. Thus we see correct version. Q. E. D.
--------------------------------------------------------------------------------
-FACT C: Purge does not remove any delete-marked row that is visible
--------
-in any cursor read view.
-
-PROOF: We know that:
- 1: Currently active read views in trx_sys_t::view_list are ordered by
- read_view_t::low_limit_no in descending order, that is,
- newest read view first.
-
- 2: Purge clones the oldest read view and uses that to determine whether there
- are any active transactions that can see the to be purged records.
-
-Therefore any joining or active transaction will not have a view older
-than the purge view, according to 1.
-
-When purge needs to remove a delete-marked row from a secondary index,
-it will first check that the DB_TRX_ID value of the corresponding
-record in the clustered index is older than the purge view. It will
-also check if there is a newer version of the row (clustered index
-record) that is not delete-marked in the secondary index. If such a
-row exists and is collation-equal to the delete-marked secondary index
-record then purge will not remove the secondary index record.
-
-Delete-marked clustered index records will be removed by
-row_purge_remove_clust_if_poss(), unless the clustered index record
-(and its DB_ROLL_PTR) has been updated. Every new version of the
-clustered index record will update DB_ROLL_PTR, pointing to a new UNDO
-log entry that allows the old version to be reconstructed. The
-DB_ROLL_PTR in the oldest remaining version in the old-version chain
-may be pointing to garbage (an undo log record discarded by purge),
-but it will never be dereferenced, because the purge view is older
-than any active transaction.
-
-For details see: row_vers_old_has_index_entry() and row_purge_poss_sec()
-
-Some additional issues:
-
-What if trx_sys->view_list == NULL and some transaction T1 and Purge both
-try to open read_view at same time. Only one can acquire trx_sys->mutex.
-In which order will the views be opened? Should it matter? If no, why?
-
-The order does not matter. No new transactions can be created and no running
-transaction can commit or rollback (or free views).
-*/
-
-/*********************************************************************//**
-Creates a read view object.
-@return own: read view struct */
-UNIV_INLINE
-read_view_t*
-read_view_create_low(
-/*=================*/
- ulint n, /*!< in: number of cells in the trx_ids array */
- read_view_t*& view) /*!< in,out: pre-allocated view array or NULL if
- a new one needs to be created */
-{
- if (view == NULL) {
- view = static_cast<read_view_t*>(
- ut_malloc(sizeof(read_view_t)));
- os_atomic_increment_ulint(&srv_read_views_memory,
- sizeof(read_view_t));
- view->max_descr = 0;
- view->descriptors = NULL;
- }
-
- if (UNIV_UNLIKELY(view->max_descr < n)) {
-
- /* avoid frequent re-allocations by extending the array to the
- desired size + 10% */
-
- os_atomic_increment_ulint(&srv_read_views_memory,
- (n + n / 10 - view->max_descr) *
- sizeof(trx_id_t));
- view->max_descr = n + n / 10;
- view->descriptors = static_cast<trx_id_t*>(
- ut_realloc(view->descriptors,
- view->max_descr *
- sizeof *view->descriptors));
- }
-
- view->n_descr = n;
-
- return(view);
-}
-
-/*********************************************************************//**
-Clones a read view object. This function will allocate space for two read
-views contiguously, one identical in size and content as @param view (starting
-at returned pointer) and another view immediately following the trx_ids array.
-The second view will have space for an extra trx_id_t element.
-@return read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_clone(
-/*============*/
- const read_view_t* view, /*!< in: view to clone */
- read_view_t*& prebuilt_clone) /*!< in,out: prebuilt view or
- NULL */
-{
- read_view_t* clone;
- trx_id_t* old_descriptors;
- ulint old_max_descr;
-
- ut_ad(mutex_own(&trx_sys->mutex));
-
- clone = read_view_create_low(view->n_descr, prebuilt_clone);
-
- old_descriptors = clone->descriptors;
- old_max_descr = clone->max_descr;
-
- memcpy(clone, view, sizeof(*view));
-
- clone->descriptors = old_descriptors;
- clone->max_descr = old_max_descr;
-
- if (view->n_descr) {
- memcpy(clone->descriptors, view->descriptors,
- view->n_descr * sizeof(trx_id_t));
- }
-
- return(clone);
-}
-
-/*********************************************************************//**
-Insert the view in the proper order into the trx_sys->view_list. The
-read view list is ordered by read_view_t::low_limit_no in descending order. */
-UNIV_INTERN
-void
-read_view_add(
-/*==========*/
- read_view_t* view) /*!< in: view to add to */
-{
- read_view_t* elem;
- read_view_t* prev_elem;
-
- ut_ad(mutex_own(&trx_sys->mutex));
- ut_ad(read_view_validate(view));
-
- /* Find the correct slot for insertion. */
- for (elem = UT_LIST_GET_FIRST(trx_sys->view_list), prev_elem = NULL;
- elem != NULL && view->low_limit_no < elem->low_limit_no;
- prev_elem = elem, elem = UT_LIST_GET_NEXT(view_list, elem)) {
- /* No op */
- }
-
- if (prev_elem == NULL) {
- UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);
- } else {
- UT_LIST_INSERT_AFTER(
- view_list, trx_sys->view_list, prev_elem, view);
- }
-
- ut_ad(read_view_list_validate());
-}
-
-/*********************************************************************//**
-Opens a read view where exactly the transactions serialized before this
-point in time are seen in the view.
-@return own: read view struct */
-static
-read_view_t*
-read_view_open_now_low(
-/*===================*/
- trx_id_t cr_trx_id, /*!< in: trx_id of creating
- transaction, or 0 used in purge */
- read_view_t*& view) /*!< in,out: pre-allocated view array or
- NULL if a new one needs to be created */
-{
- trx_id_t* descr;
- ulint i;
-
- ut_ad(mutex_own(&trx_sys->mutex));
-
- view = read_view_create_low(trx_sys->descr_n_used, view);
-
- view->undo_no = 0;
- view->type = VIEW_NORMAL;
- view->creator_trx_id = cr_trx_id;
-
- /* No future transactions should be visible in the view */
-
- view->low_limit_no = trx_sys->max_trx_id;
- view->low_limit_id = view->low_limit_no;
-
- descr = trx_find_descriptor(trx_sys->descriptors,
- trx_sys->descr_n_used,
- cr_trx_id);
- if (UNIV_LIKELY(descr != NULL)) {
- ut_ad(trx_sys->descr_n_used > 0);
- ut_ad(view->n_descr > 0);
-
- view->n_descr--;
-
- i = descr - trx_sys->descriptors;
- } else {
- i = trx_sys->descr_n_used;
- }
-
- if (UNIV_LIKELY(i > 0)) {
- /* Copy the [0; i-1] range */
- memcpy(view->descriptors, trx_sys->descriptors,
- i * sizeof(trx_id_t));
- }
-
- if (UNIV_UNLIKELY(i + 1 < trx_sys->descr_n_used)) {
- /* Copy the [i+1; descr_n_used-1] range */
- memcpy(view->descriptors + i,
- trx_sys->descriptors + i + 1,
- (trx_sys->descr_n_used - i - 1) *
- sizeof(trx_id_t));
- }
-
- /* NOTE that a transaction whose trx number is < trx_sys->max_trx_id can
- still be active, if it is in the middle of its commit! Note that when a
- transaction starts, we initialize trx->no to TRX_ID_MAX. */
-
- if (UT_LIST_GET_LEN(trx_sys->trx_serial_list) > 0) {
-
- trx_id_t trx_no;
-
- trx_no = UT_LIST_GET_FIRST(trx_sys->trx_serial_list)->no;
-
- if (trx_no < view->low_limit_no) {
- view->low_limit_no = trx_no;
- }
- }
-
- if (UNIV_LIKELY(view->n_descr > 0)) {
- /* The last active transaction has the smallest id: */
- view->up_limit_id = view->descriptors[0];
- } else {
- view->up_limit_id = view->low_limit_id;
- }
-
- /* Purge views are not added to the view list. */
- if (cr_trx_id > 0) {
- read_view_add(view);
- }
-
- return(view);
-}
-
-/*********************************************************************//**
-Opens a read view where exactly the transactions serialized before this
-point in time are seen in the view.
-@return own: read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_open_now(
-/*===============*/
- trx_id_t cr_trx_id, /*!< in: trx_id of creating
- transaction, or 0 used in purge */
- read_view_t*& view) /*!< in,out: pre-allocated view array or
- NULL if a new one needs to be created */
-{
- mutex_enter(&trx_sys->mutex);
-
- view = read_view_open_now_low(cr_trx_id, view);
-
- mutex_exit(&trx_sys->mutex);
-
- return(view);
-}
-
-/*********************************************************************//**
-Makes a copy of the oldest existing read view, with the exception that also
-the creating trx of the oldest view is set as not visible in the 'copied'
-view. Opens a new view if no views currently exist. The view must be closed
-with ..._close. This is used in purge.
-@return own: read view struct */
-UNIV_INTERN
-read_view_t*
-read_view_purge_open(
-/*=================*/
- read_view_t*& prebuilt_clone, /*!< in,out: pre-allocated view that
- will be used to clone the oldest view if
- exists */
- read_view_t*& prebuilt_view) /*!< in,out: pre-allocated view array or
- NULL if a new one needs to be created */
-{
- ulint i;
- read_view_t* view;
- read_view_t* oldest_view;
- trx_id_t creator_trx_id;
- ulint insert_done = 0;
-
- mutex_enter(&trx_sys->mutex);
-
- oldest_view = UT_LIST_GET_LAST(trx_sys->view_list);
-
- if (oldest_view == NULL) {
-
- view = read_view_open_now_low(0, prebuilt_view);
-
- mutex_exit(&trx_sys->mutex);
-
- return(view);
- }
-
- /* Clone the oldest view to a pre-allocated clone view */
-
- oldest_view = read_view_clone(oldest_view, prebuilt_clone);
-
- ut_ad(read_view_validate(oldest_view));
-
- mutex_exit(&trx_sys->mutex);
-
- ut_a(oldest_view->creator_trx_id > 0);
- creator_trx_id = oldest_view->creator_trx_id;
-
- view = read_view_create_low(oldest_view->n_descr + 1, prebuilt_view);
-
- /* Add the creator transaction id in the trx_ids array in the
- correct slot. */
-
- for (i = 0; i < oldest_view->n_descr; ++i) {
- trx_id_t id;
-
- id = oldest_view->descriptors[i - insert_done];
-
- if (insert_done == 0 && creator_trx_id < id) {
- id = creator_trx_id;
- insert_done = 1;
- }
-
- view->descriptors[i] = id;
- }
-
- if (insert_done == 0) {
- view->descriptors[i] = creator_trx_id;
- } else {
- ut_a(i > 0);
- view->descriptors[i] = oldest_view->descriptors[i - 1];
- }
-
- view->creator_trx_id = 0;
-
- view->low_limit_no = oldest_view->low_limit_no;
- view->low_limit_id = oldest_view->low_limit_id;
-
- if (view->n_descr > 0) {
- /* The last active transaction has the smallest id: */
-
- view->up_limit_id = view->descriptors[0];
- } else {
- view->up_limit_id = oldest_view->up_limit_id;
- }
-
- return(view);
-}
-
-/*********************************************************************//**
-Closes a consistent read view for MySQL. This function is called at an SQL
-statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */
-UNIV_INTERN
-void
-read_view_close_for_mysql(
-/*======================*/
- trx_t* trx) /*!< in: trx which has a read view */
-{
- ut_a(trx->global_read_view);
-
- read_view_remove(trx->global_read_view, false);
-
- trx->read_view = NULL;
- trx->global_read_view = NULL;
-}
-
-/*********************************************************************//**
-Prints a read view to file. */
-UNIV_INTERN
-void
-read_view_print(
-/*============*/
- FILE* file, /*!< in: file to print to */
- const read_view_t* view) /*!< in: read view */
-{
- ulint n_ids;
- ulint i;
-
- if (view->type == VIEW_HIGH_GRANULARITY) {
- fprintf(file,
- "High-granularity read view undo_n:o " TRX_ID_FMT "\n",
- view->undo_no);
- } else {
- fprintf(file, "Normal read view\n");
- }
-
- fprintf(file, "Read view low limit trx n:o " TRX_ID_FMT "\n",
- view->low_limit_no);
-
- fprintf(file, "Read view up limit trx id " TRX_ID_FMT "\n",
- view->up_limit_id);
-
- fprintf(file, "Read view low limit trx id " TRX_ID_FMT "\n",
- view->low_limit_id);
-
- fprintf(file, "Read view individually stored trx ids:\n");
-
- n_ids = view->n_descr;
-
- for (i = 0; i < n_ids; i++) {
- fprintf(file, "Read view trx id " TRX_ID_FMT "\n",
- view->descriptors[i]);
- }
-}
-
-UNIV_INTERN
-i_s_xtradb_read_view_t*
-read_fill_i_s_xtradb_read_view(i_s_xtradb_read_view_t* rv)
-{
- read_view_t* view;
-
- mutex_enter(&trx_sys->mutex);
-
- if (UT_LIST_GET_LEN(trx_sys->view_list)) {
- view = UT_LIST_GET_LAST(trx_sys->view_list);
- } else {
- mutex_exit(&trx_sys->mutex);
- return NULL;
- }
-
- if (view->type == VIEW_HIGH_GRANULARITY) {
- rv->undo_no = view->undo_no;
- } else {
- rv->undo_no = ULINT_UNDEFINED;
- }
-
- rv->low_limit_no = view->low_limit_no;
- rv->up_limit_id = view->up_limit_id;
- rv->low_limit_id = view->low_limit_id;
-
- mutex_exit(&trx_sys->mutex);
-
- return rv;
-}
-
-/*********************************************************************//**
-Frees resource allocated by a read view. */
-UNIV_INTERN
-void
-read_view_free(
-/*===========*/
- read_view_t*& view) /*< in,out: read view */
-{
- if (view == NULL) {
-
- return;
- }
-
- os_atomic_decrement_ulint(&srv_read_views_memory,
- sizeof(read_view_t) +
- view->max_descr * sizeof(trx_id_t));
-
- if (view->descriptors != NULL) {
- ut_free(view->descriptors);
- }
-
- ut_free(view);
-
- view = NULL;
-}
-
-/*********************************************************************//**
-Create a high-granularity consistent cursor view for mysql to be used
-in cursors. In this consistent read view modifications done by the
-creating transaction after the cursor is created or future transactions
-are not visible. */
-UNIV_INTERN
-cursor_view_t*
-read_cursor_view_create_for_mysql(
-/*==============================*/
- trx_t* cr_trx) /*!< in: trx where cursor view is created */
-{
- read_view_t* view;
- mem_heap_t* heap;
- cursor_view_t* curview;
-
- /* Use larger heap than in trx_create when creating a read_view
- because cursors are quite long. */
-
- heap = mem_heap_create(512);
-
- curview = (cursor_view_t*) mem_heap_alloc(heap, sizeof(*curview));
-
- curview->heap = heap;
-
- /* Drop cursor tables from consideration when evaluating the
- need of auto-commit */
-
- curview->n_mysql_tables_in_use = cr_trx->n_mysql_tables_in_use;
-
- cr_trx->n_mysql_tables_in_use = 0;
-
- mutex_enter(&trx_sys->mutex);
-
- curview->read_view = NULL;
- read_view_open_now_low(UINT64_UNDEFINED, curview->read_view);
-
- view = curview->read_view;
- view->undo_no = cr_trx->undo_no;
- view->type = VIEW_HIGH_GRANULARITY;
-
- mutex_exit(&trx_sys->mutex);
-
- return(curview);
-}
-
-/*********************************************************************//**
-Close a given consistent cursor view for mysql and restore global read view
-back to a transaction read view. */
-UNIV_INTERN
-void
-read_cursor_view_close_for_mysql(
-/*=============================*/
- trx_t* trx, /*!< in: trx */
- cursor_view_t* curview)/*!< in: cursor view to be closed */
-{
- ut_a(curview);
- ut_a(curview->read_view);
- ut_a(curview->heap);
-
- /* Add cursor's tables to the global count of active tables that
- belong to this transaction */
- trx->n_mysql_tables_in_use += curview->n_mysql_tables_in_use;
-
- read_view_remove(curview->read_view, false);
- read_view_free(curview->read_view);
-
- trx->read_view = trx->global_read_view;
-
- mem_heap_free(curview->heap);
-}
-
-/*********************************************************************//**
-This function sets a given consistent cursor view to a transaction
-read view if given consistent cursor view is not NULL. Otherwise, function
-restores a global read view to a transaction read view. */
-UNIV_INTERN
-void
-read_cursor_set_for_mysql(
-/*======================*/
- trx_t* trx, /*!< in: transaction where cursor is set */
- cursor_view_t* curview)/*!< in: consistent cursor view to be set */
-{
- ut_a(trx);
-
- mutex_enter(&trx_sys->mutex);
-
- if (UNIV_LIKELY(curview != NULL)) {
- trx->read_view = curview->read_view;
- } else {
- trx->read_view = trx->global_read_view;
- }
-
- ut_ad(read_view_validate(trx->read_view));
-
- mutex_exit(&trx_sys->mutex);
-}
diff --git a/storage/xtradb/rem/rem0cmp.cc b/storage/xtradb/rem/rem0cmp.cc
deleted file mode 100644
index 616ef322fb5..00000000000
--- a/storage/xtradb/rem/rem0cmp.cc
+++ /dev/null
@@ -1,1465 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file rem/rem0cmp.cc
-Comparison services for records
-
-Created 7/1/1994 Heikki Tuuri
-************************************************************************/
-
-#include "rem0cmp.h"
-
-#ifdef UNIV_NONINL
-#include "rem0cmp.ic"
-#endif
-
-#include "ha_prototypes.h"
-#include "handler0alter.h"
-#include "srv0srv.h"
-
-/* ALPHABETICAL ORDER
- ==================
-
-The records are put into alphabetical order in the following
-way: let F be the first field where two records disagree.
-If there is a character in some position n where the
-records disagree, the order is determined by comparison of
-the characters at position n, possibly after
-collating transformation. If there is no such character,
-but the corresponding fields have different lengths, then
-if the data type of the fields is paddable,
-shorter field is padded with a padding character. If the
-data type is not paddable, longer field is considered greater.
-Finally, the SQL null is bigger than any other value.
-
-At the present, the comparison functions return 0 in the case,
-where two records disagree only in the way that one
-has more fields than the other. */
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Used in debug checking of cmp_dtuple_... .
-This function is used to compare a data tuple to a physical record. If
-dtuple has n fields then rec must have either m >= n fields, or it must
-differ from dtuple in some of the m fields rec has.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared */
-static
-int
-cmp_debug_dtuple_rec_with_match(
-/*============================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record which differs from
- dtuple in some of the common fields, or which
- has an equal number or more fields than
- dtuple */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n_cmp, /*!< in: number of fields to compare */
- ulint* matched_fields)/*!< in/out: number of already
- completely matched fields; when function
- returns, contains the value for current
- comparison */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-#endif /* UNIV_DEBUG */
-/*************************************************************//**
-This function is used to compare two data fields for which the data type
-is such that we must use MySQL code to compare them. The prototype here
-must be a copy of the one in ha_innobase.cc!
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */
-extern
-int
-innobase_mysql_cmp(
-/*===============*/
- int mysql_type, /*!< in: MySQL type */
- uint charset_number, /*!< in: number of the charset */
- const unsigned char* a, /*!< in: data field */
- unsigned int a_length, /*!< in: data field length,
- not UNIV_SQL_NULL */
- const unsigned char* b, /*!< in: data field */
- unsigned int b_length); /*!< in: data field length,
- not UNIV_SQL_NULL */
-/*************************************************************//**
-This function is used to compare two data fields for which the data type
-is such that we must use MySQL code to compare them. The prototype here
-must be a copy of the one in ha_innobase.cc!
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */
-extern
-int
-innobase_mysql_cmp_prefix(
-/*======================*/
- int mysql_type, /*!< in: MySQL type */
- uint charset_number, /*!< in: number of the charset */
- const unsigned char* a, /*!< in: data field */
- unsigned int a_length, /*!< in: data field length,
- not UNIV_SQL_NULL */
- const unsigned char* b, /*!< in: data field */
- unsigned int b_length); /*!< in: data field length,
- not UNIV_SQL_NULL */
-/*********************************************************************//**
-Transforms the character code so that it is ordered appropriately for the
-language. This is only used for the latin1 char set. MySQL does the
-comparisons for other char sets.
-@return collation order position */
-UNIV_INLINE
-ulint
-cmp_collate(
-/*========*/
- ulint code) /*!< in: code of a character stored in database record */
-{
- return((ulint) srv_latin1_ordering[code]);
-}
-
-/*************************************************************//**
-Returns TRUE if two columns are equal for comparison purposes.
-@return TRUE if the columns are considered equal in comparisons */
-UNIV_INTERN
-ibool
-cmp_cols_are_equal(
-/*===============*/
- const dict_col_t* col1, /*!< in: column 1 */
- const dict_col_t* col2, /*!< in: column 2 */
- ibool check_charsets)
- /*!< in: whether to check charsets */
-{
- if (dtype_is_non_binary_string_type(col1->mtype, col1->prtype)
- && dtype_is_non_binary_string_type(col2->mtype, col2->prtype)) {
-
- /* Both are non-binary string types: they can be compared if
- and only if the charset-collation is the same */
-
- if (check_charsets) {
- return(dtype_get_charset_coll(col1->prtype)
- == dtype_get_charset_coll(col2->prtype));
- } else {
- return(TRUE);
- }
- }
-
- if (dtype_is_binary_string_type(col1->mtype, col1->prtype)
- && dtype_is_binary_string_type(col2->mtype, col2->prtype)) {
-
- /* Both are binary string types: they can be compared */
-
- return(TRUE);
- }
-
- if (col1->mtype != col2->mtype) {
-
- return(FALSE);
- }
-
- if (col1->mtype == DATA_INT
- && (col1->prtype & DATA_UNSIGNED)
- != (col2->prtype & DATA_UNSIGNED)) {
-
- /* The storage format of an unsigned integer is different
- from a signed integer: in a signed integer we OR
- 0x8000... to the value of positive integers. */
-
- return(FALSE);
- }
-
- return(col1->mtype != DATA_INT || col1->len == col2->len);
-}
-
-/*************************************************************//**
-Innobase uses this function to compare two data fields for which the data type
-is such that we must compare whole fields or call MySQL to do the comparison
-@return 1, 0, -1, if a is greater, equal, less than b, respectively */
-static
-int
-cmp_whole_field(
-/*============*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- const byte* a, /*!< in: data field */
- unsigned int a_length, /*!< in: data field length,
- not UNIV_SQL_NULL */
- const byte* b, /*!< in: data field */
- unsigned int b_length) /*!< in: data field length,
- not UNIV_SQL_NULL */
-{
- float f_1;
- float f_2;
- double d_1;
- double d_2;
- int swap_flag = 1;
-
- switch (mtype) {
-
- case DATA_DECIMAL:
- /* Remove preceding spaces */
- for (; a_length && *a == ' '; a++, a_length--) { }
- for (; b_length && *b == ' '; b++, b_length--) { }
-
- if (*a == '-') {
- if (*b != '-') {
- return(-1);
- }
-
- a++; b++;
- a_length--;
- b_length--;
-
- swap_flag = -1;
-
- } else if (*b == '-') {
-
- return(1);
- }
-
- while (a_length > 0 && (*a == '+' || *a == '0')) {
- a++; a_length--;
- }
-
- while (b_length > 0 && (*b == '+' || *b == '0')) {
- b++; b_length--;
- }
-
- if (a_length != b_length) {
- if (a_length < b_length) {
- return(-swap_flag);
- }
-
- return(swap_flag);
- }
-
- while (a_length > 0 && *a == *b) {
-
- a++; b++; a_length--;
- }
-
- if (a_length == 0) {
-
- return(0);
- }
-
- if (*a > *b) {
- return(swap_flag);
- }
-
- return(-swap_flag);
- case DATA_DOUBLE:
- d_1 = mach_double_read(a);
- d_2 = mach_double_read(b);
-
- if (d_1 > d_2) {
- return(1);
- } else if (d_2 > d_1) {
- return(-1);
- }
-
- return(0);
-
- case DATA_FLOAT:
- f_1 = mach_float_read(a);
- f_2 = mach_float_read(b);
-
- if (f_1 > f_2) {
- return(1);
- } else if (f_2 > f_1) {
- return(-1);
- }
-
- return(0);
- case DATA_BLOB:
- if (prtype & DATA_BINARY_TYPE) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: comparing a binary BLOB"
- " with a character set sensitive\n"
- "InnoDB: comparison!\n");
- }
- /* fall through */
- case DATA_VARMYSQL:
- case DATA_MYSQL:
- return(innobase_mysql_cmp(
- (int)(prtype & DATA_MYSQL_TYPE_MASK),
- (uint) dtype_get_charset_coll(prtype),
- a, a_length, b, b_length));
- default:
- fprintf(stderr,
- "InnoDB: unknown type number %lu\n",
- (ulong) mtype);
- ut_error;
- }
-
- return(0);
-}
-
-/*****************************************************************
-This function is used to compare two dfields where at least the first
-has its data type field set. */
-UNIV_INTERN
-int
-cmp_dfield_dfield_like_prefix(
-/*==========================*/
- /* out: 1, 0, -1, if dfield1 is greater, equal,
- less than dfield2, respectively */
- dfield_t* dfield1,/* in: data field; must have type field set */
- dfield_t* dfield2)/* in: data field */
-{
- const dtype_t* type;
- int ret;
-
- ut_ad(dfield_check_typed(dfield1));
-
- type = dfield_get_type(dfield1);
-
- if (type->mtype >= DATA_FLOAT) {
- ret = innobase_mysql_cmp_prefix(
- static_cast<int>(type->prtype & DATA_MYSQL_TYPE_MASK),
- static_cast<uint>(dtype_get_charset_coll(type->prtype)),
- static_cast<byte*>(dfield_get_data(dfield1)),
- static_cast<uint>(dfield_get_len(dfield1)),
- static_cast<byte*>(dfield_get_data(dfield2)),
- static_cast<uint>(dfield_get_len(dfield2)));
- } else {
- ret = (cmp_data_data_like_prefix(
- static_cast<byte*>(dfield_get_data(dfield1)),
- dfield_get_len(dfield1),
- static_cast<byte*>(dfield_get_data(dfield2)),
- dfield_get_len(dfield2)));
- }
-
- return(ret);
-}
-
-/*************************************************************//**
-This function is used to compare two data fields for which we know the
-data type.
-@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */
-UNIV_INTERN
-int
-cmp_data_data_slow(
-/*===============*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- const byte* data1, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /*!< in: data field length or UNIV_SQL_NULL */
- const byte* data2, /*!< in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /*!< in: data field length or UNIV_SQL_NULL */
-{
- ulint data1_byte;
- ulint data2_byte;
- ulint cur_bytes;
-
- if (len1 == UNIV_SQL_NULL || len2 == UNIV_SQL_NULL) {
-
- if (len1 == len2) {
-
- return(0);
- }
-
- if (len1 == UNIV_SQL_NULL) {
- /* We define the SQL null to be the smallest possible
- value of a field in the alphabetical order */
-
- return(-1);
- }
-
- return(1);
- }
-
- if (mtype >= DATA_FLOAT
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
-
- return(cmp_whole_field(mtype, prtype,
- data1, (unsigned) len1,
- data2, (unsigned) len2));
- }
-
- /* Compare then the fields */
-
- cur_bytes = 0;
-
- for (;;) {
- if (len1 <= cur_bytes) {
- if (len2 <= cur_bytes) {
-
- return(0);
- }
-
- data1_byte = dtype_get_pad_char(mtype, prtype);
-
- if (data1_byte == ULINT_UNDEFINED) {
-
- return(-1);
- }
- } else {
- data1_byte = *data1;
- }
-
- if (len2 <= cur_bytes) {
- data2_byte = dtype_get_pad_char(mtype, prtype);
-
- if (data2_byte == ULINT_UNDEFINED) {
-
- return(1);
- }
- } else {
- data2_byte = *data2;
- }
-
- if (data1_byte == data2_byte) {
- /* If the bytes are equal, they will remain such even
- after the collation transformation below */
-
- goto next_byte;
- }
-
- if (mtype <= DATA_CHAR
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE))) {
-
- data1_byte = cmp_collate(data1_byte);
- data2_byte = cmp_collate(data2_byte);
- }
-
- if (data1_byte > data2_byte) {
-
- return(1);
- } else if (data1_byte < data2_byte) {
-
- return(-1);
- }
-next_byte:
- /* Next byte */
- cur_bytes++;
- data1++;
- data2++;
- }
-
- return(0); /* Not reached */
-}
-
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type to be VARCHAR */
-
-int
-cmp_data_data_slow_varchar(
-/*=======================*/
- /* out: 1, 0, -1, if lhs is greater, equal,
- less than rhs, respectively */
- const byte* lhs, /* in: data field (== a pointer to a memory
- buffer) */
- ulint lhs_len,/* in: data field length or UNIV_SQL_NULL */
- const byte* rhs, /* in: data field (== a pointer to a memory
- buffer) */
- ulint rhs_len)/* in: data field length or UNIV_SQL_NULL */
-{
- ulint i;
-
- ut_a(rhs_len != UNIV_SQL_NULL);
-
- if (lhs_len == UNIV_SQL_NULL) {
-
- /* We define the SQL null to be the smallest possible
- value of a field in the alphabetical order */
-
- return(-1);
- }
-
- /* Compare the values.*/
-
- for (i = 0; i < lhs_len && i < rhs_len; ++i, ++rhs, ++lhs) {
- ulint lhs_byte = *lhs;
- ulint rhs_byte = *rhs;
-
- if (lhs_byte != rhs_byte) {
- /* If the bytes are equal, they will remain such even
- after the collation transformation below */
-
- lhs_byte = cmp_collate(lhs_byte);
- rhs_byte = cmp_collate(rhs_byte);
-
- if (lhs_byte > rhs_byte) {
-
- return(1);
- } else if (lhs_byte < rhs_byte) {
-
- return(-1);
- }
- }
- }
-
- return((i == lhs_len && i == rhs_len) ? 0 :
- static_cast<int>(rhs_len - lhs_len));
-}
-
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type. The comparison is done for the LIKE operator.*/
-
-int
-cmp_data_data_slow_like_prefix(
-/*===========================*/
- /* out: 1, 0, -1, if lhs is greater, equal,
- less than rhs, respectively */
- const byte* lhs, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len1, /* in: data field length or UNIV_SQL_NULL */
- const byte* rhs, /* in: data field (== a pointer to a memory
- buffer) */
- ulint len2) /* in: data field length or UNIV_SQL_NULL */
-{
- ulint i;
-
- ut_a(len2 != UNIV_SQL_NULL);
-
- if (len1 == UNIV_SQL_NULL) {
-
- /* We define the SQL null to be the smallest possible
- value of a field in the alphabetical order */
-
- return(-1);
- }
-
- /* Compare the values.*/
-
- for (i = 0; i < len1 && i < len2; ++i, ++rhs, ++lhs) {
- ulint lhs_byte = *lhs;
- ulint rhs_byte = *rhs;
-
- if (lhs_byte != rhs_byte) {
- /* If the bytes are equal, they will remain such even
- after the collation transformation below */
-
- lhs_byte = cmp_collate(lhs_byte);
- rhs_byte = cmp_collate(rhs_byte);
-
- if (lhs_byte > rhs_byte) {
-
- return(1);
- } else if (lhs_byte < rhs_byte) {
-
- return(-1);
- }
- }
- }
-
- return(i == len2 ? 0 : 1);
-}
-
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type. The comparison is done for the LIKE operator.*/
-
-int
-cmp_data_data_slow_like_suffix(
-/*===========================*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- /* in: data field (== a pointer to a
- memory buffer) */
- const byte* data1 UNIV_UNUSED,
- /* in: data field length or UNIV_SQL_NULL */
- ulint len1 UNIV_UNUSED,
- /* in: data field (== a pointer to a memory
- buffer) */
- const byte* data2 UNIV_UNUSED,
- /* in: data field length or UNIV_SQL_NULL */
- ulint len2 UNIV_UNUSED)
-
-{
- ut_error; // FIXME:
- return(1);
-}
-
-/*****************************************************************
-This function is used to compare two data fields for which we know the
-data type. The comparison is done for the LIKE operator.*/
-
-int
-cmp_data_data_slow_like_substr(
-/*===========================*/
- /* out: 1, 0, -1, if data1 is greater, equal,
- less than data2, respectively */
- /* in: data field (== a pointer to a
- memory buffer) */
- const byte* data1 UNIV_UNUSED,
- /* in: data field length or UNIV_SQL_NULL */
- ulint len1 UNIV_UNUSED,
- /* in: data field (== a pointer to a memory
- buffer) */
- const byte* data2 UNIV_UNUSED,
- /* in: data field length or UNIV_SQL_NULL */
- ulint len2 UNIV_UNUSED)
-{
- ut_error; // FIXME:
- return(1);
-}
-/*************************************************************//**
-This function is used to compare a data tuple to a physical record.
-Only dtuple->n_fields_cmp first fields are taken into account for
-the data tuple! If we denote by n = n_fields_cmp, then rec must
-have either m >= n fields, or it must differ from dtuple in some of
-the m fields rec has. If rec has an externally stored field we do not
-compare it but return with value 0 if such a comparison should be
-made.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared, or until
-the first externally stored field in rec */
-UNIV_INTERN
-int
-cmp_dtuple_rec_with_match_low(
-/*==========================*/
- const dtuple_t* dtuple, /*!< in: data tuple */
- const rec_t* rec, /*!< in: physical record which differs from
- dtuple in some of the common fields, or which
- has an equal number or more fields than
- dtuple */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n_cmp, /*!< in: number of fields to compare */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when function returns,
- contains the value for current comparison */
- ulint* matched_bytes) /*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when function returns, contains the
- value for current comparison */
-{
- const dfield_t* dtuple_field; /* current field in logical record */
- ulint dtuple_f_len; /* the length of the current field
- in the logical record */
- const byte* dtuple_b_ptr; /* pointer to the current byte in
- logical field data */
- ulint dtuple_byte; /* value of current byte to be compared
- in dtuple*/
- ulint rec_f_len; /* length of current field in rec */
- const byte* rec_b_ptr; /* pointer to the current byte in
- rec field */
- ulint rec_byte; /* value of current byte to be
- compared in rec */
- ulint cur_field; /* current field number */
- ulint cur_bytes; /* number of already matched bytes
- in current field */
- int ret; /* return value */
-
- ut_ad(dtuple != NULL);
- ut_ad(rec != NULL);
- ut_ad(matched_fields != NULL);
- ut_ad(matched_bytes != NULL);
- ut_ad(dtuple_check_typed(dtuple));
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- cur_field = *matched_fields;
- cur_bytes = *matched_bytes;
-
- ut_ad(n_cmp > 0);
- ut_ad(n_cmp <= dtuple_get_n_fields(dtuple));
- ut_ad(cur_field <= n_cmp);
- ut_ad(cur_field <= rec_offs_n_fields(offsets));
-
- if (cur_bytes == 0 && cur_field == 0) {
- ulint rec_info = rec_get_info_bits(rec,
- rec_offs_comp(offsets));
- ulint tup_info = dtuple_get_info_bits(dtuple);
-
- if (UNIV_UNLIKELY(rec_info & REC_INFO_MIN_REC_FLAG)) {
- ret = !(tup_info & REC_INFO_MIN_REC_FLAG);
- goto order_resolved;
- } else if (UNIV_UNLIKELY(tup_info & REC_INFO_MIN_REC_FLAG)) {
- ret = -1;
- goto order_resolved;
- }
- }
-
- /* Match fields in a loop; stop if we run out of fields in dtuple
- or find an externally stored field */
-
- while (cur_field < n_cmp) {
-
- ulint mtype;
- ulint prtype;
-
- dtuple_field = dtuple_get_nth_field(dtuple, cur_field);
- {
- const dtype_t* type
- = dfield_get_type(dtuple_field);
-
- mtype = type->mtype;
- prtype = type->prtype;
- }
-
- dtuple_f_len = dfield_get_len(dtuple_field);
-
- rec_b_ptr = rec_get_nth_field(rec, offsets,
- cur_field, &rec_f_len);
-
- /* If we have matched yet 0 bytes, it may be that one or
- both the fields are SQL null, or the record or dtuple may be
- the predefined minimum record, or the field is externally
- stored */
-
- if (UNIV_LIKELY(cur_bytes == 0)) {
- if (rec_offs_nth_extern(offsets, cur_field)) {
- /* We do not compare to an externally
- stored field */
-
- ret = 0;
-
- goto order_resolved;
- }
-
- if (dtuple_f_len == UNIV_SQL_NULL) {
- if (rec_f_len == UNIV_SQL_NULL) {
-
- goto next_field;
- }
-
- ret = -1;
- goto order_resolved;
- } else if (rec_f_len == UNIV_SQL_NULL) {
- /* We define the SQL null to be the
- smallest possible value of a field
- in the alphabetical order */
-
- ret = 1;
- goto order_resolved;
- }
- }
-
- if (mtype >= DATA_FLOAT
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
-
- ret = cmp_whole_field(
- mtype, prtype,
- static_cast<const byte*>(
- dfield_get_data(dtuple_field)),
- (unsigned) dtuple_f_len,
- rec_b_ptr, (unsigned) rec_f_len);
-
- if (ret != 0) {
- cur_bytes = 0;
-
- goto order_resolved;
- } else {
- goto next_field;
- }
- }
-
- /* Set the pointers at the current byte */
-
- rec_b_ptr = rec_b_ptr + cur_bytes;
- dtuple_b_ptr = (byte*) dfield_get_data(dtuple_field)
- + cur_bytes;
- /* Compare then the fields */
-
- for (;;) {
- if (UNIV_UNLIKELY(rec_f_len <= cur_bytes)) {
- if (dtuple_f_len <= cur_bytes) {
-
- goto next_field;
- }
-
- rec_byte = dtype_get_pad_char(mtype, prtype);
-
- if (rec_byte == ULINT_UNDEFINED) {
- ret = 1;
-
- goto order_resolved;
- }
- } else {
- rec_byte = *rec_b_ptr;
- }
-
- if (UNIV_UNLIKELY(dtuple_f_len <= cur_bytes)) {
- dtuple_byte = dtype_get_pad_char(mtype,
- prtype);
-
- if (dtuple_byte == ULINT_UNDEFINED) {
- ret = -1;
-
- goto order_resolved;
- }
- } else {
- dtuple_byte = *dtuple_b_ptr;
- }
-
- if (dtuple_byte == rec_byte) {
- /* If the bytes are equal, they will
- remain such even after the collation
- transformation below */
-
- goto next_byte;
- }
-
- if (mtype <= DATA_CHAR
- || (mtype == DATA_BLOB
- && !(prtype & DATA_BINARY_TYPE))) {
-
- rec_byte = cmp_collate(rec_byte);
- dtuple_byte = cmp_collate(dtuple_byte);
- }
-
- ret = (int) (dtuple_byte - rec_byte);
- if (UNIV_LIKELY(ret)) {
- if (ret < 0) {
- ret = -1;
- goto order_resolved;
- } else {
- ret = 1;
- goto order_resolved;
- }
- }
-next_byte:
- /* Next byte */
- cur_bytes++;
- rec_b_ptr++;
- dtuple_b_ptr++;
- }
-
-next_field:
- cur_field++;
- cur_bytes = 0;
- }
-
- ut_ad(cur_bytes == 0);
-
- ret = 0; /* If we ran out of fields, dtuple was equal to rec
- up to the common fields */
-order_resolved:
- ut_ad((ret >= - 1) && (ret <= 1));
- ut_ad(ret == cmp_debug_dtuple_rec_with_match(dtuple, rec, offsets,
- n_cmp, matched_fields));
- ut_ad(*matched_fields == cur_field); /* In the debug version, the
- above cmp_debug_... sets
- *matched_fields to a value */
- *matched_fields = cur_field;
- *matched_bytes = cur_bytes;
-
- return(ret);
-}
-
-/**************************************************************//**
-Compares a data tuple to a physical record, starting from the first
-byte of the first field. The match position that the underlying
-comparison reports is discarded.
-@see cmp_dtuple_rec_with_match
-@return 1, 0, -1, if dtuple is greater, equal, less than rec, respectively */
-UNIV_INTERN
-int
-cmp_dtuple_rec(
-/*===========*/
-	const dtuple_t*	dtuple,	/*!< in: data tuple */
-	const rec_t*	rec,	/*!< in: physical record */
-	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-{
-	/* Scratch outputs for the match position; both start at zero so
-	that the comparison begins at the very first byte. */
-	ulint	n_fields_matched = 0;
-	ulint	n_bytes_matched = 0;
-
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-
-	const int	order = cmp_dtuple_rec_with_match(
-		dtuple, rec, offsets,
-		&n_fields_matched, &n_bytes_matched);
-
-	return(order);
-}
-
-/**************************************************************//**
-Checks if a dtuple is a prefix of a record. The last field in dtuple
-is allowed to be a prefix of the corresponding field in the record.
-A dtuple that has more fields than the record is never a prefix.
-@return TRUE if prefix */
-UNIV_INTERN
-ibool
-cmp_dtuple_is_prefix_of_rec(
-/*========================*/
-	const dtuple_t*	dtuple,	/*!< in: data tuple */
-	const rec_t*	rec,	/*!< in: physical record */
-	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
-{
-	ulint	n_matched_fields = 0;
-	ulint	n_matched_bytes = 0;
-
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-
-	const ulint	n_tuple_fields = dtuple_get_n_fields(dtuple);
-
-	if (n_tuple_fields > rec_offs_n_fields(offsets)) {
-		return(FALSE);
-	}
-
-	/* Only the match position matters here; the ordering result
-	of the comparison is intentionally ignored. */
-	cmp_dtuple_rec_with_match(dtuple, rec, offsets,
-				  &n_matched_fields, &n_matched_bytes);
-
-	if (n_matched_fields == n_tuple_fields) {
-		/* Every field of dtuple matched completely. */
-		return(TRUE);
-	}
-
-	/* Otherwise dtuple is a prefix only if all fields but the last
-	matched and the last field of dtuple was consumed entirely. */
-	const bool	last_field_is_prefix
-		= n_matched_fields == n_tuple_fields - 1
-		&& n_matched_bytes == dfield_get_len(
-			dtuple_get_nth_field(dtuple, n_tuple_fields - 1));
-
-	return(last_field_is_prefix ? TRUE : FALSE);
-}
-
-/*************************************************************//**
-Compare the n-th field of two physical records. Neither of the two
-fields may be stored externally.
-@retval 1 if rec1 field is greater than rec2
-@retval -1 if rec1 field is less than rec2
-@retval 0 if rec1 field equals to rec2 */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-int
-cmp_rec_rec_simple_field(
-/*=====================*/
-	const rec_t*		rec1,	/*!< in: physical record */
-	const rec_t*		rec2,	/*!< in: physical record */
-	const ulint*		offsets1,/*!< in: rec_get_offsets(rec1, ...) */
-	const ulint*		offsets2,/*!< in: rec_get_offsets(rec2, ...) */
-	const dict_index_t*	index,	/*!< in: data dictionary index */
-	ulint			n)	/*!< in: field to compare */
-{
-	const dict_col_t*	col = dict_index_get_nth_col(index, n);
-	ulint			len1;
-	ulint			len2;
-
-	ut_ad(!rec_offs_nth_extern(offsets1, n));
-	ut_ad(!rec_offs_nth_extern(offsets2, n));
-
-	const byte*	data1 = rec_get_nth_field(rec1, offsets1, n, &len1);
-	const byte*	data2 = rec_get_nth_field(rec2, offsets2, n, &len2);
-
-	/* We define the SQL null to be the smallest possible value of
-	a field in the alphabetical order; two SQL nulls are equal. */
-	const bool	is_null1 = (len1 == UNIV_SQL_NULL);
-	const bool	is_null2 = (len2 == UNIV_SQL_NULL);
-
-	if (is_null1 && is_null2) {
-		return(0);
-	}
-	if (is_null1) {
-		return(-1);
-	}
-	if (is_null2) {
-		return(1);
-	}
-
-	/* These types are not compared byte by byte, but handed over
-	to cmp_whole_field() in one piece. */
-	const bool	whole_field
-		= col->mtype >= DATA_FLOAT
-		|| (col->mtype == DATA_BLOB
-		    && !(col->prtype & DATA_BINARY_TYPE)
-		    && dtype_get_charset_coll(col->prtype)
-		    != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL);
-
-	if (whole_field) {
-		return(cmp_whole_field(col->mtype, col->prtype,
-				       data1, (unsigned) len1,
-				       data2, (unsigned) len2));
-	}
-
-	/* Whether differing bytes are passed through cmp_collate()
-	before their order is decided. */
-	const bool	collated
-		= col->mtype <= DATA_CHAR
-		|| (col->mtype == DATA_BLOB
-		    && !(col->prtype & DATA_BINARY_TYPE));
-
-	/* Compare the fields one byte position at a time. A field that
-	has run out of bytes is extended with the pad character of the
-	type, if the type has one. */
-	for (ulint pos = 0;; pos++) {
-		const bool	at_end1 = (len1 <= pos);
-		const bool	at_end2 = (len2 <= pos);
-		ulint		byte1;
-		ulint		byte2;
-
-		if (at_end1 && at_end2) {
-			return(0);
-		}
-
-		if (at_end2) {
-			byte2 = dtype_get_pad_char(col->mtype, col->prtype);
-
-			if (byte2 == ULINT_UNDEFINED) {
-				/* No padding: the longer rec1 is greater. */
-				return(1);
-			}
-		} else {
-			byte2 = data2[pos];
-		}
-
-		if (at_end1) {
-			byte1 = dtype_get_pad_char(col->mtype, col->prtype);
-
-			if (byte1 == ULINT_UNDEFINED) {
-				/* No padding: the shorter rec1 is less. */
-				return(-1);
-			}
-		} else {
-			byte1 = data1[pos];
-		}
-
-		if (byte1 != byte2) {
-			/* Equal bytes would remain equal after the
-			collation transformation, so only differing
-			bytes need to be transformed. */
-			if (collated) {
-				byte1 = cmp_collate(byte1);
-				byte2 = cmp_collate(byte2);
-			}
-
-			if (byte1 != byte2) {
-				return(byte1 < byte2 ? -1 : 1);
-			}
-		}
-	}
-}
-
-/*************************************************************//**
-Compare two physical records that contain the same number of columns,
-none of which are stored externally.
-
-The first dict_index_get_n_unique(index) fields are compared first.
-If all of them are equal and none of them is SQL NULL, and the index
-is unique and a MySQL table was passed, rec1 is converted to MySQL
-format in table (for reporting the duplicate key) and 0 is returned.
-Otherwise, the remaining fields of the index are compared as well,
-so that the result reflects the full internal order.
-@retval 1 if rec1 (including non-ordering columns) is greater than rec2
-@retval -1 if rec1 (including non-ordering columns) is less than rec2
-@retval 0 if rec1 is a duplicate of rec2 */
-UNIV_INTERN
-int
-cmp_rec_rec_simple(
-/*===============*/
-	const rec_t*		rec1,	/*!< in: physical record */
-	const rec_t*		rec2,	/*!< in: physical record */
-	const ulint*		offsets1,/*!< in: rec_get_offsets(rec1, ...) */
-	const ulint*		offsets2,/*!< in: rec_get_offsets(rec2, ...) */
-	const dict_index_t*	index,	/*!< in: data dictionary index */
-	struct TABLE*		table)	/*!< in: MySQL table, for reporting
-					duplicate key value if applicable,
-					or NULL */
-{
-	ulint	n;
-	ulint	n_uniq	= dict_index_get_n_unique(index);
-	bool	null_eq	= false;
-
-	ut_ad(rec_offs_n_fields(offsets1) >= n_uniq);
-	/* Both records must have the same number of fields. (This
-	assertion used to compare offsets2 with itself, which made
-	it always hold.) */
-	ut_ad(rec_offs_n_fields(offsets1) == rec_offs_n_fields(offsets2));
-
-	ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2));
-
-	for (n = 0; n < n_uniq; n++) {
-		int	cmp = cmp_rec_rec_simple_field(
-			rec1, rec2, offsets1, offsets2, index, n);
-
-		if (cmp) {
-			return(cmp);
-		}
-
-		/* If the fields are internally equal, they must both
-		be NULL or non-NULL. */
-		ut_ad(rec_offs_nth_sql_null(offsets1, n)
-		      == rec_offs_nth_sql_null(offsets2, n));
-
-		if (rec_offs_nth_sql_null(offsets1, n)) {
-			ut_ad(!(dict_index_get_nth_col(index, n)->prtype
-				& DATA_NOT_NULL));
-			/* Remember that a unique column was NULL in both
-			records: such a match is not reported as a
-			duplicate key below. */
-			null_eq = true;
-		}
-	}
-
-	/* If we ran out of fields, the ordering columns of rec1 were
-	equal to rec2. Issue a duplicate key error if needed. */
-
-	if (!null_eq && table && dict_index_is_unique(index)) {
-		/* Report erroneous row using new version of table. */
-		innobase_rec_to_mysql(table, rec1, index, offsets1);
-		return(0);
-	}
-
-	/* Else, keep comparing so that we have the full internal
-	order. The loop continues from n == n_uniq. */
-	for (; n < dict_index_get_n_fields(index); n++) {
-		int	cmp = cmp_rec_rec_simple_field(
-			rec1, rec2, offsets1, offsets2, index, n);
-
-		if (cmp) {
-			return(cmp);
-		}
-
-		/* If the fields are internally equal, they must both
-		be NULL or non-NULL. */
-		ut_ad(rec_offs_nth_sql_null(offsets1, n)
-		      == rec_offs_nth_sql_null(offsets2, n));
-	}
-
-	/* This should never be reached. Internally, an index must
-	never contain duplicate entries. */
-	ut_ad(0);
-	return(0);
-}
-
-/*************************************************************//**
-This function is used to compare two physical records. Only the common
-first fields are compared, and if an externally stored field is
-encountered, then 0 is returned. The comparison resumes at the position
-passed in *matched_fields and *matched_bytes, and the position where it
-stopped is written back to them before returning.
-@return 1, 0, -1 if rec1 is greater, equal, less, respectively */
-UNIV_INTERN
-int
-cmp_rec_rec_with_match(
-/*===================*/
- const rec_t* rec1, /*!< in: physical record */
- const rec_t* rec2, /*!< in: physical record */
- const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
- const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
- dict_index_t* index, /*!< in: data dictionary index */
- ibool nulls_unequal,
- /*!< in: TRUE if this is for index statistics
- cardinality estimation, and innodb_stats_method
- is "nulls_unequal" or "nulls_ignored" */
- ulint* matched_fields, /*!< in/out: number of already completely
- matched fields; when the function returns,
- contains the value for the current
- comparison */
- ulint* matched_bytes) /*!< in/out: number of already matched
- bytes within the first field not completely
- matched; when the function returns, contains
- the value for the current comparison */
-{
- ulint rec1_n_fields; /* the number of fields in rec1 */
- ulint rec1_f_len; /* length of current field in rec1 */
- const byte* rec1_b_ptr; /* pointer to the current byte
- in rec1 field */
- ulint rec1_byte; /* value of current byte to be
- compared in rec1 */
- ulint rec2_n_fields; /* the number of fields in rec2 */
- ulint rec2_f_len; /* length of current field in rec2 */
- const byte* rec2_b_ptr; /* pointer to the current byte
- in rec2 field */
- ulint rec2_byte; /* value of current byte to be
- compared in rec2 */
- ulint cur_field; /* current field number */
- ulint cur_bytes; /* number of already matched
- bytes in current field */
- int ret = 0; /* return value */
- ulint comp; /* rec_offs_comp(offsets1); passed on to
- rec_get_info_bits() */
-
- ut_ad(rec1 != NULL);
- ut_ad(rec2 != NULL);
- ut_ad(index != NULL);
- ut_ad(rec_offs_validate(rec1, index, offsets1));
- ut_ad(rec_offs_validate(rec2, index, offsets2));
- ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2));
-
- comp = rec_offs_comp(offsets1);
- rec1_n_fields = rec_offs_n_fields(offsets1);
- rec2_n_fields = rec_offs_n_fields(offsets2);
-
- /* Resume from the position supplied by the caller */
- cur_field = *matched_fields;
- cur_bytes = *matched_bytes;
-
- /* Match fields in a loop */
-
- while ((cur_field < rec1_n_fields) && (cur_field < rec2_n_fields)) {
-
- ulint mtype;
- ulint prtype;
-
- if (dict_index_is_univ(index)) {
- /* This is for the insert buffer B-tree. */
- mtype = DATA_BINARY;
- prtype = 0;
- } else {
- const dict_col_t* col
- = dict_index_get_nth_col(index, cur_field);
-
- mtype = col->mtype;
- prtype = col->prtype;
- }
-
- rec1_b_ptr = rec_get_nth_field(rec1, offsets1,
- cur_field, &rec1_f_len);
- rec2_b_ptr = rec_get_nth_field(rec2, offsets2,
- cur_field, &rec2_f_len);
-
- /* The special cases below (minimum record, externally stored
- field, SQL NULL) are only checked when no bytes of the current
- field have been matched yet */
- if (cur_bytes == 0) {
- if (cur_field == 0) {
- /* Test if rec is the predefined minimum
- record */
- if (UNIV_UNLIKELY(rec_get_info_bits(rec1, comp)
- & REC_INFO_MIN_REC_FLAG)) {
-
- /* If both records carry the flag, ret keeps
- its initial value 0 */
- if (!(rec_get_info_bits(rec2, comp)
- & REC_INFO_MIN_REC_FLAG)) {
- ret = -1;
- }
-
- goto order_resolved;
-
- } else if (UNIV_UNLIKELY
- (rec_get_info_bits(rec2, comp)
- & REC_INFO_MIN_REC_FLAG)) {
-
- ret = 1;
-
- goto order_resolved;
- }
- }
-
- if (rec_offs_nth_extern(offsets1, cur_field)
- || rec_offs_nth_extern(offsets2, cur_field)) {
- /* We do not compare to an externally
- stored field; ret is still 0 here */
-
- goto order_resolved;
- }
-
- if (rec1_f_len == UNIV_SQL_NULL
- || rec2_f_len == UNIV_SQL_NULL) {
-
- if (rec1_f_len == rec2_f_len) {
- /* Both fields are SQL NULL.
- This is limited to stats collection,
- cannot use it for regular search */
- if (nulls_unequal) {
- ret = -1;
- } else {
- goto next_field;
- }
- } else if (rec2_f_len == UNIV_SQL_NULL) {
-
- /* We define the SQL null to be the
- smallest possible value of a field
- in the alphabetical order */
-
- ret = 1;
- } else {
- ret = -1;
- }
-
- goto order_resolved;
- }
- }
-
- /* These types are compared as whole fields by
- cmp_whole_field() instead of byte by byte */
- if (mtype >= DATA_FLOAT
- || (mtype == DATA_BLOB
- && 0 == (prtype & DATA_BINARY_TYPE)
- && dtype_get_charset_coll(prtype)
- != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
-
- ret = cmp_whole_field(mtype, prtype,
- rec1_b_ptr,
- (unsigned) rec1_f_len,
- rec2_b_ptr,
- (unsigned) rec2_f_len);
- if (ret != 0) {
- cur_bytes = 0;
-
- goto order_resolved;
- } else {
- goto next_field;
- }
- }
-
- /* Set the pointers at the current byte */
- rec1_b_ptr = rec1_b_ptr + cur_bytes;
- rec2_b_ptr = rec2_b_ptr + cur_bytes;
-
- /* Compare then the fields */
- for (;;) {
- if (rec2_f_len <= cur_bytes) {
-
- if (rec1_f_len <= cur_bytes) {
-
- /* Both fields are exhausted: they are equal */
- goto next_field;
- }
-
- /* rec2 field is shorter: continue with the pad
- character of the type, if there is one */
- rec2_byte = dtype_get_pad_char(mtype, prtype);
-
- if (rec2_byte == ULINT_UNDEFINED) {
- ret = 1;
-
- goto order_resolved;
- }
- } else {
- rec2_byte = *rec2_b_ptr;
- }
-
- if (rec1_f_len <= cur_bytes) {
- /* rec1 field is shorter: same padding rule */
- rec1_byte = dtype_get_pad_char(mtype, prtype);
-
- if (rec1_byte == ULINT_UNDEFINED) {
- ret = -1;
-
- goto order_resolved;
- }
- } else {
- rec1_byte = *rec1_b_ptr;
- }
-
- if (rec1_byte == rec2_byte) {
- /* If the bytes are equal, they will remain
- such even after the collation transformation
- below */
-
- goto next_byte;
- }
-
- if (mtype <= DATA_CHAR
- || (mtype == DATA_BLOB
- && !(prtype & DATA_BINARY_TYPE))) {
-
- rec1_byte = cmp_collate(rec1_byte);
- rec2_byte = cmp_collate(rec2_byte);
- }
-
- /* If the collated bytes are equal, fall through to
- next_byte */
- if (rec1_byte < rec2_byte) {
- ret = -1;
- goto order_resolved;
- } else if (rec1_byte > rec2_byte) {
- ret = 1;
- goto order_resolved;
- }
-next_byte:
- /* Next byte */
-
- cur_bytes++;
- rec1_b_ptr++;
- rec2_b_ptr++;
- }
-
-next_field:
- cur_field++;
- cur_bytes = 0;
- }
-
- ut_ad(cur_bytes == 0);
-
- /* If we ran out of fields, rec1 was equal to rec2 up
- to the common fields */
- ut_ad(ret == 0);
-order_resolved:
-
- ut_ad((ret >= - 1) && (ret <= 1));
-
- /* Report the position where the comparison stopped */
- *matched_fields = cur_field;
- *matched_bytes = cur_bytes;
-
- return(ret);
-}
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Used in debug checking of cmp_dtuple_... .
-Compares a data tuple to a physical record one whole field at a time,
-using cmp_data_data(). If dtuple has n fields then rec must have either
-m >= n fields, or it must differ from dtuple in some of the m fields rec
-has. If an externally stored field is encountered, 0 is returned.
-@return 1, 0, -1, if dtuple is greater, equal, less than rec,
-respectively, when only the common first fields are compared */
-static
-int
-cmp_debug_dtuple_rec_with_match(
-/*============================*/
-	const dtuple_t*	dtuple,	/*!< in: data tuple */
-	const rec_t*	rec,	/*!< in: physical record which differs from
-				dtuple in some of the common fields, or which
-				has an equal number or more fields than
-				dtuple */
-	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
-	ulint		n_cmp,	/*!< in: number of fields to compare */
-	ulint*		matched_fields) /*!< in/out: number of already
-				completely matched fields; when function
-				returns, contains the value for current
-				comparison */
-{
-	int	ret = 0;	/* return value; stays 0 if every
-				compared field is equal */
-	ulint	cur_field;	/* current field number */
-
-	ut_ad(dtuple != NULL);
-	ut_ad(rec != NULL);
-	ut_ad(matched_fields != NULL);
-	ut_ad(dtuple_check_typed(dtuple));
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-
-	ut_ad(n_cmp > 0);
-	ut_ad(n_cmp <= dtuple_get_n_fields(dtuple));
-	ut_ad(*matched_fields <= n_cmp);
-	ut_ad(*matched_fields <= rec_offs_n_fields(offsets));
-
-	cur_field = *matched_fields;
-
-	if (cur_field == 0
-	    && (rec_get_info_bits(rec, rec_offs_comp(offsets))
-		& REC_INFO_MIN_REC_FLAG)) {
-		/* rec is the predefined minimum record: dtuple is
-		greater unless it carries the same flag. */
-		ret = !(dtuple_get_info_bits(dtuple)
-			& REC_INFO_MIN_REC_FLAG);
-
-	} else if (cur_field == 0
-		   && (dtuple_get_info_bits(dtuple)
-		       & REC_INFO_MIN_REC_FLAG)) {
-		/* Only dtuple is the minimum record. */
-		ret = -1;
-
-	} else {
-		/* Match fields in a loop; stop if we run out of
-		fields in dtuple */
-		for (; cur_field < n_cmp; cur_field++) {
-			const dfield_t*	field
-				= dtuple_get_nth_field(dtuple, cur_field);
-			const dtype_t*	type = dfield_get_type(field);
-			const ulint	mtype = type->mtype;
-			const ulint	prtype = type->prtype;
-			const byte*	tuple_data
-				= static_cast<const byte*>(
-					dfield_get_data(field));
-			const ulint	tuple_len = dfield_get_len(field);
-			ulint		rec_len;
-			const byte*	rec_data = rec_get_nth_field(
-				rec, offsets, cur_field, &rec_len);
-
-			if (rec_offs_nth_extern(offsets, cur_field)) {
-				/* We do not compare to an externally
-				stored field */
-				ret = 0;
-				break;
-			}
-
-			ret = cmp_data_data(mtype, prtype,
-					    tuple_data, tuple_len,
-					    rec_data, rec_len);
-			if (ret != 0) {
-				break;
-			}
-		}
-
-		/* If the loop ran out of fields, ret is 0: dtuple was
-		equal to rec up to the common fields */
-	}
-
-	ut_ad((ret >= - 1) && (ret <= 1));
-
-	*matched_fields = cur_field;
-
-	return(ret);
-}
-#endif /* UNIV_DEBUG */
diff --git a/storage/xtradb/rem/rem0rec.cc b/storage/xtradb/rem/rem0rec.cc
deleted file mode 100644
index c62e8c90434..00000000000
--- a/storage/xtradb/rem/rem0rec.cc
+++ /dev/null
@@ -1,2107 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file rem/rem0rec.cc
-Record manager
-
-Created 5/30/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "rem0rec.h"
-
-#ifdef UNIV_NONINL
-#include "rem0rec.ic"
-#endif
-
-#include "page0page.h"
-#include "mtr0mtr.h"
-#include "mtr0log.h"
-#include "fts0fts.h"
-#ifdef WITH_WSREP
-#include <ha_prototypes.h>
-#endif /* WITH_WSREP */
-
-/* PHYSICAL RECORD (OLD STYLE)
- ===========================
-
-The physical record, which is the data type of all the records
-found in index pages of the database, has the following format
-(lower addresses and more significant bits inside a byte are below
-represented on a higher text line):
-
-| offset of the end of the last field of data, the most significant
- bit is set to 1 if and only if the field is SQL-null,
- if the offset is 2-byte, then the second most significant
- bit is set to 1 if the field is stored on another page:
- mostly this will occur in the case of big BLOB fields |
-...
-| offset of the end of the first field of data + the SQL-null bit |
-| 4 bits used to delete mark a record, and mark a predefined
- minimum record in alphabetical order |
-| 4 bits giving the number of records owned by this record
- (this term is explained in page0page.h) |
-| 13 bits giving the order number of this record in the
- heap of the index page |
-| 10 bits giving the number of fields in this record |
-| 1 bit which is set to 1 if the offsets above are given in
- one byte format, 0 if in two byte format |
-| two bytes giving an absolute pointer to the next record in the page |
-ORIGIN of the record
-| first field of data |
-...
-| last field of data |
-
-The origin of the record is the start address of the first field
-of data. The offsets are given relative to the origin.
-The offsets of the data fields are stored in an inverted
-order because then the offset of the first fields are near the
-origin, giving maybe a better processor cache hit rate in searches.
-
-The offsets of the data fields are given as one-byte
-(if there are less than 127 bytes of data in the record)
-or two-byte unsigned integers. The most significant bit
-is not part of the offset, instead it indicates the SQL-null
-if the bit is set to 1. */
-
-/* PHYSICAL RECORD (NEW STYLE)
- ===========================
-
-The physical record, which is the data type of all the records
-found in index pages of the database, has the following format
-(lower addresses and more significant bits inside a byte are below
-represented on a higher text line):
-
-| length of the last non-null variable-length field of data:
- if the maximum length is 255, one byte; otherwise,
- 0xxxxxxx (one byte, length=0..127), or 1exxxxxxxxxxxxxx (two bytes,
- length=128..16383, extern storage flag) |
-...
-| length of first variable-length field of data |
-| SQL-null flags (1 bit per nullable field), padded to full bytes |
-| 4 bits used to delete mark a record, and mark a predefined
- minimum record in alphabetical order |
-| 4 bits giving the number of records owned by this record
- (this term is explained in page0page.h) |
-| 13 bits giving the order number of this record in the
- heap of the index page |
-| 3 bits record type: 000=conventional, 001=node pointer (inside B-tree),
- 010=infimum, 011=supremum, 1xx=reserved |
-| two bytes giving a relative pointer to the next record in the page |
-ORIGIN of the record
-| first field of data |
-...
-| last field of data |
-
-The origin of the record is the start address of the first field
-of data. The offsets are given relative to the origin.
-The offsets of the data fields are stored in an inverted
-order because then the offset of the first fields are near the
-origin, giving maybe a better processor cache hit rate in searches.
-
-The offsets of the data fields are given as one-byte
-(if there are less than 127 bytes of data in the record)
-or two-byte unsigned integers. The most significant bit
-is not part of the offset, instead it indicates the SQL-null
-if the bit is set to 1. */
-
-/* CANONICAL COORDINATES. A record can be seen as a single
-string of 'characters' in the following way: catenate the bytes
-in each field, in the order of fields. An SQL-null field
-is taken to be an empty sequence of bytes. Then after
-the position of each field insert in the string
-the 'character' <FIELD-END>, except that after an SQL-null field
-insert <NULL-FIELD-END>. Now the ordinal position of each
-byte in this canonical string is its canonical coordinate.
-So, for the record ("AA", SQL-NULL, "BB", ""), the canonical
-string is "AA<FIELD-END><NULL-FIELD-END>BB<FIELD-END><FIELD-END>".
-We identify prefixes (= initial segments) of a record
-with prefixes of the canonical string. The canonical
-length of the prefix is the length of the corresponding
-prefix of the canonical string. The canonical length of
-a record is the length of its canonical string.
-
-For example, the maximal common prefix of records
-("AA", SQL-NULL, "BB", "C") and ("AA", SQL-NULL, "B", "C")
-is "AA<FIELD-END><NULL-FIELD-END>B", and its canonical
-length is 5.
-
-A complete-field prefix of a record is a prefix which ends at the
-end of some field (containing also <FIELD-END>).
-A record is a complete-field prefix of another record, if
-the corresponding canonical strings have the same property. */
-
-/* this is used to fool compiler in rec_validate */
-UNIV_INTERN ulint rec_dummy;
-
-/***************************************************************//**
-Validates the consistency of an old-style physical record.
-This is only a forward declaration of the file-local (static) function.
-@return TRUE if ok */
-static
-ibool
-rec_validate_old(
-/*=============*/
- const rec_t* rec); /*!< in: physical record */
-
-/******************************************************//**
-Determine how many of the first n columns in a compact
-physical record are stored externally. The function walks the
-SQL-null flags and the variable-length field lengths that are stored
-before the record origin, and counts the length entries whose
-extern storage flag is set.
-@return number of externally stored columns */
-UNIV_INTERN
-ulint
-rec_get_n_extern_new(
-/*=================*/
- const rec_t* rec, /*!< in: compact physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint n) /*!< in: number of columns to scan, or
- ULINT_UNDEFINED to scan all fields of index */
-{
- const byte* nulls; /* current byte of SQL-null flags */
- const byte* lens; /* next length byte to be read */
- ulint null_mask; /* bit of *nulls for the next nullable field */
- ulint n_extern; /* number of external columns found so far */
- ulint i; /* current field number */
-
- ut_ad(dict_table_is_comp(index->table));
- ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY);
- ut_ad(n == ULINT_UNDEFINED || n <= dict_index_get_n_fields(index));
-
- if (n == ULINT_UNDEFINED) {
- n = dict_index_get_n_fields(index);
- }
-
- /* Both the null flags and the lengths are read backwards,
- towards lower addresses, starting just before the extra bytes */
- nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
- lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
- null_mask = 1;
- n_extern = 0;
- i = 0;
-
- /* read the lengths of fields 0..n-1; because this is a do-while
- loop, field 0 is always examined */
- do {
- const dict_field_t* field
- = dict_index_get_nth_field(index, i);
- const dict_col_t* col
- = dict_field_get_col(field);
- ulint len;
-
- if (!(col->prtype & DATA_NOT_NULL)) {
- /* nullable field => read the null flag */
-
- /* The mask has been shifted out of the low 8 bits:
- move on to the next byte of null flags */
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--;
- null_mask = 1;
- }
-
- if (*nulls & null_mask) {
- null_mask <<= 1;
- /* No length is stored for NULL fields. */
- continue;
- }
- null_mask <<= 1;
- }
-
- if (UNIV_UNLIKELY(!field->fixed_len)) {
- /* Variable-length field: read the length */
- len = *lens--;
- /* If the maximum length of the field is up
- to 255 bytes, the actual length is always
- stored in one byte. If the maximum length is
- more than 255 bytes, the actual length is
- stored in one byte for 0..127. The length
- will be encoded in two bytes when it is 128 or
- more, or when the field is stored externally. */
- if (UNIV_UNLIKELY(col->len > 255)
- || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) {
- if (len & 0x80) {
- /* 1exxxxxxx xxxxxxxx */
- if (len & 0x40) {
- /* the "e" (extern) flag is set */
- n_extern++;
- }
- /* skip the second length byte */
- lens--;
- }
- }
- }
- } while (++i < n);
-
- return(n_extern);
-}
-
-/******************************************************//**
-Determine the offset to each field in a leaf-page record
-in ROW_FORMAT=COMPACT. This is a special case of
-rec_init_offsets() and rec_get_offsets_func().
-For each field i, the end offset of the field, possibly combined with
-REC_OFFS_SQL_NULL or REC_OFFS_EXTERNAL, is stored in
-rec_offs_base(offsets)[i + 1]. The size of the record header together
-with REC_OFFS_COMPACT (and REC_OFFS_EXTERNAL if any field is stored
-externally) is stored in rec_offs_base(offsets)[0]. */
-UNIV_INLINE MY_ATTRIBUTE((nonnull))
-void
-rec_init_offsets_comp_ordinary(
-/*===========================*/
- const rec_t* rec, /*!< in: physical record in
- ROW_FORMAT=COMPACT */
- bool temp, /*!< in: whether to use the
- format for temporary files in
- index creation */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
-{
- ulint i = 0; /* current field number */
- ulint offs = 0; /* end offset of the data read so far */
- ulint any_ext = 0; /* REC_OFFS_EXTERNAL if some field is
- stored externally, else 0 */
- ulint n_null = index->n_nullable;
- /* In the temporary file format the null flags directly precede
- the record origin; otherwise the extra bytes come in between */
- const byte* nulls = temp
- ? rec - 1
- : rec - (1 + REC_N_NEW_EXTRA_BYTES);
- const byte* lens = nulls - UT_BITS_IN_BYTES(n_null);
- ulint null_mask = 1;
-
-#ifdef UNIV_DEBUG
- /* We cannot invoke rec_offs_make_valid() here if temp=true.
- Similarly, rec_offs_validate() will fail in that case, because
- it invokes rec_get_status(). */
- offsets[2] = (ulint) rec;
- offsets[3] = (ulint) index;
-#endif /* UNIV_DEBUG */
-
- ut_ad(temp || dict_table_is_comp(index->table));
-
- if (temp && dict_table_is_comp(index->table)) {
- /* There is no need to adjust fixed_len=0. We only need to
- adjust it for ROW_FORMAT=REDUNDANT. */
- temp = false;
- }
-
- /* read the lengths of fields 0..n-1, where
- n=rec_offs_n_fields(offsets) */
- do {
- const dict_field_t* field
- = dict_index_get_nth_field(index, i);
- const dict_col_t* col
- = dict_field_get_col(field);
- ulint len;
-
- if (!(col->prtype & DATA_NOT_NULL)) {
- /* nullable field => read the null flag */
- /* NOTE(review): the decrement is an argument of
- ut_ad(), so presumably n_null is only counted down
- in debug builds -- confirm against the definition
- of ut_ad() */
- ut_ad(n_null--);
-
- /* The mask has been shifted out of the low 8 bits:
- move on to the next byte of null flags */
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--;
- null_mask = 1;
- }
-
- if (*nulls & null_mask) {
- null_mask <<= 1;
- /* No length is stored for NULL fields.
- We do not advance offs, and we set
- the length to zero and enable the
- SQL NULL flag in offsets[]. */
- len = offs | REC_OFFS_SQL_NULL;
- goto resolved;
- }
- null_mask <<= 1;
- }
-
- if (!field->fixed_len
- || (temp && !dict_col_get_fixed_size(col, temp))) {
- /* Variable-length field: read the length */
- len = *lens--;
- /* If the maximum length of the field is up
- to 255 bytes, the actual length is always
- stored in one byte. If the maximum length is
- more than 255 bytes, the actual length is
- stored in one byte for 0..127. The length
- will be encoded in two bytes when it is 128 or
- more, or when the field is stored externally. */
- if (UNIV_UNLIKELY(col->len > 255)
- || UNIV_UNLIKELY(col->mtype
- == DATA_BLOB)) {
- if (len & 0x80) {
- /* 1exxxxxxx xxxxxxxx */
- len <<= 8;
- len |= *lens--;
-
- /* the low 14 bits are the length; bit
- 0x4000 is the "e" (extern) flag */
- offs += len & 0x3fff;
- if (UNIV_UNLIKELY(len
- & 0x4000)) {
- ut_ad(dict_index_is_clust
- (index));
- any_ext = REC_OFFS_EXTERNAL;
- len = offs
- | REC_OFFS_EXTERNAL;
- } else {
- len = offs;
- }
-
- goto resolved;
- }
- }
-
- /* one-byte length */
- len = offs += len;
- } else {
- /* fixed-length field: no length is stored */
- len = offs += field->fixed_len;
- }
-resolved:
- rec_offs_base(offsets)[i + 1] = len;
- } while (++i < rec_offs_n_fields(offsets));
-
- /* lens + 1 is the lowest address of the header that was read, so
- rec - (lens + 1) is the number of bytes before the record origin */
- *rec_offs_base(offsets)
- = (rec - (lens + 1)) | REC_OFFS_COMPACT | any_ext;
-}
-
-/******************************************************//**
-The following function determines the offsets to each field in the
-record. The offsets are written to a previously allocated array of
-ulint, where rec_offs_n_fields(offsets) has been initialized to the
-number of fields in the record. The rest of the array will be
-initialized by this function. rec_offs_base(offsets)[0] will be set
-to the extra size (if REC_OFFS_COMPACT is set, the record is in the
-new format; if REC_OFFS_EXTERNAL is set, the record contains externally
-stored columns), and rec_offs_base(offsets)[1..n_fields] will be set to
-offsets past the end of fields 0..n_fields, or to the beginning of
-fields 1..n_fields+1. When the high-order bit of the offset at [i+1]
-is set (REC_OFFS_SQL_NULL), the field i is NULL. When the second
-high-order bit of the offset at [i+1] is set (REC_OFFS_EXTERNAL), the
-field i is being stored externally. */
-static
-void
-rec_init_offsets(
-/*=============*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
-{
- ulint i = 0;
- ulint offs;
-
- rec_offs_make_valid(rec, index, offsets);
-
- if (dict_table_is_comp(index->table)) {
- const byte* nulls;
- const byte* lens;
- dict_field_t* field;
- ulint null_mask;
- ulint status = rec_get_status(rec);
- ulint n_node_ptr_field = ULINT_UNDEFINED; /* position of the
- child page number field; it stays ULINT_UNDEFINED
- unless the record is a node pointer */
-
- switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- /* the field is 8 bytes long; the single field is
- described directly and the function returns without
- reading any null flags or length bytes */
- rec_offs_base(offsets)[0]
- = REC_N_NEW_EXTRA_BYTES | REC_OFFS_COMPACT;
- rec_offs_base(offsets)[1] = 8;
- return;
- case REC_STATUS_NODE_PTR:
- n_node_ptr_field
- = dict_index_get_n_unique_in_tree(index);
- break;
- case REC_STATUS_ORDINARY:
- rec_init_offsets_comp_ordinary(
- rec, false, index, offsets);
- return;
- }
-
- nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1); /* the null flags and
- the length bytes are both read at decreasing
- addresses (see nulls-- and lens-- below) */
- lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
- offs = 0;
- null_mask = 1;
-
- /* read the lengths of fields 0..n; ordinary, infimum and
- supremum records have already returned from the switch above,
- so in practice this loop parses node pointer records */
- do {
- ulint len;
- if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
- len = offs += REC_NODE_PTR_SIZE;
- goto resolved;
- }
-
- field = dict_index_get_nth_field(index, i);
- if (!(dict_field_get_col(field)->prtype
- & DATA_NOT_NULL)) {
- /* nullable field => read the null flag */
-
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--;
- null_mask = 1;
- }
-
- if (*nulls & null_mask) {
- null_mask <<= 1;
- /* No length is stored for NULL fields.
- We do not advance offs, and we set
- the length to zero and enable the
- SQL NULL flag in offsets[]. */
- len = offs | REC_OFFS_SQL_NULL;
- goto resolved;
- }
- null_mask <<= 1;
- }
-
- if (UNIV_UNLIKELY(!field->fixed_len)) {
- /* Variable-length field: read the length */
- const dict_col_t* col
- = dict_field_get_col(field);
- len = *lens--;
- /* If the maximum length of the field
- is up to 255 bytes, the actual length
- is always stored in one byte. If the
- maximum length is more than 255 bytes,
- the actual length is stored in one
- byte for 0..127. The length will be
- encoded in two bytes when it is 128 or
- more, or when the field is stored
- externally. */
- if (UNIV_UNLIKELY(col->len > 255)
- || UNIV_UNLIKELY(col->mtype
- == DATA_BLOB)) {
- if (len & 0x80) {
- /* 1exxxxxxx xxxxxxxx */
-
- len <<= 8;
- len |= *lens--;
-
- /* B-tree node pointers
- must not contain externally
- stored columns. Thus
- the "e" flag must be 0. */
- ut_a(!(len & 0x4000));
- offs += len & 0x3fff;
- len = offs;
-
- goto resolved;
- }
- }
-
- len = offs += len;
- } else {
- len = offs += field->fixed_len;
- }
-resolved:
- rec_offs_base(offsets)[i + 1] = len;
- } while (++i < rec_offs_n_fields(offsets));
-
- *rec_offs_base(offsets)
- = (rec - (lens + 1)) | REC_OFFS_COMPACT; /* extra size:
- distance from the last consumed length byte
- to the record origin */
- } else {
- /* Old-style record: determine extra size and end offsets.
- The extra size is REC_N_OLD_EXTRA_BYTES plus one or two bytes
- of end-offset information per field, depending on the
- 1-byte offsets flag of the record. */
- offs = REC_N_OLD_EXTRA_BYTES;
- if (rec_get_1byte_offs_flag(rec)) {
- offs += rec_offs_n_fields(offsets);
- *rec_offs_base(offsets) = offs;
- /* Determine offsets to fields */
- do {
- offs = rec_1_get_field_end_info(rec, i);
- if (offs & REC_1BYTE_SQL_NULL_MASK) {
- offs &= ~REC_1BYTE_SQL_NULL_MASK;
- offs |= REC_OFFS_SQL_NULL;
- }
- rec_offs_base(offsets)[1 + i] = offs;
- } while (++i < rec_offs_n_fields(offsets));
- } else {
- offs += 2 * rec_offs_n_fields(offsets);
- *rec_offs_base(offsets) = offs;
- /* Determine offsets to fields */
- do {
- offs = rec_2_get_field_end_info(rec, i);
- if (offs & REC_2BYTE_SQL_NULL_MASK) {
- offs &= ~REC_2BYTE_SQL_NULL_MASK;
- offs |= REC_OFFS_SQL_NULL;
- }
- if (offs & REC_2BYTE_EXTERN_MASK) {
- offs &= ~REC_2BYTE_EXTERN_MASK;
- offs |= REC_OFFS_EXTERNAL;
- *rec_offs_base(offsets) |= REC_OFFS_EXTERNAL;
- }
- rec_offs_base(offsets)[1 + i] = offs;
- } while (++i < rec_offs_n_fields(offsets));
- }
- }
-}
-
-/******************************************************//**
-The following function determines the offsets to each field
-in the record. It can reuse a previously returned array.
-The field count is chosen from the record status when
-dict_table_is_comp() holds and from rec_get_n_fields_old() otherwise,
-and is then capped at n_fields. When offsets is NULL or has fewer
-allocated elements than required, a new array is allocated from *heap;
-*heap itself is created first if it is NULL. The array is finally
-filled in by rec_init_offsets().
-@return the new offsets */
-UNIV_INTERN
-ulint*
-rec_get_offsets_func(
-/*=================*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets,/*!< in/out: array consisting of
- offsets[0] allocated elements,
- or an array from rec_get_offsets(),
- or NULL */
- ulint n_fields,/*!< in: maximum number of
- initialized fields
- (ULINT_UNDEFINED if all fields) */
-#ifdef UNIV_DEBUG
- const char* file, /*!< in: file name where called */
- ulint line, /*!< in: line number where called */
-#endif /* UNIV_DEBUG */
- mem_heap_t** heap) /*!< in/out: memory heap */
-{
- ulint n;
- ulint size;
-
- ut_ad(rec);
- ut_ad(index);
- ut_ad(heap);
-
- if (dict_table_is_comp(index->table)) {
- switch (UNIV_EXPECT(rec_get_status(rec),
- REC_STATUS_ORDINARY)) {
- case REC_STATUS_ORDINARY:
- n = dict_index_get_n_fields(index);
- break;
- case REC_STATUS_NODE_PTR:
- /* Node pointer records consist of the
- uniquely identifying fields of the record
- followed by a child page number field. */
- n = dict_index_get_n_unique_in_tree(index) + 1;
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- /* infimum or supremum record */
- n = 1;
- break;
- default:
- ut_error;
- return(NULL);
- }
- } else {
- n = rec_get_n_fields_old(rec);
- }
-
- if (UNIV_UNLIKELY(n_fields < n)) {
- n = n_fields;
- }
-
- /* The offsets header consists of the allocation size at
- offsets[0] and the REC_OFFS_HEADER_SIZE bytes. */
- size = n + (1 + REC_OFFS_HEADER_SIZE);
-
- if (UNIV_UNLIKELY(!offsets)
- || UNIV_UNLIKELY(rec_offs_get_n_alloc(offsets) < size)) {
- if (UNIV_UNLIKELY(!*heap)) {
- *heap = mem_heap_create_at(size * sizeof(ulint),
- file, line); /* file and line are declared
- only under UNIV_DEBUG; NOTE(review):
- presumably mem_heap_create_at() discards
- them in other builds - confirm */
- }
- offsets = static_cast<ulint*>(
- mem_heap_alloc(*heap, size * sizeof(ulint)));
-
- rec_offs_set_n_alloc(offsets, size);
- }
-
- rec_offs_set_n_fields(offsets, n);
- rec_init_offsets(rec, index, offsets);
- return(offsets);
-}
-
-/******************************************************//**
-The following function determines the offsets to each field
-in the record. It can reuse a previously allocated array.
-Unlike rec_init_offsets(), the null flags and the length bytes are
-read at increasing addresses starting from extra (nulls++ and lens++).
-The caller must supply an array with at least
-n + 1 + REC_OFFS_HEADER_SIZE allocated elements; this is enforced
-with ut_a() and no allocation is done here. */
-UNIV_INTERN
-void
-rec_get_offsets_reverse(
-/*====================*/
- const byte* extra, /*!< in: the extra bytes of a
- compact record in reverse order,
- excluding the fixed-size
- REC_N_NEW_EXTRA_BYTES */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint node_ptr,/*!< in: nonzero=node pointer,
- 0=leaf node */
- ulint* offsets)/*!< in/out: array consisting of
- offsets[0] allocated elements */
-{
- ulint n;
- ulint i;
- ulint offs;
- ulint any_ext; /* REC_OFFS_EXTERNAL once any field is
- found to be stored externally, else 0 */
- const byte* nulls;
- const byte* lens;
- dict_field_t* field;
- ulint null_mask;
- ulint n_node_ptr_field;
-
- ut_ad(extra);
- ut_ad(index);
- ut_ad(offsets);
- ut_ad(dict_table_is_comp(index->table));
-
- if (UNIV_UNLIKELY(node_ptr)) {
- n_node_ptr_field = dict_index_get_n_unique_in_tree(index);
- n = n_node_ptr_field + 1;
- } else {
- n_node_ptr_field = ULINT_UNDEFINED;
- n = dict_index_get_n_fields(index);
- }
-
- ut_a(rec_offs_get_n_alloc(offsets) >= n + (1 + REC_OFFS_HEADER_SIZE));
- rec_offs_set_n_fields(offsets, n);
-
- nulls = extra;
- lens = nulls + UT_BITS_IN_BYTES(index->n_nullable);
- i = offs = 0;
- null_mask = 1;
- any_ext = 0;
-
- /* read the lengths of fields 0..n */
- do {
- ulint len;
- if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
- len = offs += REC_NODE_PTR_SIZE;
- goto resolved;
- }
-
- field = dict_index_get_nth_field(index, i);
- if (!(dict_field_get_col(field)->prtype & DATA_NOT_NULL)) {
- /* nullable field => read the null flag */
-
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls++;
- null_mask = 1;
- }
-
- if (*nulls & null_mask) {
- null_mask <<= 1;
- /* No length is stored for NULL fields.
- We do not advance offs, and we set
- the length to zero and enable the
- SQL NULL flag in offsets[]. */
- len = offs | REC_OFFS_SQL_NULL;
- goto resolved;
- }
- null_mask <<= 1;
- }
-
- if (UNIV_UNLIKELY(!field->fixed_len)) {
- /* Variable-length field: read the length */
- const dict_col_t* col
- = dict_field_get_col(field);
- len = *lens++;
- /* If the maximum length of the field is up
- to 255 bytes, the actual length is always
- stored in one byte. If the maximum length is
- more than 255 bytes, the actual length is
- stored in one byte for 0..127. The length
- will be encoded in two bytes when it is 128 or
- more, or when the field is stored externally. */
- if (UNIV_UNLIKELY(col->len > 255)
- || UNIV_UNLIKELY(col->mtype == DATA_BLOB)) {
- if (len & 0x80) {
- /* 1exxxxxxx xxxxxxxx; the 0x4000 bit
- of the combined value is the "e"
- (external) flag and the low 14 bits
- are the length */
- len <<= 8;
- len |= *lens++;
-
- offs += len & 0x3fff;
- if (UNIV_UNLIKELY(len & 0x4000)) {
- any_ext = REC_OFFS_EXTERNAL;
- len = offs | REC_OFFS_EXTERNAL;
- } else {
- len = offs;
- }
-
- goto resolved;
- }
- }
-
- len = offs += len;
- } else {
- len = offs += field->fixed_len;
- }
-resolved:
- rec_offs_base(offsets)[i + 1] = len;
- } while (++i < rec_offs_n_fields(offsets));
-
- ut_ad(lens >= extra);
- *rec_offs_base(offsets) = (lens - extra + REC_N_NEW_EXTRA_BYTES)
- | REC_OFFS_COMPACT | any_ext; /* extra size: bytes consumed from
- extra plus the fixed-size header */
-}
-
-/************************************************************//**
-The following function is used to get the offset to the nth
-data field in an old-style record.
-The length is the difference between the end offset and the start
-offset of the field, after the flag bits have been masked off the end
-offset. For a SQL NULL field, *len is set to UNIV_SQL_NULL and the
-start offset is still returned.
-@return offset to the field */
-UNIV_INTERN
-ulint
-rec_get_nth_field_offs_old(
-/*=======================*/
- const rec_t* rec, /*!< in: record */
- ulint n, /*!< in: index of the field */
- ulint* len) /*!< out: length of the field;
- UNIV_SQL_NULL if SQL null */
-{
- ulint os; /* start offset of field n */
- ulint next_os; /* end info of field n, including flag bits */
-
- ut_ad(len);
- ut_a(rec);
- ut_a(n < rec_get_n_fields_old(rec));
-
- if (rec_get_1byte_offs_flag(rec)) {
- os = rec_1_get_field_start_offs(rec, n);
-
- next_os = rec_1_get_field_end_info(rec, n);
-
- if (next_os & REC_1BYTE_SQL_NULL_MASK) {
- *len = UNIV_SQL_NULL;
-
- return(os);
- }
-
- next_os = next_os & ~REC_1BYTE_SQL_NULL_MASK;
- } else {
- os = rec_2_get_field_start_offs(rec, n);
-
- next_os = rec_2_get_field_end_info(rec, n);
-
- if (next_os & REC_2BYTE_SQL_NULL_MASK) {
- *len = UNIV_SQL_NULL;
-
- return(os);
- }
-
- next_os = next_os & ~(REC_2BYTE_SQL_NULL_MASK
- | REC_2BYTE_EXTERN_MASK);
- }
-
- *len = next_os - os;
-
- ut_ad(*len < UNIV_PAGE_SIZE);
-
- return(os);
-}
-
-/**********************************************************//**
-Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
-The extra size counts the null-flag bytes and the length bytes, plus
-REC_N_NEW_EXTRA_BYTES unless temp is set. The data size is the sum of
-the lengths of the non-NULL fields. NULL fields contribute neither a
-length byte nor data.
-@return total size */
-UNIV_INLINE MY_ATTRIBUTE((warn_unused_result))
-ulint
-rec_get_converted_size_comp_prefix_low(
-/*===================================*/
- const dict_index_t* index, /*!< in: record descriptor;
- dict_table_is_comp() is
- assumed to hold, even if
- it does not */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields,/*!< in: number of data fields */
- ulint* extra, /*!< out: extra size; may be NULL
- unless temp is set */
- bool temp) /*!< in: whether this is a
- temporary file record */
-{
- ulint extra_size;
- ulint data_size;
- ulint i;
- ulint n_null = index->n_nullable;
- ut_ad(n_fields > 0);
- ut_ad(n_fields <= dict_index_get_n_fields(index));
- ut_ad(!temp || extra);
-
- extra_size = temp
- ? UT_BITS_IN_BYTES(n_null)
- : REC_N_NEW_EXTRA_BYTES
- + UT_BITS_IN_BYTES(n_null);
- data_size = 0;
-
- if (temp && dict_table_is_comp(index->table)) {
- /* No need to adjust fixed_len=0. We only need to
- adjust it for ROW_FORMAT=REDUNDANT. Note that temp has
- already been used above to choose the header size. */
- temp = false;
- }
-
- /* read the lengths of fields 0..n */
- for (i = 0; i < n_fields; i++) {
- const dict_field_t* field;
- ulint len;
- ulint fixed_len;
- const dict_col_t* col;
-
- field = dict_index_get_nth_field(index, i);
- len = dfield_get_len(&fields[i]);
- col = dict_field_get_col(field);
-
- ut_ad(dict_col_type_assert_equal(col,
- dfield_get_type(&fields[i])));
- /* All NULLable fields must be included in the n_null count.
- The decrement is inside ut_ad(); n_null is not read outside
- assertions after this point. */
- ut_ad((col->prtype & DATA_NOT_NULL) || n_null--);
-
- if (dfield_is_null(&fields[i])) {
- /* No length is stored for NULL fields. */
- ut_ad(!(col->prtype & DATA_NOT_NULL));
- continue;
- }
-
- ut_ad(len <= col->len || col->mtype == DATA_BLOB ||
- ((col->mtype == DATA_VARCHAR || col->mtype == DATA_BINARY
- || col->mtype == DATA_VARMYSQL)
- && (col->len == 0
- || len <= col->len)));
-
- fixed_len = field->fixed_len;
- if (temp && fixed_len
- && !dict_col_get_fixed_size(col, temp)) {
- fixed_len = 0;
- }
- /* If the maximum length of a variable-length field
- is up to 255 bytes, the actual length is always stored
- in one byte. If the maximum length is more than 255
- bytes, the actual length is stored in one byte for
- 0..127. The length will be encoded in two bytes when
- it is 128 or more, or when the field is stored externally. */
-
- if (fixed_len) {
-#ifdef UNIV_DEBUG
- ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen);
- ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen);
-
- ut_ad(len <= fixed_len);
-
- ut_ad(!mbmaxlen || len >= mbminlen
- * (fixed_len / mbmaxlen));
-
- /* dict_index_add_col() should guarantee this */
- ut_ad(!field->prefix_len
- || fixed_len == field->prefix_len);
-#endif /* UNIV_DEBUG */
- } else if (dfield_is_ext(&fields[i])) {
- ut_ad(col->len >= 256 || col->mtype == DATA_BLOB);
- extra_size += 2;
- } else if (len < 128
- || (col->len < 256
- && col->mtype != DATA_BLOB)) {
- extra_size++;
- } else {
- /* For variable-length columns, we look up the
- maximum length from the column itself. If this
- is a prefix index column shorter than 256 bytes,
- this will waste one byte. */
- extra_size += 2;
- }
- data_size += len;
- }
-
- if (extra) {
- *extra = extra_size;
- }
-
- return(extra_size + data_size);
-}
-
-/**********************************************************//**
-Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
-This is a wrapper that calls rec_get_converted_size_comp_prefix_low()
-with temp=false, after a debug assertion that the table of the index
-satisfies dict_table_is_comp().
-@return total size */
-UNIV_INTERN
-ulint
-rec_get_converted_size_comp_prefix(
-/*===============================*/
- const dict_index_t* index, /*!< in: record descriptor */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields,/*!< in: number of data fields */
- ulint* extra) /*!< out: extra size */
-{
- ut_ad(dict_table_is_comp(index->table));
- return(rec_get_converted_size_comp_prefix_low(
- index, fields, n_fields, extra, false));
-}
-
-/**********************************************************//**
-Determines the size of a data tuple in ROW_FORMAT=COMPACT.
-For an ordinary record all n_fields are measured. For a node pointer
-the last field is excluded from the measurement and accounted for as
-REC_NODE_PTR_SIZE bytes. For an infimum or supremum record the result
-is the constant REC_N_NEW_EXTRA_BYTES + 8. Any other status value
-triggers ut_error.
-@return total size */
-UNIV_INTERN
-ulint
-rec_get_converted_size_comp(
-/*========================*/
- const dict_index_t* index, /*!< in: record descriptor;
- dict_table_is_comp() is
- assumed to hold, even if
- it does not */
- ulint status, /*!< in: status bits of the record */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields,/*!< in: number of data fields */
- ulint* extra) /*!< out: extra size */
-{
- ulint size; /* bytes added on top of the measured prefix */
- ut_ad(n_fields > 0);
-
- switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
- case REC_STATUS_ORDINARY:
- ut_ad(n_fields == dict_index_get_n_fields(index));
- size = 0;
- break;
- case REC_STATUS_NODE_PTR:
- n_fields--;
- ut_ad(n_fields == dict_index_get_n_unique_in_tree(index));
- ut_ad(dfield_get_len(&fields[n_fields]) == REC_NODE_PTR_SIZE);
- size = REC_NODE_PTR_SIZE; /* child page number */
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- /* infimum or supremum record, 8 data bytes */
- if (UNIV_LIKELY_NULL(extra)) {
- *extra = REC_N_NEW_EXTRA_BYTES;
- }
- return(REC_N_NEW_EXTRA_BYTES + 8);
- default:
- ut_error;
- return(ULINT_UNDEFINED);
- }
-
- return(size + rec_get_converted_size_comp_prefix_low(
- index, fields, n_fields, extra, false));
-}
-
-/***********************************************************//**
-Sets the value of the ith field SQL null bit of an old-style record.
-The end info of the field is read, the SQL null mask of the 1-byte or
-2-byte offset format (chosen by rec_get_1byte_offs_flag()) is set or
-cleared in it, and the result is written back. */
-UNIV_INTERN
-void
-rec_set_nth_field_null_bit(
-/*=======================*/
- rec_t* rec, /*!< in: record */
- ulint i, /*!< in: ith field */
- ibool val) /*!< in: value to set */
-{
- ulint info;
-
- if (rec_get_1byte_offs_flag(rec)) {
-
- info = rec_1_get_field_end_info(rec, i);
-
- if (val) {
- info = info | REC_1BYTE_SQL_NULL_MASK;
- } else {
- info = info & ~REC_1BYTE_SQL_NULL_MASK;
- }
-
- rec_1_set_field_end_info(rec, i, info);
-
- return;
- }
-
- info = rec_2_get_field_end_info(rec, i);
-
- if (val) {
- info = info | REC_2BYTE_SQL_NULL_MASK;
- } else {
- info = info & ~REC_2BYTE_SQL_NULL_MASK;
- }
-
- rec_2_set_field_end_info(rec, i, info);
-}
-
-/***********************************************************//**
-Sets an old-style record field to SQL null.
-The physical size of the field is not changed.
-The bytes of the field are overwritten with data_write_sql_null()
-first, and then the SQL null bit of the field is set with
-rec_set_nth_field_null_bit(). */
-UNIV_INTERN
-void
-rec_set_nth_field_sql_null(
-/*=======================*/
- rec_t* rec, /*!< in: record */
- ulint n) /*!< in: index of the field */
-{
- ulint offset;
-
- offset = rec_get_field_start_offs(rec, n);
-
- data_write_sql_null(rec + offset, rec_get_nth_field_size(rec, n));
-
- rec_set_nth_field_null_bit(rec, n, TRUE);
-}
-
-/*********************************************************//**
-Builds an old-style physical record out of a data tuple and
-stores it beginning from the start of the given buffer.
-The 1-byte offset format is used when there are no externally stored
-columns and the data size is at most REC_1BYTE_OFFS_LIMIT; otherwise
-the 2-byte format is used. SQL NULL fields still occupy
-dtype_get_sql_null_size() bytes of data, written with
-data_write_sql_null().
-@return pointer to the origin of physical record */
-static
-rec_t*
-rec_convert_dtuple_to_rec_old(
-/*==========================*/
- byte* buf, /*!< in: start address of the physical record */
- const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext) /*!< in: number of externally stored columns */
-{
- const dfield_t* field;
- ulint n_fields;
- ulint data_size;
- rec_t* rec;
- ulint end_offset; /* end offset of the data written so far */
- ulint ored_offset; /* end_offset with the flag bits ORed in */
- ulint len;
- ulint i;
-
- ut_ad(buf && dtuple);
- ut_ad(dtuple_validate(dtuple));
- ut_ad(dtuple_check_typed(dtuple));
-
- n_fields = dtuple_get_n_fields(dtuple);
- data_size = dtuple_get_data_size(dtuple, 0);
-
- ut_ad(n_fields > 0);
-
- /* Calculate the offset of the origin in the physical record */
-
- rec = buf + rec_get_converted_extra_size(data_size, n_fields, n_ext);
-#ifdef UNIV_DEBUG
- /* Suppress Valgrind warnings of ut_ad()
- in mach_write_to_1(), mach_write_to_2() et al. */
- memset(buf, 0xff, rec - buf + data_size);
-#endif /* UNIV_DEBUG */
- /* Store the number of fields */
- rec_set_n_fields_old(rec, n_fields);
-
- /* Set the info bits of the record */
- rec_set_info_bits_old(rec, dtuple_get_info_bits(dtuple)
- & REC_INFO_BITS_MASK);
-
- /* Store the data and the offsets */
-
- end_offset = 0;
-
- if (!n_ext && data_size <= REC_1BYTE_OFFS_LIMIT) {
-
- rec_set_1byte_offs_flag(rec, TRUE);
-
- for (i = 0; i < n_fields; i++) {
-
- field = dtuple_get_nth_field(dtuple, i);
-
- if (dfield_is_null(field)) {
- len = dtype_get_sql_null_size(
- dfield_get_type(field), 0);
- data_write_sql_null(rec + end_offset, len);
-
- end_offset += len;
- ored_offset = end_offset
- | REC_1BYTE_SQL_NULL_MASK;
- } else {
- /* If the data is not SQL null, store it */
- len = dfield_get_len(field);
-
- memcpy(rec + end_offset,
- dfield_get_data(field), len);
-
- end_offset += len;
- ored_offset = end_offset;
- }
-
- rec_1_set_field_end_info(rec, i, ored_offset);
- }
- } else {
- rec_set_1byte_offs_flag(rec, FALSE);
-
- for (i = 0; i < n_fields; i++) {
-
- field = dtuple_get_nth_field(dtuple, i);
-
- if (dfield_is_null(field)) {
- len = dtype_get_sql_null_size(
- dfield_get_type(field), 0);
- data_write_sql_null(rec + end_offset, len);
-
- end_offset += len;
- ored_offset = end_offset
- | REC_2BYTE_SQL_NULL_MASK;
- } else {
- /* If the data is not SQL null, store it */
- len = dfield_get_len(field);
-
- memcpy(rec + end_offset,
- dfield_get_data(field), len);
-
- end_offset += len;
- ored_offset = end_offset;
-
- if (dfield_is_ext(field)) {
- ored_offset |= REC_2BYTE_EXTERN_MASK;
- }
- }
-
- rec_2_set_field_end_info(rec, i, ored_offset);
- }
- }
-
- return(rec);
-}
-
-/*********************************************************//**
-Builds a ROW_FORMAT=COMPACT record out of a data tuple.
-The null flags and the length bytes are written at decreasing
-addresses before the record origin, and the field data is written at
-increasing addresses from rec. For temporary file records the null
-flags start at rec - 1, so no REC_N_NEW_EXTRA_BYTES header is
-skipped. This function does not write the info or status bits. */
-UNIV_INLINE MY_ATTRIBUTE((nonnull))
-void
-rec_convert_dtuple_to_rec_comp(
-/*===========================*/
- rec_t* rec, /*!< in: origin of record */
- const dict_index_t* index, /*!< in: record descriptor */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields,/*!< in: number of data fields */
- ulint status, /*!< in: status bits of the record */
- bool temp) /*!< in: whether to use the
- format for temporary files in
- index creation */
-{
- const dfield_t* field;
- const dtype_t* type;
- byte* end; /* where the next field's data is written */
- byte* nulls;
- byte* lens;
- ulint len;
- ulint i;
- ulint n_node_ptr_field;
- ulint fixed_len;
- ulint null_mask = 1;
- ulint n_null;
-
- ut_ad(temp || dict_table_is_comp(index->table));
- ut_ad(n_fields > 0);
-
- if (temp) {
- ut_ad(status == REC_STATUS_ORDINARY);
- ut_ad(n_fields <= dict_index_get_n_fields(index));
- n_node_ptr_field = ULINT_UNDEFINED;
- nulls = rec - 1;
- if (dict_table_is_comp(index->table)) {
- /* No need to adjust fixed_len=0. We only
- need to adjust it for ROW_FORMAT=REDUNDANT. */
- temp = false;
- }
- } else {
- nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
-
- switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
- case REC_STATUS_ORDINARY:
- ut_ad(n_fields <= dict_index_get_n_fields(index));
- n_node_ptr_field = ULINT_UNDEFINED;
- break;
- case REC_STATUS_NODE_PTR:
- ut_ad(n_fields
- == dict_index_get_n_unique_in_tree(index) + 1);
- n_node_ptr_field = n_fields - 1;
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- ut_ad(n_fields == 1);
- n_node_ptr_field = ULINT_UNDEFINED;
- break;
- default:
- ut_error;
- return;
- }
- }
-
- end = rec;
- n_null = index->n_nullable;
- lens = nulls - UT_BITS_IN_BYTES(n_null);
- /* clear the SQL-null flags; the memset covers the
- UT_BITS_IN_BYTES(n_null) bytes from lens + 1 up to and
- including *nulls */
- memset(lens + 1, 0, nulls - lens);
-
- /* Store the data and the offsets */
-
- for (i = 0, field = fields; i < n_fields; i++, field++) {
- const dict_field_t* ifield;
-
- type = dfield_get_type(field);
- len = dfield_get_len(field);
-
- if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
- ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
- ut_ad(len == REC_NODE_PTR_SIZE);
- memcpy(end, dfield_get_data(field), len);
- end += REC_NODE_PTR_SIZE;
- break;
- }
-
- if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
- /* nullable field */
- ut_ad(n_null--);
-
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--;
- null_mask = 1;
- }
-
- ut_ad(*nulls < null_mask);
-
- /* set the null flag if necessary */
- if (dfield_is_null(field)) {
- *nulls |= null_mask;
- null_mask <<= 1;
- continue;
- }
-
- null_mask <<= 1;
- }
- /* only nullable fields can be null */
- ut_ad(!dfield_is_null(field));
-
- ifield = dict_index_get_nth_field(index, i);
- fixed_len = ifield->fixed_len;
- if (temp && fixed_len
- && !dict_col_get_fixed_size(ifield->col, temp)) {
- fixed_len = 0;
- }
- /* If the maximum length of a variable-length field
- is up to 255 bytes, the actual length is always stored
- in one byte. If the maximum length is more than 255
- bytes, the actual length is stored in one byte for
- 0..127. The length will be encoded in two bytes when
- it is 128 or more, or when the field is stored externally. */
- if (fixed_len) {
-#ifdef UNIV_DEBUG
- ulint mbminlen = DATA_MBMINLEN(
- ifield->col->mbminmaxlen);
- ulint mbmaxlen = DATA_MBMAXLEN(
- ifield->col->mbminmaxlen);
-
- ut_ad(len <= fixed_len);
- ut_ad(!mbmaxlen || len >= mbminlen
- * (fixed_len / mbmaxlen));
- ut_ad(!dfield_is_ext(field));
-#endif /* UNIV_DEBUG */
- } else if (dfield_is_ext(field)) {
- ut_ad(ifield->col->len >= 256
- || ifield->col->mtype == DATA_BLOB);
- ut_ad(len <= REC_ANTELOPE_MAX_INDEX_COL_LEN
- + BTR_EXTERN_FIELD_REF_SIZE);
- *lens-- = (byte) (len >> 8) | 0xc0; /* 0x80 marks a
- two-byte length, 0x40 the external flag */
- *lens-- = (byte) len;
- } else {
- ut_ad(len <= dtype_get_len(type)
- || dtype_get_mtype(type) == DATA_BLOB
- || !strcmp(index->name,
- FTS_INDEX_TABLE_IND_NAME));
- if (len < 128
- || (dtype_get_len(type) < 256
- && dtype_get_mtype(type) != DATA_BLOB)) {
-
- *lens-- = (byte) len;
- } else {
- ut_ad(len < 16384);
- *lens-- = (byte) (len >> 8) | 0x80;
- *lens-- = (byte) len;
- }
- }
-
- if (len) {
- memcpy(end, dfield_get_data(field), len);
- end += len;
- }
- }
-}
-
-/*********************************************************//**
-Builds a new-style physical record out of a data tuple and
-stores it beginning from the start of the given buffer.
-The record origin is placed extra_size bytes after buf, where
-extra_size is the value reported by rec_get_converted_size_comp().
-The fields are then written with rec_convert_dtuple_to_rec_comp() and
-the info and status bits are copied from the tuple.
-@return pointer to the origin of physical record */
-static
-rec_t*
-rec_convert_dtuple_to_rec_new(
-/*==========================*/
- byte* buf, /*!< in: start address of
- the physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* dtuple) /*!< in: data tuple */
-{
- ulint extra_size;
- ulint status;
- rec_t* rec;
-
- status = dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK;
- rec_get_converted_size_comp(
- index, status, dtuple->fields, dtuple->n_fields, &extra_size);
- rec = buf + extra_size;
-
- rec_convert_dtuple_to_rec_comp(
- rec, index, dtuple->fields, dtuple->n_fields, status, false);
-
- /* Set the info bits of the record */
- rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple));
-
- return(rec);
-}
-
-/*********************************************************//**
-Builds a physical record out of a data tuple and
-stores it beginning from the start of the given buffer.
-Dispatches to rec_convert_dtuple_to_rec_new() when
-dict_table_is_comp() holds for the table of the index, and to
-rec_convert_dtuple_to_rec_old() otherwise; n_ext is passed only to
-the old-style builder. In UNIV_DEBUG builds the resulting record is
-validated, and its field count and per-field external-storage flags
-are compared against the tuple.
-@return pointer to the origin of physical record */
-UNIV_INTERN
-rec_t*
-rec_convert_dtuple_to_rec(
-/*======================*/
- byte* buf, /*!< in: start address of the
- physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- const dtuple_t* dtuple, /*!< in: data tuple */
- ulint n_ext) /*!< in: number of
- externally stored columns */
-{
- rec_t* rec;
-
- ut_ad(buf != NULL);
- ut_ad(index != NULL);
- ut_ad(dtuple != NULL);
- ut_ad(dtuple_validate(dtuple));
- ut_ad(dtuple_check_typed(dtuple));
-
- if (dict_table_is_comp(index->table)) {
- rec = rec_convert_dtuple_to_rec_new(buf, index, dtuple);
- } else {
- rec = rec_convert_dtuple_to_rec_old(buf, dtuple, n_ext);
- }
-
-#ifdef UNIV_DEBUG
- {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- ulint i;
- rec_offs_init(offsets_);
-
- offsets = rec_get_offsets(rec, index,
- offsets_, ULINT_UNDEFINED, &heap);
- ut_ad(rec_validate(rec, offsets));
- ut_ad(dtuple_get_n_fields(dtuple)
- == rec_offs_n_fields(offsets));
-
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- ut_ad(!dfield_is_ext(dtuple_get_nth_field(dtuple, i))
- == !rec_offs_nth_extern(offsets, i));
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-#endif /* UNIV_DEBUG */
- return(rec);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
-This is the temporary file variant: it calls
-rec_get_converted_size_comp_prefix_low() with temp=true, so
-REC_N_NEW_EXTRA_BYTES is not included in the extra size.
-@return total size */
-UNIV_INTERN
-ulint
-rec_get_converted_size_temp(
-/*========================*/
- const dict_index_t* index, /*!< in: record descriptor */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields,/*!< in: number of data fields */
- ulint* extra) /*!< out: extra size */
-{
- return(rec_get_converted_size_comp_prefix_low(
- index, fields, n_fields, extra, true));
-}
-
-/******************************************************//**
-Determine the offset to each field in temporary file.
-This is a wrapper that calls rec_init_offsets_comp_ordinary()
-with its second argument set to true.
-@see rec_convert_dtuple_to_temp() */
-UNIV_INTERN
-void
-rec_init_offsets_temp(
-/*==================*/
- const rec_t* rec, /*!< in: temporary file record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
-{
- rec_init_offsets_comp_ordinary(rec, true, index, offsets);
-}
-
-/*********************************************************//**
-Builds a temporary file record out of a data tuple.
-This is a wrapper that calls rec_convert_dtuple_to_rec_comp() with
-status REC_STATUS_ORDINARY and temp=true.
-@see rec_init_offsets_temp() */
-UNIV_INTERN
-void
-rec_convert_dtuple_to_temp(
-/*=======================*/
- rec_t* rec, /*!< out: record */
- const dict_index_t* index, /*!< in: record descriptor */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields) /*!< in: number of fields */
-{
- rec_convert_dtuple_to_rec_comp(rec, index, fields, n_fields,
- REC_STATUS_ORDINARY, true);
-}
-
-/**************************************************************//**
-Copies the first n fields of a physical record to a data tuple. The fields
-are copied to the memory heap.
-The info bits of the tuple are set from the record. Each non-NULL
-field is duplicated into heap with mem_heap_dup(); NULL fields are
-marked with dfield_set_null(). The same heap is also handed to
-rec_get_offsets() for the offsets array. */
-UNIV_INTERN
-void
-rec_copy_prefix_to_dtuple(
-/*======================*/
- dtuple_t* tuple, /*!< out: data tuple */
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint n_fields, /*!< in: number of fields
- to copy */
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ulint i;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- offsets = rec_get_offsets(rec, index, offsets, n_fields, &heap);
-
- ut_ad(rec_validate(rec, offsets));
- ut_ad(dtuple_check_typed(tuple));
-
- dtuple_set_info_bits(tuple, rec_get_info_bits(
- rec, dict_table_is_comp(index->table)));
-
- for (i = 0; i < n_fields; i++) {
- dfield_t* field;
- const byte* data;
- ulint len;
-
- field = dtuple_get_nth_field(tuple, i);
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len != UNIV_SQL_NULL) {
- dfield_set_data(field,
- mem_heap_dup(heap, data, len), len);
- ut_ad(!rec_offs_nth_extern(offsets, i));
- } else {
- dfield_set_null(field);
- }
- }
-}
-
-/**************************************************************//**
-Copies the first n fields of an old-style physical record
-to a new physical record in a buffer.
-The copied range starts area_start bytes before the record origin,
-where area_start is REC_N_OLD_EXTRA_BYTES plus one or two bytes per
-copied field, and extends area_end bytes past the origin. If *buf is
-NULL or *buf_size is too small, the old buffer (if any) is freed with
-mem_free() and a new one is obtained from mem_alloc2(). The field
-count of the copy is set to n_fields.
-@return own: copied record */
-static
-rec_t*
-rec_copy_prefix_to_buf_old(
-/*=======================*/
- const rec_t* rec, /*!< in: physical record */
- ulint n_fields, /*!< in: number of fields to copy */
- ulint area_end, /*!< in: end of the prefix data */
- byte** buf, /*!< in/out: memory buffer for
- the copied prefix, or NULL */
- ulint* buf_size) /*!< in/out: buffer size */
-{
- rec_t* copy_rec;
- ulint area_start;
- ulint prefix_len;
-
- if (rec_get_1byte_offs_flag(rec)) {
- area_start = REC_N_OLD_EXTRA_BYTES + n_fields;
- } else {
- area_start = REC_N_OLD_EXTRA_BYTES + 2 * n_fields;
- }
-
- prefix_len = area_start + area_end;
-
- if ((*buf == NULL) || (*buf_size < prefix_len)) {
- if (*buf != NULL) {
- mem_free(*buf);
- }
-
- *buf = static_cast<byte*>(mem_alloc2(prefix_len, buf_size));
- }
-
- ut_memcpy(*buf, rec - area_start, prefix_len);
-
- copy_rec = *buf + area_start;
-
- rec_set_n_fields_old(copy_rec, n_fields);
-
- return(copy_rec);
-}
-
-/**************************************************************//**
-Copies the first n fields of a physical record to a new physical record in
-a buffer.
-Old-style records are delegated to rec_copy_prefix_to_buf_old(). For
-new-style records the null flags and length bytes of the first
-n_fields fields are walked to find the prefix data length, and then
-the header bytes consumed plus that data are copied with one memcpy().
-Infimum and supremum records, and unknown status values, trigger
-ut_error. If *buf is NULL or *buf_size is too small, the old buffer
-(if any) is freed with mem_free() and a new one is obtained from
-mem_alloc2().
-@return own: copied record */
-UNIV_INTERN
-rec_t*
-rec_copy_prefix_to_buf(
-/*===================*/
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint n_fields, /*!< in: number of fields
- to copy */
- byte** buf, /*!< in/out: memory buffer
- for the copied prefix,
- or NULL */
- ulint* buf_size) /*!< in/out: buffer size */
-{
- const byte* nulls;
- const byte* lens;
- ulint i;
- ulint prefix_len;
- ulint null_mask;
- ulint status;
-
- UNIV_PREFETCH_RW(*buf);
-
- if (!dict_table_is_comp(index->table)) {
- ut_ad(rec_validate_old(rec));
- return(rec_copy_prefix_to_buf_old(
- rec, n_fields,
- rec_get_field_start_offs(rec, n_fields),
- buf, buf_size));
- }
-
- status = rec_get_status(rec);
-
- switch (status) {
- case REC_STATUS_ORDINARY:
- ut_ad(n_fields <= dict_index_get_n_fields(index));
- break;
- case REC_STATUS_NODE_PTR:
- /* it doesn't make sense to copy the child page number field */
- ut_ad(n_fields <= dict_index_get_n_unique_in_tree(index));
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- /* infimum or supremum record: no sense to copy anything;
- these cases fall through to the default error handling */
- default:
- ut_error;
- return(NULL);
- }
-
- nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
- lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
- UNIV_PREFETCH_R(lens);
- prefix_len = 0;
- null_mask = 1;
-
- /* read the lengths of fields 0..n */
- for (i = 0; i < n_fields; i++) {
- const dict_field_t* field;
- const dict_col_t* col;
-
- field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(field);
-
- if (!(col->prtype & DATA_NOT_NULL)) {
- /* nullable field => read the null flag */
- if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--;
- null_mask = 1;
- }
-
- if (*nulls & null_mask) {
- null_mask <<= 1;
- continue;
- }
-
- null_mask <<= 1;
- }
-
- if (field->fixed_len) {
- prefix_len += field->fixed_len;
- } else {
- ulint len = *lens--;
- /* If the maximum length of the column is up
- to 255 bytes, the actual length is always
- stored in one byte. If the maximum length is
- more than 255 bytes, the actual length is
- stored in one byte for 0..127. The length
- will be encoded in two bytes when it is 128 or
- more, or when the column is stored externally. */
- if (col->len > 255 || col->mtype == DATA_BLOB) {
- if (len & 0x80) {
- /* 1exxxxxx; the two flag bits are
- masked off before the second length
- byte is appended */
- len &= 0x3f;
- len <<= 8;
- len |= *lens--;
- UNIV_PREFETCH_R(lens);
- }
- }
- prefix_len += len;
- }
- }
-
- UNIV_PREFETCH_R(rec + prefix_len);
-
- prefix_len += rec - (lens + 1); /* add the header bytes between the
- last consumed length byte and the origin */
-
- if ((*buf == NULL) || (*buf_size < prefix_len)) {
- if (*buf != NULL) {
- mem_free(*buf);
- }
-
- *buf = static_cast<byte*>(mem_alloc2(prefix_len, buf_size));
- }
-
- memcpy(*buf, lens + 1, prefix_len);
-
- return(*buf + (rec - (lens + 1)));
-}
-#endif /* UNIV_HOTBACKUP */
-
-/***************************************************************//**
-Validates the consistency of an old-style physical record.
-@return TRUE if ok */
-static
-ibool
-rec_validate_old(
-/*=============*/
- const rec_t* rec) /*!< in: physical record */
-{
- const byte* data;
- ulint len;
- ulint n_fields;
- ulint len_sum = 0;
- ulint sum = 0;
- ulint i;
-
- ut_a(rec);
- n_fields = rec_get_n_fields_old(rec);
-
- if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) {
- fprintf(stderr, "InnoDB: Error: record has %lu fields\n",
- (ulong) n_fields);
- return(FALSE);
- }
-
- for (i = 0; i < n_fields; i++) {
- data = rec_get_nth_field_old(rec, i, &len);
-
- if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) {
- fprintf(stderr,
- "InnoDB: Error: record field %lu len %lu\n",
- (ulong) i,
- (ulong) len);
- return(FALSE);
- }
-
- if (len != UNIV_SQL_NULL) {
- len_sum += len;
- sum += *(data + len -1); /* dereference the
- end of the field to
- cause a memory trap
- if possible */
- } else {
- len_sum += rec_get_nth_field_size(rec, i);
- }
- }
-
- if (len_sum != rec_get_data_size_old(rec)) {
- fprintf(stderr,
- "InnoDB: Error: record len should be %lu, len %lu\n",
- (ulong) len_sum,
- rec_get_data_size_old(rec));
- return(FALSE);
- }
-
- rec_dummy = sum; /* This is here only to fool the compiler */
-
- return(TRUE);
-}
-
-/***************************************************************//**
-Validates the consistency of a physical record.
-@return TRUE if ok */
-UNIV_INTERN
-ibool
-rec_validate(
-/*=========*/
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- const byte* data;
- ulint len;
- ulint n_fields;
- ulint len_sum = 0;
- ulint sum = 0;
- ulint i;
-
- ut_a(rec);
- n_fields = rec_offs_n_fields(offsets);
-
- if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) {
- fprintf(stderr, "InnoDB: Error: record has %lu fields\n",
- (ulong) n_fields);
- return(FALSE);
- }
-
- ut_a(rec_offs_comp(offsets) || n_fields <= rec_get_n_fields_old(rec));
-
- for (i = 0; i < n_fields; i++) {
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) {
- fprintf(stderr,
- "InnoDB: Error: record field %lu len %lu\n",
- (ulong) i,
- (ulong) len);
- return(FALSE);
- }
-
- if (len != UNIV_SQL_NULL) {
- len_sum += len;
- sum += *(data + len -1); /* dereference the
- end of the field to
- cause a memory trap
- if possible */
- } else if (!rec_offs_comp(offsets)) {
- len_sum += rec_get_nth_field_size(rec, i);
- }
- }
-
- if (len_sum != rec_offs_data_size(offsets)) {
- fprintf(stderr,
- "InnoDB: Error: record len should be %lu, len %lu\n",
- (ulong) len_sum,
- (ulong) rec_offs_data_size(offsets));
- return(FALSE);
- }
-
- rec_dummy = sum; /* This is here only to fool the compiler */
-
- if (!rec_offs_comp(offsets)) {
- ut_a(rec_validate_old(rec));
- }
-
- return(TRUE);
-}
-
-/***************************************************************//**
-Prints an old-style physical record. */
-UNIV_INTERN
-void
-rec_print_old(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec) /*!< in: physical record */
-{
- const byte* data;
- ulint len;
- ulint n;
- ulint i;
-
- ut_ad(rec);
-
- n = rec_get_n_fields_old(rec);
-
- fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
- " %u-byte offsets; info bits %lu\n",
- (ulong) n,
- rec_get_1byte_offs_flag(rec) ? 1 : 2,
- (ulong) rec_get_info_bits(rec, FALSE));
-
- for (i = 0; i < n; i++) {
-
- data = rec_get_nth_field_old(rec, i, &len);
-
- fprintf(file, " %lu:", (ulong) i);
-
- if (len != UNIV_SQL_NULL) {
- if (len <= 30) {
-
- ut_print_buf(file, data, len);
- } else {
- ut_print_buf(file, data, 30);
-
- fprintf(file, " (total %lu bytes)",
- (ulong) len);
- }
- } else {
- fprintf(file, " SQL NULL, size %lu ",
- rec_get_nth_field_size(rec, i));
- }
-
- putc(';', file);
- putc('\n', file);
- }
-
- rec_validate_old(rec);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***************************************************************//**
-Prints a physical record in ROW_FORMAT=COMPACT. Ignores the
-record header. */
-UNIV_INTERN
-void
-rec_print_comp(
-/*===========*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint i;
-
- for (i = 0; i < rec_offs_n_fields(offsets); i++) {
- const byte* data;
- ulint len;
-
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- fprintf(file, " %lu:", (ulong) i);
-
- if (len != UNIV_SQL_NULL) {
- if (len <= 30) {
-
- ut_print_buf(file, data, len);
- } else if (rec_offs_nth_extern(offsets, i)) {
- ut_print_buf(file, data, 30);
- fprintf(file, " (total %lu bytes, external)",
- (ulong) len);
- ut_print_buf(file, data + len
- - BTR_EXTERN_FIELD_REF_SIZE,
- BTR_EXTERN_FIELD_REF_SIZE);
- } else {
- ut_print_buf(file, data, 30);
-
- fprintf(file, " (total %lu bytes)",
- (ulong) len);
- }
- } else {
- fputs(" SQL NULL", file);
- }
- putc(';', file);
- putc('\n', file);
- }
-}
-
-/***************************************************************//**
-Prints a physical record. */
-UNIV_INTERN
-void
-rec_print_new(
-/*==========*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec, /*!< in: physical record */
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ut_ad(rec);
- ut_ad(offsets);
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- if (!rec_offs_comp(offsets)) {
- rec_print_old(file, rec);
- return;
- }
-
- fprintf(file, "PHYSICAL RECORD: n_fields %lu;"
- " compact format; info bits %lu\n",
- (ulong) rec_offs_n_fields(offsets),
- (ulong) rec_get_info_bits(rec, TRUE));
-
- rec_print_comp(file, rec, offsets);
- rec_validate(rec, offsets);
-}
-
-/***************************************************************//**
-Prints a physical record. */
-UNIV_INTERN
-void
-rec_print(
-/*======*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec, /*!< in: physical record */
- const dict_index_t* index) /*!< in: record descriptor */
-{
- ut_ad(index);
-
- if (!dict_table_is_comp(index->table)) {
- rec_print_old(file, rec);
- return;
- } else {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
- rec_print_new(file, rec,
- rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap));
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef WITH_WSREP
-int
-wsrep_rec_get_foreign_key(
- byte *buf, /* out: extracted key */
- ulint *buf_len, /* in/out: length of buf */
- const rec_t* rec, /* in: physical record */
- dict_index_t* index_for, /* in: index in foreign table */
- dict_index_t* index_ref, /* in: index in referenced table */
- ibool new_protocol) /* in: protocol > 1 */
-{
- const byte* data;
- ulint len;
- ulint key_len = 0;
- ulint i;
- uint key_parts;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
-
- ut_ad(index_for);
- ut_ad(index_ref);
-
- rec_offs_init(offsets_);
- offsets = rec_get_offsets(rec, index_for, offsets_,
- ULINT_UNDEFINED, &heap);
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- ut_ad(rec);
-
- key_parts = dict_index_get_n_unique_in_tree(index_for);
- for (i = 0;
- i < key_parts &&
- (index_for->type & DICT_CLUSTERED || i < key_parts - 1);
- i++) {
- dict_field_t* field_f =
- dict_index_get_nth_field(index_for, i);
- const dict_col_t* col_f = dict_field_get_col(field_f);
- dict_field_t* field_r =
- dict_index_get_nth_field(index_ref, i);
- const dict_col_t* col_r = dict_field_get_col(field_r);
-
- data = rec_get_nth_field(rec, offsets, i, &len);
- if (key_len + ((len != UNIV_SQL_NULL) ? len + 1 : 1) >
- *buf_len) {
- fprintf (stderr,
- "WSREP: FK key len exceeded %lu %lu %lu\n",
- key_len, len, *buf_len);
- goto err_out;
- }
-
- if (len == UNIV_SQL_NULL) {
- ut_a(!(col_f->prtype & DATA_NOT_NULL));
- *buf++ = 1;
- key_len++;
- } else if (!new_protocol) {
- if (!(col_r->prtype & DATA_NOT_NULL)) {
- *buf++ = 0;
- key_len++;
- }
- memcpy(buf, data, len);
- *buf_len = wsrep_innobase_mysql_sort(
- (int)(col_f->prtype & DATA_MYSQL_TYPE_MASK),
- (uint)dtype_get_charset_coll(col_f->prtype),
- buf, len, *buf_len);
- } else { /* new protocol */
- if (!(col_r->prtype & DATA_NOT_NULL)) {
- *buf++ = 0;
- key_len++;
- }
- switch (col_f->mtype) {
- case DATA_INT: {
- byte* ptr = buf+len;
- for (;;) {
- ptr--;
- *ptr = *data;
- if (ptr == buf) {
- break;
- }
- data++;
- }
-
- if (!(col_f->prtype & DATA_UNSIGNED)) {
- buf[len-1] = (byte) (buf[len-1] ^ 128);
- }
-
- break;
- }
- case DATA_VARCHAR:
- case DATA_VARMYSQL:
- case DATA_CHAR:
- case DATA_MYSQL:
- /* Copy the actual data */
- ut_memcpy(buf, data, len);
- len = wsrep_innobase_mysql_sort(
- (int)
- (col_f->prtype & DATA_MYSQL_TYPE_MASK),
- (uint)
- dtype_get_charset_coll(col_f->prtype),
- buf, len, *buf_len);
- break;
- case DATA_BLOB:
- case DATA_BINARY:
- memcpy(buf, data, len);
- break;
- default:
- break;
- }
-
- key_len += len;
- buf += len;
- }
- }
-
- rec_validate(rec, offsets);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- *buf_len = key_len;
- return DB_SUCCESS;
-
- err_out:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return DB_ERROR;
-}
-#endif /* WITH_WSREP */
-
-# ifdef UNIV_DEBUG
-/************************************************************//**
-Reads the DB_TRX_ID of a clustered index record.
-@return the value of DB_TRX_ID */
-UNIV_INTERN
-trx_id_t
-rec_get_trx_id(
-/*===========*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index) /*!< in: clustered index */
-{
- const page_t* page
- = page_align(rec);
- ulint trx_id_col
- = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
- const byte* trx_id;
- ulint len;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
- ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
- == index->id);
- ut_ad(dict_index_is_clust(index));
- ut_ad(trx_id_col > 0);
- ut_ad(trx_id_col != ULINT_UNDEFINED);
-
- offsets = rec_get_offsets(rec, index, offsets, trx_id_col + 1, &heap);
-
- trx_id = rec_get_nth_field(rec, offsets, trx_id_col, &len);
-
- ut_ad(len == DATA_TRX_ID_LEN);
-
- if (heap) {
- mem_heap_free(heap);
- }
-
- return(trx_read_trx_id(trx_id));
-}
-#endif /* UNIV_DEBUG */
-
diff --git a/storage/xtradb/row/row0ext.cc b/storage/xtradb/row/row0ext.cc
deleted file mode 100644
index ad852577ad2..00000000000
--- a/storage/xtradb/row/row0ext.cc
+++ /dev/null
@@ -1,143 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0ext.cc
-Caching of externally stored column prefixes
-
-Created September 2006 Marko Makela
-*******************************************************/
-
-#include "row0ext.h"
-
-#ifdef UNIV_NONINL
-#include "row0ext.ic"
-#endif
-
-#include "btr0cur.h"
-
-/********************************************************************//**
-Fills the column prefix cache of an externally stored column. */
-static
-void
-row_ext_cache_fill(
-/*===============*/
- row_ext_t* ext, /*!< in/out: column prefix cache */
- ulint i, /*!< in: index of ext->ext[] */
- ulint zip_size,/*!< compressed page size in bytes, or 0 */
- const dfield_t* dfield) /*!< in: data field */
-{
- const byte* field = static_cast<const byte*>(
- dfield_get_data(dfield));
- ulint f_len = dfield_get_len(dfield);
- byte* buf = ext->buf + i * ext->max_len;
-
- ut_ad(ext->max_len > 0);
- ut_ad(i < ext->n_ext);
- ut_ad(dfield_is_ext(dfield));
- ut_a(f_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
- if (UNIV_UNLIKELY(!memcmp(field_ref_zero,
- field + f_len - BTR_EXTERN_FIELD_REF_SIZE,
- BTR_EXTERN_FIELD_REF_SIZE))) {
- /* The BLOB pointer is not set: we cannot fetch it */
- ext->len[i] = 0;
- } else {
- if (ext->max_len == REC_VERSION_56_MAX_INDEX_COL_LEN
- && f_len > BTR_EXTERN_FIELD_REF_SIZE) {
- /* In this case, the field is in B format or beyond,
- (refer to the definition of row_ext_t.max_len)
- and the field is already fill with prefix, otherwise
- f_len would be BTR_EXTERN_FIELD_REF_SIZE.
- So there is no need to re-read the prefix externally,
- but just copy the local prefix to buf. Please note
- if the ext->len[i] is zero, it means an error
- as above. */
- memcpy(buf, field, f_len - BTR_EXTERN_FIELD_REF_SIZE);
- ext->len[i] = f_len - BTR_EXTERN_FIELD_REF_SIZE;
- } else {
- /* Fetch at most ext->max_len of the column.
- The column should be non-empty. However,
- trx_rollback_or_clean_all_recovered() may try to
- access a half-deleted BLOB if the server previously
- crashed during the execution of
- btr_free_externally_stored_field(). */
- ext->len[i] = btr_copy_externally_stored_field_prefix(
- buf, ext->max_len, zip_size, field, f_len,
- NULL);
- }
- }
-}
-
-/********************************************************************//**
-Creates a cache of column prefixes of externally stored columns.
-@return own: column prefix cache */
-UNIV_INTERN
-row_ext_t*
-row_ext_create(
-/*===========*/
- ulint n_ext, /*!< in: number of externally stored columns */
- const ulint* ext, /*!< in: col_no's of externally stored columns
- in the InnoDB table object, as reported by
- dict_col_get_no(); NOT relative to the records
- in the clustered index */
- ulint flags, /*!< in: table->flags */
- const dtuple_t* tuple, /*!< in: data tuple containing the field
- references of the externally stored
- columns; must be indexed by col_no;
- the clustered index record must be
- covered by a lock or a page latch
- to prevent deletion (rollback or purge). */
- mem_heap_t* heap) /*!< in: heap where created */
-{
- ulint i;
- ulint zip_size = dict_tf_get_zip_size(flags);
-
- row_ext_t* ret;
-
- ut_ad(n_ext > 0);
-
- ret = static_cast<row_ext_t*>(
- mem_heap_alloc(heap,
- (sizeof *ret) + (n_ext - 1) * sizeof ret->len));
-
- ut_ad(ut_is_2pow(zip_size));
- ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
-
- ret->n_ext = n_ext;
- ret->ext = ext;
- ret->max_len = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags);
-
- ret->buf = static_cast<byte*>(
- mem_heap_alloc(heap, n_ext * ret->max_len));
-
-#ifdef UNIV_DEBUG
- memset(ret->buf, 0xaa, n_ext * ret->max_len);
- UNIV_MEM_ALLOC(ret->buf, n_ext * ret->max_len);
-#endif
-
- /* Fetch the BLOB prefixes */
- for (i = 0; i < n_ext; i++) {
- const dfield_t* dfield;
-
- dfield = dtuple_get_nth_field(tuple, ext[i]);
- row_ext_cache_fill(ret, i, zip_size, dfield);
- }
-
- return(ret);
-}
diff --git a/storage/xtradb/row/row0ftsort.cc b/storage/xtradb/row/row0ftsort.cc
deleted file mode 100644
index 7ffcc59dc5f..00000000000
--- a/storage/xtradb/row/row0ftsort.cc
+++ /dev/null
@@ -1,1662 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0ftsort.cc
-Create Full Text Index with (parallel) merge sort
-
-Created 10/13/2010 Jimmy Yang
-*******************************************************/
-
-#include "dict0dict.h" /* dict_table_stats_lock() */
-#include "row0merge.h"
-#include "pars0pars.h"
-#include "row0ftsort.h"
-#include "row0merge.h"
-#include "row0row.h"
-#include "btr0cur.h"
-#include "btr0sea.h"
-
-/** Read the next record to buffer N.
-@param N index into array of merge info structure */
-#define ROW_MERGE_READ_GET_NEXT(N) \
- do { \
- b[N] = row_merge_read_rec( \
- block[N], buf[N], b[N], index, \
- fd[N], &foffs[N], &mrec[N], offsets[N], \
- crypt_data, crypt_block[N], space); \
- if (UNIV_UNLIKELY(!b[N])) { \
- if (mrec[N]) { \
- goto exit; \
- } \
- } \
- } while (0)
-
-/** Parallel sort degree */
-UNIV_INTERN ulong fts_sort_pll_degree = 2;
-
-/*********************************************************************//**
-Create a temporary "fts sort index" used to merge sort the
-tokenized doc string. The index has three "fields":
-
-1) Tokenized word,
-2) Doc ID (depend on number of records to sort, it can be a 4 bytes or 8 bytes
-integer value)
-3) Word's position in original doc.
-
-@see fts_create_one_index_table()
-
-@return dict_index_t structure for the fts sort index */
-UNIV_INTERN
-dict_index_t*
-row_merge_create_fts_sort_index(
-/*============================*/
- dict_index_t* index, /*!< in: Original FTS index
- based on which this sort index
- is created */
- const dict_table_t* table, /*!< in: table that FTS index
- is being created on */
- ibool* opt_doc_id_size)
- /*!< out: whether to use 4 bytes
- instead of 8 bytes integer to
- store Doc ID during sort */
-{
- dict_index_t* new_index;
- dict_field_t* field;
- dict_field_t* idx_field;
- CHARSET_INFO* charset;
-
- // FIXME: This name shouldn't be hard coded here.
- new_index = dict_mem_index_create(
- index->table->name, "tmp_fts_idx", 0, DICT_FTS, 3);
-
- new_index->id = index->id;
- new_index->table = (dict_table_t*) table;
- new_index->n_uniq = FTS_NUM_FIELDS_SORT;
- new_index->n_def = FTS_NUM_FIELDS_SORT;
- new_index->cached = TRUE;
-
- btr_search_index_init(new_index);
-
- idx_field = dict_index_get_nth_field(index, 0);
- charset = fts_index_get_charset(index);
-
- /* The first field is on the Tokenized Word */
- field = dict_index_get_nth_field(new_index, 0);
- field->name = NULL;
- field->prefix_len = 0;
- field->col = static_cast<dict_col_t*>(
- mem_heap_alloc(new_index->heap, sizeof(dict_col_t)));
- field->col->prtype = idx_field->col->prtype | DATA_NOT_NULL;
- field->col->mtype = charset == &my_charset_latin1
- ? DATA_VARCHAR : DATA_VARMYSQL;
- field->col->mbminmaxlen = idx_field->col->mbminmaxlen;
- field->col->len = HA_FT_MAXCHARLEN * DATA_MBMAXLEN(field->col->mbminmaxlen);
-
- field->fixed_len = 0;
-
- /* Doc ID */
- field = dict_index_get_nth_field(new_index, 1);
- field->name = NULL;
- field->prefix_len = 0;
- field->col = static_cast<dict_col_t*>(
- mem_heap_alloc(new_index->heap, sizeof(dict_col_t)));
- field->col->mtype = DATA_INT;
- *opt_doc_id_size = FALSE;
-
- /* Check whether we can use 4 bytes instead of 8 bytes integer
- field to hold the Doc ID, thus reduce the overall sort size */
- if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
- /* If Doc ID column is being added by this create
- index, then just check the number of rows in the table */
- if (dict_table_get_n_rows(table) < MAX_DOC_ID_OPT_VAL) {
- *opt_doc_id_size = TRUE;
- }
- } else {
- doc_id_t max_doc_id;
-
- /* If the Doc ID column is supplied by user, then
- check the maximum Doc ID in the table */
- max_doc_id = fts_get_max_doc_id((dict_table_t*) table);
-
- if (max_doc_id && max_doc_id < MAX_DOC_ID_OPT_VAL) {
- *opt_doc_id_size = TRUE;
- }
- }
-
- if (*opt_doc_id_size) {
- field->col->len = sizeof(ib_uint32_t);
- field->fixed_len = sizeof(ib_uint32_t);
- } else {
- field->col->len = FTS_DOC_ID_LEN;
- field->fixed_len = FTS_DOC_ID_LEN;
- }
-
- field->col->prtype = DATA_NOT_NULL | DATA_BINARY_TYPE;
-
- field->col->mbminmaxlen = 0;
-
- /* The third field is on the word's position in the original doc */
- field = dict_index_get_nth_field(new_index, 2);
- field->name = NULL;
- field->prefix_len = 0;
- field->col = static_cast<dict_col_t*>(
- mem_heap_alloc(new_index->heap, sizeof(dict_col_t)));
- field->col->mtype = DATA_INT;
- field->col->len = 4 ;
- field->fixed_len = 4;
- field->col->prtype = DATA_NOT_NULL;
- field->col->mbminmaxlen = 0;
-
- return(new_index);
-}
-/*********************************************************************//**
-Initialize FTS parallel sort structures.
-@return TRUE if all successful */
-UNIV_INTERN
-ibool
-row_fts_psort_info_init(
-/*====================*/
- trx_t* trx, /*!< in: transaction */
- row_merge_dup_t* dup, /*!< in,own: descriptor of
- FTS index being created */
- const dict_table_t* new_table,/*!< in: table on which indexes are
- created */
- ibool opt_doc_id_size,
- /*!< in: whether to use 4 bytes
- instead of 8 bytes integer to
- store Doc ID during sort */
- fts_psort_t** psort, /*!< out: parallel sort info to be
- instantiated */
- fts_psort_t** merge) /*!< out: parallel merge info
- to be instantiated */
-{
- ulint i;
- ulint j;
- fts_psort_common_t* common_info = NULL;
- fts_psort_t* psort_info = NULL;
- fts_psort_t* merge_info = NULL;
- ulint block_size;
- ibool ret = TRUE;
- fil_space_crypt_t* crypt_data = NULL;
- bool encrypted = false;
-
- block_size = 3 * srv_sort_buf_size;
-
- *psort = psort_info = static_cast<fts_psort_t*>(mem_zalloc(
- fts_sort_pll_degree * sizeof *psort_info));
-
- if (!psort_info) {
- ut_free(dup);
- return(FALSE);
- }
-
- /* Common Info for all sort threads */
- common_info = static_cast<fts_psort_common_t*>(
- mem_alloc(sizeof *common_info));
-
- if (!common_info) {
- ut_free(dup);
- mem_free(psort_info);
- return(FALSE);
- }
-
- common_info->dup = dup;
- common_info->new_table = (dict_table_t*) new_table;
- common_info->trx = trx;
- common_info->all_info = psort_info;
- common_info->sort_event = os_event_create();
- common_info->merge_event = os_event_create();
- common_info->opt_doc_id_size = opt_doc_id_size;
-
- /* Theoretically the tablespace can be dropped straight away.
- In practice, the DDL completion will wait for this thread to
- finish. */
- if (fil_space_t* space = fil_space_acquire(new_table->space)) {
- crypt_data = space->crypt_data;
- fil_space_release(space);
- }
-
- if (crypt_data && crypt_data->should_encrypt()) {
- common_info->crypt_data = crypt_data;
- encrypted = true;
- } else {
- /* Not needed */
- common_info->crypt_data = NULL;
- crypt_data = NULL;
- }
-
- ut_ad(trx->mysql_thd != NULL);
- const char* path = thd_innodb_tmpdir(trx->mysql_thd);
-
- /* There will be FTS_NUM_AUX_INDEX number of "sort buckets" for
- each parallel sort thread. Each "sort bucket" holds records for
- a particular "FTS index partition" */
- for (j = 0; j < fts_sort_pll_degree; j++) {
- for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
-
- psort_info[j].merge_file[i] =
- static_cast<merge_file_t*>(
- mem_zalloc(sizeof(merge_file_t)));
-
- if (!psort_info[j].merge_file[i]) {
- ret = FALSE;
- goto func_exit;
- }
-
- psort_info[j].merge_buf[i] = row_merge_buf_create(
- dup->index);
-
- if (row_merge_file_create(psort_info[j].merge_file[i],
- path) < 0) {
- goto func_exit;
- }
-
- /* Need to align memory for O_DIRECT write */
- psort_info[j].block_alloc[i] =
- static_cast<row_merge_block_t*>(ut_malloc(
- block_size + 1024));
-
- psort_info[j].merge_block[i] =
- static_cast<row_merge_block_t*>(
- ut_align(
- psort_info[j].block_alloc[i], 1024));
-
- /* If tablespace is encrypted, allocate additional buffer for
- encryption/decryption. */
- if (encrypted) {
-
- /* Need to align memory for O_DIRECT write */
- psort_info[j].crypt_alloc[i] =
- static_cast<row_merge_block_t*>(ut_malloc(
- block_size + 1024));
-
- psort_info[j].crypt_block[i] =
- static_cast<row_merge_block_t*>(
- ut_align(
- psort_info[j].crypt_alloc[i], 1024));
-
- if (!psort_info[j].crypt_block[i]) {
- ret = FALSE;
- goto func_exit;
- }
- } else {
- psort_info[j].crypt_alloc[i] = NULL;
- psort_info[j].crypt_block[i] = NULL;
- }
-
- if (!psort_info[j].merge_block[i]) {
- ret = FALSE;
- goto func_exit;
- }
- }
-
- psort_info[j].child_status = 0;
- psort_info[j].state = 0;
- psort_info[j].psort_common = common_info;
- psort_info[j].error = DB_SUCCESS;
- psort_info[j].memory_used = 0;
- mutex_create(fts_pll_tokenize_mutex_key, &psort_info[j].mutex, SYNC_FTS_TOKENIZE);
- }
-
- /* Initialize merge_info structures parallel merge and insert
- into auxiliary FTS tables (FTS_INDEX_TABLE) */
- *merge = merge_info = static_cast<fts_psort_t*>(
- mem_alloc(FTS_NUM_AUX_INDEX * sizeof *merge_info));
-
- for (j = 0; j < FTS_NUM_AUX_INDEX; j++) {
-
- merge_info[j].child_status = 0;
- merge_info[j].state = 0;
- merge_info[j].psort_common = common_info;
- }
-
-func_exit:
- if (!ret) {
- row_fts_psort_info_destroy(psort_info, merge_info);
- }
-
- return(ret);
-}
-/*********************************************************************//**
-Clean up and deallocate FTS parallel sort structures, and close the
-merge sort files */
-UNIV_INTERN
-void
-row_fts_psort_info_destroy(
-/*=======================*/
- fts_psort_t* psort_info, /*!< parallel sort info */
- fts_psort_t* merge_info) /*!< parallel merge info */
-{
- ulint i;
- ulint j;
-
- if (psort_info) {
- for (j = 0; j < fts_sort_pll_degree; j++) {
- for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
- if (psort_info[j].merge_file[i]) {
- row_merge_file_destroy(
- psort_info[j].merge_file[i]);
- }
-
- if (psort_info[j].block_alloc[i]) {
- ut_free(psort_info[j].block_alloc[i]);
- }
-
- if (psort_info[j].crypt_alloc[i]) {
- ut_free(psort_info[j].crypt_alloc[i]);
- }
-
- mem_free(psort_info[j].merge_file[i]);
- }
-
- mutex_free(&psort_info[j].mutex);
- }
-
- os_event_free(merge_info[0].psort_common->sort_event);
- os_event_free(merge_info[0].psort_common->merge_event);
- ut_free(merge_info[0].psort_common->dup);
- mem_free(merge_info[0].psort_common);
- mem_free(psort_info);
- }
-
- if (merge_info) {
- mem_free(merge_info);
- }
-}
-/*********************************************************************//**
-Free up merge buffers when merge sort is done */
-UNIV_INTERN
-void
-row_fts_free_pll_merge_buf(
-/*=======================*/
- fts_psort_t* psort_info) /*!< in: parallel sort info */
-{
- ulint j;
- ulint i;
-
- if (!psort_info) {
- return;
- }
-
- for (j = 0; j < fts_sort_pll_degree; j++) {
- for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
- row_merge_buf_free(psort_info[j].merge_buf[i]);
- }
- }
-
- return;
-}
-
-/*********************************************************************//**
-Tokenize incoming text data and add to the sort buffer.
-@see row_merge_buf_encode()
-@return TRUE if the record passed, FALSE if out of space */
-static
-ibool
-row_merge_fts_doc_tokenize(
-/*=======================*/
- row_merge_buf_t** sort_buf, /*!< in/out: sort buffer */
- doc_id_t doc_id, /*!< in: Doc ID */
- fts_doc_t* doc, /*!< in: Doc to be tokenized */
- merge_file_t** merge_file, /*!< in/out: merge file */
- ibool opt_doc_id_size,/*!< in: whether to use 4 bytes
- instead of 8 bytes integer to
- store Doc ID during sort*/
- fts_tokenize_ctx_t* t_ctx) /*!< in/out: tokenize context */
-{
- ulint i;
- ulint inc;
- fts_string_t str;
- ulint len;
- row_merge_buf_t* buf;
- dfield_t* field;
- fts_string_t t_str;
- ibool buf_full = FALSE;
- byte str_buf[FTS_MAX_WORD_LEN + 1];
- ulint data_size[FTS_NUM_AUX_INDEX];
- ulint n_tuple[FTS_NUM_AUX_INDEX];
-
- t_str.f_n_char = 0;
- t_ctx->buf_used = 0;
-
- memset(n_tuple, 0, FTS_NUM_AUX_INDEX * sizeof(ulint));
- memset(data_size, 0, FTS_NUM_AUX_INDEX * sizeof(ulint));
-
- /* Tokenize the data and add each word string, its corresponding
- doc id and position to sort buffer */
- for (i = t_ctx->processed_len; i < doc->text.f_len; i += inc) {
- ib_rbt_bound_t parent;
- ulint idx = 0;
- ib_uint32_t position;
- ulint offset = 0;
- ulint cur_len;
- doc_id_t write_doc_id;
-
- inc = innobase_mysql_fts_get_token(
- doc->charset, doc->text.f_str + i,
- doc->text.f_str + doc->text.f_len, &str, &offset);
-
- ut_a(inc > 0);
-
- /* Ignore string whose character number is less than
- "fts_min_token_size" or more than "fts_max_token_size" */
- if (str.f_n_char < fts_min_token_size
- || str.f_n_char > fts_max_token_size) {
-
- t_ctx->processed_len += inc;
- continue;
- }
-
- t_str.f_len = innobase_fts_casedn_str(
- doc->charset, (char*) str.f_str, str.f_len,
- (char*) &str_buf, FTS_MAX_WORD_LEN + 1);
-
- t_str.f_str = (byte*) &str_buf;
-
- /* if "cached_stopword" is defined, ingore words in the
- stopword list */
- if (t_ctx->cached_stopword
- && rbt_search(t_ctx->cached_stopword,
- &parent, &t_str) == 0) {
-
- t_ctx->processed_len += inc;
- continue;
- }
-
- /* There are FTS_NUM_AUX_INDEX auxiliary tables, find
- out which sort buffer to put this word record in */
- t_ctx->buf_used = fts_select_index(
- doc->charset, t_str.f_str, t_str.f_len);
-
- buf = sort_buf[t_ctx->buf_used];
-
- ut_a(t_ctx->buf_used < FTS_NUM_AUX_INDEX);
- idx = t_ctx->buf_used;
-
- mtuple_t* mtuple = &buf->tuples[buf->n_tuples + n_tuple[idx]];
-
- field = mtuple->fields = static_cast<dfield_t*>(
- mem_heap_alloc(buf->heap,
- FTS_NUM_FIELDS_SORT * sizeof *field));
-
- /* The first field is the tokenized word */
- dfield_set_data(field, t_str.f_str, t_str.f_len);
- len = dfield_get_len(field);
-
- dict_col_copy_type(dict_index_get_nth_col(buf->index, 0), &field->type);
- field->type.prtype |= DATA_NOT_NULL;
- ut_ad(len <= field->type.len);
-
- /* For the temporary file, row_merge_buf_encode() uses
- 1 byte for representing the number of extra_size bytes.
- This number will always be 1, because for this 3-field index
- consisting of one variable-size column, extra_size will always
- be 1 or 2, which can be encoded in one byte.
-
- The extra_size is 1 byte if the length of the
- variable-length column is less than 128 bytes or the
- maximum length is less than 256 bytes. */
-
- /* One variable length column, word with its lenght less than
- fts_max_token_size, add one extra size and one extra byte.
-
- Since the max length for FTS token now is larger than 255,
- so we will need to signify length byte itself, so only 1 to 128
- bytes can be used for 1 bytes, larger than that 2 bytes. */
- if (len < 128 || field->type.len < 256) {
- /* Extra size is one byte. */
- cur_len = 2 + len;
- } else {
- /* Extra size is two bytes. */
- cur_len = 3 + len;
- }
-
- dfield_dup(field, buf->heap);
- field++;
-
- /* The second field is the Doc ID */
-
- ib_uint32_t doc_id_32_bit;
-
- if (!opt_doc_id_size) {
- fts_write_doc_id((byte*) &write_doc_id, doc_id);
-
- dfield_set_data(
- field, &write_doc_id, sizeof(write_doc_id));
- } else {
- mach_write_to_4(
- (byte*) &doc_id_32_bit, (ib_uint32_t) doc_id);
-
- dfield_set_data(
- field, &doc_id_32_bit, sizeof(doc_id_32_bit));
- }
-
- len = field->len;
- ut_ad(len == FTS_DOC_ID_LEN || len == sizeof(ib_uint32_t));
-
- field->type.mtype = DATA_INT;
- field->type.prtype = DATA_NOT_NULL | DATA_BINARY_TYPE;
- field->type.len = len;
- field->type.mbminmaxlen = 0;
-
- cur_len += len;
- dfield_dup(field, buf->heap);
-
- ++field;
-
- /* The third field is the position */
- mach_write_to_4(
- (byte*) &position,
- (i + offset + inc - str.f_len + t_ctx->init_pos));
-
- dfield_set_data(field, &position, sizeof(position));
- len = dfield_get_len(field);
- ut_ad(len == sizeof(ib_uint32_t));
-
- field->type.mtype = DATA_INT;
- field->type.prtype = DATA_NOT_NULL;
- field->type.len = len;
- field->type.mbminmaxlen = 0;
- cur_len += len;
- dfield_dup(field, buf->heap);
-
- /* Reserve one byte for the end marker of row_merge_block_t
- and we have reserved ROW_MERGE_RESERVE_SIZE (= 4) for
- encryption key_version in the beginning of the buffer. */
- if (buf->total_size + data_size[idx] + cur_len
- >= (srv_sort_buf_size - 1 - ROW_MERGE_RESERVE_SIZE)) {
-
- buf_full = TRUE;
- break;
- }
-
- /* Increment the number of tuples */
- n_tuple[idx]++;
- t_ctx->processed_len += inc;
- data_size[idx] += cur_len;
- }
-
- /* Update the data length and the number of new word tuples
- added in this round of tokenization */
- for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
- /* The computation of total_size below assumes that no
- delete-mark flags will be stored and that all fields
- are NOT NULL and fixed-length. */
-
- sort_buf[i]->total_size += data_size[i];
-
- sort_buf[i]->n_tuples += n_tuple[i];
-
- merge_file[i]->n_rec += n_tuple[i];
- t_ctx->rows_added[i] += n_tuple[i];
- }
-
- if (!buf_full) {
- /* we pad one byte between text accross two fields */
- t_ctx->init_pos += doc->text.f_len + 1;
- }
-
- return(!buf_full);
-}
-
-/*********************************************************************//**
-Get next doc item from fts_doc_list */
-UNIV_INLINE
-void
-row_merge_fts_get_next_doc_item(
-/*============================*/
- fts_psort_t* psort_info, /*!< in: psort_info */
- fts_doc_item_t** doc_item) /*!< in/out: doc item */
-{
- if (*doc_item != NULL) {
- ut_free(*doc_item);
- }
-
- mutex_enter(&psort_info->mutex);
-
- *doc_item = UT_LIST_GET_FIRST(psort_info->fts_doc_list);
- if (*doc_item != NULL) {
- UT_LIST_REMOVE(doc_list, psort_info->fts_doc_list,
- *doc_item);
-
- ut_ad(psort_info->memory_used >= sizeof(fts_doc_item_t)
- + (*doc_item)->field->len);
- psort_info->memory_used -= sizeof(fts_doc_item_t)
- + (*doc_item)->field->len;
- }
-
- mutex_exit(&psort_info->mutex);
-}
-
-/*********************************************************************//**
-Function performs parallel tokenization of the incoming doc strings.
-It also performs the initial in memory sort of the parsed records.
-@return OS_THREAD_DUMMY_RETURN */
-UNIV_INTERN
-os_thread_ret_t
-fts_parallel_tokenization(
-/*======================*/
- void* arg) /*!< in: psort_info for the thread */
-{
- fts_psort_t* psort_info = (fts_psort_t*) arg;
- ulint i;
- fts_doc_item_t* doc_item = NULL;
- row_merge_buf_t** buf;
- ibool processed = FALSE;
- merge_file_t** merge_file;
- row_merge_block_t** block;
- row_merge_block_t** crypt_block;
- int tmpfd[FTS_NUM_AUX_INDEX];
- ulint mycount[FTS_NUM_AUX_INDEX];
- ib_uint64_t total_rec = 0;
- ulint num_doc_processed = 0;
- doc_id_t last_doc_id = 0;
- ulint zip_size;
- mem_heap_t* blob_heap = NULL;
- fts_doc_t doc;
- dict_table_t* table = psort_info->psort_common->new_table;
- fts_tokenize_ctx_t t_ctx;
- ulint retried = 0;
- dberr_t error = DB_SUCCESS;
- fil_space_crypt_t* crypt_data = NULL;
-
- ut_ad(psort_info->psort_common->trx->mysql_thd != NULL);
-
- const char* path = thd_innodb_tmpdir(
- psort_info->psort_common->trx->mysql_thd);
-
- ut_ad(psort_info);
-
- buf = psort_info->merge_buf;
- merge_file = psort_info->merge_file;
- blob_heap = mem_heap_create(512);
- memset(&doc, 0, sizeof(doc));
- memset(&t_ctx, 0, sizeof(t_ctx));
- memset(mycount, 0, FTS_NUM_AUX_INDEX * sizeof(int));
-
- doc.charset = fts_index_get_charset(
- psort_info->psort_common->dup->index);
-
- block = psort_info->merge_block;
- crypt_block = psort_info->crypt_block;
- crypt_data = psort_info->psort_common->crypt_data;
- zip_size = dict_table_zip_size(table);
-
- row_merge_fts_get_next_doc_item(psort_info, &doc_item);
-
- t_ctx.cached_stopword = table->fts->cache->stopword_info.cached_stopword;
- processed = TRUE;
-loop:
- while (doc_item) {
- dfield_t* dfield = doc_item->field;
-
- last_doc_id = doc_item->doc_id;
-
- ut_ad (dfield->data != NULL
- && dfield_get_len(dfield) != UNIV_SQL_NULL);
-
- /* If finish processing the last item, update "doc" with
- strings in the doc_item, otherwise continue processing last
- item */
- if (processed) {
- byte* data;
- ulint data_len;
-
- dfield = doc_item->field;
- data = static_cast<byte*>(dfield_get_data(dfield));
- data_len = dfield_get_len(dfield);
-
- if (dfield_is_ext(dfield)) {
- doc.text.f_str =
- btr_copy_externally_stored_field(
- &doc.text.f_len, data,
- zip_size, data_len, blob_heap,
- NULL);
- } else {
- doc.text.f_str = data;
- doc.text.f_len = data_len;
- }
-
- doc.tokens = 0;
- t_ctx.processed_len = 0;
- } else {
- /* Not yet finish processing the "doc" on hand,
- continue processing it */
- ut_ad(doc.text.f_str);
- ut_ad(t_ctx.processed_len < doc.text.f_len);
- }
-
- processed = row_merge_fts_doc_tokenize(
- buf, doc_item->doc_id, &doc,
- merge_file, psort_info->psort_common->opt_doc_id_size,
- &t_ctx);
-
- /* Current sort buffer full, need to recycle */
- if (!processed) {
- ut_ad(t_ctx.processed_len < doc.text.f_len);
- ut_ad(t_ctx.rows_added[t_ctx.buf_used]);
- break;
- }
-
- num_doc_processed++;
-
- if (fts_enable_diag_print && num_doc_processed % 10000 == 1) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "number of doc processed %d\n",
- (int) num_doc_processed);
-#ifdef FTS_INTERNAL_DIAG_PRINT
- for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "ID %d, partition %d, word "
- "%d\n",(int) psort_info->psort_id,
- (int) i, (int) mycount[i]);
- }
-#endif
- }
-
- mem_heap_empty(blob_heap);
-
- row_merge_fts_get_next_doc_item(psort_info, &doc_item);
-
- if (doc_item && last_doc_id != doc_item->doc_id) {
- t_ctx.init_pos = 0;
- }
- }
-
- /* If we run out of current sort buffer, need to sort
- and flush the sort buffer to disk */
- if (t_ctx.rows_added[t_ctx.buf_used] && !processed) {
- row_merge_buf_sort(buf[t_ctx.buf_used], NULL);
- row_merge_buf_write(buf[t_ctx.buf_used],
- merge_file[t_ctx.buf_used],
- block[t_ctx.buf_used]);
-
- if (!row_merge_write(merge_file[t_ctx.buf_used]->fd,
- merge_file[t_ctx.buf_used]->offset++,
- block[t_ctx.buf_used],
- crypt_data,
- crypt_block[t_ctx.buf_used],
- table->space)) {
- error = DB_TEMP_FILE_WRITE_FAILURE;
- goto func_exit;
- }
-
- UNIV_MEM_INVALID(block[t_ctx.buf_used][0], srv_sort_buf_size);
- buf[t_ctx.buf_used] = row_merge_buf_empty(buf[t_ctx.buf_used]);
- mycount[t_ctx.buf_used] += t_ctx.rows_added[t_ctx.buf_used];
- t_ctx.rows_added[t_ctx.buf_used] = 0;
-
- ut_a(doc_item);
- goto loop;
- }
-
- /* Parent done scanning, and if finish processing all the docs, exit */
- if (psort_info->state == FTS_PARENT_COMPLETE) {
- if (UT_LIST_GET_LEN(psort_info->fts_doc_list) == 0) {
- goto exit;
- } else if (retried > 10000) {
- ut_ad(!doc_item);
- /* retied too many times and cannot get new record */
- ib_logf(IB_LOG_LEVEL_ERROR,
- "InnoDB: FTS parallel sort processed "
- "%lu records, the sort queue has "
- "%lu records. But sort cannot get "
- "the next records", num_doc_processed,
- UT_LIST_GET_LEN(
- psort_info->fts_doc_list));
- goto exit;
- }
- } else if (psort_info->state == FTS_PARENT_EXITING) {
- /* Parent abort */
- goto func_exit;
- }
-
- if (doc_item == NULL) {
- os_thread_yield();
- }
-
- row_merge_fts_get_next_doc_item(psort_info, &doc_item);
-
- if (doc_item != NULL) {
- if (last_doc_id != doc_item->doc_id) {
- t_ctx.init_pos = 0;
- }
-
- retried = 0;
- } else if (psort_info->state == FTS_PARENT_COMPLETE) {
- retried++;
- }
-
- goto loop;
-
-exit:
- /* Do a final sort of the last (or latest) batch of records
- in block memory. Flush them to temp file if records cannot
- be hold in one block memory */
- for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
- if (t_ctx.rows_added[i]) {
- row_merge_buf_sort(buf[i], NULL);
- row_merge_buf_write(
- buf[i], merge_file[i], block[i]);
-
- /* Write to temp file, only if records have
- been flushed to temp file before (offset > 0):
- The pseudo code for sort is following:
-
- while (there are rows) {
- tokenize rows, put result in block[]
- if (block[] runs out) {
- sort rows;
- write to temp file with
- row_merge_write();
- offset++;
- }
- }
-
- # write out the last batch
- if (offset > 0) {
- row_merge_write();
- offset++;
- } else {
- # no need to write anything
- offset stay as 0
- }
-
- so if merge_file[i]->offset is 0 when we come to
- here as the last batch, this means rows have
- never flush to temp file, it can be held all in
- memory */
- if (merge_file[i]->offset != 0) {
- if (!row_merge_write(merge_file[i]->fd,
- merge_file[i]->offset++,
- block[i],
- crypt_data,
- crypt_block[i],
- table->space)) {
- error = DB_TEMP_FILE_WRITE_FAILURE;
- goto func_exit;
- }
-
- UNIV_MEM_INVALID(block[i][0],
- srv_sort_buf_size);
-
- if (crypt_block[i]) {
- UNIV_MEM_INVALID(crypt_block[i][0],
- srv_sort_buf_size);
- }
- }
-
- buf[i] = row_merge_buf_empty(buf[i]);
- t_ctx.rows_added[i] = 0;
- }
- }
-
- if (fts_enable_diag_print) {
- DEBUG_FTS_SORT_PRINT(" InnoDB_FTS: start merge sort\n");
- }
-
- for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
- if (!merge_file[i]->offset) {
- continue;
- }
-
- tmpfd[i] = row_merge_file_create_low(path);
- if (tmpfd[i] < 0) {
- error = DB_OUT_OF_MEMORY;
- goto func_exit;
- }
-
- error = row_merge_sort(psort_info->psort_common->trx,
- psort_info->psort_common->dup,
- merge_file[i], block[i], &tmpfd[i],
- false, 0.0/* pct_progress */, 0.0/* pct_cost */,
- crypt_data, crypt_block[i], table->space);
-
- if (error != DB_SUCCESS) {
- close(tmpfd[i]);
- goto func_exit;
- }
-
- total_rec += merge_file[i]->n_rec;
- close(tmpfd[i]);
- }
-
-func_exit:
- if (fts_enable_diag_print) {
- DEBUG_FTS_SORT_PRINT(" InnoDB_FTS: complete merge sort\n");
- }
-
- mem_heap_free(blob_heap);
-
- mutex_enter(&psort_info->mutex);
- psort_info->error = error;
- mutex_exit(&psort_info->mutex);
-
- if (UT_LIST_GET_LEN(psort_info->fts_doc_list) > 0) {
- /* child can exit either with error or told by parent. */
- ut_ad(error != DB_SUCCESS
- || psort_info->state == FTS_PARENT_EXITING);
- }
-
- /* Free fts doc list in case of error. */
- do {
- row_merge_fts_get_next_doc_item(psort_info, &doc_item);
- } while (doc_item != NULL);
-
- psort_info->child_status = FTS_CHILD_COMPLETE;
- os_event_set(psort_info->psort_common->sort_event);
- psort_info->child_status = FTS_CHILD_EXITING;
-
-#ifdef __WIN__
- CloseHandle(psort_info->thread_hdl);
-#endif /*__WIN__ */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/*********************************************************************//**
-Start the parallel tokenization and parallel merge sort */
-UNIV_INTERN
-void
-row_fts_start_psort(
-/*================*/
- fts_psort_t* psort_info) /*!< parallel sort structure */
-{
- ulint i = 0;
- os_thread_id_t thd_id;
-
- for (i = 0; i < fts_sort_pll_degree; i++) {
- psort_info[i].psort_id = i;
- psort_info[i].thread_hdl = os_thread_create(
- fts_parallel_tokenization,
- (void*) &psort_info[i], &thd_id);
- }
-}
-
-/*********************************************************************//**
-Function performs the merge and insertion of the sorted records.
-@return OS_THREAD_DUMMY_RETURN */
-UNIV_INTERN
-os_thread_ret_t
-fts_parallel_merge(
-/*===============*/
- void* arg) /*!< in: parallel merge info */
-{
- fts_psort_t* psort_info = (fts_psort_t*) arg;
- ulint id;
-
- ut_ad(psort_info);
-
- id = psort_info->psort_id;
-
- row_fts_merge_insert(psort_info->psort_common->dup->index,
- psort_info->psort_common->new_table,
- psort_info->psort_common->all_info, id);
-
- psort_info->child_status = FTS_CHILD_COMPLETE;
- os_event_set(psort_info->psort_common->merge_event);
- psort_info->child_status = FTS_CHILD_EXITING;
-
-#ifdef __WIN__
- CloseHandle(psort_info->thread_hdl);
-#endif /*__WIN__ */
-
- os_thread_exit(NULL, false);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/*********************************************************************//**
-Kick off the parallel merge and insert thread */
-UNIV_INTERN
-void
-row_fts_start_parallel_merge(
-/*=========================*/
- fts_psort_t* merge_info) /*!< in: parallel sort info */
-{
- int i = 0;
- os_thread_id_t thd_id;
-
- /* Kick off merge/insert threads */
- for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
- merge_info[i].psort_id = i;
- merge_info[i].child_status = 0;
-
- merge_info[i].thread_hdl = os_thread_create(
- fts_parallel_merge, (void*) &merge_info[i], &thd_id);
- }
-}
-
-/********************************************************************//**
-Insert processed FTS data to auxillary index tables.
-@return DB_SUCCESS if insertion runs fine */
-static MY_ATTRIBUTE((nonnull))
-dberr_t
-row_merge_write_fts_word(
-/*=====================*/
- trx_t* trx, /*!< in: transaction */
- que_t** ins_graph, /*!< in: Insert query graphs */
- fts_tokenizer_word_t* word, /*!< in: sorted and tokenized
- word */
- fts_table_t* fts_table, /*!< in: fts aux table instance */
- CHARSET_INFO* charset) /*!< in: charset */
-{
- ulint selected;
- dberr_t ret = DB_SUCCESS;
-
- selected = fts_select_index(
- charset, word->text.f_str, word->text.f_len);
- fts_table->suffix = fts_get_suffix(selected);
-
- /* Pop out each fts_node in word->nodes write them to auxiliary table */
- while (ib_vector_size(word->nodes) > 0) {
- dberr_t error;
- fts_node_t* fts_node;
-
- fts_node = static_cast<fts_node_t*>(ib_vector_pop(word->nodes));
-
- error = fts_write_node(
- trx, &ins_graph[selected], fts_table, &word->text,
- fts_node);
-
- if (error != DB_SUCCESS) {
- fprintf(stderr, "InnoDB: failed to write"
- " word %s to FTS auxiliary index"
- " table, error (%s) \n",
- word->text.f_str, ut_strerr(error));
- ret = error;
- }
-
- ut_free(fts_node->ilist);
- fts_node->ilist = NULL;
- }
-
- return(ret);
-}
-
-/*********************************************************************//**
-Read sorted FTS data files and insert data tuples to auxillary tables.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-void
-row_fts_insert_tuple(
-/*=================*/
- fts_psort_insert_t*
- ins_ctx, /*!< in: insert context */
- fts_tokenizer_word_t* word, /*!< in: last processed
- tokenized word */
- ib_vector_t* positions, /*!< in: word position */
- doc_id_t* in_doc_id, /*!< in: last item doc id */
- dtuple_t* dtuple) /*!< in: entry to insert */
-{
- fts_node_t* fts_node = NULL;
- dfield_t* dfield;
- doc_id_t doc_id;
- ulint position;
- fts_string_t token_word;
- ulint i;
-
- /* Get fts_node for the FTS auxillary INDEX table */
- if (ib_vector_size(word->nodes) > 0) {
- fts_node = static_cast<fts_node_t*>(
- ib_vector_last(word->nodes));
- }
-
- if (fts_node == NULL
- || fts_node->ilist_size > FTS_ILIST_MAX_SIZE) {
-
- fts_node = static_cast<fts_node_t*>(
- ib_vector_push(word->nodes, NULL));
-
- memset(fts_node, 0x0, sizeof(*fts_node));
- }
-
- /* If dtuple == NULL, this is the last word to be processed */
- if (!dtuple) {
- if (fts_node && ib_vector_size(positions) > 0) {
- fts_cache_node_add_positions(
- NULL, fts_node, *in_doc_id,
- positions);
-
- /* Write out the current word */
- row_merge_write_fts_word(ins_ctx->trx,
- ins_ctx->ins_graph, word,
- &ins_ctx->fts_table,
- ins_ctx->charset);
-
- }
-
- return;
- }
-
- /* Get the first field for the tokenized word */
- dfield = dtuple_get_nth_field(dtuple, 0);
-
- token_word.f_n_char = 0;
- token_word.f_len = dfield->len;
- token_word.f_str = static_cast<byte*>(dfield_get_data(dfield));
-
- if (!word->text.f_str) {
- fts_utf8_string_dup(&word->text, &token_word, ins_ctx->heap);
- }
-
- /* compare to the last word, to see if they are the same
- word */
- if (innobase_fts_text_cmp(ins_ctx->charset,
- &word->text, &token_word) != 0) {
- ulint num_item;
-
- /* Getting a new word, flush the last position info
- for the currnt word in fts_node */
- if (ib_vector_size(positions) > 0) {
- fts_cache_node_add_positions(
- NULL, fts_node, *in_doc_id, positions);
- }
-
- /* Write out the current word */
- row_merge_write_fts_word(ins_ctx->trx, ins_ctx->ins_graph,
- word, &ins_ctx->fts_table,
- ins_ctx->charset);
-
- /* Copy the new word */
- fts_utf8_string_dup(&word->text, &token_word, ins_ctx->heap);
-
- num_item = ib_vector_size(positions);
-
- /* Clean up position queue */
- for (i = 0; i < num_item; i++) {
- ib_vector_pop(positions);
- }
-
- /* Reset Doc ID */
- *in_doc_id = 0;
- memset(fts_node, 0x0, sizeof(*fts_node));
- }
-
- /* Get the word's Doc ID */
- dfield = dtuple_get_nth_field(dtuple, 1);
-
- if (!ins_ctx->opt_doc_id_size) {
- doc_id = fts_read_doc_id(
- static_cast<byte*>(dfield_get_data(dfield)));
- } else {
- doc_id = (doc_id_t) mach_read_from_4(
- static_cast<byte*>(dfield_get_data(dfield)));
- }
-
- /* Get the word's position info */
- dfield = dtuple_get_nth_field(dtuple, 2);
- position = mach_read_from_4(static_cast<byte*>(dfield_get_data(dfield)));
-
- /* If this is the same word as the last word, and they
- have the same Doc ID, we just need to add its position
- info. Otherwise, we will flush position info to the
- fts_node and initiate a new position vector */
- if (!(*in_doc_id) || *in_doc_id == doc_id) {
- ib_vector_push(positions, &position);
- } else {
- ulint num_pos = ib_vector_size(positions);
-
- fts_cache_node_add_positions(NULL, fts_node,
- *in_doc_id, positions);
- for (i = 0; i < num_pos; i++) {
- ib_vector_pop(positions);
- }
- ib_vector_push(positions, &position);
- }
-
- /* record the current Doc ID */
- *in_doc_id = doc_id;
-}
-
-/*********************************************************************//**
-Propagate a newly added record up one level in the selection tree
-@return parent where this value propagated to */
-static
-int
-row_fts_sel_tree_propagate(
-/*=======================*/
- int propogated, /*<! in: tree node propagated */
- int* sel_tree, /*<! in: selection tree */
- const mrec_t** mrec, /*<! in: sort record */
- ulint** offsets, /*<! in: record offsets */
- dict_index_t* index) /*<! in/out: FTS index */
-{
- ulint parent;
- int child_left;
- int child_right;
- int selected;
-
- /* Find which parent this value will be propagated to */
- parent = (propogated - 1) / 2;
-
- /* Find out which value is smaller, and to propagate */
- child_left = sel_tree[parent * 2 + 1];
- child_right = sel_tree[parent * 2 + 2];
-
- if (child_left == -1 || mrec[child_left] == NULL) {
- if (child_right == -1
- || mrec[child_right] == NULL) {
- selected = -1;
- } else {
- selected = child_right ;
- }
- } else if (child_right == -1
- || mrec[child_right] == NULL) {
- selected = child_left;
- } else if (cmp_rec_rec_simple(mrec[child_left], mrec[child_right],
- offsets[child_left],
- offsets[child_right],
- index, NULL) < 0) {
- selected = child_left;
- } else {
- selected = child_right;
- }
-
- sel_tree[parent] = selected;
-
- return(static_cast<int>(parent));
-}
-
-/*********************************************************************//**
-Readjust selection tree after popping the root and read a new value
-@return the new root */
-static
-int
-row_fts_sel_tree_update(
-/*====================*/
- int* sel_tree, /*<! in/out: selection tree */
- ulint propagated, /*<! in: node to propagate up */
- ulint height, /*<! in: tree height */
- const mrec_t** mrec, /*<! in: sort record */
- ulint** offsets, /*<! in: record offsets */
- dict_index_t* index) /*<! in: index dictionary */
-{
- ulint i;
-
- for (i = 1; i <= height; i++) {
- propagated = static_cast<ulint>(row_fts_sel_tree_propagate(
- static_cast<int>(propagated), sel_tree, mrec, offsets, index));
- }
-
- return(sel_tree[0]);
-}
-
-/*********************************************************************//**
-Build selection tree at a specified level */
-static
-void
-row_fts_build_sel_tree_level(
-/*=========================*/
- int* sel_tree, /*<! in/out: selection tree */
- ulint level, /*<! in: selection tree level */
- const mrec_t** mrec, /*<! in: sort record */
- ulint** offsets, /*<! in: record offsets */
- dict_index_t* index) /*<! in: index dictionary */
-{
- ulint start;
- int child_left;
- int child_right;
- ulint i;
- ulint num_item = ulint(1) << level;
-
- start = num_item - 1;
-
- for (i = 0; i < num_item; i++) {
- child_left = sel_tree[(start + i) * 2 + 1];
- child_right = sel_tree[(start + i) * 2 + 2];
-
- if (child_left == -1) {
- if (child_right == -1) {
- sel_tree[start + i] = -1;
- } else {
- sel_tree[start + i] = child_right;
- }
- continue;
- } else if (child_right == -1) {
- sel_tree[start + i] = child_left;
- continue;
- }
-
- /* Deal with NULL child conditions */
- if (!mrec[child_left]) {
- if (!mrec[child_right]) {
- sel_tree[start + i] = -1;
- } else {
- sel_tree[start + i] = child_right;
- }
- continue;
- } else if (!mrec[child_right]) {
- sel_tree[start + i] = child_left;
- continue;
- }
-
- /* Select the smaller one to set parent pointer */
- int cmp = cmp_rec_rec_simple(
- mrec[child_left], mrec[child_right],
- offsets[child_left], offsets[child_right],
- index, NULL);
-
- sel_tree[start + i] = cmp < 0 ? child_left : child_right;
- }
-}
-
-/*********************************************************************//**
-Build a selection tree for merge. The selection tree is a binary tree
-and should have fts_sort_pll_degree / 2 levels. With root as level 0
-@return number of tree levels */
-static
-ulint
-row_fts_build_sel_tree(
-/*===================*/
- int* sel_tree, /*<! in/out: selection tree */
- const mrec_t** mrec, /*<! in: sort record */
- ulint** offsets, /*<! in: record offsets */
- dict_index_t* index) /*<! in: index dictionary */
-{
- ulint treelevel = 1;
- ulint num = 2;
- int i = 0;
- ulint start;
-
- /* No need to build selection tree if we only have two merge threads */
- if (fts_sort_pll_degree <= 2) {
- return(0);
- }
-
- while (num < fts_sort_pll_degree) {
- num = num << 1;
- treelevel++;
- }
-
- start = (ulint(1) << treelevel) - 1;
-
- for (i = 0; i < (int) fts_sort_pll_degree; i++) {
- sel_tree[i + start] = i;
- }
-
- for (i = static_cast<int>(treelevel) - 1; i >= 0; i--) {
- row_fts_build_sel_tree_level(
- sel_tree, static_cast<ulint>(i), mrec, offsets, index);
- }
-
- return(treelevel);
-}
-
-/*********************************************************************//**
-Read sorted file containing index data tuples and insert these data
-tuples to the index
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-dberr_t
-row_fts_merge_insert(
-/*=================*/
- dict_index_t* index, /*!< in: index */
- dict_table_t* table, /*!< in: new table */
- fts_psort_t* psort_info, /*!< parallel sort info */
- ulint id) /* !< in: which auxiliary table's data
- to insert to */
-{
- const byte** b;
- mem_heap_t* tuple_heap;
- mem_heap_t* heap;
- dberr_t error = DB_SUCCESS;
- ulint* foffs;
- ulint** offsets;
- fts_tokenizer_word_t new_word;
- ib_vector_t* positions;
- doc_id_t last_doc_id;
- ib_alloc_t* heap_alloc;
- ulint n_bytes;
- ulint i;
- mrec_buf_t** buf;
- int* fd;
- byte** block;
- byte** crypt_block;
- const mrec_t** mrec;
- ulint count = 0;
- int* sel_tree;
- ulint height;
- ulint start;
- fts_psort_insert_t ins_ctx;
- ulint count_diag = 0;
- fil_space_crypt_t* crypt_data = NULL;
- ulint space;
-
- ut_ad(index);
- ut_ad(table);
-
- /* We use the insert query graph as the dummy graph
- needed in the row module call */
-
- ins_ctx.trx = trx_allocate_for_background();
-
- ins_ctx.trx->op_info = "inserting index entries";
-
- ins_ctx.opt_doc_id_size = psort_info[0].psort_common->opt_doc_id_size;
- crypt_data = psort_info[0].psort_common->crypt_data;
-
- heap = mem_heap_create(500 + sizeof(mrec_buf_t));
-
- b = (const byte**) mem_heap_alloc(
- heap, sizeof (*b) * fts_sort_pll_degree);
- foffs = (ulint*) mem_heap_alloc(
- heap, sizeof(*foffs) * fts_sort_pll_degree);
- offsets = (ulint**) mem_heap_alloc(
- heap, sizeof(*offsets) * fts_sort_pll_degree);
- buf = (mrec_buf_t**) mem_heap_alloc(
- heap, sizeof(*buf) * fts_sort_pll_degree);
- fd = (int*) mem_heap_alloc(heap, sizeof(*fd) * fts_sort_pll_degree);
- block = (byte**) mem_heap_alloc(
- heap, sizeof(*block) * fts_sort_pll_degree);
- crypt_block = (byte**) mem_heap_alloc(
- heap, sizeof(*block) * fts_sort_pll_degree);
- mrec = (const mrec_t**) mem_heap_alloc(
- heap, sizeof(*mrec) * fts_sort_pll_degree);
- sel_tree = (int*) mem_heap_alloc(
- heap, sizeof(*sel_tree) * (fts_sort_pll_degree * 2));
-
- tuple_heap = mem_heap_create(1000);
-
- ins_ctx.charset = fts_index_get_charset(index);
- ins_ctx.heap = heap;
-
- for (i = 0; i < fts_sort_pll_degree; i++) {
- ulint num;
-
- num = 1 + REC_OFFS_HEADER_SIZE
- + dict_index_get_n_fields(index);
- offsets[i] = static_cast<ulint*>(mem_heap_zalloc(
- heap, num * sizeof *offsets[i]));
- offsets[i][0] = num;
- offsets[i][1] = dict_index_get_n_fields(index);
- block[i] = psort_info[i].merge_block[id];
- crypt_block[i] = psort_info[i].crypt_block[id];
- b[i] = psort_info[i].merge_block[id];
- fd[i] = psort_info[i].merge_file[id]->fd;
- foffs[i] = 0;
-
- buf[i] = static_cast<mrec_buf_t*>(
- mem_heap_alloc(heap, sizeof *buf[i]));
-
- count_diag += (int) psort_info[i].merge_file[id]->n_rec;
- }
-
- if (fts_enable_diag_print) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB_FTS: to inserted %lu records\n",
- (ulong) count_diag);
- }
-
- /* Initialize related variables if creating FTS indexes */
- heap_alloc = ib_heap_allocator_create(heap);
-
- memset(&new_word, 0, sizeof(new_word));
-
- new_word.nodes = ib_vector_create(heap_alloc, sizeof(fts_node_t), 4);
- positions = ib_vector_create(heap_alloc, sizeof(ulint), 32);
- last_doc_id = 0;
-
- /* Allocate insert query graphs for FTS auxillary
- Index Table, note we have FTS_NUM_AUX_INDEX such index tables */
- n_bytes = sizeof(que_t*) * (FTS_NUM_AUX_INDEX + 1);
- ins_ctx.ins_graph = static_cast<que_t**>(mem_heap_alloc(heap, n_bytes));
- memset(ins_ctx.ins_graph, 0x0, n_bytes);
-
- /* We should set the flags2 with aux_table_name here,
- in order to get the correct aux table names. */
- index->table->flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
- DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name",
- index->table->flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;);
-
- ins_ctx.fts_table.type = FTS_INDEX_TABLE;
- ins_ctx.fts_table.index_id = index->id;
- ins_ctx.fts_table.table_id = table->id;
- ins_ctx.fts_table.parent = index->table->name;
- ins_ctx.fts_table.table = index->table;
- space = table->space;
-
- for (i = 0; i < fts_sort_pll_degree; i++) {
- if (psort_info[i].merge_file[id]->n_rec == 0) {
- /* No Rows to read */
- mrec[i] = b[i] = NULL;
- } else {
- /* Read from temp file only if it has been
- written to. Otherwise, block memory holds
- all the sorted records */
- if (psort_info[i].merge_file[id]->offset > 0
- && (!row_merge_read(
- fd[i], foffs[i],
- (row_merge_block_t*) block[i],
- crypt_data,
- (row_merge_block_t*) crypt_block[i],
- space))) {
- error = DB_CORRUPTION;
- goto exit;
- }
-
- ROW_MERGE_READ_GET_NEXT(i);
- }
- }
-
- height = row_fts_build_sel_tree(sel_tree, (const mrec_t **) mrec,
- offsets, index);
-
- start = (1 << height) - 1;
-
- /* Fetch sorted records from sort buffer and insert them into
- corresponding FTS index auxiliary tables */
- for (;;) {
- dtuple_t* dtuple;
- ulint n_ext;
- int min_rec = 0;
-
- if (fts_sort_pll_degree <= 2) {
- while (!mrec[min_rec]) {
- min_rec++;
-
- if (min_rec >= (int) fts_sort_pll_degree) {
- row_fts_insert_tuple(
- &ins_ctx, &new_word,
- positions, &last_doc_id,
- NULL);
-
- goto exit;
- }
- }
-
- for (i = min_rec + 1; i < fts_sort_pll_degree; i++) {
- if (!mrec[i]) {
- continue;
- }
-
- if (cmp_rec_rec_simple(
- mrec[i], mrec[min_rec],
- offsets[i], offsets[min_rec],
- index, NULL) < 0) {
- min_rec = static_cast<int>(i);
- }
- }
- } else {
- min_rec = sel_tree[0];
-
- if (min_rec == -1) {
- row_fts_insert_tuple(
- &ins_ctx, &new_word,
- positions, &last_doc_id,
- NULL);
-
- goto exit;
- }
- }
-
- dtuple = row_rec_to_index_entry_low(
- mrec[min_rec], index, offsets[min_rec], &n_ext,
- tuple_heap);
-
- row_fts_insert_tuple(
- &ins_ctx, &new_word, positions,
- &last_doc_id, dtuple);
-
-
- ROW_MERGE_READ_GET_NEXT(min_rec);
-
- if (fts_sort_pll_degree > 2) {
- if (!mrec[min_rec]) {
- sel_tree[start + min_rec] = -1;
- }
-
- row_fts_sel_tree_update(sel_tree, start + min_rec,
- height, mrec,
- offsets, index);
- }
-
- count++;
-
- mem_heap_empty(tuple_heap);
- }
-
-exit:
- fts_sql_commit(ins_ctx.trx);
-
- ins_ctx.trx->op_info = "";
-
- mem_heap_free(tuple_heap);
-
- for (i = 0; i < FTS_NUM_AUX_INDEX; i++) {
- if (ins_ctx.ins_graph[i]) {
- fts_que_graph_free(ins_ctx.ins_graph[i]);
- }
- }
-
- trx_free_for_background(ins_ctx.trx);
-
- mem_heap_free(heap);
-
- if (fts_enable_diag_print) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB_FTS: inserted %lu records\n",
- (ulong) count);
- }
-
- return(error);
-}
diff --git a/storage/xtradb/row/row0import.cc b/storage/xtradb/row/row0import.cc
deleted file mode 100644
index 86b2d782b7b..00000000000
--- a/storage/xtradb/row/row0import.cc
+++ /dev/null
@@ -1,3774 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0import.cc
-Import a tablespace to a running instance.
-
-Created 2012-02-08 by Sunny Bains.
-*******************************************************/
-
-#include "row0import.h"
-
-#ifdef UNIV_NONINL
-#include "row0import.ic"
-#endif
-
-#include "btr0pcur.h"
-#include "que0que.h"
-#include "dict0boot.h"
-#include "ibuf0ibuf.h"
-#include "pars0pars.h"
-#include "row0upd.h"
-#include "row0sel.h"
-#include "row0mysql.h"
-#include "srv0start.h"
-#include "row0quiesce.h"
-#include "buf0buf.h"
-
-#include <vector>
-
-/** The size of the buffer to use for IO, expressed as the number of pages of
-size n that fit into 1MB. Note: os_file_read() doesn't expect reads to fail.
-If you set the buffer size to be greater than a multiple of the file size
-then it will assert. TODO: Fix this limitation of the IO functions.
-@param n - page size of the tablespace in bytes, must not be 0.
-@retval number of pages */
-#define IO_BUFFER_SIZE(n) ((1024 * 1024) / (n))
-
-/** For gathering stats on records during phase I */
-struct row_stats_t {
- ulint m_n_deleted; /*!< Number of delete-marked
- records found in the index */
-
- ulint m_n_purged; /*!< Number of records purged
- optimistically */
-
- ulint m_n_rows; /*!< Number of rows */
-
- ulint m_n_purge_failed; /*!< Number of deleted rows
- that could not be purged */
-};
-
-/** Index information required by IMPORT. One instance describes one index
-of the table that is being imported. */
-struct row_index_t {
- index_id_t m_id; /*!< Index id of the index
- in the exporting server */
- byte* m_name; /*!< Index name, stored as a
- NUL terminated byte string */
-
- ulint m_space; /*!< Space where it is placed */
-
- ulint m_page_no; /*!< Root page number */
-
- ulint m_type; /*!< Index type */
-
- ulint m_trx_id_offset; /*!< Relevant only for clustered
- indexes, offset of transaction
- id system column */
-
- ulint m_n_user_defined_cols; /*!< User defined columns */
-
- ulint m_n_uniq; /*!< Number of columns that can
- uniquely identify the row */
-
- ulint m_n_nullable; /*!< Number of nullable
- columns */
-
- ulint m_n_fields; /*!< Total number of fields,
- i.e. the number of entries
- in m_fields */
-
- dict_field_t* m_fields; /*!< Index fields */
-
- const dict_index_t*
- m_srv_index; /*!< Index instance in the
- importing server */
-
- row_stats_t m_stats; /*!< Statistics gathered during
- the import phase */
-
-};
-
-/** Meta data required by IMPORT. The destructor releases the arrays that
-the pointer members refer to with delete[]. */
-struct row_import {
- row_import() UNIV_NOTHROW
- :
- m_table(),
- m_version(),
- m_hostname(),
- m_table_name(),
- m_autoinc(),
- m_page_size(),
- m_flags(),
- m_n_cols(),
- m_cols(),
- m_col_names(),
- m_n_indexes(),
- m_indexes(),
- m_missing(true) { }
-
- ~row_import() UNIV_NOTHROW;
-
- /**
- Find the index entry in the indexes array.
- @param name - index name
- @return instance if found else 0. */
- row_index_t* get_index(const char* name) const UNIV_NOTHROW;
-
- /**
- Get the number of rows in the index.
- @param name - index name
- @return number of rows (doesn't include delete marked rows). */
- ulint get_n_rows(const char* name) const UNIV_NOTHROW;
-
- /**
- Find the ordinal value of the column name in the cfg table columns.
- @param name - of column to look for.
- @return ULINT_UNDEFINED if not found. */
- ulint find_col(const char* name) const UNIV_NOTHROW;
-
- /**
- Get the number of rows for which purge failed during the convert phase.
- @param name - index name
- @return number of rows for which purge failed. */
- ulint get_n_purge_failed(const char* name) const UNIV_NOTHROW;
-
- /**
- Check if the index still contains delete-marked records that could
- not be purged during the convert phase.
- @param name - index name
- @return true if index needs to be purged, i.e. its purge-failed
- count is greater than zero. */
- bool requires_purge(const char* name) const UNIV_NOTHROW
- {
- return(get_n_purge_failed(name) > 0);
- }
-
- /**
- Set the index root <space, pageno> using the index name */
- void set_root_by_name() UNIV_NOTHROW;
-
- /**
- Set the index root <space, pageno> using a heuristic
- @return DB_SUCCESS or error code */
- dberr_t set_root_by_heuristic() UNIV_NOTHROW;
-
- /** Check if the index schema that was read from the .cfg file
- matches the in memory index definition.
- Note: It will update row_index_t::m_srv_index to map the meta-data
- read from the .cfg file to the server index instance.
- @param thd - MySQL session variable
- @param index - index definition in the importing server
- @return DB_SUCCESS or error code. */
- dberr_t match_index_columns(
- THD* thd,
- const dict_index_t* index) UNIV_NOTHROW;
-
- /**
- Check if the table schema that was read from the .cfg file matches the
- in memory table definition.
- @param thd - MySQL session variable
- @return DB_SUCCESS or error code. */
- dberr_t match_table_columns(
- THD* thd) UNIV_NOTHROW;
-
- /**
- Check if the table (and index) schema that was read from the .cfg file
- matches the in memory table definition.
- @param thd - MySQL session variable
- @return DB_SUCCESS or error code. */
- dberr_t match_schema(
- THD* thd) UNIV_NOTHROW;
-
- dict_table_t* m_table; /*!< Table instance */
-
- ulint m_version; /*!< Version of config file */
-
- byte* m_hostname; /*!< Hostname where the
- tablespace was exported */
- byte* m_table_name; /*!< Exporting instance table
- name */
-
- ib_uint64_t m_autoinc; /*!< Next autoinc value */
-
- ulint m_page_size; /*!< Tablespace page size */
-
- ulint m_flags; /*!< Table flags */
-
- ulint m_n_cols; /*!< Number of columns in the
- meta-data file */
-
- dict_col_t* m_cols; /*!< Column data */
-
- byte** m_col_names; /*!< Column names, we store the
- column names separately because
- there is no field to store the
- value in dict_col_t */
-
- ulint m_n_indexes; /*!< Number of indexes,
- including clustered index */
-
- row_index_t* m_indexes; /*!< Index meta data */
-
- bool m_missing; /*!< NOTE(review): initialised
- to true by the constructor; the
- name suggests it is true while
- no readable .cfg file has been
- found, whereas this comment
- used to say "true if a .cfg
- file was found and was
- readable" - confirm against the
- code that reads the .cfg file */
-};
-
-/** Use the page cursor to iterate over records in a block. The iterator
-wraps a single page_cur_t that is zero-filled on construction and is
-positioned by open(). */
-class RecIterator {
-public:
- /**
- Default constructor */
- RecIterator() UNIV_NOTHROW
- {
- memset(&m_cur, 0x0, sizeof(m_cur));
- }
-
- /**
- Position the cursor on the first user record: the cursor is set
- before the first record and then advanced once, unless it is
- already at the end.
- @param block - block whose records are to be iterated over */
- void open(buf_block_t* block) UNIV_NOTHROW
- {
- page_cur_set_before_first(block, &m_cur);
-
- if (!end()) {
- next();
- }
- }
-
- /**
- Move to the next record. */
- void next() UNIV_NOTHROW
- {
- page_cur_move_to_next(&m_cur);
- }
-
- /**
- @return the current record, must not be called when end() is true */
- rec_t* current() UNIV_NOTHROW
- {
- ut_ad(!end());
- return(page_cur_get_rec(&m_cur));
- }
-
- /**
- @return true if cursor is at the end */
- bool end() UNIV_NOTHROW
- {
- return(page_cur_is_after_last(&m_cur) == TRUE);
- }
-
- /** Remove the current record
- @param index - index that the page belongs to
- @param page_zip - compressed page descriptor, passed on to
- page_delete_rec()
- @param offsets - column offsets of the current record
- @return true on success, false if the record is the only one left
- on the page or if page_delete_rec() reports failure */
- bool remove(
- const dict_index_t* index,
- page_zip_des_t* page_zip,
- ulint* offsets) UNIV_NOTHROW
- {
- /* We can't end up with an empty page unless it is root.
- The check below does not look at whether the page is a root,
- it simply refuses to delete the last remaining record. */
- if (page_get_n_recs(m_cur.block->frame) <= 1) {
- return(false);
- }
-
- return(page_delete_rec(index, &m_cur, page_zip, offsets));
- }
-
-private:
- page_cur_t m_cur;
-};
-
-/** Class that purges delete marked records from indexes, both secondary
-and cluster. It does a pessimistic delete. This should only be done if we
-couldn't purge the delete marked records during Phase I. */
-class IndexPurge {
-public:
- /** Constructor. Logs the start of Phase II for the index.
- @param trx - the user transaction covering the import tablespace
- @param index - to be imported */
- IndexPurge(
- trx_t* trx,
- dict_index_t* index) UNIV_NOTHROW
- :
- m_trx(trx),
- m_index(index),
- m_n_rows(0)
- {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Phase II - Purge records from index %s",
- index->name);
- }
-
- /** Destructor */
- ~IndexPurge() UNIV_NOTHROW { }
-
- /** Purge delete marked records.
- @return DB_SUCCESS or error code. */
- dberr_t garbage_collect() UNIV_NOTHROW;
-
- /** The number of records that are not delete marked.
- @return total records in the index after purge */
- ulint get_n_rows() const UNIV_NOTHROW
- {
- return(m_n_rows);
- }
-
-private:
- /**
- Begin import, position the cursor on the first record. */
- void open() UNIV_NOTHROW;
-
- /**
- Close the persistent cursor and commit the mini-transaction. */
- void close() UNIV_NOTHROW;
-
- /**
- Position the cursor on the next record.
- @return DB_SUCCESS or error code */
- dberr_t next() UNIV_NOTHROW;
-
- /**
- Store the persistent cursor position and reopen the
- B-tree cursor in BTR_MODIFY_TREE mode, because the
- tree structure may be changed during a pessimistic delete. */
- void purge_pessimistic_delete() UNIV_NOTHROW;
-
- /**
- Purge delete-marked records. Takes no arguments; it works on the
- members of this instance. */
- void purge() UNIV_NOTHROW;
-
-protected:
- // Disable default construction and copying
- IndexPurge();
- IndexPurge(const IndexPurge&);
- IndexPurge &operator=(const IndexPurge&);
-
-private:
- trx_t* m_trx; /*!< User transaction */
- mtr_t m_mtr; /*!< Mini-transaction */
- btr_pcur_t m_pcur; /*!< Persistent cursor */
- dict_index_t* m_index; /*!< Index to be processed */
- ulint m_n_rows; /*!< Records in index */
-};
-
-/** Functor that is called for each physical page that is read from the
-tablespace file. This base class keeps the state that is shared by the
-concrete callbacks: the covering transaction, the space id and flags read
-from the header page, and a private copy of the current extent descriptor
-page that is used to tell free pages from allocated ones. */
-class AbstractCallback : public PageCallback {
-public:
- /** Constructor. Note that m_size and m_free_limit are not
- initialised here, they are set by init().
- @param trx - covering transaction */
- AbstractCallback(trx_t* trx)
- :
- m_trx(trx),
- m_space(ULINT_UNDEFINED),
- m_xdes(),
- m_xdes_page_no(ULINT_UNDEFINED),
- m_space_flags(ULINT_UNDEFINED) UNIV_NOTHROW { }
-
- /**
- Free any extent descriptor instance */
- virtual ~AbstractCallback()
- {
- delete [] m_xdes;
- }
-
- /** Determine the page size to use for traversing the tablespace
- @param file_size - size of the tablespace file in bytes
- @param block - contents of the first page in the tablespace file.
- @retval DB_SUCCESS or error code. */
- virtual dberr_t init(
- os_offset_t file_size,
- const buf_block_t* block) UNIV_NOTHROW;
-
- /** @return true if compressed table, i.e. the zip size is non-zero. */
- bool is_compressed_table() const UNIV_NOTHROW
- {
- return(get_zip_size() > 0);
- }
-
-protected:
- /**
- Get the data page depending on the table type, compressed or not.
- @param block - block read from disk
- @retval the compressed page data for a compressed table, otherwise
- the buffer frame of the block */
- buf_frame_t* get_frame(buf_block_t* block) const UNIV_NOTHROW
- {
- if (is_compressed_table()) {
- return(block->page.zip.data);
- }
-
- return(buf_block_get_frame(block));
- }
-
- /** Check for session interrupt. If required we could
- even flush to disk here every N pages.
- @retval DB_SUCCESS, or DB_INTERRUPTED if the transaction was
- interrupted */
- dberr_t periodic_check() UNIV_NOTHROW
- {
- if (trx_is_interrupted(m_trx)) {
- return(DB_INTERRUPTED);
- }
-
- return(DB_SUCCESS);
- }
-
- /**
- Get the physical offset of the extent descriptor within the page.
- @param page_no - page number of the extent descriptor
- @param page - contents of the page containing the extent descriptor.
- @return pointer to the extent descriptor that covers page_no */
- const xdes_t* xdes(
- ulint page_no,
- const page_t* page) const UNIV_NOTHROW
- {
- ulint offset;
-
- offset = xdes_calc_descriptor_index(get_zip_size(), page_no);
-
- return(page + XDES_ARR_OFFSET + XDES_SIZE * offset);
- }
-
- /**
- Set the current page directory (xdes). If the extent descriptor is
- marked as free then free the current extent descriptor and set it to
- 0. This implies that all pages that are covered by this extent
- descriptor are also freed. Otherwise a private copy of the whole
- page (m_page_size bytes) is stored in m_xdes. Only the state of the
- first descriptor on the page is examined.
-
- @param page_no - offset of page within the file
- @param page - page contents
- @return DB_SUCCESS, or DB_OUT_OF_MEMORY if the copy cannot be
- allocated. */
- dberr_t set_current_xdes(
- ulint page_no,
- const page_t* page) UNIV_NOTHROW
- {
- m_xdes_page_no = page_no;
-
- delete[] m_xdes;
-
- m_xdes = 0;
-
- ulint state;
- const xdes_t* xdesc = page + XDES_ARR_OFFSET;
-
- state = mach_read_ulint(xdesc + XDES_STATE, MLOG_4BYTES);
-
- if (state != XDES_FREE) {
-
- m_xdes = new(std::nothrow) xdes_t[m_page_size];
-
- /* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_13",
- delete [] m_xdes; m_xdes = 0;);
-
- if (m_xdes == 0) {
- return(DB_OUT_OF_MEMORY);
- }
-
- memcpy(m_xdes, page, m_page_size);
- }
-
- return(DB_SUCCESS);
- }
-
- /**
- @param page - an index page (asserted in debug builds)
- @return true if it is a root page, i.e. both the next and the
- previous page pointers are FIL_NULL */
- bool is_root_page(const page_t* page) const UNIV_NOTHROW
- {
- ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
-
- return(mach_read_from_4(page + FIL_PAGE_NEXT) == FIL_NULL
- && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL);
- }
-
- /**
- Check if the page is marked as free in the extent descriptor.
- The page must be covered by the current extent descriptor page.
- @param page_no - page number to check in the extent descriptor.
- @return true if the page is marked as free */
- bool is_free(ulint page_no) const UNIV_NOTHROW
- {
- ut_a(xdes_calc_descriptor_page(get_zip_size(), page_no)
- == m_xdes_page_no);
-
- if (m_xdes != 0) {
- const xdes_t* xdesc = xdes(page_no, m_xdes);
- ulint pos = page_no % FSP_EXTENT_SIZE;
-
- return(xdes_get_bit(xdesc, XDES_FREE_BIT, pos));
- }
-
- /* If the current xdes was free, the page must be free. */
- return(true);
- }
-
-protected:
- /** Covering transaction. */
- trx_t* m_trx;
-
- /** Space id of the file being iterated over. */
- ulint m_space;
-
- /** Minimum page number for which the free list has not been
- initialized: the pages >= this limit are, by definition, free;
- note that in a single-table tablespace where size < 64 pages,
- this number is 64, i.e., we have initialized the space about
- the first extent, but have not physically allocated those pages
- to the file. Set by init(). @see FSP_LIMIT. */
- ulint m_free_limit;
-
- /** Current size of the space in pages. Set by init(). */
- ulint m_size;
-
- /** Current extent descriptor page, 0 if the descriptor is free */
- xdes_t* m_xdes;
-
- /** Physical page offset in the file of the extent descriptor */
- ulint m_xdes_page_no;
-
- /** Flags value read from the header page */
- ulint m_space_flags;
-};
-
-/** Determine the page size to use for traversing the tablespace. Also
-reads the tablespace flags, size, free limit and space id from the header
-page and loads the extent descriptor of page 0.
-@param file_size - size of the tablespace file in bytes
-@param block - contents of the first page in the tablespace file.
-@retval DB_SUCCESS on success, DB_CORRUPTION if the flags, the zip size,
-the page size or the file size are not acceptable, or the error code
-returned by set_current_xdes(). */
-dberr_t
-AbstractCallback::init(
- os_offset_t file_size,
- const buf_block_t* block) UNIV_NOTHROW
-{
- const page_t* page = block->frame;
-
- m_space_flags = fsp_header_get_flags(page);
- if (!fsp_flags_is_valid(m_space_flags)) {
- ulint cflags = fsp_flags_convert_from_101(m_space_flags);
- if (cflags == ULINT_UNDEFINED) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Invalid FSP_SPACE_FLAGS=0x%x",
- int(m_space_flags));
- return(DB_CORRUPTION);
- }
- m_space_flags = cflags;
- }
-
- /* Clear the DATA_DIR flag, which is basically garbage. */
- m_space_flags &= ~(1U << FSP_FLAGS_POS_RESERVED);
-
- /* Since we don't know whether it is a compressed table
- or not, the data is always read into the block->frame. */
-
- dberr_t err = set_zip_size(block->frame);
-
- if (err != DB_SUCCESS) {
- return(DB_CORRUPTION);
- }
-
- /* Set the page size used to traverse the tablespace: the zip size
- for a compressed table, otherwise the page size from the flags. */
-
- m_page_size = (is_compressed_table())
- ? get_zip_size() : fsp_flags_get_page_size(m_space_flags);
-
- if (m_page_size == 0) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Page size is 0");
- return(DB_CORRUPTION);
- } else if (!is_compressed_table() && m_page_size != UNIV_PAGE_SIZE) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page size " ULINTPF " of ibd file is not the same "
- "as the server page size " ULINTPF,
- m_page_size, UNIV_PAGE_SIZE);
-
- return(DB_CORRUPTION);
-
- } else if ((file_size % m_page_size)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "File size " UINT64PF " is not a multiple "
- "of the page size " ULINTPF,
- (ib_uint64_t) file_size, m_page_size);
-
- return(DB_CORRUPTION);
- }
-
- ut_a(m_space == ULINT_UNDEFINED);
-
- m_size = mach_read_from_4(page + FSP_SIZE);
- m_free_limit = mach_read_from_4(page + FSP_FREE_LIMIT);
- m_space = mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SPACE_ID);
-
- if ((err = set_current_xdes(0, page)) != DB_SUCCESS) {
- return(err);
- }
-
- return(DB_SUCCESS);
-}
-
-/**
-Try and determine the index root pages by checking if the next/prev
-pointers are both FIL_NULL. We need to ensure that we skip pages that are
-marked as free in the extent descriptor. */
-struct FetchIndexRootPages : public AbstractCallback {
-
- /** Index information gathered from the .ibd file. */
- struct Index {
-
- Index(index_id_t id, ulint page_no)
- :
- m_id(id),
- m_page_no(page_no) { }
-
- index_id_t m_id; /*!< Index id */
- ulint m_page_no; /*!< Root page number */
- };
-
- typedef std::vector<Index> Indexes;
-
- /** Constructor
- @param table - table definition in server.
- @param trx - covering (user) transaction */
- FetchIndexRootPages(const dict_table_t* table, trx_t* trx)
- :
- AbstractCallback(trx),
- m_table(table) UNIV_NOTHROW { }
-
- /** Destructor */
- virtual ~FetchIndexRootPages() UNIV_NOTHROW { }
-
- /**
- @retval the space id of the tablespace being iterated over, as
- read from the file */
- virtual ulint get_space_id() const UNIV_NOTHROW
- {
- return(m_space);
- }
-
- /**
- Called for each block as it is read from the file.
- @param offset - physical offset in the file
- @param block - block to convert, it is not from the buffer pool.
- @retval DB_SUCCESS or error code. */
- virtual dberr_t operator() (
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW;
-
- /** Update the import configuration that will be used to import
- the tablespace.
- @param cfg - import configuration to fill in
- @return DB_SUCCESS or error code */
- dberr_t build_row_import(row_import* cfg) const UNIV_NOTHROW;
-
- /** Table definition in server. */
- const dict_table_t* m_table;
-
- /** Index information, one entry per root page that was found */
- Indexes m_indexes;
-};
-
-/**
-Called for each block as it is read from the file. Extent descriptor pages
-replace the current extent descriptor. Index pages that are not marked as
-free and that look like a root page are recorded in m_indexes. When the
-first root page is found, the tablespace flags read from the header are
-checked against the flags derived from the table definition.
-
-@param offset - physical offset in the file
-@param block - block to convert, it is not from the buffer pool.
-@retval DB_SUCCESS, DB_INTERRUPTED if the session was interrupted,
-DB_CORRUPTION if the page offset or the tablespace flags do not match, or
-the error code returned by set_current_xdes(). */
-dberr_t
-FetchIndexRootPages::operator() (
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW
-{
- dberr_t err;
-
- if ((err = periodic_check()) != DB_SUCCESS) {
- return(err);
- }
-
- const page_t* page = get_frame(block);
-
- ulint page_type = fil_page_get_type(page);
-
- if (block->page.offset * m_page_size != offset) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page offset doesn't match file offset: "
- "page offset: %u, file offset: " ULINTPF,
- block->page.offset,
- (ulint) (offset / m_page_size));
-
- err = DB_CORRUPTION;
- } else if (page_type == FIL_PAGE_TYPE_XDES) {
- err = set_current_xdes(block->page.offset, page);
- } else if (page_type == FIL_PAGE_INDEX
- && !is_free(block->page.offset)
- && is_root_page(page)) {
-
- index_id_t id = btr_page_get_index_id(page);
- ulint page_no = buf_block_get_page_no(block);
-
- m_indexes.push_back(Index(id, page_no));
-
- if (m_indexes.size() == 1) {
- /* Check that the tablespace flags match the table flags.
- This is done only once, for the first root page found. */
- ulint expected = dict_tf_to_fsp_flags(m_table->flags);
- if (!fsp_flags_match(expected, m_space_flags)) {
- ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Expected FSP_SPACE_FLAGS=0x%x, .ibd "
- "file contains 0x%x.",
- unsigned(expected),
- unsigned(m_space_flags));
- return(DB_CORRUPTION);
- }
- }
- }
-
- return(err);
-}
-
-/**
-Update the import configuration that will be used to import the tablespace.
-One row_index_t is created for every root page that was found. Since the
-real index names are not known here, each entry is given a generated name
-made of the string "index" followed by the index id.
-@param cfg - import configuration to fill in, cfg->m_table must be the
-table this callback was created for
-@return DB_SUCCESS, DB_CORRUPTION if no root page was found, or
-DB_OUT_OF_MEMORY */
-dberr_t
-FetchIndexRootPages::build_row_import(row_import* cfg) const UNIV_NOTHROW
-{
- Indexes::const_iterator end = m_indexes.end();
-
- ut_a(cfg->m_table == m_table);
- cfg->m_page_size = m_page_size;
- cfg->m_n_indexes = m_indexes.size();
-
- if (cfg->m_n_indexes == 0) {
-
- ib_logf(IB_LOG_LEVEL_ERROR, "No B+Tree found in tablespace");
-
- return(DB_CORRUPTION);
- }
-
- cfg->m_indexes = new(std::nothrow) row_index_t[cfg->m_n_indexes];
-
- /* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_11",
- delete [] cfg->m_indexes; cfg->m_indexes = 0;);
-
- if (cfg->m_indexes == 0) {
- return(DB_OUT_OF_MEMORY);
- }
-
- memset(cfg->m_indexes, 0x0, sizeof(*cfg->m_indexes) * cfg->m_n_indexes);
-
- row_index_t* cfg_index = cfg->m_indexes;
-
- for (Indexes::const_iterator it = m_indexes.begin();
- it != end;
- ++it, ++cfg_index) {
-
- char name[BUFSIZ];
-
- ut_snprintf(name, sizeof(name), "index" IB_ID_FMT, it->m_id);
-
- ulint len = strlen(name) + 1;
-
- cfg_index->m_name = new(std::nothrow) byte[len];
-
- /* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_12",
- delete [] cfg_index->m_name;
- cfg_index->m_name = 0;);
-
- if (cfg_index->m_name == 0) {
- return(DB_OUT_OF_MEMORY);
- }
-
- memcpy(cfg_index->m_name, name, len);
-
- cfg_index->m_id = it->m_id;
-
- cfg_index->m_space = m_space;
-
- cfg_index->m_page_no = it->m_page_no;
- }
-
- return(DB_SUCCESS);
-}
-
-/* Functor that is called for each physical page that is read from the
-tablespace file.
-
- 1. Check each page for corruption.
-
- 2. Update the space id and LSN on every page
- * For the header page
- - Validate the flags
- - Update the LSN
-
- 3. On Btree pages
- * Set the index id
- * Update the max trx id
- * In a cluster index, update the system columns
- * In a cluster index, update the BLOB ptr, set the space id
- * Purge delete marked records, but only if they can be easily
- removed from the page
- * Keep a counter of number of rows, ie. non-delete-marked rows
- * Keep a counter of number of delete marked rows
- * Keep a counter of number of purge failures
- * If a page is stamped with an index id that isn't in the .cfg file
- we assume it is deleted and the page can be ignored.
-
- 4. Set the page state to dirty so that it will be written to disk.
-*/
-class PageConverter : public AbstractCallback {
-public:
- /** Constructor
- * @param cfg - config of table being imported.
- * @param trx - transaction covering the import */
- PageConverter(row_import* cfg, trx_t* trx) UNIV_NOTHROW;
-
- virtual ~PageConverter() UNIV_NOTHROW
- {
- if (m_heap != 0) {
- mem_heap_free(m_heap);
- }
- }
-
- /**
- @retval the server space id of the tablespace being iterated over,
- taken from the table that is being imported */
- virtual ulint get_space_id() const UNIV_NOTHROW
- {
- return(m_cfg->m_table->space);
- }
-
- /**
- Called for each block as it is read from the file.
- @param offset - physical offset in the file
- @param block - block to convert, it is not from the buffer pool.
- @retval DB_SUCCESS or error code. */
- virtual dberr_t operator() (
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW;
-private:
-
- /** Status returned by PageConverter::validate() */
- enum import_page_status_t {
- IMPORT_PAGE_STATUS_OK, /*!< Page is OK */
- IMPORT_PAGE_STATUS_ALL_ZERO, /*!< Page is all zeros */
- IMPORT_PAGE_STATUS_CORRUPTED /*!< Page is corrupted */
- };
-
- /**
- Update the page, set the space id, max trx id and index id.
- @param block - block read from file
- @param page_type - type of the page, passed by reference
- @retval DB_SUCCESS or error code */
- dberr_t update_page(
- buf_block_t* block,
- ulint& page_type) UNIV_NOTHROW;
-
-#if defined UNIV_DEBUG
- /**
- @return true if the error condition is enabled; both the debug
- and the non-debug variant below always evaluate to false. */
- bool trigger_corruption() UNIV_NOTHROW
- {
- return(false);
- }
- #else
-#define trigger_corruption() (false)
-#endif /* UNIV_DEBUG */
-
- /**
- Update the space, index id, trx id.
- @param block - block to convert
- @return DB_SUCCESS or error code */
- dberr_t update_index_page(buf_block_t* block) UNIV_NOTHROW;
-
- /** Update the BLOB references and write UNDO log entries for
- rows that can't be purged optimistically.
- @param block - block to update
- @retval DB_SUCCESS or error code */
- dberr_t update_records(buf_block_t* block) UNIV_NOTHROW;
-
- /**
- Validate the page, check for corruption.
- @param offset - physical offset within file.
- @param page - block read from file.
- @return IMPORT_PAGE_STATUS_OK on success,
- IMPORT_PAGE_STATUS_ALL_ZERO if all zero,
- IMPORT_PAGE_STATUS_CORRUPTED if corrupted */
- import_page_status_t validate(
- os_offset_t offset,
- buf_block_t* page) UNIV_NOTHROW;
-
- /**
- Validate the space flags and update tablespace header page.
- @param block - block read from file, not from the buffer pool.
- @retval DB_SUCCESS or error code */
- dberr_t update_header(buf_block_t* block) UNIV_NOTHROW;
-
- /**
- Adjust the BLOB reference for a single column that is externally stored
- @param rec - record to update
- @param offsets - column offsets for the record
- @param i - column ordinal value
- @return DB_SUCCESS or error code */
- dberr_t adjust_cluster_index_blob_column(
- rec_t* rec,
- const ulint* offsets,
- ulint i) UNIV_NOTHROW;
-
- /**
- Adjusts the BLOB reference in the clustered index row for all
- externally stored columns.
- @param rec - record to update
- @param offsets - column offsets for the record
- @return DB_SUCCESS or error code */
- dberr_t adjust_cluster_index_blob_columns(
- rec_t* rec,
- const ulint* offsets) UNIV_NOTHROW;
-
- /**
- In the clustered index, adjust the BLOB pointers as needed.
- Also update the BLOB reference, write the new space id.
- @param rec - record to update
- @param offsets - column offsets for the record
- @return DB_SUCCESS or error code */
- dberr_t adjust_cluster_index_blob_ref(
- rec_t* rec,
- const ulint* offsets) UNIV_NOTHROW;
-
- /**
- Purge delete-marked records, only if it is possible to do
- so without re-organising the B+tree.
- @param offsets - current row offsets.
- @retval true if purged */
- bool purge(const ulint* offsets) UNIV_NOTHROW;
-
- /**
- Adjust the BLOB references and sys fields for the current record.
- @param index - the index being converted
- @param rec - record to update
- @param offsets - column offsets for the record
- @param deleted - true if row is delete marked
- @return DB_SUCCESS or error code. */
- dberr_t adjust_cluster_record(
- const dict_index_t* index,
- rec_t* rec,
- const ulint* offsets,
- bool deleted) UNIV_NOTHROW;
-
- /**
- Find an index with the matching id by a linear search over the
- indexes of the import configuration.
- @param id - index id to look for
- @return row_index_t* instance or 0 */
- row_index_t* find_index(index_id_t id) UNIV_NOTHROW
- {
- row_index_t* index = &m_cfg->m_indexes[0];
-
- for (ulint i = 0; i < m_cfg->m_n_indexes; ++i, ++index) {
- if (id == index->m_id) {
- return(index);
- }
- }
-
- return(0);
-
- }
-private:
- /** Config for table that is being imported. */
- row_import* m_cfg;
-
- /** Current index whose pages are being imported */
- row_index_t* m_index;
-
- /** Current system LSN */
- lsn_t m_current_lsn;
-
- /** Alias for m_page_zip, only set for compressed pages. */
- page_zip_des_t* m_page_zip_ptr;
-
- /** Iterator over records in a block */
- RecIterator m_rec_iter;
-
- /** Record offset */
- ulint m_offsets_[REC_OFFS_NORMAL_SIZE];
-
- /** Pointer to m_offsets_ */
- ulint* m_offsets;
-
- /** Memory heap for the record offsets, freed by the destructor */
- mem_heap_t* m_heap;
-
- /** Cluster index instance */
- dict_index_t* m_cluster_index;
-};
-
-/**
-row_import destructor. Releases, with delete[], every array that the
-instance points to: the per-index names and field arrays (including the
-field names), the column names, and finally the top level arrays. */
-row_import::~row_import() UNIV_NOTHROW
-{
-    if (m_indexes != 0) {
-        for (ulint idx_no = 0; idx_no < m_n_indexes; ++idx_no) {
-            row_index_t& cfg_index = m_indexes[idx_no];
-
-            delete [] cfg_index.m_name;
-
-            /* An index without a field array has nothing more to free. */
-            if (cfg_index.m_fields != 0) {
-                dict_field_t* field_arr = cfg_index.m_fields;
-                const ulint field_cnt = cfg_index.m_n_fields;
-
-                for (ulint f = 0; f < field_cnt; ++f) {
-                    delete [] field_arr[f].name;
-                }
-
-                delete [] field_arr;
-            }
-        }
-    }
-
-    if (m_col_names != 0) {
-        for (ulint col_no = 0; col_no < m_n_cols; ++col_no) {
-            delete [] m_col_names[col_no];
-        }
-    }
-
-    delete [] m_cols;
-    delete [] m_indexes;
-    delete [] m_col_names;
-    delete [] m_table_name;
-    delete [] m_hostname;
-}
-
-/**
-Look up the meta-data of an index by its name.
-@param name - index name to search for
-@return the matching entry of the indexes array, or 0 if none matches. */
-row_index_t*
-row_import::get_index(
-    const char* name) const UNIV_NOTHROW
-{
-    ulint pos = 0;
-
-    while (pos < m_n_indexes) {
-        row_index_t* candidate = &m_indexes[pos];
-
-        /* m_name is stored as bytes, compare it as a C string. */
-        const char* candidate_name
-            = reinterpret_cast<const char*>(candidate->m_name);
-
-        if (!strcmp(candidate_name, name)) {
-            return(candidate);
-        }
-
-        ++pos;
-    }
-
-    return(0);
-}
-
-/**
-Get the number of rows in the index.
-@param name - index name, must not be 0 and must name an index that is
-present in the indexes array (both are asserted)
-@return number of rows (doesn't include delete marked rows). */
-ulint
-row_import::get_n_rows(
-    const char* name) const UNIV_NOTHROW
-{
-    /* Validate the argument before it is used for the lookup. */
-    ut_a(name != 0);
-
-    const row_index_t* index = get_index(name);
-
-    /* get_index() returns 0 for an unknown name; fail on the assertion
-    instead of dereferencing a null pointer. */
-    ut_a(index != 0);
-
-    return(index->m_stats.m_n_rows);
-}
-
-/**
-Get the number of rows for which purge failed during the convert phase.
-@param name - index name, must not be 0 and must name an index that is
-present in the indexes array (both are asserted)
-@return number of rows for which purge failed. */
-ulint
-row_import::get_n_purge_failed(
-    const char* name) const UNIV_NOTHROW
-{
-    /* Validate the argument before it is used for the lookup. */
-    ut_a(name != 0);
-
-    const row_index_t* index = get_index(name);
-
-    /* get_index() returns 0 for an unknown name; fail on the assertion
-    instead of dereferencing a null pointer. */
-    ut_a(index != 0);
-
-    return(index->m_stats.m_n_purge_failed);
-}
-
-/**
-Search the column names read from the cfg file for the given name.
-@param name - column name to look for.
-@return the ordinal position of the column, ULINT_UNDEFINED if no column
-has that name. */
-ulint
-row_import::find_col(
-    const char* name) const UNIV_NOTHROW
-{
-    ulint ordinal = 0;
-
-    while (ordinal < m_n_cols) {
-        /* Column names are stored as bytes, compare as C strings. */
-        const char* candidate
-            = reinterpret_cast<const char*>(m_col_names[ordinal]);
-
-        if (!strcmp(candidate, name)) {
-            return(ordinal);
-        }
-
-        ++ordinal;
-    }
-
-    return(ULINT_UNDEFINED);
-}
-
-/**
-Check if the index schema that was read from the .cfg file matches the
-in memory index definition. The cfg entry is looked up by index name. If
-it is missing or its field count differs, the function reports the error
-and returns at once. Otherwise m_srv_index of the cfg entry is pointed at
-the server index and every field is compared by name, prefix length and
-fixed length; all mismatches are reported, not only the first one.
-@param thd - MySQL session variable, used for error reporting
-@param index - index definition in the importing server
-@return DB_SUCCESS or DB_ERROR. */
-dberr_t
-row_import::match_index_columns(
- THD* thd,
- const dict_index_t* index) UNIV_NOTHROW
-{
- row_index_t* cfg_index;
- dberr_t err = DB_SUCCESS;
-
- cfg_index = get_index(index->name);
-
- if (cfg_index == 0) {
- ib_errf(thd, IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Index %s not found in tablespace meta-data file.",
- index->name);
-
- return(DB_ERROR);
- }
-
- if (cfg_index->m_n_fields != index->n_fields) {
-
- ib_errf(thd, IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Index field count %u doesn't match"
- " tablespace metadata file value " ULINTPF,
- index->n_fields, cfg_index->m_n_fields);
-
- return(DB_ERROR);
- }
-
- cfg_index->m_srv_index = index;
-
- const dict_field_t* field = index->fields;
- const dict_field_t* cfg_field = cfg_index->m_fields;
-
- for (ulint i = 0; i < index->n_fields; ++i, ++field, ++cfg_field) {
-
- if (strcmp(field->name, cfg_field->name) != 0) {
- ib_errf(thd, IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Index field name %s doesn't match"
- " tablespace metadata field name %s"
- " for field position " ULINTPF,
- field->name, cfg_field->name, i);
-
- err = DB_ERROR;
- }
-
- if (cfg_field->prefix_len != field->prefix_len) {
- ib_errf(thd, IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Index %s field %s prefix len %u"
- " doesn't match metadata file value %u",
- index->name, field->name,
- field->prefix_len, cfg_field->prefix_len);
-
- err = DB_ERROR;
- }
-
- if (cfg_field->fixed_len != field->fixed_len) {
- ib_errf(thd, IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Index %s field %s fixed len %u"
- " doesn't match metadata file value %u",
- index->name, field->name,
- field->fixed_len,
- cfg_field->fixed_len);
-
- err = DB_ERROR;
- }
- }
-
- return(err);
-}
-
-/**
-Check if the table schema that was read from the .cfg file matches the
-in memory table definition. Every column of the server table is looked up
-by name in the cfg columns; a missing column, a different ordinal position
-or a difference in precise type, main type, length, multi-byte length,
-ordering or max prefix is reported and makes the function return DB_ERROR.
-The loop does not stop at the first mismatch, so all of them are reported.
-The comparison of the ind members sets DB_ERROR without a message.
-@param thd - MySQL session variable
-@return DB_SUCCESS or DB_ERROR. */
-dberr_t
-row_import::match_table_columns(
- THD* thd) UNIV_NOTHROW
-{
- dberr_t err = DB_SUCCESS;
- const dict_col_t* col = m_table->cols;
-
- for (ulint i = 0; i < m_table->n_cols; ++i, ++col) {
-
- const char* col_name;
- ulint cfg_col_index;
-
- col_name = dict_table_get_col_name(
- m_table, dict_col_get_no(col));
-
- cfg_col_index = find_col(col_name);
-
- if (cfg_col_index == ULINT_UNDEFINED) {
-
- ib_errf(thd, IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Column %s not found in tablespace.",
- col_name);
-
- err = DB_ERROR;
- } else if (cfg_col_index != col->ind) {
-
- ib_errf(thd, IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Column %s ordinal value mismatch, it's at %u"
- " in the table and " ULINTPF
- " in the tablespace meta-data file",
- col_name, col->ind, cfg_col_index);
-
- err = DB_ERROR;
- } else {
- const dict_col_t* cfg_col;
-
- cfg_col = &m_cols[cfg_col_index];
- ut_a(cfg_col->ind == cfg_col_index);
-
- if (cfg_col->prtype != col->prtype) {
- ib_errf(thd,
- IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Column %s precise type mismatch.",
- col_name);
- err = DB_ERROR;
- }
-
- if (cfg_col->mtype != col->mtype) {
- ib_errf(thd,
- IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Column %s main type mismatch.",
- col_name);
- err = DB_ERROR;
- }
-
- if (cfg_col->len != col->len) {
- ib_errf(thd,
- IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Column %s length mismatch.",
- col_name);
- err = DB_ERROR;
- }
-
- if (cfg_col->mbminmaxlen != col->mbminmaxlen) {
- ib_errf(thd,
- IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Column %s multi-byte len mismatch.",
- col_name);
- err = DB_ERROR;
- }
-
- if (cfg_col->ind != col->ind) {
- err = DB_ERROR;
- }
-
- if (cfg_col->ord_part != col->ord_part) {
- ib_errf(thd,
- IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Column %s ordering mismatch.",
- col_name);
- err = DB_ERROR;
- }
-
- if (cfg_col->max_prefix != col->max_prefix) {
- ib_errf(thd,
- IB_LOG_LEVEL_ERROR,
- ER_TABLE_SCHEMA_MISMATCH,
- "Column %s max prefix mismatch.",
- col_name);
- err = DB_ERROR;
- }
- }
- }
-
- return(err);
-}
-
-/**
-Check if the table (and index) schema that was read from the .cfg file
-matches the in memory table definition.
-@param thd - MySQL session variable
-@return DB_SUCCESS or error code. */
-dberr_t
-row_import::match_schema(
- THD* thd) UNIV_NOTHROW
-{
- /* Do some simple checks. */
-
- if ((m_table->flags ^ m_flags) & ~DICT_TF_MASK_DATA_DIR) {
- ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
- "Table flags don't match, server table has 0x%x"
- " and the meta-data file has 0x%lx",
- m_table->flags, ulong(m_flags));
-
- return(DB_ERROR);
- } else if (m_table->n_cols != m_n_cols) {
- ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
- "Number of columns don't match, table has %u "
- "columns but the tablespace meta-data file has "
- ULINTPF " columns",
- m_table->n_cols, m_n_cols);
-
- return(DB_ERROR);
- } else if (UT_LIST_GET_LEN(m_table->indexes) != m_n_indexes) {
-
- /* If the number of indexes don't match then it is better
- to abort the IMPORT. It is easy for the user to create a
- table matching the IMPORT definition. */
-
- ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
- "Number of indexes don't match, table has " ULINTPF
- " indexes but the tablespace meta-data file has "
- ULINTPF " indexes",
- UT_LIST_GET_LEN(m_table->indexes), m_n_indexes);
-
- return(DB_ERROR);
- }
-
- dberr_t err = match_table_columns(thd);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- /* Check if the index definitions match. */
-
- const dict_index_t* index;
-
- for (index = UT_LIST_GET_FIRST(m_table->indexes);
- index != 0;
- index = UT_LIST_GET_NEXT(indexes, index)) {
-
- dberr_t index_err;
-
- index_err = match_index_columns(thd, index);
-
- if (index_err != DB_SUCCESS) {
- err = index_err;
- }
- }
-
- return(err);
-}
-
-/**
-Set the index root <space, pageno>, using index name. */
-void
-row_import::set_root_by_name() UNIV_NOTHROW
-{
- row_index_t* cfg_index = m_indexes;
-
- for (ulint i = 0; i < m_n_indexes; ++i, ++cfg_index) {
- dict_index_t* index;
-
- const char* index_name;
-
- index_name = reinterpret_cast<const char*>(cfg_index->m_name);
-
- index = dict_table_get_index_on_name(m_table, index_name);
-
- /* We've already checked that it exists. */
- ut_a(index != 0);
-
- /* Set the root page number and space id. */
- index->space = m_table->space;
- index->page = cfg_index->m_page_no;
- }
-}
-
-/**
-Set the index root <space, pageno>, using a heuristic.
-@return DB_SUCCESS or error code */
-dberr_t
-row_import::set_root_by_heuristic() UNIV_NOTHROW
-{
- row_index_t* cfg_index = m_indexes;
-
- ut_a(m_n_indexes > 0);
-
- // TODO: For now use brute force, based on ordinality
-
- if (UT_LIST_GET_LEN(m_table->indexes) != m_n_indexes) {
-
- char table_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- table_name, sizeof(table_name), m_table->name, FALSE);
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Table %s should have " ULINTPF
- " indexes but the tablespace has " ULINTPF " indexes",
- table_name,
- UT_LIST_GET_LEN(m_table->indexes),
- m_n_indexes);
- }
-
- dict_mutex_enter_for_mysql();
-
- ulint i = 0;
- dberr_t err = DB_SUCCESS;
-
- for (dict_index_t* index = UT_LIST_GET_FIRST(m_table->indexes);
- index != 0;
- index = UT_LIST_GET_NEXT(indexes, index)) {
-
- if (index->type & DICT_FTS) {
- index->type |= DICT_CORRUPT;
- ib_logf(IB_LOG_LEVEL_WARN,
- "Skipping FTS index: %s", index->name);
- } else if (i < m_n_indexes) {
-
- delete [] cfg_index[i].m_name;
-
- ulint len = strlen(index->name) + 1;
-
- cfg_index[i].m_name = new(std::nothrow) byte[len];
-
- /* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_14",
- delete[] cfg_index[i].m_name;
- cfg_index[i].m_name = 0;);
-
- if (cfg_index[i].m_name == 0) {
- err = DB_OUT_OF_MEMORY;
- break;
- }
-
- memcpy(cfg_index[i].m_name, index->name, len);
-
- cfg_index[i].m_srv_index = index;
-
- index->space = m_table->space;
- index->page = cfg_index[i].m_page_no;
-
- ++i;
- }
- }
-
- dict_mutex_exit_for_mysql();
-
- return(err);
-}
-
-/**
-Purge delete marked records.
-@return DB_SUCCESS or error code. */
-dberr_t
-IndexPurge::garbage_collect() UNIV_NOTHROW
-{
- dberr_t err;
- ibool comp = dict_table_is_comp(m_index->table);
-
- /* Open the persistent cursor and start the mini-transaction. */
-
- open();
-
- while ((err = next()) == DB_SUCCESS) {
-
- rec_t* rec = btr_pcur_get_rec(&m_pcur);
- ibool deleted = rec_get_deleted_flag(rec, comp);
-
- if (!deleted) {
- ++m_n_rows;
- } else {
- purge();
- }
- }
-
- /* Close the persistent cursor and commit the mini-transaction. */
-
- close();
-
- return(err == DB_END_OF_INDEX ? DB_SUCCESS : err);
-}
-
-/**
-Begin import, position the cursor on the first record. */
-void
-IndexPurge::open() UNIV_NOTHROW
-{
- mtr_start(&m_mtr);
-
- mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
-
- btr_pcur_open_at_index_side(
- true, m_index, BTR_MODIFY_LEAF, &m_pcur, true, 0, &m_mtr);
-}
-
-/**
-Close the persistent curosr and commit the mini-transaction. */
-void
-IndexPurge::close() UNIV_NOTHROW
-{
- btr_pcur_close(&m_pcur);
- mtr_commit(&m_mtr);
-}
-
-/**
-Position the cursor on the next record.
-@return DB_SUCCESS or error code */
-dberr_t
-IndexPurge::next() UNIV_NOTHROW
-{
- btr_pcur_move_to_next_on_page(&m_pcur);
-
- /* When switching pages, commit the mini-transaction
- in order to release the latch on the old page. */
-
- if (!btr_pcur_is_after_last_on_page(&m_pcur)) {
- return(DB_SUCCESS);
- } else if (trx_is_interrupted(m_trx)) {
- /* Check after every page because the check
- is expensive. */
- return(DB_INTERRUPTED);
- }
-
- btr_pcur_store_position(&m_pcur, &m_mtr);
-
- mtr_commit(&m_mtr);
-
- mtr_start(&m_mtr);
-
- mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
-
- btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr);
-
- if (!btr_pcur_move_to_next_user_rec(&m_pcur, &m_mtr)) {
-
- return(DB_END_OF_INDEX);
- }
-
- return(DB_SUCCESS);
-}
-
-/**
-Store the persistent cursor position and reopen the
-B-tree cursor in BTR_MODIFY_TREE mode, because the
-tree structure may be changed during a pessimistic delete. */
-void
-IndexPurge::purge_pessimistic_delete() UNIV_NOTHROW
-{
- dberr_t err;
-
- btr_pcur_restore_position(BTR_MODIFY_TREE, &m_pcur, &m_mtr);
-
- ut_ad(rec_get_deleted_flag(
- btr_pcur_get_rec(&m_pcur),
- dict_table_is_comp(m_index->table)));
-
- btr_cur_pessimistic_delete(
- &err, FALSE, btr_pcur_get_btr_cur(&m_pcur), 0, RB_NONE, &m_mtr);
-
- ut_a(err == DB_SUCCESS);
-
- /* Reopen the B-tree cursor in BTR_MODIFY_LEAF mode */
- mtr_commit(&m_mtr);
-}
-
-/**
-Purge delete-marked records. */
-void
-IndexPurge::purge() UNIV_NOTHROW
-{
- btr_pcur_store_position(&m_pcur, &m_mtr);
-
- purge_pessimistic_delete();
-
- mtr_start(&m_mtr);
-
- mtr_set_log_mode(&m_mtr, MTR_LOG_NO_REDO);
-
- btr_pcur_restore_position(BTR_MODIFY_LEAF, &m_pcur, &m_mtr);
-}
-
-/**
-Constructor
-* @param cfg - config of table being imported.
-* @param trx - transaction covering the import */
-PageConverter::PageConverter(
- row_import* cfg,
- trx_t* trx)
- :
- AbstractCallback(trx),
- m_cfg(cfg),
- m_page_zip_ptr(0),
- m_heap(0) UNIV_NOTHROW
-{
- m_index = m_cfg->m_indexes;
-
- m_current_lsn = log_get_lsn();
- ut_a(m_current_lsn > 0);
-
- m_offsets = m_offsets_;
- rec_offs_init(m_offsets_);
-
- m_cluster_index = dict_table_get_first_index(m_cfg->m_table);
-}
-
-/**
-Adjust the BLOB reference for a single column that is externally stored
-@param rec - record to update
-@param offsets - column offsets for the record
-@param i - column ordinal value
-@return DB_SUCCESS or error code */
-dberr_t
-PageConverter::adjust_cluster_index_blob_column(
- rec_t* rec,
- const ulint* offsets,
- ulint i) UNIV_NOTHROW
-{
- ulint len;
- byte* field;
-
- field = rec_get_nth_field(rec, offsets, i, &len);
-
- DBUG_EXECUTE_IF("ib_import_trigger_corruption_2",
- len = BTR_EXTERN_FIELD_REF_SIZE - 1;);
-
- if (len < BTR_EXTERN_FIELD_REF_SIZE) {
-
- char index_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- index_name, sizeof(index_name),
- m_cluster_index->name, TRUE);
-
- ib_errf(m_trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_INNODB_INDEX_CORRUPT,
- "Externally stored column(" ULINTPF
- ") has a reference length of " ULINTPF
- " in the cluster index %s",
- i, len, index_name);
-
- return(DB_CORRUPTION);
- }
-
- field += BTR_EXTERN_SPACE_ID - BTR_EXTERN_FIELD_REF_SIZE + len;
-
- if (is_compressed_table()) {
- mach_write_to_4(field, get_space_id());
-
- page_zip_write_blob_ptr(
- m_page_zip_ptr, rec, m_cluster_index, offsets, i, 0);
- } else {
- mlog_write_ulint(field, get_space_id(), MLOG_4BYTES, 0);
- }
-
- return(DB_SUCCESS);
-}
-
-/**
-Adjusts the BLOB reference in the clustered index row for all externally
-stored columns.
-@param rec - record to update
-@param offsets - column offsets for the record
-@return DB_SUCCESS or error code */
-dberr_t
-PageConverter::adjust_cluster_index_blob_columns(
- rec_t* rec,
- const ulint* offsets) UNIV_NOTHROW
-{
- ut_ad(rec_offs_any_extern(offsets));
-
- /* Adjust the space_id in the BLOB pointers. */
-
- for (ulint i = 0; i < rec_offs_n_fields(offsets); ++i) {
-
- /* Only if the column is stored "externally". */
-
- if (rec_offs_nth_extern(offsets, i)) {
- dberr_t err;
-
- err = adjust_cluster_index_blob_column(rec, offsets, i);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
- }
- }
-
- return(DB_SUCCESS);
-}
-
-/**
-In the clustered index, adjust BLOB pointers as needed. Also update the
-BLOB reference, write the new space id.
-@param rec - record to update
-@param offsets - column offsets for the record
-@return DB_SUCCESS or error code */
-dberr_t
-PageConverter::adjust_cluster_index_blob_ref(
- rec_t* rec,
- const ulint* offsets) UNIV_NOTHROW
-{
- if (rec_offs_any_extern(offsets)) {
- dberr_t err;
-
- err = adjust_cluster_index_blob_columns(rec, offsets);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
- }
-
- return(DB_SUCCESS);
-}
-
-/**
-Purge delete-marked records, only if it is possible to do so without
-re-organising the B+tree.
-@param offsets - current row offsets.
-@return true if purge succeeded */
-bool
-PageConverter::purge(const ulint* offsets) UNIV_NOTHROW
-{
- const dict_index_t* index = m_index->m_srv_index;
-
- /* We can't have a page that is empty and not root. */
- if (m_rec_iter.remove(index, m_page_zip_ptr, m_offsets)) {
-
- ++m_index->m_stats.m_n_purged;
-
- return(true);
- } else {
- ++m_index->m_stats.m_n_purge_failed;
- }
-
- return(false);
-}
-
-/**
-Adjust the BLOB references and sys fields for the current record.
-@param rec - record to update
-@param offsets - column offsets for the record
-@param deleted - true if row is delete marked
-@return DB_SUCCESS or error code. */
-dberr_t
-PageConverter::adjust_cluster_record(
- const dict_index_t* index,
- rec_t* rec,
- const ulint* offsets,
- bool deleted) UNIV_NOTHROW
-{
- dberr_t err;
-
- if ((err = adjust_cluster_index_blob_ref(rec, offsets)) == DB_SUCCESS) {
-
- /* Reset DB_TRX_ID and DB_ROLL_PTR. Normally, these fields
- are only written in conjunction with other changes to the
- record. */
-
- row_upd_rec_sys_fields(
- rec, m_page_zip_ptr, m_cluster_index, m_offsets,
- m_trx, 0);
- }
-
- return(err);
-}
-
-/**
-Update the BLOB refrences and write UNDO log entries for
-rows that can't be purged optimistically.
-@param block - block to update
-@retval DB_SUCCESS or error code */
-dberr_t
-PageConverter::update_records(
- buf_block_t* block) UNIV_NOTHROW
-{
- ibool comp = dict_table_is_comp(m_cfg->m_table);
- bool clust_index = m_index->m_srv_index == m_cluster_index;
-
- /* This will also position the cursor on the first user record. */
-
- m_rec_iter.open(block);
-
- while (!m_rec_iter.end()) {
-
- rec_t* rec = m_rec_iter.current();
-
- /* FIXME: Move out of the loop */
-
- if (rec_get_status(rec) == REC_STATUS_NODE_PTR) {
- break;
- }
-
- ibool deleted = rec_get_deleted_flag(rec, comp);
-
- /* For the clustered index we have to adjust the BLOB
- reference and the system fields irrespective of the
- delete marked flag. The adjustment of delete marked
- cluster records is required for purge to work later. */
-
- if (deleted || clust_index) {
- m_offsets = rec_get_offsets(
- rec, m_index->m_srv_index, m_offsets,
- ULINT_UNDEFINED, &m_heap);
- }
-
- if (clust_index) {
-
- dberr_t err = adjust_cluster_record(
- m_index->m_srv_index, rec, m_offsets,
- deleted);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
- }
-
- /* If it is a delete marked record then try an
- optimistic delete. */
-
- if (deleted) {
- /* A successful purge will move the cursor to the
- next record. */
-
- if (!purge(m_offsets)) {
- m_rec_iter.next();
- }
-
- ++m_index->m_stats.m_n_deleted;
- } else {
- ++m_index->m_stats.m_n_rows;
- m_rec_iter.next();
- }
- }
-
- return(DB_SUCCESS);
-}
-
-/**
-Update the space, index id, trx id.
-@return DB_SUCCESS or error code */
-dberr_t
-PageConverter::update_index_page(
- buf_block_t* block) UNIV_NOTHROW
-{
- index_id_t id;
- buf_frame_t* page = block->frame;
-
- if (is_free(buf_block_get_page_no(block))) {
- return(DB_SUCCESS);
- } else if ((id = btr_page_get_index_id(page)) != m_index->m_id) {
-
- row_index_t* index = find_index(id);
-
- if (index == 0) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Page for tablespace " ULINTPF " is "
- " index page with id " IB_ID_FMT " but that"
- " index is not found from configuration file."
- " Current index name %s and id " IB_ID_FMT ".",
- m_space,
- id,
- m_index->m_name,
- m_index->m_id);
- m_index = 0;
- return(DB_CORRUPTION);
- }
-
- /* Update current index */
- m_index = index;
- }
-
- /* If the .cfg file is missing and there is an index mismatch
- then ignore the error. */
- if (m_cfg->m_missing && (m_index == 0 || m_index->m_srv_index == 0)) {
- return(DB_SUCCESS);
- }
-
-#ifdef UNIV_ZIP_DEBUG
- ut_a(!is_compressed_table()
- || page_zip_validate(m_page_zip_ptr, page, m_index->m_srv_index));
-#endif /* UNIV_ZIP_DEBUG */
-
- /* This has to be written to uncompressed index header. Set it to
- the current index id. */
- btr_page_set_index_id(
- page, m_page_zip_ptr, m_index->m_srv_index->id, 0);
-
- page_set_max_trx_id(block, m_page_zip_ptr, m_trx->id, 0);
-
- if (page_is_empty(block->frame)) {
-
- /* Only a root page can be empty. */
- if (!is_root_page(block->frame)) {
- // TODO: We should relax this and skip secondary
- // indexes. Mark them as corrupt because they can
- // always be rebuilt.
- return(DB_CORRUPTION);
- }
-
- return(DB_SUCCESS);
- }
-
- return(update_records(block));
-}
-
-/**
-Validate the space flags and update tablespace header page.
-@param block - block read from file, not from the buffer pool.
-@retval DB_SUCCESS or error code */
-dberr_t
-PageConverter::update_header(
- buf_block_t* block) UNIV_NOTHROW
-{
- /* Check for valid header */
- switch(fsp_header_get_space_id(get_frame(block))) {
- case 0:
- return(DB_CORRUPTION);
- case ULINT_UNDEFINED:
- ib_logf(IB_LOG_LEVEL_WARN,
- "Space id check in the header failed "
- "- ignored");
- }
-
- mach_write_to_8(
- get_frame(block) + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION,
- m_current_lsn);
-
- /* Write back the adjusted flags. */
- mach_write_to_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS
- + get_frame(block), m_space_flags);
-
- /* Write space_id to the tablespace header, page 0. */
- mach_write_to_4(
- get_frame(block) + FSP_HEADER_OFFSET + FSP_SPACE_ID,
- get_space_id());
-
- /* This is on every page in the tablespace. */
- mach_write_to_4(
- get_frame(block) + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
- get_space_id());
-
- return(DB_SUCCESS);
-}
-
-/**
-Update the page, set the space id, max trx id and index id.
-@param block - block read from file
-@retval DB_SUCCESS or error code */
-dberr_t
-PageConverter::update_page(
- buf_block_t* block,
- ulint& page_type) UNIV_NOTHROW
-{
- dberr_t err = DB_SUCCESS;
-
- switch (page_type = fil_page_get_type(get_frame(block))) {
- case FIL_PAGE_TYPE_FSP_HDR:
- /* Work directly on the uncompressed page headers. */
- ut_a(buf_block_get_page_no(block) == 0);
- return(update_header(block));
-
- case FIL_PAGE_INDEX:
- /* We need to decompress the contents into block->frame
- before we can do any thing with Btree pages. */
-
- if (is_compressed_table() && !buf_zip_decompress(block, TRUE)) {
- return(DB_CORRUPTION);
- }
-
- /* This is on every page in the tablespace. */
- mach_write_to_4(
- get_frame(block)
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, get_space_id());
-
- /* Only update the Btree nodes. */
- return(update_index_page(block));
-
- case FIL_PAGE_TYPE_SYS:
- /* This is page 0 in the system tablespace. */
- return(DB_CORRUPTION);
-
- case FIL_PAGE_TYPE_XDES:
- err = set_current_xdes(
- buf_block_get_page_no(block), get_frame(block));
- /* fall through */
- case FIL_PAGE_INODE:
- case FIL_PAGE_TYPE_TRX_SYS:
- case FIL_PAGE_IBUF_FREE_LIST:
- case FIL_PAGE_TYPE_ALLOCATED:
- case FIL_PAGE_IBUF_BITMAP:
- case FIL_PAGE_TYPE_BLOB:
- case FIL_PAGE_TYPE_ZBLOB:
- case FIL_PAGE_TYPE_ZBLOB2:
-
- /* Work directly on the uncompressed page headers. */
- /* This is on every page in the tablespace. */
- mach_write_to_4(
- get_frame(block)
- + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, get_space_id());
-
- return(err);
- }
-
- ib_logf(IB_LOG_LEVEL_WARN, "Unknown page type (" ULINTPF ")",
- page_type);
-
- return(DB_CORRUPTION);
-}
-
-/**
-Validate the page
-@param offset - physical offset within file.
-@param page - page read from file.
-@return status */
-PageConverter::import_page_status_t
-PageConverter::validate(
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW
-{
- buf_frame_t* page = get_frame(block);
-
- /* Check that the page number corresponds to the offset in
- the file. Flag as corrupt if it doesn't. Disable the check
- for LSN in buf_page_is_corrupted() */
-
- if (buf_page_is_corrupted(false, page, get_zip_size(), NULL)
- || (page_get_page_no(page) != offset / m_page_size
- && page_get_page_no(page) != 0)) {
-
- return(IMPORT_PAGE_STATUS_CORRUPTED);
-
- } else if (offset > 0 && page_get_page_no(page) == 0) {
- ulint checksum;
-
- checksum = mach_read_from_4(page + FIL_PAGE_SPACE_OR_CHKSUM);
- if (checksum != 0) {
- /* Checksum check passed in buf_page_is_corrupted(). */
- ib_logf(IB_LOG_LEVEL_WARN,
- "%s: Page %lu checksum " ULINTPF
- " should be zero.",
- m_filepath, (ulong) (offset / m_page_size),
- checksum);
- }
-
- const byte* b = page + FIL_PAGE_OFFSET;
- const byte* e = page + m_page_size
- - FIL_PAGE_END_LSN_OLD_CHKSUM;
-
- /* If the page number is zero and offset > 0 then
- the entire page MUST consist of zeroes. If not then
- we flag it as corrupt. */
-
- while (b != e) {
-
- if (*b++ && !trigger_corruption()) {
- return(IMPORT_PAGE_STATUS_CORRUPTED);
- }
- }
-
- /* The page is all zero: do nothing. */
- return(IMPORT_PAGE_STATUS_ALL_ZERO);
- }
-
- return(IMPORT_PAGE_STATUS_OK);
-}
-
-/**
-Called for every page in the tablespace. If the page was not
-updated then its state must be set to BUF_PAGE_NOT_USED.
-@param offset - physical offset within the file
-@param block - block read from file, note it is not from the buffer pool
-@retval DB_SUCCESS or error code. */
-dberr_t
-PageConverter::operator() (
- os_offset_t offset,
- buf_block_t* block) UNIV_NOTHROW
-{
- ulint page_type;
- dberr_t err = DB_SUCCESS;
-
- if ((err = periodic_check()) != DB_SUCCESS) {
- return(err);
- }
-
- if (is_compressed_table()) {
- m_page_zip_ptr = &block->page.zip;
- } else {
- ut_ad(m_page_zip_ptr == 0);
- }
-
- switch(validate(offset, block)) {
- case IMPORT_PAGE_STATUS_OK:
-
- /* We have to decompress the compressed pages before
- we can work on them */
-
- if ((err = update_page(block, page_type)) != DB_SUCCESS) {
- return(err);
- }
-
- /* Note: For compressed pages this function will write to the
- zip descriptor and for uncompressed pages it will write to
- page (ie. the block->frame). Therefore the caller should write
- out the descriptor contents and not block->frame for compressed
- pages. */
-
- if (!is_compressed_table() || page_type == FIL_PAGE_INDEX) {
-
- buf_flush_init_for_writing(
- !is_compressed_table()
- ? block->frame : block->page.zip.data,
- !is_compressed_table() ? 0 : m_page_zip_ptr,
- m_current_lsn);
- } else {
- /* Calculate and update the checksum of non-btree
- pages for compressed tables explicitly here. */
-
- buf_flush_update_zip_checksum(
- get_frame(block), get_zip_size(),
- m_current_lsn);
- }
-
- break;
-
- case IMPORT_PAGE_STATUS_ALL_ZERO:
- /* The page is all zero: leave it as is. */
- break;
-
- case IMPORT_PAGE_STATUS_CORRUPTED:
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "%s: Page %lu at offset " UINT64PF " looks corrupted.",
- m_filepath, (ulong) (offset / m_page_size), offset);
-
- return(DB_CORRUPTION);
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Clean up after import tablespace failure, this function will acquire
-the dictionary latches on behalf of the transaction if the transaction
-hasn't already acquired them. */
-static MY_ATTRIBUTE((nonnull))
-void
-row_import_discard_changes(
-/*=======================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
- trx_t* trx, /*!< in/out: transaction for import */
- dberr_t err) /*!< in: error code */
-{
- dict_table_t* table = prebuilt->table;
-
- ut_a(err != DB_SUCCESS);
-
- prebuilt->trx->error_info = NULL;
-
- char table_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- table_name, sizeof(table_name),
- prebuilt->table->name, FALSE);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Discarding tablespace of table %s: %s",
- table_name, ut_strerr(err));
-
- if (trx->dict_operation_lock_mode != RW_X_LATCH) {
- ut_a(trx->dict_operation_lock_mode == 0);
- row_mysql_lock_data_dictionary(trx);
- }
-
- ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
-
- /* Since we update the index root page numbers on disk after
- we've done a successful import. The table will not be loadable.
- However, we need to ensure that the in memory root page numbers
- are reset to "NULL". */
-
- for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
- index != 0;
- index = UT_LIST_GET_NEXT(indexes, index)) {
-
- index->page = FIL_NULL;
- index->space = FIL_NULL;
- }
-
- table->file_unreadable = true;
-
- fil_close_tablespace(trx, table->space);
-}
-
-/*****************************************************************//**
-Clean up after import tablespace. */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_import_cleanup(
-/*===============*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
- trx_t* trx, /*!< in/out: transaction for import */
- dberr_t err) /*!< in: error code */
-{
- ut_a(prebuilt->trx != trx);
-
- if (err != DB_SUCCESS) {
- row_import_discard_changes(prebuilt, trx, err);
- }
-
- ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
-
- DBUG_EXECUTE_IF("ib_import_before_commit_crash", DBUG_SUICIDE(););
-
- trx_commit_for_mysql(trx);
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx_free_for_mysql(trx);
-
- prebuilt->trx->op_info = "";
-
- DBUG_EXECUTE_IF("ib_import_before_checkpoint_crash", DBUG_SUICIDE(););
-
- log_make_checkpoint_at(LSN_MAX, TRUE);
-
- return(err);
-}
-
-/*****************************************************************//**
-Report error during tablespace import. */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_import_error(
-/*=============*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from handler */
- trx_t* trx, /*!< in/out: transaction for import */
- dberr_t err) /*!< in: error code */
-{
- if (!trx_is_interrupted(trx)) {
- char table_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- table_name, sizeof(table_name),
- prebuilt->table->name, FALSE);
-
- ib_senderrf(
- trx->mysql_thd, IB_LOG_LEVEL_WARN,
- ER_INNODB_IMPORT_ERROR,
- table_name, (ulong) err, ut_strerr(err));
- }
-
- return(row_import_cleanup(prebuilt, trx, err));
-}
-
-/*****************************************************************//**
-Adjust the root page index node and leaf node segment headers, update
-with the new space id. For all the table's secondary indexes.
-@return error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_import_adjust_root_pages_of_secondary_indexes(
-/*==============================================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from
- handler */
- trx_t* trx, /*!< in: transaction used for
- the import */
- dict_table_t* table, /*!< in: table the indexes
- belong to */
- const row_import& cfg) /*!< Import context */
-{
- dict_index_t* index;
- ulint n_rows_in_table;
- dberr_t err = DB_SUCCESS;
-
- /* Skip the clustered index. */
- index = dict_table_get_first_index(table);
-
- n_rows_in_table = cfg.get_n_rows(index->name);
-
- DBUG_EXECUTE_IF("ib_import_sec_rec_count_mismatch_failure",
- n_rows_in_table++;);
-
- /* Adjust the root pages of the secondary indexes only. */
- while ((index = dict_table_get_next_index(index)) != NULL) {
- char index_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- index_name, sizeof(index_name), index->name, TRUE);
-
- ut_a(!dict_index_is_clust(index));
-
- if (!(index->type & DICT_CORRUPT)
- && index->space != FIL_NULL
- && index->page != FIL_NULL) {
-
- /* Update the Btree segment headers for index node and
- leaf nodes in the root page. Set the new space id. */
-
- err = btr_root_adjust_on_import(index);
- } else {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Skip adjustment of root pages for "
- "index %s.", index->name);
-
- err = DB_CORRUPTION;
- }
-
- if (err != DB_SUCCESS) {
-
- if (index->type & DICT_CLUSTERED) {
- break;
- }
-
- ib_errf(trx->mysql_thd,
- IB_LOG_LEVEL_WARN,
- ER_INNODB_INDEX_CORRUPT,
- "Index '%s' not found or corrupt, "
- "you should recreate this index.",
- index_name);
-
- /* Do not bail out, so that the data
- can be recovered. */
-
- err = DB_SUCCESS;
- index->type |= DICT_CORRUPT;
- continue;
- }
-
- /* If we failed to purge any records in the index then
- do it the hard way.
-
- TODO: We can do this in the first pass by generating UNDO log
- records for the failed rows. */
-
- if (!cfg.requires_purge(index->name)) {
- continue;
- }
-
- IndexPurge purge(trx, index);
-
- trx->op_info = "secondary: purge delete marked records";
-
- err = purge.garbage_collect();
-
- trx->op_info = "";
-
- if (err != DB_SUCCESS) {
- break;
- } else if (purge.get_n_rows() != n_rows_in_table) {
-
- ib_errf(trx->mysql_thd,
- IB_LOG_LEVEL_WARN,
- ER_INNODB_INDEX_CORRUPT,
- "Index '%s' contains " ULINTPF " entries, "
- "should be " ULINTPF ", you should recreate "
- "this index.", index_name,
- purge.get_n_rows(), n_rows_in_table);
-
- index->type |= DICT_CORRUPT;
-
- /* Do not bail out, so that the data
- can be recovered. */
-
- err = DB_SUCCESS;
- }
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Ensure that dict_sys->row_id exceeds SELECT MAX(DB_ROW_ID).
-@return error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_import_set_sys_max_row_id(
-/*==========================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt from
- handler */
- const dict_table_t* table) /*!< in: table to import */
-{
- dberr_t err;
- const rec_t* rec;
- mtr_t mtr;
- btr_pcur_t pcur;
- row_id_t row_id = 0;
- dict_index_t* index;
-
- index = dict_table_get_first_index(table);
- ut_a(dict_index_is_clust(index));
-
- mtr_start(&mtr);
-
- mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
-
- btr_pcur_open_at_index_side(
- false, // High end
- index,
- BTR_SEARCH_LEAF,
- &pcur,
- true, // Init cursor
- 0, // Leaf level
- &mtr);
-
- btr_pcur_move_to_prev_on_page(&pcur);
- rec = btr_pcur_get_rec(&pcur);
-
- /* Check for empty table. */
- if (!page_rec_is_infimum(rec)) {
- ulint len;
- const byte* field;
- mem_heap_t* heap = NULL;
- ulint offsets_[1 + REC_OFFS_HEADER_SIZE];
- ulint* offsets;
-
- rec_offs_init(offsets_);
-
- offsets = rec_get_offsets(
- rec, index, offsets_, ULINT_UNDEFINED, &heap);
-
- field = rec_get_nth_field(
- rec, offsets,
- dict_index_get_sys_col_pos(index, DATA_ROW_ID),
- &len);
-
- if (len == DATA_ROW_ID_LEN) {
- row_id = mach_read_from_6(field);
- err = DB_SUCCESS;
- } else {
- err = DB_CORRUPTION;
- }
-
- if (heap != NULL) {
- mem_heap_free(heap);
- }
- } else {
- /* The table is empty. */
- err = DB_SUCCESS;
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- DBUG_EXECUTE_IF("ib_import_set_max_rowid_failure",
- err = DB_CORRUPTION;);
-
- if (err != DB_SUCCESS) {
- char index_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- index_name, sizeof(index_name), index->name, TRUE);
-
- ib_errf(prebuilt->trx->mysql_thd,
- IB_LOG_LEVEL_WARN,
- ER_INNODB_INDEX_CORRUPT,
- "Index '%s' corruption detected, invalid DB_ROW_ID "
- "in index.", index_name);
-
- return(err);
-
- } else if (row_id > 0) {
-
- /* Update the system row id if the imported index row id is
- greater than the max system row id. */
-
- mutex_enter(&dict_sys->mutex);
-
- if (row_id >= dict_sys->row_id) {
- dict_sys->row_id = row_id + 1;
- dict_hdr_flush_row_id();
- }
-
- mutex_exit(&dict_sys->mutex);
- }
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Read the a string from the meta data file.
-@return DB_SUCCESS or error code. */
-static
-dberr_t
-row_import_cfg_read_string(
-/*=======================*/
- FILE* file, /*!< in/out: File to read from */
- byte* ptr, /*!< out: string to read */
- ulint max_len) /*!< in: maximum length of the output
- buffer in bytes */
-{
- DBUG_EXECUTE_IF("ib_import_string_read_error",
- errno = EINVAL; return(DB_IO_ERROR););
-
- ulint len = 0;
-
- while (!feof(file)) {
- int ch = fgetc(file);
-
- if (ch == EOF) {
- break;
- } else if (ch != 0) {
- if (len < max_len) {
- ptr[len++] = ch;
- } else {
- break;
- }
- /* max_len includes the NUL byte */
- } else if (len != max_len - 1) {
- break;
- } else {
- ptr[len] = 0;
- return(DB_SUCCESS);
- }
- }
-
- errno = EINVAL;
-
- return(DB_IO_ERROR);
-}
-
-/*********************************************************************//**
-Write the meta data (index user fields) config file.
-@return DB_SUCCESS or error code. */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_import_cfg_read_index_fields(
-/*=============================*/
- FILE* file, /*!< in: file to write to */
- THD* thd, /*!< in/out: session */
- row_index_t* index, /*!< Index being read in */
- row_import* cfg) /*!< in/out: meta-data read */
-{
- byte row[sizeof(ib_uint32_t) * 3];
- ulint n_fields = index->m_n_fields;
-
- index->m_fields = new(std::nothrow) dict_field_t[n_fields];
-
- /* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_4",
- delete [] index->m_fields; index->m_fields = 0;);
-
- if (index->m_fields == 0) {
- return(DB_OUT_OF_MEMORY);
- }
-
- dict_field_t* field = index->m_fields;
-
- memset(field, 0x0, sizeof(*field) * n_fields);
-
- for (ulint i = 0; i < n_fields; ++i, ++field) {
- byte* ptr = row;
-
- /* Trigger EOF */
- DBUG_EXECUTE_IF("ib_import_io_read_error_1",
- (void) fseek(file, 0L, SEEK_END););
-
- if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
- "while reading index fields.");
-
- return(DB_IO_ERROR);
- }
-
- field->prefix_len = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- field->fixed_len = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- /* Include the NUL byte in the length. */
- ulint len = mach_read_from_4(ptr);
-
- byte* name = new(std::nothrow) byte[len];
-
- /* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_5", delete [] name; name = 0;);
-
- if (name == 0) {
- return(DB_OUT_OF_MEMORY);
- }
-
- field->name = reinterpret_cast<const char*>(name);
-
- dberr_t err = row_import_cfg_read_string(file, name, len);
-
- if (err != DB_SUCCESS) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
- "while parsing table name.");
-
- return(err);
- }
- }
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Read the index names and root page numbers of the indexes and set the values.
-Row format [root_page_no, len of str, str ... ]
-@return DB_SUCCESS or error code. */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_import_read_index_data(
-/*=======================*/
- FILE* file, /*!< in: File to read from */
- THD* thd, /*!< in: session */
- row_import* cfg) /*!< in/out: meta-data read */
-{
- byte* ptr;
- row_index_t* cfg_index;
- byte row[sizeof(index_id_t) + sizeof(ib_uint32_t) * 9];
-
- /* FIXME: What is the max value? */
- ut_a(cfg->m_n_indexes > 0);
- ut_a(cfg->m_n_indexes < 1024);
-
- cfg->m_indexes = new(std::nothrow) row_index_t[cfg->m_n_indexes];
-
- /* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_6",
- delete [] cfg->m_indexes; cfg->m_indexes = 0;);
-
- if (cfg->m_indexes == 0) {
- return(DB_OUT_OF_MEMORY);
- }
-
- memset(cfg->m_indexes, 0x0, sizeof(*cfg->m_indexes) * cfg->m_n_indexes);
-
- cfg_index = cfg->m_indexes;
-
- for (ulint i = 0; i < cfg->m_n_indexes; ++i, ++cfg_index) {
- /* Trigger EOF */
- DBUG_EXECUTE_IF("ib_import_io_read_error_2",
- (void) fseek(file, 0L, SEEK_END););
-
- /* Read the index data. */
- size_t n_bytes = fread(row, 1, sizeof(row), file);
-
- /* Trigger EOF */
- DBUG_EXECUTE_IF("ib_import_io_read_error",
- (void) fseek(file, 0L, SEEK_END););
-
- if (n_bytes != sizeof(row)) {
- char msg[BUFSIZ];
-
- ut_snprintf(msg, sizeof(msg),
- "while reading index meta-data, expected "
- "to read %lu bytes but read only %lu "
- "bytes",
- (ulong) sizeof(row), (ulong) n_bytes);
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno), msg);
-
- ib_logf(IB_LOG_LEVEL_ERROR, "IO Error: %s", msg);
-
- return(DB_IO_ERROR);
- }
-
- ptr = row;
-
- cfg_index->m_id = mach_read_from_8(ptr);
- ptr += sizeof(index_id_t);
-
- cfg_index->m_space = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- cfg_index->m_page_no = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- cfg_index->m_type = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- cfg_index->m_trx_id_offset = mach_read_from_4(ptr);
- if (cfg_index->m_trx_id_offset != mach_read_from_4(ptr)) {
- ut_ad(0);
- /* Overflow. Pretend that the clustered index
- has a variable-length PRIMARY KEY. */
- cfg_index->m_trx_id_offset = 0;
- }
- ptr += sizeof(ib_uint32_t);
-
- cfg_index->m_n_user_defined_cols = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- cfg_index->m_n_uniq = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- cfg_index->m_n_nullable = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- cfg_index->m_n_fields = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- /* The NUL byte is included in the name length. */
- ulint len = mach_read_from_4(ptr);
-
- if (len > OS_FILE_MAX_PATH) {
- ib_errf(thd, IB_LOG_LEVEL_ERROR,
- ER_INNODB_INDEX_CORRUPT,
- "Index name length (" ULINTPF ") is too long, "
- "the meta-data is corrupt", len);
-
- return(DB_CORRUPTION);
- }
-
- cfg_index->m_name = new(std::nothrow) byte[len];
-
- /* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_7",
- delete [] cfg_index->m_name;
- cfg_index->m_name = 0;);
-
- if (cfg_index->m_name == 0) {
- return(DB_OUT_OF_MEMORY);
- }
-
- dberr_t err;
-
- err = row_import_cfg_read_string(file, cfg_index->m_name, len);
-
- if (err != DB_SUCCESS) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
- "while parsing index name.");
-
- return(err);
- }
-
- err = row_import_cfg_read_index_fields(
- file, thd, cfg_index, cfg);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- }
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Set the index root page number for v1 format.
-@return DB_SUCCESS or error code. */
-static
-dberr_t
-row_import_read_indexes(
-/*====================*/
- FILE* file, /*!< in: File to read from */
- THD* thd, /*!< in: session */
- row_import* cfg) /*!< in/out: meta-data read */
-{
- byte row[sizeof(ib_uint32_t)];
-
- /* Trigger EOF */
- DBUG_EXECUTE_IF("ib_import_io_read_error_3",
- (void) fseek(file, 0L, SEEK_END););
-
- /* Read the number of indexes. */
- if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
- "while reading number of indexes.");
-
- return(DB_IO_ERROR);
- }
-
- cfg->m_n_indexes = mach_read_from_4(row);
-
- if (cfg->m_n_indexes == 0) {
- ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- "Number of indexes in meta-data file is 0");
-
- return(DB_CORRUPTION);
-
- } else if (cfg->m_n_indexes > 1024) {
- // FIXME: What is the upper limit? */
- ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- "Number of indexes in meta-data file is too high: "
- ULINTPF, cfg->m_n_indexes);
- cfg->m_n_indexes = 0;
-
- return(DB_CORRUPTION);
- }
-
- return(row_import_read_index_data(file, thd, cfg));
-}
-
-/*********************************************************************//**
-Read the meta data (table columns) config file. Deserialise the contents of
-dict_col_t structure, along with the column name. */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_import_read_columns(
-/*====================*/
- FILE* file, /*!< in: file to write to */
- THD* thd, /*!< in/out: session */
- row_import* cfg) /*!< in/out: meta-data read */
-{
- dict_col_t* col;
- byte row[sizeof(ib_uint32_t) * 8];
-
- /* FIXME: What should the upper limit be? */
- ut_a(cfg->m_n_cols > 0);
- ut_a(cfg->m_n_cols < 1024);
-
- cfg->m_cols = new(std::nothrow) dict_col_t[cfg->m_n_cols];
-
- /* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_8",
- delete [] cfg->m_cols; cfg->m_cols = 0;);
-
- if (cfg->m_cols == 0) {
- return(DB_OUT_OF_MEMORY);
- }
-
- cfg->m_col_names = new(std::nothrow) byte* [cfg->m_n_cols];
-
- /* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_9",
- delete [] cfg->m_col_names; cfg->m_col_names = 0;);
-
- if (cfg->m_col_names == 0) {
- return(DB_OUT_OF_MEMORY);
- }
-
- memset(cfg->m_cols, 0x0, sizeof(cfg->m_cols) * cfg->m_n_cols);
- memset(cfg->m_col_names, 0x0, sizeof(cfg->m_col_names) * cfg->m_n_cols);
-
- col = cfg->m_cols;
-
- for (ulint i = 0; i < cfg->m_n_cols; ++i, ++col) {
- byte* ptr = row;
-
- /* Trigger EOF */
- DBUG_EXECUTE_IF("ib_import_io_read_error_4",
- (void) fseek(file, 0L, SEEK_END););
-
- if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
- "while reading table column meta-data.");
-
- return(DB_IO_ERROR);
- }
-
- col->prtype = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- col->mtype = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- col->len = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- col->mbminmaxlen = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- col->ind = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- col->ord_part = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- col->max_prefix = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- /* Read in the column name as [len, byte array]. The len
- includes the NUL byte. */
-
- ulint len = mach_read_from_4(ptr);
-
- /* FIXME: What is the maximum column name length? */
- if (len == 0 || len > 128) {
- ib_errf(thd, IB_LOG_LEVEL_ERROR,
- ER_IO_READ_ERROR,
- "Column name length " ULINTPF ", is invalid",
- len);
-
- return(DB_CORRUPTION);
- }
-
- cfg->m_col_names[i] = new(std::nothrow) byte[len];
-
- /* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_10",
- delete [] cfg->m_col_names[i];
- cfg->m_col_names[i] = 0;);
-
- if (cfg->m_col_names[i] == 0) {
- return(DB_OUT_OF_MEMORY);
- }
-
- dberr_t err;
-
- err = row_import_cfg_read_string(
- file, cfg->m_col_names[i], len);
-
- if (err != DB_SUCCESS) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
- "while parsing table column name.");
-
- return(err);
- }
- }
-
- return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Read the contents of the <tablespace>.cfg file.
-@return DB_SUCCESS or error code. */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_import_read_v1(
-/*===============*/
- FILE* file, /*!< in: File to read from */
- THD* thd, /*!< in: session */
- row_import* cfg) /*!< out: meta data */
-{
- byte value[sizeof(ib_uint32_t)];
-
- /* Trigger EOF */
- DBUG_EXECUTE_IF("ib_import_io_read_error_5",
- (void) fseek(file, 0L, SEEK_END););
-
- /* Read the hostname where the tablespace was exported. */
- if (fread(value, 1, sizeof(value), file) != sizeof(value)) {
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
- "while reading meta-data export hostname length.");
-
- return(DB_IO_ERROR);
- }
-
- ulint len = mach_read_from_4(value);
-
- /* NUL byte is part of name length. */
- cfg->m_hostname = new(std::nothrow) byte[len];
-
- /* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_1",
- delete [] cfg->m_hostname; cfg->m_hostname = 0;);
-
- if (cfg->m_hostname == 0) {
- return(DB_OUT_OF_MEMORY);
- }
-
- dberr_t err = row_import_cfg_read_string(file, cfg->m_hostname, len);
-
- if (err != DB_SUCCESS) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
- "while parsing export hostname.");
-
- return(err);
- }
-
- /* Trigger EOF */
- DBUG_EXECUTE_IF("ib_import_io_read_error_6",
- (void) fseek(file, 0L, SEEK_END););
-
- /* Read the table name of tablespace that was exported. */
- if (fread(value, 1, sizeof(value), file) != sizeof(value)) {
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
- "while reading meta-data table name length.");
-
- return(DB_IO_ERROR);
- }
-
- len = mach_read_from_4(value);
-
- /* NUL byte is part of name length. */
- cfg->m_table_name = new(std::nothrow) byte[len];
-
- /* Trigger OOM */
- DBUG_EXECUTE_IF("ib_import_OOM_2",
- delete [] cfg->m_table_name; cfg->m_table_name = 0;);
-
- if (cfg->m_table_name == 0) {
- return(DB_OUT_OF_MEMORY);
- }
-
- err = row_import_cfg_read_string(file, cfg->m_table_name, len);
-
- if (err != DB_SUCCESS) {
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
- "while parsing table name.");
-
- return(err);
- }
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Importing tablespace for table '%s' that was exported "
- "from host '%s'", cfg->m_table_name, cfg->m_hostname);
-
- byte row[sizeof(ib_uint32_t) * 3];
-
- /* Trigger EOF */
- DBUG_EXECUTE_IF("ib_import_io_read_error_7",
- (void) fseek(file, 0L, SEEK_END););
-
- /* Read the autoinc value. */
- if (fread(row, 1, sizeof(ib_uint64_t), file) != sizeof(ib_uint64_t)) {
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
- "while reading autoinc value.");
-
- return(DB_IO_ERROR);
- }
-
- cfg->m_autoinc = mach_read_from_8(row);
-
- /* Trigger EOF */
- DBUG_EXECUTE_IF("ib_import_io_read_error_8",
- (void) fseek(file, 0L, SEEK_END););
-
- /* Read the tablespace page size. */
- if (fread(row, 1, sizeof(row), file) != sizeof(row)) {
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
- "while reading meta-data header.");
-
- return(DB_IO_ERROR);
- }
-
- byte* ptr = row;
-
- cfg->m_page_size = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- if (cfg->m_page_size != UNIV_PAGE_SIZE) {
-
- ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_TABLE_SCHEMA_MISMATCH,
- "Tablespace to be imported has a different "
- "page size than this server. Server page size "
- "is " ULINTPF ", whereas tablespace page size is "
- ULINTPF,
- UNIV_PAGE_SIZE, cfg->m_page_size);
-
- return(DB_ERROR);
- }
-
- cfg->m_flags = mach_read_from_4(ptr);
- ptr += sizeof(ib_uint32_t);
-
- cfg->m_n_cols = mach_read_from_4(ptr);
-
- if (!dict_tf_is_valid(cfg->m_flags)) {
-
- return(DB_CORRUPTION);
-
- } else if ((err = row_import_read_columns(file, thd, cfg))
- != DB_SUCCESS) {
-
- return(err);
-
- } else if ((err = row_import_read_indexes(file, thd, cfg))
- != DB_SUCCESS) {
-
- return(err);
- }
-
- ut_a(err == DB_SUCCESS);
- return(err);
-}
-
-/**
-Read the contents of the <tablespace>.cfg file.
-@return DB_SUCCESS or error code. */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_import_read_meta_data(
-/*======================*/
- dict_table_t* table, /*!< in: table */
- FILE* file, /*!< in: File to read from */
- THD* thd, /*!< in: session */
- row_import& cfg) /*!< out: contents of the .cfg file */
-{
- byte row[sizeof(ib_uint32_t)];
-
- /* Trigger EOF */
- DBUG_EXECUTE_IF("ib_import_io_read_error_9",
- (void) fseek(file, 0L, SEEK_END););
-
- if (fread(&row, 1, sizeof(row), file) != sizeof(row)) {
- ib_senderrf(
- thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- errno, strerror(errno),
- "while reading meta-data version.");
-
- return(DB_IO_ERROR);
- }
-
- cfg.m_version = mach_read_from_4(row);
-
- /* Check the version number. */
- switch (cfg.m_version) {
- case IB_EXPORT_CFG_VERSION_V1:
-
- return(row_import_read_v1(file, thd, &cfg));
- default:
- ib_errf(thd, IB_LOG_LEVEL_ERROR, ER_IO_READ_ERROR,
- "Unsupported meta-data version number (" ULINTPF "), "
- "file ignored", cfg.m_version);
- }
-
- return(DB_ERROR);
-}
-
-/**
-Read the contents of the <tablename>.cfg file.
-@return DB_SUCCESS or error code. */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_import_read_cfg(
-/*================*/
- dict_table_t* table, /*!< in: table */
- THD* thd, /*!< in: session */
- row_import& cfg) /*!< out: contents of the .cfg file */
-{
- dberr_t err;
- char name[OS_FILE_MAX_PATH];
-
- cfg.m_table = table;
-
- srv_get_meta_data_filename(table, name, sizeof(name));
-
- FILE* file = fopen(name, "rb");
-
- if (file == NULL) {
- char msg[BUFSIZ];
-
- ut_snprintf(msg, sizeof(msg),
- "Error opening '%s', will attempt to import "
- "without schema verification", name);
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_WARN, ER_IO_READ_ERROR,
- errno, strerror(errno), msg);
-
- cfg.m_missing = true;
-
- err = DB_FAIL;
- } else {
-
- cfg.m_missing = false;
-
- err = row_import_read_meta_data(table, file, thd, cfg);
- fclose(file);
- }
-
- return(err);
-}
-
-/*****************************************************************//**
-Update the <space, root page> of a table's indexes from the values
-in the data dictionary.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-row_import_update_index_root(
-/*=========================*/
- trx_t* trx, /*!< in/out: transaction that
- covers the update */
- const dict_table_t* table, /*!< in: Table for which we want
- to set the root page_no */
- bool reset, /*!< in: if true then set to
- FIL_NUL */
- bool dict_locked) /*!< in: Set to true if the
- caller already owns the
- dict_sys_t:: mutex. */
-
-{
- const dict_index_t* index;
- que_t* graph = 0;
- dberr_t err = DB_SUCCESS;
-
- static const char sql[] = {
- "PROCEDURE UPDATE_INDEX_ROOT() IS\n"
- "BEGIN\n"
- "UPDATE SYS_INDEXES\n"
- "SET SPACE = :space,\n"
- " PAGE_NO = :page,\n"
- " TYPE = :type\n"
- "WHERE TABLE_ID = :table_id AND ID = :index_id;\n"
- "END;\n"};
-
- if (!dict_locked) {
- mutex_enter(&dict_sys->mutex);
- }
-
- for (index = dict_table_get_first_index(table);
- index != 0;
- index = dict_table_get_next_index(index)) {
-
- pars_info_t* info;
- ib_uint32_t page;
- ib_uint32_t space;
- ib_uint32_t type;
- index_id_t index_id;
- table_id_t table_id;
-
- info = (graph != 0) ? graph->info : pars_info_create();
-
- mach_write_to_4(
- reinterpret_cast<byte*>(&type),
- index->type);
-
- mach_write_to_4(
- reinterpret_cast<byte*>(&page),
- reset ? FIL_NULL : index->page);
-
- mach_write_to_4(
- reinterpret_cast<byte*>(&space),
- reset ? FIL_NULL : index->space);
-
- mach_write_to_8(
- reinterpret_cast<byte*>(&index_id),
- index->id);
-
- mach_write_to_8(
- reinterpret_cast<byte*>(&table_id),
- table->id);
-
- /* If we set the corrupt bit during the IMPORT phase then
- we need to update the system tables. */
- pars_info_bind_int4_literal(info, "type", &type);
- pars_info_bind_int4_literal(info, "space", &space);
- pars_info_bind_int4_literal(info, "page", &page);
- pars_info_bind_ull_literal(info, "index_id", &index_id);
- pars_info_bind_ull_literal(info, "table_id", &table_id);
-
- if (graph == 0) {
- graph = pars_sql(info, sql);
- ut_a(graph);
- graph->trx = trx;
- }
-
- que_thr_t* thr;
-
- graph->fork_type = QUE_FORK_MYSQL_INTERFACE;
-
- ut_a(thr = que_fork_start_command(graph));
-
- que_run_threads(thr);
-
- DBUG_EXECUTE_IF("ib_import_internal_error",
- trx->error_state = DB_ERROR;);
-
- err = trx->error_state;
-
- if (err != DB_SUCCESS) {
- char index_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- index_name, sizeof(index_name),
- index->name, TRUE);
-
- ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_INTERNAL_ERROR,
- "While updating the <space, root page "
- "number> of index %s - %s",
- index_name, ut_strerr(err));
-
- break;
- }
- }
-
- que_graph_free(graph);
-
- if (!dict_locked) {
- mutex_exit(&dict_sys->mutex);
- }
-
- return(err);
-}
-
-/** Callback arg for row_import_set_discarded. */
-struct discard_t {
- ib_uint32_t flags2; /*!< Value read from column */
- bool state; /*!< New state of the flag */
- ulint n_recs; /*!< Number of recs processed */
-};
-
-/******************************************************************//**
-Fetch callback that sets or unsets the DISCARDED tablespace flag in
-SYS_TABLES. The flags is stored in MIX_LEN column.
-@return FALSE if all OK */
-static
-ibool
-row_import_set_discarded(
-/*=====================*/
- void* row, /*!< in: sel_node_t* */
- void* user_arg) /*!< in: bool set/unset flag */
-{
- sel_node_t* node = static_cast<sel_node_t*>(row);
- discard_t* discard = static_cast<discard_t*>(user_arg);
- dfield_t* dfield = que_node_get_val(node->select_list);
- dtype_t* type = dfield_get_type(dfield);
- ulint len = dfield_get_len(dfield);
-
- ut_a(dtype_get_mtype(type) == DATA_INT);
- ut_a(len == sizeof(ib_uint32_t));
-
- ulint flags2 = mach_read_from_4(
- static_cast<byte*>(dfield_get_data(dfield)));
-
- if (discard->state) {
- flags2 |= DICT_TF2_DISCARDED;
- } else {
- flags2 &= ~DICT_TF2_DISCARDED;
- }
-
- mach_write_to_4(reinterpret_cast<byte*>(&discard->flags2), flags2);
-
- ++discard->n_recs;
-
- /* There should be at most one matching record. */
- ut_a(discard->n_recs == 1);
-
- return(FALSE);
-}
-
-/*****************************************************************//**
-Update the DICT_TF2_DISCARDED flag in SYS_TABLES.
-@return DB_SUCCESS or error code. */
-UNIV_INTERN
-dberr_t
-row_import_update_discarded_flag(
-/*=============================*/
- trx_t* trx, /*!< in/out: transaction that
- covers the update */
- table_id_t table_id, /*!< in: Table for which we want
- to set the root table->flags2 */
- bool discarded, /*!< in: set MIX_LEN column bit
- to discarded, if true */
- bool dict_locked) /*!< in: set to true if the
- caller already owns the
- dict_sys_t:: mutex. */
-
-{
- pars_info_t* info;
- discard_t discard;
-
- static const char sql[] =
- "PROCEDURE UPDATE_DISCARDED_FLAG() IS\n"
- "DECLARE FUNCTION my_func;\n"
- "DECLARE CURSOR c IS\n"
- " SELECT MIX_LEN "
- " FROM SYS_TABLES "
- " WHERE ID = :table_id FOR UPDATE;"
- "\n"
- "BEGIN\n"
- "OPEN c;\n"
- "WHILE 1 = 1 LOOP\n"
- " FETCH c INTO my_func();\n"
- " IF c % NOTFOUND THEN\n"
- " EXIT;\n"
- " END IF;\n"
- "END LOOP;\n"
- "UPDATE SYS_TABLES"
- " SET MIX_LEN = :flags2"
- " WHERE ID = :table_id;\n"
- "CLOSE c;\n"
- "END;\n";
-
- discard.n_recs = 0;
- discard.state = discarded;
- discard.flags2 = ULINT32_UNDEFINED;
-
- info = pars_info_create();
-
- pars_info_add_ull_literal(info, "table_id", table_id);
- pars_info_bind_int4_literal(info, "flags2", &discard.flags2);
-
- pars_info_bind_function(
- info, "my_func", row_import_set_discarded, &discard);
-
- dberr_t err = que_eval_sql(info, sql, !dict_locked, trx);
-
- ut_a(discard.n_recs == 1);
- ut_a(discard.flags2 != ULINT32_UNDEFINED);
-
- return(err);
-}
-
-/*****************************************************************//**
-Imports a tablespace. The space id in the .ibd file must match the space id
-of the table in the data dictionary.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_import_for_mysql(
-/*=================*/
- dict_table_t* table, /*!< in/out: table */
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct in MySQL */
-{
- dberr_t err;
- trx_t* trx;
- ib_uint64_t autoinc = 0;
- char table_name[MAX_FULL_NAME_LEN + 1];
- char* filepath = NULL;
-
- ut_ad(!srv_read_only_mode);
-
- innobase_format_name(
- table_name, sizeof(table_name), table->name, FALSE);
-
- ut_a(table->space);
- ut_ad(prebuilt->trx);
- ut_a(table->file_unreadable);
-
- trx_start_if_not_started(prebuilt->trx);
-
- trx = trx_allocate_for_mysql();
-
- /* So that the table is not DROPped during recovery. */
- trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
-
- trx_start_if_not_started(trx);
-
- /* So that we can send error messages to the user. */
- trx->mysql_thd = prebuilt->trx->mysql_thd;
-
- /* Ensure that the table will be dropped by trx_rollback_active()
- in case of a crash. */
-
- trx->table_id = table->id;
-
- /* Assign an undo segment for the transaction, so that the
- transaction will be recovered after a crash. */
-
- mutex_enter(&trx->undo_mutex);
-
- err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
-
- mutex_exit(&trx->undo_mutex);
-
- DBUG_EXECUTE_IF("ib_import_undo_assign_failure",
- err = DB_TOO_MANY_CONCURRENT_TRXS;);
-
- if (err != DB_SUCCESS) {
-
- return(row_import_cleanup(prebuilt, trx, err));
-
- } else if (trx->update_undo == 0) {
-
- err = DB_TOO_MANY_CONCURRENT_TRXS;
- return(row_import_cleanup(prebuilt, trx, err));
- }
-
- prebuilt->trx->op_info = "read meta-data file";
-
- /* Prevent DDL operations while we are checking. */
-
- rw_lock_s_lock_func(&dict_operation_lock, 0, __FILE__, __LINE__);
-
- row_import cfg;
-
- memset(&cfg, 0x0, sizeof(cfg));
-
- err = row_import_read_cfg(table, trx->mysql_thd, cfg);
-
- /* Check if the table column definitions match the contents
- of the config file. */
-
- if (err == DB_SUCCESS) {
-
- /* We have a schema file, try and match it with the our
- data dictionary. */
-
- err = cfg.match_schema(trx->mysql_thd);
-
- /* Update index->page and SYS_INDEXES.PAGE_NO to match the
- B-tree root page numbers in the tablespace. Use the index
- name from the .cfg file to find match. */
-
- if (err == DB_SUCCESS) {
- cfg.set_root_by_name();
- autoinc = cfg.m_autoinc;
- }
-
- rw_lock_s_unlock_gen(&dict_operation_lock, 0);
-
- DBUG_EXECUTE_IF("ib_import_set_index_root_failure",
- err = DB_TOO_MANY_CONCURRENT_TRXS;);
-
- } else if (cfg.m_missing) {
-
- rw_lock_s_unlock_gen(&dict_operation_lock, 0);
-
- /* We don't have a schema file, we will have to discover
- the index root pages from the .ibd file and skip the schema
- matching step. */
-
- ut_a(err == DB_FAIL);
-
- cfg.m_page_size = UNIV_PAGE_SIZE;
-
- FetchIndexRootPages fetchIndexRootPages(table, trx);
-
- err = fil_tablespace_iterate(
- table, IO_BUFFER_SIZE(cfg.m_page_size),
- fetchIndexRootPages);
-
- if (err == DB_SUCCESS) {
-
- err = fetchIndexRootPages.build_row_import(&cfg);
-
- /* Update index->page and SYS_INDEXES.PAGE_NO
- to match the B-tree root page numbers in the
- tablespace. */
-
- if (err == DB_SUCCESS) {
- err = cfg.set_root_by_heuristic();
- }
- }
-
- } else {
- rw_lock_s_unlock_gen(&dict_operation_lock, 0);
- }
-
- if (err != DB_SUCCESS) {
- return(row_import_error(prebuilt, trx, err));
- }
-
- prebuilt->trx->op_info = "importing tablespace";
-
- ib_logf(IB_LOG_LEVEL_INFO, "Phase I - Update all pages");
-
- /* Iterate over all the pages and do the sanity checking and
- the conversion required to import the tablespace. */
-
- PageConverter converter(&cfg, trx);
-
- /* Set the IO buffer size in pages. */
-
- err = fil_tablespace_iterate(
- table, IO_BUFFER_SIZE(cfg.m_page_size), converter);
-
- DBUG_EXECUTE_IF("ib_import_reset_space_and_lsn_failure",
- err = DB_TOO_MANY_CONCURRENT_TRXS;);
-
- if (err != DB_SUCCESS) {
- char table_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- table_name, sizeof(table_name), table->name, FALSE);
-
- if (err != DB_DECRYPTION_FAILED) {
-
- ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_INTERNAL_ERROR,
- "Cannot reset LSNs in table '%s' : %s",
- table_name, ut_strerr(err));
- }
-
- return(row_import_cleanup(prebuilt, trx, err));
- }
-
- row_mysql_lock_data_dictionary(trx);
-
- /* If the table is stored in a remote tablespace, we need to
- determine that filepath from the link file and system tables.
- Find the space ID in SYS_TABLES since this is an ALTER TABLE. */
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- dict_get_and_save_data_dir_path(table, true);
- ut_a(table->data_dir_path);
-
- filepath = os_file_make_remote_pathname(
- table->data_dir_path, table->name, "ibd");
- } else {
- filepath = fil_make_ibd_name(table->name, false);
- }
- ut_a(filepath);
-
- /* Open the tablespace so that we can access via the buffer pool.
- We set the 2nd param (fix_dict = true) here because we already
- have an x-lock on dict_operation_lock and dict_sys->mutex. */
-
- err = fil_open_single_table_tablespace(
- true, true, table->space,
- dict_tf_to_fsp_flags(table->flags),
- table->name, filepath);
-
- DBUG_EXECUTE_IF("ib_import_open_tablespace_failure",
- err = DB_TABLESPACE_NOT_FOUND;);
-
- if (err != DB_SUCCESS) {
- row_mysql_unlock_data_dictionary(trx);
-
- ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_GET_ERRMSG,
- err, ut_strerr(err), filepath);
-
- mem_free(filepath);
-
- return(row_import_cleanup(prebuilt, trx, err));
- }
-
- row_mysql_unlock_data_dictionary(trx);
-
- mem_free(filepath);
-
- err = ibuf_check_bitmap_on_import(trx, table->space);
-
- DBUG_EXECUTE_IF("ib_import_check_bitmap_failure", err = DB_CORRUPTION;);
-
- if (err != DB_SUCCESS) {
- return(row_import_cleanup(prebuilt, trx, err));
- }
-
- /* The first index must always be the clustered index. */
-
- dict_index_t* index = dict_table_get_first_index(table);
-
- if (!dict_index_is_clust(index)) {
- return(row_import_error(prebuilt, trx, DB_CORRUPTION));
- }
-
- /* Update the Btree segment headers for index node and
- leaf nodes in the root page. Set the new space id. */
-
- err = btr_root_adjust_on_import(index);
-
- DBUG_EXECUTE_IF("ib_import_cluster_root_adjust_failure",
- err = DB_CORRUPTION;);
-
- if (err != DB_SUCCESS) {
- return(row_import_error(prebuilt, trx, err));
- }
-
- if (err != DB_SUCCESS) {
- return(row_import_error(prebuilt, trx, err));
- } else if (cfg.requires_purge(index->name)) {
-
- /* Purge any delete-marked records that couldn't be
- purged during the page conversion phase from the
- cluster index. */
-
- IndexPurge purge(trx, index);
-
- trx->op_info = "cluster: purging delete marked records";
-
- err = purge.garbage_collect();
-
- trx->op_info = "";
- }
-
- DBUG_EXECUTE_IF("ib_import_cluster_failure", err = DB_CORRUPTION;);
-
- if (err != DB_SUCCESS) {
- return(row_import_error(prebuilt, trx, err));
- }
-
- /* For secondary indexes, purge any records that couldn't be purged
- during the page conversion phase. */
-
- err = row_import_adjust_root_pages_of_secondary_indexes(
- prebuilt, trx, table, cfg);
-
- DBUG_EXECUTE_IF("ib_import_sec_root_adjust_failure",
- err = DB_CORRUPTION;);
-
- if (err != DB_SUCCESS) {
- return(row_import_error(prebuilt, trx, err));
- }
-
- /* Ensure that the next available DB_ROW_ID is not smaller than
- any DB_ROW_ID stored in the table. */
-
- if (prebuilt->clust_index_was_generated) {
-
- err = row_import_set_sys_max_row_id(prebuilt, table);
-
- if (err != DB_SUCCESS) {
- return(row_import_error(prebuilt, trx, err));
- }
- }
-
- ib_logf(IB_LOG_LEVEL_INFO, "Phase III - Flush changes to disk");
-
- /* Ensure that all pages dirtied during the IMPORT make it to disk.
- The only dirty pages generated should be from the pessimistic purge
- of delete marked records that couldn't be purged in Phase I. */
-
- buf_LRU_flush_or_remove_pages(
- prebuilt->table->space, BUF_REMOVE_FLUSH_WRITE, trx);
-
- if (trx_is_interrupted(trx)) {
- ib_logf(IB_LOG_LEVEL_INFO, "Phase III - Flush interrupted");
- return(row_import_error(prebuilt, trx, DB_INTERRUPTED));
- } else {
- ib_logf(IB_LOG_LEVEL_INFO, "Phase IV - Flush complete");
- }
-
- /* The dictionary latches will be released in in row_import_cleanup()
- after the transaction commit, for both success and error. */
-
- row_mysql_lock_data_dictionary(trx);
-
- /* Update the root pages of the table's indexes. */
- err = row_import_update_index_root(trx, table, false, true);
-
- if (err != DB_SUCCESS) {
- return(row_import_error(prebuilt, trx, err));
- }
-
- /* Update the table's discarded flag, unset it. */
- err = row_import_update_discarded_flag(trx, table->id, false, true);
-
- if (err != DB_SUCCESS) {
- return(row_import_error(prebuilt, trx, err));
- }
-
- table->file_unreadable = false;
- table->flags2 &= ~DICT_TF2_DISCARDED;
-
- if (autoinc != 0) {
- char table_name[MAX_FULL_NAME_LEN + 1];
-
- innobase_format_name(
- table_name, sizeof(table_name), table->name, FALSE);
-
- ib_logf(IB_LOG_LEVEL_INFO, "%s autoinc value set to " IB_ID_FMT,
- table_name, autoinc);
-
- dict_table_autoinc_lock(table);
- dict_table_autoinc_initialize(table, autoinc);
- dict_table_autoinc_unlock(table);
- }
-
- ut_a(err == DB_SUCCESS);
-
- return(row_import_cleanup(prebuilt, trx, err));
-}
-
diff --git a/storage/xtradb/row/row0ins.cc b/storage/xtradb/row/row0ins.cc
deleted file mode 100644
index 6072b303d3a..00000000000
--- a/storage/xtradb/row/row0ins.cc
+++ /dev/null
@@ -1,3458 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0ins.cc
-Insert into a table
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#include "row0ins.h"
-
-#ifdef UNIV_NONINL
-#include "row0ins.ic"
-#endif
-
-#include "ha_prototypes.h"
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "trx0rec.h"
-#include "trx0undo.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "mach0data.h"
-#include "que0que.h"
-#include "row0upd.h"
-#include "row0sel.h"
-#include "row0row.h"
-#include "row0log.h"
-#include "rem0cmp.h"
-#include "lock0lock.h"
-#include "log0log.h"
-#include "eval0eval.h"
-#include "data0data.h"
-#include "usr0sess.h"
-#include "buf0lru.h"
-#include "fts0fts.h"
-#include "fts0types.h"
-#include "m_string.h"
-
-/*************************************************************************
-IMPORTANT NOTE: Any operation that generates redo MUST check that there
-is enough space in the redo log before for that operation. This is
-done by calling log_free_check(). The reason for checking the
-availability of the redo log space before the start of the operation is
-that we MUST not hold any synchonization objects when performing the
-check.
-If you make a change in this module make sure that no codepath is
-introduced where a call to log_free_check() is bypassed. */
-
-/*********************************************************************//**
-Creates an insert node struct.
-@return own: insert node struct */
-UNIV_INTERN
-ins_node_t*
-ins_node_create(
-/*============*/
- ulint ins_type, /*!< in: INS_VALUES, ... */
- dict_table_t* table, /*!< in: table where to insert */
- mem_heap_t* heap) /*!< in: mem heap where created */
-{
- ins_node_t* node;
-
- node = static_cast<ins_node_t*>(
- mem_heap_alloc(heap, sizeof(ins_node_t)));
-
- node->common.type = QUE_NODE_INSERT;
-
- node->ins_type = ins_type;
-
- node->state = INS_NODE_SET_IX_LOCK;
- node->table = table;
- node->index = NULL;
- node->entry = NULL;
-
- node->select = NULL;
-
- node->trx_id = 0;
-
- node->entry_sys_heap = mem_heap_create(128);
-
- node->magic_n = INS_NODE_MAGIC_N;
-
- return(node);
-}
-
-/***********************************************************//**
-Creates an entry template for each index of a table. */
-static
-void
-ins_node_create_entry_list(
-/*=======================*/
- ins_node_t* node) /*!< in: row insert node */
-{
- dict_index_t* index;
- dtuple_t* entry;
-
- ut_ad(node->entry_sys_heap);
-
- UT_LIST_INIT(node->entry_list);
-
- /* We will include all indexes (include those corrupted
- secondary indexes) in the entry list. Filteration of
- these corrupted index will be done in row_ins() */
-
- for (index = dict_table_get_first_index(node->table);
- index != 0;
- index = dict_table_get_next_index(index)) {
-
- entry = row_build_index_entry(
- node->row, NULL, index, node->entry_sys_heap);
-
- UT_LIST_ADD_LAST(tuple_list, node->entry_list, entry);
- }
-}
-
-/*****************************************************************//**
-Adds system field buffers to a row. */
-static
-void
-row_ins_alloc_sys_fields(
-/*=====================*/
- ins_node_t* node) /*!< in: insert node */
-{
- dtuple_t* row;
- dict_table_t* table;
- mem_heap_t* heap;
- const dict_col_t* col;
- dfield_t* dfield;
- byte* ptr;
-
- row = node->row;
- table = node->table;
- heap = node->entry_sys_heap;
-
- ut_ad(row && table && heap);
- ut_ad(dtuple_get_n_fields(row) == dict_table_get_n_cols(table));
-
- /* allocate buffer to hold the needed system created hidden columns. */
- uint len = DATA_ROW_ID_LEN + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
- ptr = static_cast<byte*>(mem_heap_zalloc(heap, len));
-
- /* 1. Populate row-id */
- col = dict_table_get_sys_col(table, DATA_ROW_ID);
-
- dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
-
- dfield_set_data(dfield, ptr, DATA_ROW_ID_LEN);
-
- node->row_id_buf = ptr;
-
- ptr += DATA_ROW_ID_LEN;
-
- /* 2. Populate trx id */
- col = dict_table_get_sys_col(table, DATA_TRX_ID);
-
- dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
-
- dfield_set_data(dfield, ptr, DATA_TRX_ID_LEN);
-
- node->trx_id_buf = ptr;
-
- ptr += DATA_TRX_ID_LEN;
-
- /* 3. Populate roll ptr */
-
- col = dict_table_get_sys_col(table, DATA_ROLL_PTR);
-
- dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
-
- dfield_set_data(dfield, ptr, DATA_ROLL_PTR_LEN);
-}
-
-/*********************************************************************//**
-Sets a new row to insert for an INS_DIRECT node. This function is only used
-if we have constructed the row separately, which is a rare case; this
-function is quite slow. */
-UNIV_INTERN
-void
-ins_node_set_new_row(
-/*=================*/
- ins_node_t* node, /*!< in: insert node */
- dtuple_t* row) /*!< in: new row (or first row) for the node */
-{
- node->state = INS_NODE_SET_IX_LOCK;
- node->index = NULL;
- node->entry = NULL;
-
- node->row = row;
-
- mem_heap_empty(node->entry_sys_heap);
-
- /* Create templates for index entries */
-
- ins_node_create_entry_list(node);
-
- /* Allocate from entry_sys_heap buffers for sys fields */
-
- row_ins_alloc_sys_fields(node);
-
- /* As we allocated a new trx id buf, the trx id should be written
- there again: */
-
- node->trx_id = 0;
-}
-
-/*******************************************************************//**
-Does an insert operation by updating a delete-marked existing record
-in the index. This situation can occur if the delete-marked record is
-kept in the index for consistent reads.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_ins_sec_index_entry_by_modify(
-/*==============================*/
- ulint flags, /*!< in: undo logging and locking flags */
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether mtr holds just a leaf
- latch or also a tree latch */
- btr_cur_t* cursor, /*!< in: B-tree cursor */
- ulint** offsets,/*!< in/out: offsets on cursor->page_cur.rec */
- mem_heap_t* offsets_heap,
- /*!< in/out: memory heap that can be emptied */
- mem_heap_t* heap, /*!< in/out: memory heap */
- const dtuple_t* entry, /*!< in: index entry to insert */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr; must be committed before
- latching any further pages */
-{
- big_rec_t* dummy_big_rec;
- upd_t* update;
- rec_t* rec;
- dberr_t err;
-
- rec = btr_cur_get_rec(cursor);
-
- ut_ad(!dict_index_is_clust(cursor->index));
- ut_ad(rec_offs_validate(rec, cursor->index, *offsets));
- ut_ad(!entry->info_bits);
-
- /* We know that in the alphabetical ordering, entry and rec are
- identified. But in their binary form there may be differences if
- there are char fields in them. Therefore we have to calculate the
- difference. */
-
- update = row_upd_build_sec_rec_difference_binary(
- rec, cursor->index, *offsets, entry, heap);
-
- /* If operating in fake_change mode then flow will not mark the record
- deleted but will still assume it and take delete-mark path. Condition
- below has a different path if record is not marked deleted but we need
- to still by-pass it given that original flow has taken this path for
- fake_change mode execution assuming record is delete-marked. */
- if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))
- && UNIV_UNLIKELY(!thr_get_trx(thr)->fake_changes)) {
- /* We should never insert in place of a record that
- has not been delete-marked. The only exception is when
- online CREATE INDEX copied the changes that we already
- made to the clustered index, and completed the
- secondary index creation before we got here. In this
- case, the change would already be there. The CREATE
- INDEX should be waiting for a MySQL meta-data lock
- upgrade at least until this INSERT or UPDATE
- returns. After that point, the TEMP_INDEX_PREFIX
- would be dropped from the index name in
- commit_inplace_alter_table(). */
- ut_a(update->n_fields == 0);
- ut_a(*cursor->index->name == TEMP_INDEX_PREFIX);
- ut_ad(!dict_index_is_online_ddl(cursor->index));
- return(DB_SUCCESS);
- }
-
- if (mode == BTR_MODIFY_LEAF) {
- /* Try an optimistic updating of the record, keeping changes
- within the page */
-
- /* TODO: pass only *offsets */
- err = btr_cur_optimistic_update(
- flags | BTR_KEEP_SYS_FLAG, cursor,
- offsets, &offsets_heap, update, 0, thr,
- thr_get_trx(thr)->id, mtr);
- switch (err) {
- case DB_OVERFLOW:
- case DB_UNDERFLOW:
- case DB_ZIP_OVERFLOW:
- err = DB_FAIL;
- default:
- break;
- }
- } else {
- ut_a(mode == BTR_MODIFY_TREE);
- if (buf_LRU_buf_pool_running_out()) {
-
- return(DB_LOCK_TABLE_FULL);
- }
-
- err = btr_cur_pessimistic_update(
- flags | BTR_KEEP_SYS_FLAG, cursor,
- offsets, &offsets_heap,
- heap, &dummy_big_rec, update, 0,
- thr, thr_get_trx(thr)->id, mtr);
- ut_ad(!dummy_big_rec);
- }
-
- return(err);
-}
-
-/*******************************************************************//**
-Does an insert operation by delete unmarking and updating a delete marked
-existing record in the index. This situation can occur if the delete marked
-record is kept in the index for consistent reads.
-@return DB_SUCCESS, DB_FAIL, or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_ins_clust_index_entry_by_modify(
-/*================================*/
- ulint flags, /*!< in: undo logging and locking flags */
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether mtr holds just a leaf
- latch or also a tree latch */
- btr_cur_t* cursor, /*!< in: B-tree cursor */
- ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */
- mem_heap_t** offsets_heap,
- /*!< in/out: pointer to memory heap that can
- be emptied, or NULL */
- mem_heap_t* heap, /*!< in/out: memory heap */
- big_rec_t** big_rec,/*!< out: possible big rec vector of fields
- which have to be stored externally by the
- caller */
- const dtuple_t* entry, /*!< in: index entry to insert */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr; must be committed before
- latching any further pages */
-{
- const rec_t* rec;
- const upd_t* update;
- dberr_t err;
-
- ut_ad(dict_index_is_clust(cursor->index));
-
- *big_rec = NULL;
-
- rec = btr_cur_get_rec(cursor);
-
- ut_ad(rec_get_deleted_flag(rec,
- dict_table_is_comp(cursor->index->table)));
-
- /* Build an update vector containing all the fields to be modified;
- NOTE that this vector may NOT contain system columns trx_id or
- roll_ptr */
-
- update = row_upd_build_difference_binary(
- cursor->index, entry, rec, NULL, true,
- thr_get_trx(thr), heap);
- if (mode != BTR_MODIFY_TREE) {
- ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF);
-
- /* Try optimistic updating of the record, keeping changes
- within the page */
-
- err = btr_cur_optimistic_update(
- flags, cursor, offsets, offsets_heap, update, 0, thr,
- thr_get_trx(thr)->id, mtr);
- switch (err) {
- case DB_OVERFLOW:
- case DB_UNDERFLOW:
- case DB_ZIP_OVERFLOW:
- err = DB_FAIL;
- default:
- break;
- }
- } else {
- if (buf_LRU_buf_pool_running_out()) {
-
- return(DB_LOCK_TABLE_FULL);
-
- }
- err = btr_cur_pessimistic_update(
- flags | BTR_KEEP_POS_FLAG,
- cursor, offsets, offsets_heap, heap,
- big_rec, update, 0, thr, thr_get_trx(thr)->id, mtr);
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Returns TRUE if in a cascaded update/delete an ancestor node of node
-updates (not DELETE, but UPDATE) table.
-@return TRUE if an ancestor updates table */
-static
-ibool
-row_ins_cascade_ancestor_updates_table(
-/*===================================*/
- que_node_t* node, /*!< in: node in a query graph */
- dict_table_t* table) /*!< in: table */
-{
- que_node_t* parent;
-
- for (parent = que_node_get_parent(node);
- que_node_get_type(parent) == QUE_NODE_UPDATE;
- parent = que_node_get_parent(parent)) {
-
- upd_node_t* upd_node;
-
- upd_node = static_cast<upd_node_t*>(parent);
-
- if (upd_node->table == table && upd_node->is_delete == FALSE) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Returns the number of ancestor UPDATE or DELETE nodes of a
-cascaded update/delete node.
-@return number of ancestors */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-ulint
-row_ins_cascade_n_ancestors(
-/*========================*/
- que_node_t* node) /*!< in: node in a query graph */
-{
- que_node_t* parent;
- ulint n_ancestors = 0;
-
- for (parent = que_node_get_parent(node);
- que_node_get_type(parent) == QUE_NODE_UPDATE;
- parent = que_node_get_parent(parent)) {
-
- n_ancestors++;
- }
-
- return(n_ancestors);
-}
-
-/******************************************************************//**
-Calculates the update vector node->cascade->update for a child table in
-a cascaded update.
-@return number of fields in the calculated update vector; the value
-can also be 0 if no foreign key fields changed; the returned value is
-ULINT_UNDEFINED if the column type in the child table is too short to
-fit the new value in the parent table: that means the update fails */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-ulint
-row_ins_cascade_calc_update_vec(
-/*============================*/
- upd_node_t* node, /*!< in: update node of the parent
- table */
- dict_foreign_t* foreign, /*!< in: foreign key constraint whose
- type is != 0 */
- mem_heap_t* heap, /*!< in: memory heap to use as
- temporary storage */
- trx_t* trx, /*!< in: update transaction */
- ibool* fts_col_affected)/*!< out: is FTS column affected */
-{
- upd_node_t* cascade = node->cascade_node;
- dict_table_t* table = foreign->foreign_table;
- dict_index_t* index = foreign->foreign_index;
- upd_t* update;
- dict_table_t* parent_table;
- dict_index_t* parent_index;
- upd_t* parent_update;
- ulint n_fields_updated;
- ulint parent_field_no;
- ulint i;
- ulint j;
- ibool doc_id_updated = FALSE;
- ulint doc_id_pos = 0;
- doc_id_t new_doc_id = FTS_NULL_DOC_ID;
-
- ut_a(node);
- ut_a(foreign);
- ut_a(cascade);
- ut_a(table);
- ut_a(index);
-
- /* Calculate the appropriate update vector which will set the fields
- in the child index record to the same value (possibly padded with
- spaces if the column is a fixed length CHAR or FIXBINARY column) as
- the referenced index record will get in the update. */
-
- parent_table = node->table;
- ut_a(parent_table == foreign->referenced_table);
- parent_index = foreign->referenced_index;
- parent_update = node->update;
-
- update = cascade->update;
-
- update->info_bits = 0;
- update->n_fields = foreign->n_fields;
-
- n_fields_updated = 0;
-
- *fts_col_affected = FALSE;
-
- if (table->fts) {
- doc_id_pos = dict_table_get_nth_col_pos(
- table, table->fts->doc_col);
- }
-
- for (i = 0; i < foreign->n_fields; i++) {
-
- parent_field_no = dict_table_get_nth_col_pos(
- parent_table,
- dict_index_get_nth_col_no(parent_index, i));
-
- for (j = 0; j < parent_update->n_fields; j++) {
- const upd_field_t* parent_ufield
- = &parent_update->fields[j];
-
- if (parent_ufield->field_no == parent_field_no) {
-
- ulint min_size;
- const dict_col_t* col;
- ulint ufield_len;
- upd_field_t* ufield;
-
- col = dict_index_get_nth_col(index, i);
-
- /* A field in the parent index record is
- updated. Let us make the update vector
- field for the child table. */
-
- ufield = update->fields + n_fields_updated;
-
- ufield->field_no
- = dict_table_get_nth_col_pos(
- table, dict_col_get_no(col));
-
- ufield->orig_len = 0;
- ufield->exp = NULL;
-
- ufield->new_val = parent_ufield->new_val;
- ufield_len = dfield_get_len(&ufield->new_val);
-
- /* Clear the "external storage" flag */
- dfield_set_len(&ufield->new_val, ufield_len);
-
- /* Do not allow a NOT NULL column to be
- updated as NULL */
-
- if (dfield_is_null(&ufield->new_val)
- && (col->prtype & DATA_NOT_NULL)) {
-
- return(ULINT_UNDEFINED);
- }
-
- /* If the new value would not fit in the
- column, do not allow the update */
-
- if (!dfield_is_null(&ufield->new_val)
- && dtype_get_at_most_n_mbchars(
- col->prtype, col->mbminmaxlen,
- col->len,
- ufield_len,
- static_cast<char*>(
- dfield_get_data(
- &ufield->new_val)))
- < ufield_len) {
-
- return(ULINT_UNDEFINED);
- }
-
- /* If the parent column type has a different
- length than the child column type, we may
- need to pad with spaces the new value of the
- child column */
-
- min_size = dict_col_get_min_size(col);
-
- /* Because UNIV_SQL_NULL (the marker
- of SQL NULL values) exceeds all possible
- values of min_size, the test below will
- not hold for SQL NULL columns. */
-
- if (min_size > ufield_len) {
-
- byte* pad;
- ulint pad_len;
- byte* padded_data;
- ulint mbminlen;
-
- padded_data = static_cast<byte*>(
- mem_heap_alloc(
- heap, min_size));
-
- pad = padded_data + ufield_len;
- pad_len = min_size - ufield_len;
-
- memcpy(padded_data,
- dfield_get_data(&ufield
- ->new_val),
- ufield_len);
-
- mbminlen = dict_col_get_mbminlen(col);
-
- ut_ad(!(ufield_len % mbminlen));
- ut_ad(!(min_size % mbminlen));
-
- if (mbminlen == 1
- && dtype_get_charset_coll(
- col->prtype)
- == DATA_MYSQL_BINARY_CHARSET_COLL) {
- /* Do not pad BINARY columns */
- return(ULINT_UNDEFINED);
- }
-
- row_mysql_pad_col(mbminlen,
- pad, pad_len);
- dfield_set_data(&ufield->new_val,
- padded_data, min_size);
- }
-
- /* Check whether the current column has
- FTS index on it */
- if (table->fts
- && dict_table_is_fts_column(
- table->fts->indexes,
- dict_col_get_no(col))
- != ULINT_UNDEFINED) {
- *fts_col_affected = TRUE;
- }
-
- /* If Doc ID is updated, check whether the
- Doc ID is valid */
- if (table->fts
- && ufield->field_no == doc_id_pos) {
- doc_id_t n_doc_id;
-
- n_doc_id =
- table->fts->cache->next_doc_id;
-
- new_doc_id = fts_read_doc_id(
- static_cast<const byte*>(
- dfield_get_data(
- &ufield->new_val)));
-
- if (new_doc_id <= 0) {
- fprintf(stderr,
- "InnoDB: FTS Doc ID "
- "must be larger than "
- "0 \n");
- return(ULINT_UNDEFINED);
- }
-
- if (new_doc_id < n_doc_id) {
- fprintf(stderr,
- "InnoDB: FTS Doc ID "
- "must be larger than "
- IB_ID_FMT" for table",
- n_doc_id -1);
-
- ut_print_name(stderr, trx,
- TRUE,
- table->name);
-
- putc('\n', stderr);
- return(ULINT_UNDEFINED);
- }
-
- *fts_col_affected = TRUE;
- doc_id_updated = TRUE;
- }
-
- n_fields_updated++;
- }
- }
- }
-
- /* Generate a new Doc ID if FTS index columns get updated */
- if (table->fts && *fts_col_affected) {
- if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
- doc_id_t doc_id;
- upd_field_t* ufield;
-
- ut_ad(!doc_id_updated);
- ufield = update->fields + n_fields_updated;
- fts_get_next_doc_id(table, &trx->fts_next_doc_id);
- doc_id = fts_update_doc_id(table, ufield,
- &trx->fts_next_doc_id);
- n_fields_updated++;
- fts_trx_add_op(trx, table, doc_id, FTS_INSERT, NULL);
- } else {
- if (doc_id_updated) {
- ut_ad(new_doc_id);
- fts_trx_add_op(trx, table, new_doc_id,
- FTS_INSERT, NULL);
- } else {
- fprintf(stderr, "InnoDB: FTS Doc ID must be "
- "updated along with FTS indexed "
- "column for table ");
- ut_print_name(stderr, trx, TRUE, table->name);
- putc('\n', stderr);
- return(ULINT_UNDEFINED);
- }
- }
- }
-
- update->n_fields = n_fields_updated;
-
- return(n_fields_updated);
-}
-
-/*********************************************************************//**
-Set detailed error message associated with foreign key errors for
-the given transaction. */
-static
-void
-row_ins_set_detailed(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- dict_foreign_t* foreign) /*!< in: foreign key constraint */
-{
- ut_ad(!srv_read_only_mode);
-
- mutex_enter(&srv_misc_tmpfile_mutex);
- rewind(srv_misc_tmpfile);
-
- if (os_file_set_eof(srv_misc_tmpfile)) {
- std::string fk_str;
- ut_print_name(srv_misc_tmpfile, trx, TRUE,
- foreign->foreign_table_name);
- fk_str = dict_print_info_on_foreign_key_in_create_format(
- trx, foreign, FALSE);
- fputs(fk_str.c_str(), srv_misc_tmpfile);
- trx_set_detailed_error_from_file(trx, srv_misc_tmpfile);
- } else {
- trx_set_detailed_error(trx, "temp file operation failed");
- }
-
- mutex_exit(&srv_misc_tmpfile_mutex);
-}
-
-/*********************************************************************//**
-Acquires dict_foreign_err_mutex, rewinds dict_foreign_err_file
-and displays information about the given transaction.
-The caller must release dict_foreign_err_mutex. */
-static
-void
-row_ins_foreign_trx_print(
-/*======================*/
- trx_t* trx) /*!< in: transaction */
-{
- ulint n_rec_locks;
- ulint n_trx_locks;
- ulint heap_size;
-
- if (srv_read_only_mode) {
- return;
- }
-
- lock_mutex_enter();
- n_rec_locks = lock_number_of_rows_locked(&trx->lock);
- n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
- heap_size = mem_heap_get_size(trx->lock.lock_heap);
- lock_mutex_exit();
-
- mutex_enter(&trx_sys->mutex);
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(dict_foreign_err_file);
- ut_print_timestamp(dict_foreign_err_file);
- fputs(" Transaction:\n", dict_foreign_err_file);
-
- trx_print_low(dict_foreign_err_file, trx, 600,
- n_rec_locks, n_trx_locks, heap_size);
-
- mutex_exit(&trx_sys->mutex);
-
- ut_ad(mutex_own(&dict_foreign_err_mutex));
-}
-
-/*********************************************************************//**
-Reports a foreign key error associated with an update or a delete of a
-parent table index entry. */
-static
-void
-row_ins_foreign_report_err(
-/*=======================*/
- const char* errstr, /*!< in: error string from the viewpoint
- of the parent table */
- que_thr_t* thr, /*!< in: query thread whose run_node
- is an update node */
- dict_foreign_t* foreign, /*!< in: foreign key constraint */
- const rec_t* rec, /*!< in: a matching index record in the
- child table */
- const dtuple_t* entry) /*!< in: index entry in the parent
- table */
-{
- std::string fk_str;
-
- if (srv_read_only_mode) {
- return;
- }
-
- FILE* ef = dict_foreign_err_file;
- trx_t* trx = thr_get_trx(thr);
-
- row_ins_set_detailed(trx, foreign);
-
- row_ins_foreign_trx_print(trx);
-
- fputs("Foreign key constraint fails for table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- fputs(":\n", ef);
- fk_str = dict_print_info_on_foreign_key_in_create_format(trx, foreign,
- TRUE);
- fputs(fk_str.c_str(), ef);
- putc('\n', ef);
- fputs(errstr, ef);
- fputs(" in parent table, in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->referenced_index->name);
- if (entry) {
- fputs(" tuple:\n", ef);
- dtuple_print(ef, entry);
- }
- fputs("\nBut in child table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- fputs(", in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
- if (rec) {
- fputs(", there is a record:\n", ef);
- rec_print(ef, rec, foreign->foreign_index);
- } else {
- fputs(", the record is not available\n", ef);
- }
- putc('\n', ef);
-
- mutex_exit(&dict_foreign_err_mutex);
-}
-
-/*********************************************************************//**
-Reports a foreign key error to dict_foreign_err_file when we are trying
-to add an index entry to a child table. Note that the adding may be the result
-of an update, too. */
-static
-void
-row_ins_foreign_report_add_err(
-/*===========================*/
- trx_t* trx, /*!< in: transaction */
- dict_foreign_t* foreign, /*!< in: foreign key constraint */
- const rec_t* rec, /*!< in: a record in the parent table:
- it does not match entry because we
- have an error! */
- const dtuple_t* entry) /*!< in: index entry to insert in the
- child table */
-{
- std::string fk_str;
-
- if (srv_read_only_mode) {
- return;
- }
-
- FILE* ef = dict_foreign_err_file;
-
- row_ins_set_detailed(trx, foreign);
-
- row_ins_foreign_trx_print(trx);
-
- fputs("Foreign key constraint fails for table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- fputs(":\n", ef);
- fk_str = dict_print_info_on_foreign_key_in_create_format(trx, foreign,
- TRUE);
- fputs(fk_str.c_str(), ef);
- fputs("\nTrying to add in child table, in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
- if (entry) {
- fputs(" tuple:\n", ef);
- /* TODO: DB_TRX_ID and DB_ROLL_PTR may be uninitialized.
- It would be better to only display the user columns. */
- dtuple_print(ef, entry);
- }
- fputs("\nBut in parent table ", ef);
- ut_print_name(ef, trx, TRUE, foreign->referenced_table_name);
- fputs(", in index ", ef);
- ut_print_name(ef, trx, FALSE, foreign->referenced_index->name);
- fputs(",\nthe closest match we can find is record:\n", ef);
- if (rec && page_rec_is_supremum(rec)) {
- /* If the cursor ended on a supremum record, it is better
- to report the previous record in the error message, so that
- the user gets a more descriptive error message. */
- rec = page_rec_get_prev_const(rec);
- }
-
- if (rec) {
- rec_print(ef, rec, foreign->referenced_index);
- }
- putc('\n', ef);
-
- mutex_exit(&dict_foreign_err_mutex);
-}
-
-/*********************************************************************//**
-Invalidate the query cache for the given table. */
-static
-void
-row_ins_invalidate_query_cache(
-/*===========================*/
- que_thr_t* thr, /*!< in: query thread whose run_node
- is an update node */
- const char* name) /*!< in: table name prefixed with
- database name and a '/' character */
-{
- char* buf;
- char* ptr;
- ulint len = strlen(name) + 1;
-
- buf = mem_strdupl(name, len);
-
- ptr = strchr(buf, '/');
- ut_a(ptr);
- *ptr = '\0';
-
- innobase_invalidate_query_cache(thr_get_trx(thr), buf, len);
- mem_free(buf);
-}
-#ifdef WITH_WSREP
-dberr_t wsrep_append_foreign_key(trx_t *trx,
- dict_foreign_t* foreign,
- const rec_t* clust_rec,
- dict_index_t* clust_index,
- ibool referenced,
- ibool shared);
-#endif /* WITH_WSREP */
-
-/*********************************************************************//**
-Perform referential actions or checks when a parent row is deleted or updated
-and the constraint had an ON DELETE or ON UPDATE condition which was not
-RESTRICT.
-@return DB_SUCCESS, DB_LOCK_WAIT, or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_ins_foreign_check_on_constraint(
-/*================================*/
- que_thr_t* thr, /*!< in: query thread whose run_node
- is an update node */
- dict_foreign_t* foreign, /*!< in: foreign key constraint whose
- type is != 0 */
- btr_pcur_t* pcur, /*!< in: cursor placed on a matching
- index record in the child table */
- dtuple_t* entry, /*!< in: index entry in the parent
- table */
- mtr_t* mtr) /*!< in: mtr holding the latch of pcur
- page */
-{
- upd_node_t* node;
- upd_node_t* cascade;
- dict_table_t* table = foreign->foreign_table;
- dict_index_t* index;
- dict_index_t* clust_index;
- dtuple_t* ref;
- mem_heap_t* upd_vec_heap = NULL;
- const rec_t* rec;
- const rec_t* clust_rec;
- const buf_block_t* clust_block;
- upd_t* update;
- ulint n_to_update;
- dberr_t err;
- ulint i;
- trx_t* trx;
- mem_heap_t* tmp_heap = NULL;
- doc_id_t doc_id = FTS_NULL_DOC_ID;
- ibool fts_col_affacted = FALSE;
-
- ut_a(thr);
- ut_a(foreign);
- ut_a(pcur);
- ut_a(mtr);
-
- trx = thr_get_trx(thr);
-
- /* Since we are going to delete or update a row, we have to invalidate
- the MySQL query cache for table. A deadlock of threads is not possible
- here because the caller of this function does not hold any latches with
- the sync0sync.h rank above the lock_sys_t::mutex. The query cache mutex
- has a rank just above the lock_sys_t::mutex. */
-
- row_ins_invalidate_query_cache(thr, table->name);
-
- node = static_cast<upd_node_t*>(thr->run_node);
-
- if (node->is_delete && 0 == (foreign->type
- & (DICT_FOREIGN_ON_DELETE_CASCADE
- | DICT_FOREIGN_ON_DELETE_SET_NULL))) {
-
- row_ins_foreign_report_err("Trying to delete",
- thr, foreign,
- btr_pcur_get_rec(pcur), entry);
-
- return(DB_ROW_IS_REFERENCED);
- }
-
- if (!node->is_delete && 0 == (foreign->type
- & (DICT_FOREIGN_ON_UPDATE_CASCADE
- | DICT_FOREIGN_ON_UPDATE_SET_NULL))) {
-
- /* This is an UPDATE */
-
- row_ins_foreign_report_err("Trying to update",
- thr, foreign,
- btr_pcur_get_rec(pcur), entry);
-
- return(DB_ROW_IS_REFERENCED);
- }
-
- if (node->cascade_node == NULL) {
- /* Extend our query graph by creating a child to current
- update node. The child is used in the cascade or set null
- operation. */
-
- node->cascade_heap = mem_heap_create(128);
- node->cascade_node = row_create_update_node_for_mysql(
- table, node->cascade_heap);
- que_node_set_parent(node->cascade_node, node);
- }
-
- /* Initialize cascade_node to do the operation we want. Note that we
- use the SAME cascade node to do all foreign key operations of the
- SQL DELETE: the table of the cascade node may change if there are
- several child tables to the table where the delete is done! */
-
- cascade = node->cascade_node;
-
- cascade->table = table;
-
- cascade->foreign = foreign;
-
- if (node->is_delete
- && (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE)) {
- cascade->is_delete = TRUE;
- } else {
- cascade->is_delete = FALSE;
-
- if (foreign->n_fields > cascade->update_n_fields) {
- /* We have to make the update vector longer */
-
- cascade->update = upd_create(foreign->n_fields,
- node->cascade_heap);
- cascade->update_n_fields = foreign->n_fields;
- }
- }
-
- /* We do not allow cyclic cascaded updating (DELETE is allowed,
- but not UPDATE) of the same table, as this can lead to an infinite
- cycle. Check that we are not updating the same table which is
- already being modified in this cascade chain. We have to check
- this also because the modification of the indexes of a 'parent'
- table may still be incomplete, and we must avoid seeing the indexes
- of the parent table in an inconsistent state! */
-
- if (!cascade->is_delete
- && row_ins_cascade_ancestor_updates_table(cascade, table)) {
-
- /* We do not know if this would break foreign key
- constraints, but play safe and return an error */
-
- err = DB_ROW_IS_REFERENCED;
-
- row_ins_foreign_report_err(
- "Trying an update, possibly causing a cyclic"
- " cascaded update\n"
- "in the child table,", thr, foreign,
- btr_pcur_get_rec(pcur), entry);
-
- goto nonstandard_exit_func;
- }
-
- if (row_ins_cascade_n_ancestors(cascade) >= 15) {
- err = DB_ROW_IS_REFERENCED;
-
- row_ins_foreign_report_err(
- "Trying a too deep cascaded delete or update\n",
- thr, foreign, btr_pcur_get_rec(pcur), entry);
-
- goto nonstandard_exit_func;
- }
-
- index = btr_pcur_get_btr_cur(pcur)->index;
-
- ut_a(index == foreign->foreign_index);
-
- rec = btr_pcur_get_rec(pcur);
-
- tmp_heap = mem_heap_create(256);
-
- if (dict_index_is_clust(index)) {
- /* pcur is already positioned in the clustered index of
- the child table */
-
- clust_index = index;
- clust_rec = rec;
- clust_block = btr_pcur_get_block(pcur);
- } else {
- /* We have to look for the record in the clustered index
- in the child table */
-
- clust_index = dict_table_get_first_index(table);
-
- ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec,
- tmp_heap);
- btr_pcur_open_with_no_init(clust_index, ref,
- PAGE_CUR_LE, BTR_SEARCH_LEAF,
- cascade->pcur, 0, mtr);
-
- clust_rec = btr_pcur_get_rec(cascade->pcur);
- clust_block = btr_pcur_get_block(cascade->pcur);
-
- if (!page_rec_is_user_rec(clust_rec)
- || btr_pcur_get_low_match(cascade->pcur)
- < dict_index_get_n_unique(clust_index)) {
-
- fputs("InnoDB: error in cascade of a foreign key op\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
-
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, rec, index);
- fputs("\n"
- "InnoDB: clustered record ", stderr);
- rec_print(stderr, clust_rec, clust_index);
- fputs("\n"
- "InnoDB: Submit a detailed bug report to"
- " http://bugs.mysql.com\n", stderr);
- ut_ad(0);
- err = DB_SUCCESS;
-
- goto nonstandard_exit_func;
- }
- }
-
- /* Set an X-lock on the row to delete or update in the child table */
-
- err = lock_table(0, table, LOCK_IX, thr);
-
- if (err == DB_SUCCESS) {
- /* Here it suffices to use a LOCK_REC_NOT_GAP type lock;
- we already have a normal shared lock on the appropriate
- gap if the search criterion was not unique */
-
- err = lock_clust_rec_read_check_and_lock_alt(
- 0, clust_block, clust_rec, clust_index,
- LOCK_X, LOCK_REC_NOT_GAP, thr);
- }
-
- if (err != DB_SUCCESS) {
-
- goto nonstandard_exit_func;
- }
-
- if (rec_get_deleted_flag(clust_rec, dict_table_is_comp(table))) {
- /* This can happen if there is a circular reference of
- rows such that cascading delete comes to delete a row
- already in the process of being delete marked */
- err = DB_SUCCESS;
-
- goto nonstandard_exit_func;
- }
-
- if (table->fts) {
- doc_id = fts_get_doc_id_from_rec(table, clust_rec, tmp_heap);
- }
-
- if (node->is_delete
- ? (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL)
- : (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)) {
-
- /* Build the appropriate update vector which sets
- foreign->n_fields first fields in rec to SQL NULL */
-
- update = cascade->update;
-
- update->info_bits = 0;
- update->n_fields = foreign->n_fields;
- UNIV_MEM_INVALID(update->fields,
- update->n_fields * sizeof *update->fields);
-
- for (i = 0; i < foreign->n_fields; i++) {
- upd_field_t* ufield = &update->fields[i];
-
- ufield->field_no = dict_table_get_nth_col_pos(
- table,
- dict_index_get_nth_col_no(index, i));
- ufield->orig_len = 0;
- ufield->exp = NULL;
- dfield_set_null(&ufield->new_val);
-
- if (table->fts && dict_table_is_fts_column(
- table->fts->indexes,
- dict_index_get_nth_col_no(index, i))
- != ULINT_UNDEFINED) {
- fts_col_affacted = TRUE;
- }
- }
-
- if (fts_col_affacted) {
- fts_trx_add_op(trx, table, doc_id, FTS_DELETE, NULL);
- }
- } else if (table->fts && cascade->is_delete) {
- /* DICT_FOREIGN_ON_DELETE_CASCADE case */
- for (i = 0; i < foreign->n_fields; i++) {
- if (table->fts && dict_table_is_fts_column(
- table->fts->indexes,
- dict_index_get_nth_col_no(index, i))
- != ULINT_UNDEFINED) {
- fts_col_affacted = TRUE;
- }
- }
-
- if (fts_col_affacted) {
- fts_trx_add_op(trx, table, doc_id, FTS_DELETE, NULL);
- }
- }
-
- if (!node->is_delete
- && (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)) {
-
- /* Build the appropriate update vector which sets changing
- foreign->n_fields first fields in rec to new values */
-
- upd_vec_heap = mem_heap_create(256);
-
- n_to_update = row_ins_cascade_calc_update_vec(
- node, foreign, upd_vec_heap, trx, &fts_col_affacted);
-
- if (n_to_update == ULINT_UNDEFINED) {
- err = DB_ROW_IS_REFERENCED;
-
- row_ins_foreign_report_err(
- "Trying a cascaded update where the"
- " updated value in the child\n"
- "table would not fit in the length"
- " of the column, or the value would\n"
- "be NULL and the column is"
- " declared as not NULL in the child table,",
- thr, foreign, btr_pcur_get_rec(pcur), entry);
-
- goto nonstandard_exit_func;
- }
-
- if (cascade->update->n_fields == 0) {
-
- /* The update does not change any columns referred
- to in this foreign key constraint: no need to do
- anything */
-
- err = DB_SUCCESS;
-
- goto nonstandard_exit_func;
- }
-
- /* Mark the old Doc ID as deleted */
- if (fts_col_affacted) {
- ut_ad(table->fts);
- fts_trx_add_op(trx, table, doc_id, FTS_DELETE, NULL);
- }
- }
-
- /* Store pcur position and initialize or store the cascade node
- pcur stored position */
-
- btr_pcur_store_position(pcur, mtr);
-
- if (index == clust_index) {
- btr_pcur_copy_stored_position(cascade->pcur, pcur);
- } else {
- btr_pcur_store_position(cascade->pcur, mtr);
- }
-
- mtr_commit(mtr);
-
- ut_a(cascade->pcur->rel_pos == BTR_PCUR_ON);
-
- cascade->state = UPD_NODE_UPDATE_CLUSTERED;
-
-#ifdef WITH_WSREP
- err = wsrep_append_foreign_key(
- thr_get_trx(thr),
- foreign,
- clust_rec,
- clust_index,
- FALSE, FALSE);
- if (err != DB_SUCCESS) {
- fprintf(stderr,
- "WSREP: foreign key append failed: %d\n", err);
- } else
-#endif /* WITH_WSREP */
- err = row_update_cascade_for_mysql(thr, cascade,
- foreign->foreign_table);
-
- if (foreign->foreign_table->n_foreign_key_checks_running == 0) {
- fprintf(stderr,
- "InnoDB: error: table %s has the counter 0"
- " though there is\n"
- "InnoDB: a FOREIGN KEY check running on it.\n",
- foreign->foreign_table->name);
- }
-
- /* Release the data dictionary latch for a while, so that we do not
- starve other threads from doing CREATE TABLE etc. if we have a huge
- cascaded operation running. The counter n_foreign_key_checks_running
- will prevent other users from dropping or ALTERing the table when we
- release the latch. */
-
- row_mysql_unfreeze_data_dictionary(thr_get_trx(thr));
-
- DEBUG_SYNC_C("innodb_dml_cascade_dict_unfreeze");
-
- row_mysql_freeze_data_dictionary(thr_get_trx(thr));
-
- mtr_start_trx(mtr, trx);
-
- /* Restore pcur position */
-
- btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
-
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
- }
-
- if (upd_vec_heap) {
- mem_heap_free(upd_vec_heap);
- }
-
- return(err);
-
-nonstandard_exit_func:
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
- }
-
- if (upd_vec_heap) {
- mem_heap_free(upd_vec_heap);
- }
-
- btr_pcur_store_position(pcur, mtr);
-
- mtr_commit(mtr);
- mtr_start_trx(mtr, trx);
-
- btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
-
- return(err);
-}
-
-/*********************************************************************//**
-Requests a shared (LOCK_S) lock on a record, going through the clustered
-index lock routine when index is clustered and through the secondary index
-lock routine otherwise. Used when locking possible duplicate key records
-and when checking foreign key constraints.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
-static
-dberr_t
-row_ins_set_shared_rec_lock(
-/*========================*/
-    ulint               type,    /*!< in: kind of record lock:
-                                 LOCK_ORDINARY, LOCK_GAP, or
-                                 LOCK_REC_NOT_GAP */
-    const buf_block_t*  block,   /*!< in: buffer block that holds rec */
-    const rec_t*        rec,     /*!< in: record to lock */
-    dict_index_t*       index,   /*!< in: index that rec belongs to */
-    const ulint*        offsets, /*!< in: rec_get_offsets(rec, index) */
-    que_thr_t*          thr)     /*!< in: query thread */
-{
-    ut_ad(rec_offs_validate(rec, index, offsets));
-
-    /* Secondary index records take the secondary lock path. */
-    if (!dict_index_is_clust(index)) {
-        return(lock_sec_rec_read_check_and_lock(
-                   0, block, rec, index, offsets,
-                   LOCK_S, type, thr));
-    }
-
-    return(lock_clust_rec_read_check_and_lock(
-               0, block, rec, index, offsets,
-               LOCK_S, type, thr));
-}
-
-/*********************************************************************//**
-Requests an exclusive (LOCK_X) lock on a record, going through the
-clustered index lock routine when index is clustered and through the
-secondary index lock routine otherwise. Used when locking possible
-duplicate key records.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
-static
-dberr_t
-row_ins_set_exclusive_rec_lock(
-/*===========================*/
-    ulint               type,    /*!< in: kind of record lock:
-                                 LOCK_ORDINARY, LOCK_GAP, or
-                                 LOCK_REC_NOT_GAP */
-    const buf_block_t*  block,   /*!< in: buffer block that holds rec */
-    const rec_t*        rec,     /*!< in: record to lock */
-    dict_index_t*       index,   /*!< in: index that rec belongs to */
-    const ulint*        offsets, /*!< in: rec_get_offsets(rec, index) */
-    que_thr_t*          thr)     /*!< in: query thread */
-{
-    ut_ad(rec_offs_validate(rec, index, offsets));
-
-    /* Secondary index records take the secondary lock path. */
-    if (!dict_index_is_clust(index)) {
-        return(lock_sec_rec_read_check_and_lock(
-                   0, block, rec, index, offsets,
-                   LOCK_X, type, thr));
-    }
-
-    return(lock_clust_rec_read_check_and_lock(
-               0, block, rec, index, offsets,
-               LOCK_X, type, thr));
-}
-
-/***************************************************************//**
-Checks if foreign key constraint fails for an index entry. Sets shared locks
-which lock either the success or the failure of the constraint. NOTE that
-the caller must have a shared latch on dict_operation_lock.
-
-The check is skipped with DB_SUCCESS when trx->check_foreigns is FALSE, when
-any of the first foreign->n_fields fields of entry is SQL NULL, or when the
-running node is a non-delete update node whose 'foreign' is this constraint.
-When a lock request ends in DB_LOCK_WAIT, the thread is suspended; if the
-wait ended without error and the constraint is still registered, the whole
-check restarts at run_again. When trx->fake_changes is set, the result is
-forced to DB_SUCCESS just before returning.
-@return DB_SUCCESS, DB_NO_REFERENCED_ROW, or DB_ROW_IS_REFERENCED; the code
-below can also return DB_LOCK_WAIT_TIMEOUT, DB_DICT_CHANGED,
-DB_FOREIGN_DUPLICATE_KEY, or an error code passed on from the lock and
-cascade calls */
-UNIV_INTERN
-dberr_t
-row_ins_check_foreign_constraint(
-/*=============================*/
- ibool check_ref,/*!< in: TRUE if we want to check that
- the referenced table is ok, FALSE if we
- want to check the foreign key table */
- dict_foreign_t* foreign,/*!< in: foreign constraint; NOTE that the
- tables mentioned in it must be in the
- dictionary cache if they exist at all */
- dict_table_t* table, /*!< in: if check_ref is TRUE, then the foreign
- table, else the referenced table */
- dtuple_t* entry, /*!< in: index entry for index */
- que_thr_t* thr) /*!< in: query thread */
-{
- dberr_t err;
- upd_node_t* upd_node;
- dict_table_t* check_table;
- dict_index_t* check_index;
- ulint n_fields_cmp;
- btr_pcur_t pcur;
- int cmp;
- ulint i;
- mtr_t mtr;
- trx_t* trx = thr_get_trx(thr);
- mem_heap_t* heap = NULL; /* created on demand by rec_get_offsets();
- freed at exit_func */
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
-run_again:
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- err = DB_SUCCESS; /* reset on every pass through run_again */
-
- if (trx->check_foreigns == FALSE) {
- /* The user has suppressed foreign key checks currently for
- this session */
- goto exit_func;
- }
-
- /* If any of the foreign key fields in entry is SQL NULL, we
- suppress the foreign key check: this is compatible with Oracle,
- for example */
-
- for (i = 0; i < foreign->n_fields; i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(entry, i))) {
-
- goto exit_func;
- }
- }
-
- if (que_node_get_type(thr->run_node) == QUE_NODE_UPDATE) {
- upd_node = static_cast<upd_node_t*>(thr->run_node);
-
- if (!(upd_node->is_delete) && upd_node->foreign == foreign) {
- /* If a cascaded update is done as defined by a
- foreign key constraint, do not check that
- constraint for the child row. In ON UPDATE CASCADE
- the update of the parent row is only half done when
- we come here: if we would check the constraint here
- for the child row it would fail.
-
- A QUESTION remains: if in the child table there are
- several constraints which refer to the same parent
- table, we should merge all updates to the child as
- one update? And the updates can be contradictory!
- Currently we just perform the update associated
- with each foreign key constraint, one after
- another, and the user has problems predicting in
- which order they are performed. */
-
- goto exit_func;
- }
- }
-
- /* Pick the side of the constraint that has to be searched */
- if (check_ref) {
- check_table = foreign->referenced_table;
- check_index = foreign->referenced_index;
- } else {
- check_table = foreign->foreign_table;
- check_index = foreign->foreign_index;
- }
-
- if (check_table == NULL
- || check_table->file_unreadable
- || check_index == NULL) {
-
- /* The table to search is missing or unreadable. This is
- reported as DB_NO_REFERENCED_ROW only when checking the
- referenced side outside read-only mode; otherwise err keeps
- the DB_SUCCESS assigned above. */
- if (!srv_read_only_mode && check_ref) {
- FILE* ef = dict_foreign_err_file;
- std::string fk_str;
-
- row_ins_set_detailed(trx, foreign);
-
- row_ins_foreign_trx_print(trx);
-
- fputs("Foreign key constraint fails for table ", ef);
- ut_print_name(ef, trx, TRUE,
- foreign->foreign_table_name);
- fputs(":\n", ef);
- fk_str = dict_print_info_on_foreign_key_in_create_format(
- trx, foreign, TRUE);
- fputs(fk_str.c_str(), ef);
- fputs("\nTrying to add to index ", ef);
- ut_print_name(ef, trx, FALSE,
- foreign->foreign_index->name);
- fputs(" tuple:\n", ef);
- dtuple_print(ef, entry);
- fputs("\nBut the parent table ", ef);
- ut_print_name(ef, trx, TRUE,
- foreign->referenced_table_name);
- fputs("\nor its .ibd file does"
- " not currently exist!\n", ef);
- mutex_exit(&dict_foreign_err_mutex); /* NOTE(review): the
- matching mutex_enter is not in this function;
- presumably it is taken inside
- row_ins_foreign_trx_print() - confirm */
-
- err = DB_NO_REFERENCED_ROW;
- }
-
- goto exit_func;
- }
-
- if (check_table != table) {
- /* We already have a LOCK_IX on table, but not necessarily
- on check_table */
-
- err = lock_table(0, check_table, LOCK_IS, thr);
-
- if (err != DB_SUCCESS) {
-
- /* mtr has not been started and pcur has not been
- opened yet, so the end_scan cleanup is skipped */
- goto do_possible_lock_wait;
- }
- }
-
- mtr_start_trx(&mtr, trx);
-
- /* Store old value on n_fields_cmp */
-
- n_fields_cmp = dtuple_get_n_fields_cmp(entry);
-
- dtuple_set_n_fields_cmp(entry, foreign->n_fields);
-
- btr_pcur_open(check_index, entry, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, &pcur, &mtr);
-
- /* Scan index records and check if there is a matching record */
-
- do {
- const rec_t* rec = btr_pcur_get_rec(&pcur);
- const buf_block_t* block = btr_pcur_get_block(&pcur);
-
- SRV_CORRUPT_TABLE_CHECK(block,
- {
- err = DB_CORRUPTION;
- goto exit_loop;
- });
-
- if (page_rec_is_infimum(rec)) {
-
- continue; /* goes to the loop condition, which moves
- pcur to the next record */
- }
-
- offsets = rec_get_offsets(rec, check_index,
- offsets, ULINT_UNDEFINED, &heap);
-
- if (page_rec_is_supremum(rec)) {
-
- err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, block,
- rec, check_index,
- offsets, thr);
- switch (err) {
- case DB_SUCCESS_LOCKED_REC:
- case DB_SUCCESS:
- continue;
- default:
- goto end_scan;
- }
- }
-
- cmp = cmp_dtuple_rec(entry, rec, offsets);
-
- if (cmp == 0) {
- if (rec_get_deleted_flag(rec,
- rec_offs_comp(offsets))) {
- /* Delete-marked match: lock it and keep
- scanning for a record that is not
- delete-marked */
- err = row_ins_set_shared_rec_lock(
- LOCK_ORDINARY, block,
- rec, check_index, offsets, thr);
- switch (err) {
- case DB_SUCCESS_LOCKED_REC:
- case DB_SUCCESS:
- break;
- default:
- goto end_scan;
- }
- } else {
- /* Found a matching record. Lock only
- a record because we can allow inserts
- into gaps */
-
- err = row_ins_set_shared_rec_lock(
- LOCK_REC_NOT_GAP, block,
- rec, check_index, offsets, thr);
-
- switch (err) {
- case DB_SUCCESS_LOCKED_REC:
- case DB_SUCCESS:
- break;
- default:
- goto end_scan;
- }
-
- if (check_ref) {
- /* The referenced row exists: the
- constraint holds */
- err = DB_SUCCESS;
-#ifdef WITH_WSREP
- err = wsrep_append_foreign_key(
- thr_get_trx(thr),
- foreign,
- rec,
- check_index,
- check_ref, TRUE);
-#endif /* WITH_WSREP */
- goto end_scan;
- } else if (foreign->type != 0) {
- /* There is an ON UPDATE or ON DELETE
- condition: check them in a separate
- function */
-
- err = row_ins_foreign_check_on_constraint(
- thr, foreign, &pcur, entry,
- &mtr);
- if (err != DB_SUCCESS) {
- /* Since reporting a plain
- "duplicate key" error
- message to the user in
- cases where a long CASCADE
- operation would lead to a
- duplicate key in some
- other table is very
- confusing, map duplicate
- key errors resulting from
- FK constraints to a
- separate error code. */
-
- if (err == DB_DUPLICATE_KEY) {
- err = DB_FOREIGN_DUPLICATE_KEY;
- }
-
- goto end_scan;
- }
-
- /* row_ins_foreign_check_on_constraint
- may have repositioned pcur on a
- different block */
- block = btr_pcur_get_block(&pcur);
- } else {
- /* A child row still refers to this key
- and the constraint has no ON UPDATE or
- ON DELETE action (foreign->type == 0) */
- row_ins_foreign_report_err(
- "Trying to delete or update",
- thr, foreign, rec, entry);
-
- err = DB_ROW_IS_REFERENCED;
- goto end_scan;
- }
- }
- } else {
- /* The scan has moved past every record equal to
- entry without finding a usable match */
- ut_a(cmp < 0);
-
- err = row_ins_set_shared_rec_lock(
- LOCK_GAP, block,
- rec, check_index, offsets, thr);
-
- switch (err) {
- case DB_SUCCESS_LOCKED_REC:
- case DB_SUCCESS:
- if (check_ref) {
- err = DB_NO_REFERENCED_ROW;
- row_ins_foreign_report_add_err(
- trx, foreign, rec, entry);
- } else {
- err = DB_SUCCESS;
- } /* fall through */
- default:
- break;
- }
-
- goto end_scan;
- }
- } while (btr_pcur_move_to_next(&pcur, &mtr));
-
-exit_loop: /* Reached when the index is exhausted without a match, and by
- the goto inside SRV_CORRUPT_TABLE_CHECK above.
- NOTE(review): err is reassigned unconditionally below, so a
- DB_CORRUPTION set just before 'goto exit_loop' does not
- reach the caller - confirm that this is intended */
- if (check_ref) {
- row_ins_foreign_report_add_err(
- trx, foreign, btr_pcur_get_rec(&pcur), entry);
- err = DB_NO_REFERENCED_ROW;
- } else {
- err = DB_SUCCESS;
- }
-
-end_scan:
- btr_pcur_close(&pcur);
-
- mtr_commit(&mtr);
-
- /* Restore old value */
- dtuple_set_n_fields_cmp(entry, n_fields_cmp);
-
-do_possible_lock_wait:
- if (err == DB_LOCK_WAIT) {
- bool verified = false;
-
- trx->error_state = err;
-
- que_thr_stop_for_mysql(thr);
-
- lock_wait_suspend_thread(thr);
-
- if (check_table->to_be_dropped) {
- /* The table is being dropped. We shall timeout
- this operation */
- err = DB_LOCK_WAIT_TIMEOUT;
- goto exit_func;
- }
-
- /* We had temporarily released dict_operation_lock in
- above lock sleep wait, now we have the lock again, and
- we will need to re-check whether the foreign key has been
- dropped. We only need to verify if the table is referenced
- table case (check_ref == 0), since MDL lock will prevent
- concurrent DDL and DML on the same table */
- if (!check_ref) {
- for (dict_foreign_set::iterator it
- = table->referenced_set.begin();
- it != table->referenced_set.end();
- ++it) {
- if (*it == foreign) {
- verified = true;
- break;
- }
- }
- } else {
- verified = true;
- }
-
- if (!verified) {
- /* foreign is no longer in table->referenced_set */
- err = DB_DICT_CHANGED;
- } else if (trx->error_state == DB_SUCCESS) {
- goto run_again;
- } else {
- err = trx->error_state;
- }
- }
-
-exit_func:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- if (UNIV_UNLIKELY(trx->fake_changes)) {
- err = DB_SUCCESS; /* overrides any result computed above */
- }
-
- return(err);
-}
-
-/***************************************************************//**
-Checks if foreign key constraints fail for an index entry. If index
-is not mentioned in any constraint, this function does nothing.
-Otherwise it searches the indexes of the referenced tables and
-sets shared locks which lock either the success or the failure of
-a constraint.
-
-Only the constraints in table->foreign_set whose foreign_index is index
-are checked, each one through row_ins_check_foreign_constraint() with
-check_ref == TRUE. The loop stops at the first check that does not
-return DB_SUCCESS and returns that code.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_ins_check_foreign_constraints(
-/*==============================*/
- dict_table_t* table, /*!< in: table */
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in: index entry for index */
- que_thr_t* thr) /*!< in: query thread */
-{
- dict_foreign_t* foreign;
- dberr_t err;
- trx_t* trx;
- ibool got_s_lock = FALSE; /* TRUE once this function has frozen
- the data dictionary itself */
-
- trx = thr_get_trx(thr);
-
- DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
- "foreign_constraint_check_for_ins");
-
- for (dict_foreign_set::iterator it = table->foreign_set.begin();
- it != table->foreign_set.end();
- ++it) {
-
- foreign = *it;
-
- if (foreign->foreign_index == index) {
- dict_table_t* ref_table = NULL;
- dict_table_t* foreign_table = foreign->foreign_table;
- dict_table_t* referenced_table
- = foreign->referenced_table;
-
- if (referenced_table == NULL) {
-
- /* The constraint has no referenced table
- attached: open it by name; the handle is
- closed again after the check below */
- ref_table = dict_table_open_on_name(
- foreign->referenced_table_name_lookup,
- FALSE, FALSE, DICT_ERR_IGNORE_NONE);
- }
-
- if (0 == trx->dict_operation_lock_mode) {
- /* Freeze the data dictionary for the check and
- remember to unfreeze it afterwards */
- got_s_lock = TRUE;
-
- row_mysql_freeze_data_dictionary(trx);
- }
-
- if (referenced_table) {
- os_inc_counter(dict_sys->mutex,
- foreign_table
- ->n_foreign_key_checks_running);
- }
-
- /* NOTE that if the thread ends up waiting for a lock
- we will release dict_operation_lock temporarily!
- But the counter on the table protects the referenced
- table from being dropped while the check is running. */
-
- err = row_ins_check_foreign_constraint(
- TRUE, foreign, table, entry, thr);
-
- DBUG_EXECUTE_IF("row_ins_dict_change_err",
- err = DB_DICT_CHANGED;);
-
- /* Undo the steps taken before the check: counter,
- dictionary freeze, then the table handle */
- if (referenced_table) {
- os_dec_counter(dict_sys->mutex,
- foreign_table
- ->n_foreign_key_checks_running);
- }
-
- if (got_s_lock) {
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- if (ref_table != NULL) {
- dict_table_close(ref_table, FALSE, FALSE);
- }
-
- if (err != DB_SUCCESS) {
-
- return(err);
- }
- }
- }
-
- return(DB_SUCCESS);
-}
-
-/***************************************************************//**
-Decides whether inserting entry would violate the uniqueness of index
-because of the existing record rec. There is no violation when fewer than
-n_unique leading fields match, when index is a secondary index and one of
-the first n_unique fields of entry is SQL NULL, or when rec is delete-marked.
-@return TRUE if error */
-static
-ibool
-row_ins_dupl_error_with_rec(
-/*========================*/
-    const rec_t*     rec,     /*!< in: user record; NOTE that the caller
-                              is assumed to hold a record lock on it
-                              already! */
-    const dtuple_t*  entry,   /*!< in: entry to insert */
-    dict_index_t*    index,   /*!< in: index */
-    const ulint*     offsets) /*!< in: rec_get_offsets(rec, index) */
-{
-    ut_ad(rec_offs_validate(rec, index, offsets));
-
-    const ulint n_uniq = dict_index_get_n_unique(index);
-    ulint       n_matched_fields = 0;
-    ulint       n_matched_bytes = 0;
-
-    cmp_dtuple_rec_with_match(entry, rec, offsets,
-                              &n_matched_fields, &n_matched_bytes);
-
-    if (n_matched_fields < n_uniq) {
-        /* The unique prefix differs: not a duplicate. */
-        return(FALSE);
-    }
-
-    /* Equal key values are allowed in a unique secondary index if they
-    contain SQL NULLs */
-    if (!dict_index_is_clust(index)) {
-        ulint pos = 0;
-
-        while (pos < n_uniq) {
-            if (dfield_is_null(dtuple_get_nth_field(entry, pos))) {
-                return(FALSE);
-            }
-
-            pos++;
-        }
-    }
-
-    /* A delete-marked record does not count as a duplicate. */
-    return(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
-}
-
-/***************************************************************//**
-Scans a unique non-clustered index at a given index entry to determine
-whether a uniqueness violation has occurred for the key value of the entry.
-Sets shared locks on possible duplicate records, or exclusive locks when
-the transaction's 'duplicates' field is nonzero; no locks are set when
-flags contains BTR_NO_LOCKING_FLAG.
-
-Returns DB_SUCCESS without scanning if any of the first n_unique fields of
-entry is SQL NULL. While scanning, n_fields_cmp of entry is set to n_unique;
-the old value is restored before returning. When a duplicate is found,
-the transaction's error_info is set to index.
-@return DB_SUCCESS, DB_DUPLICATE_KEY, or DB_LOCK_WAIT; any other code
-returned by the record lock calls is passed on as is */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_ins_scan_sec_index_for_duplicate(
-/*=================================*/
- ulint flags, /*!< in: undo logging and locking flags */
- dict_index_t* index, /*!< in: non-clustered unique index */
- dtuple_t* entry, /*!< in: index entry */
- que_thr_t* thr, /*!< in: query thread */
- bool s_latch,/*!< in: whether index->lock is being held */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- mem_heap_t* offsets_heap)
- /*!< in/out: memory heap that can be emptied */
-{
- ulint n_unique;
- int cmp;
- ulint n_fields_cmp;
- btr_pcur_t pcur;
- dberr_t err = DB_SUCCESS;
- ulint allow_duplicates;
- ulint* offsets = NULL;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(s_latch == rw_lock_own(&index->lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- n_unique = dict_index_get_n_unique(index);
-
- /* If the secondary index is unique, but one of the fields in the
- n_unique first fields is NULL, a unique key violation cannot occur,
- since we define NULL != NULL in this case */
-
- for (ulint i = 0; i < n_unique; i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(entry, i))) {
-
- return(DB_SUCCESS);
- }
- }
-
- /* Store old value on n_fields_cmp */
-
- n_fields_cmp = dtuple_get_n_fields_cmp(entry);
-
- dtuple_set_n_fields_cmp(entry, n_unique);
-
- btr_pcur_open(index, entry, PAGE_CUR_GE,
- s_latch
- ? BTR_SEARCH_LEAF | BTR_ALREADY_S_LATCHED
- : BTR_SEARCH_LEAF,
- &pcur, mtr);
-
- allow_duplicates = thr_get_trx(thr)->duplicates;
-
- /* Scan index records and check if there is a duplicate */
-
- do {
- const rec_t* rec = btr_pcur_get_rec(&pcur);
- const buf_block_t* block = btr_pcur_get_block(&pcur);
- const ulint lock_type = LOCK_ORDINARY; /* same lock type for
- every record visited */
-
- if (page_rec_is_infimum(rec)) {
-
- continue;
- }
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &offsets_heap);
-
- /* The lock is requested before the supremum test below, so
- a supremum record is locked as well */
- if (flags & BTR_NO_LOCKING_FLAG) {
- /* Set no locks when applying log
- in online table rebuild. */
- } else if (allow_duplicates) {
-
- /* If the SQL-query will update or replace
- duplicate key we will take X-lock for
- duplicates ( REPLACE, LOAD DATAFILE REPLACE,
- INSERT ON DUPLICATE KEY UPDATE). */
-
- err = row_ins_set_exclusive_rec_lock(
- lock_type, block, rec, index, offsets, thr);
- } else {
-
- err = row_ins_set_shared_rec_lock(
- lock_type, block, rec, index, offsets, thr);
- }
-
- switch (err) {
- case DB_SUCCESS_LOCKED_REC:
- err = DB_SUCCESS; /* normalize, then fall through */
- case DB_SUCCESS:
- break;
- default:
- goto end_scan;
- }
-
- if (page_rec_is_supremum(rec)) {
-
- continue;
- }
-
- cmp = cmp_dtuple_rec(entry, rec, offsets);
-
- if (cmp == 0) {
- if (row_ins_dupl_error_with_rec(rec, entry,
- index, offsets)) {
- err = DB_DUPLICATE_KEY;
-
- thr_get_trx(thr)->error_info = index;
-
- /* If the duplicate is on hidden FTS_DOC_ID,
- state so in the error log */
- if (DICT_TF2_FLAG_IS_SET(
- index->table,
- DICT_TF2_FTS_HAS_DOC_ID)
- && strcmp(index->name,
- FTS_DOC_ID_INDEX_NAME) == 0) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Duplicate FTS_DOC_ID value"
- " on table %s",
- index->table->name);
- }
-
- goto end_scan;
- }
- } else {
- /* Past the last record equal to entry: stop the
- scan with the current err */
- ut_a(cmp < 0);
- goto end_scan;
- }
- } while (btr_pcur_move_to_next(&pcur, mtr));
-
-end_scan:
- /* Restore old value */
- dtuple_set_n_fields_cmp(entry, n_fields_cmp);
-
- return(err);
-}
-
-/** Compares entry with a single clustered index record while the table
-is being rebuilt online. The comparison covers the n_uniq key fields and
-the two fields that follow them (DB_TRX_ID, DB_ROLL_PTR).
-@retval DB_SUCCESS when fewer than n_uniq fields match (no duplicate)
-@retval DB_SUCCESS_LOCKED_REC when all n_uniq + 2 fields match, i.e. rec is
-an exact match of entry (the entry should not be inserted)
-@retval DB_DUPLICATE_KEY when entry is a duplicate of rec */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_ins_duplicate_online(
-/*=====================*/
-    ulint            n_uniq,  /*!< in: offset of DB_TRX_ID */
-    const dtuple_t*  entry,   /*!< in: entry that is being inserted */
-    const rec_t*     rec,     /*!< in: clustered index record */
-    ulint*           offsets) /*!< in/out: rec_get_offsets(rec) */
-{
-    ulint n_matched_fields = 0;
-    ulint n_matched_bytes = 0;
-
-    /* The rebuilt table must not contain delete-marked rows. */
-    ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
-    ut_ad(dtuple_get_n_fields_cmp(entry) == n_uniq);
-
-    /* Match on the PRIMARY KEY fields plus DB_TRX_ID, DB_ROLL_PTR. */
-    cmp_dtuple_rec_with_match_low(
-        entry, rec, offsets, n_uniq + 2,
-        &n_matched_fields, &n_matched_bytes);
-
-    dberr_t result;
-
-    if (n_matched_fields < n_uniq) {
-        /* The key differs: not a duplicate. */
-        result = DB_SUCCESS;
-    } else if (n_matched_fields == n_uniq + 2) {
-        /* Key and system columns agree: rec is entry itself. */
-        ut_ad(n_matched_bytes == 0);
-        result = DB_SUCCESS_LOCKED_REC;
-    } else {
-        result = DB_DUPLICATE_KEY;
-    }
-
-    return(result);
-}
-
-/** Checks the record under the cursor and the record following it for a
-duplicate of entry while the table is being rebuilt online. A record is
-examined with row_ins_duplicate_online() only if the cursor's match count
-on that side (low_match or up_match) is at least n_uniq and the record is
-not the page infimum or supremum, respectively.
-@retval DB_SUCCESS when no duplicate is detected
-@retval DB_SUCCESS_LOCKED_REC when rec is an exact match of entry or
-a newer version of entry (the entry should not be inserted)
-@retval DB_DUPLICATE_KEY when entry is a duplicate of rec */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_ins_duplicate_error_in_clust_online(
-/*====================================*/
-    ulint             n_uniq,  /*!< in: offset of DB_TRX_ID */
-    const dtuple_t*   entry,   /*!< in: entry that is being inserted */
-    const btr_cur_t*  cursor,  /*!< in: cursor on insert position */
-    ulint**           offsets, /*!< in/out: rec_get_offsets(rec) */
-    mem_heap_t**      heap)    /*!< in/out: heap for offsets */
-{
-    const rec_t* cur_rec = btr_cur_get_rec(cursor);
-
-    /* First the record the cursor is positioned on. */
-    if (cursor->low_match >= n_uniq && !page_rec_is_infimum(cur_rec)) {
-        *offsets = rec_get_offsets(cur_rec, cursor->index, *offsets,
-                                   ULINT_UNDEFINED, heap);
-
-        const dberr_t low_err = row_ins_duplicate_online(
-            n_uniq, entry, cur_rec, *offsets);
-
-        if (low_err != DB_SUCCESS) {
-            return(low_err);
-        }
-    }
-
-    /* Then its successor on the page. */
-    const rec_t* next_rec = page_rec_get_next_const(
-        btr_cur_get_rec(cursor));
-
-    if (cursor->up_match < n_uniq || page_rec_is_supremum(next_rec)) {
-        return(DB_SUCCESS);
-    }
-
-    *offsets = rec_get_offsets(next_rec, cursor->index, *offsets,
-                               ULINT_UNDEFINED, heap);
-
-    return(row_ins_duplicate_online(n_uniq, entry, next_rec, *offsets));
-}
-
-/***************************************************************//**
-Checks if a unique key violation error would occur at an index entry
-insert. Sets shared locks on possible duplicate records, or exclusive
-locks when trx->duplicates is nonzero. Works only for a clustered index!
-The record under the cursor is examined when cursor->low_match reaches
-n_unique, and its successor when cursor->up_match does. On a duplicate,
-trx->error_info is set to the cursor's index.
-@retval DB_SUCCESS if no error
-@retval DB_DUPLICATE_KEY if error,
-@retval DB_LOCK_WAIT if we have to wait for a lock on a possible duplicate
-record */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_ins_duplicate_error_in_clust(
- btr_cur_t* cursor, /*!< in: B-tree cursor */
- const dtuple_t* entry, /*!< in: entry to insert */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr */
-{
- dberr_t err;
- rec_t* rec;
- ulint n_unique;
- trx_t* trx = thr_get_trx(thr);
- mem_heap_t*heap = NULL; /* created on demand by rec_get_offsets();
- freed at func_exit */
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- UT_NOT_USED(mtr);
-
- ut_ad(dict_index_is_clust(cursor->index));
-
- /* NOTE: For unique non-clustered indexes there may be any number
- of delete marked records with the same value for the non-clustered
- index key (remember multiversioning), and which differ only in
- the row reference part of the index record, containing the
- clustered index key fields. For such a secondary index record,
- to avoid race condition, we must FIRST do the insertion and after
- that check that the uniqueness condition is not breached! */
-
- /* NOTE: A problem is that in the B-tree node pointers on an
- upper level may match more to the entry than the actual existing
- user records on the leaf level. So, even if low_match would suggest
- that a duplicate key violation may occur, this may not be the case. */
-
- n_unique = dict_index_get_n_unique(cursor->index);
-
- if (cursor->low_match >= n_unique) {
-
- rec = btr_cur_get_rec(cursor);
-
- if (!page_rec_is_infimum(rec)) {
- offsets = rec_get_offsets(rec, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
-
- /* We set a lock on the possible duplicate: this
- is needed in logical logging of MySQL to make
- sure that in roll-forward we get the same duplicate
- errors as in original execution */
-
- if (trx->duplicates) {
-
- /* If the SQL-query will update or replace
- duplicate key we will take X-lock for
- duplicates ( REPLACE, LOAD DATAFILE REPLACE,
- INSERT ON DUPLICATE KEY UPDATE). */
-
- err = row_ins_set_exclusive_rec_lock(
- LOCK_REC_NOT_GAP,
- btr_cur_get_block(cursor),
- rec, cursor->index, offsets, thr);
- } else {
-
- err = row_ins_set_shared_rec_lock(
- LOCK_REC_NOT_GAP,
- btr_cur_get_block(cursor), rec,
- cursor->index, offsets, thr);
- }
-
- switch (err) {
- case DB_SUCCESS_LOCKED_REC:
- case DB_SUCCESS:
- break;
- default:
- goto func_exit;
- }
-
- if (row_ins_dupl_error_with_rec(
- rec, entry, cursor->index, offsets)) {
-duplicate: /* also the target of the goto in the up_match branch below */
- trx->error_info = cursor->index;
- err = DB_DUPLICATE_KEY;
- goto func_exit;
- }
- }
- }
-
- if (cursor->up_match >= n_unique) {
-
- rec = page_rec_get_next(btr_cur_get_rec(cursor));
-
- if (!page_rec_is_supremum(rec)) {
- offsets = rec_get_offsets(rec, cursor->index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (trx->duplicates) {
-
- /* If the SQL-query will update or replace
- duplicate key we will take X-lock for
- duplicates ( REPLACE, LOAD DATAFILE REPLACE,
- INSERT ON DUPLICATE KEY UPDATE). */
-
- err = row_ins_set_exclusive_rec_lock(
- LOCK_REC_NOT_GAP,
- btr_cur_get_block(cursor),
- rec, cursor->index, offsets, thr);
- } else {
-
- err = row_ins_set_shared_rec_lock(
- LOCK_REC_NOT_GAP,
- btr_cur_get_block(cursor),
- rec, cursor->index, offsets, thr);
- }
-
- switch (err) {
- case DB_SUCCESS_LOCKED_REC:
- case DB_SUCCESS:
- break;
- default:
- goto func_exit;
- }
-
- if (row_ins_dupl_error_with_rec(
- rec, entry, cursor->index, offsets)) {
- goto duplicate;
- }
- }
-
- /* This should never happen */
- ut_error; /* reached whenever up_match >= n_unique and neither
- goto above was taken, including when the next record
- is the supremum */
- }
-
- err = DB_SUCCESS;
-func_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
-}
-
-/***************************************************************//**
-Tells whether the entry shares a long enough prefix with the record under
-the cursor that the planned insert has to become a modification of that
-record. For a clustered index the prefix must cover n_unique fields; for a
-secondary index all fields must be equal. InnoDB never updates secondary
-index records in place, apart from setting or clearing the delete-mark
-flag. The non-unique fields of a unique secondary index record could be
-updated by looking at cursor->up_match, but that is not done because it
-could have locking implications.
-@return TRUE if the existing record should be updated; FALSE if not */
-UNIV_INLINE
-ibool
-row_ins_must_modify_rec(
-/*====================*/
-    const btr_cur_t*  cursor)  /*!< in: B-tree cursor */
-{
-    /* NOTE: (compare to the note in row_ins_duplicate_error_in_clust)
-    Node pointers on upper levels of the B-tree may match more of the
-    entry than the actual user records on the leaf level do, so the
-    candidate must be verified to be a real user record. A clustered
-    index node pointer holds the first index->n_unique fields, and a
-    secondary index node pointer holds all index fields. */
-
-    if (cursor->low_match
-        < dict_index_get_n_unique_in_tree(cursor->index)) {
-        /* The matched prefix is too short. */
-        return(FALSE);
-    }
-
-    return(!page_rec_is_infimum(btr_cur_get_rec(cursor)));
-}
-
-/***************************************************************//**
-Tries to insert an entry into a clustered index, ignoring foreign key
-constraints. If a record with the same unique key is found, the other
-record is necessarily marked deleted by a committed transaction, or a
-unique key violation error occurs. The delete marked record is then
-updated to an existing record, and we must write an undo log record on
-the delete marked record.
-@retval DB_SUCCESS on success
-@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
-@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
-@return error code */
-UNIV_INTERN
-dberr_t
-row_ins_clust_index_entry_low(
-/*==========================*/
- ulint flags, /*!< in: undo logging and locking flags */
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether we wish optimistic or
- pessimistic descent down the index tree */
- dict_index_t* index, /*!< in: clustered index */
- ulint n_uniq, /*!< in: 0 or index->n_uniq */
- dtuple_t* entry, /*!< in/out: index entry to insert */
- ulint n_ext, /*!< in: number of externally stored columns */
- que_thr_t* thr) /*!< in: query thread */
-{
- btr_cur_t cursor;
- ulint* offsets = NULL;
- dberr_t err = DB_SUCCESS;
- big_rec_t* big_rec = NULL;
- mtr_t mtr;
- mem_heap_t* offsets_heap = NULL;
- ulint search_mode;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(!dict_index_is_unique(index)
- || n_uniq == dict_index_get_n_unique(index));
- ut_ad(!n_uniq || n_uniq == dict_index_get_n_unique(index));
-
- /* If running with fake_changes mode on then switch from modify to
- search so that code takes only s-latch and not x-latch.
- For dry-run (fake-changes) s-latch is acceptable. Taking x-latch will
- make it more restrictive and will block real changes/workflow. */
- if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) {
- search_mode = (mode & BTR_MODIFY_TREE)
- ? BTR_SEARCH_TREE : BTR_SEARCH_LEAF;
- } else {
- search_mode = mode;
- }
-
- mtr_start_trx(&mtr, thr_get_trx(thr));
-
- if (mode == BTR_MODIFY_LEAF && dict_index_is_online_ddl(index)) {
-
- /* We really don't need to OR mode but will leave it for
- code consistency. */
- mode |= BTR_ALREADY_S_LATCHED;
- search_mode |= BTR_ALREADY_S_LATCHED;
-
- mtr_s_lock(dict_index_get_lock(index), &mtr);
- }
-
- cursor.thr = thr;
-
- /* Note that we use PAGE_CUR_LE as the search mode, because then
- the function will return in both low_match and up_match of the
- cursor sensible values */
-
- err = btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, search_mode,
- &cursor, 0, __FILE__, __LINE__, &mtr);
-
- if (err != DB_SUCCESS) {
- index->table->file_unreadable = true;
- mtr_commit(&mtr);
- goto func_exit;
- }
-
-#ifdef UNIV_DEBUG
- {
- page_t* page = btr_cur_get_page(&cursor);
- rec_t* first_rec = page_rec_get_next(
- page_get_infimum_rec(page));
-
- ut_ad(page_rec_is_supremum(first_rec)
- || rec_get_n_fields(first_rec, index)
- == dtuple_get_n_fields(entry));
- }
-#endif
-
- if (n_uniq && (cursor.up_match >= n_uniq
- || cursor.low_match >= n_uniq)) {
-
- if (flags
- == (BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG)) {
- /* Set no locks when applying log
- in online table rebuild. Only check for duplicates. */
- err = row_ins_duplicate_error_in_clust_online(
- n_uniq, entry, &cursor,
- &offsets, &offsets_heap);
-
- switch (err) {
- case DB_SUCCESS:
- break;
- default:
- ut_ad(0);
- /* fall through */
- case DB_SUCCESS_LOCKED_REC:
- case DB_DUPLICATE_KEY:
- thr_get_trx(thr)->error_info = cursor.index;
- }
- } else {
- /* Note that the following may return also
- DB_LOCK_WAIT */
-
- err = row_ins_duplicate_error_in_clust(
- &cursor, entry, thr, &mtr);
- }
-
- if (err != DB_SUCCESS) {
-err_exit:
- mtr_commit(&mtr);
- goto func_exit;
- }
- }
-
- if (row_ins_must_modify_rec(&cursor)) {
- /* There is already an index entry with a long enough common
- prefix, we must convert the insert into a modify of an
- existing record */
- mem_heap_t* entry_heap = mem_heap_create(1024);
-
- err = row_ins_clust_index_entry_by_modify(
- flags, mode, &cursor, &offsets, &offsets_heap,
- entry_heap, &big_rec, entry, thr, &mtr);
-
- rec_t* rec = btr_cur_get_rec(&cursor);
-
- if (big_rec && UNIV_LIKELY(!thr_get_trx(thr)->fake_changes)) {
- ut_a(err == DB_SUCCESS);
- /* Write out the externally stored
- columns while still x-latching
- index->lock and block->lock. Allocate
- pages for big_rec in the mtr that
- modified the B-tree, but be sure to skip
- any pages that were freed in mtr. We will
- write out the big_rec pages before
- committing the B-tree mini-transaction. If
- the system crashes so that crash recovery
- will not replay the mtr_commit(&mtr), the
- big_rec pages will be left orphaned until
- the pages are allocated for something else.
-
- TODO: If the allocation extends the
- tablespace, it will not be redo
- logged, in either mini-transaction.
- Tablespace extension should be
- redo-logged in the big_rec
- mini-transaction, so that recovery
- will not fail when the big_rec was
- written to the extended portion of the
- file, in case the file was somehow
- truncated in the crash. */
-
- DEBUG_SYNC_C_IF_THD(
- thr_get_trx(thr)->mysql_thd,
- "before_row_ins_upd_extern");
- err = btr_store_big_rec_extern_fields(
- index, btr_cur_get_block(&cursor),
- rec, offsets, big_rec, &mtr,
- BTR_STORE_INSERT_UPDATE);
- DEBUG_SYNC_C_IF_THD(
- thr_get_trx(thr)->mysql_thd,
- "after_row_ins_upd_extern");
- /* If writing big_rec fails (for
- example, because of DB_OUT_OF_FILE_SPACE),
- the record will be corrupted. Even if
- we did not update any externally
- stored columns, our update could cause
- the record to grow so that a
- non-updated column was selected for
- external storage. This non-update
- would not have been written to the
- undo log, and thus the record cannot
- be rolled back.
-
- However, because we have not executed
- mtr_commit(mtr) yet, the update will
- not be replayed in crash recovery, and
- the following assertion failure will
- effectively "roll back" the operation. */
- ut_a(err == DB_SUCCESS);
- dtuple_big_rec_free(big_rec);
- } else if (big_rec != NULL
- && UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) {
- dtuple_big_rec_free(big_rec);
- }
-
- if (err == DB_SUCCESS
- && dict_index_is_online_ddl(index)
- && UNIV_LIKELY(!thr_get_trx(thr)->fake_changes)) {
- row_log_table_insert(rec, index, offsets);
- }
-
- mtr_commit(&mtr);
- mem_heap_free(entry_heap);
- } else {
- rec_t* insert_rec;
-
- if (mode != BTR_MODIFY_TREE) {
- ut_ad(((mode & ~BTR_ALREADY_S_LATCHED)
- == BTR_MODIFY_LEAF)
- || thr_get_trx(thr)->fake_changes);
- err = btr_cur_optimistic_insert(
- flags, &cursor, &offsets, &offsets_heap,
- entry, &insert_rec, &big_rec,
- n_ext, thr, &mtr);
- } else {
- if (buf_LRU_buf_pool_running_out()) {
-
- err = DB_LOCK_TABLE_FULL;
- goto err_exit;
- }
-
- err = btr_cur_optimistic_insert(
- flags, &cursor,
- &offsets, &offsets_heap,
- entry, &insert_rec, &big_rec,
- n_ext, thr, &mtr);
-
- if (err == DB_FAIL) {
- err = btr_cur_pessimistic_insert(
- flags, &cursor,
- &offsets, &offsets_heap,
- entry, &insert_rec, &big_rec,
- n_ext, thr, &mtr);
- }
- }
-
- if (UNIV_LIKELY_NULL(big_rec)) {
- mtr_commit(&mtr);
-
- if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) {
-
- dtuple_convert_back_big_rec(
- index, entry, big_rec);
- goto func_exit;
- }
-
- /* Online table rebuild could read (and
- ignore) the incomplete record at this point.
- If online rebuild is in progress, the
- row_ins_index_entry_big_rec() will write log. */
-
- DBUG_EXECUTE_IF(
- "row_ins_extern_checkpoint",
- log_make_checkpoint_at(
- LSN_MAX, TRUE););
- err = row_ins_index_entry_big_rec(
- entry, big_rec, offsets, &offsets_heap, index,
- thr_get_trx(thr)->mysql_thd,
- __FILE__, __LINE__);
- dtuple_convert_back_big_rec(index, entry, big_rec);
- } else {
- if (err == DB_SUCCESS
- && dict_index_is_online_ddl(index)
- && !UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) {
- row_log_table_insert(
- insert_rec, index, offsets);
- }
-
- mtr_commit(&mtr);
- }
- }
-
-func_exit:
- if (offsets_heap) {
- mem_heap_free(offsets_heap);
- }
-
- return(err);
-}
-
-/***************************************************************//**
-Starts a mini-transaction and checks if the index will be dropped.
-@return true if the index is to be dropped */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-row_ins_sec_mtr_start_trx_and_check_if_aborted(
-/*=======================================*/
- mtr_t* mtr, /*!< out: mini-transaction */
- trx_t* trx, /*!< in: transaction handle */
- dict_index_t* index, /*!< in/out: secondary index */
- bool check, /*!< in: whether to check */
- ulint search_mode)
- /*!< in: flags */
-{
- ut_ad(!dict_index_is_clust(index));
-
- mtr_start_trx(mtr, trx);
-
- if (!check) {
- return(false);
- }
-
- if (search_mode & BTR_ALREADY_S_LATCHED) {
- mtr_s_lock(dict_index_get_lock(index), mtr);
- } else {
- mtr_x_lock(dict_index_get_lock(index), mtr);
- }
-
- switch (index->online_status) {
- case ONLINE_INDEX_ABORTED:
- case ONLINE_INDEX_ABORTED_DROPPED:
- ut_ad(*index->name == TEMP_INDEX_PREFIX);
- return(true);
- case ONLINE_INDEX_COMPLETE:
- return(false);
- case ONLINE_INDEX_CREATION:
- break;
- }
-
- ut_error;
- return(true);
-}
-
-/***************************************************************//**
-Tries to insert an entry into a secondary index. If a record with exactly the
-same fields is found, the other record is necessarily marked deleted.
-It is then unmarked. Otherwise, the entry is just inserted to the index.
-@retval DB_SUCCESS on success
-@retval DB_LOCK_WAIT on lock wait when !(flags & BTR_NO_LOCKING_FLAG)
-@retval DB_FAIL if retry with BTR_MODIFY_TREE is needed
-@return error code */
-UNIV_INTERN
-dberr_t
-row_ins_sec_index_entry_low(
-/*========================*/
- ulint flags, /*!< in: undo logging and locking flags */
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether we wish optimistic or
- pessimistic descent down the index tree */
- dict_index_t* index, /*!< in: secondary index */
- mem_heap_t* offsets_heap,
- /*!< in/out: memory heap that can be emptied */
- mem_heap_t* heap, /*!< in/out: memory heap */
- dtuple_t* entry, /*!< in/out: index entry to insert */
- trx_id_t trx_id, /*!< in: PAGE_MAX_TRX_ID during
- row_log_table_apply(), or 0 */
- que_thr_t* thr) /*!< in: query thread */
-{
- btr_cur_t cursor;
- ulint search_mode;
- dberr_t err = DB_SUCCESS;
- ulint n_unique;
- mtr_t mtr;
- ulint* offsets = NULL;
- trx_t* trx = thr_get_trx(thr);
-
- ut_ad(!dict_index_is_clust(index));
- ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE);
-
- cursor.thr = thr;
- ut_ad(thr_get_trx(thr)->id);
- mtr_start_trx(&mtr, trx);
-
- /* If running with fake_changes mode on then avoid using insert buffer
- and also switch from modify to search so that code takes only s-latch
- and not x-latch. For dry-run (fake-changes) s-latch is acceptable.
- Taking x-latch will make it more restrictive and will block real
- changes/workflow. */
- if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) {
- search_mode = (mode & BTR_MODIFY_TREE)
- ? BTR_SEARCH_TREE : BTR_SEARCH_LEAF;
- } else {
- search_mode = mode | BTR_INSERT;
- }
-
- /* Ensure that we acquire index->lock when inserting into an
- index with index->online_status == ONLINE_INDEX_COMPLETE, but
- could still be subject to rollback_inplace_alter_table().
- This prevents a concurrent change of index->online_status.
- The memory object cannot be freed as long as we have an open
- reference to the table, or index->table->n_ref_count > 0. */
- const bool check = *index->name == TEMP_INDEX_PREFIX;
-
- if (check) {
-
- DEBUG_SYNC_C("row_ins_sec_index_enter");
-
- /* mode = MODIFY_LEAF is synonymous to search_mode = SEARCH_LEAF
- search_mode = SEARCH_TREE suggest operation in fake_change mode
- so continue to s-latch in this mode too. */
-
- if (mode == BTR_MODIFY_LEAF || search_mode == BTR_SEARCH_TREE) {
-
- ut_ad((search_mode == BTR_SEARCH_TREE
- && thr_get_trx(thr)->fake_changes)
- || mode == BTR_MODIFY_LEAF);
-
- search_mode |= BTR_ALREADY_S_LATCHED;
- mtr_s_lock(dict_index_get_lock(index), &mtr);
-
- } else {
- mtr_x_lock(dict_index_get_lock(index), &mtr);
- }
-
- if (row_log_online_op_try(
- index, entry, thr_get_trx(thr)->id)) {
- goto func_exit;
- }
- }
-
- if (!thr_get_trx(thr)->check_unique_secondary) {
- search_mode |= BTR_IGNORE_SEC_UNIQUE;
- }
-
- /* Note that we use PAGE_CUR_LE as the search mode, because then
- the function will return in both low_match and up_match of the
- cursor sensible values */
- err = btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
- search_mode,
- &cursor, 0, __FILE__, __LINE__, &mtr);
-
- if (err != DB_SUCCESS) {
- if (err == DB_DECRYPTION_FAILED) {
- ib_push_warning(trx->mysql_thd,
- DB_DECRYPTION_FAILED,
- "Table %s is encrypted but encryption service or"
- " used key_id is not available. "
- " Can't continue reading table.",
- index->table->name);
- index->table->file_unreadable = true;
- }
- goto func_exit;
- }
-
- if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
- /* The insert was buffered during the search: we are done */
- goto func_exit;
- }
-
-#ifdef UNIV_DEBUG
- {
- page_t* page = btr_cur_get_page(&cursor);
- rec_t* first_rec = page_rec_get_next(
- page_get_infimum_rec(page));
-
- ut_ad(page_rec_is_supremum(first_rec)
- || rec_get_n_fields(first_rec, index)
- == dtuple_get_n_fields(entry));
- }
-#endif
-
- n_unique = dict_index_get_n_unique(index);
-
- if (dict_index_is_unique(index)
- && (cursor.low_match >= n_unique || cursor.up_match >= n_unique)) {
- mtr_commit(&mtr);
-
- DEBUG_SYNC_C("row_ins_sec_index_unique");
-
- if (row_ins_sec_mtr_start_trx_and_check_if_aborted(
- &mtr, trx, index, check, search_mode)) {
- goto func_exit;
- }
-
- err = row_ins_scan_sec_index_for_duplicate(
- flags, index, entry, thr, check, &mtr, offsets_heap);
-
- mtr_commit(&mtr);
-
- switch (err) {
- case DB_SUCCESS:
- break;
- case DB_DUPLICATE_KEY:
- if (*index->name == TEMP_INDEX_PREFIX) {
- ut_ad(!thr_get_trx(thr)
- ->dict_operation_lock_mode);
- mutex_enter(&dict_sys->mutex);
- dict_set_corrupted_index_cache_only(
- index, index->table);
- mutex_exit(&dict_sys->mutex);
- /* Do not return any error to the
- caller. The duplicate will be reported
- by ALTER TABLE or CREATE UNIQUE INDEX.
- Unfortunately we cannot report the
- duplicate key value to the DDL thread,
- because the altered_table object is
- private to its call stack. */
- err = DB_SUCCESS;
- }
- /* fall through */
- default:
- return(err);
- }
-
- if (row_ins_sec_mtr_start_trx_and_check_if_aborted(
- &mtr, trx, index, check, search_mode)) {
- goto func_exit;
- }
-
- DEBUG_SYNC_C("row_ins_sec_index_entry_dup_locks_created");
-
- /* We did not find a duplicate and we have now
- locked with s-locks the necessary records to
- prevent any insertion of a duplicate by another
- transaction. Let us now reposition the cursor and
- continue the insertion. */
-
- btr_cur_search_to_nth_level(
- index, 0, entry, PAGE_CUR_LE,
- search_mode & ~(BTR_INSERT | BTR_IGNORE_SEC_UNIQUE),
- &cursor, 0, __FILE__, __LINE__, &mtr);
- }
-
- if (row_ins_must_modify_rec(&cursor)) {
- /* There is already an index entry with a long enough common
- prefix, we must convert the insert into a modify of an
- existing record */
- offsets = rec_get_offsets(
- btr_cur_get_rec(&cursor), index, offsets,
- ULINT_UNDEFINED, &offsets_heap);
-
- err = row_ins_sec_index_entry_by_modify(
- flags, mode, &cursor, &offsets,
- offsets_heap, heap, entry, thr, &mtr);
- } else {
- rec_t* insert_rec;
- big_rec_t* big_rec;
-
- if (mode == BTR_MODIFY_LEAF) {
- err = btr_cur_optimistic_insert(
- flags, &cursor, &offsets, &offsets_heap,
- entry, &insert_rec,
- &big_rec, 0, thr, &mtr);
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
- if (buf_LRU_buf_pool_running_out()) {
-
- err = DB_LOCK_TABLE_FULL;
- goto func_exit;
- }
-
- err = btr_cur_optimistic_insert(
- flags, &cursor,
- &offsets, &offsets_heap,
- entry, &insert_rec,
- &big_rec, 0, thr, &mtr);
- if (err == DB_FAIL) {
- err = btr_cur_pessimistic_insert(
- flags, &cursor,
- &offsets, &offsets_heap,
- entry, &insert_rec,
- &big_rec, 0, thr, &mtr);
- }
- }
-
- if (err == DB_SUCCESS && trx_id) {
- page_update_max_trx_id(
- btr_cur_get_block(&cursor),
- btr_cur_get_page_zip(&cursor),
- trx_id, &mtr);
- }
-
- ut_ad(!big_rec);
- }
-
-func_exit:
- mtr_commit(&mtr);
- return(err);
-}
-
-/***************************************************************//**
-Tries to insert the externally stored fields (off-page columns)
-of a clustered index entry.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-UNIV_INTERN
-dberr_t
-row_ins_index_entry_big_rec_func(
-/*=============================*/
- const dtuple_t* entry, /*!< in/out: index entry to insert */
- const big_rec_t* big_rec,/*!< in: externally stored fields */
- ulint* offsets,/*!< in/out: rec offsets */
- mem_heap_t** heap, /*!< in/out: memory heap */
- dict_index_t* index, /*!< in: index */
- const char* file, /*!< in: file name of caller */
-#ifndef DBUG_OFF
- const void* thd, /*!< in: connection, or NULL */
-#endif /* DBUG_OFF */
- ulint line) /*!< in: line number of caller */
-{
- mtr_t mtr;
- btr_cur_t cursor;
- rec_t* rec;
- dberr_t error;
-
- ut_ad(dict_index_is_clust(index));
-
- DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern_latch");
-
- mtr_start(&mtr);
- btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &cursor, 0,
- file, line, &mtr);
- rec = btr_cur_get_rec(&cursor);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, heap);
-
- DEBUG_SYNC_C_IF_THD(thd, "before_row_ins_extern");
- error = btr_store_big_rec_extern_fields(
- index, btr_cur_get_block(&cursor),
- rec, offsets, big_rec, &mtr, BTR_STORE_INSERT);
- DEBUG_SYNC_C_IF_THD(thd, "after_row_ins_extern");
-
- if (error == DB_SUCCESS
- && dict_index_is_online_ddl(index)) {
- row_log_table_insert(rec, index, offsets);
- }
-
- mtr_commit(&mtr);
-
- return(error);
-}
-
-/***************************************************************//**
-Inserts an entry into a clustered index. Tries first optimistic,
-then pessimistic descent down the tree. If the entry matches enough
-to a delete marked record, performs the insert by updating or delete
-unmarking the delete marked record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
-UNIV_INTERN
-dberr_t
-row_ins_clust_index_entry(
-/*======================*/
- dict_index_t* index, /*!< in: clustered index */
- dtuple_t* entry, /*!< in/out: index entry to insert */
- que_thr_t* thr, /*!< in: query thread */
- ulint n_ext) /*!< in: number of externally stored columns */
-{
- dberr_t err;
- ulint n_uniq;
-
- if (!index->table->foreign_set.empty()) {
- err = row_ins_check_foreign_constraints(
- index->table, index, entry, thr);
- if (err != DB_SUCCESS) {
-
- return(err);
- }
- }
-
- n_uniq = dict_index_is_unique(index) ? index->n_uniq : 0;
-
- /* Try first optimistic descent to the B-tree */
-
- log_free_check();
-
- err = row_ins_clust_index_entry_low(
- 0, BTR_MODIFY_LEAF, index, n_uniq, entry, n_ext, thr);
-
-#ifdef UNIV_DEBUG
- /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
- Once it is fixed, remove the 'ifdef', 'if' and this comment. */
- if (!thr_get_trx(thr)->ddl) {
- DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
- "after_row_ins_clust_index_entry_leaf");
- }
-#endif /* UNIV_DEBUG */
-
- if (err != DB_FAIL) {
- DEBUG_SYNC_C("row_ins_clust_index_entry_leaf_after");
- return(err);
- }
-
- /* Try then pessimistic descent to the B-tree */
-
- log_free_check();
-
- return(row_ins_clust_index_entry_low(
- 0, BTR_MODIFY_TREE, index, n_uniq, entry, n_ext, thr));
-}
-
-/***************************************************************//**
-Inserts an entry into a secondary index. Tries first optimistic,
-then pessimistic descent down the tree. If the entry matches enough
-to a delete marked record, performs the insert by updating or delete
-unmarking the delete marked record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
-UNIV_INTERN
-dberr_t
-row_ins_sec_index_entry(
-/*====================*/
- dict_index_t* index, /*!< in: secondary index */
- dtuple_t* entry, /*!< in/out: index entry to insert */
- que_thr_t* thr) /*!< in: query thread */
-{
- dberr_t err;
- mem_heap_t* offsets_heap;
- mem_heap_t* heap;
-
- if (!index->table->foreign_set.empty()) {
- err = row_ins_check_foreign_constraints(index->table, index,
- entry, thr);
- if (err != DB_SUCCESS) {
-
- return(err);
- }
- }
-
- ut_ad(thr_get_trx(thr)->id);
-
- offsets_heap = mem_heap_create(1024);
- heap = mem_heap_create(1024);
-
- /* Try first optimistic descent to the B-tree */
-
- log_free_check();
-
- err = row_ins_sec_index_entry_low(
- 0, BTR_MODIFY_LEAF, index, offsets_heap, heap, entry, 0, thr);
- if (err == DB_FAIL) {
- mem_heap_empty(heap);
-
- /* Try then pessimistic descent to the B-tree */
-
- log_free_check();
-
- err = row_ins_sec_index_entry_low(
- 0, BTR_MODIFY_TREE, index,
- offsets_heap, heap, entry, 0, thr);
- }
-
- mem_heap_free(heap);
- mem_heap_free(offsets_heap);
- return(err);
-}
-
-/***************************************************************//**
-Inserts an index entry to index. Tries first optimistic, then pessimistic
-descent down the tree. If the entry matches enough to a delete marked record,
-performs the insert by updating or delete unmarking the delete marked
-record.
-@return DB_SUCCESS, DB_LOCK_WAIT, DB_DUPLICATE_KEY, or some other error code */
-static
-dberr_t
-row_ins_index_entry(
-/*================*/
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in/out: index entry to insert */
- que_thr_t* thr) /*!< in: query thread */
-{
- DBUG_EXECUTE_IF("row_ins_index_entry_timeout", {
- DBUG_SET("-d,row_ins_index_entry_timeout");
- return(DB_LOCK_WAIT);});
-
- if (dict_index_is_clust(index)) {
- return(row_ins_clust_index_entry(index, entry, thr, 0));
- } else {
- return(row_ins_sec_index_entry(index, entry, thr));
- }
-}
-
-/***********************************************************//**
-Sets the values of the dtuple fields in entry from the values of appropriate
-columns in row. */
-static MY_ATTRIBUTE((nonnull))
-void
-row_ins_index_entry_set_vals(
-/*=========================*/
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in: index entry to make */
- const dtuple_t* row) /*!< in: row */
-{
- ulint n_fields;
- ulint i;
-
- n_fields = dtuple_get_n_fields(entry);
-
- for (i = 0; i < n_fields; i++) {
- dict_field_t* ind_field;
- dfield_t* field;
- const dfield_t* row_field;
- ulint len;
-
- field = dtuple_get_nth_field(entry, i);
- ind_field = dict_index_get_nth_field(index, i);
- row_field = dtuple_get_nth_field(row, ind_field->col->ind);
- len = dfield_get_len(row_field);
-
- /* Check column prefix indexes */
- if (ind_field->prefix_len > 0
- && dfield_get_len(row_field) != UNIV_SQL_NULL) {
-
- const dict_col_t* col
- = dict_field_get_col(ind_field);
-
- len = dtype_get_at_most_n_mbchars(
- col->prtype, col->mbminmaxlen,
- ind_field->prefix_len,
- len,
- static_cast<const char*>(
- dfield_get_data(row_field)));
-
- ut_ad(!dfield_is_ext(row_field));
- }
-
- dfield_set_data(field, dfield_get_data(row_field), len);
- if (dfield_is_ext(row_field)) {
- ut_ad(dict_index_is_clust(index));
- dfield_set_ext(field);
- }
- }
-}
-
-/***********************************************************//**
-Inserts a single index entry to the table.
-@return DB_SUCCESS if operation successfully completed, else error
-code or DB_LOCK_WAIT */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_ins_index_entry_step(
-/*=====================*/
- ins_node_t* node, /*!< in: row insert node */
- que_thr_t* thr) /*!< in: query thread */
-{
- dberr_t err;
-
- ut_ad(dtuple_check_typed(node->row));
-
- row_ins_index_entry_set_vals(node->index, node->entry, node->row);
-
- ut_ad(dtuple_check_typed(node->entry));
-
- err = row_ins_index_entry(node->index, node->entry, thr);
-
-#ifdef UNIV_DEBUG
- /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
- Once it is fixed, remove the 'ifdef', 'if' and this comment. */
- if (!thr_get_trx(thr)->ddl) {
- DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
- "after_row_ins_index_entry_step");
- }
-#endif /* UNIV_DEBUG */
-
- return(err);
-}
-
-/***********************************************************//**
-Allocates a row id for row and inits the node->index field. */
-UNIV_INLINE
-void
-row_ins_alloc_row_id_step(
-/*======================*/
- ins_node_t* node) /*!< in: row insert node */
-{
- row_id_t row_id;
-
- ut_ad(node->state == INS_NODE_ALLOC_ROW_ID);
-
- if (dict_index_is_unique(dict_table_get_first_index(node->table))) {
-
- /* No row id is stored if the clustered index is unique */
-
- return;
- }
-
- /* Fill in row id value to row */
-
- row_id = dict_sys_get_new_row_id();
-
- dict_sys_write_row_id(node->row_id_buf, row_id);
-}
-
-/***********************************************************//**
-Gets a row to insert from the values list. */
-UNIV_INLINE
-void
-row_ins_get_row_from_values(
-/*========================*/
- ins_node_t* node) /*!< in: row insert node */
-{
- que_node_t* list_node;
- dfield_t* dfield;
- dtuple_t* row;
- ulint i;
-
- /* The field values are copied in the buffers of the select node and
- it is safe to use them until we fetch from select again: therefore
- we can just copy the pointers */
-
- row = node->row;
-
- i = 0;
- list_node = node->values_list;
-
- while (list_node) {
- eval_exp(list_node);
-
- dfield = dtuple_get_nth_field(row, i);
- dfield_copy_data(dfield, que_node_get_val(list_node));
-
- i++;
- list_node = que_node_get_next(list_node);
- }
-}
-
-/***********************************************************//**
-Gets a row to insert from the select list. */
-UNIV_INLINE
-void
-row_ins_get_row_from_select(
-/*========================*/
- ins_node_t* node) /*!< in: row insert node */
-{
- que_node_t* list_node;
- dfield_t* dfield;
- dtuple_t* row;
- ulint i;
-
- /* The field values are copied in the buffers of the select node and
- it is safe to use them until we fetch from select again: therefore
- we can just copy the pointers */
-
- row = node->row;
-
- i = 0;
- list_node = node->select->select_list;
-
- while (list_node) {
- dfield = dtuple_get_nth_field(row, i);
- dfield_copy_data(dfield, que_node_get_val(list_node));
-
- i++;
- list_node = que_node_get_next(list_node);
- }
-}
-
-/***********************************************************//**
-Inserts a row to a table.
-@return DB_SUCCESS if operation successfully completed, else error
-code or DB_LOCK_WAIT */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_ins(
-/*====*/
- ins_node_t* node, /*!< in: row insert node */
- que_thr_t* thr) /*!< in: query thread */
-{
- dberr_t err;
-
- if (node->state == INS_NODE_ALLOC_ROW_ID) {
-
- row_ins_alloc_row_id_step(node);
-
- node->index = dict_table_get_first_index(node->table);
- node->entry = UT_LIST_GET_FIRST(node->entry_list);
-
- if (node->ins_type == INS_SEARCHED) {
-
- row_ins_get_row_from_select(node);
-
- } else if (node->ins_type == INS_VALUES) {
-
- row_ins_get_row_from_values(node);
- }
-
- node->state = INS_NODE_INSERT_ENTRIES;
- }
-
- ut_ad(node->state == INS_NODE_INSERT_ENTRIES);
-
- while (node->index != NULL) {
- if (node->index->type != DICT_FTS) {
- err = row_ins_index_entry_step(node, thr);
-
- if (err != DB_SUCCESS) {
-
- return(err);
- }
- }
-
- node->index = dict_table_get_next_index(node->index);
- node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
-
- DBUG_EXECUTE_IF(
- "row_ins_skip_sec",
- node->index = NULL; node->entry = NULL; break;);
-
- /* Skip corrupted secondary index and its entry */
- while (node->index && dict_index_is_corrupted(node->index)) {
-
- node->index = dict_table_get_next_index(node->index);
- node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry);
- }
- }
-
- ut_ad(node->entry == NULL);
-
- node->state = INS_NODE_ALLOC_ROW_ID;
-
- return(DB_SUCCESS);
-}
-
-/***********************************************************//**
-Inserts a row to a table. This is a high-level function used in SQL execution
-graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_ins_step(
-/*=========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- ins_node_t* node;
- que_node_t* parent;
- sel_node_t* sel_node;
- trx_t* trx;
- dberr_t err;
-
- ut_ad(thr);
-
- trx = thr_get_trx(thr);
-
- trx_start_if_not_started_xa(trx);
-
- node = static_cast<ins_node_t*>(thr->run_node);
-
- ut_ad(que_node_get_type(node) == QUE_NODE_INSERT);
-
- parent = que_node_get_parent(node);
- sel_node = node->select;
-
- if (thr->prev_node == parent) {
- node->state = INS_NODE_SET_IX_LOCK;
- }
-
- /* If this is the first time this node is executed (or when
- execution resumes after wait for the table IX lock), set an
- IX lock on the table and reset the possible select node. MySQL's
- partitioned table code may also call an insert within the same
- SQL statement AFTER it has used this table handle to do a search.
- This happens, for example, when a row update moves it to another
- partition. In that case, we have already set the IX lock on the
- table during the search operation, and there is no need to set
- it again here. But we must write trx->id to node->trx_id_buf. */
-
- trx_write_trx_id(node->trx_id_buf, trx->id);
-
- if (node->state == INS_NODE_SET_IX_LOCK) {
-
- node->state = INS_NODE_ALLOC_ROW_ID;
-
- /* It may be that the current session has not yet started
- its transaction, or it has been committed: */
-
- if (trx->id == node->trx_id) {
- /* No need to do IX-locking */
-
- goto same_trx;
- }
-
- err = lock_table(0, node->table, LOCK_IX, thr);
-
- DBUG_EXECUTE_IF("ib_row_ins_ix_lock_wait",
- err = DB_LOCK_WAIT;);
-
- if (err != DB_SUCCESS) {
-
- goto error_handling;
- }
-
- node->trx_id = trx->id;
-same_trx:
- if (node->ins_type == INS_SEARCHED) {
- /* Reset the cursor */
- sel_node->state = SEL_NODE_OPEN;
-
- /* Fetch a row to insert */
-
- thr->run_node = sel_node;
-
- return(thr);
- }
- }
-
- if ((node->ins_type == INS_SEARCHED)
- && (sel_node->state != SEL_NODE_FETCH)) {
-
- ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
-
- /* No more rows to insert */
- thr->run_node = parent;
-
- return(thr);
- }
-
- /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
-
- err = row_ins(node, thr);
-
-error_handling:
- trx->error_state = err;
-
- if (err != DB_SUCCESS) {
- /* err == DB_LOCK_WAIT or SQL error detected */
- return(NULL);
- }
-
- /* DO THE TRIGGER ACTIONS HERE */
-
- if (node->ins_type == INS_SEARCHED) {
- /* Fetch a row to insert */
-
- thr->run_node = sel_node;
- } else {
- thr->run_node = que_node_get_parent(node);
- }
-
- return(thr);
-}
diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc
deleted file mode 100644
index 2cd663fd600..00000000000
--- a/storage/xtradb/row/row0log.cc
+++ /dev/null
@@ -1,3710 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0log.cc
-Modification log for online index creation and online table rebuild
-
-Created 2011-05-26 Marko Makela
-*******************************************************/
-
-#include "row0log.h"
-
-#ifdef UNIV_NONINL
-#include "row0log.ic"
-#endif
-
-#include "row0row.h"
-#include "row0ins.h"
-#include "row0upd.h"
-#include "row0merge.h"
-#include "row0ext.h"
-#include "data0data.h"
-#include "que0que.h"
-#include "handler0alter.h"
-
-#include<map>
-
-ulint onlineddl_rowlog_rows;
-ulint onlineddl_rowlog_pct_used;
-ulint onlineddl_pct_progress;
-
-/** Table row modification operations during online table rebuild.
-Delete-marked records are not copied to the rebuilt table. */
-enum row_tab_op {
- /** Insert a record */
- ROW_T_INSERT = 0x41,
- /** Update a record in place */
- ROW_T_UPDATE,
- /** Delete (purge) a record */
- ROW_T_DELETE
-};
-
-/** Index record modification operations during online index creation */
-enum row_op {
- /** Insert a record */
- ROW_OP_INSERT = 0x61,
- /** Delete a record */
- ROW_OP_DELETE
-};
-
-#ifdef UNIV_DEBUG
-/** Write information about the applied record to the error log */
-# define ROW_LOG_APPLY_PRINT
-#endif /* UNIV_DEBUG */
-
-#ifdef ROW_LOG_APPLY_PRINT
-/** When set, write information about the applied record to the error log */
-static bool row_log_apply_print;
-#endif /* ROW_LOG_APPLY_PRINT */
-
-/** Size of the modification log entry header, in bytes */
-#define ROW_LOG_HEADER_SIZE 2/*op, extra_size*/
-
-/** Log block for modifications during online ALTER TABLE */
-struct row_log_buf_t {
- byte* block; /*!< file block buffer */
- mrec_buf_t buf; /*!< buffer for accessing a record
- that spans two blocks */
- ulint blocks; /*!< current position in blocks */
- ulint bytes; /*!< current position within block */
- ulonglong total; /*!< logical position, in bytes from
- the start of the row_log_table log;
- 0 for row_log_online_op() and
- row_log_apply(). */
- ulint size; /*!< allocated size of block */
-};
-
-/** Tracks BLOB allocation during online ALTER TABLE */
-class row_log_table_blob_t {
-public:
- /** Constructor (declaring a BLOB freed)
- @param offset_arg row_log_t::tail::total */
-#ifdef UNIV_DEBUG
- row_log_table_blob_t(ulonglong offset_arg) :
- old_offset (0), free_offset (offset_arg),
- offset (BLOB_FREED) {}
-#else /* UNIV_DEBUG */
- row_log_table_blob_t() :
- offset (BLOB_FREED) {}
-#endif /* UNIV_DEBUG */
-
- /** Declare a BLOB freed again.
- @param offset_arg row_log_t::tail::total */
-#ifdef UNIV_DEBUG
- void blob_free(ulonglong offset_arg)
-#else /* UNIV_DEBUG */
- void blob_free()
-#endif /* UNIV_DEBUG */
- {
- ut_ad(offset < offset_arg);
- ut_ad(offset != BLOB_FREED);
- ut_d(old_offset = offset);
- ut_d(free_offset = offset_arg);
- offset = BLOB_FREED;
- }
- /** Declare a freed BLOB reused.
- @param offset_arg row_log_t::tail::total */
- void blob_alloc(ulonglong offset_arg) {
- ut_ad(free_offset <= offset_arg);
- ut_d(old_offset = offset);
- offset = offset_arg;
- }
- /** Determine if a BLOB was freed at a given log position
- @param offset_arg row_log_t::head::total after the log record
- @return true if freed */
- bool is_freed(ulonglong offset_arg) const {
- /* This is supposed to be the offset at the end of the
- current log record. */
- ut_ad(offset_arg > 0);
- /* We should never get anywhere close to the magic value. */
- ut_ad(offset_arg < BLOB_FREED);
- return(offset_arg < offset);
- }
-private:
- /** Magic value for a freed BLOB */
- static const ulonglong BLOB_FREED = ~0ULL;
-#ifdef UNIV_DEBUG
- /** Old offset, in case a page was freed, reused, freed, ... */
- ulonglong old_offset;
- /** Offset of last blob_free() */
- ulonglong free_offset;
-#endif /* UNIV_DEBUG */
- /** Byte offset to the log file */
- ulonglong offset;
-};
-
-/** @brief Map of off-page column page numbers to BLOB allocation state.
-
-If there is no mapping for a page number, it is safe to access.
-If a page number maps to a freed entry (BLOB_FREED), the off-page column
-has been freed. Otherwise the entry holds a byte offset into the
-index->online_log, indicating that the page is safe to access
-when applying log records starting from that offset. */
-typedef std::map<ulint, row_log_table_blob_t> page_no_map;
-
-/** @brief Buffer for logging modifications during online index creation
-
-All modifications to an index that is being created will be logged by
-row_log_online_op() to this buffer.
-
-All modifications to a table that is being rebuilt will be logged by
-row_log_table_delete(), row_log_table_update(), row_log_table_insert()
-to this buffer.
-
-When head.blocks == tail.blocks, the reader will access tail.block
-directly. When also head.bytes == tail.bytes, both counts will be
-reset to 0 and the file will be truncated. */
-struct row_log_t {
- int fd; /*!< file descriptor */
- ib_mutex_t mutex; /*!< mutex protecting error,
- max_trx and tail */
- page_no_map* blobs; /*!< map of page numbers of off-page columns
- that have been freed during table-rebuilding
- ALTER TABLE (row_log_table_*); protected by
- index->lock X-latch only */
- dict_table_t* table; /*!< table that is being rebuilt,
- or NULL when this is a secondary
- index that is being created online */
- bool same_pk;/*!< whether the definition of the PRIMARY KEY
- has remained the same */
- const dtuple_t* add_cols;
- /*!< default values of added columns, or NULL */
- const ulint* col_map;/*!< mapping of old column numbers to
- new ones, or NULL if !table */
- dberr_t error; /*!< error that occurred during online
- table rebuild */
- trx_id_t max_trx;/*!< biggest observed trx_id in
- row_log_online_op();
- protected by mutex and index->lock S-latch,
- or by index->lock X-latch only */
- row_log_buf_t tail; /*!< writer context;
- protected by mutex and index->lock S-latch,
- or by index->lock X-latch only */
- row_log_buf_t head; /*!< reader context; protected by MDL only;
- modifiable by row_log_apply_ops() */
- const char* path; /*!< where to create temporary file during
- log operation */
-};
-
-/** Create the temporary file for the online log if it does not exist.
-@param[in,out] log online rebuild log
-@return file descriptor (callers treat a negative value as failure). */
-static MY_ATTRIBUTE((warn_unused_result))
-int
-row_log_tmpfile(
- row_log_t* log)
-{
- DBUG_ENTER("row_log_tmpfile");
- if (log->fd < 0) {
- log->fd = row_merge_file_create_low(log->path);
- }
-
- DBUG_RETURN(log->fd);
-}
-
-/** Allocate the memory for the log buffer.
-@param[in,out] log_buf Buffer used for log operation
-@return true on success, false if the allocation failed */
-static MY_ATTRIBUTE((warn_unused_result))
-bool
-row_log_block_allocate(
- row_log_buf_t& log_buf)
-{
- DBUG_ENTER("row_log_block_allocate");
- if (log_buf.block == NULL) {
- log_buf.size = srv_sort_buf_size;
- log_buf.block = (byte*) os_mem_alloc_large(&log_buf.size);
- DBUG_EXECUTE_IF("simulate_row_log_allocation_failure",
- if (log_buf.block)
- os_mem_free_large(log_buf.block, log_buf.size);
- log_buf.block = NULL;);
- if (!log_buf.block) {
- DBUG_RETURN(false);
- }
- }
- DBUG_RETURN(true);
-}
-
-/** Free the log buffer.
-@param[in,out] log_buf Buffer used for log operation */
-static
-void
-row_log_block_free(
- row_log_buf_t& log_buf)
-{
- DBUG_ENTER("row_log_block_free");
- if (log_buf.block != NULL) {
- os_mem_free_large(log_buf.block, log_buf.size);
- log_buf.block = NULL;
- }
- DBUG_VOID_RETURN;
-}
-
-/******************************************************//**
-Logs an operation to a secondary index that is (or was) being created. */
-UNIV_INTERN
-void
-row_log_online_op(
-/*==============*/
- dict_index_t* index, /*!< in/out: index, S or X latched */
- const dtuple_t* tuple, /*!< in: index tuple */
- trx_id_t trx_id) /*!< in: transaction ID for insert,
- or 0 for delete */
-{
- byte* b;
- ulint extra_size;
- ulint size;
- ulint mrec_size;
- ulint avail_size;
- row_log_t* log;
-
- ut_ad(dtuple_validate(tuple));
- ut_ad(dtuple_get_n_fields(tuple) == dict_index_get_n_fields(index));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
- || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (dict_index_is_corrupted(index)) {
- return;
- }
-
- ut_ad(dict_index_is_online_ddl(index));
-
- /* Compute the size of the record. This differs from
- row_merge_buf_encode(), because here we do not encode
- extra_size+1 (and reserve 0 as the end-of-chunk marker). */
-
- size = rec_get_converted_size_temp(
- index, tuple->fields, tuple->n_fields, &extra_size);
- ut_ad(size >= extra_size);
- ut_ad(size <= sizeof log->tail.buf);
-
- mrec_size = ROW_LOG_HEADER_SIZE
- + (extra_size >= 0x80) + size
- + (trx_id ? DATA_TRX_ID_LEN : 0);
-
- log = index->online_log;
- mutex_enter(&log->mutex);
-
- if (trx_id > log->max_trx) {
- log->max_trx = trx_id;
- }
-
- if (!row_log_block_allocate(log->tail)) {
- log->error = DB_OUT_OF_MEMORY;
- goto err_exit;
- }
-
- UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
-
- ut_ad(log->tail.bytes < srv_sort_buf_size);
- avail_size = srv_sort_buf_size - log->tail.bytes;
-
- if (mrec_size > avail_size) {
- b = log->tail.buf;
- } else {
- b = log->tail.block + log->tail.bytes;
- }
-
- if (trx_id != 0) {
- *b++ = ROW_OP_INSERT;
- trx_write_trx_id(b, trx_id);
- b += DATA_TRX_ID_LEN;
- } else {
- *b++ = ROW_OP_DELETE;
- }
-
- if (extra_size < 0x80) {
- *b++ = (byte) extra_size;
- } else {
- ut_ad(extra_size < 0x8000);
- *b++ = (byte) (0x80 | (extra_size >> 8));
- *b++ = (byte) extra_size;
- }
-
- rec_convert_dtuple_to_temp(
- b + extra_size, index, tuple->fields, tuple->n_fields);
- b += size;
-
- if (mrec_size >= avail_size) {
- const os_offset_t byte_offset
- = (os_offset_t) log->tail.blocks
- * srv_sort_buf_size;
- ibool ret;
-
- if (byte_offset + srv_sort_buf_size >= srv_online_max_size) {
- goto write_failed;
- }
-
- if (mrec_size == avail_size) {
- ut_ad(b == &log->tail.block[srv_sort_buf_size]);
- } else {
- ut_ad(b == log->tail.buf + mrec_size);
- memcpy(log->tail.block + log->tail.bytes,
- log->tail.buf, avail_size);
- }
- UNIV_MEM_ASSERT_RW(log->tail.block, srv_sort_buf_size);
-
- if (row_log_tmpfile(log) < 0) {
- log->error = DB_OUT_OF_MEMORY;
- goto err_exit;
- }
-
- ret = os_file_write_int_fd(
- "(modification log)",
- log->fd,
- log->tail.block, byte_offset, srv_sort_buf_size);
- log->tail.blocks++;
- if (!ret) {
-write_failed:
- /* We set the flag directly instead of invoking
- dict_set_corrupted_index_cache_only(index) here,
- because the index is not "public" yet. */
- index->type |= DICT_CORRUPT;
- }
- UNIV_MEM_INVALID(log->tail.block, srv_sort_buf_size);
- memcpy(log->tail.block, log->tail.buf + avail_size,
- mrec_size - avail_size);
- log->tail.bytes = mrec_size - avail_size;
- } else {
- log->tail.bytes += mrec_size;
- ut_ad(b == log->tail.block + log->tail.bytes);
- }
-
- UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
-err_exit:
- mutex_exit(&log->mutex);
-}
-
-/******************************************************//**
-Gets the error status of the online index rebuild log.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-row_log_table_get_error(
-/*====================*/
- const dict_index_t* index) /*!< in: clustered index of a table
- that is being rebuilt online */
-{
- ut_ad(dict_index_is_clust(index));
- ut_ad(dict_index_is_online_ddl(index));
- return(index->online_log->error);
-}
-
-/******************************************************//**
-Starts logging an operation to a table that is being rebuilt.
-@return pointer to log (log->mutex still held), or NULL on error (mutex released) */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-byte*
-row_log_table_open(
-/*===============*/
- row_log_t* log, /*!< in/out: online rebuild log */
- ulint size, /*!< in: size of log record */
- ulint* avail) /*!< out: available size for log record */
-{
- mutex_enter(&log->mutex);
-
- UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
-
- if (log->error != DB_SUCCESS) {
-err_exit:
- mutex_exit(&log->mutex);
- return(NULL);
- }
-
- if (!row_log_block_allocate(log->tail)) {
- log->error = DB_OUT_OF_MEMORY;
- goto err_exit;
- }
-
- ut_ad(log->tail.bytes < srv_sort_buf_size);
- *avail = srv_sort_buf_size - log->tail.bytes;
-
- if (size > *avail) {
- return(log->tail.buf);
- } else {
- return(log->tail.block + log->tail.bytes);
- }
-}
-
-/******************************************************//**
-Stops logging an operation to a table that is being rebuilt; releases log->mutex. */
-static MY_ATTRIBUTE((nonnull))
-void
-row_log_table_close_func(
-/*=====================*/
- row_log_t* log, /*!< in/out: online rebuild log */
-#ifdef UNIV_DEBUG
- const byte* b, /*!< in: end of log record */
-#endif /* UNIV_DEBUG */
- ulint size, /*!< in: size of log record */
- ulint avail) /*!< in: available size for log record */
-{
- ut_ad(mutex_own(&log->mutex));
-
- if (size >= avail) {
- const os_offset_t byte_offset
- = (os_offset_t) log->tail.blocks
- * srv_sort_buf_size;
- ibool ret;
-
- if (byte_offset + srv_sort_buf_size >= srv_online_max_size) {
- goto write_failed;
- }
-
- if (size == avail) {
- ut_ad(b == &log->tail.block[srv_sort_buf_size]);
- } else {
- ut_ad(b == log->tail.buf + size);
- memcpy(log->tail.block + log->tail.bytes,
- log->tail.buf, avail);
- }
- UNIV_MEM_ASSERT_RW(log->tail.block, srv_sort_buf_size);
-
- if (row_log_tmpfile(log) < 0) {
- log->error = DB_OUT_OF_MEMORY;
- goto err_exit;
- }
-
- ret = os_file_write_int_fd(
- "(modification log)",
- log->fd,
- log->tail.block, byte_offset, srv_sort_buf_size);
- log->tail.blocks++;
- if (!ret) {
-write_failed:
- log->error = DB_ONLINE_LOG_TOO_BIG;
- }
- UNIV_MEM_INVALID(log->tail.block, srv_sort_buf_size);
- memcpy(log->tail.block, log->tail.buf + avail, size - avail);
- log->tail.bytes = size - avail;
- } else {
- log->tail.bytes += size;
- ut_ad(b == log->tail.block + log->tail.bytes);
- }
-
- log->tail.total += size;
- UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf);
-err_exit:
- mutex_exit(&log->mutex);
-
- os_atomic_increment_ulint(&onlineddl_rowlog_rows, 1);
- /* 10000 means 100.00%, 4525 means 45.25% */
- onlineddl_rowlog_pct_used = (log->tail.total * 10000) / srv_online_max_size;
-}
-
-#ifdef UNIV_DEBUG
-# define row_log_table_close(log, b, size, avail) \
- row_log_table_close_func(log, b, size, avail)
-#else /* UNIV_DEBUG */
-# define row_log_table_close(log, b, size, avail) \
- row_log_table_close_func(log, size, avail)
-#endif /* UNIV_DEBUG */
-
-/******************************************************//**
-Logs a delete operation to a table that is being rebuilt.
-This will be merged in row_log_table_apply_delete(). */
-UNIV_INTERN
-void
-row_log_table_delete(
-/*=================*/
- const rec_t* rec, /*!< in: clustered index leaf page record,
- page X-latched */
- dict_index_t* index, /*!< in/out: clustered index, S-latched
- or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
- const byte* sys) /*!< in: DB_TRX_ID,DB_ROLL_PTR that should
- be logged, or NULL to use those in rec */
-{
- ulint old_pk_extra_size;
- ulint old_pk_size;
- ulint ext_size = 0;
- ulint mrec_size;
- ulint avail_size;
- mem_heap_t* heap = NULL;
- const dtuple_t* old_pk;
- row_ext_t* ext;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(rec_offs_n_fields(offsets) == dict_index_get_n_fields(index));
- ut_ad(rec_offs_size(offsets) <= sizeof index->online_log->tail.buf);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
- || rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (dict_index_is_corrupted(index)
- || !dict_index_is_online_ddl(index)
- || index->online_log->error != DB_SUCCESS) {
- return;
- }
-
- dict_table_t* new_table = index->online_log->table;
- dict_index_t* new_index = dict_table_get_first_index(new_table);
-
- ut_ad(dict_index_is_clust(new_index));
- ut_ad(!dict_index_is_online_ddl(new_index));
-
- /* Create the tuple PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in new_table. */
- if (index->online_log->same_pk) {
- dtuple_t* tuple;
- ut_ad(new_index->n_uniq == index->n_uniq);
-
- /* The PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR are in the first
- fields of the record. */
- heap = mem_heap_create(
- DATA_TRX_ID_LEN
- + DTUPLE_EST_ALLOC(new_index->n_uniq + 2));
- old_pk = tuple = dtuple_create(heap, new_index->n_uniq + 2);
- dict_index_copy_types(tuple, new_index, tuple->n_fields);
- dtuple_set_n_fields_cmp(tuple, new_index->n_uniq);
-
- for (ulint i = 0; i < dtuple_get_n_fields(tuple); i++) {
- ulint len;
- const void* field = rec_get_nth_field(
- rec, offsets, i, &len);
- dfield_t* dfield = dtuple_get_nth_field(
- tuple, i);
- ut_ad(len != UNIV_SQL_NULL);
- ut_ad(!rec_offs_nth_extern(offsets, i));
- dfield_set_data(dfield, field, len);
- }
-
- if (sys) {
- dfield_set_data(
- dtuple_get_nth_field(tuple,
- new_index->n_uniq),
- sys, DATA_TRX_ID_LEN);
- dfield_set_data(
- dtuple_get_nth_field(tuple,
- new_index->n_uniq + 1),
- sys + DATA_TRX_ID_LEN, DATA_ROLL_PTR_LEN);
- }
- } else {
- /* The PRIMARY KEY has changed. Translate the tuple. */
- old_pk = row_log_table_get_pk(
- rec, index, offsets, NULL, &heap);
-
- if (!old_pk) {
- ut_ad(index->online_log->error != DB_SUCCESS);
- if (heap) {
- goto func_exit;
- }
- return;
- }
- }
-
- ut_ad(DATA_TRX_ID_LEN == dtuple_get_nth_field(
- old_pk, old_pk->n_fields - 2)->len);
- ut_ad(DATA_ROLL_PTR_LEN == dtuple_get_nth_field(
- old_pk, old_pk->n_fields - 1)->len);
- old_pk_size = rec_get_converted_size_temp(
- new_index, old_pk->fields, old_pk->n_fields,
- &old_pk_extra_size);
- ut_ad(old_pk_extra_size < 0x100);
-
- mrec_size = 6 + old_pk_size;
-
- /* Log enough prefix of the BLOB unless both the
- old and new table are in COMPACT or REDUNDANT format,
- which store the prefix in the clustered index record. */
- if (rec_offs_any_extern(offsets)
- && (dict_table_get_format(index->table) >= UNIV_FORMAT_B
- || dict_table_get_format(new_table) >= UNIV_FORMAT_B)) {
-
- /* Build a cache of those off-page column prefixes
- that are referenced by secondary indexes. It can be
- that none of the off-page columns are needed. */
- row_build(ROW_COPY_DATA, index, rec,
- offsets, NULL, NULL, NULL, &ext, heap);
- if (ext) {
- /* Log the row_ext_t, ext->ext and ext->buf */
- ext_size = ext->n_ext * ext->max_len
- + sizeof(*ext)
- + ext->n_ext * sizeof(ulint)
- + (ext->n_ext - 1) * sizeof ext->len;
- mrec_size += ext_size;
- }
- }
-
- if (byte* b = row_log_table_open(index->online_log,
- mrec_size, &avail_size)) {
- *b++ = ROW_T_DELETE;
- *b++ = static_cast<byte>(old_pk_extra_size);
-
- /* Log the size of external prefix we saved */
- mach_write_to_4(b, ext_size);
- b += 4;
-
- rec_convert_dtuple_to_temp(
- b + old_pk_extra_size, new_index,
- old_pk->fields, old_pk->n_fields);
-
- b += old_pk_size;
-
- if (ext_size) {
- ulint cur_ext_size = sizeof(*ext)
- + (ext->n_ext - 1) * sizeof ext->len;
-
- memcpy(b, ext, cur_ext_size);
- b += cur_ext_size;
-
- /* Check if we need to use col_map to adjust the column
- number. If columns were added/removed/reordered,
- adjust the column number. */
- if (const ulint* col_map =
- index->online_log->col_map) {
- for (ulint i = 0; i < ext->n_ext; i++) {
- const_cast<ulint&>(ext->ext[i]) =
- col_map[ext->ext[i]];
- }
- }
-
- memcpy(b, ext->ext, ext->n_ext * sizeof(*ext->ext));
- b += ext->n_ext * sizeof(*ext->ext);
-
- ext_size -= cur_ext_size
- + ext->n_ext * sizeof(*ext->ext);
- memcpy(b, ext->buf, ext_size);
- b += ext_size;
- }
-
- row_log_table_close(
- index->online_log, b, mrec_size, avail_size);
- }
-
-func_exit:
- mem_heap_free(heap);
-}
-
-/******************************************************//**
-Logs an insert or update to a table that is being rebuilt. */
-static
-void
-row_log_table_low_redundant(
-/*========================*/
- const rec_t* rec, /*!< in: clustered index leaf
- page record in ROW_FORMAT=REDUNDANT,
- page X-latched */
- dict_index_t* index, /*!< in/out: clustered index, S-latched
- or X-latched */
- bool insert, /*!< in: true if insert,
- false if update */
- const dtuple_t* old_pk, /*!< in: old PRIMARY KEY value
- (if !insert and a PRIMARY KEY
- is being created) */
- const dict_index_t* new_index)
- /*!< in: clustered index of the
- new table, not latched */
-{
- ulint old_pk_size;
- ulint old_pk_extra_size;
- ulint size;
- ulint extra_size;
- ulint mrec_size;
- ulint avail_size;
- mem_heap_t* heap = NULL;
- dtuple_t* tuple;
-
- ut_ad(!page_is_comp(page_align(rec)));
- ut_ad(dict_index_get_n_fields(index) == rec_get_n_fields_old(rec));
- ut_ad(dict_tf_is_valid(index->table->flags));
- ut_ad(!dict_table_is_comp(index->table)); /* redundant row format */
- ut_ad(dict_index_is_clust(new_index));
-
- heap = mem_heap_create(DTUPLE_EST_ALLOC(index->n_fields));
- tuple = dtuple_create(heap, index->n_fields);
- dict_index_copy_types(tuple, index, index->n_fields);
- dtuple_set_n_fields_cmp(tuple, dict_index_get_n_unique(index));
-
- if (rec_get_1byte_offs_flag(rec)) {
- for (ulint i = 0; i < index->n_fields; i++) {
- dfield_t* dfield;
- ulint len;
- const void* field;
-
- dfield = dtuple_get_nth_field(tuple, i);
- field = rec_get_nth_field_old(rec, i, &len);
-
- dfield_set_data(dfield, field, len);
- }
- } else {
- for (ulint i = 0; i < index->n_fields; i++) {
- dfield_t* dfield;
- ulint len;
- const void* field;
-
- dfield = dtuple_get_nth_field(tuple, i);
- field = rec_get_nth_field_old(rec, i, &len);
-
- dfield_set_data(dfield, field, len);
-
- if (rec_2_is_field_extern(rec, i)) {
- dfield_set_ext(dfield);
- }
- }
- }
-
- size = rec_get_converted_size_temp(
- index, tuple->fields, tuple->n_fields, &extra_size);
-
- mrec_size = ROW_LOG_HEADER_SIZE + size + (extra_size >= 0x80);
-
- if (insert || index->online_log->same_pk) {
- ut_ad(!old_pk);
- old_pk_extra_size = old_pk_size = 0;
- } else {
- ut_ad(old_pk);
- ut_ad(old_pk->n_fields == 2 + old_pk->n_fields_cmp);
- ut_ad(DATA_TRX_ID_LEN == dtuple_get_nth_field(
- old_pk, old_pk->n_fields - 2)->len);
- ut_ad(DATA_ROLL_PTR_LEN == dtuple_get_nth_field(
- old_pk, old_pk->n_fields - 1)->len);
-
- old_pk_size = rec_get_converted_size_temp(
- new_index, old_pk->fields, old_pk->n_fields,
- &old_pk_extra_size);
- ut_ad(old_pk_extra_size < 0x100);
- mrec_size += 1/*old_pk_extra_size*/ + old_pk_size;
- }
-
- if (byte* b = row_log_table_open(index->online_log,
- mrec_size, &avail_size)) {
- *b++ = insert ? ROW_T_INSERT : ROW_T_UPDATE;
-
- if (old_pk_size) {
- *b++ = static_cast<byte>(old_pk_extra_size);
-
- rec_convert_dtuple_to_temp(
- b + old_pk_extra_size, new_index,
- old_pk->fields, old_pk->n_fields);
- b += old_pk_size;
- }
-
- if (extra_size < 0x80) {
- *b++ = static_cast<byte>(extra_size);
- } else {
- ut_ad(extra_size < 0x8000);
- *b++ = static_cast<byte>(0x80 | (extra_size >> 8));
- *b++ = static_cast<byte>(extra_size);
- }
-
- rec_convert_dtuple_to_temp(
- b + extra_size, index, tuple->fields, tuple->n_fields);
- b += size;
-
- row_log_table_close(
- index->online_log, b, mrec_size, avail_size);
- }
-
- mem_heap_free(heap);
-}
-
-/******************************************************//**
-Logs an insert or update to a table that is being rebuilt. */
-static MY_ATTRIBUTE((nonnull(1,2,3)))
-void
-row_log_table_low(
-/*==============*/
- const rec_t* rec, /*!< in: clustered index leaf page record,
- page X-latched */
- dict_index_t* index, /*!< in/out: clustered index, S-latched
- or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
- bool insert, /*!< in: true if insert, false if update */
- const dtuple_t* old_pk) /*!< in: old PRIMARY KEY value (if !insert
- and a PRIMARY KEY is being created) */
-{
- ulint omit_size;
- ulint old_pk_size;
- ulint old_pk_extra_size;
- ulint extra_size;
- ulint mrec_size;
- ulint avail_size;
- const dict_index_t* new_index = dict_table_get_first_index(
- index->online_log->table);
- ut_ad(dict_index_is_clust(index));
- ut_ad(dict_index_is_clust(new_index));
- ut_ad(!dict_index_is_online_ddl(new_index));
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(rec_offs_n_fields(offsets) == dict_index_get_n_fields(index));
- ut_ad(rec_offs_size(offsets) <= sizeof index->online_log->tail.buf);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
- || rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX);
- ut_ad(page_is_leaf(page_align(rec)));
- ut_ad(!page_is_comp(page_align(rec)) == !rec_offs_comp(offsets));
-
- if (dict_index_is_corrupted(index)
- || !dict_index_is_online_ddl(index)
- || index->online_log->error != DB_SUCCESS) {
- return;
- }
-
- if (!rec_offs_comp(offsets)) {
- row_log_table_low_redundant(
- rec, index, insert, old_pk, new_index);
- return;
- }
-
- ut_ad(page_is_comp(page_align(rec)));
- ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY);
-
- omit_size = REC_N_NEW_EXTRA_BYTES;
-
- extra_size = rec_offs_extra_size(offsets) - omit_size;
-
- mrec_size = ROW_LOG_HEADER_SIZE
- + (extra_size >= 0x80) + rec_offs_size(offsets) - omit_size;
-
- if (insert || index->online_log->same_pk) {
- ut_ad(!old_pk);
- old_pk_extra_size = old_pk_size = 0;
- } else {
- ut_ad(old_pk);
- ut_ad(old_pk->n_fields == 2 + old_pk->n_fields_cmp);
- ut_ad(DATA_TRX_ID_LEN == dtuple_get_nth_field(
- old_pk, old_pk->n_fields - 2)->len);
- ut_ad(DATA_ROLL_PTR_LEN == dtuple_get_nth_field(
- old_pk, old_pk->n_fields - 1)->len);
-
- old_pk_size = rec_get_converted_size_temp(
- new_index, old_pk->fields, old_pk->n_fields,
- &old_pk_extra_size);
- ut_ad(old_pk_extra_size < 0x100);
- mrec_size += 1/*old_pk_extra_size*/ + old_pk_size;
- }
-
- if (byte* b = row_log_table_open(index->online_log,
- mrec_size, &avail_size)) {
- *b++ = insert ? ROW_T_INSERT : ROW_T_UPDATE;
-
- if (old_pk_size) {
- *b++ = static_cast<byte>(old_pk_extra_size);
-
- rec_convert_dtuple_to_temp(
- b + old_pk_extra_size, new_index,
- old_pk->fields, old_pk->n_fields);
- b += old_pk_size;
- }
-
- if (extra_size < 0x80) {
- *b++ = static_cast<byte>(extra_size);
- } else {
- ut_ad(extra_size < 0x8000);
- *b++ = static_cast<byte>(0x80 | (extra_size >> 8));
- *b++ = static_cast<byte>(extra_size);
- }
-
- memcpy(b, rec - rec_offs_extra_size(offsets), extra_size);
- b += extra_size;
- memcpy(b, rec, rec_offs_data_size(offsets));
- b += rec_offs_data_size(offsets);
-
- row_log_table_close(
- index->online_log, b, mrec_size, avail_size);
- }
-}
-
-/******************************************************//**
-Logs an update to a table that is being rebuilt.
-This will be merged in row_log_table_apply_update(). */
-UNIV_INTERN
-void
-row_log_table_update(
-/*=================*/
- const rec_t* rec, /*!< in: clustered index leaf page record,
- page X-latched */
- dict_index_t* index, /*!< in/out: clustered index, S-latched
- or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index) */
- const dtuple_t* old_pk) /*!< in: row_log_table_get_pk()
- before the update */
-{
- row_log_table_low(rec, index, offsets, false, old_pk);
-}
-
-/** Gets the old table column of a PRIMARY KEY column.
-@param table old table (before ALTER TABLE)
-@param col_map mapping of old column numbers to new ones
-@param col_no column position in the new table
-@return old table column, or NULL if this is an added column */
-static
-const dict_col_t*
-row_log_table_get_pk_old_col(
-/*=========================*/
- const dict_table_t* table,
- const ulint* col_map,
- ulint col_no)
-{
- for (ulint i = 0; i < table->n_cols; i++) {
- if (col_no == col_map[i]) {
- return(dict_table_get_nth_col(table, i));
- }
- }
-
- return(NULL);
-}
-
-/** Maps an old table column of a PRIMARY KEY column.
-@param col old table column (before ALTER TABLE)
-@param ifield clustered index field in the new table (after ALTER TABLE)
-@param dfield clustered index tuple field in the new table
-@param heap memory heap for allocating dfield contents
-@param rec clustered index leaf page record in the old table
-@param offsets rec_get_offsets(rec)
-@param i rec field corresponding to col
-@param zip_size compressed page size of the old table, or 0 for uncompressed
-@param max_len maximum length of dfield
-@retval DB_INVALID_NULL if a NULL value is encountered
-@retval DB_TOO_BIG_INDEX_COL if the maximum prefix length is exceeded */
-static
-dberr_t
-row_log_table_get_pk_col(
-/*=====================*/
- const dict_col_t* col,
- const dict_field_t* ifield,
- dfield_t* dfield,
- mem_heap_t* heap,
- const rec_t* rec,
- const ulint* offsets,
- ulint i,
- ulint zip_size,
- ulint max_len)
-{
- const byte* field;
- ulint len;
-
- ut_ad(ut_is_2pow(zip_size));
-
- field = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len == UNIV_SQL_NULL) {
- return(DB_INVALID_NULL);
- }
-
- if (rec_offs_nth_extern(offsets, i)) {
- ulint field_len = ifield->prefix_len;
- byte* blob_field;
-
- if (!field_len) {
- field_len = ifield->fixed_len;
- if (!field_len) {
- field_len = max_len + 1;
- }
- }
-
- blob_field = static_cast<byte*>(
- mem_heap_alloc(heap, field_len));
-
- len = btr_copy_externally_stored_field_prefix(
- blob_field, field_len, zip_size, field, len, NULL);
- if (len >= max_len + 1) {
- return(DB_TOO_BIG_INDEX_COL);
- }
-
- dfield_set_data(dfield, blob_field, len);
- } else {
- dfield_set_data(dfield, mem_heap_dup(heap, field, len), len);
- }
-
- return(DB_SUCCESS);
-}
-
-/******************************************************//**
-Constructs the old PRIMARY KEY and DB_TRX_ID,DB_ROLL_PTR
-of a table that is being rebuilt.
-When log->same_pk is set, no tuple is built: DB_TRX_ID,DB_ROLL_PTR are
-copied from rec into sys (if sys is not NULL) and NULL is returned.
-Otherwise the tuple is built while holding log->mutex, and only if
-log->error == DB_SUCCESS at that point. If a key column cannot be
-converted, log->error is set and NULL is returned.
-@return tuple of PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR in the rebuilt table,
-or NULL if the PRIMARY KEY definition does not change
-(NULL is also returned when log->error is not DB_SUCCESS) */
-UNIV_INTERN
-const dtuple_t*
-row_log_table_get_pk(
-/*=================*/
- const rec_t* rec, /*!< in: clustered index leaf page record,
- page X-latched */
- dict_index_t* index, /*!< in/out: clustered index, S-latched
- or X-latched */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index), or NULL
- to have the offsets computed here when they are needed */
- byte* sys, /*!< out: DB_TRX_ID,DB_ROLL_PTR for
- row_log_table_delete(), or NULL */
- mem_heap_t** heap) /*!< in/out: memory heap where allocated;
- created here if *heap is NULL and a tuple has to be built */
-{
- dtuple_t* tuple = NULL;
- row_log_t* log = index->online_log;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(dict_index_is_online_ddl(index));
- ut_ad(!offsets || rec_offs_validate(rec, index, offsets));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
- || rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_ad(log);
- ut_ad(log->table);
-
- if (log->same_pk) {
- /* The PRIMARY KEY columns are unchanged.
- This path returns without acquiring log->mutex. */
- if (sys) {
- /* Store the DB_TRX_ID,DB_ROLL_PTR. */
- ulint trx_id_offs = index->trx_id_offset;
-
- if (!trx_id_offs) { /* index->trx_id_offset is 0: look the offset up in the record offsets */
- ulint pos = dict_index_get_sys_col_pos(
- index, DATA_TRX_ID);
- ulint len;
- ut_ad(pos > 0);
-
- if (!offsets) {
- offsets = rec_get_offsets(
- rec, index, NULL, pos + 1,
- heap);
- }
-
- trx_id_offs = rec_get_nth_field_offs(
- offsets, pos, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
- }
-
- memcpy(sys, rec + trx_id_offs,
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); /* both system columns are copied with one memcpy */
- }
-
- return(NULL);
- }
-
- mutex_enter(&log->mutex);
-
- /* log->error is protected by log->mutex.
- If an error is already set, tuple stays NULL. */
- if (log->error == DB_SUCCESS) {
- dict_table_t* new_table = log->table;
- dict_index_t* new_index
- = dict_table_get_first_index(new_table);
- const ulint new_n_uniq
- = dict_index_get_n_unique(new_index);
-
- if (!*heap) { /* no heap yet: estimate a size and create one */
- ulint size = 0;
-
- if (!offsets) { /* room for the offsets array computed below */
- size += (1 + REC_OFFS_HEADER_SIZE
- + index->n_fields)
- * sizeof *offsets;
- }
-
- for (ulint i = 0; i < new_n_uniq; i++) { /* add the minimum size of each new key column */
- size += dict_col_get_min_size(
- dict_index_get_nth_col(new_index, i));
- }
-
- *heap = mem_heap_create(
- DTUPLE_EST_ALLOC(new_n_uniq + 2) + size);
- }
-
- if (!offsets) {
- offsets = rec_get_offsets(rec, index, NULL,
- ULINT_UNDEFINED, heap);
- }
-
- tuple = dtuple_create(*heap, new_n_uniq + 2); /* key fields + DB_TRX_ID + DB_ROLL_PTR */
- dict_index_copy_types(tuple, new_index, tuple->n_fields);
- dtuple_set_n_fields_cmp(tuple, new_n_uniq); /* only the key fields take part in comparisons */
-
- const ulint max_len = DICT_MAX_FIELD_LEN_BY_FORMAT(new_table);
- const ulint zip_size = dict_table_zip_size(index->table);
-
- for (ulint new_i = 0; new_i < new_n_uniq; new_i++) { /* fill each key field of the new PRIMARY KEY */
- dict_field_t* ifield;
- dfield_t* dfield;
- ulint prtype;
- ulint mbminmaxlen;
-
- ifield = dict_index_get_nth_field(new_index, new_i);
- dfield = dtuple_get_nth_field(tuple, new_i);
-
- const ulint col_no
- = dict_field_get_col(ifield)->ind;
-
- if (const dict_col_t* col
- = row_log_table_get_pk_old_col(
- index->table, log->col_map, col_no)) { /* a column of the old table maps to this key column */
- ulint i = dict_col_get_clust_pos(col, index);
-
- if (i == ULINT_UNDEFINED) { /* column has no position in the old clustered index */
- ut_ad(0);
- log->error = DB_CORRUPTION;
- goto err_exit;
- }
-
- log->error = row_log_table_get_pk_col(
- col, ifield, dfield, *heap,
- rec, offsets, i, zip_size, max_len);
-
- if (log->error != DB_SUCCESS) {
-err_exit:
- tuple = NULL; /* report failure; log->error carries the reason */
- goto func_exit;
- }
-
- mbminmaxlen = col->mbminmaxlen;
- prtype = col->prtype;
- } else {
- /* No matching column was found in the old
- table, so this must be an added column.
- Copy the default value. */
- ut_ad(log->add_cols);
-
- dfield_copy(dfield, dtuple_get_nth_field(
- log->add_cols, col_no));
- mbminmaxlen = dfield->type.mbminmaxlen;
- prtype = dfield->type.prtype;
- }
-
- ut_ad(!dfield_is_ext(dfield));
- ut_ad(!dfield_is_null(dfield));
-
- if (ifield->prefix_len) { /* prefix index field: shorten the value to the prefix */
- ulint len = dtype_get_at_most_n_mbchars(
- prtype, mbminmaxlen,
- ifield->prefix_len,
- dfield_get_len(dfield),
- static_cast<const char*>(
- dfield_get_data(dfield)));
-
- ut_ad(len <= dfield_get_len(dfield));
- dfield_set_len(dfield, len);
- }
- }
-
- const byte* trx_roll = rec
- + row_get_trx_id_offset(index, offsets);
-
- /* Copy the fields, because the fields will be updated
- or the record may be moved somewhere else in the B-tree
- as part of the upcoming operation.
- The copy goes to sys if the caller supplied it,
- and to *heap otherwise. */
- if (sys) {
- memcpy(sys, trx_roll,
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
- trx_roll = sys;
- } else {
- trx_roll = static_cast<const byte*>(
- mem_heap_dup(
- *heap, trx_roll,
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN));
- }
-
- dfield_set_data(dtuple_get_nth_field(tuple, new_n_uniq),
- trx_roll, DATA_TRX_ID_LEN);
- dfield_set_data(dtuple_get_nth_field(tuple, new_n_uniq + 1),
- trx_roll + DATA_TRX_ID_LEN, DATA_ROLL_PTR_LEN);
- }
-
-func_exit:
- mutex_exit(&log->mutex);
- return(tuple);
-}
-
-/******************************************************//**
-Logs an insert to a table that is being rebuilt.
-This will be merged in row_log_table_apply_insert().
-The work is done by row_log_table_low(), which is called with the
-arguments (rec, index, offsets, true, NULL); see that function for the
-meaning of the last two arguments. */
-UNIV_INTERN
-void
-row_log_table_insert(
-/*=================*/
- const rec_t* rec, /*!< in: clustered index leaf page record,
- page X-latched */
- dict_index_t* index, /*!< in/out: clustered index, S-latched
- or X-latched */
- const ulint* offsets)/*!< in: rec_get_offsets(rec,index) */
-{
- row_log_table_low(rec, index, offsets, true, NULL);
-}
-
-/******************************************************//**
-Records in the online log of a clustered index that the BLOB starting
-at page_no is being freed during online ALTER TABLE.
-Nothing is recorded if the online log already has an error set. */
-UNIV_INTERN
-void
-row_log_table_blob_free(
-/*====================*/
-	dict_index_t*	index,	/*!< in/out: clustered index, X-latched */
-	ulint		page_no)/*!< in: starting page number of the BLOB */
-{
-	ut_ad(dict_index_is_clust(index));
-	ut_ad(dict_index_is_online_ddl(index));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(page_no != FIL_NULL);
-
-	row_log_t*	online_log = index->online_log;
-
-	if (online_log->error != DB_SUCCESS) {
-		return;
-	}
-
-	/* The page number map is allocated on first use. */
-	if (online_log->blobs == NULL) {
-		online_log->blobs = new page_no_map();
-	}
-
-	page_no_map*	blob_map = online_log->blobs;
-
-	/* The log position is only passed on in debug builds; in other
-	builds log_pos expands to nothing, so that the constructor and
-	blob_free() are invoked without an argument. */
-#ifdef UNIV_DEBUG
-	const ulonglong	log_pos = online_log->tail.total;
-#else
-# define log_pos /* empty */
-#endif /* UNIV_DEBUG */
-
-	const page_no_map::value_type	new_entry(
-		page_no, row_log_table_blob_t(log_pos));
-
-	const std::pair<page_no_map::iterator,bool>	result
-		= blob_map->insert(new_entry);
-
-	if (result.second) {
-		/* A new mapping was created for page_no. */
-	} else {
-		/* page_no was already mapped: mark that entry freed. */
-		page_no_map::iterator	existing = result.first;
-
-		ut_ad(existing->first == page_no);
-		existing->second.blob_free(log_pos);
-	}
-#undef log_pos
-}
-
-/******************************************************//**
-Records in the online log of a clustered index that the BLOB starting
-at page_no is being allocated during online ALTER TABLE.
-Only pages that already have an entry in the map of freed BLOB pages
-are updated; nothing is done if the online log has an error set. */
-UNIV_INTERN
-void
-row_log_table_blob_alloc(
-/*=====================*/
-	dict_index_t*	index,	/*!< in/out: clustered index, X-latched */
-	ulint		page_no)/*!< in: starting page number of the BLOB */
-{
-	ut_ad(dict_index_is_clust(index));
-	ut_ad(dict_index_is_online_ddl(index));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_ad(page_no != FIL_NULL);
-
-	row_log_t*	online_log = index->online_log;
-
-	if (online_log->error != DB_SUCCESS) {
-		return;
-	}
-
-	/* Allocations are tracked only for pages that were freed
-	earlier. Double allocation without a free is not allowed. */
-	page_no_map*	freed_blobs = online_log->blobs;
-
-	if (freed_blobs == NULL) {
-		/* No BLOB has been freed so far. */
-		return;
-	}
-
-	page_no_map::iterator	entry = freed_blobs->find(page_no);
-
-	if (entry == freed_blobs->end()) {
-		/* This page was never freed. */
-		return;
-	}
-
-	ut_ad(entry->first == page_no);
-	entry->second.blob_alloc(online_log->tail.total);
-}
-
-/******************************************************//**
-Converts a log record to a table row.
-The row starts as a copy of log->add_cols (if set) or as a new tuple
-typed after log->table. Each field of mrec is then copied into the
-column given by log->col_map; column prefix fields and dropped columns
-are skipped. Externally stored fields are fetched while holding the
-index lock in X mode, unless log->blobs says that the BLOB has been
-freed; in that case *error is set to DB_MISSING_HISTORY, the field is
-left as an external reference, and conversion continues, so a non-NULL
-row can be returned together with DB_MISSING_HISTORY.
-NULL is returned with DB_INVALID_NULL if a NULL value is found for a
-column that is NOT NULL in log->table, or with DB_CORRUPTION on a
-field length mismatch.
-@return converted row, or NULL if the conversion fails */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-const dtuple_t*
-row_log_table_apply_convert_mrec(
-/*=============================*/
- const mrec_t* mrec, /*!< in: merge record */
- dict_index_t* index, /*!< in: index of mrec */
- const ulint* offsets, /*!< in: offsets of mrec */
- const row_log_t* log, /*!< in: rebuild context */
- mem_heap_t* heap, /*!< in/out: memory heap */
- trx_id_t trx_id, /*!< in: DB_TRX_ID of mrec
- (not referenced in the function body) */
- dberr_t* error) /*!< out: DB_SUCCESS or
- DB_MISSING_HISTORY or
- reason of failure */
-{
- dtuple_t* row;
-
- *error = DB_SUCCESS;
-
- /* This is based on row_build(). */
- if (log->add_cols) {
- row = dtuple_copy(log->add_cols, heap);
- /* dict_table_copy_types() would set the fields to NULL */
- for (ulint i = 0; i < dict_table_get_n_cols(log->table); i++) {
- dict_col_copy_type(
- dict_table_get_nth_col(log->table, i),
- dfield_get_type(dtuple_get_nth_field(row, i)));
- }
- } else {
- row = dtuple_create(heap, dict_table_get_n_cols(log->table));
- dict_table_copy_types(row, log->table);
- }
-
- for (ulint i = 0; i < rec_offs_n_fields(offsets); i++) { /* one pass per field of mrec */
- const dict_field_t* ind_field
- = dict_index_get_nth_field(index, i);
-
- if (ind_field->prefix_len) {
- /* Column prefixes can only occur in key
- fields, which cannot be stored externally. For
- a column prefix, there should also be the full
- field in the clustered index tuple. The row
- tuple comprises full fields, not prefixes. */
- ut_ad(!rec_offs_nth_extern(offsets, i));
- continue;
- }
-
- const dict_col_t* col
- = dict_field_get_col(ind_field);
- ulint col_no
- = log->col_map[dict_col_get_no(col)]; /* column number in log->table */
-
- if (col_no == ULINT_UNDEFINED) {
- /* dropped column */
- continue;
- }
-
- dfield_t* dfield
- = dtuple_get_nth_field(row, col_no);
- ulint len;
- const byte* data;
-
- if (rec_offs_nth_extern(offsets, i)) {
- ut_ad(rec_offs_any_extern(offsets));
- rw_lock_x_lock(dict_index_get_lock(index)); /* released at blob_done below */
-
- if (const page_no_map* blobs = log->blobs) {
- data = rec_get_nth_field(
- mrec, offsets, i, &len);
- ut_ad(len >= BTR_EXTERN_FIELD_REF_SIZE);
-
- ulint page_no = mach_read_from_4(
- data + len - (BTR_EXTERN_FIELD_REF_SIZE
- - BTR_EXTERN_PAGE_NO)); /* page number stored in the field reference at the end of the field */
- page_no_map::const_iterator p = blobs->find(
- page_no);
- if (p != blobs->end()
- && p->second.is_freed(log->head.total)) {
- /* This BLOB has been freed.
- We must not access the row.
- The field keeps the reference bytes
- and is flagged as external. */
- *error = DB_MISSING_HISTORY;
- dfield_set_data(dfield, data, len);
- dfield_set_ext(dfield);
- goto blob_done;
- }
- }
-
- data = btr_rec_copy_externally_stored_field(
- mrec, offsets,
- dict_table_zip_size(index->table),
- i, &len, heap, NULL);
- ut_a(data);
- dfield_set_data(dfield, data, len);
-blob_done:
- rw_lock_x_unlock(dict_index_get_lock(index));
- } else {
- data = rec_get_nth_field(mrec, offsets, i, &len);
- dfield_set_data(dfield, data, len);
- }
-
- if (len != UNIV_SQL_NULL && col->mtype == DATA_MYSQL
- && col->len != len && !dict_table_is_comp(log->table)) {
-
- ut_ad(col->len >= len);
- if (dict_table_is_comp(index->table)) {
- byte* buf = (byte*) mem_heap_alloc(heap,
- col->len);
- memcpy(buf, dfield->data, len);
- memset(buf + len, 0x20, col->len - len); /* pad with 0x20 bytes up to col->len */
-
- dfield_set_data(dfield, buf, col->len);
- } else {
- /* field length mismatch should not happen
- when rebuilding the redundant row format
- table. */
- ut_ad(0);
- *error = DB_CORRUPTION;
- return(NULL);
- }
- }
-
- /* See if any columns were changed to NULL or NOT NULL. */
- const dict_col_t* new_col
- = dict_table_get_nth_col(log->table, col_no);
- ut_ad(new_col->mtype == col->mtype);
-
- /* Assert that prtype matches except for nullability. */
- ut_ad(!((new_col->prtype ^ col->prtype) & ~DATA_NOT_NULL));
- ut_ad(!((new_col->prtype ^ dfield_get_type(dfield)->prtype)
- & ~DATA_NOT_NULL));
-
- if (new_col->prtype == col->prtype) { /* nullability unchanged: nothing to adjust */
- continue;
- }
-
- if ((new_col->prtype & DATA_NOT_NULL)
- && dfield_is_null(dfield)) {
- /* We got a NULL value for a NOT NULL column. */
- *error = DB_INVALID_NULL;
- return(NULL);
- }
-
- /* Adjust the DATA_NOT_NULL flag in the parsed row. */
- dfield_get_type(dfield)->prtype = new_col->prtype;
-
- ut_ad(dict_col_type_assert_equal(new_col,
- dfield_get_type(dfield)));
- }
-
- return(row);
-}
-
-/******************************************************//**
-Replays an insert operation on a table that was rebuilt.
-The row is inserted into the clustered index of the rebuilt table and
-then into each of its secondary indexes, skipping DICT_FTS indexes.
-If the clustered index insert reports DB_SUCCESS_LOCKED_REC, the row is
-treated as already present and no secondary index is touched.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_log_table_apply_insert_low(
-/*===========================*/
-	que_thr_t*		thr,		/*!< in: query graph */
-	const dtuple_t*		row,		/*!< in: table row
-						in the old table definition */
-	trx_id_t		trx_id,		/*!< in: trx_id of the row */
-	mem_heap_t*		offsets_heap,	/*!< in/out: memory heap
-						that can be emptied */
-	mem_heap_t*		heap,		/*!< in/out: memory heap */
-	row_merge_dup_t*	dup)		/*!< in/out: for reporting
-						duplicate key errors */
-{
-	const row_log_t*	online_log = dup->index->online_log;
-	dict_index_t*		index
-		= dict_table_get_first_index(online_log->table);
-
-	ut_ad(dtuple_validate(row));
-	ut_ad(trx_id);
-
-#ifdef ROW_LOG_APPLY_PRINT
-	if (row_log_apply_print) {
-		fprintf(stderr, "table apply insert "
-			IB_ID_FMT " " IB_ID_FMT "\n",
-			index->table->id, index->id);
-		dtuple_print(stderr, row);
-	}
-#endif /* ROW_LOG_APPLY_PRINT */
-
-	static const ulint	ins_flags
-		= (BTR_CREATE_FLAG
-		   | BTR_NO_LOCKING_FLAG
-		   | BTR_NO_UNDO_LOG_FLAG
-		   | BTR_KEEP_SYS_FLAG);
-
-	/* The clustered index comes first. */
-	dtuple_t*	clust_entry
-		= row_build_index_entry(row, NULL, index, heap);
-
-	dberr_t		error = row_ins_clust_index_entry_low(
-		ins_flags, BTR_MODIFY_TREE, index, index->n_uniq,
-		clust_entry, 0, thr);
-
-	if (error == DB_SUCCESS_LOCKED_REC) {
-		/* The row had already been copied to the table. */
-		return(DB_SUCCESS);
-	}
-
-	if (error != DB_SUCCESS) {
-		return(error);
-	}
-
-	/* Then every secondary index. n_index is the ordinal of the
-	index within the table; the clustered index is number 0. */
-	for (ulint n_index = 1;
-	     (index = dict_table_get_next_index(index)) != NULL;
-	     n_index++) {
-
-		if (index->type & DICT_FTS) {
-			continue;
-		}
-
-		dtuple_t*	sec_entry
-			= row_build_index_entry(row, NULL, index, heap);
-
-		error = row_ins_sec_index_entry_low(
-			ins_flags, BTR_MODIFY_TREE,
-			index, offsets_heap, heap, sec_entry, trx_id, thr);
-
-		if (error == DB_SUCCESS) {
-			continue;
-		}
-
-		/* Report correct index name for duplicate key error. */
-		if (error == DB_DUPLICATE_KEY) {
-			thr_get_trx(thr)->error_key_num = n_index;
-		}
-
-		break;
-	}
-
-	return(error);
-}
-
-/******************************************************//**
-Replays an insert operation on a table that was rebuilt.
-The merge record is first converted to a row of the rebuilt table; the
-row is then inserted with row_log_table_apply_insert_low(). A record
-whose BLOBs are missing (DB_MISSING_HISTORY) is skipped and reported
-as success.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_log_table_apply_insert(
-/*=======================*/
-	que_thr_t*		thr,		/*!< in: query graph */
-	const mrec_t*		mrec,		/*!< in: record to insert */
-	const ulint*		offsets,	/*!< in: offsets of mrec */
-	mem_heap_t*		offsets_heap,	/*!< in/out: memory heap
-						that can be emptied */
-	mem_heap_t*		heap,		/*!< in/out: memory heap */
-	row_merge_dup_t*	dup,		/*!< in/out: for reporting
-						duplicate key errors */
-	trx_id_t		trx_id)		/*!< in: DB_TRX_ID of mrec */
-{
-	const row_log_t*	online_log = dup->index->online_log;
-	dberr_t			convert_err;
-	const dtuple_t*		new_row = row_log_table_apply_convert_mrec(
-		mrec, dup->index, offsets, online_log, heap, trx_id,
-		&convert_err);
-
-	switch (convert_err) {
-	case DB_SUCCESS:
-		ut_ad(new_row != NULL);
-		break;
-	case DB_MISSING_HISTORY:
-		ut_ad(online_log->blobs);
-		/* Some BLOBs are missing, which tells us that the
-		transaction was rolled back later (rolling back an
-		insert can free BLOBs). The insert can simply be
-		skipped: the subsequent ROW_T_DELETE will be ignored,
-		or a ROW_T_UPDATE will be interpreted as ROW_T_INSERT. */
-		return(DB_SUCCESS);
-	default:
-		ut_ad(0);
-		/* fall through */
-	case DB_INVALID_NULL:
-		ut_ad(new_row == NULL);
-		return(convert_err);
-	}
-
-	const dberr_t	insert_err = row_log_table_apply_insert_low(
-		thr, new_row, trx_id, offsets_heap, heap, dup);
-
-	if (insert_err != DB_SUCCESS) {
-		/* Report the erroneous row using the new
-		version of the table. */
-		innobase_row_to_mysql(dup->table, online_log->table, new_row);
-	}
-
-	return(insert_err);
-}
-
-/******************************************************//**
-Deletes a record from a table that is being rebuilt.
-The clustered index record that pcur is positioned on is deleted
-first; then the matching entry is deleted from every secondary index
-of the table, skipping DICT_FTS indexes. The first failed delete is
-returned immediately, so that a later successful delete on another
-index cannot overwrite the error.
-@return DB_SUCCESS or error code; DB_INDEX_CORRUPT if a secondary
-index entry for the row is not found */
-static MY_ATTRIBUTE((nonnull(1, 2, 4, 5), warn_unused_result))
-dberr_t
-row_log_table_apply_delete_low(
-/*===========================*/
-	btr_pcur_t*		pcur,		/*!< in/out: B-tree cursor,
-						will be trashed */
-	const ulint*		offsets,	/*!< in: offsets on pcur */
-	const row_ext_t*	save_ext,	/*!< in: saved external field
-						info, or NULL */
-	mem_heap_t*		heap,		/*!< in/out: memory heap */
-	mtr_t*			mtr)		/*!< in/out: mini-transaction,
-						will be committed */
-{
-	dberr_t		error;
-	row_ext_t*	ext;
-	dtuple_t*	row;
-	dict_index_t*	index = btr_pcur_get_btr_cur(pcur)->index;
-
-	ut_ad(dict_index_is_clust(index));
-
-#ifdef ROW_LOG_APPLY_PRINT
-	if (row_log_apply_print) {
-		fprintf(stderr, "table apply delete "
-			IB_ID_FMT " " IB_ID_FMT "\n",
-			index->table->id, index->id);
-		rec_print_new(stderr, btr_pcur_get_rec(pcur), offsets);
-	}
-#endif /* ROW_LOG_APPLY_PRINT */
-	if (dict_table_get_next_index(index)) {
-		/* Build a row template for purging secondary index entries.
-		The external field info is taken from the caller when it
-		is supplied, and from row_build() otherwise. */
-		row = row_build(
-			ROW_COPY_DATA, index, btr_pcur_get_rec(pcur),
-			offsets, NULL, NULL, NULL,
-			save_ext ? NULL : &ext, heap);
-		if (!save_ext) {
-			save_ext = ext;
-		}
-	} else {
-		/* There is no secondary index: the row is not needed. */
-		row = NULL;
-	}
-
-	btr_cur_pessimistic_delete(&error, FALSE, btr_pcur_get_btr_cur(pcur),
-				   BTR_CREATE_FLAG, RB_NONE, mtr);
-	mtr_commit(mtr);
-
-	if (error != DB_SUCCESS) {
-		return(error);
-	}
-
-	while ((index = dict_table_get_next_index(index)) != NULL) {
-		if (index->type & DICT_FTS) {
-			continue;
-		}
-
-		const dtuple_t*	entry = row_build_index_entry(
-			row, save_ext, index, heap);
-		mtr_start(mtr);
-		btr_pcur_open(index, entry, PAGE_CUR_LE,
-			      BTR_MODIFY_TREE, pcur, mtr);
-#ifdef UNIV_DEBUG
-		switch (btr_pcur_get_btr_cur(pcur)->flag) {
-		case BTR_CUR_DELETE_REF:
-		case BTR_CUR_DEL_MARK_IBUF:
-		case BTR_CUR_DELETE_IBUF:
-		case BTR_CUR_INSERT_TO_IBUF:
-			/* We did not request buffering. */
-			break;
-		case BTR_CUR_HASH:
-		case BTR_CUR_HASH_FAIL:
-		case BTR_CUR_BINARY:
-			goto flag_ok;
-		}
-		ut_ad(0);
-flag_ok:
-#endif /* UNIV_DEBUG */
-
-		if (page_rec_is_infimum(btr_pcur_get_rec(pcur))
-		    || btr_pcur_get_low_match(pcur) < index->n_uniq) {
-			/* All secondary index entries should be
-			found, because new_table is being modified by
-			this thread only, and all indexes should be
-			updated in sync. */
-			mtr_commit(mtr);
-			return(DB_INDEX_CORRUPT);
-		}
-
-		btr_cur_pessimistic_delete(&error, FALSE,
-					   btr_pcur_get_btr_cur(pcur),
-					   BTR_CREATE_FLAG, RB_NONE, mtr);
-		mtr_commit(mtr);
-
-		if (error != DB_SUCCESS) {
-			/* Stop here: continuing would let the delete
-			on the next index overwrite this error. */
-			return(error);
-		}
-	}
-
-	return(error);
-}
-
-/******************************************************//**
-Replays a delete operation on a table that was rebuilt.
-A search tuple is built from the PRIMARY KEY fields of mrec and looked
-up in the clustered index of the rebuilt table. If no matching record
-is found, or if DB_TRX_ID,DB_ROLL_PTR of the found record differ from
-those in mrec, nothing is deleted and DB_SUCCESS is returned.
-Otherwise the record is removed with row_log_table_apply_delete_low().
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull(1, 3, 4, 5, 6, 7), warn_unused_result))
-dberr_t
-row_log_table_apply_delete(
-/*=======================*/
- que_thr_t* thr, /*!< in: query graph
- (not referenced in the function body) */
- ulint trx_id_col, /*!< in: position of
- DB_TRX_ID in the new
- clustered index */
- const mrec_t* mrec, /*!< in: merge record */
- const ulint* moffsets, /*!< in: offsets of mrec */
- mem_heap_t* offsets_heap, /*!< in/out: memory heap
- that can be emptied */
- mem_heap_t* heap, /*!< in/out: memory heap */
- const row_log_t* log, /*!< in: online log */
- const row_ext_t* save_ext) /*!< in: saved external field
- info, or NULL */
-{
- dict_table_t* new_table = log->table;
- dict_index_t* index = dict_table_get_first_index(new_table);
- dtuple_t* old_pk;
- mtr_t mtr;
- btr_pcur_t pcur;
- ulint* offsets;
-
- ut_ad(rec_offs_n_fields(moffsets)
- == dict_index_get_n_unique(index) + 2);
- ut_ad(!rec_offs_any_extern(moffsets));
-
- /* Convert the row to a search tuple.
- The field data pointers refer to mrec; nothing is copied. */
- old_pk = dtuple_create(heap, index->n_uniq);
- dict_index_copy_types(old_pk, index, index->n_uniq);
-
- for (ulint i = 0; i < index->n_uniq; i++) {
- ulint len;
- const void* field;
- field = rec_get_nth_field(mrec, moffsets, i, &len);
- ut_ad(len != UNIV_SQL_NULL);
- dfield_set_data(dtuple_get_nth_field(old_pk, i),
- field, len);
- }
-
- mtr_start(&mtr);
- btr_pcur_open(index, old_pk, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &pcur, &mtr);
-#ifdef UNIV_DEBUG
- switch (btr_pcur_get_btr_cur(&pcur)->flag) {
- case BTR_CUR_DELETE_REF:
- case BTR_CUR_DEL_MARK_IBUF:
- case BTR_CUR_DELETE_IBUF:
- case BTR_CUR_INSERT_TO_IBUF:
- /* We did not request buffering. */
- break;
- case BTR_CUR_HASH:
- case BTR_CUR_HASH_FAIL:
- case BTR_CUR_BINARY:
- goto flag_ok;
- }
- ut_ad(0);
-flag_ok:
-#endif /* UNIV_DEBUG */
-
- if (page_rec_is_infimum(btr_pcur_get_rec(&pcur))
- || btr_pcur_get_low_match(&pcur) < index->n_uniq) {
-all_done:
- mtr_commit(&mtr);
- /* The record was not found. All done. */
- /* This should only happen when an earlier
- ROW_T_INSERT was skipped or
- ROW_T_UPDATE was interpreted as ROW_T_DELETE
- due to BLOBs having been freed by rollback.
- This label is also reached by a goto below when
- DB_TRX_ID,DB_ROLL_PTR do not match. */
- return(DB_SUCCESS);
- }
-
- offsets = rec_get_offsets(btr_pcur_get_rec(&pcur), index, NULL,
- ULINT_UNDEFINED, &offsets_heap);
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- ut_a(!rec_offs_any_null_extern(btr_pcur_get_rec(&pcur), offsets));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-
- /* Only remove the record if DB_TRX_ID,DB_ROLL_PTR match. */
-
- {
- ulint len;
- const byte* mrec_trx_id
- = rec_get_nth_field(mrec, moffsets, trx_id_col, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
- const byte* rec_trx_id
- = rec_get_nth_field(btr_pcur_get_rec(&pcur), offsets,
- trx_id_col, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
-
- ut_ad(rec_get_nth_field(mrec, moffsets, trx_id_col + 1, &len)
- == mrec_trx_id + DATA_TRX_ID_LEN);
- ut_ad(len == DATA_ROLL_PTR_LEN);
- ut_ad(rec_get_nth_field(btr_pcur_get_rec(&pcur), offsets,
- trx_id_col + 1, &len)
- == rec_trx_id + DATA_TRX_ID_LEN);
- ut_ad(len == DATA_ROLL_PTR_LEN);
-
- if (memcmp(mrec_trx_id, rec_trx_id,
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)) { /* both system columns are compared in one memcmp */
- /* The ROW_T_DELETE was logged for a different
- PRIMARY KEY,DB_TRX_ID,DB_ROLL_PTR.
- This is possible if a ROW_T_INSERT was skipped
- or a ROW_T_UPDATE was interpreted as ROW_T_DELETE
- because some BLOBs were missing due to
- (1) rolling back the initial insert, or
- (2) purging the BLOB for a later ROW_T_DELETE
- (3) purging 'old values' for a later ROW_T_UPDATE
- or ROW_T_DELETE. */
- ut_ad(!log->same_pk);
- goto all_done;
- }
- }
-
- return(row_log_table_apply_delete_low(&pcur, offsets, save_ext,
- heap, &mtr));
-}
-
-/******************************************************//**
-Replays an update operation on a table that was rebuilt.
-Outline of the steps taken:
-(1) mrec is converted to a row of the rebuilt table. If BLOBs are
-missing (DB_MISSING_HISTORY), the update is treated as a delete.
-(2) old_pk is looked up in the new clustered index. If it is not
-found, the row is inserted (conversion succeeded) or nothing is done
-(BLOBs missing).
-(3) Unless log->same_pk, DB_TRX_ID,DB_ROLL_PTR of the found record
-must match old_pk; on a mismatch the result is DB_DUPLICATE_KEY
-(conversion succeeded) or DB_SUCCESS (BLOBs missing).
-(4) If the PRIMARY KEY changes or the found record has externally
-stored columns, the update is performed as delete followed by insert.
-(5) Otherwise the clustered index record is updated in place and, for
-each non-FTS secondary index whose ordering fields are affected, the
-old entry is deleted and a new one is inserted.
-When an error is returned through func_exit or func_exit_committed,
-the row is reported with innobase_row_to_mysql().
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_log_table_apply_update(
-/*=======================*/
- que_thr_t* thr, /*!< in: query graph */
- ulint new_trx_id_col, /*!< in: position of
- DB_TRX_ID in the new
- clustered index */
- const mrec_t* mrec, /*!< in: new value */
- const ulint* offsets, /*!< in: offsets of mrec */
- mem_heap_t* offsets_heap, /*!< in/out: memory heap
- that can be emptied */
- mem_heap_t* heap, /*!< in/out: memory heap */
- row_merge_dup_t* dup, /*!< in/out: for reporting
- duplicate key errors */
- trx_id_t trx_id, /*!< in: DB_TRX_ID of mrec */
- const dtuple_t* old_pk) /*!< in: PRIMARY KEY and
- DB_TRX_ID,DB_ROLL_PTR
- of the old value,
- or PRIMARY KEY if same_pk */
-{
- const row_log_t*log = dup->index->online_log;
- const dtuple_t* row;
- dict_index_t* index = dict_table_get_first_index(log->table);
- mtr_t mtr;
- btr_pcur_t pcur;
- dberr_t error;
- ulint n_index = 0;
-
- ut_ad(dtuple_get_n_fields_cmp(old_pk)
- == dict_index_get_n_unique(index));
- ut_ad(dtuple_get_n_fields(old_pk)
- == dict_index_get_n_unique(index)
- + (log->same_pk ? 0 : 2));
-
- row = row_log_table_apply_convert_mrec(
- mrec, dup->index, offsets, log, heap, trx_id, &error);
-
- switch (error) {
- case DB_MISSING_HISTORY:
- /* The record contained BLOBs that are now missing. */
- ut_ad(log->blobs);
- /* Whether or not we are updating the PRIMARY KEY, we
- know that there should be a subsequent
- ROW_T_DELETE for rolling back a preceding ROW_T_INSERT,
- overriding this ROW_T_UPDATE record. (*1)
-
- This allows us to interpret this ROW_T_UPDATE
- as ROW_T_DELETE.
-
- When applying the subsequent ROW_T_DELETE, no matching
- record will be found. */
- /* fall through */
- case DB_SUCCESS:
- ut_ad(row != NULL);
- break;
- default:
- ut_ad(0);
- case DB_INVALID_NULL:
- ut_ad(row == NULL);
- return(error);
- }
-
- mtr_start(&mtr);
- btr_pcur_open(index, old_pk, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &pcur, &mtr);
-#ifdef UNIV_DEBUG
- switch (btr_pcur_get_btr_cur(&pcur)->flag) {
- case BTR_CUR_DELETE_REF:
- case BTR_CUR_DEL_MARK_IBUF:
- case BTR_CUR_DELETE_IBUF:
- case BTR_CUR_INSERT_TO_IBUF:
- ut_ad(0);/* We did not request buffering. */
- case BTR_CUR_HASH:
- case BTR_CUR_HASH_FAIL:
- case BTR_CUR_BINARY:
- break;
- }
-#endif /* UNIV_DEBUG */
-
- if (page_rec_is_infimum(btr_pcur_get_rec(&pcur))
- || btr_pcur_get_low_match(&pcur) < index->n_uniq) {
- /* The record was not found. This should only happen
- when an earlier ROW_T_INSERT or ROW_T_UPDATE was
- diverted because BLOBs were freed when the insert was
- later rolled back. */
-
- ut_ad(log->blobs);
-
- if (error == DB_SUCCESS) {
- /* An earlier ROW_T_INSERT could have been
- skipped because of a missing BLOB, like this:
-
- BEGIN;
- INSERT INTO t SET blob_col='blob value';
- UPDATE t SET blob_col='';
- ROLLBACK;
-
- This would generate the following records:
- ROW_T_INSERT (referring to 'blob value')
- ROW_T_UPDATE
- ROW_T_UPDATE (referring to 'blob value')
- ROW_T_DELETE
- [ROLLBACK removes the 'blob value']
-
- The ROW_T_INSERT would have been skipped
- because of a missing BLOB. Now we are
- executing the first ROW_T_UPDATE.
- The second ROW_T_UPDATE (for the ROLLBACK)
- would be interpreted as ROW_T_DELETE, because
- the BLOB would be missing.
-
- We could probably assume that the transaction
- has been rolled back and simply skip the
- 'insert' part of this ROW_T_UPDATE record.
- However, there might be some complex scenario
- that could interfere with such a shortcut.
- So, we will insert the row (and risk
- introducing a bogus duplicate key error
- for the ALTER TABLE), and a subsequent
- ROW_T_UPDATE or ROW_T_DELETE will delete it. */
- mtr_commit(&mtr);
- error = row_log_table_apply_insert_low(
- thr, row, trx_id, offsets_heap, heap, dup);
- } else {
- /* Some BLOBs are missing, so we are interpreting
- this ROW_T_UPDATE as ROW_T_DELETE (see *1).
- Because the record was not found, we do nothing. */
- ut_ad(error == DB_MISSING_HISTORY);
- error = DB_SUCCESS;
-func_exit:
- mtr_commit(&mtr); /* shared exit for paths that still hold an active mtr */
- }
-func_exit_committed:
- ut_ad(mtr.state == MTR_COMMITTED);
-
- if (error != DB_SUCCESS) {
- /* Report the erroneous row using the new
- version of the table. */
- innobase_row_to_mysql(dup->table, log->table, row);
- }
-
- return(error);
- }
-
- /* Prepare to update (or delete) the record. */
- ulint* cur_offsets = rec_get_offsets(
- btr_pcur_get_rec(&pcur),
- index, NULL, ULINT_UNDEFINED, &offsets_heap);
-
- if (!log->same_pk) {
- /* Only update the record if DB_TRX_ID,DB_ROLL_PTR match what
- was buffered. */
- ulint len;
- const void* rec_trx_id
- = rec_get_nth_field(btr_pcur_get_rec(&pcur),
- cur_offsets, index->n_uniq, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
- ut_ad(dtuple_get_nth_field(old_pk, index->n_uniq)->len
- == DATA_TRX_ID_LEN);
- ut_ad(dtuple_get_nth_field(old_pk, index->n_uniq + 1)->len
- == DATA_ROLL_PTR_LEN);
- ut_ad(DATA_TRX_ID_LEN + static_cast<const char*>(
- dtuple_get_nth_field(old_pk,
- index->n_uniq)->data)
- == dtuple_get_nth_field(old_pk,
- index->n_uniq + 1)->data);
- if (memcmp(rec_trx_id,
- dtuple_get_nth_field(old_pk, index->n_uniq)->data,
- DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)) {
- /* The ROW_T_UPDATE was logged for a different
- DB_TRX_ID,DB_ROLL_PTR. This is possible if an
- earlier ROW_T_INSERT or ROW_T_UPDATE was diverted
- because some BLOBs were missing due to rolling
- back the initial insert or due to purging
- the old BLOB values of an update. */
- ut_ad(log->blobs);
- if (error != DB_SUCCESS) {
- ut_ad(error == DB_MISSING_HISTORY);
- /* Some BLOBs are missing, so we are
- interpreting this ROW_T_UPDATE as
- ROW_T_DELETE (see *1).
- Because this is a different row,
- we will do nothing. */
- error = DB_SUCCESS;
- } else {
- /* Because the user record is missing due to
- BLOBs that were missing when processing
- an earlier log record, we should
- interpret the ROW_T_UPDATE as ROW_T_INSERT.
- However, there is a different user record
- with the same PRIMARY KEY value already. */
- error = DB_DUPLICATE_KEY;
- }
-
- goto func_exit;
- }
- }
-
- if (error != DB_SUCCESS) {
- ut_ad(error == DB_MISSING_HISTORY);
- ut_ad(log->blobs);
- /* Some BLOBs are missing, so we are interpreting
- this ROW_T_UPDATE as ROW_T_DELETE (see *1). */
- error = row_log_table_apply_delete_low(
- &pcur, cur_offsets, NULL, heap, &mtr);
- goto func_exit_committed;
- }
-
- dtuple_t* entry = row_build_index_entry(
- row, NULL, index, heap);
- const upd_t* update = row_upd_build_difference_binary(
- index, entry, btr_pcur_get_rec(&pcur), cur_offsets,
- false, NULL, heap);
-
- if (!update->n_fields) {
- /* Nothing to do. */
- goto func_exit;
- }
-
- const bool pk_updated
- = upd_get_nth_field(update, 0)->field_no < new_trx_id_col; /* true if the first updated field precedes DB_TRX_ID */
-
- if (pk_updated || rec_offs_any_extern(cur_offsets)) {
- /* If the record contains any externally stored
- columns, perform the update by delete and insert,
- because we will not write any undo log that would
- allow purge to free any orphaned externally stored
- columns. */
-
- if (pk_updated && log->same_pk) {
- /* The ROW_T_UPDATE log record should only be
- written when the PRIMARY KEY fields of the
- record did not change in the old table. We
- can only get a change of PRIMARY KEY columns
- in the rebuilt table if the PRIMARY KEY was
- redefined (!same_pk). */
- ut_ad(0);
- error = DB_CORRUPTION;
- goto func_exit;
- }
-
- error = row_log_table_apply_delete_low(
- &pcur, cur_offsets, NULL, heap, &mtr);
- ut_ad(mtr.state == MTR_COMMITTED);
-
- if (error == DB_SUCCESS) {
- error = row_log_table_apply_insert_low(
- thr, row, trx_id, offsets_heap, heap, dup);
- }
-
- goto func_exit_committed;
- }
-
- dtuple_t* old_row;
- row_ext_t* old_ext;
-
- if (dict_table_get_next_index(index)) {
- /* Construct the row corresponding to the old value of
- the record. It is used below to find the old
- secondary index entries. */
- old_row = row_build(
- ROW_COPY_DATA, index, btr_pcur_get_rec(&pcur),
- cur_offsets, NULL, NULL, NULL, &old_ext, heap);
- ut_ad(old_row);
-#ifdef ROW_LOG_APPLY_PRINT
- if (row_log_apply_print) {
- fprintf(stderr, "table apply update "
- IB_ID_FMT " " IB_ID_FMT "\n",
- index->table->id, index->id);
- dtuple_print(stderr, old_row);
- dtuple_print(stderr, row);
- }
-#endif /* ROW_LOG_APPLY_PRINT */
- } else {
- old_row = NULL;
- old_ext = NULL;
- }
-
- big_rec_t* big_rec;
-
- error = btr_cur_pessimistic_update(
- BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG
- | BTR_KEEP_POS_FLAG,
- btr_pcur_get_btr_cur(&pcur),
- &cur_offsets, &offsets_heap, heap, &big_rec,
- update, 0, thr, 0, &mtr);
-
- if (big_rec) {
- if (error == DB_SUCCESS) {
- error = btr_store_big_rec_extern_fields(
- index, btr_pcur_get_block(&pcur),
- btr_pcur_get_rec(&pcur), cur_offsets,
- big_rec, &mtr, BTR_STORE_UPDATE);
- }
-
- dtuple_big_rec_free(big_rec); /* freed whether or not the store succeeded */
- }
-
- while ((index = dict_table_get_next_index(index)) != NULL) { /* secondary indexes; mtr is active at the top of each pass */
- if (error != DB_SUCCESS) {
- break;
- }
-
- n_index++;
-
- if (index->type & DICT_FTS) {
- continue;
- }
-
- if (!row_upd_changes_ord_field_binary(
- index, update, thr, old_row, NULL)) {
- continue;
- }
-
- mtr_commit(&mtr);
-
- entry = row_build_index_entry(old_row, old_ext, index, heap);
- if (!entry) {
- ut_ad(0);
- return(DB_CORRUPTION); /* mtr is committed here; this return bypasses innobase_row_to_mysql() */
- }
-
- mtr_start(&mtr);
-
- if (ROW_FOUND != row_search_index_entry(
- index, entry, BTR_MODIFY_TREE, &pcur, &mtr)) {
- ut_ad(0);
- error = DB_CORRUPTION;
- break;
- }
-
- btr_cur_pessimistic_delete(
- &error, FALSE, btr_pcur_get_btr_cur(&pcur),
- BTR_CREATE_FLAG, RB_NONE, &mtr);
-
- if (error != DB_SUCCESS) {
- break;
- }
-
- mtr_commit(&mtr);
-
- entry = row_build_index_entry(row, NULL, index, heap);
- error = row_ins_sec_index_entry_low(
- BTR_CREATE_FLAG | BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG,
- BTR_MODIFY_TREE, index, offsets_heap, heap,
- entry, trx_id, thr);
-
- /* Report correct index name for duplicate key error. */
- if (error == DB_DUPLICATE_KEY) {
- thr_get_trx(thr)->error_key_num = n_index;
- }
-
- mtr_start(&mtr); /* restarted so that func_exit always has an active mtr to commit */
- }
-
- goto func_exit;
-}
-
-/******************************************************//**
-Applies an operation to a table that was rebuilt.
-The first byte at mrec selects the operation (ROW_T_INSERT, ROW_T_DELETE
-or ROW_T_UPDATE); any other value sets *error = DB_CORRUPTION. The rest of
-the record is decoded with rec_init_offsets_temp() and passed to
-row_log_table_apply_insert(), row_log_table_apply_delete() or
-row_log_table_apply_update(). Once a complete record has been parsed,
-log->head.total is advanced by its length, and offsets_heap and heap are
-emptied before returning. The caller must check *error even when a
-non-NULL pointer is returned, because the pointer to the next record is
-returned regardless of the outcome of the apply function.
-@return NULL on failure (mrec corruption) or when out of data;
-pointer to next record on success */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-const mrec_t*
-row_log_table_apply_op(
-/*===================*/
- que_thr_t* thr, /*!< in: query graph */
- ulint trx_id_col, /*!< in: position of
- DB_TRX_ID in old index */
- ulint new_trx_id_col, /*!< in: position of
- DB_TRX_ID in new index */
- row_merge_dup_t* dup, /*!< in/out: for reporting
- duplicate key errors */
- dberr_t* error, /*!< out: DB_SUCCESS
- or error code */
- mem_heap_t* offsets_heap, /*!< in/out: memory heap
- that can be emptied */
- mem_heap_t* heap, /*!< in/out: memory heap */
- const mrec_t* mrec, /*!< in: merge record */
- const mrec_t* mrec_end, /*!< in: end of buffer */
- ulint* offsets) /*!< in/out: work area
- for parsing mrec */
-{
- row_log_t* log = dup->index->online_log;
- dict_index_t* new_index = dict_table_get_first_index(log->table);
- ulint extra_size;
- const mrec_t* next_mrec;
- dtuple_t* old_pk;
- row_ext_t* ext;
- ulint ext_size;
-
- ut_ad(dict_index_is_clust(dup->index));
- ut_ad(dup->index->table != log->table);
- ut_ad(log->head.total <= log->tail.total);
-
- *error = DB_SUCCESS;
-
- /* 3 = 1 (op type) + 1 (ext_size) + at least 1 byte payload */
- if (mrec + 3 >= mrec_end) {
- return(NULL);
- }
-
- const mrec_t* const mrec_start = mrec; /* start of this log record;
- used below for computing the number of bytes consumed */
-
- switch (*mrec++) {
- default:
- ut_ad(0);
- *error = DB_CORRUPTION;
- return(NULL);
- case ROW_T_INSERT:
- extra_size = *mrec++;
-
- if (extra_size >= 0x80) {
- /* Read another byte of extra_size. */
-
- extra_size = (extra_size & 0x7f) << 8;
- extra_size |= *mrec++;
- }
-
- mrec += extra_size; /* skip extra_size bytes; the advanced
- pointer is what rec_init_offsets_temp() is given below */
-
- if (mrec > mrec_end) {
- return(NULL);
- }
-
- rec_offs_set_n_fields(offsets, dup->index->n_fields);
- rec_init_offsets_temp(mrec, dup->index, offsets);
-
- next_mrec = mrec + rec_offs_data_size(offsets);
-
- if (next_mrec > mrec_end) {
- return(NULL);
- } else {
- log->head.total += next_mrec - mrec_start;
-
- ulint len;
- const byte* db_trx_id
- = rec_get_nth_field(
- mrec, offsets, trx_id_col, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
- *error = row_log_table_apply_insert(
- thr, mrec, offsets, offsets_heap,
- heap, dup, trx_read_trx_id(db_trx_id));
- }
- break;
-
- case ROW_T_DELETE:
- /* 1 (extra_size) + 4 (ext_size) + at least 1 (payload) */
- if (mrec + 6 >= mrec_end) {
- return(NULL);
- }
-
- extra_size = *mrec++;
- ext_size = mach_read_from_4(mrec);
- mrec += 4;
- ut_ad(mrec < mrec_end);
-
- /* We assume extra_size < 0x100 for the PRIMARY KEY prefix.
- For fixed-length PRIMARY key columns, it is 0. */
- mrec += extra_size;
-
- rec_offs_set_n_fields(offsets, new_index->n_uniq + 2);
- rec_init_offsets_temp(mrec, new_index, offsets);
- next_mrec = mrec + rec_offs_data_size(offsets) + ext_size;
- if (next_mrec > mrec_end) {
- return(NULL);
- }
-
- log->head.total += next_mrec - mrec_start;
-
- /* If there are external fields, retrieve those logged
- prefix info and reconstruct the row_ext_t */
- if (ext_size) {
- /* We use memcpy to avoid unaligned
- access on some non-x86 platforms.
- The ext_size bytes that follow the record data are
- duplicated into heap and treated as a row_ext_t. The
- pointers ext->ext and ext->buf are then re-pointed
- into that copy: ext->ext starts after sizeof(*ext)
- + (n_ext - 1) * sizeof ext->len bytes, and ext->buf
- follows n_ext * sizeof(*ext->ext) bytes after that. */
- ext = static_cast<row_ext_t*>(
- mem_heap_dup(heap,
- mrec + rec_offs_data_size(offsets),
- ext_size));
-
- byte* ext_start = reinterpret_cast<byte*>(ext);
-
- ulint ext_len = sizeof(*ext)
- + (ext->n_ext - 1) * sizeof ext->len;
-
- ext->ext = reinterpret_cast<ulint*>(ext_start + ext_len);
- ext_len += ext->n_ext * sizeof(*ext->ext);
-
- ext->buf = static_cast<byte*>(ext_start + ext_len);
- } else {
- ext = NULL;
- }
-
- *error = row_log_table_apply_delete(
- thr, new_trx_id_col,
- mrec, offsets, offsets_heap, heap,
- log, ext);
- break;
-
- case ROW_T_UPDATE:
- /* Logically, the log entry consists of the
- (PRIMARY KEY,DB_TRX_ID) of the old value (converted
- to the new primary key definition) followed by
- the new value in the old table definition. If the
- definition of the columns belonging to PRIMARY KEY
- is not changed, the log will only contain
- DB_TRX_ID,new_row. */
-
- if (dup->index->online_log->same_pk) {
- ut_ad(new_index->n_uniq == dup->index->n_uniq);
-
- extra_size = *mrec++;
-
- if (extra_size >= 0x80) {
- /* Read another byte of extra_size. */
-
- extra_size = (extra_size & 0x7f) << 8;
- extra_size |= *mrec++;
- }
-
- mrec += extra_size;
-
- if (mrec > mrec_end) {
- return(NULL);
- }
-
- rec_offs_set_n_fields(offsets, dup->index->n_fields);
- rec_init_offsets_temp(mrec, dup->index, offsets);
-
- next_mrec = mrec + rec_offs_data_size(offsets);
-
- if (next_mrec > mrec_end) {
- return(NULL);
- }
-
- old_pk = dtuple_create(heap, new_index->n_uniq);
- dict_index_copy_types(
- old_pk, new_index, old_pk->n_fields);
-
- /* Copy the PRIMARY KEY fields from mrec to old_pk.
- In this branch the key is taken from the first
- n_uniq fields of the logged new row itself. */
- for (ulint i = 0; i < new_index->n_uniq; i++) {
- const void* field;
- ulint len;
- dfield_t* dfield;
-
- ut_ad(!rec_offs_nth_extern(offsets, i));
-
- field = rec_get_nth_field(
- mrec, offsets, i, &len);
- ut_ad(len != UNIV_SQL_NULL);
-
- dfield = dtuple_get_nth_field(old_pk, i);
- dfield_set_data(dfield, field, len);
- }
- } else {
- /* We assume extra_size < 0x100
- for the PRIMARY KEY prefix. */
- mrec += *mrec + 1;
-
- if (mrec > mrec_end) {
- return(NULL);
- }
-
- /* Get offsets for PRIMARY KEY,
- DB_TRX_ID, DB_ROLL_PTR. */
- rec_offs_set_n_fields(offsets, new_index->n_uniq + 2);
- rec_init_offsets_temp(mrec, new_index, offsets);
-
- next_mrec = mrec + rec_offs_data_size(offsets);
- if (next_mrec + 2 > mrec_end) {
- return(NULL);
- }
-
- /* Copy the PRIMARY KEY fields and
- DB_TRX_ID, DB_ROLL_PTR from mrec to old_pk. */
- old_pk = dtuple_create(heap, new_index->n_uniq + 2);
- dict_index_copy_types(old_pk, new_index,
- old_pk->n_fields);
-
- for (ulint i = 0;
- i < dict_index_get_n_unique(new_index) + 2;
- i++) {
- const void* field;
- ulint len;
- dfield_t* dfield;
-
- ut_ad(!rec_offs_nth_extern(offsets, i));
-
- field = rec_get_nth_field(
- mrec, offsets, i, &len);
- ut_ad(len != UNIV_SQL_NULL);
-
- dfield = dtuple_get_nth_field(old_pk, i);
- dfield_set_data(dfield, field, len);
- }
-
- mrec = next_mrec; /* continue parsing after the old key */
-
- /* Fetch the new value of the row as it was
- in the old table definition. */
- extra_size = *mrec++;
-
- if (extra_size >= 0x80) {
- /* Read another byte of extra_size. */
-
- extra_size = (extra_size & 0x7f) << 8;
- extra_size |= *mrec++;
- }
-
- mrec += extra_size;
-
- if (mrec > mrec_end) {
- return(NULL);
- }
-
- rec_offs_set_n_fields(offsets, dup->index->n_fields);
- rec_init_offsets_temp(mrec, dup->index, offsets);
-
- next_mrec = mrec + rec_offs_data_size(offsets);
-
- if (next_mrec > mrec_end) {
- return(NULL);
- }
- }
-
- ut_ad(next_mrec <= mrec_end);
- log->head.total += next_mrec - mrec_start;
- dtuple_set_n_fields_cmp(old_pk, new_index->n_uniq);
-
- {
- ulint len;
- const byte* db_trx_id
- = rec_get_nth_field(
- mrec, offsets, trx_id_col, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
- *error = row_log_table_apply_update(
- thr, new_trx_id_col,
- mrec, offsets, offsets_heap,
- heap, dup, trx_read_trx_id(db_trx_id), old_pk);
- }
-
- break;
- }
-
- ut_ad(log->head.total <= log->tail.total);
- mem_heap_empty(offsets_heap);
- mem_heap_empty(heap);
- return(next_mrec);
-}
-
-/******************************************************//**
-Applies operations to a table that was rebuilt.
-The log is consumed block by block. While head.blocks < tail.blocks, the
-index lock is released and the next block is read from the temporary file
-at offset head.blocks * srv_sort_buf_size into head.block. Once
-head.blocks == tail.blocks, the in-memory tail.block is applied while the
-index lock is held. A record that is cut off at the end of a block is
-copied to head.buf and completed from the beginning of the next block.
-Before returning, the index lock is re-acquired if it had been released,
-and the heaps, head.block and the offsets array are freed.
-@return DB_SUCCESS, or error code on failure */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_log_table_apply_ops(
-/*====================*/
- que_thr_t* thr, /*!< in: query graph */
- row_merge_dup_t*dup) /*!< in/out: for reporting duplicate key
- errors */
-{
- dberr_t error;
- const mrec_t* mrec = NULL;
- const mrec_t* next_mrec;
- const mrec_t* mrec_end = NULL; /* silence bogus warning */
- const mrec_t* next_mrec_end;
- mem_heap_t* heap;
- mem_heap_t* offsets_heap;
- ulint* offsets;
- bool has_index_lock;
- dict_index_t* index = const_cast<dict_index_t*>(
- dup->index);
- dict_table_t* new_table = index->online_log->table;
- dict_index_t* new_index = dict_table_get_first_index(
- new_table);
- const ulint i = 1 + REC_OFFS_HEADER_SIZE
- + ut_max(dict_index_get_n_fields(index),
- dict_index_get_n_unique(new_index) + 2);
- const ulint trx_id_col = dict_col_get_clust_pos(
- dict_table_get_sys_col(index->table, DATA_TRX_ID), index);
- const ulint new_trx_id_col = dict_col_get_clust_pos(
- dict_table_get_sys_col(new_table, DATA_TRX_ID), new_index);
- trx_t* trx = thr_get_trx(thr);
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(dict_index_is_online_ddl(index));
- ut_ad(trx->mysql_thd);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(!dict_index_is_online_ddl(new_index));
- ut_ad(trx_id_col > 0);
- ut_ad(trx_id_col != ULINT_UNDEFINED);
- ut_ad(new_trx_id_col > 0);
- ut_ad(new_trx_id_col != ULINT_UNDEFINED);
-
- UNIV_MEM_INVALID(&mrec_end, sizeof mrec_end);
-
- offsets = static_cast<ulint*>(ut_malloc(i * sizeof *offsets));
- offsets[0] = i; /* number of elements allocated for the array */
- offsets[1] = dict_index_get_n_fields(index);
-
- heap = mem_heap_create(UNIV_PAGE_SIZE);
- offsets_heap = mem_heap_create(UNIV_PAGE_SIZE);
- has_index_lock = true;
-
-next_block:
- ut_ad(has_index_lock);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(index->online_log->head.bytes == 0);
-
- if (trx_is_interrupted(trx)) {
- goto interrupted;
- }
-
- if (dict_index_is_corrupted(index)) {
- error = DB_INDEX_CORRUPT;
- goto func_exit;
- }
-
- ut_ad(dict_index_is_online_ddl(index));
-
- error = index->online_log->error;
-
- if (error != DB_SUCCESS) {
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(index->online_log->head.blocks
- > index->online_log->tail.blocks)) {
-unexpected_eof:
- fprintf(stderr, "InnoDB: unexpected end of temporary file"
- " for table %s\n", index->table_name);
-corruption:
- error = DB_CORRUPTION;
- goto func_exit;
- }
-
- if (index->online_log->head.blocks
- == index->online_log->tail.blocks) {
- if (index->online_log->head.blocks) {
-#ifdef HAVE_FTRUNCATE
- /* Truncate the file in order to save space. */
- if (index->online_log->fd != -1
- && ftruncate(index->online_log->fd, 0) == -1) {
- perror("ftruncate");
- }
-#endif /* HAVE_FTRUNCATE */
- index->online_log->head.blocks
- = index->online_log->tail.blocks = 0;
- }
-
- next_mrec = index->online_log->tail.block;
- next_mrec_end = next_mrec + index->online_log->tail.bytes;
-
- if (next_mrec_end == next_mrec) {
- /* End of log reached. */
-all_done:
- ut_ad(has_index_lock);
- ut_ad(index->online_log->head.blocks == 0);
- ut_ad(index->online_log->tail.blocks == 0);
- index->online_log->head.bytes = 0;
- index->online_log->tail.bytes = 0;
- error = DB_SUCCESS;
- goto func_exit;
- }
- } else {
- os_offset_t ofs;
- ibool success;
-
- ofs = (os_offset_t) index->online_log->head.blocks
- * srv_sort_buf_size; /* file offset of the block to read */
-
- ut_ad(has_index_lock);
- has_index_lock = false;
- rw_lock_x_unlock(dict_index_get_lock(index));
-
- log_free_check();
-
- ut_ad(dict_index_is_online_ddl(index));
-
- if (!row_log_block_allocate(index->online_log->head)) {
- error = DB_OUT_OF_MEMORY;
- goto func_exit;
- }
-
- success = os_file_read_no_error_handling_int_fd(
- index->online_log->fd,
- index->online_log->head.block, ofs,
- srv_sort_buf_size);
- if (!success) {
- fprintf(stderr, "InnoDB: unable to read temporary file"
- " for table %s\n", index->table_name);
- goto corruption;
- }
-
-#ifdef POSIX_FADV_DONTNEED
- /* Each block is read exactly once. Free up the file cache. */
- posix_fadvise(index->online_log->fd,
- ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED);
-#endif /* POSIX_FADV_DONTNEED */
-#if 0 //def FALLOC_FL_PUNCH_HOLE
- /* Try to deallocate the space for the file on disk.
- This should work on ext4 on Linux 2.6.39 and later,
- and be ignored when the operation is unsupported. */
- fallocate(index->online_log->fd,
- FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- ofs, srv_sort_buf_size);
-#endif /* FALLOC_FL_PUNCH_HOLE */
-
- next_mrec = index->online_log->head.block;
- next_mrec_end = next_mrec + srv_sort_buf_size;
- }
-
- /* This read is not protected by index->online_log->mutex for
- performance reasons. We will eventually notice any error that
- was flagged by a DML thread. */
- error = index->online_log->error;
-
- if (error != DB_SUCCESS) {
- goto func_exit;
- }
-
- if (mrec) {
- /* A partial record was read from the previous block.
- Copy the temporary buffer full, as we do not know the
- length of the record. Parse subsequent records from
- the bigger buffer index->online_log->head.block
- or index->online_log->tail.block. */
-
- ut_ad(mrec == index->online_log->head.buf);
- ut_ad(mrec_end > mrec);
- ut_ad(mrec_end < (&index->online_log->head.buf)[1]);
-
- memcpy((mrec_t*) mrec_end, next_mrec,
- (&index->online_log->head.buf)[1] - mrec_end);
- mrec = row_log_table_apply_op(
- thr, trx_id_col, new_trx_id_col,
- dup, &error, offsets_heap, heap,
- index->online_log->head.buf,
- (&index->online_log->head.buf)[1], offsets);
- if (error != DB_SUCCESS) {
- goto func_exit;
- } else if (UNIV_UNLIKELY(mrec == NULL)) {
- /* The record was not reassembled properly. */
- goto corruption;
- }
- /* The record was previously found out to be
- truncated. Now that the parse buffer was extended,
- it should proceed beyond the old end of the buffer. */
- ut_a(mrec > mrec_end);
-
- index->online_log->head.bytes = mrec - mrec_end; /* bytes of
- the new block that belonged to the reassembled record */
- next_mrec += index->online_log->head.bytes;
- }
-
- ut_ad(next_mrec <= next_mrec_end);
- /* The following loop must not be parsing the temporary
- buffer, but head.block or tail.block. */
-
- /* mrec!=NULL means that the next record starts from the
- middle of the block */
- ut_ad((mrec == NULL) == (index->online_log->head.bytes == 0));
-
-#ifdef UNIV_DEBUG
- if (next_mrec_end == index->online_log->head.block
- + srv_sort_buf_size) {
- /* If tail.bytes == 0, next_mrec_end can also be at
- the end of tail.block. */
- if (index->online_log->tail.bytes == 0) {
- ut_ad(next_mrec == next_mrec_end);
- ut_ad(index->online_log->tail.blocks == 0);
- ut_ad(index->online_log->head.blocks == 0);
- ut_ad(index->online_log->head.bytes == 0);
- } else {
- ut_ad(next_mrec == index->online_log->head.block
- + index->online_log->head.bytes);
- ut_ad(index->online_log->tail.blocks
- > index->online_log->head.blocks);
- }
- } else if (next_mrec_end == index->online_log->tail.block
- + index->online_log->tail.bytes) {
- ut_ad(next_mrec == index->online_log->tail.block
- + index->online_log->head.bytes);
- ut_ad(index->online_log->tail.blocks == 0);
- ut_ad(index->online_log->head.blocks == 0);
- ut_ad(index->online_log->head.bytes
- <= index->online_log->tail.bytes);
- } else {
- ut_error;
- }
-#endif /* UNIV_DEBUG */
-
- mrec_end = next_mrec_end;
-
- while (!trx_is_interrupted(trx)) {
- mrec = next_mrec;
- ut_ad(mrec < mrec_end);
-
- if (!has_index_lock) {
- /* We are applying operations from a different
- block than the one that is being written to.
- We do not hold index->lock in order to
- allow other threads to concurrently buffer
- modifications. */
- ut_ad(mrec >= index->online_log->head.block);
- ut_ad(mrec_end == index->online_log->head.block
- + srv_sort_buf_size);
- ut_ad(index->online_log->head.bytes
- < srv_sort_buf_size);
-
- /* Take the opportunity to do a redo log
- checkpoint if needed. */
- log_free_check();
- } else {
- /* We are applying operations from the last block.
- Do not allow other threads to buffer anything,
- so that we can finally catch up and synchronize. */
- ut_ad(index->online_log->head.blocks == 0);
- ut_ad(index->online_log->tail.blocks == 0);
- ut_ad(mrec_end == index->online_log->tail.block
- + index->online_log->tail.bytes);
- ut_ad(mrec >= index->online_log->tail.block);
- }
-
- /* This read is not protected by index->online_log->mutex
- for performance reasons. We will eventually notice any
- error that was flagged by a DML thread. */
- error = index->online_log->error;
-
- if (error != DB_SUCCESS) {
- goto func_exit;
- }
-
- next_mrec = row_log_table_apply_op(
- thr, trx_id_col, new_trx_id_col,
- dup, &error, offsets_heap, heap,
- mrec, mrec_end, offsets);
-
- if (error != DB_SUCCESS) {
- goto func_exit;
- } else if (next_mrec == next_mrec_end) {
- /* The record happened to end on a block boundary.
- Do we have more blocks left? */
- if (has_index_lock) {
- /* The index will be locked while
- applying the last block. */
- goto all_done;
- }
-
- mrec = NULL;
-process_next_block:
- rw_lock_x_lock(dict_index_get_lock(index));
- has_index_lock = true;
-
- index->online_log->head.bytes = 0;
- index->online_log->head.blocks++;
- goto next_block;
- } else if (next_mrec != NULL) {
- ut_ad(next_mrec < next_mrec_end);
- index->online_log->head.bytes += next_mrec - mrec;
- } else if (has_index_lock) {
- /* When mrec is within tail.block, it should
- be a complete record, because we are holding
- index->lock and thus excluding the writer. */
- ut_ad(index->online_log->tail.blocks == 0);
- ut_ad(mrec_end == index->online_log->tail.block
- + index->online_log->tail.bytes);
- ut_ad(0);
- goto unexpected_eof;
- } else {
- /* The record is incomplete in this block. Move its
- beginning to head.buf and let mrec, mrec_end describe
- that copy, so that the "if (mrec)" branch above can
- append the rest from the next block. */
- memcpy(index->online_log->head.buf, mrec,
- mrec_end - mrec);
- mrec_end += index->online_log->head.buf - mrec;
- mrec = index->online_log->head.buf;
- goto process_next_block;
- }
- }
-
-interrupted:
- error = DB_INTERRUPTED;
-func_exit:
- if (!has_index_lock) {
- rw_lock_x_lock(dict_index_get_lock(index));
- }
-
- mem_heap_free(offsets_heap);
- mem_heap_free(heap);
- row_log_block_free(index->online_log->head);
- ut_free(offsets);
- return(error);
-}
-
-/******************************************************//**
-Replay the row_log_table log onto a table once its rebuild has completed.
-The lock of the clustered index of old_table is held in exclusive mode
-from before the replay starts until just before returning.
-@return DB_SUCCESS, or error code on failure */
-UNIV_INTERN
-dberr_t
-row_log_table_apply(
-/*================*/
-	que_thr_t*	thr,	/*!< in: query graph */
-	dict_table_t*	old_table,
-				/*!< in: old table */
-	struct TABLE*	table)	/*!< in/out: MySQL table
-				(for reporting duplicates) */
-{
-	dberr_t		err;
-	dict_index_t*	pk_index;
-
-	thr_get_trx(thr)->error_key_num = 0;
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-	pk_index = dict_table_get_first_index(old_table);
-
-	rw_lock_x_lock(dict_index_get_lock(pk_index));
-
-	if (pk_index->online_log) {
-		row_merge_dup_t	dup_info = {
-			pk_index, table,
-			pk_index->online_log->col_map, 0
-		};
-
-		err = row_log_table_apply_ops(thr, &dup_info);
-
-		/* A successful replay must have consumed the whole log. */
-		ut_ad(err != DB_SUCCESS
-		      || pk_index->online_log->head.total
-		      == pk_index->online_log->tail.total);
-	} else {
-		/* This function should only be invoked while a table
-		is being rebuilt online. Tolerate a misuse by returning
-		DB_ERROR instead of dereferencing a missing log. */
-		ut_ad(dict_index_get_online_status(pk_index)
-		      == ONLINE_INDEX_COMPLETE);
-		ut_ad(0);
-		err = DB_ERROR;
-	}
-
-	rw_lock_x_unlock(dict_index_get_lock(pk_index));
-	return(err);
-}
-
-/******************************************************//**
-Create the row log of an index and mark the index as being
-created online. The log starts out empty, without a temporary file
-(fd = -1) and without any allocated head or tail block.
-@retval true if success, false if not */
-UNIV_INTERN
-bool
-row_log_allocate(
-/*=============*/
-	dict_index_t*	index,	/*!< in/out: index */
-	dict_table_t*	table,	/*!< in/out: new table being rebuilt,
-				or NULL when creating a secondary index */
-	bool		same_pk,/*!< in: whether the definition of the
-				PRIMARY KEY has remained the same */
-	const dtuple_t*	add_cols,
-				/*!< in: default values of
-				added columns, or NULL */
-	const ulint*	col_map,/*!< in: mapping of old column
-				numbers to new ones, or NULL if !table */
-	const char*	path)	/*!< in: where to create temporary file */
-{
-	row_log_t*	new_log;
-	DBUG_ENTER("row_log_allocate");
-
-	ut_ad(!dict_index_is_online_ddl(index));
-	ut_ad(dict_index_is_clust(index) == !!table);
-	ut_ad(!table || index->table != table);
-	ut_ad(same_pk || table);
-	ut_ad(!table || col_map);
-	ut_ad(!add_cols || col_map);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	new_log = static_cast<row_log_t*>(ut_malloc(sizeof *new_log));
-	if (new_log == NULL) {
-		DBUG_RETURN(false);
-	}
-
-	mutex_create(index_online_log_key, &new_log->mutex,
-		     SYNC_INDEX_ONLINE_LOG);
-
-	/* Settings supplied by the caller. */
-	new_log->table = table;
-	new_log->same_pk = same_pk;
-	new_log->add_cols = add_cols;
-	new_log->col_map = col_map;
-	new_log->path = path;
-
-	/* Initial state: nothing logged, nothing allocated. */
-	new_log->fd = -1;
-	new_log->blobs = NULL;
-	new_log->error = DB_SUCCESS;
-	new_log->max_trx = 0;
-
-	new_log->head.block = NULL;
-	new_log->head.blocks = 0;
-	new_log->head.bytes = 0;
-	new_log->head.total = 0;
-
-	new_log->tail.block = NULL;
-	new_log->tail.blocks = 0;
-	new_log->tail.bytes = 0;
-	new_log->tail.total = 0;
-
-	dict_index_set_online_status(index, ONLINE_INDEX_CREATION);
-	index->online_log = new_log;
-
-	/* While we might be holding an exclusive data dictionary lock
-	here, in row_log_abort_sec() we will not always be holding it. Use
-	atomic operations in both cases. */
-	MONITOR_ATOMIC_INC(MONITOR_ONLINE_CREATE_INDEX);
-
-	DBUG_RETURN(true);
-}
-
-/******************************************************//**
-Release the row log of an index that was being created online:
-its blobs container, both log blocks, the temporary file, the mutex
-and finally the log object itself. The caller's pointer is reset. */
-UNIV_INTERN
-void
-row_log_free(
-/*=========*/
-	row_log_t*&	log)	/*!< in,own: row log */
-{
-	row_log_t* const	victim = log;
-
-	MONITOR_ATOMIC_DEC(MONITOR_ONLINE_CREATE_INDEX);
-
-	delete victim->blobs;
-	row_log_block_free(victim->tail);
-	row_log_block_free(victim->head);
-	row_merge_file_destroy_low(victim->fd);
-	mutex_free(&victim->mutex);
-	ut_free(victim);
-
-	/* Do not leave a dangling pointer with the caller. */
-	log = NULL;
-}
-
-/******************************************************//**
-Read the max_trx field of the online log of an index, that is, the
-latest transaction ID that has invoked row_log_online_op() during
-online creation.
-@return latest transaction ID, or 0 if nothing was logged */
-UNIV_INTERN
-trx_id_t
-row_log_get_max_trx(
-/*================*/
-	dict_index_t*	index)	/*!< in: index, must be locked */
-{
-	trx_id_t	latest;
-
-	ut_ad(dict_index_get_online_status(index) == ONLINE_INDEX_CREATION);
-#ifdef UNIV_SYNC_DEBUG
-	/* Either index->lock is held in exclusive mode, or it is held
-	in shared mode together with the log mutex. */
-	ut_ad((rw_lock_own(dict_index_get_lock(index), RW_LOCK_SHARED)
-	       && mutex_own(&index->online_log->mutex))
-	      || rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	latest = index->online_log->max_trx;
-
-	return(latest);
-}
-
-/******************************************************//**
-Applies an operation to a secondary index that was being created.
-The entry is searched for with PAGE_CUR_LE inside a mini-transaction,
-latching for BTR_MODIFY_TREE if has_index_lock and for BTR_MODIFY_LEAF
-otherwise. A delete or insert is first attempted optimistically; if that
-does not succeed, the tree is searched again with BTR_MODIFY_TREE (unless
-that mode was already used) and the pessimistic variant is invoked.
-On the paths that find nothing to do (deleting a record that does not
-exist, inserting a record that already exists), *error is not assigned;
-row_log_apply_op() sets it to DB_SUCCESS before calling this function.
-A unique key violation is reported via row_merge_dup_report() and
-*error = DB_DUPLICATE_KEY. The mini-transaction is committed on every
-path. */
-static MY_ATTRIBUTE((nonnull))
-void
-row_log_apply_op_low(
-/*=================*/
- dict_index_t* index, /*!< in/out: index */
- row_merge_dup_t*dup, /*!< in/out: for reporting
- duplicate key errors */
- dberr_t* error, /*!< out: DB_SUCCESS or error code */
- mem_heap_t* offsets_heap, /*!< in/out: memory heap for
- allocating offsets; can be emptied */
- bool has_index_lock, /*!< in: true if holding index->lock
- in exclusive mode */
- enum row_op op, /*!< in: operation being applied */
- trx_id_t trx_id, /*!< in: transaction identifier */
- const dtuple_t* entry) /*!< in: row */
-{
- mtr_t mtr;
- btr_cur_t cursor;
- ulint* offsets = NULL;
-
- ut_ad(!dict_index_is_clust(index));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)
- == has_index_lock);
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(!dict_index_is_corrupted(index));
- ut_ad(trx_id != 0 || op == ROW_OP_DELETE);
-
- mtr_start(&mtr);
-
- /* We perform the pessimistic variant of the operations if we
- already hold index->lock exclusively. First, search the
- record. The operation may already have been performed,
- depending on when the row in the clustered index was
- scanned. */
- btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
- has_index_lock
- ? BTR_MODIFY_TREE
- : BTR_MODIFY_LEAF,
- &cursor, 0, __FILE__, __LINE__,
- &mtr);
-
- ut_ad(dict_index_get_n_unique(index) > 0);
- /* This test is somewhat similar to row_ins_must_modify_rec(),
- but not identical for unique secondary indexes. */
- if (cursor.low_match >= dict_index_get_n_unique(index)
- && !page_rec_is_infimum(btr_cur_get_rec(&cursor))) {
- /* We have a matching record. */
- bool exists = (cursor.low_match
- == dict_index_get_n_fields(index)); /* true only if
- every field of the index matched, not just the unique ones */
-#ifdef UNIV_DEBUG
- rec_t* rec = btr_cur_get_rec(&cursor);
- ut_ad(page_rec_is_user_rec(rec));
- ut_ad(!rec_get_deleted_flag(rec, page_rec_is_comp(rec)));
-#endif /* UNIV_DEBUG */
-
- ut_ad(exists || dict_index_is_unique(index));
-
- switch (op) {
- case ROW_OP_DELETE:
- if (!exists) {
- /* The existing record matches the
- unique secondary index key, but the
- PRIMARY KEY columns differ. So, this
- exact record does not exist. For
- example, we could detect a duplicate
- key error in some old index before
- logging an ROW_OP_INSERT for our
- index. This ROW_OP_DELETE could have
- been logged for rolling back
- TRX_UNDO_INSERT_REC. */
- goto func_exit;
- }
-
- if (btr_cur_optimistic_delete(
- &cursor, BTR_CREATE_FLAG, &mtr)) {
- *error = DB_SUCCESS;
- break;
- }
-
- if (!has_index_lock) {
- /* This needs a pessimistic operation.
- Lock the index tree exclusively. */
- mtr_commit(&mtr);
- mtr_start(&mtr);
- btr_cur_search_to_nth_level(
- index, 0, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &cursor, 0,
- __FILE__, __LINE__, &mtr);
-
- /* No other thread than the current one
- is allowed to modify the index tree.
- Thus, the record should still exist. */
- ut_ad(cursor.low_match
- >= dict_index_get_n_fields(index));
- ut_ad(page_rec_is_user_rec(
- btr_cur_get_rec(&cursor)));
- }
-
- /* As there are no externally stored fields in
- a secondary index record, the parameter
- rb_ctx = RB_NONE will be ignored. */
-
- btr_cur_pessimistic_delete(
- error, FALSE, &cursor,
- BTR_CREATE_FLAG, RB_NONE, &mtr);
- break;
- case ROW_OP_INSERT:
- if (exists) {
- /* The record already exists. There
- is nothing to be inserted.
- This could happen when processing
- TRX_UNDO_DEL_MARK_REC in statement
- rollback:
-
- UPDATE of PRIMARY KEY can lead to
- statement rollback if the updated
- value of the PRIMARY KEY already
- exists. In this case, the UPDATE would
- be mapped to DELETE;INSERT, and we
- only wrote undo log for the DELETE
- part. The duplicate key error would be
- triggered before logging the INSERT
- part.
-
- Theoretically, we could also get a
- similar situation when a DELETE operation
- is blocked by a FOREIGN KEY constraint. */
- goto func_exit;
- }
-
- if (dtuple_contains_null(entry)) {
- /* The UNIQUE KEY columns match, but
- there is a NULL value in the key, and
- NULL!=NULL.
- Control is transferred to the insert_the_rec
- label inside the switch of the other branch. */
- goto insert_the_rec;
- }
-
- goto duplicate;
- }
- } else {
- switch (op) {
- rec_t* rec;
- big_rec_t* big_rec;
- case ROW_OP_DELETE:
- /* The record does not exist. For example, we
- could detect a duplicate key error in some old
- index before logging an ROW_OP_INSERT for our
- index. This ROW_OP_DELETE could be logged for
- rolling back TRX_UNDO_INSERT_REC. */
- goto func_exit;
- case ROW_OP_INSERT:
- if (dict_index_is_unique(index)
- && (cursor.up_match
- >= dict_index_get_n_unique(index)
- || cursor.low_match
- >= dict_index_get_n_unique(index))
- && (!index->n_nullable
- || !dtuple_contains_null(entry))) {
-duplicate:
- /* Duplicate key */
- ut_ad(dict_index_is_unique(index));
- row_merge_dup_report(dup, entry->fields);
- *error = DB_DUPLICATE_KEY;
- goto func_exit;
- }
-insert_the_rec:
- /* Insert the record. As we are inserting into
- a secondary index, there cannot be externally
- stored columns (!big_rec). */
- *error = btr_cur_optimistic_insert(
- BTR_NO_UNDO_LOG_FLAG
- | BTR_NO_LOCKING_FLAG
- | BTR_CREATE_FLAG,
- &cursor, &offsets, &offsets_heap,
- const_cast<dtuple_t*>(entry),
- &rec, &big_rec, 0, NULL, &mtr);
- ut_ad(!big_rec);
- if (*error != DB_FAIL) {
- break;
- }
-
- if (!has_index_lock) {
- /* This needs a pessimistic operation.
- Lock the index tree exclusively. */
- mtr_commit(&mtr);
- mtr_start(&mtr);
- btr_cur_search_to_nth_level(
- index, 0, entry, PAGE_CUR_LE,
- BTR_MODIFY_TREE, &cursor, 0,
- __FILE__, __LINE__, &mtr);
- }
-
- /* We already determined that the
- record did not exist. No other thread
- than the current one is allowed to
- modify the index tree. Thus, the
- record should still not exist. */
-
- *error = btr_cur_pessimistic_insert(
- BTR_NO_UNDO_LOG_FLAG
- | BTR_NO_LOCKING_FLAG
- | BTR_CREATE_FLAG,
- &cursor, &offsets, &offsets_heap,
- const_cast<dtuple_t*>(entry),
- &rec, &big_rec,
- 0, NULL, &mtr);
- ut_ad(!big_rec);
- break;
- }
- mem_heap_empty(offsets_heap);
- }
-
- if (*error == DB_SUCCESS && trx_id) {
- page_update_max_trx_id(btr_cur_get_block(&cursor),
- btr_cur_get_page_zip(&cursor),
- trx_id, &mtr);
- }
-
-func_exit:
- mtr_commit(&mtr);
-}
-
-/******************************************************//**
-Applies an operation to a secondary index that was being created.
-The log record is parsed as follows: one byte of operation type
-(ROW_OP_INSERT or ROW_OP_DELETE); for ROW_OP_INSERT only, DATA_TRX_ID_LEN
-bytes of transaction identifier (ROW_OP_DELETE uses trx_id = 0); one byte
-of extra_size, followed by a second byte if the first one is at least
-0x80; extra_size bytes; and finally the record data, whose length is
-determined by rec_init_offsets_temp(). The record is converted to an
-index entry allocated from heap and passed to row_log_apply_op_low().
-*error is set to DB_INDEX_CORRUPT if the index is flagged corrupted, and
-to DB_CORRUPTION on an unknown operation type or when the record claims
-to contain externally stored fields; NULL is returned in those cases.
-@return NULL on failure (mrec corruption) or when out of data;
-pointer to next record on success */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-const mrec_t*
-row_log_apply_op(
-/*=============*/
- dict_index_t* index, /*!< in/out: index */
- row_merge_dup_t*dup, /*!< in/out: for reporting
- duplicate key errors */
- dberr_t* error, /*!< out: DB_SUCCESS or error code */
- mem_heap_t* offsets_heap, /*!< in/out: memory heap for
- allocating offsets; can be emptied */
- mem_heap_t* heap, /*!< in/out: memory heap for
- allocating data tuples */
- bool has_index_lock, /*!< in: true if holding index->lock
- in exclusive mode */
- const mrec_t* mrec, /*!< in: merge record */
- const mrec_t* mrec_end, /*!< in: end of buffer */
- ulint* offsets) /*!< in/out: work area for
- rec_init_offsets_temp() */
-
-{
- enum row_op op;
- ulint extra_size;
- ulint data_size;
- ulint n_ext;
- dtuple_t* entry;
- trx_id_t trx_id;
-
- /* Online index creation is only used for secondary indexes. */
- ut_ad(!dict_index_is_clust(index));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)
- == has_index_lock);
-#endif /* UNIV_SYNC_DEBUG */
-
- if (dict_index_is_corrupted(index)) {
- *error = DB_INDEX_CORRUPT;
- return(NULL);
- }
-
- *error = DB_SUCCESS;
-
- if (mrec + ROW_LOG_HEADER_SIZE >= mrec_end) {
- return(NULL); /* not enough data for a record header */
- }
-
- switch (*mrec) {
- case ROW_OP_INSERT:
- if (ROW_LOG_HEADER_SIZE + DATA_TRX_ID_LEN + mrec >= mrec_end) {
- return(NULL);
- }
-
- op = static_cast<enum row_op>(*mrec++);
- trx_id = trx_read_trx_id(mrec);
- mrec += DATA_TRX_ID_LEN;
- break;
- case ROW_OP_DELETE:
- op = static_cast<enum row_op>(*mrec++);
- trx_id = 0;
- break;
- default:
-corrupted:
- ut_ad(0);
- *error = DB_CORRUPTION;
- return(NULL);
- }
-
- extra_size = *mrec++;
-
- ut_ad(mrec < mrec_end);
-
- if (extra_size >= 0x80) {
- /* Read another byte of extra_size. */
-
- extra_size = (extra_size & 0x7f) << 8;
- extra_size |= *mrec++;
- }
-
- mrec += extra_size;
-
- if (mrec > mrec_end) {
- return(NULL);
- }
-
- rec_init_offsets_temp(mrec, index, offsets);
-
- if (rec_offs_any_extern(offsets)) {
- /* There should never be any externally stored fields
- in a secondary index, which is what online index
- creation is used for. Therefore, the log file must be
- corrupted. */
- goto corrupted;
- }
-
- data_size = rec_offs_data_size(offsets);
-
- mrec += data_size; /* mrec now points to the end of this record */
-
- if (mrec > mrec_end) {
- return(NULL);
- }
-
- entry = row_rec_to_index_entry_low(
- mrec - data_size, index, offsets, &n_ext, heap);
- /* Online index creation is only implemented for secondary
- indexes, which never contain off-page columns. */
- ut_ad(n_ext == 0);
-#ifdef ROW_LOG_APPLY_PRINT
- if (row_log_apply_print) {
- fprintf(stderr, "apply " IB_ID_FMT " " TRX_ID_FMT " %u %u ",
- index->id, trx_id,
- unsigned (op), unsigned (has_index_lock));
- for (const byte* m = mrec - data_size; m < mrec; m++) {
- fprintf(stderr, "%02x", *m);
- }
- putc('\n', stderr);
- }
-#endif /* ROW_LOG_APPLY_PRINT */
- row_log_apply_op_low(index, dup, error, offsets_heap,
- has_index_lock, op, trx_id, entry);
- return(mrec);
-}
-
-/******************************************************//**
-Applies operations to a secondary index that was being created.
-The online log is consumed block by block. Blocks that precede the tail
-block are read from the temporary file into index->online_log->head.block
-and applied while index->lock is released; the tail block is applied
-while index->lock is held. A record that is cut off at the end of a block
-is copied to index->online_log->head.buf and completed from the start of
-the next block.
-The caller must hold index->lock in exclusive mode; the lock is held
-again when the function returns. On any result other than DB_SUCCESS the
-DICT_CORRUPT flag is set in index->type, and DB_INDEX_CORRUPT is replaced
-with DB_ONLINE_LOG_TOO_BIG when the log has reached srv_online_max_size.
-@return DB_SUCCESS, or error code on failure */
-static MY_ATTRIBUTE((nonnull))
-dberr_t
-row_log_apply_ops(
-/*==============*/
- trx_t* trx, /*!< in: transaction (for checking if
- the operation was interrupted) */
- dict_index_t* index, /*!< in/out: index */
- row_merge_dup_t*dup) /*!< in/out: for reporting duplicate key
- errors */
-{
- dberr_t error;
- const mrec_t* mrec = NULL;
- const mrec_t* next_mrec;
- const mrec_t* mrec_end= NULL; /* silence bogus warning */
- const mrec_t* next_mrec_end;
- mem_heap_t* offsets_heap;
- mem_heap_t* heap;
- ulint* offsets;
- bool has_index_lock; /* true while this thread holds index->lock */
- const ulint i = 1 + REC_OFFS_HEADER_SIZE
- + dict_index_get_n_fields(index); /* number of elements in offsets[] */
-
- ut_ad(dict_index_is_online_ddl(index));
- ut_ad(*index->name == TEMP_INDEX_PREFIX);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(index->online_log);
- UNIV_MEM_INVALID(&mrec_end, sizeof mrec_end);
-
- offsets = static_cast<ulint*>(ut_malloc(i * sizeof *offsets));
- offsets[0] = i;
- offsets[1] = dict_index_get_n_fields(index);
-
- offsets_heap = mem_heap_create(UNIV_PAGE_SIZE);
- heap = mem_heap_create(UNIV_PAGE_SIZE);
- has_index_lock = true;
-
-next_block:
- ut_ad(has_index_lock);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(index->online_log->head.bytes == 0);
-
- if (trx_is_interrupted(trx)) {
- goto interrupted;
- }
-
- error = index->online_log->error;
- if (error != DB_SUCCESS) {
- goto func_exit;
- }
-
- if (dict_index_is_corrupted(index)) {
- error = DB_INDEX_CORRUPT;
- goto func_exit;
- }
-
- if (UNIV_UNLIKELY(index->online_log->head.blocks
- > index->online_log->tail.blocks)) {
-unexpected_eof:
- fprintf(stderr, "InnoDB: unexpected end of temporary file"
- " for index %s\n", index->name + 1);
-corruption:
- error = DB_CORRUPTION;
- goto func_exit;
- }
-
- if (index->online_log->head.blocks
- == index->online_log->tail.blocks) {
- if (index->online_log->head.blocks) {
-#ifdef HAVE_FTRUNCATE
- /* Truncate the file in order to save space. */
- if (index->online_log->fd != -1
- && ftruncate(index->online_log->fd, 0) == -1) {
- perror("ftruncate");
- }
-#endif /* HAVE_FTRUNCATE */
- index->online_log->head.blocks
- = index->online_log->tail.blocks = 0;
- }
-
- next_mrec = index->online_log->tail.block;
- next_mrec_end = next_mrec + index->online_log->tail.bytes;
-
- if (next_mrec_end == next_mrec) {
- /* End of log reached: the tail block holds no bytes. */
-all_done:
- ut_ad(has_index_lock);
- ut_ad(index->online_log->head.blocks == 0);
- ut_ad(index->online_log->tail.blocks == 0);
- error = DB_SUCCESS;
- goto func_exit;
- }
- } else {
- os_offset_t ofs;
- ibool success;
-
- ofs = (os_offset_t) index->online_log->head.blocks
- * srv_sort_buf_size;
-
- ut_ad(has_index_lock);
- has_index_lock = false; /* index->lock is released for the file read */
- rw_lock_x_unlock(dict_index_get_lock(index));
-
- log_free_check();
-
- if (!row_log_block_allocate(index->online_log->head)) {
- error = DB_OUT_OF_MEMORY;
- goto func_exit;
- }
-
- success = os_file_read_no_error_handling_int_fd(
- index->online_log->fd,
- index->online_log->head.block, ofs,
- srv_sort_buf_size);
-
- if (!success) {
- fprintf(stderr, "InnoDB: unable to read temporary file"
- " for index %s\n", index->name + 1);
- goto corruption;
- }
-
-#ifdef POSIX_FADV_DONTNEED
- /* Each block is read exactly once. Free up the file cache. */
- posix_fadvise(index->online_log->fd,
- ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED);
-#endif /* POSIX_FADV_DONTNEED */
-#if 0 //def FALLOC_FL_PUNCH_HOLE
- /* Try to deallocate the space for the file on disk.
- This should work on ext4 on Linux 2.6.39 and later,
- and be ignored when the operation is unsupported. */
- fallocate(index->online_log->fd,
- FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- ofs, srv_sort_buf_size);
-#endif /* FALLOC_FL_PUNCH_HOLE */
-
- next_mrec = index->online_log->head.block;
- next_mrec_end = next_mrec + srv_sort_buf_size;
- }
-
- if (mrec) {
- /* A partial record was read from the previous block.
- Copy the temporary buffer full, as we do not know the
- length of the record. Parse subsequent records from
- the bigger buffer index->online_log->head.block
- or index->online_log->tail.block. */
-
- ut_ad(mrec == index->online_log->head.buf);
- ut_ad(mrec_end > mrec);
- ut_ad(mrec_end < (&index->online_log->head.buf)[1]);
-
- memcpy((mrec_t*) mrec_end, next_mrec,
- (&index->online_log->head.buf)[1] - mrec_end);
- mrec = row_log_apply_op(
- index, dup, &error, offsets_heap, heap,
- has_index_lock, index->online_log->head.buf,
- (&index->online_log->head.buf)[1], offsets);
- if (error != DB_SUCCESS) {
- goto func_exit;
- } else if (UNIV_UNLIKELY(mrec == NULL)) {
- /* The record was not reassembled properly. */
- goto corruption;
- }
- /* The record was previously found out to be
- truncated. Now that the parse buffer was extended,
- it should proceed beyond the old end of the buffer. */
- ut_a(mrec > mrec_end);
-
- index->online_log->head.bytes = mrec - mrec_end;
- next_mrec += index->online_log->head.bytes;
- }
-
- ut_ad(next_mrec <= next_mrec_end);
- /* The following loop must not be parsing the temporary
- buffer, but head.block or tail.block. */
-
- /* mrec!=NULL means that the next record starts from the
- middle of the block */
- ut_ad((mrec == NULL) == (index->online_log->head.bytes == 0));
-
-#ifdef UNIV_DEBUG
- if (next_mrec_end == index->online_log->head.block
- + srv_sort_buf_size) {
- /* If tail.bytes == 0, next_mrec_end can also be at
- the end of tail.block. */
- if (index->online_log->tail.bytes == 0) {
- ut_ad(next_mrec == next_mrec_end);
- ut_ad(index->online_log->tail.blocks == 0);
- ut_ad(index->online_log->head.blocks == 0);
- ut_ad(index->online_log->head.bytes == 0);
- } else {
- ut_ad(next_mrec == index->online_log->head.block
- + index->online_log->head.bytes);
- ut_ad(index->online_log->tail.blocks
- > index->online_log->head.blocks);
- }
- } else if (next_mrec_end == index->online_log->tail.block
- + index->online_log->tail.bytes) {
- ut_ad(next_mrec == index->online_log->tail.block
- + index->online_log->head.bytes);
- ut_ad(index->online_log->tail.blocks == 0);
- ut_ad(index->online_log->head.blocks == 0);
- ut_ad(index->online_log->head.bytes
- <= index->online_log->tail.bytes);
- } else {
- ut_error;
- }
-#endif /* UNIV_DEBUG */
-
- mrec_end = next_mrec_end;
-
- while (!trx_is_interrupted(trx)) {
- mrec = next_mrec;
- ut_ad(mrec < mrec_end);
-
- if (!has_index_lock) {
- /* We are applying operations from a different
- block than the one that is being written to.
- We do not hold index->lock in order to
- allow other threads to concurrently buffer
- modifications. */
- ut_ad(mrec >= index->online_log->head.block);
- ut_ad(mrec_end == index->online_log->head.block
- + srv_sort_buf_size);
- ut_ad(index->online_log->head.bytes
- < srv_sort_buf_size);
-
- /* Take the opportunity to do a redo log
- checkpoint if needed. */
- log_free_check();
- } else {
- /* We are applying operations from the last block.
- Do not allow other threads to buffer anything,
- so that we can finally catch up and synchronize. */
- ut_ad(index->online_log->head.blocks == 0);
- ut_ad(index->online_log->tail.blocks == 0);
- ut_ad(mrec_end == index->online_log->tail.block
- + index->online_log->tail.bytes);
- ut_ad(mrec >= index->online_log->tail.block);
- }
-
- next_mrec = row_log_apply_op(
- index, dup, &error, offsets_heap, heap,
- has_index_lock, mrec, mrec_end, offsets);
-
- if (error != DB_SUCCESS) {
- goto func_exit;
- } else if (next_mrec == next_mrec_end) {
- /* The record happened to end on a block boundary.
- Do we have more blocks left? */
- if (has_index_lock) {
- /* The index will be locked while
- applying the last block. */
- goto all_done;
- }
-
- mrec = NULL;
-process_next_block:
- rw_lock_x_lock(dict_index_get_lock(index));
- has_index_lock = true;
-
- index->online_log->head.bytes = 0;
- index->online_log->head.blocks++;
- goto next_block;
- } else if (next_mrec != NULL) {
- ut_ad(next_mrec < next_mrec_end);
- index->online_log->head.bytes += next_mrec - mrec;
- } else if (has_index_lock) {
- /* When mrec is within tail.block, it should
- be a complete record, because we are holding
- index->lock and thus excluding the writer. */
- ut_ad(index->online_log->tail.blocks == 0);
- ut_ad(mrec_end == index->online_log->tail.block
- + index->online_log->tail.bytes);
- ut_ad(0);
- goto unexpected_eof;
- } else {
- memcpy(index->online_log->head.buf, mrec,
- mrec_end - mrec);
- mrec_end += index->online_log->head.buf - mrec; /* rebase the end pointer onto head.buf */
- mrec = index->online_log->head.buf;
- goto process_next_block;
- }
- }
-
-interrupted:
- error = DB_INTERRUPTED;
-func_exit:
- if (!has_index_lock) {
- rw_lock_x_lock(dict_index_get_lock(index));
- }
-
- switch (error) {
- case DB_SUCCESS:
- break;
- case DB_INDEX_CORRUPT:
- if (((os_offset_t) index->online_log->tail.blocks + 1)
- * srv_sort_buf_size >= srv_online_max_size) {
- /* The log file grew too big. */
- error = DB_ONLINE_LOG_TOO_BIG;
- }
- /* fall through */
- default:
- /* We set the flag directly instead of invoking
- dict_set_corrupted_index_cache_only(index) here,
- because the index is not "public" yet. */
- index->type |= DICT_CORRUPT;
- }
-
- mem_heap_free(heap);
- mem_heap_free(offsets_heap);
- row_log_block_free(index->online_log->head);
- ut_free(offsets);
- return(error);
-}
-
-/******************************************************//**
-Replay the buffered row log into a secondary index once the index
-build has finished, then mark the online creation as either complete
-or aborted and release the log.
-@return DB_SUCCESS, or error code on failure */
-UNIV_INTERN
-dberr_t
-row_log_apply(
-/*==========*/
-	trx_t*		trx,	/*!< in: transaction (consulted to find
-				out whether the operation was interrupted) */
-	dict_index_t*	index,	/*!< in/out: secondary index */
-	struct TABLE*	table)	/*!< in/out: MySQL table
-				(for reporting duplicates) */
-{
-	row_merge_dup_t	dup = { index, table, NULL, 0 };
-	DBUG_ENTER("row_log_apply");
-
-	ut_ad(dict_index_is_online_ddl(index));
-	ut_ad(!dict_index_is_clust(index));
-
-	log_free_check();
-
-	rw_lock_x_lock(dict_index_get_lock(index));
-
-	/* Nothing is replayed when the table is flagged as corrupted;
-	that case is reported as DB_SUCCESS. */
-	const dberr_t	error = index->table->corrupted
-		? DB_SUCCESS
-		: row_log_apply_ops(trx, index, &dup);
-
-	if (error == DB_SUCCESS) {
-		ut_ad(dup.n_dup == 0);
-		dict_index_set_online_status(index, ONLINE_INDEX_COMPLETE);
-	} else {
-		ut_a(!dict_table_is_discarded(index->table));
-		/* The flag is set directly rather than through
-		dict_set_corrupted_index_cache_only(index),
-		because the index is not "public" yet. */
-		index->type |= DICT_CORRUPT;
-		index->table->drop_aborted = TRUE;
-
-		dict_index_set_online_status(index, ONLINE_INDEX_ABORTED);
-	}
-
-	/* Detach the log while index->lock is still held; it is freed
-	only after the lock has been released. */
-	row_log_t*	log = index->online_log;
-	index->online_log = NULL;
-	/* We could remove the TEMP_INDEX_PREFIX and update the data
-	dictionary to say that this index is complete, if we had
-	access to the .frm file here. If the server crashes before
-	all requested indexes have been created, this completed index
-	will be dropped. */
-	rw_lock_x_unlock(dict_index_get_lock(index));
-
-	row_log_free(log);
-
-	DBUG_RETURN(error);
-}
diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc
deleted file mode 100644
index 6a1298087eb..00000000000
--- a/storage/xtradb/row/row0merge.cc
+++ /dev/null
@@ -1,4411 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0merge.cc
-New index creation routines using a merge sort
-
-Created 12/4/2005 Jan Lindstrom
-Completed by Sunny Bains and Marko Makela
-*******************************************************/
-#include <my_config.h>
-#include <log.h>
-#include <sql_class.h>
-
-#include "row0merge.h"
-#include "row0ext.h"
-#include "row0log.h"
-#include "row0ins.h"
-#include "row0sel.h"
-#include "dict0crea.h"
-#include "trx0purge.h"
-#include "lock0lock.h"
-#include "pars0pars.h"
-#include "ut0sort.h"
-#include "row0ftsort.h"
-#include "row0import.h"
-#include "handler0alter.h"
-#include "ha_prototypes.h"
-#include "math.h" /* log2() */
-#include "fil0crypt.h"
-
-/** Base-2 logarithm computed in double precision by change of base.
-@param n	value whose logarithm is wanted
-@return log(n) / log(2), narrowed to float */
-float my_log2f(float n)
-{
-	const double	ln_n = log((double) n);
-	const double	ln_2 = log((double) 2);
-
-	return (float) (ln_n / ln_2);
-}
-
-/* Ignore posix_fadvise() on those platforms where it does not exist:
-when __WIN__ is defined, every call expands to nothing. */
-#if defined __WIN__
-# define posix_fadvise(fd, offset, len, advice) /* nothing */
-#endif /* __WIN__ */
-
-#ifdef UNIV_DEBUG
-/** Set these in order to enable debug printout. */
-/* @{ */
-/** Log each record read from temporary file. */
-static ibool row_merge_print_read;
-/** Log each record write to temporary file. */
-static ibool row_merge_print_write;
-/** Log each row_merge_blocks() call, merging two blocks of records to
-a bigger one. */
-static ibool row_merge_print_block;
-/** Log each block read from temporary file. */
-static ibool row_merge_print_block_read;
-/** Log each block write to temporary file (going by the flag name;
-the use of the flag is not visible in this part of the file). */
-static ibool row_merge_print_block_write;
-/* @} */
-#endif /* UNIV_DEBUG */
-
-/* Whether to disable file system cache */
-UNIV_INTERN char srv_disable_sort_file_cache;
-
-/* Maximum pending doc memory limit in bytes for a fts tokenization thread.
-row_merge_buf_add() sleeps while a bucket's memory_used exceeds it. */
-#define FTS_PENDING_DOC_MEMORY_LIMIT 1000000
-
-
-/******************************************************//**
-Encrypt a merge block.
-The latest key version of crypt_data->key_id is written to the start of
-crypted_buf with mach_write_to_4(); everything from
-ROW_MERGE_RESERVE_SIZE up to srv_sort_buf_size is encrypted from
-input_buf into crypted_buf. A failure is logged at IB_LOG_LEVEL_FATAL. */
-static
-void
-row_merge_encrypt_buf(
-/*==================*/
-	fil_space_crypt_t*	crypt_data,	/*!< in: table crypt data */
-	ulint			offset,		/*!< in: block number; it is
-						multiplied by srv_sort_buf_size
-						to get the byte offset */
-	ulint			space,		/*!< in: tablespace id */
-	const byte*		input_buf,	/*!< in: plain-text block */
-	byte*			crypted_buf)	/*!< out: encrypted block */
-{
-	const os_offset_t	ofs = (os_offset_t) srv_sort_buf_size
-		* (os_offset_t) offset;
-	uint			dstlen = 0;
-	const uint		key_version
-		= encryption_key_get_latest_version(crypt_data->key_id);
-
-	/* The key version goes in front of the encrypted payload, in the
-	output buffer. */
-	mach_write_to_4((byte *)crypted_buf, key_version);
-
-	const int	rc = encryption_scheme_encrypt(
-		input_buf + ROW_MERGE_RESERVE_SIZE,
-		srv_sort_buf_size - ROW_MERGE_RESERVE_SIZE,
-		crypted_buf + ROW_MERGE_RESERVE_SIZE, &dstlen,
-		crypt_data, key_version,
-		space, ofs, 0);
-
-	if (rc != MY_AES_OK
-	    || (ulint) dstlen != srv_sort_buf_size - ROW_MERGE_RESERVE_SIZE) {
-		ib_logf(IB_LOG_LEVEL_FATAL,
-			"Unable to encrypt data-block "
-			" src: %p srclen: %lu buf: %p buflen: %u."
-			" return-code: %d. Can't continue!\n",
-			input_buf, srv_sort_buf_size,
-			crypted_buf, dstlen, rc);
-	}
-}
-
-/******************************************************//**
-Decrypt a merge block.
-The key version is read from the start of input_buf with
-mach_read_from_4(). A key version of 0 means that the block was not
-encrypted; in that case nothing is written to crypted_buf. Otherwise
-everything from ROW_MERGE_RESERVE_SIZE up to srv_sort_buf_size is
-decrypted from input_buf into crypted_buf, and a failure is logged at
-IB_LOG_LEVEL_FATAL.
-@return true if the block carried a non-zero key version and decryption
-was attempted, false if the block was not encrypted */
-static
-bool
-row_merge_decrypt_buf(
-/*==================*/
-	fil_space_crypt_t*	crypt_data,	/*!< in: table crypt data */
-	ulint			offset,		/*!< in: block number; it is
-						multiplied by srv_sort_buf_size
-						to get the byte offset */
-	ulint			space,		/*!< in: tablespace id */
-	const byte*		input_buf,	/*!< in: block as read from
-						the file (possibly encrypted) */
-	byte*			crypted_buf)	/*!< out: decrypted block; the
-						name is kept for compatibility */
-{
-	uint key_version;
-	uint dstlen=0;
-	os_offset_t ofs = (os_offset_t)srv_sort_buf_size * (os_offset_t)offset;
-
-	/* Read key_version from beginning of the buffer */
-	key_version = mach_read_from_4((byte *)input_buf);
-
-	if (key_version == 0) {
-		/* block not encrypted */
-		return false;
-	}
-
-	int rc = encryption_scheme_decrypt(input_buf+ROW_MERGE_RESERVE_SIZE,
-					   srv_sort_buf_size-ROW_MERGE_RESERVE_SIZE,
-					   crypted_buf+ROW_MERGE_RESERVE_SIZE, &dstlen,
-					   crypt_data, key_version,
-					   space, ofs, 0);
-
-	if (! ((rc == MY_AES_OK) && ((ulint)dstlen == srv_sort_buf_size-ROW_MERGE_RESERVE_SIZE))) {
-		/* This is the decrypt path: say so in the message, and
-		print the unsigned dstlen with %u. */
-		ib_logf(IB_LOG_LEVEL_FATAL,
-			"Unable to decrypt data-block "
-			" src: %p srclen: %lu buf: %p buflen: %u."
-			" return-code: %d. Can't continue!\n",
-			input_buf, srv_sort_buf_size,
-			crypted_buf, dstlen, rc);
-	}
-
-	return (true);
-}
-
-#ifdef UNIV_DEBUG
-/******************************************************//**
-Print the fields of a merge tuple, one per line. At most the first
-20 bytes of a field are dumped; longer fields get their total length
-appended. */
-static MY_ATTRIBUTE((nonnull))
-void
-row_merge_tuple_print(
-/*==================*/
-	FILE*		f,	/*!< in: output stream */
-	const mtuple_t*	entry,	/*!< in: tuple to print */
-	ulint		n_fields)/*!< in: number of fields in the tuple */
-{
-	const dfield_t*		field = entry->fields;
-	const dfield_t* const	end = field + n_fields;
-
-	for (; field != end; field++) {
-		if (dfield_is_null(field)) {
-			fputs("\n NULL;", f);
-			continue;
-		}
-
-		ulint	field_len = dfield_get_len(field);
-		ulint	len = ut_min(field_len, 20);
-
-		/* Externally stored fields are marked with an 'E'. */
-		fputs(dfield_is_ext(field) ? "\nE" : "\n ", f);
-		ut_print_buf(f, dfield_get_data(field), len);
-
-		if (len != field_len) {
-			fprintf(f, " (total " ULINTPF " bytes)",
-				field_len);
-		}
-	}
-
-	putc('\n', f);
-}
-#endif /* UNIV_DEBUG */
-
-/******************************************************//**
-Encode an index record at the current end of the output buffer.
-The record is preceded by (extra_size + 1), stored in one byte when it
-is below 0x80 and otherwise in two bytes with the 0x80 bit set in the
-first one. */
-static MY_ATTRIBUTE((nonnull))
-void
-row_merge_buf_encode(
-/*=================*/
-	byte**			b,		/*!< in/out: pointer to
-						current end of output buffer;
-						advanced past the record */
-	const dict_index_t*	index,		/*!< in: index */
-	const mtuple_t*		entry,		/*!< in: index fields
-						of the record to encode */
-	ulint			n_fields)	/*!< in: number of fields
-						in the entry */
-{
-	ulint		extra_size;
-	const ulint	size = rec_get_converted_size_temp(
-		index, entry->fields, n_fields, &extra_size);
-	ut_ad(size >= extra_size);
-
-	byte*		out = *b;
-	const ulint	encoded = extra_size + 1;
-
-	if (encoded >= 0x80) {
-		/* Two-byte form: the high byte, flagged with 0x80,
-		comes first. */
-		ut_ad(encoded < 0x8000);
-		*out++ = (byte) (0x80 | (encoded >> 8));
-	}
-
-	*out++ = (byte) encoded;
-
-	rec_convert_dtuple_to_temp(out + extra_size, index,
-				   entry->fields, n_fields);
-
-	*b = out + size;
-}
-
-/******************************************************//**
-Allocate a sort buffer. The descriptor is zero-allocated from heap,
-while the tuple array (2 * max_tuples elements, the second half being
-the work area tmp_tuples) is allocated with ut_malloc().
-@return own: sort buffer */
-static MY_ATTRIBUTE((malloc, nonnull))
-row_merge_buf_t*
-row_merge_buf_create_low(
-/*=====================*/
-	mem_heap_t*	heap,		/*!< in: heap where allocated */
-	dict_index_t*	index,		/*!< in: secondary index */
-	ulint		max_tuples,	/*!< in: maximum number of
-					data tuples */
-	ulint		buf_size)	/*!< in: size of the buffer,
-					in bytes */
-{
-	ut_ad(max_tuples > 0);
-
-	ut_ad(max_tuples <= srv_sort_buf_size);
-
-	row_merge_buf_t*	sort_buf = static_cast<row_merge_buf_t*>(
-		mem_heap_zalloc(heap, buf_size));
-	mtuple_t*		tuple_array = static_cast<mtuple_t*>(
-		ut_malloc(2 * max_tuples * sizeof(mtuple_t)));
-
-	sort_buf->heap = heap;
-	sort_buf->index = index;
-	sort_buf->max_tuples = max_tuples;
-	sort_buf->tuples = tuple_array;
-	sort_buf->tmp_tuples = tuple_array + max_tuples;
-
-	return(sort_buf);
-}
-
-/******************************************************//**
-Allocate a sort buffer for an index. The tuple capacity is the usable
-part of a sort block (srv_sort_buf_size - ROW_MERGE_RESERVE_SIZE)
-divided by the minimum record size of the index (at least 1).
-@return own: sort buffer */
-UNIV_INTERN
-row_merge_buf_t*
-row_merge_buf_create(
-/*=================*/
-	dict_index_t*	index)	/*!< in: secondary index */
-{
-	const ulint	buf_size = sizeof(row_merge_buf_t);
-	const ulint	max_tuples
-		= (srv_sort_buf_size - ROW_MERGE_RESERVE_SIZE)
-		/ ut_max(1, dict_index_get_min_size(index));
-
-	mem_heap_t*	heap = mem_heap_create(buf_size);
-
-	return(row_merge_buf_create_low(heap, index, max_tuples, buf_size));
-}
-
-/******************************************************//**
-Empty a sort buffer. The heap is emptied and a zero-filled descriptor
-is allocated from it again; the heap, index, capacity and tuple array
-of the old descriptor are carried over.
-@return sort buffer */
-UNIV_INTERN
-row_merge_buf_t*
-row_merge_buf_empty(
-/*================*/
-	row_merge_buf_t*	buf)	/*!< in,own: sort buffer */
-{
-	/* Copy out everything that must survive before the heap is
-	emptied. */
-	mem_heap_t* const	saved_heap = buf->heap;
-	dict_index_t* const	saved_index = buf->index;
-	mtuple_t* const		saved_tuples = buf->tuples;
-	const ulint		saved_max = buf->max_tuples;
-
-	mem_heap_empty(saved_heap);
-
-	row_merge_buf_t*	fresh = static_cast<row_merge_buf_t*>(
-		mem_heap_zalloc(saved_heap, sizeof(row_merge_buf_t)));
-
-	fresh->heap = saved_heap;
-	fresh->index = saved_index;
-	fresh->max_tuples = saved_max;
-	fresh->tuples = saved_tuples;
-	fresh->tmp_tuples = saved_tuples + saved_max;
-
-	return(fresh);
-}
-
-/******************************************************//**
-Deallocate a sort buffer: first the tuple array, then the heap. */
-UNIV_INTERN
-void
-row_merge_buf_free(
-/*===============*/
-	row_merge_buf_t*	buf)	/*!< in,own: sort buffer to be freed */
-{
-	mem_heap_t*	heap = buf->heap;
-
-	ut_free(buf->tuples);
-	mem_heap_free(heap);
-}
-
-/** Convert the field data from compact to redundant format.
-The value is copied into a buffer of len bytes allocated from heap and
-padded at the end with 0x20 bytes; an externally stored value is
-fetched with btr_copy_externally_stored_field() first.
-@param[in]	row_field	field to copy from
-@param[out]	field		field to copy to
-@param[in]	len		length of the field data
-@param[in]	zip_size	compressed BLOB page size,
-				zero for uncompressed BLOBs
-@param[in,out]	heap		memory heap where to allocate the converted
-				data (always used by this function)
-@param		trx		transaction, passed on to
-				btr_copy_externally_stored_field() */
-static
-void
-row_merge_buf_redundant_convert(
-	const dfield_t*		row_field,
-	dfield_t*		field,
-	ulint			len,
-	ulint			zip_size,
-	mem_heap_t*		heap,
-	trx_t*			trx)
-{
-	ut_ad(DATA_MBMINLEN(field->type.mbminmaxlen) == 1);
-	ut_ad(DATA_MBMAXLEN(field->type.mbminmaxlen) > 1);
-
-	byte*		padded = (byte*) mem_heap_alloc(heap, len);
-	ulint		copy_len = row_field->len;
-	const void*	src;
-
-	ut_ad(copy_len <= len);
-
-	if (!row_field->ext) {
-		src = row_field->data;
-	} else {
-		const byte*	ref = static_cast<byte*>(
-			dfield_get_data(row_field));
-		ulint		ext_len;
-
-		/* The field must end in a BLOB reference that is not
-		all zero. */
-		ut_a(copy_len >= BTR_EXTERN_FIELD_REF_SIZE);
-		ut_a(memcmp(ref + copy_len - BTR_EXTERN_FIELD_REF_SIZE,
-			    field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));
-
-		src = btr_copy_externally_stored_field(
-			&ext_len, ref, zip_size, copy_len, heap, trx);
-
-		ut_ad(ext_len < len);
-
-		copy_len = ext_len;
-	}
-
-	memcpy(padded, src, copy_len);
-	memset(padded + copy_len, 0x20, len - copy_len);
-
-	dfield_set_data(field, padded, len);
-}
-
-/** Insert a data tuple into a sort buffer.
-For an FTS index the field values are not stored in the sort buffer:
-each non-NULL value is copied into a document item and appended to the
-fts_doc_list of the bucket chosen by doc_id % fts_sort_pll_degree.
-@param[in,out] buf sort buffer
-@param[in] fts_index fts index to be created
-@param[in] old_table original table
-@param[in,out] psort_info parallel sort info
-@param[in] row table row
-@param[in] ext cache of externally stored
- column prefixes, or NULL
-@param[in,out] doc_id Doc ID if we are creating
- FTS index
-@param[in,out] conv_heap memory heap where to allocate data when
- converting to ROW_FORMAT=REDUNDANT, or NULL
- when not to invoke
- row_merge_buf_redundant_convert()
-@param[in,out] exceed_page set if the record size exceeds the page size
- when converting to ROW_FORMAT=REDUNDANT
-@param trx transaction, passed on to
- row_merge_buf_redundant_convert()
-@return number of rows added, 0 if out of space */
-static
-ulint
-row_merge_buf_add(
- row_merge_buf_t* buf,
- dict_index_t* fts_index,
- const dict_table_t* old_table,
- fts_psort_t* psort_info,
- const dtuple_t* row,
- const row_ext_t* ext,
- doc_id_t* doc_id,
- mem_heap_t* conv_heap,
- bool* exceed_page,
- trx_t* trx)
-{
- ulint i;
- const dict_index_t* index;
- mtuple_t* entry;
- dfield_t* field;
- const dict_field_t* ifield;
- ulint n_fields;
- ulint data_size;
- ulint extra_size;
- ulint bucket = 0;
- doc_id_t write_doc_id;
- ulint n_row_added = 0;
- DBUG_ENTER("row_merge_buf_add");
-
- if (buf->n_tuples >= buf->max_tuples) {
- DBUG_RETURN(0);
- }
-
- DBUG_EXECUTE_IF(
- "ib_row_merge_buf_add_two",
- if (buf->n_tuples >= 2) DBUG_RETURN(0););
-
- UNIV_PREFETCH_R(row->fields);
-
- /* If we are building FTS index, buf->index points to
- the 'fts_sort_idx', and real FTS index is stored in
- fts_index */
- index = (buf->index->type & DICT_FTS) ? fts_index : buf->index;
-
- n_fields = dict_index_get_n_fields(index);
-
- entry = &buf->tuples[buf->n_tuples];
- field = entry->fields = static_cast<dfield_t*>(
- mem_heap_alloc(buf->heap, n_fields * sizeof *entry->fields));
-
- data_size = 0;
- extra_size = UT_BITS_IN_BYTES(index->n_nullable);
-
- ifield = dict_index_get_nth_field(index, 0);
-
- for (i = 0; i < n_fields; i++, field++, ifield++) {
- ulint len;
- const dict_col_t* col;
- ulint col_no;
- ulint fixed_len;
- const dfield_t* row_field;
-
- col = ifield->col;
- col_no = dict_col_get_no(col);
-
- /* Process the Doc ID column */
- if (*doc_id > 0
- && col_no == index->table->fts->doc_col) {
- fts_write_doc_id((byte*) &write_doc_id, *doc_id);
-
- /* Note: field->data now points to a value on the
- stack: &write_doc_id after dfield_set_data(). Because
- there is only one doc_id per row, it shouldn't matter.
- We allocate a new buffer before we leave the function
- later below. */
-
- dfield_set_data(
- field, &write_doc_id, sizeof(write_doc_id));
-
- field->type.mtype = ifield->col->mtype;
- field->type.prtype = ifield->col->prtype;
- field->type.mbminmaxlen = DATA_MBMINMAXLEN(0, 0);
- field->type.len = ifield->col->len;
- } else {
- row_field = dtuple_get_nth_field(row, col_no);
-
- dfield_copy(field, row_field);
-
- /* Tokenize and process data for FTS */
- if (index->type & DICT_FTS) {
- fts_doc_item_t* doc_item;
- byte* value;
- void* ptr;
- const ulint max_trial_count = 10000;
- ulint trial_count = 0;
-
- /* fetch Doc ID if it already exists
- in the row, and not supplied by the
- caller. Even if the value column is
- NULL, we still need to get the Doc
- ID so to maintain the correct max
- Doc ID */
- if (*doc_id == 0) {
- const dfield_t* doc_field;
- doc_field = dtuple_get_nth_field(
- row,
- index->table->fts->doc_col);
- *doc_id = (doc_id_t) mach_read_from_8(
- static_cast<byte*>(
- dfield_get_data(doc_field)));
-
- if (*doc_id == 0) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "FTS Doc ID is zero. "
- "Record Skipped");
- DBUG_RETURN(0);
- }
- }
-
- if (dfield_is_null(field)) {
- n_row_added = 1;
- continue;
- }
-
- ptr = ut_malloc(sizeof(*doc_item)
- + field->len);
-
- doc_item = static_cast<fts_doc_item_t*>(ptr);
- value = static_cast<byte*>(ptr)
- + sizeof(*doc_item);
- memcpy(value, field->data, field->len);
- field->data = value;
-
- doc_item->field = field;
- doc_item->doc_id = *doc_id;
-
- bucket = *doc_id % fts_sort_pll_degree;
-
- /* Add doc item to fts_doc_list */
- mutex_enter(&psort_info[bucket].mutex);
-
- if (psort_info[bucket].error == DB_SUCCESS) {
- UT_LIST_ADD_LAST(
- doc_list,
- psort_info[bucket].fts_doc_list,
- doc_item);
- psort_info[bucket].memory_used +=
- sizeof(*doc_item) + field->len;
- } else {
- ut_free(doc_item);
- }
-
- mutex_exit(&psort_info[bucket].mutex);
-
- /* Sleep when memory used exceeds limit.
- memory_used is polled here without holding
- psort_info[bucket].mutex, and the wait gives
- up after max_trial_count sleeps. */
- while (psort_info[bucket].memory_used
- > FTS_PENDING_DOC_MEMORY_LIMIT
- && trial_count++ < max_trial_count) {
- os_thread_sleep(1000);
- }
-
- n_row_added = 1;
- continue;
- }
-
- if (field->len != UNIV_SQL_NULL
- && col->mtype == DATA_MYSQL
- && col->len != field->len) {
-
- if (conv_heap != NULL) {
- row_merge_buf_redundant_convert(
- row_field, field, col->len,
- dict_table_zip_size(old_table),
- conv_heap, trx);
- } else {
- /* Field length mismatch should not
- happen when rebuilding redundant row
- format table. */
- ut_ad(dict_table_is_comp(index->table));
- }
- }
- }
-
- len = dfield_get_len(field);
-
- if (dfield_is_null(field)) {
- ut_ad(!(col->prtype & DATA_NOT_NULL));
- continue;
- } else if (!ext) { /* no cache of externally stored prefixes to consult */
- } else if (dict_index_is_clust(index)) {
- /* Flag externally stored fields. */
- const byte* buf = row_ext_lookup(ext, col_no,
- &len);
- if (UNIV_LIKELY_NULL(buf)) {
- ut_a(buf != field_ref_zero);
- if (i < dict_index_get_n_unique(index)) {
- dfield_set_data(field, buf, len);
- } else {
- dfield_set_ext(field);
- len = dfield_get_len(field);
- }
- }
- } else {
- const byte* buf = row_ext_lookup(ext, col_no,
- &len);
- if (UNIV_LIKELY_NULL(buf)) {
- ut_a(buf != field_ref_zero);
- dfield_set_data(field, buf, len);
- }
- }
-
- /* If a column prefix index, take only the prefix */
-
- if (ifield->prefix_len) {
- len = dtype_get_at_most_n_mbchars(
- col->prtype,
- col->mbminmaxlen,
- ifield->prefix_len,
- len,
- static_cast<char*>(dfield_get_data(field)));
- dfield_set_len(field, len);
- }
-
- ut_ad(len <= col->len || col->mtype == DATA_BLOB ||
- ((col->mtype == DATA_VARCHAR || col->mtype == DATA_BINARY
- || col->mtype == DATA_VARMYSQL)
- && (col->len == 0
- || len <= col->len)));
-
- fixed_len = ifield->fixed_len;
- if (fixed_len && !dict_table_is_comp(index->table)
- && DATA_MBMINLEN(col->mbminmaxlen)
- != DATA_MBMAXLEN(col->mbminmaxlen)) {
- /* CHAR in ROW_FORMAT=REDUNDANT is always
- fixed-length, but in the temporary file it is
- variable-length for variable-length character
- sets. */
- fixed_len = 0;
- }
-
- if (fixed_len) {
-#ifdef UNIV_DEBUG
- ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen);
- ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen);
-
- /* len should be between the sizes calculated based on
- mbmaxlen and mbminlen */
- ut_ad(len <= fixed_len);
- ut_ad(!mbmaxlen || len >= mbminlen
- * (fixed_len / mbmaxlen));
-
- ut_ad(!dfield_is_ext(field));
-#endif /* UNIV_DEBUG */
- } else if (dfield_is_ext(field)) {
- extra_size += 2;
- } else if (len < 128
- || (col->len < 256
- && col->mtype != DATA_BLOB)) {
- extra_size++;
- } else {
- /* For variable-length columns, we look up the
- maximum length from the column itself. If this
- is a prefix index column shorter than 256 bytes,
- this will waste one byte. */
- extra_size += 2;
- }
- data_size += len;
- }
-
- /* If this is FTS index, we already populated the sort buffer, return
- here */
- if (index->type & DICT_FTS) {
- DBUG_RETURN(n_row_added);
- }
-
-#ifdef UNIV_DEBUG
- {
- ulint size;
- ulint extra;
-
- size = rec_get_converted_size_temp(
- index, entry->fields, n_fields, &extra);
-
- ut_ad(data_size + extra_size == size);
- ut_ad(extra_size == extra);
- }
-#endif /* UNIV_DEBUG */
-
- /* Add to the total size of the record in row_merge_block_t
- the encoded length of extra_size and the extra bytes (extra_size).
- See row_merge_buf_write() for the variable-length encoding
- of extra_size. */
- data_size += (extra_size + 1) + ((extra_size + 1) >= 0x80);
-
- /* Record size can exceed page size while converting to
- redundant row format. But there is assert
- ut_ad(size < UNIV_PAGE_SIZE) in rec_offs_data_size().
- It may hit the assert before attempting to insert the row. */
- if (conv_heap != NULL && data_size > UNIV_PAGE_SIZE) {
- *exceed_page = true;
- }
-
- ut_ad(data_size < srv_sort_buf_size);
-
- /* Reserve bytes for the end marker of row_merge_block_t. */
- if (buf->total_size + data_size >= (srv_sort_buf_size - ROW_MERGE_RESERVE_SIZE)) {
- DBUG_RETURN(0);
- }
-
- buf->total_size += data_size;
- buf->n_tuples++;
- n_row_added++;
-
- field = entry->fields;
-
- /* Copy the data fields. */
-
- do {
- dfield_dup(field++, buf->heap);
- } while (--n_fields);
-
- if (conv_heap != NULL) {
- mem_heap_empty(conv_heap);
- }
-
- DBUG_RETURN(n_row_added);
-}
-
-/*************************************************************//**
-Report a duplicate key. Every call is counted in dup->n_dup, but only
-the first duplicate is converted with innobase_fields_to_mysql(). */
-UNIV_INTERN
-void
-row_merge_dup_report(
-/*=================*/
-	row_merge_dup_t*	dup,	/*!< in/out: for reporting duplicates */
-	const dfield_t*		entry)	/*!< in: duplicate index entry */
-{
-	const bool	is_first = (dup->n_dup == 0);
-
-	dup->n_dup++;
-
-	if (is_first) {
-		innobase_fields_to_mysql(dup->table, dup->index, entry);
-	}
-}
-
-/*************************************************************//**
-Compare two tuples. The first n_uniq fields are compared first; when
-they are all equal and dup is given, a duplicate is reported unless one
-of those fields of a is NULL. The remaining fields are then compared
-to decide the order.
-@return 1, 0, -1 if a is greater, equal, less, respectively, than b */
-static MY_ATTRIBUTE((warn_unused_result))
-int
-row_merge_tuple_cmp(
-/*================*/
-	ulint			n_uniq,	/*!< in: number of unique fields */
-	ulint			n_field,/*!< in: number of fields */
-	const mtuple_t&		a,	/*!< in: first tuple to be compared */
-	const mtuple_t&		b,	/*!< in: second tuple to be compared */
-	row_merge_dup_t*	dup)	/*!< in/out: for reporting duplicates,
-					NULL if non-unique index */
-{
-	int	cmp;
-	ulint	pos = 0;	/* index of the next field to compare */
-	ulint	left = n_uniq;
-
-	ut_ad(n_uniq > 0);
-	ut_ad(n_uniq <= n_field);
-
-	/* Walk the unique prefix until a difference shows up or the
-	prefix is exhausted. */
-	do {
-		cmp = cmp_dfield_dfield(&a.fields[pos], &b.fields[pos]);
-		pos++;
-	} while (cmp == 0 && --left != 0);
-
-	if (cmp != 0) {
-		return(cmp);
-	}
-
-	if (dup != NULL) {
-		/* Tuples with equal unique prefixes are duplicates only
-		if none of the compared fields is NULL: NULL columns
-		sort as equal but are logically inequal. */
-		bool	has_null = false;
-
-		for (ulint j = 0; j < pos; j++) {
-			if (dfield_is_null(&a.fields[j])) {
-				has_null = true;
-				break;
-			}
-		}
-
-		if (!has_null) {
-			row_merge_dup_report(dup, a.fields);
-		}
-	}
-
-	/* The n_uniq fields were equal; go through the rest of the fields
-	so that the (internal) order matches that of the B-tree. */
-	for (ulint rest = n_field - n_uniq; rest != 0; rest--) {
-		cmp = cmp_dfield_dfield(&a.fields[pos], &b.fields[pos]);
-		pos++;
-
-		if (cmp != 0) {
-			return(cmp);
-		}
-	}
-
-	/* This should never be reached, except in a secondary index
-	when creating a secondary index and a PRIMARY KEY, and there
-	is a duplicate in the PRIMARY KEY that has not been detected
-	yet. Internally, an index must never contain duplicates. */
-	return(cmp);
-}
-
-/** Wrapper for row_merge_tuple_sort() to inject some more context to
-UT_SORT_FUNCTION_BODY(). The expansion takes n_uniq, n_field and dup
-from the scope in which the macro is used.
-@param tuples array of tuples that are being sorted
-@param aux work area, same size as tuples[]
-@param low lower bound of the sorting area, inclusive
-@param high upper bound of the sorting area, exclusive */
-#define row_merge_tuple_sort_ctx(tuples, aux, low, high) \
- row_merge_tuple_sort(n_uniq, n_field, dup, tuples, aux, low, high)
-/** Wrapper for row_merge_tuple_cmp() to inject some more context to
-UT_SORT_FUNCTION_BODY(). The expansion takes n_uniq, n_field and dup
-from the scope in which the macro is used.
-@param a first tuple to be compared
-@param b second tuple to be compared
-@return 1, 0, -1 if a is greater, equal, less, respectively, than b */
-#define row_merge_tuple_cmp_ctx(a,b) \
- row_merge_tuple_cmp(n_uniq, n_field, a, b, dup)
-
-/**********************************************************************//**
-Merge sort the tuple buffer in main memory, over the range [low, high) of tuples[]. */
-static MY_ATTRIBUTE((nonnull(4,5)))
-void
-row_merge_tuple_sort(
-/*=================*/
- ulint n_uniq, /*!< in: number of unique fields */
- ulint n_field,/*!< in: number of fields */
- row_merge_dup_t* dup, /*!< in/out: reporter of duplicates
- (NULL if non-unique index) */
- mtuple_t* tuples, /*!< in/out: tuples */
- mtuple_t* aux, /*!< in/out: work area */
- ulint low, /*!< in: lower bound of the
- sorting area, inclusive */
- ulint high) /*!< in: upper bound of the
- sorting area, exclusive */
-{
- ut_ad(n_field > 0);
- ut_ad(n_uniq <= n_field);
- /* The body comes from UT_SORT_FUNCTION_BODY(); the two _ctx wrapper macros pass n_uniq, n_field and dup on to row_merge_tuple_sort() and row_merge_tuple_cmp(). */
- UT_SORT_FUNCTION_BODY(row_merge_tuple_sort_ctx,
- tuples, aux, low, high, row_merge_tuple_cmp_ctx);
-}
-
-/******************************************************//**
-Sort the tuples held in a merge buffer.
-The number of unique fields and the total number of fields are taken
-from buf->index, and buf->tmp_tuples is handed over as the work area. */
-UNIV_INTERN
-void
-row_merge_buf_sort(
-/*===============*/
-	row_merge_buf_t*	buf,	/*!< in/out: sort buffer */
-	row_merge_dup_t*	dup)	/*!< in/out: reporter of duplicates
-					(NULL if non-unique index) */
-{
-	const ulint	n_unique = dict_index_get_n_unique(buf->index);
-	const ulint	n_fields = dict_index_get_n_fields(buf->index);
-
-	/* Sort the half-open range [0, buf->n_tuples). */
-	row_merge_tuple_sort(n_unique, n_fields, dup,
-			     buf->tuples, buf->tmp_tuples,
-			     0, buf->n_tuples);
-}
-
-/******************************************************//**
-Encode every tuple of a sorted buffer into a merge block, starting
-after the first ROW_MERGE_RESERVE_SIZE bytes, and terminate the data
-with an end-of-chunk marker byte. */
-UNIV_INTERN
-void
-row_merge_buf_write(
-/*================*/
-	const row_merge_buf_t*	buf,	/*!< in: sorted buffer */
-	const merge_file_t*	of UNIV_UNUSED,
-					/*!< in: output file */
-	row_merge_block_t*	block)	/*!< out: buffer for writing to file */
-{
-	const dict_index_t*	index = buf->index;
-	const ulint		n_fields = dict_index_get_n_fields(index);
-	byte*			dest = block + ROW_MERGE_RESERVE_SIZE;
-	ulint			i = 0;
-
-	while (i < buf->n_tuples) {
-		const mtuple_t*	entry = buf->tuples + i;
-
-		/* row_merge_buf_encode() advances dest past the
-		record it has just written. */
-		row_merge_buf_encode(&dest, index, entry, n_fields);
-		ut_ad(dest < &block[srv_sort_buf_size]);
-#ifdef UNIV_DEBUG
-		if (row_merge_print_write) {
-			fprintf(stderr, "row_merge_buf_write %p,%d,"
-				ULINTPF " " ULINTPF,
-				(void*) dest, of->fd, of->offset, i);
-			row_merge_tuple_print(stderr, entry, n_fields);
-		}
-#endif /* UNIV_DEBUG */
-		i++;
-	}
-
-	/* There must be room left for the marker, and the encoded
-	records must add up to exactly buf->total_size bytes. */
-	ut_a(dest < &block[srv_sort_buf_size]);
-	ut_a(dest == &block[0] + buf->total_size + ROW_MERGE_RESERVE_SIZE);
-
-	/* Write an "end-of-chunk" marker. */
-	*dest = 0;
-	dest++;
-#ifdef UNIV_DEBUG_VALGRIND
-	/* The rest of the block is uninitialized. Initialize it
-	to avoid bogus warnings. */
-	memset(dest, 0xff, &block[srv_sort_buf_size] - dest);
-#endif /* UNIV_DEBUG_VALGRIND */
-#ifdef UNIV_DEBUG
-	if (row_merge_print_write) {
-		fprintf(stderr, "row_merge_buf_write %p,%d," ULINTPF " EOF\n",
-			(void*) dest, of->fd, of->offset);
-	}
-#endif /* UNIV_DEBUG */
-}
-
-/******************************************************//**
-Create a memory heap and carve out of it three mrec_buf_t buffers and
-two offsets arrays sized for the fields of the given index. The first
-two elements of each offsets array are initialized to the array size
-and the number of index fields, respectively.
-@return memory heap */
-static
-mem_heap_t*
-row_merge_heap_create(
-/*==================*/
-	const dict_index_t*	index,		/*!< in: record descriptor */
-	mrec_buf_t**		buf,		/*!< out: 3 buffers */
-	ulint**			offsets1,	/*!< out: offsets */
-	ulint**			offsets2)	/*!< out: offsets */
-{
-	const ulint	n_fields = dict_index_get_n_fields(index);
-	const ulint	n_offs = 1 + REC_OFFS_HEADER_SIZE + n_fields;
-	const ulint	offs_bytes = n_offs * sizeof **offsets1;
-	const ulint	bufs_bytes = 3 * sizeof **buf;
-
-	mem_heap_t*	heap = mem_heap_create(2 * offs_bytes + bufs_bytes);
-
-	*buf = static_cast<mrec_buf_t*>(mem_heap_alloc(heap, bufs_bytes));
-	*offsets1 = static_cast<ulint*>(mem_heap_alloc(heap, offs_bytes));
-	*offsets2 = static_cast<ulint*>(mem_heap_alloc(heap, offs_bytes));
-
-	/* Element 0: number of allocated elements. */
-	(*offsets2)[0] = n_offs;
-	(*offsets1)[0] = n_offs;
-
-	/* Element 1: number of fields. */
-	(*offsets2)[1] = n_fields;
-	(*offsets1)[1] = n_fields;
-
-	return(heap);
-}
-
-/********************************************************************//**
-Read a merge block from the file system.
-When both crypt_data and crypt_buf are given, the block is decrypted
-into crypt_buf and copied back to buf, but only after a successful
-read: a failed read leaves buf with stale or uninitialized bytes that
-must not be fed to the decryption routine.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-row_merge_read(
-/*===========*/
-	int			fd,	/*!< in: file descriptor */
-	ulint			offset,	/*!< in: offset where to read
-					in number of row_merge_block_t
-					elements */
-	row_merge_block_t*	buf,	/*!< out: data */
-	fil_space_crypt_t*	crypt_data,/*!< in: table crypt data */
-	row_merge_block_t*	crypt_buf, /*!< in: crypt buf or NULL */
-	ulint			space)	/*!< in: space id */
-{
-	/* Byte offset of the block within the file. */
-	os_offset_t	ofs = ((os_offset_t) offset) * srv_sort_buf_size;
-	ibool		success;
-
-	DBUG_EXECUTE_IF("row_merge_read_failure", return(FALSE););
-
-#ifdef UNIV_DEBUG
-	if (row_merge_print_block_read) {
-		fprintf(stderr, "row_merge_read fd=%d ofs=" ULINTPF "\n",
-			fd, offset);
-	}
-#endif /* UNIV_DEBUG */
-
-	success = os_file_read_no_error_handling_int_fd(fd, buf,
-						       ofs, srv_sort_buf_size);
-
-	/* For encrypted tables, decrypt data after reading and copy data.
-	Skip this entirely if the read failed. If
-	row_merge_decrypt_buf() returns false, buf is left as read
-	(presumably the block was stored unencrypted; confirm against
-	row_merge_decrypt_buf()). */
-	if (success && crypt_data && crypt_buf) {
-		if (row_merge_decrypt_buf(crypt_data, offset, space,
-					  buf, crypt_buf)) {
-			memcpy(buf, crypt_buf, srv_sort_buf_size);
-		}
-	}
-
-#ifdef POSIX_FADV_DONTNEED
-	/* Each block is read exactly once. Free up the file cache. */
-	posix_fadvise(fd, ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED);
-#endif /* POSIX_FADV_DONTNEED */
-
-	if (UNIV_UNLIKELY(!success)) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: failed to read merge block at " UINT64PF "\n",
-			ofs);
-	}
-
-	return(UNIV_LIKELY(success));
-}
-
-/********************************************************************//**
-Write a merge block to the file system.
-With both crypt_data and crypt_buf given, the block is encrypted into
-crypt_buf and that copy is written; otherwise the first four bytes of
-buf are set to 0 to mark the block as unencrypted and buf itself is
-written.
-@return TRUE if request was successful, FALSE if fail */
-UNIV_INTERN
-ibool
-row_merge_write(
-/*============*/
-	int			fd,	/*!< in: file descriptor */
-	ulint			offset,	/*!< in: offset where to write,
-					in number of row_merge_block_t elements */
-	const void*		buf,	/*!< in: data */
-	fil_space_crypt_t*	crypt_data,	/*!< in: table crypt data */
-	void*			crypt_buf,	/*!< in: crypt buf or NULL */
-	ulint			space)	/*!< in: space id */
-{
-	const size_t		block_len = srv_sort_buf_size;
-	const os_offset_t	ofs = block_len * (os_offset_t) offset;
-	void*			out_buf;
-	ibool			ret;
-
-	DBUG_EXECUTE_IF("row_merge_write_failure", return(FALSE););
-
-	if (crypt_data != NULL && crypt_buf != NULL) {
-		/* For encrypted tables, encrypt data before writing */
-		row_merge_encrypt_buf(crypt_data, offset, space, (const byte *)buf, (byte *)crypt_buf);
-		out_buf = crypt_buf;
-	} else {
-		/* Mark block unencrypted */
-		out_buf = (void *)buf;
-		mach_write_to_4((byte *)out_buf, 0);
-	}
-
-	ret = os_file_write_int_fd("(merge)", fd, out_buf, ofs, block_len);
-
-#ifdef UNIV_DEBUG
-	if (row_merge_print_block_write) {
-		fprintf(stderr, "row_merge_write fd=%d ofs=" ULINTPF "\n",
-			fd, offset);
-	}
-#endif /* UNIV_DEBUG */
-
-#ifdef POSIX_FADV_DONTNEED
-	/* The block will be needed on the next merge pass,
-	but it can be evicted from the file cache meanwhile. */
-	posix_fadvise(fd, ofs, block_len, POSIX_FADV_DONTNEED);
-#endif /* POSIX_FADV_DONTNEED */
-
-	return(UNIV_LIKELY(ret));
-}
-
-/********************************************************************//**
-Read a merge record at b, reading the next block from the file into block whenever the record runs past the end of the current block.
-@return pointer to next record, or NULL on I/O error or end of list */
-UNIV_INTERN
-const byte*
-row_merge_read_rec(
-/*===============*/
- row_merge_block_t* block, /*!< in/out: file buffer */
- mrec_buf_t* buf, /*!< in/out: secondary buffer */
- const byte* b, /*!< in: pointer to record */
- const dict_index_t* index, /*!< in: index of the record */
- int fd, /*!< in: file descriptor */
- ulint* foffs, /*!< in/out: file offset */
- const mrec_t** mrec, /*!< out: pointer to merge record,
- or NULL on end of list
- (non-NULL on I/O error) */
- ulint* offsets,/*!< out: offsets of mrec */
- fil_space_crypt_t* crypt_data,/*!< in: table crypt data */
- row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */
- ulint space) /*!< in: space id */
-{
- ulint extra_size;
- ulint data_size;
- ulint avail_size;
-
- ut_ad(b >= &block[0]);
- ut_ad(b < &block[srv_sort_buf_size]);
-
- ut_ad(*offsets == 1 + REC_OFFS_HEADER_SIZE
- + dict_index_get_n_fields(index));
-
- if (b == &block[0]) {
- b+= ROW_MERGE_RESERVE_SIZE; /* skip the first ROW_MERGE_RESERVE_SIZE bytes of the block */
- }
- /* The first byte is the encoded extra_size; the value 0 marks the end of the list. */
- extra_size = *b++;
-
- if (UNIV_UNLIKELY(!extra_size)) {
- /* End of list */
- *mrec = NULL;
-#ifdef UNIV_DEBUG
- if (row_merge_print_read) {
- fprintf(stderr, "row_merge_read %p,%p,%d," ULINTPF
- " EOF\n",
- (const void*) b, (const void*) block,
- fd, *foffs);
- }
-#endif /* UNIV_DEBUG */
- return(NULL);
- }
- /* A first byte >= 0x80 means a two-byte encoding: its low 7 bits are the high part, the next byte is the low part. */
- if (extra_size >= 0x80) {
- /* Read another byte of extra_size. */
-
- if (UNIV_UNLIKELY(b >= &block[srv_sort_buf_size])) {
- if (!row_merge_read(fd, ++(*foffs), block,
- crypt_data, crypt_block, space)) {
-err_exit:
- /* Signal I/O error. */
- *mrec = b;
- return(NULL);
- }
-
- /* Wrap around to the beginning of the buffer. */
- b = &block[ROW_MERGE_RESERVE_SIZE];
- }
-
- extra_size = (extra_size & 0x7f) << 8;
- extra_size |= *b++;
- }
-
- /* Normalize extra_size. Above, value 0 signals "end of list". */
- extra_size--;
-
- /* Read the extra bytes. */
-
- if (UNIV_UNLIKELY(b + extra_size >= &block[srv_sort_buf_size])) {
- /* The record spans two blocks. Copy the entire record
- to the auxiliary buffer and handle this as a special
- case. */
-
- avail_size = &block[srv_sort_buf_size] - b;
- ut_ad(avail_size < sizeof *buf);
- memcpy(*buf, b, avail_size);
-
- if (!row_merge_read(fd, ++(*foffs), block,
- crypt_data, crypt_block, space)) {
-
- goto err_exit;
- }
-
- /* Wrap around to the beginning of the buffer. */
- b = &block[ROW_MERGE_RESERVE_SIZE];
-
- /* Copy the record. */
- memcpy(*buf + avail_size, b, extra_size - avail_size);
- b += extra_size - avail_size;
-
- *mrec = *buf + extra_size;
-
- rec_init_offsets_temp(*mrec, index, offsets);
-
- data_size = rec_offs_data_size(offsets);
-
- /* These overflows should be impossible given that
- records are much smaller than either buffer, and
- the record starts near the beginning of each buffer. */
- ut_a(extra_size + data_size < sizeof *buf);
- ut_a(b + data_size < &block[srv_sort_buf_size]);
-
- /* Copy the data bytes. */
- memcpy(*buf + extra_size, b, data_size);
- b += data_size;
-
- goto func_exit;
- }
- /* The extra bytes lie entirely within the current block; point mrec into the block itself. */
- *mrec = b + extra_size;
-
- rec_init_offsets_temp(*mrec, index, offsets);
-
- data_size = rec_offs_data_size(offsets);
- ut_ad(extra_size + data_size < sizeof *buf);
-
- b += extra_size + data_size;
-
- if (UNIV_LIKELY(b < &block[srv_sort_buf_size])) {
- /* The record fits entirely in the block.
- This is the normal case. */
- goto func_exit;
- }
-
- /* The record spans two blocks. Copy it to buf. */
-
- b -= extra_size + data_size; /* rewind to the start of the extra bytes */
- avail_size = &block[srv_sort_buf_size] - b;
- memcpy(*buf, b, avail_size);
- *mrec = *buf + extra_size;
-#ifdef UNIV_DEBUG
- /* We cannot invoke rec_offs_make_valid() here, because there
- are no REC_N_NEW_EXTRA_BYTES between extra_size and data_size.
- Similarly, rec_offs_validate() would fail, because it invokes
- rec_get_status(). */
- offsets[2] = (ulint) *mrec;
- offsets[3] = (ulint) index;
-#endif /* UNIV_DEBUG */
-
- if (!row_merge_read(fd, ++(*foffs), block,
- crypt_data, crypt_block, space)) {
-
- goto err_exit;
- }
-
- /* Wrap around to the beginning of the buffer. */
- b = &block[ROW_MERGE_RESERVE_SIZE];
-
- /* Copy the rest of the record. */
- memcpy(*buf + avail_size, b, extra_size + data_size - avail_size);
- b += extra_size + data_size - avail_size;
-
-func_exit:
-#ifdef UNIV_DEBUG
- if (row_merge_print_read) {
- fprintf(stderr, "row_merge_read %p,%p,%d," ULINTPF " ",
- (const void*) b, (const void*) block,
- fd, *foffs);
- rec_print_comp(stderr, *mrec, offsets);
- putc('\n', stderr);
- }
-#endif /* UNIV_DEBUG */
-
- return(b);
-}
-
-/********************************************************************//**
-Write a merge record to a buffer: first the encoded extra_size in one
-or two bytes, then the record itself (extra bytes followed by data).
-In non-debug builds the size, fd and foffs arguments are dropped by
-the wrapper macro below. */
-static
-void
-row_merge_write_rec_low(
-/*====================*/
-	byte*		b,	/*!< out: buffer */
-	ulint		e,	/*!< in: encoded extra_size */
-#ifdef UNIV_DEBUG
-	ulint		size,	/*!< in: total size to write */
-	int		fd,	/*!< in: file descriptor */
-	ulint		foffs,	/*!< in: file offset */
-#endif /* UNIV_DEBUG */
-	const mrec_t*	mrec,	/*!< in: record to write */
-	const ulint*	offsets)/*!< in: offsets of mrec */
-#ifndef UNIV_DEBUG
-# define row_merge_write_rec_low(b, e, size, fd, foffs, mrec, offsets)	\
-	row_merge_write_rec_low(b, e, mrec, offsets)
-#endif /* !UNIV_DEBUG */
-{
-	const ulint	rec_size = rec_offs_size(offsets);
-#ifdef UNIV_DEBUG
-	const byte* const	expected_end = b + size;
-	ut_ad(e == rec_offs_extra_size(offsets) + 1);
-
-	if (row_merge_print_write) {
-		fprintf(stderr, "row_merge_write %p,%d," ULINTPF " ",
-			(void*) b, fd, foffs);
-		rec_print_comp(stderr, mrec, offsets);
-		putc('\n', stderr);
-	}
-#endif /* UNIV_DEBUG */
-
-	if (e >= 0x80) {
-		/* Two-byte form: flag bit plus the high bits,
-		then the low byte. */
-		*b++ = (byte) (0x80 | (e >> 8));
-		*b++ = (byte) e;
-	} else {
-		/* One-byte form. */
-		*b++ = (byte) e;
-	}
-
-	/* The record starts rec_offs_extra_size() bytes before mrec. */
-	memcpy(b, mrec - rec_offs_extra_size(offsets), rec_size);
-	ut_ad(b + rec_size == expected_end);
-}
-
-/********************************************************************//**
-Append a merge record to the block. If the record does not fit, the
-block is filled with the head of the record and written to the file,
-and the tail of the record is placed at the start of the emptied block.
-@return pointer to end of block, or NULL on error */
-static
-byte*
-row_merge_write_rec(
-/*================*/
-	row_merge_block_t*	block,	/*!< in/out: file buffer */
-	mrec_buf_t*		buf,	/*!< in/out: secondary buffer */
-	byte*			b,	/*!< in: pointer to end of block */
-	int			fd,	/*!< in: file descriptor */
-	ulint*			foffs,	/*!< in/out: file offset */
-	const mrec_t*		mrec,	/*!< in: record to write */
-	const ulint*		offsets,/*!< in: offsets of mrec */
-	fil_space_crypt_t*	crypt_data,/*!< in: table crypt data */
-	row_merge_block_t*	crypt_block, /*!< in: crypt buf or NULL */
-	ulint			space)	/*!< in: space id */
-{
-	ut_ad(block);
-	ut_ad(buf);
-	ut_ad(b >= &block[0]);
-	ut_ad(b < &block[srv_sort_buf_size]);
-	ut_ad(mrec);
-	ut_ad(foffs);
-	ut_ad(mrec < &block[0] || mrec > &block[srv_sort_buf_size]);
-	ut_ad(mrec < buf[0] || mrec > buf[1]);
-
-	/* Normalize extra_size. Value 0 signals "end of list". */
-	const ulint	extra_size = rec_offs_extra_size(offsets) + 1;
-
-	/* One additional byte is needed when extra_size takes the
-	two-byte encoding. */
-	const ulint	size = extra_size + (extra_size >= 0x80)
-		+ rec_offs_data_size(offsets);
-
-	if (b == &block[0]) {
-		b += ROW_MERGE_RESERVE_SIZE;
-	}
-
-	if (UNIV_LIKELY(b + size < &block[srv_sort_buf_size])) {
-		/* The record fits in the current block. */
-		row_merge_write_rec_low(b, extra_size, size, fd, *foffs,
-					mrec, offsets);
-		return(b + size);
-	}
-
-	/* The record spans two blocks.
-	Copy it to the temporary buffer first. */
-	const ulint	head_len = &block[srv_sort_buf_size] - b;
-
-	row_merge_write_rec_low(buf[0],
-				extra_size, size, fd, *foffs,
-				mrec, offsets);
-
-	/* Fill the block with the head of the record and write the
-	completed block out. */
-	memcpy(b, buf[0], head_len);
-
-	if (!row_merge_write(fd, (*foffs)++, block,
-			     crypt_data, crypt_block, space)) {
-		return(NULL);
-	}
-
-	UNIV_MEM_INVALID(&block[0], srv_sort_buf_size);
-
-	/* Place the tail of the record at the head of the new block. */
-	const ulint	tail_len = size - head_len;
-
-	b = &block[ROW_MERGE_RESERVE_SIZE];
-	memcpy(b, buf[0] + head_len, tail_len);
-
-	return(b + tail_len);
-}
-
-/********************************************************************//**
-Append an end-of-list marker (a zero byte) to the block and write the
-block to the file.
-@return pointer to end of block, or NULL on error */
-static
-byte*
-row_merge_write_eof(
-/*================*/
-	row_merge_block_t*	block,	/*!< in/out: file buffer */
-	byte*			b,	/*!< in: pointer to end of block */
-	int			fd,	/*!< in: file descriptor */
-	ulint*			foffs,	/*!< in/out: file offset */
-	fil_space_crypt_t*	crypt_data,	/*!< in: table crypt data */
-	row_merge_block_t*	crypt_block,	/*!< in: crypt buf or NULL */
-	ulint			space)	/*!< in: space id */
-{
-	ut_ad(block);
-	ut_ad(b >= &block[0]);
-	ut_ad(b < &block[srv_sort_buf_size]);
-	ut_ad(foffs);
-#ifdef UNIV_DEBUG
-	if (row_merge_print_write) {
-		fprintf(stderr, "row_merge_write %p,%p,%d," ULINTPF " EOF\n",
-			(void*) b, (void*) block, fd, *foffs);
-	}
-#endif /* UNIV_DEBUG */
-
-	/* An untouched block starts after the reserved bytes. */
-	byte*	marker = (b == &block[0])
-		? b + ROW_MERGE_RESERVE_SIZE
-		: b;
-
-	*marker = 0;
-	marker++;
-
-	UNIV_MEM_ASSERT_RW(&block[0], marker - &block[0]);
-	UNIV_MEM_ASSERT_W(&block[0], srv_sort_buf_size);
-
-#ifdef UNIV_DEBUG_VALGRIND
-	/* The rest of the block is uninitialized. Initialize it
-	to avoid bogus warnings. */
-	memset(marker, 0xff, &block[srv_sort_buf_size] - marker);
-#endif /* UNIV_DEBUG_VALGRIND */
-
-	const bool	written = row_merge_write(fd, (*foffs)++, block,
-						  crypt_data, crypt_block,
-						  space);
-	if (!written) {
-		return(NULL);
-	}
-
-	UNIV_MEM_INVALID(&block[0], srv_sort_buf_size);
-
-	/* The block has been flushed; the caller starts over at its
-	beginning. */
-	return(&block[0]);
-}
-
-/** Return the temporary file handle, creating the file first when the
-handle is still negative (not yet created).
-@param[in,out]	tmpfd	temporary file handle
-@param[in]	path	path to create temporary file
-@return file descriptor, or -1 on failure */
-static MY_ATTRIBUTE((warn_unused_result))
-int
-row_merge_tmpfile_if_needed(
-	int*		tmpfd,
-	const char*	path)
-{
-	if (*tmpfd >= 0) {
-		/* Already created. */
-		return(*tmpfd);
-	}
-
-	*tmpfd = row_merge_file_create_low(path);
-
-	return(*tmpfd);
-}
-
-/** Make sure that the merge file, and the shared temporary file that
-goes with it, exist; create whichever is still missing.
-@param[in,out]	file	merge file structure
-@param[in,out]	tmpfd	temporary file structure
-@param[in]	nrec	number of records in the file
-@param[in]	path	path to create temporary files
-@return file descriptor, or -1 on failure */
-static MY_ATTRIBUTE((warn_unused_result))
-int
-row_merge_file_create_if_needed(
-	merge_file_t*	file,
-	int*		tmpfd,
-	ulint		nrec,
-	const char*	path)
-{
-	/* An existing merge file implies an existing temporary file. */
-	ut_ad(file->fd < 0 || *tmpfd >= 0);
-
-	if (file->fd >= 0) {
-		/* Nothing to create. */
-		return(file->fd);
-	}
-
-	if (row_merge_file_create(file, path) >= 0) {
-		if (row_merge_tmpfile_if_needed(tmpfd, path) < 0) {
-			return(-1);
-		}
-
-		/* The record count is only set on a freshly created
-		file. */
-		file->n_rec = nrec;
-	}
-
-	ut_ad(file->fd < 0 || *tmpfd >= 0);
-	return(file->fd);
-}
-
-/** Reads clustered index of the table and create temporary files
-containing the index entries for the indexes to be built.
-@param[in] trx transaction
-@param[in,out] table MySQL table object, for reporting erroneous
- records
-@param[in] old_table table where rows are read from
-@param[in] new_table table where indexes are created; identical to
- old_table unless creating a PRIMARY KEY
-@param[in] online true if creating indexes online
-@param[in] index indexes to be created
-@param[in] fts_sort_idx full-text index to be created, or NULL
-@param[in] psort_info parallel sort info for fts_sort_idx creation,
- or NULL
-@param[in] files temporary files
-@param[in] key_numbers MySQL key numbers to create
-@param[in] n_index number of indexes to create
-@param[in] add_cols default values of added columns, or NULL
-@param[in] col_map mapping of old column numbers to new ones, or
- NULL if old_table == new_table
-@param[in] add_autoinc number of added AUTO_INCREMENT columns, or
- ULINT_UNDEFINED if none is added
-@param[in,out] sequence autoinc sequence
-@param[in,out] block file buffer
-@param[in,out] tmpfd temporary file handle
-@return DB_SUCCESS or error
-static MY_ATTRIBUTE((nonnull(1,2,3,4,6,9,10,16), warn_unused_result))
-dberr_t
-row_merge_read_clustered_index(
- trx_t* trx,
- struct TABLE* table,
- const dict_table_t* old_table,
- const dict_table_t* new_table,
- bool online,
- dict_index_t** index,
- dict_index_t* fts_sort_idx,
- fts_psort_t* psort_info,
- merge_file_t* files,
- const ulint* key_numbers,
- ulint n_index,
- const dtuple_t* add_cols,
- const ulint* col_map,
- ulint add_autoinc,
- ib_sequence_t& sequence,
- row_merge_block_t* block,
- int* tmpfd,
- float pct_cost, /*!< in: percent of task weight
- out of total alter job */
- fil_space_crypt_t* crypt_data,/*!< in: crypt data or NULL */
- row_merge_block_t* crypt_block)/*!< in: in/out: crypted file
- buffer */
-{
- dict_index_t* clust_index; /* Clustered index */
- mem_heap_t* row_heap; /* Heap memory to create
- clustered index tuples */
- row_merge_buf_t** merge_buf; /* Temporary list for records*/
- btr_pcur_t pcur; /* Cursor on the clustered
- index */
- mtr_t mtr; /* Mini transaction */
- dberr_t err = DB_SUCCESS;/* Return code */
- ulint n_nonnull = 0; /* number of columns
- changed to NOT NULL */
- ulint* nonnull = NULL; /* NOT NULL columns */
- dict_index_t* fts_index = NULL;/* FTS index */
- doc_id_t doc_id = 0;
- doc_id_t max_doc_id = 0;
- ibool add_doc_id = FALSE;
- os_event_t fts_parallel_sort_event = NULL;
- ibool fts_pll_sort = FALSE;
- ib_int64_t sig_count = 0;
- mem_heap_t* conv_heap = NULL;
-
- float curr_progress = 0.0;
- ib_int64_t read_rows = 0;
- ib_int64_t table_total_rows = 0;
-
- DBUG_ENTER("row_merge_read_clustered_index");
-
- ut_ad((old_table == new_table) == !col_map);
- ut_ad(!add_cols || col_map);
-
- table_total_rows = dict_table_get_n_rows(old_table);
- if(table_total_rows == 0) {
- /* We don't know total row count */
- table_total_rows = 1;
- }
-
- trx->op_info = "reading clustered index";
-
-#ifdef FTS_INTERNAL_DIAG_PRINT
- DEBUG_FTS_SORT_PRINT("FTS_SORT: Start Create Index\n");
-#endif
-
- ut_ad(trx->mysql_thd != NULL);
- const char* path = thd_innodb_tmpdir(trx->mysql_thd);
-
- /* Create and initialize memory for record buffers */
-
- merge_buf = static_cast<row_merge_buf_t**>(
- mem_alloc(n_index * sizeof *merge_buf));
-
- for (ulint i = 0; i < n_index; i++) {
- if (index[i]->type & DICT_FTS) {
-
- /* We are building a FT index, make sure
- we have the temporary 'fts_sort_idx' */
- ut_a(fts_sort_idx);
-
- fts_index = index[i];
-
- merge_buf[i] = row_merge_buf_create(fts_sort_idx);
-
- add_doc_id = DICT_TF2_FLAG_IS_SET(
- new_table, DICT_TF2_FTS_ADD_DOC_ID);
-
- /* If Doc ID does not exist in the table itself,
- fetch the first FTS Doc ID */
- if (add_doc_id) {
- fts_get_next_doc_id(
- (dict_table_t*) new_table,
- &doc_id);
- ut_ad(doc_id > 0);
- }
-
- fts_pll_sort = TRUE;
- row_fts_start_psort(psort_info);
- fts_parallel_sort_event =
- psort_info[0].psort_common->sort_event;
- } else {
- merge_buf[i] = row_merge_buf_create(index[i]);
- }
- }
-
- mtr_start(&mtr);
-
- /* Find the clustered index and create a persistent cursor
- based on that. */
-
- clust_index = dict_table_get_first_index(old_table);
-
- btr_pcur_open_at_index_side(
- true, clust_index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
-
- if (old_table != new_table) {
- /* The table is being rebuilt. Identify the columns
- that were flagged NOT NULL in the new table, so that
- we can quickly check that the records in the old table
- do not violate the added NOT NULL constraints. */
-
- nonnull = static_cast<ulint*>(
- mem_alloc(dict_table_get_n_cols(new_table)
- * sizeof *nonnull));
-
- for (ulint i = 0; i < dict_table_get_n_cols(old_table); i++) {
- if (dict_table_get_nth_col(old_table, i)->prtype
- & DATA_NOT_NULL) {
- continue;
- }
-
- const ulint j = col_map[i];
-
- if (j == ULINT_UNDEFINED) {
- /* The column was dropped. */
- continue;
- }
-
- if (dict_table_get_nth_col(new_table, j)->prtype
- & DATA_NOT_NULL) {
- nonnull[n_nonnull++] = j;
- }
- }
-
- if (!n_nonnull) {
- mem_free(nonnull);
- nonnull = NULL;
- }
- }
-
- row_heap = mem_heap_create(sizeof(mrec_buf_t));
-
- if (dict_table_is_comp(old_table)
- && !dict_table_is_comp(new_table)) {
- conv_heap = mem_heap_create(sizeof(mrec_buf_t));
- }
-
- /* Scan the clustered index. */
- for (;;) {
- const rec_t* rec;
- ulint* offsets;
- const dtuple_t* row;
- row_ext_t* ext;
- page_cur_t* cur = btr_pcur_get_page_cur(&pcur);
-
- /* Do not continue if table pages are still encrypted */
- if (!old_table->is_readable() ||
- !new_table->is_readable()) {
- err = DB_DECRYPTION_FAILED;
- trx->error_key_num = 0;
- goto func_exit;
- }
-
- page_cur_move_to_next(cur);
-
- if (page_cur_is_after_last(cur)) {
- if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
- err = DB_INTERRUPTED;
- trx->error_key_num = 0;
- goto func_exit;
- }
-
- if (online && old_table != new_table) {
- err = row_log_table_get_error(clust_index);
- if (err != DB_SUCCESS) {
- trx->error_key_num = 0;
- goto func_exit;
- }
- }
-#ifdef DBUG_OFF
-# define dbug_run_purge false
-#else /* DBUG_OFF */
- bool dbug_run_purge = false;
-#endif /* DBUG_OFF */
- DBUG_EXECUTE_IF(
- "ib_purge_on_create_index_page_switch",
- dbug_run_purge = true;);
-
- if (dbug_run_purge
- || rw_lock_get_waiters(
- dict_index_get_lock(clust_index))) {
- /* There are waiters on the clustered
- index tree lock, likely the purge
- thread. Store and restore the cursor
- position, and yield so that scanning a
- large table will not starve other
- threads. */
-
- /* Store the cursor position on the last user
- record on the page. */
- btr_pcur_move_to_prev_on_page(&pcur);
- /* Leaf pages must never be empty, unless
- this is the only page in the index tree. */
- ut_ad(btr_pcur_is_on_user_rec(&pcur)
- || buf_block_get_page_no(
- btr_pcur_get_block(&pcur))
- == clust_index->page);
-
- btr_pcur_store_position(&pcur, &mtr);
- mtr_commit(&mtr);
-
- if (dbug_run_purge) {
- /* This is for testing
- purposes only (see
- DBUG_EXECUTE_IF above). We
- signal the purge thread and
- hope that the purge batch will
- complete before we execute
- btr_pcur_restore_position(). */
- trx_purge_run();
- os_thread_sleep(1000000);
- }
-
- /* Give the waiters a chance to proceed. */
- os_thread_yield();
-
- mtr_start(&mtr);
- /* Restore position on the record, or its
- predecessor if the record was purged
- meanwhile. */
- btr_pcur_restore_position(
- BTR_SEARCH_LEAF, &pcur, &mtr);
- /* Move to the successor of the
- original record. */
- if (!btr_pcur_move_to_next_user_rec(
- &pcur, &mtr)) {
-end_of_index:
- row = NULL;
- mtr_commit(&mtr);
- mem_heap_free(row_heap);
- if (nonnull) {
- mem_free(nonnull);
- }
- goto write_buffers;
- }
- } else {
- ulint next_page_no;
- buf_block_t* block;
-
- next_page_no = btr_page_get_next(
- page_cur_get_page(cur), &mtr);
-
- if (next_page_no == FIL_NULL) {
- goto end_of_index;
- }
-
- block = page_cur_get_block(cur);
- block = btr_block_get(
- buf_block_get_space(block),
- buf_block_get_zip_size(block),
- next_page_no, BTR_SEARCH_LEAF,
- clust_index, &mtr);
-
- btr_leaf_page_release(page_cur_get_block(cur),
- BTR_SEARCH_LEAF, &mtr);
- page_cur_set_before_first(block, cur);
- page_cur_move_to_next(cur);
-
- ut_ad(!page_cur_is_after_last(cur));
- }
- }
-
- rec = page_cur_get_rec(cur);
-
- SRV_CORRUPT_TABLE_CHECK(rec,
- {
- err = DB_CORRUPTION;
- goto func_exit;
- });
-
- offsets = rec_get_offsets(rec, clust_index, NULL,
- ULINT_UNDEFINED, &row_heap);
-
- if (online) {
- /* Perform a REPEATABLE READ.
-
- When rebuilding the table online,
- row_log_table_apply() must not see a newer
- state of the table when applying the log.
- This is mainly to prevent false duplicate key
- errors, because the log will identify records
- by the PRIMARY KEY, and also to prevent unsafe
- BLOB access.
-
- When creating a secondary index online, this
- table scan must not see records that have only
- been inserted to the clustered index, but have
- not been written to the online_log of
- index[]. If we performed READ UNCOMMITTED, it
- could happen that the ADD INDEX reaches
- ONLINE_INDEX_COMPLETE state between the time
- the DML thread has updated the clustered index
- but has not yet accessed secondary index. */
- ut_ad(trx->read_view);
-
- if (!read_view_sees_trx_id(
- trx->read_view,
- row_get_rec_trx_id(
- rec, clust_index, offsets))) {
- rec_t* old_vers;
-
- row_vers_build_for_consistent_read(
- rec, &mtr, clust_index, &offsets,
- trx->read_view, &row_heap,
- row_heap, &old_vers);
-
- rec = old_vers;
-
- if (!rec) {
- continue;
- }
- }
-
- if (rec_get_deleted_flag(
- rec,
- dict_table_is_comp(old_table))) {
- /* This record was deleted in the latest
- committed version, or it was deleted and
- then reinserted-by-update before purge
- kicked in. Skip it. */
- continue;
- }
-
- ut_ad(!rec_offs_any_null_extern(rec, offsets));
- } else if (rec_get_deleted_flag(
- rec, dict_table_is_comp(old_table))) {
- /* Skip delete-marked records.
-
- Skipping delete-marked records will make the
- created indexes unuseable for transactions
- whose read views were created before the index
- creation completed, but preserving the history
- would make it tricky to detect duplicate
- keys. */
- continue;
- }
-
- /* When !online, we are holding a lock on old_table, preventing
- any inserts that could have written a record 'stub' before
- writing out off-page columns. */
- ut_ad(!rec_offs_any_null_extern(rec, offsets));
-
- /* Build a row based on the clustered index. */
-
- row = row_build(ROW_COPY_POINTERS, clust_index,
- rec, offsets, new_table,
- add_cols, col_map, &ext, row_heap);
- ut_ad(row);
-
- for (ulint i = 0; i < n_nonnull; i++) {
- const dfield_t* field = &row->fields[nonnull[i]];
-
- ut_ad(dfield_get_type(field)->prtype & DATA_NOT_NULL);
-
- if (dfield_is_null(field)) {
- err = DB_INVALID_NULL;
- trx->error_key_num = 0;
- goto func_exit;
- }
- }
-
- /* Get the next Doc ID */
- if (add_doc_id) {
- doc_id++;
- } else {
- doc_id = 0;
- }
-
- if (add_autoinc != ULINT_UNDEFINED) {
-
- ut_ad(add_autoinc
- < dict_table_get_n_user_cols(new_table));
-
- const dfield_t* dfield;
-
- dfield = dtuple_get_nth_field(row, add_autoinc);
- if (dfield_is_null(dfield)) {
- goto write_buffers;
- }
-
- const dtype_t* dtype = dfield_get_type(dfield);
- byte* b = static_cast<byte*>(dfield_get_data(dfield));
-
- if (sequence.eof()) {
- err = DB_ERROR;
- trx->error_key_num = 0;
-
- ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
- ER_AUTOINC_READ_FAILED, "[NULL]");
-
- goto func_exit;
- }
-
- ulonglong value = sequence++;
-
- switch (dtype_get_mtype(dtype)) {
- case DATA_INT: {
- ibool usign;
- ulint len = dfield_get_len(dfield);
-
- usign = dtype_get_prtype(dtype) & DATA_UNSIGNED;
- mach_write_ulonglong(b, value, len, usign);
-
- break;
- }
-
- case DATA_FLOAT:
- mach_float_write(
- b, static_cast<float>(value));
- break;
-
- case DATA_DOUBLE:
- mach_double_write(
- b, static_cast<double>(value));
- break;
-
- default:
- ut_ad(0);
- }
- }
-
-write_buffers:
- /* Build all entries for all the indexes to be created
- in a single scan of the clustered index. */
-
- for (ulint i = 0; i < n_index; i++) {
- row_merge_buf_t* buf = merge_buf[i];
- merge_file_t* file = &files[i];
- ulint rows_added = 0;
- bool exceed_page = false;
-
- if (UNIV_LIKELY
- (row && (rows_added = row_merge_buf_add(
- buf, fts_index, old_table,
- psort_info, row, ext, &doc_id,
- conv_heap, &exceed_page, trx)))) {
-
- /* If we are creating FTS index,
- a single row can generate more
- records for tokenized word */
- file->n_rec += rows_added;
-
- if (exceed_page) {
- err = DB_TOO_BIG_RECORD;
- break;
- }
-
- if (doc_id > max_doc_id) {
- max_doc_id = doc_id;
- }
-
- if (buf->index->type & DICT_FTS) {
- /* Check if error occurs in child thread */
- for (ulint j = 0; j < fts_sort_pll_degree; j++) {
- if (psort_info[j].error != DB_SUCCESS) {
- err = psort_info[j].error;
- trx->error_key_num = i;
- break;
- }
- }
-
- if (err != DB_SUCCESS) {
- break;
- }
- }
-
- continue;
- }
-
- if (buf->index->type & DICT_FTS) {
- if (!row || !doc_id) {
- continue;
- }
- }
-
- /* The buffer must be sufficiently large
- to hold at least one record. It may only
- be empty when we reach the end of the
- clustered index. row_merge_buf_add()
- must not have been called in this loop. */
- ut_ad(buf->n_tuples || row == NULL);
-
- /* We have enough data tuples to form a block.
- Sort them and write to disk. */
-
- if (buf->n_tuples) {
- if (dict_index_is_unique(buf->index)) {
- row_merge_dup_t dup = {
- buf->index, table, col_map, 0};
-
- row_merge_buf_sort(buf, &dup);
-
- if (dup.n_dup) {
- err = DB_DUPLICATE_KEY;
- trx->error_key_num
- = key_numbers[i];
- break;
- }
- } else {
- row_merge_buf_sort(buf, NULL);
- }
- } else if (online && new_table == old_table) {
- /* Note the newest transaction that
- modified this index when the scan was
- completed. We prevent older readers
- from accessing this index, to ensure
- read consistency. */
-
- trx_id_t max_trx_id;
-
- ut_a(row == NULL);
- rw_lock_x_lock(
- dict_index_get_lock(buf->index));
- ut_a(dict_index_get_online_status(buf->index)
- == ONLINE_INDEX_CREATION);
-
- max_trx_id = row_log_get_max_trx(buf->index);
-
- if (max_trx_id > buf->index->trx_id) {
- buf->index->trx_id = max_trx_id;
- }
-
- rw_lock_x_unlock(
- dict_index_get_lock(buf->index));
- }
-
- if (buf->n_tuples > 0) {
-
- if (row_merge_file_create_if_needed(
- file, tmpfd, buf->n_tuples, path) < 0) {
- err = DB_OUT_OF_MEMORY;
- trx->error_key_num = i;
- break;
- }
-
- ut_ad(file->n_rec > 0);
-
- row_merge_buf_write(buf, file, block);
-
- if (!row_merge_write(file->fd, file->offset++,
- block, crypt_data, crypt_block,
- new_table->space)) {
- err = DB_TEMP_FILE_WRITE_FAILURE;
- trx->error_key_num = i;
- break;
- }
- }
-
- UNIV_MEM_INVALID(&block[0], srv_sort_buf_size);
-
- merge_buf[i] = row_merge_buf_empty(buf);
-
- if (UNIV_LIKELY(row != NULL)) {
- /* Try writing the record again, now
- that the buffer has been written out
- and emptied. */
-
- if (UNIV_UNLIKELY
- (!(rows_added = row_merge_buf_add(
- buf, fts_index, old_table,
- psort_info, row, ext,
- &doc_id, conv_heap,
- &exceed_page, trx)))) {
- /* An empty buffer should have enough
- room for at least one record. */
- ut_error;
- }
-
- if (exceed_page) {
- err = DB_TOO_BIG_RECORD;
- break;
- }
-
- file->n_rec += rows_added;
- }
- }
-
- if (row == NULL) {
- goto all_done;
- }
-
- if (err != DB_SUCCESS) {
- goto func_exit;
- }
-
- mem_heap_empty(row_heap);
-
- /* Increment innodb_onlineddl_pct_progress status variable */
- read_rows++;
- if(read_rows % 1000 == 0) {
- /* Update progress for each 1000 rows */
- curr_progress = (read_rows >= table_total_rows) ?
- pct_cost :
- ((pct_cost * read_rows) / table_total_rows);
- /* presenting 10.12% as 1012 integer */
- onlineddl_pct_progress = (ulint) (curr_progress * 100);
- }
- }
-
-func_exit:
- mtr_commit(&mtr);
-
- mem_heap_free(row_heap);
-
- if (nonnull) {
- mem_free(nonnull);
- }
-
-all_done:
- if (conv_heap != NULL) {
- mem_heap_free(conv_heap);
- }
-
-#ifdef FTS_INTERNAL_DIAG_PRINT
- DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Scan Table\n");
-#endif
- if (fts_pll_sort) {
- bool all_exit = false;
- ulint trial_count = 0;
- const ulint max_trial_count = 10000;
-
-wait_again:
- /* Check if error occurs in child thread */
- for (ulint j = 0; j < fts_sort_pll_degree; j++) {
- if (psort_info[j].error != DB_SUCCESS) {
- err = psort_info[j].error;
- trx->error_key_num = j;
- break;
- }
- }
-
- /* Tell all children that parent has done scanning */
- for (ulint i = 0; i < fts_sort_pll_degree; i++) {
- if (err == DB_SUCCESS) {
- psort_info[i].state = FTS_PARENT_COMPLETE;
- } else {
- psort_info[i].state = FTS_PARENT_EXITING;
- }
- }
-
- /* Now wait for all children to report back that they have completed */
- os_event_wait_time_low(fts_parallel_sort_event,
- 1000000, sig_count);
-
- for (ulint i = 0; i < fts_sort_pll_degree; i++) {
- if (psort_info[i].child_status != FTS_CHILD_COMPLETE
- && psort_info[i].child_status != FTS_CHILD_EXITING) {
- sig_count = os_event_reset(
- fts_parallel_sort_event);
- goto wait_again;
- }
- }
-
- /* Now all children should complete, wait a bit until
- they all finish setting the event, before we free everything.
- This has a 10 second timeout */
- do {
- all_exit = true;
-
- for (ulint j = 0; j < fts_sort_pll_degree; j++) {
- if (psort_info[j].child_status
- != FTS_CHILD_EXITING) {
- all_exit = false;
- os_thread_sleep(1000);
- break;
- }
- }
- trial_count++;
- } while (!all_exit && trial_count < max_trial_count);
-
- if (!all_exit) {
- ut_ad(0);
- ib_logf(IB_LOG_LEVEL_FATAL,
- "Not all child sort threads exited"
- " when creating FTS index '%s'",
- fts_sort_idx->name);
- }
- }
-
-#ifdef FTS_INTERNAL_DIAG_PRINT
- DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Tokenization\n");
-#endif
- for (ulint i = 0; i < n_index; i++) {
- row_merge_buf_free(merge_buf[i]);
- }
-
- row_fts_free_pll_merge_buf(psort_info);
-
- mem_free(merge_buf);
-
- btr_pcur_close(&pcur);
-
- /* Update the next Doc ID we used. Table should be locked, so
- no concurrent DML */
- if (max_doc_id && err == DB_SUCCESS) {
- /* Sync fts cache for other fts indexes to keep all
- fts indexes consistent in sync_doc_id. */
- err = fts_sync_table(const_cast<dict_table_t*>(new_table),
- false, true, false);
-
- if (err == DB_SUCCESS) {
- fts_update_next_doc_id(
- 0, new_table, old_table->name, max_doc_id);
- }
- }
-
- trx->op_info = "";
-
- DBUG_RETURN(err);
-}
-
-/** Write a record via buffer 2 and read the next record to buffer N.
-
-This macro is not self-contained. Its expansion uses the following names
-from the enclosing function: block, buf, b2, of, file, crypt_data,
-crypt_block, space, and the N-suffixed names b##N, mrec##N, offsets##N
-and foffs##N. The enclosing function must also define a label named
-"corrupt".
-
-Control jumps to "corrupt" when row_merge_write_rec() returns NULL, when
-the incremented of->n_rec exceeds file->n_rec, or when
-row_merge_read_rec() returns NULL while mrec##N is non-NULL. When
-row_merge_read_rec() returns NULL and mrec##N is NULL, AT_END is executed.
-@param N number of the buffer (0 or 1)
-@param INDEX record descriptor
-@param AT_END statement to execute at end of input */
-#define ROW_MERGE_WRITE_GET_NEXT(N, INDEX, AT_END) \
- do { \
- b2 = row_merge_write_rec(&block[2 * srv_sort_buf_size], \
- &buf[2], b2, \
- of->fd, &of->offset, \
- mrec##N, offsets##N, \
- crypt_data, \
- crypt_block ? &crypt_block[2 * srv_sort_buf_size] : NULL , \
- space); \
- if (UNIV_UNLIKELY(!b2 || ++of->n_rec > file->n_rec)) { \
- goto corrupt; \
- } \
- b##N = row_merge_read_rec(&block[N * srv_sort_buf_size],\
- &buf[N], b##N, INDEX, \
- file->fd, foffs##N, \
- &mrec##N, offsets##N, \
- crypt_data, \
- crypt_block ? &crypt_block[N * srv_sort_buf_size] : NULL, \
- space); \
- \
- if (UNIV_UNLIKELY(!b##N)) { \
- if (mrec##N) { \
- goto corrupt; \
- } \
- AT_END; \
- } \
- } while (0)
-
-/*************************************************************//**
-Merge two blocks of records on disk and write a bigger block.
-
-Reads one sorted list starting at block offset *foffs0 and another
-starting at *foffs1 from "file", merges them record by record using
-cmp_rec_rec_simple(), appends the result to "of" and terminates it
-with row_merge_write_eof().
-@return DB_SUCCESS, DB_DUPLICATE_KEY if two records compare equal, or
-DB_CORRUPTION if a read or write fails or more records would be written
-than file->n_rec */
-static __attribute__((nonnull(1,2,3,4,5,6), warn_unused_result))
-dberr_t
-row_merge_blocks(
-/*=============*/
-	const row_merge_dup_t*	dup,	/*!< in: descriptor of
-					index being created */
-	const merge_file_t*	file,	/*!< in: file containing
-					index entries */
-	row_merge_block_t*	block,	/*!< in/out: 3 buffers */
-	ulint*			foffs0,	/*!< in/out: offset of first
-					source list in the file */
-	ulint*			foffs1,	/*!< in/out: offset of second
-					source list in the file */
-	merge_file_t*		of,	/*!< in/out: output file */
-	fil_space_crypt_t*	crypt_data,/*!< in: crypt data or NULL */
-	row_merge_block_t*	crypt_block,/*!< in/out: crypted file
-					buffer, or NULL */
-	ulint			space)	/*!< in: space id */
-{
-	mem_heap_t*	heap;	/*!< memory heap for offsets0, offsets1 */
-
-	mrec_buf_t*	buf;	/*!< buffer for handling
-				split mrec in block[] */
-	const byte*	b0;	/*!< pointer to block[0] */
-	const byte*	b1;	/*!< pointer to block[srv_sort_buf_size] */
-	byte*		b2;	/*!< pointer to block[2 * srv_sort_buf_size] */
-	const mrec_t*	mrec0;	/*!< merge rec, points to block[0] or buf[0] */
-	const mrec_t*	mrec1;	/*!< merge rec, points to
-				block[srv_sort_buf_size] or buf[1] */
-	ulint*		offsets0;/* offsets of mrec0 */
-	ulint*		offsets1;/* offsets of mrec1 */
-
-#ifdef UNIV_DEBUG
-	if (row_merge_print_block) {
-		fprintf(stderr,
-			"row_merge_blocks fd=%d ofs=" ULINTPF
-			" + fd=%d ofs=" ULINTPF
-			" = fd=%d ofs=" ULINTPF "\n",
-			file->fd, *foffs0,
-			file->fd, *foffs1,
-			of->fd, of->offset);
-	}
-#endif /* UNIV_DEBUG */
-
-	heap = row_merge_heap_create(dup->index, &buf, &offsets0, &offsets1);
-
-	/* Write a record and read the next record. Split the output
-	file in two halves, which can be merged on the following pass. */
-
-	if (!row_merge_read(file->fd, *foffs0, &block[0],
-			    crypt_data,
-			    crypt_block ? &crypt_block[0] : NULL, space)
-	    || !row_merge_read(file->fd, *foffs1, &block[srv_sort_buf_size],
-			       crypt_data,
-			       crypt_block
-			       ? &crypt_block[srv_sort_buf_size] : NULL,
-			       space)) {
-corrupt:	/* also reached by "goto corrupt" in ROW_MERGE_WRITE_GET_NEXT */
-		mem_heap_free(heap);
-		return(DB_CORRUPTION);
-	}
-
-	b0 = &block[0];
-	b1 = &block[srv_sort_buf_size];
-	b2 = &block[2 * srv_sort_buf_size];
-
-	b0 = row_merge_read_rec(
-		&block[0], &buf[0], b0, dup->index,
-		file->fd, foffs0, &mrec0, offsets0,
-		crypt_data, crypt_block ? &crypt_block[0] : NULL, space);
-
-	/* The split-record buffer of the second input is buf[1], the same
-	element that ROW_MERGE_WRITE_GET_NEXT(1, ...) passes. buf is
-	indexed by stream number (0, 1, 2), not by a byte offset into
-	block[], so it must not be indexed with srv_sort_buf_size. */
-	b1 = row_merge_read_rec(
-		&block[srv_sort_buf_size],
-		&buf[1], b1, dup->index,
-		file->fd, foffs1, &mrec1, offsets1,
-		crypt_data,
-		crypt_block ? &crypt_block[srv_sort_buf_size] : NULL, space);
-
-	/* A NULL block pointer together with a non-NULL record is how
-	this function tells a read failure from a plain end of list. */
-	if (UNIV_UNLIKELY(!b0 && mrec0)
-	    || UNIV_UNLIKELY(!b1 && mrec1)) {
-
-		goto corrupt;
-	}
-
-	while (mrec0 && mrec1) {
-		switch (cmp_rec_rec_simple(
-				mrec0, mrec1, offsets0, offsets1,
-				dup->index, dup->table)) {
-		case 0:
-			mem_heap_free(heap);
-			return(DB_DUPLICATE_KEY);
-		case -1:
-			ROW_MERGE_WRITE_GET_NEXT(0, dup->index, goto merged);
-			break;
-		case 1:
-			ROW_MERGE_WRITE_GET_NEXT(1, dup->index, goto merged);
-			break;
-		default:
-			ut_error;
-		}
-	}
-
-merged:
-	if (mrec0) {
-		/* append all mrec0 to output */
-		for (;;) {
-			ROW_MERGE_WRITE_GET_NEXT(0, dup->index, goto done0);
-		}
-	}
-done0:
-	if (mrec1) {
-		/* append all mrec1 to output */
-		for (;;) {
-			ROW_MERGE_WRITE_GET_NEXT(1, dup->index, goto done1);
-		}
-	}
-done1:
-
-	mem_heap_free(heap);
-
-	b2 = row_merge_write_eof(&block[2 * srv_sort_buf_size],
-				 b2, of->fd, &of->offset,
-				 crypt_data,
-				 crypt_block
-				 ? &crypt_block[2 * srv_sort_buf_size] : NULL,
-				 space);
-
-	return(b2 ? DB_SUCCESS : DB_CORRUPTION);
-}
-
-/*************************************************************//**
-Copy a block of index entries.
-
-Reads the input starting at block *foffs0 of "file" and appends every
-record to "of" by means of ROW_MERGE_WRITE_GET_NEXT, then finishes the
-output with row_merge_write_eof(). On the path that reaches the end of
-the input, *foffs0 is incremented once more so that it points past the
-last block that was read.
-@return TRUE on success, FALSE on failure */
-static __attribute__((nonnull(1,2,3,4,5), warn_unused_result))
-ibool
-row_merge_blocks_copy(
-/*==================*/
- const dict_index_t* index, /*!< in: index being created */
- const merge_file_t* file, /*!< in: input file */
- row_merge_block_t* block, /*!< in/out: 3 buffers */
- ulint* foffs0, /*!< in/out: input file offset */
- merge_file_t* of, /*!< in/out: output file */
- fil_space_crypt_t* crypt_data,/*!< in: table crypt data */
- row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */
- ulint space) /*!< in: space id */
-{
- mem_heap_t* heap; /*!< memory heap for offsets0, offsets1 */
-
- mrec_buf_t* buf; /*!< buffer for handling
- split mrec in block[] */
- const byte* b0; /*!< pointer to block[0] */
- byte* b2; /*!< pointer to block[2 * srv_sort_buf_size] */
- const mrec_t* mrec0; /*!< merge rec, points to block[0] */
- ulint* offsets0;/* offsets of mrec0 */
- ulint* offsets1;/* dummy offsets */
-
-#ifdef UNIV_DEBUG
- if (row_merge_print_block) {
- fprintf(stderr,
- "row_merge_blocks_copy fd=%d ofs=" ULINTPF
- " = fd=%d ofs=" ULINTPF "\n",
- file->fd, *foffs0,
- of->fd, of->offset);
- }
-#endif /* UNIV_DEBUG */
-
- heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);
-
- /* Write a record and read the next record. Split the output
- file in two halves, which can be merged on the following pass. */
-
- if (!row_merge_read(file->fd, *foffs0, &block[0],
- crypt_data, crypt_block ? &crypt_block[0] : NULL, space)) {
-corrupt: /* also the target of "goto corrupt" inside ROW_MERGE_WRITE_GET_NEXT */
- mem_heap_free(heap);
- return(FALSE);
- }
-
- b0 = &block[0];
-
- b2 = &block[2 * srv_sort_buf_size];
-
- b0 = row_merge_read_rec(&block[0], &buf[0], b0, index,
- file->fd, foffs0, &mrec0, offsets0,
- crypt_data, crypt_block ? &crypt_block[0] : NULL, space);
-
- if (UNIV_UNLIKELY(!b0 && mrec0)) { /* NULL block pointer with a record set: treated as a failed read */
-
- goto corrupt;
- }
-
- if (mrec0) {
- /* append all mrec0 to output */
- for (;;) {
- ROW_MERGE_WRITE_GET_NEXT(0, index, goto done0);
- }
- }
-done0:
-
- /* The file offset points to the beginning of the last page
- that has been read. Update it to point to the next block. */
- (*foffs0)++;
-
- mem_heap_free(heap);
-
- return(row_merge_write_eof(&block[2 * srv_sort_buf_size],
- b2, of->fd, &of->offset,
- crypt_data,
- crypt_block ? &crypt_block[2 * srv_sort_buf_size] : NULL, space)
- != NULL);
-}
-
-/*************************************************************//**
-Perform one merge pass over the sorted runs stored in a file.
-
-The input is handled as two halves: runs starting at block 0 and runs
-starting at block run_offset[*num_run / 2]. Pairs of runs, one from each
-half, are merged with row_merge_blocks(); whatever remains in either
-half is copied with row_merge_blocks_copy(). On success the output file
-replaces *file, the previous input descriptor is stored in *tmpfd,
-run_offset[] holds the first block of every output run and *num_run
-holds their count.
-@return DB_SUCCESS or error code */
-static __attribute__((nonnull(1,2,3,4,5,6,7)))
-dberr_t
-row_merge(
-/*======*/
-	trx_t*			trx,	/*!< in: transaction */
-	const row_merge_dup_t*	dup,	/*!< in: descriptor of
-					index being created */
-	merge_file_t*		file,	/*!< in/out: file containing
-					index entries */
-	row_merge_block_t*	block,	/*!< in/out: 3 buffers */
-	int*			tmpfd,	/*!< in/out: temporary file handle */
-	ulint*			num_run,/*!< in/out: Number of runs that
-					remain to be merged */
-	ulint*			run_offset, /*!< in/out: Array containing
-					the first offset number of each
-					merge run */
-	fil_space_crypt_t*	crypt_data,/*!< in: table crypt data */
-	row_merge_block_t*	crypt_block, /*!< in: crypt buf or NULL */
-	ulint			space)	/*!< in: space id */
-{
-	/* First block of the second half of the input. It is read here,
-	before run_offset[] is marked invalid further down. */
-	const ulint	mid = run_offset[*num_run / 2];
-	merge_file_t	out;		/* output file of this pass */
-	ulint		runs_out = 0;	/* runs written by this pass */
-	ulint		in0;		/* read position in the first half */
-	ulint		in1;		/* read position in the second half */
-
-	UNIV_MEM_ASSERT_W(&block[0], 3 * srv_sort_buf_size);
-
-	if (crypt_block) {
-		UNIV_MEM_ASSERT_W(&crypt_block[0], 3 * srv_sort_buf_size);
-	}
-
-	ut_ad(mid < file->offset);
-
-	out.fd = *tmpfd;
-	out.offset = 0;
-	out.n_rec = 0;
-
-#ifdef POSIX_FADV_SEQUENTIAL
-	/* Both halves of the input are read front to back, and every
-	block is read exactly once. On Linux, POSIX_FADV_SEQUENTIAL
-	applies to the whole file. */
-	posix_fadvise(file->fd, 0, 0,
-		      POSIX_FADV_SEQUENTIAL | POSIX_FADV_NOREUSE);
-#endif /* POSIX_FADV_SEQUENTIAL */
-
-	in0 = 0;
-	in1 = mid;
-
-	UNIV_MEM_INVALID(run_offset, *num_run * sizeof *run_offset);
-
-	/* Merge one run of each half while both halves have runs left. */
-	while (in0 < mid && in1 < file->offset) {
-
-		if (trx_is_interrupted(trx)) {
-			return(DB_INTERRUPTED);
-		}
-
-		/* Note where this output run begins. */
-		run_offset[runs_out++] = out.offset;
-
-		const dberr_t	merge_err = row_merge_blocks(
-			dup, file, block, &in0, &in1, &out,
-			crypt_data, crypt_block, space);
-
-		if (merge_err != DB_SUCCESS) {
-			return(merge_err);
-		}
-
-		in0++;
-		in1++;
-	}
-
-	/* Copy what is left of the first half, if anything. */
-	while (in0 < mid) {
-
-		if (UNIV_UNLIKELY(trx_is_interrupted(trx))) {
-			return(DB_INTERRUPTED);
-		}
-
-		run_offset[runs_out++] = out.offset;
-
-		if (!row_merge_blocks_copy(dup->index, file, block,
-					   &in0, &out,
-					   crypt_data, crypt_block, space)) {
-			return(DB_CORRUPTION);
-		}
-	}
-
-	ut_ad(in0 == mid);
-
-	/* Copy what is left of the second half, if anything. */
-	while (in1 < file->offset) {
-
-		if (trx_is_interrupted(trx)) {
-			return(DB_INTERRUPTED);
-		}
-
-		run_offset[runs_out++] = out.offset;
-
-		if (!row_merge_blocks_copy(dup->index, file, block,
-					   &in1, &out,
-					   crypt_data, crypt_block, space)) {
-			return(DB_CORRUPTION);
-		}
-	}
-
-	ut_ad(in1 == file->offset);
-
-	/* Every input record must have been written exactly once. */
-	if (UNIV_UNLIKELY(out.n_rec != file->n_rec)) {
-		return(DB_CORRUPTION);
-	}
-
-	ut_ad(runs_out <= *num_run);
-
-	*num_run = runs_out;
-
-	/* A run occupies one or more blocks, so there can never be more
-	runs than there are blocks in the input file. */
-	ut_ad((*num_run) <= file->offset);
-
-	/* The output never occupies more blocks than the input. */
-	ut_ad(out.offset <= file->offset);
-
-	/* Exchange input and output for the next pass. */
-	*tmpfd = file->fd;
-	*file = out;
-
-	UNIV_MEM_INVALID(&block[0], 3 * srv_sort_buf_size);
-
-	return(DB_SUCCESS);
-}
-
-/*************************************************************//**
-Merge disk files.
-
-Calls row_merge() repeatedly until a single run remains or an error
-occurs. For indexes that are not DICT_FTS, progress is reported through
-thd_progress_init(), thd_progress_report() and thd_progress_end(). When
-update_progress is set, onlineddl_pct_progress is updated after every
-pass from pct_progress and pct_cost.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-row_merge_sort(
-/*===========*/
-	trx_t*			trx,	/*!< in: transaction */
-	const row_merge_dup_t*	dup,	/*!< in: descriptor of
-					index being created */
-	merge_file_t*		file,	/*!< in/out: file containing
-					index entries */
-	row_merge_block_t*	block,	/*!< in/out: 3 buffers */
-	int*			tmpfd,	/*!< in/out: temporary file handle
-					*/
-	const bool		update_progress,
-					/*!< in: update progress
-					status variable or not */
-	const float		pct_progress,
-					/*!< in: total progress percent
-					until now */
-	const float		pct_cost, /*!< in: current progress percent */
-	fil_space_crypt_t*	crypt_data,/*!< in: table crypt data */
-	row_merge_block_t*	crypt_block, /*!< in: crypt buf or NULL */
-	ulint			space)	/*!< in: space id */
-{
-	const ulint	half = file->offset / 2;
-	ulint		num_runs;
-	ulint*		run_offset;
-	dberr_t		error = DB_SUCCESS;
-	ulint		merge_count = 0;
-	ulint		total_merge_sort_count;
-	float		curr_progress = 0;
-
-	DBUG_ENTER("row_merge_sort");
-
-	/* Record the number of merge runs we need to perform */
-	num_runs = file->offset;
-
-	/* With at most one run there is nothing to merge. Return before
-	taking the logarithm below: the logarithm of 0 is not a finite
-	value and must not be converted to ulint. */
-	if (num_runs <= 1) {
-		DBUG_RETURN(error);
-	}
-
-	/* Find the smallest N such that 2^N >= num_runs. N is the
-	expected number of merge passes. */
-	total_merge_sort_count = (ulint) ceil(my_log2f(num_runs));
-	if (total_merge_sort_count == 0) {
-		total_merge_sort_count = 1;
-	}
-
-	/* "run_offset" records each run's first offset number */
-	run_offset = (ulint*) mem_alloc(file->offset * sizeof(ulint));
-
-	/* This tells row_merge() where to start for the first round
-	of merge. */
-	run_offset[half] = half;
-
-	/* The file should always contain at least one byte (the end
-	of file marker). Thus, it must be at least one block. */
-	ut_ad(file->offset > 0);
-
-	/* Progress report only for "normal" indexes. */
-	if (!(dup->index->type & DICT_FTS)) {
-		thd_progress_init(trx->mysql_thd, 1);
-	}
-
-	if (global_system_variables.log_warnings > 2) {
-		sql_print_information("InnoDB: Online DDL : merge-sorting"
-				      " has estimated " ULINTPF " runs",
-				      num_runs);
-	}
-
-	/* Merge the runs until we have one big run */
-	do {
-		/* Report progress of merge sort to MySQL for
-		show processlist progress field */
-		/* Progress report only for "normal" indexes. */
-		if (!(dup->index->type & DICT_FTS)) {
-			thd_progress_report(trx->mysql_thd,
-					    file->offset - num_runs,
-					    file->offset);
-		}
-
-		error = row_merge(trx, dup, file, block, tmpfd,
-				  &num_runs, run_offset,
-				  crypt_data, crypt_block, space);
-
-		if (update_progress) {
-			merge_count++;
-			curr_progress =
-				(merge_count >= total_merge_sort_count)
-				? pct_cost
-				: ((pct_cost * merge_count)
-				   / total_merge_sort_count);
-			/* presenting 10.12% as 1012 integer */
-			onlineddl_pct_progress = (ulint)
-				((pct_progress + curr_progress) * 100);
-		}
-
-		if (error != DB_SUCCESS) {
-			break;
-		}
-
-		UNIV_MEM_ASSERT_RW(run_offset, num_runs * sizeof *run_offset);
-	} while (num_runs > 1);
-
-	mem_free(run_offset);
-
-	/* Progress report only for "normal" indexes. */
-	if (!(dup->index->type & DICT_FTS)) {
-		thd_progress_end(trx->mysql_thd);
-	}
-
-	DBUG_RETURN(error);
-}
-
-/*************************************************************//**
-Replace every externally stored field of a data tuple with a copy of
-the column value, fetched through the merge record. */
-static MY_ATTRIBUTE((nonnull))
-void
-row_merge_copy_blobs(
-/*=================*/
-	const mrec_t*	mrec,	/*!< in: merge record */
-	const ulint*	offsets,/*!< in: offsets of mrec */
-	ulint		zip_size,/*!< in: compressed page size in bytes, or 0 */
-	dtuple_t*	tuple,	/*!< in/out: data tuple */
-	mem_heap_t*	heap)	/*!< in/out: memory heap */
-{
-	ut_ad(rec_offs_any_extern(offsets));
-
-	for (ulint field_no = 0;
-	     field_no < dtuple_get_n_fields(tuple);
-	     field_no++) {
-
-		dfield_t*	field = dtuple_get_nth_field(tuple, field_no);
-
-		if (dfield_is_ext(field)) {
-			ulint		len;
-			const void*	data;
-
-			ut_ad(!dfield_is_null(field));
-
-			/* While a PRIMARY KEY is being created the table
-			is X-locked, and records marked for deletion are
-			not copied. Hence an externally stored column
-			cannot be freed between the moment its BLOB pointer
-			was read (row_merge_read_clustered_index()) and the
-			moment it is dereferenced here. */
-			data = btr_rec_copy_externally_stored_field(
-				mrec, offsets, zip_size, field_no, &len,
-				heap, NULL);
-
-			/* The table is locked, so whatever incomplete
-			transactions wrote must already have been rolled
-			back; an incomplete BLOB column cannot exist. */
-			ut_a(data);
-
-			dfield_set_data(field, data, len);
-		}
-	}
-}
-
-/********************************************************************//**
-Read sorted file containing index data tuples and insert these data
-tuples to the index
-
-Each record read from fd is converted with row_rec_to_index_entry_low()
-and inserted after the last user record of the index, first with
-btr_cur_optimistic_insert() and, if that returns DB_FAIL, with
-btr_cur_pessimistic_insert(). After every 1000 inserted rows the
-variable onlineddl_pct_progress is updated from pct_progress and
-pct_cost.
-@return DB_SUCCESS or error number */
-static __attribute__((nonnull(2,3,5), warn_unused_result))
-dberr_t
-row_merge_insert_index_tuples(
-/*==========================*/
- trx_id_t trx_id, /*!< in: transaction identifier */
- dict_index_t* index, /*!< in: index */
- const dict_table_t* old_table,/*!< in: old table */
- int fd, /*!< in: file descriptor */
- row_merge_block_t* block, /*!< in/out: file buffer */
- const ib_int64_t table_total_rows, /*!< in: total rows of old table */
- const float pct_progress, /*!< in: total progress percent until now */
- const float pct_cost, /*!< in: current progress percent
- */
- fil_space_crypt_t* crypt_data,/*!< in: table crypt data */
- row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */
- ulint space) /*!< in: space id */
-{
- const byte* b;
- mem_heap_t* heap;
- mem_heap_t* tuple_heap;
- mem_heap_t* ins_heap;
- dberr_t error = DB_SUCCESS;
- ulint foffs = 0;
- ulint* offsets;
- mrec_buf_t* buf;
- ib_int64_t inserted_rows = 0;
- float curr_progress;
- DBUG_ENTER("row_merge_insert_index_tuples");
-
- ut_ad(!srv_read_only_mode);
- ut_ad(!(index->type & DICT_FTS));
- ut_ad(trx_id);
-
- tuple_heap = mem_heap_create(1000);
-
- {
- ulint i = 1 + REC_OFFS_HEADER_SIZE
- + dict_index_get_n_fields(index);
- heap = mem_heap_create(sizeof *buf + i * sizeof *offsets);
- ins_heap = mem_heap_create(sizeof *buf + i * sizeof *offsets);
- offsets = static_cast<ulint*>(
- mem_heap_alloc(heap, i * sizeof *offsets));
- offsets[0] = i; /* first slot: number of array elements allocated */
- offsets[1] = dict_index_get_n_fields(index); /* second slot: number of index fields */
- }
-
- b = &block[0];
-
- if (!row_merge_read(fd, foffs, block,
- crypt_data, crypt_block, space)) {
- error = DB_CORRUPTION;
- } else {
- buf = static_cast<mrec_buf_t*>(
- mem_heap_alloc(heap, sizeof *buf));
-
- for (;;) {
- const mrec_t* mrec;
- dtuple_t* dtuple;
- ulint n_ext;
- big_rec_t* big_rec;
- rec_t* rec;
- btr_cur_t cursor;
- mtr_t mtr;
-
- b = row_merge_read_rec(block, buf, b, index,
- fd, &foffs, &mrec, offsets,
- crypt_data, crypt_block, space);
- if (UNIV_UNLIKELY(!b)) {
- /* End of list, or I/O error */
- if (mrec) {
- error = DB_CORRUPTION;
- }
- break;
- }
-
- dict_index_t* old_index
- = dict_table_get_first_index(old_table);
-
- if (dict_index_is_clust(index)
- && dict_index_is_online_ddl(old_index)) {
- error = row_log_table_get_error(old_index);
- if (error != DB_SUCCESS) {
- break;
- }
- }
-
- dtuple = row_rec_to_index_entry_low(
- mrec, index, offsets, &n_ext, tuple_heap);
-
- if (!n_ext) {
- /* There are no externally stored columns. */
- } else {
- ut_ad(dict_index_is_clust(index));
- /* Off-page columns can be fetched safely
- when concurrent modifications to the table
- are disabled. (Purge can process delete-marked
- records, but row_merge_read_clustered_index()
- would have skipped them.)
-
- When concurrent modifications are enabled,
- row_merge_read_clustered_index() will
- only see rows from transactions that were
- committed before the ALTER TABLE started
- (REPEATABLE READ).
-
- Any modifications after the
- row_merge_read_clustered_index() scan
- will go through row_log_table_apply().
- Any modifications to off-page columns
- will be tracked by
- row_log_table_blob_alloc() and
- row_log_table_blob_free(). */
- row_merge_copy_blobs(
- mrec, offsets,
- dict_table_zip_size(old_table),
- dtuple, tuple_heap);
- }
-
- ut_ad(dtuple_validate(dtuple));
- log_free_check();
-
- mtr_start(&mtr);
- /* Insert after the last user record. */
- btr_cur_open_at_index_side(
- false, index, BTR_MODIFY_LEAF,
- &cursor, 0, &mtr);
- page_cur_position(
- page_rec_get_prev(btr_cur_get_rec(&cursor)),
- btr_cur_get_block(&cursor),
- btr_cur_get_page_cur(&cursor));
- cursor.flag = BTR_CUR_BINARY;
-#ifdef UNIV_DEBUG
- /* Check that the records are inserted in order. */
- rec = btr_cur_get_rec(&cursor);
-
- if (!page_rec_is_infimum(rec)) {
- ulint* rec_offsets = rec_get_offsets(
- rec, index, offsets,
- ULINT_UNDEFINED, &tuple_heap);
- ut_ad(cmp_dtuple_rec(dtuple, rec, rec_offsets)
- > 0);
- }
-#endif /* UNIV_DEBUG */
- ulint* ins_offsets = NULL;
-
- error = btr_cur_optimistic_insert(
- BTR_NO_UNDO_LOG_FLAG | BTR_NO_LOCKING_FLAG
- | BTR_KEEP_SYS_FLAG | BTR_CREATE_FLAG,
- &cursor, &ins_offsets, &ins_heap,
- dtuple, &rec, &big_rec, 0, NULL, &mtr);
-
- if (error == DB_FAIL) { /* retry: reopen the cursor with BTR_MODIFY_TREE in a new mini-transaction */
- ut_ad(!big_rec);
- mtr_commit(&mtr);
- mtr_start(&mtr);
- btr_cur_open_at_index_side(
- false, index, BTR_MODIFY_TREE,
- &cursor, 0, &mtr);
- page_cur_position(
- page_rec_get_prev(btr_cur_get_rec(
- &cursor)),
- btr_cur_get_block(&cursor),
- btr_cur_get_page_cur(&cursor));
-
- error = btr_cur_pessimistic_insert(
- BTR_NO_UNDO_LOG_FLAG
- | BTR_NO_LOCKING_FLAG
- | BTR_KEEP_SYS_FLAG | BTR_CREATE_FLAG,
- &cursor, &ins_offsets, &ins_heap,
- dtuple, &rec, &big_rec, 0, NULL, &mtr);
- }
-
- if (!dict_index_is_clust(index)) { /* secondary index: stamp the page with trx_id */
- page_update_max_trx_id(
- btr_cur_get_block(&cursor),
- btr_cur_get_page_zip(&cursor),
- trx_id, &mtr);
- }
-
- mtr_commit(&mtr);
-
- if (UNIV_LIKELY_NULL(big_rec)) {
- /* If the system crashes at this
- point, the clustered index record will
- contain a null BLOB pointer. This
- should not matter, because the copied
- table will be dropped on crash
- recovery anyway. */
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(error == DB_SUCCESS);
- error = row_ins_index_entry_big_rec(
- dtuple, big_rec,
- ins_offsets, &ins_heap,
- index, NULL, __FILE__, __LINE__);
- dtuple_convert_back_big_rec(
- index, dtuple, big_rec);
- }
-
- if (error != DB_SUCCESS) {
- goto err_exit;
- }
-
- mem_heap_empty(tuple_heap); /* per-record heaps are emptied for the next iteration */
- mem_heap_empty(ins_heap);
-
- /* Increment innodb_onlineddl_pct_progress status variable */
- inserted_rows++;
- if(inserted_rows % 1000 == 0) {
- /* Update progress for each 1000 rows */
- curr_progress = (inserted_rows >= table_total_rows ||
- table_total_rows <= 0) ?
- pct_cost :
- ((pct_cost * inserted_rows) / table_total_rows);
-
- /* presenting 10.12% as 1012 integer */;
- onlineddl_pct_progress = (ulint) ((pct_progress + curr_progress) * 100);
- }
- }
- }
-
-err_exit: /* reached by goto on insert failure and by falling out of the read loop */
- mem_heap_free(tuple_heap);
- mem_heap_free(ins_heap);
- mem_heap_free(heap);
-
- DBUG_RETURN(error);
-}
-
-/*********************************************************************//**
-Sets a table lock of the requested mode (LOCK_X or LOCK_S) on a table,
-for the duration of creating indexes.
-
-A dummy select query graph is built only to obtain the query thread that
-lock_table() requires. The lock request is repeated (label run_again)
-when row_mysql_handle_errors() reports that a lock wait took place, or
-when lock_table() returns DB_QUE_THR_SUSPENDED.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_merge_lock_table(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table, /*!< in: table to lock */
- enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */
-{
- mem_heap_t* heap;
- que_thr_t* thr;
- dberr_t err;
- sel_node_t* node;
-
- ut_ad(!srv_read_only_mode);
- ut_ad(mode == LOCK_X || mode == LOCK_S);
-
- heap = mem_heap_create(512);
-
- trx->op_info = "setting table lock for creating or dropping index";
-
- node = sel_node_create(heap);
- thr = pars_complete_graph_for_exec(node, trx, heap);
- thr->graph->state = QUE_FORK_ACTIVE;
-
- /* We use the select query graph as the dummy graph needed
- in the lock module call */
-
- thr = static_cast<que_thr_t*>(
- que_fork_get_first_thr(
- static_cast<que_fork_t*>(que_node_get_parent(thr))));
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
-run_again:
- thr->run_node = thr;
- thr->prev_node = thr->common.parent;
-
- err = lock_table(0, table, mode, thr);
-
- trx->error_state = err;
-
- if (UNIV_LIKELY(err == DB_SUCCESS)) {
- que_thr_stop_for_mysql_no_error(thr, trx);
- } else {
- que_thr_stop_for_mysql(thr);
-
- if (err != DB_QUE_THR_SUSPENDED) {
- bool was_lock_wait;
-
- was_lock_wait = row_mysql_handle_errors(
- &err, trx, thr, NULL);
-
- if (was_lock_wait) {
- goto run_again;
- }
- } else {
- que_thr_t* run_thr;
- que_node_t* parent;
-
- parent = que_node_get_parent(thr);
-
- run_thr = que_fork_start_command(
- static_cast<que_fork_t*>(parent));
-
- ut_a(run_thr == thr);
-
- /* There was a lock wait but the thread was not
- in a ready to run or running state. */
- trx->error_state = DB_LOCK_WAIT;
-
- goto run_again;
- }
- }
-
- que_graph_free(thr->graph); /* NOTE(review): presumably this also releases "heap", which is not freed separately here - confirm */
- trx->op_info = "";
-
- return(err);
-}
-
-/*********************************************************************//**
-Delete the SYS_FIELDS and SYS_INDEXES rows of an index that was created
-before an error occurred.
-The caller must hold the data dictionary locked exclusively, because
-the transaction will not be committed. A failure of the SQL procedure is
-only logged to stderr; trx->error_state is reset to DB_SUCCESS. */
-static
-void
-row_merge_drop_index_dict(
-/*======================*/
-	trx_t*		trx,	/*!< in/out: dictionary transaction */
-	index_id_t	index_id)/*!< in: index identifier */
-{
-	static const char	drop_index_sql[] =
-		"PROCEDURE DROP_INDEX_PROC () IS\n"
-		"BEGIN\n"
-		"DELETE FROM SYS_FIELDS WHERE INDEX_ID=:indexid;\n"
-		"DELETE FROM SYS_INDEXES WHERE ID=:indexid;\n"
-		"END;\n";
-
-	ut_ad(!srv_read_only_mode);
-	ut_ad(mutex_own(&dict_sys->mutex));
-	ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
-	ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	pars_info_t* const	pinfo = pars_info_create();
-
-	pars_info_add_ull_literal(pinfo, "indexid", index_id);
-
-	trx->op_info = "dropping index from dictionary";
-
-	const dberr_t	err = que_eval_sql(pinfo, drop_index_sql, FALSE, trx);
-
-	if (err != DB_SUCCESS) {
-		/* DDL transactions are kept free of lock waits and
-		deadlocks, but other errors remain possible, for
-		example DB_TOO_MANY_CONCURRENT_TRXS. */
-		trx->error_state = DB_SUCCESS;
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Error: row_merge_drop_index_dict "
-			"failed with error code: %u.\n",
-			(unsigned) err);
-	}
-
-	trx->op_info = "";
-}
-
-/*********************************************************************//**
-Delete from SYS_FIELDS and SYS_INDEXES every index of a table whose
-name starts with TEMP_INDEX_PREFIX_STR, i.e. the indexes that were
-created before an error occurred.
-The caller must hold the data dictionary locked exclusively, because
-the transaction will not be committed. A failure of the SQL procedure is
-only logged to stderr; trx->error_state is reset to DB_SUCCESS. */
-UNIV_INTERN
-void
-row_merge_drop_indexes_dict(
-/*========================*/
-	trx_t*		trx,	/*!< in/out: dictionary transaction */
-	table_id_t	table_id)/*!< in: table identifier */
-{
-	static const char	drop_indexes_sql[] =
-		"PROCEDURE DROP_INDEXES_PROC () IS\n"
-		"ixid CHAR;\n"
-		"found INT;\n"
-
-		"DECLARE CURSOR index_cur IS\n"
-		" SELECT ID FROM SYS_INDEXES\n"
-		" WHERE TABLE_ID=:tableid AND\n"
-		" SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "'\n"
-		"FOR UPDATE;\n"
-
-		"BEGIN\n"
-		"found := 1;\n"
-		"OPEN index_cur;\n"
-		"WHILE found = 1 LOOP\n"
-		" FETCH index_cur INTO ixid;\n"
-		" IF (SQL % NOTFOUND) THEN\n"
-		" found := 0;\n"
-		" ELSE\n"
-		" DELETE FROM SYS_FIELDS WHERE INDEX_ID=ixid;\n"
-		" DELETE FROM SYS_INDEXES WHERE CURRENT OF index_cur;\n"
-		" END IF;\n"
-		"END LOOP;\n"
-		"CLOSE index_cur;\n"
-
-		"END;\n";
-
-	ut_ad(!srv_read_only_mode);
-	ut_ad(mutex_own(&dict_sys->mutex));
-	ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
-	ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
-	/* table->n_ref_count > 1 is possible when locked=TRUE. In that
-	case all code that would hold an open handle to the table should
-	be waiting for the next statement to execute, or for a meta-data
-	lock.
-
-	A concurrent purge will be prevented by dict_operation_lock. */
-
-	pars_info_t* const	pinfo = pars_info_create();
-
-	pars_info_add_ull_literal(pinfo, "tableid", table_id);
-
-	trx->op_info = "dropping indexes";
-
-	const dberr_t	err = que_eval_sql(pinfo, drop_indexes_sql, FALSE, trx);
-
-	if (err != DB_SUCCESS) {
-		/* DDL transactions are kept free of lock waits and
-		deadlocks, but other errors remain possible, for
-		example DB_TOO_MANY_CONCURRENT_TRXS. */
-		trx->error_state = DB_SUCCESS;
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Error: row_merge_drop_indexes_dict "
-			"failed with error code: %u.\n",
-			(unsigned) err);
-	}
-
-	trx->op_info = "";
-}
-
-/*********************************************************************//**
-Drop indexes that were created before an error occurred.
-The data dictionary must have been locked exclusively by the caller,
-because the transaction will not be committed. */
-UNIV_INTERN
-void
-row_merge_drop_indexes(
-/*===================*/
- trx_t* trx, /*!< in/out: dictionary transaction */
- dict_table_t* table, /*!< in/out: table containing the indexes */
- ibool locked) /*!< in: TRUE=table locked,
- FALSE=may need to do a lazy drop */
-{
- dict_index_t* index;
- dict_index_t* next_index;
-
- ut_ad(!srv_read_only_mode);
- ut_ad(mutex_own(&dict_sys->mutex));
- ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- index = dict_table_get_first_index(table);
- ut_ad(dict_index_is_clust(index));
- ut_ad(dict_index_get_online_status(index) == ONLINE_INDEX_COMPLETE);
-
- /* the caller should have an open handle to the table */
- ut_ad(table->n_ref_count >= 1);
-
- /* It is possible that table->n_ref_count > 1 when
- locked=TRUE. In this case, all code that should have an open
- handle to the table be waiting for the next statement to execute,
- or waiting for a meta-data lock.
-
- A concurrent purge will be prevented by dict_operation_lock. */
-
- if (!locked && table->n_ref_count > 1) {
- /* We will have to drop the indexes later, when the
- table is guaranteed to be no longer in use. Mark the
- indexes as incomplete and corrupted, so that other
- threads will stop using them. Let dict_table_close()
- or crash recovery or the next invocation of
- prepare_inplace_alter_table() take care of dropping
- the indexes. */
-
- while ((index = dict_table_get_next_index(index)) != NULL) {
- ut_ad(!dict_index_is_clust(index));
-
- switch (dict_index_get_online_status(index)) {
- case ONLINE_INDEX_ABORTED_DROPPED:
- continue;
- case ONLINE_INDEX_COMPLETE:
- if (*index->name != TEMP_INDEX_PREFIX) {
- /* Do nothing to already
- published indexes. */
- } else if (index->type & DICT_FTS) {
- /* Drop a completed FULLTEXT
- index, due to a timeout during
- MDL upgrade for
- commit_inplace_alter_table().
- Because only concurrent reads
- are allowed (and they are not
- seeing this index yet) we
- are safe to drop the index. */
- dict_index_t* prev = UT_LIST_GET_PREV(
- indexes, index);
- /* At least there should be
- the clustered index before
- this one. */
- ut_ad(prev);
- ut_a(table->fts);
- fts_drop_index(table, index, trx);
- /* Since
- INNOBASE_SHARE::idx_trans_tbl
- is shared between all open
- ha_innobase handles to this
- table, no thread should be
- accessing this dict_index_t
- object. Also, we should be
- holding LOCK=SHARED MDL on the
- table even after the MDL
- upgrade timeout. */
-
- /* We can remove a DICT_FTS
- index from the cache, because
- we do not allow ADD FULLTEXT INDEX
- with LOCK=NONE. If we allowed that,
- we should exclude FTS entries from
- prebuilt->ins_node->entry_list
- in ins_node_create_entry_list(). */
- dict_index_remove_from_cache(
- table, index);
- index = prev;
- } else {
- rw_lock_x_lock(
- dict_index_get_lock(index));
- dict_index_set_online_status(
- index, ONLINE_INDEX_ABORTED);
- index->type |= DICT_CORRUPT;
- table->drop_aborted = TRUE;
- goto drop_aborted;
- }
- continue;
- case ONLINE_INDEX_CREATION:
- rw_lock_x_lock(dict_index_get_lock(index));
- ut_ad(*index->name == TEMP_INDEX_PREFIX);
- row_log_abort_sec(index);
- drop_aborted:
- rw_lock_x_unlock(dict_index_get_lock(index));
-
- DEBUG_SYNC_C("merge_drop_index_after_abort");
- /* covered by dict_sys->mutex */
- MONITOR_INC(MONITOR_BACKGROUND_DROP_INDEX);
- /* fall through */
- case ONLINE_INDEX_ABORTED:
- /* Drop the index tree from the
- data dictionary and free it from
- the tablespace, but keep the object
- in the data dictionary cache. */
- row_merge_drop_index_dict(trx, index->id);
- rw_lock_x_lock(dict_index_get_lock(index));
- dict_index_set_online_status(
- index, ONLINE_INDEX_ABORTED_DROPPED);
- rw_lock_x_unlock(dict_index_get_lock(index));
- table->drop_aborted = TRUE;
- continue;
- }
- ut_error;
- }
-
- return;
- }
-
- row_merge_drop_indexes_dict(trx, table->id);
-
- /* Invalidate all row_prebuilt_t::ins_graph that are referring
- to this table. That is, force row_get_prebuilt_insert_row() to
- rebuild prebuilt->ins_node->entry_list). */
- ut_ad(table->def_trx_id <= trx->id);
- table->def_trx_id = trx->id;
-
- next_index = dict_table_get_next_index(index);
-
- while ((index = next_index) != NULL) {
- /* read the next pointer before freeing the index */
- next_index = dict_table_get_next_index(index);
-
- ut_ad(!dict_index_is_clust(index));
-
- if (*index->name == TEMP_INDEX_PREFIX) {
- /* If it is FTS index, drop from table->fts
- and also drop its auxiliary tables */
- if (index->type & DICT_FTS) {
- ut_a(table->fts);
- fts_drop_index(table, index, trx);
- }
-
- switch (dict_index_get_online_status(index)) {
- case ONLINE_INDEX_CREATION:
- /* This state should only be possible
- when prepare_inplace_alter_table() fails
- after invoking row_merge_create_index().
- In inplace_alter_table(),
- row_merge_build_indexes()
- should never leave the index in this state.
- It would invoke row_log_abort_sec() on
- failure. */
- case ONLINE_INDEX_COMPLETE:
- /* In these cases, we are able to drop
- the index straight. The DROP INDEX was
- never deferred. */
- break;
- case ONLINE_INDEX_ABORTED:
- case ONLINE_INDEX_ABORTED_DROPPED:
- /* covered by dict_sys->mutex */
- MONITOR_DEC(MONITOR_BACKGROUND_DROP_INDEX);
- }
-
- dict_index_remove_from_cache(table, index);
- }
- }
-
- table->drop_aborted = FALSE;
- ut_d(dict_table_check_for_dup_indexes(table, CHECK_ALL_COMPLETE));
-}
-
-/*********************************************************************//**
-Drop all partially created indexes during crash recovery. */
-UNIV_INTERN
-void
-row_merge_drop_temp_indexes(void)
-/*=============================*/
-{
- static const char sql[] =
- "PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
- "ixid CHAR;\n"
- "found INT;\n"
-
- "DECLARE CURSOR index_cur IS\n"
- " SELECT ID FROM SYS_INDEXES\n"
- " WHERE SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "'\n"
- "FOR UPDATE;\n"
-
- "BEGIN\n"
- "found := 1;\n"
- "OPEN index_cur;\n"
- "WHILE found = 1 LOOP\n"
- " FETCH index_cur INTO ixid;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSE\n"
- " DELETE FROM SYS_FIELDS WHERE INDEX_ID=ixid;\n"
- " DELETE FROM SYS_INDEXES WHERE CURRENT OF index_cur;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE index_cur;\n"
- "END;\n";
- trx_t* trx;
- dberr_t error;
-
- /* Load the table definitions that contain partially defined
- indexes, so that the data dictionary information can be checked
- when accessing the tablename.ibd files. */
- trx = trx_allocate_for_background();
- trx->op_info = "dropping partially created indexes";
- row_mysql_lock_data_dictionary(trx);
- /* Ensure that this transaction will be rolled back and locks
- will be released, if the server gets killed before the commit
- gets written to the redo log. */
- trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
-
- trx->op_info = "dropping indexes";
- error = que_eval_sql(NULL, sql, FALSE, trx);
-
- if (error != DB_SUCCESS) {
- /* Even though we ensure that DDL transactions are WAIT
- and DEADLOCK free, we could encounter other errors e.g.,
- DB_TOO_MANY_CONCURRENT_TRXS. */
- trx->error_state = DB_SUCCESS;
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: row_merge_drop_temp_indexes "
- "failed with error code: %u.\n", (unsigned) error);
- }
-
- trx_commit_for_mysql(trx);
- row_mysql_unlock_data_dictionary(trx);
- trx_free_for_background(trx);
-}
-
-
-/** Create temporary merge files in the given paramater path, and if
-UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
-@param[in] path location for creating temporary merge files.
-@return File descriptor */
-UNIV_INTERN
-int
-row_merge_file_create_low(
- const char* path)
-{
- int fd;
-#ifdef UNIV_PFS_IO
- /* This temp file open does not go through normal
- file APIs, add instrumentation to register with
- performance schema */
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
- locker = PSI_FILE_CALL(get_thread_file_name_locker)(
- &state, innodb_file_temp_key, PSI_FILE_OPEN,
- "Innodb Merge Temp File", &locker);
- if (locker != NULL) {
- PSI_FILE_CALL(start_file_open_wait)(locker,
- __FILE__,
- __LINE__);
- }
-#endif
- fd = innobase_mysql_tmpfile(path);
-#ifdef UNIV_PFS_IO
- if (locker != NULL) {
- PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(
- locker, fd);
- }
-#endif
-
- if (fd < 0) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot create temporary merge file");
- return (-1);
- }
- return(fd);
-}
-
-
-/** Create a merge file in the given location.
-@param[out] merge_file merge file structure
-@param[in] path location for creating temporary file
-@return file descriptor, or -1 on failure */
-UNIV_INTERN
-int
-row_merge_file_create(
- merge_file_t* merge_file,
- const char* path)
-{
- merge_file->fd = row_merge_file_create_low(path);
- merge_file->offset = 0;
- merge_file->n_rec = 0;
-
- if (merge_file->fd >= 0) {
- if (srv_disable_sort_file_cache) {
- os_file_set_nocache(OS_FILE_FROM_FD(merge_file->fd),
- "row0merge.cc", "sort");
- }
- }
- return(merge_file->fd);
-}
-
-/*********************************************************************//**
-Destroy a merge file. And de-register the file from Performance Schema
-if UNIV_PFS_IO is defined. */
-UNIV_INTERN
-void
-row_merge_file_destroy_low(
-/*=======================*/
- int fd) /*!< in: merge file descriptor */
-{
-#ifdef UNIV_PFS_IO
- struct PSI_file_locker* locker = NULL;
- PSI_file_locker_state state;
- locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)(
- &state, fd, PSI_FILE_CLOSE);
- if (locker != NULL) {
- PSI_FILE_CALL(start_file_wait)(
- locker, 0, __FILE__, __LINE__);
- }
-#endif
- if (fd >= 0) {
- close(fd);
- }
-#ifdef UNIV_PFS_IO
- if (locker != NULL) {
- PSI_FILE_CALL(end_file_wait)(locker, 0);
- }
-#endif
-}
-/*********************************************************************//**
-Destroy a merge file. */
-UNIV_INTERN
-void
-row_merge_file_destroy(
-/*===================*/
- merge_file_t* merge_file) /*!< in/out: merge file structure */
-{
- ut_ad(!srv_read_only_mode);
-
- if (merge_file->fd != -1) {
- row_merge_file_destroy_low(merge_file->fd);
- merge_file->fd = -1;
- }
-}
-
-/*********************************************************************//**
-Rename an index in the dictionary that was created. The data
-dictionary must have been locked exclusively by the caller, because
-the transaction will not be committed.
-@return DB_SUCCESS if all OK */
-UNIV_INTERN
-dberr_t
-row_merge_rename_index_to_add(
-/*==========================*/
- trx_t* trx, /*!< in/out: transaction */
- table_id_t table_id, /*!< in: table identifier */
- index_id_t index_id) /*!< in: index identifier */
-{
- dberr_t err = DB_SUCCESS;
- pars_info_t* info = pars_info_create();
-
- /* We use the private SQL parser of Innobase to generate the
- query graphs needed in renaming indexes. */
-
- static const char rename_index[] =
- "PROCEDURE RENAME_INDEX_PROC () IS\n"
- "BEGIN\n"
- "UPDATE SYS_INDEXES SET NAME=SUBSTR(NAME,1,LENGTH(NAME)-1)\n"
- "WHERE TABLE_ID = :tableid AND ID = :indexid;\n"
- "END;\n";
-
- ut_ad(trx);
- ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
-
- trx->op_info = "renaming index to add";
-
- pars_info_add_ull_literal(info, "tableid", table_id);
- pars_info_add_ull_literal(info, "indexid", index_id);
-
- err = que_eval_sql(info, rename_index, FALSE, trx);
-
- if (err != DB_SUCCESS) {
- /* Even though we ensure that DDL transactions are WAIT
- and DEADLOCK free, we could encounter other errors e.g.,
- DB_TOO_MANY_CONCURRENT_TRXS. */
- trx->error_state = DB_SUCCESS;
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: row_merge_rename_index_to_add "
- "failed with error code: %u.\n", (unsigned) err);
- }
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*********************************************************************//**
-Rename an index in the dictionary that is to be dropped. The data
-dictionary must have been locked exclusively by the caller, because
-the transaction will not be committed.
-@return DB_SUCCESS if all OK */
-UNIV_INTERN
-dberr_t
-row_merge_rename_index_to_drop(
-/*===========================*/
- trx_t* trx, /*!< in/out: transaction */
- table_id_t table_id, /*!< in: table identifier */
- index_id_t index_id) /*!< in: index identifier */
-{
- dberr_t err;
- pars_info_t* info = pars_info_create();
-
- ut_ad(!srv_read_only_mode);
-
- /* We use the private SQL parser of Innobase to generate the
- query graphs needed in renaming indexes. */
-
- static const char rename_index[] =
- "PROCEDURE RENAME_INDEX_PROC () IS\n"
- "BEGIN\n"
- "UPDATE SYS_INDEXES SET NAME=CONCAT('"
- TEMP_INDEX_PREFIX_STR "',NAME)\n"
- "WHERE TABLE_ID = :tableid AND ID = :indexid;\n"
- "END;\n";
-
- ut_ad(trx);
- ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
-
- trx->op_info = "renaming index to drop";
-
- pars_info_add_ull_literal(info, "tableid", table_id);
- pars_info_add_ull_literal(info, "indexid", index_id);
-
- err = que_eval_sql(info, rename_index, FALSE, trx);
-
- if (err != DB_SUCCESS) {
- /* Even though we ensure that DDL transactions are WAIT
- and DEADLOCK free, we could encounter other errors e.g.,
- DB_TOO_MANY_CONCURRENT_TRXS. */
- trx->error_state = DB_SUCCESS;
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: row_merge_rename_index_to_drop "
- "failed with error code: %u.\n", (unsigned) err);
- }
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*********************************************************************//**
-Provide a new pathname for a table that is being renamed if it belongs to
-a file-per-table tablespace. The caller is responsible for freeing the
-memory allocated for the return value.
-@return new pathname of tablespace file, or NULL if space = 0 */
-UNIV_INTERN
-char*
-row_make_new_pathname(
-/*==================*/
- dict_table_t* table, /*!< in: table to be renamed */
- const char* new_name) /*!< in: new name */
-{
- char* new_path;
- char* old_path;
-
- ut_ad(table->space != TRX_SYS_SPACE);
-
- old_path = fil_space_get_first_path(table->space);
- ut_a(old_path);
-
- new_path = os_file_make_new_pathname(old_path, new_name);
-
- mem_free(old_path);
-
- return(new_path);
-}
-
-/*********************************************************************//**
-Rename the tables in the data dictionary. The data dictionary must
-have been locked exclusively by the caller, because the transaction
-will not be committed.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_merge_rename_tables_dict(
-/*=========================*/
- dict_table_t* old_table, /*!< in/out: old table, renamed to
- tmp_name */
- dict_table_t* new_table, /*!< in/out: new table, renamed to
- old_table->name */
- const char* tmp_name, /*!< in: new name for old_table */
- trx_t* trx) /*!< in/out: dictionary transaction */
-{
- dberr_t err = DB_ERROR;
- pars_info_t* info;
-
- ut_ad(!srv_read_only_mode);
- ut_ad(old_table != new_table);
- ut_ad(mutex_own(&dict_sys->mutex));
- ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
- ut_ad(trx_get_dict_operation(trx) == TRX_DICT_OP_TABLE
- || trx_get_dict_operation(trx) == TRX_DICT_OP_INDEX);
-
- trx->op_info = "renaming tables";
-
- /* We use the private SQL parser of Innobase to generate the query
- graphs needed in updating the dictionary data in system tables. */
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "new_name", new_table->name);
- pars_info_add_str_literal(info, "old_name", old_table->name);
- pars_info_add_str_literal(info, "tmp_name", tmp_name);
-
- err = que_eval_sql(info,
- "PROCEDURE RENAME_TABLES () IS\n"
- "BEGIN\n"
- "UPDATE SYS_TABLES SET NAME = :tmp_name\n"
- " WHERE NAME = :old_name;\n"
- "UPDATE SYS_TABLES SET NAME = :old_name\n"
- " WHERE NAME = :new_name;\n"
- "END;\n", FALSE, trx);
-
- /* Update SYS_TABLESPACES and SYS_DATAFILES if the old
- table is in a non-system tablespace where space > 0. */
- if (err == DB_SUCCESS
- && old_table->space != TRX_SYS_SPACE
- && fil_space_get(old_table->space) != NULL) {
- /* Make pathname to update SYS_DATAFILES. */
- char* tmp_path = row_make_new_pathname(old_table, tmp_name);
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "tmp_name", tmp_name);
- pars_info_add_str_literal(info, "tmp_path", tmp_path);
- pars_info_add_int4_literal(info, "old_space",
- (lint) old_table->space);
-
- err = que_eval_sql(info,
- "PROCEDURE RENAME_OLD_SPACE () IS\n"
- "BEGIN\n"
- "UPDATE SYS_TABLESPACES"
- " SET NAME = :tmp_name\n"
- " WHERE SPACE = :old_space;\n"
- "UPDATE SYS_DATAFILES"
- " SET PATH = :tmp_path\n"
- " WHERE SPACE = :old_space;\n"
- "END;\n", FALSE, trx);
-
- mem_free(tmp_path);
- }
-
- /* Update SYS_TABLESPACES and SYS_DATAFILES if the new
- table is in a non-system tablespace where space > 0. */
- if (err == DB_SUCCESS && new_table->space != TRX_SYS_SPACE) {
- /* Make pathname to update SYS_DATAFILES. */
- char* old_path = row_make_new_pathname(
- new_table, old_table->name);
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "old_name", old_table->name);
- pars_info_add_str_literal(info, "old_path", old_path);
- pars_info_add_int4_literal(info, "new_space",
- (lint) new_table->space);
-
- err = que_eval_sql(info,
- "PROCEDURE RENAME_NEW_SPACE () IS\n"
- "BEGIN\n"
- "UPDATE SYS_TABLESPACES"
- " SET NAME = :old_name\n"
- " WHERE SPACE = :new_space;\n"
- "UPDATE SYS_DATAFILES"
- " SET PATH = :old_path\n"
- " WHERE SPACE = :new_space;\n"
- "END;\n", FALSE, trx);
-
- mem_free(old_path);
- }
-
- if (err == DB_SUCCESS && dict_table_is_discarded(new_table)) {
- err = row_import_update_discarded_flag(
- trx, new_table->id, true, true);
- }
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*********************************************************************//**
-Create and execute a query graph for creating an index.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_merge_create_index_graph(
-/*=========================*/
- trx_t* trx, /*!< in: trx */
- dict_table_t* table, /*!< in: table */
- dict_index_t* index) /*!< in: index */
-{
- ind_node_t* node; /*!< Index creation node */
- mem_heap_t* heap; /*!< Memory heap */
- que_thr_t* thr; /*!< Query thread */
- dberr_t err;
-
- ut_ad(trx);
- ut_ad(table);
- ut_ad(index);
-
- heap = mem_heap_create(512);
-
- index->table = table;
- node = ind_create_graph_create(index, heap, false);
- thr = pars_complete_graph_for_exec(node, trx, heap);
-
- ut_a(thr == que_fork_start_command(
- static_cast<que_fork_t*>(que_node_get_parent(thr))));
-
- que_run_threads(thr);
-
- err = trx->error_state;
-
- que_graph_free((que_t*) que_node_get_parent(thr));
-
- return(err);
-}
-
-/*********************************************************************//**
-Create the index and load in to the dictionary.
-@return index, or NULL on error */
-UNIV_INTERN
-dict_index_t*
-row_merge_create_index(
-/*===================*/
- trx_t* trx, /*!< in/out: trx (sets error_state) */
- dict_table_t* table, /*!< in: the index is on this table */
- const index_def_t* index_def,
- /*!< in: the index definition */
- const char** col_names)
- /*! in: column names if columns are
- renamed or NULL */
-{
- dict_index_t* index;
- dberr_t err;
- ulint n_fields = index_def->n_fields;
- ulint i;
-
- ut_ad(!srv_read_only_mode);
-
- /* Create the index prototype, using the passed in def, this is not
- a persistent operation. We pass 0 as the space id, and determine at
- a lower level the space id where to store the table. */
-
- index = dict_mem_index_create(table->name, index_def->name,
- 0, index_def->ind_type, n_fields);
-
- ut_a(index);
-
- for (i = 0; i < n_fields; i++) {
- index_field_t* ifield = &index_def->fields[i];
- const char * col_name;
-
- /*
- Alter table renaming a column and then adding a index
- to this new name e.g ALTER TABLE t
- CHANGE COLUMN b c INT NOT NULL, ADD UNIQUE INDEX (c);
- requires additional check as column names are not yet
- changed when new index definitions are created. Table's
- new column names are on a array of column name pointers
- if any of the column names are changed. */
-
- if (col_names && col_names[i]) {
- col_name = col_names[i];
- } else {
- col_name = ifield->col_name ?
- dict_table_get_col_name_for_mysql(table, ifield->col_name) :
- dict_table_get_col_name(table, ifield->col_no);
- }
-
- dict_mem_index_add_field(
- index,
- col_name,
- ifield->prefix_len);
- }
-
- /* Add the index to SYS_INDEXES, using the index prototype. */
- err = row_merge_create_index_graph(trx, table, index);
-
- if (err == DB_SUCCESS) {
-
- index = dict_table_get_index_on_name(table, index_def->name);
-
- ut_a(index);
-
- /* Note the id of the transaction that created this
- index, we use it to restrict readers from accessing
- this index, to ensure read consistency. */
- ut_ad(index->trx_id == trx->id);
- } else {
- index = NULL;
- }
-
- return(index);
-}
-
-/*********************************************************************//**
-Check if a transaction can use an index. */
-UNIV_INTERN
-ibool
-row_merge_is_index_usable(
-/*======================*/
- const trx_t* trx, /*!< in: transaction */
- const dict_index_t* index) /*!< in: index to check */
-{
- if (!dict_index_is_clust(index)
- && dict_index_is_online_ddl(index)) {
- /* Indexes that are being created are not useable. */
- return(FALSE);
- }
-
- return(!dict_index_is_corrupted(index)
- && (dict_table_is_temporary(index->table)
- || !trx->read_view
- || read_view_sees_trx_id(trx->read_view, index->trx_id)));
-}
-
-/*********************************************************************//**
-Drop a table. The caller must have ensured that the background stats
-thread is not processing the table. This can be done by calling
-dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and
-before calling this function.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-row_merge_drop_table(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* table) /*!< in: table to drop */
-{
- ut_ad(!srv_read_only_mode);
-
- /* There must be no open transactions on the table. */
- ut_a(table->n_ref_count == 0);
-
- return(row_drop_table_for_mysql(table->name, trx, false, false, false));
-}
-
-/*********************************************************************//**
-Build indexes on a table by reading a clustered index,
-creating a temporary file containing index entries, merge sorting
-these index entries and inserting sorted index entries to indexes.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-row_merge_build_indexes(
-/*====================*/
- trx_t* trx, /*!< in: transaction */
- dict_table_t* old_table, /*!< in: table where rows are
- read from */
- dict_table_t* new_table, /*!< in: table where indexes are
- created; identical to old_table
- unless creating a PRIMARY KEY */
- bool online, /*!< in: true if creating indexes
- online */
- dict_index_t** indexes, /*!< in: indexes to be created */
- const ulint* key_numbers, /*!< in: MySQL key numbers */
- ulint n_indexes, /*!< in: size of indexes[] */
- struct TABLE* table, /*!< in/out: MySQL table, for
- reporting erroneous key value
- if applicable */
- const dtuple_t* add_cols, /*!< in: default values of
- added columns, or NULL */
- const ulint* col_map, /*!< in: mapping of old column
- numbers to new ones, or NULL
- if old_table == new_table */
- ulint add_autoinc, /*!< in: number of added
- AUTO_INCREMENT column, or
- ULINT_UNDEFINED if none is added */
- ib_sequence_t& sequence) /*!< in: autoinc instance if
- add_autoinc != ULINT_UNDEFINED */
-{
- merge_file_t* merge_files;
- row_merge_block_t* block;
- row_merge_block_t* crypt_block = NULL;
- ulint block_size;
- ulint i;
- ulint j;
- dberr_t error;
- int tmpfd = -1;
- dict_index_t* fts_sort_idx = NULL;
- fts_psort_t* psort_info = NULL;
- fts_psort_t* merge_info = NULL;
- ib_int64_t sig_count = 0;
- bool fts_psort_initiated = false;
- fil_space_crypt_t * crypt_data = NULL;
-
- float total_static_cost = 0;
- float total_dynamic_cost = 0;
- uint total_index_blocks = 0;
- float pct_cost=0;
- float pct_progress=0;
-
- DBUG_ENTER("row_merge_build_indexes");
-
- ut_ad(!srv_read_only_mode);
- ut_ad((old_table == new_table) == !col_map);
- ut_ad(!add_cols || col_map);
-
- /* Allocate memory for merge file data structure and initialize
- fields */
-
- block_size = 3 * srv_sort_buf_size;
- block = static_cast<row_merge_block_t*>(
- os_mem_alloc_large(&block_size));
-
- if (block == NULL) {
- DBUG_RETURN(DB_OUT_OF_MEMORY);
- }
-
- /* Get crypt data from tablespace if present. We should be protected
- from concurrent DDL (e.g. drop table) by MDL-locks. */
- fil_space_t* space = fil_space_acquire(new_table->space);
-
- if (space) {
- crypt_data = space->crypt_data;
- } else {
- DBUG_RETURN(DB_TABLESPACE_NOT_FOUND);
- }
-
- /* If tablespace is encrypted, allocate additional buffer for
- encryption/decryption. */
- if (crypt_data && crypt_data->should_encrypt()) {
- crypt_block = static_cast<row_merge_block_t*>(
- os_mem_alloc_large(&block_size));
-
- if (crypt_block == NULL) {
- fil_space_release(space);
- DBUG_RETURN(DB_OUT_OF_MEMORY);
- }
- } else {
- /* Not needed */
- crypt_data = NULL;
- }
-
- trx_start_if_not_started_xa(trx);
-
- merge_files = static_cast<merge_file_t*>(
- mem_alloc(n_indexes * sizeof *merge_files));
-
- /* Initialize all the merge file descriptors, so that we
- don't call row_merge_file_destroy() on uninitialized
- merge file descriptor */
-
- for (i = 0; i < n_indexes; i++) {
- merge_files[i].fd = -1;
- merge_files[i].offset = 0;
- }
-
- total_static_cost = COST_BUILD_INDEX_STATIC * n_indexes + COST_READ_CLUSTERED_INDEX;
- total_dynamic_cost = COST_BUILD_INDEX_DYNAMIC * n_indexes;
-
- for (i = 0; i < n_indexes; i++) {
-
- if (indexes[i]->type & DICT_FTS) {
- ibool opt_doc_id_size = FALSE;
-
- /* To build FTS index, we would need to extract
- doc's word, Doc ID, and word's position, so
- we need to build a "fts sort index" indexing
- on above three 'fields' */
- fts_sort_idx = row_merge_create_fts_sort_index(
- indexes[i], old_table, &opt_doc_id_size);
-
- row_merge_dup_t* dup = static_cast<row_merge_dup_t*>(
- ut_malloc(sizeof *dup));
- dup->index = fts_sort_idx;
- dup->table = table;
- dup->col_map = col_map;
- dup->n_dup = 0;
-
- row_fts_psort_info_init(
- trx, dup, new_table, opt_doc_id_size,
- &psort_info, &merge_info);
-
- /* "We need to ensure that we free the resources
- allocated */
- fts_psort_initiated = true;
- }
- }
-
- /* Reset the MySQL row buffer that is used when reporting
- duplicate keys. */
- innobase_rec_reset(table);
-
- if (global_system_variables.log_warnings > 2) {
- sql_print_information("InnoDB: Online DDL : Start reading"
- " clustered index of the table"
- " and create temporary files");
- }
-
- pct_cost = COST_READ_CLUSTERED_INDEX * 100 / (total_static_cost + total_dynamic_cost);
-
- /* Do not continue if we can't encrypt table pages */
- if (!old_table->is_readable() ||
- !new_table->is_readable()) {
- error = DB_DECRYPTION_FAILED;
- ib_push_warning(trx->mysql_thd, DB_DECRYPTION_FAILED,
- "Table %s is encrypted but encryption service or"
- " used key_id is not available. "
- " Can't continue reading table.",
- !old_table->is_readable() ? old_table->name :
- new_table->name);
- goto func_exit;
- }
-
- /* Read clustered index of the table and create files for
- secondary index entries for merge sort */
-
- error = row_merge_read_clustered_index(
- trx, table, old_table, new_table, online, indexes,
- fts_sort_idx, psort_info, merge_files, key_numbers,
- n_indexes, add_cols, col_map,
- add_autoinc, sequence, block, &tmpfd, pct_cost,
- crypt_data, crypt_block);
-
- pct_progress += pct_cost;
-
- if (global_system_variables.log_warnings > 2) {
- sql_print_information("InnoDB: Online DDL : End of reading "
- "clustered index of the table"
- " and create temporary files");
- }
-
- for (i = 0; i < n_indexes; i++) {
- total_index_blocks += merge_files[i].offset;
- }
-
- if (error != DB_SUCCESS) {
- goto func_exit;
- }
-
- DEBUG_SYNC_C("row_merge_after_scan");
-
- /* Now we have files containing index entries ready for
- sorting and inserting. */
-
- DBUG_EXECUTE_IF(
- "ib_merge_wait_after_read",
- os_thread_sleep(20000000);); /* 20 sec */
-
- for (i = 0; i < n_indexes; i++) {
- dict_index_t* sort_idx = indexes[i];
-
- if (indexes[i]->type & DICT_FTS) {
- os_event_t fts_parallel_merge_event;
-
- sort_idx = fts_sort_idx;
-
- fts_parallel_merge_event
- = merge_info[0].psort_common->merge_event;
-
- if (FTS_PLL_MERGE) {
- ulint trial_count = 0;
- bool all_exit = false;
-
- os_event_reset(fts_parallel_merge_event);
- row_fts_start_parallel_merge(merge_info);
-wait_again:
- os_event_wait_time_low(
- fts_parallel_merge_event, 1000000,
- sig_count);
-
- for (j = 0; j < FTS_NUM_AUX_INDEX; j++) {
- if (merge_info[j].child_status
- != FTS_CHILD_COMPLETE
- && merge_info[j].child_status
- != FTS_CHILD_EXITING) {
- sig_count = os_event_reset(
- fts_parallel_merge_event);
-
- goto wait_again;
- }
- }
-
- /* Now all children should complete, wait
- a bit until they all finish using event */
- while (!all_exit && trial_count < 10000) {
- all_exit = true;
-
- for (j = 0; j < FTS_NUM_AUX_INDEX;
- j++) {
- if (merge_info[j].child_status
- != FTS_CHILD_EXITING) {
- all_exit = false;
- os_thread_sleep(1000);
- break;
- }
- }
- trial_count++;
- }
-
- if (!all_exit) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Not all child merge threads"
- " exited when creating FTS"
- " index '%s'",
- indexes[i]->name);
- } else {
- for (j = 0; j < FTS_NUM_AUX_INDEX;
- j++) {
-
- os_thread_join(merge_info[j]
- .thread_hdl);
- }
- }
- } else {
- /* This cannot report duplicates; an
- assertion would fail in that case. */
- error = row_fts_merge_insert(
- sort_idx, new_table,
- psort_info, 0);
- }
-
-#ifdef FTS_INTERNAL_DIAG_PRINT
- DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Insert\n");
-#endif
- } else if (merge_files[i].fd != -1) {
- char buf[NAME_LEN + 1];
- row_merge_dup_t dup = {
- sort_idx, table, col_map, 0};
-
- pct_cost = (COST_BUILD_INDEX_STATIC +
- (total_dynamic_cost * merge_files[i].offset /
- total_index_blocks)) /
- (total_static_cost + total_dynamic_cost)
- * PCT_COST_MERGESORT_INDEX * 100;
- char* bufend = innobase_convert_name(
- buf, sizeof buf,
- indexes[i]->name,
- strlen(indexes[i]->name),
- trx->mysql_thd,
- FALSE);
- buf[bufend - buf]='\0';
-
- if (global_system_variables.log_warnings > 2) {
- sql_print_information("InnoDB: Online DDL :"
- " Start merge-sorting"
- " index %s"
- " (" ULINTPF
- " / " ULINTPF "),"
- " estimated cost :"
- " %2.4f",
- buf, i + 1, n_indexes,
- pct_cost);
- }
-
- error = row_merge_sort(
- trx, &dup, &merge_files[i],
- block, &tmpfd, true,
- pct_progress, pct_cost,
- crypt_data, crypt_block, new_table->space);
-
- pct_progress += pct_cost;
-
- if (global_system_variables.log_warnings > 2) {
- sql_print_information("InnoDB: Online DDL :"
- " End of "
- " merge-sorting index %s"
- " (" ULINTPF
- " / " ULINTPF ")",
- buf, i + 1, n_indexes);
- }
-
- DBUG_EXECUTE_IF(
- "ib_merge_wait_after_sort",
- os_thread_sleep(20000000);); /* 20 sec */
-
- if (error == DB_SUCCESS) {
- pct_cost = (COST_BUILD_INDEX_STATIC +
- (total_dynamic_cost * merge_files[i].offset /
- total_index_blocks)) /
- (total_static_cost + total_dynamic_cost) *
- PCT_COST_INSERT_INDEX * 100;
-
- if (global_system_variables.log_warnings > 2) {
- sql_print_information(
- "InnoDB: Online DDL : Start "
- "building index %s"
- " (" ULINTPF
- " / " ULINTPF "), estimated "
- "cost : %2.4f", buf, i + 1,
- n_indexes, pct_cost);
- }
-
- error = row_merge_insert_index_tuples(
- trx->id, sort_idx, old_table,
- merge_files[i].fd, block,
- merge_files[i].n_rec, pct_progress, pct_cost,
- crypt_data, crypt_block, new_table->space);
- pct_progress += pct_cost;
-
- if (global_system_variables.log_warnings > 2) {
- sql_print_information(
- "InnoDB: Online DDL : "
- "End of building index %s"
- " (" ULINTPF " / " ULINTPF ")",
- buf, i + 1, n_indexes);
- }
- }
- }
-
- /* Close the temporary file to free up space. */
- row_merge_file_destroy(&merge_files[i]);
-
- if (indexes[i]->type & DICT_FTS) {
- row_fts_psort_info_destroy(psort_info, merge_info);
- fts_psort_initiated = false;
- } else if (error != DB_SUCCESS || !online) {
- /* Do not apply any online log. */
- } else if (old_table != new_table) {
- ut_ad(!sort_idx->online_log);
- ut_ad(sort_idx->online_status
- == ONLINE_INDEX_COMPLETE);
- } else {
- if (global_system_variables.log_warnings > 2) {
- sql_print_information(
- "InnoDB: Online DDL : Applying"
- " log to index");
- }
- DEBUG_SYNC_C("row_log_apply_before");
- error = row_log_apply(trx, sort_idx, table);
- DEBUG_SYNC_C("row_log_apply_after");
- }
-
- if (error != DB_SUCCESS) {
- trx->error_key_num = key_numbers[i];
- goto func_exit;
- }
-
- if (indexes[i]->type & DICT_FTS && fts_enable_diag_print) {
- char* name = (char*) indexes[i]->name;
-
- if (*name == TEMP_INDEX_PREFIX) {
- name++;
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Finished building "
- "full-text index %s\n", name);
- }
- }
-
-func_exit:
- DBUG_EXECUTE_IF(
- "ib_build_indexes_too_many_concurrent_trxs",
- error = DB_TOO_MANY_CONCURRENT_TRXS;
- trx->error_state = error;);
-
- if (fts_psort_initiated) {
- /* Clean up FTS psort related resource */
- row_fts_psort_info_destroy(psort_info, merge_info);
- fts_psort_initiated = false;
- }
-
- row_merge_file_destroy_low(tmpfd);
-
- for (i = 0; i < n_indexes; i++) {
- row_merge_file_destroy(&merge_files[i]);
- }
-
- if (fts_sort_idx) {
- dict_mem_index_free(fts_sort_idx);
- }
-
- mem_free(merge_files);
- os_mem_free_large(block, block_size);
-
- if (crypt_block) {
- os_mem_free_large(crypt_block, block_size);
- }
-
- DICT_TF2_FLAG_UNSET(new_table, DICT_TF2_FTS_ADD_DOC_ID);
-
- if (online && old_table == new_table && error != DB_SUCCESS) {
- /* On error, flag all online secondary index creation
- as aborted. */
- for (i = 0; i < n_indexes; i++) {
- ut_ad(!(indexes[i]->type & DICT_FTS));
- ut_ad(*indexes[i]->name == TEMP_INDEX_PREFIX);
- ut_ad(!dict_index_is_clust(indexes[i]));
-
- /* Completed indexes should be dropped as
- well, and indexes whose creation was aborted
- should be dropped from the persistent
- storage. However, at this point we can only
- set some flags in the not-yet-published
- indexes. These indexes will be dropped later
- in row_merge_drop_indexes(), called by
- rollback_inplace_alter_table(). */
-
- switch (dict_index_get_online_status(indexes[i])) {
- case ONLINE_INDEX_COMPLETE:
- break;
- case ONLINE_INDEX_CREATION:
- rw_lock_x_lock(
- dict_index_get_lock(indexes[i]));
- row_log_abort_sec(indexes[i]);
- indexes[i]->type |= DICT_CORRUPT;
- rw_lock_x_unlock(
- dict_index_get_lock(indexes[i]));
- new_table->drop_aborted = TRUE;
- /* fall through */
- case ONLINE_INDEX_ABORTED_DROPPED:
- case ONLINE_INDEX_ABORTED:
- MONITOR_MUTEX_INC(
- &dict_sys->mutex,
- MONITOR_BACKGROUND_DROP_INDEX);
- }
- }
- }
-
- if (space) {
- fil_space_release(space);
- }
-
- DBUG_RETURN(error);
-}
diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc
deleted file mode 100644
index 0079fc79a0e..00000000000
--- a/storage/xtradb/row/row0mysql.cc
+++ /dev/null
@@ -1,5687 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2000, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0mysql.cc
-Interface between Innobase row operations and MySQL.
-Contains also create table and other data dictionary operations.
-
-Created 9/17/2000 Heikki Tuuri
-*******************************************************/
-
-#include "row0mysql.h"
-
-#ifdef UNIV_NONINL
-#include "row0mysql.ic"
-#endif
-
-#include "ha_prototypes.h"
-
-#include <sql_const.h>
-#include "row0ins.h"
-#include "row0merge.h"
-#include "row0sel.h"
-#include "row0upd.h"
-#include "row0row.h"
-#include "que0que.h"
-#include "pars0pars.h"
-#include "dict0dict.h"
-#include "dict0crea.h"
-#include "dict0load.h"
-#include "dict0priv.h"
-#include "dict0boot.h"
-#include "dict0stats.h"
-#include "dict0stats_bg.h"
-#include "trx0roll.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "trx0undo.h"
-#include "lock0lock.h"
-#include "rem0cmp.h"
-#include "log0log.h"
-#include "btr0sea.h"
-#include "btr0defragment.h"
-#include "fil0fil.h"
-#include "fil0crypt.h"
-#include "ibuf0ibuf.h"
-#include "fts0fts.h"
-#include "fts0types.h"
-#include "srv0start.h"
-#include "row0import.h"
-#include "m_string.h"
-#include "my_sys.h"
-#include "zlib.h"
-#include <algorithm>
-
-/** Provide optional 4.x backwards compatibility for 5.0 and above */
-UNIV_INTERN ibool row_rollback_on_timeout = FALSE;
-
-/** Chain node of the list of tables to drop in the background.
-Each node carries one table name and the link field that chains it
-into a UT_LIST of row_mysql_drop_t nodes. */
-struct row_mysql_drop_t{
- char* table_name; /*!< table name */
- UT_LIST_NODE_T(row_mysql_drop_t)row_mysql_drop_list;
- /*!< list chain node */
-};
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register drop list mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t row_drop_list_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/** @brief List of tables we should drop in background.
-
-ALTER TABLE in MySQL requires that the table handler can drop the
-table in background when there are no queries to it any
-more. Protected by row_drop_list_mutex. */
-static UT_LIST_BASE_NODE_T(row_mysql_drop_t) row_mysql_drop_list;
-
-/** Mutex protecting the background table drop list. */
-static ib_mutex_t row_drop_list_mutex;
-
-/** Flag: has row_mysql_drop_list been initialized? */
-static ibool row_mysql_drop_list_inited = FALSE;
-
-/** Magic table names for invoking various monitor threads */
-/* @{ */
-static const char S_innodb_monitor[] = "innodb_monitor";
-static const char S_innodb_lock_monitor[] = "innodb_lock_monitor";
-static const char S_innodb_tablespace_monitor[] = "innodb_tablespace_monitor";
-static const char S_innodb_table_monitor[] = "innodb_table_monitor";
-#ifdef UNIV_MEM_DEBUG
-static const char S_innodb_mem_validate[] = "innodb_mem_validate";
-#endif /* UNIV_MEM_DEBUG */
-/* @} */
-
-/** Evaluates to true if str1 equals str2_onstack, used for comparing
-the magic table names.
-
-The length comparison is evaluated first, so memcmp() is reached only
-when str1_len equals sizeof(str2_onstack). Because the length of
-str2_onstack is taken with sizeof, it must be a real char[] array (not a
-pointer); sizeof then also counts the terminating NUL.
-@param str1 in: string to compare
-@param str1_len in: length of str1, in bytes, including terminating NUL
-@param str2_onstack in: char[] array containing a NUL terminated string
-@return TRUE if str1 equals str2_onstack */
-#define STR_EQ(str1, str1_len, str2_onstack) \
- ((str1_len) == sizeof(str2_onstack) \
- && memcmp(str1, str2_onstack, sizeof(str2_onstack)) == 0)
-
-/*******************************************************************//**
-Checks whether a table name is one of the names reserved for MySQL
-system tables: "mysql/host", "mysql/user" or "mysql/db".
-@return TRUE if name is a MySQL system table name */
-static
-ibool
-row_mysql_is_system_table(
-/*======================*/
-	const char*	name)	/*!< in: table name to check */
-{
-	static const char	sys_db_prefix[] = "mysql/";
-	const size_t		prefix_len = sizeof(sys_db_prefix) - 1;
-
-	if (strncmp(name, sys_db_prefix, prefix_len) != 0) {
-		/* The name does not start with "mysql/". */
-		return(FALSE);
-	}
-
-	/* The part of the name that follows the "mysql/" prefix. */
-	const char*	table_part = name + prefix_len;
-
-	if (strcmp(table_part, "host") == 0
-	    || strcmp(table_part, "user") == 0
-	    || strcmp(table_part, "db") == 0) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/*********************************************************************//**
-If a table is not yet in the drop list, adds the table to the list of tables
-which the master thread drops in background. We need this on Unix because in
-ALTER TABLE MySQL may call drop table even if the table has running queries on
-it. Also, if there are running foreign key checks on the table, we drop the
-table lazily.
-@return TRUE if the table was not yet in the drop list, and was added there */
-static
-ibool
-row_add_table_to_background_drop_list(
-/*==================================*/
- const char* name); /*!< in: table name */
-
-/*******************************************************************//**
-Sleeps for srv_dml_needed_delay when that value is nonzero. This is how
-an INSERT, DELETE or UPDATE operation is delayed if the purge is lagging. */
-static
-void
-row_mysql_delay_if_needed(void)
-/*===========================*/
-{
-	if (!srv_dml_needed_delay) {
-		/* No delay has been requested. */
-		return;
-	}
-
-	os_thread_sleep(srv_dml_needed_delay);
-}
-
-/*******************************************************************//**
-Releases the blob heap attached to a prebuilt struct and clears the
-pointer so that the struct no longer refers to the freed heap. */
-UNIV_INTERN
-void
-row_mysql_prebuilt_free_blob_heap(
-/*==============================*/
-	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct of a
-					ha_innobase:: table handle */
-{
-	mem_heap_t*	blob_heap = prebuilt->blob_heap;
-
-	mem_heap_free(blob_heap);
-
-	prebuilt->blob_heap = NULL;
-}
-
-/*******************************************************************//**
-Writes the length prefix of a >= 5.0.3 format true VARCHAR to dest, in the
-MySQL row format. A 2-byte prefix is written in little-endian order.
-@return pointer to the data, i.e. dest advanced past the 1 or 2 bytes
-that hold the length */
-UNIV_INTERN
-byte*
-row_mysql_store_true_var_len(
-/*=========================*/
-	byte*	dest,	/*!< in: where to store */
-	ulint	len,	/*!< in: length, must fit in two bytes */
-	ulint	lenlen)	/*!< in: storage length of len: either 1 or 2 bytes */
-{
-	if (lenlen != 2) {
-		/* Only 1 and 2 are valid prefix sizes. */
-		ut_a(lenlen == 1);
-		ut_a(len < 256);
-
-		mach_write_to_1(dest, len);
-
-		return(dest + 1);
-	}
-
-	/* Two-byte prefix: the length must fit in 16 bits. */
-	ut_a(len < 256 * 256);
-
-	mach_write_to_2_little_endian(dest, len);
-
-	return(dest + 2);
-}
-
-/*******************************************************************//**
-Decodes the length prefix of a >= 5.0.3 format true VARCHAR stored in the
-MySQL row format and locates the payload that follows it.
-@return pointer to the data, i.e. field advanced past the 1 or 2 bytes
-that hold the length */
-UNIV_INTERN
-const byte*
-row_mysql_read_true_varchar(
-/*========================*/
-	ulint*		len,	/*!< out: variable-length field length */
-	const byte*	field,	/*!< in: field in the MySQL format */
-	ulint		lenlen)	/*!< in: storage length of len: either 1
-				or 2 bytes */
-{
-	if (lenlen != 2) {
-		/* Only 1 and 2 are valid prefix sizes. */
-		ut_a(lenlen == 1);
-
-		*len = mach_read_from_1(field);
-
-		return(field + 1);
-	}
-
-	/* Two-byte prefix, stored in little-endian order. */
-	*len = mach_read_from_2_little_endian(field);
-
-	return(field + 2);
-}
-
-/*******************************************************************//**
-Writes a BLOB reference (length followed by a pointer to the data) into
-dest, using the MySQL format. */
-UNIV_INTERN
-void
-row_mysql_store_blob_ref(
-/*=====================*/
-	byte*		dest,	/*!< in: where to store */
-	ulint		col_len,/*!< in: dest buffer size: determines into
-				how many bytes the BLOB length is stored,
-				the space for the length may vary from 1
-				to 4 bytes */
-	const void*	data,	/*!< in: BLOB data; if the value to store
-				is SQL NULL this should be NULL pointer */
-	ulint		len)	/*!< in: BLOB length; if the value to store
-				is SQL NULL this should be 0; remember
-				also to set the NULL bit in the MySQL record
-				header! */
-{
-	/* The last 8 bytes of dest are reserved for the pointer to the
-	data; whatever precedes them (1 - 4 bytes) holds the BLOB length.
-	In 32-bit architectures only the first 4 bytes of the pointer slot
-	are used. */
-	const ulint	len_bytes = col_len - 8;
-
-	/* MySQL might assume that everything except the length and the
-	pointer fields is zero, so clear the whole reference first. */
-	memset(dest, '\0', col_len);
-
-	/* The length must be representable in the bytes reserved for it. */
-	ut_a(len_bytes > 1 || len < 256);
-	ut_a(len_bytes > 2 || len < 256 * 256);
-	ut_a(len_bytes > 3 || len < 256 * 256 * 256);
-
-	memcpy(dest + len_bytes, &data, sizeof data);
-	mach_write_to_n_little_endian(dest, len_bytes, len);
-}
-
-/*******************************************************************//**
-Decodes a BLOB reference stored in the MySQL format: the leading
-col_len - 8 bytes hold the BLOB length (little-endian) and the trailing
-8-byte slot holds the pointer to the BLOB data.
-@return pointer to BLOB data */
-UNIV_INTERN
-const byte*
-row_mysql_read_blob_ref(
-/*====================*/
-	ulint*		len,		/*!< out: BLOB length */
-	const byte*	ref,		/*!< in: BLOB reference in the
-					MySQL format */
-	ulint		col_len)	/*!< in: BLOB reference length
-					(not BLOB length) */
-{
-	const ulint	len_bytes = col_len - 8;
-	byte*		blob = NULL;
-
-	*len = mach_read_from_n_little_endian(ref, len_bytes);
-
-	/* Copy out as many pointer bytes as a pointer occupies here. */
-	memcpy(&blob, ref + len_bytes, sizeof blob);
-
-	return(blob);
-}
-
-/**************************************************************//**
-Fills a buffer with space characters, encoded according to the minimum
-character width of the column's character set (1, 2 or 4 bytes; the
-multi-byte forms are big-endian 0x0020 and 0x00000020). */
-UNIV_INTERN
-void
-row_mysql_pad_col(
-/*==============*/
-	ulint	mbminlen,	/*!< in: minimum size of a character,
-				in bytes */
-	byte*	pad,		/*!< out: padded buffer */
-	ulint	len)		/*!< in: number of bytes to pad */
-{
-	ulint	pos;
-
-	switch (UNIV_EXPECT(mbminlen, 1)) {
-	default:
-		/* Only widths 1, 2 and 4 are supported. */
-		ut_error;
-	case 1:
-		/* space=0x20 */
-		memset(pad, 0x20, len);
-		break;
-	case 2:
-		/* space=0x0020; len must be a whole number of characters */
-		ut_a(len % 2 == 0);
-
-		for (pos = 0; pos < len; pos += 2) {
-			pad[pos] = 0x00;
-			pad[pos + 1] = 0x20;
-		}
-		break;
-	case 4:
-		/* space=0x00000020; len must be a whole number of
-		characters */
-		ut_a(len % 4 == 0);
-
-		for (pos = 0; pos < len; pos += 4) {
-			pad[pos] = 0x00;
-			pad[pos + 1] = 0x00;
-			pad[pos + 2] = 0x00;
-			pad[pos + 3] = 0x20;
-		}
-		break;
-	}
-}
-
-/**************************************************************//**
-Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
-The counterpart of this function is row_sel_field_store_in_mysql_format() in
-row0sel.cc.
-
-Summary of the conversions performed, by main type of the dfield:
-- DATA_INT: the bytes are copied into buf in reverse order and, for signed
-  columns, the top bit of the first byte is flipped; dfield points to buf.
-- DATA_VARCHAR, DATA_VARMYSQL, DATA_BINARY: a true VARCHAR has its length
-  prefix decoded; otherwise trailing spaces are trimmed. dfield points
-  into mysql_data.
-- DATA_MYSQL in the compact format with mbminlen == 1 and mbmaxlen > 1:
-  trailing 0x20 bytes are stripped, but not below the number of characters.
-- DATA_BLOB taken from a MySQL row: the BLOB reference is decoded and
-  dfield points to the BLOB data.
-- anything else: dfield points to mysql_data with the given col_len.
-@return up to which byte we used buf in the conversion */
-UNIV_INTERN
-byte*
-row_mysql_store_col_in_innobase_format(
-/*===================================*/
- dfield_t* dfield, /*!< in/out: dfield where dtype
- information must be already set when
- this function is called! */
- byte* buf, /*!< in/out: buffer for a converted
- integer value; this must be at least
- col_len long then! NOTE that dfield
- may also get a pointer to 'buf',
- therefore do not discard this as long
- as dfield is used! */
- ibool row_format_col, /*!< TRUE if the mysql_data is from
- a MySQL row, FALSE if from a MySQL
- key value;
- in MySQL, a true VARCHAR storage
- format differs in a row and in a
- key value: in a key value the length
- is always stored in 2 bytes! */
- const byte* mysql_data, /*!< in: MySQL column value, not
- SQL NULL; NOTE that dfield may also
- get a pointer to mysql_data,
- therefore do not discard this as long
- as dfield is used! */
- ulint col_len, /*!< in: MySQL column length; NOTE that
- this is the storage length of the
- column in the MySQL format row, not
- necessarily the length of the actual
- payload data; if the column is a true
- VARCHAR then this is irrelevant */
- ulint comp) /*!< in: nonzero=compact format */
-{
- const byte* ptr = mysql_data;
- const dtype_t* dtype;
- ulint type;
- ulint lenlen;
-
- dtype = dfield_get_type(dfield);
-
- type = dtype->mtype;
-
- if (type == DATA_INT) {
- /* Store integer data in Innobase in a big-endian format,
- sign bit negated if the data is a signed integer. In MySQL,
- integers are stored in a little-endian format. */
-
- /* Start one past the end of the destination and walk
- backwards while reading mysql_data forwards, which reverses
- the byte order. */
- byte* p = buf + col_len;
-
- for (;;) {
- p--;
- *p = *mysql_data;
- if (p == buf) {
- break;
- }
- mysql_data++;
- }
-
- if (!(dtype->prtype & DATA_UNSIGNED)) {
-
- /* Signed column: flip the top bit of the first
- (most significant) byte. */
- *buf ^= 128;
- }
-
- /* The field data is the converted copy in buf; advance buf
- past the bytes just used so that it can be returned. */
- ptr = buf;
- buf += col_len;
- } else if ((type == DATA_VARCHAR
- || type == DATA_VARMYSQL
- || type == DATA_BINARY)) {
-
- if (dtype_get_mysql_type(dtype) == DATA_MYSQL_TRUE_VARCHAR) {
- /* The length of the actual data is stored to 1 or 2
- bytes at the start of the field */
-
- if (row_format_col) {
- if (dtype->prtype & DATA_LONG_TRUE_VARCHAR) {
- lenlen = 2;
- } else {
- lenlen = 1;
- }
- } else {
- /* In a MySQL key value, lenlen is always 2 */
- lenlen = 2;
- }
-
- /* col_len is overwritten with the decoded payload
- length; ptr is set past the length prefix. */
- ptr = row_mysql_read_true_varchar(
- &col_len, mysql_data, lenlen);
- } else {
- /* Remove trailing spaces from old style VARCHAR
- columns. */
-
- /* Handle Unicode strings differently. */
- ulint mbminlen = dtype_get_mbminlen(dtype);
-
- ptr = mysql_data;
-
- switch (mbminlen) {
- default:
- ut_error;
- case 4:
- /* space=0x00000020 */
- /* Trim "half-chars", just in case. */
- col_len &= ~3;
-
- while (col_len >= 4
- && ptr[col_len - 4] == 0x00
- && ptr[col_len - 3] == 0x00
- && ptr[col_len - 2] == 0x00
- && ptr[col_len - 1] == 0x20) {
- col_len -= 4;
- }
- break;
- case 2:
- /* space=0x0020 */
- /* Trim "half-chars", just in case. */
- col_len &= ~1;
-
- while (col_len >= 2 && ptr[col_len - 2] == 0x00
- && ptr[col_len - 1] == 0x20) {
- col_len -= 2;
- }
- break;
- case 1:
- /* space=0x20 */
- while (col_len > 0
- && ptr[col_len - 1] == 0x20) {
- col_len--;
- }
- }
- }
- } else if (comp && type == DATA_MYSQL
- && dtype_get_mbminlen(dtype) == 1
- && dtype_get_mbmaxlen(dtype) > 1) {
- /* In some cases we strip trailing spaces from UTF-8 and other
- multibyte charsets, from FIXED-length CHAR columns, to save
- space. UTF-8 would otherwise normally use 3 * the string length
- bytes to store an ASCII string! */
-
- /* We assume that this CHAR field is encoded in a
- variable-length character set where spaces have
- 1:1 correspondence to 0x20 bytes, such as UTF-8.
-
- Consider a CHAR(n) field, a field of n characters.
- It will contain between n * mbminlen and n * mbmaxlen bytes.
- We will try to truncate it to n bytes by stripping
- space padding. If the field contains single-byte
- characters only, it will be truncated to n characters.
- Consider a CHAR(5) field containing the string ".a "
- where "." denotes a 3-byte character represented by
- the bytes "$%&". After our stripping, the string will
- be stored as "$%&a " (5 bytes). The string ".abc "
- will be stored as "$%&abc" (6 bytes).
-
- The space padding will be restored in row0sel.cc, function
- row_sel_field_store_in_mysql_format(). */
-
- ulint n_chars;
-
- ut_a(!(dtype_get_len(dtype) % dtype_get_mbmaxlen(dtype)));
-
- n_chars = dtype_get_len(dtype) / dtype_get_mbmaxlen(dtype);
-
- /* Strip space padding. */
- while (col_len > n_chars && ptr[col_len - 1] == 0x20) {
- col_len--;
- }
- } else if (type == DATA_BLOB && row_format_col) {
-
- /* col_len goes in as the length of the BLOB reference and
- comes back as the length of the BLOB itself. */
- ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
- }
-
- dfield_set_data(dfield, ptr, col_len);
-
- return(buf);
-}
-
-/**************************************************************//**
-Converts a row from the MySQL format into the Innobase format, one
-template column at a time. SQL NULL columns are marked as NULL in the
-target tuple; all others go through
-row_mysql_store_col_in_innobase_format(). Note that the function to convert
-a MySQL format key value to an InnoDB dtuple is
-row_sel_convert_mysql_key_to_innobase() in row0sel.cc. */
-static
-void
-row_mysql_convert_row_to_innobase(
-/*==============================*/
-	dtuple_t*	row,		/*!< in/out: Innobase row where the
-					field type information is already
-					copied there! */
-	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct where template
-					must be of type ROW_MYSQL_WHOLE_ROW */
-	byte*		mysql_rec)	/*!< in: row in the MySQL format;
-					NOTE: do not discard as long as
-					row is used, as row may contain
-					pointers to this record! */
-{
-	ut_ad(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
-	ut_ad(prebuilt->mysql_template);
-
-	for (ulint col_no = 0; col_no < prebuilt->n_template; col_no++) {
-
-		const mysql_row_templ_t*	templ
-			= prebuilt->mysql_template + col_no;
-		dfield_t*			dfield
-			= dtuple_get_nth_field(row, col_no);
-
-		/* A nonzero NULL bit mask means that the column may be
-		SQL NULL; the NULL flag is then read from the record. */
-		if (templ->mysql_null_bit_mask != 0
-		    && (mysql_rec[templ->mysql_null_byte_offset]
-			& (byte) (templ->mysql_null_bit_mask))) {
-
-			/* It is SQL NULL */
-			dfield_set_null(dfield);
-
-			continue;
-		}
-
-		row_mysql_store_col_in_innobase_format(
-			dfield,
-			prebuilt->ins_upd_rec_buff + templ->mysql_col_offset,
-			TRUE, /* MySQL row format data */
-			mysql_rec + templ->mysql_col_offset,
-			templ->mysql_col_len,
-			dict_table_is_comp(prebuilt->table));
-	}
-
-	/* If there is a FTS doc id column and it is not user supplied (
-	generated by server) then assign it a new doc id. */
-	if (prebuilt->table->fts) {
-
-		ut_a(prebuilt->table->fts->doc_col != ULINT_UNDEFINED);
-
-		fts_create_doc_id(prebuilt->table, row, prebuilt->heap);
-	}
-}
-
-/****************************************************************//**
-Handles user errors and lock waits detected by the database engine.
-
-The error to handle is taken from trx->error_state, which must not be
-DB_SUCCESS on entry and is reset to DB_SUCCESS before the error is
-dispatched. Depending on the error the function rolls back to the given
-savepoint, rolls back the whole transaction, waits for a lock, prints a
-diagnostic to stderr, or aborts the process.
-@return true if it was a lock wait and we should continue running the
-query thread and in that case the thr is ALREADY in the running state. */
-UNIV_INTERN
-bool
-row_mysql_handle_errors(
-/*====================*/
- dberr_t* new_err,/*!< out: possible new error encountered in
- lock wait, or if no new error, the value
- of trx->error_state at the entry of this
- function */
- trx_t* trx, /*!< in: transaction */
- que_thr_t* thr, /*!< in: query thread, or NULL */
- trx_savept_t* savept) /*!< in: savepoint, or NULL */
-{
- dberr_t err;
-
-/* Re-entered from the DB_LOCK_WAIT case when the wait itself ended with
-a new error in trx->error_state. */
-handle_new_error:
- err = trx->error_state;
-
- ut_a(err != DB_SUCCESS);
-
- trx->error_state = DB_SUCCESS;
-
- switch (err) {
- case DB_LOCK_WAIT_TIMEOUT:
- if (row_rollback_on_timeout) {
- /* Roll back the whole transaction (NULL savepoint). */
- trx_rollback_to_savepoint(trx, NULL);
- break;
- }
- /* fall through */
- case DB_DUPLICATE_KEY:
- case DB_FOREIGN_DUPLICATE_KEY:
- case DB_TOO_BIG_RECORD:
- case DB_TOO_BIG_FOR_REDO:
- case DB_UNDO_RECORD_TOO_BIG:
- case DB_ROW_IS_REFERENCED:
- case DB_NO_REFERENCED_ROW:
- case DB_CANNOT_ADD_CONSTRAINT:
- case DB_TOO_MANY_CONCURRENT_TRXS:
- case DB_OUT_OF_FILE_SPACE:
- case DB_READ_ONLY:
- case DB_FTS_INVALID_DOCID:
- case DB_INTERRUPTED:
- case DB_DICT_CHANGED:
- case DB_TABLE_NOT_FOUND:
- case DB_DECRYPTION_FAILED:
- if (savept) {
- /* Roll back the latest, possibly incomplete insertion
- or update */
-
- trx_rollback_to_savepoint(trx, savept);
- }
- /* MySQL will roll back the latest SQL statement */
- break;
- case DB_LOCK_WAIT:
- lock_wait_suspend_thread(thr);
-
- if (trx->error_state != DB_SUCCESS) {
- /* The wait produced a new error: stop the thread and
- handle that error from the top. */
- que_thr_stop_for_mysql(thr);
-
- goto handle_new_error;
- }
-
- *new_err = err;
-
- return(true);
-
- case DB_DEADLOCK:
- case DB_LOCK_TABLE_FULL:
- /* Roll back the whole transaction; this resolution was added
- to version 3.23.43 */
-
- trx_rollback_to_savepoint(trx, NULL);
- break;
-
- case DB_MUST_GET_MORE_FILE_SPACE:
- fputs("InnoDB: The database cannot continue"
- " operation because of\n"
- "InnoDB: lack of space. You must add"
- " a new data file to\n"
- "InnoDB: my.cnf and restart the database.\n", stderr);
- abort();
-
- case DB_CORRUPTION:
- case DB_PAGE_CORRUPTED:
- fputs("InnoDB: We detected index corruption"
- " in an InnoDB type table.\n"
- "InnoDB: You have to dump + drop + reimport"
- " the table or, in\n"
- "InnoDB: a case of widespread corruption,"
- " dump all InnoDB\n"
- "InnoDB: tables and recreate the"
- " whole InnoDB tablespace.\n"
- "InnoDB: If the mysqld server crashes"
- " after the startup or when\n"
- "InnoDB: you dump the tables, look at\n"
- "InnoDB: " REFMAN "forcing-innodb-recovery.html"
- " for help.\n", stderr);
- break;
- case DB_FOREIGN_EXCEED_MAX_CASCADE:
- fprintf(stderr, "InnoDB: Cannot delete/update rows with"
- " cascading foreign key constraints that exceed max"
- " depth of %lu\n"
- "Please drop excessive foreign constraints"
- " and try again\n", (ulong) DICT_FK_MAX_RECURSIVE_LOAD);
- break;
- default:
- fprintf(stderr, "InnoDB: unknown error code %lu\n",
- (ulong) err);
- ut_error;
- }
-
- /* Report an error that was set in trx->error_state while handling
- (for example by a rollback call above) in preference to the original
- error. */
- if (trx->error_state != DB_SUCCESS) {
- *new_err = trx->error_state;
- } else {
- *new_err = err;
- }
-
- trx->error_state = DB_SUCCESS;
-
- return(false);
-}
-
-/********************************************************************//**
-Create a prebuilt struct for a MySQL table handle.
-
-The struct and the objects created here (search key buffers, search
-tuple, clustered index reference tuple) are all allocated from one memory
-heap, which is stored in prebuilt->heap. The initial heap size is an
-estimate that also covers objects that are likely to be created later.
-@return own: a prebuilt struct */
-UNIV_INTERN
-row_prebuilt_t*
-row_create_prebuilt(
-/*================*/
- dict_table_t* table, /*!< in: Innobase table handle */
- ulint mysql_row_len) /*!< in: length in bytes of a row in
- the MySQL format */
-{
- row_prebuilt_t* prebuilt;
- mem_heap_t* heap;
- dict_index_t* clust_index;
- dict_index_t* temp_index;
- dtuple_t* ref;
- ulint ref_len;
- uint srch_key_len = 0;
- ulint search_tuple_n_fields;
-
- search_tuple_n_fields = 2 * dict_table_get_n_cols(table);
-
- clust_index = dict_table_get_first_index(table);
-
- /* Make sure that search_tuple is long enough for clustered index */
- ut_a(2 * dict_table_get_n_cols(table) >= clust_index->n_fields);
-
- ref_len = dict_index_get_n_unique(clust_index);
-
-
- /* Maximum size of the buffer needed for conversion of INTs from
- little endian format to big endian format in an index. An index
- can have maximum 16 columns (MAX_REF_PARTS) in it. Therefore
- Max size for PK: 16 * 8 bytes (BIGINT's size) = 128 bytes
- Max size Secondary index: 16 * 8 bytes + PK = 256 bytes.
- The replacement list is fully parenthesized so that the macro
- expands as one value in any expression. */
-#define MAX_SRCH_KEY_VAL_BUFFER (2 * (8 * MAX_REF_PARTS))
-
-#define PREBUILT_HEAP_INITIAL_SIZE \
- ( \
- sizeof(*prebuilt) \
- /* allocd in this function */ \
- + DTUPLE_EST_ALLOC(search_tuple_n_fields) \
- + DTUPLE_EST_ALLOC(ref_len) \
- /* allocd in row_prebuild_sel_graph() */ \
- + sizeof(sel_node_t) \
- + sizeof(que_fork_t) \
- + sizeof(que_thr_t) \
- /* allocd in row_get_prebuilt_update_vector() */ \
- + sizeof(upd_node_t) \
- + sizeof(upd_t) \
- + sizeof(upd_field_t) \
- * dict_table_get_n_cols(table) \
- + sizeof(que_fork_t) \
- + sizeof(que_thr_t) \
- /* allocd in row_get_prebuilt_insert_row() */ \
- + sizeof(ins_node_t) \
- /* mysql_row_len could be huge and we are not \
- sure if this prebuilt instance is going to be \
- used in inserts */ \
- + (mysql_row_len < 256 ? mysql_row_len : 0) \
- + DTUPLE_EST_ALLOC(dict_table_get_n_cols(table)) \
- + sizeof(que_fork_t) \
- + sizeof(que_thr_t) \
- )
-
- /* Calculate size of key buffer used to store search key in
- InnoDB format. MySQL stores INTs in little endian format and
- InnoDB stores INTs in big endian format with the sign bit
- flipped. All other field types are stored/compared the same
- in MySQL and InnoDB, so we must create a buffer containing
- the INT key parts in InnoDB format. We need two such buffers
- since both start and end keys are used in records_in_range(). */
-
- /* For every index, sum the fixed lengths of the DATA_INT columns
- among its first n_uniq fields; keep the largest sum. */
- for (temp_index = dict_table_get_first_index(table); temp_index;
- temp_index = dict_table_get_next_index(temp_index)) {
- DBUG_EXECUTE_IF("innodb_srch_key_buffer_max_value",
- ut_a(temp_index->n_user_defined_cols
- == MAX_REF_PARTS););
- uint temp_len = 0;
- for (uint i = 0; i < temp_index->n_uniq; i++) {
- if (temp_index->fields[i].col->mtype == DATA_INT) {
- temp_len +=
- temp_index->fields[i].fixed_len;
- }
- }
- srch_key_len = max(srch_key_len,temp_len);
- }
-
- ut_a(srch_key_len <= MAX_SRCH_KEY_VAL_BUFFER);
-
- DBUG_EXECUTE_IF("innodb_srch_key_buffer_max_value",
- ut_a(srch_key_len == MAX_SRCH_KEY_VAL_BUFFER););
-
- /* We allocate enough space for the objects that are likely to
- be created later in order to minimize the number of malloc()
- calls */
- heap = mem_heap_create(PREBUILT_HEAP_INITIAL_SIZE + 2 * srch_key_len);
-
- prebuilt = static_cast<row_prebuilt_t*>(
- mem_heap_zalloc(heap, sizeof(*prebuilt)));
-
- prebuilt->magic_n = ROW_PREBUILT_ALLOCATED;
- prebuilt->magic_n2 = ROW_PREBUILT_ALLOCATED;
-
- prebuilt->table = table;
-
- prebuilt->sql_stat_start = TRUE;
- prebuilt->heap = heap;
-
- prebuilt->srch_key_val_len = srch_key_len;
- if (prebuilt->srch_key_val_len) {
- /* One allocation holds both key buffers back to back. */
- prebuilt->srch_key_val1 = static_cast<byte*>(
- mem_heap_alloc(prebuilt->heap,
- 2 * prebuilt->srch_key_val_len));
- prebuilt->srch_key_val2 = prebuilt->srch_key_val1 +
- prebuilt->srch_key_val_len;
- } else {
- prebuilt->srch_key_val1 = NULL;
- prebuilt->srch_key_val2 = NULL;
- }
-
- btr_pcur_reset(&prebuilt->pcur);
- btr_pcur_reset(&prebuilt->clust_pcur);
-
- prebuilt->select_lock_type = LOCK_NONE;
- prebuilt->stored_select_lock_type = LOCK_NONE_UNSET;
-
- prebuilt->search_tuple = dtuple_create(heap, search_tuple_n_fields);
-
- ref = dtuple_create(heap, ref_len);
-
- dict_index_copy_types(ref, clust_index, ref_len);
-
- prebuilt->clust_ref = ref;
-
- prebuilt->autoinc_error = DB_SUCCESS;
- prebuilt->autoinc_offset = 0;
-
- /* Default to 1, we will set the actual value later in
- ha_innobase::get_auto_increment(). */
- prebuilt->autoinc_increment = 1;
-
- prebuilt->autoinc_last_value = 0;
-
- /* During UPDATE and DELETE we need the doc id. */
- prebuilt->fts_doc_id = 0;
-
- prebuilt->mysql_row_len = mysql_row_len;
-
- return(prebuilt);
-}
-
-/********************************************************************//**
-Free a prebuilt struct for a MySQL table handle.
-
-The magic numbers of the struct are verified first; a mismatch is reported
-to stderr and ends in ut_error. The function then frees the MySQL
-template, the insert/select/update query graphs, the blob and old-version
-heaps and the fetch cache, closes the table, and finally frees the heap of
-the prebuilt struct. */
-UNIV_INTERN
-void
-row_prebuilt_free(
-/*==============*/
- row_prebuilt_t* prebuilt, /*!< in, own: prebuilt struct */
- ibool dict_locked) /*!< in: TRUE=data dictionary locked */
-{
- ulint i;
-
- if (UNIV_UNLIKELY
- (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED
- || prebuilt->magic_n2 != ROW_PREBUILT_ALLOCATED)) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to free a corrupt\n"
- "InnoDB: table handle. Magic n %lu,"
- " magic n2 %lu, table name ",
- (ulong) prebuilt->magic_n,
- (ulong) prebuilt->magic_n2);
- ut_print_name(stderr, NULL, TRUE, prebuilt->table->name);
- putc('\n', stderr);
-
- mem_analyze_corruption(prebuilt);
-
- ut_error;
- }
-
- /* Mark the struct as freed before releasing its parts. */
- prebuilt->magic_n = ROW_PREBUILT_FREED;
- prebuilt->magic_n2 = ROW_PREBUILT_FREED;
-
- btr_pcur_reset(&prebuilt->pcur);
- btr_pcur_reset(&prebuilt->clust_pcur);
-
- if (prebuilt->mysql_template) {
- mem_free(prebuilt->mysql_template);
- }
-
- if (prebuilt->ins_graph) {
- que_graph_free_recursive(prebuilt->ins_graph);
- }
-
- if (prebuilt->sel_graph) {
- que_graph_free_recursive(prebuilt->sel_graph);
- }
-
- if (prebuilt->upd_graph) {
- que_graph_free_recursive(prebuilt->upd_graph);
- }
-
- if (prebuilt->blob_heap) {
- mem_heap_free(prebuilt->blob_heap);
- }
-
- if (prebuilt->old_vers_heap) {
- mem_heap_free(prebuilt->old_vers_heap);
- }
-
- if (prebuilt->fetch_cache[0] != NULL) {
- /* The walk below expects this layout, repeated
- MYSQL_FETCH_CACHE_SIZE times starting at base: a 4-byte magic
- number, a row of mysql_row_len bytes, a 4-byte magic number.
- fetch_cache[0] therefore starts 4 bytes after base. */
- byte* base = prebuilt->fetch_cache[0] - 4;
- byte* ptr = base;
-
- for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
- byte* row;
- ulint magic1;
- ulint magic2;
-
- magic1 = mach_read_from_4(ptr);
- ptr += 4;
-
- row = ptr;
- ptr += prebuilt->mysql_row_len;
-
- magic2 = mach_read_from_4(ptr);
- ptr += 4;
-
- /* Both magic numbers must be intact and the cache
- slot must point exactly at the expected row. */
- if (ROW_PREBUILT_FETCH_MAGIC_N != magic1
- || row != prebuilt->fetch_cache[i]
- || ROW_PREBUILT_FETCH_MAGIC_N != magic2) {
-
- fputs("InnoDB: Error: trying to free"
- " a corrupt fetch buffer.\n", stderr);
-
- mem_analyze_corruption(base);
- ut_error;
- }
- }
-
- /* The whole cache is released with one call on base. */
- mem_free(base);
- }
-
- dict_table_close(prebuilt->table, dict_locked, TRUE);
-
- /* prebuilt itself lives in this heap (see row_create_prebuilt()),
- so it must not be touched after this call. */
- mem_heap_free(prebuilt->heap);
-}
-
-/*********************************************************************//**
-Updates the transaction pointers in query graphs stored in the prebuilt
-struct.
-
-Both handles are checked against their magic numbers first; a mismatch is
-reported to stderr and ends in ut_error. The transaction is then stored
-in prebuilt->trx and in each of the insert, update and select graphs that
-exists. */
-UNIV_INTERN
-void
-row_update_prebuilt_trx(
-/*====================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct
- in MySQL handle */
- trx_t* trx) /*!< in: transaction handle */
-{
- if (trx->magic_n != TRX_MAGIC_N) {
- fprintf(stderr,
- "InnoDB: Error: trying to use a corrupt\n"
- "InnoDB: trx handle. Magic n %lu\n",
- (ulong) trx->magic_n);
-
- mem_analyze_corruption(trx);
-
- ut_error;
- }
-
- if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
- fprintf(stderr,
- "InnoDB: Error: trying to use a corrupt\n"
- "InnoDB: table handle. Magic n %lu, table name ",
- (ulong) prebuilt->magic_n);
- ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
- putc('\n', stderr);
-
- mem_analyze_corruption(prebuilt);
-
- ut_error;
- }
-
- prebuilt->trx = trx;
-
- /* Graphs are created on demand, so each one may still be NULL. */
- if (prebuilt->ins_graph) {
- prebuilt->ins_graph->trx = trx;
- }
-
- if (prebuilt->upd_graph) {
- prebuilt->upd_graph->trx = trx;
- }
-
- if (prebuilt->sel_graph) {
- prebuilt->sel_graph->trx = trx;
- }
-}
-
-/*********************************************************************//**
-Returns the dtuple that the prebuilt struct uses for insertions. The
-insert node and its query graph are built on first use, and rebuilt when
-the cached ones no longer match the table definition.
-@return prebuilt dtuple; the column type information is also set in it */
-static
-dtuple_t*
-row_get_prebuilt_insert_row(
-/*========================*/
-	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
-					handle */
-{
-	dict_table_t*	table = prebuilt->table;
-
-	ut_ad(prebuilt && table && prebuilt->trx);
-
-	if (prebuilt->ins_node != 0) {
-
-		/* Indexes may have been dropped or added since the
-		insert template was built. It can be reused only if the
-		table definition id is unchanged and the node has one
-		entry per index of the table. */
-		const bool	up_to_date
-			= prebuilt->trx_id == table->def_trx_id
-			&& UT_LIST_GET_LEN(prebuilt->ins_node->entry_list)
-			== UT_LIST_GET_LEN(table->indexes);
-
-		if (up_to_date) {
-
-			return(prebuilt->ins_node->row);
-		}
-
-		ut_ad(prebuilt->trx_id < table->def_trx_id);
-
-		/* Discard the stale graph; a new one is built below. */
-		que_graph_free_recursive(prebuilt->ins_graph);
-
-		prebuilt->ins_graph = 0;
-	}
-
-	/* Create an insert node and query graph to the prebuilt struct */
-
-	ins_node_t*	ins = ins_node_create(
-		INS_DIRECT, table, prebuilt->heap);
-
-	prebuilt->ins_node = ins;
-
-	/* The row conversion buffer is allocated only once. */
-	if (prebuilt->ins_upd_rec_buff == 0) {
-		void*	rec_buff = mem_heap_alloc(
-			prebuilt->heap, prebuilt->mysql_row_len);
-
-		prebuilt->ins_upd_rec_buff = static_cast<byte*>(rec_buff);
-	}
-
-	dtuple_t*	tuple = dtuple_create(
-		prebuilt->heap, dict_table_get_n_cols(table));
-
-	dict_table_copy_types(tuple, table);
-
-	ins_node_set_new_row(ins, tuple);
-
-	prebuilt->ins_graph = static_cast<que_fork_t*>(
-		que_node_get_parent(
-			pars_complete_graph_for_exec(
-				ins,
-				prebuilt->trx, prebuilt->heap)));
-
-	prebuilt->ins_graph->state = QUE_FORK_ACTIVE;
-
-	/* Remember which table definition this template was built for. */
-	prebuilt->trx_id = table->def_trx_id;
-
-	return(prebuilt->ins_node->row);
-}
-
-/*********************************************************************//**
-Bumps the modification counter of a table and, once enough rows have
-been modified, triggers a recalculation of the table and index
-statistics. Does nothing while the statistics are not yet initialized. */
-UNIV_INLINE
-void
-row_update_statistics_if_needed(
-/*============================*/
-	dict_table_t*	table)	/*!< in: table */
-{
-	if (!table->stat_initialized) {
-		DBUG_EXECUTE_IF(
-			"test_upd_stats_if_needed_not_inited",
-			fprintf(stderr, "test_upd_stats_if_needed_not_inited "
-				"was executed\n");
-		);
-		return;
-	}
-
-	/* Value of the counter before this modification is counted. */
-	const ib_uint64_t	modified = table->stat_modified_counter++;
-	const ib_uint64_t	row_count = dict_table_get_n_rows(table);
-
-	if (dict_stats_is_persistent_enabled(table)) {
-		/* Persistent statistics: queue a recalculation once more
-		than 10% of the rows have changed, if auto recalc is on. */
-		const bool	over_limit = modified > row_count / 10;
-
-		if (over_limit
-		    && dict_stats_auto_recalc_is_enabled(table)) {
-
-			dict_stats_recalc_pool_add(table);
-			table->stat_modified_counter = 0;
-		}
-
-		return;
-	}
-
-	/* Transient statistics: recalculate when 1 / 16 (6.25%) of the
-	table has been modified since the last time a statistics batch was
-	run. The constant 16 keeps the recalculation to at most every 16th
-	round, since we may have a counter table which is very small and
-	updated very often. */
-	ib_uint64_t	limit = 16 + row_count / 16;
-
-	if (srv_stats_modified_counter) {
-		limit = ut_min(srv_stats_modified_counter, limit);
-	}
-
-	if (modified <= limit) {
-
-		return;
-	}
-
-	ut_ad(!mutex_own(&dict_sys->mutex));
-	/* this will reset table->stat_modified_counter to 0 */
-	dict_stats_update(table, DICT_STATS_RECALC_TRANSIENT);
-}
-
-/*********************************************************************//**
-Sets an AUTO_INC type lock on the table mentioned in prebuilt. The
-AUTO_INC lock gives exclusive access to the auto-inc counter of the
-table. The lock is reserved only for the duration of an SQL statement.
-It is not compatible with another AUTO_INC or exclusive lock on the
-table. The lock request is retried for as long as
-row_mysql_handle_errors() reports that the failure was a lock wait.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_lock_table_autoinc_for_mysql(
-/*=============================*/
-	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in the MySQL
-					table handle */
-{
-	trx_t*			trx = prebuilt->trx;
-	const dict_table_t*	table = prebuilt->table;
-
-	ut_ad(trx);
-
-	/* If we already hold an AUTOINC lock on the table then do nothing.
-	Note: We peek at the value of the current owner without acquiring
-	the lock mutex. */
-	if (trx == table->autoinc_trx) {
-
-		return(DB_SUCCESS);
-	}
-
-	trx->op_info = "setting auto-inc lock";
-
-	/* Make sure that the insert node and graph exist. */
-	row_get_prebuilt_insert_row(prebuilt);
-
-	ins_node_t*	ins = prebuilt->ins_node;
-
-	/* We use the insert query graph as the dummy graph needed
-	in the lock module call */
-	que_thr_t*	thr = que_fork_get_first_thr(prebuilt->ins_graph);
-
-	que_thr_move_to_run_state_for_mysql(thr, trx);
-
-	dberr_t		err;
-
-	for (;;) {
-		thr->run_node = ins;
-		thr->prev_node = ins;
-
-		/* It may be that the current session has not yet started
-		its transaction, or it has been committed: */
-		trx_start_if_not_started_xa(trx);
-
-		err = lock_table(0, prebuilt->table, LOCK_AUTO_INC, thr);
-
-		trx->error_state = err;
-
-		if (err == DB_SUCCESS) {
-			que_thr_stop_for_mysql_no_error(thr, trx);
-			break;
-		}
-
-		que_thr_stop_for_mysql(thr);
-
-		if (!row_mysql_handle_errors(&err, trx, thr, NULL)) {
-			/* Not a lock wait: give up with err. */
-			break;
-		}
-
-		/* It was a lock wait: request the lock again. */
-	}
-
-	trx->op_info = "";
-
-	return(err);
-}
-
-/*********************************************************************//**
-Sets a table lock on the table mentioned in prebuilt, or on an explicitly
-given table. The request is repeated for as long as
-row_mysql_handle_errors() reports that the failure was a lock wait.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_lock_table_for_mysql(
-/*=====================*/
-	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct in the MySQL
-					table handle */
-	dict_table_t*	table,		/*!< in: table to lock, or NULL
-					if prebuilt->table should be
-					locked as
-					prebuilt->select_lock_type */
-	ulint		mode)		/*!< in: lock mode of table
-					(ignored if table==NULL) */
-{
-	trx_t*	trx = prebuilt->trx;
-
-	ut_ad(trx);
-
-	trx->op_info = "setting table lock";
-
-	/* The select query graph serves as the dummy graph needed in the
-	lock module call; build it if this handle does not have one yet. */
-	if (prebuilt->sel_graph == NULL) {
-		row_prebuild_sel_graph(prebuilt);
-	}
-
-	que_thr_t*	thr = que_fork_get_first_thr(prebuilt->sel_graph);
-	dberr_t		err;
-
-	que_thr_move_to_run_state_for_mysql(thr, trx);
-
-	for (;;) {
-		dict_table_t*	target;
-		enum lock_mode	lock_type;
-
-		thr->run_node = thr;
-		thr->prev_node = thr->common.parent;
-
-		/* It may be that the current session has not yet started
-		its transaction, or it has been committed: */
-
-		trx_start_if_not_started_xa(trx);
-
-		if (table) {
-			target = table;
-			lock_type = static_cast<enum lock_mode>(mode);
-		} else {
-			target = prebuilt->table;
-			lock_type = static_cast<enum lock_mode>(
-				prebuilt->select_lock_type);
-		}
-
-		err = lock_table(0, target, lock_type, thr);
-
-		trx->error_state = err;
-
-		if (err == DB_SUCCESS) {
-			que_thr_stop_for_mysql_no_error(thr, trx);
-			break;
-		}
-
-		que_thr_stop_for_mysql(thr);
-
-		if (!row_mysql_handle_errors(&err, trx, thr, NULL)) {
-			/* Not a lock wait: give up with this error */
-			break;
-		}
-	}
-
-	trx->op_info = "";
-
-	return(err);
-}
-
-/*********************************************************************//**
-Classifies why a table cannot be used: the tablespace is encrypted (the
-table is then assumed to be unreadable because decryption failed), the
-tablespace exists but is not encrypted (reported as corruption), or the
-tablespace could not be found at all.
-This function does not itself verify readability; it never returns
-DB_SUCCESS.
-@param[in]	table		Table
-@param[in]	trx		Transaction
-@param[in]	push_warning	true if we should push warning to user
-				(only affects the first two cases; a
-				missing tablespace is always logged)
-@return DB_DECRYPTION_FAILED	table is encrypted but decryption failed
-DB_CORRUPTION			table is corrupted
-DB_TABLESPACE_NOT_FOUND		tablespace .ibd file not found */
-static
-dberr_t
-row_mysql_get_table_status(
-	const dict_table_t*	table,
-	trx_t*			trx,
-	bool			push_warning = true)
-{
-	FilSpace	space(table->space, true);
-	char		name_buf[MAX_FULL_NAME_LEN];
-
-	ut_format_name(table->name, TRUE, name_buf, sizeof(name_buf));
-
-	if (!space()) {
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"InnoDB: MySQL is trying to use a table handle"
-			" but the .ibd file for"
-			" table %s does not exist."
-			" Have you deleted the .ibd file"
-			" from the database directory under"
-			" the MySQL datadir, or have you"
-			" used DISCARD TABLESPACE?"
-			" Look from " REFMAN "innodb-troubleshooting.html"
-			" how you can resolve the problem.",
-			name_buf);
-
-		return(DB_TABLESPACE_NOT_FOUND);
-	}
-
-	if (!(space()->crypt_data && space()->crypt_data->is_encrypted())) {
-		if (push_warning) {
-			ib_push_warning(trx, DB_CORRUPTION,
-				"Table %s in file %s corrupted.",
-				name_buf, space()->chain.start->name);
-		}
-
-		return(DB_CORRUPTION);
-	}
-
-	/* The tablespace is encrypted: maybe we cannot access the table
-	due to failing to decrypt. */
-	if (push_warning) {
-		ib_push_warning(trx, HA_ERR_DECRYPTION_FAILED,
-			"Table %s in file %s is encrypted but encryption service or"
-			" used key_id %u is not available. "
-			" Can't continue reading table.",
-			name_buf, space()->chain.start->name,
-			space()->crypt_data->key_id);
-	}
-
-	return(DB_DECRYPTION_FAILED);
-}
-
-/*********************************************************************//**
-Does an insert for MySQL.
-The row is converted from the MySQL format into the prebuilt insert row,
-the prebuilt insert graph is run (and re-run after a lock wait), and on
-success the new document is registered with FTS if the table has an FTS
-index. Unless the transaction is doing fake changes, the insert counters,
-the row estimate and the table statistics are updated as well.
-Refuses to insert (returning an error) when the tablespace was discarded,
-the table is not readable, or innodb_force_recovery is set; asserts on a
-corrupt table handle.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_insert_for_mysql(
-/*=================*/
-	byte*		mysql_rec,	/*!< in: row in the MySQL format */
-	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
-					handle */
-{
-	trx_savept_t	savept;
-	que_thr_t*	thr;
-	dberr_t		err;
-	ibool		was_lock_wait;
-	trx_t*		trx = prebuilt->trx;
-	ins_node_t*	node = prebuilt->ins_node;
-	dict_table_t*	table = prebuilt->table;
-
-	ut_ad(trx);
-
-	if (dict_table_is_discarded(prebuilt->table)) {
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"The table %s doesn't have a corresponding "
-			"tablespace, it was discarded.",
-			prebuilt->table->name);
-
-		return(DB_TABLESPACE_DELETED);
-
-	} else if (!prebuilt->table->is_readable()) {
-		return (row_mysql_get_table_status(prebuilt->table, trx, true));
-	} else if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
-		/* The handle is being used here, not freed. */
-		fprintf(stderr,
-			"InnoDB: Error: trying to use a corrupt\n"
-			"InnoDB: table handle. Magic n %lu, table name ",
-			(ulong) prebuilt->magic_n);
-		ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
-		putc('\n', stderr);
-
-		mem_analyze_corruption(prebuilt);
-
-		ut_error;
-	} else if (srv_force_recovery) {
-		/* Every message line ends in a newline so that the
-		"InnoDB: " prefix of the next line starts a new line. */
-		fputs("InnoDB: innodb_force_recovery is on: we do not allow\n"
-		      "InnoDB: database modifications by the user. Shut down\n"
-		      "InnoDB: mysqld and edit my.cnf so that\n"
-		      "InnoDB: innodb_force_... is removed.\n",
-		      stderr);
-
-		return(DB_READ_ONLY);
-	}
-
-	trx->op_info = "inserting";
-
-	row_mysql_delay_if_needed();
-
-	trx_start_if_not_started_xa(trx);
-
-	/* Make sure the insert graph exists; only after this call is
-	prebuilt->ins_node guaranteed to be set. */
-	row_get_prebuilt_insert_row(prebuilt);
-	node = prebuilt->ins_node;
-
-	row_mysql_convert_row_to_innobase(node->row, prebuilt, mysql_rec);
-
-	savept = trx_savept_take(trx);
-
-	thr = que_fork_get_first_thr(prebuilt->ins_graph);
-
-	/* The first insert of an SQL statement starts from the state that
-	sets the IX lock; later ones start from row id allocation. */
-	if (prebuilt->sql_stat_start) {
-		node->state = INS_NODE_SET_IX_LOCK;
-		prebuilt->sql_stat_start = FALSE;
-	} else {
-		node->state = INS_NODE_ALLOC_ROW_ID;
-	}
-
-	que_thr_move_to_run_state_for_mysql(thr, trx);
-
-run_again:
-	thr->run_node = node;
-	thr->prev_node = node;
-
-	row_ins_step(thr);
-
-	DEBUG_SYNC_C("ib_after_row_insert_step");
-
-	err = trx->error_state;
-
-	if (err != DB_SUCCESS) {
-error_exit:
-		/* Also reached by goto from the FTS Doc ID checks below,
-		with err and trx->error_state already set. */
-		que_thr_stop_for_mysql(thr);
-
-		/* FIXME: What's this ? */
-		thr->lock_state = QUE_THR_LOCK_ROW;
-
-		was_lock_wait = row_mysql_handle_errors(
-			&err, trx, thr, &savept);
-
-		thr->lock_state = QUE_THR_LOCK_NOLOCK;
-
-		if (was_lock_wait) {
-			ut_ad(node->state == INS_NODE_INSERT_ENTRIES
-			      || node->state == INS_NODE_ALLOC_ROW_ID);
-			goto run_again;
-		}
-
-		trx->op_info = "";
-
-		return(err);
-	}
-
-	if (dict_table_has_fts_index(table)
-	    && UNIV_LIKELY(!thr_get_trx(thr)->fake_changes)) {
-		doc_id_t	doc_id;
-
-		/* Extract the doc id from the hidden FTS column */
-		doc_id = fts_get_doc_id_from_row(table, node->row);
-
-		if (doc_id <= 0) {
-			fprintf(stderr,
-				"InnoDB: FTS Doc ID must be large than 0 \n");
-			err = DB_FTS_INVALID_DOCID;
-			trx->error_state = DB_FTS_INVALID_DOCID;
-			goto error_exit;
-		}
-
-		if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
-			doc_id_t	next_doc_id
-				= table->fts->cache->next_doc_id;
-
-			if (doc_id < next_doc_id) {
-				fprintf(stderr,
-					"InnoDB: FTS Doc ID must be large than"
-					" " UINT64PF " for table",
-					next_doc_id - 1);
-				ut_print_name(stderr, trx, TRUE, table->name);
-				putc('\n', stderr);
-
-				err = DB_FTS_INVALID_DOCID;
-				trx->error_state = DB_FTS_INVALID_DOCID;
-				goto error_exit;
-			}
-
-			/* Difference between Doc IDs are restricted within
-			4 bytes integer. See fts_get_encoded_len(). Consecutive
-			doc_ids difference should not exceed
-			FTS_DOC_ID_MAX_STEP value. The subtraction cannot
-			wrap because doc_id >= next_doc_id was established
-			above. */
-
-			if (next_doc_id > 1
-			    && doc_id - next_doc_id >= FTS_DOC_ID_MAX_STEP) {
-				fprintf(stderr,
-					"InnoDB: Doc ID " UINT64PF " is too"
-					" big. Its difference with largest"
-					" used Doc ID " UINT64PF " cannot"
-					" exceed or equal to %d\n",
-					doc_id, next_doc_id - 1,
-					FTS_DOC_ID_MAX_STEP);
-				err = DB_FTS_INVALID_DOCID;
-				trx->error_state = DB_FTS_INVALID_DOCID;
-				goto error_exit;
-			}
-		}
-
-		/* Pass NULL for the columns affected, since an INSERT affects
-		all FTS indexes. */
-		fts_trx_add_op(trx, table, doc_id, FTS_INSERT, NULL);
-	}
-
-	que_thr_stop_for_mysql_no_error(thr, trx);
-
-	if (UNIV_LIKELY(!(trx->fake_changes))) {
-		if (table->is_system_db) {
-			srv_stats.n_system_rows_inserted.inc(size_t(trx->id));
-		} else {
-			srv_stats.n_rows_inserted.inc(size_t(trx->id));
-		}
-
-		if (prebuilt->clust_index_was_generated) {
-			/* set row id to prebuilt */
-			ut_memcpy(prebuilt->row_id, node->row_id_buf, DATA_ROW_ID_LEN);
-		}
-
-		/* Not protected by dict_table_stats_lock() for performance
-		reasons, we would rather get garbage in stat_n_rows (which is
-		just an estimate anyway) than protecting the following code
-		with a latch. */
-		dict_table_n_rows_inc(table);
-
-		row_update_statistics_if_needed(table);
-	}
-
-	trx->op_info = "";
-
-	return(err);
-}
-
-/*********************************************************************//**
-Builds a dummy query graph used in selects and stores it in
-prebuilt->sel_graph. Does nothing if the handle already has one. */
-UNIV_INTERN
-void
-row_prebuild_sel_graph(
-/*===================*/
-	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
-					handle */
-{
-	ut_ad(prebuilt && prebuilt->trx);
-
-	if (prebuilt->sel_graph != NULL) {
-		/* Already built for this handle */
-		return;
-	}
-
-	sel_node_t*	sel_node = sel_node_create(prebuilt->heap);
-
-	que_thr_t*	sel_thr = pars_complete_graph_for_exec(
-		sel_node, prebuilt->trx, prebuilt->heap);
-
-	/* The graph (fork) is the parent of the returned query thread */
-	prebuilt->sel_graph = static_cast<que_fork_t*>(
-		que_node_get_parent(sel_thr));
-
-	prebuilt->sel_graph->state = QUE_FORK_ACTIVE;
-}
-
-/*********************************************************************//**
-Creates a query graph node of 'update' type to be used in the MySQL
-interface. The node gets its own persistent cursor and an update vector
-with room for as many fields as the table has columns.
-@return own: update node */
-UNIV_INTERN
-upd_node_t*
-row_create_update_node_for_mysql(
-/*=============================*/
-	dict_table_t*	table,	/*!< in: table to update */
-	mem_heap_t*	heap)	/*!< in: mem heap from which allocated */
-{
-	upd_node_t*	upd_node = upd_node_create(heap);
-
-	/* Flags describing how the node is used */
-	upd_node->in_mysql_interface = TRUE;
-	upd_node->is_delete = FALSE;
-	upd_node->searched_update = FALSE;
-
-	/* Cursor and target table */
-	upd_node->select = NULL;
-	upd_node->pcur = btr_pcur_create_for_mysql();
-	upd_node->table = table;
-
-	/* Update vector sized for every column of the table */
-	upd_node->update = upd_create(dict_table_get_n_cols(table), heap);
-	upd_node->update_n_fields = dict_table_get_n_cols(table);
-
-	UT_LIST_INIT(upd_node->columns);
-
-	upd_node->has_clust_rec_x_lock = TRUE;
-	upd_node->cmpl_info = 0;
-
-	/* Not used by this kind of node */
-	upd_node->table_sym = NULL;
-	upd_node->col_assign_list = NULL;
-
-	return(upd_node);
-}
-
-/*********************************************************************//**
-Gets pointer to a prebuilt update vector used in updates. On the first
-call for a handle the update node and its query graph are created and
-stored in the prebuilt struct; later calls return the existing vector.
-@return prebuilt update vector */
-UNIV_INTERN
-upd_t*
-row_get_prebuilt_update_vector(
-/*===========================*/
-	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
-					handle */
-{
-	dict_table_t*	table = prebuilt->table;
-
-	ut_ad(prebuilt && table && prebuilt->trx);
-
-	if (prebuilt->upd_node != NULL) {
-		/* Built by an earlier call */
-		return(prebuilt->upd_node->update);
-	}
-
-	/* Not called before for this handle: create an update node
-	and query graph to the prebuilt struct */
-
-	upd_node_t*	new_node = row_create_update_node_for_mysql(
-		table, prebuilt->heap);
-
-	prebuilt->upd_node = new_node;
-
-	que_thr_t*	upd_thr = pars_complete_graph_for_exec(
-		new_node, prebuilt->trx, prebuilt->heap);
-
-	prebuilt->upd_graph = static_cast<que_fork_t*>(
-		que_node_get_parent(upd_thr));
-
-	prebuilt->upd_graph->state = QUE_FORK_ACTIVE;
-
-	return(prebuilt->upd_node->update);
-}
-
-/********************************************************************
-Handle an update of a column that has an FTS index: the old document is
-registered as deleted and the new one as inserted. Nothing is registered
-when trx->fts_next_doc_id is zero. */
-static
-void
-row_fts_do_update(
-/*==============*/
-	trx_t*		trx,		/* in: transaction */
-	dict_table_t*	table,		/* in: Table with FTS index */
-	doc_id_t	old_doc_id,	/* in: old document id */
-	doc_id_t	new_doc_id)	/* in: new document id */
-{
-	if (!trx->fts_next_doc_id) {
-		return;
-	}
-
-	/* NULL: all FTS indexes of the table are affected */
-	fts_trx_add_op(trx, table, old_doc_id, FTS_DELETE, NULL);
-	fts_trx_add_op(trx, table, new_doc_id, FTS_INSERT, NULL);
-}
-
-/************************************************************************
-Handles FTS matters for an update or a delete.
-NOTE: should not be called if the table does not have an FTS index.
-@return DB_SUCCESS, or DB_FTS_INVALID_DOCID if the new Doc ID of an
-update is 0 */
-static
-dberr_t
-row_fts_update_or_delete(
-/*=====================*/
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct in MySQL
-					handle */
-{
-	trx_t*		trx = prebuilt->trx;
-	dict_table_t*	fts_table = prebuilt->table;
-	upd_node_t*	upd_node = prebuilt->upd_node;
-	doc_id_t	cur_doc_id = prebuilt->fts_doc_id;
-
-	ut_a(dict_table_has_fts_index(prebuilt->table));
-
-	if (upd_node->is_delete) {
-		/* Deletes are simple: a delete affects all FTS indexes,
-		so we pass NULL */
-		fts_trx_add_op(trx, fts_table, cur_doc_id, FTS_DELETE, NULL);
-
-		return(DB_SUCCESS);
-	}
-
-	/* An update: the new Doc ID is read from trx->fts_next_doc_id */
-	doc_id_t	next_id = fts_read_doc_id(
-		(byte*) &trx->fts_next_doc_id);
-
-	if (next_id == 0) {
-		fprintf(stderr, " InnoDB FTS: Doc ID cannot be 0 \n");
-		return(DB_FTS_INVALID_DOCID);
-	}
-
-	row_fts_do_update(trx, fts_table, cur_doc_id, next_id);
-
-	return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Initialize the Doc ID system for the tables that reference this table
-through foreign keys and have an FTS index, recursing into the tables
-that in turn reference those. *depth is incremented on every call and
-never decremented, so FK_MAX_CASCADE_DEL bounds the number of calls made
-with the same counter. */
-static
-void
-init_fts_doc_id_for_ref(
-/*====================*/
-	dict_table_t*	table,	/*!< in: table */
-	ulint*		depth)	/*!< in/out: recursive call counter */
-{
-	table->fk_max_recusive_level = 0;
-
-	++*depth;
-
-	/* Limit on tables involved in cascading delete/update */
-	if (*depth > FK_MAX_CASCADE_DEL) {
-		return;
-	}
-
-	/* Walk this table's referenced list and recursively traverse
-	the referenced list of each referencing table */
-	dict_foreign_set::iterator	it = table->referenced_set.begin();
-
-	while (it != table->referenced_set.end()) {
-
-		dict_table_t*	child = (*it)->foreign_table;
-
-		if (child == NULL) {
-			/* The walk stops at the first constraint whose
-			referencing table is not set */
-			break;
-		}
-
-		if (child->fts != NULL) {
-			fts_init_doc_id(child);
-		}
-
-		/* Skip self-references and tables nobody references */
-		if (child != table && !child->referenced_set.empty()) {
-			init_fts_doc_id_for_ref(child, depth);
-		}
-
-		++it;
-	}
-}
-
-/*********************************************************************//**
-Does an update or delete of a row for MySQL.
-The row to change is identified by the stored position of prebuilt->pcur
-(or prebuilt->clust_pcur when pcur is not on the clustered index), not by
-mysql_rec. The prebuilt update graph is run (and re-run after a lock
-wait); on success the FTS bookkeeping, the row counters and the table
-statistics are updated, unless the transaction is doing fake changes.
-Refuses to modify (returning an error) when the table is not readable or
-innodb_force_recovery is set; asserts on a corrupt table handle.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_update_for_mysql(
-/*=================*/
-	byte*		mysql_rec,	/*!< in: the row to be updated, in
-					the MySQL format (not used) */
-	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
-					handle */
-{
-	trx_savept_t	savept;
-	dberr_t		err;
-	que_thr_t*	thr;
-	ibool		was_lock_wait;
-	dict_index_t*	clust_index;
-	/*	ulint		ref_len; */
-	upd_node_t*	node;
-	dict_table_t*	table = prebuilt->table;
-	trx_t*		trx = prebuilt->trx;
-	ulint		fk_depth = 0;
-
-	ut_ad(prebuilt != NULL);
-	ut_ad(trx != NULL);
-	UT_NOT_USED(mysql_rec);
-
-	if (!table->is_readable()) {
-		return (row_mysql_get_table_status(table, trx, true));
-	}
-
-	if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
-		/* The handle is being used here, not freed. */
-		fprintf(stderr,
-			"InnoDB: Error: trying to use a corrupt\n"
-			"InnoDB: table handle. Magic n %lu, table name ",
-			(ulong) prebuilt->magic_n);
-		ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
-		putc('\n', stderr);
-
-		mem_analyze_corruption(prebuilt);
-
-		ut_error;
-	}
-
-	if (UNIV_UNLIKELY(srv_force_recovery)) {
-		/* Every message line ends in a newline so that the
-		"InnoDB: " prefix of the next line starts a new line. */
-		fputs("InnoDB: innodb_force_recovery is on: we do not allow\n"
-		      "InnoDB: database modifications by the user. Shut down\n"
-		      "InnoDB: mysqld and edit my.cnf so that\n"
-		      "InnoDB: innodb_force_... is removed.\n",
-		      stderr);
-
-		return(DB_READ_ONLY);
-	}
-
-	DEBUG_SYNC_C("innodb_row_update_for_mysql_begin");
-
-	trx->op_info = "updating or deleting";
-
-	row_mysql_delay_if_needed();
-
-	trx_start_if_not_started_xa(trx);
-
-	if (dict_table_is_referenced_by_foreign_key(table)) {
-		/* Share lock the data dictionary to prevent any
-		table dictionary (for foreign constraint) change.
-		This is similar to row_ins_check_foreign_constraint
-		check protect by the dictionary lock as well.
-		In the future, this can be removed once the Foreign
-		key MDL is implemented */
-		row_mysql_freeze_data_dictionary(trx);
-		init_fts_doc_id_for_ref(table, &fk_depth);
-		row_mysql_unfreeze_data_dictionary(trx);
-	}
-
-	node = prebuilt->upd_node;
-
-	clust_index = dict_table_get_first_index(table);
-
-	/* Take the position of the row from whichever prebuilt cursor is
-	on the clustered index. */
-	if (prebuilt->pcur.btr_cur.index == clust_index) {
-		btr_pcur_copy_stored_position(node->pcur, &prebuilt->pcur);
-	} else {
-		btr_pcur_copy_stored_position(node->pcur,
-					      &prebuilt->clust_pcur);
-	}
-
-	ut_a(node->pcur->rel_pos == BTR_PCUR_ON);
-
-	/* MySQL seems to call rnd_pos before updating each row it
-	has cached: we can get the correct cursor position from
-	prebuilt->pcur; NOTE that we cannot build the row reference
-	from mysql_rec if the clustered index was automatically
-	generated for the table: MySQL does not know anything about
-	the row id used as the clustered index key */
-
-	savept = trx_savept_take(trx);
-
-	thr = que_fork_get_first_thr(prebuilt->upd_graph);
-
-	node->state = UPD_NODE_UPDATE_CLUSTERED;
-
-	ut_ad(!prebuilt->sql_stat_start);
-
-	que_thr_move_to_run_state_for_mysql(thr, trx);
-
-run_again:
-	thr->run_node = node;
-	thr->prev_node = node;
-	thr->fk_cascade_depth = 0;
-
-	row_upd_step(thr);
-
-	err = trx->error_state;
-
-	/* Reset fk_cascade_depth back to 0 */
-	thr->fk_cascade_depth = 0;
-
-	if (err != DB_SUCCESS) {
-		que_thr_stop_for_mysql(thr);
-
-		if (err == DB_RECORD_NOT_FOUND) {
-			/* Returned to the caller as is, without going
-			through row_mysql_handle_errors(); the error state
-			of the transaction is cleared. */
-			trx->error_state = DB_SUCCESS;
-			trx->op_info = "";
-
-			return(err);
-		}
-
-		thr->lock_state= QUE_THR_LOCK_ROW;
-
-		DEBUG_SYNC(trx->mysql_thd, "row_update_for_mysql_error");
-
-		was_lock_wait = row_mysql_handle_errors(&err, trx, thr,
-							&savept);
-		thr->lock_state= QUE_THR_LOCK_NOLOCK;
-
-		if (was_lock_wait) {
-			goto run_again;
-		}
-
-		trx->op_info = "";
-
-		return(err);
-	}
-
-	que_thr_stop_for_mysql_no_error(thr, trx);
-
-	if (UNIV_UNLIKELY(trx->fake_changes)) {
-		/* Fake changes: skip the FTS, counter and statistics
-		bookkeeping below */
-		trx->op_info = "";
-		return(err);
-	}
-
-	if (dict_table_has_fts_index(table)
-	    && trx->fts_next_doc_id != UINT64_UNDEFINED) {
-		err = row_fts_update_or_delete(prebuilt);
-		if (err != DB_SUCCESS) {
-			trx->op_info = "";
-			return(err);
-		}
-	}
-
-	if (node->is_delete) {
-		/* Not protected by dict_table_stats_lock() for performance
-		reasons, we would rather get garbage in stat_n_rows (which is
-		just an estimate anyway) than protecting the following code
-		with a latch. */
-		dict_table_n_rows_dec(prebuilt->table);
-
-		if (table->is_system_db) {
-			srv_stats.n_system_rows_deleted.inc(size_t(trx->id));
-		} else {
-			srv_stats.n_rows_deleted.inc(size_t(trx->id));
-		}
-	} else {
-		if (table->is_system_db) {
-			srv_stats.n_system_rows_updated.inc(size_t(trx->id));
-		} else {
-			srv_stats.n_rows_updated.inc(size_t(trx->id));
-		}
-	}
-
-	/* We update table statistics only if it is a DELETE or UPDATE
-	that changes indexed columns, UPDATEs that change only non-indexed
-	columns would not affect statistics. */
-	if (node->is_delete || !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
-		row_update_statistics_if_needed(prebuilt->table);
-	} else {
-		/* Update the table modification counter even when
-		non-indexed columns change if statistics is initialized. */
-		if (prebuilt->table->stat_initialized) {
-			prebuilt->table->stat_modified_counter++;
-		}
-	}
-
-	trx->op_info = "";
-
-	return(err);
-}
-
-/*********************************************************************//**
-This can only be used when srv_locks_unsafe_for_binlog is TRUE or this
-session is using a READ COMMITTED or READ UNCOMMITTED isolation level.
-Before calling this function row_search_for_mysql() must have
-initialized prebuilt->new_rec_locks to store the information which new
-record locks really were set. This function removes a newly set
-clustered index record lock under prebuilt->pcur or
-prebuilt->clust_pcur. Thus, this implements a 'mini-rollback' that
-releases the latest clustered index record lock we set.
-The function returns nothing: when called with an unsuitable isolation
-level it only prints an error and returns, and a record whose trx id
-equals the id of this transaction is left locked. */
-UNIV_INTERN
-void
-row_unlock_for_mysql(
-/*=================*/
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct in MySQL
- handle */
- ibool has_latches_on_recs)/*!< in: TRUE if called so
- that we have the latches on
- the records under pcur and
- clust_pcur, and we do not need
- to reposition the cursors. */
-{
- btr_pcur_t* pcur = &prebuilt->pcur;
- btr_pcur_t* clust_pcur = &prebuilt->clust_pcur;
- trx_t* trx = prebuilt->trx;
-
- ut_ad(prebuilt != NULL);
- ut_ad(trx != NULL);
-
- if (UNIV_UNLIKELY
- (!srv_locks_unsafe_for_binlog
- && trx->isolation_level > TRX_ISO_READ_COMMITTED)) {
-
- fprintf(stderr,
- "InnoDB: Error: calling row_unlock_for_mysql though\n"
- "InnoDB: innodb_locks_unsafe_for_binlog is FALSE and\n"
- "InnoDB: this session is not using"
- " READ COMMITTED isolation level.\n");
- return;
- }
-
- trx->op_info = "unlock_row";
-
- if (prebuilt->new_rec_locks >= 1) {
-
- const rec_t* rec;
- dict_index_t* index;
- trx_id_t rec_trx_id;
- mtr_t mtr;
-
- mtr_start_trx(&mtr, trx);
-
- /* Restore the cursor position and find the record */
-
- if (!has_latches_on_recs) {
- btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr);
- }
-
- rec = btr_pcur_get_rec(pcur);
- index = btr_pcur_get_btr_cur(pcur)->index;
-
- if (prebuilt->new_rec_locks >= 2) {
- /* Restore the cursor position and find the record
- in the clustered index. From here on rec and index
- refer to the record under clust_pcur. */
-
- if (!has_latches_on_recs) {
- btr_pcur_restore_position(BTR_SEARCH_LEAF,
- clust_pcur, &mtr);
- }
-
- rec = btr_pcur_get_rec(clust_pcur);
- index = btr_pcur_get_btr_cur(clust_pcur)->index;
- }
-
- if (!dict_index_is_clust(index)) {
- /* This is not a clustered index record. We
- do not know how to unlock the record. */
- goto no_unlock;
- }
-
- /* If the record has been modified by this
- transaction, do not unlock it. The trx id is read
- directly at index->trx_id_offset when that offset is
- nonzero, and through the record offsets otherwise. */
-
- if (index->trx_id_offset) {
- rec_trx_id = trx_read_trx_id(rec
- + index->trx_id_offset);
- } else {
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
-
- rec_offs_init(offsets_);
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- if (rec_trx_id != trx->id) {
- /* We did not update the record: unlock it. The
- lock under pcur is always released; the one under
- clust_pcur only when two new locks were set. */
-
- rec = btr_pcur_get_rec(pcur);
-
- lock_rec_unlock(
- trx,
- btr_pcur_get_block(pcur),
- rec,
- static_cast<enum lock_mode>(
- prebuilt->select_lock_type));
-
- if (prebuilt->new_rec_locks >= 2) {
- rec = btr_pcur_get_rec(clust_pcur);
-
- lock_rec_unlock(
- trx,
- btr_pcur_get_block(clust_pcur),
- rec,
- static_cast<enum lock_mode>(
- prebuilt->select_lock_type));
- }
- }
-no_unlock:
- mtr_commit(&mtr);
- }
-
- trx->op_info = "";
-}
-
-/**********************************************************************//**
-Does a cascaded delete or set null in a foreign key operation.
-Lock waits are handled here by suspending the thread and retrying; every
-other error is returned to be handled by the parent node.
-@return error code or DB_SUCCESS; DB_FOREIGN_EXCEED_MAX_CASCADE if the
-cascade depth of the query thread exceeds FK_MAX_CASCADE_DEL */
-UNIV_INTERN
-dberr_t
-row_update_cascade_for_mysql(
-/*=========================*/
-	que_thr_t*	thr,	/*!< in: query thread */
-	upd_node_t*	node,	/*!< in: update node used in the cascade
-				or set null operation */
-	dict_table_t*	table)	/*!< in: table where we do the operation */
-{
-	trx_t*	trx = thr_get_trx(thr);
-	dberr_t	err;
-
-	/* Increment fk_cascade_depth to record the recursive call depth on
-	a single update/delete that affects multiple tables chained
-	together with foreign key relations. */
-	thr->fk_cascade_depth++;
-
-	if (thr->fk_cascade_depth > FK_MAX_CASCADE_DEL) {
-		return(DB_FOREIGN_EXCEED_MAX_CASCADE);
-	}
-
-	for (;;) {
-		thr->run_node = node;
-		thr->prev_node = node;
-
-		DEBUG_SYNC_C("foreign_constraint_update_cascade");
-
-		row_upd_step(thr);
-
-		/* The recursive call for cascading update/delete happens
-		in above row_upd_step(), reset the counter once we come
-		out of the recursive call, so it does not accumulate for
-		different row deletes */
-		thr->fk_cascade_depth = 0;
-
-		err = trx->error_state;
-
-		/* Note that the cascade node is a subnode of another InnoDB
-		query graph node. We do a normal lock wait in this node, but
-		all errors are handled by the parent node. */
-
-		if (err != DB_LOCK_WAIT) {
-			break;
-		}
-
-		/* Handle lock wait here */
-
-		que_thr_stop_for_mysql(thr);
-
-		lock_wait_suspend_thread(thr);
-
-		/* Note that a lock wait may also end in a lock wait timeout,
-		or this transaction is picked as a victim in selective
-		deadlock resolution */
-
-		if (trx->error_state != DB_SUCCESS) {
-
-			return(trx->error_state);
-		}
-
-		/* Retry operation after a normal lock wait */
-	}
-
-	/* Errors go to the parent; fake changes skip the bookkeeping */
-	if (err != DB_SUCCESS || UNIV_UNLIKELY((trx->fake_changes))) {
-
-		return(err);
-	}
-
-	const bool	is_delete = node->is_delete;
-
-	if (is_delete) {
-		/* Not protected by dict_table_stats_lock() for performance
-		reasons, we would rather get garbage in stat_n_rows (which is
-		just an estimate anyway) than protecting the following code
-		with a latch. */
-		dict_table_n_rows_dec(table);
-	}
-
-	if (table->is_system_db) {
-		if (is_delete) {
-			srv_stats.n_system_rows_deleted.inc(size_t(trx->id));
-		} else {
-			srv_stats.n_system_rows_updated.inc(size_t(trx->id));
-		}
-	} else if (is_delete) {
-		srv_stats.n_rows_deleted.inc(size_t(trx->id));
-	} else {
-		srv_stats.n_rows_updated.inc(size_t(trx->id));
-	}
-
-	row_update_statistics_if_needed(table);
-
-	return(err);
-}
-
-/*********************************************************************//**
-Checks if a table is such that we automatically created a clustered
-index on it (on row id). The test is whether the first column of the
-first index of the table has main type DATA_SYS.
-@return TRUE if the clustered index was generated automatically */
-UNIV_INTERN
-ibool
-row_table_got_default_clust_index(
-/*==============================*/
-	const dict_table_t*	table)	/*!< in: table */
-{
-	return(dict_index_get_nth_col(
-		       dict_table_get_first_index(table), 0)->mtype
-	       == DATA_SYS);
-}
-
-/*********************************************************************//**
-Locks the data dictionary in shared mode from modifications, for performing
-foreign key check, rollback, or other operation invisible to MySQL.
-Asserts that the transaction holds no dictionary operation latch on entry,
-S-latches dict_operation_lock and records RW_S_LATCH in
-trx->dict_operation_lock_mode. The file name and line number are passed
-on to the rw-lock call. */
-UNIV_INTERN
-void
-row_mysql_freeze_data_dictionary_func(
-/*==================================*/
- trx_t* trx, /*!< in/out: transaction */
- const char* file, /*!< in: file name */
- ulint line) /*!< in: line number */
-{
- ut_a(trx->dict_operation_lock_mode == 0);
-
- rw_lock_s_lock_inline(&dict_operation_lock, 0, file, line);
-
- trx->dict_operation_lock_mode = RW_S_LATCH;
-}
-
-/*********************************************************************//**
-Unlocks the data dictionary shared lock.
-Asserts that trx->dict_operation_lock_mode is RW_S_LATCH, releases the
-S-latch on dict_operation_lock and resets the mode to 0. */
-UNIV_INTERN
-void
-row_mysql_unfreeze_data_dictionary(
-/*===============================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- ut_ad(lock_trx_has_sys_table_locks(trx) == NULL);
-
- ut_a(trx->dict_operation_lock_mode == RW_S_LATCH);
-
- rw_lock_s_unlock(&dict_operation_lock);
-
- trx->dict_operation_lock_mode = 0;
-}
-
-/*********************************************************************//**
-Locks the data dictionary exclusively for performing a table create or other
-data dictionary modification operation.
-On entry trx->dict_operation_lock_mode must be 0 or RW_X_LATCH. The
-function X-latches dict_operation_lock, records RW_X_LATCH in the
-transaction and then enters dict_sys->mutex, in that order. The file name
-and line number are passed on to the rw-lock call. */
-UNIV_INTERN
-void
-row_mysql_lock_data_dictionary_func(
-/*================================*/
- trx_t* trx, /*!< in/out: transaction */
- const char* file, /*!< in: file name */
- ulint line) /*!< in: line number */
-{
- ut_a(trx->dict_operation_lock_mode == 0
- || trx->dict_operation_lock_mode == RW_X_LATCH);
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks or lock waits can occur then in these operations */
-
- rw_lock_x_lock_inline(&dict_operation_lock, 0, file, line);
- trx->dict_operation_lock_mode = RW_X_LATCH;
-
- mutex_enter(&(dict_sys->mutex));
-}
-
-/*********************************************************************//**
-Unlocks the data dictionary exclusive lock.
-Asserts that trx->dict_operation_lock_mode is RW_X_LATCH, exits
-dict_sys->mutex, releases the X-latch on dict_operation_lock and resets
-the mode to 0. */
-UNIV_INTERN
-void
-row_mysql_unlock_data_dictionary(
-/*=============================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- ut_ad(lock_trx_has_sys_table_locks(trx) == NULL);
-
- ut_a(trx->dict_operation_lock_mode == RW_X_LATCH);
-
- /* Release in the reverse order of acquisition in
- row_mysql_lock_data_dictionary_func(): first the dictionary
- mutex, then the dictionary operation latch */
-
- mutex_exit(&(dict_sys->mutex));
- rw_lock_x_unlock(&dict_operation_lock);
-
- trx->dict_operation_lock_mode = 0;
-}
-
-/*********************************************************************//**
-Creates a table for MySQL. If the name of the table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also start the printing of monitor
-output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate(). On failure the transaction will
-be rolled back and the 'table' object will be freed.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_create_table_for_mysql(
-/*=======================*/
- dict_table_t* table, /*!< in, own: table definition
- (will be freed, or on DB_SUCCESS
- added to the data dictionary cache) */
- trx_t* trx, /*!< in/out: transaction */
- bool commit, /*!< in: if true, commit the transaction */
- fil_encryption_t mode, /*!< in: encryption mode */
- ulint key_id) /*!< in: encryption key_id */
-{
- tab_node_t* node;
- mem_heap_t* heap;
- que_thr_t* thr;
- const char* table_name;
- ulint table_name_len;
- dberr_t err;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mutex_own(&(dict_sys->mutex)));
- ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
-
- DBUG_EXECUTE_IF(
- "ib_create_table_fail_at_start_of_row_create_table_for_mysql",
- goto err_exit;
- );
-
- trx->op_info = "creating table";
-
- if (row_mysql_is_system_table(table->name)) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to create a MySQL system"
- " table %s of type InnoDB.\n"
- "InnoDB: MySQL system tables must be"
- " of the MyISAM type!\n",
- table->name);
-
-#ifndef DBUG_OFF
-err_exit:
-#endif /* !DBUG_OFF */
- dict_mem_table_free(table);
-
- if (commit) {
- trx_commit_for_mysql(trx);
- }
-
- trx->op_info = "";
-
- return(DB_ERROR);
- }
-
- trx_start_if_not_started_xa(trx);
-
- /* The table name is prefixed with the database name and a '/'.
- Certain table names starting with 'innodb_' have their special
- meaning regardless of the database name. Thus, we need to
- ignore the database name prefix in the comparisons. */
- table_name = dict_remove_db_name(table->name);
- table_name_len = strlen(table_name) + 1;
-
- if (STR_EQ(table_name, table_name_len, S_innodb_monitor)) {
-
- /* Table equals "innodb_monitor":
- start monitor prints */
-
- srv_print_innodb_monitor = TRUE;
-
- /* The lock timeout monitor thread also takes care
- of InnoDB monitor prints */
-
- os_event_set(srv_monitor_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_lock_monitor)) {
-
- srv_print_innodb_monitor = TRUE;
- srv_print_innodb_lock_monitor = TRUE;
- os_event_set(srv_monitor_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_tablespace_monitor)) {
-
- srv_print_innodb_tablespace_monitor = TRUE;
- os_event_set(srv_monitor_event);
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_table_monitor)) {
-
- srv_print_innodb_table_monitor = TRUE;
- os_event_set(srv_monitor_event);
-#ifdef UNIV_MEM_DEBUG
- } else if (STR_EQ(table_name, table_name_len,
- S_innodb_mem_validate)) {
- /* We define here a debugging feature intended for
- developers */
-
- fputs("Validating InnoDB memory:\n"
- "to use this feature you must compile InnoDB with\n"
- "UNIV_MEM_DEBUG defined in univ.i and"
- " the server must be\n"
- "quiet because allocation from a mem heap"
- " is not protected\n"
- "by any semaphore.\n", stderr);
- ut_a(mem_validate());
- fputs("Memory validated\n", stderr);
-#endif /* UNIV_MEM_DEBUG */
- }
-
- heap = mem_heap_create(512);
-
- switch (trx_get_dict_operation(trx)) {
- case TRX_DICT_OP_NONE:
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- case TRX_DICT_OP_TABLE:
- break;
- case TRX_DICT_OP_INDEX:
- /* If the transaction was previously flagged as
- TRX_DICT_OP_INDEX, we should be creating auxiliary
- tables for full-text indexes. */
- ut_ad(strstr(table->name, "/FTS_") != NULL);
- }
-
- node = tab_create_graph_create(table, heap, commit, mode, key_id);
-
- thr = pars_complete_graph_for_exec(node, trx, heap);
-
- ut_a(thr == que_fork_start_command(
- static_cast<que_fork_t*>(que_node_get_parent(thr))));
-
- que_run_threads(thr);
-
- err = trx->error_state;
-
- if (table->space != TRX_SYS_SPACE) {
- ut_a(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_TABLESPACE));
-
- /* Update SYS_TABLESPACES and SYS_DATAFILES if a new
- tablespace was created. */
- if (err == DB_SUCCESS) {
- char* path;
- path = fil_space_get_first_path(table->space);
-
- err = dict_create_add_tablespace_to_dictionary(
- table->space, table->name,
- fil_space_get_flags(table->space),
- path, trx, commit);
-
- mem_free(path);
- }
-
- if (err != DB_SUCCESS) {
- /* We must delete the link file. */
- fil_delete_link_file(table->name);
- }
- }
-
- switch (err) {
- case DB_SUCCESS:
- break;
- case DB_OUT_OF_FILE_SPACE:
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: cannot create table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" because tablespace full\n", stderr);
-
- if (dict_table_open_on_name(table->name, TRUE, FALSE,
- DICT_ERR_IGNORE_NONE)) {
-
- /* Make things easy for the drop table code. */
-
- if (table->can_be_evicted) {
- dict_table_move_from_lru_to_non_lru(table);
- }
-
- dict_table_close(table, TRUE, FALSE);
-
- row_drop_table_for_mysql(table->name, trx, FALSE, TRUE);
-
- if (commit) {
- trx_commit_for_mysql(trx);
- }
- } else {
- dict_mem_table_free(table);
- }
-
- break;
-
- case DB_TOO_MANY_CONCURRENT_TRXS:
- /* We already have .ibd file here. it should be deleted. */
-
- if (table->space
- && fil_delete_tablespace(
- table->space,
- BUF_REMOVE_FLUSH_NO_WRITE)
- != DB_SUCCESS) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: not able to"
- " delete tablespace %lu of table ",
- (ulong) table->space);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("!\n", stderr);
- }
- /* fall through */
-
- case DB_DUPLICATE_KEY:
- case DB_TABLESPACE_EXISTS:
- default:
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- dict_mem_table_free(table);
- break;
- }
-
- que_graph_free((que_t*) que_node_get_parent(thr));
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*********************************************************************//**
-Creates an index for MySQL by running an index-creation query graph in
-the given transaction. For a DICT_FTS index the index specific FTS
-auxiliary tables are created afterwards. The dictionary mutex must be
-held by the caller (asserted in debug builds). TODO: currently failure
-to create an index results in dropping the whole table (the transaction
-is rolled back, the table is dropped and the transaction is committed)!
-This is no problem currently as all indexes must be created at the same
-time as the table.
-@return error number or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_create_index_for_mysql(
-/*=======================*/
- dict_index_t* index, /*!< in, own: index definition
- (will be freed) */
- trx_t* trx, /*!< in: transaction handle */
- const ulint* field_lengths) /*!< in: if not NULL, must contain
- dict_index_get_n_fields(index)
- actual field lengths for the
- index columns, which are
- then checked for not being too
- large. */
-{
- ind_node_t* node;
- mem_heap_t* heap;
- que_thr_t* thr;
- dberr_t err;
- ulint i;
- ulint len;
- char* table_name;
- char* index_name;
- dict_table_t* table;
- ibool is_fts;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mutex_own(&(dict_sys->mutex)));
-
- trx->op_info = "creating index";
-
- /* Copy the table name because we may want to drop the
- table later, after the index object is freed (inside
- que_run_threads()) and thus index->table_name is not available.
- The index name is copied for the same reason: it is needed for
- the FTS lookup after the graph has run. */
- table_name = mem_strdup(index->table_name);
- index_name = mem_strdup(index->name);
-
- is_fts = (index->type == DICT_FTS);
-
- table = dict_table_open_on_name(table_name, TRUE, TRUE,
- DICT_ERR_IGNORE_NONE);
-
- trx_start_if_not_started_xa(trx);
-
- for (i = 0; i < index->n_def; i++) {
- /* Check that neither prefix_len nor the actual length
- exceeds DICT_MAX_FIELD_LEN_BY_FORMAT(table); the larger
- of the two values is the one that is tested. */
-
- len = dict_index_get_nth_field(index, i)->prefix_len;
-
- if (field_lengths && field_lengths[i]) {
- len = ut_max(len, field_lengths[i]);
- }
-
- DBUG_EXECUTE_IF(
- "ib_create_table_fail_at_create_index",
- len = DICT_MAX_FIELD_LEN_BY_FORMAT(table) + 1;
- );
-
- /* Column or prefix length exceeds maximum column length.
- The index object is still owned by us at this point, so it
- is freed here before jumping to the common error handling. */
- if (len > (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table)) {
- err = DB_TOO_BIG_INDEX_COL;
-
- dict_mem_index_free(index);
- goto error_handling;
- }
- }
-
- heap = mem_heap_create(512);
-
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
-
- /* Note that the space id where we store the index is inherited from
- the table in dict_build_index_def_step() in dict0crea.cc. */
-
- node = ind_create_graph_create(index, heap, true);
-
- thr = pars_complete_graph_for_exec(node, trx, heap);
-
- ut_a(thr == que_fork_start_command(
- static_cast<que_fork_t*>(que_node_get_parent(thr))));
-
- que_run_threads(thr);
-
- err = trx->error_state;
-
- que_graph_free((que_t*) que_node_get_parent(thr));
-
- /* Create the index specific FTS auxiliary tables. The index is
- looked up again by its saved name because the object passed in
- by the caller is no longer available here. */
- if (err == DB_SUCCESS && is_fts) {
- dict_index_t* idx;
-
- idx = dict_table_get_index_on_name(table, index_name);
-
- ut_ad(idx);
- err = fts_create_index_tables(trx, idx);
- }
-
-error_handling:
- dict_table_close(table, TRUE, FALSE);
-
- if (err != DB_SUCCESS) {
- /* We have special error handling here: clear the error,
- roll back, drop the whole table by its saved name and
- commit. The original error code is still returned. */
-
- trx->error_state = DB_SUCCESS;
-
- trx_rollback_to_savepoint(trx, NULL);
-
- row_drop_table_for_mysql(table_name, trx, FALSE, TRUE);
-
- trx_commit_for_mysql(trx);
-
- trx->error_state = DB_SUCCESS;
- }
-
- trx->op_info = "";
-
- mem_free(table_name);
- mem_free(index_name);
-
- return(err);
-}
-
-/*********************************************************************//**
-Scans a table create SQL string and adds to the data dictionary
-the foreign key constraints declared in the string. This function
-should be called after the indexes for a table have been created.
-Each foreign key constraint must be accompanied with indexes in
-both participating tables. The indexes are allowed to contain more
-fields than mentioned in the constraint. Check also that foreign key
-constraints which reference this table are ok.
-On failure the transaction is rolled back, the table is dropped and the
-transaction is committed; the original error code is still returned.
-The caller must hold the dictionary mutex (asserted in debug builds).
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_table_add_foreign_constraints(
-/*==============================*/
-	trx_t*		trx,		/*!< in: transaction */
-	const char*	sql_string,	/*!< in: table create statement where
-					foreign keys are declared like:
-					FOREIGN KEY (a, b) REFERENCES table2(c, d),
-					table2 can be written also with the
-					database name before it: test.table2 */
-	size_t		sql_length,	/*!< in: length of sql_string */
-	const char*	name,		/*!< in: table full name in the
-					normalized form
-					database_name/table_name */
-	ibool		reject_fks)	/*!< in: if TRUE, fail with error
-					code DB_CANNOT_ADD_CONSTRAINT if
-					any foreign keys are found. */
-{
-	dberr_t	err;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-	ut_a(sql_string);
-
-	trx->op_info = "adding foreign keys";
-
-	trx_start_if_not_started_xa(trx);
-
-	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
-
-	err = dict_create_foreign_constraints(trx, sql_string, sql_length,
-					      name, reject_fks);
-
-	DBUG_EXECUTE_IF("ib_table_add_foreign_fail",
-			err = DB_DUPLICATE_KEY;);
-
-	DEBUG_SYNC_C("table_add_foreign_constraints");
-
-	if (err == DB_SUCCESS) {
-		/* Check that also referencing constraints are ok */
-		err = dict_load_foreigns(name, NULL, false, true,
-					 DICT_ERR_IGNORE_NONE);
-	}
-
-	if (err != DB_SUCCESS) {
-		/* We have special error handling here: clear the error,
-		roll back, drop the table and commit. */
-
-		trx->error_state = DB_SUCCESS;
-
-		trx_rollback_to_savepoint(trx, NULL);
-
-		row_drop_table_for_mysql(name, trx, FALSE, TRUE);
-
-		trx_commit_for_mysql(trx);
-
-		trx->error_state = DB_SUCCESS;
-	}
-
-	/* Clear the operation description, as the other DDL entry points
-	in this file do, so that a stale "adding foreign keys" is not
-	left behind in the transaction once this call has returned. */
-	trx->op_info = "";
-
-	return(err);
-}
-
-/*********************************************************************//**
-Drops a table for MySQL as a background operation. In ALTER TABLE on Unix,
-MySQL relies on the table handler not removing the table before all
-handles to it have been removed. Furthermore, MySQL's call to drop table
-must be non-blocking. Therefore the drop is done as a background
-operation, which is taken care of by the master thread in srv0srv.cc.
-The drop runs in its own background transaction, which is committed and
-freed before returning.
-@return error code or DB_SUCCESS */
-static
-dberr_t
-row_drop_table_for_mysql_in_background(
-/*===================================*/
-	const char*	name)	/*!< in: table name */
-{
-	trx_t* const	bg_trx = trx_allocate_for_background();
-
-	/* The original transaction may have been dropping a table that is
-	referenced by foreign keys; foreign key checking has to be switched
-	off for the drop to be possible in that case. */
-	bg_trx->check_foreigns = FALSE;
-
-	/* Attempt the actual drop inside InnoDB. */
-	const dberr_t	err = row_drop_table_for_mysql(
-		name, bg_trx, FALSE, FALSE);
-
-	/* Flushing the log lowers the probability of the .frm files and
-	the InnoDB data dictionary getting out-of-sync when the user runs
-	with innodb_flush_log_at_trx_commit = 0. */
-	log_buffer_flush_to_disk();
-
-	trx_commit_for_mysql(bg_trx);
-	trx_free_for_background(bg_trx);
-
-	return(err);
-}
-
-/*********************************************************************//**
-Called regularly by the master thread in srv0srv.cc to drop the tables
-that must be dropped in background once the queries on them have ended.
-Such lazy dropping of tables is needed in ALTER TABLE on Unix. The queue
-is processed from its head until it is empty or a drop fails.
-@return how many tables dropped + remaining tables in list */
-UNIV_INTERN
-ulint
-row_drop_tables_for_mysql_in_background(void)
-/*=========================================*/
-{
-	ulint	n_dropped = 0;
-
-	for (;;) {
-		row_mysql_drop_t*	entry;
-		ulint			n_queued;
-
-		/* Take a snapshot of the queue head and length; the list
-		mutex is not held while the table is being dropped. */
-		mutex_enter(&row_drop_list_mutex);
-
-		ut_a(row_mysql_drop_list_inited);
-
-		entry = UT_LIST_GET_FIRST(row_mysql_drop_list);
-		n_queued = UT_LIST_GET_LEN(row_mysql_drop_list);
-
-		mutex_exit(&row_drop_list_mutex);
-
-		if (entry == NULL) {
-			/* Nothing left in the queue. */
-			return(n_queued + n_dropped);
-		}
-
-		DBUG_EXECUTE_IF("row_drop_tables_in_background_sleep",
-			os_thread_sleep(5000000);
-		);
-
-		dict_table_t*	table = dict_table_open_on_name(
-			entry->table_name, FALSE, FALSE,
-			DICT_ERR_IGNORE_NONE);
-
-		if (table != NULL && !table->to_be_dropped) {
-			/* The old table was dropped right after it was
-			queued and a new table with the same name has been
-			created since: the new table must not be dropped,
-			only the queue entry is discarded. */
-			dict_table_close(table, FALSE, FALSE);
-		} else if (table != NULL) {
-			ut_a(!table->can_be_evicted);
-
-			dict_table_close(table, FALSE, FALSE);
-
-			if (row_drop_table_for_mysql_in_background(
-				    entry->table_name) != DB_SUCCESS) {
-				/* The DROP failed; give up for now and let
-				the main thread retry later. */
-				return(n_queued + n_dropped);
-			}
-
-			n_dropped++;
-		}
-		/* A NULL table means it was already dropped through some
-		other mechanism; only the queue entry is discarded then. */
-
-		mutex_enter(&row_drop_list_mutex);
-
-		UT_LIST_REMOVE(row_mysql_drop_list, row_mysql_drop_list,
-			       entry);
-
-		MONITOR_DEC(MONITOR_BACKGROUND_DROP_TABLE);
-
-		ut_print_timestamp(stderr);
-		fputs(" InnoDB: Dropped table ", stderr);
-		ut_print_name(stderr, NULL, TRUE, entry->table_name);
-		fputs(" in background drop queue.\n", stderr);
-
-		mem_free(entry->table_name);
-		mem_free(entry);
-
-		mutex_exit(&row_drop_list_mutex);
-	}
-}
-
-/*********************************************************************//**
-Get the background drop list length. The drop list mutex is acquired
-and released inside this function.
-NOTE(review): the previous header stated that the caller must own the
-drop list mutex, which does not match the code below - confirm against
-the callers.
-@return how many tables in list */
-UNIV_INTERN
-ulint
-row_get_background_drop_list_len_low(void)
-/*======================================*/
-{
-	mutex_enter(&row_drop_list_mutex);
-
-	ut_a(row_mysql_drop_list_inited);
-
-	const ulint	n_queued = UT_LIST_GET_LEN(row_mysql_drop_list);
-
-	mutex_exit(&row_drop_list_mutex);
-
-	return(n_queued);
-}
-
-/*********************************************************************//**
-Queues a table for the master thread to drop in background, unless a
-table of the same name is queued already. This is needed on Unix because
-in ALTER TABLE MySQL may call drop table even if the table has running
-queries on it. A table with running foreign key checks is also dropped
-lazily through this list.
-@return TRUE if the table was not yet in the drop list, and was added there */
-static
-ibool
-row_add_table_to_background_drop_list(
-/*==================================*/
-	const char*	name)	/*!< in: table name */
-{
-	row_mysql_drop_t*	entry;
-
-	mutex_enter(&row_drop_list_mutex);
-
-	ut_a(row_mysql_drop_list_inited);
-
-	/* Scan the queue for an entry carrying the same table name. */
-	entry = UT_LIST_GET_FIRST(row_mysql_drop_list);
-
-	while (entry != NULL) {
-		if (!strcmp(entry->table_name, name)) {
-			/* Queued earlier; nothing to add. */
-			mutex_exit(&row_drop_list_mutex);
-
-			return(FALSE);
-		}
-
-		entry = UT_LIST_GET_NEXT(row_mysql_drop_list, entry);
-	}
-
-	/* Not queued yet: append a new entry that owns a private copy
-	of the name. */
-	entry = static_cast<row_mysql_drop_t*>(mem_alloc(sizeof(*entry)));
-	entry->table_name = mem_strdup(name);
-
-	UT_LIST_ADD_LAST(row_mysql_drop_list, row_mysql_drop_list, entry);
-
-	MONITOR_INC(MONITOR_BACKGROUND_DROP_TABLE);
-
-	mutex_exit(&row_drop_list_mutex);
-
-	return(TRUE);
-}
-
-/*********************************************************************//**
-Reassigns the table identifier of a table: obtains a new id and rewrites
-the id in SYS_TABLES, SYS_COLUMNS and SYS_INDEXES with an internal SQL
-procedure. The dictionary cache is not touched here.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_mysql_table_id_reassign(
-/*========================*/
-	dict_table_t*	table,	/*!< in/out: table */
-	trx_t*		trx,	/*!< in/out: transaction */
-	table_id_t*	new_id)	/*!< out: new table id */
-{
-	pars_info_t*	bound = pars_info_create();
-
-	dict_hdr_get_new_id(new_id, NULL, NULL);
-
-	/* Only the table-level S and X locks are kept; every other lock
-	on the table is removed. */
-	lock_remove_all_on_table(table, FALSE);
-
-	pars_info_add_ull_literal(bound, "old_id", table->id);
-	pars_info_add_ull_literal(bound, "new_id", *new_id);
-
-	/* Micro-SQL cannot compare int4 with int8, hence both ids are
-	bound a second time, under different names, as int4 values. */
-	pars_info_add_int4_literal(bound, "old_id_narrow", table->id);
-	pars_info_add_int4_literal(bound, "new_id_narrow", *new_id);
-
-	return(que_eval_sql(
-		       bound,
-		       "PROCEDURE RENUMBER_TABLE_PROC () IS\n"
-		       "BEGIN\n"
-		       "UPDATE SYS_TABLES SET ID = :new_id\n"
-		       " WHERE ID = :old_id;\n"
-		       "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
-		       " WHERE TABLE_ID = :old_id;\n"
-		       "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n"
-		       " WHERE TABLE_ID = :old_id;\n"
-		       "END;\n", FALSE, trx));
-}
-
-/*********************************************************************//**
-Prepares for DISCARD TABLESPACE: flags the transaction as a table-level
-dictionary operation, starts it if needed, locks the data dictionary and
-opens the table. The data dictionary stays locked when this function
-returns, whether or not the table was found.
-@return table instance or 0 if not found. */
-static
-dict_table_t*
-row_discard_tablespace_begin(
-/*=========================*/
-	const char*	name,	/*!< in: table name */
-	trx_t*		trx)	/*!< in: transaction handle */
-{
-	trx->op_info = "discarding tablespace";
-
-	trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
-
-	trx_start_if_not_started_xa(trx);
-
-	/* Data dictionary operations are serialized with the dictionary
-	mutex so that they cannot deadlock with each other. */
-	row_mysql_lock_data_dictionary(trx);
-
-	dict_table_t* const	found = dict_table_open_on_name(
-		name, TRUE, FALSE, DICT_ERR_IGNORE_NONE);
-
-	if (found == NULL) {
-		return(NULL);
-	}
-
-	dict_stats_wait_bg_to_stop_using_table(found, trx);
-
-	/* The table must have its own tablespace and no foreign key
-	check may be running on it. */
-	ut_a(found->space != TRX_SYS_SPACE);
-	ut_a(found->n_foreign_key_checks_running == 0);
-
-	return(found);
-}
-
-/*********************************************************************//**
-Do the foreign key constraint checks for DISCARD TABLESPACE. The discard
-is refused when foreign key checking is enabled for the transaction and
-some other table (not the table itself) references this table; the reason
-is then written to the foreign key error file. No check is made in
-read-only mode or when trx->check_foreigns is not set.
-@return DB_SUCCESS, or DB_CANNOT_DROP_CONSTRAINT if the table is
-referenced by another table */
-static
-dberr_t
-row_discard_tablespace_foreign_key_checks(
-/*======================================*/
-	const trx_t*		trx,	/*!< in: transaction handle */
-	const dict_table_t*	table)	/*!< in: table to be discarded */
-{
-
-	if (srv_read_only_mode || !trx->check_foreigns) {
-		return(DB_SUCCESS);
-	}
-
-	/* Check if the table is referenced by foreign key constraints from
-	some other table (not the table itself) */
-	dict_foreign_set::const_iterator	it
-		= std::find_if(table->referenced_set.begin(),
-			       table->referenced_set.end(),
-			       dict_foreign_different_tables());
-
-	if (it == table->referenced_set.end()) {
-		return(DB_SUCCESS);
-	}
-
-	const dict_foreign_t*	foreign	= *it;
-	FILE*			ef	= dict_foreign_err_file;
-
-	ut_ad(foreign->foreign_table != table);
-	ut_ad(foreign->referenced_table == table);
-
-	/* We only allow discarding a referenced table if
-	FOREIGN_KEY_CHECKS is set to 0 */
-
-	mutex_enter(&dict_foreign_err_mutex);
-
-	rewind(ef);
-
-	ut_print_timestamp(ef);
-
-	/* The whole message, table names included, has to go to the
-	foreign key error file. Printing the names to stderr would leave
-	the record in the error file without them and scatter bare table
-	names into the server error log. */
-	fputs(" Cannot DISCARD table ", ef);
-	ut_print_name(ef, trx, TRUE, table->name);
-	fputs("\n"
-	      "because it is referenced by ", ef);
-	ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
-	putc('\n', ef);
-
-	mutex_exit(&dict_foreign_err_mutex);
-
-	return(DB_CANNOT_DROP_CONSTRAINT);
-}
-
-/*********************************************************************//**
-Finishes the DISCARD TABLESPACE operation: closes the table if one was
-opened, commits the transaction, unlocks the data dictionary and clears
-trx->op_info.
-@return the error code that was passed in */
-static
-dberr_t
-row_discard_tablespace_end(
-/*=======================*/
-	trx_t*		trx,	/*!< in/out: transaction handle */
-	dict_table_t*	table,	/*!< in/out: table to be discarded */
-	dberr_t		err)	/*!< in: error code */
-{
-	if (table) {
-		dict_table_close(table, TRUE, FALSE);
-	}
-
-	/* Debug-only crash injection just before the commit. */
-	DBUG_EXECUTE_IF("ib_discard_before_commit_crash",
-			log_make_checkpoint_at(LSN_MAX, TRUE);
-			DBUG_SUICIDE(););
-
-	trx_commit_for_mysql(trx);
-
-	/* Debug-only crash injection just after the commit. */
-	DBUG_EXECUTE_IF("ib_discard_after_commit_crash",
-			log_make_checkpoint_at(LSN_MAX, TRUE);
-			DBUG_SUICIDE(););
-
-	row_mysql_unlock_data_dictionary(trx);
-
-	trx->op_info = "";
-
-	return(err);
-}
-
-/*********************************************************************//**
-Do the DISCARD TABLESPACE operation: remove the insert buffer entries of
-the tablespace, persist the discarded flag and the index root page
-changes, drop any FTS auxiliary tables, give the table a new table id and
-finally discard the physical file. DB_IO_ERROR and
-DB_TABLESPACE_NOT_FOUND from fil_discard_tablespace() are treated as
-success; any other failure of that call rolls the transaction back.
-@return DB_SUCCESS or error code. */
-static
-dberr_t
-row_discard_tablespace(
-/*===================*/
- trx_t* trx, /*!< in/out: transaction handle */
- dict_table_t* table) /*!< in/out: table to be discarded */
-{
- dberr_t err;
-
- /* How do we prevent crashes caused by ongoing operations on
- the table? Old operations could try to access non-existent
- pages. MySQL will block all DML on the table using MDL and a
- DISCARD will not start unless all existing operations on the
- table to be discarded are completed.
-
- 1) Acquire the data dictionary latch in X mode. To prevent any
- internal operations that MySQL is not aware of and also for
- the internal SQL parser.
-
- 2) Purge and rollback: we assign a new table id for the
- table. Since purge and rollback look for the table based on
- the table id, they see the table as 'dropped' and discard
- their operations.
-
- 3) Insert buffer: we remove all entries for the tablespace in
- the insert buffer tree.
-
- 4) FOREIGN KEY operations: if table->n_foreign_key_checks_running > 0,
- we do not allow the discard. */
-
- /* Play safe and remove all insert buffer entries, though we should
- have removed them already when DISCARD TABLESPACE was called */
-
- ibuf_delete_for_discarded_space(table->space);
-
- table_id_t new_id;
-
- /* Set the TABLESPACE DISCARD flag in the table definition on disk. */
-
- err = row_import_update_discarded_flag(trx, table->id, true, true);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- /* Update the index root pages in the system tables, on disk */
-
- err = row_import_update_index_root(trx, table, true, true);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- /* Drop all the FTS auxiliary tables. The result of
- fts_drop_tables() is not checked here. */
- if (dict_table_has_fts_index(table)
- || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
-
- fts_drop_tables(trx, table);
- }
-
- /* Assign a new table id to the table definition so that purge
- can ignore the changes. Update the system table on disk. The
- dictionary cache is only switched to new_id further below. */
-
- err = row_mysql_table_id_reassign(table, trx, &new_id);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- /* Discard the physical file that is used for the tablespace. */
-
- err = fil_discard_tablespace(table->space);
-
- switch(err) {
- case DB_SUCCESS:
- case DB_IO_ERROR:
- case DB_TABLESPACE_NOT_FOUND:
- /* All persistent operations successful, update the
- data dictionary memory cache. */
-
- table->file_unreadable = true;
-
- table->flags2 |= DICT_TF2_DISCARDED;
-
- dict_table_change_id_in_cache(table, new_id);
-
- /* Reset the root page numbers. */
-
- for (dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
- index != 0;
- index = UT_LIST_GET_NEXT(indexes, index)) {
-
- index->page = FIL_NULL;
- index->space = FIL_NULL;
- }
-
- /* If the tablespace did not already exist or we couldn't
- write to it, we treat that as a successful DISCARD. It is
- unusable anyway. */
-
- err = DB_SUCCESS;
- break;
-
- default:
- /* We need to rollback the disk changes, something failed.
- The error code itself is still returned to the caller. */
-
- trx->error_state = DB_SUCCESS;
-
- trx_rollback_to_savepoint(trx, NULL);
-
- trx->error_state = DB_SUCCESS;
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Discards the tablespace of a table which is stored in an .ibd file. The
-table is refused if it cannot be found, lives in the system tablespace,
-has foreign key checks running on it, or fails the foreign key constraint
-checks. Otherwise row_discard_tablespace() does the work: it assigns a
-new table id and marks the cached table definition as discarded and
-unreadable (DICT_TF2_DISCARDED, table->file_unreadable).
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_discard_tablespace_for_mysql(
-/*=============================*/
-	const char*	name,	/*!< in: table name */
-	trx_t*		trx)	/*!< in: transaction handle */
-{
-	dberr_t	err;
-
-	/* Starts the transaction if needed, locks the data dictionary
-	and opens the table. */
-	dict_table_t* const	table = row_discard_tablespace_begin(
-		name, trx);
-
-	if (table == 0) {
-		err = DB_TABLE_NOT_FOUND;
-	} else if (table->space == TRX_SYS_SPACE
-		   || table->n_foreign_key_checks_running > 0) {
-		/* Both refusals report the formatted table name to the
-		client; they differ only in the message code, and the
-		system tablespace check takes precedence. */
-		char	table_name[MAX_FULL_NAME_LEN + 1];
-
-		innobase_format_name(
-			table_name, sizeof(table_name), table->name, FALSE);
-
-		if (table->space == TRX_SYS_SPACE) {
-			ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
-				    ER_TABLE_IN_SYSTEM_TABLESPACE,
-				    table_name);
-		} else {
-			ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
-				    ER_DISCARD_FK_CHECKS_RUNNING,
-				    table_name);
-		}
-
-		err = DB_ERROR;
-	} else {
-		/* Foreign key constraint checks come first; the discard
-		itself is attempted only if they pass. */
-		err = row_discard_tablespace_foreign_key_checks(trx, table);
-
-		if (err == DB_SUCCESS) {
-			err = row_discard_tablespace(trx, table);
-		}
-	}
-
-	return(row_discard_tablespace_end(trx, table, err));
-}
-
-/*********************************************************************//**
-Sets a table lock of the requested mode (LOCK_X or LOCK_S) on a table.
-A dummy select query graph is built to supply the query thread that the
-lock module call needs. The lock request is repeated while
-row_mysql_handle_errors() reports that a lock wait took place, or while
-the request returns DB_QUE_THR_SUSPENDED.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_mysql_lock_table(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- dict_table_t* table, /*!< in: table to lock */
- enum lock_mode mode, /*!< in: LOCK_X or LOCK_S */
- const char* op_info) /*!< in: string for trx->op_info */
-{
- mem_heap_t* heap;
- que_thr_t* thr;
- dberr_t err;
- sel_node_t* node;
-
- ut_ad(trx);
- ut_ad(mode == LOCK_X || mode == LOCK_S);
-
- heap = mem_heap_create(512);
-
- trx->op_info = op_info;
-
- node = sel_node_create(heap);
- thr = pars_complete_graph_for_exec(node, trx, heap);
- thr->graph->state = QUE_FORK_ACTIVE;
-
- /* We use the select query graph as the dummy graph needed
- in the lock module call */
-
- thr = que_fork_get_first_thr(
- static_cast<que_fork_t*>(que_node_get_parent(thr)));
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
-run_again:
- thr->run_node = thr;
- thr->prev_node = thr->common.parent;
-
- err = lock_table(0, table, mode, thr);
-
- trx->error_state = err;
-
- if (err == DB_SUCCESS) {
- que_thr_stop_for_mysql_no_error(thr, trx);
- } else {
- que_thr_stop_for_mysql(thr);
-
- if (err != DB_QUE_THR_SUSPENDED) {
- ibool was_lock_wait;
-
- was_lock_wait = row_mysql_handle_errors(
- &err, trx, thr, NULL);
-
- if (was_lock_wait) {
- goto run_again;
- }
- } else {
- que_thr_t* run_thr;
- que_node_t* parent;
-
- parent = que_node_get_parent(thr);
-
- run_thr = que_fork_start_command(
- static_cast<que_fork_t*>(parent));
-
- ut_a(run_thr == thr);
-
- /* There was a lock wait but the thread was not
- in a ready to run or running state. The thread has
- been restarted above, so the lock request is simply
- issued again. */
- trx->error_state = DB_LOCK_WAIT;
-
- goto run_again;
- }
- }
-
- que_graph_free(thr->graph);
- trx->op_info = "";
-
- return(err);
-}
-
-/*********************************************************************//**
-Waits until table->n_ref_count has dropped to zero, polling it in a
-sleep loop. The dictionary mutex is released with
-dict_mutex_exit_for_mysql() around every sleep and re-acquired
-afterwards, so the caller is expected to hold it on entry (confirm
-against the callers). A warning is printed to stderr after every 30
-seconds of waiting, and the wait is abandoned with a final warning once
-300 seconds have passed, even if references remain. */
-static
-void
-fil_wait_crypt_bg_threads(
-/*======================*/
-	dict_table_t*	table)	/*!< in: table whose reference count
-				is waited on */
-{
-	const time_t	start = time(0);
-	time_t		last = start;
-
-	while (table->n_ref_count > 0) {
-		dict_mutex_exit_for_mysql();
-		os_thread_sleep(20000);
-		dict_mutex_enter_for_mysql();
-
-		const time_t	now = time(0);
-
-		/* time_t is not guaranteed to have the width of long, so
-		the elapsed time is converted explicitly to match the %ld
-		conversion in the messages below. */
-		const long	waited = static_cast<long>(now - start);
-
-		if (now >= last + 30) {
-			fprintf(stderr,
-				"WARNING: waited %ld seconds "
-				"for ref-count on table: %s space: %u\n",
-				waited, table->name, table->space);
-			last = now;
-		}
-
-		if (now >= start + 300) {
-			fprintf(stderr,
-				"WARNING: after %ld seconds, gave up waiting "
-				"for ref-count on table: %s space: %u\n",
-				waited, table->name, table->space);
-			break;
-		}
-	}
-}
-
-/*********************************************************************//**
-Truncates a table for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_truncate_table_for_mysql(
-/*=========================*/
- dict_table_t* table, /*!< in: table handle */
- trx_t* trx) /*!< in: transaction handle */
-{
- dberr_t err;
- mem_heap_t* heap;
- byte* buf;
- dtuple_t* tuple;
- dfield_t* dfield;
- dict_index_t* sys_index;
- btr_pcur_t pcur;
- mtr_t mtr;
- table_id_t new_id;
- ulint recreate_space = 0;
- pars_info_t* info = NULL;
- ibool has_internal_doc_id;
- ulint old_space = table->space;
-
- /* How do we prevent crashes caused by ongoing operations on
- the table? Old operations could try to access non-existent
- pages.
-
- 1) SQL queries, INSERT, SELECT, ...: we must get an exclusive
- InnoDB table lock on the table before we can do TRUNCATE
- TABLE. Then there are no running queries on the table.
-
- 2) Purge and rollback: we assign a new table id for the
- table. Since purge and rollback look for the table based on
- the table id, they see the table as 'dropped' and discard
- their operations.
-
- 3) Insert buffer: TRUNCATE TABLE is analogous to DROP TABLE,
- so we do not have to remove insert buffer records, as the
- insert buffer works at a low level. If a freed page is later
- reallocated, the allocator will remove the ibuf entries for
- it.
-
- When we truncate *.ibd files by recreating them (analogous to
- DISCARD TABLESPACE), we remove all entries for the table in the
- insert buffer tree. This is not strictly necessary, because
- in 6) we will assign a new tablespace identifier, but we can
- free up some space in the system tablespace.
-
- 4) Linear readahead and random readahead: we use the same
- method as in 3) to discard ongoing operations. (This is only
- relevant for TRUNCATE TABLE by DISCARD TABLESPACE.)
-
- 5) FOREIGN KEY operations: if
- table->n_foreign_key_checks_running > 0, we do not allow the
- TRUNCATE. We also reserve the data dictionary latch.
-
- 6) Crash recovery: To prevent the application of pre-truncation
- redo log records on the truncated tablespace, we will assign
- a new tablespace identifier to the truncated tablespace. */
-
- ut_ad(table);
-
- if (dict_table_is_discarded(table)) {
- return(DB_TABLESPACE_DELETED);
- } else if (!table->is_readable()) {
- return (row_mysql_get_table_status(table, trx, true));
- }
-
- trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
-
- trx->op_info = "truncating table";
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- ut_a(trx->dict_operation_lock_mode == 0);
- /* Prevent foreign key checks etc. while we are truncating the
- table */
- row_mysql_lock_data_dictionary(trx);
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- dict_stats_wait_bg_to_stop_using_table(table, trx);
-
- /* Check if the table is referenced by foreign key constraints from
- some other table (not the table itself) */
-
- dict_foreign_set::iterator it
- = std::find_if(table->referenced_set.begin(),
- table->referenced_set.end(),
- dict_foreign_different_tables());
-
- if (!srv_read_only_mode
- && it != table->referenced_set.end()
- && trx->check_foreigns) {
-
- FILE* ef = dict_foreign_err_file;
- dict_foreign_t* foreign = *it;
-
- /* We only allow truncating a referenced table if
- FOREIGN_KEY_CHECKS is set to 0 */
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
-
- fputs(" Cannot truncate table ", ef);
- ut_print_name(ef, trx, TRUE, table->name);
- fputs(" by DROP+CREATE\n"
- "InnoDB: because it is referenced by ", ef);
- ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
- putc('\n', ef);
- mutex_exit(&dict_foreign_err_mutex);
-
- err = DB_ERROR;
- goto funct_exit;
- }
-
- /* TODO: could we replace the counter n_foreign_key_checks_running
- with lock checks on the table? Acquire here an exclusive lock on the
- table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
- they can cope with the table having been truncated here? Foreign key
- checks take an IS or IX lock on the table. */
-
- if (table->n_foreign_key_checks_running > 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Cannot truncate table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" by DROP+CREATE\n"
- "InnoDB: because there is a foreign key check"
- " running on it.\n",
- stderr);
- err = DB_ERROR;
-
- goto funct_exit;
- }
-
- /* Check if memcached plugin is running on this table. if is, we don't
- allow truncate this table. */
- if (table->memcached_sync_count != 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Cannot truncate table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs(" by DROP+CREATE\n"
- "InnoDB: because there are memcached operations"
- " running on it.\n",
- stderr);
- err = DB_ERROR;
-
- goto funct_exit;
- } else {
- /* We need to set this counter to -1 for blocking
- memcached operations. */
- table->memcached_sync_count = DICT_TABLE_IN_DDL;
- }
-
- /* Remove all locks except the table-level X lock. */
-
- lock_remove_all_on_table(table, FALSE);
-
- /* Ensure that the table will be dropped by
- trx_rollback_active() in case of a crash. */
-
- trx->table_id = table->id;
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
-
- /* Assign an undo segment for the transaction, so that the
- transaction will be recovered after a crash. */
-
- mutex_enter(&trx->undo_mutex);
-
- err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
-
- mutex_exit(&trx->undo_mutex);
-
- if (err != DB_SUCCESS) {
-
- goto funct_exit;
- }
-
- if (table->space && !DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)) {
- /* Discard and create the single-table tablespace. */
- ulint space_id = table->space;
- ulint flags = ULINT_UNDEFINED;
- ulint key_id = FIL_DEFAULT_ENCRYPTION_KEY;
- fil_encryption_t mode = FIL_ENCRYPTION_DEFAULT;
-
- dict_get_and_save_data_dir_path(table, true);
-
- if (fil_space_t* space = fil_space_acquire(space_id)) {
- fil_space_crypt_t* crypt_data = space->crypt_data;
-
- if (crypt_data) {
- key_id = crypt_data->key_id;
- mode = crypt_data->encryption;
- }
-
- flags = space->flags;
- fil_space_release(space);
- }
-
- if (flags != ULINT_UNDEFINED
- && fil_discard_tablespace(space_id) == DB_SUCCESS) {
-
- dict_index_t* index;
-
- dict_hdr_get_new_id(NULL, NULL, &space_id);
-
- /* Lock all index trees for this table. We must
- do so after dict_hdr_get_new_id() to preserve
- the latch order */
- dict_table_x_lock_indexes(table);
-
- if (space_id == ULINT_UNDEFINED
- || fil_create_new_single_table_tablespace(
- space_id, table->name,
- table->data_dir_path,
- flags, table->flags2,
- FIL_IBD_FILE_INITIAL_SIZE,
- mode, key_id)
- != DB_SUCCESS) {
- dict_table_x_unlock_indexes(table);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "TRUNCATE TABLE %s failed to "
- "create a new tablespace",
- table->name);
-
- table->file_unreadable = true;
- err = DB_ERROR;
- goto funct_exit;
- }
-
- recreate_space = space_id;
-
- /* Replace the space_id in the data dictionary cache.
- The persisent data dictionary (SYS_TABLES.SPACE
- and SYS_INDEXES.SPACE) are updated later in this
- function. */
- table->space = space_id;
- index = dict_table_get_first_index(table);
- do {
- index->space = space_id;
- index = dict_table_get_next_index(index);
- } while (index);
-
- mtr_start_trx(&mtr, trx);
- fsp_header_init(space_id,
- FIL_IBD_FILE_INITIAL_SIZE, &mtr);
- mtr_commit(&mtr);
- }
- } else {
- /* Lock all index trees for this table, as we will
- truncate the table/index and possibly change their metadata.
- All DML/DDL are blocked by table level lock, with
- a few exceptions such as queries into information schema
- about the table, MySQL could try to access index stats
- for this kind of query, we need to use index locks to
- sync up */
- dict_table_x_lock_indexes(table);
- }
-
- /* scan SYS_INDEXES for all indexes of the table */
- heap = mem_heap_create(800);
-
- tuple = dtuple_create(heap, 1);
- dfield = dtuple_get_nth_field(tuple, 0);
-
- buf = static_cast<byte*>(mem_heap_alloc(heap, 8));
- mach_write_to_8(buf, table->id);
-
- dfield_set_data(dfield, buf, 8);
- sys_index = dict_table_get_first_index(dict_sys->sys_indexes);
- dict_index_copy_types(tuple, sys_index, 1);
-
- mtr_start_trx(&mtr, trx);
- btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
- BTR_MODIFY_LEAF, &pcur, &mtr);
- for (;;) {
- rec_t* rec;
- const byte* field;
- ulint len;
- ulint root_page_no;
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- /* The end of SYS_INDEXES has been reached. */
- break;
- }
-
- rec = btr_pcur_get_rec(&pcur);
-
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_INDEXES__TABLE_ID, &len);
- ut_ad(len == 8);
-
- if (memcmp(buf, field, len) != 0) {
- /* End of indexes for the table (TABLE_ID mismatch). */
- break;
- }
-
- if (rec_get_deleted_flag(rec, FALSE)) {
- /* The index has been dropped. */
- goto next_rec;
- }
-
- /* This call may commit and restart mtr
- and reposition pcur. */
- root_page_no = dict_truncate_index_tree(table, recreate_space,
- &pcur, &mtr);
-
- rec = btr_pcur_get_rec(&pcur);
-
- if (root_page_no != FIL_NULL) {
- page_rec_write_field(
- rec, DICT_FLD__SYS_INDEXES__PAGE_NO,
- root_page_no, &mtr);
- /* We will need to commit and restart the
- mini-transaction in order to avoid deadlocks.
- The dict_truncate_index_tree() call has allocated
- a page in this mini-transaction, and the rest of
- this loop could latch another index page. */
- mtr_commit(&mtr);
- mtr_start_trx(&mtr, trx);
- btr_pcur_restore_position(BTR_MODIFY_LEAF,
- &pcur, &mtr);
- }
-
-next_rec:
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- mem_heap_free(heap);
- /* Done with index truncation, release index tree locks,
- subsequent work relates to table level metadata change */
- dict_table_x_unlock_indexes(table);
-
- dict_hdr_get_new_id(&new_id, NULL, NULL);
-
- /* Create new FTS auxiliary tables with the new_id, and
- drop the old index later, only if everything runs successful. */
- has_internal_doc_id = dict_table_has_fts_index(table)
- || DICT_TF2_FLAG_IS_SET(
- table, DICT_TF2_FTS_HAS_DOC_ID);
- if (has_internal_doc_id) {
- dict_table_t fts_table;
- ulint i;
-
- fts_table.name = table->name;
- fts_table.id = new_id;
- fts_table.flags2 = table->flags2;
-
- err = fts_create_common_tables(
- trx, &fts_table, table->name, TRUE);
-
- for (i = 0;
- i < ib_vector_size(table->fts->indexes)
- && err == DB_SUCCESS;
- i++) {
-
- dict_index_t* fts_index;
-
- fts_index = static_cast<dict_index_t*>(
- ib_vector_getp(table->fts->indexes, i));
-
- err = fts_create_index_tables_low(
- trx, fts_index, table->name, new_id);
- }
-
- if (err != DB_SUCCESS) {
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- trx->error_state = DB_SUCCESS;
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Unable to truncate FTS index for"
- " table", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("\n", stderr);
-
- goto funct_exit;
- } else {
- ut_ad(trx->state != TRX_STATE_NOT_STARTED);
- }
- }
-
- info = pars_info_create();
-
- pars_info_add_int4_literal(info, "new_space", (lint) table->space);
- pars_info_add_ull_literal(info, "old_id", table->id);
- pars_info_add_ull_literal(info, "new_id", new_id);
-
- /* As micro-SQL does not support int4 == int8 comparisons,
- old and new IDs are added again under different names as
- int4 values*/
- pars_info_add_int4_literal(info, "old_id_narrow", table->id);
- pars_info_add_int4_literal(info, "new_id_narrow", new_id);
-
- err = que_eval_sql(info,
- "PROCEDURE RENUMBER_TABLE_ID_PROC () IS\n"
- "BEGIN\n"
- "UPDATE SYS_TABLES"
- " SET ID = :new_id, SPACE = :new_space\n"
- " WHERE ID = :old_id;\n"
- "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n"
- " WHERE TABLE_ID = :old_id;\n"
- "UPDATE SYS_INDEXES"
- " SET TABLE_ID = :new_id, SPACE = :new_space\n"
- " WHERE TABLE_ID = :old_id;\n"
- "END;\n"
- , FALSE, trx);
-
- if (err == DB_SUCCESS && old_space != table->space) {
- info = pars_info_create();
-
- pars_info_add_int4_literal(info, "old_space", (lint) old_space);
-
- pars_info_add_int4_literal(
- info, "new_space", (lint) table->space);
-
- err = que_eval_sql(info,
- "PROCEDURE RENUMBER_TABLESPACE_PROC () IS\n"
- "BEGIN\n"
- "UPDATE SYS_TABLESPACES"
- " SET SPACE = :new_space\n"
- " WHERE SPACE = :old_space;\n"
- "UPDATE SYS_DATAFILES"
- " SET SPACE = :new_space"
- " WHERE SPACE = :old_space;\n"
- "END;\n"
- , FALSE, trx);
- }
- DBUG_EXECUTE_IF("ib_ddl_crash_before_fts_truncate", err = DB_ERROR;);
-
- if (err != DB_SUCCESS) {
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- trx->error_state = DB_SUCCESS;
-
- /* Update system table failed. Table in memory metadata
- could be in an inconsistent state, mark the in-memory
- table->corrupted to be true. In the long run, this should
- be fixed by atomic truncate table */
- table->corrupted = true;
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Unable to assign a new identifier to table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("\n"
- "InnoDB: after truncating it. Background processes"
- " may corrupt the table!\n", stderr);
-
- /* Failed to update the table id, so drop the new
- FTS auxiliary tables */
- if (has_internal_doc_id) {
- ut_ad(trx->state == TRX_STATE_NOT_STARTED);
-
- table_id_t id = table->id;
-
- table->id = new_id;
-
- fts_drop_tables(trx, table);
-
- table->id = id;
-
- ut_ad(trx->state != TRX_STATE_NOT_STARTED);
- }
-
- err = DB_ERROR;
- } else {
- /* Drop the old FTS index */
- if (has_internal_doc_id) {
- ut_ad(trx->state != TRX_STATE_NOT_STARTED);
- fts_drop_tables(trx, table);
- ut_ad(trx->state != TRX_STATE_NOT_STARTED);
- }
-
- DBUG_EXECUTE_IF("ib_truncate_crash_after_fts_drop",
- DBUG_SUICIDE(););
-
- dict_table_change_id_in_cache(table, new_id);
-
- /* Reset the Doc ID in cache to 0 */
- if (has_internal_doc_id && table->fts->cache) {
- table->fts->fts_status |= TABLE_DICT_LOCKED;
- fts_update_next_doc_id(trx, table, NULL, 0);
- fts_cache_clear(table->fts->cache);
- fts_cache_init(table->fts->cache);
- table->fts->fts_status &= ~TABLE_DICT_LOCKED;
- }
- }
-
- /* Reset auto-increment. */
- dict_table_autoinc_lock(table);
- dict_table_autoinc_initialize(table, 1);
- dict_table_autoinc_unlock(table);
-
- trx_commit_for_mysql(trx);
-
-funct_exit:
-
- if (table->memcached_sync_count == DICT_TABLE_IN_DDL) {
- /* We need to set the memcached sync back to 0, unblock
- memcached operationse. */
- table->memcached_sync_count = 0;
- }
-
- row_mysql_unlock_data_dictionary(trx);
-
- dict_stats_update(table, DICT_STATS_EMPTY_TABLE);
-
- trx->op_info = "";
-
- srv_wake_master_thread();
-
- return(err);
-}
-
-/*********************************************************************//**
-Drops a table for MySQL. If the name of the dropped table ends in
-one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
-"innodb_table_monitor", then this will also stop the printing of monitor
-output by the master thread. If the data dictionary was not already locked
-by the transaction, the transaction will be committed. Otherwise, the
-data dictionary will remain locked.
-
-The drop is not always carried out immediately: if a foreign key check is
-running on the table, or the table is still referenced or has record locks,
-the table is added to the background drop list instead. In that case
-DB_SUCCESS is returned if the table was newly added to the list, and
-DB_ERROR if it was already on the list.
-
-@return error code or DB_SUCCESS; DB_TABLE_NOT_FOUND if the table is not
-in the data dictionary; DB_CANNOT_DROP_CONSTRAINT if the table is
-referenced by a foreign key of another table and foreign key checks are
-enabled */
-UNIV_INTERN
-dberr_t
-row_drop_table_for_mysql(
-/*=====================*/
- const char* name, /*!< in: table name */
- trx_t* trx, /*!< in: transaction handle */
- bool drop_db,/*!< in: true=dropping whole database */
- ibool create_failed,/*!<in: TRUE=create table failed
- because e.g. foreign key column
- type mismatch. */
- bool nonatomic)
- /*!< in: whether it is permitted
- to release and reacquire dict_operation_lock */
-{
- dberr_t err;
- dict_foreign_t* foreign;
- dict_table_t* table;
- ibool print_msg;
- ulint space_id;
- char* filepath = NULL;
- const char* tablename_minus_db;
- char* tablename = NULL;
- bool ibd_file_missing;
- ulint namelen;
- bool locked_dictionary = false;
- pars_info_t* info = NULL;
- mem_heap_t* heap = NULL;
-
- DBUG_ENTER("row_drop_table_for_mysql");
-
- DBUG_PRINT("row_drop_table_for_mysql", ("table: %s", name));
-
- ut_a(name != NULL);
-
- /* The table name is prefixed with the database name and a '/'.
- Certain table names starting with 'innodb_' have their special
- meaning regardless of the database name. Thus, we need to
- ignore the database name prefix in the comparisons. */
- tablename_minus_db = strchr(name, '/');
-
- if (tablename_minus_db) {
- tablename_minus_db++;
- } else {
- /* Ancillary FTS tables don't have '/' characters. */
- tablename_minus_db = name;
- }
-
- /* Include the terminating NUL so that the length can be compared
- directly against sizeof of the monitor name constants. */
- namelen = strlen(tablename_minus_db) + 1;
-
- if (namelen == sizeof S_innodb_monitor
- && !memcmp(tablename_minus_db, S_innodb_monitor,
- sizeof S_innodb_monitor)) {
-
- /* Table name equals "innodb_monitor":
- stop monitor prints */
-
- srv_print_innodb_monitor = FALSE;
- srv_print_innodb_lock_monitor = FALSE;
- } else if (namelen == sizeof S_innodb_lock_monitor
- && !memcmp(tablename_minus_db, S_innodb_lock_monitor,
- sizeof S_innodb_lock_monitor)) {
- srv_print_innodb_monitor = FALSE;
- srv_print_innodb_lock_monitor = FALSE;
- } else if (namelen == sizeof S_innodb_tablespace_monitor
- && !memcmp(tablename_minus_db, S_innodb_tablespace_monitor,
- sizeof S_innodb_tablespace_monitor)) {
-
- srv_print_innodb_tablespace_monitor = FALSE;
- } else if (namelen == sizeof S_innodb_table_monitor
- && !memcmp(tablename_minus_db, S_innodb_table_monitor,
- sizeof S_innodb_table_monitor)) {
-
- srv_print_innodb_table_monitor = FALSE;
- }
-
- /* Serialize data dictionary operations with dictionary mutex:
- no deadlocks can occur then in these operations */
-
- trx->op_info = "dropping table";
-
- /* This function is called recursively via fts_drop_tables(). */
- if (trx->state == TRX_STATE_NOT_STARTED) {
- trx_start_for_ddl(trx, TRX_DICT_OP_TABLE);
- }
-
- if (trx->dict_operation_lock_mode != RW_X_LATCH) {
- /* Prevent foreign key checks etc. while we are dropping the
- table */
-
- row_mysql_lock_data_dictionary(trx);
-
- /* Remember that the lock was taken here, so that it is
- released (and the transaction committed) at funct_exit. */
- locked_dictionary = true;
- nonatomic = true;
- }
-
- ut_ad(mutex_own(&(dict_sys->mutex)));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- table = dict_table_open_on_name(
- name, TRUE, FALSE,
- static_cast<dict_err_ignore_t>(
- DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
-
- if (!table) {
- err = DB_TABLE_NOT_FOUND;
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs(" does not exist in the InnoDB internal\n"
- "InnoDB: data dictionary though MySQL is"
- " trying to drop it.\n"
- "InnoDB: Have you copied the .frm file"
- " of the table to the\n"
- "InnoDB: MySQL database directory"
- " from another database?\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
- stderr);
- goto funct_exit;
- }
-
- /* Turn on this drop bit before we could release the dictionary
- latch */
- table->to_be_dropped = true;
-
- if (nonatomic) {
- /* This trx did not acquire any locks on dictionary
- table records yet. Thus it is safe to release and
- reacquire the data dictionary latches. */
- if (table->fts) {
- ut_ad(!table->fts->add_wq);
- ut_ad(lock_trx_has_sys_table_locks(trx) == 0);
-
- row_mysql_unlock_data_dictionary(trx);
- fts_optimize_remove_table(table);
- row_mysql_lock_data_dictionary(trx);
- }
-
- /* Do not bother to deal with persistent stats for temp
- tables since we know temp tables do not use persistent
- stats. */
- if (!dict_table_is_temporary(table)) {
- dict_stats_wait_bg_to_stop_using_table(
- table, trx);
- }
- }
-
- /* make sure background stats thread is not running on the table */
- ut_ad(!(table->stats_bg_flag & BG_STAT_IN_PROGRESS));
-
- /* Delete the link file if used. */
- if (DICT_TF_HAS_DATA_DIR(table->flags)) {
- fil_delete_link_file(name);
- }
-
- if (!dict_table_is_temporary(table)) {
-
- dict_stats_recalc_pool_del(table);
- dict_stats_defrag_pool_del(table, NULL);
- btr_defragment_remove_table(table);
-
- /* Remove stats for this table and all of its indexes from the
- persistent storage if it exists and if there are stats for this
- table in there. This function creates its own trx and commits
- it. */
- char errstr[1024];
- err = dict_stats_drop_table(name, errstr, sizeof(errstr));
-
- if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_WARN, "%s", errstr);
- }
- }
-
- /* Move the table to the non-LRU list so that it isn't
- considered for eviction. */
-
- if (table->can_be_evicted) {
- dict_table_move_from_lru_to_non_lru(table);
- }
-
- /* The table object keeps being used below after this close. */
- dict_table_close(table, TRUE, FALSE);
-
- /* Check if the table is referenced by foreign key constraints from
- some other table (not the table itself) */
-
- if (!srv_read_only_mode && trx->check_foreigns) {
-
- for (dict_foreign_set::iterator it
- = table->referenced_set.begin();
- it != table->referenced_set.end();
- ++it) {
-
- foreign = *it;
-
- const bool ref_ok = drop_db
- && dict_tables_have_same_db(
- name,
- foreign->foreign_table_name_lookup);
-
- /* We should allow dropping a referenced table if creating
- that referenced table has failed for some reason. For example
- if the referenced table is created but its column types that
- are referenced do not match. */
- if (foreign->foreign_table != table &&
- !create_failed && !ref_ok) {
-
- FILE* ef = dict_foreign_err_file;
-
- /* We only allow dropping a referenced table
- if FOREIGN_KEY_CHECKS is set to 0 */
-
- err = DB_CANNOT_DROP_CONSTRAINT;
-
- mutex_enter(&dict_foreign_err_mutex);
- rewind(ef);
- ut_print_timestamp(ef);
-
- fputs(" Cannot drop table ", ef);
- ut_print_name(ef, trx, TRUE, name);
- fputs("\n"
- "because it is referenced by ", ef);
- ut_print_name(ef, trx, TRUE,
- foreign->foreign_table_name);
- putc('\n', ef);
- mutex_exit(&dict_foreign_err_mutex);
-
- goto funct_exit;
- }
- }
- }
-
-
- DBUG_EXECUTE_IF("row_drop_table_add_to_background",
- row_add_table_to_background_drop_list(table->name);
- err = DB_SUCCESS;
- goto funct_exit;
- );
-
- /* TODO: could we replace the counter n_foreign_key_checks_running
- with lock checks on the table? Acquire here an exclusive lock on the
- table, and rewrite lock0lock.cc and the lock wait in srv0srv.cc so that
- they can cope with the table having been dropped here? Foreign key
- checks take an IS or IX lock on the table. */
-
- if (table->n_foreign_key_checks_running > 0) {
-
- const char* save_tablename = table->name;
- ibool added;
-
- added = row_add_table_to_background_drop_list(save_tablename);
-
- if (added) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: You are trying to drop table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, save_tablename);
- fputs("\n"
- "InnoDB: though there is a"
- " foreign key check running on it.\n"
- "InnoDB: Adding the table to"
- " the background drop queue.\n",
- stderr);
-
- /* We return DB_SUCCESS to MySQL though the drop will
- happen lazily later */
-
- err = DB_SUCCESS;
- } else {
- /* The table is already in the background drop list */
- err = DB_ERROR;
- }
-
- goto funct_exit;
- }
-
- /* Remove all locks that are on the table or its records, if there
- are no references to the table but it has record locks, we release
- the record locks unconditionally. One use case is:
-
- CREATE TABLE t2 (PRIMARY KEY (a)) SELECT * FROM t1;
-
- If after the user transaction has done the SELECT and there is a
- problem in completing the CREATE TABLE operation, MySQL will drop
- the table. InnoDB will create a new background transaction to do the
- actual drop, the trx instance that is passed to this function. To
- preserve existing behaviour we remove the locks but ideally we
- shouldn't have to. There should never be record locks on a table
- that is going to be dropped. */
-
- /* Wait on background threads to stop using table */
- fil_wait_crypt_bg_threads(table);
-
- if (table->n_ref_count == 0) {
- lock_remove_all_on_table(table, TRUE);
- ut_a(table->n_rec_locks == 0);
- } else if (table->n_ref_count > 0 || table->n_rec_locks > 0) {
- ibool added;
-
- added = row_add_table_to_background_drop_list(table->name);
-
- if (added) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: MySQL is"
- " trying to drop table ", stderr);
- ut_print_name(stderr, trx, TRUE, table->name);
- fputs("\n"
- "InnoDB: though there are still"
- " open handles to it.\n"
- "InnoDB: Adding the table to the"
- " background drop queue.\n",
- stderr);
-
- /* We return DB_SUCCESS to MySQL though the drop will
- happen lazily later */
- err = DB_SUCCESS;
- } else {
- /* The table is already in the background drop list */
- err = DB_ERROR;
- }
-
- goto funct_exit;
- }
-
- /* The "to_be_dropped" marks table that is to be dropped, but
- has not been dropped, instead, was put in the background drop
- list due to being used by concurrent DML operations. Clear it
- here since there are no longer any concurrent activities on it,
- and it is free to be dropped */
- table->to_be_dropped = false;
-
- /* If we get this far then the table to be dropped must not have
- any table or record locks on it. */
-
- ut_a(!lock_table_has_locks(table));
-
- switch (trx_get_dict_operation(trx)) {
- case TRX_DICT_OP_NONE:
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- trx->table_id = table->id;
- /* fall through */
- case TRX_DICT_OP_TABLE:
- break;
- case TRX_DICT_OP_INDEX:
- /* If the transaction was previously flagged as
- TRX_DICT_OP_INDEX, we should be dropping auxiliary
- tables for full-text indexes. */
- ut_ad(strstr(table->name, "/FTS_") != NULL);
- }
-
- /* Mark all indexes unavailable in the data dictionary cache
- before starting to drop the table. */
-
- unsigned* page_no;
- unsigned* page_nos;
- heap = mem_heap_create(
- 200 + UT_LIST_GET_LEN(table->indexes) * sizeof *page_nos);
- /* Private copy of the name, allocated from the local heap. */
- tablename = mem_heap_strdup(heap, name);
-
- page_no = page_nos = static_cast<unsigned*>(
- mem_heap_alloc(
- heap,
- UT_LIST_GET_LEN(table->indexes) * sizeof *page_no));
-
- for (dict_index_t* index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
- rw_lock_x_lock(dict_index_get_lock(index));
- /* Save the page numbers so that we can restore them
- if the operation fails. */
- *page_no++ = index->page;
- /* Mark the index unusable. */
- index->page = FIL_NULL;
- rw_lock_x_unlock(dict_index_get_lock(index));
- }
-
- /* We use the private SQL parser of Innobase to generate the
- query graphs needed in deleting the dictionary data from system
- tables in Innobase. Deleting a row from SYS_INDEXES table also
- frees the file segments of the B-tree associated with the index. */
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "table_name", name);
-
- err = que_eval_sql(info,
- "PROCEDURE DROP_TABLE_PROC () IS\n"
- "sys_foreign_id CHAR;\n"
- "table_id CHAR;\n"
- "index_id CHAR;\n"
- "foreign_id CHAR;\n"
- "space_id INT;\n"
- "found INT;\n"
-
- "DECLARE CURSOR cur_fk IS\n"
- "SELECT ID FROM SYS_FOREIGN\n"
- "WHERE FOR_NAME = :table_name\n"
- "AND TO_BINARY(FOR_NAME)\n"
- " = TO_BINARY(:table_name)\n"
- "LOCK IN SHARE MODE;\n"
-
- "DECLARE CURSOR cur_idx IS\n"
- "SELECT ID FROM SYS_INDEXES\n"
- "WHERE TABLE_ID = table_id\n"
- "LOCK IN SHARE MODE;\n"
-
- "BEGIN\n"
- "SELECT ID INTO table_id\n"
- "FROM SYS_TABLES\n"
- "WHERE NAME = :table_name\n"
- "LOCK IN SHARE MODE;\n"
- "IF (SQL % NOTFOUND) THEN\n"
- " RETURN;\n"
- "END IF;\n"
- "SELECT SPACE INTO space_id\n"
- "FROM SYS_TABLES\n"
- "WHERE NAME = :table_name;\n"
- "IF (SQL % NOTFOUND) THEN\n"
- " RETURN;\n"
- "END IF;\n"
- "found := 1;\n"
- "SELECT ID INTO sys_foreign_id\n"
- "FROM SYS_TABLES\n"
- "WHERE NAME = 'SYS_FOREIGN'\n"
- "LOCK IN SHARE MODE;\n"
- "IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- "END IF;\n"
- "IF (:table_name = 'SYS_FOREIGN') THEN\n"
- " found := 0;\n"
- "END IF;\n"
- "IF (:table_name = 'SYS_FOREIGN_COLS') THEN\n"
- " found := 0;\n"
- "END IF;\n"
- "OPEN cur_fk;\n"
- "WHILE found = 1 LOOP\n"
- " FETCH cur_fk INTO foreign_id;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSE\n"
- " DELETE FROM SYS_FOREIGN_COLS\n"
- " WHERE ID = foreign_id;\n"
- " DELETE FROM SYS_FOREIGN\n"
- " WHERE ID = foreign_id;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE cur_fk;\n"
- "found := 1;\n"
- "OPEN cur_idx;\n"
- "WHILE found = 1 LOOP\n"
- " FETCH cur_idx INTO index_id;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSE\n"
- " DELETE FROM SYS_FIELDS\n"
- " WHERE INDEX_ID = index_id;\n"
- " DELETE FROM SYS_INDEXES\n"
- " WHERE ID = index_id\n"
- " AND TABLE_ID = table_id;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE cur_idx;\n"
- "DELETE FROM SYS_TABLESPACES\n"
- "WHERE SPACE = space_id;\n"
- "DELETE FROM SYS_DATAFILES\n"
- "WHERE SPACE = space_id;\n"
- "DELETE FROM SYS_COLUMNS\n"
- "WHERE TABLE_ID = table_id;\n"
- "DELETE FROM SYS_TABLES\n"
- "WHERE NAME = :table_name;\n"
- "END;\n"
- , FALSE, trx);
-
- switch (err) {
- ibool is_temp;
- ulint table_flags;
-
- case DB_SUCCESS:
- /* Copy the fields that are needed after
- dict_table_remove_from_cache(table) below has freed the
- table object. The name itself was already cloned into
- tablename above. */
- space_id = table->space;
- ibd_file_missing = table->file_unreadable;
-
- table_flags = table->flags;
- is_temp = DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY);
-
- /* If there is a temp path then the temp flag is set.
- However, during recovery or reloading the table object
- after eviction from data dictionary cache, we might
- have a temp flag but not know the temp path */
- ut_a(table->dir_path_of_temp_table == NULL || is_temp);
- if (dict_table_is_discarded(table)
- || (!table->is_readable()
- && fil_space_get(table->space) == NULL)) {
- /* Do not attempt to drop known-to-be-missing
- tablespaces. */
- space_id = 0;
- }
-
- /* We do not allow temporary tables with a remote path. */
- ut_a(!(is_temp && DICT_TF_HAS_DATA_DIR(table_flags)));
-
- if (space_id && DICT_TF_HAS_DATA_DIR(table_flags)) {
- dict_get_and_save_data_dir_path(table, true);
- ut_a(table->data_dir_path);
-
- filepath = os_file_make_remote_pathname(
- table->data_dir_path, table->name, "ibd");
- } else if (table->dir_path_of_temp_table) {
- filepath = fil_make_ibd_name(
- table->dir_path_of_temp_table, true);
- } else {
- filepath = fil_make_ibd_name(tablename, false);
- }
-
- if (dict_table_has_fts_index(table)
- || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
- ut_ad(table->n_ref_count == 0);
- ut_ad(trx->state != TRX_STATE_NOT_STARTED);
- err = fts_drop_tables(trx, table);
-
- if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr," InnoDB: Error: (%s) not "
- "able to remove ancillary FTS tables "
- "for table ", ut_strerr(err));
- ut_print_name(stderr, trx, TRUE, tablename);
- fputs("\n", stderr);
-
- goto funct_exit;
- }
- }
-
- /* The table->fts flag can be set on the table for which
- the cluster index is being rebuilt. Such table might not have
- DICT_TF2_FTS flag set. So keep this out of above
- dict_table_has_fts_index condition */
- if (table->fts) {
- /* Need to set TABLE_DICT_LOCKED bit, since
- fts_que_graph_free_check_lock would try to acquire
- dict mutex lock */
- table->fts->fts_status |= TABLE_DICT_LOCKED;
-
- fts_free(table);
- }
-
- dict_table_remove_from_cache(table);
-
- if (dict_load_table(tablename, TRUE,
- DICT_ERR_IGNORE_NONE) != NULL) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: not able to remove table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, tablename);
- fputs(" from the dictionary cache!\n", stderr);
- err = DB_ERROR;
- }
-
- /* Do not drop possible .ibd tablespace if something went
- wrong: we do not want to delete valuable data of the user */
-
- /* Don't spam the log if we can't find the tablespace of
- a temp table or if the tablespace has been discarded. */
- print_msg = !(is_temp || ibd_file_missing);
-
- if (err == DB_SUCCESS && space_id > TRX_SYS_SPACE) {
- if (!is_temp
- && !fil_space_for_table_exists_in_mem(
- space_id, tablename,
- print_msg, IS_XTRABACKUP() && print_msg, false, NULL, 0,
- table_flags)) {
-
- /* This might happen if we are dropping a
- discarded tablespace */
- err = DB_SUCCESS;
-
- if (print_msg) {
- char msg_tablename[MAX_FULL_NAME_LEN + 1];
-
- /* Pass the size of the destination
- buffer; tablename is a pointer, so its
- sizeof would only be the pointer width
- and the name would be truncated. */
- innobase_format_name(
- msg_tablename, sizeof(msg_tablename),
- tablename, FALSE);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Removed the table %s from "
- "InnoDB's data dictionary",
- msg_tablename);
- }
-
- /* Force a delete of any discarded
- or temporary files. */
-
- fil_delete_file(filepath);
-
- } else if (fil_delete_tablespace(
- space_id,
- BUF_REMOVE_FLUSH_NO_WRITE)
- != DB_SUCCESS) {
- fprintf(stderr,
- "InnoDB: We removed now the InnoDB"
- " internal data dictionary entry\n"
- "InnoDB: of table ");
- ut_print_name(stderr, trx, TRUE, tablename);
- fprintf(stderr, ".\n");
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: not able to"
- " delete tablespace %lu of table ",
- (ulong) space_id);
- ut_print_name(stderr, trx, TRUE, tablename);
- fputs("!\n", stderr);
- err = DB_ERROR;
- }
- }
-
- break;
-
- case DB_OUT_OF_FILE_SPACE:
- err = DB_MUST_GET_MORE_FILE_SPACE;
-
- trx->error_state = err;
- row_mysql_handle_errors(&err, trx, NULL, NULL);
-
- /* raise error */
- ut_error;
- break;
-
- case DB_TOO_MANY_CONCURRENT_TRXS:
- /* Cannot even find a free slot for the
- undo log. We can directly exit here
- and return the DB_TOO_MANY_CONCURRENT_TRXS
- error. */
-
- default:
- /* This is some error we do not expect. Print
- the error number and rollback transaction */
- ut_print_timestamp(stderr);
-
- fprintf(stderr, "InnoDB: unknown error code %lu"
- " while dropping table:", (ulong) err);
- ut_print_name(stderr, trx, TRUE, tablename);
- fprintf(stderr, ".\n");
-
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- trx->error_state = DB_SUCCESS;
-
- /* Mark all indexes available in the data dictionary
- cache again. */
-
- page_no = page_nos;
-
- for (dict_index_t* index = dict_table_get_first_index(table);
- index != NULL;
- index = dict_table_get_next_index(index)) {
- rw_lock_x_lock(dict_index_get_lock(index));
- ut_a(index->page == FIL_NULL);
- index->page = *page_no++;
- rw_lock_x_unlock(dict_index_get_lock(index));
- }
- }
-
-funct_exit:
- if (heap) {
- mem_heap_free(heap);
- }
- if (filepath) {
- mem_free(filepath);
- }
-
- /* Only commit and unlock if this call took the dictionary lock. */
- if (locked_dictionary) {
- trx_commit_for_mysql(trx);
-
- row_mysql_unlock_data_dictionary(trx);
- }
-
- trx->op_info = "";
-
- srv_wake_master_thread();
-
- DBUG_RETURN(err);
-}
-
-/*********************************************************************//**
-Drop all temporary tables during crash recovery.
-Scans every user record of SYS_TABLES and, for each record whose N_COLS
-has DICT_N_COLS_COMPACT set and whose MIX_LEN has DICT_TF2_TEMPORARY set,
-drops the named table if it can be found with dict_table_get_low(). The
-result of each individual drop is not checked. */
-UNIV_INTERN
-void
-row_mysql_drop_temp_tables(void)
-/*============================*/
-{
- trx_t* trx;
- btr_pcur_t pcur;
- mtr_t mtr;
- mem_heap_t* heap;
-
- trx = trx_allocate_for_background();
- trx->op_info = "dropping temporary tables";
- row_mysql_lock_data_dictionary(trx);
-
- /* Holds the copies of the table names made in the loop below;
- freed once after the scan. */
- heap = mem_heap_create(200);
-
- mtr_start(&mtr);
-
- btr_pcur_open_at_index_side(
- true,
- dict_table_get_first_index(dict_sys->sys_tables),
- BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
-
- for (;;) {
- const rec_t* rec;
- const byte* field;
- ulint len;
- const char* table_name;
- dict_table_t* table;
-
- btr_pcur_move_to_next_user_rec(&pcur, &mtr);
-
- if (!btr_pcur_is_on_user_rec(&pcur)) {
- break;
- }
-
- /* The high order bit of N_COLS is set unless
- ROW_FORMAT=REDUNDANT. */
- rec = btr_pcur_get_rec(&pcur);
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__N_COLS, &len);
- if (len != 4
- || !(mach_read_from_4(field) & DICT_N_COLS_COMPACT)) {
- continue;
- }
-
- /* Older versions of InnoDB, which only supported tables
- in ROW_FORMAT=REDUNDANT could write garbage to
- SYS_TABLES.MIX_LEN, where we now store the is_temp flag.
- Above, we assumed is_temp=0 if ROW_FORMAT=REDUNDANT. */
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__MIX_LEN, &len);
- if (len != 4
- || !(mach_read_from_4(field) & DICT_TF2_TEMPORARY)) {
- continue;
- }
-
- /* This is a temporary table. */
- field = rec_get_nth_field_old(
- rec, DICT_FLD__SYS_TABLES__NAME, &len);
- if (len == UNIV_SQL_NULL || len == 0) {
- /* Corrupted SYS_TABLES.NAME */
- continue;
- }
-
- /* Copy the name out of the record before the
- mini-transaction is committed below. */
- table_name = mem_heap_strdupl(heap, (const char*) field, len);
-
- /* Remember the cursor position and commit the
- mini-transaction before dropping the table; the position
- is restored in a new mini-transaction afterwards. */
- btr_pcur_store_position(&pcur, &mtr);
- btr_pcur_commit_specify_mtr(&pcur, &mtr);
-
- table = dict_table_get_low(table_name);
-
- if (table) {
- row_drop_table_for_mysql(table_name, trx, FALSE, FALSE);
- trx_commit_for_mysql(trx);
- }
-
- mtr_start(&mtr);
- btr_pcur_restore_position(BTR_SEARCH_LEAF,
- &pcur, &mtr);
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
- mem_heap_free(heap);
- row_mysql_unlock_data_dictionary(trx);
- trx_free_for_background(trx);
-}
-
-/*******************************************************************//**
-Removes the foreign key definitions of all tables of one database from
-SYS_FOREIGN and SYS_FOREIGN_COLS, see Bug#18942.
-Invoked as the last step of row_drop_database_for_mysql().
-The SQL procedure walks SYS_FOREIGN in FOR_NAME order starting at the
-database name and stops at the first FOR_NAME that does not begin with it.
-@return error code or DB_SUCCESS */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-drop_all_foreign_keys_in_db(
-/*========================*/
- const char* name, /*!< in: database name which ends to '/' */
- trx_t* trx) /*!< in: transaction handle */
-{
- /* The caller must pass the name with its trailing slash. */
- ut_a(name[strlen(name) - 1] == '/');
-
- pars_info_t* info = pars_info_create();
-
- /* Bound as :dbname in the procedure text below. */
- pars_info_add_str_literal(info, "dbname", name);
-
-/** true if for_name is not prefixed with dbname */
-#define TABLE_NOT_IN_THIS_DB \
-"SUBSTR(for_name, 0, LENGTH(:dbname)) <> :dbname"
-
- /* The dict mutex is not reserved by que_eval_sql() (FALSE),
- because we are already holding it. */
- return(que_eval_sql(info,
- "PROCEDURE DROP_ALL_FOREIGN_KEYS_PROC () IS\n"
- "foreign_id CHAR;\n"
- "for_name CHAR;\n"
- "found INT;\n"
- "DECLARE CURSOR cur IS\n"
- "SELECT ID, FOR_NAME FROM SYS_FOREIGN\n"
- "WHERE FOR_NAME >= :dbname\n"
- "LOCK IN SHARE MODE\n"
- "ORDER BY FOR_NAME;\n"
- "BEGIN\n"
- "found := 1;\n"
- "OPEN cur;\n"
- "WHILE found = 1 LOOP\n"
- " FETCH cur INTO foreign_id, for_name;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSIF (" TABLE_NOT_IN_THIS_DB ") THEN\n"
- " found := 0;\n"
- " ELSIF (1=1) THEN\n"
- " DELETE FROM SYS_FOREIGN_COLS\n"
- " WHERE ID = foreign_id;\n"
- " DELETE FROM SYS_FOREIGN\n"
- " WHERE ID = foreign_id;\n"
- " END IF;\n"
- "END LOOP;\n"
- "CLOSE cur;\n"
- "COMMIT WORK;\n"
- "END;\n",
- FALSE,
- trx));
-}
-
-/*********************************************************************//**
-Drops a database for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_drop_database_for_mysql(
-/*========================*/
- const char* name, /*!< in: database name which ends to '/' */
- trx_t* trx) /*!< in: transaction handle */
-{
- dict_table_t* table;
- char* table_name;
- dberr_t err = DB_SUCCESS;
- ulint namelen = strlen(name);
-
- ut_a(name != NULL);
- ut_a(name[namelen - 1] == '/');
-
- trx->op_info = "dropping database";
-
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
-
- trx_start_if_not_started_xa(trx);
-loop:
- row_mysql_lock_data_dictionary(trx);
-
- while ((table_name = dict_get_first_table_name_in_db(name))) {
- /* Drop parent table if it is a fts aux table, to
- avoid accessing dropped fts aux tables in information
- scheam when parent table still exists.
- Note: Drop parent table will drop fts aux tables. */
- char* parent_table_name;
- parent_table_name = fts_get_parent_table_name(
- table_name, strlen(table_name));
-
- if (parent_table_name != NULL) {
- mem_free(table_name);
- table_name = parent_table_name;
- }
-
- ut_a(memcmp(table_name, name, namelen) == 0);
-
- table = dict_table_open_on_name(
- table_name, TRUE, FALSE, static_cast<dict_err_ignore_t>(
- DICT_ERR_IGNORE_INDEX_ROOT
- | DICT_ERR_IGNORE_CORRUPT));
-
- if (!table) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot load table %s from InnoDB internal "
- "data dictionary during drop database",
- table_name);
- mem_free(table_name);
- err = DB_TABLE_NOT_FOUND;
- break;
-
- }
-
- if (!row_is_mysql_tmp_table_name(table->name)) {
- /* There could be orphan temp tables left from
- interrupted alter table. Leave them, and handle
- the rest.*/
- if (table->can_be_evicted) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Orphan table encountered during "
- "DROP DATABASE. This is possible if "
- "'%s.frm' was lost.", table->name);
- }
-
- if (!table->is_readable()
- && fil_space_get(table->space) == NULL) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Missing %s.ibd file for table %s.",
- table->name, table->name);
- }
- }
-
- dict_table_close(table, TRUE, FALSE);
-
- /* The dict_table_t object must not be accessed before
- dict_table_open() or after dict_table_close(). But this is OK
- if we are holding, the dict_sys->mutex. */
- ut_ad(mutex_own(&dict_sys->mutex));
-
- /* Wait until MySQL does not have any queries running on
- the table */
-
- if (table->n_ref_count > 0) {
- row_mysql_unlock_data_dictionary(trx);
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: MySQL is trying to"
- " drop database ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fputs("\n"
- "InnoDB: though there are still"
- " open handles to table ", stderr);
- ut_print_name(stderr, trx, TRUE, table_name);
- fputs(".\n", stderr);
-
- os_thread_sleep(1000000);
-
- mem_free(table_name);
-
- goto loop;
- }
-
- err = row_drop_table_for_mysql(table_name, trx, TRUE, FALSE);
- trx_commit_for_mysql(trx);
-
- if (err != DB_SUCCESS) {
- fputs("InnoDB: DROP DATABASE ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, " failed with error (%s) for table ",
- ut_strerr(err));
- ut_print_name(stderr, trx, TRUE, table_name);
- putc('\n', stderr);
- mem_free(table_name);
- break;
- }
-
- mem_free(table_name);
- }
-
- if (err == DB_SUCCESS) {
- /* after dropping all tables try to drop all leftover
- foreign keys in case orphaned ones exist */
- err = drop_all_foreign_keys_in_db(name, trx);
-
- if (err != DB_SUCCESS) {
- fputs("InnoDB: DROP DATABASE ", stderr);
- ut_print_name(stderr, trx, TRUE, name);
- fprintf(stderr, " failed with error %d while "
- "dropping all foreign keys", err);
- }
- }
-
- trx_commit_for_mysql(trx);
-
- row_mysql_unlock_data_dictionary(trx);
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*********************************************************************//**
-Checks if a table name contains the string "/#sql" which denotes temporary
-tables in MySQL.
-@return true if temporary table */
-UNIV_INTERN MY_ATTRIBUTE((warn_unused_result))
-bool
-row_is_mysql_tmp_table_name(
-/*========================*/
- const char* name) /*!< in: table name in the form
- 'database/tablename' */
-{
- return(strstr(name, "/#sql") != NULL);
- /* return(strstr(name, "/@0023sql") != NULL); */
-}
-
-/****************************************************************//**
-Delete a single constraint.
-@return error code or DB_SUCCESS */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_delete_constraint_low(
-/*======================*/
- const char* id, /*!< in: constraint id */
- trx_t* trx) /*!< in: transaction handle */
-{
- pars_info_t* info = pars_info_create();
-
- pars_info_add_str_literal(info, "id", id);
-
- return(que_eval_sql(info,
- "PROCEDURE DELETE_CONSTRAINT () IS\n"
- "BEGIN\n"
- "DELETE FROM SYS_FOREIGN_COLS WHERE ID = :id;\n"
- "DELETE FROM SYS_FOREIGN WHERE ID = :id;\n"
- "END;\n"
- , FALSE, trx));
-}
-
-/****************************************************************//**
-Delete a single constraint.
-@return error code or DB_SUCCESS */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_delete_constraint(
-/*==================*/
- const char* id, /*!< in: constraint id */
- const char* database_name, /*!< in: database name, with the
- trailing '/' */
- mem_heap_t* heap, /*!< in: memory heap */
- trx_t* trx) /*!< in: transaction handle */
-{
- dberr_t err;
-
- /* New format constraints have ids <databasename>/<constraintname>. */
- err = row_delete_constraint_low(
- mem_heap_strcat(heap, database_name, id), trx);
-
- if ((err == DB_SUCCESS) && !strchr(id, '/')) {
- /* Old format < 4.0.18 constraints have constraint ids
- NUMBER_NUMBER. We only try deleting them if the
- constraint name does not contain a '/' character, otherwise
- deleting a new format constraint named 'foo/bar' from
- database 'baz' would remove constraint 'bar' from database
- 'foo', if it existed. */
-
- err = row_delete_constraint_low(id, trx);
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Renames a table for MySQL.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-row_rename_table_for_mysql(
-/*=======================*/
- const char* old_name, /*!< in: old table name */
- const char* new_name, /*!< in: new table name */
- trx_t* trx, /*!< in/out: transaction */
- bool commit) /*!< in: whether to commit trx */
-{
- dict_table_t* table = NULL;
- ibool dict_locked = FALSE;
- dberr_t err = DB_ERROR;
- mem_heap_t* heap = NULL;
- const char** constraints_to_drop = NULL;
- ulint n_constraints_to_drop = 0;
- ibool old_is_tmp, new_is_tmp;
- pars_info_t* info = NULL;
- int retry;
- bool aux_fts_rename = false;
- char* is_part = NULL;
-
- ut_a(old_name != NULL);
- ut_a(new_name != NULL);
- ut_ad(trx->state == TRX_STATE_ACTIVE);
-
- if (srv_force_recovery) {
- fputs("InnoDB: innodb_force_recovery is on: we do not allow\n"
- "InnoDB: database modifications by the user. Shut down\n"
- "InnoDB: mysqld and edit my.cnf so that"
- "InnoDB: innodb_force_... is removed.\n",
- stderr);
-
- err = DB_READ_ONLY;
- goto funct_exit;
-
- } else if (row_mysql_is_system_table(new_name)) {
-
- fprintf(stderr,
- "InnoDB: Error: trying to create a MySQL"
- " system table %s of type InnoDB.\n"
- "InnoDB: MySQL system tables must be"
- " of the MyISAM type!\n",
- new_name);
-
- goto funct_exit;
- }
-
- trx->op_info = "renaming table";
-
- old_is_tmp = row_is_mysql_tmp_table_name(old_name);
- new_is_tmp = row_is_mysql_tmp_table_name(new_name);
-
- dict_locked = trx->dict_operation_lock_mode == RW_X_LATCH;
-
- table = dict_table_open_on_name(old_name, dict_locked, FALSE,
- DICT_ERR_IGNORE_NONE);
-
- /* We look for pattern #P# to see if the table is partitioned
- MySQL table. */
-#ifdef __WIN__
- is_part = strstr((char *)old_name, (char *)"#p#");
-#else
- is_part = strstr((char *)old_name, (char *)"#P#");
-#endif /* __WIN__ */
-
- /* MySQL partition engine hard codes the file name
- separator as "#P#". The text case is fixed even if
- lower_case_table_names is set to 1 or 2. This is true
- for sub-partition names as well. InnoDB always
- normalises file names to lower case on Windows, this
- can potentially cause problems when copying/moving
- tables between platforms.
-
- 1) If boot against an installation from Windows
- platform, then its partition table name could
- be all be in lower case in system tables. So we
- will need to check lower case name when load table.
-
- 2) If we boot an installation from other case
- sensitive platform in Windows, we might need to
- check the existence of table name without lowering
- case them in the system table. */
- if (!table &&
- is_part &&
- innobase_get_lower_case_table_names() == 1) {
- char par_case_name[MAX_FULL_NAME_LEN + 1];
-#ifndef __WIN__
- /* Check for the table using lower
- case name, including the partition
- separator "P" */
- memcpy(par_case_name, old_name,
- strlen(old_name));
- par_case_name[strlen(old_name)] = 0;
- innobase_casedn_str(par_case_name);
-#else
- /* On Windows platfrom, check
- whether there exists table name in
- system table whose name is
- not being normalized to lower case */
- normalize_table_name_low(
- par_case_name, old_name, FALSE);
-#endif
- table = dict_table_open_on_name(par_case_name, dict_locked, FALSE,
- DICT_ERR_IGNORE_NONE);
- }
-
- if (!table) {
- err = DB_TABLE_NOT_FOUND;
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Error: table ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fputs(" does not exist in the InnoDB internal\n"
- "InnoDB: data dictionary though MySQL is"
- " trying to rename the table.\n"
- "InnoDB: Have you copied the .frm file"
- " of the table to the\n"
- "InnoDB: MySQL database directory"
- " from another database?\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n",
- stderr);
- goto funct_exit;
-
- } else if (!table->is_readable()
- && fil_space_get(table->space) == NULL
- && !dict_table_is_discarded(table)) {
-
- err = DB_TABLE_NOT_FOUND;
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Table %s does not have an .ibd file in the database "
- "directory. See " REFMAN "innodb-troubleshooting.html",
- old_name);
-
- goto funct_exit;
-
- } else if (new_is_tmp) {
- /* MySQL is doing an ALTER TABLE command and it renames the
- original table to a temporary table name. We want to preserve
- the original foreign key constraint definitions despite the
- name change. An exception is those constraints for which
- the ALTER TABLE contained DROP FOREIGN KEY <foreign key id>.*/
-
- heap = mem_heap_create(100);
-
- err = dict_foreign_parse_drop_constraints(
- heap, trx, table, &n_constraints_to_drop,
- &constraints_to_drop);
-
- if (err != DB_SUCCESS) {
- goto funct_exit;
- }
- }
-
- /* Is a foreign key check running on this table? */
- for (retry = 0; retry < 100
- && table->n_foreign_key_checks_running > 0; ++retry) {
- row_mysql_unlock_data_dictionary(trx);
- os_thread_yield();
- row_mysql_lock_data_dictionary(trx);
- }
-
- if (table->n_foreign_key_checks_running > 0) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: in ALTER TABLE ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fprintf(stderr, "\n"
- "InnoDB: a FOREIGN KEY check is running.\n"
- "InnoDB: Cannot rename table.\n");
- err = DB_TABLE_IN_FK_CHECK;
- goto funct_exit;
- }
-
- /* We use the private SQL parser of Innobase to generate the query
- graphs needed in updating the dictionary data from system tables. */
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "new_table_name", new_name);
- pars_info_add_str_literal(info, "old_table_name", old_name);
-
- err = que_eval_sql(info,
- "PROCEDURE RENAME_TABLE () IS\n"
- "BEGIN\n"
- "UPDATE SYS_TABLES"
- " SET NAME = :new_table_name\n"
- " WHERE NAME = :old_table_name;\n"
- "END;\n"
- , FALSE, trx);
-
- /* SYS_TABLESPACES and SYS_DATAFILES track non-system tablespaces
- which have space IDs > 0. */
- if (err == DB_SUCCESS
- && table->space != TRX_SYS_SPACE
- && table->is_readable()) {
- /* Make a new pathname to update SYS_DATAFILES. */
- char* new_path = row_make_new_pathname(table, new_name);
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "new_table_name", new_name);
- pars_info_add_str_literal(info, "new_path_name", new_path);
- pars_info_add_int4_literal(info, "space_id", table->space);
-
- err = que_eval_sql(info,
- "PROCEDURE RENAME_SPACE () IS\n"
- "BEGIN\n"
- "UPDATE SYS_TABLESPACES"
- " SET NAME = :new_table_name\n"
- " WHERE SPACE = :space_id;\n"
- "UPDATE SYS_DATAFILES"
- " SET PATH = :new_path_name\n"
- " WHERE SPACE = :space_id;\n"
- "END;\n"
- , FALSE, trx);
-
- mem_free(new_path);
- }
- if (err != DB_SUCCESS) {
- goto end;
- }
-
- if (!new_is_tmp) {
- /* Rename all constraints. */
- char new_table_name[MAX_TABLE_NAME_LEN] = "";
- char old_table_utf8[MAX_TABLE_NAME_LEN] = "";
- uint errors = 0;
-
- strncpy(old_table_utf8, old_name, MAX_TABLE_NAME_LEN);
- innobase_convert_to_system_charset(
- strchr(old_table_utf8, '/') + 1,
- strchr(old_name, '/') +1,
- MAX_TABLE_NAME_LEN, &errors);
-
- if (errors) {
- /* Table name could not be converted from charset
- my_charset_filename to UTF-8. This means that the
- table name is already in UTF-8 (#mysql#50). */
- strncpy(old_table_utf8, old_name, MAX_TABLE_NAME_LEN);
- }
-
- info = pars_info_create();
-
- pars_info_add_str_literal(info, "new_table_name", new_name);
- pars_info_add_str_literal(info, "old_table_name", old_name);
- pars_info_add_str_literal(info, "old_table_name_utf8",
- old_table_utf8);
-
- strncpy(new_table_name, new_name, MAX_TABLE_NAME_LEN);
- innobase_convert_to_system_charset(
- strchr(new_table_name, '/') + 1,
- strchr(new_name, '/') +1,
- MAX_TABLE_NAME_LEN, &errors);
-
- if (errors) {
- /* Table name could not be converted from charset
- my_charset_filename to UTF-8. This means that the
- table name is already in UTF-8 (#mysql#50). */
- strncpy(new_table_name, new_name, MAX_TABLE_NAME_LEN);
- }
-
- pars_info_add_str_literal(info, "new_table_utf8", new_table_name);
-
- err = que_eval_sql(
- info,
- "PROCEDURE RENAME_CONSTRAINT_IDS () IS\n"
- "gen_constr_prefix CHAR;\n"
- "new_db_name CHAR;\n"
- "foreign_id CHAR;\n"
- "new_foreign_id CHAR;\n"
- "old_db_name_len INT;\n"
- "old_t_name_len INT;\n"
- "new_db_name_len INT;\n"
- "id_len INT;\n"
- "offset INT;\n"
- "found INT;\n"
- "BEGIN\n"
- "found := 1;\n"
- "old_db_name_len := INSTR(:old_table_name, '/')-1;\n"
- "new_db_name_len := INSTR(:new_table_name, '/')-1;\n"
- "new_db_name := SUBSTR(:new_table_name, 0,\n"
- " new_db_name_len);\n"
- "old_t_name_len := LENGTH(:old_table_name);\n"
- "gen_constr_prefix := CONCAT(:old_table_name_utf8,\n"
- " '_ibfk_');\n"
- "WHILE found = 1 LOOP\n"
- " SELECT ID INTO foreign_id\n"
- " FROM SYS_FOREIGN\n"
- " WHERE FOR_NAME = :old_table_name\n"
- " AND TO_BINARY(FOR_NAME)\n"
- " = TO_BINARY(:old_table_name)\n"
- " LOCK IN SHARE MODE;\n"
- " IF (SQL % NOTFOUND) THEN\n"
- " found := 0;\n"
- " ELSE\n"
- " UPDATE SYS_FOREIGN\n"
- " SET FOR_NAME = :new_table_name\n"
- " WHERE ID = foreign_id;\n"
- " id_len := LENGTH(foreign_id);\n"
- " IF (INSTR(foreign_id, '/') > 0) THEN\n"
- " IF (INSTR(foreign_id,\n"
- " gen_constr_prefix) > 0)\n"
- " THEN\n"
- " offset := INSTR(foreign_id, '_ibfk_') - 1;\n"
- " new_foreign_id :=\n"
- " CONCAT(:new_table_utf8,\n"
- " SUBSTR(foreign_id, offset,\n"
- " id_len - offset));\n"
- " ELSE\n"
- " new_foreign_id :=\n"
- " CONCAT(new_db_name,\n"
- " SUBSTR(foreign_id,\n"
- " old_db_name_len,\n"
- " id_len - old_db_name_len));\n"
- " END IF;\n"
- " UPDATE SYS_FOREIGN\n"
- " SET ID = new_foreign_id\n"
- " WHERE ID = foreign_id;\n"
- " UPDATE SYS_FOREIGN_COLS\n"
- " SET ID = new_foreign_id\n"
- " WHERE ID = foreign_id;\n"
- " END IF;\n"
- " END IF;\n"
- "END LOOP;\n"
- "UPDATE SYS_FOREIGN SET REF_NAME = :new_table_name\n"
- "WHERE REF_NAME = :old_table_name\n"
- " AND TO_BINARY(REF_NAME)\n"
- " = TO_BINARY(:old_table_name);\n"
- "END;\n"
- , FALSE, trx);
-
- } else if (n_constraints_to_drop > 0) {
- /* Drop some constraints of tmp tables. */
-
- ulint db_name_len = dict_get_db_name_len(old_name) + 1;
- char* db_name = mem_heap_strdupl(heap, old_name,
- db_name_len);
- ulint i;
-
- for (i = 0; i < n_constraints_to_drop; i++) {
- err = row_delete_constraint(constraints_to_drop[i],
- db_name, heap, trx);
-
- if (err != DB_SUCCESS) {
- break;
- }
- }
- }
-
- if (dict_table_has_fts_index(table)
- && !dict_tables_have_same_db(old_name, new_name)) {
- err = fts_rename_aux_tables(table, new_name, trx);
- if (err != DB_TABLE_NOT_FOUND) {
- aux_fts_rename = true;
- }
- }
-
-end:
- if (err != DB_SUCCESS) {
- if (err == DB_DUPLICATE_KEY) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error; possible reasons:\n"
- "InnoDB: 1) Table rename would cause"
- " two FOREIGN KEY constraints\n"
- "InnoDB: to have the same internal name"
- " in case-insensitive comparison.\n"
- "InnoDB: 2) table ", stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs(" exists in the InnoDB internal data\n"
- "InnoDB: dictionary though MySQL is"
- " trying to rename table ", stderr);
- ut_print_name(stderr, trx, TRUE, old_name);
- fputs(" to it.\n"
- "InnoDB: Have you deleted the .frm file"
- " and not used DROP TABLE?\n"
- "InnoDB: You can look for further help from\n"
- "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
- "InnoDB: If table ", stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs(" is a temporary table #sql..., then"
- " it can be that\n"
- "InnoDB: there are still queries running"
- " on the table, and it will be\n"
- "InnoDB: dropped automatically when"
- " the queries end.\n"
- "InnoDB: You can drop the orphaned table"
- " inside InnoDB by\n"
- "InnoDB: creating an InnoDB table with"
- " the same name in another\n"
- "InnoDB: database and copying the .frm file"
- " to the current database.\n"
- "InnoDB: Then MySQL thinks the table exists,"
- " and DROP TABLE will\n"
- "InnoDB: succeed.\n", stderr);
- }
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- trx->error_state = DB_SUCCESS;
- } else {
- /* The following call will also rename the .ibd data file if
- the table is stored in a single-table tablespace */
-
- err = dict_table_rename_in_cache(
- table, new_name, !new_is_tmp);
- if (err != DB_SUCCESS) {
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- trx->error_state = DB_SUCCESS;
- goto funct_exit;
- }
-
- /* We only want to switch off some of the type checking in
- an ALTER, not in a RENAME. */
-
- err = dict_load_foreigns(
- new_name, NULL,
- false, !old_is_tmp || trx->check_foreigns,
- DICT_ERR_IGNORE_NONE);
-
- if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
-
- if (old_is_tmp) {
- fputs(" InnoDB: Error: in ALTER TABLE ",
- stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs("\n"
- "InnoDB: has or is referenced"
- " in foreign key constraints\n"
- "InnoDB: which are not compatible"
- " with the new table definition.\n",
- stderr);
- } else {
- fputs(" InnoDB: Error: in RENAME TABLE"
- " table ",
- stderr);
- ut_print_name(stderr, trx, TRUE, new_name);
- fputs("\n"
- "InnoDB: is referenced in"
- " foreign key constraints\n"
- "InnoDB: which are not compatible"
- " with the new table definition.\n",
- stderr);
- }
-
- ut_a(DB_SUCCESS == dict_table_rename_in_cache(
- table, old_name, FALSE));
- trx->error_state = DB_SUCCESS;
- trx_rollback_to_savepoint(trx, NULL);
- trx->error_state = DB_SUCCESS;
- }
- }
-
-funct_exit:
- if (aux_fts_rename && err != DB_SUCCESS
- && table != NULL && (table->space != 0)) {
-
- char* orig_name = table->name;
- trx_t* trx_bg = trx_allocate_for_background();
-
- /* If the first fts_rename fails, the trx would
- be rolled back and committed, we can't use it any more,
- so we have to start a new background trx here. */
- ut_a(trx_state_eq(trx_bg, TRX_STATE_NOT_STARTED));
- trx_bg->op_info = "Revert the failing rename "
- "for fts aux tables";
- trx_bg->dict_operation_lock_mode = RW_X_LATCH;
- trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE);
-
- /* If rename fails and table has its own tablespace,
- we need to call fts_rename_aux_tables again to
- revert the ibd file rename, which is not under the
- control of trx. Also notice the parent table name
- in cache is not changed yet. If the reverting fails,
- the ibd data may be left in the new database, which
- can be fixed only manually. */
- table->name = const_cast<char*>(new_name);
- fts_rename_aux_tables(table, old_name, trx_bg);
- table->name = orig_name;
-
- trx_bg->dict_operation_lock_mode = 0;
- trx_commit_for_mysql(trx_bg);
- trx_free_for_background(trx_bg);
- }
-
- if (table != NULL) {
- dict_table_close(table, dict_locked, FALSE);
- }
-
- if (commit) {
- trx_commit_for_mysql(trx);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- trx->op_info = "";
-
- return(err);
-}
-
-/*********************************************************************//**
-Checks that the index contains entries in an ascending order, unique
-constraint is not broken, and calculates the number of index entries
-in the read view of the current transaction.
-@return true if ok */
-UNIV_INTERN
-bool
-row_check_index_for_mysql(
-/*======================*/
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct
- in MySQL handle */
- const dict_index_t* index, /*!< in: index */
- ulint* n_rows) /*!< out: number of entries
- seen in the consistent read */
-{
- dtuple_t* prev_entry = NULL;
- ulint matched_fields;
- ulint matched_bytes;
- byte* buf;
- ulint ret;
- rec_t* rec;
- bool is_ok = true;
- int cmp;
- ibool contains_null;
- ulint i;
- ulint cnt;
- mem_heap_t* heap = NULL;
- ulint n_ext;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets;
- rec_offs_init(offsets_);
-
- *n_rows = 0;
-
- if (dict_index_is_clust(index)) {
- /* The clustered index of a table is always available.
- During online ALTER TABLE that rebuilds the table, the
- clustered index in the old table will have
- index->online_log pointing to the new table. All
- indexes of the old table will remain valid and the new
- table will be unaccessible to MySQL until the
- completion of the ALTER TABLE. */
- } else if (dict_index_is_online_ddl(index)
- || (index->type & DICT_FTS)) {
- /* Full Text index are implemented by auxiliary tables,
- not the B-tree. We also skip secondary indexes that are
- being created online. */
- return(true);
- }
-
- buf = static_cast<byte*>(mem_alloc(UNIV_PAGE_SIZE));
- heap = mem_heap_create(100);
-
- cnt = 1000;
-
- ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, 0);
-loop:
- /* Check thd->killed every 1,000 scanned rows */
- if (--cnt == 0) {
- if (trx_is_interrupted(prebuilt->trx)) {
- goto func_exit;
- }
- cnt = 1000;
- }
-
- switch (ret) {
- case DB_SUCCESS:
- break;
- default:
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Warning: CHECK TABLE on ", stderr);
- dict_index_name_print(stderr, prebuilt->trx, index);
- fprintf(stderr, " returned %lu\n", ret);
- /* (this error is ignored by CHECK TABLE) */
- /* fall through */
- case DB_END_OF_INDEX:
-func_exit:
- mem_free(buf);
- mem_heap_free(heap);
-
- return(is_ok);
- }
-
- *n_rows = *n_rows + 1;
-
- /* row_search... returns the index record in buf, record origin offset
- within buf stored in the first 4 bytes, because we have built a dummy
- template */
-
- rec = buf + mach_read_from_4(buf);
-
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
-
- if (prev_entry != NULL) {
- matched_fields = 0;
- matched_bytes = 0;
-
- cmp = cmp_dtuple_rec_with_match(prev_entry, rec, offsets,
- &matched_fields,
- &matched_bytes);
- contains_null = FALSE;
-
- /* In a unique secondary index we allow equal key values if
- they contain SQL NULLs */
-
- for (i = 0;
- i < dict_index_get_n_ordering_defined_by_user(index);
- i++) {
- if (UNIV_SQL_NULL == dfield_get_len(
- dtuple_get_nth_field(prev_entry, i))) {
-
- contains_null = TRUE;
- break;
- }
- }
-
- if (cmp > 0) {
- fputs("InnoDB: index records in a wrong order in ",
- stderr);
-not_ok:
- dict_index_name_print(stderr,
- prebuilt->trx, index);
- fputs("\n"
- "InnoDB: prev record ", stderr);
- dtuple_print(stderr, prev_entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print_new(stderr, rec, offsets);
- putc('\n', stderr);
- is_ok = false;
- } else if (dict_index_is_unique(index)
- && !contains_null
- && matched_fields
- >= dict_index_get_n_ordering_defined_by_user(
- index)) {
-
- fputs("InnoDB: duplicate key in ", stderr);
- goto not_ok;
- }
- }
-
- {
- mem_heap_t* tmp_heap = NULL;
-
- /* Empty the heap on each round. But preserve offsets[]
- for the row_rec_to_index_entry() call, by copying them
- into a separate memory heap when needed. */
- if (UNIV_UNLIKELY(offsets != offsets_)) {
- ulint size = rec_offs_get_n_alloc(offsets)
- * sizeof *offsets;
-
- tmp_heap = mem_heap_create(size);
-
- offsets = static_cast<ulint*>(
- mem_heap_dup(tmp_heap, offsets, size));
- }
-
- mem_heap_empty(heap);
-
- prev_entry = row_rec_to_index_entry(
- rec, index, offsets, &n_ext, heap);
-
- if (UNIV_LIKELY_NULL(tmp_heap)) {
- mem_heap_free(tmp_heap);
- }
- }
-
- ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT);
-
- goto loop;
-}
-
-/*********************************************************************//**
-Determines if a table is a magic monitor table.
-@return true if monitor table */
-UNIV_INTERN
-bool
-row_is_magic_monitor_table(
-/*=======================*/
- const char* table_name) /*!< in: name of the table, in the
- form database/table_name */
-{
- const char* name; /* table_name without database/ */
- ulint len;
-
- name = dict_remove_db_name(table_name);
- len = strlen(name) + 1;
-
- return(STR_EQ(name, len, S_innodb_monitor)
- || STR_EQ(name, len, S_innodb_lock_monitor)
- || STR_EQ(name, len, S_innodb_tablespace_monitor)
- || STR_EQ(name, len, S_innodb_table_monitor)
-#ifdef UNIV_MEM_DEBUG
- || STR_EQ(name, len, S_innodb_mem_validate)
-#endif /* UNIV_MEM_DEBUG */
- );
-}
-
-/*********************************************************************//**
-Initialize this module */
-UNIV_INTERN
-void
-row_mysql_init(void)
-/*================*/
-{
- mutex_create(
- row_drop_list_mutex_key,
- &row_drop_list_mutex, SYNC_NO_ORDER_CHECK);
-
- UT_LIST_INIT(row_mysql_drop_list);
-
- row_mysql_drop_list_inited = TRUE;
-}
-
-/*********************************************************************//**
-Close this module */
-UNIV_INTERN
-void
-row_mysql_close(void)
-/*================*/
-{
- ut_a(UT_LIST_GET_LEN(row_mysql_drop_list) == 0);
-
- mutex_free(&row_drop_list_mutex);
-
- row_mysql_drop_list_inited = FALSE;
-}
diff --git a/storage/xtradb/row/row0purge.cc b/storage/xtradb/row/row0purge.cc
deleted file mode 100644
index 333677edf21..00000000000
--- a/storage/xtradb/row/row0purge.cc
+++ /dev/null
@@ -1,1057 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0purge.cc
-Purge obsolete records
-
-Created 3/14/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0purge.h"
-
-#ifdef UNIV_NONINL
-#include "row0purge.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "trx0undo.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "row0row.h"
-#include "row0upd.h"
-#include "row0vers.h"
-#include "row0mysql.h"
-#include "row0log.h"
-#include "log0log.h"
-#include "srv0mon.h"
-#include "srv0start.h"
-
-/*************************************************************************
-IMPORTANT NOTE: Any operation that generates redo MUST check that there
-is enough space in the redo log for that operation before starting it.
-This is done by calling log_free_check(). The reason for checking the
-availability of the redo log space before the start of the operation is
-that we MUST NOT hold any synchronization objects when performing the
-check.
-If you make a change in this module, make sure that no code path is
-introduced where a call to log_free_check() is bypassed. */
-
-/********************************************************************//**
-Creates a purge node to a query graph.
-The node itself is zero-allocated from the given heap and starts with
-done == TRUE. In addition, a separate memory heap is created with
-mem_heap_create(256) and stored in node->heap.
-@return own: purge node */
-UNIV_INTERN
-purge_node_t*
-row_purge_node_create(
-/*==================*/
- que_thr_t* parent, /*!< in: parent node */
- mem_heap_t* heap) /*!< in: memory heap where created */
-{
- purge_node_t* node;
-
- ut_ad(parent != NULL);
- ut_ad(heap != NULL);
-
- node = static_cast<purge_node_t*>(
- mem_heap_zalloc(heap, sizeof(*node)));
-
- node->common.type = QUE_NODE_PURGE;
- node->common.parent = parent;
- node->done = TRUE;
- node->heap = mem_heap_create(256);
-
- return(node);
-}
-
-/***********************************************************//**
-Repositions the pcur in the purge node on the clustered index record,
-if found. If the record is not found, close pcur.
-When node->found_clust is already set, the cursor position is restored
-with btr_pcur_restore_position(); otherwise the record is looked up
-through node->ref with row_search_on_row_ref() and, if found, the
-cursor position is stored. In both cases node->found_clust is
-overwritten with the outcome.
-@return TRUE if the record was found */
-static
-ibool
-row_purge_reposition_pcur(
-/*======================*/
- ulint mode, /*!< in: latching mode */
- purge_node_t* node, /*!< in: row purge node */
- mtr_t* mtr) /*!< in: mtr */
-{
- if (node->found_clust) {
- ut_ad(node->validate_pcur());
-
- node->found_clust = btr_pcur_restore_position(mode, &node->pcur, mtr);
-
- } else {
- node->found_clust = row_search_on_row_ref(
- &node->pcur, mode, node->table, node->ref, mtr);
-
- if (node->found_clust) {
- btr_pcur_store_position(&node->pcur, mtr);
- }
- }
-
- /* Close the current cursor if we fail to position it correctly. */
- if (!node->found_clust) {
- btr_pcur_close(&node->pcur);
- }
-
- return(node->found_clust);
-}
-
-/***********************************************************//**
-Removes a delete marked clustered index record if possible.
-The record is left alone if its roll pointer differs from
-node->roll_ptr. With BTR_MODIFY_LEAF the result is whatever
-btr_cur_optimistic_delete() reports; with BTR_MODIFY_TREE only
-DB_OUT_OF_FILE_SPACE yields false, and any error other than that or
-DB_SUCCESS ends in ut_error. The mini-transaction started here is
-always committed before returning.
-@retval true if the row was not found, or it was successfully removed
-@retval false if the row was modified after the delete marking */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-row_purge_remove_clust_if_poss_low(
-/*===============================*/
- purge_node_t* node, /*!< in/out: row purge node */
- ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
-{
- dict_index_t* index;
- bool success = true; /* stays true on both early exits below */
- mtr_t mtr;
- rec_t* rec;
- mem_heap_t* heap = NULL; /* set by rec_get_offsets() if needed; freed at func_exit */
- ulint* offsets;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- index = dict_table_get_first_index(node->table);
-
- log_free_check(); /* called before the mini-transaction is started;
- see the IMPORTANT NOTE at the top of this file */
- mtr_start(&mtr);
-
- if (!row_purge_reposition_pcur(mode, node, &mtr)) {
- /* The record was already removed. */
- goto func_exit;
- }
-
- rec = btr_pcur_get_rec(&node->pcur);
-
- offsets = rec_get_offsets(
- rec, index, offsets_, ULINT_UNDEFINED, &heap);
-
- if (node->roll_ptr != row_get_rec_roll_ptr(rec, index, offsets)) {
- /* Someone else has modified the record later: do not remove */
- goto func_exit;
- }
-
- if (mode == BTR_MODIFY_LEAF) {
- success = btr_cur_optimistic_delete(
- btr_pcur_get_btr_cur(&node->pcur), 0, &mtr);
- } else {
- dberr_t err;
- ut_ad(mode == BTR_MODIFY_TREE);
- btr_cur_pessimistic_delete(
- &err, FALSE, btr_pcur_get_btr_cur(&node->pcur), 0,
- RB_NONE, &mtr);
-
- switch (err) {
- case DB_SUCCESS:
- break;
- case DB_OUT_OF_FILE_SPACE:
- success = false;
- break;
- default:
- ut_error;
- }
- }
-
-func_exit:
- if (heap) {
- mem_heap_free(heap);
- }
-
- /* Persistent cursor is closed if reposition fails. */
- if (node->found_clust) {
- btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
- } else {
- mtr_commit(&mtr);
- }
-
- return(success);
-}
-
-/***********************************************************//**
-Removes a clustered index record if it has not been modified after the delete
-marking.
-One attempt is made with BTR_MODIFY_LEAF. If that returns false, up to
-BTR_CUR_RETRY_DELETE_N_TIMES attempts are made with BTR_MODIFY_TREE,
-calling os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME) after each failed
-attempt.
-@retval true if the row was not found, or it was successfully removed
-@retval false the purge needs to be suspended because of running out
-of file space. */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-row_purge_remove_clust_if_poss(
-/*===========================*/
- purge_node_t* node) /*!< in/out: row purge node */
-{
- if (row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF)) {
- return(true);
- }
-
- for (ulint n_tries = 0;
- n_tries < BTR_CUR_RETRY_DELETE_N_TIMES;
- n_tries++) {
- if (row_purge_remove_clust_if_poss_low(
- node, BTR_MODIFY_TREE)) {
- return(true);
- }
-
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
- }
-
- return(false);
-}
-
-/***********************************************************//**
-Determines if it is possible to remove a secondary index entry.
-Removal is possible if the secondary index entry does not refer to any
-not delete marked version of a clustered index record where DB_TRX_ID
-is newer than the purge view.
-
-NOTE: This function should only be called by the purge thread, only
-while holding a latch on the leaf page of the secondary index entry
-(or keeping the buffer pool watch on the page). It is possible that
-this function first returns true and then false, if a user transaction
-inserts a record that the secondary index entry would refer to.
-However, in that case, the user transaction would also re-insert the
-secondary index entry after purge has removed it and released the leaf
-page latch.
-
-The check runs in its own mini-transaction, which is committed before
-returning. The result is true when the clustered index record cannot
-be repositioned on, or when row_vers_old_has_index_entry() returns
-false for it.
-@return true if the secondary index record can be purged */
-UNIV_INTERN
-bool
-row_purge_poss_sec(
-/*===============*/
- purge_node_t* node, /*!< in/out: row purge node */
- dict_index_t* index, /*!< in: secondary index */
- const dtuple_t* entry) /*!< in: secondary index entry */
-{
- bool can_delete;
- mtr_t mtr;
-
- ut_ad(!dict_index_is_clust(index));
- mtr_start(&mtr);
-
- can_delete = !row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr)
- || !row_vers_old_has_index_entry(TRUE,
- btr_pcur_get_rec(&node->pcur),
- &mtr, index, entry); /* the record is only read
- when the reposition succeeded (short-circuit ||) */
-
- /* Persistent cursor is closed if reposition fails. */
- if (node->found_clust) {
- btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
- } else {
- mtr_commit(&mtr);
- }
-
- return(can_delete);
-}
-
-/***************************************************************
-Removes a secondary index entry if possible, by modifying the
-index tree. Does not try to buffer the delete.
-FALSE is returned only when btr_cur_pessimistic_delete() reports
-DB_OUT_OF_FILE_SPACE; any error other than that or DB_SUCCESS ends
-in ut_error. The mini-transaction started here is always committed
-before returning.
-@return TRUE if success or if not found */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-ibool
-row_purge_remove_sec_if_poss_tree(
-/*==============================*/
- purge_node_t* node, /*!< in: row purge node */
- dict_index_t* index, /*!< in: index */
- const dtuple_t* entry) /*!< in: index entry */
-{
- btr_pcur_t pcur;
- btr_cur_t* btr_cur;
- ibool success = TRUE;
- dberr_t err;
- mtr_t mtr;
- enum row_search_result search_result;
-
- log_free_check();
- mtr_start(&mtr);
-
- if (*index->name == TEMP_INDEX_PREFIX) {
- /* The index->online_status may change if the
- index->name starts with TEMP_INDEX_PREFIX (meaning
- that the index is or was being created online). It is
- protected by index->lock. */
- mtr_x_lock(dict_index_get_lock(index), &mtr);
-
- if (dict_index_is_online_ddl(index)) {
- /* Online secondary index creation will not
- copy any delete-marked records. Therefore
- there is nothing to be purged. We must also
- skip the purge when a completed index is
- dropped by rollback_inplace_alter_table(). */
- goto func_exit_no_pcur;
- }
- } else {
- /* For secondary indexes,
- index->online_status==ONLINE_INDEX_CREATION unless
- index->name starts with TEMP_INDEX_PREFIX.
- NOTE(review): the assertion below requires the index
- not to be in online DDL, so the status named here
- looks inverted (presumably ONLINE_INDEX_COMPLETE)
- -- confirm. */
- ut_ad(!dict_index_is_online_ddl(index));
- }
-
- search_result = row_search_index_entry(index, entry, BTR_MODIFY_TREE,
- &pcur, &mtr);
-
- switch (search_result) {
- case ROW_NOT_FOUND:
- /* Not found. This is a legitimate condition. In a
- rollback, InnoDB will remove secondary recs that would
- be purged anyway. Then the actual purge will not find
- the secondary index record. Also, the purge itself is
- eager: if it comes to consider a secondary index
- record, and notices it does not need to exist in the
- index, it will remove it. Then if/when the purge
- comes to consider the secondary index record a second
- time, it will not exist any more in the index. */
-
- /* fputs("PURGE:........sec entry not found\n", stderr); */
- /* dtuple_print(stderr, entry); */
- goto func_exit;
- case ROW_FOUND:
- break;
- case ROW_BUFFERED:
- case ROW_NOT_DELETED_REF:
- /* These are invalid outcomes, because the mode passed
- to row_search_index_entry() did not include any of the
- flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
- ut_error;
- }
-
- btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- /* We should remove the index record if no later version of the row,
- which cannot be purged yet, requires its existence. If some requires,
- we should do nothing. */
-
- if (row_purge_poss_sec(node, index, entry)) {
- /* Remove the index record, which should have been
- marked for deletion. */
- if (!rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
- dict_table_is_comp(index->table))) {
- fputs("InnoDB: tried to purge sec index entry not"
- " marked for deletion in\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fputs("\n"
- "InnoDB: tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, btr_cur_get_rec(btr_cur), index);
- putc('\n', stderr);
-
- ut_ad(0);
-
- goto func_exit; /* the record is left in place and
- success is still TRUE on this path */
- }
-
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
- RB_NONE, &mtr);
- switch (UNIV_EXPECT(err, DB_SUCCESS)) {
- case DB_SUCCESS:
- break;
- case DB_OUT_OF_FILE_SPACE:
- success = FALSE;
- break;
- default:
- ut_error;
- }
- }
-
-func_exit:
- btr_pcur_close(&pcur);
-func_exit_no_pcur:
- mtr_commit(&mtr);
-
- return(success);
-}
-
-/***************************************************************
-Removes a secondary index entry without modifying the index tree,
-if possible.
-The search is done with BTR_MODIFY_LEAF | BTR_DELETE (plus
-BTR_ALREADY_S_LATCHED when the index lock was S-latched here because
-the index name starts with TEMP_INDEX_PREFIX). false is returned only
-when the record was found, may be purged, is delete-marked, and
-btr_cur_optimistic_delete() fails. The mini-transaction started here
-is always committed before returning.
-@retval true if success or if not found
-@retval false if row_purge_remove_sec_if_poss_tree() should be invoked */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-row_purge_remove_sec_if_poss_leaf(
-/*==============================*/
- purge_node_t* node, /*!< in: row purge node */
- dict_index_t* index, /*!< in: index */
- const dtuple_t* entry) /*!< in: index entry */
-{
- mtr_t mtr;
- btr_pcur_t pcur;
- ulint mode;
- enum row_search_result search_result;
- bool success = true;
-
- log_free_check();
-
- mtr_start(&mtr);
-
- if (*index->name == TEMP_INDEX_PREFIX) {
- /* The index->online_status may change if the
- index->name starts with TEMP_INDEX_PREFIX (meaning
- that the index is or was being created online). It is
- protected by index->lock. */
- mtr_s_lock(dict_index_get_lock(index), &mtr);
-
- if (dict_index_is_online_ddl(index)) {
- /* Online secondary index creation will not
- copy any delete-marked records. Therefore
- there is nothing to be purged. We must also
- skip the purge when a completed index is
- dropped by rollback_inplace_alter_table(). */
- goto func_exit_no_pcur;
- }
-
- mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED | BTR_DELETE;
- } else {
- /* For secondary indexes,
- index->online_status==ONLINE_INDEX_CREATION unless
- index->name starts with TEMP_INDEX_PREFIX.
- NOTE(review): the assertion below requires the index
- not to be in online DDL, so the status named here
- looks inverted (presumably ONLINE_INDEX_COMPLETE)
- -- confirm. */
- ut_ad(!dict_index_is_online_ddl(index));
-
- mode = BTR_MODIFY_LEAF | BTR_DELETE;
- }
-
- /* Set the purge node for the call to row_purge_poss_sec(). */
- pcur.btr_cur.purge_node = node;
- /* Set the query thread, so that ibuf_insert_low() will be
- able to invoke thd_get_trx(). */
- pcur.btr_cur.thr = static_cast<que_thr_t*>(que_node_get_parent(node));
-
- search_result = row_search_index_entry(
- index, entry, mode, &pcur, &mtr);
-
- switch (search_result) {
- case ROW_FOUND:
- /* Before attempting to purge a record, check
- if it is safe to do so. */
- if (row_purge_poss_sec(node, index, entry)) {
- btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- /* Only delete-marked records should be purged. */
- if (!rec_get_deleted_flag(
- btr_cur_get_rec(btr_cur),
- dict_table_is_comp(index->table))) {
-
- fputs("InnoDB: tried to purge sec index"
- " entry not marked for deletion in\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, NULL, index);
- fputs("\n"
- "InnoDB: tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, btr_cur_get_rec(btr_cur),
- index);
- putc('\n', stderr);
-
- ut_ad(0);
-
- btr_pcur_close(&pcur);
-
- goto func_exit_no_pcur; /* success is still true,
- so the caller will not try the tree variant */
- }
-
- if (!btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
-
- /* The index entry could not be deleted. */
- success = false;
- }
- }
- /* (The index entry is still needed,
- or the deletion succeeded) */
- /* fall through */
- case ROW_NOT_DELETED_REF:
- /* The index entry is still needed. */
- case ROW_BUFFERED:
- /* The deletion was buffered. */
- case ROW_NOT_FOUND:
- /* The index entry does not exist, nothing to do. */
- btr_pcur_close(&pcur);
- func_exit_no_pcur:
- mtr_commit(&mtr);
- return(success);
- }
-
- ut_error; /* every enumerator handled above returns from the switch */
- return(FALSE);
-}
-
-/***********************************************************//**
-Removes a secondary index entry if possible.
-A NULL entry is ignored. Otherwise the leaf-only removal is tried
-first; if it returns false, the tree-modifying removal is attempted
-and, on failure, retried up to BTR_CUR_RETRY_DELETE_N_TIMES more
-times with os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME) between
-attempts. If the last attempt also fails, ut_a(success) fires. */
-UNIV_INLINE MY_ATTRIBUTE((nonnull(1,2)))
-void
-row_purge_remove_sec_if_poss(
-/*=========================*/
- purge_node_t* node, /*!< in: row purge node */
- dict_index_t* index, /*!< in: index */
- const dtuple_t* entry) /*!< in: index entry */
-{
- ibool success;
- ulint n_tries = 0; /* number of retries of the tree variant so far */
-
- /* fputs("Purge: Removing secondary record\n", stderr); */
-
- if (!entry) {
- /* The node->row must have lacked some fields of this
- index. This is possible when the undo log record was
- written before this index was created. */
- return;
- }
-
- if (row_purge_remove_sec_if_poss_leaf(node, index, entry)) {
-
- return;
- }
-retry:
- success = row_purge_remove_sec_if_poss_tree(node, index, entry);
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
-
- if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
-
- n_tries++;
-
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
-
- goto retry;
- }
-
- ut_a(success);
-}
-
-/***********************************************************//**
-Purges a delete marking of a record.
-Starting from node->index, each remaining index of the table is
-visited: for every index whose type is not DICT_FTS, an entry is built
-from node->row and passed to row_purge_remove_sec_if_poss(). After
-that, the clustered index record is removed with
-row_purge_remove_clust_if_poss(), whose result is returned.
-@retval true if the row was not found, or it was successfully removed
-@retval false the purge needs to be suspended because of
-running out of file space */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-row_purge_del_mark(
-/*===============*/
- purge_node_t* node) /*!< in/out: row purge node */
-{
- mem_heap_t* heap; /* scratch heap for the index entries; emptied
- after each index and freed before returning */
-
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- /* skip corrupted secondary index */
- dict_table_skip_corrupt_index(node->index);
-
- if (!node->index) { /* the skip above apparently may leave
- node->index NULL -- presumably when no usable index remains */
- break;
- }
-
- if (node->index->type != DICT_FTS) {
- dtuple_t* entry = row_build_index_entry_low(
- node->row, NULL, node->index, heap);
- row_purge_remove_sec_if_poss(node, node->index, entry);
- mem_heap_empty(heap);
- }
-
- node->index = dict_table_get_next_index(node->index);
- }
-
- mem_heap_free(heap);
-
- return(row_purge_remove_clust_if_poss(node));
-}
-
-/***********************************************************//**
-Purges an update of an existing record. Also purges an update of a delete
-marked record if that record contained an externally stored field.
-The secondary index pass is skipped when node->rec_type is
-TRX_UNDO_UPD_DEL_REC or node->cmpl_info has UPD_NODE_NO_ORD_CHANGE set.
-Otherwise, for each remaining index for which
-row_upd_changes_ord_field_binary() reports a change, the older version
-of the index entry is built from node->row and passed to
-row_purge_remove_sec_if_poss(). Finally, for every update field whose
-new value is externally stored, btr_free_externally_stored_field() is
-called in a mini-transaction of its own. The thr parameter exists only
-when UNIV_DEBUG is defined; callers use the
-row_purge_upd_exist_or_extern() macro defined after this function. */
-static
-void
-row_purge_upd_exist_or_extern_func(
-/*===============================*/
-#ifdef UNIV_DEBUG
- const que_thr_t*thr, /*!< in: query thread */
-#endif /* UNIV_DEBUG */
- purge_node_t* node, /*!< in: row purge node */
- trx_undo_rec_t* undo_rec) /*!< in: record to purge */
-{
- mem_heap_t* heap; /* only assigned and used when the secondary
- index pass below is not skipped */
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (node->rec_type == TRX_UNDO_UPD_DEL_REC
- || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
-
- goto skip_secondaries;
- }
-
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- dict_table_skip_corrupt_index(node->index);
-
- if (!node->index) {
- break;
- }
-
- if (row_upd_changes_ord_field_binary(node->index, node->update,
- thr, NULL, NULL)) {
- /* Build the older version of the index entry */
- dtuple_t* entry = row_build_index_entry_low(
- node->row, NULL, node->index, heap);
- row_purge_remove_sec_if_poss(node, node->index, entry);
- mem_heap_empty(heap);
- }
-
- node->index = dict_table_get_next_index(node->index);
- }
-
- mem_heap_free(heap);
-
-skip_secondaries:
- /* Free possible externally stored fields */
- for (ulint i = 0; i < upd_get_n_fields(node->update); i++) {
-
- const upd_field_t* ufield
- = upd_get_nth_field(node->update, i);
-
- if (dfield_is_ext(&ufield->new_val)) {
- trx_rseg_t* rseg;
- buf_block_t* block;
- ulint internal_offset;
- byte* data_field;
- dict_index_t* index;
- ibool is_insert;
- ulint rseg_id;
- ulint page_no;
- ulint offset;
- mtr_t mtr;
-
- /* We use the fact that new_val points to
- undo_rec and get thus the offset of
- dfield data inside the undo record. Then we
- can calculate from node->roll_ptr the file
- address of the new_val data */
-
- internal_offset
- = ((const byte*)
- dfield_get_data(&ufield->new_val))
- - undo_rec;
-
- ut_a(internal_offset < UNIV_PAGE_SIZE);
-
- trx_undo_decode_roll_ptr(node->roll_ptr,
- &is_insert, &rseg_id,
- &page_no, &offset);
-
- rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id);
- ut_a(rseg != NULL);
- ut_a(rseg->id == rseg_id);
-
- mtr_start(&mtr);
-
- /* We have to acquire an X-latch to the clustered
- index tree */
-
- index = dict_table_get_first_index(node->table);
- mtr_x_lock(dict_index_get_lock(index), &mtr);
-
- /* NOTE: we must also acquire an X-latch to the
- root page of the tree. We will need it when we
- free pages from the tree. If the tree is of height 1,
- the tree X-latch does NOT protect the root page,
- because it is also a leaf page. Since we will have a
- latch on an undo log page, we would break the
- latching order if we would only later latch the
- root page of such a tree! */
-
- btr_root_get(index, &mtr);
-
- block = buf_page_get(
- rseg->space, 0, page_no, RW_X_LATCH, &mtr);
-
- buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
-
- data_field = buf_block_get_frame(block)
- + offset + internal_offset;
-
- ut_a(dfield_get_len(&ufield->new_val)
- >= BTR_EXTERN_FIELD_REF_SIZE);
- btr_free_externally_stored_field(
- index,
- data_field + dfield_get_len(&ufield->new_val)
- - BTR_EXTERN_FIELD_REF_SIZE,
- NULL, NULL, NULL, 0, RB_NONE, &mtr); /* the pointer
- passed is the last BTR_EXTERN_FIELD_REF_SIZE bytes
- of the field data on the undo page */
- mtr_commit(&mtr);
- }
- }
-}
-
-#ifdef UNIV_DEBUG
-# define row_purge_upd_exist_or_extern(thr,node,undo_rec) \
- row_purge_upd_exist_or_extern_func(thr,node,undo_rec)
-#else /* UNIV_DEBUG */
-# define row_purge_upd_exist_or_extern(thr,node,undo_rec) \
- row_purge_upd_exist_or_extern_func(node,undo_rec)
-#endif /* UNIV_DEBUG */
-
-/***********************************************************//**
-Parses the row reference and other info in a modify undo log record.
-Fills in node->rec_type, node->cmpl_info, node->table, node->ref,
-node->update and, unless UPD_NODE_NO_ORD_CHANGE is set in
-node->cmpl_info, node->row.
-When true is returned, the dict_operation_lock S-latch acquired here
-is still held and node->table is still open; releasing and closing
-them is left to the caller. On every false return taken after the
-latch was acquired, the latch is released here and any opened table
-is closed.
-@return true if purge operation required */
-static
-bool
-row_purge_parse_undo_rec(
-/*=====================*/
- purge_node_t* node, /*!< in: row undo node */
- trx_undo_rec_t* undo_rec, /*!< in: record to purge */
- bool* updated_extern, /*!< out: true if an externally
- stored field was updated */
- que_thr_t* thr) /*!< in: query thread */
-{
- dict_index_t* clust_index;
- byte* ptr;
- trx_t* trx;
- undo_no_t undo_no;
- table_id_t table_id;
- trx_id_t trx_id;
- roll_ptr_t roll_ptr;
- ulint info_bits;
- ulint type;
-
- ut_ad(node != NULL);
- ut_ad(thr != NULL);
-
- ptr = trx_undo_rec_get_pars(
- undo_rec, &type, &node->cmpl_info,
- updated_extern, &undo_no, &table_id);
-
- node->rec_type = type;
-
- if (type == TRX_UNDO_UPD_DEL_REC && !*updated_extern) {
-
- return(false); /* returned before dict_operation_lock is
- acquired, so there is nothing to release */
- }
-
- ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
- &info_bits);
- node->table = NULL;
-
- /* Prevent DROP TABLE etc. from running when we are doing the purge
- for this row */
-
- rw_lock_s_lock_inline(&dict_operation_lock, 0, __FILE__, __LINE__);
-
- node->table = dict_table_open_on_id(
- table_id, FALSE, DICT_TABLE_OP_NORMAL);
-
- if (node->table == NULL) {
- /* The table has been dropped: no need to do purge */
- goto err_exit;
- }
-
- if (node->table->file_unreadable) {
- /* We skip purge of missing .ibd files */
-
- dict_table_close(node->table, FALSE, FALSE);
-
- node->table = NULL;
-
- goto err_exit;
- }
-
- clust_index = dict_table_get_first_index(node->table);
-
- if (clust_index == NULL) {
- /* The table was corrupt in the data dictionary.
- dict_set_corrupted() works on an index, and
- we do not have an index to call it with. */
-close_exit:
- dict_table_close(node->table, FALSE, FALSE);
-err_exit:
- rw_lock_s_unlock(&dict_operation_lock);
- return(false);
- }
-
- if (type == TRX_UNDO_UPD_EXIST_REC
- && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)
- && !*updated_extern) {
-
- /* Purge requires no changes to indexes: we may return */
- goto close_exit;
- }
-
- ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
- node->heap);
-
- trx = thr_get_trx(thr);
-
- ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
- roll_ptr, info_bits, trx,
- node->heap, &(node->update));
-
- /* Read to the partial row the fields that occur in indexes */
-
- if (!(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- ptr = trx_undo_rec_get_partial_row(
- ptr, clust_index, &node->row,
- type == TRX_UNDO_UPD_DEL_REC,
- node->heap);
- }
-
- return(true); /* dict_operation_lock is still S-latched and
- node->table is still open on this path */
-}
-
-/***********************************************************//**
-Purges the parsed record.
-node->index is set to the index following the first (clustered) index
-of node->table, and the work is dispatched on node->rec_type:
-TRX_UNDO_DEL_MARK_REC goes to row_purge_del_mark();
-TRX_UNDO_UPD_EXIST_REC, and any other type when updated_extern is
-true, goes to row_purge_upd_exist_or_extern(). Afterwards node->pcur
-is closed if node->found_clust is set, and node->table is closed and
-reset to NULL. The thr parameter exists only when UNIV_DEBUG is
-defined; callers use the row_purge_record() macro defined after this
-function.
-@return true if purged, false if skipped */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-row_purge_record_func(
-/*==================*/
- purge_node_t* node, /*!< in: row purge node */
- trx_undo_rec_t* undo_rec, /*!< in: record to purge */
-#ifdef UNIV_DEBUG
- const que_thr_t*thr, /*!< in: query thread */
-#endif /* UNIV_DEBUG */
- bool updated_extern) /*!< in: whether external columns
- were updated */
-{
- dict_index_t* clust_index;
- bool purged = true; /* only row_purge_del_mark() can change this */
-
- ut_ad(!node->found_clust);
-
- clust_index = dict_table_get_first_index(node->table);
-
- node->index = dict_table_get_next_index(clust_index);
-
- switch (node->rec_type) {
- case TRX_UNDO_DEL_MARK_REC:
- purged = row_purge_del_mark(node);
- if (!purged) {
- break;
- }
- MONITOR_INC(MONITOR_N_DEL_ROW_PURGE);
- break;
- default:
- if (!updated_extern) {
- break;
- }
- /* fall through */
- case TRX_UNDO_UPD_EXIST_REC:
- row_purge_upd_exist_or_extern(thr, node, undo_rec);
- MONITOR_INC(MONITOR_N_UPD_EXIST_EXTERN);
- break;
- }
-
- if (node->found_clust) {
- btr_pcur_close(&node->pcur);
- node->found_clust = FALSE;
- }
-
- if (node->table != NULL) {
- dict_table_close(node->table, FALSE, FALSE);
- node->table = NULL;
- }
-
- return(purged);
-}
-
-#ifdef UNIV_DEBUG
-# define row_purge_record(node,undo_rec,thr,updated_extern) \
- row_purge_record_func(node,undo_rec,thr,updated_extern)
-#else /* UNIV_DEBUG */
-# define row_purge_record(node,undo_rec,thr,updated_extern) \
- row_purge_record_func(node,undo_rec,updated_extern)
-#endif /* UNIV_DEBUG */
-
-/***********************************************************//**
-Fetches an undo log record and does the purge for the recorded operation.
-If none left, or the current purge completed, returns the control to the
-parent node, which is always a query thread node.
-Nothing is done when undo_rec is &trx_purge_dummy_rec. Otherwise the
-record is parsed and purged; dict_operation_lock, which is still
-S-latched after a successful parse, is released after each purge
-attempt. If the attempt did not purge the record and srv_shutdown_state
-is SRV_SHUTDOWN_NONE, the record is parsed and purged again after a
-sleep of 1000000 (see os_thread_sleep()). */
-static
-void
-row_purge(
-/*======*/
- purge_node_t* node, /*!< in: row purge node */
- trx_undo_rec_t* undo_rec, /*!< in: record to purge */
- que_thr_t* thr) /*!< in: query thread */
-{
- if (undo_rec != &trx_purge_dummy_rec) {
- bool updated_extern;
-
- while (row_purge_parse_undo_rec(
- node, undo_rec, &updated_extern, thr)) {
-
- bool purged = row_purge_record(
- node, undo_rec, thr, updated_extern);
-
- rw_lock_s_unlock(&dict_operation_lock);
-
- if (purged
- || srv_shutdown_state != SRV_SHUTDOWN_NONE) {
- return;
- }
-
- /* Retry the purge in a second. */
- os_thread_sleep(1000000);
- }
- }
-}
-
-/***********************************************************//**
-Reset the purge query thread.
-thr->run_node is moved from the purge node to its parent, the node's
-undo_recs pointer is cleared, the node is marked done and node->heap
-is emptied. */
-UNIV_INLINE
-void
-row_purge_end(
-/*==========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- purge_node_t* node;
-
- ut_ad(thr);
-
- node = static_cast<purge_node_t*>(thr->run_node);
-
- ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
-
- thr->run_node = que_node_get_parent(node);
-
- node->undo_recs = NULL;
-
- node->done = TRUE;
-
- ut_a(thr->run_node != NULL);
-
- mem_heap_empty(node->heap);
-}
-
-/***********************************************************//**
-Does the purge operation for a single undo log record. This is a high-level
-function used in an SQL execution graph.
-The per-record fields of the purge node are reset first. If
-node->undo_recs is non-NULL and not empty, one record is popped from
-it and purged with row_purge(); when the vector is empty afterwards
-(or was NULL or empty to begin with) row_purge_end() is called,
-otherwise thr->run_node stays on this node. As written, this function
-always returns thr.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_purge_step(
-/*===========*/
- que_thr_t* thr) /*!< in: query thread */
-{
- purge_node_t* node;
-
- ut_ad(thr);
-
- node = static_cast<purge_node_t*>(thr->run_node);
-
- node->table = NULL;
- node->row = NULL;
- node->ref = NULL;
- node->index = NULL;
- node->update = NULL;
- node->found_clust = FALSE;
- node->rec_type = ULINT_UNDEFINED;
- node->cmpl_info = ULINT_UNDEFINED;
-
- ut_a(!node->done);
-
- ut_ad(que_node_get_type(node) == QUE_NODE_PURGE);
-
- if (!(node->undo_recs == NULL || ib_vector_is_empty(node->undo_recs))) {
- trx_purge_rec_t*purge_rec;
-
- purge_rec = static_cast<trx_purge_rec_t*>(
- ib_vector_pop(node->undo_recs));
-
- node->roll_ptr = purge_rec->roll_ptr;
-
- row_purge(node, purge_rec->undo_rec, thr);
-
- if (ib_vector_is_empty(node->undo_recs)) {
- row_purge_end(thr);
- } else {
- thr->run_node = node;
- }
- } else {
- row_purge_end(thr);
- }
-
- return(thr);
-}
-
-#ifdef UNIV_DEBUG
-/***********************************************************//**
-Validate the persistent cursor. The purge node has two references
-to the clustered index record - one via the ref member, and the
-other via the persistent cursor. These two references must match
-each other if the found_clust flag is set.
-The check is skipped (true is returned) when found_clust is not set,
-when index is NULL or of type DICT_FTS, or when pcur.old_stored is
-not BTR_PCUR_OLD_STORED. On a mismatch a message, ref and
-pcur.old_rec are printed to stderr.
-@return true if the stored copy of persistent cursor is consistent
-with the ref member.*/
-bool
-purge_node_t::validate_pcur()
-{
- if (!found_clust) {
- return(true);
- }
-
- if (index == NULL) {
- return(true);
- }
-
- if (index->type == DICT_FTS) {
- return(true);
- }
-
- if (pcur.old_stored != BTR_PCUR_OLD_STORED) {
- return(true);
- }
-
- dict_index_t* clust_index = pcur.btr_cur.index;
-
- ulint* offsets = rec_get_offsets(
- pcur.old_rec, clust_index, NULL, pcur.old_n_fields, &heap); /* the
- node's own heap member is passed for the offsets */
-
- /* Here we are comparing the purge ref record and the stored initial
- part in persistent cursor. Both cases we store n_uniq fields of the
- cluster index and so it is fine to do the comparison. We note this
- dependency here as pcur and ref belong to different modules. */
- int st = cmp_dtuple_rec(ref, pcur.old_rec, offsets);
-
- if (st != 0) {
- fprintf(stderr, "Purge node pcur validation failed\n");
- dtuple_print(stderr, ref);
- rec_print(stderr, pcur.old_rec, clust_index);
- return(false);
- }
-
- return(true);
-}
-#endif /* UNIV_DEBUG */
diff --git a/storage/xtradb/row/row0quiesce.cc b/storage/xtradb/row/row0quiesce.cc
deleted file mode 100644
index 583fbe60fb3..00000000000
--- a/storage/xtradb/row/row0quiesce.cc
+++ /dev/null
@@ -1,700 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0quiesce.cc
-Quiesce a tablespace.
-
-Created 2012-02-08 by Sunny Bains.
-*******************************************************/
-
-#include "row0quiesce.h"
-#include "row0mysql.h"
-
-#ifdef UNIV_NONINL
-#include "row0quiesce.ic"
-#endif
-
-#include "ibuf0ibuf.h"
-#include "srv0start.h"
-#include "trx0purge.h"
-
-/*********************************************************************//**
-Write the meta data (index user fields) config file.
-For each of the index->n_fields fields this writes, in order:
-a 4-byte prefix_len and a 4-byte fixed_len (encoded by mach_write_to_4()),
-then a 4-byte name length that counts the terminating NUL byte, then the
-field name itself including that NUL byte.
-If any fwrite() is short, a warning carrying errno is sent to the session
-and the function returns immediately.
-The DBUG_EXECUTE_IF() hooks close the underlying file descriptor when the
-named debug keyword is active (presumably test-only fault injection).
-@return DB_SUCCESS or error code (DB_IO_ERROR on a failed write). */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_quiesce_write_index_fields(
-/*===========================*/
- const dict_index_t* index, /*!< in: write the meta data for
- this index */
- FILE* file, /*!< in: file to write to */
- THD* thd) /*!< in/out: session */
-{
- byte row[sizeof(ib_uint32_t) * 2]; /* holds prefix_len + fixed_len */
-
- for (ulint i = 0; i < index->n_fields; ++i) {
- byte* ptr = row;
- const dict_field_t* field = &index->fields[i];
-
- mach_write_to_4(ptr, field->prefix_len);
- ptr += sizeof(ib_uint32_t);
-
- mach_write_to_4(ptr, field->fixed_len);
-
- DBUG_EXECUTE_IF("ib_export_io_write_failure_9",
- close(fileno(file)););
-
- if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
- "while writing index fields.");
-
- return(DB_IO_ERROR);
- }
-
- /* Include the NUL byte in the length. */
- ib_uint32_t len = static_cast<ib_uint32_t>(strlen(field->name) + 1);
- ut_a(len > 1); /* i.e. the field name must not be empty */
-
- mach_write_to_4(row, len); /* row is reused for the length prefix */
-
- DBUG_EXECUTE_IF("ib_export_io_write_failure_10",
- close(fileno(file)););
-
- if (fwrite(row, 1, sizeof(len), file) != sizeof(len)
- || fwrite(field->name, 1, len, file) != len) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
- "while writing index column.");
-
- return(DB_IO_ERROR);
- }
- }
-
- return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Write the meta data config file index information.
-First writes a 4-byte count of the indexes in table->indexes. Then, for
-every index in list order, writes a fixed-size record made of the 8-byte
-index id followed by eight 4-byte values (space, page, type,
-trx_id_offset, n_user_defined_cols, n_uniq, n_nullable, n_fields), then
-the index name as [4-byte length including NUL, bytes], and finally the
-per-field data via row_quiesce_write_index_fields().
-Iteration stops at the first failure; a failed fwrite() sends a warning
-carrying errno to the session.
-The DBUG_EXECUTE_IF() hooks close the underlying file descriptor when the
-named debug keyword is active (presumably test-only fault injection).
-@return DB_SUCCESS or error code. */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_quiesce_write_indexes(
-/*======================*/
- const dict_table_t* table, /*!< in: write the meta data for
- this table */
- FILE* file, /*!< in: file to write to */
- THD* thd) /*!< in/out: session */
-{
- {
- byte row[sizeof(ib_uint32_t)];
-
- /* Write the number of indexes in the table. */
- mach_write_to_4(row, UT_LIST_GET_LEN(table->indexes));
-
- DBUG_EXECUTE_IF("ib_export_io_write_failure_11",
- close(fileno(file)););
-
- if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
- ib_senderrf(
- thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
- "while writing index count.");
-
- return(DB_IO_ERROR);
- }
- }
-
- dberr_t err = DB_SUCCESS;
-
- /* Write the index meta data. The loop condition also ends the
- iteration once writing the fields of an index has failed. */
- for (const dict_index_t* index = UT_LIST_GET_FIRST(table->indexes);
- index != 0 && err == DB_SUCCESS;
- index = UT_LIST_GET_NEXT(indexes, index)) {
-
- byte* ptr;
- byte row[sizeof(index_id_t)
- + sizeof(ib_uint32_t) * 8];
-
- ptr = row;
-
- ut_ad(sizeof(index_id_t) == 8); /* mach_write_to_8() below relies on this */
- mach_write_to_8(ptr, index->id);
- ptr += sizeof(index_id_t);
-
- mach_write_to_4(ptr, index->space);
- ptr += sizeof(ib_uint32_t);
-
- mach_write_to_4(ptr, index->page);
- ptr += sizeof(ib_uint32_t);
-
- mach_write_to_4(ptr, index->type);
- ptr += sizeof(ib_uint32_t);
-
- mach_write_to_4(ptr, index->trx_id_offset);
- ptr += sizeof(ib_uint32_t);
-
- mach_write_to_4(ptr, index->n_user_defined_cols);
- ptr += sizeof(ib_uint32_t);
-
- mach_write_to_4(ptr, index->n_uniq);
- ptr += sizeof(ib_uint32_t);
-
- mach_write_to_4(ptr, index->n_nullable);
- ptr += sizeof(ib_uint32_t);
-
- mach_write_to_4(ptr, index->n_fields);
-
- DBUG_EXECUTE_IF("ib_export_io_write_failure_12",
- close(fileno(file)););
-
- if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
- "while writing index meta-data.");
-
- return(DB_IO_ERROR);
- }
-
- /* Write the length of the index name.
- NUL byte is included in the length. */
- ib_uint32_t len = static_cast<ib_uint32_t>(strlen(index->name) + 1);
- ut_a(len > 1); /* i.e. the index name must not be empty */
-
- mach_write_to_4(row, len); /* row is reused for the length prefix */
-
- DBUG_EXECUTE_IF("ib_export_io_write_failure_1",
- close(fileno(file)););
-
- if (fwrite(row, 1, sizeof(len), file) != sizeof(len)
- || fwrite(index->name, 1, len, file) != len) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
- "while writing index name.");
-
- return(DB_IO_ERROR);
- }
-
- err = row_quiesce_write_index_fields(index, file, thd);
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Write the meta data (table columns) config file. Serialise the contents of
-dict_col_t structure, along with the column name. All fields are serialized
-as ib_uint32_t.
-For each of the table->n_cols columns the record holds, in order: prtype,
-mtype, len, mbminmaxlen, ind, ord_part and max_prefix, followed by the
-column name as [4-byte length including NUL, bytes].
-A failed fwrite() sends a warning carrying errno to the session and ends
-the function immediately.
-The DBUG_EXECUTE_IF() hooks close the underlying file descriptor when the
-named debug keyword is active (presumably test-only fault injection).
-@return DB_SUCCESS or error code (DB_IO_ERROR on a failed write). */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_quiesce_write_table(
-/*====================*/
- const dict_table_t* table, /*!< in: write the meta data for
- this table */
- FILE* file, /*!< in: file to write to */
- THD* thd) /*!< in/out: session */
-{
- dict_col_t* col;
- byte row[sizeof(ib_uint32_t) * 7]; /* one slot per serialized dict_col_t member */
-
- col = table->cols;
-
- for (ulint i = 0; i < table->n_cols; ++i, ++col) {
- byte* ptr = row;
-
- mach_write_to_4(ptr, col->prtype);
- ptr += sizeof(ib_uint32_t);
-
- mach_write_to_4(ptr, col->mtype);
- ptr += sizeof(ib_uint32_t);
-
- mach_write_to_4(ptr, col->len);
- ptr += sizeof(ib_uint32_t);
-
- mach_write_to_4(ptr, col->mbminmaxlen);
- ptr += sizeof(ib_uint32_t);
-
- mach_write_to_4(ptr, col->ind);
- ptr += sizeof(ib_uint32_t);
-
- mach_write_to_4(ptr, col->ord_part);
- ptr += sizeof(ib_uint32_t);
-
- mach_write_to_4(ptr, col->max_prefix);
-
- DBUG_EXECUTE_IF("ib_export_io_write_failure_2",
- close(fileno(file)););
-
- if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
- ib_senderrf(
- thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
- "while writing table column data.");
-
- return(DB_IO_ERROR);
- }
-
- /* Write out the column name as [len, byte array]. The len
- includes the NUL byte. */
- ib_uint32_t len;
- const char* col_name;
-
- col_name = dict_table_get_col_name(table, dict_col_get_no(col));
-
- /* Include the NUL byte in the length. */
- len = static_cast<ib_uint32_t>(strlen(col_name) + 1);
- ut_a(len > 1); /* i.e. the column name must not be empty */
-
- mach_write_to_4(row, len); /* row is reused for the length prefix */
-
- DBUG_EXECUTE_IF("ib_export_io_write_failure_3",
- close(fileno(file)););
-
- if (fwrite(row, 1, sizeof(len), file) != sizeof(len)
- || fwrite(col_name, 1, len, file) != len) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
- "while writing column name.");
-
- return(DB_IO_ERROR);
- }
- }
-
- return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Write the meta data config file header.
-The header consists of, in order: the 4-byte version
-IB_EXPORT_CFG_VERSION_V1; the server hostname as [4-byte length including
-NUL, bytes] (a fixed placeholder string is used when
-server_get_hostname() returns NULL); the table name in the same
-[length, bytes] form; the 8-byte table->autoinc value; and three 4-byte
-values: UNIV_PAGE_SIZE, table->flags and table->n_cols.
-A failed fwrite() sends a warning carrying errno to the session and ends
-the function immediately.
-The DBUG_EXECUTE_IF() hooks close the underlying file descriptor when the
-named debug keyword is active (presumably test-only fault injection).
-@return DB_SUCCESS or error code (DB_IO_ERROR on a failed write). */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_quiesce_write_header(
-/*=====================*/
- const dict_table_t* table, /*!< in: write the meta data for
- this table */
- FILE* file, /*!< in: file to write to */
- THD* thd) /*!< in/out: session */
-{
- byte value[sizeof(ib_uint32_t)];
-
- /* Write the meta-data version number. */
- mach_write_to_4(value, IB_EXPORT_CFG_VERSION_V1);
-
- DBUG_EXECUTE_IF("ib_export_io_write_failure_4", close(fileno(file)););
-
- if (fwrite(&value, 1, sizeof(value), file) != sizeof(value)) {
- ib_senderrf(
- thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
- "while writing meta-data version number.");
-
- return(DB_IO_ERROR);
- }
-
- /* Write the server hostname. */
- ib_uint32_t len;
- const char* hostname = server_get_hostname();
-
- /* Play it safe and check for NULL. */
- if (hostname == 0) {
- static const char NullHostname[] = "Hostname unknown";
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Unable to determine server hostname.");
-
- hostname = NullHostname;
- }
-
- /* The server hostname includes the NUL byte. */
- len = static_cast<ib_uint32_t>(strlen(hostname) + 1);
- mach_write_to_4(value, len);
-
- DBUG_EXECUTE_IF("ib_export_io_write_failure_5", close(fileno(file)););
-
- if (fwrite(&value, 1, sizeof(value), file) != sizeof(value)
- || fwrite(hostname, 1, len, file) != len) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
- "while writing hostname.");
-
- return(DB_IO_ERROR);
- }
-
- /* The table name includes the NUL byte. */
- ut_a(table->name != 0);
- len = static_cast<ib_uint32_t>(strlen(table->name) + 1);
-
- /* Write the table name. */
- mach_write_to_4(value, len);
-
- DBUG_EXECUTE_IF("ib_export_io_write_failure_6", close(fileno(file)););
-
- if (fwrite(&value, 1, sizeof(value), file) != sizeof(value)
- || fwrite(table->name, 1, len, file) != len) {
-
- ib_senderrf(
- thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
- "while writing table name.");
-
- return(DB_IO_ERROR);
- }
-
- byte row[sizeof(ib_uint32_t) * 3]; /* 12 bytes: first used for the 8-byte autoinc, then for three 4-byte values */
-
- /* Write the next autoinc value. */
- mach_write_to_8(row, table->autoinc);
-
- DBUG_EXECUTE_IF("ib_export_io_write_failure_7", close(fileno(file)););
-
- if (fwrite(row, 1, sizeof(ib_uint64_t), file) != sizeof(ib_uint64_t)) {
- ib_senderrf(
- thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
- "while writing table autoinc value.");
-
- return(DB_IO_ERROR);
- }
-
- byte* ptr = row;
-
- /* Write the system page size. */
- mach_write_to_4(ptr, UNIV_PAGE_SIZE);
- ptr += sizeof(ib_uint32_t);
-
- /* Write the table->flags. */
- mach_write_to_4(ptr, table->flags);
- ptr += sizeof(ib_uint32_t);
-
- /* Write the number of columns in the table. */
- mach_write_to_4(ptr, table->n_cols);
-
- DBUG_EXECUTE_IF("ib_export_io_write_failure_8", close(fileno(file)););
-
- if (fwrite(row, 1, sizeof(row), file) != sizeof(row)) {
- ib_senderrf(
- thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
- errno, strerror(errno),
- "while writing table meta-data.");
-
- return(DB_IO_ERROR);
- }
-
- return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Write the table meta data after quiesce.
-Creates (or truncates) the file named by srv_get_meta_data_filename() and
-writes the header, the column data and the index data, in that order,
-stopping at the first section that fails. The stream is always flushed
-and closed once it has been opened. A failure of fflush() or fclose()
-means the file content cannot be trusted, so it is reported to the
-session and also turned into DB_IO_ERROR unless an earlier error is
-already being returned.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_quiesce_write_cfg(
-/*==================*/
-	dict_table_t*	table,	/*!< in: write the meta data for
-				this table */
-	THD*		thd)	/*!< in/out: session */
-{
-	dberr_t		err;
-	char		name[OS_FILE_MAX_PATH];
-
-	srv_get_meta_data_filename(table, name, sizeof(name));
-
-	ib_logf(IB_LOG_LEVEL_INFO, "Writing table metadata to '%s'", name);
-
-	FILE*	file = fopen(name, "w+b");
-
-	if (file == NULL) {
-		ib_errf(thd, IB_LOG_LEVEL_WARN, ER_CANT_CREATE_FILE,
-			name, errno, strerror(errno));
-
-		return(DB_IO_ERROR);
-	}
-
-	err = row_quiesce_write_header(table, file, thd);
-
-	if (err == DB_SUCCESS) {
-		err = row_quiesce_write_table(table, file, thd);
-	}
-
-	if (err == DB_SUCCESS) {
-		err = row_quiesce_write_indexes(table, file, thd);
-	}
-
-	if (fflush(file) != 0) {
-		/* Capture errno before any other library call can
-		overwrite it. */
-		const int	flush_errno = errno;
-		char		msg[BUFSIZ];
-
-		ut_snprintf(msg, sizeof(msg), "%s flush() failed",
-			    name);
-
-		ib_senderrf(
-			thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
-			flush_errno, strerror(flush_errno), msg);
-
-		/* Buffered data did not reach the file; do not
-		report success, but keep any earlier error code. */
-		if (err == DB_SUCCESS) {
-			err = DB_IO_ERROR;
-		}
-	}
-
-	if (fclose(file) != 0) {
-		const int	close_errno = errno;
-		char		msg[BUFSIZ];
-
-		ut_snprintf(msg, sizeof(msg), "%s fclose() failed",
-			    name);
-
-		ib_senderrf(
-			thd, IB_LOG_LEVEL_WARN, ER_IO_WRITE_ERROR,
-			close_errno, strerror(close_errno), msg);
-
-		if (err == DB_SUCCESS) {
-			err = DB_IO_ERROR;
-		}
-	}
-
-	return(err);
-}
-
-/*********************************************************************//**
-Check whether a table has an FTS index defined on it.
-The table's index list is walked between dict_mutex_enter_for_mysql()
-and dict_mutex_exit_for_mysql(); the walk ends at the first index whose
-type has the DICT_FTS bit set.
-@return true if an FTS index exists on the table */
-static
-bool
-row_quiesce_table_has_fts_index(
-/*============================*/
-	const dict_table_t*	table)	/*!< in: quiesce this table */
-{
-	dict_mutex_enter_for_mysql();
-
-	const dict_index_t*	cur = UT_LIST_GET_FIRST(table->indexes);
-
-	/* Advance until the list is exhausted or an FTS index is hit. */
-	while (cur != 0 && !(cur->type & DICT_FTS)) {
-		cur = UT_LIST_GET_NEXT(indexes, cur);
-	}
-
-	dict_mutex_exit_for_mysql();
-
-	/* A non-NULL cursor means the walk stopped on an FTS index. */
-	return(cur != 0);
-}
-
-/*********************************************************************//**
-Quiesce the tablespace that the table resides in.
-Steps: stop purge unless it is disabled; call ibuf_merge_space() on the
-tablespace repeatedly until it returns 0 or the transaction is
-interrupted; if not interrupted, flush the tablespace pages with
-BUF_REMOVE_FLUSH_WRITE and write the .cfg meta-data file. The outcome is
-only logged: whether the work succeeded, failed or was interrupted, the
-table state is finally set to QUIESCE_COMPLETE. */
-UNIV_INTERN
-void
-row_quiesce_table_start(
-/*====================*/
-	dict_table_t*	table,	/*!< in: quiesce this table */
-	trx_t*		trx)	/*!< in/out: transaction/session */
-{
-	ut_a(trx->mysql_thd != 0);
-	ut_a(srv_n_purge_threads > 0);
-	ut_ad(!srv_read_only_mode);
-
-	char		table_name[MAX_FULL_NAME_LEN + 1];
-
-	innobase_format_name(
-		table_name, sizeof(table_name), table->name, FALSE);
-
-	ib_logf(IB_LOG_LEVEL_INFO,
-		"Sync to disk of '%s' started.", table_name);
-
-	if (trx_purge_state() != PURGE_STATE_DISABLED) {
-		trx_purge_stop();
-	}
-
-	/* Keep merging until ibuf_merge_space() reports nothing more to
-	do; log progress on every 20th iteration. */
-	for (ulint count = 0;
-	     ibuf_merge_space(table->space) != 0
-	     && !trx_is_interrupted(trx);
-	     ++count) {
-		if (!(count % 20)) {
-			ib_logf(IB_LOG_LEVEL_INFO,
-				"Merging change buffer entries for '%s'",
-				table_name);
-		}
-	}
-
-	if (!trx_is_interrupted(trx)) {
-		buf_LRU_flush_or_remove_pages(
-			table->space, BUF_REMOVE_FLUSH_WRITE, trx);
-
-		/* The flush may itself have been interrupted, so the
-		flag is checked again before the .cfg file is written. */
-		if (trx_is_interrupted(trx)) {
-
-			ib_logf(IB_LOG_LEVEL_WARN, "Quiesce aborted!");
-
-		} else if (row_quiesce_write_cfg(table, trx->mysql_thd)
-			   != DB_SUCCESS) {
-
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"There was an error writing to the "
-				"meta data file");
-		} else {
-			ib_logf(IB_LOG_LEVEL_INFO,
-				"Table '%s' flushed to disk", table_name);
-		}
-	} else {
-		ib_logf(IB_LOG_LEVEL_WARN, "Quiesce aborted!");
-	}
-
-	dberr_t	err = row_quiesce_set_state(table, QUIESCE_COMPLETE, trx);
-	ut_a(err == DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Cleanup after table quiesce.
-Waits, polling once per second, until table->quiesce equals
-QUIESCE_COMPLETE; then deletes the .cfg meta-data file if it exists,
-resumes purge unless it is disabled, and sets the table state to
-QUIESCE_NONE. */
-UNIV_INTERN
-void
-row_quiesce_table_complete(
-/*=======================*/
-	dict_table_t*	table,	/*!< in: quiesce this table */
-	trx_t*		trx)	/*!< in/out: transaction/session */
-{
-	char		table_name[MAX_FULL_NAME_LEN + 1];
-
-	ut_a(trx->mysql_thd != 0);
-
-	innobase_format_name(
-		table_name, sizeof(table_name), table->name, FALSE);
-
-	/* We need to wait for the operation to complete if the
-	transaction has been killed. Each iteration sleeps for one
-	second, so the warning is repeated once a minute. */
-	for (ulint n_waits = 0;
-	     table->quiesce != QUIESCE_COMPLETE;
-	     ++n_waits) {
-
-		if (n_waits % 60 == 0) {
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"Waiting for quiesce of '%s' to complete",
-				table_name);
-		}
-
-		os_thread_sleep(1000000);
-	}
-
-	/* Remove the .cfg file now that the user has resumed
-	normal operations. Otherwise it will cause problems when
-	the user tries to drop the database (remove directory). */
-	char		cfg_name[OS_FILE_MAX_PATH];
-
-	srv_get_meta_data_filename(table, cfg_name, sizeof(cfg_name));
-
-	os_file_delete_if_exists(innodb_file_data_key, cfg_name);
-
-	ib_logf(IB_LOG_LEVEL_INFO,
-		"Deleting the meta-data file '%s'", cfg_name);
-
-	if (trx_purge_state() != PURGE_STATE_DISABLED) {
-		trx_purge_run();
-	}
-
-	const dberr_t	err = row_quiesce_set_state(table, QUIESCE_NONE, trx);
-	ut_a(err == DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Set a table's quiesce state.
-Refused with DB_UNSUPPORTED (after sending a warning to the session) in
-read-only mode and for tables in the system tablespace. Tables that have
-an FTS index, or that carry the DICT_TF2_FTS_HAS_DOC_ID flag, only get a
-warning and are processed normally. The state is changed while holding
-the data dictionary lock and the x-locks of the table's indexes.
-@return DB_SUCCESS or error code. */
-UNIV_INTERN
-dberr_t
-row_quiesce_set_state(
-/*==================*/
-	dict_table_t*	table,	/*!< in: quiesce this table */
-	ib_quiesce_t	state,	/*!< in: quiesce state to set */
-	trx_t*		trx)	/*!< in/out: transaction */
-{
-	ut_a(srv_n_purge_threads > 0);
-
-	if (srv_read_only_mode) {
-
-		ib_senderrf(trx->mysql_thd,
-			    IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
-
-		return(DB_UNSUPPORTED);
-	}
-
-	if (table->space == TRX_SYS_SPACE) {
-
-		char	table_name[MAX_FULL_NAME_LEN + 1];
-
-		innobase_format_name(
-			table_name, sizeof(table_name), table->name, FALSE);
-
-		ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN,
-			    ER_TABLE_IN_SYSTEM_TABLESPACE, table_name);
-
-		return(DB_UNSUPPORTED);
-	}
-
-	/* The two FTS cases below are warnings only. */
-	if (row_quiesce_table_has_fts_index(table)) {
-
-		ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN,
-			    ER_NOT_SUPPORTED_YET,
-			    "FLUSH TABLES on tables that have an FTS index. "
-			    "FTS auxiliary tables will not be flushed.");
-
-	} else if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
-		/* If this flag is set then the table may not have any active
-		FTS indexes but it will still have the auxiliary tables. */
-
-		ib_senderrf(trx->mysql_thd, IB_LOG_LEVEL_WARN,
-			    ER_NOT_SUPPORTED_YET,
-			    "FLUSH TABLES on a table that had an FTS index, "
-			    "created on a hidden column, the "
-			    "auxiliary tables haven't been dropped as yet. "
-			    "FTS auxiliary tables will not be flushed.");
-	}
-
-	row_mysql_lock_data_dictionary(trx);
-
-	dict_table_x_lock_indexes(table);
-
-	/* Assert the expected predecessor state: COMPLETE must follow
-	START and NONE must follow COMPLETE. QUIESCE_START has no
-	precondition. */
-	if (state == QUIESCE_COMPLETE) {
-		ut_a(table->quiesce == QUIESCE_START);
-	} else if (state == QUIESCE_NONE) {
-		ut_a(table->quiesce == QUIESCE_COMPLETE);
-	}
-
-	table->quiesce = state;
-
-	dict_table_x_unlock_indexes(table);
-
-	row_mysql_unlock_data_dictionary(trx);
-
-	return(DB_SUCCESS);
-}
-
diff --git a/storage/xtradb/row/row0row.cc b/storage/xtradb/row/row0row.cc
deleted file mode 100644
index 96d25e15777..00000000000
--- a/storage/xtradb/row/row0row.cc
+++ /dev/null
@@ -1,1260 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0row.cc
-General row routines
-
-Created 4/20/1996 Heikki Tuuri
-*******************************************************/
-
-#include "row0row.h"
-
-#ifdef UNIV_NONINL
-#include "row0row.ic"
-#endif
-
-#include "data0type.h"
-#include "dict0dict.h"
-#include "btr0btr.h"
-#include "ha_prototypes.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "trx0undo.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "row0ext.h"
-#include "row0upd.h"
-#include "rem0cmp.h"
-#include "read0read.h"
-#include "ut0mem.h"
-
-/*****************************************************************//**
-When an insert or purge to a table is performed, this function builds
-the entry to be inserted into or purged from an index on the table.
-The entry has one field per index field, copied from the matching column
-of row. Fields that are NULL, or that need neither prefix truncation nor
-external-column handling, are left as plain copies. Otherwise the data is
-taken from the ext cache when one is given, or the trailing
-BTR_EXTERN_FIELD_REF_SIZE bytes are cut off a locally stored external
-field, and column-prefix fields are then shortened to at most prefix_len
-via dtype_get_at_most_n_mbchars().
-@return index entry which should be inserted or purged
-@retval NULL if the externally stored columns in the clustered index record
-are unavailable and ext != NULL, or row is missing some needed columns. */
-UNIV_INTERN
-dtuple_t*
-row_build_index_entry_low(
-/*======================*/
- const dtuple_t* row, /*!< in: row which should be
- inserted or purged */
- const row_ext_t* ext, /*!< in: externally stored column
- prefixes, or NULL */
- dict_index_t* index, /*!< in: index on the table */
- mem_heap_t* heap) /*!< in: memory heap from which
- the memory for the index entry
- is allocated */
-{
- dtuple_t* entry;
- ulint entry_len;
- ulint i;
-
- entry_len = dict_index_get_n_fields(index);
- entry = dtuple_create(heap, entry_len);
-
- if (dict_index_is_univ(index)) {
- dtuple_set_n_fields_cmp(entry, entry_len);
- /* There may only be externally stored columns
- in a clustered index B-tree of a user table. */
- ut_a(!ext);
- } else {
- dtuple_set_n_fields_cmp(
- entry, dict_index_get_n_unique_in_tree(index));
- }
-
- for (i = 0; i < entry_len; i++) {
- const dict_field_t* ind_field
- = dict_index_get_nth_field(index, i);
- const dict_col_t* col
- = ind_field->col;
- ulint col_no
- = dict_col_get_no(col);
- dfield_t* dfield
- = dtuple_get_nth_field(entry, i);
- const dfield_t* dfield2
- = dtuple_get_nth_field(row, col_no);
- ulint len;
-
-#if DATA_MISSING != 0
-# error "DATA_MISSING != 0"
-#endif
- if (UNIV_UNLIKELY(dfield_get_type(dfield2)->mtype
- == DATA_MISSING)) {
- /* The field has not been initialized in the row.
- This should be from trx_undo_rec_get_partial_row(). */
- return(NULL);
- }
-
- len = dfield_get_len(dfield2);
-
- dfield_copy(dfield, dfield2);
-
- if (dfield_is_null(dfield)) {
- continue;
- }
-
- if (ind_field->prefix_len == 0
- && (!dfield_is_ext(dfield)
- || dict_index_is_clust(index))) {
- /* The dfield_copy() above suffices for
- columns that are stored in-page, or for
- clustered index record columns that are not
- part of a column prefix in the PRIMARY KEY. */
- continue;
- }
-
- /* If the column is stored externally (off-page) in
- the clustered index, it must be an ordering field in
- the secondary index. In the Antelope format, only
- prefix-indexed columns may be stored off-page in the
- clustered index record. In the Barracuda format, also
- fully indexed long CHAR or VARCHAR columns may be
- stored off-page. */
- ut_ad(col->ord_part);
-
- if (ext) {
- /* See if the column is stored externally. */
- const byte* buf = row_ext_lookup(ext, col_no,
- &len);
- if (UNIV_LIKELY_NULL(buf)) {
- if (UNIV_UNLIKELY(buf == field_ref_zero)) {
- return(NULL); /* the cached column prefix is unavailable */
- }
- dfield_set_data(dfield, buf, len);
- }
-
- if (ind_field->prefix_len == 0) {
- /* In the Barracuda format
- (ROW_FORMAT=DYNAMIC or
- ROW_FORMAT=COMPRESSED), we can have a
- secondary index on an entire column
- that is stored off-page in the
- clustered index. As this is not a
- prefix index (prefix_len == 0),
- include the entire off-page column in
- the secondary index record. */
- continue;
- }
- } else if (dfield_is_ext(dfield)) {
- /* This table is either in Antelope format
- (ROW_FORMAT=REDUNDANT or ROW_FORMAT=COMPACT)
- or a purge record where the ordered part of
- the field is not external.
- In Antelope, the maximum column prefix
- index length is 767 bytes, and the clustered
- index record contains a 768-byte prefix of
- each off-page column. */
- ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
- len -= BTR_EXTERN_FIELD_REF_SIZE;
- dfield_set_len(dfield, len);
- }
-
- /* If a column prefix index, take only the prefix. */
- if (ind_field->prefix_len) {
- len = dtype_get_at_most_n_mbchars(
- col->prtype, col->mbminmaxlen,
- ind_field->prefix_len, len,
- static_cast<char*>(dfield_get_data(dfield)));
- dfield_set_len(dfield, len);
- }
- }
-
- return(entry);
-}
-
-/*******************************************************************//**
-An inverse function to row_build_index_entry. Builds a row from a
-record in a clustered index.
-Outline: compute the record offsets when the caller passed none; unless
-type is ROW_COPY_POINTERS, copy the record into heap and read the fields
-from that copy; create the row tuple either as a copy of add_cols or as
-a fresh tuple typed from col_table; fill it field by field, skipping
-column-prefix fields and columns that col_map marks as dropped; collect
-the numbers of externally stored columns that have ord_part set; and,
-when ext is not NULL, store in *ext a cache created by row_ext_create()
-for those columns, or NULL when there are none.
-@return own: row built; see the NOTE below! */
-UNIV_INTERN
-dtuple_t*
-row_build(
-/*======*/
- ulint type, /*!< in: ROW_COPY_POINTERS or
- ROW_COPY_DATA; the latter
- copies also the data fields to
- heap while the first only
- places pointers to data fields
- on the index page, and thus is
- more efficient */
- const dict_index_t* index, /*!< in: clustered index */
- const rec_t* rec, /*!< in: record in the clustered
- index; NOTE: in the case
- ROW_COPY_POINTERS the data
- fields in the row will point
- directly into this record,
- therefore, the buffer page of
- this record must be at least
- s-latched and the latch held
- as long as the row dtuple is used! */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index)
- or NULL, in which case this function
- will invoke rec_get_offsets() */
- const dict_table_t* col_table,
- /*!< in: table, to check which
- externally stored columns
- occur in the ordering columns
- of an index, or NULL if
- index->table should be
- consulted instead */
- const dtuple_t* add_cols,
- /*!< in: default values of
- added columns, or NULL */
- const ulint* col_map,/*!< in: mapping of old column
- numbers to new ones, or NULL */
- row_ext_t** ext, /*!< out, own: cache of
- externally stored column
- prefixes, or NULL */
- mem_heap_t* heap) /*!< in: memory heap from which
- the memory needed is allocated */
-{
- const byte* copy;
- dtuple_t* row;
- ulint n_ext_cols;
- ulint* ext_cols = NULL; /* remove warning */
- ulint len;
- byte* buf;
- ulint j;
- mem_heap_t* tmp_heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
- ut_ad(index != NULL);
- ut_ad(rec != NULL);
- ut_ad(heap != NULL);
- ut_ad(dict_index_is_clust(index));
- ut_ad(!mutex_own(&trx_sys->mutex));
- ut_ad(!col_map || col_table);
-
- if (!offsets) {
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &tmp_heap);
- } else {
- ut_ad(rec_offs_validate(rec, index, offsets));
- }
-
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- if (rec_offs_any_null_extern(rec, offsets)) {
- /* This condition can occur during crash recovery
- before trx_rollback_active() has completed execution,
- or when a concurrently executing
- row_ins_index_entry_low() has committed the B-tree
- mini-transaction but has not yet managed to restore
- the cursor position for writing the big_rec. */
- ut_a(trx_undo_roll_ptr_is_insert(
- row_get_rec_roll_ptr(rec, index, offsets)));
- }
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-
- if (type != ROW_COPY_POINTERS) {
- /* Take a copy of rec to heap */
- buf = static_cast<byte*>(
- mem_heap_alloc(heap, rec_offs_size(offsets)));
-
- copy = rec_copy(buf, rec, offsets);
- } else {
- copy = rec;
- }
-
- n_ext_cols = rec_offs_n_extern(offsets);
- if (n_ext_cols) {
- ext_cols = static_cast<ulint*>(
- mem_heap_alloc(heap, n_ext_cols * sizeof *ext_cols));
- }
-
- /* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(copy, index, const_cast<ulint*>(offsets));
-
- if (!col_table) {
- ut_ad(!col_map);
- ut_ad(!add_cols);
- col_table = index->table;
- }
-
- if (add_cols) {
- ut_ad(col_map);
- row = dtuple_copy(add_cols, heap);
- /* dict_table_copy_types() would set the fields to NULL */
- for (ulint i = 0; i < dict_table_get_n_cols(col_table); i++) {
- dict_col_copy_type(
- dict_table_get_nth_col(col_table, i),
- dfield_get_type(dtuple_get_nth_field(row, i)));
- }
- } else {
- row = dtuple_create(heap, dict_table_get_n_cols(col_table));
- dict_table_copy_types(row, col_table);
- }
-
- dtuple_set_info_bits(row, rec_get_info_bits(
- copy, rec_offs_comp(offsets)));
-
- j = 0; /* number of entries stored in ext_cols so far */
-
- for (ulint i = 0; i < rec_offs_n_fields(offsets); i++) {
- const dict_field_t* ind_field
- = dict_index_get_nth_field(index, i);
-
- if (ind_field->prefix_len) {
- /* Column prefixes can only occur in key
- fields, which cannot be stored externally. For
- a column prefix, there should also be the full
- field in the clustered index tuple. The row
- tuple comprises full fields, not prefixes. */
- ut_ad(!rec_offs_nth_extern(offsets, i));
- continue;
- }
-
- const dict_col_t* col
- = dict_field_get_col(ind_field);
- ulint col_no
- = dict_col_get_no(col);
-
- if (col_map) {
- col_no = col_map[col_no];
-
- if (col_no == ULINT_UNDEFINED) {
- /* dropped column */
- continue;
- }
- }
-
- dfield_t* dfield = dtuple_get_nth_field(row, col_no);
-
- const byte* field = rec_get_nth_field(
- copy, offsets, i, &len);
-
- dfield_set_data(dfield, field, len);
-
- if (rec_offs_nth_extern(offsets, i)) {
- dfield_set_ext(dfield);
-
- col = dict_table_get_nth_col(col_table, col_no);
-
- if (col->ord_part) {
- /* We will have to fetch prefixes of
- externally stored columns that are
- referenced by column prefixes. */
- ext_cols[j++] = col_no;
- }
- }
- }
-
- /* Re-associate the offsets with the caller's record, undoing the
- rec_offs_make_valid(copy, ...) call made earlier. */
- rec_offs_make_valid(rec, index, const_cast<ulint*>(offsets));
-
- ut_ad(dtuple_check_typed(row));
-
- if (!ext) {
- /* REDUNDANT and COMPACT formats store a local
- 768-byte prefix of each externally stored
- column. No cache is needed.
-
- During online table rebuild,
- row_log_table_apply_delete_low()
- may use a cache that was set up by
- row_log_table_delete(). */
-
- } else if (j) {
- *ext = row_ext_create(j, ext_cols, index->table->flags, row,
- heap);
- } else {
- *ext = NULL;
- }
-
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
- }
-
- return(row);
-}
-
-/*******************************************************************//**
-Converts an index record to a typed data tuple.
-The tuple gets one field per record field; each field points at the data
-inside rec (nothing is copied) and is flagged as external when the
-offsets mark it so.
-@return index entry built; does not set info_bits, and the data fields
-in the entry will point directly to rec */
-UNIV_INTERN
-dtuple_t*
-row_rec_to_index_entry_low(
-/*=======================*/
-	const rec_t*		rec,	/*!< in: record in the index */
-	const dict_index_t*	index,	/*!< in: index */
-	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
-	ulint*			n_ext,	/*!< out: number of externally
-					stored columns */
-	mem_heap_t*		heap)	/*!< in: memory heap from which
-					the memory needed is allocated */
-{
-	ut_ad(rec != NULL);
-	ut_ad(heap != NULL);
-	ut_ad(index != NULL);
-	/* Because this function may be invoked by row0merge.cc
-	on a record whose header is in different format, the check
-	rec_offs_validate(rec, index, offsets) must be avoided here. */
-	ut_ad(n_ext);
-	*n_ext = 0;
-
-	const ulint	n_fields = rec_offs_n_fields(offsets);
-
-	dtuple_t*	entry = dtuple_create(heap, n_fields);
-
-	dtuple_set_n_fields_cmp(entry,
-				dict_index_get_n_unique_in_tree(index));
-	ut_ad(n_fields == dict_index_get_n_fields(index));
-
-	dict_index_copy_types(entry, index, n_fields);
-
-	for (ulint pos = 0; pos < n_fields; pos++) {
-		ulint		len;
-		dfield_t*	dfield = dtuple_get_nth_field(entry, pos);
-		const byte*	field = rec_get_nth_field(
-			rec, offsets, pos, &len);
-
-		dfield_set_data(dfield, field, len);
-
-		/* Count the externally stored columns for the caller. */
-		if (rec_offs_nth_extern(offsets, pos)) {
-			dfield_set_ext(dfield);
-			++*n_ext;
-		}
-	}
-
-	ut_ad(dtuple_check_typed(entry));
-
-	return(entry);
-}
-
-/*******************************************************************//**
-Converts an index record to a typed data tuple. NOTE that externally
-stored (often big) fields are NOT copied to heap.
-The record itself is copied into heap and the tuple is built over that
-copy by row_rec_to_index_entry_low(); the info bits are then taken from
-the original record.
-@return own: index entry built */
-UNIV_INTERN
-dtuple_t*
-row_rec_to_index_entry(
-/*===================*/
-	const rec_t*		rec,	/*!< in: record in the index */
-	const dict_index_t*	index,	/*!< in: index */
-	const ulint*		offsets,/*!< in: rec_get_offsets(rec) */
-	ulint*			n_ext,	/*!< out: number of externally
-					stored columns */
-	mem_heap_t*		heap)	/*!< in: memory heap from which
-					the memory needed is allocated */
-{
-	ut_ad(rec != NULL);
-	ut_ad(heap != NULL);
-	ut_ad(index != NULL);
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	/* Take a copy of rec to heap */
-	byte*		buf = static_cast<byte*>(
-		mem_heap_alloc(heap, rec_offs_size(offsets)));
-
-	const rec_t*	copy_rec = rec_copy(buf, rec, offsets);
-
-	/* Point the offsets at the copy while the tuple is built, then
-	back at the caller's record. */
-	rec_offs_make_valid(copy_rec, index, const_cast<ulint*>(offsets));
-
-	dtuple_t*	entry = row_rec_to_index_entry_low(
-		copy_rec, index, offsets, n_ext, heap);
-
-	rec_offs_make_valid(rec, index, const_cast<ulint*>(offsets));
-
-	dtuple_set_info_bits(entry,
-			     rec_get_info_bits(rec, rec_offs_comp(offsets)));
-
-	return(entry);
-}
-
-/*******************************************************************//**
-Builds from a secondary index record a row reference with which we can
-search the clustered index record.
-@return own: row reference built; see the NOTE below! */
-UNIV_INTERN
-dtuple_t*
-row_build_row_ref(
-/*==============*/
- ulint type, /*!< in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
- the former copies also the data fields to
- heap, whereas the latter only places pointers
- to data fields on the index page */
- dict_index_t* index, /*!< in: secondary index */
- const rec_t* rec, /*!< in: record in the index;
- NOTE: in the case ROW_COPY_POINTERS
- the data fields in the row will point
- directly into this record, therefore,
- the buffer page of this record must be
- at least s-latched and the latch held
- as long as the row reference is used! */
- mem_heap_t* heap) /*!< in: memory heap from which the memory
- needed is allocated */
-{
- dict_table_t* table;
- dict_index_t* clust_index;
- dfield_t* dfield;
- dtuple_t* ref;
- const byte* field;
- ulint len;
- ulint ref_len;
- ulint pos;
- byte* buf;
- ulint clust_col_prefix_len;
- ulint i;
- mem_heap_t* tmp_heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(index != NULL);
- ut_ad(rec != NULL);
- ut_ad(heap != NULL);
- ut_ad(!dict_index_is_clust(index));
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &tmp_heap);
- /* Secondary indexes must not contain externally stored columns. */
- ut_ad(!rec_offs_any_extern(offsets));
-
- if (type == ROW_COPY_DATA) {
- /* Take a copy of rec to heap */
-
- buf = static_cast<byte*>(
- mem_heap_alloc(heap, rec_offs_size(offsets)));
-
- rec = rec_copy(buf, rec, offsets);
- /* Avoid a debug assertion in rec_offs_validate(). */
- rec_offs_make_valid(rec, index, offsets);
- }
-
- table = index->table;
-
- clust_index = dict_table_get_first_index(table);
-
- ref_len = dict_index_get_n_unique(clust_index);
-
- ref = dtuple_create(heap, ref_len);
-
- dict_index_copy_types(ref, clust_index, ref_len);
-
- for (i = 0; i < ref_len; i++) {
- dfield = dtuple_get_nth_field(ref, i);
-
- pos = dict_index_get_nth_field_pos(index, clust_index, i);
-
- ut_a(pos != ULINT_UNDEFINED);
-
- field = rec_get_nth_field(rec, offsets, pos, &len);
-
- dfield_set_data(dfield, field, len);
-
- /* If the primary key contains a column prefix, then the
- secondary index may contain a longer prefix of the same
- column, or the full column, and we must adjust the length
- accordingly. */
-
- clust_col_prefix_len = dict_index_get_nth_field(
- clust_index, i)->prefix_len;
-
- if (clust_col_prefix_len > 0) {
- if (len != UNIV_SQL_NULL) {
-
- const dtype_t* dtype
- = dfield_get_type(dfield);
-
- dfield_set_len(dfield,
- dtype_get_at_most_n_mbchars(
- dtype->prtype,
- dtype->mbminmaxlen,
- clust_col_prefix_len,
- len, (char*) field));
- }
- }
- }
-
- ut_ad(dtuple_check_typed(ref));
- if (tmp_heap) {
- mem_heap_free(tmp_heap);
- }
-
- return(ref);
-}
-
-/*******************************************************************//**
-Builds from a secondary index record a row reference with which we can
-search the clustered index record. */
-UNIV_INTERN
-void
-row_build_row_ref_in_tuple(
-/*=======================*/
- dtuple_t* ref, /*!< in/out: row reference built;
- see the NOTE below! */
- const rec_t* rec, /*!< in: record in the index;
- NOTE: the data fields in ref
- will point directly into this
- record, therefore, the buffer
- page of this record must be at
- least s-latched and the latch
- held as long as the row
- reference is used! */
- const dict_index_t* index, /*!< in: secondary index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index)
- or NULL */
- trx_t* trx) /*!< in: transaction */
-{
- const dict_index_t* clust_index;
- dfield_t* dfield;
- const byte* field;
- ulint len;
- ulint ref_len;
- ulint pos;
- ulint clust_col_prefix_len;
- ulint i;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
- ut_a(ref);
- ut_a(index);
- ut_a(rec);
- ut_ad(!dict_index_is_clust(index));
-
- if (UNIV_UNLIKELY(!index->table)) {
- fputs("InnoDB: table ", stderr);
-notfound:
- ut_print_name(stderr, trx, TRUE, index->table_name);
- fputs(" for index ", stderr);
- ut_print_name(stderr, trx, FALSE, index->name);
- fputs(" not found\n", stderr);
- ut_error;
- }
-
- clust_index = dict_table_get_first_index(index->table);
-
- if (UNIV_UNLIKELY(!clust_index)) {
- fputs("InnoDB: clust index for table ", stderr);
- goto notfound;
- }
-
- if (!offsets) {
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
- } else {
- ut_ad(rec_offs_validate(rec, index, offsets));
- }
-
- /* Secondary indexes must not contain externally stored columns. */
- ut_ad(!rec_offs_any_extern(offsets));
- ref_len = dict_index_get_n_unique(clust_index);
-
- ut_ad(ref_len == dtuple_get_n_fields(ref));
-
- dict_index_copy_types(ref, clust_index, ref_len);
-
- for (i = 0; i < ref_len; i++) {
- dfield = dtuple_get_nth_field(ref, i);
-
- pos = dict_index_get_nth_field_pos(index, clust_index, i);
-
- ut_a(pos != ULINT_UNDEFINED);
-
- field = rec_get_nth_field(rec, offsets, pos, &len);
-
- dfield_set_data(dfield, field, len);
-
- /* If the primary key contains a column prefix, then the
- secondary index may contain a longer prefix of the same
- column, or the full column, and we must adjust the length
- accordingly. */
-
- clust_col_prefix_len = dict_index_get_nth_field(
- clust_index, i)->prefix_len;
-
- if (clust_col_prefix_len > 0) {
- if (len != UNIV_SQL_NULL) {
-
- const dtype_t* dtype
- = dfield_get_type(dfield);
-
- dfield_set_len(dfield,
- dtype_get_at_most_n_mbchars(
- dtype->prtype,
- dtype->mbminmaxlen,
- clust_col_prefix_len,
- len, (char*) field));
- }
- }
- }
-
- ut_ad(dtuple_check_typed(ref));
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/***************************************************************//**
-Searches the clustered index record for a row, if we have the row reference.
-@return TRUE if found */
-UNIV_INTERN
-ibool
-row_search_on_row_ref(
-/*==================*/
- btr_pcur_t* pcur, /*!< out: persistent cursor, which must
- be closed by the caller */
- ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
- const dict_table_t* table, /*!< in: table */
- const dtuple_t* ref, /*!< in: row reference */
- mtr_t* mtr) /*!< in/out: mtr */
-{
- ulint low_match;
- rec_t* rec;
- dict_index_t* index;
-
- ut_ad(dtuple_check_typed(ref));
-
- index = dict_table_get_first_index(table);
-
- ut_a(dtuple_get_n_fields(ref) == dict_index_get_n_unique(index));
-
- btr_pcur_open(index, ref, PAGE_CUR_LE, mode, pcur, mtr);
-
- low_match = btr_pcur_get_low_match(pcur);
-
- rec = btr_pcur_get_rec(pcur);
-
- if (page_rec_is_infimum(rec)) {
-
- return(FALSE);
- }
-
- if (low_match != dtuple_get_n_fields(ref)) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Fetches the clustered index record for a secondary index record. The latches
-on the secondary index record are preserved.
-@return record or NULL, if no record found */
-UNIV_INTERN
-rec_t*
-row_get_clust_rec(
-/*==============*/
- ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
- const rec_t* rec, /*!< in: record in a secondary index */
- dict_index_t* index, /*!< in: secondary index */
- dict_index_t** clust_index,/*!< out: clustered index */
- mtr_t* mtr) /*!< in: mtr */
-{
- mem_heap_t* heap;
- dtuple_t* ref;
- dict_table_t* table;
- btr_pcur_t pcur;
- ibool found;
- rec_t* clust_rec;
-
- ut_ad(!dict_index_is_clust(index));
-
- table = index->table;
-
- heap = mem_heap_create(256);
-
- ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec, heap);
-
- found = row_search_on_row_ref(&pcur, mode, table, ref, mtr);
-
- clust_rec = found ? btr_pcur_get_rec(&pcur) : NULL;
-
- mem_heap_free(heap);
-
- btr_pcur_close(&pcur);
-
- *clust_index = dict_table_get_first_index(table);
-
- return(clust_rec);
-}
-
-/***************************************************************//**
-Searches an index record.
-@return whether the record was found or buffered */
-UNIV_INTERN
-enum row_search_result
-row_search_index_entry(
-/*===================*/
- dict_index_t* index, /*!< in: index */
- const dtuple_t* entry, /*!< in: index entry */
- ulint mode, /*!< in: BTR_MODIFY_LEAF, ... */
- btr_pcur_t* pcur, /*!< in/out: persistent cursor, which must
- be closed by the caller */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint n_fields;
- ulint low_match;
- rec_t* rec;
-
- ut_ad(dtuple_check_typed(entry));
-
- btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr);
-
- switch (btr_pcur_get_btr_cur(pcur)->flag) {
- case BTR_CUR_DELETE_REF:
- ut_a(mode & BTR_DELETE);
- return(ROW_NOT_DELETED_REF);
-
- case BTR_CUR_DEL_MARK_IBUF:
- case BTR_CUR_DELETE_IBUF:
- case BTR_CUR_INSERT_TO_IBUF:
- return(ROW_BUFFERED);
-
- case BTR_CUR_HASH:
- case BTR_CUR_HASH_FAIL:
- case BTR_CUR_BINARY:
- break;
- }
-
- low_match = btr_pcur_get_low_match(pcur);
-
- rec = btr_pcur_get_rec(pcur);
-
- n_fields = dtuple_get_n_fields(entry);
-
- if (page_rec_is_infimum(rec)) {
-
- return(ROW_NOT_FOUND);
- } else if (low_match != n_fields) {
-
- return(ROW_NOT_FOUND);
- }
-
- return(ROW_FOUND);
-}
-
-/*******************************************************************//**
-Formats the raw data in "data" (in InnoDB on-disk format) that is of
-type DATA_INT using "prtype" and writes the result to "buf".
-If the data is in unknown format, then nothing is written to "buf",
-0 is returned and "format_in_hex" is set to TRUE, otherwise
-"format_in_hex" is left untouched.
-Not more than "buf_size" bytes are written to "buf".
-The result is always '\0'-terminated (provided buf_size > 0) and the
-number of bytes that were written to "buf" is returned (including the
-terminating '\0').
-@return number of bytes that were written */
-static
-ulint
-row_raw_format_int(
-/*===============*/
- const char* data, /*!< in: raw data */
- ulint data_len, /*!< in: raw data length
- in bytes */
- ulint prtype, /*!< in: precise type */
- char* buf, /*!< out: output buffer */
- ulint buf_size, /*!< in: output buffer size
- in bytes */
- ibool* format_in_hex) /*!< out: should the data be
- formated in hex */
-{
- ulint ret;
-
- if (data_len <= sizeof(ib_uint64_t)) {
-
- ib_uint64_t value;
- ibool unsigned_type = prtype & DATA_UNSIGNED;
-
- value = mach_read_int_type(
- (const byte*) data, data_len, unsigned_type);
-
- ret = ut_snprintf(
- buf, buf_size,
- unsigned_type ? UINT64PF : INT64PF, value) + 1;
- } else {
-
- *format_in_hex = TRUE;
- ret = 0;
- }
-
- return(ut_min(ret, buf_size));
-}
-
-/*******************************************************************//**
-Formats the raw data in "data" (in InnoDB on-disk format) that is of
-type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "prtype" and writes the
-result to "buf".
-If the data is in binary format, then nothing is written to "buf",
-0 is returned and "format_in_hex" is set to TRUE, otherwise
-"format_in_hex" is left untouched.
-Not more than "buf_size" bytes are written to "buf".
-The result is always '\0'-terminated (provided buf_size > 0) and the
-number of bytes that were written to "buf" is returned (including the
-terminating '\0').
-@return number of bytes that were written */
-static
-ulint
-row_raw_format_str(
-/*===============*/
- const char* data, /*!< in: raw data */
- ulint data_len, /*!< in: raw data length
- in bytes */
- ulint prtype, /*!< in: precise type */
- char* buf, /*!< out: output buffer */
- ulint buf_size, /*!< in: output buffer size
- in bytes */
- ibool* format_in_hex) /*!< out: should the data be
- formated in hex */
-{
- ulint charset_coll;
-
- if (buf_size == 0) {
-
- return(0);
- }
-
- /* we assume system_charset_info is UTF-8 */
-
- charset_coll = dtype_get_charset_coll(prtype);
-
- if (UNIV_LIKELY(dtype_is_utf8(prtype))) {
-
- return(ut_str_sql_format(data, data_len, buf, buf_size));
- }
- /* else */
-
- if (charset_coll == DATA_MYSQL_BINARY_CHARSET_COLL) {
-
- *format_in_hex = TRUE;
- return(0);
- }
- /* else */
-
- return(innobase_raw_format(data, data_len, charset_coll,
- buf, buf_size));
-}
-
-/*******************************************************************//**
-Formats the raw data in "data" (in InnoDB on-disk format) using
-"dict_field" and writes the result to "buf".
-Not more than "buf_size" bytes are written to "buf".
-The result is always NUL-terminated (provided buf_size is positive) and the
-number of bytes that were written to "buf" is returned (including the
-terminating NUL).
-@return number of bytes that were written */
-UNIV_INTERN
-ulint
-row_raw_format(
-/*===========*/
- const char* data, /*!< in: raw data */
- ulint data_len, /*!< in: raw data length
- in bytes */
- const dict_field_t* dict_field, /*!< in: index field */
- char* buf, /*!< out: output buffer */
- ulint buf_size) /*!< in: output buffer size
- in bytes */
-{
- ulint mtype;
- ulint prtype;
- ulint ret;
- ibool format_in_hex;
-
- if (buf_size == 0) {
-
- return(0);
- }
-
- if (data_len == UNIV_SQL_NULL) {
-
- ret = ut_snprintf((char*) buf, buf_size, "NULL") + 1;
-
- return(ut_min(ret, buf_size));
- }
-
- mtype = dict_field->col->mtype;
- prtype = dict_field->col->prtype;
-
- format_in_hex = FALSE;
-
- switch (mtype) {
- case DATA_INT:
-
- ret = row_raw_format_int(data, data_len, prtype,
- buf, buf_size, &format_in_hex);
- if (format_in_hex) {
-
- goto format_in_hex;
- }
- break;
- case DATA_CHAR:
- case DATA_VARCHAR:
- case DATA_MYSQL:
- case DATA_VARMYSQL:
-
- ret = row_raw_format_str(data, data_len, prtype,
- buf, buf_size, &format_in_hex);
- if (format_in_hex) {
-
- goto format_in_hex;
- }
-
- break;
- /* XXX support more data types */
- default:
- format_in_hex:
-
- if (UNIV_LIKELY(buf_size > 2)) {
-
- memcpy(buf, "0x", 2);
- buf += 2;
- buf_size -= 2;
- ret = 2 + ut_raw_to_hex(data, data_len,
- buf, buf_size);
- } else {
-
- buf[0] = '\0';
- ret = 1;
- }
- }
-
- return(ret);
-}
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-#include "ut0dbg.h"
-
-void
-test_row_raw_format_int()
-{
- ulint ret;
- char buf[128];
- ibool format_in_hex;
- speedo_t speedo;
- ulint i;
-
-#define CALL_AND_TEST(data, data_len, prtype, buf, buf_size,\
- ret_expected, buf_expected, format_in_hex_expected)\
- do {\
- ibool ok = TRUE;\
- ulint i;\
- memset(buf, 'x', 10);\
- buf[10] = '\0';\
- format_in_hex = FALSE;\
- fprintf(stderr, "TESTING \"\\x");\
- for (i = 0; i < data_len; i++) {\
- fprintf(stderr, "%02hhX", data[i]);\
- }\
- fprintf(stderr, "\", %lu, %lu, %lu\n",\
- (ulint) data_len, (ulint) prtype,\
- (ulint) buf_size);\
- ret = row_raw_format_int(data, data_len, prtype,\
- buf, buf_size, &format_in_hex);\
- if (ret != ret_expected) {\
- fprintf(stderr, "expected ret %lu, got %lu\n",\
- (ulint) ret_expected, ret);\
- ok = FALSE;\
- }\
- if (strcmp((char*) buf, buf_expected) != 0) {\
- fprintf(stderr, "expected buf \"%s\", got \"%s\"\n",\
- buf_expected, buf);\
- ok = FALSE;\
- }\
- if (format_in_hex != format_in_hex_expected) {\
- fprintf(stderr, "expected format_in_hex %d, got %d\n",\
- (int) format_in_hex_expected,\
- (int) format_in_hex);\
- ok = FALSE;\
- }\
- if (ok) {\
- fprintf(stderr, "OK: %lu, \"%s\" %d\n\n",\
- (ulint) ret, buf, (int) format_in_hex);\
- } else {\
- return;\
- }\
- } while (0)
-
-#if 1
- /* min values for signed 1-8 byte integers */
-
- CALL_AND_TEST("\x00", 1, 0,
- buf, sizeof(buf), 5, "-128", 0);
-
- CALL_AND_TEST("\x00\x00", 2, 0,
- buf, sizeof(buf), 7, "-32768", 0);
-
- CALL_AND_TEST("\x00\x00\x00", 3, 0,
- buf, sizeof(buf), 9, "-8388608", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00", 4, 0,
- buf, sizeof(buf), 12, "-2147483648", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00", 5, 0,
- buf, sizeof(buf), 14, "-549755813888", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00\x00", 6, 0,
- buf, sizeof(buf), 17, "-140737488355328", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00", 7, 0,
- buf, sizeof(buf), 19, "-36028797018963968", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00\x00", 8, 0,
- buf, sizeof(buf), 21, "-9223372036854775808", 0);
-
- /* min values for unsigned 1-8 byte integers */
-
- CALL_AND_TEST("\x00", 1, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- CALL_AND_TEST("\x00\x00", 2, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- CALL_AND_TEST("\x00\x00\x00", 3, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00", 4, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00", 5, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00\x00", 6, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00", 7, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00\x00\x00\x00", 8, DATA_UNSIGNED,
- buf, sizeof(buf), 2, "0", 0);
-
- /* max values for signed 1-8 byte integers */
-
- CALL_AND_TEST("\xFF", 1, 0,
- buf, sizeof(buf), 4, "127", 0);
-
- CALL_AND_TEST("\xFF\xFF", 2, 0,
- buf, sizeof(buf), 6, "32767", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF", 3, 0,
- buf, sizeof(buf), 8, "8388607", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF", 4, 0,
- buf, sizeof(buf), 11, "2147483647", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF", 5, 0,
- buf, sizeof(buf), 13, "549755813887", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF", 6, 0,
- buf, sizeof(buf), 16, "140737488355327", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 7, 0,
- buf, sizeof(buf), 18, "36028797018963967", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8, 0,
- buf, sizeof(buf), 20, "9223372036854775807", 0);
-
- /* max values for unsigned 1-8 byte integers */
-
- CALL_AND_TEST("\xFF", 1, DATA_UNSIGNED,
- buf, sizeof(buf), 4, "255", 0);
-
- CALL_AND_TEST("\xFF\xFF", 2, DATA_UNSIGNED,
- buf, sizeof(buf), 6, "65535", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF", 3, DATA_UNSIGNED,
- buf, sizeof(buf), 9, "16777215", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF", 4, DATA_UNSIGNED,
- buf, sizeof(buf), 11, "4294967295", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF", 5, DATA_UNSIGNED,
- buf, sizeof(buf), 14, "1099511627775", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF", 6, DATA_UNSIGNED,
- buf, sizeof(buf), 16, "281474976710655", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 7, DATA_UNSIGNED,
- buf, sizeof(buf), 18, "72057594037927935", 0);
-
- CALL_AND_TEST("\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8, DATA_UNSIGNED,
- buf, sizeof(buf), 21, "18446744073709551615", 0);
-
- /* some random values */
-
- CALL_AND_TEST("\x52", 1, 0,
- buf, sizeof(buf), 4, "-46", 0);
-
- CALL_AND_TEST("\x0E", 1, DATA_UNSIGNED,
- buf, sizeof(buf), 3, "14", 0);
-
- CALL_AND_TEST("\x62\xCE", 2, 0,
- buf, sizeof(buf), 6, "-7474", 0);
-
- CALL_AND_TEST("\x29\xD6", 2, DATA_UNSIGNED,
- buf, sizeof(buf), 6, "10710", 0);
-
- CALL_AND_TEST("\x7F\xFF\x90", 3, 0,
- buf, sizeof(buf), 5, "-112", 0);
-
- CALL_AND_TEST("\x00\xA1\x16", 3, DATA_UNSIGNED,
- buf, sizeof(buf), 6, "41238", 0);
-
- CALL_AND_TEST("\x7F\xFF\xFF\xF7", 4, 0,
- buf, sizeof(buf), 3, "-9", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x5C", 4, DATA_UNSIGNED,
- buf, sizeof(buf), 3, "92", 0);
-
- CALL_AND_TEST("\x7F\xFF\xFF\xFF\xFF\xFF\xDC\x63", 8, 0,
- buf, sizeof(buf), 6, "-9117", 0);
-
- CALL_AND_TEST("\x00\x00\x00\x00\x00\x01\x64\x62", 8, DATA_UNSIGNED,
- buf, sizeof(buf), 6, "91234", 0);
-#endif
-
- /* speed test */
-
- speedo_reset(&speedo);
-
- for (i = 0; i < 1000000; i++) {
- row_raw_format_int("\x23", 1,
- 0, buf, sizeof(buf),
- &format_in_hex);
- row_raw_format_int("\x23", 1,
- DATA_UNSIGNED, buf, sizeof(buf),
- &format_in_hex);
-
- row_raw_format_int("\x00\x00\x00\x00\x00\x01\x64\x62", 8,
- 0, buf, sizeof(buf),
- &format_in_hex);
- row_raw_format_int("\x00\x00\x00\x00\x00\x01\x64\x62", 8,
- DATA_UNSIGNED, buf, sizeof(buf),
- &format_in_hex);
- }
-
- speedo_show(&speedo);
-}
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/xtradb/row/row0sel.cc b/storage/xtradb/row/row0sel.cc
deleted file mode 100644
index 8e3ed3d1a4e..00000000000
--- a/storage/xtradb/row/row0sel.cc
+++ /dev/null
@@ -1,5521 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-Copyright (c) 2015, 2017, MariaDB Corporation.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/***************************************************//**
-@file row/row0sel.cc
-Select
-
-Created 12/19/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0sel.h"
-
-#ifdef UNIV_NONINL
-#include "row0sel.ic"
-#endif
-
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "trx0undo.h"
-#include "trx0trx.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "btr0sea.h"
-#include "mach0data.h"
-#include "que0que.h"
-#include "row0upd.h"
-#include "row0row.h"
-#include "row0vers.h"
-#include "rem0cmp.h"
-#include "lock0lock.h"
-#include "eval0eval.h"
-#include "pars0sym.h"
-#include "pars0pars.h"
-#include "row0mysql.h"
-#include "read0read.h"
-#include "buf0lru.h"
-#include "srv0srv.h"
-#include "ha_prototypes.h"
-#include "srv0start.h"
-#include "m_string.h" /* for my_sys.h */
-#include "my_sys.h" /* DEBUG_SYNC_C */
-#include "fil0fil.h"
-
-#include "my_compare.h" /* enum icp_result */
-
-#include <vector>
-
-/* Maximum number of rows to prefetch; MySQL interface has another parameter */
-#define SEL_MAX_N_PREFETCH 16
-
-/* Number of rows fetched, after which to start prefetching; MySQL interface
-has another parameter */
-#define SEL_PREFETCH_LIMIT 1
-
-/* When a select has accessed about this many pages, it returns control back
-to que_run_threads: this is to allow canceling runaway queries */
-
-#define SEL_COST_LIMIT 100
-
-/* Flags for search shortcut */
-#define SEL_FOUND 0
-#define SEL_EXHAUSTED 1
-#define SEL_RETRY 2
-
-/********************************************************************//**
-Returns TRUE if the user-defined column in a secondary index record
-is alphabetically the same as the corresponding BLOB column in the clustered
-index record.
-NOTE: the comparison is NOT done as a binary comparison, but character
-fields are compared with collation!
-@return TRUE if the columns are equal */
-static
-ibool
-row_sel_sec_rec_is_for_blob(
-/*========================*/
- ulint mtype, /*!< in: main type */
- ulint prtype, /*!< in: precise type */
- ulint mbminmaxlen, /*!< in: minimum and maximum length of
- a multi-byte character */
- const byte* clust_field, /*!< in: the locally stored part of
- the clustered index column, including
- the BLOB pointer; the clustered
- index record must be covered by
- a lock or a page latch to protect it
- against deletion (rollback or purge) */
- ulint clust_len, /*!< in: length of clust_field */
- const byte* sec_field, /*!< in: column in secondary index */
- ulint sec_len, /*!< in: length of sec_field */
- ulint prefix_len, /*!< in: index column prefix length
- in bytes */
- dict_table_t* table) /*!< in: table */
-{
- ulint len;
- byte buf[REC_VERSION_56_MAX_INDEX_COL_LEN];
- ulint zip_size = dict_tf_get_zip_size(table->flags);
-
- /* This function should never be invoked on an Antelope format
- table, because they should always contain enough prefix in the
- clustered index record. */
- ut_ad(dict_table_get_format(table) >= UNIV_FORMAT_B);
- ut_a(clust_len >= BTR_EXTERN_FIELD_REF_SIZE);
- ut_ad(prefix_len >= sec_len);
- ut_ad(prefix_len > 0);
- ut_a(prefix_len <= sizeof buf);
-
- if (UNIV_UNLIKELY
- (!memcmp(clust_field + clust_len - BTR_EXTERN_FIELD_REF_SIZE,
- field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE))) {
- /* The externally stored field was not written yet.
- This record should only be seen by
- recv_recovery_rollback_active() or any
- TRX_ISO_READ_UNCOMMITTED transactions. */
- return(FALSE);
- }
-
- len = btr_copy_externally_stored_field_prefix(buf, prefix_len,
- zip_size,
- clust_field, clust_len,
- NULL);
-
- if (UNIV_UNLIKELY(len == 0)) {
- /* The BLOB was being deleted as the server crashed.
- There should not be any secondary index records
- referring to this clustered index record, because
- btr_free_externally_stored_field() is called after all
- secondary index entries of the row have been purged. */
- return(FALSE);
- }
-
- len = dtype_get_at_most_n_mbchars(prtype, mbminmaxlen,
- prefix_len, len, (const char*) buf);
-
- return(!cmp_data_data(mtype, prtype, buf, len, sec_field, sec_len));
-}
-
-/********************************************************************//**
-Returns TRUE if the user-defined column values in a secondary index record
-are alphabetically the same as the corresponding columns in the clustered
-index record.
-NOTE: the comparison is NOT done as a binary comparison, but character
-fields are compared with collation!
-@return TRUE if the secondary record is equal to the corresponding
-fields in the clustered record, when compared with collation;
-FALSE if not equal or if the clustered record has been marked for deletion */
-static
-ibool
-row_sel_sec_rec_is_for_clust_rec(
-/*=============================*/
- const rec_t* sec_rec, /*!< in: secondary index record */
- dict_index_t* sec_index, /*!< in: secondary index */
- const rec_t* clust_rec, /*!< in: clustered index record;
- must be protected by a lock or
- a page latch against deletion
- in rollback or purge */
- dict_index_t* clust_index) /*!< in: clustered index */
-{
- const byte* sec_field;
- ulint sec_len;
- const byte* clust_field;
- ulint n;
- ulint i;
- mem_heap_t* heap = NULL;
- ulint clust_offsets_[REC_OFFS_NORMAL_SIZE];
- ulint sec_offsets_[REC_OFFS_SMALL_SIZE];
- ulint* clust_offs = clust_offsets_;
- ulint* sec_offs = sec_offsets_;
- ibool is_equal = TRUE;
-
- rec_offs_init(clust_offsets_);
- rec_offs_init(sec_offsets_);
-
- if (rec_get_deleted_flag(clust_rec,
- dict_table_is_comp(clust_index->table))) {
-
- /* The clustered index record is delete-marked;
- it is not visible in the read view. Besides,
- if there are any externally stored columns,
- some of them may have already been purged. */
- return(FALSE);
- }
-
- clust_offs = rec_get_offsets(clust_rec, clust_index, clust_offs,
- ULINT_UNDEFINED, &heap);
- sec_offs = rec_get_offsets(sec_rec, sec_index, sec_offs,
- ULINT_UNDEFINED, &heap);
-
- n = dict_index_get_n_ordering_defined_by_user(sec_index);
-
- for (i = 0; i < n; i++) {
- const dict_field_t* ifield;
- const dict_col_t* col;
- ulint clust_pos;
- ulint clust_len;
- ulint len;
-
- ifield = dict_index_get_nth_field(sec_index, i);
- col = dict_field_get_col(ifield);
- clust_pos = dict_col_get_clust_pos(col, clust_index);
-
- clust_field = rec_get_nth_field(
- clust_rec, clust_offs, clust_pos, &clust_len);
- sec_field = rec_get_nth_field(sec_rec, sec_offs, i, &sec_len);
-
- len = clust_len;
-
- if (ifield->prefix_len > 0 && len != UNIV_SQL_NULL
- && sec_len != UNIV_SQL_NULL) {
-
- if (rec_offs_nth_extern(clust_offs, clust_pos)) {
- len -= BTR_EXTERN_FIELD_REF_SIZE;
- }
-
- len = dtype_get_at_most_n_mbchars(
- col->prtype, col->mbminmaxlen,
- ifield->prefix_len, len, (char*) clust_field);
-
- if (rec_offs_nth_extern(clust_offs, clust_pos)
- && len < sec_len) {
- if (!row_sel_sec_rec_is_for_blob(
- col->mtype, col->prtype,
- col->mbminmaxlen,
- clust_field, clust_len,
- sec_field, sec_len,
- ifield->prefix_len,
- clust_index->table)) {
- goto inequal;
- }
-
- continue;
- }
- }
-
- if (0 != cmp_data_data(col->mtype, col->prtype,
- clust_field, len,
- sec_field, sec_len)) {
-inequal:
- is_equal = FALSE;
- goto func_exit;
- }
- }
-
-func_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(is_equal);
-}
-
-/*********************************************************************//**
-Creates a select node struct.
-@return own: select node struct */
-UNIV_INTERN
-sel_node_t*
-sel_node_create(
-/*============*/
- mem_heap_t* heap) /*!< in: memory heap where created */
-{
- sel_node_t* node;
-
- node = static_cast<sel_node_t*>(
- mem_heap_alloc(heap, sizeof(sel_node_t)));
-
- node->common.type = QUE_NODE_SELECT;
- node->state = SEL_NODE_OPEN;
-
- node->plans = NULL;
-
- return(node);
-}
-
-/*********************************************************************//**
-Frees the memory private to a select node when a query graph is freed,
-does not free the heap where the node was originally created. */
-UNIV_INTERN
-void
-sel_node_free_private(
-/*==================*/
- sel_node_t* node) /*!< in: select node struct */
-{
- ulint i;
- plan_t* plan;
-
- if (node->plans != NULL) {
- for (i = 0; i < node->n_tables; i++) {
- plan = sel_node_get_nth_plan(node, i);
-
- btr_pcur_close(&(plan->pcur));
- btr_pcur_close(&(plan->clust_pcur));
-
- if (plan->old_vers_heap) {
- mem_heap_free(plan->old_vers_heap);
- }
- }
- }
-}
-
-/*********************************************************************//**
-Evaluates the values in a select list. If there are aggregate functions,
-their argument value is added to the aggregate total. */
-UNIV_INLINE
-void
-sel_eval_select_list(
-/*=================*/
- sel_node_t* node) /*!< in: select node */
-{
- que_node_t* exp;
-
- exp = node->select_list;
-
- while (exp) {
- eval_exp(exp);
-
- exp = que_node_get_next(exp);
- }
-}
-
-/*********************************************************************//**
-Assigns the values in the select list to the possible into-variables in
-SELECT ... INTO ... */
-UNIV_INLINE
-void
-sel_assign_into_var_values(
-/*=======================*/
- sym_node_t* var, /*!< in: first variable in a list of
- variables */
- sel_node_t* node) /*!< in: select node */
-{
- que_node_t* exp;
-
- if (var == NULL) {
-
- return;
- }
-
- for (exp = node->select_list;
- var != 0;
- var = static_cast<sym_node_t*>(que_node_get_next(var))) {
-
- ut_ad(exp);
-
- eval_node_copy_val(var->alias, exp);
-
- exp = que_node_get_next(exp);
- }
-}
-
-/*********************************************************************//**
-Resets the aggregate value totals in the select list of an aggregate type
-query. */
-UNIV_INLINE
-void
-sel_reset_aggregate_vals(
-/*=====================*/
- sel_node_t* node) /*!< in: select node */
-{
- func_node_t* func_node;
-
- ut_ad(node->is_aggregate);
-
- for (func_node = static_cast<func_node_t*>(node->select_list);
- func_node != 0;
- func_node = static_cast<func_node_t*>(
- que_node_get_next(func_node))) {
-
- eval_node_set_int_val(func_node, 0);
- }
-
- node->aggregate_already_fetched = FALSE;
-}
-
-/*********************************************************************//**
-Copies the input variable values when an explicit cursor is opened. */
-UNIV_INLINE
-void
-row_sel_copy_input_variable_vals(
-/*=============================*/
- sel_node_t* node) /*!< in: select node */
-{
- sym_node_t* var;
-
- var = UT_LIST_GET_FIRST(node->copy_variables);
-
- while (var) {
- eval_node_copy_val(var, var->alias);
-
- var->indirection = NULL;
-
- var = UT_LIST_GET_NEXT(col_var_list, var);
- }
-}
-
-/*********************************************************************//**
-Fetches the column values from a record. */
-static
-void
-row_sel_fetch_columns(
-/*==================*/
- dict_index_t* index, /*!< in: record index */
- const rec_t* rec, /*!< in: record in a clustered or non-clustered
- index; must be protected by a page latch */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- sym_node_t* column) /*!< in: first column in a column list, or
- NULL */
-{
- dfield_t* val;
- ulint index_type;
- ulint field_no;
- const byte* data;
- ulint len;
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (dict_index_is_clust(index)) {
- index_type = SYM_CLUST_FIELD_NO;
- } else {
- index_type = SYM_SEC_FIELD_NO;
- }
-
- while (column) {
- mem_heap_t* heap = NULL;
- ibool needs_copy;
-
- field_no = column->field_nos[index_type];
-
- if (field_no != ULINT_UNDEFINED) {
-
- if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
- field_no))) {
-
- /* Copy an externally stored field to the
- temporary heap, if possible. */
-
- heap = mem_heap_create(1);
-
- data = btr_rec_copy_externally_stored_field(
- rec, offsets,
- dict_table_zip_size(index->table),
- field_no, &len, heap, NULL);
-
- /* data == NULL means that the
- externally stored field was not
- written yet. This record
- should only be seen by
- recv_recovery_rollback_active() or any
- TRX_ISO_READ_UNCOMMITTED
- transactions. The InnoDB SQL parser
- (the sole caller of this function)
- does not implement READ UNCOMMITTED,
- and it is not involved during rollback. */
- ut_a(data);
- ut_a(len != UNIV_SQL_NULL);
-
- needs_copy = TRUE;
- } else {
- data = rec_get_nth_field(rec, offsets,
- field_no, &len);
-
- needs_copy = column->copy_val;
- }
-
- if (needs_copy) {
- eval_node_copy_and_alloc_val(column, data,
- len);
- } else {
- val = que_node_get_val(column);
- dfield_set_data(val, data, len);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- }
-
- column = UT_LIST_GET_NEXT(col_var_list, column);
- }
-}
-
-/*********************************************************************//**
-Allocates a prefetch buffer for a column when prefetch is first time done. */
-static
-void
-sel_col_prefetch_buf_alloc(
-/*=======================*/
- sym_node_t* column) /*!< in: symbol table node for a column */
-{
- sel_buf_t* sel_buf;
- ulint i;
-
- ut_ad(que_node_get_type(column) == QUE_NODE_SYMBOL);
-
- column->prefetch_buf = static_cast<sel_buf_t*>(
- mem_alloc(SEL_MAX_N_PREFETCH * sizeof(sel_buf_t)));
-
- for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
- sel_buf = column->prefetch_buf + i;
-
- sel_buf->data = NULL;
- sel_buf->len = 0;
- sel_buf->val_buf_size = 0;
- }
-}
-
-/*********************************************************************//**
-Frees a prefetch buffer for a column, including the dynamically allocated
-memory for data stored there. */
-UNIV_INTERN
-void
-sel_col_prefetch_buf_free(
-/*======================*/
- sel_buf_t* prefetch_buf) /*!< in, own: prefetch buffer */
-{
- sel_buf_t* sel_buf;
- ulint i;
-
- for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
- sel_buf = prefetch_buf + i;
-
- if (sel_buf->val_buf_size > 0) {
-
- mem_free(sel_buf->data);
- }
- }
-
- mem_free(prefetch_buf);
-}
-
-/*********************************************************************//**
-Pops the column values for a prefetched, cached row from the column prefetch
-buffers and places them to the val fields in the column nodes. */
-static
-void
-sel_dequeue_prefetched_row(
-/*=======================*/
- plan_t* plan) /*!< in: plan node for a table */
-{
- sym_node_t* column;
- sel_buf_t* sel_buf;
- dfield_t* val;
- byte* data;
- ulint len;
- ulint val_buf_size;
-
- ut_ad(plan->n_rows_prefetched > 0);
-
- column = UT_LIST_GET_FIRST(plan->columns);
-
- while (column) {
- val = que_node_get_val(column);
-
- if (!column->copy_val) {
- /* We did not really push any value for the
- column */
-
- ut_ad(!column->prefetch_buf);
- ut_ad(que_node_get_val_buf_size(column) == 0);
- ut_d(dfield_set_null(val));
-
- goto next_col;
- }
-
- ut_ad(column->prefetch_buf);
- ut_ad(!dfield_is_ext(val));
-
- sel_buf = column->prefetch_buf + plan->first_prefetched;
-
- data = sel_buf->data;
- len = sel_buf->len;
- val_buf_size = sel_buf->val_buf_size;
-
- /* We must keep track of the allocated memory for
- column values to be able to free it later: therefore
- we swap the values for sel_buf and val */
-
- sel_buf->data = static_cast<byte*>(dfield_get_data(val));
- sel_buf->len = dfield_get_len(val);
- sel_buf->val_buf_size = que_node_get_val_buf_size(column);
-
- dfield_set_data(val, data, len);
- que_node_set_val_buf_size(column, val_buf_size);
-next_col:
- column = UT_LIST_GET_NEXT(col_var_list, column);
- }
-
- plan->n_rows_prefetched--;
-
- plan->first_prefetched++;
-}
-
-/*********************************************************************//**
-Pushes the column values for a prefetched, cached row to the column prefetch
-buffers from the val fields in the column nodes. */
-UNIV_INLINE
-void
-sel_enqueue_prefetched_row(
-/*=======================*/
- plan_t* plan) /*!< in: plan node for a table */
-{
- sym_node_t* column;
- sel_buf_t* sel_buf;
- dfield_t* val;
- byte* data;
- ulint len;
- ulint pos;
- ulint val_buf_size;
-
- if (plan->n_rows_prefetched == 0) {
- pos = 0;
- plan->first_prefetched = 0;
- } else {
- pos = plan->n_rows_prefetched;
-
- /* We have the convention that pushing new rows starts only
- after the prefetch stack has been emptied: */
-
- ut_ad(plan->first_prefetched == 0);
- }
-
- plan->n_rows_prefetched++;
-
- ut_ad(pos < SEL_MAX_N_PREFETCH);
-
- for (column = UT_LIST_GET_FIRST(plan->columns);
- column != 0;
- column = UT_LIST_GET_NEXT(col_var_list, column)) {
-
- if (!column->copy_val) {
- /* There is no sense to push pointers to database
- page fields when we do not keep latch on the page! */
- continue;
- }
-
- if (!column->prefetch_buf) {
- /* Allocate a new prefetch buffer */
-
- sel_col_prefetch_buf_alloc(column);
- }
-
- sel_buf = column->prefetch_buf + pos;
-
- val = que_node_get_val(column);
-
- data = static_cast<byte*>(dfield_get_data(val));
- len = dfield_get_len(val);
- val_buf_size = que_node_get_val_buf_size(column);
-
- /* We must keep track of the allocated memory for
- column values to be able to free it later: therefore
- we swap the values for sel_buf and val */
-
- dfield_set_data(val, sel_buf->data, sel_buf->len);
- que_node_set_val_buf_size(column, sel_buf->val_buf_size);
-
- sel_buf->data = data;
- sel_buf->len = len;
- sel_buf->val_buf_size = val_buf_size;
- }
-}
-
-/*********************************************************************//**
-Builds a previous version of a clustered index record for a consistent read
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_sel_build_prev_vers(
-/*====================*/
- read_view_t* read_view, /*!< in: read view */
- dict_index_t* index, /*!< in: plan node for table */
- rec_t* rec, /*!< in: record in a clustered index */
- ulint** offsets, /*!< in/out: offsets returned by
- rec_get_offsets(rec, plan->index) */
- mem_heap_t** offset_heap, /*!< in/out: memory heap from which
- the offsets are allocated */
- mem_heap_t** old_vers_heap, /*!< out: old version heap to use */
- rec_t** old_vers, /*!< out: old version, or NULL if the
- record does not exist in the view:
- i.e., it was freshly inserted
- afterwards */
- mtr_t* mtr) /*!< in: mtr */
-{
- dberr_t err;
-
- if (*old_vers_heap) {
- mem_heap_empty(*old_vers_heap);
- } else {
- *old_vers_heap = mem_heap_create(512);
- }
-
- err = row_vers_build_for_consistent_read(
- rec, mtr, index, offsets, read_view, offset_heap,
- *old_vers_heap, old_vers);
- return(err);
-}
-
-/*********************************************************************//**
-Builds the last committed version of a clustered index record for a
-semi-consistent read. */
-static MY_ATTRIBUTE((nonnull))
-void
-row_sel_build_committed_vers_for_mysql(
-/*===================================*/
- dict_index_t* clust_index, /*!< in: clustered index */
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
- const rec_t* rec, /*!< in: record in a clustered index */
- ulint** offsets, /*!< in/out: offsets returned by
- rec_get_offsets(rec, clust_index) */
- mem_heap_t** offset_heap, /*!< in/out: memory heap from which
- the offsets are allocated */
- const rec_t** old_vers, /*!< out: old version, or NULL if the
- record does not exist in the view:
- i.e., it was freshly inserted
- afterwards */
- mtr_t* mtr) /*!< in: mtr */
-{
- if (prebuilt->old_vers_heap) {
- mem_heap_empty(prebuilt->old_vers_heap);
- } else {
- prebuilt->old_vers_heap = mem_heap_create(
- rec_offs_size(*offsets));
- }
-
- row_vers_build_for_semi_consistent_read(
- rec, mtr, clust_index, offsets, offset_heap,
- prebuilt->old_vers_heap, old_vers);
-}
-
-/*********************************************************************//**
-Tests the conditions which determine when the index segment we are searching
-through has been exhausted.
-@return TRUE if row passed the tests */
-UNIV_INLINE
-ibool
-row_sel_test_end_conds(
-/*===================*/
- plan_t* plan) /*!< in: plan for the table; the column values must
- already have been retrieved and the right sides of
- comparisons evaluated */
-{
- func_node_t* cond;
-
- /* All conditions in end_conds are comparisons of a column to an
- expression */
-
- for (cond = UT_LIST_GET_FIRST(plan->end_conds);
- cond != 0;
- cond = UT_LIST_GET_NEXT(cond_list, cond)) {
-
- /* Evaluate the left side of the comparison, i.e., get the
- column value if there is an indirection */
-
- eval_sym(static_cast<sym_node_t*>(cond->args));
-
- /* Do the comparison */
-
- if (!eval_cmp(cond)) {
-
- return(FALSE);
- }
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Tests the other conditions.
-@return TRUE if row passed the tests */
-UNIV_INLINE
-ibool
-row_sel_test_other_conds(
-/*=====================*/
- plan_t* plan) /*!< in: plan for the table; the column values must
- already have been retrieved */
-{
- func_node_t* cond;
-
- cond = UT_LIST_GET_FIRST(plan->other_conds);
-
- while (cond) {
- eval_exp(cond);
-
- if (!eval_node_get_ibool_val(cond)) {
-
- return(FALSE);
- }
-
- cond = UT_LIST_GET_NEXT(cond_list, cond);
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Retrieves the clustered index record corresponding to a record in a
-non-clustered index. Does the necessary locking.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_sel_get_clust_rec(
-/*==================*/
- sel_node_t* node, /*!< in: select_node */
- plan_t* plan, /*!< in: plan node for table */
- rec_t* rec, /*!< in: record in a non-clustered index */
- que_thr_t* thr, /*!< in: query thread */
- rec_t** out_rec,/*!< out: clustered record or an old version of
- it, NULL if the old version did not exist
- in the read view, i.e., it was a fresh
- inserted version */
- mtr_t* mtr) /*!< in: mtr used to get access to the
- non-clustered record; the same mtr is used to
- access the clustered index */
-{
- dict_index_t* index;
- rec_t* clust_rec;
- rec_t* old_vers;
- dberr_t err;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- *out_rec = NULL;
-
- offsets = rec_get_offsets(rec,
- btr_pcur_get_btr_cur(&plan->pcur)->index,
- offsets, ULINT_UNDEFINED, &heap);
-
- row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec, offsets);
-
- index = dict_table_get_first_index(plan->table);
-
- btr_pcur_open_with_no_init(index, plan->clust_ref, PAGE_CUR_LE,
- BTR_SEARCH_LEAF, &plan->clust_pcur,
- 0, mtr);
-
- clust_rec = btr_pcur_get_rec(&(plan->clust_pcur));
-
- /* Note: only if the search ends up on a non-infimum record is the
- low_match value the real match to the search tuple */
-
- if (!page_rec_is_user_rec(clust_rec)
- || btr_pcur_get_low_match(&(plan->clust_pcur))
- < dict_index_get_n_unique(index)) {
-
- ut_a(rec_get_deleted_flag(rec,
- dict_table_is_comp(plan->table)));
- ut_a(node->read_view);
-
- /* In a rare case it is possible that no clust rec is found
- for a delete-marked secondary index record: if in row0umod.cc
- in row_undo_mod_remove_clust_low() we have already removed
- the clust rec, while purge is still cleaning and removing
- secondary index records associated with earlier versions of
- the clustered index record. In that case we know that the
- clustered index record did not exist in the read view of
- trx. */
-
- goto func_exit;
- }
-
- offsets = rec_get_offsets(clust_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- if (!node->read_view) {
- /* Try to place a lock on the index record */
- ulint lock_type;
- trx_t* trx;
-
- trx = thr_get_trx(thr);
-
- /* If innodb_locks_unsafe_for_binlog option is used
- or this session is using READ COMMITTED or lower isolation level
- we lock only the record, i.e., next-key locking is
- not used. */
- if (srv_locks_unsafe_for_binlog
- || trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
- lock_type = LOCK_REC_NOT_GAP;
- } else {
- lock_type = LOCK_ORDINARY;
- }
-
- err = lock_clust_rec_read_check_and_lock(
- 0, btr_pcur_get_block(&plan->clust_pcur),
- clust_rec, index, offsets,
- static_cast<enum lock_mode>(node->row_lock_mode),
- lock_type,
- thr);
-
- switch (err) {
- case DB_SUCCESS:
- case DB_SUCCESS_LOCKED_REC:
- /* Declare the variable uninitialized in Valgrind.
- It should be set to DB_SUCCESS at func_exit. */
- UNIV_MEM_INVALID(&err, sizeof err);
- break;
- default:
- goto err_exit;
- }
- } else {
- /* This is a non-locking consistent read: if necessary, fetch
- a previous version of the record */
-
- old_vers = NULL;
-
- if (!lock_clust_rec_cons_read_sees(clust_rec, index, offsets,
- node->read_view)) {
-
- err = row_sel_build_prev_vers(
- node->read_view, index, clust_rec,
- &offsets, &heap, &plan->old_vers_heap,
- &old_vers, mtr);
-
- if (err != DB_SUCCESS) {
-
- goto err_exit;
- }
-
- clust_rec = old_vers;
-
- if (clust_rec == NULL) {
- goto func_exit;
- }
- }
-
- /* If we had to go to an earlier version of row or the
- secondary index record is delete marked, then it may be that
- the secondary index record corresponding to clust_rec
- (or old_vers) is not rec; in that case we must ignore
- such row because in our snapshot rec would not have existed.
- Remember that from rec we cannot see directly which transaction
- id corresponds to it: we have to go to the clustered index
- record. A query where we want to fetch all rows where
- the secondary index value is in some interval would return
- a wrong result if we would not drop rows which we come to
- visit through secondary index records that would not really
- exist in our snapshot. */
-
- if ((old_vers
- || rec_get_deleted_flag(rec, dict_table_is_comp(
- plan->table)))
- && !row_sel_sec_rec_is_for_clust_rec(rec, plan->index,
- clust_rec, index)) {
- goto func_exit;
- }
- }
-
- /* Fetch the columns needed in test conditions. The clustered
- index record is protected by a page latch that was acquired
- when plan->clust_pcur was positioned. The latch will not be
- released until mtr_commit(mtr). */
-
- ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets)));
- row_sel_fetch_columns(index, clust_rec, offsets,
- UT_LIST_GET_FIRST(plan->columns));
- *out_rec = clust_rec;
-func_exit:
- err = DB_SUCCESS;
-err_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
-}
-
-/*********************************************************************//**
-Sets a lock on a record.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
-UNIV_INLINE
-dberr_t
-sel_set_rec_lock(
-/*=============*/
- const buf_block_t* block, /*!< in: buffer block of rec */
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint mode, /*!< in: lock mode */
- ulint type, /*!< in: LOCK_ORDINARY, LOCK_GAP, or
- LOC_REC_NOT_GAP */
- que_thr_t* thr) /*!< in: query thread */
-{
- trx_t* trx;
- dberr_t err;
-
- trx = thr_get_trx(thr);
-
- if (UT_LIST_GET_LEN(trx->lock.trx_locks) > 10000) {
- if (buf_LRU_buf_pool_running_out()) {
-
- return(DB_LOCK_TABLE_FULL);
- }
- }
-
- if (dict_index_is_clust(index)) {
- err = lock_clust_rec_read_check_and_lock(
- 0, block, rec, index, offsets,
- static_cast<enum lock_mode>(mode), type, thr);
- } else {
- err = lock_sec_rec_read_check_and_lock(
- 0, block, rec, index, offsets,
- static_cast<enum lock_mode>(mode), type, thr);
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Opens a pcur to a table index. */
-static
-void
-row_sel_open_pcur(
-/*==============*/
- plan_t* plan, /*!< in: table plan */
- ibool search_latch_locked,
- /*!< in: TRUE if the thread currently
- has the search latch locked in
- s-mode */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_index_t* index;
- func_node_t* cond;
- que_node_t* exp;
- ulint n_fields;
- ulint has_search_latch = 0; /* RW_S_LATCH or 0 */
- ulint i;
-
- if (search_latch_locked) {
- has_search_latch = RW_S_LATCH;
- }
-
- index = plan->index;
-
- /* Calculate the value of the search tuple: the exact match columns
- get their expressions evaluated when we evaluate the right sides of
- end_conds */
-
- cond = UT_LIST_GET_FIRST(plan->end_conds);
-
- while (cond) {
- eval_exp(que_node_get_next(cond->args));
-
- cond = UT_LIST_GET_NEXT(cond_list, cond);
- }
-
- if (plan->tuple) {
- n_fields = dtuple_get_n_fields(plan->tuple);
-
- if (plan->n_exact_match < n_fields) {
- /* There is a non-exact match field which must be
- evaluated separately */
-
- eval_exp(plan->tuple_exps[n_fields - 1]);
- }
-
- for (i = 0; i < n_fields; i++) {
- exp = plan->tuple_exps[i];
-
- dfield_copy_data(dtuple_get_nth_field(plan->tuple, i),
- que_node_get_val(exp));
- }
-
- /* Open pcur to the index */
-
- btr_pcur_open_with_no_init(index, plan->tuple, plan->mode,
- BTR_SEARCH_LEAF, &plan->pcur,
- has_search_latch, mtr);
- } else {
- /* Open the cursor to the start or the end of the index
- (FALSE: no init) */
-
- btr_pcur_open_at_index_side(plan->asc, index, BTR_SEARCH_LEAF,
- &(plan->pcur), false, 0, mtr);
- }
-
- ut_ad(plan->n_rows_prefetched == 0);
- ut_ad(plan->n_rows_fetched == 0);
- ut_ad(plan->cursor_at_end == FALSE);
-
- plan->pcur_is_open = TRUE;
-}
-
-/*********************************************************************//**
-Restores a stored pcur position to a table index.
-@return TRUE if the cursor should be moved to the next record after we
-return from this function (moved to the previous, in the case of a
-descending cursor) without processing again the current cursor
-record */
-static
-ibool
-row_sel_restore_pcur_pos(
-/*=====================*/
- plan_t* plan, /*!< in: table plan */
- mtr_t* mtr) /*!< in: mtr */
-{
- ibool equal_position;
- ulint relative_position;
-
- ut_ad(!plan->cursor_at_end);
-
- relative_position = btr_pcur_get_rel_pos(&(plan->pcur));
-
- equal_position = btr_pcur_restore_position(BTR_SEARCH_LEAF,
- &(plan->pcur), mtr);
-
- /* If the cursor is traveling upwards, and relative_position is
-
- (1) BTR_PCUR_BEFORE: this is not allowed, as we did not have a lock
- yet on the successor of the page infimum;
- (2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
- first record GREATER than the predecessor of a page supremum; we have
- not yet processed the cursor record: no need to move the cursor to the
- next record;
- (3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
- last record LESS or EQUAL to the old stored user record; (a) if
- equal_position is FALSE, this means that the cursor is now on a record
- less than the old user record, and we must move to the next record;
- (b) if equal_position is TRUE, then if
- plan->stored_cursor_rec_processed is TRUE, we must move to the next
- record, else there is no need to move the cursor. */
-
- if (plan->asc) {
- if (relative_position == BTR_PCUR_ON) {
-
- if (equal_position) {
-
- return(plan->stored_cursor_rec_processed);
- }
-
- return(TRUE);
- }
-
- ut_ad(relative_position == BTR_PCUR_AFTER
- || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
-
- return(FALSE);
- }
-
- /* If the cursor is traveling downwards, and relative_position is
-
- (1) BTR_PCUR_BEFORE: btr_pcur_restore_position placed the cursor on
- the last record LESS than the successor of a page infimum; we have not
- processed the cursor record: no need to move the cursor;
- (2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
- first record GREATER than the predecessor of a page supremum; we have
- processed the cursor record: we should move the cursor to the previous
- record;
- (3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
- last record LESS or EQUAL to the old stored user record; (a) if
- equal_position is FALSE, this means that the cursor is now on a record
- less than the old user record, and we need not move to the previous
- record; (b) if equal_position is TRUE, then if
- plan->stored_cursor_rec_processed is TRUE, we must move to the previous
- record, else there is no need to move the cursor. */
-
- if (relative_position == BTR_PCUR_BEFORE
- || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
-
- return(FALSE);
- }
-
- if (relative_position == BTR_PCUR_ON) {
-
- if (equal_position) {
-
- return(plan->stored_cursor_rec_processed);
- }
-
- return(FALSE);
- }
-
- ut_ad(relative_position == BTR_PCUR_AFTER
- || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Resets a plan cursor to a closed state. */
-UNIV_INLINE
-void
-plan_reset_cursor(
-/*==============*/
- plan_t* plan) /*!< in: plan */
-{
- plan->pcur_is_open = FALSE;
- plan->cursor_at_end = FALSE;
- plan->n_rows_fetched = 0;
- plan->n_rows_prefetched = 0;
-}
-
-/*********************************************************************//**
-Tries to do a shortcut to fetch a clustered index record with a unique key,
-using the hash index if possible (not always).
-@return SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
-static
-ulint
-row_sel_try_search_shortcut(
-/*========================*/
- sel_node_t* node, /*!< in: select node for a consistent read */
- plan_t* plan, /*!< in: plan for a unique search in clustered
- index */
- ibool search_latch_locked,
- /*!< in: whether the search holds
- btr_search_latch */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_index_t* index;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- ulint ret;
- rec_offs_init(offsets_);
-
- index = plan->index;
-
- ut_ad(node->read_view);
- ut_ad(plan->unique_search);
- ut_ad(!plan->must_get_clust);
-#ifdef UNIV_SYNC_DEBUG
- if (search_latch_locked) {
- ut_ad(rw_lock_own(btr_search_get_latch(index),
- RW_LOCK_SHARED));
- }
-#endif /* UNIV_SYNC_DEBUG */
-
- row_sel_open_pcur(plan, search_latch_locked, mtr);
-
- rec = btr_pcur_get_rec(&(plan->pcur));
-
- if (!page_rec_is_user_rec(rec)) {
-
- return(SEL_RETRY);
- }
-
- ut_ad(plan->mode == PAGE_CUR_GE);
-
- /* As the cursor is now placed on a user record after a search with
- the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
- fields in the user record matched to the search tuple */
-
- if (btr_pcur_get_up_match(&(plan->pcur)) < plan->n_exact_match) {
-
- return(SEL_EXHAUSTED);
- }
-
- /* This is a non-locking consistent read: if necessary, fetch
- a previous version of the record */
-
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- if (dict_index_is_clust(index)) {
- if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
- node->read_view)) {
- ret = SEL_RETRY;
- goto func_exit;
- }
- } else if (!lock_sec_rec_cons_read_sees(rec, node->read_view)) {
-
- ret = SEL_RETRY;
- goto func_exit;
- }
-
- /* Test the deleted flag. */
-
- if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))) {
-
- ret = SEL_EXHAUSTED;
- goto func_exit;
- }
-
- /* Fetch the columns needed in test conditions. The index
- record is protected by a page latch that was acquired when
- plan->pcur was positioned. The latch will not be released
- until mtr_commit(mtr). */
-
- row_sel_fetch_columns(index, rec, offsets,
- UT_LIST_GET_FIRST(plan->columns));
-
- /* Test the rest of search conditions */
-
- if (!row_sel_test_other_conds(plan)) {
-
- ret = SEL_EXHAUSTED;
- goto func_exit;
- }
-
- ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF);
-
- plan->n_rows_fetched++;
- ret = SEL_FOUND;
-func_exit:
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(ret);
-}
-
-/*********************************************************************//**
-Performs a select step.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_sel(
-/*====*/
- sel_node_t* node, /*!< in: select node */
- que_thr_t* thr) /*!< in: query thread */
-{
- dict_index_t* index;
- plan_t* plan;
- mtr_t mtr;
- ibool moved;
- rec_t* rec;
- rec_t* old_vers;
- rec_t* clust_rec;
- ibool search_latch_locked;
- ibool consistent_read;
-
- /* The following flag becomes TRUE when we are doing a
- consistent read from a non-clustered index and we must look
- at the clustered index to find out the previous delete mark
- state of the non-clustered record: */
-
- ibool cons_read_requires_clust_rec = FALSE;
- ulint cost_counter = 0;
- ibool cursor_just_opened;
- ibool must_go_to_next;
- ibool mtr_has_extra_clust_latch = FALSE;
- /* TRUE if the search was made using
- a non-clustered index, and we had to
- access the clustered record: now &mtr
- contains a clustered index latch, and
- &mtr must be committed before we move
- to the next non-clustered record */
- ulint found_flag;
- dberr_t err;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- rec_offs_init(offsets_);
-
- ut_ad(thr->run_node == node);
-
- search_latch_locked = FALSE;
-
- if (node->read_view) {
- /* In consistent reads, we try to do with the hash index and
- not to use the buffer page get. This is to reduce memory bus
- load resulting from semaphore operations. The search latch
- will be s-locked when we access an index with a unique search
- condition, but not locked when we access an index with a
- less selective search condition. */
-
- consistent_read = TRUE;
- } else {
- consistent_read = FALSE;
- }
-
-table_loop:
- /* TABLE LOOP
- ----------
- This is the outer major loop in calculating a join. We come here when
- node->fetch_table changes, and after adding a row to aggregate totals
- and, of course, when this function is called. */
-
- ut_ad(mtr_has_extra_clust_latch == FALSE);
-
- plan = sel_node_get_nth_plan(node, node->fetch_table);
- index = plan->index;
-
- if (plan->n_rows_prefetched > 0) {
- sel_dequeue_prefetched_row(plan);
-
- goto next_table_no_mtr;
- }
-
- if (plan->cursor_at_end) {
- /* The cursor has already reached the result set end: no more
- rows to process for this table cursor, as also the prefetch
- stack was empty */
-
- ut_ad(plan->pcur_is_open);
-
- goto table_exhausted_no_mtr;
- }
-
- /* Open a cursor to index, or restore an open cursor position */
-
- mtr_start_trx(&mtr, thr_get_trx(thr));
-
- if (consistent_read && plan->unique_search && !plan->pcur_is_open
- && !plan->must_get_clust
- && !plan->table->big_rows) {
- if (!search_latch_locked) {
- rw_lock_s_lock(btr_search_get_latch(index));
-
- search_latch_locked = TRUE;
- } else if (rw_lock_get_writer(btr_search_get_latch(index))
- == RW_LOCK_WAIT_EX) {
-
- /* There is an x-latch request waiting: release the
- s-latch for a moment; as an s-latch here is often
- kept for some 10 searches before being released,
- a waiting x-latch request would block other threads
- from acquiring an s-latch for a long time, lowering
- performance significantly in multiprocessors. */
-
- rw_lock_s_unlock(btr_search_get_latch(index));
- rw_lock_s_lock(btr_search_get_latch(index));
- }
-
- found_flag = row_sel_try_search_shortcut(node, plan,
- search_latch_locked,
- &mtr);
-
- if (found_flag == SEL_FOUND) {
-
- goto next_table;
-
- } else if (found_flag == SEL_EXHAUSTED) {
-
- goto table_exhausted;
- }
-
- ut_ad(found_flag == SEL_RETRY);
-
- plan_reset_cursor(plan);
-
- mtr_commit(&mtr);
- mtr_start_trx(&mtr, thr_get_trx(thr));
- }
-
- if (search_latch_locked) {
- rw_lock_s_unlock(btr_search_get_latch(index));
-
- search_latch_locked = FALSE;
- }
-
- if (!plan->pcur_is_open) {
- /* Evaluate the expressions to build the search tuple and
- open the cursor */
-
- row_sel_open_pcur(plan, search_latch_locked, &mtr);
-
- cursor_just_opened = TRUE;
-
- /* A new search was made: increment the cost counter */
- cost_counter++;
- } else {
- /* Restore pcur position to the index */
-
- must_go_to_next = row_sel_restore_pcur_pos(plan, &mtr);
-
- cursor_just_opened = FALSE;
-
- if (must_go_to_next) {
- /* We have already processed the cursor record: move
- to the next */
-
- goto next_rec;
- }
- }
-
-rec_loop:
- /* RECORD LOOP
- -----------
- In this loop we use pcur and try to fetch a qualifying row, and
- also fill the prefetch buffer for this table if n_rows_fetched has
- exceeded a threshold. While we are inside this loop, the following
- holds:
- (1) &mtr is started,
- (2) pcur is positioned and open.
-
- NOTE that if cursor_just_opened is TRUE here, it means that we came
- to this point right after row_sel_open_pcur. */
-
- ut_ad(mtr_has_extra_clust_latch == FALSE);
-
- rec = btr_pcur_get_rec(&(plan->pcur));
-
- /* PHASE 1: Set a lock if specified */
-
- if (!node->asc && cursor_just_opened
- && !page_rec_is_supremum(rec)) {
-
- /* When we open a cursor for a descending search, we must set
- a next-key lock on the successor record: otherwise it would
- be possible to insert new records next to the cursor position,
- and it might be that these new records should appear in the
- search result set, resulting in the phantom problem. */
-
- if (!consistent_read) {
- rec_t* next_rec = page_rec_get_next(rec);
- ulint lock_type;
- trx_t* trx;
-
- trx = thr_get_trx(thr);
-
- offsets = rec_get_offsets(next_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- /* If innodb_locks_unsafe_for_binlog option is used
- or this session is using READ COMMITTED or lower isolation
- level, we lock only the record, i.e., next-key
- locking is not used. */
- if (srv_locks_unsafe_for_binlog
- || trx->isolation_level
- <= TRX_ISO_READ_COMMITTED) {
-
- if (page_rec_is_supremum(next_rec)) {
-
- goto skip_lock;
- }
-
- lock_type = LOCK_REC_NOT_GAP;
- } else {
- lock_type = LOCK_ORDINARY;
- }
-
- err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
- next_rec, index, offsets,
- node->row_lock_mode,
- lock_type, thr);
-
- switch (err) {
- case DB_SUCCESS_LOCKED_REC:
- err = DB_SUCCESS;
- case DB_SUCCESS:
- break;
- default:
- /* Note that in this case we will store in pcur
- the PREDECESSOR of the record we are waiting
- the lock for */
- goto lock_wait_or_error;
- }
- }
- }
-
-skip_lock:
- if (page_rec_is_infimum(rec)) {
-
- /* The infimum record on a page cannot be in the result set,
- and neither can a record lock be placed on it: we skip such
- a record. We also increment the cost counter as we may have
- processed yet another page of index. */
-
- cost_counter++;
-
- goto next_rec;
- }
-
- if (!consistent_read) {
- /* Try to place a lock on the index record */
- ulint lock_type;
- trx_t* trx;
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- trx = thr_get_trx(thr);
-
- /* If innodb_locks_unsafe_for_binlog option is used
- or this session is using READ COMMITTED or lower isolation level,
- we lock only the record, i.e., next-key locking is
- not used. */
- if (srv_locks_unsafe_for_binlog
- || trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
-
- if (page_rec_is_supremum(rec)) {
-
- goto next_rec;
- }
-
- lock_type = LOCK_REC_NOT_GAP;
- } else {
- lock_type = LOCK_ORDINARY;
- }
-
- err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
- rec, index, offsets,
- node->row_lock_mode, lock_type, thr);
-
- switch (err) {
- case DB_SUCCESS_LOCKED_REC:
- err = DB_SUCCESS;
- case DB_SUCCESS:
- break;
- default:
- goto lock_wait_or_error;
- }
- }
-
- if (page_rec_is_supremum(rec)) {
-
- /* A page supremum record cannot be in the result set: skip
- it now when we have placed a possible lock on it */
-
- goto next_rec;
- }
-
- ut_ad(page_rec_is_user_rec(rec));
-
- if (cost_counter > SEL_COST_LIMIT) {
-
- /* Now that we have placed the necessary locks, we can stop
- for a while and store the cursor position; NOTE that if we
- would store the cursor position BEFORE placing a record lock,
- it might happen that the cursor would jump over some records
- that another transaction could meanwhile insert adjacent to
- the cursor: this would result in the phantom problem. */
-
- goto stop_for_a_while;
- }
-
- /* PHASE 2: Check a mixed index mix id if needed */
-
- if (plan->unique_search && cursor_just_opened) {
-
- ut_ad(plan->mode == PAGE_CUR_GE);
-
- /* As the cursor is now placed on a user record after a search
- with the mode PAGE_CUR_GE, the up_match field in the cursor
- tells how many fields in the user record matched to the search
- tuple */
-
- if (btr_pcur_get_up_match(&(plan->pcur))
- < plan->n_exact_match) {
- goto table_exhausted;
- }
-
- /* Ok, no need to test end_conds or mix id */
-
- }
-
- /* We are ready to look at a possible new index entry in the result
- set: the cursor is now placed on a user record */
-
- /* PHASE 3: Get previous version in a consistent read */
-
- cons_read_requires_clust_rec = FALSE;
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- if (consistent_read) {
- /* This is a non-locking consistent read: if necessary, fetch
- a previous version of the record */
-
- if (dict_index_is_clust(index)) {
-
- if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
- node->read_view)) {
-
- err = row_sel_build_prev_vers(
- node->read_view, index, rec,
- &offsets, &heap, &plan->old_vers_heap,
- &old_vers, &mtr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
-
- if (old_vers == NULL) {
- /* The record does not exist
- in our read view. Skip it, but
- first attempt to determine
- whether the index segment we
- are searching through has been
- exhausted. */
-
- offsets = rec_get_offsets(
- rec, index, offsets,
- ULINT_UNDEFINED, &heap);
-
- /* Fetch the columns needed in
- test conditions. The clustered
- index record is protected by a
- page latch that was acquired
- by row_sel_open_pcur() or
- row_sel_restore_pcur_pos().
- The latch will not be released
- until mtr_commit(mtr). */
-
- row_sel_fetch_columns(
- index, rec, offsets,
- UT_LIST_GET_FIRST(
- plan->columns));
-
- if (!row_sel_test_end_conds(plan)) {
-
- goto table_exhausted;
- }
-
- goto next_rec;
- }
-
- rec = old_vers;
- }
- } else if (!lock_sec_rec_cons_read_sees(rec,
- node->read_view)) {
- cons_read_requires_clust_rec = TRUE;
- }
- }
-
- /* PHASE 4: Test search end conditions and deleted flag */
-
- /* Fetch the columns needed in test conditions. The record is
- protected by a page latch that was acquired by
- row_sel_open_pcur() or row_sel_restore_pcur_pos(). The latch
- will not be released until mtr_commit(mtr). */
-
- row_sel_fetch_columns(index, rec, offsets,
- UT_LIST_GET_FIRST(plan->columns));
-
- /* Test the selection end conditions: these can only contain columns
- which already are found in the index, even though the index might be
- non-clustered */
-
- if (plan->unique_search && cursor_just_opened) {
-
- /* No test necessary: the test was already made above */
-
- } else if (!row_sel_test_end_conds(plan)) {
-
- goto table_exhausted;
- }
-
- if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))
- && !cons_read_requires_clust_rec) {
-
- /* The record is delete marked: we can skip it if this is
- not a consistent read which might see an earlier version
- of a non-clustered index record */
-
- if (plan->unique_search) {
-
- goto table_exhausted;
- }
-
- goto next_rec;
- }
-
- /* PHASE 5: Get the clustered index record, if needed and if we did
- not do the search using the clustered index */
-
- if (plan->must_get_clust || cons_read_requires_clust_rec) {
-
- /* It was a non-clustered index and we must fetch also the
- clustered index record */
-
- err = row_sel_get_clust_rec(node, plan, rec, thr, &clust_rec,
- &mtr);
- mtr_has_extra_clust_latch = TRUE;
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
-
- /* Retrieving the clustered record required a search:
- increment the cost counter */
-
- cost_counter++;
-
- if (clust_rec == NULL) {
- /* The record did not exist in the read view */
- ut_ad(consistent_read);
-
- goto next_rec;
- }
-
- if (rec_get_deleted_flag(clust_rec,
- dict_table_is_comp(plan->table))) {
-
- /* The record is delete marked: we can skip it */
-
- goto next_rec;
- }
-
- if (node->can_get_updated) {
-
- btr_pcur_store_position(&(plan->clust_pcur), &mtr);
- }
- }
-
- /* PHASE 6: Test the rest of search conditions */
-
- if (!row_sel_test_other_conds(plan)) {
-
- if (plan->unique_search) {
-
- goto table_exhausted;
- }
-
- goto next_rec;
- }
-
- /* PHASE 7: We found a new qualifying row for the current table; push
- the row if prefetch is on, or move to the next table in the join */
-
- plan->n_rows_fetched++;
-
- ut_ad(plan->pcur.latch_mode == BTR_SEARCH_LEAF);
-
- if ((plan->n_rows_fetched <= SEL_PREFETCH_LIMIT)
- || plan->unique_search || plan->no_prefetch
- || plan->table->big_rows) {
-
- /* No prefetch in operation: go to the next table */
-
- goto next_table;
- }
-
- sel_enqueue_prefetched_row(plan);
-
- if (plan->n_rows_prefetched == SEL_MAX_N_PREFETCH) {
-
- /* The prefetch buffer is now full */
-
- sel_dequeue_prefetched_row(plan);
-
- goto next_table;
- }
-
-next_rec:
- ut_ad(!search_latch_locked);
-
- if (mtr_has_extra_clust_latch) {
-
- /* We must commit &mtr if we are moving to the next
- non-clustered index record, because we could break the
- latching order if we would access a different clustered
- index page right away without releasing the previous. */
-
- goto commit_mtr_for_a_while;
- }
-
- if (node->asc) {
- moved = btr_pcur_move_to_next(&(plan->pcur), &mtr);
- } else {
- moved = btr_pcur_move_to_prev(&(plan->pcur), &mtr);
- }
-
- if (!moved) {
-
- goto table_exhausted;
- }
-
- cursor_just_opened = FALSE;
-
- /* END OF RECORD LOOP
- ------------------ */
- goto rec_loop;
-
-next_table:
- /* We found a record which satisfies the conditions: we can move to
- the next table or return a row in the result set */
-
- ut_ad(btr_pcur_is_on_user_rec(&plan->pcur));
-
- if (plan->unique_search && !node->can_get_updated) {
-
- plan->cursor_at_end = TRUE;
- } else {
- ut_ad(!search_latch_locked);
-
- plan->stored_cursor_rec_processed = TRUE;
-
- btr_pcur_store_position(&(plan->pcur), &mtr);
- }
-
- mtr_commit(&mtr);
-
- mtr_has_extra_clust_latch = FALSE;
-
-next_table_no_mtr:
- /* If we use 'goto' to this label, it means that the row was popped
- from the prefetched rows stack, and &mtr is already committed */
-
- if (node->fetch_table + 1 == node->n_tables) {
-
- sel_eval_select_list(node);
-
- if (node->is_aggregate) {
-
- goto table_loop;
- }
-
- sel_assign_into_var_values(node->into_list, node);
-
- thr->run_node = que_node_get_parent(node);
-
- err = DB_SUCCESS;
- goto func_exit;
- }
-
- node->fetch_table++;
-
- /* When we move to the next table, we first reset the plan cursor:
- we do not care about resetting it when we backtrack from a table */
-
- plan_reset_cursor(sel_node_get_nth_plan(node, node->fetch_table));
-
- goto table_loop;
-
-table_exhausted:
- /* The table cursor pcur reached the result set end: backtrack to the
- previous table in the join if we do not have cached prefetched rows */
-
- plan->cursor_at_end = TRUE;
-
- mtr_commit(&mtr);
-
- mtr_has_extra_clust_latch = FALSE;
-
- if (plan->n_rows_prefetched > 0) {
- /* The table became exhausted during a prefetch */
-
- sel_dequeue_prefetched_row(plan);
-
- goto next_table_no_mtr;
- }
-
-table_exhausted_no_mtr:
- if (node->fetch_table == 0) {
- err = DB_SUCCESS;
-
- if (node->is_aggregate && !node->aggregate_already_fetched) {
-
- node->aggregate_already_fetched = TRUE;
-
- sel_assign_into_var_values(node->into_list, node);
-
- thr->run_node = que_node_get_parent(node);
- } else {
- node->state = SEL_NODE_NO_MORE_ROWS;
-
- thr->run_node = que_node_get_parent(node);
- }
-
- goto func_exit;
- }
-
- node->fetch_table--;
-
- goto table_loop;
-
-stop_for_a_while:
- /* Return control for a while to que_run_threads, so that runaway
- queries can be canceled. NOTE that when we come here, we must, in a
- locking read, have placed the necessary (possibly waiting request)
- record lock on the cursor record or its successor: when we reposition
- the cursor, this record lock guarantees that nobody can meanwhile have
- inserted new records which should have appeared in the result set,
- which would result in the phantom problem. */
-
- ut_ad(!search_latch_locked);
-
- plan->stored_cursor_rec_processed = FALSE;
- btr_pcur_store_position(&(plan->pcur), &mtr);
-
- mtr_commit(&mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_except_dict());
-#endif /* UNIV_SYNC_DEBUG */
- err = DB_SUCCESS;
- goto func_exit;
-
-commit_mtr_for_a_while:
- /* Stores the cursor position and commits &mtr; this is used if
- &mtr may contain latches which would break the latching order if
- &mtr would not be committed and the latches released. */
-
- plan->stored_cursor_rec_processed = TRUE;
-
- ut_ad(!search_latch_locked);
- btr_pcur_store_position(&(plan->pcur), &mtr);
-
- mtr_commit(&mtr);
-
- mtr_has_extra_clust_latch = FALSE;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_except_dict());
-#endif /* UNIV_SYNC_DEBUG */
-
- goto table_loop;
-
-lock_wait_or_error:
- /* See the note at stop_for_a_while: the same holds for this case */
-
- ut_ad(!btr_pcur_is_before_first_on_page(&plan->pcur) || !node->asc);
- ut_ad(!search_latch_locked);
-
- plan->stored_cursor_rec_processed = FALSE;
- btr_pcur_store_position(&(plan->pcur), &mtr);
-
- mtr_commit(&mtr);
-
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_except_dict());
-#endif /* UNIV_SYNC_DEBUG */
-
-func_exit:
- if (search_latch_locked) {
- rw_lock_s_unlock(btr_search_get_latch(index));
- }
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
- return(err);
-}
-
-/**********************************************************************//**
-Executes one step of a SELECT node in an SQL execution graph.
-
-A node that has an INTO list and is entered from its parent is first put
-back into the SEL_NODE_OPEN state. A node in SEL_NODE_OPEN is then prepared:
-the transaction is started if necessary, the cursor of plan 0 is reset, and
-either a read view is assigned (consistent read) or an intention lock is
-requested on every table of the table list. After that the node is switched
-to SEL_NODE_FETCH and row_sel() is run.
-
-Any result other than DB_SUCCESS from lock_table() or row_sel() is stored in
-the error_state of the transaction and NULL is returned.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_sel_step(
-/*=========*/
-    que_thr_t*  thr)    /*!< in: query thread */
-{
-    ut_ad(thr);
-
-    sel_node_t* sel = static_cast<sel_node_t*>(thr->run_node);
-
-    ut_ad(que_node_get_type(sel) == QUE_NODE_SELECT);
-
-    /* Entering a SELECT ... INTO from its parent restarts it. */
-    if (sel->into_list && thr->prev_node == que_node_get_parent(sel)) {
-        sel->state = SEL_NODE_OPEN;
-    }
-
-    if (sel->state == SEL_NODE_OPEN) {
-
-        /* The session may not have a running transaction yet, or
-        the previous one may already have been committed. */
-        trx_start_if_not_started_xa(thr_get_trx(thr));
-
-        plan_reset_cursor(sel_node_get_nth_plan(sel, 0));
-
-        if (!sel->consistent_read) {
-            /* Locking read: request IX or IS on each table. */
-            const enum lock_mode    intention
-                = sel->set_x_locks ? LOCK_IX : LOCK_IS;
-            sym_node_t*             tab = sel->table_list;
-
-            while (tab != 0) {
-                const dberr_t   lock_err = lock_table(
-                    0, tab->table, intention, thr);
-
-                if (lock_err != DB_SUCCESS) {
-                    thr_get_trx(thr)->error_state = lock_err;
-
-                    return(NULL);
-                }
-
-                tab = static_cast<sym_node_t*>(
-                    que_node_get_next(tab));
-            }
-        } else {
-            /* Non-locking read: the query needs a read view. */
-            sel->read_view = trx_assign_read_view(
-                thr_get_trx(thr));
-        }
-
-        /* Snapshot stored procedure variable values so that they
-        cannot change between fetches.
-        NOTE(review): an older comment here claimed the copy is also
-        done for non-explicit cursors, but the condition below only
-        copies for explicit cursors -- confirm which is intended. */
-        if (sel->explicit_cursor
-            && UT_LIST_GET_FIRST(sel->copy_variables)) {
-
-            row_sel_copy_input_variable_vals(sel);
-        }
-
-        sel->state = SEL_NODE_FETCH;
-        sel->fetch_table = 0;
-
-        if (sel->is_aggregate) {
-            /* Start the aggregate totals from scratch. */
-            sel_reset_aggregate_vals(sel);
-        }
-    }
-
-    const dberr_t   sel_err = row_sel(sel, thr);
-
-    /* NOTE! if queries are parallelized, this assignment may be a
-    problem: it should be made only if thr is the only top-level thr
-    in the graph. */
-    thr->graph->last_sel_node = sel;
-
-    if (sel_err == DB_SUCCESS) {
-
-        return(thr);
-    }
-
-    thr_get_trx(thr)->error_state = sel_err;
-
-    return(NULL);
-}
-
-/**********************************************************************//**
-Executes one step of a FETCH node for a cursor.
-
-When control arrives from the parent of the fetch node, the fetch node makes
-itself the parent of the cursor's select node and passes control to it; if
-the cursor is in state SEL_NODE_CLOSED an error is printed, DB_ERROR is
-stored in the transaction and NULL is returned.
-
-When control arrives from anywhere else and the cursor is not in state
-SEL_NODE_NO_MORE_ROWS, the row is delivered: either assigned to the INTO
-variables or handed to the user callback. A callback result that tests
-false moves the cursor to SEL_NODE_NO_MORE_ROWS. Control then returns to
-the parent of the fetch node.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-fetch_step(
-/*=======*/
-    que_thr_t*  thr)    /*!< in: query thread */
-{
-    ut_ad(thr);
-
-    fetch_node_t*   fetch = static_cast<fetch_node_t*>(thr->run_node);
-    sel_node_t*     cursor = fetch->cursor_def;
-
-    ut_ad(que_node_get_type(fetch) == QUE_NODE_FETCH);
-
-    if (thr->prev_node == que_node_get_parent(fetch)) {
-
-        /* Become the parent of the cursor definition for the time
-        of the fetch, so that execution comes back to this node once
-        a row has been selected or no row is left. */
-        cursor->common.parent = fetch;
-
-        if (cursor->state == SEL_NODE_CLOSED) {
-            fprintf(stderr,
-                "InnoDB: Error: fetch called on a closed cursor\n");
-
-            thr_get_trx(thr)->error_state = DB_ERROR;
-
-            return(NULL);
-        }
-
-        thr->run_node = cursor;
-
-        return(thr);
-    }
-
-    /* Control did not come from the parent: deliver the row, if any. */
-    if (cursor->state != SEL_NODE_NO_MORE_ROWS) {
-
-        if (fetch->into_list) {
-            sel_assign_into_var_values(fetch->into_list, cursor);
-        } else {
-            ibool   keep_going = (*fetch->func->func)(
-                cursor, fetch->func->arg);
-
-            if (!keep_going) {
-                cursor->state = SEL_NODE_NO_MORE_ROWS;
-            }
-        }
-    }
-
-    thr->run_node = que_node_get_parent(fetch);
-
-    return(thr);
-}
-
-/****************************************************************//**
-Sample fetch callback: dumps every column of the select list of the given
-select node to stderr. For each column its index and type are printed,
-followed by the raw data, or " <NULL>;" when the length is UNIV_SQL_NULL.
-@return always returns non-NULL */
-UNIV_INTERN
-void*
-row_fetch_print(
-/*============*/
-    void*   row,        /*!< in: sel_node_t* */
-    void*   user_arg)   /*!< in: not used */
-{
-    sel_node_t* sel = static_cast<sel_node_t*>(row);
-
-    UT_NOT_USED(user_arg);
-
-    fprintf(stderr, "row_fetch_print: row %p\n", row);
-
-    que_node_t* col = sel->select_list;
-    ulint       col_no = 0;
-
-    while (col != 0) {
-        dfield_t*       value = que_node_get_val(col);
-        const dtype_t*  value_type = dfield_get_type(value);
-
-        fprintf(stderr, " column %lu:\n", (ulong) col_no);
-
-        dtype_print(value_type);
-        putc('\n', stderr);
-
-        if (dfield_get_len(value) == UNIV_SQL_NULL) {
-            fputs(" <NULL>;\n", stderr);
-        } else {
-            ut_print_buf(stderr, dfield_get_data(value),
-                     dfield_get_len(value));
-            putc('\n', stderr);
-        }
-
-        col = que_node_get_next(col);
-        col_no++;
-    }
-
-    /* Any non-NULL value will do as the return value. */
-    return((void*)42);
-}
-
-/***********************************************************//**
-Executes one step of a ROW_PRINTF node, which prints the rows of a select.
-
-Entered from its parent, the node reopens the select cursor and passes
-control to it. Entered otherwise while the select node is in state
-SEL_NODE_FETCH, it prints every select list value (also in hex) to stderr,
-each followed by " ::: ", ends the line and asks the select node for the
-next row. In any other select state control returns to the parent.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_printf_step(
-/*============*/
-    que_thr_t*  thr)    /*!< in: query thread */
-{
-    ut_ad(thr);
-
-    row_printf_node_t*  printer
-        = static_cast<row_printf_node_t*>(thr->run_node);
-    sel_node_t*         cursor = printer->sel_node;
-
-    ut_ad(que_node_get_type(printer) == QUE_NODE_ROW_PRINTF);
-
-    if (thr->prev_node == que_node_get_parent(printer)) {
-
-        /* Fresh entry: reset the cursor before the first fetch. */
-        cursor->state = SEL_NODE_OPEN;
-
-    } else if (cursor->state == SEL_NODE_FETCH) {
-
-        /* A row is available: print all of its columns. */
-        for (que_node_t* col = cursor->select_list;
-             col;
-             col = que_node_get_next(col)) {
-
-            dfield_print_also_hex(que_node_get_val(col));
-
-            fputs(" ::: ", stderr);
-        }
-
-        putc('\n', stderr);
-
-    } else {
-        ut_ad(cursor->state == SEL_NODE_NO_MORE_ROWS);
-
-        /* Nothing left to print: go back to the parent. */
-        thr->run_node = que_node_get_parent(printer);
-
-        return(thr);
-    }
-
-    /* Let the select node fetch the next row to print. */
-    thr->run_node = cursor;
-
-    return(thr);
-}
-
-/****************************************************************//**
-Converts a key value stored in MySQL format to an Innobase dtuple. The last
-field of the key value may be just a prefix of a fixed length field: hence
-the parameter key_len. But currently we do not allow search keys where the
-last field is only a prefix of the full key field len and print a warning if
-such appears. A counterpart of this function is
-ha_innobase::store_key_val_for_row() in ha_innodb.cc.
-
-The key is consumed one index field at a time until key_ptr reaches
-key_ptr + key_len; on return the field count of the tuple is set to the
-number of fields that were consumed. If the first field of the tuple has
-mtype DATA_SYS, the key is instead taken to be a bare row id of
-DATA_ROW_ID_LEN bytes and a one-field tuple is produced. */
-UNIV_INTERN
-void
-row_sel_convert_mysql_key_to_innobase(
-/*==================================*/
- dtuple_t* tuple, /*!< in/out: tuple where to build;
- NOTE: we assume that the type info
- in the tuple is already according
- to index! */
- byte* buf, /*!< in: buffer to use in field
- conversions; NOTE that dtuple->data
- may end up pointing inside buf so
- do not discard that buffer while
- the tuple is being used. See
- row_mysql_store_col_in_innobase_format()
- in the case of DATA_INT */
- ulint buf_len, /*!< in: buffer length; the function
- asserts that the conversions never
- advance past buf + buf_len */
- dict_index_t* index, /*!< in: index of the key value */
- const byte* key_ptr, /*!< in: MySQL key value */
- ulint key_len, /*!< in: MySQL key value length */
- trx_t* trx) /*!< in: transaction; used here only
- when printing the index name in the
- partial-field warning */
-{
- byte* original_buf = buf;
- const byte* original_key_ptr = key_ptr;
- dict_field_t* field;
- dfield_t* dfield;
- ulint data_offset;
- ulint data_len;
- ulint data_field_len;
- ibool is_null;
- const byte* key_end;
- ulint n_fields = 0;
-
- /* For documentation of the key value storage format in MySQL, see
- ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
-
- key_end = key_ptr + key_len;
-
- /* Permit us to access any field in the tuple (ULINT_MAX); the real
- field count is set again before every return from this function: */
-
- dtuple_set_n_fields(tuple, ULINT_MAX);
-
- dfield = dtuple_get_nth_field(tuple, 0);
- field = dict_index_get_nth_field(index, 0);
-
- if (UNIV_UNLIKELY(dfield_get_type(dfield)->mtype == DATA_SYS)) {
- /* A special case: we are looking for a position in the
- generated clustered index which InnoDB automatically added
- to a table with no primary key: the first and the only
- ordering column is ROW_ID which InnoDB stored to the key_ptr
- buffer. */
-
- ut_a(key_len == DATA_ROW_ID_LEN);
-
- dfield_set_data(dfield, key_ptr, DATA_ROW_ID_LEN);
-
- dtuple_set_n_fields(tuple, 1);
-
- return;
- }
-
- while (key_ptr < key_end) {
-
- ulint type = dfield_get_type(dfield)->mtype;
- ut_a(field->col->mtype == type);
-
- data_offset = 0;
- is_null = FALSE;
-
- if (!(dfield_get_type(dfield)->prtype & DATA_NOT_NULL)) {
- /* The first byte in the field tells if this is
- an SQL NULL value: any nonzero byte means NULL. The
- marker byte is skipped via data_offset either way. */
-
- data_offset = 1;
-
- if (*key_ptr != 0) {
- dfield_set_null(dfield);
-
- is_null = TRUE;
- }
- }
-
- /* Calculate data length (data_len, the bytes handed to the
- conversion) and data field total length (data_field_len, the
- bytes this field occupies in the key buffer) */
-
- if (type == DATA_BLOB) {
- /* The key field is a column prefix of a BLOB or
- TEXT */
-
- ut_a(field->prefix_len > 0);
-
- /* MySQL stores the actual data length to the first 2
- bytes after the optional SQL NULL marker byte. The
- storage format is little-endian, that is, the most
- significant byte at a higher address. In UTF-8, MySQL
- seems to reserve field->prefix_len bytes for
- storing this field in the key value buffer, even
- though the actual value only takes data_len bytes
- from the start.
- NOTE(review): the two length bytes are read without
- first checking that they lie before key_end; confirm
- that callers always supply a complete length field. */
-
- data_len = key_ptr[data_offset]
- + 256 * key_ptr[data_offset + 1];
- data_field_len = data_offset + 2 + field->prefix_len;
-
- data_offset += 2;
-
- /* Now that we know the length, we store the column
- value like it would be a fixed char field */
-
- } else if (field->prefix_len > 0) {
- /* Looks like MySQL pads unused end bytes in the
- prefix with space. Therefore, also in UTF-8, it is ok
- to compare with a prefix containing full prefix_len
- bytes, and no need to take at most prefix_len / 3
- UTF-8 characters from the start.
- If the prefix is used as the upper end of a LIKE
- 'abc%' query, then MySQL pads the end with chars
- 0xff. TODO: in that case does it any harm to compare
- with the full prefix_len bytes. How do characters
- 0xff in UTF-8 behave? */
-
- data_len = field->prefix_len;
- data_field_len = data_offset + data_len;
- } else {
- data_len = dfield_get_type(dfield)->len;
- data_field_len = data_offset + data_len;
- }
-
- if (UNIV_UNLIKELY
- (dtype_get_mysql_type(dfield_get_type(dfield))
- == DATA_MYSQL_TRUE_VARCHAR)
- && UNIV_LIKELY(type != DATA_INT)) {
- /* In a MySQL key value format, a true VARCHAR is
- always preceded by 2 bytes of a length field.
- dfield_get_type(dfield)->len returns the maximum
- 'payload' len in bytes. That does not include the
- 2 bytes that tell the actual data length.
-
- We added the check != DATA_INT to make sure we do
- not treat MySQL ENUM or SET as a true VARCHAR! */
-
- data_len += 2;
- data_field_len += 2;
- }
-
- /* Storing may use at most data_len bytes of buf */
-
- if (UNIV_LIKELY(!is_null)) {
- buf = row_mysql_store_col_in_innobase_format(
- dfield, buf,
- /* MySQL key value format col */
- FALSE,
- key_ptr + data_offset, data_len,
- dict_table_is_comp(index->table));
- ut_a(buf <= original_buf + buf_len);
- }
-
- key_ptr += data_field_len;
-
- if (UNIV_UNLIKELY(key_ptr > key_end)) {
- /* The last field in key was not a complete key field
- but a prefix of it.
-
- Print a warning about this! HA_READ_PREFIX_LAST does
- not currently work in InnoDB with partial-field key
- value prefixes. Since MySQL currently uses a padding
- trick to calculate LIKE 'abc%' type queries there
- should never be partial-field prefixes in searches.
-
- After the warning, the length of a non-NULL field is
- reduced by the number of bytes key_ptr overshot
- key_end, and debug builds then stop at ut_ad(0). */
-
- ut_print_timestamp(stderr);
-
- fputs(" InnoDB: Warning: using a partial-field"
- " key prefix in search.\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fprintf(stderr, ". Last data field length %lu bytes,\n"
- "InnoDB: key ptr now exceeds"
- " key end by %lu bytes.\n"
- "InnoDB: Key value in the MySQL format:\n",
- (ulong) data_field_len,
- (ulong) (key_ptr - key_end));
- fflush(stderr);
- ut_print_buf(stderr, original_key_ptr, key_len);
- putc('\n', stderr);
-
- if (!is_null) {
- ulint len = dfield_get_len(dfield);
- dfield_set_len(dfield, len
- - (ulint) (key_ptr - key_end));
- }
- ut_ad(0);
- }
-
- n_fields++;
- field++;
- dfield++;
- }
-
- ut_a(buf <= original_buf + buf_len);
-
- /* We set the length of tuple to n_fields: we assume that the memory
- area allocated for it is big enough (usually bigger than n_fields). */
-
- dtuple_set_n_fields(tuple, n_fields);
-}
-
-/**************************************************************//**
-Copies the row id of an index record into prebuilt->row_id. The row id is
-the field at the position that dict_index_get_sys_col_pos() reports for
-DATA_ROW_ID. If that field is not DATA_ROW_ID_LEN bytes long, the index
-name, the field number and the record are printed to stderr and ut_error is
-raised. */
-static
-void
-row_sel_store_row_id_to_prebuilt(
-/*=============================*/
-    row_prebuilt_t*     prebuilt,   /*!< in/out: prebuilt */
-    const rec_t*        index_rec,  /*!< in: record */
-    const dict_index_t* index,      /*!< in: index of the record */
-    const ulint*        offsets)    /*!< in: rec_get_offsets
-                                    (index_rec, index) */
-{
-    ut_ad(rec_offs_validate(index_rec, index, offsets));
-
-    ulint       row_id_len;
-    const byte* row_id_ptr = rec_get_nth_field(
-        index_rec, offsets,
-        dict_index_get_sys_col_pos(index, DATA_ROW_ID),
-        &row_id_len);
-
-    if (UNIV_UNLIKELY(row_id_len != DATA_ROW_ID_LEN)) {
-        /* Report as much context as possible before failing. */
-        fprintf(stderr,
-            "InnoDB: Error: Row id field is wrong length %lu in ",
-            (ulong) row_id_len);
-        dict_index_name_print(stderr, prebuilt->trx, index);
-        fprintf(stderr,
-            "\nInnoDB: Field number %lu, record:\n",
-            (ulong) dict_index_get_sys_col_pos(
-                index, DATA_ROW_ID));
-        rec_print_new(stderr, index_rec, offsets);
-        putc('\n', stderr);
-        ut_error;
-    }
-
-    ut_memcpy(prebuilt->row_id, row_id_ptr, row_id_len);
-}
-
-#ifdef UNIV_DEBUG
-/** Convert a non-SQL-NULL field from Innobase format to MySQL format.
-Debug build: all six arguments, including the index (idx) and the field
-number (field), are forwarded to row_sel_field_store_in_mysql_format_func. */
-# define row_sel_field_store_in_mysql_format( \
- dest,templ,idx,field,src,len) \
- row_sel_field_store_in_mysql_format_func \
- (dest,templ,idx,field,src,len)
-#else /* UNIV_DEBUG */
-/** Convert a non-SQL-NULL field from Innobase format to MySQL format.
-Non-debug build: the idx and field arguments are accepted but dropped; only
-dest, templ, src and len are forwarded. */
-# define row_sel_field_store_in_mysql_format( \
- dest,templ,idx,field,src,len) \
- row_sel_field_store_in_mysql_format_func \
- (dest,templ,src,len)
-#endif /* UNIV_DEBUG */
-
-/**************************************************************//**
-Stores a non-SQL-NULL field in the MySQL format. The counterpart of this
-function is row_mysql_store_col_in_innobase_format() in row0mysql.cc.
-
-What is written to dest depends on templ->type:
-DATA_INT: the bytes in reverse order, with the top bit of the last byte
-flipped for signed columns;
-DATA_VARCHAR, DATA_VARMYSQL, DATA_BINARY: for a true VARCHAR the length
-prefix followed by the data, otherwise the data followed by padding up to
-mysql_col_len;
-DATA_BLOB: a reference to the BLOB data made by row_mysql_store_blob_ref();
-DATA_MYSQL: the data, space padded when mbminlen == 1 and mbmaxlen != 1;
-anything else: a plain copy of len bytes. */
-static MY_ATTRIBUTE((nonnull))
-void
-row_sel_field_store_in_mysql_format_func(
-/*=====================================*/
- byte* dest, /*!< in/out: buffer where to store; NOTE
- that BLOBs are not in themselves
- stored here: the caller must allocate
- and copy the BLOB into buffer before,
- and pass the pointer to the BLOB in
- 'data' */
- const mysql_row_templ_t* templ,
- /*!< in: MySQL column template.
- Its following fields are referenced:
- type, is_unsigned, mysql_col_len,
- mbminlen, mbmaxlen */
-#ifdef UNIV_DEBUG
- const dict_index_t* index,
- /*!< in: InnoDB index; present in
- debug builds only */
- ulint field_no,
- /*!< in: templ->rec_field_no or
- templ->clust_rec_field_no or
- templ->icp_rec_field_no; present in
- debug builds only */
-#endif /* UNIV_DEBUG */
- const byte* data, /*!< in: data to store */
- ulint len) /*!< in: length of the data */
-{
- byte* ptr;
-#ifdef UNIV_DEBUG
- const dict_field_t* field
- = dict_index_get_nth_field(index, field_no);
-#endif /* UNIV_DEBUG */
-
- ut_ad(len != UNIV_SQL_NULL);
- UNIV_MEM_ASSERT_RW(data, len);
- UNIV_MEM_ASSERT_W(dest, templ->mysql_col_len);
- UNIV_MEM_INVALID(dest, templ->mysql_col_len);
-
- switch (templ->type) {
- const byte* field_end;
- byte* pad;
- case DATA_INT:
- /* Convert integer data from Innobase to a little-endian
- format, sign bit restored to normal. The loop walks data
- forwards while filling dest backwards from dest + len - 1.
- NOTE(review): the loop decrements ptr before testing it, so
- it relies on len > 0 -- confirm that no caller passes a
- zero-length integer. */
-
- ptr = dest + len;
-
- for (;;) {
- ptr--;
- *ptr = *data;
- if (ptr == dest) {
- break;
- }
- data++;
- }
-
- if (!templ->is_unsigned) {
- dest[len - 1] = (byte) (dest[len - 1] ^ 128);
- }
-
- ut_ad(templ->mysql_col_len == len);
- break;
-
- case DATA_VARCHAR:
- case DATA_VARMYSQL:
- case DATA_BINARY:
- field_end = dest + templ->mysql_col_len;
-
- if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
- /* This is a >= 5.0.3 type true VARCHAR. Store the
- length of the data to the first byte or the first
- two bytes of dest. dest is advanced past the length
- bytes by the call below. */
-
- dest = row_mysql_store_true_var_len(
- dest, len, templ->mysql_length_bytes);
- /* Copy the actual data. Leave the rest of the
- buffer uninitialized. */
- memcpy(dest, data, len);
- break;
- }
-
- /* Copy the actual data */
- ut_memcpy(dest, data, len);
-
- /* Pad with trailing spaces, starting right after the
- copied data. */
-
- pad = dest + len;
-
- ut_ad(templ->mbminlen <= templ->mbmaxlen);
-
- /* We treat some Unicode charset strings specially. Other
- values of mbminlen need no adjustment before padding. */
- switch (templ->mbminlen) {
- case 4:
- /* InnoDB should never have stripped partial
- UTF-32 characters. */
- ut_a(!(len & 3));
- break;
- case 2:
- /* A space char is two bytes,
- 0x0020 in UCS2 and UTF-16 */
-
- if (UNIV_UNLIKELY(len & 1)) {
- /* A 0x20 has been stripped from the column.
- Pad it back. */
-
- if (pad < field_end) {
- *pad++ = 0x20;
- }
- }
- }
-
- row_mysql_pad_col(templ->mbminlen, pad, field_end - pad);
- break;
-
- case DATA_BLOB:
- /* Store a pointer to the BLOB buffer to dest: the BLOB was
- already copied to the buffer in row_sel_store_mysql_rec */
-
- row_mysql_store_blob_ref(dest, templ->mysql_col_len, data,
- len);
- break;
-
- case DATA_MYSQL:
- memcpy(dest, data, len);
-
- ut_ad(templ->mysql_col_len >= len);
- ut_ad(templ->mbmaxlen >= templ->mbminlen);
-
- /* If field_no equals to templ->icp_rec_field_no,
- we are examining a row pointed by "icp_rec_field_no".
- There is possibility that icp_rec_field_no refers to
- a field in a secondary index while templ->rec_field_no
- points to field in a primary index. The length
- should still be equal, unless the field pointed
- by icp_rec_field_no has a prefix */
- ut_ad(templ->mbmaxlen > templ->mbminlen
- || templ->mysql_col_len == len
- || (field_no == templ->icp_rec_field_no
- && field->prefix_len > 0));
-
- /* The following assertion would fail for old tables
- containing UTF-8 ENUM columns due to Bug #9526. */
- ut_ad(!templ->mbmaxlen
- || !(templ->mysql_col_len % templ->mbmaxlen));
- ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len
- || (field_no == templ->icp_rec_field_no
- && field->prefix_len > 0)
- || templ->rec_field_is_prefix);
- ut_ad(!(field->prefix_len % templ->mbmaxlen));
-
- if (templ->mbminlen == 1 && templ->mbmaxlen != 1) {
- /* Pad with spaces. This undoes the stripping
- done in row0mysql.cc, function
- row_mysql_store_col_in_innobase_format(). */
-
- memset(dest + len, 0x20, templ->mysql_col_len - len);
- }
- break;
-
- default:
-#ifdef UNIV_DEBUG
- case DATA_SYS_CHILD:
- case DATA_SYS:
- /* These column types should never be shipped to MySQL.
- In debug builds the default label lands here as well, so
- any type not listed below also trips the assertion. */
- ut_ad(0);
-
- case DATA_CHAR:
- case DATA_FIXBINARY:
- case DATA_FLOAT:
- case DATA_DOUBLE:
- case DATA_DECIMAL:
- /* Above are the valid column types for MySQL data. They
- are stored as a plain copy of len bytes. */
-#endif /* UNIV_DEBUG */
- ut_ad(field->prefix_len
- ? field->prefix_len == len
- : templ->mysql_col_len == len);
- memcpy(dest, data, len);
- }
-}
-
-#ifdef UNIV_DEBUG
-/** Convert a field from Innobase format to MySQL format.
-Debug build: all seven arguments are forwarded to
-row_sel_store_mysql_field_func. */
-# define row_sel_store_mysql_field(m,p,r,i,o,f,t) \
- row_sel_store_mysql_field_func(m,p,r,i,o,f,t)
-#else /* UNIV_DEBUG */
-/** Convert a field from Innobase format to MySQL format.
-Non-debug build: the fourth argument (i) is accepted but not forwarded to
-row_sel_store_mysql_field_func. */
-# define row_sel_store_mysql_field(m,p,r,i,o,f,t) \
- row_sel_store_mysql_field_func(m,p,r,o,f,t)
-#endif /* UNIV_DEBUG */
-/** Convert a field in the Innobase format to a field in the MySQL format.
-A field that is SQL NULL is filled from prebuilt->default_rec and its NULL
-bit is set; a non-NULL field has its NULL bit (if any) cleared.
-@param[out] mysql_rec record in the MySQL format
-@param[in,out] prebuilt prebuilt struct
-@param[in] rec InnoDB record; must be protected
- by a page latch
-@param[in] index index of rec (this parameter only exists
- when UNIV_DEBUG is defined)
-@param[in] offsets array returned by rec_get_offsets()
-@param[in] field_no templ->rec_field_no or
- templ->clust_rec_field_no
- or templ->icp_rec_field_no
- or sec field no if clust_templ_for_sec
- is TRUE
-@param[in] templ row template
-@return TRUE on success; FALSE if an externally stored field could not
-be copied because it has not been written yet
-*/
-static MY_ATTRIBUTE((warn_unused_result))
-ibool
-row_sel_store_mysql_field_func(
- byte* mysql_rec,
- row_prebuilt_t* prebuilt,
- const rec_t* rec,
-#ifdef UNIV_DEBUG
- const dict_index_t* index,
-#endif
- const ulint* offsets,
- ulint field_no,
- const mysql_row_templ_t*templ)
-{
- const byte* data;
- ulint len;
-
- ut_ad(prebuilt->default_rec);
- ut_ad(templ);
- ut_ad(templ >= prebuilt->mysql_template);
- ut_ad(templ < &prebuilt->mysql_template[prebuilt->n_template]);
- ut_ad(field_no == templ->clust_rec_field_no
- || field_no == templ->rec_field_no
- || field_no == templ->icp_rec_field_no);
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) {
-
- mem_heap_t* heap;
- /* Copy an externally stored field to a heap: DATA_BLOB columns
- use prebuilt->blob_heap, which is not freed in this function;
- all other types use a temporary heap that is freed below. */
-
- ut_a(!prebuilt->trx->has_search_latch);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!btr_search_own_any());
-#endif
- ut_ad(field_no == templ->clust_rec_field_no);
-
- if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) {
- if (prebuilt->blob_heap == NULL) {
- prebuilt->blob_heap = mem_heap_create(
- UNIV_PAGE_SIZE);
- }
-
- heap = prebuilt->blob_heap;
- } else {
- heap = mem_heap_create(UNIV_PAGE_SIZE);
- }
-
- /* NOTE: if we are retrieving a big BLOB, we may
- run out of memory in the next call, which
- causes an assert */
-
- data = btr_rec_copy_externally_stored_field(
- rec, offsets,
- dict_table_zip_size(prebuilt->table),
- field_no, &len, heap, NULL);
-
- if (UNIV_UNLIKELY(!data)) {
- /* The externally stored field was not written
- yet. This record should only be seen by
- recv_recovery_rollback_active() or any
- TRX_ISO_READ_UNCOMMITTED transactions. */
-
- if (heap != prebuilt->blob_heap) {
- mem_heap_free(heap);
- }
-
- ut_a(prebuilt->trx->isolation_level
- == TRX_ISO_READ_UNCOMMITTED);
- return(FALSE);
- }
-
- ut_a(len != UNIV_SQL_NULL);
-
- row_sel_field_store_in_mysql_format(
- mysql_rec + templ->mysql_col_offset,
- templ, index, field_no, data, len);
-
- if (heap != prebuilt->blob_heap) {
- mem_heap_free(heap);
- }
- } else {
- /* Field is stored in the row. */
-
- data = rec_get_nth_field(rec, offsets, field_no, &len);
-
- if (len == UNIV_SQL_NULL) {
- /* MySQL assumes that the field for an SQL
- NULL value is set to the default value. */
- ut_ad(templ->mysql_null_bit_mask);
-
- UNIV_MEM_ASSERT_RW(prebuilt->default_rec
- + templ->mysql_col_offset,
- templ->mysql_col_len);
- mysql_rec[templ->mysql_null_byte_offset]
- |= (byte) templ->mysql_null_bit_mask;
- memcpy(mysql_rec + templ->mysql_col_offset,
- (const byte*) prebuilt->default_rec
- + templ->mysql_col_offset,
- templ->mysql_col_len);
- return(TRUE);
- }
-
- if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) {
-
- /* It is a BLOB field locally stored in the
- InnoDB record: we MUST copy its contents to
- prebuilt->blob_heap here because
- row_sel_field_store_in_mysql_format() stores a
- pointer to the data, and the data passed to us
- will be invalid as soon as the
- mini-transaction is committed and the page
- latch on the clustered index page is
- released. */
-
- if (prebuilt->blob_heap == NULL) {
- prebuilt->blob_heap = mem_heap_create(
- UNIV_PAGE_SIZE);
- }
-
- data = static_cast<byte*>(
- mem_heap_dup(prebuilt->blob_heap, data, len));
- }
-
- row_sel_field_store_in_mysql_format(
- mysql_rec + templ->mysql_col_offset,
- templ, index, field_no, data, len);
- }
-
- ut_ad(len != UNIV_SQL_NULL);
-
- if (templ->mysql_null_bit_mask) {
- /* It is a nullable column with a non-NULL
- value: clear its NULL bit */
- mysql_rec[templ->mysql_null_byte_offset]
- &= ~(byte) templ->mysql_null_bit_mask;
- }
-
- return(TRUE);
-}
-
-/** Convert a row in the Innobase format to a row in the MySQL format.
-Note that the template in prebuilt may advise us to copy only a few
-columns to mysql_rec, other columns are left blank. All columns may not
-be needed in the query.
-Any existing prebuilt->blob_heap is freed before the columns are converted.
-@param[out] mysql_rec row in the MySQL format
-@param[in] prebuilt prebuilt structure
-@param[in] rec Innobase record in the index
- which was described in prebuilt's
- template, or in the clustered index;
- must be protected by a page latch
-@param[in] rec_clust TRUE if rec is in the clustered index
-@param[in] index index of rec
-@param[in] offsets array returned by rec_get_offsets(rec)
-@return TRUE on success, FALSE if not all columns could be retrieved */
-static MY_ATTRIBUTE((warn_unused_result))
-ibool
-row_sel_store_mysql_rec(
- byte* mysql_rec,
- row_prebuilt_t* prebuilt,
- const rec_t* rec,
- ibool rec_clust,
- const dict_index_t* index,
- const ulint* offsets)
-{
- ulint i;
- ut_ad(rec_clust || index == prebuilt->index);
- ut_ad(!rec_clust || dict_index_is_clust(index));
-
- if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
- mem_heap_free(prebuilt->blob_heap);
- prebuilt->blob_heap = NULL;
- }
-
- for (i = 0; i < prebuilt->n_template; i++) {
- const mysql_row_templ_t*templ = &prebuilt->mysql_template[i];
- const ulint field_no
- = rec_clust
- ? templ->clust_rec_field_no
- : templ->rec_field_no;
- /* We should never deliver column prefixes to MySQL,
- except for evaluating innobase_index_cond() and if the prefix
- index is longer than the actual row data. */
- /* ...actually, we do want to do this in order to
- support the prefix query optimization.
-
- ut_ad(dict_index_get_nth_field(index, field_no)->prefix_len
- == 0 || templ->rec_field_is_prefix);
-
-
- ...so we disable this assert. */
-
- if (!row_sel_store_mysql_field(mysql_rec, prebuilt,
- rec, index, offsets,
- field_no, templ)) {
- return(FALSE);
- }
- }
-
- /* FIXME: We only need to read the doc_id if an FTS indexed
- column is being updated.
- NOTE, the record must be a clustered index record. A secondary
- index might not have the Doc ID */
- if (dict_table_has_fts_index(prebuilt->table)
- && dict_index_is_clust(index)) {
-
- prebuilt->fts_doc_id = fts_get_doc_id_from_rec(
- prebuilt->table, rec, NULL);
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Builds a previous version of a clustered index record for a consistent read.
-The old version is built in prebuilt->old_vers_heap, which is emptied if it
-already exists and created otherwise.
-@return DB_SUCCESS or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_sel_build_prev_vers_for_mysql(
-/*==============================*/
- read_view_t* read_view, /*!< in: read view */
- dict_index_t* clust_index, /*!< in: clustered index */
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */
- const rec_t* rec, /*!< in: record in a clustered index */
- ulint** offsets, /*!< in/out: offsets returned by
- rec_get_offsets(rec, clust_index) */
- mem_heap_t** offset_heap, /*!< in/out: memory heap from which
- the offsets are allocated */
- rec_t** old_vers, /*!< out: old version, or NULL if the
- record does not exist in the view:
- i.e., it was freshly inserted
- afterwards */
- mtr_t* mtr) /*!< in: mtr */
-{
- dberr_t err;
-
- if (prebuilt->old_vers_heap) {
- mem_heap_empty(prebuilt->old_vers_heap);
- } else {
- prebuilt->old_vers_heap = mem_heap_create(200);
- }
-
- err = row_vers_build_for_consistent_read(
- rec, mtr, clust_index, offsets, read_view, offset_heap,
- prebuilt->old_vers_heap, old_vers);
- return(err);
-}
-
-/*********************************************************************//**
-Retrieves the clustered index record corresponding to a record in a
-non-clustered index. Does the necessary locking. Used in the MySQL
-interface.
-Note that *out_rec can be NULL even when DB_SUCCESS is returned, namely
-when no matching clustered index record (version) is found. When the
-function leaves through err_exit, *out_rec stays NULL and the position of
-prebuilt->clust_pcur is not stored.
-@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, or error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_sel_get_clust_rec_for_mysql(
-/*============================*/
- row_prebuilt_t* prebuilt,/*!< in: prebuilt struct in the handle */
- dict_index_t* sec_index,/*!< in: secondary index where rec resides */
- const rec_t* rec, /*!< in: record in a non-clustered index; if
- this is a locking read, then rec is not
- allowed to be delete-marked, and that would
- not make sense either */
- que_thr_t* thr, /*!< in: query thread */
- const rec_t** out_rec,/*!< out: clustered record or an old version of
- it, NULL if the old version did not exist
- in the read view, i.e., it was a freshly
- inserted version */
- ulint** offsets,/*!< in: offsets returned by
- rec_get_offsets(rec, sec_index);
- out: offsets returned by
- rec_get_offsets(out_rec, clust_index) */
- mem_heap_t** offset_heap,/*!< in/out: memory heap from which
- the offsets are allocated */
- mtr_t* mtr) /*!< in: mtr used to get access to the
- non-clustered record; the same mtr is used to
- access the clustered index */
-{
- dict_index_t* clust_index;
- const rec_t* clust_rec;
- rec_t* old_vers;
- dberr_t err;
- trx_t* trx;
-
- *out_rec = NULL;
- trx = thr_get_trx(thr);
-
- srv_stats.n_sec_rec_cluster_reads.inc(
- thd_get_thread_id(trx->mysql_thd));
-
- row_build_row_ref_in_tuple(prebuilt->clust_ref, rec,
- sec_index, *offsets, trx);
-
- clust_index = dict_table_get_first_index(sec_index->table);
-
- btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref,
- PAGE_CUR_LE, BTR_SEARCH_LEAF,
- &prebuilt->clust_pcur, 0, mtr);
-
- clust_rec = btr_pcur_get_rec(&prebuilt->clust_pcur);
-
- prebuilt->clust_pcur.trx_if_known = trx;
-
- /* Note: only if the search ends up on a non-infimum record is the
- low_match value the real match to the search tuple */
-
- if (!page_rec_is_user_rec(clust_rec)
- || btr_pcur_get_low_match(&prebuilt->clust_pcur)
- < dict_index_get_n_unique(clust_index)) {
-
- /* In a rare case it is possible that no clust rec is found
- for a delete-marked secondary index record: if in row0umod.cc
- in row_undo_mod_remove_clust_low() we have already removed
- the clust rec, while purge is still cleaning and removing
- secondary index records associated with earlier versions of
- the clustered index record. In that case we know that the
- clustered index record did not exist in the read view of
- trx. */
-
- if (!rec_get_deleted_flag(rec,
- dict_table_is_comp(sec_index->table))
- || prebuilt->select_lock_type != LOCK_NONE) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: error clustered record"
- " for sec rec not found\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, sec_index);
- fputs("\n"
- "InnoDB: sec index record ", stderr);
- rec_print(stderr, rec, sec_index);
- fputs("\n"
- "InnoDB: clust index record ", stderr);
- rec_print(stderr, clust_rec, clust_index);
- putc('\n', stderr);
- trx_print(stderr, trx, 600);
- fputs("\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n", stderr);
- ut_ad(0);
- }
-
- clust_rec = NULL;
-
- err = DB_SUCCESS;
- goto func_exit;
- }
-
- *offsets = rec_get_offsets(clust_rec, clust_index, *offsets,
- ULINT_UNDEFINED, offset_heap);
-
- if (prebuilt->select_lock_type != LOCK_NONE) {
- /* Try to place a lock on the index record; we are searching
- the clust rec with a unique condition, hence
- we set a LOCK_REC_NOT_GAP type lock */
-
- err = lock_clust_rec_read_check_and_lock(
- 0, btr_pcur_get_block(&prebuilt->clust_pcur),
- clust_rec, clust_index, *offsets,
- static_cast<enum lock_mode>(prebuilt->select_lock_type),
- LOCK_REC_NOT_GAP,
- thr);
-
- switch (err) {
- case DB_SUCCESS:
- case DB_SUCCESS_LOCKED_REC:
- break;
- default:
- goto err_exit;
- }
- } else {
- /* This is a non-locking consistent read: if necessary, fetch
- a previous version of the record */
-
- old_vers = NULL;
-
- /* If the isolation level allows reading of uncommitted data,
- then we never look for an earlier version */
-
- if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
- && !lock_clust_rec_cons_read_sees(
- clust_rec, clust_index, *offsets,
- trx->read_view)) {
-
- /* The following call returns 'offsets' associated with
- 'old_vers' */
- err = row_sel_build_prev_vers_for_mysql(
- trx->read_view, clust_index, prebuilt,
- clust_rec, offsets, offset_heap, &old_vers,
- mtr);
-
- if (err != DB_SUCCESS || old_vers == NULL) {
-
- goto err_exit;
- }
-
- clust_rec = old_vers;
- }
-
- /* If we had to go to an earlier version of row or the
- secondary index record is delete marked, then it may be that
- the secondary index record corresponding to clust_rec
- (or old_vers) is not rec; in that case we must ignore
- such row because in our snapshot rec would not have existed.
- Remember that from rec we cannot see directly which transaction
- id corresponds to it: we have to go to the clustered index
- record. A query where we want to fetch all rows where
- the secondary index value is in some interval would return
- a wrong result if we would not drop rows which we come to
- visit through secondary index records that would not really
- exist in our snapshot. */
-
- if (clust_rec
- && (old_vers
- || trx->isolation_level <= TRX_ISO_READ_UNCOMMITTED
- || rec_get_deleted_flag(rec, dict_table_is_comp(
- sec_index->table)))
- && !row_sel_sec_rec_is_for_clust_rec(
- rec, sec_index, clust_rec, clust_index)) {
- clust_rec = NULL;
-#ifdef UNIV_SEARCH_DEBUG
- } else {
- ut_a(clust_rec == NULL
- || row_sel_sec_rec_is_for_clust_rec(
- rec, sec_index, clust_rec, clust_index));
-#endif
- }
-
- err = DB_SUCCESS;
- }
-
-func_exit:
- *out_rec = clust_rec;
-
- /* Store the current position if select_lock_type is not
- LOCK_NONE or if we are scanning using InnoDB APIs */
- if (prebuilt->select_lock_type != LOCK_NONE
- || prebuilt->innodb_api) {
- /* We may use the cursor in update or in unlock_row():
- store its position */
-
- btr_pcur_store_position(&prebuilt->clust_pcur, mtr);
- }
-
-err_exit:
- return(err);
-}
-
-/********************************************************************//**
-Restores cursor position after it has been stored. We have to take into
-account that the record the cursor was positioned on may have been deleted.
-Then we may have to move the cursor one step up or down.
-@return TRUE if we may need to process the record the cursor is now
-positioned on (i.e. we should not go to the next record yet) */
-static
-ibool
-sel_restore_position_for_mysql(
-/*===========================*/
- ibool* same_user_rec, /*!< out: TRUE if we were able to restore
- the cursor on a user record with the
- same ordering prefix in the
- B-tree index */
- ulint latch_mode, /*!< in: latch mode wished in
- restoration */
- btr_pcur_t* pcur, /*!< in: cursor whose position
- has been stored */
- ibool moves_up, /*!< in: TRUE if the cursor moves up
- in the index */
- mtr_t* mtr) /*!< in: mtr; CAUTION: may commit
- mtr temporarily! */
-{
- ibool success;
-
- success = btr_pcur_restore_position(latch_mode, pcur, mtr);
-
- *same_user_rec = success;
-
- ut_ad(!success || pcur->rel_pos == BTR_PCUR_ON);
-#ifdef UNIV_DEBUG
- if (pcur->pos_state == BTR_PCUR_IS_POSITIONED_OPTIMISTIC) {
- ut_ad(pcur->rel_pos == BTR_PCUR_BEFORE
- || pcur->rel_pos == BTR_PCUR_AFTER);
- } else {
- ut_ad(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
- ut_ad((pcur->rel_pos == BTR_PCUR_ON)
- == btr_pcur_is_on_user_rec(pcur));
- }
-#endif
-
- /* The position may need to be adjusted for rel_pos and moves_up. */
-
- switch (pcur->rel_pos) {
- case BTR_PCUR_ON:
- if (!success && moves_up) {
-next:
- btr_pcur_move_to_next(pcur, mtr);
- return(TRUE);
- }
- return(!success);
- case BTR_PCUR_AFTER_LAST_IN_TREE:
- case BTR_PCUR_BEFORE_FIRST_IN_TREE:
- return(TRUE);
- case BTR_PCUR_AFTER:
- /* positioned to record after pcur->old_rec. */
- pcur->pos_state = BTR_PCUR_IS_POSITIONED;
-prev:
- if (btr_pcur_is_on_user_rec(pcur) && !moves_up) {
- btr_pcur_move_to_prev(pcur, mtr);
- }
- return(TRUE);
- case BTR_PCUR_BEFORE:
- /* For non optimistic restoration:
- The position is now set to the record before pcur->old_rec.
-
- For optimistic restoration:
- The position also needs to take the previous search_mode into
- consideration. */
-
- switch (pcur->pos_state) {
- case BTR_PCUR_IS_POSITIONED_OPTIMISTIC:
- pcur->pos_state = BTR_PCUR_IS_POSITIONED;
- if (pcur->search_mode == PAGE_CUR_GE) {
- /* Positioned during Greater or Equal search
- with BTR_PCUR_BEFORE. Optimistic restore to
- the same record. If scanning for lower then
- we must move to previous record.
- This can happen with:
- HANDLER READ idx a = (const);
- HANDLER READ idx PREV; */
- goto prev;
- }
- return(TRUE);
- case BTR_PCUR_IS_POSITIONED:
- if (moves_up && btr_pcur_is_on_user_rec(pcur)) {
- goto next;
- }
- return(TRUE);
- case BTR_PCUR_WAS_POSITIONED:
- case BTR_PCUR_NOT_POSITIONED:
- break;
- }
- }
- ut_ad(0);
- return(TRUE);
-}
-
-/********************************************************************//**
-Copies a cached field for MySQL from the fetch cache. The field is located
-at templ->mysql_col_offset in both buffers. For a true VARCHAR column only
-the length bytes plus the actual data are copied; for any other column
-templ->mysql_col_len bytes are copied. */
-static
-void
-row_sel_copy_cached_field_for_mysql(
-/*================================*/
- byte* buf, /*!< in/out: row buffer */
- const byte* cache, /*!< in: cached row */
- const mysql_row_templ_t*templ) /*!< in: column template */
-{
- ulint len;
-
- buf += templ->mysql_col_offset;
- cache += templ->mysql_col_offset;
-
- UNIV_MEM_ASSERT_W(buf, templ->mysql_col_len);
-
- if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR
- && templ->type != DATA_INT) {
- /* Check for != DATA_INT to make sure we do
- not treat MySQL ENUM or SET as a true VARCHAR!
- Find the actual length of the true VARCHAR field. */
- row_mysql_read_true_varchar(
- &len, cache, templ->mysql_length_bytes);
- len += templ->mysql_length_bytes;
- UNIV_MEM_INVALID(buf, templ->mysql_col_len);
- } else {
- len = templ->mysql_col_len;
- }
-
- ut_memcpy(buf, cache, len);
-}
-
-/********************************************************************//**
-Pops a cached row for MySQL from the fetch cache. The row at
-prebuilt->fetch_cache_first is copied to buf using one of three strategies
-(field by field keeping other fields, field by field for long rows, or a
-single memcpy of the row prefix), after which the queue counters are
-advanced. */
-UNIV_INLINE
-void
-row_sel_dequeue_cached_row_for_mysql(
-/*=================================*/
- byte* buf, /*!< in/out: buffer where to copy the
- row */
- row_prebuilt_t* prebuilt) /*!< in: prebuilt struct */
-{
- ulint i;
- const mysql_row_templ_t*templ;
- const byte* cached_rec;
- ut_ad(prebuilt->n_fetch_cached > 0);
- ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len);
-
- UNIV_MEM_ASSERT_W(buf, prebuilt->mysql_row_len);
-
- cached_rec = prebuilt->fetch_cache[prebuilt->fetch_cache_first];
-
- if (UNIV_UNLIKELY(prebuilt->keep_other_fields_on_keyread)) {
- /* Copy cache record field by field, don't touch fields that
- are not covered by current key */
-
- for (i = 0; i < prebuilt->n_template; i++) {
- templ = prebuilt->mysql_template + i;
- row_sel_copy_cached_field_for_mysql(
- buf, cached_rec, templ);
- /* Copy NULL bit of the current field from cached_rec
- to buf. The XOR with the masked difference replaces only
- the bit selected by mysql_null_bit_mask and leaves the
- other bits of that byte in buf unchanged. */
- if (templ->mysql_null_bit_mask) {
- buf[templ->mysql_null_byte_offset]
- ^= (buf[templ->mysql_null_byte_offset]
- ^ cached_rec[templ->mysql_null_byte_offset])
- & (byte) templ->mysql_null_bit_mask;
- }
- }
- } else if (prebuilt->mysql_prefix_len > 63) {
- /* The record is long. Copy it field by field, in case
- there are some long VARCHAR columns of which only a
- small length is being used. */
- UNIV_MEM_INVALID(buf, prebuilt->mysql_prefix_len);
-
- /* First copy the NULL bits. */
- ut_memcpy(buf, cached_rec, prebuilt->null_bitmap_len);
- /* Then copy the requested fields. */
-
- for (i = 0; i < prebuilt->n_template; i++) {
- row_sel_copy_cached_field_for_mysql(
- buf, cached_rec, prebuilt->mysql_template + i);
- }
- } else {
- ut_memcpy(buf, cached_rec, prebuilt->mysql_prefix_len);
- }
-
- prebuilt->n_fetch_cached--;
- prebuilt->fetch_cache_first++;
-
- if (prebuilt->n_fetch_cached == 0) {
- prebuilt->fetch_cache_first = 0;
- }
-}
-
-/********************************************************************//**
-Initialise the prefetch cache. One buffer is allocated with mem_alloc() and
-every prebuilt->fetch_cache[] slot is pointed into it. Each slot consists of
-a 4-byte magic number, prebuilt->mysql_row_len bytes for the row, and
-another 4-byte magic number. */
-UNIV_INLINE
-void
-row_sel_prefetch_cache_init(
-/*========================*/
- row_prebuilt_t* prebuilt) /*!< in/out: prebuilt struct */
-{
- ulint i;
- ulint sz;
- byte* ptr;
-
- /* Reserve space for the two 4-byte magic numbers around each row
- (hence mysql_row_len + 8 per slot). */
- sz = UT_ARR_SIZE(prebuilt->fetch_cache) * (prebuilt->mysql_row_len + 8);
- ptr = static_cast<byte*>(mem_alloc(sz));
-
- for (i = 0; i < UT_ARR_SIZE(prebuilt->fetch_cache); i++) {
-
- /* A user has reported memory corruption in these
- buffers in Linux. Put magic numbers there to help
- to track a possible bug. */
-
- mach_write_to_4(ptr, ROW_PREBUILT_FETCH_MAGIC_N);
- ptr += 4;
-
- prebuilt->fetch_cache[i] = ptr;
- ptr += prebuilt->mysql_row_len;
-
- mach_write_to_4(ptr, ROW_PREBUILT_FETCH_MAGIC_N);
- ptr += 4;
- }
-}
-
-/********************************************************************//**
-Get the last fetch cache buffer from the queue, i.e. the slot at index
-prebuilt->n_fetch_cached. The fetch cache is allocated on first use.
-@return pointer to buffer. */
-UNIV_INLINE
-byte*
-row_sel_fetch_last_buf(
-/*===================*/
- row_prebuilt_t* prebuilt) /*!< in/out: prebuilt struct */
-{
- ut_ad(!prebuilt->templ_contains_blob);
- ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE);
-
- if (prebuilt->fetch_cache[0] == NULL) {
- /* Allocate memory for the fetch cache */
- ut_ad(prebuilt->n_fetch_cached == 0);
-
- row_sel_prefetch_cache_init(prebuilt);
- }
-
- ut_ad(prebuilt->fetch_cache_first == 0);
- UNIV_MEM_INVALID(prebuilt->fetch_cache[prebuilt->n_fetch_cached],
- prebuilt->mysql_row_len);
-
- return(prebuilt->fetch_cache[prebuilt->n_fetch_cached]);
-}
-
-/********************************************************************//**
-Pushes a row for MySQL to the fetch cache. The row is copied into the cache
-only when prebuilt->idx_cond is set; in either case n_fetch_cached is
-incremented. */
-UNIV_INLINE
-void
-row_sel_enqueue_cache_row_for_mysql(
-/*================================*/
- byte* mysql_rec, /*!< in/out: MySQL record */
- row_prebuilt_t* prebuilt) /*!< in/out: prebuilt struct */
-{
- /* For non ICP code path the row should already exist in the
- next fetch cache slot, so only the ICP path copies it here. */
-
- if (prebuilt->idx_cond != NULL) {
- byte* dest = row_sel_fetch_last_buf(prebuilt);
-
- ut_memcpy(dest, mysql_rec, prebuilt->mysql_row_len);
- }
-
- ++prebuilt->n_fetch_cached;
-}
-
-/*********************************************************************//**
-Tries to do a shortcut to fetch a clustered index record with a unique key,
-using the hash index if possible (not always). We assume that the search
-mode is PAGE_CUR_GE, it is a consistent read, there is a read view in trx,
-btr search latch has been locked in S-mode if AHI is enabled.
-SEL_RETRY is returned when the cursor is not on a user record or the record
-is not visible in the read view; SEL_EXHAUSTED when the record does not
-match all fields of the search tuple or is delete-marked; SEL_FOUND
-otherwise, in which case *out_rec is set.
-@return SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
-static
-ulint
-row_sel_try_search_shortcut_for_mysql(
-/*==================================*/
- const rec_t** out_rec,/*!< out: record if found */
- row_prebuilt_t* prebuilt,/*!< in: prebuilt struct */
- ulint** offsets,/*!< in/out: for rec_get_offsets(*out_rec) */
- mem_heap_t** heap, /*!< in/out: heap for rec_get_offsets() */
- mtr_t* mtr) /*!< in: started mtr */
-{
- dict_index_t* index = prebuilt->index;
- const dtuple_t* search_tuple = prebuilt->search_tuple;
- btr_pcur_t* pcur = &prebuilt->pcur;
- trx_t* trx = prebuilt->trx;
- const rec_t* rec;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(!prebuilt->templ_contains_blob);
-
-#ifndef UNIV_SEARCH_DEBUG
- ut_ad(trx->has_search_latch);
-
- btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, pcur,
- RW_S_LATCH,
- mtr);
-#else /* UNIV_SEARCH_DEBUG */
- btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
- BTR_SEARCH_LEAF, pcur,
- 0,
- mtr);
-#endif /* UNIV_SEARCH_DEBUG */
- rec = btr_pcur_get_rec(pcur);
-
- if (!page_rec_is_user_rec(rec)) {
-
- return(SEL_RETRY);
- }
-
- /* As the cursor is now placed on a user record after a search with
- the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
- fields in the user record matched to the search tuple */
-
- if (btr_pcur_get_up_match(pcur) < dtuple_get_n_fields(search_tuple)) {
-
- return(SEL_EXHAUSTED);
- }
-
- /* This is a non-locking consistent read: if the record is not
- visible in the read view, the caller must retry without the
- shortcut, because no previous version is built here */
-
- *offsets = rec_get_offsets(rec, index, *offsets,
- ULINT_UNDEFINED, heap);
-
- if (!lock_clust_rec_cons_read_sees(rec, index,
- *offsets, trx->read_view)) {
-
- return(SEL_RETRY);
- }
-
- if (rec_get_deleted_flag(rec, dict_table_is_comp(index->table))) {
-
- return(SEL_EXHAUSTED);
- }
-
- *out_rec = rec;
-
- return(SEL_FOUND);
-}
-
-/*********************************************************************//**
-Check a pushed-down index condition. If prebuilt->idx_cond is not set,
-ICP_MATCH is returned without evaluating anything.
-@return ICP_NO_MATCH, ICP_MATCH, ICP_OUT_OF_RANGE, ICP_ERROR or
-ICP_ABORTED_BY_USER */
-static
-enum icp_result
-row_search_idx_cond_check(
-/*======================*/
- byte* mysql_rec, /*!< out: record
- in MySQL format (invalid unless
- prebuilt->idx_cond!=NULL and
- we return ICP_MATCH) */
- row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct
- for the table handle */
- const rec_t* rec, /*!< in: InnoDB record */
- const ulint* offsets) /*!< in: rec_get_offsets() */
-{
- enum icp_result result;
- ulint i;
-
- ut_ad(rec_offs_validate(rec, prebuilt->index, offsets));
-
- if (!prebuilt->idx_cond) {
- return(ICP_MATCH);
- }
-
- MONITOR_INC(MONITOR_ICP_ATTEMPTS);
-
- /* Convert to MySQL format those fields that are needed for
- evaluating the index condition. */
-
- if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
- mem_heap_empty(prebuilt->blob_heap);
- }
-
- for (i = 0; i < prebuilt->idx_cond_n_cols; i++) {
- const mysql_row_templ_t*templ = &prebuilt->mysql_template[i];
-
- if (!row_sel_store_mysql_field(mysql_rec, prebuilt,
- rec, prebuilt->index, offsets,
- templ->icp_rec_field_no,
- templ)) {
- return(ICP_NO_MATCH);
- }
- }
-
- /* We assume that the index conditions on
- case-insensitive columns are case-insensitive. The
- case of such columns may be wrong in a secondary
- index, if the case of the column has been updated in
- the past, or a record has been deleted and a record
- inserted in a different case. */
- result = innobase_index_cond(prebuilt->idx_cond);
- switch (result) {
- case ICP_MATCH:
- /* Convert the remaining fields to MySQL format.
- If this is a secondary index record, we must defer
- this until we have fetched the clustered index record. */
- if (!prebuilt->need_to_access_clustered
- || dict_index_is_clust(prebuilt->index)) {
- if (!row_sel_store_mysql_rec(
- mysql_rec, prebuilt, rec, FALSE,
- prebuilt->index, offsets)) {
- ut_ad(dict_index_is_clust(prebuilt->index));
- return(ICP_NO_MATCH);
- }
- }
- MONITOR_INC(MONITOR_ICP_MATCH);
- return(result);
- case ICP_NO_MATCH:
- MONITOR_INC(MONITOR_ICP_NO_MATCH);
- return(result);
- case ICP_OUT_OF_RANGE:
- MONITOR_INC(MONITOR_ICP_OUT_OF_RANGE);
- return(result);
- case ICP_ERROR:
- case ICP_ABORTED_BY_USER:
- return(result);
- }
-
- ut_error;
- return(result);
-}
-
-/********************************************************************//**
-Searches for rows in the database. This is used in the interface to
-MySQL. This function opens a cursor, and also implements fetch next
-and fetch prev. NOTE that if we do a search with a full key value
-from a unique index (ROW_SEL_EXACT), then we will not store the cursor
-position and fetch next or fetch prev must not be tried to the cursor!
-@return DB_SUCCESS, DB_RECORD_NOT_FOUND, DB_END_OF_INDEX, DB_DEADLOCK,
-DB_LOCK_TABLE_FULL, DB_CORRUPTION, DB_SEARCH_ABORTED_BY_USER or
-DB_TOO_BIG_RECORD */
-UNIV_INTERN
-dberr_t
-row_search_for_mysql(
-/*=================*/
- byte* buf, /*!< in/out: buffer for the fetched
- row in the MySQL format */
- ulint mode, /*!< in: search mode PAGE_CUR_L, ... */
- row_prebuilt_t* prebuilt, /*!< in: prebuilt struct for the
- table handle; this contains the info
- of search_tuple, index; if search
- tuple contains 0 fields then we
- position the cursor at the start or
- the end of the index, depending on
- 'mode' */
- ulint match_mode, /*!< in: 0 or ROW_SEL_EXACT or
- ROW_SEL_EXACT_PREFIX */
- ulint direction) /*!< in: 0 or ROW_SEL_NEXT or
- ROW_SEL_PREV; NOTE: if this is != 0,
- then prebuilt must have a pcur
- with stored position! In opening of a
- cursor 'direction' should be 0. */
-{
- dict_index_t* index = prebuilt->index;
- ibool comp = dict_table_is_comp(index->table);
- const dtuple_t* search_tuple = prebuilt->search_tuple;
- btr_pcur_t* pcur = &prebuilt->pcur;
- trx_t* trx = prebuilt->trx;
- dict_index_t* clust_index;
- que_thr_t* thr;
- const rec_t* rec = NULL;
- const rec_t* result_rec = NULL;
- const rec_t* clust_rec;
- dberr_t err = DB_SUCCESS;
- ibool unique_search = FALSE;
- ibool mtr_has_extra_clust_latch = FALSE;
- ibool moves_up = FALSE;
- ibool set_also_gap_locks = TRUE;
- /* if the query is a plain locking SELECT, and the isolation level
- is <= TRX_ISO_READ_COMMITTED, then this is set to FALSE */
- ibool did_semi_consistent_read = FALSE;
- /* if the returned record was locked and we did a semi-consistent
- read (fetch the newest committed version), then this is set to
- TRUE */
-#ifdef UNIV_SEARCH_DEBUG
- ulint cnt = 0;
-#endif /* UNIV_SEARCH_DEBUG */
- ulint next_offs;
- ibool same_user_rec;
- mtr_t mtr;
- mem_heap_t* heap = NULL;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- ulint* offsets = offsets_;
- ibool table_lock_waited = FALSE;
- byte* next_buf = 0;
- bool use_clustered_index = false;
-
- rec_offs_init(offsets_);
-
- ut_ad(index && pcur && search_tuple);
-
- /* We don't support FTS queries from the HANDLER interfaces, because
- we implemented FTS as reversed inverted index with auxiliary tables.
- So anything related to traditional index query would not apply to
- it. */
- if (index->type & DICT_FTS) {
- return(DB_END_OF_INDEX);
- }
-
- ut_ad(!trx->has_search_latch);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!btr_search_own_any());
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (dict_table_is_discarded(prebuilt->table)) {
-
- return(DB_TABLESPACE_DELETED);
-
- } else if (!prebuilt->table->is_readable()) {
- if (fil_space_get(prebuilt->table->space) == NULL) {
- return(DB_TABLESPACE_NOT_FOUND);
- } else {
- return(DB_DECRYPTION_FAILED);
- }
- } else if (!prebuilt->index_usable) {
-
- return(DB_MISSING_HISTORY);
-
- } else if (dict_index_is_corrupted(index)) {
-
- return(DB_CORRUPTION);
-
- } else if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) {
- fprintf(stderr,
- "InnoDB: Error: trying to free a corrupt\n"
- "InnoDB: table handle. Magic n %lu, table name ",
- (ulong) prebuilt->magic_n);
- ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
- putc('\n', stderr);
-
- mem_analyze_corruption(prebuilt);
-
- ut_error;
- }
-
-#if 0
- /* August 19, 2005 by Heikki: temporarily disable this error
- print until the cursor lock count is done correctly.
- See bugs #12263 and #12456!*/
-
- if (trx->n_mysql_tables_in_use == 0
- && UNIV_UNLIKELY(prebuilt->select_lock_type == LOCK_NONE)) {
- /* Note that if MySQL uses an InnoDB temp table that it
- created inside LOCK TABLES, then n_mysql_tables_in_use can
- be zero; in that case select_lock_type is set to LOCK_X in
- ::start_stmt. */
-
- fputs("InnoDB: Error: MySQL is trying to perform a SELECT\n"
- "InnoDB: but it has not locked"
- " any tables in ::external_lock()!\n",
- stderr);
- trx_print(stderr, trx, 600);
- fputc('\n', stderr);
- }
-#endif
-
-#if 0
- fprintf(stderr, "Match mode %lu\n search tuple ",
- (ulong) match_mode);
- dtuple_print(search_tuple);
- fprintf(stderr, "N tables locked %lu\n",
- (ulong) trx->mysql_n_tables_locked);
-#endif
- /* Reset the new record lock info if srv_locks_unsafe_for_binlog
- is set or session is using a READ COMMITTED isolation level. Then
- we are able to remove the record locks set here on an individual
- row. */
- prebuilt->new_rec_locks = 0;
-
- /*-------------------------------------------------------------*/
- /* PHASE 1: Try to pop the row from the prefetch cache */
-
- if (UNIV_UNLIKELY(direction == 0)) {
- trx->op_info = "starting index read";
-
- prebuilt->n_rows_fetched = 0;
- prebuilt->n_fetch_cached = 0;
- prebuilt->fetch_cache_first = 0;
-
- if (prebuilt->sel_graph == NULL) {
- /* Build a dummy select query graph */
- row_prebuild_sel_graph(prebuilt);
- }
- } else {
- trx->op_info = "fetching rows";
-
- if (prebuilt->n_rows_fetched == 0) {
- prebuilt->fetch_direction = direction;
- }
-
- if (UNIV_UNLIKELY(direction != prebuilt->fetch_direction)) {
- if (UNIV_UNLIKELY(prebuilt->n_fetch_cached > 0)) {
- ut_error;
- /* TODO: scrollable cursor: restore cursor to
- the place of the latest returned row,
- or better: prevent caching for a scroll
- cursor! */
- }
-
- prebuilt->n_rows_fetched = 0;
- prebuilt->n_fetch_cached = 0;
- prebuilt->fetch_cache_first = 0;
-
- } else if (UNIV_LIKELY(prebuilt->n_fetch_cached > 0)) {
- row_sel_dequeue_cached_row_for_mysql(buf, prebuilt);
-
- prebuilt->n_rows_fetched++;
-
- err = DB_SUCCESS;
- goto func_exit;
- }
-
- if (prebuilt->fetch_cache_first > 0
- && prebuilt->fetch_cache_first < MYSQL_FETCH_CACHE_SIZE) {
-
- /* The previous returned row was popped from the fetch
- cache, but the cache was not full at the time of the
- popping: no more rows can exist in the result set */
-
- err = DB_RECORD_NOT_FOUND;
- goto func_exit;
- }
-
- prebuilt->n_rows_fetched++;
-
- if (prebuilt->n_rows_fetched > 1000000000) {
- /* Prevent wrap-over */
- prebuilt->n_rows_fetched = 500000000;
- }
-
- mode = pcur->search_mode;
- }
-
- /* In a search where at most one record in the index may match, we
- can use a LOCK_REC_NOT_GAP type record lock when locking a
- non-delete-marked matching record.
-
- Note that in a unique secondary index there may be different
- delete-marked versions of a record where only the primary key
- values differ: thus in a secondary index we must use next-key
- locks when locking delete-marked records. */
-
- if (match_mode == ROW_SEL_EXACT
- && dict_index_is_unique(index)
- && dtuple_get_n_fields(search_tuple)
- == dict_index_get_n_unique(index)
- && (dict_index_is_clust(index)
- || !dtuple_contains_null(search_tuple))) {
-
- /* Note above that a UNIQUE secondary index can contain many
- rows with the same key value if one of the columns is the SQL
- null. A clustered index under MySQL can never contain null
- columns because we demand that all the columns in primary key
- are non-null. */
-
- unique_search = TRUE;
-
- /* Even if the condition is unique, MySQL seems to try to
- retrieve also a second row if a primary key contains more than
- 1 column. Return immediately if this is not a HANDLER
- command. */
-
- if (UNIV_UNLIKELY(direction != 0
- && !prebuilt->used_in_HANDLER)) {
-
- err = DB_RECORD_NOT_FOUND;
- goto func_exit;
- }
- }
-
- mtr_start_trx(&mtr, trx);
-
- /*-------------------------------------------------------------*/
- /* PHASE 2: Try fast adaptive hash index search if possible */
-
- /* Next test if this is the special case where we can use the fast
- adaptive hash index to try the search. Since we must release the
- search system latch when we retrieve an externally stored field, we
- cannot use the adaptive hash index in a search in the case the row
- may be long and there may be externally stored fields */
-
- if (UNIV_UNLIKELY(direction == 0)
- && unique_search
- && dict_index_is_clust(index)
- && !prebuilt->templ_contains_blob
- && !prebuilt->used_in_HANDLER
- && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)
- && !prebuilt->innodb_api) {
-
- mode = PAGE_CUR_GE;
-
- if (trx->mysql_n_tables_locked == 0
- && prebuilt->select_lock_type == LOCK_NONE
- && trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
- && trx->read_view) {
-
- /* This is a SELECT query done as a consistent read,
- and the read view has already been allocated:
- let us try a search shortcut through the hash
- index.
- NOTE that we must also test that
- mysql_n_tables_locked == 0, because this might
- also be INSERT INTO ... SELECT ... or
- CREATE TABLE ... SELECT ... . Our algorithm is
- NOT prepared to inserts interleaved with the SELECT,
- and if we try that, we can deadlock on the adaptive
- hash index semaphore! */
-
-#ifndef UNIV_SEARCH_DEBUG
- ut_ad(!trx->has_search_latch);
- rw_lock_s_lock(btr_search_get_latch(index));
- trx->has_search_latch = TRUE;
-#endif
- switch (row_sel_try_search_shortcut_for_mysql(
- &rec, prebuilt, &offsets, &heap,
- &mtr)) {
- case SEL_FOUND:
-#ifdef UNIV_SEARCH_DEBUG
- ut_a(0 == cmp_dtuple_rec(search_tuple,
- rec, offsets));
-#endif
- /* At this point, rec is protected by
- a page latch that was acquired by
- row_sel_try_search_shortcut_for_mysql().
- The latch will not be released until
- mtr_commit(&mtr). */
- ut_ad(!rec_get_deleted_flag(rec, comp));
-
- if (prebuilt->idx_cond) {
- switch (row_search_idx_cond_check(
- buf, prebuilt,
- rec, offsets)) {
- case ICP_NO_MATCH:
- case ICP_OUT_OF_RANGE:
- case ICP_ABORTED_BY_USER:
- case ICP_ERROR:
- goto shortcut_mismatch;
- case ICP_MATCH:
- goto shortcut_match;
- }
- }
-
- if (!row_sel_store_mysql_rec(
- buf, prebuilt,
- rec, FALSE, index,
- offsets)) {
- /* Only fresh inserts may contain
- incomplete externally stored
- columns. Pretend that such
- records do not exist. Such
- records may only be accessed
- at the READ UNCOMMITTED
- isolation level or when
- rolling back a recovered
- transaction. Rollback happens
- at a lower level, not here. */
-
- /* Proceed as in case SEL_RETRY. */
- break;
- }
-
- shortcut_match:
- mtr_commit(&mtr);
-
- /* ut_print_name(stderr, index->name);
- fputs(" shortcut\n", stderr); */
-
- err = DB_SUCCESS;
- goto release_search_latch;
-
- case SEL_EXHAUSTED:
- shortcut_mismatch:
- mtr_commit(&mtr);
-
- /* ut_print_name(stderr, index->name);
- fputs(" record not found 2\n", stderr); */
-
- err = DB_RECORD_NOT_FOUND;
-release_search_latch:
- rw_lock_s_unlock(
- btr_search_get_latch(index));
- trx->has_search_latch = FALSE;
-
- /* NOTE that we do NOT store the cursor
- position */
- goto func_exit;
-
- case SEL_RETRY:
- break;
-
- default:
- ut_ad(0);
- }
-
- mtr_commit(&mtr);
- mtr_start(&mtr);
-
- rw_lock_s_unlock(btr_search_get_latch(index));
- trx->has_search_latch = FALSE;
- }
- }
-
- /*-------------------------------------------------------------*/
- /* PHASE 3: Open or restore index cursor position */
-
- ut_ad(!trx->has_search_latch);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!btr_search_own_any());
-#endif
-
- /* The state of a running trx can only be changed by the
- thread that is currently serving the transaction. Because we
- are that thread, we can read trx->state without holding any
- mutex. */
- ut_ad(prebuilt->sql_stat_start || trx->state == TRX_STATE_ACTIVE);
-
- ut_ad(trx->state == TRX_STATE_NOT_STARTED
- || trx->state == TRX_STATE_ACTIVE);
-
- ut_ad(prebuilt->sql_stat_start
- || prebuilt->select_lock_type != LOCK_NONE
- || trx->read_view);
-
- trx_start_if_not_started(trx);
-
- if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
- && prebuilt->select_lock_type != LOCK_NONE
- && trx->mysql_thd != NULL
- && thd_is_select(trx->mysql_thd)) {
- /* It is a plain locking SELECT and the isolation
- level is low: do not lock gaps */
-
- set_also_gap_locks = FALSE;
- }
-
- /* Note that if the search mode was GE or G, then the cursor
- naturally moves upward (in fetch next) in alphabetical order,
- otherwise downward */
-
- if (UNIV_UNLIKELY(direction == 0)) {
- if (mode == PAGE_CUR_GE || mode == PAGE_CUR_G) {
- moves_up = TRUE;
- }
- } else if (direction == ROW_SEL_NEXT) {
- moves_up = TRUE;
- }
-
- thr = que_fork_get_first_thr(prebuilt->sel_graph);
-
- que_thr_move_to_run_state_for_mysql(thr, trx);
-
- clust_index = dict_table_get_first_index(index->table);
-
- /* Do some start-of-statement preparations */
-
- if (!prebuilt->sql_stat_start) {
- /* No need to set an intention lock or assign a read view */
-
- if (UNIV_UNLIKELY
- (trx->read_view == NULL
- && prebuilt->select_lock_type == LOCK_NONE)) {
-
- fputs("InnoDB: Error: MySQL is trying to"
- " perform a consistent read\n"
- "InnoDB: but the read view is not assigned!\n",
- stderr);
- trx_print(stderr, trx, 600);
- fputc('\n', stderr);
- ut_error;
- }
- } else if (prebuilt->select_lock_type == LOCK_NONE) {
- /* This is a consistent read */
- /* Assign a read view for the query */
-
- trx_assign_read_view(trx);
- prebuilt->sql_stat_start = FALSE;
- } else {
-wait_table_again:
- err = lock_table(0, index->table,
- prebuilt->select_lock_type == LOCK_S
- ? LOCK_IS : LOCK_IX, thr);
-
- if (err != DB_SUCCESS) {
-
- table_lock_waited = TRUE;
- goto lock_table_wait;
- }
- prebuilt->sql_stat_start = FALSE;
- }
-
- /* Open or restore index cursor position */
-
- if (UNIV_LIKELY(direction != 0)) {
- ibool need_to_process = sel_restore_position_for_mysql(
- &same_user_rec, BTR_SEARCH_LEAF,
- pcur, moves_up, &mtr);
-
- if (UNIV_UNLIKELY(need_to_process)) {
- if (UNIV_UNLIKELY(prebuilt->row_read_type
- == ROW_READ_DID_SEMI_CONSISTENT)) {
- /* We did a semi-consistent read,
- but the record was removed in
- the meantime. */
- prebuilt->row_read_type
- = ROW_READ_TRY_SEMI_CONSISTENT;
- }
- } else if (UNIV_LIKELY(prebuilt->row_read_type
- != ROW_READ_DID_SEMI_CONSISTENT)) {
-
- /* The cursor was positioned on the record
- that we returned previously. If we need
- to repeat a semi-consistent read as a
- pessimistic locking read, the record
- cannot be skipped. */
-
- goto next_rec;
- }
-
- } else if (dtuple_get_n_fields(search_tuple) > 0) {
-
- err = btr_pcur_open_with_no_init(index, search_tuple, mode,
- BTR_SEARCH_LEAF,
- pcur, 0, &mtr);
-
- if (err != DB_SUCCESS) {
- rec = NULL;
- goto lock_wait_or_error;
- }
-
- pcur->trx_if_known = trx;
-
- rec = btr_pcur_get_rec(pcur);
-
- if (!moves_up
- && !page_rec_is_supremum(rec)
- && set_also_gap_locks
- && !(srv_locks_unsafe_for_binlog
- || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
-
- /* Try to place a gap lock on the next index record
- to prevent phantoms in ORDER BY ... DESC queries */
- const rec_t* next_rec = page_rec_get_next_const(rec);
-
- offsets = rec_get_offsets(next_rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- err = sel_set_rec_lock(btr_pcur_get_block(pcur),
- next_rec, index, offsets,
- prebuilt->select_lock_type,
- LOCK_GAP, thr);
-
- switch (err) {
- case DB_SUCCESS_LOCKED_REC:
- err = DB_SUCCESS;
- case DB_SUCCESS:
- break;
- default:
- goto lock_wait_or_error;
- }
- }
- } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_L) {
- err = btr_pcur_open_at_index_side(
- mode == PAGE_CUR_G, index, BTR_SEARCH_LEAF,
- pcur, false, 0, &mtr);
-
- if (err != DB_SUCCESS) {
- if (err == DB_DECRYPTION_FAILED) {
- ib_push_warning(trx->mysql_thd,
- DB_DECRYPTION_FAILED,
- "Table %s is encrypted but encryption service or"
- " used key_id is not available. "
- " Can't continue reading table.",
- prebuilt->table->name);
- index->table->file_unreadable = true;
- }
- rec = NULL;
- goto lock_wait_or_error;
- }
- }
-
-rec_loop:
- DEBUG_SYNC_C("row_search_rec_loop");
- if (trx_is_interrupted(trx)) {
- btr_pcur_store_position(pcur, &mtr);
- err = DB_INTERRUPTED;
- goto normal_return;
- }
-
- /*-------------------------------------------------------------*/
- /* PHASE 4: Look for matching records in a loop */
-
- rec = btr_pcur_get_rec(pcur);
-
- if (!index->table->is_readable()) {
- err = DB_DECRYPTION_FAILED;
- goto lock_wait_or_error;
- }
-
- SRV_CORRUPT_TABLE_CHECK(rec,
- {
- err = DB_CORRUPTION;
- goto lock_wait_or_error;
- });
-
- ut_ad(!!page_rec_is_comp(rec) == comp);
-#ifdef UNIV_SEARCH_DEBUG
- /*
- fputs("Using ", stderr);
- dict_index_name_print(stderr, trx, index);
- fprintf(stderr, " cnt %lu ; Page no %lu\n", cnt,
- page_get_page_no(page_align(rec)));
- rec_print(stderr, rec, index);
- printf("delete-mark: %lu\n",
- rec_get_deleted_flag(rec, page_rec_is_comp(rec)));
- */
-#endif /* UNIV_SEARCH_DEBUG */
-
- if (page_rec_is_infimum(rec)) {
-
- /* The infimum record on a page cannot be in the result set,
- and neither can a record lock be placed on it: we skip such
- a record. */
-
- goto next_rec;
- }
-
- if (page_rec_is_supremum(rec)) {
- if (set_also_gap_locks
- && !(srv_locks_unsafe_for_binlog
- || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
-
- /* Try to place a lock on the index record */
-
- /* If innodb_locks_unsafe_for_binlog option is used
- or this session is using a READ COMMITTED or lower isolation
- level we do not lock gaps. Supremum record is really
- a gap and therefore we do not set locks there. */
-
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED, &heap);
- err = sel_set_rec_lock(btr_pcur_get_block(pcur),
- rec, index, offsets,
- prebuilt->select_lock_type,
- LOCK_ORDINARY, thr);
-
- switch (err) {
- case DB_SUCCESS_LOCKED_REC:
- err = DB_SUCCESS;
- case DB_SUCCESS:
- break;
- default:
- goto lock_wait_or_error;
- }
- }
- /* A page supremum record cannot be in the result set: skip
- it now that we have placed a possible lock on it */
-
- goto next_rec;
- }
-
- /*-------------------------------------------------------------*/
- /* Do sanity checks in case our cursor has bumped into page
- corruption */
-
- if (comp) {
- next_offs = rec_get_next_offs(rec, TRUE);
- if (UNIV_UNLIKELY(next_offs < PAGE_NEW_SUPREMUM)) {
-
- goto wrong_offs;
- }
- } else {
- next_offs = rec_get_next_offs(rec, FALSE);
- if (UNIV_UNLIKELY(next_offs < PAGE_OLD_SUPREMUM)) {
-
- goto wrong_offs;
- }
- }
-
- if (UNIV_UNLIKELY(next_offs >= UNIV_PAGE_SIZE - PAGE_DIR)) {
-
-wrong_offs:
- if (srv_pass_corrupt_table && index->table->space != 0 &&
- index->table->space < SRV_LOG_SPACE_FIRST_ID) {
- index->table->file_unreadable = TRUE;
- fil_space_set_corrupt(index->table->space);
- }
-
- if ((srv_force_recovery == 0 || moves_up == FALSE)
- && srv_pass_corrupt_table <= 1) {
- ut_print_timestamp(stderr);
- buf_page_print(page_align(rec), 0,
- BUF_PAGE_PRINT_NO_CRASH);
- fprintf(stderr,
- "\nInnoDB: rec address %p,"
- " buf block fix count %lu\n",
- (void*) rec, (ulong)
- btr_cur_get_block(btr_pcur_get_btr_cur(pcur))
- ->page.buf_fix_count);
- fprintf(stderr,
- "InnoDB: Index corruption: rec offs %lu"
- " next offs %lu, page no %lu,\n"
- "InnoDB: ",
- (ulong) page_offset(rec),
- (ulong) next_offs,
- (ulong) page_get_page_no(page_align(rec)));
- dict_index_name_print(stderr, trx, index);
- fputs(". Run CHECK TABLE. You may need to\n"
- "InnoDB: restore from a backup, or"
- " dump + drop + reimport the table.\n",
- stderr);
- ut_ad(0);
- err = DB_CORRUPTION;
-
- goto lock_wait_or_error;
- } else {
- /* The user may be dumping a corrupt table. Jump
- over the corruption to recover as much as possible. */
-
- fprintf(stderr,
- "InnoDB: Index corruption: rec offs %lu"
- " next offs %lu, page no %lu,\n"
- "InnoDB: ",
- (ulong) page_offset(rec),
- (ulong) next_offs,
- (ulong) page_get_page_no(page_align(rec)));
- dict_index_name_print(stderr, trx, index);
- fputs(". We try to skip the rest of the page.\n",
- stderr);
-
- btr_pcur_move_to_last_on_page(pcur, &mtr);
-
- goto next_rec;
- }
- }
- /*-------------------------------------------------------------*/
-
- /* Calculate the 'offsets' associated with 'rec' */
-
- ut_ad(fil_page_get_type(btr_pcur_get_page(pcur)) == FIL_PAGE_INDEX);
- ut_ad(btr_page_get_index_id(btr_pcur_get_page(pcur)) == index->id);
-
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
-
- if (UNIV_UNLIKELY(srv_force_recovery > 0
- || (!index->table->is_readable() &&
- srv_pass_corrupt_table == 2))) {
- if (!rec_validate(rec, offsets)
- || !btr_index_rec_validate(rec, index, FALSE)) {
- char buf[MAX_FULL_NAME_LEN];
- ut_format_name(index->table->name, FALSE, buf, sizeof(buf));
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Index %s corrupted: rec offs " ULINTPF
- " next offs " ULINTPF
- ", page no " ULINTPF " ."
- " We try to skip the record.",
- buf,
- page_offset(rec),
- next_offs,
- page_get_page_no(page_align(rec)));
-
- goto next_rec;
- }
- }
-
- /* Note that we cannot trust the up_match value in the cursor at this
- place because we can arrive here after moving the cursor! Thus
- we have to recompare rec and search_tuple to determine if they
- match enough. */
-
- if (match_mode == ROW_SEL_EXACT) {
- /* Test if the index record matches completely to search_tuple
- in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */
-
- /* fputs("Comparing rec and search tuple\n", stderr); */
-
- if (0 != cmp_dtuple_rec(search_tuple, rec, offsets)) {
-
- if (set_also_gap_locks
- && !(srv_locks_unsafe_for_binlog
- || trx->isolation_level
- <= TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
-
- /* Try to place a gap lock on the index
- record only if innodb_locks_unsafe_for_binlog
- option is not set or this session is not
- using a READ COMMITTED or lower isolation level. */
-
- err = sel_set_rec_lock(
- btr_pcur_get_block(pcur),
- rec, index, offsets,
- prebuilt->select_lock_type, LOCK_GAP,
- thr);
-
- switch (err) {
- case DB_SUCCESS_LOCKED_REC:
- case DB_SUCCESS:
- break;
- default:
- goto lock_wait_or_error;
- }
- }
-
- btr_pcur_store_position(pcur, &mtr);
-
- /* The found record was not a match, but may be used
- as NEXT record (index_next). Set the relative position
- to BTR_PCUR_BEFORE, to reflect that the position of
- the persistent cursor is before the found/stored row
- (pcur->old_rec). */
- ut_ad(pcur->rel_pos == BTR_PCUR_ON);
- pcur->rel_pos = BTR_PCUR_BEFORE;
-
- err = DB_RECORD_NOT_FOUND;
-#if 0
- ut_print_name(stderr, trx, FALSE, index->name);
- fputs(" record not found 3\n", stderr);
-#endif
-
- goto normal_return;
- }
-
- } else if (match_mode == ROW_SEL_EXACT_PREFIX) {
-
- if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec, offsets)) {
-
- if (set_also_gap_locks
- && !(srv_locks_unsafe_for_binlog
- || trx->isolation_level
- <= TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
-
- /* Try to place a gap lock on the index
- record only if innodb_locks_unsafe_for_binlog
- option is not set or this session is not
- using a READ COMMITTED or lower isolation level. */
-
- err = sel_set_rec_lock(
- btr_pcur_get_block(pcur),
- rec, index, offsets,
- prebuilt->select_lock_type, LOCK_GAP,
- thr);
-
- switch (err) {
- case DB_SUCCESS_LOCKED_REC:
- case DB_SUCCESS:
- break;
- default:
- goto lock_wait_or_error;
- }
- }
-
- btr_pcur_store_position(pcur, &mtr);
-
- /* The found record was not a match, but may be used
- as NEXT record (index_next). Set the relative position
- to BTR_PCUR_BEFORE, to reflect that the position of
- the persistent cursor is before the found/stored row
- (pcur->old_rec). */
- ut_ad(pcur->rel_pos == BTR_PCUR_ON);
- pcur->rel_pos = BTR_PCUR_BEFORE;
-
- err = DB_RECORD_NOT_FOUND;
-#if 0
- ut_print_name(stderr, trx, FALSE, index->name);
- fputs(" record not found 4\n", stderr);
-#endif
-
- goto normal_return;
- }
- }
-
- /* We are ready to look at a possible new index entry in the result
- set: the cursor is now placed on a user record */
-
- if (prebuilt->select_lock_type != LOCK_NONE) {
- /* Try to place a lock on the index record; note that delete
- marked records are a special case in a unique search. If there
- is a non-delete marked record, then it is enough to lock its
- existence with LOCK_REC_NOT_GAP. */
-
- /* If innodb_locks_unsafe_for_binlog option is used
- or this session is using a READ COMMITED isolation
- level we lock only the record, i.e., next-key locking is
- not used. */
-
- ulint lock_type;
-
- if (!set_also_gap_locks
- || srv_locks_unsafe_for_binlog
- || trx->isolation_level <= TRX_ISO_READ_COMMITTED
- || (unique_search && !rec_get_deleted_flag(rec, comp))) {
-
- goto no_gap_lock;
- } else {
- lock_type = LOCK_ORDINARY;
- }
-
- /* If we are doing a 'greater or equal than a primary key
- value' search from a clustered index, and we find a record
- that has that exact primary key value, then there is no need
- to lock the gap before the record, because no insert in the
- gap can be in our search range. That is, no phantom row can
- appear that way.
-
- An example: if col1 is the primary key, the search is WHERE
- col1 >= 100, and we find a record where col1 = 100, then no
- need to lock the gap before that record. */
-
- if (index == clust_index
- && mode == PAGE_CUR_GE
- && direction == 0
- && dtuple_get_n_fields_cmp(search_tuple)
- == dict_index_get_n_unique(index)
- && 0 == cmp_dtuple_rec(search_tuple, rec, offsets)) {
-no_gap_lock:
- lock_type = LOCK_REC_NOT_GAP;
- }
-
- err = sel_set_rec_lock(btr_pcur_get_block(pcur),
- rec, index, offsets,
- prebuilt->select_lock_type,
- lock_type, thr);
-
- switch (err) {
- const rec_t* old_vers;
- case DB_SUCCESS_LOCKED_REC:
- if (srv_locks_unsafe_for_binlog
- || trx->isolation_level
- <= TRX_ISO_READ_COMMITTED) {
- /* Note that a record of
- prebuilt->index was locked. */
- prebuilt->new_rec_locks = 1;
- }
- err = DB_SUCCESS;
- case DB_SUCCESS:
- break;
- case DB_LOCK_WAIT:
- /* Never unlock rows that were part of a conflict. */
- prebuilt->new_rec_locks = 0;
-
- if (UNIV_LIKELY(prebuilt->row_read_type
- != ROW_READ_TRY_SEMI_CONSISTENT)
- || unique_search
- || index != clust_index) {
-
- goto lock_wait_or_error;
- }
-
- /* The following call returns 'offsets'
- associated with 'old_vers' */
- row_sel_build_committed_vers_for_mysql(
- clust_index, prebuilt, rec,
- &offsets, &heap, &old_vers, &mtr);
-
- /* Check whether it was a deadlock or not, if not
- a deadlock and the transaction had to wait then
- release the lock it is waiting on. */
-
- err = lock_trx_handle_wait(trx);
-
- switch (err) {
- case DB_SUCCESS:
- /* The lock was granted while we were
- searching for the last committed version.
- Do a normal locking read. */
-
- offsets = rec_get_offsets(
- rec, index, offsets, ULINT_UNDEFINED,
- &heap);
- goto locks_ok;
- case DB_DEADLOCK:
- goto lock_wait_or_error;
- case DB_LOCK_WAIT:
- err = DB_SUCCESS;
- break;
- default:
- ut_error;
- }
-
- if (old_vers == NULL) {
- /* The row was not yet committed */
-
- goto next_rec;
- }
-
- did_semi_consistent_read = TRUE;
- rec = old_vers;
- break;
- default:
-
- goto lock_wait_or_error;
- }
- } else {
- /* This is a non-locking consistent read: if necessary, fetch
- a previous version of the record */
-
- if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) {
-
- /* Do nothing: we let a non-locking SELECT read the
- latest version of the record */
-
- } else if (index == clust_index) {
-
- /* Fetch a previous version of the row if the current
- one is not visible in the snapshot; if we have a very
- high force recovery level set, we try to avoid crashes
- by skipping this lookup */
-
- if (UNIV_LIKELY(srv_force_recovery < 5)
- && !lock_clust_rec_cons_read_sees(
- rec, index, offsets, trx->read_view)) {
-
- rec_t* old_vers;
- /* The following call returns 'offsets'
- associated with 'old_vers' */
- err = row_sel_build_prev_vers_for_mysql(
- trx->read_view, clust_index,
- prebuilt, rec, &offsets, &heap,
- &old_vers, &mtr);
-
- if (err != DB_SUCCESS) {
-
- goto lock_wait_or_error;
- }
-
- if (old_vers == NULL) {
- /* The row did not exist yet in
- the read view */
-
- goto next_rec;
- }
-
- rec = old_vers;
- }
- } else {
- /* We are looking into a non-clustered index,
- and to get the right version of the record we
- have to look also into the clustered index: this
- is necessary, because we can only get the undo
- information via the clustered index record. */
-
- ut_ad(!dict_index_is_clust(index));
-
- if (!lock_sec_rec_cons_read_sees(
- rec, trx->read_view)) {
- /* We should look at the clustered index.
- However, as this is a non-locking read,
- we can skip the clustered index lookup if
- the condition does not match the secondary
- index entry. */
- switch (row_search_idx_cond_check(
- buf, prebuilt, rec, offsets)) {
- case ICP_NO_MATCH:
- goto next_rec;
- case ICP_OUT_OF_RANGE:
- err = DB_RECORD_NOT_FOUND;
- goto idx_cond_failed;
- case ICP_ABORTED_BY_USER:
- err = DB_SEARCH_ABORTED_BY_USER;
- goto idx_cond_failed;
- case ICP_ERROR:
- err = DB_ERROR;
- goto idx_cond_failed;
- case ICP_MATCH:
- goto requires_clust_rec;
- }
-
- ut_error;
- }
- }
- }
-
-locks_ok:
- /* NOTE that at this point rec can be an old version of a clustered
- index record built for a consistent read. We cannot assume after this
- point that rec is on a buffer pool page. Functions like
- page_rec_is_comp() cannot be used! */
-
- if (rec_get_deleted_flag(rec, comp)) {
-
- /* The record is delete-marked: we can skip it */
-
- if ((srv_locks_unsafe_for_binlog
- || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE
- && !did_semi_consistent_read) {
-
- /* No need to keep a lock on a delete-marked record
- if we do not want to use next-key locking. */
-
- row_unlock_for_mysql(prebuilt, TRUE);
- }
-
- /* This is an optimization to skip setting the next key lock
- on the record that follows this delete-marked record. This
- optimization works because of the unique search criteria
- which precludes the presence of a range lock between this
- delete marked record and the record following it.
-
- For now this is applicable only to clustered indexes while
- doing a unique search except for HANDLER queries because
- HANDLER allows NEXT and PREV even in unique search on
- clustered index. There is scope for further optimization
- applicable to unique secondary indexes. Current behaviour is
- to widen the scope of a lock on an already delete marked record
- if the same record is deleted twice by the same transaction */
- if (index == clust_index && unique_search
- && !prebuilt->used_in_HANDLER) {
-
- err = DB_RECORD_NOT_FOUND;
-
- goto normal_return;
- }
-
- goto next_rec;
- }
-
- /* Check if the record matches the index condition. */
- switch (row_search_idx_cond_check(buf, prebuilt, rec, offsets)) {
- case ICP_NO_MATCH:
- if (did_semi_consistent_read) {
- row_unlock_for_mysql(prebuilt, TRUE);
- }
- goto next_rec;
- case ICP_ABORTED_BY_USER:
- err = DB_SEARCH_ABORTED_BY_USER;
- goto idx_cond_failed;
- case ICP_ERROR:
- err = DB_ERROR;
- goto idx_cond_failed;
- case ICP_OUT_OF_RANGE:
- err = DB_RECORD_NOT_FOUND;
- goto idx_cond_failed;
- case ICP_MATCH:
- break;
- }
-
- /* Get the clustered index record if needed, if we did not do the
- search using the clustered index... */
-
- use_clustered_index =
- (index != clust_index && prebuilt->need_to_access_clustered);
-
- if (use_clustered_index && srv_prefix_index_cluster_optimization
- && prebuilt->n_template <= index->n_fields) {
- /* ...but, perhaps avoid the clustered index lookup if
- all of the following are true:
- 1) all columns are in the secondary index
- 2) all values for columns that are prefix-only
- indexes are shorter than the prefix size
- This optimization can avoid many IOs for certain schemas.
- */
- bool row_contains_all_values = true;
- unsigned int i;
- for (i = 0; i < prebuilt->n_template; i++) {
- /* Condition (1) from above: is the field in the
- index (prefix or not)? */
- const mysql_row_templ_t* templ =
- prebuilt->mysql_template + i;
- ulint secondary_index_field_no =
- templ->rec_prefix_field_no;
- if (secondary_index_field_no == ULINT_UNDEFINED) {
- row_contains_all_values = false;
- break;
- }
- /* Condition (2) from above: if this is a
- prefix, is this row's value size shorter
- than the prefix? */
- if (templ->rec_field_is_prefix) {
- ulint record_size = rec_offs_nth_size(
- offsets,
- secondary_index_field_no);
- const dict_field_t *field =
- dict_index_get_nth_field(
- index,
- secondary_index_field_no);
- ut_a(field->prefix_len > 0);
- if (record_size >= field->prefix_len
- / templ->mbmaxlen) {
- row_contains_all_values = false;
- break;
- }
- }
- }
- /* If (1) and (2) were true for all columns above, use
- rec_prefix_field_no instead of rec_field_no, and skip
- the clustered lookup below. */
- if (row_contains_all_values) {
- for (i = 0; i < prebuilt->n_template; i++) {
- mysql_row_templ_t* templ =
- prebuilt->mysql_template + i;
- templ->rec_field_no =
- templ->rec_prefix_field_no;
- ut_a(templ->rec_field_no != ULINT_UNDEFINED);
- }
- use_clustered_index = false;
- srv_stats.n_sec_rec_cluster_reads_avoided.inc();
- }
- }
-
- if (use_clustered_index) {
-
-requires_clust_rec:
- ut_ad(index != clust_index);
- /* We use a 'goto' to the preceding label if a consistent
- read of a secondary index record requires us to look up old
- versions of the associated clustered index record. */
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- /* It was a non-clustered index and we must fetch also the
- clustered index record */
-
- mtr_has_extra_clust_latch = TRUE;
-
- /* The following call returns 'offsets' associated with
- 'clust_rec'. Note that 'clust_rec' can be an old version
- built for a consistent read. */
-
- err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec,
- thr, &clust_rec,
- &offsets, &heap, &mtr);
- switch (err) {
- case DB_SUCCESS:
- if (clust_rec == NULL) {
- /* The record did not exist in the read view */
- ut_ad(prebuilt->select_lock_type == LOCK_NONE);
-
- goto next_rec;
- }
- break;
- case DB_SUCCESS_LOCKED_REC:
- ut_a(clust_rec != NULL);
- if (srv_locks_unsafe_for_binlog
- || trx->isolation_level
- <= TRX_ISO_READ_COMMITTED) {
- /* Note that the clustered index record
- was locked. */
- prebuilt->new_rec_locks = 2;
- }
- err = DB_SUCCESS;
- break;
- default:
- goto lock_wait_or_error;
- }
-
- if (rec_get_deleted_flag(clust_rec, comp)) {
-
- /* The record is delete marked: we can skip it */
-
- if ((srv_locks_unsafe_for_binlog
- || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
- && prebuilt->select_lock_type != LOCK_NONE) {
-
- /* No need to keep a lock on a delete-marked
- record if we do not want to use next-key
- locking. */
-
- row_unlock_for_mysql(prebuilt, TRUE);
- }
-
- goto next_rec;
- }
-
- result_rec = clust_rec;
- ut_ad(rec_offs_validate(result_rec, clust_index, offsets));
-
- if (prebuilt->idx_cond) {
- /* Convert the record to MySQL format. We were
- unable to do this in row_search_idx_cond_check(),
- because the condition is on the secondary index
- and the requested column is in the clustered index.
- We convert all fields, including those that
- may have been used in ICP, because the
- secondary index may contain a column prefix
- rather than the full column. Also, as noted
- in Bug #56680, the column in the secondary
- index may be in the wrong case, and the
- authoritative case is in result_rec, the
- appropriate version of the clustered index record. */
- if (!row_sel_store_mysql_rec(
- buf, prebuilt, result_rec,
- TRUE, clust_index, offsets)) {
- goto next_rec;
- }
- }
- } else {
- result_rec = rec;
- }
-
- /* We found a qualifying record 'result_rec'. At this point,
- 'offsets' are associated with 'result_rec'. */
-
- ut_ad(rec_offs_validate(result_rec,
- result_rec != rec ? clust_index : index,
- offsets));
- ut_ad(!rec_get_deleted_flag(result_rec, comp));
-
- /* At this point, the clustered index record is protected
- by a page latch that was acquired when pcur was positioned.
- The latch will not be released until mtr_commit(&mtr). */
-
- if ((match_mode == ROW_SEL_EXACT
- || prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD)
- && prebuilt->select_lock_type == LOCK_NONE
- && !prebuilt->templ_contains_blob
- && !prebuilt->clust_index_was_generated
- && !prebuilt->used_in_HANDLER
- && !prebuilt->innodb_api
- && prebuilt->template_type
- != ROW_MYSQL_DUMMY_TEMPLATE
- && !prebuilt->in_fts_query) {
-
- /* Inside an update, for example, we do not cache rows,
- since we may use the cursor position to do the actual
- update, that is why we require ...lock_type == LOCK_NONE.
- Since we keep space in prebuilt only for the BLOBs of
- a single row, we cannot cache rows in the case there
- are BLOBs in the fields to be fetched. In HANDLER we do
- not cache rows because there the cursor is a scrollable
- cursor. */
-
- ut_a(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE);
-
- /* We only convert from InnoDB row format to MySQL row
- format when ICP is disabled. */
-
- if (!prebuilt->idx_cond) {
-
- /* We use next_buf to track the allocation of buffers
- where we store and enqueue the buffers for our
- pre-fetch optimisation.
-
- If next_buf == 0 then we store the converted record
- directly into the MySQL record buffer (buf). If it is
- != 0 then we allocate a pre-fetch buffer and store the
- converted record there.
-
- If the conversion fails and the MySQL record buffer
- was not written to then we reset next_buf so that
- we can re-use the MySQL record buffer in the next
- iteration. */
-
- next_buf = next_buf
- ? row_sel_fetch_last_buf(prebuilt) : buf;
-
- if (!row_sel_store_mysql_rec(
- next_buf, prebuilt, result_rec,
- result_rec != rec,
- result_rec != rec ? clust_index : index,
- offsets)) {
-
- if (next_buf == buf) {
- ut_a(prebuilt->n_fetch_cached == 0);
- next_buf = 0;
- }
-
- /* Only fresh inserts may contain incomplete
- externally stored columns. Pretend that such
- records do not exist. Such records may only be
- accessed at the READ UNCOMMITTED isolation
- level or when rolling back a recovered
- transaction. Rollback happens at a lower
- level, not here. */
- goto next_rec;
- }
-
- if (next_buf != buf) {
- row_sel_enqueue_cache_row_for_mysql(
- next_buf, prebuilt);
- }
- } else {
- row_sel_enqueue_cache_row_for_mysql(buf, prebuilt);
- }
-
- if (prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE) {
- goto next_rec;
- }
-
- } else {
- if (UNIV_UNLIKELY
- (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE)) {
- /* CHECK TABLE: fetch the row */
-
- if (result_rec != rec
- && !prebuilt->need_to_access_clustered) {
- /* We used 'offsets' for the clust
- rec, recalculate them for 'rec' */
- offsets = rec_get_offsets(rec, index, offsets,
- ULINT_UNDEFINED,
- &heap);
- result_rec = rec;
- }
-
- memcpy(buf + 4, result_rec
- - rec_offs_extra_size(offsets),
- rec_offs_size(offsets));
- mach_write_to_4(buf,
- rec_offs_extra_size(offsets) + 4);
- } else if (!prebuilt->idx_cond && !prebuilt->innodb_api) {
- /* The record was not yet converted to MySQL format. */
- if (!row_sel_store_mysql_rec(
- buf, prebuilt, result_rec,
- result_rec != rec,
- result_rec != rec ? clust_index : index,
- offsets)) {
- /* Only fresh inserts may contain
- incomplete externally stored
- columns. Pretend that such records do
- not exist. Such records may only be
- accessed at the READ UNCOMMITTED
- isolation level or when rolling back a
- recovered transaction. Rollback
- happens at a lower level, not here. */
- goto next_rec;
- }
- }
-
- if (prebuilt->clust_index_was_generated) {
- row_sel_store_row_id_to_prebuilt(
- prebuilt, result_rec,
- result_rec == rec ? index : clust_index,
- offsets);
- }
- }
-
- /* From this point on, 'offsets' are invalid. */
-
- /* We have an optimization to save CPU time: if this is a consistent
- read on a unique condition on the clustered index, then we do not
- store the pcur position, because any fetch next or prev will anyway
- return 'end of file'. Exceptions are locking reads and the MySQL
- HANDLER command where the user can move the cursor with PREV or NEXT
- even after a unique search. */
-
- err = DB_SUCCESS;
-
-idx_cond_failed:
- if (!unique_search
- || !dict_index_is_clust(index)
- || direction != 0
- || prebuilt->select_lock_type != LOCK_NONE
- || prebuilt->used_in_HANDLER
- || prebuilt->innodb_api) {
-
- /* Inside an update always store the cursor position */
-
- btr_pcur_store_position(pcur, &mtr);
-
- if (prebuilt->innodb_api) {
- prebuilt->innodb_api_rec = result_rec;
- }
- }
-
- goto normal_return;
-
-next_rec:
- /* Reset the old and new "did semi-consistent read" flags. */
- if (UNIV_UNLIKELY(prebuilt->row_read_type
- == ROW_READ_DID_SEMI_CONSISTENT)) {
- prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
- }
- did_semi_consistent_read = FALSE;
- prebuilt->new_rec_locks = 0;
-
- /*-------------------------------------------------------------*/
- /* PHASE 5: Move the cursor to the next index record */
-
- /* NOTE: For moves_up==FALSE, the mini-transaction will be
- committed and restarted every time when switching b-tree
- pages. For moves_up==TRUE in index condition pushdown, we can
- scan an entire secondary index tree within a single
- mini-transaction. As long as the prebuilt->idx_cond does not
- match, we do not need to consult the clustered index or
- return records to MySQL, and thus we can avoid repositioning
- the cursor. What prevents us from buffer-fixing all leaf pages
- within the mini-transaction is the btr_leaf_page_release()
- call in btr_pcur_move_to_next_page(). Only the leaf page where
- the cursor is positioned will remain buffer-fixed. */
-
- if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) {
- /* We must commit mtr if we are moving to the next
- non-clustered index record, because we could break the
- latching order if we would access a different clustered
- index page right away without releasing the previous. */
-
- btr_pcur_store_position(pcur, &mtr);
-
- mtr_commit(&mtr);
- mtr_has_extra_clust_latch = FALSE;
-
- mtr_start_trx(&mtr, trx);
- if (sel_restore_position_for_mysql(&same_user_rec,
- BTR_SEARCH_LEAF,
- pcur, moves_up, &mtr)) {
-#ifdef UNIV_SEARCH_DEBUG
- cnt++;
-#endif /* UNIV_SEARCH_DEBUG */
-
- goto rec_loop;
- }
- }
-
- if (moves_up) {
- if (UNIV_UNLIKELY(!btr_pcur_move_to_next(pcur, &mtr))) {
-not_moved:
- btr_pcur_store_position(pcur, &mtr);
-
- if (match_mode != 0) {
- err = DB_RECORD_NOT_FOUND;
- } else {
- err = DB_END_OF_INDEX;
- }
-
- goto normal_return;
- }
- } else {
- if (UNIV_UNLIKELY(!btr_pcur_move_to_prev(pcur, &mtr))) {
- goto not_moved;
- }
- }
-
-#ifdef UNIV_SEARCH_DEBUG
- cnt++;
-#endif /* UNIV_SEARCH_DEBUG */
-
- goto rec_loop;
-
-lock_wait_or_error:
- /* Reset the old and new "did semi-consistent read" flags. */
- if (UNIV_UNLIKELY(prebuilt->row_read_type
- == ROW_READ_DID_SEMI_CONSISTENT)) {
- prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
- }
- did_semi_consistent_read = FALSE;
-
- /*-------------------------------------------------------------*/
-
- if (rec) {
- btr_pcur_store_position(pcur, &mtr);
- }
-
-lock_table_wait:
- mtr_commit(&mtr);
- mtr_has_extra_clust_latch = FALSE;
-
- trx->error_state = err;
-
- /* The following is a patch for MySQL */
-
- que_thr_stop_for_mysql(thr);
-
- thr->lock_state = QUE_THR_LOCK_ROW;
-
- if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
- /* It was a lock wait, and it ended */
-
- thr->lock_state = QUE_THR_LOCK_NOLOCK;
- mtr_start_trx(&mtr, trx);
-
- /* Table lock waited, go try to obtain table lock
- again */
- if (table_lock_waited) {
- table_lock_waited = FALSE;
-
- goto wait_table_again;
- }
-
- sel_restore_position_for_mysql(&same_user_rec,
- BTR_SEARCH_LEAF, pcur,
- moves_up, &mtr);
-
- if ((srv_locks_unsafe_for_binlog
- || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
- && !same_user_rec) {
-
- /* Since we were not able to restore the cursor
- on the same user record, we cannot use
- row_unlock_for_mysql() to unlock any records, and
- we must thus reset the new rec lock info. Since
- in lock0lock.cc we have blocked the inheriting of gap
- X-locks, we actually do not have any new record locks
- set in this case.
-
- Note that if we were able to restore on the 'same'
- user record, it is still possible that we were actually
- waiting on a delete-marked record, and meanwhile
- it was removed by purge and inserted again by some
- other user. But that is no problem, because in
- rec_loop we will again try to set a lock, and
- new_rec_lock_info in trx will be right at the end. */
-
- prebuilt->new_rec_locks = 0;
- }
-
- mode = pcur->search_mode;
-
- goto rec_loop;
- }
-
- thr->lock_state = QUE_THR_LOCK_NOLOCK;
-
-#ifdef UNIV_SEARCH_DEBUG
- /* fputs("Using ", stderr);
- dict_index_name_print(stderr, index);
- fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
-#endif /* UNIV_SEARCH_DEBUG */
- goto func_exit;
-
-normal_return:
- /*-------------------------------------------------------------*/
- que_thr_stop_for_mysql_no_error(thr, trx);
-
- mtr_commit(&mtr);
-
- if (prebuilt->idx_cond != 0) {
-
- /* When ICP is active we don't write to the MySQL buffer
- directly, only to buffers that are enqueued in the pre-fetch
- queue. We need to dequeue the first buffer and copy the contents
- to the record buffer that was passed in by MySQL. */
-
- if (prebuilt->n_fetch_cached > 0) {
- row_sel_dequeue_cached_row_for_mysql(buf, prebuilt);
- err = DB_SUCCESS;
- }
-
- } else if (next_buf != 0) {
-
- /* We may or may not have enqueued some buffers to the
- pre-fetch queue, but we definitely wrote to the record
- buffer passed to use by MySQL. */
-
- DEBUG_SYNC_C("row_search_cached_row");
- err = DB_SUCCESS;
- }
-
-#ifdef UNIV_SEARCH_DEBUG
- /* fputs("Using ", stderr);
- dict_index_name_print(stderr, index);
- fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
-#endif /* UNIV_SEARCH_DEBUG */
-
-func_exit:
- trx->op_info = "";
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
- /* Set or reset the "did semi-consistent read" flag on return.
- The flag did_semi_consistent_read is set if and only if
- the record being returned was fetched with a semi-consistent read. */
- ut_ad(prebuilt->row_read_type != ROW_READ_WITH_LOCKS
- || !did_semi_consistent_read);
-
- if (UNIV_UNLIKELY(prebuilt->row_read_type != ROW_READ_WITH_LOCKS)) {
- if (UNIV_UNLIKELY(did_semi_consistent_read)) {
- prebuilt->row_read_type = ROW_READ_DID_SEMI_CONSISTENT;
- } else {
- prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
- }
- }
-
- ut_ad(!trx->has_search_latch);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!btr_search_own_any());
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
-
- DEBUG_SYNC_C("innodb_row_search_for_mysql_exit");
-
- return(err);
-}
-
-/*******************************************************************//**
-Decides whether MySQL may currently use the query cache for a table,
-either to fetch a consistent read result from it or to store one into it.
-As a side effect the transaction is started if it was not yet started, and
-when the answer is TRUE at isolation level REPEATABLE READ or higher, a
-read view is assigned to a transaction that does not have one yet.
-@return TRUE if storing or retrieving from the query cache is permitted */
-UNIV_INTERN
-ibool
-row_search_check_if_query_cache_permitted(
-/*======================================*/
-	trx_t*		trx,		/*!< in: transaction object */
-	const char*	norm_name)	/*!< in: concatenation of database name,
-					'/' char, table name */
-{
-	dict_table_t*	cached_table;
-	ibool		permitted;
-
-	/* While recovered XA transactions in prepared state exist, the
-	query cache is refused for every table. The table locks of those
-	transactions are not restored, so the lock count test further down
-	could wrongly answer TRUE. See "Bug#14658648 XA ROLLBACK
-	(DISTRIBUTED DATABASE) NOT WORKING WITH QUERY CACHE ENABLED".
-	trx_sys->n_prepared_recovered_trx is read without mutex
-	protection; it is word size, so the read cannot be torn. */
-	if (trx_sys->n_prepared_recovered_trx > 0) {
-		return(FALSE);
-	}
-
-	cached_table = dict_table_open_on_name(
-		norm_name, FALSE, FALSE, DICT_ERR_IGNORE_NONE);
-
-	if (cached_table == NULL) {
-		return(FALSE);
-	}
-
-	/* The comparison below needs trx->id, so make sure the
-	transaction has been started. */
-	trx_start_if_not_started(trx);
-
-	/* Permission is refused if the table has any locks, or if some
-	trx has invalidated the cache up to our trx id. The lock type is
-	not examined, although only IX type locks would actually require
-	a refusal. */
-	permitted = (lock_table_get_n_locks(cached_table) == 0
-		     && trx->id >= cached_table->query_cache_inv_trx_id);
-
-	/* If the isolation level is high, assign a read view for the
-	transaction if it does not yet have one */
-	if (permitted
-	    && trx->isolation_level >= TRX_ISO_REPEATABLE_READ
-	    && !trx->read_view) {
-
-		trx->read_view = read_view_open_now(
-			trx->id, trx->prebuilt_view);
-		trx->global_read_view = trx->read_view;
-	}
-
-	dict_table_close(cached_table, FALSE, FALSE);
-
-	return(permitted);
-}
-
-/*******************************************************************//**
-Reads the AUTOINC column value of a record. A column that is SQL NULL
-yields 0, and so does a negative value of a column that is not unsigned.
-Only the main types DATA_INT, DATA_FLOAT and DATA_DOUBLE are handled; any
-other main type hits ut_error.
-@return value read from the column */
-static
-ib_uint64_t
-row_search_autoinc_read_column(
-/*===========================*/
-	dict_index_t*	index,		/*!< in: index to read from */
-	const rec_t*	rec,		/*!< in: current rec */
-	ulint		col_no,		/*!< in: column number */
-	ulint		mtype,		/*!< in: column main type */
-	ibool		unsigned_type)	/*!< in: signed or unsigned flag */
-{
-	ib_uint64_t	result = 0;
-	mem_heap_t*	heap = NULL;
-	ulint		offs_buf[REC_OFFS_NORMAL_SIZE];
-	ulint*		offs;
-
-	rec_offs_init(offs_buf);
-
-	/* Ask for the offsets of the first col_no + 1 fields only. */
-	offs = rec_get_offsets(rec, index, offs_buf, col_no + 1, &heap);
-
-	/* A NULL auto-increment column has no value to decode; the
-	result stays 0 in that case. */
-	if (!rec_offs_nth_sql_null(offs, col_no)) {
-		ulint		len;
-		const byte*	data = rec_get_nth_field(
-			rec, offs, col_no, &len);
-
-		switch (mtype) {
-		case DATA_INT:
-			ut_a(len <= sizeof result);
-			result = mach_read_int_type(data, len, unsigned_type);
-			break;
-
-		case DATA_FLOAT:
-			ut_a(len == sizeof(float));
-			result = (ib_uint64_t) mach_float_read(data);
-			break;
-
-		case DATA_DOUBLE:
-			ut_a(len == sizeof(double));
-			result = (ib_uint64_t) mach_double_read(data);
-			break;
-
-		default:
-			ut_error;
-		}
-
-		/* Signed columns never report a negative value. */
-		if (!unsigned_type && (ib_int64_t) result < 0) {
-			result = 0;
-		}
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	return(result);
-}
-
-/** Find the largest record of an index that is not delete-marked.
-The cursor is opened at the high/right end of the index and is moved
-backwards until a page yields a user record.
-@param[in]	index	index tree
-@param[in,out]	mtr	mini-transaction (may be committed and restarted)
-@return maximum record, page s-latched in mtr
-@retval NULL if there are no records, or if all of them are delete-marked */
-static
-const rec_t*
-row_search_get_max_rec(
-	dict_index_t*	index,
-	mtr_t*		mtr)
-{
-	btr_pcur_t	cursor;
-	const rec_t*	max_rec = NULL;
-
-	/* Open at the high/right end (false), and init cursor */
-	btr_pcur_open_at_index_side(
-		false, index, BTR_SEARCH_LEAF, &cursor, true, 0, mtr);
-
-	for (;;) {
-		const rec_t*	candidate = page_find_rec_max_not_deleted(
-			btr_pcur_get_page(&cursor));
-
-		if (page_rec_is_user_rec(candidate)) {
-			max_rec = candidate;
-			break;
-		}
-
-		/* This page has no usable user record. Put the cursor
-		before its first record and step backwards; stop when
-		the cursor cannot be moved any further. */
-		btr_pcur_move_before_first_on_page(&cursor);
-
-		if (!btr_pcur_move_to_prev(&cursor, mtr)) {
-			break;
-		}
-	}
-
-	btr_pcur_close(&cursor);
-
-	return(max_rec);
-}
-
-/*******************************************************************//**
-Read the max AUTOINC value from an index. The autoinc column must be the
-first field of the index. *value is set to 0 unless a record that is not
-delete-marked is found.
-@return DB_SUCCESS if all OK else error code, DB_RECORD_NOT_FOUND if
-column name can't be found in index */
-UNIV_INTERN
-dberr_t
-row_search_max_autoinc(
-/*===================*/
-	dict_index_t*	index,		/*!< in: index to search */
-	const char*	col_name,	/*!< in: name of autoinc column */
-	ib_uint64_t*	value)		/*!< out: AUTOINC value read */
-{
-	dict_field_t*	first_field = dict_index_get_nth_field(index, 0);
-
-	*value = 0;
-
-	/* Only the first field of the index is compared with col_name. */
-	if (strcmp(col_name, first_field->name) != 0) {
-		return(DB_RECORD_NOT_FOUND);
-	}
-
-	mtr_t		mtr;
-
-	mtr_start(&mtr);
-
-	const rec_t*	max_rec = row_search_get_max_rec(index, &mtr);
-
-	if (max_rec != NULL) {
-		ibool	is_unsigned = (
-			first_field->col->prtype & DATA_UNSIGNED);
-
-		*value = row_search_autoinc_read_column(
-			index, max_rec, 0,
-			first_field->col->mtype, is_unsigned);
-	}
-
-	mtr_commit(&mtr);
-
-	return(DB_SUCCESS);
-}
diff --git a/storage/xtradb/row/row0uins.cc b/storage/xtradb/row/row0uins.cc
deleted file mode 100644
index f14a4ef9bcf..00000000000
--- a/storage/xtradb/row/row0uins.cc
+++ /dev/null
@@ -1,475 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0uins.cc
-Fresh insert undo
-
-Created 2/25/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0uins.h"
-
-#ifdef UNIV_NONINL
-#include "row0uins.ic"
-#endif
-
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "dict0crea.h"
-#include "trx0undo.h"
-#include "trx0roll.h"
-#include "btr0btr.h"
-#include "mach0data.h"
-#include "row0undo.h"
-#include "row0vers.h"
-#include "row0log.h"
-#include "trx0trx.h"
-#include "trx0rec.h"
-#include "row0row.h"
-#include "row0upd.h"
-#include "que0que.h"
-#include "ibuf0ibuf.h"
-#include "log0log.h"
-
-/*************************************************************************
-IMPORTANT NOTE: Any operation that generates redo MUST check that there
-is enough space in the redo log before that operation. This is
-done by calling log_free_check(). The reason for checking the
-availability of the redo log space before the start of the operation is
-that we MUST not hold any synchronization objects when performing the
-check.
-If you make a change in this module make sure that no codepath is
-introduced where a call to log_free_check() is bypassed. */
-
-/***************************************************************//**
-Removes a clustered index record. The pcur in node was positioned on the
-record, now it is detached.
-
-Steps, each run inside a mini-transaction that is local to this function:
-(1) node->pcur is restored with BTR_MODIFY_LEAF, and the restore is
-asserted to succeed. If the index is in online DDL, the index lock is
-S-latched first, and the removal is written to the online table log if the
-index is still in online DDL after the cursor has been restored.
-(2) If the row belongs to the table whose id is DICT_INDEXES_ID, the index
-tree described by the row is dropped, the mini-transaction is committed
-and restarted, and the cursor is restored again.
-(3) An optimistic delete is tried. If it fails, a pessimistic delete with
-BTR_MODIFY_TREE is run, and repeated up to BTR_CUR_RETRY_DELETE_N_TIMES
-more times for as long as it reports DB_OUT_OF_FILE_SPACE.
-(4) On every path the mini-transaction is committed and
-trx_undo_rec_release() is called for node->undo_no.
-
-NOTE(review): log_free_check() is not called in this function; its caller
-row_undo_ins() calls it immediately before. The retry loop starts new
-mini-transactions without another check - confirm that this is intended.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_undo_ins_remove_clust_rec(
-/*==========================*/
- undo_node_t* node) /*!< in: undo node; node->pcur locates the record */
-{
- btr_cur_t* btr_cur;
- ibool success;
- dberr_t err;
- ulint n_tries = 0;
- mtr_t mtr;
- dict_index_t* index = node->pcur.btr_cur.index;
- bool online;
-
- ut_ad(dict_index_is_clust(index));
-
- mtr_start(&mtr);
-
- /* This is similar to row_undo_mod_clust(). The DDL thread may
- already have copied this row from the log to the new table.
- We must log the removal, so that the row will be correctly
- purged. However, we can log the removal out of sync with the
- B-tree modification. */
-
- online = dict_index_is_online_ddl(index);
- if (online) {
- ut_ad(node->trx->dict_operation_lock_mode
- != RW_X_LATCH);
- ut_ad(node->table->id != DICT_INDEXES_ID);
- mtr_s_lock(dict_index_get_lock(index), &mtr);
- }
-
- success = btr_pcur_restore_position(
- online
- ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
- : BTR_MODIFY_LEAF, &node->pcur, &mtr);
- ut_a(success);
-
- btr_cur = btr_pcur_get_btr_cur(&node->pcur);
-
- ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur), btr_cur->index)
- == node->trx->id);
-
- if (online && dict_index_is_online_ddl(index)) {
- const rec_t* rec = btr_cur_get_rec(btr_cur);
- mem_heap_t* heap = NULL;
- const ulint* offsets = rec_get_offsets(
- rec, index, NULL, ULINT_UNDEFINED, &heap);
- row_log_table_delete(rec, index, offsets, NULL);
- mem_heap_free(heap);
- }
-
- if (node->table->id == DICT_INDEXES_ID) {
- ut_ad(!online);
- ut_ad(node->trx->dict_operation_lock_mode == RW_X_LATCH);
-
- /* Drop the index tree associated with the row in
- SYS_INDEXES table. The mini-transaction is committed
- right after the drop, so a new one is started and the
- cursor position is restored before the delete below: */
-
- dict_drop_index_tree(btr_pcur_get_rec(&(node->pcur)), &mtr);
-
- mtr_commit(&mtr);
-
- mtr_start(&mtr);
-
- success = btr_pcur_restore_position(
- BTR_MODIFY_LEAF, &node->pcur, &mtr);
- ut_a(success);
- }
-
- if (btr_cur_optimistic_delete(btr_cur, 0, &mtr)) {
- err = DB_SUCCESS;
- goto func_exit;
- }
-
- btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
-retry:
- /* If did not succeed, try pessimistic descent to tree. The
- previous mini-transaction was committed before reaching this
- label, both on the first pass and on every retry. */
- mtr_start(&mtr);
-
- success = btr_pcur_restore_position(BTR_MODIFY_TREE,
- &(node->pcur), &mtr);
- ut_a(success);
-
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
- trx_is_recv(node->trx)
- ? RB_RECOVERY
- : RB_NORMAL, &mtr);
-
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space.
- Until the retry budget is used up, commit, sleep and retry. */
-
- if (err == DB_OUT_OF_FILE_SPACE
- && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) {
-
- btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
-
- n_tries++;
-
- os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
-
- goto retry;
- }
-
-func_exit:
- btr_pcur_commit_specify_mtr(&node->pcur, &mtr);
- trx_undo_rec_release(node->trx, node->undo_no);
-
- return(err);
-}
-
-/***************************************************************//**
-Removes a secondary index entry if found.
-
-log_free_check() is called first, before the mini-transaction is started.
-The index lock is S-latched for BTR_MODIFY_LEAF and X-latched for
-BTR_MODIFY_TREE. DB_SUCCESS is also returned when nothing is deleted here:
-when row_log_online_op_try() reports that it took care of the operation,
-and when the entry is not found in the index. In BTR_MODIFY_LEAF mode a
-failed optimistic delete is reported as DB_FAIL, so that the caller can
-retry with BTR_MODIFY_TREE.
-@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_undo_ins_remove_sec_low(
-/*========================*/
- ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
- depending on whether we wish optimistic or
- pessimistic descent down the index tree */
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry) /*!< in: index entry to remove */
-{
- btr_pcur_t pcur;
- btr_cur_t* btr_cur;
- dberr_t err = DB_SUCCESS;
- mtr_t mtr;
- enum row_search_result search_result;
-
- log_free_check();
-
- mtr_start(&mtr);
-
- if (mode == BTR_MODIFY_LEAF) {
- mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
- mtr_s_lock(dict_index_get_lock(index), &mtr);
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
- mtr_x_lock(dict_index_get_lock(index), &mtr);
- }
-
- if (row_log_online_op_try(index, entry, 0)) {
- goto func_exit_no_pcur;
- }
-
- search_result = row_search_index_entry(index, entry, mode,
- &pcur, &mtr);
-
- switch (search_result) {
- case ROW_NOT_FOUND:
- goto func_exit;
- case ROW_FOUND:
- break;
- case ROW_BUFFERED:
- case ROW_NOT_DELETED_REF:
- /* These are invalid outcomes, because the mode passed
- to row_search_index_entry() did not include any of the
- flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
- ut_error;
- }
-
- btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- if (mode != BTR_MODIFY_TREE) {
- err = btr_cur_optimistic_delete(btr_cur, 0, &mtr)
- ? DB_SUCCESS : DB_FAIL;
- } else {
- /* No need to distinguish RB_RECOVERY here, because we
- are deleting a secondary index record: the distinction
- between RB_NORMAL and RB_RECOVERY only matters when
- deleting a record that contains externally stored
- columns. */
- ut_ad(!dict_index_is_clust(index));
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
- RB_NORMAL, &mtr);
- }
-func_exit:
- btr_pcur_close(&pcur);
-func_exit_no_pcur:
- mtr_commit(&mtr);
-
- return(err);
-}
-
-/***************************************************************//**
-Removes a secondary index entry from the index if found. An optimistic
-descent down the tree is tried once; if it does not report DB_SUCCESS, the
-pessimistic descent is run, and repeated after a sleep up to
-BTR_CUR_RETRY_DELETE_N_TIMES more times while it keeps failing.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_undo_ins_remove_sec(
-/*====================*/
-	dict_index_t*	index,	/*!< in: index */
-	dtuple_t*	entry)	/*!< in: index entry to remove */
-{
-	/* Try first optimistic descent to the B-tree */
-
-	if (row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry)
-	    == DB_SUCCESS) {
-
-		return(DB_SUCCESS);
-	}
-
-	/* Try then pessimistic descent to the B-tree */
-
-	for (ulint attempt = 0; ; attempt++) {
-		dberr_t	err = row_undo_ins_remove_sec_low(
-			BTR_MODIFY_TREE, index, entry);
-
-		/* The delete operation may fail if we have little
-		file space left: TODO: easiest to crash the database
-		and restart with more file space */
-
-		if (err == DB_SUCCESS
-		    || attempt >= BTR_CUR_RETRY_DELETE_N_TIMES) {
-
-			return(err);
-		}
-
-		os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME);
-	}
-}
-
-/***********************************************************//**
-Parses the row reference and other info in a fresh insert undo record.
-
-Sets node->rec_type, clears node->update and opens the table named by the
-table id of the undo record into node->table. When the undo is to be
-skipped, node->table is NULL on return; this happens if the table cannot be
-opened, if its file_unreadable flag is set, if it has no first index (a
-message is printed to stderr), or if row_undo_search_clust_to_pcur()
-returns false. Otherwise node->ref holds the row reference built in
-node->heap and the table stays open for the caller to close.
-
-The undo number parsed from the record goes to a local variable only;
-node->undo_no is not written here. */
-static
-void
-row_undo_ins_parse_undo_rec(
-/*========================*/
- undo_node_t* node, /*!< in/out: row undo node */
- ibool dict_locked) /*!< in: TRUE if own dict_sys->mutex */
-{
- dict_index_t* clust_index;
- byte* ptr;
- undo_no_t undo_no;
- table_id_t table_id;
- ulint type;
- ulint dummy;
- bool dummy_extern;
-
- ut_ad(node);
-
- ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy,
- &dummy_extern, &undo_no, &table_id);
- ut_ad(type == TRX_UNDO_INSERT_REC);
- node->rec_type = type;
-
- node->update = NULL;
- node->table = dict_table_open_on_id(
- table_id, dict_locked, DICT_TABLE_OP_NORMAL);
-
- /* Skip the UNDO if we can't find the table or the .ibd file.
- If the table was not found there is nothing to close, hence the
- empty first branch. The close_table label inside the second branch
- is also the target of the gotos in the last branch. */
- if (UNIV_UNLIKELY(node->table == NULL)) {
- } else if (UNIV_UNLIKELY(node->table->file_unreadable)) {
-close_table:
- dict_table_close(node->table, dict_locked, FALSE);
- node->table = NULL;
- } else {
- clust_index = dict_table_get_first_index(node->table);
-
- if (clust_index != NULL) {
- trx_undo_rec_get_row_ref(
- ptr, clust_index, &node->ref, node->heap);
-
- if (!row_undo_search_clust_to_pcur(node)) {
- goto close_table;
- }
-
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: table ");
- ut_print_name(stderr, node->trx, TRUE,
- node->table->name);
- fprintf(stderr, " has no indexes, "
- "ignoring the table\n");
- goto close_table;
- }
- }
-}
-
-/***************************************************************//**
-Removes secondary index records.
-
-Starts at node->index and handles one index per loop pass: indexes with
-the DICT_FTS type bit are skipped, and for the others an index entry is
-built from node->row and node->ext and passed to row_undo_ins_remove_sec().
-The loop stops at the first error. On return node->index is the index at
-which the loop stopped: NULL after all indexes were handled, or the index
-whose removal failed.
-
-NOTE(review): dict_table_next_uncorrupted_index(index) is used as a bare
-statement and the loop relies on it to advance 'index'; presumably it is a
-macro that assigns to its argument - confirm in dict0dict.h.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_undo_ins_remove_sec_rec(
-/*========================*/
- undo_node_t* node) /*!< in/out: row undo node */
-{
- dberr_t err = DB_SUCCESS;
- dict_index_t* index = node->index;
- mem_heap_t* heap;
-
- heap = mem_heap_create(1024);
-
- while (index != NULL) {
- dtuple_t* entry;
-
- if (index->type & DICT_FTS) {
- dict_table_next_uncorrupted_index(index);
- continue;
- }
-
- /* An insert undo record TRX_UNDO_INSERT_REC will
- always contain all fields of the index. It does not
- matter if any indexes were created afterwards; all
- index entries can be reconstructed from the row. */
- entry = row_build_index_entry(
- node->row, node->ext, index, heap);
- if (UNIV_UNLIKELY(!entry)) {
- /* The database must have crashed after
- inserting a clustered index record but before
- writing all the externally stored columns of
- that record, or a statement is being rolled
- back because an error occurred while storing
- off-page columns.
-
- Because secondary index entries are inserted
- after the clustered index record, we may
- assume that the secondary index record does
- not exist. */
- } else {
- err = row_undo_ins_remove_sec(index, entry);
-
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
- goto func_exit;
- }
- }
-
- mem_heap_empty(heap);
- dict_table_next_uncorrupted_index(index);
- }
-
-func_exit:
- node->index = index;
- mem_heap_free(heap);
- return(err);
-}
-
-/***********************************************************//**
-Undoes a fresh insert of a row to a table. A fresh insert means that
-the same clustered index unique key did not have any record, even delete
-marked, at the time of the insert. InnoDB is eager in a rollback:
-if it figures out that an index record will be removed in the purge
-anyway, it will remove it in the rollback.
-
-The undo record is parsed first. If that leaves node->table NULL, the undo
-record is released and DB_SUCCESS is returned without touching any index.
-Otherwise the secondary index records are removed first, and only if that
-succeeds is the clustered index record removed, after a log_free_check().
-For the table with id DICT_INDEXES_ID the clustered removal runs while
-holding dict_sys->mutex, which is acquired here unless the transaction's
-dict_operation_lock_mode is RW_X_LATCH. The table is closed and
-node->table is reset to NULL before returning.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-UNIV_INTERN
-dberr_t
-row_undo_ins(
-/*=========*/
- undo_node_t* node) /*!< in: row undo node */
-{
- dberr_t err;
- ibool dict_locked;
-
- ut_ad(node->state == UNDO_NODE_INSERT);
-
- dict_locked = node->trx->dict_operation_lock_mode == RW_X_LATCH;
-
- row_undo_ins_parse_undo_rec(node, dict_locked);
-
- if (node->table == NULL) {
- trx_undo_rec_release(node->trx, node->undo_no);
-
- return(DB_SUCCESS);
- }
-
- /* Iterate over all the indexes and undo the insert.*/
-
- node->index = dict_table_get_first_index(node->table);
- ut_ad(dict_index_is_clust(node->index));
- /* Skip the clustered index (the first index) */
- node->index = dict_table_get_next_index(node->index);
-
- dict_table_skip_corrupt_index(node->index);
-
- err = row_undo_ins_remove_sec_rec(node);
-
- if (err == DB_SUCCESS) {
-
- log_free_check();
-
- if (node->table->id == DICT_INDEXES_ID) {
-
- if (!dict_locked) {
- mutex_enter(&dict_sys->mutex);
- }
- }
-
- // FIXME: We need to update the dict_index_t::space and
- // page number fields too.
- err = row_undo_ins_remove_clust_rec(node);
-
- if (node->table->id == DICT_INDEXES_ID
- && !dict_locked) {
-
- mutex_exit(&dict_sys->mutex);
- }
- }
-
- dict_table_close(node->table, dict_locked, FALSE);
-
- node->table = NULL;
-
- return(err);
-}
diff --git a/storage/xtradb/row/row0umod.cc b/storage/xtradb/row/row0umod.cc
deleted file mode 100644
index 8deba4f00a5..00000000000
--- a/storage/xtradb/row/row0umod.cc
+++ /dev/null
@@ -1,1168 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0umod.cc
-Undo modify of a row
-
-Created 2/27/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0umod.h"
-
-#ifdef UNIV_NONINL
-#include "row0umod.ic"
-#endif
-
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "trx0undo.h"
-#include "trx0roll.h"
-#include "btr0btr.h"
-#include "mach0data.h"
-#include "row0undo.h"
-#include "row0vers.h"
-#include "row0log.h"
-#include "trx0trx.h"
-#include "trx0rec.h"
-#include "row0row.h"
-#include "row0upd.h"
-#include "que0que.h"
-#include "log0log.h"
-
-/* Considerations on undoing a modify operation.
-(1) Undoing a delete marking: all index records should be found. Some of
-them may have delete mark already FALSE, if the delete mark operation was
-stopped underway, or if the undo operation ended prematurely because of a
-system crash.
-(2) Undoing an update of a delete unmarked record: the newer version of
-an updated secondary index entry should be removed if no prior version
-of the clustered index record requires its existence. Otherwise, it should
-be delete marked.
-(3) Undoing an update of a delete marked record. In this kind of update a
-delete marked clustered index record was delete unmarked and possibly also
-some of its fields were changed. Now, it is possible that the delete marked
-version has become obsolete at the time the undo is started. */
-
-/*************************************************************************
-IMPORTANT NOTE: Any operation that generates redo MUST check that there
-is enough space in the redo log before that operation. This is
-done by calling log_free_check(). The reason for checking the
-availability of the redo log space before the start of the operation is
-that we MUST not hold any synchronization objects when performing the
-check.
-If you make a change in this module make sure that no codepath is
-introduced where a call to log_free_check() is bypassed. */
-
-/***********************************************************//**
-Undoes a modify in a clustered index record.
-The persistent cursor node->pcur is restored in the given latch mode and
-node->update is applied to the record it points to: optimistically
-(btr_cur_optimistic_update()) unless mode is BTR_MODIFY_TREE, in which
-case btr_cur_pessimistic_update() is used.
-If mode is anything other than plain BTR_MODIFY_LEAF and the index is in
-online DDL, *rebuilt_old_pk and sys are filled in by
-row_log_table_get_pk() before the update; otherwise *rebuilt_old_pk is
-set to NULL. Note that row_undo_mod_clust() passes
-BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED for an online index, which also
-counts as "not plain BTR_MODIFY_LEAF" here.
-@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_undo_mod_clust_low(
-/*===================*/
- undo_node_t* node, /*!< in: row undo node */
- ulint** offsets,/*!< out: rec_get_offsets() on the record */
- mem_heap_t** offsets_heap,
- /*!< in/out: memory heap that can be emptied */
- mem_heap_t* heap, /*!< in/out: memory heap */
- const dtuple_t**rebuilt_old_pk,
- /*!< out: row_log_table_get_pk()
- before the update, or NULL if
- the table is not being rebuilt online or
- the PRIMARY KEY definition does not change */
- byte* sys, /*!< out: DB_TRX_ID, DB_ROLL_PTR
- for row_log_table_delete() */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr, /*!< in: mtr; must be committed before
- latching any further pages */
- ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
-{
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- dberr_t err;
-#ifdef UNIV_DEBUG
- ibool success;
-#endif /* UNIV_DEBUG */
-
- pcur = &node->pcur;
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
-#ifdef UNIV_DEBUG
- success =
-#endif /* UNIV_DEBUG */
- btr_pcur_restore_position(mode, pcur, mtr); /* result is kept only under UNIV_DEBUG */
-
- ut_ad(success);
- ut_ad(rec_get_trx_id(btr_cur_get_rec(btr_cur),
- btr_cur_get_index(btr_cur))
- == thr_get_trx(thr)->id);
-
- if (mode != BTR_MODIFY_LEAF
- && dict_index_is_online_ddl(btr_cur_get_index(btr_cur))) {
- *rebuilt_old_pk = row_log_table_get_pk(
- btr_cur_get_rec(btr_cur),
- btr_cur_get_index(btr_cur), NULL, sys, &heap);
- } else {
- *rebuilt_old_pk = NULL;
- }
-
- if (mode != BTR_MODIFY_TREE) {
- ut_ad((mode & ~BTR_ALREADY_S_LATCHED) == BTR_MODIFY_LEAF);
-
- err = btr_cur_optimistic_update(
- BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG
- | BTR_KEEP_SYS_FLAG,
- btr_cur, offsets, offsets_heap,
- node->update, node->cmpl_info,
- thr, thr_get_trx(thr)->id, mtr);
- } else {
- big_rec_t* dummy_big_rec;
-
- err = btr_cur_pessimistic_update(
- BTR_NO_LOCKING_FLAG
- | BTR_NO_UNDO_LOG_FLAG
- | BTR_KEEP_SYS_FLAG,
- btr_cur, offsets, offsets_heap, heap,
- &dummy_big_rec, node->update,
- node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);
-
- ut_a(!dummy_big_rec); /* the undo is not expected to produce off-page columns */
- }
-
- return(err);
-}
-
-/***********************************************************//**
-Purges a clustered index record after undo if possible.
-This is attempted when the record was inserted by updating a
-delete-marked record and there no longer exist transactions
-that would see the delete-marked record.
-Nothing is removed, and DB_SUCCESS is returned, when the cursor position
-cannot be restored, when row_vers_must_preserve_del_marked() holds for
-node->new_trx_id, or when the DB_TRX_ID stored in the record differs
-from node->new_trx_id.
-With BTR_MODIFY_LEAF a failed optimistic delete is reported as DB_FAIL;
-with BTR_MODIFY_TREE the error code comes from
-btr_cur_pessimistic_delete().
-@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_undo_mod_remove_clust_low(
-/*==========================*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr, /*!< in/out: mini-transaction */
- ulint mode) /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
-{
- btr_cur_t* btr_cur;
- dberr_t err;
- ulint trx_id_offset;
-
- ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
-
- /* Find out if the record has been purged already
- or if we can remove it. */
-
- if (!btr_pcur_restore_position(mode, &node->pcur, mtr)
- || row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) {
-
- return(DB_SUCCESS);
- }
-
- btr_cur = btr_pcur_get_btr_cur(&node->pcur);
-
- trx_id_offset = btr_cur_get_index(btr_cur)->trx_id_offset;
-
- if (!trx_id_offset) { /* index->trx_id_offset is 0: look the offset up in this record */
- mem_heap_t* heap = NULL;
- ulint trx_id_col;
- const ulint* offsets;
- ulint len;
-
- trx_id_col = dict_index_get_sys_col_pos(
- btr_cur_get_index(btr_cur), DATA_TRX_ID);
- ut_ad(trx_id_col > 0);
- ut_ad(trx_id_col != ULINT_UNDEFINED);
-
- offsets = rec_get_offsets(
- btr_cur_get_rec(btr_cur), btr_cur_get_index(btr_cur),
- NULL, trx_id_col + 1, &heap); /* only the fields up to and including DB_TRX_ID */
-
- trx_id_offset = rec_get_nth_field_offs(
- offsets, trx_id_col, &len);
- ut_ad(len == DATA_TRX_ID_LEN);
- mem_heap_free(heap);
- }
-
- if (trx_read_trx_id(btr_cur_get_rec(btr_cur) + trx_id_offset)
- != node->new_trx_id) {
- /* The record must have been purged and then replaced
- with a different one. */
- return(DB_SUCCESS);
- }
-
- /* We are about to remove an old, delete-marked version of the
- record that may have been delete-marked by a different transaction
- than the rolling-back one. */
- ut_ad(rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
- dict_table_is_comp(node->table)));
-
- if (mode == BTR_MODIFY_LEAF) {
- err = btr_cur_optimistic_delete(btr_cur, 0, mtr)
- ? DB_SUCCESS
- : DB_FAIL;
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
-
- /* This operation is analogous to purge, we can free also
- inherited externally stored fields */
-
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
- thr_is_recv(thr)
- ? RB_RECOVERY_PURGE_REC
- : RB_NONE, mtr); /* err is assigned through the first argument */
-
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
- }
-
- return(err);
-}
-
-/***********************************************************//**
-Undoes a modify in a clustered index record. Sets also the node state for the
-next round of undo.
-Sequence of operations, each in its own mini-transaction:
-(1) row_undo_mod_clust_low() in BTR_MODIFY_LEAF mode (with
-BTR_ALREADY_S_LATCHED and an S-latch on the index lock if the index is in
-online DDL); on any failure the mini-transaction is committed and the call
-is repeated in BTR_MODIFY_TREE mode.
-(2) If the update succeeded and the index was in online DDL at the start,
-the change is passed to row_log_table_insert(), row_log_table_update() or
-row_log_table_delete(), depending on node->rec_type.
-(3) For TRX_UNDO_UPD_DEL_REC, row_undo_mod_remove_clust_low() is then
-tried, again first in BTR_MODIFY_LEAF and then in BTR_MODIFY_TREE mode.
-Regardless of the outcome, node->state becomes UNDO_NODE_FETCH_NEXT and
-the undo record is released before returning.
-@return DB_SUCCESS or error code: we may run out of file space */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_undo_mod_clust(
-/*===============*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr) /*!< in: query thread */
-{
- btr_pcur_t* pcur;
- mtr_t mtr;
- dberr_t err;
- dict_index_t* index;
- bool online;
-
- ut_ad(thr_get_trx(thr) == node->trx);
- ut_ad(node->trx->dict_operation_lock_mode);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED)
- || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
-
- log_free_check(); /* called before the first mini-transaction is started */
- pcur = &node->pcur;
- index = btr_cur_get_index(btr_pcur_get_btr_cur(pcur));
-
- mtr_start_trx(&mtr, thr_get_trx(thr));
-
- online = dict_index_is_online_ddl(index);
- if (online) {
- ut_ad(node->trx->dict_operation_lock_mode != RW_X_LATCH);
- mtr_s_lock(dict_index_get_lock(index), &mtr);
- }
-
- mem_heap_t* heap = mem_heap_create(1024);
- mem_heap_t* offsets_heap = NULL;
- ulint* offsets = NULL;
- const dtuple_t* rebuilt_old_pk;
- byte sys[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
-
- /* Try optimistic processing of the record, keeping changes within
- the index page */
-
- err = row_undo_mod_clust_low(node, &offsets, &offsets_heap,
- heap, &rebuilt_old_pk, sys,
- thr, &mtr, online
- ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
- : BTR_MODIFY_LEAF);
-
- if (err != DB_SUCCESS) {
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- /* We may have to modify tree structure: do a pessimistic
- descent down the index tree */
-
- mtr_start_trx(&mtr, thr_get_trx(thr));
-
- err = row_undo_mod_clust_low(
- node, &offsets, &offsets_heap,
- heap, &rebuilt_old_pk, sys,
- thr, &mtr, BTR_MODIFY_TREE);
- ut_ad(err == DB_SUCCESS || err == DB_OUT_OF_FILE_SPACE);
- }
-
- /* Online rebuild cannot be initiated while we are holding
- dict_operation_lock and index->lock. (It can be aborted.) */
- ut_ad(online || !dict_index_is_online_ddl(index));
-
- if (err == DB_SUCCESS && online) {
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&index->lock, RW_LOCK_SHARED)
- || rw_lock_own(&index->lock, RW_LOCK_EX));
-#endif /* UNIV_SYNC_DEBUG */
- switch (node->rec_type) {
- case TRX_UNDO_DEL_MARK_REC:
- row_log_table_insert(
- btr_pcur_get_rec(pcur), index, offsets);
- break;
- case TRX_UNDO_UPD_EXIST_REC:
- row_log_table_update(
- btr_pcur_get_rec(pcur), index, offsets,
- rebuilt_old_pk);
- break;
- case TRX_UNDO_UPD_DEL_REC:
- row_log_table_delete(
- btr_pcur_get_rec(pcur), index, offsets, sys);
- break;
- default:
- ut_ad(0);
- break;
- }
- }
-
- /**
- * when scrubbing, and records gets cleared,
- * the transaction id is not present afterwards.
- * this is safe as: since the record is on free-list
- * it can be reallocated at any time after this mtr-commits
- * which is just below
- */
- ut_ad(srv_immediate_scrub_data_uncompressed ||
- rec_get_trx_id(btr_pcur_get_rec(pcur), index) == node->new_trx_id);
-
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) {
-
- mtr_start_trx(&mtr, thr_get_trx(thr));
-
- /* It is not necessary to call row_log_table,
- because the record is delete-marked and would thus
- be omitted from the rebuilt copy of the table. */
- err = row_undo_mod_remove_clust_low(
- node, thr, &mtr, BTR_MODIFY_LEAF);
- if (err != DB_SUCCESS) {
- btr_pcur_commit_specify_mtr(pcur, &mtr);
-
- /* We may have to modify tree structure: do a
- pessimistic descent down the index tree */
-
- mtr_start_trx(&mtr, thr_get_trx(thr));
-
- err = row_undo_mod_remove_clust_low(node, thr, &mtr,
- BTR_MODIFY_TREE);
-
- ut_ad(err == DB_SUCCESS
- || err == DB_OUT_OF_FILE_SPACE);
- }
-
- btr_pcur_commit_specify_mtr(pcur, &mtr);
- }
-
- node->state = UNDO_NODE_FETCH_NEXT; /* set even when err != DB_SUCCESS */
-
- trx_undo_rec_release(node->trx, node->undo_no);
-
- if (offsets_heap) {
- mem_heap_free(offsets_heap);
- }
- mem_heap_free(heap);
- return(err);
-}
-
-/***********************************************************//**
-Delete marks or removes a secondary index entry if found.
-The entry is looked up with row_search_index_entry() in the given latch
-mode. If it is not found, or if row_log_online_op_try() returns true for
-an index whose name starts with TEMP_INDEX_PREFIX, nothing is changed in
-the index tree and DB_SUCCESS is returned.
-Otherwise the clustered index record is revisited through node->pcur in a
-separate mini-transaction (mtr_vers) and row_vers_old_has_index_entry()
-decides the action: if it returns true the secondary record is
-delete-marked, otherwise it is deleted (optimistically unless mode is
-BTR_MODIFY_TREE; a failed optimistic delete yields DB_FAIL).
-@return DB_SUCCESS, DB_FAIL, or DB_OUT_OF_FILE_SPACE */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_undo_mod_del_mark_or_remove_sec_low(
-/*====================================*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr, /*!< in: query thread */
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in: index entry */
- ulint mode) /*!< in: latch mode BTR_MODIFY_LEAF or
- BTR_MODIFY_TREE */
-{
- btr_pcur_t pcur;
- btr_cur_t* btr_cur;
- ibool success;
- ibool old_has;
- dberr_t err = DB_SUCCESS;
- mtr_t mtr;
- mtr_t mtr_vers;
- enum row_search_result search_result;
-
- log_free_check();
- mtr_start_trx(&mtr, thr_get_trx(thr));
-
- if (*index->name == TEMP_INDEX_PREFIX) {
- /* The index->online_status may change if the
- index->name starts with TEMP_INDEX_PREFIX (meaning
- that the index is or was being created online). It is
- protected by index->lock. */
- if (mode == BTR_MODIFY_LEAF) {
- mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
- mtr_s_lock(dict_index_get_lock(index), &mtr);
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
- mtr_x_lock(dict_index_get_lock(index), &mtr);
- }
-
- if (row_log_online_op_try(index, entry, 0)) {
- goto func_exit_no_pcur;
- }
- } else {
- /* For secondary indexes, the index is not in online DDL
- (asserted below) unless index->name starts with
- TEMP_INDEX_PREFIX. NOTE(review): this comment used to name the status ONLINE_INDEX_CREATION, which contradicts the assertion - confirm the intended status name. */
- ut_ad(!dict_index_is_online_ddl(index));
- }
-
- btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- search_result = row_search_index_entry(index, entry, mode,
- &pcur, &mtr);
-
- switch (UNIV_EXPECT(search_result, ROW_FOUND)) {
- case ROW_NOT_FOUND:
- /* In crash recovery, the secondary index record may
- be missing if the UPDATE did not have time to insert
- the secondary index records before the crash. When we
- are undoing that UPDATE in crash recovery, the record
- may be missing.
-
- In normal processing, if an update ends in a deadlock
- before it has inserted all updated secondary index
- records, then the undo will not find those records. */
- goto func_exit;
- case ROW_FOUND:
- break;
- case ROW_BUFFERED:
- case ROW_NOT_DELETED_REF:
- /* These are invalid outcomes, because the mode passed
- to row_search_index_entry() did not include any of the
- flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
- ut_error;
- }
-
- /* We should remove the index record if no prior version of the row,
- which cannot be purged yet, requires its existence. If some requires,
- we should delete mark the record. */
-
- mtr_start_trx(&mtr_vers, thr_get_trx(thr));
-
- success = btr_pcur_restore_position(BTR_SEARCH_LEAF, &(node->pcur),
- &mtr_vers);
- ut_a(success);
-
- old_has = row_vers_old_has_index_entry(FALSE,
- btr_pcur_get_rec(&(node->pcur)),
- &mtr_vers, index, entry);
- if (old_has) {
- err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
- btr_cur, TRUE, thr, &mtr);
- ut_ad(err == DB_SUCCESS);
- } else {
- /* Remove the index record */
-
- if (mode != BTR_MODIFY_TREE) {
- success = btr_cur_optimistic_delete(btr_cur, 0, &mtr);
- if (success) {
- err = DB_SUCCESS;
- } else {
- err = DB_FAIL;
- }
- } else {
- /* No need to distinguish RB_RECOVERY_PURGE here,
- because we are deleting a secondary index record:
- the distinction between RB_NORMAL and
- RB_RECOVERY_PURGE only matters when deleting a
- record that contains externally stored
- columns. */
- ut_ad(!dict_index_is_clust(index));
- btr_cur_pessimistic_delete(&err, FALSE, btr_cur, 0,
- RB_NORMAL, &mtr);
-
- /* The delete operation may fail if we have little
- file space left: TODO: easiest to crash the database
- and restart with more file space */
- }
- }
-
- btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers); /* ends the clustered-index mini-transaction */
-
-func_exit:
- btr_pcur_close(&pcur);
-func_exit_no_pcur:
- mtr_commit(&mtr);
-
- return(err);
-}
-
-/***********************************************************//**
-Delete marks or removes a secondary index entry if found.
-The operation is first attempted within the leaf page (BTR_MODIFY_LEAF);
-if that does not return DB_SUCCESS it is repeated with BTR_MODIFY_TREE.
-NOTE that if we updated the fields of a delete-marked secondary index record
-so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot
-return to the original values because we do not know them. But this should
-not cause problems because in row0sel.cc, in queries we always retrieve the
-clustered index record or an earlier version of it, if the secondary index
-record through which we do the search is delete-marked.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_undo_mod_del_mark_or_remove_sec(
-/*================================*/
-	undo_node_t*	node,	/*!< in: row undo node */
-	que_thr_t*	thr,	/*!< in: query thread */
-	dict_index_t*	index,	/*!< in: index */
-	dtuple_t*	entry)	/*!< in: index entry */
-{
-	dberr_t	result = row_undo_mod_del_mark_or_remove_sec_low(
-		node, thr, index, entry, BTR_MODIFY_LEAF);
-
-	if (result != DB_SUCCESS) {
-		/* The leaf-only attempt did not succeed: retry with
-		permission to modify the tree structure. */
-		result = row_undo_mod_del_mark_or_remove_sec_low(
-			node, thr, index, entry, BTR_MODIFY_TREE);
-	}
-
-	return(result);
-}
-
-/***********************************************************//**
-Delete unmarks a secondary index entry which must be found. It might not be
-delete-marked at the moment, but it does not harm to unmark it anyway. We also
-need to update the fields of the secondary index record if we updated its
-fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'.
-If the entry is not found, an insert of the entry is attempted instead,
-unless the cursor's up_match or low_match shows that a record with the
-same unique fields already exists.
-If row_log_online_op_try() returns true for an index whose name starts
-with TEMP_INDEX_PREFIX, the index tree is not touched and DB_SUCCESS is
-returned.
-@retval DB_SUCCESS on success
-@retval DB_FAIL if BTR_MODIFY_TREE should be tried
-@retval DB_OUT_OF_FILE_SPACE when running out of tablespace
-@retval DB_DUPLICATE_KEY if the value was missing
- and an insert would lead to a duplicate */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_undo_mod_del_unmark_sec_and_undo_update(
-/*========================================*/
- ulint mode, /*!< in: search mode: BTR_MODIFY_LEAF or
- BTR_MODIFY_TREE */
- que_thr_t* thr, /*!< in: query thread */
- dict_index_t* index, /*!< in: index */
- dtuple_t* entry) /*!< in: index entry */
-{
- btr_pcur_t pcur;
- btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur);
- upd_t* update;
- dberr_t err = DB_SUCCESS;
- big_rec_t* dummy_big_rec;
- mtr_t mtr;
- trx_t* trx = thr_get_trx(thr);
- const ulint flags
- = BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG;
- enum row_search_result search_result;
-
- ut_ad(trx->id);
-
- log_free_check();
- mtr_start_trx(&mtr, thr_get_trx(thr));
-
- if (*index->name == TEMP_INDEX_PREFIX) {
- /* The index->online_status may change if the
- index->name starts with TEMP_INDEX_PREFIX (meaning
- that the index is or was being created online). It is
- protected by index->lock. */
- if (mode == BTR_MODIFY_LEAF) {
- mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED;
- mtr_s_lock(dict_index_get_lock(index), &mtr);
- } else {
- ut_ad(mode == BTR_MODIFY_TREE);
- mtr_x_lock(dict_index_get_lock(index), &mtr);
- }
-
- if (row_log_online_op_try(index, entry, trx->id)) {
- goto func_exit_no_pcur;
- }
- } else {
- /* For secondary indexes, the index is not in online DDL
- (asserted below) unless index->name starts with
- TEMP_INDEX_PREFIX. NOTE(review): this comment used to name the status ONLINE_INDEX_CREATION, which contradicts the assertion - confirm the intended status name. */
- ut_ad(!dict_index_is_online_ddl(index));
- }
-
- search_result = row_search_index_entry(index, entry, mode,
- &pcur, &mtr);
-
- switch (search_result) {
- mem_heap_t* heap;
- mem_heap_t* offsets_heap;
- ulint* offsets;
- case ROW_BUFFERED:
- case ROW_NOT_DELETED_REF:
- /* These are invalid outcomes, because the mode passed
- to row_search_index_entry() did not include any of the
- flags BTR_INSERT, BTR_DELETE, or BTR_DELETE_MARK. */
- ut_error;
- case ROW_NOT_FOUND:
- if (*index->name != TEMP_INDEX_PREFIX) {
- /* During online secondary index creation, it
- is possible that MySQL is waiting for a
- meta-data lock upgrade before invoking
- ha_innobase::commit_inplace_alter_table()
- while this ROLLBACK is executing. InnoDB has
- finished building the index, but it does not
- yet exist in MySQL. In this case, we suppress
- the printout to the error log. */
- fputs("InnoDB: error in sec index entry del undo in\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fputs("\n"
- "InnoDB: tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, btr_pcur_get_rec(&pcur), index);
- putc('\n', stderr);
- trx_print(stderr, trx, 0);
- fputs("\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n", stderr);
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "record in index %s was not found"
- " on rollback, trying to insert",
- index->name);
- }
-
- if (btr_cur->up_match >= dict_index_get_n_unique(index)
- || btr_cur->low_match >= dict_index_get_n_unique(index)) {
- if (*index->name != TEMP_INDEX_PREFIX) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "record in index %s was not found on"
- " rollback, and a duplicate exists",
- index->name);
- }
- err = DB_DUPLICATE_KEY;
- break;
- }
-
- /* Insert the missing record that we were trying to
- delete-unmark. */
- big_rec_t* big_rec;
- rec_t* insert_rec;
- offsets = NULL;
- offsets_heap = NULL;
-
- err = btr_cur_optimistic_insert(
- flags, btr_cur, &offsets, &offsets_heap,
- entry, &insert_rec, &big_rec,
- 0, thr, &mtr);
- ut_ad(!big_rec);
-
- if (err == DB_FAIL && mode == BTR_MODIFY_TREE) {
- err = btr_cur_pessimistic_insert(
- flags, btr_cur,
- &offsets, &offsets_heap,
- entry, &insert_rec, &big_rec,
- 0, thr, &mtr);
- /* There are no off-page columns in
- secondary indexes. */
- ut_ad(!big_rec);
- }
-
- if (err == DB_SUCCESS) {
- page_update_max_trx_id(
- btr_cur_get_block(btr_cur),
- btr_cur_get_page_zip(btr_cur),
- trx->id, &mtr);
- }
-
- if (offsets_heap) {
- mem_heap_free(offsets_heap);
- }
-
- break;
- case ROW_FOUND:
- err = btr_cur_del_mark_set_sec_rec(
- BTR_NO_LOCKING_FLAG,
- btr_cur, FALSE, thr, &mtr);
- ut_a(err == DB_SUCCESS);
- heap = mem_heap_create(
- sizeof(upd_t)
- + dtuple_get_n_fields(entry) * sizeof(upd_field_t));
- offsets_heap = NULL;
- offsets = rec_get_offsets(
- btr_cur_get_rec(btr_cur),
- index, NULL, ULINT_UNDEFINED, &offsets_heap);
- update = row_upd_build_sec_rec_difference_binary(
- btr_cur_get_rec(btr_cur), index, offsets, entry, heap);
- if (upd_get_n_fields(update) == 0) {
-
- /* Do nothing */
-
- } else if (mode != BTR_MODIFY_TREE) {
- /* Try an optimistic updating of the record, keeping
- changes within the page */
-
- /* TODO: pass offsets, not &offsets */
- err = btr_cur_optimistic_update(
- flags, btr_cur, &offsets, &offsets_heap,
- update, 0, thr, thr_get_trx(thr)->id, &mtr);
- switch (err) {
- case DB_OVERFLOW:
- case DB_UNDERFLOW:
- case DB_ZIP_OVERFLOW:
- err = DB_FAIL; /* fall through: these map to DB_FAIL so that the caller retries with BTR_MODIFY_TREE */
- default:
- break;
- }
- } else {
- err = btr_cur_pessimistic_update(
- flags, btr_cur, &offsets, &offsets_heap,
- heap, &dummy_big_rec,
- update, 0, thr, thr_get_trx(thr)->id, &mtr);
- ut_a(!dummy_big_rec);
- }
-
- mem_heap_free(heap);
- mem_heap_free(offsets_heap);
- }
-
- btr_pcur_close(&pcur);
-func_exit_no_pcur:
- mtr_commit(&mtr);
-
- return(err);
-}
-
-/***********************************************************//**
-Flags a secondary index corrupted.
-How the flag is set depends on trx->dict_operation_lock_mode: with
-RW_S_LATCH only the dictionary cache is updated (under dict_sys->mutex);
-otherwise dict_set_corrupted() is called. */
-static MY_ATTRIBUTE((nonnull))
-void
-row_undo_mod_sec_flag_corrupted(
-/*============================*/
-	trx_t*		trx,	/*!< in/out: transaction */
-	dict_index_t*	index)	/*!< in: secondary index */
-{
-	ut_ad(!dict_index_is_clust(index));
-
-	if (trx->dict_operation_lock_mode == RW_S_LATCH) {
-		/* Because row_undo() is holding an S-latch
-		on the data dictionary during normal rollback,
-		we can only mark the index corrupted in the
-		data dictionary cache. TODO: fix this somehow.*/
-		mutex_enter(&dict_sys->mutex);
-		dict_set_corrupted_index_cache_only(index, index->table);
-		mutex_exit(&dict_sys->mutex);
-		return;
-	}
-
-	/* Any mode other than RW_X_LATCH is unexpected here; in that
-	case a debug build asserts and a release build proceeds as for
-	RW_X_LATCH. */
-	ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH);
-
-	/* This should be the rollback of a data dictionary
-	transaction. */
-	dict_set_corrupted(index, trx, "rollback");
-}
-
-/***********************************************************//**
-Undoes a modify in secondary indexes when undo record type is UPD_DEL.
-Walks node->index through the remaining uncorrupted secondary indexes,
-skipping indexes that have the DICT_FTS bit set, and for each one
-delete-marks or removes the entry built from node->row. The loop stops at
-the first error, leaving node->index at the failing index.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_undo_mod_upd_del_sec(
-/*=====================*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr) /*!< in: query thread */
-{
- mem_heap_t* heap;
- dberr_t err = DB_SUCCESS;
-
- ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
- ut_ad(!node->undo_row);
-
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- dict_index_t* index = node->index;
- dtuple_t* entry;
-
- if (index->type & DICT_FTS) {
- dict_table_next_uncorrupted_index(node->index);
- continue;
- }
-
- /* During online index creation,
- HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should
- guarantee that any active transaction has not modified
- indexed columns such that col->ord_part was 0 at the
- time when the undo log record was written. When we get
- to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
- it should always cover all affected indexes.
- NOTE(review): this function asserts TRX_UNDO_UPD_DEL_REC,
- not TRX_UNDO_DEL_MARK_REC; the wording above matches the
- comment in row_undo_mod_del_mark_sec() - confirm it is
- meant to apply here too. */
- entry = row_build_index_entry(
- node->row, node->ext, index, heap);
-
- if (UNIV_UNLIKELY(!entry)) {
- /* The database must have crashed after
- inserting a clustered index record but before
- writing all the externally stored columns of
- that record. Because secondary index entries
- are inserted after the clustered index record,
- we may assume that the secondary index record
- does not exist. However, this situation may
- only occur during the rollback of incomplete
- transactions. */
- ut_a(thr_is_recv(thr));
- } else {
- err = row_undo_mod_del_mark_or_remove_sec(
- node, thr, index, entry);
-
- if (UNIV_UNLIKELY(err != DB_SUCCESS)) {
-
- break;
- }
- }
-
- mem_heap_empty(heap); /* the entry built for this index is no longer needed */
- dict_table_next_uncorrupted_index(node->index);
- }
-
- mem_heap_free(heap);
-
- return(err);
-}
-
-/***********************************************************//**
-Undoes a modify in secondary indexes when undo record type is DEL_MARK.
-Walks node->index through the remaining uncorrupted secondary indexes,
-skipping indexes that have the DICT_FTS bit set. For each one the entry
-built from node->row is delete-unmarked (or re-inserted if missing):
-first with BTR_MODIFY_LEAF and, on DB_FAIL, again with BTR_MODIFY_TREE.
-DB_DUPLICATE_KEY is never returned to the caller: the index is flagged
-corrupted and processing continues with the next index. The loop stops
-at the first other error, leaving node->index at the failing index.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_undo_mod_del_mark_sec(
-/*======================*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr) /*!< in: query thread */
-{
- mem_heap_t* heap;
- dberr_t err = DB_SUCCESS;
-
- ut_ad(!node->undo_row);
-
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- dict_index_t* index = node->index;
- dtuple_t* entry;
-
- /* Test the DICT_FTS bit, as row_undo_mod_upd_del_sec()
- does, rather than comparing the whole of index->type:
- an equality test stops matching as soon as any other
- bit is set in index->type. */
- if (index->type & DICT_FTS) {
- dict_table_next_uncorrupted_index(node->index);
- continue;
- }
-
- /* During online index creation,
- HA_ALTER_INPLACE_NO_LOCK_AFTER_PREPARE should
- guarantee that any active transaction has not modified
- indexed columns such that col->ord_part was 0 at the
- time when the undo log record was written. When we get
- to roll back an undo log entry TRX_UNDO_DEL_MARK_REC,
- it should always cover all affected indexes. */
- entry = row_build_index_entry(
- node->row, node->ext, index, heap);
-
- ut_a(entry);
-
- /* Optimistic attempt first; DB_FAIL asks for a retry that
- may modify the tree structure. */
- err = row_undo_mod_del_unmark_sec_and_undo_update(
- BTR_MODIFY_LEAF, thr, index, entry);
- if (err == DB_FAIL) {
- err = row_undo_mod_del_unmark_sec_and_undo_update(
- BTR_MODIFY_TREE, thr, index, entry);
- }
-
- if (err == DB_DUPLICATE_KEY) {
- row_undo_mod_sec_flag_corrupted(
- thr_get_trx(thr), index);
- err = DB_SUCCESS;
- /* Do not return any error to the caller. The
- duplicate will be reported by ALTER TABLE or
- CREATE UNIQUE INDEX. Unfortunately we cannot
- report the duplicate key value to the DDL
- thread, because the altered_table object is
- private to its call stack. */
- } else if (err != DB_SUCCESS) {
- break;
- }
-
- mem_heap_empty(heap);
- dict_table_next_uncorrupted_index(node->index);
- }
-
- mem_heap_free(heap);
-
- return(err);
-}
-
-/***********************************************************//**
-Undoes a modify in secondary indexes when undo record type is UPD_EXIST.
-Returns at once if there is no secondary index to process or if
-node->cmpl_info has UPD_NODE_NO_ORD_CHANGE set. Otherwise walks
-node->index through the remaining uncorrupted secondary indexes, skipping
-indexes that have the DICT_FTS bit set and indexes for which
-row_upd_changes_ord_field_binary() returns false. For each remaining
-index:
-(1) the entry built from node->row (the newest version) is delete-marked
-or removed, unless it cannot be built;
-(2) the entry built from node->undo_row (the previous version) is
-delete-unmarked or re-inserted, first with BTR_MODIFY_LEAF and, on
-DB_FAIL, again with BTR_MODIFY_TREE.
-DB_DUPLICATE_KEY from step (2) is not returned: the index is flagged
-corrupted and processing continues.
-@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_undo_mod_upd_exist_sec(
-/*=======================*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr) /*!< in: query thread */
-{
- mem_heap_t* heap;
- dberr_t err = DB_SUCCESS;
-
- if (node->index == NULL
- || (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- /* No change in secondary indexes */
-
- return(err);
- }
-
- heap = mem_heap_create(1024);
-
- while (node->index != NULL) {
- dict_index_t* index = node->index;
- dtuple_t* entry;
-
- /* Test the DICT_FTS bit, as row_undo_mod_upd_del_sec()
- does, rather than comparing the whole of index->type:
- an equality test stops matching as soon as any other
- bit is set in index->type. */
- if ((index->type & DICT_FTS)
- || !row_upd_changes_ord_field_binary(
- index, node->update, thr, node->row, node->ext)) {
- dict_table_next_uncorrupted_index(node->index);
- continue;
- }
-
- /* Build the newest version of the index entry */
- entry = row_build_index_entry(node->row, node->ext,
- index, heap);
- if (UNIV_UNLIKELY(!entry)) {
- /* The server must have crashed in
- row_upd_clust_rec_by_insert() before
- the updated externally stored columns (BLOBs)
- of the new clustered index entry were written. */
-
- /* The table must be in DYNAMIC or COMPRESSED
- format. REDUNDANT and COMPACT formats
- store a local 768-byte prefix of each
- externally stored column. */
- ut_a(dict_table_get_format(index->table)
- >= UNIV_FORMAT_B);
-
- /* This is only legitimate when
- rolling back an incomplete transaction
- after crash recovery. */
- ut_a(thr_get_trx(thr)->is_recovered);
-
- /* The server must have crashed before
- completing the insert of the new
- clustered index entry and before
- inserting to the secondary indexes.
- Because node->row was not yet written
- to this index, we can ignore it. But
- we must restore node->undo_row. */
- } else {
- /* NOTE that if we updated the fields of a
- delete-marked secondary index record so that
- alphabetically they stayed the same, e.g.,
- 'abc' -> 'aBc', we cannot return to the
- original values because we do not know them.
- But this should not cause problems because
- in row0sel.cc, in queries we always retrieve
- the clustered index record or an earlier
- version of it, if the secondary index record
- through which we do the search is
- delete-marked. */
-
- err = row_undo_mod_del_mark_or_remove_sec(
- node, thr, index, entry);
- if (err != DB_SUCCESS) {
- break;
- }
- }
-
- mem_heap_empty(heap);
- /* We may have to update the delete mark in the
- secondary index record of the previous version of
- the row. We also need to update the fields of
- the secondary index record if we updated its fields
- but alphabetically they stayed the same, e.g.,
- 'abc' -> 'aBc'. */
- entry = row_build_index_entry(node->undo_row,
- node->undo_ext,
- index, heap);
- ut_a(entry);
-
- /* Optimistic attempt first; DB_FAIL asks for a retry that
- may modify the tree structure. */
- err = row_undo_mod_del_unmark_sec_and_undo_update(
- BTR_MODIFY_LEAF, thr, index, entry);
- if (err == DB_FAIL) {
- err = row_undo_mod_del_unmark_sec_and_undo_update(
- BTR_MODIFY_TREE, thr, index, entry);
- }
-
- if (err == DB_DUPLICATE_KEY) {
- /* Not reported to the caller: the index is flagged
- corrupted instead. */
- row_undo_mod_sec_flag_corrupted(
- thr_get_trx(thr), index);
- err = DB_SUCCESS;
- } else if (err != DB_SUCCESS) {
- break;
- }
-
- mem_heap_empty(heap);
- dict_table_next_uncorrupted_index(node->index);
- }
-
- mem_heap_free(heap);
-
- return(err);
-}
-
-/***********************************************************//**
-Parses the row reference and other info in a modify undo log record.
-node->rec_type is always set. The table is opened by the id stored in the
-undo record; on return node->table is NULL if the table could not be
-opened, if its file is flagged unreadable, or if
-row_undo_search_clust_to_pcur() returned false (in the latter two cases
-the table is closed again here). Otherwise the table is left open and
-node->ref, node->update, node->new_trx_id and node->cmpl_info are
-filled in from the undo record. */
-static MY_ATTRIBUTE((nonnull))
-void
-row_undo_mod_parse_undo_rec(
-/*========================*/
- undo_node_t* node, /*!< in: row undo node */
- ibool dict_locked) /*!< in: TRUE if own dict_sys->mutex */
-{
- dict_index_t* clust_index;
- byte* ptr;
- undo_no_t undo_no;
- table_id_t table_id;
- trx_id_t trx_id;
- roll_ptr_t roll_ptr;
- ulint info_bits;
- ulint type;
- ulint cmpl_info;
- bool dummy_extern;
-
- ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
- &dummy_extern, &undo_no, &table_id);
- node->rec_type = type;
-
- node->table = dict_table_open_on_id(
- table_id, dict_locked, DICT_TABLE_OP_NORMAL);
-
- /* TODO: other fixes associated with DROP TABLE + rollback in the
- same table by another user */
-
- if (node->table == NULL) {
- /* Table was dropped */
- return;
- }
-
- if (node->table->file_unreadable) {
- dict_table_close(node->table, dict_locked, FALSE);
-
- /* We skip undo operations to missing .ibd files */
- node->table = NULL;
-
- return;
- }
-
- clust_index = dict_table_get_first_index(node->table);
-
- ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
- &info_bits); /* each parser call returns the position for the next one */
-
- ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref),
- node->heap);
-
- trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
- roll_ptr, info_bits, node->trx,
- node->heap, &(node->update));
- node->new_trx_id = trx_id;
- node->cmpl_info = cmpl_info;
-
- if (!row_undo_search_clust_to_pcur(node)) {
-
- dict_table_close(node->table, dict_locked, FALSE);
-
- node->table = NULL; /* tells row_undo_mod() that there is nothing to undo */
- }
-}
-
-/***********************************************************//**
-Undoes a modify operation on a row of a table.
-The undo record is parsed first; if that leaves node->table NULL the
-record is released and the node advances to UNDO_NODE_FETCH_NEXT.
-Otherwise the secondary indexes are processed according to
-node->rec_type and, if that succeeded, the clustered index record is
-restored by row_undo_mod_clust(). The table is closed before returning.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-row_undo_mod(
-/*=========*/
-	undo_node_t*	node,	/*!< in: row undo node */
-	que_thr_t*	thr)	/*!< in: query thread */
-{
-	ut_ad(node != NULL);
-	ut_ad(thr != NULL);
-	ut_ad(node->state == UNDO_NODE_MODIFY);
-
-	const ibool	dict_x_latched
-		= thr_get_trx(thr)->dict_operation_lock_mode == RW_X_LATCH;
-
-	ut_ad(thr_get_trx(thr) == node->trx);
-
-	row_undo_mod_parse_undo_rec(node, dict_x_latched);
-
-	if (!node->table) {
-		/* It is already undone, or will be undone by another query
-		thread, or table was dropped */
-
-		trx_undo_rec_release(node->trx, node->undo_no);
-		node->state = UNDO_NODE_FETCH_NEXT;
-
-		return(DB_SUCCESS);
-	}
-
-	/* Position node->index on the first secondary index: the
-	clustered index is always the first index of the table. */
-	dict_index_t*	clust_index = dict_table_get_first_index(node->table);
-	ut_ad(dict_index_is_clust(clust_index));
-	node->index = dict_table_get_next_index(clust_index);
-
-	/* Skip all corrupted secondary index */
-	dict_table_skip_corrupt_index(node->index);
-
-	dberr_t	err;
-
-	switch (node->rec_type) {
-	case TRX_UNDO_DEL_MARK_REC:
-		err = row_undo_mod_del_mark_sec(node, thr);
-		break;
-	case TRX_UNDO_UPD_DEL_REC:
-		err = row_undo_mod_upd_del_sec(node, thr);
-		break;
-	case TRX_UNDO_UPD_EXIST_REC:
-		err = row_undo_mod_upd_exist_sec(node, thr);
-		break;
-	default:
-		ut_error;
-		err = DB_ERROR;
-	}
-
-	/* The clustered index record is restored only after all
-	secondary indexes were handled successfully. */
-	if (err == DB_SUCCESS) {
-		err = row_undo_mod_clust(node, thr);
-	}
-
-	dict_table_close(node->table, dict_x_latched, FALSE);
-
-	node->table = NULL;
-
-	return(err);
-}
diff --git a/storage/xtradb/row/row0undo.cc b/storage/xtradb/row/row0undo.cc
deleted file mode 100644
index 82b1ab049fa..00000000000
--- a/storage/xtradb/row/row0undo.cc
+++ /dev/null
@@ -1,375 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0undo.cc
-Row undo
-
-Created 1/8/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0undo.h"
-
-#ifdef UNIV_NONINL
-#include "row0undo.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "trx0undo.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "row0row.h"
-#include "row0uins.h"
-#include "row0umod.h"
-#include "row0upd.h"
-#include "row0mysql.h"
-#include "srv0srv.h"
-
-/* How to undo row operations?
-(1) For an insert, we have stored a prefix of the clustered index record
-in the undo log. Using it, we look for the clustered record, and using
-that we look for the records in the secondary indexes. The insert operation
-may have been left incomplete, if the database crashed, for example.
-We may have to look at the trx id and roll ptr to make sure the record in the
-clustered index is really the one for which the undo log record was
-written. We can use the framework we get from the original insert op.
-(2) Delete marking: We can use the framework we get from the original
-delete mark op. We only have to check the trx id.
-(3) Update: This may be the most complicated. We have to use the framework
-we get from the original update op.
-
-What if the same trx repeatedly deletes and inserts an identical row.
-Then the row id changes and also roll ptr. What if the row id was not
-part of the ordering fields in the clustered index? Maybe we have to write
-it to undo log. Well, maybe not, because if we order the row id and trx id
-in descending order, then the only undeleted copy is the first in the
-index. Our searches in row operations always position the cursor before
-the first record in the result set. But, if there is no key defined for
-a table, then it would be desirable that row id is in ascending order.
-So, let's store row id in descending order only if it is not an ordering
-field in the clustered index.
-
-NOTE: Deletes and inserts may lead to situation where there are identical
-records in a secondary index. Is that a problem in the B-tree? Yes.
-Also updates can lead to this, unless trx id and roll ptr are included in
-ord fields.
-(1) Fix in clustered indexes: include row id, trx id, and roll ptr
-in node pointers of B-tree.
-(2) Fix in secondary indexes: include all fields in node pointers, and
-if an entry is inserted, check if it is equal to the right neighbor,
-in which case update the right neighbor: the neighbor must be delete
-marked, set it unmarked and write the trx id of the current transaction.
-
-What if the same trx repeatedly updates the same row, updating a secondary
-index field or not? Updating a clustered index ordering field?
-
-(1) If it does not update the secondary index and not the clustered index
-ord field. Then the secondary index record stays unchanged, but the
-trx id in the secondary index record may be smaller than in the clustered
-index record. This is no problem?
-(2) If it updates secondary index ord field but not clustered: then in
-secondary index there are delete marked records, which differ in an
-ord field. No problem.
-(3) Updates clustered ord field but not secondary, and secondary index
-is unique. Then the record in secondary index is just updated at the
-clustered ord field.
-(4)
-
-Problem with duplicate records:
-Fix 1: Add a trx op no field to all indexes. A problem: if a trx with a
-bigger trx id has inserted and delete marked a similar row, our trx inserts
-again a similar row, and a trx with an even bigger id delete marks it. Then
-the position of the row should change in the index if the trx id affects
-the alphabetical ordering.
-
-Fix 2: If an insert encounters a similar row marked deleted, we turn the
-insert into an 'update' of the row marked deleted. Then we must write undo
-info on the update. A problem: what if a purge operation tries to remove
-the delete marked row?
-
-We can think of the database row versions as a linked list which starts
-from the record in the clustered index, and is linked by roll ptrs
-through undo logs. The secondary index records are references which tell
-what kinds of records can be found in this linked list for a record
-in the clustered index.
-
-How to do the purge? A record can be removed from the clustered index
-if its linked list becomes empty, i.e., the row has been marked deleted
-and its roll ptr points to the record in the undo log we are going through,
-doing the purge. Similarly, during a rollback, a record can be removed
-if the stored roll ptr in the undo log points to a trx already (being) purged,
-or if the roll ptr is NULL, i.e., it was a fresh insert. */
-
-/********************************************************************//**
-Allocates a row undo node from the given heap and prepares it for use in
-a query graph: the node is typed QUE_NODE_UNDO, linked to its parent,
-starts in the UNDO_NODE_FETCH_NEXT state, gets an initialized persistent
-cursor and a separately created memory heap of its own.
-@return own: undo node */
-UNIV_INTERN
-undo_node_t*
-row_undo_node_create(
-/*=================*/
-	trx_t*		trx,	/*!< in: transaction */
-	que_thr_t*	parent,	/*!< in: parent node, i.e., a thr node */
-	mem_heap_t*	heap)	/*!< in: memory heap where created */
-{
-	ut_ad(trx && parent && heap);
-
-	undo_node_t*	node = static_cast<undo_node_t*>(
-		mem_heap_alloc(heap, sizeof(*node)));
-
-	/* Generic query graph node header */
-	node->common.parent = parent;
-	node->common.type = QUE_NODE_UNDO;
-
-	/* Undo specific state */
-	node->trx = trx;
-	node->state = UNDO_NODE_FETCH_NEXT;
-	node->heap = mem_heap_create(256);
-
-	btr_pcur_init(&(node->pcur));
-
-	return(node);
-}
-
-/***********************************************************//**
-Looks for the clustered index record when node has the row reference.
-The pcur in node is used in the search. If found, stores the row to node,
-and stores the position of pcur, and detaches it. The pcur must be closed
-by the caller in any case.
-
-The record under the cursor is inspected only after the row reference
-search has succeeded; when the search fails the cursor may rest on an
-unrelated record, so no offsets are computed for it.
-@return TRUE if found; NOTE the node->pcur must be closed by the
-caller, regardless of the return value */
-UNIV_INTERN
-ibool
-row_undo_search_clust_to_pcur(
-/*==========================*/
-	undo_node_t*	node)	/*!< in: row undo node */
-{
-	dict_index_t*	clust_index;
-	mtr_t		mtr;
-	ibool		ret = FALSE;
-	mem_heap_t*	heap = NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets = offsets_;
-	rec_offs_init(offsets_);
-
-	mtr_start(&mtr);
-
-	clust_index = dict_table_get_first_index(node->table);
-
-	if (row_search_on_row_ref(&(node->pcur), BTR_MODIFY_LEAF,
-				  node->table, node->ref, &mtr)) {
-
-		rec_t*	rec = btr_pcur_get_rec(&(node->pcur));
-
-		offsets = rec_get_offsets(rec, clust_index, offsets,
-					  ULINT_UNDEFINED, &heap);
-
-		/* The record is the one to undo only if its roll ptr
-		still equals the roll ptr of the undo record in node. */
-		if (node->roll_ptr
-		    == row_get_rec_roll_ptr(rec, clust_index, offsets)) {
-
-			row_ext_t**	ext;
-
-			if (dict_table_get_format(node->table)
-			    >= UNIV_FORMAT_B) {
-				/* In DYNAMIC or COMPRESSED format, there is
-				no prefix of externally stored columns in the
-				clustered index record. Build a cache of
-				column prefixes. */
-				ext = &node->ext;
-			} else {
-				/* REDUNDANT and COMPACT formats store a local
-				768-byte prefix of each externally stored
-				column. No cache is needed. */
-				ext = NULL;
-				node->ext = NULL;
-			}
-
-			node->row = row_build(ROW_COPY_DATA, clust_index, rec,
-					      offsets, NULL,
-					      NULL, NULL, ext, node->heap);
-
-			if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
-				/* Also build the row as it looks with
-				node->update applied to a copy of it. */
-				node->undo_row = dtuple_copy(node->row,
-							     node->heap);
-				row_upd_replace(node->undo_row,
-						&node->undo_ext,
-						clust_index, node->update,
-						node->heap);
-			} else {
-				node->undo_row = NULL;
-				node->undo_ext = NULL;
-			}
-
-			btr_pcur_store_position(&(node->pcur), &mtr);
-
-			ret = TRUE;
-		}
-	}
-
-	/* When ret is still FALSE here: We must remove the reservation on
-	the undo log record BEFORE releasing the latch on the clustered
-	index page: this is to make sure that some thread will eventually
-	undo the modification corresponding to node->roll_ptr. */
-
-	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-
-	return(ret);
-}
-
-/***********************************************************//**
-Fetches an undo log record and does the undo for the recorded operation.
-If none left, or a partial rollback completed, returns control to the
-parent node, which is always a query thread node.
-When node->state is UNDO_NODE_FETCH_NEXT, the next record is popped with
-trx_roll_pop_top_rec_of_trx() and the state becomes UNDO_NODE_INSERT or
-UNDO_NODE_MODIFY depending on trx_undo_roll_ptr_is_insert(). After the
-undo call the persistent cursor is closed, node->heap is emptied and
-thr->run_node is set back to this node.
-@return DB_SUCCESS if operation successfully completed, else error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_undo(
-/*=====*/
- undo_node_t* node, /*!< in: row undo node */
- que_thr_t* thr) /*!< in: query thread */
-{
- dberr_t err;
- trx_t* trx;
- roll_ptr_t roll_ptr;
- ibool locked_data_dict;
-
- ut_ad(node != NULL);
- ut_ad(thr != NULL);
-
- trx = node->trx;
-
- if (node->state == UNDO_NODE_FETCH_NEXT) {
- /* Pop the next undo record, bounded by trx->roll_limit; the
- record is returned through node->heap and its roll ptr is output. */
- node->undo_rec = trx_roll_pop_top_rec_of_trx(trx,
- trx->roll_limit,
- &roll_ptr,
- node->heap);
- if (!node->undo_rec) {
- /* Rollback completed for this query thread */
-
- thr->run_node = que_node_get_parent(node);
-
- return(DB_SUCCESS);
- }
-
- node->roll_ptr = roll_ptr;
- node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec);
- /* The roll ptr tells whether this is an insert undo record. */
- if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
-
- node->state = UNDO_NODE_INSERT;
- } else {
- node->state = UNDO_NODE_MODIFY;
- }
- }
-
- /* Prevent DROP TABLE etc. while we are rolling back this row.
- If we are doing a TABLE CREATE or some other dictionary operation,
- then we already have dict_operation_lock locked in x-mode. Do not
- try to lock again, because that would cause a hang. */
-
- locked_data_dict = (trx->dict_operation_lock_mode == 0);
-
- if (locked_data_dict) {
-
- row_mysql_freeze_data_dictionary(trx);
- }
-
- if (node->state == UNDO_NODE_INSERT) {
-
- err = row_undo_ins(node);
- /* The state is reset here only on the insert path. */
- node->state = UNDO_NODE_FETCH_NEXT;
- } else {
- ut_ad(node->state == UNDO_NODE_MODIFY);
- err = row_undo_mod(node, thr);
- }
- /* Unfreeze only if this call froze the dictionary above. */
- if (locked_data_dict) {
-
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- /* Do some cleanup */
- btr_pcur_close(&(node->pcur));
-
- mem_heap_empty(node->heap);
-
- thr->run_node = node;
-
- return(err);
-}
-
-/***********************************************************//**
-High-level entry point used in SQL execution graphs: runs one undo step
-for the undo node that thr is currently positioned on and records the
-outcome in the transaction's error_state. Any outcome other than
-DB_SUCCESS is reported on stderr and treated as fatal.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_undo_step(
-/*==========*/
-	que_thr_t*	thr)	/*!< in: query thread */
-{
-	ut_ad(thr);
-
-	srv_inc_activity_count();
-
-	trx_t*		trx = thr_get_trx(thr);
-	undo_node_t*	node = static_cast<undo_node_t*>(thr->run_node);
-
-	ut_ad(que_node_get_type(node) == QUE_NODE_UNDO);
-
-	const dberr_t	err = row_undo(node, thr);
-
-	trx->error_state = err;
-
-	if (err == DB_SUCCESS) {
-		/* Normal case: keep running the same query thread */
-		return(thr);
-	}
-
-	/* SQL error detected */
-	fprintf(stderr, "InnoDB: Fatal error (%s) in rollback.\n",
-		ut_strerr(err));
-
-	if (err == DB_OUT_OF_FILE_SPACE) {
-		fprintf(stderr,
-			"InnoDB: Out of tablespace.\n"
-			"InnoDB: Consider increasing"
-			" your tablespace.\n");
-		abort();
-	}
-
-	ut_error;
-
-	return(NULL);
-}
diff --git a/storage/xtradb/row/row0upd.cc b/storage/xtradb/row/row0upd.cc
deleted file mode 100644
index 1156cbe4b4c..00000000000
--- a/storage/xtradb/row/row0upd.cc
+++ /dev/null
@@ -1,3017 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0upd.cc
-Update of a row
-
-Created 12/27/1996 Heikki Tuuri
-*******************************************************/
-
-#include "m_string.h" /* for my_sys.h */
-#include "my_sys.h" /* DEBUG_SYNC_C */
-#include "row0upd.h"
-
-#ifdef UNIV_NONINL
-#include "row0upd.ic"
-#endif
-
-#include "ha_prototypes.h"
-#include "dict0dict.h"
-#include "trx0undo.h"
-#include "rem0rec.h"
-#ifndef UNIV_HOTBACKUP
-#include "dict0boot.h"
-#include "dict0crea.h"
-#include "mach0data.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "que0que.h"
-#include "row0ext.h"
-#include "row0ins.h"
-#include "row0log.h"
-#include "row0row.h"
-#include "row0sel.h"
-#include "rem0cmp.h"
-#include "lock0lock.h"
-#include "log0log.h"
-#include "pars0sym.h"
-#include "eval0eval.h"
-#include "buf0lru.h"
-#include <algorithm>
-
-#include <mysql/plugin.h>
-#include <mysql/service_wsrep.h>
-
-/* What kind of latch and lock can we assume when the control comes to
- -------------------------------------------------------------------
-an update node?
---------------
-Efficiency of massive updates would require keeping an x-latch on a
-clustered index page through many updates, and not setting an explicit
-x-lock on clustered index records, as they anyway will get an implicit
-x-lock when they are updated. A problem is that the read nodes in the
-graph should know that they must keep the latch when passing the control
-up to the update node, and not set any record lock on the record which
-will be updated. Another problem occurs if the execution is stopped,
-as the kernel switches to another query thread, or the transaction must
-wait for a lock. Then we should be able to release the latch and, maybe,
-acquire an explicit x-lock on the record.
- Because this seems too complicated, we conclude that the less
-efficient solution of releasing all the latches when the control is
-transferred to another node, and acquiring explicit x-locks, is better. */
-
-/* How is a delete performed? If there is a delete without an
-explicit cursor, i.e., a searched delete, there are at least
-two different situations:
-the implicit select cursor may run on (1) the clustered index or
-on (2) a secondary index. The delete is performed by setting
-the delete bit in the record and substituting the id of the
-deleting transaction for the original trx id, and substituting a
-new roll ptr for previous roll ptr. The old trx id and roll ptr
-are saved in the undo log record. Thus, no physical changes occur
-in the index tree structure at the time of the delete. Only
-when the undo log is purged, the index records will be physically
-deleted from the index trees.
-
-The query graph executing a searched delete would consist of
-a delete node which has as a subtree a select subgraph.
-The select subgraph should return a (persistent) cursor
-in the clustered index, placed on page which is x-latched.
-The delete node should look for all secondary index records for
-this clustered index entry and mark them as deleted. When is
-the x-latch freed? The most efficient way for performing a
-searched delete is obviously to keep the x-latch for several
-steps of query graph execution. */
-
-/*************************************************************************
-IMPORTANT NOTE: Any operation that generates redo MUST check that there
-is enough space in the redo log before starting that operation. This is
-done by calling log_free_check(). The reason for checking the
-availability of the redo log space before the start of the operation is
-that we MUST not hold any synchronization objects when performing the
-check.
-If you make a change in this module make sure that no codepath is
-introduced where a call to log_free_check() is bypassed. */
-
-/***********************************************************//**
-Checks if an update vector changes some of the first ordering fields of an
-index record. This is only used in foreign key checks and we can assume
-that index does not contain column prefixes.
-@return TRUE if changes */
-static
-ibool
-row_upd_changes_first_fields_binary(
-/*================================*/
- dtuple_t* entry, /*!< in: old value of index entry */
- dict_index_t* index, /*!< in: index of entry */
- const upd_t* update, /*!< in: update vector for the row */
- ulint n); /*!< in: how many first fields to check */
-
-
-/*********************************************************************//**
-Tells whether some foreign key constraint in the referenced_set of the
-index's table matches the given index (as decided by the
-dict_foreign_with_index predicate).
-
-NOTE that since we do not hold dict_operation_lock when leaving the
-function, it may be that the referencing table has been dropped when
-we leave this function: this function is only for heuristic use!
-
-@return TRUE if referenced */
-static
-ibool
-row_upd_index_is_referenced(
-/*========================*/
-	dict_index_t*	index,	/*!< in: index */
-	trx_t*		trx)	/*!< in: transaction */
-{
-	dict_table_t*	table = index->table;
-
-	if (table->referenced_set.empty()) {
-		/* Nothing references this table at all */
-		return(FALSE);
-	}
-
-	/* Freeze the data dictionary for the scan unless the transaction
-	already has a dictionary lock mode set. */
-	const bool	must_freeze = (trx->dict_operation_lock_mode == 0);
-
-	if (must_freeze) {
-		row_mysql_freeze_data_dictionary(trx);
-	}
-
-	const dict_foreign_set::iterator	end
-		= table->referenced_set.end();
-
-	const ibool	referenced
-		= (std::find_if(table->referenced_set.begin(), end,
-				dict_foreign_with_index(index)) != end);
-
-	if (must_freeze) {
-		row_mysql_unfreeze_data_dictionary(trx);
-	}
-
-	return(referenced);
-}
-
-#ifdef WITH_WSREP
-/*********************************************************************//**
-Tells whether the given index is the foreign_index of some constraint in
-the foreign_set of the index's table. The data dictionary is frozen for
-the duration of the scan when the transaction has no dictionary lock
-mode set.
-@return TRUE if such a constraint exists */
-static
-ibool
-wsrep_row_upd_index_is_foreign(
-/*========================*/
-	dict_index_t*	index,	/*!< in: index */
-	trx_t*		trx)	/*!< in: transaction */
-{
-	dict_table_t*	table = index->table;
-
-	if (table->foreign_set.empty()) {
-		return(FALSE);
-	}
-
-	const bool	must_freeze = (trx->dict_operation_lock_mode == 0);
-
-	if (must_freeze) {
-		row_mysql_freeze_data_dictionary(trx);
-	}
-
-	ibool	is_foreign = FALSE;
-
-	/* Stop at the first constraint that is built on this index */
-	for (dict_foreign_set::iterator it = table->foreign_set.begin();
-	     it != table->foreign_set.end();
-	     ++it) {
-
-		const dict_foreign_t*	candidate = *it;
-
-		if (candidate->foreign_index == index) {
-			is_foreign = TRUE;
-			break;
-		}
-	}
-
-	if (must_freeze) {
-		row_mysql_unfreeze_data_dictionary(trx);
-	}
-
-	return(is_foreign);
-}
-#endif /* WITH_WSREP */
-
-/*********************************************************************//**
-Checks if possible foreign key constraints hold after a delete of the record
-under pcur.
-
-NOTE that this function will temporarily commit mtr and lose the
-pcur position!
-
-The index entry of the record is copied into a local heap before mtr is
-committed and restarted. Every constraint in table->referenced_set whose
-referenced_index is this index is then checked with
-row_ins_check_foreign_constraint(), provided the node is a delete or the
-update changes the first foreign->n_fields fields. DB_DICT_CHANGED from a
-check restarts the scan from the beginning of the set; any other error
-ends it.
-
-@return DB_SUCCESS or an error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_upd_check_references_constraints(
-/*=================================*/
- upd_node_t* node, /*!< in: row update node */
- btr_pcur_t* pcur, /*!< in: cursor positioned on a record; NOTE: the
- cursor position is lost in this function! */
- dict_table_t* table, /*!< in: table in question */
- dict_index_t* index, /*!< in: index of the cursor */
- ulint* offsets,/*!< in/out: rec_get_offsets(pcur.rec, index) */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_foreign_t* foreign;
- mem_heap_t* heap;
- dtuple_t* entry;
- trx_t* trx;
- const rec_t* rec;
- ulint n_ext;
- dberr_t err;
- ibool got_s_lock = FALSE;
-
- if (table->referenced_set.empty()) {
-
- return(DB_SUCCESS);
- }
-
- trx = thr_get_trx(thr);
-
- rec = btr_pcur_get_rec(pcur);
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- heap = mem_heap_create(500);
- /* Build the index entry in heap while the record is still
- accessible; rec is not used after the mtr_commit() below. */
- entry = row_rec_to_index_entry(rec, index, offsets, &n_ext, heap);
-
- mtr_commit(mtr);
-
- DEBUG_SYNC_C("foreign_constraint_check_for_update");
-
- mtr_start_trx(mtr, trx);
- /* Freeze the data dictionary only if the transaction has no
- dictionary lock mode set; remember to unfreeze on exit. */
- if (trx->dict_operation_lock_mode == 0) {
- got_s_lock = TRUE;
-
- row_mysql_freeze_data_dictionary(trx);
- }
-
-run_again:
-
- for (dict_foreign_set::iterator it = table->referenced_set.begin();
- it != table->referenced_set.end();
- ++it) {
-
- foreign = *it;
-
- /* Note that we may have an update which updates the index
- record, but does NOT update the first fields which are
- referenced in a foreign key constraint. Then the update does
- NOT break the constraint. */
-
- if (foreign->referenced_index == index
- && (node->is_delete
- || row_upd_changes_first_fields_binary(
- entry, index, node->update,
- foreign->n_fields))) {
- dict_table_t* foreign_table = foreign->foreign_table;
-
- dict_table_t* ref_table = NULL;
- /* NOTE(review): the table opened into ref_table below is not
- assigned to foreign_table, so the counter is not incremented
- for it in that case -- confirm this is intended. */
- if (foreign_table == NULL) {
-
- ref_table = dict_table_open_on_name(
- foreign->foreign_table_name_lookup,
- FALSE, FALSE, DICT_ERR_IGNORE_NONE);
- }
-
- if (foreign_table) {
- os_inc_counter(dict_sys->mutex,
- foreign_table
- ->n_foreign_key_checks_running);
- }
-
- /* NOTE that if the thread ends up waiting for a lock
- we will release dict_operation_lock temporarily!
- But the counter on the table protects 'foreign' from
- being dropped while the check is running. */
-
- err = row_ins_check_foreign_constraint(
- FALSE, foreign, table, entry, thr);
-
- if (foreign_table) {
- os_dec_counter(dict_sys->mutex,
- foreign_table
- ->n_foreign_key_checks_running);
- }
-
- if (ref_table != NULL) {
- dict_table_close(ref_table, FALSE, FALSE);
- }
-
- /* Some table foreign key dropped, try again */
- if (err == DB_DICT_CHANGED) {
- goto run_again;
- } else if (err != DB_SUCCESS) {
- goto func_exit;
- }
- }
- }
-
- err = DB_SUCCESS;
-func_exit:
- if (got_s_lock) {
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- mem_heap_free(heap);
-
- return(err);
-}
-#ifdef WITH_WSREP
-static
-dberr_t
-wsrep_row_upd_check_foreign_constraints(
-/*=================================*/
- upd_node_t* node, /*!< in: row update node */
- btr_pcur_t* pcur, /*!< in: cursor positioned on a record; NOTE: the
- cursor position is lost in this function! */
- dict_table_t* table, /*!< in: table in question */
- dict_index_t* index, /*!< in: index of the cursor */
- ulint* offsets,/*!< in/out: rec_get_offsets(pcur.rec, index) */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_foreign_t* foreign;
- mem_heap_t* heap;
- dtuple_t* entry;
- trx_t* trx;
- const rec_t* rec;
- ulint n_ext;
- dberr_t err;
- ibool got_s_lock = FALSE;
- ibool opened = FALSE;
- /* Purpose: for every constraint in table->foreign_set whose
- foreign_index is this index, run row_ins_check_foreign_constraint()
- with check_ref TRUE, provided the node is a delete or the update
- changes the first foreign->n_fields fields. Returns DB_SUCCESS when
- the set is empty or every check passed, else the first error. */
- if (table->foreign_set.empty()) {
-
- return(DB_SUCCESS);
- }
-
- trx = thr_get_trx(thr);
-
- /* TODO: make native slave thread bail out here */
-
- rec = btr_pcur_get_rec(pcur);
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- heap = mem_heap_create(500);
- /* Copy the index entry into heap before the mtr is committed;
- rec is not used afterwards. */
- entry = row_rec_to_index_entry(rec, index, offsets,
- &n_ext, heap);
-
- mtr_commit(mtr);
- /* NOTE(review): row_upd_check_references_constraints() restarts the
- mini-transaction with mtr_start_trx(mtr, trx); plain mtr_start() is
- used here -- confirm the difference is intended. */
- mtr_start(mtr);
-
- if (trx->dict_operation_lock_mode == 0) {
- got_s_lock = TRUE;
-
- row_mysql_freeze_data_dictionary(trx);
- }
-
- for (dict_foreign_set::iterator it= table->foreign_set.begin();
- it != table->foreign_set.end();
- ++ it)
- {
- foreign= *it;
-
-
- /* Note that we may have an update which updates the index
- record, but does NOT update the first fields which are
- referenced in a foreign key constraint. Then the update does
- NOT break the constraint. */
-
- if (foreign->foreign_index == index
- && (node->is_delete
- || row_upd_changes_first_fields_binary(
- entry, index, node->update,
- foreign->n_fields))) {
- /* Open the referenced table by name if the constraint does
- not have it yet; 'opened' records that it must be closed. */
- if (foreign->referenced_table == NULL) {
- foreign->referenced_table =
- dict_table_open_on_name(
- foreign->referenced_table_name_lookup,
- FALSE, FALSE, DICT_ERR_IGNORE_NONE);
- opened = (foreign->referenced_table) ? TRUE : FALSE;
- }
-
- if (foreign->referenced_table) {
- os_inc_counter(dict_sys->mutex,
- foreign->referenced_table
- ->n_foreign_key_checks_running);
- }
-
- /* NOTE that if the thread ends up waiting for a lock
- we will release dict_operation_lock temporarily!
- But the counter on the table protects 'foreign' from
- being dropped while the check is running. */
-
- err = row_ins_check_foreign_constraint(
- TRUE, foreign, table, entry, thr);
-
- if (foreign->referenced_table) {
- os_dec_counter(dict_sys->mutex,
- foreign->referenced_table
- ->n_foreign_key_checks_running);
- /* NOTE(review): foreign->referenced_table keeps the pointer
- after dict_table_close() below -- confirm its lifetime. */
- if (opened == TRUE) {
- dict_table_close(foreign->referenced_table, FALSE, FALSE);
- opened = FALSE;
- }
- }
-
- if (err != DB_SUCCESS) {
-
- goto func_exit;
- }
- }
-
- }
-
- err = DB_SUCCESS;
-func_exit:
- if (got_s_lock) {
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- mem_heap_free(heap);
-
- DEBUG_SYNC_C("foreign_constraint_check_for_update_done");
-
- return(err);
-}
-#endif /* WITH_WSREP */
-
-/*********************************************************************//**
-Allocates an update node from the given heap and puts it into its
-initial state: typed QUE_NODE_UPDATE, positioned at
-UPD_NODE_UPDATE_CLUSTERED, with every row/index/foreign key/select
-pointer cleared, cmpl_info zeroed and a freshly created heap of its own.
-@return own: update node */
-UNIV_INTERN
-upd_node_t*
-upd_node_create(
-/*============*/
-	mem_heap_t*	heap)	/*!< in: mem heap where created */
-{
-	upd_node_t*	upd = static_cast<upd_node_t*>(
-		mem_heap_alloc(heap, sizeof(*upd)));
-
-	/* Identification */
-	upd->common.type = QUE_NODE_UPDATE;
-	upd->magic_n = UPD_NODE_MAGIC_N;
-
-	/* Execution state */
-	upd->state = UPD_NODE_UPDATE_CLUSTERED;
-	upd->in_mysql_interface = FALSE;
-	upd->cmpl_info = 0;
-
-	/* No row, update vector or index is attached yet */
-	upd->row = NULL;
-	upd->ext = NULL;
-	upd->upd_row = NULL;
-	upd->upd_ext = NULL;
-	upd->index = NULL;
-	upd->update = NULL;
-	upd->select = NULL;
-
-	/* No foreign key cascade is in progress */
-	upd->foreign = NULL;
-	upd->cascade_heap = NULL;
-	upd->cascade_node = NULL;
-
-	upd->heap = mem_heap_create(128);
-
-	return(upd);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Writes the transaction id and the roll pointer into a clustered index
-record during database recovery. For a compressed page the write goes
-through page_zip_write_trx_id_and_roll_ptr(); otherwise the two values
-are written back to back, starting at field number pos of the record. */
-UNIV_INTERN
-void
-row_upd_rec_sys_fields_in_recovery(
-/*===============================*/
-	rec_t*		rec,	/*!< in/out: record */
-	page_zip_des_t*	page_zip,/*!< in/out: compressed page, or NULL */
-	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
-	ulint		pos,	/*!< in: TRX_ID position in rec */
-	trx_id_t	trx_id,	/*!< in: transaction id */
-	roll_ptr_t	roll_ptr)/*!< in: roll ptr of the undo log record */
-{
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-
-	if (page_zip != NULL) {
-		page_zip_write_trx_id_and_roll_ptr(
-			page_zip, rec, offsets, pos, trx_id, roll_ptr);
-		return;
-	}
-
-	/* The roll ptr is written directly after the trx id, which is
-	only valid while the two system columns are adjacent. */
-#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
-# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
-#endif
-	ulint	trx_id_len;
-	byte*	trx_id_field = rec_get_nth_field(rec, offsets, pos,
-						 &trx_id_len);
-
-	ut_ad(trx_id_len == DATA_TRX_ID_LEN);
-
-	trx_write_trx_id(trx_id_field, trx_id);
-	trx_write_roll_ptr(trx_id_field + DATA_TRX_ID_LEN, roll_ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Overwrites the trx id or the roll ptr system field of a clustered index
-entry with the given value. The field buffer must already exist in the
-entry; only its contents are replaced. */
-UNIV_INTERN
-void
-row_upd_index_entry_sys_field(
-/*==========================*/
-	dtuple_t*	entry,	/*!< in/out: index entry, where the memory
-				buffers for sys fields are already allocated:
-				the function just copies the new values to
-				them */
-	dict_index_t*	index,	/*!< in: clustered index */
-	ulint		type,	/*!< in: DATA_TRX_ID or DATA_ROLL_PTR */
-	ib_uint64_t	val)	/*!< in: value to write */
-{
-	ut_ad(dict_index_is_clust(index));
-
-	/* Locate the data buffer of the requested system column */
-	const ulint	sys_pos = dict_index_get_sys_col_pos(index, type);
-	byte*		buf = static_cast<byte*>(
-		dfield_get_data(dtuple_get_nth_field(entry, sys_pos)));
-
-	if (type != DATA_TRX_ID) {
-		ut_ad(type == DATA_ROLL_PTR);
-		trx_write_roll_ptr(buf, val);
-	} else {
-		trx_write_trx_id(buf, val);
-	}
-}
-
-/***********************************************************//**
-Decides whether an update can keep every stored field size unchanged.
-Goes through the update vector and reports a change as soon as one
-updated field has a new length that differs from the length in the
-record, has an externally stored new value, or is externally stored in
-the record.
-@return TRUE if the update changes the size of some field in index or
-the field is external in rec or update */
-UNIV_INTERN
-ibool
-row_upd_changes_field_size_or_external(
-/*===================================*/
-	dict_index_t*	index,	/*!< in: index */
-	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
-	const upd_t*	update)	/*!< in: update vector */
-{
-	ut_ad(rec_offs_validate(NULL, index, offsets));
-
-	const ulint	n_upd_fields = upd_get_n_fields(update);
-
-	for (ulint k = 0; k < n_upd_fields; k++) {
-		const upd_field_t*	uf = upd_get_nth_field(update, k);
-		const dfield_t*		new_val = &(uf->new_val);
-		ulint			new_len;
-		ulint			old_len;
-
-		if (dfield_is_null(new_val) && !rec_offs_comp(offsets)) {
-			/* Old-style record: an SQL NULL occupies the SQL
-			NULL size of the column being updated. (Historic
-			fix of Dec 31st, 2004: the size used to be taken
-			from the wrong field.) */
-			new_len = dict_col_get_sql_null_size(
-				dict_index_get_nth_col(index, uf->field_no),
-				0);
-		} else {
-			new_len = dfield_get_len(new_val);
-		}
-
-		if (rec_offs_comp(offsets)
-		    && rec_offs_nth_sql_null(offsets, uf->field_no)) {
-			/* Compact format: for a variable length field an
-			SQL NULL uses zero bytes in the offset array at the
-			start of the physical record, whereas an empty
-			string uses one byte. Updating an SQL NULL varchar
-			to an empty string therefore cannot be done in
-			place. */
-			old_len = UNIV_SQL_NULL;
-		} else {
-			old_len = rec_offs_nth_size(offsets, uf->field_no);
-		}
-
-		if (old_len != new_len
-		    || dfield_is_ext(new_val)
-		    || rec_offs_nth_extern(offsets, uf->field_no)) {
-
-			return(TRUE);
-		}
-	}
-
-	return(FALSE);
-}
-
-/***********************************************************//**
-Scans the update vector for an externally stored new value whose field
-reference (the last BTR_EXTERN_FIELD_REF_SIZE bytes of the value) has
-BTR_EXTERN_OWNER_FLAG set in its BTR_EXTERN_LEN byte.
-@return true if the update contains disowned external fields. */
-UNIV_INTERN
-bool
-row_upd_changes_disowned_external(
-/*==============================*/
-	const upd_t*	update)	/*!< in: update vector */
-{
-	const ulint	n_upd_fields = upd_get_n_fields(update);
-
-	for (ulint k = 0; k < n_upd_fields; k++) {
-		const dfield_t*	new_val
-			= &(upd_get_nth_field(update, k)->new_val);
-		const ulint	new_len = dfield_get_len(new_val);
-
-		if (dfield_is_ext(new_val)) {
-			ut_ad(new_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-			/* The field reference sits at the very end of
-			the value */
-			const byte*	field_ref
-				= static_cast<const byte*>(
-					dfield_get_data(new_val))
-				+ new_len - BTR_EXTERN_FIELD_REF_SIZE;
-
-			if (field_ref[BTR_EXTERN_LEN]
-			    & BTR_EXTERN_OWNER_FLAG) {
-				return(true);
-			}
-		}
-	}
-
-	return(false);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the
-record given. No field size changes are allowed. This function is
-usually invoked on a clustered index. The only use case for a
-secondary index is row_ins_sec_index_entry_by_modify() or its
-counterpart in ibuf_insert_to_index_page().
-
-The info bits of the record are set from update->info_bits first, then
-every field named in the update vector is overwritten, and finally the
-record is written to the compressed page if one is given.
-
-Under UNIV_BLOB_DEBUG the BLOB bookkeeping addresses the record field by
-upd_field->field_no, the same field that is overwritten, and not by the
-position of the entry in the update vector. */
-UNIV_INTERN
-void
-row_upd_rec_in_place(
-/*=================*/
-	rec_t*		rec,	/*!< in/out: record where replaced */
-	dict_index_t*	index,	/*!< in: the index the record belongs to */
-	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
-	const upd_t*	update,	/*!< in: update vector */
-	page_zip_des_t*	page_zip)/*!< in: compressed page with enough space
-				available, or NULL */
-{
-	const upd_field_t*	upd_field;
-	const dfield_t*		new_val;
-	ulint			n_fields;
-	ulint			i;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	if (rec_offs_comp(offsets)) {
-		rec_set_info_bits_new(rec, update->info_bits);
-	} else {
-		rec_set_info_bits_old(rec, update->info_bits);
-	}
-
-	n_fields = upd_get_n_fields(update);
-
-	for (i = 0; i < n_fields; i++) {
-#ifdef UNIV_BLOB_DEBUG
-		btr_blob_dbg_t	b;
-		const byte*	field_ref = NULL;
-#endif /* UNIV_BLOB_DEBUG */
-
-		upd_field = upd_get_nth_field(update, i);
-		new_val = &(upd_field->new_val);
-
-		/* Record field number that this update entry targets;
-		i is only the position in the update vector. */
-		const ulint	field_no = upd_field->field_no;
-
-		ut_ad(!dfield_is_ext(new_val) ==
-		      !rec_offs_nth_extern(offsets, field_no));
-#ifdef UNIV_BLOB_DEBUG
-		if (dfield_is_ext(new_val)) {
-			ulint	len;
-			field_ref = rec_get_nth_field(rec, offsets,
-						      field_no, &len);
-			ut_a(len != UNIV_SQL_NULL);
-			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
-			field_ref += len - BTR_EXTERN_FIELD_REF_SIZE;
-
-			/* Forget the BLOB reference held by the old
-			value of the field */
-			b.ref_page_no = page_get_page_no(page_align(rec));
-			b.ref_heap_no = page_rec_get_heap_no(rec);
-			b.ref_field_no = field_no;
-			b.blob_page_no = mach_read_from_4(
-				field_ref + BTR_EXTERN_PAGE_NO);
-			ut_a(b.ref_field_no >= index->n_uniq);
-			btr_blob_dbg_rbt_delete(index, &b, "upd_in_place");
-		}
-#endif /* UNIV_BLOB_DEBUG */
-
-		rec_set_nth_field(rec, offsets, field_no,
-				  dfield_get_data(new_val),
-				  dfield_get_len(new_val));
-
-#ifdef UNIV_BLOB_DEBUG
-		if (dfield_is_ext(new_val)) {
-			/* Register the BLOB reference of the new value,
-			read back from the record just written */
-			b.blob_page_no = mach_read_from_4(
-				field_ref + BTR_EXTERN_PAGE_NO);
-			b.always_owner = b.owner = !(field_ref[BTR_EXTERN_LEN]
-						     & BTR_EXTERN_OWNER_FLAG);
-			b.del = rec_get_deleted_flag(
-				rec, rec_offs_comp(offsets));
-
-			btr_blob_dbg_rbt_insert(index, &b, "upd_in_place");
-		}
-#endif /* UNIV_BLOB_DEBUG */
-	}
-
-	if (page_zip) {
-		page_zip_write_rec(page_zip, rec, index, offsets, 0);
-	}
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Writes into the redo log the values of trx id and roll ptr and enough info
-to determine their positions within a clustered index record.
-The bytes are emitted in this order: the position of the DATA_TRX_ID
-column in the index (compressed integer), the roll pointer
-(DATA_ROLL_PTR_LEN bytes), and the transaction id (compressed 64-bit
-integer). row_upd_parse_sys_vals() reads them back in the same order.
-The mtr argument is only referenced by a debug assertion.
-@return new pointer to mlog */
-UNIV_INTERN
-byte*
-row_upd_write_sys_vals_to_log(
-/*==========================*/
- dict_index_t* index, /*!< in: clustered index */
- trx_id_t trx_id, /*!< in: transaction id */
- roll_ptr_t roll_ptr,/*!< in: roll ptr of the undo log record */
- byte* log_ptr,/*!< pointer to a buffer of size > 20 opened
- in mlog */
- mtr_t* mtr MY_ATTRIBUTE((unused))) /*!< in: mtr */
-{
- ut_ad(dict_index_is_clust(index));
- ut_ad(mtr);
-
- log_ptr += mach_write_compressed(log_ptr,
- dict_index_get_sys_col_pos(
- index, DATA_TRX_ID));
-
- trx_write_roll_ptr(log_ptr, roll_ptr);
- log_ptr += DATA_ROLL_PTR_LEN; /* the roll pointer has a fixed width */
-
- log_ptr += mach_ull_write_compressed(log_ptr, trx_id);
-
- return(log_ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Parses the log data of system field values.
-Reads, in order, a compressed integer into *pos, a roll pointer of
-DATA_ROLL_PTR_LEN bytes into *roll_ptr, and a compressed 64-bit
-integer into *trx_id. Parsing stops with NULL when the first
-compressed integer cannot be parsed or when fewer than
-DATA_ROLL_PTR_LEN bytes remain before end_ptr; the result of the final
-mach_ull_parse_compressed() call is returned as is.
-@return log data end or NULL */
-UNIV_INTERN
-byte*
-row_upd_parse_sys_vals(
-/*===================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- ulint* pos, /*!< out: TRX_ID position in record */
- trx_id_t* trx_id, /*!< out: trx id */
- roll_ptr_t* roll_ptr)/*!< out: roll ptr */
-{
- ptr = mach_parse_compressed(ptr, end_ptr, pos);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (end_ptr < ptr + DATA_ROLL_PTR_LEN) {
-
- return(NULL); /* the roll pointer is not completely in the buffer */
- }
-
- *roll_ptr = trx_read_roll_ptr(ptr);
- ptr += DATA_ROLL_PTR_LEN;
-
- ptr = mach_ull_parse_compressed(ptr, end_ptr, trx_id);
-
- return(ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************//**
-Writes to the redo log the new values of the fields occurring in the index.
-Layout written: one byte of update->info_bits, the number of fields as
-a compressed integer, and then for each field its field_no and its
-length as compressed integers, followed by the data bytes unless the
-length is UNIV_SQL_NULL. row_upd_index_parse() reads this layout.
-
-The function tracks the end of the currently open mlog buffer in
-buf_end. Before each field it makes sure at least 30 bytes are left,
-closing and reopening the buffer otherwise. A value that does not fit
-before buf_end is appended with mlog_catenate_string() after closing
-the buffer, and a new buffer is opened afterwards. The buffer is
-closed before returning. */
-UNIV_INTERN
-void
-row_upd_index_write_log(
-/*====================*/
- const upd_t* update, /*!< in: update vector */
- byte* log_ptr,/*!< in: pointer to mlog buffer: must
- contain at least MLOG_BUF_MARGIN bytes
- of free space; the buffer is closed
- within this function */
- mtr_t* mtr) /*!< in: mtr into whose log to write */
-{
- const upd_field_t* upd_field;
- const dfield_t* new_val;
- ulint len;
- ulint n_fields;
- byte* buf_end;
- ulint i;
-
- n_fields = upd_get_n_fields(update);
-
- buf_end = log_ptr + MLOG_BUF_MARGIN;
-
- mach_write_to_1(log_ptr, update->info_bits);
- log_ptr++;
- log_ptr += mach_write_compressed(log_ptr, n_fields);
-
- for (i = 0; i < n_fields; i++) {
-
-#if MLOG_BUF_MARGIN <= 30
-# error "MLOG_BUF_MARGIN <= 30"
-#endif
-
- if (log_ptr + 30 > buf_end) { /* keep 30 bytes of headroom for the per-field header */
- mlog_close(mtr, log_ptr);
-
- log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN);
- buf_end = log_ptr + MLOG_BUF_MARGIN;
- }
-
- upd_field = upd_get_nth_field(update, i);
-
- new_val = &(upd_field->new_val);
-
- len = dfield_get_len(new_val);
-
- log_ptr += mach_write_compressed(log_ptr, upd_field->field_no);
- log_ptr += mach_write_compressed(log_ptr, len);
-
- if (len != UNIV_SQL_NULL) { /* SQL NULL is logged as a length only */
- if (log_ptr + len < buf_end) {
- memcpy(log_ptr, dfield_get_data(new_val), len);
-
- log_ptr += len;
- } else {
- mlog_close(mtr, log_ptr);
-
- mlog_catenate_string(
- mtr,
- static_cast<byte*>(
- dfield_get_data(new_val)),
- len);
-
- log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN);
- buf_end = log_ptr + MLOG_BUF_MARGIN;
- }
- }
- }
-
- mlog_close(mtr, log_ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Parses the log data written by row_upd_index_write_log.
-Expects one byte of info bits, a compressed field count, and then per
-field a compressed field number and a compressed length, followed by
-that many data bytes unless the length is UNIV_SQL_NULL. An update
-vector with the parsed number of fields is created in heap; each
-non-NULL value is copied into heap with mem_heap_dup(), and NULL
-values are set with dfield_set_null().
-
-NULL is returned as soon as the buffer turns out to be too short.
-In that case *update_out is left unassigned, although the partially
-filled vector has already been allocated from heap.
-@return log data end or NULL */
-UNIV_INTERN
-byte*
-row_upd_index_parse(
-/*================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- mem_heap_t* heap, /*!< in: memory heap where update vector is
- built */
- upd_t** update_out)/*!< out: update vector */
-{
- upd_t* update;
- upd_field_t* upd_field;
- dfield_t* new_val;
- ulint len;
- ulint n_fields;
- ulint info_bits;
- ulint i;
-
- if (end_ptr < ptr + 1) {
-
- return(NULL); /* not even the info bits byte is available */
- }
-
- info_bits = mach_read_from_1(ptr);
- ptr++;
- ptr = mach_parse_compressed(ptr, end_ptr, &n_fields);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- update = upd_create(n_fields, heap);
- update->info_bits = info_bits;
-
- for (i = 0; i < n_fields; i++) {
- ulint field_no;
- upd_field = upd_get_nth_field(update, i);
- new_val = &(upd_field->new_val);
-
- ptr = mach_parse_compressed(ptr, end_ptr, &field_no);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- upd_field->field_no = field_no;
-
- ptr = mach_parse_compressed(ptr, end_ptr, &len);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (len != UNIV_SQL_NULL) {
-
- if (end_ptr < ptr + len) {
-
- return(NULL); /* the value is not completely in the buffer */
- }
-
- dfield_set_data(new_val,
- mem_heap_dup(heap, ptr, len), len);
- ptr += len;
- } else {
- dfield_set_null(new_val);
- }
- }
-
- *update_out = update;
-
- return(ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***************************************************************//**
-Builds an update vector from those fields which in a secondary index entry
-differ from a record that has the equal ordering fields. NOTE: we compare
-the fields as binary strings!
-The vector is created with room for every field of entry. Fields are
-compared position by position; each differing field of entry is copied
-into the next free slot and tagged with its index position i. At the
-end, update->n_fields is reduced to the number of differences found.
-The function asserts that index is not a clustered index; debug builds
-additionally assert that rec has no externally stored fields.
-@return own: update vector of differing fields */
-UNIV_INTERN
-upd_t*
-row_upd_build_sec_rec_difference_binary(
-/*====================================*/
- const rec_t* rec, /*!< in: secondary index record */
- dict_index_t* index, /*!< in: index */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- const dtuple_t* entry, /*!< in: entry to insert */
- mem_heap_t* heap) /*!< in: memory heap from which allocated */
-{
- upd_field_t* upd_field;
- const dfield_t* dfield;
- const byte* data;
- ulint len;
- upd_t* update;
- ulint n_diff;
- ulint i;
-
- /* This function is used only for a secondary index */
- ut_a(!dict_index_is_clust(index));
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(rec_offs_n_fields(offsets) == dtuple_get_n_fields(entry));
- ut_ad(!rec_offs_any_extern(offsets));
-
- update = upd_create(dtuple_get_n_fields(entry), heap);
-
- n_diff = 0;
-
- for (i = 0; i < dtuple_get_n_fields(entry); i++) {
-
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- dfield = dtuple_get_nth_field(entry, i);
-
- /* NOTE that it may be that len != dfield_get_len(dfield) if we
- are updating in a character set and collation where strings of
- different length can be equal in an alphabetical comparison,
- and also in the case where we have a column prefix index
- and the last characters in the index field are spaces; the
- latter case probably caused the assertion failures reported at
- row0upd.cc line 713 in versions 4.0.14 - 4.0.16. */
-
- /* NOTE: we compare the fields as binary strings!
- (No collation) */
-
- if (!dfield_data_is_binary_equal(dfield, len, data)) {
-
- upd_field = upd_get_nth_field(update, n_diff);
-
- dfield_copy(&(upd_field->new_val), dfield);
-
- upd_field_set_field_no(upd_field, i, index, NULL);
-
- n_diff++;
- }
- }
-
- update->n_fields = n_diff; /* shrink the vector to the slots actually used */
-
- return(update);
-}
-
-/***************************************************************//**
-Builds an update vector from those fields, excluding the roll ptr and
-trx id fields, which in an index entry differ from a record that has
-the equal ordering fields. NOTE: we compare the fields as binary strings!
-A field counts as different when its externally-stored status differs
-between entry and rec, or when its data is not binary equal. The two
-system columns at positions trx_id_pos and trx_id_pos + 1 are skipped
-only when no_sys is true. If offsets is NULL, the offsets are computed
-here with rec_get_offsets(), to which the caller's heap is passed.
-The vector is created with room for every field of entry and
-update->n_fields is reduced to the number of differences at the end.
-@return own: update vector of differing fields, excluding roll ptr and
-trx id */
-UNIV_INTERN
-const upd_t*
-row_upd_build_difference_binary(
-/*============================*/
- dict_index_t* index, /*!< in: clustered index */
- const dtuple_t* entry, /*!< in: entry to insert */
- const rec_t* rec, /*!< in: clustered index record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec,index), or NULL */
- bool no_sys, /*!< in: skip the system columns
- DB_TRX_ID and DB_ROLL_PTR */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap) /*!< in: memory heap from which allocated */
-{
- upd_field_t* upd_field;
- const dfield_t* dfield;
- const byte* data;
- ulint len;
- upd_t* update;
- ulint n_diff;
- ulint trx_id_pos;
- ulint i;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- rec_offs_init(offsets_);
-
- /* This function is used only for a clustered index */
- ut_a(dict_index_is_clust(index));
-
- update = upd_create(dtuple_get_n_fields(entry), heap);
-
- n_diff = 0;
-
- trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
- ut_ad(dict_index_get_sys_col_pos(index, DATA_ROLL_PTR)
- == trx_id_pos + 1); /* the skip test below relies on this adjacency */
-
- if (!offsets) {
- offsets = rec_get_offsets(rec, index, offsets_,
- ULINT_UNDEFINED, &heap);
- } else {
- ut_ad(rec_offs_validate(rec, index, offsets));
- }
-
- for (i = 0; i < dtuple_get_n_fields(entry); i++) {
-
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- dfield = dtuple_get_nth_field(entry, i);
-
- /* NOTE: we compare the fields as binary strings!
- (No collation) */
-
- if (no_sys && (i == trx_id_pos || i == trx_id_pos + 1)) {
-
- continue;
- }
-
- if (!dfield_is_ext(dfield)
- != !rec_offs_nth_extern(offsets, i)
- || !dfield_data_is_binary_equal(dfield, len, data)) {
-
- upd_field = upd_get_nth_field(update, n_diff);
-
- dfield_copy(&(upd_field->new_val), dfield);
-
- upd_field_set_field_no(upd_field, i, index, trx);
-
- n_diff++;
- }
- }
-
- update->n_fields = n_diff; /* shrink the vector to the slots actually used */
-
- return(update);
-}
-
-/***********************************************************//**
-Fetch a prefix of an externally stored column. This is similar
-to row_ext_lookup(), but the row_ext_t holds the old values
-of the column and must not be poisoned with the new values.
-A buffer of the requested *len bytes is allocated from heap and filled
-by btr_copy_externally_stored_field_prefix(), whose return value is
-stored back into *len. A fetched length of zero is treated as a
-fatal error (ut_a).
-@return BLOB prefix */
-static
-byte*
-row_upd_ext_fetch(
-/*==============*/
- const byte* data, /*!< in: 'internally' stored part of the
- field containing also the reference to
- the external part */
- ulint local_len, /*!< in: length of data, in bytes */
- ulint zip_size, /*!< in: nonzero=compressed BLOB
- page size, zero for uncompressed
- BLOBs */
- ulint* len, /*!< in: length of prefix to fetch;
- out: fetched length of the prefix */
- mem_heap_t* heap) /*!< in: heap where to allocate */
-{
- byte* buf = static_cast<byte*>(mem_heap_alloc(heap, *len));
-
- *len = btr_copy_externally_stored_field_prefix(
- buf, *len, zip_size, data, local_len, NULL);
-
- /* We should never update records containing a half-deleted BLOB. */
- ut_a(*len);
-
- return(buf);
-}
-
-/***********************************************************//**
-Replaces the new column value stored in the update vector in
-the given index entry field.
-The data of uf->new_val is first copied into dfield; nothing more is
-done for an SQL NULL value. Otherwise there are two cases:
-
-1) field->prefix_len > 0 (column prefix index field): if the value is
-externally stored and its length is below prefix_len plus
-BTR_EXTERN_FIELD_REF_SIZE, the prefix is fetched with
-row_upd_ext_fetch(). The length is then limited with
-dtype_get_at_most_n_mbchars() and stored in dfield. When nothing was
-fetched, dfield_dup(dfield, heap) is applied.
-
-2) no prefix: the action depends on uf->orig_len.
-BTR_EXTERN_FIELD_REF_SIZE: only the trailing field reference is kept
-and the field is marked external, then dfield_dup() is applied.
-0: dfield_dup() is applied to the value as it is.
-Any other value (asserted to exceed BTR_EXTERN_FIELD_REF_SIZE): a
-buffer of orig_len bytes is built in heap from the leading
-orig_len - BTR_EXTERN_FIELD_REF_SIZE bytes of the value plus its
-trailing field reference, and the field is marked external. */
-static
-void
-row_upd_index_replace_new_col_val(
-/*==============================*/
- dfield_t* dfield, /*!< in/out: data field
- of the index entry */
- const dict_field_t* field, /*!< in: index field */
- const dict_col_t* col, /*!< in: field->col */
- const upd_field_t* uf, /*!< in: update field */
- mem_heap_t* heap, /*!< in: memory heap for allocating
- and copying the new value */
- ulint zip_size)/*!< in: compressed page
- size of the table, or 0 */
-{
- ulint len;
- const byte* data;
-
- dfield_copy_data(dfield, &uf->new_val);
-
- if (dfield_is_null(dfield)) {
- return;
- }
-
- len = dfield_get_len(dfield);
- data = static_cast<const byte*>(dfield_get_data(dfield));
-
- if (field->prefix_len > 0) {
- ibool fetch_ext = dfield_is_ext(dfield)
- && len < (ulint) field->prefix_len
- + BTR_EXTERN_FIELD_REF_SIZE;
-
- if (fetch_ext) {
- ulint l = len; /* length of the locally stored part */
-
- len = field->prefix_len;
-
- data = row_upd_ext_fetch(data, l, zip_size,
- &len, heap);
- }
-
- len = dtype_get_at_most_n_mbchars(col->prtype,
- col->mbminmaxlen,
- field->prefix_len, len,
- (const char*) data);
-
- dfield_set_data(dfield, data, len);
-
- if (!fetch_ext) {
- dfield_dup(dfield, heap); /* a fetched prefix was already allocated from heap */
- }
-
- return;
- }
-
- switch (uf->orig_len) {
- byte* buf;
- case BTR_EXTERN_FIELD_REF_SIZE:
- /* Restore the original locally stored
- part of the column. In the undo log,
- InnoDB writes a longer prefix of externally
- stored columns, so that column prefixes
- in secondary indexes can be reconstructed. */
- dfield_set_data(dfield,
- data + len - BTR_EXTERN_FIELD_REF_SIZE,
- BTR_EXTERN_FIELD_REF_SIZE);
- dfield_set_ext(dfield);
- /* fall through */
- case 0:
- dfield_dup(dfield, heap);
- break;
- default:
- /* Reconstruct the original locally
- stored part of the column. The data
- will have to be copied. */
- ut_a(uf->orig_len > BTR_EXTERN_FIELD_REF_SIZE);
- buf = static_cast<byte*>(mem_heap_alloc(heap, uf->orig_len));
-
- /* Copy the locally stored prefix. */
- memcpy(buf, data,
- uf->orig_len - BTR_EXTERN_FIELD_REF_SIZE);
-
- /* Copy the BLOB pointer. */
- memcpy(buf + uf->orig_len - BTR_EXTERN_FIELD_REF_SIZE,
- data + len - BTR_EXTERN_FIELD_REF_SIZE,
- BTR_EXTERN_FIELD_REF_SIZE);
-
- dfield_set_data(dfield, buf, uf->orig_len);
- dfield_set_ext(dfield);
- break;
- }
-}
-
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the index entry
-given.
-The info bits of entry are set from update->info_bits. Then, for each
-index position i below the chosen field count (the number of unique
-fields when order_only is set, all fields otherwise), the update
-vector is searched for an element whose field number equals i. When
-one exists, it is applied to field i of entry through
-row_upd_index_replace_new_col_val(). */
-UNIV_INTERN
-void
-row_upd_index_replace_new_col_vals_index_pos(
-/*=========================================*/
- dtuple_t* entry, /*!< in/out: index entry where replaced;
- the clustered index record must be
- covered by a lock or a page latch to
- prevent deletion (rollback or purge) */
- dict_index_t* index, /*!< in: index; NOTE that this may also be a
- non-clustered index */
- const upd_t* update, /*!< in: an update vector built for the index so
- that the field number in an upd_field is the
- index position */
- ibool order_only,
- /*!< in: if TRUE, limit the replacement to
- ordering fields of index; note that this
- does not work for non-clustered indexes. */
- mem_heap_t* heap) /*!< in: memory heap for allocating and
- copying the new values */
-{
- ulint i;
- ulint n_fields;
- const ulint zip_size = dict_table_zip_size(index->table);
-
- dtuple_set_info_bits(entry, update->info_bits);
-
- if (order_only) {
- n_fields = dict_index_get_n_unique(index);
- } else {
- n_fields = dict_index_get_n_fields(index);
- }
-
- for (i = 0; i < n_fields; i++) {
- const dict_field_t* field;
- const dict_col_t* col;
- const upd_field_t* uf;
-
- field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(field);
- uf = upd_get_field_by_field_no(update, i); /* field numbers are index positions here */
-
- if (uf) {
- row_upd_index_replace_new_col_val(
- dtuple_get_nth_field(entry, i),
- field, col, uf, heap, zip_size);
- }
- }
-}
-
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the index entry
-given.
-Unlike row_upd_index_replace_new_col_vals_index_pos(), the field
-numbers of the update vector are clustered index positions. Each
-field of index is therefore mapped to the clustered index position of
-its column with dict_col_get_clust_pos() before the update vector is
-searched. All fields of index are visited, and the info bits of entry
-are set from update->info_bits. */
-UNIV_INTERN
-void
-row_upd_index_replace_new_col_vals(
-/*===============================*/
- dtuple_t* entry, /*!< in/out: index entry where replaced;
- the clustered index record must be
- covered by a lock or a page latch to
- prevent deletion (rollback or purge) */
- dict_index_t* index, /*!< in: index; NOTE that this may also be a
- non-clustered index */
- const upd_t* update, /*!< in: an update vector built for the
- CLUSTERED index so that the field number in
- an upd_field is the clustered index position */
- mem_heap_t* heap) /*!< in: memory heap for allocating and
- copying the new values */
-{
- ulint i;
- const dict_index_t* clust_index
- = dict_table_get_first_index(index->table);
- const ulint zip_size
- = dict_table_zip_size(index->table);
-
- dtuple_set_info_bits(entry, update->info_bits);
-
- for (i = 0; i < dict_index_get_n_fields(index); i++) {
- const dict_field_t* field;
- const dict_col_t* col;
- const upd_field_t* uf;
-
- field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(field);
- uf = upd_get_field_by_field_no(
- update, dict_col_get_clust_pos(col, clust_index));
-
- if (uf) {
- row_upd_index_replace_new_col_val(
- dtuple_get_nth_field(entry, i),
- field, col, uf, heap, zip_size);
- }
- }
-}
-
-/***********************************************************//**
-Replaces the new column values stored in the update vector.
-The info bits of row are set from update->info_bits. For every table
-column that has a position in the clustered index, the update vector
-is scanned for the first element whose field number equals that
-position; if one is found, its new value is copied into the column of
-row. Columns that end up externally stored and have ord_part set are
-collected, and if there are any, *ext is created for them with
-row_ext_create(); otherwise *ext is set to NULL. */
-UNIV_INTERN
-void
-row_upd_replace(
-/*============*/
- dtuple_t* row, /*!< in/out: row where replaced,
- indexed by col_no;
- the clustered index record must be
- covered by a lock or a page latch to
- prevent deletion (rollback or purge) */
- row_ext_t** ext, /*!< out, own: NULL, or externally
- stored column prefixes */
- const dict_index_t* index, /*!< in: clustered index */
- const upd_t* update, /*!< in: an update vector built for the
- clustered index */
- mem_heap_t* heap) /*!< in: memory heap */
-{
- ulint col_no;
- ulint i;
- ulint n_cols;
- ulint n_ext_cols;
- ulint* ext_cols;
- const dict_table_t* table;
-
- ut_ad(row);
- ut_ad(ext);
- ut_ad(index);
- ut_ad(dict_index_is_clust(index));
- ut_ad(update);
- ut_ad(heap);
-
- n_cols = dtuple_get_n_fields(row);
- table = index->table;
- ut_ad(n_cols == dict_table_get_n_cols(table));
-
- ext_cols = static_cast<ulint*>(
- mem_heap_alloc(heap, n_cols * sizeof *ext_cols)); /* sized for the case that every column qualifies */
-
- n_ext_cols = 0;
-
- dtuple_set_info_bits(row, update->info_bits);
-
- for (col_no = 0; col_no < n_cols; col_no++) {
-
- const dict_col_t* col
- = dict_table_get_nth_col(table, col_no);
- const ulint clust_pos
- = dict_col_get_clust_pos(col, index);
- dfield_t* dfield;
-
- if (UNIV_UNLIKELY(clust_pos == ULINT_UNDEFINED)) {
-
- continue; /* the column has no position in the clustered index */
- }
-
- dfield = dtuple_get_nth_field(row, col_no);
-
- for (i = 0; i < upd_get_n_fields(update); i++) {
-
- const upd_field_t* upd_field
- = upd_get_nth_field(update, i);
-
- if (upd_field->field_no != clust_pos) {
-
- continue;
- }
-
- dfield_copy_data(dfield, &upd_field->new_val);
- break; /* only the first matching update field is applied */
- }
-
- if (dfield_is_ext(dfield) && col->ord_part) {
- ext_cols[n_ext_cols++] = col_no;
- }
- }
-
- if (n_ext_cols) {
- *ext = row_ext_create(n_ext_cols, ext_cols, table->flags, row,
- heap);
- } else {
- *ext = NULL;
- }
-}
-
-/***********************************************************//**
-Checks if an update vector changes an ordering field of an index record.
-
-This function is fast if the update vector is short or the number of ordering
-fields in the index is small. Otherwise, this can be quadratic.
-NOTE: we compare the fields as binary strings!
-
-Only the first dict_index_get_n_unique(index) fields of index are
-examined. A field whose column does not occur in the update vector is
-skipped. If such a column does occur and row is NULL, TRUE is
-returned without comparing values. Otherwise the old value from row
-is compared with the new value, limited to ind_field->prefix_len.
-
-For a column prefix field with a non-NULL old value, the old value is
-replaced before the comparison: when ext is given, by the prefix that
-row_ext_lookup() returns (if any); when ext is NULL and the old value
-is externally stored, by its locally stored part without the trailing
-field reference. If the lookup returns field_ref_zero, TRUE is
-returned immediately.
-
-The thr parameter exists only in UNIV_DEBUG builds and is used only
-in debug assertions.
-@return TRUE if update vector changes an ordering field in the index record */
-UNIV_INTERN
-ibool
-row_upd_changes_ord_field_binary_func(
-/*==================================*/
- dict_index_t* index, /*!< in: index of the record */
- const upd_t* update, /*!< in: update vector for the row; NOTE: the
- field numbers in this MUST be clustered index
- positions! */
-#ifdef UNIV_DEBUG
- const que_thr_t*thr, /*!< in: query thread */
-#endif /* UNIV_DEBUG */
- const dtuple_t* row, /*!< in: old value of row, or NULL if the
- row and the data values in update are not
- known when this function is called, e.g., at
- compile time */
- const row_ext_t*ext) /*!< NULL, or prefixes of the externally
- stored columns in the old row */
-{
- ulint n_unique;
- ulint i;
- const dict_index_t* clust_index;
-
- ut_ad(thr);
- ut_ad(thr->graph);
- ut_ad(thr->graph->trx);
-
- n_unique = dict_index_get_n_unique(index);
-
- clust_index = dict_table_get_first_index(index->table);
-
- for (i = 0; i < n_unique; i++) {
-
- const dict_field_t* ind_field;
- const dict_col_t* col;
- ulint col_no;
- const upd_field_t* upd_field;
- const dfield_t* dfield;
- dfield_t dfield_ext;
- ulint dfield_len= 0;
- const byte* buf;
-
- ind_field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(ind_field);
- col_no = dict_col_get_no(col);
-
- upd_field = upd_get_field_by_field_no(
- update, dict_col_get_clust_pos(col, clust_index));
-
- if (upd_field == NULL) {
- continue; /* this ordering column is not updated */
- }
-
- if (row == NULL) {
- ut_ad(ext == NULL);
- return(TRUE);
- }
-
- dfield = dtuple_get_nth_field(row, col_no);
-
- /* This treatment of column prefix indexes is loosely
- based on row_build_index_entry(). */
-
- if (UNIV_LIKELY(ind_field->prefix_len == 0)
- || dfield_is_null(dfield)) {
- /* do nothing special */
- } else if (ext) {
- /* Silence a compiler warning without
- silencing a Valgrind error. */
- dfield_len = 0;
- UNIV_MEM_INVALID(&dfield_len, sizeof dfield_len);
- /* See if the column is stored externally. */
- buf = row_ext_lookup(ext, col_no, &dfield_len);
-
- ut_ad(col->ord_part);
-
- if (UNIV_LIKELY_NULL(buf)) {
- if (UNIV_UNLIKELY(buf == field_ref_zero)) {
- /* The externally stored field
- was not written yet. This
- record should only be seen by
- recv_recovery_rollback_active(),
- when the server had crashed before
- storing the field. */
- ut_ad(thr->graph->trx->is_recovered);
- ut_ad(trx_is_recv(thr->graph->trx));
- return(TRUE);
- }
-
- goto copy_dfield;
- }
- } else if (dfield_is_ext(dfield)) {
- dfield_len = dfield_get_len(dfield);
- ut_a(dfield_len > BTR_EXTERN_FIELD_REF_SIZE);
- dfield_len -= BTR_EXTERN_FIELD_REF_SIZE; /* exclude the trailing field reference */
- ut_a(dict_index_is_clust(index)
- || ind_field->prefix_len <= dfield_len);
-
- buf = static_cast<byte*>(dfield_get_data(dfield));
-copy_dfield:
- ut_a(dfield_len > 0);
- dfield_copy(&dfield_ext, dfield);
- dfield_set_data(&dfield_ext, buf, dfield_len);
- dfield = &dfield_ext; /* compare against the substituted old value */
- }
-
- if (!dfield_datas_are_binary_equal(
- dfield, &upd_field->new_val,
- ind_field->prefix_len)) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/***********************************************************//**
-Checks if an update vector changes an ordering field of an index record.
-NOTE: we compare the fields as binary strings!
-No data values are inspected here: each field number of the update
-vector is looked up as a position in the first index of the table,
-and TRUE is returned as soon as the column at that position has
-ord_part set.
-@return TRUE if update vector may change an ordering field in an index
-record */
-UNIV_INTERN
-ibool
-row_upd_changes_some_index_ord_field_binary(
-/*========================================*/
- const dict_table_t* table, /*!< in: table */
- const upd_t* update) /*!< in: update vector for the row */
-{
- upd_field_t* upd_field;
- dict_index_t* index;
- ulint i;
-
- index = dict_table_get_first_index(table);
-
- for (i = 0; i < upd_get_n_fields(update); i++) {
-
- upd_field = upd_get_nth_field(update, i);
-
- if (dict_field_get_col(dict_index_get_nth_field(
- index, upd_field->field_no))
- ->ord_part) {
-
- return(TRUE);
- }
- }
-
- return(FALSE);
-}
-
-/***********************************************************//**
-Checks if an FTS Doc ID column is affected by an UPDATE.
-The field number of upd_field is interpreted as a position in the
-first index of the table and converted to a table column number,
-which is then compared with table->fts->doc_col.
-NOTE(review): table->fts is dereferenced without a NULL check, so the
-caller presumably must only pass tables that have it set; verify
-against the callers.
-@return whether the Doc ID column is changed */
-UNIV_INTERN
-bool
-row_upd_changes_doc_id(
-/*===================*/
- dict_table_t* table, /*!< in: table */
- upd_field_t* upd_field) /*!< in: field to check */
-{
- ulint col_no;
- dict_index_t* clust_index;
- fts_t* fts = table->fts;
-
- clust_index = dict_table_get_first_index(table);
-
- /* Convert from index-specific column number to table-global
- column number. */
- col_no = dict_index_get_nth_col_no(clust_index, upd_field->field_no);
-
- return(col_no == fts->doc_col);
-}
-/***********************************************************//**
-Checks if an FTS indexed column is affected by an UPDATE.
-The field number of upd_field is interpreted as a position in the
-first index of the table and converted to a table column number; the
-result of dict_table_is_fts_column(table->fts->indexes, col_no) is
-returned unchanged.
-NOTE(review): table->fts is dereferenced without a NULL check, so the
-caller presumably must only pass tables that have it set; verify
-against the callers.
-@return offset within fts_t::indexes if FTS indexed column updated else
-ULINT_UNDEFINED */
-UNIV_INTERN
-ulint
-row_upd_changes_fts_column(
-/*=======================*/
- dict_table_t* table, /*!< in: table */
- upd_field_t* upd_field) /*!< in: field to check */
-{
- ulint col_no;
- dict_index_t* clust_index;
- fts_t* fts = table->fts;
-
- clust_index = dict_table_get_first_index(table);
-
- /* Convert from index-specific column number to table-global
- column number. */
- col_no = dict_index_get_nth_col_no(clust_index, upd_field->field_no);
-
- return(dict_table_is_fts_column(fts->indexes, col_no));
-}
-
-/***********************************************************//**
-Checks if an update vector changes some of the first ordering fields of an
-index record. This is only used in foreign key checks and we can assume
-that index does not contain column prefixes.
-For each of the first n fields of index, the clustered index position
-of its column is computed, and every element of the update vector
-with that field number is compared, as a binary string, with field i
-of entry. The first mismatch makes the function return TRUE. A
-nonzero prefix_len on any examined field is a fatal error (ut_a).
-@return TRUE if changes */
-static
-ibool
-row_upd_changes_first_fields_binary(
-/*================================*/
- dtuple_t* entry, /*!< in: index entry */
- dict_index_t* index, /*!< in: index of entry */
- const upd_t* update, /*!< in: update vector for the row */
- ulint n) /*!< in: how many first fields to check */
-{
- ulint n_upd_fields;
- ulint i, j;
- dict_index_t* clust_index;
-
- ut_ad(update && index);
- ut_ad(n <= dict_index_get_n_fields(index));
-
- n_upd_fields = upd_get_n_fields(update);
- clust_index = dict_table_get_first_index(index->table);
-
- for (i = 0; i < n; i++) {
-
- const dict_field_t* ind_field;
- const dict_col_t* col;
- ulint col_pos;
-
- ind_field = dict_index_get_nth_field(index, i);
- col = dict_field_get_col(ind_field);
- col_pos = dict_col_get_clust_pos(col, clust_index);
-
- ut_a(ind_field->prefix_len == 0);
-
- for (j = 0; j < n_upd_fields; j++) {
-
- upd_field_t* upd_field
- = upd_get_nth_field(update, j);
-
- if (col_pos == upd_field->field_no
- && !dfield_datas_are_binary_equal(
- dtuple_get_nth_field(entry, i),
- &upd_field->new_val, 0)) {
-
- return(TRUE);
- }
- }
- }
-
- return(FALSE);
-}
-
-/*********************************************************************//**
-Copies the column values from a record.
-Follows the col_var_list chain starting at column. For each node, the
-record field numbered column->field_nos[SYM_CLUST_FIELD_NO] is read
-and handed, with its length, to eval_node_copy_and_alloc_val().
-Nothing is done when column is NULL. */
-UNIV_INLINE
-void
-row_upd_copy_columns(
-/*=================*/
- rec_t* rec, /*!< in: record in a clustered index */
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- sym_node_t* column) /*!< in: first column in a column list, or
- NULL */
-{
- byte* data;
- ulint len;
-
- while (column) {
- data = rec_get_nth_field(rec, offsets,
- column->field_nos[SYM_CLUST_FIELD_NO],
- &len);
- eval_node_copy_and_alloc_val(column, data, len);
-
- column = UT_LIST_GET_NEXT(col_var_list, column);
- }
-}
-
-/*********************************************************************//**
-Calculates the new values for fields to update. Note that row_upd_copy_columns
-must have been called first.
-For every field of the update vector, the expression upd_field->exp
-is evaluated with eval_exp() and the data of its value node is copied
-into upd_field->new_val with dfield_copy_data(). */
-UNIV_INLINE
-void
-row_upd_eval_new_vals(
-/*==================*/
- upd_t* update) /*!< in/out: update vector */
-{
- que_node_t* exp;
- upd_field_t* upd_field;
- ulint n_fields;
- ulint i;
-
- n_fields = upd_get_n_fields(update);
-
- for (i = 0; i < n_fields; i++) {
- upd_field = upd_get_nth_field(update, i);
-
- exp = upd_field->exp;
-
- eval_exp(exp);
-
- dfield_copy_data(&(upd_field->new_val), que_node_get_val(exp));
- }
-}
-
-/***********************************************************//**
-Stores to the heap the row on which the node->pcur is positioned.
-If node->row is already set, node->heap is emptied first. The row is
-then built into node->row from the clustered index record under the
-cursor with row_build(ROW_COPY_DATA, ...), allocating from node->heap.
-Unless node->is_delete is set, node->upd_row becomes a copy of that
-row with the update vector applied by row_upd_replace(), which also
-sets node->upd_ext; for a delete both are set to NULL.
-The local heap is only passed to rec_get_offsets() and is freed
-before returning if it was created. */
-static
-void
-row_upd_store_row(
-/*==============*/
- upd_node_t* node) /*!< in: row update node */
-{
- dict_index_t* clust_index;
- rec_t* rec;
- mem_heap_t* heap = NULL;
- row_ext_t** ext;
- ulint offsets_[REC_OFFS_NORMAL_SIZE];
- const ulint* offsets;
- rec_offs_init(offsets_);
-
- ut_ad(node->pcur->latch_mode != BTR_NO_LATCHES);
-
- if (node->row != NULL) {
- mem_heap_empty(node->heap);
- }
-
- clust_index = dict_table_get_first_index(node->table);
-
- rec = btr_pcur_get_rec(node->pcur);
-
- offsets = rec_get_offsets(rec, clust_index, offsets_,
- ULINT_UNDEFINED, &heap);
-
- if (dict_table_get_format(node->table) >= UNIV_FORMAT_B) {
- /* In DYNAMIC or COMPRESSED format, there is no prefix
- of externally stored columns in the clustered index
- record. Build a cache of column prefixes. */
- ext = &node->ext;
- } else {
- /* REDUNDANT and COMPACT formats store a local
- 768-byte prefix of each externally stored column.
- No cache is needed. */
- ext = NULL;
- node->ext = NULL;
- }
-
- node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets,
- NULL, NULL, NULL, ext, node->heap);
- if (node->is_delete) {
- node->upd_row = NULL;
- node->upd_ext = NULL;
- } else {
- node->upd_row = dtuple_copy(node->row, node->heap);
- row_upd_replace(node->upd_row, &node->upd_ext,
- clust_index, node->update, node->heap);
- }
-
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-}
-
-/***********************************************************//**
-Updates a secondary index entry of a row.
-@return DB_SUCCESS if operation successfully completed, else error
-code or DB_LOCK_WAIT */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_upd_sec_index_entry(
-/*====================*/
- upd_node_t* node, /*!< in: row update node */
- que_thr_t* thr) /*!< in: query thread */
-{
- mtr_t mtr;
- const rec_t* rec;
- btr_pcur_t pcur;
- mem_heap_t* heap;
- dtuple_t* entry;
- dict_index_t* index;
- btr_cur_t* btr_cur;
- ibool referenced;
- dberr_t err = DB_SUCCESS;
- trx_t* trx = thr_get_trx(thr);
- ulint mode;
- enum row_search_result search_result;
-
- ut_ad(trx->id);
-
- index = node->index;
-
- referenced = row_upd_index_is_referenced(index, trx);
-#ifdef WITH_WSREP
- ibool foreign = wsrep_row_upd_index_is_foreign(index, trx);
-#endif /* WITH_WSREP */
-
- heap = mem_heap_create(1024);
-
- /* Build old index entry */
- entry = row_build_index_entry(node->row, node->ext, index, heap);
- ut_a(entry);
-
- log_free_check();
-
-#ifdef UNIV_DEBUG
- /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
- Once it is fixed, remove the 'ifdef', 'if' and this comment. */
- if (!trx->ddl) {
- DEBUG_SYNC_C_IF_THD(trx->mysql_thd,
- "before_row_upd_sec_index_entry");
- }
-#endif /* UNIV_DEBUG */
-
- mtr_start_trx(&mtr, trx);
-
- if (*index->name == TEMP_INDEX_PREFIX) {
- /* The index->online_status may change if the
- index->name starts with TEMP_INDEX_PREFIX (meaning
- that the index is or was being created online). It is
- protected by index->lock. */
-
- mtr_s_lock(dict_index_get_lock(index), &mtr);
-
- switch (dict_index_get_online_status(index)) {
- case ONLINE_INDEX_COMPLETE:
- /* This is a normal index. Do not log anything.
- Perform the update on the index tree directly. */
- break;
- case ONLINE_INDEX_CREATION:
- /* Log a DELETE and optionally INSERT. */
- row_log_online_op(index, entry, 0);
-
- if (!node->is_delete) {
- mem_heap_empty(heap);
- entry = row_build_index_entry(
- node->upd_row, node->upd_ext,
- index, heap);
- ut_a(entry);
- row_log_online_op(index, entry, trx->id);
- }
- /* fall through */
- case ONLINE_INDEX_ABORTED:
- case ONLINE_INDEX_ABORTED_DROPPED:
- mtr_commit(&mtr);
- goto func_exit;
- }
-
- /* We can only buffer delete-mark operations if there
- are no foreign key constraints referring to the index. */
- mode = referenced
- ? BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
- : BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED
- | BTR_DELETE_MARK;
- } else {
- /* For secondary indexes,
- index->online_status==ONLINE_INDEX_COMPLETE unless
- index->name starts with TEMP_INDEX_PREFIX. */
- ut_ad(!dict_index_is_online_ddl(index));
-
- /* We can only buffer delete-mark operations if there
- are no foreign key constraints referring to the index. */
- mode = referenced
- ? BTR_MODIFY_LEAF
- : BTR_MODIFY_LEAF | BTR_DELETE_MARK;
- }
-
- /* Set the query thread, so that ibuf_insert_low() will be
- able to invoke thd_get_trx(). */
- btr_pcur_get_btr_cur(&pcur)->thr = thr;
-
- search_result = row_search_index_entry(index, entry,
- UNIV_UNLIKELY(trx->fake_changes)
- ? BTR_SEARCH_LEAF
- : (btr_latch_mode)mode,
- &pcur, &mtr);
-
- btr_cur = btr_pcur_get_btr_cur(&pcur);
-
- rec = btr_cur_get_rec(btr_cur);
-
- switch (search_result) {
- case ROW_NOT_DELETED_REF: /* should only occur for BTR_DELETE */
- ut_error;
- break;
- case ROW_BUFFERED:
- /* Entry was delete marked already. */
- break;
-
- case ROW_NOT_FOUND:
- if (*index->name == TEMP_INDEX_PREFIX) {
- /* When online CREATE INDEX copied the update
- that we already made to the clustered index,
- and completed the secondary index creation
- before we got here, the old secondary index
- record would not exist. The CREATE INDEX
- should be waiting for a MySQL meta-data lock
- upgrade at least until this UPDATE
- returns. After that point, the
- TEMP_INDEX_PREFIX would be dropped from the
- index name in commit_inplace_alter_table(). */
- break;
- }
-
- fputs("InnoDB: error in sec index entry update in\n"
- "InnoDB: ", stderr);
- dict_index_name_print(stderr, trx, index);
- fputs("\n"
- "InnoDB: tuple ", stderr);
- dtuple_print(stderr, entry);
- fputs("\n"
- "InnoDB: record ", stderr);
- rec_print(stderr, rec, index);
- putc('\n', stderr);
- trx_print(stderr, trx, 0);
- fputs("\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n", stderr);
- ut_ad(0);
- break;
- case ROW_FOUND:
- /* Delete mark the old index record; it can already be
- delete marked if we return after a lock wait in
- row_ins_sec_index_entry() below */
- if (!rec_get_deleted_flag(
- rec, dict_table_is_comp(index->table))) {
-#ifdef WITH_WSREP
- que_node_t *parent = que_node_get_parent(node);
-#endif /* WITH_WSREP */
- err = btr_cur_del_mark_set_sec_rec(
- 0, btr_cur, TRUE, thr, &mtr);
-
- if (err == DB_SUCCESS && referenced) {
-
- ulint* offsets;
-
- offsets = rec_get_offsets(
- rec, index, NULL, ULINT_UNDEFINED,
- &heap);
-
- /* NOTE that the following call loses
- the position of pcur ! */
- err = row_upd_check_references_constraints(
- node, &pcur, index->table,
- index, offsets, thr, &mtr);
- }
-#ifdef WITH_WSREP
- if (err == DB_SUCCESS && !referenced &&
- !(parent && que_node_get_type(parent) ==
- QUE_NODE_UPDATE &&
- ((upd_node_t*)parent)->cascade_node == node) &&
- foreign
- ) {
- ulint* offsets =
- rec_get_offsets(
- rec, index, NULL, ULINT_UNDEFINED,
- &heap);
- err = wsrep_row_upd_check_foreign_constraints(
- node, &pcur, index->table,
- index, offsets, thr, &mtr);
- switch (err) {
- case DB_SUCCESS:
- case DB_NO_REFERENCED_ROW:
- err = DB_SUCCESS;
- break;
- case DB_DEADLOCK:
- if (wsrep_debug) fprintf (stderr,
- "WSREP: sec index FK check fail for deadlock");
- break;
- default:
- fprintf (stderr,
- "WSREP: referenced FK check fail: %d",
- (int)err);
- break;
- }
- }
-#endif /* WITH_WSREP */
- }
- break;
- }
-
- btr_pcur_close(&pcur);
- mtr_commit(&mtr);
-
- if (node->is_delete || err != DB_SUCCESS) {
-
- goto func_exit;
- }
-
- mem_heap_empty(heap);
-
- /* Build a new index entry */
- entry = row_build_index_entry(node->upd_row, node->upd_ext,
- index, heap);
- ut_a(entry);
-
- /* Insert new index entry */
- err = row_ins_sec_index_entry(index, entry, thr);
-
-func_exit:
- mem_heap_free(heap);
-
- return(err);
-}
-
-/***********************************************************//**
-Handles one secondary index (node->index) for the row being updated: the
-index entry is rewritten when every secondary index must be touched
-(UPD_NODE_UPDATE_ALL_SEC) or when the update changes an ordering field of
-this index; otherwise nothing is done.
-@return DB_SUCCESS if operation successfully completed, else error
-code or DB_LOCK_WAIT */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_upd_sec_step(
-/*=============*/
-	upd_node_t*	node,	/*!< in: row update node */
-	que_thr_t*	thr)	/*!< in: query thread */
-{
-	ut_ad((node->state == UPD_NODE_UPDATE_ALL_SEC)
-	      || (node->state == UPD_NODE_UPDATE_SOME_SEC));
-	ut_ad(!dict_index_is_clust(node->index));
-
-	/* The ordering-field comparison is evaluated only when the
-	state alone does not already force the index update. */
-	const bool	entry_affected
-		= node->state == UPD_NODE_UPDATE_ALL_SEC
-		|| row_upd_changes_ord_field_binary(
-			node->index, node->update, thr,
-			node->row, node->ext);
-
-	if (!entry_affected) {
-		return(DB_SUCCESS);
-	}
-
-	return(row_upd_sec_index_entry(node, thr));
-}
-
-#ifdef UNIV_DEBUG
-# define row_upd_clust_rec_by_insert_inherit(rec,offsets,entry,update)	\
-	row_upd_clust_rec_by_insert_inherit_func(rec,offsets,entry,update)
-#else /* UNIV_DEBUG */
-# define row_upd_clust_rec_by_insert_inherit(rec,offsets,entry,update)	\
-	row_upd_clust_rec_by_insert_inherit_func(entry,update)
-#endif /* UNIV_DEBUG */
-/*******************************************************************//**
-Flags every off-page (externally stored) column of entry that the update
-vector leaves untouched as inherited instead of owned, so that a rollback
-of the primary key update does not free it. A limited version of this
-function used to be called btr_cur_mark_dtuple_inherited_extern().
-@return TRUE if any columns were inherited */
-static MY_ATTRIBUTE((warn_unused_result))
-ibool
-row_upd_clust_rec_by_insert_inherit_func(
-/*=====================================*/
-#ifdef UNIV_DEBUG
-	const rec_t*	rec,	/*!< in: old record, or NULL */
-	const ulint*	offsets,/*!< in: rec_get_offsets(rec), or NULL */
-#endif /* UNIV_DEBUG */
-	dtuple_t*	entry,	/*!< in/out: updated entry to be
-				inserted into the clustered index */
-	const upd_t*	update)	/*!< in: update vector */
-{
-	ibool	any_inherited = FALSE;
-
-	ut_ad(!rec == !offsets);
-	ut_ad(!rec || rec_offs_any_extern(offsets));
-
-	for (ulint field_no = 0;
-	     field_no < dtuple_get_n_fields(entry);
-	     field_no++) {
-		dfield_t*	field = dtuple_get_nth_field(entry, field_no);
-
-		ut_ad(!offsets
-		      || !rec_offs_nth_extern(offsets, field_no)
-		      == !dfield_is_ext(field)
-		      || upd_get_field_by_field_no(update, field_no));
-
-		/* Only externally stored columns that the update does
-		not replace are candidates for inheritance. */
-		if (dfield_is_ext(field)
-		    && !upd_get_field_by_field_no(update, field_no)) {
-
-#ifdef UNIV_DEBUG
-			if (UNIV_LIKELY(rec != NULL)) {
-				ulint		rec_len;
-				const byte*	rec_ref
-					= rec_get_nth_field(rec, offsets,
-							    field_no,
-							    &rec_len);
-				ut_ad(rec_len == dfield_get_len(field));
-				ut_ad(rec_len != UNIV_SQL_NULL);
-				ut_ad(rec_len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-				rec_ref += rec_len
-					- BTR_EXTERN_FIELD_REF_SIZE;
-
-				/* The pointer must not be zero. */
-				ut_ad(memcmp(rec_ref, field_ref_zero,
-					     BTR_EXTERN_FIELD_REF_SIZE));
-				/* The BLOB must be owned. */
-				ut_ad(!(rec_ref[BTR_EXTERN_LEN]
-					& BTR_EXTERN_OWNER_FLAG));
-			}
-#endif /* UNIV_DEBUG */
-
-			const ulint	len = dfield_get_len(field);
-			ut_a(len != UNIV_SQL_NULL);
-			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
-
-			/* The field reference occupies the last
-			BTR_EXTERN_FIELD_REF_SIZE bytes of the column. */
-			byte*	field_ref
-				= static_cast<byte*>(dfield_get_data(field))
-				+ (len - BTR_EXTERN_FIELD_REF_SIZE);
-
-			/* The pointer must not be zero. */
-			ut_a(memcmp(field_ref, field_ref_zero,
-				    BTR_EXTERN_FIELD_REF_SIZE));
-
-			/* The BTR_EXTERN_INHERITED_FLAG only matters in
-			rollback. Purge will always free the extern fields
-			of a delete-marked row. */
-			field_ref[BTR_EXTERN_LEN] &= ~BTR_EXTERN_OWNER_FLAG;
-			field_ref[BTR_EXTERN_LEN] |= BTR_EXTERN_INHERITED_FLAG;
-
-			any_inherited = TRUE;
-		}
-	}
-
-	return(any_inherited);
-}
-
-/***********************************************************//**
-Marks the clustered index record deleted and inserts the updated version
-of the record to the index. This function should be used when the ordering
-fields of the clustered index record change. This should be quite rare in
-database applications.
-On the first invocation (node->state == UPD_NODE_UPDATE_CLUSTERED) the old
-record is delete-marked, inherited off-page columns are disowned and the
-foreign key checks are run. When the function is re-entered in state
-UPD_NODE_INSERT_CLUSTERED or UPD_NODE_INSERT_BLOB, only the insert of the
-new entry is retried. The mtr is committed on every path, and node->state
-is updated after the insert attempt regardless of its result.
-@return DB_SUCCESS if operation successfully completed, else error
-code or DB_LOCK_WAIT */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_upd_clust_rec_by_insert(
-/*========================*/
- upd_node_t* node, /*!< in/out: row update node */
- dict_index_t* index, /*!< in: clustered index of the record */
- que_thr_t* thr, /*!< in: query thread */
- ibool referenced,/*!< in: TRUE if index may be referenced in
- a foreign key constraint */
-#ifdef WITH_WSREP
- ibool foreign, /*!< in: TRUE if index is foreign key index */
-#endif /* WITH_WSREP */
- mtr_t* mtr) /*!< in/out: mtr; gets committed here */
-{
- mem_heap_t* heap;
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- trx_t* trx;
- dict_table_t* table;
- dtuple_t* entry;
- dberr_t err;
- ibool change_ownership = FALSE;
- rec_t* rec;
- ulint* offsets = NULL;
-
-#ifdef WITH_WSREP
- que_node_t *parent = que_node_get_parent(node);
-#endif /* WITH_WSREP */
- ut_ad(node);
- ut_ad(dict_index_is_clust(index));
-
- trx = thr_get_trx(thr);
- table = node->table;
- pcur = node->pcur;
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- heap = mem_heap_create(1000);
-
- entry = row_build_index_entry(node->upd_row, node->upd_ext,
- index, heap);
- ut_a(entry);
-
- row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
-
- switch (node->state) {
- default:
- ut_error; /* no other node state is expected here */
- case UPD_NODE_INSERT_BLOB:
- /* A lock wait occurred in row_ins_clust_index_entry() in
- the previous invocation of this function. Mark the
- off-page columns in the entry inherited. */
-
- if (UNIV_LIKELY(!trx->fake_changes)) {
- change_ownership = row_upd_clust_rec_by_insert_inherit(
- NULL, NULL, entry, node->update);
- ut_a(change_ownership);
- }
- /* fall through */
- case UPD_NODE_INSERT_CLUSTERED:
- /* A lock wait occurred in row_ins_clust_index_entry() in
- the previous invocation of this function. */
- break;
- case UPD_NODE_UPDATE_CLUSTERED:
- /* This is the first invocation of the function where
- we update the primary key. Delete-mark the old record
- in the clustered index and prepare to insert a new entry. */
- rec = btr_cur_get_rec(btr_cur);
- offsets = rec_get_offsets(rec, index, NULL,
- ULINT_UNDEFINED, &heap);
- ut_ad(page_rec_is_user_rec(rec));
-
- err = btr_cur_del_mark_set_clust_rec(
- btr_cur_get_block(btr_cur), rec, index, offsets,
- thr, mtr);
- if (err != DB_SUCCESS) {
-err_exit: /* shared failure path; also reached by goto from the checks below */
- mtr_commit(mtr);
- mem_heap_free(heap);
- return(err);
- }
-
- /* If the new row inherits externally stored
- fields (off-page columns a.k.a. BLOBs) from the
- delete-marked old record, mark them disowned by the
- old record and owned by the new entry. */
-
- if (rec_offs_any_extern(offsets)
- && UNIV_LIKELY(!(trx->fake_changes))) {
- change_ownership = row_upd_clust_rec_by_insert_inherit(
- rec, offsets, entry, node->update);
-
- if (change_ownership) {
- /* The blobs are disowned here, expecting the
- insert down below to inherit them. But if the
- insert fails, then this disown will be undone
- when the operation is rolled back. */
- btr_cur_disown_inherited_fields(
- btr_cur_get_page_zip(btr_cur),
- rec, index, offsets, node->update, mtr);
- }
- }
-
- if (referenced) {
- /* NOTE that the following call loses
- the position of pcur ! */
-
- err = row_upd_check_references_constraints(
- node, pcur, table, index, offsets, thr, mtr);
-
- if (err != DB_SUCCESS) {
- goto err_exit;
- }
- }
-#ifdef WITH_WSREP
- if (!referenced &&
- !(parent && que_node_get_type(parent) == QUE_NODE_UPDATE &&
- ((upd_node_t*)parent)->cascade_node == node) &&
- foreign
- ) {
- err = wsrep_row_upd_check_foreign_constraints(
- node, pcur, table, index, offsets, thr, mtr);
- switch (err) {
- case DB_SUCCESS:
- case DB_NO_REFERENCED_ROW: /* treated the same as success */
- err = DB_SUCCESS;
- break;
- case DB_DEADLOCK:
- if (wsrep_debug) fprintf (stderr,
- "WSREP: insert FK check fail for deadlock");
- break;
- default:
- fprintf (stderr,
- "WSREP: referenced FK check fail: %d",
- (int)err);
- break;
- }
- if (err != DB_SUCCESS) {
- goto err_exit;
- }
- }
-#endif /* WITH_WSREP */
- }
-
- mtr_commit(mtr);
-
- err = row_ins_clust_index_entry(
- index, entry, thr,
- node->upd_ext ? node->upd_ext->n_ext : 0);
- node->state = change_ownership /* set even when err != DB_SUCCESS */
- ? UPD_NODE_INSERT_BLOB
- : UPD_NODE_INSERT_CLUSTERED;
-
- mem_heap_free(heap);
-
- return(err);
-}
-
-/***********************************************************//**
-Updates a clustered index record of a row when the ordering fields do
-not change.
-The update is first attempted within the page: in place when
-UPD_NODE_NO_SIZE_CHANGE is set in node->cmpl_info, otherwise with
-btr_cur_optimistic_update(). If that attempt fails, the mtr is committed
-and restarted, the cursor position is restored, and
-btr_cur_pessimistic_update() is used instead. If
-buf_LRU_buf_pool_running_out() reports true after the failed first
-attempt, DB_LOCK_TABLE_FULL is returned without trying the pessimistic
-update.
-@return DB_SUCCESS if operation successfully completed, else error
-code or DB_LOCK_WAIT */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_upd_clust_rec(
-/*==============*/
- upd_node_t* node, /*!< in: row update node */
- dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in: rec_get_offsets() on node->pcur */
- mem_heap_t** offsets_heap,
- /*!< in/out: memory heap, can be emptied */
- que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr) /*!< in: mtr; gets committed here */
-{
- mem_heap_t* heap = NULL;
- big_rec_t* big_rec = NULL;
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- dberr_t err;
- const dtuple_t* rebuilt_old_pk = NULL;
-
- ut_ad(node);
- ut_ad(dict_index_is_clust(index));
-
- pcur = node->pcur;
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- ut_ad(btr_cur_get_index(btr_cur) == index);
- ut_ad(!rec_get_deleted_flag(btr_cur_get_rec(btr_cur),
- dict_table_is_comp(index->table)));
- ut_ad(rec_offs_validate(btr_cur_get_rec(btr_cur), index, offsets));
-
- if (dict_index_is_online_ddl(index)) {
- rebuilt_old_pk = row_log_table_get_pk(
- btr_cur_get_rec(btr_cur), index, offsets, NULL, &heap);
- }
-
- /* Try optimistic updating of the record, keeping changes within
- the page; we do not check locks because we assume the x-lock on the
- record to update */
-
- if (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE) {
- err = btr_cur_update_in_place(
- BTR_NO_LOCKING_FLAG, btr_cur,
- offsets, node->update,
- node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);
- } else {
- err = btr_cur_optimistic_update(
- BTR_NO_LOCKING_FLAG, btr_cur,
- &offsets, offsets_heap, node->update,
- node->cmpl_info, thr, thr_get_trx(thr)->id, mtr);
- }
-
- if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) {
- row_log_table_update(btr_cur_get_rec(btr_cur),
- index, offsets, rebuilt_old_pk);
- }
-
- mtr_commit(mtr);
-
- if (UNIV_LIKELY(err == DB_SUCCESS)) {
-
- goto func_exit;
- }
-
- if (buf_LRU_buf_pool_running_out()) {
-
- err = DB_LOCK_TABLE_FULL;
- goto func_exit;
- }
- /* We may have to modify the tree structure: do a pessimistic descent
- down the index tree */
-
- mtr_start_trx(mtr, thr_get_trx(thr));
-
- /* NOTE: this transaction has an s-lock or x-lock on the record and
- therefore other transactions cannot modify the record when we have no
- latch on the page. In addition, we assume that other query threads of
- the same transaction do not modify the record in the meantime.
- Therefore we can assert that the restoration of the cursor succeeds. */
-
- ut_a(btr_pcur_restore_position(
- UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)
- ? BTR_SEARCH_TREE : BTR_MODIFY_TREE,
- pcur, mtr));
-
- ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
- dict_table_is_comp(index->table)));
-
- if (!heap) { /* heap may already exist from row_log_table_get_pk() above */
- heap = mem_heap_create(1024);
- }
-
- err = btr_cur_pessimistic_update(
- BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur,
- &offsets, offsets_heap, heap, &big_rec,
- node->update, node->cmpl_info,
- thr, thr_get_trx(thr)->id, mtr);
- if (big_rec && UNIV_LIKELY(!(thr_get_trx(thr)->fake_changes))) {
- ut_a(err == DB_SUCCESS);
- /* Write out the externally stored
- columns while still x-latching
- index->lock and block->lock. Allocate
- pages for big_rec in the mtr that
- modified the B-tree, but be sure to skip
- any pages that were freed in mtr. We will
- write out the big_rec pages before
- committing the B-tree mini-transaction. If
- the system crashes so that crash recovery
- will not replay the mtr_commit(&mtr), the
- big_rec pages will be left orphaned until
- the pages are allocated for something else.
-
- TODO: If the allocation extends the tablespace, it
- will not be redo logged, in either mini-transaction.
- Tablespace extension should be redo-logged in the
- big_rec mini-transaction, so that recovery will not
- fail when the big_rec was written to the extended
- portion of the file, in case the file was somehow
- truncated in the crash. */
-
- DEBUG_SYNC_C("before_row_upd_extern");
- err = btr_store_big_rec_extern_fields(
- index, btr_cur_get_block(btr_cur),
- btr_cur_get_rec(btr_cur), offsets,
- big_rec, mtr, BTR_STORE_UPDATE);
- DEBUG_SYNC_C("after_row_upd_extern");
- /* If writing big_rec fails (for example, because of
- DB_OUT_OF_FILE_SPACE), the record will be corrupted.
- Even if we did not update any externally stored
- columns, our update could cause the record to grow so
- that a non-updated column was selected for external
- storage. This non-update would not have been written
- to the undo log, and thus the record cannot be rolled
- back.
-
- However, because we have not executed mtr_commit(mtr)
- yet, the update will not be replayed in crash
- recovery, and the following assertion failure will
- effectively "roll back" the operation. */
- ut_a(err == DB_SUCCESS);
- }
-
- if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) {
- row_log_table_update(btr_cur_get_rec(btr_cur),
- index, offsets, rebuilt_old_pk);
- }
-
- mtr_commit(mtr);
-func_exit: /* common cleanup; mtr has already been committed on every path here */
- if (heap) {
- mem_heap_free(heap);
- }
-
- if (big_rec) {
- dtuple_big_rec_free(big_rec);
- }
-
- return(err);
-}
-
-/***********************************************************//**
-Delete marks a clustered index record.
-The row is first saved with row_upd_store_row(node) so that the secondary
-index entries can be built afterwards. If the delete-marking succeeds and
-referenced is set, row_upd_check_references_constraints() is called as
-well. The mtr is committed before returning, whatever the outcome.
-@return DB_SUCCESS if operation successfully completed, else error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_upd_del_mark_clust_rec(
-/*=======================*/
- upd_node_t* node, /*!< in: row update node */
- dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in/out: rec_get_offsets() for the
- record under the cursor */
- que_thr_t* thr, /*!< in: query thread */
- ibool referenced,
- /*!< in: TRUE if index may be referenced in
- a foreign key constraint */
-#ifdef WITH_WSREP
- ibool foreign,/*!< in: TRUE if index is foreign key index */
-#endif /* WITH_WSREP */
- mtr_t* mtr) /*!< in: mtr; gets committed here */
-{
- btr_pcur_t* pcur;
- btr_cur_t* btr_cur;
- dberr_t err;
-#ifdef WITH_WSREP
- rec_t* rec;
- que_node_t *parent = que_node_get_parent(node);
-#endif /* WITH_WSREP */
-
- ut_ad(node);
- ut_ad(dict_index_is_clust(index));
- ut_ad(node->is_delete);
-
- pcur = node->pcur;
- btr_cur = btr_pcur_get_btr_cur(pcur);
-
- /* Store row because we have to build also the secondary index
- entries */
-
- row_upd_store_row(node);
-
- /* Mark the clustered index record deleted; we do not have to check
- locks, because we assume that we have an x-lock on the record */
-
-#ifdef WITH_WSREP
- rec = btr_cur_get_rec(btr_cur);
-#endif /* WITH_WSREP */
-
- err = btr_cur_del_mark_set_clust_rec(
-#ifdef WITH_WSREP
- btr_cur_get_block(btr_cur), rec,
-#else
- btr_cur_get_block(btr_cur), btr_cur_get_rec(btr_cur),
-#endif /* WITH_WSREP */
- index, offsets, thr, mtr);
- if (err == DB_SUCCESS && referenced) {
- /* NOTE that the following call loses the position of pcur ! */
-
- err = row_upd_check_references_constraints(
- node, pcur, index->table, index, offsets, thr, mtr);
- }
-#ifdef WITH_WSREP
- if (err == DB_SUCCESS && !referenced &&
- !(parent && que_node_get_type(parent) == QUE_NODE_UPDATE &&
- ((upd_node_t*)parent)->cascade_node == node) &&
- thr_get_trx(thr) &&
- foreign
- ) {
- err = wsrep_row_upd_check_foreign_constraints(
- node, pcur, index->table, index, offsets, thr, mtr);
- switch (err) {
- case DB_SUCCESS:
- case DB_NO_REFERENCED_ROW: /* treated the same as success */
- err = DB_SUCCESS;
- break;
- case DB_DEADLOCK:
- if (wsrep_debug) fprintf (stderr,
- "WSREP: clust rec FK check fail for deadlock");
- break;
- default:
- fprintf (stderr,
- "WSREP: clust rec referenced FK check fail: %d",
- (int)err);
- break;
- }
- }
-#endif /* WITH_WSREP */
-
- mtr_commit(mtr);
-
- return(err);
-}
-
-/***********************************************************//**
-Updates the clustered index record.
-Restores node->pcur to the record, acquires the record lock if the node
-does not already hold it, and then dispatches to
-row_upd_del_mark_clust_rec(), row_upd_clust_rec() or
-row_upd_clust_rec_by_insert(). On success node->state and node->index are
-advanced so that the caller can continue with the secondary indexes.
-@return DB_SUCCESS if operation successfully completed, DB_LOCK_WAIT
-in case of a lock wait, else error code */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_upd_clust_step(
-/*===============*/
-	upd_node_t*	node,	/*!< in: row update node */
-	que_thr_t*	thr)	/*!< in: query thread */
-{
-	dict_index_t*	index;
-	btr_pcur_t*	pcur;
-	ibool		success;
-	dberr_t		err;
-	mtr_t		mtr;
-	rec_t*		rec;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets;
-	ibool		referenced;
-	rec_offs_init(offsets_);
-
-	index = dict_table_get_first_index(node->table);
-
-	referenced = row_upd_index_is_referenced(index, thr_get_trx(thr));
-#ifdef WITH_WSREP
-	ibool foreign = wsrep_row_upd_index_is_foreign(
-		index, thr_get_trx(thr));
-#endif /* WITH_WSREP */
-
-	pcur = node->pcur;
-
-	/* We have to restore the cursor to its position */
-
-	mtr_start_trx(&mtr, thr_get_trx(thr));
-
-	/* If the restoration does not succeed, then the same
-	transaction has deleted the record on which the cursor was,
-	and that is an SQL error. If the restoration succeeds, it may
-	still be that the same transaction has successively deleted
-	and inserted a record with the same ordering fields, but in
-	that case we know that the transaction has at least an
-	implicit x-lock on the record. */
-
-	ut_a(pcur->rel_pos == BTR_PCUR_ON);
-
-	/* Latch mode used for restoring the cursor position. */
-	ulint		search_mode;
-
-#ifdef UNIV_DEBUG
-	/* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
-	Once it is fixed, remove the 'ifdef', 'if' and this comment. */
-	if (!thr_get_trx(thr)->ddl) {
-		DEBUG_SYNC_C_IF_THD(
-			thr_get_trx(thr)->mysql_thd,
-			"innodb_row_upd_clust_step_enter");
-	}
-#endif /* UNIV_DEBUG */
-
-	/* If running with fake_changes mode on then switch from modify to
-	search so that code takes only s-latch and not x-latch.
-	For dry-run (fake-changes) s-latch is acceptable. Taking x-latch will
-	make it more restrictive and will block real changes/workflow. */
-	if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) {
-		search_mode = BTR_SEARCH_LEAF;
-	} else {
-		search_mode = BTR_MODIFY_LEAF;
-	}
-
-	if (dict_index_is_online_ddl(index)) {
-
-		ut_ad(node->table->id != DICT_INDEXES_ID);
-
-		search_mode |= BTR_ALREADY_S_LATCHED;
-
-		mtr_s_lock(dict_index_get_lock(index), &mtr);
-	}
-
-	success = btr_pcur_restore_position(search_mode, pcur, &mtr);
-
-	if (!success) {
-		err = DB_RECORD_NOT_FOUND;
-
-		mtr_commit(&mtr);
-
-		return(err);
-	}
-
-	/* If this is a row in SYS_INDEXES table of the data dictionary,
-	then we have to free the file segments of the index tree associated
-	with the index */
-
-	if (node->is_delete && node->table->id == DICT_INDEXES_ID) {
-
-		ut_ad(!dict_index_is_online_ddl(index));
-
-		/* Action in fake change mode shouldn't cause changes
-		in system tables. */
-		ut_ad(UNIV_LIKELY(!thr_get_trx(thr)->fake_changes));
-
-		dict_drop_index_tree(btr_pcur_get_rec(pcur), &mtr);
-
-		mtr_commit(&mtr);
-
-		mtr_start_trx(&mtr, thr_get_trx(thr));
-
-		success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur,
-						    &mtr);
-		if (!success) {
-			err = DB_ERROR;
-
-			mtr_commit(&mtr);
-
-			return(err);
-		}
-	}
-
-	rec = btr_pcur_get_rec(pcur);
-	offsets = rec_get_offsets(rec, index, offsets_,
-				  ULINT_UNDEFINED, &heap);
-
-	if (!node->has_clust_rec_x_lock) {
-		err = lock_clust_rec_modify_check_and_lock(
-			0, btr_pcur_get_block(pcur),
-			rec, index, offsets, thr);
-		if (err != DB_SUCCESS) {
-			mtr_commit(&mtr);
-			goto exit_func;
-		}
-	}
-
-	/* This check passes as the function manipulates x-lock to s-lock
-	if operating in fake-change mode. */
-	ut_ad(lock_trx_has_rec_x_lock(thr_get_trx(thr), index->table,
-				      btr_pcur_get_block(pcur),
-				      page_rec_get_heap_no(rec)));
-
-	/* NOTE: the following function calls will also commit mtr */
-
-	if (node->is_delete) {
-		err = row_upd_del_mark_clust_rec(
-#ifdef WITH_WSREP
-			node, index, offsets, thr, referenced, foreign, &mtr);
-#else
-			node, index, offsets, thr, referenced, &mtr);
-#endif /* WITH_WSREP */
-
-		if (err == DB_SUCCESS) {
-			node->state = UPD_NODE_UPDATE_ALL_SEC;
-			node->index = dict_table_get_next_index(index);
-		}
-
-		goto exit_func;
-	}
-
-	/* If the update is made for MySQL, we already have the update vector
-	ready, else we have to do some evaluation: */
-
-	if (UNIV_UNLIKELY(!node->in_mysql_interface)) {
-		/* Copy the necessary columns from clust_rec and calculate the
-		new values to set */
-		row_upd_copy_columns(rec, offsets,
-				     UT_LIST_GET_FIRST(node->columns));
-		row_upd_eval_new_vals(node->update);
-	}
-
-	if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
-
-		/* No ordering field of any index changes: node->state
-		and node->index are left as they are. */
-		err = row_upd_clust_rec(
-			node, index, offsets, &heap, thr, &mtr);
-		goto exit_func;
-	}
-
-	row_upd_store_row(node);
-
-	if (row_upd_changes_ord_field_binary(index, node->update, thr,
-					     node->row, node->ext)) {
-
-		/* Update causes an ordering field (ordering fields within
-		the B-tree) of the clustered index record to change: perform
-		the update by delete marking and inserting.
-
-		TODO! What to do to the 'Halloween problem', where an update
-		moves the record forward in index so that it is again
-		updated when the cursor arrives there? Solution: the
-		read operation must check the undo record undo number when
-		choosing records to update. MySQL solves now the problem
-		externally! */
-
-		err = row_upd_clust_rec_by_insert(
-#ifdef WITH_WSREP
-			node, index, thr, referenced, foreign, &mtr);
-#else
-			node, index, thr, referenced, &mtr);
-#endif /* WITH_WSREP */
-
-		if (err != DB_SUCCESS) {
-
-			goto exit_func;
-		}
-
-		node->state = UPD_NODE_UPDATE_ALL_SEC;
-	} else {
-		err = row_upd_clust_rec(
-			node, index, offsets, &heap, thr, &mtr);
-
-		if (err != DB_SUCCESS) {
-
-			goto exit_func;
-		}
-
-		node->state = UPD_NODE_UPDATE_SOME_SEC;
-	}
-
-	node->index = dict_table_get_next_index(index);
-
-exit_func:
-	if (heap) {
-		mem_heap_free(heap);
-	}
-	return(err);
-}
-
-/***********************************************************//**
-Applies the update described by node to the clustered index record and
-then to each secondary index that is affected. On entry the node is
-expected to carry a persistent cursor whose stored position identifies the
-record to update.
-@return DB_SUCCESS if operation successfully completed, else error
-code or DB_LOCK_WAIT */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-row_upd(
-/*====*/
-	upd_node_t*	node,	/*!< in: row update node */
-	que_thr_t*	thr)	/*!< in: query thread */
-{
-	dberr_t	err = DB_SUCCESS;
-
-	if (UNIV_LIKELY(node->in_mysql_interface)) {
-
-		/* The MySQL interpreter does not supply cmpl_info,
-		so it is derived here for every call. */
-
-		const bool	ord_may_change
-			= node->is_delete
-			|| row_upd_changes_some_index_ord_field_binary(
-				node->table, node->update);
-
-		if (ord_may_change) {
-			node->cmpl_info = 0;
-		} else {
-			node->cmpl_info = UPD_NODE_NO_ORD_CHANGE;
-		}
-	}
-
-	/* Clustered index phase; skipped when the node is already
-	working through the secondary indexes. */
-	if (node->state == UPD_NODE_UPDATE_CLUSTERED
-	    || node->state == UPD_NODE_INSERT_CLUSTERED
-	    || node->state == UPD_NODE_INSERT_BLOB) {
-
-		log_free_check();
-		err = row_upd_clust_step(node, thr);
-
-		if (err != DB_SUCCESS) {
-
-			return(err);
-		}
-	}
-
-	const bool	sec_indexes_pending
-		= node->index != NULL
-		&& (node->is_delete
-		    || !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE));
-
-	if (!sec_indexes_pending) {
-
-		return(DB_SUCCESS);
-	}
-
-#ifdef UNIV_DEBUG
-	/* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC().
-	Once it is fixed, remove the 'ifdef', 'if' and this comment. */
-	if (!thr_get_trx(thr)->ddl) {
-		DEBUG_SYNC_C_IF_THD(thr_get_trx(thr)->mysql_thd,
-				    "after_row_upd_clust");
-	}
-#endif /* UNIV_DEBUG */
-
-	DBUG_EXECUTE_IF("row_upd_skip_sec", node->index = NULL;);
-
-	do {
-		/* Step over any index that is flagged corrupted */
-		dict_table_skip_corrupt_index(node->index);
-
-		if (node->index == NULL) {
-			break;
-		}
-
-		/* DICT_FTS indexes are not handled here */
-		if (node->index->type != DICT_FTS) {
-			err = row_upd_sec_step(node, thr);
-
-			if (err != DB_SUCCESS) {
-
-				return(err);
-			}
-		}
-
-		node->index = dict_table_get_next_index(node->index);
-	} while (node->index != NULL);
-
-	ut_ad(err == DB_SUCCESS);
-
-	/* Release the per-row copies held by the node */
-
-	if (node->row != NULL) {
-		node->row = NULL;
-		node->ext = NULL;
-		node->upd_row = NULL;
-		node->upd_ext = NULL;
-		mem_heap_empty(node->heap);
-	}
-
-	node->state = UPD_NODE_UPDATE_CLUSTERED;
-
-	return(err);
-}
-
-/***********************************************************//**
-Executes one step of an update node in an SQL execution graph: takes the
-table IX lock when the node is entered from its parent, hands control to
-the select node to fetch the next row of a searched update, and applies
-the update to the fetched row. This is a high-level function used in SQL
-execution graphs.
-@return query thread to run next or NULL */
-UNIV_INTERN
-que_thr_t*
-row_upd_step(
-/*=========*/
-	que_thr_t*	thr)	/*!< in: query thread */
-{
-	ut_ad(thr);
-
-	trx_t*		trx = thr_get_trx(thr);
-
-	trx_start_if_not_started_xa(trx);
-
-	upd_node_t*	node = static_cast<upd_node_t*>(thr->run_node);
-	sel_node_t*	sel_node = node->select;
-	que_node_t*	parent = que_node_get_parent(node);
-	dberr_t		err = DB_SUCCESS;
-
-	ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE);
-
-	if (thr->prev_node == parent) {
-		node->state = UPD_NODE_SET_IX_LOCK;
-	}
-
-	if (node->state == UPD_NODE_SET_IX_LOCK) {
-
-		if (!node->has_clust_rec_x_lock) {
-			/* It may be that the current session has not yet
-			started its transaction, or it has been committed: */
-
-			err = lock_table(0, node->table, LOCK_IX, thr);
-
-			if (err != DB_SUCCESS) {
-				/* Report the failure through the trx */
-				trx->error_state = err;
-
-				return(NULL);
-			}
-		}
-
-		node->state = UPD_NODE_UPDATE_CLUSTERED;
-
-		if (node->searched_update) {
-			/* Reset the cursor and let the select node
-			fetch a row to update */
-			sel_node->state = SEL_NODE_OPEN;
-			thr->run_node = sel_node;
-
-			return(thr);
-		}
-	}
-
-	/* sel_node is NULL if we are in the MySQL interface */
-
-	if (sel_node && (sel_node->state != SEL_NODE_FETCH)) {
-
-		if (!node->searched_update) {
-			/* An explicit cursor should be positioned on a row
-			to update */
-
-			ut_error;
-
-			err = DB_ERROR;
-			trx->error_state = err;
-
-			return(NULL);
-		}
-
-		ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
-
-		/* No more rows to update, or the select node performed the
-		updates directly in-place */
-
-		thr->run_node = parent;
-
-		return(thr);
-	}
-
-	/* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */
-
-	err = row_upd(node, thr);
-
-	trx->error_state = err;
-
-	if (err != DB_SUCCESS) {
-		return(NULL);
-	}
-
-	/* DO THE TRIGGER ACTIONS HERE */
-
-	if (node->searched_update) {
-		/* Fetch next row to update */
-
-		thr->run_node = sel_node;
-	} else {
-		/* It was an explicit cursor update */
-
-		thr->run_node = parent;
-	}
-
-	node->state = UPD_NODE_UPDATE_CLUSTERED;
-
-	return(thr);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/row/row0vers.cc b/storage/xtradb/row/row0vers.cc
deleted file mode 100644
index 9f1fc13ee09..00000000000
--- a/storage/xtradb/row/row0vers.cc
+++ /dev/null
@@ -1,770 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file row/row0vers.cc
-Row versions
-
-Created 2/6/1997 Heikki Tuuri
-*******************************************************/
-
-#include "row0vers.h"
-
-#ifdef UNIV_NONINL
-#include "row0vers.ic"
-#endif
-
-#include "dict0dict.h"
-#include "dict0boot.h"
-#include "btr0btr.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "trx0undo.h"
-#include "trx0purge.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "row0row.h"
-#include "row0upd.h"
-#include "rem0cmp.h"
-#include "read0read.h"
-#include "lock0lock.h"
-
-/*****************************************************************//**
-Finds out if an active transaction has inserted or modified a secondary
-index record.
-@return 0 if committed, else the active transaction id;
-NOTE that this function can return false positives but never false
-negatives. The caller must confirm all positive results by calling
-trx_is_active() while holding lock_sys->mutex. */
-UNIV_INLINE
-trx_id_t
-row_vers_impl_x_locked_low(
-/*=======================*/
- const rec_t* clust_rec, /*!< in: clustered index record */
- dict_index_t* clust_index, /*!< in: the clustered index */
- const rec_t* rec, /*!< in: secondary index record */
- dict_index_t* index, /*!< in: the secondary index */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- trx_id_t trx_id;
- ibool corrupt;
- ulint comp;
- ulint rec_del;
- const rec_t* version;
- rec_t* prev_version = NULL;
- ulint* clust_offsets;
- mem_heap_t* heap;
-
- DBUG_ENTER("row_vers_impl_x_locked_low");
-
- ut_ad(rec_offs_validate(rec, index, offsets));
-
- heap = mem_heap_create(1024);
-
- clust_offsets = rec_get_offsets(
- clust_rec, clust_index, NULL, ULINT_UNDEFINED, &heap);
-
- trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);
- corrupt = FALSE;
-
- if (!trx_rw_is_active(trx_id, &corrupt)) {
- /* The transaction that modified or inserted clust_rec is no
- longer active, or it is corrupt: no implicit lock on rec */
- if (corrupt) {
- lock_report_trx_id_insanity(
- trx_id, clust_rec, clust_index, clust_offsets,
- trx_sys_get_max_trx_id());
- }
- mem_heap_free(heap);
- DBUG_RETURN(0);
- }
-
- comp = page_rec_is_comp(rec);
- ut_ad(index->table == clust_index->table);
- ut_ad(!!comp == dict_table_is_comp(index->table));
- ut_ad(!comp == !page_rec_is_comp(clust_rec));
-
- rec_del = rec_get_deleted_flag(rec, comp);
-
- /* We look up if some earlier version, which was modified by
- the trx_id transaction, of the clustered index record would
- require rec to be in a different state (delete marked or
- unmarked, or have different field values, or not existing). If
- there is such a version, then rec was modified by the trx_id
- transaction, and it has an implicit x-lock on rec. Note that
- if clust_rec itself would require rec to be in a different
- state, then the trx_id transaction has not yet had time to
- modify rec, and does not necessarily have an implicit x-lock
- on rec. */
-
- for (version = clust_rec;; version = prev_version) {
- row_ext_t* ext;
- const dtuple_t* row;
- dtuple_t* entry;
- ulint vers_del;
- trx_id_t prev_trx_id;
- mem_heap_t* old_heap = heap;
-
- /* We keep the semaphore in mtr on the clust_rec page, so
- that no other transaction can update it and get an
- implicit x-lock on rec until mtr_commit(mtr). */
-
- heap = mem_heap_create(1024);
-
- trx_undo_prev_version_build(
- clust_rec, mtr, version, clust_index, clust_offsets,
- heap, &prev_version);
-
- /* The oldest visible clustered index version must not be
- delete-marked, because we never start a transaction by
- inserting a delete-marked record. */
- ut_ad(prev_version
- || !rec_get_deleted_flag(version, comp)
- || !trx_rw_is_active(trx_id, NULL));
-
- /* Free version and clust_offsets. */
- mem_heap_free(old_heap);
-
- if (prev_version == NULL) {
-
- /* We reached the oldest visible version without
- finding an older version of clust_rec that would
- match the secondary index record. If the secondary
- index record is not delete marked, then clust_rec
- is considered the correct match of the secondary
- index record and hence holds the implicit lock. */
-
- if (rec_del) {
- /* The secondary index record is del marked.
- So, the implicit lock holder of clust_rec
- did not modify the secondary index record yet,
- and is not holding an implicit lock on it.
-
- This assumes that whenever a row is inserted
- or updated, the leaf page record always is
- created with a clear delete-mark flag.
- (We never insert a delete-marked record.) */
- trx_id = 0;
- }
-
- break;
- }
-
- clust_offsets = rec_get_offsets(
- prev_version, clust_index, NULL, ULINT_UNDEFINED,
- &heap);
-
- vers_del = rec_get_deleted_flag(prev_version, comp);
-
- prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
- clust_offsets);
-
- /* The stack of versions is locked by mtr. Thus, it
- is safe to fetch the prefixes for externally stored
- columns. */
-
- row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
- clust_offsets,
- NULL, NULL, NULL, &ext, heap);
-
- entry = row_build_index_entry(row, ext, index, heap);
-
- /* entry may be NULL if a record was inserted in place
- of a deleted record, and the BLOB pointers of the new
- record were not initialized yet. But in that case,
- prev_version should be NULL. */
-
- ut_a(entry != NULL);
-
- /* If we get here, we know that the trx_id transaction
- modified prev_version. Let us check if prev_version
- would require rec to be in a different state. */
-
- /* The previous version of clust_rec must be
- accessible, because clust_rec was not a fresh insert.
- There is no guarantee that the transaction is still
- active. */
-
- /* We check if entry and rec are identified in the alphabetical
- ordering */
-
- if (!trx_rw_is_active(trx_id, &corrupt)) {
- /* Transaction no longer active: no implicit
- x-lock. This situation should only be possible
- because we are not holding lock_sys->mutex. */
- ut_ad(!lock_mutex_own());
- if (corrupt) {
- lock_report_trx_id_insanity(
- trx_id,
- prev_version, clust_index,
- clust_offsets,
- trx_sys_get_max_trx_id());
- }
- trx_id = 0;
- break;
- } else if (0 == cmp_dtuple_rec(entry, rec, offsets)) {
- /* The delete marks of rec and prev_version should be
- equal for rec to be in the state required by
- prev_version */
-
- if (rec_del != vers_del) {
-
- break;
- }
-
- /* It is possible that the row was updated so that the
- secondary index record remained the same in
- alphabetical ordering, but the field values changed
- still. For example, 'abc' -> 'ABC'. Check also that. */
-
- dtuple_set_types_binary(
- entry, dtuple_get_n_fields(entry));
-
- if (0 != cmp_dtuple_rec(entry, rec, offsets)) {
-
- break;
- }
-
- } else if (!rec_del) {
- /* The delete mark should be set in rec for it to be
- in the state required by prev_version */
-
- break;
- }
-
- if (trx_id != prev_trx_id) {
- /* prev_version was the first version modified by
- the trx_id transaction: no implicit x-lock */
-
- trx_id = 0;
- break;
- }
- }
-
- DBUG_PRINT("info", ("Implicit lock is held by trx:%lu",
- static_cast<unsigned long>(trx_id)));
-
- mem_heap_free(heap);
- DBUG_RETURN(trx_id);
-}
-
-/*****************************************************************//**
-Finds out if an active transaction has inserted or modified a secondary
-index record.
-@return 0 if committed, else the active transaction id;
-NOTE that this function can return false positives but never false
-negatives. The caller must confirm all positive results by calling
-trx_is_active() while holding lock_sys->mutex. */
-UNIV_INTERN
-trx_id_t
-row_vers_impl_x_locked(
-/*===================*/
- const rec_t* rec, /*!< in: record in a secondary index */
- dict_index_t* index, /*!< in: the secondary index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
-{
- dict_index_t* clust_index;
- const rec_t* clust_rec;
- trx_id_t trx_id;
- mtr_t mtr;
-
- ut_ad(!lock_mutex_own());
- ut_ad(!mutex_own(&trx_sys->mutex));
-
- mtr_start(&mtr);
-
- /* Search for the clustered index record. The latch on the
- page of clust_rec locks the top of the stack of versions. The
- bottom of the version stack is not locked; oldest versions may
- disappear by the fact that transactions may be committed and
- collected by the purge. This is not a problem, because we are
- only interested in active transactions. */
-
- clust_rec = row_get_clust_rec(
- BTR_SEARCH_LEAF, rec, index, &clust_index, &mtr);
-
- if (UNIV_UNLIKELY(!clust_rec)) {
- /* In a rare case it is possible that no clust rec is found
- for a secondary index record: if in row0umod.cc
- row_undo_mod_remove_clust_low() we have already removed the
- clust rec, while purge is still cleaning and removing
- secondary index records associated with earlier versions of
- the clustered index record. In that case there cannot be
- any implicit lock on the secondary index record, because
- an active transaction which has modified the secondary index
- record has also modified the clustered index record. And in
- a rollback we always undo the modifications to secondary index
- records before the clustered index record. */
-
- trx_id = 0;
- } else {
- trx_id = row_vers_impl_x_locked_low(
- clust_rec, clust_index, rec, index, offsets, &mtr);
- }
-
- mtr_commit(&mtr);
-
- return(trx_id);
-}
-
-/*****************************************************************//**
-Finds out if we must preserve a delete marked earlier version of a clustered
-index record, because it is >= the purge view.
-@return TRUE if earlier version should be preserved */
-UNIV_INTERN
-ibool
-row_vers_must_preserve_del_marked(
-/*==============================*/
- trx_id_t trx_id, /*!< in: transaction id in the version */
- mtr_t* mtr) /*!< in: mtr holding the latch on the
- clustered index record; it will also
- hold the latch on purge_view */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- mtr_s_lock(&(purge_sys->latch), mtr);
-
- return(!read_view_sees_trx_id(purge_sys->view, trx_id));
-}
-
-/*****************************************************************//**
-Finds out if a version of the record, where the version >= the current
-purge view, should have ientry as its secondary index entry. We check
-if there is any not delete marked version of the record where the trx
-id >= purge view, and the secondary index entry and ientry are identified in
-the alphabetical ordering; exactly in this case we return TRUE.
-@return TRUE if earlier version should have */
-UNIV_INTERN
-ibool
-row_vers_old_has_index_entry(
-/*=========================*/
- ibool also_curr,/*!< in: TRUE if also rec is included in the
- versions to search; otherwise only versions
- prior to it are searched */
- const rec_t* rec, /*!< in: record in the clustered index; the
- caller must have a latch on the page */
- mtr_t* mtr, /*!< in: mtr holding the latch on rec; it will
- also hold the latch on purge_view */
- dict_index_t* index, /*!< in: the secondary index */
- const dtuple_t* ientry) /*!< in: the secondary index entry */
-{
- const rec_t* version;
- rec_t* prev_version;
- dict_index_t* clust_index;
- ulint* clust_offsets;
- mem_heap_t* heap;
- mem_heap_t* heap2;
- const dtuple_t* row;
- const dtuple_t* entry;
- ulint comp;
-
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- clust_index = dict_table_get_first_index(index->table);
-
- comp = page_rec_is_comp(rec);
- ut_ad(!dict_table_is_comp(index->table) == !comp);
- heap = mem_heap_create(1024);
- clust_offsets = rec_get_offsets(rec, clust_index, NULL,
- ULINT_UNDEFINED, &heap);
-
- if (also_curr && !rec_get_deleted_flag(rec, comp)) {
- row_ext_t* ext;
-
- /* The top of the stack of versions is locked by the
- mtr holding a latch on the page containing the
- clustered index record. The bottom of the stack is
- locked by the fact that the purge_sys->view must
- 'overtake' any read view of an active transaction.
- Thus, it is safe to fetch the prefixes for
- externally stored columns. */
- row = row_build(ROW_COPY_POINTERS, clust_index,
- rec, clust_offsets,
- NULL, NULL, NULL, &ext, heap);
- entry = row_build_index_entry(row, ext, index, heap);
-
- /* If entry == NULL, the record contains unset BLOB
- pointers. This must be a freshly inserted record. If
- this is called from
- row_purge_remove_sec_if_poss_low(), the thread will
- hold latches on the clustered index and the secondary
- index. Because the insert works in three steps:
-
- (1) insert the record to clustered index
- (2) store the BLOBs and update BLOB pointers
- (3) insert records to secondary indexes
-
- the purge thread can safely ignore freshly inserted
- records and delete the secondary index record. The
- thread that inserted the new record will be inserting
- the secondary index records. */
-
- /* NOTE that we cannot do the comparison as binary
- fields because the row is maybe being modified so that
- the clustered index record has already been updated to
- a different binary value in a char field, but the
- collation identifies the old and new value anyway! */
- if (entry && !dtuple_coll_cmp(ientry, entry)) {
-
- mem_heap_free(heap);
-
- return(TRUE);
- }
- }
-
- version = rec;
-
- for (;;) {
- heap2 = heap;
- heap = mem_heap_create(1024);
- trx_undo_prev_version_build(rec, mtr, version,
- clust_index, clust_offsets,
- heap, &prev_version);
- mem_heap_free(heap2); /* free version and clust_offsets */
-
- if (!prev_version) {
- /* Versions end here */
-
- mem_heap_free(heap);
-
- return(FALSE);
- }
-
- clust_offsets = rec_get_offsets(prev_version, clust_index,
- NULL, ULINT_UNDEFINED, &heap);
-
- if (!rec_get_deleted_flag(prev_version, comp)) {
- row_ext_t* ext;
-
- /* The stack of versions is locked by mtr.
- Thus, it is safe to fetch the prefixes for
- externally stored columns. */
- row = row_build(ROW_COPY_POINTERS, clust_index,
- prev_version, clust_offsets,
- NULL, NULL, NULL, &ext, heap);
- entry = row_build_index_entry(row, ext, index, heap);
-
- /* If entry == NULL, the record contains unset
- BLOB pointers. This must be a freshly
- inserted record that we can safely ignore.
- For the justification, see the comments after
- the previous row_build_index_entry() call. */
-
- /* NOTE that we cannot do the comparison as binary
- fields because maybe the secondary index record has
- already been updated to a different binary value in
- a char field, but the collation identifies the old
- and new value anyway! */
-
- if (entry && !dtuple_coll_cmp(ientry, entry)) {
-
- mem_heap_free(heap);
-
- return(TRUE);
- }
- }
-
- version = prev_version;
- }
-}
-
-/*****************************************************************//**
-Constructs the version of a clustered index record which a consistent
-read should see. We assume that the trx id stored in rec is such that
-the consistent read should not see rec in its present version.
-@return DB_SUCCESS or DB_MISSING_HISTORY */
-UNIV_INTERN
-dberr_t
-row_vers_build_for_consistent_read(
-/*===============================*/
- const rec_t* rec, /*!< in: record in a clustered index; the
- caller must have a latch on the page; this
- latch locks the top of the stack of versions
- of this records */
- mtr_t* mtr, /*!< in: mtr holding the latch on rec */
- dict_index_t* index, /*!< in: the clustered index */
- ulint** offsets,/*!< in/out: offsets returned by
- rec_get_offsets(rec, index) */
- read_view_t* view, /*!< in: the consistent read view */
- mem_heap_t** offset_heap,/*!< in/out: memory heap from which
- the offsets are allocated */
- mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
- *old_vers is allocated; memory for possible
- intermediate versions is allocated and freed
- locally within the function */
- rec_t** old_vers)/*!< out, own: old version, or NULL
- if the history is missing or the record
- does not exist in the view, that is,
- it was freshly inserted afterwards */
-{
- const rec_t* version;
- rec_t* prev_version;
- trx_id_t trx_id;
- mem_heap_t* heap = NULL;
- byte* buf;
- dberr_t err;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_ad(rec_offs_validate(rec, index, *offsets));
-
- trx_id = row_get_rec_trx_id(rec, index, *offsets);
-
- ut_ad(!read_view_sees_trx_id(view, trx_id));
-
- version = rec;
-
- for (;;) {
- mem_heap_t* heap2 = heap;
- trx_undo_rec_t* undo_rec;
- roll_ptr_t roll_ptr;
- undo_no_t undo_no;
- heap = mem_heap_create(1024);
-
- /* If we have high-granularity consistent read view and
- creating transaction of the view is the same as trx_id in
- the record we see this record only in the case when
- undo_no of the record is < undo_no in the view. */
-
- if (view->type == VIEW_HIGH_GRANULARITY
- && view->creator_trx_id == trx_id) {
-
- roll_ptr = row_get_rec_roll_ptr(version, index,
- *offsets);
- undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
- undo_no = trx_undo_rec_get_undo_no(undo_rec);
- mem_heap_empty(heap);
-
- if (view->undo_no > undo_no) {
- /* The view already sees this version: we can
- copy it to in_heap and return */
-
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- ut_a(!rec_offs_any_null_extern(
- version, *offsets));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-
- buf = static_cast<byte*>(mem_heap_alloc(
- in_heap, rec_offs_size(*offsets)));
-
- *old_vers = rec_copy(buf, version, *offsets);
- rec_offs_make_valid(*old_vers, index,
- *offsets);
- err = DB_SUCCESS;
- break;
- }
- }
-
- err = trx_undo_prev_version_build(rec, mtr, version, index,
- *offsets, heap,
- &prev_version)
- ? DB_SUCCESS : DB_MISSING_HISTORY;
- if (heap2) {
- mem_heap_free(heap2); /* free version */
- }
-
- if (prev_version == NULL) {
- /* It was a freshly inserted version */
- *old_vers = NULL;
- break;
- }
-
- *offsets = rec_get_offsets(prev_version, index, *offsets,
- ULINT_UNDEFINED, offset_heap);
-
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- ut_a(!rec_offs_any_null_extern(prev_version, *offsets));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-
- trx_id = row_get_rec_trx_id(prev_version, index, *offsets);
-
- if (read_view_sees_trx_id(view, trx_id)) {
-
- /* The view already sees this version: we can copy
- it to in_heap and return */
-
- buf = static_cast<byte*>(
- mem_heap_alloc(
- in_heap, rec_offs_size(*offsets)));
-
- *old_vers = rec_copy(buf, prev_version, *offsets);
- rec_offs_make_valid(*old_vers, index, *offsets);
- break;
- }
-
- version = prev_version;
- }/* for (;;) */
-
- mem_heap_free(heap);
-
- return(err);
-}
-
-/*****************************************************************//**
-Constructs the last committed version of a clustered index record,
-which should be seen by a semi-consistent read. */
-UNIV_INTERN
-void
-row_vers_build_for_semi_consistent_read(
-/*====================================*/
- const rec_t* rec, /*!< in: record in a clustered index; the
- caller must have a latch on the page; this
- latch locks the top of the stack of versions
- of this records */
- mtr_t* mtr, /*!< in: mtr holding the latch on rec */
- dict_index_t* index, /*!< in: the clustered index */
- ulint** offsets,/*!< in/out: offsets returned by
- rec_get_offsets(rec, index) */
- mem_heap_t** offset_heap,/*!< in/out: memory heap from which
- the offsets are allocated */
- mem_heap_t* in_heap,/*!< in: memory heap from which the memory for
- *old_vers is allocated; memory for possible
- intermediate versions is allocated and freed
- locally within the function */
- const rec_t** old_vers)/*!< out: rec, old version, or NULL if the
- record does not exist in the view, that is,
- it was freshly inserted afterwards */
-{
- const rec_t* version;
- mem_heap_t* heap = NULL;
- byte* buf;
- trx_id_t rec_trx_id = 0;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
- || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_ad(rec_offs_validate(rec, index, *offsets));
-
- version = rec;
-
- for (;;) {
- trx_id_t* version_trx_descr;
- mem_heap_t* heap2;
- rec_t* prev_version;
- trx_id_t version_trx_id;
-
- version_trx_id = row_get_rec_trx_id(version, index, *offsets);
- if (rec == version) {
- rec_trx_id = version_trx_id;
- }
-
- mutex_enter(&trx_sys->mutex);
- version_trx_descr = trx_find_descriptor(trx_sys->descriptors,
- trx_sys->descr_n_used,
- version_trx_id);
- /* Because version_trx is a read-write transaction,
- its state cannot change from or to NOT_STARTED while
- we are holding the trx_sys->mutex. It may change from
- ACTIVE to PREPARED or COMMITTED. */
- mutex_exit(&trx_sys->mutex);
-
- if (!version_trx_descr) {
-committed_version_trx:
- /* We found a version that belongs to a
- committed transaction: return it. */
-
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- ut_a(!rec_offs_any_null_extern(version, *offsets));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-
- if (rec == version) {
- *old_vers = rec;
- break;
- }
-
- /* We assume that a rolled-back transaction stays in
- TRX_STATE_ACTIVE state until all the changes have been
- rolled back and the transaction is removed from
- the global list of transactions. */
-
- if (rec_trx_id == version_trx_id) {
- /* The transaction was committed while
- we searched for earlier versions.
- Return the current version as a
- semi-consistent read. */
-
- version = rec;
- *offsets = rec_get_offsets(version,
- index, *offsets,
- ULINT_UNDEFINED,
- offset_heap);
- }
-
- buf = static_cast<byte*>(
- mem_heap_alloc(
- in_heap, rec_offs_size(*offsets)));
-
- *old_vers = rec_copy(buf, version, *offsets);
- rec_offs_make_valid(*old_vers, index, *offsets);
- break;
- }
-
- DEBUG_SYNC_C("after_row_vers_check_trx_active");
-
- heap2 = heap;
- heap = mem_heap_create(1024);
-
- if (!trx_undo_prev_version_build(rec, mtr, version, index,
- *offsets, heap,
- &prev_version)) {
- mem_heap_free(heap);
- heap = heap2;
- heap2 = NULL;
- goto committed_version_trx;
- }
-
- if (heap2) {
- mem_heap_free(heap2); /* free version */
- }
-
- if (prev_version == NULL) {
- /* It was a freshly inserted version */
- *old_vers = NULL;
- break;
- }
-
- version = prev_version;
- *offsets = rec_get_offsets(version, index, *offsets,
- ULINT_UNDEFINED, offset_heap);
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- ut_a(!rec_offs_any_null_extern(version, *offsets));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
- }/* for (;;) */
-
- if (heap) {
- mem_heap_free(heap);
- }
-}
diff --git a/storage/xtradb/srv/srv0conc.cc b/storage/xtradb/srv/srv0conc.cc
deleted file mode 100644
index e90f744cfa4..00000000000
--- a/storage/xtradb/srv/srv0conc.cc
+++ /dev/null
@@ -1,713 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file srv/srv0conc.cc
-
-InnoDB concurrency manager
-
-Created 2011/04/18 Sunny Bains
-*******************************************************/
-
-#include "srv0srv.h"
-#include "sync0sync.h"
-#include "btr0types.h"
-#include "trx0trx.h"
-
-#include <mysql/plugin.h>
-#include <mysql/service_wsrep.h>
-
-/** Number of times a thread is allowed to enter InnoDB within the same
-SQL query after it has once got the ticket. */
-UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
-
-#ifdef HAVE_ATOMIC_BUILTINS
-/** Maximum sleep delay (in micro-seconds), value of 0 disables it. */
-UNIV_INTERN ulong srv_adaptive_max_sleep_delay = 150000;
-#endif /* HAVE_ATOMIC_BUILTINS */
-
-UNIV_INTERN ulong srv_thread_sleep_delay = 10000;
-
-
-/** We are prepared for a situation that we have this many threads waiting for
-a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
-value. */
-
-UNIV_INTERN ulint srv_max_n_threads = 0;
-
-/** The following controls how many threads we let inside InnoDB concurrently:
-threads waiting for locks are not counted into the number because otherwise
-we could get a deadlock. Value of 0 will disable the concurrency check. */
-
-UNIV_INTERN ulong srv_thread_concurrency = 0;
-
-#ifndef HAVE_ATOMIC_BUILTINS
-
-/** This mutex protects srv_conc data structures */
-static os_fast_mutex_t srv_conc_mutex;
-
-/** Concurrency list node */
-typedef UT_LIST_NODE_T(struct srv_conc_slot_t) srv_conc_node_t;
-
-/** Slot for a thread waiting in the concurrency control queue. */
-struct srv_conc_slot_t{
- os_event_t event; /*!< event to wait for;
- os_event_set() and os_event_reset()
- are protected by srv_conc_mutex */
- ibool reserved; /*!< TRUE if slot
- reserved */
- ibool wait_ended; /*!< TRUE when another thread has
- already set the event and the thread
- in this slot is free to proceed; but
- reserved may still be TRUE at that
- point */
- srv_conc_node_t srv_conc_queue; /*!< queue node */
-#ifdef WITH_WSREP
- void *thd; /*!< to see priority */
-#endif
-};
-
-/** Queue of threads waiting to get in */
-typedef UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue_t;
-
-static srv_conc_queue_t srv_conc_queue;
-
-/** Array of wait slots */
-static srv_conc_slot_t* srv_conc_slots;
-
-#if defined(UNIV_PFS_MUTEX)
-/* Key to register srv_conc_mutex_key with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_conc_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
-/** Variables tracking the active and waiting threads. */
-struct srv_conc_t {
- char pad[CACHE_LINE_SIZE - (sizeof(ulint) + sizeof(lint))];
-
- /** Number of transactions that have declared_to_be_inside_innodb set.
- It used to be a non-error for this value to drop below zero temporarily.
- This is no longer true. We'll, however, keep the lint datatype to add
- assertions to catch any corner cases that we may have missed. */
-
- volatile lint n_active;
-
- /** Number of OS threads waiting in the FIFO for permission to
- enter InnoDB */
- volatile lint n_waiting;
-};
-
-/* Control variables for tracking concurrency. */
-static srv_conc_t srv_conc;
-
-/*********************************************************************//**
-Initialise the concurrency management data structures */
-void
-srv_conc_init(void)
-/*===============*/
-{
-#ifndef HAVE_ATOMIC_BUILTINS
- ulint i;
-
- /* Init the server concurrency restriction data structures */
-
- os_fast_mutex_init(srv_conc_mutex_key, &srv_conc_mutex);
-
- UT_LIST_INIT(srv_conc_queue);
-
- srv_conc_slots = static_cast<srv_conc_slot_t*>(
- mem_zalloc(OS_THREAD_MAX_N * sizeof(*srv_conc_slots)));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- srv_conc_slot_t* conc_slot = &srv_conc_slots[i];
-
- conc_slot->event = os_event_create();
- ut_a(conc_slot->event);
-#ifdef WITH_WSREP
- conc_slot->thd = NULL;
-#endif /* WITH_WSREP */
- }
-#endif /* !HAVE_ATOMIC_BUILTINS */
-}
-
-/*********************************************************************//**
-Free the concurrency management data structures */
-void
-srv_conc_free(void)
-/*===============*/
-{
-#ifndef HAVE_ATOMIC_BUILTINS
- os_fast_mutex_free(&srv_conc_mutex);
-
- for (ulint i = 0; i < OS_THREAD_MAX_N; i++)
- os_event_free(srv_conc_slots[i].event);
-
- mem_free(srv_conc_slots);
- srv_conc_slots = NULL;
-#endif /* !HAVE_ATOMIC_BUILTINS */
-}
-
-#ifdef HAVE_ATOMIC_BUILTINS
-/*********************************************************************//**
-Note that a user thread is entering InnoDB. */
-static
-void
-srv_enter_innodb_with_tickets(
-/*==========================*/
- trx_t* trx) /*!< in/out: transaction that wants
- to enter InnoDB */
-{
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter;
-}
-
-/*********************************************************************//**
-Handle the scheduling of a user thread that wants to enter InnoDB. Setting
-srv_adaptive_max_sleep_delay > 0 switches the adaptive sleep calibration to
-ON. When set, we want to wait in the queue for as little time as possible.
-However, very short waits will result in a lot of context switches and that
-is also not desirable. When threads need to sleep multiple times we increment
-srv_thread_sleep_delay by one. When we see threads getting a slot without
-waiting and there are no other threads waiting in the queue, we try and reduce
-the wait as much as we can. Currently we reduce it by half each time. If the
-thread only had to wait for one turn before it was able to enter InnoDB we
-decrement it by one (only while it is above 20). This is to try and keep the
-sleep time stable around the "optimum" sleep time.
-
-The function does not return until the transaction has been declared to be
-inside InnoDB; the trx must not already be declared inside on entry. */
-static
-void
-srv_conc_enter_innodb_with_atomics(
-/*===============================*/
- trx_t* trx) /*!< in/out: transaction that wants
- to enter InnoDB */
-{
- ulint n_sleeps = 0;
- ibool notified_mysql = FALSE;
-
- ut_a(!trx->declared_to_be_inside_innodb);
-
- /* Each iteration tries to take a slot; a failed attempt ends in a
- sleep followed by another try. */
- for (;;) {
- ulint sleep_in_us;
-#ifdef WITH_WSREP
- /* An aborting wsrep transaction is let in by force, regardless of
- how many threads are active. */
- if (wsrep_on(trx->mysql_thd) &&
- wsrep_trx_is_aborting(trx->mysql_thd)) {
- if (wsrep_debug)
- fprintf(stderr,
- "srv_conc_enter due to MUST_ABORT");
- srv_conc_force_enter_innodb(trx);
- return;
- }
-#endif /* WITH_WSREP */
-
- /* Cheap read first; the slot is only claimed by the atomic
- increment below, whose result is re-checked. */
- if (srv_conc.n_active < (lint) srv_thread_concurrency) {
- ulint n_active;
-
- /* Check if there are any free tickets. */
- n_active = os_atomic_increment_lint(
- &srv_conc.n_active, 1);
-
- if (n_active <= srv_thread_concurrency) {
-
- srv_enter_innodb_with_tickets(trx);
-
- /* Undo the waiter registration made on an
- earlier iteration of this loop. */
- if (notified_mysql) {
-
- (void) os_atomic_decrement_lint(
- &srv_conc.n_waiting, 1);
-
- thd_wait_end(trx->mysql_thd);
- }
-
- /* Adaptive calibration: shrink the shared sleep
- delay when entry was easy. */
- if (srv_adaptive_max_sleep_delay > 0) {
- if (srv_thread_sleep_delay > 20
- && n_sleeps == 1) {
-
- --srv_thread_sleep_delay;
- }
-
- if (srv_conc.n_waiting == 0) {
- srv_thread_sleep_delay >>= 1;
- }
- }
-
- return;
- }
-
- /* Since there were no free seats, we relinquish
- the overbooked ticket. */
-
- (void) os_atomic_decrement_lint(
- &srv_conc.n_active, 1);
- }
-
- /* Register as a waiter only once, on the first failed attempt. */
- if (!notified_mysql) {
- (void) os_atomic_increment_lint(
- &srv_conc.n_waiting, 1);
-
- /* Release possible search system latch this
- thread has */
-
- if (trx->has_search_latch) {
- trx_search_latch_release_if_reserved(trx);
- }
-
- thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
-
- notified_mysql = TRUE;
- }
-
- trx->op_info = "sleeping before entering InnoDB";
-
- sleep_in_us = srv_thread_sleep_delay;
-
- /* Guard against overflow when adaptive sleep delay is on. */
-
- if (srv_adaptive_max_sleep_delay > 0
- && sleep_in_us > srv_adaptive_max_sleep_delay) {
-
- sleep_in_us = srv_adaptive_max_sleep_delay;
- srv_thread_sleep_delay = static_cast<ulong>(sleep_in_us);
- }
-
- os_thread_sleep(sleep_in_us);
- /* Account the time slept in the per-transaction wait timer. */
- trx->innodb_que_wait_timer += sleep_in_us;
-
- trx->op_info = "";
-
- ++n_sleeps;
-
- /* From the second sleep onwards, lengthen the shared delay. */
- if (srv_adaptive_max_sleep_delay > 0 && n_sleeps > 1) {
- ++srv_thread_sleep_delay;
- }
- }
-}
-
-/*********************************************************************//**
-Note that a user thread is leaving InnoDB code: clear the transaction's
-tickets and its "inside InnoDB" flag, then atomically decrement
-srv_conc.n_active. No waiter is woken here. */
-static
-void
-srv_conc_exit_innodb_with_atomics(
-/*==============================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- trx->n_tickets_to_enter_innodb = 0;
- trx->declared_to_be_inside_innodb = FALSE;
-
- (void) os_atomic_decrement_lint(&srv_conc.n_active, 1);
-}
-#else
-/*********************************************************************//**
-Note that a user thread is leaving InnoDB code. Under srv_conc_mutex this
-decrements srv_conc.n_active, clears the transaction's flag and tickets and,
-if the active count is now below srv_thread_concurrency, releases one
-queued waiter by marking its slot as ended and setting the slot's event. */
-static
-void
-srv_conc_exit_innodb_without_atomics(
-/*=================================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- srv_conc_slot_t* slot;
-
- os_fast_mutex_lock(&srv_conc_mutex);
-
- ut_ad(srv_conc.n_active > 0);
- srv_conc.n_active--;
- trx->declared_to_be_inside_innodb = FALSE;
- trx->n_tickets_to_enter_innodb = 0;
-
- /* Stays NULL unless a waiter to release is found below. */
- slot = NULL;
-
- if (srv_conc.n_active < (lint) srv_thread_concurrency) {
-#ifdef WITH_WSREP
- srv_conc_slot_t* wsrep_slot;
-#endif
- /* Look for a slot where a thread is waiting and no other
- thread has yet released the thread */
-
- for (slot = UT_LIST_GET_FIRST(srv_conc_queue);
- slot != NULL && slot->wait_ended == TRUE;
- slot = UT_LIST_GET_NEXT(srv_conc_queue, slot)) {
-
- /* No op */
- }
-
-#ifdef WITH_WSREP
- /* look for aborting trx, they must be released asap */
- /* The scan starts at the waiter found above; if an aborting
- waiter exists further down the queue it is released instead. */
- wsrep_slot= slot;
- while (wsrep_slot && (wsrep_slot->wait_ended == TRUE ||
- !wsrep_trx_is_aborting(wsrep_slot->thd))) {
- wsrep_slot = UT_LIST_GET_NEXT(srv_conc_queue, wsrep_slot);
- }
- if (wsrep_slot) {
- slot = wsrep_slot;
- if (wsrep_debug)
- fprintf(stderr, "WSREP: releasing aborting thd\n");
- }
-#endif
- if (slot != NULL) {
- slot->wait_ended = TRUE;
-
- /* We increment the count on behalf of the released
- thread */
-
- srv_conc.n_active++;
- }
- }
-
- /* Signal the chosen waiter while still holding the mutex. */
- if (slot != NULL) {
- os_event_set(slot->event);
- }
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-}
-
-/*********************************************************************//**
-Handle the scheduling of a user thread that wants to enter InnoDB when
-atomic builtins are not available. All srv_conc state is read and written
-under srv_conc_mutex.
-
-The thread enters at once if fewer than srv_thread_concurrency threads are
-active. Otherwise, if it holds neither the search latch nor any lock, it
-sleeps once for srv_thread_sleep_delay microseconds and retries. Failing
-that, it reserves a wait slot, appends it to srv_conc_queue and blocks on
-the slot's event until a thread leaving InnoDB releases it. If every wait
-slot is taken the thread is let in anyway, with zero tickets.
-
-If the transaction is already declared to be inside InnoDB an error is
-printed to stderr and the function returns without changing anything. */
-static
-void
-srv_conc_enter_innodb_without_atomics(
-/*==================================*/
-	trx_t*	trx)	/*!< in/out: transaction that wants
-			to enter InnoDB */
-{
-	ulint			i;
-	srv_conc_slot_t*	slot = NULL;
-	ibool			has_slept = FALSE;
-	ib_uint64_t		start_time = 0L;
-	ib_uint64_t		finish_time = 0L;
-	ulint			sec;
-	ulint			ms;
-	ulint			sleep_in_us;
-
-	os_fast_mutex_lock(&srv_conc_mutex);
-retry:
-	if (UNIV_UNLIKELY(trx->declared_to_be_inside_innodb)) {
-		os_fast_mutex_unlock(&srv_conc_mutex);
-		ut_print_timestamp(stderr);
-		fputs("  InnoDB: Error: trying to declare trx"
-		      " to enter InnoDB, but\n"
-		      "InnoDB: it already is declared.\n", stderr);
-		trx_print(stderr, trx, 0);
-		putc('\n', stderr);
-		return;
-	}
-
-	ut_ad(srv_conc.n_active >= 0);
-
-	if (srv_conc.n_active < (lint) srv_thread_concurrency) {
-
-		srv_conc.n_active++;
-		trx->declared_to_be_inside_innodb = TRUE;
-		trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter;
-
-		os_fast_mutex_unlock(&srv_conc_mutex);
-
-		return;
-	}
-#ifdef WITH_WSREP
-	if (wsrep_on(trx->mysql_thd) &&
-	    wsrep_thd_is_brute_force(trx->mysql_thd)) {
-		/* srv_conc_force_enter_innodb() acquires srv_conc_mutex
-		itself in this (non-atomics) build, so the mutex must be
-		released first; otherwise this thread would block on a
-		mutex it already owns and never release it. */
-		os_fast_mutex_unlock(&srv_conc_mutex);
-		srv_conc_force_enter_innodb(trx);
-		return;
-	}
-#endif
-
-	/* If the transaction is not holding resources, let it sleep
-	for srv_thread_sleep_delay microseconds, and try again then */
-
-	if (!has_slept && !trx->has_search_latch
-	    && NULL == UT_LIST_GET_FIRST(trx->lock.trx_locks)) {
-
-		has_slept = TRUE; /* We let it sleep only once to avoid
-				  starvation */
-
-		srv_conc.n_waiting++;
-
-		os_fast_mutex_unlock(&srv_conc_mutex);
-
-		trx->op_info = "sleeping before joining InnoDB queue";
-
-		/* Read the global delay once so that the time slept and
-		the time accounted below are the same value even if the
-		setting changes in between. */
-		sleep_in_us = srv_thread_sleep_delay;
-
-		/* Peter Zaitsev suggested that we take the sleep away
-		altogether. But the sleep may be good in pathological
-		situations of lots of thread switches. Simply put some
-		threads aside for a while to reduce the number of thread
-		switches. */
-		if (sleep_in_us > 0) {
-			os_thread_sleep(sleep_in_us);
-			trx->innodb_que_wait_timer += sleep_in_us;
-		}
-
-		trx->op_info = "";
-
-		os_fast_mutex_lock(&srv_conc_mutex);
-
-		srv_conc.n_waiting--;
-
-		goto retry;
-	}
-
-	/* Too many threads inside: put the current thread to a queue */
-
-	for (i = 0; i < OS_THREAD_MAX_N; i++) {
-		slot = srv_conc_slots + i;
-
-		if (!slot->reserved) {
-
-			break;
-		}
-	}
-
-	if (i == OS_THREAD_MAX_N) {
-		/* Could not find a free wait slot, we must let the
-		thread enter */
-
-		srv_conc.n_active++;
-		trx->declared_to_be_inside_innodb = TRUE;
-		trx->n_tickets_to_enter_innodb = 0;
-
-		os_fast_mutex_unlock(&srv_conc_mutex);
-
-		return;
-	}
-
-	/* Release possible search system latch this thread has */
-	if (trx->has_search_latch) {
-		trx_search_latch_release_if_reserved(trx);
-	}
-
-	/* Add to the queue */
-	slot->reserved = TRUE;
-	slot->wait_ended = FALSE;
-#ifdef WITH_WSREP
-	slot->thd = trx->mysql_thd;
-#endif
-
-	UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
-
-	os_event_reset(slot->event);
-
-	srv_conc.n_waiting++;
-
-#ifdef WITH_WSREP
-	if (wsrep_on(trx->mysql_thd) &&
-	    wsrep_trx_is_aborting(trx->mysql_thd)) {
-		/* NOTE(review): this path returns with the slot still
-		reserved and queued and with n_waiting incremented, and
-		without incrementing n_active; nothing in this function
-		undoes that. Confirm whether this is intended. */
-		os_fast_mutex_unlock(&srv_conc_mutex);
-		if (wsrep_debug)
-			fprintf(stderr, "srv_conc_enter due to MUST_ABORT");
-		trx->declared_to_be_inside_innodb = TRUE;
-		trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter;
-		return;
-	}
-	trx->wsrep_event = slot->event;
-#endif /* WITH_WSREP */
-	os_fast_mutex_unlock(&srv_conc_mutex);
-
-	/* Go to wait for the event; when a thread leaves InnoDB it will
-	release this thread */
-
-	ut_ad(!trx->has_search_latch);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
-
-	/* The queue wait is only timed when statistics are requested for
-	this transaction; start_time == 0 means "not timed". */
-	if (UNIV_UNLIKELY(trx->take_stats)) {
-		ut_usectime(&sec, &ms);
-		start_time = (ib_uint64_t)sec * 1000000 + ms;
-	} else {
-		start_time = 0;
-	}
-
-	trx->op_info = "waiting in InnoDB queue";
-
-	thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
-
-	os_event_wait(slot->event);
-	thd_wait_end(trx->mysql_thd);
-#ifdef WITH_WSREP
-	trx->wsrep_event = NULL;
-#endif /* WITH_WSREP */
-
-	trx->op_info = "";
-
-	if (UNIV_UNLIKELY(start_time != 0)) {
-		ut_usectime(&sec, &ms);
-		finish_time = (ib_uint64_t)sec * 1000000 + ms;
-		trx->innodb_que_wait_timer += (ulint)(finish_time - start_time);
-	}
-
-	os_fast_mutex_lock(&srv_conc_mutex);
-
-	srv_conc.n_waiting--;
-
-	/* NOTE that the thread which released this thread already
-	incremented the thread counter on behalf of this thread */
-
-	slot->reserved = FALSE;
-#ifdef WITH_WSREP
-	slot->thd = NULL;
-#endif
-
-	UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
-
-	trx->declared_to_be_inside_innodb = TRUE;
-	trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter;
-
-	os_fast_mutex_unlock(&srv_conc_mutex);
-}
-#endif /* HAVE_ATOMIC_BUILTINS */
-
-/*********************************************************************//**
-Puts an OS thread to wait if there are too many concurrent threads
-(>= srv_thread_concurrency) inside InnoDB. This is a thin dispatcher: with
-HAVE_ATOMIC_BUILTINS it calls srv_conc_enter_innodb_with_atomics(), where
-waiting threads sleep and retry; otherwise it calls
-srv_conc_enter_innodb_without_atomics(), where the threads wait in a FIFO
-queue. */
-UNIV_INTERN
-void
-srv_conc_enter_innodb(
-/*==================*/
- trx_t* trx) /*!< in: transaction object associated with the
- thread */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
-
-#ifdef HAVE_ATOMIC_BUILTINS
- srv_conc_enter_innodb_with_atomics(trx);
-#else
- srv_conc_enter_innodb_without_atomics(trx);
-#endif /* HAVE_ATOMIC_BUILTINS */
-}
-
-/*********************************************************************//**
-This lets a thread enter InnoDB regardless of the number of threads inside
-InnoDB. This must be called when a thread ends a lock wait. When
-srv_thread_concurrency is 0 the function returns without touching the
-transaction or the counters. Otherwise srv_conc.n_active is incremented
-(atomically, or under srv_conc_mutex when atomics are unavailable) and the
-transaction is declared inside InnoDB with a single ticket. */
-UNIV_INTERN
-void
-srv_conc_force_enter_innodb(
-/*========================*/
- trx_t* trx) /*!< in: transaction object associated with the
- thread */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
-
- /* Zero means no concurrency limit is configured: nothing to count. */
- if (!srv_thread_concurrency) {
-
- return;
- }
-
- ut_ad(srv_conc.n_active >= 0);
-
-#ifdef HAVE_ATOMIC_BUILTINS
- (void) os_atomic_increment_lint(&srv_conc.n_active, 1);
-#else
- /* Callers must not hold srv_conc_mutex here: it is acquired below. */
- os_fast_mutex_lock(&srv_conc_mutex);
- ++srv_conc.n_active;
- os_fast_mutex_unlock(&srv_conc_mutex);
-#endif /* HAVE_ATOMIC_BUILTINS */
-
- trx->n_tickets_to_enter_innodb = 1;
- trx->declared_to_be_inside_innodb = TRUE;
-}
-
-/*********************************************************************//**
-Declare that the thread has left InnoDB. To be called when a thread exits
-InnoDB in a lock wait or at the end of an SQL statement. The call is a
-no-op for replication slave threads and for a transaction that is not
-currently declared to be inside InnoDB. */
-UNIV_INTERN
-void
-srv_conc_force_exit_innodb(
-/*=======================*/
-	trx_t*	trx)	/*!< in: transaction object associated with the
-			thread */
-{
-	/* Replication slave threads are skipped. */
-	if (trx->mysql_thd != NULL
-	    && thd_is_replication_slave_thread(trx->mysql_thd)) {
-
-		return;
-	}
-
-	/* Nothing to undo when the trx is not declared inside. */
-	if (!trx->declared_to_be_inside_innodb) {
-
-		return;
-	}
-
-#ifdef HAVE_ATOMIC_BUILTINS
-	srv_conc_exit_innodb_with_atomics(trx);
-#else
-	srv_conc_exit_innodb_without_atomics(trx);
-#endif /* HAVE_ATOMIC_BUILTINS */
-
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
-}
-
-/*********************************************************************//**
-Report the number of threads currently counted as waiting to enter InnoDB.
-@return current value of srv_conc.n_waiting */
-UNIV_INTERN
-ulint
-srv_conc_get_waiting_threads(void)
-/*==============================*/
-{
-	const ulint	n_waiting = srv_conc.n_waiting;
-
-	return(n_waiting);
-}
-
-/*********************************************************************//**
-Report the number of threads currently counted as active inside InnoDB.
-@return current value of srv_conc.n_active */
-UNIV_INTERN
-ulint
-srv_conc_get_active_threads(void)
-/*==============================*/
-{
-	const ulint	n_active = srv_conc.n_active;
-
-	return(n_active);
-}
-
-#ifdef WITH_WSREP
-/*********************************************************************//**
-Cancel a transaction's wait to enter InnoDB (wsrep builds only). With
-HAVE_ATOMIC_BUILTINS nothing is cancelled and only a debug message is
-printed. Otherwise, under srv_conc_mutex, the event stored in
-trx->wsrep_event is set if there is one. */
-UNIV_INTERN
-void
-wsrep_srv_conc_cancel_wait(
-/*==================*/
- trx_t* trx) /*!< in: transaction object associated with the
- thread */
-{
-#ifdef HAVE_ATOMIC_BUILTINS
- /* Aborting transactions enter InnoDB by force in
- srv_conc_enter_innodb_with_atomics(). There is no need to cancel
- anything here: the thread wakes up after its sleep and is then
- let in.
- NOTE(review): the message below says "no atomics" although this is
- the branch compiled when atomics ARE available - confirm the
- intended wording. */
- if (wsrep_debug)
- fprintf(stderr, "WSREP: conc slot cancel, no atomics\n");
-#else
- os_fast_mutex_lock(&srv_conc_mutex);
- /* A non-NULL wsrep_event means the trx is blocked on its wait
- slot; setting the event wakes it up. */
- if (trx->wsrep_event) {
- if (wsrep_debug)
- fprintf(stderr, "WSREP: conc slot cancel\n");
- os_event_set(trx->wsrep_event);
- }
- os_fast_mutex_unlock(&srv_conc_mutex);
-#endif
-}
-#endif /* WITH_WSREP */
-
diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc
deleted file mode 100644
index 47abae66192..00000000000
--- a/storage/xtradb/srv/srv0mon.cc
+++ /dev/null
@@ -1,2177 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file srv/srv0mon.cc
-Database monitor counter interfaces
-
-Created 12/9/2009 Jimmy Yang
-*******************************************************/
-
-#ifndef UNIV_HOTBACKUP
-#include "os0file.h"
-#include "mach0data.h"
-#include "srv0mon.h"
-#include "srv0srv.h"
-#include "buf0buf.h"
-#include "trx0sys.h"
-#include "trx0rseg.h"
-#include "lock0lock.h"
-#include "ibuf0ibuf.h"
-#ifdef UNIV_NONINL
-#include "srv0mon.ic"
-#endif
-
-/* Macro to standardize the counter names for counters in the
-"monitor_buf_page" module as they have very structured defines.
-It expands to a brace-enclosed initializer whose fields are, in order:
-the counter name "buffer_page_<op>_<name>", the module name
-"buffer_page_io", the description "Number of <description> Pages <op>",
-MONITOR_GROUP_MODULE, MONITOR_DEFAULT_START and the identifier
-MONITOR_<code>_<op_code> formed by token pasting. The name, description
-and op arguments must be string literals because they are joined by
-adjacent-literal concatenation. */
-#define MONITOR_BUF_PAGE(name, description, code, op, op_code) \
- {"buffer_page_" op "_" name, "buffer_page_io", \
- "Number of " description " Pages " op, \
- MONITOR_GROUP_MODULE, MONITOR_DEFAULT_START, \
- MONITOR_##code##_##op_code}
-
-/* Entry for a "read" page counter: op is "read" and the identifier is
-MONITOR_<code>_PAGE_READ. */
-#define MONITOR_BUF_PAGE_READ(name, description, code) \
- MONITOR_BUF_PAGE(name, description, code, "read", PAGE_READ)
-
-/* Entry for a "written" page counter: op is "written" and the identifier
-is MONITOR_<code>_PAGE_WRITTEN. */
-#define MONITOR_BUF_PAGE_WRITTEN(name, description, code) \
- MONITOR_BUF_PAGE(name, description, code, "written", PAGE_WRITTEN)
-
-
-/** This array defines basic static information of monitor counters,
-including each monitor's name, module it belongs to, a short
-description and its property/type and corresponding monitor_id.
-Please note: If you add a monitor here, please add its corresponding
-monitor_id to "enum monitor_id_value" structure in srv0mon.h file. */
-
-static monitor_info_t innodb_counter_info[] =
-{
- /* A dummy item to mark the module start, this is
- to accommodate the default value (0) set for the
- global variables with the control system. */
- {"module_start", "module_start", "module_start",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_DEFAULT_START},
-
- /* ========== Counters for Server Metadata ========== */
- {"module_metadata", "metadata", "Server Metadata",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_METADATA},
-
- {"metadata_table_handles_opened", "metadata",
- "Number of table handles opened",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_TABLE_OPEN},
-
- {"metadata_table_handles_closed", "metadata",
- "Number of table handles closed",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_TABLE_CLOSE},
-
- {"metadata_table_reference_count", "metadata",
- "Table reference counter",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_TABLE_REFERENCE},
-
- {"metadata_mem_pool_size", "metadata",
- "Size of a memory pool InnoDB uses to store data dictionary"
- " and internal data structures in bytes",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT),
- MONITOR_DEFAULT_START, MONITOR_OVLD_META_MEM_POOL},
-
- /* ========== Counters for Lock Module ========== */
- {"module_lock", "lock", "Lock Module",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_LOCK},
-
- {"lock_deadlocks", "lock", "Number of deadlocks",
- MONITOR_DEFAULT_ON,
- MONITOR_DEFAULT_START, MONITOR_DEADLOCK},
-
- {"lock_timeouts", "lock", "Number of lock timeouts",
- MONITOR_DEFAULT_ON,
- MONITOR_DEFAULT_START, MONITOR_TIMEOUT},
-
- {"lock_rec_lock_waits", "lock",
- "Number of times enqueued into record lock wait queue",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_LOCKREC_WAIT},
-
- {"lock_table_lock_waits", "lock",
- "Number of times enqueued into table lock wait queue",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_TABLELOCK_WAIT},
-
- {"lock_rec_lock_requests", "lock",
- "Number of record locks requested",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_NUM_RECLOCK_REQ},
-
- {"lock_rec_lock_created", "lock", "Number of record locks created",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_RECLOCK_CREATED},
-
- {"lock_rec_lock_removed", "lock",
- "Number of record locks removed from the lock queue",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_RECLOCK_REMOVED},
-
- {"lock_rec_locks", "lock",
- "Current number of record locks on tables",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_NUM_RECLOCK},
-
- {"lock_table_lock_created", "lock", "Number of table locks created",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_TABLELOCK_CREATED},
-
- {"lock_table_lock_removed", "lock",
- "Number of table locks removed from the lock queue",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_TABLELOCK_REMOVED},
-
- {"lock_table_locks", "lock",
- "Current number of table locks on tables",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_NUM_TABLELOCK},
-
- {"lock_row_lock_current_waits", "lock",
- "Number of row locks currently being waited for"
- " (innodb_row_lock_current_waits)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT},
-
- {"lock_row_lock_time", "lock",
- "Time spent in acquiring row locks, in milliseconds"
- " (innodb_row_lock_time)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_WAIT_TIME},
-
- {"lock_row_lock_time_max", "lock",
- "The maximum time to acquire a row lock, in milliseconds"
- " (innodb_row_lock_time_max)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_MAX_WAIT_TIME},
-
- {"lock_row_lock_waits", "lock",
- "Number of times a row lock had to be waited for"
- " (innodb_row_lock_waits)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_ROW_LOCK_WAIT},
-
- {"lock_row_lock_time_avg", "lock",
- "The average time to acquire a row lock, in milliseconds"
- " (innodb_row_lock_time_avg)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_AVG_WAIT_TIME},
-
- /* ========== Counters for Buffer Manager and I/O ========== */
- {"module_buffer", "buffer", "Buffer Manager Module",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_BUFFER},
-
- {"buffer_pool_size", "server",
- "Server buffer pool size (all buffer pools) in bytes",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUFFER_POOL_SIZE},
-
- {"buffer_pool_reads", "buffer",
- "Number of reads directly from disk (innodb_buffer_pool_reads)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READS},
-
- {"buffer_pool_read_requests", "buffer",
- "Number of logical read requests (innodb_buffer_pool_read_requests)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_REQUESTS},
-
- {"buffer_pool_write_requests", "buffer",
- "Number of write requests (innodb_buffer_pool_write_requests)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_WRITE_REQUEST},
-
- {"buffer_pool_wait_free", "buffer",
- "Number of times waited for free buffer"
- " (innodb_buffer_pool_wait_free)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_WAIT_FREE},
-
- {"buffer_pool_read_ahead", "buffer",
- "Number of pages read as read ahead (innodb_buffer_pool_read_ahead)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_AHEAD},
-
- {"buffer_pool_read_ahead_evicted", "buffer",
- "Read-ahead pages evicted without being accessed"
- " (innodb_buffer_pool_read_ahead_evicted)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED},
-
- {"buffer_pool_pages_total", "buffer",
- "Total buffer pool size in pages (innodb_buffer_pool_pages_total)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGE_TOTAL},
-
- {"buffer_pool_pages_misc", "buffer",
- "Buffer pages for misc use such as row locks or the adaptive"
- " hash index (innodb_buffer_pool_pages_misc)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGE_MISC},
-
- {"buffer_pool_pages_data", "buffer",
- "Buffer pages containing data (innodb_buffer_pool_pages_data)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DATA},
-
- {"buffer_pool_bytes_data", "buffer",
- "Buffer bytes containing data (innodb_buffer_pool_bytes_data)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DATA},
-
- {"buffer_pool_pages_dirty", "buffer",
- "Buffer pages currently dirty (innodb_buffer_pool_pages_dirty)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DIRTY},
-
- {"buffer_pool_bytes_dirty", "buffer",
- "Buffer bytes currently dirty (innodb_buffer_pool_bytes_dirty)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DIRTY},
-
- {"buffer_pool_pages_free", "buffer",
- "Buffer pages currently free (innodb_buffer_pool_pages_free)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_FREE},
-
- {"buffer_pages_created", "buffer",
- "Number of pages created (innodb_pages_created)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_CREATED},
-
- {"buffer_pages_written", "buffer",
- "Number of pages written (innodb_pages_written)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_WRITTEN},
-
- {"buffer_index_pages_written", "buffer",
- "Number of index pages written (innodb_index_pages_written)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_PAGES_WRITTEN},
-
- {"buffer_non_index_pages_written", "buffer",
- "Number of non index pages written (innodb_non_index_pages_written)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN},
-
- {"buffer_pages_read", "buffer",
- "Number of pages read (innodb_pages_read)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_READ},
-
- {"buffer_pages0_read", "buffer",
- "Number of page 0 read (innodb_pages0_read)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES0_READ},
-
- {"buffer_index_sec_rec_cluster_reads", "buffer",
- "Number of secondary record reads triggered cluster read",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS},
-
- {"buffer_index_sec_rec_cluster_reads_avoided", "buffer",
- "Number of secondary record reads avoided triggering cluster read",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS_AVOIDED},
-
- {"buffer_data_reads", "buffer",
- "Amount of data read in bytes (innodb_data_reads)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BYTE_READ},
-
- {"buffer_data_written", "buffer",
- "Amount of data written in bytes (innodb_data_written)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BYTE_WRITTEN},
-
- /* Cumulative counter for scanning in flush batches */
- {"buffer_flush_batch_scanned", "buffer",
- "Total pages scanned as part of flush batch",
- MONITOR_SET_OWNER,
- MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
- MONITOR_FLUSH_BATCH_SCANNED},
-
- {"buffer_flush_batch_num_scan", "buffer",
- "Number of times buffer flush list flush is called",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_SCANNED,
- MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL},
-
- {"buffer_flush_batch_scanned_per_call", "buffer",
- "Pages scanned per flush batch scan",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_SCANNED,
- MONITOR_FLUSH_BATCH_SCANNED_PER_CALL},
-
- {"buffer_flush_batch_rescan", "buffer",
- "Number of times rescan of flush list forced",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_FLUSH_HP_RESCAN},
-
- /* Cumulative counter for pages flushed in flush batches */
- {"buffer_flush_batch_total_pages", "buffer",
- "Total pages flushed as part of flush batch",
- MONITOR_SET_OWNER, MONITOR_FLUSH_BATCH_COUNT,
- MONITOR_FLUSH_BATCH_TOTAL_PAGE},
-
- {"buffer_flush_batches", "buffer",
- "Number of flush batches",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_TOTAL_PAGE,
- MONITOR_FLUSH_BATCH_COUNT},
-
- {"buffer_flush_batch_pages", "buffer",
- "Pages queued as a flush batch",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_TOTAL_PAGE,
- MONITOR_FLUSH_BATCH_PAGES},
-
- /* Cumulative counter for flush batches because of neighbor */
- {"buffer_flush_neighbor_total_pages", "buffer",
- "Total neighbors flushed as part of neighbor flush",
- MONITOR_SET_OWNER, MONITOR_FLUSH_NEIGHBOR_COUNT,
- MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE},
-
- {"buffer_flush_neighbor", "buffer",
- "Number of times neighbors flushing is invoked",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
- MONITOR_FLUSH_NEIGHBOR_COUNT},
-
- {"buffer_flush_neighbor_pages", "buffer",
- "Pages queued as a neighbor batch",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
- MONITOR_FLUSH_NEIGHBOR_PAGES},
-
- {"buffer_flush_n_to_flush_requested", "buffer",
- "Number of pages requested for flushing.",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_FLUSH_N_TO_FLUSH_REQUESTED},
-
- {"buffer_flush_avg_page_rate", "buffer",
- "Average number of pages at which flushing is happening",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_FLUSH_AVG_PAGE_RATE},
-
- {"buffer_flush_lsn_avg_rate", "buffer",
- "Average redo generation rate",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_FLUSH_LSN_AVG_RATE},
-
- {"buffer_flush_pct_for_dirty", "buffer",
- "Percent of IO capacity used to avoid max dirty page limit",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_DIRTY},
-
- {"buffer_flush_pct_for_lsn", "buffer",
- "Percent of IO capacity used to avoid reusable redo space limit",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_LSN},
-
- {"buffer_flush_sync_waits", "buffer",
- "Number of times a wait happens due to sync flushing",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_FLUSH_SYNC_WAITS},
-
- /* Cumulative counter for flush batches for adaptive flushing */
- {"buffer_flush_adaptive_total_pages", "buffer",
- "Total pages flushed as part of adaptive flushing",
- MONITOR_SET_OWNER, MONITOR_FLUSH_ADAPTIVE_COUNT,
- MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE},
-
- {"buffer_flush_adaptive", "buffer",
- "Number of adaptive batches",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
- MONITOR_FLUSH_ADAPTIVE_COUNT},
-
- {"buffer_flush_adaptive_pages", "buffer",
- "Pages queued as an adaptive batch",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
- MONITOR_FLUSH_ADAPTIVE_PAGES},
-
- /* Cumulative counter for flush batches because of sync */
- {"buffer_flush_sync_total_pages", "buffer",
- "Total pages flushed as part of sync batches",
- MONITOR_SET_OWNER, MONITOR_FLUSH_SYNC_COUNT,
- MONITOR_FLUSH_SYNC_TOTAL_PAGE},
-
- {"buffer_flush_sync", "buffer",
- "Number of sync batches",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_SYNC_TOTAL_PAGE,
- MONITOR_FLUSH_SYNC_COUNT},
-
- {"buffer_flush_sync_pages", "buffer",
- "Pages queued as a sync batch",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_SYNC_TOTAL_PAGE,
- MONITOR_FLUSH_SYNC_PAGES},
-
- /* Cumulative counter for flush batches because of background */
- {"buffer_flush_background_total_pages", "buffer",
- "Total pages flushed as part of background batches",
- MONITOR_SET_OWNER, MONITOR_FLUSH_BACKGROUND_COUNT,
- MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE},
-
- {"buffer_flush_background", "buffer",
- "Number of background batches",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
- MONITOR_FLUSH_BACKGROUND_COUNT},
-
- {"buffer_flush_background_pages", "buffer",
- "Pages queued as a background batch",
- MONITOR_SET_MEMBER, MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
- MONITOR_FLUSH_BACKGROUND_PAGES},
-
- /* Cumulative counter for LRU batch scan */
- {"buffer_LRU_batch_scanned", "buffer",
- "Total pages scanned as part of LRU batch",
- MONITOR_SET_OWNER, MONITOR_LRU_BATCH_SCANNED_NUM_CALL,
- MONITOR_LRU_BATCH_SCANNED},
-
- {"buffer_LRU_batch_num_scan", "buffer",
- "Number of times LRU batch is called",
- MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_SCANNED,
- MONITOR_LRU_BATCH_SCANNED_NUM_CALL},
-
- {"buffer_LRU_batch_scanned_per_call", "buffer",
- "Pages scanned per LRU batch call",
- MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_SCANNED,
- MONITOR_LRU_BATCH_SCANNED_PER_CALL},
-
- /* Cumulative counter for LRU batch pages flushed */
- {"buffer_LRU_batch_flush_total_pages", "buffer",
- "Total pages flushed as part of LRU batches",
- MONITOR_SET_OWNER, MONITOR_LRU_BATCH_FLUSH_COUNT,
- MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE},
-
- {"buffer_LRU_batches_flush", "buffer",
- "Number of LRU batches",
- MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE,
- MONITOR_LRU_BATCH_FLUSH_COUNT},
-
- {"buffer_LRU_batch_flush_pages", "buffer",
- "Pages queued as an LRU batch",
- MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_FLUSH_TOTAL_PAGE,
- MONITOR_LRU_BATCH_FLUSH_PAGES},
-
- /* Cumulative counter for LRU batch pages evicted */
- {"buffer_LRU_batch_evict_total_pages", "buffer",
- "Total pages evicted as part of LRU batches",
- MONITOR_SET_OWNER, MONITOR_LRU_BATCH_EVICT_COUNT,
- MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE},
-
- {"buffer_LRU_batches_evict", "buffer",
- "Number of LRU batches",
- MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE,
- MONITOR_LRU_BATCH_EVICT_COUNT},
-
- {"buffer_LRU_batch_evict_pages", "buffer",
- "Pages queued as an LRU batch",
- MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_EVICT_TOTAL_PAGE,
- MONITOR_LRU_BATCH_EVICT_PAGES},
-
- /* Cumulative counter for single page LRU scans */
- {"buffer_LRU_single_flush_scanned", "buffer",
- "Total pages scanned as part of single page LRU flush",
- MONITOR_SET_OWNER,
- MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL,
- MONITOR_LRU_SINGLE_FLUSH_SCANNED},
-
- {"buffer_LRU_single_flush_num_scan", "buffer",
- "Number of times single page LRU flush is called",
- MONITOR_SET_MEMBER, MONITOR_LRU_SINGLE_FLUSH_SCANNED,
- MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL},
-
- {"buffer_LRU_single_flush_scanned_per_call", "buffer",
- "Page scanned per single LRU flush",
- MONITOR_SET_MEMBER, MONITOR_LRU_SINGLE_FLUSH_SCANNED,
- MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL},
-
- {"buffer_LRU_single_flush_failure_count", "Buffer",
- "Number of times attempt to flush a single page from LRU failed",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT},
-
- {"buffer_LRU_get_free_search", "Buffer",
- "Number of searches performed for a clean page",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_LRU_GET_FREE_SEARCH},
-
- /* Cumulative counter for LRU search scans */
- {"buffer_LRU_search_scanned", "buffer",
- "Total pages scanned as part of LRU search",
- MONITOR_SET_OWNER,
- MONITOR_LRU_SEARCH_SCANNED_NUM_CALL,
- MONITOR_LRU_SEARCH_SCANNED},
-
- {"buffer_LRU_search_num_scan", "buffer",
- "Number of times LRU search is performed",
- MONITOR_SET_MEMBER, MONITOR_LRU_SEARCH_SCANNED,
- MONITOR_LRU_SEARCH_SCANNED_NUM_CALL},
-
- {"buffer_LRU_search_scanned_per_call", "buffer",
- "Page scanned per single LRU search",
- MONITOR_SET_MEMBER, MONITOR_LRU_SEARCH_SCANNED,
- MONITOR_LRU_SEARCH_SCANNED_PER_CALL},
-
- /* Cumulative counter for LRU unzip search scans */
- {"buffer_LRU_unzip_search_scanned", "buffer",
- "Total pages scanned as part of LRU unzip search",
- MONITOR_SET_OWNER,
- MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL,
- MONITOR_LRU_UNZIP_SEARCH_SCANNED},
-
- {"buffer_LRU_unzip_search_num_scan", "buffer",
- "Number of times LRU unzip search is performed",
- MONITOR_SET_MEMBER, MONITOR_LRU_UNZIP_SEARCH_SCANNED,
- MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL},
-
- {"buffer_LRU_unzip_search_scanned_per_call", "buffer",
- "Page scanned per single LRU unzip search",
- MONITOR_SET_MEMBER, MONITOR_LRU_UNZIP_SEARCH_SCANNED,
- MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL},
-
- /* ========== Counters for Buffer Page I/O ========== */
- {"module_buffer_page", "buffer_page_io", "Buffer Page I/O Module",
- static_cast<monitor_type_t>(
- MONITOR_MODULE | MONITOR_GROUP_MODULE),
- MONITOR_DEFAULT_START, MONITOR_MODULE_BUF_PAGE},
-
- MONITOR_BUF_PAGE_READ("index_leaf","Index Leaf", INDEX_LEAF),
-
- MONITOR_BUF_PAGE_READ("index_non_leaf","Index Non-leaf",
- INDEX_NON_LEAF),
-
- MONITOR_BUF_PAGE_READ("index_ibuf_leaf", "Insert Buffer Index Leaf",
- INDEX_IBUF_LEAF),
-
- MONITOR_BUF_PAGE_READ("index_ibuf_non_leaf",
- "Insert Buffer Index Non-Leaf",
- INDEX_IBUF_NON_LEAF),
-
- MONITOR_BUF_PAGE_READ("undo_log", "Undo Log", UNDO_LOG),
-
- MONITOR_BUF_PAGE_READ("index_inode", "Index Inode", INODE),
-
- MONITOR_BUF_PAGE_READ("ibuf_free_list", "Insert Buffer Free List",
- IBUF_FREELIST),
-
- MONITOR_BUF_PAGE_READ("ibuf_bitmap", "Insert Buffer Bitmap",
- IBUF_BITMAP),
-
- MONITOR_BUF_PAGE_READ("system_page", "System", SYSTEM),
-
- MONITOR_BUF_PAGE_READ("trx_system", "Transaction System", TRX_SYSTEM),
-
- MONITOR_BUF_PAGE_READ("fsp_hdr", "File Space Header", FSP_HDR),
-
- MONITOR_BUF_PAGE_READ("xdes", "Extent Descriptor", XDES),
-
- MONITOR_BUF_PAGE_READ("blob", "Uncompressed BLOB", BLOB),
-
- MONITOR_BUF_PAGE_READ("zblob", "First Compressed BLOB", ZBLOB),
-
- MONITOR_BUF_PAGE_READ("zblob2", "Subsequent Compressed BLOB", ZBLOB2),
-
- MONITOR_BUF_PAGE_READ("other", "other/unknown (old version of InnoDB)",
- OTHER),
-
- MONITOR_BUF_PAGE_WRITTEN("index_leaf","Index Leaf", INDEX_LEAF),
-
- MONITOR_BUF_PAGE_WRITTEN("index_non_leaf","Index Non-leaf",
- INDEX_NON_LEAF),
-
- MONITOR_BUF_PAGE_WRITTEN("index_ibuf_leaf", "Insert Buffer Index Leaf",
- INDEX_IBUF_LEAF),
-
- MONITOR_BUF_PAGE_WRITTEN("index_ibuf_non_leaf",
- "Insert Buffer Index Non-Leaf",
- INDEX_IBUF_NON_LEAF),
-
- MONITOR_BUF_PAGE_WRITTEN("undo_log", "Undo Log", UNDO_LOG),
-
- MONITOR_BUF_PAGE_WRITTEN("index_inode", "Index Inode", INODE),
-
- MONITOR_BUF_PAGE_WRITTEN("ibuf_free_list", "Insert Buffer Free List",
- IBUF_FREELIST),
-
- MONITOR_BUF_PAGE_WRITTEN("ibuf_bitmap", "Insert Buffer Bitmap",
- IBUF_BITMAP),
-
- MONITOR_BUF_PAGE_WRITTEN("system_page", "System", SYSTEM),
-
- MONITOR_BUF_PAGE_WRITTEN("trx_system", "Transaction System",
- TRX_SYSTEM),
-
- MONITOR_BUF_PAGE_WRITTEN("fsp_hdr", "File Space Header", FSP_HDR),
-
- MONITOR_BUF_PAGE_WRITTEN("xdes", "Extent Descriptor", XDES),
-
- MONITOR_BUF_PAGE_WRITTEN("blob", "Uncompressed BLOB", BLOB),
-
- MONITOR_BUF_PAGE_WRITTEN("zblob", "First Compressed BLOB", ZBLOB),
-
- MONITOR_BUF_PAGE_WRITTEN("zblob2", "Subsequent Compressed BLOB",
- ZBLOB2),
-
- MONITOR_BUF_PAGE_WRITTEN("other", "other/unknown (old version InnoDB)",
- OTHER),
-
- /* ========== Counters for OS level operations ========== */
- {"module_os", "os", "OS Level Operation",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_OS},
-
- {"os_data_reads", "os",
- "Number of reads initiated (innodb_data_reads)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FILE_READ},
-
- {"os_data_writes", "os",
- "Number of writes initiated (innodb_data_writes)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FILE_WRITE},
-
- {"os_data_fsyncs", "os",
- "Number of fsync() calls (innodb_data_fsyncs)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FSYNC},
-
- {"os_pending_reads", "os", "Number of reads pending",
- MONITOR_DEFAULT_ON,
- MONITOR_DEFAULT_START, MONITOR_OS_PENDING_READS},
-
- {"os_pending_writes", "os", "Number of writes pending",
- MONITOR_DEFAULT_ON,
- MONITOR_DEFAULT_START, MONITOR_OS_PENDING_WRITES},
-
- {"os_log_bytes_written", "os",
- "Bytes of log written (innodb_os_log_written)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_WRITTEN},
-
- {"os_log_fsyncs", "os",
- "Number of fsync log writes (innodb_os_log_fsyncs)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_FSYNC},
-
- {"os_log_pending_fsyncs", "os",
- "Number of pending fsync write (innodb_os_log_pending_fsyncs)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_PENDING_FSYNC},
-
- {"os_log_pending_writes", "os",
- "Number of pending log file writes (innodb_os_log_pending_writes)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_PENDING_WRITES},
-
- /* ========== Counters for Transaction Module ========== */
- {"module_trx", "transaction", "Transaction Manager",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_TRX},
-
- {"trx_rw_commits", "transaction", "Number of read-write transactions "
- "committed",
- MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_RW_COMMIT},
-
- {"trx_ro_commits", "transaction", "Number of read-only transactions "
- "committed",
- MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_RO_COMMIT},
-
- {"trx_nl_ro_commits", "transaction", "Number of non-locking "
- "auto-commit read-only transactions committed",
- MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_NL_RO_COMMIT},
-
- {"trx_commits_insert_update", "transaction",
- "Number of transactions committed with inserts and updates",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_TRX_COMMIT_UNDO},
-
- {"trx_rollbacks", "transaction",
- "Number of transactions rolled back",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK},
-
- {"trx_rollbacks_savepoint", "transaction",
- "Number of transactions rolled back to savepoint",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK_SAVEPOINT},
-
- {"trx_rollback_active", "transaction",
- "Number of resurrected active transactions rolled back",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK_ACTIVE},
-
- {"trx_active_transactions", "transaction",
- "Number of active transactions",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_TRX_ACTIVE},
-
- {"trx_rseg_history_len", "transaction",
- "Length of the TRX_RSEG_HISTORY list",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_RSEG_HISTORY_LEN},
-
- {"trx_undo_slots_used", "transaction", "Number of undo slots used",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_NUM_UNDO_SLOT_USED},
-
- {"trx_undo_slots_cached", "transaction",
- "Number of undo slots cached",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_NUM_UNDO_SLOT_CACHED},
-
- {"trx_rseg_current_size", "transaction",
- "Current rollback segment size in pages",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
- MONITOR_DEFAULT_START, MONITOR_RSEG_CUR_SIZE},
-
- /* ========== Counters for Purge Module ========== */
- {"module_purge", "purge", "Purge Module",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_PURGE},
-
- {"purge_del_mark_records", "purge",
- "Number of delete-marked rows purged",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_N_DEL_ROW_PURGE},
-
- {"purge_upd_exist_or_extern_records", "purge",
- "Number of purges on updates of existing records and "
- " updates on delete marked record with externally stored field",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_N_UPD_EXIST_EXTERN},
-
- {"purge_invoked", "purge",
- "Number of times purge was invoked",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_PURGE_INVOKED},
-
- {"purge_undo_log_pages", "purge",
- "Number of undo log pages handled by the purge",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_PURGE_N_PAGE_HANDLED},
-
- {"purge_dml_delay_usec", "purge",
- "Microseconds DML to be delayed due to purge lagging",
- MONITOR_DISPLAY_CURRENT,
- MONITOR_DEFAULT_START, MONITOR_DML_PURGE_DELAY},
-
- {"purge_stop_count", "purge",
- "Number of times purge was stopped",
- MONITOR_DISPLAY_CURRENT,
- MONITOR_DEFAULT_START, MONITOR_PURGE_STOP_COUNT},
-
- {"purge_resume_count", "purge",
- "Number of times purge was resumed",
- MONITOR_DISPLAY_CURRENT,
- MONITOR_DEFAULT_START, MONITOR_PURGE_RESUME_COUNT},
-
- /* ========== Counters for Recovery Module ========== */
- {"module_log", "recovery", "Recovery Module",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_RECOVERY},
-
- {"log_checkpoints", "recovery", "Number of checkpoints",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_NUM_CHECKPOINT},
-
- {"log_lsn_last_flush", "recovery", "LSN of Last flush",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
- MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_FLUSHDISK},
-
- {"log_lsn_last_checkpoint", "recovery", "LSN at last checkpoint",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
- MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_CHECKPOINT},
-
- {"log_lsn_current", "recovery", "Current LSN value",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
- MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_CURRENT},
-
- {"log_lsn_checkpoint_age", "recovery",
- "Current LSN value minus LSN at last checkpoint",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_LSN_CHECKPOINT_AGE},
-
- {"log_lsn_buf_pool_oldest", "recovery",
- "The oldest modified block LSN in the buffer pool",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
- MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_OLDEST_LSN},
-
- {"log_max_modified_age_async", "recovery",
- "Maximum LSN difference; when exceeded, start asynchronous preflush",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
- MONITOR_DEFAULT_START, MONITOR_OVLD_MAX_AGE_ASYNC},
-
- {"log_max_modified_age_sync", "recovery",
- "Maximum LSN difference; when exceeded, start synchronous preflush",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
- MONITOR_DEFAULT_START, MONITOR_OVLD_MAX_AGE_SYNC},
-
- {"log_pending_log_writes", "recovery", "Pending log writes",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_PENDING_LOG_WRITE},
-
- {"log_pending_checkpoint_writes", "recovery", "Pending checkpoints",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_PENDING_CHECKPOINT_WRITE},
-
- {"log_num_log_io", "recovery", "Number of log I/Os",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_LOG_IO},
-
- {"log_waits", "recovery",
- "Number of log waits due to small log buffer (innodb_log_waits)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WAITS},
-
- {"log_write_requests", "recovery",
- "Number of log write requests (innodb_log_write_requests)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WRITE_REQUEST},
-
- {"log_writes", "recovery",
- "Number of log writes (innodb_log_writes)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WRITES},
-
- /* ========== Counters for Page Compression ========== */
- {"module_compress", "compression", "Page Compression Info",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_PAGE},
-
- {"compress_pages_compressed", "compression",
- "Number of pages compressed", MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_PAGE_COMPRESS},
-
- {"compress_pages_decompressed", "compression",
- "Number of pages decompressed",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_PAGE_DECOMPRESS},
-
- {"compression_pad_increments", "compression",
- "Number of times padding is incremented to avoid compression failures",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_PAD_INCREMENTS},
-
- {"compression_pad_decrements", "compression",
- "Number of times padding is decremented due to good compressibility",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_PAD_DECREMENTS},
-
- {"compress_saved", "compression",
- "Number of bytes saved by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_SAVED},
-
- {"compress_trim_sect512", "compression",
- "Number of sect-512 TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512},
-
- {"compress_trim_sect1024", "compression",
- "Number of sect-1024 TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024},
-
- {"compress_trim_sect2048", "compression",
- "Number of sect-2048 TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048},
-
- {"compress_trim_sect4096", "compression",
- "Number of sect-4K TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096},
-
- {"compress_trim_sect8192", "compression",
- "Number of sect-8K TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192},
-
- {"compress_trim_sect16384", "compression",
- "Number of sect-16K TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384},
-
- {"compress_trim_sect32768", "compression",
- "Number of sect-32K TRIMed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768},
-
- {"compress_pages_page_compressed", "compression",
- "Number of pages compressed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSED},
-
- {"compress_page_compressed_trim_op", "compression",
- "Number of TRIM operation performed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP},
-
- {"compress_page_compressed_trim_op_saved", "compression",
- "Number of TRIM operation saved by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED},
-
- {"compress_pages_page_decompressed", "compression",
- "Number of pages decompressed by page compression",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED},
-
- {"compress_pages_page_compression_error", "compression",
- "Number of page compression errors",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR},
-
- {"compress_pages_encrypted", "compression",
- "Number of pages encrypted",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_ENCRYPTED},
-
- {"compress_pages_decrypted", "compression",
- "Number of pages decrypted",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_DECRYPTED},
-
- /* ========== Counters for Index ========== */
- {"module_index", "index", "Index Manager",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_INDEX},
-
- {"index_page_splits", "index", "Number of index page splits",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_INDEX_SPLIT},
-
- {"index_page_merge_attempts", "index",
- "Number of index page merge attempts",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_INDEX_MERGE_ATTEMPTS},
-
- {"index_page_merge_successful", "index",
- "Number of successful index page merges",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_INDEX_MERGE_SUCCESSFUL},
-
- {"index_page_reorg_attempts", "index",
- "Number of index page reorganization attempts",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_INDEX_REORG_ATTEMPTS},
-
- {"index_page_reorg_successful", "index",
- "Number of successful index page reorganizations",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_INDEX_REORG_SUCCESSFUL},
-
- {"index_page_discards", "index", "Number of index pages discarded",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_INDEX_DISCARD},
-
- /* ========== Counters for Adaptive Hash Index ========== */
- {"module_adaptive_hash", "adaptive_hash_index", "Adpative Hash Index",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_ADAPTIVE_HASH},
-
- {"adaptive_hash_searches", "adaptive_hash_index",
- "Number of successful searches using Adaptive Hash Index",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_ADAPTIVE_HASH_SEARCH},
-
- {"adaptive_hash_searches_btree", "adaptive_hash_index",
- "Number of searches using B-tree on an index search",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE},
-
- {"adaptive_hash_pages_added", "adaptive_hash_index",
- "Number of index pages on which the Adaptive Hash Index is built",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_PAGE_ADDED},
-
- {"adaptive_hash_pages_removed", "adaptive_hash_index",
- "Number of index pages whose corresponding Adaptive Hash Index"
- " entries were removed",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_PAGE_REMOVED},
-
- {"adaptive_hash_rows_added", "adaptive_hash_index",
- "Number of Adaptive Hash Index rows added",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_ADDED},
-
- {"adaptive_hash_rows_removed", "adaptive_hash_index",
- "Number of Adaptive Hash Index rows removed",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_REMOVED},
-
- {"adaptive_hash_rows_deleted_no_hash_entry", "adaptive_hash_index",
- "Number of rows deleted that did not have corresponding Adaptive Hash"
- " Index entries",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND},
-
- {"adaptive_hash_rows_updated", "adaptive_hash_index",
- "Number of Adaptive Hash Index rows updated",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_UPDATED},
-
- /* ========== Counters for tablespace ========== */
- {"module_file", "file_system", "Tablespace and File System Manager",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_FIL_SYSTEM},
-
- {"file_num_open_files", "file_system",
- "Number of files currently open (innodb_num_open_files)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_N_FILE_OPENED},
-
- /* ========== Counters for Change Buffer ========== */
- {"module_ibuf_system", "change_buffer", "InnoDB Change Buffer",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_IBUF_SYSTEM},
-
- {"ibuf_merges_insert", "change_buffer",
- "Number of inserted records merged by change buffering",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_INSERT},
-
- {"ibuf_merges_delete_mark", "change_buffer",
- "Number of deleted records merged by change buffering",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DELETE},
-
- {"ibuf_merges_delete", "change_buffer",
- "Number of purge records merged by change buffering",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_PURGE},
-
- {"ibuf_merges_discard_insert", "change_buffer",
- "Number of insert merged operations discarded",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT},
-
- {"ibuf_merges_discard_delete_mark", "change_buffer",
- "Number of deleted merged operations discarded",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE},
-
- {"ibuf_merges_discard_delete", "change_buffer",
- "Number of purge merged operations discarded",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE},
-
- {"ibuf_merges", "change_buffer", "Number of change buffer merges",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGES},
-
- {"ibuf_size", "change_buffer", "Change buffer size in pages",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_SIZE},
-
- /* ========== Counters for server operations ========== */
- {"module_innodb", "innodb",
- "Counter for general InnoDB server wide operations and properties",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_SERVER},
-
- {"innodb_master_thread_sleeps", "server",
- "Number of times (seconds) master thread sleeps",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_MASTER_THREAD_SLEEP},
-
- {"innodb_activity_count", "server", "Current server activity count",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_SERVER_ACTIVITY},
-
- {"innodb_master_active_loops", "server",
- "Number of times master thread performs its tasks when"
- " server is active",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_MASTER_ACTIVE_LOOPS},
-
- {"innodb_master_idle_loops", "server",
- "Number of times master thread performs its tasks when server is idle",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_MASTER_IDLE_LOOPS},
-
- {"innodb_background_drop_table_usec", "server",
- "Time (in microseconds) spent to process drop table list",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND},
-
- {"innodb_ibuf_merge_usec", "server",
- "Time (in microseconds) spent to process change buffer merge",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_SRV_IBUF_MERGE_MICROSECOND},
-
- {"innodb_log_flush_usec", "server",
- "Time (in microseconds) spent to flush log records",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_SRV_LOG_FLUSH_MICROSECOND},
-
- {"innodb_mem_validate_usec", "server",
- "Time (in microseconds) spent to do memory validation",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_SRV_MEM_VALIDATE_MICROSECOND},
-
- {"innodb_master_purge_usec", "server",
- "Time (in microseconds) spent by master thread to purge records",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_SRV_PURGE_MICROSECOND},
-
- {"innodb_dict_lru_usec", "server",
- "Time (in microseconds) spent to process DICT LRU list",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_SRV_DICT_LRU_MICROSECOND},
-
- {"innodb_dict_lru_count_active", "server",
- "Number of tables evicted from DICT LRU list in the active loop",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_SRV_DICT_LRU_EVICT_COUNT_ACTIVE},
-
- {"innodb_dict_lru_count_idle", "server",
- "Number of tables evicted from DICT LRU list in the idle loop",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE},
-
- {"innodb_checkpoint_usec", "server",
- "Time (in microseconds) spent by master thread to do checkpoint",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_SRV_CHECKPOINT_MICROSECOND},
-
- {"innodb_dblwr_writes", "server",
- "Number of doublewrite operations that have been performed"
- " (innodb_dblwr_writes)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_DBLWR_WRITES},
-
- {"innodb_dblwr_pages_written", "server",
- "Number of pages that have been written for doublewrite operations"
- " (innodb_dblwr_pages_written)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN},
-
- {"innodb_page_size", "server",
- "InnoDB page size in bytes (innodb_page_size)",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT),
- MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_PAGE_SIZE},
-
- {"innodb_rwlock_s_spin_waits", "server",
- "Number of rwlock spin waits due to shared latch request",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_S_SPIN_WAITS},
-
- {"innodb_rwlock_x_spin_waits", "server",
- "Number of rwlock spin waits due to exclusive latch request",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_SPIN_WAITS},
-
- {"innodb_rwlock_s_spin_rounds", "server",
- "Number of rwlock spin loop rounds due to shared latch request",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS},
-
- {"innodb_rwlock_x_spin_rounds", "server",
- "Number of rwlock spin loop rounds due to exclusive latch request",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS},
-
- {"innodb_rwlock_s_os_waits", "server",
- "Number of OS waits due to shared latch request",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_S_OS_WAITS},
-
- {"innodb_rwlock_x_os_waits", "server",
- "Number of OS waits due to exclusive latch request",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_OS_WAITS},
-
- /* ========== Counters for DML operations ========== */
- {"module_dml", "dml", "Statistics for DMLs",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_DML_STATS},
-
- {"dml_reads", "dml", "Number of rows read",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_READ},
-
- {"dml_inserts", "dml", "Number of rows inserted",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_INSERTED},
-
- {"dml_deletes", "dml", "Number of rows deleted",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_DELETED},
-
- {"dml_updates", "dml", "Number of rows updated",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_UPDTATED},
-
- {"dml_system_reads", "dml", "Number of system rows read",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OLVD_SYSTEM_ROW_READ},
-
- {"dml_system_inserts", "dml", "Number of system rows inserted",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OLVD_SYSTEM_ROW_INSERTED},
-
- {"dml_system_deletes", "dml", "Number of system rows deleted",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OLVD_SYSTEM_ROW_DELETED},
-
- {"dml_system_updates", "dml", "Number of system rows updated",
- static_cast<monitor_type_t>(
- MONITOR_EXISTING | MONITOR_DEFAULT_ON),
- MONITOR_DEFAULT_START, MONITOR_OLVD_SYSTEM_ROW_UPDATED},
-
- /* ========== Counters for DDL operations ========== */
- {"module_ddl", "ddl", "Statistics for DDLs",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_DDL_STATS},
-
- {"ddl_background_drop_indexes", "ddl",
- "Number of indexes waiting to be dropped after failed index creation",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_BACKGROUND_DROP_INDEX},
-
- {"ddl_background_drop_tables", "ddl",
- "Number of tables in background drop table list",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_BACKGROUND_DROP_TABLE},
-
- {"ddl_online_create_index", "ddl",
- "Number of indexes being created online",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_ONLINE_CREATE_INDEX},
-
- {"ddl_pending_alter_table", "ddl",
- "Number of ALTER TABLE, CREATE INDEX, DROP INDEX in progress",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_PENDING_ALTER_TABLE},
-
- /* ===== Counters for ICP (Index Condition Pushdown) Module ===== */
- {"module_icp", "icp", "Index Condition Pushdown",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_MODULE_ICP},
-
- {"icp_attempts", "icp",
- "Number of attempts for index push-down condition checks",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_ICP_ATTEMPTS},
-
- {"icp_no_match", "icp", "Index push-down condition does not match",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_ICP_NO_MATCH},
-
- {"icp_out_of_range", "icp", "Index push-down condition out of range",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_ICP_OUT_OF_RANGE},
-
- {"icp_match", "icp", "Index push-down condition matches",
- MONITOR_NONE,
- MONITOR_DEFAULT_START, MONITOR_ICP_MATCH},
-
- /* ========== To turn on/off reset all counters ========== */
- {"all", "All Counters", "Turn on/off and reset all counters",
- MONITOR_MODULE,
- MONITOR_DEFAULT_START, MONITOR_ALL_COUNTER}
-};
-
-/* The "innodb_counter_value" array stores actual counter values. It has
-NUM_MONITOR elements, the same bound that srv_mon_get_info() and
-srv_mon_get_name() enforce on a monitor id. */
-UNIV_INTERN monitor_value_t innodb_counter_value[NUM_MONITOR];
-
-/* monitor_set_tbl is used to record and determine whether a monitor
-has been turned on/off. The array length is NUM_MONITOR divided by
-NUM_BITS_ULINT, rounded up. NOTE(review): NUM_BITS_ULINT is defined
-outside this file section; presumably it is the number of bits in a
-ulint, which would give one bit per monitor - confirm. */
-UNIV_INTERN ulint monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT
- - 1) / NUM_BITS_ULINT];
-
-#ifndef HAVE_ATOMIC_BUILTINS_64
-/** Mutex protecting atomic operations on platforms that lack
-built-in operations for atomic memory access. It is only compiled
-when HAVE_ATOMIC_BUILTINS_64 is not defined; srv_mon_create() creates
-it and srv_mon_free() frees it. */
-ib_mutex_t monitor_mutex;
-
-/** Key to register monitor_mutex with performance schema. It is the
-first argument of the mutex_create() call in srv_mon_create(). */
-UNIV_INTERN mysql_pfs_key_t monitor_mutex_key;
-
-/****************************************************************//**
-Initialize the monitor subsystem. The only work done here is to create
-monitor_mutex via mutex_create(), passing monitor_mutex_key and
-SYNC_ANY_LATCH. This function is compiled only when
-HAVE_ATOMIC_BUILTINS_64 is not defined. */
-UNIV_INTERN
-void
-srv_mon_create(void)
-/*================*/
-{
- mutex_create(monitor_mutex_key, &monitor_mutex, SYNC_ANY_LATCH);
-}
-/****************************************************************//**
-Close the monitor subsystem. The only work done here is to release
-monitor_mutex via mutex_free(); it is the counterpart of
-srv_mon_create(). This function is compiled only when
-HAVE_ATOMIC_BUILTINS_64 is not defined. */
-UNIV_INTERN
-void
-srv_mon_free(void)
-/*==============*/
-{
- mutex_free(&monitor_mutex);
-}
-#endif /* !HAVE_ATOMIC_BUILTINS_64 */
-
-/****************************************************************//**
-Look up the descriptor of a monitor counter. The monitor id is used
-directly as an index into the innodb_counter_info array.
-@return pointer to the matching monitor_info_t entry, or NULL for an
-id that is not below NUM_MONITOR. ut_a() asserts the same bound first,
-so the NULL path can only be taken if a failed ut_a() does not stop
-execution. */
-UNIV_INTERN
-monitor_info_t*
-srv_mon_get_info(
-/*=============*/
-	monitor_id_t	monitor_id)	/*!< in: id indexing into the
-					innodb_counter_info array */
-{
-	ut_a(monitor_id < NUM_MONITOR);
-
-	/* Out-of-range id: nothing to point at. */
-	if (!(monitor_id < NUM_MONITOR)) {
-		return(NULL);
-	}
-
-	return(&innodb_counter_info[monitor_id]);
-}
-
-/****************************************************************//**
-Look up the name of a monitor counter. The monitor id is used directly
-as an index into the innodb_counter_info array.
-@return the monitor_name field of the matching entry, or NULL for an
-id that is not below NUM_MONITOR. ut_a() asserts the same bound first,
-so the NULL path can only be taken if a failed ut_a() does not stop
-execution. */
-UNIV_INTERN
-const char*
-srv_mon_get_name(
-/*=============*/
-	monitor_id_t	monitor_id)	/*!< in: id indexing into the
-					innodb_counter_info array */
-{
-	ut_a(monitor_id < NUM_MONITOR);
-
-	if (monitor_id < NUM_MONITOR) {
-		/* Valid id: hand back the name stored in the table. */
-		return(innodb_counter_info[monitor_id].monitor_name);
-	}
-
-	return(NULL);
-}
-
-/****************************************************************//**
-Turn on/off or reset the monitor counters in a module. If module_id
-is MONITOR_ALL_COUNTER, the operation is applied to all monitor
-counters. A counter that is already on is not turned on again: a
-message is printed to stderr and the counter is skipped. */
-UNIV_INTERN
-void
-srv_mon_set_module_control(
-/*=======================*/
-	monitor_id_t	module_id,	/*!< in: Module ID as in
-					monitor_counter_id. If it is
-					set to MONITOR_ALL_COUNTER, this means
-					we shall act on all the counters */
-	mon_option_t	set_option)	/*!< in: Turn on/off reset the
-					counter */
-{
-	ulint	ix;
-	ulint	start_id;
-	ibool	set_current_module = FALSE;
-
-	/* module_id is used below as an index into innodb_counter_info[],
-	which has exactly NUM_MONITOR entries, so it must be strictly
-	smaller than NUM_MONITOR. */
-	ut_a(module_id < NUM_MONITOR);
-	ut_a(UT_ARR_SIZE(innodb_counter_info) == NUM_MONITOR);
-
-	/* The module_id must be an ID of MONITOR_MODULE type */
-	ut_a(innodb_counter_info[module_id].monitor_type & MONITOR_MODULE);
-
-	/* Start with the first monitor in the module. If module_id
-	is MONITOR_ALL_COUNTER, this means we need to act on all
-	monitor counters. */
-	if (module_id == MONITOR_ALL_COUNTER) {
-		start_id = 1;
-	} else if (innodb_counter_info[module_id].monitor_type
-		   & MONITOR_GROUP_MODULE) {
-		/* Counters in this module are set as a group together
-		and cannot be turned on/off individually. Need to set
-		the on/off bit in the module counter */
-		start_id = module_id;
-		set_current_module = TRUE;
-
-	} else {
-		start_id = module_id + 1;
-	}
-
-	for (ix = start_id; ix < NUM_MONITOR; ix++) {
-		/* If we hit a module counter: when acting on all
-		counters, skip it unless it is a group module; when
-		acting on a single module, stop at the next module
-		(the module's own entry is processed once if it is a
-		group module). */
-		if (innodb_counter_info[ix].monitor_type & MONITOR_MODULE) {
-
-			if (set_current_module) {
-				/* Continue to set on/off bit on current
-				module */
-				set_current_module = FALSE;
-			} else if (module_id == MONITOR_ALL_COUNTER) {
-				if (!(innodb_counter_info[ix].monitor_type
-				      & MONITOR_GROUP_MODULE)) {
-					continue;
-				}
-			} else {
-				/* Hitting the next module, stop */
-				break;
-			}
-		}
-
-		/* A monitor that is already on cannot be turned on
-		again. The user should be aware that some counters are
-		already on before turning them on again (which could
-		reset the counter value). */
-		if (MONITOR_IS_ON(ix) && (set_option == MONITOR_TURN_ON)) {
-			fprintf(stderr, "Monitor '%s' is already enabled.\n",
-				srv_mon_get_name((monitor_id_t) ix));
-			continue;
-		}
-
-		/* For some existing counters (server status variables),
-		we will get its counter value at the start/stop time
-		to calculate the actual value during the time. */
-		if (innodb_counter_info[ix].monitor_type & MONITOR_EXISTING) {
-			srv_mon_process_existing_counter(
-				static_cast<monitor_id_t>(ix), set_option);
-		}
-
-		/* Currently support 4 operations on the monitor counters:
-		turn on, turn off, reset and reset all operations. */
-		switch (set_option) {
-		case MONITOR_TURN_ON:
-			MONITOR_ON(ix);
-			MONITOR_INIT(ix);
-			MONITOR_SET_START(ix);
-			break;
-
-		case MONITOR_TURN_OFF:
-			MONITOR_OFF(ix);
-			MONITOR_SET_OFF(ix);
-			break;
-
-		case MONITOR_RESET_VALUE:
-			srv_mon_reset(static_cast<monitor_id_t>(ix));
-			break;
-
-		case MONITOR_RESET_ALL_VALUE:
-			srv_mon_reset_all(static_cast<monitor_id_t>(ix));
-			break;
-
-		default:
-			ut_error;
-		}
-	}
-}
-
-/****************************************************************//**
-Sum up the current size of every rollback segment of the transaction
-system.
-@return size in pages */
-static
-ulint
-srv_mon_get_rseg_size(void)
-/*=======================*/
-{
-	ulint	total = 0;
-
-	/* rseg_array is a static array, so it can be walked without
-	mutex protection. The result is only an estimate of the total
-	rollback segment size; to avoid mutex contention rseg->mutex
-	is not acquired. */
-	for (ulint slot = 0; slot < TRX_SYS_N_RSEGS; ++slot) {
-		const trx_rseg_t*	rseg = trx_sys->rseg_array[slot];
-
-		if (rseg == NULL) {
-			continue;
-		}
-
-		total += rseg->curr_size;
-	}
-
-	return(total);
-}
-
-/****************************************************************//**
-This function consolidates some existing server counters used
-by "system status variables". These existing system variables do not have
-mechanism to start/stop and reset the counters, so we simulate these
-controls by remembering the corresponding counter values when the
-corresponding monitors are turned on/off/reset, and do appropriate
-mathematics to derive the actual value. Please also refer to
-srv_export_innodb_status() for related global counters used by
-the existing status variables.
-
-The function first reads the current value of the global counter that
-backs monitor_id, then applies set_option to it. For MONITOR_TURN_OFF on
-a monitor that is still on, it calls itself once with MONITOR_GET_VALUE
-before saving the last value. An unknown monitor_id hits ut_error. */
-UNIV_INTERN
-void
-srv_mon_process_existing_counter(
-/*=============================*/
- monitor_id_t monitor_id, /*!< in: the monitor's ID as in
- monitor_counter_id */
- mon_option_t set_option) /*!< in: Turn on/off reset the
- counter */
-{
- mon_type_t value;
- monitor_info_t* monitor_info;
- /* Set to TRUE only for the pending log fsync/write counters below;
- it makes MONITOR_GET_VALUE also track the minimum value. */
- ibool update_min = FALSE;
- buf_pool_stat_t stat;
- buf_pools_list_size_t buf_pools_list_size;
- ulint LRU_len;
- ulint free_len;
- ulint flush_list_len;
-
- monitor_info = srv_mon_get_info(monitor_id);
-
- /* Only monitors flagged MONITOR_EXISTING may be passed in */
- ut_a(monitor_info->monitor_type & MONITOR_EXISTING);
- ut_a(monitor_id < NUM_MONITOR);
-
- /* Get the value from corresponding global variable */
- switch (monitor_id) {
- case MONITOR_OVLD_META_MEM_POOL:
- value = srv_mem_pool_size;
- break;
-
- /* export_vars.innodb_buffer_pool_reads. Num Reads from
- disk (page not in buffer) */
- case MONITOR_OVLD_BUF_POOL_READS:
- value = srv_stats.buf_pool_reads;
- break;
-
- /* innodb_buffer_pool_read_requests, the number of logical
- read requests */
- case MONITOR_OVLD_BUF_POOL_READ_REQUESTS:
- buf_get_total_stat(&stat);
- value = stat.n_page_gets;
- break;
-
- /* innodb_buffer_pool_write_requests, the number of
- write request */
- case MONITOR_OVLD_BUF_POOL_WRITE_REQUEST:
- value = srv_stats.buf_pool_write_requests;
- break;
-
- /* innodb_buffer_pool_wait_free */
- case MONITOR_OVLD_BUF_POOL_WAIT_FREE:
- value = srv_stats.buf_pool_wait_free;
- break;
-
- /* innodb_buffer_pool_read_ahead */
- case MONITOR_OVLD_BUF_POOL_READ_AHEAD:
- buf_get_total_stat(&stat);
- value = stat.n_ra_pages_read;
- break;
-
- /* innodb_buffer_pool_read_ahead_evicted */
- case MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED:
- buf_get_total_stat(&stat);
- value = stat.n_ra_pages_evicted;
- break;
-
- /* innodb_buffer_pool_pages_total */
- case MONITOR_OVLD_BUF_POOL_PAGE_TOTAL:
- value = buf_pool_get_n_pages();
- break;
-
- /* innodb_buffer_pool_pages_misc: total pages minus the pages
- on the LRU and free lists */
- case MONITOR_OVLD_BUF_POOL_PAGE_MISC:
- buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
- value = buf_pool_get_n_pages() - LRU_len - free_len;
- break;
-
- /* innodb_buffer_pool_pages_data */
- case MONITOR_OVLD_BUF_POOL_PAGES_DATA:
- buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
- value = LRU_len;
- break;
-
- /* innodb_buffer_pool_bytes_data */
- case MONITOR_OVLD_BUF_POOL_BYTES_DATA:
- buf_get_total_list_size_in_bytes(&buf_pools_list_size);
- value = buf_pools_list_size.LRU_bytes
- + buf_pools_list_size.unzip_LRU_bytes;
- break;
-
- /* innodb_buffer_pool_pages_dirty */
- case MONITOR_OVLD_BUF_POOL_PAGES_DIRTY:
- buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
- value = flush_list_len;
- break;
-
- /* innodb_buffer_pool_bytes_dirty */
- case MONITOR_OVLD_BUF_POOL_BYTES_DIRTY:
- buf_get_total_list_size_in_bytes(&buf_pools_list_size);
- value = buf_pools_list_size.flush_list_bytes;
- break;
-
- /* innodb_buffer_pool_pages_free */
- case MONITOR_OVLD_BUF_POOL_PAGES_FREE:
- buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
- value = free_len;
- break;
-
- /* innodb_pages_created, the number of pages created */
- case MONITOR_OVLD_PAGE_CREATED:
- buf_get_total_stat(&stat);
- value = stat.n_pages_created;
- break;
-
- /* innodb_pages_written, the number of page written */
- case MONITOR_OVLD_PAGES_WRITTEN:
- buf_get_total_stat(&stat);
- value = stat.n_pages_written;
- break;
-
- /* innodb_index_pages_written, the number of index pages written */
- case MONITOR_OVLD_INDEX_PAGES_WRITTEN:
- value = srv_stats.index_pages_written;
- break;
-
- /* innodb_non_index_pages_written, the number of non index pages written */
- case MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN:
- value = srv_stats.non_index_pages_written;
- break;
-
- /* innodb_pages_read */
- case MONITOR_OVLD_PAGES_READ:
- buf_get_total_stat(&stat);
- value = stat.n_pages_read;
- break;
-
- /* innodb_pages0_read */
- case MONITOR_OVLD_PAGES0_READ:
- value = srv_stats.page0_read;
- break;
-
- /* Number of times secondary index lookup triggered cluster lookup */
- case MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS:
- value = srv_stats.n_sec_rec_cluster_reads;
- break;
- /* Number of times prefix optimization avoided triggering cluster
- lookup */
- case MONITOR_OVLD_INDEX_SEC_REC_CLUSTER_READS_AVOIDED:
- value = srv_stats.n_sec_rec_cluster_reads_avoided;
- break;
-
- /* innodb_data_reads, the total number of data reads
- NOTE(review): this case reads srv_stats.data_read, yet carries the
- same label as MONITOR_OVLD_OS_FILE_READ below; confirm which status
- variable it mirrors. */
- case MONITOR_OVLD_BYTE_READ:
- value = srv_stats.data_read;
- break;
-
- /* innodb_data_writes, the total number of data writes.
- NOTE(review): this case reads srv_stats.data_written, yet carries
- the same label as MONITOR_OVLD_OS_FILE_WRITE below; confirm which
- status variable it mirrors. */
- case MONITOR_OVLD_BYTE_WRITTEN:
- value = srv_stats.data_written;
- break;
-
- /* innodb_data_reads, the total number of data reads. */
- case MONITOR_OVLD_OS_FILE_READ:
- value = os_n_file_reads;
- break;
-
- /* innodb_data_writes, the total number of data writes*/
- case MONITOR_OVLD_OS_FILE_WRITE:
- value = os_n_file_writes;
- break;
-
- /* innodb_data_fsyncs, number of fsync() operations so far. */
- case MONITOR_OVLD_OS_FSYNC:
- value = os_n_fsyncs;
- break;
-
- /* innodb_os_log_written */
- case MONITOR_OVLD_OS_LOG_WRITTEN:
- value = (mon_type_t) srv_stats.os_log_written;
- break;
-
- /* innodb_os_log_fsyncs */
- case MONITOR_OVLD_OS_LOG_FSYNC:
- value = fil_n_log_flushes;
- break;
-
- /* innodb_os_log_pending_fsyncs */
- case MONITOR_OVLD_OS_LOG_PENDING_FSYNC:
- value = fil_n_pending_log_flushes;
- update_min = TRUE;
- break;
-
- /* innodb_os_log_pending_writes */
- case MONITOR_OVLD_OS_LOG_PENDING_WRITES:
- value = srv_stats.os_log_pending_writes;
- update_min = TRUE;
- break;
-
- /* innodb_log_waits */
- case MONITOR_OVLD_LOG_WAITS:
- value = srv_stats.log_waits;
- break;
-
- /* innodb_log_write_requests */
- case MONITOR_OVLD_LOG_WRITE_REQUEST:
- value = srv_stats.log_write_requests;
- break;
-
- /* innodb_log_writes */
- case MONITOR_OVLD_LOG_WRITES:
- value = srv_stats.log_writes;
- break;
-
- /* innodb_dblwr_writes */
- case MONITOR_OVLD_SRV_DBLWR_WRITES:
- value = srv_stats.dblwr_writes;
- break;
-
- /* innodb_dblwr_pages_written */
- case MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN:
- value = srv_stats.dblwr_pages_written;
- break;
-
- /* innodb_page_size */
- case MONITOR_OVLD_SRV_PAGE_SIZE:
- value = UNIV_PAGE_SIZE;
- break;
-
- case MONITOR_OVLD_RWLOCK_S_SPIN_WAITS:
- value = rw_lock_stats.rw_s_spin_wait_count;
- break;
-
- case MONITOR_OVLD_RWLOCK_X_SPIN_WAITS:
- value = rw_lock_stats.rw_x_spin_wait_count;
- break;
-
- case MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS:
- value = rw_lock_stats.rw_s_spin_round_count;
- break;
-
- case MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS:
- value = rw_lock_stats.rw_x_spin_round_count;
- break;
-
- case MONITOR_OVLD_RWLOCK_S_OS_WAITS:
- value = rw_lock_stats.rw_s_os_wait_count;
- break;
-
- case MONITOR_OVLD_RWLOCK_X_OS_WAITS:
- value = rw_lock_stats.rw_x_os_wait_count;
- break;
-
- case MONITOR_OVLD_BUFFER_POOL_SIZE:
- value = srv_buf_pool_size;
- break;
-
- /* innodb_rows_read */
- case MONITOR_OLVD_ROW_READ:
- value = srv_stats.n_rows_read;
- break;
-
- /* innodb_rows_inserted */
- case MONITOR_OLVD_ROW_INSERTED:
- value = srv_stats.n_rows_inserted;
- break;
-
- /* innodb_rows_deleted */
- case MONITOR_OLVD_ROW_DELETED:
- value = srv_stats.n_rows_deleted;
- break;
-
- /* innodb_rows_updated */
- case MONITOR_OLVD_ROW_UPDTATED:
- value = srv_stats.n_rows_updated;
- break;
-
- /* innodb_system_rows_read */
- case MONITOR_OLVD_SYSTEM_ROW_READ:
- value = srv_stats.n_system_rows_read;
- break;
-
- /* innodb_system_rows_inserted */
- case MONITOR_OLVD_SYSTEM_ROW_INSERTED:
- value = srv_stats.n_system_rows_inserted;
- break;
-
- /* innodb_system_rows_deleted */
- case MONITOR_OLVD_SYSTEM_ROW_DELETED:
- value = srv_stats.n_system_rows_deleted;
- break;
-
- /* innodb_system_rows_updated */
- case MONITOR_OLVD_SYSTEM_ROW_UPDATED:
- value = srv_stats.n_system_rows_updated;
- break;
-
- /* innodb_row_lock_current_waits */
- case MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT:
- value = srv_stats.n_lock_wait_current_count;
- break;
-
- /* innodb_row_lock_time */
- case MONITOR_OVLD_LOCK_WAIT_TIME:
- value = srv_stats.n_lock_wait_time / 1000;
- break;
-
- /* innodb_row_lock_time_max */
- case MONITOR_OVLD_LOCK_MAX_WAIT_TIME:
- value = lock_sys->n_lock_max_wait_time / 1000;
- break;
-
- /* innodb_row_lock_time_avg; reported as 0 while no lock wait
- has been counted, which also avoids a division by zero */
- case MONITOR_OVLD_LOCK_AVG_WAIT_TIME:
- if (srv_stats.n_lock_wait_count > 0) {
- value = srv_stats.n_lock_wait_time / 1000
- / srv_stats.n_lock_wait_count;
- } else {
- value = 0;
- }
- break;
-
- /* innodb_row_lock_waits */
- case MONITOR_OVLD_ROW_LOCK_WAIT:
- value = srv_stats.n_lock_wait_count;
- break;
-
- case MONITOR_RSEG_HISTORY_LEN:
- value = trx_sys->rseg_history_len;
- break;
-
- case MONITOR_RSEG_CUR_SIZE:
- value = srv_mon_get_rseg_size();
- break;
-
- case MONITOR_OVLD_N_FILE_OPENED:
- value = fil_n_file_opened;
- break;
-
- case MONITOR_OVLD_IBUF_MERGE_INSERT:
- value = ibuf->n_merged_ops[IBUF_OP_INSERT];
- break;
-
- case MONITOR_OVLD_IBUF_MERGE_DELETE:
- value = ibuf->n_merged_ops[IBUF_OP_DELETE_MARK];
- break;
-
- case MONITOR_OVLD_IBUF_MERGE_PURGE:
- value = ibuf->n_merged_ops[IBUF_OP_DELETE];
- break;
-
- case MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT:
- value = ibuf->n_discarded_ops[IBUF_OP_INSERT];
- break;
-
- case MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE:
- value = ibuf->n_discarded_ops[IBUF_OP_DELETE_MARK];
- break;
-
- case MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE:
- value = ibuf->n_discarded_ops[IBUF_OP_DELETE];
- break;
-
- case MONITOR_OVLD_IBUF_MERGES:
- value = ibuf->n_merges;
- break;
-
- case MONITOR_OVLD_IBUF_SIZE:
- value = ibuf->size;
- break;
-
- case MONITOR_OVLD_SERVER_ACTIVITY:
- value = srv_get_activity_count();
- break;
-
- case MONITOR_OVLD_LSN_FLUSHDISK:
- value = (mon_type_t) log_sys->flushed_to_disk_lsn;
- break;
-
- case MONITOR_OVLD_LSN_CURRENT:
- value = (mon_type_t) log_sys->lsn;
- break;
-
- case MONITOR_OVLD_BUF_OLDEST_LSN:
- value = (mon_type_t) buf_pool_get_oldest_modification();
- break;
-
- case MONITOR_OVLD_LSN_CHECKPOINT:
- value = (mon_type_t) log_sys->last_checkpoint_lsn;
- break;
-
- case MONITOR_OVLD_MAX_AGE_ASYNC:
- value = log_sys->max_modified_age_async;
- break;
-
- case MONITOR_OVLD_MAX_AGE_SYNC:
- value = log_sys->max_modified_age_sync;
- break;
-
- case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH:
- value = btr_cur_n_sea;
- break;
-
- case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE:
- value = btr_cur_n_non_sea;
- break;
-
- case MONITOR_OVLD_PAGE_COMPRESS_SAVED:
- value = srv_stats.page_compression_saved;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512:
- value = srv_stats.page_compression_trim_sect512;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT1024:
- value = srv_stats.page_compression_trim_sect1024;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT2048:
- value = srv_stats.page_compression_trim_sect2048;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096:
- value = srv_stats.page_compression_trim_sect4096;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT8192:
- value = srv_stats.page_compression_trim_sect8192;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT16384:
- value = srv_stats.page_compression_trim_sect16384;
- break;
- case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT32768:
- value = srv_stats.page_compression_trim_sect32768;
- break;
- case MONITOR_OVLD_PAGES_PAGE_COMPRESSED:
- value = srv_stats.pages_page_compressed;
- break;
- case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP:
- value = srv_stats.page_compressed_trim_op;
- break;
- case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED:
- value = srv_stats.page_compressed_trim_op_saved;
- break;
- case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED:
- value = srv_stats.pages_page_decompressed;
- break;
- case MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR:
- value = srv_stats.pages_page_compression_error;
- break;
- case MONITOR_OVLD_PAGES_ENCRYPTED:
- value = srv_stats.pages_encrypted;
- break;
- case MONITOR_OVLD_PAGES_DECRYPTED:
- value = srv_stats.pages_decrypted;
- break;
-
- default:
- ut_error;
- }
-
- /* Apply the requested operation using the value read above */
- switch (set_option) {
- case MONITOR_TURN_ON:
- /* Save the initial counter value in mon_start_value
- field */
- MONITOR_SAVE_START(monitor_id, value);
- return;
-
- case MONITOR_TURN_OFF:
- /* Save the counter value to mon_last_value when we
- turn off the monitor but not yet reset. Note the
- counter has not yet been set to off in the bitmap
- table for normal turn off. We need to check the
- count status (on/off) to avoid resetting the value
- for an already off counter */
- if (MONITOR_IS_ON(monitor_id)) {
- srv_mon_process_existing_counter(monitor_id,
- MONITOR_GET_VALUE);
- MONITOR_SAVE_LAST(monitor_id);
- }
- return;
-
- case MONITOR_GET_VALUE:
- if (MONITOR_IS_ON(monitor_id)) {
-
- /* If MONITOR_DISPLAY_CURRENT bit is on, we
- only record the current value, rather than
- incremental value over a period. Most of
- this type of counters are resource related
- counters such as number of buffer pages etc. */
- if (monitor_info->monitor_type
- & MONITOR_DISPLAY_CURRENT) {
- MONITOR_SET(monitor_id, value);
- } else {
- /* Most status counters are monotonically
- increasing, no need to update their
- minimum values. Only do so
- if "update_min" set to TRUE */
- MONITOR_SET_DIFF(monitor_id, value);
-
- if (update_min
- && (MONITOR_VALUE(monitor_id)
- < MONITOR_MIN_VALUE(monitor_id))) {
- MONITOR_MIN_VALUE(monitor_id) =
- MONITOR_VALUE(monitor_id);
- }
- }
- }
- return;
-
- /* For a monitor that is off, a reset clears the saved last
- value; a monitor that is on is left untouched here */
- case MONITOR_RESET_VALUE:
- if (!MONITOR_IS_ON(monitor_id)) {
- MONITOR_LAST_VALUE(monitor_id) = 0;
- }
- return;
-
- /* Nothing special for reset all operation for these existing
- counters */
- case MONITOR_RESET_ALL_VALUE:
- return;
- }
-}
-
-/*************************************************************//**
-Reset a monitor: create a new baseline from the current monitor
-value, recorded in MONITOR_VALUE_RESET(monitor), then clear the
-current, max and min values and stamp the reset time. */
-UNIV_INTERN
-void
-srv_mon_reset(
-/*==========*/
-	monitor_id_t	monitor)	/*!< in: monitor id */
-{
-	const ibool	was_on = MONITOR_IS_ON(monitor);
-
-	/* The counter is switched off for the duration of the reset
-	and switched back on at the end. */
-	if (was_on) {
-		MONITOR_OFF(monitor);
-	}
-
-	/* The max/min values since monitor start have to be calculated
-	before the current monitor value is wiped. */
-	srv_mon_calc_max_since_start(monitor);
-	srv_mon_calc_min_since_start(monitor);
-
-	if (!(innodb_counter_info[monitor].monitor_type
-	      & MONITOR_DISPLAY_CURRENT)) {
-		/* Incremental counter: remember the new baseline */
-		MONITOR_VALUE_RESET(monitor) = MONITOR_VALUE_RESET(monitor)
-			+ MONITOR_VALUE(monitor);
-	} else {
-		/* Monitors with the MONITOR_DISPLAY_CURRENT bit are not
-		incremental, so there is no reset value to remember. */
-		MONITOR_VALUE_RESET(monitor) = 0;
-	}
-
-	/* Clear the counter value together with its max/min */
-	MONITOR_VALUE(monitor) = 0;
-	MONITOR_MAX_VALUE(monitor) = MAX_RESERVED;
-	MONITOR_MIN_VALUE(monitor) = MIN_RESERVED;
-
-	MONITOR_FIELD((monitor), mon_reset_time) = time(NULL);
-
-	if (was_on) {
-		MONITOR_ON(monitor);
-	}
-}
-
-/*************************************************************//**
-Walk all monitors and turn on, initialize and start every counter
-whose monitor_type carries the MONITOR_DEFAULT_ON bit. */
-UNIV_INTERN
-void
-srv_mon_default_on(void)
-/*====================*/
-{
-	for (ulint id = 0; id < NUM_MONITOR; id++) {
-		if (!(innodb_counter_info[id].monitor_type
-		      & MONITOR_DEFAULT_ON)) {
-			/* Not a default-on counter, leave it alone */
-			continue;
-		}
-
-		MONITOR_ON(id);
-		MONITOR_INIT(id);
-		MONITOR_SET_START(id);
-	}
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc
deleted file mode 100644
index cc5d1320142..00000000000
--- a/storage/xtradb/srv/srv0srv.cc
+++ /dev/null
@@ -1,3693 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, 2009 Google Inc.
-Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file srv/srv0srv.cc
-The database server main program
-
-Created 10/8/1995 Heikki Tuuri
-*******************************************************/
-
-/* Dummy comment */
-#include "srv0srv.h"
-
-#include "ut0mem.h"
-#include "ut0ut.h"
-#include "os0proc.h"
-#include "mem0mem.h"
-#include "mem0pool.h"
-#include "sync0sync.h"
-#include "que0que.h"
-#include "log0online.h"
-#include "log0recv.h"
-#include "pars0pars.h"
-#include "usr0sess.h"
-#include "lock0lock.h"
-#include "trx0purge.h"
-#include "ibuf0ibuf.h"
-#include "buf0flu.h"
-#include "buf0lru.h"
-#include "btr0sea.h"
-#include "dict0load.h"
-#include "dict0boot.h"
-#include "srv0start.h"
-#include "row0mysql.h"
-#include "row0log.h"
-#include "ha_prototypes.h"
-#include "trx0i_s.h"
-#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
-#include "srv0mon.h"
-#include "ut0crc32.h"
-#include "os0file.h"
-#include "btr0defragment.h"
-#include "mysql/plugin.h"
-#include "mysql/service_thd_wait.h"
-#include "fil0fil.h"
-#include "fil0crypt.h"
-#include "fil0pagecompress.h"
-#include <my_rdtsc.h>
-#include "btr0scrub.h"
-
-/* prototypes for new functions added to ha_innodb.cc */
-ibool innobase_get_slow_log();
-
-#ifdef WITH_WSREP
-extern int wsrep_debug;
-extern int wsrep_trx_is_aborting(void *thd_ptr);
-#endif
-/* The following counter is incremented whenever there is some user activity
-in the server */
-UNIV_INTERN ulint srv_activity_count = 0;
-
-/* The following is the maximum allowed duration of a lock wait. */
-UNIV_INTERN ulong srv_fatal_semaphore_wait_threshold = DEFAULT_SRV_FATAL_SEMAPHORE_TIMEOUT;
-
-/**/
-UNIV_INTERN long long srv_kill_idle_transaction = 0;
-
-/* How much data manipulation language (DML) statements need to be delayed,
-in microseconds, in order to reduce the lagging of the purge thread. */
-UNIV_INTERN ulint srv_dml_needed_delay = 0;
-
-UNIV_INTERN bool srv_monitor_active;
-UNIV_INTERN bool srv_error_monitor_active;
-
-UNIV_INTERN bool srv_buf_dump_thread_active;
-
-UNIV_INTERN bool srv_dict_stats_thread_active;
-
-UNIV_INTERN my_bool srv_scrub_log;
-
-UNIV_INTERN const char* srv_main_thread_op_info = "";
-
-/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
-const char srv_mysql50_table_name_prefix[10] = "#mysql50#";
-
-/* Server parameters which are read from the initfile */
-
-/* The following three are dir paths which are catenated before file
-names, where the file name itself may also contain a path */
-
-UNIV_INTERN char* srv_data_home = NULL;
-
-/** Rollback files directory, can be absolute. */
-UNIV_INTERN char* srv_undo_dir = NULL;
-
-/** The number of tablespaces to use for rollback segments. */
-UNIV_INTERN ulong srv_undo_tablespaces = 8;
-
-/** The number of UNDO tablespaces that are open and ready to use. */
-UNIV_INTERN ulint srv_undo_tablespaces_open = 8;
-
-/* The number of rollback segments to use */
-UNIV_INTERN ulong srv_undo_logs = 1;
-
-#ifdef UNIV_LOG_ARCHIVE
-UNIV_INTERN char* srv_arch_dir = NULL;
-UNIV_INTERN ulong srv_log_arch_expire_sec = 0;
-#endif /* UNIV_LOG_ARCHIVE */
-
-/** Set if InnoDB must operate in read-only mode. We don't do any
-recovery and open all tables in RO mode instead of RW mode. We don't
-sync the max trx id to disk either. */
-UNIV_INTERN my_bool srv_read_only_mode;
-/** Store each table created by a user in its own file; data
-dictionary tables are in the system tablespace 0 */
-UNIV_INTERN my_bool srv_file_per_table;
-/** The file format to use on new *.ibd files. */
-UNIV_INTERN ulint srv_file_format = 0;
-/** Whether to check file format during startup. A value of
-UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
-set it to the highest format we support. */
-UNIV_INTERN ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX;
-/** Set if InnoDB operates in read-only mode or innodb-force-recovery
-is greater than SRV_FORCE_NO_TRX_UNDO. */
-UNIV_INTERN my_bool high_level_read_only;
-
-#if UNIV_FORMAT_A
-# error "UNIV_FORMAT_A must be 0!"
-#endif
-
-/** Place locks to records only i.e. do not use next-key locking except
-on duplicate key checking and foreign key checking */
-UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
-/** Sort buffer size in index creation */
-UNIV_INTERN ulong srv_sort_buf_size = 1048576;
-/** Maximum modification log file size for online index creation */
-UNIV_INTERN unsigned long long srv_online_max_size;
-
-/* If this flag is TRUE, then we will use the native aio of the
-OS (provided we compiled Innobase with it in), otherwise we will
-use simulated aio we build below with threads.
-Currently we support native aio on windows and linux */
-/* make srv_use_native_aio to be visible for other plugins */
-my_bool srv_use_native_aio = TRUE;
-UNIV_INTERN my_bool srv_numa_interleave = FALSE;
-
-/* Default compression level if page compression is used and no compression
-level is set for the table*/
-UNIV_INTERN long srv_compress_zlib_level = 6;
-/* If this flag is TRUE, then we will use fallocate(PUNCH_HOLE)
-on the pages */
-UNIV_INTERN my_bool srv_use_trim = FALSE;
-/* If this flag is TRUE, then we will use posix fallocate for file extension */
-UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE;
-/* If this flag is TRUE, then we disable doublewrite buffer */
-UNIV_INTERN my_bool srv_use_atomic_writes = FALSE;
-/* The algorithm used for compressing pages when page compression is used */
-UNIV_INTERN ulong innodb_compression_algorithm = PAGE_ZLIB_ALGORITHM;
-/* Number of threads used for multi-threaded flush */
-UNIV_INTERN long srv_mtflush_threads = MTFLUSH_DEFAULT_WORKER;
-/* If this flag is TRUE, then we will use multi threaded flush. */
-UNIV_INTERN my_bool srv_use_mtflush = FALSE;
-
-#ifdef __WIN__
-/* Windows native condition variables. We use runtime loading / function
-pointers, because they are not available on Windows Server 2003 and
-Windows XP/2000.
-
-We use condition variables for events on Windows if possible, even if os_event
-resembles Windows kernel event object well API-wise. The reason is
-performance, kernel objects are heavyweights and WaitForSingleObject() is a
-performance killer causing calling thread to context switch. Besides, Innodb
-is preallocating large number (often millions) of os_events. With kernel event
-objects it takes a big chunk out of non-paged pool, which is better suited
-for tasks like IO than for storing idle event objects. */
-UNIV_INTERN ibool srv_use_native_conditions = TRUE;
-#endif /* __WIN__ */
-
-UNIV_INTERN ulint srv_n_data_files = 0;
-UNIV_INTERN char** srv_data_file_names = NULL;
-/* size in database pages */
-UNIV_INTERN ulint* srv_data_file_sizes = NULL;
-
-/** Whether the redo log tracking is currently enabled. Note that it is
-possible for the log tracker thread to be running and the tracking to be
-disabled */
-UNIV_INTERN my_bool srv_track_changed_pages = FALSE;
-
-UNIV_INTERN ulonglong srv_max_bitmap_file_size = 100 * 1024 * 1024;
-
-UNIV_INTERN ulonglong srv_max_changed_pages = 0;
-
-/** When TRUE, fake change transactions take S rather than X row locks.
- When FALSE, row locks are not taken at all. */
-UNIV_INTERN my_bool srv_fake_changes_locks = TRUE;
-
-/* if TRUE, then we auto-extend the last data file */
-UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
-/* if != 0, this tells the max size auto-extending may increase the
-last data file size */
-UNIV_INTERN ulint srv_last_file_size_max = 0;
-/* If the last data file is auto-extended, we add this
-many pages to it at a time */
-UNIV_INTERN ulong srv_auto_extend_increment = 8;
-UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL;
-
-/* If the following is TRUE we do not allow inserts etc. This protects
-the user from forgetting the 'newraw' keyword to my.cnf */
-
-UNIV_INTERN ibool srv_created_new_raw = FALSE;
-
-UNIV_INTERN char* srv_log_group_home_dir = NULL;
-
-UNIV_INTERN ulong srv_n_log_files = SRV_N_LOG_FILES_MAX;
-/* size in database pages */
-UNIV_INTERN ib_uint64_t srv_log_file_size = IB_UINT64_MAX;
-UNIV_INTERN ib_uint64_t srv_log_file_size_requested;
-/* size in database pages */
-UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
-UNIV_INTERN uint srv_flush_log_at_timeout = 1;
-UNIV_INTERN ulong srv_page_size = UNIV_PAGE_SIZE_DEF;
-UNIV_INTERN ulong srv_page_size_shift = UNIV_PAGE_SIZE_SHIFT_DEF;
-UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE;
-
-/* Try to flush dirty pages so as to avoid IO bursts at
-the checkpoints. */
-UNIV_INTERN char srv_adaptive_flushing = TRUE;
-
-UNIV_INTERN ulong srv_show_locks_held = 10;
-UNIV_INTERN ulong srv_show_verbose_locks = 0;
-
-/** Maximum number of times allowed to conditionally acquire
-mutex before switching to blocking wait on the mutex */
-#define MAX_MUTEX_NOWAIT 20
-
-/** Check whether the number of failed nonblocking mutex
-acquisition attempts exceeds maximum allowed value. If so,
-srv_printf_innodb_monitor() will request mutex acquisition
-with mutex_enter(), which will wait until it gets the mutex. */
-#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
-
-#ifdef WITH_INNODB_DISALLOW_WRITES
-UNIV_INTERN os_event_t srv_allow_writes_event;
-#endif /* WITH_INNODB_DISALLOW_WRITES */
-
-/** The sort order table of the MySQL latin1_swedish_ci character set
-collation */
-UNIV_INTERN const byte* srv_latin1_ordering;
-
-/* use os/external memory allocator */
-UNIV_INTERN my_bool srv_use_sys_malloc = TRUE;
-/* requested size in kilobytes */
-UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX;
-/* requested number of buffer pool instances */
-UNIV_INTERN ulint srv_buf_pool_instances = 1;
-/* number of locks to protect buf_pool->page_hash */
-UNIV_INTERN ulong srv_n_page_hash_locks = 16;
-/** Scan depth for LRU flush batch i.e.: number of blocks scanned*/
-UNIV_INTERN ulong srv_LRU_scan_depth = 1024;
-/** whether or not to flush neighbors of a block */
-UNIV_INTERN ulong srv_flush_neighbors = 1;
-/* previously requested size */
-UNIV_INTERN ulint srv_buf_pool_old_size;
-/* current size in kilobytes */
-UNIV_INTERN ulint srv_buf_pool_curr_size = 0;
-/* dump this many % of each buffer pool during BP dump */
-UNIV_INTERN ulong srv_buf_pool_dump_pct;
-/* size in bytes */
-UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
-UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
-
-/* Defragmentation */
-UNIV_INTERN my_bool srv_defragment = FALSE;
-UNIV_INTERN uint srv_defragment_n_pages = 7;
-UNIV_INTERN uint srv_defragment_stats_accuracy = 0;
-UNIV_INTERN uint srv_defragment_fill_factor_n_recs = 20;
-UNIV_INTERN double srv_defragment_fill_factor = 0.9;
-UNIV_INTERN uint srv_defragment_frequency =
- SRV_DEFRAGMENT_FREQUENCY_DEFAULT;
-UNIV_INTERN ulonglong srv_defragment_interval = 0;
-
-/** Query thread preflush algorithm */
-UNIV_INTERN ulong srv_foreground_preflush
- = SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF;
-
-/** The maximum time limit for a single LRU tail flush iteration by the page
-cleaner thread */
-UNIV_INTERN ulint srv_cleaner_max_lru_time = 1000;
-
-/** The maximum time limit for a single flush list flush iteration by the page
-cleaner thread */
-UNIV_INTERN ulint srv_cleaner_max_flush_time = 1000;
-
-/** Page cleaner flush list flush batches are further divided into this chunk
-size */
-UNIV_INTERN ulint srv_cleaner_flush_chunk_size = 100;
-
-/** Page cleaner LRU list flush batches are further divided into this chunk
-size */
-UNIV_INTERN ulint srv_cleaner_lru_chunk_size = 100;
-
-/** If free list length is lower than this percentage of srv_LRU_scan_depth,
-page cleaner LRU flushes will issue flush batches to the same instance in a
-row */
-UNIV_INTERN ulint srv_cleaner_free_list_lwm = 10;
-
-/** If TRUE, page cleaner heuristics use evicted instead of flushed page counts
-for its heuristics */
-UNIV_INTERN my_bool srv_cleaner_eviction_factor = FALSE;
-
-/** Page cleaner LSN age factor formula option */
-UNIV_INTERN ulong srv_cleaner_lsn_age_factor
- = SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT;
-
-/** Empty free list for a query thread handling algorithm option */
-UNIV_INTERN ulong srv_empty_free_list_algorithm
- = SRV_EMPTY_FREE_LIST_BACKOFF;
-
-UNIV_INTERN ulong srv_idle_flush_pct = 100;
-
-/* This parameter is deprecated. Use srv_n_io_[read|write]_threads
-instead. */
-UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
-UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
-UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;
-
-/* Switch to enable random read ahead. */
-UNIV_INTERN my_bool srv_random_read_ahead = FALSE;
-
-/* The log block size */
-UNIV_INTERN ulint srv_log_block_size = 0;
-
-/* User settable value of the number of pages that must be present
-in the buffer cache and accessed sequentially for InnoDB to trigger a
-readahead request. */
-UNIV_INTERN ulong srv_read_ahead_threshold = 56;
-
-#ifdef UNIV_LOG_ARCHIVE
-UNIV_INTERN bool srv_log_archive_on;
-UNIV_INTERN bool srv_archive_recovery;
-UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
-/* This parameter is used to throttle the number of insert buffers that are
-merged in a batch. By increasing this parameter on a faster disk you can
-possibly reduce the number of I/O operations performed to complete the
-merge operation. The value of this parameter is used as is by the
-background loop when the system is idle (low load), on a busy system
-the parameter is scaled down by a factor of 4, this is to avoid putting
-a heavier load on the I/O sub system. */
-
-UNIV_INTERN ulong srv_insert_buffer_batch_size = 20;
-
-UNIV_INTERN char* srv_file_flush_method_str = NULL;
-UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-
-UNIV_INTERN ulint srv_max_n_open_files = 300;
-
-/* Number of IO operations per second the server can do */
-UNIV_INTERN ulong srv_io_capacity = 200;
-UNIV_INTERN ulong srv_max_io_capacity = 400;
-
-/* The InnoDB main thread tries to keep the ratio of modified pages
-in the buffer pool to all database pages in the buffer pool smaller than
-the following number. But it is not guaranteed that the value stays below
-that during a time of heavy update/insert activity. */
-
-UNIV_INTERN double srv_max_buf_pool_modified_pct = 75.0;
-UNIV_INTERN double srv_max_dirty_pages_pct_lwm = 50.0;
-
-/* This is the percentage of log capacity at which adaptive flushing,
-if enabled, will kick in. */
-UNIV_INTERN double srv_adaptive_flushing_lwm = 10.0;
-
-/* Number of iterations over which adaptive flushing is averaged. */
-UNIV_INTERN ulong srv_flushing_avg_loops = 30;
-
-/* The tid of the cleaner thread */
-UNIV_INTERN os_tid_t srv_cleaner_tid;
-
-/* The tid of the LRU manager thread */
-UNIV_INTERN os_tid_t srv_lru_manager_tid;
-
-/* The tids of the purge threads */
-UNIV_INTERN os_tid_t srv_purge_tids[SRV_MAX_N_PURGE_THREADS];
-
-/* The tids of the I/O threads */
-UNIV_INTERN os_tid_t srv_io_tids[SRV_MAX_N_IO_THREADS];
-
-/* The tid of the master thread */
-UNIV_INTERN os_tid_t srv_master_tid;
-
-/* The relative scheduling priority of the cleaner and LRU manager threads */
-UNIV_INTERN ulint srv_sched_priority_cleaner = 19;
-
-/* The relative scheduling priority of the purge threads */
-UNIV_INTERN ulint srv_sched_priority_purge = 19;
-
-/* The relative scheduling priority of the I/O threads */
-UNIV_INTERN ulint srv_sched_priority_io = 19;
-
-/* The relative scheduling priority of the master thread */
-UNIV_INTERN ulint srv_sched_priority_master = 19;
-
-/* The relative priority of the current thread. If 0, low priority; if 1, high
-priority. */
-UNIV_INTERN UNIV_THREAD_LOCAL ulint srv_current_thread_priority = 0;
-
-/* The relative priority of the purge coordinator and worker threads. */
-UNIV_INTERN my_bool srv_purge_thread_priority = FALSE;
-
-/* The relative priority of the I/O threads. */
-UNIV_INTERN my_bool srv_io_thread_priority = FALSE;
-
-/* The relative priority of the cleaner thread. */
-UNIV_INTERN my_bool srv_cleaner_thread_priority = FALSE;
-
-/* The relative priority of the master thread. */
-UNIV_INTERN my_bool srv_master_thread_priority = FALSE;
-
-/* The number of purge threads to use.*/
-UNIV_INTERN ulong srv_n_purge_threads;
-
-/* the number of pages to purge in one batch */
-UNIV_INTERN ulong srv_purge_batch_size = 20;
-
-/* Internal setting for "innodb_stats_method". Decides how InnoDB treats
-NULL value when collecting statistics. By default, it is set to
-SRV_STATS_NULLS_EQUAL(0), i.e. all NULL values are treated as equal */
-UNIV_INTERN ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
-
-UNIV_INTERN srv_stats_t srv_stats;
-
-/* structure to pass status variables to MySQL */
-UNIV_INTERN export_var_t export_vars;
-
-/** Normally 0. When nonzero, skip some phases of crash recovery,
-starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered
-by SELECT or mysqldump. When this is nonzero, we do not allow any user
-modifications to the data. */
-UNIV_INTERN ulong srv_force_recovery;
-
-/** Print all user-level transactions deadlocks to mysqld stderr */
-
-UNIV_INTERN my_bool srv_print_all_deadlocks = FALSE;
-
-/* Produce a stacktrace on long semaphore wait */
-UNIV_INTERN my_bool srv_use_stacktrace = FALSE;
-
-/** Enable INFORMATION_SCHEMA.innodb_cmp_per_index */
-UNIV_INTERN my_bool srv_cmp_per_index_enabled = FALSE;
-
-/* If the following is set to 1 then we do not run purge and insert buffer
-merge to completion before shutdown. If it is set to 2, do not even flush the
-buffer pool to data files at the shutdown: we effectively 'crash'
-InnoDB (but lose no committed transactions). */
-UNIV_INTERN ulint srv_fast_shutdown = 0;
-
-/* Generate an innodb_status.<pid> file */
-UNIV_INTERN ibool srv_innodb_status = FALSE;
-
-/* Optimize prefix index queries to skip cluster index lookup when possible */
-/* Enables or disables this prefix optimization. Disabled by default. */
-UNIV_INTERN my_bool srv_prefix_index_cluster_optimization = 0;
-
-/* When estimating number of different key values in an index, sample
-this many index pages, there are 2 ways to calculate statistics:
-* persistent stats that are calculated by ANALYZE TABLE and saved
- in the innodb database.
-* quick transient stats, that are used if persistent stats for the given
- table/index are not found in the innodb database */
-UNIV_INTERN unsigned long long srv_stats_transient_sample_pages = 8;
-UNIV_INTERN my_bool srv_stats_persistent = TRUE;
-UNIV_INTERN my_bool srv_stats_include_delete_marked = FALSE;
-UNIV_INTERN unsigned long long srv_stats_persistent_sample_pages = 20;
-UNIV_INTERN my_bool srv_stats_auto_recalc = TRUE;
-
-/* The number of rows modified before we calculate new statistics (default 0
-= current limits) */
-UNIV_INTERN unsigned long long srv_stats_modified_counter = 0;
-
-/* Enable traditional statistic calculation based on number of configured
-pages default true. */
-UNIV_INTERN my_bool srv_stats_sample_traditional = TRUE;
-
-UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
-
-/** doublewrite buffer is 1MB in size i.e.: it can hold 128 16K pages.
-The following parameter is the size of the buffer that is used for
-batch flushing i.e.: LRU flushing and flush_list flushing. The rest
-of the pages are used for single page flushing. */
-UNIV_INTERN ulong srv_doublewrite_batch_size = 120;
-
-UNIV_INTERN ulong srv_replication_delay = 0;
-
-UNIV_INTERN bool srv_apply_log_only;
-
-UNIV_INTERN bool srv_backup_mode;
-UNIV_INTERN bool srv_close_files;
-UNIV_INTERN bool srv_xtrabackup;
-
-UNIV_INTERN ulong srv_pass_corrupt_table = 0; /* 0:disable 1:enable */
-
-UNIV_INTERN ulong srv_log_checksum_algorithm =
- SRV_CHECKSUM_ALGORITHM_INNODB;
-
-/*-------------------------------------------*/
-#ifdef HAVE_MEMORY_BARRIER
-/* No idea to wait long with memory barriers */
-UNIV_INTERN ulong srv_n_spin_wait_rounds = 15;
-#else
-UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
-#endif
-UNIV_INTERN ulong srv_spin_wait_delay = 6;
-UNIV_INTERN ibool srv_priority_boost = TRUE;
-
-#ifdef UNIV_DEBUG
-UNIV_INTERN ibool srv_print_thread_releases = FALSE;
-UNIV_INTERN ibool srv_print_lock_waits = FALSE;
-UNIV_INTERN ibool srv_print_buf_io = FALSE;
-UNIV_INTERN ibool srv_print_log_io = FALSE;
-UNIV_INTERN ibool srv_print_latch_waits = FALSE;
-#endif /* UNIV_DEBUG */
-
-static ulint srv_n_rows_inserted_old = 0;
-static ulint srv_n_rows_updated_old = 0;
-static ulint srv_n_rows_deleted_old = 0;
-static ulint srv_n_rows_read_old = 0;
-static ulint srv_n_system_rows_inserted_old = 0;
-static ulint srv_n_system_rows_updated_old = 0;
-static ulint srv_n_system_rows_deleted_old = 0;
-static ulint srv_n_system_rows_read_old = 0;
-
-UNIV_INTERN ulint srv_truncated_status_writes = 0;
-UNIV_INTERN ulint srv_available_undo_logs = 0;
-
-UNIV_INTERN ib_uint64_t srv_page_compression_saved = 0;
-UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect512 = 0;
-UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect4096 = 0;
-UNIV_INTERN ib_uint64_t srv_index_pages_written = 0;
-UNIV_INTERN ib_uint64_t srv_non_index_pages_written = 0;
-UNIV_INTERN ib_uint64_t srv_pages_page_compressed = 0;
-UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op = 0;
-UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op_saved = 0;
-UNIV_INTERN ib_uint64_t srv_index_page_decompressed = 0;
-
-/* Ensure status variables are on separate cache lines */
-
-#define CACHE_ALIGNED MY_ATTRIBUTE((aligned (CACHE_LINE_SIZE)))
-
-UNIV_INTERN byte
-counters_pad_start[CACHE_LINE_SIZE] MY_ATTRIBUTE((unused)) = {0};
-
-UNIV_INTERN ulint srv_read_views_memory CACHE_ALIGNED = 0;
-UNIV_INTERN ulint srv_descriptors_memory CACHE_ALIGNED = 0;
-
-UNIV_INTERN byte
-counters_pad_end[CACHE_LINE_SIZE] MY_ATTRIBUTE((unused)) = {0};
-
-/* Set the following to 0 if you want InnoDB to write messages on
-stderr on startup/shutdown. */
-UNIV_INTERN ibool srv_print_verbose_log = TRUE;
-UNIV_INTERN my_bool srv_print_innodb_monitor = FALSE;
-UNIV_INTERN my_bool srv_print_innodb_lock_monitor = FALSE;
-UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE;
-UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE;
-
-/** If this flag is set tables without primary key are not allowed */
-UNIV_INTERN my_bool srv_force_primary_key = FALSE;
-
-/* Array of English strings describing the current state of an
-i/o handler thread */
-
-UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
-UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
-
-UNIV_INTERN time_t srv_last_monitor_time;
-
-static ib_mutex_t srv_innodb_monitor_mutex;
-
-/* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
-UNIV_INTERN ib_mutex_t srv_monitor_file_mutex;
-
-#ifdef UNIV_PFS_MUTEX
-# ifndef HAVE_ATOMIC_BUILTINS
-/* Key to register server_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t server_mutex_key;
-# endif /* !HAVE_ATOMIC_BUILTINS */
-/** Key to register srv_innodb_monitor_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_innodb_monitor_mutex_key;
-/** Key to register srv_monitor_file_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key;
-/** Key to register srv_dict_tmpfile_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
-/** Key to register the mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
-/** Key to register srv_sys_t::mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_sys_mutex_key;
-/** Key to register srv_sys_t::tasks_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_sys_tasks_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/** Temporary file for innodb monitor output */
-UNIV_INTERN FILE* srv_monitor_file;
-/** Mutex for locking srv_dict_tmpfile. Not created if srv_read_only_mode.
-This mutex has a very high rank; threads reserving it should not
-be holding any InnoDB latches. */
-UNIV_INTERN ib_mutex_t srv_dict_tmpfile_mutex;
-/** Temporary file for output from the data dictionary */
-UNIV_INTERN FILE* srv_dict_tmpfile;
-/** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode.
-This mutex has a very low rank; threads reserving it should not
-acquire any further latches or sleep before releasing this one. */
-UNIV_INTERN ib_mutex_t srv_misc_tmpfile_mutex;
-/** Temporary file for miscellaneous diagnostic output */
-UNIV_INTERN FILE* srv_misc_tmpfile;
-
-UNIV_INTERN ulint srv_main_thread_process_no = 0;
-UNIV_INTERN ulint srv_main_thread_id = 0;
-
-/* The following counts are used by the srv_master_thread. */
-
-/** Iterations of the loop bounded by 'srv_active' label. */
-static ulint srv_main_active_loops = 0;
-/** Iterations of the loop bounded by the 'srv_idle' label. */
-static ulint srv_main_idle_loops = 0;
-/** Iterations of the loop bounded by the 'srv_shutdown' label. */
-static ulint srv_main_shutdown_loops = 0;
-/** Log writes involving flush. */
-static ulint srv_log_writes_and_flush = 0;
-
-/* This is only ever touched by the master thread. It records the
-time when the last flush of log file has happened. The master
-thread ensures that we flush the log files at least once per
-second. */
-static time_t srv_last_log_flush_time;
-
-/** Enable semaphore request instrumentation */
-UNIV_INTERN my_bool srv_instrument_semaphores = FALSE;
-
-/* Interval in seconds at which various tasks are performed by the
-master thread when server is active. In order to balance the workload,
-we should try to keep intervals such that they are not multiple of
-each other. For example, if we have intervals for various tasks
-defined as 5, 10, 15, 60 then all tasks will be performed when
-current_time % 60 == 0 and no tasks will be performed when
-current_time % 5 != 0. */
-
-# define SRV_MASTER_CHECKPOINT_INTERVAL (7)
-# define SRV_MASTER_PURGE_INTERVAL (10)
-#ifdef MEM_PERIODIC_CHECK
-# define SRV_MASTER_MEM_VALIDATE_INTERVAL (13)
-#endif /* MEM_PERIODIC_CHECK */
-# define SRV_MASTER_DICT_LRU_INTERVAL (47)
-
-/** Buffer pool dump status frequency in percentages */
-UNIV_INTERN ulong srv_buf_dump_status_frequency = 0;
-
-/** Acquire the system_mutex. */
-#define srv_sys_mutex_enter() do { \
- mutex_enter(&srv_sys.mutex); \
-} while (0)
-
-/** Test if the system mutex is owned. */
-#define srv_sys_mutex_own() (mutex_own(&srv_sys.mutex) \
- && !srv_read_only_mode)
-
-/** Release the system mutex. */
-#define srv_sys_mutex_exit() do { \
- mutex_exit(&srv_sys.mutex); \
-} while (0)
-
-#define fetch_lock_wait_timeout(trx) \
- ((trx)->lock.allowed_to_wait \
- ? thd_lock_wait_timeout((trx)->mysql_thd) \
- : 0)
-
-/** Simulate compression failures. */
-UNIV_INTERN uint srv_simulate_comp_failures = 0;
-
-/*
- IMPLEMENTATION OF THE SERVER MAIN PROGRAM
- =========================================
-
-There is the following analogue between this database
-server and an operating system kernel:
-
-DB concept equivalent OS concept
----------- ---------------------
-transaction -- process;
-
-query thread -- thread;
-
-lock -- semaphore;
-
-kernel -- kernel;
-
-query thread execution:
-(a) without lock mutex
-reserved -- process executing in user mode;
-(b) with lock mutex reserved
- -- process executing in kernel mode;
-
-The server has several background threads all running at the same
-priority as user threads. It periodically checks if there is anything
-happening in the server which requires intervention of the master
-thread. Such situations may be, for example, when flushing of dirty
-blocks is needed in the buffer pool or old version of database rows
-have to be cleaned away (purged). The user can configure a separate
-dedicated purge thread(s) too, in which case the master thread does not
-do any purging.
-
-The threads which we call user threads serve the queries of the MySQL
-server. They run at normal priority.
-
-When there is no activity in the system, also the master thread
-suspends itself to wait for an event making the server totally silent.
-
-There is still one complication in our server design. If a
-background utility thread obtains a resource (e.g., mutex) needed by a user
-thread, and there is also some other user activity in the system,
-the user thread may have to wait indefinitely long for the
-resource, as the OS does not schedule a background thread if
-there is some other runnable user thread. This problem is called
-priority inversion in real-time programming.
-
-One solution to the priority inversion problem would be to keep record
-of which thread owns which resource and in the above case boost the
-priority of the background thread so that it will be scheduled and it
-can release the resource. This solution is called priority inheritance
-in real-time programming. A drawback of this solution is that the overhead
-of acquiring a mutex increases slightly, maybe 0.2 microseconds on a 100
-MHz Pentium, because the thread has to call os_thread_get_curr_id. This may
-be compared to 0.5 microsecond overhead for a mutex lock-unlock pair. Note
-that the thread cannot store the information in the resource, say a mutex,
-itself, because competing threads could wipe out the information if it is
-stored before acquiring the mutex, and if it is stored afterwards, the
-information is outdated for the time of one machine instruction, at least.
-(To be precise, the information could be stored to lock_word in mutex if
-the machine supports atomic swap.)
-
-The above solution with priority inheritance may become actual in the
-future, currently we do not implement any priority twiddling solution.
-Our general aim is to reduce the contention of all mutexes by making
-them more fine grained.
-
-The thread table contains information of the current status of each
-thread existing in the system, and also the event semaphores used in
-suspending the master thread and utility threads when they have nothing
-to do. The thread table can be seen as an analogue to the process table
-in a traditional Unix implementation. */
-
-/** The server system struct.
-Holds the queue of tasks and the table of background thread slots. Slot 0
-(SRV_MASTER_SLOT) is used for the master thread, slot 1 (SRV_PURGE_SLOT) for
-the purge coordinator, and srv_reserve_slot() hands out slots from index 2
-onwards to worker threads. */
-struct srv_sys_t{
- ib_mutex_t tasks_mutex; /*!< variable protecting the
- tasks queue */
- UT_LIST_BASE_NODE_T(que_thr_t)
- tasks; /*!< task queue */
-
- ib_mutex_t mutex; /*!< variable protecting the
- fields below. */
- ulint n_sys_threads; /*!< number of sys_threads[]
- slots set up by srv_init():
- srv_n_purge_threads + 1, or 0
- in read-only mode */
-
- /* NOTE(review): the capacity 32 + 1 presumably mirrors
- SRV_MAX_N_PURGE_THREADS plus the master slot - confirm. */
- srv_slot_t sys_threads[32 + 1]; /*!< server thread table;
- os_event_set() and
- os_event_reset() on
- sys_threads[]->event are
- covered by srv_sys_t::mutex */
-
- ulint n_threads_active[SRV_MASTER + 1];
- /*!< number of threads active
- in a thread class; indexed by
- srv_thread_type */
-
- srv_stats_t::ulint_ctr_1_t
- activity_count; /*!< For tracking server
- activity */
- srv_stats_t::ulint_ctr_1_t
- ibuf_merge_activity_count;/*!< For tracking change
- buffer merge activity, a subset
- of overall server activity */
-};
-
-#ifndef HAVE_ATOMIC_BUILTINS
-/** Mutex protecting some server global variables. */
-UNIV_INTERN ib_mutex_t server_mutex;
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
-static srv_sys_t srv_sys;
-
-/** Event to signal srv_monitor_thread. Not protected by a mutex.
-Set after setting srv_print_innodb_monitor. */
-UNIV_INTERN os_event_t srv_monitor_event;
-
-/** Event to signal the shutdown of srv_error_monitor_thread.
-Not protected by a mutex. */
-UNIV_INTERN os_event_t srv_error_event;
-
-/** Event for waking up buf_dump_thread. Not protected by a mutex.
-Set on shutdown or by buf_dump_start() or buf_load_start(). */
-UNIV_INTERN os_event_t srv_buf_dump_event;
-
-/** The buffer pool dump/load file name */
-UNIV_INTERN char* srv_buf_dump_filename;
-
-/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
-and/or load it during startup. */
-UNIV_INTERN char srv_buffer_pool_dump_at_shutdown = FALSE;
-UNIV_INTERN char srv_buffer_pool_load_at_startup = FALSE;
-
-/** Slot index in the srv_sys.sys_threads array for the purge thread. */
-static const ulint SRV_PURGE_SLOT = 1;
-
-/** Slot index in the srv_sys.sys_threads array for the master thread. */
-static const ulint SRV_MASTER_SLOT = 0;
-
-UNIV_INTERN os_event_t srv_checkpoint_completed_event;
-
-UNIV_INTERN os_event_t srv_redo_log_tracked_event;
-
-/** Whether the redo log tracker thread has been started. Does not take into
-account whether the tracking is currently enabled (see srv_track_changed_pages
-for that) */
-UNIV_INTERN bool srv_redo_log_thread_started = false;
-
-/*********************************************************************//**
-Prints counters for work done by srv_master_thread.
-The counters are of type ulint, whose width is not guaranteed to equal that
-of unsigned long on every platform, so each argument is cast explicitly to
-match the %lu conversions in the format strings. */
-static
-void
-srv_print_master_thread_info(
-/*=========================*/
-	FILE*	file)	/*!< in: output stream */
-{
-	fprintf(file, "srv_master_thread loops: %lu srv_active, "
-		"%lu srv_shutdown, %lu srv_idle\n",
-		(unsigned long) srv_main_active_loops,
-		(unsigned long) srv_main_shutdown_loops,
-		(unsigned long) srv_main_idle_loops);
-	fprintf(file, "srv_master_thread log flush and writes: %lu\n",
-		(unsigned long) srv_log_writes_and_flush);
-}
-
-/*********************************************************************//**
-Records a textual description of what an i/o thread is currently doing.
-The segment number must be below SRV_MAX_N_IO_THREADS; this is asserted. */
-UNIV_INTERN
-void
-srv_set_io_thread_op_info(
-/*======================*/
-	ulint		i,	/*!< in: the 'segment' of the i/o thread */
-	const char*	str)	/*!< in: constant char string describing the
-				state */
-{
-	ut_a(i < SRV_MAX_N_IO_THREADS);
-
-	const char**	op_info = &srv_io_thread_op_info[i];
-
-	*op_info = str;
-}
-
-/*********************************************************************//**
-Puts every entry of srv_io_thread_op_info[] back to the initial
-"not started yet" description. */
-UNIV_INTERN
-void
-srv_reset_io_thread_op_info()
-/*=========================*/
-{
-	const char**		info = srv_io_thread_op_info;
-	const char** const	end = info
-		+ UT_ARR_SIZE(srv_io_thread_op_info);
-
-	while (info != end) {
-		*info++ = "not started yet";
-	}
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Checks that a thread type is one that may occupy a thread table slot,
-i.e. SRV_WORKER, SRV_PURGE or SRV_MASTER. Any other value, including
-SRV_NONE, triggers ut_error.
-@return TRUE if ok */
-static
-ibool
-srv_thread_type_validate(
-/*=====================*/
-	srv_thread_type	type)	/*!< in: thread type */
-{
-	if (type == SRV_WORKER
-	    || type == SRV_PURGE
-	    || type == SRV_MASTER) {
-
-		return(TRUE);
-	}
-
-	ut_error;
-	return(FALSE);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Reads the thread type stored in a thread table slot. In debug builds the
-value is also checked with srv_thread_type_validate().
-@return thread type */
-static
-srv_thread_type
-srv_slot_get_type(
-/*==============*/
-	const srv_slot_t*	slot)	/*!< in: thread slot */
-{
-	const srv_thread_type	slot_type = slot->type;
-
-	ut_ad(srv_thread_type_validate(slot_type));
-
-	return(slot_type);
-}
-
-/*********************************************************************//**
-Reserves a slot in the thread table for the current thread.
-The master thread always gets slot SRV_MASTER_SLOT and the purge coordinator
-slot SRV_PURGE_SLOT; a worker gets the first unused slot from index 2
-onwards. The chosen slot is marked in use and not suspended, and the
-active-thread counter of the given type is incremented. All of this is done
-while holding srv_sys.mutex.
-@return reserved slot */
-static
-srv_slot_t*
-srv_reserve_slot(
-/*=============*/
-	srv_thread_type	type)	/*!< in: type of the thread */
-{
-	srv_slot_t*	slot = NULL;
-
-	srv_sys_mutex_enter();
-
-	ut_ad(srv_thread_type_validate(type));
-
-	switch (type) {
-	case SRV_MASTER:
-		slot = &srv_sys.sys_threads[SRV_MASTER_SLOT];
-		break;
-
-	case SRV_PURGE:
-		slot = &srv_sys.sys_threads[SRV_PURGE_SLOT];
-		break;
-
-	case SRV_WORKER:
-		/* Find an empty slot, skip the master and purge slots.
-		The bound is asserted before the slot is examined, so that
-		a full table fails the assertion instead of reading (and
-		possibly reserving) a slot beyond the initialised range. */
-		for (slot = &srv_sys.sys_threads[2]; ; ++slot) {
-
-			ut_a(slot < &srv_sys.sys_threads[
-				     srv_sys.n_sys_threads]);
-
-			if (!slot->in_use) {
-				break;
-			}
-		}
-		break;
-
-	case SRV_NONE:
-		ut_error;
-	}
-
-	ut_a(!slot->in_use);
-
-	slot->in_use = TRUE;
-	slot->suspended = FALSE;
-	slot->type = type;
-
-	ut_ad(srv_slot_get_type(slot) == type);
-
-	++srv_sys.n_threads_active[type];
-
-	srv_sys_mutex_exit();
-
-	return(slot);
-}
-
-/*********************************************************************//**
-Marks the thread owning the slot as suspended and decrements the count of
-active threads of its type. The caller must hold srv_sys.mutex. The event
-of the slot is reset so that the caller can subsequently wait on it.
-@return the current signal count of the event. */
-static
-ib_int64_t
-srv_suspend_thread_low(
-/*===================*/
-	srv_slot_t*	slot)	/*!< in/out: thread slot */
-{
-	ut_ad(!srv_read_only_mode);
-	ut_ad(srv_sys_mutex_own());
-	ut_ad(slot->in_use);
-
-	const srv_thread_type	thread_class = srv_slot_get_type(slot);
-	ulint&			n_active
-		= srv_sys.n_threads_active[thread_class];
-
-	switch (thread_class) {
-	case SRV_NONE:
-		ut_error;
-
-	case SRV_MASTER:
-	case SRV_PURGE:
-		/* There is exactly one master thread (first slot) and
-		exactly one purge coordinator thread (second slot). */
-		ut_a(n_active == 1);
-		break;
-
-	case SRV_WORKER:
-		ut_a(srv_n_purge_threads > 1);
-		ut_a(n_active > 0);
-		break;
-	}
-
-	ut_a(!slot->suspended);
-	slot->suspended = TRUE;
-
-	ut_a(n_active > 0);
-	--n_active;
-
-	return(os_event_reset(slot->event));
-}
-
-/*********************************************************************//**
-Acquires srv_sys.mutex and suspends the calling thread's slot by means of
-srv_suspend_thread_low().
-@return the current signal count of the event. */
-static
-ib_int64_t
-srv_suspend_thread(
-/*===============*/
-	srv_slot_t*	slot)	/*!< in/out: thread slot */
-{
-	ib_int64_t	signal_count;
-
-	srv_sys_mutex_enter();
-	signal_count = srv_suspend_thread_low(slot);
-	srv_sys_mutex_exit();
-
-	return(signal_count);
-}
-
-/** Resume the calling thread, optionally after waiting for its slot event.
-Once any wait is over, the slot is marked as not suspended and the
-active-thread count of its type is incremented under srv_sys.mutex.
-@param[in,out]	slot		thread slot
-@param[in]	sig_count	signal count (if wait)
-@param[in]	wait		whether to wait for the event
-@param[in]	timeout_usec	timeout in microseconds (0=infinite)
-@return whether the wait timed out */
-static
-bool
-srv_resume_thread(srv_slot_t* slot, ib_int64_t sig_count = 0, bool wait = true,
-		  ulint timeout_usec = 0)
-{
-	ut_ad(!srv_read_only_mode);
-	ut_ad(slot->in_use);
-	ut_ad(slot->suspended);
-
-	bool	timed_out = false;
-
-	if (wait) {
-		if (timeout_usec != 0) {
-			timed_out = os_event_wait_time_low(
-				slot->event, timeout_usec, sig_count)
-				== OS_SYNC_TIME_EXCEEDED;
-		} else {
-			/* No timeout: block until the event is set. */
-			os_event_wait_low(slot->event, sig_count);
-		}
-	}
-
-	srv_sys_mutex_enter();
-
-	ut_ad(slot->in_use);
-	ut_ad(slot->suspended);
-
-	slot->suspended = FALSE;
-	++srv_sys.n_threads_active[slot->type];
-
-	srv_sys_mutex_exit();
-
-	return(timed_out);
-}
-
-/** Ensure that a given number of threads of the type given are running
-(or are already terminated).
-Each pass scans the thread table under srv_sys.mutex, counts the slots of
-the requested type that are in use and not suspended, and sets the event of
-every suspended slot of that type it encounters. Passes are repeated while
-at least one, but fewer than n, running threads were counted.
-@param[in] type thread type
-@param[in] n number of threads that have to run */
-void
-srv_release_threads(enum srv_thread_type type, ulint n)
-{
- ulint running;
-
- ut_ad(srv_thread_type_validate(type));
- ut_ad(n > 0);
-
- do {
- /* Start every pass with a fresh count of running threads. */
- running = 0;
-
- srv_sys_mutex_enter();
-
- for (ulint i = 0; i < srv_sys.n_sys_threads; i++) {
- srv_slot_t* slot = &srv_sys.sys_threads[i];
-
- if (!slot->in_use || srv_slot_get_type(slot) != type) {
- /* Unused slot or a thread of another type. */
- continue;
- } else if (!slot->suspended) {
- /* Already running: count it, and stop scanning
- once enough running threads have been seen. */
- if (++running >= n) {
- break;
- }
- continue;
- }
-
- /* The slot belongs to a suspended thread of the requested
- type: check the invariants for that type, then wake it. */
- switch (type) {
- case SRV_NONE:
- ut_error;
-
- case SRV_MASTER:
- /* We have only one master thread and it
- should be the first entry always. */
- ut_a(n == 1);
- ut_a(i == SRV_MASTER_SLOT);
- ut_a(srv_sys.n_threads_active[type] == 0);
- break;
-
- case SRV_PURGE:
- /* We have only one purge coordinator thread
- and it should be the second entry always. */
- ut_a(n == 1);
- ut_a(i == SRV_PURGE_SLOT);
- ut_a(srv_n_purge_threads > 0);
- ut_a(srv_sys.n_threads_active[type] == 0);
- break;
-
- case SRV_WORKER:
- ut_a(srv_n_purge_threads > 1);
- ut_a(srv_sys.n_threads_active[type]
- < srv_n_purge_threads - 1);
- break;
- }
-
- /* The woken thread is not added to 'running' here; its slot
- stays suspended until the thread itself clears the flag in
- srv_resume_thread(). */
- os_event_set(slot->event);
- }
-
- srv_sys_mutex_exit();
- /* Stop when enough threads were counted as running, or when
- none was (running == 0). */
- } while (running && running < n);
-}
-
-/*********************************************************************//**
-Release a thread's slot.
-While holding srv_sys.mutex, the slot is first marked suspended through
-srv_suspend_thread_low(), which also decrements the active-thread count of
-the slot's type, and is then flagged as no longer in use. */
-static
-void
-srv_free_slot(
-/*==========*/
- srv_slot_t* slot) /*!< in/out: thread slot */
-{
- srv_sys_mutex_enter();
-
- /* Mark the thread as inactive. The signal count returned by
- srv_suspend_thread_low() is not needed here. */
- srv_suspend_thread_low(slot);
- /* Free the slot for reuse. */
- ut_ad(slot->in_use);
- slot->in_use = FALSE;
-
- srv_sys_mutex_exit();
-}
-
-/*********************************************************************//**
-Initializes the server.
-Creates the mutexes and events owned by this module, sets the number of
-thread table slots, and calls the initialisation routines of the dictionary
-index dummies, the concurrency module, the INFORMATION SCHEMA cache, CRC32
-and the dictionary memory module. The thread table, its mutexes and the
-background-thread events are only created when not in read-only mode. */
-UNIV_INTERN
-void
-srv_init(void)
-/*==========*/
-{
-#ifndef HAVE_ATOMIC_BUILTINS
- mutex_create(server_mutex_key, &server_mutex, SYNC_ANY_LATCH);
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
- mutex_create(srv_innodb_monitor_mutex_key,
- &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
-
- /* No thread slots are set up in read-only mode; otherwise one slot
- per purge thread plus one more. */
- srv_sys.n_sys_threads = srv_read_only_mode
- ? 0
- : srv_n_purge_threads + 1/* purge coordinator */;
-
- if (!srv_read_only_mode) {
-
- mutex_create(srv_sys_mutex_key, &srv_sys.mutex, SYNC_THREADS);
-
- mutex_create(srv_sys_tasks_mutex_key,
- &srv_sys.tasks_mutex, SYNC_ANY_LATCH);
-
- /* Give every thread slot the event its thread waits on. */
- for (ulint i = 0; i < srv_sys.n_sys_threads; ++i) {
- srv_slot_t* slot = &srv_sys.sys_threads[i];
-
- slot->event = os_event_create();
-
- ut_a(slot->event);
- }
-
- srv_error_event = os_event_create();
-
- srv_monitor_event = os_event_create();
-
- srv_buf_dump_event = os_event_create();
-
- srv_checkpoint_completed_event = os_event_create();
-
- srv_redo_log_tracked_event = os_event_create();
-
- /* The event starts out set when changed page tracking is
- enabled. */
- if (srv_track_changed_pages) {
- os_event_set(srv_redo_log_tracked_event);
- }
- }
-
- /* page_zip_stat_per_index_mutex is acquired from:
- 1. page_zip_compress() (after SYNC_FSP)
- 2. page_zip_decompress()
- 3. i_s_cmp_per_index_fill_low() (where SYNC_DICT is acquired)
- 4. innodb_cmp_per_index_update(), no other latches
- since we do not acquire any other latches while holding this mutex,
- it can have very low level. We pick SYNC_ANY_LATCH for it. */
-
- mutex_create(
- page_zip_stat_per_index_mutex_key,
- &page_zip_stat_per_index_mutex, SYNC_ANY_LATCH);
-
- /* Create dummy indexes for infimum and supremum records */
-
- dict_ind_init();
-
- srv_conc_init();
-#ifdef WITH_INNODB_DISALLOW_WRITES
- /* Writes have to be enabled on init or else we hang. Thus, we
- always set the event here regardless of innobase_disallow_writes.
- That flag will always be 0 at this point because it isn't settable
- via my.cnf or command line arg. */
- srv_allow_writes_event = os_event_create();
- os_event_set(srv_allow_writes_event);
-#endif /* WITH_INNODB_DISALLOW_WRITES */
-
- /* Initialize some INFORMATION SCHEMA internal structures */
- trx_i_s_cache_init(trx_i_s_cache);
-
- ut_crc32_init();
-
- dict_mem_init();
-}
-
-/*********************************************************************//**
-Frees the data structures created in srv_init().
-Outside read-only mode this releases the event of every slot in
-srv_sys.sys_threads, the error, monitor, buffer-dump, checkpoint-completed
-and redo-log-tracked events (resetting each global pointer to NULL) and the
-two srv_sys mutexes. In every mode it then releases the monitor mutex, the
-page_zip_stat_per_index mutex and trx_i_s_cache, and zero-fills srv_sys. */
-UNIV_INTERN
-void
-srv_free(void)
-/*==========*/
-{
- srv_conc_free();
-
- if (!srv_read_only_mode) { /* srv_init() creates these objects only
- when not in read-only mode */
-
- for (ulint i = 0; i < srv_sys.n_sys_threads; i++)
- os_event_free(srv_sys.sys_threads[i].event);
-
- os_event_free(srv_error_event);
- srv_error_event = NULL;
- os_event_free(srv_monitor_event);
- srv_monitor_event = NULL;
- os_event_free(srv_buf_dump_event);
- srv_buf_dump_event = NULL;
- os_event_free(srv_checkpoint_completed_event);
- srv_checkpoint_completed_event = NULL;
- os_event_free(srv_redo_log_tracked_event);
- srv_redo_log_tracked_event = NULL;
- mutex_free(&srv_sys.mutex);
- mutex_free(&srv_sys.tasks_mutex);
- }
-
-#ifdef WITH_INNODB_DISALLOW_WRITES
- os_event_free(srv_allow_writes_event); /* created unconditionally in
- srv_init(), so freed outside
- the read-only check */
- srv_allow_writes_event = NULL;
-#endif /* WITH_INNODB_DISALLOW_WRITES */
-
-#ifndef HAVE_ATOMIC_BUILTINS
- mutex_free(&server_mutex);
-#endif
- mutex_free(&srv_innodb_monitor_mutex);
- mutex_free(&page_zip_stat_per_index_mutex);
-
- trx_i_s_cache_free(trx_i_s_cache);
-
- /* This is needed for Mariabackup. */
- memset(&srv_sys, 0, sizeof srv_sys);
-}
-
-/*********************************************************************//**
-Initializes the synchronization primitives, memory system, and the thread
-local storage.
-srv_boot() calls this after srv_normalize_init_values() and before
-srv_init(). The calls below run in a fixed order; NOTE(review): the
-order presumably reflects dependencies between the subsystems (for
-example mem_init() after sync_init()) - confirm before reordering. */
-UNIV_INTERN
-void
-srv_general_init(void)
-/*==================*/
-{
- ut_mem_init();
- /* Reset the system variables in the recovery module. */
- recv_sys_var_init();
- os_sync_init();
- sync_init();
- mem_init(srv_mem_pool_size); /* pool size is taken from the
- srv_mem_pool_size setting */
- que_init();
- row_mysql_init();
-}
-
-/*********************************************************************//**
-Normalizes init parameter values to use units we use inside InnoDB.
-Each data file size and srv_last_file_size_max is multiplied by
-(1024 * 1024) / UNIV_PAGE_SIZE, and srv_log_file_size and
-srv_log_buffer_size are divided by UNIV_PAGE_SIZE. NOTE(review): this
-looks like a conversion from megabytes, respectively bytes, to pages -
-confirm against the option definitions. srv_lock_table_size is set to
-5 times srv_buf_pool_size / UNIV_PAGE_SIZE. */
-static
-void
-srv_normalize_init_values(void)
-/*===========================*/
-{
- ulint n;
- ulint i;
-
- n = srv_n_data_files;
-
- for (i = 0; i < n; i++) {
- srv_data_file_sizes[i] = srv_data_file_sizes[i]
- * ((1024 * 1024) / UNIV_PAGE_SIZE);
- }
-
- srv_last_file_size_max = srv_last_file_size_max
- * ((1024 * 1024) / UNIV_PAGE_SIZE);
-
- srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
-
- srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
-
- srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE); /* five
- entries per srv_buf_pool_size / UNIV_PAGE_SIZE unit */
-}
-
-/*********************************************************************//**
-Boots the InnoDB server.
-Runs, in this order: srv_normalize_init_values(), srv_general_init(),
-srv_init() and srv_mon_create(). */
-UNIV_INTERN
-void
-srv_boot(void)
-/*==========*/
-{
- /* Transform the init parameter values given by MySQL to
- use units we use inside InnoDB: */
-
- srv_normalize_init_values();
-
- /* Initialize synchronization primitives, memory management, and thread
- local storage */
-
- srv_general_init();
-
- /* Initialize this module */
-
- srv_init();
- srv_mon_create(); /* runs last, after srv_init() has completed */
-}
-
-/******************************************************************//**
-Refreshes the values used to calculate per-second averages.
-While holding srv_innodb_monitor_mutex this sets srv_last_monitor_time
-to the current time, refreshes the AIO, log and buffer pool I/O
-statistics, and copies the current search counters and srv_stats row
-counters into their "_old" baseline variables, so that the next monitor
-output computes its rates from this point in time. */
-static
-void
-srv_refresh_innodb_monitor_stats(void)
-/*==================================*/
-{
- mutex_enter(&srv_innodb_monitor_mutex);
-
- srv_last_monitor_time = time(NULL);
-
- os_aio_refresh_stats();
-
- btr_cur_n_sea_old = btr_cur_n_sea; /* baseline for hash searches/s */
- btr_cur_n_non_sea_old = btr_cur_n_non_sea; /* baseline for non-hash
- searches/s */
-
- log_refresh_stats();
-
- buf_refresh_io_stats_all();
-
- srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
- srv_n_rows_updated_old = srv_stats.n_rows_updated;
- srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
- srv_n_rows_read_old = srv_stats.n_rows_read;
-
- srv_n_system_rows_inserted_old = srv_stats.n_system_rows_inserted;
- srv_n_system_rows_updated_old = srv_stats.n_system_rows_updated;
- srv_n_system_rows_deleted_old = srv_stats.n_system_rows_deleted;
- srv_n_system_rows_read_old = srv_stats.n_system_rows_read;
-
- mutex_exit(&srv_innodb_monitor_mutex);
-}
-
-/******************************************************************//**
-Outputs to a file the output of the InnoDB Monitor.
-The report is written while holding srv_innodb_monitor_mutex and consists
-of the sections BACKGROUND THREAD, SEMAPHORES, LATEST FOREIGN KEY ERROR
-(only when not in recovery, not read-only and the error file is not
-empty), the lock summary and transaction list, FILE I/O, INSERT BUFFER
-AND ADAPTIVE HASH INDEX, LOG, BUFFER POOL AND MEMORY and ROW OPERATIONS.
-Sections that need the lock system, the insert buffer or the log are
-skipped while recv_recovery_on is set, and the statistics that need
-trx_sys are skipped while trx_sys is NULL.
-As a side effect srv_last_monitor_time and the "_old" rate baselines are
-reset, so the next call reports averages since this call.
-@return FALSE if not all information printed
-due to failure to obtain necessary mutex; also FALSE while recovery is
-in progress, because the lock information is not printed then */
-UNIV_INTERN
-ibool
-srv_printf_innodb_monitor(
-/*======================*/
- FILE* file, /*!< in: output stream */
- ibool nowait, /*!< in: whether to wait for the
- lock_sys_t:: mutex */
- ulint* trx_start_pos, /*!< out: file position of the start of
- the list of active transactions; may be
- NULL; ULINT_UNDEFINED if ftell() fails;
- untouched when FALSE is returned */
- ulint* trx_end) /*!< out: file position of the end of
- the list of active transactions; may be
- NULL; ULINT_UNDEFINED if ftell() fails;
- untouched when FALSE is returned */
-{
- double time_elapsed;
- time_t current_time;
- ulint n_reserved;
- ibool ret;
-
- ulong btr_search_sys_constant;
- ulong btr_search_sys_variable;
- ulint lock_sys_subtotal;
- ulint recv_sys_subtotal;
-
- ulint i;
- trx_t* trx;
-
- mutex_enter(&srv_innodb_monitor_mutex);
-
- current_time = time(NULL);
-
- /* We add 0.001 seconds to time_elapsed to prevent division
- by zero if two users happen to call SHOW ENGINE INNODB STATUS at the
- same time */
-
- time_elapsed = difftime(current_time, srv_last_monitor_time)
- + 0.001;
-
- srv_last_monitor_time = time(NULL);
-
- fputs("\n=====================================\n", file);
-
- ut_print_timestamp(file);
- fprintf(file,
- " INNODB MONITOR OUTPUT\n"
- "=====================================\n"
- "Per second averages calculated from the last %lu seconds\n",
- (ulong) time_elapsed);
-
- fputs("-----------------\n"
- "BACKGROUND THREAD\n"
- "-----------------\n", file);
- srv_print_master_thread_info(file);
-
- fputs("----------\n"
- "SEMAPHORES\n"
- "----------\n", file);
- sync_print(file);
-
- /* Conceptually, srv_innodb_monitor_mutex has a very high latching
- order level in sync0sync.h, while dict_foreign_err_mutex has a very
- low level 135. Therefore we can reserve the latter mutex here without
- a danger of a deadlock of threads. */
-
- if (!recv_recovery_on) {
-
- mutex_enter(&dict_foreign_err_mutex);
-
- if (!srv_read_only_mode
- && ftell(dict_foreign_err_file) != 0L) {
- fputs("------------------------\n"
- "LATEST FOREIGN KEY ERROR\n"
- "------------------------\n", file);
- ut_copy_file(file, dict_foreign_err_file);
- }
-
- mutex_exit(&dict_foreign_err_mutex);
- }
-
- /* Only if lock_print_info_summary proceeds correctly,
- before we call the lock_print_info_all_transactions
- to print all the lock information. IMPORTANT NOTE: This
- function acquires the lock mutex on success. */
- ret = recv_recovery_on ? FALSE : lock_print_info_summary(file, nowait);
-
- if (ret) {
- if (trx_start_pos) {
- long t = ftell(file);
- if (t < 0) {
- *trx_start_pos = ULINT_UNDEFINED;
- } else {
- *trx_start_pos = (ulint) t;
- }
- }
-
- /* NOTE: If we get here then we have the lock mutex. This
- function will release the lock mutex that we acquired when
- we called the lock_print_info_summary() function earlier. */
-
- lock_print_info_all_transactions(file);
-
- if (trx_end) {
- long t = ftell(file);
- if (t < 0) {
- *trx_end = ULINT_UNDEFINED;
- } else {
- *trx_end = (ulint) t;
- }
- }
- }
-
- fputs("--------\n"
- "FILE I/O\n"
- "--------\n", file);
- os_aio_print(file);
-
- if (!recv_recovery_on) {
-
- fputs("-------------------------------------\n"
- "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
- "-------------------------------------\n", file);
- ibuf_print(file);
- }
-
-
- fprintf(file,
- "%.2f hash searches/s, %.2f non-hash searches/s\n",
- (btr_cur_n_sea - btr_cur_n_sea_old)
- / time_elapsed,
- (btr_cur_n_non_sea - btr_cur_n_non_sea_old)
- / time_elapsed);
- btr_cur_n_sea_old = btr_cur_n_sea;
- btr_cur_n_non_sea_old = btr_cur_n_non_sea;
-
- if (!recv_recovery_on) {
-
- fputs("---\n"
- "LOG\n"
- "---\n", file);
- log_print(file);
- }
-
- fputs("----------------------\n"
- "BUFFER POOL AND MEMORY\n"
- "----------------------\n", file);
- fprintf(file,
- "Total memory allocated " ULINTPF
- "; in additional pool allocated " ULINTPF "\n",
- ut_total_allocated_memory,
- mem_pool_get_reserved(mem_comm_pool));
-
- /* Adding 0 atomically is used here to read the current value. */
- fprintf(file,
- "Total memory allocated by read views " ULINTPF "\n",
- os_atomic_increment_ulint(&srv_read_views_memory, 0));
-
- /* Calculate AHI constant and variable memory allocations */
-
- btr_search_sys_constant = 0;
- btr_search_sys_variable = 0;
-
- ut_ad(btr_search_sys->hash_tables);
-
- for (i = 0; i < btr_search_index_num; i++) {
- hash_table_t* ht = btr_search_sys->hash_tables[i];
-
- ut_ad(ht);
- ut_ad(ht->heap);
-
- /* Multiple mutexes/heaps are currently never used for adaptive
- hash index tables. */
- ut_ad(!ht->n_sync_obj);
- ut_ad(!ht->heaps);
-
- btr_search_sys_variable += mem_heap_get_size(ht->heap);
- btr_search_sys_constant += ht->n_cells * sizeof(hash_cell_t);
- }
-
- /* Sum the lock heap sizes of all transactions in mysql_trx_list;
- transactions without a lock heap contribute nothing. */
- lock_sys_subtotal = 0;
- if (trx_sys) {
- mutex_enter(&trx_sys->mutex);
- trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
- while (trx) {
- lock_sys_subtotal
- += ((trx->lock.lock_heap)
- ? mem_heap_get_size(trx->lock.lock_heap)
- : 0);
- trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
- }
- mutex_exit(&trx_sys->mutex);
- }
-
- recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash)
- ? mem_heap_get_size(recv_sys->heap) : 0);
-
- /* Each row prints total, constant part and variable part, in the
- same order as the argument groups below. */
- fprintf(file,
- "Internal hash tables (constant factor + variable factor)\n"
- " Adaptive hash index %lu \t(%lu + " ULINTPF ")\n"
- " Page hash %lu (buffer pool 0 only)\n"
- " Dictionary cache %lu \t(%lu + " ULINTPF ")\n"
- " File system %lu \t(%lu + " ULINTPF ")\n"
- " Lock system %lu \t(%lu + " ULINTPF ")\n"
- " Recovery system %lu \t(%lu + " ULINTPF ")\n",
-
- btr_search_sys_constant + btr_search_sys_variable,
- btr_search_sys_constant,
- btr_search_sys_variable,
-
- (ulong) (buf_pool_from_array(0)->page_hash->n_cells * sizeof(hash_cell_t)),
-
- (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
- + dict_sys->table_id_hash->n_cells
- ) * sizeof(hash_cell_t)
- + dict_sys->size) : 0),
- (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
- + dict_sys->table_id_hash->n_cells
- ) * sizeof(hash_cell_t)) : 0),
- dict_sys ? (dict_sys->size) : 0,
-
- (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)
- + fil_system_hash_nodes()),
- (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)),
- fil_system_hash_nodes(),
-
- (ulong) ((lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0)
- + lock_sys_subtotal),
- (ulong) (lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0),
- lock_sys_subtotal,
-
- (ulong) (((recv_sys && recv_sys->addr_hash)
- ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0)
- + recv_sys_subtotal),
- (ulong) ((recv_sys && recv_sys->addr_hash)
- ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0),
- recv_sys_subtotal);
-
-
- fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
- dict_sys ? dict_sys->size : 0);
-
- buf_print_io(file);
-
- fputs("--------------\n"
- "ROW OPERATIONS\n"
- "--------------\n", file);
- fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
- (long) srv_conc_get_active_threads(),
- srv_conc_get_waiting_threads());
-
- /* trx_sys is allowed to be NULL when the lock heap sizes are
- summed above, so apply the same guard here instead of
- dereferencing a NULL pointer; the transaction statistics are
- simply left out of the report in that case. */
- if (trx_sys) {
- mutex_enter(&trx_sys->mutex);
-
- fprintf(file, "%lu read views open inside InnoDB\n",
- UT_LIST_GET_LEN(trx_sys->view_list));
-
- fprintf(file, "%lu RW transactions active inside InnoDB\n",
- UT_LIST_GET_LEN(trx_sys->rw_trx_list));
-
- fprintf(file, "%lu RO transactions active inside InnoDB\n",
- UT_LIST_GET_LEN(trx_sys->ro_trx_list));
-
- fprintf(file, "%lu out of %lu descriptors used\n",
- trx_sys->descr_n_used, trx_sys->descr_n_max);
-
- if (UT_LIST_GET_LEN(trx_sys->view_list)) {
- /* The last element of view_list is reported as the
- oldest view. */
- read_view_t* view = UT_LIST_GET_LAST(trx_sys->view_list);
-
- if (view) {
- fprintf(file, "---OLDEST VIEW---\n");
- read_view_print(file, view);
- fprintf(file, "-----------------\n");
- }
- }
-
- mutex_exit(&trx_sys->mutex);
- }
-
- n_reserved = fil_space_get_n_reserved_extents(0);
- if (n_reserved > 0) {
- fprintf(file,
- "%lu tablespace extents now reserved for"
- " B-tree split operations\n",
- (ulong) n_reserved);
- }
-
-#ifdef UNIV_LINUX
- fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
- (ulong) srv_main_thread_process_no,
- (ulong) srv_main_thread_id,
- srv_main_thread_op_info);
-#else
- fprintf(file, "Main thread id %lu, state: %s\n",
- (ulong) srv_main_thread_id,
- srv_main_thread_op_info);
-#endif
- fprintf(file,
- "Number of rows inserted " ULINTPF
- ", updated " ULINTPF ", deleted " ULINTPF
- ", read " ULINTPF "\n",
- (ulint) srv_stats.n_rows_inserted,
- (ulint) srv_stats.n_rows_updated,
- (ulint) srv_stats.n_rows_deleted,
- (ulint) srv_stats.n_rows_read);
- fprintf(file,
- "%.2f inserts/s, %.2f updates/s,"
- " %.2f deletes/s, %.2f reads/s\n",
- ((ulint) srv_stats.n_rows_inserted - srv_n_rows_inserted_old)
- / time_elapsed,
- ((ulint) srv_stats.n_rows_updated - srv_n_rows_updated_old)
- / time_elapsed,
- ((ulint) srv_stats.n_rows_deleted - srv_n_rows_deleted_old)
- / time_elapsed,
- ((ulint) srv_stats.n_rows_read - srv_n_rows_read_old)
- / time_elapsed);
- fprintf(file,
- "Number of system rows inserted " ULINTPF
- ", updated " ULINTPF ", deleted " ULINTPF
- ", read " ULINTPF "\n",
- (ulint) srv_stats.n_system_rows_inserted,
- (ulint) srv_stats.n_system_rows_updated,
- (ulint) srv_stats.n_system_rows_deleted,
- (ulint) srv_stats.n_system_rows_read);
- fprintf(file,
- "%.2f inserts/s, %.2f updates/s,"
- " %.2f deletes/s, %.2f reads/s\n",
- ((ulint) srv_stats.n_system_rows_inserted
- - srv_n_system_rows_inserted_old) / time_elapsed,
- ((ulint) srv_stats.n_system_rows_updated
- - srv_n_system_rows_updated_old) / time_elapsed,
- ((ulint) srv_stats.n_system_rows_deleted
- - srv_n_system_rows_deleted_old) / time_elapsed,
- ((ulint) srv_stats.n_system_rows_read
- - srv_n_system_rows_read_old) / time_elapsed);
- /* Remember the counters just printed as the baseline for the
- next report. */
- srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
- srv_n_rows_updated_old = srv_stats.n_rows_updated;
- srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
- srv_n_rows_read_old = srv_stats.n_rows_read;
- srv_n_system_rows_inserted_old = srv_stats.n_system_rows_inserted;
- srv_n_system_rows_updated_old = srv_stats.n_system_rows_updated;
- srv_n_system_rows_deleted_old = srv_stats.n_system_rows_deleted;
- srv_n_system_rows_read_old = srv_stats.n_system_rows_read;
-
- fputs("----------------------------\n"
- "END OF INNODB MONITOR OUTPUT\n"
- "============================\n", file);
- mutex_exit(&srv_innodb_monitor_mutex);
- fflush(file);
-
-#ifndef DBUG_OFF
- srv_debug_monitor_printed = true;
-#endif
-
- return(ret);
-}
-
-/******************************************************************//**
-Function to pass InnoDB status variables to MySQL.
-Buffer pool totals, encryption/scrub statistics (only when not in
-read-only mode) and the adaptive hash and dictionary memory sizes are
-gathered first, without srv_innodb_monitor_mutex. The mutex is then held
-while every field of export_vars is assigned from those values and from
-the global counters (srv_stats, log_sys, trx_sys, purge_sys, lock_sys,
-rw_lock_stats and others). */
-UNIV_INTERN
-void
-srv_export_innodb_status(void)
-/*==========================*/
-{
- buf_pool_stat_t stat;
- buf_pools_list_size_t buf_pools_list_size;
- ulint LRU_len;
- ulint free_len;
- ulint flush_list_len;
- ulint mem_adaptive_hash, mem_dictionary;
- read_view_t* oldest_view;
- ulint i;
- fil_crypt_stat_t crypt_stat;
- btr_scrub_stat_t scrub_stat;
-
- buf_get_total_stat(&stat);
- buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
- buf_get_total_list_size_in_bytes(&buf_pools_list_size);
- if (!srv_read_only_mode) { /* crypt_stat and scrub_stat stay
- unset in read-only mode; they are only
- read under the same condition below */
- fil_crypt_total_stat(&crypt_stat);
- btr_scrub_total_stat(&scrub_stat);
- }
-
- mem_adaptive_hash = 0;
-
- ut_ad(btr_search_sys->hash_tables);
-
- for (i = 0; i < btr_search_index_num; i++) {
- hash_table_t* ht = btr_search_sys->hash_tables[i];
-
- ut_ad(ht);
- ut_ad(ht->heap);
- /* Multiple mutexes/heaps are currently never used for adaptive
- hash index tables. */
- ut_ad(!ht->n_sync_obj);
- ut_ad(!ht->heaps);
-
- mem_adaptive_hash += mem_heap_get_size(ht->heap);
- mem_adaptive_hash += ht->n_cells * sizeof(hash_cell_t);
- }
-
- mem_dictionary = (dict_sys ? ((dict_sys->table_hash->n_cells
- + dict_sys->table_id_hash->n_cells
- ) * sizeof(hash_cell_t)
- + dict_sys->size) : 0); /* 0 when dict_sys is NULL */
-
- mutex_enter(&srv_innodb_monitor_mutex);
-
- export_vars.innodb_data_pending_reads =
- ulint(MONITOR_VALUE(MONITOR_OS_PENDING_READS));
-
- export_vars.innodb_data_pending_writes =
- ulint(MONITOR_VALUE(MONITOR_OS_PENDING_WRITES));
-
- export_vars.innodb_data_pending_fsyncs =
- fil_n_pending_log_flushes
- + fil_n_pending_tablespace_flushes;
- export_vars.innodb_adaptive_hash_hash_searches
- = btr_cur_n_sea;
- export_vars.innodb_adaptive_hash_non_hash_searches
- = btr_cur_n_non_sea;
- export_vars.innodb_background_log_sync
- = srv_log_writes_and_flush;
-
- export_vars.innodb_data_fsyncs = os_n_fsyncs;
-
- export_vars.innodb_data_read = srv_stats.data_read;
-
- export_vars.innodb_data_reads = os_n_file_reads;
-
- export_vars.innodb_data_writes = os_n_file_writes;
-
- export_vars.innodb_data_written = srv_stats.data_written;
-
- export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
-
- export_vars.innodb_buffer_pool_write_requests =
- srv_stats.buf_pool_write_requests;
-
- export_vars.innodb_buffer_pool_wait_free =
- srv_stats.buf_pool_wait_free;
-
- export_vars.innodb_buffer_pool_pages_flushed =
- srv_stats.buf_pool_flushed;
-
- export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads;
-
- export_vars.innodb_buffer_pool_read_ahead_rnd =
- stat.n_ra_pages_read_rnd;
-
- export_vars.innodb_buffer_pool_read_ahead =
- stat.n_ra_pages_read;
-
- export_vars.innodb_buffer_pool_read_ahead_evicted =
- stat.n_ra_pages_evicted;
-
- export_vars.innodb_buffer_pool_pages_LRU_flushed =
- stat.buf_lru_flush_page_count;
-
- export_vars.innodb_buffer_pool_pages_data = LRU_len;
-
- export_vars.innodb_buffer_pool_bytes_data =
- buf_pools_list_size.LRU_bytes
- + buf_pools_list_size.unzip_LRU_bytes;
-
- export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
-
- export_vars.innodb_buffer_pool_bytes_dirty =
- buf_pools_list_size.flush_list_bytes;
-
- export_vars.innodb_buffer_pool_pages_free = free_len;
-
- export_vars.innodb_deadlocks = srv_stats.lock_deadlock_count;
-
-#ifdef UNIV_DEBUG
- export_vars.innodb_buffer_pool_pages_latched =
- buf_get_latched_pages_number();
-#endif /* UNIV_DEBUG */
- export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();
-
- export_vars.innodb_buffer_pool_pages_misc =
- buf_pool_get_n_pages() - LRU_len - free_len; /* pages that are
- neither on the LRU list nor on the free list */
-
- export_vars.innodb_buffer_pool_pages_made_young
- = stat.n_pages_made_young;
- export_vars.innodb_buffer_pool_pages_made_not_young
- = stat.n_pages_not_made_young;
- export_vars.innodb_buffer_pool_pages_old = 0;
- for (i = 0; i < srv_buf_pool_instances; i++) { /* sum LRU_old_len
- over all buffer pool instances */
- buf_pool_t* buf_pool = buf_pool_from_array(i);
- export_vars.innodb_buffer_pool_pages_old
- += buf_pool->LRU_old_len;
- }
- export_vars.innodb_checkpoint_age
- = (log_sys->lsn - log_sys->last_checkpoint_lsn);
- export_vars.innodb_checkpoint_max_age
- = log_sys->max_checkpoint_age;
- export_vars.innodb_history_list_length
- = trx_sys->rseg_history_len;
- ibuf_export_ibuf_status(
- &export_vars.innodb_ibuf_size,
- &export_vars.innodb_ibuf_free_list,
- &export_vars.innodb_ibuf_segment_size,
- &export_vars.innodb_ibuf_merges,
- &export_vars.innodb_ibuf_merged_inserts,
- &export_vars.innodb_ibuf_merged_delete_marks,
- &export_vars.innodb_ibuf_merged_deletes,
- &export_vars.innodb_ibuf_discarded_inserts,
- &export_vars.innodb_ibuf_discarded_delete_marks,
- &export_vars.innodb_ibuf_discarded_deletes);
- export_vars.innodb_lsn_current
- = log_sys->lsn;
- export_vars.innodb_lsn_flushed
- = log_sys->flushed_to_disk_lsn;
- export_vars.innodb_lsn_last_checkpoint
- = log_sys->last_checkpoint_lsn;
- export_vars.innodb_master_thread_active_loops
- = srv_main_active_loops;
- export_vars.innodb_master_thread_idle_loops
- = srv_main_idle_loops;
- export_vars.innodb_max_trx_id
- = trx_sys->max_trx_id;
- export_vars.innodb_mem_adaptive_hash
- = mem_adaptive_hash;
- export_vars.innodb_mem_dictionary
- = mem_dictionary;
- export_vars.innodb_mem_total
- = ut_total_allocated_memory;
- export_vars.innodb_mutex_os_waits
- = mutex_os_wait_count;
- export_vars.innodb_mutex_spin_rounds
- = mutex_spin_round_count;
- export_vars.innodb_mutex_spin_waits
- = mutex_spin_wait_count;
- export_vars.innodb_s_lock_os_waits
- = rw_lock_stats.rw_s_os_wait_count;
- export_vars.innodb_s_lock_spin_rounds
- = rw_lock_stats.rw_s_spin_round_count;
- export_vars.innodb_s_lock_spin_waits
- = rw_lock_stats.rw_s_spin_wait_count;
- export_vars.innodb_x_lock_os_waits
- = rw_lock_stats.rw_x_os_wait_count;
- export_vars.innodb_x_lock_spin_rounds
- = rw_lock_stats.rw_x_spin_round_count;
- export_vars.innodb_x_lock_spin_waits
- = rw_lock_stats.rw_x_spin_wait_count;
-
- oldest_view = UT_LIST_GET_LAST(trx_sys->view_list); /* last list
- element is treated as the oldest view; 0 is
- exported when the list is empty */
- export_vars.innodb_oldest_view_low_limit_trx_id
- = oldest_view ? oldest_view->low_limit_id : 0;
-
- export_vars.innodb_purge_trx_id = purge_sys->limit.trx_no;
- export_vars.innodb_purge_undo_no = purge_sys->limit.undo_no;
- export_vars.innodb_current_row_locks
- = lock_sys->rec_num;
-
-#ifdef HAVE_ATOMIC_BUILTINS
- export_vars.innodb_have_atomic_builtins = 1;
-#else
- export_vars.innodb_have_atomic_builtins = 0;
-#endif
- export_vars.innodb_page_size = UNIV_PAGE_SIZE;
-
- export_vars.innodb_log_waits = srv_stats.log_waits;
-
- export_vars.innodb_os_log_written = srv_stats.os_log_written;
-
- export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
-
- export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
-
- export_vars.innodb_os_log_pending_writes =
- srv_stats.os_log_pending_writes;
-
- export_vars.innodb_log_write_requests = srv_stats.log_write_requests;
-
- export_vars.innodb_log_writes = srv_stats.log_writes;
-
- export_vars.innodb_dblwr_pages_written =
- srv_stats.dblwr_pages_written;
-
- export_vars.innodb_dblwr_writes = srv_stats.dblwr_writes;
-
- export_vars.innodb_pages_created = stat.n_pages_created;
-
- export_vars.innodb_pages_read = stat.n_pages_read;
- export_vars.innodb_page0_read = srv_stats.page0_read;
-
- export_vars.innodb_pages_written = stat.n_pages_written;
-
- export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count;
-
- export_vars.innodb_row_lock_current_waits =
- srv_stats.n_lock_wait_current_count;
-
- export_vars.innodb_row_lock_time = srv_stats.n_lock_wait_time / 1000; /*
- NOTE(review): the division by 1000 presumably converts
- microseconds to milliseconds - confirm */
-
- if (srv_stats.n_lock_wait_count > 0) { /* avoid dividing by zero
- when no lock wait has been counted */
-
- export_vars.innodb_row_lock_time_avg = (ulint)
- (srv_stats.n_lock_wait_time
- / 1000 / srv_stats.n_lock_wait_count);
-
- } else {
- export_vars.innodb_row_lock_time_avg = 0;
- }
-
- export_vars.innodb_row_lock_time_max =
- lock_sys->n_lock_max_wait_time / 1000;
-
- export_vars.innodb_rows_read = srv_stats.n_rows_read;
-
- export_vars.innodb_rows_inserted = srv_stats.n_rows_inserted;
-
- export_vars.innodb_rows_updated = srv_stats.n_rows_updated;
-
- export_vars.innodb_rows_deleted = srv_stats.n_rows_deleted;
-
- export_vars.innodb_system_rows_read = srv_stats.n_system_rows_read;
-
- export_vars.innodb_system_rows_inserted =
- srv_stats.n_system_rows_inserted;
-
- export_vars.innodb_system_rows_updated =
- srv_stats.n_system_rows_updated;
-
- export_vars.innodb_system_rows_deleted =
- srv_stats.n_system_rows_deleted;
-
- export_vars.innodb_num_open_files = fil_n_file_opened;
-
- export_vars.innodb_truncated_status_writes =
- srv_truncated_status_writes;
-
- export_vars.innodb_available_undo_logs = srv_available_undo_logs;
- export_vars.innodb_read_views_memory
- = os_atomic_increment_ulint(&srv_read_views_memory, 0); /* an
- increment by 0 is used to fetch the current value */
- export_vars.innodb_descriptors_memory
- = os_atomic_increment_ulint(&srv_descriptors_memory, 0);
-
- export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved;
- export_vars.innodb_page_compression_trim_sect512 = srv_stats.page_compression_trim_sect512;
- export_vars.innodb_page_compression_trim_sect4096 = srv_stats.page_compression_trim_sect4096;
- export_vars.innodb_index_pages_written = srv_stats.index_pages_written;
- export_vars.innodb_non_index_pages_written = srv_stats.non_index_pages_written;
- export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed;
- export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op;
- export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved;
- export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed;
- export_vars.innodb_pages_page_compression_error = srv_stats.pages_page_compression_error;
- export_vars.innodb_pages_decrypted = srv_stats.pages_decrypted;
- export_vars.innodb_pages_encrypted = srv_stats.pages_encrypted;
-
- export_vars.innodb_defragment_compression_failures =
- btr_defragment_compression_failures;
- export_vars.innodb_defragment_failures = btr_defragment_failures;
- export_vars.innodb_defragment_count = btr_defragment_count;
-
- export_vars.innodb_onlineddl_rowlog_rows = onlineddl_rowlog_rows;
- export_vars.innodb_onlineddl_rowlog_pct_used = onlineddl_rowlog_pct_used;
- export_vars.innodb_onlineddl_pct_progress = onlineddl_pct_progress;
-
-#ifdef UNIV_DEBUG
- rw_lock_s_lock(&purge_sys->latch); /* debug builds only: snapshot
- the purge position under purge_sys->latch,
- then the newest RW transaction id under
- trx_sys->mutex, to derive the two ages */
- trx_id_t done_trx_no = purge_sys->done.trx_no;
- trx_id_t up_limit_id = purge_sys->view
- ? purge_sys->view->up_limit_id
- : 0;
- rw_lock_s_unlock(&purge_sys->latch);
-
- mutex_enter(&trx_sys->mutex);
- trx_id_t max_trx_id = trx_sys->rw_max_trx_id;
- mutex_exit(&trx_sys->mutex);
-
- if (!done_trx_no || max_trx_id < done_trx_no - 1) { /* report 0
- when nothing was purged yet or the difference
- would be negative */
- export_vars.innodb_purge_trx_id_age = 0;
- } else {
- export_vars.innodb_purge_trx_id_age =
- (ulint) (max_trx_id - done_trx_no + 1);
- }
-
- if (!up_limit_id
- || max_trx_id < up_limit_id) {
- export_vars.innodb_purge_view_trx_id_age = 0;
- } else {
- export_vars.innodb_purge_view_trx_id_age =
- (ulint) (max_trx_id - up_limit_id);
- }
-#endif /* UNIV_DEBUG */
-
- export_vars.innodb_sec_rec_cluster_reads =
- srv_stats.n_sec_rec_cluster_reads;
- export_vars.innodb_sec_rec_cluster_reads_avoided =
- srv_stats.n_sec_rec_cluster_reads_avoided;
-
- if (!srv_read_only_mode) { /* crypt_stat and scrub_stat were only
- filled in above under this same condition */
- export_vars.innodb_encryption_rotation_pages_read_from_cache =
- crypt_stat.pages_read_from_cache;
- export_vars.innodb_encryption_rotation_pages_read_from_disk =
- crypt_stat.pages_read_from_disk;
- export_vars.innodb_encryption_rotation_pages_modified =
- crypt_stat.pages_modified;
- export_vars.innodb_encryption_rotation_pages_flushed =
- crypt_stat.pages_flushed;
- export_vars.innodb_encryption_rotation_estimated_iops =
- crypt_stat.estimated_iops;
- export_vars.innodb_encryption_key_requests =
- srv_stats.n_key_requests;
- export_vars.innodb_key_rotation_list_length =
- srv_stats.key_rotation_list_length;
-
- export_vars.innodb_scrub_page_reorganizations =
- scrub_stat.page_reorganizations;
- export_vars.innodb_scrub_page_splits =
- scrub_stat.page_splits;
- export_vars.innodb_scrub_page_split_failures_underflow =
- scrub_stat.page_split_failures_underflow;
- export_vars.innodb_scrub_page_split_failures_out_of_filespace =
- scrub_stat.page_split_failures_out_of_filespace;
- export_vars.innodb_scrub_page_split_failures_missing_index =
- scrub_stat.page_split_failures_missing_index;
- export_vars.innodb_scrub_page_split_failures_unknown =
- scrub_stat.page_split_failures_unknown;
- }
-
- mutex_exit(&srv_innodb_monitor_mutex);
-}
-
-#ifndef DBUG_OFF
-/** false before InnoDB monitor has been printed at least once, true
-afterwards. srv_printf_innodb_monitor() sets it after flushing its
-output. The variable exists only when DBUG_OFF is not defined. */
-bool srv_debug_monitor_printed = false;
-#endif
-
-/*********************************************************************//**
-A thread which prints the info output by various InnoDB monitors.
-Each pass waits on srv_monitor_event (with a timeout) and, once more than
-15 seconds have passed since the previous report, prints whichever of the
-following are enabled: the InnoDB monitor to stderr, the InnoDB monitor
-to srv_monitor_file (when srv_innodb_status is set), and - at most once
-per 60 seconds each - the tablespace monitor and the table monitor.
-The loop ends when srv_shutdown_state is no longer SRV_SHUTDOWN_NONE; the
-thread then clears srv_monitor_active and exits via os_thread_exit().
-@return a dummy parameter */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(srv_monitor_thread)(void*)
-{
- ib_int64_t sig_count;
- double time_elapsed;
- time_t current_time;
- time_t last_table_monitor_time;
- time_t last_tablespace_monitor_time;
- time_t last_monitor_time;
- /* number of consecutive monitor prints that could not obtain the
- necessary mutex; passed to MUTEX_NOWAIT() to decide on waiting */
- ulint mutex_skipped;
- /* value of srv_print_innodb_monitor seen on the previous report */
- ibool last_srv_print_monitor;
-
- ut_ad(!srv_read_only_mode);
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- /* Name this thread correctly: the message used to say
- "Lock timeout thread", which belongs to a different thread. */
- fprintf(stderr, "Monitor thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif /* UNIV_DEBUG_THREAD_CREATION */
-
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(srv_monitor_thread_key);
-#endif /* UNIV_PFS_THREAD */
-
- srv_last_monitor_time = ut_time();
- last_table_monitor_time = ut_time();
- last_tablespace_monitor_time = ut_time();
- last_monitor_time = ut_time();
- mutex_skipped = 0;
- last_srv_print_monitor = srv_print_innodb_monitor;
-loop:
- /* Wake up every 5 seconds to see if we need to print
- monitor information or if signalled at shutdown. */
-
- sig_count = os_event_reset(srv_monitor_event);
-
- os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
-
- current_time = ut_time();
-
- time_elapsed = difftime(current_time, last_monitor_time);
-
- if (time_elapsed > 15) {
- last_monitor_time = ut_time();
-
- if (srv_print_innodb_monitor) {
- /* Reset mutex_skipped counter everytime
- srv_print_innodb_monitor changes. This is to
- ensure we will not be blocked by lock_sys->mutex
- for short duration information printing,
- such as requested by sync_array_print_long_waits() */
- if (!last_srv_print_monitor) {
- mutex_skipped = 0;
- last_srv_print_monitor = TRUE;
- }
-
- if (!srv_printf_innodb_monitor(stderr,
- MUTEX_NOWAIT(mutex_skipped),
- NULL, NULL)) {
- mutex_skipped++;
- } else {
- /* Reset the counter */
- mutex_skipped = 0;
- }
- } else {
- last_srv_print_monitor = FALSE;
- }
-
-
- /* We don't create the temp files or associated
- mutexes in read-only-mode */
-
- if (!srv_read_only_mode && srv_innodb_status) {
- mutex_enter(&srv_monitor_file_mutex);
- /* Overwrite the previous report from the start of
- the file and cut off whatever is left of it. */
- rewind(srv_monitor_file);
- if (!srv_printf_innodb_monitor(srv_monitor_file,
- MUTEX_NOWAIT(mutex_skipped),
- NULL, NULL)) {
- mutex_skipped++;
- } else {
- mutex_skipped = 0;
- }
-
- os_file_set_eof(srv_monitor_file);
- mutex_exit(&srv_monitor_file_mutex);
- }
-
- if (srv_print_innodb_tablespace_monitor
- && difftime(current_time,
- last_tablespace_monitor_time) > 60) {
- last_tablespace_monitor_time = ut_time();
-
- fputs("========================"
- "========================\n",
- stderr);
-
- ut_print_timestamp(stderr);
-
- fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
- "========================"
- "========================\n",
- stderr);
-
- fsp_print(0);
- fputs("Validating tablespace\n", stderr);
- fsp_validate(0);
- fputs("Validation ok\n"
- "---------------------------------------\n"
- "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
- "=======================================\n",
- stderr);
- }
-
- if (srv_print_innodb_table_monitor
- && difftime(current_time, last_table_monitor_time) > 60) {
-
- last_table_monitor_time = ut_time();
-
- fprintf(stderr, "Warning: %s\n",
- DEPRECATED_MSG_INNODB_TABLE_MONITOR);
-
- fputs("===========================================\n",
- stderr);
-
- ut_print_timestamp(stderr);
-
- fputs(" INNODB TABLE MONITOR OUTPUT\n"
- "===========================================\n",
- stderr);
- dict_print();
-
- fputs("-----------------------------------\n"
- "END OF INNODB TABLE MONITOR OUTPUT\n"
- "==================================\n",
- stderr);
-
- fprintf(stderr, "Warning: %s\n",
- DEPRECATED_MSG_INNODB_TABLE_MONITOR);
- }
- }
-
- if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
- goto exit_func;
- }
-
- /* Keep polling whether or not any monitor is currently enabled;
- the former check of the monitor flags jumped to the same label on
- both outcomes and has been dropped. */
- goto loop;
-
-exit_func:
- srv_monitor_active = false;
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/*********************************************************************//**
-A thread which prints warnings about semaphore waits which have lasted
-too long. These can be used to track bugs which cause hangs.
-On every pass it also checks that the log sequence number has not gone
-backwards, refreshes the InnoDB Monitor statistics when the last refresh is
-more than 60 seconds old, updates the LRU eviction statistics and wakes up
-threads that wait for an already released semaphore. When
-sync_array_print_long_waits() reports the same waiter on the same semaphore
-on more than 10 successive passes, the server is crashed intentionally.
-The thread leaves its loop once srv_shutdown_state is no longer
-SRV_SHUTDOWN_NONE.
-Note: In order to make sync_arr_wake_threads_if_sema_free work as expected,
-we should avoid waiting any mutexes in this function!
-@return a dummy parameter */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(srv_error_monitor_thread)(void*)
-{
-	/* number of successive fatal timeouts observed */
-	ulint		fatal_cnt	= 0;
-	lsn_t		old_lsn;
-	lsn_t		new_lsn;
-	ib_int64_t	sig_count;
-	/* longest waiting thread for a semaphore */
-	os_thread_id_t	waiter		= os_thread_get_curr_id();
-	os_thread_id_t	old_waiter	= waiter;
-	/* the semaphore that is being waited for */
-	const void*	sema		= NULL;
-	const void*	old_sema	= NULL;
-
-	ut_ad(!srv_read_only_mode);
-
-	old_lsn = srv_start_lsn;
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
-	fprintf(stderr, "Error monitor thread starts, id %lu\n",
-		os_thread_pf(os_thread_get_curr_id()));
-#endif /* UNIV_DEBUG_THREAD_CREATION */
-
-#ifdef UNIV_PFS_THREAD
-	pfs_register_thread(srv_error_monitor_thread_key);
-#endif /* UNIV_PFS_THREAD */
-
-loop:
-	/* Try to track a strange bug reported by Harald Fuchs and others,
-	where the lsn seems to decrease at times */
-
-	if (log_peek_lsn(&new_lsn)) {
-		if (new_lsn < old_lsn) {
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				" InnoDB: Error: old log sequence number " LSN_PF
-				" was greater\n"
-				"InnoDB: than the new log sequence number " LSN_PF "!\n"
-				"InnoDB: Please submit a bug report"
-				" to http://bugs.mysql.com\n",
-				old_lsn, new_lsn);
-			ut_ad(0);
-		}
-
-		old_lsn = new_lsn;
-	}
-
-	if (difftime(time(NULL), srv_last_monitor_time) > 60) {
-		/* We refresh InnoDB Monitor values so that averages are
-		printed from at most 60 last seconds */
-
-		srv_refresh_innodb_monitor_stats();
-	}
-
-	/* Update the statistics collected for deciding LRU
-	eviction policy. */
-	buf_LRU_stat_update();
-
-	/* In case mutex_exit is not a memory barrier, it is
-	theoretically possible some threads are left waiting though
-	the semaphore is already released. Wake up those threads: */
-
-	sync_arr_wake_threads_if_sema_free();
-
-	if (sync_array_print_long_waits(&waiter, &sema)
-	    && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
-		/* Same waiter on the same semaphore as on the previous pass. */
-#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
-		if (srv_allow_writes_event->is_set()) {
-#endif /* WITH_WSREP */
-		fatal_cnt++;
-#if defined(WITH_WSREP) && defined(WITH_INNODB_DISALLOW_WRITES)
-		} else {
-			/* Writes are disallowed: do not count this pass
-			towards the intentional crash. The message ends with
-			a newline so that the next log line starts cleanly,
-			and ulint values are cast to match %lu. */
-			fprintf(stderr,
-				"WSREP: avoiding InnoDB self crash due to long "
-				"semaphore wait of > %lu seconds\n"
-				"Server is processing SST donor operation, "
-				"fatal_cnt now: %lu\n",
-				(ulong) srv_fatal_semaphore_wait_threshold,
-				(ulong) fatal_cnt);
-		}
-#endif /* WITH_WSREP */
-		if (fatal_cnt > 10) {
-
-			fprintf(stderr,
-				"InnoDB: Error: semaphore wait has lasted"
-				" > %lu seconds\n"
-				"InnoDB: We intentionally crash the server,"
-				" because it appears to be hung.\n",
-				(ulong) srv_fatal_semaphore_wait_threshold);
-
-			ut_error;
-		}
-	} else {
-		/* A different (or no) long wait: restart the count. */
-		fatal_cnt = 0;
-		old_waiter = waiter;
-		old_sema = sema;
-	}
-
-	/* Flush stderr so that a database user gets the output
-	to possible MySQL error file */
-
-	fflush(stderr);
-
-	sig_count = os_event_reset(srv_error_event);
-
-	os_event_wait_time_low(srv_error_event, 1000000, sig_count);
-
-	if (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
-
-		goto loop;
-	}
-
-	srv_error_monitor_active = false;
-
-	/* We count the number of threads in os_thread_exit(). A created
-	thread should always use that to exit and not use return() to exit. */
-
-	os_thread_exit(NULL);
-
-	OS_THREAD_DUMMY_RETURN;
-}
-
-/******************************************************************//**
-Bump the server activity counter, and additionally the change buffer
-merge activity counter when the caller says the activity came from a
-background change buffer merge. */
-UNIV_INTERN
-void
-srv_inc_activity_count(
-/*===================*/
-	bool	ibuf_merge_activity)	/*!< whether this activity bump
-					is caused by the background
-					change buffer merge */
-{
-	srv_sys.activity_count.inc();
-
-	if (!ibuf_merge_activity) {
-		return;
-	}
-
-	srv_sys.ibuf_merge_activity_count.inc();
-}
-
-/**********************************************************************//**
-Find out whether some background thread is still active.
-The per-type active counters are scanned from SRV_WORKER up to SRV_MASTER
-under the srv_sys mutex. If none is active but a shutdown is in progress
-and purge is neither disabled nor exited, SRV_PURGE is reported.
-@return SRV_NONE if all are suspended or have exited (always in read-only
-mode), otherwise the type of a thread that is still active. */
-UNIV_INTERN
-srv_thread_type
-srv_get_active_thread_type(void)
-/*============================*/
-{
-	if (srv_read_only_mode) {
-		return(SRV_NONE);
-	}
-
-	srv_thread_type	active = SRV_NONE;
-
-	srv_sys_mutex_enter();
-
-	for (ulint type = SRV_WORKER; type <= SRV_MASTER; ++type) {
-		if (srv_sys.n_threads_active[type] == 0) {
-			continue;
-		}
-
-		active = static_cast<srv_thread_type>(type);
-		break;
-	}
-
-	srv_sys_mutex_exit();
-
-	if (active != SRV_NONE) {
-		return(active);
-	}
-
-	/* The purge state is consulted only on shutdown. */
-	if (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
-		return(active);
-	}
-
-	if (trx_purge_state() == PURGE_STATE_DISABLED) {
-		return(active);
-	}
-
-	if (trx_purge_state() == PURGE_STATE_EXIT) {
-		return(active);
-	}
-
-	return(SRV_PURGE);
-}
-
-/******************************************************************//**
-A thread which follows the redo log and outputs the changed page bitmap.
-Each pass waits for srv_checkpoint_completed_event and resets it. While
-srv_track_changed_pages is set and the shutdown state is below
-SRV_SHUTDOWN_LAST_PHASE, it then calls log_online_follow_redo_log() and
-signals srv_redo_log_tracked_event. The loop ends when the shutdown state
-reaches SRV_SHUTDOWN_LAST_PHASE or when log_online_follow_redo_log()
-reports failure.
-@return a dummy value */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(srv_redo_log_follow_thread)(
-/*=======================================*/
- void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter
- required by
- os_thread_create */
-{
- ut_ad(!srv_read_only_mode);
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Redo log follower thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
-
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(srv_log_tracking_thread_key);
-#endif
-
- my_thread_init();
- srv_redo_log_thread_started = true;
-
- do {
- os_event_wait(srv_checkpoint_completed_event);
- os_event_reset(srv_checkpoint_completed_event);
-
- if (srv_track_changed_pages
- && srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {
- if (!log_online_follow_redo_log()) {
- /* TODO: sync with I_S log tracking status? */
- ib_logf(IB_LOG_LEVEL_ERROR,
- "log tracking bitmap write failed, "
- "stopping log tracking thread!");
- break; /* leave the loop; the cleanup below still runs */
- }
- os_event_set(srv_redo_log_tracked_event);
- }
-
- } while (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE);
-
- log_online_read_shutdown();
- os_event_set(srv_redo_log_tracked_event); /* NOTE(review): presumably so that a waiter on this event is not left blocked after the thread exits - confirm */
-
- my_thread_end();
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/*************************************************************//**
-Removes old archived transaction log files from srv_arch_dir.
-Only one selection criterion is applied per call: when before_no is
-non-zero, files are selected by the number that follows the archived log
-prefix in their name and before_date is ignored; otherwise they are
-selected by modification time, and nothing is removed if before_date is
-0 as well.
-A file that fil_space_contains_node() finds in the archive space, or that
-is the archive currently being written, is never deleted.
-@return DB_SUCCESS, or DB_ERROR if the archive directory is not set, its
-path does not fit into OS_FILE_MAX_PATH, it cannot be opened, or a
-selected file could not be deleted */
-dberr_t
-purge_archived_logs(
-	time_t	before_date,	/*!< in: all files modified
-				before timestamp should be removed */
-	lsn_t	before_no)	/*!< in: files with this number in name
-				and earlier should be removed */
-{
-	log_group_t*	group = UT_LIST_GET_FIRST(log_sys->log_groups);
-
-	os_file_dir_t	dir;
-	os_file_stat_t	fileinfo;
-	char		archived_log_filename[OS_FILE_MAX_PATH];
-	char		namegen[OS_FILE_MAX_PATH];
-	ulint		dirnamelen;
-
-	if (!srv_arch_dir) {
-		/* log archive directory is not specified */
-		return(DB_ERROR);
-	}
-
-	dirnamelen = strlen(srv_arch_dir);
-
-	/* The buffer has to hold the directory name, a path separator
-	that may be appended below and the terminating NUL. Without this
-	check the memcpy() below could write past the end of the buffer. */
-	if (dirnamelen + 2 > sizeof(archived_log_filename)) {
-		ib_logf(IB_LOG_LEVEL_WARN,
-			"archived log directory path %s is too long. "
-			"Purge archived logs are not available",
-			srv_arch_dir);
-		return(DB_ERROR);
-	}
-
-	dir = os_file_opendir(srv_arch_dir, FALSE);
-	if (!dir) {
-		ib_logf(IB_LOG_LEVEL_WARN,
-			"opening archived log directory %s failed. "
-			"Purge archived logs are not available",
-			srv_arch_dir);
-		/* failed to open directory */
-		return(DB_ERROR);
-	}
-
-	/* Build "<dir><separator>" once; file names are appended to it. */
-	memcpy(archived_log_filename, srv_arch_dir, dirnamelen);
-	if (dirnamelen &&
-	    archived_log_filename[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
-		archived_log_filename[dirnamelen++] = SRV_PATH_SEPARATOR;
-	}
-
-	memset(&fileinfo, 0, sizeof(fileinfo));
-	while(!os_file_readdir_next_file(srv_arch_dir, dir,
-					  &fileinfo) ) {
-		/* Skip everything that is not named like an archived log. */
-		if (strncmp(fileinfo.name,
-			    IB_ARCHIVED_LOGS_PREFIX, IB_ARCHIVED_LOGS_PREFIX_LEN)) {
-			continue;
-		}
-		/* Skip names that would not fit into the path buffer. */
-		if (dirnamelen + strlen(fileinfo.name) + 2 > OS_FILE_MAX_PATH)
-			continue;
-
-		snprintf(archived_log_filename + dirnamelen,
-			 OS_FILE_MAX_PATH - dirnamelen - 1,
-			 "%s", fileinfo.name);
-
-		if (before_no) {
-			/* Select by the number embedded in the file name. */
-			ib_uint64_t log_file_no = strtoull(fileinfo.name +
-							   IB_ARCHIVED_LOGS_PREFIX_LEN,
-							   NULL, 10);
-			if (log_file_no == 0 || before_no <= log_file_no) {
-				continue;
-			}
-		} else {
-			/* Select by modification time. */
-			fileinfo.mtime = 0;
-			if (os_file_get_status(archived_log_filename,
-					       &fileinfo, false) != DB_SUCCESS ||
-			    fileinfo.mtime == 0) {
-				continue;
-			}
-
-			if (before_date == 0 || fileinfo.mtime > before_date) {
-				continue;
-			}
-		}
-
-		/* We are going to delete archived file. Acquire log_sys->mutex
-		to make sure that we are the only who try to delete file. This
-		also prevents log system from using this file. Do not delete
-		file if it is currently in progress of writing or has
-		pending IO. This is enforced by checking:
-		  1. fil_space_contains_node.
-		  2. group->archived_offset % group->file_size != 0, i.e.
-		     there is archive in progress and we are going to delete it.
-		This covers 3 cases:
-		  a. Usual case when we have one archive in progress,
-		     both 1 and 2 are TRUE
-		  b. When we have more than 1 archive in fil_space,
-		     this can happen when flushed LSN range crosses file
-		     boundary
-		  c. When we have empty fil_space, but existing file will be
-		     opened once archiving operation is requested. This usually
-		     happens on startup.
-		*/
-
-		mutex_enter(&log_sys->mutex);
-
-		log_archived_file_name_gen(namegen, sizeof(namegen),
-					   group->id, group->archived_file_no);
-
-		if (fil_space_contains_node(group->archive_space_id,
-					    archived_log_filename) ||
-		    (group->archived_offset % group->file_size != 0 &&
-		     strcmp(namegen, archived_log_filename) == 0)) {
-
-			mutex_exit(&log_sys->mutex);
-			continue;
-		}
-
-		if (!os_file_delete_if_exists(innodb_file_data_key,
-					      archived_log_filename)) {
-
-			ib_logf(IB_LOG_LEVEL_WARN,
-				"can't delete archived log file %s.",
-				archived_log_filename);
-
-			mutex_exit(&log_sys->mutex);
-			os_file_closedir(dir);
-
-			return(DB_ERROR);
-		}
-
-		mutex_exit(&log_sys->mutex);
-	}
-
-	os_file_closedir(dir);
-
-	return(DB_SUCCESS);
-}
-
-/*******************************************************************//**
-Records database activity and, if no master thread is counted as active,
-signals the event of the master thread slot provided the slot is in use.
-Used in the MySQL interface. Does nothing in read-only mode. The check of
-the active count is done without holding the srv_sys mutex (for
-performance reasons), so there is a small chance that the master thread
-stays suspended. */
-UNIV_INTERN
-void
-srv_active_wake_master_thread(void)
-/*===============================*/
-{
-	if (srv_read_only_mode) {
-		return;
-	}
-
-	ut_ad(!srv_sys_mutex_own());
-
-	srv_inc_activity_count();
-
-	if (srv_sys.n_threads_active[SRV_MASTER] != 0) {
-		/* The master thread is already running. */
-		return;
-	}
-
-	srv_sys_mutex_enter();
-
-	srv_slot_t*	master_slot = &srv_sys.sys_threads[SRV_MASTER_SLOT];
-
-	/* Signal only when the master thread has been started. */
-	if (master_slot->in_use) {
-		ut_a(srv_slot_get_type(master_slot) == SRV_MASTER);
-		os_event_set(master_slot->event);
-	}
-
-	srv_sys_mutex_exit();
-}
-
-/*******************************************************************//**
-Releases one purge thread when purge is in the PURGE_STATE_RUN state and
-no purge thread is counted as active. Both values are read without the
-srv_sys mutex and without purge_sys->latch (for performance reasons), so
-there is a small chance that the purge thread stays suspended. */
-UNIV_INTERN
-void
-srv_wake_purge_thread_if_not_active(void)
-/*=====================================*/
-{
-	ut_ad(!srv_sys_mutex_own());
-
-	if (purge_sys->state != PURGE_STATE_RUN) {
-		return;
-	}
-
-	if (srv_sys.n_threads_active[SRV_PURGE] != 0) {
-		return;
-	}
-
-	srv_release_threads(SRV_PURGE, 1);
-}
-
-/*******************************************************************//**
-Records database activity and releases one thread of type SRV_MASTER,
-i.e. wakes the master thread if it is suspended or being suspended.
-Must be called without holding the srv_sys mutex. */
-UNIV_INTERN
-void
-srv_wake_master_thread(void)
-/*========================*/
-{
-	ut_ad(!srv_sys_mutex_own());
-
-	/* Count the activity first, then release the master thread. */
-	srv_inc_activity_count();
-	srv_release_threads(SRV_MASTER, 1);
-}
-
-/*******************************************************************//**
-Read the current server activity counter. The srv_sys mutex is not held
-for the read because the value is only used in heuristics.
-@return activity count. */
-UNIV_INTERN
-ulint
-srv_get_activity_count(void)
-/*========================*/
-{
-	const ulint	current_count = srv_sys.activity_count;
-
-	return(current_count);
-}
-
-/** Read the current change buffer (ibuf) merge activity counter.
-@return ibuf merge activity count */
-static
-ulint
-srv_get_ibuf_merge_activity_count(void)
-{
-	const ulint	current_count = srv_sys.ibuf_merge_activity_count;
-
-	return(current_count);
-}
-
-/*******************************************************************//**
-Tell whether the server has been active since the given counter values
-were sampled. When old_ibuf_merge_activity_count is ULINT_UNDEFINED, any
-change of the activity counter counts as activity. Otherwise activity
-caused only by background change buffer merge is ignored: the server is
-reported active only if the activity counter advanced by more than the
-change buffer merge counter did.
-@return FALSE if no (relevant) change in activity counter. */
-UNIV_INTERN
-ibool
-srv_check_activity(
-/*===============*/
-	ulint	old_activity_count,	/*!< in: old activity count */
-					/*!< old change buffer merge
-					activity count, or
-					ULINT_UNDEFINED */
-	ulint	old_ibuf_merge_activity_count)
-{
-	const ulint	cur_activity = srv_sys.activity_count;
-
-	if (old_ibuf_merge_activity_count != ULINT_UNDEFINED) {
-		/* Merge activity matters: discount the part of the
-		activity that was due to change buffer merge. */
-		const ulint	cur_merge_activity
-			= srv_sys.ibuf_merge_activity_count;
-
-		ut_ad(cur_merge_activity <= cur_activity);
-		ut_ad(cur_merge_activity >= old_ibuf_merge_activity_count);
-		ut_ad(cur_activity >= old_activity_count);
-
-		const ulint	total_delta
-			= cur_activity - old_activity_count;
-		const ulint	merge_delta
-			= cur_merge_activity - old_ibuf_merge_activity_count;
-
-		return(total_delta > merge_delta);
-	}
-
-	return(cur_activity != old_activity_count);
-}
-
-/********************************************************************//**
-Flush the log buffer in the background once at least
-srv_flush_log_at_timeout seconds have passed since the last such flush.
-The master thread uses this to bound how many seconds of transactions can
-be lost in a crash when innodb_flush_logs_at_trx_commit != 1. */
-static
-void
-srv_sync_log_buffer_in_background(void)
-/*===================================*/
-{
-	const time_t	now = time(NULL);
-
-	srv_main_thread_op_info = "flushing log";
-
-	if (!(difftime(now, srv_last_log_flush_time)
-	      >= srv_flush_log_at_timeout)) {
-		/* The previous flush is recent enough. */
-		return;
-	}
-
-	log_buffer_sync_in_background(TRUE);
-	srv_last_log_flush_time = now;
-	srv_log_writes_and_flush++;
-}
-
-/********************************************************************//**
-Make room in the table cache by evicting unused tables. The eviction runs
-with dict_operation_lock held in exclusive mode and the dictionary mutex
-held.
-@return number of tables evicted. */
-static
-ulint
-srv_master_evict_from_table_cache(
-/*==============================*/
-	ulint	pct_check)	/*!< in: max percent to check */
-{
-	rw_lock_x_lock(&dict_operation_lock);
-	dict_mutex_enter_for_mysql();
-
-	const ulint	evicted = dict_make_room_in_cache(
-		innobase_get_table_cache_size(), pct_check);
-
-	dict_mutex_exit_for_mysql();
-	rw_lock_x_unlock(&dict_operation_lock);
-
-	return(evicted);
-}
-
-/*********************************************************************//**
-This function prints progress message every 60 seconds during server
-shutdown, for any activities that master thread is pending on.
-Nothing is printed, and *last_print_time is left unchanged, unless more
-than 60 seconds have passed since *last_print_time. The change buffer
-message is printed only for a slow shutdown (srv_fast_shutdown == 0). */
-static
-void
-srv_shutdown_print_master_pending(
-/*==============================*/
-	ib_time_t*	last_print_time,	/*!< in/out: last time the
-						function printed the message;
-						updated when it prints */
-	ulint		n_tables_to_drop,	/*!< in: number of tables to
-						be dropped */
-	ulint		n_bytes_merged)		/*!< in: number of change buffer
-						bytes just merged */
-{
-	ib_time_t	current_time;
-	double		time_elapsed;
-
-	current_time = ut_time();
-	time_elapsed = ut_difftime(current_time, *last_print_time);
-
-	if (time_elapsed > 60) {
-		*last_print_time = current_time;
-
-		if (n_tables_to_drop) {
-			ut_print_timestamp(stderr);
-			fprintf(stderr, " InnoDB: Waiting for "
-				"%lu table(s) to be dropped\n",
-				(ulong) n_tables_to_drop);
-		}
-
-		/* Check change buffer merge, we only wait for change buffer
-		merge if it is a slow shutdown */
-		if (!srv_fast_shutdown && n_bytes_merged) {
-			ut_print_timestamp(stderr);
-			/* Cast to match %lu, as done for n_tables_to_drop
-			above. */
-			fprintf(stderr, " InnoDB: Waiting for change "
-				"buffer merge to complete\n"
-				" InnoDB: number of bytes of change buffer "
-				"just merged: %lu\n",
-				(ulong) n_bytes_merged);
-		}
-	}
-}
-
-/*********************************************************************//**
-Perform the tasks that the master thread is supposed to do when the
-server is active. There are two types of tasks. The first category is
-of such tasks which are performed at each invocation of this function.
-We assume that this function is called roughly every second when the
-server is active. The second category is of such tasks which are
-performed at some interval e.g.: purge, dict_LRU cleanup etc.
-The per-invocation tasks are: background drop of tables, the free log
-space check, a change buffer merge and a log flush. The interval tasks
-are the dictionary cache limit enforcement and the checkpoint (plus the
-memory block validation when MEM_PERIODIC_CHECK is defined). The function
-returns early as soon as it sees srv_shutdown_state != SRV_SHUTDOWN_NONE. */
-static
-void
-srv_master_do_active_tasks(void)
-/*============================*/
-{
- ib_time_t cur_time = ut_time();
- ullint counter_time = ut_time_us(NULL);
- ulint n_evicted = 0;
-
- /* First do the tasks that we are supposed to do at each
- invocation of this function. */
-
- ++srv_main_active_loops;
-
- MONITOR_INC(MONITOR_MASTER_ACTIVE_LOOPS);
-
- /* ALTER TABLE in MySQL requires on Unix that the table handler
- can drop tables lazily after there no longer are SELECT
- queries to them. */
- srv_main_thread_op_info = "doing background drop tables";
- row_drop_tables_for_mysql_in_background();
- MONITOR_INC_TIME_IN_MICRO_SECS(
- MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, counter_time);
-
- if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
- return;
- }
-
- /* make sure that there is enough reusable space in the redo
- log files */
- srv_main_thread_op_info = "checking free log space";
- log_free_check();
-
- /* Do an ibuf merge */
- srv_main_thread_op_info = "doing insert buffer merge";
- counter_time = ut_time_us(NULL);
- ibuf_merge_in_background(false);
- MONITOR_INC_TIME_IN_MICRO_SECS(
- MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
-
- /* Flush logs if needed.
- NOTE(review): counter_time is not re-read explicitly from here on;
- presumably MONITOR_INC_TIME_IN_MICRO_SECS advances it - confirm
- against the macro definition. */
- srv_main_thread_op_info = "flushing log";
- srv_sync_log_buffer_in_background();
- MONITOR_INC_TIME_IN_MICRO_SECS(
- MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
-
- /* Now see if various tasks that are performed at defined
- intervals need to be performed. Each of them runs only when
- cur_time, sampled on entry, is a multiple of its interval. */
-
-#ifdef MEM_PERIODIC_CHECK
- /* Check magic numbers of every allocated mem block once in
- SRV_MASTER_MEM_VALIDATE_INTERVAL seconds */
- if (cur_time % SRV_MASTER_MEM_VALIDATE_INTERVAL == 0) {
- mem_validate_all_blocks();
- MONITOR_INC_TIME_IN_MICRO_SECS(
- MONITOR_SRV_MEM_VALIDATE_MICROSECOND, counter_time);
- }
-#endif
- if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
- return;
- }
-
- if (cur_time % SRV_MASTER_DICT_LRU_INTERVAL == 0) {
- srv_main_thread_op_info = "enforcing dict cache limit";
- n_evicted = srv_master_evict_from_table_cache(50); /* pct_check = 50 */
- MONITOR_INC_VALUE(
- MONITOR_SRV_DICT_LRU_EVICT_COUNT_ACTIVE, n_evicted);
- MONITOR_INC_TIME_IN_MICRO_SECS(
- MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
- }
-
- if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
- return;
- }
-
- /* Make a new checkpoint */
- if (cur_time % SRV_MASTER_CHECKPOINT_INTERVAL == 0) {
- srv_main_thread_op_info = "making checkpoint";
- log_checkpoint(TRUE, FALSE, TRUE);
- MONITOR_INC_TIME_IN_MICRO_SECS(
- MONITOR_SRV_CHECKPOINT_MICROSECOND, counter_time);
- }
-}
-
-/*********************************************************************//**
-Perform the tasks that the master thread is supposed to do whenever the
-server is idle. We do check for the server state during this function
-and if the server has entered the shutdown phase we may return from
-the function without completing the required tasks.
-Note that the server can move to active state when we are executing this
-function but we don't check for that as we are supposed to perform more
-or less same tasks when server is active.
-Unlike the active variant, every task here runs on each invocation:
-background drop of tables, the free log space check, a change buffer
-merge, dictionary cache limit enforcement, a log flush, a checkpoint and,
-when srv_log_arch_expire_sec is non-zero, the purge of archived logs
-older than that many seconds. */
-static
-void
-srv_master_do_idle_tasks(void)
-/*==========================*/
-{
- ullint counter_time;
- ulint n_evicted = 0;
-
- ++srv_main_idle_loops;
-
- MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS);
-
-
- /* ALTER TABLE in MySQL requires on Unix that the table handler
- can drop tables lazily after there no longer are SELECT
- queries to them. */
- counter_time = ut_time_us(NULL);
- srv_main_thread_op_info = "doing background drop tables";
- row_drop_tables_for_mysql_in_background();
- MONITOR_INC_TIME_IN_MICRO_SECS(
- MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
- counter_time);
-
- if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
- return;
- }
-
- /* make sure that there is enough reusable space in the redo
- log files */
- srv_main_thread_op_info = "checking free log space";
- log_free_check();
-
- /* Do an ibuf merge */
- counter_time = ut_time_us(NULL);
- srv_main_thread_op_info = "doing insert buffer merge";
- ibuf_merge_in_background(true);
- MONITOR_INC_TIME_IN_MICRO_SECS(
- MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
-
- if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
- return;
- }
-
- srv_main_thread_op_info = "enforcing dict cache limit";
- n_evicted = srv_master_evict_from_table_cache(100); /* pct_check = 100 */
- MONITOR_INC_VALUE(
- MONITOR_SRV_DICT_LRU_EVICT_COUNT_IDLE, n_evicted);
- MONITOR_INC_TIME_IN_MICRO_SECS(
- MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
-
- /* Flush logs if needed */
- srv_sync_log_buffer_in_background();
- MONITOR_INC_TIME_IN_MICRO_SECS(
- MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
-
- if (srv_shutdown_state != SRV_SHUTDOWN_NONE) {
- return;
- }
-
- /* Make a new checkpoint */
- srv_main_thread_op_info = "making checkpoint";
- log_checkpoint(TRUE, FALSE, TRUE);
- MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_CHECKPOINT_MICROSECOND,
- counter_time);
-
- if (srv_shutdown_state > 0) { /* NOTE(review): the other checks here compare against SRV_SHUTDOWN_NONE; presumably equivalent - confirm */
- return;
- }
-
- if (srv_log_arch_expire_sec) {
- srv_main_thread_op_info = "purging archived logs";
- purge_archived_logs(ut_time() - srv_log_arch_expire_sec,
- 0); /* before_no = 0: select files by modification time */
- }
-}
-
-/** Run the master thread's shutdown work until nothing is left to do.
-Each round drops tables queued for background drop and, if requested,
-merges the change buffer and flushes the log. Rounds are repeated while
-the last round merged any change buffer bytes or tables remain to be
-dropped.
-@param[in]	ibuf_merge	whether to complete the change buffer merge */
-static
-void
-srv_shutdown(bool ibuf_merge)
-{
-	ulint		merged_bytes = 0;
-	ib_time_t	last_report = ut_time();
-
-	for (;;) {
-		ut_ad(!srv_read_only_mode);
-		ut_ad(srv_shutdown_state == SRV_SHUTDOWN_CLEANUP);
-		++srv_main_shutdown_loops;
-
-		/* FIXME: Remove the background DROP TABLE queue; it is not
-		crash-safe and breaks ACID. */
-		srv_main_thread_op_info = "doing background drop tables";
-		const ulint	tables_left
-			= row_drop_tables_for_mysql_in_background();
-
-		if (ibuf_merge) {
-			srv_main_thread_op_info = "checking free log space";
-			log_free_check();
-			srv_main_thread_op_info = "doing insert buffer merge";
-			merged_bytes = ibuf_merge_in_background(true);
-
-			/* Flush logs if needed */
-			srv_sync_log_buffer_in_background();
-		}
-
-		/* Report progress (at most every 60 seconds) when
-		verbose logging is enabled. */
-		if (srv_print_verbose_log) {
-			srv_shutdown_print_master_pending(
-				&last_report, tables_left, merged_bytes);
-		}
-
-		if (!merged_bytes && !tables_left) {
-			break;
-		}
-	}
-}
-
-/*********************************************************************//**
-Put the master thread to sleep for one second. The master thread polls:
-it sleeps here before it checks the state of the server again. The
-operation info string reads "sleeping" for the duration of the sleep and
-is cleared afterwards. */
-static
-void
-srv_master_sleep(void)
-/*==================*/
-{
-	const ulint	sleep_interval = 1000000;
-
-	srv_main_thread_op_info = "sleeping";
-
-	os_thread_sleep(sleep_interval);
-
-	srv_main_thread_op_info = "";
-}
-
-/*********************************************************************//**
-The master thread controlling the server.
-While srv_shutdown_state is SRV_SHUTDOWN_NONE it repeatedly sleeps via
-srv_master_sleep() and then runs either the active or the idle task set,
-depending on what srv_check_activity() reports for the counters sampled
-on the previous active pass. With srv_force_recovery at or above
-SRV_FORCE_NO_BACKGROUND the task loop is skipped and the thread suspends.
-Once a shutdown state is seen, the thread runs srv_shutdown() (only in
-SRV_SHUTDOWN_CLEANUP with srv_fast_shutdown < 2), suspends its slot and
-exits.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(srv_master_thread)(
-/*==============================*/
- void* arg MY_ATTRIBUTE((unused)))
- /*!< in: a dummy parameter required by
- os_thread_create */
-{
- my_thread_init();
-
- srv_slot_t* slot;
- ulint old_activity_count = srv_get_activity_count();
- ulint old_ibuf_merge_activity_count
- = srv_get_ibuf_merge_activity_count();
-
- ut_ad(!srv_read_only_mode);
-
- srv_master_tid = os_thread_get_tid();
-
- os_thread_set_priority(srv_master_tid, srv_sched_priority_master);
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Master thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif /* UNIV_DEBUG_THREAD_CREATION */
-
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(srv_master_thread_key);
-#endif /* UNIV_PFS_THREAD */
-
- srv_main_thread_process_no = os_proc_get_number();
- srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
-
- slot = srv_reserve_slot(SRV_MASTER);
- ut_a(slot == srv_sys.sys_threads); /* the master must get the first slot of the array */
-
-loop:
- if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
- goto suspend_thread;
- }
-
- while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
-
- srv_master_sleep();
-
- MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);
-
- srv_current_thread_priority = srv_master_thread_priority;
-
- if (srv_check_activity(old_activity_count,
- old_ibuf_merge_activity_count)) {
-
- old_activity_count = srv_get_activity_count(); /* re-sample both counters for the next comparison */
- old_ibuf_merge_activity_count
- = srv_get_ibuf_merge_activity_count();
- srv_master_do_active_tasks();
- } else {
- srv_master_do_idle_tasks();
- }
- }
-
-suspend_thread:
- switch (srv_shutdown_state) {
- case SRV_SHUTDOWN_NONE:
- break; /* not shutting down: suspend below and wait to be resumed */
- case SRV_SHUTDOWN_FLUSH_PHASE:
- case SRV_SHUTDOWN_LAST_PHASE:
- ut_ad(0); /* not expected here; debug builds assert */
- /* fall through */
- case SRV_SHUTDOWN_EXIT_THREADS:
- /* srv_init_abort() must have been invoked */
- case SRV_SHUTDOWN_CLEANUP:
- if (srv_shutdown_state == SRV_SHUTDOWN_CLEANUP
- && srv_fast_shutdown < 2) {
- srv_shutdown(srv_fast_shutdown == 0); /* change buffer merge is completed only when srv_fast_shutdown is 0 */
- }
- srv_suspend_thread(slot);
- my_thread_end();
- os_thread_exit(NULL);
- }
-
- srv_main_thread_op_info = "suspending";
-
- srv_suspend_thread(slot);
-
- /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
- waits for database activity to die down when converting < 4.1.x
- databases, and relies on this string being exactly as it is. InnoDB
- manual also mentions this string in several places. */
- srv_main_thread_op_info = "waiting for server activity";
-
- srv_resume_thread(slot);
- goto loop;
-}
-
-/** Decide whether the purge threads should stop.
-Purge never exits while srv_undo_sources is set. After that, a fast
-shutdown exits at once, while a slow shutdown keeps purging as long as
-the last batch purged something or active transactions remain.
-@param[in]	n_purged	pages purged in the last batch
-@return whether purge should exit */
-static
-bool
-srv_purge_should_exit(ulint n_purged)
-{
-	ut_ad(srv_shutdown_state == SRV_SHUTDOWN_NONE
-	      || srv_shutdown_state == SRV_SHUTDOWN_CLEANUP);
-
-	bool	should_exit;
-
-	if (srv_undo_sources) {
-		should_exit = false;
-	} else if (srv_fast_shutdown) {
-		should_exit = true;
-	} else if (n_purged) {
-		/* Slow shutdown, and the previous round still did
-		some work. */
-		should_exit = false;
-	} else {
-		/* Slow shutdown: exit if there are no active
-		transactions to roll back. */
-		should_exit = (trx_sys_any_active_transactions() == 0);
-	}
-
-	return(should_exit);
-}
-
-/*********************************************************************//**
-Take the first task, if any, off the work queue and run it. The queue is
-accessed under srv_sys.tasks_mutex; the task itself runs outside the
-mutex. After a task has run, purge_sys->n_completed is incremented and
-server activity is recorded.
-@return true if a task was executed */
-static
-bool
-srv_task_execute(void)
-/*==================*/
-{
-	ut_ad(!srv_read_only_mode);
-	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
-
-	que_thr_t*	task = NULL;
-
-	mutex_enter(&srv_sys.tasks_mutex);
-
-	if (UT_LIST_GET_LEN(srv_sys.tasks) > 0) {
-
-		task = UT_LIST_GET_FIRST(srv_sys.tasks);
-
-		/* Only purge tasks are expected in the queue. */
-		ut_a(que_node_get_type(task->child) == QUE_NODE_PURGE);
-
-		UT_LIST_REMOVE(queue, srv_sys.tasks, task);
-	}
-
-	mutex_exit(&srv_sys.tasks_mutex);
-
-	if (task == NULL) {
-		/* The queue was empty. */
-		return(false);
-	}
-
-	que_run_threads(task);
-
-	os_atomic_inc_ulint(
-		&purge_sys->bh_mutex, &purge_sys->n_completed, 1);
-
-	srv_inc_activity_count();
-
-	return(true);
-}
-
-static ulint purge_tid_i = 0; /* incremented atomically by each worker thread; the result is that worker's index into srv_purge_tids[] */
-
-/*********************************************************************//**
-Worker thread that reads tasks from the work queue and executes them.
-The thread reserves an SRV_WORKER slot, then loops: it suspends itself,
-is resumed, executes at most one task via srv_task_execute() and, if a
-task was executed, wakes the purge coordinator when that is not active.
-The loop ends once purge_sys->state is PURGE_STATE_EXIT; the slot is then
-freed and the thread exits.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(srv_worker_thread)(
-/*==============================*/
- void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter
- required by os_thread_create */
-{
- my_thread_init();
-
- srv_slot_t* slot;
- ulint tid_i = os_atomic_increment_ulint(&purge_tid_i, 1);
-
- ut_ad(tid_i < srv_n_purge_threads);
- ut_ad(!srv_read_only_mode);
- ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
-
- srv_purge_tids[tid_i] = os_thread_get_tid();
- os_thread_set_priority(srv_purge_tids[tid_i],
- srv_sched_priority_purge);
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: worker thread starting, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif /* UNIV_DEBUG_THREAD_CREATION */
-
- slot = srv_reserve_slot(SRV_WORKER);
-
- ut_a(srv_n_purge_threads > 1); /* workers exist only when more than one purge thread is configured */
-
- srv_sys_mutex_enter();
-
- ut_a(srv_sys.n_threads_active[SRV_WORKER] < srv_n_purge_threads);
-
- srv_sys_mutex_exit();
-
- /* We need to ensure that the worker threads exit after the
- purge coordinator thread. Otherwise the purge coordinator can
- end up waiting forever in trx_purge_wait_for_workers_to_complete() */
-
- do {
- srv_suspend_thread(slot);
- srv_resume_thread(slot);
-
- srv_current_thread_priority = srv_purge_thread_priority;
-
- if (srv_task_execute()) {
-
- /* If there are tasks in the queue, wakeup
- the purge coordinator thread. */
-
- srv_wake_purge_thread_if_not_active();
- }
-
- /* Note: we are checking the state without holding the
- purge_sys->latch here. */
- } while (purge_sys->state != PURGE_STATE_EXIT);
-
- srv_free_slot(slot);
-
- rw_lock_x_lock(&purge_sys->latch);
-
- ut_a(!purge_sys->running);
- ut_a(purge_sys->state == PURGE_STATE_EXIT);
- ut_a(srv_shutdown_state > SRV_SHUTDOWN_NONE);
-
- rw_lock_x_unlock(&purge_sys->latch);
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Purge worker thread exiting, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif /* UNIV_DEBUG_THREAD_CREATION */
-
- my_thread_end();
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
-}
-
-/*********************************************************************//**
-Do the actual purge operation.
-Runs purge batches of srv_purge_batch_size via trx_purge() until a batch
-purges nothing, srv_purge_should_exit() says so, purge leaves the
-PURGE_STATE_RUN state, or the history list is found empty. The number of
-threads passed to trx_purge() is adapted between 1 and n_threads on every
-round. count, n_use_threads and rseg_history_len are function-static, so
-their values carry over from one call to the next.
-@return length of history list before the last purge batch. */
-static
-ulint
-srv_do_purge(
-/*=========*/
- ulint n_threads, /*!< in: number of threads to use */
- ulint* n_total_purged) /*!< in/out: total pages purged */
-{
- ulint n_pages_purged;
-
- static ulint count = 0;
- static ulint n_use_threads = 0;
- static ulint rseg_history_len = 0;
- ulint old_activity_count = srv_get_activity_count();
-
- ut_a(n_threads > 0);
- ut_ad(!srv_read_only_mode);
-
- /* Purge until there are no more records to purge and there is
- no change in configuration or server state. If the user has
- configured more than one purge thread then we treat that as a
- pool of threads and only use the extra threads if purge can't
- keep up with updates. */
-
- if (n_use_threads == 0) {
- n_use_threads = n_threads; /* first call: start with all configured threads */
- }
-
- do {
- srv_current_thread_priority = srv_purge_thread_priority;
-
- if (trx_sys->rseg_history_len > rseg_history_len
- || (srv_max_purge_lag > 0
- && rseg_history_len > srv_max_purge_lag)) {
-
- /* History length is now longer than what it was
- when we took the last snapshot. Use more threads. */
-
- if (n_use_threads < n_threads) {
- ++n_use_threads;
- }
-
- } else if (srv_check_activity(old_activity_count)
- && n_use_threads > 1) {
-
- /* History length same or smaller since last snapshot,
- use fewer threads. */
-
- --n_use_threads;
-
- old_activity_count = srv_get_activity_count();
- }
-
- /* Ensure that the purge threads are less than what
- was configured. */
-
- ut_a(n_use_threads > 0);
- ut_a(n_use_threads <= n_threads);
-
- /* Take a snapshot of the history list before purge. */
- if ((rseg_history_len = trx_sys->rseg_history_len) == 0) {
- break; /* nothing to purge */
- }
-
- n_pages_purged = trx_purge(
- n_use_threads, srv_purge_batch_size,
- (++count % TRX_SYS_N_RSEGS) == 0); /* last argument is true on every TRX_SYS_N_RSEGS-th batch */
-
- *n_total_purged += n_pages_purged;
-
- } while (!srv_purge_should_exit(n_pages_purged)
- && n_pages_purged > 0
- && purge_sys->state == PURGE_STATE_RUN);
-
- return(rseg_history_len);
-}
-
-/*********************************************************************//**
-Suspend the purge coordinator thread.
-The thread's slot is suspended and purge_sys->running is cleared under
-purge_sys->latch; the thread then waits in srv_resume_thread(). After each
-wake-up it re-evaluates, under the latch, whether it has to stay stopped:
-either purge is in PURGE_STATE_STOP while no shutdown is in progress, or
-the wait timed out with a history list that is unchanged and shorter than
-5000. The loop repeats while that holds and srv_undo_sources is set. */
-static
-void
-srv_purge_coordinator_suspend(
-/*==========================*/
- srv_slot_t* slot, /*!< in/out: Purge coordinator
- thread slot */
- ulint rseg_history_len) /*!< in: history list length
- before last purge */
-{
- ut_ad(!srv_read_only_mode);
- ut_a(slot->type == SRV_PURGE);
-
- bool stop = false;
-
- /** Maximum wait time on the purge event, in micro-seconds. */
- static const ulint SRV_PURGE_MAX_TIMEOUT = 10000;
-
- ib_int64_t sig_count = srv_suspend_thread(slot);
-
- do {
- rw_lock_x_lock(&purge_sys->latch);
-
- purge_sys->running = false;
-
- rw_lock_x_unlock(&purge_sys->latch);
-
- /* We don't wait right away on the non-timed wait because
- we want to signal the thread that wants to suspend purge. */
- const bool wait = stop
- || rseg_history_len <= trx_sys->rseg_history_len;
- const bool timeout = srv_resume_thread(
- slot, sig_count, wait,
- stop ? 0 : SRV_PURGE_MAX_TIMEOUT); /* 0 when stopped - presumably an untimed wait, confirm against srv_resume_thread() */
-
- sig_count = srv_suspend_thread(slot);
-
- rw_lock_x_lock(&purge_sys->latch);
-
- stop = (srv_shutdown_state == SRV_SHUTDOWN_NONE
- && purge_sys->state == PURGE_STATE_STOP);
-
- if (!stop) {
- ut_a(purge_sys->n_stop == 0);
- purge_sys->running = true;
-
- if (timeout
- && rseg_history_len == trx_sys->rseg_history_len
- && trx_sys->rseg_history_len < 5000) {
- /* No new records were added since the
- wait started. Simply wait for new
- records. The magic number 5000 is an
- approximation for the case where we
- have cached UNDO log records which
- prevent truncate of the UNDO
- segments. */
- stop = true;
- }
- } else {
- ut_a(purge_sys->n_stop > 0);
-
- /* Signal that we are suspended. */
- os_event_set(purge_sys->event);
- }
-
- rw_lock_x_unlock(&purge_sys->latch);
- } while (stop && srv_undo_sources);
-
- srv_resume_thread(slot, 0, false); /* wait == false: the slot was suspended at the end of the loop, resume it without waiting */
-}
-
-/*********************************************************************//**
-Purge coordinator thread that schedules the purge tasks. It alternates between srv_purge_coordinator_suspend() and srv_do_purge() until srv_purge_should_exit() holds.
-@return a dummy parameter */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(srv_purge_coordinator_thread)(
-/*=========================================*/
- void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter
- required by os_thread_create */
-{
- my_thread_init();
-
- srv_slot_t* slot;
- ulint n_total_purged = ULINT_UNDEFINED; /* not 0, so the first pass is not taken for an idle purge */
-
- ut_ad(!srv_read_only_mode);
- ut_a(srv_n_purge_threads >= 1);
- ut_a(trx_purge_state() == PURGE_STATE_INIT);
- ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
-
- srv_purge_tids[0] = os_thread_get_tid();
- os_thread_set_priority(srv_purge_tids[0], srv_sched_priority_purge);
-
- rw_lock_x_lock(&purge_sys->latch);
-
- purge_sys->running = true;
- purge_sys->state = PURGE_STATE_RUN;
-
- rw_lock_x_unlock(&purge_sys->latch);
-
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(srv_purge_thread_key);
-#endif /* UNIV_PFS_THREAD */
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Purge coordinator thread created, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif /* UNIV_DEBUG_THREAD_CREATION */
-
- slot = srv_reserve_slot(SRV_PURGE);
-
- ulint rseg_history_len = trx_sys->rseg_history_len;
-
- do {
- /* If purge has been stopped or the last purge didn't
- purge any records then wait for activity. */
-
- if (srv_shutdown_state == SRV_SHUTDOWN_NONE
- && srv_undo_sources
- && (purge_sys->state == PURGE_STATE_STOP
- || n_total_purged == 0)) {
-
- srv_purge_coordinator_suspend(slot, rseg_history_len);
- }
-
- ut_ad(!slot->suspended);
-
- if (srv_purge_should_exit(n_total_purged)) {
- break;
- }
-
- n_total_purged = 0;
-
- srv_current_thread_priority = srv_purge_thread_priority;
-
- rseg_history_len = srv_do_purge(
- srv_n_purge_threads, &n_total_purged);
-
- srv_inc_activity_count();
- } while (!srv_purge_should_exit(n_total_purged));
-
- /* The task queue should always be empty, independent of fast
- shutdown state. */
- ut_a(srv_get_task_queue_length() == 0);
-
- srv_free_slot(slot);
-
- /* Note that we are shutting down. */
- rw_lock_x_lock(&purge_sys->latch);
-
- purge_sys->state = PURGE_STATE_EXIT;
-
- purge_sys->running = false;
-
- rw_lock_x_unlock(&purge_sys->latch);
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Purge coordinator exiting, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif /* UNIV_DEBUG_THREAD_CREATION */
-
- /* Ensure that all the worker threads (srv_n_purge_threads - 1 of them) quit. */
- if (srv_n_purge_threads > 1) {
- srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1);
- }
-
- my_thread_end();
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
-}
-
-/**********************************************************************//**
-Enqueues a task at the end of the server task queue (under srv_sys.tasks_mutex)
-and releases a worker thread, if there is a suspended one. */
-UNIV_INTERN
-void
-srv_que_task_enqueue_low(
-/*=====================*/
- que_thr_t* thr) /*!< in: query thread */
-{
- ut_ad(!srv_read_only_mode);
- mutex_enter(&srv_sys.tasks_mutex);
-
- UT_LIST_ADD_LAST(queue, srv_sys.tasks, thr);
-
- mutex_exit(&srv_sys.tasks_mutex);
-
- srv_release_threads(SRV_WORKER, 1); /* done after the mutex has been released */
-}
-
-/**********************************************************************//**
-Get the count of tasks in the server task queue, read under srv_sys.tasks_mutex.
-@return number of tasks in queue at the time of the call */
-UNIV_INTERN
-ulint
-srv_get_task_queue_length(void)
-/*===========================*/
-{
- ulint n_tasks;
-
- ut_ad(!srv_read_only_mode);
-
- mutex_enter(&srv_sys.tasks_mutex);
-
- n_tasks = UT_LIST_GET_LEN(srv_sys.tasks);
-
- mutex_exit(&srv_sys.tasks_mutex);
-
- return(n_tasks);
-}
-
-/** Wake up the purge coordinator and any purge worker threads; does nothing when srv_force_recovery >= SRV_FORCE_NO_BACKGROUND. */
-UNIV_INTERN
-void
-srv_purge_wakeup()
-{
- ut_ad(!srv_read_only_mode);
-
- if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
-
- srv_release_threads(SRV_PURGE, 1); /* the coordinator reserves the single SRV_PURGE slot */
-
- if (srv_n_purge_threads > 1) {
- ulint n_workers = srv_n_purge_threads - 1; /* all purge threads except the coordinator */
-
- srv_release_threads(SRV_WORKER, n_workers);
- }
- }
-}
-
-/** Check whether given space id is undo tablespace id
-@param[in] space_id space id to check
-@return true if space_id is in [srv_undo_space_id_start, srv_undo_space_id_start + srv_undo_tablespaces_open), else false; always false when srv_undo_space_id_start is 0. */
-bool
-srv_is_undo_tablespace(
- ulint space_id)
-{
- if (srv_undo_space_id_start == 0) {
- return (false);
- }
-
- return(space_id >= srv_undo_space_id_start
- && space_id < (srv_undo_space_id_start
- + srv_undo_tablespaces_open));
-}
diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc
deleted file mode 100644
index fd129c3e55f..00000000000
--- a/storage/xtradb/srv/srv0start.cc
+++ /dev/null
@@ -1,3430 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved.
-Copyright (c) 2008, Google Inc.
-Copyright (c) 2009, Percona Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file srv/srv0start.cc
-Starts the InnoDB database server
-
-Created 2/16/1996 Heikki Tuuri
-*************************************************************************/
-
-#include "mysqld.h"
-#include "pars0pars.h"
-#include "row0ftsort.h"
-#include "ut0mem.h"
-#include "mem0mem.h"
-#include "data0data.h"
-#include "data0type.h"
-#include "dict0dict.h"
-#include "buf0buf.h"
-#include "buf0dump.h"
-#include "os0file.h"
-#include "os0thread.h"
-#include "fil0fil.h"
-#include "fil0crypt.h"
-#include "fsp0fsp.h"
-#include "rem0rec.h"
-#include "mtr0mtr.h"
-#include "log0log.h"
-#include "log0online.h"
-#include "log0recv.h"
-#include "page0page.h"
-#include "page0cur.h"
-#include "trx0trx.h"
-#include "trx0sys.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "rem0rec.h"
-#include "ibuf0ibuf.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-#include "buf0flu.h"
-#include "btr0defragment.h"
-#include "ut0timer.h"
-#include "btr0scrub.h"
-
-#include <mysql/service_wsrep.h>
-
-#ifndef UNIV_HOTBACKUP
-# include "trx0rseg.h"
-# include "os0proc.h"
-# include "sync0sync.h"
-# include "buf0flu.h"
-# include "buf0mtflu.h"
-# include "buf0rea.h"
-# include "dict0boot.h"
-# include "dict0load.h"
-# include "dict0stats_bg.h"
-# include "que0que.h"
-# include "usr0sess.h"
-# include "lock0lock.h"
-# include "trx0roll.h"
-# include "trx0purge.h"
-# include "lock0lock.h"
-# include "pars0pars.h"
-# include "btr0sea.h"
-# include "rem0cmp.h"
-# include "dict0crea.h"
-# include "row0ins.h"
-# include "row0sel.h"
-# include "row0upd.h"
-# include "row0row.h"
-# include "row0mysql.h"
-# include "btr0pcur.h"
-# include "os0sync.h"
-# include "zlib.h"
-# include "ut0crc32.h"
-# include "os0stacktrace.h"
-
-/** Log sequence number immediately after startup */
-UNIV_INTERN lsn_t srv_start_lsn;
-/** Log sequence number at shutdown */
-UNIV_INTERN lsn_t srv_shutdown_lsn;
-
-#ifdef HAVE_DARWIN_THREADS
-# include <sys/utsname.h>
-/** TRUE if the F_FULLFSYNC option is available */
-UNIV_INTERN ibool srv_have_fullfsync = FALSE;
-#endif
-
-/** TRUE if a raw partition is in use */
-UNIV_INTERN ibool srv_start_raw_disk_in_use = FALSE;
-
-/** UNDO tablespaces start with this space id; srv_is_undo_tablespace() treats 0 as "no undo tablespaces". */
-ulint srv_undo_space_id_start;
-
-/** TRUE if the server is being started, before rolling back any
-incomplete transactions */
-UNIV_INTERN ibool srv_startup_is_before_trx_rollback_phase = FALSE;
-/** TRUE if the server is being started */
-UNIV_INTERN ibool srv_is_being_started = FALSE;
-/** TRUE if the server was successfully started */
-UNIV_INTERN ibool srv_was_started = FALSE;
-/** TRUE if innobase_start_or_create_for_mysql() has been called */
-static ibool srv_start_has_been_called;
-
-/** Whether any undo log records can be generated */
-UNIV_INTERN bool srv_undo_sources;
-
-#ifdef UNIV_DEBUG
-/** InnoDB system tablespace to set during recovery */
-UNIV_INTERN uint srv_sys_space_size_debug;
-#endif /* UNIV_DEBUG */
-
-/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
-SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
-UNIV_INTERN enum srv_shutdown_state srv_shutdown_state = SRV_SHUTDOWN_NONE;
-
-/** Files comprising the system tablespace. Also used by Mariabackup, and by create_log_files() for the redo log file handles. */
-UNIV_INTERN pfs_os_file_t files[1000];
-
-/** io_handler_thread parameters for thread identification */
-static ulint n[SRV_MAX_N_IO_THREADS];
-/** io_handler_thread identifiers, 32 is the maximum number of purge threads.
-The extra elements at the end are allocated as follows:
-SRV_MAX_N_IO_THREADS + 1: srv_master_thread
-SRV_MAX_N_IO_THREADS + 2: lock_wait_timeout_thread
-SRV_MAX_N_IO_THREADS + 3: srv_error_monitor_thread
-SRV_MAX_N_IO_THREADS + 4: srv_monitor_thread
-SRV_MAX_N_IO_THREADS + 5: srv_redo_log_follow_thread
-SRV_MAX_N_IO_THREADS + 6: srv_purge_coordinator_thread
-SRV_MAX_N_IO_THREADS + 7: srv_worker_thread
-...
-SRV_MAX_N_IO_THREADS + 7 + srv_n_purge_threads - 1: srv_worker_thread */
-static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7
- + SRV_MAX_N_PURGE_THREADS
- + MTFLUSH_MAX_WORKER];
-/* Thread context data for multi-threaded flush */
-void *mtflush_ctx=NULL;
-
-/** Thread handles (note: sized without the MTFLUSH_MAX_WORKER term that thread_ids has) */
-static os_thread_t thread_handles[SRV_MAX_N_IO_THREADS + 7 + SRV_MAX_N_PURGE_THREADS];
-static os_thread_t buf_flush_page_cleaner_thread_handle;
-static os_thread_t buf_dump_thread_handle;
-static os_thread_t dict_stats_thread_handle;
-static os_thread_t buf_flush_lru_manager_thread_handle;
-static os_thread_t srv_redo_log_follow_thread_handle;
-/** Status variables: has the corresponding thread been started? */
-static bool thread_started[SRV_MAX_N_IO_THREADS + 7 + SRV_MAX_N_PURGE_THREADS] = {false};
-static bool buf_flush_page_cleaner_thread_started = false;
-static bool buf_dump_thread_started = false;
-static bool dict_stats_thread_started = false;
-static bool buf_flush_lru_manager_thread_started = false;
-static bool srv_redo_log_follow_thread_started = false;
-
-/** We use this mutex to test the return value of pthread_mutex_trylock
- on successful locking. HP-UX does NOT return 0, though Linux et al do. */
-static os_fast_mutex_t srv_os_test_mutex;
-
-/** Name of srv_monitor_file */
-static char* srv_monitor_file_name;
-#endif /* !UNIV_HOTBACKUP */
-
-/** Default undo tablespace size, as a count of pages of UNIV_PAGE_SIZE_DEF bytes (10MB in total). */
-static const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES =
- ((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF;
-
-/** Limits on pending i/o requests (presumably passed to the AIO setup; the use is outside this excerpt) */
-#define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD
-#define SRV_MAX_N_PENDING_SYNC_IOS 100
-
-#ifdef UNIV_PFS_THREAD
-/* Keys to register InnoDB threads with performance schema */
-UNIV_INTERN mysql_pfs_key_t io_handler_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_lock_timeout_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_error_monitor_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_monitor_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_master_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_purge_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_log_tracking_thread_key;
-#endif /* UNIV_PFS_THREAD */
-
-/** Innobase start-up aborted. Log where and why; this function itself performs no other cleanup.
-@param[in] create_new_db TRUE if new db is being created
-@param[in] file File name
-@param[in] line Line number
-@param[in] err Reason for aborting InnoDB startup
-@return err, passed through unchanged. */
-static
-dberr_t
-srv_init_abort(
- bool create_new_db,
- const char* file,
- ulint line,
- dberr_t err)
-{
- if (create_new_db) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Database creation was aborted"
- " at %s [" ULINTPF "]"
- " with error %s. You may need"
- " to delete the ibdata1 file before trying to start"
- " up again.",
- file, line, ut_strerr(err));
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Plugin initialization aborted"
- " at %s [" ULINTPF "]"
- " with error %s.",
- file, line, ut_strerr(err));
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Convert a numeric string that optionally ends in G or M or K, to a number
-containing megabytes. G multiplies by 1024, M is taken as is, K divides by 1024; with no suffix the value is bytes and is divided by 1024 * 1024 (integer division).
-@return pointer to the first character after the number and its optional suffix */
-static
-char*
-srv_parse_megabytes(
-/*================*/
- char* str, /*!< in: string containing a quantity, in bytes unless suffixed */
- ulint* megs) /*!< out: the number in megabytes */
-{
- char* endp;
- ulint size;
-
- size = strtoul(str, &endp, 10); /* no digits: size is 0 and endp == str */
-
- str = endp;
-
- switch (*str) {
- case 'G': case 'g':
- size *= 1024; /* NOTE(review): not checked for overflow */
- /* fall through */
- case 'M': case 'm':
- str++;
- break;
- case 'K': case 'k':
- size /= 1024;
- str++;
- break;
- default:
- size /= 1024 * 1024; /* plain bytes; the character is not consumed */
- break;
- }
-
- *megs = size;
- return(str);
-}
-
-/*********************************************************************//**
-Check if a file can be opened in read-write mode.
-@return true if it doesn't exist or can be opened in rw mode; false if it is not a regular file, lacks the permission, or its status cannot be read. */
-static
-bool
-srv_file_check_mode(
-/*================*/
- const char* name) /*!< in: filename to check */
-{
- os_file_stat_t stat;
-
- memset(&stat, 0x0, sizeof(stat));
-
- dberr_t err = os_file_get_status(name, &stat, true);
-
- if (err == DB_FAIL) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "os_file_get_status() failed on '%s'. Can't determine "
- "file permissions", name);
-
- return(false);
-
- } else if (err == DB_SUCCESS) {
-
- /* Note: stat.rw_perm is only valid for regular files */
-
- if (stat.type == OS_FILE_TYPE_FILE) {
-
- if (!stat.rw_perm) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "%s can't be opened in %s mode",
- name,
- srv_read_only_mode
- ? "read" : "read-write");
-
- return(false);
- }
- } else {
- /* Not a regular file, bail out. */
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "'%s' not a regular file.", name);
-
- return(false);
- }
- } else {
-
- /* The file does not exist. This is OK. If the file create fails on RO media, there
- is nothing we can do. */
-
- ut_a(err == DB_NOT_FOUND);
- }
-
- return(true);
-}
-
-/*********************************************************************//**
-Reads the data files and their sizes from a character string given in
-the .cnf file. Works in two passes: the first validates the syntax and counts the files, the second fills srv_data_file_names, srv_data_file_sizes and srv_data_file_is_raw_partition.
-@return TRUE if ok, FALSE on parse error */
-UNIV_INTERN
-ibool
-srv_parse_data_file_paths_and_sizes(
-/*================================*/
- char* str) /*!< in/out: the data file path string; the ':' after each path is overwritten with NUL and srv_data_file_names[] point into this buffer */
-{
- char* input_str;
- char* path;
- ulint size;
- ulint i = 0;
-
- srv_auto_extend_last_data_file = FALSE;
- srv_last_file_size_max = 0;
- srv_data_file_names = NULL;
- srv_data_file_sizes = NULL;
- srv_data_file_is_raw_partition = NULL;
-
- input_str = str;
-
- /* First calculate the number of data files and check syntax:
- path:size[K | M | G];path:size[K | M | G]... . Note that a Windows path may
- contain a drive name and a ':'. */
-
- while (*str != '\0') {
- path = str;
-
- while ((*str != ':' && *str != '\0')
- || (*str == ':'
- && (*(str + 1) == '\\' || *(str + 1) == '/'
- || *(str + 1) == ':'))) {
- str++;
- }
-
- if (*str == '\0') {
- return(FALSE);
- }
-
- str++;
-
- str = srv_parse_megabytes(str, &size);
-
- if (0 == strncmp(str, ":autoextend",
- (sizeof ":autoextend") - 1)) {
-
- str += (sizeof ":autoextend") - 1;
-
- if (0 == strncmp(str, ":max:",
- (sizeof ":max:") - 1)) {
-
- str += (sizeof ":max:") - 1;
-
- str = srv_parse_megabytes(str, &size);
- }
-
- if (*str != '\0') { /* :autoextend is accepted only at the very end of the string */
-
- return(FALSE);
- }
- }
-
- if (strlen(str) >= 6 /* room for "newraw"; only the leading "new" is compared here */
- && *str == 'n'
- && *(str + 1) == 'e'
- && *(str + 2) == 'w') {
- str += 3;
- }
-
- if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
- str += 3;
- }
-
- if (size == 0) {
- return(FALSE);
- }
-
- i++;
-
- if (*str == ';') {
- str++;
- } else if (*str != '\0') {
-
- return(FALSE);
- }
- }
-
- if (i == 0) {
- /* If innodb_data_file_path was defined it must contain
- at least one data file definition */
-
- return(FALSE);
- }
-
- srv_data_file_names = static_cast<char**>(
- malloc(i * sizeof *srv_data_file_names)); /* NOTE(review): malloc() results are not checked in this function */
-
- srv_data_file_sizes = static_cast<ulint*>(
- malloc(i * sizeof *srv_data_file_sizes));
-
- srv_data_file_is_raw_partition = static_cast<ulint*>(
- malloc(i * sizeof *srv_data_file_is_raw_partition));
-
- srv_n_data_files = i;
-
- /* Then store the actual values to our arrays */
-
- str = input_str;
- i = 0;
-
- while (*str != '\0') {
- path = str;
-
- /* Note that we must step over the ':' in a Windows path;
- a Windows path normally looks like C:\ibdata\ibdata1:1G, but
- a Windows raw partition may have a specification like
- \\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
-
- while ((*str != ':' && *str != '\0')
- || (*str == ':'
- && (*(str + 1) == '\\' || *(str + 1) == '/'
- || *(str + 1) == ':'))) {
- str++;
- }
-
- if (*str == ':') {
- /* Make path a null-terminated string */
- *str = '\0';
- str++;
- }
-
- str = srv_parse_megabytes(str, &size);
-
- srv_data_file_names[i] = path;
- srv_data_file_sizes[i] = size;
-
- if (0 == strncmp(str, ":autoextend",
- (sizeof ":autoextend") - 1)) {
-
- srv_auto_extend_last_data_file = TRUE;
-
- str += (sizeof ":autoextend") - 1;
-
- if (0 == strncmp(str, ":max:",
- (sizeof ":max:") - 1)) {
-
- str += (sizeof ":max:") - 1;
-
- str = srv_parse_megabytes(
- str, &srv_last_file_size_max);
- }
-
- if (*str != '\0') { /* same check as in the first pass; the arrays stay allocated on this return */
-
- return(FALSE);
- }
- }
-
- (srv_data_file_is_raw_partition)[i] = 0;
-
- if (strlen(str) >= 6
- && *str == 'n'
- && *(str + 1) == 'e'
- && *(str + 2) == 'w') {
- str += 3;
- /* Initialize new raw device only during bootstrap */
- (srv_data_file_is_raw_partition)[i] =
- opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
- }
-
- if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
- str += 3;
-
- /* Initialize new raw device only during bootstrap */
- if ((srv_data_file_is_raw_partition)[i] == 0) {
- (srv_data_file_is_raw_partition)[i] =
- opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
- }
- }
-
- i++;
-
- if (*str == ';') {
- str++;
- }
- }
-
- return(TRUE);
-}
-
-/*********************************************************************//**
-Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
-and srv_parse_log_group_home_dirs(), and resets the three array pointers to NULL. */
-UNIV_INTERN
-void
-srv_free_paths_and_sizes(void)
-/*==========================*/
-{
- free(srv_data_file_names);
- srv_data_file_names = NULL;
- free(srv_data_file_sizes);
- srv_data_file_sizes = NULL;
- free(srv_data_file_is_raw_partition);
- srv_data_file_is_raw_partition = NULL;
-}
-
-#ifndef UNIV_HOTBACKUP
-
-static ulint io_tid_i = 0; /* next free index into srv_io_tids[]; incremented atomically by each i/o handler thread */
-
-/********************************************************************//**
-I/o-handler thread function. Records its thread id in srv_io_tids[], then calls fil_aio_wait() on its segment until srv_shutdown_state is SRV_SHUTDOWN_EXIT_THREADS.
-@return OS_THREAD_DUMMY_RETURN */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(io_handler_thread)(
-/*==============================*/
- void* arg) /*!< in: pointer to the number of the segment in
- the aio array */
-{
- ulint segment;
- ulint tid_i = os_atomic_increment_ulint(&io_tid_i, 1) - 1; /* value of the counter before this thread's increment */
-
- ut_ad(tid_i < srv_n_file_io_threads);
-
- segment = *((ulint*) arg);
-
- srv_io_tids[tid_i] = os_thread_get_tid();
- os_thread_set_priority(srv_io_tids[tid_i], srv_sched_priority_io);
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- ib_logf(IB_LOG_LEVEL_INFO,
- "Io handler thread %lu starts, id %lu\n", segment,
- os_thread_pf(os_thread_get_curr_id()));
-#endif
-
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(io_handler_thread_key);
-#endif /* UNIV_PFS_THREAD */
-
- while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
- srv_current_thread_priority = srv_io_thread_priority;
- fil_aio_wait(segment);
- }
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit.
- The thread actually never comes here because it is exited in an
- os_event_wait(). */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Normalizes a directory path for Windows: converts slashes to backslashes in place. Does nothing unless __WIN__ is defined. */
-UNIV_INTERN
-void
-srv_normalize_path_for_win(
-/*=======================*/
- char* str MY_ATTRIBUTE((unused))) /*!< in/out: null-terminated
- character string */
-{
-#ifdef __WIN__
- for (; *str; str++) {
-
- if (*str == '/') {
- *str = '\\';
- }
- }
-#endif
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Creates a log file, sets its size to srv_log_file_size pages and closes it again.
-@return DB_SUCCESS or DB_ERROR if the file cannot be created or sized */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-create_log_file(
-/*============*/
- pfs_os_file_t* file, /*!< out: file handle; already closed when DB_SUCCESS is returned */
- const char* name) /*!< in: log file name */
-{
- ibool ret;
-
- *file = os_file_create(
- innodb_file_log_key, name,
- OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
- OS_LOG_FILE, &ret, FALSE);
-
- if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name);
- return(DB_ERROR);
- }
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Setting log file %s size to %lu MB",
- name, (ulong) srv_log_file_size
- >> (20 - UNIV_PAGE_SIZE_SHIFT)); /* pages converted to megabytes */
-
- ret = os_file_set_size(name, *file,
- (os_offset_t) srv_log_file_size
- << UNIV_PAGE_SIZE_SHIFT); /* pages converted to bytes */
- if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Cannot set log file"
- " %s to size %lu MB", name, (ulong) srv_log_file_size
- >> (20 - UNIV_PAGE_SIZE_SHIFT));
- return(DB_ERROR); /* NOTE(review): *file is not closed on this path */
- }
-
- ret = os_file_close(*file);
- ut_a(ret);
-
- return(DB_SUCCESS);
-}
-
-/** Initial (temporary) number of the first redo log file; it is renamed to ib_logfile0 by create_log_files_rename() */
-#define INIT_LOG_FILE0 (SRV_N_LOG_FILES_MAX + 1)
-
-/*********************************************************************//**
-Creates all log files, registers them in the log tablespace and writes an initial log checkpoint. The first file is created under the temporary number INIT_LOG_FILE0.
-@return DB_SUCCESS or error code */
-static
-dberr_t
-create_log_files(
-/*=============*/
- bool create_new_db, /*!< in: TRUE if new database is being
- created */
- char* logfilename, /*!< in/out: buffer for log file name */
- size_t dirnamelen, /*!< in: length of the directory path */
- lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
- char*& logfile0) /*!< out: name of the first log file */
-{
- if (srv_read_only_mode) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot create log files in read-only mode");
- return(DB_READ_ONLY);
- }
-
- /* We prevent system tablespace creation with existing files in
- data directory. So we do not delete log files when creating new system
- tablespace */
- if (!create_new_db) {
- /* Remove any old log files. */
- for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) { /* ib_logfile0 up to and including ib_logfile<INIT_LOG_FILE0> */
- sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
-
- /* Ignore errors about non-existent files or files
- that cannot be removed. The create_log_file() will
- return an error when the file exists. */
-#ifdef __WIN__
- DeleteFile((LPCTSTR) logfilename);
-#else
- unlink(logfilename);
-#endif
- /* Crashing after deleting the first
- file should be recoverable. The buffer
- pool was clean, and we can simply create
- all log files from the scratch. */
- DBUG_EXECUTE_IF("innodb_log_abort_6",
- return(DB_ERROR););
- }
- }
-
- ut_ad(!buf_pool_check_no_pending_io());
-
- DBUG_EXECUTE_IF("innodb_log_abort_7", return(DB_ERROR););
-
- for (unsigned i = 0; i < srv_n_log_files; i++) {
- sprintf(logfilename + dirnamelen,
- "ib_logfile%u", i ? i : INIT_LOG_FILE0); /* file 0 gets the temporary number INIT_LOG_FILE0 */
-
- dberr_t err = create_log_file(&files[i], logfilename);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
- }
-
- DBUG_EXECUTE_IF("innodb_log_abort_8", return(DB_ERROR););
-
- /* We did not create the first log file initially as
- ib_logfile0, so that crash recovery cannot find it until it
- has been completed and renamed. */
- sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0);
-
- fil_space_create(
- logfilename, SRV_LOG_SPACE_FIRST_ID, 0,
- FIL_LOG,
- NULL /* no encryption yet */,
- true /* this is create */);
-
- ut_a(fil_validate());
-
- logfile0 = fil_node_create(
- logfilename, (ulint) srv_log_file_size,
- SRV_LOG_SPACE_FIRST_ID, FALSE);
- ut_a(logfile0);
-
- for (unsigned i = 1; i < srv_n_log_files; i++) { /* the remaining files already carry their final names */
- sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
-
- if (!fil_node_create(
- logfilename,
- (ulint) srv_log_file_size,
- SRV_LOG_SPACE_FIRST_ID, FALSE)) {
- ut_error;
- }
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- /* Create the file space object for archived logs. */
- fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
- 0, FIL_LOG, NULL /* no encryption yet */, true /* create */);
-#endif
- log_group_init(0, srv_n_log_files,
- srv_log_file_size * UNIV_PAGE_SIZE,
- SRV_LOG_SPACE_FIRST_ID,
- SRV_LOG_SPACE_FIRST_ID + 1);
-
- fil_open_log_and_system_tablespace_files();
-
- /* Create a log checkpoint. */
- mutex_enter(&log_sys->mutex);
- ut_d(recv_no_log_write = FALSE);
- recv_reset_logs(
-#ifdef UNIV_LOG_ARCHIVE
- UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no,
- TRUE,
-#endif
- lsn);
- mutex_exit(&log_sys->mutex);
-
- return(DB_SUCCESS);
-}
-
-/** Rename the first redo log file to ib_logfile0, after flushing and closing the log files; on success the log and system tablespace files are reopened.
-@param[in,out] logfilename buffer for the log file name
-@param[in] dirnamelen length of the directory path
-@param[in] lsn FIL_PAGE_FILE_FLUSH_LSN value
-@param[in,out] logfile0 name of the first log file
-@return error code
-@retval DB_SUCCESS on successful operation */
-MY_ATTRIBUTE((warn_unused_result, nonnull))
-static
-dberr_t
-create_log_files_rename(
-/*====================*/
- char* logfilename, /*!< in/out: buffer for log file name */
- size_t dirnamelen, /*!< in: length of the directory path */
- lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value; only used in the log message */
- char* logfile0) /*!< in/out: name of the first log file; overwritten with the new name */
-{
- /* If innodb_flush_method=O_DSYNC,
- we need to explicitly flush the log buffers. */
- fil_flush(SRV_LOG_SPACE_FIRST_ID);
-
- DBUG_EXECUTE_IF("innodb_log_abort_9", return(DB_ERROR););
-
- /* Close the log files, so that we can rename
- the first one. */
- fil_close_log_files(false);
-
- /* Rename the first log file, now that a log
- checkpoint has been created. */
- sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Renaming log file %s to %s", logfile0, logfilename);
-
- mutex_enter(&log_sys->mutex);
- ut_ad(strlen(logfile0) == 2 + strlen(logfilename)); /* the old name is 2 characters longer, so the strcpy() below fits */
- dberr_t err = os_file_rename(
- innodb_file_log_key, logfile0, logfilename)
- ? DB_SUCCESS : DB_ERROR;
-
- /* Replace the first file with ib_logfile0. This is done even if the rename failed. */
- strcpy(logfile0, logfilename);
- mutex_exit(&log_sys->mutex);
-
- DBUG_EXECUTE_IF("innodb_log_abort_10", err = DB_ERROR;);
-
- if (err == DB_SUCCESS) {
- fil_open_log_and_system_tablespace_files();
- ib_logf(IB_LOG_LEVEL_WARN,
- "New log files created, LSN=" LSN_PF, lsn);
- }
-
- return(err);
-}
-
-/*********************************************************************//**
-Opens a log file, reads its size and closes it again.
-@return DB_SUCCESS or DB_ERROR if the file cannot be opened */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-open_log_file(
-/*==========*/
- pfs_os_file_t* file, /*!< out: file handle; already closed when DB_SUCCESS is returned */
- const char* name, /*!< in: log file name */
- os_offset_t* size) /*!< out: file size; not written on failure */
-{
- ibool ret;
-
- *file = os_file_create(innodb_file_log_key, name,
- OS_FILE_OPEN, OS_FILE_AIO,
- OS_LOG_FILE, &ret, FALSE);
- if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name);
- return(DB_ERROR);
- }
-
- *size = os_file_get_size(*file);
-
- ret = os_file_close(*file);
- ut_a(ret);
- return(DB_SUCCESS);
-}
-
-
-/** Creates or opens database data files and closes them.
-@param[out] create_new_db true = create new database
-@param[out] min_arch_log_no min of archived log numbers in
- data files
-@param[out] max_arch_log_no max of archived log numbers in
- data files
-@param[out] flushed_lsn flushed lsn in fist datafile
-@param[out] sum_of_new_sizes sum of sizes of the new files
- added
-@return DB_SUCCESS or error code */
-MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-open_or_create_data_files(
- bool* create_new_db,
-#ifdef UNIV_LOG_ARCHIVE
- lsn_t* min_arch_log_no,
- lsn_t* max_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- lsn_t* flushed_lsn,
- ulint* sum_of_new_sizes)
-{
- ibool ret;
- ulint i;
- bool one_opened = false;
- bool one_created = false;
- os_offset_t size;
- ulint flags;
- ulint space;
- ulint rounded_size_pages;
- char name[10000];
- fil_space_crypt_t* crypt_data=NULL;
-
- if (srv_n_data_files >= 1000) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Can only have < 1000 data files, you have "
- "defined %lu", (ulong) srv_n_data_files);
-
- return(DB_ERROR);
- }
-
- *sum_of_new_sizes = 0;
-
- *create_new_db = false;
-
- srv_normalize_path_for_win(srv_data_home);
-
- for (i = 0; i < srv_n_data_files; i++) {
- ulint dirnamelen;
-
- srv_normalize_path_for_win(srv_data_file_names[i]);
- dirnamelen = strlen(srv_data_home);
- /* The joined path, with room for a separator, must fit in name[]. */
- ut_a(dirnamelen + strlen(srv_data_file_names[i])
- < (sizeof name) - 1);
-
- memcpy(name, srv_data_home, dirnamelen);
-
- /* Add a path separator if needed. */
- if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
- name[dirnamelen++] = SRV_PATH_SEPARATOR;
- }
-
- strcpy(name + dirnamelen, srv_data_file_names[i]);
-
- /* Note: It will return true if the file doesn't exist. */
-
- if (!srv_file_check_mode(name)) {
-
- return(DB_FAIL);
-
- } else if (srv_data_file_is_raw_partition[i] == 0) {
-
- /* First we try to create the file: if it already
- exists, ret will get value FALSE */
-
- files[i] = os_file_create(
- innodb_file_data_key, name, OS_FILE_CREATE,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
- /* Read-only mode: success jumps to size_check, failure returns DB_ERROR. */
- if (srv_read_only_mode) {
-
- if (ret) {
- goto size_check;
- }
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Opening %s failed!", name);
-
- return(DB_ERROR);
-
- } else if (!ret
- && os_file_get_last_error(false)
- != OS_FILE_ALREADY_EXISTS
-#ifdef UNIV_AIX
- /* AIX 5.1 after security patch ML7 may have
- errno set to 0 here, which causes our
- function to return 100; work around that
- AIX problem */
- && os_file_get_last_error(false) != 100
-#endif /* UNIV_AIX */
- ) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Creating or opening %s failed!",
- name);
-
- return(DB_ERROR);
- }
-
- } else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
-
- ut_a(!srv_read_only_mode);
-
- /* The partition is opened, not created; then it is
- written over */
-
- srv_start_raw_disk_in_use = TRUE;
- srv_created_new_raw = TRUE;
-
- files[i] = os_file_create(
- innodb_file_data_key, name, OS_FILE_OPEN_RAW,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
-
- if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Error in opening %s", name);
-
- return(DB_ERROR);
- }
-
- const char* check_msg;
-
- check_msg = fil_read_first_page(
- files[i], FALSE, &flags, &space,
- flushed_lsn, NULL);
-
- /* If first page is valid, don't overwrite DB.
- It prevents overwriting DB when mysql_install_db
- starts mysqld multiple times during bootstrap. */
- if (check_msg == NULL) {
-
- srv_created_new_raw = FALSE;
- ret = FALSE;
- }
-
- } else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
- srv_start_raw_disk_in_use = TRUE;
-
- ret = FALSE;
- } else {
- ut_a(0);
- }
-
- if (ret == FALSE) {
- const char* check_msg;
- /* We open the data file. An already existing file is
- rejected if an earlier file in the list was newly created
- in this call (one_created), because files may only be
- appended at the end of the tablespace. */
-
- if (one_created) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Data files can only be added at "
- "the end of a tablespace, but "
- "data file %s existed beforehand.",
- name);
- return(DB_ERROR);
- }
-
- if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
- ut_a(!srv_read_only_mode);
- files[i] = os_file_create(
- innodb_file_data_key,
- name, OS_FILE_OPEN_RAW,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
- } else if (i == 0) {
- files[i] = os_file_create(
- innodb_file_data_key,
- name, OS_FILE_OPEN_RETRY,
- OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
- } else {
- files[i] = os_file_create(
- innodb_file_data_key,
- name, OS_FILE_OPEN, OS_FILE_NORMAL,
- OS_DATA_FILE, &ret, FALSE);
- }
-
- if (!ret) {
- os_file_get_last_error(true);
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Can't open '%s'", name);
-
- return(DB_ERROR);
- }
-
- if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
- goto skip_size_check;
- }
-
-size_check:
- size = os_file_get_size(files[i]);
- ut_a(size != (os_offset_t) -1);
-
- /* If InnoDB encountered an error or was killed
- while extending the data file, the last page
- could be incomplete. */
-
- rounded_size_pages = static_cast<ulint>(
- size >> UNIV_PAGE_SIZE_SHIFT);
-
- if (i == srv_n_data_files - 1
- && srv_auto_extend_last_data_file) {
-
- if (srv_data_file_sizes[i] > rounded_size_pages
- || (srv_last_file_size_max > 0
- && srv_last_file_size_max
- < rounded_size_pages)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "auto-extending "
- "data file %s is "
- "of a different size "
- ULINTPF " pages (rounded "
- "down to MB) than specified "
- "in the .cnf file: "
- "initial " ULINTPF " pages, "
- "max " ULINTPF " (relevant if "
- "non-zero) pages!",
- name,
- rounded_size_pages,
- srv_data_file_sizes[i],
- srv_last_file_size_max);
-
- return(DB_ERROR);
- }
-
- srv_data_file_sizes[i] = rounded_size_pages;
- }
-
- if (rounded_size_pages != srv_data_file_sizes[i]) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Data file %s is of a different "
- "size " ULINTPF " pages (rounded down to MB) "
- "than specified in the .cnf file "
- ULINTPF " pages!",
- name,
- rounded_size_pages,
- srv_data_file_sizes[i]);
-
- return(DB_ERROR);
- }
-skip_size_check:
-
- /* This is the earliest location where we can load
- the double write buffer. */
- if (i == 0) {
- /* XtraBackup never loads corrupted pages from
- the doublewrite buffer */
- buf_dblwr_init_or_load_pages(
- files[i], srv_data_file_names[i], !IS_XTRABACKUP());
- }
-
- bool retry = true;
-check_first_page:
- check_msg = fil_read_first_page(
- files[i], one_opened, &flags, &space,
- flushed_lsn, &crypt_data);
-
- if (check_msg) {
-
- if (retry) {
- fsp_open_info fsp;
- const ulint page_no = 0;
-
- retry = false;
- fsp.id = 0;
- fsp.filepath = srv_data_file_names[i];
- fsp.file = files[i];
-
- if (fil_user_tablespace_restore_page(
- &fsp, page_no)) {
- goto check_first_page;
- }
- }
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "%s in data file %s",
- check_msg, name);
- return(DB_ERROR);
- }
-
- /* The first file of the system tablespace must
- have space ID = TRX_SYS_SPACE. The FSP_SPACE_ID
- field in files greater than ibdata1 are unreliable. */
- ut_a(one_opened || space == TRX_SYS_SPACE);
-
- /* Check the flags for the first system tablespace
- file only. */
- if (!one_opened
- && UNIV_PAGE_SIZE
- != fsp_flags_get_page_size(flags)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Data file \"%s\" uses page size " ULINTPF " ,"
- "but the start-up parameter "
- "is --innodb-page-size=" ULINTPF " .",
- name,
- fsp_flags_get_page_size(flags),
- UNIV_PAGE_SIZE);
-
- return(DB_ERROR);
- }
-
- one_opened = TRUE;
- } else if (!srv_read_only_mode) {
- /* We created the data file and now write it full of
- zeros */
-
- one_created = TRUE;
-
- if (i > 0) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Data file %s did not"
- " exist: new to be created",
- name);
- } else {
- ib_logf(IB_LOG_LEVEL_INFO,
- "The first specified "
- "data file %s did not exist: "
- "a new database to be created!",
- name);
-
- *create_new_db = TRUE;
- }
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Setting file %s size to " ULINTPF " MB",
- name,
- (srv_data_file_sizes[i]
- >> (20 - UNIV_PAGE_SIZE_SHIFT)));
-
- ret = os_file_set_size(
- name, files[i],
- (os_offset_t) srv_data_file_sizes[i]
- << UNIV_PAGE_SIZE_SHIFT
- /* TODO: enable page_compression on the
- system tablespace and add
- , FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)*/);
-
- if (!ret) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Error in creating %s: "
- "probably out of disk space",
- name);
-
- return(DB_ERROR);
- }
-
- *sum_of_new_sizes += srv_data_file_sizes[i];
- }
- /* The handle is closed again; the file is registered below by name. */
- ret = os_file_close(files[i]);
- ut_a(ret);
- /* The first data file also creates the tablespace object for space 0. */
- if (i == 0) {
- if (!crypt_data) {
- crypt_data = fil_space_create_crypt_data(FIL_ENCRYPTION_DEFAULT,
- FIL_DEFAULT_ENCRYPTION_KEY);
- }
-
- flags = FSP_FLAGS_PAGE_SSIZE();
-
- fil_space_create(name, 0, flags, FIL_TABLESPACE,
- crypt_data, (*create_new_db) == true);
- }
-
- ut_a(fil_validate());
-
- if (!fil_node_create(name, srv_data_file_sizes[i], 0,
- srv_data_file_is_raw_partition[i] != 0)) {
- return(DB_ERROR);
- }
- }
-
- return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Create undo tablespace.
-In read-only mode the file is only opened, never created, and the handle
-is closed again before returning. Otherwise the file must not exist yet:
-it is created and extended to the requested number of pages.
-@return DB_SUCCESS or error code */
-static
-dberr_t
-srv_undo_tablespace_create(
-/*=======================*/
-	const char*	name,		/*!< in: tablespace name */
-	ulint		size)		/*!< in: tablespace size in pages */
-{
-	pfs_os_file_t	fh;
-	ibool		ret;
-	dberr_t		err = DB_SUCCESS;
-
-	os_file_create_subdirs_if_needed(name);
-
-	fh = os_file_create(
-		innodb_file_data_key,
-		name,
-		srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
-		OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE);
-
-	if (srv_read_only_mode && ret) {
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"%s opened in read-only mode", name);
-
-		/* Nothing else uses this handle: close it here so that
-		it is not leaked. */
-		os_file_close(fh);
-	} else if (ret == FALSE) {
-		if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS
-#ifdef UNIV_AIX
-		    /* AIX 5.1 after security patch ML7 may have
-		    errno set to 0 here, which causes our function
-		    to return 100; work around that AIX problem */
-		    && os_file_get_last_error(false) != 100
-#endif /* UNIV_AIX */
-		) {
-			ib_logf(IB_LOG_LEVEL_ERROR,
-				"Can't create UNDO tablespace %s", name);
-		} else {
-			ib_logf(IB_LOG_LEVEL_ERROR,
-				"Creating system tablespace with"
-				" existing undo tablespaces is not"
-				" supported. Please delete all undo"
-				" tablespaces before creating new"
-				" system tablespace.");
-		}
-		err = DB_ERROR;
-	} else {
-		ut_a(!srv_read_only_mode);
-
-		/* We created the data file and now write it full of zeros */
-
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"Data file %s did not exist: new to be created",
-			name);
-
-		/* ULINTPF is the format that matches ulint on every
-		platform; the size is given in pages and is logged in MB. */
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"Setting file %s size to " ULINTPF " MB",
-			name, size >> (20 - UNIV_PAGE_SIZE_SHIFT));
-
-		/* Widen to os_offset_t before shifting so that the byte
-		count cannot be truncated where ulint is 32 bits wide. */
-		ret = os_file_set_size(name, fh,
-				       (os_offset_t) size
-				       << UNIV_PAGE_SIZE_SHIFT
-				       /* TODO: enable page_compression on the
-				       system tablespace and add
-				       FSP_FLAGS_HAS_PAGE_COMPRESSION(flags)
-				       */);
-
-		if (!ret) {
-			/* This is a failure: report it at error level. */
-			ib_logf(IB_LOG_LEVEL_ERROR,
-				"Error in creating %s: probably out of "
-				"disk space", name);
-
-			err = DB_ERROR;
-		}
-
-		os_file_close(fh);
-	}
-
-	return(err);
-}
-
-/*********************************************************************//**
-Open an undo tablespace.
-The file is opened only to read its size, closed again, and then
-registered with the fil subsystem under the given space id.
-@return DB_SUCCESS if the file could be opened and its node was created,
-DB_ERROR otherwise */
-static
-dberr_t
-srv_undo_tablespace_open(
-/*=====================*/
-	const char*	name,	/*!< in: tablespace name */
-	ulint		space)	/*!< in: tablespace id */
-{
-	pfs_os_file_t	fh;
-	ibool		ok;
-
-	if (!srv_file_check_mode(name)) {
-		/* NOTE(review): "writable" is printed in read-only mode
-		and "readable" otherwise, which looks inverted - confirm
-		against srv_file_check_mode(). */
-		ib_logf(IB_LOG_LEVEL_ERROR,
-			"UNDO tablespaces must be %s!",
-			srv_read_only_mode ? "writable" : "readable");
-
-		return(DB_ERROR);
-	}
-
-	fh = os_file_create(
-		innodb_file_data_key, name,
-		OS_FILE_OPEN_RETRY
-		| OS_FILE_ON_ERROR_NO_EXIT
-		| OS_FILE_ON_ERROR_SILENT,
-		OS_FILE_NORMAL,
-		OS_DATA_FILE,
-		&ok,
-		FALSE);
-
-	/* Only a successfully opened file gets loaded. */
-	if (!ok) {
-		return(DB_ERROR);
-	}
-
-	const os_offset_t	file_size = os_file_get_size(fh);
-	ut_a(file_size != (os_offset_t) -1);
-
-	ok = os_file_close(fh);
-	ut_a(ok);
-
-	/* Load the tablespace into InnoDB's internal data structures.
-	The undo tablespace id becomes the biggest known space id,
-	because InnoDB hasn't opened any other tablespace apart from
-	the system tablespace. */
-	fil_set_max_space_id_if_bigger(space);
-
-	/* Set the compressed page size to 0 (non-compressed) */
-	const ulint	space_flags = FSP_FLAGS_PAGE_SSIZE();
-
-	fil_space_create(name, space, space_flags, FIL_TABLESPACE,
-			 NULL /* no encryption */,
-			 true /* create */);
-
-	ut_a(fil_validate());
-
-	/* On 64 bit Windows ulint can be 32 bit and os_offset_t
-	is 64 bit. It is OK to cast the page count to ulint because
-	the unit has been scaled to pages and they are always
-	32 bit. */
-	const os_offset_t	n_pages = file_size / UNIV_PAGE_SIZE;
-
-	if (fil_node_create(name, (ulint) n_pages, space, FALSE)) {
-		return(DB_SUCCESS);
-	}
-
-	return(DB_ERROR);
-}
-
-/********************************************************************
-Opens the configured number of undo tablespaces.
-When create_new_db is set, the undo tablespace files (named "undoNNN"
-under srv_undo_dir) are created first. All tablespaces in use are then
-opened, followed by any further contiguous undo files that exist, and
-srv_undo_space_id_start is set accordingly.
-@return DB_SUCCESS or error code */
-dberr_t
-srv_undo_tablespaces_init(
-/*======================*/
- ibool create_new_db, /*!< in: TRUE if new db being
- created */
- ibool backup_mode, /*!< in: TRUE disables reading
- the system tablespace (used in
- XtraBackup), FALSE is passed on
- recovery. */
- const ulint n_conf_tablespaces, /*!< in: configured undo
- tablespaces */
- ulint* n_opened) /*!< out: number of UNDO
- tablespaces successfully
- discovered and opened */
-{
- ulint i;
- dberr_t err = DB_SUCCESS;
- ulint prev_space_id = 0;
- ulint n_undo_tablespaces;
- ulint undo_tablespace_ids[TRX_SYS_N_RSEGS + 1];
-
- *n_opened = 0;
-
- ut_a(n_conf_tablespaces <= TRX_SYS_N_RSEGS);
-
- memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids));
-
- /* Create the undo spaces only if we are creating a new
- instance. We don't allow creating of new undo tablespaces
- in an existing instance (yet). This restriction exists because
- we check in several places for SYSTEM tablespaces to be less than
- the min of user defined tablespace ids. Once we implement saving
- the location of the undo tablespaces and their space ids this
- restriction will/should be lifted. */
-
- for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) {
- char name[OS_FILE_MAX_PATH];
- ulint space_id = i + 1;
-
- DBUG_EXECUTE_IF("innodb_undo_upgrade",
- space_id = i + 3;);
-
- ut_snprintf(
- name, sizeof(name),
- "%s%cundo%03lu",
- srv_undo_dir, SRV_PATH_SEPARATOR, space_id);
-
- if (i == 0) {
- srv_undo_space_id_start = space_id;
- prev_space_id = srv_undo_space_id_start - 1;
- }
-
- undo_tablespace_ids[i] = space_id;
-
- err = srv_undo_tablespace_create(
- name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
-
- if (err != DB_SUCCESS) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Could not create undo tablespace '%s'.",
- name);
-
- return(err);
- }
- }
-
- /* Get the tablespace ids of all the undo segments excluding
- the system tablespace (0). If we are creating a new instance then
- we build the undo_tablespace_ids ourselves since they don't
- already exist. */
-
- if (!create_new_db && !backup_mode) {
- n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces(
- undo_tablespace_ids);
-
- if (n_undo_tablespaces != 0) {
- srv_undo_space_id_start = undo_tablespace_ids[0];
- prev_space_id = srv_undo_space_id_start - 1;
- }
-
- } else {
- n_undo_tablespaces = n_conf_tablespaces;
-
- undo_tablespace_ids[n_conf_tablespaces] = ULINT_UNDEFINED;
- }
-
- /* Open all the undo tablespaces that are currently in use. If we
- fail to open any of these it is a fatal error. The tablespace ids
- should be contiguous. It is a fatal error because they are required
- for recovery and are referenced by the UNDO logs (a.k.a RBS). */
-
- for (i = 0; i < n_undo_tablespaces; ++i) {
- char name[OS_FILE_MAX_PATH];
-
- ut_snprintf(
- name, sizeof(name),
- "%s%cundo%03lu",
- srv_undo_dir, SRV_PATH_SEPARATOR,
- undo_tablespace_ids[i]);
-
- /* Should be no gaps in undo tablespace ids. */
- ut_a(prev_space_id + 1 == undo_tablespace_ids[i]);
-
- /* The system space id should not be in this array. */
- ut_a(undo_tablespace_ids[i] != 0);
- ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED);
-
- err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]);
-
- if (err != DB_SUCCESS) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unable to open undo tablespace '%s'.", name);
-
- return(err);
- }
-
- prev_space_id = undo_tablespace_ids[i];
-
- ++*n_opened;
- }
-
- /* Open any extra unused undo tablespaces. These must be contiguous.
- We stop at the first failure. These are undo tablespaces that are
- not in use and therefore not required by recovery. We only check
- that there are no gaps. */
-
- for (i = prev_space_id + 1; i < TRX_SYS_N_RSEGS; ++i) {
- char name[OS_FILE_MAX_PATH];
-
- ut_snprintf(
- name, sizeof(name),
- "%s%cundo%03lu", srv_undo_dir, SRV_PATH_SEPARATOR, i);
-
- /* Undo space ids start from 1. */
- err = srv_undo_tablespace_open(name, i);
-
- if (err != DB_SUCCESS) {
- break;
- }
-
- /** Note the first undo tablespace id in case of
- no active undo tablespace. */
- if (n_undo_tablespaces == 0) {
- srv_undo_space_id_start = i;
- }
-
- ++n_undo_tablespaces;
-
- ++*n_opened;
- }
-
- /** Explicitly specify the srv_undo_space_id_start
- as zero when there are no undo tablespaces. */
- if (n_undo_tablespaces == 0) {
- srv_undo_space_id_start = 0;
- }
-
- /* If the user says that there are fewer than what we find we
- tolerate that discrepancy but not the inverse. Because there could
- be unused undo tablespaces for future use. */
-
- if (n_conf_tablespaces > n_undo_tablespaces) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Expected to open %lu undo "
- "tablespaces but was able\n",
- n_conf_tablespaces);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: to find only %lu undo "
- "tablespaces.\n", n_undo_tablespaces);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Set the "
- "innodb_undo_tablespaces parameter to "
- "the\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: correct value and retry. Suggested "
- "value is %lu\n", n_undo_tablespaces);
-
- return(err != DB_SUCCESS ? err : DB_ERROR);
-
- } else if (n_undo_tablespaces > 0) {
-
- ib_logf(IB_LOG_LEVEL_INFO, "Opened %lu undo tablespaces",
- n_undo_tablespaces);
-
- if (n_conf_tablespaces == 0) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Using the system tablespace for all UNDO "
- "logging because innodb_undo_tablespaces=0");
- }
- }
-
- if (create_new_db) {
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- /* The undo log tablespace.
- NOTE(review): n_undo_tablespaces also counts any extra files
- opened by the loop above, while undo_tablespace_ids[] was only
- filled for the first n_conf_tablespaces entries - confirm that
- no extra undo files can exist when a new database is created. */
- for (i = 0; i < n_undo_tablespaces; ++i) {
-
- fsp_header_init(
- undo_tablespace_ids[i],
- SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr);
- }
-
- mtr_commit(&mtr);
- }
-
- return(DB_SUCCESS);
-}
-
-/********************************************************************
-Wait for the purge thread(s) to start up.
-Polls trx_purge_state() until it is no longer PURGE_STATE_INIT. The
-wait ends early if shutdown has begun or if srv_force_recovery is at
-or above SRV_FORCE_NO_BACKGROUND. */
-static
-void
-srv_start_wait_for_purge_to_start()
-/*===============================*/
-{
-	purge_state_t	purge_state = trx_purge_state();
-
-	/* Purge must not be disabled when this is called. */
-	ut_a(purge_state != PURGE_STATE_DISABLED);
-
-	for (;;) {
-		if (srv_shutdown_state != SRV_SHUTDOWN_NONE
-		    || srv_force_recovery >= SRV_FORCE_NO_BACKGROUND
-		    || purge_state != PURGE_STATE_INIT) {
-
-			return;
-		}
-
-		purge_state = trx_purge_state();
-
-		if (purge_state == PURGE_STATE_INIT) {
-			/* Still starting up: report and poll again
-			after a short sleep. */
-			ib_logf(IB_LOG_LEVEL_INFO,
-				"Waiting for purge to start");
-
-			os_thread_sleep(50000);
-
-		} else if (purge_state == PURGE_STATE_EXIT
-			   || purge_state == PURGE_STATE_DISABLED) {
-			/* Purge must not exit or get disabled while
-			we are waiting for it to start. */
-			ut_error;
-		}
-
-		/* PURGE_STATE_RUN and PURGE_STATE_STOP need no action:
-		the check at the top of the loop ends the wait. */
-	}
-}
-
-/*********************************************************************//**
-Initializes the log tracking subsystem and starts its thread.
-Tracking is switched off when srv_force_recovery is non-zero or the
-server runs in read-only mode; nothing is started unless
-srv_track_changed_pages is set. */
-static
-void
-init_log_online(void)
-/*=================*/
-{
-	if (UNIV_UNLIKELY(srv_force_recovery > 0 || srv_read_only_mode)) {
-		/* Changed page tracking is not done in these modes. */
-		srv_track_changed_pages = FALSE;
-		return;
-	}
-
-	if (!srv_track_changed_pages) {
-		return;
-	}
-
-	log_online_read_init();
-
-	/* Create the thread that follows the redo log to output the
-	changed page bitmap */
-	srv_redo_log_follow_thread_handle = os_thread_create(
-		&srv_redo_log_follow_thread,
-		NULL,
-		thread_ids + 5 + SRV_MAX_N_IO_THREADS);
-
-	srv_redo_log_follow_thread_started = true;
-}
-
-/********************************************************************
-Starts InnoDB and creates a new database if database files
-are not found and the user wants.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-innobase_start_or_create_for_mysql()
-{
- bool create_new_db;
- lsn_t flushed_lsn;
-#ifdef UNIV_LOG_ARCHIVE
- lsn_t min_arch_log_no = LSN_MAX;
- lsn_t max_arch_log_no = LSN_MAX;
-#endif /* UNIV_LOG_ARCHIVE */
- ulint sum_of_new_sizes;
- dberr_t err;
- unsigned i;
- ulint srv_n_log_files_found = srv_n_log_files;
- ulint io_limit;
- mtr_t mtr;
- ib_bh_t* ib_bh;
- ulint n_recovered_trx;
- char logfilename[10000];
- char* logfile0 = NULL;
- size_t dirnamelen;
- bool sys_datafiles_created = false;
-
- /* Check that os_fast_mutexes work as expected */
- os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &srv_os_test_mutex);
-
- ut_a(0 == os_fast_mutex_trylock(&srv_os_test_mutex));
-
- os_fast_mutex_unlock(&srv_os_test_mutex);
-
- os_fast_mutex_lock(&srv_os_test_mutex);
-
- os_fast_mutex_unlock(&srv_os_test_mutex);
-
- os_fast_mutex_free(&srv_os_test_mutex);
-
- /* This should be initialized early */
- ut_init_timer();
-
- if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) {
- srv_read_only_mode = 1;
- }
-
- high_level_read_only = srv_read_only_mode
- || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO;
-
- if (srv_read_only_mode) {
- ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode");
- }
-
-#ifdef HAVE_DARWIN_THREADS
-# ifdef F_FULLFSYNC
- /* This executable has been compiled on Mac OS X 10.3 or later.
- Assume that F_FULLFSYNC is available at run-time. */
- srv_have_fullfsync = TRUE;
-# else /* F_FULLFSYNC */
- /* This executable has been compiled on Mac OS X 10.2
- or earlier. Determine if the executable is running
- on Mac OS X 10.3 or later. */
- struct utsname utsname;
- if (uname(&utsname)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: cannot determine Mac OS X version!\n", stderr);
- } else {
- srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0;
- }
- if (!srv_have_fullfsync) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: On Mac OS X, fsync() may be "
- "broken on internal drives,\n", stderr);
- ut_print_timestamp(stderr);
- fputs(" InnoDB: making transactions unsafe!\n", stderr);
- }
-# endif /* F_FULLFSYNC */
-#endif /* HAVE_DARWIN_THREADS */
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Using %s to ref count buffer pool pages",
-#ifdef PAGE_ATOMIC_REF_COUNT
- "atomics"
-#else
- "mutexes"
-#endif /* PAGE_ATOMIC_REF_COUNT */
- );
-
- compile_time_assert(sizeof(ulint) == sizeof(void*));
-
- /* If stacktrace is used we set up signal handler for SIGUSR2 signal
- here. If signal handler set fails we report that and disable
- stacktrace feature. */
-
- if (srv_use_stacktrace) {
-#if defined (__linux__) && HAVE_BACKTRACE && HAVE_BACKTRACE_SYMBOLS
- struct sigaction sigact;
-
- sigact.sa_sigaction = os_stacktrace_print;
- sigact.sa_flags = SA_RESTART | SA_SIGINFO;
-
- if (sigaction(SIGUSR2, &sigact, (struct sigaction *)NULL) != 0)
- {
- fprintf(stderr, " InnoDB:error setting signal handler for %d (%s)\n",
- SIGUSR2, strsignal(SIGUSR2));
- srv_use_stacktrace = FALSE;
-
- }
-#endif /* defined (__linux__) && HAVE_BACKTRACE && HAVE_BACKTRACE_SYMBOLS */
- }
-
-#ifdef UNIV_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!");
-#endif
-
-#ifdef UNIV_IBUF_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!");
-# ifdef UNIV_IBUF_COUNT_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: !!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on "
- "!!!!!!!!!");
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG");
-# endif
-#endif
-
-#ifdef UNIV_BLOB_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- "InnoDB: !!!!!!!! UNIV_BLOB_DEBUG switched on !!!!!!!!!\n"
- "InnoDB: Server restart may fail with UNIV_BLOB_DEBUG");
-#endif /* UNIV_BLOB_DEBUG */
-
-#ifdef UNIV_SYNC_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!");
-#endif
-
-#ifdef UNIV_SEARCH_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!");
-#endif
-
-#ifdef UNIV_LOG_LSN_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: !!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!");
-#endif /* UNIV_LOG_LSN_DEBUG */
-#ifdef UNIV_MEM_DEBUG
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!");
-#endif
-
- if (srv_use_sys_malloc) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "The InnoDB memory heap is disabled");
- }
-
-#if defined(COMPILER_HINTS_ENABLED)
- ib_logf(IB_LOG_LEVEL_INFO,
- " InnoDB: Compiler hints enabled.");
-#endif /* defined(COMPILER_HINTS_ENABLED) */
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "" IB_ATOMICS_STARTUP_MSG "");
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "" IB_MEMORY_BARRIER_STARTUP_MSG "");
-
-#ifndef HAVE_MEMORY_BARRIER
-#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__
-#else
- ib_logf(IB_LOG_LEVEL_WARN,
- "MySQL was built without a memory barrier capability on this"
- " architecture, which might allow a mutex/rw_lock violation"
- " under high thread concurrency. This may cause a hang.");
-#endif /* IA32 or AMD64 */
-#endif /* HAVE_MEMORY_BARRIER */
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Compressed tables use zlib " ZLIB_VERSION
-#ifdef UNIV_ZIP_DEBUG
- " with validation"
-#endif /* UNIV_ZIP_DEBUG */
- );
-#ifdef UNIV_ZIP_COPY
- ib_logf(IB_LOG_LEVEL_INFO, "and extra copying");
-#endif /* UNIV_ZIP_COPY */
-
-
- /* Since InnoDB does not currently clean up all its internal data
- structures in MySQL Embedded Server Library server_end(), we
- print an error message if someone tries to start up InnoDB a
- second time during the process lifetime. */
-
- if (srv_start_has_been_called) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: startup called second time "
- "during the process\n");
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: lifetime. In the MySQL Embedded "
- "Server Library you\n");
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: cannot call server_init() more "
- "than once during the\n");
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: process lifetime.\n");
- }
-
- srv_start_has_been_called = TRUE;
-
-#ifdef UNIV_DEBUG
- log_do_write = TRUE;
-#endif /* UNIV_DEBUG */
- /* yydebug = TRUE; */
-
- srv_is_being_started = TRUE;
- srv_startup_is_before_trx_rollback_phase = TRUE;
-
-#ifdef __WIN__
- srv_use_native_aio = TRUE;
-
-#elif defined(LINUX_NATIVE_AIO)
-
- if (srv_use_native_aio) {
- ib_logf(IB_LOG_LEVEL_INFO, "Using Linux native AIO");
- }
-#else
- /* Currently native AIO is supported only on windows and linux
- and that also when the support is compiled in. In all other
- cases, we ignore the setting of innodb_use_native_aio. */
- srv_use_native_aio = FALSE;
-#endif /* __WIN__ */
-
- if (srv_file_flush_method_str == NULL) {
- /* These are the default options */
-
- srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-
- srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
- srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
- srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
- srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
- srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) {
- srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
- srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
- srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
-#ifdef _WIN32
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
- srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
- srv_use_native_aio = FALSE;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
- srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
- srv_use_native_aio = FALSE;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str,
- "async_unbuffered")) {
- srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
- srv_use_native_aio = TRUE;
-#endif /* __WIN__ */
- } else {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unrecognized value %s for innodb_flush_method",
- srv_file_flush_method_str);
- return(DB_ERROR);
- }
-
- /* Note that the call srv_boot() also changes the values of
- some variables to the units used by InnoDB internally */
-
- /* Set the maximum number of threads which can wait for a semaphore
- inside InnoDB: this is the 'sync wait array' size, as well as the
- maximum number of threads that can wait in the 'srv_conc array' for
- their time to enter InnoDB. */
-
-#define BUF_POOL_SIZE_THRESHOLD (1024 * 1024 * 1024)
- srv_max_n_threads = 1 /* io_ibuf_thread */
- + 1 /* io_log_thread */
- + 1 /* lock_wait_timeout_thread */
- + 1 /* srv_error_monitor_thread */
- + 1 /* srv_monitor_thread */
- + 1 /* srv_master_thread */
- + 1 /* srv_redo_log_follow_thread */
- + 1 /* srv_purge_coordinator_thread */
- + 1 /* buf_dump_thread */
- + 1 /* dict_stats_thread */
- + 1 /* fts_optimize_thread */
- + 1 /* recv_writer_thread */
- + 1 /* buf_flush_page_cleaner_thread */
- + 1 /* trx_rollback_or_clean_all_recovered */
- + 128 /* added as margin, for use of
- InnoDB Memcached etc. */
- + max_connections
- + srv_n_read_io_threads
- + srv_n_write_io_threads
- + srv_n_purge_threads
- /* FTS Parallel Sort */
- + fts_sort_pll_degree * FTS_NUM_AUX_INDEX
- * max_connections;
-
- if (srv_buf_pool_size < BUF_POOL_SIZE_THRESHOLD) {
- /* If buffer pool is less than 1 GB,
- use only one buffer pool instance */
- srv_buf_pool_instances = 1;
- }
-
- srv_boot();
-
- ib_logf(IB_LOG_LEVEL_INFO, ut_crc32_implementation);
-
- if (!srv_read_only_mode) {
-
- mutex_create(srv_monitor_file_mutex_key,
- &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
-
- if (srv_innodb_status) {
-
- srv_monitor_file_name = static_cast<char*>(
- mem_alloc(
- strlen(fil_path_to_mysql_datadir)
- + 20 + sizeof "/innodb_status."));
-
- sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
- fil_path_to_mysql_datadir,
- os_proc_get_number());
-
- srv_monitor_file = fopen(srv_monitor_file_name, "w+");
-
- if (!srv_monitor_file) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Unable to create %s: %s",
- srv_monitor_file_name,
- strerror(errno));
-
- return(DB_ERROR);
- }
- } else {
- srv_monitor_file_name = NULL;
- srv_monitor_file = os_file_create_tmpfile(NULL);
-
- if (!srv_monitor_file) {
- return(DB_ERROR);
- }
- }
-
- mutex_create(srv_dict_tmpfile_mutex_key,
- &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
-
- srv_dict_tmpfile = os_file_create_tmpfile(NULL);
-
- if (!srv_dict_tmpfile) {
- return(DB_ERROR);
- }
-
- mutex_create(srv_misc_tmpfile_mutex_key,
- &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
-
- srv_misc_tmpfile = os_file_create_tmpfile(NULL);
-
- if (!srv_misc_tmpfile) {
- return(DB_ERROR);
- }
- }
-
- /* If user has set the value of innodb_file_io_threads then
- we'll emit a message telling the user that this parameter
- is now deprecated. */
- if (srv_n_file_io_threads != 4) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "innodb_file_io_threads is deprecated. Please use "
- "innodb_read_io_threads and innodb_write_io_threads "
- "instead");
- }
-
- /* Now overwrite the value on srv_n_file_io_threads */
- srv_n_file_io_threads = srv_n_read_io_threads;
-
- if (!srv_read_only_mode) {
- /* Add the log and ibuf IO threads. */
- srv_n_file_io_threads += 2;
- srv_n_file_io_threads += srv_n_write_io_threads;
- } else {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Disabling background IO write threads.");
-
- srv_n_write_io_threads = 0;
- }
-
- ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
-
- io_limit = 8 * SRV_N_PENDING_IOS_PER_THREAD;
-
- /* On Windows when using native aio the number of aio requests
- that a thread can handle at a given time is limited to 32
- i.e.: SRV_N_PENDING_IOS_PER_THREAD */
-# ifdef __WIN__
- if (srv_use_native_aio) {
- io_limit = SRV_N_PENDING_IOS_PER_THREAD;
- }
-# endif /* __WIN__ */
-
- if (!os_aio_init(io_limit,
- srv_n_read_io_threads,
- srv_n_write_io_threads,
- SRV_MAX_N_PENDING_SYNC_IOS)) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Fatal : Cannot initialize AIO sub-system");
-#if defined(LINUX_NATIVE_AIO)
- ib_logf(IB_LOG_LEVEL_INFO,
- "You can try increasing system fs.aio-max-nr to 1048576 "
- "or larger or setting innodb_use_native_aio = 0 in my.cnf");
-#endif
-
- return(DB_ERROR);
- }
-
- fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files);
-
- double size;
- char unit;
-
- if (srv_buf_pool_size >= 1024 * 1024 * 1024) {
- size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024);
- unit = 'G';
- } else {
- size = ((double) srv_buf_pool_size) / (1024 * 1024);
- unit = 'M';
- }
-
- /* Print time to initialize the buffer pool */
- ib_logf(IB_LOG_LEVEL_INFO,
- "Initializing buffer pool, size = %.1f%c", size, unit);
-
- err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances);
-
- if (err != DB_SUCCESS) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot allocate memory for the buffer pool");
-
- return(DB_ERROR);
- }
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Completed initialization of buffer pool");
-
-#ifdef UNIV_DEBUG
- /* We have observed deadlocks with a 5MB buffer pool but
- the actual lower limit could very well be a little higher. */
-
- if (srv_buf_pool_size <= 5 * 1024 * 1024) {
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Small buffer pool size (%luM), the flst_validate() "
- "debug function can cause a deadlock if the "
- "buffer pool fills up.",
- srv_buf_pool_size / 1024 / 1024);
- }
-#endif /* UNIV_DEBUG */
-
- fsp_init();
- log_init();
- log_online_init();
-
- lock_sys_create(srv_lock_table_size);
-
- /* Create i/o-handler threads: */
-
- for (i = 0; i < srv_n_file_io_threads; ++i) {
-
- n[i] = i;
-
- thread_handles[i] = os_thread_create(io_handler_thread, n + i, thread_ids + i);
- thread_started[i] = true;
- }
-
- if (srv_n_log_files * srv_log_file_size * UNIV_PAGE_SIZE
- >= 512ULL * 1024ULL * 1024ULL * 1024ULL) {
- /* log_block_convert_lsn_to_no() limits the returned block
- number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512
- bytes, then we have a limit of 512 GB. If that limit is to
- be raised, then log_block_convert_lsn_to_no() must be
- modified. */
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Combined size of log files must be < 512 GB");
-
- return(DB_ERROR);
- }
-
- if (srv_n_log_files * srv_log_file_size >= ULINT_MAX) {
- /* fil_io() takes ulint as an argument and we are passing
- (next_offset / UNIV_PAGE_SIZE) to it in log_group_write_buf().
- So (next_offset / UNIV_PAGE_SIZE) must be less than ULINT_MAX.
- So next_offset must be < ULINT_MAX * UNIV_PAGE_SIZE. This
- means that we are limited to ULINT_MAX * UNIV_PAGE_SIZE which
- is 64 TB on 32 bit systems. */
- fprintf(stderr,
- " InnoDB: Error: combined size of log files"
- " must be < %lu GB\n",
- ULINT_MAX / 1073741824 * UNIV_PAGE_SIZE);
-
- return(DB_ERROR);
- }
-
- sum_of_new_sizes = 0;
-
- for (i = 0; i < srv_n_data_files; i++) {
-#ifndef __WIN__
- if (sizeof(off_t) < 5
- && srv_data_file_sizes[i]
- >= (ulint) (1 << (32 - UNIV_PAGE_SIZE_SHIFT))) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: file size must be < 4 GB"
- " with this MySQL binary\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: and operating system combination,"
- " in some OS's < 2 GB\n");
-
- return(DB_ERROR);
- }
-#endif
- sum_of_new_sizes += srv_data_file_sizes[i];
- }
-
- if (!srv_auto_extend_last_data_file && sum_of_new_sizes < 640) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Combined size in innodb_data_file_path"
- " must be at least %u MiB",
- 640 >> (20 - UNIV_PAGE_SIZE_SHIFT));
-
- return(DB_ERROR);
- }
-
- recv_sys_create();
- recv_sys_init(buf_pool_get_curr_size());
-
- err = open_or_create_data_files(&create_new_db,
-#ifdef UNIV_LOG_ARCHIVE
- &min_arch_log_no, &max_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- &flushed_lsn,
- &sum_of_new_sizes);
- if (err == DB_FAIL) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "The system tablespace must be writable!");
-
- return(DB_ERROR);
-
- } else if (err != DB_SUCCESS) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Could not open or create the system tablespace. If "
- "you tried to add new data files to the system "
- "tablespace, and it failed here, you should now "
- "edit innodb_data_file_path in my.cnf back to what "
- "it was, and remove the new ibdata files InnoDB "
- "created in this failed attempt. InnoDB only wrote "
- "those files full of zeros, but did not yet use "
- "them in any way. But be careful: do not remove "
- "old data files which contain your precious data!");
-
- return(err);
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- srv_normalize_path_for_win(srv_arch_dir);
-#endif /* UNIV_LOG_ARCHIVE */
-
- dirnamelen = strlen(srv_log_group_home_dir);
- ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile");
- memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
-
- /* Add a path separator if needed. */
- if (dirnamelen && logfilename[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
- logfilename[dirnamelen++] = SRV_PATH_SEPARATOR;
- }
-
- srv_log_file_size_requested = srv_log_file_size;
-
- if (create_new_db) {
- bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
- ut_a(success);
-
- flushed_lsn = log_get_lsn();
-
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
-
- err = create_log_files(create_new_db, logfilename, dirnamelen,
- flushed_lsn, logfile0);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
- } else {
- ut_d(fil_space_get(0)->recv_size = srv_sys_space_size_debug);
-
- for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) {
- os_offset_t size;
- os_file_stat_t stat_info;
-
- sprintf(logfilename + dirnamelen,
- "ib_logfile%u", i);
-
- err = os_file_get_status(
- logfilename, &stat_info, false);
-
- if (err == DB_NOT_FOUND) {
- if (i == 0) {
-
- if (flushed_lsn < (lsn_t) 1000) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot create"
- " log files because"
- " data files are"
- " corrupt or the"
- " database was not"
- " shut down cleanly"
- " after creating"
- " the data files.");
- return(DB_ERROR);
- }
-
- err = create_log_files(
- create_new_db, logfilename,
- dirnamelen, flushed_lsn,
- logfile0);
-
- if (err == DB_SUCCESS) {
- err = create_log_files_rename(
- logfilename,
- dirnamelen,
- flushed_lsn,
- logfile0);
- }
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- /* Suppress the message about
- crash recovery. */
- flushed_lsn = log_get_lsn();
- goto files_checked;
- } else if (i < 2 && !IS_XTRABACKUP()) {
- /* must have at least 2 log files */
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Only one log file found.");
- return(err);
- }
-
- /* opened all files */
- break;
- }
-
- if (!srv_file_check_mode(logfilename)) {
- return(DB_ERROR);
- }
-
- err = open_log_file(&files[i], logfilename, &size);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- ut_a(size != (os_offset_t) -1);
-
- if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Log file %s size "
- UINT64PF " is not a multiple of"
- " innodb_page_size",
- logfilename, size);
- return(DB_ERROR);
- }
-
- size >>= UNIV_PAGE_SIZE_SHIFT;
-
- if (i == 0) {
- srv_log_file_size = size;
- } else if (size != srv_log_file_size) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Log file %s is"
- " of different size " UINT64PF " bytes"
- " than other log"
- " files " UINT64PF " bytes!",
- logfilename,
- size << UNIV_PAGE_SIZE_SHIFT,
- (os_offset_t) srv_log_file_size
- << UNIV_PAGE_SIZE_SHIFT);
- return(DB_ERROR);
- }
- }
-
- srv_n_log_files_found = i;
-
- /* Create the in-memory file space objects. */
-
- sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
-
- fil_space_create(logfilename,
- SRV_LOG_SPACE_FIRST_ID, 0, FIL_LOG,
- NULL /* no encryption yet */,
- true /* create */);
-
- ut_a(fil_validate());
-
- /* srv_log_file_size is measured in pages; if page size is 16KB,
- then we have a limit of 64TB on 32 bit systems */
- ut_a(srv_log_file_size <= ULINT_MAX);
-
- for (unsigned j = 0; j < i; j++) {
- sprintf(logfilename + dirnamelen, "ib_logfile%u", j);
-
- if (!fil_node_create(logfilename,
- (ulint) srv_log_file_size,
- SRV_LOG_SPACE_FIRST_ID, FALSE)) {
- return(DB_ERROR);
- }
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- /* Create the file space object for archived logs. Under
- MySQL, no archiving ever done. */
- fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
- 0, FIL_LOG, NULL /* no encryption yet */,
- true /* create */);
-#endif /* UNIV_LOG_ARCHIVE */
- log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE,
- SRV_LOG_SPACE_FIRST_ID,
- SRV_LOG_SPACE_FIRST_ID + 1);
- }
-
-files_checked:
- /* Open all log files and data files in the system
- tablespace: we keep them open until database
- shutdown */
-
- fil_open_log_and_system_tablespace_files();
-
- err = srv_undo_tablespaces_init(
- create_new_db,
- FALSE,
- srv_undo_tablespaces,
- &srv_undo_tablespaces_open);
-
- /* If the force recovery is set very high then we carry on regardless
- of all errors. Basically this is fingers crossed mode. */
-
- if (err != DB_SUCCESS
- && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
-
- return(err);
- }
-
- /* Initialize objects used by dict stats gathering thread, which
- can also be used by recovery if it tries to drop some table */
- if (!srv_read_only_mode) {
- dict_stats_thread_init();
- }
-
- trx_sys_file_format_init();
-
- trx_sys_create();
-
- if (create_new_db) {
- ut_a(!srv_read_only_mode);
- init_log_online();
-
- mtr_start(&mtr);
-
- fsp_header_init(0, sum_of_new_sizes, &mtr);
- compile_time_assert(TRX_SYS_SPACE == 0);
- compile_time_assert(IBUF_SPACE_ID == 0);
-
- ulint ibuf_root = btr_create(
- DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF,
- 0, 0, DICT_IBUF_ID_MIN,
- dict_ind_redundant, &mtr);
-
- mtr_commit(&mtr);
-
- if (ibuf_root == FIL_NULL) {
- return(srv_init_abort(true, __FILE__, __LINE__,
- DB_ERROR));
- }
-
- ut_ad(ibuf_root == IBUF_TREE_ROOT_PAGE_NO);
-
- /* To maintain backward compatibility we create only
- the first rollback segment before the double write buffer.
- All the remaining rollback segments will be created later,
- after the double write buffer has been created. */
- trx_sys_create_sys_pages();
-
- ib_bh = trx_sys_init_at_db_start();
- n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
-
- /* The purge system needs to create the purge view and
- therefore requires that the trx_sys is inited. */
-
- trx_purge_sys_create(srv_n_purge_threads, ib_bh);
-
- err = dict_create();
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- srv_startup_is_before_trx_rollback_phase = FALSE;
-
- bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
- ut_a(success);
-
- flushed_lsn = log_get_lsn();
-
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
-
- /* Stamp the LSN to the data files. */
- err = fil_write_flushed_lsn(flushed_lsn);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- err = create_log_files_rename(logfilename, dirnamelen,
- flushed_lsn, logfile0);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
- } else {
-
- /* Check if we support the max format that is stamped
- on the system tablespace.
- Note: We are NOT allowed to make any modifications to
- the TRX_SYS_PAGE_NO page before recovery because this
- page also contains the max_trx_id etc. important system
- variables that are required for recovery. We need to
- ensure that we return the system to a state where normal
- recovery is guaranteed to work. We do this by
- invalidating the buffer cache, this will force the
- reread of the page and restoration to its last known
- consistent state, this is REQUIRED for the recovery
- process to work. */
- err = trx_sys_file_format_max_check(
- srv_max_file_format_at_startup);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- /* Invalidate the buffer pool to ensure that we reread
- the page that we read above, during recovery.
- Note that this is not as heavy weight as it seems. At
- this point there will be only ONE page in the buf_LRU
- and there must be no page in the buf_flush list. */
- buf_pool_invalidate();
-
- /* Start monitor thread early enough so that e.g. crash
- recovery failing to find free pages in the buffer pool is
- diagnosed. */
- if (!srv_read_only_mode)
- {
- /* Create the thread which prints InnoDB monitor
- info */
- srv_monitor_active = true;
- thread_handles[4 + SRV_MAX_N_IO_THREADS] =
- os_thread_create(
- srv_monitor_thread,
- NULL,
- thread_ids + 4 + SRV_MAX_N_IO_THREADS);
-
- thread_started[4 + SRV_MAX_N_IO_THREADS] = true;
- }
-
- /* We always try to do a recovery, even if the database had
- been shut down normally: this is the normal startup path */
-
- err = recv_recovery_from_checkpoint_start(
- LOG_CHECKPOINT, LSN_MAX,
- flushed_lsn);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- init_log_online();
-
- /* Initialize the change buffer. */
- err = dict_boot();
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- /* This must precede recv_apply_hashed_log_recs(true). */
- ib_bh = trx_sys_init_at_db_start();
-
- if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
- /* Apply the hashed log records to the
- respective file pages, for the last batch of
- recv_group_scan_log_recs(). */
-
- recv_apply_hashed_log_recs(true);
-
- if (recv_sys->found_corrupt_log) {
- return (DB_CORRUPTION);
- }
-
- DBUG_PRINT("ib_log", ("apply completed"));
- }
-
- if (!srv_read_only_mode) {
- const ulint flags = FSP_FLAGS_PAGE_SSIZE();
- for (ulint id = 0; id <= srv_undo_tablespaces; id++) {
- if (fil_space_get(id)) {
- fsp_flags_try_adjust(id, flags);
- }
- }
-
- if (sum_of_new_sizes > 0) {
- /* New data file(s) were added */
- mtr_start(&mtr);
- fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
- mtr_commit(&mtr);
- /* Immediately write the log record about
- increased tablespace size to disk, so that it
- is durable even if mysqld would crash
- quickly */
- log_buffer_flush_to_disk();
- }
- }
-
- const ulint tablespace_size_in_header
- = fsp_header_get_tablespace_size();
-
-#ifdef UNIV_DEBUG
- /* buf_debug_prints = TRUE; */
-#endif /* UNIV_DEBUG */
- ulint sum_of_data_file_sizes = 0;
-
- for (ulint d = 0; d < srv_n_data_files; d++) {
- sum_of_data_file_sizes += srv_data_file_sizes[d];
- }
-
- /* Compare the system tablespace file size to what is
- stored in FSP_SIZE. In open_or_create_data_files()
- we already checked that the file sizes match the
- innodb_data_file_path specification. */
- if (srv_read_only_mode
- || sum_of_data_file_sizes == tablespace_size_in_header) {
- /* Do not complain about the size. */
- } else if (!srv_auto_extend_last_data_file
- || sum_of_data_file_sizes
- < tablespace_size_in_header) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Tablespace size stored in header is " ULINTPF
- " pages, but the sum of data file sizes is "
- ULINTPF " pages",
- tablespace_size_in_header,
- sum_of_data_file_sizes);
-
- if (srv_force_recovery == 0
- && sum_of_data_file_sizes
- < tablespace_size_in_header) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot start InnoDB. The tail of"
- " the system tablespace is"
- " missing. Have you edited"
- " innodb_data_file_path in my.cnf"
- " in an inappropriate way, removing"
- " data files from there?"
- " You can set innodb_force_recovery=1"
- " in my.cnf to force"
- " a startup if you are trying to"
- " recover a badly corrupt database.");
-
- return(DB_ERROR);
- }
- }
-
- n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
-
- /* The purge system needs to create the purge view and
- therefore requires that the trx_sys is inited. */
-
- trx_purge_sys_create(srv_n_purge_threads, ib_bh);
-
- /* recv_recovery_from_checkpoint_finish needs trx lists which
- are initialized in trx_sys_init_at_db_start(). */
-
- recv_recovery_from_checkpoint_finish();
-
- if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
- /* The following call is necessary for the insert
- buffer to work with multiple tablespaces. We must
- know the mapping between space id's and .ibd file
- names.
-
- In a crash recovery, we check that the info in data
- dictionary is consistent with what we already know
- about space id's from the call of
- fil_load_single_table_tablespaces().
-
- In a normal startup, we create the space objects for
- every table in the InnoDB data dictionary that has
- an .ibd file.
-
- We also determine the maximum tablespace id used. */
- dict_check_t dict_check;
-
- if (recv_needed_recovery) {
- dict_check = DICT_CHECK_ALL_LOADED;
- } else if (n_recovered_trx) {
- dict_check = DICT_CHECK_SOME_LOADED;
- } else {
- dict_check = DICT_CHECK_NONE_LOADED;
- }
-
- /* Create the SYS_TABLESPACES and SYS_DATAFILES system table */
- err = dict_create_or_check_sys_tablespace();
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- sys_datafiles_created = true;
-
- /* This function assumes that SYS_DATAFILES exists */
- dict_check_tablespaces_and_store_max_id(dict_check);
- }
-
- if (IS_XTRABACKUP()
- && !srv_backup_mode
- && srv_read_only_mode
- && srv_log_file_size_requested != srv_log_file_size) {
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Log files size mismatch, ignored in readonly mode");
- srv_log_file_size_requested = srv_log_file_size;
- }
-
-
- if (!srv_force_recovery
- && !recv_sys->found_corrupt_log
- && (srv_log_file_size_requested != srv_log_file_size
- || srv_n_log_files_found != srv_n_log_files)) {
- /* Prepare to replace the redo log files. */
-
- if (srv_read_only_mode) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Cannot resize log files "
- "in read-only mode.");
- return(DB_READ_ONLY);
- }
-
- /* Clean the buffer pool. */
- bool success = buf_flush_list(
- ULINT_MAX, LSN_MAX, NULL);
- ut_a(success);
-
- DBUG_EXECUTE_IF("innodb_log_abort_1",
- return(DB_ERROR););
-
- flushed_lsn = log_get_lsn();
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Resizing redo log from %u*%u to %u*%u pages"
- ", LSN=" LSN_PF,
- (unsigned) i,
- (unsigned) srv_log_file_size,
- (unsigned) srv_n_log_files,
- (unsigned) srv_log_file_size_requested,
- flushed_lsn);
-
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
-
- /* Flush the old log files. */
- log_buffer_flush_to_disk();
- /* If innodb_flush_method=O_DSYNC,
- we need to explicitly flush the log buffers. */
- fil_flush(SRV_LOG_SPACE_FIRST_ID);
-
- ut_ad(flushed_lsn == log_get_lsn());
-
- /* Prohibit redo log writes from any other
- threads until creating a log checkpoint at the
- end of create_log_files(). */
- ut_d(recv_no_log_write = TRUE);
- ut_ad(!buf_pool_check_no_pending_io());
-
- DBUG_EXECUTE_IF("innodb_log_abort_3",
- return(DB_ERROR););
-
- /* Stamp the LSN to the data files. */
- err = fil_write_flushed_lsn(flushed_lsn);
-
- DBUG_EXECUTE_IF("innodb_log_abort_4", err = DB_ERROR;);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- /* Close and free the redo log files, so that
- we can replace them. */
- fil_close_log_files(true);
-
- DBUG_EXECUTE_IF("innodb_log_abort_5",
- return(DB_ERROR););
-
- /* Free the old log file space. */
- log_group_close_all();
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Starting to delete and rewrite log files.");
-
- srv_log_file_size = srv_log_file_size_requested;
-
- err = create_log_files(create_new_db, logfilename,
- dirnamelen, flushed_lsn,
- logfile0);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- err = create_log_files_rename(logfilename, dirnamelen,
- log_get_lsn(), logfile0);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
- }
-
- recv_recovery_rollback_active();
- srv_startup_is_before_trx_rollback_phase = FALSE;
-
- /* It is possible that file_format tag has never
- been set. In this case we initialize it to minimum
- value. Important to note that we can do it ONLY after
- we have finished the recovery process so that the
- image of TRX_SYS_PAGE_NO is not stale. */
- trx_sys_file_format_tag_init();
- }
-
- ut_ad(err == DB_SUCCESS);
- ut_a(sum_of_new_sizes != ULINT_UNDEFINED);
-
-#ifdef UNIV_LOG_ARCHIVE
- if (!srv_read_only_mode) {
- if (!srv_log_archive_on) {
- ut_a(DB_SUCCESS == log_archive_noarchivelog());
- } else {
- bool start_archive;
-
- mutex_enter(&(log_sys->mutex));
-
- start_archive = false;
-
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
- start_archive = true;
- }
-
- mutex_exit(&(log_sys->mutex));
-
- if (start_archive) {
- ut_a(DB_SUCCESS == log_archive_archivelog());
- }
- }
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- /* fprintf(stderr, "Max allowed record size %lu\n",
- page_get_free_space_of_empty() / 2); */
-
- if (!buf_dblwr_create()) {
- return(srv_init_abort(create_new_db, __FILE__, __LINE__,
- DB_ERROR));
- }
-
- /* Here the double write buffer has already been created and so
- any new rollback segments will be allocated after the double
- write buffer. The default segment should already exist.
- We create the new segments only if it's a new database or
- the database was shutdown cleanly. */
-
- /* Note: When creating the extra rollback segments during an upgrade
- we violate the latching order, even if the change buffer is empty.
- We make an exception in sync0sync.cc and check srv_is_being_started
- for that violation. It cannot create a deadlock because we are still
- running in single threaded mode essentially. Only the IO threads
- should be running at this stage. */
-
- ut_a(srv_undo_logs > 0);
- ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS);
-
- /* The number of rsegs that exist in InnoDB is given by status
- variable srv_available_undo_logs. The number of rsegs to use can
- be set using the dynamic global variable srv_undo_logs. */
-
- srv_available_undo_logs = trx_sys_create_rsegs(
- srv_undo_tablespaces, srv_undo_logs);
-
- if (srv_available_undo_logs == ULINT_UNDEFINED) {
- /* Can only happen if server is read only. */
- ut_a(srv_read_only_mode);
- srv_undo_logs = ULONG_UNDEFINED;
- } else if (srv_available_undo_logs < srv_undo_logs) {
- /* Should due to out of file space. */
- return (srv_init_abort(create_new_db, __FILE__, __LINE__, DB_ERROR));
- }
-
- if (!srv_read_only_mode) {
- /* Create the thread which watches the timeouts
- for lock waits */
- thread_handles[2 + SRV_MAX_N_IO_THREADS] = os_thread_create(
- lock_wait_timeout_thread,
- NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS);
- thread_started[2 + SRV_MAX_N_IO_THREADS] = true;
- lock_sys->timeout_thread_active = true;
-
- /* Create the thread which warns of long semaphore waits */
- srv_error_monitor_active = true;
- thread_handles[3 + SRV_MAX_N_IO_THREADS] = os_thread_create(
- srv_error_monitor_thread,
- NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS);
- thread_started[3 + SRV_MAX_N_IO_THREADS] = true;
-
- /* Create the thread which prints InnoDB monitor info */
- if (!thread_started[4 + SRV_MAX_N_IO_THREADS]) {
- /* srv_monitor_thread not yet started */
- srv_monitor_active = true;
- thread_handles[4 + SRV_MAX_N_IO_THREADS] = os_thread_create(
- srv_monitor_thread,
- NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
- thread_started[4 + SRV_MAX_N_IO_THREADS] = true;
- }
- }
-
- /* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */
- err = dict_create_or_check_foreign_constraint_tables();
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- /* Create the SYS_TABLESPACES and SYS_DATAFILES system tables if we
- have not done that already on crash recovery. */
- if (sys_datafiles_created == false) {
- err = dict_create_or_check_sys_tablespace();
- if (err != DB_SUCCESS) {
- return(err);
- }
- }
-
- srv_is_being_started = FALSE;
-
- ut_a(trx_purge_state() == PURGE_STATE_INIT);
-
- /* Create the master thread which does purge and other utility
- operations */
-
- if (!srv_read_only_mode) {
-
- thread_handles[1 + SRV_MAX_N_IO_THREADS] = os_thread_create(
- srv_master_thread,
- NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
- thread_started[1 + SRV_MAX_N_IO_THREADS] = true;
-
- srv_undo_sources = true;
- /* Create the dict stats gathering thread */
- srv_dict_stats_thread_active = true;
- dict_stats_thread_handle = os_thread_create(
- dict_stats_thread, NULL, NULL);
- dict_stats_thread_started = true;
-
- /* Create the thread that will optimize the FTS sub-system. */
- fts_optimize_init();
- }
-
- if (!srv_read_only_mode
- && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
-
- thread_handles[6 + SRV_MAX_N_IO_THREADS] = os_thread_create(
- srv_purge_coordinator_thread,
- NULL, thread_ids + 6 + SRV_MAX_N_IO_THREADS);
-
- thread_started[6 + SRV_MAX_N_IO_THREADS] = true;
-
- ut_a(UT_ARR_SIZE(thread_ids)
- > 6 + srv_n_purge_threads + SRV_MAX_N_IO_THREADS);
-
- /* We've already created the purge coordinator thread above. */
- for (i = 1; i < srv_n_purge_threads; ++i) {
- thread_handles[6 + i + SRV_MAX_N_IO_THREADS] = os_thread_create(
- srv_worker_thread, NULL,
- thread_ids + 6 + i + SRV_MAX_N_IO_THREADS);
- thread_started[6 + i + SRV_MAX_N_IO_THREADS] = true;
- }
-
- srv_start_wait_for_purge_to_start();
-
- } else {
- purge_sys->state = PURGE_STATE_DISABLED;
- }
-
- if (!srv_read_only_mode) {
-
- if (srv_use_mtflush) {
- /* Start multi-threaded flush threads */
- mtflush_ctx = buf_mtflu_handler_init(
- srv_mtflush_threads,
- srv_buf_pool_instances);
-
- /* Set up the thread ids */
- buf_mtflu_set_thread_ids(
- srv_mtflush_threads,
- mtflush_ctx,
- (thread_ids + 6 + SRV_MAX_N_PURGE_THREADS));
- }
-
-
- buf_page_cleaner_is_active = true;
- buf_flush_page_cleaner_thread_handle = os_thread_create(
- buf_flush_page_cleaner_thread, NULL, NULL);
- buf_flush_page_cleaner_thread_started = true;
-
- buf_lru_manager_is_active = true;
- buf_flush_lru_manager_thread_handle = os_thread_create(
- buf_flush_lru_manager_thread, NULL, NULL);
- buf_flush_lru_manager_thread_started = true;
- }
-
- if (!srv_file_per_table && srv_pass_corrupt_table) {
- fprintf(stderr, "InnoDB: Warning:"
- " The option innodb_file_per_table is disabled,"
- " so using the option innodb_pass_corrupt_table doesn't make sense.\n");
- }
-
- if (srv_print_verbose_log) {
- ib_logf(IB_LOG_LEVEL_INFO,
- " Percona XtraDB (http://www.percona.com) %s started; "
- "log sequence number " LSN_PF "",
- INNODB_VERSION_STR, srv_start_lsn);
- }
-
- if (srv_force_recovery > 0) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "!!! innodb_force_recovery is set to %lu !!!",
- (ulong) srv_force_recovery);
- }
-
- if (!srv_read_only_mode) {
- /*
- Create a checkpoint before logging anything new, so that
- the current encryption key in use is definitely logged
- before any log blocks encrypted with that key.
- */
- log_make_checkpoint_at(LSN_MAX, TRUE);
- }
-
- if (srv_force_recovery == 0) {
- /* In the insert buffer we may have even bigger tablespace
- id's, because we may have dropped those tablespaces, but
- insert buffer merge has not had time to clean the records from
- the ibuf tree. */
-
- ibuf_update_max_tablespace_id();
- }
-
- if (!srv_read_only_mode) {
-#ifdef WITH_WSREP
- /*
- Create the dump/load thread only when not running with
- --wsrep-recover.
- */
- if (!wsrep_recovery) {
-#endif /* WITH_WSREP */
- /* Create the buffer pool dump/load thread */
- srv_buf_dump_thread_active = true;
- buf_dump_thread_handle=
- os_thread_create(buf_dump_thread, NULL, NULL);
-
- buf_dump_thread_started = true;
-#ifdef WITH_WSREP
- } else {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Skipping buffer pool dump/restore during "
- "wsrep recovery.");
- }
-#endif /* WITH_WSREP */
-
- /* Create thread(s) that handles key rotation */
- fil_system_enter();
- fil_crypt_threads_init();
- fil_system_exit();
-
- /* Init data for datafile scrub threads */
- btr_scrub_init();
-
- /* Initialize online defragmentation. */
- btr_defragment_init();
- btr_defragment_thread_active = true;
- os_thread_create(btr_defragment_thread, NULL, NULL);
- }
-
- srv_was_started = TRUE;
-
- return(DB_SUCCESS);
-}
-
-#if 0
-/********************************************************************
-Sync the FTS cache of every table reachable from the dictionary cache
-lists before shutdown. dict_sys->table_LRU is walked first, then
-dict_sys->table_non_LRU; fts_sync_table() is called for each table whose
-fts member is not NULL. */
-static
-void
-srv_fts_close(void)
-/*===============*/
-{
-	dict_table_t*	cur;
-
-	/* First pass: tables on the table_LRU list. */
-	cur = UT_LIST_GET_FIRST(dict_sys->table_LRU);
-
-	while (cur) {
-		if (cur->fts != NULL) {
-			fts_sync_table(cur);
-		}
-
-		cur = UT_LIST_GET_NEXT(table_LRU, cur);
-	}
-
-	/* Second pass: tables on the table_non_LRU list. This walk also
-	advances with UT_LIST_GET_NEXT(table_LRU, ...). */
-	cur = UT_LIST_GET_FIRST(dict_sys->table_non_LRU);
-
-	while (cur) {
-		if (cur->fts != NULL) {
-			fts_sync_table(cur);
-		}
-
-		cur = UT_LIST_GET_NEXT(table_LRU, cur);
-	}
-}
-#endif
-
-/** Shut down InnoDB.
-
-Takes no arguments and returns nothing. The body runs the following
-sequence, in this order:
-1) logs a warning when srv_was_started is unset while
-   srv_is_being_started is still set;
-2) if srv_undo_sources is set: stops the FTS optimize sub system, calls
-   dict_stats_shutdown() and waits until
-   row_get_background_drop_list_len_low() returns 0;
-3) calls logs_empty_and_mark_files_at_shutdown();
-4) sets srv_shutdown_state to SRV_SHUTDOWN_EXIT_THREADS and repeatedly
-   wakes the InnoDB threads, for at most 1000 rounds, until
-   os_thread_count is 0;
-5) closes srv_monitor_file, srv_dict_tmpfile and srv_misc_tmpfile and,
-   on Windows, the thread handles recorded as started;
-6) closes and frees the subsystems, and logs a warning if any of the
-   os_thread_count, os_event_count, os_mutex_count or
-   os_fast_mutex_count counters is still non-zero;
-7) resets srv_was_started, srv_start_has_been_called and io_tid_i. */
-UNIV_INTERN
-void
-innodb_shutdown()
-{
- ulint i;
-
- if (!srv_was_started) {
- if (srv_is_being_started) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Shutting down an improperly started, "
- "or created database!");
- }
- }
-
- if (srv_undo_sources) {
- ut_ad(!srv_read_only_mode);
- /* Shutdown the FTS optimize sub system. */
- fts_optimize_start_shutdown();
-
- fts_optimize_end();
- dict_stats_shutdown();
- while (row_get_background_drop_list_len_low()) { /* wake the
- master thread and yield until this call returns 0 */
- srv_wake_master_thread();
- os_thread_yield();
- }
- srv_undo_sources = false;
- }
-
- /* 1. Flush the buffer pool to disk, write the current lsn to
- the tablespace header(s), and copy all log data to archive.
- The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
- just free data structures after the shutdown. */
-
- logs_empty_and_mark_files_at_shutdown();
-
- if (srv_conc_get_active_threads() != 0) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Query counter shows %ld queries still "
- "inside InnoDB at shutdown",
- srv_conc_get_active_threads());
- }
-
- /* 2. Make all threads created by InnoDB to exit */
-
- srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
-
- /* All threads end up waiting for certain events. Put those events
- to the signaled state. Then the threads will exit themselves after
- os_event_wait(). */
-
- for (i = 0; i < 1000; i++) { /* at most 1000 rounds; every round
- ends with os_thread_sleep(100000) */
- /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
- HERE OR EARLIER */
-
- if (!srv_read_only_mode) {
- /* a. Let the lock timeout thread exit */
- os_event_set(lock_sys->timeout_event);
-
- /* b. srv error monitor thread exits automatically,
- no need to do anything here */
-
- /* c. We wake the master thread so that it exits */
- srv_wake_master_thread();
-
- /* d. Wakeup purge threads. */
- srv_purge_wakeup();
- }
-
- /* e. Exit the i/o threads */
-
- os_aio_wake_all_threads_at_shutdown();
-
- /* f. dict_stats_thread is signaled from
- logs_empty_and_mark_files_at_shutdown() and should have
- already quit or is quitting right now. */
-
- if (srv_use_mtflush) {
- /* g. Exit the multi threaded flush threads */
-
- buf_mtflu_io_thread_exit();
- }
-
- os_rmb;
- if (os_thread_count == 0) {
- /* All the threads have exited or are just exiting;
- NOTE that the threads may not have completed their
- exit yet. Should we use pthread_join() to make sure
- they have exited? If we did, we would have to
- remove the pthread_detach() from
- os_thread_exit(). Now we just sleep 0.1
- seconds and hope that is enough! */
-
- os_thread_sleep(100000);
-
- break;
- }
-
- os_thread_sleep(100000);
- }
-
- if (i == 1000) { /* the loop above used all of its rounds without
- seeing os_thread_count == 0 */
- ib_logf(IB_LOG_LEVEL_WARN,
- "%lu threads created by InnoDB"
- " had not exited at shutdown!",
- (ulong) os_thread_count);
- }
-
- if (srv_monitor_file) {
- fclose(srv_monitor_file);
- srv_monitor_file = 0;
- if (srv_monitor_file_name) {
- unlink(srv_monitor_file_name);
- mem_free(srv_monitor_file_name);
- }
- }
-
- if (srv_dict_tmpfile) {
- fclose(srv_dict_tmpfile);
- srv_dict_tmpfile = 0;
- }
-
- if (srv_misc_tmpfile) {
- fclose(srv_misc_tmpfile);
- srv_misc_tmpfile = 0;
- }
-
- if (!srv_read_only_mode) {
- dict_stats_thread_deinit();
- fil_crypt_threads_cleanup();
- btr_scrub_cleanup();
- btr_defragment_shutdown();
- }
-
-#ifdef __WIN__
- /* MDEV-361: ha_innodb.dll leaks handles on Windows
- MDEV-7403: should not pass recv_writer_thread_handle to
- CloseHandle().
-
- On Windows we should call CloseHandle() for all
- open thread handles. The handles are only closed when
- os_thread_count is 0. */
- if (os_thread_count == 0) {
- for (i = 0; i < SRV_MAX_N_IO_THREADS + 6 + 32; ++i) {
- if (thread_started[i]) {
- CloseHandle(thread_handles[i]);
- }
- }
-
- if (buf_flush_page_cleaner_thread_started) {
- CloseHandle(buf_flush_page_cleaner_thread_handle);
- }
-
- if (buf_dump_thread_started) {
- CloseHandle(buf_dump_thread_handle);
- }
-
- if (dict_stats_thread_started) {
- CloseHandle(dict_stats_thread_handle);
- }
-
- if (buf_flush_lru_manager_thread_started) {
- CloseHandle(buf_flush_lru_manager_thread_handle);
- }
-
- if (srv_redo_log_follow_thread_started) {
- CloseHandle(srv_redo_log_follow_thread_handle);
- }
- }
-#endif /* __WIN__ */
-
- /* This must be disabled before closing the buffer pool
- and closing the data dictionary. */
- btr_search_disable();
-
- ibuf_close();
- log_online_shutdown();
- log_shutdown();
- trx_sys_file_format_close();
- trx_sys_close();
- lock_sys_close();
-
- /* We don't create these mutexes in RO mode because we don't create
- the temp files that they cover. */
- if (!srv_read_only_mode) {
- mutex_free(&srv_monitor_file_mutex);
- mutex_free(&srv_dict_tmpfile_mutex);
- mutex_free(&srv_misc_tmpfile_mutex);
- }
-
- dict_close();
- btr_search_sys_free();
-
- /* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
- them */
- os_aio_free();
- que_close();
- row_mysql_close();
- srv_mon_free();
- srv_free();
- fil_close();
-
- /* 4. Free all allocated memory */
-
- pars_lexer_close();
- log_mem_free();
- buf_pool_free(srv_buf_pool_instances);
- mem_close();
- sync_close();
-
- /* ut_free_all_mem() frees all allocated memory not freed yet
- in shutdown, and it will also free the ut_list_mutex, so it
- should be the last one for all operation */
- ut_free_all_mem();
-
- os_rmb;
- if (os_thread_count != 0
- || os_event_count != 0
- || os_mutex_count != 0
- || os_fast_mutex_count != 0) {
- ib_logf(IB_LOG_LEVEL_WARN,
- "Some resources were not cleaned up in shutdown: "
- "threads %lu, events %lu, os_mutexes %lu, "
- "os_fast_mutexes %lu",
- (ulong) os_thread_count, (ulong) os_event_count,
- (ulong) os_mutex_count, (ulong) os_fast_mutex_count);
- }
-
- if (dict_foreign_err_file) {
- fclose(dict_foreign_err_file);
- }
-
- if (srv_print_verbose_log) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Shutdown completed; log sequence number " LSN_PF "",
- srv_shutdown_lsn);
- }
-
- srv_was_started = FALSE;
- srv_start_has_been_called = FALSE;
- /* Reset io_tid_i, in case the current process does a second InnoDB
- start (xtrabackup might do that). */
- io_tid_i = 0;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*****************************************************************//**
-Derive the name of a table's meta-data (.cfg) file and copy it into the
-caller's buffer. The path is built by os_file_make_remote_pathname() when
-DICT_TF_HAS_DATA_DIR(table->flags) holds, otherwise by
-fil_make_ibd_name(). A path that does not already end in ".cfg" has its
-last four characters replaced with ".cfg". The result is passed through
-srv_normalize_path_for_win() before returning.
-NOTE(review): up to strlen(path) + 1 bytes are written to filename, but
-the assertion only requires max_len >= strlen(path); confirm whether
-max_len is meant to include the terminating NUL. */
-UNIV_INTERN
-void
-srv_get_meta_data_filename(
-/*=======================*/
-	dict_table_t*	table,		/*!< in: table */
-	char*		filename,	/*!< out: filename */
-	ulint		max_len)	/*!< in: filename max length */
-{
-	static const ulint	suffix_len = strlen(".cfg");
-	char*			path;
-
-	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
-		dict_get_and_save_data_dir_path(table, false);
-		ut_a(table->data_dir_path);
-
-		path = os_file_make_remote_pathname(
-			table->data_dir_path, table->name, "cfg");
-	} else {
-		path = fil_make_ibd_name(table->name, false);
-	}
-
-	ut_a(path);
-
-	const ulint	len = ut_strlen(path);
-	ut_a(max_len >= len);
-
-	/* Number of characters in front of the 4-character suffix. */
-	const ulint	stem_len = len - suffix_len;
-
-	if (strncmp(path + stem_len, ".cfg", suffix_len) != 0) {
-		/* Only an .ibd name is expected here (debug assertion):
-		copy the stem, then append the .cfg suffix, which also
-		terminates the string. */
-		ut_ad(strncmp(path + stem_len, ".ibd", suffix_len) == 0);
-
-		strncpy(filename, path, stem_len);
-		strcpy(filename + stem_len, ".cfg");
-	} else {
-		/* The path already names a .cfg file: copy it as is. */
-		strcpy(filename, path);
-	}
-
-	mem_free(path);
-
-	srv_normalize_path_for_win(filename);
-}
diff --git a/storage/xtradb/sync/sync0arr.cc b/storage/xtradb/sync/sync0arr.cc
deleted file mode 100644
index 134d16ae58e..00000000000
--- a/storage/xtradb/sync/sync0arr.cc
+++ /dev/null
@@ -1,1564 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-Copyright (c) 2013, 2015, MariaDB Corporation. All Rights Reserved.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file sync/sync0arr.cc
-The wait array used in synchronization primitives
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "univ.i"
-
-#include "sync0arr.h"
-#ifdef UNIV_NONINL
-#include "sync0arr.ic"
-#endif
-
-#include <mysqld_error.h>
-#include <mysql/plugin.h>
-#include <hash.h>
-#include <myisampack.h>
-#include <sql_acl.h>
-#include <mysys_err.h>
-#include <my_sys.h>
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "i_s.h"
-#include <sql_plugin.h>
-#include <innodb_priv.h>
-
-#include "sync0sync.h"
-#include "sync0rw.h"
-#include "os0sync.h"
-#include "os0file.h"
-#include "lock0lock.h"
-#include "srv0srv.h"
-#include "ha_prototypes.h"
-
-/*
- WAIT ARRAY
- ==========
-
-The wait array consists of cells each of which has an
-operating system event object created for it. The threads
-waiting for a mutex, for example, can reserve a cell
-in the array and suspend themselves to wait for the event
-to become signaled. When using the wait array, remember to make
-sure that some thread holding the synchronization object
-will eventually know that there is a waiter in the array and
-signal the object, to prevent infinite wait.
-Why we chose to implement a wait array? First, to make
-mutexes fast, we had to code our own implementation of them,
-which only in usually uncommon cases resorts to using
-slow operating system primitives. Then we had the choice of
-assigning a unique OS event for each mutex, which would
-be simpler, or using a global wait array. In some operating systems,
-the global wait array solution is more efficient and flexible,
-because we can do with a very small number of OS events,
-say 200. In NT 3.51, allocating events seems to be a quadratic
-algorithm, because 10 000 events are created fast, but
-100 000 events takes a couple of minutes to create.
-
-As of 5.0.30 the above mentioned design is changed. Since now
-OS can handle millions of wait events efficiently, we no longer
-have this concept of each cell of wait array having one event.
-Instead, now the event that a thread wants to wait on is embedded
-in the wait object (mutex or rw_lock). We still keep the global
-wait array for the sake of diagnostics and also to avoid infinite
-wait. The error_monitor thread scans the global wait array to signal
-any waiting threads who have missed the signal. */
-
-/** A cell where an individual thread may wait suspended
-until a resource is released. The suspending is implemented
-using an operating system event semaphore. A cell is considered
-free when wait_object is NULL. */
-struct sync_cell_t {
- void* wait_object; /*!< pointer to the object the
- thread is waiting for; if NULL
- the cell is free for use */
- void* old_wait_mutex; /*!< the latest regular or priority
- wait mutex in cell; not cleared by
- sync_array_free_cell(), so that
- sync_array_cell_print() can still
- report it */
- void* old_wait_rw_lock;
- /*!< the latest regular or priority
- wait rw-lock in cell; not cleared by
- sync_array_free_cell() either */
- ulint request_type; /*!< lock type requested on the
- object */
- const char* file; /*!< file where requested (stored
- by sync_array_reserve_cell()) */
- ulint line; /*!< line where requested (stored
- by sync_array_reserve_cell()) */
- os_thread_id_t thread; /*!< thread id of this waiting
- thread */
- ibool waiting; /*!< TRUE if the thread has already
- called sync_array_wait_event
- on this cell */
- ib_int64_t signal_count; /*!< We capture the signal_count
- of the wait_object when we
- reset the event. This value is
- then passed on to os_event_wait_low
- and we wait only if the event
- has not been signalled in the
- period between the reset and
- wait call. */
- time_t reservation_time;/*!< time when the thread reserved
- the wait cell */
-};
-
-/* NOTE: It is allowed for a thread to wait
-for an event allocated for the array without owning the
-protecting mutex (depending on the case: OS or database mutex), but
-all changes (set or reset) to the state of the event must be made
-while owning the mutex. */
-/** Synchronization array */
-struct sync_array_t {
- ulint n_reserved; /*!< number of currently reserved
- cells in the wait array */
- ulint n_cells; /*!< number of cells in the
- wait array */
- sync_cell_t* array; /*!< pointer to wait array */
- ib_mutex_t mutex; /*!< possible database mutex
- protecting this data structure;
- NOTE(review): sync_array_enter() and
- sync_array_exit() only use os_mutex -
- confirm whether this member is still
- needed */
- os_ib_mutex_t os_mutex; /*!< Possible operating system mutex
- protecting the data structure.
- As this data structure is used in
- constructing the database mutex,
- to prevent infinite recursion
- in implementation, we fall back to
- an OS mutex. */
- ulint res_count; /*!< count of cell reservations
- since creation of the array;
- incremented on every call of
- sync_array_reserve_cell() */
-};
-
-/** User configured sync array size (default 32) */
-UNIV_INTERN ulong srv_sync_array_size = 32;
-
-/** Locally stored copy of srv_sync_array_size; used as the number of
-entries of sync_wait_array that the functions below iterate over */
-static ulint sync_array_size;
-
-/** The global array of wait cells for implementation of the database's own
-mutexes and read-write locks */
-static sync_array_t** sync_wait_array;
-
-/** count of how many times an object has been signalled; incremented by
-sync_array_object_signalled() */
-static ulint sg_count;
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-This function is called only in the debug version. Detects a deadlock
-of one or more threads because of waits of semaphores.
-This is only a forward declaration; the definition follows later in this
-file, also under UNIV_SYNC_DEBUG.
-@return TRUE if deadlock detected */
-static
-ibool
-sync_array_detect_deadlock(
-/*=======================*/
- sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
- own the mutex to array */
- sync_cell_t* start, /*!< in: cell where recursive search started */
- sync_cell_t* cell, /*!< in: cell to search */
- ulint depth); /*!< in: recursion depth */
-#endif /* UNIV_SYNC_DEBUG */
-
-/*****************************************************************//**
-Returns a pointer to the cell at the given position of a wait array.
-Asserts that the array exists and that the position is within bounds.
-@return cell */
-sync_cell_t*
-sync_array_get_nth_cell(
-/*====================*/
-	sync_array_t*	arr,	/*!< in: sync array */
-	ulint		n)	/*!< in: index */
-{
-	ut_a(arr);
-	ut_a(n < arr->n_cells);
-
-	sync_cell_t*	nth = &arr->array[n];
-
-	return(nth);
-}
-
-/******************************************************************//**
-Scans the wait array for a reserved cell that belongs to the given thread.
-Free cells (wait_object == NULL) are ignored.
-@return pointer to cell or NULL if not found */
-static
-sync_cell_t*
-sync_array_find_thread(
-/*===================*/
-	sync_array_t*	arr,	/*!< in: wait array */
-	os_thread_id_t	thread)	/*!< in: thread id */
-{
-	ulint	slot = 0;
-
-	while (slot < arr->n_cells) {
-		sync_cell_t*	candidate;
-
-		candidate = sync_array_get_nth_cell(arr, slot);
-		slot++;
-
-		if (candidate->wait_object == NULL) {
-			/* Free cell: cannot belong to any waiter. */
-			continue;
-		}
-
-		if (os_thread_eq(candidate->thread, thread)) {
-			return(candidate);
-		}
-	}
-
-	/* No reserved cell is owned by the thread. */
-	return(NULL);
-}
-
-/******************************************************************//**
-Reserves the mutex semaphore protecting a sync array.
-This enters arr->os_mutex (the operating system mutex of the array);
-the call must be paired with sync_array_exit(). */
-static
-void
-sync_array_enter(
-/*=============*/
- sync_array_t* arr) /*!< in: sync wait array */
-{
- os_mutex_enter(arr->os_mutex);
-}
-
-/******************************************************************//**
-Releases the mutex semaphore protecting a sync array.
-This exits arr->os_mutex, which sync_array_enter() entered. */
-static
-void
-sync_array_exit(
-/*============*/
- sync_array_t* arr) /*!< in: sync wait array */
-{
- os_mutex_exit(arr->os_mutex);
-}
-
-/*******************************************************************//**
-Allocates and zero-fills a wait array with the requested number of cells
-and creates the OS mutex that protects it. All cells start out free.
-@return own: created wait array */
-static
-sync_array_t*
-sync_array_create(
-/*==============*/
-	ulint	n_cells)	/*!< in: number of cells in the array
-				to create */
-{
-	ut_a(n_cells > 0);
-
-	/* The array header first ... */
-	sync_array_t*	new_arr = static_cast<sync_array_t*>(
-		ut_malloc(sizeof(*new_arr)));
-
-	memset(new_arr, 0x0, sizeof(*new_arr));
-
-	/* ... then the cells themselves. */
-	const ulint	cells_size = sizeof(sync_cell_t) * n_cells;
-
-	new_arr->array = static_cast<sync_cell_t*>(ut_malloc(cells_size));
-	memset(new_arr->array, 0x0, cells_size);
-
-	new_arr->n_cells = n_cells;
-
-	/* Finally the mutex guarding the whole structure. */
-	new_arr->os_mutex = os_mutex_create();
-
-	return(new_arr);
-}
-
-/******************************************************************//**
-Frees the resources in a wait array.
-Asserts that no cell is reserved any more, validates the array (which
-takes and releases the array mutex itself), then frees the OS mutex, the
-cell array and finally the array header. */
-static
-void
-sync_array_free(
-/*============*/
- sync_array_t* arr) /*!< in, own: sync wait array */
-{
- ut_a(arr->n_reserved == 0);
-
- sync_array_validate(arr);
-
- /* Release the mutex protecting the wait array complex */
-
- os_mutex_free(arr->os_mutex);
-
- ut_free(arr->array);
- ut_free(arr);
-}
-
-/********************************************************************//**
-Checks the consistency of the wait array: the number of cells that have a
-wait object must match the n_reserved counter. The array mutex is held
-for the duration of the check. */
-UNIV_INTERN
-void
-sync_array_validate(
-/*================*/
-	sync_array_t*	arr)	/*!< in: sync wait array */
-{
-	ulint	n_in_use = 0;
-	ulint	slot;
-
-	sync_array_enter(arr);
-
-	for (slot = 0; slot < arr->n_cells; slot++) {
-		if (sync_array_get_nth_cell(arr, slot)->wait_object != NULL) {
-			++n_in_use;
-		}
-	}
-
-	ut_a(n_in_use == arr->n_reserved);
-
-	sync_array_exit(arr);
-}
-
-/*******************************************************************//**
-Maps a reserved cell to the OS event its owner sleeps on. The event is
-selected by the request type stored in the cell.
-@return event embedded in the wait object of the cell */
-static
-os_event_t
-sync_cell_get_event(
-/*================*/
-	sync_cell_t*	cell)	/*!< in: non-empty sync array cell */
-{
-	const ulint	req = cell->request_type;
-
-	if (req == SYNC_MUTEX) {
-		return(&static_cast<ib_mutex_t*>(cell->wait_object)->event);
-	}
-
-	if (req == SYNC_PRIO_MUTEX) {
-		return(&static_cast<ib_prio_mutex_t*>(cell->wait_object)
-		       ->high_priority_event);
-	}
-
-	if (req == RW_LOCK_WAIT_EX) {
-		return(&static_cast<rw_lock_t*>(cell->wait_object)
-		       ->wait_ex_event);
-	}
-
-	if (req == PRIO_RW_LOCK_SHARED) {
-		return(&static_cast<prio_rw_lock_t*>(cell->wait_object)
-		       ->high_priority_s_event);
-	}
-
-	if (req == PRIO_RW_LOCK_EX) {
-		return(&static_cast<prio_rw_lock_t*>(cell->wait_object)
-		       ->high_priority_x_event);
-	}
-
-	/* Regular shared and exclusive requests share a single event. */
-	ut_ad(req == RW_LOCK_SHARED || req == RW_LOCK_EX);
-
-	return(&static_cast<rw_lock_t*>(cell->wait_object)->event);
-}
-
-/******************************************************************//**
-Reserves a wait array cell for waiting for an object.
-The event of the cell is reset to nonsignalled state.
-The array mutex is held only while a free cell is searched for and
-claimed; it is released again on the success path as well as on the
-failure path. Every call, successful or not, increments arr->res_count.
-@return true if free cell is found, otherwise false */
-UNIV_INTERN
-bool
-sync_array_reserve_cell(
-/*====================*/
-	sync_array_t*	arr,	/*!< in: wait array */
-	void*		object, /*!< in: pointer to the object to wait for */
-	ulint		type,	/*!< in: lock request type */
-	const char*	file,	/*!< in: file where requested */
-	ulint		line,	/*!< in: line where requested */
-	ulint*		index)	/*!< out: index of the reserved cell;
-				only written when true is returned */
-{
-	sync_cell_t*	cell;
-	os_event_t	event;
-	ulint		i;
-
-	ut_a(object);
-	ut_a(index);
-
-	sync_array_enter(arr);
-
-	arr->res_count++;
-
-	/* Reserve a new cell. */
-	for (i = 0; i < arr->n_cells; i++) {
-		cell = sync_array_get_nth_cell(arr, i);
-
-		if (cell->wait_object == NULL) {
-
-			cell->waiting = FALSE;
-			cell->wait_object = object;
-
-			/* Remember the object separately so that it can
-			still be printed after the cell has been freed. */
-			if (type == SYNC_MUTEX || type == SYNC_PRIO_MUTEX) {
-				cell->old_wait_mutex = object;
-			} else {
-				cell->old_wait_rw_lock = object;
-			}
-
-			cell->request_type = type;
-
-			cell->file = file;
-			cell->line = line;
-
-			arr->n_reserved++;
-
-			*index = i;
-
-			sync_array_exit(arr);
-
-			/* Make sure the event is reset and also store
-			the value of signal_count at which the event
-			was reset. */
-			event = sync_cell_get_event(cell);
-			cell->signal_count = os_event_reset(event);
-
-			cell->reservation_time = ut_time();
-
-			cell->thread = os_thread_get_curr_id();
-
-			return(true);
-		}
-	}
-
-	/* No free cell found. The array mutex must not stay locked:
-	otherwise every later operation on this array would block. */
-	sync_array_exit(arr);
-
-	return(false);
-}
-
-/******************************************************************//**
-This function should be called when a thread starts to wait on
-a wait array cell. In the debug version this function checks
-if the wait for a semaphore will result in a deadlock, in which
-case prints info and asserts.
-The cell is marked as waiting under the array mutex; the wait itself
-happens after the mutex has been released, and the cell is freed with
-sync_array_free_cell() once the wait returns. */
-UNIV_INTERN
-void
-sync_array_wait_event(
-/*==================*/
- sync_array_t* arr, /*!< in: wait array */
- ulint index) /*!< in: index of the reserved cell */
-{
- sync_cell_t* cell;
- os_event_t event;
-
- ut_a(arr);
-
- sync_array_enter(arr);
-
- cell = sync_array_get_nth_cell(arr, index);
-
- ut_a(cell->wait_object);
- ut_a(!cell->waiting);
- ut_ad(os_thread_get_curr_id() == cell->thread);
-
- event = sync_cell_get_event(cell);
- cell->waiting = TRUE;
-
-#ifdef UNIV_SYNC_DEBUG
-
- /* We use simple enter to the mutex below, because if
- we cannot acquire it at once, mutex_enter would call
- recursively sync_array routines, leading to trouble.
- rw_lock_debug_mutex freezes the debug lists. */
-
- rw_lock_debug_mutex_enter();
-
- if (TRUE == sync_array_detect_deadlock(arr, cell, cell, 0)) {
-
- fputs("########################################\n", stderr);
- ut_error;
- }
-
- rw_lock_debug_mutex_exit();
-#endif
- sync_array_exit(arr);
-
- os_event_wait_low(event, cell->signal_count); /* signal_count was captured by sync_array_reserve_cell() */
-
- sync_array_free_cell(arr, index);
-}
-
-/******************************************************************//**
-Reports info of a wait array cell.
-Prints the waiting thread, the source location of the request and the
-time waited so far, followed by details of the mutex or rw-lock taken
-from old_wait_mutex / old_wait_rw_lock. When the rw-lock has a writer,
-the writer thread id is stored in *reserver; in all other cases
-*reserver is left unmodified. When srv_use_stacktrace is set, SIGUSR2 is
-sent (on Linux only) to the waiting thread in the mutex case and to the
-writer thread in the rw-lock case. An unknown request type is a fatal
-error (ut_error). */
-static
-void
-sync_array_cell_print(
-/*==================*/
- FILE* file, /*!< in: file where to print */
- sync_cell_t* cell, /*!< in: sync cell */
- os_thread_id_t* reserver) /*!< out: writer thread of the
- rw-lock, if any; otherwise not
- modified */
-{
- ib_mutex_t* mutex;
- ib_prio_mutex_t* prio_mutex; /* assigned only when type == SYNC_PRIO_MUTEX */
- rw_lock_t* rwlock;
- prio_rw_lock_t* prio_rwlock = NULL;
- ulint type;
- ulint writer;
-
- type = cell->request_type;
-
- fprintf(file,
- "--Thread %lu has waited at %s line %lu"
- " for %#.5g seconds the semaphore:\n",
- (ulong) os_thread_pf(cell->thread),
- innobase_basename(cell->file), (ulong) cell->line,
- difftime(time(NULL), cell->reservation_time));
-
-
- if (type == SYNC_MUTEX || type == SYNC_PRIO_MUTEX) {
-
- /* We use old_wait_mutex in case the cell has already
- been freed meanwhile */
- if (type == SYNC_MUTEX) {
-
- mutex = static_cast<ib_mutex_t*>(cell->old_wait_mutex);
- } else {
-
- prio_mutex = static_cast<ib_prio_mutex_t*>
- (cell->old_wait_mutex);
- mutex = &prio_mutex->base_mutex;
- }
-
-
- if (mutex) {
- fprintf(file,
- "Mutex at %p '%s', lock var %lu\n"
- "Last time reserved by thread " ULINTPF
- " in file %s line " ULINTPF ", "
- "waiters flag " ULINTPF "\n",
- (void*) mutex, mutex->cmutex_name,
- (ulong) mutex->lock_word,
- os_thread_pf(mutex->thread_id),
- mutex->file_name, mutex->line,
- mutex->waiters);
- }
-
- /* If stacktrace feature is enabled we will send a SIGUSR2
- signal to thread waiting for the semaphore. Signal handler
- will then dump the current stack to error log. */
- if (srv_use_stacktrace && cell && cell->thread) { /* NOTE(review): cell was already dereferenced above, so the NULL test is redundant */
-#ifdef __linux__
- pthread_kill(cell->thread, SIGUSR2);
-#endif
- }
-
- if (type == SYNC_PRIO_MUTEX) {
-
- fprintf(file,
- "high-priority waiters count %lu\n",
- (ulong) prio_mutex->high_priority_waiters);
- }
-
- } else if (type == RW_LOCK_EX
- || type == RW_LOCK_WAIT_EX
- || type == RW_LOCK_SHARED
- || type == PRIO_RW_LOCK_SHARED
- || type == PRIO_RW_LOCK_EX) {
-
- fputs((type == RW_LOCK_EX || type == PRIO_RW_LOCK_EX)
- ? "X-lock on"
- : type == RW_LOCK_WAIT_EX ? "X-lock (wait_ex) on"
- : "S-lock on", file);
-
- /* Currently we are unable to tell high priority
- RW_LOCK_WAIT_EX waiter from a regular priority one. Assume
- it's a regular one. */
- if (type == RW_LOCK_EX || type == RW_LOCK_WAIT_EX
- || type == RW_LOCK_SHARED) {
-
- rwlock = static_cast<rw_lock_t *>
- (cell->old_wait_rw_lock);
- } else {
-
- prio_rwlock = static_cast<prio_rw_lock_t *>
- (cell->old_wait_rw_lock);
- rwlock = &prio_rwlock->base_lock;
- }
-
- if (rwlock) {
- fprintf(file,
- " RW-latch at %p '%s'\n",
- (void*) rwlock, rwlock->lock_name);
-
- writer = rw_lock_get_writer(rwlock);
-
- if (writer && writer != RW_LOCK_NOT_LOCKED) {
- fprintf(file,
- "a writer (thread id " ULINTPF ") has"
- " reserved it in mode %s",
- os_thread_pf(rwlock->writer_thread),
- writer == RW_LOCK_EX
- ? " exclusive\n"
- : " wait exclusive\n");
-
- *reserver = rwlock->writer_thread; /* the only place where *reserver is written */
- }
-
- fprintf(file,
- "number of readers " ULINTPF
- ", waiters flag " ULINTPF ", "
- "lock_word: %lx\n"
- "Last time read locked in file %s line %u\n"
- "Last time write locked in file %s line %u\n"
- "Holder thread " ULINTPF
- " file %s line " ULINTPF "\n",
- rw_lock_get_reader_count(rwlock),
- rwlock->waiters,
- rwlock->lock_word,
- innobase_basename(rwlock->last_s_file_name),
- rwlock->last_s_line,
- innobase_basename(rwlock->last_x_file_name),
- rwlock->last_x_line,
- os_thread_pf(rwlock->thread_id),
- innobase_basename(rwlock->file_name),
- rwlock->line);
-
- /* If stacktrace feature is enabled we will send a SIGUSR2
- signal to thread that has locked RW-latch with write mode.
- Signal handler will then dump the current stack to error log. */
- if (writer != RW_LOCK_NOT_LOCKED && srv_use_stacktrace &&
- rwlock && rwlock->writer_thread) {
-#ifdef __linux__
- pthread_kill(rwlock->writer_thread, SIGUSR2);
-#endif
- }
- }
-
- if (prio_rwlock) {
- fprintf(file, "high priority S waiters count %lu, "
- "high priority X waiters count %lu, "
- "wait-exclusive waiter is "
- "high priority if exists: %lu\n",
- prio_rwlock->high_priority_s_waiters,
- prio_rwlock->high_priority_x_waiters,
- prio_rwlock->high_priority_wait_ex_waiter);
- }
- } else {
- ut_error;
- }
-
- if (!cell->waiting) {
- fputs("wait has ended\n", file);
- }
-}
-
-#ifdef UNIV_SYNC_DEBUG
-
-/******************************************************************//**
-One recursion step of the deadlock search: looks up the cell in which the
-given thread waits and either reports a cycle back to the start cell or
-continues the search from that cell.
-@return TRUE if deadlock detected */
-static
-ibool
-sync_array_deadlock_step(
-/*=====================*/
-	sync_array_t*	arr,	/*!< in: wait array; NOTE! the caller must
-				own the mutex to array */
-	sync_cell_t*	start,	/*!< in: cell where recursive search
-				started */
-	os_thread_id_t	thread,	/*!< in: thread to look at */
-	ulint		pass,	/*!< in: pass value */
-	ulint		depth)	/*!< in: recursion depth */
-{
-	if (pass != 0) {
-		/* With a non-zero pass value we cannot tell which thread
-		is going to release the lock, hence no deadlock can be
-		established. */
-
-		return(FALSE);
-	}
-
-	sync_cell_t*	blocker_cell = sync_array_find_thread(arr, thread);
-
-	if (blocker_cell == start) {
-		/* The chain of waits leads back to where we started. */
-		fputs("########################################\n"
-		      "DEADLOCK of threads detected!\n", stderr);
-
-		return(TRUE);
-	}
-
-	if (!blocker_cell) {
-		/* The thread is not waiting in this array. */
-		return(FALSE);
-	}
-
-	return(sync_array_detect_deadlock(
-			arr, start, blocker_cell, depth + 1));
-}
-
-/******************************************************************//**
-This function is called only in the debug version. Detects a deadlock
-of one or more threads because of waits of semaphores.
-For a mutex request the holder recorded in mutex->thread_id is followed;
-for rw-lock requests every entry of lock->debug_list that can block the
-request is followed via sync_array_deadlock_step(). When a deadlock is
-found, the lock and the cell are printed to stderr on the way back.
-NOTE(review): for the PRIO_RW_LOCK_* request types wait_object is cast to
-rw_lock_t* here, while sync_cell_get_event() casts it to prio_rw_lock_t*;
-this presumably relies on base_lock being the first member - confirm.
-@return TRUE if deadlock detected */
-static
-ibool
-sync_array_detect_deadlock(
-/*=======================*/
- sync_array_t* arr, /*!< in: wait array; NOTE! the caller must
- own the mutex to array */
- sync_cell_t* start, /*!< in: cell where recursive search started */
- sync_cell_t* cell, /*!< in: cell to search */
- ulint depth) /*!< in: recursion depth */
-{
- ib_mutex_t* mutex;
- rw_lock_t* lock;
- os_thread_id_t thread;
- ibool ret;
- rw_lock_debug_t*debug;
- os_thread_id_t r = 0; /* receives the writer thread from sync_array_cell_print(); not used further */
-
- ut_a(arr);
- ut_a(start);
- ut_a(cell);
- ut_ad(cell->wait_object);
- ut_ad(os_thread_get_curr_id() == start->thread);
- ut_ad(depth < 100);
-
- depth++;
-
- if (!cell->waiting) {
-
- return(FALSE); /* No deadlock here */
- }
-
- if (cell->request_type == SYNC_MUTEX
- || cell->request_type == SYNC_PRIO_MUTEX) {
-
- if (cell->request_type == SYNC_MUTEX) {
- mutex = static_cast<ib_mutex_t*>(cell->wait_object);
- } else {
- mutex = &(static_cast<ib_prio_mutex_t*>(
- cell->wait_object))->base_mutex;
- }
-
- if (mutex_get_lock_word(mutex) != 0) {
-
- thread = mutex->thread_id;
-
- /* Note that mutex->thread_id above may be
- also OS_THREAD_ID_UNDEFINED, because the
- thread which held the mutex maybe has not
- yet updated the value, or it has already
- released the mutex: in this case no deadlock
- can occur, as the wait array cannot contain
- a thread with ID_UNDEFINED value. */
-
- ret = sync_array_deadlock_step(arr, start, thread, 0,
- depth);
- if (ret) {
- fprintf(stderr,
- "Mutex %p owned by thread %lu file %s line %lu\n",
- mutex, (ulong) os_thread_pf(mutex->thread_id),
- mutex->file_name, (ulong) mutex->line);
- sync_array_cell_print(stderr, cell, &r);
-
- return(TRUE);
- }
- }
-
- return(FALSE); /* No deadlock */
-
- } else if (cell->request_type == RW_LOCK_EX
- || cell->request_type == PRIO_RW_LOCK_EX
- || cell->request_type == RW_LOCK_WAIT_EX) {
-
- lock = static_cast<rw_lock_t*>(cell->wait_object);
-
- for (debug = UT_LIST_GET_FIRST(lock->debug_list);
- debug != 0;
- debug = UT_LIST_GET_NEXT(list, debug)) {
-
- thread = debug->thread_id;
-
- if (((debug->lock_type == RW_LOCK_EX)
- && !os_thread_eq(thread, cell->thread))
- || ((debug->lock_type == RW_LOCK_WAIT_EX)
- && !os_thread_eq(thread, cell->thread))
- || (debug->lock_type == RW_LOCK_SHARED)) {
-
- /* The (wait) x-lock request can block
- infinitely only if someone (can be also cell
- thread) is holding s-lock, or someone
- (cannot be cell thread) (wait) x-lock, and
- he is blocked by start thread */
-
- ret = sync_array_deadlock_step(
- arr, start, thread, debug->pass,
- depth);
- if (ret) {
-print: /* also the jump target of the shared-lock branch below */
- fprintf(stderr, "rw-lock %p ",
- (void*) lock);
- sync_array_cell_print(stderr, cell, &r);
- rw_lock_debug_print(stderr, debug);
- return(TRUE);
- }
- }
- }
-
- return(FALSE);
-
- } else if (cell->request_type == RW_LOCK_SHARED
- || cell->request_type == PRIO_RW_LOCK_SHARED) {
-
- lock = static_cast<rw_lock_t*>(cell->wait_object);
-
- for (debug = UT_LIST_GET_FIRST(lock->debug_list);
- debug != 0;
- debug = UT_LIST_GET_NEXT(list, debug)) {
-
- thread = debug->thread_id;
-
- if ((debug->lock_type == RW_LOCK_EX)
- || (debug->lock_type == RW_LOCK_WAIT_EX)) {
-
- /* The s-lock request can block infinitely
- only if someone (can also be cell thread) is
- holding (wait) x-lock, and he is blocked by
- start thread */
-
- ret = sync_array_deadlock_step(
- arr, start, thread, debug->pass,
- depth);
- if (ret) {
- goto print;
- }
- }
- }
-
- return(FALSE);
-
- } else {
- ut_error;
- }
-
- return(TRUE); /* Execution never reaches this line: for compiler
- fooling only */
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/******************************************************************//**
-Determines if we can wake up the thread waiting for a semaphore, i.e.
-whether the object the cell waits for currently looks available for the
-kind of request stored in the cell.
-@return TRUE if the waiter may be woken up */
-static
-ibool
-sync_arr_cell_can_wake_up(
-/*======================*/
-	sync_cell_t*	cell)	/*!< in: cell to search */
-{
-	if (cell->request_type == SYNC_MUTEX
-	    || cell->request_type == SYNC_PRIO_MUTEX) {
-
-		ib_mutex_t*	waited_mutex;
-
-		if (cell->request_type == SYNC_PRIO_MUTEX) {
-			waited_mutex = &(static_cast<ib_prio_mutex_t*>(
-				cell->wait_object))->base_mutex;
-		} else {
-			waited_mutex = static_cast<ib_mutex_t*>(
-				cell->wait_object);
-		}
-
-		os_rmb;
-
-		/* A zero lock word means the mutex is free. */
-		return(mutex_get_lock_word(waited_mutex) == 0
-		       ? TRUE : FALSE);
-	}
-
-	if (cell->request_type == RW_LOCK_EX
-	    || cell->request_type == PRIO_RW_LOCK_EX) {
-
-		rw_lock_t*	waited_lock = static_cast<rw_lock_t*>(
-			cell->wait_object);
-
-		os_rmb;
-
-		/* Either unlocked or only read locked. */
-		return(waited_lock->lock_word > 0 ? TRUE : FALSE);
-	}
-
-	if (cell->request_type == RW_LOCK_WAIT_EX) {
-
-		rw_lock_t*	waited_lock = static_cast<rw_lock_t*>(
-			cell->wait_object);
-
-		os_rmb;
-
-		/* lock_word == 0 means all readers have left */
-		return(waited_lock->lock_word == 0 ? TRUE : FALSE);
-	}
-
-	if (cell->request_type == RW_LOCK_SHARED
-	    || cell->request_type == PRIO_RW_LOCK_SHARED) {
-
-		rw_lock_t*	waited_lock = static_cast<rw_lock_t*>(
-			cell->wait_object);
-
-		os_rmb;
-
-		/* lock_word > 0 means no writer or reserved writer */
-		return(waited_lock->lock_word > 0 ? TRUE : FALSE);
-	}
-
-	/* Unknown request type. */
-	ut_error;
-
-	return(FALSE);
-}
-
-/******************************************************************//**
-Returns a reserved cell to the free state under the array mutex and
-decrements the reservation counter. NOTE! sync_array_wait_event frees
-the cell automatically! */
-UNIV_INTERN
-void
-sync_array_free_cell(
-/*=================*/
-	sync_array_t*	arr,	/*!< in: wait array */
-	ulint		index)	/*!< in: index of the cell in array */
-{
-	sync_array_enter(arr);
-
-	sync_cell_t*	released = sync_array_get_nth_cell(arr, index);
-
-	/* Freeing a cell that is not reserved is a programming error. */
-	ut_a(released->wait_object != NULL);
-
-	released->waiting = FALSE;
-	released->wait_object = NULL;
-	released->signal_count = 0;
-
-	ut_a(arr->n_reserved > 0);
-	--arr->n_reserved;
-
-	sync_array_exit(arr);
-}
-
-/**********************************************************************//**
-Increments the signalled count (sg_count).
-With HAVE_ATOMIC_BUILTINS the increment is done through
-os_atomic_increment_ulint(); otherwise a plain, non-atomic increment is
-used. */
-UNIV_INTERN
-void
-sync_array_object_signalled(void)
-/*=============================*/
-{
-#ifdef HAVE_ATOMIC_BUILTINS
- (void) os_atomic_increment_ulint(&sg_count, 1);
-#else
- ++sg_count;
-#endif /* HAVE_ATOMIC_BUILTINS */
-}
-
-/**********************************************************************//**
-If the wakeup algorithm does not work perfectly at semaphore releases,
-this function will do the waking (see the comment in mutex_exit). This
-function should be called about every 1 second in the server.
-
-The reserved cells of one array are visited under the array mutex, and
-the event of every cell whose semaphore looks free is set.
-
-Note that there's a race condition between this thread and mutex_exit
-changing the lock_word and calling signal_object, so sometimes this finds
-threads to wake up even when nothing has gone wrong. */
-static
-void
-sync_array_wake_threads_if_sema_free_low(
-/*=====================================*/
-	sync_array_t*	arr)	/* in/out: wait array */
-{
-	ulint	slot = 0;
-	ulint	n_seen = 0;
-
-	sync_array_enter(arr);
-
-	/* Stop as soon as all reserved cells have been visited. */
-	while (n_seen < arr->n_reserved) {
-		sync_cell_t*	cell = sync_array_get_nth_cell(arr, slot);
-
-		++slot;
-
-		if (cell->wait_object == NULL) {
-			continue;
-		}
-
-		n_seen++;
-
-		if (sync_arr_cell_can_wake_up(cell)) {
-			os_event_set(sync_cell_get_event(cell));
-		}
-	}
-
-	sync_array_exit(arr);
-}
-
-/**********************************************************************//**
-If the wakeup algorithm does not work perfectly at semaphore releases,
-this function will do the waking (see the comment in mutex_exit). This
-function should be called about every 1 second in the server.
-
-Every wait array instance is processed in turn.
-
-Note that there's a race condition between this thread and mutex_exit
-changing the lock_word and calling signal_object, so sometimes this finds
-threads to wake up even when nothing has gone wrong. */
-UNIV_INTERN
-void
-sync_arr_wake_threads_if_sema_free(void)
-/*====================================*/
-{
-	ulint	instance = 0;
-
-	while (instance < sync_array_size) {
-		sync_array_t*	arr = sync_wait_array[instance];
-
-		sync_array_wake_threads_if_sema_free_low(arr);
-
-		++instance;
-	}
-}
-
-/**********************************************************************//**
-Prints warnings of long semaphore waits to stderr.
-The first pass reports every waiting cell that has waited longer than
-SYNC_ARRAY_TIMEOUT seconds and tracks the longest wait. If a long wait
-has been noticed (in this array or, through *noticed, in an earlier one),
-a second pass prints all waiting cells and follows the chain of writer
-threads that are themselves waiting. The chain walk is bounded so that a
-cycle of waiters cannot keep this function spinning.
-The caller is expected to hold the array mutex, as
-sync_array_print_long_waits() does.
-@return TRUE if fatal semaphore wait threshold was exceeded */
-static
-ibool
-sync_array_print_long_waits_low(
-/*============================*/
-	sync_array_t*	arr,	/*!< in: sync array instance */
-	os_thread_id_t*	waiter,	/*!< out: longest waiting thread */
-	const void**	sema,	/*!< out: longest-waited-for semaphore */
-	ibool*		noticed)/*!< in/out: TRUE if long wait noticed */
-{
-	ulint		i;
-	ulint		fatal_timeout = srv_fatal_semaphore_wait_threshold;
-	ibool		fatal = FALSE;
-	double		longest_diff = 0;
-
-	/* For huge tables, skip the check during CHECK TABLE etc... */
-	if (fatal_timeout > SRV_SEMAPHORE_WAIT_EXTENSION) {
-		return(FALSE);
-	}
-
-#ifdef UNIV_DEBUG_VALGRIND
-	/* Increase the timeouts if running under valgrind because it executes
-	extremely slowly. UNIV_DEBUG_VALGRIND does not necessary mean that
-	we are running under valgrind but we have no better way to tell.
-	See Bug#58432 innodb.innodb_bug56143 fails under valgrind
-	for an example */
-# define SYNC_ARRAY_TIMEOUT	2400
-	fatal_timeout *= 10;
-#else
-# define SYNC_ARRAY_TIMEOUT	240
-#endif
-
-	for (i = 0; i < arr->n_cells; i++) {
-
-		double		diff;
-		sync_cell_t*	cell;
-		void*		wait_object;
-		os_thread_id_t	reserver=0;
-
-		cell = sync_array_get_nth_cell(arr, i);
-
-		wait_object = cell->wait_object;
-
-		if (wait_object == NULL || !cell->waiting) {
-
-			continue;
-		}
-
-		diff = difftime(time(NULL), cell->reservation_time);
-
-		if (diff > SYNC_ARRAY_TIMEOUT) {
-			fputs("InnoDB: Warning: a long semaphore wait:\n",
-			      stderr);
-			sync_array_cell_print(stderr, cell, &reserver);
-			*noticed = TRUE;
-		}
-
-		if (diff > fatal_timeout) {
-			fatal = TRUE;
-		}
-
-		if (diff > longest_diff) {
-			longest_diff = diff;
-			*sema = wait_object;
-			*waiter = cell->thread;
-		}
-	}
-
-	/* We found a long semaphore wait, print all threads that are
-	waiting for a semaphore. */
-	if (*noticed) {
-		for (i = 0; i < arr->n_cells; i++) {
-			void*		wait_object;
-			sync_cell_t*	cell;
-			os_thread_id_t	reserver=(os_thread_id_t)ULINT_UNDEFINED;
-			ulint		loop=0;
-
-			cell = sync_array_get_nth_cell(arr, i);
-
-			wait_object = cell->wait_object;
-
-			if (wait_object == NULL || !cell->waiting) {
-
-				continue;
-			}
-
-			fputs("InnoDB: Warning: semaphore wait:\n",
-			      stderr);
-			sync_array_cell_print(stderr, cell, &reserver);
-
-			/* Try to output cell information for writer recursive way */
-			while (reserver != (os_thread_id_t)ULINT_UNDEFINED) {
-				sync_cell_t* reserver_wait;
-
-				reserver_wait = sync_array_find_thread(arr, reserver);
-
-				if (reserver_wait &&
-				    reserver_wait->wait_object != NULL &&
-				    reserver_wait->waiting) {
-					fputs("InnoDB: Warning: Writer thread is waiting this semaphore:\n",
-					      stderr);
-					sync_array_cell_print(stderr, reserver_wait, &reserver);
-
-					/* The print did not find a further
-					writer: the chain ends here. */
-					if (reserver_wait->thread == reserver) {
-						reserver = (os_thread_id_t)ULINT_UNDEFINED;
-					}
-				} else {
-					reserver = (os_thread_id_t)ULINT_UNDEFINED;
-				}
-
-				/* This is protection against loop: count the
-				steps of the chain walk, otherwise a cycle of
-				writers waiting for each other would never
-				terminate while the array mutex is held. */
-				loop++;
-
-				if (loop > 100) {
-					fputs("InnoDB: Warning: Too many waiting threads.\n", stderr);
-					break;
-				}
-			}
-		}
-	}
-
-#undef SYNC_ARRAY_TIMEOUT
-
-	return(fatal);
-}
-
-/**********************************************************************//**
-Prints warnings of long semaphore waits to stderr.
-Each wait array instance is checked under its own mutex. If a long wait
-was noticed in any of them, the InnoDB Monitor output is switched on for
-30 seconds and the previous setting is restored afterwards.
-@return TRUE if fatal semaphore wait threshold was exceeded */
-UNIV_INTERN
-ibool
-sync_array_print_long_waits(
-/*========================*/
-	os_thread_id_t*	waiter,	/*!< out: longest waiting thread */
-	const void**	sema)	/*!< out: longest-waited-for semaphore */
-{
-	ibool	threshold_exceeded = FALSE;
-	ibool	noticed = FALSE;
-
-	for (ulint n = 0; n < sync_array_size; ++n) {
-
-		sync_array_t*	instance = sync_wait_array[n];
-		ibool		instance_fatal;
-
-		sync_array_enter(instance);
-
-		instance_fatal = sync_array_print_long_waits_low(
-			instance, waiter, sema, &noticed);
-
-		if (instance_fatal) {
-			threshold_exceeded = TRUE;
-		}
-
-		sync_array_exit(instance);
-	}
-
-	if (!noticed) {
-		return(threshold_exceeded);
-	}
-
-	fprintf(stderr,
-		"InnoDB: ###### Starts InnoDB Monitor"
-		" for 30 secs to print diagnostic info:\n");
-
-	ibool	saved_monitor_flag = srv_print_innodb_monitor;
-
-	/* The InnoDB Monitor itself may hang if a crucial semaphore is
-	reserved, in which case no diagnostics would be produced. Many
-	InnoDB hangs are caused by a pwrite() or a pread() call hanging
-	inside the operating system, so report the pending counts of
-	those right away. */
-
-	fprintf(stderr,
-		"InnoDB: Pending reads " UINT64PF
-		", writes " UINT64PF "\n",
-		MONITOR_VALUE(MONITOR_OS_PENDING_READS),
-		MONITOR_VALUE(MONITOR_OS_PENDING_WRITES));
-
-	srv_print_innodb_monitor = TRUE;
-	os_event_set(srv_monitor_event);
-
-	os_thread_sleep(30000000);
-
-	srv_print_innodb_monitor = static_cast<my_bool>(saved_monitor_flag);
-	fprintf(stderr,
-		"InnoDB: ###### Diagnostic info printed"
-		" to the standard error stream\n");
-
-	return(threshold_exceeded);
-}
-
-/**********************************************************************//**
-Prints info of the wait array. */
-static
-void
-sync_array_print_info_low(
-/*======================*/
- FILE* file, /*!< in: file where to print */
- sync_array_t* arr) /*!< in: wait array */
-{
- ulint i;
- ulint count = 0;
-
- fprintf(file,
- "OS WAIT ARRAY INFO: reservation count " ULINTPF "\n",
- arr->res_count);
-
- for (i = 0; count < arr->n_reserved; ++i) {
- sync_cell_t* cell;
- os_thread_id_t r = 0;
-
- cell = sync_array_get_nth_cell(arr, i);
-
- if (cell->wait_object != NULL) {
- count++;
- sync_array_cell_print(file, cell, &r);
- }
- }
-}
-
-/**********************************************************************//**
-Prints info of the wait array. */
-static
-void
-sync_array_print_info(
-/*==================*/
- FILE* file, /*!< in: file where to print */
- sync_array_t* arr) /*!< in: wait array */
-{
- sync_array_enter(arr);
-
- sync_array_print_info_low(file, arr);
-
- sync_array_exit(arr);
-}
-
-/**********************************************************************//**
-Create the primary system wait array(s), they are protected by an OS mutex */
-UNIV_INTERN
-void
-sync_array_init(
-/*============*/
- ulint n_threads) /*!< in: Number of slots to
- create in all arrays */
-{
- ulint i;
- ulint n_slots;
-
- ut_a(sync_wait_array == NULL);
- ut_a(srv_sync_array_size > 0);
- ut_a(n_threads > 0);
-
- sync_array_size = srv_sync_array_size;
-
- /* We have to use ut_malloc() because the mutex infrastructure
- hasn't been initialised yet. It is required by mem_alloc() and
- the heap functions. */
-
- sync_wait_array = static_cast<sync_array_t**>(
- ut_malloc(sizeof(*sync_wait_array) * sync_array_size));
-
- n_slots = 1 + (n_threads - 1) / sync_array_size;
-
- for (i = 0; i < sync_array_size; ++i) {
-
- sync_wait_array[i] = sync_array_create(n_slots);
- }
-}
-
-/**********************************************************************//**
-Close sync array wait sub-system. */
-UNIV_INTERN
-void
-sync_array_close(void)
-/*==================*/
-{
- ulint i;
-
- for (i = 0; i < sync_array_size; ++i) {
- sync_array_free(sync_wait_array[i]);
- }
-
- ut_free(sync_wait_array);
- sync_wait_array = NULL;
-}
-
-/**********************************************************************//**
-Print info about the sync array(s). */
-UNIV_INTERN
-void
-sync_array_print(
-/*=============*/
- FILE* file) /*!< in/out: Print to this stream */
-{
- ulint i;
-
- for (i = 0; i < sync_array_size; ++i) {
- sync_array_print_info(file, sync_wait_array[i]);
- }
-
- fprintf(file,
- "OS WAIT ARRAY INFO: signal count " ULINTPF "\n", sg_count);
-
-}
-
-/**********************************************************************//**
-Get an instance of the sync wait array. */
-UNIV_INTERN
-sync_array_t*
-sync_array_get(void)
-/*================*/
-{
- ulint i;
- static ulint count;
-
-#ifdef HAVE_ATOMIC_BUILTINS
- i = os_atomic_increment_ulint(&count, 1);
-#else
- i = count++;
-#endif /* HAVE_ATOMIC_BUILTINS */
-
- return(sync_wait_array[i % sync_array_size]);
-}
-
-/**********************************************************************//**
-Prints info of the wait array without using any mutexes/semaphores. */
-UNIV_INTERN
-void
-sync_array_print_xtradb(void)
-/*=========================*/
-{
- ulint i;
- sync_array_t* arr = sync_array_get();
-
- fputs("InnoDB: Semaphore wait debug output started for XtraDB:\n", stderr);
-
- for (i = 0; i < arr->n_cells; i++) {
- void* wait_object;
- sync_cell_t* cell;
- os_thread_id_t reserver=(os_thread_id_t)ULINT_UNDEFINED;
- ulint loop=0;
-
- cell = sync_array_get_nth_cell(arr, i);
-
- wait_object = cell->wait_object;
-
- if (wait_object == NULL || !cell->waiting) {
-
- continue;
- }
-
- fputs("InnoDB: Warning: semaphore wait:\n",
- stderr);
- sync_array_cell_print(stderr, cell, &reserver);
-
- /* Try to output cell information for writer recursive way */
- while (reserver != (os_thread_id_t)ULINT_UNDEFINED) {
- sync_cell_t* reserver_wait;
-
- reserver_wait = sync_array_find_thread(arr, reserver);
-
- if (reserver_wait &&
- reserver_wait->wait_object != NULL &&
- reserver_wait->waiting) {
- fputs("InnoDB: Warning: Writer thread is waiting this semaphore:\n",
- stderr);
- sync_array_cell_print(stderr, reserver_wait, &reserver);
-
- if (reserver_wait->thread == reserver) {
- reserver = (os_thread_id_t)ULINT_UNDEFINED;
- }
- } else {
- reserver = (os_thread_id_t)ULINT_UNDEFINED;
- }
-
- /* This is protection against loop */
- if (loop > 100) {
- fputs("InnoDB: Warning: Too many waiting threads.\n", stderr);
- break;
- }
- }
- }
-
- fputs("InnoDB: Semaphore wait debug output ended:\n", stderr);
-}
-
-/**********************************************************************//**
-Get number of items on sync array. */
-UNIV_INTERN
-ulint
-sync_arr_get_n_items(void)
-/*======================*/
-{
- sync_array_t* sync_arr = sync_array_get();
- return (ulint) sync_arr->n_cells;
-}
-
-/******************************************************************//**
-Get specified item from sync array if it is reserved. Set given
-pointer to array item if it is reserved.
-@return true if item is reserved, false othervise */
-UNIV_INTERN
-ibool
-sync_arr_get_item(
-/*==============*/
- ulint i, /*!< in: requested item */
- sync_cell_t **cell) /*!< out: cell contents if item
- reserved */
-{
- sync_array_t* sync_arr;
- sync_cell_t* wait_cell;
- void* wait_object;
- ibool found = FALSE;
-
- sync_arr = sync_array_get();
- wait_cell = sync_array_get_nth_cell(sync_arr, i);
-
- if (wait_cell) {
- wait_object = wait_cell->wait_object;
-
- if(wait_object != NULL && wait_cell->waiting) {
- found = TRUE;
- *cell = wait_cell;
- }
- }
-
- return found;
-}
-
-/*******************************************************************//**
-Function to populate INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
-Loop through each item on sync array, and extract the column
-information and fill the INFORMATION_SCHEMA.INNODB_SYS_SEMAPHORE_WAITS table.
-@return 0 on success */
-UNIV_INTERN
-int
-sync_arr_fill_sys_semphore_waits_table(
-/*===================================*/
- THD* thd, /*!< in: thread */
- TABLE_LIST* tables, /*!< in/out: tables to fill */
- Item* ) /*!< in: condition (not used) */
-{
- Field** fields;
- ulint n_items;
-
- DBUG_ENTER("i_s_sys_semaphore_waits_fill_table");
- RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
-
- /* deny access to user without PROCESS_ACL privilege */
- if (check_global_access(thd, PROCESS_ACL)) {
- DBUG_RETURN(0);
- }
-
- fields = tables->table->field;
- n_items = sync_arr_get_n_items();
- ulint type;
-
- for(ulint i=0; i < n_items;i++) {
- sync_cell_t *cell=NULL;
- if (sync_arr_get_item(i, &cell)) {
- ib_prio_mutex_t* prio_mutex;
- ib_mutex_t* mutex;
- type = cell->request_type;
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_THREAD_ID], (longlong)os_thread_pf(cell->thread)));
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_FILE], innobase_basename(cell->file)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LINE], cell->line));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_TIME], (longlong)difftime(time(NULL), cell->reservation_time)));
-
- if (type == SYNC_MUTEX || type == SYNC_PRIO_MUTEX) {
- if (type == SYNC_MUTEX) {
- mutex = static_cast<ib_mutex_t*>(cell->old_wait_mutex);
- } else {
-
- prio_mutex = static_cast<ib_prio_mutex_t*>
- (cell->old_wait_mutex);
- mutex = &prio_mutex->base_mutex;
- }
-
- if (mutex) {
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], mutex->cmutex_name));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT], (longlong)mutex));
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "MUTEX"));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID], (longlong)mutex->thread_id));
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(mutex->file_name)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE], mutex->line));
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_CREATED_FILE], innobase_basename(mutex->cfile_name)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_CREATED_LINE], mutex->cline));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG], (longlong)mutex->waiters));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD], (longlong)mutex->lock_word));
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(mutex->file_name)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE], mutex->line));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT], mutex->count_os_wait));
- }
- } else if (type == RW_LOCK_EX
- || type == RW_LOCK_WAIT_EX
- || type == RW_LOCK_SHARED
- || type == PRIO_RW_LOCK_SHARED
- || type == PRIO_RW_LOCK_EX) {
- rw_lock_t* rwlock=NULL;
- prio_rw_lock_t* prio_rwlock=NULL;
-
- if (type == RW_LOCK_EX || type == RW_LOCK_WAIT_EX
- || type == RW_LOCK_SHARED) {
-
- rwlock = static_cast<rw_lock_t *>
- (cell->old_wait_rw_lock);
- } else {
-
- prio_rwlock = static_cast<prio_rw_lock_t *>
- (cell->old_wait_rw_lock);
- rwlock = &prio_rwlock->base_lock;
- }
-
- if (rwlock) {
- ulint writer = rw_lock_get_writer(rwlock);
-
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAIT_OBJECT], (longlong)rwlock));
- if (type == RW_LOCK_EX) {
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_EX"));
- } else if (type == RW_LOCK_WAIT_EX) {
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_WAIT_EX"));
- } else if (type == RW_LOCK_SHARED) {
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_WAIT_TYPE], "RW_LOCK_SHARED"));
- }
-
- if (writer != RW_LOCK_NOT_LOCKED) {
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_OBJECT_NAME], rwlock->lock_name));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WRITER_THREAD], (longlong)os_thread_pf(rwlock->writer_thread)));
-
- if (writer == RW_LOCK_EX) {
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_EX"));
- } else if (writer == RW_LOCK_WAIT_EX) {
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_RESERVATION_MODE], "RW_LOCK_WAIT_EX"));
- }
-
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_THREAD_ID], (longlong)rwlock->thread_id));
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_HOLDER_FILE], innobase_basename(rwlock->file_name)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_HOLDER_LINE], rwlock->line));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_READERS], rw_lock_get_reader_count(rwlock)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_WAITERS_FLAG], (longlong)rwlock->waiters));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LOCK_WORD], (longlong)rwlock->lock_word));
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_READER_FILE], innobase_basename(rwlock->last_s_file_name)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LAST_READER_LINE], rwlock->last_s_line));
- OK(field_store_string(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_FILE], innobase_basename(rwlock->last_x_file_name)));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_LAST_WRITER_LINE], rwlock->last_x_line));
- OK(field_store_ulint(fields[SYS_SEMAPHORE_WAITS_OS_WAIT_COUNT], rwlock->count_os_wait));
- }
- }
- }
-
- OK(schema_table_store_record(thd, tables->table));
- }
- }
-
- DBUG_RETURN(0);
-}
diff --git a/storage/xtradb/sync/sync0rw.cc b/storage/xtradb/sync/sync0rw.cc
deleted file mode 100644
index 729f510013d..00000000000
--- a/storage/xtradb/sync/sync0rw.cc
+++ /dev/null
@@ -1,1297 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file sync/sync0rw.cc
-The read-write lock (for thread synchronization)
-
-Created 9/11/1995 Heikki Tuuri
-*******************************************************/
-
-#include "sync0rw.h"
-#ifdef UNIV_NONINL
-#include "sync0rw.ic"
-#include "sync0arr.ic"
-#endif
-
-#include "os0thread.h"
-#include "mem0mem.h"
-#include "srv0srv.h"
-#include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */
-#include "ha_prototypes.h"
-#include "my_cpu.h"
-
-/*
- IMPLEMENTATION OF THE RW_LOCK
- =============================
-The status of a rw_lock is held in lock_word. The initial value of lock_word is
-X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR
-for each x-lock. This describes the lock state for each value of lock_word:
-
-lock_word == X_LOCK_DECR: Unlocked.
-0 < lock_word < X_LOCK_DECR: Read locked, no waiting writers.
- (X_LOCK_DECR - lock_word) is the
- number of readers that hold the lock.
-lock_word == 0: Write locked
--X_LOCK_DECR < lock_word < 0: Read locked, with a waiting writer.
- (-lock_word) is the number of readers
- that hold the lock.
-lock_word <= -X_LOCK_DECR: Recursively write locked. lock_word has been
- decremented by X_LOCK_DECR for the first lock
- and the first recursive lock, then by 1 for
- each recursive lock thereafter.
- So the number of locks is:
- (lock_copy == 0) ? 1 : 2 - (lock_copy + X_LOCK_DECR)
-
-The lock_word is always read and updated atomically and consistently, so that
-it always represents the state of the lock, and the state of the lock changes
-with a single atomic operation. This lock_word holds all of the information
-that a thread needs in order to determine if it is eligible to gain the lock
-or if it must spin or sleep. The one exception to this is that writer_thread
-must be verified before recursive write locks: to solve this scenario, we make
-writer_thread readable by all threads, but only writeable by the x-lock holder.
-
-The other members of the lock obey the following rules to remain consistent:
-
-recursive: This and the writer_thread field together control the
- behaviour of recursive x-locking.
- lock->recursive must be FALSE in following states:
- 1) The writer_thread contains garbage i.e.: the
- lock has just been initialized.
- 2) The lock is not x-held and there is no
- x-waiter waiting on WAIT_EX event.
- 3) The lock is x-held or there is an x-waiter
- waiting on WAIT_EX event but the 'pass' value
- is non-zero.
- lock->recursive is TRUE iff:
- 1) The lock is x-held or there is an x-waiter
- waiting on WAIT_EX event and the 'pass' value
- is zero.
- This flag must be set after the writer_thread field
- has been updated with a memory ordering barrier.
- It is unset before the lock_word has been incremented.
-writer_thread: Is used only in recursive x-locking. Can only be safely
- read iff lock->recursive flag is TRUE.
- This field is uninitialized at lock creation time and
- is updated atomically when x-lock is acquired or when
- move_ownership is called. A thread is only allowed to
- set the value of this field to it's thread_id i.e.: a
- thread cannot set writer_thread to some other thread's
- id.
-waiters: May be set to 1 anytime, but to avoid unnecessary wake-up
- signals, it should only be set to 1 when there are threads
- waiting on event. Must be 1 when a writer starts waiting to
- ensure the current x-locking thread sends a wake-up signal
- during unlock. May only be reset to 0 immediately before a
- a wake-up signal is sent to event. On most platforms, a
- memory barrier is required after waiters is set, and before
- verifying lock_word is still held, to ensure some unlocker
- really does see the flags new value.
-event: Threads wait on event for read or writer lock when another
- thread has an x-lock or an x-lock reservation (wait_ex). A
- thread may only wait on event after performing the following
- actions in order:
- (1) Record the counter value of event (with os_event_reset).
- (2) Set waiters to 1.
- (3) Verify lock_word <= 0.
- (1) must come before (2) to ensure signal is not missed.
- (2) must come before (3) to ensure a signal is sent.
- These restrictions force the above ordering.
- Immediately before sending the wake-up signal, we should:
- (1) Verify lock_word == X_LOCK_DECR (unlocked)
- (2) Reset waiters to 0.
-wait_ex_event: A thread may only wait on the wait_ex_event after it has
- performed the following actions in order:
- (1) Decrement lock_word by X_LOCK_DECR.
- (2) Record counter value of wait_ex_event (os_event_reset,
- called from sync_array_reserve_cell).
- (3) Verify that lock_word < 0.
- (1) must come first to ensures no other threads become reader
- or next writer, and notifies unlocker that signal must be sent.
- (2) must come before (3) to ensure the signal is not missed.
- These restrictions force the above ordering.
- Immediately before sending the wake-up signal, we should:
- Verify lock_word == 0 (waiting thread holds x_lock)
-*/
-
-UNIV_INTERN rw_lock_stats_t rw_lock_stats;
-
-/* The global list of rw-locks */
-UNIV_INTERN rw_lock_list_t rw_lock_list;
-UNIV_INTERN ib_mutex_t rw_lock_list_mutex;
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t rw_lock_list_mutex_key;
-UNIV_INTERN mysql_pfs_key_t rw_lock_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef UNIV_SYNC_DEBUG
-/* The global mutex which protects debug info lists of all rw-locks.
-To modify the debug info list of an rw-lock, this mutex has to be
-acquired in addition to the mutex protecting the lock. */
-
-UNIV_INTERN os_fast_mutex_t rw_lock_debug_mutex;
-
-# ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t rw_lock_debug_mutex_key;
-# endif
-
-/******************************************************************//**
-Creates a debug info struct. */
-static
-rw_lock_debug_t*
-rw_lock_debug_create(void);
-/*======================*/
-/******************************************************************//**
-Frees a debug info struct. */
-static
-void
-rw_lock_debug_free(
-/*===============*/
- rw_lock_debug_t* info);
-
-/******************************************************************//**
-Creates a debug info struct.
-@return own: debug info struct */
-static
-rw_lock_debug_t*
-rw_lock_debug_create(void)
-/*======================*/
-{
- return((rw_lock_debug_t*) mem_alloc(sizeof(rw_lock_debug_t)));
-}
-
-/******************************************************************//**
-Frees a debug info struct. */
-static
-void
-rw_lock_debug_free(
-/*===============*/
- rw_lock_debug_t* info)
-{
- mem_free(info);
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/******************************************************************//**
-Creates, or rather, initializes an rw-lock object in a specified memory
-location (which must be appropriately aligned). The rw-lock is initialized
-to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
-is necessary only if the memory block containing it is freed. */
-UNIV_INTERN
-void
-rw_lock_create_func(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to memory */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cmutex_name, /*!< in: mutex name */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline) /*!< in: file line where created */
-{
- /* If this is the very first time a synchronization object is
- created, then the following call initializes the sync system. */
-
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_create(rw_lock_mutex_key, rw_lock_get_mutex(lock),
- SYNC_NO_ORDER_CHECK);
-
- lock->mutex.cfile_name = cfile_name;
- lock->mutex.cline = cline;
- lock->mutex.lock_name = cmutex_name;
- ut_d(lock->mutex.ib_mutex_type = 1);
-
-#else /* INNODB_RW_LOCKS_USE_ATOMICS */
-# ifdef UNIV_DEBUG
- UT_NOT_USED(cmutex_name);
-# endif
-#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
-
- lock->lock_word = X_LOCK_DECR;
- lock->waiters = 0;
-
- /* We set this value to signify that lock->writer_thread
- contains garbage at initialization and cannot be used for
- recursive x-locking. */
- lock->recursive = FALSE;
- /* Silence Valgrind when UNIV_DEBUG_VALGRIND is not enabled. */
- memset((void*) &lock->writer_thread, 0, sizeof lock->writer_thread);
- UNIV_MEM_INVALID(&lock->writer_thread, sizeof lock->writer_thread);
-
-#ifdef UNIV_SYNC_DEBUG
- UT_LIST_INIT(lock->debug_list);
-
- lock->level = level;
-#endif /* UNIV_SYNC_DEBUG */
-
- ut_d(lock->magic_n = RW_LOCK_MAGIC_N);
-
- lock->cfile_name = cfile_name;
- lock->cline = (unsigned int) cline;
- lock->lock_name = cmutex_name;
- lock->count_os_wait = 0;
- lock->file_name = "not yet reserved";
- lock->line = 0;
- lock->last_s_file_name = "not yet reserved";
- lock->last_x_file_name = "not yet reserved";
- lock->last_s_line = 0;
- lock->last_x_line = 0;
- os_event_create(&lock->event);
- os_event_create(&lock->wait_ex_event);
-
- mutex_enter(&rw_lock_list_mutex);
-
- ut_ad(UT_LIST_GET_FIRST(rw_lock_list) == NULL
- || UT_LIST_GET_FIRST(rw_lock_list)->magic_n == RW_LOCK_MAGIC_N);
-
- UT_LIST_ADD_FIRST(list, rw_lock_list, lock);
-
- mutex_exit(&rw_lock_list_mutex);
-}
-
-/******************************************************************//**
-Creates, or rather, initializes a priority rw-lock object in a specified memory
-location (which must be appropriately aligned). The rw-lock is initialized
-to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
-is necessary only if the memory block containing it is freed. */
-UNIV_INTERN
-void
-rw_lock_create_func(
-/*================*/
- prio_rw_lock_t* lock, /*!< in: pointer to memory */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cmutex_name, /*!< in: mutex name */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline) /*!< in: file line where created */
-{
- rw_lock_create_func(&lock->base_lock,
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- level,
-# endif
-#endif
- cmutex_name,
- cfile_name,
- cline);
-
- lock->high_priority_s_waiters = 0;
- os_event_create(&lock->high_priority_s_event);
- lock->high_priority_x_waiters = 0;
- os_event_create(&lock->high_priority_x_event);
- lock->high_priority_wait_ex_waiter = 0;
-}
-
-/******************************************************************//**
-Calling this function is obligatory only if the memory buffer containing
-the rw-lock is freed. Removes an rw-lock object from the global list. The
-rw-lock is checked to be in the non-locked state. */
-UNIV_INTERN
-void
-rw_lock_free_func(
-/*==============*/
- rw_lock_t* lock) /*!< in: rw-lock */
-{
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- ib_mutex_t* mutex;
-#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */
-
- os_rmb;
- ut_ad(rw_lock_validate(lock));
- ut_a(lock->lock_word == X_LOCK_DECR);
-
- mutex_enter(&rw_lock_list_mutex);
-
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex = rw_lock_get_mutex(lock);
-#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */
-
- os_event_free(&lock->event, false);
-
- os_event_free(&lock->wait_ex_event, false);
-
- ut_ad(UT_LIST_GET_PREV(list, lock) == NULL
- || UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
- ut_ad(UT_LIST_GET_NEXT(list, lock) == NULL
- || UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N);
-
- UT_LIST_REMOVE(list, rw_lock_list, lock);
-
- mutex_exit(&rw_lock_list_mutex);
-
- ut_d(lock->magic_n = 0);
-
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- /* We have merely removed the rw_lock from the list, the memory
- has not been freed. Therefore the pointer to mutex is valid. */
- mutex_free(mutex);
-#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */
-}
-
-/******************************************************************//**
-Calling this function is obligatory only if the memory buffer containing
-the priority rw-lock is freed. Removes an rw-lock object from the global list.
-The rw-lock is checked to be in the non-locked state. */
-UNIV_INTERN
-void
-rw_lock_free_func(
-/*==============*/
- prio_rw_lock_t* lock) /*!< in: rw-lock */
-{
- os_event_free(&lock->high_priority_s_event, false);
- os_event_free(&lock->high_priority_x_event, false);
- rw_lock_free_func(&lock->base_lock);
-}
-
-#ifdef UNIV_DEBUG
-/******************************************************************//**
-Checks that the rw-lock has been initialized and that there are no
-simultaneous shared and exclusive locks.
-@return TRUE */
-UNIV_INTERN
-ibool
-rw_lock_validate(
-/*=============*/
- rw_lock_t* lock) /*!< in: rw-lock */
-{
- ulint waiters;
- lint lock_word;
-
- ut_ad(lock);
-
- waiters = rw_lock_get_waiters(lock);
- lock_word = lock->lock_word;
-
- ut_ad(lock->magic_n == RW_LOCK_MAGIC_N);
- ut_ad(waiters == 0 || waiters == 1);
- ut_ad(lock_word > -(2 * X_LOCK_DECR));
- ut_ad(lock_word <= X_LOCK_DECR);
-
- return(TRUE);
-}
-
-/******************************************************************//**
-Checks that the priority rw-lock has been initialized and that there are no
-simultaneous shared and exclusive locks.
-@return TRUE */
-UNIV_INTERN
-ibool
-rw_lock_validate(
-/*=============*/
- prio_rw_lock_t* lock) /*!< in: rw-lock */
-{
- return(rw_lock_validate(&lock->base_lock));
-}
-
-#endif /* UNIV_DEBUG */
-
-/******************************************************************//**
-Lock a regular or priority rw-lock in shared mode for the current thread. If
-the rw-lock is locked in exclusive mode, or there is an exclusive lock request
-waiting, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
-waiting for the lock, before suspending the thread. */
-UNIV_INTERN
-void
-rw_lock_s_lock_spin(
-/*================*/
- void* _lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock
- will be passed to another thread to unlock */
- bool priority_lock,
- /*!< in: whether the lock is a priority lock */
- bool high_priority,
- /*!< in: whether we are acquiring a priority
- lock with high priority */
- const char* file_name, /*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- ulint index; /* index of the reserved wait cell */
- ulint i = 0; /* spin round count */
- sync_array_t* sync_arr;
- size_t counter_index;
- rw_lock_t* lock = (rw_lock_t *) _lock;
-
- /* We reuse the thread id to index into the counter, cache
- it here for efficiency. */
-
- counter_index = (size_t) os_thread_get_curr_id();
-
- ut_ad(rw_lock_validate(lock));
-
- rw_lock_stats.rw_s_spin_wait_count.add(counter_index, 1);
-lock_loop:
-
- if (!rw_lock_higher_prio_waiters_exist(priority_lock, high_priority,
- lock)) {
-
- /* Spin waiting for the writer field to become free */
- os_rmb;
- HMT_low();
- while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0,
- srv_spin_wait_delay));
- }
-
- i++;
- os_rmb;
- }
-
- HMT_medium();
- if (i >= SYNC_SPIN_ROUNDS) {
- os_thread_yield();
- }
-
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread " ULINTPF " spin wait rw-s-lock at %p"
- " '%s' rnds " ULINTPF "\n",
- os_thread_pf(os_thread_get_curr_id()),
- (void*) lock, lock->lock_name, i);
- }
- } else {
-
- /* In case of higher priority waiters already present, perform
- only this part of the spinning code path. */
- os_thread_yield();
- }
-
- /* We try once again to obtain the lock */
- if (!rw_lock_higher_prio_waiters_exist(priority_lock, high_priority,
- lock)
- && (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line))) {
- rw_lock_stats.rw_s_spin_round_count.add(counter_index, i);
-
- return; /* Success */
- } else {
-
- prio_rw_lock_t* prio_rw_lock = NULL;
-
- if (i > 0 && i < SYNC_SPIN_ROUNDS) {
- goto lock_loop;
- }
-
- rw_lock_stats.rw_s_spin_round_count.add(counter_index, i);
-
- sync_arr = sync_array_get_and_reserve_cell(lock,
- high_priority
- ? PRIO_RW_LOCK_SHARED
- : RW_LOCK_SHARED,
- file_name,
- line, &index);
-
- /* Set waiters before checking lock_word to ensure wake-up
- signal is sent. This may lead to some unnecessary signals. */
- if (high_priority) {
-
- prio_rw_lock = reinterpret_cast<prio_rw_lock_t *>
- (_lock);
- os_atomic_increment_ulint(
- &prio_rw_lock->high_priority_s_waiters,
- 1);
- } else {
-
- rw_lock_set_waiter_flag(lock);
- }
-
- if (!rw_lock_higher_prio_waiters_exist(priority_lock,
- high_priority, lock)
- && (TRUE == rw_lock_s_lock_low(lock, pass,
- file_name, line))) {
- sync_array_free_cell(sync_arr, index);
- if (prio_rw_lock) {
-
- os_atomic_decrement_ulint(
- &prio_rw_lock->high_priority_s_waiters,
- 1);
- }
- return; /* Success */
- }
-
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread " ULINTPF " OS wait rw-s-lock at %p"
- " '%s'\n",
- os_thread_pf(os_thread_get_curr_id()),
- (void*) lock, lock->lock_name);
- }
-
- /* these stats may not be accurate */
- lock->count_os_wait++;
- rw_lock_stats.rw_s_os_wait_count.add(counter_index, 1);
-
- sync_array_wait_event(sync_arr, index);
-
- if (prio_rw_lock) {
-
- os_atomic_decrement_ulint(
- &prio_rw_lock->high_priority_s_waiters,
- 1);
- }
-
- i = 0;
- goto lock_loop;
- }
-}
-
-/******************************************************************//**
-This function is used in the insert buffer to move the ownership of an
-x-latch on a buffer frame to the current thread. The x-latch was set by
-the buffer read operation and it protected the buffer frame while the
-read was done. The ownership is moved because we want that the current
-thread is able to acquire a second x-latch which is stored in an mtr.
-This, in turn, is needed to pass the debug checks of index page
-operations. */
-UNIV_INTERN
-void
-rw_lock_x_lock_move_ownership(
-/*==========================*/
- rw_lock_t* lock) /*!< in: lock which was x-locked in the
- buffer read */
-{
- ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
-
- rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
-}
-
-/******************************************************************//**
-Function for the next writer to call. Waits for readers to exit.
-The caller must have already decremented lock_word by X_LOCK_DECR. */
-UNIV_INLINE
-void
-rw_lock_x_lock_wait(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- bool high_priority,
- /*!< in: if true, the rw lock is a priority
- lock and is being acquired with high
- priority */
-#ifdef UNIV_SYNC_DEBUG
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
-#endif
- const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- ulint index;
- ulint i = 0;
- sync_array_t* sync_arr;
- size_t counter_index;
- prio_rw_lock_t* prio_rw_lock = NULL;
-
- /* We reuse the thread id to index into the counter, cache
- it here for efficiency. */
-
- counter_index = (size_t) os_thread_get_curr_id();
-
- os_rmb;
- ut_ad(lock->lock_word <= 0);
-
- HMT_low();
- if (high_priority) {
-
- prio_rw_lock = reinterpret_cast<prio_rw_lock_t *>(lock);
- prio_rw_lock->high_priority_wait_ex_waiter = 1;
- }
-
- while (lock->lock_word < 0) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
- }
- if(i < SYNC_SPIN_ROUNDS) {
- i++;
- os_rmb;
- continue;
- }
- HMT_medium();
-
- /* If there is still a reader, then go to sleep.*/
- rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
-
- sync_arr = sync_array_get_and_reserve_cell(lock,
- RW_LOCK_WAIT_EX,
- file_name,
- line, &index);
-
- i = 0;
-
- /* Check lock_word to ensure wake-up isn't missed.*/
- if (lock->lock_word < 0) {
-
- /* these stats may not be accurate */
- lock->count_os_wait++;
- rw_lock_stats.rw_x_os_wait_count.add(counter_index, 1);
-
- /* Add debug info as it is needed to detect possible
- deadlock. We must add info for WAIT_EX thread for
- deadlock detection to work properly. */
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
- file_name, line);
-#endif
-
- if (srv_instrument_semaphores) {
- lock->thread_id = os_thread_get_curr_id();
- lock->file_name = file_name;
- lock->line = line;
- }
-
- sync_array_wait_event(sync_arr, index);
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_remove_debug_info(
- lock, pass, RW_LOCK_WAIT_EX);
-#endif
- /* It is possible to wake when lock_word < 0.
- We must pass the while-loop check to proceed.*/
- } else {
- sync_array_free_cell(sync_arr, index);
- }
- HMT_low();
- }
- HMT_medium();
-
- if (prio_rw_lock) {
-
- prio_rw_lock->high_priority_wait_ex_waiter = 0;
- }
-
- rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
-}
-
-/******************************************************************//**
-Low-level function for acquiring an exclusive lock.
-@return FALSE if did not succeed, TRUE if success. */
-UNIV_INLINE
-ibool
-rw_lock_x_lock_low(
-/*===============*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- bool high_priority,
- /*!< in: if true, the rw lock is a priority
- lock and is being acquired with high
- priority */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- ibool local_recursive= lock->recursive;
-
- if (rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
-
- /* lock->recursive also tells us if the writer_thread
- field is stale or active. As we are going to write
- our own thread id in that field it must be that the
- current writer_thread value is not active. */
- ut_a(!lock->recursive);
-
- /* Decrement occurred: we are writer or next-writer. */
- rw_lock_set_writer_id_and_recursion_flag(
- lock, pass ? FALSE : TRUE);
-
- rw_lock_x_lock_wait(lock, high_priority,
-#ifdef UNIV_SYNC_DEBUG
- pass,
-#endif
- file_name, line);
-
- } else {
- os_thread_id_t thread_id = os_thread_get_curr_id();
-
- /* Decrement failed: relock or failed lock
- Note: recursive must be loaded before writer_thread see
- comment for rw_lock_set_writer_id_and_recursion_flag().
- To achieve this we load it before rw_lock_lock_word_decr(),
- which implies full memory barrier in current implementation. */
- if (!pass && local_recursive
- && os_thread_eq(lock->writer_thread, thread_id)) {
- /* Relock */
- if (lock->lock_word == 0) {
- lock->lock_word -= X_LOCK_DECR;
- } else {
- --lock->lock_word;
- }
-
- } else {
- /* Another thread locked before us */
- return(FALSE);
- }
- }
-#ifdef UNIV_SYNC_DEBUG
- rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name, line);
-#endif
- lock->last_x_file_name = file_name;
- lock->last_x_line = (unsigned int) line;
-
- if (srv_instrument_semaphores) {
- lock->thread_id = os_thread_get_curr_id();
- lock->file_name = file_name;
- lock->line = line;
- }
-
- return(TRUE);
-}
-
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in exclusive mode for the current thread. If the rw-lock is locked
-in shared or exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
-for the lock before suspending the thread. If the same thread has an x-lock
-on the rw-lock, locking succeed, with the following exception: if pass != 0,
-only a single x-lock may be taken on the lock. NOTE: If the same thread has
-an s-lock, locking does not succeed! */
-UNIV_INTERN
-void
-rw_lock_x_lock_func(
-/*================*/
- rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line, /*!< in: line where requested */
- bool priority_lock,
- /*!< in: whether the lock is a priority lock */
- bool high_priority)
- /*!< in: whether we are acquiring a priority
- lock with high priority */
-{
- ulint i; /*!< spin round count */
- ulint index; /*!< index of the reserved wait cell */
- sync_array_t* sync_arr;
- ibool spinning = FALSE;
- size_t counter_index;
- prio_rw_lock_t* prio_lock = NULL;
-
- /* We reuse the thread id to index into the counter, cache
- it here for efficiency. */
-
- counter_index = (size_t) os_thread_get_curr_id();
-
- ut_ad(rw_lock_validate(lock));
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
- i = 0;
-
- ut_ad(priority_lock || !high_priority);
-
-lock_loop:
-
- if (!rw_lock_higher_prio_waiters_exist(priority_lock, high_priority,
- lock)
- && rw_lock_x_lock_low(lock, high_priority, pass,
- file_name, line)) {
- rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
-
- return; /* Locking succeeded */
-
- } else if (!rw_lock_higher_prio_waiters_exist(priority_lock,
- high_priority, lock)) {
-
- if (!spinning) {
- spinning = TRUE;
-
- rw_lock_stats.rw_x_spin_wait_count.add(
- counter_index, 1);
- }
-
- /* Spin waiting for the lock_word to become free */
- os_rmb;
- HMT_low();
- while (i < SYNC_SPIN_ROUNDS
- && lock->lock_word <= 0) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0,
- srv_spin_wait_delay));
- }
-
- i++;
- os_rmb;
- }
- HMT_medium();
- if (i >= SYNC_SPIN_ROUNDS) {
- os_thread_yield();
- } else {
- goto lock_loop;
- }
- } else {
-
- /* In case we skipped spinning because of higher-priority
- waiters already waiting, perform only this bit of the spinning
- code path. */
- os_thread_yield();
- }
-
- if (spinning) {
-
- rw_lock_stats.rw_x_spin_round_count.add(counter_index, i);
-
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread " ULINTPF " spin wait rw-x-lock at %p"
- " '%s' rnds " ULINTPF "\n",
- os_thread_pf(os_thread_get_curr_id()),
- (void*) lock,lock->lock_name, i);
- }
- }
-
- sync_arr = sync_array_get_and_reserve_cell(lock,
- high_priority
- ? PRIO_RW_LOCK_EX
- : RW_LOCK_EX,
- file_name, line, &index);
-
- /* Waiters must be set before checking lock_word, to ensure signal
- is sent. This could lead to a few unnecessary wake-up signals. */
- if (high_priority) {
-
- prio_lock = reinterpret_cast<prio_rw_lock_t *>(lock);
- os_atomic_increment_ulint(&prio_lock->high_priority_x_waiters,
- 1);
- } else {
- rw_lock_set_waiter_flag(lock);
- }
-
- if (rw_lock_x_lock_low(lock, high_priority, pass, file_name, line)) {
- sync_array_free_cell(sync_arr, index);
- if (prio_lock) {
-
- os_atomic_decrement_ulint(
- &prio_lock->high_priority_x_waiters,
- 1);
- }
- return; /* Locking succeeded */
- }
-
- if (srv_print_latch_waits) {
- fprintf(stderr,
- "Thread " ULINTPF " OS wait for rw-x-lock at %p"
- " '%s'\n",
- os_thread_pf(os_thread_get_curr_id()), (void*) lock,
- lock->lock_name);
- }
-
- /* these stats may not be accurate */
- lock->count_os_wait++;
- rw_lock_stats.rw_x_os_wait_count.add(counter_index, 1);
-
- sync_array_wait_event(sync_arr, index);
-
- if (prio_lock) {
-
- os_atomic_decrement_ulint(&prio_lock->high_priority_x_waiters,
- 1);
- }
-
- i = 0;
- goto lock_loop;
-}
-
-/******************************************************************//**
-NOTE! Use the corresponding macro, not directly this function! Lock a priority
-rw-lock in exclusive mode for the current thread. If the rw-lock is locked
-in shared or exclusive mode, or there is an exclusive lock request waiting,
-the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
-for the lock, before suspending the thread. If the same thread has an x-lock
-on the rw-lock, locking succeed, with the following exception: if pass != 0,
-only a single x-lock may be taken on the lock. NOTE: If the same thread has
-an s-lock, locking does not succeed! */
-UNIV_INTERN
-void
-rw_lock_x_lock_func(
-/*================*/
- prio_rw_lock_t* lock, /*!< in: pointer to rw-lock */
- ulint pass, /*!< in: pass value; != 0, if the lock will
- be passed to another thread to unlock */
- const char* file_name,/*!< in: file name where lock requested */
- ulint line) /*!< in: line where requested */
-{
- rw_lock_x_lock_func(&lock->base_lock, pass, file_name, line, true,
- srv_current_thread_priority > 0);
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
-because the debug mutex is also acquired in sync0arr while holding the OS
-mutex protecting the sync array, and the ordinary mutex_enter might
-recursively call routines in sync0arr, leading to a deadlock on the OS
-mutex. */
-UNIV_INTERN
-void
-rw_lock_debug_mutex_enter(void)
-/*===========================*/
-{
- os_fast_mutex_lock(&rw_lock_debug_mutex);
-}
-
-/******************************************************************//**
-Releases the debug mutex. */
-UNIV_INTERN
-void
-rw_lock_debug_mutex_exit(void)
-/*==========================*/
-{
- os_fast_mutex_unlock(&rw_lock_debug_mutex);
-}
-
-/******************************************************************//**
-Inserts the debug information for an rw-lock. */
-UNIV_INTERN
-void
-rw_lock_add_debug_info(
-/*===================*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint pass, /*!< in: pass value */
- ulint lock_type, /*!< in: lock type */
- const char* file_name, /*!< in: file where requested */
- ulint line) /*!< in: line where requested */
-{
- rw_lock_debug_t* info;
-
- ut_ad(lock);
- ut_ad(file_name);
-
- info = rw_lock_debug_create();
-
- rw_lock_debug_mutex_enter();
-
- info->file_name = file_name;
- info->line = line;
- info->lock_type = lock_type;
- info->thread_id = os_thread_get_curr_id();
- info->pass = pass;
-
- UT_LIST_ADD_FIRST(list, lock->debug_list, info);
-
- rw_lock_debug_mutex_exit();
-
- if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) {
- sync_thread_add_level(lock, lock->level,
- lock_type == RW_LOCK_EX
- && lock->lock_word < 0);
- }
-}
-
-/******************************************************************//**
-Removes a debug information struct for an rw-lock. */
-UNIV_INTERN
-void
-rw_lock_remove_debug_info(
-/*======================*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint pass, /*!< in: pass value */
- ulint lock_type) /*!< in: lock type */
-{
- rw_lock_debug_t* info;
-
- ut_ad(lock);
-
- if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) {
- sync_thread_reset_level(lock);
- }
-
- rw_lock_debug_mutex_enter();
-
- info = UT_LIST_GET_FIRST(lock->debug_list);
-
- while (info != NULL) {
- if ((pass == info->pass)
- && ((pass != 0)
- || os_thread_eq(info->thread_id,
- os_thread_get_curr_id()))
- && (info->lock_type == lock_type)) {
-
- /* Found! */
- UT_LIST_REMOVE(list, lock->debug_list, info);
- rw_lock_debug_mutex_exit();
-
- rw_lock_debug_free(info);
-
- return;
- }
-
- info = UT_LIST_GET_NEXT(list, info);
- }
-
- ut_error;
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Checks if the thread has locked the rw-lock in the specified mode, with
-the pass value == 0.
-@return TRUE if locked */
-UNIV_INTERN
-ibool
-rw_lock_own(
-/*========*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
-{
- rw_lock_debug_t* info;
-
- ut_ad(lock);
- ut_ad(rw_lock_validate(lock));
-
- rw_lock_debug_mutex_enter();
-
- info = UT_LIST_GET_FIRST(lock->debug_list);
-
- while (info != NULL) {
-
- if (os_thread_eq(info->thread_id, os_thread_get_curr_id())
- && (info->pass == 0)
- && (info->lock_type == lock_type)) {
-
- rw_lock_debug_mutex_exit();
- /* Found! */
-
- return(TRUE);
- }
-
- info = UT_LIST_GET_NEXT(list, info);
- }
- rw_lock_debug_mutex_exit();
-
- return(FALSE);
-}
-
-/******************************************************************//**
-Checks if the thread has locked the priority rw-lock in the specified mode,
-with the pass value == 0. */
-UNIV_INTERN
-ibool
-rw_lock_own(
-/*========*/
- prio_rw_lock_t* lock, /*!< in: rw-lock */
- ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
-{
- return(rw_lock_own(&lock->base_lock, lock_type));
-}
-
-#endif /* UNIV_SYNC_DEBUG */
-
-/******************************************************************//**
-Checks if somebody has locked the rw-lock in the specified mode.
-@return TRUE if locked */
-UNIV_INTERN
-ibool
-rw_lock_is_locked(
-/*==============*/
- rw_lock_t* lock, /*!< in: rw-lock */
- ulint lock_type) /*!< in: lock type: RW_LOCK_SHARED,
- RW_LOCK_EX */
-{
- ibool ret = FALSE;
-
- ut_ad(lock);
- ut_ad(rw_lock_validate(lock));
-
- if (lock_type == RW_LOCK_SHARED) {
- if (rw_lock_get_reader_count(lock) > 0) {
- ret = TRUE;
- }
- } else if (lock_type == RW_LOCK_EX) {
- if (rw_lock_get_writer(lock) == RW_LOCK_EX) {
- ret = TRUE;
- }
- } else {
- ut_error;
- }
-
- return(ret);
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/***************************************************************//**
-Prints debug info of currently locked rw-locks. */
-UNIV_INTERN
-void
-rw_lock_list_print_info(
-/*====================*/
- FILE* file) /*!< in: file where to print */
-{
- rw_lock_t* lock;
- ulint count = 0;
- rw_lock_debug_t* info;
-
- mutex_enter(&rw_lock_list_mutex);
-
- fputs("-------------\n"
- "RW-LATCH INFO\n"
- "-------------\n", file);
-
- lock = UT_LIST_GET_FIRST(rw_lock_list);
-
- while (lock != NULL) {
-
- count++;
-
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_enter(&(lock->mutex));
-#endif
- if (lock->lock_word != X_LOCK_DECR) {
-
- fprintf(file, "RW-LOCK: %p ", (void*) lock);
-
- if (rw_lock_get_waiters(lock)) {
- fputs(" Waiters for the lock exist\n", file);
- } else {
- putc('\n', file);
- }
-
- rw_lock_debug_mutex_enter();
- info = UT_LIST_GET_FIRST(lock->debug_list);
- while (info != NULL) {
- rw_lock_debug_print(file, info);
- info = UT_LIST_GET_NEXT(list, info);
- }
- rw_lock_debug_mutex_exit();
- }
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- mutex_exit(&(lock->mutex));
-#endif
-
- lock = UT_LIST_GET_NEXT(list, lock);
- }
-
- fprintf(file, "Total number of rw-locks %ld\n", count);
- mutex_exit(&rw_lock_list_mutex);
-}
-
-/***************************************************************//**
-Prints debug info of an rw-lock. */
-UNIV_INTERN
-void
-rw_lock_print(
-/*==========*/
- rw_lock_t* lock) /*!< in: rw-lock */
-{
- rw_lock_debug_t* info;
-
- fprintf(stderr,
- "-------------\n"
- "RW-LATCH INFO\n"
- "RW-LATCH: %p ", (void*) lock);
-
-#ifndef INNODB_RW_LOCKS_USE_ATOMICS
- /* We used to acquire lock->mutex here, but it would cause a
- recursive call to sync_thread_add_level() if UNIV_SYNC_DEBUG
- is defined. Since this function is only invoked from
- sync_thread_levels_g(), let us choose the smaller evil:
- performing dirty reads instead of causing bogus deadlocks or
- assertion failures. */
-#endif
- if (lock->lock_word != X_LOCK_DECR) {
-
- if (rw_lock_get_waiters(lock)) {
- fputs(" Waiters for the lock exist\n", stderr);
- } else {
- putc('\n', stderr);
- }
-
- rw_lock_debug_mutex_enter();
- info = UT_LIST_GET_FIRST(lock->debug_list);
- while (info != NULL) {
- rw_lock_debug_print(stderr, info);
- info = UT_LIST_GET_NEXT(list, info);
- }
- rw_lock_debug_mutex_exit();
- }
-}
-
-/*********************************************************************//**
-Prints info of a debug struct. */
-UNIV_INTERN
-void
-rw_lock_debug_print(
-/*================*/
- FILE* f, /*!< in: output stream */
- rw_lock_debug_t* info) /*!< in: debug struct */
-{
- ulint rwt;
-
- rwt = info->lock_type;
-
- fprintf(f, "Locked: thread %lu file %s line %lu ",
- (ulong) os_thread_pf(info->thread_id), info->file_name,
- (ulong) info->line);
- if (rwt == RW_LOCK_SHARED) {
- fputs("S-LOCK", f);
- } else if (rwt == RW_LOCK_EX) {
- fputs("X-LOCK", f);
- } else if (rwt == RW_LOCK_WAIT_EX) {
- fputs("WAIT X-LOCK", f);
- } else {
- ut_error;
- }
- if (info->pass != 0) {
- fprintf(f, " pass value %lu", (ulong) info->pass);
- }
- putc('\n', f);
-}
-
-/***************************************************************//**
-Returns the number of currently locked rw-locks. Works only in the debug
-version.
-@return number of locked rw-locks */
-UNIV_INTERN
-ulint
-rw_lock_n_locked(void)
-/*==================*/
-{
- rw_lock_t* lock;
- ulint count = 0;
-
- mutex_enter(&rw_lock_list_mutex);
-
- lock = UT_LIST_GET_FIRST(rw_lock_list);
-
- while (lock != NULL) {
-
- if (lock->lock_word != X_LOCK_DECR) {
- count++;
- }
-
- lock = UT_LIST_GET_NEXT(list, lock);
- }
-
- mutex_exit(&rw_lock_list_mutex);
-
- return(count);
-}
-#endif /* UNIV_SYNC_DEBUG */
diff --git a/storage/xtradb/sync/sync0sync.cc b/storage/xtradb/sync/sync0sync.cc
deleted file mode 100644
index 37ac3c56fff..00000000000
--- a/storage/xtradb/sync/sync0sync.cc
+++ /dev/null
@@ -1,1705 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, Google Inc.
-Copyright (c) 2017, MariaDB Corporation.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file sync/sync0sync.cc
-Mutex, the basic synchronization primitive
-
-Created 9/5/1995 Heikki Tuuri
-*******************************************************/
-
-#include "sync0sync.h"
-#ifdef UNIV_NONINL
-#include "sync0sync.ic"
-#include "sync0arr.ic"
-#endif
-
-#include "sync0rw.h"
-#include "buf0buf.h"
-#include "srv0srv.h"
-#include "btr0types.h"
-#include "buf0types.h"
-#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
-#ifdef UNIV_SYNC_DEBUG
-# include "srv0start.h" /* srv_is_being_started */
-#endif /* UNIV_SYNC_DEBUG */
-#include "ha_prototypes.h"
-#include "my_cpu.h"
-
-#include <vector>
-
-/*
- REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX
- ============================================
-
-Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc
-takes 3 microseconds (us) for a lock-unlock pair and Windows NT on a 1995
-Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to
-implement our own efficient spin lock mutex. Future operating systems may
-provide efficient spin locks, but we cannot count on that.
-
-Another reason for implementing a spin lock is that on multiprocessor systems
-it can be more efficient for a processor to run a loop waiting for the
-semaphore to be released than to switch to a different thread. A thread switch
-takes 25 us on both platforms mentioned above. See Gray and Reuter's book
-Transaction processing for background.
-
-How long should the spin loop last before suspending the thread? On a
-uniprocessor, spinning does not help at all, because if the thread owning the
-mutex is not executing, it cannot be released. Spinning actually wastes
-resources.
-
-On a multiprocessor, we do not know if the thread owning the mutex is
-executing or not. Thus it would make sense to spin as long as the operation
-guarded by the mutex would typically last assuming that the thread is
-executing. If the mutex is not released by that time, we may assume that the
-thread owning the mutex is not executing and suspend the waiting thread.
-
-A typical operation (where no i/o involved) guarded by a mutex or a read-write
-lock may last 1 - 20 us on the current Pentium platform. The longest
-operations are the binary searches on an index node.
-
-We conclude that the best choice is to set the spin time at 20 us. Then the
-system should work well on a multiprocessor. On a uniprocessor we have to
-make sure that thread swithches due to mutex collisions are not frequent,
-i.e., they do not happen every 100 us or so, because that wastes too much
-resources. If the thread switches are not frequent, the 20 us wasted in spin
-loop is not too much.
-
-Empirical studies on the effect of spin time should be done for different
-platforms.
-
-
- IMPLEMENTATION OF THE MUTEX
- ===========================
-
-For background, see Curt Schimmel's book on Unix implementation on modern
-architectures. The key points in the implementation are atomicity and
-serialization of memory accesses. The test-and-set instruction (XCHG in
-Pentium) must be atomic. As new processors may have weak memory models, also
-serialization of memory references may be necessary. The successor of Pentium,
-P6, has at least one mode where the memory model is weak. As far as we know,
-in Pentium all memory accesses are serialized in the program order and we do
-not have to worry about the memory model. On other processors there are
-special machine instructions called a fence, memory barrier, or storage
-barrier (STBAR in Sparc), which can be used to serialize the memory accesses
-to happen in program order relative to the fence instruction.
-
-Leslie Lamport has devised a "bakery algorithm" to implement a mutex without
-the atomic test-and-set, but his algorithm should be modified for weak memory
-models. We do not use Lamport's algorithm, because we guess it is slower than
-the atomic test-and-set.
-
-Our mutex implementation works as follows: After that we perform the atomic
-test-and-set instruction on the memory word. If the test returns zero, we
-know we got the lock first. If the test returns not zero, some other thread
-was quicker and got the lock: then we spin in a loop reading the memory word,
-waiting it to become zero. It is wise to just read the word in the loop, not
-perform numerous test-and-set instructions, because they generate memory
-traffic between the cache and the main memory. The read loop can just access
-the cache, saving bus bandwidth.
-
-If we cannot acquire the mutex lock in the specified time, we reserve a cell
-in the wait array, set the waiters byte in the mutex to 1. To avoid a race
-condition, after setting the waiters byte and before suspending the waiting
-thread, we still have to check that the mutex is reserved, because it may
-have happened that the thread which was holding the mutex has just released
-it and did not see the waiters byte set to 1, a case which would lead the
-other thread to an infinite wait.
-
-LEMMA 1: After a thread resets the event of a mutex (or rw_lock), some
-======
-thread will eventually call os_event_set() on that particular event.
-Thus no infinite wait is possible in this case.
-
-Proof: After making the reservation the thread sets the waiters field in the
-mutex to 1. Then it checks that the mutex is still reserved by some thread,
-or it reserves the mutex for itself. In any case, some thread (which may be
-also some earlier thread, not necessarily the one currently holding the mutex)
-will set the waiters field to 0 in mutex_exit, and then call
-os_event_set() with the mutex as an argument.
-Q.E.D.
-
-LEMMA 2: If an os_event_set() call is made after some thread has called
-======
-the os_event_reset() and before it starts wait on that event, the call
-will not be lost to the second thread. This is true even if there is an
-intervening call to os_event_reset() by another thread.
-Thus no infinite wait is possible in this case.
-
-Proof (non-windows platforms): os_event_reset() returns a monotonically
-increasing value of signal_count. This value is increased at every
-call of os_event_set() If thread A has called os_event_reset() followed
-by thread B calling os_event_set() and then some other thread C calling
-os_event_reset(), the is_set flag of the event will be set to FALSE;
-but now if thread A calls os_event_wait_low() with the signal_count
-value returned from the earlier call of os_event_reset(), it will
-return immediately without waiting.
-Q.E.D.
-
-Proof (windows): If there is a writer thread which is forced to wait for
-the lock, it may be able to set the state of rw_lock to RW_LOCK_WAIT_EX
-The design of rw_lock ensures that there is one and only one thread
-that is able to change the state to RW_LOCK_WAIT_EX and this thread is
-guaranteed to acquire the lock after it is released by the current
-holders and before any other waiter gets the lock.
-On windows this thread waits on a separate event i.e.: wait_ex_event.
-Since only one thread can wait on this event there is no chance
-of this event getting reset before the writer starts wait on it.
-Therefore, this thread is guaranteed to catch the os_set_event()
-signalled unconditionally at the release of the lock.
-Q.E.D. */
-
-/* Number of spin waits on mutexes: for performance monitoring */
-
-/** The number of iterations in the mutex_spin_wait() spin loop.
-Intended for performance monitoring. */
-UNIV_INTERN ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_round_count;
-/** The number of mutex_spin_wait() calls. Intended for
-performance monitoring. */
-UNIV_INTERN ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_spin_wait_count;
-/** The number of OS waits in mutex_spin_wait(). Intended for
-performance monitoring. */
-UNIV_INTERN ib_counter_t<ib_int64_t, IB_N_SLOTS> mutex_os_wait_count;
-/** The number of mutex_exit() calls. Intended for performance
-monitoring. */
-UNIV_INTERN ib_int64_t mutex_exit_count;
-
-/** This variable is set to TRUE when sync_init is called */
-UNIV_INTERN ibool sync_initialized = FALSE;
-
-#ifdef UNIV_SYNC_DEBUG
-/** An acquired mutex or rw-lock and its level in the latching order */
-struct sync_level_t;
-/** Mutexes or rw-locks held by a thread */
-struct sync_thread_t;
-
-/** The latch levels currently owned by threads are stored in this data
-structure; the size of this array is OS_THREAD_MAX_N */
-
-UNIV_INTERN sync_thread_t* sync_thread_level_arrays;
-
-/** Mutex protecting sync_thread_level_arrays */
-UNIV_INTERN ib_mutex_t sync_thread_mutex;
-
-# ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t sync_thread_mutex_key;
-# endif /* UNIV_PFS_MUTEX */
-#endif /* UNIV_SYNC_DEBUG */
-
-/** Global list of database mutexes (not OS mutexes) created. */
-UNIV_INTERN ut_list_base_node_t mutex_list;
-
-/** Mutex protecting the mutex_list variable */
-UNIV_INTERN ib_mutex_t mutex_list_mutex;
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t mutex_list_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef UNIV_SYNC_DEBUG
-/** Latching order checks start when this is set TRUE */
-UNIV_INTERN ibool sync_order_checks_on = FALSE;
-
-/** Array for tracking sync levels per thread. */
-typedef std::vector<sync_level_t> sync_arr_t;
-
-
-/** Mutexes or rw-locks held by a thread */
-struct sync_thread_t{
- os_thread_id_t id; /*!< OS thread id */
- sync_arr_t* levels; /*!< level array for this thread; if
- this is NULL this slot is unused */
-};
-
-/** An acquired mutex or rw-lock and its level in the latching order */
-struct sync_level_t{
- void* latch; /*!< pointer to a mutex or an
- rw-lock; NULL means that
- the slot is empty */
- ulint level; /*!< level of the latch in the
- latching order. This field is
- overloaded to serve as a node in a
- linked list of free nodes too. When
- latch == NULL then this will contain
- the ordinal value of the next free
- element */
-};
-#endif /* UNIV_SYNC_DEBUG */
-
-/******************************************************************//**
-Creates, or rather, initializes a mutex object in a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed. */
-UNIV_INTERN
-void
-mutex_create_func(
-/*==============*/
- ib_mutex_t* mutex, /*!< in: pointer to memory */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where created */
- ulint cline, /*!< in: file line where created */
- const char* cmutex_name) /*!< in: mutex name */
-{
-#if defined(HAVE_ATOMIC_BUILTINS)
- mutex_reset_lock_word(mutex);
-#else
- os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mutex->os_fast_mutex);
- mutex->lock_word = 0;
-#endif
- os_event_create(&mutex->event);
- mutex_set_waiters(mutex, 0);
-#ifdef UNIV_DEBUG
- mutex->magic_n = MUTEX_MAGIC_N;
- mutex->level = level;
-#endif /* UNIV_DEBUG */
-
- mutex->line = 0;
- mutex->file_name = "not yet reserved";
- mutex->cfile_name = cfile_name;
- mutex->cline = cline;
- mutex->count_os_wait = 0;
- mutex->cmutex_name= cmutex_name;
-
- /* Check that lock_word is aligned; this is important on Intel */
- ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);
-
- /* NOTE! The very first mutexes are not put to the mutex list */
-
- if (mutex == &mutex_list_mutex
-#ifdef UNIV_SYNC_DEBUG
- || mutex == &sync_thread_mutex
-#endif /* UNIV_SYNC_DEBUG */
- ) {
-
- return;
- }
-
- mutex_enter(&mutex_list_mutex);
-
- ut_ad(UT_LIST_GET_LEN(mutex_list) == 0
- || UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N);
-
- UT_LIST_ADD_FIRST(list, mutex_list, mutex);
-
- mutex_exit(&mutex_list_mutex);
-}
-
-/******************************************************************//**
-Creates, or rather, initializes a priority mutex object in a specified memory
-location (which must be appropriately aligned). The mutex is initialized
-in the reset state. Explicit freeing of the mutex with mutex_free is
-necessary only if the memory block containing it is freed. */
-UNIV_INTERN
-void
-mutex_create_func(
-/*==============*/
- ib_prio_mutex_t* mutex, /*!< in: pointer to memory */
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- ulint level, /*!< in: level */
-# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cfile_name, /*!< in: file name where
- created */
- ulint cline, /*!< in: file line where
- created */
- const char* cmutex_name) /*!< in: mutex name */
-{
- mutex_create_func(&mutex->base_mutex,
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
- level,
-#endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- cfile_name,
- cline,
- cmutex_name);
- mutex->high_priority_waiters = 0;
- os_event_create(&mutex->high_priority_event);
-}
-
-/******************************************************************//**
-NOTE! Use the corresponding macro mutex_free(), not directly this function!
-Calling this function is obligatory only if the memory buffer containing
-the mutex is freed. Removes a mutex object from the mutex list. The mutex
-is checked to be in the reset state. */
-UNIV_INTERN
-void
-mutex_free_func(
-/*============*/
- ib_mutex_t* mutex) /*!< in: mutex */
-{
- ut_ad(mutex_validate(mutex));
- ut_a(mutex_get_lock_word(mutex) == 0);
- ut_a(mutex_get_waiters(mutex) == 0);
-
-#ifdef UNIV_MEM_DEBUG
- if (mutex == &mem_hash_mutex) {
- ut_ad(UT_LIST_GET_LEN(mutex_list) == 1);
- ut_ad(UT_LIST_GET_FIRST(mutex_list) == &mem_hash_mutex);
- UT_LIST_REMOVE(list, mutex_list, mutex);
- goto func_exit;
- }
-#endif /* UNIV_MEM_DEBUG */
-
- if (mutex != &mutex_list_mutex
-#ifdef UNIV_SYNC_DEBUG
- && mutex != &sync_thread_mutex
-#endif /* UNIV_SYNC_DEBUG */
- ) {
-
- mutex_enter(&mutex_list_mutex);
-
- ut_ad(!UT_LIST_GET_PREV(list, mutex)
- || UT_LIST_GET_PREV(list, mutex)->magic_n
- == MUTEX_MAGIC_N);
- ut_ad(!UT_LIST_GET_NEXT(list, mutex)
- || UT_LIST_GET_NEXT(list, mutex)->magic_n
- == MUTEX_MAGIC_N);
-
- UT_LIST_REMOVE(list, mutex_list, mutex);
-
- mutex_exit(&mutex_list_mutex);
- }
-
- os_event_free(&mutex->event, false);
-#ifdef UNIV_MEM_DEBUG
-func_exit:
-#endif /* UNIV_MEM_DEBUG */
-#if !defined(HAVE_ATOMIC_BUILTINS)
- os_fast_mutex_free(&(mutex->os_fast_mutex));
-#endif
- /* If we free the mutex protecting the mutex list (freeing is
- not necessary), we have to reset the magic number AFTER removing
- it from the list. */
-#ifdef UNIV_DEBUG
- mutex->magic_n = 0;
-#endif /* UNIV_DEBUG */
- return;
-}
-
-/******************************************************************//**
-NOTE! Use the corresponding macro mutex_free(), not directly this function!
-Calling this function is obligatory only if the memory buffer containing
-the mutex is freed. Removes a priority mutex object from the mutex list. The
-mutex is checked to be in the reset state. */
-UNIV_INTERN
-void
-mutex_free_func(
-/*============*/
- ib_prio_mutex_t* mutex) /*!< in: mutex */
-{
- ut_a(mutex->high_priority_waiters == 0);
- os_event_free(&mutex->high_priority_event, false);
- mutex_free_func(&mutex->base_mutex);
-}
-
-/********************************************************************//**
-NOTE! Use the corresponding macro in the header file, not this function
-directly. Tries to lock the mutex for the current thread. If the lock is not
-acquired immediately, returns with return value 1.
-@return 0 if succeed, 1 if not */
-UNIV_INTERN
-ulint
-mutex_enter_nowait_func(
-/*====================*/
- ib_mutex_t* mutex, /*!< in: pointer to mutex */
- const char* file_name MY_ATTRIBUTE((unused)),
- /*!< in: file name where mutex
- requested */
- ulint line MY_ATTRIBUTE((unused)))
- /*!< in: line where requested */
-{
- ut_ad(mutex_validate(mutex));
-
- if (!ib_mutex_test_and_set(mutex)) {
-
- mutex->thread_id = os_thread_get_curr_id();
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, file_name, line);
-#endif
- if (srv_instrument_semaphores) {
- mutex->file_name = file_name;
- mutex->line = line;
- }
-
- return(0); /* Succeeded! */
- }
-
- return(1);
-}
-
-#ifdef UNIV_DEBUG
-/******************************************************************//**
-Checks that the mutex has been initialized.
-@return TRUE */
-UNIV_INTERN
-ibool
-mutex_validate(
-/*===========*/
- const ib_mutex_t* mutex) /*!< in: mutex */
-{
- ut_a(mutex);
-
- if (mutex->magic_n != MUTEX_MAGIC_N) {
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Mutex %p not initialized file %s line %lu.",
- mutex, mutex->cfile_name, mutex->cline);
- }
- ut_ad(mutex->magic_n == MUTEX_MAGIC_N);
-
- return(TRUE);
-}
-
-/******************************************************************//**
-Checks that the current thread owns the mutex. Works only in the debug
-version.
-@return TRUE if owns */
-UNIV_INTERN
-ibool
-mutex_own(
-/*======*/
- const ib_mutex_t* mutex) /*!< in: mutex */
-{
- ut_ad(mutex_validate(mutex));
-
- return(mutex_get_lock_word(mutex) == 1
- && os_thread_eq(mutex->thread_id, os_thread_get_curr_id()));
-}
-
-/******************************************************************//**
-Checks that the current thread owns the priority mutex. Works only
-in the debug version.
-@return TRUE if owns */
-UNIV_INTERN
-ibool
-mutex_own(
-/*======*/
- const ib_prio_mutex_t* mutex) /*!< in: priority mutex */
-{
- return mutex_own(&mutex->base_mutex);
-}
-
-#endif /* UNIV_DEBUG */
-
-/******************************************************************//**
-Sets the waiters field in a mutex. */
-UNIV_INTERN
-void
-mutex_set_waiters(
-/*==============*/
- ib_mutex_t* mutex, /*!< in: mutex */
- ulint n) /*!< in: value to set */
-{
- volatile ulint* ptr; /* declared volatile to ensure that
- the value is stored to memory */
- ut_ad(mutex);
-
- ptr = &(mutex->waiters);
-
- *ptr = n; /* Here we assume that the write of a single
- word in memory is atomic */
-}
-
-/******************************************************************//**
-Reserves a mutex or a priority mutex for the current thread. If the mutex is
-reserved, the function spins a preset time (controlled by SYNC_SPIN_ROUNDS),
-waiting for the mutex before suspending the thread. */
-UNIV_INTERN
-void
-mutex_spin_wait(
-/*============*/
- void* _mutex, /*!< in: pointer to mutex */
- bool high_priority, /*!< in: whether the mutex is a
- priority mutex with high priority
- specified */
- const char* file_name, /*!< in: file name where mutex
- requested */
- ulint line) /*!< in: line where requested */
-{
- ulint i; /* spin round count */
- ulint index; /* index of the reserved wait cell */
- sync_array_t* sync_arr;
- size_t counter_index;
- /* The typecast below is performed for some of the priority mutexes
- too, when !high_priority. This exploits the fact that regular mutex is
- a prefix of the priority mutex in memory. */
- ib_mutex_t* mutex = (ib_mutex_t *) _mutex;
- ib_prio_mutex_t* prio_mutex = NULL;
-
- counter_index = (size_t) os_thread_get_curr_id();
-
- ut_ad(mutex);
-
- /* This update is not thread safe, but we don't mind if the count
- isn't exact. Moved out of ifdef that follows because we are willing
- to sacrifice the cost of counting this as the data is valuable.
- Count the number of calls to mutex_spin_wait. */
- mutex_spin_wait_count.add(counter_index, 1);
-
-mutex_loop:
-
- i = 0;
-
- /* Spin waiting for the lock word to become zero. Note that we do
- not have to assume that the read access to the lock word is atomic,
- as the actual locking is always committed with atomic test-and-set.
- In reality, however, all processors probably have an atomic read of
- a memory word. */
-
-spin_loop:
-
- HMT_low();
- os_rmb;
- while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
- if (srv_spin_wait_delay) {
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
- }
- i++;
- }
- HMT_medium();
-
- if (i >= SYNC_SPIN_ROUNDS) {
- os_thread_yield();
- }
-
- mutex_spin_round_count.add(counter_index, i);
-
- if (ib_mutex_test_and_set(mutex) == 0) {
- /* Succeeded! */
-
- mutex->thread_id = os_thread_get_curr_id();
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, file_name, line);
-#endif
- if (srv_instrument_semaphores) {
- mutex->file_name = file_name;
- mutex->line = line;
- }
-
- return;
- }
-
- /* We may end up with a situation where lock_word is 0 but the OS
- fast mutex is still reserved. On FreeBSD the OS does not seem to
- schedule a thread which is constantly calling pthread_mutex_trylock
- (in ib_mutex_test_and_set implementation). Then we could end up
- spinning here indefinitely. The following 'i++' stops this infinite
- spin. */
-
- i++;
-
- if (i < SYNC_SPIN_ROUNDS) {
- goto spin_loop;
- }
-
- sync_arr = sync_array_get_and_reserve_cell(mutex,
- high_priority
- ? SYNC_PRIO_MUTEX
- : SYNC_MUTEX,
- file_name, line, &index);
-
- /* The memory order of the array reservation and the change in the
- waiters field is important: when we suspend a thread, we first
- reserve the cell and then set waiters field to 1. When threads are
- released in mutex_exit, the waiters field is first set to zero and
- then the event is set to the signaled state. */
-
- if (high_priority) {
-
- prio_mutex = reinterpret_cast<ib_prio_mutex_t *>(_mutex);
- os_atomic_increment_ulint(&prio_mutex->high_priority_waiters,
- 1);
- } else {
-
- mutex_set_waiters(mutex, 1);
- }
-
- /* Make sure waiters store won't pass over mutex_test_and_set */
-#ifdef __powerpc__
- os_mb;
-#endif
-
- /* Try to reserve still a few times */
- for (i = 0; i < 4; i++) {
- if (ib_mutex_test_and_set(mutex) == 0) {
- /* Succeeded! Free the reserved wait cell */
-
- sync_array_free_cell(sync_arr, index);
-
- mutex->thread_id = os_thread_get_curr_id();
-#ifdef UNIV_SYNC_DEBUG
- mutex_set_debug_info(mutex, file_name, line);
-#endif
- if (srv_instrument_semaphores) {
- mutex->file_name = file_name;
- mutex->line = line;
- }
-
- if (prio_mutex) {
- os_atomic_decrement_ulint(
- &prio_mutex->high_priority_waiters,
- 1);
- }
- return;
-
- /* Note that in this case we leave the waiters field
- set to 1. We cannot reset it to zero, as we do not
- know if there are other waiters. */
- }
- }
-
- /* Now we know that there has been some thread holding the mutex
- after the change in the wait array and the waiters field was made.
- Now there is no risk of infinite wait on the event. */
-
- mutex_os_wait_count.add(counter_index, 1);
-
- mutex->count_os_wait++;
-
- sync_array_wait_event(sync_arr, index);
-
- if (prio_mutex) {
-
- os_atomic_decrement_ulint(&prio_mutex->high_priority_waiters,
- 1);
- }
-
- goto mutex_loop;
-}
-
-/******************************************************************//**
-Releases the threads waiting in the primary wait array for this mutex. */
-UNIV_INTERN
-void
-mutex_signal_object(
-/*================*/
- ib_mutex_t* mutex) /*!< in: mutex */
-{
- mutex_set_waiters(mutex, 0);
-
- /* The memory order of resetting the waiters field and
- signaling the object is important. See LEMMA 1 above. */
- os_event_set(&mutex->event);
- sync_array_object_signalled();
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Sets the debug information for a reserved mutex. */
-UNIV_INTERN
-void
-mutex_set_debug_info(
-/*=================*/
- ib_mutex_t* mutex, /*!< in: mutex */
- const char* file_name, /*!< in: file where requested */
- ulint line) /*!< in: line where requested */
-{
- ut_ad(mutex);
- ut_ad(file_name);
-
- sync_thread_add_level(mutex, mutex->level, FALSE);
-
- mutex->file_name = file_name;
- mutex->line = line;
-}
-
-/******************************************************************//**
-Gets the debug information for a reserved mutex. */
-UNIV_INTERN
-void
-mutex_get_debug_info(
-/*=================*/
- ib_mutex_t* mutex, /*!< in: mutex */
- const char** file_name, /*!< out: file where requested */
- ulint* line, /*!< out: line where requested */
- os_thread_id_t* thread_id) /*!< out: id of the thread which owns
- the mutex */
-{
- ut_ad(mutex);
-
- *file_name = mutex->file_name;
- *line = mutex->line;
- *thread_id = mutex->thread_id;
-}
-
-/******************************************************************//**
-Prints debug info of currently reserved mutexes. */
-static
-void
-mutex_list_print_info(
-/*==================*/
- FILE* file) /*!< in: file where to print */
-{
- ib_mutex_t* mutex;
- const char* file_name;
- ulint line;
- os_thread_id_t thread_id;
- ulint count = 0;
-
- fputs("----------\n"
- "MUTEX INFO\n"
- "----------\n", file);
-
- mutex_enter(&mutex_list_mutex);
-
- mutex = UT_LIST_GET_FIRST(mutex_list);
-
- while (mutex != NULL) {
- count++;
-
- if (mutex_get_lock_word(mutex) != 0) {
- mutex_get_debug_info(mutex, &file_name, &line,
- &thread_id);
- fprintf(file,
- "Locked mutex: addr %p thread %ld"
- " file %s line %ld\n",
- (void*) mutex, os_thread_pf(thread_id),
- file_name, line);
- }
-
- mutex = UT_LIST_GET_NEXT(list, mutex);
- }
-
- fprintf(file, "Total number of mutexes %ld\n", count);
-
- mutex_exit(&mutex_list_mutex);
-}
-
-/******************************************************************//**
-Counts currently reserved mutexes. Works only in the debug version.
-@return number of reserved mutexes */
-UNIV_INTERN
-ulint
-mutex_n_reserved(void)
-/*==================*/
-{
- ib_mutex_t* mutex;
- ulint count = 0;
-
- mutex_enter(&mutex_list_mutex);
-
- for (mutex = UT_LIST_GET_FIRST(mutex_list);
- mutex != NULL;
- mutex = UT_LIST_GET_NEXT(list, mutex)) {
-
- if (mutex_get_lock_word(mutex) != 0) {
-
- count++;
- }
- }
-
- mutex_exit(&mutex_list_mutex);
-
- ut_a(count >= 1);
-
- /* Subtract one, because this function itself was holding
- one mutex (mutex_list_mutex) */
-
- return(count - 1);
-}
-
-/******************************************************************//**
-Returns TRUE if no mutex or rw-lock is currently locked. Works only in
-the debug version.
-@return TRUE if no mutexes and rw-locks reserved */
-UNIV_INTERN
-ibool
-sync_all_freed(void)
-/*================*/
-{
- return(mutex_n_reserved() + rw_lock_n_locked() == 0);
-}
-
-/******************************************************************//**
-Looks for the thread slot for the calling thread.
-@return pointer to thread slot, NULL if not found */
-static
-sync_thread_t*
-sync_thread_level_arrays_find_slot(void)
-/*====================================*/
-
-{
- ulint i;
- os_thread_id_t id;
-
- id = os_thread_get_curr_id();
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- sync_thread_t* slot;
-
- slot = &sync_thread_level_arrays[i];
-
- if (slot->levels && os_thread_eq(slot->id, id)) {
-
- return(slot);
- }
- }
-
- return(NULL);
-}
-
-/******************************************************************//**
-Looks for an unused thread slot.
-@return pointer to thread slot */
-static
-sync_thread_t*
-sync_thread_level_arrays_find_free(void)
-/*====================================*/
-
-{
- ulint i;
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- sync_thread_t* slot;
-
- slot = &sync_thread_level_arrays[i];
-
- if (slot->levels == NULL) {
-
- return(slot);
- }
- }
-
- return(NULL);
-}
-
-/******************************************************************//**
-Print warning. */
-static
-void
-sync_print_warning(
-/*===============*/
- const sync_level_t* slot) /*!< in: slot for which to
- print warning */
-{
- ib_mutex_t* mutex;
-
- mutex = static_cast<ib_mutex_t*>(slot->latch);
-
- if (mutex->magic_n == MUTEX_MAGIC_N) {
- fprintf(stderr,
- "Mutex '%s'\n",
- mutex->cmutex_name);
-
- if (mutex_get_lock_word(mutex) != 0) {
- ulint line;
- const char* file_name;
- os_thread_id_t thread_id;
-
- mutex_get_debug_info(
- mutex, &file_name, &line, &thread_id);
-
- fprintf(stderr,
- "InnoDB: Locked mutex:"
- " addr %p thread %ld file %s line %ld\n",
- (void*) mutex, os_thread_pf(thread_id),
- file_name, (ulong) line);
- } else {
- fputs("Not locked\n", stderr);
- }
- } else {
- rw_lock_t* lock;
-
- lock = static_cast<rw_lock_t*>(slot->latch);
-
- rw_lock_print(lock);
- }
-}
-
-/******************************************************************//**
-Checks if all the level values stored in the level array are greater than
-the given limit.
-@return TRUE if all greater */
-static
-ibool
-sync_thread_levels_g(
-/*=================*/
- sync_arr_t* arr, /*!< in: pointer to level array for an OS
- thread */
- ulint limit, /*!< in: level limit */
- ulint warn) /*!< in: TRUE=display a diagnostic message */
-{
- ulint i;
-
- for (i = 0; i < arr->size(); i++) {
- const sync_level_t* slot;
-
- slot = (const sync_level_t*)&(arr->at(i));
-
- if (slot->latch != NULL && slot->level <= limit) {
- if (warn) {
- fprintf(stderr,
- "InnoDB: sync levels should be"
- " > %lu but a level is %lu\n",
- (ulong) limit, (ulong) slot->level);
-
- sync_print_warning(slot);
- }
-
- return(FALSE);
- }
- }
-
- return(TRUE);
-}
-
-/******************************************************************//**
-Checks if the level value is stored in the level array.
-@return slot if found or NULL */
-static
-const sync_level_t*
-sync_thread_levels_contain(
-/*=======================*/
- sync_arr_t* arr, /*!< in: pointer to level array for an OS
- thread */
- ulint level) /*!< in: level */
-{
- ulint i;
-
- for (i = 0; i < arr->size(); i++) {
- const sync_level_t* slot;
-
- slot = (const sync_level_t*)&(arr->at(i));
-
- if (slot->latch != NULL && slot->level == level) {
-
- return(slot);
- }
- }
-
- return(NULL);
-}
-
-/******************************************************************//**
-Checks if the level array for the current thread contains a
-mutex or rw-latch at the specified level.
-@return a matching latch, or NULL if not found */
-UNIV_INTERN
-void*
-sync_thread_levels_contains(
-/*========================*/
- ulint level) /*!< in: latching order level
- (SYNC_DICT, ...)*/
-{
- ulint i;
- sync_arr_t* arr;
- sync_thread_t* thread_slot;
-
- if (!sync_order_checks_on) {
-
- return(NULL);
- }
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
-
- mutex_exit(&sync_thread_mutex);
-
- return(NULL);
- }
-
- arr = thread_slot->levels;
-
- for (i = 0; i < arr->size(); i++) {
- sync_level_t* slot;
-
- slot = (sync_level_t*)&(arr->at(i));
-
- if (slot->latch != NULL && slot->level == level) {
-
- mutex_exit(&sync_thread_mutex);
- return(slot->latch);
- }
- }
-
- mutex_exit(&sync_thread_mutex);
-
- return(NULL);
-}
-
-/******************************************************************//**
-Checks that the level array for the current thread is empty.
-@return a latch, or NULL if empty except the exceptions specified below */
-UNIV_INTERN
-void*
-sync_thread_levels_nonempty_gen(
-/*============================*/
- ibool dict_mutex_allowed) /*!< in: TRUE if dictionary mutex is
- allowed to be owned by the thread */
-{
- ulint i;
- sync_arr_t* arr;
- sync_thread_t* thread_slot;
-
- if (!sync_order_checks_on) {
-
- return(NULL);
- }
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
-
- mutex_exit(&sync_thread_mutex);
-
- return(NULL);
- }
-
- arr = thread_slot->levels;
-
- for (i = 0; i < arr->size(); ++i) {
- const sync_level_t* slot;
-
- slot = (const sync_level_t*)&(arr->at(i));
-
- if (slot->latch != NULL
- && (!dict_mutex_allowed
- || (slot->level != SYNC_DICT
- && slot->level != SYNC_DICT_OPERATION
- && slot->level != SYNC_FTS_CACHE))) {
-
- mutex_exit(&sync_thread_mutex);
- ut_error;
-
- return(slot->latch);
- }
- }
-
- mutex_exit(&sync_thread_mutex);
-
- return(NULL);
-}
-
-/******************************************************************//**
-Checks if the level array for the current thread is empty,
-except for the btr_search_latch.
-@return a latch, or NULL if empty except the exceptions specified below */
-UNIV_INTERN
-void*
-sync_thread_levels_nonempty_trx(
-/*============================*/
- ibool has_search_latch)
- /*!< in: TRUE if and only if the thread
- is supposed to hold btr_search_latch */
-{
- ulint i;
- sync_arr_t* arr;
- sync_thread_t* thread_slot;
-
- if (!sync_order_checks_on) {
-
- return(NULL);
- }
-
- ut_a(!has_search_latch
- || sync_thread_levels_contains(SYNC_SEARCH_SYS));
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
-
- mutex_exit(&sync_thread_mutex);
-
- return(NULL);
- }
-
- arr = thread_slot->levels;
-
- for (i = 0; i < arr->size(); ++i) {
- const sync_level_t* slot;
-
- slot = (const sync_level_t*)&(arr->at(i));
-
- if (slot->latch != NULL
- && (!has_search_latch
- || slot->level != SYNC_SEARCH_SYS)) {
-
- mutex_exit(&sync_thread_mutex);
- ut_error;
-
- return(slot->latch);
- }
- }
-
- mutex_exit(&sync_thread_mutex);
-
- return(NULL);
-}
-
-/******************************************************************//**
-Adds a latch and its level in the thread level array. Allocates the memory
-for the array if called first time for this OS thread. Makes the checks
-against other latch levels stored in the array for this thread. */
-UNIV_INTERN
-void
-sync_thread_add_level(
-/*==================*/
- void* latch, /*!< in: pointer to a mutex or an rw-lock */
- ulint level, /*!< in: level in the latching order; if
- SYNC_LEVEL_VARYING, nothing is done */
- ibool relock) /*!< in: TRUE if re-entering an x-lock */
-{
- sync_arr_t* array;
- sync_thread_t* thread_slot;
- sync_level_t sync_level;
-
- if (!sync_order_checks_on) {
-
- return;
- }
-
- if ((latch == (void*) &sync_thread_mutex)
- || (latch == (void*) &mutex_list_mutex)
- || (latch == (void*) &rw_lock_debug_mutex)
- || (latch == (void*) &rw_lock_list_mutex)) {
-
- return;
- }
-
- if (level == SYNC_LEVEL_VARYING) {
-
- return;
- }
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
-
- /* We have to allocate the level array for a new thread */
- array = new sync_arr_t();
- ut_a(array != NULL);
- thread_slot = sync_thread_level_arrays_find_free();
- thread_slot->levels = array;
- thread_slot->id = os_thread_get_curr_id();
- }
-
- array = thread_slot->levels;
-
- if (relock) {
- goto levels_ok;
- }
-
- /* NOTE that there is a problem with _NODE and _LEAF levels: if the
- B-tree height changes, then a leaf can change to an internal node
- or the other way around. We do not know at present if this can cause
- unnecessary assertion failures below. */
-
- switch (level) {
- case SYNC_NO_ORDER_CHECK:
- case SYNC_EXTERN_STORAGE:
- case SYNC_TREE_NODE_FROM_HASH:
- /* Do no order checking */
- break;
- case SYNC_TRX_SYS_HEADER:
- if (srv_is_being_started) {
- /* This is violated during trx_sys_create_rsegs()
- when creating additional rollback segments when
- upgrading in innobase_start_or_create_for_mysql(). */
- break;
- }
- /* fall through */
- case SYNC_MEM_POOL:
- case SYNC_MEM_HASH:
- case SYNC_RECV:
- case SYNC_FTS_BG_THREADS:
- case SYNC_WORK_QUEUE:
- case SYNC_FTS_TOKENIZE:
- case SYNC_FTS_OPTIMIZE:
- case SYNC_FTS_CACHE:
- case SYNC_FTS_CACHE_INIT:
- case SYNC_LOG_ONLINE:
- case SYNC_LOG:
- case SYNC_LOG_FLUSH_ORDER:
- case SYNC_ANY_LATCH:
- case SYNC_FILE_FORMAT_TAG:
- case SYNC_DOUBLEWRITE:
- case SYNC_THREADS:
- case SYNC_LOCK_SYS:
- case SYNC_LOCK_WAIT_SYS:
- case SYNC_TRX_SYS:
- case SYNC_IBUF_BITMAP_MUTEX:
- case SYNC_RSEG:
- case SYNC_TRX_UNDO:
- case SYNC_PURGE_LATCH:
- case SYNC_PURGE_QUEUE:
- case SYNC_DICT_AUTOINC_MUTEX:
- case SYNC_DICT_OPERATION:
- case SYNC_DICT_HEADER:
- case SYNC_TRX_I_S_RWLOCK:
- case SYNC_TRX_I_S_LAST_READ:
- case SYNC_IBUF_MUTEX:
- case SYNC_INDEX_ONLINE_LOG:
- case SYNC_STATS_AUTO_RECALC:
- case SYNC_STATS_DEFRAG:
- if (!sync_thread_levels_g(array, level, TRUE)) {
- fprintf(stderr,
- "InnoDB: sync_thread_levels_g(array, %lu)"
- " does not hold!\n", level);
- ut_error;
- }
- break;
- case SYNC_TRX:
- /* Either the thread must own the lock_sys->mutex, or
- it is allowed to own only ONE trx->mutex. */
- if (!sync_thread_levels_g(array, level, FALSE)) {
- ut_a(sync_thread_levels_g(array, level - 1, TRUE));
- ut_a(sync_thread_levels_contain(array, SYNC_LOCK_SYS));
- }
- break;
- case SYNC_SEARCH_SYS: {
- /* Verify the lock order inside the split btr_search_latch
- array */
- bool found_current = false;
- for (ulint i = 0; i < btr_search_index_num; i++) {
- if (&btr_search_latch_arr[i] == latch) {
- found_current = true;
- } else if (found_current) {
- ut_ad(!rw_lock_own(&btr_search_latch_arr[i],
- RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch_arr[i],
- RW_LOCK_EX));
- }
- }
- ut_ad(found_current);
- }
-
- /* fall through */
- case SYNC_BUF_FLUSH_LIST:
- case SYNC_BUF_LRU_LIST:
- case SYNC_BUF_FREE_LIST:
- case SYNC_BUF_ZIP_FREE:
- case SYNC_BUF_ZIP_HASH:
- case SYNC_BUF_FLUSH_STATE:
- /* We can have multiple mutexes of this type therefore we
- can only check whether the greater than condition holds. */
- if (!sync_thread_levels_g(array, level-1, TRUE)) {
- fprintf(stderr,
- "InnoDB: sync_thread_levels_g(array, %lu)"
- " does not hold!\n", level-1);
- ut_error;
- }
- break;
-
-
- case SYNC_BUF_PAGE_HASH:
- /* Multiple page_hash locks are only allowed during
- buf_validate. */
- /* Fall through */
-
- case SYNC_BUF_BLOCK:
- if (!sync_thread_levels_g(array, level, FALSE)) {
- ut_a(sync_thread_levels_g(array, level - 1, TRUE));
- }
- break;
- case SYNC_REC_LOCK:
- if (sync_thread_levels_contain(array, SYNC_LOCK_SYS)) {
- ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK - 1,
- TRUE));
- } else {
- ut_a(sync_thread_levels_g(array, SYNC_REC_LOCK, TRUE));
- }
- break;
- case SYNC_IBUF_BITMAP:
- /* Either the thread must own the master mutex to all
- the bitmap pages, or it is allowed to latch only ONE
- bitmap page. */
- if (sync_thread_levels_contain(array,
- SYNC_IBUF_BITMAP_MUTEX)) {
- ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1,
- TRUE));
- } else {
- /* This is violated during trx_sys_create_rsegs()
- when creating additional rollback segments when
- upgrading in innobase_start_or_create_for_mysql(). */
- ut_a(srv_is_being_started
- || sync_thread_levels_g(array, SYNC_IBUF_BITMAP,
- TRUE));
- }
- break;
- case SYNC_FSP_PAGE:
- ut_a(sync_thread_levels_contain(array, SYNC_FSP));
- break;
- case SYNC_FSP:
- ut_a(sync_thread_levels_contain(array, SYNC_FSP)
- || sync_thread_levels_g(array, SYNC_FSP, TRUE));
- break;
- case SYNC_TRX_UNDO_PAGE:
- /* Purge is allowed to read in as many UNDO pages as it likes,
- there was a bogus rule here earlier that forced the caller to
- acquire the trx_purge_t::mutex. The purge mutex did not really
- protect anything because it was only ever acquired by the
- single purge thread. The purge thread can read the UNDO pages
- without any covering mutex. */
-
- ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
- || sync_thread_levels_contain(array, SYNC_RSEG)
- || sync_thread_levels_g(array, level - 1, TRUE));
- break;
- case SYNC_RSEG_HEADER:
- ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
- break;
- case SYNC_RSEG_HEADER_NEW:
- ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE));
- break;
- case SYNC_TREE_NODE:
- ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
- || sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
- || sync_thread_levels_g(array, SYNC_TREE_NODE - 1, TRUE));
- break;
- case SYNC_TREE_NODE_NEW:
- ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE));
- break;
- case SYNC_INDEX_TREE:
- ut_a(sync_thread_levels_g(array, SYNC_TREE_NODE - 1, TRUE));
- break;
- case SYNC_IBUF_TREE_NODE:
- ut_a(sync_thread_levels_contain(array, SYNC_IBUF_INDEX_TREE)
- || sync_thread_levels_g(array, SYNC_IBUF_TREE_NODE - 1,
- TRUE));
- break;
- case SYNC_IBUF_TREE_NODE_NEW:
- /* ibuf_add_free_page() allocates new pages for the
- change buffer while only holding the tablespace
- x-latch. These pre-allocated new pages may only be
- taken in use while holding ibuf_mutex, in
- btr_page_alloc_for_ibuf(). */
- ut_a(sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
- || sync_thread_levels_contain(array, SYNC_FSP));
- break;
- case SYNC_IBUF_INDEX_TREE:
- if (sync_thread_levels_contain(array, SYNC_FSP)) {
- ut_a(sync_thread_levels_g(array, level - 1, TRUE));
- } else {
- ut_a(sync_thread_levels_g(
- array, SYNC_IBUF_TREE_NODE - 1, TRUE));
- }
- break;
- case SYNC_IBUF_PESS_INSERT_MUTEX:
- ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
- ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
- break;
- case SYNC_IBUF_HEADER:
- ut_a(sync_thread_levels_g(array, SYNC_FSP - 1, TRUE));
- ut_a(!sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
- ut_a(!sync_thread_levels_contain(array,
- SYNC_IBUF_PESS_INSERT_MUTEX));
- break;
- case SYNC_DICT:
-#ifdef UNIV_DEBUG
- ut_a(buf_debug_prints
- || sync_thread_levels_g(array, SYNC_DICT, TRUE));
-#else /* UNIV_DEBUG */
- ut_a(sync_thread_levels_g(array, SYNC_DICT, TRUE));
-#endif /* UNIV_DEBUG */
- break;
- default:
- ut_error;
- }
-
-levels_ok:
-
- sync_level.latch = latch;
- sync_level.level = level;
- array->push_back(sync_level);
-
- mutex_exit(&sync_thread_mutex);
-}
-
-/******************************************************************//**
-Removes a latch from the thread level array if it is found there.
-@return TRUE if found in the array; it is no error if the latch is
-not found, as we presently are not able to determine the level for
-every latch reservation the program does */
-UNIV_INTERN
-ibool
-sync_thread_reset_level(
-/*====================*/
- void* latch) /*!< in: pointer to a mutex or an rw-lock */
-{
- sync_arr_t* array;
- sync_thread_t* thread_slot;
-
- if (!sync_order_checks_on) {
-
- return(FALSE);
- }
-
- if ((latch == (void*) &sync_thread_mutex)
- || (latch == (void*) &mutex_list_mutex)
- || (latch == (void*) &rw_lock_debug_mutex)
- || (latch == (void*) &rw_lock_list_mutex)) {
-
- return(FALSE);
- }
-
- mutex_enter(&sync_thread_mutex);
-
- thread_slot = sync_thread_level_arrays_find_slot();
-
- if (thread_slot == NULL) {
-
- ut_error;
-
- mutex_exit(&sync_thread_mutex);
- return(FALSE);
- }
-
- array = thread_slot->levels;
-
- for (std::vector<sync_level_t>::iterator it = array->begin(); it != array->end(); ++it) {
- sync_level_t level = *it;
-
- if (level.latch != latch) {
- continue;
- }
-
- array->erase(it);
- mutex_exit(&sync_thread_mutex);
- return(TRUE);
- }
-
- if (((ib_mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
- rw_lock_t* rw_lock;
-
- rw_lock = (rw_lock_t*) latch;
-
- if (rw_lock->level == SYNC_LEVEL_VARYING) {
- mutex_exit(&sync_thread_mutex);
-
- return(TRUE);
- }
- }
-
- ut_error;
-
- mutex_exit(&sync_thread_mutex);
-
- return(FALSE);
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/******************************************************************//**
-Initializes the synchronization data structures. */
-UNIV_INTERN
-void
-sync_init(void)
-/*===========*/
-{
- ut_a(sync_initialized == FALSE);
-
- sync_initialized = TRUE;
-
- sync_array_init(OS_THREAD_MAX_N);
-
-#ifdef UNIV_SYNC_DEBUG
- /* Create the thread latch level array where the latch levels
- are stored for each OS thread */
-
- sync_thread_level_arrays = static_cast<sync_thread_t*>(
- calloc(sizeof(sync_thread_t), OS_THREAD_MAX_N));
-
- ut_a(sync_thread_level_arrays != NULL);
-
-#endif /* UNIV_SYNC_DEBUG */
- /* Init the mutex list and create the mutex to protect it. */
-
- UT_LIST_INIT(mutex_list);
- mutex_create(mutex_list_mutex_key, &mutex_list_mutex,
- SYNC_NO_ORDER_CHECK);
-#ifdef UNIV_SYNC_DEBUG
- mutex_create(sync_thread_mutex_key, &sync_thread_mutex,
- SYNC_NO_ORDER_CHECK);
-#endif /* UNIV_SYNC_DEBUG */
-
- /* Init the rw-lock list and create the mutex to protect it. */
-
- UT_LIST_INIT(rw_lock_list);
- mutex_create(rw_lock_list_mutex_key, &rw_lock_list_mutex,
- SYNC_NO_ORDER_CHECK);
-
-#ifdef UNIV_SYNC_DEBUG
- os_fast_mutex_init(rw_lock_debug_mutex_key, &rw_lock_debug_mutex);
-#endif /* UNIV_SYNC_DEBUG */
-}
-
-#ifdef UNIV_SYNC_DEBUG
-/******************************************************************//**
-Frees all debug memory. */
-static
-void
-sync_thread_level_arrays_free(void)
-/*===============================*/
-
-{
- ulint i;
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- sync_thread_t* slot;
-
- slot = &sync_thread_level_arrays[i];
-
- /* If this slot was allocated then free the slot memory too. */
- if (slot->levels != NULL) {
- delete slot->levels;
- }
- }
-
- free(sync_thread_level_arrays);
- sync_thread_level_arrays = NULL;
-}
-#endif /* UNIV_SYNC_DEBUG */
-
-/******************************************************************//**
-Frees the resources in InnoDB's own synchronization data structures. */
-UNIV_INTERN
-void
-sync_close(void)
-/*===========*/
-{
- ib_mutex_t* mutex;
-
- sync_array_close();
-
- mutex_free(&rw_lock_list_mutex);
-
- for (mutex = UT_LIST_GET_FIRST(mutex_list);
- mutex != NULL;
- /* No op */) {
-
-#ifdef UNIV_MEM_DEBUG
- if (mutex == &mem_hash_mutex) {
- mutex = UT_LIST_GET_NEXT(list, mutex);
- continue;
- }
-#endif /* UNIV_MEM_DEBUG */
-
- mutex_free(mutex);
-
- mutex = UT_LIST_GET_FIRST(mutex_list);
- }
-
-#ifdef UNIV_SYNC_DEBUG
- mutex_free(&sync_thread_mutex);
-
- /* Switch latching order checks on in sync0sync.cc */
- sync_order_checks_on = FALSE;
-
- sync_thread_level_arrays_free();
- os_fast_mutex_free(&rw_lock_debug_mutex);
-#endif /* UNIV_SYNC_DEBUG */
-
- mutex_free(&mutex_list_mutex);
-
- sync_initialized = FALSE;
-}
-
-/*******************************************************************//**
-Prints wait info of the sync system. */
-UNIV_INTERN
-void
-sync_print_wait_info(
-/*=================*/
- FILE* file) /*!< in: file where to print */
-{
- // Sum counter values once
- ib_int64_t mutex_spin_wait_count_val
- = static_cast<ib_int64_t>(mutex_spin_wait_count);
- ib_int64_t mutex_spin_round_count_val
- = static_cast<ib_int64_t>(mutex_spin_round_count);
- ib_int64_t mutex_os_wait_count_val
- = static_cast<ib_int64_t>(mutex_os_wait_count);
- ib_int64_t rw_s_spin_wait_count_val
- = static_cast<ib_int64_t>(rw_lock_stats.rw_s_spin_wait_count);
- ib_int64_t rw_s_spin_round_count_val
- = static_cast<ib_int64_t>(rw_lock_stats.rw_s_spin_round_count);
- ib_int64_t rw_s_os_wait_count_val
- = static_cast<ib_int64_t>(rw_lock_stats.rw_s_os_wait_count);
- ib_int64_t rw_x_spin_wait_count_val
- = static_cast<ib_int64_t>(rw_lock_stats.rw_x_spin_wait_count);
- ib_int64_t rw_x_spin_round_count_val
- = static_cast<ib_int64_t>(rw_lock_stats.rw_x_spin_round_count);
- ib_int64_t rw_x_os_wait_count_val
- = static_cast<ib_int64_t>(rw_lock_stats.rw_x_os_wait_count);
-
- fprintf(file,
- "Mutex spin waits " INT64PF ", rounds " INT64PF ", "
- "OS waits " INT64PF "\n"
- "RW-shared spins " INT64PF ", rounds " INT64PF ", "
- "OS waits " INT64PF "\n"
- "RW-excl spins " INT64PF ", rounds " INT64PF ", "
- "OS waits " INT64PF "\n",
- mutex_spin_wait_count_val, mutex_spin_round_count_val,
- mutex_os_wait_count_val,
- rw_s_spin_wait_count_val, rw_s_spin_round_count_val,
- rw_s_os_wait_count_val,
- rw_x_spin_wait_count_val, rw_x_spin_round_count_val,
- rw_x_os_wait_count_val);
-
- fprintf(file,
- "Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
- "%.2f RW-excl\n",
- (double) mutex_spin_round_count_val /
- (mutex_spin_wait_count_val ? mutex_spin_wait_count_val : 1LL),
- (double) rw_s_spin_round_count_val /
- (rw_s_spin_wait_count_val ? rw_s_spin_wait_count_val : 1LL),
- (double) rw_x_spin_round_count_val /
- (rw_x_spin_wait_count_val ? rw_x_spin_wait_count_val : 1LL));
-}
-
-/*******************************************************************//**
-Prints info of the sync system. */
-UNIV_INTERN
-void
-sync_print(
-/*=======*/
- FILE* file) /*!< in: file where to print */
-{
-#ifdef UNIV_SYNC_DEBUG
- mutex_list_print_info(file);
-
- rw_lock_list_print_info(file);
-#endif /* UNIV_SYNC_DEBUG */
-
- sync_array_print(file);
-
- sync_print_wait_info(file);
-}
diff --git a/storage/xtradb/trx/trx0i_s.cc b/storage/xtradb/trx/trx0i_s.cc
deleted file mode 100644
index 0c9618d98eb..00000000000
--- a/storage/xtradb/trx/trx0i_s.cc
+++ /dev/null
@@ -1,1692 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0i_s.cc
-INFORMATION SCHEMA innodb_trx, innodb_locks and
-innodb_lock_waits tables fetch code.
-
-The code below fetches information needed to fill those
-3 dynamic tables and uploads it into a "transactions
-table cache" for later retrieval.
-
-Created July 17, 2007 Vasil Dimov
-*******************************************************/
-
-/* Found during the build of 5.5.3 on Linux 2.4 and early 2.6 kernels:
- The includes "univ.i" -> "my_global.h" cause a different path
- to be taken further down with pthread functions and types,
- so they must come first.
- From the symptoms, this is related to bug#46587 in the MySQL bug DB.
-*/
-#include "univ.i"
-
-#include <mysql/plugin.h>
-
-#include "buf0buf.h"
-#include "dict0dict.h"
-#include "ha0storage.h"
-#include "ha_prototypes.h"
-#include "hash0hash.h"
-#include "lock0iter.h"
-#include "lock0lock.h"
-#include "mem0mem.h"
-#include "page0page.h"
-#include "rem0rec.h"
-#include "row0row.h"
-#include "srv0srv.h"
-#include "sync0rw.h"
-#include "sync0sync.h"
-#include "sync0types.h"
-#include "trx0i_s.h"
-#include "trx0sys.h"
-#include "trx0trx.h"
-#include "ut0mem.h"
-#include "ut0ut.h"
-
-/** Initial number of rows in the table cache */
-#define TABLE_CACHE_INITIAL_ROWSNUM 1024
-
-/** @brief The maximum number of chunks to allocate for a table cache.
-
-The rows of a table cache are stored in a set of chunks. When a new
-row is added a new chunk is allocated if necessary. Assuming that the
-first one is 1024 rows (TABLE_CACHE_INITIAL_ROWSNUM) and each
-subsequent is N/2 where N is the number of rows we have allocated till
-now, then 39th chunk would accommodate 1677416425 rows and all chunks
-would accommodate 3354832851 rows. */
-#define MEM_CHUNKS_IN_TABLE_CACHE 39
-
-/** The following are some testing auxiliary macros. Do not enable them
-in a production environment. */
-/* @{ */
-
-#if 0
-/** If this is enabled then lock folds will always be different
-resulting in equal rows being put in a different cells of the hash
-table. Checking for duplicates will be flawed because different
-fold will be calculated when a row is searched in the hash table. */
-#define TEST_LOCK_FOLD_ALWAYS_DIFFERENT
-#endif
-
-#if 0
-/** This effectively kills the search-for-duplicate-before-adding-a-row
-function, but searching in the hash is still performed. It will always
-be assumed that lock is not present and insertion will be performed in
-the hash table. */
-#define TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T
-#endif
-
-#if 0
-/** This aggressively repeats adding each row many times. Depending on
-the above settings this may be noop or may result in lots of rows being
-added. */
-#define TEST_ADD_EACH_LOCKS_ROW_MANY_TIMES
-#endif
-
-#if 0
-/** Very similar to TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T but hash
-table search is not performed at all. */
-#define TEST_DO_NOT_CHECK_FOR_DUPLICATE_ROWS
-#endif
-
-#if 0
-/** Do not insert each row into the hash table, duplicates may appear
-if this is enabled, also if this is enabled searching into the hash is
-noop because it will be empty. */
-#define TEST_DO_NOT_INSERT_INTO_THE_HASH_TABLE
-#endif
-/* @} */
-
-/** Memory limit passed to ha_storage_put_memlim().
-@param cache hash storage
-@return maximum allowed allocation size */
-#define MAX_ALLOWED_FOR_STORAGE(cache) \
- (TRX_I_S_MEM_LIMIT \
- - (cache)->mem_allocd)
-
-/** Memory limit in table_cache_create_empty_row().
-@param cache hash storage
-@return maximum allowed allocation size */
-#define MAX_ALLOWED_FOR_ALLOC(cache) \
- (TRX_I_S_MEM_LIMIT \
- - (cache)->mem_allocd \
- - ha_storage_get_size((cache)->storage))
-
-/** Memory for each table in the intermediate buffer is allocated in
-separate chunks. These chunks are considered to be concatenated to
-represent one flat array of rows. */
-struct i_s_mem_chunk_t {
- ulint offset; /*!< offset, in number of rows */
- ulint rows_allocd; /*!< the size of this chunk, in number
- of rows */
- void* base; /*!< start of the chunk */
-};
-
-/** This represents one table's cache. */
-struct i_s_table_cache_t {
- ulint rows_used; /*!< number of used rows */
- ulint rows_allocd; /*!< number of allocated rows */
- ulint row_size; /*!< size of a single row */
- i_s_mem_chunk_t chunks[MEM_CHUNKS_IN_TABLE_CACHE]; /*!< array of
- memory chunks that stores the
- rows */
-};
-
-/** This structure describes the intermediate buffer */
-struct trx_i_s_cache_t {
- rw_lock_t rw_lock; /*!< read-write lock protecting
- the rest of this structure */
- ullint last_read; /*!< last time the cache was read;
- measured in microseconds since
- epoch */
- ib_mutex_t last_read_mutex;/*!< mutex protecting the
- last_read member - it is updated
- inside a shared lock of the
- rw_lock member */
- i_s_table_cache_t innodb_trx; /*!< innodb_trx table */
- i_s_table_cache_t innodb_locks; /*!< innodb_locks table */
- i_s_table_cache_t innodb_lock_waits;/*!< innodb_lock_waits table */
-/** the hash table size is LOCKS_HASH_CELLS_NUM * sizeof(void*) bytes */
-#define LOCKS_HASH_CELLS_NUM 10000
- hash_table_t* locks_hash; /*!< hash table used to eliminate
- duplicate entries in the
- innodb_locks table */
-/** Initial size of the cache storage */
-#define CACHE_STORAGE_INITIAL_SIZE 1024
-/** Number of hash cells in the cache storage */
-#define CACHE_STORAGE_HASH_CELLS 2048
- ha_storage_t* storage; /*!< storage for external volatile
- data that may become unavailable
- when we release
- lock_sys->mutex or trx_sys->mutex */
- ulint mem_allocd; /*!< the amount of memory
- allocated with mem_alloc*() */
- ibool is_truncated; /*!< this is TRUE if the memory
- limit was hit and thus the data
- in the cache is truncated */
-};
-
-/** This is the intermediate buffer where data needed to fill the
-INFORMATION SCHEMA tables is fetched and later retrieved by the C++
-code in handler/i_s.cc. */
-static trx_i_s_cache_t trx_i_s_cache_static;
-/** This is the intermediate buffer where data needed to fill the
-INFORMATION SCHEMA tables is fetched and later retrieved by the C++
-code in handler/i_s.cc. */
-UNIV_INTERN trx_i_s_cache_t* trx_i_s_cache = &trx_i_s_cache_static;
-
-/* Key to register the lock/mutex with performance schema */
-#ifdef UNIV_PFS_RWLOCK
-UNIV_INTERN mysql_pfs_key_t trx_i_s_cache_lock_key;
-#endif /* UNIV_PFS_RWLOCK */
-
-#ifdef UNIV_PFS_MUTEX
-UNIV_INTERN mysql_pfs_key_t cache_last_read_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/*******************************************************************//**
-For a record lock that is in waiting state retrieves the only bit that
-is set, for a table lock returns ULINT_UNDEFINED.
-@return record number within the heap */
-static
-ulint
-wait_lock_get_heap_no(
-/*==================*/
- const lock_t* lock) /*!< in: lock */
-{
- ulint ret;
-
- switch (lock_get_type(lock)) {
- case LOCK_REC:
- ret = lock_rec_find_set_bit(lock);
- ut_a(ret != ULINT_UNDEFINED);
- break;
- case LOCK_TABLE:
- ret = ULINT_UNDEFINED;
- break;
- default:
- ut_error;
- }
-
- return(ret);
-}
-
-/*******************************************************************//**
-Initializes the members of a table cache. */
-static
-void
-table_cache_init(
-/*=============*/
- i_s_table_cache_t* table_cache, /*!< out: table cache */
- size_t row_size) /*!< in: the size of a
- row */
-{
- ulint i;
-
- table_cache->rows_used = 0;
- table_cache->rows_allocd = 0;
- table_cache->row_size = row_size;
-
- for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) {
-
- /* the memory is actually allocated in
- table_cache_create_empty_row() */
- table_cache->chunks[i].base = NULL;
- }
-}
-
-/*******************************************************************//**
-Frees a table cache. */
-static
-void
-table_cache_free(
-/*=============*/
- i_s_table_cache_t* table_cache) /*!< in/out: table cache */
-{
- ulint i;
-
- for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) {
-
- /* the memory is actually allocated in
- table_cache_create_empty_row() */
- if (table_cache->chunks[i].base) {
- mem_free(table_cache->chunks[i].base);
- table_cache->chunks[i].base = NULL;
- }
- }
-}
-
-/*******************************************************************//**
-Returns an empty row from a table cache. The row is allocated if no more
-empty rows are available. The number of used rows is incremented.
-If the memory limit is hit then NULL is returned and nothing is
-allocated.
-@return empty row, or NULL if out of memory */
-static
-void*
-table_cache_create_empty_row(
-/*=========================*/
- i_s_table_cache_t* table_cache, /*!< in/out: table cache */
- trx_i_s_cache_t* cache) /*!< in/out: cache to record
- how many bytes are
- allocated */
-{
- ulint i;
- void* row;
-
- ut_a(table_cache->rows_used <= table_cache->rows_allocd);
-
- if (table_cache->rows_used == table_cache->rows_allocd) {
-
- /* rows_used == rows_allocd means that new chunk needs
- to be allocated: either no more empty rows in the
- last allocated chunk or nothing has been allocated yet
- (rows_num == rows_allocd == 0); */
-
- i_s_mem_chunk_t* chunk;
- ulint req_bytes;
- ulint got_bytes;
- ulint req_rows;
- ulint got_rows;
-
- /* find the first not allocated chunk */
- for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) {
-
- if (table_cache->chunks[i].base == NULL) {
-
- break;
- }
- }
-
- /* i == MEM_CHUNKS_IN_TABLE_CACHE means that all chunks
- have been allocated :-X */
- ut_a(i < MEM_CHUNKS_IN_TABLE_CACHE);
-
- /* allocate the chunk we just found */
-
- if (i == 0) {
-
- /* first chunk, nothing is allocated yet */
- req_rows = TABLE_CACHE_INITIAL_ROWSNUM;
- } else {
-
- /* Memory is increased by the formula
- new = old + old / 2; We are trying not to be
- aggressive here (= using the common new = old * 2)
- because the allocated memory will not be freed
- until InnoDB exit (it is reused). So it is better
- to once allocate the memory in more steps, but
- have less unused/wasted memory than to use less
- steps in allocation (which is done once in a
- lifetime) but end up with lots of unused/wasted
- memory. */
- req_rows = table_cache->rows_allocd / 2;
- }
- req_bytes = req_rows * table_cache->row_size;
-
- if (req_bytes > MAX_ALLOWED_FOR_ALLOC(cache)) {
-
- return(NULL);
- }
-
- chunk = &table_cache->chunks[i];
-
- chunk->base = mem_alloc2(req_bytes, &got_bytes);
-
- got_rows = got_bytes / table_cache->row_size;
-
- cache->mem_allocd += got_bytes;
-
-#if 0
- printf("allocating chunk %d req bytes=%lu, got bytes=%lu, "
- "row size=%lu, "
- "req rows=%lu, got rows=%lu\n",
- i, req_bytes, got_bytes,
- table_cache->row_size,
- req_rows, got_rows);
-#endif
-
- chunk->rows_allocd = got_rows;
-
- table_cache->rows_allocd += got_rows;
-
- /* adjust the offset of the next chunk */
- if (i < MEM_CHUNKS_IN_TABLE_CACHE - 1) {
-
- table_cache->chunks[i + 1].offset
- = chunk->offset + chunk->rows_allocd;
- }
-
- /* return the first empty row in the newly allocated
- chunk */
- row = chunk->base;
- } else {
-
- char* chunk_start;
- ulint offset;
-
- /* there is an empty row, no need to allocate new
- chunks */
-
- /* find the first chunk that contains allocated but
- empty/unused rows */
- for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) {
-
- if (table_cache->chunks[i].offset
- + table_cache->chunks[i].rows_allocd
- > table_cache->rows_used) {
-
- break;
- }
- }
-
- /* i == MEM_CHUNKS_IN_TABLE_CACHE means that all chunks
- are full, but
- table_cache->rows_used != table_cache->rows_allocd means
- exactly the opposite - there are allocated but
- empty/unused rows :-X */
- ut_a(i < MEM_CHUNKS_IN_TABLE_CACHE);
-
- chunk_start = (char*) table_cache->chunks[i].base;
- offset = table_cache->rows_used
- - table_cache->chunks[i].offset;
-
- row = chunk_start + offset * table_cache->row_size;
- }
-
- table_cache->rows_used++;
-
- return(row);
-}
-
-#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Validates a row in the locks cache.
-@return TRUE if valid */
-static
-ibool
-i_s_locks_row_validate(
-/*===================*/
- const i_s_locks_row_t* row) /*!< in: row to validate */
-{
- ut_ad(row->lock_trx_id != 0);
- ut_ad(row->lock_mode != NULL);
- ut_ad(row->lock_type != NULL);
- ut_ad(row->lock_table != NULL);
- ut_ad(row->lock_table_id != 0);
-
- if (row->lock_space == ULINT_UNDEFINED) {
- /* table lock */
- ut_ad(!strcmp("TABLE", row->lock_type));
- ut_ad(row->lock_index == NULL);
- ut_ad(row->lock_data == NULL);
- ut_ad(row->lock_page == ULINT_UNDEFINED);
- ut_ad(row->lock_rec == ULINT_UNDEFINED);
- } else {
- /* record lock */
- ut_ad(!strcmp("RECORD", row->lock_type));
- ut_ad(row->lock_index != NULL);
- /* row->lock_data == NULL if buf_page_try_get() == NULL */
- ut_ad(row->lock_page != ULINT_UNDEFINED);
- ut_ad(row->lock_rec != ULINT_UNDEFINED);
- }
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-Fills i_s_trx_row_t object.
-If memory can not be allocated then FALSE is returned.
-@return FALSE if allocation fails */
-static
-ibool
-fill_trx_row(
-/*=========*/
- i_s_trx_row_t* row, /*!< out: result object
- that's filled */
- const trx_t* trx, /*!< in: transaction to
- get data from */
- const i_s_locks_row_t* requested_lock_row,/*!< in: pointer to the
- corresponding row in
- innodb_locks if trx is
- waiting or NULL if trx
- is not waiting */
- trx_i_s_cache_t* cache) /*!< in/out: cache into
- which to copy volatile
- strings */
-{
- const char* stmt;
- size_t stmt_len;
- const char* s;
-
- ut_ad(lock_mutex_own());
-
- row->trx_id = trx->id;
- row->trx_started = (ib_time_t) trx->start_time;
- row->trx_state = trx_get_que_state_str(trx);
- row->requested_lock_row = requested_lock_row;
- ut_ad(requested_lock_row == NULL
- || i_s_locks_row_validate(requested_lock_row));
-
- if (trx->lock.wait_lock != NULL) {
-
- ut_a(requested_lock_row != NULL);
- row->trx_wait_started = (ib_time_t) trx->lock.wait_started;
- } else {
- ut_a(requested_lock_row == NULL);
- row->trx_wait_started = 0;
- }
-
- row->trx_weight = (ullint) TRX_WEIGHT(trx);
-
- if (trx->mysql_thd == NULL) {
- /* For internal transactions e.g., purge and transactions
- being recovered at startup there is no associated MySQL
- thread data structure. */
- row->trx_mysql_thread_id = 0;
- row->trx_query = NULL;
- goto thd_done;
- }
-
- row->trx_mysql_thread_id = thd_get_thread_id(trx->mysql_thd);
-
- stmt = trx->mysql_thd
- ? innobase_get_stmt(trx->mysql_thd, &stmt_len)
- : NULL;
-
- if (stmt != NULL) {
- char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1];
-
- if (stmt_len > TRX_I_S_TRX_QUERY_MAX_LEN) {
- stmt_len = TRX_I_S_TRX_QUERY_MAX_LEN;
- }
-
- memcpy(query, stmt, stmt_len);
- query[stmt_len] = '\0';
-
- row->trx_query = static_cast<const char*>(
- ha_storage_put_memlim(
- cache->storage, query, stmt_len + 1,
- MAX_ALLOWED_FOR_STORAGE(cache)));
-
- row->trx_query_cs = innobase_get_charset(trx->mysql_thd);
-
- if (row->trx_query == NULL) {
-
- return(FALSE);
- }
- } else {
-
- row->trx_query = NULL;
- }
-
-thd_done:
- s = trx->op_info;
-
- if (s != NULL && s[0] != '\0') {
-
- TRX_I_S_STRING_COPY(s, row->trx_operation_state,
- TRX_I_S_TRX_OP_STATE_MAX_LEN, cache);
-
- if (row->trx_operation_state == NULL) {
-
- return(FALSE);
- }
- } else {
-
- row->trx_operation_state = NULL;
- }
-
- row->trx_tables_in_use = trx->n_mysql_tables_in_use;
-
- row->trx_tables_locked = trx->mysql_n_tables_locked;
-
- /* These are protected by both trx->mutex or lock_sys->mutex,
- or just lock_sys->mutex. For reading, it suffices to hold
- lock_sys->mutex. */
-
- row->trx_lock_structs = UT_LIST_GET_LEN(trx->lock.trx_locks);
-
- row->trx_lock_memory_bytes = mem_heap_get_size(trx->lock.lock_heap);
-
- row->trx_rows_locked = lock_number_of_rows_locked(&trx->lock);
-
- row->trx_rows_modified = trx->undo_no;
-
- row->trx_concurrency_tickets = trx->n_tickets_to_enter_innodb;
-
- switch (trx->isolation_level) {
- case TRX_ISO_READ_UNCOMMITTED:
- row->trx_isolation_level = "READ UNCOMMITTED";
- break;
- case TRX_ISO_READ_COMMITTED:
- row->trx_isolation_level = "READ COMMITTED";
- break;
- case TRX_ISO_REPEATABLE_READ:
- row->trx_isolation_level = "REPEATABLE READ";
- break;
- case TRX_ISO_SERIALIZABLE:
- row->trx_isolation_level = "SERIALIZABLE";
- break;
- /* Should not happen as TRX_ISO_READ_COMMITTED is default */
- default:
- row->trx_isolation_level = "UNKNOWN";
- }
-
- row->trx_unique_checks = (ibool) trx->check_unique_secondary;
-
- row->trx_foreign_key_checks = (ibool) trx->check_foreigns;
-
- s = trx->detailed_error;
-
- if (s != NULL && s[0] != '\0') {
-
- TRX_I_S_STRING_COPY(s,
- row->trx_foreign_key_error,
- TRX_I_S_TRX_FK_ERROR_MAX_LEN, cache);
-
- if (row->trx_foreign_key_error == NULL) {
-
- return(FALSE);
- }
- } else {
- row->trx_foreign_key_error = NULL;
- }
-
- row->trx_has_search_latch = (ibool) trx->has_search_latch;
-
- row->trx_search_latch_timeout = trx->search_latch_timeout;
-
- row->trx_is_read_only = trx->read_only;
-
- row->trx_is_autocommit_non_locking = trx_is_autocommit_non_locking(trx);
-
- return(TRUE);
-}
-
-/*******************************************************************//**
-Format the nth field of "rec" and put it in "buf". The result is always
-NUL-terminated. Returns the number of bytes that were written to "buf"
-(including the terminating NUL).
-@return end of the result */
-static
-ulint
-put_nth_field(
-/*==========*/
- char* buf, /*!< out: buffer */
- ulint buf_size,/*!< in: buffer size in bytes */
- ulint n, /*!< in: number of field */
- const dict_index_t* index, /*!< in: index */
- const rec_t* rec, /*!< in: record */
- const ulint* offsets)/*!< in: record offsets, returned
- by rec_get_offsets() */
-{
- const byte* data;
- ulint data_len;
- dict_field_t* dict_field;
- ulint ret;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
-
- if (buf_size == 0) {
-
- return(0);
- }
-
- ret = 0;
-
- if (n > 0) {
- /* we must append ", " before the actual data */
-
- if (buf_size < 3) {
-
- buf[0] = '\0';
- return(1);
- }
-
- memcpy(buf, ", ", 3);
-
- buf += 2;
- buf_size -= 2;
- ret += 2;
- }
-
- /* now buf_size >= 1 */
-
- data = rec_get_nth_field(rec, offsets, n, &data_len);
-
- dict_field = dict_index_get_nth_field(index, n);
-
- ret += row_raw_format((const char*) data, data_len,
- dict_field, buf, buf_size);
-
- return(ret);
-}
-
-/*******************************************************************//**
-Fills the "lock_data" member of i_s_locks_row_t object.
-If memory can not be allocated then FALSE is returned.
-@return FALSE if allocation fails */
-static
-ibool
-fill_lock_data(
-/*===========*/
- const char** lock_data,/*!< out: "lock_data" to fill */
- const lock_t* lock, /*!< in: lock used to find the data */
- ulint heap_no,/*!< in: rec num used to find the data */
- trx_i_s_cache_t* cache) /*!< in/out: cache where to store
- volatile data */
-{
- mtr_t mtr;
-
- const buf_block_t* block;
- const page_t* page;
- const rec_t* rec;
-
- ut_a(lock_get_type(lock) == LOCK_REC);
-
- mtr_start(&mtr);
-
- block = buf_page_try_get(lock_rec_get_space_id(lock),
- lock_rec_get_page_no(lock),
- &mtr);
-
- if (block == NULL) {
-
- *lock_data = NULL;
-
- mtr_commit(&mtr);
-
- return(TRUE);
- }
-
- page = (const page_t*) buf_block_get_frame(block);
-
- rec = page_find_rec_with_heap_no(page, heap_no);
-
- if (page_rec_is_infimum(rec)) {
-
- *lock_data = ha_storage_put_str_memlim(
- cache->storage, "infimum pseudo-record",
- MAX_ALLOWED_FOR_STORAGE(cache));
- } else if (page_rec_is_supremum(rec)) {
-
- *lock_data = ha_storage_put_str_memlim(
- cache->storage, "supremum pseudo-record",
- MAX_ALLOWED_FOR_STORAGE(cache));
- } else {
-
- const dict_index_t* index;
- ulint n_fields;
- mem_heap_t* heap;
- ulint offsets_onstack[REC_OFFS_NORMAL_SIZE];
- ulint* offsets;
- char buf[TRX_I_S_LOCK_DATA_MAX_LEN];
- ulint buf_used;
- ulint i;
-
- rec_offs_init(offsets_onstack);
- offsets = offsets_onstack;
-
- index = lock_rec_get_index(lock);
-
- n_fields = dict_index_get_n_unique(index);
-
- ut_a(n_fields > 0);
-
- heap = NULL;
- offsets = rec_get_offsets(rec, index, offsets, n_fields,
- &heap);
-
- /* format and store the data */
-
- buf_used = 0;
- for (i = 0; i < n_fields; i++) {
-
- buf_used += put_nth_field(
- buf + buf_used, sizeof(buf) - buf_used,
- i, index, rec, offsets) - 1;
- }
-
- *lock_data = (const char*) ha_storage_put_memlim(
- cache->storage, buf, buf_used + 1,
- MAX_ALLOWED_FOR_STORAGE(cache));
-
- if (UNIV_UNLIKELY(heap != NULL)) {
-
- /* this means that rec_get_offsets() has created a new
- heap and has stored offsets in it; check that this is
- really the case and free the heap */
- ut_a(offsets != offsets_onstack);
- mem_heap_free(heap);
- }
- }
-
- mtr_commit(&mtr);
-
- if (*lock_data == NULL) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/*******************************************************************//**
-Fills i_s_locks_row_t object. Returns its first argument.
-If memory can not be allocated then FALSE is returned.
-@return FALSE if allocation fails */
-static
-ibool
-fill_locks_row(
-/*===========*/
- i_s_locks_row_t* row, /*!< out: result object that's filled */
- const lock_t* lock, /*!< in: lock to get data from */
- ulint heap_no,/*!< in: lock's record number
- or ULINT_UNDEFINED if the lock
- is a table lock */
- trx_i_s_cache_t* cache) /*!< in/out: cache into which to copy
- volatile strings */
-{
- row->lock_trx_id = lock_get_trx_id(lock);
- row->lock_mode = lock_get_mode_str(lock);
- row->lock_type = lock_get_type_str(lock);
-
- row->lock_table = ha_storage_put_str_memlim(
- cache->storage, lock_get_table_name(lock),
- MAX_ALLOWED_FOR_STORAGE(cache));
-
- /* memory could not be allocated */
- if (row->lock_table == NULL) {
-
- return(FALSE);
- }
-
- switch (lock_get_type(lock)) {
- case LOCK_REC:
- row->lock_index = ha_storage_put_str_memlim(
- cache->storage, lock_rec_get_index_name(lock),
- MAX_ALLOWED_FOR_STORAGE(cache));
-
- /* memory could not be allocated */
- if (row->lock_index == NULL) {
-
- return(FALSE);
- }
-
- row->lock_space = lock_rec_get_space_id(lock);
- row->lock_page = lock_rec_get_page_no(lock);
- row->lock_rec = heap_no;
-
- if (!fill_lock_data(&row->lock_data, lock, heap_no, cache)) {
-
- /* memory could not be allocated */
- return(FALSE);
- }
-
- break;
- case LOCK_TABLE:
- row->lock_index = NULL;
-
- row->lock_space = ULINT_UNDEFINED;
- row->lock_page = ULINT_UNDEFINED;
- row->lock_rec = ULINT_UNDEFINED;
-
- row->lock_data = NULL;
-
- break;
- default:
- ut_error;
- }
-
- row->lock_table_id = lock_get_table_id(lock);
-
- row->hash_chain.value = row;
- ut_ad(i_s_locks_row_validate(row));
-
- return(TRUE);
-}
-
-/*******************************************************************//**
-Fills i_s_lock_waits_row_t object. Returns its first argument.
-@return result object that's filled */
-static
-i_s_lock_waits_row_t*
-fill_lock_waits_row(
-/*================*/
- i_s_lock_waits_row_t* row, /*!< out: result object
- that's filled */
- const i_s_locks_row_t* requested_lock_row,/*!< in: pointer to the
- relevant requested lock
- row in innodb_locks */
- const i_s_locks_row_t* blocking_lock_row)/*!< in: pointer to the
- relevant blocking lock
- row in innodb_locks */
-{
- ut_ad(i_s_locks_row_validate(requested_lock_row));
- ut_ad(i_s_locks_row_validate(blocking_lock_row));
-
- row->requested_lock_row = requested_lock_row;
- row->blocking_lock_row = blocking_lock_row;
-
- return(row);
-}
-
-/*******************************************************************//**
-Calculates a hash fold for a lock. For a record lock the fold is
-calculated from 4 elements, which uniquely identify a lock at a given
-point in time: transaction id, space id, page number, record number.
-For a table lock the fold is table's id.
-@return fold */
-static
-ulint
-fold_lock(
-/*======*/
- const lock_t* lock, /*!< in: lock object to fold */
- ulint heap_no)/*!< in: lock's record number
- or ULINT_UNDEFINED if the lock
- is a table lock */
-{
-#ifdef TEST_LOCK_FOLD_ALWAYS_DIFFERENT
- static ulint fold = 0;
-
- return(fold++);
-#else
- ulint ret;
-
- switch (lock_get_type(lock)) {
- case LOCK_REC:
- ut_a(heap_no != ULINT_UNDEFINED);
-
- ret = ut_fold_ulint_pair((ulint) lock_get_trx_id(lock),
- lock_rec_get_space_id(lock));
-
- ret = ut_fold_ulint_pair(ret,
- lock_rec_get_page_no(lock));
-
- ret = ut_fold_ulint_pair(ret, heap_no);
-
- break;
- case LOCK_TABLE:
- /* this check is actually not necessary for continuing
- correct operation, but something must have gone wrong if
- it fails. */
- ut_a(heap_no == ULINT_UNDEFINED);
-
- ret = (ulint) lock_get_table_id(lock);
-
- break;
- default:
- ut_error;
- }
-
- return(ret);
-#endif
-}
-
-/*******************************************************************//**
-Checks whether i_s_locks_row_t object represents a lock_t object.
-@return TRUE if they match */
-static
-ibool
-locks_row_eq_lock(
-/*==============*/
- const i_s_locks_row_t* row, /*!< in: innodb_locks row */
- const lock_t* lock, /*!< in: lock object */
- ulint heap_no)/*!< in: lock's record number
- or ULINT_UNDEFINED if the lock
- is a table lock */
-{
- ut_ad(i_s_locks_row_validate(row));
-#ifdef TEST_NO_LOCKS_ROW_IS_EVER_EQUAL_TO_LOCK_T
- return(0);
-#else
- switch (lock_get_type(lock)) {
- case LOCK_REC:
- ut_a(heap_no != ULINT_UNDEFINED);
-
- return(row->lock_trx_id == lock_get_trx_id(lock)
- && row->lock_space == lock_rec_get_space_id(lock)
- && row->lock_page == lock_rec_get_page_no(lock)
- && row->lock_rec == heap_no);
-
- case LOCK_TABLE:
- /* this check is actually not necessary for continuing
- correct operation, but something must have gone wrong if
- it fails. */
- ut_a(heap_no == ULINT_UNDEFINED);
-
- return(row->lock_trx_id == lock_get_trx_id(lock)
- && row->lock_table_id == lock_get_table_id(lock));
-
- default:
- ut_error;
- return(FALSE);
- }
-#endif
-}
-
-/*******************************************************************//**
-Searches for a row in the innodb_locks cache that has a specified id.
-This happens in O(1) time since a hash table is used. Returns pointer to
-the row or NULL if none is found.
-@return row or NULL */
-static
-i_s_locks_row_t*
-search_innodb_locks(
-/*================*/
- trx_i_s_cache_t* cache, /*!< in: cache */
- const lock_t* lock, /*!< in: lock to search for */
- ulint heap_no)/*!< in: lock's record number
- or ULINT_UNDEFINED if the lock
- is a table lock */
-{
- i_s_hash_chain_t* hash_chain;
-
- HASH_SEARCH(
- /* hash_chain->"next" */
- next,
- /* the hash table */
- cache->locks_hash,
- /* fold */
- fold_lock(lock, heap_no),
- /* the type of the next variable */
- i_s_hash_chain_t*,
- /* auxiliary variable */
- hash_chain,
- /* assertion on every traversed item */
- ut_ad(i_s_locks_row_validate(hash_chain->value)),
- /* this determines if we have found the lock */
- locks_row_eq_lock(hash_chain->value, lock, heap_no));
-
- if (hash_chain == NULL) {
-
- return(NULL);
- }
- /* else */
-
- return(hash_chain->value);
-}
-
-/*******************************************************************//**
-Adds new element to the locks cache, enlarging it if necessary.
-Returns a pointer to the added row. If the row is already present then
-no row is added and a pointer to the existing row is returned.
-If row can not be allocated then NULL is returned.
-@return row */
-static
-i_s_locks_row_t*
-add_lock_to_cache(
-/*==============*/
- trx_i_s_cache_t* cache, /*!< in/out: cache */
- const lock_t* lock, /*!< in: the element to add */
- ulint heap_no)/*!< in: lock's record number
- or ULINT_UNDEFINED if the lock
- is a table lock */
-{
- i_s_locks_row_t* dst_row;
-
-#ifdef TEST_ADD_EACH_LOCKS_ROW_MANY_TIMES
- ulint i;
- for (i = 0; i < 10000; i++) {
-#endif
-#ifndef TEST_DO_NOT_CHECK_FOR_DUPLICATE_ROWS
- /* quit if this lock is already present */
- dst_row = search_innodb_locks(cache, lock, heap_no);
- if (dst_row != NULL) {
-
- ut_ad(i_s_locks_row_validate(dst_row));
- return(dst_row);
- }
-#endif
-
- dst_row = (i_s_locks_row_t*)
- table_cache_create_empty_row(&cache->innodb_locks, cache);
-
- /* memory could not be allocated */
- if (dst_row == NULL) {
-
- return(NULL);
- }
-
- if (!fill_locks_row(dst_row, lock, heap_no, cache)) {
-
- /* memory could not be allocated */
- cache->innodb_locks.rows_used--;
- return(NULL);
- }
-
-#ifndef TEST_DO_NOT_INSERT_INTO_THE_HASH_TABLE
- HASH_INSERT(
- /* the type used in the hash chain */
- i_s_hash_chain_t,
- /* hash_chain->"next" */
- next,
- /* the hash table */
- cache->locks_hash,
- /* fold */
- fold_lock(lock, heap_no),
- /* add this data to the hash */
- &dst_row->hash_chain);
-#endif
-#ifdef TEST_ADD_EACH_LOCKS_ROW_MANY_TIMES
- } /* for()-loop */
-#endif
-
- ut_ad(i_s_locks_row_validate(dst_row));
- return(dst_row);
-}
-
-/*******************************************************************//**
-Adds new pair of locks to the lock waits cache.
-If memory can not be allocated then FALSE is returned.
-@return FALSE if allocation fails */
-static
-ibool
-add_lock_wait_to_cache(
-/*===================*/
- trx_i_s_cache_t* cache, /*!< in/out: cache */
- const i_s_locks_row_t* requested_lock_row,/*!< in: pointer to the
- relevant requested lock
- row in innodb_locks */
- const i_s_locks_row_t* blocking_lock_row)/*!< in: pointer to the
- relevant blocking lock
- row in innodb_locks */
-{
- i_s_lock_waits_row_t* dst_row;
-
- dst_row = (i_s_lock_waits_row_t*)
- table_cache_create_empty_row(&cache->innodb_lock_waits,
- cache);
-
- /* memory could not be allocated */
- if (dst_row == NULL) {
-
- return(FALSE);
- }
-
- fill_lock_waits_row(dst_row, requested_lock_row, blocking_lock_row);
-
- return(TRUE);
-}
-
-/*******************************************************************//**
-Adds transaction's relevant (important) locks to cache.
-If the transaction is waiting, then the wait lock is added to
-innodb_locks and a pointer to the added row is returned in
-requested_lock_row, otherwise requested_lock_row is set to NULL.
-If rows can not be allocated then FALSE is returned and the value of
-requested_lock_row is undefined.
-@return FALSE if allocation fails */
-static
-ibool
-add_trx_relevant_locks_to_cache(
-/*============================*/
- trx_i_s_cache_t* cache, /*!< in/out: cache */
- const trx_t* trx, /*!< in: transaction */
- i_s_locks_row_t** requested_lock_row)/*!< out: pointer to the
- requested lock row, or NULL or
- undefined */
-{
- ut_ad(lock_mutex_own());
-
- /* If transaction is waiting we add the wait lock and all locks
- from another transactions that are blocking the wait lock. */
- if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
-
- const lock_t* curr_lock;
- ulint wait_lock_heap_no;
- i_s_locks_row_t* blocking_lock_row;
- lock_queue_iterator_t iter;
-
- ut_a(trx->lock.wait_lock != NULL);
-
- wait_lock_heap_no
- = wait_lock_get_heap_no(trx->lock.wait_lock);
-
- /* add the requested lock */
- *requested_lock_row
- = add_lock_to_cache(cache, trx->lock.wait_lock,
- wait_lock_heap_no);
-
- /* memory could not be allocated */
- if (*requested_lock_row == NULL) {
-
- return(FALSE);
- }
-
- /* then iterate over the locks before the wait lock and
- add the ones that are blocking it */
-
- lock_queue_iterator_reset(&iter, trx->lock.wait_lock,
- ULINT_UNDEFINED);
-
- for (curr_lock = lock_queue_iterator_get_prev(&iter);
- curr_lock != NULL;
- curr_lock = lock_queue_iterator_get_prev(&iter)) {
-
- if (lock_has_to_wait(trx->lock.wait_lock,
- curr_lock)) {
-
- /* add the lock that is
- blocking trx->lock.wait_lock */
- blocking_lock_row
- = add_lock_to_cache(
- cache, curr_lock,
- /* heap_no is the same
- for the wait and waited
- locks */
- wait_lock_heap_no);
-
- /* memory could not be allocated */
- if (blocking_lock_row == NULL) {
-
- return(FALSE);
- }
-
- /* add the relation between both locks
- to innodb_lock_waits */
- if (!add_lock_wait_to_cache(
- cache, *requested_lock_row,
- blocking_lock_row)) {
-
- /* memory could not be allocated */
- return(FALSE);
- }
- }
- }
- } else {
-
- *requested_lock_row = NULL;
- }
-
- return(TRUE);
-}
-
-/** The minimum time that a cache must not be updated after it has been
-read for the last time; measured in microseconds. We use this technique
-to ensure that SELECTs which join several INFORMATION SCHEMA tables read
-the same version of the cache. */
-#define CACHE_MIN_IDLE_TIME_US 100000 /* 0.1 sec */
-
-/*******************************************************************//**
-Checks if the cache can safely be updated.
-@return TRUE if can be updated */
-static
-ibool
-can_cache_be_updated(
-/*=================*/
- trx_i_s_cache_t* cache) /*!< in: cache */
-{
- ullint now;
-
- /* Here we read cache->last_read without acquiring its mutex
- because last_read is only updated when a shared rw lock on the
- whole cache is being held (see trx_i_s_cache_end_read()) and
- we are currently holding an exclusive rw lock on the cache.
- So it is not possible for last_read to be updated while we are
- reading it. */
-
-#ifdef UNIV_SYNC_DEBUG
- ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_EX));
-#endif
-
- now = ut_time_us(NULL);
- if (now - cache->last_read > CACHE_MIN_IDLE_TIME_US) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*******************************************************************//**
-Declare a cache empty, preparing it to be filled up. Not all resources
-are freed because they can be reused. */
-static
-void
-trx_i_s_cache_clear(
-/*================*/
- trx_i_s_cache_t* cache) /*!< out: cache to clear */
-{
- cache->innodb_trx.rows_used = 0;
- cache->innodb_locks.rows_used = 0;
- cache->innodb_lock_waits.rows_used = 0;
-
- hash_table_clear(cache->locks_hash);
-
- ha_storage_empty(&cache->storage);
-}
-
-/*******************************************************************//**
-Fetches the data needed to fill the 3 INFORMATION SCHEMA tables into the
-table cache buffer. Cache must be locked for write. */
-static
-void
-fetch_data_into_cache_low(
-/*======================*/
- trx_i_s_cache_t* cache, /*!< in/out: cache */
- ibool only_ac_nl, /*!< in: only select non-locking
- autocommit transactions */
- trx_list_t* trx_list) /*!< in: trx list */
-{
- const trx_t* trx;
-
- ut_ad(trx_list == &trx_sys->rw_trx_list
- || trx_list == &trx_sys->ro_trx_list
- || trx_list == &trx_sys->mysql_trx_list);
-
- ut_ad(only_ac_nl == (trx_list == &trx_sys->mysql_trx_list));
-
- /* Iterate over the transaction list and add each one
- to innodb_trx's cache. We also add all locks that are relevant
- to each transaction into innodb_locks' and innodb_lock_waits'
- caches. */
-
- for (trx = UT_LIST_GET_FIRST(*trx_list);
- trx != NULL;
- trx =
- (trx_list == &trx_sys->mysql_trx_list
- ? UT_LIST_GET_NEXT(mysql_trx_list, trx)
- : UT_LIST_GET_NEXT(trx_list, trx))) {
-
- i_s_trx_row_t* trx_row;
- i_s_locks_row_t* requested_lock_row;
-
- if (trx->state == TRX_STATE_NOT_STARTED
- || (only_ac_nl && !trx_is_autocommit_non_locking(trx))) {
-
- continue;
- }
-
- assert_trx_nonlocking_or_in_list(trx);
-
- ut_ad(trx->in_ro_trx_list
- == (trx_list == &trx_sys->ro_trx_list));
-
- ut_ad(trx->in_rw_trx_list
- == (trx_list == &trx_sys->rw_trx_list));
-
- if (!add_trx_relevant_locks_to_cache(cache, trx,
- &requested_lock_row)) {
-
- cache->is_truncated = TRUE;
- return;
- }
-
- trx_row = (i_s_trx_row_t*)
- table_cache_create_empty_row(&cache->innodb_trx,
- cache);
-
- /* memory could not be allocated */
- if (trx_row == NULL) {
-
- cache->is_truncated = TRUE;
- return;
- }
-
- if (!fill_trx_row(trx_row, trx, requested_lock_row, cache)) {
-
- /* memory could not be allocated */
- cache->innodb_trx.rows_used--;
- cache->is_truncated = TRUE;
- return;
- }
- }
-}
-
-/*******************************************************************//**
-Fetches the data needed to fill the 3 INFORMATION SCHEMA tables into the
-table cache buffer. Cache must be locked for write. */
-static
-void
-fetch_data_into_cache(
-/*==================*/
- trx_i_s_cache_t* cache) /*!< in/out: cache */
-{
- ut_ad(lock_mutex_own());
- ut_ad(mutex_own(&trx_sys->mutex));
-
- trx_i_s_cache_clear(cache);
-
- fetch_data_into_cache_low(cache, FALSE, &trx_sys->rw_trx_list);
- fetch_data_into_cache_low(cache, FALSE, &trx_sys->ro_trx_list);
-
- /* Only select autocommit non-locking selects because they can
- only be on the MySQL transaction list (TRUE). */
- fetch_data_into_cache_low(cache, TRUE, &trx_sys->mysql_trx_list);
-
- cache->is_truncated = FALSE;
-}
-
-/*******************************************************************//**
-Update the transactions cache if it has not been read for some time.
-Called from handler/i_s.cc.
-@return 0 - fetched, 1 - not */
-UNIV_INTERN
-int
-trx_i_s_possibly_fetch_data_into_cache(
-/*===================================*/
- trx_i_s_cache_t* cache) /*!< in/out: cache */
-{
- if (!can_cache_be_updated(cache)) {
-
- return(1);
- }
-
- /* We need to read trx_sys and record/table lock queues */
-
- lock_mutex_enter();
-
- mutex_enter(&trx_sys->mutex);
-
- fetch_data_into_cache(cache);
-
- mutex_exit(&trx_sys->mutex);
-
- lock_mutex_exit();
-
- return(0);
-}
-
-/*******************************************************************//**
-Returns TRUE if the data in the cache is truncated due to the memory
-limit posed by TRX_I_S_MEM_LIMIT.
-@return TRUE if truncated */
-UNIV_INTERN
-ibool
-trx_i_s_cache_is_truncated(
-/*=======================*/
- trx_i_s_cache_t* cache) /*!< in: cache */
-{
- return(cache->is_truncated);
-}
-
-/*******************************************************************//**
-Initialize INFORMATION SCHEMA trx related cache. */
-UNIV_INTERN
-void
-trx_i_s_cache_init(
-/*===============*/
- trx_i_s_cache_t* cache) /*!< out: cache to init */
-{
- /* The latching is done in the following order:
- acquire trx_i_s_cache_t::rw_lock, X
- acquire lock mutex
- release lock mutex
- release trx_i_s_cache_t::rw_lock
- acquire trx_i_s_cache_t::rw_lock, S
- acquire trx_i_s_cache_t::last_read_mutex
- release trx_i_s_cache_t::last_read_mutex
- release trx_i_s_cache_t::rw_lock */
-
- rw_lock_create(trx_i_s_cache_lock_key, &cache->rw_lock,
- SYNC_TRX_I_S_RWLOCK);
-
- cache->last_read = 0;
-
- mutex_create(cache_last_read_mutex_key,
- &cache->last_read_mutex, SYNC_TRX_I_S_LAST_READ);
-
- table_cache_init(&cache->innodb_trx, sizeof(i_s_trx_row_t));
- table_cache_init(&cache->innodb_locks, sizeof(i_s_locks_row_t));
- table_cache_init(&cache->innodb_lock_waits,
- sizeof(i_s_lock_waits_row_t));
-
- cache->locks_hash = hash_create(LOCKS_HASH_CELLS_NUM);
-
- cache->storage = ha_storage_create(CACHE_STORAGE_INITIAL_SIZE,
- CACHE_STORAGE_HASH_CELLS);
-
- cache->mem_allocd = 0;
-
- cache->is_truncated = FALSE;
-}
-
-/*******************************************************************//**
-Free the INFORMATION SCHEMA trx related cache. */
-UNIV_INTERN
-void
-trx_i_s_cache_free(
-/*===============*/
- trx_i_s_cache_t* cache) /*!< in, own: cache to free */
-{
- rw_lock_free(&cache->rw_lock);
- mutex_free(&cache->last_read_mutex);
- hash_table_free(cache->locks_hash);
- ha_storage_free(cache->storage);
- table_cache_free(&cache->innodb_trx);
- table_cache_free(&cache->innodb_locks);
- table_cache_free(&cache->innodb_lock_waits);
- memset(cache, 0, sizeof *cache);
-}
-
-/*******************************************************************//**
-Issue a shared/read lock on the tables cache. */
-UNIV_INTERN
-void
-trx_i_s_cache_start_read(
-/*=====================*/
- trx_i_s_cache_t* cache) /*!< in: cache */
-{
- rw_lock_s_lock(&cache->rw_lock);
-}
-
-/*******************************************************************//**
-Release a shared/read lock on the tables cache. */
-UNIV_INTERN
-void
-trx_i_s_cache_end_read(
-/*===================*/
- trx_i_s_cache_t* cache) /*!< in: cache */
-{
- ullint now;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_SHARED));
-#endif
-
- /* update cache last read time */
- now = ut_time_us(NULL);
- mutex_enter(&cache->last_read_mutex);
- cache->last_read = now;
- mutex_exit(&cache->last_read_mutex);
-
- rw_lock_s_unlock(&cache->rw_lock);
-}
-
-/*******************************************************************//**
-Issue an exclusive/write lock on the tables cache. */
-UNIV_INTERN
-void
-trx_i_s_cache_start_write(
-/*======================*/
- trx_i_s_cache_t* cache) /*!< in: cache */
-{
- rw_lock_x_lock(&cache->rw_lock);
-}
-
-/*******************************************************************//**
-Release an exclusive/write lock on the tables cache. */
-UNIV_INTERN
-void
-trx_i_s_cache_end_write(
-/*====================*/
- trx_i_s_cache_t* cache) /*!< in: cache */
-{
-#ifdef UNIV_SYNC_DEBUG
- ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_EX));
-#endif
-
- rw_lock_x_unlock(&cache->rw_lock);
-}
-
-/*******************************************************************//**
-Selects a INFORMATION SCHEMA table cache from the whole cache.
-@return table cache */
-static
-i_s_table_cache_t*
-cache_select_table(
-/*===============*/
- trx_i_s_cache_t* cache, /*!< in: whole cache */
- enum i_s_table table) /*!< in: which table */
-{
- i_s_table_cache_t* table_cache;
-
-#ifdef UNIV_SYNC_DEBUG
- ut_a(rw_lock_own(&cache->rw_lock, RW_LOCK_SHARED)
- || rw_lock_own(&cache->rw_lock, RW_LOCK_EX));
-#endif
-
- switch (table) {
- case I_S_INNODB_TRX:
- table_cache = &cache->innodb_trx;
- break;
- case I_S_INNODB_LOCKS:
- table_cache = &cache->innodb_locks;
- break;
- case I_S_INNODB_LOCK_WAITS:
- table_cache = &cache->innodb_lock_waits;
- break;
- default:
- ut_error;
- }
-
- return(table_cache);
-}
-
-/*******************************************************************//**
-Retrieves the number of used rows in the cache for a given
-INFORMATION SCHEMA table.
-@return number of rows */
-UNIV_INTERN
-ulint
-trx_i_s_cache_get_rows_used(
-/*========================*/
- trx_i_s_cache_t* cache, /*!< in: cache */
- enum i_s_table table) /*!< in: which table */
-{
- i_s_table_cache_t* table_cache;
-
- table_cache = cache_select_table(cache, table);
-
- return(table_cache->rows_used);
-}
-
-/*******************************************************************//**
-Retrieves the nth row (zero-based) in the cache for a given
-INFORMATION SCHEMA table.
-@return row */
-UNIV_INTERN
-void*
-trx_i_s_cache_get_nth_row(
-/*======================*/
- trx_i_s_cache_t* cache, /*!< in: cache */
- enum i_s_table table, /*!< in: which table */
- ulint n) /*!< in: row number */
-{
- i_s_table_cache_t* table_cache;
- ulint i;
- void* row;
-
- table_cache = cache_select_table(cache, table);
-
- ut_a(n < table_cache->rows_used);
-
- row = NULL;
-
- for (i = 0; i < MEM_CHUNKS_IN_TABLE_CACHE; i++) {
-
- if (table_cache->chunks[i].offset
- + table_cache->chunks[i].rows_allocd > n) {
-
- row = (char*) table_cache->chunks[i].base
- + (n - table_cache->chunks[i].offset)
- * table_cache->row_size;
- break;
- }
- }
-
- ut_a(row != NULL);
-
- return(row);
-}
-
-/*******************************************************************//**
-Crafts a lock id string from a i_s_locks_row_t object. Returns its
-second argument. This function aborts if there is not enough space in
-lock_id. Be sure to provide at least TRX_I_S_LOCK_ID_MAX_LEN + 1 if you
-want to be 100% sure that it will not abort.
-@return resulting lock id */
-UNIV_INTERN
-char*
-trx_i_s_create_lock_id(
-/*===================*/
- const i_s_locks_row_t* row, /*!< in: innodb_locks row */
- char* lock_id,/*!< out: resulting lock_id */
- ulint lock_id_size)/*!< in: size of the lock id
- buffer */
-{
- int res_len;
-
- /* please adjust TRX_I_S_LOCK_ID_MAX_LEN if you change this */
-
- if (row->lock_space != ULINT_UNDEFINED) {
- /* record lock */
- res_len = ut_snprintf(lock_id, lock_id_size,
- TRX_ID_FMT ":%lu:%lu:%lu",
- row->lock_trx_id, row->lock_space,
- row->lock_page, row->lock_rec);
- } else {
- /* table lock */
- res_len = ut_snprintf(lock_id, lock_id_size,
- TRX_ID_FMT ":" UINT64PF,
- row->lock_trx_id,
- row->lock_table_id);
- }
-
- /* the typecast is safe because snprintf(3) never returns
- negative result */
- ut_a(res_len >= 0);
- ut_a((ulint) res_len < lock_id_size);
-
- return(lock_id);
-}
-
-UNIV_INTERN
-void
-trx_i_s_get_lock_sys_memory_usage(ulint *constant, ulint *variable)
-{
- trx_t* trx;
-
- *constant = lock_sys->rec_hash->n_cells * sizeof(hash_cell_t);
- *variable = 0;
-
- if (trx_sys) {
- mutex_enter(&trx_sys->mutex);
- trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
- while (trx) {
- *variable += ((trx->lock.lock_heap) ? mem_heap_get_size(trx->lock.lock_heap) : 0);
- trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
- }
- mutex_exit(&trx_sys->mutex);
- }
-
-}
diff --git a/storage/xtradb/trx/trx0purge.cc b/storage/xtradb/trx/trx0purge.cc
deleted file mode 100644
index df4a3217820..00000000000
--- a/storage/xtradb/trx/trx0purge.cc
+++ /dev/null
@@ -1,1409 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0purge.cc
-Purge old versions
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0purge.h"
-
-#ifdef UNIV_NONINL
-#include "trx0purge.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0roll.h"
-#include "read0read.h"
-#include "fut0fut.h"
-#include "que0que.h"
-#include "row0purge.h"
-#include "row0upd.h"
-#include "trx0rec.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "os0thread.h"
-#include "srv0mon.h"
-#include "mtr0log.h"
-
-/** Maximum allowable purge history length. <=0 means 'infinite'. */
-UNIV_INTERN ulong srv_max_purge_lag = 0;
-
-/** Max DML user threads delay in micro-seconds. */
-UNIV_INTERN ulong srv_max_purge_lag_delay = 0;
-
-/** The global data structure coordinating a purge */
-UNIV_INTERN trx_purge_t* purge_sys = NULL;
-
-/** A dummy undo record used as a return value when we have a whole undo log
-which needs no purge */
-UNIV_INTERN trx_undo_rec_t trx_purge_dummy_rec;
-
-#ifdef UNIV_PFS_RWLOCK
-/* Key to register trx_purge_latch with performance schema */
-UNIV_INTERN mysql_pfs_key_t trx_purge_latch_key;
-#endif /* UNIV_PFS_RWLOCK */
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register purge_sys_bh_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t purge_sys_bh_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-#ifdef UNIV_DEBUG
-UNIV_INTERN my_bool srv_purge_view_update_only_debug;
-#endif /* UNIV_DEBUG */
-
-/****************************************************************//**
-Builds a purge 'query' graph. The actual purge is performed by executing
-this query graph.
-@return own: the query graph */
-static
-que_t*
-trx_purge_graph_build(
-/*==================*/
- trx_t* trx, /*!< in: transaction */
- ulint n_purge_threads) /*!< in: number of purge
- threads */
-{
- ulint i;
- mem_heap_t* heap;
- que_fork_t* fork;
-
- heap = mem_heap_create(512);
- fork = que_fork_create(NULL, NULL, QUE_FORK_PURGE, heap);
- fork->trx = trx;
-
- for (i = 0; i < n_purge_threads; ++i) {
- que_thr_t* thr;
-
- thr = que_thr_create(fork, heap);
-
- thr->child = row_purge_node_create(thr, heap);
- }
-
- return(fork);
-}
-
-/********************************************************************//**
-Creates the global purge system control structure and inits the history
-mutex. */
-UNIV_INTERN
-void
-trx_purge_sys_create(
-/*=================*/
- ulint n_purge_threads, /*!< in: number of purge
- threads */
- ib_bh_t* ib_bh) /*!< in, own: UNDO log min
- binary heap */
-{
- purge_sys = static_cast<trx_purge_t*>(mem_zalloc(sizeof(*purge_sys)));
-
- purge_sys->state = PURGE_STATE_INIT;
- purge_sys->event = os_event_create();
-
- /* Take ownership of ib_bh, we are responsible for freeing it. */
- purge_sys->ib_bh = ib_bh;
-
- rw_lock_create(trx_purge_latch_key,
- &purge_sys->latch, SYNC_PURGE_LATCH);
-
- mutex_create(
- purge_sys_bh_mutex_key, &purge_sys->bh_mutex,
- SYNC_PURGE_QUEUE);
-
- purge_sys->heap = mem_heap_create(256);
-
- ut_a(n_purge_threads > 0);
-
- purge_sys->sess = sess_open();
-
- purge_sys->trx = purge_sys->sess->trx;
-
- ut_a(purge_sys->trx->sess == purge_sys->sess);
-
- /* A purge transaction is not a real transaction, we use a transaction
- here only because the query threads code requires it. It is otherwise
- quite unnecessary. We should get rid of it eventually. */
- purge_sys->trx->id = 0;
- purge_sys->trx->start_time = ut_time();
- purge_sys->trx->state = TRX_STATE_ACTIVE;
- purge_sys->trx->op_info = "purge trx";
-
- purge_sys->query = trx_purge_graph_build(
- purge_sys->trx, n_purge_threads);
-
- purge_sys->view = read_view_purge_open(purge_sys->prebuilt_clone,
- purge_sys->prebuilt_view);
-}
-
-/************************************************************************
-Frees the global purge system control structure. */
-UNIV_INTERN
-void
-trx_purge_sys_close(void)
-/*======================*/
-{
- que_graph_free(purge_sys->query);
-
- ut_a(purge_sys->trx->id == 0);
- ut_a(purge_sys->sess->trx == purge_sys->trx);
-
- purge_sys->trx->state = TRX_STATE_NOT_STARTED;
-
- sess_close(purge_sys->sess);
-
- read_view_free(purge_sys->prebuilt_view);
- read_view_free(purge_sys->prebuilt_clone);
-
- rw_lock_free(&purge_sys->latch);
- mutex_free(&purge_sys->bh_mutex);
-
- mem_heap_free(purge_sys->heap);
-
- ib_bh_free(purge_sys->ib_bh);
-
- os_event_free(purge_sys->event);
- mem_free(purge_sys);
-
- purge_sys = NULL;
-}
-
-/*================ UNDO LOG HISTORY LIST =============================*/
-
-/********************************************************************//**
-Adds the update undo log as the first log in the history list. Removes the
-update undo log segment from the rseg slot if it is too big for reuse. */
-UNIV_INTERN
-void
-trx_purge_add_update_undo_to_history(
-/*=================================*/
- trx_t* trx, /*!< in: transaction */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_undo_t* undo;
- trx_rseg_t* rseg;
- trx_rsegf_t* rseg_header;
- trx_ulogf_t* undo_header;
-
- undo = trx->update_undo;
- rseg = undo->rseg;
-
- rseg_header = trx_rsegf_get(
- undo->rseg->space, undo->rseg->zip_size, undo->rseg->page_no,
- mtr);
-
- undo_header = undo_page + undo->hdr_offset;
-
- if (undo->state != TRX_UNDO_CACHED) {
- ulint hist_size;
-#ifdef UNIV_DEBUG
- trx_usegf_t* seg_header = undo_page + TRX_UNDO_SEG_HDR;
-#endif /* UNIV_DEBUG */
-
- /* The undo log segment will not be reused */
-
- if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- ut_error;
- }
-
- trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr);
-
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED);
-
- hist_size = mtr_read_ulint(
- rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr);
-
- ut_ad(undo->size == flst_get_len(
- seg_header + TRX_UNDO_PAGE_LIST, mtr));
-
- mlog_write_ulint(
- rseg_header + TRX_RSEG_HISTORY_SIZE,
- hist_size + undo->size, MLOG_4BYTES, mtr);
- }
-
- /* Before any transaction-generating background threads or the
- purge have been started, recv_recovery_rollback_active() can
- start transactions in row_merge_drop_temp_indexes() and
- fts_drop_orphaned_tables(), and roll back recovered transactions.
- After the purge thread has been given permission to exit,
- in fast shutdown, we may roll back transactions (trx->undo_no==0)
- in THD::cleanup() invoked from unlink_thd(). */
- ut_ad(srv_undo_sources
- || ((srv_startup_is_before_trx_rollback_phase
- || trx_rollback_or_clean_is_active)
- && purge_sys->state == PURGE_STATE_INIT)
- || (trx->undo_no == 0 && srv_fast_shutdown));
-
- /* Add the log as the first in the history list */
- flst_add_first(rseg_header + TRX_RSEG_HISTORY,
- undo_header + TRX_UNDO_HISTORY_NODE, mtr);
-
-#ifdef HAVE_ATOMIC_BUILTINS
- os_atomic_increment_ulint(&trx_sys->rseg_history_len, 1);
-#else
- mutex_enter(&trx_sys->mutex);
- ++trx_sys->rseg_history_len;
- mutex_exit(&trx_sys->mutex);
-#endif /* HAVE_ATOMIC_BUILTINS */
-
- srv_wake_purge_thread_if_not_active();
-
- /* Write the trx number to the undo log header */
- mlog_write_ull(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr);
-
- /* Write information about delete markings to the undo log header */
-
- if (!undo->del_marks) {
- mlog_write_ulint(undo_header + TRX_UNDO_DEL_MARKS, FALSE,
- MLOG_2BYTES, mtr);
- }
-
- if (rseg->last_page_no == FIL_NULL) {
- rseg->last_page_no = undo->hdr_page_no;
- rseg->last_offset = undo->hdr_offset;
- rseg->last_trx_no = trx->no;
- rseg->last_del_marks = undo->del_marks;
- }
-}
-
-/** Remove undo log header from the history list.
-@param[in,out] rseg_hdr rollback segment header
-@param[in] log_hdr undo log segment header
-@param[in,out] mtr mini transaction. */
-static
-void
-trx_purge_remove_log_hdr(
- trx_rsegf_t* rseg_hdr,
- trx_ulogf_t* log_hdr,
- mtr_t* mtr)
-{
- flst_remove(rseg_hdr + TRX_RSEG_HISTORY,
- log_hdr + TRX_UNDO_HISTORY_NODE, mtr);
-
- os_atomic_decrement_ulint(&trx_sys->rseg_history_len, 1);
-}
-
-/** Frees an undo log segment which is in the history list. Removes the
-undo log hdr from the history list.
-@param[in,out] rseg rollback segment
-@param[in] hdr_addr file address of log_hdr
-@param[in] noredo skip redo logging. */
-static
-void
-trx_purge_free_segment(
- trx_rseg_t* rseg,
- fil_addr_t hdr_addr)
-{
- mtr_t mtr;
- trx_rsegf_t* rseg_hdr;
- trx_ulogf_t* log_hdr;
- trx_usegf_t* seg_hdr;
- ulint seg_size;
- ulint hist_size;
- ibool marked = FALSE;
-
- /* fputs("Freeing an update undo log segment\n", stderr); */
-
- for (;;) {
- page_t* undo_page;
-
- mtr_start(&mtr);
-
- mutex_enter(&rseg->mutex);
-
- rseg_hdr = trx_rsegf_get(
- rseg->space, rseg->zip_size, rseg->page_no, &mtr);
-
- undo_page = trx_undo_page_get(
- rseg->space, rseg->zip_size, hdr_addr.page, &mtr);
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
- log_hdr = undo_page + hdr_addr.boffset;
-
- /* Mark the last undo log totally purged, so that if the
- system crashes, the tail of the undo log will not get accessed
- again. The list of pages in the undo log tail gets inconsistent
- during the freeing of the segment, and therefore purge should
- not try to access them again. */
-
- if (!marked) {
- mlog_write_ulint(
- log_hdr + TRX_UNDO_DEL_MARKS, FALSE,
- MLOG_2BYTES, &mtr);
-
- marked = TRUE;
- }
-
- if (fseg_free_step_not_header(
- seg_hdr + TRX_UNDO_FSEG_HEADER, &mtr)) {
-
- break;
- }
-
- mutex_exit(&rseg->mutex);
-
- mtr_commit(&mtr);
- }
-
- /* The page list may now be inconsistent, but the length field
- stored in the list base node tells us how big it was before we
- started the freeing. */
-
- seg_size = flst_get_len(seg_hdr + TRX_UNDO_PAGE_LIST, &mtr);
-
- /* We may free the undo log segment header page; it must be freed
- within the same mtr as the undo log header is removed from the
- history list: otherwise, in case of a database crash, the segment
- could become inaccessible garbage in the file space. */
-
- trx_purge_remove_log_hdr(rseg_hdr, log_hdr, &mtr);
-
- do {
-
- /* Here we assume that a file segment with just the header
- page can be freed in a few steps, so that the buffer pool
- is not flooded with bufferfixed pages: see the note in
- fsp0fsp.cc. */
-
- } while(!fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER, &mtr));
-
- hist_size = mtr_read_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
- MLOG_4BYTES, &mtr);
- ut_ad(hist_size >= seg_size);
-
- mlog_write_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE,
- hist_size - seg_size, MLOG_4BYTES, &mtr);
-
- ut_ad(rseg->curr_size >= seg_size);
-
- rseg->curr_size -= seg_size;
-
- mutex_exit(&(rseg->mutex));
-
- mtr_commit(&mtr);
-}
-
-/********************************************************************//**
-Removes unnecessary history data from a rollback segment. */
-static
-void
-trx_purge_truncate_rseg_history(
-/*============================*/
- trx_rseg_t* rseg, /*!< in: rollback segment */
- const purge_iter_t* limit) /*!< in: truncate offset */
-{
- fil_addr_t hdr_addr;
- fil_addr_t prev_hdr_addr;
- trx_rsegf_t* rseg_hdr;
- page_t* undo_page;
- trx_ulogf_t* log_hdr;
- trx_usegf_t* seg_hdr;
- mtr_t mtr;
- trx_id_t undo_trx_no;
-
- mtr_start(&mtr);
- mutex_enter(&(rseg->mutex));
-
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
- rseg->page_no, &mtr);
-
- hdr_addr = trx_purge_get_log_from_hist(
- flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr));
-loop:
- if (hdr_addr.page == FIL_NULL) {
-
- mutex_exit(&(rseg->mutex));
-
- mtr_commit(&mtr);
-
- return;
- }
-
- undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
- hdr_addr.page, &mtr);
-
- log_hdr = undo_page + hdr_addr.boffset;
-
- undo_trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
-
- if (undo_trx_no >= limit->trx_no) {
-
- if (undo_trx_no == limit->trx_no) {
-
- trx_undo_truncate_start(
- rseg, rseg->space, hdr_addr.page,
- hdr_addr.boffset, limit->undo_no);
- }
-
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- return;
- }
-
- prev_hdr_addr = trx_purge_get_log_from_hist(
- flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
-
- if ((mach_read_from_2(seg_hdr + TRX_UNDO_STATE) == TRX_UNDO_TO_PURGE)
- && (mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG) == 0)) {
-
- /* We can free the whole log segment */
-
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- /* calls the trx_purge_remove_log_hdr()
- inside trx_purge_free_segment(). */
- trx_purge_free_segment(rseg, hdr_addr);
-
- } else {
- /* Remove the log hdr from the rseg history. */
- trx_purge_remove_log_hdr(rseg_hdr, log_hdr, &mtr);
-
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
- }
-
- mtr_start(&mtr);
- mutex_enter(&(rseg->mutex));
-
- rseg_hdr = trx_rsegf_get(rseg->space, rseg->zip_size,
- rseg->page_no, &mtr);
-
- hdr_addr = prev_hdr_addr;
-
- goto loop;
-}
-
-/********************************************************************//**
-Removes unnecessary history data from rollback segments. NOTE that when this
-function is called, the caller must not have any latches on undo log pages! */
-static
-void
-trx_purge_truncate_history(
-/*========================*/
- purge_iter_t* limit, /*!< in: truncate limit */
- const read_view_t* view) /*!< in: purge view */
-{
- ulint i;
-
- /* We play safe and set the truncate limit at most to the purge view
- low_limit number, though this is not necessary */
-
- if (limit->trx_no >= view->low_limit_no) {
- limit->trx_no = view->low_limit_no;
- limit->undo_no = 0;
- }
-
- ut_ad(limit->trx_no <= purge_sys->view->low_limit_no);
-
- for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- trx_rseg_t* rseg = trx_sys->rseg_array[i];
-
- if (rseg != NULL) {
- ut_a(rseg->id == i);
- trx_purge_truncate_rseg_history(rseg, limit);
- }
- }
-}
-
-/***********************************************************************//**
-Updates the last not yet purged history log info in rseg when we have purged
-a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. */
-static
-void
-trx_purge_rseg_get_next_history_log(
-/*================================*/
- trx_rseg_t* rseg, /*!< in: rollback segment */
- ulint* n_pages_handled)/*!< in/out: number of UNDO pages
- handled */
-{
- const void* ptr;
- page_t* undo_page;
- trx_ulogf_t* log_hdr;
- fil_addr_t prev_log_addr;
- trx_id_t trx_no;
- ibool del_marks;
- mtr_t mtr;
- rseg_queue_t rseg_queue;
-
- mutex_enter(&(rseg->mutex));
-
- ut_a(rseg->last_page_no != FIL_NULL);
-
- purge_sys->iter.trx_no = rseg->last_trx_no + 1;
- purge_sys->iter.undo_no = 0;
- purge_sys->next_stored = FALSE;
-
- mtr_start(&mtr);
-
- undo_page = trx_undo_page_get_s_latched(
- rseg->space, rseg->zip_size, rseg->last_page_no, &mtr);
-
- log_hdr = undo_page + rseg->last_offset;
-
- /* Increase the purge page count by one for every handled log */
-
- (*n_pages_handled)++;
-
- prev_log_addr = trx_purge_get_log_from_hist(
- flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));
-
- if (prev_log_addr.page == FIL_NULL) {
- /* No logs left in the history list */
-
- rseg->last_page_no = FIL_NULL;
-
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- mutex_enter(&trx_sys->mutex);
-
- /* Add debug code to track history list corruption reported
- on the MySQL mailing list on Nov 9, 2004. The fut0lst.cc
- file-based list was corrupt. The prev node pointer was
- FIL_NULL, even though the list length was over 8 million nodes!
- We assume that purge truncates the history list in large
- size pieces, and if we here reach the head of the list, the
- list cannot be longer than 2000 000 undo logs now. */
-
- if (trx_sys->rseg_history_len > 2000000) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: purge reached the"
- " head of the history list,\n"
- "InnoDB: but its length is still"
- " reported as %lu! Make a detailed bug\n"
- "InnoDB: report, and submit it"
- " to http://bugs.mysql.com\n",
- (ulong) trx_sys->rseg_history_len);
- ut_ad(0);
- }
-
- mutex_exit(&trx_sys->mutex);
-
- return;
- }
-
- mutex_exit(&rseg->mutex);
-
- mtr_commit(&mtr);
-
- /* Read the trx number and del marks from the previous log header */
- mtr_start(&mtr);
-
- log_hdr = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
- prev_log_addr.page, &mtr)
- + prev_log_addr.boffset;
-
- trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO);
-
- del_marks = mach_read_from_2(log_hdr + TRX_UNDO_DEL_MARKS);
-
- mtr_commit(&mtr);
-
- mutex_enter(&(rseg->mutex));
-
- rseg->last_page_no = prev_log_addr.page;
- rseg->last_offset = prev_log_addr.boffset;
- rseg->last_trx_no = trx_no;
- rseg->last_del_marks = del_marks;
-
- rseg_queue.rseg = rseg;
- rseg_queue.trx_no = rseg->last_trx_no;
-
- /* Purge can also produce events, however these are already ordered
- in the rollback segment and any user generated event will be greater
- than the events that Purge produces. ie. Purge can never produce
- events from an empty rollback segment. */
-
- mutex_enter(&purge_sys->bh_mutex);
-
- ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
- ut_a(ptr != NULL);
-
- mutex_exit(&purge_sys->bh_mutex);
-
- mutex_exit(&rseg->mutex);
-}
-
-/***********************************************************************//**
-Chooses the rollback segment with the smallest trx_id and stores it in purge_sys->rseg.
-@return zip_size if log is for a compressed table, ULINT_UNDEFINED if
- no rollback segments to purge, 0 for non compressed tables. */
-static
-ulint
-trx_purge_get_rseg_with_min_trx_id(
-/*===============================*/
- trx_purge_t* purge_sys) /*!< in/out: purge instance */
-
-{
- ulint zip_size = 0;
-
- mutex_enter(&purge_sys->bh_mutex);
-
- /* Only purge consumes events from the binary heap, user
- threads only produce the events. */
-
- if (!ib_bh_is_empty(purge_sys->ib_bh)) {
- trx_rseg_t* rseg;
-
- rseg = ((rseg_queue_t*) ib_bh_first(purge_sys->ib_bh))->rseg;
- ib_bh_pop(purge_sys->ib_bh); /* remove the entry that was just read */
-
- mutex_exit(&purge_sys->bh_mutex);
-
- purge_sys->rseg = rseg;
- } else {
- mutex_exit(&purge_sys->bh_mutex);
-
- purge_sys->rseg = NULL; /* the heap is empty: no segment is selected */
-
- return(ULINT_UNDEFINED);
- }
-
- ut_a(purge_sys->rseg != NULL);
- /* The last_* fields of the segment are read under the segment's own mutex. */
- mutex_enter(&purge_sys->rseg->mutex);
-
- ut_a(purge_sys->rseg->last_page_no != FIL_NULL);
-
- /* We assume in purge of externally stored fields that space id is
- in the range of UNDO tablespace space ids */
- ut_a(purge_sys->rseg->space == 0
- || srv_is_undo_tablespace(purge_sys->rseg->space));
-
- zip_size = purge_sys->rseg->zip_size;
-
- ut_a(purge_sys->iter.trx_no <= purge_sys->rseg->last_trx_no);
- /* Copy the segment's last trx_no and undo log header position to purge_sys. */
- purge_sys->iter.trx_no = purge_sys->rseg->last_trx_no;
- purge_sys->hdr_offset = purge_sys->rseg->last_offset;
- purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
-
- mutex_exit(&purge_sys->rseg->mutex);
-
- return(zip_size);
-}
-
-/***********************************************************************//**
-Position the purge sys "iterator" on the undo record to use for purging; sets purge_sys->next_stored. */
-static
-void
-trx_purge_read_undo_rec(
-/*====================*/
- trx_purge_t* purge_sys, /*!< in/out: purge instance */
- ulint zip_size) /*!< in: block size or 0 */
-{
- ulint offset;
- ulint page_no;
- ib_uint64_t undo_no;
- /* Start from the undo log header position recorded in the rollback segment. */
- purge_sys->hdr_offset = purge_sys->rseg->last_offset;
- page_no = purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
-
- if (purge_sys->rseg->last_del_marks) {
- mtr_t mtr;
- trx_undo_rec_t* undo_rec = NULL;
-
- mtr_start(&mtr);
-
- undo_rec = trx_undo_get_first_rec(
- purge_sys->rseg->space,
- zip_size,
- purge_sys->hdr_page_no,
- purge_sys->hdr_offset, RW_S_LATCH, &mtr);
-
- if (undo_rec != NULL) {
- offset = page_offset(undo_rec);
- undo_no = trx_undo_rec_get_undo_no(undo_rec);
- page_no = page_get_page_no(page_align(undo_rec)); /* page that holds the record */
- } else {
- offset = 0; /* offset 0 is treated as the dummy undo log record by trx_purge_get_next_rec() */
- undo_no = 0;
- }
-
- mtr_commit(&mtr);
- } else {
- offset = 0; /* last_del_marks not set: no record is looked up, use the dummy position */
- undo_no = 0;
- }
-
- purge_sys->offset = offset;
- purge_sys->page_no = page_no;
- purge_sys->iter.undo_no = undo_no;
-
- purge_sys->next_stored = TRUE; /* the position stored above is now usable */
-}
-
-/***********************************************************************//**
-Selects the undo log that purge should process next and refreshes the
-position kept in purge_sys. Called both when no next record is known yet
-(initial positioning) and after purge has consumed the whole undo log of a
-transaction. Yields the CPU when no rollback segment is waiting. */
-static
-void
-trx_purge_choose_next_log(void)
-/*===========================*/
-{
- ut_ad(purge_sys->next_stored == FALSE);
-
- /* This call also sets purge_sys->rseg (NULL when nothing is queued). */
- const ulint zip_size = trx_purge_get_rseg_with_min_trx_id(purge_sys);
-
- if (purge_sys->rseg == NULL) {
- /* There is nothing to do yet. */
- os_thread_yield();
- return;
- }
-
- trx_purge_read_undo_rec(purge_sys, zip_size);
-}
-
-/***********************************************************************//**
-Copies the record at the purge position and moves purge_sys on to the next record to purge.
-@return copy of an undo log record or pointer to the dummy undo log record */
-static
-trx_undo_rec_t*
-trx_purge_get_next_rec(
-/*===================*/
- ulint* n_pages_handled,/*!< in/out: number of UNDO pages
- handled */
- mem_heap_t* heap) /*!< in: memory heap where copied */
-{
- trx_undo_rec_t* rec;
- trx_undo_rec_t* rec_copy;
- trx_undo_rec_t* rec2;
- page_t* undo_page;
- page_t* page;
- ulint offset;
- ulint page_no;
- ulint space;
- ulint zip_size;
- mtr_t mtr;
-
- ut_ad(purge_sys->next_stored);
- ut_ad(purge_sys->iter.trx_no < purge_sys->view->low_limit_no);
-
- space = purge_sys->rseg->space;
- zip_size = purge_sys->rseg->zip_size;
- page_no = purge_sys->page_no;
- offset = purge_sys->offset;
-
- if (offset == 0) {
- /* It is the dummy undo log record, which means that there is
- no need to purge this undo log */
-
- trx_purge_rseg_get_next_history_log(
- purge_sys->rseg, n_pages_handled);
-
- /* Look for the next undo log and record to purge */
-
- trx_purge_choose_next_log();
-
- return(&trx_purge_dummy_rec);
- }
-
- mtr_start(&mtr);
-
- undo_page = trx_undo_page_get_s_latched(space, zip_size, page_no, &mtr);
-
- rec = undo_page + offset; /* the record that this call returns a copy of */
-
- rec2 = rec; /* rec2 scans forward for the record to purge after this one */
- /* Records that match none of the break conditions below are skipped. */
- for (;;) {
- ulint type;
- trx_undo_rec_t* next_rec;
- ulint cmpl_info;
-
- /* Try first to find the next record which requires a purge
- operation from the same page of the same undo log */
-
- next_rec = trx_undo_page_get_next_rec(
- rec2, purge_sys->hdr_page_no, purge_sys->hdr_offset);
-
- if (next_rec == NULL) {
- rec2 = trx_undo_get_next_rec(
- rec2, purge_sys->hdr_page_no,
- purge_sys->hdr_offset, &mtr);
- break;
- }
-
- rec2 = next_rec;
-
- type = trx_undo_rec_get_type(rec2);
-
- if (type == TRX_UNDO_DEL_MARK_REC) {
-
- break;
- }
-
- cmpl_info = trx_undo_rec_get_cmpl_info(rec2);
-
- if (trx_undo_rec_get_extern_storage(rec2)) {
- break;
- }
-
- if ((type == TRX_UNDO_UPD_EXIST_REC)
- && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- break;
- }
- }
-
- if (rec2 == NULL) { /* no next record was found in this undo log */
- mtr_commit(&mtr);
-
- trx_purge_rseg_get_next_history_log(
- purge_sys->rseg, n_pages_handled);
-
- /* Look for the next undo log and record to purge */
-
- trx_purge_choose_next_log();
- /* Re-latch the page of the current record: space, zip_size, page_no and offset still hold the values read on entry. */
- mtr_start(&mtr);
-
- undo_page = trx_undo_page_get_s_latched(
- space, zip_size, page_no, &mtr);
-
- rec = undo_page + offset;
- } else {
- page = page_align(rec2);
-
- purge_sys->offset = rec2 - page;
- purge_sys->page_no = page_get_page_no(page);
- purge_sys->iter.undo_no = trx_undo_rec_get_undo_no(rec2);
-
- if (undo_page != page) {
- /* We advance to a new page of the undo log: */
- (*n_pages_handled)++;
- }
- }
-
- rec_copy = trx_undo_rec_copy(rec, heap); /* copied before the mini-transaction is committed */
-
- mtr_commit(&mtr);
-
- return(rec_copy);
-}
-
-/********************************************************************//**
-Returns the undo log record at the current purge position and advances the
-purge iterator past it. The copy is allocated from heap.
-@return copy of an undo log record, or pointer to trx_purge_dummy_rec when
-the whole undo log can be skipped in purge; NULL if nothing can be purged */
-static MY_ATTRIBUTE((warn_unused_result, nonnull))
-trx_undo_rec_t*
-trx_purge_fetch_next_rec(
-/*=====================*/
- roll_ptr_t* roll_ptr, /*!< out: roll pointer to undo record */
- ulint* n_pages_handled,/*!< in/out: number of UNDO log pages
- handled */
- mem_heap_t* heap) /*!< in: memory heap where copied */
-{
- /* Position the iterator first if no next record is known yet. */
- if (!purge_sys->next_stored) {
- trx_purge_choose_next_log();
- }
-
- if (!purge_sys->next_stored) {
- /* Still nothing stored: no undo log could be chosen. */
- if (srv_print_thread_releases) {
- fprintf(stderr,
- "Purge: No logs left in the"
- " history list\n");
- }
-
- return(NULL);
- }
-
- /* Do not go past the low_limit_no of the purge view. */
- if (purge_sys->iter.trx_no >= purge_sys->view->low_limit_no) {
-
- return(NULL);
- }
-
- /* Build the roll pointer before the iterator moves on. */
- *roll_ptr = trx_undo_build_roll_ptr(
- FALSE, purge_sys->rseg->id,
- purge_sys->page_no, purge_sys->offset);
-
- /* This call advances the values stored in the purge iterator. */
- trx_undo_rec_t* rec = trx_purge_get_next_rec(n_pages_handled, heap);
-
- return(rec);
-}
-
-/*******************************************************************//**
-Fetches undo log records and attaches them to the purge nodes of the query threads, round-robin.
-@return number of undo log pages handled in the batch */
-static
-ulint
-trx_purge_attach_undo_recs(
-/*=======================*/
- ulint n_purge_threads,/*!< in: number of purge threads */
- trx_purge_t* purge_sys, /*!< in/out: purge instance */
- purge_iter_t* limit, /*!< out: records read up to */
- ulint batch_size) /*!< in: no. of pages to purge */
-{
- que_thr_t* thr;
- ulint i = 0;
- ulint n_pages_handled = 0;
- ulint n_thrs = UT_LIST_GET_LEN(purge_sys->query->thrs);
-
- ut_a(n_purge_threads > 0);
-
- *limit = purge_sys->iter;
-
- /* Debug code to validate some pre-requisites and reset done flag. */
- for (thr = UT_LIST_GET_FIRST(purge_sys->query->thrs);
- thr != NULL && i < n_purge_threads;
- thr = UT_LIST_GET_NEXT(thrs, thr), ++i) {
-
- purge_node_t* node;
-
- /* Get the purge node. */
- node = (purge_node_t*) thr->child;
-
- ut_a(que_node_get_type(node) == QUE_NODE_PURGE);
- ut_a(node->undo_recs == NULL);
- ut_a(node->done);
-
- node->done = FALSE;
- }
-
- /* There should never be fewer nodes than threads, the inverse
- however is allowed because we only use purge threads as needed. */
- ut_a(i == n_purge_threads);
-
- /* Fetch and parse the UNDO records. The UNDO records are added
- to a per purge node vector. */
- thr = UT_LIST_GET_FIRST(purge_sys->query->thrs);
- ut_a(n_thrs > 0 && thr != NULL);
-
- ut_ad(trx_purge_check_limit());
-
- i = 0; /* now counts the records handed out, for the wrap-around below */
-
- for (;;) {
- purge_node_t* node;
- trx_purge_rec_t* purge_rec;
-
- ut_a(!thr->is_active);
-
- /* Get the purge node. */
- node = (purge_node_t*) thr->child;
- ut_a(que_node_get_type(node) == QUE_NODE_PURGE);
-
- purge_rec = static_cast<trx_purge_rec_t*>(
- mem_heap_zalloc(node->heap, sizeof(*purge_rec)));
-
- /* Track the max {trx_id, undo_no} for truncating the
- UNDO logs once we have purged the records. */
-
- if (purge_sys->iter.trx_no > limit->trx_no
- || (purge_sys->iter.trx_no == limit->trx_no
- && purge_sys->iter.undo_no >= limit->undo_no)) {
-
- *limit = purge_sys->iter;
- }
-
- /* Fetch the next record, and advance the purge_sys->iter. */
- purge_rec->undo_rec = trx_purge_fetch_next_rec(
- &purge_rec->roll_ptr, &n_pages_handled, node->heap);
-
- if (purge_rec->undo_rec != NULL) {
-
- if (node->undo_recs == NULL) {
- node->undo_recs = ib_vector_create(
- ib_heap_allocator_create(node->heap),
- sizeof(trx_purge_rec_t),
- batch_size);
- } else {
- ut_a(!ib_vector_is_empty(node->undo_recs));
- }
-
- ib_vector_push(node->undo_recs, purge_rec);
-
- if (n_pages_handled >= batch_size) { /* the page budget of this batch is used up */
-
- break;
- }
- } else {
- break; /* trx_purge_fetch_next_rec() returned NULL: stop fetching */
- }
-
- thr = UT_LIST_GET_NEXT(thrs, thr);
-
- if (!(++i % n_purge_threads)) { /* wrap around after n_purge_threads nodes */
- thr = UT_LIST_GET_FIRST(purge_sys->query->thrs);
- }
-
- ut_a(thr != NULL);
- }
-
- ut_ad(trx_purge_check_limit());
-
- return(n_pages_handled);
-}
-
-/*******************************************************************//**
-Calculate how long DML statements must be delayed to let purge catch up.
-@return delay in microseconds, at most srv_max_purge_lag_delay; 0 = none */
-static
-ulint
-trx_purge_dml_delay(void)
-/*=====================*/
-{
- /* Unless srv_max_purge_lag is positive the throttling is off: DML
- statements are then not delayed at all. */
- if (!(srv_max_purge_lag > 0)) {
-
- return(0);
- }
-
- /* How far the history list has grown relative to the allowed lag.
- Note: trx_sys->rseg_history_len is read without holding
- trx_sys->mutex, so this is a dirty read. */
- const float ratio
- = float(trx_sys->rseg_history_len) / srv_max_purge_lag;
-
- /* in microseconds; remains 0 while the ratio does not exceed 1 */
- ulint delay = 0;
-
- if (ratio > 1.0) {
- /* The history list is longer than srv_max_purge_lag:
- DML statements are delayed by at least 5000 microseconds. */
- delay = (ulint) ((ratio - .5) * 10000);
- }
-
- /* Never exceed the configured upper bound. */
- if (delay > srv_max_purge_lag_delay) {
- delay = srv_max_purge_lag_delay;
- }
-
- MONITOR_SET(MONITOR_DML_PURGE_DELAY, delay);
-
- return(delay);
-}
-
-/*******************************************************************//**
-Wait for pending purge jobs to complete, i.e. until n_completed equals the sampled n_submitted. */
-static
-void
-trx_purge_wait_for_workers_to_complete(
-/*===================================*/
- trx_purge_t* purge_sys) /*!< in: purge instance */
-{
- ulint n_submitted = purge_sys->n_submitted;
- /* Both build variants below loop on the n_submitted value sampled above. */
-#ifdef HAVE_ATOMIC_BUILTINS
- /* Ensure that the work queue empties out (old == new: presumably a pure atomic equality test). */
- while (!os_compare_and_swap_ulint(
- &purge_sys->n_completed, n_submitted, n_submitted)) {
-#else
- mutex_enter(&purge_sys->bh_mutex);
-
- while (purge_sys->n_completed < n_submitted) {
-#endif /* HAVE_ATOMIC_BUILTINS */
-
-#ifndef HAVE_ATOMIC_BUILTINS
- mutex_exit(&purge_sys->bh_mutex);
-#endif /* !HAVE_ATOMIC_BUILTINS */
- /* Release one worker thread while tasks remain queued, then yield. */
- if (srv_get_task_queue_length() > 0) {
- srv_release_threads(SRV_WORKER, 1);
- }
-
- os_thread_yield();
-
-#ifndef HAVE_ATOMIC_BUILTINS
- mutex_enter(&purge_sys->bh_mutex);
-#endif /* !HAVE_ATOMIC_BUILTINS */
- }
-
-#ifndef HAVE_ATOMIC_BUILTINS
- mutex_exit(&purge_sys->bh_mutex);
-#endif /* !HAVE_ATOMIC_BUILTINS */
-
- /* None of the worker threads should be doing any work. */
- ut_a(purge_sys->n_submitted == purge_sys->n_completed);
-
- /* There should be no outstanding tasks as long
- as the worker threads are active. */
- ut_a(srv_get_task_queue_length() == 0);
-}
-
-/******************************************************************//**
-Truncates old history from the rollback segments, up to purge_sys->limit
-(or up to purge_sys->iter while limit.trx_no is 0). */
-static
-void
-trx_purge_truncate(void)
-/*====================*/
-{
- ut_ad(trx_purge_check_limit());
-
- purge_iter_t* bound = (purge_sys->limit.trx_no != 0)
- ? &purge_sys->limit : &purge_sys->iter;
-
- trx_purge_truncate_history(bound, purge_sys->view);
-}
-
-/*******************************************************************//**
-This function runs a purge batch.
-@return number of undo log pages handled in the batch */
-UNIV_INTERN
-ulint
-trx_purge(
-/*======*/
- ulint n_purge_threads, /*!< in: number of purge tasks
- to submit to the work queue */
- ulint batch_size, /*!< in: the maximum number of UNDO log
- pages to purge in one batch */
- bool truncate) /*!< in: truncate history if true */
-{
- que_thr_t* thr = NULL;
- ulint n_pages_handled;
-
- ut_a(n_purge_threads > 0);
-
- srv_dml_needed_delay = trx_purge_dml_delay();
-
- /* The number of tasks submitted should be completed. */
- ut_a(purge_sys->n_submitted == purge_sys->n_completed);
-
- rw_lock_x_lock(&purge_sys->latch);
-
- purge_sys->view = NULL;
-
- mem_heap_empty(purge_sys->heap); /* NOTE(review): presumably the previous view lived in this heap — confirm */
- /* A fresh purge view is opened while the latch is still held in X mode. */
- purge_sys->view = read_view_purge_open(purge_sys->prebuilt_clone,
- purge_sys->prebuilt_view);
-
- rw_lock_x_unlock(&purge_sys->latch);
-
-#ifdef UNIV_DEBUG
- if (srv_purge_view_update_only_debug) {
- return(0);
- }
-#endif
-
- /* Fetch the UNDO recs that need to be purged. */
- n_pages_handled = trx_purge_attach_undo_recs(
- n_purge_threads, purge_sys, &purge_sys->limit, batch_size);
-
- /* Do we do an asynchronous purge or not ? */
- if (n_purge_threads > 1) {
- ulint i = 0;
-
- /* Submit the tasks to the work queue. */
- for (i = 0; i < n_purge_threads - 1; ++i) {
- thr = que_fork_scheduler_round_robin(
- purge_sys->query, thr);
-
- ut_a(thr != NULL);
-
- srv_que_task_enqueue_low(thr);
- }
- /* The last task is not enqueued: this thread runs it at run_synchronously. */
- thr = que_fork_scheduler_round_robin(purge_sys->query, thr);
- ut_a(thr != NULL);
-
- purge_sys->n_submitted += n_purge_threads - 1; /* the tasks enqueued above */
-
- goto run_synchronously;
-
- /* Do it synchronously. */
- } else {
- thr = que_fork_scheduler_round_robin(purge_sys->query, NULL);
- ut_ad(thr);
-
-run_synchronously:
- ++purge_sys->n_submitted; /* the task that this thread runs itself */
-
- que_run_threads(thr);
-
- os_atomic_inc_ulint(
- &purge_sys->bh_mutex, &purge_sys->n_completed, 1);
-
- if (n_purge_threads > 1) {
- trx_purge_wait_for_workers_to_complete(purge_sys);
- }
- }
-
- ut_a(purge_sys->n_submitted == purge_sys->n_completed);
-
-#ifdef UNIV_DEBUG
- rw_lock_x_lock(&purge_sys->latch);
- if (purge_sys->limit.trx_no == 0) {
- purge_sys->done = purge_sys->iter;
- } else {
- purge_sys->done = purge_sys->limit;
- }
- rw_lock_x_unlock(&purge_sys->latch);
-#endif /* UNIV_DEBUG */
-
- if (truncate) {
- trx_purge_truncate();
- }
-
- MONITOR_INC_VALUE(MONITOR_PURGE_INVOKED, 1);
- MONITOR_INC_VALUE(MONITOR_PURGE_N_PAGE_HANDLED, n_pages_handled);
-
- return(n_pages_handled);
-}
-
-/*******************************************************************//**
-Reads purge_sys->state while holding purge_sys->latch in X mode.
-@return the purge state observed under the latch */
-UNIV_INTERN
-purge_state_t
-trx_purge_state(void)
-/*=================*/
-{
- rw_lock_x_lock(&purge_sys->latch);
-
- /* Copy the value out so that it can be returned after the
- latch has been released. */
- const purge_state_t snapshot = purge_sys->state;
-
- rw_lock_x_unlock(&purge_sys->latch);
-
- return(snapshot);
-}
-
-/*******************************************************************//**
-Stop purge and wait for it to stop, move to PURGE_STATE_STOP. Increments purge_sys->n_stop. */
-UNIV_INTERN
-void
-trx_purge_stop(void)
-/*================*/
-{
- ut_a(srv_n_purge_threads > 0);
-
- rw_lock_x_lock(&purge_sys->latch);
-
- const ib_int64_t sig_count = os_event_reset(purge_sys->event); /* passed to os_event_wait_low() below */
- const purge_state_t state = purge_sys->state;
-
- ut_a(state == PURGE_STATE_RUN || state == PURGE_STATE_STOP);
- /* Stop requests are counted; trx_purge_run() decrements the count again. */
- ++purge_sys->n_stop;
-
- if (state == PURGE_STATE_RUN) {
- ib_logf(IB_LOG_LEVEL_INFO, "Stopping purge");
-
- /* We need to wakeup the purge thread in case it is suspended,
- so that it can acknowledge the state change. */
-
- srv_purge_wakeup();
- }
-
- purge_sys->state = PURGE_STATE_STOP;
-
- rw_lock_x_unlock(&purge_sys->latch);
-
- if (state != PURGE_STATE_STOP) { /* the state was RUN when this call sampled it */
-
- /* Wait for purge coordinator to signal that it
- is suspended. */
- os_event_wait_low(purge_sys->event, sig_count);
- } else { /* the state was already STOP: poll purge_sys->running instead */
- bool once = true;
-
- rw_lock_x_lock(&purge_sys->latch);
-
- /* Wait for purge to signal that it has actually stopped. */
- while (purge_sys->running) {
-
- if (once) {
- ib_logf(IB_LOG_LEVEL_INFO,
- "Waiting for purge to stop");
- once = false;
- }
-
- rw_lock_x_unlock(&purge_sys->latch);
-
- os_thread_sleep(10000); /* NOTE(review): presumably microseconds, i.e. 10 ms — confirm */
-
- rw_lock_x_lock(&purge_sys->latch);
- }
-
- rw_lock_x_unlock(&purge_sys->latch);
- }
-
- MONITOR_INC_VALUE(MONITOR_PURGE_STOP_COUNT, 1);
-}
-
-/*******************************************************************//**
-Resume purge: undo one trx_purge_stop() call; move to PURGE_STATE_RUN when none remain. */
-UNIV_INTERN
-void
-trx_purge_run(void)
-/*===============*/
-{
- rw_lock_x_lock(&purge_sys->latch);
-
- switch(purge_sys->state) {
- case PURGE_STATE_INIT:
- case PURGE_STATE_EXIT:
- case PURGE_STATE_DISABLED:
- ut_error; /* resuming is not valid in these states */
-
- case PURGE_STATE_RUN:
- case PURGE_STATE_STOP:
- break;
- }
-
- if (purge_sys->n_stop > 0) {
-
- ut_a(purge_sys->state == PURGE_STATE_STOP);
-
- --purge_sys->n_stop;
-
- if (purge_sys->n_stop == 0) { /* no stop request is outstanding any more */
-
- ib_logf(IB_LOG_LEVEL_INFO, "Resuming purge");
-
- purge_sys->state = PURGE_STATE_RUN;
- }
-
- MONITOR_INC_VALUE(MONITOR_PURGE_RESUME_COUNT, 1);
- } else {
- ut_a(purge_sys->state == PURGE_STATE_RUN);
- }
-
- rw_lock_x_unlock(&purge_sys->latch);
-
- srv_purge_wakeup(); /* called on every path, after the latch has been released */
-}
diff --git a/storage/xtradb/trx/trx0rec.cc b/storage/xtradb/trx/trx0rec.cc
deleted file mode 100644
index 8c0904dd57b..00000000000
--- a/storage/xtradb/trx/trx0rec.cc
+++ /dev/null
@@ -1,1633 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0rec.cc
-Transaction undo log record
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0rec.h"
-
-#ifdef UNIV_NONINL
-#include "trx0rec.ic"
-#endif
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0undo.h"
-#include "mtr0log.h"
-#ifndef UNIV_HOTBACKUP
-#include "dict0dict.h"
-#include "ut0mem.h"
-#include "read0read.h"
-#include "row0ext.h"
-#include "row0upd.h"
-#include "que0que.h"
-#include "trx0purge.h"
-#include "trx0rseg.h"
-#include "row0row.h"
-
-/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/
-
-/**********************************************************************//**
-Writes the mtr log entry (MLOG_UNDO_INSERT: a 2-byte length followed by the
-record body) of the inserted undo log record on the undo log page. */
-UNIV_INLINE
-void
-trx_undof_page_add_undo_rec_log(
-/*============================*/
- page_t* undo_page, /*!< in: undo log page */
- ulint old_free, /*!< in: start offset of the inserted entry */
- ulint new_free, /*!< in: end offset of the entry */
- mtr_t* mtr) /*!< in: mtr */
-{
- byte* log_ptr;
- const byte* log_end;
- ulint len;
-
- log_ptr = mlog_open(mtr, 11 + 13 + MLOG_BUF_MARGIN);
-
- if (log_ptr == NULL) { /* NOTE(review): presumably logging is disabled for this mtr — confirm */
-
- return;
- }
-
- log_end = &log_ptr[11 + 13 + MLOG_BUF_MARGIN]; /* end of the space requested from mlog_open() above */
- log_ptr = mlog_write_initial_log_record_fast(
- undo_page, MLOG_UNDO_INSERT, log_ptr, mtr);
- len = new_free - old_free - 4; /* body only: the entry minus the 2-byte offsets at both of its ends */
-
- mach_write_to_2(log_ptr, len);
- log_ptr += 2;
-
- if (log_ptr + len <= log_end) { /* the body fits into the opened log space */
- memcpy(log_ptr, undo_page + old_free + 2, len);
- mlog_close(mtr, log_ptr + len);
- } else { /* too long: close what was written and append the body separately */
- mlog_close(mtr, log_ptr);
- mlog_catenate_string(mtr, undo_page + old_free + 2, len);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses a redo log record of adding an undo log record and, if page != NULL, applies it to the page.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_add_undo_rec(
-/*========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page) /*!< in: page or NULL */
-{
- ulint len;
- byte* rec;
- ulint first_free;
-
- if (end_ptr < ptr + 2) { /* the 2-byte length is not completely in the buffer */
-
- return(NULL);
- }
-
- len = mach_read_from_2(ptr);
- ptr += 2;
-
- if (end_ptr < ptr + len) { /* the record body is not completely in the buffer */
-
- return(NULL);
- }
-
- if (page == NULL) { /* nothing to apply: only skip over the log record */
-
- return(ptr + len);
- }
-
- first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- rec = page + first_free; /* the new record starts at the current first free offset */
-
- mach_write_to_2(rec, first_free + 4 + len); /* 2 bytes at the record start: offset just past this record */
- mach_write_to_2(rec + 2 + len, first_free); /* 2 bytes after the body: offset of this record's start */
-
- mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
- first_free + 4 + len);
- ut_memcpy(rec + 2, ptr, len); /* the body goes between the two offsets */
-
- return(ptr + len);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Calculates the free space left for extending an undo log record.
-@return bytes left after ptr, excluding FIL_PAGE_DATA_END and a 10-byte safety margin */
-UNIV_INLINE
-ulint
-trx_undo_left(
-/*==========*/
- const page_t* page, /*!< in: undo log page */
- const byte* ptr) /*!< in: pointer within the page */
-{
- /* The '- 10' is a safety margin, in case we have some small
- calculation error below */
- /* NOTE(review): the result is a ulint; presumably it wraps if ptr is closer than 10 + FIL_PAGE_DATA_END to the page end — confirm */
- return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END);
-}
-
-/**********************************************************************//**
-Set the next and previous pointers in the undo page for the undo record
-that was written to ptr. Update the first free value by the number of bytes
-written for this undo record.
-@return offset of the inserted entry on the page if succeeded, 0 if fail */
-static
-ulint
-trx_undo_page_set_next_prev_and_add(
-/*================================*/
- page_t* undo_page, /*!< in/out: undo log page */
- byte* ptr, /*!< in: ptr up to where data has been
- written on this undo page. */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint first_free; /*!< offset within undo_page */
- ulint end_of_rec; /*!< offset within undo_page */
- byte* ptr_to_first_free;
- /* pointer within undo_page
- that points to the next free
- offset value within undo_page.*/
-
- ut_ad(ptr > undo_page);
- ut_ad(ptr < undo_page + UNIV_PAGE_SIZE);
-
- if (UNIV_UNLIKELY(trx_undo_left(undo_page, ptr) < 2)) { /* 2 bytes are needed for the offset written at ptr below */
-
- return(0);
- }
-
- ptr_to_first_free = undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE;
-
- first_free = mach_read_from_2(ptr_to_first_free); /* the record being finished starts at this offset */
-
- /* Write offset of the previous undo log record */
- mach_write_to_2(ptr, first_free);
- ptr += 2;
-
- end_of_rec = ptr - undo_page;
-
- /* Write offset of the next undo log record */
- mach_write_to_2(undo_page + first_free, end_of_rec);
-
- /* Update the offset to first free undo record */
- mach_write_to_2(ptr_to_first_free, end_of_rec);
-
- /* Write the mtr log entry of this undo log record */
- trx_undof_page_add_undo_rec_log(undo_page, first_free,
- end_of_rec, mtr);
-
- return(first_free);
-}
-
-/**********************************************************************//**
-Reports in the undo log of an insert of a clustered index record.
-@return offset of the inserted entry on the page if succeed, 0 if fail */
-static
-ulint
-trx_undo_page_report_insert(
-/*========================*/
- page_t* undo_page, /*!< in: undo log page */
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: clustered index */
- const dtuple_t* clust_entry, /*!< in: index entry which will be
- inserted to the clustered index */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint first_free;
- byte* ptr;
- ulint i;
-
- ut_ad(dict_index_is_clust(index));
- ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT);
-
- first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- ptr = undo_page + first_free;
-
- ut_ad(first_free <= UNIV_PAGE_SIZE);
- /* 2 + 1 + 11 + 11: next-record offset, type byte, and presumably the maximum sizes of undo_no and table id. */
- if (trx_undo_left(undo_page, ptr) < 2 + 1 + 11 + 11) {
-
- /* Not enough space for writing the general parameters */
-
- return(0);
- }
-
- /* Reserve 2 bytes for the pointer to the next undo log record */
- ptr += 2;
-
- /* Store first some general parameters to the undo log */
- *ptr++ = TRX_UNDO_INSERT_REC;
- ptr += mach_ull_write_much_compressed(ptr, trx->undo_no);
- ptr += mach_ull_write_much_compressed(ptr, index->table->id);
- /*----------------------------------------*/
- /* Store then the fields required to uniquely determine the record
- to be inserted in the clustered index */
-
- for (i = 0; i < dict_index_get_n_unique(index); i++) {
-
- const dfield_t* field = dtuple_get_nth_field(clust_entry, i);
- ulint flen = dfield_get_len(field);
-
- if (trx_undo_left(undo_page, ptr) < 5) { /* presumably 5 is the maximum size of a compressed length — confirm */
-
- return(0);
- }
-
- ptr += mach_write_compressed(ptr, flen);
-
- if (flen != UNIV_SQL_NULL) { /* for a NULL value only the length marker is stored */
- if (trx_undo_left(undo_page, ptr) < flen) {
-
- return(0);
- }
-
- ut_memcpy(ptr, dfield_get_data(field), flen);
- ptr += flen;
- }
- }
-
- return(trx_undo_page_set_next_prev_and_add(undo_page, ptr, mtr));
-}
-
-/**********************************************************************//**
-Decodes the general parameters stored at the start of an undo log record.
-@return pointer to the part of the undo log record that follows them */
-UNIV_INTERN
-byte*
-trx_undo_rec_get_pars(
-/*==================*/
- trx_undo_rec_t* undo_rec, /*!< in: undo log record */
- ulint* type, /*!< out: undo record type:
- TRX_UNDO_INSERT_REC, ... */
- ulint* cmpl_info, /*!< out: compiler info, relevant only
- for update type records */
- bool* updated_extern, /*!< out: true if we updated an
- externally stored field */
- undo_no_t* undo_no, /*!< out: undo log record number */
- table_id_t* table_id) /*!< out: table id */
-{
- /* Skip the 2 bytes at the start of the record; the next byte packs
- the type, the compiler info and the TRX_UNDO_UPD_EXTERN flag. */
- const ulint packed = mach_read_from_1(undo_rec + 2);
- byte* cursor = undo_rec + 3;
-
- *updated_extern = (packed & TRX_UNDO_UPD_EXTERN) != 0;
-
- /* With the flag cleared: low part = type, high part = compiler info. */
- const ulint type_cmpl = packed & ~TRX_UNDO_UPD_EXTERN;
-
- *type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
- *cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
-
- /* The undo number and the table id follow, both much-compressed. */
- *undo_no = mach_ull_read_much_compressed(cursor);
- cursor += mach_ull_get_much_compressed_size(*undo_no);
-
- *table_id = mach_ull_read_much_compressed(cursor);
- cursor += mach_ull_get_much_compressed_size(*table_id);
-
- return(cursor);
-}
-
-/**********************************************************************//**
-Reads from an undo log record a stored column value.
-@return remaining part of undo log record after reading these values */
-static
-byte*
-trx_undo_rec_get_col_val(
-/*=====================*/
- byte* ptr, /*!< in: pointer to remaining part of undo log record */
- byte** field, /*!< out: pointer to stored field */
- ulint* len, /*!< out: length of the field, or UNIV_SQL_NULL */
- ulint* orig_len)/*!< out: original length of the locally
- stored part of an externally stored column, or 0 */
-{
- *len = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*len);
-
- *orig_len = 0;
-
- switch (*len) {
- case UNIV_SQL_NULL:
- *field = NULL;
- break;
- case UNIV_EXTERN_STORAGE_FIELD: /* marker: the original length and the real length follow */
- *orig_len = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*orig_len);
- *len = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*len);
- *field = ptr;
- ptr += *len;
-
- ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
- ut_ad(*len > *orig_len);
- /* @see dtuple_convert_big_rec() */
- ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE);
-
- /* we do not have access to index->table here
- ut_ad(dict_table_get_format(index->table) >= UNIV_FORMAT_B
- || *len >= col->max_prefix
- + BTR_EXTERN_FIELD_REF_SIZE);
- */
-
- *len += UNIV_EXTERN_STORAGE_FIELD; /* the returned length carries the UNIV_EXTERN_STORAGE_FIELD bias */
- break;
- default:
- *field = ptr;
- if (*len >= UNIV_EXTERN_STORAGE_FIELD) { /* biased length: only *len - UNIV_EXTERN_STORAGE_FIELD bytes are skipped */
- ptr += *len - UNIV_EXTERN_STORAGE_FIELD;
- } else {
- ptr += *len;
- }
- }
-
- return(ptr);
-}
-
-/*******************************************************************//**
-Constructs the row reference that is stored in an undo log record.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
-byte*
-trx_undo_rec_get_row_ref(
-/*=====================*/
- byte* ptr, /*!< in: remaining part of a copy of an undo log
- record, at the start of the row reference;
- NOTE that this copy of the undo log record must
- be preserved as long as the row reference is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /*!< in: clustered index */
- dtuple_t** ref, /*!< out, own: row reference */
- mem_heap_t* heap) /*!< in: memory heap from which the memory
- needed is allocated */
-{
- ut_ad(index && ptr && ref && heap);
- ut_a(dict_index_is_clust(index));
-
- /* The reference gets dict_index_get_n_unique(index) fields. */
- const ulint n_fields = dict_index_get_n_unique(index);
- dtuple_t* tuple = dtuple_create(heap, n_fields);
-
- *ref = tuple;
-
- dict_index_copy_types(tuple, index, n_fields);
-
- for (ulint pos = 0; pos < n_fields; ++pos) {
- byte* data;
- ulint data_len;
- ulint orig_len; /* read but not needed here */
- dfield_t* dfield = dtuple_get_nth_field(tuple, pos);
-
- ptr = trx_undo_rec_get_col_val(
- ptr, &data, &data_len, &orig_len);
-
- /* The field points into the undo record; nothing is
- copied. */
- dfield_set_data(dfield, data, data_len);
- }
-
- return(ptr);
-}
-
-/*******************************************************************//**
-Steps over the row reference that is stored in an undo log record.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
-byte*
-trx_undo_rec_skip_row_ref(
-/*======================*/
- byte* ptr, /*!< in: remaining part in update undo log
- record, at the start of the row reference */
- dict_index_t* index) /*!< in: clustered index */
-{
- ut_ad(index && ptr);
- ut_a(dict_index_is_clust(index));
-
- /* Decode and discard one column value per field of the row
- reference; only the advanced pointer is of interest. */
- ulint remaining = dict_index_get_n_unique(index);
-
- while (remaining > 0) {
- byte* unused_field;
- ulint unused_len;
- ulint unused_orig_len;
- ptr = trx_undo_rec_get_col_val(
- ptr, &unused_field, &unused_len, &unused_orig_len);
- --remaining;
- }
-
- return(ptr);
-}
-
-/**********************************************************************//**
-Copies a prefix of an externally stored column, followed by its BLOB pointer,
-into ext_buf so that it can be written to the undo log of an update or delete.
-@return ext_buf */
-static
-byte*
-trx_undo_page_fetch_ext(
-/*====================*/
- byte* ext_buf, /*!< in: buffer to hold the prefix
- data and BLOB pointer */
- ulint prefix_len, /*!< in: prefix size to store
- in the undo log */
- ulint zip_size, /*!< compressed page size in bytes,
- or 0 for uncompressed BLOB */
- const byte* field, /*!< in: an externally stored column */
- ulint* len) /*!< in: length of field;
- out: used length of ext_buf */
-{
- const ulint local_len = *len;
- /* The BLOB pointer is the trailing part of the local field. */
- const byte* blob_ref = field + local_len - BTR_EXTERN_FIELD_REF_SIZE;
- const ulint ext_len = btr_copy_externally_stored_field_prefix(
- ext_buf, prefix_len, zip_size, field, local_len, NULL);
- /* BLOBs should always be nonempty. */
- ut_a(ext_len);
- /* Append the BLOB pointer and report the combined length. */
- memcpy(ext_buf + ext_len, blob_ref, BTR_EXTERN_FIELD_REF_SIZE);
- *len = ext_len + BTR_EXTERN_FIELD_REF_SIZE;
- return(ext_buf);
-}
-
-/**********************************************************************//**
-Writes to the undo log the length information of an externally stored column.
-@return undo log position */
-static
-byte*
-trx_undo_page_report_modify_ext(
-/*============================*/
- byte* ptr, /*!< in: undo log position,
- at least 15 bytes must be available */
- byte* ext_buf, /*!< in: a buffer of
- DICT_MAX_FIELD_LEN_BY_FORMAT() size,
- or NULL when should not fetch
- a longer prefix */
- ulint prefix_len, /*!< prefix size to store in the
- undo log */
- ulint zip_size, /*!< compressed page size in bytes,
- or 0 for uncompressed BLOB */
- const byte** field, /*!< in/out: the locally stored part of
- the externally stored column */
- ulint* len) /*!< in/out: length of field, in bytes */
-{
- if (ext_buf == NULL) {
- /* No longer prefix is fetched: a single value, the
- length biased by UNIV_EXTERN_STORAGE_FIELD, is written. */
- const ulint biased_len = UNIV_EXTERN_STORAGE_FIELD + *len;
-
- return(ptr + mach_write_compressed(ptr, biased_len));
- }
-
- ut_a(prefix_len > 0);
-
- /* A longer prefix is stored (needed when an ordering column is
- externally stored): write a marker, then the original length. */
- ptr += mach_write_compressed(ptr, UNIV_EXTERN_STORAGE_FIELD);
- ptr += mach_write_compressed(ptr, *len);
-
- /* Replace *field and *len by the fetched prefix in ext_buf ... */
- *field = trx_undo_page_fetch_ext(
- ext_buf, prefix_len, zip_size, *field, len);
-
- /* ... and finish with the real length of the stored field. */
- return(ptr + mach_write_compressed(ptr, *len));
-}
-
-/**********************************************************************//**
-Reports in the undo log of an update or delete marking of a clustered index
-record.
-@return byte offset of the inserted undo log entry on the page if
-succeed, 0 if fail */
-static
-ulint
-trx_undo_page_report_modify(
-/*========================*/
- page_t* undo_page, /*!< in: undo log page */
- trx_t* trx, /*!< in: transaction */
- dict_index_t* index, /*!< in: clustered index where update or
- delete marking is done */
- const rec_t* rec, /*!< in: clustered index record which
- has NOT yet been modified */
- const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
- const upd_t* update, /*!< in: update vector which tells the
- columns to be updated; in the case of
- a delete, this should be set to NULL */
- ulint cmpl_info, /*!< in: compiler info on secondary
- index updates */
- mtr_t* mtr) /*!< in: mtr */
-{
- dict_table_t* table;
- ulint first_free;
- byte* ptr;
- const byte* field;
- ulint flen;
- ulint col_no;
- ulint type_cmpl;
- byte* type_cmpl_ptr;
- ulint i;
- trx_id_t trx_id;
- ibool ignore_prefix = FALSE;
- byte ext_buf[REC_VERSION_56_MAX_INDEX_COL_LEN
- + BTR_EXTERN_FIELD_REF_SIZE];
-
- ut_a(dict_index_is_clust(index));
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
- table = index->table;
-
- first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- ptr = undo_page + first_free;
-
- ut_ad(first_free <= UNIV_PAGE_SIZE);
-
- if (trx_undo_left(undo_page, ptr) < 50) {
-
- /* NOTE: the value 50 must be big enough so that the general
- fields written below fit on the undo log page */
-
- return(0);
- }
-
- /* Reserve 2 bytes for the pointer to the next undo log record */
- ptr += 2;
-
- /* Store first some general parameters to the undo log */
-
- if (!update) {
- ut_ad(!rec_get_deleted_flag(rec, dict_table_is_comp(table)));
- type_cmpl = TRX_UNDO_DEL_MARK_REC;
- } else if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
- type_cmpl = TRX_UNDO_UPD_DEL_REC;
- /* We are about to update a delete marked record.
- We don't typically need the prefix in this case unless
- the delete marking is done by the same transaction
- (which we check below). */
- ignore_prefix = TRUE;
- } else {
- type_cmpl = TRX_UNDO_UPD_EXIST_REC;
- }
-
- type_cmpl |= cmpl_info * TRX_UNDO_CMPL_INFO_MULT;
- type_cmpl_ptr = ptr;
-
- *ptr++ = (byte) type_cmpl;
- ptr += mach_ull_write_much_compressed(ptr, trx->undo_no);
-
- ptr += mach_ull_write_much_compressed(ptr, table->id);
-
- /*----------------------------------------*/
- /* Store the state of the info bits */
-
- *ptr++ = (byte) rec_get_info_bits(rec, dict_table_is_comp(table));
-
- /* Store the values of the system columns */
- field = rec_get_nth_field(rec, offsets,
- dict_index_get_sys_col_pos(
- index, DATA_TRX_ID), &flen);
- ut_ad(flen == DATA_TRX_ID_LEN);
-
- trx_id = trx_read_trx_id(field);
-
- /* If it is an update of a delete marked record, then we are
- allowed to ignore blob prefixes if the delete marking was done
- by some other trx as it must have committed by now for us to
- allow an over-write. */
- if (ignore_prefix) {
- ignore_prefix = (trx_id != trx->id);
- }
- ptr += mach_ull_write_compressed(ptr, trx_id);
-
- field = rec_get_nth_field(rec, offsets,
- dict_index_get_sys_col_pos(
- index, DATA_ROLL_PTR), &flen);
- ut_ad(flen == DATA_ROLL_PTR_LEN);
-
- ptr += mach_ull_write_compressed(ptr, trx_read_roll_ptr(field));
-
- /*----------------------------------------*/
- /* Store then the fields required to uniquely determine the
- record which will be modified in the clustered index */
-
- for (i = 0; i < dict_index_get_n_unique(index); i++) {
-
- field = rec_get_nth_field(rec, offsets, i, &flen);
-
- /* The ordering columns must not be stored externally. */
- ut_ad(!rec_offs_nth_extern(offsets, i));
- ut_ad(dict_index_get_nth_col(index, i)->ord_part);
-
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- ptr += mach_write_compressed(ptr, flen);
-
- if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
-
- return(0);
- }
-
- ut_memcpy(ptr, field, flen);
- ptr += flen;
- }
- }
-
- /*----------------------------------------*/
- /* Save to the undo log the old values of the columns to be updated. */
-
- if (update) {
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- ptr += mach_write_compressed(ptr, upd_get_n_fields(update));
-
- for (i = 0; i < upd_get_n_fields(update); i++) {
-
- ulint pos = upd_get_nth_field(update, i)->field_no;
-
- /* Write field number to undo log */
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- ptr += mach_write_compressed(ptr, pos);
-
- /* Save the old value of field */
- field = rec_get_nth_field(rec, offsets, pos, &flen);
-
- if (trx_undo_left(undo_page, ptr) < 15) {
-
- return(0);
- }
-
- if (rec_offs_nth_extern(offsets, pos)) {
- const dict_col_t* col
- = dict_index_get_nth_col(index, pos);
- ulint prefix_len
- = dict_max_field_len_store_undo(
- table, col);
-
- ut_ad(prefix_len + BTR_EXTERN_FIELD_REF_SIZE
- <= sizeof ext_buf);
-
- ptr = trx_undo_page_report_modify_ext(
- ptr,
- col->ord_part
- && !ignore_prefix
- && flen < REC_ANTELOPE_MAX_INDEX_COL_LEN
- ? ext_buf : NULL, prefix_len,
- dict_table_zip_size(table),
- &field, &flen);
-
- /* Notify purge that it eventually has to
- free the old externally stored field */
-
- trx->update_undo->del_marks = TRUE;
-
- *type_cmpl_ptr |= TRX_UNDO_UPD_EXTERN;
- } else {
- ptr += mach_write_compressed(ptr, flen);
- }
-
- if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr) < flen) {
-
- return(0);
- }
-
- ut_memcpy(ptr, field, flen);
- ptr += flen;
- }
- }
- }
-
- /*----------------------------------------*/
- /* In the case of a delete marking, and also in the case of an update
- where any ordering field of any index changes, store the values of all
- columns which occur as ordering fields in any index. This info is used
- in the purge of old versions where we use it to build and search the
- delete marked index records, to look if we can remove them from the
- index tree. Note that starting from 4.0.14 also externally stored
- fields can be ordering in some index. Starting from 5.2, we no longer
- store REC_MAX_INDEX_COL_LEN first bytes to the undo log record,
- but we can construct the column prefix fields in the index by
- fetching the first page of the BLOB that is pointed to by the
- clustered index. This works also in crash recovery, because all pages
- (including BLOBs) are recovered before anything is rolled back. */
-
- if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- byte* old_ptr = ptr;
-
- trx->update_undo->del_marks = TRUE;
-
- if (trx_undo_left(undo_page, ptr) < 5) {
-
- return(0);
- }
-
- /* Reserve 2 bytes to write the number of bytes the stored
- fields take in this undo record */
-
- ptr += 2;
-
- for (col_no = 0; col_no < dict_table_get_n_cols(table);
- col_no++) {
-
- const dict_col_t* col
- = dict_table_get_nth_col(table, col_no);
-
- if (col->ord_part) {
- ulint pos;
-
- /* Write field number to undo log */
- if (trx_undo_left(undo_page, ptr) < 5 + 15) {
-
- return(0);
- }
-
- pos = dict_index_get_nth_col_pos(index,
- col_no,
- NULL);
- ptr += mach_write_compressed(ptr, pos);
-
- /* Save the old value of field */
- field = rec_get_nth_field(rec, offsets, pos,
- &flen);
-
- if (rec_offs_nth_extern(offsets, pos)) {
- const dict_col_t* col =
- dict_index_get_nth_col(
- index, pos);
- ulint prefix_len =
- dict_max_field_len_store_undo(
- table, col);
-
- ut_a(prefix_len < sizeof ext_buf);
-
- ptr = trx_undo_page_report_modify_ext(
- ptr,
- flen < REC_ANTELOPE_MAX_INDEX_COL_LEN
- && !ignore_prefix
- ? ext_buf : NULL, prefix_len,
- dict_table_zip_size(table),
- &field, &flen);
- } else {
- ptr += mach_write_compressed(
- ptr, flen);
- }
-
- if (flen != UNIV_SQL_NULL) {
- if (trx_undo_left(undo_page, ptr)
- < flen) {
-
- return(0);
- }
-
- ut_memcpy(ptr, field, flen);
- ptr += flen;
- }
- }
- }
-
- mach_write_to_2(old_ptr, ptr - old_ptr);
- }
-
- /*----------------------------------------*/
- /* Write pointers to the previous and the next undo log records */
- if (trx_undo_left(undo_page, ptr) < 2) {
-
- return(0);
- }
-
- mach_write_to_2(ptr, first_free);
- ptr += 2;
- mach_write_to_2(undo_page + first_free, ptr - undo_page);
-
- mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
- ptr - undo_page);
-
- /* Write to the REDO log about this change in the UNDO log */
-
- trx_undof_page_add_undo_rec_log(undo_page, first_free,
- ptr - undo_page, mtr);
- return(first_free);
-}
-
-/**********************************************************************//**
-Reads from an undo log update record the system field values of the old
-version.
-@return remaining part of undo log record after reading these values */
-UNIV_INTERN
-byte*
-trx_undo_update_rec_get_sys_cols(
-/*=============================*/
- byte* ptr, /*!< in: remaining part of undo
- log record after reading
- general parameters */
- trx_id_t* trx_id, /*!< out: trx id */
- roll_ptr_t* roll_ptr, /*!< out: roll ptr */
- ulint* info_bits) /*!< out: info bits state */
-{
- /* Read the state of the info bits */
- *info_bits = mach_read_from_1(ptr);
- ptr += 1;
-
- /* Read the values of the system columns */
-
- *trx_id = mach_ull_read_compressed(ptr);
- ptr += mach_ull_get_compressed_size(*trx_id);
-
- *roll_ptr = mach_ull_read_compressed(ptr);
- ptr += mach_ull_get_compressed_size(*roll_ptr);
-
- return(ptr);
-}
-
-/**********************************************************************//**
-Reads from an update undo log record the number of updated fields.
-@return remaining part of undo log record after reading this value */
-UNIV_INLINE
-byte*
-trx_undo_update_rec_get_n_upd_fields(
-/*=================================*/
- byte* ptr, /*!< in: pointer to remaining part of undo log record */
- ulint* n) /*!< out: number of fields */
-{
- *n = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*n);
-
- return(ptr);
-}
-
-/**********************************************************************//**
-Reads from an update undo log record a stored field number.
-@return remaining part of undo log record after reading this value */
-UNIV_INLINE
-byte*
-trx_undo_update_rec_get_field_no(
-/*=============================*/
- byte* ptr, /*!< in: pointer to remaining part of undo log record */
- ulint* field_no)/*!< out: field number */
-{
- *field_no = mach_read_compressed(ptr);
- ptr += mach_get_compressed_size(*field_no);
-
- return(ptr);
-}
-
-/*******************************************************************//**
-Builds an update vector based on a remaining part of an undo log record.
-@return remaining part of the record, NULL if an error detected, which
-means that the record is corrupted */
-UNIV_INTERN
-byte*
-trx_undo_update_rec_get_update(
-/*===========================*/
- byte* ptr, /*!< in: remaining part in update undo log
- record, after reading the row reference
- NOTE that this copy of the undo log record must
- be preserved as long as the update vector is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /*!< in: clustered index */
- ulint type, /*!< in: TRX_UNDO_UPD_EXIST_REC,
- TRX_UNDO_UPD_DEL_REC, or
- TRX_UNDO_DEL_MARK_REC; in the last case,
- only trx id and roll ptr fields are added to
- the update vector */
- trx_id_t trx_id, /*!< in: transaction id from this undo record */
- roll_ptr_t roll_ptr,/*!< in: roll pointer from this undo record */
- ulint info_bits,/*!< in: info bits from this undo record */
- trx_t* trx, /*!< in: transaction */
- mem_heap_t* heap, /*!< in: memory heap from which the memory
- needed is allocated */
- upd_t** upd) /*!< out, own: update vector */
-{
- upd_field_t* upd_field;
- upd_t* update;
- ulint n_fields;
- byte* buf;
- ulint i;
-
- ut_a(dict_index_is_clust(index));
-
- if (type != TRX_UNDO_DEL_MARK_REC) {
- ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields);
- } else {
- n_fields = 0;
- }
-
- update = upd_create(n_fields + 2, heap);
-
- update->info_bits = info_bits;
-
- /* Store first trx id and roll ptr to update vector */
-
- upd_field = upd_get_nth_field(update, n_fields);
-
- buf = static_cast<byte*>(mem_heap_alloc(heap, DATA_TRX_ID_LEN));
-
- trx_write_trx_id(buf, trx_id);
-
- upd_field_set_field_no(upd_field,
- dict_index_get_sys_col_pos(index, DATA_TRX_ID),
- index, trx);
- dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN);
-
- upd_field = upd_get_nth_field(update, n_fields + 1);
-
- buf = static_cast<byte*>(mem_heap_alloc(heap, DATA_ROLL_PTR_LEN));
-
- trx_write_roll_ptr(buf, roll_ptr);
-
- upd_field_set_field_no(
- upd_field, dict_index_get_sys_col_pos(index, DATA_ROLL_PTR),
- index, trx);
- dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN);
-
- /* Store then the updated ordinary columns to the update vector */
-
- for (i = 0; i < n_fields; i++) {
-
- byte* field;
- ulint len;
- ulint field_no;
- ulint orig_len;
-
- ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
-
- if (field_no >= dict_index_get_n_fields(index)) {
- fprintf(stderr,
- "InnoDB: Error: trying to access"
- " update undo rec field %lu in ",
- (ulong) field_no);
- dict_index_name_print(stderr, trx, index);
- fprintf(stderr, "\n"
- "InnoDB: but index has only %lu fields\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com\n"
- "InnoDB: Run also CHECK TABLE ",
- (ulong) dict_index_get_n_fields(index));
- ut_print_name(stderr, trx, TRUE, index->table_name);
- fprintf(stderr, "\n"
- "InnoDB: n_fields = %lu, i = %lu, ptr %p\n",
- (ulong) n_fields, (ulong) i, ptr);
- ut_ad(0);
- *upd = NULL;
- return(NULL);
- }
-
- upd_field = upd_get_nth_field(update, i);
-
- upd_field_set_field_no(upd_field, field_no, index, trx);
-
- ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
-
- upd_field->orig_len = orig_len;
-
- if (len == UNIV_SQL_NULL) {
- dfield_set_null(&upd_field->new_val);
- } else if (len < UNIV_EXTERN_STORAGE_FIELD) {
- dfield_set_data(&upd_field->new_val, field, len);
- } else {
- len -= UNIV_EXTERN_STORAGE_FIELD;
-
- dfield_set_data(&upd_field->new_val, field, len);
- dfield_set_ext(&upd_field->new_val);
- }
- }
-
- *upd = update;
-
- return(ptr);
-}
-
-/*******************************************************************//**
-Builds a partial row from an update undo log record, for purge.
-It contains the columns which occur as ordering in any index of the table.
-Any missing columns are indicated by col->mtype == DATA_MISSING.
-@return pointer to remaining part of undo record */
-UNIV_INTERN
-byte*
-trx_undo_rec_get_partial_row(
-/*=========================*/
- byte* ptr, /*!< in: remaining part in update undo log
- record of a suitable type, at the start of
- the stored index columns;
- NOTE that this copy of the undo log record must
- be preserved as long as the partial row is
- used, as we do NOT copy the data in the
- record! */
- dict_index_t* index, /*!< in: clustered index */
- dtuple_t** row, /*!< out, own: partial row */
- ibool ignore_prefix, /*!< in: flag to indicate if we
- expect blob prefixes in undo. Used
- only in the assertion. */
- mem_heap_t* heap) /*!< in: memory heap from which the memory
- needed is allocated */
-{
- const byte* end_ptr;
- ulint row_len;
-
- ut_ad(index);
- ut_ad(ptr);
- ut_ad(row);
- ut_ad(heap);
- ut_ad(dict_index_is_clust(index));
-
- row_len = dict_table_get_n_cols(index->table);
-
- *row = dtuple_create(heap, row_len);
-
- /* Mark all columns in the row uninitialized, so that
- we can distinguish missing fields from fields that are SQL NULL. */
- for (ulint i = 0; i < row_len; i++) {
- dfield_get_type(dtuple_get_nth_field(*row, i))
- ->mtype = DATA_MISSING;
- }
-
- end_ptr = ptr + mach_read_from_2(ptr);
- ptr += 2;
-
- while (ptr != end_ptr) {
- dfield_t* dfield;
- byte* field;
- ulint field_no;
- const dict_col_t* col;
- ulint col_no;
- ulint len;
- ulint orig_len;
-
- ptr = trx_undo_update_rec_get_field_no(ptr, &field_no);
-
- col = dict_index_get_nth_col(index, field_no);
- col_no = dict_col_get_no(col);
-
- ptr = trx_undo_rec_get_col_val(ptr, &field, &len, &orig_len);
-
- dfield = dtuple_get_nth_field(*row, col_no);
- dict_col_copy_type(
- dict_table_get_nth_col(index->table, col_no),
- dfield_get_type(dfield));
- dfield_set_data(dfield, field, len);
-
- if (len != UNIV_SQL_NULL
- && len >= UNIV_EXTERN_STORAGE_FIELD) {
- dfield_set_len(dfield,
- len - UNIV_EXTERN_STORAGE_FIELD);
- dfield_set_ext(dfield);
- /* If the prefix of this column is indexed,
- ensure that enough prefix is stored in the
- undo log record. */
- if (!ignore_prefix && col->ord_part) {
- ut_a(dfield_get_len(dfield)
- >= BTR_EXTERN_FIELD_REF_SIZE);
- ut_a(dict_table_get_format(index->table)
- >= UNIV_FORMAT_B
- || dfield_get_len(dfield)
- >= REC_ANTELOPE_MAX_INDEX_COL_LEN
- + BTR_EXTERN_FIELD_REF_SIZE);
- }
- }
- }
-
- return(ptr);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************************//**
-Erases the unused undo log page end.
-@return TRUE if the page contained something, FALSE if it was empty */
-static MY_ATTRIBUTE((nonnull))
-ibool
-trx_undo_erase_page_end(
-/*====================*/
- page_t* undo_page, /*!< in/out: undo page whose end to erase */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
- ulint first_free;
-
- first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- memset(undo_page + first_free, 0xff,
- (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - first_free);
-
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr);
- return(first_free != TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
-}
-
-/***********************************************************//**
-Parses a redo log record of erasing of an undo page end.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_erase_page_end(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ut_ad(ptr && end_ptr);
-
- if (page == NULL) {
-
- return(ptr);
- }
-
- trx_undo_erase_page_end(page, mtr);
-
- return(ptr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Writes information to an undo log about an insert, update, or a delete marking
-of a clustered index record. This information is used in a rollback of the
-transaction and in consistent reads that must look to the history of this
-transaction.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-trx_undo_report_row_operation(
-/*==========================*/
- que_thr_t* thr, /*!< in: query thread */
- dict_index_t* index, /*!< in: clustered index */
- const dtuple_t* clust_entry, /*!< in: in the case of an insert,
- index entry to insert into the
- clustered index, otherwise NULL */
- const upd_t* update, /*!< in: in the case of an update,
- the update vector, otherwise NULL */
- ulint cmpl_info, /*!< in: compiler info on secondary
- index updates */
- const rec_t* rec, /*!< in: in case of an update or delete
- marking, the record in the clustered
- index, otherwise NULL */
- const ulint* offsets, /*!< in: rec_get_offsets(rec) */
- roll_ptr_t* roll_ptr) /*!< out: rollback pointer to the
- inserted undo log record,
- 0 if BTR_NO_UNDO_LOG
- flag was specified */
-{
- trx_t* trx;
- trx_undo_t* undo;
- ulint page_no;
- buf_block_t* undo_block;
- trx_rseg_t* rseg;
- mtr_t mtr;
- dberr_t err = DB_SUCCESS;
-#ifdef UNIV_DEBUG
- int loop_count = 0;
-#endif /* UNIV_DEBUG */
-
- ut_ad(!srv_read_only_mode);
- ut_a(dict_index_is_clust(index));
- ut_ad(!rec || rec_offs_validate(rec, index, offsets));
-
- ut_ad(thr);
- ut_ad(!clust_entry || (!update && !rec));
-
- trx = thr_get_trx(thr);
-
- /* This table is visible only to the session that created it. */
- if (trx->read_only) {
- ut_ad(!srv_read_only_mode);
- /* MySQL should block writes to non-temporary tables. */
- ut_a(DICT_TF2_FLAG_IS_SET(index->table, DICT_TF2_TEMPORARY));
- if (trx->rseg == 0) {
- trx_assign_rseg(trx);
- }
- }
-
- rseg = trx->rseg;
-
- mtr_start_trx(&mtr, trx);
- mutex_enter(&trx->undo_mutex);
-
- /* If the undo log is not assigned yet, assign one */
-
- if (clust_entry) {
- undo = trx->insert_undo;
-
- if (undo == NULL) {
-
- err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
- undo = trx->insert_undo;
-
- if (undo == NULL) {
- /* Did not succeed */
- ut_ad(err != DB_SUCCESS);
- goto err_exit;
- }
-
- ut_ad(err == DB_SUCCESS);
- }
- } else {
- undo = trx->update_undo;
-
- if (undo == NULL) {
- err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
- undo = trx->update_undo;
-
- if (undo == NULL) {
- /* Did not succeed */
- ut_ad(err != DB_SUCCESS);
- goto err_exit;
- }
- }
-
- ut_ad(err == DB_SUCCESS);
- }
-
- page_no = undo->last_page_no;
- undo_block = buf_page_get_gen(
- undo->space, undo->zip_size, page_no, RW_X_LATCH,
- undo->guess_block, BUF_GET, __FILE__, __LINE__, &mtr);
- buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE);
-
- do {
- ut_ad(page_no == buf_block_get_page_no(undo_block));
-
- page_t* undo_page = buf_block_get_frame(undo_block);
- ulint offset = clust_entry
- ? trx_undo_page_report_insert(
- undo_page, trx, index, clust_entry, &mtr)
- : trx_undo_page_report_modify(
- undo_page, trx, index, rec, offsets, update,
- cmpl_info, &mtr);
-
- if (UNIV_UNLIKELY(offset == 0)) {
- /* The record did not fit on the page. We erase the
- end segment of the undo log page and write a log
- record of it: this is to ensure that in the debug
- version the replicate page constructed using the log
- records stays identical to the original page */
-
- if (!trx_undo_erase_page_end(undo_page, &mtr)) {
- /* The record did not fit on an empty
- undo page. Discard the freshly allocated
- page and return an error. */
-
- /* When we remove a page from an undo
- log, this is analogous to a
- pessimistic insert in a B-tree, and we
- must reserve the counterpart of the
- tree latch, which is the rseg
- mutex. We must commit the mini-transaction
- first, because it may be holding lower-level
- latches, such as SYNC_FSP and SYNC_FSP_PAGE. */
-
- mtr_commit(&mtr);
- mtr_start_trx(&mtr, trx);
-
- mutex_enter(&rseg->mutex);
- trx_undo_free_last_page(trx, undo, &mtr);
- mutex_exit(&rseg->mutex);
-
- err = DB_UNDO_RECORD_TOO_BIG;
- goto err_exit;
- }
-
- mtr_commit(&mtr);
- } else {
- /* Success */
-
- mtr_commit(&mtr);
-
- undo->empty = FALSE;
- undo->top_page_no = page_no;
- undo->top_offset = offset;
- undo->top_undo_no = trx->undo_no;
- undo->guess_block = undo_block;
-
- trx->undo_no++;
-
- mutex_exit(&trx->undo_mutex);
-
- *roll_ptr = trx_undo_build_roll_ptr(
- clust_entry != NULL,
- rseg->id, page_no, offset);
- return(DB_SUCCESS);
- }
-
- ut_ad(page_no == undo->last_page_no);
-
- /* We have to extend the undo log by one page */
-
- ut_ad(++loop_count < 2);
- mtr_start_trx(&mtr, trx);
-
- /* When we add a page to an undo log, this is analogous to
- a pessimistic insert in a B-tree, and we must reserve the
- counterpart of the tree latch, which is the rseg mutex. */
-
- mutex_enter(&rseg->mutex);
- undo_block = trx_undo_add_page(trx, undo, &mtr);
- mutex_exit(&rseg->mutex);
-
- page_no = undo->last_page_no;
- } while (undo_block != NULL);
-
- /* Did not succeed: out of space */
- err = DB_OUT_OF_FILE_SPACE;
-
-err_exit:
- mutex_exit(&trx->undo_mutex);
- mtr_commit(&mtr);
- return(err);
-}
-
-/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
-
-/******************************************************************//**
-Copies an undo record to heap. This function can be called if we know that
-the undo log record exists.
-@return own: copy of the record */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_undo_rec_low(
-/*======================*/
- roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
- mem_heap_t* heap) /*!< in: memory heap where copied */
-{
- trx_undo_rec_t* undo_rec;
- ulint rseg_id;
- ulint page_no;
- ulint offset;
- const page_t* undo_page;
- trx_rseg_t* rseg;
- ibool is_insert;
- mtr_t mtr;
-
- trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no,
- &offset);
- rseg = trx_rseg_get_on_id(rseg_id);
-
- mtr_start(&mtr);
-
- undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
- page_no, &mtr);
-
- undo_rec = trx_undo_rec_copy(undo_page + offset, heap);
-
- mtr_commit(&mtr);
-
- return(undo_rec);
-}
-
-/******************************************************************//**
-Copies an undo record to heap.
-
-NOTE: the caller must have latches on the clustered index page.
-
-@retval true if the undo log has been
-truncated and we cannot fetch the old version
-@retval false if the undo log record is available */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-bool
-trx_undo_get_undo_rec(
-/*==================*/
- roll_ptr_t roll_ptr, /*!< in: roll pointer to record */
- trx_id_t trx_id, /*!< in: id of the trx that generated
- the roll pointer: it points to an
- undo log of this transaction */
- trx_undo_rec_t**undo_rec, /*!< out, own: copy of the record */
- mem_heap_t* heap) /*!< in: memory heap where copied */
-{
- bool missing_history;
-
- rw_lock_s_lock(&purge_sys->latch);
- missing_history = read_view_sees_trx_id(purge_sys->view, trx_id);
-
- if (!missing_history) {
- *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap);
- }
-
- rw_lock_s_unlock(&purge_sys->latch);
-
- return(missing_history);
-}
-
-#ifdef UNIV_DEBUG
-#define ATTRIB_USED_ONLY_IN_DEBUG
-#else /* UNIV_DEBUG */
-#define ATTRIB_USED_ONLY_IN_DEBUG MY_ATTRIBUTE((unused))
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-Build a previous version of a clustered index record. The caller must
-hold a latch on the index page of the clustered index record.
-@retval true if previous version was built, or if it was an insert
-or the table has been rebuilt
-@retval false if the previous version is earlier than purge_view,
-which means that it may have been removed */
-UNIV_INTERN
-bool
-trx_undo_prev_version_build(
-/*========================*/
- const rec_t* index_rec ATTRIB_USED_ONLY_IN_DEBUG,
- /*!< in: clustered index record in the
- index tree */
- mtr_t* index_mtr ATTRIB_USED_ONLY_IN_DEBUG,
- /*!< in: mtr which contains the latch to
- index_rec page and purge_view */
- const rec_t* rec, /*!< in: version of a clustered index record */
- dict_index_t* index, /*!< in: clustered index */
- ulint* offsets,/*!< in/out: rec_get_offsets(rec, index) */
- mem_heap_t* heap, /*!< in: memory heap from which the memory
- needed is allocated */
- rec_t** old_vers)/*!< out, own: previous version, or NULL if
- rec is the first inserted version, or if
- history data has been deleted (an error),
- or if the purge COULD have removed the version
- though it has not yet done so */
-{
- trx_undo_rec_t* undo_rec = NULL;
- dtuple_t* entry;
- trx_id_t rec_trx_id;
- ulint type;
- undo_no_t undo_no;
- table_id_t table_id;
- trx_id_t trx_id;
- roll_ptr_t roll_ptr;
- upd_t* update;
- byte* ptr;
- ulint info_bits;
- ulint cmpl_info;
- bool dummy_extern;
- byte* buf;
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&purge_sys->latch, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
- ut_ad(mtr_memo_contains_page(index_mtr, index_rec, MTR_MEMO_PAGE_S_FIX)
- || mtr_memo_contains_page(index_mtr, index_rec,
- MTR_MEMO_PAGE_X_FIX));
- ut_ad(rec_offs_validate(rec, index, offsets));
- ut_a(dict_index_is_clust(index));
-
- roll_ptr = row_get_rec_roll_ptr(rec, index, offsets);
-
- *old_vers = NULL;
-
- if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
- /* The record rec is the first inserted version */
- return(true);
- }
-
- rec_trx_id = row_get_rec_trx_id(rec, index, offsets);
-
- if (trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap)) {
- /* The undo record may already have been purged,
- during purge or semi-consistent read. */
- return(false);
- }
-
- ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
- &dummy_extern, &undo_no, &table_id);
-
- if (table_id != index->table->id) {
- /* The table should have been rebuilt, but purge has
- not yet removed the undo log records for the
- now-dropped old table (table_id). */
- return(true);
- }
-
- ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
- &info_bits);
-
- /* (a) If a clustered index record version is such that the
- trx id stamp in it is bigger than purge_sys->view, then the
- BLOBs in that version are known to exist (the purge has not
- progressed that far);
-
- (b) if the version is the first version such that trx id in it
- is less than purge_sys->view, and it is not delete-marked,
- then the BLOBs in that version are known to exist (the purge
- cannot have purged the BLOBs referenced by that version
- yet).
-
- This function does not fetch any BLOBs. The callers might, by
- possibly invoking row_ext_create() via row_build(). However,
- they should have all needed information in the *old_vers
- returned by this function. This is because *old_vers is based
- on the transaction undo log records. The function
- trx_undo_page_fetch_ext() will write BLOB prefixes to the
- transaction undo log that are at least as long as the longest
- possible column prefix in a secondary index. Thus, secondary
- index entries for *old_vers can be constructed without
- dereferencing any BLOB pointers. */
-
- ptr = trx_undo_rec_skip_row_ref(ptr, index);
-
- ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id,
- roll_ptr, info_bits,
- NULL, heap, &update);
- ut_a(ptr);
-
-# if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
- ut_a(!rec_offs_any_null_extern(rec, offsets));
-# endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-
- if (row_upd_changes_field_size_or_external(index, offsets, update)) {
- ulint n_ext;
-
- /* We should confirm the existence of disowned external data,
- if the previous version record is delete marked. If the trx_id
- of the previous record is seen by purge view, we should treat
- it as missing history, because the disowned external data
- might be purged already.
-
- The inherited external data (BLOBs) can be freed (purged)
- after trx_id was committed, provided that no view was started
- before trx_id. If the purge view can see the committed
- delete-marked record by trx_id, no transactions need to access
- the BLOB. */
-
- /* the row_upd_changes_disowned_external(update) call could be
- omitted, but the synchronization on purge_sys->latch is likely
- more expensive. */
-
- if ((update->info_bits & REC_INFO_DELETED_FLAG)
- && row_upd_changes_disowned_external(update)) {
- bool missing_extern;
-
- rw_lock_s_lock(&purge_sys->latch);
- missing_extern = read_view_sees_trx_id(purge_sys->view,
- trx_id);
- rw_lock_s_unlock(&purge_sys->latch);
-
- if (missing_extern) {
- /* treat as a fresh insert, not to
- cause assertion error at the caller. */
- return(true);
- }
- }
-
- /* We have to set the appropriate extern storage bits in the
- old version of the record: the extern bits in rec for those
- fields that update does NOT update, as well as the bits for
- those fields that update updates to become externally stored
- fields. Store the info: */
-
- entry = row_rec_to_index_entry(
- rec, index, offsets, &n_ext, heap);
- n_ext += btr_push_update_extern_fields(entry, update, heap);
- /* The page containing the clustered index record
- corresponding to entry is latched in mtr. Thus the
- following call is safe. */
- row_upd_index_replace_new_col_vals(entry, index, update, heap);
-
- buf = static_cast<byte*>(
- mem_heap_alloc(
- heap,
- rec_get_converted_size(index, entry, n_ext)));
-
- *old_vers = rec_convert_dtuple_to_rec(buf, index,
- entry, n_ext);
- } else {
- buf = static_cast<byte*>(
- mem_heap_alloc(heap, rec_offs_size(offsets)));
-
- *old_vers = rec_copy(buf, rec, offsets);
- rec_offs_make_valid(*old_vers, index, offsets);
- row_upd_rec_in_place(*old_vers, index, offsets, update, NULL);
- }
-
- return(true);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/trx/trx0roll.cc b/storage/xtradb/trx/trx0roll.cc
deleted file mode 100644
index 335ef8859c4..00000000000
--- a/storage/xtradb/trx/trx0roll.cc
+++ /dev/null
@@ -1,1417 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0roll.cc
-Transaction rollback
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0roll.h"
-
-#ifdef UNIV_NONINL
-#include "trx0roll.ic"
-#endif
-
-#include <mysql/service_wsrep.h>
-
-#include "fsp0fsp.h"
-#include "mach0data.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "trx0undo.h"
-#include "trx0rec.h"
-#include "que0que.h"
-#include "usr0sess.h"
-#include "srv0start.h"
-#include "read0read.h"
-#include "row0undo.h"
-#include "row0mysql.h"
-#include "lock0lock.h"
-#include "pars0pars.h"
-#include "srv0mon.h"
-#include "trx0sys.h"
-#ifdef WITH_WSREP
-#include "ha_prototypes.h"
-#endif /* WITH_WSREP */
-
-/** This many pages must be undone before a truncate is tried within
-rollback */
-#define TRX_ROLL_TRUNC_THRESHOLD 1
-
-/** true if trx_rollback_or_clean_all_recovered() thread is active */
-bool trx_rollback_or_clean_is_active;
-
-/** In crash recovery, the current trx to be rolled back; NULL otherwise */
-static const trx_t* trx_roll_crash_recv_trx = NULL;
-
-/** In crash recovery we set this to the undo n:o of the current trx to be
-rolled back. Then we can print how many % the rollback has progressed. */
-static undo_no_t trx_roll_max_undo_no;
-
-/** Auxiliary variable which tells the previous progress % we printed */
-static ulint trx_roll_progress_printed_pct;
-
-/****************************************************************//**
-Finishes a transaction rollback. */
-static
-void
-trx_rollback_finish(
-/*================*/
- trx_t* trx); /*!< in: transaction */
-
-/*******************************************************************//**
-Rolls back a transaction, either completely or to a savepoint.
-A rollback node is created on a temporary heap. If the transaction has an
-insert or an update undo log, the rollback graph and then the undo thread
-that it produces are run, and the undo graph is freed afterwards. A complete
-rollback (savept == NULL) ends with trx_rollback_finish(); a partial one
-sets trx->lock.que_state to TRX_QUE_RUNNING. Before returning, the function
-asserts that trx->error_state is DB_SUCCESS and that trx->lock.que_state
-is TRX_QUE_RUNNING.
-NOTE(review): MONITOR_TRX_ACTIVE is decremented on the partial-rollback
-path as well as on the complete one; confirm that this is intended. */
-static
-void
-trx_rollback_to_savepoint_low(
-/*==========================*/
- trx_t* trx, /*!< in: transaction handle */
- trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if
- partial rollback requested, or NULL for
- complete rollback */
-{
- que_thr_t* thr;
- mem_heap_t* heap;
- roll_node_t* roll_node;
-
- heap = mem_heap_create(512);
-
- roll_node = roll_node_create(heap);
-
- if (savept != NULL) {
- roll_node->partial = TRUE;
- roll_node->savept = *savept;
- assert_trx_in_list(trx);
- } else {
- assert_trx_nonlocking_or_in_list(trx);
- }
-
- trx->error_state = DB_SUCCESS;
-
- if (trx->insert_undo || trx->update_undo) {
- thr = pars_complete_graph_for_exec(roll_node, trx, heap);
-
- ut_a(thr == que_fork_start_command(
- static_cast<que_fork_t*>(que_node_get_parent(thr))));
-
- que_run_threads(thr);
-
- ut_a(roll_node->undo_thr != NULL);
- que_run_threads(roll_node->undo_thr);
-
- /* Free the memory reserved by the undo graph. */
- que_graph_free(static_cast<que_t*>(
- roll_node->undo_thr->common.parent));
- }
-
- if (savept == NULL) {
- trx_rollback_finish(trx);
- MONITOR_INC(MONITOR_TRX_ROLLBACK);
- } else {
- trx->lock.que_state = TRX_QUE_RUNNING;
- MONITOR_INC(MONITOR_TRX_ROLLBACK_SAVEPOINT);
- }
-
- ut_a(trx->error_state == DB_SUCCESS);
- ut_a(trx->lock.que_state == TRX_QUE_RUNNING);
-
- mem_heap_free(heap);
-
- /* There might be work for utility threads.*/
- srv_active_wake_master_thread();
-
- MONITOR_DEC(MONITOR_TRX_ACTIVE);
-}
-
-/*******************************************************************//**
-Rollback a transaction to a given savepoint or do a complete rollback.
-The transaction is first started with trx_start_if_not_started_xa() and
-the work is then delegated to trx_rollback_to_savepoint_low(). The caller
-must not own the trx mutex (checked by a debug assertion).
-@return trx->error_state as it is after the rollback: an error code or
-DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-trx_rollback_to_savepoint(
-/*======================*/
- trx_t* trx, /*!< in: transaction handle */
- trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if
- partial rollback requested, or NULL for
- complete rollback */
-{
- ut_ad(!trx_mutex_own(trx));
-
- trx_start_if_not_started_xa(trx);
-
- trx_rollback_to_savepoint_low(trx, savept);
-
- return(trx->error_state);
-}
-
-/*******************************************************************//**
-Rollback a transaction used in MySQL.
-Performs a complete rollback through trx_rollback_to_savepoint_low() with
-a NULL savepoint, setting trx->op_info to "rollback" for the duration.
-@return trx->error_state, which is asserted to be DB_SUCCESS */
-static
-dberr_t
-trx_rollback_for_mysql_low(
-/*=======================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- trx->op_info = "rollback";
-
- /* If we are doing the XA recovery of prepared transactions,
- then the transaction object does not have an InnoDB session
- object, and we set a dummy session that we use for all MySQL
- transactions.
- NOTE(review): nothing in this function assigns a session; this
- remark appears to be left over from earlier code. */
-
- trx_rollback_to_savepoint_low(trx, NULL);
-
- trx->op_info = "";
-
- ut_a(trx->error_state == DB_SUCCESS);
-
- return(trx->error_state);
-}
-
-/*******************************************************************//**
-Rollback a transaction used in MySQL.
-The action depends on trx->state: a NOT_STARTED transaction is left alone
-and DB_SUCCESS is returned; an ACTIVE or PREPARED transaction is rolled
-back completely with trx_rollback_for_mysql_low(); a COMMITTED_IN_MEMORY
-transaction falls through to ut_error.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-trx_rollback_for_mysql(
-/*===================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- /* We are reading trx->state without holding trx_sys->mutex
- here, because the rollback should be invoked for a running
- active MySQL transaction (or recovered prepared transaction)
- that is associated with the current thread. */
-
- switch (trx->state) {
- case TRX_STATE_NOT_STARTED:
- ut_ad(trx->in_mysql_trx_list);
- return(DB_SUCCESS);
-
- case TRX_STATE_ACTIVE:
- ut_ad(trx->in_mysql_trx_list);
- assert_trx_nonlocking_or_in_list(trx);
- return(trx_rollback_for_mysql_low(trx));
-
- case TRX_STATE_PREPARED:
- ut_ad(!trx_is_autocommit_non_locking(trx));
- return(trx_rollback_for_mysql_low(trx));
-
- case TRX_STATE_COMMITTED_IN_MEMORY:
- assert_trx_in_list(trx);
- break;
- }
-
- ut_error;
- return(DB_CORRUPTION);
-}
-
-/*******************************************************************//**
-Rollback the latest SQL statement for MySQL.
-For an ACTIVE transaction this rolls back to trx->last_sql_stat_start,
-rolls back the last statement of the FTS savepoint if trx->fts_trx is set,
-and then marks a new statement end. A NOT_STARTED transaction is a no-op.
-A PREPARED or COMMITTED_IN_MEMORY transaction falls through to ut_error.
-@return error code or DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-trx_rollback_last_sql_stat_for_mysql(
-/*=================================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- dberr_t err;
-
- /* We are reading trx->state without holding trx_sys->mutex
- here, because the statement rollback should be invoked for a
- running active MySQL transaction that is associated with the
- current thread. */
- ut_ad(trx->in_mysql_trx_list);
-
- switch (trx->state) {
- case TRX_STATE_NOT_STARTED:
- return(DB_SUCCESS);
- case TRX_STATE_ACTIVE:
- assert_trx_nonlocking_or_in_list(trx);
-
- trx->op_info = "rollback of SQL statement";
-
- err = trx_rollback_to_savepoint(
- trx, &trx->last_sql_stat_start);
-
- if (trx->fts_trx) {
- fts_savepoint_rollback_last_stmt(trx);
- }
-
- /* The following call should not be needed,
- but we play it safe: */
- trx_mark_sql_stat_end(trx);
-
- trx->op_info = "";
-
- return(err);
- case TRX_STATE_PREPARED:
- case TRX_STATE_COMMITTED_IN_MEMORY:
- /* The statement rollback is only allowed on an ACTIVE
- transaction, not a PREPARED or COMMITTED one. */
- break;
- }
-
- ut_error;
- return(DB_CORRUPTION);
-}
-
-/*******************************************************************//**
-Looks up a named savepoint of a transaction by walking
-trx->trx_savepoints from its first element.
-@return the first savepoint whose name compares equal to name, or NULL
-if there is none */
-static
-trx_named_savept_t*
-trx_savepoint_find(
-/*===============*/
-	trx_t*		trx,	/*!< in: transaction */
-	const char*	name)	/*!< in: savepoint name */
-{
-	trx_named_savept_t*	cur = UT_LIST_GET_FIRST(trx->trx_savepoints);
-
-	/* Advance until the list ends or the names match. */
-	while (cur != NULL && ut_strcmp(cur->name, name) != 0) {
-		cur = UT_LIST_GET_NEXT(trx_savepoints, cur);
-	}
-
-	return(cur);
-}
-
-/*******************************************************************//**
-Frees a single savepoint struct: unlinks it from trx->trx_savepoints and
-releases first its name and then the struct itself with mem_free(). */
-static
-void
-trx_roll_savepoint_free(
-/*=====================*/
- trx_t* trx, /*!< in: transaction handle */
- trx_named_savept_t* savep) /*!< in: savepoint to free */
-{
- UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
- mem_free(savep->name);
- mem_free(savep);
-}
-
-/*******************************************************************//**
-Frees the savepoint structs from savep up to the end of the savepoint
-list of the transaction. Passing NULL frees nothing. */
-UNIV_INTERN
-void
-trx_roll_savepoints_free(
-/*=====================*/
-	trx_t*			trx,	/*!< in: transaction handle */
-	trx_named_savept_t*	savep)	/*!< in: free all savepoints starting
-					with this savepoint */
-{
-	trx_named_savept_t*	following;
-
-	for (; savep != NULL; savep = following) {
-		/* Fetch the successor before savep is unlinked and freed. */
-		following = UT_LIST_GET_NEXT(trx_savepoints, savep);
-
-		trx_roll_savepoint_free(trx, savep);
-	}
-}
-
-/*******************************************************************//**
-Rolls back a transaction back to a named savepoint. Modifications after the
-savepoint are undone but InnoDB does NOT release the corresponding locks
-which are stored in memory. If a lock is 'implicit', that is, a new inserted
-row holds a lock where the lock information is carried by the trx id stored in
-the row, these locks are naturally released in the rollback. Savepoints which
-were set after this savepoint are deleted.
-The savepoint has already been looked up by the caller, so this function
-itself never reports DB_NO_SAVEPOINT.
-@return the error code of trx_rollback_to_savepoint(), DB_SUCCESS on
-success */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-trx_rollback_to_savepoint_for_mysql_low(
-/*====================================*/
- trx_t* trx, /*!< in/out: transaction */
- trx_named_savept_t* savep, /*!< in/out: savepoint */
- ib_int64_t* mysql_binlog_cache_pos)
- /*!< out: the MySQL binlog
- cache position corresponding
- to this savepoint; MySQL needs
- this information to remove the
- binlog entries of the queries
- executed after the savepoint */
-{
- dberr_t err;
-
- ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
- ut_ad(trx->in_mysql_trx_list);
-
- /* Free all savepoints strictly later than savep. */
-
- trx_roll_savepoints_free(
- trx, UT_LIST_GET_NEXT(trx_savepoints, savep));
-
- *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos;
-
- trx->op_info = "rollback to a savepoint";
-
- err = trx_rollback_to_savepoint(trx, &savep->savept);
-
- /* Store the current undo_no of the transaction so that
- we know where to roll back if we have to roll back the
- next SQL statement: */
-
- trx_mark_sql_stat_end(trx);
-
- trx->op_info = "";
-
-#ifdef WITH_WSREP
- if (wsrep_on(trx->mysql_thd) &&
- trx->lock.was_chosen_as_deadlock_victim) {
- trx->lock.was_chosen_as_deadlock_victim = FALSE;
- }
-#endif
-
- return(err);
-}
-
-/*******************************************************************//**
-Rolls back a transaction back to a named savepoint. Modifications after the
-savepoint are undone but InnoDB does NOT release the corresponding locks
-which are stored in memory. If a lock is 'implicit', that is, a new inserted
-row holds a lock where the lock information is carried by the trx id stored in
-the row, these locks are naturally released in the rollback. Savepoints which
-were set after this savepoint are deleted.
-The savepoint is looked up before trx->state is examined. Calling this on
-a PREPARED or COMMITTED_IN_MEMORY transaction that has the savepoint falls
-through to ut_error.
-@return if no savepoint of the name found then DB_NO_SAVEPOINT; DB_ERROR
-if the savepoint exists but the transaction is not started; otherwise the
-result of the rollback, DB_SUCCESS on success */
-UNIV_INTERN
-dberr_t
-trx_rollback_to_savepoint_for_mysql(
-/*================================*/
- trx_t* trx, /*!< in: transaction handle */
- const char* savepoint_name, /*!< in: savepoint name */
- ib_int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache
- position corresponding to this
- savepoint; MySQL needs this
- information to remove the
- binlog entries of the queries
- executed after the savepoint */
-{
- trx_named_savept_t* savep;
-
- /* We are reading trx->state without holding trx_sys->mutex
- here, because the savepoint rollback should be invoked for a
- running active MySQL transaction that is associated with the
- current thread. */
- ut_ad(trx->in_mysql_trx_list);
-
- savep = trx_savepoint_find(trx, savepoint_name);
-
- if (savep == NULL) {
- return(DB_NO_SAVEPOINT);
- }
-
- switch (trx->state) {
- case TRX_STATE_NOT_STARTED:
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: transaction has a savepoint ", stderr);
- ut_print_name(stderr, trx, FALSE, savep->name);
- fputs(" though it is not started\n", stderr);
- return(DB_ERROR);
- case TRX_STATE_ACTIVE:
- return(trx_rollback_to_savepoint_for_mysql_low(
- trx, savep, mysql_binlog_cache_pos));
- case TRX_STATE_PREPARED:
- case TRX_STATE_COMMITTED_IN_MEMORY:
- /* The savepoint rollback is only allowed on an ACTIVE
- transaction, not a PREPARED or COMMITTED one. */
- break;
- }
-
- ut_error;
- return(DB_CORRUPTION);
-}
-
-/*******************************************************************//**
-Creates a named savepoint. If the transaction is not yet started, starts it.
-If there is already a savepoint of the same name, this call erases that old
-savepoint and replaces it with a new one, which is appended to the end of
-the savepoint list. Savepoints are deleted in a transaction commit or
-rollback.
-@return always DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-trx_savepoint_for_mysql(
-/*====================*/
-	trx_t*		trx,			/*!< in: transaction handle */
-	const char*	savepoint_name,		/*!< in: savepoint name */
-	ib_int64_t	binlog_cache_pos)	/*!< in: MySQL binlog cache
-						position corresponding to this
-						connection at the time of the
-						savepoint */
-{
-	trx_named_savept_t*	savep;
-
-	trx_start_if_not_started_xa(trx);
-
-	savep = trx_savepoint_find(trx, savepoint_name);
-
-	if (savep != NULL) {
-		/* There is a savepoint with the same name: free that.
-		The shared helper unlinks it and releases the name and
-		the struct, exactly as the release path does. */
-
-		trx_roll_savepoint_free(trx, savep);
-	}
-
-	/* Create a new savepoint and add it as the last in the list */
-
-	savep = static_cast<trx_named_savept_t*>(mem_alloc(sizeof(*savep)));
-
-	/* The name is copied, so the caller keeps ownership of
-	savepoint_name. */
-	savep->name = mem_strdup(savepoint_name);
-
-	savep->savept = trx_savept_take(trx);
-
-	savep->mysql_binlog_cache_pos = binlog_cache_pos;
-
-	UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, savep);
-
-	return(DB_SUCCESS);
-}
-
-/*******************************************************************//**
-Releases only the named savepoint; savepoints that were set after it are
-left in the list untouched.
-@return DB_NO_SAVEPOINT if the transaction has no savepoint of that name,
-otherwise DB_SUCCESS */
-UNIV_INTERN
-dberr_t
-trx_release_savepoint_for_mysql(
-/*============================*/
-	trx_t*		trx,		/*!< in: transaction handle */
-	const char*	savepoint_name)	/*!< in: savepoint name */
-{
-	ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE, true)
-	      || trx_state_eq(trx, TRX_STATE_PREPARED, true));
-	ut_ad(trx->in_mysql_trx_list);
-
-	trx_named_savept_t*	found = trx_savepoint_find(trx, savepoint_name);
-
-	if (found == NULL) {
-		return(DB_NO_SAVEPOINT);
-	}
-
-	trx_roll_savepoint_free(trx, found);
-
-	return(DB_SUCCESS);
-}
-
-/*******************************************************************//**
-Determines if this transaction is rolling back an incomplete transaction
-in crash recovery. The check is a pointer comparison against
-trx_roll_crash_recv_trx, which is read here without any latch.
-@return TRUE if trx is an incomplete transaction that is being rolled
-back in crash recovery */
-UNIV_INTERN
-ibool
-trx_is_recv(
-/*========*/
- const trx_t* trx) /*!< in: transaction */
-{
- return(trx == trx_roll_crash_recv_trx);
-}
-
-/*******************************************************************//**
-Returns a transaction savepoint taken at this point in time. The savepoint
-records the current trx->undo_no as its least_undo_no.
-@return savepoint */
-UNIV_INTERN
-trx_savept_t
-trx_savept_take(
-/*============*/
- trx_t* trx) /*!< in: transaction */
-{
- trx_savept_t savept;
-
- savept.least_undo_no = trx->undo_no;
-
- return(savept);
-}
-
-/*******************************************************************//**
-Roll back an active transaction.
-While the rollback runs, the transaction is published in
-trx_roll_crash_recv_trx together with its starting undo number, which the
-progress printing in trx_roll_pop_top_rec_of_trx() reads. The data
-dictionary is locked for the duration if the transaction carries a
-dictionary operation. After the undo has finished, a dictionary transaction
-with a nonzero table_id additionally drops that table, unless the table no
-longer exists or is flagged as discarded, and then commits. */
-static
-void
-trx_rollback_active(
-/*================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- mem_heap_t* heap;
- que_fork_t* fork;
- que_thr_t* thr;
- roll_node_t* roll_node;
- dict_table_t* table;
- ib_int64_t rows_to_undo;
- const char* unit = "";
- ibool dictionary_locked = FALSE;
-
- heap = mem_heap_create(512);
-
- fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
- fork->trx = trx;
-
- thr = que_thr_create(fork, heap);
-
- roll_node = roll_node_create(heap);
-
- thr->child = roll_node;
- roll_node->common.parent = thr;
-
- trx->graph = fork;
-
- ut_a(thr == que_fork_start_command(fork));
-
- mutex_enter(&trx_sys->mutex);
-
- trx_roll_crash_recv_trx = trx;
-
- trx_roll_max_undo_no = trx->undo_no;
-
- trx_roll_progress_printed_pct = 0;
-
- rows_to_undo = trx_roll_max_undo_no;
-
- mutex_exit(&trx_sys->mutex);
-
- if (rows_to_undo > 1000000000) {
- rows_to_undo = rows_to_undo / 1000000;
- unit = "M";
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Rolling back trx with id " TRX_ID_FMT ", %lu%s"
- " rows to undo\n",
- trx->id,
- (ulong) rows_to_undo, unit);
-
- if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
- row_mysql_lock_data_dictionary(trx);
- dictionary_locked = TRUE;
- }
-
- que_run_threads(thr);
- ut_a(roll_node->undo_thr != NULL);
-
- que_run_threads(roll_node->undo_thr);
-
- trx_rollback_finish(thr_get_trx(roll_node->undo_thr));
-
- /* Free the memory reserved by the undo graph */
- que_graph_free(static_cast<que_t*>(
- roll_node->undo_thr->common.parent));
-
- ut_a(trx->lock.que_state == TRX_QUE_RUNNING);
-
- if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
- && trx->table_id != 0) {
-
- /* If the transaction was for a dictionary operation,
- we drop the relevant table only if it is not flagged
- as DISCARDED. If it still exists. */
-
- table = dict_table_open_on_id(
- trx->table_id, dictionary_locked,
- DICT_TABLE_OP_NORMAL);
-
- if (table && !dict_table_is_discarded(table)) {
-
- dberr_t err;
-
- /* Ensure that the table doesn't get evicted from the
- cache, keeps things simple for drop. The table object
- is still dereferenced after dict_table_close()
- below. */
-
- if (table->can_be_evicted) {
- dict_table_move_from_lru_to_non_lru(table);
- }
-
- dict_table_close(table, dictionary_locked, FALSE);
-
- ib_logf(IB_LOG_LEVEL_WARN,
- "Dropping table '%s', with id " UINT64PF " "
- "in recovery",
- table->name, trx->table_id);
-
- err = row_drop_table_for_mysql(table->name, trx, TRUE, FALSE);
- trx_commit_for_mysql(trx);
-
- ut_a(err == DB_SUCCESS);
- }
- }
-
- if (dictionary_locked) {
- row_mysql_unlock_data_dictionary(trx);
- }
-
- ib_logf(IB_LOG_LEVEL_INFO,
- "Rollback of trx with id " TRX_ID_FMT " completed", trx->id);
-
- mem_heap_free(heap);
-
- trx_roll_crash_recv_trx = NULL;
-}
-
-/*******************************************************************//**
-Rollback or clean up any resurrected incomplete transactions. It assumes
-that the caller holds the trx_sys_t::mutex and it will release the
-lock if it does a clean up or rollback.
-Transactions that are not flagged is_recovered are left alone. A recovered
-COMMITTED_IN_MEMORY transaction is cleaned up and freed. A recovered ACTIVE
-transaction is rolled back and freed if all is set or if it carries a
-dictionary operation. PREPARED transactions are never touched. A recovered
-transaction in NOT_STARTED state hits ut_error.
-@return TRUE if the transaction was cleaned up or rolled back
-and trx_sys->mutex was released. */
-static
-ibool
-trx_rollback_resurrected(
-/*=====================*/
- trx_t* trx, /*!< in: transaction to rollback or clean */
- ibool all) /*!< in: FALSE=roll back dictionary transactions;
- TRUE=roll back all non-PREPARED transactions */
-{
- ut_ad(mutex_own(&trx_sys->mutex));
-
- /* The trx->is_recovered flag and trx->state are set
- atomically under the protection of the trx->mutex (and
- lock_sys->mutex) in lock_trx_release_locks(). We do not want
- to accidentally clean up a non-recovered transaction here. */
-
- trx_mutex_enter(trx);
- bool is_recovered = trx->is_recovered;
- trx_state_t state = trx->state;
- trx_mutex_exit(trx);
-
- if (!is_recovered) {
- return(FALSE);
- }
-
- switch (state) {
- case TRX_STATE_COMMITTED_IN_MEMORY:
- mutex_exit(&trx_sys->mutex);
- fprintf(stderr,
- "InnoDB: Cleaning up trx with id " TRX_ID_FMT "\n",
- trx->id);
- trx_cleanup_at_db_startup(trx);
- trx_free_for_background(trx);
- return(TRUE);
- case TRX_STATE_ACTIVE:
- if (all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
- mutex_exit(&trx_sys->mutex);
- trx_rollback_active(trx);
- trx_free_for_background(trx);
- return(TRUE);
- }
- return(FALSE);
- case TRX_STATE_PREPARED:
- return(FALSE);
- case TRX_STATE_NOT_STARTED:
- break;
- }
-
- ut_error;
- return(FALSE);
-}
-
-/*******************************************************************//**
-Rollback or clean up any incomplete transactions which were
-encountered in crash recovery. If the transaction already was
-committed, then we clean up a possible insert undo log. If the
-transaction was not yet committed, then we roll it back.
-The read-write transaction list is rescanned from its head every time a
-transaction has been handled, because handling one releases
-trx_sys->mutex. The scan ends when a full pass finds nothing to do, or
-as soon as a fast shutdown is detected. */
-UNIV_INTERN
-void
-trx_rollback_or_clean_recovered(
-/*============================*/
-	ibool	all)	/*!< in: FALSE=roll back dictionary transactions;
-			TRUE=roll back all non-PREPARED transactions */
-{
-	trx_t*	trx;
-
-	ut_a(srv_force_recovery < SRV_FORCE_NO_TRX_UNDO);
-
-	if (trx_sys_get_n_rw_trx() == 0) {
-
-		return;
-	}
-
-	if (all) {
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"Starting in background the rollback"
-			" of recovered transactions");
-	}
-
-	/* Note: For XA recovered transactions, we rely on MySQL to
-	do rollback. They will be in TRX_STATE_PREPARED state. If the server
-	is shutdown and they are still lingering in trx_sys_t::trx_list
-	then the shutdown will hang. */
-
-	/* Loop over the transaction list as long as there are
-	recovered transactions to clean up or recover. */
-
-	do {
-		mutex_enter(&trx_sys->mutex);
-
-		for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
-		     trx != NULL;
-		     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
-			assert_trx_in_rw_list(trx);
-
-			if (srv_shutdown_state != SRV_SHUTDOWN_NONE
-			    && srv_fast_shutdown != 0) {
-				/* Abandon the rollback. Clearing trx
-				terminates the outer loop as well; with
-				trx left non-NULL the outer loop would
-				restart the scan and hit this same check
-				again for as long as the list is
-				non-empty. Clearing all suppresses the
-				completion message below. */
-				all = FALSE;
-				trx = NULL;
-				break;
-			}
-
-			/* If this function does a cleanup or rollback
-			then it will release the trx_sys->mutex, therefore
-			we need to reacquire it before retrying the loop. */
-
-			if (trx_rollback_resurrected(trx, all)) {
-
-				mutex_enter(&trx_sys->mutex);
-
-				break;
-			}
-		}
-
-		mutex_exit(&trx_sys->mutex);
-
-	} while (trx != NULL);
-
-	if (all) {
-		ib_logf(IB_LOG_LEVEL_INFO,
-			"Rollback of non-prepared transactions completed");
-	}
-}
-
-/*******************************************************************//**
-Rollback or clean up any incomplete transactions which were
-encountered in crash recovery. If the transaction already was
-committed, then we clean up a possible insert undo log. If the
-transaction was not yet committed, then we roll it back.
-Note: this is done in a background thread.
-This is the thread entry point: it calls
-trx_rollback_or_clean_recovered(TRUE), clears
-trx_rollback_or_clean_is_active and then exits the thread through
-os_thread_exit().
-@return a dummy parameter */
-extern "C" UNIV_INTERN
-os_thread_ret_t
-DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
-/*================================================*/
- void* arg MY_ATTRIBUTE((unused)))
- /*!< in: a dummy parameter required by
- os_thread_create */
-{
- my_thread_init();
- ut_ad(!srv_read_only_mode);
-
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(trx_rollback_clean_thread_key);
-#endif /* UNIV_PFS_THREAD */
-
- trx_rollback_or_clean_recovered(TRUE);
-
- trx_rollback_or_clean_is_active = false;
-
- my_thread_end();
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/*******************************************************************//**
-Creates an undo number array.
-The array header and its n_cells info cells are carved out of one
-allocation made with mem_heap_zalloc() from a heap created here; the cells
-start directly after the header. The heap is remembered in arr->heap, and
-trx_undo_arr_free() releases everything by freeing that heap.
-@return own: undo number array */
-static
-trx_undo_arr_t*
-trx_undo_arr_create(
-/*================*/
- ulint n_cells) /*!< Number of cells */
-{
- trx_undo_arr_t* arr;
- mem_heap_t* heap;
- ulint sz = sizeof(*arr) + sizeof(*arr->infos) * n_cells;
-
- heap = mem_heap_create(sz);
-
- arr = static_cast<trx_undo_arr_t*>(mem_heap_zalloc(heap, sz));
-
- arr->n_cells = n_cells;
-
- arr->infos = (trx_undo_inf_t*) (arr + 1);
-
- arr->heap = heap;
-
- return(arr);
-}
-
-/*******************************************************************//**
-Frees an undo number array by freeing the memory heap stored in
-arr->heap. */
-UNIV_INTERN
-void
-trx_undo_arr_free(
-/*==============*/
- trx_undo_arr_t* arr) /*!< in: undo number array */
-{
- mem_heap_free(arr->heap);
-}
-
-/*******************************************************************//**
-Stores info of an undo log record to the array if it is not stored yet.
-The cells of trx->undo_no_arr are scanned once. The first free cell is
-claimed tentatively for undo_no. If a cell already in use holds the same
-undo_no, the tentative claim is reverted and FALSE is returned. The scan
-stops with TRUE as soon as a cell has been claimed and all cells that were
-in use on entry have been inspected. Reaching the end of the array without
-either outcome hits ut_error.
-@return FALSE if the record already existed in the array */
-static
-ibool
-trx_undo_arr_store_info(
-/*====================*/
- trx_t* trx, /*!< in: transaction */
- undo_no_t undo_no)/*!< in: undo number */
-{
- ulint i;
- trx_undo_arr_t* arr;
- ulint n = 0;
- ulint n_used;
- trx_undo_inf_t* stored_here = NULL;
-
- arr = trx->undo_no_arr;
- n_used = arr->n_used;
-
- for (i = 0; i < arr->n_cells; i++) {
- trx_undo_inf_t* cell;
-
- cell = trx_undo_arr_get_nth_info(arr, i);
-
- if (!cell->in_use) {
- if (!stored_here) {
- /* Not in use, we may store here */
- cell->undo_no = undo_no;
- cell->in_use = TRUE;
-
- arr->n_used++;
-
- stored_here = cell;
- }
- } else {
- n++;
-
- if (cell->undo_no == undo_no) {
-
- if (stored_here) {
- stored_here->in_use = FALSE;
- ut_ad(arr->n_used > 0);
- arr->n_used--;
- }
-
- ut_ad(arr->n_used == n_used);
-
- return(FALSE);
- }
- }
-
- if (n == n_used && stored_here) {
-
- ut_ad(arr->n_used == 1 + n_used);
-
- return(TRUE);
- }
- }
-
- ut_error;
-
- return(FALSE);
-}
-
-/*******************************************************************//**
-Removes an undo number from the array: the first cell that is in use and
-holds undo_no is marked free and the use count is decremented. Nothing
-happens if no such cell exists. */
-static
-void
-trx_undo_arr_remove_info(
-/*=====================*/
-	trx_undo_arr_t*	arr,	/*!< in: undo number array */
-	undo_no_t	undo_no)/*!< in: undo number */
-{
-	for (ulint pos = 0; pos < arr->n_cells; pos++) {
-
-		trx_undo_inf_t*	slot = trx_undo_arr_get_nth_info(arr, pos);
-
-		if (!slot->in_use || slot->undo_no != undo_no) {
-			continue;
-		}
-
-		/* Only the first matching cell is released. */
-		slot->in_use = FALSE;
-		ut_ad(arr->n_used > 0);
-		arr->n_used--;
-
-		return;
-	}
-}
-
-/*******************************************************************//**
-Finds the largest undo number stored in the cells of an array that are
-in use. The scan ends early once arr->n_used cells in use have been seen.
-@return biggest value, 0 if the array is empty */
-static
-undo_no_t
-trx_undo_arr_get_biggest(
-/*=====================*/
-	const trx_undo_arr_t*	arr)	/*!< in: undo number array */
-{
-	undo_no_t	max_no = 0;
-	ulint		seen = 0;
-	ulint		pos = 0;
-
-	while (pos < arr->n_cells && seen < arr->n_used) {
-
-		const trx_undo_inf_t*	slot = &arr->infos[pos++];
-
-		if (!slot->in_use) {
-			continue;
-		}
-
-		seen++;
-
-		if (max_no < slot->undo_no) {
-			max_no = slot->undo_no;
-		}
-	}
-
-	return(max_no);
-}
-
-/***********************************************************************//**
-Tries to truncate the undo logs.
-The caller must hold trx->undo_mutex and the mutex of the rollback segment
-of the transaction (debug assertions). trx->pages_undone is reset to 0.
-The truncation limit is trx->undo_no, raised to one past the biggest undo
-number in trx->undo_no_arr if that number is not below trx->undo_no. The
-limit is passed to trx_undo_truncate_end() for the insert and the update
-undo log, whichever exist. */
-static
-void
-trx_roll_try_truncate(
-/*==================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- undo_no_t limit;
- const trx_undo_arr_t* arr;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
- ut_ad(mutex_own(&((trx->rseg)->mutex)));
-
- trx->pages_undone = 0;
-
- arr = trx->undo_no_arr;
-
- limit = trx->undo_no;
-
- if (arr->n_used > 0) {
- undo_no_t biggest;
-
- biggest = trx_undo_arr_get_biggest(arr);
-
- if (biggest >= limit) {
-
- limit = biggest + 1;
- }
- }
-
- if (trx->insert_undo) {
- trx_undo_truncate_end(trx, trx->insert_undo, limit);
- }
-
- if (trx->update_undo) {
- trx_undo_truncate_end(trx, trx->update_undo, limit);
- }
-
-#ifdef WITH_WSREP_OUT
- if (wsrep_on(trx->mysql_thd)) {
- trx->lock.was_chosen_as_deadlock_victim = FALSE;
- }
-#endif /* WITH_WSREP_OUT */
-}
-
-/***********************************************************************//**
-Pops the topmost undo log record in a single undo log and updates the info
-about the topmost record in the undo log memory struct.
-If the log has no earlier record, undo->empty is set. Otherwise
-undo->top_page_no, top_offset and top_undo_no are moved to the previous
-record, and trx->pages_undone is incremented when that record lies on a
-different page than the popped one. The caller must hold trx->undo_mutex
-(debug assertion).
-@return undo log record, the page s-latched */
-static
-trx_undo_rec_t*
-trx_roll_pop_top_rec(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* undo_page;
- ulint offset;
- trx_undo_rec_t* prev_rec;
- page_t* prev_rec_page;
-
- ut_ad(mutex_own(&trx->undo_mutex));
-
- undo_page = trx_undo_page_get_s_latched(
- undo->space, undo->zip_size, undo->top_page_no, mtr);
-
- offset = undo->top_offset;
-
- /* fprintf(stderr, "Thread %lu undoing trx " TRX_ID_FMT
- " undo record " TRX_ID_FMT "\n",
- os_thread_get_curr_id(), trx->id, undo->top_undo_no); */
-
- prev_rec = trx_undo_get_prev_rec(
- undo_page + offset, undo->hdr_page_no, undo->hdr_offset,
- true, mtr);
-
- if (prev_rec == NULL) {
-
- undo->empty = TRUE;
- } else {
- prev_rec_page = page_align(prev_rec);
-
- if (prev_rec_page != undo_page) {
-
- trx->pages_undone++;
- }
-
- undo->top_page_no = page_get_page_no(prev_rec_page);
- undo->top_offset = prev_rec - prev_rec_page;
- undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
- }
-
- return(undo_page + offset);
-}
-
-/********************************************************************//**
-Pops the topmost record when the two undo logs of a transaction are seen
-as a single stack of records ordered by their undo numbers. Inserts the
-undo number of the popped undo record to the array of currently processed
-undo numbers in the transaction. When the query thread finishes processing
-of this undo record, it must be released with trx_undo_rec_release.
-Before a record is chosen, the undo logs are truncated if
-trx->pages_undone has reached TRX_ROLL_TRUNC_THRESHOLD. The record comes
-from whichever of the insert and update undo logs is non-empty and has the
-larger top undo number. When nothing is left to pop and no undo number is
-reserved any more, the undo logs are truncated once more before NULL is
-returned. If the popped undo number turns out to be reserved already, the
-whole procedure is retried. trx->undo_no is lowered to the popped undo
-number.
-@return undo log record copied to heap, NULL if none left, or if the
-undo number of the top record would be less than the limit */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_roll_pop_top_rec_of_trx(
-/*========================*/
- trx_t* trx, /*!< in: transaction */
- undo_no_t limit, /*!< in: least undo number we need */
- roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */
- mem_heap_t* heap) /*!< in: memory heap where copied */
-{
- trx_undo_t* undo;
- trx_undo_t* ins_undo;
- trx_undo_t* upd_undo;
- trx_undo_rec_t* undo_rec;
- trx_undo_rec_t* undo_rec_copy;
- undo_no_t undo_no;
- ibool is_insert;
- trx_rseg_t* rseg;
- ulint progress_pct;
- mtr_t mtr;
-
- rseg = trx->rseg;
-try_again:
- mutex_enter(&(trx->undo_mutex));
-
- if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
- mutex_enter(&rseg->mutex);
-
- trx_roll_try_truncate(trx);
-
- mutex_exit(&rseg->mutex);
- }
-
- ins_undo = trx->insert_undo;
- upd_undo = trx->update_undo;
-
- if (!ins_undo || ins_undo->empty) {
- undo = upd_undo;
- } else if (!upd_undo || upd_undo->empty) {
- undo = ins_undo;
- } else if (upd_undo->top_undo_no > ins_undo->top_undo_no) {
- undo = upd_undo;
- } else {
- undo = ins_undo;
- }
-
- if (!undo || undo->empty || limit > undo->top_undo_no) {
-
- if ((trx->undo_no_arr)->n_used == 0) {
- /* Rollback is ending */
-
- mutex_enter(&(rseg->mutex));
-
- trx_roll_try_truncate(trx);
-
- mutex_exit(&(rseg->mutex));
- }
-
- mutex_exit(&(trx->undo_mutex));
-
- return(NULL);
- }
-
- is_insert = (undo == ins_undo);
-
- *roll_ptr = trx_undo_build_roll_ptr(
- is_insert, undo->rseg->id, undo->top_page_no, undo->top_offset);
-
- mtr_start(&mtr);
-
- undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
-
- undo_no = trx_undo_rec_get_undo_no(undo_rec);
-
- ut_ad(undo_no + 1 == trx->undo_no);
-
- /* We print rollback progress info if we are in a crash recovery
- and the transaction has at least 1000 row operations to undo. */
-
- if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) {
-
- progress_pct = 100 - (ulint)
- ((undo_no * 100) / trx_roll_max_undo_no);
- if (progress_pct != trx_roll_progress_printed_pct) {
- if (trx_roll_progress_printed_pct == 0) {
- fprintf(stderr,
- "\nInnoDB: Progress in percents:"
- " %lu", (ulong) progress_pct);
- } else {
- fprintf(stderr,
- " %lu", (ulong) progress_pct);
- }
- fflush(stderr);
- trx_roll_progress_printed_pct = progress_pct;
- }
- }
-
- trx->undo_no = undo_no;
-
- if (!trx_undo_arr_store_info(trx, undo_no)) {
- /* A query thread is already processing this undo log record */
-
- mutex_exit(&(trx->undo_mutex));
-
- mtr_commit(&mtr);
-
- goto try_again;
- }
-
- undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
-
- mutex_exit(&(trx->undo_mutex));
-
- mtr_commit(&mtr);
-
- return(undo_rec_copy);
-}
-
-/********************************************************************//**
-Reserves an undo log record for a query thread to undo. This should be
-called if the query thread gets the undo log record not using the pop
-function above.
-@return TRUE if succeeded */
-UNIV_INTERN
-ibool
-trx_undo_rec_reserve(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- undo_no_t undo_no)/*!< in: undo number of the record */
-{
- ibool ret;
-
- mutex_enter(&(trx->undo_mutex));
-
- ret = trx_undo_arr_store_info(trx, undo_no);
-
- mutex_exit(&(trx->undo_mutex));
-
- return(ret);
-}
-
-/*******************************************************************//**
-Releases a reserved undo record. */
-UNIV_INTERN
-void
-trx_undo_rec_release(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- undo_no_t undo_no)/*!< in: undo number */
-{
- trx_undo_arr_t* arr;
-
- mutex_enter(&(trx->undo_mutex));
-
- arr = trx->undo_no_arr;
-
- trx_undo_arr_remove_info(arr, undo_no);
-
- mutex_exit(&(trx->undo_mutex));
-}
-
-/****************************************************************//**
-Builds an undo 'query' graph for a transaction. The actual rollback is
-performed by executing this query graph like a query subprocedure call.
-The reply about the completion of the rollback will be sent by this
-graph.
-@return own: the query graph */
-static
-que_t*
-trx_roll_graph_build(
-/*=================*/
- trx_t* trx) /*!< in: trx handle */
-{
- mem_heap_t* heap;
- que_fork_t* fork;
- que_thr_t* thr;
-
- ut_ad(trx_mutex_own(trx));
-
- heap = mem_heap_create(512);
- fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap);
- fork->trx = trx;
-
- thr = que_thr_create(fork, heap);
-
- thr->child = row_undo_node_create(trx, thr, heap);
-
- return(fork);
-}
-
-/*********************************************************************//**
-Starts a rollback operation, creates the UNDO graph that will do the
-actual undo operation.
-@return query graph thread that will perform the UNDO operations. */
-static
-que_thr_t*
-trx_rollback_start(
-/*===============*/
- trx_t* trx, /*!< in: transaction */
- ib_id_t roll_limit) /*!< in: rollback to undo no (for
- partial undo), 0 if we are rolling back
- the entire transaction */
-{
- que_t* roll_graph;
-
- ut_ad(trx_mutex_own(trx));
-
- ut_ad(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
-
- /* Initialize the rollback field in the transaction */
-
- trx->roll_limit = roll_limit;
-
- ut_a(trx->roll_limit <= trx->undo_no);
-
- trx->pages_undone = 0;
-
- if (trx->undo_no_arr == NULL) {
- /* Single query thread -> 1 */
- trx->undo_no_arr = trx_undo_arr_create(1);
- }
-
- /* Build a 'query' graph which will perform the undo operations */
-
- roll_graph = trx_roll_graph_build(trx);
-
- trx->graph = roll_graph;
-
- trx->lock.que_state = TRX_QUE_ROLLING_BACK;
-
- return(que_fork_start_command(roll_graph));
-}
-
-/****************************************************************//**
-Finishes a transaction rollback. */
-static
-void
-trx_rollback_finish(
-/*================*/
- trx_t* trx) /*!< in: transaction */
-{
- ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
-
- trx_commit(trx);
-
- trx->lock.que_state = TRX_QUE_RUNNING;
-}
-
-/*********************************************************************//**
-Creates a rollback command node struct.
-@return own: rollback node struct */
-UNIV_INTERN
-roll_node_t*
-roll_node_create(
-/*=============*/
- mem_heap_t* heap) /*!< in: mem heap where created */
-{
- roll_node_t* node;
-
- node = static_cast<roll_node_t*>(mem_heap_zalloc(heap, sizeof(*node)));
-
- node->state = ROLL_NODE_SEND;
-
- node->common.type = QUE_NODE_ROLLBACK;
-
- return(node);
-}
-
-/***********************************************************//**
-Performs an execution step for a rollback command node in a query graph.
-@return query thread to run next, or NULL */
-UNIV_INTERN
-que_thr_t*
-trx_rollback_step(
-/*==============*/
- que_thr_t* thr) /*!< in: query thread */
-{
- roll_node_t* node;
-
- node = static_cast<roll_node_t*>(thr->run_node);
-
- ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK);
-
- if (thr->prev_node == que_node_get_parent(node)) {
- node->state = ROLL_NODE_SEND;
- }
-
- if (node->state == ROLL_NODE_SEND) {
- trx_t* trx;
- ib_id_t roll_limit = 0;
-
- trx = thr_get_trx(thr);
-
- trx_mutex_enter(trx);
-
- node->state = ROLL_NODE_WAIT;
-
- ut_a(node->undo_thr == NULL);
-
- roll_limit = node->partial ? node->savept.least_undo_no : 0;
-
- trx_commit_or_rollback_prepare(trx);
-
- node->undo_thr = trx_rollback_start(trx, roll_limit);
-
- trx_mutex_exit(trx);
-
- } else {
- ut_ad(node->state == ROLL_NODE_WAIT);
-
- thr->run_node = que_node_get_parent(node);
- }
-
- return(thr);
-}
diff --git a/storage/xtradb/trx/trx0rseg.cc b/storage/xtradb/trx/trx0rseg.cc
deleted file mode 100644
index 16fa334872b..00000000000
--- a/storage/xtradb/trx/trx0rseg.cc
+++ /dev/null
@@ -1,424 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0rseg.cc
-Rollback segment
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0rseg.h"
-
-#ifdef UNIV_NONINL
-#include "trx0rseg.ic"
-#endif
-
-#include "trx0undo.h"
-#include "fut0lst.h"
-#include "srv0srv.h"
-#include "trx0purge.h"
-#include "ut0bh.h"
-#include "srv0mon.h"
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register rseg_mutex_key with performance schema */
-UNIV_INTERN mysql_pfs_key_t rseg_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/****************************************************************//**
-Creates a rollback segment header. This function is called only when
-a new rollback segment is created in the database.
-@return page number of the created segment, FIL_NULL if fail */
-UNIV_INTERN
-ulint
-trx_rseg_header_create(
-/*===================*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint max_size, /*!< in: max size in pages */
- ulint rseg_slot_no, /*!< in: rseg id == slot number in trx sys */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint page_no;
- trx_rsegf_t* rsegf;
- trx_sysf_t* sys_header;
- ulint i;
- buf_block_t* block;
-
- ut_ad(mtr);
- ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space, NULL),
- MTR_MEMO_X_LOCK));
-
- /* Allocate a new file segment for the rollback segment */
- block = fseg_create(space, 0, TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr);
-
- if (block == NULL) {
- /* No space left */
-
- return(FIL_NULL);
- }
-
- buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW);
-
- page_no = buf_block_get_page_no(block);
-
- /* Get the rollback segment file page */
- rsegf = trx_rsegf_get_new(space, zip_size, page_no, mtr);
-
- /* Initialize max size field */
- mlog_write_ulint(rsegf + TRX_RSEG_MAX_SIZE, max_size,
- MLOG_4BYTES, mtr);
-
- /* Initialize the history list */
-
- mlog_write_ulint(rsegf + TRX_RSEG_HISTORY_SIZE, 0, MLOG_4BYTES, mtr);
- flst_init(rsegf + TRX_RSEG_HISTORY, mtr);
-
- /* Reset the undo log slots */
- for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
-
- trx_rsegf_set_nth_undo(rsegf, i, FIL_NULL, mtr);
- }
-
- /* Add the rollback segment info to the free slot in
- the trx system header */
-
- sys_header = trx_sysf_get(mtr);
-
- trx_sysf_rseg_set_space(sys_header, rseg_slot_no, space, mtr);
- trx_sysf_rseg_set_page_no(sys_header, rseg_slot_no, page_no, mtr);
-
- return(page_no);
-}
-
-/***********************************************************************//**
-Free's an instance of the rollback segment in memory. */
-UNIV_INTERN
-void
-trx_rseg_mem_free(
-/*==============*/
- trx_rseg_t* rseg) /* in, own: instance to free */
-{
- trx_undo_t* undo;
- trx_undo_t* next_undo;
-
- mutex_free(&rseg->mutex);
-
- /* There can't be any active transactions. */
- ut_a(UT_LIST_GET_LEN(rseg->update_undo_list) == 0);
- ut_a(UT_LIST_GET_LEN(rseg->insert_undo_list) == 0);
-
- for (undo = UT_LIST_GET_FIRST(rseg->update_undo_cached);
- undo != NULL;
- undo = next_undo) {
-
- next_undo = UT_LIST_GET_NEXT(undo_list, undo);
-
- UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, undo);
-
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
-
- trx_undo_mem_free(undo);
- }
-
- for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached);
- undo != NULL;
- undo = next_undo) {
-
- next_undo = UT_LIST_GET_NEXT(undo_list, undo);
-
- UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, undo);
-
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
-
- trx_undo_mem_free(undo);
- }
-
- /* const_cast<trx_rseg_t*>() because this function is
- like a destructor. */
-
- *((trx_rseg_t**) trx_sys->rseg_array + rseg->id) = NULL;
-
- mem_free(rseg);
-}
-
-/***************************************************************************
-Creates and initializes a rollback segment object. The values for the
-fields are read from the header. The object is inserted to the rseg
-list of the trx system object and a pointer is inserted in the rseg
-array in the trx system object.
-@return own: rollback segment object */
-static
-trx_rseg_t*
-trx_rseg_mem_create(
-/*================*/
- ulint id, /*!< in: rollback segment id */
- ulint space, /*!< in: space where the segment
- placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the segment
- header */
- ib_bh_t* ib_bh, /*!< in/out: rseg queue */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint len;
- trx_rseg_t* rseg;
- fil_addr_t node_addr;
- trx_rsegf_t* rseg_header;
- trx_ulogf_t* undo_log_hdr;
- ulint sum_of_undo_sizes;
-
- rseg = static_cast<trx_rseg_t*>(mem_zalloc(sizeof(trx_rseg_t)));
-
- rseg->id = id;
- rseg->space = space;
- rseg->zip_size = zip_size;
- rseg->page_no = page_no;
-
- mutex_create(rseg_mutex_key, &rseg->mutex, SYNC_RSEG);
-
- /* const_cast<trx_rseg_t*>() because this function is
- like a constructor. */
- *((trx_rseg_t**) trx_sys->rseg_array + rseg->id) = rseg;
-
- rseg_header = trx_rsegf_get_new(space, zip_size, page_no, mtr);
-
- rseg->max_size = mtr_read_ulint(
- rseg_header + TRX_RSEG_MAX_SIZE, MLOG_4BYTES, mtr);
-
- /* Initialize the undo log lists according to the rseg header */
-
- sum_of_undo_sizes = trx_undo_lists_init(rseg);
-
- rseg->curr_size = mtr_read_ulint(
- rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr)
- + 1 + sum_of_undo_sizes;
-
- len = flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr);
-
- if (len > 0) {
- rseg_queue_t rseg_queue;
-
- trx_sys->rseg_history_len += len;
-
- node_addr = trx_purge_get_log_from_hist(
- flst_get_last(rseg_header + TRX_RSEG_HISTORY, mtr));
-
- rseg->last_page_no = node_addr.page;
- rseg->last_offset = node_addr.boffset;
-
- undo_log_hdr = trx_undo_page_get(
- rseg->space, rseg->zip_size, node_addr.page,
- mtr) + node_addr.boffset;
-
- rseg->last_trx_no = mach_read_from_8(
- undo_log_hdr + TRX_UNDO_TRX_NO);
-
- rseg->last_del_marks = mtr_read_ulint(
- undo_log_hdr + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr);
-
- rseg_queue.rseg = rseg;
- rseg_queue.trx_no = rseg->last_trx_no;
-
- if (rseg->last_page_no != FIL_NULL) {
- const void* ptr;
-
- /* There is no need to cover this operation by the purge
- mutex because we are still bootstrapping. */
-
- ptr = ib_bh_push(ib_bh, &rseg_queue);
- ut_a(ptr != NULL);
- }
- } else {
- rseg->last_page_no = FIL_NULL;
- }
-
- return(rseg);
-}
-
-/********************************************************************
-Creates the memory copies for the rollback segments and initializes the
-rseg array in trx_sys at a database startup. */
-static
-void
-trx_rseg_create_instance(
-/*=====================*/
- trx_sysf_t* sys_header, /*!< in: trx system header */
- ib_bh_t* ib_bh, /*!< in/out: rseg queue */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint i;
-
- for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
- ulint page_no;
-
- page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
-
- if (page_no != FIL_NULL) {
- ulint space;
- ulint zip_size;
- trx_rseg_t* rseg = NULL;
-
- ut_a(!trx_rseg_get_on_id(i));
-
- space = trx_sysf_rseg_get_space(sys_header, i, mtr);
-
- zip_size = space ? fil_space_get_zip_size(space) : 0;
-
- rseg = trx_rseg_mem_create(
- i, space, zip_size, page_no, ib_bh, mtr);
-
- ut_a(rseg->id == i);
- } else {
- ut_a(trx_sys->rseg_array[i] == NULL);
- }
- }
-}
-
-/** Create a rollback segment.
-@param[in] space undo tablespace ID
-@return pointer to new rollback segment
-@retval NULL on failure */
-UNIV_INTERN
-trx_rseg_t*
-trx_rseg_create(ulint space)
-{
- mtr_t mtr;
- ulint slot_no;
- trx_rseg_t* rseg = NULL;
-
- mtr_start(&mtr);
-
- /* To obey the latching order, acquire the file space
- x-latch before the trx_sys->mutex. */
- mtr_x_lock(fil_space_get_latch(space, NULL), &mtr);
-
- slot_no = trx_sysf_rseg_find_free(&mtr);
-
- if (slot_no != ULINT_UNDEFINED) {
- ulint id;
- ulint page_no;
- ulint zip_size;
- trx_sysf_t* sys_header;
-
- page_no = trx_rseg_header_create(
- space, 0, ULINT_MAX, slot_no, &mtr);
-
- if (page_no != FIL_NULL) {
- sys_header = trx_sysf_get(&mtr);
-
- id = trx_sysf_rseg_get_space(sys_header, slot_no, &mtr);
- ut_a(id == space);
-
- zip_size = space ? fil_space_get_zip_size(space) : 0;
-
- rseg = trx_rseg_mem_create(
- slot_no, space, zip_size, page_no,
- purge_sys->ib_bh, &mtr);
- }
- }
-
- mtr_commit(&mtr);
- return(rseg);
-}
-
-/*********************************************************************//**
-Creates the memory copies for rollback segments and initializes the
-rseg array in trx_sys at a database startup. */
-UNIV_INTERN
-void
-trx_rseg_array_init(
-/*================*/
- trx_sysf_t* sys_header, /* in/out: trx system header */
- ib_bh_t* ib_bh, /*!< in: rseg queue */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_sys->rseg_history_len = 0;
-
- trx_rseg_create_instance(sys_header, ib_bh, mtr);
-}
-
-/********************************************************************
-Get the number of unique rollback tablespaces in use except space id 0.
-The last space id will be the sentinel value ULINT_UNDEFINED. The array
-will be sorted on space id. Note: space_ids should have have space for
-TRX_SYS_N_RSEGS + 1 elements.
-@return number of unique rollback tablespaces in use. */
-UNIV_INTERN
-ulint
-trx_rseg_get_n_undo_tablespaces(
-/*============================*/
- ulint* space_ids) /*!< out: array of space ids of
- UNDO tablespaces */
-{
- ulint i;
- mtr_t mtr;
- trx_sysf_t* sys_header;
- ulint n_undo_tablespaces = 0;
- ulint space_ids_aux[TRX_SYS_N_RSEGS + 1];
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
- ulint page_no;
- ulint space;
-
- page_no = trx_sysf_rseg_get_page_no(sys_header, i, &mtr);
-
- if (page_no == FIL_NULL) {
- continue;
- }
-
- space = trx_sysf_rseg_get_space(sys_header, i, &mtr);
-
- if (space != 0) {
- ulint j;
- ibool found = FALSE;
-
- for (j = 0; j < n_undo_tablespaces; ++j) {
- if (space_ids[j] == space) {
- found = TRUE;
- break;
- }
- }
-
- if (!found) {
- ut_a(n_undo_tablespaces <= i);
- space_ids[n_undo_tablespaces++] = space;
- }
- }
- }
-
- mtr_commit(&mtr);
-
- ut_a(n_undo_tablespaces <= TRX_SYS_N_RSEGS);
-
- space_ids[n_undo_tablespaces] = ULINT_UNDEFINED;
-
- if (n_undo_tablespaces > 0) {
- ut_ulint_sort(space_ids, space_ids_aux, 0, n_undo_tablespaces);
- }
-
- return(n_undo_tablespaces);
-}
diff --git a/storage/xtradb/trx/trx0sys.cc b/storage/xtradb/trx/trx0sys.cc
deleted file mode 100644
index 9accb4ef303..00000000000
--- a/storage/xtradb/trx/trx0sys.cc
+++ /dev/null
@@ -1,1523 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0sys.cc
-Transaction system
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0sys.h"
-
-#ifdef UNIV_NONINL
-#include "trx0sys.ic"
-#endif
-
-#ifdef UNIV_HOTBACKUP
-#include "fsp0types.h"
-
-#else /* !UNIV_HOTBACKUP */
-#include "fsp0fsp.h"
-#include "mtr0log.h"
-#include "mtr0log.h"
-#include "trx0trx.h"
-#include "trx0rseg.h"
-#include "trx0undo.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "trx0purge.h"
-#include "log0log.h"
-#include "log0recv.h"
-#include "os0file.h"
-#include "read0read.h"
-
-#ifdef WITH_WSREP
-#include "ha_prototypes.h" /* wsrep_is_wsrep_xid() */
-#endif /* */
-
-#include <mysql/service_wsrep.h>
-
-/** The file format tag structure with id and name. */
-struct file_format_t {
- ulint id; /*!< id of the file format */
- const char* name; /*!< text representation of the
- file format */
- ib_mutex_t mutex; /*!< covers changes to the above
- fields */
-};
-
-/** The transaction system */
-UNIV_INTERN trx_sys_t* trx_sys = NULL;
-
-/** In a MySQL replication slave, in crash recovery we store the master log
-file name and position here. */
-/* @{ */
-/** Master binlog file name */
-UNIV_INTERN char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
-/** Master binlog file position. We have successfully got the updates
-up to this position. -1 means that no crash recovery was needed, or
-there was no master log position info inside InnoDB.*/
-UNIV_INTERN ib_int64_t trx_sys_mysql_master_log_pos = -1;
-/* @} */
-
-/** If this MySQL server uses binary logging, after InnoDB has been inited
-and if it has done a crash recovery, we store the binlog file name and position
-here. */
-/* @{ */
-/** Binlog file name */
-UNIV_INTERN char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
-/** Binlog file position, or -1 if unknown */
-UNIV_INTERN ib_int64_t trx_sys_mysql_bin_log_pos = -1;
-/* @} */
-#endif /* !UNIV_HOTBACKUP */
-
-/** List of animal names representing file format. */
-static const char* file_format_name_map[] = {
- "Antelope",
- "Barracuda",
- "Cheetah",
- "Dragon",
- "Elk",
- "Fox",
- "Gazelle",
- "Hornet",
- "Impala",
- "Jaguar",
- "Kangaroo",
- "Leopard",
- "Moose",
- "Nautilus",
- "Ocelot",
- "Porpoise",
- "Quail",
- "Rabbit",
- "Shark",
- "Tiger",
- "Urchin",
- "Viper",
- "Whale",
- "Xenops",
- "Yak",
- "Zebra"
-};
-
-/** The number of elements in the file format name array. */
-static const ulint FILE_FORMAT_NAME_N
- = sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register the mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t file_format_max_mutex_key;
-UNIV_INTERN mysql_pfs_key_t trx_sys_mutex_key;
-#endif /* UNIV_PFS_RWLOCK */
-
-#ifndef UNIV_HOTBACKUP
-#ifdef UNIV_DEBUG
-/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */
-UNIV_INTERN uint trx_rseg_n_slots_debug = 0;
-#endif
-
-/** This is used to track the maximum file format id known to InnoDB. It's
-updated via SET GLOBAL innodb_file_format_max = 'x' or when we open
-or create a table. */
-static file_format_t file_format_max;
-
-#ifdef UNIV_DEBUG
-/****************************************************************//**
-Checks whether a trx is in one of rw_trx_list or ro_trx_list.
-@return TRUE if is in */
-UNIV_INTERN
-ibool
-trx_in_trx_list(
-/*============*/
- const trx_t* in_trx) /*!< in: transaction */
-{
- const trx_t* trx;
- trx_list_t* trx_list;
-
- /* Non-locking autocommits should not hold any locks. */
- assert_trx_in_list(in_trx);
-
- trx_list = in_trx->read_only
- ? &trx_sys->ro_trx_list : &trx_sys->rw_trx_list;
-
- ut_ad(mutex_own(&trx_sys->mutex));
-
- ut_ad(trx_assert_started(in_trx));
-
- for (trx = UT_LIST_GET_FIRST(*trx_list);
- trx != NULL && trx != in_trx;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
- assert_trx_in_list(trx);
- ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
- }
-
- return(trx != NULL);
-}
-#endif /* UNIV_DEBUG */
-
-/*****************************************************************//**
-Writes the value of max_trx_id to the file based trx system header. */
-UNIV_INTERN
-void
-trx_sys_flush_max_trx_id(void)
-/*==========================*/
-{
- mtr_t mtr;
- trx_sysf_t* sys_header;
-
-#ifndef WITH_WSREP
- /* wsrep_fake_trx_id violates this assert
- * Copied from trx_sys_get_new_trx_id
- */
- ut_ad(mutex_own(&trx_sys->mutex));
-#endif /* WITH_WSREP */
-
- if (!srv_read_only_mode) {
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- mlog_write_ull(
- sys_header + TRX_SYS_TRX_ID_STORE,
- trx_sys->max_trx_id, &mtr);
-
- mtr_commit(&mtr);
- }
-}
-
-/*****************************************************************//**
-Updates the offset information about the end of the MySQL binlog entry
-which corresponds to the transaction just being committed. In a MySQL
-replication slave updates the latest master binlog position up to which
-replication has proceeded. */
-UNIV_INTERN
-void
-trx_sys_update_mysql_binlog_offset(
-/*===============================*/
- const char* file_name,/*!< in: MySQL log file name */
- ib_int64_t offset, /*!< in: position in that log file */
- ulint field, /*!< in: offset of the MySQL log info field in
- the trx sys header */
-#ifdef WITH_WSREP
- trx_sysf_t* sys_header, /*!< in: trx sys header */
-#endif /* WITH_WSREP */
- mtr_t* mtr) /*!< in: mtr */
-{
-#ifndef WITH_WSREP
- trx_sysf_t* sys_header;
-#endif /* !WITH_WSREP */
-
- if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
-
- /* We cannot fit the name to the 512 bytes we have reserved */
-
- return;
- }
-
-#ifndef WITH_WSREP
- sys_header = trx_sysf_get(mtr);
-#endif /* !WITH_WSREP */
-
- if (mach_read_from_4(sys_header + field
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- mlog_write_ulint(sys_header + field
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
- TRX_SYS_MYSQL_LOG_MAGIC_N,
- MLOG_4BYTES, mtr);
- }
-
- if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
- file_name)) {
-
- mlog_write_string(sys_header + field
- + TRX_SYS_MYSQL_LOG_NAME,
- (byte*) file_name, 1 + ut_strlen(file_name),
- mtr);
- }
-
- if (mach_read_from_4(sys_header + field
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
- || (offset >> 32) > 0) {
-
- mlog_write_ulint(sys_header + field
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
- (ulint)(offset >> 32),
- MLOG_4BYTES, mtr);
- }
-
- mlog_write_ulint(sys_header + field
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW,
- (ulint)(offset & 0xFFFFFFFFUL),
- MLOG_4BYTES, mtr);
-}
-
-/*****************************************************************//**
-Stores the MySQL binlog offset info in the trx system header if
-the magic number shows it valid, and print the info to stderr */
-UNIV_INTERN
-void
-trx_sys_print_mysql_binlog_offset(void)
-/*===================================*/
-{
- trx_sysf_t* sys_header;
- mtr_t mtr;
- ulint trx_sys_mysql_bin_log_pos_high;
- ulint trx_sys_mysql_bin_log_pos_low;
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- mtr_commit(&mtr);
-
- return;
- }
-
- trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
- trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
-
- trx_sys_mysql_bin_log_pos
- = (((ib_int64_t) trx_sys_mysql_bin_log_pos_high) << 32)
- + (ib_int64_t) trx_sys_mysql_bin_log_pos_low;
-
- ut_memcpy(trx_sys_mysql_bin_log_name,
- sys_header + TRX_SYS_MYSQL_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
-
- fprintf(stderr,
- "InnoDB: Last MySQL binlog file position %lu %lu,"
- " file name %s\n",
- trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low,
- trx_sys_mysql_bin_log_name);
-
- mtr_commit(&mtr);
-}
-
-#ifdef WITH_WSREP
-
-#ifdef UNIV_DEBUG
-static long long trx_sys_cur_xid_seqno = -1;
-static unsigned char trx_sys_cur_xid_uuid[16];
-
-long long read_wsrep_xid_seqno(const XID* xid)
-{
- long long seqno;
- memcpy(&seqno, xid->data + 24, sizeof(long long));
- return seqno;
-}
-
-void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf)
-{
- memcpy(buf, xid->data + 8, 16);
-}
-
-#endif /* UNIV_DEBUG */
-
-void
-trx_sys_update_wsrep_checkpoint(
- const XID* xid, /*!< in: transaction XID */
- trx_sysf_t* sys_header, /*!< in: sys_header */
- mtr_t* mtr) /*!< in: mtr */
-{
-#ifdef UNIV_DEBUG
- {
- /* Check that seqno is monotonically increasing */
- unsigned char xid_uuid[16];
- long long xid_seqno = read_wsrep_xid_seqno(xid);
- read_wsrep_xid_uuid(xid, xid_uuid);
- if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 16))
- {
- /*
- This check is a protection against the initial seqno (-1)
- assigned in read_wsrep_xid_uuid(), which, if not checked,
- would cause the following assertion to fail.
- */
- if (xid_seqno > -1 )
- {
- ut_ad(xid_seqno > trx_sys_cur_xid_seqno);
- }
- }
- else
- {
- memcpy(trx_sys_cur_xid_uuid, xid_uuid, 16);
- }
- trx_sys_cur_xid_seqno = xid_seqno;
- }
-#endif /* UNIV_DEBUG */
-
- ut_ad(xid && mtr);
- ut_a(xid->formatID == -1 || wsrep_is_wsrep_xid(xid));
-
- if (mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD)
- != TRX_SYS_WSREP_XID_MAGIC_N) {
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD,
- TRX_SYS_WSREP_XID_MAGIC_N,
- MLOG_4BYTES, mtr);
- }
-
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_FORMAT,
- (int)xid->formatID,
- MLOG_4BYTES, mtr);
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_GTRID_LEN,
- (int)xid->gtrid_length,
- MLOG_4BYTES, mtr);
- mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_BQUAL_LEN,
- (int)xid->bqual_length,
- MLOG_4BYTES, mtr);
- mlog_write_string(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_DATA,
- (const unsigned char*) xid->data,
- XIDDATASIZE, mtr);
-
-}
-
-bool
-trx_sys_read_wsrep_checkpoint(XID* xid)
-/*===================================*/
-{
- trx_sysf_t* sys_header;
- mtr_t mtr;
- ulint magic;
-
- ut_ad(xid);
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- if ((magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
- + TRX_SYS_WSREP_XID_MAGIC_N_FLD))
- != TRX_SYS_WSREP_XID_MAGIC_N) {
- memset(xid, 0, sizeof(*xid));
- long long seqno= -1;
- memcpy(xid->data + 24, &seqno, sizeof(long long));
- xid->formatID = -1;
- trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr);
- mtr_commit(&mtr);
- return false;
- }
-
- xid->formatID = (int)mach_read_from_4(
- sys_header
- + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT);
- xid->gtrid_length = (int)mach_read_from_4(
- sys_header
- + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN);
- xid->bqual_length = (int)mach_read_from_4(
- sys_header
- + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN);
- ut_memcpy(xid->data,
- sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA,
- XIDDATASIZE);
-
- mtr_commit(&mtr);
- return true;
-}
-
-#endif /* WITH_WSREP */
-
-/*****************************************************************//**
-Prints to stderr the MySQL master log offset info in the trx system header if
-the magic number shows it valid. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_master_log_pos(void)
-/*====================================*/
-{
- trx_sysf_t* sys_header;
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- sys_header = trx_sysf_get(&mtr);
-
- if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
- != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
- mtr_commit(&mtr);
-
- return;
- }
-
- fprintf(stderr,
- "InnoDB: In a MySQL replication slave the last"
- " master binlog file\n"
- "InnoDB: position %lu %lu, file name %s\n",
- (ulong) mach_read_from_4(sys_header
- + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
- (ulong) mach_read_from_4(sys_header
- + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
- sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME);
- /* Copy the master log position info to global variables we can
- use in ha_innobase.cc to initialize glob_mi to right values */
-
- ut_memcpy(trx_sys_mysql_master_log_name,
- sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_NAME,
- TRX_SYS_MYSQL_LOG_NAME_LEN);
-
- trx_sys_mysql_master_log_pos
- = (((ib_int64_t) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
- + ((ib_int64_t) mach_read_from_4(
- sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
- + TRX_SYS_MYSQL_LOG_OFFSET_LOW));
- mtr_commit(&mtr);
-}
-
-/****************************************************************//**
-Looks for a free slot for a rollback segment in the trx system file copy.
-@return slot index or ULINT_UNDEFINED if not found */
-UNIV_INTERN
-ulint
-trx_sysf_rseg_find_free(
-/*====================*/
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint i;
- trx_sysf_t* sys_header;
-
- sys_header = trx_sysf_get(mtr);
-
- for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
- ulint page_no;
-
- page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
-
- if (page_no == FIL_NULL) {
-
- return(i);
- }
- }
-
- return(ULINT_UNDEFINED);
-}
-
-/*****************************************************************//**
-Creates the file page for the transaction system. This function is called only
-at the database creation, before trx_sys_init.
-The page is allocated in TRX_SYS_SPACE and is asserted to be page
-TRX_SYS_PAGE_NO. The rollback segment slot array is filled with 0xff bytes,
-the remainder of the page up to the page trailer is zeroed, and finally the
-first rollback segment is created in the system tablespace. */
-static
-void
-trx_sysf_create(
-/*============*/
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_sysf_t* sys_header;
- ulint slot_no;
- buf_block_t* block;
- page_t* page;
- ulint page_no;
- byte* ptr;
- ulint len;
-
- ut_ad(mtr);
-
- /* Note that below we first reserve the file space x-latch, and
- then enter the kernel: we must do it in this order to conform
- to the latching order rules. */
-
- mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);
-
- /* Create the trx sys file block in a new allocated file segment */
- block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
- mtr);
- buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
-
- /* The new segment must have been given the fixed trx sys page. */
- ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
-
- page = buf_block_get_frame(block);
-
- mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
- MLOG_2BYTES, mtr);
-
- /* Reset the doublewrite buffer magic number to zero so that we
- know that the doublewrite buffer has not yet been created (this
- suppresses a Valgrind warning) */
-
- mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
- + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
-
- sys_header = trx_sysf_get(mtr);
-
- /* Start counting transaction ids from number 1 up */
- mach_write_to_8(sys_header + TRX_SYS_TRX_ID_STORE, 1);
-
- /* Reset the rollback segment slots. Old versions of InnoDB
- define TRX_SYS_N_RSEGS as 256 (TRX_SYS_OLD_N_RSEGS) and expect
- that the whole array is initialized. */
- ptr = TRX_SYS_RSEGS + sys_header;
- len = ut_max(TRX_SYS_OLD_N_RSEGS, TRX_SYS_N_RSEGS)
- * TRX_SYS_RSEG_SLOT_SIZE;
- memset(ptr, 0xff, len);
- ptr += len;
- /* The slot array must not run into the page trailer. */
- ut_a(ptr <= page + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END));
-
- /* Initialize all of the page. This part used to be uninitialized. */
- memset(ptr, 0, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page - ptr);
-
- /* The memset() calls above wrote to the frame directly; log the
- whole range from sys_header to the page trailer in one record. */
- mlog_log_string(sys_header, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
- + page - sys_header, mtr);
-
- /* Create the first rollback segment in the SYSTEM tablespace */
- slot_no = trx_sysf_rseg_find_free(mtr);
- page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, slot_no,
- mtr);
-
- /* On a freshly created page the first free slot and the first
- rollback segment page are expected to be the well-known ones. */
- ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
- ut_a(page_no == FSP_FIRST_RSEG_PAGE_NO);
-}
-
-/*****************************************************************//**
-Compare two rseg_queue_t elements on their trx_no field. This is the
-ordering callback handed to ib_bh_create() in trx_sys_init_at_db_start().
-@return -1 if p1 sorts before p2, 1 if it sorts after, 0 if equal */
-static
-int
-trx_rseg_compare_last_trx_no(
-/*=========================*/
-	const void*	p1,	/*!< in: elem to compare */
-	const void*	p2)	/*!< in: elem to compare */
-{
-	const rseg_queue_t*	rseg_q1 = (const rseg_queue_t*) p1;
-	const rseg_queue_t*	rseg_q2 = (const rseg_queue_t*) p2;
-
-	/* Compare the two values directly instead of looking at the
-	sign of their difference: a difference squeezed into a signed
-	64-bit integer reports the wrong order once the operands are
-	more than 2^63 apart. */
-
-	if (rseg_q1->trx_no < rseg_q2->trx_no) {
-		return(-1);
-	} else if (rseg_q1->trx_no > rseg_q2->trx_no) {
-		return(1);
-	}
-
-	return(0);
-}
-
-/*****************************************************************//**
-Creates and initializes the central memory structures for the transaction
-system. This is called when the database is started.
-Allocates the trx descriptors array, initializes the rollback segment
-array (unless srv_force_recovery forbids the undo log scan), derives
-trx_sys->max_trx_id from the value stored in the trx system header, and
-reports any recovered transactions found in the rw_trx_list.
-@return min binary heap of rsegs to purge */
-UNIV_INTERN
-ib_bh_t*
-trx_sys_init_at_db_start(void)
-/*==========================*/
-{
- mtr_t mtr;
- ib_bh_t* ib_bh;
- trx_sysf_t* sys_header;
- ib_uint64_t rows_to_undo = 0;
- const char* unit = "";
-
- /* We create the min binary heap here and pass ownership to
- purge when we init the purge sub-system. Purge is responsible
- for freeing the binary heap. */
-
- ib_bh = ib_bh_create(
- trx_rseg_compare_last_trx_no,
- sizeof(rseg_queue_t), TRX_SYS_N_RSEGS);
-
- /* The mini-transaction stays open until the end of the function. */
- mtr_start(&mtr);
-
- /* Allocate the trx descriptors array */
- trx_sys->descriptors = static_cast<trx_id_t*>(
- ut_malloc(sizeof(trx_id_t) *
- TRX_DESCR_ARRAY_INITIAL_SIZE));
- trx_sys->descr_n_max = TRX_DESCR_ARRAY_INITIAL_SIZE;
- trx_sys->descr_n_used = 0;
- srv_descriptors_memory = TRX_DESCR_ARRAY_INITIAL_SIZE *
- sizeof(trx_id_t);
-
- sys_header = trx_sysf_get(&mtr);
-
- if (srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
- trx_rseg_array_init(sys_header, ib_bh, &mtr);
- }
-
- /* VERY important: after the database is started, max_trx_id value is
- divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
- trx_sys_get_new_trx_id will evaluate to TRUE when the function
- is first time called, and the value for trx id will be written
- to the disk-based header! Thus trx id values will not overlap when
- the database is repeatedly started! */
-
- trx_sys->max_trx_id = 2 * TRX_SYS_TRX_ID_WRITE_MARGIN
- + ut_uint64_align_up(mach_read_from_8(sys_header
- + TRX_SYS_TRX_ID_STORE),
- TRX_SYS_TRX_ID_WRITE_MARGIN);
-
- ut_d(trx_sys->rw_max_trx_id = trx_sys->max_trx_id);
-
- UT_LIST_INIT(trx_sys->mysql_trx_list);
-
- trx_dummy_sess = sess_open();
-
- trx_lists_init_at_db_start();
-
- /* This mutex is not strictly required, it is here only to satisfy
- the debug code (assertions). We are still running in single threaded
- bootstrap mode. */
-
- mutex_enter(&trx_sys->mutex);
-
- ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0);
-
- if (UT_LIST_GET_LEN(trx_sys->rw_trx_list) > 0) {
- const trx_t* trx;
-
- /* Sum up the undo records of the recovered transactions that
- are still in the active state. */
- for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
- ut_ad(trx->is_recovered);
- assert_trx_in_rw_list(trx);
-
- if (trx_state_eq(trx, TRX_STATE_ACTIVE)) {
- rows_to_undo += trx->undo_no;
- }
- }
-
- /* Above one billion rows, report the count in millions. */
- if (rows_to_undo > 1000000000) {
- unit = "M";
- rows_to_undo = rows_to_undo / 1000000;
- }
-
- fprintf(stderr,
- "InnoDB: %lu transaction(s) which must be"
- " rolled back or cleaned up\n"
- "InnoDB: in total %lu%s row operations to undo\n",
- (ulong) UT_LIST_GET_LEN(trx_sys->rw_trx_list),
- (ulong) rows_to_undo, unit);
-
- fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
- trx_sys->max_trx_id);
- }
-
- mutex_exit(&trx_sys->mutex);
-
- UT_LIST_INIT(trx_sys->view_list);
-
- mtr_commit(&mtr);
-
- return(ib_bh);
-}
-
-/*****************************************************************//**
-Creates the zero-filled trx_sys instance and initializes its mutex.
-The instance must not exist yet (checked in debug builds). */
-UNIV_INTERN
-void
-trx_sys_create(void)
-/*================*/
-{
- ut_ad(trx_sys == NULL);
-
- trx_sys = static_cast<trx_sys_t*>(mem_zalloc(sizeof(*trx_sys)));
-
- mutex_create(trx_sys_mutex_key, &trx_sys->mutex, SYNC_TRX_SYS);
-}
-
-/*****************************************************************//**
-Creates and initializes the transaction system at the database creation.
-Runs trx_sysf_create() inside its own mini-transaction. */
-UNIV_INTERN
-void
-trx_sys_create_sys_pages(void)
-/*==========================*/
-{
- mtr_t mtr;
-
- mtr_start(&mtr);
-
- trx_sysf_create(&mtr);
-
- mtr_commit(&mtr);
-}
-
-/*****************************************************************//**
-Records a new max file format: refreshes the in-memory file_format_max
-and writes the tag (format id plus magic number) to the trx system page
-within a private mini-transaction. Both callers in this file invoke it
-while holding file_format_max.mutex.
-@return always TRUE */
-static
-ibool
-trx_sys_file_format_max_write(
-/*==========================*/
-	ulint		format_id,	/*!< in: file format id */
-	const char**	name)		/*!< out: max file format name, can
-					be NULL */
-{
-	mtr_t		mtr;
-	buf_block_t*	block;
-	byte*		tag_ptr;
-	ib_uint64_t	tag_value;
-
-	mtr_start(&mtr);
-
-	block = buf_page_get(
-		TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
-
-	/* Refresh the cached copy before touching the page. */
-	file_format_max.id = format_id;
-	file_format_max.name = trx_sys_file_format_id_to_name(format_id);
-
-	tag_value = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
-	tag_ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
-
-	/* The caller may not be interested in the name. */
-	if (name != NULL) {
-		*name = file_format_max.name;
-	}
-
-	mlog_write_ull(tag_ptr, tag_value, &mtr);
-
-	mtr_commit(&mtr);
-
-	return(TRUE);
-}
-
-/*****************************************************************//**
-Reads the file format tag from the trx system page and strips the magic
-number from it.
-@return the file format id, or ULINT_UNDEFINED if the page carries no
-valid tag */
-static
-ulint
-trx_sys_file_format_max_read(void)
-/*==============================*/
-{
-	mtr_t			mtr;
-	const buf_block_t*	block;
-	ib_id_t			tag;
-
-	/* Since this is called during the startup phase it's safe to
-	read the value without a covering mutex. */
-	mtr_start(&mtr);
-
-	block = buf_page_get(
-		TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
-
-	tag = mach_read_from_8(
-		buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG);
-
-	mtr_commit(&mtr);
-
-	tag -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
-
-	/* Anything outside the known id range means the page was never
-	tagged, or holds garbage. */
-	return(tag < FILE_FORMAT_NAME_N ? (ulint) tag : ULINT_UNDEFINED);
-}
-
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-The id must be below FILE_FORMAT_NAME_N; this is asserted with ut_a().
-@return pointer to the name, taken from file_format_name_map */
-UNIV_INTERN
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id) /*!< in: id of the file format */
-{
- ut_a(id < FILE_FORMAT_NAME_N);
-
- return(file_format_name_map[id]);
-}
-
-/*****************************************************************//**
-Checks the max file format tag stored on disk against what this build
-supports, then caches the larger of the on-disk id and max_format_id in
-file_format_max. If the on-disk id exceeds UNIV_FORMAT_MAX this is an
-error, unless max_format_id also exceeds UNIV_FORMAT_MAX, in which case
-only a warning is printed.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-dberr_t
-trx_sys_file_format_max_check(
-/*==========================*/
-	ulint	max_format_id)	/*!< in: max format id to check */
-{
-	/* Check the file format in the tablespace. Do not try to
-	recover if the file format is not supported by the engine
-	unless forced by the user. */
-	ulint	format_id = trx_sys_file_format_max_read();
-
-	if (format_id == ULINT_UNDEFINED) {
-		/* No tag on disk: fall back to the lowest format. */
-		format_id = UNIV_FORMAT_MIN;
-	}
-
-	ib_logf(IB_LOG_LEVEL_INFO,
-		"Highest supported file format is %s.",
-		trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX));
-
-	if (format_id > UNIV_FORMAT_MAX) {
-		/* The caller asked for strict checking when its own
-		limit is within the supported range. */
-		const bool	strict = (max_format_id <= UNIV_FORMAT_MAX);
-
-		ut_a(format_id < FILE_FORMAT_NAME_N);
-
-		ib_logf(strict ? IB_LOG_LEVEL_ERROR : IB_LOG_LEVEL_WARN,
-			"The system tablespace is in a file "
-			"format that this version doesn't support - %s.",
-			trx_sys_file_format_id_to_name(format_id));
-
-		if (strict) {
-			return(DB_ERROR);
-		}
-	}
-
-	if (format_id < max_format_id) {
-		format_id = max_format_id;
-	}
-
-	/* We don't need a mutex here, as this function should only
-	be called once at start up. */
-	file_format_max.id = format_id;
-	file_format_max.name = trx_sys_file_format_id_to_name(format_id);
-
-	return(DB_SUCCESS);
-}
-
-/*****************************************************************//**
-Sets the max file format id to the given value, whether that raises or
-lowers it; nothing is written when the cached id already matches.
-@return TRUE if value updated */
-UNIV_INTERN
-ibool
-trx_sys_file_format_max_set(
-/*========================*/
-	ulint		format_id,	/*!< in: file format id */
-	const char**	name)		/*!< out: max file format name or
-					NULL if not needed. */
-{
-	ibool	updated = FALSE;
-
-	ut_a(format_id <= UNIV_FORMAT_MAX);
-
-	mutex_enter(&file_format_max.mutex);
-
-	/* Skip the page write when nothing would change. */
-	if (file_format_max.id != format_id) {
-
-		updated = trx_sys_file_format_max_write(format_id, name);
-	}
-
-	mutex_exit(&file_format_max.mutex);
-
-	return(updated);
-}
-
-/********************************************************************//**
-Tags the system table space with the minimum format id (UNIV_FORMAT_MIN)
-when no valid tag can be read from it.
-WARNING: This function is only called during the startup and AFTER the
-redo log application during recovery has finished. */
-UNIV_INTERN
-void
-trx_sys_file_format_tag_init(void)
-/*==============================*/
-{
-	/* ULINT_UNDEFINED from the reader means "not tagged yet". */
-	if (trx_sys_file_format_max_read() == ULINT_UNDEFINED) {
-		trx_sys_file_format_max_set(UNIV_FORMAT_MIN, NULL);
-	}
-}
-
-/********************************************************************//**
-Raises the file format tag in the system tablespace to format_id, but
-only when format_id is above the currently cached max id; the tag is
-never lowered here.
-@return TRUE if format_id was bigger than the known max id */
-UNIV_INTERN
-ibool
-trx_sys_file_format_max_upgrade(
-/*============================*/
-	const char**	name,		/*!< out: max file format name */
-	ulint		format_id)	/*!< in: file format identifier */
-{
-	ibool	upgraded = FALSE;
-
-	ut_a(name);
-	ut_a(file_format_max.name != NULL);
-	ut_a(format_id <= UNIV_FORMAT_MAX);
-
-	mutex_enter(&file_format_max.mutex);
-
-	if (file_format_max.id < format_id) {
-
-		upgraded = trx_sys_file_format_max_write(format_id, name);
-	}
-
-	mutex_exit(&file_format_max.mutex);
-
-	return(upgraded);
-}
-
-/*****************************************************************//**
-Get the name of the currently cached max file format. The value is read
-from file_format_max.name without taking file_format_max.mutex.
-@return pointer to the max format name */
-UNIV_INTERN
-const char*
-trx_sys_file_format_max_get(void)
-/*=============================*/
-{
- return(file_format_max.name);
-}
-
-/*****************************************************************//**
-Initializes the tablespace tag system: creates file_format_max.mutex and
-sets the cached max format to UNIV_FORMAT_MIN and its name. */
-UNIV_INTERN
-void
-trx_sys_file_format_init(void)
-/*==========================*/
-{
- mutex_create(file_format_max_mutex_key,
- &file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
-
- /* We don't need a mutex here, as this function should only
- be called once at start up. */
- file_format_max.id = UNIV_FORMAT_MIN;
-
- file_format_max.name = trx_sys_file_format_id_to_name(
- file_format_max.id);
-}
-
-/*****************************************************************//**
-Closes the tablespace tag system. Currently a no-op; in particular it
-does not free the mutex created by trx_sys_file_format_init(). */
-UNIV_INTERN
-void
-trx_sys_file_format_close(void)
-/*===========================*/
-{
- /* Does nothing at the moment */
-}
-
-/*****************************************************************//**
-Creates the rollback segments. Segments are only added when neither
-innodb_force_recovery is set nor crash recovery was needed, and only up
-to n_rsegs in total; new segments are spread round-robin over the undo
-tablespaces, or placed in the system tablespace when n_spaces is 0.
-@return number of rollback segments that are active, or ULINT_UNDEFINED
-in read-only mode */
-UNIV_INTERN
-ulint
-trx_sys_create_rsegs(
-/*=================*/
-	ulint	n_spaces,	/*!< number of tablespaces for UNDO logs */
-	ulint	n_rsegs)	/*!< number of rollback segments to create */
-{
-	mtr_t	mtr;
-	ulint	n_used;
-
-	ut_a(n_spaces < TRX_SYS_N_RSEGS);
-	ut_a(n_rsegs <= TRX_SYS_N_RSEGS);
-
-	if (srv_read_only_mode) {
-		return(ULINT_UNDEFINED);
-	}
-
-	/* This is executed in single-threaded mode therefore it is not
-	necessary to use the same mtr in trx_rseg_create(). n_used cannot
-	change while the function is executing. */
-
-	mtr_start(&mtr);
-	n_used = trx_sysf_rseg_find_free(&mtr);
-	mtr_commit(&mtr);
-
-	/* No free slot at all means every slot is in use. */
-	if (n_used == ULINT_UNDEFINED) {
-		n_used = TRX_SYS_N_RSEGS;
-	}
-
-	/* Do not create additional rollback segments if innodb_force_recovery
-	has been set and the database was not shutdown cleanly. */
-
-	if (!srv_force_recovery && !recv_needed_recovery && n_used < n_rsegs) {
-		ulint	i;
-		ulint	new_rsegs = n_rsegs - n_used;
-
-		for (i = 0; i < new_rsegs; ++i) {
-			ulint	space_id;
-			space_id = (n_spaces == 0) ? 0
-				: (srv_undo_space_id_start + i % n_spaces);
-
-			/* Tablespace 0 is the system tablespace. */
-			if (trx_rseg_create(space_id) != NULL) {
-				++n_used;
-			} else {
-				/* Stop at the first failure. */
-				break;
-			}
-		}
-	}
-
-	/* %lu expects unsigned long; cast explicitly, as the other
-	messages in this file do, because ulint need not have that
-	width on every platform. */
-	ib_logf(IB_LOG_LEVEL_INFO,
-		"%lu rollback segment(s) are active.", (ulong) n_used);
-
-	return(n_used);
-}
-
-#else /* !UNIV_HOTBACKUP */
-/*****************************************************************//**
-Prints to stderr the MySQL binlog position and file name stored in the
-system header of the given page. Nothing is printed unless the magic
-number field marks the binlog info as valid. */
-UNIV_INTERN
-void
-trx_sys_print_mysql_binlog_offset_from_page(
-/*========================================*/
-	const byte*	page)	/*!< in: buffer containing the trx
-				system header page, i.e., page number
-				TRX_SYS_PAGE_NO in the tablespace */
-{
-	/* Start of the binlog info area inside the trx system header. */
-	const trx_sysf_t*	log_info
-		= page + TRX_SYS + TRX_SYS_MYSQL_LOG_INFO;
-
-	if (mach_read_from_4(log_info + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
-	    != TRX_SYS_MYSQL_LOG_MAGIC_N) {
-
-		return;
-	}
-
-	fprintf(stderr,
-		"mysqlbackup: Last MySQL binlog file position %lu %lu,"
-		" file name %s\n",
-		(ulong) mach_read_from_4(
-			log_info + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
-		(ulong) mach_read_from_4(
-			log_info + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
-		log_info + TRX_SYS_MYSQL_LOG_NAME);
-}
-
-/*****************************************************************//**
-Reads the file format id from the first system table space file.
-The file is opened read-only, the page TRX_SYS_PAGE_NO is read into an
-aligned local buffer, and the file is closed again before the tag is
-decoded. Even if the call succeeds and returns TRUE, the returned format
-id may be ULINT_UNDEFINED signalling that the format id was not present
-in the data file.
-@return TRUE if call succeeds, FALSE if the file could not be opened
-or read */
-UNIV_INTERN
-ibool
-trx_sys_read_file_format_id(
-/*========================*/
-	const char *pathname,	/*!< in: pathname of the first system
-				table space file */
-	ulint *format_id)	/*!< out: file format of the system table
-				space */
-{
-	os_file_t	file;
-	ibool		success;
-	byte		buf[UNIV_PAGE_SIZE * 2];
-	/* The buffer is twice the page size so that a page-aligned
-	page fits inside it. The explicit cast keeps this well-formed
-	C++ even if ut_align() returns an untyped pointer. */
-	page_t*		page = static_cast<page_t*>(
-		ut_align(buf, UNIV_PAGE_SIZE));
-	const byte*	ptr;
-	ib_id_t		file_format_id;
-
-	*format_id = ULINT_UNDEFINED;
-
-	file = os_file_create_simple_no_error_handling(
-		innodb_file_data_key,
-		pathname,
-		OS_FILE_OPEN,
-		OS_FILE_READ_ONLY,
-		&success
-	);
-	if (!success) {
-		/* The following call prints an error message */
-		os_file_get_last_error(true);
-
-		ut_print_timestamp(stderr);
-
-		fprintf(stderr,
-			" mysqlbackup: Error: trying to read system "
-			"tablespace file format,\n"
-			" mysqlbackup: but could not open the tablespace "
-			"file %s!\n", pathname);
-		return(FALSE);
-	}
-
-	/* Read the page on which file format is stored */
-
-	success = os_file_read_no_error_handling(
-		file, page, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE, UNIV_PAGE_SIZE);
-
-	if (!success) {
-		/* The following call prints an error message */
-		os_file_get_last_error(true);
-
-		ut_print_timestamp(stderr);
-
-		fprintf(stderr,
-			" mysqlbackup: Error: trying to read system "
-			"tablespace file format,\n"
-			" mysqlbackup: but failed to read the tablespace "
-			"file %s!\n", pathname);
-
-		os_file_close(file);
-		return(FALSE);
-	}
-	os_file_close(file);
-
-	/* get the file format from the page */
-	ptr = page + TRX_SYS_FILE_FORMAT_TAG;
-	file_format_id = mach_read_from_8(ptr);
-	file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
-
-	if (file_format_id >= FILE_FORMAT_NAME_N) {
-
-		/* Either it has never been tagged, or garbage in it.
-		*format_id keeps the ULINT_UNDEFINED set above. */
-		return(TRUE);
-	}
-
-	*format_id = (ulint) file_format_id;
-
-	return(TRUE);
-}
-
-/*****************************************************************//**
-Reads the file format id from the given per-table data file.
-The file is opened read-only, its first page is read into an aligned
-local buffer, and the file is closed again before the tablespace flags
-are decoded. *format_id is set to ULINT_UNDEFINED on entry and is only
-overwritten when the flags are valid.
-@return TRUE if call succeeds, FALSE if the file could not be opened or
-read, or if the tablespace flags are invalid */
-UNIV_INTERN
-ibool
-trx_sys_read_pertable_file_format_id(
-/*=================================*/
-	const char *pathname,	/*!< in: pathname of a per-table
-				datafile */
-	ulint *format_id)	/*!< out: file format of the per-table
-				data file */
-{
-	os_file_t	file;
-	ibool		success;
-	byte		buf[UNIV_PAGE_SIZE * 2];
-	/* The buffer is twice the page size so that a page-aligned
-	page fits inside it. The explicit cast keeps this well-formed
-	C++ even if ut_align() returns an untyped pointer. */
-	page_t*		page = static_cast<page_t*>(
-		ut_align(buf, UNIV_PAGE_SIZE));
-	const byte*	ptr;
-	ib_uint32_t	flags;
-
-	*format_id = ULINT_UNDEFINED;
-
-	file = os_file_create_simple_no_error_handling(
-		innodb_file_data_key,
-		pathname,
-		OS_FILE_OPEN,
-		OS_FILE_READ_ONLY,
-		&success
-	);
-	if (!success) {
-		/* The following call prints an error message */
-		os_file_get_last_error(true);
-
-		ut_print_timestamp(stderr);
-
-		fprintf(stderr,
-			" mysqlbackup: Error: trying to read per-table "
-			"tablespace format,\n"
-			" mysqlbackup: but could not open the tablespace "
-			"file %s!\n", pathname);
-
-		return(FALSE);
-	}
-
-	/* Read the first page of the per-table datafile */
-
-	success = os_file_read_no_error_handling(file, page, 0, UNIV_PAGE_SIZE);
-
-	if (!success) {
-		/* The following call prints an error message */
-		os_file_get_last_error(true);
-
-		ut_print_timestamp(stderr);
-
-		fprintf(stderr,
-			" mysqlbackup: Error: trying to read per-table "
-			"data file format,\n"
-			" mysqlbackup: but failed to read the tablespace "
-			"file %s!\n", pathname);
-
-		os_file_close(file);
-		return(FALSE);
-	}
-	os_file_close(file);
-
-	/* get the file format from the page */
-	/* NOTE(review): 54 is presumably the byte offset of the
-	tablespace flags within the first page; confirm against the
-	FSP header layout and replace it with the named constants. */
-	ptr = page + 54;
-	flags = mach_read_from_4(ptr);
-
-	if (!fsp_flags_is_valid(flags)) {
-		/* bad tablespace flags */
-		return(FALSE);
-	}
-
-	*format_id = FSP_FLAGS_GET_POST_ANTELOPE(flags);
-
-	return(TRUE);
-}
-
-
-/*****************************************************************//**
-Maps a file format id to its name for the hot-backup build. Unlike an
-assertion-based lookup, an id outside the known range is tolerated here
-and reported as "Unknown".
-@return pointer to the name */
-UNIV_INTERN
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
-	const ulint	id)	/*!< in: id of the file format */
-{
-	/* Ids at or beyond FILE_FORMAT_NAME_N have no table entry. */
-	return(id >= FILE_FORMAT_NAME_N
-	       ? "Unknown"
-	       : file_format_name_map[id]);
-}
-
-#endif /* !UNIV_HOTBACKUP */
-
-#ifndef UNIV_HOTBACKUP
-/*****************************************************************//**
-Shutdown/Close the transaction system. Closes the dummy session and the
-purge system, frees the doublewrite structures if present, frees the
-transactions left in rw_trx_list (expected to be prepared ones) and the
-in-memory rollback segments, empties the read view list, and finally
-frees the mutex, the descriptors array and trx_sys itself. trx_sys is
-NULL afterwards. */
-UNIV_INTERN
-void
-trx_sys_close(void)
-/*===============*/
-{
-	ulint		i;
-	trx_t*		trx;
-	read_view_t*	view;
-
-	ut_ad(trx_sys != NULL);
-	ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
-
-	/* Check that all read views are closed except read view owned
-	by a purge. */
-
-	mutex_enter(&trx_sys->mutex);
-
-	if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) {
-		/* %lu expects unsigned long; cast explicitly, as the
-		other messages in this file do. */
-		fprintf(stderr,
-			"InnoDB: Error: all read views were not closed"
-			" before shutdown:\n"
-			"InnoDB: %lu read views open \n",
-			(ulong) (UT_LIST_GET_LEN(trx_sys->view_list) - 1));
-	}
-
-	mutex_exit(&trx_sys->mutex);
-
-	sess_close(trx_dummy_sess);
-	trx_dummy_sess = NULL;
-
-	trx_purge_sys_close();
-
-	/* Free the double write data structures. */
-	if (buf_dblwr) {
-		buf_dblwr_free();
-	}
-
-
-	/* Only prepared transactions may be left in the system. Free them. */
-	ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == trx_sys->n_prepared_trx
-	     || srv_read_only_mode
-	     || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
-	     || (IS_XTRABACKUP() && srv_apply_log_only));
-
-
-	/* The loop relies on trx_free_prepared() taking the transaction
-	off rw_trx_list; it runs until the list is empty. */
-	while ((trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list)) != NULL) {
-		trx_free_prepared(trx);
-	}
-
-	/* There can't be any active transactions. */
-	for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
-		trx_rseg_t*	rseg;
-
-		rseg = trx_sys->rseg_array[i];
-
-		if (rseg != NULL) {
-			trx_rseg_mem_free(rseg);
-		} else {
-			/* The scan stops at the first empty slot. */
-			break;
-		}
-	}
-
-	view = UT_LIST_GET_FIRST(trx_sys->view_list);
-
-	while (view != NULL) {
-		read_view_t*	prev_view = view;
-
-		/* Fetch the successor before unlinking the element. */
-		view = UT_LIST_GET_NEXT(view_list, prev_view);
-
-		/* Views are allocated from the trx_sys->global_read_view_heap.
-		So, we simply remove the element here. */
-		UT_LIST_REMOVE(view_list, trx_sys->view_list, prev_view);
-	}
-
-	if (!IS_XTRABACKUP() || !srv_apply_log_only) {
-		ut_a(UT_LIST_GET_LEN(trx_sys->view_list) == 0);
-		ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0);
-		ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == 0);
-		ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
-	}
-
-	mutex_free(&trx_sys->mutex);
-
-	ut_ad(trx_sys->descr_n_used == 0);
-	ut_free(trx_sys->descriptors);
-
-	mem_free(trx_sys);
-
-	trx_sys = NULL;
-}
-
-/** @brief Mark an undo log as TRX_UNDO_PREPARED in memory on shutdown.
-
-When innodb_force_recovery has prevented the rollback of recovered
-ACTIVE transactions, those transactions are presented as XA PREPARE in
-the main-memory data structures so that shutdown can proceed normally.
-Only the in-memory state is touched: on the next restart the
-transactions recover as ACTIVE again and are rolled back, unless
-innodb_force_recovery prevents it once more.
-
-@param[in]	trx	transaction
-@param[in,out]	undo	undo log to convert to TRX_UNDO_PREPARED;
-			NULL is accepted and ignored */
-static
-void
-trx_undo_fake_prepared(
-	const trx_t*	trx,
-	trx_undo_t*	undo)
-{
-	ut_ad(srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
-	ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
-	ut_ad(trx->is_recovered);
-
-	/* The transaction may lack this kind of undo log. */
-	if (undo == NULL) {
-		return;
-	}
-
-	ut_ad(undo->state == TRX_UNDO_ACTIVE);
-	undo->state = TRX_UNDO_PREPARED;
-}
-
-/*********************************************************************
-Check if there are any active (non-prepared) transactions.
-If mysql_trx_list is non-empty its length is returned as is. Otherwise
-the result is the length of rw_trx_list minus the number of prepared
-transactions. As a side effect, when srv_force_recovery is at least
-SRV_FORCE_NO_TRX_UNDO, recovered ACTIVE transactions are switched to
-TRX_STATE_PREPARED here and counted as prepared.
-Always returns 0 under xtrabackup with srv_apply_log_only.
-@return total number of active transactions or 0 if none */
-UNIV_INTERN
-ulint
-trx_sys_any_active_transactions(void)
-/*=================================*/
-{
- if (IS_XTRABACKUP() && srv_apply_log_only) {
- return(0);
- }
- mutex_enter(&trx_sys->mutex);
-
- ulint total_trx = UT_LIST_GET_LEN(trx_sys->mysql_trx_list);
-
- if (total_trx == 0) {
- total_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
- ut_a(total_trx >= trx_sys->n_prepared_trx);
-
- if (total_trx > trx_sys->n_prepared_trx
- && srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
- for (trx_t* trx = UT_LIST_GET_FIRST(
- trx_sys->rw_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
- /* Only recovered transactions that are still
- ACTIVE are converted. */
- if (!trx_state_eq(trx, TRX_STATE_ACTIVE)
- || !trx->is_recovered) {
- continue;
- }
- /* This was a recovered transaction
- whose rollback was disabled by
- the innodb_force_recovery setting.
- Pretend that it is in XA PREPARE
- state so that shutdown will work. */
- trx_undo_fake_prepared(
- trx, trx->insert_undo);
- trx_undo_fake_prepared(
- trx, trx->update_undo);
- trx->state = TRX_STATE_PREPARED;
- trx_sys->n_prepared_trx++;
- trx_sys->n_prepared_recovered_trx++;
- }
- }
-
- /* Prepared transactions do not count as active. */
- ut_a(total_trx >= trx_sys->n_prepared_trx);
- total_trx -= trx_sys->n_prepared_trx;
- }
-
- mutex_exit(&trx_sys->mutex);
-
- return(total_trx);
-}
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Validate a trx_list_t: every element must be flagged as being in a
-list, its read_only flag must agree with the list it is on, and the
-transaction ids must be strictly descending from the list head.
-The caller must hold trx_sys->mutex.
-@return TRUE if valid. */
-static
-ibool
-trx_sys_validate_trx_list_low(
-/*===========================*/
-	trx_list_t*	trx_list)	/*!< in: &trx_sys->ro_trx_list
-					or &trx_sys->rw_trx_list */
-{
-	const trx_t*	prev_trx = NULL;
-	const trx_t*	trx;
-
-	ut_ad(mutex_own(&trx_sys->mutex));
-
-	ut_ad(trx_list == &trx_sys->ro_trx_list
-	      || trx_list == &trx_sys->rw_trx_list);
-
-	trx = UT_LIST_GET_FIRST(*trx_list);
-
-	while (trx != NULL) {
-
-		assert_trx_in_list(trx);
-		ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
-
-		/* Each id must be smaller than its predecessor's. */
-		ut_a(prev_trx == NULL || prev_trx->id > trx->id);
-
-		prev_trx = trx;
-		trx = UT_LIST_GET_NEXT(trx_list, prev_trx);
-	}
-
-	return(TRUE);
-}
-
-/*************************************************************//**
-Validate the trx_sys_t::ro_trx_list and trx_sys_t::rw_trx_list.
-The caller must hold trx_sys->mutex. A violation trips an assertion in
-the per-list check, so the function only ever returns TRUE.
-@return TRUE if lists are valid. */
-UNIV_INTERN
-ibool
-trx_sys_validate_trx_list(void)
-/*===========================*/
-{
- ut_ad(mutex_own(&trx_sys->mutex));
-
- ut_a(trx_sys_validate_trx_list_low(&trx_sys->ro_trx_list));
- ut_a(trx_sys_validate_trx_list_low(&trx_sys->rw_trx_list));
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/trx/trx0trx.cc b/storage/xtradb/trx/trx0trx.cc
deleted file mode 100644
index 1d2f7ada54e..00000000000
--- a/storage/xtradb/trx/trx0trx.cc
+++ /dev/null
@@ -1,2748 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0trx.cc
-The transaction
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "btr0types.h"
-#include "trx0trx.h"
-
-#ifdef UNIV_NONINL
-#include "trx0trx.ic"
-#endif
-
-#include <mysql/service_wsrep.h>
-
-#include "trx0undo.h"
-#include "trx0rseg.h"
-#include "log0log.h"
-#include "que0que.h"
-#include "lock0lock.h"
-#include "trx0roll.h"
-#include "usr0sess.h"
-#include "read0read.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "btr0sea.h"
-#include "os0proc.h"
-#include "trx0xa.h"
-#include "trx0rec.h"
-#include "trx0purge.h"
-#include "ha_prototypes.h"
-#include "srv0mon.h"
-#include "ut0vec.h"
-
-#include<set>
-
-/* Declared here with C linkage; presumably provided by the server
-layer (TODO confirm where it is defined). */
-extern "C"
-int thd_deadlock_victim_preference(const MYSQL_THD thd1, const MYSQL_THD thd2);
-
-/** Set of table_id */
-typedef std::set<table_id_t> table_id_set;
-
-/** Dummy session used currently in MySQL interface */
-UNIV_INTERN sess_t* trx_dummy_sess = NULL;
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register trx_t::mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t trx_mutex_key;
-/* Key to register trx_t::undo_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t trx_undo_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/*************************************************************//**
-Set detailed error message for the transaction. The message is copied
-into the fixed-size trx->detailed_error buffer with ut_strlcpy(), bounded
-by the size of that buffer. */
-UNIV_INTERN
-void
-trx_set_detailed_error(
-/*===================*/
- trx_t* trx, /*!< in: transaction struct */
- const char* msg) /*!< in: detailed error message */
-{
- ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error));
-}
-
-/*************************************************************//**
-Set detailed error message for the transaction from a file. Note that the
-file is rewound before reading from it (presumably inside
-os_file_read_string(); this function does not rewind it itself). At most
-sizeof(trx->detailed_error) is passed as the buffer size. */
-UNIV_INTERN
-void
-trx_set_detailed_error_from_file(
-/*=============================*/
- trx_t* trx, /*!< in: transaction struct */
- FILE* file) /*!< in: file to read message from */
-{
- os_file_read_string(file, trx->detailed_error,
- sizeof(trx->detailed_error));
-}
-
-/*************************************************************//**
-Three-way comparison of two trx IDs, used as the callback function for
-trx_find_descriptor().
-@return -1, 0 or 1 if *a is less than, equal to or greater than *b */
-UNIV_INTERN
-int
-trx_descr_cmp(
-/*==========*/
-	const void *a,	/*!< in: pointer to first comparison argument */
-	const void *b)	/*!< in: pointer to second comparison argument */
-{
-	const trx_id_t	lhs = *(const trx_id_t*) a;
-	const trx_id_t	rhs = *(const trx_id_t*) b;
-
-	/* Each comparison yields 0 or 1, so the difference of the two
-	is exactly -1, 0 or 1. */
-	return((lhs > rhs) - (lhs < rhs));
-}
-
-/*************************************************************//**
-Reserve a slot for a given trx in the global descriptors array.
-The array is kept sorted by ascending trx id: the id is appended when it
-is not smaller than the current last element, otherwise larger ids are
-shifted up by one to make room. The array capacity is doubled when it is
-full. The caller must hold trx_sys->mutex unless the server is still
-being started. */
-UNIV_INLINE
-void
-trx_reserve_descriptor(
-/*===================*/
- const trx_t* trx) /*!< in: trx pointer */
-{
- ulint n_used;
- ulint n_max;
- trx_id_t* descr;
-
- ut_ad(mutex_own(&trx_sys->mutex) || srv_is_being_started);
- /* The id must not be in the array already. */
- ut_ad(srv_is_being_started ||
- !trx_find_descriptor(trx_sys->descriptors,
- trx_sys->descr_n_used,
- trx->id));
-
- /* n_used is the element count after this insertion. */
- n_used = trx_sys->descr_n_used + 1;
- n_max = trx_sys->descr_n_max;
-
- if (UNIV_UNLIKELY(n_used > n_max)) {
-
- n_max = n_max * 2;
-
- /* NOTE(review): the result of ut_realloc() is stored without a
- NULL check; confirm that it cannot fail here. */
- trx_sys->descriptors = static_cast<trx_id_t*>(
- ut_realloc(trx_sys->descriptors,
- n_max * sizeof(trx_id_t)));
-
- trx_sys->descr_n_max = n_max;
- srv_descriptors_memory = n_max * sizeof(trx_id_t);
- }
-
- /* By default the new id goes into the first unused slot. */
- descr = trx_sys->descriptors + n_used - 1;
-
- if (UNIV_UNLIKELY(n_used > 1 && trx->id < descr[-1])) {
-
- /* Find the slot where it should be inserted. We could use a
- binary search, but in reality linear search should be faster,
- because the slot we are looking for is near the array end. */
-
- trx_id_t* tdescr;
-
- for (tdescr = descr - 1;
- tdescr >= trx_sys->descriptors && *tdescr > trx->id;
- tdescr--) {
- }
-
- /* The loop stopped one element below the insertion point. */
- tdescr++;
-
- /* Shift the larger ids up by one slot. */
- ut_memmove(tdescr + 1, tdescr, (descr - tdescr) *
- sizeof(trx_id_t));
-
- descr = tdescr;
- }
-
- *descr = trx->id;
-
- trx_sys->descr_n_used = n_used;
-}
-
-/*************************************************************//**
-Release the slot of a given trx in the global descriptors array, also
-unlinking the trx from trx_sys->trx_serial_list when it is on it. The
-ids stored after the released slot are moved down by one. Nothing is
-removed from the array when the id is not found in it. The caller must
-hold trx_sys->mutex. */
-UNIV_INTERN
-void
-trx_release_descriptor(
-/*===================*/
-	trx_t*	trx)	/*!< in: trx pointer */
-{
-	trx_id_t*	slot;
-	trx_id_t*	last;
-	ulint		tail_bytes;
-
-	ut_ad(mutex_own(&trx_sys->mutex));
-
-	if (UNIV_LIKELY(trx->in_trx_serial_list)) {
-
-		UT_LIST_REMOVE(trx_serial_list, trx_sys->trx_serial_list,
-			       trx);
-		trx->in_trx_serial_list = false;
-	}
-
-	slot = trx_find_descriptor(trx_sys->descriptors,
-				   trx_sys->descr_n_used,
-				   trx->id);
-
-	if (UNIV_UNLIKELY(slot == NULL)) {
-
-		return;
-	}
-
-	/* Number of bytes occupied by the ids that follow the slot. */
-	last = trx_sys->descriptors + trx_sys->descr_n_used - 1;
-	tail_bytes = (last - slot) * sizeof(trx_id_t);
-
-	if (UNIV_LIKELY(tail_bytes > 0)) {
-
-		ut_memmove(slot, slot + 1, tail_bytes);
-	}
-
-	trx_sys->descr_n_used--;
-}
-
-/****************************************************************//**
-Creates and initializes a transaction object. It must be explicitly
-started with trx_start_if_not_started() before using it. The default
-isolation level is TRX_ISO_REPEATABLE_READ.
-The object is zero-filled on allocation, so every field not assigned
-below starts out as zero. Two separate heaps are created to back the
-autoinc_locks and lock.table_locks vectors.
-@return transaction instance, should never be NULL */
-static
-trx_t*
-trx_create(void)
-/*============*/
-{
- trx_t* trx;
- mem_heap_t* heap;
- ib_alloc_t* heap_alloc;
-
- trx = static_cast<trx_t*>(mem_zalloc(sizeof(*trx)));
-
- mutex_create(trx_mutex_key, &trx->mutex, SYNC_TRX);
-
- trx->magic_n = TRX_MAGIC_N;
-
- trx->active_commit_ordered = 0;
- trx->state = TRX_STATE_NOT_STARTED;
-
- trx->isolation_level = TRX_ISO_REPEATABLE_READ;
-
- trx->no = TRX_ID_MAX;
- trx->in_trx_serial_list = false;
-
- trx->support_xa = TRUE;
-
- trx->fake_changes = FALSE;
-
- trx->check_foreigns = TRUE;
- trx->check_unique_secondary = TRUE;
-
- trx->dict_operation = TRX_DICT_OP_NONE;
-
- trx->idle_start = 0;
- trx->last_stmt_start = 0;
-
- mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO);
-
- trx->error_state = DB_SUCCESS;
-
- trx->lock.que_state = TRX_QUE_RUNNING;
-
- trx->lock.lock_heap = mem_heap_create_typed(
- 256, MEM_HEAP_FOR_LOCK_HEAP);
-
- trx->search_latch_timeout = BTR_SEA_TIMEOUT;
-
- /* Per-transaction statistics counters, all starting from zero. */
- trx->io_reads = 0;
- trx->io_read = 0;
- trx->io_reads_wait_timer = 0;
- trx->lock_que_wait_timer = 0;
- trx->innodb_que_wait_timer = 0;
- trx->distinct_page_access = 0;
- trx->distinct_page_access_hash = NULL;
- trx->take_stats = FALSE;
-
- trx->xid.formatID = -1;
-
- trx->op_info = "";
-
- trx->api_trx = false;
-
- trx->api_auto_commit = false;
-
- trx->read_write = true;
-
- heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 8);
- heap_alloc = ib_heap_allocator_create(heap);
-
- /* Remember to free the vector explicitly in trx_free(). */
- trx->autoinc_locks = ib_vector_create(heap_alloc, sizeof(void**), 4);
-
- /* A second heap backs the table locks vector created below.
- Remember to free the vector explicitly in trx_free(). */
- heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 128);
- heap_alloc = ib_heap_allocator_create(heap);
-
- trx->lock.table_locks = ib_vector_create(
- heap_alloc, sizeof(void**), 32);
-#ifdef WITH_WSREP
- trx->wsrep_event = NULL;
-#endif /* WITH_WSREP */
-
- return(trx);
-}
-
-/********************************************************************//**
-Creates a transaction object for background operations by the master thread.
-The object comes from trx_create() and its session pointer is set to
-trx_dummy_sess. It is not added to any trx_sys list here.
-@return own: transaction object */
-UNIV_INTERN
-trx_t*
-trx_allocate_for_background(void)
-/*=============================*/
-{
- trx_t* trx;
-
- trx = trx_create();
-
- trx->sess = trx_dummy_sess;
-
- return(trx);
-}
-
-/********************************************************************//**
-Creates a transaction object for MySQL.
-The object is obtained from trx_allocate_for_background() and is then
-put at the head of trx_sys->mysql_trx_list while holding trx_sys->mutex.
-When trx->take_stats is set, a zeroed DPAH_SIZE-byte buffer is attached
-as trx->distinct_page_access_hash.
-NOTE(review): trx_create() stores FALSE in take_stats and nothing
-between that call and the test below changes it in the code visible
-here, so the allocation branch looks unreachable from this function;
-confirm whether take_stats is expected to be enabled elsewhere.
-@return own: transaction object */
-UNIV_INTERN
-trx_t*
-trx_allocate_for_mysql(void)
-/*========================*/
-{
- trx_t* trx;
-
- trx = trx_allocate_for_background();
-
- mutex_enter(&trx_sys->mutex);
-
- ut_d(trx->in_mysql_trx_list = TRUE);
- UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
-
- mutex_exit(&trx_sys->mutex);
-
- if (UNIV_UNLIKELY(trx->take_stats)) {
- trx->distinct_page_access_hash
- = static_cast<byte *>(mem_alloc(DPAH_SIZE));
- memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
- }
-
- return(trx);
-}
-
-/********************************************************************//**
-Frees a transaction object without releasing the corresponding descriptor.
-Should be used by callers that already own trx_sys->mutex.
-Before anything is released the function checks that the object carries
-the TRX_MAGIC_N marker, is on none of the ro/rw/mysql lists (ut_ad), has
-no pending lock wait, does not hold the search latch, has
-dict_operation_lock_mode == 0, and that both trx->lock.trx_locks and
-trx->autoinc_locks are empty. It then destroys the two mutexes, the
-undo number array (if any), the lock heap (if any), the two vectors,
-the prebuilt read view and finally the trx_t memory itself. */
-static
-void
-trx_free_low(
-/*=========*/
- trx_t* trx) /*!< in, own: trx object */
-{
- ut_a(trx->magic_n == TRX_MAGIC_N);
- ut_ad(!trx->in_ro_trx_list);
- ut_ad(!trx->in_rw_trx_list);
- ut_ad(!trx->in_mysql_trx_list);
-
- mutex_free(&trx->undo_mutex);
-
- if (trx->undo_no_arr != NULL) {
- trx_undo_arr_free(trx->undo_no_arr);
- }
-
- ut_a(trx->lock.wait_lock == NULL);
- ut_a(trx->lock.wait_thr == NULL);
-
- ut_a(!trx->has_search_latch);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!btr_search_own_any());
-#endif
-
- ut_a(trx->dict_operation_lock_mode == 0);
-
- if (trx->lock.lock_heap) {
- mem_heap_free(trx->lock.lock_heap);
- }
-
- ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
-
- ut_a(ib_vector_is_empty(trx->autoinc_locks));
- /* We allocated a dedicated heap for the vector (see trx_create()). */
- ib_vector_free(trx->autoinc_locks);
-
- if (trx->lock.table_locks != NULL) {
- /* We allocated a dedicated heap for the vector. */
- ib_vector_free(trx->lock.table_locks);
- }
-
- mutex_free(&trx->mutex);
-
- read_view_free(trx->prebuilt_view);
-
- mem_free(trx); /* trx must not be used after this point */
-}
-
-/********************************************************************//**
-Frees a transaction object.
-The descriptor slot is released first, with trx_sys->mutex held only
-around trx_release_descriptor(); the object itself is then destroyed by
-trx_free_low() after the mutex has been released. */
-static
-void
-trx_free(
-/*=========*/
- trx_t* trx) /*!< in, own: trx object */
-{
- mutex_enter(&trx_sys->mutex);
- trx_release_descriptor(trx);
- mutex_exit(&trx_sys->mutex);
-
- trx_free_low(trx);
-}
-
-/********************************************************************//**
-Frees a transaction object of a background operation of the master thread.
-Steps, in order:
-1) release trx->distinct_page_access_hash if one is attached;
-2) if the trx is still declared to be inside InnoDB, log an error, print
-   the trx to stderr and call srv_conc_force_exit_innodb();
-3) if n_mysql_tables_in_use or mysql_n_tables_locked is non-zero, log an
-   error and dump the trx (including a raw buffer dump) to stderr;
-4) assert that the trx is NOT_STARTED and has no undo logs or read view;
-5) hand the object to trx_free().
-Steps 2 and 3 only report; they do not stop the free. */
-UNIV_INTERN
-void
-trx_free_for_background(
-/*====================*/
- trx_t* trx) /*!< in, own: trx object */
-{
-
- if (trx->distinct_page_access_hash)
- {
- mem_free(trx->distinct_page_access_hash);
- trx->distinct_page_access_hash= NULL;
- }
-
- if (trx->declared_to_be_inside_innodb) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "Freeing a trx (%p, " TRX_ID_FMT ") which is declared "
- "to be processing inside InnoDB", trx, trx->id);
-
- trx_print(stderr, trx, 600);
- putc('\n', stderr);
-
- /* This is an error but not a fatal error. We must keep
- the counters like srv_conc_n_threads accurate. */
- srv_conc_force_exit_innodb(trx);
- }
-
- if (trx->n_mysql_tables_in_use != 0
- || trx->mysql_n_tables_locked != 0) {
-
- ib_logf(IB_LOG_LEVEL_ERROR,
- "MySQL is freeing a thd though "
- "trx->n_mysql_tables_in_use is %lu and "
- "trx->mysql_n_tables_locked is %lu.",
- (ulong) trx->n_mysql_tables_in_use,
- (ulong) trx->mysql_n_tables_locked);
-
- trx_print(stderr, trx, 600);
- ut_print_buf(stderr, trx, sizeof(trx_t));
- putc('\n', stderr);
- }
-
- ut_a(trx->state == TRX_STATE_NOT_STARTED);
- ut_a(trx->insert_undo == NULL);
- ut_a(trx->update_undo == NULL);
- ut_a(trx->read_view == NULL);
-
- trx_free(trx);
-}
-
-/********************************************************************//**
-At shutdown, frees a transaction object that is in the PREPARED state.
-The first assertion also admits a recovered transaction that is still
-ACTIVE, provided srv_read_only_mode is set or srv_force_recovery is at
-least SRV_FORCE_NO_TRX_UNDO.
-The function releases the trx locks and its prepared undo logs, unlinks
-the trx from trx_sys->rw_trx_list, releases its descriptor slot under
-trx_sys->mutex, resets trx->lock.trx_locks and frees the object with
-trx_free_low().
-NOTE(review): the rw_trx_list removal is done before trx_sys->mutex is
-acquired; presumably safe because this runs at shutdown - confirm. */
-UNIV_INTERN
-void
-trx_free_prepared(
-/*==============*/
- trx_t* trx) /*!< in, own: trx object */
-{
- ut_a(trx_state_eq(trx, TRX_STATE_PREPARED)
- || (trx_state_eq(trx, TRX_STATE_ACTIVE)
- && trx->is_recovered
- && (srv_read_only_mode
- || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO)));
- ut_a(trx->magic_n == TRX_MAGIC_N);
-
- lock_trx_release_locks(trx);
- trx_undo_free_prepared(trx);
-
- assert_trx_in_rw_list(trx);
-
- ut_a(!trx->read_only);
-
- UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
- ut_d(trx->in_rw_trx_list = FALSE);
-
- mutex_enter(&trx_sys->mutex);
- trx_release_descriptor(trx);
- mutex_exit(&trx_sys->mutex);
-
- /* Undo trx_resurrect_table_locks(). Re-initialising the list makes
- its length zero, which trx_free_low() asserts. */
- UT_LIST_INIT(trx->lock.trx_locks);
-
- trx_free_low(trx);
-
- ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->rw_trx_list));
-}
-
-/********************************************************************//**
-Frees a transaction object for MySQL.
-Releases trx->distinct_page_access_hash if present, removes the trx from
-trx_sys->mysql_trx_list under trx_sys->mutex, and then delegates to
-trx_free_for_background(). That function tests the hash pointer again;
-because it is reset to NULL here, the second test is a no-op. */
-UNIV_INTERN
-void
-trx_free_for_mysql(
-/*===============*/
- trx_t* trx) /*!< in, own: trx object */
-{
- if (trx->distinct_page_access_hash)
- {
- mem_free(trx->distinct_page_access_hash);
- trx->distinct_page_access_hash= NULL;
- }
-
- mutex_enter(&trx_sys->mutex);
-
- ut_ad(trx->in_mysql_trx_list);
- ut_d(trx->in_mysql_trx_list = FALSE);
- UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
-
- ut_ad(trx_sys_validate_trx_list());
-
- mutex_exit(&trx_sys->mutex);
-
- trx_free_for_background(trx);
-}
-
-/****************************************************************//**
-Inserts the trx handle in the trx system trx list in the right position.
-The list is sorted on the trx id so that the biggest id is at the list
-start. This function is used at the database startup to insert incomplete
-transactions to the list.
-The list is scanned from its head for the first element whose id is not
-greater than trx->id; trx is linked immediately in front of that
-element, or appended at the tail when no such element exists. Equal ids
-are rejected by a debug assertion. The function asserts (ut_a) that
-srv_is_being_started is set. */
-static
-void
-trx_list_rw_insert_ordered(
-/*=======================*/
- trx_t* trx) /*!< in: trx handle */
-{
- trx_t* trx2;
-
- ut_ad(!trx->read_only);
-
- ut_d(trx->start_file = __FILE__);
- ut_d(trx->start_line = __LINE__);
-
- ut_a(srv_is_being_started);
- ut_ad(!trx->in_ro_trx_list);
- ut_ad(!trx->in_rw_trx_list);
- ut_ad(trx->state != TRX_STATE_NOT_STARTED);
- ut_ad(trx->is_recovered);
-
- for (trx2 = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx2 != NULL;
- trx2 = UT_LIST_GET_NEXT(trx_list, trx2)) {
-
- assert_trx_in_rw_list(trx2);
-
- if (trx->id >= trx2->id) {
-
- ut_ad(trx->id > trx2->id);
- break;
- }
- }
-
- if (trx2 != NULL) {
- trx2 = UT_LIST_GET_PREV(trx_list, trx2); /* predecessor of the match */
-
- if (trx2 == NULL) {
- UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
- } else {
- UT_LIST_INSERT_AFTER(
- trx_list, trx_sys->rw_trx_list, trx2, trx);
- }
- } else {
- UT_LIST_ADD_LAST(trx_list, trx_sys->rw_trx_list, trx);
- }
-
-#ifdef UNIV_DEBUG
- if (trx->id > trx_sys->rw_max_trx_id) {
- trx_sys->rw_max_trx_id = trx->id;
- }
-#endif /* UNIV_DEBUG */
-
- ut_ad(!trx->in_rw_trx_list);
- ut_d(trx->in_rw_trx_list = TRUE);
-}
-
-/****************************************************************//**
-Resurrect the table locks for a resurrected transaction.
-Nothing is done when the trx is already COMMITTED_IN_MEMORY or the undo
-log is empty. Otherwise the undo log is walked backwards from its top
-record with trx_undo_get_prev_rec(), and the table id of every record
-is collected into a set. After the mini-transaction is committed, each
-collected table is opened by id: a table whose file is unreadable or
-that is temporary is closed and removed from the dictionary cache;
-for any other table lock_table_ix_resurrect() is called and the table
-is closed again. Table ids that cannot be opened are skipped. */
-static
-void
-trx_resurrect_table_locks(
-/*======================*/
- trx_t* trx, /*!< in/out: transaction */
- const trx_undo_t* undo) /*!< in: undo log */
-{
- mtr_t mtr;
- page_t* undo_page;
- trx_undo_rec_t* undo_rec;
- table_id_set tables;
-
- ut_ad(undo == trx->insert_undo || undo == trx->update_undo);
-
- if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
- || undo->empty) {
- return;
- }
-
- mtr_start(&mtr);
- /* trx_rseg_mem_create() may have acquired an X-latch on this
- page, so we cannot acquire an S-latch. */
- undo_page = trx_undo_page_get(
- undo->space, undo->zip_size, undo->top_page_no, &mtr);
- undo_rec = undo_page + undo->top_offset;
-
- do {
- ulint type;
- ulint cmpl_info;
- bool updated_extern;
- undo_no_t undo_no;
- table_id_t table_id;
-
- page_t* undo_rec_page = page_align(undo_rec);
-
- if (undo_rec_page != undo_page) {
- if (!mtr_memo_release(&mtr,
- buf_block_align(undo_page),
- MTR_MEMO_PAGE_X_FIX)) {
- /* The page of the previous undo_rec
- should have been latched by
- trx_undo_page_get() or
- trx_undo_get_prev_rec(). */
- ut_ad(0);
- }
-
- undo_page = undo_rec_page;
- }
-
- trx_undo_rec_get_pars(
- undo_rec, &type, &cmpl_info,
- &updated_extern, &undo_no, &table_id);
- tables.insert(table_id); /* only table_id is used from the parsed fields */
-
- undo_rec = trx_undo_get_prev_rec(
- undo_rec, undo->hdr_page_no,
- undo->hdr_offset, false, &mtr);
- } while (undo_rec);
-
- mtr_commit(&mtr);
-
- for (table_id_set::const_iterator i = tables.begin();
- i != tables.end(); i++) {
- if (dict_table_t* table = dict_table_open_on_id(
- *i, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) {
- if (table->file_unreadable
- || dict_table_is_temporary(table)) {
- mutex_enter(&dict_sys->mutex);
- dict_table_close(table, TRUE, FALSE);
- dict_table_remove_from_cache(table);
- mutex_exit(&dict_sys->mutex);
- continue;
- }
-
- lock_table_ix_resurrect(table, trx);
-
- DBUG_PRINT("ib_trx",
- ("resurrect" TRX_ID_FMT
- " table '%s' IX lock from %s undo",
- trx->id, table->name,
- undo == trx->insert_undo
- ? "insert" : "update"));
-
- dict_table_close(table, FALSE, FALSE);
- }
- }
-}
-
-/****************************************************************//**
-Resurrect the transactions that were doing inserts the time of the
-crash, they need to be undone.
-A new trx object is allocated with trx_allocate_for_background() and
-filled from the insert undo log: rseg, xid, id and insert_undo are
-copied and is_recovered is set. The resulting state depends on
-undo->state:
-- TRX_UNDO_ACTIVE: state ACTIVE, trx->no = TRX_ID_MAX;
-- TRX_UNDO_PREPARED: state PREPARED (and both prepared counters in
-  trx_sys are incremented) when srv_force_recovery == 0 and
-  IS_XTRABACKUP() is false; otherwise state ACTIVE; trx->no = trx->id;
-- any other undo state: COMMITTED_IN_MEMORY, trx->no = trx->id.
-The trx is not inserted into any list here.
-@return trx_t instance */
-static
-trx_t*
-trx_resurrect_insert(
-/*=================*/
- trx_undo_t* undo, /*!< in: entry to UNDO */
- trx_rseg_t* rseg) /*!< in: rollback segment */
-{
- trx_t* trx;
-
- trx = trx_allocate_for_background();
-
- trx->rseg = rseg;
- trx->xid = undo->xid;
- trx->id = undo->trx_id;
- trx->insert_undo = undo;
- trx->is_recovered = TRUE;
-
- /* This is single-threaded startup code, we do not need the
- protection of trx->mutex or trx_sys->mutex here. */
-
- if (undo->state != TRX_UNDO_ACTIVE) {
-
- /* Prepared transactions are left in the prepared state
- waiting for a commit or abort decision from MySQL */
-
- if (undo->state == TRX_UNDO_PREPARED) {
-
- fprintf(stderr,
- "InnoDB: Transaction " TRX_ID_FMT " was in the"
- " XA prepared state.\n", trx->id);
-
- if (srv_force_recovery == 0) {
-
- /* XtraBackup should rollback prepared XA
- transactions */
- if (IS_XTRABACKUP()) {
- trx->state = TRX_STATE_ACTIVE;
- }
- else {
- trx->state = TRX_STATE_PREPARED;
- trx_sys->n_prepared_trx++;
- trx_sys->n_prepared_recovered_trx++;
- }
- } else {
- fprintf(stderr,
- "InnoDB: Since innodb_force_recovery"
- " > 0, we will rollback it anyway.\n");
-
- trx->state = TRX_STATE_ACTIVE;
- }
- } else {
- trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
- }
-
- /* We give a dummy value for the trx no; this should have no
- relevance since purge is not interested in committed
- transaction numbers, unless they are in the history
- list, in which case it looks the number from the disk based
- undo log structure */
-
- trx->no = trx->id;
- } else {
- trx->state = TRX_STATE_ACTIVE;
-
- /* A running transaction always has the number
- field inited to TRX_ID_MAX */
-
- trx->no = TRX_ID_MAX;
- }
-
- /* trx_start_low() is not called with resurrect, so need to initialize
- start time here.*/
- if (trx->state == TRX_STATE_ACTIVE
- || trx->state == TRX_STATE_PREPARED) {
- trx->start_time = ut_time();
- }
-
- if (undo->dict_operation) {
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- trx->table_id = undo->table_id;
- }
-
- if (!undo->empty) {
- trx->undo_no = undo->top_undo_no + 1; /* one past the top undo record */
- }
-
- return(trx);
-}
-
-/****************************************************************//**
-Prepared transactions are left in the prepared state waiting for a
-commit or abort decision from MySQL.
-Called from trx_resurrect_update() when the update undo log is not in
-state TRX_UNDO_ACTIVE. Resulting trx->state:
-- undo PREPARED and srv_force_recovery == 0: PREPARED, or ACTIVE when
-  IS_XTRABACKUP() is true. The prepared counters in trx_sys are
-  incremented only if the trx was still NOT_STARTED on entry and
-  IS_XTRABACKUP() is false; a trx that is not NOT_STARTED is expected
-  (debug assertion) to be PREPARED already.
-- undo PREPARED and srv_force_recovery != 0: ACTIVE.
-- any other undo state: COMMITTED_IN_MEMORY. */
-static
-void
-trx_resurrect_update_in_prepared_state(
-/*===================================*/
- trx_t* trx, /*!< in,out: transaction */
- const trx_undo_t* undo) /*!< in: update UNDO record */
-{
- /* This is single-threaded startup code, we do not need the
- protection of trx->mutex or trx_sys->mutex here. */
-
- if (undo->state == TRX_UNDO_PREPARED) {
- fprintf(stderr,
- "InnoDB: Transaction " TRX_ID_FMT
- " was in the XA prepared state.\n", trx->id);
-
- if (srv_force_recovery == 0) {
- if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
- if (!IS_XTRABACKUP()) {
- trx_sys->n_prepared_trx++;
- trx_sys->n_prepared_recovered_trx++;
- }
- } else {
- ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
- }
- /* XtraBackup should rollback prepared XA
- transactions */
- trx->state = IS_XTRABACKUP()?TRX_STATE_ACTIVE: TRX_STATE_PREPARED;
- } else {
- fprintf(stderr,
- "InnoDB: Since innodb_force_recovery"
- " > 0, we will rollback it anyway.\n");
-
- trx->state = TRX_STATE_ACTIVE;
- }
- } else {
- trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
- }
-}
-
-/****************************************************************//**
-Resurrect the transactions that were doing updates the time of the
-crash, they need to be undone.
-Unlike trx_resurrect_insert() this works on a trx object supplied by the
-caller, which may be freshly allocated or one that was already set up
-from an insert undo log. rseg, xid, id, update_undo and is_recovered are
-(re)assigned from the update undo log. For a non-ACTIVE undo log the
-state is decided by trx_resurrect_update_in_prepared_state() and
-trx->no is set to trx->id; for an ACTIVE one the state becomes ACTIVE
-and trx->no is TRX_ID_MAX. trx->undo_no is only ever raised here, never
-lowered. */
-static
-void
-trx_resurrect_update(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- trx_undo_t* undo, /*!< in/out: update UNDO record */
- trx_rseg_t* rseg) /*!< in/out: rollback segment */
-{
- trx->rseg = rseg;
- trx->xid = undo->xid;
- trx->id = undo->trx_id;
- trx->update_undo = undo;
- trx->is_recovered = TRUE;
-
- /* This is single-threaded startup code, we do not need the
- protection of trx->mutex or trx_sys->mutex here. */
-
- if (undo->state != TRX_UNDO_ACTIVE) {
- trx_resurrect_update_in_prepared_state(trx, undo);
-
- /* We give a dummy value for the trx number */
-
- trx->no = trx->id;
-
- } else {
- trx->state = TRX_STATE_ACTIVE;
-
- /* A running transaction always has the number field inited to
- TRX_ID_MAX */
-
- trx->no = TRX_ID_MAX;
- }
-
- /* trx_start_low() is not called with resurrect, so need to initialize
- start time here.*/
- if (trx->state == TRX_STATE_ACTIVE
- || trx->state == TRX_STATE_PREPARED) {
- trx->start_time = ut_time();
- }
-
- if (undo->dict_operation) {
- trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
- trx->table_id = undo->table_id;
- }
-
- if (!undo->empty && undo->top_undo_no >= trx->undo_no) {
-
- trx->undo_no = undo->top_undo_no + 1; /* keep the larger of the two */
- }
-}
-
-/****************************************************************//**
-Creates trx objects for transactions and initializes the trx list of
-trx_sys at database start. Rollback segment and undo log lists must
-already exist when this function is called, because the lists of
-transactions to be rolled back or cleaned up are built based on the
-undo log lists.
-For every non-NULL slot of trx_sys->rseg_array the insert undo list is
-processed first and the update undo list second. An update undo log
-whose trx id is already on trx_sys->rw_trx_list reuses that trx object;
-otherwise a new one is allocated. A newly created trx gets a descriptor
-slot only when it ends up ACTIVE or PREPARED, and is always inserted
-into rw_trx_list in id order. Table locks are resurrected for every
-undo log visited. The function asserts that srv_is_being_started is
-set. */
-UNIV_INTERN
-void
-trx_lists_init_at_db_start(void)
-/*============================*/
-{
- ulint i;
-
- ut_a(srv_is_being_started);
-
- UT_LIST_INIT(trx_sys->ro_trx_list);
- UT_LIST_INIT(trx_sys->rw_trx_list);
- UT_LIST_INIT(trx_sys->trx_serial_list);
-
- /* Look from the rollback segments if there exist undo logs for
- transactions */
-
- for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
- trx_undo_t* undo;
- trx_rseg_t* rseg;
-
- rseg = trx_sys->rseg_array[i];
-
- if (rseg == NULL) {
- continue;
- }
-
- /* Resurrect transactions that were doing inserts. */
- for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
- undo != NULL;
- undo = UT_LIST_GET_NEXT(undo_list, undo)) {
- trx_t* trx;
-
- trx = trx_resurrect_insert(undo, rseg);
-
- if (trx->state == TRX_STATE_ACTIVE ||
- trx->state == TRX_STATE_PREPARED) {
-
- trx_reserve_descriptor(trx);
- }
- trx_list_rw_insert_ordered(trx);
-
- trx_resurrect_table_locks(trx, undo);
- }
-
- /* Resurrect transactions that were doing updates. */
- for (undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
- undo != NULL;
- undo = UT_LIST_GET_NEXT(undo_list, undo)) {
- trx_t* trx;
- ibool trx_created;
-
- /* Check the trx_sys->rw_trx_list first. */
- mutex_enter(&trx_sys->mutex);
- trx = trx_get_rw_trx_by_id(undo->trx_id);
- mutex_exit(&trx_sys->mutex);
-
- if (trx == NULL) {
- trx = trx_allocate_for_background();
- trx_created = TRUE;
- } else {
- trx_created = FALSE;
- }
-
- trx_resurrect_update(trx, undo, rseg);
-
- if (trx_created) {
- if (trx->state == TRX_STATE_ACTIVE ||
- trx->state == TRX_STATE_PREPARED) {
-
- trx_reserve_descriptor(trx);
- }
- trx_list_rw_insert_ordered(trx);
- }
-
- trx_resurrect_table_locks(trx, undo);
- }
- }
-}
-
-/******************************************************************//**
-Assigns a rollback segment to a transaction in a round-robin fashion.
-Returns NULL when srv_read_only_mode is set (max_undo_logs must then be
-ULONG_UNDEFINED) or when trx_sys->rseg_array[0] is NULL. Otherwise the
-search starts at slot (latest_rseg % max_undo_logs), where latest_rseg
-is a function-local static counter that is post-incremented on every
-call. A NULL slot restarts the search at slot 0; a segment in space 0 is
-skipped when n_tablespaces > 0 and slot 1 is populated.
-NOTE(review): latest_rseg is updated without any lock visible in this
-function; the "breaks true round robin" remark below may refer to
-that - confirm.
-NOTE(review): inside the loop i is advanced with i + 1 and is not
-reduced modulo max_undo_logs or the array size; termination appears to
-rely on reaching either a NULL slot or an acceptable segment first -
-confirm the array bound cannot be exceeded.
-@return assigned rollback segment instance */
-static
-trx_rseg_t*
-trx_assign_rseg_low(
-/*================*/
- ulong max_undo_logs, /*!< in: maximum number of UNDO logs to use */
- ulint n_tablespaces) /*!< in: number of rollback tablespaces */
-{
- ulint i;
- trx_rseg_t* rseg;
- static ulint latest_rseg = 0;
-
- if (srv_read_only_mode) {
- ut_a(max_undo_logs == ULONG_UNDEFINED);
- return(NULL);
- }
-
- /* This breaks true round robin but that should be OK. */
-
- ut_a(max_undo_logs > 0 && max_undo_logs <= TRX_SYS_N_RSEGS);
-
- i = latest_rseg++;
- i %= max_undo_logs;
-
- /* Note: The assumption here is that there can't be any gaps in
- the array. Once we implement more flexible rollback segment
- management this may not hold. The check below does not assert;
- it returns NULL when the first slot is empty. */
-
- if (trx_sys->rseg_array[0] == NULL) {
- return(NULL);
- }
-
- /* Skip the system tablespace if we have more than one tablespace
- defined for rollback segments. We want all UNDO records to be in
- the non-system tablespaces. */
-
- do {
- rseg = trx_sys->rseg_array[i];
- ut_a(rseg == NULL || i == rseg->id);
-
- i = (rseg == NULL) ? 0 : i + 1;
-
- } while (rseg == NULL
- || (rseg->space == 0
- && n_tablespaces > 0
- && trx_sys->rseg_array[1] != NULL));
-
- return(rseg);
-}
-
-/****************************************************************//**
-Assign a read-only transaction a rollback-segment, if it is attempting
-to write to a TEMPORARY table.
-Preconditions, all enforced with ut_a(): the trx has no rollback segment
-yet, is flagged read_only, the server is not in srv_read_only_mode, and
-the trx is not an autocommit non-locking one. The segment is chosen by
-trx_assign_rseg_low() using srv_undo_logs and srv_undo_tablespaces. */
-UNIV_INTERN
-void
-trx_assign_rseg(
-/*============*/
- trx_t* trx) /*!< A read-only transaction that
- needs to be assigned a RBS. */
-{
- ut_a(trx->rseg == 0);
- ut_a(trx->read_only);
- ut_a(!srv_read_only_mode);
- ut_a(!trx_is_autocommit_non_locking(trx));
-
- trx->rseg = trx_assign_rseg_low(srv_undo_logs, srv_undo_tablespaces);
-}
-
-/****************************************************************//**
-Starts a transaction.
-Outline:
-1) Derive trx->auto_commit and trx->read_only from the API flags, the
-   MySQL thread handle, trx->ddl and srv_read_only_mode. A trx that is
-   not autocommit gets will_lock incremented; an autocommit trx with
-   will_lock == 0 is forced to read_only.
-2) A read-write trx is given a rollback segment by
-   trx_assign_rseg_low().
-3) With WITH_WSREP the xid is cleared and its formatID set to -1.
-4) Under trx_sys->mutex: the state becomes ACTIVE, a new trx id is
-   assigned, fake_changes is cached from the thread handle, and the trx
-   is linked to ro_trx_list (read-only, unless it is autocommit
-   non-locking) or to rw_trx_list (read-write, which also reserves a
-   descriptor slot).
-5) After the mutex is released the start times are recorded and the
-   MONITOR_TRX_ACTIVE counter is incremented.
-Debug assertions require an unstarted, non-recovered trx without a
-rollback segment or locks. */
-static
-void
-trx_start_low(
-/*==========*/
- trx_t* trx) /*!< in: transaction */
-{
- ut_ad(trx->rseg == NULL);
-
- ut_ad(trx->start_file != 0);
- ut_ad(trx->start_line != 0);
- ut_ad(!trx->is_recovered);
- ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
- ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
-
- /* Check whether it is an AUTOCOMMIT SELECT */
- trx->auto_commit = (trx->api_trx && trx->api_auto_commit)
- || thd_trx_is_auto_commit(trx->mysql_thd);
-
- trx->read_only =
- (trx->api_trx && !trx->read_write)
- || (!trx->ddl && thd_trx_is_read_only(trx->mysql_thd))
- || srv_read_only_mode;
-
- if (!trx->auto_commit) {
- ++trx->will_lock;
- } else if (trx->will_lock == 0) {
- trx->read_only = TRUE;
- }
-
- if (!trx->read_only) {
- trx->rseg = trx_assign_rseg_low(
- srv_undo_logs, srv_undo_tablespaces);
- }
-
-#ifdef WITH_WSREP
- memset(&trx->xid, 0, sizeof(trx->xid));
- trx->xid.formatID = -1;
-#endif /* WITH_WSREP */
-
- /* The initial value for trx->no: TRX_ID_MAX is used in
- read_view_open_now: */
-
- trx->no = TRX_ID_MAX;
-
- ut_a(ib_vector_is_empty(trx->autoinc_locks));
- ut_a(ib_vector_is_empty(trx->lock.table_locks));
-
- mutex_enter(&trx_sys->mutex);
-
- /* If this transaction came from trx_allocate_for_mysql(),
- trx->in_mysql_trx_list would hold. In that case, the trx->state
- change must be protected by the trx_sys->mutex, so that
- lock_print_info_all_transactions() will have a consistent view. */
-
- trx->state = TRX_STATE_ACTIVE;
-
- trx->id = trx_sys_get_new_trx_id();
-
- /* Cache the state of fake_changes that transaction will use for
- lifetime. Any change in session/global fake_changes configuration during
- lifetime of transaction will not be honored by already started
- transaction. */
- trx->fake_changes = thd_fake_changes(trx->mysql_thd);
-
- ut_ad(!trx->in_rw_trx_list);
- ut_ad(!trx->in_ro_trx_list);
-
- if (trx->read_only) {
-
- /* Note: The trx_sys_t::ro_trx_list doesn't really need to
- be ordered, we should exploit this using a list type that
- doesn't need a list wide lock to increase concurrency. */
-
- if (!trx_is_autocommit_non_locking(trx)) {
- UT_LIST_ADD_FIRST(trx_list, trx_sys->ro_trx_list, trx);
- ut_d(trx->in_ro_trx_list = TRUE);
- }
- } else {
-
- ut_ad(trx->rseg != NULL
- || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
-
- ut_ad(!trx_is_autocommit_non_locking(trx));
- UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
- ut_d(trx->in_rw_trx_list = TRUE);
-
-#ifdef UNIV_DEBUG
- if (trx->id > trx_sys->rw_max_trx_id) {
- trx_sys->rw_max_trx_id = trx->id;
- }
-#endif /* UNIV_DEBUG */
-
- trx_reserve_descriptor(trx);
- }
-
- ut_ad(trx_sys_validate_trx_list());
-
- mutex_exit(&trx_sys->mutex);
-
- trx->start_time = ut_time();
-
- trx->start_time_micro =
- trx->mysql_thd ? thd_query_start_micro(trx->mysql_thd) : 0;
-
- MONITOR_INC(MONITOR_TRX_ACTIVE);
-}
-
-/****************************************************************//**
-Set the transaction serialisation number.
-The caller must own the mutex of trx->rseg (checked with ut_ad). Under
-trx_sys->mutex a new id is stored in trx->no and the trx is appended to
-trx_sys->trx_serial_list unless it is already flagged as being on it.
-If the rollback segment is empty (last_page_no == FIL_NULL), an entry
-{rseg, trx->no} is pushed onto purge_sys->ib_bh under
-purge_sys->bh_mutex; that mutex is acquired before trx_sys->mutex is
-released. trx_sys->mutex is released on every path before returning. */
-static
-void
-trx_serialisation_number_get(
-/*=========================*/
- trx_t* trx) /*!< in: transaction */
-{
- trx_rseg_t* rseg;
-
- rseg = trx->rseg;
-
- ut_ad(mutex_own(&rseg->mutex));
-
- mutex_enter(&trx_sys->mutex);
-
- trx->no = trx_sys_get_new_trx_id();
-
- if (UNIV_LIKELY(!trx->in_trx_serial_list)) {
-
- UT_LIST_ADD_LAST(trx_serial_list, trx_sys->trx_serial_list,
- trx);
-
- trx->in_trx_serial_list = true;
- }
-
- /* If the rollback segment is not empty then the
- new trx_t::no can't be less than any trx_t::no
- already in the rollback segment. User threads only
- produce events when a rollback segment is empty. */
-
- if (rseg->last_page_no == FIL_NULL) {
- void* ptr;
- rseg_queue_t rseg_queue;
-
- rseg_queue.rseg = rseg;
- rseg_queue.trx_no = trx->no;
-
- mutex_enter(&purge_sys->bh_mutex);
-
- /* This is to reduce the pressure on the trx_sys_t::mutex
- though in reality it should make very little (read no)
- difference because this code path is only taken when the
- rbs is empty. */
-
- mutex_exit(&trx_sys->mutex);
-
- ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
- ut_a(ptr);
-
- mutex_exit(&purge_sys->bh_mutex);
- } else {
- mutex_exit(&trx_sys->mutex);
- }
-}
-
-/****************************************************************//**
-Assign the transaction its history serialisation number and write the
-update UNDO log record to the assigned rollback segment.
-rseg->mutex is held while the undo log states are changed, whether or
-not an update undo log exists. With an update undo log the
-serialisation number is obtained first (trx_serialisation_number_get()),
-then the undo state is set and trx_undo_update_cleanup() is called. An
-insert undo log, if any, only has its state set.
-After the mutex is released:
-- with WITH_WSREP, the trx sys header is fetched and the wsrep
-  checkpoint is updated when trx->xid is a wsrep XID;
-- when trx->mysql_log_file_name is a non-empty string, the binlog name
-  and offset are written to the trx sys header and the pointer is reset
-  to NULL. */
-static MY_ATTRIBUTE((nonnull))
-void
-trx_write_serialisation_history(
-/*============================*/
- trx_t* trx, /*!< in/out: transaction */
- mtr_t* mtr) /*!< in/out: mini-transaction */
-{
-#ifdef WITH_WSREP
- trx_sysf_t* sys_header;
-#endif /* WITH_WSREP */
- trx_rseg_t* rseg;
-
- rseg = trx->rseg;
-
- /* Change the undo log segment states from TRX_UNDO_ACTIVE
- to some other state: these modifications to the file data
- structure define the transaction as committed in the file
- based domain, at the serialization point of the log sequence
- number lsn obtained below. */
-
- if (trx->update_undo != NULL) {
- page_t* undo_hdr_page;
- trx_undo_t* undo = trx->update_undo;
-
- /* We have to hold the rseg mutex because update
- log headers have to be put to the history list in the
- (serialisation) order of the UNDO trx number. This is
- required for the purge in-memory data structures too. */
-
- mutex_enter(&rseg->mutex);
-
- /* Assign the transaction serialisation number and also
- update the purge min binary heap if this is the first
- UNDO log being written to the assigned rollback segment. */
-
- trx_serialisation_number_get(trx);
-
- /* It is not necessary to obtain trx->undo_mutex here
- because only a single OS thread is allowed to do the
- transaction commit for this transaction. */
-
- undo_hdr_page = trx_undo_set_state_at_finish(undo, mtr);
-
- trx_undo_update_cleanup(trx, undo_hdr_page, mtr);
- } else {
- mutex_enter(&rseg->mutex);
- }
-
- if (trx->insert_undo != NULL) {
- trx_undo_set_state_at_finish(trx->insert_undo, mtr);
- }
-
- mutex_exit(&rseg->mutex);
-
- MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
-
-#ifdef WITH_WSREP
- sys_header = trx_sysf_get(mtr);
- /* Update latest MySQL wsrep XID in trx sys header. */
- if (wsrep_is_wsrep_xid(&trx->xid))
- {
- trx_sys_update_wsrep_checkpoint(&trx->xid, sys_header, mtr);
- }
-#endif /* WITH_WSREP */
-
- /* Update the latest MySQL binlog name and offset info
- in trx sys header if MySQL binlogging is on or the database
- server is a MySQL replication slave */
-
- if (trx->mysql_log_file_name
- && trx->mysql_log_file_name[0] != '\0') {
-
- trx_sys_update_mysql_binlog_offset(
- trx->mysql_log_file_name,
- trx->mysql_log_offset,
- TRX_SYS_MYSQL_LOG_INFO,
-#ifdef WITH_WSREP
- sys_header,
-#endif /* WITH_WSREP */
- mtr);
-
- trx->mysql_log_file_name = NULL;
- }
-}
-
-/********************************************************************
-Finalize a transaction containing updates for a FTS table.
-The BG_THREAD_STOP bit of fts->fts_status is examined under
-fts->bg_threads_mutex. When it is set nothing is queued and
-ftt->added_doc_ids is left untouched. Otherwise the added doc id list
-is handed to the table's add work queue (fts->add_wq, asserted to be
-non-NULL), using the heap taken from doc_ids->self_heap->arg, and
-ftt->added_doc_ids is cleared because ownership has moved to the queue.
-The queueing itself happens after the mutex has been released. */
-static MY_ATTRIBUTE((nonnull))
-void
-trx_finalize_for_fts_table(
-/*=======================*/
- fts_trx_table_t* ftt) /* in: FTS trx table */
-{
- fts_t* fts = ftt->table->fts;
- fts_doc_ids_t* doc_ids = ftt->added_doc_ids;
-
- mutex_enter(&fts->bg_threads_mutex);
-
- if (fts->fts_status & BG_THREAD_STOP) {
- /* The table is about to be dropped, no use
- adding anything to its work queue. */
-
- mutex_exit(&fts->bg_threads_mutex);
- } else {
- mem_heap_t* heap;
- mutex_exit(&fts->bg_threads_mutex);
-
- ut_a(fts->add_wq);
-
- heap = static_cast<mem_heap_t*>(doc_ids->self_heap->arg);
-
- ib_wqueue_add(fts->add_wq, doc_ids, heap);
-
- /* fts_trx_table_t no longer owns the list. */
- ftt->added_doc_ids = NULL;
- }
-}
-
-/******************************************************************//**
-Finalize a transaction containing updates to FTS tables.
-On commit, the tables recorded in the last element of
-trx->fts_trx->savepoints are visited and every table that has a
-non-NULL added_doc_ids list is passed to trx_finalize_for_fts_table().
-On rollback nothing is queued. In both cases trx->fts_trx is freed and
-the pointer is reset to NULL. */
-static MY_ATTRIBUTE((nonnull))
-void
-trx_finalize_for_fts(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- bool is_commit) /*!< in: true if the transaction was
- committed, false if it was rolled back. */
-{
- if (is_commit) {
- const ib_rbt_node_t* node;
- ib_rbt_t* tables;
- fts_savepoint_t* savepoint;
-
- savepoint = static_cast<fts_savepoint_t*>(
- ib_vector_last(trx->fts_trx->savepoints));
-
- tables = savepoint->tables;
-
- for (node = rbt_first(tables);
- node;
- node = rbt_next(tables, node)) {
- fts_trx_table_t** ftt;
-
- ftt = rbt_value(fts_trx_table_t*, node);
-
- if ((*ftt)->added_doc_ids) {
- trx_finalize_for_fts_table(*ftt);
- }
- }
- }
-
- fts_trx_free(trx->fts_trx);
- trx->fts_trx = NULL;
-}
-
-/**********************************************************************//**
-If required, flushes the log to disk based on the value of
-innodb_flush_log_at_trx_commit.
-The effective setting is read through thd_flush_log_at_trx_commit():
-with a NULL argument when srv_use_global_flush_log_at_trx_commit is
-set, otherwise with trx->mysql_thd. Handling per value:
-- 0: nothing is written here;
-- 1 and 3: log_write_up_to() with the flush flag set unless
-  srv_unix_file_flush_method is SRV_UNIX_NOSYNC;
-- 2: log_write_up_to() with the flush flag FALSE;
-- any other value: ut_error. */
-static
-void
-trx_flush_log_if_needed_low(
-/*========================*/
- lsn_t lsn, /*!< in: lsn up to which logs are to be
- flushed. */
- trx_t* trx) /*!< in: transaction */
-{
- ulint flush_log_at_trx_commit;
-
- flush_log_at_trx_commit = srv_use_global_flush_log_at_trx_commit
- ? thd_flush_log_at_trx_commit(NULL)
- : thd_flush_log_at_trx_commit(trx->mysql_thd);
-
- switch (flush_log_at_trx_commit) {
- case 0:
- /* Do nothing */
- break;
- case 1:
- case 3:
- /* Write the log and optionally flush it to disk */
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
- srv_unix_file_flush_method != SRV_UNIX_NOSYNC);
- break;
- case 2:
- /* Write the log but do not flush it to disk */
- log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
-
- break;
- default:
- ut_error;
- }
-}
-
-/**********************************************************************//**
-If required, flushes the log to disk based on the value of
-innodb_flush_log_at_trx_commit.
-Wrapper around trx_flush_log_if_needed_low() that sets trx->op_info to
-"flushing log" for the duration of the call and resets it to the empty
-string afterwards. */
-static MY_ATTRIBUTE((nonnull))
-void
-trx_flush_log_if_needed(
-/*====================*/
- lsn_t lsn, /*!< in: lsn up to which logs are to be
- flushed. */
- trx_t* trx) /*!< in/out: transaction */
-{
- trx->op_info = "flushing log";
- trx_flush_log_if_needed_low(lsn, trx);
- trx->op_info = "";
-}
-
-/****************************************************************//**
-Commits a transaction in memory. */
-static MY_ATTRIBUTE((nonnull))
-void
-trx_commit_in_memory(
-/*=================*/
- trx_t* trx, /*!< in/out: transaction */
- lsn_t lsn) /*!< in: log sequence number of the mini-transaction
- commit of trx_write_serialisation_history(), or 0
- if the transaction did not modify anything */
-{
- trx->must_flush_log_later = FALSE;
-
- if (trx_is_autocommit_non_locking(trx)) {
- ut_ad(trx->read_only);
- ut_a(!trx->is_recovered);
- ut_ad(trx->rseg == NULL);
- ut_ad(!trx->in_ro_trx_list);
- ut_ad(!trx->in_rw_trx_list);
-
- /* Note: We are asserting without holding the lock mutex. But
- that is OK because this transaction is not waiting and cannot
- be rolled back and no new locks can (or should not) be added
- because it is flagged as a non-locking read-only transaction. */
-
- ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
-
- /* This state change is not protected by any mutex, therefore
- there is an inherent race here around state transition during
- printouts. We ignore this race for the sake of efficiency.
- However, the trx_sys_t::mutex will protect the trx_t instance
- and it cannot be removed from the mysql_trx_list and freed
- without first acquiring the trx_sys_t::mutex. */
-
- ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
-
- trx->state = TRX_STATE_NOT_STARTED;
-
- read_view_remove(trx->global_read_view, false);
-
- MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);
- } else {
- lock_trx_release_locks(trx);
-
- /* Remove the transaction from the list of active
- transactions now that it no longer holds any user locks. */
-
- ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
-
- mutex_enter(&trx_sys->mutex);
-
- assert_trx_in_list(trx);
-
- if (trx->read_only) {
- UT_LIST_REMOVE(trx_list, trx_sys->ro_trx_list, trx);
- ut_d(trx->in_ro_trx_list = FALSE);
- MONITOR_INC(MONITOR_TRX_RO_COMMIT);
- } else {
- UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
- ut_d(trx->in_rw_trx_list = FALSE);
- ut_ad(trx_sys->descr_n_used <=
- UT_LIST_GET_LEN(trx_sys->rw_trx_list));
- MONITOR_INC(MONITOR_TRX_RW_COMMIT);
- }
-
- /* If this transaction came from trx_allocate_for_mysql(),
- trx->in_mysql_trx_list would hold. In that case, the
- trx->state change must be protected by trx_sys->mutex, so that
- lock_print_info_all_transactions() will have a consistent
- view. */
-
- trx->state = TRX_STATE_NOT_STARTED;
-
- /* We already own the trx_sys_t::mutex, by doing it here we
- avoid a potential context switch later. */
- read_view_remove(trx->global_read_view, true);
-
- ut_ad(trx_sys_validate_trx_list());
-
- mutex_exit(&trx_sys->mutex);
- }
-
- if (trx->global_read_view != NULL) {
-
- trx->global_read_view = NULL;
- }
-
- trx->read_view = NULL;
-
- if (lsn) {
- ulint flush_log_at_trx_commit;
-
- if (trx->insert_undo != NULL) {
-
- trx_undo_insert_cleanup(trx);
- }
-
- if (srv_use_global_flush_log_at_trx_commit) {
- flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
- } else {
- flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
- }
-
- /* NOTE that we could possibly make a group commit more
- efficient here: call os_thread_yield here to allow also other
- trxs to come to commit! */
-
- /*-------------------------------------*/
-
- /* Depending on the my.cnf options, we may now write the log
- buffer to the log files, making the transaction durable if
- the OS does not crash. We may also flush the log files to
- disk, making the transaction durable also at an OS crash or a
- power outage.
-
- The idea in InnoDB's group commit is that a group of
- transactions gather behind a trx doing a physical disk write
- to log files, and when that physical write has been completed,
- one of those transactions does a write which commits the whole
- group. Note that this group commit will only bring benefit if
- there are > 2 users in the database. Then at least 2 users can
- gather behind one doing the physical log write to disk.
-
- If we are calling trx_commit() under prepare_commit_mutex, we
- will delay possible log write and flush to a separate function
- trx_commit_complete_for_mysql(), which is only called when the
- thread has released the mutex. This is to make the
- group commit algorithm to work. Otherwise, the prepare_commit
- mutex would serialize all commits and prevent a group of
- transactions from gathering. */
-
- if (trx->flush_log_later) {
- /* Do nothing yet */
- trx->must_flush_log_later = TRUE;
- } else if (flush_log_at_trx_commit == 0
- || thd_requested_durability(trx->mysql_thd)
- == HA_IGNORE_DURABILITY) {
- /* Do nothing */
- } else {
- trx_flush_log_if_needed(lsn, trx);
- }
-
- trx->commit_lsn = lsn;
-
- /* Tell server some activity has happened, since the trx
- does changes something. Background utility threads like
- master thread, purge thread or page_cleaner thread might
- have some work to do. */
- srv_active_wake_master_thread();
- }
-
- /* undo_no is non-zero if we're doing the final commit. */
- bool not_rollback = trx->undo_no != 0;
- /* Free all savepoints, starting from the first. */
- trx_named_savept_t* savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
- trx_roll_savepoints_free(trx, savep);
-
- trx->rseg = NULL;
- trx->undo_no = 0;
- trx->last_sql_stat_start.least_undo_no = 0;
-
- trx->ddl = false;
-#ifdef UNIV_DEBUG
- ut_ad(trx->start_file != 0);
- ut_ad(trx->start_line != 0);
- trx->start_file = 0;
- trx->start_line = 0;
-#endif /* UNIV_DEBUG */
-
- trx->will_lock = 0;
- trx->read_only = FALSE;
- trx->auto_commit = FALSE;
-
- if (trx->fts_trx) {
- trx_finalize_for_fts(trx, not_rollback);
- }
-
- ut_ad(trx->lock.wait_thr == NULL);
- ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
- ut_ad(!trx->in_ro_trx_list);
- ut_ad(!trx->in_rw_trx_list);
-
-#ifdef WITH_WSREP
- if (trx->mysql_thd && wsrep_on(trx->mysql_thd)) {
- trx->lock.was_chosen_as_deadlock_victim = FALSE;
- }
-#endif
- trx->dict_operation = TRX_DICT_OP_NONE;
-
- trx->error_state = DB_SUCCESS;
-
- /* trx->in_mysql_trx_list would hold between
- trx_allocate_for_mysql() and trx_free_for_mysql(). It does not
- hold for recovered transactions or system transactions. */
-}
-
-/****************************************************************//**
-Commits a transaction and a mini-transaction. */
-UNIV_INTERN
-void
-trx_commit_low(
-/*===========*/
- trx_t* trx, /*!< in/out: transaction */
- mtr_t* mtr) /*!< in/out: mini-transaction (will be committed),
- or NULL if trx made no modifications */
-{
- lsn_t lsn;
-
- assert_trx_nonlocking_or_in_list(trx);
- ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
- ut_ad(!mtr || mtr->state == MTR_ACTIVE);
- ut_ad(!mtr == !(trx->insert_undo || trx->update_undo));
-
- /* undo_no is non-zero if we're doing the final commit. */
- if (trx->fts_trx && trx->undo_no != 0) {
- dberr_t error;
-
- ut_a(!trx_is_autocommit_non_locking(trx));
-
- error = fts_commit(trx);
-
- /* FTS-FIXME: Temporarily tolerate DB_DUPLICATE_KEY
- instead of dying. This is a possible scenario if there
- is a crash between insert to DELETED table committing
- and transaction committing. The fix would be able to
- return error from this function */
- if (error != DB_SUCCESS && error != DB_DUPLICATE_KEY) {
- /* FTS-FIXME: once we can return values from this
- function, we should do so and signal an error
- instead of just dying. */
-
- ut_error;
- }
- }
-
- if (mtr) {
- trx_write_serialisation_history(trx, mtr);
- /* The following call commits the mini-transaction, making the
- whole transaction committed in the file-based world, at this
- log sequence number. The transaction becomes 'durable' when
- we write the log to disk, but in the logical sense the commit
- in the file-based data structures (undo logs etc.) happens
- here.
-
- NOTE that transaction numbers, which are assigned only to
- transactions with an update undo log, do not necessarily come
- in exactly the same order as commit lsn's, if the transactions
- have different rollback segments. To get exactly the same
- order we should hold the kernel mutex up to this point,
- adding to the contention of the kernel mutex. However, if
- a transaction T2 is able to see modifications made by
- a transaction T1, T2 will always get a bigger transaction
- number and a bigger commit lsn than T1. */
-
- /*--------------*/
- mtr_commit(mtr);
- /*--------------*/
- lsn = mtr->end_lsn;
- } else {
- lsn = 0;
- }
-
- trx_commit_in_memory(trx, lsn);
-}
-
-/****************************************************************//**
-Commits a transaction. */
-UNIV_INTERN
-void
-trx_commit(
-/*=======*/
- trx_t* trx) /*!< in/out: transaction */
-{
- mtr_t local_mtr;
- mtr_t* mtr;
-
- if (trx->insert_undo || trx->update_undo) {
- mtr = &local_mtr;
- mtr_start(mtr);
- } else {
- mtr = NULL;
- }
-
- trx_commit_low(trx, mtr);
-}
-
-/****************************************************************//**
-Cleans up a transaction at database startup. The cleanup is needed if
-the transaction already got to the middle of a commit when the database
-crashed, and we cannot roll it back. */
-UNIV_INTERN
-void
-trx_cleanup_at_db_startup(
-/*======================*/
- trx_t* trx) /*!< in: transaction */
-{
- ut_ad(trx->is_recovered);
-
- if (trx->insert_undo != NULL) {
-
- trx_undo_insert_cleanup(trx);
- }
-
- trx->rseg = NULL;
- trx->undo_no = 0;
- trx->last_sql_stat_start.least_undo_no = 0;
-
- mutex_enter(&trx_sys->mutex);
-
- ut_a(!trx->read_only);
-
- UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
- ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->rw_trx_list));
-
- assert_trx_in_rw_list(trx);
- ut_d(trx->in_rw_trx_list = FALSE);
-
- trx->state = TRX_STATE_NOT_STARTED;
- trx_release_descriptor(trx);
-
- mutex_exit(&trx_sys->mutex);
-
- /* Change the transaction state without mutex protection, now
- that it no longer is in the trx_list. Recovered transactions
- are never placed in the mysql_trx_list. */
- ut_ad(trx->is_recovered);
- ut_ad(!trx->in_ro_trx_list);
- ut_ad(!trx->in_rw_trx_list);
- ut_ad(!trx->in_mysql_trx_list);
-}
-
-/********************************************************************//**
-Assigns a read view for a consistent read query. All the consistent reads
-within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction.
-@return consistent read view */
-UNIV_INTERN
-read_view_t*
-trx_assign_read_view(
-/*=================*/
- trx_t* trx) /*!< in: active transaction */
-{
- ut_ad(trx->state == TRX_STATE_ACTIVE);
-
- if (trx->read_view != NULL) {
- return(trx->read_view);
- }
-
- trx->read_view = read_view_open_now(trx->id, trx->prebuilt_view);
- trx->global_read_view = trx->read_view;
-
- return(trx->read_view);
-}
-
-/********************************************************************//**
-Clones the read view from another transaction. All consistent reads within
-the receiver transaction will get the same read view as the donor transaction
-@return read view clone */
-UNIV_INTERN
-read_view_t*
-trx_clone_read_view(
-/*================*/
- trx_t* trx, /*!< in: receiver transaction */
- trx_t* from_trx) /*!< in: donor transaction */
-{
- ut_ad(lock_mutex_own());
- ut_ad(mutex_own(&trx_sys->mutex));
- ut_ad(trx_mutex_own(from_trx));
- ut_ad(trx->read_view == NULL);
-
- if (from_trx->state != TRX_STATE_ACTIVE ||
- from_trx->read_view == NULL) {
-
- return(NULL);
- }
-
- trx->read_view = read_view_clone(from_trx->read_view,
- trx->prebuilt_view);
-
- read_view_add(trx->read_view);
-
- trx->global_read_view = trx->read_view;
-
- return(trx->read_view);
-}
-
-/****************************************************************//**
-Prepares a transaction for commit/rollback. */
-UNIV_INTERN
-void
-trx_commit_or_rollback_prepare(
-/*===========================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- /* We are reading trx->state without holding trx_sys->mutex
- here, because the commit or rollback should be invoked for a
- running (or recovered prepared) transaction that is associated
- with the current thread. */
-
- switch (trx->state) {
- case TRX_STATE_NOT_STARTED:
-#ifdef WITH_WSREP
- ut_d(trx->start_file = __FILE__);
- ut_d(trx->start_line = __LINE__);
-#endif /* WITH_WSREP */
- trx_start_low(trx);
- /* fall through */
- case TRX_STATE_ACTIVE:
- case TRX_STATE_PREPARED:
- /* If the trx is in a lock wait state, moves the waiting
- query thread to the suspended state */
-
- if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
-
- ulint sec;
- ulint ms;
- ib_uint64_t now;
-
- ut_a(trx->lock.wait_thr != NULL);
- trx->lock.wait_thr->state = QUE_THR_SUSPENDED;
- trx->lock.wait_thr = NULL;
-
- if (UNIV_UNLIKELY(trx->take_stats)) {
- ut_usectime(&sec, &ms);
- now = (ib_uint64_t)sec * 1000000 + ms;
- trx->lock_que_wait_timer
- += (ulint)
- (now - trx->lock_que_wait_ustarted);
- }
-
- trx->lock.que_state = TRX_QUE_RUNNING;
- }
-
- ut_a(trx->lock.n_active_thrs == 1);
- return;
- case TRX_STATE_COMMITTED_IN_MEMORY:
- break;
- }
-
- ut_error;
-}
-
-/*********************************************************************//**
-Creates a commit command node struct.
-@return own: commit node struct */
-UNIV_INTERN
-commit_node_t*
-trx_commit_node_create(
-/*===================*/
- mem_heap_t* heap) /*!< in: mem heap where created */
-{
- commit_node_t* node;
-
- node = static_cast<commit_node_t*>(mem_heap_alloc(heap, sizeof(*node)));
- node->common.type = QUE_NODE_COMMIT;
- node->state = COMMIT_NODE_SEND;
-
- return(node);
-}
-
-/***********************************************************//**
-Performs an execution step for a commit type node in a query graph.
-@return query thread to run next, or NULL */
-UNIV_INTERN
-que_thr_t*
-trx_commit_step(
-/*============*/
- que_thr_t* thr) /*!< in: query thread */
-{
- commit_node_t* node;
-
- node = static_cast<commit_node_t*>(thr->run_node);
-
- ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
-
- if (thr->prev_node == que_node_get_parent(node)) {
- node->state = COMMIT_NODE_SEND;
- }
-
- if (node->state == COMMIT_NODE_SEND) {
- trx_t* trx;
-
- node->state = COMMIT_NODE_WAIT;
-
- trx = thr_get_trx(thr);
-
- ut_a(trx->lock.wait_thr == NULL);
- ut_a(trx->lock.que_state != TRX_QUE_LOCK_WAIT);
-
- trx_commit_or_rollback_prepare(trx);
-
- trx->lock.que_state = TRX_QUE_COMMITTING;
-
- trx_commit(trx);
-
- ut_ad(trx->lock.wait_thr == NULL);
-
- trx->lock.que_state = TRX_QUE_RUNNING;
-
- thr = NULL;
- } else {
- ut_ad(node->state == COMMIT_NODE_WAIT);
-
- node->state = COMMIT_NODE_SEND;
-
- thr->run_node = que_node_get_parent(node);
- }
-
- return(thr);
-}
-
-/**********************************************************************//**
-Does the transaction commit for MySQL.
-@return DB_SUCCESS or error number */
-UNIV_INTERN
-dberr_t
-trx_commit_for_mysql(
-/*=================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- /* Because we do not do the commit by sending an Innobase
- sig to the transaction, we must here make sure that trx has been
- started. */
-
- ut_a(trx);
-
- switch (trx->state) {
- case TRX_STATE_NOT_STARTED:
- /* Update the info whether we should skip XA steps that eat
- CPU time.
-
- For the duration of the transaction trx->support_xa is
- not reread from thd so any changes in the value take
- effect in the next transaction. This is to avoid a
- scenario where some undo log records generated by a
- transaction contain XA information and other undo log
- records, generated by the same transaction do not. */
- trx->support_xa = thd_supports_xa(trx->mysql_thd);
-
- ut_d(trx->start_file = __FILE__);
- ut_d(trx->start_line = __LINE__);
-
- trx_start_low(trx);
- /* fall through */
- case TRX_STATE_ACTIVE:
- case TRX_STATE_PREPARED:
- trx->op_info = "committing";
- trx_commit(trx);
- MONITOR_DEC(MONITOR_TRX_ACTIVE);
- trx->op_info = "";
- return(DB_SUCCESS);
- case TRX_STATE_COMMITTED_IN_MEMORY:
- break;
- }
- ut_error;
- return(DB_CORRUPTION);
-}
-
-/**********************************************************************//**
-If required, flushes the log to disk if we called trx_commit_for_mysql()
-with trx->flush_log_later == TRUE. */
-UNIV_INTERN
-void
-trx_commit_complete_for_mysql(
-/*==========================*/
- trx_t* trx) /*!< in/out: transaction */
-{
- ut_a(trx);
-
- if (!trx->must_flush_log_later
- || thd_requested_durability(trx->mysql_thd)
- == HA_IGNORE_DURABILITY) {
- return;
- }
-
- ulint flush_log_at_trx_commit;
-
- flush_log_at_trx_commit = srv_use_global_flush_log_at_trx_commit
- ? thd_flush_log_at_trx_commit(NULL)
- : thd_flush_log_at_trx_commit(trx->mysql_thd);
-
- if (flush_log_at_trx_commit == 1 && trx->active_commit_ordered) {
- return;
- }
-
- trx_flush_log_if_needed(trx->commit_lsn, trx);
-
- trx->must_flush_log_later = FALSE;
-}
-
-/**********************************************************************//**
-Marks the latest SQL statement ended. */
-UNIV_INTERN
-void
-trx_mark_sql_stat_end(
-/*==================*/
- trx_t* trx) /*!< in: trx handle */
-{
- ut_a(trx);
-
- switch (trx->state) {
- case TRX_STATE_PREPARED:
- case TRX_STATE_COMMITTED_IN_MEMORY:
- break;
- case TRX_STATE_NOT_STARTED:
- trx->undo_no = 0;
- /* fall through */
- case TRX_STATE_ACTIVE:
- trx->last_sql_stat_start.least_undo_no = trx->undo_no;
-
- if (trx->fts_trx) {
- fts_savepoint_laststmt_refresh(trx);
- }
-
- return;
- }
-
- ut_error;
-}
-
-/**********************************************************************//**
-Prints info about a transaction.
-Caller must hold trx_sys->mutex. */
-UNIV_INTERN
-void
-trx_print_low(
-/*==========*/
- FILE* f,
- /*!< in: output stream */
- const trx_t* trx,
- /*!< in: transaction */
- ulint max_query_len,
- /*!< in: max query length to print,
- or 0 to use the default max length */
- ulint n_rec_locks,
- /*!< in: lock_number_of_rows_locked(&trx->lock) */
- ulint n_trx_locks,
- /*!< in: length of trx->lock.trx_locks */
- ulint heap_size)
- /*!< in: mem_heap_get_size(trx->lock.lock_heap) */
-{
- ibool newline;
- const char* op_info;
-
- ut_ad(mutex_own(&trx_sys->mutex));
-
- fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id);
-
- /* trx->state cannot change from or to NOT_STARTED while we
- are holding the trx_sys->mutex. It may change from ACTIVE to
- PREPARED or COMMITTED. */
- switch (trx->state) {
- case TRX_STATE_NOT_STARTED:
- fputs(", not started", f);
- goto state_ok;
- case TRX_STATE_ACTIVE:
- fprintf(f, ", ACTIVE %lu sec",
- (ulong) difftime(time(NULL), trx->start_time));
- goto state_ok;
- case TRX_STATE_PREPARED:
- fprintf(f, ", ACTIVE (PREPARED) %lu sec",
- (ulong) difftime(time(NULL), trx->start_time));
- goto state_ok;
- case TRX_STATE_COMMITTED_IN_MEMORY:
- fputs(", COMMITTED IN MEMORY", f);
- goto state_ok;
- }
- fprintf(f, ", state %lu", (ulong) trx->state);
- ut_ad(0);
-state_ok:
-
- /* prevent a race condition */
- op_info = trx->op_info;
-
- if (*op_info) {
- putc(' ', f);
- fputs(op_info, f);
- }
-
- if (trx->is_recovered) {
- fputs(" recovered trx", f);
- }
-
- if (trx->declared_to_be_inside_innodb) {
- fprintf(f, ", thread declared inside InnoDB %lu",
- (ulong) trx->n_tickets_to_enter_innodb);
- }
-
- putc('\n', f);
-
- if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
- fprintf(f, "mysql tables in use %lu, locked %lu\n",
- (ulong) trx->n_mysql_tables_in_use,
- (ulong) trx->mysql_n_tables_locked);
- }
-
- newline = TRUE;
-
- /* trx->lock.que_state of an ACTIVE transaction may change
- while we are not holding trx->mutex. We perform a dirty read
- for performance reasons. */
-
- switch (trx->lock.que_state) {
- case TRX_QUE_RUNNING:
- newline = FALSE; break;
- case TRX_QUE_LOCK_WAIT:
- fputs("LOCK WAIT ", f); break;
- case TRX_QUE_ROLLING_BACK:
- fputs("ROLLING BACK ", f); break;
- case TRX_QUE_COMMITTING:
- fputs("COMMITTING ", f); break;
- default:
- fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
- }
-
- if (n_trx_locks > 0 || heap_size > 400) {
- newline = TRUE;
-
- fprintf(f, "%lu lock struct(s), heap size %lu,"
- " %lu row lock(s)",
- (ulong) n_trx_locks,
- (ulong) heap_size,
- (ulong) n_rec_locks);
- }
-
- if (trx->has_search_latch) {
- newline = TRUE;
- fputs(", holds adaptive hash latch", f);
- }
-
- if (trx->undo_no != 0) {
- newline = TRUE;
- fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
- }
-
- if (newline) {
- putc('\n', f);
- }
-
- if (trx->mysql_thd != NULL) {
- innobase_mysql_print_thd(
- f, trx->mysql_thd, static_cast<uint>(max_query_len));
- }
-}
-
-/**********************************************************************//**
-Prints info about a transaction.
-The caller must hold lock_sys->mutex and trx_sys->mutex.
-When possible, use trx_print() instead. */
-UNIV_INTERN
-void
-trx_print_latched(
-/*==============*/
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction */
- ulint max_query_len) /*!< in: max query length to print,
- or 0 to use the default max length */
-{
- ut_ad(lock_mutex_own());
- ut_ad(mutex_own(&trx_sys->mutex));
-
- trx_print_low(f, trx, max_query_len,
- lock_number_of_rows_locked(&trx->lock),
- UT_LIST_GET_LEN(trx->lock.trx_locks),
- mem_heap_get_size(trx->lock.lock_heap));
-}
-
-#ifdef WITH_WSREP
-/**********************************************************************//**
-Prints info about a transaction.
-Transaction information may be retrieved without having trx_sys->mutex acquired
-so it may not be completely accurate. The caller must own lock_sys->mutex
-and the trx must have some locks to make sure that it does not escape
-without locking lock_sys->mutex. */
-UNIV_INTERN
-void
-wsrep_trx_print_locking(
-/*==========*/
- FILE* f,
- /*!< in: output stream */
- const trx_t* trx,
- /*!< in: transaction */
- ulint max_query_len)
- /*!< in: max query length to print,
- or 0 to use the default max length */
-{
- ibool newline;
- const char* op_info;
-
- ut_ad(lock_mutex_own());
- ut_ad(trx->lock.trx_locks.count > 0);
-
- fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id);
-
- /* trx->state may change since trx_sys->mutex is not required */
- switch (trx->state) {
- case TRX_STATE_NOT_STARTED:
- fputs(", not started", f);
- goto state_ok;
- case TRX_STATE_ACTIVE:
- fprintf(f, ", ACTIVE %lu sec",
- (ulong) difftime(time(NULL), trx->start_time));
- goto state_ok;
- case TRX_STATE_PREPARED:
- fprintf(f, ", ACTIVE (PREPARED) %lu sec",
- (ulong) difftime(time(NULL), trx->start_time));
- goto state_ok;
- case TRX_STATE_COMMITTED_IN_MEMORY:
- fputs(", COMMITTED IN MEMORY", f);
- goto state_ok;
- }
- fprintf(f, ", state %lu", (ulong) trx->state);
- ut_ad(0);
-state_ok:
-
- /* prevent a race condition */
- op_info = trx->op_info;
-
- if (*op_info) {
- putc(' ', f);
- fputs(op_info, f);
- }
-
- if (trx->is_recovered) {
- fputs(" recovered trx", f);
- }
-
- if (trx->declared_to_be_inside_innodb) {
- fprintf(f, ", thread declared inside InnoDB %lu",
- (ulong) trx->n_tickets_to_enter_innodb);
- }
-
- putc('\n', f);
-
- if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
- fprintf(f, "mysql tables in use %lu, locked %lu\n",
- (ulong) trx->n_mysql_tables_in_use,
- (ulong) trx->mysql_n_tables_locked);
- }
-
- newline = TRUE;
-
- /* trx->lock.que_state of an ACTIVE transaction may change
- while we are not holding trx->mutex. We perform a dirty read
- for performance reasons. */
-
- switch (trx->lock.que_state) {
- case TRX_QUE_RUNNING:
- newline = FALSE; break;
- case TRX_QUE_LOCK_WAIT:
- fputs("LOCK WAIT ", f); break;
- case TRX_QUE_ROLLING_BACK:
- fputs("ROLLING BACK ", f); break;
- case TRX_QUE_COMMITTING:
- fputs("COMMITTING ", f); break;
- default:
- fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
- }
-
- if (trx->has_search_latch) {
- newline = TRUE;
- fputs(", holds adaptive hash latch", f);
- }
-
- if (trx->undo_no != 0) {
- newline = TRUE;
- fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
- }
-
- if (newline) {
- putc('\n', f);
- }
-
- if (trx->mysql_thd != NULL) {
- innobase_mysql_print_thd(
- f, trx->mysql_thd, static_cast<uint>(max_query_len));
- }
-}
-#endif /* WITH_WSREP */
-
-/**********************************************************************//**
-Prints info about a transaction.
-Acquires and releases lock_sys->mutex and trx_sys->mutex. */
-UNIV_INTERN
-void
-trx_print(
-/*======*/
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction */
- ulint max_query_len) /*!< in: max query length to print,
- or 0 to use the default max length */
-{
- ulint n_rec_locks;
- ulint n_trx_locks;
- ulint heap_size;
-
- lock_mutex_enter();
- n_rec_locks = lock_number_of_rows_locked(&trx->lock);
- n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
- heap_size = mem_heap_get_size(trx->lock.lock_heap);
- lock_mutex_exit();
-
- mutex_enter(&trx_sys->mutex);
- trx_print_low(f, trx, max_query_len,
- n_rec_locks, n_trx_locks, heap_size);
- mutex_exit(&trx_sys->mutex);
-}
-
-#ifdef UNIV_DEBUG
-/**********************************************************************//**
-Asserts that a transaction has been started.
-The caller must hold trx_sys->mutex.
-@return TRUE if started */
-UNIV_INTERN
-ibool
-trx_assert_started(
-/*===============*/
- const trx_t* trx) /*!< in: transaction */
-{
- ut_ad(mutex_own(&trx_sys->mutex));
-
- /* Non-locking autocommits should not hold any locks and this
- function is only called from the locking code. */
- assert_trx_in_list(trx);
-
- /* trx->state can change from or to NOT_STARTED while we are holding
- trx_sys->mutex for non-locking autocommit selects but not for other
- types of transactions. It may change from ACTIVE to PREPARED. Unless
- we are holding lock_sys->mutex, it may also change to COMMITTED. */
-
- switch (trx->state) {
- case TRX_STATE_PREPARED:
- return(TRUE);
-
- case TRX_STATE_ACTIVE:
- case TRX_STATE_COMMITTED_IN_MEMORY:
- return(TRUE);
-
- case TRX_STATE_NOT_STARTED:
- break;
- }
-
- ut_error;
- return(FALSE);
-}
-#endif /* UNIV_DEBUG */
-
-/*******************************************************************//**
-Compares the "weight" (or size) of two transactions. The heavier the weight,
-the more reluctant we will be to choose the transaction as a deadlock victim.
-@return TRUE if weight(a) >= weight(b) */
-UNIV_INTERN
-ibool
-trx_weight_ge(
-/*==========*/
- const trx_t* a, /*!< in: the first transaction to be compared */
- const trx_t* b) /*!< in: the second transaction to be compared */
-{
- int pref;
-
- /* First ask the upper server layer if it has any preference for which
- to prefer as a deadlock victim. */
- pref= thd_deadlock_victim_preference(a->mysql_thd, b->mysql_thd);
- if (pref < 0) {
- return FALSE;
- } else if (pref > 0) {
- return TRUE;
- }
-
- /* Upper server layer had no preference, we fall back to comparing the
- number of altered/locked rows. */
-
-#if 0
- fprintf(stderr,
- "%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
- __func__,
- a->undo_no, UT_LIST_GET_LEN(a->lock.trx_locks),
- b->undo_no, UT_LIST_GET_LEN(b->lock.trx_locks));
-#endif
-
- return(TRX_WEIGHT(a) >= TRX_WEIGHT(b));
-}
-
-/****************************************************************//**
-Prepares a transaction. */
-static
-void
-trx_prepare(
-/*========*/
- trx_t* trx) /*!< in/out: transaction */
-{
- trx_rseg_t* rseg;
- lsn_t lsn;
- mtr_t mtr;
-
- rseg = trx->rseg;
- /* Only fresh user transactions can be prepared.
- Recovered transactions cannot. */
- ut_a(!trx->is_recovered);
-
- if (trx->insert_undo != NULL || trx->update_undo != NULL) {
-
- mtr_start(&mtr);
-
- /* Change the undo log segment states from TRX_UNDO_ACTIVE
- to TRX_UNDO_PREPARED: these modifications to the file data
- structure define the transaction as prepared in the
- file-based world, at the serialization point of lsn. */
-
- mutex_enter(&rseg->mutex);
-
- if (trx->insert_undo != NULL) {
-
- /* It is not necessary to obtain trx->undo_mutex here
- because only a single OS thread is allowed to do the
- transaction prepare for this transaction. */
-
- trx_undo_set_state_at_prepare(trx, trx->insert_undo,
- &mtr);
- }
-
- if (trx->update_undo) {
- trx_undo_set_state_at_prepare(
- trx, trx->update_undo, &mtr);
- }
-
- mutex_exit(&rseg->mutex);
-
- /*--------------*/
- mtr_commit(&mtr); /* This mtr commit makes the
- transaction prepared in the file-based
- world */
- /*--------------*/
- lsn = mtr.end_lsn;
- ut_ad(lsn);
- } else {
- lsn = 0;
- }
-
- /*--------------------------------------*/
- ut_a(trx->state == TRX_STATE_ACTIVE);
- mutex_enter(&trx_sys->mutex);
- trx->state = TRX_STATE_PREPARED;
- trx_sys->n_prepared_trx++;
- mutex_exit(&trx_sys->mutex);
- /*--------------------------------------*/
-
- if (lsn) {
- /* Depending on the my.cnf options, we may now write the log
- buffer to the log files, making the prepared state of the
- transaction durable if the OS does not crash. We may also
- flush the log files to disk, making the prepared state of the
- transaction durable also at an OS crash or a power outage.
-
- The idea in InnoDB's group prepare is that a group of
- transactions gather behind a trx doing a physical disk write
- to log files, and when that physical write has been completed,
- one of those transactions does a write which prepares the whole
- group. Note that this group prepare will only bring benefit if
- there are > 2 users in the database. Then at least 2 users can
- gather behind one doing the physical log write to disk.
-
- TODO: find out if MySQL holds some mutex when calling this.
- That would spoil our group prepare algorithm. */
-
- trx_flush_log_if_needed(lsn, trx);
- }
-}
-
-/**********************************************************************//**
-Does the transaction prepare for MySQL. */
-UNIV_INTERN
-void
-trx_prepare_for_mysql(
-/*==================*/
- trx_t* trx) /*!< in/out: trx handle */
-{
- trx_start_if_not_started_xa(trx);
-
- trx->op_info = "preparing";
-
- trx_prepare(trx);
-
- trx->op_info = "";
-}
-
-/**********************************************************************//**
-This function is used to find number of prepared transactions and
-their transaction objects for a recovery.
-@return number of prepared transactions stored in xid_list */
-UNIV_INTERN
-int
-trx_recover_for_mysql(
-/*==================*/
- XID* xid_list, /*!< in/out: prepared transactions */
- ulint len) /*!< in: number of slots in xid_list */
-{
- const trx_t* trx;
- ulint count = 0;
-
- ut_ad(xid_list);
- ut_ad(len);
-
- /* We should set those transactions which are in the prepared state
- to the xid_list */
-
- mutex_enter(&trx_sys->mutex);
-
- for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
- assert_trx_in_rw_list(trx);
-
- /* The state of a read-write transaction cannot change
- from or to NOT_STARTED while we are holding the
- trx_sys->mutex. It may change to PREPARED, but not if
- trx->is_recovered. It may also change to COMMITTED. */
- if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
- xid_list[count] = trx->xid;
-
- if (count == 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Starting recovery for"
- " XA transactions...\n");
- }
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Transaction " TRX_ID_FMT " in"
- " prepared state after recovery\n",
- trx->id);
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Transaction contains changes"
- " to " TRX_ID_FMT " rows\n",
- trx->undo_no);
-
- count++;
-
- if (count == len) {
- break;
- }
- }
- }
-
- mutex_exit(&trx_sys->mutex);
-
- if (count > 0){
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: %d transactions in prepared state"
- " after recovery\n",
- int (count));
- }
-
- return(int (count));
-}
-
-/*******************************************************************//**
-This function is used to find one X/Open XA distributed transaction
-which is in the prepared state
-@return trx on match, the trx->xid will be invalidated;
-note that the trx may have been committed, unless the caller is
-holding lock_sys->mutex */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-trx_t*
-trx_get_trx_by_xid_low(
-/*===================*/
- const XID* xid) /*!< in: X/Open XA transaction
- identifier */
-{
- trx_t* trx;
-
- ut_ad(mutex_own(&trx_sys->mutex));
-
- for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
- trx != NULL;
- trx = UT_LIST_GET_NEXT(trx_list, trx)) {
-
- assert_trx_in_rw_list(trx);
-
- /* Compare two X/Open XA transaction id's: their
- length should be the same and binary comparison
- of gtrid_length+bqual_length bytes should be
- the same */
-
- if (trx->is_recovered
- && trx_state_eq(trx, TRX_STATE_PREPARED)
- && xid->gtrid_length == trx->xid.gtrid_length
- && xid->bqual_length == trx->xid.bqual_length
- && memcmp(xid->data, trx->xid.data,
- xid->gtrid_length + xid->bqual_length) == 0) {
-
- /* Invalidate the XID, so that subsequent calls
- will not find it. */
- memset(&trx->xid, 0, sizeof(trx->xid));
- trx->xid.formatID = -1;
- break;
- }
- }
-
- return(trx);
-}
-
-/*******************************************************************//**
-This function is used to find one X/Open XA distributed transaction
-which is in the prepared state
-@return trx or NULL; on match, the trx->xid will be invalidated;
-note that the trx may have been committed, unless the caller is
-holding lock_sys->mutex */
-UNIV_INTERN
-trx_t*
-trx_get_trx_by_xid(
-/*===============*/
- const XID* xid) /*!< in: X/Open XA transaction identifier */
-{
- trx_t* trx;
-
- if (xid == NULL) {
-
- return(NULL);
- }
-
- mutex_enter(&trx_sys->mutex);
-
- /* Recovered/Resurrected transactions are always only on the
- trx_sys_t::rw_trx_list. */
- trx = trx_get_trx_by_xid_low(xid);
-
- mutex_exit(&trx_sys->mutex);
-
- return(trx);
-}
-
-/*************************************************************//**
-Starts the transaction if it is not yet started. */
-UNIV_INTERN
-void
-trx_start_if_not_started_xa_low(
-/*============================*/
- trx_t* trx) /*!< in: transaction */
-{
- switch (trx->state) {
- case TRX_STATE_NOT_STARTED:
-
- /* Update the info whether we should skip XA steps
- that eat CPU time.
-
- For the duration of the transaction trx->support_xa is
- not reread from thd so any changes in the value take
- effect in the next transaction. This is to avoid a
- scenario where some undo generated by a transaction,
- has XA stuff, and other undo, generated by the same
- transaction, doesn't. */
- trx->support_xa = thd_supports_xa(trx->mysql_thd);
-
- trx_start_low(trx);
- /* fall through */
- case TRX_STATE_ACTIVE:
- return;
- case TRX_STATE_PREPARED:
- case TRX_STATE_COMMITTED_IN_MEMORY:
- break;
- }
-
- ut_error;
-}
-
-/*************************************************************//**
-Starts the transaction if it is not yet started. */
-UNIV_INTERN
-void
-trx_start_if_not_started_low(
-/*=========================*/
- trx_t* trx) /*!< in: transaction */
-{
- switch (trx->state) {
- case TRX_STATE_NOT_STARTED:
-#ifdef WITH_WSREP
- ut_d(trx->start_file = __FILE__);
- ut_d(trx->start_line = __LINE__);
-#endif /* WITH_WSREP */
- trx_start_low(trx);
- /* fall through */
- case TRX_STATE_ACTIVE:
- return;
- case TRX_STATE_PREPARED:
- case TRX_STATE_COMMITTED_IN_MEMORY:
- break;
- }
-
- ut_error;
-}
-
-/*************************************************************//**
-Starts the transaction for a DDL operation. */
-UNIV_INTERN
-void
-trx_start_for_ddl_low(
-/*==================*/
- trx_t* trx, /*!< in/out: transaction */
- trx_dict_op_t op) /*!< in: dictionary operation type */
-{
- switch (trx->state) {
- case TRX_STATE_NOT_STARTED:
- /* Flag this transaction as a dictionary operation, so that
- the data dictionary will be locked in crash recovery. */
-
- trx_set_dict_operation(trx, op);
-
- /* Ensure it is not flagged as an auto-commit-non-locking
- transation. */
- trx->will_lock = 1;
-
- trx->ddl = true;
-
-#ifdef WITH_WSREP
- ut_d(trx->start_file = __FILE__);
- ut_d(trx->start_line = __LINE__);
-#endif /* WITH_WSREP */
- trx_start_low(trx);
- return;
-
- case TRX_STATE_ACTIVE:
- /* We have this start if not started idiom, therefore we
- can't add stronger checks here. */
- trx->ddl = true;
-
- ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
- ut_ad(trx->will_lock > 0);
- return;
- case TRX_STATE_PREPARED:
- case TRX_STATE_COMMITTED_IN_MEMORY:
- break;
- }
-
- ut_error;
-}
diff --git a/storage/xtradb/trx/trx0undo.cc b/storage/xtradb/trx/trx0undo.cc
deleted file mode 100644
index 220589dd9ff..00000000000
--- a/storage/xtradb/trx/trx0undo.cc
+++ /dev/null
@@ -1,2051 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file trx/trx0undo.cc
-Transaction undo log
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0undo.h"
-
-#ifdef UNIV_NONINL
-#include "trx0undo.ic"
-#endif
-
-#include "fsp0fsp.h"
-#ifndef UNIV_HOTBACKUP
-#include "mach0data.h"
-#include "mtr0log.h"
-#include "trx0rseg.h"
-#include "trx0trx.h"
-#include "srv0srv.h"
-#include "srv0start.h"
-#include "trx0rec.h"
-#include "trx0purge.h"
-#include "srv0mon.h"
-
-/* How should the old versions in the history list be managed?
- ----------------------------------------------------------
-If each transaction is given a whole page for its update undo log, file
-space consumption can be 10 times higher than necessary. Therefore,
-partly filled update undo log pages should be reusable. But then there
-is no way individual pages can be ordered so that the ordering agrees
-with the serialization numbers of the transactions on the pages. Thus,
-the history list must be formed of undo logs, not their header pages as
-it was in the old implementation.
- However, on a single header page the transactions are placed in
-the order of their serialization numbers. As old versions are purged, we
-may free the page when the last transaction on the page has been purged.
- A problem is that the purge has to go through the transactions
-in the serialization order. This means that we have to look through all
-rollback segments for the one that has the smallest transaction number
-in its history list.
- When should we do a purge? A purge is necessary when space is
-running out in any of the rollback segments. Then we may have to purge
-also old version which might be needed by some consistent read. How do
-we trigger the start of a purge? When a transaction writes to an undo log,
-it may notice that the space is running out. When a read view is closed,
-it may make some history superfluous. The server can have an utility which
-periodically checks if it can purge some history.
- In a parallellized purge we have the problem that a query thread
-can remove a delete marked clustered index record before another query
-thread has processed an earlier version of the record, which cannot then
-be done because the row cannot be constructed from the clustered index
-record. To avoid this problem, we will store in the update and delete mark
-undo record also the columns necessary to construct the secondary index
-entries which are modified.
- We can latch the stack of versions of a single clustered index record
-by taking a latch on the clustered index page. As long as the latch is held,
-no new versions can be added and no versions removed by undo. But, a purge
-can still remove old versions from the bottom of the stack. */
-
-/* How to protect rollback segments, undo logs, and history lists with
- -------------------------------------------------------------------
-latches?
--------
-The contention of the trx_sys_t::mutex should be minimized. When a transaction
-does its first insert or modify in an index, an undo log is assigned for it.
-Then we must have an x-latch to the rollback segment header.
- When the transaction does more modifys or rolls back, the undo log is
-protected with undo_mutex in the transaction.
- When the transaction commits, its insert undo log is either reset and
-cached for a fast reuse, or freed. In these cases we must have an x-latch on
-the rollback segment page. The update undo log is put to the history list. If
-it is not suitable for reuse, its slot in the rollback segment is reset. In
-both cases, an x-latch must be acquired on the rollback segment.
- The purge operation steps through the history list without modifying
-it until a truncate operation occurs, which can remove undo logs from the end
-of the list and release undo log segments. In stepping through the list,
-s-latches on the undo log pages are enough, but in a truncate, x-latches must
-be obtained on the rollback segment and individual pages. */
-#endif /* !UNIV_HOTBACKUP */
-
-/********************************************************************//**
-Initializes the fields in an undo log segment page. */
-static
-void
-trx_undo_page_init(
-/*===============*/
- page_t* undo_page, /*!< in: undo log segment page */
- ulint type, /*!< in: undo log segment type */
- mtr_t* mtr); /*!< in: mtr */
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Creates and initializes an undo log memory object.
-@return own: the undo log memory object */
-static
-trx_undo_t*
-trx_undo_mem_create(
-/*================*/
- trx_rseg_t* rseg, /*!< in: rollback segment memory object */
- ulint id, /*!< in: slot index within rseg */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- trx_id_t trx_id, /*!< in: id of the trx for which the undo log
- is created */
- const XID* xid, /*!< in: X/Open XA transaction identification*/
- ulint page_no,/*!< in: undo log header page number */
- ulint offset);/*!< in: undo log header byte offset on page */
-#endif /* !UNIV_HOTBACKUP */
-/***************************************************************//**
-Initializes a cached insert undo log header page for new use. NOTE that this
-function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
-the operation of this function!
-@return undo log header byte offset on page */
-static
-ulint
-trx_undo_insert_header_reuse(
-/*=========================*/
- page_t* undo_page, /*!< in/out: insert undo log segment
- header page, x-latched */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr); /*!< in: mtr */
-/**********************************************************************//**
-If an update undo log can be discarded immediately, this function frees the
-space, resetting the page to the proper state for caching. */
-static
-void
-trx_undo_discard_latest_update_undo(
-/*================================*/
- page_t* undo_page, /*!< in: header page of an undo log of size 1 */
- mtr_t* mtr); /*!< in: mtr */
-
-#ifndef UNIV_HOTBACKUP
-/***********************************************************************//**
-Gets the previous record in an undo log from the previous page.
-@return undo log record, the page s-latched, NULL if none */
-static
-trx_undo_rec_t*
-trx_undo_get_prev_rec_from_prev_page(
-/*=================================*/
- trx_undo_rec_t* rec, /*!< in: undo record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- bool shared, /*!< in: true=S-latch, false=X-latch */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint space;
- ulint zip_size;
- ulint prev_page_no;
- page_t* prev_page;
- page_t* undo_page;
-
- undo_page = page_align(rec);
-
- prev_page_no = flst_get_prev_addr(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_NODE, mtr)
- .page;
-
- if (prev_page_no == FIL_NULL) {
-
- return(NULL);
- }
-
- space = page_get_space_id(undo_page);
- zip_size = fil_space_get_zip_size(space);
-
- buf_block_t* block = buf_page_get(space, zip_size, prev_page_no,
- shared ? RW_S_LATCH : RW_X_LATCH,
- mtr);
- buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
-
- prev_page = buf_block_get_frame(block);
-
- return(trx_undo_page_get_last_rec(prev_page, page_no, offset));
-}
-
-/***********************************************************************//**
-Gets the previous record in an undo log.
-@return undo log record, the page s-latched, NULL if none */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_prev_rec(
-/*==================*/
- trx_undo_rec_t* rec, /*!< in: undo record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- bool shared, /*!< in: true=S-latch, false=X-latch */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_undo_rec_t* prev_rec;
-
- prev_rec = trx_undo_page_get_prev_rec(rec, page_no, offset);
-
- if (prev_rec) {
-
- return(prev_rec);
- }
-
- /* We have to go to the previous undo log page to look for the
- previous record */
-
- return(trx_undo_get_prev_rec_from_prev_page(rec, page_no, offset,
- shared, mtr));
-}
-
-/***********************************************************************//**
-Gets the next record in an undo log from the next page.
-@return undo log record, the page latched, NULL if none */
-static
-trx_undo_rec_t*
-trx_undo_get_next_rec_from_next_page(
-/*=================================*/
- ulint space, /*!< in: undo log header space */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- page_t* undo_page, /*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- ulint mode, /*!< in: latch mode: RW_S_LATCH or RW_X_LATCH */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_ulogf_t* log_hdr;
- ulint next_page_no;
- page_t* next_page;
- ulint next;
-
- if (page_no == page_get_page_no(undo_page)) {
-
- log_hdr = undo_page + offset;
- next = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG);
-
- if (next != 0) {
-
- return(NULL);
- }
- }
-
- next_page_no = flst_get_next_addr(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_NODE, mtr)
- .page;
- if (next_page_no == FIL_NULL) {
-
- return(NULL);
- }
-
- if (mode == RW_S_LATCH) {
- next_page = trx_undo_page_get_s_latched(space, zip_size,
- next_page_no, mtr);
- } else {
- ut_ad(mode == RW_X_LATCH);
- next_page = trx_undo_page_get(space, zip_size,
- next_page_no, mtr);
- }
-
- return(trx_undo_page_get_first_rec(next_page, page_no, offset));
-}
-
-/***********************************************************************//**
-Gets the next record in an undo log.
-@return undo log record, the page s-latched, NULL if none */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_next_rec(
-/*==================*/
- trx_undo_rec_t* rec, /*!< in: undo record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint space;
- ulint zip_size;
- trx_undo_rec_t* next_rec;
-
- next_rec = trx_undo_page_get_next_rec(rec, page_no, offset);
-
- if (next_rec) {
- return(next_rec);
- }
-
- space = page_get_space_id(page_align(rec));
- zip_size = fil_space_get_zip_size(space);
-
- return(trx_undo_get_next_rec_from_next_page(space, zip_size,
- page_align(rec),
- page_no, offset,
- RW_S_LATCH, mtr));
-}
-
-/***********************************************************************//**
-Gets the first record in an undo log.
-@return undo log record, the page latched, NULL if none */
-UNIV_INTERN
-trx_undo_rec_t*
-trx_undo_get_first_rec(
-/*===================*/
- ulint space, /*!< in: undo log header space */
- ulint zip_size,/*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset, /*!< in: undo log header offset on page */
- ulint mode, /*!< in: latching mode: RW_S_LATCH or RW_X_LATCH */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* undo_page;
- trx_undo_rec_t* rec;
-
- if (mode == RW_S_LATCH) {
- undo_page = trx_undo_page_get_s_latched(space, zip_size,
- page_no, mtr);
- } else {
- undo_page = trx_undo_page_get(space, zip_size, page_no, mtr);
- }
-
- rec = trx_undo_page_get_first_rec(undo_page, page_no, offset);
-
- if (rec) {
- return(rec);
- }
-
- return(trx_undo_get_next_rec_from_next_page(space, zip_size,
- undo_page, page_no, offset,
- mode, mtr));
-}
-
-/*============== UNDO LOG FILE COPY CREATION AND FREEING ==================*/
-
-/**********************************************************************//**
-Writes the mtr log entry of an undo log page initialization. */
-UNIV_INLINE
-void
-trx_undo_page_init_log(
-/*===================*/
- page_t* undo_page, /*!< in: undo log page */
- ulint type, /*!< in: undo log type */
- mtr_t* mtr) /*!< in: mtr */
-{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_INIT, mtr);
-
- mlog_catenate_ulint_compressed(mtr, type);
-}
-#else /* !UNIV_HOTBACKUP */
-# define trx_undo_page_init_log(undo_page,type,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses the redo log entry of an undo log page initialization.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_page_init(
-/*=====================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ulint type;
-
- ptr = mach_parse_compressed(ptr, end_ptr, &type);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (page) {
- trx_undo_page_init(page, type, mtr);
- }
-
- return(ptr);
-}
-
-/********************************************************************//**
-Initializes the fields in an undo log segment page. */
-static
-void
-trx_undo_page_init(
-/*===============*/
- page_t* undo_page, /*!< in: undo log segment page */
- ulint type, /*!< in: undo log segment type */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_upagef_t* page_hdr;
-
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_TYPE, type);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START,
- TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE,
- TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE);
-
- fil_page_set_type(undo_page, FIL_PAGE_UNDO_LOG);
-
- trx_undo_page_init_log(undo_page, type, mtr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/***************************************************************//**
-Creates a new undo log segment in file.
-@return DB_SUCCESS if page creation OK possible error codes are:
-DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-trx_undo_seg_create(
-/*================*/
- trx_rseg_t* rseg MY_ATTRIBUTE((unused)),/*!< in: rollback segment */
- trx_rsegf_t* rseg_hdr,/*!< in: rollback segment header, page
- x-latched */
- ulint type, /*!< in: type of the segment: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- ulint* id, /*!< out: slot index within rseg header */
- page_t** undo_page,
- /*!< out: segment header page x-latched, NULL
- if there was an error */
- mtr_t* mtr) /*!< in: mtr */
-{
- ulint slot_no;
- ulint space;
- buf_block_t* block;
- trx_upagef_t* page_hdr;
- trx_usegf_t* seg_hdr;
- ulint n_reserved;
- ibool success;
- dberr_t err = DB_SUCCESS;
-
- ut_ad(mtr != NULL);
- ut_ad(id != NULL);
- ut_ad(rseg_hdr != NULL);
- ut_ad(mutex_own(&(rseg->mutex)));
-
- /* fputs(type == TRX_UNDO_INSERT
- ? "Creating insert undo log segment\n"
- : "Creating update undo log segment\n", stderr); */
- slot_no = trx_rsegf_undo_find_free(rseg_hdr, mtr);
-
- if (slot_no == ULINT_UNDEFINED) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: cannot find a free slot for"
- " an undo log. Do you have too\n"
- "InnoDB: many active transactions"
- " running concurrently?\n");
-
- return(DB_TOO_MANY_CONCURRENT_TRXS);
- }
-
- space = page_get_space_id(page_align(rseg_hdr));
-
- success = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO,
- mtr);
- if (!success) {
-
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- /* Allocate a new file segment for the undo log */
- block = fseg_create_general(space, 0,
- TRX_UNDO_SEG_HDR
- + TRX_UNDO_FSEG_HEADER, TRUE, mtr);
-
- fil_space_release_free_extents(space, n_reserved);
-
- if (block == NULL) {
- /* No space left */
-
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE);
-
- *undo_page = buf_block_get_frame(block);
-
- page_hdr = *undo_page + TRX_UNDO_PAGE_HDR;
- seg_hdr = *undo_page + TRX_UNDO_SEG_HDR;
-
- trx_undo_page_init(*undo_page, type, mtr);
-
- mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE,
- TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE,
- MLOG_2BYTES, mtr);
-
- mlog_write_ulint(seg_hdr + TRX_UNDO_LAST_LOG, 0, MLOG_2BYTES, mtr);
-
- flst_init(seg_hdr + TRX_UNDO_PAGE_LIST, mtr);
-
- flst_add_last(seg_hdr + TRX_UNDO_PAGE_LIST,
- page_hdr + TRX_UNDO_PAGE_NODE, mtr);
-
- trx_rsegf_set_nth_undo(rseg_hdr, slot_no,
- page_get_page_no(*undo_page), mtr);
- *id = slot_no;
-
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED);
-
- return(err);
-}
-
-/**********************************************************************//**
-Writes the mtr log entry of an undo log header initialization. */
-UNIV_INLINE
-void
-trx_undo_header_create_log(
-/*=======================*/
- const page_t* undo_page, /*!< in: undo log header page */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr */
-{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_CREATE, mtr);
-
- mlog_catenate_ull_compressed(mtr, trx_id);
-}
-#else /* !UNIV_HOTBACKUP */
-# define trx_undo_header_create_log(undo_page,trx_id,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***************************************************************//**
-Creates a new undo log header in file. NOTE that this function has its own
-log record type MLOG_UNDO_HDR_CREATE. You must NOT change the operation of
-this function!
-@return header byte offset on page */
-static
-ulint
-trx_undo_header_create(
-/*===================*/
- page_t* undo_page, /*!< in/out: undo log segment
- header page, x-latched; it is
- assumed that there is
- TRX_UNDO_LOG_XA_HDR_SIZE bytes
- free space on it */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_upagef_t* page_hdr;
- trx_usegf_t* seg_hdr;
- trx_ulogf_t* log_hdr;
- trx_ulogf_t* prev_log_hdr;
- ulint prev_log;
- ulint free;
- ulint new_free;
-
- ut_ad(mtr && undo_page);
-
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
-
- free = mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE);
-
- log_hdr = undo_page + free;
-
- new_free = free + TRX_UNDO_LOG_OLD_HDR_SIZE;
-
- ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < UNIV_PAGE_SIZE - 100);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free);
-
- mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE);
-
- prev_log = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG);
-
- if (prev_log != 0) {
- prev_log_hdr = undo_page + prev_log;
-
- mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, free);
- }
-
- mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, free);
-
- log_hdr = undo_page + free;
-
- mach_write_to_2(log_hdr + TRX_UNDO_DEL_MARKS, TRUE);
-
- mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
- mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
-
- mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE);
- mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE);
-
- mach_write_to_2(log_hdr + TRX_UNDO_NEXT_LOG, 0);
- mach_write_to_2(log_hdr + TRX_UNDO_PREV_LOG, prev_log);
-
- /* Write the log record about the header creation */
- trx_undo_header_create_log(undo_page, trx_id, mtr);
-
- return(free);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Write X/Open XA Transaction Identification (XID) to undo log header */
-static
-void
-trx_undo_write_xid(
-/*===============*/
- trx_ulogf_t* log_hdr,/*!< in: undo log header */
- const XID* xid, /*!< in: X/Open XA Transaction Identification */
- mtr_t* mtr) /*!< in: mtr */
-{
- mlog_write_ulint(log_hdr + TRX_UNDO_XA_FORMAT,
- (ulint) xid->formatID, MLOG_4BYTES, mtr);
-
- mlog_write_ulint(log_hdr + TRX_UNDO_XA_TRID_LEN,
- (ulint) xid->gtrid_length, MLOG_4BYTES, mtr);
-
- mlog_write_ulint(log_hdr + TRX_UNDO_XA_BQUAL_LEN,
- (ulint) xid->bqual_length, MLOG_4BYTES, mtr);
-
- mlog_write_string(log_hdr + TRX_UNDO_XA_XID, (const byte*) xid->data,
- XIDDATASIZE, mtr);
-}
-
-/********************************************************************//**
-Read X/Open XA Transaction Identification (XID) from undo log header */
-static
-void
-trx_undo_read_xid(
-/*==============*/
- trx_ulogf_t* log_hdr,/*!< in: undo log header */
- XID* xid) /*!< out: X/Open XA Transaction Identification */
-{
- xid->formatID = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT);
-
- xid->gtrid_length
- = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_TRID_LEN);
- xid->bqual_length
- = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_BQUAL_LEN);
-
- memcpy(xid->data, log_hdr + TRX_UNDO_XA_XID, XIDDATASIZE);
-}
-
-/***************************************************************//**
-Adds space for the XA XID after an undo log old-style header. */
-static
-void
-trx_undo_header_add_space_for_xid(
-/*==============================*/
- page_t* undo_page,/*!< in: undo log segment header page */
- trx_ulogf_t* log_hdr,/*!< in: undo log header */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_upagef_t* page_hdr;
- ulint free;
- ulint new_free;
-
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-
- free = mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE);
-
- /* free is now the end offset of the old style undo log header */
-
- ut_a(free == (ulint)(log_hdr - undo_page) + TRX_UNDO_LOG_OLD_HDR_SIZE);
-
- new_free = free + (TRX_UNDO_LOG_XA_HDR_SIZE
- - TRX_UNDO_LOG_OLD_HDR_SIZE);
-
- /* Add space for a XID after the header, update the free offset
- fields on the undo log page and in the undo log header */
-
- mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_START, new_free,
- MLOG_2BYTES, mtr);
-
- mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE, new_free,
- MLOG_2BYTES, mtr);
-
- mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, new_free,
- MLOG_2BYTES, mtr);
-}
-
-/**********************************************************************//**
-Writes the mtr log entry of an undo log header reuse. */
-UNIV_INLINE
-void
-trx_undo_insert_header_reuse_log(
-/*=============================*/
- const page_t* undo_page, /*!< in: undo log header page */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr */
-{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_REUSE, mtr);
-
- mlog_catenate_ull_compressed(mtr, trx_id);
-}
-#else /* !UNIV_HOTBACKUP */
-# define trx_undo_insert_header_reuse_log(undo_page,trx_id,mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses the redo log entry of an undo log page header create or reuse.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_page_header(
-/*=======================*/
- ulint type, /*!< in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- trx_id_t trx_id;
- /* Silence a GCC warning about possibly uninitialized variable
- when mach_ull_parse_compressed() is not inlined. */
- ut_d(trx_id = 0);
- /* Declare the variable uninitialized in Valgrind, so that the
- above initialization will not mask any bugs. */
- UNIV_MEM_INVALID(&trx_id, sizeof trx_id);
-
- ptr = mach_ull_parse_compressed(ptr, end_ptr, &trx_id);
-
- if (ptr == NULL) {
-
- return(NULL);
- }
-
- if (page) {
- if (type == MLOG_UNDO_HDR_CREATE) {
- trx_undo_header_create(page, trx_id, mtr);
- } else {
- ut_ad(type == MLOG_UNDO_HDR_REUSE);
- trx_undo_insert_header_reuse(page, trx_id, mtr);
- }
- }
-
- return(ptr);
-}
-
-/***************************************************************//**
-Initializes a cached insert undo log header page for new use. NOTE that this
-function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change
-the operation of this function!
-@return undo log header byte offset on page */
-static
-ulint
-trx_undo_insert_header_reuse(
-/*=========================*/
- page_t* undo_page, /*!< in/out: insert undo log segment
- header page, x-latched */
- trx_id_t trx_id, /*!< in: transaction id */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_upagef_t* page_hdr;
- trx_usegf_t* seg_hdr;
- trx_ulogf_t* log_hdr;
- ulint free;
- ulint new_free;
-
- ut_ad(mtr && undo_page);
-
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
-
- free = TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE;
-
- ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < UNIV_PAGE_SIZE - 100);
-
- log_hdr = undo_page + free;
-
- new_free = free + TRX_UNDO_LOG_OLD_HDR_SIZE;
-
- /* Insert undo data is not needed after commit: we may free all
- the space on the page */
-
- ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_INSERT);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free);
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free);
-
- mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE);
-
- log_hdr = undo_page + free;
-
- mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id);
- mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free);
-
- mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE);
- mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE);
-
- /* Write the log record MLOG_UNDO_HDR_REUSE */
- trx_undo_insert_header_reuse_log(undo_page, trx_id, mtr);
-
- return(free);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Writes the redo log entry of an update undo log header discard. */
-UNIV_INLINE
-void
-trx_undo_discard_latest_log(
-/*========================*/
- page_t* undo_page, /*!< in: undo log header page */
- mtr_t* mtr) /*!< in: mtr */
-{
- mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_DISCARD, mtr);
-}
-#else /* !UNIV_HOTBACKUP */
-# define trx_undo_discard_latest_log(undo_page, mtr) ((void) 0)
-#endif /* !UNIV_HOTBACKUP */
-
-/***********************************************************//**
-Parses the redo log entry of an undo log page header discard.
-@return end of log record or NULL */
-UNIV_INTERN
-byte*
-trx_undo_parse_discard_latest(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr) /*!< in: mtr or NULL */
-{
- ut_ad(end_ptr);
-
- if (page) {
- trx_undo_discard_latest_update_undo(page, mtr);
- }
-
- return(ptr);
-}
-
-/**********************************************************************//**
-If an update undo log can be discarded immediately, this function frees the
-space, resetting the page to the proper state for caching. */
-static
-void
-trx_undo_discard_latest_update_undo(
-/*================================*/
- page_t* undo_page, /*!< in: header page of an undo log of size 1 */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_usegf_t* seg_hdr;
- trx_upagef_t* page_hdr;
- trx_ulogf_t* log_hdr;
- trx_ulogf_t* prev_log_hdr;
- ulint free;
- ulint prev_hdr_offset;
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-
- free = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG);
- log_hdr = undo_page + free;
-
- prev_hdr_offset = mach_read_from_2(log_hdr + TRX_UNDO_PREV_LOG);
-
- if (prev_hdr_offset != 0) {
- prev_log_hdr = undo_page + prev_hdr_offset;
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START,
- mach_read_from_2(prev_log_hdr
- + TRX_UNDO_LOG_START));
- mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, 0);
- }
-
- mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, free);
-
- mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_CACHED);
- mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, prev_hdr_offset);
-
- trx_undo_discard_latest_log(undo_page, mtr);
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-Tries to add a page to the undo log segment where the undo log is placed.
-@return X-latched block if success, else NULL */
-UNIV_INTERN
-buf_block_t*
-trx_undo_add_page(
-/*==============*/
- trx_t* trx, /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory object */
- mtr_t* mtr) /*!< in: mtr which does not have a latch to any
- undo log page; the caller must have reserved
- the rollback segment mutex */
-{
- page_t* header_page;
- buf_block_t* new_block;
- page_t* new_page;
- trx_rseg_t* rseg;
- ulint n_reserved;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
- ut_ad(mutex_own(&(trx->rseg->mutex)));
-
- rseg = trx->rseg;
-
- if (rseg->curr_size == rseg->max_size) {
-
- return(NULL);
- }
-
- header_page = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no, mtr);
-
- if (!fsp_reserve_free_extents(&n_reserved, undo->space, 1,
- FSP_UNDO, mtr)) {
-
- return(NULL);
- }
-
- new_block = fseg_alloc_free_page_general(
- TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER
- + header_page,
- undo->top_page_no + 1, FSP_UP, TRUE, mtr, mtr);
-
- fil_space_release_free_extents(undo->space, n_reserved);
-
- if (new_block == NULL) {
-
- /* No space left */
-
- return(NULL);
- }
-
- ut_ad(rw_lock_get_x_lock_count(&new_block->lock) == 1);
- buf_block_dbg_add_level(new_block, SYNC_TRX_UNDO_PAGE);
- undo->last_page_no = buf_block_get_page_no(new_block);
-
- new_page = buf_block_get_frame(new_block);
-
- trx_undo_page_init(new_page, undo->type, mtr);
-
- flst_add_last(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
- new_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
- undo->size++;
- rseg->curr_size++;
-
- return(new_block);
-}
-
-/********************************************************************//**
-Frees an undo log page that is not the header page.
-@return last page number in remaining log */
-static
-ulint
-trx_undo_free_page(
-/*===============*/
- trx_rseg_t* rseg, /*!< in: rollback segment */
- ibool in_history, /*!< in: TRUE if the undo log is in the history
- list */
- ulint space, /*!< in: space */
- ulint hdr_page_no, /*!< in: header page number */
- ulint page_no, /*!< in: page number to free: must not be the
- header page */
- mtr_t* mtr) /*!< in: mtr which does not have a latch to any
- undo log page; the caller must have reserved
- the rollback segment mutex */
-{
- page_t* header_page;
- page_t* undo_page;
- fil_addr_t last_addr;
- trx_rsegf_t* rseg_header;
- ulint hist_size;
- ulint zip_size;
-
- ut_a(hdr_page_no != page_no);
- ut_ad(mutex_own(&(rseg->mutex)));
-
- zip_size = rseg->zip_size;
-
- undo_page = trx_undo_page_get(space, zip_size, page_no, mtr);
-
- header_page = trx_undo_page_get(space, zip_size, hdr_page_no, mtr);
-
- flst_remove(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST,
- undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr);
-
- fseg_free_page(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER,
- space, page_no, mtr);
-
- last_addr = flst_get_last(header_page + TRX_UNDO_SEG_HDR
- + TRX_UNDO_PAGE_LIST, mtr);
- rseg->curr_size--;
-
- if (in_history) {
- rseg_header = trx_rsegf_get(space, zip_size,
- rseg->page_no, mtr);
-
- hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- MLOG_4BYTES, mtr);
- ut_ad(hist_size > 0);
- mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- hist_size - 1, MLOG_4BYTES, mtr);
- }
-
- return(last_addr.page);
-}
-
-/********************************************************************//**
-Frees the last undo log page.
-The caller must hold the rollback segment mutex. */
-UNIV_INTERN
-void
-trx_undo_free_last_page_func(
-/*==========================*/
-#ifdef UNIV_DEBUG
- const trx_t* trx, /*!< in: transaction */
-#endif /* UNIV_DEBUG */
- trx_undo_t* undo, /*!< in/out: undo log memory copy */
- mtr_t* mtr) /*!< in/out: mini-transaction which does not
- have a latch to any undo log page or which
- has allocated the undo log page */
-{
- ut_ad(mutex_own(&trx->undo_mutex));
- ut_ad(undo->hdr_page_no != undo->last_page_no);
- ut_ad(undo->size > 0);
-
- undo->last_page_no = trx_undo_free_page(
- undo->rseg, FALSE, undo->space,
- undo->hdr_page_no, undo->last_page_no, mtr);
-
- undo->size--;
-}
-
-/********************************************************************//**
-Empties an undo log header page of undo records for that undo log. Other
-undo logs may still have records on that page, if it is an update undo log. */
-static
-void
-trx_undo_empty_header_page(
-/*=======================*/
- ulint space, /*!< in: space */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint hdr_page_no, /*!< in: header page number */
- ulint hdr_offset, /*!< in: header offset */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* header_page;
- trx_ulogf_t* log_hdr;
- ulint end;
-
- header_page = trx_undo_page_get(space, zip_size, hdr_page_no, mtr);
-
- log_hdr = header_page + hdr_offset;
-
- end = trx_undo_page_get_end(header_page, hdr_page_no, hdr_offset);
-
- mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, end, MLOG_2BYTES, mtr);
-}
-
-/***********************************************************************//**
-Truncates an undo log from the end. This function is used during a rollback
-to free space from an undo log. */
-UNIV_INTERN
-void
-trx_undo_truncate_end(
-/*=======================*/
- trx_t* trx, /*!< in: transaction whose undo log it is */
- trx_undo_t* undo, /*!< in: undo log */
- undo_no_t limit) /*!< in: all undo records with undo number
- >= this value should be truncated */
-{
- page_t* undo_page;
- ulint last_page_no;
- trx_undo_rec_t* rec;
- trx_undo_rec_t* trunc_here;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
- ut_ad(mutex_own(&(trx->rseg->mutex)));
-
- for (;;) {
- mtr_start_trx(&mtr, trx);
-
- trunc_here = NULL;
-
- last_page_no = undo->last_page_no;
-
- undo_page = trx_undo_page_get(undo->space, undo->zip_size,
- last_page_no, &mtr);
-
- rec = trx_undo_page_get_last_rec(undo_page, undo->hdr_page_no,
- undo->hdr_offset);
- while (rec) {
- if (trx_undo_rec_get_undo_no(rec) >= limit) {
- /* Truncate at least this record off, maybe
- more */
- trunc_here = rec;
- } else {
- goto function_exit;
- }
-
- rec = trx_undo_page_get_prev_rec(rec,
- undo->hdr_page_no,
- undo->hdr_offset);
- }
-
- if (last_page_no == undo->hdr_page_no) {
-
- goto function_exit;
- }
-
- ut_ad(last_page_no == undo->last_page_no);
- trx_undo_free_last_page(trx, undo, &mtr);
-
- mtr_commit(&mtr);
- }
-
-function_exit:
- if (trunc_here) {
- mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE,
- trunc_here - undo_page, MLOG_2BYTES, &mtr);
- }
-
- mtr_commit(&mtr);
-}
-
-/***********************************************************************//**
-Truncates an undo log from the start. This function is used during a purge
-operation. */
-UNIV_INTERN
-void
-trx_undo_truncate_start(
-/*====================*/
- trx_rseg_t* rseg, /*!< in: rollback segment */
- ulint space, /*!< in: space id of the log */
- ulint hdr_page_no, /*!< in: header page number */
- ulint hdr_offset, /*!< in: header offset on the page */
- undo_no_t limit) /*!< in: all undo pages with
- undo numbers < this value
- should be truncated; NOTE that
- the function only frees whole
- pages; the header page is not
- freed, but emptied, if all the
- records there are < limit */
-{
- page_t* undo_page;
- trx_undo_rec_t* rec;
- trx_undo_rec_t* last_rec;
- ulint page_no;
- mtr_t mtr;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- if (!limit) {
-
- return;
- }
-loop:
- mtr_start(&mtr);
-
- rec = trx_undo_get_first_rec(space, rseg->zip_size,
- hdr_page_no, hdr_offset,
- RW_X_LATCH, &mtr);
- if (rec == NULL) {
- /* Already empty */
-
- mtr_commit(&mtr);
-
- return;
- }
-
- undo_page = page_align(rec);
-
- last_rec = trx_undo_page_get_last_rec(undo_page, hdr_page_no,
- hdr_offset);
- if (trx_undo_rec_get_undo_no(last_rec) >= limit) {
-
- mtr_commit(&mtr);
-
- return;
- }
-
- page_no = page_get_page_no(undo_page);
-
- if (page_no == hdr_page_no) {
- trx_undo_empty_header_page(space, rseg->zip_size,
- hdr_page_no, hdr_offset,
- &mtr);
- } else {
- trx_undo_free_page(rseg, TRUE, space, hdr_page_no,
- page_no, &mtr);
- }
-
- mtr_commit(&mtr);
-
- goto loop;
-}
-
-/**********************************************************************//**
-Frees an undo log segment which is not in the history list. */
-static
-void
-trx_undo_seg_free(
-/*==============*/
- trx_undo_t* undo) /*!< in: undo log */
-{
- trx_rseg_t* rseg;
- fseg_header_t* file_seg;
- trx_rsegf_t* rseg_header;
- trx_usegf_t* seg_header;
- ibool finished;
- mtr_t mtr;
-
- rseg = undo->rseg;
-
- do {
-
- mtr_start(&mtr);
-
- mutex_enter(&(rseg->mutex));
-
- seg_header = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no,
- &mtr) + TRX_UNDO_SEG_HDR;
-
- file_seg = seg_header + TRX_UNDO_FSEG_HEADER;
-
- finished = fseg_free_step(file_seg, &mtr);
-
- if (finished) {
- /* Update the rseg header */
- rseg_header = trx_rsegf_get(
- rseg->space, rseg->zip_size, rseg->page_no,
- &mtr);
- trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL,
- &mtr);
-
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED);
- }
-
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
- } while (!finished);
-}
-
-/*========== UNDO LOG MEMORY COPY INITIALIZATION =====================*/
-
-/********************************************************************//**
-Creates and initializes an undo log memory object according to the values
-in the header in file, when the database is started. The memory object is
-inserted in the appropriate list of rseg.
-@return own: the undo log memory object */
-static
-trx_undo_t*
-trx_undo_mem_create_at_db_start(
-/*============================*/
- trx_rseg_t* rseg, /*!< in: rollback segment memory object */
- ulint id, /*!< in: slot index within rseg */
- ulint page_no,/*!< in: undo log segment page number */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* undo_page;
- trx_upagef_t* page_header;
- trx_usegf_t* seg_header;
- trx_ulogf_t* undo_header;
- trx_undo_t* undo;
- ulint type;
- ulint state;
- trx_id_t trx_id;
- ulint offset;
- fil_addr_t last_addr;
- page_t* last_page;
- trx_undo_rec_t* rec;
- XID xid;
- ibool xid_exists = FALSE;
-
- if (id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n", (ulong) id);
- ut_error;
- }
-
- undo_page = trx_undo_page_get(rseg->space, rseg->zip_size,
- page_no, mtr);
-
- page_header = undo_page + TRX_UNDO_PAGE_HDR;
-
- type = mtr_read_ulint(page_header + TRX_UNDO_PAGE_TYPE, MLOG_2BYTES,
- mtr);
- seg_header = undo_page + TRX_UNDO_SEG_HDR;
-
- state = mach_read_from_2(seg_header + TRX_UNDO_STATE);
-
- offset = mach_read_from_2(seg_header + TRX_UNDO_LAST_LOG);
-
- undo_header = undo_page + offset;
-
- trx_id = mach_read_from_8(undo_header + TRX_UNDO_TRX_ID);
-
- xid_exists = mtr_read_ulint(undo_header + TRX_UNDO_XID_EXISTS,
- MLOG_1BYTE, mtr);
-
- /* Read X/Open XA transaction identification if it exists, or
- set it to NULL. */
-
- memset(&xid, 0, sizeof(xid));
- xid.formatID = -1;
-
- if (xid_exists == TRUE) {
- trx_undo_read_xid(undo_header, &xid);
- }
-
- mutex_enter(&(rseg->mutex));
-
- undo = trx_undo_mem_create(rseg, id, type, trx_id, &xid,
- page_no, offset);
- mutex_exit(&(rseg->mutex));
-
- undo->dict_operation = mtr_read_ulint(
- undo_header + TRX_UNDO_DICT_TRANS, MLOG_1BYTE, mtr);
-
- undo->table_id = mach_read_from_8(undo_header + TRX_UNDO_TABLE_ID);
- undo->state = state;
- undo->size = flst_get_len(seg_header + TRX_UNDO_PAGE_LIST, mtr);
-
- /* If the log segment is being freed, the page list is inconsistent! */
- if (state == TRX_UNDO_TO_FREE) {
-
- goto add_to_list;
- }
-
- last_addr = flst_get_last(seg_header + TRX_UNDO_PAGE_LIST, mtr);
-
- undo->last_page_no = last_addr.page;
- undo->top_page_no = last_addr.page;
-
- last_page = trx_undo_page_get(rseg->space, rseg->zip_size,
- undo->last_page_no, mtr);
-
- rec = trx_undo_page_get_last_rec(last_page, page_no, offset);
-
- if (rec == NULL) {
- undo->empty = TRUE;
- } else {
- undo->empty = FALSE;
- undo->top_offset = rec - last_page;
- undo->top_undo_no = trx_undo_rec_get_undo_no(rec);
- }
-add_to_list:
- if (type == TRX_UNDO_INSERT) {
- if (state != TRX_UNDO_CACHED) {
- UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_list,
- undo);
- } else {
- UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_cached,
- undo);
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
- }
- } else {
- ut_ad(type == TRX_UNDO_UPDATE);
- if (state != TRX_UNDO_CACHED) {
- UT_LIST_ADD_LAST(undo_list, rseg->update_undo_list,
- undo);
- } else {
- UT_LIST_ADD_LAST(undo_list, rseg->update_undo_cached,
- undo);
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
- }
- }
-
- return(undo);
-}
-
-/********************************************************************//**
-Initializes the undo log lists for a rollback segment memory copy. This
-function is only called when the database is started or a new rollback
-segment is created.
-@return the combined size of undo log segments in pages */
-UNIV_INTERN
-ulint
-trx_undo_lists_init(
-/*================*/
- trx_rseg_t* rseg) /*!< in: rollback segment memory object */
-{
- ulint size = 0;
- trx_rsegf_t* rseg_header;
- ulint i;
- mtr_t mtr;
-
- UT_LIST_INIT(rseg->update_undo_list);
- UT_LIST_INIT(rseg->update_undo_cached);
- UT_LIST_INIT(rseg->insert_undo_list);
- UT_LIST_INIT(rseg->insert_undo_cached);
-
- mtr_start(&mtr);
-
- rseg_header = trx_rsegf_get_new(
- rseg->space, rseg->zip_size, rseg->page_no, &mtr);
-
- for (i = 0; i < TRX_RSEG_N_SLOTS; i++) {
- ulint page_no;
-
- page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr);
-
- /* In forced recovery: try to avoid operations which look
- at database pages; undo logs are rapidly changing data, and
- the probability that they are in an inconsistent state is
- high */
-
- if (page_no != FIL_NULL
- && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
-
- trx_undo_t* undo;
-
- undo = trx_undo_mem_create_at_db_start(
- rseg, i, page_no, &mtr);
-
- size += undo->size;
-
- mtr_commit(&mtr);
-
- mtr_start(&mtr);
-
- rseg_header = trx_rsegf_get(
- rseg->space, rseg->zip_size, rseg->page_no,
- &mtr);
-
- /* Found a used slot */
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_USED);
- }
- }
-
- mtr_commit(&mtr);
-
- return(size);
-}
-
-/********************************************************************//**
-Creates and initializes an undo log memory object.
-@return own: the undo log memory object */
-static
-trx_undo_t*
-trx_undo_mem_create(
-/*================*/
- trx_rseg_t* rseg, /*!< in: rollback segment memory object */
- ulint id, /*!< in: slot index within rseg */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- trx_id_t trx_id, /*!< in: id of the trx for which the undo log
- is created */
- const XID* xid, /*!< in: X/Open transaction identification */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header byte offset on page */
-{
- trx_undo_t* undo;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- if (id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n", (ulong) id);
- ut_error;
- }
-
- undo = static_cast<trx_undo_t*>(mem_alloc(sizeof(*undo)));
-
- if (undo == NULL) {
-
- return(NULL);
- }
-
- undo->id = id;
- undo->type = type;
- undo->state = TRX_UNDO_ACTIVE;
- undo->del_marks = FALSE;
- undo->trx_id = trx_id;
- undo->xid = *xid;
-
- undo->dict_operation = FALSE;
-
- undo->rseg = rseg;
-
- undo->space = rseg->space;
- undo->zip_size = rseg->zip_size;
- undo->hdr_page_no = page_no;
- undo->hdr_offset = offset;
- undo->last_page_no = page_no;
- undo->size = 1;
-
- undo->empty = TRUE;
- undo->top_page_no = page_no;
- undo->guess_block = NULL;
-
- return(undo);
-}
-
-/********************************************************************//**
-Initializes a cached undo log object for new use. */
-static
-void
-trx_undo_mem_init_for_reuse(
-/*========================*/
- trx_undo_t* undo, /*!< in: undo log to init */
- trx_id_t trx_id, /*!< in: id of the trx for which the undo log
- is created */
- const XID* xid, /*!< in: X/Open XA transaction identification*/
- ulint offset) /*!< in: undo log header byte offset on page */
-{
- ut_ad(mutex_own(&((undo->rseg)->mutex)));
-
- if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
-
- mem_analyze_corruption(undo);
- ut_error;
- }
-
- undo->state = TRX_UNDO_ACTIVE;
- undo->del_marks = FALSE;
- undo->trx_id = trx_id;
- undo->xid = *xid;
-
- undo->dict_operation = FALSE;
-
- undo->hdr_offset = offset;
- undo->empty = TRUE;
-}
-
-/********************************************************************//**
-Frees an undo log memory copy. */
-UNIV_INTERN
-void
-trx_undo_mem_free(
-/*==============*/
- trx_undo_t* undo) /*!< in: the undo object to be freed */
-{
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr,
- "InnoDB: Error: undo->id is %lu\n", (ulong) undo->id);
- ut_error;
- }
-
- mem_free(undo);
-}
-
-/**********************************************************************//**
-Creates a new undo log.
-@return DB_SUCCESS if successful in creating the new undo lob object,
-possible error codes are: DB_TOO_MANY_CONCURRENT_TRXS
-DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY */
-static MY_ATTRIBUTE((nonnull, warn_unused_result))
-dberr_t
-trx_undo_create(
-/*============*/
- trx_t* trx, /*!< in: transaction */
- trx_rseg_t* rseg, /*!< in: rollback segment memory copy */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- trx_id_t trx_id, /*!< in: id of the trx for which the undo log
- is created */
- const XID* xid, /*!< in: X/Open transaction identification*/
- trx_undo_t** undo, /*!< out: the new undo log object, undefined
- * if did not succeed */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_rsegf_t* rseg_header;
- ulint page_no;
- ulint offset;
- ulint id;
- page_t* undo_page;
- dberr_t err;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- if (rseg->curr_size == rseg->max_size) {
-
- return(DB_OUT_OF_FILE_SPACE);
- }
-
- rseg->curr_size++;
-
- rseg_header = trx_rsegf_get(rseg->space, rseg->zip_size, rseg->page_no,
- mtr);
-
- err = trx_undo_seg_create(rseg, rseg_header, type, &id,
- &undo_page, mtr);
-
- if (err != DB_SUCCESS) {
- /* Did not succeed */
-
- rseg->curr_size--;
-
- return(err);
- }
-
- page_no = page_get_page_no(undo_page);
-
- offset = trx_undo_header_create(undo_page, trx_id, mtr);
-
- if (trx->support_xa) {
- trx_undo_header_add_space_for_xid(undo_page,
- undo_page + offset, mtr);
- }
-
- *undo = trx_undo_mem_create(rseg, id, type, trx_id, xid,
- page_no, offset);
- if (*undo == NULL) {
-
- err = DB_OUT_OF_MEMORY;
- }
-
- return(err);
-}
-
-/*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/
-
-/********************************************************************//**
-Reuses a cached undo log.
-@return the undo log memory object, NULL if none cached */
-static
-trx_undo_t*
-trx_undo_reuse_cached(
-/*==================*/
- trx_t* trx, /*!< in: transaction */
- trx_rseg_t* rseg, /*!< in: rollback segment memory object */
- ulint type, /*!< in: type of the log: TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
- trx_id_t trx_id, /*!< in: id of the trx for which the undo log
- is used */
- const XID* xid, /*!< in: X/Open XA transaction identification */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_undo_t* undo;
- page_t* undo_page;
- ulint offset;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- if (type == TRX_UNDO_INSERT) {
-
- undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached);
- if (undo == NULL) {
-
- return(NULL);
- }
-
- UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, undo);
-
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
- } else {
- ut_ad(type == TRX_UNDO_UPDATE);
-
- undo = UT_LIST_GET_FIRST(rseg->update_undo_cached);
- if (undo == NULL) {
-
- return(NULL);
- }
-
- UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, undo);
-
- MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_CACHED);
- }
-
- ut_ad(undo->size == 1);
-
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- mem_analyze_corruption(undo);
- ut_error;
- }
-
- undo_page = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no, mtr);
-
- if (type == TRX_UNDO_INSERT) {
- offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr);
-
- if (trx->support_xa) {
- trx_undo_header_add_space_for_xid(
- undo_page, undo_page + offset, mtr);
- }
- } else {
- ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_TYPE)
- == TRX_UNDO_UPDATE);
-
- offset = trx_undo_header_create(undo_page, trx_id, mtr);
-
- if (trx->support_xa) {
- trx_undo_header_add_space_for_xid(
- undo_page, undo_page + offset, mtr);
- }
- }
-
- trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset);
-
- return(undo);
-}
-
-/**********************************************************************//**
-Marks an undo log header as a header of a data dictionary operation
-transaction. */
-static
-void
-trx_undo_mark_as_dict_operation(
-/*============================*/
- trx_t* trx, /*!< in: dict op transaction */
- trx_undo_t* undo, /*!< in: assigned undo log */
- mtr_t* mtr) /*!< in: mtr */
-{
- page_t* hdr_page;
-
- hdr_page = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no, mtr);
-
- switch (trx_get_dict_operation(trx)) {
- case TRX_DICT_OP_NONE:
- ut_error;
- case TRX_DICT_OP_INDEX:
- /* Do not discard the table on recovery. */
- undo->table_id = 0;
- break;
- case TRX_DICT_OP_TABLE:
- undo->table_id = trx->table_id;
- break;
- }
-
- mlog_write_ulint(hdr_page + undo->hdr_offset
- + TRX_UNDO_DICT_TRANS,
- TRUE, MLOG_1BYTE, mtr);
-
- mlog_write_ull(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID,
- undo->table_id, mtr);
-
- undo->dict_operation = TRUE;
-}
-
-/**********************************************************************//**
-Assigns an undo log for a transaction. A new undo log is created or a cached
-undo log reused.
-@return DB_SUCCESS if undo log assign successful, possible error codes
-are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE DB_READ_ONLY
-DB_OUT_OF_MEMORY */
-UNIV_INTERN
-dberr_t
-trx_undo_assign_undo(
-/*=================*/
- trx_t* trx, /*!< in: transaction */
- ulint type) /*!< in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
-{
- trx_rseg_t* rseg;
- trx_undo_t* undo;
- mtr_t mtr;
- dberr_t err = DB_SUCCESS;
-
- ut_ad(trx);
-
- if (trx->rseg == NULL) {
- return(DB_READ_ONLY);
- }
-
- rseg = trx->rseg;
-
- ut_ad(mutex_own(&(trx->undo_mutex)));
-
- mtr_start_trx(&mtr, trx);
-
- mutex_enter(&rseg->mutex);
-
- DBUG_EXECUTE_IF(
- "ib_create_table_fail_too_many_trx",
- err = DB_TOO_MANY_CONCURRENT_TRXS;
- goto func_exit;
- );
-
- undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, &trx->xid,
- &mtr);
- if (undo == NULL) {
- err = trx_undo_create(trx, rseg, type, trx->id, &trx->xid,
- &undo, &mtr);
- if (err != DB_SUCCESS) {
-
- goto func_exit;
- }
- }
-
- if (type == TRX_UNDO_INSERT) {
- UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_list, undo);
- ut_ad(trx->insert_undo == NULL);
- trx->insert_undo = undo;
- } else {
- UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_list, undo);
- ut_ad(trx->update_undo == NULL);
- trx->update_undo = undo;
- }
-
- if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
- trx_undo_mark_as_dict_operation(trx, undo, &mtr);
- }
-
-func_exit:
- mutex_exit(&(rseg->mutex));
- mtr_commit(&mtr);
-
- return(err);
-}
-
-/******************************************************************//**
-Sets the state of the undo log segment at a transaction finish.
-@return undo log segment header page, x-latched */
-UNIV_INTERN
-page_t*
-trx_undo_set_state_at_finish(
-/*=========================*/
- trx_undo_t* undo, /*!< in: undo log memory copy */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_usegf_t* seg_hdr;
- trx_upagef_t* page_hdr;
- page_t* undo_page;
- ulint state;
-
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- mem_analyze_corruption(undo);
- ut_error;
- }
-
- undo_page = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no, mtr);
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
- page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-
- if (undo->size == 1
- && mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE)
- < TRX_UNDO_PAGE_REUSE_LIMIT) {
-
- state = TRX_UNDO_CACHED;
-
- } else if (undo->type == TRX_UNDO_INSERT) {
-
- state = TRX_UNDO_TO_FREE;
- } else {
- state = TRX_UNDO_TO_PURGE;
- }
-
- undo->state = state;
-
- mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, state, MLOG_2BYTES, mtr);
-
- return(undo_page);
-}
-
-/******************************************************************//**
-Sets the state of the undo log segment at a transaction prepare.
-@return undo log segment header page, x-latched */
-UNIV_INTERN
-page_t*
-trx_undo_set_state_at_prepare(
-/*==========================*/
- trx_t* trx, /*!< in: transaction */
- trx_undo_t* undo, /*!< in: undo log memory copy */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_usegf_t* seg_hdr;
- trx_ulogf_t* undo_header;
- page_t* undo_page;
- ulint offset;
-
- ut_ad(trx && undo && mtr);
-
- if (undo->id >= TRX_RSEG_N_SLOTS) {
- fprintf(stderr, "InnoDB: Error: undo->id is %lu\n",
- (ulong) undo->id);
- mem_analyze_corruption(undo);
- ut_error;
- }
-
- undo_page = trx_undo_page_get(undo->space, undo->zip_size,
- undo->hdr_page_no, mtr);
-
- seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
-
- /*------------------------------*/
- undo->state = TRX_UNDO_PREPARED;
- undo->xid = trx->xid;
- /*------------------------------*/
-
- mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, undo->state,
- MLOG_2BYTES, mtr);
-
- offset = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG);
- undo_header = undo_page + offset;
-
- mlog_write_ulint(undo_header + TRX_UNDO_XID_EXISTS,
- TRUE, MLOG_1BYTE, mtr);
-
- trx_undo_write_xid(undo_header, &undo->xid, mtr);
-
- return(undo_page);
-}
-
-/**********************************************************************//**
-Adds the update undo log header as the first in the history list, and
-frees the memory object, or puts it to the list of cached update undo log
-segments. */
-UNIV_INTERN
-void
-trx_undo_update_cleanup(
-/*====================*/
- trx_t* trx, /*!< in: trx owning the update undo log */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr) /*!< in: mtr */
-{
- trx_rseg_t* rseg;
- trx_undo_t* undo;
-
- undo = trx->update_undo;
- rseg = trx->rseg;
-
- ut_ad(mutex_own(&(rseg->mutex)));
-
- trx_purge_add_update_undo_to_history(trx, undo_page, mtr);
-
- UT_LIST_REMOVE(undo_list, rseg->update_undo_list, undo);
-
- trx->update_undo = NULL;
-
- if (undo->state == TRX_UNDO_CACHED) {
-
- UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_cached, undo);
-
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
- } else {
- ut_ad(undo->state == TRX_UNDO_TO_PURGE);
-
- trx_undo_mem_free(undo);
- }
-}
-
-/******************************************************************//**
-Frees or caches an insert undo log after a transaction commit or rollback.
-Knowledge of inserts is not needed after a commit or rollback, therefore
-the data can be discarded. */
-UNIV_INTERN
-void
-trx_undo_insert_cleanup(
-/*====================*/
- trx_t* trx) /*!< in: transaction handle */
-{
- trx_undo_t* undo;
- trx_rseg_t* rseg;
-
- undo = trx->insert_undo;
- ut_ad(undo);
-
- rseg = trx->rseg;
-
- mutex_enter(&(rseg->mutex));
-
- UT_LIST_REMOVE(undo_list, rseg->insert_undo_list, undo);
- trx->insert_undo = NULL;
-
- if (undo->state == TRX_UNDO_CACHED) {
-
- UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_cached, undo);
-
- MONITOR_INC(MONITOR_NUM_UNDO_SLOT_CACHED);
- } else {
- ut_ad(undo->state == TRX_UNDO_TO_FREE);
-
- /* Delete first the undo log segment in the file */
-
- mutex_exit(&(rseg->mutex));
-
- trx_undo_seg_free(undo);
-
- mutex_enter(&(rseg->mutex));
-
- ut_ad(rseg->curr_size > undo->size);
-
- rseg->curr_size -= undo->size;
-
- trx_undo_mem_free(undo);
- }
-
- mutex_exit(&(rseg->mutex));
-}
-
-/********************************************************************//**
-At shutdown, frees the undo logs of a PREPARED transaction. */
-UNIV_INTERN
-void
-trx_undo_free_prepared(
-/*===================*/
- trx_t* trx) /*!< in/out: PREPARED transaction */
-{
- ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
-
- if (trx->update_undo) {
- switch (trx->update_undo->state) {
- case TRX_UNDO_PREPARED:
- break;
- case TRX_UNDO_ACTIVE:
- /* lock_trx_release_locks() assigns
- trx->is_recovered=false */
- ut_a(srv_read_only_mode
- || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
- break;
- default:
- ut_error;
- }
-
- UT_LIST_REMOVE(undo_list, trx->rseg->update_undo_list,
- trx->update_undo);
- trx_undo_mem_free(trx->update_undo);
- }
- if (trx->insert_undo) {
- switch (trx->insert_undo->state) {
- case TRX_UNDO_PREPARED:
- break;
- case TRX_UNDO_ACTIVE:
- /* lock_trx_release_locks() assigns
- trx->is_recovered=false */
- ut_a(srv_read_only_mode
- || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
- break;
- default:
- ut_error;
- }
-
- UT_LIST_REMOVE(undo_list, trx->rseg->insert_undo_list,
- trx->insert_undo);
- trx_undo_mem_free(trx->insert_undo);
- }
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/usr/usr0sess.cc b/storage/xtradb/usr/usr0sess.cc
deleted file mode 100644
index e1bd71ff1a0..00000000000
--- a/storage/xtradb/usr/usr0sess.cc
+++ /dev/null
@@ -1,67 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file usr/usr0sess.cc
-Sessions
-
-Created 6/25/1996 Heikki Tuuri
-*******************************************************/
-
-#include "usr0sess.h"
-
-#ifdef UNIV_NONINL
-#include "usr0sess.ic"
-#endif
-
-#include "trx0trx.h"
-
-/*********************************************************************//**
-Opens a session.
-@return own: session object */
-UNIV_INTERN
-sess_t*
-sess_open(void)
-/*===========*/
-{
- sess_t* sess;
-
- sess = static_cast<sess_t*>(mem_zalloc(sizeof(*sess)));
-
- sess->state = SESS_ACTIVE;
-
- sess->trx = trx_allocate_for_background();
- sess->trx->sess = sess;
-
- return(sess);
-}
-
-/*********************************************************************//**
-Closes a session, freeing the memory occupied by it. */
-UNIV_INTERN
-void
-sess_close(
-/*=======*/
- sess_t* sess) /*!< in, own: session object */
-{
- ut_a(UT_LIST_GET_LEN(sess->graphs) == 0);
-
- trx_free_for_background(sess->trx);
- mem_free(sess);
-}
diff --git a/storage/xtradb/ut/ut0bh.cc b/storage/xtradb/ut/ut0bh.cc
deleted file mode 100644
index 1a3038a0d71..00000000000
--- a/storage/xtradb/ut/ut0bh.cc
+++ /dev/null
@@ -1,159 +0,0 @@
-/***************************************************************************//**
-
-Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file ut/ut0bh.cc
-Binary min-heap implementation.
-
-Created 2010-05-28 by Sunny Bains
-*******************************************************/
-
-#include "ut0bh.h"
-#include "ut0mem.h"
-
-#ifdef UNIV_NONINL
-#include "ut0bh.ic"
-#endif
-
-#include <string.h>
-
-/**********************************************************************//**
-Create a binary heap.
-@return a new binary heap */
-UNIV_INTERN
-ib_bh_t*
-ib_bh_create(
-/*=========*/
- ib_bh_cmp_t compare, /*!< in: comparator */
- ulint sizeof_elem, /*!< in: size of one element */
- ulint max_elems) /*!< in: max elements allowed */
-{
- ulint sz;
- ib_bh_t* ib_bh;
-
- sz = sizeof(*ib_bh) + (sizeof_elem * max_elems);
-
- ib_bh = (ib_bh_t*) ut_malloc(sz);
- memset(ib_bh, 0x0, sz);
-
- ib_bh->compare = compare;
- ib_bh->max_elems = max_elems;
- ib_bh->sizeof_elem = sizeof_elem;
-
- return(ib_bh);
-}
-
-/**********************************************************************//**
-Free a binary heap.
-@return a new binary heap */
-UNIV_INTERN
-void
-ib_bh_free(
-/*=======*/
- ib_bh_t* ib_bh) /*!< in/own: instance */
-{
- ut_free(ib_bh);
-}
-
-/**********************************************************************//**
-Add an element to the binary heap. Note: The element is copied.
-@return pointer to added element or NULL if full. */
-UNIV_INTERN
-void*
-ib_bh_push(
-/*=======*/
- ib_bh_t* ib_bh, /*!< in/out: instance */
- const void* elem) /*!< in: element to add */
-{
- void* ptr;
-
- if (ib_bh_is_full(ib_bh)) {
- return(NULL);
- } else if (ib_bh_is_empty(ib_bh)) {
- ++ib_bh->n_elems;
- return(ib_bh_set(ib_bh, 0, elem));
- } else {
- ulint i;
-
- i = ib_bh->n_elems;
-
- ++ib_bh->n_elems;
-
- for (ptr = ib_bh_get(ib_bh, i >> 1);
- i > 0 && ib_bh->compare(ptr, elem) > 0;
- i >>= 1, ptr = ib_bh_get(ib_bh, i >> 1)) {
-
- ib_bh_set(ib_bh, i, ptr);
- }
-
- ptr = ib_bh_set(ib_bh, i, elem);
- }
-
- return(ptr);
-}
-
-/**********************************************************************//**
-Remove the first element from the binary heap. */
-UNIV_INTERN
-void
-ib_bh_pop(
-/*======*/
- ib_bh_t* ib_bh) /*!< in/out: instance */
-{
- byte* ptr;
- byte* last;
- ulint parent = 0;
-
- if (ib_bh_is_empty(ib_bh)) {
- return;
- } else if (ib_bh_size(ib_bh) == 1) {
- --ib_bh->n_elems;
- return;
- }
-
- last = (byte*) ib_bh_last(ib_bh);
-
- /* Start from the child node */
- ptr = (byte*) ib_bh_get(ib_bh, 1);
-
- while (ptr < last) {
- /* If the "right" child node is < "left" child node */
- if (ib_bh->compare(ptr + ib_bh->sizeof_elem, ptr) < 0) {
- ptr += ib_bh->sizeof_elem;
- }
-
- if (ib_bh->compare(last, ptr) <= 0) {
- break;
- }
-
- ib_bh_set(ib_bh, parent, ptr);
-
- parent = (ptr - (byte*) ib_bh_first(ib_bh))
- / ib_bh->sizeof_elem;
-
- if ((parent << 1) >= ib_bh_size(ib_bh)) {
- break;
- }
-
- ptr = (byte*) ib_bh_get(ib_bh, parent << 1);
- }
-
- --ib_bh->n_elems;
-
- ib_bh_set(ib_bh, parent, last);
-}
diff --git a/storage/xtradb/ut/ut0byte.cc b/storage/xtradb/ut/ut0byte.cc
deleted file mode 100644
index bc592edc6bf..00000000000
--- a/storage/xtradb/ut/ut0byte.cc
+++ /dev/null
@@ -1,30 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/***************************************************************//**
-@file ut/ut0byte.cc
-Byte utilities
-
-Created 5/11/1994 Heikki Tuuri
-********************************************************************/
-
-#include "ut0byte.h"
-
-#ifdef UNIV_NONINL
-#include "ut0byte.ic"
-#endif
diff --git a/storage/xtradb/ut/ut0crc32.cc b/storage/xtradb/ut/ut0crc32.cc
deleted file mode 100644
index 15ed6bfadee..00000000000
--- a/storage/xtradb/ut/ut0crc32.cc
+++ /dev/null
@@ -1,342 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2009, 2010 Facebook, Inc. All Rights Reserved.
-Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/***************************************************************//**
-@file ut/ut0crc32.cc
-CRC32 implementation from Facebook, based on the zlib implementation.
-
-Created Aug 8, 2011, Vasil Dimov, based on mysys/my_crc32.c and
-mysys/my_perf.c, contributed by Facebook under the following license.
-********************************************************************/
-
-/* Copyright (C) 2009-2010 Facebook, Inc. All Rights Reserved.
-
- Dual licensed under BSD license and GPLv2.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- 1. Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- 2. Redistributions in binary form must reproduce the above copyright notice,
- this list of conditions and the following disclaimer in the documentation
- and/or other materials provided with the distribution.
-
- THIS SOFTWARE IS PROVIDED BY FACEBOOK, INC. ``AS IS'' AND ANY EXPRESS OR
- IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
- EVENT SHALL FACEBOOK, INC. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
- This program is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by the Free
- Software Foundation; version 2 of the License.
-
- This program is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- You should have received a copy of the GNU General Public License along with
- this program; if not, write to the Free Software Foundation, Inc.,
- 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
-
-/* The below CRC32 implementation is based on the implementation included with
- * zlib with modifications to process 8 bytes at a time and using SSE 4.2
- * extentions when available. The polynomial constant has been changed to
- * match the one used by SSE 4.2 and does not return the same value as the
- * version used by zlib. This implementation only supports 64-bit
- * little-endian processors. The original zlib copyright notice follows. */
-
-/* crc32.c -- compute the CRC-32 of a buf stream
- * Copyright (C) 1995-2005 Mark Adler
- * For conditions of distribution and use, see copyright notice in zlib.h
- *
- * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
- * CRC methods: exclusive-oring 32 bits of buf at a time, and pre-computing
- * tables for updating the shift register in one step with three exclusive-ors
- * instead of four steps with four exclusive-ors. This results in about a
- * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
- */
-
-#include "univ.i"
-#include "ut0crc32.h"
-
-#if defined(__linux__) && defined(__powerpc__)
-/* Used to detect at runtime if we have vpmsum instructions (PowerISA 2.07) */
-#include <sys/auxv.h>
-#include <bits/hwcap.h>
-#endif /* defined(__linux__) && defined(__powerpc__) */
-
-#include <string.h>
-
-ib_ut_crc32_t ut_crc32;
-
-/* Precalculated table used to generate the CRC32 if the CPU does not
-have support for it */
-static ib_uint32_t ut_crc32_slice8_table[8][256];
-static ibool ut_crc32_slice8_table_initialized = FALSE;
-
-/** Text description of CRC32 implementation */
-const char *ut_crc32_implementation = NULL;
-
-/********************************************************************//**
-Initializes the table that is used to generate the CRC32 if the CPU does
-not have support for it. */
-#ifndef HAVE_CRC32_VPMSUM
-static
-void
-ut_crc32_slice8_table_init()
-/*========================*/
-{
- /* bit-reversed poly 0x1EDC6F41 (from SSE42 crc32 instruction) */
- static const ib_uint32_t poly = 0x82f63b78;
- ib_uint32_t n;
- ib_uint32_t k;
- ib_uint32_t c;
-
- for (n = 0; n < 256; n++) {
- c = n;
- for (k = 0; k < 8; k++) {
- c = (c & 1) ? (poly ^ (c >> 1)) : (c >> 1);
- }
- ut_crc32_slice8_table[0][n] = c;
- }
-
- for (n = 0; n < 256; n++) {
- c = ut_crc32_slice8_table[0][n];
- for (k = 1; k < 8; k++) {
- c = ut_crc32_slice8_table[0][c & 0xFF] ^ (c >> 8);
- ut_crc32_slice8_table[k][n] = c;
- }
- }
-
- ut_crc32_slice8_table_initialized = TRUE;
-}
-#endif
-
-#if defined(__GNUC__) && defined(__x86_64__)
-/********************************************************************//**
-Fetches CPU info */
-static
-void
-ut_cpuid(
-/*=====*/
- ib_uint32_t vend[3], /*!< out: CPU vendor */
- ib_uint32_t* model, /*!< out: CPU model */
- ib_uint32_t* family, /*!< out: CPU family */
- ib_uint32_t* stepping, /*!< out: CPU stepping */
- ib_uint32_t* features_ecx, /*!< out: CPU features ecx */
- ib_uint32_t* features_edx) /*!< out: CPU features edx */
-{
- ib_uint32_t sig;
- asm("cpuid" : "=b" (vend[0]), "=c" (vend[2]), "=d" (vend[1]) : "a" (0));
- asm("cpuid" : "=a" (sig), "=c" (*features_ecx), "=d" (*features_edx)
- : "a" (1)
- : "ebx");
-
- *model = ((sig >> 4) & 0xF);
- *family = ((sig >> 8) & 0xF);
- *stepping = (sig & 0xF);
-
- if (memcmp(vend, "GenuineIntel", 12) == 0
- || (memcmp(vend, "AuthenticAMD", 12) == 0 && *family == 0xF)) {
-
- *model += (((sig >> 16) & 0xF) << 4);
- *family += ((sig >> 20) & 0xFF);
- }
-}
-
-/* opcodes taken from objdump of "crc32b (%%rdx), %%rcx"
-for RHEL4 support (GCC 3 doesn't support this instruction) */
-#define ut_crc32_sse42_byte \
- asm(".byte 0xf2, 0x48, 0x0f, 0x38, 0xf0, 0x0a" \
- : "=c"(crc) : "c"(crc), "d"(buf)); \
- len--, buf++
-
-/* opcodes taken from objdump of "crc32q (%%rdx), %%rcx"
-for RHEL4 support (GCC 3 doesn't support this instruction) */
-#define ut_crc32_sse42_quadword \
- asm(".byte 0xf2, 0x48, 0x0f, 0x38, 0xf1, 0x0a" \
- : "=c"(crc) : "c"(crc), "d"(buf)); \
- len -= 8, buf += 8
-#endif /* defined(__GNUC__) && defined(__x86_64__) */
-
-
-#ifdef HAVE_CRC32_VPMSUM
-extern "C" {
-unsigned int crc32c_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
-};
-
-UNIV_INLINE
-ib_uint32_t
-ut_crc32_power8(
-/*===========*/
- const byte* buf, /*!< in: data over which to calculate CRC32 */
- ulint len) /*!< in: data length */
-{
- return crc32c_vpmsum(0, buf, len);
-}
-#endif
-
-/********************************************************************//**
-Calculates CRC32 using CPU instructions.
-@return CRC-32C (polynomial 0x11EDC6F41) */
-UNIV_INLINE
-ib_uint32_t
-ut_crc32_sse42(
-/*===========*/
- const byte* buf, /*!< in: data over which to calculate CRC32 */
- ulint len) /*!< in: data length */
-{
-#if defined(__GNUC__) && defined(__x86_64__)
- ib_uint64_t crc = (ib_uint32_t) (-1);
-
- while (len && ((ulint) buf & 7)) {
- ut_crc32_sse42_byte;
- }
-
- while (len >= 32) {
- ut_crc32_sse42_quadword;
- ut_crc32_sse42_quadword;
- ut_crc32_sse42_quadword;
- ut_crc32_sse42_quadword;
- }
-
- while (len >= 8) {
- ut_crc32_sse42_quadword;
- }
-
- while (len) {
- ut_crc32_sse42_byte;
- }
-
- return((ib_uint32_t) ((~crc) & 0xFFFFFFFF));
-#else
- ut_error;
- /* silence compiler warning about unused parameters */
- return((ib_uint32_t) buf[len]);
-#endif /* defined(__GNUC__) && defined(__x86_64__) */
-}
-
-#define ut_crc32_slice8_byte \
- crc = (crc >> 8) ^ ut_crc32_slice8_table[0][(crc ^ *buf++) & 0xFF]; \
- len--
-
-#define ut_crc32_slice8_quadword \
- crc ^= *(ib_uint64_t*) buf; \
- crc = ut_crc32_slice8_table[7][(crc ) & 0xFF] ^ \
- ut_crc32_slice8_table[6][(crc >> 8) & 0xFF] ^ \
- ut_crc32_slice8_table[5][(crc >> 16) & 0xFF] ^ \
- ut_crc32_slice8_table[4][(crc >> 24) & 0xFF] ^ \
- ut_crc32_slice8_table[3][(crc >> 32) & 0xFF] ^ \
- ut_crc32_slice8_table[2][(crc >> 40) & 0xFF] ^ \
- ut_crc32_slice8_table[1][(crc >> 48) & 0xFF] ^ \
- ut_crc32_slice8_table[0][(crc >> 56)]; \
- len -= 8, buf += 8
-
-/********************************************************************//**
-Calculates CRC32 manually.
-@return CRC-32C (polynomial 0x11EDC6F41) */
-UNIV_INLINE
-ib_uint32_t
-ut_crc32_slice8(
-/*============*/
- const byte* buf, /*!< in: data over which to calculate CRC32 */
- ulint len) /*!< in: data length */
-{
- ib_uint64_t crc = (ib_uint32_t) (-1);
-
- ut_a(ut_crc32_slice8_table_initialized);
-
- while (len && ((ulint) buf & 7)) {
- ut_crc32_slice8_byte;
- }
-
- while (len >= 32) {
- ut_crc32_slice8_quadword;
- ut_crc32_slice8_quadword;
- ut_crc32_slice8_quadword;
- ut_crc32_slice8_quadword;
- }
-
- while (len >= 8) {
- ut_crc32_slice8_quadword;
- }
-
- while (len) {
- ut_crc32_slice8_byte;
- }
-
- return((ib_uint32_t) ((~crc) & 0xFFFFFFFF));
-}
-
-/********************************************************************//**
-Initializes the data structures used by ut_crc32(). Does not do any
-allocations, would not hurt if called twice, but would be pointless. */
-UNIV_INTERN
-void
-ut_crc32_init()
-/*===========*/
-{
- ut_crc32_slice8_table_init();
- ut_crc32 = ut_crc32_slice8;
- ut_crc32_implementation = "Using generic crc32 instructions";
-
-#if defined(__GNUC__) && defined(__x86_64__)
- ib_uint32_t vend[3];
- ib_uint32_t model;
- ib_uint32_t family;
- ib_uint32_t stepping;
- ib_uint32_t features_ecx;
- ib_uint32_t features_edx;
-
- ut_cpuid(vend, &model, &family, &stepping,
- &features_ecx, &features_edx);
-
- /* Valgrind does not understand the CRC32 instructions:
-
- vex amd64->IR: unhandled instruction bytes: 0xF2 0x48 0xF 0x38 0xF0 0xA
- valgrind: Unrecognised instruction at address 0xad3db5.
- Your program just tried to execute an instruction that Valgrind
- did not recognise. There are two possible reasons for this.
- 1. Your program has a bug and erroneously jumped to a non-code
- location. If you are running Memcheck and you just saw a
- warning about a bad jump, it's probably your program's fault.
- 2. The instruction is legitimate but Valgrind doesn't handle it,
- i.e. it's Valgrind's fault. If you think this is the case or
- you are not sure, please let us know and we'll try to fix it.
- Either way, Valgrind will now raise a SIGILL signal which will
- probably kill your program.
-
- */
- if ((features_ecx >> 20) & 1) {
- ut_crc32 = ut_crc32_sse42;
- ut_crc32_implementation = "Using SSE2 crc32 instructions";
- }
-
-#elif defined(HAVE_CRC32_VPMSUM)
- ut_crc32 = ut_crc32_power8;
- ut_crc32_implementation = "Using POWER8 crc32 instructions";
-#endif
-}
diff --git a/storage/xtradb/ut/ut0dbg.cc b/storage/xtradb/ut/ut0dbg.cc
deleted file mode 100644
index a1cad144da4..00000000000
--- a/storage/xtradb/ut/ut0dbg.cc
+++ /dev/null
@@ -1,139 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*****************************************************************//**
-@file ut/ut0dbg.cc
-Debug utilities for Innobase.
-
-Created 1/30/1994 Heikki Tuuri
-**********************************************************************/
-
-#include "univ.i"
-#include "ut0dbg.h"
-#ifndef UNIV_HOTBACKUP
-# include "ha_prototypes.h"
-#endif /* !UNIV_HOTBACKUP */
-
-#if defined(__GNUC__) && (__GNUC__ > 2)
-#else
-/** This is used to eliminate compiler warnings */
-UNIV_INTERN ulint ut_dbg_zero = 0;
-#endif
-
-/*************************************************************//**
-Report a failed assertion. */
-UNIV_INTERN
-void
-ut_dbg_assertion_failed(
-/*====================*/
- const char* expr, /*!< in: the failed assertion (optional) */
- const char* file, /*!< in: source file containing the assertion */
- ulint line) /*!< in: line number of the assertion */
-{
- ut_print_timestamp(stderr);
-#ifdef UNIV_HOTBACKUP
- fprintf(stderr, " InnoDB: Assertion failure in file %s line %lu\n",
- file, line);
-#else /* UNIV_HOTBACKUP */
- fprintf(stderr,
- " InnoDB: Assertion failure in thread %lu"
- " in file %s line %lu\n",
- os_thread_pf(os_thread_get_curr_id()),
- innobase_basename(file), line);
-#endif /* UNIV_HOTBACKUP */
- if (expr) {
- fprintf(stderr,
- "InnoDB: Failing assertion: %s\n", expr);
- }
-
- fputs("InnoDB: We intentionally generate a memory trap.\n"
- "InnoDB: Submit a detailed bug report"
- " to http://bugs.mysql.com.\n"
- "InnoDB: If you get repeated assertion failures"
- " or crashes, even\n"
- "InnoDB: immediately after the mysqld startup, there may be\n"
- "InnoDB: corruption in the InnoDB tablespace. Please refer to\n"
- "InnoDB: " REFMAN "forcing-innodb-recovery.html\n"
- "InnoDB: about forcing recovery.\n", stderr);
-}
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-#include <sys/types.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-
-#include <unistd.h>
-
-#ifndef timersub
-#define timersub(a, b, r) \
- do { \
- (r)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
- (r)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
- if ((r)->tv_usec < 0) { \
- (r)->tv_sec--; \
- (r)->tv_usec += 1000000; \
- } \
- } while (0)
-#endif /* timersub */
-
-/*******************************************************************//**
-Resets a speedo (records the current time in it). */
-UNIV_INTERN
-void
-speedo_reset(
-/*=========*/
- speedo_t* speedo) /*!< out: speedo */
-{
- gettimeofday(&speedo->tv, NULL);
-
- getrusage(RUSAGE_SELF, &speedo->ru);
-}
-
-/*******************************************************************//**
-Shows the time elapsed and usage statistics since the last reset of a
-speedo. */
-UNIV_INTERN
-void
-speedo_show(
-/*========*/
- const speedo_t* speedo) /*!< in: speedo */
-{
- struct rusage ru_now;
- struct timeval tv_now;
- struct timeval tv_diff;
-
- getrusage(RUSAGE_SELF, &ru_now);
-
- gettimeofday(&tv_now, NULL);
-
-#define PRINT_TIMEVAL(prefix, tvp) \
- fprintf(stderr, "%s% 5ld.%06ld sec\n", \
- prefix, (tvp)->tv_sec, (tvp)->tv_usec)
-
- timersub(&tv_now, &speedo->tv, &tv_diff);
- PRINT_TIMEVAL("real", &tv_diff);
-
- timersub(&ru_now.ru_utime, &speedo->ru.ru_utime, &tv_diff);
- PRINT_TIMEVAL("user", &tv_diff);
-
- timersub(&ru_now.ru_stime, &speedo->ru.ru_stime, &tv_diff);
- PRINT_TIMEVAL("sys ", &tv_diff);
-}
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
diff --git a/storage/xtradb/ut/ut0list.cc b/storage/xtradb/ut/ut0list.cc
deleted file mode 100644
index f906061d185..00000000000
--- a/storage/xtradb/ut/ut0list.cc
+++ /dev/null
@@ -1,203 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file ut/ut0list.cc
-A double-linked list
-
-Created 4/26/2006 Osku Salerma
-************************************************************************/
-
-#include "ut0list.h"
-#ifdef UNIV_NONINL
-#include "ut0list.ic"
-#endif
-
-/****************************************************************//**
-Create a new list.
-@return list */
-UNIV_INTERN
-ib_list_t*
-ib_list_create(void)
-/*=================*/
-{
- ib_list_t* list;
-
- list = static_cast<ib_list_t*>(mem_alloc(sizeof(*list)));
-
- list->first = NULL;
- list->last = NULL;
- list->is_heap_list = FALSE;
-
- return(list);
-}
-
-/****************************************************************//**
-Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for
-lists created with this function.
-@return list */
-UNIV_INTERN
-ib_list_t*
-ib_list_create_heap(
-/*================*/
- mem_heap_t* heap) /*!< in: memory heap to use */
-{
- ib_list_t* list;
-
- list = static_cast<ib_list_t*>(mem_heap_alloc(heap, sizeof(*list)));
-
- list->first = NULL;
- list->last = NULL;
- list->is_heap_list = TRUE;
-
- return(list);
-}
-
-/****************************************************************//**
-Free a list. */
-UNIV_INTERN
-void
-ib_list_free(
-/*=========*/
- ib_list_t* list) /*!< in: list */
-{
- ut_a(!list->is_heap_list);
-
- /* We don't check that the list is empty because it's entirely valid
- to e.g. have all the nodes allocated from a single heap that is then
- freed after the list itself is freed. */
-
- mem_free(list);
-}
-
-/****************************************************************//**
-Add the data to the start of the list.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_first(
-/*==============*/
- ib_list_t* list, /*!< in: list */
- void* data, /*!< in: data */
- mem_heap_t* heap) /*!< in: memory heap to use */
-{
- return(ib_list_add_after(list, ib_list_get_first(list), data, heap));
-}
-
-/****************************************************************//**
-Add the data to the end of the list.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_last(
-/*=============*/
- ib_list_t* list, /*!< in: list */
- void* data, /*!< in: data */
- mem_heap_t* heap) /*!< in: memory heap to use */
-{
- return(ib_list_add_after(list, ib_list_get_last(list), data, heap));
-}
-
-/****************************************************************//**
-Add the data after the indicated node.
-@return new list node */
-UNIV_INTERN
-ib_list_node_t*
-ib_list_add_after(
-/*==============*/
- ib_list_t* list, /*!< in: list */
- ib_list_node_t* prev_node, /*!< in: node preceding new node (can
- be NULL) */
- void* data, /*!< in: data */
- mem_heap_t* heap) /*!< in: memory heap to use */
-{
- ib_list_node_t* node;
-
- node = static_cast<ib_list_node_t*>(
- mem_heap_alloc(heap, sizeof(*node)));
-
- node->data = data;
-
- if (!list->first) {
- /* Empty list. */
-
- ut_a(!prev_node);
-
- node->prev = NULL;
- node->next = NULL;
-
- list->first = node;
- list->last = node;
- } else if (!prev_node) {
- /* Start of list. */
-
- node->prev = NULL;
- node->next = list->first;
-
- list->first->prev = node;
-
- list->first = node;
- } else {
- /* Middle or end of list. */
-
- node->prev = prev_node;
- node->next = prev_node->next;
-
- prev_node->next = node;
-
- if (node->next) {
- node->next->prev = node;
- } else {
- list->last = node;
- }
- }
-
- return(node);
-}
-
-/****************************************************************//**
-Remove the node from the list. */
-UNIV_INTERN
-void
-ib_list_remove(
-/*===========*/
- ib_list_t* list, /*!< in: list */
- ib_list_node_t* node) /*!< in: node to remove */
-{
- if (node->prev) {
- node->prev->next = node->next;
- } else {
- /* First item in list. */
-
- ut_ad(list->first == node);
-
- list->first = node->next;
- }
-
- if (node->next) {
- node->next->prev = node->prev;
- } else {
- /* Last item in list. */
-
- ut_ad(list->last == node);
-
- list->last = node->prev;
- }
-
- node->prev = node->next = NULL;
-}
diff --git a/storage/xtradb/ut/ut0mem.cc b/storage/xtradb/ut/ut0mem.cc
deleted file mode 100644
index 2bb5d9ce332..00000000000
--- a/storage/xtradb/ut/ut0mem.cc
+++ /dev/null
@@ -1,609 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file ut/ut0mem.cc
-Memory primitives
-
-Created 5/11/1994 Heikki Tuuri
-*************************************************************************/
-
-#include "ut0mem.h"
-
-#ifdef UNIV_NONINL
-#include "ut0mem.ic"
-#endif
-
-#ifndef UNIV_HOTBACKUP
-# include "os0thread.h"
-# include "srv0srv.h"
-
-#include <stdlib.h>
-
-/** The total amount of memory currently allocated from the operating
-system with os_mem_alloc_large() or malloc(). Does not count malloc()
-if srv_use_sys_malloc is set. Protected by ut_list_mutex. */
-UNIV_INTERN ulint ut_total_allocated_memory = 0;
-
-/** Mutex protecting ut_total_allocated_memory and ut_mem_block_list */
-UNIV_INTERN os_fast_mutex_t ut_list_mutex;
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register server_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t ut_list_mutex_key;
-#endif
-
-/** Dynamically allocated memory block */
-struct ut_mem_block_t{
- UT_LIST_NODE_T(ut_mem_block_t) mem_block_list;
- /*!< mem block list node */
- ulint size; /*!< size of allocated memory */
- ulint magic_n;/*!< magic number (UT_MEM_MAGIC_N) */
-};
-
-/** The value of ut_mem_block_t::magic_n. Used in detecting
-memory corruption. */
-#define UT_MEM_MAGIC_N 1601650166
-
-/** List of all memory blocks allocated from the operating system
-with malloc. Protected by ut_list_mutex. */
-static UT_LIST_BASE_NODE_T(ut_mem_block_t) ut_mem_block_list;
-
-/** Flag: has ut_mem_block_list been initialized? */
-static ibool ut_mem_block_list_inited = FALSE;
-
-/** A dummy pointer for generating a null pointer exception in
-ut_malloc_low() */
-static ulint* ut_mem_null_ptr = NULL;
-
-/**********************************************************************//**
-Initializes the mem block list at database startup. */
-UNIV_INTERN
-void
-ut_mem_init(void)
-/*=============*/
-{
- ut_a(!ut_mem_block_list_inited);
- os_fast_mutex_init(ut_list_mutex_key, &ut_list_mutex);
- UT_LIST_INIT(ut_mem_block_list);
- ut_mem_block_list_inited = TRUE;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Allocates memory.
-@return own: allocated memory */
-UNIV_INTERN
-void*
-ut_malloc_low(
-/*==========*/
- ulint n, /*!< in: number of bytes to allocate */
- ibool assert_on_error)/*!< in: if TRUE, we crash mysqld if the
- memory cannot be allocated */
-{
-#ifndef UNIV_HOTBACKUP
- ulint retry_count;
- void* ret;
-
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- ret = malloc(n);
- ut_a(ret || !assert_on_error);
-
- return(ret);
- }
-
- ut_ad((sizeof(ut_mem_block_t) % 8) == 0); /* check alignment ok */
- ut_a(ut_mem_block_list_inited);
-
- retry_count = 0;
-retry:
- os_fast_mutex_lock(&ut_list_mutex);
-
- ret = malloc(n + sizeof(ut_mem_block_t));
-
- if (ret == NULL && retry_count < 60) {
- if (retry_count == 0) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Error: cannot allocate"
- " %lu bytes of\n"
- "InnoDB: memory with malloc!"
- " Total allocated memory\n"
- "InnoDB: by InnoDB %lu bytes."
- " Operating system errno: %lu\n"
- "InnoDB: Check if you should"
- " increase the swap file or\n"
- "InnoDB: ulimits of your operating system.\n"
- "InnoDB: On FreeBSD check you"
- " have compiled the OS with\n"
- "InnoDB: a big enough maximum process size.\n"
- "InnoDB: Note that in most 32-bit"
- " computers the process\n"
- "InnoDB: memory space is limited"
- " to 2 GB or 4 GB.\n"
- "InnoDB: We keep retrying"
- " the allocation for 60 seconds...\n",
- (ulong) n, (ulong) ut_total_allocated_memory,
-#ifdef __WIN__
- (ulong) GetLastError()
-#else
- (ulong) errno
-#endif
- );
- }
-
- os_fast_mutex_unlock(&ut_list_mutex);
-
- /* Sleep for a second and retry the allocation; maybe this is
- just a temporary shortage of memory */
-
- os_thread_sleep(1000000);
-
- retry_count++;
-
- goto retry;
- }
-
- if (ret == NULL) {
- /* Flush stderr to make more probable that the error
- message gets in the error file before we generate a seg
- fault */
-
- fflush(stderr);
-
- os_fast_mutex_unlock(&ut_list_mutex);
-
- /* Make an intentional seg fault so that we get a stack
- trace */
- if (assert_on_error) {
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: We now intentionally"
- " generate a seg fault so that\n"
- "InnoDB: on Linux we get a stack trace.\n");
-
- if (*ut_mem_null_ptr) ut_mem_null_ptr = 0;
- } else {
- return(NULL);
- }
- }
-
- UNIV_MEM_ALLOC(ret, n + sizeof(ut_mem_block_t));
-
- ((ut_mem_block_t*) ret)->size = n + sizeof(ut_mem_block_t);
- ((ut_mem_block_t*) ret)->magic_n = UT_MEM_MAGIC_N;
-
- ut_total_allocated_memory += n + sizeof(ut_mem_block_t);
-
- UT_LIST_ADD_FIRST(mem_block_list, ut_mem_block_list,
- ((ut_mem_block_t*) ret));
- os_fast_mutex_unlock(&ut_list_mutex);
-
- return((void*)((byte*) ret + sizeof(ut_mem_block_t)));
-#else /* !UNIV_HOTBACKUP */
- void* ret = malloc(n);
- ut_a(ret || !assert_on_error);
-
- return(ret);
-#endif /* !UNIV_HOTBACKUP */
-}
-
-/**********************************************************************//**
-Frees a memory block allocated with ut_malloc. Freeing a NULL pointer is
-a nop. */
-UNIV_INTERN
-void
-ut_free(
-/*====*/
- void* ptr) /*!< in, own: memory block, can be NULL */
-{
-#ifndef UNIV_HOTBACKUP
- ut_mem_block_t* block;
-
- if (ptr == NULL) {
- return;
- } else if (UNIV_LIKELY(srv_use_sys_malloc)) {
- free(ptr);
- return;
- }
-
- block = (ut_mem_block_t*)((byte*) ptr - sizeof(ut_mem_block_t));
-
- os_fast_mutex_lock(&ut_list_mutex);
-
- ut_a(block->magic_n == UT_MEM_MAGIC_N);
- ut_a(ut_total_allocated_memory >= block->size);
-
- ut_total_allocated_memory -= block->size;
-
- UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block);
- free(block);
-
- os_fast_mutex_unlock(&ut_list_mutex);
-#else /* !UNIV_HOTBACKUP */
- free(ptr);
-#endif /* !UNIV_HOTBACKUP */
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Implements realloc. This is needed by /pars/lexyy.cc. Otherwise, you should not
-use this function because the allocation functions in mem0mem.h are the
-recommended ones in InnoDB.
-
-man realloc in Linux, 2004:
-
- realloc() changes the size of the memory block pointed to
- by ptr to size bytes. The contents will be unchanged to
- the minimum of the old and new sizes; newly allocated mem-
- ory will be uninitialized. If ptr is NULL, the call is
- equivalent to malloc(size); if size is equal to zero, the
- call is equivalent to free(ptr). Unless ptr is NULL, it
- must have been returned by an earlier call to malloc(),
- calloc() or realloc().
-
-RETURN VALUE
- realloc() returns a pointer to the newly allocated memory,
- which is suitably aligned for any kind of variable and may
- be different from ptr, or NULL if the request fails. If
- size was equal to 0, either NULL or a pointer suitable to
- be passed to free() is returned. If realloc() fails the
- original block is left untouched - it is not freed or
- moved.
-@return own: pointer to new mem block or NULL */
-UNIV_INTERN
-void*
-ut_realloc(
-/*=======*/
- void* ptr, /*!< in: pointer to old block or NULL */
- ulint size) /*!< in: desired size */
-{
- ut_mem_block_t* block;
- ulint old_size;
- ulint min_size;
- void* new_ptr;
-
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- return(realloc(ptr, size));
- }
-
- if (ptr == NULL) {
-
- return(ut_malloc(size));
- }
-
- if (size == 0) {
- ut_free(ptr);
-
- return(NULL);
- }
-
- block = (ut_mem_block_t*)((byte*) ptr - sizeof(ut_mem_block_t));
-
- ut_a(block->magic_n == UT_MEM_MAGIC_N);
-
- old_size = block->size - sizeof(ut_mem_block_t);
-
- if (size < old_size) {
- min_size = size;
- } else {
- min_size = old_size;
- }
-
- new_ptr = ut_malloc(size);
-
- if (new_ptr == NULL) {
-
- return(NULL);
- }
-
- /* Copy the old data from ptr */
- ut_memcpy(new_ptr, ptr, min_size);
-
- ut_free(ptr);
-
- return(new_ptr);
-}
-
-/**********************************************************************//**
-Frees in shutdown all allocated memory not freed yet. */
-UNIV_INTERN
-void
-ut_free_all_mem(void)
-/*=================*/
-{
- ut_mem_block_t* block;
-
- ut_a(ut_mem_block_list_inited);
- ut_mem_block_list_inited = FALSE;
- os_fast_mutex_free(&ut_list_mutex);
-
- while ((block = UT_LIST_GET_FIRST(ut_mem_block_list))) {
-
- ut_a(block->magic_n == UT_MEM_MAGIC_N);
- ut_a(ut_total_allocated_memory >= block->size);
-
- ut_total_allocated_memory -= block->size;
-
- UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block);
- free(block);
- }
-
- if (ut_total_allocated_memory != 0) {
- fprintf(stderr,
- "InnoDB: Warning: after shutdown"
- " total allocated memory is %lu\n",
- (ulong) ut_total_allocated_memory);
- }
-
- ut_mem_block_list_inited = FALSE;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
-Copies up to size - 1 characters from the NUL-terminated string src to
-dst, NUL-terminating the result. Returns strlen(src), so truncation
-occurred if the return value >= size.
-@return strlen(src) */
-UNIV_INTERN
-ulint
-ut_strlcpy(
-/*=======*/
- char* dst, /*!< in: destination buffer */
- const char* src, /*!< in: source buffer */
- ulint size) /*!< in: size of destination buffer */
-{
- ulint src_size = strlen(src);
-
- if (size != 0) {
- ulint n = ut_min(src_size, size - 1);
-
- memcpy(dst, src, n);
- dst[n] = '\0';
- }
-
- return(src_size);
-}
-
-/**********************************************************************//**
-Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last
-(size - 1) bytes of src, not the first.
-@return strlen(src) */
-UNIV_INTERN
-ulint
-ut_strlcpy_rev(
-/*===========*/
- char* dst, /*!< in: destination buffer */
- const char* src, /*!< in: source buffer */
- ulint size) /*!< in: size of destination buffer */
-{
- ulint src_size = strlen(src);
-
- if (size != 0) {
- ulint n = ut_min(src_size, size - 1);
-
- memcpy(dst, src + src_size - n, n + 1);
- }
-
- return(src_size);
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Return the number of times s2 occurs in s1. Overlapping instances of s2
-are only counted once.
-@return the number of times s2 occurs in s1 */
-UNIV_INTERN
-ulint
-ut_strcount(
-/*========*/
- const char* s1, /*!< in: string to search in */
- const char* s2) /*!< in: string to search for */
-{
- ulint count = 0;
- ulint len = strlen(s2);
-
- if (len == 0) {
-
- return(0);
- }
-
- for (;;) {
- s1 = strstr(s1, s2);
-
- if (!s1) {
-
- break;
- }
-
- count++;
- s1 += len;
- }
-
- return(count);
-}
-
-/********************************************************************
-Concatenate 3 strings.*/
-
-char*
-ut_str3cat(
-/*=======*/
- /* out, own: concatenated string, must be
- freed with mem_free() */
- const char* s1, /* in: string 1 */
- const char* s2, /* in: string 2 */
- const char* s3) /* in: string 3 */
-{
- char* s;
- ulint s1_len = strlen(s1);
- ulint s2_len = strlen(s2);
- ulint s3_len = strlen(s3);
-
- s = static_cast<char*>(mem_alloc(s1_len + s2_len + s3_len + 1));
-
- memcpy(s, s1, s1_len);
- memcpy(s + s1_len, s2, s2_len);
- memcpy(s + s1_len + s2_len, s3, s3_len);
-
- s[s1_len + s2_len + s3_len] = '\0';
-
- return(s);
-}
-/**********************************************************************//**
-Replace every occurrence of s1 in str with s2. Overlapping instances of s1
-are only replaced once.
-@return own: modified string, must be freed with mem_free() */
-UNIV_INTERN
-char*
-ut_strreplace(
-/*==========*/
- const char* str, /*!< in: string to operate on */
- const char* s1, /*!< in: string to replace */
- const char* s2) /*!< in: string to replace s1 with */
-{
- char* new_str;
- char* ptr;
- const char* str_end;
- ulint str_len = strlen(str);
- ulint s1_len = strlen(s1);
- ulint s2_len = strlen(s2);
- ulint count = 0;
- int len_delta = (int) s2_len - (int) s1_len;
-
- str_end = str + str_len;
-
- if (len_delta <= 0) {
- len_delta = 0;
- } else {
- count = ut_strcount(str, s1);
- }
-
- new_str = static_cast<char*>(
- mem_alloc(str_len + count * len_delta + 1));
-
- ptr = new_str;
-
- while (str) {
- const char* next = strstr(str, s1);
-
- if (!next) {
- next = str_end;
- }
-
- memcpy(ptr, str, next - str);
- ptr += next - str;
-
- if (next == str_end) {
-
- break;
- }
-
- memcpy(ptr, s2, s2_len);
- ptr += s2_len;
-
- str = next + s1_len;
- }
-
- *ptr = '\0';
-
- return(new_str);
-}
-
-#ifdef UNIV_COMPILE_TEST_FUNCS
-
-void
-test_ut_str_sql_format()
-{
- char buf[128];
- ulint ret;
-
-#define CALL_AND_TEST(str, str_len, buf, buf_size, ret_expected, buf_expected)\
- do {\
- ibool ok = TRUE;\
- memset(buf, 'x', 10);\
- buf[10] = '\0';\
- fprintf(stderr, "TESTING \"%s\", %lu, %lu\n",\
- str, (ulint) str_len, (ulint) buf_size);\
- ret = ut_str_sql_format(str, str_len, buf, buf_size);\
- if (ret != ret_expected) {\
- fprintf(stderr, "expected ret %lu, got %lu\n",\
- (ulint) ret_expected, ret);\
- ok = FALSE;\
- }\
- if (strcmp((char*) buf, buf_expected) != 0) {\
- fprintf(stderr, "expected buf \"%s\", got \"%s\"\n",\
- buf_expected, buf);\
- ok = FALSE;\
- }\
- if (ok) {\
- fprintf(stderr, "OK: %lu, \"%s\"\n\n",\
- (ulint) ret, buf);\
- } else {\
- return;\
- }\
- } while (0)
-
- CALL_AND_TEST("abcd", 4, buf, 0, 0, "xxxxxxxxxx");
-
- CALL_AND_TEST("abcd", 4, buf, 1, 1, "");
-
- CALL_AND_TEST("abcd", 4, buf, 2, 1, "");
-
- CALL_AND_TEST("abcd", 0, buf, 3, 3, "''");
- CALL_AND_TEST("abcd", 1, buf, 3, 1, "");
- CALL_AND_TEST("abcd", 2, buf, 3, 1, "");
- CALL_AND_TEST("abcd", 3, buf, 3, 1, "");
- CALL_AND_TEST("abcd", 4, buf, 3, 1, "");
-
- CALL_AND_TEST("abcd", 0, buf, 4, 3, "''");
- CALL_AND_TEST("abcd", 1, buf, 4, 4, "'a'");
- CALL_AND_TEST("abcd", 2, buf, 4, 4, "'a'");
- CALL_AND_TEST("abcd", 3, buf, 4, 4, "'a'");
- CALL_AND_TEST("abcd", 4, buf, 4, 4, "'a'");
- CALL_AND_TEST("abcde", 5, buf, 4, 4, "'a'");
- CALL_AND_TEST("'", 1, buf, 4, 3, "''");
- CALL_AND_TEST("''", 2, buf, 4, 3, "''");
- CALL_AND_TEST("a'", 2, buf, 4, 4, "'a'");
- CALL_AND_TEST("'a", 2, buf, 4, 3, "''");
- CALL_AND_TEST("ab", 2, buf, 4, 4, "'a'");
-
- CALL_AND_TEST("abcdef", 0, buf, 5, 3, "''");
- CALL_AND_TEST("abcdef", 1, buf, 5, 4, "'a'");
- CALL_AND_TEST("abcdef", 2, buf, 5, 5, "'ab'");
- CALL_AND_TEST("abcdef", 3, buf, 5, 5, "'ab'");
- CALL_AND_TEST("abcdef", 4, buf, 5, 5, "'ab'");
- CALL_AND_TEST("abcdef", 5, buf, 5, 5, "'ab'");
- CALL_AND_TEST("abcdef", 6, buf, 5, 5, "'ab'");
- CALL_AND_TEST("'", 1, buf, 5, 5, "''''");
- CALL_AND_TEST("''", 2, buf, 5, 5, "''''");
- CALL_AND_TEST("a'", 2, buf, 5, 4, "'a'");
- CALL_AND_TEST("'a", 2, buf, 5, 5, "''''");
- CALL_AND_TEST("ab", 2, buf, 5, 5, "'ab'");
- CALL_AND_TEST("abc", 3, buf, 5, 5, "'ab'");
-
- CALL_AND_TEST("ab", 2, buf, 6, 5, "'ab'");
-
- CALL_AND_TEST("a'b'c", 5, buf, 32, 10, "'a''b''c'");
- CALL_AND_TEST("a'b'c'", 6, buf, 32, 12, "'a''b''c'''");
-}
-
-#endif /* UNIV_COMPILE_TEST_FUNCS */
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/ut/ut0rbt.cc b/storage/xtradb/ut/ut0rbt.cc
deleted file mode 100644
index a6c02a8514a..00000000000
--- a/storage/xtradb/ut/ut0rbt.cc
+++ /dev/null
@@ -1,1353 +0,0 @@
-/***************************************************************************//**
-
-Copyright (c) 2007, 2010, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-/********************************************************************//**
-Red-Black tree implementation
-
-(c) 2007 Oracle/Innobase Oy
-
-Created 2007-03-20 Sunny Bains
-***********************************************************************/
-
-#include "ut0rbt.h"
-
-/**********************************************************************//**
-Definition of a red-black tree
-==============================
-
-A red-black tree is a binary search tree which has the following
-red-black properties:
-
- 1. Every node is either red or black.
- 2. Every leaf (NULL - in our case tree->nil) is black.
- 3. If a node is red, then both its children are black.
- 4. Every simple path from a node to a descendant leaf contains the
- same number of black nodes.
-
- from (3) above, the implication is that on any path from the root
- to a leaf, red nodes must not be adjacent.
-
- However, any number of black nodes may appear in a sequence.
- */
-
-#if defined(IB_RBT_TESTING)
-#warning "Testing enabled!"
-#endif
-
-#define ROOT(t) (t->root->left)
-
-/**********************************************************************//**
-Print out the sub-tree recursively. */
-static
-void
-rbt_print_subtree(
-/*==============*/
- const ib_rbt_t* tree, /*!< in: tree to traverse */
- const ib_rbt_node_t* node, /*!< in: node to print */
- ib_rbt_print_node print) /*!< in: print key function */
-{
- /* FIXME: Doesn't do anything yet */
- if (node != tree->nil) {
- print(node);
- rbt_print_subtree(tree, node->left, print);
- rbt_print_subtree(tree, node->right, print);
- }
-}
-
-/**********************************************************************//**
-Verify that the keys are in order.
-@return TRUE of OK. FALSE if not ordered */
-static
-ibool
-rbt_check_ordering(
-/*===============*/
- const ib_rbt_t* tree) /*!< in: tree to verfify */
-{
- const ib_rbt_node_t* node;
- const ib_rbt_node_t* prev = NULL;
-
- /* Iterate over all the nodes, comparing each node with the prev */
- for (node = rbt_first(tree); node; node = rbt_next(tree, prev)) {
-
- if (prev) {
- int result;
-
- if (tree->cmp_arg) {
- result = tree->compare_with_arg(
- tree->cmp_arg, prev->value,
- node->value);
- } else {
- result = tree->compare(
- prev->value, node->value);
- }
-
- if (result >= 0) {
- return(FALSE);
- }
- }
-
- prev = node;
- }
-
- return(TRUE);
-}
-
-/**********************************************************************//**
-Check that every path from the root to the leaves has the same count.
-Count is expressed in the number of black nodes.
-@return 0 on failure else black height of the subtree */
-static
-ibool
-rbt_count_black_nodes(
-/*==================*/
- const ib_rbt_t* tree, /*!< in: tree to verify */
- const ib_rbt_node_t* node) /*!< in: start of sub-tree */
-{
- ulint result;
-
- if (node != tree->nil) {
- ulint left_height = rbt_count_black_nodes(tree, node->left);
-
- ulint right_height = rbt_count_black_nodes(tree, node->right);
-
- if (left_height == 0
- || right_height == 0
- || left_height != right_height) {
-
- result = 0;
- } else if (node->color == IB_RBT_RED) {
-
- /* Case 3 */
- if (node->left->color != IB_RBT_BLACK
- || node->right->color != IB_RBT_BLACK) {
-
- result = 0;
- } else {
- result = left_height;
- }
- /* Check if it's anything other than RED or BLACK. */
- } else if (node->color != IB_RBT_BLACK) {
-
- result = 0;
- } else {
-
- result = right_height + 1;
- }
- } else {
- result = 1;
- }
-
- return(result);
-}
-
-/**********************************************************************//**
-Turn the node's right child's left sub-tree into node's right sub-tree.
-This will also make node's right child it's parent. */
-static
-void
-rbt_rotate_left(
-/*============*/
- const ib_rbt_node_t* nil, /*!< in: nil node of the tree */
- ib_rbt_node_t* node) /*!< in: node to rotate */
-{
- ib_rbt_node_t* right = node->right;
-
- node->right = right->left;
-
- if (right->left != nil) {
- right->left->parent = node;
- }
-
- /* Right's new parent was node's parent. */
- right->parent = node->parent;
-
- /* Since root's parent is tree->nil and root->parent->left points
- back to root, we can avoid the check. */
- if (node == node->parent->left) {
- /* Node was on the left of its parent. */
- node->parent->left = right;
- } else {
- /* Node must have been on the right. */
- node->parent->right = right;
- }
-
- /* Finally, put node on right's left. */
- right->left = node;
- node->parent = right;
-}
-
-/**********************************************************************//**
-Turn the node's left child's right sub-tree into node's left sub-tree.
-This also make node's left child it's parent. */
-static
-void
-rbt_rotate_right(
-/*=============*/
- const ib_rbt_node_t* nil, /*!< in: nil node of tree */
- ib_rbt_node_t* node) /*!< in: node to rotate */
-{
- ib_rbt_node_t* left = node->left;
-
- node->left = left->right;
-
- if (left->right != nil) {
- left->right->parent = node;
- }
-
- /* Left's new parent was node's parent. */
- left->parent = node->parent;
-
- /* Since root's parent is tree->nil and root->parent->left points
- back to root, we can avoid the check. */
- if (node == node->parent->right) {
- /* Node was on the left of its parent. */
- node->parent->right = left;
- } else {
- /* Node must have been on the left. */
- node->parent->left = left;
- }
-
- /* Finally, put node on left's right. */
- left->right = node;
- node->parent = left;
-}
-
-/**********************************************************************//**
-Append a node to the tree. */
-static
-ib_rbt_node_t*
-rbt_tree_add_child(
-/*===============*/
- const ib_rbt_t* tree,
- ib_rbt_bound_t* parent,
- ib_rbt_node_t* node)
-{
- /* Cast away the const. */
- ib_rbt_node_t* last = (ib_rbt_node_t*) parent->last;
-
- if (last == tree->root || parent->result < 0) {
- last->left = node;
- } else {
- /* FIXME: We don't handle duplicates (yet)! */
- ut_a(parent->result != 0);
-
- last->right = node;
- }
-
- node->parent = last;
-
- return(node);
-}
-
-/**********************************************************************//**
-Generic binary tree insert */
-static
-ib_rbt_node_t*
-rbt_tree_insert(
-/*============*/
- ib_rbt_t* tree,
- const void* key,
- ib_rbt_node_t* node)
-{
- ib_rbt_bound_t parent;
- ib_rbt_node_t* current = ROOT(tree);
-
- parent.result = 0;
- parent.last = tree->root;
-
- /* Regular binary search. */
- while (current != tree->nil) {
-
- parent.last = current;
-
- if (tree->cmp_arg) {
- parent.result = tree->compare_with_arg(
- tree->cmp_arg, key, current->value);
- } else {
- parent.result = tree->compare(key, current->value);
- }
-
- if (parent.result < 0) {
- current = current->left;
- } else {
- current = current->right;
- }
- }
-
- ut_a(current == tree->nil);
-
- rbt_tree_add_child(tree, &parent, node);
-
- return(node);
-}
-
-/**********************************************************************//**
-Balance a tree after inserting a node. */
-static
-void
-rbt_balance_tree(
-/*=============*/
- const ib_rbt_t* tree, /*!< in: tree to balance */
- ib_rbt_node_t* node) /*!< in: node that was inserted */
-{
- const ib_rbt_node_t* nil = tree->nil;
- ib_rbt_node_t* parent = node->parent;
-
- /* Restore the red-black property. */
- node->color = IB_RBT_RED;
-
- while (node != ROOT(tree) && parent->color == IB_RBT_RED) {
- ib_rbt_node_t* grand_parent = parent->parent;
-
- if (parent == grand_parent->left) {
- ib_rbt_node_t* uncle = grand_parent->right;
-
- if (uncle->color == IB_RBT_RED) {
-
- /* Case 1 - change the colors. */
- uncle->color = IB_RBT_BLACK;
- parent->color = IB_RBT_BLACK;
- grand_parent->color = IB_RBT_RED;
-
- /* Move node up the tree. */
- node = grand_parent;
-
- } else {
-
- if (node == parent->right) {
- /* Right is a black node and node is
- to the right, case 2 - move node
- up and rotate. */
- node = parent;
- rbt_rotate_left(nil, node);
- }
-
- grand_parent = node->parent->parent;
-
- /* Case 3. */
- node->parent->color = IB_RBT_BLACK;
- grand_parent->color = IB_RBT_RED;
-
- rbt_rotate_right(nil, grand_parent);
- }
-
- } else {
- ib_rbt_node_t* uncle = grand_parent->left;
-
- if (uncle->color == IB_RBT_RED) {
-
- /* Case 1 - change the colors. */
- uncle->color = IB_RBT_BLACK;
- parent->color = IB_RBT_BLACK;
- grand_parent->color = IB_RBT_RED;
-
- /* Move node up the tree. */
- node = grand_parent;
-
- } else {
-
- if (node == parent->left) {
- /* Left is a black node and node is to
- the right, case 2 - move node up and
- rotate. */
- node = parent;
- rbt_rotate_right(nil, node);
- }
-
- grand_parent = node->parent->parent;
-
- /* Case 3. */
- node->parent->color = IB_RBT_BLACK;
- grand_parent->color = IB_RBT_RED;
-
- rbt_rotate_left(nil, grand_parent);
- }
- }
-
- parent = node->parent;
- }
-
- /* Color the root black. */
- ROOT(tree)->color = IB_RBT_BLACK;
-}
-
-/**********************************************************************//**
-Find the given node's successor.
-@return successor node or NULL if no successor */
-static
-ib_rbt_node_t*
-rbt_find_successor(
-/*===============*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const ib_rbt_node_t* current) /*!< in: this is declared const
- because it can be called via
- rbt_next() */
-{
- const ib_rbt_node_t* nil = tree->nil;
- ib_rbt_node_t* next = current->right;
-
- /* Is there a sub-tree to the right that we can follow. */
- if (next != nil) {
-
- /* Follow the left most links of the current right child. */
- while (next->left != nil) {
- next = next->left;
- }
-
- } else { /* We will have to go up the tree to find the successor. */
- ib_rbt_node_t* parent = current->parent;
-
- /* Cast away the const. */
- next = (ib_rbt_node_t*) current;
-
- while (parent != tree->root && next == parent->right) {
- next = parent;
- parent = next->parent;
- }
-
- next = (parent == tree->root) ? NULL : parent;
- }
-
- return(next);
-}
-
-/**********************************************************************//**
-Find the given node's precedecessor.
-@return predecessor node or NULL if no predecesor */
-static
-ib_rbt_node_t*
-rbt_find_predecessor(
-/*=================*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const ib_rbt_node_t* current) /*!< in: this is declared const
- because it can be called via
- rbt_prev() */
-{
- const ib_rbt_node_t* nil = tree->nil;
- ib_rbt_node_t* prev = current->left;
-
- /* Is there a sub-tree to the left that we can follow. */
- if (prev != nil) {
-
- /* Follow the right most links of the current left child. */
- while (prev->right != nil) {
- prev = prev->right;
- }
-
- } else { /* We will have to go up the tree to find the precedecessor. */
- ib_rbt_node_t* parent = current->parent;
-
- /* Cast away the const. */
- prev = (ib_rbt_node_t*) current;
-
- while (parent != tree->root && prev == parent->left) {
- prev = parent;
- parent = prev->parent;
- }
-
- prev = (parent == tree->root) ? NULL : parent;
- }
-
- return(prev);
-}
-
-/**********************************************************************//**
-Replace node with child. After applying transformations eject becomes
-an orphan. */
-static
-void
-rbt_eject_node(
-/*===========*/
- ib_rbt_node_t* eject, /*!< in: node to eject */
- ib_rbt_node_t* node) /*!< in: node to replace with */
-{
- /* Update the to be ejected node's parent's child pointers. */
- if (eject->parent->left == eject) {
- eject->parent->left = node;
- } else if (eject->parent->right == eject) {
- eject->parent->right = node;
- } else {
- ut_a(0);
- }
- /* eject is now an orphan but otherwise its pointers
- and color are left intact. */
-
- node->parent = eject->parent;
-}
-
-/**********************************************************************//**
-Replace a node with another node. */
-static
-void
-rbt_replace_node(
-/*=============*/
- ib_rbt_node_t* replace, /*!< in: node to replace */
- ib_rbt_node_t* node) /*!< in: node to replace with */
-{
- ib_rbt_color_t color = node->color;
-
- /* Update the node pointers. */
- node->left = replace->left;
- node->right = replace->right;
-
- /* Update the child node pointers. */
- node->left->parent = node;
- node->right->parent = node;
-
- /* Make the parent of replace point to node. */
- rbt_eject_node(replace, node);
-
- /* Swap the colors. */
- node->color = replace->color;
- replace->color = color;
-}
-
-/**********************************************************************//**
-Detach node from the tree replacing it with one of it's children.
-@return the child node that now occupies the position of the detached node */
-static
-ib_rbt_node_t*
-rbt_detach_node(
-/*============*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- ib_rbt_node_t* node) /*!< in: node to detach */
-{
- ib_rbt_node_t* child;
- const ib_rbt_node_t* nil = tree->nil;
-
- if (node->left != nil && node->right != nil) {
- /* Case where the node to be deleted has two children. */
- ib_rbt_node_t* successor = rbt_find_successor(tree, node);
-
- ut_a(successor != nil);
- ut_a(successor->parent != nil);
- ut_a(successor->left == nil);
-
- child = successor->right;
-
- /* Remove the successor node and replace with its child. */
- rbt_eject_node(successor, child);
-
- /* Replace the node to delete with its successor node. */
- rbt_replace_node(node, successor);
- } else {
- ut_a(node->left == nil || node->right == nil);
-
- child = (node->left != nil) ? node->left : node->right;
-
- /* Replace the node to delete with one of it's children. */
- rbt_eject_node(node, child);
- }
-
- /* Reset the node links. */
- node->parent = node->right = node->left = tree->nil;
-
- return(child);
-}
-
-/**********************************************************************//**
-Rebalance the right sub-tree after deletion.
-@return node to rebalance if more rebalancing required else NULL */
-static
-ib_rbt_node_t*
-rbt_balance_right(
-/*==============*/
- const ib_rbt_node_t* nil, /*!< in: rb tree nil node */
- ib_rbt_node_t* parent, /*!< in: parent node */
- ib_rbt_node_t* sibling) /*!< in: sibling node */
-{
- ib_rbt_node_t* node = NULL;
-
- ut_a(sibling != nil);
-
- /* Case 3. */
- if (sibling->color == IB_RBT_RED) {
-
- parent->color = IB_RBT_RED;
- sibling->color = IB_RBT_BLACK;
-
- rbt_rotate_left(nil, parent);
-
- sibling = parent->right;
-
- ut_a(sibling != nil);
- }
-
- /* Since this will violate case 3 because of the change above. */
- if (sibling->left->color == IB_RBT_BLACK
- && sibling->right->color == IB_RBT_BLACK) {
-
- node = parent; /* Parent needs to be rebalanced too. */
- sibling->color = IB_RBT_RED;
-
- } else {
- if (sibling->right->color == IB_RBT_BLACK) {
-
- ut_a(sibling->left->color == IB_RBT_RED);
-
- sibling->color = IB_RBT_RED;
- sibling->left->color = IB_RBT_BLACK;
-
- rbt_rotate_right(nil, sibling);
-
- sibling = parent->right;
- ut_a(sibling != nil);
- }
-
- sibling->color = parent->color;
- sibling->right->color = IB_RBT_BLACK;
-
- parent->color = IB_RBT_BLACK;
-
- rbt_rotate_left(nil, parent);
- }
-
- return(node);
-}
-
-/**********************************************************************//**
-Rebalance the left sub-tree after deletion.
-@return node to rebalance if more rebalancing required else NULL */
-static
-ib_rbt_node_t*
-rbt_balance_left(
-/*=============*/
- const ib_rbt_node_t* nil, /*!< in: rb tree nil node */
- ib_rbt_node_t* parent, /*!< in: parent node */
- ib_rbt_node_t* sibling) /*!< in: sibling node */
-{
- ib_rbt_node_t* node = NULL;
-
- ut_a(sibling != nil);
-
- /* Case 3. */
- if (sibling->color == IB_RBT_RED) {
-
- parent->color = IB_RBT_RED;
- sibling->color = IB_RBT_BLACK;
-
- rbt_rotate_right(nil, parent);
- sibling = parent->left;
-
- ut_a(sibling != nil);
- }
-
- /* Since this will violate case 3 because of the change above. */
- if (sibling->right->color == IB_RBT_BLACK
- && sibling->left->color == IB_RBT_BLACK) {
-
- node = parent; /* Parent needs to be rebalanced too. */
- sibling->color = IB_RBT_RED;
-
- } else {
- if (sibling->left->color == IB_RBT_BLACK) {
-
- ut_a(sibling->right->color == IB_RBT_RED);
-
- sibling->color = IB_RBT_RED;
- sibling->right->color = IB_RBT_BLACK;
-
- rbt_rotate_left(nil, sibling);
-
- sibling = parent->left;
-
- ut_a(sibling != nil);
- }
-
- sibling->color = parent->color;
- sibling->left->color = IB_RBT_BLACK;
-
- parent->color = IB_RBT_BLACK;
-
- rbt_rotate_right(nil, parent);
- }
-
- return(node);
-}
-
-/**********************************************************************//**
-Delete the node and rebalance the tree if necessary */
-static
-void
-rbt_remove_node_and_rebalance(
-/*==========================*/
- ib_rbt_t* tree, /*!< in: rb tree */
- ib_rbt_node_t* node) /*!< in: node to remove */
-{
- /* Detach node and get the node that will be used
- as rebalance start. */
- ib_rbt_node_t* child = rbt_detach_node(tree, node);
-
- if (node->color == IB_RBT_BLACK) {
- ib_rbt_node_t* last = child;
-
- ROOT(tree)->color = IB_RBT_RED;
-
- while (child && child->color == IB_RBT_BLACK) {
- ib_rbt_node_t* parent = child->parent;
-
- /* Did the deletion cause an imbalance in the
- parents left sub-tree. */
- if (parent->left == child) {
-
- child = rbt_balance_right(
- tree->nil, parent, parent->right);
-
- } else if (parent->right == child) {
-
- child = rbt_balance_left(
- tree->nil, parent, parent->left);
-
- } else {
- ut_error;
- }
-
- if (child) {
- last = child;
- }
- }
-
- ut_a(last);
-
- last->color = IB_RBT_BLACK;
- ROOT(tree)->color = IB_RBT_BLACK;
- }
-
- /* Note that we have removed a node from the tree. */
- --tree->n_nodes;
-}
-
-/**********************************************************************//**
-Recursively free the nodes. */
-static
-void
-rbt_free_node(
-/*==========*/
- ib_rbt_node_t* node, /*!< in: node to free */
- ib_rbt_node_t* nil) /*!< in: rb tree nil node */
-{
- if (node != nil) {
- rbt_free_node(node->left, nil);
- rbt_free_node(node->right, nil);
-
- ut_free(node);
- }
-}
-
-/**********************************************************************//**
-Free all the nodes and free the tree. */
-UNIV_INTERN
-void
-rbt_free(
-/*=====*/
- ib_rbt_t* tree) /*!< in: rb tree to free */
-{
- rbt_free_node(tree->root, tree->nil);
- ut_free(tree->nil);
- ut_free(tree);
-}
-
-/**********************************************************************//**
-Create an instance of a red black tree, whose comparison function takes
-an argument
-@return an empty rb tree */
-UNIV_INTERN
-ib_rbt_t*
-rbt_create_arg_cmp(
-/*===============*/
- size_t sizeof_value, /*!< in: sizeof data item */
- ib_rbt_arg_compare
- compare, /*!< in: fn to compare items */
- void* cmp_arg) /*!< in: compare fn arg */
-{
- ib_rbt_t* tree;
-
- ut_a(cmp_arg);
-
- tree = rbt_create(sizeof_value, NULL);
- tree->cmp_arg = cmp_arg;
- tree->compare_with_arg = compare;
-
- return(tree);
-}
-
-/**********************************************************************//**
-Create an instance of a red black tree.
-@return an empty rb tree */
-UNIV_INTERN
-ib_rbt_t*
-rbt_create(
-/*=======*/
- size_t sizeof_value, /*!< in: sizeof data item */
- ib_rbt_compare compare) /*!< in: fn to compare items */
-{
- ib_rbt_t* tree;
- ib_rbt_node_t* node;
-
- tree = (ib_rbt_t*) ut_malloc(sizeof(*tree));
- memset(tree, 0, sizeof(*tree));
-
- tree->sizeof_value = sizeof_value;
-
- /* Create the sentinel (NIL) node. */
- node = tree->nil = (ib_rbt_node_t*) ut_malloc(sizeof(*node));
- memset(node, 0, sizeof(*node));
-
- node->color = IB_RBT_BLACK;
- node->parent = node->left = node->right = node;
-
- /* Create the "fake" root, the real root node will be the
- left child of this node. */
- node = tree->root = (ib_rbt_node_t*) ut_malloc(sizeof(*node));
- memset(node, 0, sizeof(*node));
-
- node->color = IB_RBT_BLACK;
- node->parent = node->left = node->right = tree->nil;
-
- tree->compare = compare;
-
- return(tree);
-}
-
-/**********************************************************************//**
-Generic insert of a value in the rb tree.
-@return inserted node */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_insert(
-/*=======*/
- ib_rbt_t* tree, /*!< in: rb tree */
- const void* key, /*!< in: key for ordering */
- const void* value) /*!< in: value of key, this value
- is copied to the node */
-{
- ib_rbt_node_t* node;
-
- /* Create the node that will hold the value data. */
- node = (ib_rbt_node_t*) ut_malloc(SIZEOF_NODE(tree));
-
- memcpy(node->value, value, tree->sizeof_value);
- node->parent = node->left = node->right = tree->nil;
-
- /* Insert in the tree in the usual way. */
- rbt_tree_insert(tree, key, node);
- rbt_balance_tree(tree, node);
-
- ++tree->n_nodes;
-
- return(node);
-}
-
-/**********************************************************************//**
-Add a new node to the tree, useful for data that is pre-sorted.
-@return appended node */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_add_node(
-/*=========*/
- ib_rbt_t* tree, /*!< in: rb tree */
- ib_rbt_bound_t* parent, /*!< in: bounds */
- const void* value) /*!< in: this value is copied
- to the node */
-{
- ib_rbt_node_t* node;
-
- /* Create the node that will hold the value data */
- node = (ib_rbt_node_t*) ut_malloc(SIZEOF_NODE(tree));
-
- memcpy(node->value, value, tree->sizeof_value);
- return(rbt_add_preallocated_node(tree, parent, node));
-}
-
-/****************************************************************//**
-Add a new caller-provided node to tree at the specified position.
-The node must have its key fields initialized correctly.
-@return added node */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_add_preallocated_node(
-/*======================*/
- ib_rbt_t* tree, /*!< in: rb tree */
- ib_rbt_bound_t* parent, /*!< in: parent */
- ib_rbt_node_t* node) /*!< in: node */
-{
- node->parent = node->left = node->right = tree->nil;
-
- /* If tree is empty */
- if (parent->last == NULL) {
- parent->last = tree->root;
- }
-
- /* Append the node, the hope here is that the caller knows
- what s/he is doing. */
- rbt_tree_add_child(tree, parent, node);
- rbt_balance_tree(tree, node);
-
- ++tree->n_nodes;
-
-#if defined(IB_RBT_TESTING)
- ut_a(rbt_validate(tree));
-#endif
- return(node);
-}
-
-
-/**********************************************************************//**
-Find a matching node in the rb tree.
-@return NULL if not found else the node where key was found */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_lookup(
-/*=======*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const void* key) /*!< in: key to use for search */
-{
- const ib_rbt_node_t* current = ROOT(tree);
-
- /* Regular binary search. */
- while (current != tree->nil) {
- int result;
-
- if (tree->cmp_arg) {
- result = tree->compare_with_arg(
- tree->cmp_arg, key, current->value);
- } else {
- result = tree->compare(key, current->value);
- }
-
- if (result < 0) {
- current = current->left;
- } else if (result > 0) {
- current = current->right;
- } else {
- break;
- }
- }
-
- return(current != tree->nil ? current : NULL);
-}
-
-/**********************************************************************//**
-Delete a node indentified by key.
-@return TRUE if success FALSE if not found */
-UNIV_INTERN
-ibool
-rbt_delete(
-/*=======*/
- ib_rbt_t* tree, /*!< in: rb tree */
- const void* key) /*!< in: key to delete */
-{
- ibool deleted = FALSE;
- ib_rbt_node_t* node = (ib_rbt_node_t*) rbt_lookup(tree, key);
-
- if (node) {
- rbt_remove_node_and_rebalance(tree, node);
-
- ut_free(node);
- deleted = TRUE;
- }
-
- return(deleted);
-}
-
-/**********************************************************************//**
-Remove a node from the rb tree, the node is not free'd, that is the
-callers responsibility.
-@return deleted node but without the const */
-UNIV_INTERN
-ib_rbt_node_t*
-rbt_remove_node(
-/*============*/
- ib_rbt_t* tree, /*!< in: rb tree */
- const ib_rbt_node_t* const_node) /*!< in: node to delete, this
- is a fudge and declared const
- because the caller can access
- only const nodes */
-{
- /* Cast away the const. */
- rbt_remove_node_and_rebalance(tree, (ib_rbt_node_t*) const_node);
-
- /* This is to make it easier to do something like this:
- ut_free(rbt_remove_node(node));
- */
-
- return((ib_rbt_node_t*) const_node);
-}
-
-/**********************************************************************//**
-Find the node that has the lowest key that is >= key.
-@return node satisfying the lower bound constraint, or NULL if there is none */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_lower_bound(
-/*============*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const void* key) /*!< in: key to search */
-{
- ib_rbt_node_t* lb_node = NULL; /* best candidate seen so far */
- ib_rbt_node_t* current = ROOT(tree);
-
- while (current != tree->nil) {
- int result;
-
- if (tree->cmp_arg) { /* use the comparator that takes cmp_arg */
- result = tree->compare_with_arg(
- tree->cmp_arg, key, current->value);
- } else {
- result = tree->compare(key, current->value);
- }
-
- if (result > 0) { /* current is below key: not a candidate */
-
- current = current->right;
-
- } else if (result < 0) { /* current is above key: remember it */
-
- lb_node = current;
- current = current->left; /* and look for a smaller one */
-
- } else { /* exact match is the lower bound */
- lb_node = current;
- break;
- }
- }
-
- return(lb_node);
-}
-
-/**********************************************************************//**
-Find the node that has the greatest key that is <= key.
-@return node satisfying the upper bound constraint, or NULL if there is none */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_upper_bound(
-/*============*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const void* key) /*!< in: key to search */
-{
- ib_rbt_node_t* ub_node = NULL; /* best candidate seen so far */
- ib_rbt_node_t* current = ROOT(tree);
-
- while (current != tree->nil) {
- int result;
-
- if (tree->cmp_arg) { /* use the comparator that takes cmp_arg */
- result = tree->compare_with_arg(
- tree->cmp_arg, key, current->value);
- } else {
- result = tree->compare(key, current->value);
- }
-
- if (result > 0) { /* current is below key: remember it */
-
- ub_node = current;
- current = current->right; /* and look for a greater one */
-
- } else if (result < 0) { /* current is above key: not a candidate */
-
- current = current->left;
-
- } else { /* exact match is the upper bound */
- ub_node = current;
- break;
- }
- }
-
- return(ub_node);
-}
-
-/**********************************************************************//**
-Search for key; parent->last is set to the last node visited (NULL if empty).
-@return parent->result, the last comparison result; 0 means key was found */
-UNIV_INTERN
-int
-rbt_search(
-/*=======*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- ib_rbt_bound_t* parent, /*!< out: last node visited and result */
- const void* key) /*!< in: key to search */
-{
- ib_rbt_node_t* current = ROOT(tree);
-
- /* Every thing is greater than the NULL root. */
- parent->result = 1;
- parent->last = NULL;
-
- while (current != tree->nil) {
-
- parent->last = current; /* remember where the descent stands */
-
- if (tree->cmp_arg) { /* use the comparator that takes cmp_arg */
- parent->result = tree->compare_with_arg(
- tree->cmp_arg, key, current->value);
- } else {
- parent->result = tree->compare(key, current->value);
- }
-
- if (parent->result > 0) {
- current = current->right;
- } else if (parent->result < 0) {
- current = current->left;
- } else { /* found: parent->last is the matching node */
- break;
- }
- }
-
- return(parent->result);
-}
-
-/**********************************************************************//**
-Search for key like rbt_search(), but compare with the supplied function
-instead of the tree's own; parent->last is set to the last node visited.
-@return parent->result, the last comparison result; 0 means key was found */
-UNIV_INTERN
-int
-rbt_search_cmp(
-/*===========*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- ib_rbt_bound_t* parent, /*!< out: last node visited and result */
- const void* key, /*!< in: key to search */
- ib_rbt_compare compare, /*!< in: fn to compare items */
- ib_rbt_arg_compare
- arg_compare) /*!< in: fn to compare items
- with argument */
-{
- ib_rbt_node_t* current = ROOT(tree);
-
- /* Every thing is greater than the NULL root. */
- parent->result = 1;
- parent->last = NULL;
-
- while (current != tree->nil) {
-
- parent->last = current; /* remember where the descent stands */
-
- if (arg_compare) { /* arg_compare, when given, takes precedence */
- ut_ad(tree->cmp_arg);
- parent->result = arg_compare(
- tree->cmp_arg, key, current->value);
- } else {
- parent->result = compare(key, current->value);
- }
-
- if (parent->result > 0) {
- current = current->right;
- } else if (parent->result < 0) {
- current = current->left;
- } else { /* found: parent->last is the matching node */
- break;
- }
- }
-
- return(parent->result);
-}
-
-/**********************************************************************//**
-Return the leftmost node in the tree, found by following left links. */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_first(
-/*======*/
- /* out: leftmost node, or NULL if the tree is empty */
- const ib_rbt_t* tree) /* in: rb tree */
-{
- ib_rbt_node_t* first = NULL; /* stays NULL when the root is nil */
- ib_rbt_node_t* current = ROOT(tree);
-
- while (current != tree->nil) {
- first = current;
- current = current->left;
- }
-
- return(first);
-}
-
-/**********************************************************************//**
-Return the rightmost node in the tree, found by following right links.
-@return the rightmost node, or NULL if the tree is empty */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_last(
-/*=====*/
- const ib_rbt_t* tree) /*!< in: rb tree */
-{
- ib_rbt_node_t* last = NULL; /* stays NULL when the root is nil */
- ib_rbt_node_t* current = ROOT(tree);
-
- while (current != tree->nil) {
- last = current;
- current = current->right;
- }
-
- return(last);
-}
-
-/**********************************************************************//**
-Return the next node, as computed by rbt_find_successor().
-@return node next from current, or NULL when current is NULL */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_next(
-/*=====*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const ib_rbt_node_t* current) /*!< in: current node, may be NULL */
-{
- return(current ? rbt_find_successor(tree, current) : NULL);
-}
-
-/**********************************************************************//**
-Return the previous node, as computed by rbt_find_predecessor().
-@return node prev from current, or NULL when current is NULL */
-UNIV_INTERN
-const ib_rbt_node_t*
-rbt_prev(
-/*=====*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const ib_rbt_node_t* current) /*!< in: current node, may be NULL */
-{
- return(current ? rbt_find_predecessor(tree, current) : NULL);
-}
-
-/**********************************************************************//**
-Reset the tree. Delete all the nodes, then mark the tree as empty. */
-UNIV_INTERN
-void
-rbt_clear(
-/*======*/
- ib_rbt_t* tree) /*!< in: rb tree */
-{
- rbt_free_node(ROOT(tree), tree->nil); /* starts at the root node */
- rbt_reset(tree); /* n_nodes = 0, root links point at nil */
-}
-
-/****************************************************************//**
-Mark the tree as empty without deleting or freeing its nodes. */
-UNIV_INTERN
-void
-rbt_reset(
-/*======*/
- ib_rbt_t* tree) /*!< in: rb tree */
-{
- tree->n_nodes = 0;
- tree->root->left = tree->root->right = tree->nil; /* detach every node */
-}
-
-/**********************************************************************//**
-Add to dst every value of src for which rbt_search() finds no match in dst.
-@return no. of nodes added to dst; 0 if src is empty or dst == src */
-UNIV_INTERN
-ulint
-rbt_merge_uniq(
-/*===========*/
- ib_rbt_t* dst, /*!< in: dst rb tree */
- const ib_rbt_t* src) /*!< in: src rb tree */
-{
- ib_rbt_bound_t parent;
- ulint n_merged = 0;
- const ib_rbt_node_t* src_node = rbt_first(src);
-
- if (rbt_empty(src) || dst == src) {
- return(0);
- }
-
- for (/* No op */; src_node; src_node = rbt_next(src, src_node)) {
-
- if (rbt_search(dst, &parent, src_node->value) != 0) { /* not in dst yet */
- rbt_add_node(dst, &parent, src_node->value);
- ++n_merged;
- }
- }
-
- return(n_merged);
-}
-
-/**********************************************************************//**
-Move into dst every node of src whose value is not already in dst.
-The moved nodes are unlinked from src and relinked into dst. As a side
-effect the duplicates will be left untouched in the src.
-@return no. of nodes moved into dst; 0 if src is empty or dst == src */
-UNIV_INTERN
-ulint
-rbt_merge_uniq_destructive(
-/*=======================*/
- ib_rbt_t* dst, /*!< in: dst rb tree */
- ib_rbt_t* src) /*!< in: src rb tree */
-{
- ib_rbt_bound_t parent;
- ib_rbt_node_t* src_node;
- ulint old_size = rbt_size(dst);
-
- if (rbt_empty(src) || dst == src) {
- return(0);
- }
-
- for (src_node = (ib_rbt_node_t*) rbt_first(src); src_node; /* */) {
- ib_rbt_node_t* prev = src_node;
-
- src_node = (ib_rbt_node_t*) rbt_next(src, prev); /* advance before prev may be unlinked */
-
- /* Skip duplicates. */
- if (rbt_search(dst, &parent, prev->value) != 0) {
-
- /* Remove and reset the node but preserve
- the node (data) value. */
- rbt_remove_node_and_rebalance(src, prev);
-
- /* The nil should be taken from the dst tree. */
- prev->parent = prev->left = prev->right = dst->nil;
- rbt_tree_add_child(dst, &parent, prev);
- rbt_balance_tree(dst, prev);
-
- ++dst->n_nodes; /* counted here, not by the two calls above */
- }
- }
-
-#if defined(IB_RBT_TESTING)
- ut_a(rbt_validate(dst));
- ut_a(rbt_validate(src));
-#endif
- return(rbt_size(dst) - old_size);
-}
-
-/**********************************************************************//**
-Check that every path from the root to the leaves has the same count and
-the tree nodes are in order. The ordering is checked only if the count is > 0.
-@return TRUE if OK FALSE otherwise */
-UNIV_INTERN
-ibool
-rbt_validate(
-/*=========*/
- const ib_rbt_t* tree) /*!< in: RB tree to validate */
-{
- if (rbt_count_black_nodes(tree, ROOT(tree)) > 0) {
- return(rbt_check_ordering(tree));
- }
-
- return(FALSE); /* the black node count was not positive */
-}
-
-/**********************************************************************//**
-Iterate over the tree in depth first order, starting from the root node. */
-UNIV_INTERN
-void
-rbt_print(
-/*======*/
- const ib_rbt_t* tree, /*!< in: tree to traverse */
- ib_rbt_print_node print) /*!< in: print function */
-{
- rbt_print_subtree(tree, ROOT(tree), print); /* does the actual traversal */
-}
diff --git a/storage/xtradb/ut/ut0rnd.cc b/storage/xtradb/ut/ut0rnd.cc
deleted file mode 100644
index 3b4d7381181..00000000000
--- a/storage/xtradb/ut/ut0rnd.cc
+++ /dev/null
@@ -1,97 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/***************************************************************//**
-@file ut/ut0rnd.cc
-Random numbers and hashing
-
-Created 5/11/1994 Heikki Tuuri
-********************************************************************/
-
-#include "ut0rnd.h"
-
-#ifdef UNIV_NONINL
-#include "ut0rnd.ic"
-#endif
-
-/** Fixed multipliers, slightly above 1, used in ut_find_prime */
-/*@{*/
-#define UT_RANDOM_1 1.0412321
-#define UT_RANDOM_2 1.1131347
-#define UT_RANDOM_3 1.0132677
-/*@}*/
-
-/** Seed value of ut_rnd_gen_ulint(). */
-UNIV_INTERN ulint ut_rnd_ulint_counter = 65654363;
-
-/***********************************************************//**
-Looks for a prime number slightly greater than the given argument.
-The prime is chosen so that it is not near any power of 2.
-@return prime */
-UNIV_INTERN
-ulint
-ut_find_prime(
-/*==========*/
- ulint n) /*!< in: positive number > 100 */
-{
- ulint pow2;
- ulint i;
-
- n += 100;
-
- pow2 = 1;
- while (pow2 * 2 < n) { /* ends with pow2 < n <= 2 * pow2 */
- pow2 = 2 * pow2;
- }
-
- if ((double) n < 1.05 * (double) pow2) { /* just above pow2: push n up */
- n = (ulint) ((double) n * UT_RANDOM_1);
- }
-
- pow2 = 2 * pow2; /* now the next power of 2 */
-
- if ((double) n > 0.95 * (double) pow2) { /* just below it: push n past */
- n = (ulint) ((double) n * UT_RANDOM_2);
- }
-
- if (n > pow2 - 20) {
- n += 30;
- }
-
- /* Now we have n far enough from powers of 2. To make
- n more random (especially, if it was not near
- a power of 2), we then multiply it by the fixed factor UT_RANDOM_3. */
-
- n = (ulint) ((double) n * UT_RANDOM_3);
-
- for (;; n++) { /* try n, n + 1, ... until one is prime */
- i = 2;
- while (i * i <= n) { /* trial division up to sqrt(n) */
- if (n % i == 0) {
- goto next_n;
- }
- i++;
- }
-
- /* Found a prime */
- break;
-next_n: ;
- }
-
- return(n);
-}
diff --git a/storage/xtradb/ut/ut0timer.cc b/storage/xtradb/ut/ut0timer.cc
deleted file mode 100644
index 85292cce28c..00000000000
--- a/storage/xtradb/ut/ut0timer.cc
+++ /dev/null
@@ -1,92 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2013, 2014, Facebook, Inc. All Rights Reserved.
-Copyright (c) 2014, SkySQL Ab. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file ut/ut0timer.cc
-Timer routines
-
-Created 30/07/2014 Jan Lindström jan.lindstrom@skysql.com
-modified from https://github.com/facebook/mysql-5.6/commit/c75a413edeb96eb99bf11d7269bdfea06f96d6b6
-*************************************************************************/
-
-#include "data0type.h"
-#include <my_rdtsc.h>
-#include <ut0timer.h>
-
-/**************************************************************//**
-Initial timer definition: a placeholder that always reports zero.
-@return 0 */
-static
-ulonglong
-ut_timer_none(void)
-/*===============*/
-{
- return 0;
-}
-
-/**************************************************************//**
-Function pointer to the selected timer function; initially ut_timer_none.
-@return timer current value */
-ulonglong (*ut_timer_now)(void) = &ut_timer_none;
-
-struct my_timer_unit_info ut_timer; /* info for the selected timer, set by ut_init_timer() */
-
-/**************************************************************//**
-Sets up the data required for use of ut_timer_now and ut_timer.
-Selects the first acceptable timer, trying cycles, nanoseconds,
-microseconds, milliseconds and ticks in that order. Points ut_timer_now
-to the selected timer function and copies its info into ut_timer.*/
-UNIV_INTERN
-void
-ut_init_timer(void)
-/*===============*/
-{
- MY_TIMER_INFO all_timer_info;
- my_timer_init(&all_timer_info);
-
- if (all_timer_info.cycles.frequency > 1000000 &&
- all_timer_info.cycles.resolution == 1) {
- ut_timer = all_timer_info.cycles;
- ut_timer_now = &my_timer_cycles;
- } else if (all_timer_info.nanoseconds.frequency > 1000000 &&
- all_timer_info.nanoseconds.resolution == 1) {
- ut_timer = all_timer_info.nanoseconds;
- ut_timer_now = &my_timer_nanoseconds;
- } else if (all_timer_info.microseconds.frequency >= 1000000 &&
- all_timer_info.microseconds.resolution == 1) {
- ut_timer = all_timer_info.microseconds;
- ut_timer_now = &my_timer_microseconds;
-
- } else if (all_timer_info.milliseconds.frequency >= 1000 &&
- all_timer_info.milliseconds.resolution == 1) {
- ut_timer = all_timer_info.milliseconds;
- ut_timer_now = &my_timer_milliseconds;
- } else if (all_timer_info.ticks.frequency >= 1000 &&
- /* Will probably be false */
- all_timer_info.ticks.resolution == 1) {
- ut_timer = all_timer_info.ticks;
- ut_timer_now = &my_timer_ticks;
- } else {
- /* None are acceptable, so leave ut_timer_now as it is, and fill in struct */
- ut_timer.frequency = 1; /* Avoid div-by-zero */
- ut_timer.overhead = 0; /* Since it doesn't do anything */
- ut_timer.resolution = 10; /* Another sign it's bad */
- ut_timer.routine = 0; /* None */
- }
-}
diff --git a/storage/xtradb/ut/ut0ut.cc b/storage/xtradb/ut/ut0ut.cc
deleted file mode 100644
index fd52537ae11..00000000000
--- a/storage/xtradb/ut/ut0ut.cc
+++ /dev/null
@@ -1,870 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/***************************************************************//**
-@file ut/ut0ut.cc
-Various utilities for Innobase.
-
-Created 5/11/1994 Heikki Tuuri
-********************************************************************/
-
-#include "ut0ut.h"
-
-#ifndef UNIV_INNOCHECKSUM
-
-#include "ut0sort.h"
-#include "os0thread.h" /* thread-ID */
-
-#ifdef UNIV_NONINL
-#include "ut0ut.ic"
-#endif
-
-#include <stdarg.h>
-#include <string.h>
-#include <ctype.h>
-
-#ifndef UNIV_HOTBACKUP
-# include "btr0types.h"
-# include "trx0trx.h"
-# include "ha_prototypes.h"
-# include "mysql_com.h" /* NAME_LEN */
-# include <string>
-#endif /* UNIV_HOTBACKUP */
-
-#ifdef __WIN__
-/*****************************************************************//**
-NOTE: The Windows epoch starts from 1601/01/01 whereas the Unix
-epoch starts from 1970/1/1. For selection of constant see:
-http://support.microsoft.com/kb/167296/ */
-#define WIN_TO_UNIX_DELTA_USEC ((ib_int64_t) 11644473600000000ULL)
-
-
-/*****************************************************************//**
-This is the Windows version of gettimeofday(2).
-@return 0 if all OK else -1 (with errno set to EINVAL when tv is NULL) */
-static
-int
-ut_gettimeofday(
-/*============*/
- struct timeval* tv, /*!< out: Values are relative to Unix epoch */
- void* tz) /*!< in: not used */
-{
- FILETIME ft;
- ib_int64_t tm;
-
- if (!tv) {
- errno = EINVAL;
- return(-1);
- }
-
- GetSystemTimeAsFileTime(&ft);
-
- tm = (ib_int64_t) ft.dwHighDateTime << 32; /* combine the two halves */
- tm |= ft.dwLowDateTime;
-
- ut_a(tm >= 0); /* If tm wraps over to negative, the quotient / 10
- does not work */
-
- tm /= 10; /* Convert from 100 nsec periods to usec */
-
- /* If we don't convert to the Unix epoch the value for
- struct timeval::tv_sec will overflow.*/
- tm -= WIN_TO_UNIX_DELTA_USEC;
-
- tv->tv_sec = (long) (tm / 1000000L); /* whole seconds */
- tv->tv_usec = (long) (tm % 1000000L); /* remaining microseconds */
-
- return(0);
-}
-#else
-/** An alias for gettimeofday(2). On Microsoft Windows, we have to
-reimplement this function. */
-#define ut_gettimeofday gettimeofday
-#endif
-
-/**********************************************************//**
-Returns system time. We do not specify the format of the time returned:
-the only way to manipulate it is to use the function ut_difftime.
-@return system time, as returned by time(NULL) */
-UNIV_INTERN
-ib_time_t
-ut_time(void)
-/*=========*/
-{
- return(time(NULL));
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************//**
-Returns system time, retrying up to 10 times (0.1 sec apart) on failure.
-Upon successful completion, the value 0 is returned; otherwise the
-value -1 is returned and the global variable errno is set to indicate the
-error. The out parameters are written only on success.
-@return 0 on success, -1 otherwise */
-UNIV_INTERN
-int
-ut_usectime(
-/*========*/
- ulint* sec, /*!< out: seconds since the Epoch */
- ulint* ms) /*!< out: microseconds (despite the name) since the Epoch+*sec */
-{
- struct timeval tv;
- int ret;
- int errno_gettimeofday;
- int i;
-
- for (i = 0; i < 10; i++) {
-
- ret = ut_gettimeofday(&tv, NULL);
-
- if (ret == -1) {
- errno_gettimeofday = errno; /* save it: the calls below may change errno */
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: gettimeofday(): %s\n",
- strerror(errno_gettimeofday));
- os_thread_sleep(100000); /* 0.1 sec */
- errno = errno_gettimeofday; /* restore it for the caller */
- } else {
- break;
- }
- }
-
- if (ret != -1) {
- *sec = (ulint) tv.tv_sec;
- *ms = (ulint) tv.tv_usec;
- }
-
- return(ret);
-}
-
-/**********************************************************//**
-Returns the number of microseconds since epoch. Similar to
-time(3), the return value is also stored in *tloc, provided
-that tloc is non-NULL.
-@return us since epoch */
-UNIV_INTERN
-ullint
-ut_time_us(
-/*=======*/
- ullint* tloc) /*!< out: us since epoch, if non-NULL */
-{
- struct timeval tv;
- ullint us;
-
- ut_gettimeofday(&tv, NULL); /* NOTE(review): the return value is not checked */
-
- us = (ullint) tv.tv_sec * 1000000 + tv.tv_usec;
-
- if (tloc != NULL) {
- *tloc = us;
- }
-
- return(us);
-}
-
-/**********************************************************//**
-Returns the number of milliseconds since some epoch. The
-value may wrap around. It should only be used for heuristic
-purposes.
-@return ms since epoch */
-UNIV_INTERN
-ulint
-ut_time_ms(void)
-/*============*/
-{
- struct timeval tv;
-
- ut_gettimeofday(&tv, NULL); /* NOTE(review): the return value is not checked */
-
- return((ulint) tv.tv_sec * 1000 + tv.tv_usec / 1000);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************//**
-Returns the difference of two times in seconds, as computed by difftime().
-@return time2 - time1 expressed in seconds */
-UNIV_INTERN
-double
-ut_difftime(
-/*========*/
- ib_time_t time2, /*!< in: later time (the minuend) */
- ib_time_t time1) /*!< in: earlier time (the subtrahend) */
-{
- return(difftime(time2, time1));
-}
-
-#endif /* !UNIV_INNOCHECKSUM */
-
-/**********************************************************//**
-Prints the local time as YYYY-MM-DD HH:MM:SS plus a hex thread id to a file. */
-UNIV_INTERN
-void
-ut_print_timestamp(
-/*===============*/
- FILE* file) /*!< in: file where to print */
-{
- ulint thread_id = 0; /* stays 0 when built with UNIV_INNOCHECKSUM */
-
-#ifndef UNIV_INNOCHECKSUM
- thread_id = os_thread_pf(os_thread_get_curr_id());
-#endif
-
-#ifdef __WIN__
- SYSTEMTIME cal_tm;
-
- GetLocalTime(&cal_tm);
-
- fprintf(file, "%d-%02d-%02d %02d:%02d:%02d %lx",
- (int) cal_tm.wYear,
- (int) cal_tm.wMonth,
- (int) cal_tm.wDay,
- (int) cal_tm.wHour,
- (int) cal_tm.wMinute,
- (int) cal_tm.wSecond,
- thread_id);
-#else
- struct tm* cal_tm_ptr;
- time_t tm;
-
-#ifdef HAVE_LOCALTIME_R
- struct tm cal_tm;
- time(&tm);
- localtime_r(&tm, &cal_tm); /* fills the caller-provided cal_tm */
- cal_tm_ptr = &cal_tm;
-#else
- time(&tm);
- cal_tm_ptr = localtime(&tm);
-#endif
- fprintf(file, "%d-%02d-%02d %02d:%02d:%02d %lx",
- cal_tm_ptr->tm_year + 1900,
- cal_tm_ptr->tm_mon + 1,
- cal_tm_ptr->tm_mday,
- cal_tm_ptr->tm_hour,
- cal_tm_ptr->tm_min,
- cal_tm_ptr->tm_sec,
- thread_id);
-#endif
-}
-
-#ifndef UNIV_INNOCHECKSUM
-
-/**********************************************************//**
-Sprintfs the local time to a buffer as YYMMDD HH:MM:SS (hour space-padded), 15 chars plus terminating NUL. */
-UNIV_INTERN
-void
-ut_sprintf_timestamp(
-/*=================*/
- char* buf) /*!< out: buffer where to sprintf */
-{
-#ifdef __WIN__
- SYSTEMTIME cal_tm;
-
- GetLocalTime(&cal_tm);
-
- sprintf(buf, "%02d%02d%02d %2d:%02d:%02d",
- (int) cal_tm.wYear % 100,
- (int) cal_tm.wMonth,
- (int) cal_tm.wDay,
- (int) cal_tm.wHour,
- (int) cal_tm.wMinute,
- (int) cal_tm.wSecond);
-#else
- struct tm* cal_tm_ptr;
- time_t tm;
-
-#ifdef HAVE_LOCALTIME_R
- struct tm cal_tm;
- time(&tm);
- localtime_r(&tm, &cal_tm); /* fills the caller-provided cal_tm */
- cal_tm_ptr = &cal_tm;
-#else
- time(&tm);
- cal_tm_ptr = localtime(&tm);
-#endif
- sprintf(buf, "%02d%02d%02d %2d:%02d:%02d",
- cal_tm_ptr->tm_year % 100,
- cal_tm_ptr->tm_mon + 1,
- cal_tm_ptr->tm_mday,
- cal_tm_ptr->tm_hour,
- cal_tm_ptr->tm_min,
- cal_tm_ptr->tm_sec);
-#endif
-}
-
-#ifdef UNIV_HOTBACKUP
-/**********************************************************//**
-Sprintfs the local time to a buffer as YYMMDD_HH_MM_SS, with '_' in place of ' ' and ':'.
-NOTE(review): the hour uses %2d, which pads hours below 10 with a space. */
-UNIV_INTERN
-void
-ut_sprintf_timestamp_without_extra_chars(
-/*=====================================*/
- char* buf) /*!< out: buffer where to sprintf */
-{
-#ifdef __WIN__
- SYSTEMTIME cal_tm;
-
- GetLocalTime(&cal_tm);
-
- sprintf(buf, "%02d%02d%02d_%2d_%02d_%02d",
- (int) cal_tm.wYear % 100,
- (int) cal_tm.wMonth,
- (int) cal_tm.wDay,
- (int) cal_tm.wHour,
- (int) cal_tm.wMinute,
- (int) cal_tm.wSecond);
-#else
- struct tm* cal_tm_ptr;
- time_t tm;
-
-#ifdef HAVE_LOCALTIME_R
- struct tm cal_tm;
- time(&tm);
- localtime_r(&tm, &cal_tm); /* fills the caller-provided cal_tm */
- cal_tm_ptr = &cal_tm;
-#else
- time(&tm);
- cal_tm_ptr = localtime(&tm);
-#endif
- sprintf(buf, "%02d%02d%02d_%2d_%02d_%02d",
- cal_tm_ptr->tm_year % 100,
- cal_tm_ptr->tm_mon + 1,
- cal_tm_ptr->tm_mday,
- cal_tm_ptr->tm_hour,
- cal_tm_ptr->tm_min,
- cal_tm_ptr->tm_sec);
-#endif
-}
-
-/**********************************************************//**
-Returns current year, month, day in local time. */
-UNIV_INTERN
-void
-ut_get_year_month_day(
-/*==================*/
- ulint* year, /*!< out: current year, four digits */
- ulint* month, /*!< out: month, 1..12 */
- ulint* day) /*!< out: day of the month */
-{
-#ifdef __WIN__
- SYSTEMTIME cal_tm;
-
- GetLocalTime(&cal_tm);
-
- *year = (ulint) cal_tm.wYear;
- *month = (ulint) cal_tm.wMonth;
- *day = (ulint) cal_tm.wDay;
-#else
- struct tm* cal_tm_ptr;
- time_t tm;
-
-#ifdef HAVE_LOCALTIME_R
- struct tm cal_tm;
- time(&tm);
- localtime_r(&tm, &cal_tm); /* fills the caller-provided cal_tm */
- cal_tm_ptr = &cal_tm;
-#else
- time(&tm);
- cal_tm_ptr = localtime(&tm);
-#endif
- *year = (ulint) cal_tm_ptr->tm_year + 1900; /* tm_year counts from 1900 */
- *month = (ulint) cal_tm_ptr->tm_mon + 1; /* tm_mon is 0-based */
- *day = (ulint) cal_tm_ptr->tm_mday;
-#endif
-}
-#endif /* UNIV_HOTBACKUP */
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************//**
-Runs an idle loop on CPU. The argument gives the desired delay
-in microseconds on 100 MHz Pentium + Visual C++.
-The loop body runs delay * 50 times; nothing is returned. */
-UNIV_INTERN
-void
-ut_delay(
-/*=====*/
- ulint delay) /*!< in: delay in microseconds on 100 MHz Pentium */
-{
- ulint i;
-
- UT_LOW_PRIORITY_CPU();
-
- for (i = 0; i < delay * 50; i++) {
- UT_RELAX_CPU();
- UT_COMPILER_BARRIER();
- }
-
- UT_RESUME_PRIORITY_CPU();
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************//**
-Prints a memory buffer as " len N; hex <bytes>; asc <chars>;" to a file. */
-UNIV_INTERN
-void
-ut_print_buf(
-/*=========*/
- FILE* file, /*!< in: file where to print */
- const void* buf, /*!< in: memory buffer */
- ulint len) /*!< in: length of the buffer */
-{
- const byte* data;
- ulint i;
-
- UNIV_MEM_ASSERT_RW(buf, len);
-
- fprintf(file, " len %lu; hex ", len);
-
- for (data = (const byte*) buf, i = 0; i < len; i++) {
- fprintf(file, "%02lx", (ulong)*data++); /* two hex digits per byte */
- }
-
- fputs("; asc ", file);
-
- data = (const byte*) buf; /* second pass over the same bytes */
-
- for (i = 0; i < len; i++) {
- int c = (int) *data++;
- putc(isprint(c) ? c : ' ', file); /* unprintable bytes become spaces */
- }
-
- putc(';', file);
-}
-
-/**********************************************************************//**
-Sort function for ulint arrays; the body is generated by UT_SORT_FUNCTION_BODY. */
-UNIV_INTERN
-void
-ut_ulint_sort(
-/*==========*/
- ulint* arr, /*!< in/out: array to sort */
- ulint* aux_arr, /*!< in/out: aux array to use in sort */
- ulint low, /*!< in: lower bound */
- ulint high) /*!< in: upper bound */
-{
- UT_SORT_FUNCTION_BODY(ut_ulint_sort, arr, aux_arr, low, high,
- ut_ulint_cmp); /* elements are compared with ut_ulint_cmp */
-}
-
-/*************************************************************//**
-Calculates the number rounded up to the nearest power of 2 by repeated doubling.
-@return first power of 2 which is >= n */
-UNIV_INTERN
-ulint
-ut_2_power_up(
-/*==========*/
- ulint n) /*!< in: number != 0 */
-{
- ulint res;
-
- res = 1;
-
- ut_ad(n > 0);
-
- while (res < n) { /* NOTE(review): presumably never ends if res * 2 wraps before reaching n */
- res = res * 2;
- }
-
- return(res);
-}
-
-/**********************************************************************//**
-Outputs a NUL-terminated file name, quoted with apostrophes; apostrophes inside the name are doubled. */
-UNIV_INTERN
-void
-ut_print_filename(
-/*==============*/
- FILE* f, /*!< in: output stream */
- const char* name) /*!< in: name to print */
-{
- putc('\'', f);
- for (;;) {
- int c = *name++;
- switch (c) {
- case 0: /* end of the name */
- goto done;
- case '\'':
- putc(c, f); /* written once here and once more below */
- /* fall through */
- default:
- putc(c, f);
- }
- }
-done:
- putc('\'', f);
-}
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Outputs a NUL-terminated string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. Wrapper for ut_print_namel(). */
-UNIV_INTERN
-void
-ut_print_name(
-/*==========*/
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name) /*!< in: name to print */
-{
- ut_print_namel(f, trx, table_id, name, strlen(name));
-}
-
-/**********************************************************************//**
-Outputs a fixed-length string, quoted as an SQL identifier.
-If the string contains a slash '/', the string will be
-output as two identifiers separated by a period (.),
-as in SQL database_name.identifier. */
-UNIV_INTERN
-void
-ut_print_namel(
-/*===========*/
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction (NULL=no quotes) */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name, /*!< in: name to print */
- ulint namelen)/*!< in: length of name */
-{
- /* 2 * NAME_LEN for database and table name,
- and some slack for the #mysql50# prefix and quotes */
- char buf[3 * NAME_LEN];
- const char* bufend;
-
- bufend = innobase_convert_name(buf, sizeof buf,
- name, namelen,
- trx ? trx->mysql_thd : NULL,
- table_id);
-
- (void) fwrite(buf, 1, bufend - buf, f); /* buf is not NUL-terminated: write by length */
-}
-
-/**********************************************************************//**
-Returns a NUL-terminated name, quoted as an SQL identifier.
-If the name contains a slash '/', the result will hold
-two identifiers separated by a period (.),
-as in SQL database_name.identifier. @return the converted name */
-UNIV_INTERN
-std::string
-ut_get_name(
-/*=========*/
- const trx_t* trx, /*!< in: transaction (NULL=no quotes) */
- ibool table_id,/*!< in: TRUE=print a table name,
- FALSE=print other identifier */
- const char* name) /*!< in: name to print */
-{
- /* 2 * NAME_LEN for database and table name,
- and some slack for the #mysql50# prefix and quotes */
- char buf[3 * NAME_LEN];
- const char* bufend;
- ulint namelen = strlen(name);
-
- bufend = innobase_convert_name(buf, sizeof buf,
- name, namelen,
- trx ? trx->mysql_thd : NULL,
- table_id);
- /* Do not write a NUL at *bufend: that is one byte past the end of
- buf when the converted name fills the buffer completely. */
- return(std::string(buf, bufend - buf));
-}
-
-/**********************************************************************//**
-Formats a table or index name, quoted as an SQL identifier. If the name
-contains a slash '/', the result will contain two identifiers separated by
-a period (.), as in SQL database_name.identifier.
-@return pointer to 'formatted' */
-UNIV_INTERN
-char*
-ut_format_name(
-/*===========*/
- const char* name, /*!< in: table or index name, must be
- '\0'-terminated */
- ibool is_table, /*!< in: if TRUE then 'name' is a table
- name */
- char* formatted, /*!< out: formatted result, will be
- '\0'-terminated */
- ulint formatted_size) /*!< in: no more than this number of
- bytes will be written to 'formatted' */
-{
- switch (formatted_size) {
- case 1: /* room for the terminating NUL only */
- formatted[0] = '\0';
- /* FALL-THROUGH */
- case 0: /* no room at all: 'formatted' is left untouched */
- return(formatted);
- }
-
- char* end;
-
- end = innobase_convert_name(formatted, formatted_size,
- name, strlen(name), NULL, is_table);
-
- /* If the space in 'formatted' was completely used, then sacrifice
- the last character in order to write '\0' at the end. */
- if ((ulint) (end - formatted) == formatted_size) {
- end--;
- }
-
- ut_a((ulint) (end - formatted) < formatted_size);
-
- *end = '\0';
-
- return(formatted);
-}
-
-/**********************************************************************//**
-Catenate files: append src, from its start up to its current position, to dest. */
-UNIV_INTERN
-void
-ut_copy_file(
-/*=========*/
- FILE* dest, /*!< in: output file */
- FILE* src) /*!< in: input file to be appended to output */
-{
- long len = ftell(src); /* bytes to copy; -1 if ftell() fails */
- char buf[4096];
-
- rewind(src);
- while (len > 0) { /* tested first: a failed ftell() must not size a read */
- size_t maxs = len < (long) sizeof buf
- ? (size_t) len
- : sizeof buf;
- size_t size = fread(buf, 1, maxs, src);
- (void) fwrite(buf, 1, size, dest);
- len -= (long) size;
- if (size < maxs) { /* short read: EOF or error, stop copying */
- break;
- }
- }
-}
-#endif /* !UNIV_HOTBACKUP */
-
-#ifdef __WIN__
-# include <stdarg.h>
-/**********************************************************************//**
-A substitute for vsnprintf(3): formatted output conversion into a limited
-buffer, always NUL-terminated when size > 0; nothing is written when size
-is 0. Unlike vsnprintf(3) this does NOT return the would-be length: VC's
-_vsnprintf() returns -1 on truncation and VC does not provide va_copy(). */
-UNIV_INTERN
-void
-ut_vsnprintf(
-/*=========*/
- char* str, /*!< out: string */
- size_t size, /*!< in: str size */
- const char* fmt, /*!< in: format */
- va_list ap) /*!< in: format values */
-{
- if (size > 0) { /* str[size - 1] would be out of bounds for size 0 */
- _vsnprintf(str, size, fmt, ap);
- str[size - 1] = '\0';
- }
-}
-
-/**********************************************************************//**
-A substitute for snprintf(3), formatted output conversion into
-a limited buffer. Nothing is written to str when size is 0.
-@return number of characters that would have been printed if the size
-were unlimited, not including the terminating '\0'. */
-UNIV_INTERN
-int
-ut_snprintf(
-/*========*/
- char* str, /*!< out: string */
- size_t size, /*!< in: str size */
- const char* fmt, /*!< in: format */
- ...) /*!< in: format values */
-{
- int res;
- va_list ap1;
- va_list ap2;
-
- va_start(ap1, fmt); /* one list for measuring ... */
- va_start(ap2, fmt); /* ... and one for formatting */
-
- res = _vscprintf(fmt, ap1);
- ut_a(res != -1);
-
- if (size > 0) {
- _vsnprintf(str, size, fmt, ap2);
-
- if ((size_t) res >= size) { /* output was truncated: terminate it here */
- str[size - 1] = '\0';
- }
- }
-
- va_end(ap1);
- va_end(ap2);
-
- return(res);
-}
-#endif /* __WIN__ */
-
-/*************************************************************//**
-Map an error code to a short human readable description. The result
-points to a string literal: callers must neither free nor modify it.
-An error code that has no case below aborts via ut_error.
-@return string, describing the error */
-UNIV_INTERN
-const char*
-ut_strerr(
-/*======*/
-	dberr_t	num)	/*!< in: error number */
-{
-	switch (num) {
-	case DB_SUCCESS:
-		return "Success";
-	case DB_SUCCESS_LOCKED_REC:
-		return "Success, record lock created";
-	case DB_ERROR:
-		return "Generic error";
-	case DB_READ_ONLY:
-		return "Read only transaction";
-	case DB_INTERRUPTED:
-		return "Operation interrupted";
-	case DB_OUT_OF_MEMORY:
-		return "Cannot allocate memory";
-	case DB_OUT_OF_FILE_SPACE:
-		return "Out of disk space";
-	case DB_LOCK_WAIT:
-		return "Lock wait";
-	case DB_DEADLOCK:
-		return "Deadlock";
-	case DB_ROLLBACK:
-		return "Rollback";
-	case DB_DUPLICATE_KEY:
-		return "Duplicate key";
-	case DB_QUE_THR_SUSPENDED:
-		return "The queue thread has been suspended";
-	case DB_MISSING_HISTORY:
-		return "Required history data has been deleted";
-	case DB_CLUSTER_NOT_FOUND:
-		return "Cluster not found";
-	case DB_TABLE_NOT_FOUND:
-		return "Table not found";
-	case DB_MUST_GET_MORE_FILE_SPACE:
-		return "More file space needed";
-	case DB_TABLE_IS_BEING_USED:
-		return "Table is being used";
-	case DB_TOO_BIG_RECORD:
-		return "Record too big";
-	case DB_TOO_BIG_INDEX_COL:
-		return "Index columns size too big";
-	case DB_LOCK_WAIT_TIMEOUT:
-		return "Lock wait timeout";
-	case DB_NO_REFERENCED_ROW:
-		return "Referenced key value not found";
-	case DB_ROW_IS_REFERENCED:
-		return "Row is referenced";
-	case DB_CANNOT_ADD_CONSTRAINT:
-		return "Cannot add constraint";
-	case DB_CORRUPTION:
-		return "Data structure corruption";
-	case DB_CANNOT_DROP_CONSTRAINT:
-		return "Cannot drop constraint";
-	case DB_NO_SAVEPOINT:
-		return "No such savepoint";
-	case DB_TABLESPACE_EXISTS:
-		return "Tablespace already exists";
-	case DB_TABLESPACE_DELETED:
-		return "Tablespace deleted or being deleted";
-	case DB_TABLESPACE_NOT_FOUND:
-		return "Tablespace not found";
-	case DB_LOCK_TABLE_FULL:
-		return "Lock structs have exhausted the buffer pool";
-	case DB_FOREIGN_DUPLICATE_KEY:
-		return "Foreign key activated with duplicate keys";
-	case DB_FOREIGN_EXCEED_MAX_CASCADE:
-		return "Foreign key cascade delete/update exceeds max depth";
-	case DB_TOO_MANY_CONCURRENT_TRXS:
-		return "Too many concurrent transactions";
-	case DB_UNSUPPORTED:
-		return "Unsupported";
-	case DB_INVALID_NULL:
-		return "NULL value encountered in NOT NULL column";
-	case DB_STATS_DO_NOT_EXIST:
-		return "Persistent statistics do not exist";
-	case DB_FAIL:
-		return "Failed, retry may succeed";
-	case DB_OVERFLOW:
-		return "Overflow";
-	case DB_UNDERFLOW:
-		return "Underflow";
-	case DB_STRONG_FAIL:
-		return "Failed, retry will not succeed";
-	case DB_ZIP_OVERFLOW:
-		return "Zip overflow";
-	case DB_RECORD_NOT_FOUND:
-		return "Record not found";
-	case DB_CHILD_NO_INDEX:
-		return "No index on referencing keys in referencing table";
-	case DB_PARENT_NO_INDEX:
-		return "No index on referenced keys in referenced table";
-	case DB_FTS_INVALID_DOCID:
-		return "FTS Doc ID cannot be zero";
-	case DB_INDEX_CORRUPT:
-		return "Index corrupted";
-	case DB_UNDO_RECORD_TOO_BIG:
-		return "Undo record too big";
-	case DB_END_OF_INDEX:
-		return "End of index";
-	case DB_SEARCH_ABORTED_BY_USER:
-		return "Operation was interrupted by end user";
-	case DB_IO_ERROR:
-		return "I/O error";
-	case DB_TABLE_IN_FK_CHECK:
-		return "Table is being used in foreign key check";
-	case DB_DATA_MISMATCH:
-		return "data mismatch";
-	case DB_SCHEMA_NOT_LOCKED:
-		return "schema not locked";
-	case DB_NOT_FOUND:
-		return "not found";
-	case DB_ONLINE_LOG_TOO_BIG:
-		return "Log size exceeded during online index creation";
-	case DB_DICT_CHANGED:
-		return "Table dictionary has changed";
-	case DB_IDENTIFIER_TOO_LONG:
-		return "Identifier name is too long";
-	case DB_FTS_EXCEED_RESULT_CACHE_LIMIT:
-		return "FTS query exceeds result cache limit";
-	case DB_TEMP_FILE_WRITE_FAILURE:
-		return "Temp file write failure";
-	case DB_FTS_TOO_MANY_WORDS_IN_PHRASE:
-		return "Too many words in a FTS phrase or proximity search";
-	case DB_TOO_BIG_FOR_REDO:
-		return "BLOB record length is greater than 10%% of redo log";
-	case DB_DECRYPTION_FAILED:
-		return "Table is encrypted but decrypt failed.";
-	case DB_PAGE_CORRUPTED:
-		return "Page read from tablespace is corrupted.";
-
-	/* Deliberately no default: label, so that the compiler warns when
-	a value is added to the enum without a matching case here. */
-	}
-
-	/* An unknown error code may mean that memory corruption has
-	overwritten someone's error-code variable with bogus data, so
-	abort rather than carry on. */
-	ut_error;
-
-	/* NOT REACHED */
-	return "Unknown error";
-}
-#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/xtradb/ut/ut0vec.cc b/storage/xtradb/ut/ut0vec.cc
deleted file mode 100644
index 5842d9f1c0e..00000000000
--- a/storage/xtradb/ut/ut0vec.cc
+++ /dev/null
@@ -1,78 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/*******************************************************************//**
-@file ut/ut0vec.cc
-A vector of pointers to data items
-
-Created 4/6/2006 Osku Salerma
-************************************************************************/
-
-#include "ut0vec.h"
-#ifdef UNIV_NONINL
-#include "ut0vec.ic"
-#endif
-#include "mem0mem.h"
-
-/********************************************************************
-Create a new, empty vector with room for the given number of items.
-Both the vector header and its item storage are obtained from the
-supplied allocator. The initial size must be greater than zero.
-@return vector */
-UNIV_INTERN
-ib_vector_t*
-ib_vector_create(
-/*=============*/
-	ib_alloc_t*	allocator,	/*!< in: vector allocator */
-	ulint		sizeof_value,	/*!< in: size of data item */
-	ulint		size)		/*!< in: initial size */
-{
-	ut_a(size > 0);
-
-	ib_vector_t*	vec = static_cast<ib_vector_t*>(
-		allocator->mem_malloc(allocator, sizeof(ib_vector_t)));
-
-	vec->allocator = allocator;
-	vec->sizeof_value = sizeof_value;
-	vec->total = size;
-	vec->used = 0;
-
-	/* Storage for "size" items of "sizeof_value" bytes each. */
-	vec->data = static_cast<void*>(
-		allocator->mem_malloc(allocator, sizeof_value * size));
-
-	return(vec);
-}
-
-/********************************************************************
-Grow the vector: its capacity is doubled through the allocator's
-mem_resize() callback. Shrinking is not supported. */
-UNIV_INTERN
-void
-ib_vector_resize(
-/*=============*/
-	ib_vector_t*	vec)	/*!< in/out: vector */
-{
-	const ulint	item_size = vec->sizeof_value;
-	const ulint	doubled = 2 * vec->total;
-
-	/* The old size passed on covers only the items in use,
-	the new size covers the whole doubled capacity. */
-	vec->data = static_cast<void*>(vec->allocator->mem_resize(
-		vec->allocator, vec->data,
-		vec->used * item_size, doubled * item_size));
-
-	vec->total = doubled;
-}
diff --git a/storage/xtradb/ut/ut0wqueue.cc b/storage/xtradb/ut/ut0wqueue.cc
deleted file mode 100644
index 1607e535a94..00000000000
--- a/storage/xtradb/ut/ut0wqueue.cc
+++ /dev/null
@@ -1,224 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-#include "ut0wqueue.h"
-
-/*******************************************************************//**
-@file ut/ut0wqueue.cc
-A work queue
-
-Created 4/26/2006 Osku Salerma
-************************************************************************/
-
-/****************************************************************//**
-Allocate a work queue and initialize its mutex, its (empty) item list
-and its event.
-@return work queue */
-UNIV_INTERN
-ib_wqueue_t*
-ib_wqueue_create(void)
-/*===================*/
-{
-	void*		mem = mem_alloc(sizeof(ib_wqueue_t));
-	ib_wqueue_t*	wq = static_cast<ib_wqueue_t*>(mem);
-
-	/* ib_wqueue_create() has not been used anywhere, so there is
-	no need to instrument this mutex. */
-	mutex_create(PFS_NOT_INSTRUMENTED, &wq->mutex, SYNC_WORK_QUEUE);
-
-	wq->items = ib_list_create();
-	wq->event = os_event_create();
-
-	return(wq);
-}
-
-/****************************************************************//**
-Free a work queue: the queue's mutex, its item list and its event are
-released, and finally the queue structure itself. The pointer must not
-be used after this call.
-NOTE(review): what happens to work items still linked in the list is
-decided by ib_list_free(), which is not visible here - confirm before
-freeing a non-empty queue. */
-UNIV_INTERN
-void
-ib_wqueue_free(
-/*===========*/
- ib_wqueue_t* wq) /*!< in, own: work queue to free */
-{
- mutex_free(&wq->mutex);
- ib_list_free(wq->items);
- os_event_free(wq->event);
-
- mem_free(wq);
-}
-
-/****************************************************************//**
-Add a work item to the end of the queue and set the queue's event.
-Both steps are done while holding the queue mutex. */
-UNIV_INTERN
-void
-ib_wqueue_add(
-/*==========*/
- ib_wqueue_t* wq, /*!< in: work queue */
- void* item, /*!< in: work item */
- mem_heap_t* heap) /*!< in: memory heap to use for allocating the
- list node */
-{
- mutex_enter(&wq->mutex);
-
- ib_list_add_last(wq->items, item, heap);
- os_event_set(wq->event); /* set only after the item is in the list */
-
- mutex_exit(&wq->mutex);
-}
-
-/****************************************************************//**
-Block until a work item is available, then remove the first item from
-the queue and return it. There is no timeout: the function only
-returns once it has obtained an item.
-@return work item */
-UNIV_INTERN
-void*
-ib_wqueue_wait(
-/*===========*/
-	ib_wqueue_t*	wq)	/*!< in: work queue */
-{
-	ib_list_node_t*	first;
-
-	do {
-		os_event_wait(wq->event);
-
-		mutex_enter(&wq->mutex);
-
-		first = ib_list_get_first(wq->items);
-
-		if (first != NULL) {
-			ib_list_remove(wq->items, first);
-
-			/* We must reset the event when the list
-			gets emptied. */
-			if (ib_list_get_first(wq->items) == NULL) {
-				os_event_reset(wq->event);
-			}
-		}
-
-		mutex_exit(&wq->mutex);
-
-		/* The event was set but the list was already empty:
-		go back to waiting. */
-	} while (first == NULL);
-
-	return(first->data);
-}
-
-
-/********************************************************************
-Remove and return the first work item, waiting for one to appear if the
-queue is empty. Every wait uses the full wait_in_usecs again, so the
-total time spent here can exceed wait_in_usecs when a wake-up finds the
-queue empty.
-@return work item or NULL on timeout */
-
-void*
-ib_wqueue_timedwait(
-/*================*/
-	ib_wqueue_t*	wq,		/*!< in: work queue */
-	ib_time_t	wait_in_usecs)	/*!< in: wait time in micro seconds */
-{
-	for (;;) {
-		mutex_enter(&wq->mutex);
-
-		ib_list_node_t*	node = ib_list_get_first(wq->items);
-
-		if (node != NULL) {
-			ib_list_remove(wq->items, node);
-			mutex_exit(&wq->mutex);
-
-			return(node->data);
-		}
-
-		/* Nothing queued. The event is reset while the mutex is
-		still held and the value returned by the reset is handed
-		to the wait below (presumably so that an item added after
-		the mutex is released is not missed - confirm against
-		os_event_wait_time_low()). */
-		const ib_int64_t	sig_count = os_event_reset(wq->event);
-
-		mutex_exit(&wq->mutex);
-
-		const ulint	status = os_event_wait_time_low(
-			wq->event, (ulint) wait_in_usecs, sig_count);
-
-		if (status == OS_SYNC_TIME_EXCEEDED) {
-			return(NULL);
-		}
-	}
-}
-
-/********************************************************************
-Remove and return the first item on the work queue without waiting.
-The queue's event is reset whenever the queue is empty on exit.
-@return work item or NULL if the queue is empty */
-void*
-ib_wqueue_nowait(
-/*=============*/
-	ib_wqueue_t*	wq)	/*!< in: work queue */
-{
-	mutex_enter(&wq->mutex);
-
-	ib_list_node_t*	first = ib_list_is_empty(wq->items)
-		? NULL
-		: ib_list_get_first(wq->items);
-
-	if (first != NULL) {
-		ib_list_remove(wq->items, first);
-	}
-
-	/* We must reset the event when the list
-	gets emptied. */
-	if (ib_list_is_empty(wq->items)) {
-		os_event_reset(wq->event);
-	}
-
-	mutex_exit(&wq->mutex);
-
-	if (first == NULL) {
-		return(NULL);
-	}
-
-	return(first->data);
-}
-
-/********************************************************************
-Check if queue is empty. Unlike ib_wqueue_len(), this function does not
-acquire wq->mutex: it only forwards to ib_list_is_empty() on the item
-list. */
-
-ibool
-ib_wqueue_is_empty(
-/*===============*/
- /* out: the result of ib_list_is_empty()
- for the queue's item list */
- const ib_wqueue_t* wq) /* in: work queue */
-{
- return(ib_list_is_empty(wq->items));
-}
-
-/********************************************************************
-Count the items currently on the queue. The count is taken while
-holding the queue mutex.
-@return number of items on queue */
-ulint
-ib_wqueue_len(
-/*==========*/
-	ib_wqueue_t*	wq)	/*!< in: work queue */
-{
-	mutex_enter(&wq->mutex);
-	const ulint	n_items = ib_list_len(wq->items);
-	mutex_exit(&wq->mutex);
-
-	return(n_items);
-}